PyPI - smartmemory - Versions diffs - 1.4.26__tar.gz → 1.4.28__tar.gz - Mend

smartmemory 1.4.26.tar.gz → 1.4.28.tar.gz

This diff compares two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (102)

{smartmemory-1.4.26 → smartmemory-1.4.28}/CHANGELOG.md RENAMED Viewed

@@ -3,6 +3,26 @@
 Notable, **user-facing** changes to the `smartmemory` distribution package. The wrapper is thin — it pins an exact `smartmemory-core` version and the two move in lockstep — so entries here highlight what a release *delivers* (features, fixes, security), not routine version-pin bumps. For full internal detail, see [`smartmemory-core`'s CHANGELOG](https://github.com/smart-memory/smart-memory-core/blob/main/CHANGELOG.md). Loosely follows [Keep a Changelog](https://keepachangelog.com); not every patch release gets an entry.
 ## [Unreleased]
+### Changed (auto, lockstep) — track smartmemory-core==1.4.28 (1.4.28)
+- Version copied from smartmemory-core 1.4.28 release (single-source lockstep).
+### Changed (auto, lockstep) — track smartmemory-core==1.4.27 (1.4.27)
+- Version copied from smartmemory-core 1.4.27 release (single-source lockstep).
+### Added — `smartmemory warm` + background model warming (DIST-LITE-WARMSTART-1)
+- New `smartmemory warm` CLI command pre-loads the local embedder (and reranker) so the
+  first `add`/`search` is instant instead of paying a cold model load (~12s, or ~38s the
+  first time the model downloads). Run once after install or before a demo. `--no-reranker`
+  warms the embedder only.
+- The local backend now **warms the embedder in the background at construction** (daemon
+  thread) so the user's first `add()` overlaps the model load instead of paying it inline.
+  Opt out with `SMARTMEMORY_NO_WARM=1`.
+- Direct (no-daemon) CLI ops (`add`, `recall`) now print a one-time "First run: loading
+  local models…" notice before a cold load, so first-run isn't a silent hang. (The daemon
+  path already prints "loading models" at `start`.)
+- Paired with the core reranker fix (non-blocking model load), this removes the
+  multi-second first-run hang from the local FREE path.
 ### Changed (auto, lockstep) — track smartmemory-core==1.4.26 (1.4.26)
 - Version copied from smartmemory-core 1.4.26 release (single-source lockstep).

{smartmemory-1.4.26 → smartmemory-1.4.28}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: smartmemory
-Version: 1.4.26
+Version: 1.4.28
 License-File: LICENSE
 License-File: LICENSE.agpl-v3
 License-File: LICENSE.header
@@ -10,7 +10,7 @@ Requires-Dist: fastapi>=0.110
 Requires-Dist: filelock>=3.12
 Requires-Dist: httpx>=0.27
 Requires-Dist: keyring>=24.0
-Requires-Dist: smartmemory-core[lite]==1.4.26
+Requires-Dist: smartmemory-core[lite]==1.4.28
 Requires-Dist: smartmemory-mcp>=0.2.0
 Requires-Dist: textual>=8.0
 Requires-Dist: tomli-w>=1.0

{smartmemory-1.4.26 → smartmemory-1.4.28}/pyproject.toml RENAMED Viewed

@@ -4,10 +4,10 @@ build-backend = "hatchling.build"
 [project]
 name = "smartmemory"
-version = "1.4.26"
+version = "1.4.28"
 requires-python = ">=3.11"
 dependencies = [
-    "smartmemory-core[lite]==1.4.26",  # EXACT pin — wrapper and core versions move in lockstep
+    "smartmemory-core[lite]==1.4.28",  # EXACT pin — wrapper and core versions move in lockstep
     "filelock>=3.12",                    # cross-process SQLite write locking
     "smartmemory-mcp>=0.2.0",          # unified MCP server (PLAT-MCP-UNIFY-1)
     "httpx>=0.27",             # remote API calls (DIST-LITE-5)

{smartmemory-1.4.26 → smartmemory-1.4.28}/smartmemory_app/cli.py RENAMED Viewed

@@ -148,6 +148,26 @@ def restart_cmd(num_workers: int) -> None:
     click.echo("Daemon ready.")
+@cli.command("warm")
+@click.option("--no-reranker", is_flag=True, help="Warm only the embedder, skip the reranker model.")
+def warm_cmd(no_reranker: bool) -> None:
+    """Pre-load the local models so the first add/search is instant.
+    The first add() otherwise pays a cold embedder load (~12s, or ~38s the first
+    time the model downloads) and the first search() pays a cold reranker load.
+    Run this once after install — or before a demo — to move that cost off the
+    user's first real call (DIST-LITE-WARMSTART-1).
+    """
+    import time
+    from smartmemory_app.warm import warm_models
+    click.echo("Warming local models (one-time; subsequent runs are cached)...")
+    t0 = time.perf_counter()
+    warm_models(reranker=not no_reranker)
+    click.echo(f"Models warm in {time.perf_counter() - t0:.1f}s. First add/search will now be fast.")
 @cli.command("status")
 def status_cmd() -> None:
     """Show SmartMemory daemon status."""
@@ -229,6 +249,28 @@ def _validate_memory_type(ctx, param, value: str) -> str:
     return value
+_warm_notice_shown = False
+def _warm_notice() -> None:
+    """Show a one-time notice if a direct (no-daemon) op is about to pay a cold model
+    load, so first-run isn't a silent multi-second hang (DIST-LITE-WARMSTART-1). The
+    daemon path already prints "loading models" at start; this covers direct CLI ops.
+    """
+    global _warm_notice_shown
+    if _warm_notice_shown:
+        return
+    from smartmemory_app.warm import is_warm
+    if not is_warm():
+        click.echo(
+            "First run: loading local models (~10–40s, one-time). "
+            "Tip: run 'smartmemory warm' to pre-load.",
+            err=True,
+        )
+        _warm_notice_shown = True
 @cli.command(
     "add",
     context_settings=dict(
@@ -278,7 +320,7 @@ def add_cmd(ctx, text: str, memory_type: str, as_whole: bool) -> None:
         chunks = (
             [raw.strip()]
             if as_whole
-            else [l.strip() for l in raw.splitlines() if l.strip()]
+            else [ln.strip() for ln in raw.splitlines() if ln.strip()]
         )
         if not chunks:
             raise click.ClickException("Content cannot be empty.")
@@ -294,6 +336,7 @@ def add_cmd(ctx, text: str, memory_type: str, as_whole: bool) -> None:
             else:
                 from smartmemory_app.storage import ingest
+                _warm_notice()
                 ids.append(ingest(chunk, memory_type, properties=props))
         click.echo(f"Added {len(ids)} memories")
         for item_id in ids:
@@ -311,6 +354,7 @@ def add_cmd(ctx, text: str, memory_type: str, as_whole: bool) -> None:
     else:
         from smartmemory_app.storage import ingest
+        _warm_notice()
         click.echo(ingest(text, memory_type, properties=props))
@@ -353,6 +397,7 @@ def recall_cmd(
     else:
         from smartmemory_app.storage import recall
+        _warm_notice()
         context = recall(
             cwd,
             top_k,

{smartmemory-1.4.26 → smartmemory-1.4.28}/smartmemory_app/storage.py RENAMED Viewed

@@ -107,6 +107,15 @@ def _get_local_memory(data_dir: str | None = None) -> "SmartMemory":
             event_sink=get_event_sink(),    # DIST-LITE-3
         )
         atexit.register(_shutdown)
+        # DIST-LITE-WARMSTART-1: warm the local embedder in the background so the
+        # user's first add() overlaps the ~12s model load instead of paying it inline.
+        # Daemon thread, idempotent, opt out with SMARTMEMORY_NO_WARM=1.
+        try:
+            from smartmemory_app.warm import warm_models_background
+            warm_models_background()
+        except Exception:
+            pass
         return _memory

smartmemory-1.4.28/smartmemory_app/warm.py ADDED Viewed

@@ -0,0 +1,84 @@
+"""Model pre-warming for the local FREE path (DIST-LITE-WARMSTART-1).
+The first ``add()`` pays a cold embedder load (~12s, or ~38s the first time the
+model is downloaded) and the first ``search()`` would pay a cold reranker load.
+These helpers load the models ahead of (or in parallel with) the user's first real
+call so the one-command "wow" moment isn't a multi-second hang.
+- ``warm_models()`` — foreground; used by the ``smartmemory warm`` CLI and install
+  prefetch, where a visible one-time wait is acceptable.
+- ``warm_models_background()`` — fire-and-forget daemon thread; used at local
+  construction so the embedder load overlaps construction + user think-time. Opt out
+  with ``SMARTMEMORY_NO_WARM=1``.
+"""
+from __future__ import annotations
+import logging
+import os
+import threading
+logger = logging.getLogger(__name__)
+_warm_started = False
+_warm_lock = threading.Lock()
+def is_warm() -> bool:
+    """True if the local embedder model is already resident in this process.
+    Used to decide whether a foreground op is about to pay a cold load, so the
+    caller can show a progress notice instead of a silent hang.
+    """
+    try:
+        from smartmemory.plugins.embedding import EmbeddingService
+        return EmbeddingService._st_model is not None or EmbeddingService._pinned_local_model is not None
+    except Exception:
+        return False
+def warm_models(*, reranker: bool = True) -> None:
+    """Synchronously load the local embedder (and optionally the reranker).
+    Never raises — a warm failure must not break the caller; the model will simply
+    load lazily on first use as before.
+    """
+    try:
+        from smartmemory.plugins.embedding import EmbeddingService
+        if EmbeddingService().warm():
+            logger.debug("Embedder warmed")
+    except Exception as e:
+        logger.debug("Embedder warm skipped: %s", e)
+    if reranker:
+        try:
+            from smartmemory.search.rerank import CrossEncoderReranker
+            # block=True here is intentional: the CLI/prefetch path WANTS to wait.
+            CrossEncoderReranker.get_model(block=True)
+        except Exception as e:
+            logger.debug("Reranker warm skipped: %s", e)
+def warm_models_background(*, reranker: bool = False) -> None:
+    """Kick a one-time daemon thread to warm models without blocking the caller.
+    Defaults to embedder-only (every ``add()`` needs it; the reranker already
+    background-loads lazily on first search). Opt out with ``SMARTMEMORY_NO_WARM=1``.
+    Idempotent — only the first call per process starts a thread.
+    """
+    global _warm_started
+    if os.environ.get("SMARTMEMORY_NO_WARM"):
+        return
+    with _warm_lock:
+        if _warm_started:
+            return
+        _warm_started = True
+    threading.Thread(
+        target=warm_models,
+        kwargs={"reranker": reranker},
+        name="smartmemory-warm",
+        daemon=True,
+    ).start()

smartmemory-1.4.28/tests/integration/test_warm.py ADDED Viewed

@@ -0,0 +1,104 @@
+"""DIST-LITE-WARMSTART-1 — background model warming orchestration.
+Tests the *scheduling* contract (opt-out + warm-once), not the model load itself
+(the embedder/reranker loads are verified separately and would require a model
+download here). Marked integration: exercises real threads.
+"""
+import threading
+import pytest
+import smartmemory_app.warm as warm
+pytestmark = pytest.mark.integration
+def _join_warm_threads(timeout=5):
+    for t in threading.enumerate():
+        if t.name == "smartmemory-warm":
+            t.join(timeout=timeout)
+def test_background_warm_opts_out(monkeypatch):
+    """SMARTMEMORY_NO_WARM=1 schedules nothing — no thread, no state change."""
+    monkeypatch.setenv("SMARTMEMORY_NO_WARM", "1")
+    warm._warm_started = False
+    before = threading.active_count()
+    warm.warm_models_background()
+    assert warm._warm_started is False
+    assert threading.active_count() == before
+def test_background_warm_runs_once(monkeypatch):
+    """Repeated calls schedule the warm work exactly once per process."""
+    monkeypatch.delenv("SMARTMEMORY_NO_WARM", raising=False)
+    warm._warm_started = False
+    calls = []
+    # Substitute the downstream loader (verified for real elsewhere) so this test
+    # asserts the SCHEDULER, deterministically and without a model download.
+    monkeypatch.setattr(warm, "warm_models", lambda **kwargs: calls.append(kwargs))
+    warm.warm_models_background()
+    warm.warm_models_background()
+    warm.warm_models_background()
+    _join_warm_threads()
+    assert warm._warm_started is True
+    assert len(calls) == 1  # only the first call did work
+def test_warm_models_never_raises(monkeypatch):
+    """warm_models() must swallow loader failures — a warm error can't break callers."""
+    import smartmemory.plugins.embedding as emb
+    def boom(self):
+        raise RuntimeError("simulated model load failure")
+    monkeypatch.setattr(emb.EmbeddingService, "warm", boom)
+    # reranker=False so we don't trigger a real cross-encoder download here.
+    warm.warm_models(reranker=False)  # must not raise
+def test_is_warm_false_when_unloaded(monkeypatch):
+    """is_warm() reports False when no local model is resident."""
+    import smartmemory.plugins.embedding as emb
+    monkeypatch.setattr(emb.EmbeddingService, "_st_model", None, raising=False)
+    monkeypatch.setattr(emb.EmbeddingService, "_pinned_local_model", None, raising=False)
+    assert warm.is_warm() is False
+def test_cli_warm_notice_prints_once_when_cold(monkeypatch, capsys):
+    """The direct-CLI cold-load notice fires exactly once, only when not warm."""
+    from smartmemory_app import cli
+    monkeypatch.setattr("smartmemory_app.warm.is_warm", lambda: False)
+    cli._warm_notice_shown = False
+    cli._warm_notice()
+    cli._warm_notice()
+    err = capsys.readouterr().err
+    assert err.count("First run: loading local models") == 1
+def test_add_cmd_direct_path_emits_notice(monkeypatch):
+    """The real `add` command, on the direct (no-daemon) path with a cold model,
+    emits the warm notice before the blocking ingest."""
+    from click.testing import CliRunner
+    from smartmemory_app import cli
+    monkeypatch.setattr(cli, "_daemon_request", lambda *a, **k: None)  # force direct path
+    monkeypatch.setattr("smartmemory_app.storage.ingest", lambda *a, **k: "id-123")
+    monkeypatch.setattr("smartmemory_app.warm.is_warm", lambda: False)
+    cli._warm_notice_shown = False
+    r = CliRunner(mix_stderr=False).invoke(cli.cli, ["add", "hello world"])
+    assert r.exit_code == 0, r.output
+    assert "id-123" in r.stdout
+    assert "First run: loading local models" in r.stderr