memorytalk 0.8.0__tar.gz → 0.8.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {memorytalk-0.8.0 → memorytalk-0.8.2}/PKG-INFO +1 -1
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/__init__.py +27 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/search.py +2 -1
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/sync.py +45 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/__init__.py +1 -1
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/_format.py +1 -11
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/_render.py +1 -1
- memorytalk-0.8.2/memorytalk/cli/search.py +76 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/config.py +32 -4
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/provider/lancedb.py +177 -21
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/schema.py +18 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/search_log.py +4 -3
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/search.py +16 -9
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/sync.py +33 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/backfill.py +119 -11
- memorytalk-0.8.2/memorytalk/service/index_buffer.py +206 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/search.py +49 -66
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/sessions.py +19 -5
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/PKG-INFO +1 -1
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/SOURCES.txt +1 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/pyproject.toml +1 -1
- memorytalk-0.8.0/memorytalk/cli/search.py +0 -53
- {memorytalk-0.8.0 → memorytalk-0.8.2}/LICENSE +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/README.md +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/__init__.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/__main__.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/adapters/__init__.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/adapters/base.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/adapters/claude_code.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/adapters/codex.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/adapters/openclaw.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/cards.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/read.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/recall.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/reviews.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/sessions.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/status.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/_http.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/card.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/read.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/recall.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/review.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/server.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/session.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/setup.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/sync.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/upgrade.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/provider/__init__.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/provider/embedding.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/provider/storage.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/__init__.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/cards.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/recall.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/reviews.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/sessions.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/store.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/sync_checkpoint.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/__init__.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/card.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/cards.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/read.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/recall.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/review.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/reviews.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/session.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/status.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/server.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/__init__.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/cards.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/events.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/read.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/recall.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/reviews.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/sync.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/__init__.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/console.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/dsl.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/env_template.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/formula.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/highlight.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/ids.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/indexes.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/settings_io.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/tag_filter.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/tags.py +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/dependency_links.txt +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/entry_points.txt +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/requires.txt +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/top_level.txt +0 -0
- {memorytalk-0.8.0 → memorytalk-0.8.2}/setup.cfg +0 -0
|
@@ -32,6 +32,7 @@ from memorytalk.service import (
|
|
|
32
32
|
RecallService, ReviewService,
|
|
33
33
|
)
|
|
34
34
|
from memorytalk.service.backfill import IndexBackfill
|
|
35
|
+
from memorytalk.service.index_buffer import IndexWriteBuffer
|
|
35
36
|
from memorytalk.service.search import SearchService
|
|
36
37
|
from memorytalk.service.sync import SyncWatcher
|
|
37
38
|
|
|
@@ -71,9 +72,22 @@ def create_app(config: Config | None = None) -> FastAPI:
|
|
|
71
72
|
app.state.vectors = vectors
|
|
72
73
|
app.state.embedder = embedder
|
|
73
74
|
app.state.events = events
|
|
75
|
+
# IndexWriteBuffer aggregates LanceDB inserts across sessions so
|
|
76
|
+
# one ``table.add()`` carries many embedder batches' worth of
|
|
77
|
+
# rows. Without it the ingest path creates one fragment per
|
|
78
|
+
# embedder batch (10 rows with DashScope) → vector search eventually
|
|
79
|
+
# EMFILEs on fd ceiling. See service/index_buffer.py and
|
|
80
|
+
# docs/issue #4 §4.3.
|
|
81
|
+
app.state.index_buffer = IndexWriteBuffer(
|
|
82
|
+
vectors=vectors, db=db,
|
|
83
|
+
flush_rows=config.settings.index.lance_flush_rows,
|
|
84
|
+
flush_interval_seconds=config.settings.index.lance_flush_interval_seconds,
|
|
85
|
+
)
|
|
86
|
+
app.state.index_buffer.start()
|
|
74
87
|
app.state.read = ReadService(db=db, events=events)
|
|
75
88
|
app.state.ingest = IngestService(
|
|
76
89
|
db=db, vectors=vectors, embedder=embedder, events=events,
|
|
90
|
+
index_buffer=app.state.index_buffer,
|
|
77
91
|
)
|
|
78
92
|
app.state.sync_checkpoints = sync_checkpoints
|
|
79
93
|
app.state.sync = SyncWatcher(
|
|
@@ -108,8 +122,15 @@ def create_app(config: Config | None = None) -> FastAPI:
|
|
|
108
122
|
# lifespan shutdown.
|
|
109
123
|
app.state.backfill = IndexBackfill(
|
|
110
124
|
db=db, vectors=vectors, embedder=embedder,
|
|
125
|
+
index_buffer=app.state.index_buffer,
|
|
111
126
|
)
|
|
112
127
|
app.state.backfill.start()
|
|
128
|
+
# Guaranteed one-shot compaction on every boot — grinds down the
|
|
129
|
+
# append-only fragment pile (cause of EMFILE in vector search)
|
|
130
|
+
# so a restart always makes progress. Side path off the re-embed
|
|
131
|
+
# loop: gated only on vectors, runs in the background, never
|
|
132
|
+
# blocks startup. See IndexBackfill.trigger_startup_compaction.
|
|
133
|
+
app.state.backfill.trigger_startup_compaction()
|
|
113
134
|
|
|
114
135
|
yield
|
|
115
136
|
|
|
@@ -123,6 +144,12 @@ def create_app(config: Config | None = None) -> FastAPI:
|
|
|
123
144
|
await app.state.backfill.stop()
|
|
124
145
|
except Exception:
|
|
125
146
|
pass
|
|
147
|
+
# Drain in-flight LanceDB writes before tearing down the DB
|
|
148
|
+
# — otherwise pending vectors are lost on shutdown.
|
|
149
|
+
try:
|
|
150
|
+
await app.state.index_buffer.stop()
|
|
151
|
+
except Exception:
|
|
152
|
+
pass
|
|
126
153
|
await db.close()
|
|
127
154
|
await sync_checkpoints.close()
|
|
128
155
|
|
|
@@ -21,7 +21,8 @@ async def post_search(payload: SearchRequest, request: Request) -> SearchRespons
|
|
|
21
21
|
query=payload.query or "",
|
|
22
22
|
where=payload.where,
|
|
23
23
|
top_k=payload.top_k,
|
|
24
|
-
|
|
24
|
+
recall_mode=payload.recall_mode,
|
|
25
|
+
recall_session_id=payload.recall_session_id,
|
|
25
26
|
)
|
|
26
27
|
except DSLError as e:
|
|
27
28
|
raise HTTPException(status_code=400, detail=str(e))
|
|
@@ -12,6 +12,50 @@ from fastapi import APIRouter, Query, Request
|
|
|
12
12
|
router = APIRouter()
|
|
13
13
|
|
|
14
14
|
|
|
15
|
+
def _gather_lance_health(state) -> dict:
|
|
16
|
+
"""Collect LanceDB-layer observability for ``index.lance``.
|
|
17
|
+
|
|
18
|
+
Pulls from three sources: the IndexWriteBuffer (write pipeline),
|
|
19
|
+
IndexBackfill (compaction cadence), and LanceStore (EMFILE
|
|
20
|
+
recovery count). All fields default to safe zeros / None when the
|
|
21
|
+
corresponding component is absent so a partially-disabled boot
|
|
22
|
+
still returns a well-shaped response.
|
|
23
|
+
"""
|
|
24
|
+
buf = getattr(state, "index_buffer", None)
|
|
25
|
+
backfill = getattr(state, "backfill", None)
|
|
26
|
+
vectors = getattr(state, "vectors", None)
|
|
27
|
+
|
|
28
|
+
soft = hard = None
|
|
29
|
+
try:
|
|
30
|
+
import resource
|
|
31
|
+
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
|
|
32
|
+
except (ImportError, OSError):
|
|
33
|
+
# Windows / sandboxed envs — leave None so the field's
|
|
34
|
+
# absence is the signal.
|
|
35
|
+
pass
|
|
36
|
+
|
|
37
|
+
return {
|
|
38
|
+
"pending_vector_rows": (buf.pending_rows if buf is not None else 0),
|
|
39
|
+
"last_flush_at": (buf.last_flush_at_iso if buf is not None else None),
|
|
40
|
+
"last_flush_error": (buf.last_flush_error if buf is not None else None),
|
|
41
|
+
"flush_count_since_boot": (buf.flush_count if buf is not None else 0),
|
|
42
|
+
"last_compaction_at": (
|
|
43
|
+
backfill.last_compact_at_iso if backfill is not None else None
|
|
44
|
+
),
|
|
45
|
+
"last_compaction_error": (
|
|
46
|
+
backfill.last_compact_error if backfill is not None else None
|
|
47
|
+
),
|
|
48
|
+
"emfile_recoveries_since_boot": (
|
|
49
|
+
vectors.emfile_recoveries if vectors is not None else 0
|
|
50
|
+
),
|
|
51
|
+
"last_emfile_at": (
|
|
52
|
+
vectors.last_emfile_at_iso if vectors is not None else None
|
|
53
|
+
),
|
|
54
|
+
"fd_soft_limit": soft,
|
|
55
|
+
"fd_hard_limit": hard,
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
|
|
15
59
|
@router.get("/sync/status")
|
|
16
60
|
async def get_sync_status(request: Request, limit: int = Query(5, ge=0, le=20)):
|
|
17
61
|
config = request.app.state.config
|
|
@@ -31,6 +75,7 @@ async def get_sync_status(request: Request, limit: int = Query(5, ge=0, le=20)):
|
|
|
31
75
|
index["last_index_error"] = (
|
|
32
76
|
backfill.last_error if backfill is not None else None
|
|
33
77
|
)
|
|
78
|
+
index["lance"] = _gather_lance_health(request.app.state)
|
|
34
79
|
|
|
35
80
|
if not config.settings.sync.enabled:
|
|
36
81
|
return {"status": "disabled", "index": index}
|
|
@@ -15,7 +15,7 @@ import click
|
|
|
15
15
|
@click.option(
|
|
16
16
|
"--no-pager", "no_pager", is_flag=True, default=False,
|
|
17
17
|
help="Disable the scrollable pager (only applies to commands that "
|
|
18
|
-
"opt-in — currently
|
|
18
|
+
"opt-in — currently `read` and `search`). Equivalent to NO_PAGER=1.",
|
|
19
19
|
)
|
|
20
20
|
def main(no_pager: bool) -> None:
|
|
21
21
|
"""memory.talk v3."""
|
|
@@ -354,15 +354,12 @@ def fmt_recall(payload: dict) -> str:
|
|
|
354
354
|
def fmt_search(payload: dict) -> str:
|
|
355
355
|
query = payload.get("query") or ""
|
|
356
356
|
count = payload.get("count", 0)
|
|
357
|
-
hidden = int(payload.get("hidden_count") or 0)
|
|
358
357
|
sid = payload.get("search_id", "")
|
|
359
358
|
header = f"`search_id={sid}` · {count} results"
|
|
360
|
-
if hidden:
|
|
361
|
-
header += f" · {hidden} hidden"
|
|
362
359
|
parts: list[str] = [f"# search: {query}" if query else "# search",
|
|
363
360
|
"", header, ""]
|
|
364
361
|
|
|
365
|
-
if count == 0
|
|
362
|
+
if count == 0:
|
|
366
363
|
return "\n".join(parts) + "\n"
|
|
367
364
|
|
|
368
365
|
for entry in payload.get("results") or []:
|
|
@@ -374,13 +371,6 @@ def fmt_search(payload: dict) -> str:
|
|
|
374
371
|
parts.append(_fmt_search_session(entry))
|
|
375
372
|
parts.append("")
|
|
376
373
|
|
|
377
|
-
if hidden:
|
|
378
|
-
parts.append(
|
|
379
|
-
f"_({hidden} weak result{'s' if hidden != 1 else ''} hidden "
|
|
380
|
-
"by strong-floor filter — pass `--all` to see)_"
|
|
381
|
-
)
|
|
382
|
-
parts.append("")
|
|
383
|
-
|
|
384
374
|
return "\n".join(parts).rstrip() + "\n"
|
|
385
375
|
|
|
386
376
|
|
|
@@ -12,7 +12,7 @@ Errors:
|
|
|
12
12
|
- Markdown mode → ``**error:** <msg>`` to stderr, exit 1
|
|
13
13
|
- JSON mode → ``{"error": ...}`` to stdout, exit 1
|
|
14
14
|
|
|
15
|
-
Pager (opt-in per command
|
|
15
|
+
Pager (opt-in per command — currently ``read`` and ``search``):
|
|
16
16
|
|
|
17
17
|
- ``emit_md_paged`` wraps rich rendering in a less-style pager when
|
|
18
18
|
both stdin and stdout are TTYs. Subprocess / pipe / ``--json`` paths
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""CLI: search <query> [--where DSL] [--top-k N] [--json]."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from memorytalk.cli._format import fmt_error, fmt_search
|
|
8
|
+
from memorytalk.cli._http import ApiError, api, extract_error_message
|
|
9
|
+
from memorytalk.cli._render import (
|
|
10
|
+
emit_json, emit_json_err, emit_md_err, emit_md_paged,
|
|
11
|
+
)
|
|
12
|
+
from memorytalk.config import Config
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.command("search")
|
|
16
|
+
@click.argument("query", required=False, default="")
|
|
17
|
+
@click.option("--where", "-w", "where", type=str, default=None,
|
|
18
|
+
help="DSL filter (see docs/cli/v3/search.md#DSL)")
|
|
19
|
+
@click.option("--top-k", "top_k", type=int, default=None,
|
|
20
|
+
help="Total result cap (default = settings.search.default_top_k)")
|
|
21
|
+
@click.option("--recall", "recall_mode", is_flag=True, default=False,
|
|
22
|
+
help="Debug lens: rank like `recall` (cards-only, raw RRF, "
|
|
23
|
+
"no ranking_formula). Combine with --session to also "
|
|
24
|
+
"preview that session's recall_log dedup. Read-only — "
|
|
25
|
+
"does NOT bump recall_count or write recall_log.")
|
|
26
|
+
@click.option("--session", "session_id", type=str, default=None,
|
|
27
|
+
help="Session id for recall-mode dedup (only meaningful "
|
|
28
|
+
"with --recall).")
|
|
29
|
+
@click.option("--json", "json_out", is_flag=True, default=False, help="Emit JSON")
|
|
30
|
+
def search(
|
|
31
|
+
query: str, where: str | None, top_k: int | None,
|
|
32
|
+
recall_mode: bool, session_id: str | None, json_out: bool,
|
|
33
|
+
) -> None:
|
|
34
|
+
"""Hybrid FTS + vector search across cards and sessions."""
|
|
35
|
+
cfg = Config()
|
|
36
|
+
body: dict = {"query": query or ""}
|
|
37
|
+
if where:
|
|
38
|
+
body["where"] = where
|
|
39
|
+
if top_k is not None:
|
|
40
|
+
body["top_k"] = top_k
|
|
41
|
+
if recall_mode:
|
|
42
|
+
body["recall_mode"] = True
|
|
43
|
+
if session_id:
|
|
44
|
+
if not recall_mode:
|
|
45
|
+
# --session without --recall is a probable mistake; flag it
|
|
46
|
+
# rather than silently dropping the field.
|
|
47
|
+
emit_md_err(fmt_error(
|
|
48
|
+
"--session only takes effect with --recall (it scopes "
|
|
49
|
+
"the recall-log dedup preview)"
|
|
50
|
+
))
|
|
51
|
+
sys.exit(1)
|
|
52
|
+
body["recall_session_id"] = session_id
|
|
53
|
+
try:
|
|
54
|
+
result = api("POST", "/v3/search", cfg, json_body=body)
|
|
55
|
+
except ApiError as e:
|
|
56
|
+
if json_out:
|
|
57
|
+
emit_json_err(e.payload)
|
|
58
|
+
else:
|
|
59
|
+
emit_md_err(fmt_error(extract_error_message(e.payload)))
|
|
60
|
+
sys.exit(1)
|
|
61
|
+
except Exception as e:
|
|
62
|
+
if json_out:
|
|
63
|
+
emit_json_err(str(e))
|
|
64
|
+
else:
|
|
65
|
+
emit_md_err(fmt_error(f"cannot reach server: {e}"))
|
|
66
|
+
sys.exit(1)
|
|
67
|
+
|
|
68
|
+
if json_out:
|
|
69
|
+
emit_json(result)
|
|
70
|
+
else:
|
|
71
|
+
# Long result blocks (cards + per-session hit fences + ctx
|
|
72
|
+
# windows) routinely exceed a terminal page; route through the
|
|
73
|
+
# same less-style pager that ``read`` uses. Subprocess / pipe /
|
|
74
|
+
# ``--no-pager`` / ``--json`` fall back to plain output — see
|
|
75
|
+
# emit_md_paged docstring.
|
|
76
|
+
emit_md_paged(fmt_search(result))
|
|
@@ -16,10 +16,17 @@ from pathlib import Path
|
|
|
16
16
|
from pydantic import BaseModel, ConfigDict
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
_DEFAULT_RANKING_FORMULA =
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
)
|
|
19
|
+
_DEFAULT_RANKING_FORMULA = "relevance"
|
|
20
|
+
# ↑ As of 0.8.x: explicit search defaults to pure relevance (raw RRF
|
|
21
|
+
# score from LanceDB hybrid recall). Earlier defaults mixed in forum-
|
|
22
|
+
# dynamics signals (review_up - review_down + log(read_count+1) - age),
|
|
23
|
+
# which made identifier-style queries like `vvp-ai` unreliable — the
|
|
24
|
+
# strongest text match could rank below weakly-matched high-read
|
|
25
|
+
# cards. The forum-stats counters are still maintained on every card
|
|
26
|
+
# and remain queryable via ``--where 'DSL'`` (filter); the *ranking*
|
|
27
|
+
# is just left as relevance. Users who want forum dynamics back can
|
|
28
|
+
# set ``settings.search.ranking_formula`` to a richer expression like
|
|
29
|
+
# the old default. See docs/cli/v3/search.md.
|
|
23
30
|
|
|
24
31
|
|
|
25
32
|
class ConfigValidationError(RuntimeError):
|
|
@@ -101,6 +108,26 @@ class ExploreConfig(BaseModel):
|
|
|
101
108
|
auto_default_limit: int = 5
|
|
102
109
|
|
|
103
110
|
|
|
111
|
+
class IndexConfig(BaseModel):
|
|
112
|
+
"""Vector index write tuning (0.8.x — issue #4 §4.3 fix).
|
|
113
|
+
|
|
114
|
+
Decouples LanceDB ``table.add()`` batch size from the embedder's
|
|
115
|
+
per-request cap. Embedding still batches small (API limit); these
|
|
116
|
+
knobs control how the embedded rows aggregate before they hit
|
|
117
|
+
LanceDB, which directly drives fragment count and downstream fd
|
|
118
|
+
pressure on search.
|
|
119
|
+
"""
|
|
120
|
+
# Row count that triggers a synchronous flush. 500 is a balance
|
|
121
|
+
# between fragment-count savings (50× fewer fragments than the
|
|
122
|
+
# naive embedder-batch-sized writes at DashScope's 10-cap) and
|
|
123
|
+
# search-visibility latency for newly-ingested rounds.
|
|
124
|
+
lance_flush_rows: int = 500
|
|
125
|
+
# Wall-clock interval for the background flusher — catches the
|
|
126
|
+
# last partial batch when ingest is bursty then idle. 0 disables
|
|
127
|
+
# the background tick (tests use this).
|
|
128
|
+
lance_flush_interval_seconds: float = 30.0
|
|
129
|
+
|
|
130
|
+
|
|
104
131
|
class Settings(BaseModel):
|
|
105
132
|
server: ServerConfig = ServerConfig()
|
|
106
133
|
vector: ProviderConfig = ProviderConfig(provider="lancedb")
|
|
@@ -110,6 +137,7 @@ class Settings(BaseModel):
|
|
|
110
137
|
recall: RecallConfig = RecallConfig()
|
|
111
138
|
sync: SyncConfig = SyncConfig()
|
|
112
139
|
explore: ExploreConfig = ExploreConfig()
|
|
140
|
+
index: IndexConfig = IndexConfig()
|
|
113
141
|
|
|
114
142
|
|
|
115
143
|
def _default_data_root() -> Path:
|
|
@@ -11,12 +11,28 @@ in search results come from the ``cards`` table. SQLite holds zero search
|
|
|
11
11
|
state; jsonl files hold zero search state.
|
|
12
12
|
"""
|
|
13
13
|
from __future__ import annotations
|
|
14
|
+
import asyncio
|
|
15
|
+
import datetime as _dt
|
|
16
|
+
import logging
|
|
14
17
|
from pathlib import Path
|
|
15
18
|
from typing import Optional
|
|
16
19
|
|
|
17
20
|
import pyarrow as pa
|
|
18
21
|
|
|
19
22
|
|
|
23
|
+
_log = logging.getLogger("memorytalk.lancedb")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _is_emfile(exc: BaseException) -> bool:
|
|
27
|
+
"""Recognize Lance's wrapped EMFILE — comes through as a
|
|
28
|
+
``RuntimeError`` whose ``str()`` contains "Too many open files".
|
|
29
|
+
We can't match on errno because Lance wraps the OS error inside
|
|
30
|
+
its own ``LanceError(IO)`` before raising. String match is fragile
|
|
31
|
+
but it's the only signal Lance gives us on this path."""
|
|
32
|
+
msg = str(exc)
|
|
33
|
+
return "Too many open files" in msg or "(os error 24)" in msg
|
|
34
|
+
|
|
35
|
+
|
|
20
36
|
def _segment(text: str) -> str:
|
|
21
37
|
"""jieba 预分词,空格连接(jieba.cut 同步,亚毫秒级)。"""
|
|
22
38
|
import jieba
|
|
@@ -38,6 +54,16 @@ class LanceStore:
|
|
|
38
54
|
self.db = db
|
|
39
55
|
self.data_dir = data_dir
|
|
40
56
|
self.dim = dim
|
|
57
|
+
# Per-table "FTS index confirmed present" memo. Avoids a
|
|
58
|
+
# ``list_indices()`` round trip on every search call once we've
|
|
59
|
+
# verified the index exists. Invalidated only on process restart
|
|
60
|
+
# (we don't drop FTS indices at runtime).
|
|
61
|
+
self._fts_index_known: set[str] = set()
|
|
62
|
+
# EMFILE recovery state — see _recover_from_emfile / _search_with_recovery.
|
|
63
|
+
self._recovery_lock = asyncio.Lock()
|
|
64
|
+
self.emfile_recoveries: int = 0
|
|
65
|
+
self.last_emfile_at_iso: str | None = None
|
|
66
|
+
self.last_recovery_error: str | None = None
|
|
41
67
|
self._cards_schema = pa.schema([
|
|
42
68
|
pa.field("card_id", pa.string()),
|
|
43
69
|
pa.field("text", pa.string()),
|
|
@@ -112,33 +138,86 @@ class LanceStore:
|
|
|
112
138
|
table = await self.db.open_table(self.ROUNDS)
|
|
113
139
|
await table.delete(f"session_id = '{session_id}'")
|
|
114
140
|
|
|
141
|
+
# ────────── compaction ──────────
|
|
142
|
+
|
|
143
|
+
async def optimize(self, table_name: str) -> dict:
|
|
144
|
+
"""Compact small fragments + prune old dataset versions.
|
|
145
|
+
|
|
146
|
+
Why this is load-bearing: the ingest / backfill path is
|
|
147
|
+
append-only — every embedder batch is one ``table.add`` →
|
|
148
|
+
one new fragment + one new dataset version (manifest + txn
|
|
149
|
+
file). Left unchecked these accumulate without bound (tens of
|
|
150
|
+
thousands of files in production). Search has **no vector ANN
|
|
151
|
+
index** (the only index we build is FTS), so vector queries
|
|
152
|
+
flat-scan every fragment, opening every fragment's files at
|
|
153
|
+
once — past a few thousand fragments this blows the process
|
|
154
|
+
file-descriptor ceiling (EMFILE / "Too many open files").
|
|
155
|
+
|
|
156
|
+
``optimize`` is LanceDB's VACUUM: merge fragments, fold new
|
|
157
|
+
data into indices, and prune old versions. We pass
|
|
158
|
+
``cleanup_older_than=timedelta(0)`` so **every version except
|
|
159
|
+
the latest is removed** — that's what actually reclaims the
|
|
160
|
+
manifest/txn file explosion (plain compaction merges data but
|
|
161
|
+
leaves the old versions' files around until pruned). Trade-off:
|
|
162
|
+
dataset time-travel history is discarded; v3 doesn't use it.
|
|
163
|
+
|
|
164
|
+
``delete_unverified`` stays at its safe default (False) so a
|
|
165
|
+
concurrent ingest / backfill write in flight can't be corrupted.
|
|
166
|
+
|
|
167
|
+
No-op (returns ``skipped``) when the table doesn't exist yet.
|
|
168
|
+
"""
|
|
169
|
+
import datetime as _dt
|
|
170
|
+
|
|
171
|
+
if not await self._exists(table_name):
|
|
172
|
+
return {"table": table_name, "skipped": "missing"}
|
|
173
|
+
table = await self.db.open_table(table_name)
|
|
174
|
+
stats = await table.optimize(cleanup_older_than=_dt.timedelta(0))
|
|
175
|
+
# OptimizeStats shape drifts across lancedb versions; don't
|
|
176
|
+
# hard-depend on field names — stringify for the caller's log.
|
|
177
|
+
return {"table": table_name, "stats": str(stats)}
|
|
178
|
+
|
|
115
179
|
# ────────── FTS index maintenance ──────────
|
|
116
180
|
|
|
117
181
|
async def ensure_fts_index(self, table_name: str) -> None:
|
|
118
182
|
"""Create the FTS index on the ``text`` column if absent.
|
|
119
183
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
184
|
+
Idempotent + memoized: once we've confirmed an FTS index covers
|
|
185
|
+
``text`` for a given table in this process, future calls are
|
|
186
|
+
free. We don't drop indices at runtime, so the memo can't go
|
|
187
|
+
stale within a single process lifetime — invalidation = restart.
|
|
188
|
+
|
|
189
|
+
**Exception handling note (issue #4 §4.2 fix):** earlier this
|
|
190
|
+
function swallowed any error from ``list_indices()`` and
|
|
191
|
+
fell through to ``create_index(..., replace=True)``. Under
|
|
192
|
+
EMFILE the swallowed list call was followed by a fresh index
|
|
193
|
+
build, *adding* pressure exactly when the process was already
|
|
194
|
+
over its fd quota. Now: a successful ``list_indices()`` that
|
|
195
|
+
returns no ``text`` index is the only signal to create; any
|
|
196
|
+
IO exception from ``list_indices()`` propagates so the upstream
|
|
197
|
+
EMFILE recovery path can take over instead of compounding.
|
|
124
198
|
"""
|
|
199
|
+
if table_name in self._fts_index_known:
|
|
200
|
+
return
|
|
125
201
|
if not await self._exists(table_name):
|
|
126
202
|
return
|
|
127
203
|
from lancedb.index import FTS
|
|
128
204
|
table = await self.db.open_table(table_name)
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
#
|
|
205
|
+
# Let list_indices' exceptions bubble — see docstring.
|
|
206
|
+
indices = await table.list_indices()
|
|
207
|
+
for idx in indices:
|
|
208
|
+
cols = getattr(idx, "columns", None) or []
|
|
209
|
+
if "text" in cols:
|
|
210
|
+
self._fts_index_known.add(table_name)
|
|
211
|
+
return
|
|
212
|
+
# Confirmed absent — create. ``replace=False`` so a concurrent
|
|
213
|
+
# creator can't race us into a double build; if that ever fires
|
|
214
|
+
# the second caller gets a clear error rather than a silent
|
|
215
|
+
# second-rebuild storm.
|
|
138
216
|
await table.create_index(
|
|
139
217
|
"text", config=FTS(base_tokenizer="whitespace", with_position=True),
|
|
140
|
-
replace=
|
|
218
|
+
replace=False,
|
|
141
219
|
)
|
|
220
|
+
self._fts_index_known.add(table_name)
|
|
142
221
|
|
|
143
222
|
# ────────── search ──────────
|
|
144
223
|
|
|
@@ -155,10 +234,9 @@ class LanceStore:
|
|
|
155
234
|
text/vector but callers usually just need card_id + relevance).
|
|
156
235
|
Empty query → vector-only; no query and no vector → empty result.
|
|
157
236
|
"""
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
return await _run_hybrid(table, query, vector, top_k, where)
|
|
237
|
+
return await self._search_with_recovery(
|
|
238
|
+
self.CARDS, query, vector, top_k, where,
|
|
239
|
+
)
|
|
162
240
|
|
|
163
241
|
async def search_rounds(
|
|
164
242
|
self,
|
|
@@ -173,10 +251,88 @@ class LanceStore:
|
|
|
173
251
|
responsible for aggregating per session, dereffing the text from
|
|
174
252
|
jsonl for display, etc.
|
|
175
253
|
"""
|
|
176
|
-
|
|
254
|
+
return await self._search_with_recovery(
|
|
255
|
+
self.ROUNDS, query, vector, top_k, where,
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
# ────────── EMFILE recovery (issue #4 §6.2 fix) ──────────
|
|
259
|
+
|
|
260
|
+
async def _search_with_recovery(
|
|
261
|
+
self, table_name: str, query: str,
|
|
262
|
+
vector: list[float] | None, top_k: int, where: str | None,
|
|
263
|
+
) -> list[dict]:
|
|
264
|
+
"""Run a hybrid search; on EMFILE, drive a recovery once + retry.
|
|
265
|
+
|
|
266
|
+
Recovery (compaction + connection reset) is necessary because:
|
|
267
|
+
- compaction reclaims fragments on disk → fewer files to open;
|
|
268
|
+
- the in-process LanceDB readers hold fds to files Compaction
|
|
269
|
+
unlinked → only a fresh ``connect_async`` releases those.
|
|
270
|
+
|
|
271
|
+
Retry is gated to exactly one attempt: if the post-recovery
|
|
272
|
+
query still EMFILEs the underlying fragment / fd-budget mismatch
|
|
273
|
+
is past what we can fix in-process, and the original error
|
|
274
|
+
propagates as a 500 — operator action (restart, raise ulimit)
|
|
275
|
+
is required.
|
|
276
|
+
"""
|
|
277
|
+
if not await self._exists(table_name):
|
|
177
278
|
return []
|
|
178
|
-
|
|
179
|
-
|
|
279
|
+
try:
|
|
280
|
+
table = await self.db.open_table(table_name)
|
|
281
|
+
return await _run_hybrid(table, query, vector, top_k, where)
|
|
282
|
+
except Exception as e:
|
|
283
|
+
if not _is_emfile(e):
|
|
284
|
+
raise
|
|
285
|
+
_log.warning(
|
|
286
|
+
"EMFILE on search table=%s; triggering recovery", table_name,
|
|
287
|
+
)
|
|
288
|
+
await self._recover_from_emfile()
|
|
289
|
+
# Single retry — see docstring.
|
|
290
|
+
if not await self._exists(table_name):
|
|
291
|
+
return []
|
|
292
|
+
table = await self.db.open_table(table_name)
|
|
293
|
+
return await _run_hybrid(table, query, vector, top_k, where)
|
|
294
|
+
|
|
295
|
+
async def _recover_from_emfile(self) -> None:
    """Compact both tables + reset the LanceDB connection.

    Lock-protected so concurrent EMFILE-ing requests don't pile up
    N recoveries. The first request through the lock does the work;
    followers see ``emfile_recoveries`` advanced and skip — they
    proceed straight to retry, which now sees a fresh connection.

    On success ``self.db`` is replaced with a new connection,
    ``emfile_recoveries`` is incremented and ``last_emfile_at_iso`` is
    set to the current UTC time (second precision, ``Z`` suffix).

    Raises:
        Exception: whatever the reconnect step raised. The message is
            also recorded in ``last_recovery_error`` and the recovery
            counter is left untouched, so the next caller tries again.
            Failures while compacting are only logged and recorded.
    """
    import inspect

    # Snapshot the recovery generation *before* queueing on the lock so we
    # can tell whether another request finished a recovery while we waited.
    gen_before = self.emfile_recoveries
    async with self._recovery_lock:
        if self.emfile_recoveries > gen_before:
            return  # someone else recovered while we waited

        # 1. Compact — best-effort; failure here doesn't block retry.
        for table_name in (self.ROUNDS, self.CARDS):
            try:
                await self.optimize(table_name)
            except Exception as e:
                _log.exception(
                    "optimize during EMFILE recovery failed table=%s",
                    table_name,
                )
                self.last_recovery_error = (
                    f"optimize {table_name}: {e}"
                )

        # 2. Reset connection — closes the held fds. Without this,
        #    post-compaction the process is still pinned to old files.
        try:
            import lancedb

            try:
                # NOTE(review): ``close()`` looks to be a plain synchronous
                # method on LanceDB's async connection in at least some
                # releases — confirm against the pinned version. Awaiting
                # its ``None`` result would raise TypeError on every
                # recovery, so only await when we actually get an awaitable.
                close_result = self.db.close()
                if inspect.isawaitable(close_result):
                    await close_result
            except Exception:
                # Already closed / unsupported — still best effort, but
                # leave a trace instead of swallowing silently.
                _log.debug(
                    "closing stale LanceDB connection failed; "
                    "reconnecting anyway",
                    exc_info=True,
                )
            self.db = await lancedb.connect_async(str(self.data_dir))
        except Exception as e:
            _log.exception("connection reset during EMFILE recovery failed")
            self.last_recovery_error = f"reconnect: {e}"
            raise

        # Advance the generation only after a successful reconnect so
        # followers waiting on the lock skip their own recovery.
        self.emfile_recoveries += 1
        self.last_emfile_at_iso = _dt.datetime.now(_dt.UTC).isoformat(
            timespec="seconds",
        ).replace("+00:00", "Z")
|
|
180
336
|
|
|
181
337
|
|
|
182
338
|
async def _run_hybrid(
|
|
@@ -125,6 +125,13 @@ DDL = [
|
|
|
125
125
|
query TEXT NOT NULL,
|
|
126
126
|
where_dsl TEXT,
|
|
127
127
|
top_k INTEGER NOT NULL,
|
|
128
|
+
mode TEXT NOT NULL DEFAULT 'search',
|
|
129
|
+
-- 0.8.x: 'search' or 'recall'.
|
|
130
|
+
-- Audit replay distinguishes
|
|
131
|
+
-- normal queries from the
|
|
132
|
+
-- `search --recall` debug
|
|
133
|
+
-- lens (different ranking
|
|
134
|
+
-- semantics).
|
|
128
135
|
created_at TEXT NOT NULL,
|
|
129
136
|
response_json TEXT NOT NULL
|
|
130
137
|
)
|
|
@@ -202,6 +209,17 @@ async def _additive_migrations(conn: aiosqlite.Connection) -> None:
|
|
|
202
209
|
"ALTER TABLE cards ADD COLUMN tags TEXT NOT NULL DEFAULT '{}'"
|
|
203
210
|
)
|
|
204
211
|
|
|
212
|
+
# 1f. ``mode`` column on search_log (0.8.x — `search --recall`
|
|
213
|
+
# audit needs to distinguish lens). Old rows default to
|
|
214
|
+
# 'search', which matches historical behavior.
|
|
215
|
+
async with conn.execute("PRAGMA table_info(search_log)") as cursor:
|
|
216
|
+
slog_cols = {row[1] for row in await cursor.fetchall()}
|
|
217
|
+
if slog_cols and "mode" not in slog_cols:
|
|
218
|
+
await conn.execute(
|
|
219
|
+
"ALTER TABLE search_log ADD COLUMN "
|
|
220
|
+
"mode TEXT NOT NULL DEFAULT 'search'"
|
|
221
|
+
)
|
|
222
|
+
|
|
205
223
|
# 2. If the legacy ``rounds_index`` table is around, derive
|
|
206
224
|
# last_round_id from it (max-idx round per session), then drop it.
|
|
207
225
|
async with conn.execute(
|
|
@@ -21,12 +21,13 @@ class SearchLogStore:
|
|
|
21
21
|
top_k: int,
|
|
22
22
|
created_at: str,
|
|
23
23
|
response: dict,
|
|
24
|
+
mode: str = "search",
|
|
24
25
|
) -> None:
|
|
25
26
|
await self.conn.execute(
|
|
26
27
|
"INSERT INTO search_log "
|
|
27
|
-
"(search_id, query, where_dsl, top_k, created_at, response_json) "
|
|
28
|
-
"VALUES (?, ?, ?, ?, ?, ?)",
|
|
29
|
-
(search_id, query, where_dsl, top_k, created_at,
|
|
28
|
+
"(search_id, query, where_dsl, top_k, mode, created_at, response_json) "
|
|
29
|
+
"VALUES (?, ?, ?, ?, ?, ?, ?)",
|
|
30
|
+
(search_id, query, where_dsl, top_k, mode, created_at,
|
|
30
31
|
json.dumps(response, ensure_ascii=False)),
|
|
31
32
|
)
|
|
32
33
|
await self.conn.commit()
|
|
@@ -11,12 +11,15 @@ class SearchRequest(BaseModel):
|
|
|
11
11
|
query: str = ""
|
|
12
12
|
where: str | None = None
|
|
13
13
|
top_k: int | None = None # falls back to settings.search.default_top_k
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
#
|
|
18
|
-
#
|
|
19
|
-
|
|
14
|
+
# ── 0.8.x: --recall debug lens ──────────────────────────────────
|
|
15
|
+
# When True, the search service mimics ``RecallService``:
|
|
16
|
+
# cards-only, raw RRF relevance (no ranking_formula), and (when
|
|
17
|
+
# ``recall_session_id`` is supplied) dedup against that session's
|
|
18
|
+
# recall_log. Strictly read-only — does NOT bump recall_count or
|
|
19
|
+
# write recall_log entries. Use it to tune queries against the
|
|
20
|
+
# live recall behavior without polluting state.
|
|
21
|
+
recall_mode: bool = False
|
|
22
|
+
recall_session_id: str | None = None
|
|
20
23
|
|
|
21
24
|
|
|
22
25
|
class _SessionHitContext(BaseModel):
|
|
@@ -64,7 +67,11 @@ class SearchResponse(BaseModel):
|
|
|
64
67
|
search_id: str
|
|
65
68
|
query: str
|
|
66
69
|
count: int
|
|
67
|
-
#
|
|
68
|
-
#
|
|
69
|
-
|
|
70
|
+
# 0.8.x — discriminator between normal search and the --recall
|
|
71
|
+
# debug lens, so audit / programmatic consumers can tell them apart
|
|
72
|
+
# without comparing top-level body shapes.
|
|
73
|
+
mode: Literal["search", "recall"] = "search"
|
|
74
|
+
# Set only on recall-mode + session_id supplied. Lets the JSON
|
|
75
|
+
# consumer see the dedup scope used to produce these results.
|
|
76
|
+
session_id: str | None = None
|
|
70
77
|
results: list[CardResult | SessionResult] = Field(default_factory=list)
|