memorytalk 0.8.0__tar.gz → 0.8.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. {memorytalk-0.8.0 → memorytalk-0.8.2}/PKG-INFO +1 -1
  2. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/__init__.py +27 -0
  3. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/search.py +2 -1
  4. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/sync.py +45 -0
  5. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/__init__.py +1 -1
  6. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/_format.py +1 -11
  7. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/_render.py +1 -1
  8. memorytalk-0.8.2/memorytalk/cli/search.py +76 -0
  9. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/config.py +32 -4
  10. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/provider/lancedb.py +177 -21
  11. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/schema.py +18 -0
  12. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/search_log.py +4 -3
  13. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/search.py +16 -9
  14. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/sync.py +33 -0
  15. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/backfill.py +119 -11
  16. memorytalk-0.8.2/memorytalk/service/index_buffer.py +206 -0
  17. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/search.py +49 -66
  18. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/sessions.py +19 -5
  19. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/PKG-INFO +1 -1
  20. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/SOURCES.txt +1 -0
  21. {memorytalk-0.8.0 → memorytalk-0.8.2}/pyproject.toml +1 -1
  22. memorytalk-0.8.0/memorytalk/cli/search.py +0 -53
  23. {memorytalk-0.8.0 → memorytalk-0.8.2}/LICENSE +0 -0
  24. {memorytalk-0.8.0 → memorytalk-0.8.2}/README.md +0 -0
  25. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/__init__.py +0 -0
  26. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/__main__.py +0 -0
  27. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/adapters/__init__.py +0 -0
  28. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/adapters/base.py +0 -0
  29. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/adapters/claude_code.py +0 -0
  30. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/adapters/codex.py +0 -0
  31. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/adapters/openclaw.py +0 -0
  32. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/cards.py +0 -0
  33. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/read.py +0 -0
  34. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/recall.py +0 -0
  35. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/reviews.py +0 -0
  36. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/sessions.py +0 -0
  37. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/api/status.py +0 -0
  38. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/_http.py +0 -0
  39. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/card.py +0 -0
  40. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/read.py +0 -0
  41. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/recall.py +0 -0
  42. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/review.py +0 -0
  43. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/server.py +0 -0
  44. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/session.py +0 -0
  45. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/setup.py +0 -0
  46. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/sync.py +0 -0
  47. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/cli/upgrade.py +0 -0
  48. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/provider/__init__.py +0 -0
  49. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/provider/embedding.py +0 -0
  50. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/provider/storage.py +0 -0
  51. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/__init__.py +0 -0
  52. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/cards.py +0 -0
  53. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/recall.py +0 -0
  54. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/reviews.py +0 -0
  55. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/sessions.py +0 -0
  56. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/store.py +0 -0
  57. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/repository/sync_checkpoint.py +0 -0
  58. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/__init__.py +0 -0
  59. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/card.py +0 -0
  60. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/cards.py +0 -0
  61. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/read.py +0 -0
  62. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/recall.py +0 -0
  63. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/review.py +0 -0
  64. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/reviews.py +0 -0
  65. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/session.py +0 -0
  66. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/schemas/status.py +0 -0
  67. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/server.py +0 -0
  68. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/__init__.py +0 -0
  69. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/cards.py +0 -0
  70. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/events.py +0 -0
  71. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/read.py +0 -0
  72. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/recall.py +0 -0
  73. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/reviews.py +0 -0
  74. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/service/sync.py +0 -0
  75. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/__init__.py +0 -0
  76. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/console.py +0 -0
  77. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/dsl.py +0 -0
  78. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/env_template.py +0 -0
  79. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/formula.py +0 -0
  80. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/highlight.py +0 -0
  81. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/ids.py +0 -0
  82. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/indexes.py +0 -0
  83. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/settings_io.py +0 -0
  84. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/tag_filter.py +0 -0
  85. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk/util/tags.py +0 -0
  86. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/dependency_links.txt +0 -0
  87. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/entry_points.txt +0 -0
  88. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/requires.txt +0 -0
  89. {memorytalk-0.8.0 → memorytalk-0.8.2}/memorytalk.egg-info/top_level.txt +0 -0
  90. {memorytalk-0.8.0 → memorytalk-0.8.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: memorytalk
3
- Version: 0.8.0
3
+ Version: 0.8.2
4
4
  Summary: Persistent cross-session memory for AI agents — Talk-Card architecture with forum-dynamics sinking/floating (v3)
5
5
  License-Expression: Apache-2.0
6
6
  Requires-Python: >=3.10
@@ -32,6 +32,7 @@ from memorytalk.service import (
32
32
  RecallService, ReviewService,
33
33
  )
34
34
  from memorytalk.service.backfill import IndexBackfill
35
+ from memorytalk.service.index_buffer import IndexWriteBuffer
35
36
  from memorytalk.service.search import SearchService
36
37
  from memorytalk.service.sync import SyncWatcher
37
38
 
@@ -71,9 +72,22 @@ def create_app(config: Config | None = None) -> FastAPI:
71
72
  app.state.vectors = vectors
72
73
  app.state.embedder = embedder
73
74
  app.state.events = events
75
+ # IndexWriteBuffer aggregates LanceDB inserts across sessions so
76
+ # one ``table.add()`` carries many embedder batches' worth of
77
+ # rows. Without it the ingest path creates one fragment per
78
+ # embedder batch (10 with DashScope) → vector search eventually
79
+ # EMFILEs on fd ceiling. See service/index_buffer.py and
80
+ # docs/issue #4 §4.3.
81
+ app.state.index_buffer = IndexWriteBuffer(
82
+ vectors=vectors, db=db,
83
+ flush_rows=config.settings.index.lance_flush_rows,
84
+ flush_interval_seconds=config.settings.index.lance_flush_interval_seconds,
85
+ )
86
+ app.state.index_buffer.start()
74
87
  app.state.read = ReadService(db=db, events=events)
75
88
  app.state.ingest = IngestService(
76
89
  db=db, vectors=vectors, embedder=embedder, events=events,
90
+ index_buffer=app.state.index_buffer,
77
91
  )
78
92
  app.state.sync_checkpoints = sync_checkpoints
79
93
  app.state.sync = SyncWatcher(
@@ -108,8 +122,15 @@ def create_app(config: Config | None = None) -> FastAPI:
108
122
  # lifespan shutdown.
109
123
  app.state.backfill = IndexBackfill(
110
124
  db=db, vectors=vectors, embedder=embedder,
125
+ index_buffer=app.state.index_buffer,
111
126
  )
112
127
  app.state.backfill.start()
128
+ # Guaranteed one-shot compaction on every boot — grinds down the
129
+ # append-only fragment pile (cause of EMFILE in vector search)
130
+ # so a restart always makes progress. Side path off the re-embed
131
+ # loop: gated only on vectors, runs in the background, never
132
+ # blocks startup. See IndexBackfill.trigger_startup_compaction.
133
+ app.state.backfill.trigger_startup_compaction()
113
134
 
114
135
  yield
115
136
 
@@ -123,6 +144,12 @@ def create_app(config: Config | None = None) -> FastAPI:
123
144
  await app.state.backfill.stop()
124
145
  except Exception:
125
146
  pass
147
+ # Drain in-flight LanceDB writes before tearing down the DB
148
+ # — otherwise pending vectors are lost on shutdown.
149
+ try:
150
+ await app.state.index_buffer.stop()
151
+ except Exception:
152
+ pass
126
153
  await db.close()
127
154
  await sync_checkpoints.close()
128
155
 
@@ -21,7 +21,8 @@ async def post_search(payload: SearchRequest, request: Request) -> SearchRespons
21
21
  query=payload.query or "",
22
22
  where=payload.where,
23
23
  top_k=payload.top_k,
24
- show_all=payload.show_all,
24
+ recall_mode=payload.recall_mode,
25
+ recall_session_id=payload.recall_session_id,
25
26
  )
26
27
  except DSLError as e:
27
28
  raise HTTPException(status_code=400, detail=str(e))
@@ -12,6 +12,50 @@ from fastapi import APIRouter, Query, Request
12
12
  router = APIRouter()
13
13
 
14
14
 
15
+ def _gather_lance_health(state) -> dict:
16
+ """Collect LanceDB-layer observability for ``index.lance``.
17
+
18
+ Pulls from three sources: the IndexWriteBuffer (write pipeline),
19
+ IndexBackfill (compaction cadence), and LanceStore (EMFILE
20
+ recovery count). All fields default to safe zeros / None when the
21
+ corresponding component is absent so a partially-disabled boot
22
+ still returns a well-shaped response.
23
+ """
24
+ buf = getattr(state, "index_buffer", None)
25
+ backfill = getattr(state, "backfill", None)
26
+ vectors = getattr(state, "vectors", None)
27
+
28
+ soft = hard = None
29
+ try:
30
+ import resource
31
+ soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
32
+ except (ImportError, OSError):
33
+ # Windows / sandboxed envs — leave None so the field's
34
+ # absence is the signal.
35
+ pass
36
+
37
+ return {
38
+ "pending_vector_rows": (buf.pending_rows if buf is not None else 0),
39
+ "last_flush_at": (buf.last_flush_at_iso if buf is not None else None),
40
+ "last_flush_error": (buf.last_flush_error if buf is not None else None),
41
+ "flush_count_since_boot": (buf.flush_count if buf is not None else 0),
42
+ "last_compaction_at": (
43
+ backfill.last_compact_at_iso if backfill is not None else None
44
+ ),
45
+ "last_compaction_error": (
46
+ backfill.last_compact_error if backfill is not None else None
47
+ ),
48
+ "emfile_recoveries_since_boot": (
49
+ vectors.emfile_recoveries if vectors is not None else 0
50
+ ),
51
+ "last_emfile_at": (
52
+ vectors.last_emfile_at_iso if vectors is not None else None
53
+ ),
54
+ "fd_soft_limit": soft,
55
+ "fd_hard_limit": hard,
56
+ }
57
+
58
+
15
59
  @router.get("/sync/status")
16
60
  async def get_sync_status(request: Request, limit: int = Query(5, ge=0, le=20)):
17
61
  config = request.app.state.config
@@ -31,6 +75,7 @@ async def get_sync_status(request: Request, limit: int = Query(5, ge=0, le=20)):
31
75
  index["last_index_error"] = (
32
76
  backfill.last_error if backfill is not None else None
33
77
  )
78
+ index["lance"] = _gather_lance_health(request.app.state)
34
79
 
35
80
  if not config.settings.sync.enabled:
36
81
  return {"status": "disabled", "index": index}
@@ -15,7 +15,7 @@ import click
15
15
  @click.option(
16
16
  "--no-pager", "no_pager", is_flag=True, default=False,
17
17
  help="Disable the scrollable pager (only applies to commands that "
18
- "opt-in — currently just `read`). Equivalent to NO_PAGER=1.",
18
+ "opt-in — currently `read` and `search`). Equivalent to NO_PAGER=1.",
19
19
  )
20
20
  def main(no_pager: bool) -> None:
21
21
  """memory.talk v3."""
@@ -354,15 +354,12 @@ def fmt_recall(payload: dict) -> str:
354
354
  def fmt_search(payload: dict) -> str:
355
355
  query = payload.get("query") or ""
356
356
  count = payload.get("count", 0)
357
- hidden = int(payload.get("hidden_count") or 0)
358
357
  sid = payload.get("search_id", "")
359
358
  header = f"`search_id={sid}` · {count} results"
360
- if hidden:
361
- header += f" · {hidden} hidden"
362
359
  parts: list[str] = [f"# search: {query}" if query else "# search",
363
360
  "", header, ""]
364
361
 
365
- if count == 0 and hidden == 0:
362
+ if count == 0:
366
363
  return "\n".join(parts) + "\n"
367
364
 
368
365
  for entry in payload.get("results") or []:
@@ -374,13 +371,6 @@ def fmt_search(payload: dict) -> str:
374
371
  parts.append(_fmt_search_session(entry))
375
372
  parts.append("")
376
373
 
377
- if hidden:
378
- parts.append(
379
- f"_({hidden} weak result{'s' if hidden != 1 else ''} hidden "
380
- "by strong-floor filter — pass `--all` to see)_"
381
- )
382
- parts.append("")
383
-
384
374
  return "\n".join(parts).rstrip() + "\n"
385
375
 
386
376
 
@@ -12,7 +12,7 @@ Errors:
12
12
  - Markdown mode → ``**error:** <msg>`` to stderr, exit 1
13
13
  - JSON mode → ``{"error": ...}`` to stdout, exit 1
14
14
 
15
- Pager (opt-in per command, currently only ``read``):
15
+ Pager (opt-in per command, currently ``read`` and ``search``):
16
16
 
17
17
  - ``emit_md_paged`` wraps rich rendering in a less-style pager when
18
18
  both stdin and stdout are TTYs. Subprocess / pipe / ``--json`` paths
@@ -0,0 +1,76 @@
1
+ """CLI: search <query> [--where DSL] [--top-k N] [--json]."""
2
+ from __future__ import annotations
3
+ import sys
4
+
5
+ import click
6
+
7
+ from memorytalk.cli._format import fmt_error, fmt_search
8
+ from memorytalk.cli._http import ApiError, api, extract_error_message
9
+ from memorytalk.cli._render import (
10
+ emit_json, emit_json_err, emit_md_err, emit_md_paged,
11
+ )
12
+ from memorytalk.config import Config
13
+
14
+
15
@click.command("search")
@click.argument("query", required=False, default="")
@click.option("--where", "-w", "where", type=str, default=None,
              help="DSL filter (see docs/cli/v3/search.md#DSL)")
@click.option("--top-k", "top_k", type=int, default=None,
              help="Total result cap (default = settings.search.default_top_k)")
@click.option("--recall", "recall_mode", is_flag=True, default=False,
              help="Debug lens: rank like `recall` (cards-only, raw RRF, "
                   "no ranking_formula). Combine with --session to also "
                   "preview that session's recall_log dedup. Read-only — "
                   "does NOT bump recall_count or write recall_log.")
@click.option("--session", "session_id", type=str, default=None,
              help="Session id for recall-mode dedup (only meaningful "
                   "with --recall).")
@click.option("--json", "json_out", is_flag=True, default=False, help="Emit JSON")
def search(
    query: str, where: str | None, top_k: int | None,
    recall_mode: bool, session_id: str | None, json_out: bool,
) -> None:
    """Hybrid FTS + vector search across cards and sessions.

    Builds the request body from the CLI options, POSTs it to
    ``/v3/search`` and prints the result: raw JSON with ``--json``,
    otherwise markdown routed through the pager.

    Every failure exits with status 1 and is reported in the format the
    caller asked for (JSON with ``--json``, markdown otherwise). That
    includes the ``--session``-without-``--recall`` usage error, which
    previously was always emitted as markdown even in JSON mode.
    """
    cfg = Config()

    if session_id and not recall_mode:
        # --session without --recall is a probable mistake; flag it
        # rather than silently dropping the field.
        message = (
            "--session only takes effect with --recall (it scopes "
            "the recall-log dedup preview)"
        )
        if json_out:
            emit_json_err(message)
        else:
            emit_md_err(fmt_error(message))
        sys.exit(1)

    # Only send optional fields the user actually set, so the server's
    # defaults apply to everything else.
    body: dict = {"query": query or ""}
    if where:
        body["where"] = where
    if top_k is not None:
        body["top_k"] = top_k
    if recall_mode:
        body["recall_mode"] = True
    if session_id:
        body["recall_session_id"] = session_id

    try:
        result = api("POST", "/v3/search", cfg, json_body=body)
    except ApiError as e:
        # The server answered with an error payload.
        if json_out:
            emit_json_err(e.payload)
        else:
            emit_md_err(fmt_error(extract_error_message(e.payload)))
        sys.exit(1)
    except Exception as e:
        # Anything else is reported as a connectivity problem.
        if json_out:
            emit_json_err(str(e))
        else:
            emit_md_err(fmt_error(f"cannot reach server: {e}"))
        sys.exit(1)

    if json_out:
        emit_json(result)
    else:
        # Long result blocks (cards + per-session hit fences + ctx
        # windows) routinely exceed a terminal page; route through the
        # same less-style pager that ``read`` uses. Subprocess / pipe /
        # ``--no-pager`` / ``--json`` fall back to plain output — see
        # emit_md_paged docstring.
        emit_md_paged(fmt_search(result))
@@ -16,10 +16,17 @@ from pathlib import Path
16
16
  from pydantic import BaseModel, ConfigDict
17
17
 
18
18
 
19
- _DEFAULT_RANKING_FORMULA = (
20
- "relevance + 0.1 * (review_up - review_down) "
21
- "+ 0.02 * log(read_count + 1) - 0.005 * age_days"
22
- )
19
+ _DEFAULT_RANKING_FORMULA = "relevance"
20
+ # As of 0.8.x: explicit search defaults to pure relevance (raw RRF
21
+ # score from LanceDB hybrid recall). Earlier defaults mixed in forum-
22
+ # dynamics signals (review_up - review_down + log(read_count+1) - age),
23
+ # which made identifier-style queries like `vvp-ai` unreliable — the
24
+ # strongest text match could rank below weakly-matched high-read
25
+ # cards. The forum-stats counters are still maintained on every card
26
+ # and remain queryable via ``--where 'DSL'`` (filter); the *ranking*
27
+ # is just left as relevance. Users who want forum dynamics back can
28
+ # set ``settings.search.ranking_formula`` to a richer expression like
29
+ # the old default. See docs/cli/v3/search.md.
23
30
 
24
31
 
25
32
  class ConfigValidationError(RuntimeError):
@@ -101,6 +108,26 @@ class ExploreConfig(BaseModel):
101
108
  auto_default_limit: int = 5
102
109
 
103
110
 
111
class IndexConfig(BaseModel):
    """Tuning knobs for vector-index writes (0.8.x — issue #4 §4.3 fix).

    The embedder batches small because of its per-request API cap; these
    settings control how many already-embedded rows are gathered before
    a single LanceDB ``table.add()``. That aggregation size is what
    drives fragment count, and with it the file-descriptor pressure that
    search sees later.
    """

    # Buffered-row threshold that forces a synchronous flush. 500 trades
    # fragment savings (50× fewer fragments than writing one embedder
    # batch at a time at DashScope's 10-row cap) against how long newly
    # ingested rounds stay invisible to search.
    lance_flush_rows: int = 500

    # Period, in wall-clock seconds, of the background flusher that
    # picks up the trailing partial batch after a burst of ingest goes
    # idle. 0 turns the background tick off (tests use this).
    lance_flush_interval_seconds: float = 30.0
129
+
130
+
104
131
  class Settings(BaseModel):
105
132
  server: ServerConfig = ServerConfig()
106
133
  vector: ProviderConfig = ProviderConfig(provider="lancedb")
@@ -110,6 +137,7 @@ class Settings(BaseModel):
110
137
  recall: RecallConfig = RecallConfig()
111
138
  sync: SyncConfig = SyncConfig()
112
139
  explore: ExploreConfig = ExploreConfig()
140
+ index: IndexConfig = IndexConfig()
113
141
 
114
142
 
115
143
  def _default_data_root() -> Path:
@@ -11,12 +11,28 @@ in search results come from the ``cards`` table. SQLite holds zero search
11
11
  state; jsonl files hold zero search state.
12
12
  """
13
13
  from __future__ import annotations
14
+ import asyncio
15
+ import datetime as _dt
16
+ import logging
14
17
  from pathlib import Path
15
18
  from typing import Optional
16
19
 
17
20
  import pyarrow as pa
18
21
 
19
22
 
23
+ _log = logging.getLogger("memorytalk.lancedb")
24
+
25
+
26
+ def _is_emfile(exc: BaseException) -> bool:
27
+ """Recognize Lance's wrapped EMFILE — comes through as a
28
+ ``RuntimeError`` whose ``str()`` contains "Too many open files".
29
+ We can't match on errno because Lance wraps the OS error inside
30
+ its own ``LanceError(IO)`` before raising. String match is fragile
31
+ but it's the only signal Lance gives us on this path."""
32
+ msg = str(exc)
33
+ return "Too many open files" in msg or "(os error 24)" in msg
34
+
35
+
20
36
  def _segment(text: str) -> str:
21
37
  """jieba 预分词,空格连接(jieba.cut 同步,亚毫秒级)。"""
22
38
  import jieba
@@ -38,6 +54,16 @@ class LanceStore:
38
54
  self.db = db
39
55
  self.data_dir = data_dir
40
56
  self.dim = dim
57
+ # Per-table "FTS index confirmed present" memo. Avoids a
58
+ # ``list_indices()`` round trip on every search call once we've
59
+ # verified the index exists. Invalidated only on process restart
60
+ # (we don't drop FTS indices at runtime).
61
+ self._fts_index_known: set[str] = set()
62
+ # EMFILE recovery state — see _recover_from_emfile / _search_with_recovery.
63
+ self._recovery_lock = asyncio.Lock()
64
+ self.emfile_recoveries: int = 0
65
+ self.last_emfile_at_iso: str | None = None
66
+ self.last_recovery_error: str | None = None
41
67
  self._cards_schema = pa.schema([
42
68
  pa.field("card_id", pa.string()),
43
69
  pa.field("text", pa.string()),
@@ -112,33 +138,86 @@ class LanceStore:
112
138
  table = await self.db.open_table(self.ROUNDS)
113
139
  await table.delete(f"session_id = '{session_id}'")
114
140
 
141
+ # ────────── compaction ──────────
142
+
143
async def optimize(self, table_name: str) -> dict:
    """Compact ``table_name`` and prune its old dataset versions.

    Why this matters: ingest and backfill only ever append, so each
    ``table.add`` leaves a new fragment and a new dataset version
    (manifest + txn file) behind. The only index built is FTS, so
    vector queries flat-scan every fragment and open all of their
    files at once. With enough fragments that exhausts the process's
    file descriptors (EMFILE / "Too many open files").

    ``cleanup_older_than=timedelta(0)`` asks LanceDB to drop every
    version except the latest. That is what reclaims the manifest/txn
    files, since compaction alone leaves the superseded versions on
    disk. The cost is losing dataset time-travel history, which v3
    does not use. ``delete_unverified`` is left at its default so an
    in-flight concurrent write is not at risk.

    Returns:
        ``{"table": ..., "skipped": "missing"}`` when the table does
        not exist yet; otherwise ``{"table": ..., "stats": ...}`` with
        the optimize result stringified. It is stringified because the
        stats object's shape drifts across lancedb versions, so field
        names are not relied on.
    """
    from datetime import timedelta

    if await self._exists(table_name):
        handle = await self.db.open_table(table_name)
        outcome = await handle.optimize(cleanup_older_than=timedelta(0))
        return {"table": table_name, "stats": str(outcome)}
    return {"table": table_name, "skipped": "missing"}
178
+
115
179
  # ────────── FTS index maintenance ──────────
116
180
 
117
181
  async def ensure_fts_index(self, table_name: str) -> None:
118
182
  """Create the FTS index on the ``text`` column if absent.
119
183
 
120
- LanceDB's hybrid search needs an FTS index on the text column.
121
- Calling this once before queries is enough (the index is shared
122
- across queries; LanceDB picks up new rows automatically). Cheap
123
- no-op when the index already exists.
184
+ Idempotent + memoized: once we've confirmed an FTS index covers
185
+ ``text`` for a given table in this process, future calls are
186
+ free. We don't drop indices at runtime, so the memo can't go
187
+ stale within a single process lifetime — invalidation = restart.
188
+
189
+ **Exception handling note (issue #4 §4.2 fix):** earlier this
190
+ function swallowed any error from ``list_indices()`` and
191
+ fell through to ``create_index(..., replace=True)``. Under
192
+ EMFILE the swallowed list call was followed by a fresh index
193
+ build, *adding* pressure exactly when the process was already
194
+ over its fd quota. Now: a successful ``list_indices()`` that
195
+ returns no ``text`` index is the only signal to create; any
196
+ IO exception from ``list_indices()`` propagates so the upstream
197
+ EMFILE recovery path can take over instead of compounding.
124
198
  """
199
+ if table_name in self._fts_index_known:
200
+ return
125
201
  if not await self._exists(table_name):
126
202
  return
127
203
  from lancedb.index import FTS
128
204
  table = await self.db.open_table(table_name)
129
- try:
130
- indices = await table.list_indices()
131
- for idx in indices:
132
- cols = getattr(idx, "columns", None) or []
133
- if "text" in cols:
134
- return # already indexed
135
- except Exception:
136
- pass # treat as "no index" and create one
137
- # whitespace tokenizer because ingest already segments via jieba.
205
+ # Let list_indices' exceptions bubble — see docstring.
206
+ indices = await table.list_indices()
207
+ for idx in indices:
208
+ cols = getattr(idx, "columns", None) or []
209
+ if "text" in cols:
210
+ self._fts_index_known.add(table_name)
211
+ return
212
+ # Confirmed absent → create. ``replace=False`` so a concurrent
213
+ # creator can't race us into a double build; if that ever fires
214
+ # the second caller gets a clear error rather than a silent
215
+ # second-rebuild storm.
138
216
  await table.create_index(
139
217
  "text", config=FTS(base_tokenizer="whitespace", with_position=True),
140
- replace=True,
218
+ replace=False,
141
219
  )
220
+ self._fts_index_known.add(table_name)
142
221
 
143
222
  # ────────── search ──────────
144
223
 
@@ -155,10 +234,9 @@ class LanceStore:
155
234
  text/vector but callers usually just need card_id + relevance).
156
235
  Empty query → vector-only; no query and no vector → empty result.
157
236
  """
158
- if not await self._exists(self.CARDS):
159
- return []
160
- table = await self.db.open_table(self.CARDS)
161
- return await _run_hybrid(table, query, vector, top_k, where)
237
+ return await self._search_with_recovery(
238
+ self.CARDS, query, vector, top_k, where,
239
+ )
162
240
 
163
241
  async def search_rounds(
164
242
  self,
@@ -173,10 +251,88 @@ class LanceStore:
173
251
  responsible for aggregating per session, dereffing the text from
174
252
  jsonl for display, etc.
175
253
  """
176
- if not await self._exists(self.ROUNDS):
254
+ return await self._search_with_recovery(
255
+ self.ROUNDS, query, vector, top_k, where,
256
+ )
257
+
258
+ # ────────── EMFILE recovery (issue #4 §6.2 fix) ──────────
259
+
260
+ async def _search_with_recovery(
261
+ self, table_name: str, query: str,
262
+ vector: list[float] | None, top_k: int, where: str | None,
263
+ ) -> list[dict]:
264
+ """Run a hybrid search; on EMFILE, drive a recovery once + retry.
265
+
266
+ Recovery (compaction + connection reset) is necessary because:
267
+ - compaction reclaims fragments on disk → fewer files to open;
268
+ - the in-process LanceDB readers hold fds to files Compaction
269
+ unlinked → only a fresh ``connect_async`` releases those.
270
+
271
+ Retry is gated to exactly one attempt: if the post-recovery
272
+ query still EMFILEs the underlying fragment / fd-budget mismatch
273
+ is past what we can fix in-process, and the original error
274
+ propagates as a 500 — operator action (restart, raise ulimit)
275
+ is required.
276
+ """
277
+ if not await self._exists(table_name):
177
278
  return []
178
- table = await self.db.open_table(self.ROUNDS)
179
- return await _run_hybrid(table, query, vector, top_k, where)
279
+ try:
280
+ table = await self.db.open_table(table_name)
281
+ return await _run_hybrid(table, query, vector, top_k, where)
282
+ except Exception as e:
283
+ if not _is_emfile(e):
284
+ raise
285
+ _log.warning(
286
+ "EMFILE on search table=%s; triggering recovery", table_name,
287
+ )
288
+ await self._recover_from_emfile()
289
+ # Single retry — see docstring.
290
+ if not await self._exists(table_name):
291
+ return []
292
+ table = await self.db.open_table(table_name)
293
+ return await _run_hybrid(table, query, vector, top_k, where)
294
+
295
+ async def _recover_from_emfile(self) -> None:
296
+ """Compact both tables + reset the LanceDB connection.
297
+
298
+ Lock-protected so concurrent EMFILE-ing requests don't pile up
299
+ N recoveries. The first request through the lock does the work;
300
+ followers see ``emfile_recoveries`` advanced and skip — they
301
+ proceed straight to retry, which now sees a fresh connection.
302
+ """
303
+ gen_before = self.emfile_recoveries
304
+ async with self._recovery_lock:
305
+ if self.emfile_recoveries > gen_before:
306
+ return # someone else recovered while we waited
307
+ # 1. Compact — best-effort; failure here doesn't block retry.
308
+ for table_name in (self.ROUNDS, self.CARDS):
309
+ try:
310
+ await self.optimize(table_name)
311
+ except Exception as e:
312
+ _log.exception(
313
+ "optimize during EMFILE recovery failed table=%s",
314
+ table_name,
315
+ )
316
+ self.last_recovery_error = (
317
+ f"optimize {table_name}: {e}"
318
+ )
319
+ # 2. Reset connection — closes the held fds. Without this,
320
+ # post-compaction the process is still pinned to old files.
321
+ try:
322
+ import lancedb
323
+ try:
324
+ await self.db.close()
325
+ except Exception:
326
+ pass # already closed / unsupported — best effort
327
+ self.db = await lancedb.connect_async(str(self.data_dir))
328
+ except Exception as e:
329
+ _log.exception("connection reset during EMFILE recovery failed")
330
+ self.last_recovery_error = f"reconnect: {e}"
331
+ raise
332
+ self.emfile_recoveries += 1
333
+ self.last_emfile_at_iso = _dt.datetime.now(_dt.UTC).isoformat(
334
+ timespec="seconds",
335
+ ).replace("+00:00", "Z")
180
336
 
181
337
 
182
338
  async def _run_hybrid(
@@ -125,6 +125,13 @@ DDL = [
125
125
  query TEXT NOT NULL,
126
126
  where_dsl TEXT,
127
127
  top_k INTEGER NOT NULL,
128
+ mode TEXT NOT NULL DEFAULT 'search',
129
+ -- 0.8.x: 'search' or 'recall'.
130
+ -- Audit replay distinguishes
131
+ -- normal queries from the
132
+ -- `search --recall` debug
133
+ -- lens (different ranking
134
+ -- semantics).
128
135
  created_at TEXT NOT NULL,
129
136
  response_json TEXT NOT NULL
130
137
  )
@@ -202,6 +209,17 @@ async def _additive_migrations(conn: aiosqlite.Connection) -> None:
202
209
  "ALTER TABLE cards ADD COLUMN tags TEXT NOT NULL DEFAULT '{}'"
203
210
  )
204
211
 
212
+ # 1f. ``mode`` column on search_log (0.8.x — `search --recall`
213
+ # audit needs to distinguish lens). Old rows default to
214
+ # 'search', which matches historical behavior.
215
+ async with conn.execute("PRAGMA table_info(search_log)") as cursor:
216
+ slog_cols = {row[1] for row in await cursor.fetchall()}
217
+ if slog_cols and "mode" not in slog_cols:
218
+ await conn.execute(
219
+ "ALTER TABLE search_log ADD COLUMN "
220
+ "mode TEXT NOT NULL DEFAULT 'search'"
221
+ )
222
+
205
223
  # 2. If the legacy ``rounds_index`` table is around, derive
206
224
  # last_round_id from it (max-idx round per session), then drop it.
207
225
  async with conn.execute(
@@ -21,12 +21,13 @@ class SearchLogStore:
21
21
  top_k: int,
22
22
  created_at: str,
23
23
  response: dict,
24
+ mode: str = "search",
24
25
  ) -> None:
25
26
  await self.conn.execute(
26
27
  "INSERT INTO search_log "
27
- "(search_id, query, where_dsl, top_k, created_at, response_json) "
28
- "VALUES (?, ?, ?, ?, ?, ?)",
29
- (search_id, query, where_dsl, top_k, created_at,
28
+ "(search_id, query, where_dsl, top_k, mode, created_at, response_json) "
29
+ "VALUES (?, ?, ?, ?, ?, ?, ?)",
30
+ (search_id, query, where_dsl, top_k, mode, created_at,
30
31
  json.dumps(response, ensure_ascii=False)),
31
32
  )
32
33
  await self.conn.commit()
@@ -11,12 +11,15 @@ class SearchRequest(BaseModel):
11
11
  query: str = ""
12
12
  where: str | None = None
13
13
  top_k: int | None = None # falls back to settings.search.default_top_k
14
- # When false (default), apply per-type "strong-floor" filter: if any
15
- # result of that type clears the floor, hide everything below it; if
16
- # nothing clears the floor, return the whole bucket. To inspect what
17
- # was filtered, re-issue with show_all=true. See service/search.py
18
- # for the hardcoded floors (session 0.02, card 0.1) and rationale.
19
- show_all: bool = False
14
+ # ── 0.8.x: --recall debug lens ──────────────────────────────────
15
+ # When True, the search service mimics ``RecallService``:
16
+ # cards-only, raw RRF relevance (no ranking_formula), and (when
17
+ # ``recall_session_id`` is supplied) dedup against that session's
18
+ # recall_log. Strictly read-only — does NOT bump recall_count or
19
+ # write recall_log entries. Use it to tune queries against the
20
+ # live recall behavior without polluting state.
21
+ recall_mode: bool = False
22
+ recall_session_id: str | None = None
20
23
 
21
24
 
22
25
  class _SessionHitContext(BaseModel):
@@ -64,7 +67,11 @@ class SearchResponse(BaseModel):
64
67
  search_id: str
65
68
  query: str
66
69
  count: int
67
- # Number of results filtered out by the strong-floor rule (sum across
68
- # both types). 0 when ``show_all=true`` or when nothing was filtered.
69
- hidden_count: int = 0
70
+ # 0.8.x discriminator between normal search and the --recall
71
+ # debug lens, so audit / programmatic consumers can tell them apart
72
+ # without comparing top-level body shapes.
73
+ mode: Literal["search", "recall"] = "search"
74
+ # Set only on recall-mode + session_id supplied. Lets the JSON
75
+ # consumer see the dedup scope used to produce these results.
76
+ session_id: str | None = None
70
77
  results: list[CardResult | SessionResult] = Field(default_factory=list)