know-cli 0.4.1__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. {know_cli-0.4.1 → know_cli-0.4.2}/PKG-INFO +1 -1
  2. know_cli-0.4.2/docs/plans/2026-02-16-fix-testing-issues-search-graph-performance-plan.md +100 -0
  3. {know_cli-0.4.1 → know_cli-0.4.2}/pyproject.toml +1 -1
  4. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/__init__.py +1 -1
  5. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/cli/search.py +66 -3
  6. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/daemon.py +4 -4
  7. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/daemon_db.py +37 -16
  8. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/import_graph.py +1 -2
  9. {know_cli-0.4.1 → know_cli-0.4.2}/.github/actions/know-cli/action.yml +0 -0
  10. {know_cli-0.4.1 → know_cli-0.4.2}/.github/workflows/example-advanced.yml +0 -0
  11. {know_cli-0.4.1 → know_cli-0.4.2}/.github/workflows/example-basic.yml +0 -0
  12. {know_cli-0.4.1 → know_cli-0.4.2}/.gitignore +0 -0
  13. {know_cli-0.4.1 → know_cli-0.4.2}/AGENTS.md +0 -0
  14. {know_cli-0.4.1 → know_cli-0.4.2}/KNOW_SKILL.md +0 -0
  15. {know_cli-0.4.1 → know_cli-0.4.2}/README.md +0 -0
  16. {know_cli-0.4.1 → know_cli-0.4.2}/docs/arc.md +0 -0
  17. {know_cli-0.4.1 → know_cli-0.4.2}/docs/architecture-diff.md +0 -0
  18. {know_cli-0.4.1 → know_cli-0.4.2}/docs/architecture.md +0 -0
  19. {know_cli-0.4.1 → know_cli-0.4.2}/docs/dependencies.md +0 -0
  20. {know_cli-0.4.1 → know_cli-0.4.2}/docs/digest-compact.md +0 -0
  21. {know_cli-0.4.1 → know_cli-0.4.2}/docs/digest-llm.md +0 -0
  22. {know_cli-0.4.1 → know_cli-0.4.2}/docs/onboarding-new-devs.md +0 -0
  23. {know_cli-0.4.1 → know_cli-0.4.2}/docs/onboarding-new_devs.md +0 -0
  24. {know_cli-0.4.1 → know_cli-0.4.2}/docs/plans/2026-02-15-refactor-know-cli-v2-daemon-architecture-plan.md +0 -0
  25. {know_cli-0.4.1 → know_cli-0.4.2}/docs/solutions/architecture/p2-p3-architectural-improvements.md +0 -0
  26. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/IMPROVEMENTS.md +0 -0
  27. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/ai.py +0 -0
  28. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/cli/__init__.py +0 -0
  29. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/cli/agent.py +0 -0
  30. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/cli/core.py +0 -0
  31. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/cli/diff.py +0 -0
  32. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/cli/hooks.py +0 -0
  33. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/cli/knowledge.py +0 -0
  34. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/cli/mcp.py +0 -0
  35. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/cli/stats.py +0 -0
  36. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/config.py +0 -0
  37. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/context_engine.py +0 -0
  38. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/diff.py +0 -0
  39. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/embeddings.py +0 -0
  40. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/exceptions.py +0 -0
  41. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/generator.py +0 -0
  42. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/git_hooks.py +0 -0
  43. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/index.py +0 -0
  44. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/knowledge_base.py +0 -0
  45. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/logger.py +0 -0
  46. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/mcp_server.py +0 -0
  47. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/models.py +0 -0
  48. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/parsers.py +0 -0
  49. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/quality.py +0 -0
  50. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/scanner.py +0 -0
  51. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/semantic_search.py +0 -0
  52. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/stats.py +0 -0
  53. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/token_counter.py +0 -0
  54. {know_cli-0.4.1 → know_cli-0.4.2}/src/know/watcher.py +0 -0
  55. {know_cli-0.4.1 → know_cli-0.4.2}/tests/README.md +0 -0
  56. {know_cli-0.4.1 → know_cli-0.4.2}/tests/conftest.py +0 -0
  57. {know_cli-0.4.1 → know_cli-0.4.2}/tests/test_efficiency.py +0 -0
  58. {know_cli-0.4.1 → know_cli-0.4.2}/tests/test_know.py +0 -0
  59. {know_cli-0.4.1 → know_cli-0.4.2}/tests/test_token_features.py +0 -0
  60. {know_cli-0.4.1 → know_cli-0.4.2}/tests/test_unit.py +0 -0
  61. {know_cli-0.4.1 → know_cli-0.4.2}/tests/test_v2_features.py +0 -0
  62. {know_cli-0.4.1 → know_cli-0.4.2}/tests/test_week2.py +0 -0
  63. {know_cli-0.4.1 → know_cli-0.4.2}/tests/test_week3.py +0 -0
  64. {know_cli-0.4.1 → know_cli-0.4.2}/tests/test_week4.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: know-cli
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: Context Intelligence for AI Coding Agents — smart, token-budgeted code context
5
5
  Project-URL: Homepage, https://github.com/sushilk1991/know-cli
6
6
  Project-URL: Repository, https://github.com/sushilk1991/know-cli
@@ -0,0 +1,100 @@
1
+ ---
2
+ title: "fix: Search fallback, empty import graph, and slow daemon startup"
3
+ type: fix
4
+ date: 2026-02-16
5
+ ---
6
+
7
+ # fix: Search fallback, empty import graph, and slow daemon startup
8
+
9
+ ## Overview
10
+
11
+ Four issues were discovered when testing know-cli v0.4.1 in a real project, plus two cross-cutting concerns identified by a four-reviewer analysis.
12
+
13
+ ## Problem Statement
14
+
15
+ 1. **`know search` crashes without numpy/fastembed** — fails with `No module named 'numpy'` instead of falling back to BM25.
16
+ 2. **`know next-file` returns "No more relevant files found"** — BM25 exact-phrase quoting (`daemon_db.py:212`) can't match semantic queries.
17
+ 3. **`know related src/know/daemon.py` returns empty** — import graph only stores modules with outgoing edges (`import_graph.py:137-138`).
18
+ 4. **~21 second first-run latency** — daemon runs `_full_index()` before accepting connections (`daemon.py:118`).
19
+ 5. **`recall_memories` has same exact-phrase bug** as `search_chunks` (`daemon_db.py:419`).
20
+ 6. **Thread-unsafe shared SQLite connection** — background indexing thread + event loop queries share one `sqlite3.Connection`.
21
+
22
+ ## Proposed Solution
23
+
24
+ ### Fix 1: BM25 fallback for `know search`
25
+
26
+ **File:** `src/know/cli/search.py`
27
+
28
+ - [x] Wrap `from know.semantic_search import SemanticSearcher` in `try/except ImportError`
29
+ - [x] Fallback: use daemon-first pattern (`_get_daemon_client` → direct `DaemonDB`)
30
+ - [x] Mirror all three output branches: `--json`, `--quiet`, and rich
31
+ - [x] Always print hint: `[dim]Tip: pip install know-cli[search] for semantic search[/dim]`
32
+ - [x] Track stats in fallback path via `StatsTracker.record_search()`
33
+
34
+ ### Fix 2: OR-based BM25 query parsing + shared helper
35
+
36
+ **File:** `src/know/daemon_db.py`
37
+
38
+ - [x] Add `_build_fts_query()` static method
39
+ - [x] Cap at 12 terms to bound FTS5 OR query complexity
40
+ - [x] Update `search_chunks()` to use `_build_fts_query()`
41
+ - [x] Update `recall_memories()` to use `_build_fts_query()` (same exact-phrase bug)
42
+
43
+ ### Fix 3: Store all modules in import graph
44
+
45
+ **File:** `src/know/import_graph.py:137-138`
46
+
47
+ - [x] Remove the `if mod_edges:` guard — store even empty edge lists
48
+
49
+ ### Fix 4: Non-blocking daemon startup
50
+
51
+ **File:** `src/know/daemon.py`
52
+
53
+ - [x] Move `start_unix_server()` before `_full_index()`
54
+ - [x] Change `await self._full_index()` to `self._index_task = asyncio.create_task(self._full_index())`
55
+ - [x] Store task reference to prevent GC and enable status checks
56
+ - [x] No `_indexing` flag or `_background_index` wrapper needed (YAGNI)
57
+
58
+ ### Fix 5: Thread-safe SQLite connections
59
+
60
+ **File:** `src/know/daemon_db.py`
61
+
62
+ - [x] Replace shared `self._conn` with `threading.local()` per-thread connections
63
+ - [x] Add `PRAGMA busy_timeout=5000` to prevent `SQLITE_BUSY` errors
64
+ - [x] Each thread gets its own connection; WAL mode handles concurrent reads + single writer
65
+ - [x] Update `close()` to close the calling thread's thread-local connection (connections opened by other threads are not closed by this call)
66
+
67
+ ## Acceptance Criteria
68
+
69
+ - [x] `know search "daemon socket"` works without fastembed (uses BM25)
70
+ - [x] `know search "daemon socket"` prints fastembed hint
71
+ - [x] `know search "daemon socket" --json` returns valid JSON in fallback
72
+ - [x] `know next-file "message framing protocol"` returns `src/know/daemon.py` (verified via direct DB)
73
+ - [x] `know next-file "knowledge base"` returns relevant file (OR-based BM25 matches partial terms)
74
+ - [x] `know related src/know/daemon.py` shows imports and imported-by
75
+ - [x] `know related src/know/daemon_db.py` shows both directions
76
+ - [x] `know recall "project architecture"` matches even without exact phrase
77
+ - [x] Daemon accepts first query within 2 seconds of startup
78
+ - [x] Full index completes in background
79
+ - [x] All 169 existing tests pass (excluding pre-existing test_week4 failures)
80
+ - [x] All 27 v2 tests pass
81
+
82
+ ## Execution Order
83
+
84
+ 1. Fix 5 (thread-safe connections) — foundational for Fix 4
85
+ 2. Fix 2 (BM25 query helper) — foundational for Fix 1
86
+ 3. Fix 1 (search fallback) — depends on Fix 2
87
+ 4. Fix 3 (import graph) — independent
88
+ 5. Fix 4 (non-blocking startup) — depends on Fix 5
89
+
90
+ ## References
91
+
92
+ - Previous plan: `docs/plans/2026-02-15-refactor-know-cli-v2-daemon-architecture-plan.md`
93
+ - Solution doc: `docs/solutions/architecture/p2-p3-architectural-improvements.md`
94
+ - Key files:
95
+ - `src/know/cli/search.py:34` — search command
96
+ - `src/know/daemon_db.py:208-225` — `search_chunks()` BM25 method
97
+ - `src/know/daemon_db.py:415-432` — `recall_memories()` BM25 method
98
+ - `src/know/import_graph.py:137-138` — edge storage logic
99
+ - `src/know/daemon.py:106-135` — daemon serve/startup
100
+ - `src/know/cli/agent.py:32-71` — next-file command
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "know-cli"
7
- version = "0.4.1"
7
+ version = "0.4.2"
8
8
  description = "Context Intelligence for AI Coding Agents — smart, token-budgeted code context"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -1,6 +1,6 @@
1
1
  """know - Living documentation generator for codebases."""
2
2
 
3
- __version__ = "0.4.1"
3
+ __version__ = "0.4.2"
4
4
  __author__ = "Sushil Kumar"
5
5
 
6
6
  from know.cli import main
@@ -32,13 +32,22 @@ from know.scanner import CodebaseScanner
32
32
  )
33
33
  @click.pass_context
34
34
  def search(ctx: click.Context, query: str, top_k: int, index: bool, chunk: bool) -> None:
35
- """Search code semantically using embeddings."""
35
+ """Search code semantically using embeddings.
36
+
37
+ Falls back to BM25 keyword search if fastembed/numpy not installed.
38
+ """
36
39
  config = ctx.obj["config"]
37
40
 
38
- from know.semantic_search import SemanticSearcher
41
+ try:
42
+ from know.semantic_search import SemanticSearcher
43
+ searcher = SemanticSearcher(project_root=config.root)
44
+ _search_semantic(ctx, config, searcher, query, top_k, index, chunk)
45
+ except ImportError:
46
+ _search_bm25_fallback(ctx, config, query, top_k)
39
47
 
40
- searcher = SemanticSearcher(project_root=config.root)
41
48
 
49
+ def _search_semantic(ctx, config, searcher, query, top_k, index, chunk):
50
+ """Semantic search using fastembed embeddings."""
42
51
  if index:
43
52
  if not ctx.obj.get("quiet"):
44
53
  console.print(f"[dim]Indexing {config.root}...[/dim]")
@@ -93,6 +102,60 @@ def search(ctx: click.Context, query: str, top_k: int, index: bool, chunk: bool)
93
102
  console.print()
94
103
 
95
104
 
105
+ def _search_bm25_fallback(ctx, config, query, top_k):
106
+ """BM25 keyword search fallback when fastembed/numpy not installed."""
107
+ from know.cli.agent import _get_daemon_client, _get_db_fallback
108
+
109
+ if not ctx.obj.get("quiet") and not ctx.obj.get("json"):
110
+ console.print("[dim]Using BM25 keyword search (fastembed not installed)[/dim]")
111
+
112
+ import time as _time
113
+ t0 = _time.monotonic()
114
+
115
+ client = _get_daemon_client(config)
116
+ if client:
117
+ try:
118
+ result = client.call_sync("search", {"query": query, "limit": top_k})
119
+ results = result.get("results", [])
120
+ except Exception:
121
+ client = None
122
+
123
+ if not client:
124
+ db = _get_db_fallback(config)
125
+ results = db.search_chunks(query, top_k)
126
+
127
+ duration_ms = int((_time.monotonic() - t0) * 1000)
128
+
129
+ # Track stats
130
+ try:
131
+ from know.stats import StatsTracker
132
+ StatsTracker(config).record_search(query, len(results), duration_ms)
133
+ except Exception as e:
134
+ logger.debug(f"Stats tracking (search) failed: {e}")
135
+
136
+ if ctx.obj.get("json"):
137
+ import json
138
+ click.echo(json.dumps({"results": results}))
139
+ elif ctx.obj.get("quiet"):
140
+ for r in results:
141
+ click.echo(f"{r.get('file_path', '')}:{r.get('chunk_name', '')}")
142
+ else:
143
+ if not results:
144
+ console.print("[yellow]No results found[/yellow]")
145
+ console.print("[dim]Tip: pip install know-cli[search] for semantic search[/dim]")
146
+ sys.exit(2)
147
+
148
+ console.print(f"\n[bold]Top {len(results)} results (BM25):[/bold]\n")
149
+ for i, r in enumerate(results, 1):
150
+ label = f"{r.get('file_path', '')}:{r.get('chunk_name', '')}"
151
+ console.print(f"{i}. {label}")
152
+ if r.get('signature'):
153
+ console.print(f" [dim]{r['signature'][:200]}[/dim]")
154
+ console.print()
155
+
156
+ console.print("[dim]Tip: pip install know-cli[search] for semantic search[/dim]")
157
+
158
+
96
159
  @click.command()
97
160
  @click.argument("query", required=False, default=None)
98
161
  @click.option(
@@ -114,16 +114,16 @@ class KnowDaemon:
114
114
  pf.parent.mkdir(parents=True, exist_ok=True)
115
115
  pf.write_text(str(os.getpid()))
116
116
 
117
- # Initial indexing
118
- await self._full_index()
119
-
117
+ # Start server FIRST — accept connections immediately
120
118
  self._server = await asyncio.start_unix_server(
121
119
  self._handle_connection, path=str(sock)
122
120
  )
123
- # Restrict socket access to owner only
124
121
  os.chmod(str(sock), 0o600)
125
122
  logger.info(f"Daemon listening on {sock}")
126
123
 
124
+ # Index in background — queries use stale/cached data until done
125
+ self._index_task = asyncio.create_task(self._full_index())
126
+
127
127
  # Set up idle timeout check
128
128
  try:
129
129
  while True:
@@ -120,26 +120,44 @@ CREATE TABLE IF NOT EXISTS file_index (
120
120
  """
121
121
 
122
122
 
123
+ MAX_SEARCH_TERMS = 12
124
+
125
+
123
126
  class DaemonDB:
124
127
  """Unified project database with FTS5 search."""
125
128
 
126
129
  def __init__(self, project_root: Path):
127
130
  self.root = project_root
128
131
  self.db_path = project_root / ".know" / "daemon.db"
129
- self._conn: Optional[sqlite3.Connection] = None
132
+ self._local = threading.local()
130
133
  self._lock = threading.Lock()
131
134
  self._init_db()
132
135
 
133
136
  def _get_conn(self) -> sqlite3.Connection:
134
- if self._conn is None:
137
+ """Get a thread-local database connection."""
138
+ conn = getattr(self._local, 'conn', None)
139
+ if conn is None:
135
140
  with self._lock:
136
- if self._conn is None:
137
- self.db_path.parent.mkdir(parents=True, exist_ok=True)
138
- self._conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
139
- self._conn.row_factory = sqlite3.Row
140
- self._conn.execute("PRAGMA journal_mode=WAL")
141
- self._conn.execute("PRAGMA synchronous=NORMAL")
142
- return self._conn
141
+ self.db_path.parent.mkdir(parents=True, exist_ok=True)
142
+ conn = sqlite3.connect(str(self.db_path))
143
+ conn.row_factory = sqlite3.Row
144
+ conn.execute("PRAGMA journal_mode=WAL")
145
+ conn.execute("PRAGMA synchronous=NORMAL")
146
+ conn.execute("PRAGMA busy_timeout=5000")
147
+ self._local.conn = conn
148
+ return conn
149
+
150
+ @staticmethod
151
+ def _build_fts_query(query: str) -> str:
152
+ """Build OR-based FTS5 query from natural language string.
153
+
154
+ Each term is double-quoted to disable FTS5 operator interpretation.
155
+ BM25 ranking naturally boosts chunks matching more terms.
156
+ """
157
+ terms = query.strip().split()[:MAX_SEARCH_TERMS]
158
+ if not terms:
159
+ return ""
160
+ return " OR ".join('"' + t.replace('"', '""') + '"' for t in terms)
143
161
 
144
162
  def _init_db(self):
145
163
  conn = self._get_conn()
@@ -157,9 +175,10 @@ class DaemonDB:
157
175
  conn.commit()
158
176
 
159
177
  def close(self):
160
- if self._conn:
161
- self._conn.close()
162
- self._conn = None
178
+ conn = getattr(self._local, 'conn', None)
179
+ if conn is not None:
180
+ conn.close()
181
+ self._local.conn = None
163
182
 
164
183
  def __enter__(self):
165
184
  return self
@@ -208,8 +227,9 @@ class DaemonDB:
208
227
  def search_chunks(self, query: str, limit: int = 20) -> List[Dict[str, Any]]:
209
228
  """BM25 full-text search over code chunks."""
210
229
  conn = self._get_conn()
211
- # Escape FTS5 special characters by quoting the query
212
- safe_query = '"' + query.replace('"', '""') + '"'
230
+ safe_query = self._build_fts_query(query)
231
+ if not safe_query:
232
+ return []
213
233
  try:
214
234
  rows = conn.execute(
215
235
  """SELECT c.*, rank
@@ -415,8 +435,9 @@ class DaemonDB:
415
435
  def recall_memories(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
416
436
  """Search memories using FTS5 BM25."""
417
437
  conn = self._get_conn()
418
- # Escape FTS5 special characters by quoting the query
419
- safe_query = '"' + query.replace('"', '""') + '"'
438
+ safe_query = self._build_fts_query(query)
439
+ if not safe_query:
440
+ return []
420
441
  try:
421
442
  rows = conn.execute(
422
443
  """SELECT m.*, rank
@@ -134,8 +134,7 @@ class ImportGraph:
134
134
  for resolved, imp_type in targets:
135
135
  mod_edges.append((resolved, imp_type))
136
136
 
137
- if mod_edges:
138
- edges_by_source[mod_name] = mod_edges
137
+ edges_by_source[mod_name] = mod_edges
139
138
 
140
139
  # Persist via DaemonDB
141
140
  total_edges = 0
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes