knowledge-master 0.3.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/PKG-INFO +1 -1
  2. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/cli.py +70 -0
  3. knowledge_master-0.5.0/knowledge_master/migrations.py +89 -0
  4. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/parsers/git_repo.py +10 -1
  5. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/store.py +42 -4
  6. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master.egg-info/PKG-INFO +1 -1
  7. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master.egg-info/SOURCES.txt +7 -1
  8. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/pyproject.toml +1 -1
  9. knowledge_master-0.5.0/tests/test_connectors.py +58 -0
  10. knowledge_master-0.5.0/tests/test_dedup.py +24 -0
  11. knowledge_master-0.5.0/tests/test_migrations.py +23 -0
  12. knowledge_master-0.5.0/tests/test_reranker.py +43 -0
  13. knowledge_master-0.5.0/tests/test_ts_parsers.py +96 -0
  14. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/LICENSE +0 -0
  15. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/README.md +0 -0
  16. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/__init__.py +0 -0
  17. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/__main__.py +0 -0
  18. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/api.py +0 -0
  19. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/chunking.py +0 -0
  20. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/connectors.py +0 -0
  21. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/embeddings.py +0 -0
  22. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/intelligence.py +0 -0
  23. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/parsers/__init__.py +0 -0
  24. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/parsers/markdown.py +0 -0
  25. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/rerank.py +0 -0
  26. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/server.py +0 -0
  27. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/static_analysis.py +0 -0
  28. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/ts_parsers.py +0 -0
  29. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/watcher.py +0 -0
  30. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master/web.py +0 -0
  31. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master.egg-info/dependency_links.txt +0 -0
  32. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master.egg-info/entry_points.txt +0 -0
  33. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master.egg-info/requires.txt +0 -0
  34. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/knowledge_master.egg-info/top_level.txt +0 -0
  35. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/setup.cfg +0 -0
  36. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/tests/test_api.py +0 -0
  37. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/tests/test_chunking.py +0 -0
  38. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/tests/test_cli.py +0 -0
  39. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/tests/test_intelligence.py +0 -0
  40. {knowledge_master-0.3.0 → knowledge_master-0.5.0}/tests/test_static_analysis.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowledge-master
3
- Version: 0.3.0
3
+ Version: 0.5.0
4
4
  Summary: Local-first knowledge graph for developers. Your AI agent's permanent memory.
5
5
  Author: Milenko Mitrovic
6
6
  License: MIT
@@ -460,5 +460,75 @@ def who_owns(file: str = typer.Argument(..., help="File path to check ownership"
460
460
  console.print("[dim]Run 'km index <repo>' first to extract ownership.[/]")
461
461
 
462
462
 
463
+ @app.command()
464
+ def upgrade():
465
+ """Upgrade graph schema to the latest version."""
466
+ from .migrations import check_and_migrate, get_schema_version, CURRENT_SCHEMA_VERSION
467
+
468
+ graph = store.get_graph()
469
+ current = get_schema_version(graph)
470
+ console.print(f"[bold]Current schema:[/] v{current}")
471
+ console.print(f"[bold]Target schema:[/] v{CURRENT_SCHEMA_VERSION}")
472
+
473
+ if current == CURRENT_SCHEMA_VERSION:
474
+ console.print("[green]✓ Already up to date[/]")
475
+ return
476
+
477
+ result = check_and_migrate(graph, auto_migrate=True)
478
+ for step in result["steps"]:
479
+ console.print(f" [green]✓[/] {step}")
480
+ console.print(f"\n[green]✓ Upgraded to v{CURRENT_SCHEMA_VERSION}[/]")
481
+
482
+
483
+ @app.command()
484
+ def prune(
485
+ older_than: int = typer.Option(30, help="Remove chunks not re-indexed in this many days"),
486
+ dry_run: bool = typer.Option(False, "--dry-run", help="Show what would be removed"),
487
+ ):
488
+ """Remove stale/orphaned data from the knowledge graph."""
489
+ graph = store.get_graph()
490
+
491
+ # Find orphaned chunks (no PART_OF edge)
492
+ orphaned = graph.query("MATCH (c:Chunk) WHERE NOT (c)-[:PART_OF]->() RETURN count(c)")
493
+ orphan_count = orphaned.result_set[0][0] if orphaned.result_set else 0
494
+
495
+ # Find documents with no chunks
496
+ empty_docs = graph.query(
497
+ "MATCH (d:Document) WHERE NOT ()-[:PART_OF]->(d) AND NOT (d)-[:IN_REPO]->() RETURN count(d)"
498
+ )
499
+ empty_count = empty_docs.result_set[0][0] if empty_docs.result_set else 0
500
+
501
+ # Find stale chunks (indexed_at older than threshold)
502
+ # FalkorDB timestamp() returns ms since epoch
503
+ threshold_ms = older_than * 86400 * 1000
504
+ stale = graph.query(
505
+ """MATCH (c:Chunk)
506
+ WHERE c.indexed_at IS NOT NULL AND (timestamp() - c.indexed_at) > $threshold
507
+ RETURN count(c)""",
508
+ params={"threshold": threshold_ms},
509
+ )
510
+ stale_count = stale.result_set[0][0] if stale.result_set else 0
511
+
512
+ console.print("[bold]Prune analysis:[/]")
513
+ console.print(f" Orphaned chunks (no document link): {orphan_count}")
514
+ console.print(f" Empty documents (no chunks): {empty_count}")
515
+ console.print(f" Stale chunks (>{older_than} days): {stale_count}")
516
+
517
+ if dry_run:
518
+ console.print("\n[yellow]Dry run — nothing removed.[/]")
519
+ return
520
+
521
+ total = 0
522
+ if orphan_count > 0:
523
+ graph.query("MATCH (c:Chunk) WHERE NOT (c)-[:PART_OF]->() DELETE c")
524
+ total += orphan_count
525
+
526
+ if empty_count > 0:
527
+ graph.query("MATCH (d:Document) WHERE NOT ()-[:PART_OF]->(d) AND NOT (d)-[:IN_REPO]->() DELETE d")
528
+ total += empty_count
529
+
530
+ console.print(f"\n[green]✓ Removed {total} stale nodes[/]")
531
+
532
+
463
533
  if __name__ == "__main__":
464
534
  app()
@@ -0,0 +1,89 @@
1
+ """Schema versioning and migrations for the knowledge graph."""
2
+
3
+ CURRENT_SCHEMA_VERSION = 4 # v0.4.0
4
+
5
+ # Migration definitions: version -> function that upgrades from previous version
6
+ MIGRATIONS = {}
7
+
8
+
9
+ def get_schema_version(graph) -> int:
10
+ """Get current schema version from graph metadata."""
11
+ try:
12
+ result = graph.query("MATCH (m:_Meta {key: 'schema_version'}) RETURN m.value")
13
+ if result.result_set:
14
+ return int(result.result_set[0][0])
15
+ except Exception:
16
+ pass
17
+ return 0 # no version = legacy graph
18
+
19
+
20
+ def set_schema_version(graph, version: int):
21
+ """Store schema version in graph metadata."""
22
+ graph.query(
23
+ "MERGE (m:_Meta {key: 'schema_version'}) SET m.value = $version",
24
+ params={"version": version},
25
+ )
26
+
27
+
28
+ def check_and_migrate(graph, auto_migrate: bool = True) -> dict:
29
+ """Check schema version and migrate if needed.
30
+
31
+ Returns: {"current": int, "target": int, "migrated": bool, "steps": list}
32
+ """
33
+ current = get_schema_version(graph)
34
+ target = CURRENT_SCHEMA_VERSION
35
+
36
+ if current == target:
37
+ return {"current": current, "target": target, "migrated": False, "steps": []}
38
+
39
+ if current > target:
40
+ raise RuntimeError(
41
+ f"Graph schema v{current} is newer than this version supports (v{target}). "
42
+ "Please upgrade knowledge-master."
43
+ )
44
+
45
+ if not auto_migrate:
46
+ raise RuntimeError(
47
+ f"Graph schema v{current} needs migration to v{target}. Run: km upgrade"
48
+ )
49
+
50
+ steps = []
51
+ for v in range(current + 1, target + 1):
52
+ migration_fn = MIGRATIONS.get(v)
53
+ if migration_fn:
54
+ migration_fn(graph)
55
+ steps.append(f"v{v-1} → v{v}: {migration_fn.__doc__ or 'applied'}")
56
+ else:
57
+ steps.append(f"v{v-1} → v{v}: no-op (compatible)")
58
+
59
+ set_schema_version(graph, target)
60
+ return {"current": current, "target": target, "migrated": True, "steps": steps}
61
+
62
+
63
+ # --- Migrations ---
64
+
65
+ def _migrate_to_v1(graph):
66
+ """Add indexed_at timestamp to existing chunks missing it."""
67
+ graph.query("MATCH (c:Chunk) WHERE c.indexed_at IS NULL SET c.indexed_at = timestamp()")
68
+
69
+
70
+ def _migrate_to_v2(graph):
71
+ """Add OWNS relationships from ownership extraction."""
72
+ pass # OWNS edges are created by extract_ownership, no schema change needed
73
+
74
+
75
+ def _migrate_to_v3(graph):
76
+ """Add lang property to IMPORTS edges and Function nodes."""
77
+ graph.query("MATCH (f:Function) WHERE f.lang IS NULL SET f.lang = 'python'")
78
+ graph.query("MATCH ()-[e:IMPORTS]->() WHERE e.lang IS NULL SET e.lang = 'python'")
79
+
80
+
81
+ def _migrate_to_v4(graph):
82
+ """Add content_hash to Chunk nodes for deduplication."""
83
+ pass # New chunks will have hash, old ones are fine without
84
+
85
+
86
+ MIGRATIONS[1] = _migrate_to_v1
87
+ MIGRATIONS[2] = _migrate_to_v2
88
+ MIGRATIONS[3] = _migrate_to_v3
89
+ MIGRATIONS[4] = _migrate_to_v4
@@ -46,6 +46,8 @@ def index_repo(repo_path: str, graph=None, branch: str = "HEAD", on_progress=Non
46
46
  tracked = repo.git.ls_files().splitlines()
47
47
  indexable = [f for f in tracked if _should_index(f)]
48
48
  total = len(indexable)
49
+ indexed_files = []
50
+ failed_files = []
49
51
 
50
52
  with Progress(disable=not sys.stdout.isatty()) as progress:
51
53
  task = progress.add_task(f"Indexing {repo_name}", total=total)
@@ -53,19 +55,26 @@ def index_repo(repo_path: str, graph=None, branch: str = "HEAD", on_progress=Non
53
55
  full_path = os.path.join(repo_path, filepath)
54
56
  try:
55
57
  _index_file(graph, full_path, filepath, repo_name, repo)
58
+ indexed_files.append(filepath)
56
59
  except Exception as e:
60
+ failed_files.append((filepath, str(e)))
57
61
  progress.console.print(f" [yellow]skip {filepath}: {e}[/]")
58
62
  progress.advance(task)
59
63
  if on_progress:
60
64
  on_progress(i + 1, total, filepath)
61
65
 
66
+ # If more than 50% failed, warn (possible systemic issue)
67
+ if total > 0 and len(failed_files) > total * 0.5:
68
+ import sys as _sys
69
+ print(f"WARNING: {len(failed_files)}/{total} files failed. Possible systemic issue.", file=_sys.stderr)
70
+
62
71
  # Run intelligence extraction
63
72
  intel = extract_all(repo_path, graph)
64
73
 
65
74
  # Run static analysis (import graph, symbols) — all languages
66
75
  static = build_import_graph_all(repo_path, graph)
67
76
 
68
- return {"repo": repo_name, "files_indexed": total, "intelligence": intel, "static_analysis": static}
77
+ return {"repo": repo_name, "files_indexed": len(indexed_files), "files_failed": len(failed_files), "intelligence": intel, "static_analysis": static}
69
78
 
70
79
 
71
80
  def _should_index(filepath: str) -> bool:
@@ -1,5 +1,7 @@
1
1
  """FalkorDB graph store - nodes, edges, vector search, and graph traversal."""
2
2
 
3
+ import hashlib
4
+
3
5
  from falkordb import FalkorDB
4
6
 
5
7
  GRAPH_NAME = "knowledge"
@@ -7,11 +9,35 @@ GRAPH_NAME = "knowledge"
7
9
  # Vector dimension for nomic-embed-text
8
10
  VECTOR_DIM = 768
9
11
 
12
+ _graph_instance = None
13
+
10
14
 
11
15
  def get_graph(host: str = "localhost", port: int = 6379):
12
- """Get FalkorDB graph instance."""
16
+ """Get FalkorDB graph instance with schema version check."""
17
+ global _graph_instance
18
+ if _graph_instance is not None:
19
+ return _graph_instance
20
+
13
21
  db = FalkorDB(host=host, port=port)
14
- return db.select_graph(GRAPH_NAME)
22
+ graph = db.select_graph(GRAPH_NAME)
23
+
24
+ # Check and auto-migrate schema
25
+ from .migrations import check_and_migrate
26
+ check_and_migrate(graph, auto_migrate=True)
27
+
28
+ _graph_instance = graph
29
+ return graph
30
+
31
+
32
+ def reset_graph_instance():
33
+ """Reset cached graph instance (for testing)."""
34
+ global _graph_instance
35
+ _graph_instance = None
36
+
37
+
38
+ def content_hash(text: str) -> str:
39
+ """Compute content hash for deduplication."""
40
+ return hashlib.sha256(text.encode()).hexdigest()[:16]
15
41
 
16
42
 
17
43
  def init_schema(graph):
@@ -35,20 +61,32 @@ def init_schema(graph):
35
61
 
36
62
 
37
63
  def upsert_chunk(graph, chunk_id: str, text: str, embedding: list[float], metadata: dict):
38
- """Insert or update a chunk node with embedding."""
64
+ """Insert or update a chunk node with embedding. Skips if content unchanged (dedup)."""
65
+ chash = content_hash(text)
66
+
67
+ # Check if chunk exists with same content hash — skip if unchanged
68
+ existing = graph.query(
69
+ "MATCH (c:Chunk {id: $id}) RETURN c.content_hash",
70
+ params={"id": chunk_id},
71
+ )
72
+ if existing.result_set and existing.result_set[0][0] == chash:
73
+ return False # skip — content unchanged
74
+
39
75
  graph.query(
40
76
  """MERGE (c:Chunk {id: $id})
41
77
  SET c.text = $text, c.embedding = vecf32($embedding),
42
78
  c.source = $source, c.source_type = $source_type,
43
- c.indexed_at = timestamp()""",
79
+ c.content_hash = $hash, c.indexed_at = timestamp()""",
44
80
  params={
45
81
  "id": chunk_id,
46
82
  "text": text,
47
83
  "embedding": embedding,
48
84
  "source": metadata.get("source", ""),
49
85
  "source_type": metadata.get("source_type", ""),
86
+ "hash": chash,
50
87
  },
51
88
  )
89
+ return True # inserted/updated
52
90
 
53
91
 
54
92
  def upsert_document(graph, path: str, doc_type: str, metadata: dict):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowledge-master
3
- Version: 0.3.0
3
+ Version: 0.5.0
4
4
  Summary: Local-first knowledge graph for developers. Your AI agent's permanent memory.
5
5
  Author: Milenko Mitrovic
6
6
  License: MIT
@@ -9,6 +9,7 @@ knowledge_master/cli.py
9
9
  knowledge_master/connectors.py
10
10
  knowledge_master/embeddings.py
11
11
  knowledge_master/intelligence.py
12
+ knowledge_master/migrations.py
12
13
  knowledge_master/rerank.py
13
14
  knowledge_master/server.py
14
15
  knowledge_master/static_analysis.py
@@ -28,5 +29,10 @@ knowledge_master/parsers/markdown.py
28
29
  tests/test_api.py
29
30
  tests/test_chunking.py
30
31
  tests/test_cli.py
32
+ tests/test_connectors.py
33
+ tests/test_dedup.py
31
34
  tests/test_intelligence.py
32
- tests/test_static_analysis.py
35
+ tests/test_migrations.py
36
+ tests/test_reranker.py
37
+ tests/test_static_analysis.py
38
+ tests/test_ts_parsers.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "knowledge-master"
3
- version = "0.3.0"
3
+ version = "0.5.0"
4
4
  description = "Local-first knowledge graph for developers. Your AI agent's permanent memory."
5
5
  requires-python = ">=3.11"
6
6
  license = {text = "MIT"}
@@ -0,0 +1,58 @@
1
+ """Tests for connector framework."""
2
+ from knowledge_master.connectors import MCPSource, SOURCES, add_custom_source, _parse_mcp_result
3
+
4
+
5
+ def test_preconfigured_sources_exist():
6
+ assert "outlook" in SOURCES
7
+ assert "slack" in SOURCES
8
+
9
+
10
+ def test_source_has_required_fields():
11
+ for name, source in SOURCES.items():
12
+ assert source.name, f"{name} missing name"
13
+ assert source.command, f"{name} missing command"
14
+ assert source.tool_name, f"{name} missing tool_name"
15
+ assert source.source_type, f"{name} missing source_type"
16
+
17
+
18
+ def test_add_custom_source():
19
+ add_custom_source("test-source", ["echo", "hi"], "get_data", {"limit": 10}, "custom")
20
+ assert "test-source" in SOURCES
21
+ assert SOURCES["test-source"].tool_name == "get_data"
22
+ # Cleanup
23
+ del SOURCES["test-source"]
24
+
25
+
26
+ def test_parse_mcp_result_json_list():
27
+ class FakeContent:
28
+ text = '[{"title": "hello", "body": "world"}]'
29
+
30
+ class FakeResult:
31
+ content = [FakeContent()]
32
+
33
+ items = _parse_mcp_result(FakeResult())
34
+ assert len(items) == 1
35
+ assert items[0]["title"] == "hello"
36
+
37
+
38
+ def test_parse_mcp_result_json_dict_with_results():
39
+ class FakeContent:
40
+ text = '{"results": [{"text": "a"}, {"text": "b"}]}'
41
+
42
+ class FakeResult:
43
+ content = [FakeContent()]
44
+
45
+ items = _parse_mcp_result(FakeResult())
46
+ assert len(items) == 2
47
+
48
+
49
+ def test_parse_mcp_result_plain_text():
50
+ class FakeContent:
51
+ text = "just plain text"
52
+
53
+ class FakeResult:
54
+ content = [FakeContent()]
55
+
56
+ items = _parse_mcp_result(FakeResult())
57
+ assert len(items) == 1
58
+ assert items[0]["text"] == "just plain text"
@@ -0,0 +1,24 @@
1
+ """Tests for store deduplication and content hashing."""
2
+ from knowledge_master.store import content_hash
3
+
4
+
5
+ def test_content_hash_deterministic():
6
+ h1 = content_hash("hello world")
7
+ h2 = content_hash("hello world")
8
+ assert h1 == h2
9
+
10
+
11
+ def test_content_hash_different_for_different_content():
12
+ h1 = content_hash("hello world")
13
+ h2 = content_hash("hello world!")
14
+ assert h1 != h2
15
+
16
+
17
+ def test_content_hash_length():
18
+ h = content_hash("test")
19
+ assert len(h) == 16 # sha256 truncated to 16 chars
20
+
21
+
22
+ def test_content_hash_hex():
23
+ h = content_hash("test")
24
+ assert all(c in "0123456789abcdef" for c in h)
@@ -0,0 +1,23 @@
1
+ """Tests for schema migrations."""
2
+ from knowledge_master.migrations import (
3
+ CURRENT_SCHEMA_VERSION,
4
+ MIGRATIONS,
5
+ _migrate_to_v1,
6
+ _migrate_to_v3,
7
+ _migrate_to_v4,
8
+ )
9
+
10
+
11
+ def test_current_schema_version():
12
+ assert CURRENT_SCHEMA_VERSION == 4
13
+
14
+
15
+ def test_all_migrations_defined():
16
+ for v in range(1, CURRENT_SCHEMA_VERSION + 1):
17
+ assert v in MIGRATIONS, f"Migration to v{v} not defined"
18
+
19
+
20
+ def test_migrations_are_callable():
21
+ for v, fn in MIGRATIONS.items():
22
+ assert callable(fn)
23
+ assert fn.__doc__ is not None, f"Migration v{v} has no docstring"
@@ -0,0 +1,43 @@
1
+ """Benchmark test for re-ranker — compare raw cosine vs re-ranked ordering.
2
+
3
+ This is an integration test (requires Ollama) but demonstrates the value of re-ranking.
4
+ Run with: pytest tests/integration/test_reranker_benchmark.py -v
5
+ """
6
+ import pytest
7
+ from knowledge_master.rerank import rerank, _cosine_sim
8
+
9
+
10
+ def test_cosine_sim_identical():
11
+ a = [1.0, 0.0, 0.0]
12
+ assert _cosine_sim(a, a) == pytest.approx(1.0)
13
+
14
+
15
+ def test_cosine_sim_orthogonal():
16
+ a = [1.0, 0.0, 0.0]
17
+ b = [0.0, 1.0, 0.0]
18
+ assert _cosine_sim(a, b) == pytest.approx(0.0)
19
+
20
+
21
+ def test_cosine_sim_opposite():
22
+ a = [1.0, 0.0]
23
+ b = [-1.0, 0.0]
24
+ assert _cosine_sim(a, b) == pytest.approx(-1.0)
25
+
26
+
27
+ def test_rerank_empty():
28
+ result = rerank("test query", [], top_k=5)
29
+ assert result == []
30
+
31
+
32
+ def test_rerank_preserves_fields():
33
+ """Re-rank should keep all original result fields."""
34
+ results = [
35
+ {"text": "hello world", "source": "a.py", "score": 0.5, "extra": "keep"},
36
+ ]
37
+ # This would normally call Ollama — skip if not available
38
+ try:
39
+ ranked = rerank("hello", results, top_k=1)
40
+ assert "extra" in ranked[0]
41
+ assert "rerank_score" in ranked[0]
42
+ except Exception:
43
+ pytest.skip("Ollama not available")
@@ -0,0 +1,96 @@
1
+ """Tests for tree-sitter based language parsers."""
2
+ import tempfile
3
+ from pathlib import Path
4
+
5
+ from knowledge_master.ts_parsers import (
6
+ extract_typescript_graph,
7
+ extract_go_graph,
8
+ extract_rust_graph,
9
+ resolve_ts_import,
10
+ )
11
+
12
+
13
+ def test_typescript_import_extraction():
14
+ with tempfile.NamedTemporaryFile(suffix=".ts", mode="w", delete=False) as f:
15
+ f.write('import { useState } from "react";\n')
16
+ f.write('import { helper } from "./utils";\n')
17
+ f.write("export function App() { return null; }\n")
18
+ f.flush()
19
+ result = extract_typescript_graph(f.name)
20
+
21
+ assert len(result["imports"]) == 2
22
+ assert result["imports"][0]["module"] == "react"
23
+ assert result["imports"][1]["module"] == "./utils"
24
+ assert len(result["exports"]) >= 1
25
+ assert any(e["name"] == "App" for e in result["exports"])
26
+
27
+
28
+ def test_typescript_require_extraction():
29
+ with tempfile.NamedTemporaryFile(suffix=".js", mode="w", delete=False) as f:
30
+ f.write('const fs = require("fs");\n')
31
+ f.flush()
32
+ result = extract_typescript_graph(f.name)
33
+
34
+ assert len(result["imports"]) >= 1
35
+ assert result["imports"][0]["module"] == "fs"
36
+
37
+
38
+ def test_go_import_extraction():
39
+ with tempfile.NamedTemporaryFile(suffix=".go", mode="w", delete=False) as f:
40
+ f.write('package main\n\nimport (\n\t"fmt"\n\t"os"\n)\n\nfunc Main() {}\n')
41
+ f.flush()
42
+ result = extract_go_graph(f.name)
43
+
44
+ assert len(result["imports"]) >= 2
45
+ modules = [i["module"] for i in result["imports"]]
46
+ assert "fmt" in modules
47
+ assert "os" in modules
48
+
49
+
50
+ def test_go_exported_functions():
51
+ with tempfile.NamedTemporaryFile(suffix=".go", mode="w", delete=False) as f:
52
+ f.write('package pkg\n\nfunc PublicFunc() {}\nfunc privateFunc() {}\n')
53
+ f.flush()
54
+ result = extract_go_graph(f.name)
55
+
56
+ names = [e["name"] for e in result["exports"]]
57
+ assert "PublicFunc" in names
58
+ assert "privateFunc" not in names
59
+
60
+
61
+ def test_rust_use_extraction():
62
+ with tempfile.NamedTemporaryFile(suffix=".rs", mode="w", delete=False) as f:
63
+ f.write("use std::collections::HashMap;\nuse crate::utils;\n\npub fn hello() {}\nfn private() {}\n")
64
+ f.flush()
65
+ result = extract_rust_graph(f.name)
66
+
67
+ assert len(result["imports"]) >= 2
68
+ modules = [i["module"] for i in result["imports"]]
69
+ assert "std" in modules
70
+ assert any(e["name"] == "hello" for e in result["exports"])
71
+ assert not any(e["name"] == "private" for e in result["exports"])
72
+
73
+
74
+ def test_rust_mod_detection():
75
+ with tempfile.NamedTemporaryFile(suffix=".rs", mode="w", delete=False) as f:
76
+ f.write("mod parser;\nmod utils;\n")
77
+ f.flush()
78
+ result = extract_rust_graph(f.name)
79
+
80
+ mod_imports = [i for i in result["imports"] if i.get("is_mod")]
81
+ assert len(mod_imports) == 2
82
+ assert mod_imports[0]["module"] == "parser"
83
+
84
+
85
+ def test_ts_import_resolution():
86
+ with tempfile.TemporaryDirectory() as tmp:
87
+ (Path(tmp) / "src").mkdir()
88
+ (Path(tmp) / "src" / "utils.ts").write_text("export const x = 1;")
89
+ (Path(tmp) / "src" / "index.ts").write_text("")
90
+
91
+ result = resolve_ts_import("./utils", "src/index.ts", tmp)
92
+ assert result == "src/utils.ts"
93
+
94
+ # External package — should return None
95
+ result = resolve_ts_import("react", "src/index.ts", tmp)
96
+ assert result is None