knowledge-master 0.2.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/PKG-INFO +6 -1
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/cli.py +70 -0
- knowledge_master-0.4.0/knowledge_master/migrations.py +89 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/parsers/git_repo.py +13 -4
- knowledge_master-0.4.0/knowledge_master/static_analysis.py +306 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/store.py +42 -4
- knowledge_master-0.4.0/knowledge_master/ts_parsers.py +192 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master.egg-info/PKG-INFO +6 -1
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master.egg-info/SOURCES.txt +2 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master.egg-info/requires.txt +5 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/pyproject.toml +6 -1
- knowledge_master-0.2.0/knowledge_master/static_analysis.py +0 -141
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/LICENSE +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/README.md +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/__init__.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/__main__.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/api.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/chunking.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/connectors.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/embeddings.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/intelligence.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/parsers/__init__.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/parsers/markdown.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/rerank.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/server.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/watcher.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master/web.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master.egg-info/dependency_links.txt +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master.egg-info/entry_points.txt +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master.egg-info/top_level.txt +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/setup.cfg +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/tests/test_api.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/tests/test_chunking.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/tests/test_cli.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/tests/test_intelligence.py +0 -0
- {knowledge_master-0.2.0 → knowledge_master-0.4.0}/tests/test_static_analysis.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: knowledge-master
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Local-first knowledge graph for developers. Your AI agent's permanent memory.
|
|
5
5
|
Author: Milenko Mitrovic
|
|
6
6
|
License: MIT
|
|
@@ -28,6 +28,11 @@ Requires-Dist: rich<15.0,>=14.0.0
|
|
|
28
28
|
Requires-Dist: fastapi<1.0,>=0.115.0
|
|
29
29
|
Requires-Dist: uvicorn<1.0,>=0.34.0
|
|
30
30
|
Requires-Dist: pyyaml>=6.0
|
|
31
|
+
Requires-Dist: tree-sitter>=0.23.0
|
|
32
|
+
Requires-Dist: tree-sitter-javascript>=0.23.0
|
|
33
|
+
Requires-Dist: tree-sitter-typescript>=0.23.0
|
|
34
|
+
Requires-Dist: tree-sitter-go>=0.23.0
|
|
35
|
+
Requires-Dist: tree-sitter-rust>=0.23.0
|
|
31
36
|
Provides-Extra: office
|
|
32
37
|
Requires-Dist: python-docx<2.0,>=1.1.0; extra == "office"
|
|
33
38
|
Requires-Dist: openpyxl<4.0,>=3.1.0; extra == "office"
|
|
@@ -460,5 +460,75 @@ def who_owns(file: str = typer.Argument(..., help="File path to check ownership"
|
|
|
460
460
|
console.print("[dim]Run 'km index <repo>' first to extract ownership.[/]")
|
|
461
461
|
|
|
462
462
|
|
|
463
|
+
@app.command()
|
|
464
|
+
def upgrade():
|
|
465
|
+
"""Upgrade graph schema to the latest version."""
|
|
466
|
+
from .migrations import check_and_migrate, get_schema_version, CURRENT_SCHEMA_VERSION
|
|
467
|
+
|
|
468
|
+
graph = store.get_graph()
|
|
469
|
+
current = get_schema_version(graph)
|
|
470
|
+
console.print(f"[bold]Current schema:[/] v{current}")
|
|
471
|
+
console.print(f"[bold]Target schema:[/] v{CURRENT_SCHEMA_VERSION}")
|
|
472
|
+
|
|
473
|
+
if current == CURRENT_SCHEMA_VERSION:
|
|
474
|
+
console.print("[green]✓ Already up to date[/]")
|
|
475
|
+
return
|
|
476
|
+
|
|
477
|
+
result = check_and_migrate(graph, auto_migrate=True)
|
|
478
|
+
for step in result["steps"]:
|
|
479
|
+
console.print(f" [green]✓[/] {step}")
|
|
480
|
+
console.print(f"\n[green]✓ Upgraded to v{CURRENT_SCHEMA_VERSION}[/]")
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
@app.command()
|
|
484
|
+
def prune(
|
|
485
|
+
older_than: int = typer.Option(30, help="Remove chunks not re-indexed in this many days"),
|
|
486
|
+
dry_run: bool = typer.Option(False, "--dry-run", help="Show what would be removed"),
|
|
487
|
+
):
|
|
488
|
+
"""Remove stale/orphaned data from the knowledge graph."""
|
|
489
|
+
graph = store.get_graph()
|
|
490
|
+
|
|
491
|
+
# Find orphaned chunks (no PART_OF edge)
|
|
492
|
+
orphaned = graph.query("MATCH (c:Chunk) WHERE NOT (c)-[:PART_OF]->() RETURN count(c)")
|
|
493
|
+
orphan_count = orphaned.result_set[0][0] if orphaned.result_set else 0
|
|
494
|
+
|
|
495
|
+
# Find documents with no chunks
|
|
496
|
+
empty_docs = graph.query(
|
|
497
|
+
"MATCH (d:Document) WHERE NOT ()-[:PART_OF]->(d) AND NOT (d)-[:IN_REPO]->() RETURN count(d)"
|
|
498
|
+
)
|
|
499
|
+
empty_count = empty_docs.result_set[0][0] if empty_docs.result_set else 0
|
|
500
|
+
|
|
501
|
+
# Find stale chunks (indexed_at older than threshold)
|
|
502
|
+
# FalkorDB timestamp() returns ms since epoch
|
|
503
|
+
threshold_ms = older_than * 86400 * 1000
|
|
504
|
+
stale = graph.query(
|
|
505
|
+
"""MATCH (c:Chunk)
|
|
506
|
+
WHERE c.indexed_at IS NOT NULL AND (timestamp() - c.indexed_at) > $threshold
|
|
507
|
+
RETURN count(c)""",
|
|
508
|
+
params={"threshold": threshold_ms},
|
|
509
|
+
)
|
|
510
|
+
stale_count = stale.result_set[0][0] if stale.result_set else 0
|
|
511
|
+
|
|
512
|
+
console.print("[bold]Prune analysis:[/]")
|
|
513
|
+
console.print(f" Orphaned chunks (no document link): {orphan_count}")
|
|
514
|
+
console.print(f" Empty documents (no chunks): {empty_count}")
|
|
515
|
+
console.print(f" Stale chunks (>{older_than} days): {stale_count}")
|
|
516
|
+
|
|
517
|
+
if dry_run:
|
|
518
|
+
console.print("\n[yellow]Dry run — nothing removed.[/]")
|
|
519
|
+
return
|
|
520
|
+
|
|
521
|
+
total = 0
|
|
522
|
+
if orphan_count > 0:
|
|
523
|
+
graph.query("MATCH (c:Chunk) WHERE NOT (c)-[:PART_OF]->() DELETE c")
|
|
524
|
+
total += orphan_count
|
|
525
|
+
|
|
526
|
+
if empty_count > 0:
|
|
527
|
+
graph.query("MATCH (d:Document) WHERE NOT ()-[:PART_OF]->(d) AND NOT (d)-[:IN_REPO]->() DELETE d")
|
|
528
|
+
total += empty_count
|
|
529
|
+
|
|
530
|
+
console.print(f"\n[green]✓ Removed {total} stale nodes[/]")
|
|
531
|
+
|
|
532
|
+
|
|
463
533
|
if __name__ == "__main__":
|
|
464
534
|
app()
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Schema versioning and migrations for the knowledge graph."""
|
|
2
|
+
|
|
3
|
+
CURRENT_SCHEMA_VERSION = 4 # v0.4.0
|
|
4
|
+
|
|
5
|
+
# Migration definitions: version -> function that upgrades from previous version
|
|
6
|
+
MIGRATIONS = {}
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_schema_version(graph) -> int:
|
|
10
|
+
"""Get current schema version from graph metadata."""
|
|
11
|
+
try:
|
|
12
|
+
result = graph.query("MATCH (m:_Meta {key: 'schema_version'}) RETURN m.value")
|
|
13
|
+
if result.result_set:
|
|
14
|
+
return int(result.result_set[0][0])
|
|
15
|
+
except Exception:
|
|
16
|
+
pass
|
|
17
|
+
return 0 # no version = legacy graph
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def set_schema_version(graph, version: int):
|
|
21
|
+
"""Store schema version in graph metadata."""
|
|
22
|
+
graph.query(
|
|
23
|
+
"MERGE (m:_Meta {key: 'schema_version'}) SET m.value = $version",
|
|
24
|
+
params={"version": version},
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def check_and_migrate(graph, auto_migrate: bool = True) -> dict:
|
|
29
|
+
"""Check schema version and migrate if needed.
|
|
30
|
+
|
|
31
|
+
Returns: {"current": int, "target": int, "migrated": bool, "steps": list}
|
|
32
|
+
"""
|
|
33
|
+
current = get_schema_version(graph)
|
|
34
|
+
target = CURRENT_SCHEMA_VERSION
|
|
35
|
+
|
|
36
|
+
if current == target:
|
|
37
|
+
return {"current": current, "target": target, "migrated": False, "steps": []}
|
|
38
|
+
|
|
39
|
+
if current > target:
|
|
40
|
+
raise RuntimeError(
|
|
41
|
+
f"Graph schema v{current} is newer than this version supports (v{target}). "
|
|
42
|
+
"Please upgrade knowledge-master."
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
if not auto_migrate:
|
|
46
|
+
raise RuntimeError(
|
|
47
|
+
f"Graph schema v{current} needs migration to v{target}. Run: km upgrade"
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
steps = []
|
|
51
|
+
for v in range(current + 1, target + 1):
|
|
52
|
+
migration_fn = MIGRATIONS.get(v)
|
|
53
|
+
if migration_fn:
|
|
54
|
+
migration_fn(graph)
|
|
55
|
+
steps.append(f"v{v-1} → v{v}: {migration_fn.__doc__ or 'applied'}")
|
|
56
|
+
else:
|
|
57
|
+
steps.append(f"v{v-1} → v{v}: no-op (compatible)")
|
|
58
|
+
|
|
59
|
+
set_schema_version(graph, target)
|
|
60
|
+
return {"current": current, "target": target, "migrated": True, "steps": steps}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# --- Migrations ---
|
|
64
|
+
|
|
65
|
+
def _migrate_to_v1(graph):
|
|
66
|
+
"""Add indexed_at timestamp to existing chunks missing it."""
|
|
67
|
+
graph.query("MATCH (c:Chunk) WHERE c.indexed_at IS NULL SET c.indexed_at = timestamp()")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _migrate_to_v2(graph):
|
|
71
|
+
"""Add OWNS relationships from ownership extraction."""
|
|
72
|
+
pass # OWNS edges are created by extract_ownership, no schema change needed
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _migrate_to_v3(graph):
|
|
76
|
+
"""Add lang property to IMPORTS edges and Function nodes."""
|
|
77
|
+
graph.query("MATCH (f:Function) WHERE f.lang IS NULL SET f.lang = 'python'")
|
|
78
|
+
graph.query("MATCH ()-[e:IMPORTS]->() WHERE e.lang IS NULL SET e.lang = 'python'")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _migrate_to_v4(graph):
|
|
82
|
+
"""Add content_hash to Chunk nodes for deduplication."""
|
|
83
|
+
pass # New chunks will have hash, old ones are fine without
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
MIGRATIONS[1] = _migrate_to_v1
|
|
87
|
+
MIGRATIONS[2] = _migrate_to_v2
|
|
88
|
+
MIGRATIONS[3] = _migrate_to_v3
|
|
89
|
+
MIGRATIONS[4] = _migrate_to_v4
|
|
@@ -9,7 +9,7 @@ from rich.progress import Progress
|
|
|
9
9
|
|
|
10
10
|
from .. import chunking, embeddings, store
|
|
11
11
|
from ..intelligence import extract_all
|
|
12
|
-
from ..static_analysis import build_import_graph
|
|
12
|
+
from ..static_analysis import build_import_graph_all
|
|
13
13
|
|
|
14
14
|
INDEXABLE_EXTENSIONS = {
|
|
15
15
|
".py", ".ts", ".tsx", ".js", ".rs", ".go", ".java",
|
|
@@ -46,6 +46,8 @@ def index_repo(repo_path: str, graph=None, branch: str = "HEAD", on_progress=Non
|
|
|
46
46
|
tracked = repo.git.ls_files().splitlines()
|
|
47
47
|
indexable = [f for f in tracked if _should_index(f)]
|
|
48
48
|
total = len(indexable)
|
|
49
|
+
indexed_files = []
|
|
50
|
+
failed_files = []
|
|
49
51
|
|
|
50
52
|
with Progress(disable=not sys.stdout.isatty()) as progress:
|
|
51
53
|
task = progress.add_task(f"Indexing {repo_name}", total=total)
|
|
@@ -53,19 +55,26 @@ def index_repo(repo_path: str, graph=None, branch: str = "HEAD", on_progress=Non
|
|
|
53
55
|
full_path = os.path.join(repo_path, filepath)
|
|
54
56
|
try:
|
|
55
57
|
_index_file(graph, full_path, filepath, repo_name, repo)
|
|
58
|
+
indexed_files.append(filepath)
|
|
56
59
|
except Exception as e:
|
|
60
|
+
failed_files.append((filepath, str(e)))
|
|
57
61
|
progress.console.print(f" [yellow]skip {filepath}: {e}[/]")
|
|
58
62
|
progress.advance(task)
|
|
59
63
|
if on_progress:
|
|
60
64
|
on_progress(i + 1, total, filepath)
|
|
61
65
|
|
|
66
|
+
# If more than 50% failed, warn (possible systemic issue)
|
|
67
|
+
if total > 0 and len(failed_files) > total * 0.5:
|
|
68
|
+
import sys as _sys
|
|
69
|
+
print(f"WARNING: {len(failed_files)}/{total} files failed. Possible systemic issue.", file=_sys.stderr)
|
|
70
|
+
|
|
62
71
|
# Run intelligence extraction
|
|
63
72
|
intel = extract_all(repo_path, graph)
|
|
64
73
|
|
|
65
|
-
# Run static analysis (import graph, symbols)
|
|
66
|
-
static = build_import_graph(repo_path, graph)
|
|
74
|
+
# Run static analysis (import graph, symbols) — all languages
|
|
75
|
+
static = build_import_graph_all(repo_path, graph)
|
|
67
76
|
|
|
68
|
-
return {"repo": repo_name, "files_indexed":
|
|
77
|
+
return {"repo": repo_name, "files_indexed": len(indexed_files), "files_failed": len(failed_files), "intelligence": intel, "static_analysis": static}
|
|
69
78
|
|
|
70
79
|
|
|
71
80
|
def _should_index(filepath: str) -> bool:
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"""Static analysis — extract import graphs, symbols, and call relationships from code."""
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def extract_python_graph(file_path: str) -> dict:
|
|
9
|
+
"""Extract imports, exports (top-level functions/classes), and calls from a Python file."""
|
|
10
|
+
try:
|
|
11
|
+
source = Path(file_path).read_text(errors="ignore")
|
|
12
|
+
tree = ast.parse(source)
|
|
13
|
+
except (SyntaxError, ValueError):
|
|
14
|
+
return {"imports": [], "exports": [], "calls": [], "path": file_path}
|
|
15
|
+
|
|
16
|
+
imports = []
|
|
17
|
+
exports = []
|
|
18
|
+
calls = []
|
|
19
|
+
|
|
20
|
+
for node in ast.walk(tree):
|
|
21
|
+
if isinstance(node, ast.Import):
|
|
22
|
+
for alias in node.names:
|
|
23
|
+
imports.append({"module": alias.name, "alias": alias.asname, "names": []})
|
|
24
|
+
elif isinstance(node, ast.ImportFrom):
|
|
25
|
+
imports.append({
|
|
26
|
+
"module": node.module or "",
|
|
27
|
+
"names": [a.name for a in node.names],
|
|
28
|
+
"level": node.level,
|
|
29
|
+
})
|
|
30
|
+
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
31
|
+
if node.col_offset == 0: # top-level function
|
|
32
|
+
exports.append({"name": node.name, "type": "function", "line": node.lineno})
|
|
33
|
+
elif isinstance(node, ast.ClassDef):
|
|
34
|
+
if node.col_offset == 0: # top-level class
|
|
35
|
+
bases = [_node_name(b) for b in node.bases]
|
|
36
|
+
exports.append({"name": node.name, "type": "class", "line": node.lineno, "bases": bases})
|
|
37
|
+
|
|
38
|
+
return {"imports": imports, "exports": exports, "calls": calls, "path": file_path}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def resolve_import(module: str, level: int, source_file: str, repo_root: str) -> str | None:
|
|
42
|
+
"""Resolve an import to a file path within the repo."""
|
|
43
|
+
if level > 0:
|
|
44
|
+
# Relative import: go up `level` directories from source file's package
|
|
45
|
+
source_dir = Path(os.path.join(repo_root, source_file)).parent
|
|
46
|
+
for _ in range(level - 1):
|
|
47
|
+
source_dir = source_dir.parent
|
|
48
|
+
parts = module.split(".") if module else []
|
|
49
|
+
candidate = source_dir / Path(*parts) if parts else source_dir
|
|
50
|
+
else:
|
|
51
|
+
# Absolute import — check if it's a local module
|
|
52
|
+
parts = module.split(".")
|
|
53
|
+
candidate = Path(repo_root) / Path(*parts)
|
|
54
|
+
|
|
55
|
+
# Try as module.py or package/__init__.py
|
|
56
|
+
as_file = str(candidate) + ".py"
|
|
57
|
+
as_pkg = str(candidate / "__init__.py")
|
|
58
|
+
|
|
59
|
+
if os.path.exists(as_file):
|
|
60
|
+
return os.path.relpath(as_file, repo_root)
|
|
61
|
+
if os.path.exists(as_pkg):
|
|
62
|
+
return os.path.relpath(as_pkg, repo_root)
|
|
63
|
+
|
|
64
|
+
return None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def build_import_graph(repo_path: str, graph):
|
|
68
|
+
"""Walk a repo, extract Python imports, store as IMPORTS edges between File nodes."""
|
|
69
|
+
repo_path = str(Path(repo_path).resolve())
|
|
70
|
+
repo_name = Path(repo_path).name
|
|
71
|
+
py_files = list(Path(repo_path).rglob("*.py"))
|
|
72
|
+
py_files = [f for f in py_files if not any(
|
|
73
|
+
p in f.parts for p in (".venv", "venv", "node_modules", "__pycache__", ".git", "site-packages")
|
|
74
|
+
)]
|
|
75
|
+
|
|
76
|
+
file_exports = {} # relative_path -> [exported symbols]
|
|
77
|
+
file_imports = {} # relative_path -> [import info]
|
|
78
|
+
|
|
79
|
+
# Pass 1: collect exports and imports
|
|
80
|
+
for py_file in py_files:
|
|
81
|
+
relative = os.path.relpath(str(py_file), repo_path)
|
|
82
|
+
result = extract_python_graph(str(py_file))
|
|
83
|
+
file_exports[relative] = result["exports"]
|
|
84
|
+
file_imports[relative] = result["imports"]
|
|
85
|
+
|
|
86
|
+
# Store Function/Class nodes
|
|
87
|
+
for export in result["exports"]:
|
|
88
|
+
node_type = "Function" if export["type"] == "function" else "Class"
|
|
89
|
+
graph.query(
|
|
90
|
+
f"MERGE (s:{node_type} {{name: $name, file: $file, repo: $repo}}) SET s.line = $line",
|
|
91
|
+
params={"name": export["name"], "file": relative, "repo": repo_name, "line": export["line"]},
|
|
92
|
+
)
|
|
93
|
+
# Link symbol to file
|
|
94
|
+
graph.query(
|
|
95
|
+
f"""MATCH (s:{node_type} {{name: $name, file: $file}}), (d:Document {{path: $file}})
|
|
96
|
+
MERGE (s)-[:DEFINED_IN]->(d)""",
|
|
97
|
+
params={"name": export["name"], "file": relative},
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Pass 2: resolve imports to file paths, create IMPORTS edges
|
|
101
|
+
edges_created = 0
|
|
102
|
+
for source_file, imports in file_imports.items():
|
|
103
|
+
for imp in imports:
|
|
104
|
+
module = imp.get("module", "")
|
|
105
|
+
level = imp.get("level", 0)
|
|
106
|
+
names = imp.get("names", [])
|
|
107
|
+
|
|
108
|
+
if module:
|
|
109
|
+
# from module import X or import module
|
|
110
|
+
target_file = resolve_import(module, level, source_file, repo_path)
|
|
111
|
+
if target_file and target_file in file_exports:
|
|
112
|
+
graph.query(
|
|
113
|
+
"""MERGE (src:Document {path: $src})
|
|
114
|
+
MERGE (dst:Document {path: $dst})
|
|
115
|
+
MERGE (src)-[:IMPORTS {names: $names}]->(dst)""",
|
|
116
|
+
params={"src": source_file, "dst": target_file, "names": names},
|
|
117
|
+
)
|
|
118
|
+
edges_created += 1
|
|
119
|
+
elif level > 0 and names:
|
|
120
|
+
# from . import module1, module2 — each name is a sibling module
|
|
121
|
+
for name in names:
|
|
122
|
+
target_file = resolve_import(name, level, source_file, repo_path)
|
|
123
|
+
if target_file and target_file in file_exports:
|
|
124
|
+
graph.query(
|
|
125
|
+
"""MERGE (src:Document {path: $src})
|
|
126
|
+
MERGE (dst:Document {path: $dst})
|
|
127
|
+
MERGE (src)-[:IMPORTS {names: $imp_names}]->(dst)""",
|
|
128
|
+
params={"src": source_file, "dst": target_file, "imp_names": [name]},
|
|
129
|
+
)
|
|
130
|
+
edges_created += 1
|
|
131
|
+
|
|
132
|
+
return {"files_analyzed": len(py_files), "import_edges": edges_created, "symbols": sum(len(v) for v in file_exports.values())}
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _node_name(node) -> str:
|
|
136
|
+
"""Get string name from an AST node."""
|
|
137
|
+
if isinstance(node, ast.Name):
|
|
138
|
+
return node.id
|
|
139
|
+
elif isinstance(node, ast.Attribute):
|
|
140
|
+
return f"{_node_name(node.value)}.{node.attr}"
|
|
141
|
+
return ""
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def build_import_graph_all(repo_path: str, graph):
|
|
145
|
+
"""Build import graph for all supported languages in the repo."""
|
|
146
|
+
repo_path = str(Path(repo_path).resolve())
|
|
147
|
+
results = {"python": {}, "typescript": {}, "go": {}, "rust": {}}
|
|
148
|
+
|
|
149
|
+
# Python (AST-based)
|
|
150
|
+
results["python"] = build_import_graph(repo_path, graph)
|
|
151
|
+
|
|
152
|
+
# TypeScript/JavaScript (tree-sitter)
|
|
153
|
+
results["typescript"] = _build_ts_import_graph(repo_path, graph)
|
|
154
|
+
|
|
155
|
+
# Go (tree-sitter)
|
|
156
|
+
results["go"] = _build_go_import_graph(repo_path, graph)
|
|
157
|
+
|
|
158
|
+
# Rust (tree-sitter)
|
|
159
|
+
results["rust"] = _build_rust_import_graph(repo_path, graph)
|
|
160
|
+
|
|
161
|
+
total_edges = sum(r.get("import_edges", 0) for r in results.values())
|
|
162
|
+
total_symbols = sum(r.get("symbols", 0) for r in results.values())
|
|
163
|
+
total_files = sum(r.get("files_analyzed", 0) for r in results.values())
|
|
164
|
+
|
|
165
|
+
return {"files_analyzed": total_files, "import_edges": total_edges, "symbols": total_symbols, "by_language": results}
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _build_ts_import_graph(repo_path: str, graph) -> dict:
|
|
169
|
+
"""Build import graph for TypeScript/JavaScript files."""
|
|
170
|
+
from .ts_parsers import extract_typescript_graph, resolve_ts_import
|
|
171
|
+
|
|
172
|
+
skip = {".git", "node_modules", "dist", "build", ".venv", "__pycache__"}
|
|
173
|
+
ts_files = []
|
|
174
|
+
for ext in (".ts", ".tsx", ".js", ".jsx"):
|
|
175
|
+
for f in Path(repo_path).rglob(f"*{ext}"):
|
|
176
|
+
if not any(p in f.parts for p in skip):
|
|
177
|
+
ts_files.append(f)
|
|
178
|
+
|
|
179
|
+
if not ts_files:
|
|
180
|
+
return {"files_analyzed": 0, "import_edges": 0, "symbols": 0}
|
|
181
|
+
|
|
182
|
+
edges = 0
|
|
183
|
+
symbols = 0
|
|
184
|
+
for ts_file in ts_files:
|
|
185
|
+
relative = os.path.relpath(str(ts_file), repo_path)
|
|
186
|
+
try:
|
|
187
|
+
result = extract_typescript_graph(str(ts_file))
|
|
188
|
+
except Exception:
|
|
189
|
+
continue
|
|
190
|
+
|
|
191
|
+
# Store exports as symbols
|
|
192
|
+
for export in result["exports"]:
|
|
193
|
+
graph.query(
|
|
194
|
+
"MERGE (f:Function {name: $name, file: $file}) SET f.line = $line, f.lang = 'typescript'",
|
|
195
|
+
params={"name": export["name"], "file": relative, "line": export.get("line", 0)},
|
|
196
|
+
)
|
|
197
|
+
symbols += 1
|
|
198
|
+
|
|
199
|
+
# Resolve imports and create edges
|
|
200
|
+
for imp in result["imports"]:
|
|
201
|
+
target = resolve_ts_import(imp["module"], relative, repo_path)
|
|
202
|
+
if target:
|
|
203
|
+
graph.query(
|
|
204
|
+
"""MERGE (src:Document {path: $src})
|
|
205
|
+
MERGE (dst:Document {path: $dst})
|
|
206
|
+
MERGE (src)-[:IMPORTS {names: $names, lang: 'typescript'}]->(dst)""",
|
|
207
|
+
params={"src": relative, "dst": target, "names": imp.get("names", [])},
|
|
208
|
+
)
|
|
209
|
+
edges += 1
|
|
210
|
+
|
|
211
|
+
return {"files_analyzed": len(ts_files), "import_edges": edges, "symbols": symbols}
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _build_go_import_graph(repo_path: str, graph) -> dict:
|
|
215
|
+
"""Build import graph for Go files."""
|
|
216
|
+
from .ts_parsers import extract_go_graph, resolve_go_import
|
|
217
|
+
|
|
218
|
+
skip = {".git", "vendor", "node_modules"}
|
|
219
|
+
go_files = [f for f in Path(repo_path).rglob("*.go") if not any(p in f.parts for p in skip)]
|
|
220
|
+
|
|
221
|
+
if not go_files:
|
|
222
|
+
return {"files_analyzed": 0, "import_edges": 0, "symbols": 0}
|
|
223
|
+
|
|
224
|
+
# Read go.mod for module name
|
|
225
|
+
go_module = ""
|
|
226
|
+
gomod = Path(repo_path) / "go.mod"
|
|
227
|
+
if gomod.exists():
|
|
228
|
+
for line in gomod.read_text().splitlines():
|
|
229
|
+
if line.startswith("module "):
|
|
230
|
+
go_module = line.split()[1]
|
|
231
|
+
break
|
|
232
|
+
|
|
233
|
+
edges = 0
|
|
234
|
+
symbols = 0
|
|
235
|
+
for go_file in go_files:
|
|
236
|
+
relative = os.path.relpath(str(go_file), repo_path)
|
|
237
|
+
try:
|
|
238
|
+
result = extract_go_graph(str(go_file))
|
|
239
|
+
except Exception:
|
|
240
|
+
continue
|
|
241
|
+
|
|
242
|
+
for export in result["exports"]:
|
|
243
|
+
graph.query(
|
|
244
|
+
"MERGE (f:Function {name: $name, file: $file}) SET f.line = $line, f.lang = 'go'",
|
|
245
|
+
params={"name": export["name"], "file": relative, "line": export.get("line", 0)},
|
|
246
|
+
)
|
|
247
|
+
symbols += 1
|
|
248
|
+
|
|
249
|
+
for imp in result["imports"]:
|
|
250
|
+
target = resolve_go_import(imp["module"], repo_path, go_module)
|
|
251
|
+
if target:
|
|
252
|
+
graph.query(
|
|
253
|
+
"""MERGE (src:Document {path: $src})
|
|
254
|
+
MERGE (dst:Document {path: $dst})
|
|
255
|
+
MERGE (src)-[:IMPORTS {names: $names, lang: 'go'}]->(dst)""",
|
|
256
|
+
params={"src": relative, "dst": target, "names": []},
|
|
257
|
+
)
|
|
258
|
+
edges += 1
|
|
259
|
+
|
|
260
|
+
return {"files_analyzed": len(go_files), "import_edges": edges, "symbols": symbols}
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _build_rust_import_graph(repo_path: str, graph) -> dict:
|
|
264
|
+
"""Build import graph for Rust files."""
|
|
265
|
+
from .ts_parsers import extract_rust_graph
|
|
266
|
+
|
|
267
|
+
skip = {".git", "target", "node_modules"}
|
|
268
|
+
rs_files = [f for f in Path(repo_path).rglob("*.rs") if not any(p in f.parts for p in skip)]
|
|
269
|
+
|
|
270
|
+
if not rs_files:
|
|
271
|
+
return {"files_analyzed": 0, "import_edges": 0, "symbols": 0}
|
|
272
|
+
|
|
273
|
+
edges = 0
|
|
274
|
+
symbols = 0
|
|
275
|
+
for rs_file in rs_files:
|
|
276
|
+
relative = os.path.relpath(str(rs_file), repo_path)
|
|
277
|
+
try:
|
|
278
|
+
result = extract_rust_graph(str(rs_file))
|
|
279
|
+
except Exception:
|
|
280
|
+
continue
|
|
281
|
+
|
|
282
|
+
for export in result["exports"]:
|
|
283
|
+
graph.query(
|
|
284
|
+
"MERGE (f:Function {name: $name, file: $file}) SET f.line = $line, f.lang = 'rust'",
|
|
285
|
+
params={"name": export["name"], "file": relative, "line": export.get("line", 0)},
|
|
286
|
+
)
|
|
287
|
+
symbols += 1
|
|
288
|
+
|
|
289
|
+
# Rust mod resolution: mod foo -> foo.rs or foo/mod.rs
|
|
290
|
+
for imp in result["imports"]:
|
|
291
|
+
if imp.get("is_mod"):
|
|
292
|
+
mod_name = imp["module"]
|
|
293
|
+
src_dir = Path(os.path.join(repo_path, relative)).parent
|
|
294
|
+
for candidate in [src_dir / f"{mod_name}.rs", src_dir / mod_name / "mod.rs"]:
|
|
295
|
+
if candidate.exists():
|
|
296
|
+
target = os.path.relpath(str(candidate), repo_path)
|
|
297
|
+
graph.query(
|
|
298
|
+
"""MERGE (src:Document {path: $src})
|
|
299
|
+
MERGE (dst:Document {path: $dst})
|
|
300
|
+
MERGE (src)-[:IMPORTS {names: [], lang: 'rust'}]->(dst)""",
|
|
301
|
+
params={"src": relative, "dst": target},
|
|
302
|
+
)
|
|
303
|
+
edges += 1
|
|
304
|
+
break
|
|
305
|
+
|
|
306
|
+
return {"files_analyzed": len(rs_files), "import_edges": edges, "symbols": symbols}
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""FalkorDB graph store - nodes, edges, vector search, and graph traversal."""
|
|
2
2
|
|
|
3
|
+
import hashlib
|
|
4
|
+
|
|
3
5
|
from falkordb import FalkorDB
|
|
4
6
|
|
|
5
7
|
GRAPH_NAME = "knowledge"
|
|
@@ -7,11 +9,35 @@ GRAPH_NAME = "knowledge"
|
|
|
7
9
|
# Vector dimension for nomic-embed-text
|
|
8
10
|
VECTOR_DIM = 768
|
|
9
11
|
|
|
12
|
+
_graph_instance = None
|
|
13
|
+
|
|
10
14
|
|
|
11
15
|
def get_graph(host: str = "localhost", port: int = 6379):
|
|
12
|
-
"""Get FalkorDB graph instance."""
|
|
16
|
+
"""Get FalkorDB graph instance with schema version check."""
|
|
17
|
+
global _graph_instance
|
|
18
|
+
if _graph_instance is not None:
|
|
19
|
+
return _graph_instance
|
|
20
|
+
|
|
13
21
|
db = FalkorDB(host=host, port=port)
|
|
14
|
-
|
|
22
|
+
graph = db.select_graph(GRAPH_NAME)
|
|
23
|
+
|
|
24
|
+
# Check and auto-migrate schema
|
|
25
|
+
from .migrations import check_and_migrate
|
|
26
|
+
check_and_migrate(graph, auto_migrate=True)
|
|
27
|
+
|
|
28
|
+
_graph_instance = graph
|
|
29
|
+
return graph
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def reset_graph_instance():
|
|
33
|
+
"""Reset cached graph instance (for testing)."""
|
|
34
|
+
global _graph_instance
|
|
35
|
+
_graph_instance = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def content_hash(text: str) -> str:
|
|
39
|
+
"""Compute content hash for deduplication."""
|
|
40
|
+
return hashlib.sha256(text.encode()).hexdigest()[:16]
|
|
15
41
|
|
|
16
42
|
|
|
17
43
|
def init_schema(graph):
|
|
@@ -35,20 +61,32 @@ def init_schema(graph):
|
|
|
35
61
|
|
|
36
62
|
|
|
37
63
|
def upsert_chunk(graph, chunk_id: str, text: str, embedding: list[float], metadata: dict):
|
|
38
|
-
"""Insert or update a chunk node with embedding."""
|
|
64
|
+
"""Insert or update a chunk node with embedding. Skips if content unchanged (dedup)."""
|
|
65
|
+
chash = content_hash(text)
|
|
66
|
+
|
|
67
|
+
# Check if chunk exists with same content hash — skip if unchanged
|
|
68
|
+
existing = graph.query(
|
|
69
|
+
"MATCH (c:Chunk {id: $id}) RETURN c.content_hash",
|
|
70
|
+
params={"id": chunk_id},
|
|
71
|
+
)
|
|
72
|
+
if existing.result_set and existing.result_set[0][0] == chash:
|
|
73
|
+
return False # skip — content unchanged
|
|
74
|
+
|
|
39
75
|
graph.query(
|
|
40
76
|
"""MERGE (c:Chunk {id: $id})
|
|
41
77
|
SET c.text = $text, c.embedding = vecf32($embedding),
|
|
42
78
|
c.source = $source, c.source_type = $source_type,
|
|
43
|
-
c.indexed_at = timestamp()""",
|
|
79
|
+
c.content_hash = $hash, c.indexed_at = timestamp()""",
|
|
44
80
|
params={
|
|
45
81
|
"id": chunk_id,
|
|
46
82
|
"text": text,
|
|
47
83
|
"embedding": embedding,
|
|
48
84
|
"source": metadata.get("source", ""),
|
|
49
85
|
"source_type": metadata.get("source_type", ""),
|
|
86
|
+
"hash": chash,
|
|
50
87
|
},
|
|
51
88
|
)
|
|
89
|
+
return True # inserted/updated
|
|
52
90
|
|
|
53
91
|
|
|
54
92
|
def upsert_document(graph, path: str, doc_type: str, metadata: dict):
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Tree-sitter based import graph extraction for TypeScript, Go, and Rust."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from tree_sitter import Language, Parser
|
|
7
|
+
import tree_sitter_typescript as ts_ts
|
|
8
|
+
import tree_sitter_go as ts_go
|
|
9
|
+
import tree_sitter_rust as ts_rust
|
|
10
|
+
import tree_sitter_javascript as ts_js
|
|
11
|
+
|
|
12
|
+
# Initialize languages
|
|
13
|
+
TYPESCRIPT = Language(ts_ts.language_typescript())
|
|
14
|
+
TSX = Language(ts_ts.language_tsx())
|
|
15
|
+
JAVASCRIPT = Language(ts_js.language())
|
|
16
|
+
GO = Language(ts_go.language())
|
|
17
|
+
RUST = Language(ts_rust.language())
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def extract_typescript_graph(file_path: str) -> dict:
    """Extract imports and exports from a TypeScript/JavaScript file.

    The grammar is chosen from the file extension: ``.tsx`` uses the TSX
    grammar, ``.js``/``.jsx``/``.mjs``/``.cjs`` use the JavaScript grammar
    (JSX syntax is not valid input for the plain TypeScript grammar), and
    everything else uses the TypeScript grammar.

    Args:
        file_path: Path of the file to parse; it is read as raw bytes.

    Returns:
        A dict with three keys:

        * ``"imports"``: ``{"module": str, "names": list[str]}`` entries for
          ``import`` statements, ``require("...")`` calls with a string
          literal argument, and ``export ... from "..."`` re-exports.
        * ``"exports"``: ``{"name": str, "type": str, "line": int}`` entries
          (1-based line) for exported declarations, ``export { ... }``
          specifiers (type ``"re-export"``) and top-level function/class
          declarations.
        * ``"path"``: ``file_path`` unchanged.

    Raises:
        OSError: If the file cannot be read.
    """
    source = Path(file_path).read_bytes()
    if file_path.endswith(".tsx"):
        lang = TSX
    elif file_path.endswith((".js", ".jsx", ".mjs", ".cjs")):
        lang = JAVASCRIPT
    else:
        lang = TYPESCRIPT
    tree = Parser(lang).parse(source)

    def text(n) -> str:
        # Be lenient with stray bytes so one odd file cannot abort a scan.
        return n.text.decode("utf-8", errors="replace")

    imports = []
    exports = []

    for node in _walk(tree.root_node):
        # import { X } from './module' | import X from 'module'
        if node.type == "import_statement":
            source_node = node.child_by_field_name("source")
            if source_node:
                names = []
                for child in node.children:
                    if child.type == "import_clause":
                        names.extend(_ts_import_names(child))
                imports.append({"module": text(source_node).strip("'\""), "names": names})

        # require('module')
        elif node.type == "call_expression":
            func = node.child_by_field_name("function")
            if func and func.text == b"require":
                args = node.child_by_field_name("arguments")
                # children[0] is the opening parenthesis, children[1] the first argument
                if args and args.child_count > 1:
                    arg = args.children[1]
                    if arg.type == "string":
                        imports.append({"module": text(arg).strip("'\""), "names": []})

        # export function/class/const, export { x, y }, export ... from '...'
        elif node.type in ("export_statement", "export_default_declaration"):
            decl = node.child_by_field_name("declaration")
            if decl:
                name_node = decl.child_by_field_name("name")
                if name_node:
                    exports.append({"name": text(name_node), "type": decl.type, "line": decl.start_point[0] + 1})
                elif decl.type in ("lexical_declaration", "variable_declaration"):
                    # `export const a = 1, b = 2`: the names live on the declarators.
                    for declarator in decl.children:
                        if declarator.type != "variable_declarator":
                            continue
                        target = declarator.child_by_field_name("name")
                        # Destructuring patterns are skipped; only plain identifiers are recorded.
                        if target and target.type == "identifier":
                            exports.append({"name": text(target), "type": decl.type, "line": declarator.start_point[0] + 1})

            specifier_names = []
            for child in node.children:
                if child.type == "export_clause":
                    for spec in _walk(child):
                        if spec.type == "export_specifier":
                            name_node = spec.child_by_field_name("name")
                            if name_node:
                                specifier_names.append(text(name_node))
                                exports.append({"name": text(name_node), "type": "re-export", "line": spec.start_point[0] + 1})

            # `export { x } from './m'` / `export * from './m'` also depend on './m'.
            source_node = node.child_by_field_name("source")
            if source_node:
                imports.append({"module": text(source_node).strip("'\""), "names": specifier_names})

        # Top-level function/class declarations. Declarations wrapped in an
        # export statement are already recorded by the branch above, so only
        # direct children of the program are handled here (no duplicates).
        elif (
            node.type in ("function_declaration", "class_declaration")
            and node.parent is not None
            and node.parent.type == "program"
        ):
            name_node = node.child_by_field_name("name")
            if name_node:
                exports.append({"name": text(name_node), "type": node.type, "line": node.start_point[0] + 1})

    return {"imports": imports, "exports": exports, "path": file_path}


def _ts_import_names(clause) -> list:
    """Return the names bound by an ``import_clause`` node, in source order.

    Default and namespace imports contribute their identifier. Each
    ``import_specifier`` contributes only its ``name`` field (the name as
    exported by the module); the walk does not descend into the specifier, so
    the name is not repeated and the local alias is not reported.
    """
    names = []
    pending = [clause]
    while pending:
        current = pending.pop()
        if current.type == "import_specifier":
            name_node = current.child_by_field_name("name")
            if name_node:
                names.append(name_node.text.decode("utf-8", errors="replace"))
            continue
        if current.type == "identifier":
            names.append(current.text.decode("utf-8", errors="replace"))
            continue
        # Reversed so that popping from the end visits children left to right.
        pending.extend(reversed(current.children))
    return names
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def extract_go_graph(file_path: str) -> dict:
    """Extract imports and exported symbols from a Go file.

    Args:
        file_path: Path of the ``.go`` file to parse; it is read as raw bytes.

    Returns:
        A dict with three keys:

        * ``"imports"``: one ``{"module": str, "names": []}`` entry per
          import spec, with the surrounding quotes removed.
        * ``"exports"``: ``{"name": str, "type": "function" | "type",
          "line": int}`` entries (1-based line) for function and type
          declarations whose name starts with an upper-case letter.
        * ``"path"``: ``file_path`` unchanged.

    Raises:
        OSError: If the file cannot be read.
    """
    source = Path(file_path).read_bytes()
    tree = Parser(GO).parse(source)

    def text(n) -> str:
        # Be lenient with stray bytes so one odd file cannot abort a scan.
        return n.text.decode("utf-8", errors="replace")

    imports = []
    exports = []

    for node in _walk(tree.root_node):
        # import "pkg" or import ( "pkg1"; "pkg2" )
        if node.type == "import_declaration":
            for child in _walk(node):
                # Only the spec is inspected. Its string-literal child is also
                # reached by the walk, and matching it as well would record
                # every import twice.
                if child.type != "import_spec":
                    continue
                path_node = child.child_by_field_name("path")
                if path_node:
                    # Import paths may be written with double quotes or backticks.
                    imports.append({"module": text(path_node).strip('"`'), "names": []})

        # Exported functions (capitalized)
        elif node.type == "function_declaration":
            name_node = node.child_by_field_name("name")
            if name_node:
                name = text(name_node)
                # Slice instead of index: an empty name must not raise.
                if name[:1].isupper():
                    exports.append({"name": name, "type": "function", "line": node.start_point[0] + 1})

        # Exported types
        elif node.type == "type_declaration":
            for spec in node.children:
                if spec.type != "type_spec":
                    continue
                name_node = spec.child_by_field_name("name")
                if name_node:
                    name = text(name_node)
                    if name[:1].isupper():
                        exports.append({"name": name, "type": "type", "line": spec.start_point[0] + 1})

    return {"imports": imports, "exports": exports, "path": file_path}
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def extract_rust_graph(file_path: str) -> dict:
    """Extract ``use`` statements, ``mod`` declarations and pub items from a Rust file.

    Args:
        file_path: Path of the ``.rs`` file to parse; it is read as raw bytes.

    Returns:
        A dict with three keys:

        * ``"imports"``: for each ``use`` declaration,
          ``{"module": first path segment, "path": full use text,
          "names": [...]}``; for each ``mod name;`` declaration,
          ``{"module": name, "names": [], "is_mod": True}``.
        * ``"exports"``: ``{"name": str, "type": str, "line": int}`` entries
          (1-based line) for items that carry a visibility modifier; the type
          is the node type with ``_item`` removed.
        * ``"path"``: ``file_path`` unchanged.

    Raises:
        OSError: If the file cannot be read.
    """
    source = Path(file_path).read_bytes()
    tree = Parser(RUST).parse(source)

    def text(n) -> str:
        # Be lenient with stray bytes so one odd file cannot abort a scan.
        return n.text.decode("utf-8", errors="replace")

    path_kinds = ("scoped_identifier", "identifier", "use_wildcard", "scoped_use_list")
    imports = []
    exports = []

    for node in _walk(tree.root_node):
        # use std::collections::HashMap; | use crate::module::Item;
        if node.type == "use_declaration":
            # Start from the use clause itself so that a path inside a
            # visibility modifier such as `pub(in some::path)` is never
            # mistaken for the imported path.
            clause = node.child_by_field_name("argument")
            if clause is None:
                clause = node
            target = next((c for c in _walk(clause) if c.type in path_kinds), None)
            path_text = text(target) if target is not None else ""
            if path_text:
                module = path_text.split("::")[0]
                if target.type == "scoped_use_list":
                    # `use a::{b, c}`: report each list entry instead of the raw "{b, c}" text.
                    use_list = target.child_by_field_name("list")
                    names = []
                    if use_list is not None:
                        names = [
                            text(item)
                            for item in use_list.named_children
                            if item.type not in ("line_comment", "block_comment")
                        ]
                elif "::" in path_text:
                    names = path_text.split("::")[-1:]
                else:
                    names = []
                imports.append({"module": module, "path": path_text, "names": names})

        # mod declarations
        elif node.type == "mod_item":
            # `mod name { ... }` defines the module inline and does not refer
            # to another file, so only body-less declarations are recorded.
            if node.child_by_field_name("body") is not None:
                continue
            name_node = node.child_by_field_name("name")
            if name_node:
                imports.append({"module": text(name_node), "names": [], "is_mod": True})

        # pub fn / pub struct / pub enum
        elif node.type in ("function_item", "struct_item", "enum_item", "impl_item"):
            is_pub = any(c.type == "visibility_modifier" for c in node.children)
            name_node = node.child_by_field_name("name")
            if name_node and is_pub:
                exports.append({"name": text(name_node), "type": node.type.replace("_item", ""), "line": node.start_point[0] + 1})

    return {"imports": imports, "exports": exports, "path": file_path}
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def resolve_ts_import(module: str, source_file: str, repo_root: str) -> str | None:
|
|
164
|
+
"""Resolve a TypeScript/JS import to a file path."""
|
|
165
|
+
if not module.startswith("."):
|
|
166
|
+
return None # external package
|
|
167
|
+
|
|
168
|
+
source_dir = Path(os.path.join(repo_root, source_file)).parent
|
|
169
|
+
candidate = source_dir / module
|
|
170
|
+
|
|
171
|
+
for suffix in [".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js"]:
|
|
172
|
+
path = str(candidate) + suffix
|
|
173
|
+
if os.path.exists(path):
|
|
174
|
+
return os.path.relpath(path, repo_root)
|
|
175
|
+
return None
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def resolve_go_import(module: str, repo_root: str, go_module: str = "") -> str | None:
|
|
179
|
+
"""Resolve a Go import to a directory in the repo."""
|
|
180
|
+
if go_module and module.startswith(go_module):
|
|
181
|
+
relative = module[len(go_module):].lstrip("/")
|
|
182
|
+
candidate = os.path.join(repo_root, relative)
|
|
183
|
+
if os.path.isdir(candidate):
|
|
184
|
+
return relative
|
|
185
|
+
return None
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _walk(node):
    """Yield ``node`` and all of its descendants in pre-order.

    A parent is yielded before its children, and children are visited left to
    right. The traversal uses an explicit stack rather than recursion, so a
    deeply nested syntax tree cannot hit Python's recursion limit.

    Args:
        node: Any object exposing a ``children`` sequence of the same kind of
            object (tree-sitter nodes in this module).
    """
    pending = [node]
    while pending:
        current = pending.pop()
        yield current
        # Reversed so that popping from the end visits children left to right.
        pending.extend(reversed(current.children))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: knowledge-master
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Local-first knowledge graph for developers. Your AI agent's permanent memory.
|
|
5
5
|
Author: Milenko Mitrovic
|
|
6
6
|
License: MIT
|
|
@@ -28,6 +28,11 @@ Requires-Dist: rich<15.0,>=14.0.0
|
|
|
28
28
|
Requires-Dist: fastapi<1.0,>=0.115.0
|
|
29
29
|
Requires-Dist: uvicorn<1.0,>=0.34.0
|
|
30
30
|
Requires-Dist: pyyaml>=6.0
|
|
31
|
+
Requires-Dist: tree-sitter>=0.23.0
|
|
32
|
+
Requires-Dist: tree-sitter-javascript>=0.23.0
|
|
33
|
+
Requires-Dist: tree-sitter-typescript>=0.23.0
|
|
34
|
+
Requires-Dist: tree-sitter-go>=0.23.0
|
|
35
|
+
Requires-Dist: tree-sitter-rust>=0.23.0
|
|
31
36
|
Provides-Extra: office
|
|
32
37
|
Requires-Dist: python-docx<2.0,>=1.1.0; extra == "office"
|
|
33
38
|
Requires-Dist: openpyxl<4.0,>=3.1.0; extra == "office"
|
|
@@ -9,10 +9,12 @@ knowledge_master/cli.py
|
|
|
9
9
|
knowledge_master/connectors.py
|
|
10
10
|
knowledge_master/embeddings.py
|
|
11
11
|
knowledge_master/intelligence.py
|
|
12
|
+
knowledge_master/migrations.py
|
|
12
13
|
knowledge_master/rerank.py
|
|
13
14
|
knowledge_master/server.py
|
|
14
15
|
knowledge_master/static_analysis.py
|
|
15
16
|
knowledge_master/store.py
|
|
17
|
+
knowledge_master/ts_parsers.py
|
|
16
18
|
knowledge_master/watcher.py
|
|
17
19
|
knowledge_master/web.py
|
|
18
20
|
knowledge_master.egg-info/PKG-INFO
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "knowledge-master"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "0.4.0"
|
|
4
4
|
description = "Local-first knowledge graph for developers. Your AI agent's permanent memory."
|
|
5
5
|
requires-python = ">=3.11"
|
|
6
6
|
license = {text = "MIT"}
|
|
@@ -27,6 +27,11 @@ dependencies = [
|
|
|
27
27
|
"fastapi>=0.115.0,<1.0",
|
|
28
28
|
"uvicorn>=0.34.0,<1.0",
|
|
29
29
|
"pyyaml>=6.0",
|
|
30
|
+
"tree-sitter>=0.23.0",
|
|
31
|
+
"tree-sitter-javascript>=0.23.0",
|
|
32
|
+
"tree-sitter-typescript>=0.23.0",
|
|
33
|
+
"tree-sitter-go>=0.23.0",
|
|
34
|
+
"tree-sitter-rust>=0.23.0",
|
|
30
35
|
]
|
|
31
36
|
|
|
32
37
|
[project.optional-dependencies]
|
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
"""Static analysis — extract import graphs, symbols, and call relationships from code."""
|
|
2
|
-
|
|
3
|
-
import ast
|
|
4
|
-
import os
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def extract_python_graph(file_path: str) -> dict:
    """Extract imports and top-level functions/classes from a Python file.

    Args:
        file_path: Path of the ``.py`` file to analyse.

    Returns:
        A dict with four keys:

        * ``"imports"``: for ``import x``,
          ``{"module": name, "alias": asname, "names": []}``; for
          ``from x import a``,
          ``{"module": x or "", "names": [...], "level": relative level}``.
        * ``"exports"``: ``{"name", "type": "function" | "class", "line"}``
          entries for definitions starting in column 0; classes also carry
          ``"bases"``.
        * ``"calls"``: always an empty list (call extraction is not implemented).
        * ``"path"``: ``file_path`` unchanged.

        When the file cannot be parsed, all three lists are empty.

    Raises:
        OSError: If the file cannot be read.
    """
    try:
        # Decode explicitly as UTF-8 (Python's default source encoding) so the
        # result does not depend on the platform's locale encoding.
        source = Path(file_path).read_text(encoding="utf-8", errors="ignore")
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        return {"imports": [], "exports": [], "calls": [], "path": file_path}

    imports = []
    exports = []
    calls = []

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                imports.append({"module": alias.name, "alias": alias.asname, "names": []})
        elif isinstance(node, ast.ImportFrom):
            imports.append({
                "module": node.module or "",
                "names": [a.name for a in node.names],
                "level": node.level,
            })
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            if node.col_offset == 0:  # top-level function
                exports.append({"name": node.name, "type": "function", "line": node.lineno})
        elif isinstance(node, ast.ClassDef):
            if node.col_offset == 0:  # top-level class
                bases = [_node_name(b) for b in node.bases]
                exports.append({"name": node.name, "type": "class", "line": node.lineno, "bases": bases})

    return {"imports": imports, "exports": exports, "calls": calls, "path": file_path}
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def resolve_import(module: str, level: int, source_file: str, repo_root: str) -> str | None:
|
|
42
|
-
"""Resolve an import to a file path within the repo."""
|
|
43
|
-
if level > 0:
|
|
44
|
-
# Relative import: go up `level` directories from source file's package
|
|
45
|
-
source_dir = Path(os.path.join(repo_root, source_file)).parent
|
|
46
|
-
for _ in range(level - 1):
|
|
47
|
-
source_dir = source_dir.parent
|
|
48
|
-
parts = module.split(".") if module else []
|
|
49
|
-
candidate = source_dir / Path(*parts) if parts else source_dir
|
|
50
|
-
else:
|
|
51
|
-
# Absolute import — check if it's a local module
|
|
52
|
-
parts = module.split(".")
|
|
53
|
-
candidate = Path(repo_root) / Path(*parts)
|
|
54
|
-
|
|
55
|
-
# Try as module.py or package/__init__.py
|
|
56
|
-
as_file = str(candidate) + ".py"
|
|
57
|
-
as_pkg = str(candidate / "__init__.py")
|
|
58
|
-
|
|
59
|
-
if os.path.exists(as_file):
|
|
60
|
-
return os.path.relpath(as_file, repo_root)
|
|
61
|
-
if os.path.exists(as_pkg):
|
|
62
|
-
return os.path.relpath(as_pkg, repo_root)
|
|
63
|
-
|
|
64
|
-
return None
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def build_import_graph(repo_path: str, graph):
    """Walk a repo, extract Python imports, store as IMPORTS edges between File nodes.

    Pass 1 parses every ``*.py`` file, stores each top-level function/class
    as a ``Function``/``Class`` node and links it to the ``Document`` node of
    its file. Pass 2 resolves every import to a file in the repo and merges
    an ``IMPORTS`` edge between the two ``Document`` nodes.

    Args:
        repo_path: Directory of the repository to analyse.
        graph: Graph handle; only ``graph.query(cypher, params=...)`` is used.

    Returns:
        ``{"files_analyzed": int, "import_edges": int, "symbols": int}``.
    """
    repo_path = str(Path(repo_path).resolve())
    repo_root = Path(repo_path)
    repo_name = repo_root.name

    skipped_dirs = (".venv", "venv", "node_modules", "__pycache__", ".git", "site-packages")
    # Compare against the path *relative to the repo*, so that a repository
    # that itself lives under a directory called e.g. "venv" is still analysed.
    py_files = [
        f for f in repo_root.rglob("*.py")
        if not any(p in f.relative_to(repo_root).parts for p in skipped_dirs)
    ]

    file_exports = {}  # relative_path -> [exported symbols]
    file_imports = {}  # relative_path -> [import info]

    # Pass 1: collect exports and imports
    for py_file in py_files:
        relative = os.path.relpath(str(py_file), repo_path)
        result = extract_python_graph(str(py_file))
        file_exports[relative] = result["exports"]
        file_imports[relative] = result["imports"]

        # Store Function/Class nodes
        for export in result["exports"]:
            # The label comes from this fixed pair, never from file content,
            # so interpolating it into the query text is safe.
            node_type = "Function" if export["type"] == "function" else "Class"
            graph.query(
                f"MERGE (s:{node_type} {{name: $name, file: $file, repo: $repo}}) SET s.line = $line",
                params={"name": export["name"], "file": relative, "repo": repo_name, "line": export["line"]},
            )
            # Link symbol to file
            graph.query(
                f"""MATCH (s:{node_type} {{name: $name, file: $file}}), (d:Document {{path: $file}})
                       MERGE (s)-[:DEFINED_IN]->(d)""",
                params={"name": export["name"], "file": relative},
            )

    # Pass 2: resolve imports to file paths, create IMPORTS edges
    edges_created = 0
    for source_file, imports in file_imports.items():
        for imp in imports:
            module = imp.get("module", "")
            level = imp.get("level", 0)
            names = imp.get("names", [])

            if module:
                # from module import X  or  import module
                target_file = resolve_import(module, level, source_file, repo_path)
                if target_file and target_file in file_exports:
                    graph.query(
                        """MERGE (src:Document {path: $src})
                           MERGE (dst:Document {path: $dst})
                           MERGE (src)-[:IMPORTS {names: $names}]->(dst)""",
                        params={"src": source_file, "dst": target_file, "names": names},
                    )
                    edges_created += 1
            elif level > 0 and names:
                # from . import module1, module2 — each name is a sibling module
                for name in names:
                    target_file = resolve_import(name, level, source_file, repo_path)
                    if target_file and target_file in file_exports:
                        graph.query(
                            """MERGE (src:Document {path: $src})
                               MERGE (dst:Document {path: $dst})
                               MERGE (src)-[:IMPORTS {names: $imp_names}]->(dst)""",
                            params={"src": source_file, "dst": target_file, "imp_names": [name]},
                        )
                        edges_created += 1

    return {"files_analyzed": len(py_files), "import_edges": edges_created, "symbols": sum(len(v) for v in file_exports.values())}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
def _node_name(node) -> str:
    """Render a ``Name``/``Attribute`` AST node as dotted text.

    ``Base`` gives ``"Base"`` and ``pkg.mod.Base`` gives ``"pkg.mod.Base"``.
    Any other node kind contributes an empty string, so an attribute on a
    call such as ``make().Base`` gives ``".Base"``.
    """
    # Peel attribute accesses from the outside in, then add the innermost name.
    pieces = []
    while isinstance(node, ast.Attribute):
        pieces.append(node.attr)
        node = node.value
    pieces.append(node.id if isinstance(node, ast.Name) else "")
    return ".".join(reversed(pieces))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{knowledge_master-0.2.0 → knowledge_master-0.4.0}/knowledge_master.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|