feed-the-machine 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +170 -170
- package/bin/generate-manifest.mjs +463 -463
- package/bin/install.mjs +491 -491
- package/docs/HOOKS.md +243 -243
- package/docs/INBOX.md +233 -233
- package/ftm/SKILL.md +122 -122
- package/ftm-audit/SKILL.md +623 -541
- package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -91
- package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -66
- package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -135
- package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -69
- package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -96
- package/ftm-audit/scripts/run-knip.sh +23 -23
- package/ftm-audit.yml +2 -2
- package/ftm-brainstorm/SKILL.md +498 -498
- package/ftm-brainstorm/evals/evals.json +100 -100
- package/ftm-brainstorm/evals/promptfoo.yaml +109 -109
- package/ftm-brainstorm/references/agent-prompts.md +224 -224
- package/ftm-brainstorm/references/plan-template.md +121 -121
- package/ftm-brainstorm.yml +2 -2
- package/ftm-browse/SKILL.md +454 -454
- package/ftm-browse/daemon/browser-manager.ts +206 -206
- package/ftm-browse/daemon/bun.lock +30 -30
- package/ftm-browse/daemon/cli.ts +347 -347
- package/ftm-browse/daemon/commands.ts +410 -410
- package/ftm-browse/daemon/main.ts +357 -357
- package/ftm-browse/daemon/package.json +17 -17
- package/ftm-browse/daemon/server.ts +189 -189
- package/ftm-browse/daemon/snapshot.ts +519 -519
- package/ftm-browse/daemon/tsconfig.json +22 -22
- package/ftm-browse.yml +4 -4
- package/ftm-capture/SKILL.md +370 -370
- package/ftm-capture.yml +4 -4
- package/ftm-codex-gate/SKILL.md +361 -361
- package/ftm-codex-gate.yml +2 -2
- package/ftm-config/SKILL.md +345 -345
- package/ftm-config.default.yml +82 -80
- package/ftm-config.yml +2 -2
- package/ftm-council/SKILL.md +416 -416
- package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -60
- package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -58
- package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -58
- package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -57
- package/ftm-council/references/protocols/PREREQUISITES.md +47 -47
- package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -46
- package/ftm-council.yml +2 -2
- package/ftm-dashboard/SKILL.md +163 -163
- package/ftm-dashboard.yml +4 -4
- package/ftm-debug/SKILL.md +1037 -1037
- package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -58
- package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -46
- package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -279
- package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -436
- package/ftm-debug/references/protocols/BLACKBOARD.md +86 -86
- package/ftm-debug/references/protocols/EDGE-CASES.md +103 -103
- package/ftm-debug.yml +2 -2
- package/ftm-diagram/SKILL.md +277 -277
- package/ftm-diagram.yml +2 -2
- package/ftm-executor/SKILL.md +777 -767
- package/ftm-executor/references/STYLE-TEMPLATE.md +73 -73
- package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -62
- package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -34
- package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -38
- package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +72 -72
- package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -66
- package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -73
- package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -36
- package/ftm-executor/references/protocols/MODEL-PROFILE.md +59 -44
- package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -66
- package/ftm-executor/runtime/ftm-runtime.mjs +252 -252
- package/ftm-executor/runtime/package.json +8 -8
- package/ftm-executor.yml +2 -2
- package/ftm-git/SKILL.md +441 -441
- package/ftm-git/evals/evals.json +26 -26
- package/ftm-git/evals/promptfoo.yaml +75 -75
- package/ftm-git/hooks/post-commit-experience.sh +92 -92
- package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -104
- package/ftm-git/references/protocols/REMEDIATION.md +139 -139
- package/ftm-git/scripts/pre-commit-secrets.sh +110 -110
- package/ftm-git.yml +2 -2
- package/ftm-inbox/backend/adapters/_retry.py +64 -64
- package/ftm-inbox/backend/adapters/base.py +230 -230
- package/ftm-inbox/backend/adapters/freshservice.py +104 -104
- package/ftm-inbox/backend/adapters/gmail.py +125 -125
- package/ftm-inbox/backend/adapters/jira.py +136 -136
- package/ftm-inbox/backend/adapters/registry.py +192 -192
- package/ftm-inbox/backend/adapters/slack.py +110 -110
- package/ftm-inbox/backend/db/connection.py +54 -54
- package/ftm-inbox/backend/db/schema.py +78 -78
- package/ftm-inbox/backend/executor/__init__.py +7 -7
- package/ftm-inbox/backend/executor/engine.py +149 -149
- package/ftm-inbox/backend/executor/step_runner.py +98 -98
- package/ftm-inbox/backend/main.py +103 -103
- package/ftm-inbox/backend/models/__init__.py +1 -1
- package/ftm-inbox/backend/models/unified_task.py +36 -36
- package/ftm-inbox/backend/planner/__init__.py +6 -6
- package/ftm-inbox/backend/planner/generator.py +127 -127
- package/ftm-inbox/backend/planner/schema.py +34 -34
- package/ftm-inbox/backend/requirements.txt +5 -5
- package/ftm-inbox/backend/routes/execute.py +186 -186
- package/ftm-inbox/backend/routes/health.py +52 -52
- package/ftm-inbox/backend/routes/inbox.py +68 -68
- package/ftm-inbox/backend/routes/plan.py +271 -271
- package/ftm-inbox/bin/launchagent.mjs +91 -91
- package/ftm-inbox/bin/setup.mjs +188 -188
- package/ftm-inbox/bin/start.sh +10 -10
- package/ftm-inbox/bin/status.sh +17 -17
- package/ftm-inbox/bin/stop.sh +8 -8
- package/ftm-inbox/config.example.yml +55 -55
- package/ftm-inbox/package-lock.json +2898 -2898
- package/ftm-inbox/package.json +26 -26
- package/ftm-inbox/postcss.config.js +6 -6
- package/ftm-inbox/src/app.css +199 -199
- package/ftm-inbox/src/app.html +18 -18
- package/ftm-inbox/src/lib/api.ts +166 -166
- package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -81
- package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -143
- package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -271
- package/ftm-inbox/src/lib/components/PlanView.svelte +206 -206
- package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -99
- package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -190
- package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -63
- package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -86
- package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -106
- package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -67
- package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -149
- package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -80
- package/ftm-inbox/src/lib/theme.ts +47 -47
- package/ftm-inbox/src/routes/+layout.svelte +76 -76
- package/ftm-inbox/src/routes/+page.svelte +401 -401
- package/ftm-inbox/svelte.config.js +12 -12
- package/ftm-inbox/tailwind.config.ts +63 -63
- package/ftm-inbox/tsconfig.json +13 -13
- package/ftm-inbox/vite.config.ts +6 -6
- package/ftm-intent/SKILL.md +241 -241
- package/ftm-intent.yml +2 -2
- package/ftm-manifest.json +3794 -3794
- package/ftm-map/SKILL.md +291 -291
- package/ftm-map/scripts/db.py +712 -712
- package/ftm-map/scripts/index.py +415 -415
- package/ftm-map/scripts/parser.py +224 -224
- package/ftm-map/scripts/queries/go-tags.scm +20 -20
- package/ftm-map/scripts/queries/javascript-tags.scm +35 -35
- package/ftm-map/scripts/queries/python-tags.scm +31 -31
- package/ftm-map/scripts/queries/ruby-tags.scm +19 -19
- package/ftm-map/scripts/queries/rust-tags.scm +37 -37
- package/ftm-map/scripts/queries/typescript-tags.scm +41 -41
- package/ftm-map/scripts/query.py +301 -301
- package/ftm-map/scripts/ranker.py +377 -377
- package/ftm-map/scripts/requirements.txt +5 -5
- package/ftm-map/scripts/setup-hooks.sh +27 -27
- package/ftm-map/scripts/setup.sh +56 -56
- package/ftm-map/scripts/test_db.py +364 -364
- package/ftm-map/scripts/test_parser.py +174 -174
- package/ftm-map/scripts/test_query.py +183 -183
- package/ftm-map/scripts/test_ranker.py +199 -199
- package/ftm-map/scripts/views.py +591 -591
- package/ftm-map.yml +2 -2
- package/ftm-mind/SKILL.md +1943 -1943
- package/ftm-mind/evals/promptfoo.yaml +142 -142
- package/ftm-mind/references/blackboard-schema.md +328 -328
- package/ftm-mind/references/complexity-guide.md +110 -110
- package/ftm-mind/references/event-registry.md +319 -319
- package/ftm-mind/references/mcp-inventory.md +296 -296
- package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -72
- package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -32
- package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -80
- package/ftm-mind/references/reflexion-protocol.md +249 -249
- package/ftm-mind/references/routing/SCENARIOS.md +22 -22
- package/ftm-mind/references/routing-scenarios.md +35 -35
- package/ftm-mind.yml +2 -2
- package/ftm-pause/SKILL.md +395 -395
- package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -186
- package/ftm-pause/references/protocols/VALIDATION.md +80 -80
- package/ftm-pause.yml +2 -2
- package/ftm-researcher/SKILL.md +275 -275
- package/ftm-researcher/evals/agent-diversity.yaml +17 -17
- package/ftm-researcher/evals/synthesis-quality.yaml +12 -12
- package/ftm-researcher/evals/trigger-accuracy.yaml +39 -39
- package/ftm-researcher/references/adaptive-search.md +116 -116
- package/ftm-researcher/references/agent-prompts.md +193 -193
- package/ftm-researcher/references/council-integration.md +193 -193
- package/ftm-researcher/references/output-format.md +203 -203
- package/ftm-researcher/references/synthesis-pipeline.md +165 -165
- package/ftm-researcher/scripts/score_credibility.py +234 -234
- package/ftm-researcher/scripts/validate_research.py +92 -92
- package/ftm-researcher.yml +2 -2
- package/ftm-resume/SKILL.md +518 -518
- package/ftm-resume/references/protocols/VALIDATION.md +172 -172
- package/ftm-resume.yml +2 -2
- package/ftm-retro/SKILL.md +380 -380
- package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -89
- package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -109
- package/ftm-retro.yml +2 -2
- package/ftm-routine/SKILL.md +170 -170
- package/ftm-routine.yml +4 -4
- package/ftm-state/blackboard/capabilities.json +5 -5
- package/ftm-state/blackboard/capabilities.schema.json +27 -27
- package/ftm-state/blackboard/context.json +23 -23
- package/ftm-state/blackboard/experiences/index.json +9 -9
- package/ftm-state/blackboard/patterns.json +6 -6
- package/ftm-state/schemas/context.schema.json +130 -130
- package/ftm-state/schemas/experience-index.schema.json +77 -77
- package/ftm-state/schemas/experience.schema.json +78 -78
- package/ftm-state/schemas/patterns.schema.json +44 -44
- package/ftm-upgrade/SKILL.md +194 -194
- package/ftm-upgrade/scripts/check-version.sh +76 -76
- package/ftm-upgrade/scripts/upgrade.sh +143 -143
- package/ftm-upgrade.yml +2 -2
- package/ftm-verify.yml +2 -2
- package/ftm.yml +2 -2
- package/hooks/ftm-blackboard-enforcer.sh +93 -93
- package/hooks/ftm-discovery-reminder.sh +90 -90
- package/hooks/ftm-drafts-gate.sh +61 -61
- package/hooks/ftm-event-logger.mjs +107 -107
- package/hooks/ftm-map-autodetect.sh +79 -79
- package/hooks/ftm-pending-sync-check.sh +22 -22
- package/hooks/ftm-plan-gate.sh +92 -92
- package/hooks/ftm-post-commit-trigger.sh +57 -57
- package/hooks/settings-template.json +81 -81
- package/install.sh +363 -363
- package/package.json +84 -84
- package/uninstall.sh +25 -25
package/ftm-map/scripts/db.py
CHANGED
|
@@ -1,712 +1,712 @@
|
|
|
1
|
-
"""
|
|
2
|
-
db.py — SQLite database module for ftm-map.
|
|
3
|
-
|
|
4
|
-
Manages a 5-table schema (files, symbols, refs, file_edges, symbol_edges)
|
|
5
|
-
plus FTS5 for full-text search over symbols. Provides CRUD operations,
|
|
6
|
-
materialized edge rebuilding, and graph traversal queries.
|
|
7
|
-
|
|
8
|
-
Schema overview:
|
|
9
|
-
files — tracked source files with metadata
|
|
10
|
-
symbols — indexed code symbols (functions, classes, methods, etc.)
|
|
11
|
-
refs — unresolved references (calls, imports) keyed by symbol name
|
|
12
|
-
file_edges — materialized file-level dependency graph
|
|
13
|
-
symbol_edges — materialized symbol-level dependency graph
|
|
14
|
-
symbols_fts — FTS5 virtual table for BM25-ranked search
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
import hashlib
|
|
18
|
-
import os
|
|
19
|
-
import sqlite3
|
|
20
|
-
from pathlib import Path
|
|
21
|
-
from typing import Optional
|
|
22
|
-
|
|
23
|
-
# ---------------------------------------------------------------------------
|
|
24
|
-
# Constants
|
|
25
|
-
# ---------------------------------------------------------------------------
|
|
26
|
-
|
|
27
|
-
# Name of the per-project directory that holds the map database.
DB_DIR = ".ftm-map"
# Database file location relative to a project root; get_connection() joins
# this onto the project_root it is given.
DB_PATH = os.path.join(DB_DIR, "map.db")
|
|
29
|
-
|
|
30
|
-
# ---------------------------------------------------------------------------
|
|
31
|
-
# Schema DDL
|
|
32
|
-
# ---------------------------------------------------------------------------
|
|
33
|
-
|
|
34
|
-
_SCHEMA = """
|
|
35
|
-
CREATE TABLE IF NOT EXISTS files (
|
|
36
|
-
id INTEGER PRIMARY KEY,
|
|
37
|
-
path TEXT NOT NULL UNIQUE,
|
|
38
|
-
lang TEXT,
|
|
39
|
-
mtime REAL NOT NULL,
|
|
40
|
-
hash TEXT,
|
|
41
|
-
line_count INTEGER
|
|
42
|
-
);
|
|
43
|
-
|
|
44
|
-
CREATE TABLE IF NOT EXISTS symbols (
|
|
45
|
-
id INTEGER PRIMARY KEY,
|
|
46
|
-
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
47
|
-
name TEXT NOT NULL,
|
|
48
|
-
qualified_name TEXT,
|
|
49
|
-
kind TEXT NOT NULL,
|
|
50
|
-
line_start INTEGER NOT NULL,
|
|
51
|
-
line_end INTEGER,
|
|
52
|
-
signature TEXT,
|
|
53
|
-
parent_id INTEGER REFERENCES symbols(id) ON DELETE SET NULL
|
|
54
|
-
);
|
|
55
|
-
|
|
56
|
-
CREATE TABLE IF NOT EXISTS refs (
|
|
57
|
-
id INTEGER PRIMARY KEY,
|
|
58
|
-
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
59
|
-
symbol_name TEXT NOT NULL,
|
|
60
|
-
line INTEGER NOT NULL,
|
|
61
|
-
kind TEXT DEFAULT 'call'
|
|
62
|
-
);
|
|
63
|
-
|
|
64
|
-
CREATE TABLE IF NOT EXISTS file_edges (
|
|
65
|
-
source_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
66
|
-
target_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
67
|
-
weight REAL DEFAULT 1.0,
|
|
68
|
-
PRIMARY KEY (source_file_id, target_file_id)
|
|
69
|
-
);
|
|
70
|
-
|
|
71
|
-
CREATE TABLE IF NOT EXISTS symbol_edges (
|
|
72
|
-
source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
|
|
73
|
-
target_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
|
|
74
|
-
kind TEXT NOT NULL,
|
|
75
|
-
file_id INTEGER REFERENCES files(id),
|
|
76
|
-
line INTEGER,
|
|
77
|
-
PRIMARY KEY (source_symbol_id, target_symbol_id, kind)
|
|
78
|
-
);
|
|
79
|
-
|
|
80
|
-
CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id);
|
|
81
|
-
CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
|
|
82
|
-
CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);
|
|
83
|
-
CREATE INDEX IF NOT EXISTS idx_refs_file ON refs(file_id);
|
|
84
|
-
CREATE INDEX IF NOT EXISTS idx_refs_symbol_name ON refs(symbol_name);
|
|
85
|
-
CREATE INDEX IF NOT EXISTS idx_file_edges_target ON file_edges(target_file_id);
|
|
86
|
-
CREATE INDEX IF NOT EXISTS idx_symbol_edges_target ON symbol_edges(target_symbol_id);
|
|
87
|
-
|
|
88
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
|
|
89
|
-
name, qualified_name, signature,
|
|
90
|
-
content=symbols, content_rowid=id,
|
|
91
|
-
tokenize='porter'
|
|
92
|
-
);
|
|
93
|
-
"""
|
|
94
|
-
|
|
95
|
-
# ---------------------------------------------------------------------------
|
|
96
|
-
# Connection management
|
|
97
|
-
# ---------------------------------------------------------------------------
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def get_connection(project_root: str) -> sqlite3.Connection:
    """Open the map database that lives under *project_root*.

    The parent directory of the database file is created when missing, the
    connection is switched to WAL journaling with foreign-key enforcement
    turned on, rows are returned as ``sqlite3.Row`` objects, and the schema
    is initialised via ``_init_schema`` before the connection is returned.
    """
    database_file = os.path.join(project_root, DB_PATH)
    parent_dir = os.path.dirname(database_file)
    os.makedirs(parent_dir, exist_ok=True)

    connection = sqlite3.connect(database_file)
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
        connection.execute(pragma)
    connection.row_factory = sqlite3.Row

    _init_schema(connection)
    return connection
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def _init_schema(conn: sqlite3.Connection) -> None:
    """Run the module's schema DDL script on *conn* and commit.

    Every statement in the script uses ``IF NOT EXISTS``, so calling this on
    an already-initialised database leaves existing objects in place.
    """
    ddl_script = _SCHEMA
    conn.executescript(ddl_script)
    conn.commit()
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
# ---------------------------------------------------------------------------
|
|
125
|
-
# File CRUD
|
|
126
|
-
# ---------------------------------------------------------------------------
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
def add_file(
    conn: sqlite3.Connection,
    path: str,
    lang: str,
    mtime: float,
    hash: Optional[str] = None,
    line_count: Optional[int] = None,
) -> int:
    """Record a new file in the ``files`` table and return its row id.

    The insert is not committed here; the caller owns the transaction.
    """
    insert_sql = """
        INSERT INTO files (path, lang, mtime, hash, line_count)
        VALUES (?, ?, ?, ?, ?)
        """
    values = (path, lang, mtime, hash, line_count)
    return conn.execute(insert_sql, values).lastrowid
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def get_file_by_path(conn: sqlite3.Connection, path: str) -> Optional[dict]:
    """Look up the ``files`` row whose path equals *path*.

    Returns the row converted to a plain dict, or None when no row matches.
    """
    cursor = conn.execute("SELECT * FROM files WHERE path=?", (path,))
    match = cursor.fetchone()
    if not match:
        return None
    return dict(match)
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def remove_file(conn: sqlite3.Connection, path: str) -> None:
    """Remove the file stored under *path* together with its index entries.

    Does nothing when the path is not tracked.  The FTS rows of the file's
    symbols are deleted first, while the symbol rows still exist; the final
    delete on ``files`` then removes the dependent rows through the schema's
    ON DELETE CASCADE clauses.  Nothing is committed here.
    """
    record = get_file_by_path(conn, path)
    if record is None:
        return

    target_id = record["id"]

    # Collect the symbol ids up front, then drop their FTS entries in one batch.
    symbol_rows = conn.execute(
        "SELECT id FROM symbols WHERE file_id=?", (target_id,)
    ).fetchall()
    conn.executemany(
        "DELETE FROM symbols_fts WHERE rowid=?",
        [(symbol_row["id"],) for symbol_row in symbol_rows],
    )

    # Cascading foreign keys take care of symbols, refs and both edge tables.
    conn.execute("DELETE FROM files WHERE id=?", (target_id,))
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
# ---------------------------------------------------------------------------
|
|
179
|
-
# Symbol CRUD
|
|
180
|
-
# ---------------------------------------------------------------------------
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
def add_symbol(
    conn: sqlite3.Connection,
    file_id: int,
    name: str,
    kind: str,
    line_start: int,
    line_end: Optional[int] = None,
    qualified_name: Optional[str] = None,
    signature: Optional[str] = None,
    parent_id: Optional[int] = None,
) -> int:
    """Store a symbol and mirror it into the ``symbols_fts`` index.

    The FTS row reuses the new symbol id as its rowid; a missing
    qualified name or signature is indexed as an empty string.

    Returns the id of the inserted symbol.  Nothing is committed here.
    """
    symbol_values = (
        file_id,
        name,
        qualified_name,
        kind,
        line_start,
        line_end,
        signature,
        parent_id,
    )
    symbol_id = conn.execute(
        """
        INSERT INTO symbols
        (file_id, name, qualified_name, kind, line_start, line_end, signature, parent_id)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """,
        symbol_values,
    ).lastrowid

    # The FTS table is declared with content=symbols, so it is fed by hand.
    fts_values = (symbol_id, name, qualified_name or "", signature or "")
    conn.execute(
        "INSERT INTO symbols_fts(rowid, name, qualified_name, signature) VALUES (?, ?, ?, ?)",
        fts_values,
    )
    return symbol_id
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
def get_symbol_by_id(conn: sqlite3.Connection, symbol_id: int) -> Optional[dict]:
    """Fetch the symbol with primary key *symbol_id*.

    Returns the row as a plain dict, or None when the id is unknown.
    """
    cursor = conn.execute("SELECT * FROM symbols WHERE id=?", (symbol_id,))
    match = cursor.fetchone()
    if not match:
        return None
    return dict(match)
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
def get_symbol_by_name(conn: sqlite3.Connection, name: str) -> list:
    """Fetch every symbol whose ``name`` column equals *name*.

    Several symbols may share a name, so the result is a list of dicts; it is
    empty when nothing matches.
    """
    cursor = conn.execute("SELECT * FROM symbols WHERE name=?", (name,))
    return list(map(dict, cursor.fetchall()))
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
def get_symbols_by_file(conn: sqlite3.Connection, file_id: int) -> list:
    """List the symbols stored for *file_id*, sorted by their starting line.

    Each symbol is returned as a plain dict.
    """
    cursor = conn.execute(
        "SELECT * FROM symbols WHERE file_id=? ORDER BY line_start",
        (file_id,),
    )
    return list(map(dict, cursor.fetchall()))
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
def remove_symbols_by_file(conn: sqlite3.Connection, file_path: str) -> None:
    """Drop every symbol recorded for the file at *file_path*.

    Does nothing when the path is not tracked.  The matching FTS rows are
    removed before the symbol rows themselves; the ``files`` row is kept.
    Nothing is committed here.
    """
    record = get_file_by_path(conn, file_path)
    if record is None:
        return

    target_id = record["id"]

    # Drop the FTS entries in one batch while the symbol rows still exist.
    symbol_rows = conn.execute(
        "SELECT id FROM symbols WHERE file_id=?", (target_id,)
    ).fetchall()
    conn.executemany(
        "DELETE FROM symbols_fts WHERE rowid=?",
        [(symbol_row["id"],) for symbol_row in symbol_rows],
    )

    conn.execute("DELETE FROM symbols WHERE file_id=?", (target_id,))
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
# ---------------------------------------------------------------------------
|
|
262
|
-
# Reference CRUD
|
|
263
|
-
# ---------------------------------------------------------------------------
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
def add_reference(
    conn: sqlite3.Connection,
    file_id: int,
    symbol_name: str,
    line: int,
    kind: str = "call",
) -> int:
    """Record a reference to *symbol_name* found at *line* of file *file_id*.

    Returns the id of the new ``refs`` row.  Nothing is committed here.
    """
    values = (file_id, symbol_name, line, kind)
    inserted = conn.execute(
        "INSERT INTO refs (file_id, symbol_name, line, kind) VALUES (?, ?, ?, ?)",
        values,
    )
    return inserted.lastrowid
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
def get_references_by_file(conn: sqlite3.Connection, file_id: int) -> list:
    """List the references recorded in file *file_id*, sorted by line number.

    Each reference is returned as a plain dict.
    """
    cursor = conn.execute(
        "SELECT * FROM refs WHERE file_id=? ORDER BY line",
        (file_id,),
    )
    return list(map(dict, cursor.fetchall()))
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
# ---------------------------------------------------------------------------
|
|
291
|
-
# Edge CRUD
|
|
292
|
-
# ---------------------------------------------------------------------------
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
def add_edge(
    conn: sqlite3.Connection,
    source_id: int,
    target_id: int,
    kind: str,
) -> None:
    """Add a directed edge of type *kind* from symbol *source_id* to *target_id*.

    Uses INSERT OR IGNORE, so inserting an edge that is already present
    leaves the table unchanged.  Nothing is committed here.
    """
    edge = (source_id, target_id, kind)
    conn.execute(
        "INSERT OR IGNORE INTO symbol_edges (source_symbol_id, target_symbol_id, kind) VALUES (?, ?, ?)",
        edge,
    )
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
# ---------------------------------------------------------------------------
|
|
309
|
-
# Materialized edge rebuilding
|
|
310
|
-
# ---------------------------------------------------------------------------
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
def rebuild_file_edges(conn: sqlite3.Connection) -> None:
    """Recompute ``file_edges`` from scratch using refs and symbols.

    All existing rows are discarded.  A reference is matched to every symbol
    with the same name; each match whose defining file differs from the
    referencing file produces one edge (referencing file -> defining file)
    with weight 1.0.  Repeated matches between the same pair of files
    collapse into a single row.  Nothing is committed here.
    """
    clear_sql = "DELETE FROM file_edges"
    populate_sql = """
        INSERT OR IGNORE INTO file_edges (source_file_id, target_file_id, weight)
        SELECT DISTINCT r.file_id, s.file_id, 1.0
        FROM refs r
        JOIN symbols s ON s.name = r.symbol_name
        WHERE r.file_id != s.file_id
        """
    for statement in (clear_sql, populate_sql):
        conn.execute(statement)
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
def rebuild_symbol_edges(conn: sqlite3.Connection) -> None:
    """Rebuild the symbol_edges table from refs and symbols.

    For each ref, finds the target symbol (by name match) and the nearest
    enclosing definition in the referencing file (the symbol whose line range
    contains the ref line). Creates a symbol_edge from the enclosing symbol
    to the target symbol.

    All existing symbol_edges rows are deleted first. Matching is by bare
    name only, so one ref yields an edge to every symbol that shares the
    name. Refs that have no enclosing symbol in their own file produce no
    edge. Nothing is committed here; the caller owns the transaction.
    """
    conn.execute("DELETE FROM symbol_edges")

    # Find matching ref -> target symbol, with nearest enclosing source symbol.
    # The enclosing symbol is the one in the same file as the ref whose
    # line_start <= ref.line and (line_end >= ref.line OR line_end IS NULL),
    # ordered by line_start DESC to get the nearest (innermost) enclosure.
    #
    # Rows are grouped per (ref, target) pair, and the HAVING clause keeps the
    # candidate with the largest line_start.
    # NOTE(review): this appears to rely on SQLite filling bare columns
    # (src.id, src.line_start) from the row that holds MAX(); confirm against
    # the SQLite documentation before porting to another engine.
    #
    # NOTE(review): the src.id != tgt.id filter runs before grouping, so when
    # the innermost enclosing symbol is the target itself (e.g. a recursive
    # call) the edge looks like it is attributed to the next enclosing symbol
    # instead of being dropped; verify this is intended.
    #
    # INSERT OR IGNORE together with the (source, target, kind) primary key
    # means repeated refs between the same pair collapse into one edge; its
    # file_id/line come from whichever row is inserted first.
    conn.execute(
        """
        INSERT OR IGNORE INTO symbol_edges (source_symbol_id, target_symbol_id, kind, file_id, line)
        SELECT src.id, tgt.id, r.kind, r.file_id, r.line
        FROM refs r
        JOIN symbols tgt ON tgt.name = r.symbol_name
        JOIN symbols src ON src.file_id = r.file_id
        AND src.line_start <= r.line
        AND (src.line_end >= r.line OR src.line_end IS NULL)
        WHERE src.id != tgt.id
        GROUP BY r.id, tgt.id
        HAVING src.line_start = MAX(src.line_start)
        """
    )
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
# ---------------------------------------------------------------------------
|
|
364
|
-
# Graph traversal — recursive CTEs
|
|
365
|
-
# ---------------------------------------------------------------------------
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
def get_transitive_deps(
    conn: sqlite3.Connection, symbol_id: int, max_depth: int = 10
) -> list:
    """Return all symbols this symbol transitively depends on (forward closure).

    Follows symbol_edges from source to target, starting at *symbol_id*.
    Direct dependencies are reported with depth 0; the recursion continues
    while the current depth is below *max_depth*.

    Cycle prevention is handled by tracking visited ids as a comma-separated
    path string inside the CTE.  The membership test compares whole,
    comma-delimited ids, so id 1 is not mistaken for being part of id 10.
    The seed itself is not placed on the path, so it is reported when one of
    its dependencies leads back to it.

    Each symbol is returned once, as a dict with the keys id, name, kind,
    file_id and depth, where depth is the shallowest depth at which it was
    reached.  Results are ordered by depth (shallowest first), then by id.
    """
    query = """
        WITH RECURSIVE dep_chain(id, name, kind, file_id, depth, path) AS (
            -- Base: direct dependencies of the seed symbol
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   0,
                   CAST(s.id AS TEXT)
            FROM symbol_edges e
            JOIN symbols s ON s.id = e.target_symbol_id
            WHERE e.source_symbol_id = ?

            UNION ALL

            -- Recursive: dependencies of already-visited nodes
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   dc.depth + 1,
                   dc.path || ',' || CAST(s.id AS TEXT)
            FROM dep_chain dc
            JOIN symbol_edges e ON e.source_symbol_id = dc.id
            JOIN symbols s ON s.id = e.target_symbol_id
            WHERE dc.depth < ?
              -- cycle guard: match complete ids only, never substrings
              AND INSTR(',' || dc.path || ',', ',' || CAST(s.id AS TEXT) || ',') = 0
        )
        -- One row per symbol, keeping the shallowest depth it was seen at.
        SELECT id, name, kind, file_id, MIN(depth) AS depth
        FROM dep_chain
        GROUP BY id
        ORDER BY MIN(depth), id
    """
    rows = conn.execute(query, (symbol_id, max_depth)).fetchall()
    return [dict(r) for r in rows]
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
def get_reverse_deps(
    conn: sqlite3.Connection, symbol_id: int, max_depth: int = 10
) -> list:
    """Return all symbols that transitively depend on this symbol (blast radius).

    Traverses symbol_edges in reverse (callers/importers of the seed symbol),
    starting at *symbol_id*.  Direct dependents are reported with depth 0; the
    recursion continues while the current depth is below *max_depth*.

    Cycles are cut by tracking visited ids as a comma-separated path string
    inside the CTE.  The membership test compares whole, comma-delimited ids,
    so id 1 is not mistaken for being part of id 10.  The seed itself is not
    placed on the path, so it is reported when one of its dependents is also
    reachable from it.

    Each symbol is returned once, as a dict with the keys id, name, kind,
    file_id and depth, where depth is the shallowest depth at which it was
    reached.  Results are ordered by depth (shallowest first), then by id.
    """
    query = """
        WITH RECURSIVE rev_chain(id, name, kind, file_id, depth, path) AS (
            -- Base: direct dependents of the seed symbol
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   0,
                   CAST(s.id AS TEXT)
            FROM symbol_edges e
            JOIN symbols s ON s.id = e.source_symbol_id
            WHERE e.target_symbol_id = ?

            UNION ALL

            -- Recursive: dependents of already-visited nodes
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   rc.depth + 1,
                   rc.path || ',' || CAST(s.id AS TEXT)
            FROM rev_chain rc
            JOIN symbol_edges e ON e.target_symbol_id = rc.id
            JOIN symbols s ON s.id = e.source_symbol_id
            WHERE rc.depth < ?
              -- cycle guard: match complete ids only, never substrings
              AND INSTR(',' || rc.path || ',', ',' || CAST(s.id AS TEXT) || ',') = 0
        )
        -- One row per symbol, keeping the shallowest depth it was seen at.
        SELECT id, name, kind, file_id, MIN(depth) AS depth
        FROM rev_chain
        GROUP BY id
        ORDER BY MIN(depth), id
    """
    rows = conn.execute(query, (symbol_id, max_depth)).fetchall()
    return [dict(r) for r in rows]
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
# ---------------------------------------------------------------------------
|
|
460
|
-
# Full-text search
|
|
461
|
-
# ---------------------------------------------------------------------------
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
def fts_search(conn: sqlite3.Connection, query_text: str, limit: int = 10) -> list:
    """BM25-ranked full-text search over symbol names, qualified names, and signatures.

    *query_text* is first passed to FTS5 unchanged, so FTS5 query syntax
    (AND/OR, prefix ``*``, column filters) keeps working.  If SQLite rejects
    it, for example because it contains punctuation such as ``(`` or ``-``,
    the search is retried with every whitespace-separated term wrapped in
    double quotes so the text is treated as plain search terms.  An error
    raised by the retry propagates to the caller.

    An empty or whitespace-only query returns an empty list instead of raising.

    Returns up to *limit* symbol dicts with an additional 'rank' field.
    Lower rank values indicate better matches (BM25 scores are negative in
    SQLite's fts5 implementation).
    """
    if not query_text or not query_text.strip():
        # FTS5 reports a syntax error for an empty MATCH expression.
        return []

    query = """
        SELECT s.*, fts.rank
        FROM symbols_fts fts
        JOIN symbols s ON s.id = fts.rowid
        WHERE symbols_fts MATCH ?
        ORDER BY fts.rank
        LIMIT ?
    """
    try:
        rows = conn.execute(query, (query_text, limit)).fetchall()
    except sqlite3.OperationalError:
        # Not valid FTS5 syntax: quote each term (doubling embedded quotes)
        # so punctuation is handed to the tokenizer instead of the parser.
        quoted_terms = " ".join(
            '"' + term.replace('"', '""') + '"' for term in query_text.split()
        )
        rows = conn.execute(query, (quoted_terms, limit)).fetchall()
    return [dict(r) for r in rows]
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
# ---------------------------------------------------------------------------
|
|
484
|
-
# Statistics
|
|
485
|
-
# ---------------------------------------------------------------------------
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
def get_stats(conn: sqlite3.Connection) -> dict:
    """Count the rows of each core table and report them under fixed keys."""
    # (result key, table name) pairs, in the order the counts are queried.
    counted_tables = (
        ("file_count", "files"),
        ("symbol_count", "symbols"),
        ("edge_count", "symbol_edges"),
        ("reference_count", "refs"),
        ("file_edge_count", "file_edges"),
    )
    summary = {}
    for key, table in counted_tables:
        (total,) = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()
        summary[key] = total
    return summary
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
# ---------------------------------------------------------------------------
|
|
505
|
-
# Utility helpers
|
|
506
|
-
# ---------------------------------------------------------------------------
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
def hash_content(content: str) -> str:
    """Fingerprint *content* with SHA-256 and return the hex digest.

    The text is encoded as UTF-8 before hashing.  Handy for detecting
    whether a file's content has changed.
    """
    encoded = content.encode("utf-8")
    hasher = hashlib.sha256(encoded)
    return hasher.hexdigest()
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
# ---------------------------------------------------------------------------
|
|
515
|
-
# Smoke-test entrypoint
|
|
516
|
-
# ---------------------------------------------------------------------------
|
|
517
|
-
|
|
518
|
-
if __name__ == "__main__":
    # Smoke test: exercises every public helper of this module against a
    # throw-away database created inside a temporary directory.
    import contextlib
    import tempfile

    print("Running db.py smoke tests ...")

    # closing() releases the SQLite handle before TemporaryDirectory removes
    # the directory, so no database file is still held open at cleanup time.
    with tempfile.TemporaryDirectory() as tmp, contextlib.closing(get_connection(tmp)) as conn:
        # ---- add files ----
        fid_parser = add_file(conn, "src/parser.py", "python", 1000.0, hash="abc123", line_count=50)
        fid_lexer = add_file(conn, "src/lexer.py", "python", 1001.0, line_count=30)
        fid_index = add_file(conn, "src/index.py", "python", 1002.0)
        conn.commit()

        assert get_file_by_path(conn, "src/parser.py")["id"] == fid_parser, "get_file_by_path failed"
        assert get_file_by_path(conn, "nonexistent.py") is None, "get_file_by_path should return None"
        print(" [PASS] File CRUD")

        # ---- add symbols ----
        # parser.py: parse_file (lines 10-40)
        sid_parse = add_symbol(
            conn, fid_parser, "parse_file", "function", 10, line_end=40,
            qualified_name="parser.parse_file",
            signature="def parse_file(path: str) -> AST",
        )
        # lexer.py: tokenize (lines 1-20)
        sid_tokenize = add_symbol(
            conn, fid_lexer, "tokenize", "function", 1, line_end=20,
            qualified_name="lexer.tokenize",
            signature="def tokenize(src: str) -> list",
        )
        # index.py: build_index (lines 5-60) — calls parse_file and tokenize
        sid_build = add_symbol(
            conn, fid_index, "build_index", "function", 5, line_end=60,
            qualified_name="index.build_index",
            signature="def build_index(root: str) -> None",
        )
        conn.commit()

        assert get_symbol_by_id(conn, sid_parse)["name"] == "parse_file", "get_symbol_by_id failed"
        assert len(get_symbol_by_name(conn, "tokenize")) == 1, "get_symbol_by_name failed"
        assert len(get_symbols_by_file(conn, fid_parser)) == 1, "get_symbols_by_file failed"
        print(" [PASS] Symbol CRUD")

        # ---- add references ----
        # build_index calls parse_file at line 15 and tokenize at line 25
        add_reference(conn, fid_index, "parse_file", 15, kind="call")
        add_reference(conn, fid_index, "tokenize", 25, kind="call")
        # parse_file calls tokenize at line 20
        add_reference(conn, fid_parser, "tokenize", 20, kind="call")
        conn.commit()

        refs_index = get_references_by_file(conn, fid_index)
        assert len(refs_index) == 2, f"expected 2 refs in index.py, got {len(refs_index)}"
        print(" [PASS] Reference CRUD")

        # ---- rebuild file edges ----
        rebuild_file_edges(conn)
        conn.commit()

        fe_count = conn.execute("SELECT COUNT(*) FROM file_edges").fetchone()[0]
        assert fe_count >= 2, f"expected >= 2 file edges, got {fe_count}"

        # index.py -> parser.py edge should exist
        fe = conn.execute(
            "SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
            (fid_index, fid_parser),
        ).fetchone()
        assert fe is not None, "file edge index->parser missing"
        # index.py -> lexer.py edge should exist
        fe2 = conn.execute(
            "SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
            (fid_index, fid_lexer),
        ).fetchone()
        assert fe2 is not None, "file edge index->lexer missing"
        print(" [PASS] rebuild_file_edges")

        # ---- rebuild symbol edges ----
        rebuild_symbol_edges(conn)
        conn.commit()

        se_count = conn.execute("SELECT COUNT(*) FROM symbol_edges").fetchone()[0]
        assert se_count >= 2, f"expected >= 2 symbol edges, got {se_count}"

        # build_index -> parse_file edge should exist
        se = conn.execute(
            "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
            (sid_build, sid_parse),
        ).fetchone()
        assert se is not None, "symbol edge build_index->parse_file missing"

        # build_index -> tokenize edge should exist
        se2 = conn.execute(
            "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
            (sid_build, sid_tokenize),
        ).fetchone()
        assert se2 is not None, "symbol edge build_index->tokenize missing"

        # parse_file -> tokenize edge should exist
        se3 = conn.execute(
            "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
            (sid_parse, sid_tokenize),
        ).fetchone()
        assert se3 is not None, "symbol edge parse_file->tokenize missing"
        print(" [PASS] rebuild_symbol_edges")

        # ---- transitive deps via symbol_edges ----
        deps = get_transitive_deps(conn, sid_build)
        dep_ids = {d["id"] for d in deps}
        assert sid_parse in dep_ids, f"transitive deps missing parse_file: {dep_ids}"
        assert sid_tokenize in dep_ids, f"transitive deps missing tokenize: {dep_ids}"
        print(" [PASS] get_transitive_deps")

        # ---- reverse deps via symbol_edges ----
        rdeps = get_reverse_deps(conn, sid_tokenize)
        rdep_ids = {d["id"] for d in rdeps}
        assert sid_parse in rdep_ids, f"reverse deps missing parse_file: {rdep_ids}"
        assert sid_build in rdep_ids, f"reverse deps missing build_index: {rdep_ids}"
        print(" [PASS] get_reverse_deps")

        # ---- FTS search ----
        results = fts_search(conn, "parse")
        assert any(r["name"] == "parse_file" for r in results), "FTS search for 'parse' failed"

        results_sig = fts_search(conn, "tokenize")
        assert any(r["name"] == "tokenize" for r in results_sig), "FTS search for 'tokenize' failed"

        results_qn = fts_search(conn, "index")
        assert any(r["name"] == "build_index" for r in results_qn), "FTS qualified_name search failed"
        print(" [PASS] FTS search")

        # ---- stats ----
        stats = get_stats(conn)
        assert stats["file_count"] == 3, f"expected 3 files, got {stats['file_count']}"
        assert stats["symbol_count"] == 3, f"expected 3 symbols, got {stats['symbol_count']}"
        assert stats["reference_count"] == 3, f"expected 3 refs, got {stats['reference_count']}"
        assert stats["edge_count"] >= 2, f"expected >= 2 symbol edges, got {stats['edge_count']}"
        assert stats["file_edge_count"] >= 2, f"expected >= 2 file edges, got {stats['file_edge_count']}"
        print(" [PASS] get_stats")

        # ---- add_edge (manual symbol edge) ----
        add_edge(conn, sid_parse, sid_build, "test_edge")
        conn.commit()
        manual_edge = conn.execute(
            "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=? AND kind=?",
            (sid_parse, sid_build, "test_edge"),
        ).fetchone()
        assert manual_edge is not None, "add_edge failed"
        # duplicate should be ignored: inserting the same edge again must not
        # raise and must leave exactly one row behind
        add_edge(conn, sid_parse, sid_build, "test_edge")
        conn.commit()
        manual_edge_count = conn.execute(
            "SELECT COUNT(*) FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=? AND kind=?",
            (sid_parse, sid_build, "test_edge"),
        ).fetchone()[0]
        assert manual_edge_count == 1, f"expected 1 manual edge, got {manual_edge_count}"
        print(" [PASS] add_edge (manual)")

        # ---- CASCADE deletes ----
        # Remove lexer.py file -> the tokenize symbol and every edge touching it
        # should cascade.  Refs are keyed by file, and lexer.py holds none, so
        # the refs table must be left untouched.
        sym_count_before = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
        ref_count_before = conn.execute("SELECT COUNT(*) FROM refs").fetchone()[0]

        remove_file(conn, "src/lexer.py")
        conn.commit()

        assert get_file_by_path(conn, "src/lexer.py") is None, "file not removed"
        assert get_symbol_by_id(conn, sid_tokenize) is None, "symbol not cascaded on file delete"

        sym_count_after = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
        assert sym_count_after == sym_count_before - 1, f"expected {sym_count_before - 1} symbols, got {sym_count_after}"

        # Refs in lexer.py should be gone (there were none, so the count is unchanged)
        ref_count_after = conn.execute("SELECT COUNT(*) FROM refs").fetchone()[0]
        assert ref_count_after == ref_count_before, f"expected {ref_count_before} refs, got {ref_count_after}"

        # Symbol edges involving tokenize should be gone
        edges_to_tokenize = conn.execute(
            "SELECT COUNT(*) FROM symbol_edges WHERE target_symbol_id=? OR source_symbol_id=?",
            (sid_tokenize, sid_tokenize),
        ).fetchone()[0]
        assert edges_to_tokenize == 0, f"expected 0 edges involving deleted symbol, got {edges_to_tokenize}"
        print(" [PASS] CASCADE deletes (remove_file)")

        # ---- remove_symbols_by_file (without removing file) ----
        remove_symbols_by_file(conn, "src/parser.py")
        conn.commit()
        assert get_symbol_by_id(conn, sid_parse) is None, "remove_symbols_by_file failed"
        # File itself should still exist
        assert get_file_by_path(conn, "src/parser.py") is not None, "file should still exist after remove_symbols_by_file"
        print(" [PASS] remove_symbols_by_file")

        # ---- final stats ----
        final_stats = get_stats(conn)
        print(f"\n Final stats: {final_stats}")

        # ---- hash_content utility ----
        h = hash_content("hello world")
        assert len(h) == 64, "hash_content should return 64-char hex string"
        assert h == hash_content("hello world"), "hash_content should be deterministic"
        print(" [PASS] hash_content")

    print("\nAll smoke tests passed.")
|
|
1
|
+
"""
|
|
2
|
+
db.py — SQLite database module for ftm-map.
|
|
3
|
+
|
|
4
|
+
Manages a 5-table schema (files, symbols, refs, file_edges, symbol_edges)
|
|
5
|
+
plus FTS5 for full-text search over symbols. Provides CRUD operations,
|
|
6
|
+
materialized edge rebuilding, and graph traversal queries.
|
|
7
|
+
|
|
8
|
+
Schema overview:
|
|
9
|
+
files — tracked source files with metadata
|
|
10
|
+
symbols — indexed code symbols (functions, classes, methods, etc.)
|
|
11
|
+
refs — unresolved references (calls, imports) keyed by symbol name
|
|
12
|
+
file_edges — materialized file-level dependency graph
|
|
13
|
+
symbol_edges — materialized symbol-level dependency graph
|
|
14
|
+
symbols_fts — FTS5 virtual table for BM25-ranked search
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import hashlib
|
|
18
|
+
import os
|
|
19
|
+
import sqlite3
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Optional
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# Constants
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
# Directory, relative to the project root, that holds the map database.
DB_DIR = ".ftm-map"
# Database file path relative to the project root; get_connection joins it
# onto the root it is given.
DB_PATH = os.path.join(DB_DIR, "map.db")
|
|
29
|
+
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
# Schema DDL
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
_SCHEMA = """
|
|
35
|
+
CREATE TABLE IF NOT EXISTS files (
|
|
36
|
+
id INTEGER PRIMARY KEY,
|
|
37
|
+
path TEXT NOT NULL UNIQUE,
|
|
38
|
+
lang TEXT,
|
|
39
|
+
mtime REAL NOT NULL,
|
|
40
|
+
hash TEXT,
|
|
41
|
+
line_count INTEGER
|
|
42
|
+
);
|
|
43
|
+
|
|
44
|
+
CREATE TABLE IF NOT EXISTS symbols (
|
|
45
|
+
id INTEGER PRIMARY KEY,
|
|
46
|
+
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
47
|
+
name TEXT NOT NULL,
|
|
48
|
+
qualified_name TEXT,
|
|
49
|
+
kind TEXT NOT NULL,
|
|
50
|
+
line_start INTEGER NOT NULL,
|
|
51
|
+
line_end INTEGER,
|
|
52
|
+
signature TEXT,
|
|
53
|
+
parent_id INTEGER REFERENCES symbols(id) ON DELETE SET NULL
|
|
54
|
+
);
|
|
55
|
+
|
|
56
|
+
CREATE TABLE IF NOT EXISTS refs (
|
|
57
|
+
id INTEGER PRIMARY KEY,
|
|
58
|
+
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
59
|
+
symbol_name TEXT NOT NULL,
|
|
60
|
+
line INTEGER NOT NULL,
|
|
61
|
+
kind TEXT DEFAULT 'call'
|
|
62
|
+
);
|
|
63
|
+
|
|
64
|
+
CREATE TABLE IF NOT EXISTS file_edges (
|
|
65
|
+
source_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
66
|
+
target_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
67
|
+
weight REAL DEFAULT 1.0,
|
|
68
|
+
PRIMARY KEY (source_file_id, target_file_id)
|
|
69
|
+
);
|
|
70
|
+
|
|
71
|
+
CREATE TABLE IF NOT EXISTS symbol_edges (
|
|
72
|
+
source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
|
|
73
|
+
target_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
|
|
74
|
+
kind TEXT NOT NULL,
|
|
75
|
+
file_id INTEGER REFERENCES files(id),
|
|
76
|
+
line INTEGER,
|
|
77
|
+
PRIMARY KEY (source_symbol_id, target_symbol_id, kind)
|
|
78
|
+
);
|
|
79
|
+
|
|
80
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id);
|
|
81
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
|
|
82
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);
|
|
83
|
+
CREATE INDEX IF NOT EXISTS idx_refs_file ON refs(file_id);
|
|
84
|
+
CREATE INDEX IF NOT EXISTS idx_refs_symbol_name ON refs(symbol_name);
|
|
85
|
+
CREATE INDEX IF NOT EXISTS idx_file_edges_target ON file_edges(target_file_id);
|
|
86
|
+
CREATE INDEX IF NOT EXISTS idx_symbol_edges_target ON symbol_edges(target_symbol_id);
|
|
87
|
+
|
|
88
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
|
|
89
|
+
name, qualified_name, signature,
|
|
90
|
+
content=symbols, content_rowid=id,
|
|
91
|
+
tokenize='porter'
|
|
92
|
+
);
|
|
93
|
+
"""
|
|
94
|
+
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
# Connection management
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def get_connection(project_root: str) -> sqlite3.Connection:
    """Return a connection to the project's map database.

    Creates .ftm-map/ and initialises the schema if they do not exist yet.
    WAL mode is enabled for concurrent readers; foreign-key enforcement is on.
    Rows are returned as ``sqlite3.Row`` so callers can convert them with
    ``dict(row)``.

    Args:
        project_root: Directory under which the database directory lives.

    Returns:
        An open, fully configured connection.  The caller owns it and is
        responsible for closing it.

    Raises:
        Any error raised while configuring the connection or creating the
        schema is re-raised after the connection has been closed.
    """
    db_path = os.path.join(project_root, DB_PATH)
    os.makedirs(os.path.dirname(db_path), exist_ok=True)

    conn = sqlite3.connect(db_path)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        conn.row_factory = sqlite3.Row

        _init_schema(conn)
    except Exception:
        # Do not leak the handle when pragma setup or schema creation fails.
        conn.close()
        raise
    return conn
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _init_schema(conn: sqlite3.Connection) -> None:
    """Run the module's DDL script on *conn*, then commit.

    The script creates the tables, indexes, and the FTS5 virtual table
    when they are not present yet.
    """
    ddl_script = _SCHEMA
    conn.executescript(ddl_script)
    conn.commit()
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
# File CRUD
|
|
126
|
+
# ---------------------------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def add_file(
    conn: sqlite3.Connection,
    path: str,
    lang: str,
    mtime: float,
    hash: Optional[str] = None,
    line_count: Optional[int] = None,
) -> int:
    """Record a new source file and hand back the id of the inserted row.

    Nothing is committed here; the caller decides when to commit.
    """
    insert_sql = """
        INSERT INTO files (path, lang, mtime, hash, line_count)
        VALUES (?, ?, ?, ?, ?)
        """
    values = (path, lang, mtime, hash, line_count)
    return conn.execute(insert_sql, values).lastrowid
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def get_file_by_path(conn: sqlite3.Connection, path: str) -> Optional[dict]:
    """Look up the file stored under *path*; ``None`` when no row matches."""
    cursor = conn.execute("SELECT * FROM files WHERE path=?", (path,))
    match = cursor.fetchone()
    if not match:
        return None
    return dict(match)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def remove_file(conn: sqlite3.Connection, path: str) -> None:
    """Drop the file stored under *path* together with everything hanging off it.

    Unknown paths are ignored.  The FTS5 rows of the file's symbols are
    deleted by hand first, because the content= table does not follow
    cascaded deletes on its own; removing the files row then lets
    ON DELETE CASCADE clear symbols, refs, and edges.
    """
    target = get_file_by_path(conn, path)
    if target is None:
        return

    file_id = target["id"]

    # Collect the affected symbol rows up front, then purge their FTS entries
    # while the symbol rows still exist.
    doomed_symbols = conn.execute(
        "SELECT id FROM symbols WHERE file_id=?", (file_id,)
    ).fetchall()
    for symbol_row in doomed_symbols:
        conn.execute("DELETE FROM symbols_fts WHERE rowid=?", (symbol_row["id"],))

    # Deleting the file row triggers the cascades declared in the schema.
    conn.execute("DELETE FROM files WHERE id=?", (file_id,))
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# ---------------------------------------------------------------------------
|
|
179
|
+
# Symbol CRUD
|
|
180
|
+
# ---------------------------------------------------------------------------
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def add_symbol(
    conn: sqlite3.Connection,
    file_id: int,
    name: str,
    kind: str,
    line_start: int,
    line_end: Optional[int] = None,
    qualified_name: Optional[str] = None,
    signature: Optional[str] = None,
    parent_id: Optional[int] = None,
) -> int:
    """Store a symbol and mirror its searchable text into the FTS5 index.

    Returns the id of the new symbols row; the same id is used as the
    rowid of the matching symbols_fts entry.
    """
    symbol_values = (
        file_id, name, qualified_name, kind, line_start, line_end, signature, parent_id,
    )
    new_id = conn.execute(
        """
        INSERT INTO symbols
        (file_id, name, qualified_name, kind, line_start, line_end, signature, parent_id)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """,
        symbol_values,
    ).lastrowid

    # The index is not filled automatically, so add the entry by hand.
    # Missing optional text is indexed as an empty string.
    fts_values = (
        new_id,
        name,
        qualified_name if qualified_name else "",
        signature if signature else "",
    )
    conn.execute(
        "INSERT INTO symbols_fts(rowid, name, qualified_name, signature) VALUES (?, ?, ?, ?)",
        fts_values,
    )
    return new_id
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def get_symbol_by_id(conn: sqlite3.Connection, symbol_id: int) -> Optional[dict]:
    """Fetch the symbol with primary key *symbol_id*; ``None`` when absent."""
    cursor = conn.execute("SELECT * FROM symbols WHERE id=?", (symbol_id,))
    found = cursor.fetchone()
    if not found:
        return None
    return dict(found)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def get_symbol_by_name(conn: sqlite3.Connection, name: str) -> list:
    """List every symbol called *name*; several symbols may share a name."""
    cursor = conn.execute("SELECT * FROM symbols WHERE name=?", (name,))
    return [dict(match) for match in cursor]
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def get_symbols_by_file(conn: sqlite3.Connection, file_id: int) -> list:
    """List the symbols defined in file *file_id*, ordered by starting line."""
    select_sql = "SELECT * FROM symbols WHERE file_id=? ORDER BY line_start"
    cursor = conn.execute(select_sql, (file_id,))
    return [dict(symbol) for symbol in cursor]
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def remove_symbols_by_file(conn: sqlite3.Connection, file_path: str) -> None:
    """Remove every symbol of the file at *file_path*, keeping the file row.

    Unknown paths are ignored.  FTS entries are purged by hand before the
    symbol rows are deleted; ON DELETE CASCADE then removes the affected
    symbol_edges.
    """
    owner = get_file_by_path(conn, file_path)
    if owner is None:
        return

    file_id = owner["id"]

    # Purge the search index while the symbol rows still exist.
    stale_symbols = conn.execute(
        "SELECT id FROM symbols WHERE file_id=?", (file_id,)
    ).fetchall()
    for symbol_row in stale_symbols:
        conn.execute("DELETE FROM symbols_fts WHERE rowid=?", (symbol_row["id"],))

    conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
# ---------------------------------------------------------------------------
|
|
262
|
+
# Reference CRUD
|
|
263
|
+
# ---------------------------------------------------------------------------
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def add_reference(
    conn: sqlite3.Connection,
    file_id: int,
    symbol_name: str,
    line: int,
    kind: str = "call",
) -> int:
    """Store one reference to *symbol_name* and return the id of the new row."""
    insert_sql = "INSERT INTO refs (file_id, symbol_name, line, kind) VALUES (?, ?, ?, ?)"
    values = (file_id, symbol_name, line, kind)
    return conn.execute(insert_sql, values).lastrowid
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def get_references_by_file(conn: sqlite3.Connection, file_id: int) -> list:
    """List the references recorded for file *file_id*, ordered by line."""
    select_sql = "SELECT * FROM refs WHERE file_id=? ORDER BY line"
    cursor = conn.execute(select_sql, (file_id,))
    return [dict(reference) for reference in cursor]
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
# ---------------------------------------------------------------------------
|
|
291
|
+
# Edge CRUD
|
|
292
|
+
# ---------------------------------------------------------------------------
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def add_edge(
    conn: sqlite3.Connection,
    source_id: int,
    target_id: int,
    kind: str,
) -> None:
    """Record a directed edge between two symbols.

    An edge that already exists is left untouched (INSERT OR IGNORE).
    """
    insert_sql = (
        "INSERT OR IGNORE INTO symbol_edges "
        "(source_symbol_id, target_symbol_id, kind) VALUES (?, ?, ?)"
    )
    conn.execute(insert_sql, (source_id, target_id, kind))
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
# ---------------------------------------------------------------------------
|
|
309
|
+
# Materialized edge rebuilding
|
|
310
|
+
# ---------------------------------------------------------------------------
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def rebuild_file_edges(conn: sqlite3.Connection) -> None:
    """Regenerate file_edges from scratch out of refs and symbols.

    Every ref is matched by name against the symbols table; each match in
    a different file produces an edge from the referencing file to the
    defining file.  Repeated pairs collapse into one row with weight 1.0.
    """
    wipe_sql = "DELETE FROM file_edges"
    populate_sql = """
        INSERT OR IGNORE INTO file_edges (source_file_id, target_file_id, weight)
        SELECT DISTINCT r.file_id, s.file_id, 1.0
        FROM refs r
        JOIN symbols s ON s.name = r.symbol_name
        WHERE r.file_id != s.file_id
        """
    # Order matters: clear the old edges before inserting the fresh set.
    for statement in (wipe_sql, populate_sql):
        conn.execute(statement)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def rebuild_symbol_edges(conn: sqlite3.Connection) -> None:
    """Rebuild the symbol_edges table from refs and symbols.

    For each ref, finds the target symbol (by name match) and the nearest
    enclosing definition in the referencing file (the symbol whose line range
    contains the ref line). Creates a symbol_edge from the enclosing symbol
    to the target symbol.

    Every existing symbol_edges row is deleted first, including edges that
    were inserted through add_edge. A ref whose name matches several symbols
    is joined against each of them. Nothing is committed here; the caller
    commits.
    """
    conn.execute("DELETE FROM symbol_edges")

    # Find matching ref -> target symbol, with nearest enclosing source symbol.
    # The enclosing symbol is the one in the same file as the ref whose
    # line_start <= ref.line and (line_end >= ref.line OR line_end IS NULL),
    # ordered by line_start DESC to get the nearest (innermost) enclosure.
    #
    # Candidates equal to the target itself are filtered out (src.id != tgt.id)
    # before grouping, so no self-loop is produced; a self-reference is
    # attributed to another enclosing symbol when one exists.
    #
    # NOTE(review): picking the innermost enclosure appears to rely on SQLite
    # filling the bare columns of a GROUP BY query from the row that holds
    # MAX(src.line_start) — confirm this holds with the aggregate used only in
    # HAVING, ideally with a test containing nested symbols.
    #
    # INSERT OR IGNORE drops rows whose (source, target, kind) triple is
    # already present, so repeated references collapse into one edge.
    conn.execute(
        """
        INSERT OR IGNORE INTO symbol_edges (source_symbol_id, target_symbol_id, kind, file_id, line)
        SELECT src.id, tgt.id, r.kind, r.file_id, r.line
        FROM refs r
        JOIN symbols tgt ON tgt.name = r.symbol_name
        JOIN symbols src ON src.file_id = r.file_id
        AND src.line_start <= r.line
        AND (src.line_end >= r.line OR src.line_end IS NULL)
        WHERE src.id != tgt.id
        GROUP BY r.id, tgt.id
        HAVING src.line_start = MAX(src.line_start)
        """
    )
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
# ---------------------------------------------------------------------------
|
|
364
|
+
# Graph traversal — recursive CTEs
|
|
365
|
+
# ---------------------------------------------------------------------------
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def get_transitive_deps(
    conn: sqlite3.Connection, symbol_id: int, max_depth: int = 10
) -> list:
    """Return all symbols this symbol transitively depends on (forward closure).

    Args:
        conn: Open connection whose rows convert with ``dict(row)``.
        symbol_id: Seed symbol; its outgoing symbol_edges are followed.
        max_depth: Rows are expanded further only while their depth is below
            this value.  Direct dependencies have depth 0.

    Returns:
        One dict per reachable symbol with the keys ``id``, ``name``,
        ``kind``, ``file_id`` and ``depth`` (the shallowest depth at which
        the symbol was reached), ordered by depth and then by id.

    Cycle prevention is handled by tracking the visited ids of each path as a
    comma-separated string inside the CTE.  Membership is tested on whole,
    comma-delimited ids, so id 1 is not mistaken for a part of id 10.  The
    seed itself is not part of the path, so it can show up in the result when
    a cycle leads back to it.
    """
    query = """
        WITH RECURSIVE dep_chain(id, name, kind, file_id, depth, path) AS (
            -- Base: direct dependencies of the seed symbol
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   0,
                   CAST(s.id AS TEXT)
            FROM symbol_edges e
            JOIN symbols s ON s.id = e.target_symbol_id
            WHERE e.source_symbol_id = ?

            UNION ALL

            -- Recursive: dependencies of already-visited nodes
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   dc.depth + 1,
                   dc.path || ',' || CAST(s.id AS TEXT)
            FROM dep_chain dc
            JOIN symbol_edges e ON e.source_symbol_id = dc.id
            JOIN symbols s ON s.id = e.target_symbol_id
            WHERE dc.depth < ?
              -- cycle guard: compare whole comma-delimited ids, not substrings
              AND INSTR(',' || dc.path || ',', ',' || CAST(s.id AS TEXT) || ',') = 0
        )
        -- One row per symbol, reported at its shallowest depth
        SELECT id, name, kind, file_id, MIN(depth) AS depth
        FROM dep_chain
        GROUP BY id
        ORDER BY MIN(depth), id
    """
    rows = conn.execute(query, (symbol_id, max_depth)).fetchall()
    return [dict(r) for r in rows]
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def get_reverse_deps(
    conn: sqlite3.Connection, symbol_id: int, max_depth: int = 10
) -> list:
    """Return all symbols that transitively depend on this symbol (blast radius).

    Traverses symbol_edges in reverse (callers/importers of the seed symbol).

    Args:
        conn: Open connection whose rows convert with ``dict(row)``.
        symbol_id: Seed symbol; its incoming symbol_edges are followed.
        max_depth: Rows are expanded further only while their depth is below
            this value.  Direct dependents have depth 0.

    Returns:
        One dict per dependent symbol with the keys ``id``, ``name``,
        ``kind``, ``file_id`` and ``depth`` (the shallowest depth at which
        the symbol was reached), ordered by depth and then by id.

    Cycle prevention tracks the visited ids of each path as a comma-separated
    string inside the CTE.  Membership is tested on whole, comma-delimited
    ids, so id 1 is not mistaken for a part of id 10.  The seed itself is not
    part of the path, so it can show up in the result when a cycle leads back
    to it.
    """
    query = """
        WITH RECURSIVE rev_chain(id, name, kind, file_id, depth, path) AS (
            -- Base: direct dependents of the seed symbol
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   0,
                   CAST(s.id AS TEXT)
            FROM symbol_edges e
            JOIN symbols s ON s.id = e.source_symbol_id
            WHERE e.target_symbol_id = ?

            UNION ALL

            -- Recursive: dependents of already-visited nodes
            SELECT s.id,
                   s.name,
                   s.kind,
                   s.file_id,
                   rc.depth + 1,
                   rc.path || ',' || CAST(s.id AS TEXT)
            FROM rev_chain rc
            JOIN symbol_edges e ON e.target_symbol_id = rc.id
            JOIN symbols s ON s.id = e.source_symbol_id
            WHERE rc.depth < ?
              -- cycle guard: compare whole comma-delimited ids, not substrings
              AND INSTR(',' || rc.path || ',', ',' || CAST(s.id AS TEXT) || ',') = 0
        )
        -- One row per symbol, reported at its shallowest depth
        SELECT id, name, kind, file_id, MIN(depth) AS depth
        FROM rev_chain
        GROUP BY id
        ORDER BY MIN(depth), id
    """
    rows = conn.execute(query, (symbol_id, max_depth)).fetchall()
    return [dict(r) for r in rows]
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
# ---------------------------------------------------------------------------
|
|
460
|
+
# Full-text search
|
|
461
|
+
# ---------------------------------------------------------------------------
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def fts_search(conn: sqlite3.Connection, query_text: str, limit: int = 10) -> list:
    """BM25-ranked full-text search over symbol names, qualified names, and signatures.

    Args:
        conn: Open connection whose rows convert with ``dict(row)``.
        query_text: FTS5 MATCH expression.  A blank or whitespace-only value
            short-circuits to an empty result instead of being sent to SQLite.
        limit: Maximum number of rows to return.

    Returns:
        Up to *limit* symbol dicts with an additional 'rank' field, best
        match first.  Lower rank values indicate better matches (BM25 scores
        are negative in SQLite's fts5 implementation).
    """
    # A MATCH expression without any term cannot select anything; answer it
    # locally rather than handing SQLite an empty query string.
    if not query_text or not query_text.strip():
        return []

    query = """
        SELECT s.*, fts.rank
        FROM symbols_fts fts
        JOIN symbols s ON s.id = fts.rowid
        WHERE symbols_fts MATCH ?
        ORDER BY fts.rank
        LIMIT ?
    """
    rows = conn.execute(query, (query_text, limit)).fetchall()
    return [dict(r) for r in rows]
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
# ---------------------------------------------------------------------------
|
|
484
|
+
# Statistics
|
|
485
|
+
# ---------------------------------------------------------------------------
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
def get_stats(conn: sqlite3.Connection) -> dict:
    """Return high-level database statistics.

    Runs one ``COUNT(*)`` query per table and returns the row counts in a
    dict keyed by ``file_count``, ``symbol_count``, ``edge_count``,
    ``reference_count`` and ``file_edge_count``.
    """
    # (result key, counting query) pairs; the order fixes both the query
    # order and the key order of the returned dict.
    count_queries = (
        ("file_count", "SELECT COUNT(*) FROM files"),
        ("symbol_count", "SELECT COUNT(*) FROM symbols"),
        ("edge_count", "SELECT COUNT(*) FROM symbol_edges"),
        ("reference_count", "SELECT COUNT(*) FROM refs"),
        ("file_edge_count", "SELECT COUNT(*) FROM file_edges"),
    )
    return {
        stat_name: conn.execute(sql).fetchone()[0]
        for stat_name, sql in count_queries
    }
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
# ---------------------------------------------------------------------------
|
|
505
|
+
# Utility helpers
|
|
506
|
+
# ---------------------------------------------------------------------------
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
def hash_content(content: str) -> str:
    """Compute the SHA-256 hex digest of *content* (encoded as UTF-8).

    Handy for change detection: equal text always yields the same digest.
    """
    encoded = content.encode("utf-8")
    hasher = hashlib.sha256()
    hasher.update(encoded)
    return hasher.hexdigest()
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
# ---------------------------------------------------------------------------
|
|
515
|
+
# Smoke-test entrypoint
|
|
516
|
+
# ---------------------------------------------------------------------------
|
|
517
|
+
|
|
518
|
+
if __name__ == "__main__":
    # Smoke test: exercises the public helpers of this module end-to-end
    # against a throwaway database and aborts on the first failed assertion.
    import contextlib
    import tempfile

    print("Running db.py smoke tests ...")

    # Context managers exit in reverse order, so closing() releases the
    # connection *before* TemporaryDirectory deletes the directory. Leaving
    # the handle open leaks the connection and can make directory removal
    # fail on Windows (assuming the database file lives inside *tmp*).
    with tempfile.TemporaryDirectory() as tmp, contextlib.closing(get_connection(tmp)) as conn:

        # ---- add files ----
        fid_parser = add_file(conn, "src/parser.py", "python", 1000.0, hash="abc123", line_count=50)
        fid_lexer = add_file(conn, "src/lexer.py", "python", 1001.0, line_count=30)
        fid_index = add_file(conn, "src/index.py", "python", 1002.0)
        conn.commit()

        assert get_file_by_path(conn, "src/parser.py")["id"] == fid_parser, "get_file_by_path failed"
        assert get_file_by_path(conn, "nonexistent.py") is None, "get_file_by_path should return None"
        print(" [PASS] File CRUD")

        # ---- add symbols ----
        # parser.py: parse_file (lines 10-40)
        sid_parse = add_symbol(
            conn, fid_parser, "parse_file", "function", 10, line_end=40,
            qualified_name="parser.parse_file",
            signature="def parse_file(path: str) -> AST",
        )
        # lexer.py: tokenize (lines 1-20)
        sid_tokenize = add_symbol(
            conn, fid_lexer, "tokenize", "function", 1, line_end=20,
            qualified_name="lexer.tokenize",
            signature="def tokenize(src: str) -> list",
        )
        # index.py: build_index (lines 5-60) — calls parse_file and tokenize
        sid_build = add_symbol(
            conn, fid_index, "build_index", "function", 5, line_end=60,
            qualified_name="index.build_index",
            signature="def build_index(root: str) -> None",
        )
        conn.commit()

        assert get_symbol_by_id(conn, sid_parse)["name"] == "parse_file", "get_symbol_by_id failed"
        assert len(get_symbol_by_name(conn, "tokenize")) == 1, "get_symbol_by_name failed"
        assert len(get_symbols_by_file(conn, fid_parser)) == 1, "get_symbols_by_file failed"
        print(" [PASS] Symbol CRUD")

        # ---- add references ----
        # build_index calls parse_file at line 15 and tokenize at line 25
        add_reference(conn, fid_index, "parse_file", 15, kind="call")
        add_reference(conn, fid_index, "tokenize", 25, kind="call")
        # parse_file calls tokenize at line 20
        add_reference(conn, fid_parser, "tokenize", 20, kind="call")
        conn.commit()

        refs_index = get_references_by_file(conn, fid_index)
        assert len(refs_index) == 2, f"expected 2 refs in index.py, got {len(refs_index)}"
        print(" [PASS] Reference CRUD")

        # ---- rebuild file edges ----
        rebuild_file_edges(conn)
        conn.commit()

        fe_count = conn.execute("SELECT COUNT(*) FROM file_edges").fetchone()[0]
        assert fe_count >= 2, f"expected >= 2 file edges, got {fe_count}"

        # index.py -> parser.py edge should exist
        fe = conn.execute(
            "SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
            (fid_index, fid_parser),
        ).fetchone()
        assert fe is not None, "file edge index->parser missing"
        # index.py -> lexer.py edge should exist
        fe2 = conn.execute(
            "SELECT * FROM file_edges WHERE source_file_id=? AND target_file_id=?",
            (fid_index, fid_lexer),
        ).fetchone()
        assert fe2 is not None, "file edge index->lexer missing"
        print(" [PASS] rebuild_file_edges")

        # ---- rebuild symbol edges ----
        rebuild_symbol_edges(conn)
        conn.commit()

        se_count = conn.execute("SELECT COUNT(*) FROM symbol_edges").fetchone()[0]
        assert se_count >= 2, f"expected >= 2 symbol edges, got {se_count}"

        # build_index -> parse_file edge should exist
        se = conn.execute(
            "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
            (sid_build, sid_parse),
        ).fetchone()
        assert se is not None, "symbol edge build_index->parse_file missing"

        # build_index -> tokenize edge should exist
        se2 = conn.execute(
            "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
            (sid_build, sid_tokenize),
        ).fetchone()
        assert se2 is not None, "symbol edge build_index->tokenize missing"

        # parse_file -> tokenize edge should exist
        se3 = conn.execute(
            "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=?",
            (sid_parse, sid_tokenize),
        ).fetchone()
        assert se3 is not None, "symbol edge parse_file->tokenize missing"
        print(" [PASS] rebuild_symbol_edges")

        # ---- transitive deps via symbol_edges ----
        deps = get_transitive_deps(conn, sid_build)
        dep_ids = {d["id"] for d in deps}
        assert sid_parse in dep_ids, f"transitive deps missing parse_file: {dep_ids}"
        assert sid_tokenize in dep_ids, f"transitive deps missing tokenize: {dep_ids}"
        print(" [PASS] get_transitive_deps")

        # ---- reverse deps via symbol_edges ----
        rdeps = get_reverse_deps(conn, sid_tokenize)
        rdep_ids = {d["id"] for d in rdeps}
        assert sid_parse in rdep_ids, f"reverse deps missing parse_file: {rdep_ids}"
        assert sid_build in rdep_ids, f"reverse deps missing build_index: {rdep_ids}"
        print(" [PASS] get_reverse_deps")

        # ---- FTS search ----
        results = fts_search(conn, "parse")
        assert any(r["name"] == "parse_file" for r in results), "FTS search for 'parse' failed"

        results_sig = fts_search(conn, "tokenize")
        assert any(r["name"] == "tokenize" for r in results_sig), "FTS search for 'tokenize' failed"

        results_qn = fts_search(conn, "index")
        assert any(r["name"] == "build_index" for r in results_qn), "FTS qualified_name search failed"
        print(" [PASS] FTS search")

        # ---- stats ----
        stats = get_stats(conn)
        assert stats["file_count"] == 3, f"expected 3 files, got {stats['file_count']}"
        assert stats["symbol_count"] == 3, f"expected 3 symbols, got {stats['symbol_count']}"
        assert stats["reference_count"] == 3, f"expected 3 refs, got {stats['reference_count']}"
        assert stats["edge_count"] >= 2, f"expected >= 2 symbol edges, got {stats['edge_count']}"
        assert stats["file_edge_count"] >= 2, f"expected >= 2 file edges, got {stats['file_edge_count']}"
        print(" [PASS] get_stats")

        # ---- add_edge (manual symbol edge) ----
        add_edge(conn, sid_parse, sid_build, "test_edge")
        conn.commit()
        manual_edge = conn.execute(
            "SELECT * FROM symbol_edges WHERE source_symbol_id=? AND target_symbol_id=? AND kind=?",
            (sid_parse, sid_build, "test_edge"),
        ).fetchone()
        assert manual_edge is not None, "add_edge failed"
        # duplicate should be ignored (only checks that re-adding does not raise)
        add_edge(conn, sid_parse, sid_build, "test_edge")
        conn.commit()
        print(" [PASS] add_edge (manual)")

        # ---- CASCADE deletes ----
        # Remove lexer.py file -> tokenize symbol, refs to tokenize, and edges should cascade
        sym_count_before = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]

        remove_file(conn, "src/lexer.py")
        conn.commit()

        assert get_file_by_path(conn, "src/lexer.py") is None, "file not removed"
        assert get_symbol_by_id(conn, sid_tokenize) is None, "symbol not cascaded on file delete"

        sym_count_after = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
        assert sym_count_after == sym_count_before - 1, f"expected {sym_count_before - 1} symbols, got {sym_count_after}"

        # Refs in lexer.py should be gone (there were none, but verify no error)
        # Symbol edges involving tokenize should be gone
        edges_to_tokenize = conn.execute(
            "SELECT COUNT(*) FROM symbol_edges WHERE target_symbol_id=? OR source_symbol_id=?",
            (sid_tokenize, sid_tokenize),
        ).fetchone()[0]
        assert edges_to_tokenize == 0, f"expected 0 edges involving deleted symbol, got {edges_to_tokenize}"
        print(" [PASS] CASCADE deletes (remove_file)")

        # ---- remove_symbols_by_file (without removing file) ----
        remove_symbols_by_file(conn, "src/parser.py")
        conn.commit()
        assert get_symbol_by_id(conn, sid_parse) is None, "remove_symbols_by_file failed"
        # File itself should still exist
        assert get_file_by_path(conn, "src/parser.py") is not None, "file should still exist after remove_symbols_by_file"
        print(" [PASS] remove_symbols_by_file")

        # ---- final stats ----
        final_stats = get_stats(conn)
        print(f"\n Final stats: {final_stats}")

        # ---- hash_content utility ----
        h = hash_content("hello world")
        assert len(h) == 64, "hash_content should return 64-char hex string"
        assert h == hash_content("hello world"), "hash_content should be deterministic"
        print(" [PASS] hash_content")

    print("\nAll smoke tests passed.")
|