codebase-retrieval-context-engine 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_retrieval_context_engine-2.0.0.dist-info/METADATA +505 -0
- codebase_retrieval_context_engine-2.0.0.dist-info/RECORD +46 -0
- codebase_retrieval_context_engine-2.0.0.dist-info/WHEEL +4 -0
- codebase_retrieval_context_engine-2.0.0.dist-info/entry_points.txt +3 -0
- codebase_retrieval_context_engine-2.0.0.dist-info/licenses/LICENSE +201 -0
- corbell/__init__.py +6 -0
- corbell/cli/__init__.py +1 -0
- corbell/cli/commands/__init__.py +1 -0
- corbell/cli/commands/index.py +86 -0
- corbell/cli/commands/query.py +71 -0
- corbell/cli/main.py +57 -0
- corbell/core/__init__.py +1 -0
- corbell/core/constants.py +52 -0
- corbell/core/embeddings/__init__.py +6 -0
- corbell/core/embeddings/base.py +68 -0
- corbell/core/embeddings/extractor.py +201 -0
- corbell/core/embeddings/factory.py +48 -0
- corbell/core/embeddings/model.py +401 -0
- corbell/core/embeddings/search_cache.py +95 -0
- corbell/core/embeddings/sqlite_store.py +271 -0
- corbell/core/gitignore.py +76 -0
- corbell/core/graph/__init__.py +1 -0
- corbell/core/graph/builder.py +696 -0
- corbell/core/graph/method_graph.py +1077 -0
- corbell/core/graph/providers/__init__.py +6 -0
- corbell/core/graph/providers/aws_patterns.py +62 -0
- corbell/core/graph/providers/azure_patterns.py +64 -0
- corbell/core/graph/providers/gcp_patterns.py +59 -0
- corbell/core/graph/schema.py +175 -0
- corbell/core/graph/sqlite_store.py +500 -0
- corbell/core/indexing/__init__.py +1 -0
- corbell/core/indexing/builder.py +608 -0
- corbell/core/indexing/lock.py +150 -0
- corbell/core/indexing/tracker.py +245 -0
- corbell/core/llm_client.py +677 -0
- corbell/core/mcp/__init__.py +1 -0
- corbell/core/mcp/server.py +214 -0
- corbell/core/query/__init__.py +1 -0
- corbell/core/query/diagnostics.py +38 -0
- corbell/core/query/engine.py +321 -0
- corbell/core/query/enhancer.py +102 -0
- corbell/core/query/formatter.py +98 -0
- corbell/core/query/graph_expander.py +284 -0
- corbell/core/query/merger.py +171 -0
- corbell/core/query/reranker.py +131 -0
- corbell/core/workspace.py +408 -0
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
"""SQLite-backed implementation of the GraphStore interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import sqlite3
|
|
7
|
+
from collections import deque
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Dict, List, Optional
|
|
10
|
+
|
|
11
|
+
from corbell.core.graph.schema import (
|
|
12
|
+
DataStoreNode,
|
|
13
|
+
DependencyEdge,
|
|
14
|
+
FlowNode,
|
|
15
|
+
GraphStore,
|
|
16
|
+
MethodNode,
|
|
17
|
+
QueueNode,
|
|
18
|
+
ServiceNode,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# DDL for the node table: one row per graph node, keyed by the node id.
# ``node_type`` holds the type tag ("service", "datastore", "queue", "method",
# "flow") and ``data`` holds the node serialized as a JSON string.
_CREATE_NODES = """
CREATE TABLE IF NOT EXISTS graph_nodes (
id TEXT PRIMARY KEY,
node_type TEXT NOT NULL,
data TEXT NOT NULL
);
"""

# DDL for the edge table: one row per directed edge. The UNIQUE constraint on
# (source_id, target_id, kind) is what the ``ON CONFLICT`` upsert relies on;
# ``metadata`` is a JSON string that defaults to an empty object.
_CREATE_EDGES = """
CREATE TABLE IF NOT EXISTS graph_edges (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id TEXT NOT NULL,
target_id TEXT NOT NULL,
kind TEXT NOT NULL,
metadata TEXT NOT NULL DEFAULT '{}',
UNIQUE(source_id, target_id, kind)
);
"""

# Secondary indexes for edge lookups by source node and by target node.
_CREATE_IDX_SOURCE = "CREATE INDEX IF NOT EXISTS idx_edges_source ON graph_edges(source_id);"
_CREATE_IDX_TARGET = "CREATE INDEX IF NOT EXISTS idx_edges_target ON graph_edges(target_id);"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _node_to_dict(node: ServiceNode | DataStoreNode | QueueNode | MethodNode | FlowNode) -> dict:
    """Serialize any node dataclass to a plain, JSON-friendly dict.

    ``dataclasses.asdict`` already recurses into nested dataclasses, lists,
    tuples and dicts, but it leaves ``Path`` objects untouched, and those
    cannot be passed to ``json.dumps``. Every ``Path`` value is therefore
    converted to ``str``, including ones nested inside list, tuple or dict
    fields.

    Args:
        node: A node dataclass instance.

    Returns:
        A dict of the node's fields with all ``Path`` values stringified.
    """
    from dataclasses import asdict

    def _plain(value):
        # Recursively replace Path objects while preserving container shape.
        if isinstance(value, Path):
            return str(value)
        if isinstance(value, dict):
            return {key: _plain(item) for key, item in value.items()}
        if isinstance(value, list):
            return [_plain(item) for item in value]
        if isinstance(value, tuple):
            items = [_plain(item) for item in value]
            # Named tuples take positional fields; plain tuples take one iterable.
            return type(value)(*items) if hasattr(value, "_fields") else tuple(items)
        return value

    return _plain(asdict(node))
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _dict_to_node(node_type: str, data: dict) -> ServiceNode | DataStoreNode | QueueNode | MethodNode | FlowNode:
    """Deserialize a dict back to a typed node dataclass.

    Keys in ``data`` that are not fields of the target dataclass are ignored
    for every node type, so a stored row carrying extra keys does not raise
    ``TypeError`` on load.

    Args:
        node_type: Type tag stored with the node ("service", "datastore",
            "queue", "method" or "flow").
        data: Field values as produced by ``_node_to_dict``.

    Returns:
        An instance of the dataclass matching ``node_type``.

    Raises:
        ValueError: If ``node_type`` is not one of the known tags.
    """
    node_classes = {
        "service": ServiceNode,
        "datastore": DataStoreNode,
        "queue": QueueNode,
        "method": MethodNode,
        "flow": FlowNode,
    }
    node_cls = node_classes.get(node_type)
    if node_cls is None:
        raise ValueError(f"Unknown node_type: {node_type}")
    known_fields = node_cls.__dataclass_fields__
    return node_cls(**{k: v for k, v in data.items() if k in known_fields})
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _node_type_str(node) -> str:
    """Return the storage type tag for a node instance.

    The classes are checked in a fixed order and the tag of the first
    matching class is returned.

    Raises:
        TypeError: If ``node`` is not an instance of any supported node class.
    """
    tagged_classes = (
        (ServiceNode, "service"),
        (DataStoreNode, "datastore"),
        (QueueNode, "queue"),
        (MethodNode, "method"),
        (FlowNode, "flow"),
    )
    for node_cls, tag in tagged_classes:
        if isinstance(node, node_cls):
            return tag
    raise TypeError(f"Unsupported node type: {type(node)}")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class SQLiteGraphStore(GraphStore):
    """Graph store backed by a local SQLite database.

    Creates two tables: ``graph_nodes`` and ``graph_edges``. All node data is
    stored as JSON blobs for schema flexibility.

    Each operation opens its own short-lived connection and closes it when
    the operation finishes.
    """

    # Mermaid link syntax per edge kind; kinds not listed render as a plain arrow.
    _MERMAID_LINKS = {
        "http_call": "-- HTTP -->",
        "rpc_call": "-- RPC/Edge Function -->",
        "db_read": "-- Reads -->",
        "db_write": "-- Writes -->",
        "queue_publish": "-- Publishes -->",
        "queue_consume": "-- Consumes -->",
        "library_dependency": "-. Import/Library .->",
    }
    # Edge kinds left out of the service-level Mermaid diagram.
    _MERMAID_SKIP_KINDS = frozenset({"method_call", "flow_step", "git_coupling", "flow_link"})
    # Characters in node ids that are replaced with "_" for Mermaid identifiers.
    _MERMAID_ID_TABLE = str.maketrans("-.:", "___")

    def __init__(self, db_path: Path | str):
        """Initialize the store, creating the database file if needed.

        Args:
            db_path: Path to the SQLite database file.
        """
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _conn(self) -> sqlite3.Connection:
        """Open a new connection whose rows support access by column name."""
        conn = sqlite3.connect(str(self.db_path))
        conn.row_factory = sqlite3.Row
        return conn

    def _session(self):
        """Return a context manager yielding a connection that is closed on exit.

        ``sqlite3.Connection`` used directly as a context manager only commits
        or rolls back; it never closes the connection. Wrapping it in
        ``closing`` releases the handle deterministically. Writers must call
        ``commit()`` themselves; uncommitted changes are discarded on close.
        """
        from contextlib import closing

        return closing(self._conn())

    @staticmethod
    def _row_to_edge(row) -> DependencyEdge:
        """Build a ``DependencyEdge`` from a ``graph_edges`` row."""
        return DependencyEdge(
            source_id=row["source_id"],
            target_id=row["target_id"],
            kind=row["kind"],
            metadata=json.loads(row["metadata"]),
        )

    def _init_db(self) -> None:
        """Create the tables and indexes if they do not exist yet."""
        with self._session() as conn:
            for statement in (_CREATE_NODES, _CREATE_EDGES, _CREATE_IDX_SOURCE, _CREATE_IDX_TARGET):
                conn.execute(statement)
            conn.commit()

    def upsert_node(self, node) -> None:
        """Insert or update a node."""
        node_type = _node_type_str(node)
        payload = json.dumps(_node_to_dict(node))
        with self._session() as conn:
            conn.execute(
                "INSERT OR REPLACE INTO graph_nodes (id, node_type, data) VALUES (?, ?, ?)",
                (node.id, node_type, payload),
            )
            conn.commit()

    def upsert_edge(self, edge: DependencyEdge) -> None:
        """Insert or update an edge (unique on source+target+kind)."""
        with self._session() as conn:
            conn.execute(
                """INSERT INTO graph_edges (source_id, target_id, kind, metadata)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(source_id, target_id, kind)
                DO UPDATE SET metadata = excluded.metadata""",
                (edge.source_id, edge.target_id, edge.kind, json.dumps(edge.metadata)),
            )
            conn.commit()

    def _load_node(self, row) -> ServiceNode | DataStoreNode | QueueNode | MethodNode | FlowNode:
        """Deserialize a ``graph_nodes`` row into its typed node dataclass."""
        return _dict_to_node(row["node_type"], json.loads(row["data"]))

    def get_service(self, service_id: str) -> Optional[ServiceNode]:
        """Return the service node with the given id, or ``None`` if absent."""
        with self._session() as conn:
            row = conn.execute(
                "SELECT * FROM graph_nodes WHERE id = ? AND node_type = 'service'",
                (service_id,),
            ).fetchone()
        return self._load_node(row) if row else None

    def get_all_services(self) -> List[ServiceNode]:
        """Return every stored service node."""
        with self._session() as conn:
            rows = conn.execute(
                "SELECT * FROM graph_nodes WHERE node_type = 'service'"
            ).fetchall()
        return [self._load_node(r) for r in rows]

    def get_dependencies(self, service_id: str) -> List[DependencyEdge]:
        """Return all edges whose source is ``service_id``."""
        with self._session() as conn:
            rows = conn.execute(
                "SELECT * FROM graph_edges WHERE source_id = ?", (service_id,)
            ).fetchall()
        return [self._row_to_edge(r) for r in rows]

    def get_dependents(self, service_id: str) -> List[DependencyEdge]:
        """Return all edges whose target is ``service_id``."""
        with self._session() as conn:
            rows = conn.execute(
                "SELECT * FROM graph_edges WHERE target_id = ?", (service_id,)
            ).fetchall()
        return [self._row_to_edge(r) for r in rows]

    def get_method(self, method_id: str) -> Optional[MethodNode]:
        """Return the method node with the given id, or ``None`` if absent."""
        with self._session() as conn:
            row = conn.execute(
                "SELECT * FROM graph_nodes WHERE id = ? AND node_type = 'method'",
                (method_id,),
            ).fetchone()
        return self._load_node(row) if row else None

    def get_call_path(
        self, from_method_id: str, to_method_id: str, max_depth: int = 5
    ) -> List[List[str]]:
        """BFS to find all call paths between two method nodes.

        Only ``method_call`` edges are followed, and a node is never revisited
        within the same path.

        Args:
            from_method_id: Id of the starting method.
            to_method_id: Id of the method to reach.
            max_depth: Maximum number of nodes allowed in a returned path.

        Returns:
            Every simple path (as a list of node ids) from ``from_method_id``
            to ``to_method_id`` containing at most ``max_depth`` nodes.
        """
        paths: List[List[str]] = []
        queue: deque[List[str]] = deque([[from_method_id]])

        with self._session() as conn:
            while queue:
                path = queue.popleft()
                if len(path) > max_depth:
                    continue
                current = path[-1]
                if current == to_method_id:
                    paths.append(path)
                    continue
                if len(path) == max_depth:
                    # Any extension would exceed max_depth; skip the query.
                    continue
                rows = conn.execute(
                    "SELECT target_id FROM graph_edges WHERE source_id = ? AND kind = 'method_call'",
                    (current,),
                ).fetchall()
                for row in rows:
                    neighbor = row["target_id"]
                    if neighbor not in path:  # avoid cycles
                        queue.append(path + [neighbor])
        return paths

    def get_methods_for_service(self, service_id: str) -> List[MethodNode]:
        """Return all method nodes whose stored ``service_id`` equals ``service_id``."""
        with self._session() as conn:
            rows = conn.execute(
                "SELECT * FROM graph_nodes WHERE node_type = 'method'"
            ).fetchall()
        # service_id lives inside the JSON blob, so the filter runs in Python.
        decoded = (json.loads(row["data"]) for row in rows)
        return [
            _dict_to_node("method", data)
            for data in decoded
            if data.get("service_id") == service_id
        ]

    def get_callers_of_method(self, method_id: str) -> List[MethodNode]:
        """Return all MethodNodes that have a method_call edge targeting method_id."""
        result: List[MethodNode] = []
        with self._session() as conn:
            edge_rows = conn.execute(
                "SELECT source_id FROM graph_edges WHERE target_id = ? AND kind = 'method_call'",
                (method_id,),
            ).fetchall()
            for edge_row in edge_rows:
                row = conn.execute(
                    "SELECT * FROM graph_nodes WHERE id = ? AND node_type = 'method'",
                    (edge_row["source_id"],),
                ).fetchone()
                # Edge sources without a stored method node are skipped.
                if row:
                    result.append(self._load_node(row))  # type: ignore[arg-type]
        return result

    def get_flows_for_method(self, method_id: str) -> List[dict]:
        """Return flows that include method_id as a step.

        Each returned dict has keys: ``flow_id``, ``flow_name``, ``step``,
        ``entry_method_id``.
        """
        result = []
        with self._session() as conn:
            step_rows = conn.execute(
                "SELECT source_id, metadata FROM graph_edges "
                "WHERE target_id = ? AND kind = 'flow_step'",
                (method_id,),
            ).fetchall()
            for step_row in step_rows:
                flow_id = step_row["source_id"]
                meta = json.loads(step_row["metadata"] or "{}")
                flow_row = conn.execute(
                    "SELECT data FROM graph_nodes WHERE id = ? AND node_type = 'flow'",
                    (flow_id,),
                ).fetchone()
                if not flow_row:
                    continue
                flow_data = json.loads(flow_row["data"])
                result.append({
                    "flow_id": flow_id,
                    "flow_name": flow_data.get("name", ""),
                    "step": meta.get("step", 0),
                    "entry_method_id": flow_data.get("entry_method_id", ""),
                })
        return result

    def get_all_nodes_summary(self) -> Dict[str, Any]:
        """Return node counts per node type and the total edge count."""
        with self._session() as conn:
            node_counts = conn.execute(
                "SELECT node_type, COUNT(*) as cnt FROM graph_nodes GROUP BY node_type"
            ).fetchall()
            edge_count = conn.execute("SELECT COUNT(*) FROM graph_edges").fetchone()[0]
        return {
            "nodes": {r["node_type"]: r["cnt"] for r in node_counts},
            "edges": edge_count,
        }

    def to_mermaid(self) -> str:
        """Return a single Mermaid file string describing the system boundaries.

        Only service, datastore and queue nodes are drawn. Edges of the kinds
        in ``_MERMAID_SKIP_KINDS`` are omitted.
        """
        with self._session() as conn:
            node_rows = conn.execute("SELECT id, node_type, data FROM graph_nodes").fetchall()
            edge_rows = conn.execute("SELECT source_id, target_id, kind FROM graph_edges").fetchall()

        sanitize = self._MERMAID_ID_TABLE
        buckets: Dict[str, List[tuple]] = {"service": [], "datastore": [], "queue": []}
        for row in node_rows:
            bucket = buckets.get(row["node_type"])
            if bucket is None:
                continue
            data = json.loads(row["data"])
            # Double quotes would terminate the Mermaid label string.
            label = data.get("name", row["id"]).replace('"', "'")
            bucket.append((row["id"].translate(sanitize), label))
        services, datastores, queues = buckets["service"], buckets["datastore"], buckets["queue"]

        connections = []
        seen = set()
        for edge in edge_rows:
            kind = edge["kind"]
            if kind in self._MERMAID_SKIP_KINDS:
                continue
            src = edge["source_id"].translate(sanitize)
            tgt = edge["target_id"].translate(sanitize)
            # Distinct ids can collapse to the same identifier after sanitizing.
            if (src, tgt, kind) in seen:
                continue
            seen.add((src, tgt, kind))
            link = self._MERMAID_LINKS.get(kind, "-->")
            connections.append(f" {src} {link} {tgt}")

        lines = ["graph LR", " %% Services"]
        lines.extend(f' {nid}["{label}"]' for nid, label in services)

        if datastores:
            lines.append(" %% Data Stores")
            lines.extend(f' {nid}[("{label}")]' for nid, label in datastores)

        if queues:
            lines.append(" %% Queues")
            lines.extend(f' {nid}>"{label}"]' for nid, label in queues)

        lines.append(" %% Edges")
        lines.extend(connections)

        lines.append(" %% Styling")
        lines.append(" classDef service fill:#161b22,stroke:#39d353,stroke-width:2px,color:#c9d1d9;")
        lines.append(" classDef datastore fill:#161b22,stroke:#ffa657,stroke-width:2px,color:#c9d1d9;")
        lines.append(" classDef queue fill:#161b22,stroke:#bc8cff,stroke-width:2px,color:#c9d1d9;")

        lines.extend(f" class {nid} service" for nid, _ in services)
        lines.extend(f" class {nid} datastore" for nid, _ in datastores)
        lines.extend(f" class {nid} queue" for nid, _ in queues)

        return "\n".join(lines)

    def to_json(self) -> str:
        """Return the JSON representation of the service graph.

        Method nodes are not emitted; they are only counted per service and
        reported as ``method_count`` on service nodes. Each flow node also
        contributes a synthetic ``flow_link`` edge from its service. Stored
        ``method_call`` and ``flow_step`` edges are omitted.
        """
        nodes = []
        edges = []
        method_counts: Dict[str, int] = {}

        with self._session() as conn:
            node_rows = conn.execute("SELECT id, node_type, data FROM graph_nodes").fetchall()
            edge_rows = conn.execute(
                "SELECT source_id, target_id, kind, metadata FROM graph_edges"
            ).fetchall()

        for row in node_rows:
            ntype = row["node_type"]
            data = json.loads(row["data"])
            if ntype == "method":
                # Don't clutter the service-level graph; only count for node sizing.
                sid = data.get("service_id", "")
                if sid:
                    method_counts[sid] = method_counts.get(sid, 0) + 1
                continue

            node = {"id": row["id"], "type": ntype}
            if ntype == "service":
                node.update({
                    "label": data.get("name", row["id"]),
                    "language": data.get("language", ""),
                    "service_type": data.get("service_type", "api"),
                    "tags": data.get("tags", []),
                })
            elif ntype in ("datastore", "queue"):
                node.update({"label": data.get("name", row["id"]), "kind": data.get("kind", "")})
            elif ntype == "flow":
                svc_id = data.get("service_id", "")
                node.update({
                    "label": data.get("name", row["id"]),
                    "service_id": svc_id,
                    "step_count": data.get("step_count", 0),
                })
                edges.append({
                    "source": svc_id,
                    "target": row["id"],
                    "kind": "flow_link",
                    "meta": {},
                })
            nodes.append(node)

        for node in nodes:
            if node["type"] == "service":
                node["method_count"] = method_counts.get(node["id"], 0)

        skip_kinds = {"method_call", "flow_step"}
        seen = set()
        for row in edge_rows:
            if row["kind"] in skip_kinds:
                continue
            key = (row["source_id"], row["target_id"], row["kind"])
            if key in seen:
                continue
            seen.add(key)
            edges.append({
                "source": row["source_id"],
                "target": row["target_id"],
                "kind": row["kind"],
                "meta": json.loads(row["metadata"] or "{}"),
            })

        return json.dumps({"nodes": nodes, "edges": edges}, indent=2)

    def clear(self) -> None:
        """Delete all graph data."""
        with self._session() as conn:
            conn.execute("DELETE FROM graph_nodes")
            conn.execute("DELETE FROM graph_edges")
            conn.commit()

    def delete_service_data(self, service_id: str) -> None:
        """Delete a service node, all its method nodes, and all related edges.

        Removes the service node itself, all MethodNodes whose ``service_id``
        matches, and all edges whose ``source_id`` equals ``service_id`` or
        starts with ``"<service_id>::"``. This leaves data for other services
        intact.

        The prefix match is an exact, case-sensitive comparison, so ``_`` or
        ``%`` in ``service_id`` are treated as literal characters rather than
        as ``LIKE`` wildcards.

        NOTE(review): edges that only *target* the deleted nodes, and flow
        nodes belonging to the service, are not removed here — confirm that
        this is intended.

        Args:
            service_id: The ID of the service to remove.
        """
        prefix = f"{service_id}::"
        with self._session() as conn:
            # Delete the service node itself
            conn.execute(
                "DELETE FROM graph_nodes WHERE id = ? AND node_type = 'service'",
                (service_id,),
            )

            # service_id is stored inside the JSON blob, so match in Python.
            method_rows = conn.execute(
                "SELECT id, data FROM graph_nodes WHERE node_type = 'method'"
            ).fetchall()
            method_ids_to_delete = [
                (row["id"],)
                for row in method_rows
                if json.loads(row["data"]).get("service_id") == service_id
            ]
            # One bound parameter per statement keeps this clear of SQLite's
            # limit on the number of variables in a single statement.
            conn.executemany("DELETE FROM graph_nodes WHERE id = ?", method_ids_to_delete)

            # Delete all edges whose source is the service or one of its members.
            conn.execute(
                "DELETE FROM graph_edges "
                "WHERE source_id = ? OR substr(source_id, 1, length(?)) = ?",
                (service_id, prefix, prefix),
            )

            conn.commit()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Incremental indexing module for Corbell."""
|