osscodeiq 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- osscodeiq/__init__.py +0 -0
- osscodeiq/analyzer.py +467 -0
- osscodeiq/cache/__init__.py +0 -0
- osscodeiq/cache/hasher.py +23 -0
- osscodeiq/cache/store.py +300 -0
- osscodeiq/classifiers/__init__.py +0 -0
- osscodeiq/classifiers/layer_classifier.py +69 -0
- osscodeiq/cli.py +721 -0
- osscodeiq/config.py +113 -0
- osscodeiq/detectors/__init__.py +0 -0
- osscodeiq/detectors/auth/__init__.py +0 -0
- osscodeiq/detectors/auth/certificate_auth.py +139 -0
- osscodeiq/detectors/auth/ldap_auth.py +89 -0
- osscodeiq/detectors/auth/session_header_auth.py +120 -0
- osscodeiq/detectors/base.py +41 -0
- osscodeiq/detectors/config/__init__.py +0 -0
- osscodeiq/detectors/config/batch_structure.py +128 -0
- osscodeiq/detectors/config/cloudformation.py +183 -0
- osscodeiq/detectors/config/docker_compose.py +179 -0
- osscodeiq/detectors/config/github_actions.py +150 -0
- osscodeiq/detectors/config/gitlab_ci.py +216 -0
- osscodeiq/detectors/config/helm_chart.py +187 -0
- osscodeiq/detectors/config/ini_structure.py +101 -0
- osscodeiq/detectors/config/json_structure.py +72 -0
- osscodeiq/detectors/config/kubernetes.py +305 -0
- osscodeiq/detectors/config/kubernetes_rbac.py +212 -0
- osscodeiq/detectors/config/openapi.py +194 -0
- osscodeiq/detectors/config/package_json.py +99 -0
- osscodeiq/detectors/config/properties_detector.py +108 -0
- osscodeiq/detectors/config/pyproject_toml.py +169 -0
- osscodeiq/detectors/config/sql_structure.py +155 -0
- osscodeiq/detectors/config/toml_structure.py +93 -0
- osscodeiq/detectors/config/tsconfig_json.py +105 -0
- osscodeiq/detectors/config/yaml_structure.py +82 -0
- osscodeiq/detectors/cpp/__init__.py +0 -0
- osscodeiq/detectors/cpp/cpp_structures.py +192 -0
- osscodeiq/detectors/csharp/__init__.py +0 -0
- osscodeiq/detectors/csharp/csharp_efcore.py +184 -0
- osscodeiq/detectors/csharp/csharp_minimal_apis.py +156 -0
- osscodeiq/detectors/csharp/csharp_structures.py +317 -0
- osscodeiq/detectors/docs/__init__.py +0 -0
- osscodeiq/detectors/docs/markdown_structure.py +117 -0
- osscodeiq/detectors/frontend/__init__.py +0 -0
- osscodeiq/detectors/frontend/angular_components.py +177 -0
- osscodeiq/detectors/frontend/frontend_routes.py +259 -0
- osscodeiq/detectors/frontend/react_components.py +148 -0
- osscodeiq/detectors/frontend/svelte_components.py +84 -0
- osscodeiq/detectors/frontend/vue_components.py +150 -0
- osscodeiq/detectors/generic/__init__.py +1 -0
- osscodeiq/detectors/generic/imports_detector.py +413 -0
- osscodeiq/detectors/go/__init__.py +0 -0
- osscodeiq/detectors/go/go_orm.py +202 -0
- osscodeiq/detectors/go/go_structures.py +162 -0
- osscodeiq/detectors/go/go_web.py +157 -0
- osscodeiq/detectors/iac/__init__.py +0 -0
- osscodeiq/detectors/iac/bicep.py +135 -0
- osscodeiq/detectors/iac/dockerfile.py +182 -0
- osscodeiq/detectors/iac/terraform.py +188 -0
- osscodeiq/detectors/java/__init__.py +0 -0
- osscodeiq/detectors/java/azure_functions.py +424 -0
- osscodeiq/detectors/java/azure_messaging.py +350 -0
- osscodeiq/detectors/java/class_hierarchy.py +349 -0
- osscodeiq/detectors/java/config_def.py +82 -0
- osscodeiq/detectors/java/cosmos_db.py +105 -0
- osscodeiq/detectors/java/graphql_resolver.py +188 -0
- osscodeiq/detectors/java/grpc_service.py +142 -0
- osscodeiq/detectors/java/ibm_mq.py +178 -0
- osscodeiq/detectors/java/jaxrs.py +160 -0
- osscodeiq/detectors/java/jdbc.py +196 -0
- osscodeiq/detectors/java/jms.py +116 -0
- osscodeiq/detectors/java/jpa_entity.py +143 -0
- osscodeiq/detectors/java/kafka.py +113 -0
- osscodeiq/detectors/java/kafka_protocol.py +70 -0
- osscodeiq/detectors/java/micronaut.py +248 -0
- osscodeiq/detectors/java/module_deps.py +191 -0
- osscodeiq/detectors/java/public_api.py +206 -0
- osscodeiq/detectors/java/quarkus.py +176 -0
- osscodeiq/detectors/java/rabbitmq.py +150 -0
- osscodeiq/detectors/java/raw_sql.py +136 -0
- osscodeiq/detectors/java/repository.py +131 -0
- osscodeiq/detectors/java/rmi.py +129 -0
- osscodeiq/detectors/java/spring_events.py +117 -0
- osscodeiq/detectors/java/spring_rest.py +168 -0
- osscodeiq/detectors/java/spring_security.py +212 -0
- osscodeiq/detectors/java/tibco_ems.py +193 -0
- osscodeiq/detectors/java/websocket.py +188 -0
- osscodeiq/detectors/kotlin/__init__.py +0 -0
- osscodeiq/detectors/kotlin/kotlin_structures.py +124 -0
- osscodeiq/detectors/kotlin/ktor_routes.py +163 -0
- osscodeiq/detectors/proto/__init__.py +0 -0
- osscodeiq/detectors/proto/proto_structure.py +153 -0
- osscodeiq/detectors/python/__init__.py +0 -0
- osscodeiq/detectors/python/celery_tasks.py +88 -0
- osscodeiq/detectors/python/django_auth.py +132 -0
- osscodeiq/detectors/python/django_models.py +157 -0
- osscodeiq/detectors/python/django_views.py +74 -0
- osscodeiq/detectors/python/fastapi_auth.py +143 -0
- osscodeiq/detectors/python/fastapi_routes.py +68 -0
- osscodeiq/detectors/python/flask_routes.py +67 -0
- osscodeiq/detectors/python/kafka_python.py +175 -0
- osscodeiq/detectors/python/pydantic_models.py +115 -0
- osscodeiq/detectors/python/python_structures.py +234 -0
- osscodeiq/detectors/python/sqlalchemy_models.py +82 -0
- osscodeiq/detectors/registry.py +100 -0
- osscodeiq/detectors/rust/__init__.py +0 -0
- osscodeiq/detectors/rust/actix_web.py +234 -0
- osscodeiq/detectors/rust/rust_structures.py +174 -0
- osscodeiq/detectors/scala/__init__.py +0 -0
- osscodeiq/detectors/scala/scala_structures.py +128 -0
- osscodeiq/detectors/shell/__init__.py +0 -0
- osscodeiq/detectors/shell/bash_detector.py +127 -0
- osscodeiq/detectors/shell/powershell_detector.py +118 -0
- osscodeiq/detectors/typescript/__init__.py +0 -0
- osscodeiq/detectors/typescript/express_routes.py +55 -0
- osscodeiq/detectors/typescript/fastify_routes.py +156 -0
- osscodeiq/detectors/typescript/graphql_resolvers.py +100 -0
- osscodeiq/detectors/typescript/kafka_js.py +164 -0
- osscodeiq/detectors/typescript/mongoose_orm.py +151 -0
- osscodeiq/detectors/typescript/nestjs_controllers.py +99 -0
- osscodeiq/detectors/typescript/nestjs_guards.py +138 -0
- osscodeiq/detectors/typescript/passport_jwt.py +133 -0
- osscodeiq/detectors/typescript/prisma_orm.py +96 -0
- osscodeiq/detectors/typescript/remix_routes.py +160 -0
- osscodeiq/detectors/typescript/sequelize_orm.py +136 -0
- osscodeiq/detectors/typescript/typeorm_entities.py +86 -0
- osscodeiq/detectors/typescript/typescript_structures.py +185 -0
- osscodeiq/detectors/utils.py +49 -0
- osscodeiq/discovery/__init__.py +11 -0
- osscodeiq/discovery/change_detector.py +97 -0
- osscodeiq/discovery/file_discovery.py +342 -0
- osscodeiq/flow/__init__.py +0 -0
- osscodeiq/flow/engine.py +78 -0
- osscodeiq/flow/models.py +72 -0
- osscodeiq/flow/renderer.py +127 -0
- osscodeiq/flow/templates/interactive.html +252 -0
- osscodeiq/flow/vendor/cytoscape-dagre.min.js +8 -0
- osscodeiq/flow/vendor/cytoscape.min.js +32 -0
- osscodeiq/flow/vendor/dagre.min.js +3809 -0
- osscodeiq/flow/views.py +357 -0
- osscodeiq/graph/__init__.py +0 -0
- osscodeiq/graph/backend.py +52 -0
- osscodeiq/graph/backends/__init__.py +23 -0
- osscodeiq/graph/backends/kuzu.py +576 -0
- osscodeiq/graph/backends/networkx.py +135 -0
- osscodeiq/graph/backends/sqlite_backend.py +406 -0
- osscodeiq/graph/builder.py +297 -0
- osscodeiq/graph/query.py +228 -0
- osscodeiq/graph/store.py +183 -0
- osscodeiq/graph/views.py +231 -0
- osscodeiq/models/__init__.py +17 -0
- osscodeiq/models/graph.py +116 -0
- osscodeiq/output/__init__.py +0 -0
- osscodeiq/output/dot.py +171 -0
- osscodeiq/output/mermaid.py +160 -0
- osscodeiq/output/safety.py +58 -0
- osscodeiq/output/serializers.py +42 -0
- osscodeiq/parsing/__init__.py +5 -0
- osscodeiq/parsing/languages/__init__.py +0 -0
- osscodeiq/parsing/languages/base.py +23 -0
- osscodeiq/parsing/languages/java.py +68 -0
- osscodeiq/parsing/languages/python.py +57 -0
- osscodeiq/parsing/languages/typescript.py +95 -0
- osscodeiq/parsing/parser_manager.py +125 -0
- osscodeiq/parsing/structured/__init__.py +0 -0
- osscodeiq/parsing/structured/gradle_parser.py +78 -0
- osscodeiq/parsing/structured/json_parser.py +24 -0
- osscodeiq/parsing/structured/properties_parser.py +56 -0
- osscodeiq/parsing/structured/sql_parser.py +54 -0
- osscodeiq/parsing/structured/xml_parser.py +148 -0
- osscodeiq/parsing/structured/yaml_parser.py +38 -0
- osscodeiq/server/__init__.py +7 -0
- osscodeiq/server/app.py +53 -0
- osscodeiq/server/mcp_server.py +174 -0
- osscodeiq/server/middleware.py +16 -0
- osscodeiq/server/routes.py +184 -0
- osscodeiq/server/service.py +445 -0
- osscodeiq/server/templates/welcome.html +56 -0
- osscodeiq-0.0.0.dist-info/METADATA +30 -0
- osscodeiq-0.0.0.dist-info/RECORD +183 -0
- osscodeiq-0.0.0.dist-info/WHEEL +5 -0
- osscodeiq-0.0.0.dist-info/entry_points.txt +2 -0
- osscodeiq-0.0.0.dist-info/licenses/LICENSE +21 -0
- osscodeiq-0.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,576 @@
|
|
|
1
|
+
"""KuzuDB-backed graph backend with Cypher support."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import csv
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
import tempfile
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import kuzu
|
|
13
|
+
|
|
14
|
+
from osscodeiq.models.graph import (
|
|
15
|
+
EdgeKind,
|
|
16
|
+
GraphEdge,
|
|
17
|
+
GraphNode,
|
|
18
|
+
NodeKind,
|
|
19
|
+
SourceLocation,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Schema DDL
# ---------------------------------------------------------------------------
# Node table: one row per graph node, keyed by ``id``.  ``annotations`` and
# ``properties`` are STRING columns; the serialization helpers in this module
# store JSON text in them.  ``line_start`` / ``line_end`` are INT64 columns in
# which those helpers write -1 when no line number is known.
_CREATE_NODE_TABLE = """
CREATE NODE TABLE IF NOT EXISTS CodeNode(
id STRING,
kind STRING,
label STRING,
fqn STRING,
module STRING,
file_path STRING,
line_start INT64,
line_end INT64,
annotations STRING,
properties STRING,
PRIMARY KEY(id)
)
""".strip()

# Relationship table: directed CodeNode -> CodeNode edges.  ``properties`` is
# a STRING column that holds JSON text.
_CREATE_EDGE_TABLE = """
CREATE REL TABLE IF NOT EXISTS CODE_EDGE(
FROM CodeNode TO CodeNode,
kind STRING,
label STRING,
properties STRING
)
""".strip()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
# Serialization helpers
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
def _node_to_params(node: GraphNode) -> dict[str, Any]:
|
|
57
|
+
"""Convert a GraphNode to a flat dict suitable for Cypher parameters."""
|
|
58
|
+
return {
|
|
59
|
+
"id": node.id,
|
|
60
|
+
"kind": node.kind.value,
|
|
61
|
+
"label": node.label,
|
|
62
|
+
"fqn": node.fqn or "",
|
|
63
|
+
"module": node.module or "",
|
|
64
|
+
"file_path": node.location.file_path if node.location else "",
|
|
65
|
+
"line_start": node.location.line_start if node.location and node.location.line_start is not None else -1,
|
|
66
|
+
"line_end": node.location.line_end if node.location and node.location.line_end is not None else -1,
|
|
67
|
+
"annotations": json.dumps(node.annotations),
|
|
68
|
+
"properties": json.dumps(node.properties),
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _row_to_node(columns: list[str], row: list[Any]) -> GraphNode:
    """Build a *GraphNode* from one row of a ``RETURN n.*`` query.

    *columns* are the result's column names (e.g. ``["n.id", "n.kind", ...]``);
    everything up to and including the last ``.`` is dropped to obtain the
    field name.  Empty ``fqn`` / ``module`` strings map back to ``None``,
    negative line numbers map back to ``None``, and a row with an empty
    ``file_path`` yields a node without a location.
    """

    def _line_or_none(value: Any) -> Any:
        # Negative values are the stored "unknown line" sentinel.
        return None if value is None or value < 0 else value

    # "n.id" -> "id"; a later duplicate field name overrides an earlier one.
    fields: dict[str, Any] = {
        name.rsplit(".", 1)[-1]: value for name, value in zip(columns, row)
    }

    location: SourceLocation | None
    if fields.get("file_path"):
        first_line = fields.get("line_start")
        last_line = fields.get("line_end")
        location = SourceLocation(
            file_path=fields["file_path"],
            line_start=_line_or_none(first_line),
            line_end=_line_or_none(last_line),
        )
    else:
        location = None

    raw_annotations = fields.get("annotations", "[]")
    raw_properties = fields.get("properties", "{}")

    return GraphNode(
        id=fields["id"],
        kind=NodeKind(fields["kind"]),
        label=fields["label"],
        fqn=fields.get("fqn") or None,
        module=fields.get("module") or None,
        location=location,
        annotations=json.loads(raw_annotations) if raw_annotations else [],
        properties=json.loads(raw_properties) if raw_properties else {},
    )
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _edge_row_to_edge(columns: list[str], row: list[Any]) -> GraphEdge:
    """Build a *GraphEdge* from one row of an edge query.

    The row is expected to follow the column pattern
    ``["a.id", "b.id", "e.kind", "e.label", "e.properties"]``: the first two
    values are taken positionally as source and target ids, while the edge
    attributes are looked up by their ``e.``-prefixed column names.
    """
    by_column: dict[str, Any] = dict(zip(columns, row))

    # Endpoints are positional: a.id first, b.id second.
    src_id = row[0]
    dst_id = row[1]

    raw_kind = by_column.get("e.kind", "")
    raw_label = by_column.get("e.label")
    raw_props = by_column.get("e.properties", "{}")

    return GraphEdge(
        source=src_id,
        target=dst_id,
        kind=EdgeKind(raw_kind),
        label=raw_label or None,
        properties=json.loads(raw_props) if raw_props else {},
    )
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
# KuzuBackend
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
class KuzuBackend:
    """Persistent graph backend using KuzuDB (embedded graph database).

    Implements both :class:`GraphBackend` and :class:`CypherBackend` protocols.

    Nodes live in the ``CodeNode`` table and edges in the ``CODE_EDGE``
    relationship table.  Queries issued through :meth:`_execute` never raise:
    a failure is logged and surfaces to callers as an "empty" answer
    (``None``, ``[]``, ``0`` or ``False`` depending on the method).  Schema
    creation is the exception and re-raises.
    """

    # Column order of the CodeNode table; bulk-load CSV rows must follow it.
    _NODE_COLUMNS = (
        "id", "kind", "label", "fqn", "module",
        "file_path", "line_start", "line_end",
        "annotations", "properties",
    )

    # Path patterns shared by the neighbour queries: ``a`` is the anchor node,
    # ``b`` the neighbour whose id is returned, ``e`` the connecting edge.
    _OUT_PATTERN = "(a:CodeNode {id: $id})-[e:CODE_EDGE]->(b:CodeNode)"
    _IN_PATTERN = "(b:CodeNode)-[e:CODE_EDGE]->(a:CodeNode {id: $id})"

    def __init__(self, db_path: str) -> None:
        """Open the database at *db_path*, connect, and ensure the schema exists."""
        self._db = kuzu.Database(db_path)
        self._conn = kuzu.Connection(self._db)
        self._ensure_schema()

    # ------------------------------------------------------------------
    # Schema bootstrapping
    # ------------------------------------------------------------------
    def _ensure_schema(self) -> None:
        """Create the node and relationship tables if they don't exist.

        Raises:
            Exception: whatever the connection raised, after logging it.
        """
        try:
            self._conn.execute(_CREATE_NODE_TABLE)
            self._conn.execute(_CREATE_EDGE_TABLE)
        except Exception:
            logger.exception("Failed to ensure KuzuDB schema")
            raise

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _execute(
        self, query: str, params: dict[str, Any] | None = None
    ) -> kuzu.QueryResult | None:
        """Execute a Cypher statement, returning the QueryResult or *None* on error."""
        try:
            return self._conn.execute(query, parameters=params or {})
        except Exception:
            logger.exception("KuzuDB query failed: %s | params=%s", query, params)
            return None

    def _fetch_nodes(
        self, query: str, params: dict[str, Any] | None = None
    ) -> list[GraphNode]:
        """Run a ``RETURN n.*`` query and decode every row; ``[]`` on failure."""
        result = self._execute(query, params)
        if result is None:
            return []
        columns = result.get_column_names()
        return [_row_to_node(columns, r) for r in result.get_all()]

    def _fetch_edges(
        self, query: str, params: dict[str, Any] | None = None
    ) -> list[GraphEdge]:
        """Run a ``RETURN a.id, b.id, e.*`` query and decode every row; ``[]`` on failure."""
        result = self._execute(query, params)
        if result is None:
            return []
        columns = result.get_column_names()
        return [_edge_row_to_edge(columns, r) for r in result.get_all()]

    def _count(self, query: str) -> int:
        """Return the single integer produced by a ``COUNT`` query; ``0`` on failure."""
        result = self._execute(query)
        if result is None:
            return 0
        rows = result.get_all()
        return int(rows[0][0]) if rows else 0

    def _copy_from_csv(self, table: str, rows: list[list[Any]]) -> None:
        """Write *rows* to a temporary CSV file and ``COPY`` it into *table*.

        The file handle is closed before the COPY statement runs and the file
        is removed afterwards, whether or not anything failed.  Errors are
        propagated so the caller can fall back to per-row inserts.
        """
        fd = tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", delete=False, newline="", encoding="utf-8"
        )
        csv_path = fd.name
        try:
            # ``with`` guarantees the handle is closed even if a row fails to
            # serialise; an open handle would block the unlink on Windows.
            with fd:
                csv.writer(fd).writerows(rows)
            self._conn.execute(
                f'COPY {table} FROM "{csv_path}" (HEADER=false)'
            )
        finally:
            try:
                os.unlink(csv_path)
            except OSError:
                pass

    def _neighbor_ids(
        self, pattern: str, node_id: str, edge_kinds: set[EdgeKind] | None
    ) -> set[str]:
        """Collect the ``b.id`` values matched by *pattern* for *node_id*.

        With *edge_kinds* set, one query is issued per kind; with ``None`` a
        single unfiltered query is issued.  Failed queries contribute nothing.
        """
        if edge_kinds is None:
            queries: list[tuple[str, dict[str, Any]]] = [
                (f"MATCH {pattern} RETURN DISTINCT b.id", {"id": node_id})
            ]
        else:
            queries = [
                (
                    f"MATCH {pattern} WHERE e.kind = $kind RETURN DISTINCT b.id",
                    {"id": node_id, "kind": ek.value},
                )
                for ek in edge_kinds
            ]
        found: set[str] = set()
        for query, params in queries:
            res = self._execute(query, params)
            if res is not None:
                found.update(row[0] for row in res.get_all())
        return found

    @staticmethod
    def _canonical_cycles(rows: list[list[Any]], limit: int) -> list[list[str]]:
        """Reduce raw cycle-query rows to at most *limit* distinct simple cycles.

        Each row's second column is the node list of a closed path.  The same
        cycle can be reported once per starting node, and non-simple walks can
        be reported too, so every path is checked for repeated nodes and then
        rotated so its smallest id comes first before de-duplication.
        """
        seen: set[tuple[str, ...]] = set()
        cycles: list[list[str]] = []
        for row in rows:
            # path is e.g. [a, b, c, a] -- strip the repeated tail
            cycle = [n["id"] for n in row[1]][:-1]
            if len(cycle) < 2:
                continue
            # Keep only *simple* cycles (no repeated interior nodes)
            if len(set(cycle)) != len(cycle):
                continue
            pivot = cycle.index(min(cycle))
            canonical = tuple(cycle[pivot:] + cycle[:pivot])
            if canonical in seen:
                continue
            seen.add(canonical)
            cycles.append(list(canonical))
            if len(cycles) >= limit:
                break
        return cycles

    # ------------------------------------------------------------------
    # GraphBackend protocol
    # ------------------------------------------------------------------
    def add_node(self, node: GraphNode) -> None:
        """Insert *node* unless a node with the same id already exists."""
        if self.has_node(node.id):
            logger.debug("Duplicate node ID %s, keeping first", node.id)
            return
        params = _node_to_params(node)
        self._execute(
            "CREATE (n:CodeNode {"
            "id: $id, kind: $kind, label: $label, fqn: $fqn, module: $module, "
            "file_path: $file_path, line_start: $line_start, line_end: $line_end, "
            "annotations: $annotations, properties: $properties"
            "})",
            params,
        )

    def add_edge(self, edge: GraphEdge) -> None:
        """Insert *edge* between two existing nodes.

        The statement matches both endpoints first, so nothing is created
        when either endpoint is missing.
        """
        params = {
            "src": edge.source,
            "tgt": edge.target,
            "kind": edge.kind.value,
            "label": edge.label or "",
            "properties": json.dumps(edge.properties),
        }
        self._execute(
            "MATCH (a:CodeNode {id: $src}), (b:CodeNode {id: $tgt}) "
            "CREATE (a)-[:CODE_EDGE {kind: $kind, label: $label, properties: $properties}]->(b)",
            params,
        )

    def bulk_add_nodes(self, nodes: list[GraphNode]) -> None:
        """Bulk-insert nodes via CSV COPY FROM (~100x faster than per-row).

        Duplicate ids within *nodes* are dropped, keeping the first
        occurrence.  If the bulk load fails for any reason the nodes are
        inserted one by one through :meth:`add_node` instead.
        """
        if not nodes:
            return
        first_by_id: dict[str, GraphNode] = {}
        for n in nodes:
            first_by_id.setdefault(n.id, n)  # keep the first occurrence
        unique_nodes = list(first_by_id.values())

        try:
            rows = []
            for node in unique_nodes:
                p = _node_to_params(node)
                rows.append([p[col] for col in self._NODE_COLUMNS])
            self._copy_from_csv("CodeNode", rows)
        except Exception:
            logger.exception("Bulk node insert failed, falling back to per-row")
            for node in unique_nodes:
                self.add_node(node)

    def bulk_add_edges(self, edges: list[GraphEdge]) -> None:
        """Bulk-insert edges via CSV COPY FROM (~100x faster than per-row).

        If the bulk load fails for any reason the edges are inserted one by
        one through :meth:`add_edge` instead.
        """
        if not edges:
            return
        try:
            rows = [
                [
                    edge.source,
                    edge.target,
                    edge.kind.value,
                    edge.label or "",
                    json.dumps(edge.properties),
                ]
                for edge in edges
            ]
            self._copy_from_csv("CODE_EDGE", rows)
        except Exception:
            logger.exception("Bulk edge insert failed, falling back to per-row")
            for edge in edges:
                self.add_edge(edge)

    def clear(self) -> None:
        """Remove all data by dropping and recreating both tables."""
        # The relationship table is dropped first, then the node table.
        try:
            self._conn.execute("DROP TABLE CODE_EDGE")
        except Exception:
            logger.debug("DROP TABLE CODE_EDGE failed (may not exist)")
        try:
            self._conn.execute("DROP TABLE CodeNode")
        except Exception:
            logger.debug("DROP TABLE CodeNode failed (may not exist)")
        self._ensure_schema()

    def get_node(self, node_id: str) -> GraphNode | None:
        """Return the node with *node_id*, or ``None`` if absent or on query failure."""
        result = self._execute(
            "MATCH (n:CodeNode {id: $id}) RETURN n.*", {"id": node_id}
        )
        if result is None:
            return None
        rows = result.get_all()
        if not rows:
            return None
        return _row_to_node(result.get_column_names(), rows[0])

    def has_node(self, node_id: str) -> bool:
        """Return whether a node with *node_id* exists (``False`` on query failure)."""
        result = self._execute(
            "MATCH (n:CodeNode {id: $id}) RETURN COUNT(n)", {"id": node_id}
        )
        if result is None:
            return False
        rows = result.get_all()
        return bool(rows and rows[0][0] > 0)

    def get_edges_between(self, source: str, target: str) -> list[GraphEdge]:
        """Return every edge going from *source* to *target*."""
        return self._fetch_edges(
            "MATCH (a:CodeNode {id: $src})-[e:CODE_EDGE]->(b:CodeNode {id: $tgt}) "
            "RETURN a.id, b.id, e.*",
            {"src": source, "tgt": target},
        )

    def all_nodes(self) -> list[GraphNode]:
        """Return every node in the graph."""
        return self._fetch_nodes("MATCH (n:CodeNode) RETURN n.*")

    def all_edges(self) -> list[GraphEdge]:
        """Return every edge in the graph."""
        return self._fetch_edges(
            "MATCH (a:CodeNode)-[e:CODE_EDGE]->(b:CodeNode) RETURN a.id, b.id, e.*"
        )

    def nodes_by_kind(self, kind: NodeKind) -> list[GraphNode]:
        """Return every node whose kind equals *kind*."""
        return self._fetch_nodes(
            "MATCH (n:CodeNode) WHERE n.kind = $kind RETURN n.*",
            {"kind": kind.value},
        )

    def edges_by_kind(self, kind: EdgeKind) -> list[GraphEdge]:
        """Return every edge whose kind equals *kind*."""
        return self._fetch_edges(
            "MATCH (a:CodeNode)-[e:CODE_EDGE]->(b:CodeNode) WHERE e.kind = $kind "
            "RETURN a.id, b.id, e.*",
            {"kind": kind.value},
        )

    @property
    def node_count(self) -> int:
        """Number of nodes in the graph (``0`` on query failure)."""
        return self._count("MATCH (n:CodeNode) RETURN COUNT(n)")

    @property
    def edge_count(self) -> int:
        """Number of edges in the graph (``0`` on query failure)."""
        return self._count("MATCH ()-[e:CODE_EDGE]->() RETURN COUNT(e)")

    def neighbors(
        self,
        node_id: str,
        edge_kinds: set[EdgeKind] | None = None,
        direction: str = "both",
    ) -> list[str]:
        """Return the sorted ids of the nodes adjacent to *node_id*.

        Args:
            node_id: id of the anchor node.
            edge_kinds: when given, only edges of these kinds are followed.
            direction: ``"out"`` for successors, ``"in"`` for predecessors,
                ``"both"`` for their union.  Any other value yields ``[]``.
        """
        result_ids: set[str] = set()
        if direction in ("out", "both"):
            result_ids |= self._neighbor_ids(self._OUT_PATTERN, node_id, edge_kinds)
        if direction in ("in", "both"):
            result_ids |= self._neighbor_ids(self._IN_PATTERN, node_id, edge_kinds)
        return sorted(result_ids)

    def find_cycles(self, limit: int = 100) -> list[list[str]]:
        """Detect cycles using bounded recursive Cypher match.

        Only paths of 2 to 10 edges are searched.  Falls back to loading the
        graph into NetworkX if the Cypher query fails or its result cannot be
        decoded.

        Returns:
            Up to *limit* simple cycles, each rotated so that its smallest
            node id comes first.
        """
        result = self._execute(
            "MATCH p = (a:CodeNode)-[e:CODE_EDGE* 2..10]->(a) "
            "RETURN a.id, nodes(p) LIMIT $lim",
            {"lim": limit * 5},  # over-fetch to account for dedup
        )
        if result is not None:
            try:
                return self._canonical_cycles(result.get_all(), limit)
            except Exception:
                logger.debug(
                    "Cypher cycle detection failed, falling back to NetworkX",
                    exc_info=True,
                )
        # Invoked outside the ``try`` so a failing fallback is not retried.
        return self._find_cycles_nx_fallback(limit)

    def _find_cycles_nx_fallback(self, limit: int) -> list[list[str]]:
        """Load the graph into a temporary NetworkX digraph and find cycles."""
        return self._to_networkx_backend().find_cycles(limit)

    def shortest_path(self, source: str, target: str) -> list[str] | None:
        """Find the shortest path between two nodes.

        Uses KuzuDB's ``ALL SHORTEST`` recursive match, bounded to 30 edges.
        Falls back to NetworkX if the Cypher query fails or its result cannot
        be decoded.

        Returns:
            The node ids along the path, or ``None`` when the query succeeds
            but finds no path.
        """
        result = self._execute(
            "MATCH (a:CodeNode {id: $src}), (b:CodeNode {id: $tgt}), "
            "p = (a)-[:CODE_EDGE* ALL SHORTEST 1..30]->(b) "
            "RETURN nodes(p) LIMIT 1",
            {"src": source, "tgt": target},
        )
        if result is not None:
            try:
                rows = result.get_all()
                if not rows:
                    return None
                return [n["id"] for n in rows[0][0]]
            except Exception:
                logger.debug(
                    "Cypher shortest-path failed, falling back to NetworkX",
                    exc_info=True,
                )
        # Invoked outside the ``try`` so a failing fallback is not retried.
        return self._shortest_path_nx_fallback(source, target)

    def _shortest_path_nx_fallback(self, source: str, target: str) -> list[str] | None:
        """Load the graph into a temporary NetworkX backend and query it."""
        return self._to_networkx_backend().shortest_path(source, target)

    def subgraph(self, node_ids: set[str]) -> "NetworkXBackend":
        """Return a NetworkXBackend loaded with the requested subset.

        KuzuDB has no lightweight view abstraction, so we materialise the
        subgraph into an in-memory NetworkX backend.  An edge is included
        only when both of its endpoints are in *node_ids*.
        """
        return self._materialise(node_ids)

    def update_node_properties(self, node_id: str, properties: dict[str, Any]) -> None:
        """Merge *properties* into the stored properties of *node_id*.

        Keys in *properties* override existing keys.  A warning is logged and
        nothing is written when the node does not exist.
        """
        node = self.get_node(node_id)
        if node is None:
            logger.warning("update_node_properties: node %s not found", node_id)
            return
        merged = {**node.properties, **properties}
        self._execute(
            "MATCH (n:CodeNode {id: $id}) SET n.properties = $props",
            {"id": node_id, "props": json.dumps(merged)},
        )

    def close(self) -> None:
        """Close the KuzuDB connection."""
        try:
            self._conn.close()
        except Exception:
            logger.debug("Error closing KuzuDB connection", exc_info=True)

    # ------------------------------------------------------------------
    # CypherBackend protocol
    # ------------------------------------------------------------------
    def query_cypher(
        self, cypher: str, params: dict[str, Any] | None = None
    ) -> list[dict[str, Any]]:
        """Execute a raw Cypher query and return results as a list of dicts.

        Each dict maps a result column name to that row's value.  A failed
        query yields ``[]``.
        """
        result = self._execute(cypher, params)
        if result is None:
            return []
        columns = result.get_column_names()
        return [dict(zip(columns, row)) for row in result.get_all()]

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _to_networkx_backend(self) -> "NetworkXBackend":
        """Materialise the entire graph into a NetworkXBackend."""
        return self._materialise(None)

    def _materialise(self, node_ids: set[str] | None) -> "NetworkXBackend":
        """Copy nodes and edges into a fresh NetworkXBackend.

        With *node_ids* set, only those nodes and the edges whose endpoints
        are both in the set are copied; ``None`` copies everything.
        """
        # Imported lazily, as in the rest of this module's NetworkX usage.
        from osscodeiq.graph.backends.networkx import NetworkXBackend

        nx_backend = NetworkXBackend()
        for node in self.all_nodes():
            if node_ids is None or node.id in node_ids:
                nx_backend.add_node(node)
        for edge in self.all_edges():
            if node_ids is None or (
                edge.source in node_ids and edge.target in node_ids
            ):
                nx_backend.add_edge(edge)
        return nx_backend
|