htmlgraph 0.27.1__py3-none-any.whl → 0.27.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- htmlgraph/__init__.py +1 -1
- htmlgraph/analytics/session_graph.py +707 -0
- htmlgraph/api/main.py +8 -8
- htmlgraph/bounded_paths.py +539 -0
- htmlgraph/path_query.py +608 -0
- htmlgraph/pattern_matcher.py +636 -0
- htmlgraph/query_composer.py +509 -0
- {htmlgraph-0.27.1.dist-info → htmlgraph-0.27.3.dist-info}/METADATA +2 -2
- {htmlgraph-0.27.1.dist-info → htmlgraph-0.27.3.dist-info}/RECORD +16 -11
- {htmlgraph-0.27.1.data → htmlgraph-0.27.3.data}/data/htmlgraph/dashboard.html +0 -0
- {htmlgraph-0.27.1.data → htmlgraph-0.27.3.data}/data/htmlgraph/styles.css +0 -0
- {htmlgraph-0.27.1.data → htmlgraph-0.27.3.data}/data/htmlgraph/templates/AGENTS.md.template +0 -0
- {htmlgraph-0.27.1.data → htmlgraph-0.27.3.data}/data/htmlgraph/templates/CLAUDE.md.template +0 -0
- {htmlgraph-0.27.1.data → htmlgraph-0.27.3.data}/data/htmlgraph/templates/GEMINI.md.template +0 -0
- {htmlgraph-0.27.1.dist-info → htmlgraph-0.27.3.dist-info}/WHEEL +0 -0
- {htmlgraph-0.27.1.dist-info → htmlgraph-0.27.3.dist-info}/entry_points.txt +0 -0
htmlgraph/api/main.py
CHANGED
|
@@ -191,7 +191,7 @@ def get_app(db_path: str) -> FastAPI:
|
|
|
191
191
|
|
|
192
192
|
# Store database path and query cache in app state
|
|
193
193
|
app.state.db_path = db_path
|
|
194
|
-
app.state.query_cache = QueryCache(ttl_seconds=
|
|
194
|
+
app.state.query_cache = QueryCache(ttl_seconds=1.0) # Short TTL for real-time data
|
|
195
195
|
|
|
196
196
|
# Setup Jinja2 templates
|
|
197
197
|
template_dir = Path(__file__).parent / "templates"
|
|
@@ -2250,11 +2250,11 @@ def get_app(db_path: str) -> FastAPI:
|
|
|
2250
2250
|
datetime.fromisoformat(since.replace("Z", "+00:00"))
|
|
2251
2251
|
last_timestamp = since
|
|
2252
2252
|
except (ValueError, AttributeError):
|
|
2253
|
-
# Invalid timestamp - default to
|
|
2254
|
-
last_timestamp = (datetime.now() - timedelta(hours=
|
|
2253
|
+
# Invalid timestamp - default to 24 hours ago
|
|
2254
|
+
last_timestamp = (datetime.now() - timedelta(hours=24)).isoformat()
|
|
2255
2255
|
else:
|
|
2256
|
-
# Default: Load events from last
|
|
2257
|
-
last_timestamp = (datetime.now() - timedelta(hours=
|
|
2256
|
+
# Default: Load events from last 24 hours (captures all recent events in typical workflow)
|
|
2257
|
+
last_timestamp = (datetime.now() - timedelta(hours=24)).isoformat()
|
|
2258
2258
|
|
|
2259
2259
|
# FIX 3: Load historical events first (before real-time streaming)
|
|
2260
2260
|
db = await get_db()
|
|
@@ -2487,9 +2487,9 @@ def get_app(db_path: str) -> FastAPI:
|
|
|
2487
2487
|
def create_app(db_path: str | None = None) -> FastAPI:
|
|
2488
2488
|
"""Create FastAPI app with default database path."""
|
|
2489
2489
|
if db_path is None:
|
|
2490
|
-
# Use
|
|
2491
|
-
# Note: index.sqlite
|
|
2492
|
-
db_path = str(Path.home() / ".htmlgraph" / "
|
|
2490
|
+
# Use htmlgraph.db - this is the main database with all events
|
|
2491
|
+
# Note: Changed from index.sqlite which was empty analytics cache
|
|
2492
|
+
db_path = str(Path.home() / ".htmlgraph" / "htmlgraph.db")
|
|
2493
2493
|
|
|
2494
2494
|
return get_app(db_path)
|
|
2495
2495
|
|
|
@@ -0,0 +1,539 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Bounded path-finding and cycle detection for HtmlGraph.
|
|
3
|
+
|
|
4
|
+
Provides safe, deterministic graph traversal algorithms with built-in
|
|
5
|
+
cycle avoidance and depth bounds. Replaces timeout-based safety guards
|
|
6
|
+
with structural guarantees:
|
|
7
|
+
|
|
8
|
+
- BFS for shortest paths: O(V+E) guaranteed
|
|
9
|
+
- DFS with per-path visited tracking for bounded enumeration
|
|
10
|
+
- Cycle detection with configurable depth limits
|
|
11
|
+
|
|
12
|
+
All algorithms terminate deterministically via depth bounds,
|
|
13
|
+
never requiring timeouts.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from collections import deque
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from typing import TYPE_CHECKING
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from htmlgraph.edge_index import EdgeRef
|
|
24
|
+
from htmlgraph.graph import HtmlGraph
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class PathResult:
    """
    One path through the graph, as produced by BoundedPathFinder.

    Attributes:
        nodes: Node IDs in traversal order (source first, target last).
        edges: The EdgeRef followed for each hop, in the same order as the
            hops between consecutive entries of ``nodes``.
        length: Number of edges in the path (``len(nodes) - 1``).
        relationship_types: Sorted, de-duplicated relationship names of the
            edges used by this path.
    """

    nodes: list[str]
    edges: list[EdgeRef]
    length: int
    relationship_types: list[str]


@dataclass
class CycleResult:
    """
    One directed cycle found by BoundedPathFinder.

    Attributes:
        cycle: Node IDs forming the cycle. The first and last element are
            the same node, closing the loop.
        length: Number of edges in the cycle.
        edge_types: Sorted, de-duplicated relationship names of the edges
            in the cycle.
        involves_node: The node the search started from (and returned to).
    """

    cycle: list[str]
    length: int
    edge_types: list[str]
    involves_node: str


@dataclass
class BoundedPathFinder:
    """
    Safe, bounded graph traversal with cycle avoidance.

    Provides path-finding and cycle detection that terminate because of
    visited-set tracking and depth bounds rather than timeouts.

    All methods read neighbors through ``graph._edge_index`` and only walk
    into nodes present in ``graph._nodes``; every method supports optional
    edge-type filtering.

    Example:
        >>> from htmlgraph.graph import HtmlGraph
        >>> graph = HtmlGraph("features/", auto_load=True)
        >>> finder = BoundedPathFinder(graph)
        >>> path = finder.any_shortest("feat-001", "feat-010")
        >>> if path:
        ...     print(f"Shortest path: {' -> '.join(path.nodes)}")
        >>> cycles = finder.find_cycles("feat-001")
        >>> for c in cycles:
        ...     print(f"Cycle of length {c.length}: {c.cycle}")
    """

    graph: HtmlGraph
    max_depth: int = 20

    # Internal cache slot, excluded from __init__ and repr. None of the
    # methods in this class read or write it; it is kept so the instance
    # layout stays compatible with any external code that touches it.
    _adjacency_cache: dict[str, dict[str, list[_NeighborInfo]]] = field(
        default_factory=dict, init=False, repr=False
    )

    def _get_neighbors(
        self,
        node_id: str,
        edge_types: list[str] | None,
        direction: str = "outgoing",
    ) -> list[_NeighborInfo]:
        """
        Get neighbors of a node together with the edge that reaches them.

        Args:
            node_id: The node to get neighbors for.
            edge_types: If provided, only edges whose ``relationship`` is in
                this collection are returned.
            direction: "outgoing" follows edges leaving node_id (neighbor is
                the edge target). Any other value is treated as "incoming"
                (neighbor is the edge source).

        Returns:
            List of _NeighborInfo, in the order the edge index yields edges.
        """
        index = self.graph._edge_index
        outgoing = direction == "outgoing"
        refs = index.get_outgoing(node_id) if outgoing else index.get_incoming(node_id)
        return [
            _NeighborInfo(ref.target_id if outgoing else ref.source_id, ref)
            for ref in refs
            if edge_types is None or ref.relationship in edge_types
        ]

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def any_shortest(
        self,
        from_id: str,
        to_id: str,
        edge_types: list[str] | None = None,
    ) -> PathResult | None:
        """
        Find ANY shortest path between two nodes using BFS.

        Each node is enqueued at most once (visited set), and the path is
        rebuilt from parent pointers only when the target is found, so no
        per-node path copies are made during the search.

        Args:
            from_id: Source node ID.
            to_id: Target node ID.
            edge_types: If provided, only traverse edges with these
                relationship types.

        Returns:
            A PathResult for one shortest path, a zero-length PathResult if
            from_id == to_id, or None if either node is unknown or no path
            exists.
        """
        known = self.graph._nodes
        if from_id not in known or to_id not in known:
            return None

        if from_id == to_id:
            return PathResult(
                nodes=[from_id], edges=[], length=0, relationship_types=[]
            )

        # parents[n] = (node we first reached n from, edge used to get there)
        parents: dict[str, tuple[str, EdgeRef]] = {}
        visited: set[str] = {from_id}
        queue: deque[str] = deque([from_id])

        while queue:
            current = queue.popleft()

            for info in self._get_neighbors(current, edge_types, "outgoing"):
                neighbor = info.neighbor_id

                if neighbor == to_id:
                    # Walk parent pointers back to the source, then flip.
                    nodes = [to_id]
                    edges = [info.edge_ref]
                    cursor = current
                    while cursor != from_id:
                        nodes.append(cursor)
                        cursor, via = parents[cursor]
                        edges.append(via)
                    nodes.append(from_id)
                    nodes.reverse()
                    edges.reverse()
                    return PathResult(
                        nodes=nodes,
                        edges=edges,
                        length=len(edges),
                        relationship_types=sorted({e.relationship for e in edges}),
                    )

                if neighbor not in visited and neighbor in known:
                    visited.add(neighbor)
                    parents[neighbor] = (current, info.edge_ref)
                    queue.append(neighbor)

        return None

    def all_shortest(
        self,
        from_id: str,
        to_id: str,
        edge_types: list[str] | None = None,
    ) -> list[PathResult]:
        """
        Find ALL shortest paths (same minimum length) between two nodes.

        Phase 1 runs BFS from from_id recording, for every node, all
        predecessors that reach it at its shortest distance; expansion stops
        once the frontier reaches the target's distance. Phase 2 enumerates
        the paths by walking the predecessor map back from to_id with an
        explicit stack, so long shortest paths cannot hit Python's
        recursion limit.

        Args:
            from_id: Source node ID.
            to_id: Target node ID.
            edge_types: If provided, only traverse edges with these
                relationship types.

        Returns:
            List of PathResult objects, all having the same minimum length.
            Empty list if either node is unknown or no path exists.
        """
        known = self.graph._nodes
        if from_id not in known or to_id not in known:
            return []

        if from_id == to_id:
            return [
                PathResult(nodes=[from_id], edges=[], length=0, relationship_types=[])
            ]

        # Phase 1: BFS distances plus every shortest-distance predecessor.
        dist: dict[str, int] = {from_id: 0}
        predecessors: dict[str, list[tuple[str, EdgeRef]]] = {}
        queue: deque[str] = deque([from_id])

        while queue:
            current = queue.popleft()
            current_dist = dist[current]

            # BFS pops nodes in non-decreasing distance, so once we reach
            # the target's own distance every predecessor on a shortest
            # path has already been recorded.
            if to_id in dist and current_dist >= dist[to_id]:
                break

            new_dist = current_dist + 1
            for info in self._get_neighbors(current, edge_types, "outgoing"):
                neighbor = info.neighbor_id
                if neighbor not in known:
                    continue

                if neighbor not in dist:
                    # First time reaching this node
                    dist[neighbor] = new_dist
                    predecessors[neighbor] = [(current, info.edge_ref)]
                    queue.append(neighbor)
                elif dist[neighbor] == new_dist:
                    # Same shortest distance, add alternative predecessor
                    predecessors[neighbor].append((current, info.edge_ref))

        if to_id not in dist:
            return []

        # Phase 2: iterative depth-first walk from to_id back to from_id.
        # path_nodes/path_edges hold the current target-to-source trail;
        # each stack frame iterates the predecessors of the trail's last node.
        results: list[PathResult] = []
        path_nodes: list[str] = [to_id]
        path_edges: list[EdgeRef] = []
        stack = [iter(predecessors.get(to_id, ()))]

        while stack:
            step = next(stack[-1], None)
            if step is None:
                stack.pop()
                # Undo the hop that opened this frame (the root has none).
                if path_edges:
                    path_nodes.pop()
                    path_edges.pop()
                continue

            pred_node, edge_ref = step
            path_nodes.append(pred_node)
            path_edges.append(edge_ref)

            if pred_node == from_id:
                # Reverse to get source-to-target order
                final_edges = path_edges[::-1]
                results.append(
                    PathResult(
                        nodes=path_nodes[::-1],
                        edges=final_edges,
                        length=len(final_edges),
                        relationship_types=sorted(
                            {e.relationship for e in final_edges}
                        ),
                    )
                )
                path_nodes.pop()
                path_edges.pop()
            else:
                stack.append(iter(predecessors.get(pred_node, ())))

        return results

    def bounded_paths(
        self,
        from_id: str,
        to_id: str,
        max_depth: int | None = None,
        max_results: int = 100,
        edge_types: list[str] | None = None,
    ) -> list[PathResult]:
        """
        Find simple paths of at most max_depth edges between two nodes.

        Uses DFS with a visited set that is maintained per path (nodes are
        un-marked on backtrack), so different paths may share intermediate
        nodes while no single path revisits a node. Recursion depth is
        bounded by the depth limit.

        Args:
            from_id: Source node ID.
            to_id: Target node ID.
            max_depth: Maximum path length in edges. Defaults to self.max_depth.
            max_results: Maximum number of paths to return (default 100).
            edge_types: If provided, only traverse edges with these
                relationship types.

        Returns:
            List of PathResult objects, up to max_results, in DFS discovery
            order. A single zero-length path if from_id == to_id. Empty list
            if either node is unknown.
        """
        depth_limit = max_depth if max_depth is not None else self.max_depth
        known = self.graph._nodes

        if from_id not in known or to_id not in known:
            return []

        if from_id == to_id:
            return [
                PathResult(nodes=[from_id], edges=[], length=0, relationship_types=[])
            ]

        results: list[PathResult] = []

        def _dfs(
            current: str,
            path_nodes: list[str],
            path_edges: list[EdgeRef],
            visited: set[str],
        ) -> None:
            if current == to_id:
                results.append(
                    PathResult(
                        nodes=list(path_nodes),
                        edges=list(path_edges),
                        length=len(path_edges),
                        relationship_types=sorted(
                            {e.relationship for e in path_edges}
                        ),
                    )
                )
                return

            # Don't go deeper if we're at the depth limit
            if len(path_edges) >= depth_limit:
                return

            for info in self._get_neighbors(current, edge_types, "outgoing"):
                # Stop scanning siblings as soon as the result cap is hit.
                if len(results) >= max_results:
                    return
                neighbor = info.neighbor_id
                if neighbor not in visited and neighbor in known:
                    visited.add(neighbor)
                    path_nodes.append(neighbor)
                    path_edges.append(info.edge_ref)
                    _dfs(neighbor, path_nodes, path_edges, visited)
                    path_edges.pop()
                    path_nodes.pop()
                    visited.remove(neighbor)

        _dfs(from_id, [from_id], [], {from_id})
        return results

    def find_cycles(
        self,
        node_id: str | None = None,
        edge_types: list[str] | None = None,
        max_cycle_length: int = 10,
    ) -> list[CycleResult]:
        """
        Detect cycles in the graph.

        If node_id is provided, finds cycles that start and end at that
        node. Otherwise searches from every node in the graph and keeps one
        CycleResult per distinct node sequence (rotations of the same cycle
        are treated as duplicates; the first one discovered is kept).

        Args:
            node_id: If provided, only find cycles involving this node.
            edge_types: If provided, only follow edges with these
                relationship types.
            max_cycle_length: Maximum cycle length in edges (default 10).

        Returns:
            List of CycleResult objects describing each cycle found.
        """
        if node_id is not None:
            return self._find_cycles_for_node(node_id, edge_types, max_cycle_length)

        all_cycles: list[CycleResult] = []
        seen_cycles: set[tuple[str, ...]] = set()

        for nid in self.graph._nodes:
            for cycle_result in self._find_cycles_for_node(
                nid, edge_types, max_cycle_length
            ):
                # Normalize for deduplication: drop the closing duplicate,
                # then rotate so the smallest ID comes first.
                cycle_nodes = cycle_result.cycle[:-1]
                if not cycle_nodes:
                    continue
                min_idx = cycle_nodes.index(min(cycle_nodes))
                normalized = tuple(cycle_nodes[min_idx:] + cycle_nodes[:min_idx])

                if normalized not in seen_cycles:
                    seen_cycles.add(normalized)
                    all_cycles.append(cycle_result)

        return all_cycles

    def _find_cycles_for_node(
        self,
        node_id: str,
        edge_types: list[str] | None,
        max_cycle_length: int,
    ) -> list[CycleResult]:
        """
        Find all cycles that start and end at node_id, up to max_cycle_length.

        Runs a recursive DFS from node_id along outgoing edges, recording a
        cycle whenever an edge leads back to node_id (self-loops included).
        Intermediate nodes are not revisited within one path, and recursion
        only continues while another edge could still close a cycle within
        the length bound.

        Args:
            node_id: The node to find cycles for.
            edge_types: Optional edge type filter.
            max_cycle_length: Maximum edges in a cycle.

        Returns:
            List of CycleResult objects for cycles involving node_id; empty
            if node_id is not in the graph.
        """
        known = self.graph._nodes
        if node_id not in known:
            return []

        results: list[CycleResult] = []

        def _dfs(
            current: str,
            path: list[str],
            path_edges: list[EdgeRef],
            visited: set[str],
        ) -> None:
            candidate_length = len(path_edges) + 1

            for info in self._get_neighbors(current, edge_types, "outgoing"):
                neighbor = info.neighbor_id

                if neighbor == node_id:
                    # Found a cycle back to start (includes self-loops)
                    if candidate_length <= max_cycle_length:
                        all_edges = path_edges + [info.edge_ref]
                        results.append(
                            CycleResult(
                                cycle=path + [node_id],
                                length=len(all_edges),
                                edge_types=sorted(
                                    {e.relationship for e in all_edges}
                                ),
                                involves_node=node_id,
                            )
                        )
                elif (
                    neighbor not in visited
                    and neighbor in known
                    # Leave room for at least one more edge to close the loop.
                    and candidate_length < max_cycle_length
                ):
                    visited.add(neighbor)
                    path.append(neighbor)
                    path_edges.append(info.edge_ref)
                    _dfs(neighbor, path, path_edges, visited)
                    path_edges.pop()
                    path.pop()
                    visited.remove(neighbor)

        _dfs(node_id, [node_id], [], {node_id})
        return results

    def reachable_set(
        self,
        from_id: str,
        edge_types: list[str] | None = None,
        direction: str = "outgoing",
        max_depth: int | None = None,
    ) -> set[str]:
        """
        Find all nodes reachable from a starting node within a depth bound.

        Uses BFS, tracking the depth of each queued node; nodes already at
        the depth limit are not expanded further.

        Args:
            from_id: Starting node ID.
            edge_types: If provided, only follow edges with these
                relationship types.
            direction: "outgoing" follows edges leaving each node; any other
                value follows edges pointing to it (see _get_neighbors).
            max_depth: Maximum traversal depth. Defaults to self.max_depth.

        Returns:
            Set of reachable node IDs (does not include from_id itself).
            Empty set if from_id is not in the graph.
        """
        depth_limit = max_depth if max_depth is not None else self.max_depth
        known = self.graph._nodes

        if from_id not in known:
            return set()

        reachable: set[str] = set()
        visited: set[str] = {from_id}
        queue: deque[tuple[str, int]] = deque([(from_id, 0)])

        while queue:
            current, depth = queue.popleft()

            if depth >= depth_limit:
                continue

            for info in self._get_neighbors(current, edge_types, direction):
                neighbor = info.neighbor_id
                if neighbor not in visited and neighbor in known:
                    visited.add(neighbor)
                    reachable.add(neighbor)
                    queue.append((neighbor, depth + 1))

        return reachable


@dataclass
class _NeighborInfo:
    """Internal helper pairing a neighbor ID with the edge that reaches it."""

    neighbor_id: str
    edge_ref: EdgeRef
|