biocypher 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- biocypher/__init__.py +8 -0
- biocypher/_graph.py +819 -0
- biocypher/_metadata.py +21 -8
- biocypher/_workflow.py +798 -0
- biocypher/output/in_memory/_airr.py +10 -2
- {biocypher-0.10.0.dist-info → biocypher-0.11.0.dist-info}/METADATA +23 -25
- {biocypher-0.10.0.dist-info → biocypher-0.11.0.dist-info}/RECORD +15 -13
- {biocypher-0.10.0.dist-info → biocypher-0.11.0.dist-info}/WHEEL +1 -1
- {biocypher-0.10.0.dist-info → biocypher-0.11.0.dist-info/licenses}/LICENSE +0 -0
biocypher/_graph.py
ADDED
|
@@ -0,0 +1,819 @@
|
|
|
1
|
+
"""Unified Graph representation for BioCypher.
|
|
2
|
+
|
|
3
|
+
This module provides a comprehensive Graph class that can represent various
|
|
4
|
+
graph types including simple graphs, directed graphs, weighted graphs,
|
|
5
|
+
multigraphs, and hypergraphs. The design focuses on simplicity and
|
|
6
|
+
extensibility for knowledge representation.
|
|
7
|
+
|
|
8
|
+
TODO: examine overlap with legacy BioCypher modules, synergise where possible.
|
|
9
|
+
TODO: evaluate generalised graph class as consensus internal representation as
|
|
10
|
+
technical intermediate for other output adapters.
|
|
11
|
+
TODO: validation of new entities against schema. Rollback of inconsistent operations.
|
|
12
|
+
TODO: retrieval of subgraphs from existing databases.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
|
|
17
|
+
from collections import defaultdict, deque
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from enum import Enum
|
|
20
|
+
from typing import Any, Iterator
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class EdgeType(Enum):
    """Closed set of edge kinds used by the graph module.

    Every member's value is the lowercase string spelling of its name, so a
    member can be looked up from its string form with ``EdgeType(value)``.
    """

    SIMPLE = "simple"
    DIRECTED = "directed"
    WEIGHTED = "weighted"
    HYPEREDGE = "hyperedge"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class Node:
    """A typed graph node identified by a string ID.

    ``id`` and ``type`` are validated on construction; ``properties``
    defaults to a fresh empty dict per instance.

    Raises:
        ValueError: If ``id`` or ``type`` is not a ``str``.
    """

    id: str
    type: str
    properties: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Validate in declaration order so the first offending field
        # determines which message is raised.
        for label, value in (("ID", self.id), ("type", self.type)):
            if not isinstance(value, str):
                raise ValueError(f"Node {label} must be a string")

    def to_dict(self) -> dict[str, Any]:
        """Return the node as a plain dict with keys id, type, properties.

        The ``properties`` value is the node's own dict, not a copy.
        """
        return {name: getattr(self, name) for name in ("id", "type", "properties")}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Node":
        """Build a node from a mapping produced by ``to_dict``.

        ``id`` and ``type`` are required keys; a missing ``properties`` key
        yields an empty dict.
        """
        node_id = data["id"]
        node_type = data["type"]
        props = data.get("properties", {})
        return cls(id=node_id, type=node_type, properties=props)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass
class Edge:
    """A typed edge from one node ID (``source``) to another (``target``).

    All four identifying fields are validated on construction;
    ``properties`` defaults to a fresh empty dict per instance.

    Raises:
        ValueError: If ``id``, ``type``, ``source`` or ``target`` is not a
            ``str``.
    """

    id: str
    type: str
    source: str
    target: str
    properties: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Checked in declaration order; the first non-string field wins.
        checks = (
            ("ID", self.id),
            ("type", self.type),
            ("source", self.source),
            ("target", self.target),
        )
        for label, value in checks:
            if not isinstance(value, str):
                raise ValueError(f"Edge {label} must be a string")

    def to_dict(self) -> dict[str, Any]:
        """Return the edge as a plain dict.

        Keys are id, type, source, target, properties (in that order); the
        ``properties`` value is the edge's own dict, not a copy.
        """
        field_names = ("id", "type", "source", "target", "properties")
        return {name: getattr(self, name) for name in field_names}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Edge":
        """Build an edge from a mapping produced by ``to_dict``.

        ``id``, ``type``, ``source`` and ``target`` are required keys; a
        missing ``properties`` key yields an empty dict.
        """
        required = [data[key] for key in ("id", "type", "source", "target")]
        return cls(*required, properties=data.get("properties", {}))
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@dataclass
class HyperEdge:
    """Represents a hyperedge connecting two or more nodes.

    ``nodes`` holds the member node IDs and must be a ``set`` with at least
    two elements; ``properties`` defaults to a fresh empty dict.

    Raises:
        ValueError: If ``id`` or ``type`` is not a ``str``, if ``nodes`` is
            not a ``set``, or if ``nodes`` has fewer than 2 members.
    """

    id: str
    type: str
    nodes: set[str]
    properties: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        if not isinstance(self.id, str):
            raise ValueError("HyperEdge ID must be a string")
        if not isinstance(self.type, str):
            raise ValueError("HyperEdge type must be a string")
        if not isinstance(self.nodes, set):
            raise ValueError("HyperEdge nodes must be a set")
        if len(self.nodes) < 2:
            raise ValueError("HyperEdge must connect at least 2 nodes")

    def to_dict(self) -> dict[str, Any]:
        """Convert hyperedge to dictionary representation.

        The member set is emitted as a sorted list. Set iteration order is
        not stable across interpreter runs for strings, so sorting keeps the
        serialized form (and any JSON built from it) reproducible.
        """
        return {
            "id": self.id,
            "type": self.type,
            # key=str is the identity for string members and keeps the sort
            # from failing should a non-string member ever be present.
            "nodes": sorted(self.nodes, key=str),
            "properties": self.properties,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "HyperEdge":
        """Create hyperedge from dictionary representation.

        ``id``, ``type`` and ``nodes`` are required keys; ``nodes`` may be
        any iterable and is converted to a set. A missing ``properties`` key
        yields an empty dict.
        """
        return cls(
            id=data["id"],
            type=data["type"],
            nodes=set(data["nodes"]),
            properties=data.get("properties", {}),
        )
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class Graph:
|
|
128
|
+
"""Unified graph representation supporting various graph types.
|
|
129
|
+
|
|
130
|
+
This class provides a comprehensive graph representation that can handle:
|
|
131
|
+
- Simple undirected graphs
|
|
132
|
+
- Directed graphs
|
|
133
|
+
- Weighted graphs
|
|
134
|
+
- Multigraphs (multiple edges between same nodes)
|
|
135
|
+
- Hypergraphs (edges connecting multiple nodes)
|
|
136
|
+
- Property graphs (nodes and edges with properties)
|
|
137
|
+
|
|
138
|
+
The design prioritizes simplicity and extensibility for knowledge representation.
|
|
139
|
+
"""
|
|
140
|
+
|
|
141
|
+
def __init__(self, name: str = "graph", directed: bool = True):
|
|
142
|
+
"""Initialize a new graph.
|
|
143
|
+
|
|
144
|
+
Args:
|
|
145
|
+
name: Name of the graph
|
|
146
|
+
directed: Whether the graph is directed (default: True)
|
|
147
|
+
"""
|
|
148
|
+
self.name = name
|
|
149
|
+
self.directed = directed
|
|
150
|
+
|
|
151
|
+
# Core data structures
|
|
152
|
+
self._nodes: dict[str, Node] = {}
|
|
153
|
+
self._edges: dict[str, Edge] = {}
|
|
154
|
+
self._hyperedges: dict[str, HyperEdge] = {}
|
|
155
|
+
|
|
156
|
+
# Indexes for efficient querying
|
|
157
|
+
self._node_types: dict[str, set[str]] = defaultdict(set)
|
|
158
|
+
self._edge_types: dict[str, set[str]] = defaultdict(set)
|
|
159
|
+
self._hyperedge_types: dict[str, set[str]] = defaultdict(set)
|
|
160
|
+
|
|
161
|
+
# Adjacency indexes
|
|
162
|
+
self._outgoing: dict[str, set[str]] = defaultdict(set)
|
|
163
|
+
self._incoming: dict[str, set[str]] = defaultdict(set)
|
|
164
|
+
|
|
165
|
+
# Statistics
|
|
166
|
+
self._stats = {"nodes": 0, "edges": 0, "hyperedges": 0, "node_types": 0, "edge_types": 0, "hyperedge_types": 0}
|
|
167
|
+
|
|
168
|
+
# ==================== NODE OPERATIONS ====================
|
|
169
|
+
|
|
170
|
+
def add_node(self, node_id: str, node_type: str, properties: dict[str, Any] | None = None) -> bool:
    """Insert a new node unless its ID is already taken.

    Args:
        node_id: Unique identifier for the node.
        node_type: Type/category of the node.
        properties: Optional properties dictionary; a falsy value is
            replaced by a new empty dict.

    Returns:
        bool: True if the node was added, False if the ID already exists
        (the existing node is left untouched).
    """
    already_present = node_id in self._nodes
    if already_present:
        return False

    new_node = Node(id=node_id, type=node_type, properties=properties or {})

    # Store the node, index it by type, then refresh the counters.
    self._nodes[node_id] = new_node
    self._node_types[node_type].add(node_id)
    self._stats["nodes"] += 1
    self._stats["node_types"] = len(self._node_types)
    return True
|
|
192
|
+
|
|
193
|
+
def get_node(self, node_id: str) -> Node | None:
    """Look up a node by its ID.

    Args:
        node_id: Node identifier.

    Returns:
        The stored Node, or None when no node has that ID.
    """
    return self._nodes[node_id] if node_id in self._nodes else None
|
|
203
|
+
|
|
204
|
+
def has_node(self, node_id: str) -> bool:
    """Report whether a node with the given ID is stored.

    Args:
        node_id: Node identifier.

    Returns:
        bool: True if the ID is a key of the node store.
    """
    is_known = node_id in self._nodes
    return is_known
|
|
214
|
+
|
|
215
|
+
def remove_node(self, node_id: str) -> bool:
    """Remove a node together with every edge and hyperedge that uses it.

    Hyperedges containing the node are removed as a whole: leaving them in
    place would keep references to a node that no longer exists.

    Args:
        node_id: Node identifier.

    Returns:
        bool: True if node was removed, False if not found.
    """
    if node_id not in self._nodes:
        return False

    node = self._nodes[node_id]

    # Drop the node from the type index; forget the type once it is empty.
    self._node_types[node.type].discard(node_id)
    if not self._node_types[node.type]:
        del self._node_types[node.type]

    # Collect incident edges before removing them: remove_edge mutates
    # self._edges, which must not happen while iterating over it.
    incident_edges = [
        edge_id for edge_id, edge in self._edges.items() if edge.source == node_id or edge.target == node_id
    ]
    for edge_id in incident_edges:
        self.remove_edge(edge_id)

    # Remove hyperedges that reference the node, keeping the type index
    # and the counters in step with the hyperedge store.
    dangling = [hid for hid, hyperedge in self._hyperedges.items() if node_id in hyperedge.nodes]
    for hid in dangling:
        hyperedge = self._hyperedges.pop(hid)
        same_type = self._hyperedge_types.get(hyperedge.type)
        if same_type is not None:
            same_type.discard(hid)
            if not same_type:
                del self._hyperedge_types[hyperedge.type]
    if dangling:
        self._stats["hyperedges"] = len(self._hyperedges)
        self._stats["hyperedge_types"] = len(self._hyperedge_types)

    # Remove the node's own adjacency entries, if any exist.
    self._outgoing.pop(node_id, None)
    self._incoming.pop(node_id, None)

    # Finally remove the node itself and refresh the node counters.
    del self._nodes[node_id]
    self._stats["nodes"] -= 1
    self._stats["node_types"] = len(self._node_types)

    return True
|
|
255
|
+
|
|
256
|
+
def get_nodes(self, node_type: str | None = None) -> list[Node]:
    """List stored nodes, optionally restricted to one type.

    Args:
        node_type: When given, only nodes indexed under this type are
            returned; an unknown type yields an empty list.

    Returns:
        A new list of Node objects.
    """
    if node_type is not None:
        matching_ids = self._node_types.get(node_type, set())
        return [self._nodes[nid] for nid in matching_ids]
    return [*self._nodes.values()]
|
|
270
|
+
|
|
271
|
+
def get_node_ids(self, node_type: str | None = None) -> set[str]:
    """Return node IDs, optionally restricted to one type.

    Args:
        node_type: When given, only IDs indexed under this type are
            returned; an unknown type yields an empty set.

    Returns:
        A new set of node IDs (never the internal index set itself).
    """
    if node_type is not None:
        indexed = self._node_types.get(node_type, set())
        return set(indexed)
    return set(self._nodes)
|
|
284
|
+
|
|
285
|
+
# ==================== EDGE OPERATIONS ====================
|
|
286
|
+
|
|
287
|
+
def add_edge(
    self, edge_id: str, edge_type: str, source: str, target: str, properties: dict[str, Any] | None = None
) -> bool:
    """Insert a new edge between two existing nodes.

    Args:
        edge_id: Unique identifier for the edge.
        edge_type: Type/category of the edge.
        source: Source node ID.
        target: Target node ID.
        properties: Optional properties dictionary; a falsy value is
            replaced by a new empty dict.

    Returns:
        bool: True if the edge was added, False if the ID already exists.

    Raises:
        ValueError: If the source or the target node is not in the graph.
            The duplicate-ID check runs first, so a duplicate ID returns
            False even when an endpoint is missing.
    """
    if edge_id in self._edges:
        return False

    # Both endpoints must already be present; the source is checked first.
    for role, endpoint in (("Source", source), ("Target", target)):
        if endpoint not in self._nodes:
            raise ValueError(f"{role} node '{endpoint}' does not exist")

    new_edge = Edge(id=edge_id, type=edge_type, source=source, target=target, properties=properties or {})

    # Store the edge and index it by type.
    self._edges[edge_id] = new_edge
    self._edge_types[edge_type].add(edge_id)

    # Record the edge on both endpoints' adjacency sets.
    self._outgoing[source].add(edge_id)
    self._incoming[target].add(edge_id)

    # Refresh the counters.
    self._stats["edges"] += 1
    self._stats["edge_types"] = len(self._edge_types)
    return True
|
|
324
|
+
|
|
325
|
+
def get_edge(self, edge_id: str) -> Edge | None:
    """Look up an edge by its ID.

    Args:
        edge_id: Edge identifier.

    Returns:
        The stored Edge, or None when no edge has that ID.
    """
    return self._edges[edge_id] if edge_id in self._edges else None
|
|
335
|
+
|
|
336
|
+
def has_edge(self, edge_id: str) -> bool:
    """Report whether an edge with the given ID is stored.

    Args:
        edge_id: Edge identifier.

    Returns:
        bool: True if the ID is a key of the edge store.
    """
    is_known = edge_id in self._edges
    return is_known
|
|
346
|
+
|
|
347
|
+
def remove_edge(self, edge_id: str) -> bool:
    """Delete an edge and unregister it from every index.

    Args:
        edge_id: Edge identifier.

    Returns:
        bool: True if the edge was removed, False if no such edge exists.
    """
    if edge_id not in self._edges:
        return False

    doomed = self._edges[edge_id]

    # Type index: drop the ID and forget the type once nothing is left.
    same_type = self._edge_types[doomed.type]
    same_type.discard(edge_id)
    if not same_type:
        del self._edge_types[doomed.type]

    # Adjacency: detach the edge from both endpoints.
    self._outgoing[doomed.source].discard(edge_id)
    self._incoming[doomed.target].discard(edge_id)

    # Store and counters.
    del self._edges[edge_id]
    self._stats["edges"] -= 1
    self._stats["edge_types"] = len(self._edge_types)
    return True
|
|
376
|
+
|
|
377
|
+
def get_edges(self, edge_type: str | None = None) -> list[Edge]:
    """List stored edges, optionally restricted to one type.

    Args:
        edge_type: When given, only edges indexed under this type are
            returned; an unknown type yields an empty list.

    Returns:
        A new list of Edge objects.
    """
    if edge_type is not None:
        matching_ids = self._edge_types.get(edge_type, set())
        return [self._edges[eid] for eid in matching_ids]
    return [*self._edges.values()]
|
|
391
|
+
|
|
392
|
+
def get_edges_between(self, source: str, target: str, edge_type: str | None = None) -> list[Edge]:
    """Collect edges that leave ``source`` and arrive at ``target``.

    Only the outgoing index of ``source`` is consulted, so edges stored in
    the opposite direction are not returned.

    Args:
        source: Source node ID.
        target: Target node ID.
        edge_type: Optional filter by edge type.

    Returns:
        List of matching Edge objects (empty if ``source`` has no outgoing
        edges).
    """
    leaving = (self._edges[eid] for eid in self._outgoing.get(source, set()))
    return [
        candidate
        for candidate in leaving
        if candidate.target == target and (edge_type is None or candidate.type == edge_type)
    ]
|
|
412
|
+
|
|
413
|
+
# ==================== HYPEREDGE OPERATIONS ====================
|
|
414
|
+
|
|
415
|
+
def add_hyperedge(
    self, hyperedge_id: str, hyperedge_type: str, nodes: set[str], properties: dict[str, Any] | None = None
) -> bool:
    """Insert a new hyperedge over existing nodes.

    Args:
        hyperedge_id: Unique identifier for the hyperedge.
        hyperedge_type: Type/category of the hyperedge.
        nodes: Set of node IDs to connect.
        properties: Optional properties dictionary; a falsy value is
            replaced by a new empty dict.

    Returns:
        bool: True if the hyperedge was added, False if the ID already
        exists.

    Raises:
        ValueError: If any member node is not in the graph, or if fewer
            than 2 nodes are given. Membership is checked before size.
    """
    if hyperedge_id in self._hyperedges:
        return False

    # Every member must already be a node of this graph.
    for member in nodes:
        if member in self._nodes:
            continue
        raise ValueError(f"Node '{member}' does not exist")

    if len(nodes) < 2:
        raise ValueError("Hyperedge must connect at least 2 nodes")

    new_hyperedge = HyperEdge(id=hyperedge_id, type=hyperedge_type, nodes=nodes, properties=properties or {})

    # Store, index by type, refresh counters.
    self._hyperedges[hyperedge_id] = new_hyperedge
    self._hyperedge_types[hyperedge_type].add(hyperedge_id)
    self._stats["hyperedges"] += 1
    self._stats["hyperedge_types"] = len(self._hyperedge_types)
    return True
|
|
449
|
+
|
|
450
|
+
def get_hyperedge(self, hyperedge_id: str) -> HyperEdge | None:
    """Look up a hyperedge by its ID.

    Args:
        hyperedge_id: Hyperedge identifier.

    Returns:
        The stored HyperEdge, or None when no hyperedge has that ID.
    """
    return self._hyperedges[hyperedge_id] if hyperedge_id in self._hyperedges else None
|
|
460
|
+
|
|
461
|
+
def has_hyperedge(self, hyperedge_id: str) -> bool:
    """Report whether a hyperedge with the given ID is stored.

    Args:
        hyperedge_id: Hyperedge identifier.

    Returns:
        bool: True if the ID is a key of the hyperedge store.
    """
    is_known = hyperedge_id in self._hyperedges
    return is_known
|
|
471
|
+
|
|
472
|
+
def get_hyperedges(self, hyperedge_type: str | None = None) -> list[HyperEdge]:
    """List stored hyperedges, optionally restricted to one type.

    Args:
        hyperedge_type: When given, only hyperedges indexed under this
            type are returned; an unknown type yields an empty list.

    Returns:
        A new list of HyperEdge objects.
    """
    if hyperedge_type is not None:
        matching_ids = self._hyperedge_types.get(hyperedge_type, set())
        return [self._hyperedges[hid] for hid in matching_ids]
    return [*self._hyperedges.values()]
|
|
486
|
+
|
|
487
|
+
# ==================== GRAPH TRAVERSAL ====================
|
|
488
|
+
# These methods are placeholders. I am not sure it is useful to focus on traversal,
|
|
489
|
+
# retrieval, analysis, etc. in this module. May be better to focus on the agentic
|
|
490
|
+
# creation of the graph here, and figure out if graph traversal is needed in the
|
|
491
|
+
# use cases we want to support.
|
|
492
|
+
|
|
493
|
+
def get_neighbors(self, node_id: str, direction: str = "both") -> set[str]:
    """Return the IDs of nodes adjacent to ``node_id`` via ordinary edges.

    Hyperedges are not consulted.

    Args:
        node_id: Node identifier.
        direction: "out" follows outgoing edges to their targets, "in"
            follows incoming edges to their sources, "both" does both.
            Any other value selects nothing.

    Returns:
        Set of neighboring node IDs; empty if the node is unknown.
    """
    if node_id not in self._nodes:
        return set()

    follow_out = direction in ("out", "both")
    follow_in = direction in ("in", "both")

    found = set()
    if follow_out:
        found.update(self._edges[eid].target for eid in self._outgoing.get(node_id, set()))
    if follow_in:
        found.update(self._edges[eid].source for eid in self._incoming.get(node_id, set()))
    return found
|
|
519
|
+
|
|
520
|
+
def get_connected_edges(self, node_id: str, direction: str = "both") -> list[Edge]:
    """Return the ordinary edges attached to a node.

    Args:
        node_id: Node identifier.
        direction: "out" for edges leaving the node, "in" for edges
            entering it, "both" for outgoing edges followed by incoming
            ones. Any other value selects nothing.

    Returns:
        List of connected Edge objects; empty if the node has no entry in
        the selected adjacency index.
    """
    # Pick the adjacency indexes to read, outgoing first.
    selected = []
    if direction in ("out", "both"):
        selected.append(self._outgoing)
    if direction in ("in", "both"):
        selected.append(self._incoming)

    return [self._edges[eid] for index in selected for eid in index.get(node_id, set())]
|
|
541
|
+
|
|
542
|
+
def find_paths(self, source: str, target: str, max_length: int = 3) -> list[list[Edge]]:
    """Find all paths between two nodes, following edge direction.

    Paths are explored breadth-first along outgoing edges and are returned
    shortest first. A path ends the first time it reaches ``target``; the
    empty path is never returned, so ``source == target`` yields only
    cycles back to the source.

    No state is shared between partial paths. A shared "already expanded
    at this depth" set would silently discard alternative routes that pass
    through the same intermediate node, e.g. parallel edges in a
    multigraph. As a consequence a path may revisit nodes, and the number
    of results can grow quickly with ``max_length`` on dense graphs.

    Args:
        source: Source node ID.
        target: Target node ID.
        max_length: Maximum number of edges in a returned path.

    Returns:
        List of paths, each path is a list of Edge objects. Empty if
        either endpoint is not in the graph.
    """
    if source not in self._nodes or target not in self._nodes:
        return []

    paths = []
    queue = deque([([], source)])

    while queue:
        path, current = queue.popleft()

        # Reaching the target completes the path; it is not extended.
        if path and current == target:
            paths.append(path)
            continue

        # The length bound is what guarantees termination on cycles.
        if len(path) >= max_length:
            continue

        for edge_id in self._outgoing.get(current, set()):
            edge = self._edges[edge_id]
            queue.append((path + [edge], edge.target))

    return paths
|
|
582
|
+
|
|
583
|
+
# ==================== GRAPH ANALYSIS ====================
|
|
584
|
+
# These methods are placeholders. Similar to the traversal methods, not fully clear if
|
|
585
|
+
# these are needed.
|
|
586
|
+
|
|
587
|
+
def get_statistics(self) -> dict[str, Any]:
    """Summarise the graph's size, type distributions and connectivity.

    The internal counters are first resynchronised with the actual
    container sizes, so calling this method also updates ``self._stats``.

    Returns:
        Dictionary with the keys ``basic`` (copy of the counters),
        ``node_types`` / ``edge_types`` / ``hyperedge_types`` (type name ->
        element count) and ``connectivity`` (counts of nodes without and
        with neighbours, as reported by ``get_neighbors``).
    """
    node_total = len(self._nodes)

    # Resynchronise the running counters from the real data.
    self._stats.update(
        nodes=node_total,
        edges=len(self._edges),
        hyperedges=len(self._hyperedges),
        node_types=len(self._node_types),
        edge_types=len(self._edge_types),
        hyperedge_types=len(self._hyperedge_types),
    )

    def tally(type_index):
        # Type name -> number of IDs registered under that type.
        return {type_name: len(members) for type_name, members in type_index.items()}

    # A node counts as isolated when get_neighbors finds nothing for it.
    lonely = sum(1 for nid in self._nodes if not self.get_neighbors(nid))

    return {
        "basic": self._stats.copy(),
        "node_types": tally(self._node_types),
        "edge_types": tally(self._edge_types),
        "hyperedge_types": tally(self._hyperedge_types),
        "connectivity": {"isolated_nodes": lonely, "connected_nodes": node_total - lonely},
    }
|
|
630
|
+
|
|
631
|
+
def get_subgraph(self, node_ids: set[str], include_edges: bool = True) -> "Graph":
    """Extract a subgraph containing specified nodes.

    The subgraph receives its own shallow copies of the property
    dictionaries, so adding or removing a property on one graph does not
    affect the other. Hyperedges are not copied.

    Args:
        node_ids: IDs of the nodes to include. IDs that are not in this
            graph are ignored. Any iterable of IDs is accepted.
        include_edges: Whether to include edges between included nodes.

    Returns:
        New Graph object named ``"<name>_subgraph"`` with the same
        ``directed`` flag.
    """
    subgraph = Graph(name=f"{self.name}_subgraph", directed=self.directed)

    # Copy the requested nodes that actually exist here.
    for node_id in node_ids:
        node = self._nodes.get(node_id)
        if node is not None:
            subgraph.add_node(node.id, node.type, dict(node.properties or {}))

    if include_edges:
        for edge in self._edges.values():
            # Test against the nodes that were really copied rather than
            # the requested IDs: this keeps add_edge from raising on an
            # endpoint that is absent from this graph, and does not depend
            # on node_ids supporting repeated membership tests.
            if edge.source in subgraph and edge.target in subgraph:
                subgraph.add_edge(edge.id, edge.type, edge.source, edge.target, dict(edge.properties or {}))

    return subgraph
|
|
656
|
+
|
|
657
|
+
# ==================== SERIALIZATION ====================
|
|
658
|
+
# Placeholder methods, as serialisation should probably be handled by the corresponding
|
|
659
|
+
# legacy BioCypher modules.
|
|
660
|
+
|
|
661
|
+
def to_dict(self) -> dict[str, Any]:
    """Serialise the whole graph into plain Python containers.

    Returns:
        Dictionary with the keys ``name``, ``directed``, ``nodes``,
        ``edges``, ``hyperedges`` (each a list of the elements' own
        ``to_dict()`` output) and ``statistics`` (the result of
        ``get_statistics()``).
    """
    payload = {"name": self.name, "directed": self.directed}

    sections = (
        ("nodes", self._nodes),
        ("edges", self._edges),
        ("hyperedges", self._hyperedges),
    )
    for section, store in sections:
        payload[section] = [element.to_dict() for element in store.values()]

    payload["statistics"] = self.get_statistics()
    return payload
|
|
675
|
+
|
|
676
|
+
def to_json(self) -> str:
    """Serialise the graph to a JSON document.

    Returns:
        The output of ``to_dict()`` encoded as JSON with an indent of 2.
    """
    as_dict = self.to_dict()
    return json.dumps(as_dict, indent=2)
|
|
683
|
+
|
|
684
|
+
@classmethod
|
|
685
|
+
def from_dict(cls, data: dict[str, Any]) -> "Graph":
|
|
686
|
+
"""Create graph from dictionary representation.
|
|
687
|
+
|
|
688
|
+
Args:
|
|
689
|
+
data: Dictionary representation of the graph
|
|
690
|
+
|
|
691
|
+
Returns:
|
|
692
|
+
Graph object
|
|
693
|
+
"""
|
|
694
|
+
graph = cls(name=data["name"], directed=data["directed"])
|
|
695
|
+
|
|
696
|
+
# Add nodes
|
|
697
|
+
for node_data in data["nodes"]:
|
|
698
|
+
node = Node.from_dict(node_data)
|
|
699
|
+
graph._nodes[node.id] = node
|
|
700
|
+
graph._node_types[node.type].add(node.id)
|
|
701
|
+
|
|
702
|
+
# Add edges
|
|
703
|
+
for edge_data in data["edges"]:
|
|
704
|
+
edge = Edge.from_dict(edge_data)
|
|
705
|
+
graph._edges[edge.id] = edge
|
|
706
|
+
graph._edge_types[edge.type].add(edge.id)
|
|
707
|
+
graph._outgoing[edge.source].add(edge.id)
|
|
708
|
+
graph._incoming[edge.target].add(edge.id)
|
|
709
|
+
|
|
710
|
+
# Add hyperedges
|
|
711
|
+
for hyperedge_data in data["hyperedges"]:
|
|
712
|
+
hyperedge = HyperEdge.from_dict(hyperedge_data)
|
|
713
|
+
graph._hyperedges[hyperedge.id] = hyperedge
|
|
714
|
+
graph._hyperedge_types[hyperedge.type].add(hyperedge.id)
|
|
715
|
+
|
|
716
|
+
# Update statistics
|
|
717
|
+
graph._stats["nodes"] = len(graph._nodes)
|
|
718
|
+
graph._stats["edges"] = len(graph._edges)
|
|
719
|
+
graph._stats["hyperedges"] = len(graph._hyperedges)
|
|
720
|
+
graph._stats["node_types"] = len(graph._node_types)
|
|
721
|
+
graph._stats["edge_types"] = len(graph._edge_types)
|
|
722
|
+
graph._stats["hyperedge_types"] = len(graph._hyperedge_types)
|
|
723
|
+
|
|
724
|
+
return graph
|
|
725
|
+
|
|
726
|
+
@classmethod
|
|
727
|
+
def from_json_string(cls, json_str: str) -> "Graph":
|
|
728
|
+
"""Create graph from JSON string.
|
|
729
|
+
|
|
730
|
+
Args:
|
|
731
|
+
json_str: JSON string representation of the graph
|
|
732
|
+
|
|
733
|
+
Returns:
|
|
734
|
+
Graph object
|
|
735
|
+
"""
|
|
736
|
+
data = json.loads(json_str)
|
|
737
|
+
return cls.from_dict(data)
|
|
738
|
+
|
|
739
|
+
def from_json(self, json_str: str) -> None:
    """Replace this graph's content with data decoded from JSON.

    The JSON is decoded before anything is touched; the graph is then
    cleared and refilled in place. Edge endpoints and hyperedge members
    are not checked against the node store.

    Args:
        json_str: JSON string representation of the graph, with the keys
            ``name``, ``directed``, ``nodes``, ``edges`` and ``hyperedges``.
    """
    payload = json.loads(json_str)

    # Start from an empty graph.
    self.clear()

    # Graph-level attributes.
    self.name = payload["name"]
    self.directed = payload["directed"]

    # Nodes: store and type index.
    for raw_node in payload["nodes"]:
        loaded_node = Node.from_dict(raw_node)
        self._nodes[loaded_node.id] = loaded_node
        self._node_types[loaded_node.type].add(loaded_node.id)

    # Edges: store, type index and both adjacency indexes.
    for raw_edge in payload["edges"]:
        loaded_edge = Edge.from_dict(raw_edge)
        self._edges[loaded_edge.id] = loaded_edge
        self._edge_types[loaded_edge.type].add(loaded_edge.id)
        self._outgoing[loaded_edge.source].add(loaded_edge.id)
        self._incoming[loaded_edge.target].add(loaded_edge.id)

    # Hyperedges: store and type index.
    for raw_hyperedge in payload["hyperedges"]:
        loaded_hyperedge = HyperEdge.from_dict(raw_hyperedge)
        self._hyperedges[loaded_hyperedge.id] = loaded_hyperedge
        self._hyperedge_types[loaded_hyperedge.type].add(loaded_hyperedge.id)

    # Counters reflect what was just loaded.
    self._stats.update(
        nodes=len(self._nodes),
        edges=len(self._edges),
        hyperedges=len(self._hyperedges),
        node_types=len(self._node_types),
        edge_types=len(self._edge_types),
        hyperedge_types=len(self._hyperedge_types),
    )
|
|
783
|
+
|
|
784
|
+
# ==================== UTILITY METHODS ====================
|
|
785
|
+
|
|
786
|
+
def clear(self) -> None:
    """Empty the graph: all elements, all indexes, and the counters.

    The stores and indexes are emptied in place; the counter dict is
    replaced by a fresh one with every count at zero. ``name`` and
    ``directed`` are left as they are.
    """
    containers = (
        self._nodes,
        self._edges,
        self._hyperedges,
        self._node_types,
        self._edge_types,
        self._hyperedge_types,
        self._outgoing,
        self._incoming,
    )
    for container in containers:
        container.clear()

    counter_names = ("nodes", "edges", "hyperedges", "node_types", "edge_types", "hyperedge_types")
    self._stats = dict.fromkeys(counter_names, 0)
|
|
797
|
+
|
|
798
|
+
def __len__(self) -> int:
|
|
799
|
+
"""Return the number of nodes in the graph."""
|
|
800
|
+
return len(self._nodes)
|
|
801
|
+
|
|
802
|
+
def __contains__(self, node_id: str) -> bool:
|
|
803
|
+
"""Check if a node exists in the graph."""
|
|
804
|
+
return node_id in self._nodes
|
|
805
|
+
|
|
806
|
+
def __iter__(self) -> Iterator[Node]:
|
|
807
|
+
"""Iterate over all nodes in the graph."""
|
|
808
|
+
return iter(self._nodes.values())
|
|
809
|
+
|
|
810
|
+
def __str__(self) -> str:
|
|
811
|
+
"""String representation of the graph."""
|
|
812
|
+
stats = self.get_statistics()
|
|
813
|
+
return (
|
|
814
|
+
f"Graph(name='{self.name}', nodes={stats['basic']['nodes']}, "
|
|
815
|
+
f"edges={stats['basic']['edges']}, hyperedges={stats['basic']['hyperedges']})"
|
|
816
|
+
)
|
|
817
|
+
|
|
818
|
+
def __repr__(self) -> str:
|
|
819
|
+
return self.__str__()
|