biocypher 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic; see the registry's advisory page for details.

biocypher/_workflow.py ADDED
@@ -0,0 +1,798 @@
1
+ """Unified BioCypher Workflow API for knowledge graph workflows.
2
+
3
+ This module provides a streamlined interface for creating and managing
4
+ knowledge graphs using the unified Graph representation, with optional
5
+ schema and ontology support. Designed for both agentic and deterministic
6
+ workflows.
7
+
8
+ ## Design Philosophy
9
+
10
+ This API is designed with the following principles:
11
+ 1. **Agentic-First**: Optimized for LLM agent workflows with computable functions
12
+ 2. **Zero Dependencies**: Pure Python implementation for maximum compatibility
13
+ 3. **Future-Proof**: Native BioCypher objects enable advanced agentic features
14
+ 4. **Migration-Ready**: Wrapper methods provide compatibility with existing tools
15
+ 5. **Progressive Validation**: Optional validation and deduplication with flexible modes
16
+
17
+ ## Validation and Deduplication
18
+
19
+ Unlike the legacy BioCypher which enforces strict validation and deduplication,
20
+ this API provides **progressive validation** with three modes:
21
+
22
+ - **"none"** (default): No validation or deduplication - maximum flexibility for agents
23
+ - **"warn"**: Log warnings for schema violations and duplicates but continue processing
24
+ - **"strict"**: Enforce schema validation and deduplication - fail fast on violations
25
+
26
+ This approach allows:
27
+ - **Agents** to work with maximum flexibility (no validation overhead)
28
+ - **Deterministic workflows** to use validation when needed
29
+ - **Gradual migration** from legacy BioCypher (start with "none", add validation later)
30
+
31
+ ## Future Migration Path
32
+
33
+ This module represents the future direction of BioCypher's in-memory graph
34
+ functionality. The plan is to:
35
+
36
+ 1. **Phase 1** (Current): Keep separate from legacy code, provide compatibility wrappers
37
+ 2. **Phase 2**: Replace legacy in-memory implementations (PandasKG, NetworkxKG)
38
+ 3. **Phase 3**: Add advanced agentic features (computable functions, decision logging)
39
+ 4. **Phase 4**: Integrate with main BioCypher class as unified interface
40
+
41
+ ## Agentic Features (Future)
42
+
43
+ - Computable functions attached to nodes/edges
44
+ - Decision logging and reasoning traces
45
+ - Counterfactual inference capabilities
46
+ - MCP (Model Context Protocol) interface integration
47
+ - Local graph computation for agent workflows
48
+
49
+ """
50
+
51
+ import json
52
+
53
+ from typing import Any
54
+
55
+ import yaml
56
+
57
+ from ._graph import Edge, Graph, HyperEdge, Node
58
+ from ._logger import logger
59
+
60
+
61
+ class BioCypherWorkflow:
62
+ """Unified BioCypher interface for knowledge graph workflows.
63
+
64
+ This class provides a clean, simple API for creating and managing
65
+ knowledge graphs with optional schema and ontology support. Designed
66
+ for both agentic and deterministic workflows.
67
+ """
68
+
69
+ def __init__(
70
+ self,
71
+ name: str = "workflow_graph",
72
+ directed: bool = True,
73
+ schema: dict[str, Any] | None = None,
74
+ schema_file: str | None = None,
75
+ head_ontology_url: str | None = None,
76
+ validation_mode: str = "none",
77
+ deduplication: bool = False,
78
+ ):
79
+ """Initialize the workflow with a unified graph.
80
+
81
+ Args:
82
+ name: Name of the knowledge graph
83
+ directed: Whether the graph is directed (default: True)
84
+ schema: Dictionary defining the knowledge graph schema
85
+ schema_file: Path to YAML schema file
86
+ head_ontology_url: URL to ontology file (defaults to Biolink model)
87
+ validation_mode: Validation level ("none", "warn", "strict")
88
+ deduplication: Whether to enable deduplication (default: False)
89
+ """
90
+ self.graph = Graph(name=name, directed=directed)
91
+ self.name = name
92
+ self.schema = schema
93
+ self.schema_file = schema_file
94
+ self.head_ontology_url = head_ontology_url
95
+ self.validation_mode = validation_mode
96
+ self.deduplication = deduplication
97
+
98
+ # Track seen entities for deduplication
99
+ self._seen_nodes = set()
100
+ self._seen_edges = set()
101
+
102
+ # Initialize schema if provided
103
+ if schema_file:
104
+ self._load_schema_from_file(schema_file)
105
+ elif schema:
106
+ self._load_schema_from_dict(schema)
107
+
108
+ def _load_schema_from_file(self, schema_file: str) -> None:
109
+ """Load schema from YAML file."""
110
+ try:
111
+ with open(schema_file, "r") as f:
112
+ self.schema = yaml.safe_load(f)
113
+ logger.info(f"Loaded schema from {schema_file}")
114
+ except Exception as e:
115
+ logger.warning(f"Could not load schema from {schema_file}: {e}")
116
+
117
+ def _load_schema_from_dict(self, schema: dict[str, Any]) -> None:
118
+ """Load schema from dictionary."""
119
+ self.schema = schema
120
+ logger.info("Loaded schema from dictionary")
121
+
122
+ # ==================== NODE OPERATIONS ====================
123
+
124
+ def add_node(self, node_id: str, node_type: str, **properties) -> bool:
125
+ """Add a node to the knowledge graph.
126
+
127
+ Args:
128
+ node_id: Unique identifier for the node
129
+ node_type: Type/category of the node
130
+ **properties: Node properties as keyword arguments
131
+
132
+ Returns:
133
+ bool: True if node was added, False if it already exists
134
+
135
+ Example:
136
+ workflow.add_node("protein_1", "protein", name="TP53", function="tumor_suppressor")
137
+ """
138
+ # Check for duplicates if deduplication is enabled
139
+ if self.deduplication:
140
+ if node_id in self._seen_nodes:
141
+ if self.validation_mode == "warn":
142
+ logger.warning(f"Duplicate node ID '{node_id}' detected")
143
+ elif self.validation_mode == "strict":
144
+ raise ValueError(f"Duplicate node ID '{node_id}' not allowed in strict mode")
145
+ return False
146
+ self._seen_nodes.add(node_id)
147
+
148
+ # Validate against schema if validation is enabled
149
+ if self.validation_mode in ["warn", "strict"]:
150
+ is_valid = self.validate_against_schema(node_type, properties)
151
+ if not is_valid:
152
+ if self.validation_mode == "strict":
153
+ raise ValueError(f"Node '{node_id}' of type '{node_type}' failed schema validation")
154
+ elif self.validation_mode == "warn":
155
+ logger.warning(f"Node '{node_id}' of type '{node_type}' failed schema validation")
156
+
157
+ # Try to add node to graph (Graph class handles its own deduplication)
158
+ result = self.graph.add_node(node_id, node_type, properties)
159
+
160
+ # If deduplication is enabled and we're tracking, update our tracking
161
+ if self.deduplication and result:
162
+ self._seen_nodes.add(node_id)
163
+
164
+ return result
165
+
166
+ def get_node(self, node_id: str) -> Node | None:
167
+ """Get a node by ID.
168
+
169
+ Args:
170
+ node_id: Node identifier
171
+
172
+ Returns:
173
+ Node object or None if not found
174
+ """
175
+ return self.graph.get_node(node_id)
176
+
177
+ def get_nodes(self, node_type: str | None = None) -> list[Node]:
178
+ """Get all nodes, optionally filtered by type.
179
+
180
+ Args:
181
+ node_type: Optional filter by node type
182
+
183
+ Returns:
184
+ List of Node objects
185
+ """
186
+ return self.graph.get_nodes(node_type)
187
+
188
+ def has_node(self, node_id: str) -> bool:
189
+ """Check if a node exists.
190
+
191
+ Args:
192
+ node_id: Node identifier
193
+
194
+ Returns:
195
+ bool: True if node exists
196
+ """
197
+ return self.graph.has_node(node_id)
198
+
199
+ def remove_node(self, node_id: str) -> bool:
200
+ """Remove a node and all its connected edges.
201
+
202
+ Args:
203
+ node_id: Node identifier
204
+
205
+ Returns:
206
+ bool: True if node was removed, False if not found
207
+ """
208
+ return self.graph.remove_node(node_id)
209
+
210
+ # ==================== EDGE OPERATIONS ====================
211
+
212
+ def add_edge(self, edge_id: str, edge_type: str, source: str, target: str, **properties) -> bool:
213
+ """Add an edge to the knowledge graph.
214
+
215
+ Args:
216
+ edge_id: Unique identifier for the edge
217
+ edge_type: Type/category of the edge
218
+ source: Source node ID
219
+ target: Target node ID
220
+ **properties: Edge properties as keyword arguments
221
+
222
+ Returns:
223
+ bool: True if edge was added, False if it already exists
224
+
225
+ Example:
226
+ workflow.add_edge("interaction_1", "interaction", "protein_1", "protein_2",
227
+ confidence=0.8, method="yeast_two_hybrid")
228
+ """
229
+ # Check for duplicates if deduplication is enabled
230
+ if self.deduplication:
231
+ edge_key = (edge_id, edge_type)
232
+ if edge_key in self._seen_edges:
233
+ if self.validation_mode == "warn":
234
+ logger.warning(f"Duplicate edge ID '{edge_id}' of type '{edge_type}' detected")
235
+ elif self.validation_mode == "strict":
236
+ raise ValueError(f"Duplicate edge ID '{edge_id}' not allowed in strict mode")
237
+ return False
238
+ self._seen_edges.add(edge_key)
239
+
240
+ # Validate against schema if validation is enabled
241
+ if self.validation_mode in ["warn", "strict"]:
242
+ is_valid = self.validate_against_schema(edge_type, properties)
243
+ if not is_valid:
244
+ if self.validation_mode == "strict":
245
+ raise ValueError(f"Edge '{edge_id}' of type '{edge_type}' failed schema validation")
246
+ elif self.validation_mode == "warn":
247
+ logger.warning(f"Edge '{edge_id}' of type '{edge_type}' failed schema validation")
248
+
249
+ # Try to add edge to graph (Graph class handles its own deduplication)
250
+ result = self.graph.add_edge(edge_id, edge_type, source, target, properties)
251
+
252
+ # If deduplication is enabled and we're tracking, update our tracking
253
+ if self.deduplication and result:
254
+ edge_key = (edge_id, edge_type)
255
+ self._seen_edges.add(edge_key)
256
+
257
+ return result
258
+
259
+ def get_edge(self, edge_id: str) -> Edge | None:
260
+ """Get an edge by ID.
261
+
262
+ Args:
263
+ edge_id: Edge identifier
264
+
265
+ Returns:
266
+ Edge object or None if not found
267
+ """
268
+ return self.graph.get_edge(edge_id)
269
+
270
+ def get_edges(self, edge_type: str | None = None) -> list[Edge]:
271
+ """Get all edges, optionally filtered by type.
272
+
273
+ Args:
274
+ edge_type: Optional filter by edge type
275
+
276
+ Returns:
277
+ List of Edge objects
278
+ """
279
+ return self.graph.get_edges(edge_type)
280
+
281
+ def get_edges_between(self, source: str, target: str, edge_type: str | None = None) -> list[Edge]:
282
+ """Get edges between two nodes.
283
+
284
+ Args:
285
+ source: Source node ID
286
+ target: Target node ID
287
+ edge_type: Optional filter by edge type
288
+
289
+ Returns:
290
+ List of Edge objects
291
+ """
292
+ return self.graph.get_edges_between(source, target, edge_type)
293
+
294
+ def has_edge(self, edge_id: str) -> bool:
295
+ """Check if an edge exists.
296
+
297
+ Args:
298
+ edge_id: Edge identifier
299
+
300
+ Returns:
301
+ bool: True if edge exists
302
+ """
303
+ return self.graph.has_edge(edge_id)
304
+
305
+ def remove_edge(self, edge_id: str) -> bool:
306
+ """Remove an edge from the graph.
307
+
308
+ Args:
309
+ edge_id: Edge identifier
310
+
311
+ Returns:
312
+ bool: True if edge was removed, False if not found
313
+ """
314
+ return self.graph.remove_edge(edge_id)
315
+
316
+ # ==================== HYPEREDGE OPERATIONS ====================
317
+
318
+ def add_hyperedge(self, hyperedge_id: str, hyperedge_type: str, nodes: set[str], **properties) -> bool:
319
+ """Add a hyperedge connecting multiple nodes.
320
+
321
+ Args:
322
+ hyperedge_id: Unique identifier for the hyperedge
323
+ hyperedge_type: Type/category of the hyperedge
324
+ nodes: Set of node IDs to connect
325
+ **properties: Hyperedge properties as keyword arguments
326
+
327
+ Returns:
328
+ bool: True if hyperedge was added, False if it already exists
329
+
330
+ Example:
331
+ workflow.add_hyperedge("complex_1", "protein_complex", {"protein_1", "protein_2", "protein_3"},
332
+ name="transcription_factor_complex")
333
+ """
334
+ return self.graph.add_hyperedge(hyperedge_id, hyperedge_type, nodes, properties)
335
+
336
+ def get_hyperedge(self, hyperedge_id: str) -> HyperEdge | None:
337
+ """Get a hyperedge by ID.
338
+
339
+ Args:
340
+ hyperedge_id: Hyperedge identifier
341
+
342
+ Returns:
343
+ HyperEdge object or None if not found
344
+ """
345
+ return self.graph.get_hyperedge(hyperedge_id)
346
+
347
+ def get_hyperedges(self, hyperedge_type: str | None = None) -> list[HyperEdge]:
348
+ """Get all hyperedges, optionally filtered by type.
349
+
350
+ Args:
351
+ hyperedge_type: Optional filter by hyperedge type
352
+
353
+ Returns:
354
+ List of HyperEdge objects
355
+ """
356
+ return self.graph.get_hyperedges(hyperedge_type)
357
+
358
+ def has_hyperedge(self, hyperedge_id: str) -> bool:
359
+ """Check if a hyperedge exists.
360
+
361
+ Args:
362
+ hyperedge_id: Hyperedge identifier
363
+
364
+ Returns:
365
+ bool: True if hyperedge exists
366
+ """
367
+ return self.graph.has_hyperedge(hyperedge_id)
368
+
369
+ # ==================== GRAPH TRAVERSAL ====================
370
+
371
+ def get_neighbors(self, node_id: str, direction: str = "both") -> set[str]:
372
+ """Get neighboring nodes.
373
+
374
+ Args:
375
+ node_id: Node identifier
376
+ direction: "in", "out", or "both"
377
+
378
+ Returns:
379
+ Set of neighboring node IDs
380
+ """
381
+ return self.graph.get_neighbors(node_id, direction)
382
+
383
+ def get_connected_edges(self, node_id: str, direction: str = "both") -> list[Edge]:
384
+ """Get edges connected to a node.
385
+
386
+ Args:
387
+ node_id: Node identifier
388
+ direction: "in", "out", or "both"
389
+
390
+ Returns:
391
+ List of connected Edge objects
392
+ """
393
+ return self.graph.get_connected_edges(node_id, direction)
394
+
395
+ def find_paths(self, source: str, target: str, max_length: int = 3) -> list[list[Edge]]:
396
+ """Find all paths between two nodes.
397
+
398
+ Args:
399
+ source: Source node ID
400
+ target: Target node ID
401
+ max_length: Maximum path length
402
+
403
+ Returns:
404
+ List of paths, each path is a list of Edge objects
405
+ """
406
+ return self.graph.find_paths(source, target, max_length)
407
+
408
+ # ==================== QUERY INTERFACE ====================
409
+
410
+ def query_nodes(self, node_type: str | None = None) -> list[dict[str, Any]]:
411
+ """Query nodes in the knowledge graph.
412
+
413
+ Args:
414
+ node_type: Optional filter by node type
415
+
416
+ Returns:
417
+ List of node dictionaries
418
+ """
419
+ nodes = self.graph.get_nodes(node_type)
420
+ return [node.to_dict() for node in nodes]
421
+
422
+ def query_edges(self, edge_type: str | None = None) -> list[dict[str, Any]]:
423
+ """Query edges in the knowledge graph.
424
+
425
+ Args:
426
+ edge_type: Optional filter by edge type
427
+
428
+ Returns:
429
+ List of edge dictionaries
430
+ """
431
+ edges = self.graph.get_edges(edge_type)
432
+ return [edge.to_dict() for edge in edges]
433
+
434
+ def query_hyperedges(self, hyperedge_type: str | None = None) -> list[dict[str, Any]]:
435
+ """Query hyperedges in the knowledge graph.
436
+
437
+ Args:
438
+ hyperedge_type: Optional filter by hyperedge type
439
+
440
+ Returns:
441
+ List of hyperedge dictionaries
442
+ """
443
+ hyperedges = self.graph.get_hyperedges(hyperedge_type)
444
+ return [hyperedge.to_dict() for hyperedge in hyperedges]
445
+
446
+ def find_connected_components(self, node_id: str, max_depth: int = 2) -> dict[str, Any]:
447
+ """Find connected components around a node.
448
+
449
+ Args:
450
+ node_id: Starting node ID
451
+ max_depth: Maximum depth to explore
452
+
453
+ Returns:
454
+ Dictionary with nodes and edges in the component
455
+ """
456
+ if not self.graph.has_node(node_id):
457
+ return {"nodes": [], "edges": [], "hyperedges": []}
458
+
459
+ # Collect nodes within max_depth
460
+ component_nodes = {node_id}
461
+ current_level = {node_id}
462
+
463
+ for depth in range(max_depth):
464
+ next_level = set()
465
+ for node in current_level:
466
+ neighbors = self.graph.get_neighbors(node)
467
+ next_level.update(neighbors)
468
+ current_level = next_level - component_nodes
469
+ component_nodes.update(current_level)
470
+
471
+ if not current_level:
472
+ break
473
+
474
+ # Get subgraph
475
+ subgraph = self.graph.get_subgraph(component_nodes)
476
+
477
+ return {
478
+ "nodes": [node.to_dict() for node in subgraph.get_nodes()],
479
+ "edges": [edge.to_dict() for edge in subgraph.get_edges()],
480
+ "hyperedges": [hyperedge.to_dict() for hyperedge in subgraph.get_hyperedges()],
481
+ "statistics": subgraph.get_statistics(),
482
+ }
483
+
484
+ # ==================== GRAPH ANALYSIS ====================
485
+
486
+ def get_statistics(self) -> dict[str, Any]:
487
+ """Get comprehensive graph statistics.
488
+
489
+ Returns:
490
+ Dictionary with graph statistics
491
+ """
492
+ return self.graph.get_statistics()
493
+
494
+ def get_summary(self) -> dict[str, Any]:
495
+ """Get a human-readable summary of the graph.
496
+
497
+ Returns:
498
+ Dictionary with graph summary
499
+ """
500
+ stats = self.graph.get_statistics()
501
+
502
+ # Get top node types
503
+ node_types = stats["node_types"]
504
+ top_node_types = sorted(node_types.items(), key=lambda x: x[1], reverse=True)[:5]
505
+
506
+ # Get top edge types
507
+ edge_types = stats["edge_types"]
508
+ top_edge_types = sorted(edge_types.items(), key=lambda x: x[1], reverse=True)[:5]
509
+
510
+ return {
511
+ "name": self.name,
512
+ "total_nodes": stats["basic"]["nodes"],
513
+ "total_edges": stats["basic"]["edges"],
514
+ "total_hyperedges": stats["basic"]["hyperedges"],
515
+ "top_node_types": top_node_types,
516
+ "top_edge_types": top_edge_types,
517
+ "connectivity": stats["connectivity"],
518
+ }
519
+
520
+ # ==================== SCHEMA AND ONTOLOGY SUPPORT ====================
521
+
522
+ def get_schema(self) -> dict[str, Any] | None:
523
+ """Get the current schema configuration.
524
+
525
+ Returns:
526
+ Dictionary representing the schema or None if no schema
527
+ """
528
+ return self.schema
529
+
530
+ def export_schema(self, filepath: str) -> None:
531
+ """Export the current schema to a YAML file.
532
+
533
+ Args:
534
+ filepath: Path to save the schema file
535
+ """
536
+ if self.schema:
537
+ with open(filepath, "w") as f:
538
+ yaml.dump(self.schema, f, default_flow_style=False)
539
+ logger.info(f"Schema exported to {filepath}")
540
+ else:
541
+ logger.warning("No schema to export")
542
+
543
+ def validate_against_schema(self, node_type: str, properties: dict[str, Any]) -> bool:
544
+ """Validate node properties against schema (if available).
545
+
546
+ Args:
547
+ node_type: Type of node to validate
548
+ properties: Properties to validate
549
+
550
+ Returns:
551
+ bool: True if valid, False otherwise
552
+ """
553
+ if not self.schema or node_type not in self.schema:
554
+ return True # No schema or type not in schema, assume valid
555
+
556
+ schema_entry = self.schema[node_type]
557
+ if "properties" not in schema_entry:
558
+ return True # No property constraints
559
+
560
+ required_properties = schema_entry["properties"]
561
+
562
+ # Check if all required properties are present and have correct types
563
+ for prop_name, prop_type in required_properties.items():
564
+ if prop_name not in properties:
565
+ logger.warning(f"Missing required property '{prop_name}' for node type '{node_type}'")
566
+ return False
567
+
568
+ # Check property type
569
+ actual_value = properties[prop_name]
570
+ if not self._validate_property_type(actual_value, prop_type):
571
+ logger.warning(
572
+ f"Property '{prop_name}' has wrong type. Expected {prop_type}, got {type(actual_value).__name__}"
573
+ )
574
+ return False
575
+
576
+ return True
577
+
578
+ def _validate_property_type(self, value: Any, expected_type: str) -> bool:
579
+ """Validate that a property value matches the expected type.
580
+
581
+ Args:
582
+ value: The actual value
583
+ expected_type: The expected type as string (e.g., 'str', 'int', 'float')
584
+
585
+ Returns:
586
+ bool: True if type matches, False otherwise
587
+ """
588
+ type_mapping = {
589
+ "str": str,
590
+ "int": int,
591
+ "float": float,
592
+ "bool": bool,
593
+ "list": list,
594
+ "dict": dict,
595
+ }
596
+
597
+ if expected_type not in type_mapping:
598
+ return True # Unknown type, assume valid
599
+
600
+ expected_python_type = type_mapping[expected_type]
601
+ return isinstance(value, expected_python_type)
602
+
603
+ # ==================== SERIALIZATION ====================
604
+
605
+ def to_json(self) -> str:
606
+ """Export the knowledge graph to JSON format.
607
+
608
+ Returns:
609
+ JSON string representation of the graph
610
+ """
611
+ return self.graph.to_json()
612
+
613
+ def from_json(self, json_data: str) -> None:
614
+ """Import knowledge graph from JSON format.
615
+
616
+ Args:
617
+ json_data: JSON string containing graph data
618
+ """
619
+ data = json.loads(json_data)
620
+ self.graph = Graph.from_dict(data)
621
+ self.name = self.graph.name
622
+
623
+ def save(self, filepath: str) -> None:
624
+ """Save the graph to a file.
625
+
626
+ Args:
627
+ filepath: Path to save the graph
628
+ """
629
+ with open(filepath, "w") as f:
630
+ f.write(self.to_json())
631
+ logger.info(f"Graph saved to {filepath}")
632
+
633
+ def load(self, filepath: str) -> None:
634
+ """Load the graph from a file.
635
+
636
+ Args:
637
+ filepath: Path to load the graph from
638
+ """
639
+ with open(filepath, "r") as f:
640
+ json_data = f.read()
641
+ self.from_json(json_data)
642
+ logger.info(f"Graph loaded from {filepath}")
643
+
644
+ # ==================== UTILITY METHODS ====================
645
+
646
+ def clear(self) -> None:
647
+ """Clear all nodes and edges from the graph."""
648
+ self.graph = Graph(name=self.name, directed=self.graph.directed)
649
+ logger.info("Graph cleared")
650
+
651
+ def copy(self) -> "BioCypherWorkflow":
652
+ """Create a copy of the workflow and its graph.
653
+
654
+ Returns:
655
+ New BioCypherWorkflow instance
656
+ """
657
+ new_workflow = BioCypherWorkflow(
658
+ name=self.name, directed=self.graph.directed, schema=self.schema, head_ontology_url=self.head_ontology_url
659
+ )
660
+ new_workflow.from_json(self.to_json())
661
+ return new_workflow
662
+
663
+ def get_graph(self) -> Graph:
664
+ """Get the underlying Graph object.
665
+
666
+ Returns:
667
+ Graph object
668
+ """
669
+ return self.graph
670
+
671
+ def __len__(self) -> int:
672
+ """Return the number of nodes in the graph."""
673
+ return len(self.graph)
674
+
675
+ def __contains__(self, node_id: str) -> bool:
676
+ """Check if a node exists in the graph."""
677
+ return node_id in self.graph
678
+
679
+ def __str__(self) -> str:
680
+ """String representation of the workflow."""
681
+ stats = self.get_statistics()
682
+ return (
683
+ f"BioCypherWorkflow(name='{self.name}', "
684
+ f"nodes={stats['basic']['nodes']}, edges={stats['basic']['edges']}, "
685
+ f"hyperedges={stats['basic']['hyperedges']})"
686
+ )
687
+
688
+ def __repr__(self) -> str:
689
+ return self.__str__()
690
+
691
+ # ==================== COMPATIBILITY WRAPPER METHODS ====================
692
+
693
+ def to_networkx(self):
694
+ """Convert to NetworkX graph for compatibility with existing tools.
695
+
696
+ Returns:
697
+ networkx.DiGraph: NetworkX representation of the graph
698
+
699
+ Note:
700
+ This method provides compatibility with existing NetworkX-based
701
+ tools while maintaining the native BioCypher object structure.
702
+ Future versions may use this as the primary backend.
703
+ """
704
+ try:
705
+ import networkx as nx
706
+ except ImportError:
707
+ raise ImportError("NetworkX is required for to_networkx() conversion. Install with: pip install networkx")
708
+
709
+ g = nx.DiGraph() if self.graph.directed else nx.Graph()
710
+
711
+ # Add nodes with properties
712
+ for node in self.graph._nodes.values():
713
+ attrs = node.properties.copy()
714
+ attrs["node_type"] = node.type
715
+ g.add_node(node.id, **attrs)
716
+
717
+ # Add edges with properties
718
+ for edge in self.graph._edges.values():
719
+ attrs = edge.properties.copy()
720
+ attrs["edge_type"] = edge.type
721
+ g.add_edge(edge.source, edge.target, **attrs)
722
+
723
+ return g
724
+
725
+ def to_pandas(self):
726
+ """Convert to Pandas DataFrames for compatibility with existing tools.
727
+
728
+ Returns:
729
+ dict[str, pd.DataFrame]: Dictionary of DataFrames, one per node/edge type
730
+
731
+ Note:
732
+ This method provides compatibility with existing Pandas-based
733
+ tools while maintaining the native BioCypher object structure.
734
+ Future versions may use this as the primary backend.
735
+ """
736
+ try:
737
+ import pandas as pd
738
+ except ImportError:
739
+ raise ImportError("Pandas is required for to_pandas() conversion. Install with: pip install pandas")
740
+
741
+ dfs = {}
742
+
743
+ # Create node DataFrames by type
744
+ for node_type, node_ids in self.graph._node_types.items():
745
+ nodes = [self.graph._nodes[node_id] for node_id in node_ids]
746
+ data = []
747
+ for node in nodes:
748
+ row = {"node_id": node.id, "node_type": node.type}
749
+ row.update(node.properties)
750
+ data.append(row)
751
+ dfs[node_type] = pd.DataFrame(data)
752
+
753
+ # Create edge DataFrames by type
754
+ for edge_type, edge_ids in self.graph._edge_types.items():
755
+ edges = [self.graph._edges[edge_id] for edge_id in edge_ids]
756
+ data = []
757
+ for edge in edges:
758
+ row = {"edge_id": edge.id, "edge_type": edge.type, "source_id": edge.source, "target_id": edge.target}
759
+ row.update(edge.properties)
760
+ data.append(row)
761
+ dfs[edge_type] = pd.DataFrame(data)
762
+
763
+ return dfs
764
+
765
+
766
+ # Convenience function for quick workflow creation
767
def create_workflow(
    name: str = "knowledge_graph",
    directed: bool = True,
    schema: dict[str, Any] | None = None,
    schema_file: str | None = None,
    head_ontology_url: str | None = None,
    validation_mode: str = "none",
    deduplication: bool = False,
) -> BioCypherWorkflow:
    """Convenience factory: build a BioCypherWorkflow in one call.

    Args:
        name: Name of the knowledge graph
        directed: Whether the graph is directed
        schema: Dictionary defining the knowledge graph schema
        schema_file: Path to YAML schema file
        head_ontology_url: URL to ontology file
        validation_mode: Validation level ("none", "warn", "strict")
        deduplication: Whether to enable deduplication

    Returns:
        BioCypherWorkflow instance
    """
    # Thin pass-through; all defaults match the class constructor.
    return BioCypherWorkflow(
        name=name,
        directed=directed,
        schema=schema,
        schema_file=schema_file,
        head_ontology_url=head_ontology_url,
        validation_mode=validation_mode,
        deduplication=deduplication,
    )