graphitedb 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graphite/__init__.py CHANGED
@@ -1,681 +1,734 @@
1
- from __future__ import annotations
2
-
3
- import pickle
4
- import re
5
- from collections import defaultdict
6
- from dataclasses import dataclass, field
7
- from datetime import date, datetime
8
- from enum import Enum
9
- from typing import Any, Callable, Dict, List, Optional, Tuple, Union
10
-
11
- # =============== TYPE SYSTEM ===============
12
-
13
- class DataType(Enum):
14
- STRING = "string"
15
- INT = "int"
16
- DATE = "date"
17
- FLOAT = "float"
18
- BOOL = "bool"
19
-
20
- @dataclass
21
- class Field:
22
- name: str
23
- dtype: DataType
24
- default: Any = None
25
-
26
- @dataclass
27
- class NodeType:
28
- name: str
29
- fields: List[Field] = field(default_factory=list)
30
- parent: Optional[NodeType] = None
31
-
32
- def get_all_fields(self) -> List[Field]:
33
- """Get all fields including inherited ones"""
34
- fields = self.fields.copy()
35
- if self.parent:
36
- fields = self.parent.get_all_fields() + fields
37
- return fields
38
-
39
- def __hash__(self):
40
- return hash(self.name)
41
-
42
- @dataclass
43
- class RelationType:
44
- name: str
45
- from_type: str
46
- to_type: str
47
- fields: List[Field] = field(default_factory=list)
48
- reverse_name: Optional[str] = None
49
- is_bidirectional: bool = False
50
-
51
- def __hash__(self):
52
- return hash(self.name)
53
-
54
- # =============== INSTANCES ===============
55
-
56
- @dataclass
57
- class Node:
58
- type_name: str
59
- id: str
60
- values: Dict[str, Any]
61
- _type_ref: Optional[NodeType] = None
62
-
63
- def get(self, field_name: str) -> Any:
64
- return self.values.get(field_name)
65
-
66
- def __getitem__(self, key):
67
- return self.get(key)
68
-
69
- def __repr__(self):
70
- return f"Node({self.type_name}:{self.id})"
71
-
72
- @dataclass
73
- class Relation:
74
- type_name: str
75
- from_node: str # node id
76
- to_node: str # node id
77
- values: Dict[str, Any]
78
- _type_ref: Optional[RelationType] = None
79
-
80
- def get(self, field_name: str) -> Any:
81
- return self.values.get(field_name)
82
-
83
- def __repr__(self):
84
- return f"Relation({self.type_name}:{self.from_node}->{self.to_node})"
85
-
86
- # =============== PARSER ===============
87
-
88
- class GraphiteParser:
89
- """Parser for Graphite DSL"""
90
-
91
- @staticmethod
92
- def parse_node_definition(line: str) -> Tuple[str, List[Field], str]:
93
- """Parse node type definition: 'node Person\nname: string\nage: int'"""
94
- lines = line.strip().split('\n')
95
- first_line = lines[0].strip()
96
-
97
- # Parse inheritance
98
- if ' from ' in first_line:
99
- parts = first_line.split(' from ')
100
- node_name = parts[0].replace('node', '').strip()
101
- parent = parts[1].strip()
102
- fields_start = 1
103
- else:
104
- node_name = first_line.replace('node', '').strip()
105
- parent = None
106
- fields_start = 1
107
-
108
- fields = []
109
- for field_line in lines[fields_start:]:
110
- field_line = field_line.strip()
111
- if not field_line:
112
- continue
113
- name_type = field_line.split(':')
114
- if len(name_type) == 2:
115
- name = name_type[0].strip()
116
- dtype_str = name_type[1].strip()
117
- dtype = DataType(dtype_str)
118
- fields.append(Field(name, dtype))
119
-
120
- return node_name, fields, parent
121
-
122
- @staticmethod
123
- def parse_relation_definition(line: str) -> Tuple[str, str, str, List[Field], Optional[str], bool]:
124
- """Parse relation definition"""
125
- lines = line.strip().split('\n')
126
- first_line = lines[0].strip()
127
-
128
- # Check for 'both' keyword
129
- is_bidirectional = ' both' in first_line
130
- if is_bidirectional:
131
- first_line = first_line.replace(' both', '')
132
-
133
- # Parse reverse
134
- reverse_name = None
135
- if ' reverse ' in first_line:
136
- parts = first_line.split(' reverse ')
137
- relation_name = parts[0].replace('relation', '').strip()
138
- reverse_name = parts[1].strip()
139
- first_line = parts[0]
140
- else:
141
- relation_name = first_line.replace('relation', '').strip()
142
-
143
- # Parse participants
144
- participants_line = lines[1].strip()
145
- if '->' in participants_line:
146
- from_to = participants_line.split('->')
147
- from_type = from_to[0].strip()
148
- to_type = from_to[1].strip()
149
- elif '-' in participants_line:
150
- parts = participants_line.split('-')
151
- from_type = parts[0].strip()
152
- to_type = parts[2].strip() if len(parts) > 2 else parts[1].strip()
153
- else:
154
- raise ValueError(f"Invalid relation format: {participants_line}")
155
-
156
- # Parse fields
157
- fields = []
158
- for field_line in lines[2:]:
159
- field_line = field_line.strip()
160
- if not field_line:
161
- continue
162
- name_type = field_line.split(':')
163
- if len(name_type) == 2:
164
- name = name_type[0].strip()
165
- dtype_str = name_type[1].strip()
166
- dtype = DataType(dtype_str)
167
- fields.append(Field(name, dtype))
168
-
169
- return relation_name, from_type, to_type, fields, reverse_name, is_bidirectional
170
-
171
- @staticmethod
172
- def parse_node_instance(line: str) -> Tuple[str, str, List[Any]]:
173
- """Parse node instance: 'User, user_1, "Joe Doe", 32, "joe4030"'"""
174
- # Handle quoted strings
175
- parts = []
176
- current = ''
177
- in_quotes = False
178
- for char in line:
179
- if char == '"':
180
- in_quotes = not in_quotes
181
- current += char
182
- elif char == ',' and not in_quotes:
183
- parts.append(current.strip())
184
- current = ''
185
- else:
186
- current += char
187
- if current:
188
- parts.append(current.strip())
189
-
190
- node_type = parts[0].strip()
191
- node_id = parts[1].strip()
192
- values = []
193
-
194
- for val in parts[2:]:
195
- val = val.strip()
196
- if val.startswith('"') and val.endswith('"'):
197
- values.append(val[1:-1])
198
- elif val.replace('-', '').isdigit() and '-' in val: # Date-like
199
- values.append(val)
200
- elif val.isdigit() or (val.startswith('-') and val[1:].isdigit()):
201
- values.append(int(val))
202
- elif val.replace('.', '').isdigit() and val.count('.') == 1:
203
- values.append(float(val))
204
- elif val.lower() in ('true', 'false'):
205
- values.append(val.lower() == 'true')
206
- else:
207
- values.append(val)
208
-
209
- return node_type, node_id, values
210
-
211
- @staticmethod
212
- def parse_relation_instance(line: str) -> tuple[str | Any, str | Any, Any, list[Any], str]:
213
- """Parse relation instance: 'user_1 -[OWNER, 2000-10-04]-> notebook'"""
214
- # Extract relation type and attributes
215
- pattern = r'(\w+)\s*(-\[([^\]]+)\]\s*[->-]\s*|\s*[->-]\s*\[([^\]]+)\]\s*->\s*)(\w+)'
216
- match = re.search(pattern, line)
217
- if not match:
218
- raise ValueError(f"Invalid relation format: {line}")
219
-
220
- from_node = match.group(1)
221
- to_node = match.group(5)
222
-
223
- # Get relation type and attributes
224
- rel_part = match.group(3) or match.group(4)
225
- rel_parts = [p.strip() for p in rel_part.split(',')]
226
- rel_type = rel_parts[0]
227
- attributes = rel_parts[1:] if len(rel_parts) > 1 else []
228
-
229
- # Parse direction
230
- if '->' in line:
231
- direction = 'forward'
232
- elif '-[' in line and ']-' in line:
233
- direction = 'bidirectional'
234
- else:
235
- direction = 'forward'
236
-
237
- return from_node, to_node, rel_type, attributes, direction
238
-
239
- # =============== QUERY ENGINE ===============
240
-
241
- class QueryResult:
242
- """Represents a query result that can be chained"""
243
-
244
- def __init__(self, engine: GraphiteEngine, nodes: List[Node], edges: List[Relation] = None):
245
- self.engine = engine
246
- self.nodes = nodes
247
- self.edges = edges or []
248
- self.current_relation: Optional[RelationType] = None
249
- self.direction: str = 'outgoing'
250
-
251
- def where(self, condition: Union[str, Callable]) -> QueryResult:
252
- """Filter nodes based on condition"""
253
- filtered_nodes = []
254
-
255
- if callable(condition):
256
- # Lambda function
257
- for node in self.nodes:
258
- try:
259
- if condition(node):
260
- filtered_nodes.append(node)
261
- except:
262
- continue
263
- else:
264
- # String condition like "age > 18"
265
- for node in self.nodes:
266
- if self._evaluate_condition(node, condition):
267
- filtered_nodes.append(node)
268
-
269
- return QueryResult(self.engine, filtered_nodes, self.edges)
270
-
271
- def _evaluate_condition(self, node: Node, condition: str) -> bool:
272
- """Evaluate a condition string on a node"""
273
- # Simple condition parser
274
- ops = ['>=', '<=', '!=', '==', '>', '<', '=']
275
-
276
- for op in ops:
277
- if op in condition:
278
- left, right = condition.split(op)
279
- left = left.strip()
280
- right = right.strip()
281
-
282
- # Get value from node
283
- node_value = node.get(left)
284
- if node_value is None:
285
- return False
286
-
287
- # Parse right side
288
- if right.startswith('"') and right.endswith('"'):
289
- right_value = right[1:-1]
290
- elif right.isdigit():
291
- right_value = int(right)
292
- elif right.replace('.', '').isdigit() and right.count('.') == 1:
293
- right_value = float(right)
294
- else:
295
- right_value = right
296
-
297
- # Apply operation
298
- if op in ('=', '=='):
299
- return node_value == right_value
300
- elif op == '!=':
301
- return node_value != right_value
302
- elif op == '>':
303
- return node_value > right_value
304
- elif op == '<':
305
- return node_value < right_value
306
- elif op == '>=':
307
- return node_value >= right_value
308
- elif op == '<=':
309
- return node_value <= right_value
310
-
311
- return False
312
-
313
- def traverse(self, relation_type: str, direction: str = 'outgoing') -> QueryResult:
314
- """Traverse relations from current nodes"""
315
- result_nodes = []
316
- result_edges = []
317
-
318
- for node in self.nodes:
319
- if direction == 'outgoing':
320
- edges = self.engine.get_relations_from(node.id, relation_type)
321
- elif direction == 'incoming':
322
- edges = self.engine.get_relations_to(node.id, relation_type)
323
- else: # both
324
- edges = (self.engine.get_relations_from(node.id, relation_type) +
325
- self.engine.get_relations_to(node.id, relation_type))
326
-
327
- for edge in edges:
328
- result_edges.append(edge)
329
- target_id = edge.to_node if direction == 'outgoing' else edge.from_node
330
- target_node = self.engine.get_node(target_id)
331
- if target_node:
332
- result_nodes.append(target_node)
333
-
334
- # Remove duplicates
335
- result_nodes = list(dict((n.id, n) for n in result_nodes).values())
336
- return QueryResult(self.engine, result_nodes, result_edges)
337
-
338
- def outgoing(self, relation_type: str) -> QueryResult:
339
- """Traverse outgoing relations"""
340
- return self.traverse(relation_type, 'outgoing')
341
-
342
- def incoming(self, relation_type: str) -> QueryResult:
343
- """Traverse incoming relations"""
344
- return self.traverse(relation_type, 'incoming')
345
-
346
- def both(self, relation_type: str) -> QueryResult:
347
- """Traverse both directions"""
348
- return self.traverse(relation_type, 'both')
349
-
350
- def limit(self, n: int) -> QueryResult:
351
- """Limit number of results"""
352
- return QueryResult(self.engine, self.nodes[:n], self.edges[:n])
353
-
354
- def distinct(self) -> QueryResult:
355
- """Get distinct nodes"""
356
- seen = set()
357
- distinct_nodes = []
358
- for node in self.nodes:
359
- if node.id not in seen:
360
- seen.add(node.id)
361
- distinct_nodes.append(node)
362
- return QueryResult(self.engine, distinct_nodes, self.edges)
363
-
364
- def order_by(self, field: str, descending: bool = False) -> QueryResult:
365
- """Order nodes by field"""
366
-
367
- def get_key(node):
368
- val = node.get(field)
369
- return (val is None, val)
370
-
371
- sorted_nodes = sorted(self.nodes, key=get_key, reverse=descending)
372
- return QueryResult(self.engine, sorted_nodes, self.edges)
373
-
374
- def count(self) -> int:
375
- """Count nodes"""
376
- return len(self.nodes)
377
-
378
- def get(self) -> List[Node]:
379
- """Get all nodes"""
380
- return self.nodes
381
-
382
- def first(self) -> Optional[Node]:
383
- """Get first node"""
384
- return self.nodes[0] if self.nodes else None
385
-
386
- def ids(self) -> List[str]:
387
- """Get node IDs"""
388
- return [n.id for n in self.nodes]
389
-
390
- class QueryBuilder:
391
- """Builder for creating queries"""
392
-
393
- def __init__(self, engine: GraphiteEngine):
394
- self.engine = engine
395
-
396
- def __getattr__(self, name: str) -> QueryResult:
397
- """Allow starting query from node type: engine.User"""
398
- if name in self.engine.node_types:
399
- nodes = self.engine.get_nodes_of_type(name)
400
- return QueryResult(self.engine, nodes)
401
- raise AttributeError(f"No node type '{name}' found")
402
-
403
- # =============== MAIN ENGINE ===============
404
-
405
- class GraphiteEngine:
406
- """Main graph database engine"""
407
-
408
- def __init__(self):
409
- self.node_types: Dict[str, NodeType] = {}
410
- self.relation_types: Dict[str, RelationType] = {}
411
- self.nodes: Dict[str, Node] = {}
412
- self.relations: List[Relation] = []
413
- self.node_by_type: Dict[str, List[Node]] = defaultdict(list)
414
- self.relations_by_type: Dict[str, List[Relation]] = defaultdict(list)
415
- self.relations_by_from: Dict[str, List[Relation]] = defaultdict(list)
416
- self.relations_by_to: Dict[str, List[Relation]] = defaultdict(list)
417
- self.parser = GraphiteParser()
418
- self.query = QueryBuilder(self)
419
-
420
- # =============== SCHEMA DEFINITION ===============
421
-
422
- def define_node(self, definition: str):
423
- """Define a node type from DSL"""
424
- node_name, fields, parent_name = self.parser.parse_node_definition(definition)
425
-
426
- parent = None
427
- if parent_name:
428
- if parent_name not in self.node_types:
429
- raise ValueError(f"Parent node type '{parent_name}' not found")
430
- parent = self.node_types[parent_name]
431
-
432
- node_type = NodeType(node_name, fields, parent)
433
- self.node_types[node_name] = node_type
434
-
435
- def define_relation(self, definition: str):
436
- """Define a relation type from DSL"""
437
- (rel_name, from_type, to_type, fields,
438
- reverse_name, is_bidirectional) = self.parser.parse_relation_definition(definition)
439
-
440
- # Validate node types exist
441
- if from_type not in self.node_types:
442
- raise ValueError(f"Node type '{from_type}' not found")
443
- if to_type not in self.node_types:
444
- raise ValueError(f"Node type '{to_type}' not found")
445
-
446
- rel_type = RelationType(
447
- rel_name, from_type, to_type,
448
- fields, reverse_name, is_bidirectional
449
- )
450
- self.relation_types[rel_name] = rel_type
451
-
452
- # Register reverse relation if specified
453
- if reverse_name:
454
- reverse_rel = RelationType(
455
- reverse_name, to_type, from_type,
456
- fields, rel_name, is_bidirectional
457
- )
458
- self.relation_types[reverse_name] = reverse_rel
459
-
460
- # =============== DATA MANIPULATION ===============
461
-
462
- def create_node(self, node_type: str, node_id: str, *values) -> Node:
463
- """Create a node instance"""
464
- if node_type not in self.node_types:
465
- raise ValueError(f"Node type '{node_type}' not defined")
466
-
467
- node_type_obj = self.node_types[node_type]
468
- all_fields = node_type_obj.get_all_fields()
469
-
470
- if len(values) != len(all_fields):
471
- raise ValueError(f"Expected {len(all_fields)} values, got {len(values)}")
472
-
473
- # Create values dictionary
474
- node_values = {}
475
- for field, value in zip(all_fields, values):
476
- # Convert string dates to date objects
477
- if field.dtype == DataType.DATE and isinstance(value, str):
478
- try:
479
- value = datetime.strptime(value, "%Y-%m-%d").date()
480
- except:
481
- pass
482
- node_values[field.name] = value
483
-
484
- node = Node(node_type, node_id, node_values, node_type_obj)
485
- self.nodes[node_id] = node
486
- self.node_by_type[node_type].append(node)
487
- return node
488
-
489
- def create_relation(self, from_id: str, to_id: str, rel_type: str, *values) -> Relation:
490
- """Create a relation instance"""
491
- if rel_type not in self.relation_types:
492
- raise ValueError(f"Relation type '{rel_type}' not defined")
493
-
494
- rel_type_obj = self.relation_types[rel_type]
495
-
496
- # Check if nodes exist
497
- if from_id not in self.nodes:
498
- raise ValueError(f"Node '{from_id}' not found")
499
- if to_id not in self.nodes:
500
- raise ValueError(f"Node '{to_id}' not found")
501
-
502
- # Create values dictionary
503
- rel_values = {}
504
- for i, field in enumerate(rel_type_obj.fields):
505
- if i < len(values):
506
- value = values[i]
507
- if field.dtype == DataType.DATE and isinstance(value, str):
508
- try:
509
- value = datetime.strptime(value, "%Y-%m-%d").date()
510
- except:
511
- pass
512
- rel_values[field.name] = value
513
-
514
- relation = Relation(rel_type, from_id, to_id, rel_values, rel_type_obj)
515
- self.relations.append(relation)
516
- self.relations_by_type[rel_type].append(relation)
517
- self.relations_by_from[from_id].append(relation)
518
- self.relations_by_to[to_id].append(relation)
519
-
520
- # If relation is bidirectional, create reverse automatically
521
- if rel_type_obj.is_bidirectional:
522
- reverse_rel = Relation(rel_type, to_id, from_id, rel_values, rel_type_obj)
523
- self.relations.append(reverse_rel)
524
- self.relations_by_type[rel_type].append(reverse_rel)
525
- self.relations_by_from[to_id].append(reverse_rel)
526
- self.relations_by_to[from_id].append(reverse_rel)
527
-
528
- return relation
529
-
530
- # =============== QUERY METHODS ===============
531
-
532
- def get_node(self, node_id: str) -> Optional[Node]:
533
- """Get node by ID"""
534
- return self.nodes.get(node_id)
535
-
536
- def get_nodes_of_type(self, node_type: str) -> List[Node]:
537
- """Get all nodes of a specific type"""
538
- return self.node_by_type.get(node_type, [])
539
-
540
- def get_relations_from(self, node_id: str, rel_type: str = None) -> List[Relation]:
541
- """Get relations from a node"""
542
- all_rels = self.relations_by_from.get(node_id, [])
543
- if rel_type:
544
- return [r for r in all_rels if r.type_name == rel_type]
545
- return all_rels
546
-
547
- def get_relations_to(self, node_id: str, rel_type: str = None) -> List[Relation]:
548
- """Get relations to a node"""
549
- all_rels = self.relations_by_to.get(node_id, [])
550
- if rel_type:
551
- return [r for r in all_rels if r.type_name == rel_type]
552
- return all_rels
553
-
554
- # =============== BULK LOADING ===============
555
-
556
- def load_dsl(self, dsl: str):
557
- """Load Graphite DSL"""
558
- lines = dsl.strip().split('\n')
559
- i = 0
560
-
561
- while i < len(lines):
562
- line = lines[i].strip()
563
- if not line or line.startswith('#'):
564
- i += 1
565
- continue
566
-
567
- if line.startswith('node'):
568
- # Collect multiline node definition
569
- node_def = [line]
570
- i += 1
571
- while i < len(lines) and lines[i].strip() and not lines[i].strip().startswith(('node', 'relation')):
572
- node_def.append(lines[i])
573
- i += 1
574
- self.define_node('\n'.join(node_def))
575
-
576
- elif line.startswith('relation'):
577
- # Collect multiline relation definition
578
- rel_def = [line]
579
- i += 1
580
- while i < len(lines) and lines[i].strip() and not lines[i].strip().startswith(('node', 'relation')):
581
- rel_def.append(lines[i])
582
- i += 1
583
- self.define_relation('\n'.join(rel_def))
584
-
585
- elif ',' in line and not line.startswith(('-', '[')):
586
- # Node instance
587
- node_type, node_id, values = self.parser.parse_node_instance(line)
588
- self.create_node(node_type, node_id, *values)
589
- i += 1
590
-
591
- elif ('-[' in line or '->' in line) and ']' in line:
592
- # Relation instance
593
- from_id, to_id, rel_type, values, direction = self.parser.parse_relation_instance(line)
594
- self.create_relation(from_id, to_id, rel_type, *values)
595
- i += 1
596
- else:
597
- i += 1
598
-
599
- # =============== PERSISTENCE ===============
600
-
601
- def save(self, filename: str):
602
- """Save database to file"""
603
- with open(filename, 'wb') as f:
604
- data = {
605
- 'node_types' : self.node_types,
606
- 'relation_types' : self.relation_types,
607
- 'nodes' : self.nodes,
608
- 'relations' : self.relations,
609
- 'node_by_type' : self.node_by_type,
610
- 'relations_by_type': self.relations_by_type,
611
- 'relations_by_from': self.relations_by_from,
612
- 'relations_by_to' : self.relations_by_to,
613
- }
614
- # noinspection PyTypeChecker
615
- pickle.dump(data, f)
616
-
617
- def load(self, filename: str):
618
- """Load database from file"""
619
- with open(filename, 'rb') as f:
620
- data = pickle.load(f)
621
- self.node_types = data['node_types']
622
- self.relation_types = data['relation_types']
623
- self.nodes = data['nodes']
624
- self.relations = data['relations']
625
- self.node_by_type = data['node_by_type']
626
- self.relations_by_type = data['relations_by_type']
627
- self.relations_by_from = data['relations_by_from']
628
- self.relations_by_to = data['relations_by_to']
629
-
630
- # =============== UTILITY METHODS ===============
631
-
632
- def clear(self):
633
- """Clear all data"""
634
- self.node_types.clear()
635
- self.relation_types.clear()
636
- self.nodes.clear()
637
- self.relations.clear()
638
- self.node_by_type.clear()
639
- self.relations_by_type.clear()
640
- self.relations_by_from.clear()
641
- self.relations_by_to.clear()
642
-
643
- def stats(self) -> Dict[str, Any]:
644
- """Get database statistics"""
645
- return {
646
- 'node_types' : len(self.node_types),
647
- 'relation_types': len(self.relation_types),
648
- 'nodes' : len(self.nodes),
649
- 'relations' : len(self.relations),
650
- }
651
-
652
- # =============== SYNTAX SUGAR ===============
653
-
654
- def node(node_type: str, **fields) -> str:
655
- """Helper function to create node definitions"""
656
- lines = [f"node {node_type}"]
657
- for field_name, field_type in fields.items():
658
- lines.append(f"{field_name}: {field_type}")
659
- return "\n".join(lines)
660
-
661
- def relation(name: str, from_type: str, to_type: str, **kwargs) -> str:
662
- """Helper function to create relation definitions"""
663
- lines = [f"relation {name}"]
664
- if kwargs.get('both'):
665
- lines[0] += " both"
666
- if kwargs.get('reverse'):
667
- lines[0] += f" reverse {kwargs['reverse']}"
668
-
669
- direction = "->" if not kwargs.get('both') else "-"
670
- lines.append(f"{from_type} {direction} {to_type}")
671
-
672
- for field_name, field_type in kwargs.get('fields', {}).items():
673
- lines.append(f"{field_name}: {field_type}")
674
-
675
- return "\n".join(lines)
676
-
677
- # ================ PUBLIC API ================
678
-
679
- def engine() -> GraphiteEngine:
680
- """Create graphite engine instance"""
681
- return GraphiteEngine()
1
+ """
2
+ Graphite: A clean, embedded graph database engine for Python.
3
+
4
+ This is graphite module (installation: ``pip install graphitedb``).
5
+ You can use it with ``import graphite``.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import pickle
10
+ import re
11
+ from collections import defaultdict
12
+ from dataclasses import dataclass, field
13
+ from datetime import date, datetime
14
+ from enum import Enum
15
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
16
+
17
+ # =============== TYPE SYSTEM ===============
18
+
19
+ class DataType(Enum):
20
+ """
21
+ Valid data types in graphite. Used in nodes and relations properties.
22
+ """
23
+ STRING = "string"
24
+ INT = "int"
25
+ DATE = "date"
26
+ FLOAT = "float"
27
+ BOOL = "bool"
28
+
29
+ @dataclass
30
+ class Field:
31
+ """
32
+ A data field (property) for nodes and relations.
33
+ """
34
+ name: str
35
+ dtype: DataType
36
+ default: Any = None
37
+
38
+ @dataclass
39
+ class NodeType:
40
+ """
41
+ A defined node type (with ``node ...`` block in dsl or ``GraphiteEngine.define_node()``).
42
+ Each node type has a name (in snake_case usually), and optional list of fields (properties).
43
+ Supports optional parent node type.
44
+ """
45
+ name: str
46
+ fields: List[Field] = field(default_factory=list)
47
+ parent: Optional[NodeType] = None
48
+
49
+ def get_all_fields(self) -> List[Field]:
50
+ """Get all fields including inherited ones"""
51
+ fields = self.fields.copy()
52
+ if self.parent:
53
+ fields = self.parent.get_all_fields() + fields
54
+ return fields
55
+
56
+ def __hash__(self):
57
+ return hash(self.name)
58
+
59
+ @dataclass
60
+ class RelationType:
61
+ """
62
+ A defined relation type (with ``relation ...`` block in dsl or
63
+ ``GraphiteEngine.define_relation()``). Each relation type has a name (in UPPER_SNAKE_CASE
64
+ usually), and optional list of fields (properties). A relation type can be from one node
65
+ type to another.
66
+ """
67
+ name: str
68
+ from_type: str
69
+ to_type: str
70
+ fields: List[Field] = field(default_factory=list)
71
+ reverse_name: Optional[str] = None
72
+ is_bidirectional: bool = False
73
+
74
+ def __hash__(self):
75
+ return hash(self.name)
76
+
77
+ # =============== INSTANCES ===============
78
+
79
+ @dataclass
80
+ class Node:
81
+ """
82
+ A node in database. Has a base type, id, and properties from base type (and it's parent
83
+ type recursively).
84
+ """
85
+ type_name: str
86
+ id: str
87
+ values: Dict[str, Any]
88
+ _type_ref: Optional[NodeType] = None
89
+
90
+ def get(self, field_name: str) -> Any:
91
+ """Get a field from this node."""
92
+ return self.values.get(field_name)
93
+
94
+ def __getitem__(self, key):
95
+ return self.get(key)
96
+
97
+ def __repr__(self):
98
+ return f"Node({self.type_name}:{self.id})"
99
+
100
+ @dataclass
101
+ class Relation:
102
+ """
103
+ A relation between two nodes in database. Has a base type, source and target node IDs,
104
+ and properties from base type.
105
+ """
106
+ type_name: str
107
+ from_node: str # node id
108
+ to_node: str # node id
109
+ values: Dict[str, Any]
110
+ _type_ref: Optional[RelationType] = None
111
+
112
+ def get(self, field_name: str) -> Any:
113
+ """Get a field from this relation."""
114
+ return self.values.get(field_name)
115
+
116
+ def __repr__(self):
117
+ return f"Relation({self.type_name}:{self.from_node}->{self.to_node})"
118
+
119
+ # =============== PARSER ===============
120
+
121
+ class GraphiteParser:
122
+ """Parser for Graphite DSL"""
123
+
124
+ @staticmethod
125
+ def parse_node_definition(line: str) -> Tuple[str, List[Field], str]:
126
+ """Parse node type definition: 'node Person\nname: string\nage: int'"""
127
+ lines = line.strip().split('\n')
128
+ first_line = lines[0].strip()
129
+
130
+ # Parse inheritance
131
+ if ' from ' in first_line:
132
+ parts = first_line.split(' from ')
133
+ node_name = parts[0].replace('node', '').strip()
134
+ parent = parts[1].strip()
135
+ fields_start = 1
136
+ else:
137
+ node_name = first_line.replace('node', '').strip()
138
+ parent = None
139
+ fields_start = 1
140
+
141
+ fields = []
142
+ for field_line in lines[fields_start:]:
143
+ field_line = field_line.strip()
144
+ if not field_line:
145
+ continue
146
+ name_type = field_line.split(':')
147
+ if len(name_type) == 2:
148
+ name = name_type[0].strip()
149
+ dtype_str = name_type[1].strip()
150
+ dtype = DataType(dtype_str)
151
+ fields.append(Field(name, dtype))
152
+
153
+ return node_name, fields, parent
154
+
155
+ # pylint: disable=too-many-locals
156
+ @staticmethod
157
+ def parse_relation_definition(line: str) -> Tuple[str, str, str, List[Field], Optional[str], bool]:
158
+ """Parse relation definition"""
159
+ lines = line.strip().split('\n')
160
+ first_line = lines[0].strip()
161
+
162
+ # Check for 'both' keyword
163
+ is_bidirectional = ' both' in first_line
164
+ if is_bidirectional:
165
+ first_line = first_line.replace(' both', '')
166
+
167
+ # Parse reverse
168
+ reverse_name = None
169
+ if ' reverse ' in first_line:
170
+ parts = first_line.split(' reverse ')
171
+ relation_name = parts[0].replace('relation', '').strip()
172
+ reverse_name = parts[1].strip()
173
+ first_line = parts[0]
174
+ else:
175
+ relation_name = first_line.replace('relation', '').strip()
176
+
177
+ # Parse participants
178
+ participants_line = lines[1].strip()
179
+ if '->' in participants_line:
180
+ from_to = participants_line.split('->')
181
+ from_type = from_to[0].strip()
182
+ to_type = from_to[1].strip()
183
+ elif '-' in participants_line:
184
+ parts = participants_line.split('-')
185
+ from_type = parts[0].strip()
186
+ to_type = parts[2].strip() if len(parts) > 2 else parts[1].strip()
187
+ else:
188
+ raise ValueError(f"Invalid relation format: {participants_line}")
189
+
190
+ # Parse fields
191
+ fields = []
192
+ for field_line in lines[2:]:
193
+ field_line = field_line.strip()
194
+ if not field_line:
195
+ continue
196
+ name_type = field_line.split(':')
197
+ if len(name_type) == 2:
198
+ name = name_type[0].strip()
199
+ dtype_str = name_type[1].strip()
200
+ dtype = DataType(dtype_str)
201
+ fields.append(Field(name, dtype))
202
+
203
+ return relation_name, from_type, to_type, fields, reverse_name, is_bidirectional
204
+
205
+ @staticmethod
206
+ def parse_node_instance(line: str) -> Tuple[str, str, List[Any]]:
207
+ """Parse node instance: 'User, user_1, "Joe Doe", 32, "joe4030"'"""
208
+ # Handle quoted strings
209
+ parts = []
210
+ current = ''
211
+ in_quotes = False
212
+ for char in line:
213
+ if char == '"':
214
+ in_quotes = not in_quotes
215
+ current += char
216
+ elif char == ',' and not in_quotes:
217
+ parts.append(current.strip())
218
+ current = ''
219
+ else:
220
+ current += char
221
+ if current:
222
+ parts.append(current.strip())
223
+
224
+ node_type = parts[0].strip()
225
+ node_id = parts[1].strip()
226
+ values = []
227
+
228
+ for val in parts[2:]:
229
+ val = val.strip()
230
+ if val.startswith('"') and val.endswith('"'):
231
+ values.append(val[1:-1])
232
+ elif val.replace('-', '').isdigit() and '-' in val: # Date-like
233
+ values.append(val)
234
+ elif val.isdigit() or (val.startswith('-') and val[1:].isdigit()):
235
+ values.append(int(val))
236
+ elif val.replace('.', '').isdigit() and val.count('.') == 1:
237
+ values.append(float(val))
238
+ elif val.lower() in ('true', 'false'):
239
+ values.append(val.lower() == 'true')
240
+ else:
241
+ values.append(val)
242
+
243
+ return node_type, node_id, values
244
+
245
+ @staticmethod
246
+ def parse_relation_instance(line: str) -> tuple[str | Any, str | Any, Any, list[Any], str]:
247
+ """Parse relation instance: 'user_1 -[OWNER, 2000-10-04]-> notebook'"""
248
+ # Extract relation type and attributes
249
+ pattern = r'(\w+)\s*(-\[([^\]]+)\]\s*[->-]\s*|\s*[->-]\s*\[([^\]]+)\]\s*->\s*)(\w+)'
250
+ match = re.search(pattern, line)
251
+ if not match:
252
+ raise ValueError(f"Invalid relation format: {line}")
253
+
254
+ from_node = match.group(1)
255
+ to_node = match.group(5)
256
+
257
+ # Get relation type and attributes
258
+ rel_part = match.group(3) or match.group(4)
259
+ rel_parts = [p.strip() for p in rel_part.split(',')]
260
+ rel_type = rel_parts[0]
261
+ attributes = rel_parts[1:] if len(rel_parts) > 1 else []
262
+
263
+ # Parse direction
264
+ if '->' in line:
265
+ direction = 'forward'
266
+ elif '-[' in line and ']-' in line:
267
+ direction = 'bidirectional'
268
+ else:
269
+ direction = 'forward'
270
+
271
+ return from_node, to_node, rel_type, attributes, direction
272
+
273
+ # =============== QUERY ENGINE ===============
274
+
275
class QueryResult:
    """Chainable view over a list of nodes and the edges that led to them.

    Filtering, ordering and traversal methods return a new ``QueryResult``
    and leave the receiver untouched, so calls can be chained.
    """

    # Operators accepted in string conditions. Two-character operators are
    # listed first so they win over their one-character prefixes.
    _CONDITION_OPS = ('>=', '<=', '!=', '==', '>', '<', '=')

    def __init__(self, graph_engine: GraphiteEngine, nodes: List[Node],
                 edges: Optional[List[Relation]] = None):
        self.engine = graph_engine
        self.nodes = nodes
        self.edges = edges or []
        self.current_relation: Optional[RelationType] = None
        self.direction: str = 'outgoing'

    def where(self, condition: Union[str, Callable]) -> QueryResult:
        """Keep the nodes that satisfy *condition*.

        Args:
            condition: Either a callable taking a node and returning a truthy
                value, or a string such as ``"age > 18"``.

        A callable that raises for a node is reported on stdout and that node
        is dropped; string conditions are evaluated by
        ``_evaluate_condition``.
        """
        if callable(condition):
            kept = []
            for candidate in self.nodes:
                try:
                    if condition(candidate):
                        kept.append(candidate)
                except Exception as e:  # pylint: disable=broad-exception-caught
                    print(f"Graphite Warn: 'where' condition failed for node {candidate}: {e}")
        else:
            kept = [
                candidate for candidate in self.nodes
                if self._evaluate_condition(candidate, condition)
            ]

        return QueryResult(self.engine, kept, self.edges)

    def _evaluate_condition(self, target_node: Node, condition: str) -> bool:
        """Evaluate ``"<field> <op> <literal>"`` against *target_node*.

        The literal may be a double-quoted string, an integer or float
        (optionally negative), ``true``/``false`` when the field holds a bool,
        or a ``YYYY-MM-DD`` date when the field holds a date. Anything else is
        compared as a plain string.

        Returns ``False`` when no operator is present or the field is missing
        or ``None`` on the node.
        """
        # Imported locally so the method does not depend on which names the
        # module happens to import from datetime.
        from datetime import date, datetime

        # Use the leftmost operator: an operator character inside the literal
        # (e.g. name == "x>=1") must not be taken for the comparison itself.
        position, op = -1, None
        for candidate in self._CONDITION_OPS:
            found_at = condition.find(candidate)
            if found_at != -1 and (op is None or found_at < position):
                position, op = found_at, candidate
        if op is None:
            return False

        left = condition[:position].strip()
        right = condition[position + len(op):].strip()

        node_value = target_node.get(left)
        if node_value is None:
            return False

        # Parse the right-hand literal.
        unsigned = right[1:] if right.startswith('-') else right
        if len(right) >= 2 and right.startswith('"') and right.endswith('"'):
            right_value = right[1:-1]
        elif unsigned.isdecimal():
            right_value = int(right)
        elif unsigned.count('.') == 1 and unsigned.replace('.', '').isdecimal():
            right_value = float(right)
        elif isinstance(node_value, bool) and right.lower() in ('true', 'false'):
            # Only unquoted literals reach this branch, and only bool fields
            # get the conversion, so string fields holding "true" still match.
            right_value = right.lower() == 'true'
        else:
            right_value = right

        # Date fields hold date objects; give the literal the same type so
        # that ordering comparisons work instead of raising TypeError.
        if (isinstance(right_value, str) and isinstance(node_value, date)
                and not isinstance(node_value, datetime)):
            try:
                right_value = datetime.strptime(right_value, "%Y-%m-%d").date()
            except ValueError:
                pass  # not a date literal; compare as a string

        if op in ('=', '=='):
            return node_value == right_value
        if op == '!=':
            return node_value != right_value
        if op == '>':
            return node_value > right_value
        if op == '<':
            return node_value < right_value
        if op == '>=':
            return node_value >= right_value
        if op == '<=':
            return node_value <= right_value
        raise ValueError(f"Invalid condition string: {condition}")

    def traverse(self, relation_type: str, direction: str = 'outgoing') -> QueryResult:
        """Follow *relation_type* edges from the current nodes.

        Args:
            relation_type: Name of the relation type to follow.
            direction: ``'outgoing'``, ``'incoming'``; any other value is
                treated as "both directions".

        Returns:
            A result holding the de-duplicated neighbour nodes and every edge
            that was followed.
        """
        neighbours = {}
        followed = []

        for start in self.nodes:
            if direction == 'outgoing':
                edges = self.engine.get_relations_from(start.id, relation_type)
            elif direction == 'incoming':
                edges = self.engine.get_relations_to(start.id, relation_type)
            else:  # both
                edges = (self.engine.get_relations_from(start.id, relation_type) +
                         self.engine.get_relations_to(start.id, relation_type))

            for edge in edges:
                followed.append(edge)
                if direction == 'outgoing':
                    target_id = edge.to_node
                elif direction == 'incoming':
                    target_id = edge.from_node
                else:
                    # In "both" mode the neighbour is whichever endpoint is
                    # not the node we started from.
                    target_id = edge.to_node if edge.from_node == start.id else edge.from_node
                neighbour = self.engine.get_node(target_id)
                if neighbour:
                    # Keyed by id: the first sighting fixes the position,
                    # later sightings only refresh the value.
                    neighbours[neighbour.id] = neighbour

        return QueryResult(self.engine, list(neighbours.values()), followed)

    def outgoing(self, relation_type: str) -> QueryResult:
        """Traverse outgoing relations"""
        return self.traverse(relation_type, 'outgoing')

    def incoming(self, relation_type: str) -> QueryResult:
        """Traverse incoming relations"""
        return self.traverse(relation_type, 'incoming')

    def both(self, relation_type: str) -> QueryResult:
        """Traverse both directions"""
        return self.traverse(relation_type, 'both')

    def limit(self, n: int) -> QueryResult:
        """Keep at most the first *n* nodes and the first *n* edges."""
        return QueryResult(self.engine, self.nodes[:n], self.edges[:n])

    def distinct(self) -> QueryResult:
        """Drop nodes whose id was already seen, keeping first occurrences."""
        seen = set()
        unique = []
        for candidate in self.nodes:
            if candidate.id not in seen:
                seen.add(candidate.id)
                unique.append(candidate)
        return QueryResult(self.engine, unique, self.edges)

    def order_by(self, by_field: str, descending: bool = False) -> QueryResult:
        """Sort nodes by *by_field*.

        Nodes without a value for the field sort after all others in
        ascending order (and therefore first when *descending* is true).
        """

        def sort_key(item):
            val = item.get(by_field)
            return (val is None, val)

        ordered = sorted(self.nodes, key=sort_key, reverse=descending)
        return QueryResult(self.engine, ordered, self.edges)

    def count(self) -> int:
        """Count nodes"""
        return len(self.nodes)

    def get(self) -> List[Node]:
        """Get all nodes"""
        return self.nodes

    def first(self) -> Optional[Node]:
        """Get first node, or ``None`` when the result is empty"""
        return self.nodes[0] if self.nodes else None

    def ids(self) -> List[str]:
        """Get node IDs"""
        return [n.id for n in self.nodes]
428
+
429
class QueryBuilder:  # pylint: disable=too-few-public-methods
    """Entry point for queries: attribute access selects a node type.

    ``builder.User`` returns a ``QueryResult`` over all nodes of the node
    type named ``User`` in the wrapped engine.
    """

    def __init__(self, graphite_engine: GraphiteEngine):
        self.engine = graphite_engine

    def __getattr__(self, name: str) -> QueryResult:
        """Start a query from the node type called *name*.

        Raises:
            AttributeError: If the engine defines no such node type.
        """
        # __getattr__ runs whenever normal lookup fails, which includes
        # "engine" itself on an instance created without __init__ (copy and
        # pickle do that). Reading self.engine here would then recurse until
        # RecursionError, so the engine is fetched from __dict__ directly.
        graph = self.__dict__.get('engine')
        if graph is not None and name in graph.node_types:
            return QueryResult(graph, graph.get_nodes_of_type(name))
        raise AttributeError(f"No node type '{name}' found")
441
+
442
+ # =============== MAIN ENGINE ===============
443
+
444
class GraphiteEngine:  # pylint: disable=too-many-instance-attributes
    """Main graph database engine.

    Holds the schema (node and relation types), the node and relation
    instances, and secondary indexes (nodes by type; relations by type,
    source id and target id) that ``create_node`` and ``create_relation``
    keep up to date.
    """

    def __init__(self):
        self.node_types: Dict[str, NodeType] = {}
        self.relation_types: Dict[str, RelationType] = {}
        self.nodes: Dict[str, Node] = {}
        self.relations: List[Relation] = []
        self.node_by_type: Dict[str, List[Node]] = defaultdict(list)
        self.relations_by_type: Dict[str, List[Relation]] = defaultdict(list)
        self.relations_by_from: Dict[str, List[Relation]] = defaultdict(list)
        self.relations_by_to: Dict[str, List[Relation]] = defaultdict(list)
        self.parser = GraphiteParser()
        self.query = QueryBuilder(self)

    # =============== SCHEMA DEFINITION ===============

    def define_node(self, definition: str):
        """Define a node type from DSL.

        Raises:
            ValueError: If the definition names a parent type that has not
                been defined yet.
        """
        node_name, fields, parent_name = self.parser.parse_node_definition(definition)

        parent = None
        if parent_name:
            if parent_name not in self.node_types:
                raise ValueError(f"Parent node type '{parent_name}' not found")
            parent = self.node_types[parent_name]

        node_type = NodeType(node_name, fields, parent)
        self.node_types[node_name] = node_type

    def define_relation(self, definition: str):
        """Define a relation type from DSL.

        When the definition names a reverse relation, a second relation type
        with swapped endpoints is registered under that name.

        Raises:
            ValueError: If either endpoint node type is not defined.
        """
        (rel_name, from_type, to_type, fields,
         reverse_name, is_bidirectional) = self.parser.parse_relation_definition(definition)

        # Validate node types exist
        if from_type not in self.node_types:
            raise ValueError(f"Node type '{from_type}' not found")
        if to_type not in self.node_types:
            raise ValueError(f"Node type '{to_type}' not found")

        rel_type = RelationType(
            rel_name, from_type, to_type,
            fields, reverse_name, is_bidirectional
        )
        self.relation_types[rel_name] = rel_type

        # Register reverse relation if specified
        if reverse_name:
            reverse_rel = RelationType(
                reverse_name, to_type, from_type,
                fields, rel_name, is_bidirectional
            )
            self.relation_types[reverse_name] = reverse_rel

    # =============== DATA MANIPULATION ===============

    @staticmethod
    def _coerce_date(field_def: Field, value: Any) -> Any:
        """Turn a ``YYYY-MM-DD`` string into a date for DATE fields.

        Values for other field types, and non-string values, pass through.

        Raises:
            ValueError: If a DATE field receives a string in another format.
        """
        if field_def.dtype == DataType.DATE and isinstance(value, str):
            try:
                return datetime.strptime(value, "%Y-%m-%d").date()
            except ValueError as e:
                raise ValueError(f"'{e}' while parsing date string: {value}") from e
        return value

    def create_node(self, node_type: str, node_id: str, *values) -> Node:
        """Create a node instance.

        Args:
            node_type: Name of a defined node type.
            node_id: Identifier of the node; re-using an id replaces the
                previously stored node.
            *values: One value per field, inherited fields first.

        Raises:
            ValueError: If the type is unknown, the number of values does not
                match the number of fields, or a date string is malformed.
        """
        if node_type not in self.node_types:
            raise ValueError(f"Node type '{node_type}' not defined")

        node_type_obj = self.node_types[node_type]
        all_fields = node_type_obj.get_all_fields()

        if len(values) != len(all_fields):
            raise ValueError(f"Expected {len(all_fields)} values, got {len(values)}")

        node_values = {
            current_field.name: self._coerce_date(current_field, value)
            for current_field, value in zip(all_fields, values)
        }

        previous = self.nodes.get(node_id)
        if previous is not None:
            # The id is being re-used: drop the old instance from the per-type
            # index so it does not keep listing a node that was replaced.
            stale_bucket = self.node_by_type.get(previous.type_name)
            if stale_bucket:
                stale_bucket[:] = [n for n in stale_bucket if n is not previous]

        new_node = Node(node_type, node_id, node_values, node_type_obj)
        self.nodes[node_id] = new_node
        self.node_by_type[node_type].append(new_node)
        return new_node

    def create_relation(self, from_id: str, to_id: str, rel_type: str, *values) -> Relation:
        """Create a relation instance.

        Values are matched to the relation type's fields by position; fields
        without a corresponding value are left unset. For a bidirectional
        relation type the mirrored relation is stored as well.

        Raises:
            ValueError: If the relation type or either node is unknown, or a
                date string is malformed.
        """
        if rel_type not in self.relation_types:
            raise ValueError(f"Relation type '{rel_type}' not defined")

        rel_type_obj = self.relation_types[rel_type]

        # Check if nodes exist
        if from_id not in self.nodes:
            raise ValueError(f"Node '{from_id}' not found")
        if to_id not in self.nodes:
            raise ValueError(f"Node '{to_id}' not found")

        # zip stops at the shorter sequence, so surplus values are ignored
        # and fields without a value stay absent.
        rel_values = {
            rel_field.name: self._coerce_date(rel_field, value)
            for rel_field, value in zip(rel_type_obj.fields, values)
        }

        new_relation = Relation(rel_type, from_id, to_id, rel_values, rel_type_obj)
        self.relations.append(new_relation)
        self.relations_by_type[rel_type].append(new_relation)
        self.relations_by_from[from_id].append(new_relation)
        self.relations_by_to[to_id].append(new_relation)

        # If relation is bidirectional, create reverse automatically
        if rel_type_obj.is_bidirectional:
            reverse_rel = Relation(rel_type, to_id, from_id, rel_values, rel_type_obj)
            self.relations.append(reverse_rel)
            self.relations_by_type[rel_type].append(reverse_rel)
            self.relations_by_from[to_id].append(reverse_rel)
            self.relations_by_to[from_id].append(reverse_rel)

        return new_relation

    # =============== QUERY METHODS ===============

    def get_node(self, node_id: str) -> Optional[Node]:
        """Get node by ID, or ``None`` if there is no such node"""
        return self.nodes.get(node_id)

    def get_nodes_of_type(self, node_type: str) -> List[Node]:
        """Get all nodes of a specific type.

        Returns a new list, so callers cannot corrupt the internal index.
        """
        return list(self.node_by_type.get(node_type, []))

    def get_relations_from(self, node_id: str, rel_type: Optional[str] = None) -> List[Relation]:
        """Get relations starting at a node, optionally limited to one type.

        Returns a new list, so callers cannot corrupt the internal index.
        """
        all_rels = self.relations_by_from.get(node_id, [])
        if rel_type:
            return [r for r in all_rels if r.type_name == rel_type]
        return list(all_rels)

    def get_relations_to(self, node_id: str, rel_type: Optional[str] = None) -> List[Relation]:
        """Get relations ending at a node, optionally limited to one type.

        Returns a new list, so callers cannot corrupt the internal index.
        """
        all_rels = self.relations_by_to.get(node_id, [])
        if rel_type:
            return [r for r in all_rels if r.type_name == rel_type]
        return list(all_rels)

    # =============== BULK LOADING ===============

    @staticmethod
    def _definition_keyword(stripped: str) -> Optional[str]:
        """Return ``'node'``/``'relation'`` if *stripped* opens a definition.

        A bare prefix test is not enough: identifiers such as ``node_count``
        or ``node1`` and field lines such as ``node: string`` also begin with
        a keyword without being definition headers.
        """
        for keyword in ('node', 'relation'):
            if not stripped.startswith(keyword):
                continue
            rest = stripped[len(keyword):]
            if not rest:
                return keyword
            if rest[0].isalnum() or rest[0] == '_':
                continue  # keyword is only the prefix of a longer identifier
            if rest.lstrip().startswith(':'):
                continue  # "node: <type>" is a field named like the keyword
            return keyword
        return None

    def load_dsl(self, dsl: str):
        """Load Graphite DSL.

        Blank lines and lines starting with ``#`` are skipped. A ``node`` or
        ``relation`` header collects the following lines up to the next blank
        line or header into one definition. Remaining lines are treated as
        node instances (no ``[``) or relation instances (``-[...]->`` /
        ``-[...]-``); anything else is ignored.
        """
        lines = dsl.strip().split('\n')
        i = 0

        while i < len(lines):
            line = lines[i].strip()
            if not line or line.startswith('#'):
                i += 1
                continue

            keyword = self._definition_keyword(line)
            if keyword is not None:
                # Collect multiline definition
                definition = [line]
                i += 1
                while i < len(lines):
                    follower = lines[i].strip()
                    if not follower or self._definition_keyword(follower) is not None:
                        break
                    definition.append(lines[i])
                    i += 1
                text = '\n'.join(definition)
                if keyword == 'node':
                    self.define_node(text)
                else:
                    self.define_relation(text)

            elif '[' not in line:
                # Node instance
                node_type, node_id, values = self.parser.parse_node_instance(line)
                self.create_node(node_type, node_id, *values)
                i += 1

            elif '-[' in line and (']->' in line or ']-' in line):
                # Relation instance
                from_id, to_id, rel_type, values, _ = self.parser.parse_relation_instance(line)
                self.create_relation(from_id, to_id, rel_type, *values)
                i += 1
            else:
                i += 1

    # =============== PERSISTENCE ===============

    def save(self, filename: str):
        """Save database to file (pickle format)"""
        data = {
            'node_types': self.node_types,
            'relation_types': self.relation_types,
            'nodes': self.nodes,
            'relations': self.relations,
            'node_by_type': self.node_by_type,
            'relations_by_type': self.relations_by_type,
            'relations_by_from': self.relations_by_from,
            'relations_by_to': self.relations_by_to,
        }
        with open(filename, 'wb') as f:
            # noinspection PyTypeChecker
            pickle.dump(data, f)

    def load(self, filename: str):
        """Load database from a file written by ``save``.

        SECURITY: the file is read with ``pickle``, which can execute
        arbitrary code while loading. Only open files from a trusted source.
        """
        with open(filename, 'rb') as f:
            data = pickle.load(f)
        self.node_types = data['node_types']
        self.relation_types = data['relation_types']
        self.nodes = data['nodes']
        self.relations = data['relations']
        self.node_by_type = data['node_by_type']
        self.relations_by_type = data['relations_by_type']
        self.relations_by_from = data['relations_by_from']
        self.relations_by_to = data['relations_by_to']

    # =============== UTILITY METHODS ===============

    def clear(self):
        """Clear all data, including the schema"""
        self.node_types.clear()
        self.relation_types.clear()
        self.nodes.clear()
        self.relations.clear()
        self.node_by_type.clear()
        self.relations_by_type.clear()
        self.relations_by_from.clear()
        self.relations_by_to.clear()

    def stats(self) -> Dict[str, Any]:
        """Get database statistics (counts of types and instances)"""
        return {
            'node_types': len(self.node_types),
            'relation_types': len(self.relation_types),
            'nodes': len(self.nodes),
            'relations': len(self.relations),
        }

    # =============== SYNTAX SUGAR ===============

    def parse(self, data: str):
        """Parse data into nodes and relations (structure or data)"""
        self.load_dsl(data)
704
+
705
+ # =============== SYNTAX SUGAR ===============
706
+
707
def node(node_type: str, **fields) -> str:
    """Build the DSL text of a node definition.

    The first line is ``node <node_type>``; each keyword argument adds one
    ``<name>: <type>`` line, in the order given.
    """
    header = f"node {node_type}"
    body = [f"{field_name}: {field_type}" for field_name, field_type in fields.items()]
    return "\n".join([header, *body])
713
+
714
def relation(name: str, from_type: str, to_type: str, **kwargs) -> str:
    """Build the DSL text of a relation definition.

    Recognised keyword arguments:
        both: truthy to mark the relation bidirectional (``-`` connector
            instead of ``->`` and a ``both`` marker on the header).
        reverse: name of the reverse relation, added to the header.
        fields: mapping of field name to type, one line per entry.
    """
    two_way = kwargs.get('both')
    reverse_name = kwargs.get('reverse')

    header = f"relation {name}"
    if two_way:
        header += " both"
    if reverse_name:
        header += f" reverse {reverse_name}"

    connector = "-" if two_way else "->"
    endpoints = f"{from_type} {connector} {to_type}"

    field_lines = [
        f"{field_name}: {field_type}"
        for field_name, field_type in kwargs.get('fields', {}).items()
    ]

    return "\n".join([header, endpoints, *field_lines])
729
+
730
+ # ================ PUBLIC API ================
731
+
732
def engine() -> GraphiteEngine:
    """Return a new, empty ``GraphiteEngine``."""
    fresh_engine = GraphiteEngine()
    return fresh_engine