graphitedb 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphite/__init__.py +687 -681
- graphitedb-0.1.2.dist-info/METADATA +205 -0
- graphitedb-0.1.2.dist-info/RECORD +7 -0
- {graphitedb-0.1.1.dist-info → graphitedb-0.1.2.dist-info}/licenses/LICENSE +21 -21
- graphitedb-0.1.1.dist-info/METADATA +0 -149
- graphitedb-0.1.1.dist-info/RECORD +0 -7
- {graphitedb-0.1.1.dist-info → graphitedb-0.1.2.dist-info}/WHEEL +0 -0
- {graphitedb-0.1.1.dist-info → graphitedb-0.1.2.dist-info}/top_level.txt +0 -0
graphite/__init__.py
CHANGED
|
@@ -1,681 +1,687 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import pickle
|
|
4
|
-
import re
|
|
5
|
-
from collections import defaultdict
|
|
6
|
-
from dataclasses import dataclass, field
|
|
7
|
-
from datetime import date, datetime
|
|
8
|
-
from enum import Enum
|
|
9
|
-
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
10
|
-
|
|
11
|
-
# =============== TYPE SYSTEM ===============
|
|
12
|
-
|
|
13
|
-
class DataType(Enum):
    """Value types that a schema field can be declared with.

    Each member's value is the keyword used for that type in a field
    declaration such as ``age: int``.
    """

    STRING = "string"
    INT = "int"
    DATE = "date"
    FLOAT = "float"
    BOOL = "bool"
|
|
19
|
-
|
|
20
|
-
@dataclass
class Field:
    """One named, typed entry of a node or relation schema."""

    # Field name used as the key in an instance's values mapping.
    name: str
    # Declared value type of the field.
    dtype: DataType
    # Optional default; None when none is given.
    default: Any = None
|
|
25
|
-
|
|
26
|
-
@dataclass
class NodeType:
    """Schema of a node: a name, its own fields and an optional parent type."""

    name: str
    fields: List[Field] = field(default_factory=list)
    parent: Optional[NodeType] = None

    def get_all_fields(self) -> List[Field]:
        """Return the inherited fields followed by this type's own fields.

        The result is always a new list, so callers may mutate it freely.
        """
        own_fields = list(self.fields)
        if not self.parent:
            return own_fields
        # Ancestors come first so positional values line up parent-to-child.
        return self.parent.get_all_fields() + own_fields

    def __hash__(self):
        # Hash on the name only.
        return hash(self.name)
|
|
41
|
-
|
|
42
|
-
@dataclass
class RelationType:
    """Schema of a relation between two node types."""

    name: str
    # Names of the node types at the two ends of the relation.
    from_type: str
    to_type: str
    fields: List[Field] = field(default_factory=list)
    # Name of the paired reverse relation, when one is declared.
    reverse_name: Optional[str] = None
    is_bidirectional: bool = False

    def __hash__(self):
        # Hash on the name only.
        return hash(self.name)
|
|
53
|
-
|
|
54
|
-
# =============== INSTANCES ===============
|
|
55
|
-
|
|
56
|
-
@dataclass
class Node:
    """A node instance: its type name, id and field values."""

    type_name: str
    id: str
    values: Dict[str, Any]
    _type_ref: Optional[NodeType] = None

    def get(self, field_name: str) -> Any:
        """Return the value stored for *field_name*, or None when absent."""
        stored = self.values
        return stored.get(field_name)

    def __getitem__(self, key):
        # node["field"] is shorthand for node.get("field"); missing keys give None.
        return self.get(key)

    def __repr__(self):
        return "Node({}:{})".format(self.type_name, self.id)
|
|
71
|
-
|
|
72
|
-
@dataclass
class Relation:
    """A relation instance linking two nodes by their ids."""

    type_name: str
    from_node: str  # id of the source node
    to_node: str  # id of the target node
    values: Dict[str, Any]
    _type_ref: Optional[RelationType] = None

    def get(self, field_name: str) -> Any:
        """Return the value stored for *field_name*, or None when absent."""
        stored = self.values
        return stored.get(field_name)

    def __repr__(self):
        return "Relation({}:{}->{})".format(self.type_name, self.from_node, self.to_node)
|
|
85
|
-
|
|
86
|
-
# =============== PARSER ===============
|
|
87
|
-
|
|
88
|
-
class GraphiteParser:
    """Parser for Graphite DSL.

    Every method is static and turns one DSL fragment (a type definition or
    an instance line) into plain Python values.
    """

    # Relation instance such as `a -[TYPE, attr]-> b` or `a -[TYPE]- b`.
    _RELATION_INSTANCE_RE = re.compile(
        r'(\w+)\s*(-\[([^\]]+)\]\s*[->-]\s*|\s*[->-]\s*\[([^\]]+)\]\s*->\s*)(\w+)'
    )
    # The standalone `both` keyword (not a name that merely starts with it).
    _BOTH_KEYWORD_RE = re.compile(r'\sboth(?=\s|$)')
    _INT_RE = re.compile(r'-?\d+')
    _FLOAT_RE = re.compile(r'-?(?:\d+\.\d*|\.\d+)')

    @staticmethod
    def _strip_keyword(text: str, keyword: str) -> str:
        """Remove one leading *keyword* from *text* and trim whitespace.

        Only the leading occurrence is removed, so names that contain the
        keyword (e.g. ``Mynode``) are left intact.
        """
        text = text.strip()
        if text.startswith(keyword):
            text = text[len(keyword):]
        return text.strip()

    @staticmethod
    def _parse_fields(field_lines) -> List[Field]:
        """Parse ``name: type`` lines into Field objects.

        Lines that do not split into exactly two parts on ``:`` (including
        blank lines) are ignored. An unknown type name raises ValueError
        from the DataType lookup.
        """
        fields = []
        for field_line in field_lines:
            pieces = field_line.strip().split(':')
            if len(pieces) != 2:
                continue
            fields.append(Field(pieces[0].strip(), DataType(pieces[1].strip())))
        return fields

    @staticmethod
    def _coerce_literal(val: str) -> Any:
        """Convert one instance literal to str, int, float or bool.

        Quoted text loses its quotes; anything unrecognised (including
        date-like text such as ``2000-10-04``) is returned unchanged.
        """
        if val.startswith('"') and val.endswith('"'):
            return val[1:-1]
        # Integers are tested before anything dash-related so that `-5`
        # is a number rather than date-like text.
        if GraphiteParser._INT_RE.fullmatch(val):
            return int(val)
        if GraphiteParser._FLOAT_RE.fullmatch(val):
            return float(val)
        lowered = val.lower()
        if lowered in ('true', 'false'):
            return lowered == 'true'
        return val

    @staticmethod
    def parse_node_definition(line: str) -> Tuple[str, List[Field], Optional[str]]:
        """Parse node type definition: 'node Person\\nname: string\\nage: int'

        Returns:
            (node name, declared fields, parent type name or None).
        """
        lines = line.strip().split('\n')
        header = lines[0].strip()

        # Optional inheritance clause: `node Child from Parent`.
        parent = None
        if ' from ' in header:
            parts = header.split(' from ')
            header = parts[0]
            parent = parts[1].strip()

        node_name = GraphiteParser._strip_keyword(header, 'node')
        fields = GraphiteParser._parse_fields(lines[1:])
        return node_name, fields, parent

    @staticmethod
    def parse_relation_definition(line: str) -> Tuple[str, str, str, List[Field], Optional[str], bool]:
        """Parse relation definition.

        The first line is `relation NAME [reverse OTHER] [both]`, the second
        names the participants (`A -> B` or `A - B`), and any further lines
        are `name: type` fields.

        Returns:
            (name, from type, to type, fields, reverse name or None,
            is_bidirectional).

        Raises:
            ValueError: if the participants line is missing or malformed.
        """
        lines = line.strip().split('\n')
        header = lines[0].strip()

        # `both` must be a standalone word to count as the keyword.
        is_bidirectional = GraphiteParser._BOTH_KEYWORD_RE.search(header) is not None
        if is_bidirectional:
            header = GraphiteParser._BOTH_KEYWORD_RE.sub('', header)

        reverse_name = None
        if ' reverse ' in header:
            parts = header.split(' reverse ')
            header = parts[0]
            reverse_name = parts[1].strip()
        relation_name = GraphiteParser._strip_keyword(header, 'relation')

        if len(lines) < 2:
            raise ValueError(f"Invalid relation format: {line}")
        participants_line = lines[1].strip()
        if '->' in participants_line:
            from_to = participants_line.split('->')
            from_type = from_to[0].strip()
            to_type = from_to[1].strip()
        elif '-' in participants_line:
            parts = participants_line.split('-')
            from_type = parts[0].strip()
            to_type = parts[2].strip() if len(parts) > 2 else parts[1].strip()
        else:
            raise ValueError(f"Invalid relation format: {participants_line}")

        fields = GraphiteParser._parse_fields(lines[2:])
        return relation_name, from_type, to_type, fields, reverse_name, is_bidirectional

    @staticmethod
    def parse_node_instance(line: str) -> Tuple[str, str, List[Any]]:
        """Parse node instance: 'User, user_1, "Joe Doe", 32, "joe4030"'

        Returns:
            (node type name, node id, list of converted values).

        Raises:
            ValueError: if the line has fewer than two comma-separated parts.
        """
        # Split on commas that are outside double quotes.
        parts = []
        current = ''
        in_quotes = False
        for char in line:
            if char == '"':
                in_quotes = not in_quotes
                current += char
            elif char == ',' and not in_quotes:
                parts.append(current.strip())
                current = ''
            else:
                current += char
        if current:
            parts.append(current.strip())

        if len(parts) < 2:
            raise ValueError(f"Invalid node instance format: {line}")

        node_type = parts[0].strip()
        node_id = parts[1].strip()
        values = [GraphiteParser._coerce_literal(val.strip()) for val in parts[2:]]
        return node_type, node_id, values

    @staticmethod
    def parse_relation_instance(line: str) -> tuple[str | Any, str | Any, Any, list[Any], str]:
        """Parse relation instance: 'user_1 -[OWNER, 2000-10-04]-> notebook'

        Returns:
            (from node id, to node id, relation type name, attribute strings,
            direction). Attributes are returned as unconverted text.

        Raises:
            ValueError: if the line does not match the relation syntax.
        """
        match = GraphiteParser._RELATION_INSTANCE_RE.search(line)
        if not match:
            raise ValueError(f"Invalid relation format: {line}")

        from_node = match.group(1)
        to_node = match.group(5)

        # Exactly one of the two bracket groups is set, depending on which
        # alternative of the pattern matched.
        rel_part = match.group(3) or match.group(4)
        rel_parts = [p.strip() for p in rel_part.split(',')]
        rel_type = rel_parts[0]
        attributes = rel_parts[1:]

        if '->' in line:
            direction = 'forward'
        elif '-[' in line and ']-' in line:
            direction = 'bidirectional'
        else:
            direction = 'forward'

        return from_node, to_node, rel_type, attributes, direction
|
|
238
|
-
|
|
239
|
-
# =============== QUERY ENGINE ===============
|
|
240
|
-
|
|
241
|
-
class QueryResult:
    """Represents a query result that can be chained.

    Every chaining method returns a new QueryResult and leaves this one
    unchanged.
    """

    # `field <op> literal`; the leftmost operator wins, and at that position
    # the two-character operators are tried before the one-character ones.
    _CONDITION_RE = re.compile(r'\s*(.*?)\s*(>=|<=|!=|==|>|<|=)\s*(.*?)\s*$', re.DOTALL)
    _INT_RE = re.compile(r'-?\d+')
    _FLOAT_RE = re.compile(r'-?(?:\d+\.\d*|\.\d+)')
    _OPERATORS = {
        '=': lambda a, b: a == b,
        '==': lambda a, b: a == b,
        '!=': lambda a, b: a != b,
        '>': lambda a, b: a > b,
        '<': lambda a, b: a < b,
        '>=': lambda a, b: a >= b,
        '<=': lambda a, b: a <= b,
    }

    def __init__(self, engine: GraphiteEngine, nodes: List[Node], edges: Optional[List[Relation]] = None):
        self.engine = engine
        self.nodes = nodes
        self.edges = edges or []
        self.current_relation: Optional[RelationType] = None
        self.direction: str = 'outgoing'

    def where(self, condition: Union[str, Callable]) -> QueryResult:
        """Filter nodes based on condition.

        Args:
            condition: a callable taking a node, or a string such as
                ``"age > 18"``.

        A node for which a callable condition raises is treated as not
        matching.
        """
        filtered_nodes = []

        if callable(condition):
            for node in self.nodes:
                try:
                    matched = condition(node)
                except Exception:
                    # Best effort: e.g. comparing a missing (None) value.
                    # Unlike a bare except, KeyboardInterrupt still propagates.
                    continue
                if matched:
                    filtered_nodes.append(node)
        else:
            for node in self.nodes:
                if self._evaluate_condition(node, condition):
                    filtered_nodes.append(node)

        return QueryResult(self.engine, filtered_nodes, self.edges)

    @classmethod
    def _parse_literal(cls, text: str) -> Any:
        """Convert the right-hand side of a condition to a Python value."""
        if text.startswith('"') and text.endswith('"'):
            return text[1:-1]
        if cls._INT_RE.fullmatch(text):
            return int(text)
        if cls._FLOAT_RE.fullmatch(text):
            return float(text)
        if text.lower() in ('true', 'false'):
            return text.lower() == 'true'
        return text

    def _evaluate_condition(self, node: Node, condition: str) -> bool:
        """Evaluate a condition string on a node.

        Returns False when the condition has no operator, when the node has
        no value for the field, or when the two sides cannot be compared.
        """
        parsed = self._CONDITION_RE.match(condition)
        if not parsed:
            return False
        left, op, right = parsed.groups()

        node_value = node.get(left)
        if node_value is None:
            return False

        right_value = self._parse_literal(right)
        # Let date fields be compared against unquoted YYYY-MM-DD text.
        if isinstance(node_value, date) and isinstance(right_value, str):
            try:
                right_value = datetime.strptime(right_value, "%Y-%m-%d").date()
            except ValueError:
                pass  # not a date literal; compare as-is

        try:
            return bool(self._OPERATORS[op](node_value, right_value))
        except TypeError:
            # Ordering between unrelated types (e.g. int vs str) is no match.
            return False

    def traverse(self, relation_type: str, direction: str = 'outgoing') -> QueryResult:
        """Traverse relations from current nodes.

        Args:
            relation_type: relation type name to follow.
            direction: 'outgoing', 'incoming', or anything else for both.

        Returns:
            A QueryResult holding the distinct neighbouring nodes and every
            edge that was followed.
        """
        found = {}
        result_edges = []

        for node in self.nodes:
            outgoing = incoming = []
            if direction != 'incoming':
                outgoing = self.engine.get_relations_from(node.id, relation_type)
            if direction != 'outgoing':
                incoming = self.engine.get_relations_to(node.id, relation_type)

            # Pair each edge with the endpoint that is NOT the current node.
            # In 'both' mode, outgoing edges lead to to_node and incoming
            # edges lead to from_node.
            hops = [(edge, edge.to_node) for edge in outgoing]
            hops += [(edge, edge.from_node) for edge in incoming]

            for edge, target_id in hops:
                result_edges.append(edge)
                target_node = self.engine.get_node(target_id)
                if target_node:
                    # Keyed by id to drop duplicates, keeping first-seen order.
                    found[target_node.id] = target_node

        return QueryResult(self.engine, list(found.values()), result_edges)

    def outgoing(self, relation_type: str) -> QueryResult:
        """Traverse outgoing relations."""
        return self.traverse(relation_type, 'outgoing')

    def incoming(self, relation_type: str) -> QueryResult:
        """Traverse incoming relations."""
        return self.traverse(relation_type, 'incoming')

    def both(self, relation_type: str) -> QueryResult:
        """Traverse both directions."""
        return self.traverse(relation_type, 'both')

    def limit(self, n: int) -> QueryResult:
        """Limit number of results (nodes and edges are each cut to n)."""
        return QueryResult(self.engine, self.nodes[:n], self.edges[:n])

    def distinct(self) -> QueryResult:
        """Get distinct nodes (by id, keeping the first occurrence)."""
        seen = set()
        distinct_nodes = []
        for node in self.nodes:
            if node.id not in seen:
                seen.add(node.id)
                distinct_nodes.append(node)
        return QueryResult(self.engine, distinct_nodes, self.edges)

    def order_by(self, field: str, descending: bool = False) -> QueryResult:
        """Order nodes by field; nodes without a value sort after the others
        in ascending order."""

        def get_key(node):
            val = node.get(field)
            # The leading flag keeps None from being compared with real values.
            return (val is None, val)

        sorted_nodes = sorted(self.nodes, key=get_key, reverse=descending)
        return QueryResult(self.engine, sorted_nodes, self.edges)

    def count(self) -> int:
        """Count nodes."""
        return len(self.nodes)

    def get(self) -> List[Node]:
        """Get all nodes."""
        return self.nodes

    def first(self) -> Optional[Node]:
        """Get first node, or None when the result is empty."""
        return self.nodes[0] if self.nodes else None

    def ids(self) -> List[str]:
        """Get node IDs."""
        return [n.id for n in self.nodes]
|
|
389
|
-
|
|
390
|
-
class QueryBuilder:
    """Builder for creating queries"""

    def __init__(self, engine: GraphiteEngine):
        self.engine = engine

    def __getattr__(self, name: str) -> QueryResult:
        """Allow starting query from node type: engine.User

        Raises:
            AttributeError: if *name* is not a defined node type.
        """
        # __getattr__ runs only after normal lookup fails. Read `engine`
        # straight from the instance dict: on a bare instance (as created by
        # copy or unpickling) `self.engine` would re-enter this method and
        # recurse until RecursionError.
        engine = self.__dict__.get('engine')
        if engine is not None and name in engine.node_types:
            nodes = engine.get_nodes_of_type(name)
            return QueryResult(engine, nodes)
        raise AttributeError(f"No node type '{name}' found")
|
|
402
|
-
|
|
403
|
-
# =============== MAIN ENGINE ===============
|
|
404
|
-
|
|
405
|
-
class GraphiteEngine:
|
|
406
|
-
"""Main graph database engine"""
|
|
407
|
-
|
|
408
|
-
def __init__(self):
    """Create an empty engine with no schema and no data."""
    # Schema registries, keyed by type name.
    self.node_types: Dict[str, NodeType] = {}
    self.relation_types: Dict[str, RelationType] = {}

    # Primary instance storage.
    self.nodes: Dict[str, Node] = {}
    self.relations: List[Relation] = []

    # Secondary indexes; defaultdict so that appends need no key check.
    self.node_by_type: Dict[str, List[Node]] = defaultdict(list)
    self.relations_by_type: Dict[str, List[Relation]] = defaultdict(list)
    self.relations_by_from: Dict[str, List[Relation]] = defaultdict(list)
    self.relations_by_to: Dict[str, List[Relation]] = defaultdict(list)

    # Collaborators: the DSL parser and the attribute-style query entry point.
    self.parser = GraphiteParser()
    self.query = QueryBuilder(self)
|
|
419
|
-
|
|
420
|
-
# =============== SCHEMA DEFINITION ===============
|
|
421
|
-
|
|
422
|
-
def define_node(self, definition: str):
    """Define a node type from DSL.

    Raises:
        ValueError: if the definition names a parent type that is not
            already defined.
    """
    node_name, fields, parent_name = self.parser.parse_node_definition(definition)

    # Guard clause: a declared parent must already be registered.
    if parent_name and parent_name not in self.node_types:
        raise ValueError(f"Parent node type '{parent_name}' not found")

    parent = self.node_types[parent_name] if parent_name else None
    self.node_types[node_name] = NodeType(node_name, fields, parent)
|
|
434
|
-
|
|
435
|
-
def define_relation(self, definition: str):
    """Define a relation type from DSL.

    When the definition declares a reverse name, a second relation type
    with swapped endpoints is registered under that name.

    Raises:
        ValueError: if either endpoint node type is not defined.
    """
    parsed = self.parser.parse_relation_definition(definition)
    rel_name, from_type, to_type, fields, reverse_name, is_bidirectional = parsed

    # Both endpoints must be known node types (source is checked first).
    for endpoint in (from_type, to_type):
        if endpoint not in self.node_types:
            raise ValueError(f"Node type '{endpoint}' not found")

    self.relation_types[rel_name] = RelationType(
        rel_name, from_type, to_type,
        fields, reverse_name, is_bidirectional
    )

    if not reverse_name:
        return

    # The reverse type points back at the forward one through reverse_name.
    self.relation_types[reverse_name] = RelationType(
        reverse_name, to_type, from_type,
        fields, rel_name, is_bidirectional
    )
|
|
459
|
-
|
|
460
|
-
# =============== DATA MANIPULATION ===============
|
|
461
|
-
|
|
462
|
-
def create_node(self, node_type: str, node_id: str, *values) -> Node:
    """Create a node instance.

    Args:
        node_type: name of a defined node type.
        node_id: id of the new node; an existing node with the same id is
            replaced.
        *values: one value per field, inherited fields first.

    Raises:
        ValueError: if the type is undefined or the number of values does
            not match the number of fields.
    """
    if node_type not in self.node_types:
        raise ValueError(f"Node type '{node_type}' not defined")

    node_type_obj = self.node_types[node_type]
    all_fields = node_type_obj.get_all_fields()

    if len(values) != len(all_fields):
        raise ValueError(f"Expected {len(all_fields)} values, got {len(values)}")

    node_values = {}
    for spec, value in zip(all_fields, values):
        # Convert YYYY-MM-DD text to a date for DATE fields.
        if spec.dtype == DataType.DATE and isinstance(value, str):
            try:
                value = datetime.strptime(value, "%Y-%m-%d").date()
            except ValueError:
                pass  # best effort: keep text that is not a valid date
        node_values[spec.name] = value

    # When replacing an existing id, drop the old node from the type index
    # so get_nodes_of_type does not return a node get_node no longer knows.
    previous = self.nodes.get(node_id)
    if previous is not None:
        self.node_by_type[previous.type_name] = [
            n for n in self.node_by_type[previous.type_name] if n is not previous
        ]

    node = Node(node_type, node_id, node_values, node_type_obj)
    self.nodes[node_id] = node
    self.node_by_type[node_type].append(node)
    return node
|
|
488
|
-
|
|
489
|
-
def create_relation(self, from_id: str, to_id: str, rel_type: str, *values) -> Relation:
    """Create a relation instance.

    Args:
        from_id: id of the source node.
        to_id: id of the target node.
        rel_type: name of a defined relation type.
        *values: values for the relation's fields, in declaration order.
            Extra values are ignored and fields without a value are left
            unset.

    Returns:
        The forward relation. For a bidirectional type a mirrored relation
        (to_id -> from_id) is stored as well.

    Raises:
        ValueError: if the relation type or either node does not exist.
    """
    if rel_type not in self.relation_types:
        raise ValueError(f"Relation type '{rel_type}' not defined")

    rel_type_obj = self.relation_types[rel_type]

    if from_id not in self.nodes:
        raise ValueError(f"Node '{from_id}' not found")
    if to_id not in self.nodes:
        raise ValueError(f"Node '{to_id}' not found")

    rel_values = {}
    # zip stops at the shorter input, which gives the "as many as supplied"
    # behaviour described above.
    for spec, value in zip(rel_type_obj.fields, values):
        if spec.dtype == DataType.DATE and isinstance(value, str):
            try:
                value = datetime.strptime(value, "%Y-%m-%d").date()
            except ValueError:
                pass  # best effort: keep text that is not a valid date
        rel_values[spec.name] = value

    def register(source_id, target_id):
        # Store one relation and add it to every index.
        rel = Relation(rel_type, source_id, target_id, rel_values, rel_type_obj)
        self.relations.append(rel)
        self.relations_by_type[rel_type].append(rel)
        self.relations_by_from[source_id].append(rel)
        self.relations_by_to[target_id].append(rel)
        return rel

    relation = register(from_id, to_id)

    # If relation is bidirectional, create the mirrored edge automatically.
    if rel_type_obj.is_bidirectional:
        register(to_id, from_id)

    return relation
|
|
529
|
-
|
|
530
|
-
# =============== QUERY METHODS ===============
|
|
531
|
-
|
|
532
|
-
def get_node(self, node_id: str) -> Optional[Node]:
|
|
533
|
-
"""Get node by ID"""
|
|
534
|
-
return self.nodes.get(node_id)
|
|
535
|
-
|
|
536
|
-
def get_nodes_of_type(self, node_type: str) -> List[Node]:
|
|
537
|
-
"""Get all nodes of a specific type"""
|
|
538
|
-
return self.node_by_type.get(node_type, [])
|
|
539
|
-
|
|
540
|
-
def get_relations_from(self, node_id: str, rel_type: str = None) -> List[Relation]:
|
|
541
|
-
"""Get relations from a node"""
|
|
542
|
-
all_rels = self.relations_by_from.get(node_id, [])
|
|
543
|
-
if rel_type:
|
|
544
|
-
return [r for r in all_rels if r.type_name == rel_type]
|
|
545
|
-
return all_rels
|
|
546
|
-
|
|
547
|
-
def get_relations_to(self, node_id: str, rel_type: str = None) -> List[Relation]:
|
|
548
|
-
"""Get relations to a node"""
|
|
549
|
-
all_rels = self.relations_by_to.get(node_id, [])
|
|
550
|
-
if rel_type:
|
|
551
|
-
return [r for r in all_rels if r.type_name == rel_type]
|
|
552
|
-
return all_rels
|
|
553
|
-
|
|
554
|
-
# =============== BULK LOADING ===============
|
|
555
|
-
|
|
556
|
-
def load_dsl(self, dsl: str):
|
|
557
|
-
"""Load Graphite DSL"""
|
|
558
|
-
lines = dsl.strip().split('\n')
|
|
559
|
-
i = 0
|
|
560
|
-
|
|
561
|
-
while i < len(lines):
|
|
562
|
-
line = lines[i].strip()
|
|
563
|
-
if not line or line.startswith('#'):
|
|
564
|
-
i += 1
|
|
565
|
-
continue
|
|
566
|
-
|
|
567
|
-
if line.startswith('node'):
|
|
568
|
-
# Collect multiline node definition
|
|
569
|
-
node_def = [line]
|
|
570
|
-
i += 1
|
|
571
|
-
while i < len(lines) and lines[i].strip() and not lines[i].strip().startswith(('node', 'relation')):
|
|
572
|
-
node_def.append(lines[i])
|
|
573
|
-
i += 1
|
|
574
|
-
self.define_node('\n'.join(node_def))
|
|
575
|
-
|
|
576
|
-
elif line.startswith('relation'):
|
|
577
|
-
# Collect multiline relation definition
|
|
578
|
-
rel_def = [line]
|
|
579
|
-
i += 1
|
|
580
|
-
while i < len(lines) and lines[i].strip() and not lines[i].strip().startswith(('node', 'relation')):
|
|
581
|
-
rel_def.append(lines[i])
|
|
582
|
-
i += 1
|
|
583
|
-
self.define_relation('\n'.join(rel_def))
|
|
584
|
-
|
|
585
|
-
elif '
|
|
586
|
-
# Node instance
|
|
587
|
-
node_type, node_id, values = self.parser.parse_node_instance(line)
|
|
588
|
-
self.create_node(node_type, node_id, *values)
|
|
589
|
-
i += 1
|
|
590
|
-
|
|
591
|
-
elif
|
|
592
|
-
# Relation instance
|
|
593
|
-
from_id, to_id, rel_type, values, direction = self.parser.parse_relation_instance(line)
|
|
594
|
-
self.create_relation(from_id, to_id, rel_type, *values)
|
|
595
|
-
i += 1
|
|
596
|
-
else:
|
|
597
|
-
i += 1
|
|
598
|
-
|
|
599
|
-
# =============== PERSISTENCE ===============
|
|
600
|
-
|
|
601
|
-
def save(self, filename: str):
|
|
602
|
-
"""Save database to file"""
|
|
603
|
-
with open(filename, 'wb') as f:
|
|
604
|
-
data = {
|
|
605
|
-
'node_types' : self.node_types,
|
|
606
|
-
'relation_types' : self.relation_types,
|
|
607
|
-
'nodes' : self.nodes,
|
|
608
|
-
'relations' : self.relations,
|
|
609
|
-
'node_by_type' : self.node_by_type,
|
|
610
|
-
'relations_by_type': self.relations_by_type,
|
|
611
|
-
'relations_by_from': self.relations_by_from,
|
|
612
|
-
'relations_by_to' : self.relations_by_to,
|
|
613
|
-
}
|
|
614
|
-
# noinspection PyTypeChecker
|
|
615
|
-
pickle.dump(data, f)
|
|
616
|
-
|
|
617
|
-
def load(self, filename: str):
|
|
618
|
-
"""Load database from file"""
|
|
619
|
-
with open(filename, 'rb') as f:
|
|
620
|
-
data = pickle.load(f)
|
|
621
|
-
self.node_types = data['node_types']
|
|
622
|
-
self.relation_types = data['relation_types']
|
|
623
|
-
self.nodes = data['nodes']
|
|
624
|
-
self.relations = data['relations']
|
|
625
|
-
self.node_by_type = data['node_by_type']
|
|
626
|
-
self.relations_by_type = data['relations_by_type']
|
|
627
|
-
self.relations_by_from = data['relations_by_from']
|
|
628
|
-
self.relations_by_to = data['relations_by_to']
|
|
629
|
-
|
|
630
|
-
# =============== UTILITY METHODS ===============
|
|
631
|
-
|
|
632
|
-
def clear(self):
|
|
633
|
-
"""Clear all data"""
|
|
634
|
-
self.node_types.clear()
|
|
635
|
-
self.relation_types.clear()
|
|
636
|
-
self.nodes.clear()
|
|
637
|
-
self.relations.clear()
|
|
638
|
-
self.node_by_type.clear()
|
|
639
|
-
self.relations_by_type.clear()
|
|
640
|
-
self.relations_by_from.clear()
|
|
641
|
-
self.relations_by_to.clear()
|
|
642
|
-
|
|
643
|
-
def stats(self) -> Dict[str, Any]:
|
|
644
|
-
"""Get database statistics"""
|
|
645
|
-
return {
|
|
646
|
-
'node_types' : len(self.node_types),
|
|
647
|
-
'relation_types': len(self.relation_types),
|
|
648
|
-
'nodes' : len(self.nodes),
|
|
649
|
-
'relations' : len(self.relations),
|
|
650
|
-
}
|
|
651
|
-
|
|
652
|
-
# =============== SYNTAX SUGAR ===============
|
|
653
|
-
|
|
654
|
-
def
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
lines
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
return
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import pickle
|
|
4
|
+
import re
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from datetime import date, datetime
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
10
|
+
|
|
11
|
+
# =============== TYPE SYSTEM ===============
|
|
12
|
+
|
|
13
|
+
class DataType(Enum):
    """Value types that a schema field can be declared with.

    Each member's value is the keyword used for that type in a field
    declaration such as ``age: int``.
    """

    STRING = "string"
    INT = "int"
    DATE = "date"
    FLOAT = "float"
    BOOL = "bool"
|
|
19
|
+
|
|
20
|
+
@dataclass
class Field:
    """One named, typed entry of a node or relation schema."""

    # Field name used as the key in an instance's values mapping.
    name: str
    # Declared value type of the field.
    dtype: DataType
    # Optional default; None when none is given.
    default: Any = None
|
|
25
|
+
|
|
26
|
+
@dataclass
class NodeType:
    """Schema of a node: a name, its own fields and an optional parent type."""

    name: str
    fields: List[Field] = field(default_factory=list)
    parent: Optional[NodeType] = None

    def get_all_fields(self) -> List[Field]:
        """Return the inherited fields followed by this type's own fields.

        The result is always a new list, so callers may mutate it freely.
        """
        own_fields = list(self.fields)
        if not self.parent:
            return own_fields
        # Ancestors come first so positional values line up parent-to-child.
        return self.parent.get_all_fields() + own_fields

    def __hash__(self):
        # Hash on the name only.
        return hash(self.name)
|
|
41
|
+
|
|
42
|
+
@dataclass
class RelationType:
    """Schema of a relation between two node types."""

    name: str
    # Names of the node types at the two ends of the relation.
    from_type: str
    to_type: str
    fields: List[Field] = field(default_factory=list)
    # Name of the paired reverse relation, when one is declared.
    reverse_name: Optional[str] = None
    is_bidirectional: bool = False

    def __hash__(self):
        # Hash on the name only.
        return hash(self.name)
|
|
53
|
+
|
|
54
|
+
# =============== INSTANCES ===============
|
|
55
|
+
|
|
56
|
+
@dataclass
class Node:
    """A node instance: its type name, id and field values."""

    type_name: str
    id: str
    values: Dict[str, Any]
    _type_ref: Optional[NodeType] = None

    def get(self, field_name: str) -> Any:
        """Return the value stored for *field_name*, or None when absent."""
        stored = self.values
        return stored.get(field_name)

    def __getitem__(self, key):
        # node["field"] is shorthand for node.get("field"); missing keys give None.
        return self.get(key)

    def __repr__(self):
        return "Node({}:{})".format(self.type_name, self.id)
|
|
71
|
+
|
|
72
|
+
@dataclass
class Relation:
    """A relation instance linking two nodes by their ids."""

    type_name: str
    from_node: str  # id of the source node
    to_node: str  # id of the target node
    values: Dict[str, Any]
    _type_ref: Optional[RelationType] = None

    def get(self, field_name: str) -> Any:
        """Return the value stored for *field_name*, or None when absent."""
        stored = self.values
        return stored.get(field_name)

    def __repr__(self):
        return "Relation({}:{}->{})".format(self.type_name, self.from_node, self.to_node)
|
|
85
|
+
|
|
86
|
+
# =============== PARSER ===============
|
|
87
|
+
|
|
88
|
+
class GraphiteParser:
|
|
89
|
+
"""Parser for Graphite DSL"""
|
|
90
|
+
|
|
91
|
+
@staticmethod
|
|
92
|
+
def parse_node_definition(line: str) -> Tuple[str, List[Field], str]:
|
|
93
|
+
"""Parse node type definition: 'node Person\nname: string\nage: int'"""
|
|
94
|
+
lines = line.strip().split('\n')
|
|
95
|
+
first_line = lines[0].strip()
|
|
96
|
+
|
|
97
|
+
# Parse inheritance
|
|
98
|
+
if ' from ' in first_line:
|
|
99
|
+
parts = first_line.split(' from ')
|
|
100
|
+
node_name = parts[0].replace('node', '').strip()
|
|
101
|
+
parent = parts[1].strip()
|
|
102
|
+
fields_start = 1
|
|
103
|
+
else:
|
|
104
|
+
node_name = first_line.replace('node', '').strip()
|
|
105
|
+
parent = None
|
|
106
|
+
fields_start = 1
|
|
107
|
+
|
|
108
|
+
fields = []
|
|
109
|
+
for field_line in lines[fields_start:]:
|
|
110
|
+
field_line = field_line.strip()
|
|
111
|
+
if not field_line:
|
|
112
|
+
continue
|
|
113
|
+
name_type = field_line.split(':')
|
|
114
|
+
if len(name_type) == 2:
|
|
115
|
+
name = name_type[0].strip()
|
|
116
|
+
dtype_str = name_type[1].strip()
|
|
117
|
+
dtype = DataType(dtype_str)
|
|
118
|
+
fields.append(Field(name, dtype))
|
|
119
|
+
|
|
120
|
+
return node_name, fields, parent
|
|
121
|
+
|
|
122
|
+
@staticmethod
|
|
123
|
+
def parse_relation_definition(line: str) -> Tuple[str, str, str, List[Field], Optional[str], bool]:
|
|
124
|
+
"""Parse relation definition"""
|
|
125
|
+
lines = line.strip().split('\n')
|
|
126
|
+
first_line = lines[0].strip()
|
|
127
|
+
|
|
128
|
+
# Check for 'both' keyword
|
|
129
|
+
is_bidirectional = ' both' in first_line
|
|
130
|
+
if is_bidirectional:
|
|
131
|
+
first_line = first_line.replace(' both', '')
|
|
132
|
+
|
|
133
|
+
# Parse reverse
|
|
134
|
+
reverse_name = None
|
|
135
|
+
if ' reverse ' in first_line:
|
|
136
|
+
parts = first_line.split(' reverse ')
|
|
137
|
+
relation_name = parts[0].replace('relation', '').strip()
|
|
138
|
+
reverse_name = parts[1].strip()
|
|
139
|
+
first_line = parts[0]
|
|
140
|
+
else:
|
|
141
|
+
relation_name = first_line.replace('relation', '').strip()
|
|
142
|
+
|
|
143
|
+
# Parse participants
|
|
144
|
+
participants_line = lines[1].strip()
|
|
145
|
+
if '->' in participants_line:
|
|
146
|
+
from_to = participants_line.split('->')
|
|
147
|
+
from_type = from_to[0].strip()
|
|
148
|
+
to_type = from_to[1].strip()
|
|
149
|
+
elif '-' in participants_line:
|
|
150
|
+
parts = participants_line.split('-')
|
|
151
|
+
from_type = parts[0].strip()
|
|
152
|
+
to_type = parts[2].strip() if len(parts) > 2 else parts[1].strip()
|
|
153
|
+
else:
|
|
154
|
+
raise ValueError(f"Invalid relation format: {participants_line}")
|
|
155
|
+
|
|
156
|
+
# Parse fields
|
|
157
|
+
fields = []
|
|
158
|
+
for field_line in lines[2:]:
|
|
159
|
+
field_line = field_line.strip()
|
|
160
|
+
if not field_line:
|
|
161
|
+
continue
|
|
162
|
+
name_type = field_line.split(':')
|
|
163
|
+
if len(name_type) == 2:
|
|
164
|
+
name = name_type[0].strip()
|
|
165
|
+
dtype_str = name_type[1].strip()
|
|
166
|
+
dtype = DataType(dtype_str)
|
|
167
|
+
fields.append(Field(name, dtype))
|
|
168
|
+
|
|
169
|
+
return relation_name, from_type, to_type, fields, reverse_name, is_bidirectional
|
|
170
|
+
|
|
171
|
+
@staticmethod
|
|
172
|
+
def parse_node_instance(line: str) -> Tuple[str, str, List[Any]]:
|
|
173
|
+
"""Parse node instance: 'User, user_1, "Joe Doe", 32, "joe4030"'"""
|
|
174
|
+
# Handle quoted strings
|
|
175
|
+
parts = []
|
|
176
|
+
current = ''
|
|
177
|
+
in_quotes = False
|
|
178
|
+
for char in line:
|
|
179
|
+
if char == '"':
|
|
180
|
+
in_quotes = not in_quotes
|
|
181
|
+
current += char
|
|
182
|
+
elif char == ',' and not in_quotes:
|
|
183
|
+
parts.append(current.strip())
|
|
184
|
+
current = ''
|
|
185
|
+
else:
|
|
186
|
+
current += char
|
|
187
|
+
if current:
|
|
188
|
+
parts.append(current.strip())
|
|
189
|
+
|
|
190
|
+
node_type = parts[0].strip()
|
|
191
|
+
node_id = parts[1].strip()
|
|
192
|
+
values = []
|
|
193
|
+
|
|
194
|
+
for val in parts[2:]:
|
|
195
|
+
val = val.strip()
|
|
196
|
+
if val.startswith('"') and val.endswith('"'):
|
|
197
|
+
values.append(val[1:-1])
|
|
198
|
+
elif val.replace('-', '').isdigit() and '-' in val: # Date-like
|
|
199
|
+
values.append(val)
|
|
200
|
+
elif val.isdigit() or (val.startswith('-') and val[1:].isdigit()):
|
|
201
|
+
values.append(int(val))
|
|
202
|
+
elif val.replace('.', '').isdigit() and val.count('.') == 1:
|
|
203
|
+
values.append(float(val))
|
|
204
|
+
elif val.lower() in ('true', 'false'):
|
|
205
|
+
values.append(val.lower() == 'true')
|
|
206
|
+
else:
|
|
207
|
+
values.append(val)
|
|
208
|
+
|
|
209
|
+
return node_type, node_id, values
|
|
210
|
+
|
|
211
|
+
@staticmethod
|
|
212
|
+
def parse_relation_instance(line: str) -> tuple[str | Any, str | Any, Any, list[Any], str]:
|
|
213
|
+
"""Parse relation instance: 'user_1 -[OWNER, 2000-10-04]-> notebook'"""
|
|
214
|
+
# Extract relation type and attributes
|
|
215
|
+
pattern = r'(\w+)\s*(-\[([^\]]+)\]\s*[->-]\s*|\s*[->-]\s*\[([^\]]+)\]\s*->\s*)(\w+)'
|
|
216
|
+
match = re.search(pattern, line)
|
|
217
|
+
if not match:
|
|
218
|
+
raise ValueError(f"Invalid relation format: {line}")
|
|
219
|
+
|
|
220
|
+
from_node = match.group(1)
|
|
221
|
+
to_node = match.group(5)
|
|
222
|
+
|
|
223
|
+
# Get relation type and attributes
|
|
224
|
+
rel_part = match.group(3) or match.group(4)
|
|
225
|
+
rel_parts = [p.strip() for p in rel_part.split(',')]
|
|
226
|
+
rel_type = rel_parts[0]
|
|
227
|
+
attributes = rel_parts[1:] if len(rel_parts) > 1 else []
|
|
228
|
+
|
|
229
|
+
# Parse direction
|
|
230
|
+
if '->' in line:
|
|
231
|
+
direction = 'forward'
|
|
232
|
+
elif '-[' in line and ']-' in line:
|
|
233
|
+
direction = 'bidirectional'
|
|
234
|
+
else:
|
|
235
|
+
direction = 'forward'
|
|
236
|
+
|
|
237
|
+
return from_node, to_node, rel_type, attributes, direction
|
|
238
|
+
|
|
239
|
+
# =============== QUERY ENGINE ===============
|
|
240
|
+
|
|
241
|
+
class QueryResult:
    """Represents a query result that can be chained.

    Holds the current list of nodes plus the edges collected by the last
    traversal. Every filtering/traversal method returns a new
    ``QueryResult`` and leaves this one untouched.
    """

    # Comparison operators accepted in string conditions. Two-character
    # operators come first so that '>=' is not read as '>' followed by '='.
    _OPERATOR_PATTERN = re.compile(r'>=|<=|!=|==|>|<|=')

    def __init__(self, engine: GraphiteEngine, nodes: List[Node], edges: List[Relation] = None):
        self.engine = engine
        self.nodes = nodes
        self.edges = edges or []
        self.current_relation: Optional[RelationType] = None
        self.direction: str = 'outgoing'

    def where(self, condition: Union[str, Callable]) -> QueryResult:
        """Filter nodes based on condition.

        ``condition`` is either a callable taking a node and returning a
        truthy value, or a string such as ``'age > 18'``. A callable that
        raises for a node is reported with a warning and that node is
        dropped from the result.
        """
        filtered_nodes = []

        if callable(condition):
            for node in self.nodes:
                try:
                    if condition(node):
                        filtered_nodes.append(node)
                except Exception as e:
                    print(f"Graphite Warn: 'where' condition failed for node {node}: {e}")
        else:
            # String condition like "age > 18"
            filtered_nodes = [
                node for node in self.nodes
                if self._evaluate_condition(node, condition)
            ]

        return QueryResult(self.engine, filtered_nodes, self.edges)

    def _evaluate_condition(self, node: Node, condition: str) -> bool:
        """Evaluate a condition string of the form ``<field> <op> <literal>``.

        Returns ``False`` when no operator is present or the node has no
        value (``None``) for the field.
        """
        # Split at the first operator by position, so an operator inside the
        # right-hand literal (e.g. name == "a>=b") is left alone.
        found = self._OPERATOR_PATTERN.search(condition)
        if found is None:
            return False
        op = found.group()
        left = condition[:found.start()].strip()
        right = condition[found.end():].strip()

        # Get value from node
        node_value = node.get(left)
        if node_value is None:
            return False

        # Parse right side: quoted string, int, float (both optionally
        # negative), otherwise the raw text.
        unsigned = right[1:] if right.startswith('-') else right
        if right.startswith('"') and right.endswith('"'):
            right_value = right[1:-1]
        elif unsigned.isdecimal():
            right_value = int(right)
        elif unsigned.count('.') == 1 and unsigned.replace('.', '').isdecimal():
            right_value = float(right)
        else:
            right_value = right

        # Apply operation
        if op in ('=', '=='):
            return node_value == right_value
        if op == '!=':
            return node_value != right_value
        if op == '>':
            return node_value > right_value
        if op == '<':
            return node_value < right_value
        if op == '>=':
            return node_value >= right_value
        return node_value <= right_value  # op == '<='

    def traverse(self, relation_type: str, direction: str = 'outgoing') -> QueryResult:
        """Traverse relations from current nodes.

        ``direction`` is ``'outgoing'``, ``'incoming'``, or anything else
        for both directions. The result holds the nodes at the far end of
        each matching edge, de-duplicated by id, and all traversed edges.
        """
        result_nodes = []
        result_edges = []

        for node in self.nodes:
            if direction == 'outgoing':
                edges = self.engine.get_relations_from(node.id, relation_type)
            elif direction == 'incoming':
                edges = self.engine.get_relations_to(node.id, relation_type)
            else:  # both
                edges = (self.engine.get_relations_from(node.id, relation_type) +
                         self.engine.get_relations_to(node.id, relation_type))

            for edge in edges:
                result_edges.append(edge)
                if direction == 'outgoing':
                    target_id = edge.to_node
                elif direction == 'incoming':
                    target_id = edge.from_node
                else:
                    # In 'both' mode the far end depends on which way this
                    # edge points relative to the current node.
                    target_id = edge.to_node if edge.from_node == node.id else edge.from_node
                target_node = self.engine.get_node(target_id)
                if target_node:
                    result_nodes.append(target_node)

        # Remove duplicates (first position of each id is kept)
        result_nodes = list({n.id: n for n in result_nodes}.values())
        return QueryResult(self.engine, result_nodes, result_edges)

    def outgoing(self, relation_type: str) -> QueryResult:
        """Traverse outgoing relations"""
        return self.traverse(relation_type, 'outgoing')

    def incoming(self, relation_type: str) -> QueryResult:
        """Traverse incoming relations"""
        return self.traverse(relation_type, 'incoming')

    def both(self, relation_type: str) -> QueryResult:
        """Traverse both directions"""
        return self.traverse(relation_type, 'both')

    def limit(self, n: int) -> QueryResult:
        """Limit number of results (nodes and edges are each cut to the first n)"""
        return QueryResult(self.engine, self.nodes[:n], self.edges[:n])

    def distinct(self) -> QueryResult:
        """Get distinct nodes (by id, keeping first occurrence)"""
        seen = set()
        distinct_nodes = []
        for node in self.nodes:
            if node.id not in seen:
                seen.add(node.id)
                distinct_nodes.append(node)
        return QueryResult(self.engine, distinct_nodes, self.edges)

    def order_by(self, field: str, descending: bool = False) -> QueryResult:
        """Order nodes by field; nodes without a value sort after the others
        (before them when ``descending`` is true)."""

        def get_key(node):
            val = node.get(field)
            # The leading flag groups missing values together so None is
            # never compared against a real value.
            return (val is None, val)

        sorted_nodes = sorted(self.nodes, key=get_key, reverse=descending)
        return QueryResult(self.engine, sorted_nodes, self.edges)

    def count(self) -> int:
        """Count nodes"""
        return len(self.nodes)

    def get(self) -> List[Node]:
        """Get all nodes"""
        return self.nodes

    def first(self) -> Optional[Node]:
        """Get first node, or None when the result is empty"""
        return self.nodes[0] if self.nodes else None

    def ids(self) -> List[str]:
        """Get node IDs"""
        return [n.id for n in self.nodes]
|
|
389
|
+
|
|
390
|
+
class QueryBuilder:
    """Builder for creating queries.

    Attribute access by node-type name (``builder.User``) starts a query
    over all nodes of that type.
    """

    def __init__(self, engine: GraphiteEngine):
        self.engine = engine

    def __getattr__(self, name: str) -> QueryResult:
        """Allow starting query from node type: engine.User

        Raises:
            AttributeError: if ``name`` is not a defined node type.
        """
        # __getattr__ only runs after normal lookup has failed. If 'engine'
        # itself is what is missing (instance built without __init__, e.g.
        # while being copied or unpickled), reading self.engine below would
        # re-enter this method without end.
        if name == 'engine':
            raise AttributeError(name)
        if name in self.engine.node_types:
            nodes = self.engine.get_nodes_of_type(name)
            return QueryResult(self.engine, nodes)
        raise AttributeError(f"No node type '{name}' found")
|
|
402
|
+
|
|
403
|
+
# =============== MAIN ENGINE ===============
|
|
404
|
+
|
|
405
|
+
class GraphiteEngine:
    """Main graph database engine.

    Keeps the schema (node and relation types), the node and relation
    instances, and lookup indexes by type and by endpoint id, all in memory.
    """

    def __init__(self):
        self.node_types: Dict[str, NodeType] = {}
        self.relation_types: Dict[str, RelationType] = {}
        self.nodes: Dict[str, Node] = {}
        self.relations: List[Relation] = []
        # Secondary indexes, kept in step with nodes/relations on creation.
        self.node_by_type: Dict[str, List[Node]] = defaultdict(list)
        self.relations_by_type: Dict[str, List[Relation]] = defaultdict(list)
        self.relations_by_from: Dict[str, List[Relation]] = defaultdict(list)
        self.relations_by_to: Dict[str, List[Relation]] = defaultdict(list)
        self.parser = GraphiteParser()
        self.query = QueryBuilder(self)

    # =============== SCHEMA DEFINITION ===============

    def define_node(self, definition: str):
        """Define a node type from DSL.

        Raises:
            ValueError: if the definition names a parent type that has not
                been defined yet.
        """
        node_name, fields, parent_name = self.parser.parse_node_definition(definition)

        parent = None
        if parent_name:
            if parent_name not in self.node_types:
                raise ValueError(f"Parent node type '{parent_name}' not found")
            parent = self.node_types[parent_name]

        node_type = NodeType(node_name, fields, parent)
        self.node_types[node_name] = node_type

    def define_relation(self, definition: str):
        """Define a relation type from DSL.

        When the definition has a ``reverse`` name, a second relation type
        with swapped endpoints is registered under that name.

        Raises:
            ValueError: if either endpoint node type is not defined.
        """
        (rel_name, from_type, to_type, fields,
         reverse_name, is_bidirectional) = self.parser.parse_relation_definition(definition)

        # Validate node types exist
        if from_type not in self.node_types:
            raise ValueError(f"Node type '{from_type}' not found")
        if to_type not in self.node_types:
            raise ValueError(f"Node type '{to_type}' not found")

        rel_type = RelationType(
            rel_name, from_type, to_type,
            fields, reverse_name, is_bidirectional
        )
        self.relation_types[rel_name] = rel_type

        # Register reverse relation if specified
        if reverse_name:
            reverse_rel = RelationType(
                reverse_name, to_type, from_type,
                fields, rel_name, is_bidirectional
            )
            self.relation_types[reverse_name] = reverse_rel

    # =============== DATA MANIPULATION ===============

    @staticmethod
    def _coerce_field_value(field_def, value):
        """Convert a 'YYYY-MM-DD' string to a date for DATE fields; return
        every other value unchanged."""
        if field_def.dtype == DataType.DATE and isinstance(value, str):
            try:
                return datetime.strptime(value, "%Y-%m-%d").date()
            except ValueError as exc:
                raise ValueError(f"Invalid date format: {value}") from exc
        return value

    def create_node(self, node_type: str, node_id: str, *values) -> Node:
        """Create a node instance.

        ``values`` are matched positionally against the type's fields,
        inherited fields first.

        Raises:
            ValueError: if the node type is unknown, the number of values
                does not match the number of fields, or a DATE field gets
                a string that is not in ``YYYY-MM-DD`` form.
        """
        if node_type not in self.node_types:
            raise ValueError(f"Node type '{node_type}' not defined")

        node_type_obj = self.node_types[node_type]
        all_fields = node_type_obj.get_all_fields()

        if len(values) != len(all_fields):
            raise ValueError(f"Expected {len(all_fields)} values, got {len(values)}")

        # Create values dictionary
        node_values = {
            field_def.name: self._coerce_field_value(field_def, value)
            for field_def, value in zip(all_fields, values)
        }

        node = Node(node_type, node_id, node_values, node_type_obj)
        self.nodes[node_id] = node
        self.node_by_type[node_type].append(node)
        return node

    def create_relation(self, from_id: str, to_id: str, rel_type: str, *values) -> Relation:
        """Create a relation instance.

        ``values`` are matched positionally against the relation type's
        fields; fields without a supplied value are left out and surplus
        values are ignored. For a bidirectional relation type a second
        relation with swapped endpoints is stored as well.

        Returns:
            The relation from ``from_id`` to ``to_id``.

        Raises:
            ValueError: if the relation type or either node is unknown, or
                a DATE field gets a string not in ``YYYY-MM-DD`` form.
        """
        if rel_type not in self.relation_types:
            raise ValueError(f"Relation type '{rel_type}' not defined")

        rel_type_obj = self.relation_types[rel_type]

        # Check if nodes exist
        if from_id not in self.nodes:
            raise ValueError(f"Node '{from_id}' not found")
        if to_id not in self.nodes:
            raise ValueError(f"Node '{to_id}' not found")

        # Create values dictionary; zip stops at the shorter sequence.
        rel_values = {
            field_def.name: self._coerce_field_value(field_def, value)
            for field_def, value in zip(rel_type_obj.fields, values)
        }

        relation = Relation(rel_type, from_id, to_id, rel_values, rel_type_obj)
        self._index_relation(relation, from_id, to_id)

        # If relation is bidirectional, create reverse automatically
        if rel_type_obj.is_bidirectional:
            reverse_rel = Relation(rel_type, to_id, from_id, rel_values, rel_type_obj)
            self._index_relation(reverse_rel, to_id, from_id)

        return relation

    def _index_relation(self, relation, from_id: str, to_id: str) -> None:
        """Store a relation and add it to the by-type/by-endpoint indexes."""
        self.relations.append(relation)
        self.relations_by_type[relation.type_name].append(relation)
        self.relations_by_from[from_id].append(relation)
        self.relations_by_to[to_id].append(relation)

    # =============== QUERY METHODS ===============

    def get_node(self, node_id: str) -> Optional[Node]:
        """Get node by ID, or None if there is no such node"""
        return self.nodes.get(node_id)

    def get_nodes_of_type(self, node_type: str) -> List[Node]:
        """Get all nodes of a specific type"""
        return self.node_by_type.get(node_type, [])

    def get_relations_from(self, node_id: str, rel_type: str = None) -> List[Relation]:
        """Get relations from a node, optionally restricted to one relation type"""
        all_rels = self.relations_by_from.get(node_id, [])
        if rel_type:
            return [r for r in all_rels if r.type_name == rel_type]
        return all_rels

    def get_relations_to(self, node_id: str, rel_type: str = None) -> List[Relation]:
        """Get relations to a node, optionally restricted to one relation type"""
        all_rels = self.relations_by_to.get(node_id, [])
        if rel_type:
            return [r for r in all_rels if r.type_name == rel_type]
        return all_rels

    # =============== BULK LOADING ===============

    @staticmethod
    def _definition_keyword(line: str) -> Optional[str]:
        """Return 'node' or 'relation' if that is the line's first word,
        else None. Comparing the whole first word keeps field lines such as
        'node_count: int' from being mistaken for a definition header."""
        words = line.split(None, 1)
        if words and words[0] in ('node', 'relation'):
            return words[0]
        return None

    def load_dsl(self, dsl: str):
        """Load Graphite DSL.

        Blank lines and lines starting with ``#`` are skipped. A ``node`` or
        ``relation`` header starts a definition that runs until the next
        blank line or header. Any other line without ``[`` is read as a
        node instance, and a line containing ``-[...]-`` as a relation
        instance. Lines matching none of these are ignored.
        """
        lines = dsl.strip().split('\n')
        i = 0

        while i < len(lines):
            line = lines[i].strip()
            if not line or line.startswith('#'):
                i += 1
                continue

            keyword = self._definition_keyword(line)
            if keyword is not None:
                # Collect multiline definition
                definition = [line]
                i += 1
                while i < len(lines):
                    following = lines[i].strip()
                    if not following or self._definition_keyword(following) is not None:
                        break
                    definition.append(lines[i])
                    i += 1
                text = '\n'.join(definition)
                if keyword == 'node':
                    self.define_node(text)
                else:
                    self.define_relation(text)

            elif '[' not in line:
                # Node instance
                node_type, node_id, values = self.parser.parse_node_instance(line)
                self.create_node(node_type, node_id, *values)
                i += 1

            elif '-[' in line and ']-' in line:
                # Relation instance (']-' also covers ']->')
                from_id, to_id, rel_type, values, direction = self.parser.parse_relation_instance(line)
                self.create_relation(from_id, to_id, rel_type, *values)
                i += 1
            else:
                i += 1

    # =============== PERSISTENCE ===============

    def save(self, filename: str):
        """Save database (schema, data and indexes) to a pickle file"""
        with open(filename, 'wb') as f:
            data = {
                'node_types'       : self.node_types,
                'relation_types'   : self.relation_types,
                'nodes'            : self.nodes,
                'relations'        : self.relations,
                'node_by_type'     : self.node_by_type,
                'relations_by_type': self.relations_by_type,
                'relations_by_from': self.relations_by_from,
                'relations_by_to'  : self.relations_by_to,
            }
            # noinspection PyTypeChecker
            pickle.dump(data, f)

    def load(self, filename: str):
        """Load database from a file written by :meth:`save`.

        SECURITY: the file is read with ``pickle.load``, which can execute
        arbitrary code while loading. Only open files from a trusted source.
        """
        with open(filename, 'rb') as f:
            data = pickle.load(f)
            self.node_types = data['node_types']
            self.relation_types = data['relation_types']
            self.nodes = data['nodes']
            self.relations = data['relations']
            self.node_by_type = data['node_by_type']
            self.relations_by_type = data['relations_by_type']
            self.relations_by_from = data['relations_by_from']
            self.relations_by_to = data['relations_by_to']

    # =============== UTILITY METHODS ===============

    def clear(self):
        """Clear all data, including the schema"""
        self.node_types.clear()
        self.relation_types.clear()
        self.nodes.clear()
        self.relations.clear()
        self.node_by_type.clear()
        self.relations_by_type.clear()
        self.relations_by_from.clear()
        self.relations_by_to.clear()

    def stats(self) -> Dict[str, Any]:
        """Get database statistics (counts of types, nodes and relations)"""
        return {
            'node_types'    : len(self.node_types),
            'relation_types': len(self.relation_types),
            'nodes'         : len(self.nodes),
            'relations'     : len(self.relations),
        }

    # =============== SYNTAX SUGAR ===============

    def parse(self, data: str):
        """Parse data into nodes and relations (structure or data)"""
        self.load_dsl(data)
|
|
657
|
+
|
|
658
|
+
# =============== SYNTAX SUGAR ===============
|
|
659
|
+
|
|
660
|
+
def node(node_type: str, **fields) -> str:
    """Build the DSL text of a node definition.

    The first line is ``node <node_type>``; each keyword argument adds one
    ``<name>: <type>`` line in the order given.
    """
    header = f"node {node_type}"
    field_lines = [f"{field_name}: {field_type}" for field_name, field_type in fields.items()]
    return "\n".join([header, *field_lines])
|
|
666
|
+
|
|
667
|
+
def relation(name: str, from_type: str, to_type: str, **kwargs) -> str:
    """Build the DSL text of a relation definition.

    Recognised keyword arguments:
        both: truthy to mark the relation bidirectional (``-`` link).
        reverse: name of the reverse relation.
        fields: mapping of field name to type, one line per entry.
    """
    bidirectional = kwargs.get('both')

    header = f"relation {name}"
    if bidirectional:
        header += " both"
    if kwargs.get('reverse'):
        header += f" reverse {kwargs['reverse']}"

    # Bidirectional relations are written with a plain dash, directed
    # ones with an arrow.
    link = "-" if bidirectional else "->"
    participants = f"{from_type} {link} {to_type}"

    field_lines = [
        f"{field_name}: {field_type}"
        for field_name, field_type in kwargs.get('fields', {}).items()
    ]
    return "\n".join([header, participants, *field_lines])
|
|
682
|
+
|
|
683
|
+
# ================ PUBLIC API ================
|
|
684
|
+
|
|
685
|
+
def engine() -> GraphiteEngine:
    """Return a new, empty GraphiteEngine."""
    new_engine = GraphiteEngine()
    return new_engine
|