grai-build 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- grai/__init__.py +11 -0
- grai/cli/__init__.py +5 -0
- grai/cli/main.py +2546 -0
- grai/core/__init__.py +1 -0
- grai/core/cache/__init__.py +33 -0
- grai/core/cache/build_cache.py +352 -0
- grai/core/compiler/__init__.py +23 -0
- grai/core/compiler/cypher_compiler.py +426 -0
- grai/core/exporter/__init__.py +13 -0
- grai/core/exporter/ir_exporter.py +343 -0
- grai/core/lineage/__init__.py +42 -0
- grai/core/lineage/lineage_tracker.py +685 -0
- grai/core/loader/__init__.py +21 -0
- grai/core/loader/neo4j_loader.py +514 -0
- grai/core/models.py +344 -0
- grai/core/parser/__init__.py +25 -0
- grai/core/parser/yaml_parser.py +375 -0
- grai/core/validator/__init__.py +25 -0
- grai/core/validator/validator.py +475 -0
- grai/core/visualizer/__init__.py +650 -0
- grai/core/visualizer/visualizer.py +15 -0
- grai/templates/__init__.py +1 -0
- grai_build-0.3.0.dist-info/METADATA +374 -0
- grai_build-0.3.0.dist-info/RECORD +28 -0
- grai_build-0.3.0.dist-info/WHEEL +5 -0
- grai_build-0.3.0.dist-info/entry_points.txt +2 -0
- grai_build-0.3.0.dist-info/licenses/LICENSE +21 -0
- grai_build-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cypher compiler for grai.build.
|
|
3
|
+
|
|
4
|
+
This module generates Neo4j Cypher statements from Entity and Relation models.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Dict, List, Union
|
|
9
|
+
|
|
10
|
+
from grai.core.models import Entity, Project, Property, Relation
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CompilerError(Exception):
    """
    Root exception type for failures raised by the Cypher compiler module.

    Callers can catch this single type to handle any compiler-related error.
    """
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def escape_cypher_string(value: str) -> str:
    """
    Escape backslashes and single quotes in a string destined for Cypher.

    Args:
        value: Raw text to escape.

    Returns:
        The text with every backslash doubled and every single quote
        preceded by a backslash.
    """
    # One translate pass maps each input character exactly once, so the
    # backslash added in front of a quote is never doubled afterwards.
    escape_table = str.maketrans({"\\": "\\\\", "'": "\\'"})
    return value.translate(escape_table)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_cypher_property_placeholder(prop_name: str, use_row: bool = True) -> str:
    """
    Build the Cypher expression that supplies a property's value.

    Args:
        prop_name: Property name.
        use_row: When True the value is read from the ``row`` variable
            (``row.<name>``); otherwise a query parameter (``$<name>``) is used.

    Returns:
        Cypher placeholder string.
    """
    prefix = "row." if use_row else "$"
    return f"{prefix}{prop_name}"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def compile_property_set(
    properties: List[Property], node_var: str = "n", indent: str = " "
) -> str:
    """
    Render a Cypher SET clause assigning each property from its placeholder.

    Args:
        properties: Properties to assign; only their ``name`` is used.
        node_var: Variable name of the node/relationship being updated
            (default: "n").
        indent: Prefix placed before every assignment after the first.

    Returns:
        The SET clause, one assignment per line separated by ",\\n", or an
        empty string when there are no properties.
    """
    if not properties:
        return ""

    assignments = [
        f"{node_var}.{prop.name} = {get_cypher_property_placeholder(prop.name)}"
        for prop in properties
    ]

    # Only the first assignment carries the SET keyword; the remaining ones
    # are continuation lines. With a single assignment the join is a no-op.
    first, *rest = assignments
    rendered = [f"SET {first}", *(f"{indent}{item}" for item in rest)]
    return ",\n".join(rendered)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def compile_entity(entity: Entity) -> str:
    """
    Render the Cypher MERGE statement that creates/updates nodes of an entity.

    The node is merged on the entity's key properties; every property whose
    name is not a key is assigned afterwards in a SET clause.

    Args:
        entity: Entity model to compile.

    Returns:
        A comment header line followed by the MERGE statement (and SET clause,
        when there are non-key properties), terminated by a semicolon.

    Example:
        ```cypher
        // Create customer nodes
        MERGE (n:customer {customer_id: row.customer_id})
        SET n.name = row.name,
         n.email = row.email;
        ```
    """
    label = entity.entity

    # Key properties form the identity map inside the MERGE pattern.
    key_map = ", ".join(
        f"{key}: {get_cypher_property_placeholder(key)}" for key in entity.keys
    )
    clauses = [f"MERGE (n:{label} {{{key_map}}})"]

    # Everything that is not a key is written via SET.
    extra_props = [prop for prop in entity.properties if prop.name not in entity.keys]
    if extra_props:
        clauses.append(compile_property_set(extra_props))

    statement = "\n".join(clauses)
    return f"// Create {label} nodes\n{statement};"
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def compile_relation(relation: Relation) -> str:
    """
    Render the Cypher MATCH...MERGE statement that creates a relationship.

    Both endpoint nodes are matched by their mapped key, the relationship is
    merged between them, and any relationship properties are assigned in a
    SET clause on ``r``.

    Args:
        relation: Relation model to compile.

    Returns:
        A comment header line followed by the statement, terminated by a
        semicolon.

    Example:
        ```cypher
        // Create PURCHASED relationships
        MATCH (from:customer {customer_id: row.customer_id})
        MATCH (to:product {product_id: row.product_id})
        MERGE (from)-[r:PURCHASED]->(to)
        SET r.order_id = row.order_id,
         r.order_date = row.order_date;
        ```
    """
    mapping = relation.mappings
    rel_type = relation.relation

    def match_clause(alias: str, label: str, key: str) -> str:
        # One MATCH line binding ``alias`` to the node identified by ``key``.
        return f"MATCH ({alias}:{label} {{{key}: {get_cypher_property_placeholder(key)}}})"

    clauses = [
        match_clause("from", relation.from_entity, mapping.from_key),
        match_clause("to", relation.to_entity, mapping.to_key),
        f"MERGE (from)-[r:{rel_type}]->(to)",
    ]
    if relation.properties:
        clauses.append(compile_property_set(relation.properties, node_var="r"))

    statement = "\n".join(clauses)
    return f"// Create {rel_type} relationships\n{statement};"
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def compile_project(
    project: Project,
    include_header: bool = True,
    include_constraints: bool = True,
) -> str:
    """
    Assemble the full Cypher script for a project.

    The script is built from up to four parts, in order: an informational
    header, uniqueness constraints for entity keys, one statement per entity,
    and one statement per relation.

    Args:
        project: Project model to compile.
        include_header: If True, start with comment lines naming the project
            and its version.
        include_constraints: If True (and the project has entities), emit one
            uniqueness constraint per entity key.

    Returns:
        The script text with trailing whitespace stripped and a single
        trailing newline.
    """
    rule = "// ============================================================================="
    script = []

    def open_section(title: str) -> None:
        # Banner comment followed by a blank line.
        script.extend([rule, f"// {title}", rule, ""])

    if include_header:
        script.extend(
            [
                f"// Generated Cypher script for project: {project.name}",
                f"// Version: {project.version}",
                "// Generated by grai.build",
                "",
            ]
        )

    if include_constraints and project.entities:
        open_section("CONSTRAINTS")
        # NOTE(review): each key gets its own UNIQUE constraint, even when an
        # entity declares several keys — confirm this is intended.
        script.extend(
            f"CREATE CONSTRAINT constraint_{entity.entity}_{key} IF NOT EXISTS "
            f"FOR (n:{entity.entity}) REQUIRE n.{key} IS UNIQUE;"
            for entity in project.entities
            for key in entity.keys
        )
        script.append("")

    if project.entities:
        open_section("ENTITIES (NODES)")
        for entity in project.entities:
            script.extend([compile_entity(entity), ""])

    if project.relations:
        open_section("RELATIONS (EDGES)")
        for relation in project.relations:
            script.extend([compile_relation(relation), ""])

    # Drop the trailing blank separator(s) and end with exactly one newline.
    return "\n".join(script).rstrip() + "\n"
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def write_cypher_file(
    cypher: str,
    output_path: Union[str, Path],
    create_dirs: bool = True,
) -> Path:
    """
    Write Cypher script to a file.

    Args:
        cypher: Cypher script content.
        output_path: Path to write the file.
        create_dirs: If True, create parent directories if they don't exist.

    Returns:
        Path to the written file.

    Raises:
        CompilerError: If the directories cannot be created or the file cannot
            be written. The underlying exception is attached as ``__cause__``.
    """
    path = Path(output_path)

    try:
        if create_dirs:
            path.parent.mkdir(parents=True, exist_ok=True)

        with open(path, "w", encoding="utf-8") as f:
            f.write(cypher)
    except Exception as e:
        # Chain explicitly so the root cause (permissions, missing directory,
        # bad content type, ...) stays visible in the traceback.
        raise CompilerError(f"Failed to write Cypher file to {path}: {e}") from e

    return path
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def compile_and_write(
    project: Project,
    output_dir: Union[str, Path] = "target/neo4j",
    filename: str = "compiled.cypher",
    include_header: bool = True,
    include_constraints: bool = True,
) -> Path:
    """
    Compile a project to Cypher and save the script under ``output_dir``.

    Args:
        project: Project to compile.
        output_dir: Directory that receives the output file.
        filename: Name of the output file inside ``output_dir``.
        include_header: Forwarded to ``compile_project``.
        include_constraints: Forwarded to ``compile_project``.

    Returns:
        Path to the written file.

    Raises:
        CompilerError: If the file cannot be written.
    """
    script = compile_project(
        project, include_header=include_header, include_constraints=include_constraints
    )
    return write_cypher_file(script, Path(output_dir) / filename)
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def generate_load_csv_statements(
    project: Project,
    data_dir: str = "data",
) -> Dict[str, str]:
    """
    Build one LOAD CSV statement per entity and per relation of a project.

    The CSV file for each item is ``<data_dir>/<source>.csv`` where
    ``<source>`` is the item's source name with dots replaced by underscores.

    Args:
        project: Project to generate load statements for.
        data_dir: Directory containing the CSV files.

    Returns:
        Dictionary mapping entity/relation names to LOAD CSV statements.
        Entries are written entities first, then relations, so a later item
        with the same name replaces an earlier one.
    """

    def load_clause(source_name: str) -> str:
        # LOAD CSV line pointing at the file derived from the source name.
        csv_file = f"{data_dir}/{source_name.replace('.', '_')}.csv"
        return f"LOAD CSV WITH HEADERS FROM 'file:///{csv_file}' AS row"

    def finish(clauses, alias, props) -> str:
        # Append the optional SET clause and the terminating semicolon line.
        if props:
            assignments = [f"{alias}.{p.name} = row.{p.name}" for p in props]
            clauses.append("SET " + ",\n ".join(assignments))
        clauses.append(";")
        return "\n".join(clauses)

    result = {}

    for entity in project.entities:
        load_line = load_clause(entity.get_source_name())
        # dict.fromkeys keeps first-seen order and drops repeated keys.
        key_map = ", ".join(f"{key}: row.{key}" for key in dict.fromkeys(entity.keys))
        clauses = [
            f"// Load {entity.entity} from CSV",
            load_line,
            f"MERGE (n:{entity.entity} {{{key_map}}})",
        ]
        extras = [p for p in entity.properties if p.name not in entity.keys]
        result[entity.entity] = finish(clauses, "n", extras)

    for relation in project.relations:
        load_line = load_clause(relation.get_source_name())
        mapping = relation.mappings
        clauses = [
            f"// Load {relation.relation} from CSV",
            load_line,
            f"MATCH (from:{relation.from_entity} {{{mapping.from_key}: row.{mapping.from_key}}})",
            f"MATCH (to:{relation.to_entity} {{{mapping.to_key}: row.{mapping.to_key}}})",
            f"MERGE (from)-[r:{relation.relation}]->(to)",
        ]
        result[relation.relation] = finish(clauses, "r", relation.properties)

    return result
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def compile_schema_only(project: Project) -> str:
    """
    Render only the schema statements (constraints and indexes) of a project.

    A uniqueness constraint is emitted for every entity key, and an index for
    every entity property whose name is not one of that entity's keys.

    Args:
        project: Project to compile schema for.

    Returns:
        Cypher script containing a header, a CONSTRAINTS section and an
        INDEXES section, ending with a newline.
    """
    rule = "// ============================================================================="

    script = [
        f"// Schema definition for project: {project.name}",
        f"// Version: {project.version}",
        "",
        rule,
        "// CONSTRAINTS",
        rule,
        "",
    ]

    script.extend(
        f"CREATE CONSTRAINT constraint_{entity.entity}_{key} IF NOT EXISTS "
        f"FOR (n:{entity.entity}) REQUIRE n.{key} IS UNIQUE;"
        for entity in project.entities
        for key in entity.keys
    )

    script.extend(["", rule, "// INDEXES", rule, ""])

    # Keys are skipped here; they are already covered by the constraints above.
    script.extend(
        f"CREATE INDEX index_{entity.entity}_{prop.name} IF NOT EXISTS "
        f"FOR (n:{entity.entity}) ON (n.{prop.name});"
        for entity in project.entities
        for prop in entity.properties
        if prop.name not in entity.keys
    )

    return "\n".join(script) + "\n"
|