grai-build 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,514 @@
1
+ """
2
+ Neo4j Loader - Execute Cypher statements against Neo4j database.
3
+
4
+ This module provides functionality to connect to Neo4j, execute Cypher queries,
5
+ and manage database operations for grai.build projects.
6
+ """
7
+
8
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
12
+
13
+ try:
14
+ from neo4j import Driver, GraphDatabase, Result, Session
15
+ from neo4j.exceptions import AuthError, Neo4jError, ServiceUnavailable
16
+
17
+ NEO4J_AVAILABLE = True
18
+ except ImportError:
19
+ NEO4J_AVAILABLE = False
20
+ Driver = None
21
+ Session = None
22
+ Result = None
23
+
24
+
25
@dataclass
class Neo4jConnection:
    """
    Neo4j connection configuration.

    The password is excluded from the generated ``repr`` so that logging or
    printing a connection object does not expose the credential.

    Attributes:
        uri: Neo4j connection URI (e.g., bolt://localhost:7687)
        user: Username for authentication
        password: Password for authentication (hidden from ``repr``)
        database: Database name (default: neo4j)
        encrypted: Whether to use encrypted connection
        max_retry_time: Maximum time to retry connection (seconds)
    """

    uri: str
    user: str
    # repr=False keeps the secret out of log lines and tracebacks; the field
    # still takes part in __init__ and equality comparison.
    password: str = field(repr=False)
    database: str = "neo4j"
    encrypted: bool = False
    max_retry_time: int = 30
45
+
46
+
47
@dataclass
class ExecutionResult:
    """
    Result of executing Cypher statements.

    Attributes:
        success: Whether execution was successful
        statements_executed: Number of statements executed
        records_affected: Number of records affected (if available)
        execution_time: Time taken to execute (seconds)
        errors: List of error messages (a fresh list per instance)
        warnings: List of warning messages (a fresh list per instance)
    """

    success: bool
    statements_executed: int = 0
    records_affected: int = 0
    execution_time: float = 0.0
    # default_factory gives every instance its own list and keeps the
    # annotation truthful (the previous default was None).
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Normalise an explicitly passed ``None`` to an empty list."""
        # None used to be the declared default, so callers may still pass it.
        if self.errors is None:
            self.errors = []
        if self.warnings is None:
            self.warnings = []
74
+
75
+
76
def check_neo4j_available():
    """
    Ensure the optional neo4j driver package was importable.

    Returns silently when the module-level import succeeded.

    Raises:
        ImportError: If neo4j driver is not installed.
    """
    if NEO4J_AVAILABLE:
        return

    raise ImportError("neo4j driver not installed. Install it with: pip install neo4j")
85
+
86
+
87
def connect_neo4j(
    uri: str,
    user: str,
    password: str,
    database: str = "neo4j",
    encrypted: bool = False,
    max_retry_time: int = 30,
) -> Driver:
    """
    Connect to Neo4j database.

    Creates a driver, verifies connectivity and returns it. If verification
    fails, the partially created driver is closed before the error is raised
    so that no connection pool is leaked.

    Args:
        uri: Neo4j connection URI (e.g., bolt://localhost:7687)
        user: Username for authentication
        password: Password for authentication
        database: Database name (default: neo4j). Accepted for API symmetry;
            it is not used when creating the driver.
        encrypted: Whether to use encrypted connection. Only forwarded for
            plain schemes (bolt://, neo4j://); for schemes such as
            neo4j+s:// or bolt+ssc:// the URI itself selects encryption.
        max_retry_time: Maximum time to retry connection (seconds)

    Returns:
        Neo4j driver instance.

    Raises:
        ImportError: If neo4j driver is not installed.
        ServiceUnavailable: If cannot connect to Neo4j.
        AuthError: If authentication fails.
        RuntimeError: For any other error raised while connecting.

    Example:
        ```python
        driver = connect_neo4j(
            uri="bolt://localhost:7687",
            user="neo4j",
            password="password"
        )
        ```
    """
    check_neo4j_available()

    driver = None
    connected = False

    try:
        driver_config: Dict[str, Any] = {
            "auth": (user, password),
            "max_connection_lifetime": 3600,
            "max_connection_pool_size": 50,
            "connection_acquisition_timeout": max_retry_time,
        }

        # The driver rejects the `encrypted` setting when the URI scheme
        # already carries a security suffix (+s / +ssc), so only pass it
        # for plain bolt:// and neo4j:// URIs.
        scheme = uri.split("://", 1)[0].lower()
        if "+" not in scheme:
            driver_config["encrypted"] = encrypted

        driver = GraphDatabase.driver(uri, **driver_config)

        # Verify connectivity
        driver.verify_connectivity()

        connected = True
        return driver

    except AuthError as e:
        raise AuthError(f"Authentication failed: {e}") from e
    except ServiceUnavailable as e:
        raise ServiceUnavailable(f"Cannot connect to Neo4j at {uri}: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Error connecting to Neo4j: {e}") from e
    finally:
        # Release the driver's resources when it was created but could not
        # be verified; the caller never receives it in that case.
        if driver is not None and not connected:
            try:
                driver.close()
            except Exception:
                # Best-effort cleanup: do not mask the original error.
                pass
146
+
147
+
148
def verify_connection(driver: Driver, database: str = "neo4j") -> bool:
    """
    Check that a trivial query can be run against the database.

    Runs ``RETURN 1 AS test`` in a session on the given database and checks
    the returned value. Any exception raised while doing so is reported as
    a failed check rather than propagated.

    Args:
        driver: Neo4j driver instance.
        database: Database name to test.

    Returns:
        True if the probe query returned 1, False otherwise (including on
        any error).

    Example:
        ```python
        driver = connect_neo4j(...)
        if verify_connection(driver):
            print("Connected!")
        ```
    """
    check_neo4j_available()

    try:
        with driver.session(database=database) as session:
            probe = session.run("RETURN 1 AS test").single()
            return probe["test"] == 1
    except Exception:
        return False
175
+
176
+
177
def close_connection(driver: Driver) -> None:
    """
    Close a Neo4j driver, ignoring falsy values such as ``None``.

    Args:
        driver: Neo4j driver instance to close.

    Example:
        ```python
        driver = connect_neo4j(...)
        # ... use driver ...
        close_connection(driver)
        ```
    """
    if not driver:
        # Nothing to close.
        return

    driver.close()
193
+
194
+
195
def split_cypher_statements(cypher: str) -> List[str]:
    """
    Split Cypher script into individual statements.

    The script is scanned character by character so that semicolons and
    comment markers inside quoted text are left alone:

    - ``'...'`` and ``"..."`` string literals (with backslash escapes) and
      backtick-quoted identifiers are copied verbatim.
    - ``// ...`` line comments and ``/* ... */`` block comments outside of
      quoted text are removed.
    - A semicolon outside of quoted text and comments ends a statement.

    Args:
        cypher: Cypher script containing multiple statements.

    Returns:
        List of individual Cypher statements, stripped of surrounding
        whitespace, without trailing semicolons and without empty entries.
    """
    statements: List[str] = []
    buffer: List[str] = []
    open_quote = ""  # quote character of the literal being scanned, "" outside
    pos = 0
    length = len(cypher)

    def flush() -> None:
        # Emit the buffered statement unless it is empty or whitespace only.
        statement = "".join(buffer).strip()
        if statement:
            statements.append(statement)
        buffer.clear()

    while pos < length:
        char = cypher[pos]

        if open_quote:
            buffer.append(char)
            if char == "\\" and open_quote != "`" and pos + 1 < length:
                # Keep the escaped character so an escaped quote does not
                # terminate the literal.
                buffer.append(cypher[pos + 1])
                pos += 2
                continue
            if char == open_quote:
                open_quote = ""
            pos += 1
        elif char in ("'", '"', "`"):
            open_quote = char
            buffer.append(char)
            pos += 1
        elif cypher.startswith("//", pos):
            # Drop the comment but keep the newline that ends it.
            newline = cypher.find("\n", pos)
            pos = length if newline == -1 else newline
        elif cypher.startswith("/*", pos):
            # Replace the comment with a space so neighbouring tokens
            # do not run together.
            closing = cypher.find("*/", pos + 2)
            buffer.append(" ")
            pos = length if closing == -1 else closing + 2
        elif char == ";":
            flush()
            pos += 1
        else:
            buffer.append(char)
            pos += 1

    # The final statement may not be terminated by a semicolon.
    flush()

    return statements
233
+
234
+
235
def execute_cypher(
    driver: Driver,
    cypher: str,
    parameters: Optional[Dict[str, Any]] = None,
    database: str = "neo4j",
) -> ExecutionResult:
    """
    Run one or more Cypher statements in a single session.

    The script is split into statements which are executed in order, each
    with the same parameters. Execution stops at the first statement that
    raises a Neo4jError; statements run before it are still counted.

    Args:
        driver: Neo4j driver instance.
        cypher: Cypher statement(s) to execute.
        parameters: Optional parameters for the query.
        database: Database name to execute against.

    Returns:
        ExecutionResult with execution details. Failures are reported via
        ``success`` and ``errors`` rather than raised.

    Example:
        ```python
        driver = connect_neo4j(...)
        result = execute_cypher(
            driver,
            "CREATE (n:Person {name: $name}) RETURN n",
            parameters={"name": "Alice"}
        )
        print(f"Success: {result.success}")
        print(f"Statements executed: {result.statements_executed}")
        ```
    """
    check_neo4j_available()

    started_at = time.time()
    outcome = ExecutionResult(success=False)

    try:
        pending = split_cypher_statements(cypher)

        with driver.session(database=database) as session:
            for stmt in pending:
                try:
                    # consume() forces the statement to run to completion
                    # and yields the summary holding the update counters.
                    stats = session.run(stmt, parameters or {}).consume().counters

                    outcome.records_affected += sum(
                        (
                            stats.nodes_created,
                            stats.nodes_deleted,
                            stats.relationships_created,
                            stats.relationships_deleted,
                            stats.properties_set,
                        )
                    )
                    outcome.statements_executed += 1

                except Neo4jError as exc:
                    # Abort on the first failing statement.
                    outcome.errors.append(f"Error executing statement: {exc}")
                    outcome.success = False
                    return outcome

        # Every statement ran without error.
        outcome.success = True

    except Exception as exc:
        outcome.errors.append(f"Execution error: {exc}")
        outcome.success = False

    finally:
        # Runs on every exit path, including the early return above.
        outcome.execution_time = time.time() - started_at

    return outcome
311
+
312
+
313
def execute_cypher_file(
    driver: Driver,
    file_path: Union[str, Path],
    database: str = "neo4j",
    batch_size: Optional[int] = None,
) -> ExecutionResult:
    """
    Execute Cypher statements from a file.

    The file is read as UTF-8 text and its whole content is passed to
    ``execute_cypher``.

    Args:
        driver: Neo4j driver instance.
        file_path: Path to Cypher file.
        database: Database name to execute against.
        batch_size: Optional batch size for large files. Currently accepted
            but not used; the whole file is executed in one call.

    Returns:
        ExecutionResult with execution details.

    Raises:
        FileNotFoundError: If file does not exist.

    Example:
        ```python
        driver = connect_neo4j(...)
        result = execute_cypher_file(
            driver,
            "target/neo4j/compiled.cypher"
        )
        print(f"Executed {result.statements_executed} statements")
        print(f"Affected {result.records_affected} records")
        ```
    """
    check_neo4j_available()

    file_path = Path(file_path)

    if not file_path.exists():
        raise FileNotFoundError(f"Cypher file not found: {file_path}")

    # Decode explicitly as UTF-8 so non-ASCII literals are read the same way
    # on every platform instead of depending on the locale's default encoding.
    cypher = file_path.read_text(encoding="utf-8")

    return execute_cypher(driver, cypher, database=database)
357
+
358
+
359
def execute_cypher_with_retry(
    driver: Driver,
    cypher: str,
    parameters: Optional[Dict[str, Any]] = None,
    database: str = "neo4j",
    max_retries: int = 3,
    retry_delay: float = 1.0,
) -> ExecutionResult:
    """
    Execute Cypher with retry logic for transient failures.

    The complete script is re-run on every attempt, so statements that
    already succeeded in a failed attempt are executed again. Any failed
    attempt is retried, regardless of the kind of error.

    Args:
        driver: Neo4j driver instance.
        cypher: Cypher statement(s) to execute.
        parameters: Optional parameters for the query.
        database: Database name to execute against.
        max_retries: Maximum number of retries. Negative values are treated
            as 0, i.e. a single attempt.
        retry_delay: Delay between retries (seconds).

    Returns:
        ExecutionResult of the last attempt. Its ``warnings`` start with one
        entry per retry that was performed.

    Example:
        ```python
        driver = connect_neo4j(...)
        result = execute_cypher_with_retry(
            driver,
            cypher,
            max_retries=5,
            retry_delay=2.0
        )
        ```
    """
    check_neo4j_available()

    # Always make at least one attempt so a result is returned.
    retries = max(max_retries, 0)
    retry_notes: List[str] = []

    result = execute_cypher(driver, cypher, parameters, database)
    attempt = 0

    while not result.success and attempt < retries:
        attempt += 1
        # Collected separately because each attempt yields a new result.
        retry_notes.append(f"Retrying after failure (attempt {attempt}/{retries})")
        time.sleep(retry_delay)
        result = execute_cypher(driver, cypher, parameters, database)

    if retry_notes:
        # Put the retry history on the result that is actually returned.
        result.warnings = retry_notes + list(result.warnings or [])

    return result
414
+
415
+
416
def get_database_info(driver: Driver, database: str = "neo4j") -> Dict[str, Any]:
    """
    Collect counts and schema details of a Neo4j database.

    The queries run in a fixed order inside one session. If one of them
    raises, the values gathered so far are kept, the remaining entries keep
    their defaults, and the error text is stored under the ``"error"`` key.

    Args:
        driver: Neo4j driver instance.
        database: Database name.

    Returns:
        Dictionary with the keys ``node_count``, ``relationship_count``,
        ``labels``, ``relationship_types``, ``constraints`` and ``indexes``,
        plus ``error`` when a query failed.

    Example:
        ```python
        driver = connect_neo4j(...)
        info = get_database_info(driver)
        print(f"Node count: {info['node_count']}")
        print(f"Relationship count: {info['relationship_count']}")
        ```
    """
    check_neo4j_available()

    info: Dict[str, Any] = dict(
        node_count=0,
        relationship_count=0,
        labels=[],
        relationship_types=[],
        constraints=[],
        indexes=[],
    )

    try:
        with driver.session(database=database) as session:
            # Counts
            nodes = session.run("MATCH (n) RETURN count(n) AS count")
            info["node_count"] = nodes.single()["count"]

            rels = session.run("MATCH ()-[r]->() RETURN count(r) AS count")
            info["relationship_count"] = rels.single()["count"]

            # Names in use
            label_rows = session.run("CALL db.labels()")
            info["labels"] = [row["label"] for row in label_rows]

            type_rows = session.run("CALL db.relationshipTypes()")
            info["relationship_types"] = [row["relationshipType"] for row in type_rows]

            # Schema objects, one dict per returned record
            constraint_rows = session.run("SHOW CONSTRAINTS")
            info["constraints"] = [dict(row) for row in constraint_rows]

            index_rows = session.run("SHOW INDEXES")
            info["indexes"] = [dict(row) for row in index_rows]

    except Exception as exc:
        info["error"] = str(exc)

    return info
476
+
477
+
478
def clear_database(
    driver: Driver,
    database: str = "neo4j",
    confirm: bool = False,
) -> ExecutionResult:
    """
    Delete every node and relationship in the database.

    WARNING: This will delete all data in the database!

    Nothing is executed unless ``confirm`` is truthy; otherwise a failed
    ExecutionResult carrying an explanatory error is returned.

    Args:
        driver: Neo4j driver instance.
        database: Database name.
        confirm: Must be True to actually delete data.

    Returns:
        ExecutionResult with deletion details.

    Example:
        ```python
        driver = connect_neo4j(...)
        # Confirm deletion by passing confirm=True
        result = clear_database(driver, confirm=True)
        print(f"Deleted {result.records_affected} records")
        ```
    """
    check_neo4j_available()

    if confirm:
        cypher = """
    MATCH (n)
    DETACH DELETE n;
    """
        return execute_cypher(driver, cypher, database=database)

    # Safety latch: refuse to run without explicit confirmation.
    return ExecutionResult(success=False, errors=["Must pass confirm=True to delete data"])