graflo 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graflo might be problematic. Click here for more details.

Files changed (45) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +39 -0
  3. graflo/architecture/__init__.py +37 -0
  4. graflo/architecture/actor.py +974 -0
  5. graflo/architecture/actor_util.py +425 -0
  6. graflo/architecture/edge.py +295 -0
  7. graflo/architecture/onto.py +374 -0
  8. graflo/architecture/resource.py +161 -0
  9. graflo/architecture/schema.py +136 -0
  10. graflo/architecture/transform.py +292 -0
  11. graflo/architecture/util.py +93 -0
  12. graflo/architecture/vertex.py +277 -0
  13. graflo/caster.py +409 -0
  14. graflo/cli/__init__.py +14 -0
  15. graflo/cli/ingest.py +144 -0
  16. graflo/cli/manage_dbs.py +193 -0
  17. graflo/cli/plot_schema.py +132 -0
  18. graflo/cli/xml2json.py +93 -0
  19. graflo/db/__init__.py +32 -0
  20. graflo/db/arango/__init__.py +16 -0
  21. graflo/db/arango/conn.py +734 -0
  22. graflo/db/arango/query.py +180 -0
  23. graflo/db/arango/util.py +88 -0
  24. graflo/db/connection.py +304 -0
  25. graflo/db/manager.py +104 -0
  26. graflo/db/neo4j/__init__.py +16 -0
  27. graflo/db/neo4j/conn.py +432 -0
  28. graflo/db/util.py +49 -0
  29. graflo/filter/__init__.py +21 -0
  30. graflo/filter/onto.py +400 -0
  31. graflo/logging.conf +22 -0
  32. graflo/onto.py +186 -0
  33. graflo/plot/__init__.py +17 -0
  34. graflo/plot/plotter.py +556 -0
  35. graflo/util/__init__.py +23 -0
  36. graflo/util/chunker.py +739 -0
  37. graflo/util/merge.py +148 -0
  38. graflo/util/misc.py +37 -0
  39. graflo/util/onto.py +63 -0
  40. graflo/util/transform.py +406 -0
  41. graflo-1.1.0.dist-info/METADATA +157 -0
  42. graflo-1.1.0.dist-info/RECORD +45 -0
  43. graflo-1.1.0.dist-info/WHEEL +4 -0
  44. graflo-1.1.0.dist-info/entry_points.txt +5 -0
  45. graflo-1.1.0.dist-info/licenses/LICENSE +126 -0
@@ -0,0 +1,432 @@
1
+ """Neo4j connection implementation for graph database operations.
2
+
3
+ This module implements the Connection interface for Neo4j, providing
4
+ specific functionality for graph operations in Neo4j. It handles:
5
+ - Node and relationship management
6
+ - Cypher query execution
7
+ - Index creation and management
8
+ - Batch operations
9
+ - Graph traversal and pattern matching
10
+
11
+ Key Features:
12
+ - Label-based node organization
13
+ - Relationship type management
14
+ - Property indices
15
+ - Cypher query execution
16
+ - Batch node and relationship operations
17
+
18
+ Example:
19
+ >>> conn = Neo4jConnection(config)
20
+ >>> conn.init_db(schema, clean_start=True)
21
+ >>> conn.upsert_docs_batch(docs, "User", match_keys=["email"])
22
+ """
23
+
24
+ import logging
25
+
26
+ from neo4j import GraphDatabase
27
+ from suthing import Neo4jConnectionConfig
28
+
29
+ from graflo.architecture.edge import Edge
30
+ from graflo.architecture.onto import Index
31
+ from graflo.architecture.schema import Schema
32
+ from graflo.architecture.vertex import VertexConfig
33
+ from graflo.db.connection import Connection
34
+ from graflo.filter.onto import Expression
35
+ from graflo.onto import AggregationType, DBFlavor
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+
40
class Neo4jConnection(Connection):
    """Neo4j-specific implementation of the Connection interface.

    Wraps a Neo4j driver plus a single session opened from it, and builds
    Cypher statements for index creation, node upserts, relationship inserts
    and label-based fetches.

    Attributes:
        flavor: Database flavor identifier (NEO4J)
        conn: Neo4j session instance used to run every query
    """

    flavor = DBFlavor.NEO4J

    def __init__(self, config: Neo4jConnectionConfig):
        """Initialize Neo4j connection.

        Args:
            config: Neo4j connection configuration; its ``url``, ``username``
                and ``password`` attributes are passed to the driver
        """
        super().__init__()
        self._driver = GraphDatabase.driver(
            uri=config.url, auth=(config.username, config.password)
        )
        try:
            self.conn = self._driver.session()
        except Exception:
            # Do not leave a half-initialised driver behind if the session
            # cannot be opened.
            self._driver.close()
            raise

    def execute(self, query, **kwargs):
        """Execute a Cypher query on the session.

        Args:
            query: Cypher query string to execute
            **kwargs: Query parameters, forwarded to ``session.run``

        Returns:
            Result: whatever ``session.run`` returns for the query
        """
        return self.conn.run(query, **kwargs)

    def close(self):
        """Close the Neo4j session and then the underlying driver."""
        try:
            self.conn.close()
        finally:
            # The driver must be released even if closing the session raises.
            self._driver.close()

    def create_database(self, name: str):
        """Create a new Neo4j database.

        Note: This operation is only supported in Neo4j Enterprise Edition.
        Failures are logged and swallowed (best effort).

        Args:
            name: Name of the database to create. It is interpolated into the
                Cypher statement verbatim, so it must come from a trusted
                source.
        """
        try:
            self.execute(f"CREATE DATABASE {name}")
        except Exception as e:
            logger.error("%s", e)

    def delete_database(self, name: str):
        """Wipe the current database.

        Note: Dropping a database is only supported in Neo4j Enterprise
        Edition, so this method instead deletes all nodes and relationships.
        Failures are logged and swallowed (best effort).

        Args:
            name: Unused; kept for interface compatibility
        """
        try:
            self.execute("MATCH (n) DETACH DELETE n")
        except Exception as e:
            logger.error("Could not clean database : %s", e)

    def define_vertex_indices(self, vertex_config: VertexConfig):
        """Define indices for vertex labels.

        Creates one index per index definition of every vertex in the
        configuration.

        Args:
            vertex_config: Vertex configuration containing index definitions
        """
        for vertex_name in vertex_config.vertex_set:
            for index_obj in vertex_config.indexes(vertex_name):
                self._add_index(vertex_name, index_obj)

    def define_edge_indices(self, edges: list[Edge]):
        """Define indices for relationship types.

        Edges whose ``relation`` is ``None`` are skipped, because there is no
        relationship type to attach the index to.

        Args:
            edges: List of edge configurations containing index definitions
        """
        for edge in edges:
            for index_obj in edge.indexes:
                if edge.relation is None:
                    continue
                self._add_index(edge.relation, index_obj, is_vertex_index=False)

    def _add_index(self, obj_name, index: Index, is_vertex_index=True):
        """Add an index to a label or relationship type.

        The index is named ``<obj_name>_<field1>_<field2>...`` and is created
        with ``IF NOT EXISTS``, so repeated calls are harmless.

        Args:
            obj_name: Label or relationship type name
            index: Index configuration to create
            is_vertex_index: If True, create index on nodes, otherwise on relationships
        """
        indexed_props = ", ".join(f"x.{f}" for f in index.fields)
        index_name = f"{obj_name}_{'_'.join(index.fields)}"
        if is_vertex_index:
            pattern = f"(x:{obj_name})"
        else:
            pattern = f"()-[x:{obj_name}]-()"

        q = f"CREATE INDEX {index_name} IF NOT EXISTS FOR {pattern} ON ({indexed_props});"

        self.execute(q)

    def define_collections(self, schema: Schema):
        """Define collections based on schema.

        Note: This is a no-op in Neo4j as collections are implicit.

        Args:
            schema: Schema containing collection definitions
        """
        pass

    def define_vertex_collections(self, schema: Schema):
        """Define vertex collections based on schema.

        Note: This is a no-op in Neo4j as vertex collections are implicit.

        Args:
            schema: Schema containing vertex definitions
        """
        pass

    def define_edge_collections(self, edges: list[Edge]):
        """Define edge collections based on schema.

        Note: This is a no-op in Neo4j as edge collections are implicit.

        Args:
            edges: List of edge configurations
        """
        pass

    def delete_collections(self, cnames=(), gnames=(), delete_all=False):
        """Delete nodes, together with their relationships, from the database.

        ``DETACH DELETE`` is used because Neo4j refuses to delete a node that
        still has relationships attached.

        Args:
            cnames: Label names to delete nodes for
            gnames: Unused in Neo4j
            delete_all: If True, delete all nodes and relationships. All
                nodes are also deleted when ``cnames`` is empty.
        """
        if delete_all or not cnames:
            self.execute("MATCH (n) DETACH DELETE n")
            return
        for label in cnames:
            self.execute(f"MATCH (n:{label}) DETACH DELETE n")

    def init_db(self, schema: Schema, clean_start):
        """Initialize Neo4j with the given schema.

        Args:
            schema: Schema containing graph structure definitions
            clean_start: If True, delete all existing data before initialization
        """
        if clean_start:
            self.delete_database("")
        self.define_indexes(schema)

    def upsert_docs_batch(self, docs, class_name, match_keys, **kwargs):
        """Upsert a batch of nodes using Cypher.

        Each document is MERGEd on ``match_keys``; all of its properties are
        then written onto the matched or newly created node.

        Args:
            docs: List of node documents to upsert
            class_name: Label to upsert into
            match_keys: Keys to match for upsert operation
            **kwargs: Additional options:
                - dry: If True, don't execute the query
        """
        dry = kwargs.pop("dry", False)

        # NOTE(review): with empty match_keys the MERGE pattern has no
        # properties; callers are expected to pass at least one key.
        index_str = ", ".join(f"{k}: row.{k}" for k in match_keys)
        # The same properties are written whether the node was matched or
        # created, so a single SET replaces the ON MATCH / ON CREATE pair.
        q = f"""
            WITH $batch AS batch
            UNWIND batch as row
            MERGE (n:{class_name} {{ {index_str} }})
            SET n += row
        """
        if not dry:
            self.execute(q, batch=docs)

    def insert_edges_batch(
        self,
        docs_edges,
        source_class,
        target_class,
        relation_name,
        collection_name=None,
        match_keys_source=("_key",),
        match_keys_target=("_key",),
        filter_uniques=True,
        uniq_weight_fields=None,
        uniq_weight_collections=None,
        upsert_option=False,
        head=None,
        **kwargs,
    ):
        """Insert a batch of relationships using Cypher.

        For every row the source and target nodes are matched on the given
        keys, the relationship is MERGEd between them and the row's third
        item is written onto it as properties.

        Args:
            docs_edges: List of rows; the query reads each row positionally as
                ``[source_doc, target_doc, relationship_properties]``
            source_class: Source node label
            target_class: Target node label
            relation_name: Relationship type name
            collection_name: Unused in Neo4j
            match_keys_source: Keys to match source nodes
            match_keys_target: Keys to match target nodes
            filter_uniques: Unused in Neo4j
            uniq_weight_fields: Unused in Neo4j
            uniq_weight_collections: Unused in Neo4j
            upsert_option: Unused in Neo4j
            head: Optional limit on number of relationships to insert; applied
                when ``docs_edges`` is a list
            **kwargs: Additional options:
                - dry: If True, don't execute the query
        """
        dry = kwargs.pop("dry", False)

        # Honour the documented ``head`` limit.
        if head is not None and isinstance(docs_edges, list):
            docs_edges = docs_edges[:head]

        source_match_str = [f"source.{key} = row[0].{key}" for key in match_keys_source]
        target_match_str = [f"target.{key} = row[1].{key}" for key in match_keys_target]

        match_clause = "WHERE " + " AND ".join(source_match_str + target_match_str)

        q = f"""
            WITH $batch AS batch
            UNWIND batch as row
            MATCH (source:{source_class}),
                  (target:{target_class}) {match_clause}
            MERGE (source)-[r:{relation_name}]->(target)
            SET r += row[2]

        """
        if not dry:
            self.execute(q, batch=docs_edges)

    def insert_return_batch(self, docs, class_name):
        """Insert nodes and return their properties.

        Note: Not implemented in Neo4j.

        Args:
            docs: Documents to insert
            class_name: Label to insert into

        Raises:
            NotImplementedError: This method is not implemented for Neo4j
        """
        raise NotImplementedError()

    def fetch_docs(
        self,
        class_name,
        filters: list | dict | None = None,
        limit: int | None = None,
        return_keys: list | None = None,
        unset_keys: list | None = None,
    ):
        """Fetch nodes from a label.

        Args:
            class_name: Label to fetch from
            filters: Query filters, converted with ``Expression.from_dict``
            limit: Maximum number of nodes to return; ignored unless an int
            return_keys: Keys to return; when given, only these properties
                are projected
            unset_keys: Unused in Neo4j

        Returns:
            list: Fetched nodes, one item per result row
        """
        if filters is not None:
            ff = Expression.from_dict(filters)
            filter_clause = f"WHERE {ff(doc_name='n', kind=DBFlavor.NEO4J)}"
        else:
            filter_clause = ""

        if return_keys is not None:
            projection = ", ".join(f".{item}" for item in return_keys)
            # Alias the map projection explicitly so the result column is
            # always named "n", which is the key read below.
            return_clause = f"RETURN n {{ {projection} }} AS n"
        else:
            return_clause = "RETURN n"

        if limit is not None and isinstance(limit, int):
            limit_clause = f"LIMIT {limit}"
        else:
            limit_clause = ""

        q = (
            f"MATCH (n:{class_name})"
            f" {filter_clause}"
            f" {return_clause}"
            f" {limit_clause}"
        )
        cursor = self.execute(q)
        return [item["n"] for item in cursor.data()]

    def fetch_present_documents(
        self,
        batch,
        class_name,
        match_keys,
        keep_keys,
        flatten=False,
        filters: list | dict | None = None,
    ):
        """Fetch nodes that exist in the database.

        Note: Not implemented in Neo4j.

        Args:
            batch: Batch of documents to check
            class_name: Label to check in
            match_keys: Keys to match nodes
            keep_keys: Keys to keep in result
            flatten: Unused in Neo4j
            filters: Additional query filters

        Raises:
            NotImplementedError: This method is not implemented for Neo4j
        """
        raise NotImplementedError

    def aggregate(
        self,
        class_name,
        aggregation_function: AggregationType,
        discriminant: str | None = None,
        aggregated_field: str | None = None,
        filters: list | dict | None = None,
    ):
        """Perform aggregation on nodes.

        Note: Not implemented in Neo4j.

        Args:
            class_name: Label to aggregate
            aggregation_function: Type of aggregation to perform
            discriminant: Field to group by
            aggregated_field: Field to aggregate
            filters: Query filters

        Raises:
            NotImplementedError: This method is not implemented for Neo4j
        """
        raise NotImplementedError

    def keep_absent_documents(
        self,
        batch,
        class_name,
        match_keys,
        keep_keys,
        filters: list | dict | None = None,
    ):
        """Keep nodes that don't exist in the database.

        Note: Not implemented in Neo4j.

        Args:
            batch: Batch of documents to check
            class_name: Label to check in
            match_keys: Keys to match nodes
            keep_keys: Keys to keep in result
            filters: Additional query filters

        Raises:
            NotImplementedError: This method is not implemented for Neo4j
        """
        raise NotImplementedError
graflo/db/util.py ADDED
@@ -0,0 +1,49 @@
1
+ """Database cursor utilities for graph operations.
2
+
3
+ This module provides utility functions for working with database cursors,
4
+ particularly for handling batch data retrieval and cursor iteration.
5
+
6
+ Key Functions:
7
+ - get_data_from_cursor: Retrieve data from a cursor with optional limit
8
+
9
+ Example:
10
+ >>> cursor = db.execute("FOR doc IN collection RETURN doc")
11
+ >>> batch = get_data_from_cursor(cursor, limit=100)
12
+ """
13
+
14
+ from arango.exceptions import CursorNextError
15
+
16
+
17
def get_data_from_cursor(cursor, limit=None):
    """Collect items from a cursor into a list, optionally capped by a limit.

    Items are pulled one at a time with ``next``; nothing is read from the
    cursor once the limit has been reached.

    Args:
        cursor: Database cursor (an iterator) to read from
        limit: Optional maximum number of items to retrieve

    Returns:
        list: Items read from the cursor, in order

    Note:
        Reading stops, and what was gathered so far is returned, when:
            - The limit is reached
            - The cursor is exhausted
            - A CursorNextError occurs
    """
    collected = []
    # The limit is checked before every read so the cursor is never advanced
    # past the requested number of items.
    while limit is None or len(collected) < limit:
        try:
            collected.append(next(cursor))
        except StopIteration:
            break
        except CursorNextError:
            break
    return collected
@@ -0,0 +1,21 @@
1
+ """Filter expression system for database queries.
2
+
3
+ This package provides a flexible system for creating and evaluating filter expressions
4
+ that can be translated into different database query languages (AQL, Cypher, Python).
5
+
6
+ Key Components:
7
+ - LogicalOperator: Logical operations (AND, OR, NOT, IMPLICATION)
8
+ - ComparisonOperator: Comparison operations (==, !=, >, <, etc.)
9
+ - Clause: Filter clause implementation
10
+ - Expression: Filter expression factory
11
+
12
+ Example:
13
+ >>> from graflo.filter import Expression
14
+ >>> expr = Expression.from_dict({
15
+ ... "AND": [
16
+ ... {"field": "age", "cmp_operator": ">=", "value": 18},
17
+ ... {"field": "status", "cmp_operator": "==", "value": "active"}
18
+ ... ]
19
+ ... })
20
+ >>> # Converts to: "age >= 18 AND status == 'active'"
21
+ """