graflo 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graflo might be problematic. Click here for more details.

Files changed (70) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1276 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +418 -0
  7. graflo/architecture/onto.py +376 -0
  8. graflo/architecture/onto_sql.py +54 -0
  9. graflo/architecture/resource.py +163 -0
  10. graflo/architecture/schema.py +135 -0
  11. graflo/architecture/transform.py +292 -0
  12. graflo/architecture/util.py +89 -0
  13. graflo/architecture/vertex.py +562 -0
  14. graflo/caster.py +736 -0
  15. graflo/cli/__init__.py +14 -0
  16. graflo/cli/ingest.py +203 -0
  17. graflo/cli/manage_dbs.py +197 -0
  18. graflo/cli/plot_schema.py +132 -0
  19. graflo/cli/xml2json.py +93 -0
  20. graflo/data_source/__init__.py +48 -0
  21. graflo/data_source/api.py +339 -0
  22. graflo/data_source/base.py +95 -0
  23. graflo/data_source/factory.py +304 -0
  24. graflo/data_source/file.py +148 -0
  25. graflo/data_source/memory.py +70 -0
  26. graflo/data_source/registry.py +82 -0
  27. graflo/data_source/sql.py +183 -0
  28. graflo/db/__init__.py +44 -0
  29. graflo/db/arango/__init__.py +22 -0
  30. graflo/db/arango/conn.py +1025 -0
  31. graflo/db/arango/query.py +180 -0
  32. graflo/db/arango/util.py +88 -0
  33. graflo/db/conn.py +377 -0
  34. graflo/db/connection/__init__.py +6 -0
  35. graflo/db/connection/config_mapping.py +18 -0
  36. graflo/db/connection/onto.py +717 -0
  37. graflo/db/connection/wsgi.py +29 -0
  38. graflo/db/manager.py +119 -0
  39. graflo/db/neo4j/__init__.py +16 -0
  40. graflo/db/neo4j/conn.py +639 -0
  41. graflo/db/postgres/__init__.py +37 -0
  42. graflo/db/postgres/conn.py +948 -0
  43. graflo/db/postgres/fuzzy_matcher.py +281 -0
  44. graflo/db/postgres/heuristics.py +133 -0
  45. graflo/db/postgres/inference_utils.py +428 -0
  46. graflo/db/postgres/resource_mapping.py +273 -0
  47. graflo/db/postgres/schema_inference.py +372 -0
  48. graflo/db/postgres/types.py +148 -0
  49. graflo/db/postgres/util.py +87 -0
  50. graflo/db/tigergraph/__init__.py +9 -0
  51. graflo/db/tigergraph/conn.py +2365 -0
  52. graflo/db/tigergraph/onto.py +26 -0
  53. graflo/db/util.py +49 -0
  54. graflo/filter/__init__.py +21 -0
  55. graflo/filter/onto.py +525 -0
  56. graflo/logging.conf +22 -0
  57. graflo/onto.py +312 -0
  58. graflo/plot/__init__.py +17 -0
  59. graflo/plot/plotter.py +616 -0
  60. graflo/util/__init__.py +23 -0
  61. graflo/util/chunker.py +807 -0
  62. graflo/util/merge.py +150 -0
  63. graflo/util/misc.py +37 -0
  64. graflo/util/onto.py +422 -0
  65. graflo/util/transform.py +454 -0
  66. graflo-1.3.7.dist-info/METADATA +243 -0
  67. graflo-1.3.7.dist-info/RECORD +70 -0
  68. graflo-1.3.7.dist-info/WHEEL +4 -0
  69. graflo-1.3.7.dist-info/entry_points.txt +5 -0
  70. graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
@@ -0,0 +1,562 @@
1
+ """Vertex configuration and management for graph databases.
2
+
3
+ This module provides classes and utilities for managing vertices in graph databases.
4
+ It handles vertex configuration, field management, indexing, and filtering operations.
5
+ The module supports ArangoDB, Neo4j, and TigerGraph through the DBFlavor enum.
6
+
7
+ Key Components:
8
+ - Vertex: Represents a vertex with its fields and indexes
9
+ - VertexConfig: Manages vertices and their configurations
10
+
11
+ Example:
12
+ >>> vertex = Vertex(name="user", fields=["id", "name"])
13
+ >>> config = VertexConfig(vertices=[vertex])
14
+ >>> fields = config.fields("user") # Returns list[Field]
15
+ >>> field_names = config.fields_names("user") # Returns list[str]
16
+ """
17
+
18
+ import ast
19
+ import dataclasses
20
+ import json
21
+ import logging
22
+ from typing import TYPE_CHECKING
23
+
24
+ from graflo.architecture.onto import Index
25
+ from graflo.filter.onto import Expression
26
+ from graflo.onto import BaseDataclass, BaseEnum, DBFlavor
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class FieldType(BaseEnum):
    """Closed set of type names that may be attached to a vertex field.

    Explicit field types matter mainly for TigerGraph, which needs every
    attribute typed; other backends (ArangoDB, Neo4j) may use a different
    type system or none at all.

    Each member's value is identical to its name.

    Attributes:
        INT: Integer type
        UINT: Unsigned integer type
        FLOAT: Floating point type
        DOUBLE: Double precision floating point type
        BOOL: Boolean type
        STRING: String type
        DATETIME: DateTime type
    """

    # Numeric types
    INT = "INT"
    UINT = "UINT"
    FLOAT = "FLOAT"
    DOUBLE = "DOUBLE"

    # Non-numeric types
    BOOL = "BOOL"
    STRING = "STRING"
    DATETIME = "DATETIME"
54
+
55
+
56
if not TYPE_CHECKING:
    # Runtime aliases: describe what callers may pass in *before*
    # __post_init__ normalizes the values.
    # Fields may arrive as plain names, Field objects, or dicts.
    _FieldsType = list[str] | list["Field"] | list[dict]
    # A field type may arrive as a FieldType, its string name, or None.
    _FieldTypeType = FieldType | str | None
else:
    # Static-analysis aliases: describe the state *after* __post_init__.
    # "Field" is quoted because the class is defined further down the module.
    _FieldsType = list["Field"]
    _FieldTypeType = FieldType | None
67
+
68
+
69
@dataclasses.dataclass
class Field(BaseDataclass):
    """Represents a typed field in a vertex.

    Field objects behave like strings for backward compatibility: they hash
    by name and compare equal to a plain string carrying the same name, so
    they can be used in sets, as dictionary keys, and in membership tests
    against field names. The type information is preserved for databases
    that need it (like TigerGraph).

    Attributes:
        name: Name of the field
        type: Optional type of the field. Can be FieldType enum, str, or None
            at construction. Strings are upper-cased and converted to a
            FieldType member in __post_init__, so after initialization this
            is always FieldType | None. Defaults to None.
    """

    name: str
    type: _FieldTypeType = None

    def __post_init__(self):
        """Validate and normalize ``type`` if one was given.

        String values are matched case-insensitively against the FieldType
        values. This runs after the Field object exists; building Field
        objects from str/dict input is handled by Vertex._normalize_fields().

        Raises:
            ValueError: If ``type`` is a string that names no FieldType, or
                is neither a str, a FieldType, nor None.
        """
        if self.type is None:
            return

        if isinstance(self.type, str):
            # Convert by value lookup rather than `x in FieldType`: membership
            # tests of a non-member against a stdlib Enum class raise
            # TypeError before Python 3.12, whereas the lookup always reports
            # an unknown value as ValueError.
            try:
                self.type = FieldType(self.type.upper())
            except ValueError:
                # self.type still holds the caller's original string here.
                allowed_types = sorted(ft.value for ft in FieldType)
                raise ValueError(
                    f"Field type '{self.type}' is not allowed. "
                    f"Allowed types are: {', '.join(allowed_types)}"
                ) from None
        elif not isinstance(self.type, FieldType):
            allowed_types = sorted(ft.value for ft in FieldType)
            raise ValueError(
                f"Field type must be FieldType enum, str, or None, got {type(self.type)}. "
                f"Allowed types are: {', '.join(allowed_types)}"
            )

    def __str__(self) -> str:
        """Return field name as string for backward compatibility."""
        return self.name

    def __repr__(self) -> str:
        """Return representation including type information when present."""
        if self.type:
            return f"Field(name='{self.name}', type='{self.type}')"
        return f"Field(name='{self.name}')"

    def __hash__(self) -> int:
        """Hash by name only, consistent with the name-only ``__eq__``."""
        return hash(self.name)

    def __eq__(self, other) -> bool:
        """Compare equal to a Field or str with the same name; type is ignored."""
        if isinstance(other, Field):
            return self.name == other.name
        if isinstance(other, str):
            return self.name == other
        return False

    def __ne__(self, other) -> bool:
        """Compare not equal (negation of ``__eq__``)."""
        return not self.__eq__(other)
148
+
149
+
150
@dataclasses.dataclass
class Vertex(BaseDataclass):
    """Represents a vertex in the graph database.

    A vertex is a fundamental unit in the graph that can have fields, indexes,
    and filters. Fields can be specified as strings, Field objects, or dicts.
    Internally, fields are stored as Field objects but behave like strings
    for backward compatibility.

    Attributes:
        name: Name of the vertex
        fields: List of field names (str), Field objects, or dicts.
            Normalized to list[Field] in __post_init__; any field referenced
            by an index but not listed here is appended.
        indexes: List of indexes for the vertex (dicts are converted with
            Index.from_dict). If empty, one index over all fields is created.
        filters: List of filter expressions
        dbname: Optional database name (defaults to vertex name)

    Examples:
        >>> # Backward compatible: list of strings
        >>> v1 = Vertex(name="user", fields=["id", "name"])

        >>> # Typed fields: list of Field objects
        >>> v2 = Vertex(name="user", fields=[
        ...     Field(name="id", type="INT"),
        ...     Field(name="name", type="STRING")
        ... ])

        >>> # From dicts (e.g., from YAML/JSON)
        >>> v3 = Vertex(name="user", fields=[
        ...     {"name": "id", "type": "INT"},
        ...     {"name": "name"}  # defaults to None type
        ... ])
    """

    name: str
    fields: _FieldsType = dataclasses.field(default_factory=list)
    indexes: list[Index] = dataclasses.field(default_factory=list)
    filters: list[Expression] = dataclasses.field(default_factory=list)
    dbname: str | None = None

    @staticmethod
    def _parse_string_to_dict(field_str: str) -> dict | None:
        """Parse a string that might be a JSON or Python dict representation.

        Args:
            field_str: String that might be a dict representation

        Returns:
            dict if successfully parsed as dict, None otherwise (including
            when the string parses to some non-dict value or does not parse
            at all)
        """
        # Try JSON first (handles double-quoted strings). A successful parse
        # is final: a non-dict JSON value is not retried as a Python literal.
        try:
            parsed = json.loads(field_str)
        except json.JSONDecodeError:
            pass
        else:
            return parsed if isinstance(parsed, dict) else None

        # Try Python literal eval (handles single-quoted strings).
        # literal_eval is documented to raise any of these on malformed
        # input (e.g. TypeError for "{[1]: 2}"); all of them simply mean
        # "this is not a dict literal", i.e. a plain field name.
        try:
            parsed = ast.literal_eval(field_str)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            return None
        return parsed if isinstance(parsed, dict) else None

    @staticmethod
    def _dict_to_field(field_dict: dict) -> Field:
        """Convert a dict to a Field object.

        Args:
            field_dict: Dictionary with 'name' key and optional 'type' key

        Returns:
            Field object

        Raises:
            ValueError: If dict doesn't have 'name' key (or it is None)
        """
        name = field_dict.get("name")
        if name is None:
            raise ValueError(f"Field dict must have 'name' key: {field_dict}")
        return Field(name=name, type=field_dict.get("type"))

    def _normalize_fields(
        self, fields: list[str] | list[Field] | list[dict]
    ) -> list[Field]:
        """Normalize fields to Field objects.

        Converts strings, Field objects, or dicts to Field objects.
        Handles the case where dataclass_wizard may have converted dicts to
        JSON strings: a string that parses to a non-empty dict is treated as
        a field dict, anything else as a plain field name.

        Args:
            fields: List of strings, Field objects, or dicts

        Returns:
            list[Field]: Normalized list of Field objects (preserving order)

        Raises:
            TypeError: If an element is not a str, Field, or dict
            ValueError: If a dict element lacks a 'name' key
        """
        normalized = []
        for field in fields:
            if isinstance(field, Field):
                normalized.append(field)
            elif isinstance(field, dict):
                normalized.append(self._dict_to_field(field))
            elif isinstance(field, str):
                # Try to parse as dict (JSON or Python literal)
                parsed_dict = self._parse_string_to_dict(field)
                if parsed_dict:
                    normalized.append(self._dict_to_field(parsed_dict))
                else:
                    # Plain field name
                    normalized.append(Field(name=field, type=None))
            else:
                raise TypeError(f"Field must be str, Field, or dict, got {type(field)}")
        return normalized

    @property
    def field_names(self) -> list[str]:
        """Get list of field names (as strings).

        Returns:
            list[str]: List of field names
        """
        return [field.name for field in self.fields]

    def get_fields(self) -> list[Field]:
        """Return the vertex's Field objects (the ``fields`` list itself)."""
        return self.fields

    def __post_init__(self):
        """Initialize the vertex after dataclass initialization.

        Sets the database name if not provided, normalizes fields to Field
        objects and indexes to Index objects, creates a default index over
        all fields when none is given, and appends any index field that is
        missing from ``fields``.
        """
        if self.dbname is None:
            self.dbname = self.name

        # Normalize fields to Field objects (preserve order)
        self.fields = self._normalize_fields(self.fields)

        # Normalize indexes to Index objects if they're dicts
        self.indexes = [
            Index.from_dict(idx) if isinstance(idx, dict) else idx
            for idx in self.indexes
        ]

        if not self.indexes:
            # No index supplied: index on every field, passed as plain names.
            self.indexes = [Index(fields=self.field_names)]

        # Add index fields that aren't already present (original order is
        # kept; new fields are appended, untyped).
        seen_names = {f.name for f in self.fields}
        for idx in self.indexes:
            for field_name in idx.fields:
                if field_name not in seen_names:
                    self.fields.append(Field(name=field_name, type=None))
                    seen_names.add(field_name)

    def finish_init(self, db_flavor: DBFlavor):
        """Apply database-specific defaults to the vertex's fields.

        For TigerGraph, every field without a type is replaced by a STRING
        field of the same name; for any other flavor the fields are kept
        as they are.

        Args:
            db_flavor: Target database flavor
        """
        self.fields = [
            Field(name=f.name, type=FieldType.STRING)
            if f.type is None and db_flavor == DBFlavor.TIGERGRAPH
            else f
            for f in self.fields
        ]
323
+
324
+
325
@dataclasses.dataclass
class VertexConfig(BaseDataclass):
    """Configuration for managing vertices.

    This class manages vertices, providing methods for accessing
    and manipulating vertex configurations. Lookups go through an internal
    name -> Vertex map built in __post_init__.

    Attributes:
        vertices: List of vertex configurations
        blank_vertices: List of blank vertex names
        force_types: Dictionary mapping vertex names to type lists
        db_flavor: Database flavor (defaults to DBFlavor.ARANGO)
    """

    vertices: list[Vertex]
    blank_vertices: list[str] = dataclasses.field(default_factory=list)
    force_types: dict[str, list] = dataclasses.field(default_factory=dict)
    db_flavor: DBFlavor = DBFlavor.ARANGO

    def __post_init__(self):
        """Initialize the vertex configuration.

        Creates internal mappings and validates blank vertices.

        Raises:
            ValueError: If blank vertices are not defined in the configuration
        """
        self._vertices_map: dict[str, Vertex] = {
            item.name: item for item in self.vertices
        }

        # TODO replace by types
        # vertex_name -> [numeric fields]
        self._vertex_numeric_fields_map = {}

        if set(self.blank_vertices) - set(self.vertex_set):
            raise ValueError(
                f" Blank vertices {self.blank_vertices} are not defined as vertices"
            )

    @property
    def vertex_set(self) -> set[str]:
        """Get set of vertex names.

        Returns:
            set[str]: Set of vertex names
        """
        return set(self._vertices_map.keys())

    @property
    def vertex_list(self) -> list[Vertex]:
        """Get list of vertex configurations.

        Returns:
            list[Vertex]: List of vertex configurations
        """
        return list(self._vertices_map.values())

    def _get_vertex_by_name_or_dbname(self, identifier: str) -> Vertex:
        """Get vertex by name or dbname.

        Args:
            identifier: Vertex name or dbname

        Returns:
            Vertex: The vertex object

        Raises:
            KeyError: If vertex is not found by name or dbname
        """
        # First try by name (most common case)
        if identifier in self._vertices_map:
            return self._vertices_map[identifier]

        # Fall back to a linear scan by dbname; first match wins
        for vertex in self._vertices_map.values():
            if vertex.dbname == identifier:
                return vertex

        # Not found
        available_names = list(self._vertices_map.keys())
        available_dbnames = [v.dbname for v in self._vertices_map.values()]
        raise KeyError(
            f"Vertex '{identifier}' not found by name or dbname. "
            f"Available names: {available_names}, "
            f"Available dbnames: {available_dbnames}"
        )

    def vertex_dbname(self, vertex_name):
        """Get database name for a vertex.

        Args:
            vertex_name: Name of the vertex

        Returns:
            str: Database name for the vertex

        Raises:
            KeyError: If vertex is not found (logged before re-raising)
        """
        try:
            return self._vertices_map[vertex_name].dbname
        except KeyError:
            logger.error(
                "Available vertices :"
                f" {self._vertices_map.keys()}; vertex"
                f" requested : {vertex_name}"
            )
            raise

    def index(self, vertex_name) -> Index:
        """Get primary index for a vertex (the first of its indexes).

        Args:
            vertex_name: Name of the vertex

        Returns:
            Index: Primary index for the vertex

        Raises:
            KeyError: If vertex is not found
        """
        return self._vertices_map[vertex_name].indexes[0]

    def indexes(self, vertex_name) -> list[Index]:
        """Get all indexes for a vertex.

        Args:
            vertex_name: Name of the vertex

        Returns:
            list[Index]: List of indexes for the vertex

        Raises:
            KeyError: If vertex is not found
        """
        return self._vertices_map[vertex_name].indexes

    def fields(self, vertex_name: str) -> list[Field]:
        """Get fields for a vertex.

        Args:
            vertex_name: Name of the vertex or dbname

        Returns:
            list[Field]: List of Field objects

        Raises:
            KeyError: If vertex is not found by name or dbname
        """
        return self._get_vertex_by_name_or_dbname(vertex_name).fields

    def fields_names(
        self,
        vertex_name: str,
    ) -> list[str]:
        """Get field names for a vertex as strings.

        Args:
            vertex_name: Name of the vertex or dbname

        Returns:
            list[str]: List of field names as strings

        Raises:
            KeyError: If vertex is not found by name or dbname
        """
        return self._get_vertex_by_name_or_dbname(vertex_name).field_names

    def numeric_fields_list(self, vertex_name):
        """Get list of numeric fields for a vertex.

        Args:
            vertex_name: Name of the vertex

        Returns:
            The numeric fields registered for the vertex, or an empty tuple
            if none are registered

        Raises:
            ValueError: If vertex is not defined in config
        """
        if vertex_name not in self.vertex_set:
            raise ValueError(
                " Accessing vertex numeric fields: vertex"
                f" {vertex_name} was not defined in config"
            )
        return self._vertex_numeric_fields_map.get(vertex_name, ())

    def filters(self, vertex_name) -> list[Expression]:
        """Get filter expressions for a vertex.

        Args:
            vertex_name: Name of the vertex

        Returns:
            list[Expression]: List of filter expressions; an empty list if
            the vertex is unknown
        """
        if vertex_name in self._vertices_map:
            return self._vertices_map[vertex_name].filters
        return []

    def update_vertex(self, v: Vertex):
        """Register or replace a vertex in the lookup map under its own name.

        Args:
            v: Vertex configuration to update
        """
        self._vertices_map[v.name] = v

    def __getitem__(self, key: str):
        """Get vertex configuration by name.

        Args:
            key: Vertex name

        Returns:
            Vertex: Vertex configuration

        Raises:
            KeyError: If vertex is not found
        """
        if key in self._vertices_map:
            return self._vertices_map[key]
        raise KeyError(f"Vertex {key} absent")

    def __setitem__(self, key: str, value: Vertex):
        """Set vertex configuration by name.

        Args:
            key: Vertex name
            value: Vertex configuration
        """
        self._vertices_map[key] = value

    def finish_init(self, db_flavor: DBFlavor):
        """Complete initialization of every managed vertex for ``db_flavor``.

        Calls ``Vertex.finish_init`` once per vertex object. Both the
        ``vertices`` list and the lookup map are covered, because
        ``update_vertex`` / ``__setitem__`` only write to the map and such
        vertices would otherwise never be finalized.

        Args:
            db_flavor: Target database flavor, forwarded to each vertex
        """
        # Vertex defines no __hash__ in this module, so objects are
        # de-duplicated by identity.
        finalized: set[int] = set()
        for vertex in (*self.vertices, *self._vertices_map.values()):
            if id(vertex) in finalized:
                continue
            finalized.add(id(vertex))
            vertex.finish_init(db_flavor)