graflo 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graflo might be problematic. Click here for more details.

Files changed (70) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1276 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +418 -0
  7. graflo/architecture/onto.py +376 -0
  8. graflo/architecture/onto_sql.py +54 -0
  9. graflo/architecture/resource.py +163 -0
  10. graflo/architecture/schema.py +135 -0
  11. graflo/architecture/transform.py +292 -0
  12. graflo/architecture/util.py +89 -0
  13. graflo/architecture/vertex.py +562 -0
  14. graflo/caster.py +736 -0
  15. graflo/cli/__init__.py +14 -0
  16. graflo/cli/ingest.py +203 -0
  17. graflo/cli/manage_dbs.py +197 -0
  18. graflo/cli/plot_schema.py +132 -0
  19. graflo/cli/xml2json.py +93 -0
  20. graflo/data_source/__init__.py +48 -0
  21. graflo/data_source/api.py +339 -0
  22. graflo/data_source/base.py +95 -0
  23. graflo/data_source/factory.py +304 -0
  24. graflo/data_source/file.py +148 -0
  25. graflo/data_source/memory.py +70 -0
  26. graflo/data_source/registry.py +82 -0
  27. graflo/data_source/sql.py +183 -0
  28. graflo/db/__init__.py +44 -0
  29. graflo/db/arango/__init__.py +22 -0
  30. graflo/db/arango/conn.py +1025 -0
  31. graflo/db/arango/query.py +180 -0
  32. graflo/db/arango/util.py +88 -0
  33. graflo/db/conn.py +377 -0
  34. graflo/db/connection/__init__.py +6 -0
  35. graflo/db/connection/config_mapping.py +18 -0
  36. graflo/db/connection/onto.py +717 -0
  37. graflo/db/connection/wsgi.py +29 -0
  38. graflo/db/manager.py +119 -0
  39. graflo/db/neo4j/__init__.py +16 -0
  40. graflo/db/neo4j/conn.py +639 -0
  41. graflo/db/postgres/__init__.py +37 -0
  42. graflo/db/postgres/conn.py +948 -0
  43. graflo/db/postgres/fuzzy_matcher.py +281 -0
  44. graflo/db/postgres/heuristics.py +133 -0
  45. graflo/db/postgres/inference_utils.py +428 -0
  46. graflo/db/postgres/resource_mapping.py +273 -0
  47. graflo/db/postgres/schema_inference.py +372 -0
  48. graflo/db/postgres/types.py +148 -0
  49. graflo/db/postgres/util.py +87 -0
  50. graflo/db/tigergraph/__init__.py +9 -0
  51. graflo/db/tigergraph/conn.py +2365 -0
  52. graflo/db/tigergraph/onto.py +26 -0
  53. graflo/db/util.py +49 -0
  54. graflo/filter/__init__.py +21 -0
  55. graflo/filter/onto.py +525 -0
  56. graflo/logging.conf +22 -0
  57. graflo/onto.py +312 -0
  58. graflo/plot/__init__.py +17 -0
  59. graflo/plot/plotter.py +616 -0
  60. graflo/util/__init__.py +23 -0
  61. graflo/util/chunker.py +807 -0
  62. graflo/util/merge.py +150 -0
  63. graflo/util/misc.py +37 -0
  64. graflo/util/onto.py +422 -0
  65. graflo/util/transform.py +454 -0
  66. graflo-1.3.7.dist-info/METADATA +243 -0
  67. graflo-1.3.7.dist-info/RECORD +70 -0
  68. graflo-1.3.7.dist-info/WHEEL +4 -0
  69. graflo-1.3.7.dist-info/entry_points.txt +5 -0
  70. graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
@@ -0,0 +1,418 @@
1
+ """Edge configuration and management for graph databases.
2
+
3
+ This module provides classes and utilities for managing edges in graph databases.
4
+ It handles edge configuration, weight management, indexing, and relationship operations.
5
+ The module supports ArangoDB, Neo4j, and TigerGraph through the DBFlavor enum.
6
+
7
+ Key Components:
8
+ - Edge: Represents an edge with its source, target, and configuration
9
+ - EdgeConfig: Manages collections of edges and their configurations
10
+ - WeightConfig: Configuration for edge weights and relationships
11
+
12
+ Example:
13
+ >>> edge = Edge(source="user", target="post")
14
+ >>> config = EdgeConfig(edges=[edge])
15
+ >>> edge.finish_init(vertex_config=vertex_config)
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import dataclasses
21
+ from typing import Union
22
+
23
+ from graflo.architecture.onto import (
24
+ BaseDataclass,
25
+ EdgeId,
26
+ EdgeType,
27
+ Index,
28
+ Weight,
29
+ )
30
+ from graflo.architecture.vertex import Field, VertexConfig, _FieldsType
31
+ from graflo.onto import DBFlavor
32
+
33
+ # Default relation name for TigerGraph edges when relation is not specified
34
+ DEFAULT_TIGERGRAPH_RELATION = "relates"
35
+
36
+
37
@dataclasses.dataclass
class WeightConfig(BaseDataclass):
    """Configuration for edge weights and relationships.

    Holds the weight configurations attached to an edge together with the
    "direct" weight fields, which are normalized to ``Field`` objects.

    Attributes:
        vertices: List of weight configurations.
        direct: List of direct field mappings. May be given as strings,
            ``Field`` objects, or dicts with a ``"name"`` key and an optional
            ``"type"`` key. ``None`` is treated as "no direct fields".
            After ``__post_init__`` this is always ``list[Field]``.

    Examples:
        >>> # Backward compatible: list of strings
        >>> wc1 = WeightConfig(direct=["date", "weight"])

        >>> # Typed fields: list of Field objects
        >>> wc2 = WeightConfig(direct=[
        ...     Field(name="date", type="DATETIME"),
        ...     Field(name="weight", type="FLOAT")
        ... ])

        >>> # From dicts (e.g., from YAML/JSON)
        >>> wc3 = WeightConfig(direct=[
        ...     {"name": "date", "type": "DATETIME"},
        ...     {"name": "weight"}  # defaults to None type
        ... ])
    """

    vertices: list[Weight] = dataclasses.field(default_factory=list)
    # Input may be list[str], list[Field] or list[dict]; __post_init__
    # normalizes it so that the stored value is always list[Field].
    direct: _FieldsType = dataclasses.field(default_factory=list)

    def _normalize_fields(
        self, fields: list[str] | list[Field] | list[dict] | None
    ) -> list[Field]:
        """Normalize a heterogeneous field list to ``Field`` objects.

        Args:
            fields: List of strings, ``Field`` objects, or dicts. ``None``
                (e.g. an empty ``direct:`` key in YAML) yields an empty list.

        Returns:
            list[Field]: Normalized fields, in input order.

        Raises:
            ValueError: If a dict entry has no ``"name"`` key (or it is None).
            TypeError: If an entry is not a str, ``Field``, or dict.
        """
        if fields is None:
            # An explicit null in the config means "no direct fields";
            # iterating None would otherwise raise an unhelpful TypeError.
            return []

        normalized: list[Field] = []
        for field in fields:
            if isinstance(field, Field):
                normalized.append(field)
            elif isinstance(field, str):
                # Backward compatibility: a bare string becomes an untyped Field.
                normalized.append(Field(name=field, type=None))
            elif isinstance(field, dict):
                # Dict form (e.g. from YAML/JSON): only "name" and the
                # optional "type" are read.
                name = field.get("name")
                if name is None:
                    raise ValueError(f"Field dict must have 'name' key: {field}")
                normalized.append(Field(name=name, type=field.get("type")))
            else:
                raise TypeError(f"Field must be str, Field, or dict, got {type(field)}")
        return normalized

    @property
    def direct_names(self) -> list[str]:
        """Get the names of the direct fields.

        Returns:
            list[str]: ``name`` of every entry in ``direct``, in order.
        """
        return [field.name for field in self.direct]

    def __post_init__(self):
        """Normalize ``direct`` so that it is always ``list[Field]``."""
        self.direct = self._normalize_fields(self.direct)

    @classmethod
    def from_dict(cls, data: dict) -> WeightConfig:
        """Create a WeightConfig from a dictionary.

        ``direct`` is withheld from the parent ``from_dict`` and normalized
        here, because its entries may be strings, dicts, or ``Field`` objects
        and the generic deserializer may not handle that mix.

        Args:
            data: Dictionary containing weight config data. It is not mutated.

        Returns:
            WeightConfig: New instance with ``direct`` normalized to
            ``list[Field]``.
        """
        # `.get("direct") or []` also covers an explicit `direct: null`,
        # which `.get("direct", [])` would pass through as None.
        direct_data = data.get("direct") or []
        remaining = {key: value for key, value in data.items() if key != "direct"}

        # The parent builds the instance from everything except `direct`.
        instance = super().from_dict(remaining)

        # __post_init__ only saw the default empty list, so normalize the
        # withheld entries explicitly.
        instance.direct = instance._normalize_fields(direct_data)
        return instance
155
+
156
+
157
@dataclasses.dataclass
class Edge(BaseDataclass):
    """An edge definition connecting a source vertex to a target vertex.

    Besides the two endpoints, an edge carries index definitions, an optional
    weight configuration and relation naming information.

    Attributes:
        source: Source vertex name.
        target: Target vertex name.
        indexes: Index definitions for the edge; rewritten by ``finish_init``.
        weights: Optional weight configuration.
        relation: Optional relation name. Represents the class in Neo4j; for
            ArangoDB it becomes a weight. For TigerGraph a default is assigned
            in ``finish_init`` when this is None.
        relation_field: Name of the field that contains the class or relation.
        relation_from_key: Not used in this module; presumably tells consumers
            to take the relation from a key (confirm against callers).
        purpose: Optional purpose, used to create extra utility collections
            between the same pair of vertex types.
        match_source: Optional source discriminant field.
        match_target: Optional target discriminant field.
        exclude_source: Not used in this module (presumably the negative
            counterpart of ``match_source``; confirm against callers).
        exclude_target: Not used in this module (presumably the negative
            counterpart of ``match_target``; confirm against callers).
        match: Not used in this module; semantics defined by consumers.
        type: Edge type (DIRECT or INDIRECT).
        aux: Whether this is an auxiliary edge; such edges are initialized in
            the database but otherwise not considered by graflo.
        by: Optional vertex name for indirect edges; replaced by the vertex
            database name in ``finish_init``.
        graph_name: Graph name (ArangoDB only, set in ``finish_init``).
        database_name: Database-specific edge identifier (ArangoDB only, set
            in ``finish_init``); for ArangoDB this is the edge collection name.
    """

    source: str
    target: str
    indexes: list[Index] = dataclasses.field(default_factory=list)
    # Union (not `X | None`) is kept for dataclass_wizard compatibility
    weights: Union[WeightConfig, None] = None

    # relation represents Class in neo4j, for arango it becomes a weight
    relation: str | None = None
    # field that contains Class or relation
    relation_field: str | None = None
    relation_from_key: bool = False

    # used to create extra utility collections between the same type of vertices (A, B)
    purpose: str | None = None

    match_source: str | None = None
    match_target: str | None = None
    exclude_source: str | None = None
    exclude_target: str | None = None
    match: str | None = None

    type: EdgeType = EdgeType.DIRECT

    # aux=True edges are init in the db but not considered by graflo
    aux: bool = False

    by: str | None = None
    # ArangoDB-specific: graph name (set in finish_init)
    graph_name: str | None = None
    # ArangoDB-specific: edge collection name (set in finish_init)
    database_name: str | None = None

    def __post_init__(self):
        """Reset the resolved endpoint names; ``finish_init`` fills them in."""
        self._source: str | None = None
        self._target: str | None = None

    def finish_init(self, vertex_config: VertexConfig):
        """Complete edge initialization using the vertex configuration.

        Resolves vertex database names, derives the ArangoDB graph and edge
        collection names, assigns the TigerGraph default relation when none is
        set, and initializes the indices.

        Args:
            vertex_config: Configuration for vertices.
        """
        if self.by is not None and self.type == EdgeType.INDIRECT:
            self.by = vertex_config.vertex_dbname(self.by)

        self._source = vertex_config.vertex_dbname(self.source)
        self._target = vertex_config.vertex_dbname(self.target)

        # ArangoDB-specific: graph_name and database_name are only derived here
        if vertex_config.db_flavor == DBFlavor.ARANGO:
            name_parts = [
                vertex_config.vertex_dbname(endpoint)
                for endpoint in (self.source, self.target)
            ]
            if self.purpose is not None:
                name_parts.append(self.purpose)
            self.graph_name = "_".join([*name_parts, "graph"])
            self.database_name = "_".join([*name_parts, "edges"])

        # TigerGraph requires every edge to have a named type (relation),
        # so fall back to the module default when none was configured.
        if vertex_config.db_flavor == DBFlavor.TIGERGRAPH and self.relation is None:
            self.relation = DEFAULT_TIGERGRAPH_RELATION

        self._init_indices(vertex_config)

    def _init_indices(self, vc: VertexConfig):
        """Initialize every index of the edge.

        Args:
            vc: Vertex configuration.
        """
        initialized = []
        for index in self.indexes:
            initialized.append(self._init_index(index, vc))
        self.indexes = initialized

    def _init_index(self, index: Index, vc: VertexConfig) -> Index:
        """Initialize a single index of the edge.

        Args:
            index: Index to initialize (its ``fields`` are rewritten in place).
            vc: Vertex configuration.

        Returns:
            Index: The same index object, with resolved fields.

        Note:
            For ArangoDB, unless ``exclude_edge_endpoints`` is set, "_from"
            and "_to" are prepended when neither is already among the fields.
        """
        # "@" is reserved: a field containing it marks an index that was
        # already initialized, so it is returned untouched.
        if any("@" in name for name in index.fields):
            return index

        if index.name is None:
            resolved = list(index.fields)
        else:
            # Index over a vertex called index.name: use the given fields, or
            # fall back to the fields of that vertex's index.
            base_fields = index.fields if index.fields else vc.index(index.name).fields
            resolved = [f"{index.name}@{item}" for item in base_fields]

        wants_endpoints = (
            not index.exclude_edge_endpoints and vc.db_flavor == DBFlavor.ARANGO
        )
        if wants_endpoints and "_from" not in resolved and "_to" not in resolved:
            resolved = ["_from", "_to", *resolved]

        index.fields = resolved
        return index

    @property
    def edge_name_dyad(self):
        """Get the edge name as a dyad.

        Returns:
            tuple[str, str]: Source and target vertex names.
        """
        return (self.source, self.target)

    @property
    def edge_id(self) -> EdgeId:
        """Get the edge ID.

        Returns:
            EdgeId: Tuple of (source, target, purpose).
        """
        return (self.source, self.target, self.purpose)
315
+
316
+
317
@dataclasses.dataclass
class EdgeConfig(BaseDataclass):
    """Container for a collection of edge definitions.

    Keeps the declared edges and an internal lookup from edge ID to edge.

    Attributes:
        edges: List of edge configurations.
    """

    edges: list[Edge] = dataclasses.field(default_factory=list)

    def __post_init__(self):
        """Build the internal edge-ID -> edge lookup from ``edges``.

        When several edges share an ID, the last one wins.
        """
        self._edges_map: dict[EdgeId, Edge] = {}
        for edge in self.edges:
            self._edges_map[edge.edge_id] = edge

    def finish_init(self, vc: VertexConfig):
        """Complete initialization of every declared edge.

        Args:
            vc: Vertex configuration.
        """
        for edge in self.edges:
            edge.finish_init(vc)

    def edges_list(self, include_aux=False):
        """Iterate over the registered edges.

        Args:
            include_aux: Whether to include auxiliary edges.

        Returns:
            generator: Generator yielding edge configurations.
        """
        return (
            edge for edge in self._edges_map.values() if not edge.aux or include_aux
        )

    def edges_items(self, include_aux=False):
        """Iterate over the registered edges together with their IDs.

        Args:
            include_aux: Whether to include auxiliary edges.

        Returns:
            generator: Generator yielding (edge_id, edge) tuples.
        """
        return (
            (edge_id, edge)
            for edge_id, edge in self._edges_map.items()
            if not edge.aux or include_aux
        )

    def __contains__(self, item: EdgeId | Edge):
        """Check whether an edge is registered.

        Args:
            item: Edge ID or Edge instance to check.

        Returns:
            bool: True if the edge ID is known, False otherwise.
        """
        key = item.edge_id if isinstance(item, Edge) else item
        return key in self._edges_map

    def update_edges(self, edge: Edge, vertex_config: VertexConfig):
        """Register an edge, or merge it into the one already registered.

        The resulting registered edge is (re-)initialized with
        ``vertex_config`` in both cases.

        Args:
            edge: Edge configuration to add or merge.
            vertex_config: Vertex configuration.
        """
        key = edge.edge_id
        if key not in self._edges_map:
            # NOTE(review): new edges are only added to the lookup, not to
            # `self.edges` - confirm this is intended.
            self._edges_map[key] = edge
        else:
            self._edges_map[key].update(edge)
        self._edges_map[key].finish_init(vertex_config=vertex_config)

    @property
    def vertices(self):
        """Get the vertex names involved in the declared edges.

        Returns:
            set[str]: Set of source and target vertex names.
        """
        # NOTE(review): reads `self.edges`, so edges registered only through
        # `update_edges` are not reflected here - confirm this is intended.
        names = set()
        for edge in self.edges:
            names.add(edge.source)
            names.add(edge.target)
        return names
410
+
411
+ # def __getitem__(self, key: EdgeId):
412
+ # if key in self._reset_edges():
413
+ # return self._edges_map[key]
414
+ # else:
415
+ # raise KeyError(f"Vertex {key} absent")
416
+ #
417
+ # def __setitem__(self, key: EdgeId, value: Edge):
418
+ # self._edges_map[key] = value