graflo 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graflo/README.md +18 -0
- graflo/__init__.py +70 -0
- graflo/architecture/__init__.py +38 -0
- graflo/architecture/actor.py +1120 -0
- graflo/architecture/actor_util.py +450 -0
- graflo/architecture/edge.py +297 -0
- graflo/architecture/onto.py +374 -0
- graflo/architecture/resource.py +161 -0
- graflo/architecture/schema.py +136 -0
- graflo/architecture/transform.py +292 -0
- graflo/architecture/util.py +93 -0
- graflo/architecture/vertex.py +586 -0
- graflo/caster.py +655 -0
- graflo/cli/__init__.py +14 -0
- graflo/cli/ingest.py +194 -0
- graflo/cli/manage_dbs.py +197 -0
- graflo/cli/plot_schema.py +132 -0
- graflo/cli/xml2json.py +93 -0
- graflo/data_source/__init__.py +48 -0
- graflo/data_source/api.py +339 -0
- graflo/data_source/base.py +97 -0
- graflo/data_source/factory.py +298 -0
- graflo/data_source/file.py +133 -0
- graflo/data_source/memory.py +72 -0
- graflo/data_source/registry.py +82 -0
- graflo/data_source/sql.py +185 -0
- graflo/db/__init__.py +44 -0
- graflo/db/arango/__init__.py +22 -0
- graflo/db/arango/conn.py +1026 -0
- graflo/db/arango/query.py +180 -0
- graflo/db/arango/util.py +88 -0
- graflo/db/conn.py +377 -0
- graflo/db/connection/__init__.py +6 -0
- graflo/db/connection/config_mapping.py +18 -0
- graflo/db/connection/onto.py +688 -0
- graflo/db/connection/wsgi.py +29 -0
- graflo/db/manager.py +119 -0
- graflo/db/neo4j/__init__.py +16 -0
- graflo/db/neo4j/conn.py +639 -0
- graflo/db/postgres/__init__.py +156 -0
- graflo/db/postgres/conn.py +425 -0
- graflo/db/postgres/resource_mapping.py +139 -0
- graflo/db/postgres/schema_inference.py +245 -0
- graflo/db/postgres/types.py +148 -0
- graflo/db/tigergraph/__init__.py +9 -0
- graflo/db/tigergraph/conn.py +2212 -0
- graflo/db/util.py +49 -0
- graflo/filter/__init__.py +21 -0
- graflo/filter/onto.py +525 -0
- graflo/logging.conf +22 -0
- graflo/onto.py +190 -0
- graflo/plot/__init__.py +17 -0
- graflo/plot/plotter.py +556 -0
- graflo/util/__init__.py +23 -0
- graflo/util/chunker.py +751 -0
- graflo/util/merge.py +150 -0
- graflo/util/misc.py +37 -0
- graflo/util/onto.py +332 -0
- graflo/util/transform.py +448 -0
- graflo-1.3.3.dist-info/METADATA +190 -0
- graflo-1.3.3.dist-info/RECORD +64 -0
- graflo-1.3.3.dist-info/WHEEL +4 -0
- graflo-1.3.3.dist-info/entry_points.txt +5 -0
- graflo-1.3.3.dist-info/licenses/LICENSE +126 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""Edge configuration and management for graph databases.
|
|
2
|
+
|
|
3
|
+
This module provides classes and utilities for managing edges in graph databases.
|
|
4
|
+
It handles edge configuration, weight management, indexing, and relationship operations.
|
|
5
|
+
The module supports both ArangoDB and Neo4j through the DBFlavor enum.
|
|
6
|
+
|
|
7
|
+
Key Components:
|
|
8
|
+
- Edge: Represents an edge with its source, target, and configuration
|
|
9
|
+
- EdgeConfig: Manages collections of edges and their configurations
|
|
10
|
+
- WeightConfig: Configuration for edge weights and relationships
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
>>> edge = Edge(source="user", target="post")
|
|
14
|
+
>>> config = EdgeConfig(edges=[edge])
|
|
15
|
+
>>> edge.finish_init(vertex_config=vertex_config)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import dataclasses
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
from graflo.architecture.onto import (
|
|
22
|
+
BaseDataclass,
|
|
23
|
+
EdgeId,
|
|
24
|
+
EdgeType,
|
|
25
|
+
Index,
|
|
26
|
+
Weight,
|
|
27
|
+
)
|
|
28
|
+
from graflo.architecture.vertex import VertexConfig
|
|
29
|
+
from graflo.onto import DBFlavor
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclasses.dataclass
class WeightConfig(BaseDataclass):
    """Weight settings that can be attached to an edge.

    Both attributes default to a fresh empty list per instance.

    Attributes:
        vertices: ``Weight`` definitions
        direct: Field names given directly as strings
    """

    vertices: list[Weight] = dataclasses.field(default_factory=list)
    direct: list[str] = dataclasses.field(default_factory=list)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclasses.dataclass
class Edge(BaseDataclass):
    """A single edge definition between two vertex types.

    The declarative part (source, target, indexes, weights, ...) is supplied
    at construction; database-facing names are only resolved once
    ``finish_init`` is called with a vertex configuration.

    Attributes:
        source: Source vertex name
        target: Target vertex name
        indexes: Indexes defined on the edge
        weights: Optional weight configuration
        relation: Optional relation name (Class in Neo4j; a weight in ArangoDB)
        relation_field: Optional name of the field that holds the class/relation
        relation_from_key: Boolean flag, False by default
            (NOTE(review): semantics are not visible in this module)
        purpose: Optional tag used to create extra utility collections between
            the same pair of vertex types
        match_source: Optional source discriminant field
        match_target: Optional target discriminant field
        exclude_source: Optional string (NOTE(review): presumably the
            counterpart of ``match_source`` - confirm against the consumer)
        exclude_target: Optional string (NOTE(review): presumably the
            counterpart of ``match_target`` - confirm against the consumer)
        match: Optional string (NOTE(review): not used in this module)
        type: Edge type (DIRECT or INDIRECT)
        aux: Whether this is an auxiliary edge
        by: Optional vertex name for indirect edges
        graph_name: Graph name, overwritten by ``finish_init``
        collection_name: Edge collection name, overwritten by ``finish_init``
        db_flavor: Database flavor, overwritten by ``finish_init``
    """

    source: str
    target: str
    indexes: list[Index] = dataclasses.field(default_factory=list)
    weights: Optional[WeightConfig] = None

    # relation represents Class in neo4j, for arango it becomes a weight
    relation: str | None = None
    # field that contains Class or relation
    relation_field: str | None = None
    relation_from_key: bool = False

    # used to create extra utility collections between the same type of vertices (A, B)
    purpose: str | None = None

    match_source: str | None = None
    match_target: str | None = None
    exclude_source: str | None = None
    exclude_target: str | None = None
    match: str | None = None

    type: EdgeType = EdgeType.DIRECT

    aux: bool = False  # aux=True edges are init in the db but not considered by graflo

    by: str | None = None
    graph_name: str | None = None
    collection_name: str | None = None
    db_flavor: DBFlavor = DBFlavor.ARANGO

    def __post_init__(self):
        """Create the private collection-name slots (filled by ``finish_init``)."""
        self._source_collection: str | None = None
        self._target_collection: str | None = None

    def finish_init(self, vertex_config: VertexConfig):
        """Resolve database names and indexes from the vertex configuration.

        Sets the source/target collection names, derives ``graph_name`` and
        ``collection_name`` from them (plus ``purpose`` when given), copies
        the database flavor and initializes the edge indexes.

        Args:
            vertex_config: Configuration for vertices
        """
        # NOTE(review): `by` is re-mapped through vertex_dbname on every call;
        # confirm that repeated calls (EdgeConfig.update_edges re-runs
        # finish_init on existing edges) are safe for indirect edges.
        if self.type == EdgeType.INDIRECT and self.by is not None:
            self.by = vertex_config.vertex_dbname(self.by)

        self._source_collection = vertex_config.vertex_dbname(self.source)
        self._target_collection = vertex_config.vertex_dbname(self.target)

        name_parts = [
            vertex_config.vertex_dbname(self.source),
            vertex_config.vertex_dbname(self.target),
        ]
        if self.purpose is not None:
            name_parts.append(self.purpose)

        self.graph_name = "_".join([*name_parts, "graph"])
        self.collection_name = "_".join([*name_parts, "edges"])
        self.db_flavor = vertex_config.db_flavor
        self._init_indices(vertex_config)

    def _init_indices(self, vc: VertexConfig):
        """Run ``_init_index`` over every index and store the results.

        Args:
            vc: Vertex configuration
        """
        initialized = []
        for candidate in self.indexes:
            initialized.append(self._init_index(candidate, vc))
        self.indexes = initialized

    def _init_index(self, index: Index, vc: VertexConfig) -> Index:
        """Compute the final field list of one edge index.

        Args:
            index: Index to initialize (modified in place)
            vc: Vertex configuration

        Returns:
            Index: The same index object with its ``fields`` resolved

        Note:
            For ArangoDB, unless ``exclude_edge_endpoints`` is set, "_from" and
            "_to" are prepended when neither is already among the fields.
        """
        # "@" is reserved : quick hack - do not reinit the index twice
        for existing in index.fields:
            if "@" in existing:
                return index

        if index.name is None:
            resolved = list(index.fields)
        else:
            # index over a vertex called index.name: use the explicit fields,
            # otherwise the fields of that vertex's index
            base_fields = index.fields if index.fields else vc.index(index.name).fields
            resolved = [f"{index.name}@{x}" for x in base_fields]

        if not index.exclude_edge_endpoints and self.db_flavor == DBFlavor.ARANGO:
            if "_from" not in resolved and "_to" not in resolved:
                resolved = ["_from", "_to"] + resolved

        index.fields = resolved
        return index

    @property
    def edge_name_dyad(self):
        """Source and target vertex names as a pair.

        Returns:
            tuple[str, str]: (source, target)
        """
        return self.source, self.target

    @property
    def edge_id(self) -> EdgeId:
        """Identifier of the edge.

        Returns:
            EdgeId: Tuple of (source, target, purpose)
        """
        return self.source, self.target, self.purpose
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
@dataclasses.dataclass
class EdgeConfig(BaseDataclass):
    """Configuration for managing collections of edges.

    Edges supplied through ``edges`` are indexed by their ``edge_id`` in the
    private ``_edges_map``. All accessors read from that map, so edges that
    are registered later through ``update_edges`` are visible as well.

    Attributes:
        edges: List of edge configurations given at construction. New edges
            added via ``update_edges`` are stored in the internal map only.
    """

    edges: list[Edge] = dataclasses.field(default_factory=list)

    def __post_init__(self):
        """Build the internal mapping of edge IDs to edge configurations."""
        # When two edges share an edge_id the later one wins.
        self._edges_map: dict[EdgeId, Edge] = {e.edge_id: e for e in self.edges}

    def finish_init(self, vc: VertexConfig):
        """Complete initialization of all edges with vertex configuration.

        Args:
            vc: Vertex configuration
        """
        for edge in self._edges_map.values():
            edge.finish_init(vc)

    def edges_list(self, include_aux=False):
        """Get the registered edges.

        Args:
            include_aux: Whether to include auxiliary edges

        Returns:
            generator: Generator yielding edge configurations
        """
        return (e for e in self._edges_map.values() if include_aux or not e.aux)

    def edges_items(self, include_aux=False):
        """Get the registered edges together with their IDs.

        Args:
            include_aux: Whether to include auxiliary edges

        Returns:
            generator: Generator yielding (edge_id, edge) tuples
        """
        return (
            (eid, e) for eid, e in self._edges_map.items() if include_aux or not e.aux
        )

    def __contains__(self, item: EdgeId | Edge):
        """Check if edge exists in configuration.

        Args:
            item: Edge ID or Edge instance to check

        Returns:
            bool: True if edge exists, False otherwise
        """
        eid = item.edge_id if isinstance(item, Edge) else item
        return eid in self._edges_map

    def update_edges(self, edge: Edge, vertex_config: VertexConfig):
        """Register an edge or merge it into an existing one.

        If an edge with the same ID is already known, it is updated with
        ``edge`` via its ``update`` method; otherwise ``edge`` is stored as
        is. In both cases the stored edge is (re)initialized afterwards.

        Args:
            edge: Edge configuration to update
            vertex_config: Vertex configuration
        """
        eid = edge.edge_id
        if eid in self._edges_map:
            self._edges_map[eid].update(edge)
        else:
            self._edges_map[eid] = edge
        self._edges_map[eid].finish_init(vertex_config=vertex_config)

    @property
    def vertices(self):
        """Get set of vertex names involved in edges.

        Reads the internal edge map (not the ``edges`` field) so that edges
        added through ``update_edges`` contribute their endpoints too.

        Returns:
            set[str]: Set of vertex names
        """
        names: set[str] = set()
        for edge in self._edges_map.values():
            names.add(edge.source)
            names.add(edge.target)
        return names
|
|
289
|
+
|
|
290
|
+
# def __getitem__(self, key: EdgeId):
|
|
291
|
+
# if key in self._reset_edges():
|
|
292
|
+
# return self._edges_map[key]
|
|
293
|
+
# else:
|
|
294
|
+
# raise KeyError(f"Vertex {key} absent")
|
|
295
|
+
#
|
|
296
|
+
# def __setitem__(self, key: EdgeId, value: Edge):
|
|
297
|
+
# self._edges_map[key] = value
|
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
"""Core ontology and data structures for graph database operations.
|
|
2
|
+
|
|
3
|
+
This module defines the fundamental data structures and types used throughout the graflo
|
|
4
|
+
package for working with graph databases. It provides:
|
|
5
|
+
|
|
6
|
+
- Core data types for vertices and edges
|
|
7
|
+
- Database index configurations
|
|
8
|
+
- Graph container implementations
|
|
9
|
+
- Edge mapping and casting utilities
|
|
10
|
+
- Action context for graph transformations
|
|
11
|
+
|
|
12
|
+
The module is designed to be database-agnostic, supporting both ArangoDB and Neo4j through
|
|
13
|
+
the DBFlavor enum. It provides a unified interface for working with graph data structures
|
|
14
|
+
while allowing for database-specific optimizations and features.
|
|
15
|
+
|
|
16
|
+
Key Components:
|
|
17
|
+
- EdgeMapping: Defines how edges are mapped between vertices
|
|
18
|
+
- IndexType: Supported database index types
|
|
19
|
+
- EdgeType: Types of edge handling in the graph database
|
|
20
|
+
- GraphContainer: Main container for graph data
|
|
21
|
+
- ActionContext: Context for graph transformation operations
|
|
22
|
+
|
|
23
|
+
Example:
|
|
24
|
+
>>> container = GraphContainer(vertices={}, edges={}, linear=[])
|
|
25
|
+
>>> index = Index(fields=["name", "age"], type=IndexType.PERSISTENT)
|
|
26
|
+
>>> context = ActionContext()
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import dataclasses
|
|
32
|
+
import logging
|
|
33
|
+
from abc import ABCMeta
|
|
34
|
+
from collections import defaultdict
|
|
35
|
+
from typing import Any, Optional, TypeAlias, Union
|
|
36
|
+
|
|
37
|
+
from dataclass_wizard import JSONWizard, YAMLWizard
|
|
38
|
+
|
|
39
|
+
from graflo.onto import BaseDataclass, BaseEnum, DBFlavor
|
|
40
|
+
from graflo.util.transform import pick_unique_dict
|
|
41
|
+
|
|
42
|
+
# type for vertex or edge name (index)
|
|
43
|
+
EdgeId: TypeAlias = tuple[str, str, Optional[str]]
|
|
44
|
+
GraphEntity: TypeAlias = Union[str, EdgeId]
|
|
45
|
+
|
|
46
|
+
logger = logging.getLogger(__name__)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class EdgeMapping(BaseEnum):
    """Strategies for mapping edges between vertices.

    Members:
        ALL ("all"): Maps all vertices to all vertices
        ONE_N ("1-n"): Maps one vertex to many vertices
    """

    ALL = "all"
    ONE_N = "1-n"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class EncodingType(BaseEnum):
    """Character encodings supported for data input/output.

    Members:
        ISO_8859: "ISO-8859-1"
        UTF_8: "utf-8"
    """

    ISO_8859 = "ISO-8859-1"
    UTF_8 = "utf-8"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class IndexType(BaseEnum):
    """Kinds of database index that can be requested.

    Members:
        PERSISTENT: Standard persistent index
        HASH: Hash-based index for fast lookups
        SKIPLIST: Sorted index using skip list data structure
        FULLTEXT: Index optimized for text search
    """

    PERSISTENT = "persistent"
    HASH = "hash"
    SKIPLIST = "skiplist"
    FULLTEXT = "fulltext"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class EdgeType(BaseEnum):
    """How an edge is handled in the graph database.

    Members:
        INDIRECT: Defined as a collection with indexes, may be used after
            data ingestion
        DIRECT: In addition to indexes, these edges are generated during
            ingestion
    """

    INDIRECT = "indirect"
    DIRECT = "direct"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclasses.dataclass
class ABCFields(BaseDataclass, metaclass=ABCMeta):
    """Abstract base class for entities that carry a list of fields.

    Attributes:
        name: Optional name of the entity
        fields: List of field names
        keep_vertex_name: When True (default), ``cfield`` prefixes field names
            with the entity name
    """

    name: Optional[str] = None
    fields: list[str] = dataclasses.field(default_factory=list)
    keep_vertex_name: bool = True

    def cfield(self, x: str) -> str:
        """Build the composite name of a field.

        Args:
            x: Field name to combine with the entity name

        Returns:
            "entity@field" when ``keep_vertex_name`` is set, otherwise ``x``
            unchanged. The name is interpolated as-is, so an unset name
            yields the prefix "None@".
        """
        if not self.keep_vertex_name:
            return x
        return f"{self.name}@{x}"
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@dataclasses.dataclass
class Weight(ABCFields):
    """Weight definition for edges, extending ``ABCFields``.

    Both attributes default to a fresh empty dict per instance.

    Attributes:
        map: Dictionary mapping field values to weights
        filter: Dictionary of filter conditions for weights
    """

    map: dict = dataclasses.field(default_factory=dict)
    filter: dict = dataclasses.field(default_factory=dict)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@dataclasses.dataclass
class Index(BaseDataclass):
    """Description of a database index.

    Attributes:
        name: Optional name of the index
        fields: List of fields to index
        unique: Whether the index enforces uniqueness (default True)
        type: Type of index to create (default PERSISTENT)
        deduplicate: Whether to deduplicate index entries (default True)
        sparse: Whether to create a sparse index (default False)
        exclude_edge_endpoints: Whether to exclude edge endpoints from index
            (default False)
    """

    name: str | None = None
    fields: list[str] = dataclasses.field(default_factory=list)
    unique: bool = True
    type: IndexType = IndexType.PERSISTENT
    deduplicate: bool = True
    sparse: bool = False
    exclude_edge_endpoints: bool = False

    def __iter__(self):
        """Iterate over the indexed fields."""
        return iter(self.fields)

    def db_form(self, db_type: DBFlavor) -> dict:
        """Render the index as a dictionary suited to one database flavor.

        Args:
            db_type: Type of database (ARANGO or NEO4J)

        Returns:
            The ``to_dict()`` form of the index; for ARANGO the "name" and
            "exclude_edge_endpoints" entries are removed, for NEO4J it is
            returned unchanged.

        Raises:
            ValueError: If db_type is not supported
        """
        payload = self.to_dict()

        if db_type == DBFlavor.ARANGO:
            # these two keys are dropped from the ArangoDB form
            for dropped in ("name", "exclude_edge_endpoints"):
                payload.pop(dropped)
            return payload

        if db_type == DBFlavor.NEO4J:
            return payload

        raise ValueError(f"Unknown db_type {db_type}")
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class ItemsView:
    """Iterable view over the vertices and edges of a GraphContainer."""

    def __init__(self, gc: GraphContainer):
        # the wrapped container; only its `vertices` and `edges` are read
        self._dictlike = gc

    def __iter__(self):
        """Yield (key, value) pairs: all vertices first, then all edges."""
        container = self._dictlike
        yield from container.vertices.items()
        yield from container.edges.items()
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
@dataclasses.dataclass
class GraphContainer(BaseDataclass):
    """Holds graph data: vertex documents, edge documents and the raw input.

    Attributes:
        vertices: Dictionary mapping vertex names to lists of vertex data
        edges: Dictionary mapping edge IDs (source, target, purpose) to lists
            of edge data
        linear: List of default dictionaries keyed by vertex name or edge ID
    """

    vertices: dict[str, list]
    edges: dict[tuple[str, str, str | None], list]
    linear: list[defaultdict[str | tuple[str, str, str | None], list[Any]]]

    def __post_init__(self):
        pass

    def items(self):
        """Return an ItemsView over vertices followed by edges."""
        return ItemsView(self)

    def pick_unique(self):
        """Replace every vertex and edge list by ``pick_unique_dict`` of it."""
        # only existing keys are reassigned, so the dicts keep their size
        for name in self.vertices:
            self.vertices[name] = pick_unique_dict(self.vertices[name])
        for edge_id in self.edges:
            self.edges[edge_id] = pick_unique_dict(self.edges[edge_id])

    def loop_over_relations(self, edge_def: tuple[str, str, str | None]):
        """Iterate over edge IDs that share source and target with ``edge_def``.

        The third component (purpose) of ``edge_def`` is ignored.

        Args:
            edge_def: Tuple of (source, target, optional_purpose)

        Returns:
            Generator yielding matching edge IDs
        """
        wanted_source, wanted_target, _ = edge_def
        return (
            eid
            for eid in self.edges
            if wanted_source == eid[0] and wanted_target == eid[1]
        )

    @classmethod
    def from_docs_list(
        cls, list_default_dicts: list[defaultdict[GraphEntity, list]]
    ) -> GraphContainer:
        """Create a GraphContainer from a list of default dictionaries.

        String keys are collected as vertices, tuple keys as edges; values
        under the same key are concatenated in input order. Keys of any other
        type are skipped. The input list itself is stored as ``linear``.

        Args:
            list_default_dicts: List of default dictionaries containing vertex
                and edge data

        Returns:
            New GraphContainer instance

        Raises:
            AssertionError: If edge IDs are not properly formatted
        """
        vertex_acc: defaultdict[str, list] = defaultdict(list)
        edge_acc: defaultdict[tuple[str, str, str | None], list] = defaultdict(list)

        for doc in list_default_dicts:
            for key, payload in doc.items():
                if isinstance(key, str):
                    vertex_acc[key].extend(payload)
                    continue
                if not isinstance(key, tuple):
                    continue
                # edge IDs must look like (str, str, str | None)
                assert (
                    len(key) == 3
                    and all(isinstance(part, str) for part in key[:-1])
                    and isinstance(key[-1], (str, type(None)))
                )
                edge_acc[key].extend(payload)

        return GraphContainer(
            vertices=dict(vertex_acc),
            edges=dict(edge_acc),
            linear=list_default_dicts,
        )
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class EdgeCastingType(BaseEnum):
    """Supported ways of casting edges.

    Members:
        PAIR: Edges are cast as pairs of vertices
        PRODUCT: Edges are cast as combinations of vertex sets
        COMBINATIONS: "combinations" (NOTE(review): semantics are not
            described in this module - confirm against the consumer)
    """

    PAIR = "pair"
    PRODUCT = "product"
    COMBINATIONS = "combinations"
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def inner_factory_vertex() -> defaultdict[LocationIndex, list]:
    """Return a new, empty ``defaultdict`` whose missing keys map to lists."""
    per_location: defaultdict[LocationIndex, list] = defaultdict(list)
    return per_location
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def outer_factory() -> defaultdict[str, defaultdict[LocationIndex, list]]:
    """Return a new two-level ``defaultdict``.

    Missing outer keys are populated with ``inner_factory_vertex()``, i.e.
    with an inner ``defaultdict`` of lists.
    """
    nested: defaultdict[str, defaultdict[LocationIndex, list]] = defaultdict(
        inner_factory_vertex
    )
    return nested
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def dd_factory() -> defaultdict[GraphEntity, list]:
    """Return a new, empty ``defaultdict`` of lists for graph entity data."""
    per_entity: defaultdict[GraphEntity, list] = defaultdict(list)
    return per_entity
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
@dataclasses.dataclass(kw_only=True)
class VertexRep(BaseDataclass):
    """A vertex document together with its context.

    Both fields are required and must be passed by keyword.

    Attributes:
        vertex: doc representing a vertex
        ctx: context (for edge definition upstream)
    """

    vertex: dict
    ctx: dict
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
@dataclasses.dataclass(frozen=True, eq=True)
class LocationIndex(JSONWizard, YAMLWizard):
    """Immutable, hashable path made of string / integer / None steps.

    Equality and hashing come from the dataclass (they compare ``path``);
    ``<`` compares path lengths only.

    Attributes:
        path: Tuple of steps, empty by default
    """

    path: tuple[str | int | None, ...] = dataclasses.field(default_factory=tuple)

    def extend(self, extension: tuple[str | int | None, ...]) -> LocationIndex:
        """Return a new index whose path is this path followed by ``extension``."""
        return LocationIndex(path=(*self.path, *extension))

    def depth(self):
        """Return the number of steps in the path."""
        return len(self.path)

    def congruence_measure(self, other: LocationIndex):
        """Return the length of the common leading part of both paths."""
        for position, (mine, theirs) in enumerate(zip(self.path, other.path)):
            if mine != theirs:
                return position
        # no mismatch: the shorter path is entirely shared
        return min(len(self.path), len(other.path))

    def filter(self, lindex_list: list[LocationIndex]) -> list[LocationIndex]:
        """Keep the entries of ``lindex_list`` whose path starts with this path."""
        own_depth = self.depth()
        selected = []
        for candidate in lindex_list:
            if candidate.depth() < own_depth:
                continue
            if candidate.path[:own_depth] == self.path:
                selected.append(candidate)
        return selected

    def __lt__(self, other: LocationIndex):
        """Order by path length: shorter paths sort first."""
        return len(self.path) < len(other.path)

    def __contains__(self, item):
        """Report whether ``item`` is one of the steps of the path."""
        return item in self.path

    def __len__(self):
        """Return the number of steps in the path."""
        return len(self.path)

    def __iter__(self):
        """Iterate over the steps of the path."""
        return iter(self.path)

    def __getitem__(self, item):
        """Return the step (or slice of steps) at ``item``."""
        return self.path[item]
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
@dataclasses.dataclass(kw_only=True)
class ActionContext(BaseDataclass):
    """Mutable accumulators shared by graph transformation actions.

    Every attribute is keyword-only and defaults to a fresh, empty
    ``defaultdict``.

    Attributes:
        acc_vertex: Local accumulation of vertices, a two-level defaultdict
            (string key, then ``LocationIndex``) of lists
        acc_global: Global accumulation of graph entities
        buffer_vertex: Buffer for vertex data
        buffer_transforms: Buffer for transforms data, keyed by
            ``LocationIndex``
    """

    acc_vertex: defaultdict[str, defaultdict[LocationIndex, list]] = dataclasses.field(
        default_factory=outer_factory
    )
    acc_global: defaultdict[GraphEntity, list] = dataclasses.field(
        default_factory=dd_factory
    )
    # same shape as acc_global, so the same named factory is used
    buffer_vertex: defaultdict[GraphEntity, list] = dataclasses.field(
        default_factory=dd_factory
    )
    buffer_transforms: defaultdict[LocationIndex, list[dict]] = dataclasses.field(
        default_factory=lambda: defaultdict(list)
    )
|