graflo 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graflo might be problematic. Click here for more details.

Files changed (70)
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1276 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +418 -0
  7. graflo/architecture/onto.py +376 -0
  8. graflo/architecture/onto_sql.py +54 -0
  9. graflo/architecture/resource.py +163 -0
  10. graflo/architecture/schema.py +135 -0
  11. graflo/architecture/transform.py +292 -0
  12. graflo/architecture/util.py +89 -0
  13. graflo/architecture/vertex.py +562 -0
  14. graflo/caster.py +736 -0
  15. graflo/cli/__init__.py +14 -0
  16. graflo/cli/ingest.py +203 -0
  17. graflo/cli/manage_dbs.py +197 -0
  18. graflo/cli/plot_schema.py +132 -0
  19. graflo/cli/xml2json.py +93 -0
  20. graflo/data_source/__init__.py +48 -0
  21. graflo/data_source/api.py +339 -0
  22. graflo/data_source/base.py +95 -0
  23. graflo/data_source/factory.py +304 -0
  24. graflo/data_source/file.py +148 -0
  25. graflo/data_source/memory.py +70 -0
  26. graflo/data_source/registry.py +82 -0
  27. graflo/data_source/sql.py +183 -0
  28. graflo/db/__init__.py +44 -0
  29. graflo/db/arango/__init__.py +22 -0
  30. graflo/db/arango/conn.py +1025 -0
  31. graflo/db/arango/query.py +180 -0
  32. graflo/db/arango/util.py +88 -0
  33. graflo/db/conn.py +377 -0
  34. graflo/db/connection/__init__.py +6 -0
  35. graflo/db/connection/config_mapping.py +18 -0
  36. graflo/db/connection/onto.py +717 -0
  37. graflo/db/connection/wsgi.py +29 -0
  38. graflo/db/manager.py +119 -0
  39. graflo/db/neo4j/__init__.py +16 -0
  40. graflo/db/neo4j/conn.py +639 -0
  41. graflo/db/postgres/__init__.py +37 -0
  42. graflo/db/postgres/conn.py +948 -0
  43. graflo/db/postgres/fuzzy_matcher.py +281 -0
  44. graflo/db/postgres/heuristics.py +133 -0
  45. graflo/db/postgres/inference_utils.py +428 -0
  46. graflo/db/postgres/resource_mapping.py +273 -0
  47. graflo/db/postgres/schema_inference.py +372 -0
  48. graflo/db/postgres/types.py +148 -0
  49. graflo/db/postgres/util.py +87 -0
  50. graflo/db/tigergraph/__init__.py +9 -0
  51. graflo/db/tigergraph/conn.py +2365 -0
  52. graflo/db/tigergraph/onto.py +26 -0
  53. graflo/db/util.py +49 -0
  54. graflo/filter/__init__.py +21 -0
  55. graflo/filter/onto.py +525 -0
  56. graflo/logging.conf +22 -0
  57. graflo/onto.py +312 -0
  58. graflo/plot/__init__.py +17 -0
  59. graflo/plot/plotter.py +616 -0
  60. graflo/util/__init__.py +23 -0
  61. graflo/util/chunker.py +807 -0
  62. graflo/util/merge.py +150 -0
  63. graflo/util/misc.py +37 -0
  64. graflo/util/onto.py +422 -0
  65. graflo/util/transform.py +454 -0
  66. graflo-1.3.7.dist-info/METADATA +243 -0
  67. graflo-1.3.7.dist-info/RECORD +70 -0
  68. graflo-1.3.7.dist-info/WHEEL +4 -0
  69. graflo-1.3.7.dist-info/entry_points.txt +5 -0
  70. graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
@@ -0,0 +1,180 @@
1
+ """ArangoDB query utilities for graph operations.
2
+
3
+ This module provides utility functions for executing and profiling AQL queries
4
+ in ArangoDB. It includes functions for basic query execution, query profiling,
5
+ and field fetching operations.
6
+
7
+ Key Functions:
8
+ - basic_query: Execute a basic AQL query with configurable parameters
9
+ - profile_query: Profile query execution and save results
10
+ - fetch_fields_query: Generate and execute field-fetching queries
11
+
12
+ Example:
13
+ >>> cursor = basic_query("FOR doc IN users RETURN doc", db_name="mydb")
14
+ >>> profile_query("FOR doc IN users RETURN doc", nq=1, profile_times=3, fpath=".")
15
+ """
16
+
17
+ import gzip
18
+ import json
19
+ import logging
20
+ from os.path import join
21
+
22
+ from arango import ArangoClient
23
+
24
+ from graflo.filter.onto import Expression
25
+ from graflo.onto import DBFlavor
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
def basic_query(
    query,
    port=8529,
    hostname="127.0.0.1",
    cred_name="root",
    cred_pass="123",
    db_name="_system",
    profile=False,
    batch_size=10000,
    bind_vars=None,
):
    """Run an AQL query against an ArangoDB server and return its cursor.

    A new ``ArangoClient`` is created on every call, pointed at
    ``http://{hostname}:{port}``; the query is always executed with
    ``stream=True``.

    Args:
        query: AQL query string to execute
        port: ArangoDB server port
        hostname: ArangoDB server hostname
        cred_name: Username used to open the database
        cred_pass: Password used to open the database
        db_name: Name of the database the query runs in
        profile: Forwarded to ``aql.execute`` as ``profile``
        batch_size: Forwarded to ``aql.execute`` as ``batch_size``
        bind_vars: Forwarded to ``aql.execute`` as ``bind_vars``

    Returns:
        Cursor: The cursor returned by ``aql.execute``
    """
    client = ArangoClient(hosts=f"http://{hostname}:{port}")
    # The handle targets ``db_name``, which is not necessarily the system database.
    database = client.db(db_name, username=cred_name, password=cred_pass)
    return database.aql.execute(
        query,
        profile=profile,
        stream=True,
        batch_size=batch_size,
        bind_vars=bind_vars,
    )
72
+
73
+
74
def profile_query(query, nq, profile_times, fpath, limit=None, **kwargs):
    """Profile an AQL query and dump its results to disk batch by batch.

    When ``profile_times`` is truthy the query is first executed that many
    times with ``profile=True`` and the collected ``cursor.profile()`` values
    are written to a single JSON file. The query is then executed once more
    and every batch of the cursor is written to its own gzip-compressed JSON
    file.

    Args:
        query: AQL query string to profile
        nq: Query number, used in the output file names
        profile_times: Number of profiling runs; a falsy value skips profiling
        fpath: Directory the output files are written to
        limit: Only used to label log messages and file names; it is not
            applied to the query by this function
        **kwargs: Extra keyword arguments forwarded to ``basic_query``

    Note:
        Output files (``{suffix}`` is ``_limit_{limit}`` or empty):
        - Profiling results: query{nq}_profile{suffix}.json
        - Query results: query{nq}_result{suffix}_batch_{n}.json.gz
    """
    suffix = f"_limit_{limit}" if limit else ""

    def batch_path(index):
        # One output file per cursor batch, numbered from 0.
        return join(fpath, f"./query{nq}_result{suffix}_batch_{index}.json.gz")

    if profile_times:
        logger.info(f"starting profiling: {limit}")
        profiles = []
        for _ in range(profile_times):
            profiled = basic_query(query, profile=True, **kwargs)
            profiles.append(profiled.profile())
            profiled.close()
        profile_path = join(fpath, f"query{nq}_profile{suffix}.json")
        with open(profile_path, "w") as out:
            json.dump(profiles, out, indent=4)

    logger.info(f"starting actual query at {limit}")

    cursor = basic_query(query, **kwargs)

    # The first batch is already held by the cursor.
    first_batch = list(cursor.batch())
    with gzip.open(batch_path(0), "wt", encoding="ascii") as out:
        json.dump(first_batch, out, indent=4)

    index = 0
    while cursor.has_more():
        index += 1
        with gzip.open(batch_path(index), "wt", encoding="ascii") as out:
            # Later batches are fetched only after their file has been opened.
            json.dump(list(cursor.fetch()["batch"]), out, indent=4)
125
+
126
+
127
+ def fetch_fields_query(
128
+ collection_name,
129
+ docs,
130
+ match_keys,
131
+ keep_keys,
132
+ filters: list | dict | None = None,
133
+ ):
134
+ """Generate and execute a field-fetching AQL query.
135
+
136
+ This function generates an AQL query to fetch specific fields from documents
137
+ that match the given criteria. It supports filtering and field projection.
138
+
139
+ Args:
140
+ collection_name: Collection to query
141
+ docs: List of documents to match against
142
+ match_keys: Keys to use for matching documents
143
+ keep_keys: Keys to return in the result
144
+ filters: Additional query filters
145
+
146
+ Returns:
147
+ str: Generated AQL query string
148
+
149
+ Example:
150
+ >>> query = fetch_fields_query(
151
+ ... "users",
152
+ ... [{"email": "user@example.com"}],
153
+ ... ["email"],
154
+ ... ["name", "age"]
155
+ ... )
156
+ """
157
+ docs_ = [{k: doc[k] for k in match_keys if k in doc} for doc in docs]
158
+ for i, doc in enumerate(docs_):
159
+ doc.update({"__i": i})
160
+
161
+ docs_str = json.dumps(docs_)
162
+
163
+ match_str = " &&".join([f" _cdoc['{key}'] == _doc['{key}']" for key in match_keys])
164
+
165
+ keep_clause = f"KEEP(_x, {list(keep_keys)})" if keep_keys is not None else "_x"
166
+
167
+ if filters is not None:
168
+ ff = Expression.from_dict(filters)
169
+ extrac_filter_clause = f" && {ff(doc_name='_cdoc', kind=DBFlavor.ARANGO)}"
170
+ else:
171
+ extrac_filter_clause = ""
172
+
173
+ q0 = f"""
174
+ FOR _cdoc in {collection_name}
175
+ FOR _doc in {docs_str}
176
+ FILTER {match_str} {extrac_filter_clause}
177
+ COLLECT i = _doc['__i'] into _group = _cdoc
178
+ LET gp = (for _x in _group return {keep_clause})
179
+ RETURN {{'__i' : i, '_group': gp}}"""
180
+ return q0
@@ -0,0 +1,88 @@
1
+ """ArangoDB utility functions for graph operations.
2
+
3
+ This module provides utility functions for working with ArangoDB graphs and
4
+ queries. It includes functions for edge definition, filter rendering, and
5
+ query generation.
6
+
7
+ Key Functions:
8
+ - define_extra_edges: Generate queries for creating derived edges
9
+ - render_filters: Convert filter expressions to AQL filter clauses
10
+
11
+ Example:
12
+ >>> query = define_extra_edges(edge_config)
13
+ >>> filter_clause = render_filters({"field": "value"}, doc_name="d")
14
+ """
15
+
16
+ import logging
17
+
18
+ from graflo.architecture.edge import Edge
19
+ from graflo.filter.onto import Clause, Expression
20
+ from graflo.onto import ExpressionFlavor
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
def define_extra_edges(g: Edge):
    """Generate the AQL query that creates derived source->target edges.

    For every document ``w`` of the intermediate collection ``g.by`` the
    query collects the vertices reached by one INBOUND step over
    ``{source}_{by}_edges`` and over ``{target}_{by}_edges``, and inserts an
    edge into ``{source}_{target}_edges`` for every (source, target) pair.
    Each ``key -> value`` entry of ``g.weight_dict`` copies ``w.<key>`` into
    the new edge under the field name ``<value>``.

    Args:
        g: Edge configuration; ``source``, ``target``, ``by`` and
            ``weight_dict`` are read from it

    Returns:
        str: AQL query string for creating the derived edges

    Example:
        >>> edge = Edge(source="user", target="post", by="comment")
        >>> query = define_extra_edges(edge)
        >>> # Generates query to create user->post edges through comments
    """
    ucol, vcol, wcol = g.source, g.target, g.by
    # Tolerate a missing weight mapping the same way as an empty one.
    weight = g.weight_dict or {}

    traversal = f"""FOR w IN {wcol}
    LET uset = (FOR u IN 1..1 INBOUND w {ucol}_{wcol}_edges RETURN u)
    LET vset = (FOR v IN 1..1 INBOUND w {vcol}_{wcol}_edges RETURN v)
    FOR u in uset
        FOR v in vset
"""

    # Join every field in one pass so an edge without weights does not leave
    # a dangling comma inside the inserted object literal.
    fields = ["_from: u._id", "_to: v._id"]
    fields.extend(
        f"{edge_field}: w.{w_field}" for w_field, edge_field in weight.items()
    )
    document = ", ".join(fields)

    insert_clause = f" INSERT {{{document}}} "
    target_clause = f"IN {ucol}_{vcol}_edges"
    return traversal + insert_clause + target_clause
58
+
59
+
60
def render_filters(filters: None | list | dict | Clause = None, doc_name="d") -> str:
    """Render a filter specification as an AQL ``FILTER`` clause.

    A ``Clause`` instance is used as is; any other non-``None`` value is
    first converted with ``Expression.from_dict``. The resulting expression
    is rendered for ``doc_name`` with ``ExpressionFlavor.ARANGO``.

    Args:
        filters: Filter expression to convert, or ``None`` for no filtering
        doc_name: Name of the document variable in the query

    Returns:
        str: ``"FILTER <condition>"``, or an empty string when ``filters``
            is ``None``
    """
    if filters is None:
        return ""

    expression = filters if isinstance(filters, Clause) else Expression.from_dict(filters)
    condition = expression(doc_name=doc_name, kind=ExpressionFlavor.ARANGO)
    return f"FILTER {condition}"
graflo/db/conn.py ADDED
@@ -0,0 +1,377 @@
1
+ """Abstract database connection interface for graph databases.
2
+
3
+ This module defines the abstract interface for database connections, providing
4
+ a unified API for different graph database implementations. It includes methods
5
+ for database management, graph structure operations, and data manipulation.
6
+
7
+ Key Components:
8
+ - Connection: Abstract base class for database connections
9
+ - ConnectionType: Type variable for connection implementations
10
+
11
+ The connection interface supports:
12
+ - Database/Graph creation and deletion
13
+ - Graph structure management (vertex types, edge types)
14
+ - Index definition
15
+ - Document operations (insert, update, fetch)
16
+ - Edge operations
17
+ - Aggregation queries
18
+
19
+ Database Organization Terminology:
20
+ Different databases organize graph data differently:
21
+
22
+ - ArangoDB:
23
+ * Database: Top-level container (like a schema)
24
+ * Collections: Container for vertices (vertex collections)
25
+ * Edge Collections: Container for edges
26
+ * Graph: Named graph that connects vertex and edge collections
27
+
28
+ - Neo4j:
29
+ * Database: Top-level container
30
+ * Labels: Categories for nodes (equivalent to vertex types)
31
+ * Relationship Types: Types of relationships (equivalent to edge types)
32
+ * No explicit "graph" concept - all nodes/relationships are in the database
33
+
34
+ - TigerGraph:
35
+ * Graph: Top-level container (functions like a database in ArangoDB)
36
+ * Vertex Types: Global vertex type definitions (can be shared across graphs)
37
+ * Edge Types: Global edge type definitions (can be shared across graphs)
38
+ * Vertex and edge types are associated with graphs
39
+
40
+ When using the Connection interface, the terms "vertex type" and "edge type"
41
+ are used generically to refer to the appropriate concept in each database.
42
+
43
+ Example:
44
+ >>> class MyConnection(Connection):
45
+ ... def create_database(self, name: str):
46
+ ... # Implementation
47
+ ... def execute(self, query, **kwargs):
48
+ ... # Implementation
49
+ """
50
+
51
+ import abc
52
+ import logging
53
+ from typing import TypeVar
54
+
55
+ from graflo.architecture.edge import Edge
56
+ from graflo.architecture.schema import Schema
57
+ from graflo.architecture.vertex import VertexConfig
58
+ from graflo.onto import AggregationType
59
+
60
+ logger = logging.getLogger(__name__)
61
+ ConnectionType = TypeVar("ConnectionType", bound="Connection")
62
+
63
+
64
class Connection(abc.ABC):
    """Backend-agnostic contract for graph database connections.

    Concrete connections subclass this class and implement every method
    decorated with ``@abc.abstractmethod``. The interface covers
    database/graph management, graph structure (vertex types, edge types,
    indexes) and data manipulation.

    Note:
        ``define_indexes`` is the only method with a shared implementation;
        it delegates to ``define_vertex_indices`` and ``define_edge_indices``.
    """

    def __init__(self):
        """Initialize the connection; the base class stores no state."""

    @abc.abstractmethod
    def create_database(self, name: str):
        """Create a new database.

        Args:
            name: Name of the database to create
        """

    @abc.abstractmethod
    def delete_database(self, name: str):
        """Delete a database.

        Args:
            name: Name of the database to delete
        """

    @abc.abstractmethod
    def execute(self, query, **kwargs):
        """Execute a database query.

        Args:
            query: Query to execute
            **kwargs: Additional query parameters
        """

    @abc.abstractmethod
    def close(self):
        """Close the database connection."""

    def define_indexes(self, schema: Schema):
        """Define the vertex and edge indexes described by a schema.

        Vertex indexes are defined first, then edge indexes for every edge
        returned by ``schema.edge_config.edges_list(include_aux=True)``.

        Args:
            schema: Schema containing vertex and edge configurations
        """
        self.define_vertex_indices(schema.vertex_config)
        all_edges = schema.edge_config.edges_list(include_aux=True)
        self.define_edge_indices(all_edges)

    @abc.abstractmethod
    def define_schema(self, schema: Schema):
        """Define collections based on the schema.

        Args:
            schema: Schema containing collection definitions
        """

    @abc.abstractmethod
    def delete_graph_structure(self, vertex_types=(), graph_names=(), delete_all=False):
        """Delete graph structure (graphs, vertex types, edge types).

        What is removed depends on the backend implementing the method:

        - ArangoDB: graphs and collections (vertex/edge collections)
        - Neo4j: nodes of the given labels (vertex types) and relationships
        - TigerGraph: graphs, vertex types, edge types, and jobs

        Args:
            vertex_types: Vertex type names to delete (database-specific interpretation)
            graph_names: Graph/database names to delete
            delete_all: If True, delete all graphs and their associated structures
        """

    @abc.abstractmethod
    def init_db(self, schema: Schema, clean_start):
        """Initialize the database with the given schema.

        Args:
            schema: Schema to initialize the database with
            clean_start: Whether to clean existing data
        """

    @abc.abstractmethod
    def upsert_docs_batch(self, docs, class_name, match_keys, **kwargs):
        """Upsert a batch of documents.

        Args:
            docs: Documents to upsert
            class_name: Name of the vertex type (collection/label in
                database-specific terms)
            match_keys: Keys to match for upsert
            **kwargs: Additional upsert parameters
        """

    @abc.abstractmethod
    def insert_edges_batch(
        self,
        docs_edges,
        source_class,
        target_class,
        relation_name,
        collection_name,
        match_keys_source,
        match_keys_target,
        filter_uniques=True,
        uniq_weight_fields=None,
        uniq_weight_collections=None,
        upsert_option=False,
        head=None,
        **kwargs,
    ):
        """Insert a batch of edges.

        Args:
            docs_edges: Edge documents to insert
            source_class: Source vertex type/class
            target_class: Target vertex type/class
            relation_name: Name of the edge type/relation
            collection_name: Name of the edge type (database-specific:
                collection/relationship type)
            match_keys_source: Keys to match source vertices
            match_keys_target: Keys to match target vertices
            filter_uniques: Whether to filter unique edges
            uniq_weight_fields: Fields to consider for uniqueness
            uniq_weight_collections: Vertex/edge types to consider for
                uniqueness (database-specific)
            upsert_option: Whether to upsert existing edges
            head: Optional head document
            **kwargs: Additional insertion parameters
        """

    @abc.abstractmethod
    def insert_return_batch(self, docs, class_name):
        """Insert documents and return the inserted documents.

        Args:
            docs: Documents to insert
            class_name: Name of the vertex type (collection/label in
                database-specific terms)

        Returns:
            list: Inserted documents
        """

    @abc.abstractmethod
    def fetch_docs(
        self,
        class_name,
        filters,
        limit,
        return_keys,
        unset_keys,
        **kwargs,
    ):
        """Fetch documents from a vertex type.

        Args:
            class_name: Name of the vertex type (collection/label in
                database-specific terms)
            filters: Query filters
            limit: Maximum number of documents to return
            return_keys: Keys to return
            unset_keys: Keys to unset
            **kwargs: Additional database-specific parameters
                (e.g., field_types for TigerGraph)

        Returns:
            list: Fetched documents
        """

    @abc.abstractmethod
    def fetch_edges(
        self,
        from_type: str,
        from_id: str,
        edge_type: str | None = None,
        to_type: str | None = None,
        to_id: str | None = None,
        filters: list | dict | None = None,
        limit: int | None = None,
        return_keys: list | None = None,
        unset_keys: list | None = None,
        **kwargs,
    ):
        """Fetch edges from the database.

        Args:
            from_type: Source vertex type
            from_id: Source vertex ID (required)
            edge_type: Optional edge type to filter by
            to_type: Optional target vertex type to filter by
            to_id: Optional target vertex ID to filter by
            filters: Additional query filters
            limit: Maximum number of edges to return
            return_keys: Keys to return (projection)
            unset_keys: Keys to exclude (projection)
            **kwargs: Additional database-specific parameters

        Returns:
            list: List of fetched edges
        """

    @abc.abstractmethod
    def fetch_present_documents(
        self,
        batch,
        class_name,
        match_keys,
        keep_keys,
        flatten=False,
        filters: list | dict | None = None,
    ):
        """Fetch documents that exist in the database.

        Args:
            batch: Batch of documents to check
            class_name: Name of the collection
            match_keys: Keys to match
            keep_keys: Keys to keep in result
            flatten: Whether to flatten the result
            filters: Additional query filters

        Returns:
            list: Documents that exist in the database
        """

    @abc.abstractmethod
    def aggregate(
        self,
        class_name,
        aggregation_function: AggregationType,
        discriminant: str | None = None,
        aggregated_field: str | None = None,
        filters: list | dict | None = None,
    ):
        """Perform aggregation on a collection.

        Args:
            class_name: Name of the collection
            aggregation_function: Type of aggregation to perform
            discriminant: Field to group by
            aggregated_field: Field to aggregate
            filters: Query filters

        Returns:
            dict: Aggregation results
        """

    @abc.abstractmethod
    def keep_absent_documents(
        self,
        batch,
        class_name,
        match_keys,
        keep_keys,
        filters: list | dict | None = None,
    ):
        """Keep documents that don't exist in the database.

        Args:
            batch: Batch of documents to check
            class_name: Name of the collection
            match_keys: Keys to match
            keep_keys: Keys to keep in result
            filters: Additional query filters

        Returns:
            list: Documents that don't exist in the database
        """

    @abc.abstractmethod
    def define_vertex_indices(self, vertex_config: VertexConfig):
        """Define indices for vertex collections.

        Args:
            vertex_config: Vertex configuration containing index definitions
        """

    @abc.abstractmethod
    def define_edge_indices(self, edges: list[Edge]):
        """Define indices for edge collections.

        Args:
            edges: List of edge configurations containing index definitions
        """
366
+
367
+ # @abc.abstractmethod
368
+ # def define_vertex_collections(self, graph_config, vertex_config):
369
+ # pass
370
+ #
371
+ # @abc.abstractmethod
372
+ # def define_edge_collections(self, graph_config):
373
+ # pass
374
+
375
+ # @abc.abstractmethod
376
+ # def create_collection_if_absent(self, g, vcol, index, unique=True):
377
+ # pass
@@ -0,0 +1,6 @@
1
+ from .onto import DBConfig, DBType
2
+
3
# Names exported by ``from <package> import *``; both are imported from
# ``.onto`` at the top of this module.
__all__ = [
    "DBConfig",
    "DBType",
]
@@ -0,0 +1,18 @@
1
+ from typing import Dict, Type
2
+
3
+ from .onto import (
4
+ ArangoConfig,
5
+ DBConfig,
6
+ DBType,
7
+ Neo4jConfig,
8
+ PostgresConfig,
9
+ TigergraphConfig,
10
+ )
11
+
12
# Define this mapping in a separate file to avoid circular imports.
# Lookup table from each DBType member to the DBConfig class used for that
# database backend.
DB_TYPE_MAPPING: Dict[DBType, Type[DBConfig]] = {
    DBType.ARANGO: ArangoConfig,
    DBType.NEO4J: Neo4jConfig,
    DBType.TIGERGRAPH: TigergraphConfig,
    DBType.POSTGRES: PostgresConfig,
}