graflo 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graflo/README.md +18 -0
- graflo/__init__.py +70 -0
- graflo/architecture/__init__.py +38 -0
- graflo/architecture/actor.py +1120 -0
- graflo/architecture/actor_util.py +450 -0
- graflo/architecture/edge.py +297 -0
- graflo/architecture/onto.py +374 -0
- graflo/architecture/resource.py +161 -0
- graflo/architecture/schema.py +136 -0
- graflo/architecture/transform.py +292 -0
- graflo/architecture/util.py +93 -0
- graflo/architecture/vertex.py +586 -0
- graflo/caster.py +655 -0
- graflo/cli/__init__.py +14 -0
- graflo/cli/ingest.py +194 -0
- graflo/cli/manage_dbs.py +197 -0
- graflo/cli/plot_schema.py +132 -0
- graflo/cli/xml2json.py +93 -0
- graflo/data_source/__init__.py +48 -0
- graflo/data_source/api.py +339 -0
- graflo/data_source/base.py +97 -0
- graflo/data_source/factory.py +298 -0
- graflo/data_source/file.py +133 -0
- graflo/data_source/memory.py +72 -0
- graflo/data_source/registry.py +82 -0
- graflo/data_source/sql.py +185 -0
- graflo/db/__init__.py +44 -0
- graflo/db/arango/__init__.py +22 -0
- graflo/db/arango/conn.py +1026 -0
- graflo/db/arango/query.py +180 -0
- graflo/db/arango/util.py +88 -0
- graflo/db/conn.py +377 -0
- graflo/db/connection/__init__.py +6 -0
- graflo/db/connection/config_mapping.py +18 -0
- graflo/db/connection/onto.py +688 -0
- graflo/db/connection/wsgi.py +29 -0
- graflo/db/manager.py +119 -0
- graflo/db/neo4j/__init__.py +16 -0
- graflo/db/neo4j/conn.py +639 -0
- graflo/db/postgres/__init__.py +156 -0
- graflo/db/postgres/conn.py +425 -0
- graflo/db/postgres/resource_mapping.py +139 -0
- graflo/db/postgres/schema_inference.py +245 -0
- graflo/db/postgres/types.py +148 -0
- graflo/db/tigergraph/__init__.py +9 -0
- graflo/db/tigergraph/conn.py +2212 -0
- graflo/db/util.py +49 -0
- graflo/filter/__init__.py +21 -0
- graflo/filter/onto.py +525 -0
- graflo/logging.conf +22 -0
- graflo/onto.py +190 -0
- graflo/plot/__init__.py +17 -0
- graflo/plot/plotter.py +556 -0
- graflo/util/__init__.py +23 -0
- graflo/util/chunker.py +751 -0
- graflo/util/merge.py +150 -0
- graflo/util/misc.py +37 -0
- graflo/util/onto.py +332 -0
- graflo/util/transform.py +448 -0
- graflo-1.3.3.dist-info/METADATA +190 -0
- graflo-1.3.3.dist-info/RECORD +64 -0
- graflo-1.3.3.dist-info/WHEEL +4 -0
- graflo-1.3.3.dist-info/entry_points.txt +5 -0
- graflo-1.3.3.dist-info/licenses/LICENSE +126 -0
graflo/db/arango/conn.py
ADDED
@@ -0,0 +1,1026 @@
"""ArangoDB connection implementation for graph database operations.

This module implements the Connection interface for ArangoDB, providing
specific functionality for graph operations in ArangoDB. It handles:
- Graph and collection management
- Document and edge operations
- Index creation and management
- AQL query execution
- Batch operations with upsert support

Key Features:
- Graph-based document organization
- Edge collection management
- Persistent, hash, skiplist, and fulltext indices
- Batch document and edge operations
- AQL query generation and execution

Example:
    >>> conn = ArangoConnection(config)
    >>> conn.init_db(schema, clean_start=True)
    >>> conn.upsert_docs_batch(docs, "users", match_keys=["email"])
"""

import json
import logging
from typing import Optional

from arango import ArangoClient

from graflo.architecture.edge import Edge
from graflo.architecture.onto import (
    Index,
    IndexType,
)
from graflo.architecture.schema import Schema
from graflo.architecture.vertex import VertexConfig
from graflo.db.arango.query import fetch_fields_query
from graflo.db.arango.util import render_filters
from graflo.db.conn import Connection
from graflo.db.util import get_data_from_cursor
from graflo.filter.onto import Clause
from graflo.onto import AggregationType, DBFlavor
from graflo.util.transform import pick_unique_dict

from ..connection.onto import ArangoConfig

logger = logging.getLogger(__name__)


def _json_serializer(obj):
    """JSON serializer for objects not serializable by default json code.

    Handles datetime, date, time, and other non-serializable types.
    Decimal should already be converted to float at the data source level.

    Args:
        obj: Object to serialize

    Returns:
        JSON-serializable representation
    """
    from datetime import date, datetime, time

    if isinstance(obj, (datetime, date, time)):
        return obj.isoformat()
    # Decimal should be converted to float at source (SQLDataSource)
    # But handle it here as a fallback
    from decimal import Decimal

    if isinstance(obj, Decimal):
        return float(obj)
    raise TypeError(f"Type {type(obj)} not serializable")


class ArangoConnection(Connection):
    """ArangoDB-specific implementation of the Connection interface.

    This class provides ArangoDB-specific implementations for all database
    operations, including graph management, document operations, and query
    execution. It uses the ArangoDB Python driver for all operations.

    Attributes:
        conn: ArangoDB database connection instance
    """

    def __init__(self, config: ArangoConfig):
        """Initialize ArangoDB connection.

        Args:
            config: ArangoDB connection configuration containing URL, credentials,
                and database name
        """
        super().__init__()
        # Store config for later use
        self.config = config
        # Validate required config values
        if config.url is None:
            raise ValueError("ArangoDB connection requires a URL to be configured")
        if config.database is None:
            raise ValueError(
                "ArangoDB connection requires a database name to be configured"
            )

        # ArangoDB accepts empty string for password if None
        password = config.password if config.password is not None else ""
        # ArangoDB has default username "root" if None
        username = config.username if config.username is not None else "root"

        # Store client for system operations
        self.client = ArangoClient(
            hosts=config.url, request_timeout=config.request_timeout
        )
        # Connect to the configured database for regular operations
        self.conn = self.client.db(
            config.database,
            username=username,
            password=password,
        )
        # Store credentials for system operations
        self._username = username
        self._password = password

    def create_database(self, name: str):
        """Create a new ArangoDB database.

        Database creation/deletion operations must be performed from the _system database.

        Args:
            name: Name of the database to create
        """
        try:
            # Connect to _system database for system operations
            system_db = self.client.db(
                "_system", username=self._username, password=self._password
            )
            if not system_db.has_database(name):
                try:
                    system_db.create_database(name)
                    logger.info(f"Successfully created ArangoDB database '{name}'")
                except Exception as create_error:
                    logger.error(
                        f"Failed to create ArangoDB database '{name}': {create_error}",
                        exc_info=True,
                    )
                    raise
            else:
                logger.debug(f"ArangoDB database '{name}' already exists")
        except Exception as e:
            logger.error(
                f"Error creating ArangoDB database '{name}': {e}",
                exc_info=True,
            )
            raise

    def delete_database(self, name: str):
        """Delete an ArangoDB database.

        Database creation/deletion operations must be performed from the _system database.

        Args:
            name: Name of the database to delete
        """
        try:
            # Connect to _system database for system operations
            system_db = self.client.db(
                "_system", username=self._username, password=self._password
            )
            if system_db.has_database(name):
                try:
                    system_db.delete_database(name)
                    logger.info(f"Successfully deleted ArangoDB database '{name}'")
                except Exception as delete_error:
                    logger.error(
                        f"Failed to delete ArangoDB database '{name}': {delete_error}",
                        exc_info=True,
                    )
                    raise
            else:
                logger.debug(
                    f"ArangoDB database '{name}' does not exist, skipping deletion"
                )
        except Exception as e:
            logger.error(
                f"Error deleting ArangoDB database '{name}': {e}",
                exc_info=True,
            )
            raise

    def execute(self, query, **kwargs):
        """Execute an AQL query.

        Args:
            query: AQL query string to execute
            **kwargs: Additional query parameters

        Returns:
            Cursor: ArangoDB cursor for the query results
        """
        cursor = self.conn.aql.execute(query)
        return cursor

    def close(self):
        """Close the ArangoDB connection."""
        # self.conn.close()
        pass

    def init_db(self, schema: Schema, clean_start):
        """Initialize ArangoDB with the given schema.

        Checks if the database exists and creates it if it doesn't.
        Uses schema.general.name if database is not set in config.

        Args:
            schema: Schema containing graph structure definitions
            clean_start: If True, delete all existing collections before initialization
        """
        # Determine database name: use config.database if set, otherwise use schema.general.name
        db_name = self.config.database
        if not db_name:
            db_name = schema.general.name
            # Update config for subsequent operations
            self.config.database = db_name

        # Check if database exists and create it if it doesn't
        # Use context manager pattern for system database operations
        try:
            system_db = self.client.db(
                "_system", username=self._username, password=self._password
            )
            if not system_db.has_database(db_name):
                logger.info(f"Database '{db_name}' does not exist, creating it...")
                try:
                    system_db.create_database(db_name)
                    logger.info(f"Successfully created database '{db_name}'")
                except Exception as create_error:
                    logger.error(
                        f"Failed to create database '{db_name}': {create_error}",
                        exc_info=True,
                    )
                    raise

            # Reconnect to the target database (newly created or existing)
            if (
                self.config.database != db_name
                or not hasattr(self, "_db_connected")
                or self._db_connected != db_name
            ):
                try:
                    self.conn = self.client.db(
                        db_name, username=self._username, password=self._password
                    )
                    self._db_connected = db_name
                    logger.debug(f"Connected to database '{db_name}'")
                except Exception as conn_error:
                    logger.error(
                        f"Failed to connect to database '{db_name}': {conn_error}",
                        exc_info=True,
                    )
                    raise
        except Exception as e:
            logger.error(
                f"Error during database initialization for '{db_name}': {e}",
                exc_info=True,
            )
            raise

        try:
            if clean_start:
                try:
                    self.delete_graph_structure([], [], delete_all=True)
                    logger.debug(f"Cleaned database '{db_name}' for fresh start")
                except Exception as clean_error:
                    logger.warning(
                        f"Error during clean_start for database '{db_name}': {clean_error}",
                        exc_info=True,
                    )
                    # Continue - may be first run or already clean

            try:
                self.define_schema(schema)
                logger.debug(f"Defined schema for database '{db_name}'")
            except Exception as schema_error:
                logger.error(
                    f"Failed to define schema for database '{db_name}': {schema_error}",
                    exc_info=True,
                )
                raise

            try:
                self.define_indexes(schema)
                logger.debug(f"Defined indexes for database '{db_name}'")
            except Exception as index_error:
                logger.error(
                    f"Failed to define indexes for database '{db_name}': {index_error}",
                    exc_info=True,
                )
                raise
        except Exception as e:
            logger.error(
                f"Error during database schema initialization for '{db_name}': {e}",
                exc_info=True,
            )
            raise

    def define_schema(self, schema: Schema):
        """Define ArangoDB collections based on schema.

        Args:
            schema: Schema containing collection definitions
        """
        self.define_vertex_collections(schema)
        self.define_edge_collections(schema.edge_config.edges_list(include_aux=True))

    def define_vertex_collections(self, schema: Schema):
        """Define vertex collections in ArangoDB.

        Creates vertex collections for both connected and disconnected vertices,
        organizing them into appropriate graphs.

        Args:
            schema: Schema containing vertex definitions
        """
        vertex_config = schema.vertex_config
        disconnected_vertex_collections = (
            set(vertex_config.vertex_set) - schema.edge_config.vertices
        )
        for item in schema.edge_config.edges_list():
            u, v = item.source, item.target
            gname = item.graph_name
            if not gname:
                logger.warning(
                    f"Edge {item.source} -> {item.target} has no graph_name, skipping"
                )
                continue
            logger.info(f"{item.source}, {item.target}, {gname}")
            if self.conn.has_graph(gname):
                g = self.conn.graph(gname)
            else:
                g = self.conn.create_graph(gname)  # type: ignore

            _ = self.create_collection(
                vertex_config.vertex_dbname(u), vertex_config.index(u), g
            )

            _ = self.create_collection(
                vertex_config.vertex_dbname(v), vertex_config.index(v), g
            )
        for v in disconnected_vertex_collections:
            _ = self.create_collection(
                vertex_config.vertex_dbname(v), vertex_config.index(v), None
            )

    def define_edge_collections(self, edges: list[Edge]):
        """Define edge collections in ArangoDB.

        Creates edge collections and their definitions in the appropriate graphs.

        Args:
            edges: List of edge configurations to create
        """
        for item in edges:
            gname = item.graph_name
            if not gname:
                logger.warning("Edge has no graph_name, skipping")
                continue
            if self.conn.has_graph(gname):
                g = self.conn.graph(gname)
            else:
                g = self.conn.create_graph(gname)  # type: ignore
            collection_name = item.collection_name
            if not collection_name:
                logger.warning("Edge has no collection_name, skipping")
                continue
            if not g.has_edge_definition(collection_name):
                _ = g.create_edge_definition(
                    edge_collection=collection_name,
                    from_vertex_collections=[item._source_collection],
                    to_vertex_collections=[item._target_collection],
                )

    def _add_index(self, general_collection, index: Index):
        """Add an index to an ArangoDB collection.

        Supports persistent, hash, skiplist, and fulltext indices.

        Args:
            general_collection: ArangoDB collection to add index to
            index: Index configuration to create

        Returns:
            IndexHandle: Handle to the created index
        """
        data = index.db_form(DBFlavor.ARANGO)
        if index.type == IndexType.PERSISTENT:
            ih = general_collection.add_index(data)
        if index.type == IndexType.HASH:
            ih = general_collection.add_index(data)
        elif index.type == IndexType.SKIPLIST:
            ih = general_collection.add_skiplist_index(
                fields=index.fields, unique=index.unique
            )
        elif index.type == IndexType.FULLTEXT:
            ih = general_collection.add_index(
                data={"fields": index.fields, "type": "fulltext"}
            )
        else:
            ih = None
        return ih

    def define_vertex_indices(self, vertex_config: VertexConfig):
        """Define indices for vertex collections.

        Creates indices for each vertex collection based on the configuration.

        Args:
            vertex_config: Vertex configuration containing index definitions
        """
        for c in vertex_config.vertex_set:
            general_collection = self.conn.collection(vertex_config.vertex_dbname(c))
            ixs = general_collection.indexes()
            field_combinations = [tuple(ix["fields"]) for ix in ixs]
            for index_obj in vertex_config.indexes(c):
                if tuple(index_obj.fields) not in field_combinations:
                    self._add_index(general_collection, index_obj)

    def define_edge_indices(self, edges: list[Edge]):
        """Define indices for edge collections.

        Creates indices for each edge collection based on the configuration.

        Args:
            edges: List of edge configurations containing index definitions
        """
        for edge in edges:
            collection_name = edge.collection_name
            if not collection_name:
                logger.warning("Edge has no collection_name, skipping index creation")
                continue
            general_collection = self.conn.collection(collection_name)
            for index_obj in edge.indexes:
                self._add_index(general_collection, index_obj)

    def fetch_indexes(self, db_class_name: Optional[str] = None):
        """Fetch all indices from the database.

        Args:
            db_class_name: Optional collection name to fetch indices for

        Returns:
            dict: Mapping of collection names to their indices
        """
        if db_class_name is None:
            classes = self.conn.collections()
        elif self.conn.has_collection(db_class_name):
            classes = [self.conn.collection(db_class_name)]
        else:
            classes = []

        r = {}
        for cname in classes:
            assert isinstance(cname["name"], str)
            c = self.conn.collection(cname["name"])
            r[cname["name"]] = c.indexes()
        return r

    def create_collection(self, db_class_name, index: None | Index = None, g=None):
        """Create a new ArangoDB collection.

        Args:
            db_class_name: Name of the collection to create
            index: Optional index to create on the collection
            g: Optional graph to create the collection in

        Returns:
            IndexHandle: Handle to the created index if one was created
        """
        if not self.conn.has_collection(db_class_name):
            if g is not None:
                _ = g.create_vertex_collection(db_class_name)
            else:
                self.conn.create_collection(db_class_name)
        general_collection = self.conn.collection(db_class_name)
        if index is not None and index.fields != ["_key"]:
            ih = self._add_index(general_collection, index)
            return ih
        else:
            return None

    def delete_graph_structure(self, vertex_types=(), graph_names=(), delete_all=False):
        """Delete graph structure (collections and graphs) from ArangoDB.

        In ArangoDB:
        - Collections: Container for vertices (vertex collections) and edges (edge collections)
        - Graphs: Named graphs that connect vertex and edge collections

        Args:
            vertex_types: Collection names to delete (vertex or edge collections)
            graph_names: Graph names to delete
            delete_all: If True, delete all non-system collections and graphs
        """
        cnames = vertex_types
        gnames = graph_names
        logger.info("collections (non system):")
        logger.info([c for c in self.conn.collections() if c["name"][0] != "_"])

        if delete_all:
            cnames = [c["name"] for c in self.conn.collections() if c["name"][0] != "_"]
            gnames = [g["name"] for g in self.conn.graphs()]

        for gn in gnames:
            if self.conn.has_graph(gn):
                self.conn.delete_graph(gn)

        logger.info("graphs (after delete operation):")
        logger.info(self.conn.graphs())

        for cn in cnames:
            if self.conn.has_collection(cn):
                self.conn.delete_collection(cn)

        logger.info("collections (after delete operation):")
        logger.info([c for c in self.conn.collections() if c["name"][0] != "_"])

        logger.info("graphs:")
        logger.info(self.conn.graphs())

    def get_collections(self):
        """Get all collections in the database.

        Returns:
            list: List of collection information dictionaries
        """
        return self.conn.collections()

    def upsert_docs_batch(
        self,
        docs,
        class_name,
        match_keys: list[str] | None = None,
        **kwargs,
    ):
        """Upsert a batch of documents using AQL.

        Performs an upsert operation on a batch of documents, using the specified
        match keys to determine whether to update existing documents or insert new ones.

        Args:
            docs: List of documents to upsert
            class_name: Collection name to upsert into
            match_keys: Keys to match for upsert operation
            **kwargs: Additional options:
                - dry: If True, don't execute the query
                - update_keys: Keys to update on match
                - filter_uniques: If True, filter duplicate documents
        """
        dry = kwargs.pop("dry", False)
        update_keys = kwargs.pop("update_keys", None)
        filter_uniques = kwargs.pop("filter_uniques", True)

        if isinstance(docs, list):
            if filter_uniques:
                docs = pick_unique_dict(docs)
            docs = json.dumps(docs, default=_json_serializer)
        if match_keys is None:
            upsert_clause = ""
            update_clause = ""
        else:
            upsert_clause = ", ".join([f'"{k}": doc.{k}' for k in match_keys])
            upsert_clause = f"UPSERT {{{upsert_clause}}}"

            if isinstance(update_keys, list):
                update_clause = ", ".join([f'"{k}": doc.{k}' for k in update_keys])
                update_clause = f"{{{update_clause}}}"
            elif update_keys == "doc":
                update_clause = "doc"
            else:
                update_clause = "{}"
            update_clause = f"UPDATE {update_clause}"

        options = "OPTIONS {exclusive: true, ignoreErrors: true}"

        q_update = f"""FOR doc in {docs}
                {upsert_clause}
                INSERT doc
                {update_clause}
                IN {class_name} {options}"""
        if not dry:
            self.execute(q_update)

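    # Editor's note: a minimal usage sketch for the upsert path above, assuming a
    # "users" collection and documents keyed by "email" (both names are illustrative,
    # not taken from the package). With update_keys="doc" a matched document is
    # replaced wholesale; with the default, matches are left untouched:
    #
    #     >>> docs = [{"email": "a@example.org", "name": "Ann"}]
    #     >>> conn.upsert_docs_batch(docs, "users", match_keys=["email"], update_keys="doc")
    #
    # which renders an AQL statement of the form
    #     FOR doc in [...] UPSERT {"email": doc.email} INSERT doc UPDATE doc IN users OPTIONS {...}
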
    def insert_edges_batch(
        self,
        docs_edges,
        source_class,
        target_class,
        relation_name=None,
        collection_name=None,
        match_keys_source=("_key",),
        match_keys_target=("_key",),
        filter_uniques=True,
        uniq_weight_fields=None,
        uniq_weight_collections=None,
        upsert_option=False,
        head=None,
        **kwargs,
    ):
        """Insert a batch of edges using AQL.

        Creates edges between source and target vertices, with support for
        weight fields and unique constraints.

        Args:
            docs_edges: List of edge documents in format [{_source_aux: source_doc, _target_aux: target_doc}]
            source_class: Source vertex collection name
            target_class: Target vertex collection name
            relation_name: Optional relation name for the edges
            collection_name: Edge collection name
            match_keys_source: Keys to match source vertices
            match_keys_target: Keys to match target vertices
            filter_uniques: If True, filter duplicate edges
            uniq_weight_fields: Fields to consider for uniqueness
            uniq_weight_collections: Collections to consider for uniqueness
            upsert_option: If True, use upsert instead of insert
            head: Optional limit on number of edges to insert
            **kwargs: Additional options:
                - dry: If True, don't execute the query
        """
        dry = kwargs.pop("dry", False)

        if isinstance(docs_edges, list):
            if docs_edges:
                logger.debug(f" docs_edges[0] = {docs_edges[0]}")
            if head is not None:
                docs_edges = docs_edges[:head]
            if filter_uniques:
                docs_edges = pick_unique_dict(docs_edges)
            docs_edges_str = json.dumps(docs_edges)
        else:
            return ""

        if match_keys_source[0] == "_key":
            result_from = f'CONCAT("{source_class}/", edge[0]._key)'
            source_filter = ""
        else:
            result_from = "sources[0]._id"
            filter_source = " && ".join(
                [f"v.{k} == edge[0].{k}" for k in match_keys_source]
            )
            source_filter = (
                f"LET sources = (FOR v IN {source_class} FILTER"
                f" {filter_source} LIMIT 1 RETURN v)"
            )

        if match_keys_target[0] == "_key":
            result_to = f'CONCAT("{target_class}/", edge[1]._key)'
            target_filter = ""
        else:
            result_to = "targets[0]._id"
            filter_target = " && ".join(
                [f"v.{k} == edge[1].{k}" for k in match_keys_target]
            )
            target_filter = (
                f"LET targets = (FOR v IN {target_class} FILTER"
                f" {filter_target} LIMIT 1 RETURN v)"
            )

        doc_definition = f"MERGE({{_from : {result_from}, _to : {result_to}}}, edge[2])"

        logger.debug(f" source_filter = {source_filter}")
        logger.debug(f" target_filter = {target_filter}")
        logger.debug(f" doc = {doc_definition}")

        if upsert_option:
            ups_from = result_from if source_filter else "doc._from"
            ups_to = result_to if target_filter else "doc._to"

            weight_fs = []
            if uniq_weight_fields is not None:
                weight_fs += uniq_weight_fields
            if uniq_weight_collections is not None:
                weight_fs += uniq_weight_collections
            if relation_name is not None:
                weight_fs += ["relation"]

            if weight_fs:
                weights_clause = ", " + ", ".join(
                    [f"'{x}' : edge.{x}" for x in weight_fs]
                )
            else:
                weights_clause = ""

            upsert = f"{{'_from': {ups_from}, '_to': {ups_to}" + weights_clause + "}"
            logger.debug(f" upsert clause: {upsert}")
            clauses = f"UPSERT {upsert} INSERT doc UPDATE {{}}"
            options = "OPTIONS {exclusive: true}"
        else:
            if relation_name is None:
                doc_clause = "doc"
            else:
                doc_clause = f"MERGE(doc, {{'relation': '{relation_name}' }})"
            clauses = f"INSERT {doc_clause}"
            options = "OPTIONS {exclusive: true, ignoreErrors: true}"

        q_update = f"""
            FOR edge in {docs_edges_str} {source_filter} {target_filter}
                LET doc = {doc_definition}
                {clauses}
                in {collection_name} {options}"""
        if not dry:
            self.execute(q_update)

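    # Editor's note: a sketch of the edge-batch path above, inferred from how the
    # query indexes each entry (edge[0] = source match doc, edge[1] = target match
    # doc, edge[2] = extra edge attributes merged into the edge document). The
    # collection and field names below are illustrative only:
    #
    #     >>> edges = [[{"_key": "u1"}, {"_key": "p9"}, {"weight": 2}]]
    #     >>> conn.insert_edges_batch(
    #     ...     edges, "users", "products",
    #     ...     relation_name="bought", collection_name="users_products",
    #     ... )
    #
    # With the default match_keys of ("_key",), the _from/_to ids are built as
    # CONCAT("users/", edge[0]._key) and CONCAT("products/", edge[1]._key).
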
    def insert_return_batch(self, docs, class_name):
        """Insert documents and return their keys.

        Args:
            docs: Documents to insert
            class_name: Collection to insert into

        Returns:
            str: AQL query string for the operation
        """
        docs = json.dumps(docs)
        query0 = f"""FOR doc in {docs}
                INSERT doc
                INTO {class_name}
                LET inserted = NEW
                RETURN {{_key: inserted._key}}
        """
        return query0

    def fetch_present_documents(
        self,
        batch,
        class_name,
        match_keys,
        keep_keys,
        flatten=False,
        filters: None | Clause | list | dict = None,
    ) -> list | dict:
        """Fetch documents that exist in the database.

        Args:
            batch: Batch of documents to check
            class_name: Collection to check in
            match_keys: Keys to match documents
            keep_keys: Keys to keep in result
            flatten: If True, flatten the result into a list
            filters: Additional query filters

        Returns:
            Union[list, dict]: Documents that exist in the database, either as a
                flat list or a dictionary mapping batch indices to documents
        """
        q0 = fetch_fields_query(
            collection_name=class_name,
            docs=batch,
            match_keys=match_keys,
            keep_keys=keep_keys,
            filters=filters,
        )
        # {"__i": i, "_group": [doc]}
        cursor = self.execute(q0)

        if flatten:
            rdata = []
            for item in get_data_from_cursor(cursor):
                group = item.pop("_group", [])
                rdata += [sub_item for sub_item in group]
            return rdata
        else:
            rdata_dict = {}
            for item in get_data_from_cursor(cursor):
                __i = item.pop("__i")
                group = item.pop("_group")
                rdata_dict[__i] = group
            return rdata_dict

    def fetch_docs(
        self,
        class_name,
        filters: None | Clause | list | dict = None,
        limit: int | None = None,
        return_keys: list | None = None,
        unset_keys: list | None = None,
        **kwargs,
    ):
        """Fetch documents from a collection.

        Args:
            class_name: Collection to fetch from
            filters: Query filters
            limit: Maximum number of documents to return
            return_keys: Keys to return
            unset_keys: Keys to unset

        Returns:
            list: Fetched documents
        """
        filter_clause = render_filters(filters, doc_name="d")

        if return_keys is None:
            if unset_keys is None:
                return_clause = "d"
            else:
                tmp_clause = ", ".join([f'"{item}"' for item in unset_keys])
                return_clause = f"UNSET(d, {tmp_clause})"
        else:
            if unset_keys is None:
                tmp_clause = ", ".join([f'"{item}"' for item in return_keys])
                return_clause = f"KEEP(d, {tmp_clause})"
            else:
                raise ValueError("both return_keys and unset_keys are set")

        if limit is not None and isinstance(limit, int):
            limit_clause = f"LIMIT {limit}"
        else:
            limit_clause = ""

        q = (
            f"FOR d in {class_name}"
            f" {filter_clause}"
            f" {limit_clause}"
            f" RETURN {return_clause}"
        )
        cursor = self.execute(q)
        return get_data_from_cursor(cursor)

    # TODO test
    def fetch_edges(
        self,
        from_type: str,
        from_id: str,
        edge_type: str | None = None,
        to_type: str | None = None,
        to_id: str | None = None,
        filters: list | dict | Clause | None = None,
        limit: int | None = None,
        return_keys: list | None = None,
        unset_keys: list | None = None,
        **kwargs,
    ):
        """Fetch edges from ArangoDB using AQL.

        Args:
            from_type: Source vertex collection name
            from_id: Source vertex ID (can be _key or _id)
            edge_type: Optional edge collection name to filter by
            to_type: Optional target vertex collection name to filter by
            to_id: Optional target vertex ID to filter by
            filters: Additional query filters
            limit: Maximum number of edges to return
            return_keys: Keys to return (projection)
            unset_keys: Keys to exclude (projection)
            **kwargs: Additional parameters

        Returns:
            list: List of fetched edges
        """
        # Convert from_id to _id format if needed
        if not from_id.startswith(from_type):
            # Assume it's a _key, convert to _id
            from_vertex_id = f"{from_type}/{from_id}"
        else:
            from_vertex_id = from_id

        # Build AQL query to fetch edges
        # Start with basic edge traversal
        if edge_type:
            edge_collection = edge_type
        else:
            # If no edge_type specified, we need to search all edge collections
            # This is a simplified version - in practice you might want to list all edge collections
            raise ValueError("edge_type is required for ArangoDB edge fetching")

        filter_clause = render_filters(filters, doc_name="e")
        filter_parts = []

        if to_type:
            filter_parts.append(f"e._to LIKE '{to_type}/%'")
        if to_id and to_type:
            if not to_id.startswith(to_type):
                to_vertex_id = f"{to_type}/{to_id}"
            else:
                to_vertex_id = to_id
            filter_parts.append(f"e._to == '{to_vertex_id}'")

        additional_filters = " && ".join(filter_parts)
        if filter_clause and additional_filters:
            filter_clause = f"{filter_clause} && {additional_filters}"
        elif additional_filters:
            filter_clause = additional_filters

        query = f"""
            FOR e IN {edge_collection}
                FILTER e._from == '{from_vertex_id}'
                {f"FILTER {filter_clause}" if filter_clause else ""}
                {f"LIMIT {limit}" if limit else ""}
                RETURN e
        """

        cursor = self.execute(query)
        result = list(get_data_from_cursor(cursor))

        # Apply projection
        if return_keys is not None:
            result = [
                {k: doc.get(k) for k in return_keys if k in doc} for doc in result
            ]
        elif unset_keys is not None:
            result = [
                {k: v for k, v in doc.items() if k not in unset_keys} for doc in result
            ]

        return result

    def aggregate(
        self,
        class_name,
        aggregation_function: AggregationType,
        discriminant: str | None = None,
        aggregated_field: str | None = None,
        filters: None | Clause | list | dict = None,
    ):
        """Perform aggregation on a collection.

        Args:
            class_name: Collection to aggregate
            aggregation_function: Type of aggregation to perform
            discriminant: Field to group by
            aggregated_field: Field to aggregate
            filters: Query filters

        Returns:
            list: Aggregation results
        """
        filter_clause = render_filters(filters, doc_name="doc")

        if (
            aggregated_field is not None
            and aggregation_function != AggregationType.COUNT
        ):
            group_unit = f"g[*].doc.{aggregated_field}"
        else:
            group_unit = "g"

        if discriminant is not None:
            collect_clause = f"COLLECT value = doc['{discriminant}'] INTO g"
            return_clause = f"""{{ '{discriminant}' : value, '_value': {aggregation_function}({group_unit})}}"""
        else:
            if (
                aggregated_field is None
                and aggregation_function == AggregationType.COUNT
            ):
                collect_clause = (
                    f"COLLECT AGGREGATE value = {aggregation_function} (doc)"
                )
            else:
                collect_clause = (
                    "COLLECT AGGREGATE value ="
                    f" {aggregation_function}(doc['{aggregated_field}'])"
                )
            return_clause = """{ '_value' : value }"""

        q = f"""FOR doc IN {class_name}
                {filter_clause}
                {collect_clause}
                RETURN {return_clause}"""

        cursor = self.execute(q)
        data = get_data_from_cursor(cursor)
        return data

    def keep_absent_documents(
        self,
        batch,
        class_name,
        match_keys,
        keep_keys,
        filters: None | Clause | list | dict = None,
    ):
        """Keep documents that don't exist in the database.

        Args:
            batch: Batch of documents to check
            class_name: Collection to check in
            match_keys: Keys to match documents
            keep_keys: Keys to keep in result
            filters: Additional query filters

        Returns:
            list: Documents that don't exist in the database
        """
        present_docs_keys = self.fetch_present_documents(
            batch=batch,
            class_name=class_name,
            match_keys=match_keys,
            keep_keys=keep_keys,
            flatten=False,
            filters=filters,
        )

        assert isinstance(present_docs_keys, dict)

        if any([len(v) > 1 for v in present_docs_keys.values()]):
            logger.warning(
                "fetch_present_documents returned multiple docs per filtering condition"
            )

        absent_indices = sorted(set(range(len(batch))) - set(present_docs_keys.keys()))
        batch_absent = [batch[j] for j in absent_indices]
        return batch_absent

    def update_to_numeric(self, collection_name, field):
        """Update a field to numeric type in all documents.

        Args:
            collection_name: Collection to update
            field: Field to convert to numeric

        Returns:
            str: AQL query string for the operation
        """
        s1 = f"FOR p IN {collection_name} FILTER p.{field} update p with {{"
        s2 = f"{field}: TO_NUMBER(p.{field}) "
        s3 = f"}} in {collection_name}"
        q0 = s1 + s2 + s3
        return q0
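Taken together, the module is driven end to end much like the docstring example at the top of the file. A minimal sketch, assuming an ArangoConfig populated with the url/database/username/password fields that __init__ reads and a Schema object loaded elsewhere (the ArangoConfig constructor keywords and the schema-loading step are assumptions, not shown in this diff):

    from graflo.db.connection.onto import ArangoConfig
    from graflo.db.arango.conn import ArangoConnection

    # field names assumed; see ArangoConfig in graflo/db/connection/onto.py
    config = ArangoConfig(url="http://localhost:8529", database="demo",
                          username="root", password="")
    conn = ArangoConnection(config)
    conn.init_db(schema, clean_start=True)   # schema: a graflo Schema loaded elsewhere
    conn.upsert_docs_batch(docs, "users", match_keys=["email"])
    conn.close()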