graflo-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graflo might be problematic.
- graflo/README.md +18 -0
- graflo/__init__.py +39 -0
- graflo/architecture/__init__.py +37 -0
- graflo/architecture/actor.py +974 -0
- graflo/architecture/actor_util.py +425 -0
- graflo/architecture/edge.py +295 -0
- graflo/architecture/onto.py +374 -0
- graflo/architecture/resource.py +161 -0
- graflo/architecture/schema.py +136 -0
- graflo/architecture/transform.py +292 -0
- graflo/architecture/util.py +93 -0
- graflo/architecture/vertex.py +277 -0
- graflo/caster.py +409 -0
- graflo/cli/__init__.py +14 -0
- graflo/cli/ingest.py +144 -0
- graflo/cli/manage_dbs.py +193 -0
- graflo/cli/plot_schema.py +132 -0
- graflo/cli/xml2json.py +93 -0
- graflo/db/__init__.py +32 -0
- graflo/db/arango/__init__.py +16 -0
- graflo/db/arango/conn.py +734 -0
- graflo/db/arango/query.py +180 -0
- graflo/db/arango/util.py +88 -0
- graflo/db/connection.py +304 -0
- graflo/db/manager.py +104 -0
- graflo/db/neo4j/__init__.py +16 -0
- graflo/db/neo4j/conn.py +432 -0
- graflo/db/util.py +49 -0
- graflo/filter/__init__.py +21 -0
- graflo/filter/onto.py +400 -0
- graflo/logging.conf +22 -0
- graflo/onto.py +186 -0
- graflo/plot/__init__.py +17 -0
- graflo/plot/plotter.py +556 -0
- graflo/util/__init__.py +23 -0
- graflo/util/chunker.py +739 -0
- graflo/util/merge.py +148 -0
- graflo/util/misc.py +37 -0
- graflo/util/onto.py +63 -0
- graflo/util/transform.py +406 -0
- graflo-1.1.0.dist-info/METADATA +157 -0
- graflo-1.1.0.dist-info/RECORD +45 -0
- graflo-1.1.0.dist-info/WHEEL +4 -0
- graflo-1.1.0.dist-info/entry_points.txt +5 -0
- graflo-1.1.0.dist-info/licenses/LICENSE +126 -0
graflo/db/arango/conn.py
ADDED
@@ -0,0 +1,734 @@
"""ArangoDB connection implementation for graph database operations.

This module implements the Connection interface for ArangoDB, providing
specific functionality for graph operations in ArangoDB. It handles:
- Graph and collection management
- Document and edge operations
- Index creation and management
- AQL query execution
- Batch operations with upsert support

Key Features:
- Graph-based document organization
- Edge collection management
- Persistent, hash, skiplist, and fulltext indices
- Batch document and edge operations
- AQL query generation and execution

Example:
    >>> conn = ArangoConnection(config)
    >>> conn.init_db(schema, clean_start=True)
    >>> conn.upsert_docs_batch(docs, "users", match_keys=["email"])
"""

import json
import logging
from typing import Optional

from arango import ArangoClient
from suthing import ArangoConnectionConfig

from graflo.architecture.edge import Edge
from graflo.architecture.onto import (
    Index,
    IndexType,
)
from graflo.architecture.schema import Schema
from graflo.architecture.vertex import VertexConfig
from graflo.db.arango.query import fetch_fields_query
from graflo.db.arango.util import render_filters
from graflo.db.connection import Connection
from graflo.db.util import get_data_from_cursor
from graflo.filter.onto import Clause
from graflo.onto import AggregationType, DBFlavor
from graflo.util.transform import pick_unique_dict

logger = logging.getLogger(__name__)


class ArangoConnection(Connection):
    """ArangoDB-specific implementation of the Connection interface.

    This class provides ArangoDB-specific implementations for all database
    operations, including graph management, document operations, and query
    execution. It uses the ArangoDB Python driver for all operations.

    Attributes:
        conn: ArangoDB database connection instance
    """

    def __init__(self, config: ArangoConnectionConfig):
        """Initialize ArangoDB connection.

        Args:
            config: ArangoDB connection configuration containing URL, credentials,
                and database name
        """
        super().__init__()
        client = ArangoClient(hosts=config.url, request_timeout=config.request_timeout)

        self.conn = client.db(
            config.database,
            username=config.username,
            password=config.password,
        )

    def create_database(self, name: str):
        """Create a new ArangoDB database.

        Args:
            name: Name of the database to create
        """
        if not self.conn.has_database(name):
            self.conn.create_database(name)

    def delete_database(self, name: str):
        """Delete an ArangoDB database.

        Args:
            name: Name of the database to delete
        """
        if self.conn.has_database(name):
            self.conn.delete_database(name)

    def execute(self, query, **kwargs):
        """Execute an AQL query.

        Args:
            query: AQL query string to execute
            **kwargs: Additional query parameters

        Returns:
            Cursor: ArangoDB cursor for the query results
        """
        cursor = self.conn.aql.execute(query)
        return cursor

    def close(self):
        """Close the ArangoDB connection."""
        # self.conn.close()
        pass

    def init_db(self, schema: Schema, clean_start):
        """Initialize ArangoDB with the given schema.

        Args:
            schema: Schema containing graph structure definitions
            clean_start: If True, delete all existing collections before initialization
        """
        if clean_start:
            self.delete_collections([], [], delete_all=True)
        self.define_collections(schema)
        self.define_indexes(schema)

    def define_collections(self, schema: Schema):
        """Define ArangoDB collections based on schema.

        Args:
            schema: Schema containing collection definitions
        """
        self.define_vertex_collections(schema)
        self.define_edge_collections(schema.edge_config.edges_list(include_aux=True))

    def define_vertex_collections(self, schema: Schema):
        """Define vertex collections in ArangoDB.

        Creates vertex collections for both connected and disconnected vertices,
        organizing them into appropriate graphs.

        Args:
            schema: Schema containing vertex definitions
        """
        vertex_config = schema.vertex_config
        disconnected_vertex_collections = (
            set(vertex_config.vertex_set) - schema.edge_config.vertices
        )
        for item in schema.edge_config.edges_list():
            u, v = item.source, item.target
            gname = item.graph_name
            logger.info(f"{item.source}, {item.target}, {gname}")
            if self.conn.has_graph(gname):
                g = self.conn.graph(gname)
            else:
                g = self.conn.create_graph(gname)  # type: ignore

            _ = self.create_collection(
                vertex_config.vertex_dbname(u), vertex_config.index(u), g
            )

            _ = self.create_collection(
                vertex_config.vertex_dbname(v), vertex_config.index(v), g
            )
        for v in disconnected_vertex_collections:
            _ = self.create_collection(
                vertex_config.vertex_dbname(v), vertex_config.index(v), None
            )

    def define_edge_collections(self, edges: list[Edge]):
        """Define edge collections in ArangoDB.

        Creates edge collections and their definitions in the appropriate graphs.

        Args:
            edges: List of edge configurations to create
        """
        for item in edges:
            gname = item.graph_name
            if self.conn.has_graph(gname):
                g = self.conn.graph(gname)
            else:
                g = self.conn.create_graph(gname)  # type: ignore
            if not g.has_edge_definition(item.collection_name):
                _ = g.create_edge_definition(
                    edge_collection=item.collection_name,
                    from_vertex_collections=[item._source_collection],
                    to_vertex_collections=[item._target_collection],
                )

    def _add_index(self, general_collection, index: Index):
        """Add an index to an ArangoDB collection.

        Supports persistent, hash, skiplist, and fulltext indices.

        Args:
            general_collection: ArangoDB collection to add index to
            index: Index configuration to create

        Returns:
            IndexHandle: Handle to the created index
        """
        data = index.db_form(DBFlavor.ARANGO)
        if index.type == IndexType.PERSISTENT:
            ih = general_collection.add_index(data)
        elif index.type == IndexType.HASH:
            ih = general_collection.add_index(data)
        elif index.type == IndexType.SKIPLIST:
            ih = general_collection.add_skiplist_index(
                fields=index.fields, unique=index.unique
            )
        elif index.type == IndexType.FULLTEXT:
            ih = general_collection.add_index(
                data={"fields": index.fields, "type": "fulltext"}
            )
        else:
            ih = None
        return ih

    def define_vertex_indices(self, vertex_config: VertexConfig):
        """Define indices for vertex collections.

        Creates indices for each vertex collection based on the configuration.

        Args:
            vertex_config: Vertex configuration containing index definitions
        """
        for c in vertex_config.vertex_set:
            general_collection = self.conn.collection(vertex_config.vertex_dbname(c))
            ixs = general_collection.indexes()
            field_combinations = [tuple(ix["fields"]) for ix in ixs]
            for index_obj in vertex_config.indexes(c):
                if tuple(index_obj.fields) not in field_combinations:
                    self._add_index(general_collection, index_obj)

    def define_edge_indices(self, edges: list[Edge]):
        """Define indices for edge collections.

        Creates indices for each edge collection based on the configuration.

        Args:
            edges: List of edge configurations containing index definitions
        """
        for edge in edges:
            general_collection = self.conn.collection(edge.collection_name)
            for index_obj in edge.indexes:
                self._add_index(general_collection, index_obj)

    def fetch_indexes(self, db_class_name: Optional[str] = None):
        """Fetch all indices from the database.

        Args:
            db_class_name: Optional collection name to fetch indices for

        Returns:
            dict: Mapping of collection names to their indices
        """
        if db_class_name is None:
            classes = self.conn.collections()
        elif self.conn.has_collection(db_class_name):
            classes = [self.conn.collection(db_class_name)]
        else:
            classes = []

        r = {}
        for cname in classes:
            assert isinstance(cname["name"], str)
            c = self.conn.collection(cname["name"])
            r[cname["name"]] = c.indexes()
        return r

    def create_collection(self, db_class_name, index: None | Index = None, g=None):
        """Create a new ArangoDB collection.

        Args:
            db_class_name: Name of the collection to create
            index: Optional index to create on the collection
            g: Optional graph to create the collection in

        Returns:
            IndexHandle: Handle to the created index if one was created
        """
        if not self.conn.has_collection(db_class_name):
            if g is not None:
                _ = g.create_vertex_collection(db_class_name)
            else:
                self.conn.create_collection(db_class_name)
        general_collection = self.conn.collection(db_class_name)
        if index is not None and index.fields != ["_key"]:
            ih = self._add_index(general_collection, index)
            return ih
        else:
            return None

    def delete_collections(self, cnames=(), gnames=(), delete_all=False):
        """Delete collections and graphs from ArangoDB.

        Args:
            cnames: Collection names to delete
            gnames: Graph names to delete
            delete_all: If True, delete all non-system collections and graphs
        """
        logger.info("collections (non system):")
        logger.info([c for c in self.conn.collections() if c["name"][0] != "_"])

        if delete_all:
            cnames = [c["name"] for c in self.conn.collections() if c["name"][0] != "_"]
            gnames = [g["name"] for g in self.conn.graphs()]

        for gn in gnames:
            if self.conn.has_graph(gn):
                self.conn.delete_graph(gn)

        logger.info("graphs (after delete operation):")
        logger.info(self.conn.graphs())

        for cn in cnames:
            if self.conn.has_collection(cn):
                self.conn.delete_collection(cn)

        logger.info("collections (after delete operation):")
        logger.info([c for c in self.conn.collections() if c["name"][0] != "_"])

        logger.info("graphs:")
        logger.info(self.conn.graphs())

    def get_collections(self):
        """Get all collections in the database.

        Returns:
            list: List of collection information dictionaries
        """
        return self.conn.collections()

    def upsert_docs_batch(
        self,
        docs,
        class_name,
        match_keys: list[str] | None = None,
        **kwargs,
    ):
        """Upsert a batch of documents using AQL.

        Performs an upsert operation on a batch of documents, using the specified
        match keys to determine whether to update existing documents or insert new ones.

        Args:
            docs: List of documents to upsert
            class_name: Collection name to upsert into
            match_keys: Keys to match for upsert operation
            **kwargs: Additional options:
                - dry: If True, don't execute the query
                - update_keys: Keys to update on match
                - filter_uniques: If True, filter duplicate documents
        """
        dry = kwargs.pop("dry", False)
        update_keys = kwargs.pop("update_keys", None)
        filter_uniques = kwargs.pop("filter_uniques", True)

        if isinstance(docs, list):
            if filter_uniques:
                docs = pick_unique_dict(docs)
            docs = json.dumps(docs)
        if match_keys is None:
            upsert_clause = ""
            update_clause = ""
        else:
            upsert_clause = ", ".join([f'"{k}": doc.{k}' for k in match_keys])
            upsert_clause = f"UPSERT {{{upsert_clause}}}"

            if isinstance(update_keys, list):
                update_clause = ", ".join([f'"{k}": doc.{k}' for k in update_keys])
                update_clause = f"{{{update_clause}}}"
            elif update_keys == "doc":
                update_clause = "doc"
            else:
                update_clause = "{}"
            update_clause = f"UPDATE {update_clause}"

        options = "OPTIONS {exclusive: true, ignoreErrors: true}"

        q_update = f"""FOR doc in {docs}
                        {upsert_clause}
                        INSERT doc
                        {update_clause}
                        IN {class_name} {options}"""
        if not dry:
            self.execute(q_update)

    def insert_edges_batch(
        self,
        docs_edges,
        source_class,
        target_class,
        relation_name=None,
        collection_name=None,
        match_keys_source=("_key",),
        match_keys_target=("_key",),
        filter_uniques=True,
        uniq_weight_fields=None,
        uniq_weight_collections=None,
        upsert_option=False,
        head=None,
        **kwargs,
    ):
        """Insert a batch of edges using AQL.

        Creates edges between source and target vertices, with support for
        weight fields and unique constraints.

        Args:
            docs_edges: List of edge documents in format [{_source_aux: source_doc, _target_aux: target_doc}]
            source_class: Source vertex collection name
            target_class: Target vertex collection name
            relation_name: Optional relation name for the edges
            collection_name: Edge collection name
            match_keys_source: Keys to match source vertices
            match_keys_target: Keys to match target vertices
            filter_uniques: If True, filter duplicate edges
            uniq_weight_fields: Fields to consider for uniqueness
            uniq_weight_collections: Collections to consider for uniqueness
            upsert_option: If True, use upsert instead of insert
            head: Optional limit on number of edges to insert
            **kwargs: Additional options:
                - dry: If True, don't execute the query
        """
        dry = kwargs.pop("dry", False)

        if isinstance(docs_edges, list):
            if docs_edges:
                logger.debug(f" docs_edges[0] = {docs_edges[0]}")
            if head is not None:
                docs_edges = docs_edges[:head]
            if filter_uniques:
                docs_edges = pick_unique_dict(docs_edges)
            docs_edges_str = json.dumps(docs_edges)
        else:
            return ""

        if match_keys_source[0] == "_key":
            result_from = f'CONCAT("{source_class}/", edge[0]._key)'
            source_filter = ""
        else:
            result_from = "sources[0]._id"
            filter_source = " && ".join(
                [f"v.{k} == edge[0].{k}" for k in match_keys_source]
            )
            source_filter = (
                f"LET sources = (FOR v IN {source_class} FILTER"
                f" {filter_source} LIMIT 1 RETURN v)"
            )

        if match_keys_target[0] == "_key":
            result_to = f'CONCAT("{target_class}/", edge[1]._key)'
            target_filter = ""
        else:
            result_to = "targets[0]._id"
            filter_target = " && ".join(
                [f"v.{k} == edge[1].{k}" for k in match_keys_target]
            )
            target_filter = (
                f"LET targets = (FOR v IN {target_class} FILTER"
                f" {filter_target} LIMIT 1 RETURN v)"
            )

        doc_definition = f"MERGE({{_from : {result_from}, _to : {result_to}}}, edge[2])"

        logger.debug(f" source_filter = {source_filter}")
        logger.debug(f" target_filter = {target_filter}")
        logger.debug(f" doc = {doc_definition}")

        if upsert_option:
            ups_from = result_from if source_filter else "doc._from"
            ups_to = result_to if target_filter else "doc._to"

            weight_fs = []
            if uniq_weight_fields is not None:
                weight_fs += uniq_weight_fields
            if uniq_weight_collections is not None:
                weight_fs += uniq_weight_collections
            if relation_name is not None:
                weight_fs += ["relation"]

            if weight_fs:
                weights_clause = ", " + ", ".join(
                    [f"'{x}' : edge.{x}" for x in weight_fs]
                )
            else:
                weights_clause = ""

            upsert = f"{{'_from': {ups_from}, '_to': {ups_to}" + weights_clause + "}"
            logger.debug(f" upsert clause: {upsert}")
            clauses = f"UPSERT {upsert} INSERT doc UPDATE {{}}"
            options = "OPTIONS {exclusive: true}"
        else:
            if relation_name is None:
                doc_clause = "doc"
            else:
                doc_clause = f"MERGE(doc, {{'relation': '{relation_name}' }})"
            clauses = f"INSERT {doc_clause}"
            options = "OPTIONS {exclusive: true, ignoreErrors: true}"

        q_update = f"""
            FOR edge in {docs_edges_str} {source_filter} {target_filter}
                LET doc = {doc_definition}
                {clauses}
                in {collection_name} {options}"""
        if not dry:
            self.execute(q_update)

    def insert_return_batch(self, docs, class_name):
        """Insert documents and return their keys.

        Args:
            docs: Documents to insert
            class_name: Collection to insert into

        Returns:
            str: AQL query string for the operation
        """
        docs = json.dumps(docs)
        query0 = f"""FOR doc in {docs}
                        INSERT doc
                        INTO {class_name}
                        LET inserted = NEW
                        RETURN {{_key: inserted._key}}
                    """
        return query0

    def fetch_present_documents(
        self,
        batch,
        class_name,
        match_keys,
        keep_keys,
        flatten=False,
        filters: None | Clause | list | dict = None,
    ) -> list | dict:
        """Fetch documents that exist in the database.

        Args:
            batch: Batch of documents to check
            class_name: Collection to check in
            match_keys: Keys to match documents
            keep_keys: Keys to keep in result
            flatten: If True, flatten the result into a list
            filters: Additional query filters

        Returns:
            Union[list, dict]: Documents that exist in the database, either as a
                flat list or a dictionary mapping batch indices to documents
        """
        q0 = fetch_fields_query(
            collection_name=class_name,
            docs=batch,
            match_keys=match_keys,
            keep_keys=keep_keys,
            filters=filters,
        )
        # {"__i": i, "_group": [doc]}
        cursor = self.execute(q0)

        if flatten:
            rdata = []
            for item in get_data_from_cursor(cursor):
                group = item.pop("_group", [])
                rdata += [sub_item for sub_item in group]
            return rdata
        else:
            rdata_dict = {}
            for item in get_data_from_cursor(cursor):
                __i = item.pop("__i")
                group = item.pop("_group")
                rdata_dict[__i] = group
            return rdata_dict

    def fetch_docs(
        self,
        class_name,
        filters: None | Clause | list | dict = None,
        limit: int | None = None,
        return_keys: list | None = None,
        unset_keys: list | None = None,
    ):
        """Fetch documents from a collection.

        Args:
            class_name: Collection to fetch from
            filters: Query filters
            limit: Maximum number of documents to return
            return_keys: Keys to return
            unset_keys: Keys to unset

        Returns:
            list: Fetched documents
        """
        filter_clause = render_filters(filters, doc_name="d")

        if return_keys is None:
            if unset_keys is None:
                return_clause = "d"
            else:
                tmp_clause = ", ".join([f'"{item}"' for item in unset_keys])
                return_clause = f"UNSET(d, {tmp_clause})"
        else:
            if unset_keys is None:
                tmp_clause = ", ".join([f'"{item}"' for item in return_keys])
                return_clause = f"KEEP(d, {tmp_clause})"
            else:
                raise ValueError("both return_keys and unset_keys are set")

        if limit is not None and isinstance(limit, int):
            limit_clause = f"LIMIT {limit}"
        else:
            limit_clause = ""

        q = (
            f"FOR d in {class_name}"
            f" {filter_clause}"
            f" {limit_clause}"
            f" RETURN {return_clause}"
        )
        cursor = self.execute(q)
        return get_data_from_cursor(cursor)

    def aggregate(
        self,
        class_name,
        aggregation_function: AggregationType,
        discriminant: str | None = None,
        aggregated_field: str | None = None,
        filters: None | Clause | list | dict = None,
    ):
        """Perform aggregation on a collection.

        Args:
            class_name: Collection to aggregate
            aggregation_function: Type of aggregation to perform
            discriminant: Field to group by
            aggregated_field: Field to aggregate
            filters: Query filters

        Returns:
            list: Aggregation results
        """
        filter_clause = render_filters(filters, doc_name="doc")

        if (
            aggregated_field is not None
            and aggregation_function != AggregationType.COUNT
        ):
            group_unit = f"g[*].doc.{aggregated_field}"
        else:
            group_unit = "g"

        if discriminant is not None:
            collect_clause = f"COLLECT value = doc['{discriminant}'] INTO g"
            return_clause = f"""{{ '{discriminant}' : value, '_value': {aggregation_function}({group_unit})}}"""
        else:
            if (
                aggregated_field is None
                and aggregation_function == AggregationType.COUNT
            ):
                collect_clause = (
                    f"COLLECT AGGREGATE value = {aggregation_function} (doc)"
                )
            else:
                collect_clause = (
                    "COLLECT AGGREGATE value ="
                    f" {aggregation_function}(doc['{aggregated_field}'])"
                )
            return_clause = """{ '_value' : value }"""

        q = f"""FOR doc IN {class_name}
                {filter_clause}
                {collect_clause}
                RETURN {return_clause}"""

        cursor = self.execute(q)
        data = get_data_from_cursor(cursor)
        return data

    def keep_absent_documents(
        self,
        batch,
        class_name,
        match_keys,
        keep_keys,
        filters: None | Clause | list | dict = None,
    ):
        """Keep documents that don't exist in the database.

        Args:
            batch: Batch of documents to check
            class_name: Collection to check in
            match_keys: Keys to match documents
            keep_keys: Keys to keep in result
            filters: Additional query filters

        Returns:
            list: Documents that don't exist in the database
        """
        present_docs_keys = self.fetch_present_documents(
            batch=batch,
            class_name=class_name,
            match_keys=match_keys,
            keep_keys=keep_keys,
            flatten=False,
            filters=filters,
        )

        assert isinstance(present_docs_keys, dict)

        if any([len(v) > 1 for v in present_docs_keys.values()]):
            logger.warning(
                "fetch_present_documents returned multiple docs per filtering condition"
            )

        absent_indices = sorted(set(range(len(batch))) - set(present_docs_keys.keys()))
        batch_absent = [batch[j] for j in absent_indices]
        return batch_absent

    def update_to_numeric(self, collection_name, field):
        """Update a field to numeric type in all documents.

        Args:
            collection_name: Collection to update
            field: Field to convert to numeric

        Returns:
            str: AQL query string for the operation
        """
        s1 = f"FOR p IN {collection_name} FILTER p.{field} update p with {{"
        s2 = f"{field}: TO_NUMBER(p.{field}) "
        s3 = f"}} in {collection_name}"
        q0 = s1 + s2 + s3
        return q0
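For orientation only, and not part of the wheel, here is a minimal usage sketch that follows the Example in the module docstring above. The endpoint, credentials, the "users" collection, and the keyword arguments passed to ArangoConnectionConfig are illustrative assumptions (the config fields are inferred from the attributes read in ArangoConnection.__init__); the trailing comment shows the rough shape of the AQL that upsert_docs_batch assembles for this call.

# Hypothetical usage sketch -- endpoint, credentials, and the "users"
# collection are made up for illustration; the ArangoConnectionConfig
# keyword arguments are assumed, not taken from the package.
from suthing import ArangoConnectionConfig

from graflo.db.arango.conn import ArangoConnection

config = ArangoConnectionConfig(
    url="http://localhost:8529",
    database="example_db",
    username="root",
    password="secret",
    request_timeout=60,
)
conn = ArangoConnection(config)

docs = [{"email": "a@example.org", "name": "Ada"}]
conn.upsert_docs_batch(docs, "users", match_keys=["email"], update_keys="doc")

# For this call, upsert_docs_batch builds an AQL statement roughly like:
#   FOR doc in [{"email": "a@example.org", "name": "Ada"}]
#       UPSERT {"email": doc.email}
#       INSERT doc
#       UPDATE doc
#       IN users OPTIONS {exclusive: true, ignoreErrors: true}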