linkml-store 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of linkml-store might be problematic.
- linkml_store/api/client.py +2 -0
- linkml_store/api/collection.py +101 -6
- linkml_store/api/database.py +36 -5
- linkml_store/api/stores/duckdb/duckdb_collection.py +1 -0
- linkml_store/api/stores/filesystem/__init__.py +7 -8
- linkml_store/api/stores/filesystem/filesystem_collection.py +148 -113
- linkml_store/api/stores/filesystem/filesystem_database.py +57 -21
- linkml_store/api/stores/mongodb/mongodb_collection.py +10 -4
- linkml_store/api/stores/mongodb/mongodb_database.py +13 -2
- linkml_store/api/types.py +4 -0
- linkml_store/cli.py +88 -7
- linkml_store/utils/change_utils.py +17 -0
- linkml_store/utils/format_utils.py +89 -8
- linkml_store/utils/patch_utils.py +126 -0
- linkml_store/utils/query_utils.py +89 -0
- {linkml_store-0.1.8.dist-info → linkml_store-0.1.9.dist-info}/METADATA +4 -1
- {linkml_store-0.1.8.dist-info → linkml_store-0.1.9.dist-info}/RECORD +20 -16
- {linkml_store-0.1.8.dist-info → linkml_store-0.1.9.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.8.dist-info → linkml_store-0.1.9.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.8.dist-info → linkml_store-0.1.9.dist-info}/entry_points.txt +0 -0
linkml_store/api/client.py
CHANGED
```diff
@@ -9,6 +9,7 @@ from linkml_store.api import Database
 from linkml_store.api.config import ClientConfig
 from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
 from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
+from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
 from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
 from linkml_store.api.stores.solr.solr_database import SolrDatabase
 
@@ -20,6 +21,7 @@ HANDLE_MAP = {
     "solr": SolrDatabase,
     "mongodb": MongoDBDatabase,
     "chromadb": ChromaDBDatabase,
+    "file": FileSystemDatabase,
 }
```
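The new `file` entry wires the filesystem adapter into the client's `HANDLE_MAP`. A minimal usage sketch (hypothetical: it assumes the `attach_database` / `create_collection` API from earlier releases, plus the `file:<path>` handle form documented in the filesystem adapter's docstring below):

```python
from linkml_store.api.client import Client

client = Client()
# attach a filesystem-backed database; "file:<path>" points at a local directory
db = client.attach_database("file:/tmp/demo_store", alias="fs")

persons = db.create_collection("Person", alias="persons")
persons.insert([{"id": "P1", "name": "Alice"}, {"id": "P2", "name": "Bob"}])
persons.commit()  # FileSystemCollection writes to disk only on commit
```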
linkml_store/api/collection.py
CHANGED
```diff
@@ -4,16 +4,19 @@ import hashlib
 import logging
 from collections import defaultdict
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, TextIO, Type, Union
+from typing import TYPE_CHECKING, Any, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
 
 import numpy as np
+from linkml_runtime import SchemaView
 from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
 from linkml_runtime.linkml_model.meta import ArrayExpression
 from pydantic import BaseModel
 
+from linkml_store.api.types import DatabaseType
 from linkml_store.index import get_indexer
 from linkml_store.utils.format_utils import load_objects
 from linkml_store.utils.object_utils import clean_empties
+from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists
 
 try:
     from linkml.validator.report import ValidationResult
@@ -36,7 +39,7 @@ IDENTIFIER = str
 FIELD_NAME = str
 
 
-class Collection:
+class Collection(Generic[DatabaseType]):
     """
     A collection is an organized set of objects of the same or similar type.
 
@@ -56,7 +59,7 @@ class Collection:
     """
 
     # name: str
-    parent: Optional["Database"] = None
+    parent: Optional[DatabaseType] = None
     _indexers: Optional[Dict[str, Indexer]] = None
     # hidden: Optional[bool] = False
 
@@ -197,6 +200,10 @@ class Collection:
         """
         raise NotImplementedError
 
+    def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
+        patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
+        self._broadcast(patches, **kwargs)
+
     def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
         """
         Delete one or more objects from the collection.
@@ -301,7 +308,7 @@
 
     def query_facets(
         self, where: Optional[Dict] = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
-    ) -> Dict[str,
+    ) -> Dict[str, List[Tuple[Any, int]]]:
         """
         Run a query to get facet counts for one or more columns.
 
@@ -319,7 +326,7 @@
         :param query: A Query object representing the base query.
         :param facet_columns: A list of column names to get facet counts for.
         :param facet_limit:
-        :return: A dictionary where keys are column names and values are
+        :return: A dictionary where keys are column names and values are tuples
             containing the facet counts for each unique value in the respective column.
         """
         raise NotImplementedError
@@ -523,6 +530,7 @@
             ix_coll.delete_where()
 
         ix_coll.insert(objects_with_ix, **kwargs)
+        ix_coll.commit()
 
     def list_index_names(self) -> List[str]:
         """
@@ -557,12 +565,22 @@
 
         :return:
         """
-        sv = self.parent.schema_view
+        sv: SchemaView = self.parent.schema_view
         if sv:
             cls = sv.get_class(self.target_class_name)
+            if cls and not cls.attributes:
+                if not sv.class_induced_slots(cls.name):
+                    for att in self._induce_attributes():
+                        cls.attributes[att.name] = att
+                    sv.set_modified()
             return cls
         return None
 
+    def _induce_attributes(self) -> List[SlotDefinition]:
+        result = self.find({}, limit=-1)
+        cd = self.induce_class_definition_from_objects(result.rows, max_sample_size=None)
+        return list(cd.attributes.values())
+
     @property
     def identifier_attribute_name(self) -> Optional[str]:
         """
@@ -579,6 +597,37 @@
                 return att.name
         return None
 
+    def set_identifier_attribute_name(self, name: str):
+        """
+        Set the name of the identifier attribute for the collection.
+
+        AKA the primary key.
+
+        :param name: The name of the identifier attribute.
+        """
+        cd = self.class_definition()
+        if not cd:
+            raise ValueError(f"Cannot find class definition for {self.target_class_name}")
+        id_att = None
+        candidates = []
+        sv: SchemaView = self.parent.schema_view
+        cls = sv.get_class(cd.name)
+        existing_id_slot = sv.get_identifier_slot(cls.name)
+        if existing_id_slot:
+            if existing_id_slot.name == name:
+                return
+            existing_id_slot.identifier = False
+        for att in cls.attributes.values():
+            candidates.append(att.name)
+            if att.name == name:
+                att.identifier = True
+                id_att = att
+            else:
+                att.identifier = False
+        if not id_att:
+            raise ValueError(f"No attribute found with name {name} in {candidates}")
+        sv.set_modified()
+
     def object_identifier(self, obj: OBJECT, auto=True) -> Optional[IDENTIFIER]:
         """
         Return the identifier for an object.
@@ -622,6 +671,8 @@
         for k, v in obj.items():
             keys[k].append(v)
         for k, vs in keys.items():
+            if k == "_id":
+                continue
             multivalueds = []
             inlineds = []
             rngs = []
@@ -698,6 +749,39 @@
         """
         raise NotImplementedError
 
+    def apply_patches(self, patches: List[PatchDict], **kwargs):
+        """
+        Apply a patch to the collection.
+
+        Patches conform to the JSON Patch format,
+
+        :param patches:
+        :param kwargs:
+        :return:
+        """
+        all_objs = self.find(limit=-1).rows
+        primary_key = self.identifier_attribute_name
+        if not primary_key:
+            raise ValueError(f"No primary key for {self.target_class_name}")
+        new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
+        self.replace(new_objs)
+
+    def diff(self, other: "Collection", **kwargs):
+        """
+        Diff two collections.
+
+        :param other:
+        :param kwargs:
+        :return:
+        """
+        src_objs = self.find(limit=-1).rows
+        tgt_objs = other.find(limit=-1).rows
+        primary_key = self.identifier_attribute_name
+        if not primary_key:
+            raise ValueError(f"No primary key for {self.target_class_name}")
+        patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
+        return patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
+
     def iter_validate_collection(self, **kwargs) -> Iterator["ValidationResult"]:
         """
         Validate the contents of the collection
@@ -717,3 +801,14 @@
         for obj in result.rows:
             obj = clean_empties(obj)
             yield from validator.iter_results(obj, class_name)
+
+    def commit(self):
+        """
+        Commit changes to the collection.
+
+        :return:
+        """
+        pass
+
+    def _broadcast(self, *args, **kwargs):
+        self.parent.broadcast(self, *args, **kwargs)
```
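Together with the new `patch_utils` module, `diff` and `apply_patches` give collections a JSON-Patch-style synchronization primitive. A hypothetical round trip between two collections `coll_a` and `coll_b` that share the primary key `id` (the exact patch paths emitted by `patches_from_objects_lists` are internal to `patch_utils` and not shown in this diff):

```python
# compute patches that would turn coll_a's contents into coll_b's,
# keyed on the shared primary key
patches = coll_a.diff(coll_b)

# apply_patches loads all rows, patches the list, and replaces the
# collection contents wholesale
coll_a.apply_patches(patches)
# assumption: diffing two identical collections yields an empty patch list
assert not coll_a.diff(coll_b)
```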
linkml_store/api/database.py
CHANGED
```diff
@@ -3,9 +3,24 @@ from abc import ABC
 from collections import defaultdict
 from copy import copy
 from pathlib import Path
-from typing import
-
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    ClassVar,
+    Dict,
+    Generic,
+    Iterator,
+    List,
+    Optional,
+    Sequence,
+    Type,
+    Union,
+)
+
+from linkml_store.api.types import CollectionType
 from linkml_store.utils.format_utils import load_objects, render_output
+from linkml_store.utils.patch_utils import PatchDict
 
 try:
     from linkml.validator.report import Severity, ValidationResult
@@ -24,8 +39,10 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
+LISTENER = Callable[[Collection, List[PatchDict]], None]
+
 
-class Database(ABC):
+class Database(ABC, Generic[CollectionType]):
     """
     A Database provides access to named collections of data.
 
@@ -89,6 +106,8 @@ class Database(ABC):
     metadata: Optional[DatabaseConfig] = None
     collection_class: ClassVar[Optional[Type[Collection]]] = None
 
+    listeners: Optional[List[LISTENER]] = None
+
     def __init__(self, handle: Optional[str] = None, metadata: Optional[DatabaseConfig] = None, **kwargs):
         if metadata:
             self.metadata = metadata
@@ -233,7 +252,8 @@
         :param kwargs:
         :return:
         """
-
+        for coll in self.list_collections():
+            coll.commit()
 
     def close(self, **kwargs):
         """
@@ -301,6 +321,7 @@
             alias = name
         self._collections[alias] = collection
         if recreate_if_exists:
+            logger.debug(f"Recreating collection {collection.name}")
             collection.delete_where({}, missing_ok=True)
         return collection
 
@@ -418,7 +439,11 @@
         :return:
 
         """
-
+        if query.from_table:
+            collection = self.get_collection(query.from_table)
+            return collection.query(query, **kwargs)
+        else:
+            raise NotImplementedError(f"Querying without a table is not supported in {self.__class__.__name__}")
 
     @property
     def schema_view(self) -> SchemaView:
@@ -689,3 +714,9 @@
         logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
         with open(location, "w", encoding="utf-8") as stream:
             stream.write(render_output(obj, format=target_format))
+
+    def broadcast(self, source: Collection, patches: List[PatchDict]):
+        if not self.listeners:
+            return
+        for listener in self.listeners:
+            listener(source, patches)
```
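The `listeners` field and `broadcast` method form a simple observer hook: a collection that calls `_post_insert_hook` after a write pushes `{"op": "add", ...}` patches to every registered listener. A minimal sketch, assuming listeners are registered by assigning to `listeners` directly (no registration helper appears in this diff), and noting that of the adapters shown here only `DuckDBCollection.insert` (below) actually fires the hook:

```python
from typing import List

def log_changes(source, patches: List[dict]):
    # each patch is a PatchDict, e.g. {"op": "add", "path": "/0", "value": obj}
    print(f"{source.name}: {len(patches)} change(s)")

db.listeners = [log_changes]  # db: a DuckDB-backed database instance
db.get_collection("persons").insert({"id": "P3", "name": "Carol"})
# -> "persons: 1 change(s)" via _post_insert_hook -> _broadcast -> Database.broadcast
```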
linkml_store/api/stores/duckdb/duckdb_collection.py
CHANGED

```diff
@@ -38,6 +38,7 @@ class DuckDBCollection(Collection):
             with conn.begin():
                 conn.execute(insert(table), objs)
             conn.commit()
+        self._post_insert_hook(objs)
 
     def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
         if not isinstance(objs, list):
```
linkml_store/api/stores/filesystem/__init__.py
CHANGED

```diff
@@ -1,16 +1,15 @@
 """
-Adapter for
+Adapter for FileSystem wrapper
 
 Handles have the form:
 
-- ``
-
-"""
+- ``file:<path>`` for a local file
+"""
 
-from linkml_store.api.stores.
-from linkml_store.api.stores.
+from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
+from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
 
 __all__ = [
-    "
-    "
+    "FileSystemCollection",
+    "FileSystemDatabase",
 ]
```
linkml_store/api/stores/filesystem/filesystem_collection.py
CHANGED

```diff
@@ -1,142 +1,177 @@
 import logging
+from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
-import sqlalchemy as sqla
-from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
-from sqlalchemy import Column, Table, delete, insert, inspect, text
-from sqlalchemy.sql.ddl import CreateTable
-
 from linkml_store.api import Collection
 from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
-from linkml_store.api.queries import Query
-from linkml_store.api.
-from linkml_store.utils.
+from linkml_store.api.queries import Query, QueryResult
+from linkml_store.api.types import DatabaseType
+from linkml_store.utils.query_utils import mongo_query_to_match_function
 
 logger = logging.getLogger(__name__)
 
 
-class FileSystemCollection(Collection):
-
+class FileSystemCollection(Collection[DatabaseType]):
+    path: Optional[Path] = None
+    file_format: Optional[str] = None
+    encoding: Optional[str] = None
+    _objects_list: List[OBJECT] = None
+    _object_map: Dict[str, OBJECT] = None
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        parent: DatabaseType = self.parent
+        if not self.path:
+            if self.parent:
+                self.path = Path(parent.directory_path)
+        self._objects_list = []
+        self._object_map = {}
+        if not self.file_format:
+            self.file_format = "json"
+
+    @property
+    def path_to_file(self):
+        return Path(self.parent.directory_path) / f"{self.name}.{self.file_format}"
+
+    @property
+    def objects_as_list(self) -> List[OBJECT]:
+        if self._object_map:
+            return list(self._object_map.values())
+        else:
+            return self._objects_list
+
+    def _set_objects(self, objs: List[OBJECT]):
+        pk = self.identifier_attribute_name
+        if pk:
+            self._object_map = {obj[pk]: obj for obj in objs}
+            self._objects_list = []
+        else:
+            self._objects_list = objs
+            self._object_map = {}
+
+    def commit(self):
+        path = self.path_to_file
+        if not path:
+            raise ValueError("Path not set")
+        path.parent.mkdir(parents=True, exist_ok=True)
+        self._save(path)
+
+    def _save(self, path: Path):
+        encoding = self.encoding or "utf-8"
+        fmt = self.file_format or "json"
+        mode = "w"
+        if fmt == "parquet":
+            mode = "wb"
+            encoding = None
+        with open(path, mode, encoding=encoding) as stream:
+            if fmt == "json":
+                import json
+
+                json.dump(self.objects_as_list, stream, indent=2)
+            elif fmt == "jsonl":
+                import jsonlines
+
+                writer = jsonlines.Writer(stream)
+                writer.write_all(self.objects_as_list)
+            elif fmt == "yaml":
+                import yaml
+
+                yaml.dump_all(self.objects_as_list, stream)
+            elif fmt == "parquet":
+                import pandas as pd
+                import pyarrow
+                import pyarrow.parquet as pq
+
+                df = pd.DataFrame(self.objects_as_list)
+                table = pyarrow.Table.from_pandas(df)
+                pq.write_table(table, stream)
+            elif fmt in {"csv", "tsv"}:
+                import csv
+
+                delimiter = "\t" if fmt == "tsv" else ","
+                fieldnames = list(self.objects_as_list[0].keys())
+                for obj in self.objects_as_list[1:]:
+                    fieldnames.extend([k for k in obj.keys() if k not in fieldnames])
+                writer = csv.DictWriter(stream, fieldnames=fieldnames, delimiter=delimiter)
+                writer.writeheader()
+                for obj in self.objects_as_list:
+                    writer.writerow(obj)
+            else:
+                raise ValueError(f"Unsupported file format: {fmt}")
 
     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
         if not isinstance(objs, list):
             objs = [objs]
         if not objs:
             return
-
-        if
-
-
-
-
-
-
-
-        with engine.connect() as conn:
-            with conn.begin():
-                conn.execute(insert(table), objs)
-            conn.commit()
+        pk = self.identifier_attribute_name
+        if pk:
+            for obj in objs:
+                if pk not in obj:
+                    raise ValueError(f"Primary key {pk} not found in object {obj}")
+                pk_val = obj[pk]
+                self._object_map[pk_val] = obj
+        else:
+            self._objects_list.extend(objs)
 
     def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
         if not isinstance(objs, list):
             objs = [objs]
-
-
-
-
-
-        with engine.connect() as conn:
+        if not objs:
+            return 0
+        pk = self.identifier_attribute_name
+        n = 0
+        if pk:
             for obj in objs:
-
-
-
-
-
-
+                pk_val = obj[pk]
+                if pk_val in self._object_map:
+                    del self._object_map[pk_val]
+                    n += 1
+        else:
+            n = len(objs)
+            self._objects_list = [o for o in self._objects_list if o not in objs]
+            n = n - len(objs)
+        return n
 
     def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
         logger.info(f"Deleting from {self.target_class_name} where: {where}")
         if where is None:
             where = {}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return deleted_rows_count if deleted_rows_count > -1 else None
+
+        def matches(obj: OBJECT):
+            for k, v in where.items():
+                if obj.get(k) != v:
+                    return False
+            return True
+
+        print(type(self))
+        print(self)
+        print(vars(self))
+        curr_objects = [o for o in self.objects_as_list if not matches(o)]
+        self._set_objects(curr_objects)
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+
+        where = query.where_clause or {}
+        match = mongo_query_to_match_function(where)
+        rows = [o for o in self.objects_as_list if match(o)]
+        count = len(rows)
+        return QueryResult(query=query, num_rows=count, rows=rows)
 
     def query_facets(
         self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
     ) -> Dict[str, Dict[str, int]]:
-
-
-
-
-
-
-
-        if
-
-
-
-
-
-
-        rows = list(conn.execute(text(facet_query_str)))
-        results[col] = rows
-        return results
-
-    def _sqla_table(self, cd: ClassDefinition) -> Table:
-        schema_view = self.parent.schema_view
-        metadata_obj = sqla.MetaData()
-        cols = []
-        for att in schema_view.class_induced_slots(cd.name):
-            typ = TMAP.get(att.range, sqla.String)
-            if att.inlined:
-                typ = sqla.JSON
-            if att.multivalued:
-                typ = sqla.ARRAY(typ, dimensions=1)
-            if att.array:
-                typ = sqla.ARRAY(typ, dimensions=1)
-            col = Column(att.name, typ)
-            cols.append(col)
-        t = Table(self.alias, metadata_obj, *cols)
-        return t
-
-    def _create_table(self, cd: ClassDefinition):
-        if self._table_created or self.metadata.is_prepopulated:
-            logger.info(f"Already have table for: {cd.name}")
-            return
-        query = Query(
-            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
-        )
-        qr = self.parent.query(query)
-        if qr.num_rows > 0:
-            logger.info(f"Table already exists for {cd.name}")
-            self._table_created = True
-            self.metadata.is_prepopulated = True
-            return
-        logger.info(f"Creating table for {cd.name}")
-        t = self._sqla_table(cd)
-        ct = CreateTable(t)
-        ddl = str(ct.compile(self.parent.engine))
-        with self.parent.engine.connect() as conn:
-            conn.execute(text(ddl))
-            conn.commit()
-        self._table_created = True
-        self.metadata.is_prepopulated = True
+        match = mongo_query_to_match_function(where)
+        rows = [o for o in self.objects_as_list if match(o)]
+        if not facet_columns:
+            facet_columns = self.class_definition().attributes.keys()
+        facet_results = {c: {} for c in facet_columns}
+        for row in rows:
+            for fc in facet_columns:
+                if fc in row:
+                    v = row[fc]
+                    if v not in facet_results[fc]:
+                        facet_results[fc][v] = 1
+                    else:
+                        facet_results[fc][v] += 1
+        return {fc: list(facet_results[fc].items()) for fc in facet_results}
```
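`FileSystemCollection.query` and `query_facets` both delegate filtering to `mongo_query_to_match_function` from the new `query_utils` module (+89 lines, not shown here), which compiles a MongoDB-style where clause into a plain predicate over dicts. A hypothetical illustration of the intended behavior, assuming simple key-equality clauses like those handled by the `matches` helper in `delete_where` (the full operator set supported by `query_utils` is not visible in this diff):

```python
from linkml_store.utils.query_utils import mongo_query_to_match_function

# build a predicate from a Mongo-style where clause
match = mongo_query_to_match_function({"name": "Alice"})

assert match({"id": "P1", "name": "Alice"})       # clause matches
assert not match({"id": "P2", "name": "Bob"})     # clause does not match
```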