linkml-store 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff compares the contents of publicly released package versions as published to their respective public registries. It is provided for informational purposes only.


@@ -9,6 +9,7 @@ from linkml_store.api import Database
 from linkml_store.api.config import ClientConfig
 from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
 from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
+from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
 from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
 from linkml_store.api.stores.solr.solr_database import SolrDatabase

@@ -20,6 +21,7 @@ HANDLE_MAP = {
     "solr": SolrDatabase,
     "mongodb": MongoDBDatabase,
     "chromadb": ChromaDBDatabase,
+    "file": FileSystemDatabase,
 }

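
The new `file` handle maps to the FileSystemDatabase adapter. As a minimal sketch of what this enables, assuming the existing `Client.attach_database` handle convention used by the other adapters (the path and collection names below are illustrative):

```python
from linkml_store.api.client import Client

client = Client()
# "file:" resolves to FileSystemDatabase via HANDLE_MAP; the path is illustrative
db = client.attach_database("file:/tmp/mydata")
persons = db.create_collection("Person", alias="persons")
persons.insert({"id": "P1", "name": "Alice"})
db.commit()  # Database.commit() (changed later in this diff) delegates to each collection
```
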
@@ -4,16 +4,19 @@ import hashlib
 import logging
 from collections import defaultdict
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, TextIO, Type, Union
+from typing import TYPE_CHECKING, Any, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union

 import numpy as np
+from linkml_runtime import SchemaView
 from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
 from linkml_runtime.linkml_model.meta import ArrayExpression
 from pydantic import BaseModel

+from linkml_store.api.types import DatabaseType
 from linkml_store.index import get_indexer
 from linkml_store.utils.format_utils import load_objects
 from linkml_store.utils.object_utils import clean_empties
+from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists

 try:
     from linkml.validator.report import ValidationResult
@@ -36,7 +39,7 @@ IDENTIFIER = str
 FIELD_NAME = str


-class Collection:
+class Collection(Generic[DatabaseType]):
     """
     A collection is an organized set of objects of the same or similar type.

@@ -56,7 +59,7 @@ class Collection:
     """

     # name: str
-    parent: Optional["Database"] = None
+    parent: Optional[DatabaseType] = None
     _indexers: Optional[Dict[str, Indexer]] = None
     # hidden: Optional[bool] = False

@@ -197,6 +200,10 @@ class Collection:
         """
         raise NotImplementedError

+    def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
+        patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
+        self._broadcast(patches, **kwargs)
+
     def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
         """
         Delete one or more objects from the collection.
@@ -301,7 +308,7 @@

     def query_facets(
         self, where: Optional[Dict] = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
-    ) -> Dict[str, Dict[str, int]]:
+    ) -> Dict[str, List[Tuple[Any, int]]]:
         """
         Run a query to get facet counts for one or more columns.

@@ -319,7 +326,7 @@
         :param query: A Query object representing the base query.
         :param facet_columns: A list of column names to get facet counts for.
         :param facet_limit:
-        :return: A dictionary where keys are column names and values are pandas DataFrames
+        :return: A dictionary where keys are column names and values are lists of (value, count) tuples
             containing the facet counts for each unique value in the respective column.
         """
         raise NotImplementedError
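
The signature change replaces per-column pandas DataFrames with (value, count) pairs. An illustrative sketch of the new return shape (collection and column names are hypothetical):

```python
facets = persons.query_facets(facet_columns=["occupation"])
# New shape: column name -> list of (value, count) tuples, e.g.
# {"occupation": [("welder", 10), ("florist", 2)]}
for value, count in facets["occupation"]:
    print(value, count)
```
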
@@ -523,6 +530,7 @@
             ix_coll.delete_where()

         ix_coll.insert(objects_with_ix, **kwargs)
+        ix_coll.commit()

     def list_index_names(self) -> List[str]:
         """
@@ -557,12 +565,22 @@

         :return:
         """
-        sv = self.parent.schema_view
+        sv: SchemaView = self.parent.schema_view
         if sv:
             cls = sv.get_class(self.target_class_name)
+            if cls and not cls.attributes:
+                if not sv.class_induced_slots(cls.name):
+                    for att in self._induce_attributes():
+                        cls.attributes[att.name] = att
+                    sv.set_modified()
             return cls
         return None

+    def _induce_attributes(self) -> List[SlotDefinition]:
+        result = self.find({}, limit=-1)
+        cd = self.induce_class_definition_from_objects(result.rows, max_sample_size=None)
+        return list(cd.attributes.values())
+
     @property
     def identifier_attribute_name(self) -> Optional[str]:
         """
@@ -579,6 +597,37 @@
                 return att.name
         return None

+    def set_identifier_attribute_name(self, name: str):
+        """
+        Set the name of the identifier attribute for the collection.
+
+        AKA the primary key.
+
+        :param name: The name of the identifier attribute.
+        """
+        cd = self.class_definition()
+        if not cd:
+            raise ValueError(f"Cannot find class definition for {self.target_class_name}")
+        id_att = None
+        candidates = []
+        sv: SchemaView = self.parent.schema_view
+        cls = sv.get_class(cd.name)
+        existing_id_slot = sv.get_identifier_slot(cls.name)
+        if existing_id_slot:
+            if existing_id_slot.name == name:
+                return
+            existing_id_slot.identifier = False
+        for att in cls.attributes.values():
+            candidates.append(att.name)
+            if att.name == name:
+                att.identifier = True
+                id_att = att
+            else:
+                att.identifier = False
+        if not id_att:
+            raise ValueError(f"No attribute found with name {name} in {candidates}")
+        sv.set_modified()
+
     def object_identifier(self, obj: OBJECT, auto=True) -> Optional[IDENTIFIER]:
         """
         Return the identifier for an object.
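
A short usage sketch for the new setter (the data is illustrative; note the ValueError when the named attribute is absent):

```python
persons.insert([{"id": "P1", "name": "Alice"}, {"id": "P2", "name": "Bob"}])
persons.set_identifier_attribute_name("id")  # marks "id" as the identifier slot
assert persons.identifier_attribute_name == "id"
# persons.set_identifier_attribute_name("age") would raise ValueError: no such attribute
```
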
@@ -622,6 +671,8 @@
             for k, v in obj.items():
                 keys[k].append(v)
         for k, vs in keys.items():
+            if k == "_id":
+                continue
             multivalueds = []
             inlineds = []
             rngs = []
@@ -698,6 +749,39 @@
         """
         raise NotImplementedError

+    def apply_patches(self, patches: List[PatchDict], **kwargs):
+        """
+        Apply a list of patches to the collection.
+
+        Patches conform to the JSON Patch format.
+
+        :param patches:
+        :param kwargs:
+        :return:
+        """
+        all_objs = self.find(limit=-1).rows
+        primary_key = self.identifier_attribute_name
+        if not primary_key:
+            raise ValueError(f"No primary key for {self.target_class_name}")
+        new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
+        self.replace(new_objs)
+
+    def diff(self, other: "Collection", **kwargs):
+        """
+        Diff two collections, returning the patches that transform this collection into the other.
+
+        :param other:
+        :param kwargs:
+        :return:
+        """
+        src_objs = self.find(limit=-1).rows
+        tgt_objs = other.find(limit=-1).rows
+        primary_key = self.identifier_attribute_name
+        if not primary_key:
+            raise ValueError(f"No primary key for {self.target_class_name}")
+        return patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
+
     def iter_validate_collection(self, **kwargs) -> Iterator["ValidationResult"]:
         """
         Validate the contents of the collection
@@ -717,3 +801,14 @@
         for obj in result.rows:
             obj = clean_empties(obj)
             yield from validator.iter_results(obj, class_name)
+
+    def commit(self):
+        """
+        Commit changes to the collection.
+
+        :return:
+        """
+        pass
+
+    def _broadcast(self, *args, **kwargs):
+        self.parent.broadcast(self, *args, **kwargs)
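
Together, `diff` and `apply_patches` give a JSON-Patch round trip between collections that share a primary key. A minimal sketch (the two collection handles are hypothetical):

```python
patches = old_persons.diff(new_persons)
# patches is a list of PatchDict objects, keyed on identifier_attribute_name
old_persons.apply_patches(patches)
# old_persons now holds the same objects as new_persons
```
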
@@ -3,9 +3,24 @@ from abc import ABC
 from collections import defaultdict
 from copy import copy
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, ClassVar, Dict, Iterator, Optional, Sequence, Type, Union
-
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    ClassVar,
+    Dict,
+    Generic,
+    Iterator,
+    List,
+    Optional,
+    Sequence,
+    Type,
+    Union,
+)
+
+from linkml_store.api.types import CollectionType
 from linkml_store.utils.format_utils import load_objects, render_output
+from linkml_store.utils.patch_utils import PatchDict

 try:
     from linkml.validator.report import Severity, ValidationResult
@@ -24,8 +39,10 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

+LISTENER = Callable[[Collection, List[PatchDict]], None]
+

-class Database(ABC):
+class Database(ABC, Generic[CollectionType]):
     """
     A Database provides access to named collections of data.

@@ -89,6 +106,8 @@
     metadata: Optional[DatabaseConfig] = None
     collection_class: ClassVar[Optional[Type[Collection]]] = None

+    listeners: Optional[List[LISTENER]] = None
+
     def __init__(self, handle: Optional[str] = None, metadata: Optional[DatabaseConfig] = None, **kwargs):
         if metadata:
             self.metadata = metadata
@@ -233,7 +252,8 @@
         :param kwargs:
         :return:
         """
-        raise NotImplementedError()
+        for coll in self.list_collections():
+            coll.commit()

     def close(self, **kwargs):
         """
@@ -301,6 +321,7 @@
             alias = name
         self._collections[alias] = collection
         if recreate_if_exists:
+            logger.debug(f"Recreating collection {collection.name}")
             collection.delete_where({}, missing_ok=True)
         return collection

@@ -418,7 +439,11 @@
         :return:

         """
-        raise NotImplementedError
+        if query.from_table:
+            collection = self.get_collection(query.from_table)
+            return collection.query(query, **kwargs)
+        else:
+            raise NotImplementedError(f"Querying without a table is not supported in {self.__class__.__name__}")

     @property
     def schema_view(self) -> SchemaView:
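
With this default, table-scoped queries work on any backend that implements collection-level query. A sketch using the `Query` fields seen elsewhere in this diff:

```python
from linkml_store.api.queries import Query

q = Query(from_table="persons", where_clause={"name": "Alice"})
result = db.query(q)  # delegates to db.get_collection("persons").query(q)
print(result.num_rows, result.rows)
```
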
@@ -689,3 +714,9 @@
         logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
         with open(location, "w", encoding="utf-8") as stream:
             stream.write(render_output(obj, format=target_format))
+
+    def broadcast(self, source: Collection, patches: List[PatchDict]):
+        if not self.listeners:
+            return
+        for listener in self.listeners:
+            listener(source, patches)
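
The broadcast path is: `Collection.insert` (e.g. the DuckDB override below) calls `_post_insert_hook`, which builds JSON-Patch dicts and hands them to `Database.broadcast`. A hedged sketch of a listener; since the diff shows no registration helper, it assumes `listeners` is assigned directly:

```python
def log_patches(source, patches):
    # LISTENER signature: Callable[[Collection, List[PatchDict]], None]
    print(f"{len(patches)} patch(es) from collection {source.alias}")

db.listeners = [log_patches]  # assumption: no register API appears in this diff
persons.insert({"id": "P3", "name": "Carol"})  # triggers _post_insert_hook -> broadcast
```
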
@@ -38,6 +38,7 @@ class DuckDBCollection(Collection):
             with conn.begin():
                 conn.execute(insert(table), objs)
             conn.commit()
+        self._post_insert_hook(objs)

     def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
         if not isinstance(objs, list):
@@ -1,16 +1,15 @@
 """
-Adapter for DuckDB embedded database.
+Adapter for FileSystem wrapper

 Handles have the form:

-- ``duckdb:///<path>`` for a file-based database
-- ``duckdb:///:memory:`` for an in-memory database
-"""
+- ``file:<path>`` for a local file
+"""

-from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
-from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
+from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
+from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase

 __all__ = [
-    "DuckDBCollection",
-    "DuckDBDatabase",
+    "FileSystemCollection",
+    "FileSystemDatabase",
 ]
@@ -1,142 +1,177 @@
 import logging
+from pathlib import Path
 from typing import Any, Dict, List, Optional, Union

-import sqlalchemy as sqla
-from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
-from sqlalchemy import Column, Table, delete, insert, inspect, text
-from sqlalchemy.sql.ddl import CreateTable
-
 from linkml_store.api import Collection
 from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
-from linkml_store.api.queries import Query
-from linkml_store.api.stores.duckdb.mappings import TMAP
-from linkml_store.utils.sql_utils import facet_count_sql
+from linkml_store.api.queries import Query, QueryResult
+from linkml_store.api.types import DatabaseType
+from linkml_store.utils.query_utils import mongo_query_to_match_function

 logger = logging.getLogger(__name__)


-class FileSystemCollection(Collection):
-    _table_created: bool = None
+class FileSystemCollection(Collection[DatabaseType]):
+    path: Optional[Path] = None
+    file_format: Optional[str] = None
+    encoding: Optional[str] = None
+    _objects_list: List[OBJECT] = None
+    _object_map: Dict[str, OBJECT] = None
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        parent: DatabaseType = self.parent
+        if not self.path:
+            if self.parent:
+                self.path = Path(parent.directory_path)
+        self._objects_list = []
+        self._object_map = {}
+        if not self.file_format:
+            self.file_format = "json"
+
+    @property
+    def path_to_file(self):
+        return Path(self.parent.directory_path) / f"{self.name}.{self.file_format}"
+
+    @property
+    def objects_as_list(self) -> List[OBJECT]:
+        if self._object_map:
+            return list(self._object_map.values())
+        else:
+            return self._objects_list
+
+    def _set_objects(self, objs: List[OBJECT]):
+        pk = self.identifier_attribute_name
+        if pk:
+            self._object_map = {obj[pk]: obj for obj in objs}
+            self._objects_list = []
+        else:
+            self._objects_list = objs
+            self._object_map = {}
+
+    def commit(self):
+        path = self.path_to_file
+        if not path:
+            raise ValueError("Path not set")
+        path.parent.mkdir(parents=True, exist_ok=True)
+        self._save(path)
+
+    def _save(self, path: Path):
+        encoding = self.encoding or "utf-8"
+        fmt = self.file_format or "json"
+        mode = "w"
+        if fmt == "parquet":
+            mode = "wb"
+            encoding = None
+        with open(path, mode, encoding=encoding) as stream:
+            if fmt == "json":
+                import json
+
+                json.dump(self.objects_as_list, stream, indent=2)
+            elif fmt == "jsonl":
+                import jsonlines
+
+                writer = jsonlines.Writer(stream)
+                writer.write_all(self.objects_as_list)
+            elif fmt == "yaml":
+                import yaml
+
+                yaml.dump_all(self.objects_as_list, stream)
+            elif fmt == "parquet":
+                import pandas as pd
+                import pyarrow
+                import pyarrow.parquet as pq
+
+                df = pd.DataFrame(self.objects_as_list)
+                table = pyarrow.Table.from_pandas(df)
+                pq.write_table(table, stream)
+            elif fmt in {"csv", "tsv"}:
+                import csv
+
+                delimiter = "\t" if fmt == "tsv" else ","
+                fieldnames = list(self.objects_as_list[0].keys())
+                for obj in self.objects_as_list[1:]:
+                    fieldnames.extend([k for k in obj.keys() if k not in fieldnames])
+                writer = csv.DictWriter(stream, fieldnames=fieldnames, delimiter=delimiter)
+                writer.writeheader()
+                for obj in self.objects_as_list:
+                    writer.writerow(obj)
+            else:
+                raise ValueError(f"Unsupported file format: {fmt}")

     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
         if not isinstance(objs, list):
             objs = [objs]
         if not objs:
             return
-        cd = self.class_definition()
-        if not cd:
-            cd = self.induce_class_definition_from_objects(objs)
-        self._create_table(cd)
-        table = self._sqla_table(cd)
-        logger.info(f"Inserting into: {self.alias} // T={table.name}")
-        engine = self.parent.engine
-        col_names = [c.name for c in table.columns]
-        objs = [{k: obj.get(k, None) for k in col_names} for obj in objs]
-        with engine.connect() as conn:
-            with conn.begin():
-                conn.execute(insert(table), objs)
-            conn.commit()
+        pk = self.identifier_attribute_name
+        if pk:
+            for obj in objs:
+                if pk not in obj:
+                    raise ValueError(f"Primary key {pk} not found in object {obj}")
+                pk_val = obj[pk]
+                self._object_map[pk_val] = obj
+        else:
+            self._objects_list.extend(objs)

     def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
         if not isinstance(objs, list):
             objs = [objs]
-        cd = self.class_definition()
-        if not cd:
-            cd = self.induce_class_definition_from_objects(objs)
-        table = self._sqla_table(cd)
-        engine = self.parent.engine
-        with engine.connect() as conn:
+        if not objs:
+            return 0
+        pk = self.identifier_attribute_name
+        n = 0
+        if pk:
             for obj in objs:
-                conditions = [table.c[k] == v for k, v in obj.items() if k in cd.attributes]
-                stmt = delete(table).where(*conditions)
-                stmt = stmt.compile(engine)
-                conn.execute(stmt)
-            conn.commit()
-        return
+                pk_val = obj[pk]
+                if pk_val in self._object_map:
+                    del self._object_map[pk_val]
+                    n += 1
+        else:
+            n = len(self._objects_list)
+            self._objects_list = [o for o in self._objects_list if o not in objs]
+            n -= len(self._objects_list)
+        return n

     def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
         logger.info(f"Deleting from {self.target_class_name} where: {where}")
         if where is None:
             where = {}
-        cd = self.class_definition()
-        if not cd:
-            logger.info(f"No class definition found for {self.target_class_name}, assuming not prepopulated")
-            return 0
-        table = self._sqla_table(cd)
-        engine = self.parent.engine
-        inspector = inspect(engine)
-        table_exists = table.name in inspector.get_table_names()
-        if not table_exists:
-            logger.info(f"Table {table.name} does not exist, assuming no data")
-            return 0
-        with engine.connect() as conn:
-            conditions = [table.c[k] == v for k, v in where.items()]
-            stmt = delete(table).where(*conditions)
-            stmt = stmt.compile(engine)
-            result = conn.execute(stmt)
-            deleted_rows_count = result.rowcount
-            if deleted_rows_count == 0 and not missing_ok:
-                raise ValueError(f"No rows found for {where}")
-            conn.commit()
-            return deleted_rows_count if deleted_rows_count > -1 else None
+
+        def matches(obj: OBJECT):
+            for k, v in where.items():
+                if obj.get(k) != v:
+                    return False
+            return True
+
+        curr_objects = [o for o in self.objects_as_list if not matches(o)]
+        self._set_objects(curr_objects)
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        where = query.where_clause or {}
+        match = mongo_query_to_match_function(where)
+        rows = [o for o in self.objects_as_list if match(o)]
+        count = len(rows)
+        return QueryResult(query=query, num_rows=count, rows=rows)

     def query_facets(
         self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
     ) -> Dict[str, Dict[str, int]]:
-        results = {}
-        cd = self.class_definition()
-        with self.parent.engine.connect() as conn:
-            if not facet_columns:
-                facet_columns = list(self.class_definition().attributes.keys())
-            for col in facet_columns:
-                logger.debug(f"Faceting on {col}")
-                if isinstance(col, tuple):
-                    sd = SlotDefinition(name="PLACEHOLDER")
-                else:
-                    sd = cd.attributes[col]
-                facet_query = self._create_query(where_clause=where)
-                facet_query_str = facet_count_sql(facet_query, col, multivalued=sd.multivalued)
-                logger.debug(f"Facet query: {facet_query_str}")
-                rows = list(conn.execute(text(facet_query_str)))
-                results[col] = rows
-        return results
-
-    def _sqla_table(self, cd: ClassDefinition) -> Table:
-        schema_view = self.parent.schema_view
-        metadata_obj = sqla.MetaData()
-        cols = []
-        for att in schema_view.class_induced_slots(cd.name):
-            typ = TMAP.get(att.range, sqla.String)
-            if att.inlined:
-                typ = sqla.JSON
-            if att.multivalued:
-                typ = sqla.ARRAY(typ, dimensions=1)
-            if att.array:
-                typ = sqla.ARRAY(typ, dimensions=1)
-            col = Column(att.name, typ)
-            cols.append(col)
-        t = Table(self.alias, metadata_obj, *cols)
-        return t
-
-    def _create_table(self, cd: ClassDefinition):
-        if self._table_created or self.metadata.is_prepopulated:
-            logger.info(f"Already have table for: {cd.name}")
-            return
-        query = Query(
-            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
-        )
-        qr = self.parent.query(query)
-        if qr.num_rows > 0:
-            logger.info(f"Table already exists for {cd.name}")
-            self._table_created = True
-            self.metadata.is_prepopulated = True
-            return
-        logger.info(f"Creating table for {cd.name}")
-        t = self._sqla_table(cd)
-        ct = CreateTable(t)
-        ddl = str(ct.compile(self.parent.engine))
-        with self.parent.engine.connect() as conn:
-            conn.execute(text(ddl))
-            conn.commit()
-        self._table_created = True
-        self.metadata.is_prepopulated = True
+        match = mongo_query_to_match_function(where)
+        rows = [o for o in self.objects_as_list if match(o)]
+        if not facet_columns:
+            facet_columns = self.class_definition().attributes.keys()
+        facet_results = {c: {} for c in facet_columns}
+        for row in rows:
+            for fc in facet_columns:
+                if fc in row:
+                    v = row[fc]
+                    if v not in facet_results[fc]:
+                        facet_results[fc][v] = 1
+                    else:
+                        facet_results[fc][v] += 1
+        return {fc: list(facet_results[fc].items()) for fc in facet_results}
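
End-to-end, the new adapter keeps objects in memory (a dict keyed by primary key when one is set, a plain list otherwise) and only touches disk on `commit()`. A hedged sketch combining the pieces above (paths and data are illustrative, and the `file:<path>` handle form follows the module docstring):

```python
from linkml_store.api.client import Client
from linkml_store.api.queries import Query

client = Client()
db = client.attach_database("file:/tmp/people_store")  # handle form per the module docstring
coll = db.create_collection("Person", alias="persons")
coll.insert([{"id": "P1", "name": "Alice"}, {"id": "P2", "name": "Bob"}])

# query() filters the in-memory objects via mongo_query_to_match_function
result = coll.query(Query(from_table="persons", where_clause={"name": "Alice"}))
assert result.num_rows == 1

coll.commit()  # writes /tmp/people_store/persons.json (file_format defaults to "json")
```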