linkml-store 0.0.0__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release.

Files changed (35)
  1. linkml_store/api/__init__.py +2 -2
  2. linkml_store/api/client.py +108 -7
  3. linkml_store/api/collection.py +221 -30
  4. linkml_store/api/config.py +97 -0
  5. linkml_store/api/database.py +207 -17
  6. linkml_store/api/queries.py +12 -1
  7. linkml_store/api/stores/chromadb/__init__.py +0 -0
  8. linkml_store/api/stores/chromadb/chromadb_collection.py +114 -0
  9. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  10. linkml_store/api/stores/duckdb/duckdb_collection.py +47 -14
  11. linkml_store/api/stores/duckdb/duckdb_database.py +35 -44
  12. linkml_store/api/stores/hdf5/__init__.py +0 -0
  13. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  14. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  15. linkml_store/api/stores/mongodb/mongodb_collection.py +86 -40
  16. linkml_store/api/stores/mongodb/mongodb_database.py +58 -67
  17. linkml_store/api/stores/solr/solr_collection.py +132 -0
  18. linkml_store/api/stores/solr/solr_database.py +82 -0
  19. linkml_store/api/stores/solr/solr_utils.py +0 -0
  20. linkml_store/cli.py +369 -0
  21. linkml_store/index/__init__.py +33 -0
  22. linkml_store/index/implementations/{llm_index.py → llm_indexer.py} +2 -2
  23. linkml_store/index/implementations/{simple_index.py → simple_indexer.py} +6 -3
  24. linkml_store/index/{index.py → indexer.py} +7 -4
  25. linkml_store/utils/format_utils.py +93 -0
  26. linkml_store/utils/object_utils.py +73 -0
  27. linkml_store/utils/sql_utils.py +46 -7
  28. {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/METADATA +17 -6
  29. linkml_store-0.1.6.dist-info/RECORD +41 -0
  30. linkml_store-0.1.6.dist-info/entry_points.txt +3 -0
  31. linkml_store/api/metadata.py +0 -5
  32. linkml_store-0.0.0.dist-info/RECORD +0 -29
  33. linkml_store-0.0.0.dist-info/entry_points.txt +0 -3
  34. {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/LICENSE +0 -0
  35. {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/WHEEL +0 -0
linkml_store/api/stores/duckdb/duckdb_collection.py
@@ -1,22 +1,24 @@
-from dataclasses import dataclass
+import logging
 from typing import Any, Dict, List, Optional, Union

 import sqlalchemy as sqla
 from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
-from sqlalchemy import Column, Table, delete, insert, text
+from sqlalchemy import Column, Table, delete, insert, inspect, text
 from sqlalchemy.sql.ddl import CreateTable

 from linkml_store.api import Collection
-from linkml_store.api.collection import OBJECT
+from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
+from linkml_store.api.queries import Query
 from linkml_store.api.stores.duckdb.mappings import TMAP
 from linkml_store.utils.sql_utils import facet_count_sql

+logger = logging.getLogger(__name__)
+

-@dataclass
 class DuckDBCollection(Collection):
     _table_created: bool = None

-    def add(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
+    def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
         if not isinstance(objs, list):
             objs = [objs]
         if not objs:
@@ -26,6 +28,7 @@ class DuckDBCollection(Collection):
             cd = self.induce_class_definition_from_objects(objs)
         self._create_table(cd)
         table = self._sqla_table(cd)
+        logger.info(f"Inserting into: {self._alias} // T={table.name}")
         engine = self.parent.engine
         col_names = [c.name for c in table.columns]
         objs = [{k: obj.get(k, None) for k in col_names} for obj in objs]
@@ -51,39 +54,58 @@ class DuckDBCollection(Collection):
             conn.commit()
         return len(objs)

-    def delete_where(self, where: Optional[Dict[str, Any]] = None, **kwargs) -> int:
+    def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
+        logger.info(f"Deleting from {self._target_class_name} where: {where}")
+        if where is None:
+            where = {}
         cd = self.class_definition()
+        if not cd:
+            logger.info(f"No class definition found for {self._target_class_name}, assuming not prepopulated")
+            return 0
         table = self._sqla_table(cd)
         engine = self.parent.engine
+        inspector = inspect(engine)
+        table_exists = table.name in inspector.get_table_names()
+        if not table_exists:
+            logger.info(f"Table {table.name} does not exist, assuming no data")
+            return 0
         with engine.connect() as conn:
             conditions = [table.c[k] == v for k, v in where.items()]
             stmt = delete(table).where(*conditions)
             stmt = stmt.compile(engine)
-            conn.execute(stmt)
+            result = conn.execute(stmt)
+            deleted_rows_count = result.rowcount
+            if deleted_rows_count == 0 and not missing_ok:
+                raise ValueError(f"No rows found for {where}")
             conn.commit()
-        return 0
+        return deleted_rows_count

-    def query_facets(self, where: Dict = None, facet_columns: List[str] = None) -> Dict[str, Dict[str, int]]:
+    def query_facets(
+        self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
+    ) -> Dict[str, Dict[str, int]]:
         results = {}
         cd = self.class_definition()
         with self.parent.engine.connect() as conn:
             if not facet_columns:
                 facet_columns = list(self.class_definition().attributes.keys())
             for col in facet_columns:
+                logger.debug(f"Faceting on {col}")
                 if isinstance(col, tuple):
                     sd = SlotDefinition(name="PLACEHOLDER")
                 else:
                     sd = cd.attributes[col]
                 facet_query = self._create_query(where_clause=where)
                 facet_query_str = facet_count_sql(facet_query, col, multivalued=sd.multivalued)
+                logger.debug(f"Facet query: {facet_query_str}")
                 rows = list(conn.execute(text(facet_query_str)))
                 results[col] = rows
         return results

     def _sqla_table(self, cd: ClassDefinition) -> Table:
+        schema_view = self.parent.schema_view
         metadata_obj = sqla.MetaData()
         cols = []
-        for att in cd.attributes.values():
+        for att in schema_view.class_induced_slots(cd.name):
             typ = TMAP.get(att.range, sqla.String)
             if att.inlined:
                 typ = sqla.JSON
@@ -93,17 +115,28 @@ class DuckDBCollection(Collection):
                 typ = sqla.ARRAY(typ, dimensions=1)
             col = Column(att.name, typ)
             cols.append(col)
-        t = Table(self.name, metadata_obj, *cols)
+        t = Table(self._alias, metadata_obj, *cols)
         return t

     def _create_table(self, cd: ClassDefinition):
-        if self._table_created:
+        if self._table_created or self.metadata.is_prepopulated:
+            logger.info(f"Already have table for: {cd.name}")
             return
+        query = Query(
+            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self._alias}
+        )
+        qr = self.parent.query(query)
+        if qr.num_rows > 0:
+            logger.info(f"Table already exists for {cd.name}")
+            self._table_created = True
+            self.metadata.is_prepopulated = True
+            return
+        logger.info(f"Creating table for {cd.name}")
         t = self._sqla_table(cd)
         ct = CreateTable(t)
         ddl = str(ct.compile(self.parent.engine))
         with self.parent.engine.connect() as conn:
             conn.execute(text(ddl))
             conn.commit()
-        if not self._table_created:
-            self._table_created = True
+        self._table_created = True
+        self.metadata.is_prepopulated = True
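
Taken together, the DuckDBCollection changes rename add to insert, make delete_where report its row count (raising when missing_ok=False and nothing matches), and route table naming through self._alias. A minimal usage sketch, assuming create_collection is now supplied by the base Database class (this diff removes the DuckDB-specific override and adds collection_class = DuckDBCollection in duckdb_database.py); the collection and field names are hypothetical:

    from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase

    db = DuckDBDatabase()  # handle defaults to duckdb:///:memory:
    people = db.create_collection("Person")  # hypothetical collection name
    people.insert([{"id": "P1", "name": "Alice"}, {"id": "P2", "name": "Bob"}])
    # delete_where now returns the number of rows removed; with missing_ok=False
    # a zero-row delete raises ValueError instead of silently returning 0
    assert people.delete_where({"name": "Alice"}) == 1
    assert people.delete_where({"name": "Zed"}) == 0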

linkml_store/api/stores/duckdb/duckdb_database.py
@@ -1,15 +1,14 @@
 import json
 import logging
-from dataclasses import dataclass
 from typing import Optional

 import pandas as pd
 import sqlalchemy
 from duckdb import DuckDBPyConnection
 from linkml_runtime import SchemaView
-from linkml_runtime.linkml_model import SlotDefinition
+from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
 from linkml_runtime.utils.schema_builder import SchemaBuilder
-from sqlalchemy import text
+from sqlalchemy import NullPool, text

 from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
@@ -20,54 +19,36 @@ TYPE_MAP = {
     "VARCHAR": "string",
     "BIGINT": "integer",
     "BOOLEAN": "boolean",
+    "DATE": "date",
+    "DOUBLE": "float",
+    "INTEGER": "integer",
 }


 logger = logging.getLogger(__name__)


-def run_query(con: DuckDBPyConnection, query: Query, **kwargs):
-    """
-    Run a query and return the result.
-
-    >>> import duckdb
-    >>> con = duckdb.connect("db/mgi.db")
-    >>> query = Query(from_table="gaf_association", limit=5)
-    >>> result = run_query(con, query)
-    >>> print(result.num_rows)
-    532233
-
-    :param con:
-    :param query:
-    :return:
-    """
-    count_query_str = query_to_sql(query, count=True)
-    num_rows = con.execute(count_query_str).fetchall()[0][0]
-    logger.debug(f"num_rows: {num_rows}")
-    query_str = query_to_sql(query, **kwargs)
-    logger.debug(f"query_str: {query_str}")
-    rows = con.execute(query_str).fetchdf()
-    qr = QueryResult(query=query, num_rows=num_rows)
-    qr.set_rows(rows)
-    return qr
-
-
-@dataclass
 class DuckDBDatabase(Database):
     _connection: DuckDBPyConnection = None
     _engine: sqlalchemy.Engine = None
+    collection_class = DuckDBCollection

-    def __post_init__(self):
-        if not self.handle:
-            self.handle = "duckdb:///:memory:"
+    def __init__(self, handle: Optional[str] = None, **kwargs):
+        if handle is None:
+            handle = "duckdb:///:memory:"
+        super().__init__(handle=handle, **kwargs)

     @property
     def engine(self) -> sqlalchemy.Engine:
         if not self._engine:
             handle = self.handle
             if not handle.startswith("duckdb://") and not handle.startswith(":"):
-                handle = f"duckdb://{handle}"
-            self._engine = sqlalchemy.create_engine(handle)
+                handle = f"duckdb:///{handle}"
+            if ":memory:" not in handle:
+                # TODO: investigate this; duckdb appears to be prematurely caching
+                self._engine = sqlalchemy.create_engine(handle, poolclass=NullPool)
+            else:
+                self._engine = sqlalchemy.create_engine(handle)
         return self._engine

     def commit(self, **kwargs):
@@ -80,6 +61,14 @@ class DuckDBDatabase(Database):
     def query(self, query: Query, **kwargs) -> QueryResult:
         json_encoded_cols = []
         if query.from_table:
+            if not query.from_table.startswith("information_schema"):
+                meta_query = Query(
+                    from_table="information_schema.tables", where_clause={"table_name": query.from_table}
+                )
+                qr = self.query(meta_query)
+                if qr.num_rows == 0:
+                    logger.debug(f"Table {query.from_table} not created yet")
+                    return QueryResult(query=query, num_rows=0, rows=[])
             sv = self._schema_view
             if sv:
                 cd = None
@@ -127,21 +116,15 @@ class DuckDBDatabase(Database):
             collection = DuckDBCollection(name=table_name, parent=self)
             self._collections[table_name] = collection

-    def create_collection(self, name: str, alias: Optional[str] = None, **kwargs) -> DuckDBCollection:
-        collection = DuckDBCollection(name=name, parent=self)
-        if not self._collections:
-            self._collections = {}
-        if not alias:
-            alias = name
-        self._collections[alias] = collection
-        return collection
-
     def induce_schema_view(self) -> SchemaView:
         # TODO: unify schema introspection
+        # TODO: handle case where schema is provided in advance
+        logger.info(f"Inducing schema view for {self.metadata.handle}")
         sb = SchemaBuilder()
         schema = sb.schema
         query = Query(from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE"})
         qr = self.query(query)
+        logger.info(f"Found {qr.num_rows} information_schema.tables // {qr.rows}")
         if qr.num_rows:
             table_names = [row["table_name"] for row in qr.rows]
             for tbl in table_names:
@@ -162,5 +145,13 @@ class DuckDBDatabase(Database):
                     row["column_name"], required=row["is_nullable"] == "NO", multivalued=multivalued, range=rng
                 )
                 sb.schema.classes[tbl_name].attributes[sd.name] = sd
+                logger.info(f"Introspected slot: {tbl_name}.{sd.name}: {sd.range}")
         sb.add_defaults()
+        for cls_name in schema.classes:
+            if cls_name in self.metadata.collections:
+                collection_metadata = self.metadata.collections[cls_name]
+                if collection_metadata.attributes:
+                    del schema.classes[cls_name]
+                    cls = ClassDefinition(name=collection_metadata.type, attributes=collection_metadata.attributes)
+                    schema.classes[cls.name] = cls
         return SchemaView(schema)
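
Two behaviors in the engine property are easy to miss: a bare file path is normalized to a duckdb:/// SQLAlchemy URL, and file-backed handles are created with poolclass=NullPool to sidestep the premature-caching issue flagged in the TODO. A standalone sketch of that normalization (make_engine is a hypothetical helper mirroring the property, and running it requires the duckdb-engine dialect to be installed):

    import sqlalchemy
    from sqlalchemy import NullPool

    def make_engine(handle: str) -> sqlalchemy.Engine:
        # mirror of DuckDBDatabase.engine: prefix bare paths, avoid pooling on files
        if not handle.startswith("duckdb://") and not handle.startswith(":"):
            handle = f"duckdb:///{handle}"
        if ":memory:" not in handle:
            return sqlalchemy.create_engine(handle, poolclass=NullPool)
        return sqlalchemy.create_engine(handle)

    print(make_engine("/tmp/people.db").url)      # duckdb:////tmp/people.db
    print(make_engine("duckdb:///:memory:").url)  # duckdb:///:memory: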

linkml_store/api/stores/hdf5/__init__.py (file without changes)

linkml_store/api/stores/hdf5/hdf5_collection.py
@@ -0,0 +1,104 @@
+import json
+import logging
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import h5py
+
+from linkml_store.api import Collection
+from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
+from linkml_store.api.queries import Query, QueryResult
+
+logger = logging.getLogger(__name__)
+
+
+class HDF5Collection(Collection):
+
+    @property
+    def hdf5_group(self) -> h5py.Group:
+        return self.parent.file[self.name]
+
+    def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        for obj in objs:
+            if "id" not in obj:
+                raise ValueError("Each object must have an 'id' field.")
+            obj_id = str(obj["id"])
+            for key, value in obj.items():
+                if key == "id":
+                    continue
+                if isinstance(value, (dict, list)):
+                    value = json.dumps(value)
+                self.hdf5_group.create_dataset(f"{obj_id}/{key}", data=value)
+
+    def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
+        if not isinstance(objs, list):
+            objs = [objs]
+        count = 0
+        for obj in objs:
+            if "id" not in obj:
+                raise ValueError("Each object must have an 'id' field.")
+            obj_id = str(obj["id"])
+            if obj_id in self.hdf5_group:
+                del self.hdf5_group[obj_id]
+                count += 1
+        return count
+
+    def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
+        logger.info(f"Deleting from {self._target_class_name} where: {where}")
+        if where is None:
+            where = {}
+        results = self.query(Query(where_clause=where)).rows
+        count = self.delete(results)
+        return count
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        results = []
+        for obj_id in self.hdf5_group:
+            obj = {"id": obj_id}
+            for key, value in self.hdf5_group[obj_id].items():
+                try:
+                    obj[key] = json.loads(value[()])
+                except json.JSONDecodeError:
+                    obj[key] = value[()]
+            if self._match_where_clause(obj, query.where_clause):
+                results.append(obj)
+
+        count = len(results)
+        if query.limit:
+            results = results[: query.limit]
+        return QueryResult(query=query, num_rows=count, rows=results)
+
+    def query_facets(
+        self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
+    ) -> Dict[str, List[Tuple[Any, int]]]:
+        results = {}
+        if not facet_columns:
+            facet_columns = list(self.class_definition().attributes.keys())
+
+        for col in facet_columns:
+            logger.debug(f"Faceting on {col}")
+            facet_counts = {}
+            for obj in self.query(Query(where_clause=where)).rows:
+                if col in obj:
+                    value = obj[col]
+                    if isinstance(value, list):
+                        for v in value:
+                            facet_counts[v] = facet_counts.get(v, 0) + 1
+                    else:
+                        facet_counts[value] = facet_counts.get(value, 0) + 1
+            facet_counts = sorted(facet_counts.items(), key=lambda x: x[1], reverse=True)[:facet_limit]
+            results[col] = facet_counts
+
+        return results
+
+    def _match_where_clause(self, obj: Dict[str, Any], where_clause: Optional[Dict[str, Any]]) -> bool:
+        if where_clause is None:
+            return True
+        for key, value in where_clause.items():
+            if key not in obj:
+                return False
+            if obj[key] != value:
+                return False
+        return True
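
The storage layout implied by insert and query above: each object becomes an HDF5 group named by its id, each field a dataset under that group, and nested dicts and lists are round-tripped through JSON. The same layout written with bare h5py (the file name and object contents are hypothetical):

    import json
    import h5py

    with h5py.File("people.h5", "a") as f:
        grp = f.require_group("Person")  # one group per collection
        obj = {"id": "P1", "name": "Alice", "aliases": ["Ali", "A."]}
        for key, value in obj.items():
            if key == "id":
                continue
            if isinstance(value, (dict, list)):
                value = json.dumps(value)  # nested values stored as JSON strings
            grp.create_dataset(f"{obj['id']}/{key}", data=value)
        # reading back mirrors HDF5Collection.query
        print(json.loads(grp["P1/aliases"][()]))  # ['Ali', 'A.']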

linkml_store/api/stores/hdf5/hdf5_database.py
@@ -0,0 +1,79 @@
+# hdf5_database.py
+
+import logging
+from typing import Optional
+
+import h5py
+from linkml_runtime import SchemaView
+from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
+from linkml_runtime.utils.schema_builder import SchemaBuilder
+
+from linkml_store.api import Database
+from linkml_store.api.queries import Query, QueryResult
+from linkml_store.api.stores.hdf5.hdf5_collection import HDF5Collection
+
+logger = logging.getLogger(__name__)
+
+
+class HDF5Database(Database):
+    _file: h5py.File = None
+    collection_class = HDF5Collection
+
+    def __init__(self, handle: Optional[str] = None, **kwargs):
+        if handle is None:
+            handle = "linkml_store.h5"
+        super().__init__(handle=handle, **kwargs)
+
+    @property
+    def file(self) -> h5py.File:
+        if self._file is None:
+            self._file = h5py.File(self.handle, "a")
+        return self._file
+
+    def commit(self, **kwargs):
+        self.file.flush()
+
+    def close(self, **kwargs):
+        if self._file:
+            self._file.close()
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        if query.from_table:
+            collection = self.get_collection(query.from_table)
+            return collection.query(query, **kwargs)
+
+    def init_collections(self):
+        if self._collections is None:
+            self._collections = {}
+
+        for collection_name in self.file:
+            if collection_name not in self._collections:
+                collection = HDF5Collection(name=collection_name, parent=self)
+                self._collections[collection_name] = collection
+
+    def induce_schema_view(self) -> SchemaView:
+        logger.info(f"Inducing schema view for {self.handle}")
+        sb = SchemaBuilder()
+        schema = sb.schema
+
+        for collection_name in self.file:
+            sb.add_class(collection_name)
+            hdf5_group = self.file[collection_name]
+            for field in hdf5_group:
+                if field == "_id":
+                    continue
+                sd = SlotDefinition(field)
+                if isinstance(hdf5_group[field][()], list):
+                    sd.multivalued = True
+                sb.schema.classes[collection_name].attributes[sd.name] = sd
+
+        sb.add_defaults()
+        for cls_name in schema.classes:
+            if cls_name in self.metadata.collections:
+                collection_metadata = self.metadata.collections[cls_name]
+                if collection_metadata.attributes:
+                    del schema.classes[cls_name]
+                    cls = ClassDefinition(name=collection_metadata.type, attributes=collection_metadata.attributes)
+                    schema.classes[cls.name] = cls
+
+        return SchemaView(schema)
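
A short end-to-end sketch of the new backend, assuming create_collection is inherited from the base Database class via collection_class = HDF5Collection; the path and records are hypothetical:

    from linkml_store.api.queries import Query
    from linkml_store.api.stores.hdf5.hdf5_database import HDF5Database

    db = HDF5Database(handle="people.h5")  # defaults to linkml_store.h5 if omitted
    people = db.create_collection("Person")
    people.insert({"id": "P1", "name": "Alice"})
    db.commit()  # flushes the underlying h5py.File
    result = db.query(Query(from_table="Person", where_clause={"name": "Alice"}))
    print(result.num_rows, result.rows)
    db.close()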

linkml_store/api/stores/mongodb/mongodb_collection.py
@@ -1,56 +1,102 @@
-from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Union
+import logging
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from linkml_runtime.linkml_model import SlotDefinition
+from pymongo.collection import Collection as MongoCollection

 from linkml_store.api import Collection
-from linkml_store.api.collection import OBJECT
+from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
+from linkml_store.api.queries import Query, QueryResult
+
+logger = logging.getLogger(__name__)


-@dataclass
 class MongoDBCollection(Collection):
-    """
-    A wrapper around a MongoDB collection
-    """

-    def add(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
-        if not isinstance(objs, list):
-            objs = [objs]
-        if not objs:
-            return
-        cd = self.class_definition()
-        if not cd:
-            cd = self.induce_class_definition_from_objects(objs)
-        collection = self.parent.database[self.name]
-        collection.insert_many(objs)
+    @property
+    def mongo_collection(self) -> MongoCollection:
+        if not self.name:
+            raise ValueError("Collection name not set")
+        return self.parent.native_db[self.name]

-    def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
+    def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
         if not isinstance(objs, list):
             objs = [objs]
-        cd = self.class_definition()
-        if not cd:
-            cd = self.induce_class_definition_from_objects(objs)
-        collection = self.parent.database[self.name]
-        deleted_count = 0
-        for obj in objs:
-            result = collection.delete_one(obj)
-            deleted_count += result.deleted_count
-        return deleted_count
+        self.mongo_collection.insert_many(objs)

-    def delete_where(self, where: Optional[Dict[str, Any]] = None, **kwargs) -> int:
-        collection = self.parent.database[self.name]
-        result = collection.delete_many(where)
-        return result.deleted_count
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        mongo_filter = self._build_mongo_filter(query.where_clause)
+        if query.limit:
+            cursor = self.mongo_collection.find(mongo_filter).limit(query.limit)
+        else:
+            cursor = self.mongo_collection.find(mongo_filter)
+
+        rows = list(cursor)
+        count = self.mongo_collection.count_documents(mongo_filter)
+
+        return QueryResult(query=query, num_rows=count, rows=rows)
+
+    def _build_mongo_filter(self, where_clause: Dict[str, Any]) -> Dict[str, Any]:
+        mongo_filter = {}
+        if where_clause:
+            for field, value in where_clause.items():
+                mongo_filter[field] = value
+        return mongo_filter

-    def query_facets(self, where: Dict = None, facet_columns: List[str] = None) -> Dict[str, Dict[str, int]]:
+    def query_facets(
+        self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
+    ) -> Dict[str, List[Tuple[Any, int]]]:
         results = {}
-        _cd = self.class_definition()
-        collection = self.parent.database[self.name]
+        cd = self.class_definition()
         if not facet_columns:
             facet_columns = list(self.class_definition().attributes.keys())
+
         for col in facet_columns:
-            facet_pipeline = [
-                {"$match": where} if where else {"$match": {}},
-                {"$group": {"_id": f"${col}", "count": {"$sum": 1}}},
-            ]
-            facet_results = list(collection.aggregate(facet_pipeline))
-            results[col] = [(row["_id"], row["count"]) for row in facet_results]
+            logger.debug(f"Faceting on {col}")
+            if isinstance(col, tuple):
+                sd = SlotDefinition(name="PLACEHOLDER")
+            else:
+                sd = cd.attributes[col]
+
+            if sd.multivalued:
+                facet_pipeline = [
+                    {"$match": where} if where else {"$match": {}},
+                    {"$unwind": f"${col}"},
+                    {"$group": {"_id": f"${col}", "count": {"$sum": 1}}},
+                    {"$sort": {"count": -1}},
+                    {"$limit": facet_limit},
+                ]
+            else:
+                facet_pipeline = [
+                    {"$match": where} if where else {"$match": {}},
+                    {"$group": {"_id": f"${col}", "count": {"$sum": 1}}},
+                    {"$sort": {"count": -1}},
+                    {"$limit": facet_limit},
+                ]
+
+            facet_results = list(self.mongo_collection.aggregate(facet_pipeline))
+            results[col] = [(result["_id"], result["count"]) for result in facet_results]

         return results
+
+    def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
+        if not isinstance(objs, list):
+            objs = [objs]
+        filter_conditions = []
+        for obj in objs:
+            filter_condition = {}
+            for key, value in obj.items():
+                filter_condition[key] = value
+            filter_conditions.append(filter_condition)
+        result = self.mongo_collection.delete_many({"$or": filter_conditions})
+        return result.deleted_count
+
+    def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
+        logger.info(f"Deleting from {self._target_class_name} where: {where}")
+        if where is None:
+            where = {}
+        result = self.mongo_collection.delete_many(where)
+        deleted_rows_count = result.deleted_count
+        if deleted_rows_count == 0 and not missing_ok:
+            raise ValueError(f"No rows found for {where}")
+        return deleted_rows_count
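
For reference, the aggregation that query_facets builds for a multivalued slot, written against a bare pymongo collection; the client, database, and field names are hypothetical, and a local facet_limit stands in for DEFAULT_FACET_LIMIT:

    from pymongo import MongoClient

    coll = MongoClient()["demo"]["Person"]
    facet_limit = 100
    pipeline = [
        {"$match": {}},           # {"$match": where} when a filter is given
        {"$unwind": "$aliases"},  # added only for multivalued slots
        {"$group": {"_id": "$aliases", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": facet_limit},
    ]
    for row in coll.aggregate(pipeline):
        print(row["_id"], row["count"])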