linkml-store 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (35)
  1. linkml_store/api/client.py +76 -11
  2. linkml_store/api/collection.py +223 -40
  3. linkml_store/api/config.py +59 -9
  4. linkml_store/api/database.py +45 -27
  5. linkml_store/api/stores/duckdb/duckdb_collection.py +21 -3
  6. linkml_store/api/stores/duckdb/duckdb_database.py +36 -3
  7. linkml_store/api/stores/filesystem/filesystem_collection.py +13 -4
  8. linkml_store/api/stores/filesystem/filesystem_database.py +10 -1
  9. linkml_store/api/stores/mongodb/mongodb_collection.py +80 -34
  10. linkml_store/api/stores/mongodb/mongodb_database.py +1 -36
  11. linkml_store/api/stores/solr/solr_collection.py +4 -4
  12. linkml_store/cli.py +44 -18
  13. linkml_store/index/__init__.py +21 -5
  14. linkml_store/index/implementations/llm_indexer.py +2 -1
  15. linkml_store/index/indexer.py +20 -4
  16. linkml_store/utils/file_utils.py +37 -0
  17. linkml_store/utils/format_utils.py +69 -8
  18. linkml_store/utils/pandas_utils.py +40 -0
  19. linkml_store/utils/schema_utils.py +23 -0
  20. linkml_store/utils/sql_utils.py +2 -1
  21. linkml_store/webapi/__init__.py +0 -0
  22. linkml_store/webapi/html/__init__.py +3 -0
  23. linkml_store/webapi/html/base.html.j2 +24 -0
  24. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  25. linkml_store/webapi/html/database_details.html.j2 +16 -0
  26. linkml_store/webapi/html/databases.html.j2 +14 -0
  27. linkml_store/webapi/html/generic.html.j2 +46 -0
  28. linkml_store/webapi/main.py +572 -0
  29. linkml_store-0.1.11.dist-info/METADATA +171 -0
  30. linkml_store-0.1.11.dist-info/RECORD +60 -0
  31. {linkml_store-0.1.9.dist-info → linkml_store-0.1.11.dist-info}/entry_points.txt +1 -0
  32. linkml_store-0.1.9.dist-info/METADATA +0 -61
  33. linkml_store-0.1.9.dist-info/RECORD +0 -49
  34. {linkml_store-0.1.9.dist-info → linkml_store-0.1.11.dist-info}/LICENSE +0 -0
  35. {linkml_store-0.1.9.dist-info → linkml_store-0.1.11.dist-info}/WHEEL +0 -0
linkml_store/api/database.py

@@ -149,26 +149,19 @@ class Database(ABC, Generic[CollectionType]):
     def _initialize_collections(self):
         if not self.metadata.collections:
             return
-        for name, collection_config in self.metadata.collections.items():
-            alias = collection_config.alias
-            typ = collection_config.type
-            # if typ and alias is None:
-            #     alias = name
-            # if typ is None:
-            #     typ = name
-            # collection = self.create_collection(
-            #     typ, alias=alias, metadata=collection_config.metadata
-            # )
-            if False and typ is not None:
-                if not alias:
-                    alias = name
-                name = typ
-            if not collection_config.name:
-                collection_config.name = name
-            _collection = self.create_collection(name, alias=alias, metadata=collection_config)
+        for k, collection_config in self.metadata.collections.items():
+            if collection_config.alias:
+                if collection_config.alias != k:
+                    raise ValueError(f"Alias mismatch: {collection_config.alias} != {k}")
+            alias = k
+            typ = collection_config.type or alias
+            _collection = self.create_collection(typ, alias=alias, metadata=collection_config)
+            assert _collection.alias == alias
+            assert _collection.target_class_name == typ
             if collection_config.attributes:
+                # initialize schema
                 sv = self.schema_view
-                cd = ClassDefinition(name, attributes=collection_config.attributes)
+                cd = ClassDefinition(typ, attributes=collection_config.attributes)
                 sv.schema.classes[cd.name] = cd
                 sv.set_modified()
                 # assert collection.class_definition() is not None
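The net effect of this change: the key of each entry in metadata.collections now serves as the collection alias, and type (defaulting to the alias) names the LinkML class the collection holds. A minimal sketch of the resulting convention, using the Client and create_collection calls that appear elsewhere in this diff (class and alias names are illustrative):

    from linkml_store.api.client import Client

    client = Client()
    db = client.attach_database("duckdb:///:memory:")
    # first argument is the target class name; the alias is how the collection
    # is addressed (and, for DuckDB, the name of the backing table)
    coll = db.create_collection("Person", alias="persons")
    assert coll.alias == "persons"
    assert coll.target_class_name == "Person"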
@@ -275,7 +268,7 @@ class Database(ABC, Generic[CollectionType]):
         metadata: Optional[CollectionConfig] = None,
         recreate_if_exists=False,
         **kwargs,
-    ) -> Collection:
+    ) -> CollectionType:
         """
         Create a new collection in the current database.
 
@@ -307,8 +300,10 @@ class Database(ABC, Generic[CollectionType]):
         if not name:
             raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")
         collection_cls = self.collection_class
-        collection = collection_cls(name=name, alias=alias, parent=self, metadata=metadata)
-        if metadata and metadata.source_location:
+        collection = collection_cls(name=name, parent=self, metadata=metadata)
+        if alias:
+            collection.metadata.alias = alias
+        if metadata and metadata.source:
             collection.load_from_source()
         if metadata and metadata.attributes:
             sv = self.schema_view
@@ -321,7 +316,7 @@ class Database(ABC, Generic[CollectionType]):
             alias = name
         self._collections[alias] = collection
         if recreate_if_exists:
-            logger.debug(f"Recreating collection {collection.name}")
+            logger.debug(f"Recreating collection {collection.alias}")
             collection.delete_where({}, missing_ok=True)
         return collection
 
@@ -339,7 +334,7 @@ class Database(ABC, Generic[CollectionType]):
         >>> collections = db.list_collections()
         >>> len(collections)
         2
-        >>> [c.name for c in collections]
+        >>> [c.target_class_name for c in collections]
         ['Person', 'Product']
 
         :param include_internal: include internal collections
@@ -367,7 +362,7 @@ class Database(ABC, Generic[CollectionType]):
         ['Person', 'Product']
 
         """
-        return [c.name for c in self.list_collections(**kwargs)]
+        return [c.alias for c in self.list_collections(**kwargs)]
 
     def get_collection(
         self, name: str, type: Optional[str] = None, create_if_not_exists=True, **kwargs
@@ -410,7 +405,7 @@ class Database(ABC, Generic[CollectionType]):
         """
         Initialize collections.
 
-        Not typically called directly: consider making hidden
+        TODO: Not typically called directly: consider making this private
         :return:
         """
         raise NotImplementedError
@@ -502,7 +497,7 @@ class Database(ABC, Generic[CollectionType]):
         >>> sorted(collection.class_definition().slots)
         ['capital', 'code', 'continent', 'languages', 'name']
 
-        :param schema_view:
+        :param schema_view: can be either a path to the schema, or a SchemaView object
         :return:
         """
         if isinstance(schema_view, Path):
@@ -585,7 +580,15 @@ class Database(ABC, Generic[CollectionType]):
 
         :return: A schema view
         """
-        raise NotImplementedError()
+        logger.info(f"Inducing schema view for {self.handle}")
+        from linkml_runtime.utils.schema_builder import SchemaBuilder
+
+        sb = SchemaBuilder()
+
+        for collection_name in self.list_collection_names():
+            coll = self.get_collection(collection_name)
+            sb.add_class(coll.target_class_name)
+        return SchemaView(sb.schema)
 
     def iter_validate_database(self, **kwargs) -> Iterator["ValidationResult"]:
         """
@@ -683,6 +686,21 @@ class Database(ABC, Generic[CollectionType]):
         """
         Drop the database and all collections.
 
+        >>> from linkml_store.api.client import Client
+        >>> client = Client()
+        >>> path = Path("/tmp/test.db")
+        >>> path.parent.mkdir(exist_ok=True, parents=True)
+        >>> db = client.attach_database(f"duckdb:///{path}")
+        >>> db.store({"persons": [{"id": "P1", "name": "John", "age_in_years": 30}]})
+        >>> coll = db.get_collection("persons")
+        >>> coll.find({}).num_rows
+        1
+        >>> db.drop()
+        >>> db = client.attach_database("duckdb:///tmp/test.db", alias="test")
+        >>> coll = db.get_collection("persons")
+        >>> coll.find({}).num_rows
+        0
+
         :param kwargs: additional arguments
         """
         raise NotImplementedError()
linkml_store/api/stores/duckdb/duckdb_collection.py

@@ -18,6 +18,9 @@ logger = logging.getLogger(__name__)
 class DuckDBCollection(Collection):
     _table_created: bool = None
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
         logger.debug(f"Inserting {len(objs)}")
         if not isinstance(objs, list):
@@ -90,7 +93,9 @@ class DuckDBCollection(Collection):
         cd = self.class_definition()
         with self.parent.engine.connect() as conn:
             if not facet_columns:
-                facet_columns = list(self.class_definition().attributes.keys())
+                if not cd:
+                    raise ValueError(f"No class definition found for {self.target_class_name}")
+                facet_columns = list(cd.attributes.keys())
             for col in facet_columns:
                 logger.debug(f"Faceting on {col}")
                 if isinstance(col, tuple):
@@ -101,7 +106,7 @@ class DuckDBCollection(Collection):
                 facet_query_str = facet_count_sql(facet_query, col, multivalued=sd.multivalued)
                 logger.debug(f"Facet query: {facet_query_str}")
                 rows = list(conn.execute(text(facet_query_str)))
-                results[col] = rows
+                results[col] = [tuple(row) for row in rows]
         return results
 
     def _sqla_table(self, cd: ClassDefinition) -> Table:
@@ -110,7 +115,7 @@ class DuckDBCollection(Collection):
         cols = []
         for att in schema_view.class_induced_slots(cd.name):
             typ = TMAP.get(att.range, sqla.String)
-            if att.inlined:
+            if att.inlined or att.inlined_as_list:
                 typ = sqla.JSON
             if att.multivalued:
                 typ = sqla.ARRAY(typ, dimensions=1)
@@ -121,6 +126,17 @@ class DuckDBCollection(Collection):
         t = Table(self.alias, metadata_obj, *cols)
         return t
 
+    def _check_if_initialized(self) -> bool:
+        # if self._initialized:
+        #     return True
+        query = Query(
+            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        )
+        qr = self.parent.query(query)
+        if qr.num_rows > 0:
+            return True
+        return False
+
     def _create_table(self, cd: ClassDefinition):
         if self._table_created or self.metadata.is_prepopulated:
             logger.info(f"Already have table for: {cd.name}")
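The new _check_if_initialized asks DuckDB's information_schema whether a base table with the collection's alias already exists, rather than trusting in-memory flags. The equivalent raw probe, sketched against a throwaway connection (table name illustrative):

    import duckdb

    con = duckdb.connect(":memory:")
    con.execute("CREATE TABLE persons (id VARCHAR)")
    exists = con.execute(
        "SELECT count(*) FROM information_schema.tables"
        " WHERE table_type = 'BASE TABLE' AND table_name = ?",
        ["persons"],
    ).fetchone()[0] > 0
    print(exists)  # True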
@@ -132,6 +148,7 @@ class DuckDBCollection(Collection):
         if qr.num_rows > 0:
             logger.info(f"Table already exists for {cd.name}")
             self._table_created = True
+            self._initialized = True
             self.metadata.is_prepopulated = True
             return
         logger.info(f"Creating table for {cd.name}")
@@ -142,4 +159,5 @@ class DuckDBCollection(Collection):
             conn.execute(text(ddl))
             conn.commit()
         self._table_created = True
+        self._initialized = True
         self.metadata.is_prepopulated = True
linkml_store/api/stores/duckdb/duckdb_database.py

@@ -26,18 +26,32 @@ TYPE_MAP = {
     "JSON": "Any",
 }
 
+MEMORY_HANDLE = "duckdb:///:memory:"
+
 
 logger = logging.getLogger(__name__)
 
 
 class DuckDBDatabase(Database):
+    """
+    An adapter for DuckDB databases.
+
+    Note that this adapter does not make use of a LinkML relational model transformation and
+    SQL Alchemy ORM layer. Instead, it attempts to map each collection (which is of type
+    some LinkML class) to a *single* DuckDB table. New tables are not created for nested references,
+    and linking tables are not created for many-to-many relationships.
+
+    Instead the native DuckDB ARRAY type is used to store multivalued attributes, and DuckDB JSON
+    types are used for nested inlined objects.
+    """
+
     _connection: DuckDBPyConnection = None
     _engine: sqlalchemy.Engine = None
     collection_class = DuckDBCollection
 
     def __init__(self, handle: Optional[str] = None, recreate_if_exists: bool = False, **kwargs):
         if handle is None:
-            handle = "duckdb:///:memory:"
+            handle = MEMORY_HANDLE
         if recreate_if_exists:
             path = Path(handle.replace("duckdb:///", ""))
             if path.exists():
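To make the new docstring concrete: a sketch of the single-table mapping it describes, with illustrative table and column names not taken from the package (assumes a DuckDB build where the JSON extension is available, as in recent releases). Multivalued attributes land in native arrays; nested inlined objects land in JSON columns:

    import duckdb

    con = duckdb.connect(":memory:")
    con.execute(
        """
        CREATE TABLE Person (
            id VARCHAR,
            aliases VARCHAR[],  -- multivalued attribute -> native ARRAY
            address JSON        -- nested inlined object -> JSON
        )
        """
    )
    con.execute(
        "INSERT INTO Person VALUES (?, ?, ?)",
        ["P1", ["Jo", "Joey"], '{"street": "1 Main St", "city": "Springfield"}'],
    )
    print(con.execute("SELECT * FROM Person").fetchall())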
@@ -64,6 +78,17 @@ class DuckDBDatabase(Database):
     def close(self, **kwargs):
         self.engine.dispose()
 
+    def drop(self, missing_ok=True, **kwargs):
+        self.close()
+        if self.handle == MEMORY_HANDLE:
+            return
+        path = Path(self.handle.replace("duckdb:///", ""))
+        if path.exists():
+            path.unlink()
+        else:
+            if not missing_ok:
+                raise FileNotFoundError(f"Database file not found: {path}")
+
     def query(self, query: Query, **kwargs) -> QueryResult:
         json_encoded_cols = []
         if query.from_table:
@@ -82,7 +107,8 @@ class DuckDBDatabase(Database):
         if sv:
             cd = None
             for c in self._collections.values():
-                if c.name == query.from_table or c.metadata.alias == query.from_table:
+                # if c.name == query.from_table or c.metadata.alias == query.from_table:
+                if c.alias == query.from_table or c.target_class_name == query.from_table:
                     cd = c.class_definition()
                     break
         if cd:
@@ -103,7 +129,14 @@ class DuckDBDatabase(Database):
                 if row[col]:
                     if isinstance(row[col], list):
                         for i in range(len(row[col])):
-                            row[col][i] = json.loads(row[col][i])
+                            try:
+                                parsed_val = json.loads(row[col][i])
+                            except json.JSONDecodeError as e:
+                                logger.error(f"Failed to parse col {col}[{i}] == {row[col][i]}")
+                                raise e
+                            row[col][i] = parsed_val
+                    elif isinstance(row[col], dict):
+                        pass
                     else:
                         row[col] = json.loads(row[col])
         qr.set_rows(pd.DataFrame(rows))
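The json.loads change above is a log-then-reraise wrapper, so the failing column and index are reported instead of being lost in a bare traceback. The same pattern in isolation (the helper name is hypothetical):

    import json
    import logging

    logger = logging.getLogger(__name__)

    def parse_cell(col: str, i: int, raw: str):
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            logger.error(f"Failed to parse col {col}[{i}] == {raw}")
            raise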
linkml_store/api/stores/filesystem/filesystem_collection.py

@@ -31,7 +31,7 @@ class FileSystemCollection(Collection[DatabaseType]):
 
     @property
     def path_to_file(self):
-        return Path(self.parent.directory_path) / f"{self.name}.{self.file_format}"
+        return Path(self.parent.directory_path) / f"{self.alias}.{self.file_format}"
 
     @property
     def objects_as_list(self) -> List[OBJECT]:
@@ -150,13 +150,20 @@ class FileSystemCollection(Collection[DatabaseType]):
         curr_objects = [o for o in self.objects_as_list if not matches(o)]
         self._set_objects(curr_objects)
 
-    def query(self, query: Query, **kwargs) -> QueryResult:
-
+    def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
+        limit = limit or query.limit
+        offset = offset or query.offset
+        if offset is None:
+            offset = 0
         where = query.where_clause or {}
         match = mongo_query_to_match_function(where)
         rows = [o for o in self.objects_as_list if match(o)]
         count = len(rows)
-        return QueryResult(query=query, num_rows=count, rows=rows)
+        if limit is None or limit < 0:
+            limit = count
+        # TODO: avoid recalculating
+        returned_row = rows[offset : offset + limit]
+        return QueryResult(query=query, num_rows=count, rows=returned_row)
 
     def query_facets(
         self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
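The reworked query() pages in memory: num_rows still reports the total number of matches, while rows carries only the requested window. The slicing semantics, in isolation:

    rows = [{"id": i} for i in range(10)]  # pretend these matched the where clause
    limit, offset = 3, 4
    window = rows[offset : offset + limit]
    print(len(rows), [r["id"] for r in window])  # 10 [4, 5, 6]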
@@ -170,6 +177,8 @@ class FileSystemCollection(Collection[DatabaseType]):
         for fc in facet_columns:
             if fc in row:
                 v = row[fc]
+                if not isinstance(v, str):
+                    v = str(v)
                 if v not in facet_results[fc]:
                     facet_results[fc][v] = 1
                 else:
linkml_store/api/stores/filesystem/filesystem_database.py

@@ -9,6 +9,7 @@ from linkml_runtime import SchemaView
 from linkml_store.api import Database
 from linkml_store.api.config import DatabaseConfig
 from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
+from linkml_store.utils.file_utils import safe_remove_directory
 from linkml_store.utils.format_utils import Format, load_objects
 
 logger = logging.getLogger(__name__)
@@ -20,6 +21,8 @@ class FileSystemDatabase(Database):
     directory_path: Optional[Path] = None
     default_file_format: Optional[str] = None
 
+    no_backup_on_drop: bool = False
+
     def __init__(self, handle: Optional[str] = None, **kwargs):
         handle = handle.replace("file:", "")
         if handle.startswith("//"):
@@ -43,6 +46,12 @@ class FileSystemDatabase(Database):
     def close(self, **kwargs):
         pass
 
+    def drop(self, no_backup=False, **kwargs):
+        self.close()
+        path = self.directory_path
+        if path.exists():
+            safe_remove_directory(path, no_backup=self.no_backup_on_drop or no_backup)
+
     def init_collections(self):
         metadata = self.metadata
         if self._collections is None:
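safe_remove_directory comes from the new linkml_store/utils/file_utils.py (+37 lines), whose body is not shown in this diff. Judging only from the call site, a plausible shape would be the following; this is a hypothetical sketch, not the package's actual code:

    import shutil
    from pathlib import Path

    def safe_remove_directory(path: Path, no_backup: bool = False) -> None:
        # hypothetical: either delete outright, or move the tree aside
        # as a backup so a drop() can be undone manually
        if no_backup:
            shutil.rmtree(path)
            return
        backup = path.with_name(path.name + ".bak")
        if backup.exists():
            shutil.rmtree(backup)
        path.rename(backup)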
@@ -63,7 +72,7 @@ class FileSystemDatabase(Database):
             self._collections[n] = collection
             collection._set_objects(objs)
 
-    def induce_schema_view(self) -> SchemaView:
+    def xxxinduce_schema_view(self) -> SchemaView:
         logger.info(f"Inducing schema view for {self.handle}")
         sb = SchemaBuilder()
 
linkml_store/api/stores/mongodb/mongodb_collection.py

@@ -2,7 +2,6 @@ import logging
 from copy import copy
 from typing import Any, Dict, List, Optional, Tuple, Union
 
-from linkml_runtime.linkml_model import SlotDefinition
 from pymongo.collection import Collection as MongoCollection
 
 from linkml_store.api import Collection
@@ -24,11 +23,15 @@ class MongoDBCollection(Collection):
 
     @property
     def mongo_collection(self) -> MongoCollection:
-        if not self.name:
+        # collection_name = self.alias or self.name
+        collection_name = self.alias
+        if not collection_name:
             raise ValueError("Collection name not set")
-        collection_name = self.alias or self.name
         return self.parent.native_db[collection_name]
 
+    def _check_if_initialized(self) -> bool:
+        return self.alias in self.parent.native_db.list_collection_names()
+
     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
         if not isinstance(objs, list):
             objs = [objs]
@@ -38,13 +41,15 @@ class MongoDBCollection(Collection):
                 del obj["_id"]
         self._post_insert_hook(objs)
 
-    def query(self, query: Query, limit: Optional[int] = None, **kwargs) -> QueryResult:
+    def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
         mongo_filter = self._build_mongo_filter(query.where_clause)
         limit = limit or query.limit
+        cursor = self.mongo_collection.find(mongo_filter)
         if limit and limit >= 0:
-            cursor = self.mongo_collection.find(mongo_filter).limit(limit)
-        else:
-            cursor = self.mongo_collection.find(mongo_filter)
+            cursor = cursor.limit(limit)
+        offset = offset or query.offset
+        if offset and offset >= 0:
+            cursor = cursor.skip(offset)
 
         def _as_row(row: dict):
             row = copy(row)
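pymongo cursors are lazy, which is what allows the rewrite above to build find() once and chain .limit()/.skip() conditionally; the server applies the skip before the limit regardless of call order. A sketch assuming a locally running mongod and illustrative database, collection, and field names:

    from pymongo import MongoClient

    client = MongoClient("mongodb://localhost:27017")
    coll = client["test_db"]["persons"]
    cursor = coll.find({"age": {"$gte": 18}}).limit(10).skip(20)
    page = list(cursor)  # matches 21-30 of the filtered set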
@@ -63,46 +68,87 @@ class MongoDBCollection(Collection):
             mongo_filter[field] = value
         return mongo_filter
 
+    from typing import Any, Dict, List, Union
+
     def query_facets(
-        self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
-    ) -> Dict[str, List[Tuple[Any, int]]]:
+        self,
+        where: Dict = None,
+        facet_columns: List[Union[str, Tuple[str, ...]]] = None,
+        facet_limit=DEFAULT_FACET_LIMIT,
+        **kwargs,
+    ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
         results = {}
-        cd = self.class_definition()
         if not facet_columns:
             facet_columns = list(self.class_definition().attributes.keys())
 
         for col in facet_columns:
             logger.debug(f"Faceting on {col}")
+
+            # Handle tuple columns
+            if isinstance(col, tuple):
+                group_id = {k.replace(".", "_"): f"${k}" for k in col}
+                all_fields = col
+            else:
+                group_id = f"${col}"
+                all_fields = [col]
+
+            # Initial pipeline without unwinding
+            facet_pipeline = [
+                {"$match": where} if where else {"$match": {}},
+                {"$group": {"_id": group_id, "count": {"$sum": 1}}},
+                {"$sort": {"count": -1}},
+                {"$limit": facet_limit},
+            ]
+
+            logger.info(f"Initial facet pipeline: {facet_pipeline}")
+            initial_results = list(self.mongo_collection.aggregate(facet_pipeline))
+
+            # Check if we need to unwind based on the results
+            needs_unwinding = False
             if isinstance(col, tuple):
-                sd = SlotDefinition(name="PLACEHOLDER")
+                needs_unwinding = any(
+                    isinstance(result["_id"], dict) and any(isinstance(v, list) for v in result["_id"].values())
+                    for result in initial_results
+                )
+            else:
+                needs_unwinding = any(isinstance(result["_id"], list) for result in initial_results)
+
+            if needs_unwinding:
+                logger.info(f"Detected array values for {col}, unwinding...")
+                facet_pipeline = [{"$match": where} if where else {"$match": {}}]
+
+                # Unwind each field if needed
+                for field in all_fields:
+                    field_parts = field.split(".")
+                    for i in range(len(field_parts)):
+                        facet_pipeline.append({"$unwind": f"${'.'.join(field_parts[:i + 1])}"})
+
+                facet_pipeline.extend(
+                    [
+                        {"$group": {"_id": group_id, "count": {"$sum": 1}}},
+                        {"$sort": {"count": -1}},
+                        {"$limit": facet_limit},
+                    ]
+                )
+
+                logger.info(f"Updated facet pipeline with unwinding: {facet_pipeline}")
+                facet_results = list(self.mongo_collection.aggregate(facet_pipeline))
             else:
-                if col in cd.attributes:
-                    sd = cd.attributes[col]
-                else:
-                    logger.info(f"No schema metadata for {col}")
-                    sd = SlotDefinition(name=col)
-            group = {"$group": {"_id": f"${col}", "count": {"$sum": 1}}}
+                facet_results = initial_results
+
+            logger.info(f"Facet results: {facet_results}")
+
+            # Process results
             if isinstance(col, tuple):
-                q = {k.replace(".", ""): f"${k}" for k in col}
-                group["$group"]["_id"] = q
-            if sd and sd.multivalued:
-                facet_pipeline = [
-                    {"$match": where} if where else {"$match": {}},
-                    {"$unwind": f"${col}"},
-                    group,
-                    {"$sort": {"count": -1}},
-                    {"$limit": facet_limit},
+                results[col] = [
+                    (tuple(result["_id"].values()), result["count"])
+                    for result in facet_results
+                    if result["_id"] is not None and all(v is not None for v in result["_id"].values())
                 ]
             else:
-                facet_pipeline = [
-                    {"$match": where} if where else {"$match": {}},
-                    group,
-                    {"$sort": {"count": -1}},
-                    {"$limit": facet_limit},
+                results[col] = [
+                    (result["_id"], result["count"]) for result in facet_results if result["_id"] is not None
                 ]
-            logger.info(f"Facet pipeline: {facet_pipeline}")
-            facet_results = list(self.mongo_collection.aggregate(facet_pipeline))
-            results[col] = [(result["_id"], result["count"]) for result in facet_results]
 
         return results
 
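The rewritten query_facets runs a cheap pipeline first and only rebuilds it with $unwind stages when array values show up in the group ids. For a hypothetical multivalued field "tags", the pipeline it would settle on looks like:

    facet_pipeline = [
        {"$match": {}},  # or the caller's where clause
        {"$unwind": "$tags"},  # one $unwind per dotted-path segment
        {"$group": {"_id": "$tags", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": 100},  # facet_limit
    ]
    # results[col] then becomes [(value, count), ...] with None ids filtered out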
linkml_store/api/stores/mongodb/mongodb_database.py

@@ -3,9 +3,6 @@
 import logging
 from typing import Optional
 
-from linkml_runtime import SchemaView
-from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
-from linkml_runtime.utils.schema_builder import SchemaBuilder
 from pymongo import MongoClient
 from pymongo.database import Database as NativeDatabase
 
@@ -63,10 +60,9 @@ class MongoDBDatabase(Database):
         self._native_client.close()
 
     def drop(self, **kwargs):
-        self.native_client.drop_database(self.metadata.alias)
+        self.native_client.drop_database(self.native_db.name)
 
     def query(self, query: Query, **kwargs) -> QueryResult:
-        # TODO: DRY
         if query.from_table:
             collection = self.get_collection(query.from_table)
             return collection.query(query, **kwargs)
@@ -81,34 +77,3 @@ class MongoDBDatabase(Database):
         if collection_name not in self._collections:
             collection = MongoDBCollection(name=collection_name, parent=self)
             self._collections[collection_name] = collection
-
-    def induce_schema_view(self) -> SchemaView:
-        logger.info(f"Inducing schema view for {self.handle}")
-        sb = SchemaBuilder()
-        schema = sb.schema
-
-        for collection_name in self.native_db.list_collection_names():
-            sb.add_class(collection_name)
-            mongo_collection = self.native_db[collection_name]
-            sample_doc = mongo_collection.find_one()
-            if sample_doc:
-                for field, value in sample_doc.items():
-                    if field == "_id":
-                        continue
-                    sd = SlotDefinition(field)
-                    if isinstance(value, list):
-                        sd.multivalued = True
-                    if isinstance(value, dict):
-                        sd.inlined = True
-                    sb.schema.classes[collection_name].attributes[sd.name] = sd
-
-        sb.add_defaults()
-        for cls_name in schema.classes:
-            if cls_name in self.metadata.collections:
-                collection_metadata = self.metadata.collections[cls_name]
-                if collection_metadata.attributes:
-                    del schema.classes[cls_name]
-                    cls = ClassDefinition(name=collection_metadata.type, attributes=collection_metadata.attributes)
-                    schema.classes[cls.name] = cls
-
-        return SchemaView(schema)
linkml_store/api/stores/solr/solr_collection.py

@@ -18,7 +18,7 @@ class SolrCollection(Collection):
     @property
     def _collection_base(self) -> str:
         if self.parent.use_cores:
-            base_url = f"{self.parent.base_url}/{self.name}"
+            base_url = f"{self.parent.base_url}/{self.alias}"
         else:
             base_url = self.parent.base_url
         return base_url
@@ -37,7 +37,7 @@ class SolrCollection(Collection):
         if not qfs:
             raise ValueError("No searchable slots configured for Solr collection")
         solr_query = self._build_solr_query(where, search_term=query, extra={"defType": index_name, "qf": qfs})
-        logger.info(f"Querying Solr collection {self.name} with query: {solr_query}")
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")
 
         response = requests.get(f"{self._collection_base}/select", params=solr_query)
         response.raise_for_status()
@@ -50,7 +50,7 @@ class SolrCollection(Collection):
 
     def query(self, query: Query, **kwargs) -> QueryResult:
         solr_query = self._build_solr_query(query)
-        logger.info(f"Querying Solr collection {self.name} with query: {solr_query}")
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")
 
         response = requests.get(f"{self._collection_base}/select", params=solr_query)
         response.raise_for_status()
@@ -69,7 +69,7 @@ class SolrCollection(Collection):
         solr_query["facet.field"] = facet_columns
         solr_query["facet.limit"] = facet_limit
 
-        logger.info(f"Querying Solr collection {self.name} for facets with query: {solr_query}")
+        logger.info(f"Querying Solr collection {self.alias} for facets with query: {solr_query}")
 
         response = requests.get(f"{self._collection_base}/select", params=solr_query)
         response.raise_for_status()