linkml-store 0.2.5-py3-none-any.whl → 0.2.9-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.


Files changed (28)
  1. linkml_store/api/client.py +9 -6
  2. linkml_store/api/collection.py +118 -5
  3. linkml_store/api/database.py +45 -14
  4. linkml_store/api/stores/duckdb/duckdb_collection.py +176 -8
  5. linkml_store/api/stores/duckdb/duckdb_database.py +52 -19
  6. linkml_store/api/stores/filesystem/__init__.py +1 -1
  7. linkml_store/api/stores/mongodb/mongodb_collection.py +186 -0
  8. linkml_store/api/stores/mongodb/mongodb_database.py +8 -3
  9. linkml_store/api/stores/solr/solr_collection.py +7 -1
  10. linkml_store/cli.py +202 -21
  11. linkml_store/index/implementations/llm_indexer.py +14 -6
  12. linkml_store/index/indexer.py +7 -4
  13. linkml_store/inference/implementations/llm_inference_engine.py +13 -9
  14. linkml_store/inference/implementations/rag_inference_engine.py +13 -10
  15. linkml_store/inference/implementations/sklearn_inference_engine.py +7 -1
  16. linkml_store/inference/inference_config.py +1 -0
  17. linkml_store/utils/dat_parser.py +95 -0
  18. linkml_store/utils/enrichment_analyzer.py +217 -0
  19. linkml_store/utils/format_utils.py +183 -3
  20. linkml_store/utils/llm_utils.py +3 -1
  21. linkml_store/utils/pandas_utils.py +1 -1
  22. linkml_store/utils/sql_utils.py +7 -1
  23. linkml_store/utils/vector_utils.py +4 -11
  24. {linkml_store-0.2.5.dist-info → linkml_store-0.2.9.dist-info}/METADATA +4 -3
  25. {linkml_store-0.2.5.dist-info → linkml_store-0.2.9.dist-info}/RECORD +28 -26
  26. {linkml_store-0.2.5.dist-info → linkml_store-0.2.9.dist-info}/WHEEL +1 -1
  27. {linkml_store-0.2.5.dist-info → linkml_store-0.2.9.dist-info}/LICENSE +0 -0
  28. {linkml_store-0.2.5.dist-info → linkml_store-0.2.9.dist-info}/entry_points.txt +0 -0
linkml_store/api/client.py

@@ -12,9 +12,9 @@ from linkml_store.api.config import ClientConfig
 logger = logging.getLogger(__name__)
 
 
-
 HANDLE_MAP = {
     "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
+    "sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
     "solr": "linkml_store.api.stores.solr.solr_database.SolrDatabase",
     "mongodb": "linkml_store.api.stores.mongodb.mongodb_database.MongoDBDatabase",
     "chromadb": "linkml_store.api.stores.chromadb.chromadb_database.ChromaDBDatabase",
@@ -24,6 +24,8 @@ HANDLE_MAP = {
 
 SUFFIX_MAP = {
     "ddb": "duckdb:///{path}",
+    "duckdb": "duckdb:///{path}",
+    "db": "duckdb:///{path}",
 }
 
 
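For orientation, the effect of the new SUFFIX_MAP entries is that a bare filename now resolves to a DuckDB connection URL. A minimal sketch, assuming the top-level `Client` import shown in the collection.py doctests later in this diff; the filename is illustrative:

    from linkml_store import Client

    client = Client()
    # "data.db" has suffix "db", which SUFFIX_MAP expands to "duckdb:///data.db";
    # "data.duckdb" and "data.ddb" resolve the same way
    db = client.attach_database("data.db", alias="data")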
@@ -204,9 +206,10 @@ class Client:
         if ":" not in handle:
             if alias is None:
                 alias = handle
-            suffix = handle.split(".")[-1]
-            if suffix in SUFFIX_MAP:
-                handle = SUFFIX_MAP[suffix].format(path=handle)
+            if "." in handle:
+                suffix = handle.split(".")[-1]
+                if suffix in SUFFIX_MAP:
+                    handle = SUFFIX_MAP[suffix].format(path=handle)
         if ":" not in handle:
             scheme = handle
             handle = None
@@ -216,14 +219,14 @@
             scheme, _ = handle.split(":", 1)
         if scheme not in HANDLE_MAP:
             raise ValueError(f"Unknown scheme: {scheme}")
-        module_path, class_name = HANDLE_MAP[scheme].rsplit('.', 1)
+        module_path, class_name = HANDLE_MAP[scheme].rsplit(".", 1)
         try:
             module = importlib.import_module(module_path)
             cls = getattr(module, class_name)
         except ImportError as e:
             raise ImportError(f"Failed to import {scheme} database. Make sure the correct extras are installed: {e}")
 
-        #cls = HANDLE_MAP[scheme]
+        # cls = HANDLE_MAP[scheme]
         db = cls(handle=handle, recreate_if_exists=recreate_if_exists, **kwargs)
         if schema_view:
             db.set_schema_view(schema_view)
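The hunk above keeps the lazy-loading pattern: each scheme maps to a dotted class path that is imported only when used, so optional backends need their extras installed only on demand. A standalone sketch of the same mechanism:

    import importlib

    path = "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase"
    module_path, class_name = path.rsplit(".", 1)
    # the import is deferred until the scheme is actually requested
    cls = getattr(importlib.import_module(module_path), class_name)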
linkml_store/api/collection.py

@@ -1,6 +1,7 @@
 """A structure for representing collections of similar objects."""
 
 import hashlib
+import json
 import logging
 from collections import defaultdict
 from pathlib import Path
@@ -210,8 +211,62 @@ class Collection(Generic[DatabaseType]):
         """
         raise NotImplementedError
 
+    def index(
+        self,
+        objs: Union[OBJECT, List[OBJECT]],
+        index_name: Optional[str] = None,
+        replace: bool = False,
+        unique: bool = False,
+        **kwargs,
+    ) -> None:
+        """
+        Index objects in the collection.
+
+        :param objs:
+        :param index_name:
+        :param replace: replace the index, or not
+        :param unique: boolean used to declare the index unique or not
+        :param kwargs:
+        :return:
+        """
+        raise NotImplementedError
+
+    def upsert(
+        self,
+        objs: Union[OBJECT, List[OBJECT]],
+        filter_fields: List[str],
+        update_fields: Union[List[str], None] = None,
+        **kwargs,
+    ):
+        """
+        Add one or more objects to the collection.
+
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("mongodb", alias="test")
+        >>> collection = db.create_collection("Person")
+        >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
+        >>> collection.upsert(objs)
+
+        :param objs:
+        :param filter_fields: List of field names to use as the filter for matching existing collections.
+        :param update_fields: List of field names to include in the update. If None, all fields are updated.
+        :param kwargs:
+
+        :return:
+        """
+        raise NotImplementedError
+
     def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
-        logger.info(f"Pre-query hook (state: {self._initialized}; Q= {query}")
+        """
+        Pre-query hook.
+
+        This is called before a query is executed. It is used to materialize derivations and indexes.
+        :param query:
+        :param kwargs:
+        :return:
+        """
+        logger.debug(f"Pre-query hook (state: {self._initialized}; Q= {query}")  # if logging.info, this is very noisy.
         if not self._initialized:
             self._materialize_derivations()
             self._initialized = True
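A usage sketch for the new `upsert` hook, following the signature above. The base class raises NotImplementedError; concrete behavior lives in the backend subclasses changed in this release (the file list shows mongodb_collection.py and duckdb_collection.py). The field values here are illustrative:

    from linkml_store import Client

    client = Client()
    db = client.attach_database("mongodb", alias="test")
    collection = db.create_collection("Person")
    # match existing documents on "id"; on a match, rewrite only age_in_years
    collection.upsert(
        [{"id": "P1", "name": "John", "age_in_years": 31}],
        filter_fields=["id"],
        update_fields=["age_in_years"],
    )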
@@ -402,7 +457,12 @@ class Collection(Generic[DatabaseType]):
             return qr.rows[0]
         return None
 
-    def find(self, where: Optional[Any] = None, **kwargs) -> QueryResult:
+    def find(
+        self,
+        where: Optional[Any] = None,
+        select_cols: Optional[List[str]] = None,
+        **kwargs,
+    ) -> QueryResult:
         """
         Find objects in the collection using a where query.
 
@@ -432,10 +492,14 @@ class Collection(Generic[DatabaseType]):
 
 
         :param where:
+        :param select_cols:
         :param kwargs:
         :return:
        """
-        query = self._create_query(where_clause=where)
+        query = self._create_query(
+            where_clause=where,
+            select_cols=select_cols,
+        )
         self._pre_query_hook(query)
         return self.query(query, **kwargs)
 
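A sketch of the widened `find` signature, assuming `select_cols` acts as a column projection (the field names are illustrative):

    qr = collection.find({"name": "John"}, select_cols=["id", "name"])
    for row in qr.rows:
        print(row)  # only the selected columns should be present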
@@ -535,8 +599,16 @@ class Collection(Generic[DatabaseType]):
         assert ix_coll.size() > 0
         qr = ix_coll.find(where=where, limit=-1, **kwargs)
         index_col = ix.index_field
+
         # TODO: optimize this for large indexes
-        vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
+        def row2array(row):
+            v = row[index_col]
+            if isinstance(v, str):
+                # sqlite stores arrays as strings
+                v = json.loads(v)
+            return np.array(v, dtype=float)
+
+        vector_pairs = [(row, row2array(row)) for row in qr.rows]
         results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
         for r in results:
             del r[1][index_col]
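The `row2array` helper exists because SQLite has no native array type, so a vector column comes back as JSON text rather than a list. A round-trip sketch of the decoding step:

    import json
    import numpy as np

    stored = "[0.12, 0.34, 0.56]"   # how a SQLite-backed index column is returned
    vec = np.array(json.loads(stored), dtype=float)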
@@ -550,6 +622,47 @@ class Collection(Generic[DatabaseType]):
         new_qr.rows = [r[1] for r in results]
         return new_qr
 
+    def group_by(
+        self,
+        group_by_fields: List[str],
+        inlined_field="objects",
+        agg_map: Optional[Dict[str, str]] = None,
+        where: Optional[Dict] = None,
+        **kwargs,
+    ) -> QueryResult:
+        """
+        Group objects in the collection by a column.
+
+        :param group_by:
+        :param where:
+        :param kwargs:
+        :return:
+        """
+        if isinstance(group_by_fields, str):
+            group_by_fields = [group_by_fields]
+        df = self.find(where=where, limit=-1).rows_dataframe
+
+        # Handle the case where agg_map is None
+        if agg_map is None:
+            agg_map = {}
+
+        pk_fields = agg_map.get("first", []) + group_by_fields
+        list_fields = agg_map.get("list", [])
+        if not list_fields:
+            list_fields = [a for a in df.columns if a not in pk_fields]
+
+        grouped_objs = defaultdict(list)
+        for _, row in df.iterrows():
+            pk = tuple(row[pk_fields])
+            grouped_objs[pk].append({k: row[k] for k in list_fields})
+        results = []
+        for pk, objs in grouped_objs.items():
+            top_obj = {k: v for k, v in zip(pk_fields, pk)}
+            top_obj[inlined_field] = objs
+            results.append(top_obj)
+        r = QueryResult(num_rows=len(results), rows=results)
+        return r
+
     @property
     def is_internal(self) -> bool:
         """
@@ -1004,7 +1117,7 @@ class Collection(Generic[DatabaseType]):
         multivalued = any(multivalueds)
         inlined = any(inlineds)
         if multivalued and False in multivalueds:
-            raise ValueError(f"Mixed list non list: {vs} // inferred= {multivalueds}")
+            logger.info(f"Mixed list non list: {vs} // inferred= {multivalueds}")
         # if not rngs:
         #     raise AssertionError(f"Empty rngs for {k} = {vs}")
         rng = rngs[0] if rngs else None
linkml_store/api/database.py

@@ -276,14 +276,15 @@ class Database(ABC, Generic[CollectionType]):
 
         Examples:
 
-        >>> from linkml_store.api.client import Client
-        >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
-        >>> collection = db.create_collection("Person", alias="persons")
-        >>> collection.alias
-        'persons'
-        >>> collection.target_class_name
-        'Person'
+        >>> from linkml_store.api.client import Client
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb", alias="test")
+        >>> collection = db.create_collection("Person", alias="persons")
+        >>> collection.alias
+        'persons'
+
+        >>> collection.target_class_name
+        'Person'
 
         If alias is not provided, it defaults to the name of the type.
 
@@ -419,7 +420,7 @@ class Database(ABC, Generic[CollectionType]):
         >>> from linkml_store.api.client import Client
         >>> from linkml_store.api.queries import Query
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person")
         >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
         >>> query = Query(from_table="Person", where_clause={"name": "John"})
@@ -451,7 +452,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person", alias="persons")
         >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
         >>> schema_view = db.schema_view
@@ -594,7 +595,31 @@ class Database(ABC, Generic[CollectionType]):
             sb.add_class(coll.target_class_name)
         return SchemaView(sb.schema)
 
-    def iter_validate_database(self, **kwargs) -> Iterator["ValidationResult"]:
+    def validate_database(self, **kwargs) -> List["ValidationResult"]:
+        """
+        Validate the contents of the database.
+
+        As `iter_validate_database`, but returns a list of validation results.
+
+        :param kwargs:
+        :return:
+        """
+        return list(self.iter_validate_database(**kwargs))
+
+    def validate_database(self, **kwargs) -> List["ValidationResult"]:
+        """
+        Validate the contents of the database.
+
+        As `iter_validate_database`, but returns a list of validation results.
+
+        :param kwargs:
+        :return:
+        """
+        return list(self.iter_validate_database(**kwargs))
+
+    def iter_validate_database(
+        self, ensure_referential_integrity: bool = None, **kwargs
+    ) -> Iterator["ValidationResult"]:
         """
         Validate the contents of the database.
 
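Note that the hunk adds the identical `validate_database` definition twice in succession; since the bodies match, Python simply rebinds the name to the second one. A usage sketch for the new eager wrapper:

    results = db.validate_database()
    print(len(results))  # the full list of ValidationResults, materialized up front
    for r in results:
        print(r)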
@@ -634,12 +659,14 @@ class Database(ABC, Generic[CollectionType]):
         'capital' is a required property
         'continent' is a required proper
 
+        :param ensure_referential_integrity: ensure referential integrity
         :param kwargs:
         :return: iterator over validation results
         """
         for collection in self.list_collections():
             yield from collection.iter_validate_collection(**kwargs)
-        if self.metadata.ensure_referential_integrity:
+        if self.metadata.ensure_referential_integrity or ensure_referential_integrity:
+            logger.info(f"Validating referential integrity on {self.alias}")
             yield from self._validate_referential_integrity(**kwargs)
 
     def _validate_referential_integrity(self, **kwargs) -> Iterator["ValidationResult"]:
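With the new keyword, referential-integrity checking can be forced per call rather than only via database metadata; a sketch:

    # runs the per-collection checks, then the referential-integrity pass
    for result in db.iter_validate_database(ensure_referential_integrity=True):
        print(result)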
@@ -660,7 +687,9 @@ class Database(ABC, Generic[CollectionType]):
             induced_slots = sv.class_induced_slots(cd.name)
             slot_map = {s.name: s for s in induced_slots}
             # rmap = {s.name: s.range for s in induced_slots}
+            # map slot ranges to a collection where that range is stored
             sr_to_coll = {s.name: cmap.get(s.range, []) for s in induced_slots if s.range}
+            logger.debug(f"Validating referential integrity for {collection.target_class_name} // {sr_to_coll}")
             for obj in collection.find_iter():
                 for k, v in obj.items():
                     if k not in sr_to_coll:
@@ -721,7 +750,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
         >>> db.list_collection_names()
         ['iris']
@@ -741,7 +770,9 @@ class Database(ABC, Generic[CollectionType]):
         # import into a test instance
         tmp_handle = source_format.value
         client = self.parent
-        tmp_db = client.attach_database(tmp_handle, alias="tmp")
+        tmp_alias = "tmp"
+        client.drop_database(tmp_alias, missing_ok=True)
+        tmp_db = client.attach_database(tmp_handle, alias=tmp_alias, recreate_if_exists=True)
         # TODO: check for infinite recursion
         tmp_db.import_database(location, source_format=source_format)
         obj = {}
linkml_store/api/stores/duckdb/duckdb_collection.py

@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union, Tuple
 
 import sqlalchemy as sqla
 from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
@@ -8,7 +8,7 @@ from sqlalchemy.sql.ddl import CreateTable
 
 from linkml_store.api import Collection
 from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
-from linkml_store.api.queries import Query
+from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.duckdb.mappings import TMAP
 from linkml_store.utils.sql_utils import facet_count_sql
 
@@ -94,7 +94,9 @@ class DuckDBCollection(Collection):
 
     def query_facets(
         self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
-    ) -> Dict[str, Dict[str, int]]:
+    ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
+        if facet_limit is None:
+            facet_limit = DEFAULT_FACET_LIMIT
         results = {}
         cd = self.class_definition()
         with self.parent.engine.connect() as conn:
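The corrected annotation describes what `query_facets` returns: for each facet column (or tuple of columns), a list of (value, count) pairs. A reading sketch, with "species" as an illustrative column name borrowed from the iris doctest in database.py:

    facets = collection.query_facets(facet_columns=["species"])
    for value, count in facets["species"]:
        print(value, count)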
@@ -143,20 +145,186 @@ class DuckDBCollection(Collection):
             return True
         return False
 
+    def group_by(
+        self,
+        group_by_fields: List[str],
+        inlined_field="objects",
+        agg_map: Optional[Dict[str, str]] = None,
+        where: Optional[Dict] = None,
+        **kwargs,
+    ) -> QueryResult:
+        """
+        Group objects in the collection by specified fields using SQLAlchemy.
+
+        This implementation leverages DuckDB's SQL capabilities for more efficient grouping.
+
+        :param group_by_fields: List of fields to group by
+        :param inlined_field: Field name to store aggregated objects
+        :param agg_map: Dictionary mapping aggregation types to fields
+        :param where: Filter conditions
+        :param kwargs: Additional arguments
+        :return: Query result containing grouped data
+        """
+        if isinstance(group_by_fields, str):
+            group_by_fields = [group_by_fields]
+
+        cd = self.class_definition()
+        if not cd:
+            logger.debug(f"No class definition defined for {self.alias} {self.target_class_name}")
+            return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
+
+        # Check if the table exists
+        if not self.parent._table_exists(self.alias):
+            logger.debug(f"Table {self.alias} doesn't exist, falling back to parent implementation")
+            return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
+
+        # Get table definition
+        table = self._sqla_table(cd)
+        engine = self.parent.engine
+
+        # Create a SQLAlchemy select statement for groups
+        from sqlalchemy import select, func, and_, or_
+        group_cols = [table.c[field] for field in group_by_fields if field in table.columns.keys()]
+
+        if not group_cols:
+            logger.warning(f"None of the group_by fields {group_by_fields} found in table columns")
+            return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
+
+        stmt = select(*group_cols).distinct()
+
+        # Add where conditions if specified
+        if where:
+            conditions = []
+            for k, v in where.items():
+                if k in table.columns.keys():
+                    # Handle different operator types (dict values for operators)
+                    if isinstance(v, dict):
+                        for op, val in v.items():
+                            if op == "$gt":
+                                conditions.append(table.c[k] > val)
+                            elif op == "$gte":
+                                conditions.append(table.c[k] >= val)
+                            elif op == "$lt":
+                                conditions.append(table.c[k] < val)
+                            elif op == "$lte":
+                                conditions.append(table.c[k] <= val)
+                            elif op == "$ne":
+                                conditions.append(table.c[k] != val)
+                            elif op == "$in":
+                                conditions.append(table.c[k].in_(val))
+                            else:
+                                # Default to equality for unknown operators
+                                logger.warning(f"Unknown operator {op}, using equality")
+                                conditions.append(table.c[k] == val)
+                    else:
+                        # Direct equality comparison
+                        conditions.append(table.c[k] == v)
+
+            if conditions:
+                for condition in conditions:
+                    stmt = stmt.where(condition)
+
+        results = []
+        try:
+            with engine.connect() as conn:
+                # Get all distinct groups
+                group_result = conn.execute(stmt)
+                group_rows = list(group_result)
+
+                # For each group, get all objects
+                for group_row in group_rows:
+                    # Build conditions for this group
+                    group_conditions = []
+                    group_dict = {}
+
+                    for i, field in enumerate(group_by_fields):
+                        if field in table.columns.keys():
+                            value = group_row[i]
+                            group_dict[field] = value
+                            if value is None:
+                                group_conditions.append(table.c[field].is_(None))
+                            else:
+                                group_conditions.append(table.c[field] == value)
+
+                    # Get all rows for this group
+                    row_stmt = select(*table.columns)
+                    for condition in group_conditions:
+                        row_stmt = row_stmt.where(condition)
+
+                    # Add original where conditions
+                    if where:
+                        for k, v in where.items():
+                            if k in table.columns.keys():
+                                # Handle different operator types for the row query as well
+                                if isinstance(v, dict):
+                                    for op, val in v.items():
+                                        if op == "$gt":
+                                            row_stmt = row_stmt.where(table.c[k] > val)
+                                        elif op == "$gte":
+                                            row_stmt = row_stmt.where(table.c[k] >= val)
+                                        elif op == "$lt":
+                                            row_stmt = row_stmt.where(table.c[k] < val)
+                                        elif op == "$lte":
+                                            row_stmt = row_stmt.where(table.c[k] <= val)
+                                        elif op == "$ne":
+                                            row_stmt = row_stmt.where(table.c[k] != val)
+                                        elif op == "$in":
+                                            row_stmt = row_stmt.where(table.c[k].in_(val))
+                                        else:
+                                            # Default to equality for unknown operators
+                                            row_stmt = row_stmt.where(table.c[k] == val)
+                                else:
+                                    # Direct equality comparison
+                                    row_stmt = row_stmt.where(table.c[k] == v)
+
+                    row_result = conn.execute(row_stmt)
+                    rows = list(row_result)
+
+                    # Convert rows to dictionaries
+                    objects = []
+                    for row in rows:
+                        obj = {}
+                        for i, col in enumerate(row._fields):
+                            obj[col] = row[i]
+                        objects.append(obj)
+
+                    # Apply agg_map to filter fields if specified
+                    if agg_map and "list" in agg_map:
+                        list_fields = agg_map["list"]
+                        if list_fields:
+                            objects = [{k: obj.get(k) for k in list_fields if k in obj} for obj in objects]
+
+                    # Create the result object
+                    result_obj = group_dict.copy()
+                    result_obj[inlined_field] = objects
+                    results.append(result_obj)
+
+            return QueryResult(num_rows=len(results), rows=results)
+        except Exception as e:
+            logger.warning(f"Error in DuckDB group_by: {e}")
+            # Fall back to parent implementation
+            return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
+
     def _create_table(self, cd: ClassDefinition):
         if self._table_created or self.metadata.is_prepopulated:
             logger.info(f"Already have table for: {cd.name}")
             return
-        query = Query(
-            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
-        )
-        qr = self.parent.query(query)
-        if qr.num_rows > 0:
+        if self.parent._table_exists(self.alias):
             logger.info(f"Table already exists for {cd.name}")
             self._table_created = True
             self._initialized = True
             self.metadata.is_prepopulated = True
             return
+        # query = Query(
+        #     from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        # )
+        # qr = self.parent.query(query)
+        # if qr.num_rows > 0:
+        #     logger.info(f"Table already exists for {cd.name}")
+        #     self._table_created = True
+        #     self._initialized = True
+        #     self.metadata.is_prepopulated = True
+        #     return
         logger.info(f"Creating table for {cd.name}")
         t = self._sqla_table(cd)
         ct = CreateTable(t)
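A usage sketch for the Mongo-style operators handled in the DuckDB `group_by` above, run against a collection like the iris one imported in the database.py doctest (the column names `sepal_length` and `species` are assumptions about that CSV):

    result = collection.group_by(
        ["species"],
        # "$gt" is translated to a SQLAlchemy "greater than" filter before grouping
        where={"sepal_length": {"$gt": 5.0}},
    )
    for row in result.rows:
        print(row["species"], len(row["objects"]))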