linkml-store 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linkml-store might be problematic. Click here for more details.

Files changed (35)
  1. linkml_store/api/client.py +15 -4
  2. linkml_store/api/collection.py +185 -15
  3. linkml_store/api/config.py +11 -3
  4. linkml_store/api/database.py +36 -5
  5. linkml_store/api/stores/duckdb/duckdb_collection.py +6 -3
  6. linkml_store/api/stores/duckdb/duckdb_database.py +20 -1
  7. linkml_store/api/stores/filesystem/__init__.py +7 -8
  8. linkml_store/api/stores/filesystem/filesystem_collection.py +150 -113
  9. linkml_store/api/stores/filesystem/filesystem_database.py +57 -21
  10. linkml_store/api/stores/mongodb/mongodb_collection.py +82 -34
  11. linkml_store/api/stores/mongodb/mongodb_database.py +13 -2
  12. linkml_store/api/types.py +4 -0
  13. linkml_store/cli.py +97 -8
  14. linkml_store/index/__init__.py +5 -3
  15. linkml_store/index/indexer.py +7 -2
  16. linkml_store/utils/change_utils.py +17 -0
  17. linkml_store/utils/format_utils.py +89 -8
  18. linkml_store/utils/patch_utils.py +126 -0
  19. linkml_store/utils/query_utils.py +89 -0
  20. linkml_store/utils/schema_utils.py +23 -0
  21. linkml_store/webapi/__init__.py +0 -0
  22. linkml_store/webapi/html/__init__.py +3 -0
  23. linkml_store/webapi/html/base.html.j2 +24 -0
  24. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  25. linkml_store/webapi/html/database_details.html.j2 +16 -0
  26. linkml_store/webapi/html/databases.html.j2 +14 -0
  27. linkml_store/webapi/html/generic.html.j2 +46 -0
  28. linkml_store/webapi/main.py +572 -0
  29. linkml_store-0.1.10.dist-info/METADATA +138 -0
  30. linkml_store-0.1.10.dist-info/RECORD +58 -0
  31. {linkml_store-0.1.8.dist-info → linkml_store-0.1.10.dist-info}/entry_points.txt +1 -0
  32. linkml_store-0.1.8.dist-info/METADATA +0 -58
  33. linkml_store-0.1.8.dist-info/RECORD +0 -45
  34. {linkml_store-0.1.8.dist-info → linkml_store-0.1.10.dist-info}/LICENSE +0 -0
  35. {linkml_store-0.1.8.dist-info → linkml_store-0.1.10.dist-info}/WHEEL +0 -0
@@ -9,6 +9,7 @@ from linkml_store.api import Database
9
9
  from linkml_store.api.config import ClientConfig
10
10
  from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
11
11
  from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
12
+ from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
12
13
  from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
13
14
  from linkml_store.api.stores.solr.solr_database import SolrDatabase
14
15
 
@@ -20,6 +21,7 @@ HANDLE_MAP = {
20
21
  "solr": SolrDatabase,
21
22
  "mongodb": MongoDBDatabase,
22
23
  "chromadb": ChromaDBDatabase,
24
+ "file": FileSystemDatabase,
23
25
  }
24
26
 
25
27
 
@@ -96,7 +98,7 @@ class Client:
96
98
  """
97
99
  return self.metadata.base_dir
98
100
 
99
- def from_config(self, config: Union[ClientConfig, str, Path], base_dir=None, **kwargs):
101
+ def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, **kwargs):
100
102
  """
101
103
  Create a client from a configuration.
102
104
 
@@ -116,11 +118,13 @@ class Client:
116
118
  :return:
117
119
 
118
120
  """
121
+ if isinstance(config, dict):
122
+ config = ClientConfig(**config)
119
123
  if isinstance(config, Path):
120
124
  config = str(config)
121
125
  if isinstance(config, str):
122
- if not base_dir:
123
- base_dir = Path(config).parent
126
+ # if not base_dir:
127
+ # base_dir = Path(config).parent
124
128
  parsed_obj = yaml.safe_load(open(config))
125
129
  config = ClientConfig(**parsed_obj)
126
130
  self.metadata = config
@@ -131,8 +135,15 @@ class Client:
131
135
 
132
136
  def _initialize_databases(self, **kwargs):
133
137
  for name, db_config in self.metadata.databases.items():
134
- handle = db_config.handle.format(base_dir=self.base_dir)
138
+ base_dir = self.base_dir
139
+ logger.info(f"Initializing database: {name}, base_dir: {base_dir}")
140
+ if not base_dir:
141
+ base_dir = Path.cwd()
142
+ logger.info(f"Using current working directory: {base_dir}")
143
+ handle = db_config.handle.format(base_dir=base_dir)
135
144
  db_config.handle = handle
145
+ if db_config.schema_location:
146
+ db_config.schema_location = db_config.schema_location.format(base_dir=base_dir)
136
147
  db = self.attach_database(handle, alias=name, **kwargs)
137
148
  db.from_config(db_config)
138
149
 
@@ -4,16 +4,19 @@ import hashlib
4
4
  import logging
5
5
  from collections import defaultdict
6
6
  from pathlib import Path
7
- from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, TextIO, Type, Union
7
+ from typing import TYPE_CHECKING, Any, ClassVar, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
8
8
 
9
9
  import numpy as np
10
+ from linkml_runtime import SchemaView
10
11
  from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
11
12
  from linkml_runtime.linkml_model.meta import ArrayExpression
12
13
  from pydantic import BaseModel
13
14
 
15
+ from linkml_store.api.types import DatabaseType
14
16
  from linkml_store.index import get_indexer
15
17
  from linkml_store.utils.format_utils import load_objects
16
18
  from linkml_store.utils.object_utils import clean_empties
19
+ from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists
17
20
 
18
21
  try:
19
22
  from linkml.validator.report import ValidationResult
@@ -36,7 +39,7 @@ IDENTIFIER = str
36
39
  FIELD_NAME = str
37
40
 
38
41
 
39
- class Collection:
42
+ class Collection(Generic[DatabaseType]):
40
43
  """
41
44
  A collection is an organized set of objects of the same or similar type.
42
45
 
@@ -56,11 +59,12 @@ class Collection:
56
59
  """
57
60
 
58
61
  # name: str
59
- parent: Optional["Database"] = None
62
+ parent: Optional[DatabaseType] = None
60
63
  _indexers: Optional[Dict[str, Indexer]] = None
61
64
  # hidden: Optional[bool] = False
62
65
 
63
66
  metadata: Optional[CollectionConfig] = None
67
+ default_index_name: ClassVar[str] = "simple"
64
68
 
65
69
  def __init__(
66
70
  self, name: str, parent: Optional["Database"] = None, metadata: Optional[CollectionConfig] = None, **kwargs
@@ -197,6 +201,10 @@ class Collection:
197
201
  """
198
202
  raise NotImplementedError
199
203
 
204
+ def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
205
+ patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
206
+ self._broadcast(patches, **kwargs)
207
+
200
208
  def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
201
209
  """
202
210
  Delete one or more objects from the collection.
@@ -301,7 +309,7 @@ class Collection:
301
309
 
302
310
  def query_facets(
303
311
  self, where: Optional[Dict] = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
304
- ) -> Dict[str, Dict[str, int]]:
312
+ ) -> Dict[str, List[Tuple[Any, int]]]:
305
313
  """
306
314
  Run a query to get facet counts for one or more columns.
307
315
 
@@ -319,7 +327,7 @@ class Collection:
319
327
  :param query: A Query object representing the base query.
320
328
  :param facet_columns: A list of column names to get facet counts for.
321
329
  :param facet_limit:
322
- :return: A dictionary where keys are column names and values are pandas DataFrames
330
+ :return: A dictionary where keys are column names and values are tuples
323
331
  containing the facet counts for each unique value in the respective column.
324
332
  """
325
333
  raise NotImplementedError
@@ -414,7 +422,30 @@ class Collection:
414
422
  **kwargs,
415
423
  ) -> QueryResult:
416
424
  """
417
- Search the collection using a full-text search index.
425
+ Search the collection using a text-based index.
426
+
427
+ Example:
428
+
429
+ >>> from linkml_store import Client
430
+ >>> from linkml_store.utils.format_utils import load_objects
431
+ >>> client = Client()
432
+ >>> db = client.attach_database("duckdb")
433
+ >>> collection = db.create_collection("Country")
434
+ >>> objs = load_objects("tests/input/countries/countries.jsonl")
435
+ >>> collection.insert(objs)
436
+
437
+ Now let's index, using the simple trigram-based index
438
+
439
+ >>> index = get_indexer("simple")
440
+ >>> collection.attach_indexer(index)
441
+
442
+ Now let's find all objects:
443
+
444
+ >>> qr = collection.search("France")
445
+ >>> score, top_obj = qr.ranked_rows[0]
446
+ >>> assert score > 0.1
447
+ >>> top_obj["code"]
448
+ 'FR'
418
449
 
419
450
  :param query:
420
451
  :param where:
@@ -424,12 +455,18 @@ class Collection:
424
455
  :return:
425
456
  """
426
457
  if index_name is None:
427
- if len(self._indexers) == 1:
428
- index_name = list(self._indexers.keys())[0]
458
+ if len(self.indexers) == 1:
459
+ index_name = list(self.indexers.keys())[0]
429
460
  else:
430
- raise ValueError("Multiple indexes found. Please specify an index name.")
461
+ logger.warning("Multiple indexes found. Using default index.")
462
+ index_name = self.default_index_name
431
463
  ix_coll = self.parent.get_collection(self._index_collection_name(index_name))
432
- ix = self._indexers.get(index_name)
464
+ if index_name not in self.indexers:
465
+ ix = get_indexer(index_name)
466
+ if not self._indexers:
467
+ self._indexers = {}
468
+ self._indexers[index_name] = ix
469
+ ix = self.indexers.get(index_name)
433
470
  if not ix:
434
471
  raise ValueError(f"No index named {index_name}")
435
472
  qr = ix_coll.find(where=where, limit=-1, **kwargs)
@@ -446,7 +483,10 @@ class Collection:
446
483
  @property
447
484
  def is_internal(self) -> bool:
448
485
  """
449
- Check if the collection is internal
486
+ Check if the collection is internal.
487
+
488
+ Internal collections are hidden by default. Examples of internal collections
489
+ include shadow "index" collections
450
490
 
451
491
  :return:
452
492
  """
@@ -462,6 +502,45 @@ class Collection:
462
502
  """
463
503
  Attach an index to the collection.
464
504
 
505
+ As an example, first let's create a collection in a database:
506
+
507
+ >>> from linkml_store import Client
508
+ >>> from linkml_store.utils.format_utils import load_objects
509
+ >>> client = Client()
510
+ >>> db = client.attach_database("duckdb")
511
+ >>> collection = db.create_collection("Country")
512
+ >>> objs = load_objects("tests/input/countries/countries.jsonl")
513
+ >>> collection.insert(objs)
514
+
515
+ We will create two indexes - one that indexes the whole object
516
+ (default behavior), the other one indexes the name only
517
+
518
+ >>> full_index = get_indexer("simple")
519
+ >>> full_index.name = "full"
520
+ >>> name_index = get_indexer("simple", text_template="{name}")
521
+ >>> name_index.name = "name"
522
+ >>> collection.attach_indexer(full_index)
523
+ >>> collection.attach_indexer(name_index)
524
+
525
+ Now let's find objects using the full index, using the string "France".
526
+ We expect the country France to be the top hit, but the score will
527
+ be less than zero because we did not match all fields in the object.
528
+
529
+ >>> qr = collection.search("France", index_name="full")
530
+ >>> score, top_obj = qr.ranked_rows[0]
531
+ >>> assert score > 0.1
532
+ >>> assert score < 0.5
533
+ >>> top_obj["code"]
534
+ 'FR'
535
+
536
+ Now using the name index
537
+
538
+ >>> qr = collection.search("France", index_name="name")
539
+ >>> score, top_obj = qr.ranked_rows[0]
540
+ >>> assert score > 0.99
541
+ >>> top_obj["code"]
542
+ 'FR'
543
+
465
544
  :param index:
466
545
  :param name:
467
546
  :param auto_index: Automatically index all objects in the collection
@@ -497,15 +576,18 @@ class Collection:
497
576
 
498
577
  def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
499
578
  """
500
- Index a list of objects
579
+ Index a list of objects using a specified index.
580
+
581
+ By default, the indexed objects will be stored in a shadow
582
+ collection in the same database, with additional fields for the index vector
501
583
 
502
584
  :param objs:
503
- :param index_name:
585
+ :param index_name: e.g. simple, llm
504
586
  :param replace:
505
587
  :param kwargs:
506
588
  :return:
507
589
  """
508
- ix = self._indexers.get(index_name)
590
+ ix = self._indexers.get(index_name, None)
509
591
  if not ix:
510
592
  raise ValueError(f"No index named {index_name}")
511
593
  ix_coll_name = self._index_collection_name(index_name)
@@ -523,6 +605,7 @@ class Collection:
523
605
  ix_coll.delete_where()
524
606
 
525
607
  ix_coll.insert(objects_with_ix, **kwargs)
608
+ ix_coll.commit()
526
609
 
527
610
  def list_index_names(self) -> List[str]:
528
611
  """
@@ -557,12 +640,22 @@ class Collection:
557
640
 
558
641
  :return:
559
642
  """
560
- sv = self.parent.schema_view
643
+ sv: SchemaView = self.parent.schema_view
561
644
  if sv:
562
645
  cls = sv.get_class(self.target_class_name)
646
+ if cls and not cls.attributes:
647
+ if not sv.class_induced_slots(cls.name):
648
+ for att in self._induce_attributes():
649
+ cls.attributes[att.name] = att
650
+ sv.set_modified()
563
651
  return cls
564
652
  return None
565
653
 
654
+ def _induce_attributes(self) -> List[SlotDefinition]:
655
+ result = self.find({}, limit=-1)
656
+ cd = self.induce_class_definition_from_objects(result.rows, max_sample_size=None)
657
+ return list(cd.attributes.values())
658
+
566
659
  @property
567
660
  def identifier_attribute_name(self) -> Optional[str]:
568
661
  """
@@ -579,6 +672,37 @@ class Collection:
579
672
  return att.name
580
673
  return None
581
674
 
675
+ def set_identifier_attribute_name(self, name: str):
676
+ """
677
+ Set the name of the identifier attribute for the collection.
678
+
679
+ AKA the primary key.
680
+
681
+ :param name: The name of the identifier attribute.
682
+ """
683
+ cd = self.class_definition()
684
+ if not cd:
685
+ raise ValueError(f"Cannot find class definition for {self.target_class_name}")
686
+ id_att = None
687
+ candidates = []
688
+ sv: SchemaView = self.parent.schema_view
689
+ cls = sv.get_class(cd.name)
690
+ existing_id_slot = sv.get_identifier_slot(cls.name)
691
+ if existing_id_slot:
692
+ if existing_id_slot.name == name:
693
+ return
694
+ existing_id_slot.identifier = False
695
+ for att in cls.attributes.values():
696
+ candidates.append(att.name)
697
+ if att.name == name:
698
+ att.identifier = True
699
+ id_att = att
700
+ else:
701
+ att.identifier = False
702
+ if not id_att:
703
+ raise ValueError(f"No attribute found with name {name} in {candidates}")
704
+ sv.set_modified()
705
+
582
706
  def object_identifier(self, obj: OBJECT, auto=True) -> Optional[IDENTIFIER]:
583
707
  """
584
708
  Return the identifier for an object.
@@ -622,6 +746,8 @@ class Collection:
622
746
  for k, v in obj.items():
623
747
  keys[k].append(v)
624
748
  for k, vs in keys.items():
749
+ if k == "_id":
750
+ continue
625
751
  multivalueds = []
626
752
  inlineds = []
627
753
  rngs = []
@@ -698,6 +824,39 @@ class Collection:
698
824
  """
699
825
  raise NotImplementedError
700
826
 
827
+ def apply_patches(self, patches: List[PatchDict], **kwargs):
828
+ """
829
+ Apply a patch to the collection.
830
+
831
+ Patches conform to the JSON Patch format,
832
+
833
+ :param patches:
834
+ :param kwargs:
835
+ :return:
836
+ """
837
+ all_objs = self.find(limit=-1).rows
838
+ primary_key = self.identifier_attribute_name
839
+ if not primary_key:
840
+ raise ValueError(f"No primary key for {self.target_class_name}")
841
+ new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
842
+ self.replace(new_objs)
843
+
844
+ def diff(self, other: "Collection", **kwargs):
845
+ """
846
+ Diff two collections.
847
+
848
+ :param other:
849
+ :param kwargs:
850
+ :return:
851
+ """
852
+ src_objs = self.find(limit=-1).rows
853
+ tgt_objs = other.find(limit=-1).rows
854
+ primary_key = self.identifier_attribute_name
855
+ if not primary_key:
856
+ raise ValueError(f"No primary key for {self.target_class_name}")
857
+ patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
858
+ return patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
859
+
701
860
  def iter_validate_collection(self, **kwargs) -> Iterator["ValidationResult"]:
702
861
  """
703
862
  Validate the contents of the collection
@@ -717,3 +876,14 @@ class Collection:
717
876
  for obj in result.rows:
718
877
  obj = clean_empties(obj)
719
878
  yield from validator.iter_results(obj, class_name)
879
+
880
+ def commit(self):
881
+ """
882
+ Commit changes to the collection.
883
+
884
+ :return:
885
+ """
886
+ pass
887
+
888
+ def _broadcast(self, *args, **kwargs):
889
+ self.parent.broadcast(self, *args, **kwargs)
@@ -3,7 +3,11 @@ from typing import Any, Dict, List, Optional
3
3
  from pydantic import BaseModel, Field
4
4
 
5
5
 
6
- class CollectionConfig(BaseModel):
6
+ class ConfiguredBaseModel(BaseModel, extra="forbid"):
7
+ pass
8
+
9
+
10
+ class CollectionConfig(ConfiguredBaseModel):
7
11
  name: Optional[str] = Field(
8
12
  default=None,
9
13
  description="An optional name for the collection",
@@ -42,7 +46,7 @@ class CollectionConfig(BaseModel):
42
46
  )
43
47
 
44
48
 
45
- class DatabaseConfig(BaseModel):
49
+ class DatabaseConfig(ConfiguredBaseModel):
46
50
  handle: str = Field(
47
51
  default="duckdb:///:memory:",
48
52
  description="The database handle, e.g., 'duckdb:///:memory:' or 'mongodb://localhost:27017'",
@@ -86,7 +90,7 @@ class DatabaseConfig(BaseModel):
86
90
  )
87
91
 
88
92
 
89
- class ClientConfig(BaseModel):
93
+ class ClientConfig(ConfiguredBaseModel):
90
94
  handle: Optional[str] = Field(
91
95
  default=None,
92
96
  description="The client handle",
@@ -95,6 +99,10 @@ class ClientConfig(BaseModel):
95
99
  default={},
96
100
  description="A dictionary of database configurations",
97
101
  )
102
+ default_database: Optional[str] = Field(
103
+ default=None,
104
+ description="The default database",
105
+ )
98
106
  schema_path: Optional[str] = Field(
99
107
  default=None,
100
108
  description="The path to the LinkML schema file",
@@ -3,9 +3,24 @@ from abc import ABC
3
3
  from collections import defaultdict
4
4
  from copy import copy
5
5
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Any, ClassVar, Dict, Iterator, Optional, Sequence, Type, Union
7
-
6
+ from typing import (
7
+ TYPE_CHECKING,
8
+ Any,
9
+ Callable,
10
+ ClassVar,
11
+ Dict,
12
+ Generic,
13
+ Iterator,
14
+ List,
15
+ Optional,
16
+ Sequence,
17
+ Type,
18
+ Union,
19
+ )
20
+
21
+ from linkml_store.api.types import CollectionType
8
22
  from linkml_store.utils.format_utils import load_objects, render_output
23
+ from linkml_store.utils.patch_utils import PatchDict
9
24
 
10
25
  try:
11
26
  from linkml.validator.report import Severity, ValidationResult
@@ -24,8 +39,10 @@ if TYPE_CHECKING:
24
39
 
25
40
  logger = logging.getLogger(__name__)
26
41
 
42
+ LISTENER = Callable[[Collection, List[PatchDict]], None]
43
+
27
44
 
28
- class Database(ABC):
45
+ class Database(ABC, Generic[CollectionType]):
29
46
  """
30
47
  A Database provides access to named collections of data.
31
48
 
@@ -89,6 +106,8 @@ class Database(ABC):
89
106
  metadata: Optional[DatabaseConfig] = None
90
107
  collection_class: ClassVar[Optional[Type[Collection]]] = None
91
108
 
109
+ listeners: Optional[List[LISTENER]] = None
110
+
92
111
  def __init__(self, handle: Optional[str] = None, metadata: Optional[DatabaseConfig] = None, **kwargs):
93
112
  if metadata:
94
113
  self.metadata = metadata
@@ -233,7 +252,8 @@ class Database(ABC):
233
252
  :param kwargs:
234
253
  :return:
235
254
  """
236
- raise NotImplementedError()
255
+ for coll in self.list_collections():
256
+ coll.commit()
237
257
 
238
258
  def close(self, **kwargs):
239
259
  """
@@ -301,6 +321,7 @@ class Database(ABC):
301
321
  alias = name
302
322
  self._collections[alias] = collection
303
323
  if recreate_if_exists:
324
+ logger.debug(f"Recreating collection {collection.name}")
304
325
  collection.delete_where({}, missing_ok=True)
305
326
  return collection
306
327
 
@@ -418,7 +439,11 @@ class Database(ABC):
418
439
  :return:
419
440
 
420
441
  """
421
- raise NotImplementedError
442
+ if query.from_table:
443
+ collection = self.get_collection(query.from_table)
444
+ return collection.query(query, **kwargs)
445
+ else:
446
+ raise NotImplementedError(f"Querying without a table is not supported in {self.__class__.__name__}")
422
447
 
423
448
  @property
424
449
  def schema_view(self) -> SchemaView:
@@ -689,3 +714,9 @@ class Database(ABC):
689
714
  logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
690
715
  with open(location, "w", encoding="utf-8") as stream:
691
716
  stream.write(render_output(obj, format=target_format))
717
+
718
+ def broadcast(self, source: Collection, patches: List[PatchDict]):
719
+ if not self.listeners:
720
+ return
721
+ for listener in self.listeners:
722
+ listener(source, patches)
@@ -38,6 +38,7 @@ class DuckDBCollection(Collection):
38
38
  with conn.begin():
39
39
  conn.execute(insert(table), objs)
40
40
  conn.commit()
41
+ self._post_insert_hook(objs)
41
42
 
42
43
  def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
43
44
  if not isinstance(objs, list):
@@ -89,7 +90,9 @@ class DuckDBCollection(Collection):
89
90
  cd = self.class_definition()
90
91
  with self.parent.engine.connect() as conn:
91
92
  if not facet_columns:
92
- facet_columns = list(self.class_definition().attributes.keys())
93
+ if not cd:
94
+ raise ValueError(f"No class definition found for {self.target_class_name}")
95
+ facet_columns = list(cd.attributes.keys())
93
96
  for col in facet_columns:
94
97
  logger.debug(f"Faceting on {col}")
95
98
  if isinstance(col, tuple):
@@ -100,7 +103,7 @@ class DuckDBCollection(Collection):
100
103
  facet_query_str = facet_count_sql(facet_query, col, multivalued=sd.multivalued)
101
104
  logger.debug(f"Facet query: {facet_query_str}")
102
105
  rows = list(conn.execute(text(facet_query_str)))
103
- results[col] = rows
106
+ results[col] = [tuple(row) for row in rows]
104
107
  return results
105
108
 
106
109
  def _sqla_table(self, cd: ClassDefinition) -> Table:
@@ -109,7 +112,7 @@ class DuckDBCollection(Collection):
109
112
  cols = []
110
113
  for att in schema_view.class_induced_slots(cd.name):
111
114
  typ = TMAP.get(att.range, sqla.String)
112
- if att.inlined:
115
+ if att.inlined or att.inlined_as_list:
113
116
  typ = sqla.JSON
114
117
  if att.multivalued:
115
118
  typ = sqla.ARRAY(typ, dimensions=1)
@@ -31,6 +31,18 @@ logger = logging.getLogger(__name__)
31
31
 
32
32
 
33
33
  class DuckDBDatabase(Database):
34
+ """
35
+ An adapter for DuckDB databases.
36
+
37
+ Note that this adapter does not make use of a LinkML relational model transformation and
38
+ SQL Alchemy ORM layer. Instead, it attempts to map each collection (which is of type
39
+ some LinkML class) to a *single* DuckDB table. New tables are not created for nested references,
40
+ and linking tables are not created for many-to-many relationships.
41
+
42
+ Instead the native DuckDB ARRAY type is used to store multivalued attributes, and DuckDB JSON
43
+ types are used for nested inlined objects.
44
+ """
45
+
34
46
  _connection: DuckDBPyConnection = None
35
47
  _engine: sqlalchemy.Engine = None
36
48
  collection_class = DuckDBCollection
@@ -103,7 +115,14 @@ class DuckDBDatabase(Database):
103
115
  if row[col]:
104
116
  if isinstance(row[col], list):
105
117
  for i in range(len(row[col])):
106
- row[col][i] = json.loads(row[col][i])
118
+ try:
119
+ parsed_val = json.loads(row[col][i])
120
+ except json.JSONDecodeError as e:
121
+ logger.error(f"Failed to parse col {col}[{i}] == {row[col][i]}")
122
+ raise e
123
+ row[col][i] = parsed_val
124
+ elif isinstance(row[col], dict):
125
+ pass
107
126
  else:
108
127
  row[col] = json.loads(row[col])
109
128
  qr.set_rows(pd.DataFrame(rows))
@@ -1,16 +1,15 @@
1
1
  """
2
- Adapter for DuckDB embedded database.
2
+ Adapter for FileSystem wrapper
3
3
 
4
4
  Handles have the form:
5
5
 
6
- - ``duckdb:///<path>`` for a file-based database
7
- - ``duckdb:///:memory:`` for an in-memory database
8
- """
6
+ - ``file:<path>`` for a local file
7
+ """
9
8
 
10
- from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
11
- from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
9
+ from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
10
+ from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
12
11
 
13
12
  __all__ = [
14
- "DuckDBCollection",
15
- "DuckDBDatabase",
13
+ "FileSystemCollection",
14
+ "FileSystemDatabase",
16
15
  ]