linkml-store 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linkml-store might be problematic. (The original page linked to further details here; the link target is not reproduced in this text.)

@@ -242,7 +242,7 @@ class Client:
242
242
  Return all attached databases
243
243
 
244
244
  Examples
245
- --------
245
+
246
246
  >>> client = Client()
247
247
  >>> _ = client.attach_database("duckdb", alias="test1")
248
248
  >>> _ = client.attach_database("duckdb", alias="test2")
@@ -268,25 +268,81 @@ class Client:
268
268
  """
269
269
  Drop a database.
270
270
 
271
+ Example (in-memory):
272
+
273
+ >>> client = Client()
274
+ >>> db1 = client.attach_database("duckdb", alias="test1")
275
+ >>> db2 = client.attach_database("duckdb", alias="test2")
276
+ >>> len(client.databases)
277
+ 2
278
+ >>> client.drop_database("test1")
279
+ >>> len(client.databases)
280
+ 1
281
+
282
+ Databases that persist on disk:
283
+
284
+ >>> client = Client()
285
+ >>> path = Path("tmp/test.db")
286
+ >>> path.parent.mkdir(parents=True, exist_ok=True)
287
+ >>> db = client.attach_database(f"duckdb:///{path}", alias="test")
288
+ >>> len(client.databases)
289
+ 1
290
+ >>> db.store({"persons": [{"id": "P1", "name": "John"}]})
291
+ >>> db.commit()
292
+ >>> Path("tmp/test.db").exists()
293
+ True
294
+ >>> client.drop_database("test")
295
+ >>> len(client.databases)
296
+ 0
297
+ >>> Path("tmp/test.db").exists()
298
+ False
299
+
300
+ Dropping a non-existent database:
301
+
302
+ >>> client = Client()
303
+ >>> client.drop_database("duckdb:///tmp/made-up1", missing_ok=True)
304
+ >>> client.drop_database("duckdb:///tmp/made-up2", missing_ok=False)
305
+ Traceback (most recent call last):
306
+ ...
307
+ ValueError: Database duckdb:///tmp/made-up2 not found
308
+
271
309
  :param name:
272
310
  :param missing_ok:
273
311
  :return:
274
312
  """
275
- if name in self._databases:
276
- db = self._databases[name]
277
- db.drop(**kwargs)
278
- del self._databases[name]
313
+ if self._databases:
314
+ if name in self._databases:
315
+ db = self._databases[name]
316
+ db.drop(**kwargs)
317
+ del self._databases[name]
318
+ else:
319
+ if not missing_ok:
320
+ raise ValueError(f"Database {name} not found")
279
321
  else:
280
- if not missing_ok:
281
- raise ValueError(f"Database {name} not found")
322
+ db = self.get_database(name, create_if_not_exists=True)
323
+ db.drop(**kwargs)
282
324
 
283
325
  def drop_all_databases(self, **kwargs):
284
326
  """
285
327
  Drop all databases.
286
328
 
329
+ Example (in-memory):
330
+
331
+ >>> client = Client()
332
+ >>> db1 = client.attach_database("duckdb", alias="test1")
333
+ >>> assert "test1" in client.databases
334
+ >>> db2 = client.attach_database("duckdb", alias="test2")
335
+ >>> assert "test2" in client.databases
336
+ >>> client.drop_all_databases()
337
+ >>> len(client.databases)
338
+ 0
339
+
340
+
287
341
  :param missing_ok:
288
342
  :return:
289
343
  """
344
+ if not self._databases:
345
+ return
290
346
  for name in list(self._databases.keys()):
291
347
  self.drop_database(name, missing_ok=False, **kwargs)
292
348
  self._databases = {}
@@ -14,7 +14,7 @@ from pydantic import BaseModel
14
14
 
15
15
  from linkml_store.api.types import DatabaseType
16
16
  from linkml_store.index import get_indexer
17
- from linkml_store.utils.format_utils import load_objects
17
+ from linkml_store.utils.format_utils import load_objects, load_objects_from_url
18
18
  from linkml_store.utils.object_utils import clean_empties
19
19
  from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists
20
20
 
@@ -61,6 +61,7 @@ class Collection(Generic[DatabaseType]):
61
61
  # name: str
62
62
  parent: Optional[DatabaseType] = None
63
63
  _indexers: Optional[Dict[str, Indexer]] = None
64
+ _initialized: Optional[bool] = None
64
65
  # hidden: Optional[bool] = False
65
66
 
66
67
  metadata: Optional[CollectionConfig] = None
@@ -73,7 +74,7 @@ class Collection(Generic[DatabaseType]):
73
74
  if metadata:
74
75
  self.metadata = metadata
75
76
  else:
76
- self.metadata = CollectionConfig(name=name, **kwargs)
77
+ self.metadata = CollectionConfig(type=name, **kwargs)
77
78
  if not self.metadata.alias:
78
79
  self.metadata.alias = name
79
80
  if not self.metadata.type:
@@ -81,17 +82,6 @@ class Collection(Generic[DatabaseType]):
81
82
  # if name is not None and self.metadata.name is not None and name != self.metadata.name:
82
83
  # raise ValueError(f"Name mismatch: {name} != {self.metadata.name}")
83
84
 
84
- @property
85
- def name(self) -> str:
86
- """
87
- Return the name of the collection.
88
-
89
- TODO: deprecate in favor of Type
90
-
91
- :return: name of the collection
92
- """
93
- return self.metadata.name
94
-
95
85
  @property
96
86
  def hidden(self) -> bool:
97
87
  """
@@ -118,12 +108,18 @@ class Collection(Generic[DatabaseType]):
118
108
  >>> collection.target_class_name
119
109
  'Person'
120
110
 
111
+ >>> collection = db.create_collection("Organization")
112
+ >>> collection.target_class_name
113
+ 'Organization'
114
+ >>> collection.alias
115
+ 'Organization'
116
+
121
117
  :return: name of the class which members of this collection instantiate
122
118
  """
123
119
  # TODO: this is a shim layer until we can normalize on this
124
120
  if self.metadata.type:
125
121
  return self.metadata.type
126
- return self.name
122
+ return self.alias
127
123
 
128
124
  @property
129
125
  def alias(self):
@@ -161,10 +157,9 @@ class Collection(Generic[DatabaseType]):
161
157
  :return:
162
158
  """
163
159
  # TODO: this is a shim layer until we can normalize on this
164
- # TODO: this is a shim layer until we can normalize on this
165
160
  if self.metadata.alias:
166
161
  return self.metadata.alias
167
- return self.name
162
+ return self.target_class_name
168
163
 
169
164
  def replace(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
170
165
  """
@@ -201,7 +196,14 @@ class Collection(Generic[DatabaseType]):
201
196
  """
202
197
  raise NotImplementedError
203
198
 
199
+ def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
200
+ logger.info(f"Pre-query hook (state: {self._initialized}; Q= {query}")
201
+ if not self._initialized:
202
+ self._materialize_derivations()
203
+ self._initialized = True
204
+
204
205
  def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
206
+ self._initialized = True
205
207
  patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
206
208
  self._broadcast(patches, **kwargs)
207
209
 
@@ -305,6 +307,7 @@ class Collection(Generic[DatabaseType]):
305
307
  :param kwargs:
306
308
  :return:
307
309
  """
310
+ self._pre_query_hook()
308
311
  return self.parent.query(query, **kwargs)
309
312
 
310
313
  def query_facets(
@@ -340,7 +343,6 @@ class Collection(Generic[DatabaseType]):
340
343
  :param kwargs:
341
344
  :return:
342
345
  """
343
- # TODO
344
346
  id_field = self.identifier_attribute_name
345
347
  if not id_field:
346
348
  raise ValueError(f"No identifier for {self.name}")
@@ -399,9 +401,10 @@ class Collection(Generic[DatabaseType]):
399
401
  :return:
400
402
  """
401
403
  query = self._create_query(where_clause=where)
404
+ self._pre_query_hook(query)
402
405
  return self.query(query, **kwargs)
403
406
 
404
- def find_iter(self, where: Optional[Any] = None, **kwargs) -> Iterator[OBJECT]:
407
+ def find_iter(self, where: Optional[Any] = None, page_size=100, **kwargs) -> Iterator[OBJECT]:
405
408
  """
406
409
  Find objects in the collection using a where query.
407
410
 
@@ -409,9 +412,22 @@ class Collection(Generic[DatabaseType]):
409
412
  :param kwargs:
410
413
  :return:
411
414
  """
412
- qr = self.find(where=where, limit=-1, **kwargs)
413
- for row in qr.rows:
414
- yield row
415
+ total_rows = None
416
+ offset = 0
417
+ if page_size < 1:
418
+ raise ValueError(f"Invalid page size: {page_size}")
419
+ while True:
420
+ qr = self.find(where=where, offset=offset, limit=page_size, **kwargs)
421
+ if total_rows is None:
422
+ total_rows = qr.num_rows
423
+ if not qr.rows:
424
+ return
425
+ for row in qr.rows:
426
+ yield row
427
+ offset += page_size
428
+ if offset >= total_rows:
429
+ break
430
+ return
415
431
 
416
432
  def search(
417
433
  self,
@@ -454,6 +470,7 @@ class Collection(Generic[DatabaseType]):
454
470
  :param kwargs:
455
471
  :return:
456
472
  """
473
+ self._pre_query_hook()
457
474
  if index_name is None:
458
475
  if len(self.indexers) == 1:
459
476
  index_name = list(self.indexers.keys())[0]
@@ -494,10 +511,93 @@ class Collection(Generic[DatabaseType]):
494
511
  raise ValueError(f"Collection has no alias: {self} // {self.metadata}")
495
512
  return self.alias.startswith("internal__")
496
513
 
497
- def load_from_source(self):
498
- objects = load_objects(self.metadata.source_location)
514
+ def exists(self) -> Optional[bool]:
515
+ """
516
+ Check if the collection exists.
517
+
518
+ :return:
519
+ """
520
+ cd = self.class_definition()
521
+ return cd is not None
522
+
523
+ def load_from_source(self, load_if_exists=False):
524
+ """
525
+ Load objects from the source location.
526
+
527
+ :param load_if_exists:
528
+ :return:
529
+ """
530
+ if not load_if_exists and self.exists():
531
+ return
532
+ metadata = self.metadata
533
+ if metadata.source:
534
+ source = metadata.source
535
+ kwargs = source.arguments or {}
536
+ if source.local_path:
537
+ objects = load_objects(
538
+ metadata.source.local_path, format=source.format, expected_type=source.expected_type, **kwargs
539
+ )
540
+ elif metadata.source.url:
541
+ objects = load_objects_from_url(
542
+ metadata.source.url, format=source.format, expected_type=source.expected_type, **kwargs
543
+ )
499
544
  self.insert(objects)
500
545
 
546
+ def _check_if_initialized(self) -> bool:
547
+ return self._initialized
548
+
549
+ def _materialize_derivations(self, **kwargs):
550
+ metadata = self.metadata
551
+ if not metadata.derived_from:
552
+ logger.info(f"No metadata for {self.alias}; no derivations")
553
+ return
554
+ if self._check_if_initialized():
555
+ logger.info(f"Already initialized {self.alias}; no derivations")
556
+ return
557
+ parent_db = self.parent
558
+ client = parent_db.parent
559
+ # cd = self.class_definition()
560
+ for derivation in metadata.derived_from:
561
+ # TODO: optimize this; utilize underlying engine
562
+ logger.info(f"Deriving from {derivation}")
563
+ if derivation.database:
564
+ db = client.get_database(derivation.database)
565
+ else:
566
+ db = parent_db
567
+ if derivation.collection:
568
+ coll = db.get_collection(derivation.collection)
569
+ else:
570
+ coll = self
571
+ coll.class_definition()
572
+ source_obj_iter = coll.find_iter(derivation.where or {})
573
+ mappings = derivation.mappings
574
+ if not mappings:
575
+ raise ValueError(f"No mappings for {self.name}")
576
+ target_class_name = self.target_class_name
577
+ from linkml_map.session import Session
578
+
579
+ session = Session()
580
+ session.set_source_schema(db.schema_view.schema)
581
+ session.set_object_transformer(
582
+ {
583
+ "class_derivations": {
584
+ target_class_name: {
585
+ "populated_from": coll.target_class_name,
586
+ "slot_derivations": mappings,
587
+ },
588
+ }
589
+ },
590
+ )
591
+ logger.debug(f"Session Spec: {session.object_transformer}")
592
+ tr_objs = []
593
+ for source_obj in source_obj_iter:
594
+ tr_obj = session.transform(source_obj, source_type=coll.target_class_name)
595
+ tr_objs.append(tr_obj)
596
+ if not tr_objs:
597
+ raise ValueError(f"No objects derived from {coll.name}")
598
+ self.insert(tr_objs)
599
+ self.commit()
600
+
501
601
  def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = None, auto_index=True, **kwargs):
502
602
  """
503
603
  Attach an index to the collection.
@@ -572,7 +672,7 @@ class Collection(Generic[DatabaseType]):
572
672
  :param indexer:
573
673
  :return:
574
674
  """
575
- return f"internal__index__{self.name}__{index_name}"
675
+ return f"internal__index__{self.alias}__{index_name}"
576
676
 
577
677
  def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
578
678
  """
@@ -638,6 +738,9 @@ class Collection(Generic[DatabaseType]):
638
738
  """
639
739
  Return the class definition for the collection.
640
740
 
741
+ If no schema has been explicitly set, and the native database does not
742
+ have a schema, then a schema will be induced from the objects in the collection.
743
+
641
744
  :return:
642
745
  """
643
746
  sv: SchemaView = self.parent.schema_view
@@ -722,7 +825,9 @@ class Collection(Generic[DatabaseType]):
722
825
  else:
723
826
  return None
724
827
 
725
- def induce_class_definition_from_objects(self, objs: List[OBJECT], max_sample_size=10) -> ClassDefinition:
828
+ def induce_class_definition_from_objects(
829
+ self, objs: List[OBJECT], max_sample_size: Optional[int] = None
830
+ ) -> ClassDefinition:
726
831
  """
727
832
  Induce a class definition from a list of objects.
728
833
 
@@ -733,6 +838,9 @@ class Collection(Generic[DatabaseType]):
733
838
  :param max_sample_size:
734
839
  :return:
735
840
  """
841
+ # TODO: use schemaview
842
+ if max_sample_size is None:
843
+ max_sample_size = 10
736
844
  if not self.target_class_name:
737
845
  raise ValueError(f"No target_class_name for {self.alias}")
738
846
  cd = ClassDefinition(self.target_class_name)
@@ -795,6 +903,7 @@ class Collection(Generic[DatabaseType]):
795
903
  for other_rng in rngs:
796
904
  if rng != other_rng:
797
905
  raise ValueError(f"Conflict: {rng} != {other_rng} for {vs}")
906
+ logger.debug(f"Inducing {k} as {rng} {multivalued} {inlined}")
798
907
  cd.attributes[k] = SlotDefinition(k, range=rng, multivalued=multivalued, inlined=inlined)
799
908
  if exact_dimensions_list:
800
909
  array_expr = ArrayExpression(exact_number_dimensions=len(exact_dimensions_list[0]))
@@ -828,7 +937,7 @@ class Collection(Generic[DatabaseType]):
828
937
  """
829
938
  Apply a patch to the collection.
830
939
 
831
- Patches conform to the JSON Patch format,
940
+ Patches conform to the JSON Patch format.
832
941
 
833
942
  :param patches:
834
943
  :param kwargs:
@@ -841,11 +950,11 @@ class Collection(Generic[DatabaseType]):
841
950
  new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
842
951
  self.replace(new_objs)
843
952
 
844
- def diff(self, other: "Collection", **kwargs):
953
+ def diff(self, other: "Collection", **kwargs) -> List[PatchDict]:
845
954
  """
846
955
  Diff two collections.
847
956
 
848
- :param other:
957
+ :param other: The collection to diff against
849
958
  :param kwargs:
850
959
  :return:
851
960
  """
@@ -872,8 +981,7 @@ class Collection(Generic[DatabaseType]):
872
981
  if not cd:
873
982
  raise ValueError(f"Cannot find class definition for {self.target_class_name}")
874
983
  class_name = cd.name
875
- result = self.find(**kwargs)
876
- for obj in result.rows:
984
+ for obj in self.find_iter(**kwargs):
877
985
  obj = clean_empties(obj)
878
986
  yield from validator.iter_results(obj, class_name)
879
987
 
@@ -4,14 +4,43 @@ from pydantic import BaseModel, Field
4
4
 
5
5
 
6
6
  class ConfiguredBaseModel(BaseModel, extra="forbid"):
7
+ """
8
+ Base class for all configuration models.
9
+ """
10
+
7
11
  pass
8
12
 
9
13
 
14
+ class DerivationConfiguration(ConfiguredBaseModel):
15
+ """
16
+ Configuration for a derivation
17
+ """
18
+
19
+ database: Optional[str] = None
20
+ collection: Optional[str] = None
21
+ mappings: Optional[Dict[str, Any]] = None
22
+ where: Optional[Dict[str, Any]] = None
23
+
24
+
25
+ class CollectionSource(ConfiguredBaseModel):
26
+ """
27
+ Metadata about a source
28
+ """
29
+
30
+ url: Optional[str] = None
31
+ local_path: Optional[str] = None
32
+ source_location: Optional[str] = None
33
+ refresh_interval_days: Optional[float] = None
34
+ expected_type: Optional[str] = None
35
+ format: Optional[str] = None
36
+ arguments: Optional[Dict[str, Any]] = None
37
+
38
+
10
39
  class CollectionConfig(ConfiguredBaseModel):
11
- name: Optional[str] = Field(
12
- default=None,
13
- description="An optional name for the collection",
14
- )
40
+ """
41
+ Configuration for a collection
42
+ """
43
+
15
44
  alias: Optional[str] = Field(
16
45
  default=None,
17
46
  description="An optional alias for the collection",
@@ -40,13 +69,22 @@ class CollectionConfig(ConfiguredBaseModel):
40
69
  default=False,
41
70
  description="Whether the collection is prepopulated",
42
71
  )
43
- source_location: Optional[str] = Field(
72
+ source: Optional[CollectionSource] = Field(
44
73
  default=None,
45
- description="Filesystem or remote URL that stores the data",
74
+ description="Metadata about the source",
75
+ )
76
+ # TODO: derived_from
77
+ derived_from: Optional[List[DerivationConfiguration]] = Field(
78
+ default=None,
79
+ description="LinkML-Map derivations",
46
80
  )
47
81
 
48
82
 
49
83
  class DatabaseConfig(ConfiguredBaseModel):
84
+ """
85
+ Configuration for a database
86
+ """
87
+
50
88
  handle: str = Field(
51
89
  default="duckdb:///:memory:",
52
90
  description="The database handle, e.g., 'duckdb:///:memory:' or 'mongodb://localhost:27017'",
@@ -91,6 +129,10 @@ class DatabaseConfig(ConfiguredBaseModel):
91
129
 
92
130
 
93
131
  class ClientConfig(ConfiguredBaseModel):
132
+ """
133
+ Configuration for a client
134
+ """
135
+
94
136
  handle: Optional[str] = Field(
95
137
  default=None,
96
138
  description="The client handle",
@@ -149,26 +149,19 @@ class Database(ABC, Generic[CollectionType]):
149
149
  def _initialize_collections(self):
150
150
  if not self.metadata.collections:
151
151
  return
152
- for name, collection_config in self.metadata.collections.items():
153
- alias = collection_config.alias
154
- typ = collection_config.type
155
- # if typ and alias is None:
156
- # alias = name
157
- # if typ is None:
158
- # typ = name
159
- # collection = self.create_collection(
160
- # typ, alias=alias, metadata=collection_config.metadata
161
- # )
162
- if False and typ is not None:
163
- if not alias:
164
- alias = name
165
- name = typ
166
- if not collection_config.name:
167
- collection_config.name = name
168
- _collection = self.create_collection(name, alias=alias, metadata=collection_config)
152
+ for k, collection_config in self.metadata.collections.items():
153
+ if collection_config.alias:
154
+ if collection_config.alias != k:
155
+ raise ValueError(f"Alias mismatch: {collection_config.alias} != {k}")
156
+ alias = k
157
+ typ = collection_config.type or alias
158
+ _collection = self.create_collection(typ, alias=alias, metadata=collection_config)
159
+ assert _collection.alias == alias
160
+ assert _collection.target_class_name == typ
169
161
  if collection_config.attributes:
162
+ # initialize schema
170
163
  sv = self.schema_view
171
- cd = ClassDefinition(name, attributes=collection_config.attributes)
164
+ cd = ClassDefinition(typ, attributes=collection_config.attributes)
172
165
  sv.schema.classes[cd.name] = cd
173
166
  sv.set_modified()
174
167
  # assert collection.class_definition() is not None
@@ -275,7 +268,7 @@ class Database(ABC, Generic[CollectionType]):
275
268
  metadata: Optional[CollectionConfig] = None,
276
269
  recreate_if_exists=False,
277
270
  **kwargs,
278
- ) -> Collection:
271
+ ) -> CollectionType:
279
272
  """
280
273
  Create a new collection in the current database.
281
274
 
@@ -307,8 +300,10 @@ class Database(ABC, Generic[CollectionType]):
307
300
  if not name:
308
301
  raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")
309
302
  collection_cls = self.collection_class
310
- collection = collection_cls(name=name, alias=alias, parent=self, metadata=metadata)
311
- if metadata and metadata.source_location:
303
+ collection = collection_cls(name=name, parent=self, metadata=metadata)
304
+ if alias:
305
+ collection.metadata.alias = alias
306
+ if metadata and metadata.source:
312
307
  collection.load_from_source()
313
308
  if metadata and metadata.attributes:
314
309
  sv = self.schema_view
@@ -321,7 +316,7 @@ class Database(ABC, Generic[CollectionType]):
321
316
  alias = name
322
317
  self._collections[alias] = collection
323
318
  if recreate_if_exists:
324
- logger.debug(f"Recreating collection {collection.name}")
319
+ logger.debug(f"Recreating collection {collection.alias}")
325
320
  collection.delete_where({}, missing_ok=True)
326
321
  return collection
327
322
 
@@ -339,7 +334,7 @@ class Database(ABC, Generic[CollectionType]):
339
334
  >>> collections = db.list_collections()
340
335
  >>> len(collections)
341
336
  2
342
- >>> [c.name for c in collections]
337
+ >>> [c.target_class_name for c in collections]
343
338
  ['Person', 'Product']
344
339
 
345
340
  :param include_internal: include internal collections
@@ -367,7 +362,7 @@ class Database(ABC, Generic[CollectionType]):
367
362
  ['Person', 'Product']
368
363
 
369
364
  """
370
- return [c.name for c in self.list_collections(**kwargs)]
365
+ return [c.alias for c in self.list_collections(**kwargs)]
371
366
 
372
367
  def get_collection(
373
368
  self, name: str, type: Optional[str] = None, create_if_not_exists=True, **kwargs
@@ -410,7 +405,7 @@ class Database(ABC, Generic[CollectionType]):
410
405
  """
411
406
  Initialize collections.
412
407
 
413
- Not typically called directly: consider making hidden
408
+ TODO: Not typically called directly: consider making this private
414
409
  :return:
415
410
  """
416
411
  raise NotImplementedError
@@ -502,7 +497,7 @@ class Database(ABC, Generic[CollectionType]):
502
497
  >>> sorted(collection.class_definition().slots)
503
498
  ['capital', 'code', 'continent', 'languages', 'name']
504
499
 
505
- :param schema_view:
500
+ :param schema_view: can be either a path to the schema, or a SchemaView object
506
501
  :return:
507
502
  """
508
503
  if isinstance(schema_view, Path):
@@ -585,7 +580,15 @@ class Database(ABC, Generic[CollectionType]):
585
580
 
586
581
  :return: A schema view
587
582
  """
588
- raise NotImplementedError()
583
+ logger.info(f"Inducing schema view for {self.handle}")
584
+ from linkml_runtime.utils.schema_builder import SchemaBuilder
585
+
586
+ sb = SchemaBuilder()
587
+
588
+ for collection_name in self.list_collection_names():
589
+ coll = self.get_collection(collection_name)
590
+ sb.add_class(coll.target_class_name)
591
+ return SchemaView(sb.schema)
589
592
 
590
593
  def iter_validate_database(self, **kwargs) -> Iterator["ValidationResult"]:
591
594
  """
@@ -683,6 +686,21 @@ class Database(ABC, Generic[CollectionType]):
683
686
  """
684
687
  Drop the database and all collections.
685
688
 
689
+ >>> from linkml_store.api.client import Client
690
+ >>> client = Client()
691
+ >>> path = Path("/tmp/test.db")
692
+ >>> path.parent.mkdir(exist_ok=True, parents=True)
693
+ >>> db = client.attach_database(f"duckdb:///{path}")
694
+ >>> db.store({"persons": [{"id": "P1", "name": "John", "age_in_years": 30}]})
695
+ >>> coll = db.get_collection("persons")
696
+ >>> coll.find({}).num_rows
697
+ 1
698
+ >>> db.drop()
699
+ >>> db = client.attach_database("duckdb:///tmp/test.db", alias="test")
700
+ >>> coll = db.get_collection("persons")
701
+ >>> coll.find({}).num_rows
702
+ 0
703
+
686
704
  :param kwargs: additional arguments
687
705
  """
688
706
  raise NotImplementedError()
@@ -18,6 +18,9 @@ logger = logging.getLogger(__name__)
18
18
  class DuckDBCollection(Collection):
19
19
  _table_created: bool = None
20
20
 
21
+ def __init__(self, *args, **kwargs):
22
+ super().__init__(*args, **kwargs)
23
+
21
24
  def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
22
25
  logger.debug(f"Inserting {len(objs)}")
23
26
  if not isinstance(objs, list):
@@ -123,6 +126,17 @@ class DuckDBCollection(Collection):
123
126
  t = Table(self.alias, metadata_obj, *cols)
124
127
  return t
125
128
 
129
+ def _check_if_initialized(self) -> bool:
130
+ # if self._initialized:
131
+ # return True
132
+ query = Query(
133
+ from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
134
+ )
135
+ qr = self.parent.query(query)
136
+ if qr.num_rows > 0:
137
+ return True
138
+ return False
139
+
126
140
  def _create_table(self, cd: ClassDefinition):
127
141
  if self._table_created or self.metadata.is_prepopulated:
128
142
  logger.info(f"Already have table for: {cd.name}")
@@ -134,6 +148,7 @@ class DuckDBCollection(Collection):
134
148
  if qr.num_rows > 0:
135
149
  logger.info(f"Table already exists for {cd.name}")
136
150
  self._table_created = True
151
+ self._initialized = True
137
152
  self.metadata.is_prepopulated = True
138
153
  return
139
154
  logger.info(f"Creating table for {cd.name}")
@@ -144,4 +159,5 @@ class DuckDBCollection(Collection):
144
159
  conn.execute(text(ddl))
145
160
  conn.commit()
146
161
  self._table_created = True
162
+ self._initialized = True
147
163
  self.metadata.is_prepopulated = True