linkml-store 0.2.5__tar.gz → 0.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. {linkml_store-0.2.5 → linkml_store-0.2.9}/PKG-INFO +4 -3
  2. {linkml_store-0.2.5 → linkml_store-0.2.9}/pyproject.toml +4 -2
  3. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/client.py +9 -6
  4. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/collection.py +118 -5
  5. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/database.py +45 -14
  6. linkml_store-0.2.9/src/linkml_store/api/stores/duckdb/duckdb_collection.py +337 -0
  7. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/duckdb/duckdb_database.py +52 -19
  8. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/filesystem/__init__.py +1 -1
  9. linkml_store-0.2.9/src/linkml_store/api/stores/mongodb/mongodb_collection.py +365 -0
  10. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/mongodb/mongodb_database.py +8 -3
  11. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/solr/solr_collection.py +7 -1
  12. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/cli.py +202 -21
  13. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/index/implementations/llm_indexer.py +14 -6
  14. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/index/indexer.py +7 -4
  15. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/implementations/llm_inference_engine.py +13 -9
  16. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/implementations/rag_inference_engine.py +13 -10
  17. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/implementations/sklearn_inference_engine.py +7 -1
  18. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/inference_config.py +1 -0
  19. linkml_store-0.2.9/src/linkml_store/utils/dat_parser.py +95 -0
  20. linkml_store-0.2.9/src/linkml_store/utils/enrichment_analyzer.py +217 -0
  21. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/format_utils.py +183 -3
  22. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/llm_utils.py +3 -1
  23. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/pandas_utils.py +1 -1
  24. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/sql_utils.py +7 -1
  25. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/vector_utils.py +4 -11
  26. linkml_store-0.2.5/src/linkml_store/api/stores/duckdb/duckdb_collection.py +0 -169
  27. linkml_store-0.2.5/src/linkml_store/api/stores/mongodb/mongodb_collection.py +0 -179
  28. {linkml_store-0.2.5 → linkml_store-0.2.9}/LICENSE +0 -0
  29. {linkml_store-0.2.5 → linkml_store-0.2.9}/README.md +0 -0
  30. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/__init__.py +0 -0
  31. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/__init__.py +0 -0
  32. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/config.py +0 -0
  33. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/queries.py +0 -0
  34. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/__init__.py +0 -0
  35. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
  36. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
  37. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
  38. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
  39. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
  40. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
  41. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
  42. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
  43. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
  44. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
  45. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
  46. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
  47. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
  48. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/neo4j/neo4j_database.py +0 -0
  49. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/solr/__init__.py +0 -0
  50. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
  51. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
  52. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/types.py +0 -0
  53. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/constants.py +0 -0
  54. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/graphs/__init__.py +0 -0
  55. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/graphs/graph_map.py +0 -0
  56. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/index/__init__.py +0 -0
  57. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/index/implementations/__init__.py +0 -0
  58. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
  59. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/__init__.py +0 -0
  60. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/evaluation.py +0 -0
  61. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/implementations/__init__.py +0 -0
  62. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/implementations/rule_based_inference_engine.py +0 -0
  63. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/inference_engine.py +0 -0
  64. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/inference_engine_registry.py +0 -0
  65. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/__init__.py +0 -0
  66. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/change_utils.py +0 -0
  67. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/file_utils.py +0 -0
  68. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/io.py +0 -0
  69. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/mongodb_utils.py +0 -0
  70. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/neo4j_utils.py +0 -0
  71. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/object_utils.py +0 -0
  72. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/patch_utils.py +0 -0
  73. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/query_utils.py +0 -0
  74. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/schema_utils.py +0 -0
  75. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/sklearn_utils.py +0 -0
  76. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/stats_utils.py +0 -0
  77. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/__init__.py +0 -0
  78. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/__init__.py +0 -0
  79. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/base.html.j2 +0 -0
  80. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
  81. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
  82. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
  83. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
  84. {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/main.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: linkml-store
3
- Version: 0.2.5
3
+ Version: 0.2.9
4
4
  Summary: linkml-store
5
5
  License: MIT
6
6
  Author: Author 1
@@ -24,6 +24,7 @@ Provides-Extra: map
24
24
  Provides-Extra: mongodb
25
25
  Provides-Extra: neo4j
26
26
  Provides-Extra: pyarrow
27
+ Provides-Extra: rdf
27
28
  Provides-Extra: renderer
28
29
  Provides-Extra: scipy
29
30
  Provides-Extra: tests
@@ -34,12 +35,12 @@ Requires-Dist: duckdb (>=0.10.1)
34
35
  Requires-Dist: duckdb-engine (>=0.11.2)
35
36
  Requires-Dist: fastapi ; extra == "fastapi"
36
37
  Requires-Dist: frictionless ; extra == "frictionless"
37
- Requires-Dist: gcsfs
38
38
  Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
39
39
  Requires-Dist: h5py ; extra == "h5py"
40
40
  Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
41
41
  Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
42
42
  Requires-Dist: jsonpatch (>=1.33)
43
+ Requires-Dist: lightrdf ; extra == "rdf"
43
44
  Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
44
45
  Requires-Dist: linkml-runtime (>=1.8.0)
45
46
  Requires-Dist: linkml_map ; extra == "map"
@@ -54,7 +55,7 @@ Requires-Dist: plotly ; extra == "analytics"
54
55
  Requires-Dist: py2neo ; extra == "neo4j"
55
56
  Requires-Dist: pyarrow ; extra == "pyarrow"
56
57
  Requires-Dist: pydantic (>=2.0.0,<3.0.0)
57
- Requires-Dist: pymongo ; extra == "mongodb"
58
+ Requires-Dist: pymongo (>=4.11,<5.0) ; extra == "mongodb"
58
59
  Requires-Dist: pystow (>=0.5.4,<0.6.0)
59
60
  Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
60
61
  Requires-Dist: ruff (>=0.6.2) ; extra == "tests"
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "linkml-store"
3
- version = "0.2.5"
3
+ version = "0.2.9"
4
4
  description = "linkml-store"
5
5
  authors = ["Author 1 <author@org.org>"]
6
6
  license = "MIT"
@@ -23,8 +23,9 @@ pystow = "^0.5.4"
23
23
  black = { version=">=24.0.0", optional = true }
24
24
  ruff = { version=">=0.6.2", optional = true }
25
25
  llm = { version="*", optional = true }
26
+ lightrdf = { version="*", optional = true }
26
27
  tiktoken = { version="*", optional = true }
27
- pymongo = { version="*", optional = true }
28
+ pymongo = "^4.11"
28
29
  neo4j = { version="*", optional = true }
29
30
  py2neo = { version="*", optional = true }
30
31
  networkx = { version="*", optional = true }
@@ -91,6 +92,7 @@ renderer = ["linkml_renderer"]
91
92
  fastapi = ["fastapi", "uvicorn"]
92
93
  frictionless = ["frictionless"]
93
94
  scipy = ["scipy", "scikit-learn"]
95
+ rdf = ["lightrdf"]
94
96
  #ibis = ["ibis-framework", "multipledispatch", "gcsfs"]
95
97
  bigquery = ["google-cloud-bigquery"]
96
98
  all = ["llm", "mongodb", "neo4j", "validation", "map", "renderer", "bigquery"]
@@ -12,9 +12,9 @@ from linkml_store.api.config import ClientConfig
12
12
  logger = logging.getLogger(__name__)
13
13
 
14
14
 
15
-
16
15
  HANDLE_MAP = {
17
16
  "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
17
+ "sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
18
18
  "solr": "linkml_store.api.stores.solr.solr_database.SolrDatabase",
19
19
  "mongodb": "linkml_store.api.stores.mongodb.mongodb_database.MongoDBDatabase",
20
20
  "chromadb": "linkml_store.api.stores.chromadb.chromadb_database.ChromaDBDatabase",
@@ -24,6 +24,8 @@ HANDLE_MAP = {
24
24
 
25
25
  SUFFIX_MAP = {
26
26
  "ddb": "duckdb:///{path}",
27
+ "duckdb": "duckdb:///{path}",
28
+ "db": "duckdb:///{path}",
27
29
  }
28
30
 
29
31
 
@@ -204,9 +206,10 @@ class Client:
204
206
  if ":" not in handle:
205
207
  if alias is None:
206
208
  alias = handle
207
- suffix = handle.split(".")[-1]
208
- if suffix in SUFFIX_MAP:
209
- handle = SUFFIX_MAP[suffix].format(path=handle)
209
+ if "." in handle:
210
+ suffix = handle.split(".")[-1]
211
+ if suffix in SUFFIX_MAP:
212
+ handle = SUFFIX_MAP[suffix].format(path=handle)
210
213
  if ":" not in handle:
211
214
  scheme = handle
212
215
  handle = None
@@ -216,14 +219,14 @@ class Client:
216
219
  scheme, _ = handle.split(":", 1)
217
220
  if scheme not in HANDLE_MAP:
218
221
  raise ValueError(f"Unknown scheme: {scheme}")
219
- module_path, class_name = HANDLE_MAP[scheme].rsplit('.', 1)
222
+ module_path, class_name = HANDLE_MAP[scheme].rsplit(".", 1)
220
223
  try:
221
224
  module = importlib.import_module(module_path)
222
225
  cls = getattr(module, class_name)
223
226
  except ImportError as e:
224
227
  raise ImportError(f"Failed to import {scheme} database. Make sure the correct extras are installed: {e}")
225
228
 
226
- #cls = HANDLE_MAP[scheme]
229
+ # cls = HANDLE_MAP[scheme]
227
230
  db = cls(handle=handle, recreate_if_exists=recreate_if_exists, **kwargs)
228
231
  if schema_view:
229
232
  db.set_schema_view(schema_view)
@@ -1,6 +1,7 @@
1
1
  """A structure for representing collections of similar objects."""
2
2
 
3
3
  import hashlib
4
+ import json
4
5
  import logging
5
6
  from collections import defaultdict
6
7
  from pathlib import Path
@@ -210,8 +211,62 @@ class Collection(Generic[DatabaseType]):
210
211
  """
211
212
  raise NotImplementedError
212
213
 
214
+ def index(
215
+ self,
216
+ objs: Union[OBJECT, List[OBJECT]],
217
+ index_name: Optional[str] = None,
218
+ replace: bool = False,
219
+ unique: bool = False,
220
+ **kwargs,
221
+ ) -> None:
222
+ """
223
+ Index objects in the collection.
224
+
225
+ :param objs:
226
+ :param index_name:
227
+ :param replace: replace the index, or not
228
+ :param unique: boolean used to declare the index unique or not
229
+ :param kwargs:
230
+ :return:
231
+ """
232
+ raise NotImplementedError
233
+
234
+ def upsert(
235
+ self,
236
+ objs: Union[OBJECT, List[OBJECT]],
237
+ filter_fields: List[str],
238
+ update_fields: Union[List[str], None] = None,
239
+ **kwargs,
240
+ ):
241
+ """
242
+ Insert or update (upsert) one or more objects in the collection.
243
+
244
+ >>> from linkml_store import Client
245
+ >>> client = Client()
246
+ >>> db = client.attach_database("mongodb", alias="test")
247
+ >>> collection = db.create_collection("Person")
248
+ >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
249
+ >>> collection.upsert(objs)
250
+
251
+ :param objs:
252
+ :param filter_fields: List of field names to use as the filter for matching existing objects.
253
+ :param update_fields: List of field names to include in the update. If None, all fields are updated.
254
+ :param kwargs:
255
+
256
+ :return:
257
+ """
258
+ raise NotImplementedError
259
+
213
260
  def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
214
- logger.info(f"Pre-query hook (state: {self._initialized}; Q= {query}")
261
+ """
262
+ Pre-query hook.
263
+
264
+ This is called before a query is executed. It is used to materialize derivations and indexes.
265
+ :param query:
266
+ :param kwargs:
267
+ :return:
268
+ """
269
+ logger.debug(f"Pre-query hook (state: {self._initialized}; Q= {query}") # if logging.info, this is very noisy.
215
270
  if not self._initialized:
216
271
  self._materialize_derivations()
217
272
  self._initialized = True
@@ -402,7 +457,12 @@ class Collection(Generic[DatabaseType]):
402
457
  return qr.rows[0]
403
458
  return None
404
459
 
405
- def find(self, where: Optional[Any] = None, **kwargs) -> QueryResult:
460
+ def find(
461
+ self,
462
+ where: Optional[Any] = None,
463
+ select_cols: Optional[List[str]] = None,
464
+ **kwargs,
465
+ ) -> QueryResult:
406
466
  """
407
467
  Find objects in the collection using a where query.
408
468
 
@@ -432,10 +492,14 @@ class Collection(Generic[DatabaseType]):
432
492
 
433
493
 
434
494
  :param where:
495
+ :param select_cols:
435
496
  :param kwargs:
436
497
  :return:
437
498
  """
438
- query = self._create_query(where_clause=where)
499
+ query = self._create_query(
500
+ where_clause=where,
501
+ select_cols=select_cols,
502
+ )
439
503
  self._pre_query_hook(query)
440
504
  return self.query(query, **kwargs)
441
505
 
@@ -535,8 +599,16 @@ class Collection(Generic[DatabaseType]):
535
599
  assert ix_coll.size() > 0
536
600
  qr = ix_coll.find(where=where, limit=-1, **kwargs)
537
601
  index_col = ix.index_field
602
+
538
603
  # TODO: optimize this for large indexes
539
- vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
604
+ def row2array(row):
605
+ v = row[index_col]
606
+ if isinstance(v, str):
607
+ # sqlite stores arrays as strings
608
+ v = json.loads(v)
609
+ return np.array(v, dtype=float)
610
+
611
+ vector_pairs = [(row, row2array(row)) for row in qr.rows]
540
612
  results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
541
613
  for r in results:
542
614
  del r[1][index_col]
@@ -550,6 +622,47 @@ class Collection(Generic[DatabaseType]):
550
622
  new_qr.rows = [r[1] for r in results]
551
623
  return new_qr
552
624
 
625
+ def group_by(
626
+ self,
627
+ group_by_fields: List[str],
628
+ inlined_field="objects",
629
+ agg_map: Optional[Dict[str, str]] = None,
630
+ where: Optional[Dict] = None,
631
+ **kwargs,
632
+ ) -> QueryResult:
633
+ """
634
+ Group objects in the collection by a column.
635
+
636
+ :param group_by_fields: name(s) of the field(s) to group by; a single string is treated as a one-element list
637
+ :param where:
638
+ :param kwargs:
639
+ :return:
640
+ """
641
+ if isinstance(group_by_fields, str):
642
+ group_by_fields = [group_by_fields]
643
+ df = self.find(where=where, limit=-1).rows_dataframe
644
+
645
+ # Handle the case where agg_map is None
646
+ if agg_map is None:
647
+ agg_map = {}
648
+
649
+ pk_fields = agg_map.get("first", []) + group_by_fields
650
+ list_fields = agg_map.get("list", [])
651
+ if not list_fields:
652
+ list_fields = [a for a in df.columns if a not in pk_fields]
653
+
654
+ grouped_objs = defaultdict(list)
655
+ for _, row in df.iterrows():
656
+ pk = tuple(row[pk_fields])
657
+ grouped_objs[pk].append({k: row[k] for k in list_fields})
658
+ results = []
659
+ for pk, objs in grouped_objs.items():
660
+ top_obj = {k: v for k, v in zip(pk_fields, pk)}
661
+ top_obj[inlined_field] = objs
662
+ results.append(top_obj)
663
+ r = QueryResult(num_rows=len(results), rows=results)
664
+ return r
665
+
553
666
  @property
554
667
  def is_internal(self) -> bool:
555
668
  """
@@ -1004,7 +1117,7 @@ class Collection(Generic[DatabaseType]):
1004
1117
  multivalued = any(multivalueds)
1005
1118
  inlined = any(inlineds)
1006
1119
  if multivalued and False in multivalueds:
1007
- raise ValueError(f"Mixed list non list: {vs} // inferred= {multivalueds}")
1120
+ logger.info(f"Mixed list non list: {vs} // inferred= {multivalueds}")
1008
1121
  # if not rngs:
1009
1122
  # raise AssertionError(f"Empty rngs for {k} = {vs}")
1010
1123
  rng = rngs[0] if rngs else None
@@ -276,14 +276,15 @@ class Database(ABC, Generic[CollectionType]):
276
276
 
277
277
  Examples:
278
278
 
279
- >>> from linkml_store.api.client import Client
280
- >>> client = Client()
281
- >>> db = client.attach_database("duckdb", alias="test")
282
- >>> collection = db.create_collection("Person", alias="persons")
283
- >>> collection.alias
284
- 'persons'
285
- >>> collection.target_class_name
286
- 'Person'
279
+ >>> from linkml_store.api.client import Client
280
+ >>> client = Client()
281
+ >>> db = client.attach_database("duckdb", alias="test")
282
+ >>> collection = db.create_collection("Person", alias="persons")
283
+ >>> collection.alias
284
+ 'persons'
285
+
286
+ >>> collection.target_class_name
287
+ 'Person'
287
288
 
288
289
  If alias is not provided, it defaults to the name of the type.
289
290
 
@@ -419,7 +420,7 @@ class Database(ABC, Generic[CollectionType]):
419
420
  >>> from linkml_store.api.client import Client
420
421
  >>> from linkml_store.api.queries import Query
421
422
  >>> client = Client()
422
- >>> db = client.attach_database("duckdb", alias="test")
423
+ >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
423
424
  >>> collection = db.create_collection("Person")
424
425
  >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
425
426
  >>> query = Query(from_table="Person", where_clause={"name": "John"})
@@ -451,7 +452,7 @@ class Database(ABC, Generic[CollectionType]):
451
452
 
452
453
  >>> from linkml_store.api.client import Client
453
454
  >>> client = Client()
454
- >>> db = client.attach_database("duckdb", alias="test")
455
+ >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
455
456
  >>> collection = db.create_collection("Person", alias="persons")
456
457
  >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
457
458
  >>> schema_view = db.schema_view
@@ -594,7 +595,31 @@ class Database(ABC, Generic[CollectionType]):
594
595
  sb.add_class(coll.target_class_name)
595
596
  return SchemaView(sb.schema)
596
597
 
597
- def iter_validate_database(self, **kwargs) -> Iterator["ValidationResult"]:
598
+ def validate_database(self, **kwargs) -> List["ValidationResult"]:
599
+ """
600
+ Validate the contents of the database.
601
+
602
+ As `iter_validate_database`, but returns a list of validation results.
603
+
604
+ :param kwargs:
605
+ :return:
606
+ """
607
+ return list(self.iter_validate_database(**kwargs))
608
+
609
+ def validate_database(self, **kwargs) -> List["ValidationResult"]:
610
+ """
611
+ Validate the contents of the database.
612
+
613
+ As `iter_validate_database`, but returns a list of validation results.
614
+
615
+ :param kwargs:
616
+ :return:
617
+ """
618
+ return list(self.iter_validate_database(**kwargs))
619
+
620
+ def iter_validate_database(
621
+ self, ensure_referential_integrity: bool = None, **kwargs
622
+ ) -> Iterator["ValidationResult"]:
598
623
  """
599
624
  Validate the contents of the database.
600
625
 
@@ -634,12 +659,14 @@ class Database(ABC, Generic[CollectionType]):
634
659
  'capital' is a required property
635
660
  'continent' is a required proper
636
661
 
662
+ :param ensure_referential_integrity: ensure referential integrity
637
663
  :param kwargs:
638
664
  :return: iterator over validation results
639
665
  """
640
666
  for collection in self.list_collections():
641
667
  yield from collection.iter_validate_collection(**kwargs)
642
- if self.metadata.ensure_referential_integrity:
668
+ if self.metadata.ensure_referential_integrity or ensure_referential_integrity:
669
+ logger.info(f"Validating referential integrity on {self.alias}")
643
670
  yield from self._validate_referential_integrity(**kwargs)
644
671
 
645
672
  def _validate_referential_integrity(self, **kwargs) -> Iterator["ValidationResult"]:
@@ -660,7 +687,9 @@ class Database(ABC, Generic[CollectionType]):
660
687
  induced_slots = sv.class_induced_slots(cd.name)
661
688
  slot_map = {s.name: s for s in induced_slots}
662
689
  # rmap = {s.name: s.range for s in induced_slots}
690
+ # map slot ranges to a collection where that range is stored
663
691
  sr_to_coll = {s.name: cmap.get(s.range, []) for s in induced_slots if s.range}
692
+ logger.debug(f"Validating referential integrity for {collection.target_class_name} // {sr_to_coll}")
664
693
  for obj in collection.find_iter():
665
694
  for k, v in obj.items():
666
695
  if k not in sr_to_coll:
@@ -721,7 +750,7 @@ class Database(ABC, Generic[CollectionType]):
721
750
 
722
751
  >>> from linkml_store.api.client import Client
723
752
  >>> client = Client()
724
- >>> db = client.attach_database("duckdb", alias="test")
753
+ >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
725
754
  >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
726
755
  >>> db.list_collection_names()
727
756
  ['iris']
@@ -741,7 +770,9 @@ class Database(ABC, Generic[CollectionType]):
741
770
  # import into a test instance
742
771
  tmp_handle = source_format.value
743
772
  client = self.parent
744
- tmp_db = client.attach_database(tmp_handle, alias="tmp")
773
+ tmp_alias = "tmp"
774
+ client.drop_database(tmp_alias, missing_ok=True)
775
+ tmp_db = client.attach_database(tmp_handle, alias=tmp_alias, recreate_if_exists=True)
745
776
  # TODO: check for infinite recursion
746
777
  tmp_db.import_database(location, source_format=source_format)
747
778
  obj = {}