linkml-store 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. linkml_store/__init__.py +7 -0
  2. linkml_store/api/__init__.py +8 -0
  3. linkml_store/api/client.py +414 -0
  4. linkml_store/api/collection.py +1280 -0
  5. linkml_store/api/config.py +187 -0
  6. linkml_store/api/database.py +862 -0
  7. linkml_store/api/queries.py +69 -0
  8. linkml_store/api/stores/__init__.py +0 -0
  9. linkml_store/api/stores/chromadb/__init__.py +7 -0
  10. linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
  11. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  12. linkml_store/api/stores/dremio/__init__.py +10 -0
  13. linkml_store/api/stores/dremio/dremio_collection.py +555 -0
  14. linkml_store/api/stores/dremio/dremio_database.py +1052 -0
  15. linkml_store/api/stores/dremio/mappings.py +105 -0
  16. linkml_store/api/stores/dremio_rest/__init__.py +11 -0
  17. linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
  18. linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
  19. linkml_store/api/stores/duckdb/__init__.py +16 -0
  20. linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
  21. linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
  22. linkml_store/api/stores/duckdb/mappings.py +8 -0
  23. linkml_store/api/stores/filesystem/__init__.py +15 -0
  24. linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
  25. linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
  26. linkml_store/api/stores/hdf5/__init__.py +7 -0
  27. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  28. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  29. linkml_store/api/stores/ibis/__init__.py +5 -0
  30. linkml_store/api/stores/ibis/ibis_collection.py +488 -0
  31. linkml_store/api/stores/ibis/ibis_database.py +328 -0
  32. linkml_store/api/stores/mongodb/__init__.py +25 -0
  33. linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
  34. linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
  35. linkml_store/api/stores/neo4j/__init__.py +0 -0
  36. linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
  37. linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
  38. linkml_store/api/stores/solr/__init__.py +3 -0
  39. linkml_store/api/stores/solr/solr_collection.py +224 -0
  40. linkml_store/api/stores/solr/solr_database.py +83 -0
  41. linkml_store/api/stores/solr/solr_utils.py +0 -0
  42. linkml_store/api/types.py +4 -0
  43. linkml_store/cli.py +1147 -0
  44. linkml_store/constants.py +7 -0
  45. linkml_store/graphs/__init__.py +0 -0
  46. linkml_store/graphs/graph_map.py +24 -0
  47. linkml_store/index/__init__.py +53 -0
  48. linkml_store/index/implementations/__init__.py +0 -0
  49. linkml_store/index/implementations/llm_indexer.py +174 -0
  50. linkml_store/index/implementations/simple_indexer.py +43 -0
  51. linkml_store/index/indexer.py +211 -0
  52. linkml_store/inference/__init__.py +13 -0
  53. linkml_store/inference/evaluation.py +195 -0
  54. linkml_store/inference/implementations/__init__.py +0 -0
  55. linkml_store/inference/implementations/llm_inference_engine.py +154 -0
  56. linkml_store/inference/implementations/rag_inference_engine.py +276 -0
  57. linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
  58. linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
  59. linkml_store/inference/inference_config.py +66 -0
  60. linkml_store/inference/inference_engine.py +209 -0
  61. linkml_store/inference/inference_engine_registry.py +74 -0
  62. linkml_store/plotting/__init__.py +5 -0
  63. linkml_store/plotting/cli.py +826 -0
  64. linkml_store/plotting/dimensionality_reduction.py +453 -0
  65. linkml_store/plotting/embedding_plot.py +489 -0
  66. linkml_store/plotting/facet_chart.py +73 -0
  67. linkml_store/plotting/heatmap.py +383 -0
  68. linkml_store/utils/__init__.py +0 -0
  69. linkml_store/utils/change_utils.py +17 -0
  70. linkml_store/utils/dat_parser.py +95 -0
  71. linkml_store/utils/embedding_matcher.py +424 -0
  72. linkml_store/utils/embedding_utils.py +299 -0
  73. linkml_store/utils/enrichment_analyzer.py +217 -0
  74. linkml_store/utils/file_utils.py +37 -0
  75. linkml_store/utils/format_utils.py +550 -0
  76. linkml_store/utils/io.py +38 -0
  77. linkml_store/utils/llm_utils.py +122 -0
  78. linkml_store/utils/mongodb_utils.py +145 -0
  79. linkml_store/utils/neo4j_utils.py +42 -0
  80. linkml_store/utils/object_utils.py +190 -0
  81. linkml_store/utils/pandas_utils.py +93 -0
  82. linkml_store/utils/patch_utils.py +126 -0
  83. linkml_store/utils/query_utils.py +89 -0
  84. linkml_store/utils/schema_utils.py +23 -0
  85. linkml_store/utils/sklearn_utils.py +193 -0
  86. linkml_store/utils/sql_utils.py +177 -0
  87. linkml_store/utils/stats_utils.py +53 -0
  88. linkml_store/utils/vector_utils.py +158 -0
  89. linkml_store/webapi/__init__.py +0 -0
  90. linkml_store/webapi/html/__init__.py +3 -0
  91. linkml_store/webapi/html/base.html.j2 +24 -0
  92. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  93. linkml_store/webapi/html/database_details.html.j2 +16 -0
  94. linkml_store/webapi/html/databases.html.j2 +14 -0
  95. linkml_store/webapi/html/generic.html.j2 +43 -0
  96. linkml_store/webapi/main.py +855 -0
  97. linkml_store-0.3.0.dist-info/METADATA +226 -0
  98. linkml_store-0.3.0.dist-info/RECORD +101 -0
  99. linkml_store-0.3.0.dist-info/WHEEL +4 -0
  100. linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
  101. linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,862 @@
1
+ import logging
2
+ from abc import ABC
3
+ from collections import defaultdict
4
+ from copy import copy
5
+ from pathlib import Path
6
+ from typing import (
7
+ TYPE_CHECKING,
8
+ Any,
9
+ Callable,
10
+ ClassVar,
11
+ Dict,
12
+ Generic,
13
+ Iterator,
14
+ List,
15
+ Optional,
16
+ Sequence,
17
+ Type,
18
+ Union,
19
+ )
20
+
21
+ from linkml_store.api.types import CollectionType
22
+ from linkml_store.utils.format_utils import Format, load_objects, render_output
23
+ from linkml_store.utils.patch_utils import PatchDict
24
+
25
+ try:
26
+ from linkml.validator.report import Severity, ValidationResult
27
+ except ImportError:
28
+ ValidationResult = None
29
+
30
+ from linkml_runtime import SchemaView
31
+ from linkml_runtime.linkml_model import ClassDefinition, SchemaDefinition
32
+
33
+ from linkml_store.api.collection import Collection
34
+ from linkml_store.api.config import CollectionConfig, DatabaseConfig
35
+ from linkml_store.api.queries import Query, QueryResult
36
+
37
+ if TYPE_CHECKING:
38
+ from linkml_store.api.client import Client
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+ LISTENER = Callable[[Collection, List[PatchDict]], None]
43
+
44
+
45
+ class Database(ABC, Generic[CollectionType]):
46
+ """
47
+ A Database provides access to named collections of data.
48
+
49
+ A database object is owned by a :ref:`Client`. The database
50
+ object uses a :ref:`handle` to know what kind of external
51
+ database system to connect to (e.g. duckdb, mongodb). The handle
52
+ is a string ``<DatabaseType>:<LocalLocator>``
53
+
54
+ The
55
+ database object may also have an :ref:`alias` that is mapped
56
+ to the handle.
57
+
58
+ Attaching a database
59
+ --------------------
60
+ >>> from linkml_store.api.client import Client
61
+ >>> client = Client()
62
+ >>> db = client.attach_database("duckdb:///:memory:", alias="test")
63
+
64
+ We can check the value of the handle:
65
+
66
+ >>> db.handle
67
+ 'duckdb:///:memory:'
68
+
69
+ The alias can be used to retrieve the database object from the client
70
+
71
+ >>> assert db == client.get_database("test")
72
+
73
+ Creating a collection
74
+ ---------------------
75
+
76
+ >>> collection = db.create_collection("Person")
77
+ >>> len(db.list_collections())
78
+ 1
79
+ >>> db.get_collection("Person") == collection
80
+ True
81
+ >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
82
+ >>> collection.insert(objs)
83
+ >>> qr = collection.find()
84
+ >>> len(qr.rows)
85
+ 2
86
+ >>> qr.rows[0]["id"]
87
+ 'P1'
88
+ >>> qr.rows[1]["name"]
89
+ 'Alice'
90
+ >>> qr = collection.find({"name": "John"})
91
+ >>> len(qr.rows)
92
+ 1
93
+ >>> qr.rows[0]["name"]
94
+ 'John'
95
+
96
+ """
97
+
98
+ _schema_view: Optional[SchemaView] = None
99
+ """Schema for the database. May be transformed."""
100
+
101
+ _original_schema_view: Optional[SchemaView] = None
102
+ """If a schema must be transformed, then the original is stored here."""
103
+
104
+ _collections: Optional[Dict[str, Collection]] = None
105
+ parent: Optional["Client"] = None
106
+ metadata: Optional[DatabaseConfig] = None
107
+ collection_class: ClassVar[Optional[Type[Collection]]] = None
108
+
109
+ listeners: Optional[List[LISTENER]] = None
110
+
111
+ def __init__(self, handle: Optional[str] = None, metadata: Optional[DatabaseConfig] = None, **kwargs):
112
+ if metadata:
113
+ self.metadata = metadata
114
+ else:
115
+ self.metadata = DatabaseConfig(handle=handle, **kwargs)
116
+ if handle is not None and self.metadata.handle is not None and handle != self.metadata.handle:
117
+ raise ValueError(f"Handle mismatch: {handle} != {self.metadata.handle}")
118
+ self._initialize_schema()
119
+ self._initialize_collections()
120
+
121
+ def _initialize_schema(self, **kwargs):
122
+ db_config = self.metadata
123
+ if db_config.schema_location:
124
+ schema_location = db_config.schema_location.format(base_dir=self.parent.metadata.base_dir)
125
+ logger.info(f"Loading schema from: {schema_location}")
126
+ self.load_schema_view(schema_location)
127
+ if db_config.schema_dict:
128
+ schema_dict = copy(db_config.schema_dict)
129
+ if "id" not in schema_dict:
130
+ schema_dict["id"] = "tmp"
131
+ if "name" not in schema_dict:
132
+ schema_dict["name"] = "tmp"
133
+ self.set_schema_view(SchemaView(SchemaDefinition(**schema_dict)))
134
+
135
+ def from_config(self, db_config: DatabaseConfig, **kwargs):
136
+ """
137
+ Initialize a database from a configuration.
138
+
139
+ TODO: DEPRECATE
140
+
141
+ :param db_config: database configuration
142
+ :param kwargs: additional arguments
143
+ """
144
+ self.metadata = db_config
145
+ self._initialize_schema()
146
+ self._initialize_collections()
147
+ return self
148
+
149
+ def _initialize_collections(self):
150
+ if not self.metadata.collections:
151
+ return
152
+ for k, collection_config in self.metadata.collections.items():
153
+ if collection_config.alias:
154
+ if collection_config.alias != k:
155
+ raise ValueError(f"Alias mismatch: {collection_config.alias} != {k}")
156
+ alias = k
157
+ typ = collection_config.type or alias
158
+ _collection = self.create_collection(typ, alias=alias, metadata=collection_config)
159
+ assert _collection.alias == alias
160
+ assert _collection.target_class_name == typ
161
+ if collection_config.attributes:
162
+ # initialize schema
163
+ sv = self.schema_view
164
+ cd = ClassDefinition(typ, attributes=collection_config.attributes)
165
+ sv.schema.classes[cd.name] = cd
166
+ sv.set_modified()
167
+ # assert collection.class_definition() is not None
168
+
169
+ @property
170
+ def recreate_if_exists(self) -> bool:
171
+ """
172
+ Return whether to recreate the database if it already exists.
173
+
174
+ :return:
175
+ """
176
+ return self.metadata.recreate_if_exists
177
+
178
+ @property
179
+ def handle(self) -> str:
180
+ """
181
+ Return the database handle.
182
+
183
+ Examples:
184
+
185
+ - ``duckdb:///:memory:``
186
+ - ``duckdb:///tmp/test.db``
187
+ - ``mongodb://localhost:27017/``
188
+
189
+ :return:
190
+ """
191
+ return self.metadata.handle
192
+
193
+ @property
194
+ def alias(self):
195
+ return self.metadata.alias
196
+
197
+ def store(self, obj: Dict[str, Any], **kwargs):
198
+ """
199
+ Store an object in the database.
200
+
201
+ The object is assumed to be a Dictionary of Collections.
202
+
203
+ >>> from linkml_store.api.client import Client
204
+ >>> client = Client()
205
+ >>> db = client.attach_database("duckdb", alias="test")
206
+ >>> db.store({"persons": [{"id": "P1", "name": "John", "age_in_years": 30}]})
207
+ >>> collection = db.get_collection("persons")
208
+ >>> qr = collection.find()
209
+ >>> qr.num_rows
210
+ 1
211
+
212
+ :param obj: object to store
213
+ :param kwargs: additional arguments
214
+ """
215
+ sv = self.schema_view
216
+ roots = [c for c in sv.all_classes().values() if c.tree_root]
217
+ root = roots[0] if roots else None
218
+ for k, v in obj.items():
219
+ logger.info(f"Storing collection {k}")
220
+ if root:
221
+ slot = sv.induced_slot(k, root.name)
222
+ if not slot:
223
+ raise ValueError(f"Cannot determine type for {k}")
224
+ else:
225
+ slot = None
226
+ if isinstance(v, dict):
227
+ logger.debug(f"Coercing dict to list: {v}")
228
+ v = [v]
229
+ if not isinstance(v, list):
230
+ continue
231
+ if not v:
232
+ continue
233
+ if slot:
234
+ logger.debug(f"Aligning to existing slot: {slot.name} range={slot.range}")
235
+ collection = self.get_collection(slot.name, type=slot.range, create_if_not_exists=True)
236
+ else:
237
+ collection = self.get_collection(k, create_if_not_exists=True)
238
+ logger.debug(f"Replacing using {collection.alias} {collection.target_class_name}")
239
+ collection.replace(v)
240
+
241
+ def commit(self, **kwargs):
242
+ """
243
+ Commit pending changes to the database.
244
+
245
+ :param kwargs:
246
+ :return:
247
+ """
248
+ for coll in self.list_collections():
249
+ coll.commit()
250
+
251
+ def close(self, **kwargs):
252
+ """
253
+ Close the database.
254
+
255
+ :param kwargs:
256
+ :return:
257
+ """
258
+ raise NotImplementedError()
259
+
260
+ @property
261
+ def _collection_class(self) -> Type[Collection]:
262
+ raise NotImplementedError()
263
+
264
+ def create_collection(
265
+ self,
266
+ name: str,
267
+ alias: Optional[str] = None,
268
+ metadata: Optional[CollectionConfig] = None,
269
+ recreate_if_exists=False,
270
+ **kwargs,
271
+ ) -> Collection:
272
+ """
273
+ Create a new collection in the current database.
274
+
275
+ The collection must have a *Type*, and may have an *Alias*.
276
+
277
+ Examples:
278
+
279
+ >>> from linkml_store.api.client import Client
280
+ >>> client = Client()
281
+ >>> db = client.attach_database("duckdb", alias="test")
282
+ >>> collection = db.create_collection("Person", alias="persons")
283
+ >>> collection.alias
284
+ 'persons'
285
+
286
+ >>> collection.target_class_name
287
+ 'Person'
288
+
289
+ If alias is not provided, it defaults to the name of the type.
290
+
291
+ >>> collection = db.create_collection("Organization")
292
+ >>> collection.alias
293
+ 'Organization'
294
+
295
+ :param name: name of the collection
296
+ :param alias: alias for the collection
297
+ :param metadata: metadata for the collection
298
+ :param recreate_if_exists: recreate the collection if it already exists
299
+ :param kwargs: additional arguments
300
+ """
301
+ if not name:
302
+ raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")
303
+ collection_cls = self.collection_class
304
+ collection = collection_cls(name=name, parent=self, metadata=metadata)
305
+ if alias:
306
+ collection.metadata.alias = alias
307
+ if metadata and metadata.source:
308
+ collection.load_from_source()
309
+ if metadata and metadata.attributes:
310
+ sv = self.schema_view
311
+ schema = sv.schema
312
+ cd = ClassDefinition(name=metadata.type, attributes=metadata.attributes)
313
+ schema.classes[cd.name] = cd
314
+ if not self._collections:
315
+ self._collections = {}
316
+ if not alias:
317
+ alias = name
318
+ self._collections[alias] = collection
319
+ if recreate_if_exists:
320
+ logger.debug(f"Recreating collection {collection.alias}")
321
+ collection.delete_where({}, missing_ok=True)
322
+ return collection
323
+
324
+ def list_collections(self, include_internal=False) -> Sequence[Collection]:
325
+ """
326
+ List all collections.
327
+
328
+ Examples
329
+ --------
330
+ >>> from linkml_store.api.client import Client
331
+ >>> client = Client()
332
+ >>> db = client.attach_database("duckdb", alias="test")
333
+ >>> c1 = db.create_collection("Person")
334
+ >>> c2 = db.create_collection("Product")
335
+ >>> collections = db.list_collections()
336
+ >>> len(collections)
337
+ 2
338
+ >>> [c.target_class_name for c in collections]
339
+ ['Person', 'Product']
340
+
341
+ :param include_internal: include internal collections
342
+ :return: list of collections
343
+ """
344
+ if not self._collections:
345
+ self.init_collections()
346
+ return [c for c in self._collections.values() if include_internal or not c.is_internal]
347
+
348
+ def list_collection_names(self, **kwargs) -> Sequence[str]:
349
+ """
350
+ List all collection names.
351
+
352
+ Examples
353
+ --------
354
+ >>> from linkml_store.api.client import Client
355
+ >>> client = Client()
356
+ >>> db = client.attach_database("duckdb", alias="test")
357
+ >>> c1 = db.create_collection("Person")
358
+ >>> c2 = db.create_collection("Product")
359
+ >>> collection_names = db.list_collection_names()
360
+ >>> len(collection_names)
361
+ 2
362
+ >>> collection_names
363
+ ['Person', 'Product']
364
+
365
+ """
366
+ return [c.alias for c in self.list_collections(**kwargs)]
367
+
368
+ def get_collection(
369
+ self, name: str, type: Optional[str] = None, create_if_not_exists=True, **kwargs
370
+ ) -> "Collection":
371
+ """
372
+ Get a named collection.
373
+
374
+ Examples
375
+ --------
376
+ >>> from linkml_store.api.client import Client
377
+ >>> client = Client()
378
+ >>> db = client.attach_database("duckdb", alias="test")
379
+ >>> collection = db.create_collection("Person")
380
+ >>> db.get_collection("Person") == collection
381
+ True
382
+ >>> db.get_collection("NonExistent", create_if_not_exists=False)
383
+ Traceback (most recent call last):
384
+ ...
385
+ KeyError: 'Collection NonExistent does not exist'
386
+
387
+ :param name: name of the collection
388
+ :param type: target class name
389
+ :param create_if_not_exists: create the collection if it does not exist
390
+
391
+ """
392
+ if not self._collections:
393
+ logger.debug("Initializing collections")
394
+ self.init_collections()
395
+ if name not in self._collections.keys():
396
+ if create_if_not_exists:
397
+ if type is None:
398
+ type = name
399
+ logger.debug(f"Creating new collection: {name} kwargs: {kwargs}")
400
+ self._collections[name] = self.create_collection(type, alias=name, **kwargs)
401
+ else:
402
+ raise KeyError(f"Collection {name} does not exist")
403
+ return self._collections[name]
404
+
405
+ def init_collections(self):
406
+ """
407
+ Initialize collections.
408
+
409
+ TODO: Not typically called directly: consider making this private
410
+ :return:
411
+ """
412
+ raise NotImplementedError
413
+
414
+ def query(self, query: Query, **kwargs) -> QueryResult:
415
+ """
416
+ Run a query against the database.
417
+
418
+ Examples
419
+ --------
420
+ >>> from linkml_store.api.client import Client
421
+ >>> from linkml_store.api.queries import Query
422
+ >>> client = Client()
423
+ >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
424
+ >>> collection = db.create_collection("Person")
425
+ >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
426
+ >>> query = Query(from_table="Person", where_clause={"name": "John"})
427
+ >>> result = db.query(query)
428
+ >>> len(result.rows)
429
+ 1
430
+ >>> result.rows[0]["id"]
431
+ 'P1'
432
+
433
+ :param query:
434
+ :param kwargs:
435
+ :return:
436
+
437
+ """
438
+ if query.from_table:
439
+ collection = self.get_collection(query.from_table)
440
+ return collection.query(query, **kwargs)
441
+ else:
442
+ raise NotImplementedError(f"Querying without a table is not supported in {self.__class__.__name__}")
443
+
444
+ @property
445
+ def supports_sql(self) -> bool:
446
+ """
447
+ Return whether this database supports raw SQL queries.
448
+
449
+ Backends like DuckDB, PostgreSQL, Dremio support SQL.
450
+ Backends like MongoDB, filesystem do not.
451
+
452
+ :return: True if raw SQL is supported
453
+ """
454
+ return False
455
+
456
+ def execute_sql(self, sql: str, **kwargs) -> QueryResult:
457
+ """
458
+ Execute a raw SQL query against the database.
459
+
460
+ This method allows direct SQL execution on SQL-capable backends,
461
+ bypassing the linkml-store query abstraction layer.
462
+
463
+ :param sql: SQL query string
464
+ :param kwargs: Additional arguments
465
+ :return: QueryResult containing the results
466
+ :raises NotImplementedError: If this backend does not support SQL
467
+
468
+ Examples
469
+ --------
470
+ >>> from linkml_store.api.client import Client
471
+ >>> client = Client()
472
+ >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
473
+ >>> collection = db.create_collection("Person")
474
+ >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
475
+ >>> result = db.execute_sql("SELECT * FROM Person WHERE name = 'John'")
476
+ >>> len(result.rows)
477
+ 1
478
+ >>> result.rows[0]["name"]
479
+ 'John'
480
+ """
481
+ raise NotImplementedError(
482
+ f"Raw SQL queries are not supported by {self.__class__.__name__}. "
483
+ f"Use collection.find() or collection.query() instead."
484
+ )
485
+
486
+ @property
487
+ def schema_view(self) -> SchemaView:
488
+ """
489
+ Return a schema view for the named collection.
490
+
491
+ If no explicit schema is provided, this will generalize one
492
+
493
+ Induced schema example:
494
+
495
+ >>> from linkml_store.api.client import Client
496
+ >>> client = Client()
497
+ >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
498
+ >>> collection = db.create_collection("Person", alias="persons")
499
+ >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
500
+ >>> schema_view = db.schema_view
501
+ >>> cd = schema_view.get_class("Person")
502
+ >>> cd.attributes["id"].range
503
+ 'string'
504
+ >>> cd.attributes["age_in_years"].range
505
+ 'integer'
506
+
507
+ We can reuse the same class:
508
+
509
+ >>> collection2 = db.create_collection("Person", alias="other_persons")
510
+ >>> collection2.class_definition().attributes["age_in_years"].range
511
+ 'integer'
512
+ """
513
+ if not self._schema_view:
514
+ self._initialize_schema()
515
+ if not self._schema_view:
516
+ logger.info("Inducing schema view")
517
+ self._schema_view = self.induce_schema_view()
518
+ return self._schema_view
519
+
520
def set_schema_view(self, schema_view: Union[str, Path, SchemaView]):
    """
    Install a schema view on the database and align existing collections with it.

    >>> from linkml_store.api.client import Client
    >>> client = Client()
    >>> db = client.attach_database("duckdb", alias="test")
    >>> sv = SchemaView("tests/input/countries/countries.linkml.yaml")
    >>> db.set_schema_view(sv)
    >>> cd = db.schema_view.schema.classes["Country"]
    >>> sorted(cd.slots)
    ['capital', 'code', 'continent', 'languages', 'name']
    >>> induced_slots = {s.name: s for s in sv.class_induced_slots("Country")}
    >>> sorted(induced_slots.keys())
    ['capital', 'code', 'continent', 'languages', 'name']
    >>> induced_slots["code"].identifier
    True

    Creating a new collection will align with the schema view:

    >>> collection = db.create_collection("Country", "all_countries")
    >>> sorted(collection.class_definition().slots)
    ['capital', 'code', 'continent', 'languages', 'name']

    :param schema_view: can be either a path to the schema, or a SchemaView object
    :return:
    """
    if isinstance(schema_view, (str, Path)):
        # Paths are passed to SchemaView in string form.
        schema_view = SchemaView(str(schema_view))
    self._schema_view = schema_view
    logger.info(f"Setting schema view for {self.handle}")
    # self._schema_view = SchemaView(schema_view.materialize_derived_schema())
    if not self._collections:
        return

    # align with induced schema
    roots = [cls for cls in schema_view.all_classes().values() if cls.tree_root]
    if not roots:
        # No declared tree root: treat as roots the classes that are never
        # (via themselves or an ancestor) the range of any slot.
        all_ranges = {
            slot.range
            for class_name in schema_view.all_classes()
            for slot in schema_view.class_induced_slots(class_name)
            if slot.range
        }
        roots = [
            cls
            for cls in schema_view.all_classes().values()
            if not all_ranges.intersection(schema_view.class_ancestors(cls.name, reflexive=True))
        ]
    if len(roots) != 1:
        return
    (root,) = roots
    for slot in schema_view.class_induced_slots(root.name):
        is_inlined = slot.inlined or slot.inlined_as_list
        if not (is_inlined and slot.range):
            continue
        if slot.name in self._collections:
            # Retarget the collection named after the slot at the slot's range.
            self._collections[slot.name].metadata.type = slot.range
578
+
579
def load_schema_view(self, path: Union[str, Path]):
    """
    Read a schema from a file and install it as the database schema view.

    >>> from linkml_store.api.client import Client
    >>> client = Client()
    >>> db = client.attach_database("duckdb", alias="test")
    >>> db.load_schema_view("tests/input/countries/countries.linkml.yaml")
    >>> sv = db.schema_view
    >>> cd = sv.schema.classes["Country"]
    >>> sorted(cd.slots)
    ['capital', 'code', 'continent', 'languages', 'name']
    >>> induced_slots = {s.name: s for s in sv.class_induced_slots("Country")}
    >>> sorted(induced_slots.keys())
    ['capital', 'code', 'continent', 'languages', 'name']
    >>> induced_slots["code"].identifier
    True

    Creating a new collection will align with the schema view:

    >>> collection = db.create_collection("Country", "all_countries")
    >>> sorted(collection.class_definition().slots)
    ['capital', 'code', 'continent', 'languages', 'name']

    :param path: location of the schema file
    :return:
    """
    location = str(path) if isinstance(path, Path) else path
    view = SchemaView(location)
    self.set_schema_view(view)
609
+
610
def induce_schema_view(self) -> SchemaView:
    """
    Build a schema view from the collections currently in the database.

    One class is added to a fresh schema for each listed collection, named
    after the collection's target class.

    >>> from linkml_store.api.client import Client
    >>> from linkml_store.api.queries import Query
    >>> client = Client()
    >>> db = client.attach_database("duckdb", alias="test")
    >>> collection = db.create_collection("Person")
    >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25},
    ...                 {"id": "P2", "name": "Alice", "age_in_years": 25}])
    >>> schema_view = db.induce_schema_view()
    >>> cd = schema_view.get_class("Person")
    >>> cd.attributes["id"].range
    'string'
    >>> cd.attributes["age_in_years"].range
    'integer'

    :return: A schema view
    """
    logger.info(f"Inducing schema view for {self.handle}")
    from linkml_runtime.utils.schema_builder import SchemaBuilder

    builder = SchemaBuilder()

    for alias in self.list_collection_names():
        target_class = self.get_collection(alias).target_class_name
        builder.add_class(target_class)
    return SchemaView(builder.schema)
639
+
640
+ def validate_database(self, **kwargs) -> List["ValidationResult"]:
641
+ """
642
+ Validate the contents of the database.
643
+
644
+ As `iter_validate_database`, but returns a list of validation results.
645
+
646
+ :param kwargs:
647
+ :return:
648
+ """
649
+ return list(self.iter_validate_database(**kwargs))
650
+
651
+ def iter_validate_database(
652
+ self, ensure_referential_integrity: bool = None, **kwargs
653
+ ) -> Iterator["ValidationResult"]:
654
+ """
655
+ Validate the contents of the database.
656
+
657
+ An example, let's create a database with a predefined schema
658
+ from the countries.linkml.yaml file:
659
+
660
+ >>> from linkml_store.api.client import Client
661
+ >>> client = Client()
662
+ >>> db = client.attach_database("duckdb", alias="test")
663
+ >>> db.load_schema_view("tests/input/countries/countries.linkml.yaml")
664
+
665
+ Let's introspect the schema to see what slots are applicable for the class "Country":
666
+
667
+ >>> sv = db.schema_view
668
+ >>> for slot in sv.class_induced_slots("Country"):
669
+ ... print(slot.name, slot.range, slot.required)
670
+ name string True
671
+ code string True
672
+ capital string True
673
+ continent string True
674
+ languages Language None
675
+
676
+ Next we'll create a collection, binding it to the target class "Country", and insert
677
+ valid data:
678
+
679
+ >>> collection = db.create_collection("Country", "all_countries")
680
+ >>> obj = {"code": "US", "name": "United States", "continent": "North America", "capital": "Washington, D.C."}
681
+ >>> collection.insert([obj])
682
+ >>> list(db.iter_validate_database())
683
+ []
684
+
685
+ Now let's insert some invalid data (missing required fields)
686
+
687
+ >>> collection.insert([{"code": "FR", "name": "France"}])
688
+ >>> for r in db.iter_validate_database():
689
+ ... print(r.message[0:32])
690
+ 'capital' is a required property
691
+ 'continent' is a required proper
692
+
693
+ :param ensure_referential_integrity: ensure referential integrity
694
+ :param kwargs:
695
+ :return: iterator over validation results
696
+ """
697
+ for collection in self.list_collections():
698
+ yield from collection.iter_validate_collection(**kwargs)
699
+ if self.metadata.ensure_referential_integrity or ensure_referential_integrity:
700
+ logger.info(f"Validating referential integrity on {self.alias}")
701
+ yield from self._validate_referential_integrity(**kwargs)
702
+
703
def _validate_referential_integrity(self, **kwargs) -> Iterator["ValidationResult"]:
    """
    Validate referential integrity of the database.

    For every object in every listed collection, each string or integer value
    held in a slot whose range is the target class of one or more collections
    is looked up (via ``get_one``) in those collections. A value found in none
    of them is reported as a ``ReferentialIntegrity`` error.

    :param kwargs: additional arguments (currently unused)
    :return: iterator over validation results
    :raises ValueError: if a collection has no target class
    :raises ImportError: if a violation must be reported but the optional
        ``linkml`` validator classes could not be imported
    """
    sv = self.schema_view
    collections = self.list_collections()
    # target class name -> collections storing instances of that class
    cmap = defaultdict(list)
    for collection in collections:
        if not collection.target_class_name:
            raise ValueError(f"Collection {collection.name} has no target class")
        cmap[collection.target_class_name].append(collection)
    for collection in collections:
        cd = collection.class_definition()
        induced_slots = sv.class_induced_slots(cd.name)
        slot_map = {s.name: s for s in induced_slots}
        # map slot ranges to a collection where that range is stored
        sr_to_coll = {s.name: cmap.get(s.range, []) for s in induced_slots if s.range}
        logger.debug(f"Validating referential integrity for {collection.target_class_name} // {sr_to_coll}")
        for obj in collection.find_iter():
            for k, v in obj.items():
                ref_colls = sr_to_coll.get(k)
                if not ref_colls:
                    # slot has no range, or no collection stores that range
                    continue
                if not isinstance(v, (str, int)):
                    # only scalar values are looked up as references
                    continue
                if any(ref_coll.get_one(v) for ref_coll in ref_colls):
                    continue
                if ValidationResult is None:
                    # linkml is an optional dependency; without it neither
                    # ValidationResult nor Severity is available.
                    raise ImportError(
                        "linkml is required to report referential integrity errors "
                        "(could not import linkml.validator.report)"
                    )
                slot = slot_map[k]
                yield ValidationResult(
                    type="ReferentialIntegrity",
                    severity=Severity.ERROR,
                    message=f"Referential integrity error: {slot.range} not found",
                    instantiates=slot.range,
                    instance=v,
                )
748
+
749
def drop(self, **kwargs):
    """
    Remove the database together with all of its collections.

    This base implementation only raises ``NotImplementedError``.

    >>> from linkml_store.api.client import Client
    >>> client = Client()
    >>> path = Path("/tmp/test.db")
    >>> path.parent.mkdir(exist_ok=True, parents=True)
    >>> db = client.attach_database(f"duckdb:///{path}")
    >>> db.store({"persons": [{"id": "P1", "name": "John", "age_in_years": 30}]})
    >>> coll = db.get_collection("persons")
    >>> coll.find({}).num_rows
    1
    >>> db.drop()
    >>> db = client.attach_database("duckdb:///tmp/test.db", alias="test")
    >>> coll = db.get_collection("persons")
    >>> coll.find({}).num_rows
    0

    :param kwargs: additional arguments
    :raises NotImplementedError: always, in this implementation
    """
    raise NotImplementedError
771
+
772
def import_database(
    self,
    location: str,
    source_format: Optional[Union[str, Format]] = None,
    collection_name: Optional[str] = None,
    **kwargs,
):
    """
    Load data from a file or location into this database.

    DuckDB SQL dumps and MongoDB dumps are first imported into a scratch
    database attached to the parent client under the alias ``"tmp"``; the
    rows of every collection in that scratch database are then stored in
    this database. Any other format is read with ``load_objects`` and
    either inserted into ``collection_name`` or, when no collection name
    is given, stored object by object.

    >>> from linkml_store.api.client import Client
    >>> client = Client()
    >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
    >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
    >>> db.list_collection_names()
    ['iris']
    >>> collection = db.get_collection("iris")
    >>> collection.find({}).num_rows
    150

    :param location: location of the file
    :param source_format: source format, as a ``Format`` or its string value
    :param collection_name: (Optional) name of the collection, for data that is flat
    :param kwargs: additional arguments (not used by this implementation)
    """
    if isinstance(source_format, str):
        source_format = Format(source_format)
    needs_scratch_db = (
        isinstance(source_format, Format)
        and source_format.is_dump_format()
        and source_format in [Format.SQLDUMP_DUCKDB, Format.DUMP_MONGODB]
    )
    if needs_scratch_db:
        # import into a test instance; the format's value is used as its handle
        scratch_handle = source_format.value
        owner = self.parent
        scratch_alias = "tmp"
        owner.drop_database(scratch_alias, missing_ok=True)
        scratch_db = owner.attach_database(scratch_handle, alias=scratch_alias, recreate_if_exists=True)
        # TODO: check for infinite recursion
        scratch_db.import_database(location, source_format=source_format)
        snapshot = {}
        for scratch_coll in scratch_db.list_collections():
            # NOTE(review): limit=-1 presumably means "no limit" - confirm against find()
            result = scratch_coll.find({}, limit=-1)
            snapshot[scratch_coll.alias] = result.rows
        self.store(snapshot)
        return
    loaded = load_objects(location, format=source_format)
    if not collection_name:
        # no single target collection: hand each object to store()
        for item in loaded:
            self.store(item)
        return
    target = self.get_collection(collection_name, create_if_not_exists=True)
    target.insert(loaded)
822
+
823
def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
    """
    Export a database to a file or location.

    All rows of every collection are gathered into a single object keyed
    by collection alias. For DuckDB SQL dumps and MongoDB dumps that object
    is stored in a scratch database attached to the parent client under the
    alias ``"tmp"``, and the export is delegated to that database. For any
    other format the object is rendered with ``render_output`` and written
    to ``location`` as UTF-8 text.

    >>> from linkml_store.api.client import Client
    >>> client = Client()
    >>> db = client.attach_database("duckdb", alias="test")
    >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
    >>> db.export_database("/tmp/iris.yaml", Format.YAML)

    :param location: location of the file
    :param target_format: target format, as a ``Format`` or its string value
    :param kwargs: additional arguments (not used by this implementation)
    :raises ValueError: if ``location`` is a directory and the format is
        written as a single file
    """
    if isinstance(target_format, str):
        target_format = Format(target_format)
    obj = {}
    for coll in self.list_collections():
        # NOTE(review): limit=-1 presumably means "no limit" - confirm against find()
        qr = coll.find({}, limit=-1)
        obj[coll.alias] = qr.rows
    logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
    if isinstance(target_format, Format):
        if target_format.is_dump_format() and target_format in [Format.SQLDUMP_DUCKDB, Format.DUMP_MONGODB]:
            # the format's value is used as the handle of the scratch database
            tmp_handle = target_format.value
            client = self.parent
            tmp_alias = "tmp"
            # Reset the scratch database first, exactly as import_database does,
            # so that collections left over from an earlier import/export under
            # the same alias cannot end up in this dump.
            client.drop_database(tmp_alias, missing_ok=True)
            tmp_db = client.attach_database(tmp_handle, alias=tmp_alias, recreate_if_exists=True)
            tmp_db.store(obj)
            # TODO: check for infinite recursion
            tmp_db.export_database(location, target_format=target_format)
            return
    if Path(location).is_dir():
        raise ValueError(f"{location} is a directory; cannot write {target_format} to a dir")
    with open(location, "w", encoding="utf-8") as stream:
        stream.write(render_output(obj, format=target_format))
857
+
858
def broadcast(self, source: Collection, patches: List[PatchDict]):
    """
    Pass a set of patches to every registered listener.

    Each entry in ``self.listeners`` is called as ``listener(source, patches)``,
    in order. Nothing happens when ``self.listeners`` is empty or unset.

    :param source: collection the patches originate from
    :param patches: patches to hand to each listener
    """
    # "or ()" covers both a missing (None) and an empty listener list
    for notify in self.listeners or ():
        notify(source, patches)