linkml-store 0.0.0__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.

Potentially problematic release.


This version of linkml-store might be problematic. Click here for more details.

Files changed (35) hide show
  1. linkml_store/api/__init__.py +2 -2
  2. linkml_store/api/client.py +108 -7
  3. linkml_store/api/collection.py +221 -30
  4. linkml_store/api/config.py +97 -0
  5. linkml_store/api/database.py +207 -17
  6. linkml_store/api/queries.py +12 -1
  7. linkml_store/api/stores/chromadb/__init__.py +0 -0
  8. linkml_store/api/stores/chromadb/chromadb_collection.py +114 -0
  9. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  10. linkml_store/api/stores/duckdb/duckdb_collection.py +47 -14
  11. linkml_store/api/stores/duckdb/duckdb_database.py +35 -44
  12. linkml_store/api/stores/hdf5/__init__.py +0 -0
  13. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  14. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  15. linkml_store/api/stores/mongodb/mongodb_collection.py +86 -40
  16. linkml_store/api/stores/mongodb/mongodb_database.py +58 -67
  17. linkml_store/api/stores/solr/solr_collection.py +132 -0
  18. linkml_store/api/stores/solr/solr_database.py +82 -0
  19. linkml_store/api/stores/solr/solr_utils.py +0 -0
  20. linkml_store/cli.py +369 -0
  21. linkml_store/index/__init__.py +33 -0
  22. linkml_store/index/implementations/{llm_index.py → llm_indexer.py} +2 -2
  23. linkml_store/index/implementations/{simple_index.py → simple_indexer.py} +6 -3
  24. linkml_store/index/{index.py → indexer.py} +7 -4
  25. linkml_store/utils/format_utils.py +93 -0
  26. linkml_store/utils/object_utils.py +73 -0
  27. linkml_store/utils/sql_utils.py +46 -7
  28. {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/METADATA +17 -6
  29. linkml_store-0.1.6.dist-info/RECORD +41 -0
  30. linkml_store-0.1.6.dist-info/entry_points.txt +3 -0
  31. linkml_store/api/metadata.py +0 -5
  32. linkml_store-0.0.0.dist-info/RECORD +0 -29
  33. linkml_store-0.0.0.dist-info/entry_points.txt +0 -3
  34. {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/LICENSE +0 -0
  35. {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/WHEEL +0 -0
@@ -1,8 +1,8 @@
1
1
  # flake8: noqa: E402
2
2
  from linkml_store.api.collection import Collection
3
3
  from linkml_store.api.database import Database
4
- from linkml_store.api.metadata import MetaData
5
4
  from linkml_store.api.client import Client
5
+
6
6
  # flake8: noqa
7
7
 
8
- __all__ = ["Client", "Database", "MetaData", "Collection"]
8
+ __all__ = ["Client", "Database", "Collection"]
@@ -1,17 +1,24 @@
1
- from dataclasses import dataclass
2
- from typing import Dict, Optional
1
+ from pathlib import Path
2
+ from typing import Dict, Optional, Union
3
3
 
4
+ import yaml
4
5
  from linkml_runtime import SchemaView
5
6
 
6
7
  from linkml_store.api import Database
8
+ from linkml_store.api.config import ClientConfig
9
+ from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
7
10
  from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
11
+ from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
12
+ from linkml_store.api.stores.solr.solr_database import SolrDatabase
8
13
 
9
14
  HANDLE_MAP = {
10
15
  "duckdb": DuckDBDatabase,
16
+ "solr": SolrDatabase,
17
+ "mongodb": MongoDBDatabase,
18
+ "chromadb": ChromaDBDatabase,
11
19
  }
12
20
 
13
21
 
14
- @dataclass
15
22
  class Client:
16
23
  """
17
24
  A client provides access to named collections.
@@ -22,7 +29,7 @@ class Client:
22
29
  >>> db = client.attach_database("duckdb", alias="test")
23
30
  >>> collection = db.create_collection("Person")
24
31
  >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
25
- >>> collection.add(objs)
32
+ >>> collection.insert(objs)
26
33
  >>> qr = collection.find()
27
34
  >>> len(qr.rows)
28
35
  2
@@ -38,9 +45,76 @@ class Client:
38
45
 
39
46
  """
40
47
 
41
- handle: Optional[str] = None
48
+ metadata: Optional[ClientConfig] = None
42
49
  _databases: Optional[Dict[str, Database]] = None
43
50
 
51
def __init__(self, handle: Optional[str] = None, metadata: Optional[ClientConfig] = None):
    """
    Initialize a client.

    :param handle: optional default database handle for the client
    :param metadata: optional pre-built client configuration; a fresh
        ClientConfig is created when omitted
    """
    # always keep a configuration object, and record the handle on it
    self.metadata = metadata if metadata else ClientConfig()
    self.metadata.handle = handle

@property
def handle(self) -> Optional[str]:
    """The client handle, as stored on the configuration object."""
    return self.metadata.handle

@property
def base_dir(self) -> Optional[str]:
    """
    Get the base directory for the client.

    Wraps metadata.base_dir.

    :return: the configured base directory, if any
    """
    return self.metadata.base_dir
77
+
78
def from_config(self, config: Union[ClientConfig, str, Path], base_dir=None, **kwargs):
    """
    Configure this client from a ClientConfig object or a YAML file path.

    Examples
    --------
    >>> from linkml_store.api.config import ClientConfig
    >>> client = Client().from_config(ClientConfig(databases={"test": {"handle": "duckdb:///:memory:"}}))
    >>> len(client.databases)
    1
    >>> "test" in client.databases
    True
    >>> client.databases["test"].handle
    'duckdb:///:memory:'

    :param config: configuration object, or the path of a YAML file holding one
    :param base_dir: base directory used to expand ``{base_dir}`` placeholders
        in database handles; defaults to the config file's directory when a
        path is given
    :param kwargs: passed through to database initialization
    :return: self, for chaining
    """
    if isinstance(config, Path):
        config = str(config)
    if isinstance(config, str):
        if not base_dir:
            base_dir = Path(config).parent
        # BUG FIX: use a context manager so the config file handle is closed
        # promptly; the original `yaml.safe_load(open(config))` leaked it
        with open(config) as stream:
            parsed_obj = yaml.safe_load(stream)
        config = ClientConfig(**parsed_obj)
    self.metadata = config
    if base_dir:
        self.metadata.base_dir = base_dir
    self._initialize_databases(**kwargs)
    return self
110
+
111
def _initialize_databases(self, **kwargs):
    """Attach and configure one database per entry in the client configuration."""
    for alias, db_config in self.metadata.databases.items():
        # expand any {base_dir} placeholder in the handle before attaching
        resolved_handle = db_config.handle.format(base_dir=self.base_dir)
        db_config.handle = resolved_handle
        database = self.attach_database(resolved_handle, alias=alias, **kwargs)
        database.from_config(db_config)
117
+
44
118
  def attach_database(
45
119
  self,
46
120
  handle: str,
@@ -69,7 +143,6 @@ class Client:
69
143
  :param schema_view: schema view to associate with the database
70
144
  :param kwargs:
71
145
  :return:
72
-
73
146
  """
74
147
  if ":" not in handle:
75
148
  scheme = handle
@@ -87,6 +160,7 @@ class Client:
87
160
  if not self._databases:
88
161
  self._databases = {}
89
162
  self._databases[alias] = db
163
+ db.parent = self
90
164
  return db
91
165
 
92
166
  def get_database(self, name: Optional[str] = None, create_if_not_exists=True, **kwargs) -> Database:
@@ -101,7 +175,7 @@ class Client:
101
175
  >>> db == retrieved_db
102
176
  True
103
177
 
104
- :param name:
178
+ :param name: if None, there must be a single database attached
105
179
  :param create_if_not_exists:
106
180
  :param kwargs:
107
181
  :return:
@@ -149,3 +223,30 @@ class Client:
149
223
  if not self._databases:
150
224
  self._databases = {}
151
225
  return self._databases
226
+
227
def drop_database(self, name: str, missing_ok=False, **kwargs):
    """
    Drop a database and detach it from the client.

    :param name: alias of the database to drop
    :param missing_ok: if True, silently ignore an unknown database name
    :param kwargs: passed through to the database's drop() method
    :raises ValueError: if the database is not attached and missing_ok is False
    """
    # BUG FIX: _databases may still be None when nothing was ever attached;
    # the original `name in self._databases` then raised TypeError
    if self._databases and name in self._databases:
        db = self._databases[name]
        db.drop(**kwargs)
        del self._databases[name]
    else:
        if not missing_ok:
            raise ValueError(f"Database {name} not found")
242
+
243
def drop_all_databases(self, **kwargs):
    """
    Drop every attached database.

    :param kwargs: passed through to drop_database for each database
    :return: None
    """
    # snapshot the names first, since drop_database mutates the mapping;
    # guard against _databases still being None (nothing ever attached)
    for name in list((self._databases or {}).keys()):
        self.drop_database(name, missing_ok=False, **kwargs)
    self._databases = {}
@@ -1,16 +1,24 @@
1
+ import hashlib
1
2
  import logging
2
3
  from collections import defaultdict
3
- from dataclasses import dataclass
4
4
  from pathlib import Path
5
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, TextIO, Type, Union
5
+ from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, TextIO, Type, Union
6
6
 
7
7
  import numpy as np
8
8
  from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
9
9
  from linkml_runtime.linkml_model.meta import ArrayExpression
10
10
  from pydantic import BaseModel
11
11
 
12
+ from linkml_store.index import get_indexer
13
+
14
+ try:
15
+ from linkml.validator.report import ValidationResult
16
+ except ImportError:
17
+ ValidationResult = None
18
+
19
+ from linkml_store.api.config import CollectionConfig
12
20
  from linkml_store.api.queries import Query, QueryResult
13
- from linkml_store.index.index import Index
21
+ from linkml_store.index.indexer import Indexer
14
22
 
15
23
  if TYPE_CHECKING:
16
24
  from linkml_store.api.database import Database
@@ -19,11 +27,11 @@ logger = logging.getLogger(__name__)
19
27
 
20
28
  OBJECT = Union[Dict[str, Any], BaseModel, Type]
21
29
 
30
+ DEFAULT_FACET_LIMIT = 100
22
31
  IDENTIFIER = str
23
32
  FIELD_NAME = str
24
33
 
25
34
 
26
- @dataclass
27
35
  class Collection:
28
36
  """
29
37
  A collection is an organized set of objects of the same or similar type.
@@ -33,12 +41,80 @@ class Collection:
33
41
  - For a file system, a collection could be a single tabular file such as Parquet or CSV
34
42
  """
35
43
 
36
- name: str
44
+ # name: str
37
45
  parent: Optional["Database"] = None
38
- _indexes: Optional[Dict[str, Index]] = None
39
- hidden: Optional[bool] = False
46
+ _indexers: Optional[Dict[str, Indexer]] = None
47
+ # hidden: Optional[bool] = False
48
+
49
+ metadata: Optional[CollectionConfig] = None
50
+
51
def __init__(
    self, name: str, parent: Optional["Database"] = None, metadata: Optional[CollectionConfig] = None, **kwargs
):
    """
    Initialize a collection.

    :param name: name of the collection
    :param parent: database this collection belongs to
    :param metadata: optional pre-built configuration; when given, kwargs are ignored
    :param kwargs: extra CollectionConfig fields, used only when metadata is None
    """
    self.parent = parent
    self.metadata = metadata if metadata else CollectionConfig(name=name, **kwargs)
    # reject contradictory names between the argument and supplied metadata
    if name is not None and self.metadata.name is not None and name != self.metadata.name:
        raise ValueError(f"Name mismatch: {name} != {self.metadata.name}")
+
62
+ @property
63
+ def name(self) -> str:
64
+ return self.metadata.name
65
+
66
+ @property
67
+ def hidden(self) -> bool:
68
+ return self.metadata.hidden
69
+
70
+ @property
71
+ def _target_class_name(self):
72
+ """
73
+ Return the name of the class that this collection represents
74
+
75
+ This MUST be a LinkML class name
76
+
77
+ :return:
78
+ """
79
+ # TODO: this is a shim layer until we can normalize on this
80
+ if self.metadata.type:
81
+ return self.metadata.type
82
+ return self.name
40
83
 
41
- def add(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
84
+ @property
85
+ def _alias(self):
86
+ """
87
+ Return the primary name/alias used for the collection.
88
+
89
+ This MAY be the name of the LinkML class, but it may be desirable
90
+ to have an alias, for example "persons" which collects all instances
91
+ of class Person.
92
+
93
+ The _alias SHOULD be used for Table names in SQL.
94
+
95
+ For nested data, the alias SHOULD be used as the key; e.g
96
+
97
+ ``{ "persons": [ { "name": "Alice" }, { "name": "Bob" } ] }``
98
+
99
+ :return:
100
+ """
101
+ # TODO: this is a shim layer until we can normalize on this
102
+ if self.metadata.alias:
103
+ return self.metadata.alias
104
+ return self.name
105
+
106
def replace(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
    """
    Replace the entire contents of the collection with the given objects.

    :param objs: replacement object or list of objects
    :param kwargs: passed through to insert
    :return: None
    """
    # wipe everything first, then load the new contents
    self.delete_where({})
    self.insert(objs, **kwargs)
116
+
117
+ def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
42
118
  """
43
119
  Add one or more objects to the collection
44
120
 
@@ -58,13 +134,14 @@ class Collection:
58
134
  """
59
135
  raise NotImplementedError
60
136
 
61
- def delete_where(self, where: Optional[Dict[str, Any]] = None, **kwargs) -> int:
137
+ def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
62
138
  """
63
139
  Delete objects that match a query
64
140
 
65
- :param where:
141
+ :param where: where conditions
142
+ :param missing_ok: if True, do not raise an error if the collection does not exist
66
143
  :param kwargs:
67
- :return:
144
+ :return: number of objects deleted (or -1 if unsupported)
68
145
  """
69
146
  raise NotImplementedError
70
147
 
@@ -79,7 +156,7 @@ class Collection:
79
156
  raise NotImplementedError
80
157
 
81
158
  def _create_query(self, **kwargs) -> Query:
82
- return Query(from_table=self.name, **kwargs)
159
+ return Query(from_table=self._alias, **kwargs)
83
160
 
84
161
  def query(self, query: Query, **kwargs) -> QueryResult:
85
162
  """
@@ -91,7 +168,9 @@ class Collection:
91
168
  """
92
169
  return self.parent.query(query, **kwargs)
93
170
 
94
- def query_facets(self, where: Optional[Dict] = None, facet_columns: List[str] = None) -> Dict[str, Dict[str, int]]:
171
+ def query_facets(
172
+ self, where: Optional[Dict] = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
173
+ ) -> Dict[str, Dict[str, int]]:
95
174
  """
96
175
  Run a query to get facet counts for one or more columns.
97
176
 
@@ -108,17 +187,32 @@ class Collection:
108
187
  :param con: A DuckDB database connection.
109
188
  :param query: A Query object representing the base query.
110
189
  :param facet_columns: A list of column names to get facet counts for.
190
+ :param facet_limit:
111
191
  :return: A dictionary where keys are column names and values are pandas DataFrames
112
192
  containing the facet counts for each unique value in the respective column.
113
193
  """
114
194
  raise NotImplementedError
115
195
 
116
196
def get(self, ids: Optional[IDENTIFIER], **kwargs) -> QueryResult:
    """
    Get one or more objects by ID.

    :param ids: identifier value(s), matched against the identifier field
    :param kwargs: passed through to query
    :return: query result
    """
    pk_field = self.identifier_field
    # build an equality query on the primary-key field and delegate
    return self.query(self._create_query(where_clause={pk_field: ids}), **kwargs)
120
207
 
121
208
def find(self, where: Optional[Any] = None, **kwargs) -> QueryResult:
    """
    Find objects in the collection using a where query.

    :param where: where conditions
    :param kwargs: passed through to query
    :return: query result
    """
    # wrap the conditions into a Query object and delegate execution
    return self.query(self._create_query(where_clause=where), **kwargs)
124
218
 
@@ -141,66 +235,122 @@ class Collection:
141
235
  :return:
142
236
  """
143
237
  if index_name is None:
144
- if len(self._indexes) == 1:
145
- index_name = list(self._indexes.keys())[0]
238
+ if len(self._indexers) == 1:
239
+ index_name = list(self._indexers.keys())[0]
146
240
  else:
147
241
  raise ValueError("Multiple indexes found. Please specify an index name.")
148
242
  ix_coll = self.parent.get_collection(self._index_collection_name(index_name))
149
- ix = self._indexes.get(index_name)
243
+ ix = self._indexers.get(index_name)
150
244
  if not ix:
151
245
  raise ValueError(f"No index named {index_name}")
152
246
  qr = ix_coll.find(where=where, limit=-1, **kwargs)
153
247
  index_col = ix.index_field
154
248
  vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
155
249
  results = ix.search(query, vector_pairs, limit=limit)
250
+ for r in results:
251
+ del r[1][index_col]
156
252
  new_qr = QueryResult(num_rows=len(results))
157
253
  new_qr.ranked_rows = results
158
254
  return new_qr
159
255
 
160
- def attach_index(self, index: Index, auto_index=True, **kwargs):
256
@property
def is_internal(self) -> bool:
    """
    Check if the collection is internal

    Internal collections (such as index-data collections) carry the
    ``internal__`` name prefix.

    :return: True when this is an internal collection
    """
    name = self.name
    if not name:
        raise ValueError(f"Collection has no name: {self} // {self.metadata}")
    return name.startswith("internal__")
+
267
+ def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = True, auto_index=True, **kwargs):
161
268
  """
162
269
  Attach an index to the collection.
163
270
 
164
271
  :param index:
165
- :param auto_index:
272
+ :param name:
273
+ :param auto_index: Automatically index all objects in the collection
166
274
  :param kwargs:
167
275
  :return:
168
276
  """
277
+ if isinstance(index, str):
278
+ index = get_indexer(index)
279
+ if name:
280
+ index.name = name
281
+ if not index.name:
282
+ index.name = type(index).__name__.lower()
169
283
  index_name = index.name
170
284
  if not index_name:
171
285
  raise ValueError("Index must have a name")
172
- if not self._indexes:
173
- self._indexes = {}
174
- self._indexes[index_name] = index
286
+ if not self._indexers:
287
+ self._indexers = {}
288
+ self._indexers[index_name] = index
175
289
  if auto_index:
176
290
  all_objs = self.find(limit=-1).rows
177
- self.index_objects(all_objs, index_name, **kwargs)
291
+ self.index_objects(all_objs, index_name, replace=True, **kwargs)
178
292
 
179
293
def _index_collection_name(self, index_name: str) -> str:
    """
    Create a name for a special collection that holds index data

    :param index_name: name of the indexer
    :return: internal collection name, embedding collection and index names
    """
    return f"internal__index__{self.name}__{index_name}"

def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
    """
    Index a list of objects

    :param objs: objects to index
    :param index_name: name of an attached indexer
    :param replace: if True, clear previously indexed data first
    :param kwargs: passed through to insert on the index collection
    :return: None
    """
    indexer = self._indexers.get(index_name)
    if not indexer:
        raise ValueError(f"No index named {index_name}")
    ix_coll_name = self._index_collection_name(index_name)
    ix_coll = self.parent.get_collection(ix_coll_name, create_if_not_exists=True)
    # compute one plain list-of-floats vector per object
    vectors = [[float(e) for e in v] for v in indexer.objects_to_vectors(objs)]
    index_col = indexer.index_field
    # TODO: id field
    objects_with_ix = [{**obj, index_col: vector} for obj, vector in zip(objs, vectors)]
    if replace:
        schema = self.parent.schema_view.schema
        logger.info(f"Checking if {ix_coll_name} is in {schema.classes.keys()}")
        if ix_coll_name in schema.classes:
            ix_coll.delete_where()
    ix_coll.insert(objects_with_ix, **kwargs)
329
+
330
def list_index_names(self) -> List[str]:
    """
    Return a list of index names

    :return: names of attached indexers; empty list when none are attached
    """
    # BUG FIX: _indexers is lazily initialized and may still be None, in
    # which case the original `list(self._indexers.keys())` raised; this
    # also matches the guard used by the `indexers` property
    return list(self._indexers) if self._indexers else []
337
+
338
@property
def indexers(self) -> Dict[str, Indexer]:
    """
    Return a list of indexers

    :return: mapping from index name to indexer; empty dict when none attached
    """
    # _indexers is lazily initialized and may still be None
    return self._indexers or {}
202
346
 
203
347
def peek(self, limit: Optional[int] = None) -> QueryResult:
    """
    Return the first N objects in the collection

    :param limit: maximum number of objects to return
    :return: query result holding the leading objects
    """
    # an unconstrained query, truncated at execution time
    return self.query(self._create_query(), limit=limit)
206
356
 
@@ -212,13 +362,16 @@ class Collection:
212
362
  """
213
363
  sv = self.parent.schema_view
214
364
  if sv:
215
- return sv.get_class(self.name)
365
+ cls = sv.get_class(self._target_class_name)
366
+ return cls
216
367
  return None
217
368
 
218
369
  def identifier_attribute_name(self) -> Optional[str]:
219
370
  """
220
371
  Return the name of the identifier attribute for the collection.
221
372
 
373
+ AKA the primary key.
374
+
222
375
  :return: The name of the identifier attribute, if one exists.
223
376
  """
224
377
  cd = self.class_definition()
@@ -228,6 +381,25 @@ class Collection:
228
381
  return att.name
229
382
  return None
230
383
 
384
def object_identifier(self, obj: OBJECT, auto=True) -> Optional[IDENTIFIER]:
    """
    Return the identifier for an object.

    :param obj: the object to identify
    :param auto: If True, generate an identifier (md5 of the object's repr)
        when the object has no primary-key value.
    :return: the identifier, or None when absent and auto is False
    """
    # BUG FIX: identifier_attribute_name is a method; the original omitted
    # the call parentheses, so `pk` was a bound method and `pk in obj`
    # never matched, forcing every object down the auto-generated path
    pk = self.identifier_attribute_name()
    if pk is not None and pk in obj:
        return obj[pk]
    elif auto:
        # TODO: use other unique keys if no primary key
        as_str = str(obj)
        return hashlib.md5(as_str.encode()).hexdigest()
    else:
        return None
402
+
231
403
  def induce_class_definition_from_objects(self, objs: List[OBJECT], max_sample_size=10) -> ClassDefinition:
232
404
  """
233
405
  Induce a class definition from a list of objects.
@@ -239,7 +411,7 @@ class Collection:
239
411
  :param max_sample_size:
240
412
  :return:
241
413
  """
242
- cd = ClassDefinition(self.name)
414
+ cd = ClassDefinition(self._target_class_name)
243
415
  keys = defaultdict(list)
244
416
  for obj in objs[0:max_sample_size]:
245
417
  if isinstance(obj, BaseModel):
@@ -302,7 +474,7 @@ class Collection:
302
474
  array_expr = ArrayExpression(exact_number_dimensions=len(exact_dimensions_list[0]))
303
475
  cd.attributes[k].array = array_expr
304
476
  sv = self.parent.schema_view
305
- sv.schema.classes[self.name] = cd
477
+ sv.schema.classes[self._target_class_name] = cd
306
478
  sv.set_modified()
307
479
  return cd
308
480
 
@@ -325,3 +497,22 @@ class Collection:
325
497
  :return:
326
498
  """
327
499
  raise NotImplementedError
500
+
501
def iter_validate_collection(self, **kwargs) -> Iterator["ValidationResult"]:
    """
    Validate the contents of the collection

    :param kwargs: passed through to find() to select the objects to validate
    :return: iterator over validation results
    """
    from linkml.validator import JsonschemaValidationPlugin, Validator

    plugins = [JsonschemaValidationPlugin(closed=True)]
    validator = Validator(self.parent.schema_view.schema, validation_plugins=plugins)
    cd = self.class_definition()
    if not cd:
        raise ValueError(f"Cannot find class definition for {self._target_class_name}")
    class_name = cd.name
    # validate each stored object against the target class
    for obj in self.find(**kwargs).rows:
        yield from validator.iter_results(obj, class_name)
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class CollectionConfig(BaseModel):
    """Configuration for a single collection within a database."""

    name: Optional[str] = Field(
        default=None,
        description="An optional name for the collection",
    )
    alias: Optional[str] = Field(
        default=None,
        description="An optional alias for the collection",
    )
    type: Optional[str] = Field(
        default=None,
        description="The type of object in the collection. TODO; use this instead of name",
    )
    metadata: Optional[Dict] = Field(
        default=None,
        description="Optional metadata for the collection",
    )
    attributes: Optional[Dict[str, Dict]] = Field(
        default=None,
        description="Optional attributes for the collection, following LinkML schema",
    )
    indexers: Optional[Dict[str, Dict]] = Field(
        default=None,
        description="Optional configuration for indexers",
    )
    hidden: Optional[bool] = Field(
        default=False,
        description="Whether the collection is hidden",
    )
    is_prepopulated: Optional[bool] = Field(
        default=False,
        description="Whether the collection is prepopulated",
    )


class DatabaseConfig(BaseModel):
    """Configuration for a single database attached to a client."""

    handle: str = Field(
        default="duckdb:///:memory:",
        description="The database handle, e.g., 'duckdb:///:memory:' or 'mongodb://localhost:27017'",
    )
    alias: Optional[str] = Field(
        default=None,
        description="An optional alias for the database",
    )
    schema_location: Optional[str] = Field(
        default=None,
        description="The location of the schema file, either a path on disk or URL",
    )
    schema_dict: Optional[Dict[str, Any]] = Field(
        default=None,
        description="The LinkML schema as a dictionary",
    )
    # default_factory avoids sharing a mutable default between models
    collections: Dict[str, CollectionConfig] = Field(
        default_factory=dict,
        description="A dictionary of collection configurations",
    )
    recreate_if_exists: bool = Field(
        default=False,
        description="Whether to recreate the database if it already exists",
    )
    collection_type_slot: Optional[str] = Field(
        default=None,
        description=(
            "For databases that combine multiple collections into a single space, this field"
            "specifies the field that contains the collection type. An example of this is a Solr"
            "index that does not use cores for collections, and instead uses a single global"
            "document space; if this has a field 'document_type', then this field should be set"
        ),
    )
    searchable_slots: Optional[List[str]] = Field(
        default=None,
        description="Optional configuration for search fields",
    )


class ClientConfig(BaseModel):
    """Top-level configuration for a client, holding its databases."""

    handle: Optional[str] = Field(
        default=None,
        description="The client handle",
    )
    # default_factory avoids sharing a mutable default between models
    databases: Dict[str, DatabaseConfig] = Field(
        default_factory=dict,
        description="A dictionary of database configurations",
    )
    schema_path: Optional[str] = Field(
        default=None,
        description="The path to the LinkML schema file",
    )
    base_dir: Optional[str] = Field(
        default=None,
        description="The base directory for the client",
    )