linkml-store 0.1.10__tar.gz → 0.1.12__tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linkml-store might be problematic. Click here for more details.

Files changed (62)
  1. {linkml_store-0.1.10 → linkml_store-0.1.12}/PKG-INFO +36 -3
  2. {linkml_store-0.1.10 → linkml_store-0.1.12}/README.md +31 -0
  3. {linkml_store-0.1.10 → linkml_store-0.1.12}/pyproject.toml +6 -3
  4. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/client.py +63 -7
  5. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/collection.py +152 -32
  6. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/config.py +49 -6
  7. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/database.py +77 -30
  8. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +16 -0
  9. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/duckdb/duckdb_database.py +47 -5
  10. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +11 -4
  11. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/filesystem/filesystem_database.py +10 -1
  12. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +6 -2
  13. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/mongodb/mongodb_database.py +30 -35
  14. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/solr/solr_collection.py +4 -4
  15. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/cli.py +64 -19
  16. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/index/__init__.py +16 -2
  17. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/index/implementations/llm_indexer.py +2 -1
  18. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/index/indexer.py +13 -2
  19. linkml_store-0.1.12/src/linkml_store/utils/file_utils.py +37 -0
  20. linkml_store-0.1.12/src/linkml_store/utils/format_utils.py +403 -0
  21. linkml_store-0.1.12/src/linkml_store/utils/mongodb_utils.py +145 -0
  22. linkml_store-0.1.12/src/linkml_store/utils/pandas_utils.py +40 -0
  23. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/sql_utils.py +9 -3
  24. linkml_store-0.1.12/src/linkml_store/webapi/html/generic.html.j2 +43 -0
  25. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/main.py +346 -63
  26. linkml_store-0.1.10/src/linkml_store/utils/format_utils.py +0 -224
  27. linkml_store-0.1.10/src/linkml_store/webapi/html/generic.html.j2 +0 -46
  28. {linkml_store-0.1.10 → linkml_store-0.1.12}/LICENSE +0 -0
  29. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/__init__.py +0 -0
  30. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/__init__.py +0 -0
  31. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/queries.py +0 -0
  32. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/__init__.py +0 -0
  33. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
  34. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
  35. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
  36. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
  37. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
  38. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
  39. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
  40. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
  41. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
  42. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
  43. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/solr/__init__.py +0 -0
  44. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
  45. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
  46. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/types.py +0 -0
  47. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/constants.py +0 -0
  48. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/index/implementations/__init__.py +0 -0
  49. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
  50. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/__init__.py +0 -0
  51. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/change_utils.py +0 -0
  52. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/io.py +0 -0
  53. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/object_utils.py +0 -0
  54. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/patch_utils.py +0 -0
  55. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/query_utils.py +0 -0
  56. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/schema_utils.py +0 -0
  57. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/__init__.py +0 -0
  58. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/html/__init__.py +0 -0
  59. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/html/base.html.j2 +0 -0
  60. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
  61. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
  62. {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: linkml-store
3
- Version: 0.1.10
3
+ Version: 0.1.12
4
4
  Summary: linkml-store
5
5
  License: MIT
6
6
  Author: Author 1
@@ -16,6 +16,7 @@ Provides-Extra: analytics
16
16
  Provides-Extra: app
17
17
  Provides-Extra: chromadb
18
18
  Provides-Extra: fastapi
19
+ Provides-Extra: frictionless
19
20
  Provides-Extra: h5py
20
21
  Provides-Extra: llm
21
22
  Provides-Extra: map
@@ -30,11 +31,12 @@ Requires-Dist: click
30
31
  Requires-Dist: duckdb (>=0.10.1,<0.11.0)
31
32
  Requires-Dist: duckdb-engine (>=0.11.2)
32
33
  Requires-Dist: fastapi ; extra == "fastapi"
34
+ Requires-Dist: frictionless ; extra == "frictionless"
33
35
  Requires-Dist: h5py ; extra == "h5py"
34
36
  Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
35
37
  Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
36
- Requires-Dist: linkml ; extra == "validation"
37
- Requires-Dist: linkml-runtime (>=1.8.0rc2)
38
+ Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
39
+ Requires-Dist: linkml-runtime (>=1.8.0)
38
40
  Requires-Dist: linkml_map ; extra == "map"
39
41
  Requires-Dist: linkml_renderer ; extra == "renderer"
40
42
  Requires-Dist: llm ; extra == "llm"
@@ -122,6 +124,10 @@ need to have a vector database to run embedding search!
122
124
 
123
125
  See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
124
126
 
127
+ ### Use with LLMs
128
+
129
+ TODO - docs
130
+
125
131
  ### Validation
126
132
 
127
133
  LinkML-Store is backed by [LinkML](https://linkml.io), which allows
@@ -131,6 +137,33 @@ See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.htm
131
137
 
132
138
  and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
133
139
 
140
+ ## Web API
141
+
142
+ There is a preliminary API following HATEOAS principles implemented using FastAPI.
143
+
144
+ To start you should first create a config file, e.g. `db/conf.yaml`:
145
+
146
+ Then run:
147
+
148
+ ```
149
+ export LINKML_STORE_CONFIG=./db/conf.yaml
150
+ make api
151
+ ```
152
+
153
+ The API returns links as well as data objects, it's recommended to use a Chrome plugin for JSON viewing
154
+ for exploring the API. TODO: add docs here.
155
+
156
+ The main endpoints are:
157
+
158
+ * `http://localhost:8000/` - the root of the API
159
+ * `http://localhost:8000/pages/` - browse the API via HTML
160
+ * `http://localhost:8000/docs` - the Swagger UI
161
+
162
+ ## Streamlit app
163
+
164
+ ```
165
+ make app
166
+ ```
134
167
 
135
168
  ## Background
136
169
 
@@ -69,6 +69,10 @@ need to have a vector database to run embedding search!
69
69
 
70
70
  See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
71
71
 
72
+ ### Use with LLMs
73
+
74
+ TODO - docs
75
+
72
76
  ### Validation
73
77
 
74
78
  LinkML-Store is backed by [LinkML](https://linkml.io), which allows
@@ -78,6 +82,33 @@ See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.htm
78
82
 
79
83
  and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
80
84
 
85
+ ## Web API
86
+
87
+ There is a preliminary API following HATEOAS principles implemented using FastAPI.
88
+
89
+ To start you should first create a config file, e.g. `db/conf.yaml`:
90
+
91
+ Then run:
92
+
93
+ ```
94
+ export LINKML_STORE_CONFIG=./db/conf.yaml
95
+ make api
96
+ ```
97
+
98
+ The API returns links as well as data objects, it's recommended to use a Chrome plugin for JSON viewing
99
+ for exploring the API. TODO: add docs here.
100
+
101
+ The main endpoints are:
102
+
103
+ * `http://localhost:8000/` - the root of the API
104
+ * `http://localhost:8000/pages/` - browse the API via HTML
105
+ * `http://localhost:8000/docs` - the Swagger UI
106
+
107
+ ## Streamlit app
108
+
109
+ ```
110
+ make app
111
+ ```
81
112
 
82
113
  ## Background
83
114
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "linkml-store"
3
- version = "0.1.10"
3
+ version = "0.1.12"
4
4
  description = "linkml-store"
5
5
  authors = ["Author 1 <author@org.org>"]
6
6
  license = "MIT"
@@ -10,7 +10,7 @@ readme = "README.md"
10
10
  python = "^3.9, !=3.9.7"
11
11
  click = "*"
12
12
  pydantic = "^2.0.0"
13
- linkml-runtime = ">=1.8.0rc2"
13
+ linkml-runtime = ">=1.8.0"
14
14
  streamlit = { version = "^1.32.2", optional = true }
15
15
  sqlalchemy = "*"
16
16
  duckdb = "^0.10.1"
@@ -25,9 +25,10 @@ pymongo = { version="*", optional = true }
25
25
  chromadb = { version="*", optional = true }
26
26
  pyarrow = { version="*", optional = true }
27
27
  h5py = { version="*", optional = true }
28
- linkml = { version="*", optional = true }
28
+ linkml = { version=">=1.8.0", optional = true }
29
29
  linkml_map = { version="*", optional = true }
30
30
  linkml_renderer = { version="*", optional = true }
31
+ frictionless = { version="*", optional = true }
31
32
  pandas = ">=2.2.1"
32
33
  jinja2 = "^3.1.4"
33
34
  jsonlines = "^4.0.0"
@@ -49,6 +50,7 @@ nbsphinx = "*"
49
50
  jupyter = "*"
50
51
  jupysql = "*"
51
52
  papermill = "*"
53
+ nbdime = "*"
52
54
 
53
55
  [tool.poetry.group.tests.dependencies]
54
56
  pytest = "^7.4.0"
@@ -71,6 +73,7 @@ validation = ["linkml"]
71
73
  map = ["linkml_map"]
72
74
  renderer = ["linkml_renderer"]
73
75
  fastapi = ["fastapi", "uvicorn"]
76
+ frictionless = ["frictionless"]
74
77
 
75
78
  [tool.poetry.scripts]
76
79
  linkml-store = "linkml_store.cli:cli"
@@ -242,7 +242,7 @@ class Client:
242
242
  Return all attached databases
243
243
 
244
244
  Examples
245
- --------
245
+
246
246
  >>> client = Client()
247
247
  >>> _ = client.attach_database("duckdb", alias="test1")
248
248
  >>> _ = client.attach_database("duckdb", alias="test2")
@@ -268,25 +268,81 @@ class Client:
268
268
  """
269
269
  Drop a database.
270
270
 
271
+ Example (in-memory):
272
+
273
+ >>> client = Client()
274
+ >>> db1 = client.attach_database("duckdb", alias="test1")
275
+ >>> db2 = client.attach_database("duckdb", alias="test2")
276
+ >>> len(client.databases)
277
+ 2
278
+ >>> client.drop_database("test1")
279
+ >>> len(client.databases)
280
+ 1
281
+
282
+ Databases that persist on disk:
283
+
284
+ >>> client = Client()
285
+ >>> path = Path("tmp/test.db")
286
+ >>> path.parent.mkdir(parents=True, exist_ok=True)
287
+ >>> db = client.attach_database(f"duckdb:///{path}", alias="test")
288
+ >>> len(client.databases)
289
+ 1
290
+ >>> db.store({"persons": [{"id": "P1", "name": "John"}]})
291
+ >>> db.commit()
292
+ >>> Path("tmp/test.db").exists()
293
+ True
294
+ >>> client.drop_database("test")
295
+ >>> len(client.databases)
296
+ 0
297
+ >>> Path("tmp/test.db").exists()
298
+ False
299
+
300
+ Dropping a non-existent database:
301
+
302
+ >>> client = Client()
303
+ >>> client.drop_database("duckdb:///tmp/made-up1", missing_ok=True)
304
+ >>> client.drop_database("duckdb:///tmp/made-up2", missing_ok=False)
305
+ Traceback (most recent call last):
306
+ ...
307
+ ValueError: Database duckdb:///tmp/made-up2 not found
308
+
271
309
  :param name:
272
310
  :param missing_ok:
273
311
  :return:
274
312
  """
275
- if name in self._databases:
276
- db = self._databases[name]
277
- db.drop(**kwargs)
278
- del self._databases[name]
313
+ if self._databases:
314
+ if name in self._databases:
315
+ db = self._databases[name]
316
+ db.drop(**kwargs)
317
+ del self._databases[name]
318
+ else:
319
+ if not missing_ok:
320
+ raise ValueError(f"Database {name} not found")
279
321
  else:
280
- if not missing_ok:
281
- raise ValueError(f"Database {name} not found")
322
+ db = self.get_database(name, create_if_not_exists=True)
323
+ db.drop(**kwargs)
282
324
 
283
325
  def drop_all_databases(self, **kwargs):
284
326
  """
285
327
  Drop all databases.
286
328
 
329
+ Example (in-memory):
330
+
331
+ >>> client = Client()
332
+ >>> db1 = client.attach_database("duckdb", alias="test1")
333
+ >>> assert "test1" in client.databases
334
+ >>> db2 = client.attach_database("duckdb", alias="test2")
335
+ >>> assert "test2" in client.databases
336
+ >>> client.drop_all_databases()
337
+ >>> len(client.databases)
338
+ 0
339
+
340
+
287
341
  :param missing_ok:
288
342
  :return:
289
343
  """
344
+ if not self._databases:
345
+ return
290
346
  for name in list(self._databases.keys()):
291
347
  self.drop_database(name, missing_ok=False, **kwargs)
292
348
  self._databases = {}
@@ -14,7 +14,7 @@ from pydantic import BaseModel
14
14
 
15
15
  from linkml_store.api.types import DatabaseType
16
16
  from linkml_store.index import get_indexer
17
- from linkml_store.utils.format_utils import load_objects
17
+ from linkml_store.utils.format_utils import load_objects, load_objects_from_url
18
18
  from linkml_store.utils.object_utils import clean_empties
19
19
  from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists
20
20
 
@@ -61,6 +61,7 @@ class Collection(Generic[DatabaseType]):
61
61
  # name: str
62
62
  parent: Optional[DatabaseType] = None
63
63
  _indexers: Optional[Dict[str, Indexer]] = None
64
+ _initialized: Optional[bool] = None
64
65
  # hidden: Optional[bool] = False
65
66
 
66
67
  metadata: Optional[CollectionConfig] = None
@@ -73,7 +74,7 @@ class Collection(Generic[DatabaseType]):
73
74
  if metadata:
74
75
  self.metadata = metadata
75
76
  else:
76
- self.metadata = CollectionConfig(name=name, **kwargs)
77
+ self.metadata = CollectionConfig(type=name, **kwargs)
77
78
  if not self.metadata.alias:
78
79
  self.metadata.alias = name
79
80
  if not self.metadata.type:
@@ -81,17 +82,6 @@ class Collection(Generic[DatabaseType]):
81
82
  # if name is not None and self.metadata.name is not None and name != self.metadata.name:
82
83
  # raise ValueError(f"Name mismatch: {name} != {self.metadata.name}")
83
84
 
84
- @property
85
- def name(self) -> str:
86
- """
87
- Return the name of the collection.
88
-
89
- TODO: deprecate in favor of Type
90
-
91
- :return: name of the collection
92
- """
93
- return self.metadata.name
94
-
95
85
  @property
96
86
  def hidden(self) -> bool:
97
87
  """
@@ -118,12 +108,18 @@ class Collection(Generic[DatabaseType]):
118
108
  >>> collection.target_class_name
119
109
  'Person'
120
110
 
111
+ >>> collection = db.create_collection("Organization")
112
+ >>> collection.target_class_name
113
+ 'Organization'
114
+ >>> collection.alias
115
+ 'Organization'
116
+
121
117
  :return: name of the class which members of this collection instantiate
122
118
  """
123
119
  # TODO: this is a shim layer until we can normalize on this
124
120
  if self.metadata.type:
125
121
  return self.metadata.type
126
- return self.name
122
+ return self.alias
127
123
 
128
124
  @property
129
125
  def alias(self):
@@ -161,10 +157,9 @@ class Collection(Generic[DatabaseType]):
161
157
  :return:
162
158
  """
163
159
  # TODO: this is a shim layer until we can normalize on this
164
- # TODO: this is a shim layer until we can normalize on this
165
160
  if self.metadata.alias:
166
161
  return self.metadata.alias
167
- return self.name
162
+ return self.target_class_name
168
163
 
169
164
  def replace(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
170
165
  """
@@ -201,7 +196,14 @@ class Collection(Generic[DatabaseType]):
201
196
  """
202
197
  raise NotImplementedError
203
198
 
199
+ def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
200
+ logger.info(f"Pre-query hook (state: {self._initialized}; Q= {query}")
201
+ if not self._initialized:
202
+ self._materialize_derivations()
203
+ self._initialized = True
204
+
204
205
  def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
206
+ self._initialized = True
205
207
  patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
206
208
  self._broadcast(patches, **kwargs)
207
209
 
@@ -305,6 +307,7 @@ class Collection(Generic[DatabaseType]):
305
307
  :param kwargs:
306
308
  :return:
307
309
  """
310
+ self._pre_query_hook()
308
311
  return self.parent.query(query, **kwargs)
309
312
 
310
313
  def query_facets(
@@ -340,11 +343,13 @@ class Collection(Generic[DatabaseType]):
340
343
  :param kwargs:
341
344
  :return:
342
345
  """
343
- # TODO
344
346
  id_field = self.identifier_attribute_name
345
347
  if not id_field:
346
348
  raise ValueError(f"No identifier for {self.name}")
347
- return self.find({id_field: ids})
349
+ if len(ids) == 1:
350
+ return self.find({id_field: ids[0]})
351
+ else:
352
+ return self.find({id_field: {"$in": ids}})
348
353
 
349
354
  def get_one(self, id: IDENTIFIER, **kwargs) -> Optional[OBJECT]:
350
355
  """
@@ -399,9 +404,10 @@ class Collection(Generic[DatabaseType]):
399
404
  :return:
400
405
  """
401
406
  query = self._create_query(where_clause=where)
407
+ self._pre_query_hook(query)
402
408
  return self.query(query, **kwargs)
403
409
 
404
- def find_iter(self, where: Optional[Any] = None, **kwargs) -> Iterator[OBJECT]:
410
+ def find_iter(self, where: Optional[Any] = None, page_size=100, **kwargs) -> Iterator[OBJECT]:
405
411
  """
406
412
  Find objects in the collection using a where query.
407
413
 
@@ -409,9 +415,22 @@ class Collection(Generic[DatabaseType]):
409
415
  :param kwargs:
410
416
  :return:
411
417
  """
412
- qr = self.find(where=where, limit=-1, **kwargs)
413
- for row in qr.rows:
414
- yield row
418
+ total_rows = None
419
+ offset = 0
420
+ if page_size < 1:
421
+ raise ValueError(f"Invalid page size: {page_size}")
422
+ while True:
423
+ qr = self.find(where=where, offset=offset, limit=page_size, **kwargs)
424
+ if total_rows is None:
425
+ total_rows = qr.num_rows
426
+ if not qr.rows:
427
+ return
428
+ for row in qr.rows:
429
+ yield row
430
+ offset += page_size
431
+ if offset >= total_rows:
432
+ break
433
+ return
415
434
 
416
435
  def search(
417
436
  self,
@@ -454,6 +473,7 @@ class Collection(Generic[DatabaseType]):
454
473
  :param kwargs:
455
474
  :return:
456
475
  """
476
+ self._pre_query_hook()
457
477
  if index_name is None:
458
478
  if len(self.indexers) == 1:
459
479
  index_name = list(self.indexers.keys())[0]
@@ -494,10 +514,101 @@ class Collection(Generic[DatabaseType]):
494
514
  raise ValueError(f"Collection has no alias: {self} // {self.metadata}")
495
515
  return self.alias.startswith("internal__")
496
516
 
497
- def load_from_source(self):
498
- objects = load_objects(self.metadata.source_location)
517
+ def exists(self) -> Optional[bool]:
518
+ """
519
+ Check if the collection exists.
520
+
521
+ :return:
522
+ """
523
+ cd = self.class_definition()
524
+ return cd is not None and cd.attributes
525
+
526
+ def load_from_source(self, load_if_exists=False):
527
+ """
528
+ Load objects from the source location.
529
+
530
+ :param load_if_exists:
531
+ :return:
532
+ """
533
+ if not load_if_exists and self.exists():
534
+ return
535
+ metadata = self.metadata
536
+ if metadata.source:
537
+ source = metadata.source
538
+ kwargs = source.arguments or {}
539
+ if source.local_path:
540
+ objects = load_objects(
541
+ metadata.source.local_path,
542
+ format=source.format,
543
+ expected_type=source.expected_type,
544
+ compression=source.compression,
545
+ **kwargs,
546
+ )
547
+ elif metadata.source.url:
548
+ objects = load_objects_from_url(
549
+ metadata.source.url,
550
+ format=source.format,
551
+ expected_type=source.expected_type,
552
+ compression=source.compression,
553
+ **kwargs,
554
+ )
499
555
  self.insert(objects)
500
556
 
557
+ def _check_if_initialized(self) -> bool:
558
+ return self._initialized
559
+
560
+ def _materialize_derivations(self, **kwargs):
561
+ metadata = self.metadata
562
+ if not metadata.derived_from:
563
+ logger.info(f"No metadata for {self.alias}; no derivations")
564
+ return
565
+ if self._check_if_initialized():
566
+ logger.info(f"Already initialized {self.alias}; no derivations")
567
+ return
568
+ parent_db = self.parent
569
+ client = parent_db.parent
570
+ # cd = self.class_definition()
571
+ for derivation in metadata.derived_from:
572
+ # TODO: optimize this; utilize underlying engine
573
+ logger.info(f"Deriving from {derivation}")
574
+ if derivation.database:
575
+ db = client.get_database(derivation.database)
576
+ else:
577
+ db = parent_db
578
+ if derivation.collection:
579
+ coll = db.get_collection(derivation.collection)
580
+ else:
581
+ coll = self
582
+ coll.class_definition()
583
+ source_obj_iter = coll.find_iter(derivation.where or {})
584
+ mappings = derivation.mappings
585
+ if not mappings:
586
+ raise ValueError(f"No mappings for {self.name}")
587
+ target_class_name = self.target_class_name
588
+ from linkml_map.session import Session
589
+
590
+ session = Session()
591
+ session.set_source_schema(db.schema_view.schema)
592
+ session.set_object_transformer(
593
+ {
594
+ "class_derivations": {
595
+ target_class_name: {
596
+ "populated_from": coll.target_class_name,
597
+ "slot_derivations": mappings,
598
+ },
599
+ }
600
+ },
601
+ )
602
+ logger.debug(f"Session Spec: {session.object_transformer}")
603
+ tr_objs = []
604
+ for source_obj in source_obj_iter:
605
+ tr_obj = session.transform(source_obj, source_type=coll.target_class_name)
606
+ tr_objs.append(tr_obj)
607
+ if not tr_objs:
608
+ raise ValueError(f"No objects derived from {coll.name}")
609
+ self.insert(tr_objs)
610
+ self.commit()
611
+
501
612
  def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = None, auto_index=True, **kwargs):
502
613
  """
503
614
  Attach an index to the collection.
@@ -572,7 +683,7 @@ class Collection(Generic[DatabaseType]):
572
683
  :param indexer:
573
684
  :return:
574
685
  """
575
- return f"internal__index__{self.name}__{index_name}"
686
+ return f"internal__index__{self.alias}__{index_name}"
576
687
 
577
688
  def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
578
689
  """
@@ -638,11 +749,15 @@ class Collection(Generic[DatabaseType]):
638
749
  """
639
750
  Return the class definition for the collection.
640
751
 
752
+ If no schema has been explicitly set, and the native database does not
753
+ have a schema, then a schema will be induced from the objects in the collection.
754
+
641
755
  :return:
642
756
  """
643
757
  sv: SchemaView = self.parent.schema_view
644
758
  if sv:
645
759
  cls = sv.get_class(self.target_class_name)
760
+ # cls = sv.schema.classes[self.target_class_name]
646
761
  if cls and not cls.attributes:
647
762
  if not sv.class_induced_slots(cls.name):
648
763
  for att in self._induce_attributes():
@@ -722,7 +837,9 @@ class Collection(Generic[DatabaseType]):
722
837
  else:
723
838
  return None
724
839
 
725
- def induce_class_definition_from_objects(self, objs: List[OBJECT], max_sample_size=10) -> ClassDefinition:
840
+ def induce_class_definition_from_objects(
841
+ self, objs: List[OBJECT], max_sample_size: Optional[int] = None
842
+ ) -> ClassDefinition:
726
843
  """
727
844
  Induce a class definition from a list of objects.
728
845
 
@@ -733,6 +850,9 @@ class Collection(Generic[DatabaseType]):
733
850
  :param max_sample_size:
734
851
  :return:
735
852
  """
853
+ # TODO: use schemaview
854
+ if max_sample_size is None:
855
+ max_sample_size = 10
736
856
  if not self.target_class_name:
737
857
  raise ValueError(f"No target_class_name for {self.alias}")
738
858
  cd = ClassDefinition(self.target_class_name)
@@ -760,7 +880,7 @@ class Collection(Generic[DatabaseType]):
760
880
  exact_dimensions_list.append(v.shape)
761
881
  break
762
882
  if isinstance(v, list):
763
- v = v[0]
883
+ v = v[0] if v else None
764
884
  multivalueds.append(True)
765
885
  elif isinstance(v, dict):
766
886
  v = list(v.values())[0]
@@ -795,6 +915,7 @@ class Collection(Generic[DatabaseType]):
795
915
  for other_rng in rngs:
796
916
  if rng != other_rng:
797
917
  raise ValueError(f"Conflict: {rng} != {other_rng} for {vs}")
918
+ logger.debug(f"Inducing {k} as {rng} {multivalued} {inlined}")
798
919
  cd.attributes[k] = SlotDefinition(k, range=rng, multivalued=multivalued, inlined=inlined)
799
920
  if exact_dimensions_list:
800
921
  array_expr = ArrayExpression(exact_number_dimensions=len(exact_dimensions_list[0]))
@@ -828,7 +949,7 @@ class Collection(Generic[DatabaseType]):
828
949
  """
829
950
  Apply a patch to the collection.
830
951
 
831
- Patches conform to the JSON Patch format,
952
+ Patches conform to the JSON Patch format.
832
953
 
833
954
  :param patches:
834
955
  :param kwargs:
@@ -841,11 +962,11 @@ class Collection(Generic[DatabaseType]):
841
962
  new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
842
963
  self.replace(new_objs)
843
964
 
844
- def diff(self, other: "Collection", **kwargs):
965
+ def diff(self, other: "Collection", **kwargs) -> List[PatchDict]:
845
966
  """
846
967
  Diff two collections.
847
968
 
848
- :param other:
969
+ :param other: The collection to diff against
849
970
  :param kwargs:
850
971
  :return:
851
972
  """
@@ -872,8 +993,7 @@ class Collection(Generic[DatabaseType]):
872
993
  if not cd:
873
994
  raise ValueError(f"Cannot find class definition for {self.target_class_name}")
874
995
  class_name = cd.name
875
- result = self.find(**kwargs)
876
- for obj in result.rows:
996
+ for obj in self.find_iter(**kwargs):
877
997
  obj = clean_empties(obj)
878
998
  yield from validator.iter_results(obj, class_name)
879
999