linkml-store 0.1.7__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linkml-store might be problematic. Click here for more details.

Files changed (48)
  1. {linkml_store-0.1.7 → linkml_store-0.1.8}/PKG-INFO +4 -1
  2. {linkml_store-0.1.7 → linkml_store-0.1.8}/README.md +2 -0
  3. {linkml_store-0.1.7 → linkml_store-0.1.8}/pyproject.toml +3 -1
  4. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/client.py +30 -5
  5. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/collection.py +175 -21
  6. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/config.py +6 -2
  7. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/database.py +230 -18
  8. linkml_store-0.1.8/src/linkml_store/api/stores/chromadb/__init__.py +7 -0
  9. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/duckdb/__init__.py +9 -0
  10. linkml_store-0.1.8/src/linkml_store/api/stores/duckdb/duckdb_collection.py +144 -0
  11. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/duckdb/duckdb_database.py +19 -5
  12. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/duckdb/mappings.py +1 -0
  13. linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/__init__.py +16 -0
  14. linkml_store-0.1.7/src/linkml_store/api/stores/duckdb/duckdb_collection.py → linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/filesystem_collection.py +5 -5
  15. linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/filesystem_database.py +36 -0
  16. linkml_store-0.1.8/src/linkml_store/api/stores/hdf5/__init__.py +7 -0
  17. linkml_store-0.1.8/src/linkml_store/api/stores/mongodb/__init__.py +25 -0
  18. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +21 -6
  19. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/cli.py +64 -10
  20. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/index/__init__.py +6 -2
  21. linkml_store-0.1.8/src/linkml_store/index/implementations/llm_indexer.py +122 -0
  22. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/index/implementations/simple_indexer.py +2 -2
  23. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/index/indexer.py +32 -8
  24. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/utils/format_utils.py +52 -2
  25. linkml_store-0.1.7/src/linkml_store/api/stores/chromadb/__init__.py +0 -3
  26. linkml_store-0.1.7/src/linkml_store/index/implementations/__init__.py +0 -0
  27. linkml_store-0.1.7/src/linkml_store/index/implementations/llm_indexer.py +0 -44
  28. linkml_store-0.1.7/src/linkml_store/utils/__init__.py +0 -0
  29. {linkml_store-0.1.7 → linkml_store-0.1.8}/LICENSE +0 -0
  30. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/__init__.py +0 -0
  31. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/__init__.py +0 -0
  32. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/queries.py +0 -0
  33. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/__init__.py +0 -0
  34. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
  35. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
  36. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
  37. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
  38. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
  39. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/solr/__init__.py +0 -0
  40. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
  41. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
  42. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
  43. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/constants.py +0 -0
  44. {linkml_store-0.1.7/src/linkml_store/api/stores/hdf5 → linkml_store-0.1.8/src/linkml_store/index/implementations}/__init__.py +0 -0
  45. {linkml_store-0.1.7/src/linkml_store/api/stores/mongodb → linkml_store-0.1.8/src/linkml_store/utils}/__init__.py +0 -0
  46. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/utils/io.py +0 -0
  47. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/utils/object_utils.py +0 -0
  48. {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/utils/sql_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: linkml-store
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: linkml-store
5
5
  License: MIT
6
6
  Author: Author 1
@@ -27,6 +27,7 @@ Requires-Dist: click
27
27
  Requires-Dist: duckdb (>=0.10.1,<0.11.0)
28
28
  Requires-Dist: duckdb-engine (>=0.11.2)
29
29
  Requires-Dist: h5py ; extra == "h5py"
30
+ Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
30
31
  Requires-Dist: linkml ; extra == "validation"
31
32
  Requires-Dist: linkml-runtime (>=1.7.5,<2.0.0)
32
33
  Requires-Dist: linkml_map ; extra == "map"
@@ -53,3 +54,5 @@ There is also experimental support for vector-based indexing using OpenAI test e
53
54
  The goals of this project are to provide high level access to data stored in heterogeneous databases,
54
55
  with optional schema management using LinkML.
55
56
 
57
+ See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
58
+
@@ -8,3 +8,5 @@ There is also experimental support for vector-based indexing using OpenAI test e
8
8
 
9
9
  The goals of this project are to provide high level access to data stored in heterogeneous databases,
10
10
  with optional schema management using LinkML.
11
+
12
+ See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "linkml-store"
3
- version = "0.1.7"
3
+ version = "0.1.8"
4
4
  description = "linkml-store"
5
5
  authors = ["Author 1 <author@org.org>"]
6
6
  license = "MIT"
@@ -27,6 +27,7 @@ h5py = { version="*", optional = true }
27
27
  linkml = { version="*", optional = true }
28
28
  linkml_map = { version="*", optional = true }
29
29
  pandas = ">=2.2.1"
30
+ jinja2 = "^3.1.4"
30
31
 
31
32
  [tool.poetry.group.dev.dependencies]
32
33
  pytest = {version = ">=7.1.2"}
@@ -41,6 +42,7 @@ myst-parser = {version = ">=0.18.1"}
41
42
  furo = {version = "*"}
42
43
  nbsphinx = "*"
43
44
  jupyter = "*"
45
+ jupysql = "*"
44
46
 
45
47
  [tool.poetry.group.tests.dependencies]
46
48
  pytest = "^7.4.0"
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  from pathlib import Path
2
3
  from typing import Dict, Optional, Union
3
4
 
@@ -11,6 +12,9 @@ from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
11
12
  from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
12
13
  from linkml_store.api.stores.solr.solr_database import SolrDatabase
13
14
 
15
+ logger = logging.getLogger(__name__)
16
+
17
+
14
18
  HANDLE_MAP = {
15
19
  "duckdb": DuckDBDatabase,
16
20
  "solr": SolrDatabase,
@@ -23,14 +27,27 @@ class Client:
23
27
  """
24
28
  A client is the top-level object for interacting with databases.
25
29
 
26
- A client has access to one or more :class:`Database` objects.
27
-
28
- Each database consists of a number of :class:`.Collection` objects.
30
+ * A client has access to one or more :class:`.Database` objects.
31
+ * Each database consists of a number of :class:`.Collection` objects.
29
32
 
30
- Examples
31
- --------
33
+ Creating a client
34
+ -----------------
32
35
  >>> client = Client()
36
+
37
+ Attaching a database
38
+ --------------------
33
39
  >>> db = client.attach_database("duckdb", alias="test")
40
+
41
+ Note that normally a handle would be specified by a locator such as ``duckdb:///<PATH>``, but
42
+ for convenience, an in-memory duckdb object can be specified without a full locator
43
+
44
+ We can check the actual handle:
45
+
46
+ >>> db.handle
47
+ 'duckdb:///:memory:'
48
+
49
+ Creating a new collection
50
+ -------------------------
34
51
  >>> collection = db.create_collection("Person")
35
52
  >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
36
53
  >>> collection.insert(objs)
@@ -151,6 +168,8 @@ class Client:
151
168
  if ":" not in handle:
152
169
  scheme = handle
153
170
  handle = None
171
+ if alias is None:
172
+ alias = scheme
154
173
  else:
155
174
  scheme, _ = handle.split(":", 1)
156
175
  if scheme not in HANDLE_MAP:
@@ -165,6 +184,11 @@ class Client:
165
184
  self._databases = {}
166
185
  self._databases[alias] = db
167
186
  db.parent = self
187
+ if db.alias:
188
+ if db.alias != alias:
189
+ raise AssertionError(f"Inconsistent alias: {db.alias} != {alias}")
190
+ else:
191
+ db.metadata.alias = alias
168
192
  return db
169
193
 
170
194
  def get_database(self, name: Optional[str] = None, create_if_not_exists=True, **kwargs) -> Database:
@@ -195,6 +219,7 @@ class Client:
195
219
  self._databases = {}
196
220
  if name not in self._databases:
197
221
  if create_if_not_exists:
222
+ logger.info(f"Creating database: {name}")
198
223
  self.attach_database(name, **kwargs)
199
224
  else:
200
225
  raise ValueError(f"Database {name} does not exist")
@@ -1,3 +1,5 @@
1
+ """A structure for representing collections of similar objects."""
2
+
1
3
  import hashlib
2
4
  import logging
3
5
  from collections import defaultdict
@@ -10,6 +12,7 @@ from linkml_runtime.linkml_model.meta import ArrayExpression
10
12
  from pydantic import BaseModel
11
13
 
12
14
  from linkml_store.index import get_indexer
15
+ from linkml_store.utils.format_utils import load_objects
13
16
  from linkml_store.utils.object_utils import clean_empties
14
17
 
15
18
  try:
@@ -39,7 +42,17 @@ class Collection:
39
42
 
40
43
  - For relational databases, a collection is typically a table
41
44
  - For document databases such as MongoDB, a collection is the native type
42
- - For a file system, a collection could be a single tabular file such as Parquet or CSV
45
+ - For a file system, a collection could be a single tabular file such as Parquet or CSV.
46
+
47
+ Collection objects are typically not created directly - instead they are generated
48
+ from a parent :class:`.Database` object:
49
+
50
+ >>> from linkml_store import Client
51
+ >>> client = Client()
52
+ >>> db = client.attach_database("duckdb", alias="test")
53
+ >>> collection = db.create_collection("Person")
54
+ >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
55
+ >>> collection.insert(objs)
43
56
  """
44
57
 
45
58
  # name: str
@@ -57,15 +70,21 @@ class Collection:
57
70
  self.metadata = metadata
58
71
  else:
59
72
  self.metadata = CollectionConfig(name=name, **kwargs)
60
- if name is not None and self.metadata.name is not None and name != self.metadata.name:
61
- raise ValueError(f"Name mismatch: {name} != {self.metadata.name}")
73
+ if not self.metadata.alias:
74
+ self.metadata.alias = name
75
+ if not self.metadata.type:
76
+ self.metadata.type = name
77
+ # if name is not None and self.metadata.name is not None and name != self.metadata.name:
78
+ # raise ValueError(f"Name mismatch: {name} != {self.metadata.name}")
62
79
 
63
80
  @property
64
81
  def name(self) -> str:
65
82
  """
66
- Return the name of the collection
83
+ Return the name of the collection.
67
84
 
68
- :return:
85
+ TODO: deprecate in favor of Type
86
+
87
+ :return: name of the collection
69
88
  """
70
89
  return self.metadata.name
71
90
 
@@ -79,7 +98,7 @@ class Collection:
79
98
 
80
99
  :return: True if the collection is hidden
81
100
  """
82
- return self.metadata.hidden
101
+ # return self.metadata.hidden
83
102
 
84
103
  @property
85
104
  def target_class_name(self):
@@ -88,7 +107,14 @@ class Collection:
88
107
 
89
108
  This MUST be a LinkML class name
90
109
 
91
- :return:
110
+ >>> from linkml_store import Client
111
+ >>> client = Client()
112
+ >>> db = client.attach_database("duckdb", alias="test")
113
+ >>> collection = db.create_collection("Person", alias="persons")
114
+ >>> collection.target_class_name
115
+ 'Person'
116
+
117
+ :return: name of the class which members of this collection instantiate
92
118
  """
93
119
  # TODO: this is a shim layer until we can normalize on this
94
120
  if self.metadata.type:
@@ -104,15 +130,34 @@ class Collection:
104
130
  to have an alias, for example "persons" which collects all instances
105
131
  of class Person.
106
132
 
107
- The _alias SHOULD be used for Table names in SQL.
133
+ >>> from linkml_store import Client
134
+ >>> client = Client()
135
+ >>> db = client.attach_database("duckdb", alias="test")
136
+ >>> collection = db.create_collection("Person", alias="persons")
137
+ >>> collection.alias
138
+ 'persons'
139
+
140
+ If no explicit alias is provided, then the target class name is used:
141
+
142
+ >>> from linkml_store import Client
143
+ >>> client = Client()
144
+ >>> db = client.attach_database("duckdb", alias="test")
145
+ >>> collection = db.create_collection("Person")
146
+ >>> collection.alias
147
+ 'Person'
148
+
149
+ The alias SHOULD be used for Table names in SQL.
108
150
 
109
151
  For nested data, the alias SHOULD be used as the key; e.g
110
152
 
111
- ``{ "persons": [ { "name": "Alice" }, { "name": "Bob" } ] }``
153
+ .. code-block:: json
154
+
155
+ { "persons": [ { "name": "Alice" }, { "name": "Bob" } ] }
112
156
 
113
157
  :return:
114
158
  """
115
159
  # TODO: this is a shim layer until we can normalize on this
160
+ # TODO: this is a shim layer until we can normalize on this
116
161
  if self.metadata.alias:
117
162
  return self.metadata.alias
118
163
  return self.name
@@ -121,6 +166,13 @@ class Collection:
121
166
  """
122
167
  Replace entire collection with objects.
123
168
 
169
+ >>> from linkml_store import Client
170
+ >>> client = Client()
171
+ >>> db = client.attach_database("duckdb", alias="test")
172
+ >>> collection = db.create_collection("Person")
173
+ >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
174
+ >>> collection.insert(objs)
175
+
124
176
  :param objs:
125
177
  :param kwargs:
126
178
  :return:
@@ -130,7 +182,14 @@ class Collection:
130
182
 
131
183
  def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
132
184
  """
133
- Add one or more objects to the collection
185
+ Add one or more objects to the collection.
186
+
187
+ >>> from linkml_store import Client
188
+ >>> client = Client()
189
+ >>> db = client.attach_database("duckdb", alias="test")
190
+ >>> collection = db.create_collection("Person")
191
+ >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
192
+ >>> collection.insert(objs)
134
193
 
135
194
  :param objs:
136
195
  :param kwargs:
@@ -138,9 +197,32 @@ class Collection:
138
197
  """
139
198
  raise NotImplementedError
140
199
 
141
- def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
200
+ def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
142
201
  """
143
- Delete one or more objects from the collection
202
+ Delete one or more objects from the collection.
203
+
204
+ First let's set up a collection:
205
+
206
+ >>> from linkml_store import Client
207
+ >>> client = Client()
208
+ >>> db = client.attach_database("duckdb", alias="test")
209
+ >>> collection = db.create_collection("Person")
210
+ >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
211
+ >>> collection.insert(objs)
212
+ >>> collection.find({}).num_rows
213
+ 2
214
+
215
+ Now let's delete an object:
216
+
217
+ >>> collection.delete(objs[0])
218
+ >>> collection.find({}).num_rows
219
+ 1
220
+
221
+ Deleting the same object again should have no effect:
222
+
223
+ >>> collection.delete(objs[0])
224
+ >>> collection.find({}).num_rows
225
+ 1
144
226
 
145
227
  :param objs:
146
228
  :param kwargs:
@@ -148,9 +230,30 @@ class Collection:
148
230
  """
149
231
  raise NotImplementedError
150
232
 
151
- def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
233
+ def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
152
234
  """
153
- Delete objects that match a query
235
+ Delete objects that match a query.
236
+
237
+ First let's set up a collection:
238
+
239
+ >>> from linkml_store import Client
240
+ >>> client = Client()
241
+ >>> db = client.attach_database("duckdb", alias="test")
242
+ >>> collection = db.create_collection("Person")
243
+ >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
244
+ >>> collection.insert(objs)
245
+
246
+ Now let's delete an object:
247
+
248
+ >>> collection.delete_where({"id": "P1"})
249
+ >>> collection.find({}).num_rows
250
+ 1
251
+
252
+ Match everything:
253
+
254
+ >>> collection.delete_where({})
255
+ >>> collection.find({}).num_rows
256
+ 0
154
257
 
155
258
  :param where: where conditions
156
259
  :param missing_ok: if True, do not raise an error if the collection does not exist
@@ -161,7 +264,7 @@ class Collection:
161
264
 
162
265
  def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
163
266
  """
164
- Update one or more objects in the collection
267
+ Update one or more objects in the collection.
165
268
 
166
269
  :param objs:
167
270
  :param kwargs:
@@ -174,7 +277,21 @@ class Collection:
174
277
 
175
278
  def query(self, query: Query, **kwargs) -> QueryResult:
176
279
  """
177
- Run a query against the collection
280
+ Run a query against the collection.
281
+
282
+ First let's load a collection:
283
+
284
+ >>> from linkml_store import Client
285
+ >>> from linkml_store.utils.format_utils import load_objects
286
+ >>> client = Client()
287
+ >>> db = client.attach_database("duckdb")
288
+ >>> collection = db.create_collection("Country")
289
+ >>> objs = load_objects("tests/input/countries/countries.jsonl")
290
+ >>> collection.insert(objs)
291
+
292
+ Now let's run a query:
293
+
294
+ TODO
178
295
 
179
296
  :param query:
180
297
  :param kwargs:
@@ -207,7 +324,7 @@ class Collection:
207
324
  """
208
325
  raise NotImplementedError
209
326
 
210
- def get(self, ids: Optional[IDENTIFIER], **kwargs) -> QueryResult:
327
+ def get(self, ids: Optional[List[IDENTIFIER]], **kwargs) -> QueryResult:
211
328
  """
212
329
  Get one or more objects by ID.
213
330
 
@@ -217,6 +334,8 @@ class Collection:
217
334
  """
218
335
  # TODO
219
336
  id_field = self.identifier_attribute_name
337
+ if not id_field:
338
+ raise ValueError(f"No identifier for {self.name}")
220
339
  return self.find({id_field: ids})
221
340
 
222
341
  def get_one(self, id: IDENTIFIER, **kwargs) -> Optional[OBJECT]:
@@ -242,6 +361,31 @@ class Collection:
242
361
  """
243
362
  Find objects in the collection using a where query.
244
363
 
364
+ As an example, first load a collection:
365
+
366
+ >>> from linkml_store import Client
367
+ >>> from linkml_store.utils.format_utils import load_objects
368
+ >>> client = Client()
369
+ >>> db = client.attach_database("duckdb")
370
+ >>> collection = db.create_collection("Country")
371
+ >>> objs = load_objects("tests/input/countries/countries.jsonl")
372
+ >>> collection.insert(objs)
373
+
374
+ Now let's find all objects:
375
+
376
+ >>> qr = collection.find({})
377
+ >>> qr.num_rows
378
+ 20
379
+
380
+ We can do a more restrictive query:
381
+
382
+ >>> qr = collection.find({"code": "FR"})
383
+ >>> qr.num_rows
384
+ 1
385
+ >>> qr.rows[0]["name"]
386
+ 'France'
387
+
388
+
245
389
  :param where:
246
390
  :param kwargs:
247
391
  :return:
@@ -290,6 +434,7 @@ class Collection:
290
434
  raise ValueError(f"No index named {index_name}")
291
435
  qr = ix_coll.find(where=where, limit=-1, **kwargs)
292
436
  index_col = ix.index_field
437
+ # TODO: optimize this for large indexes
293
438
  vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
294
439
  results = ix.search(query, vector_pairs, limit=limit)
295
440
  for r in results:
@@ -305,11 +450,15 @@ class Collection:
305
450
 
306
451
  :return:
307
452
  """
308
- if not self.name:
309
- raise ValueError(f"Collection has no name: {self} // {self.metadata}")
310
- return self.name.startswith("internal__")
453
+ if not self.alias:
454
+ raise ValueError(f"Collection has no alias: {self} // {self.metadata}")
455
+ return self.alias.startswith("internal__")
311
456
 
312
- def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = True, auto_index=True, **kwargs):
457
+ def load_from_source(self):
458
+ objects = load_objects(self.metadata.source_location)
459
+ self.insert(objects)
460
+
461
+ def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = None, auto_index=True, **kwargs):
313
462
  """
314
463
  Attach an index to the collection.
315
464
 
@@ -333,6 +482,7 @@ class Collection:
333
482
  self._indexers[index_name] = index
334
483
  if auto_index:
335
484
  all_objs = self.find(limit=-1).rows
485
+ logger.info(f"Auto-indexing {len(all_objs)} objects")
336
486
  self.index_objects(all_objs, index_name, replace=True, **kwargs)
337
487
 
338
488
  def _index_collection_name(self, index_name: str) -> str:
@@ -340,6 +490,7 @@ class Collection:
340
490
  Create a name for a special collection that holds index data
341
491
 
342
492
  :param index_name:
493
+ :param indexer:
343
494
  :return:
344
495
  """
345
496
  return f"internal__index__{self.name}__{index_name}"
@@ -370,6 +521,7 @@ class Collection:
370
521
  logger.info(f"Checking if {ix_coll_name} is in {schema.classes.keys()}")
371
522
  if ix_coll_name in schema.classes:
372
523
  ix_coll.delete_where()
524
+
373
525
  ix_coll.insert(objects_with_ix, **kwargs)
374
526
 
375
527
  def list_index_names(self) -> List[str]:
@@ -457,6 +609,8 @@ class Collection:
457
609
  :param max_sample_size:
458
610
  :return:
459
611
  """
612
+ if not self.target_class_name:
613
+ raise ValueError(f"No target_class_name for {self.alias}")
460
614
  cd = ClassDefinition(self.target_class_name)
461
615
  keys = defaultdict(list)
462
616
  for obj in objs[0:max_sample_size]:
@@ -16,7 +16,7 @@ class CollectionConfig(BaseModel):
16
16
  default=None,
17
17
  description="The type of object in the collection. TODO; use this instead of name",
18
18
  )
19
- metadata: Optional[Dict] = Field(
19
+ additional_properties: Optional[Dict] = Field(
20
20
  default=None,
21
21
  description="Optional metadata for the collection",
22
22
  )
@@ -36,6 +36,10 @@ class CollectionConfig(BaseModel):
36
36
  default=False,
37
37
  description="Whether the collection is prepopulated",
38
38
  )
39
+ source_location: Optional[str] = Field(
40
+ default=None,
41
+ description="Filesystem or remote URL that stores the data",
42
+ )
39
43
 
40
44
 
41
45
  class DatabaseConfig(BaseModel):
@@ -55,7 +59,7 @@ class DatabaseConfig(BaseModel):
55
59
  default=None,
56
60
  description="The LinkML schema as a dictionary",
57
61
  )
58
- collections: Dict[str, CollectionConfig] = Field(
62
+ collections: Optional[Dict[str, CollectionConfig]] = Field(
59
63
  default={},
60
64
  description="A dictionary of collection configurations",
61
65
  )