linkml-store 0.2.4__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linkml-store might be problematic.

Files changed (80):
  1. {linkml_store-0.2.4 → linkml_store-0.2.6}/PKG-INFO +7 -10
  2. {linkml_store-0.2.4 → linkml_store-0.2.6}/pyproject.toml +9 -8
  3. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/client.py +19 -2
  4. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/collection.py +60 -2
  5. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/database.py +17 -12
  6. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +11 -5
  7. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/duckdb_database.py +52 -19
  8. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +83 -0
  9. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/mongodb_database.py +7 -3
  10. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/cli.py +23 -5
  11. linkml_store-0.2.6/src/linkml_store/inference/implementations/llm_inference_engine.py +152 -0
  12. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/rag_inference_engine.py +20 -9
  13. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/inference_engine.py +2 -2
  14. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/format_utils.py +60 -1
  15. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/llm_utils.py +15 -0
  16. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/object_utils.py +3 -1
  17. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/sql_utils.py +7 -1
  18. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/vector_utils.py +1 -1
  19. {linkml_store-0.2.4 → linkml_store-0.2.6}/LICENSE +0 -0
  20. {linkml_store-0.2.4 → linkml_store-0.2.6}/README.md +0 -0
  21. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/__init__.py +0 -0
  22. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/__init__.py +0 -0
  23. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/config.py +0 -0
  24. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/queries.py +0 -0
  25. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/__init__.py +0 -0
  26. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
  27. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
  28. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
  29. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
  30. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
  31. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
  32. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
  33. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
  34. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
  35. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
  36. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
  37. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
  38. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
  39. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
  40. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/neo4j/neo4j_database.py +0 -0
  41. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/__init__.py +0 -0
  42. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
  43. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
  44. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
  45. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/types.py +0 -0
  46. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/constants.py +0 -0
  47. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/graphs/__init__.py +0 -0
  48. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/graphs/graph_map.py +0 -0
  49. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/index/__init__.py +0 -0
  50. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/index/implementations/__init__.py +0 -0
  51. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
  52. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
  53. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/index/indexer.py +0 -0
  54. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/__init__.py +0 -0
  55. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/evaluation.py +0 -0
  56. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/__init__.py +0 -0
  57. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/rule_based_inference_engine.py +0 -0
  58. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/sklearn_inference_engine.py +0 -0
  59. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/inference_config.py +0 -0
  60. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/inference_engine_registry.py +0 -0
  61. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/__init__.py +0 -0
  62. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/change_utils.py +0 -0
  63. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/file_utils.py +0 -0
  64. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/io.py +0 -0
  65. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/mongodb_utils.py +0 -0
  66. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/neo4j_utils.py +0 -0
  67. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/pandas_utils.py +0 -0
  68. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/patch_utils.py +0 -0
  69. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/query_utils.py +0 -0
  70. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/schema_utils.py +0 -0
  71. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/sklearn_utils.py +0 -0
  72. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/stats_utils.py +0 -0
  73. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/__init__.py +0 -0
  74. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/__init__.py +0 -0
  75. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/base.html.j2 +0 -0
  76. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
  77. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
  78. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
  79. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
  80. {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/main.py +0 -0
@@ -1,14 +1,13 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: linkml-store
3
- Version: 0.2.4
3
+ Version: 0.2.6
4
4
  Summary: linkml-store
5
5
  License: MIT
6
6
  Author: Author 1
7
7
  Author-email: author@org.org
8
- Requires-Python: >=3.9, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*, !=3.8.*
8
+ Requires-Python: >=3.10,<4.0
9
9
  Classifier: License :: OSI Approved :: MIT License
10
10
  Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.9
12
11
  Classifier: Programming Language :: Python :: 3.10
13
12
  Classifier: Programming Language :: Python :: 3.11
14
13
  Classifier: Programming Language :: Python :: 3.12
@@ -20,7 +19,6 @@ Provides-Extra: bigquery
20
19
  Provides-Extra: fastapi
21
20
  Provides-Extra: frictionless
22
21
  Provides-Extra: h5py
23
- Provides-Extra: ibis
24
22
  Provides-Extra: llm
25
23
  Provides-Extra: map
26
24
  Provides-Extra: mongodb
@@ -36,20 +34,18 @@ Requires-Dist: duckdb (>=0.10.1)
36
34
  Requires-Dist: duckdb-engine (>=0.11.2)
37
35
  Requires-Dist: fastapi ; extra == "fastapi"
38
36
  Requires-Dist: frictionless ; extra == "frictionless"
39
- Requires-Dist: gcsfs ; extra == "ibis"
40
37
  Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
41
38
  Requires-Dist: h5py ; extra == "h5py"
42
- Requires-Dist: ibis-framework[duckdb,examples] (>=9.3.0) ; extra == "ibis"
43
39
  Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
44
40
  Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
45
- Requires-Dist: jsonpatch (>=1.33,<2.0)
41
+ Requires-Dist: jsonpatch (>=1.33)
46
42
  Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
47
43
  Requires-Dist: linkml-runtime (>=1.8.0)
48
44
  Requires-Dist: linkml_map ; extra == "map"
49
45
  Requires-Dist: linkml_renderer ; extra == "renderer"
50
46
  Requires-Dist: llm ; extra == "llm" or extra == "all"
51
47
  Requires-Dist: matplotlib ; extra == "analytics"
52
- Requires-Dist: multipledispatch ; extra == "ibis"
48
+ Requires-Dist: multipledispatch
53
49
  Requires-Dist: neo4j ; extra == "neo4j" or extra == "all"
54
50
  Requires-Dist: networkx ; extra == "neo4j"
55
51
  Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
@@ -57,8 +53,9 @@ Requires-Dist: plotly ; extra == "analytics"
57
53
  Requires-Dist: py2neo ; extra == "neo4j"
58
54
  Requires-Dist: pyarrow ; extra == "pyarrow"
59
55
  Requires-Dist: pydantic (>=2.0.0,<3.0.0)
60
- Requires-Dist: pymongo ; extra == "mongodb"
56
+ Requires-Dist: pymongo (>=4.11,<5.0) ; extra == "mongodb"
61
57
  Requires-Dist: pystow (>=0.5.4,<0.6.0)
58
+ Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
62
59
  Requires-Dist: ruff (>=0.6.2) ; extra == "tests"
63
60
  Requires-Dist: scikit-learn ; extra == "scipy"
64
61
  Requires-Dist: scipy ; extra == "scipy"
@@ -68,7 +65,7 @@ Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
68
65
  Requires-Dist: tabulate
69
66
  Requires-Dist: tiktoken ; extra == "llm"
70
67
  Requires-Dist: uvicorn ; extra == "fastapi"
71
- Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
68
+ Requires-Dist: xmltodict (>=0.13.0)
72
69
  Description-Content-Type: text/markdown
73
70
 
74
71
  # linkml-store
@@ -1,13 +1,13 @@
1
1
  [tool.poetry]
2
2
  name = "linkml-store"
3
- version = "0.2.4"
3
+ version = "0.2.6"
4
4
  description = "linkml-store"
5
5
  authors = ["Author 1 <author@org.org>"]
6
6
  license = "MIT"
7
7
  readme = "README.md"
8
8
 
9
9
  [tool.poetry.dependencies]
10
- python = "^3.9, !=3.9.7"
10
+ python = "^3.10"
11
11
  click = "*"
12
12
  pydantic = "^2.0.0"
13
13
  linkml-runtime = ">=1.8.0"
@@ -24,7 +24,7 @@ black = { version=">=24.0.0", optional = true }
24
24
  ruff = { version=">=0.6.2", optional = true }
25
25
  llm = { version="*", optional = true }
26
26
  tiktoken = { version="*", optional = true }
27
- pymongo = { version="*", optional = true }
27
+ pymongo = "^4.11"
28
28
  neo4j = { version="*", optional = true }
29
29
  py2neo = { version="*", optional = true }
30
30
  networkx = { version="*", optional = true }
@@ -37,7 +37,7 @@ linkml = { version=">=1.8.0", optional = true }
37
37
  linkml_map = { version="*", optional = true }
38
38
  linkml_renderer = { version="*", optional = true }
39
39
  frictionless = { version="*", optional = true }
40
- ibis-framework = { version=">=9.3.0", extras = ["duckdb", "examples"], optional = true }
40
+ #ibis-framework = { version=">=9.3.0", extras = ["duckdb", "examples"], optional = true }
41
41
  gcsfs = { version="*", optional = true }
42
42
  multipledispatch = { version="*" }
43
43
  tabulate = "*"
@@ -46,8 +46,9 @@ jinja2 = "^3.1.4"
46
46
  jsonlines = "^4.0.0"
47
47
  fastapi = { version="*", optional = true }
48
48
  uvicorn = { version="*", optional = true }
49
- xmltodict = "^0.13.0"
50
- jsonpatch = "^1.33"
49
+ xmltodict = ">=0.13.0"
50
+ jsonpatch = ">=1.33"
51
+ python-dotenv = "^1.0.1"
51
52
 
52
53
  [tool.poetry.group.dev.dependencies]
53
54
  pytest = {version = ">=7.1.2"}
@@ -90,9 +91,9 @@ renderer = ["linkml_renderer"]
90
91
  fastapi = ["fastapi", "uvicorn"]
91
92
  frictionless = ["frictionless"]
92
93
  scipy = ["scipy", "scikit-learn"]
93
- ibis = ["ibis-framework", "multipledispatch", "gcsfs"]
94
+ #ibis = ["ibis-framework", "multipledispatch", "gcsfs"]
94
95
  bigquery = ["google-cloud-bigquery"]
95
- all = ["llm", "mongodb", "neo4j", "validation", "map", "renderer", "ibis", "bigquery"]
96
+ all = ["llm", "mongodb", "neo4j", "validation", "map", "renderer", "bigquery"]
96
97
 
97
98
  [tool.poetry.scripts]
98
99
  linkml-store = "linkml_store.cli:cli"
@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
15
15
 
16
16
  HANDLE_MAP = {
17
17
  "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
18
+ "sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
18
19
  "solr": "linkml_store.api.stores.solr.solr_database.SolrDatabase",
19
20
  "mongodb": "linkml_store.api.stores.mongodb.mongodb_database.MongoDBDatabase",
20
21
  "chromadb": "linkml_store.api.stores.chromadb.chromadb_database.ChromaDBDatabase",
@@ -22,6 +23,12 @@ HANDLE_MAP = {
22
23
  "file": "linkml_store.api.stores.filesystem.filesystem_database.FileSystemDatabase",
23
24
  }
24
25
 
26
+ SUFFIX_MAP = {
27
+ "ddb": "duckdb:///{path}",
28
+ "duckdb": "duckdb:///{path}",
29
+ "db": "duckdb:///{path}",
30
+ }
31
+
25
32
 
26
33
  class Client:
27
34
  """
@@ -197,6 +204,13 @@ class Client:
197
204
  :param kwargs:
198
205
  :return:
199
206
  """
207
+ if ":" not in handle:
208
+ if alias is None:
209
+ alias = handle
210
+ if "." in handle:
211
+ suffix = handle.split(".")[-1]
212
+ if suffix in SUFFIX_MAP:
213
+ handle = SUFFIX_MAP[suffix].format(path=handle)
200
214
  if ":" not in handle:
201
215
  scheme = handle
202
216
  handle = None
@@ -220,7 +234,9 @@ class Client:
220
234
  if not alias:
221
235
  alias = handle
222
236
  if not self._databases:
237
+ logger.info("Initializing databases")
223
238
  self._databases = {}
239
+ logger.info(f"Attaching {alias}")
224
240
  self._databases[alias] = db
225
241
  db.parent = self
226
242
  if db.alias:
@@ -263,8 +279,9 @@ class Client:
263
279
  self._databases[name] = db
264
280
  if name not in self._databases:
265
281
  if create_if_not_exists:
266
- logger.info(f"Creating database: {name}")
267
- self.attach_database(name, **kwargs)
282
+ logger.info(f"Creating/attaching database: {name}")
283
+ db = self.attach_database(name, **kwargs)
284
+ name = db.alias
268
285
  else:
269
286
  raise ValueError(f"Database {name} does not exist")
270
287
  db = self._databases[name]
@@ -1,6 +1,7 @@
1
1
  """A structure for representing collections of similar objects."""
2
2
 
3
3
  import hashlib
4
+ import json
4
5
  import logging
5
6
  from collections import defaultdict
6
7
  from pathlib import Path
@@ -210,8 +211,59 @@ class Collection(Generic[DatabaseType]):
210
211
  """
211
212
  raise NotImplementedError
212
213
 
214
+ def index (
215
+ self,
216
+ objs: Union[OBJECT, List[OBJECT]],
217
+ index_name: Optional[str] = None,
218
+ replace: bool = False,
219
+ unique: bool = False,
220
+ **kwargs,
221
+ ) -> None:
222
+ """
223
+ Index objects in the collection.
224
+
225
+ :param objs:
226
+ :param index_name:
227
+ :param replace: replace the index, or not
228
+ :param unique: boolean used to declare the index unique or not
229
+ :param kwargs:
230
+ :return:
231
+ """
232
+ raise NotImplementedError
233
+
234
+ def upsert(self,
235
+ objs: Union[OBJECT, List[OBJECT]],
236
+ filter_fields: List[str],
237
+ update_fields: Union[List[str], None] = None, **kwargs):
238
+ """
239
+ Add one or more objects to the collection.
240
+
241
+ >>> from linkml_store import Client
242
+ >>> client = Client()
243
+ >>> db = client.attach_database("mongodb", alias="test")
244
+ >>> collection = db.create_collection("Person")
245
+ >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
246
+ >>> collection.upsert(objs)
247
+
248
+ :param objs:
249
+ :param filter_fields: List of field names to use as the filter for matching existing collections.
250
+ :param update_fields: List of field names to include in the update. If None, all fields are updated.
251
+ :param kwargs:
252
+
253
+ :return:
254
+ """
255
+ raise NotImplementedError
256
+
213
257
  def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
214
- logger.info(f"Pre-query hook (state: {self._initialized}; Q= {query}")
258
+ """
259
+ Pre-query hook.
260
+
261
+ This is called before a query is executed. It is used to materialize derivations and indexes.
262
+ :param query:
263
+ :param kwargs:
264
+ :return:
265
+ """
266
+ logger.debug(f"Pre-query hook (state: {self._initialized}; Q= {query}") # if logging.info, this is very noisy.
215
267
  if not self._initialized:
216
268
  self._materialize_derivations()
217
269
  self._initialized = True
@@ -536,7 +588,13 @@ class Collection(Generic[DatabaseType]):
536
588
  qr = ix_coll.find(where=where, limit=-1, **kwargs)
537
589
  index_col = ix.index_field
538
590
  # TODO: optimize this for large indexes
539
- vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
591
+ def row2array(row):
592
+ v = row[index_col]
593
+ if isinstance(v, str):
594
+ # sqlite stores arrays as strings
595
+ v = json.loads(v)
596
+ return np.array(v, dtype=float)
597
+ vector_pairs = [(row, row2array(row)) for row in qr.rows]
540
598
  results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
541
599
  for r in results:
542
600
  del r[1][index_col]
@@ -276,14 +276,15 @@ class Database(ABC, Generic[CollectionType]):
276
276
 
277
277
  Examples:
278
278
 
279
- >>> from linkml_store.api.client import Client
280
- >>> client = Client()
281
- >>> db = client.attach_database("duckdb", alias="test")
282
- >>> collection = db.create_collection("Person", alias="persons")
283
- >>> collection.alias
284
- 'persons'
285
- >>> collection.target_class_name
286
- 'Person'
279
+ >>> from linkml_store.api.client import Client
280
+ >>> client = Client()
281
+ >>> db = client.attach_database("duckdb", alias="test")
282
+ >>> collection = db.create_collection("Person", alias="persons")
283
+ >>> collection.alias
284
+ 'persons'
285
+
286
+ >>> collection.target_class_name
287
+ 'Person'
287
288
 
288
289
  If alias is not provided, it defaults to the name of the type.
289
290
 
@@ -419,7 +420,7 @@ class Database(ABC, Generic[CollectionType]):
419
420
  >>> from linkml_store.api.client import Client
420
421
  >>> from linkml_store.api.queries import Query
421
422
  >>> client = Client()
422
- >>> db = client.attach_database("duckdb", alias="test")
423
+ >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
423
424
  >>> collection = db.create_collection("Person")
424
425
  >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
425
426
  >>> query = Query(from_table="Person", where_clause={"name": "John"})
@@ -451,7 +452,7 @@ class Database(ABC, Generic[CollectionType]):
451
452
 
452
453
  >>> from linkml_store.api.client import Client
453
454
  >>> client = Client()
454
- >>> db = client.attach_database("duckdb", alias="test")
455
+ >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
455
456
  >>> collection = db.create_collection("Person", alias="persons")
456
457
  >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
457
458
  >>> schema_view = db.schema_view
@@ -470,6 +471,7 @@ class Database(ABC, Generic[CollectionType]):
470
471
  if not self._schema_view:
471
472
  self._initialize_schema()
472
473
  if not self._schema_view:
474
+ logger.info("Inducing schema view")
473
475
  self._schema_view = self.induce_schema_view()
474
476
  return self._schema_view
475
477
 
@@ -505,6 +507,7 @@ class Database(ABC, Generic[CollectionType]):
505
507
  if isinstance(schema_view, str):
506
508
  schema_view = SchemaView(schema_view)
507
509
  self._schema_view = schema_view
510
+ logger.info(f"Setting schema view for {self.handle}")
508
511
  # self._schema_view = SchemaView(schema_view.materialize_derived_schema())
509
512
  if not self._collections:
510
513
  return
@@ -719,7 +722,7 @@ class Database(ABC, Generic[CollectionType]):
719
722
 
720
723
  >>> from linkml_store.api.client import Client
721
724
  >>> client = Client()
722
- >>> db = client.attach_database("duckdb", alias="test")
725
+ >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
723
726
  >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
724
727
  >>> db.list_collection_names()
725
728
  ['iris']
@@ -739,7 +742,9 @@ class Database(ABC, Generic[CollectionType]):
739
742
  # import into a test instance
740
743
  tmp_handle = source_format.value
741
744
  client = self.parent
742
- tmp_db = client.attach_database(tmp_handle, alias="tmp")
745
+ tmp_alias = "tmp"
746
+ client.drop_database(tmp_alias, missing_ok=True)
747
+ tmp_db = client.attach_database(tmp_handle, alias=tmp_alias, recreate_if_exists=True)
743
748
  # TODO: check for infinite recursion
744
749
  tmp_db.import_database(location, source_format=source_format)
745
750
  obj = {}
@@ -147,16 +147,22 @@ class DuckDBCollection(Collection):
147
147
  if self._table_created or self.metadata.is_prepopulated:
148
148
  logger.info(f"Already have table for: {cd.name}")
149
149
  return
150
- query = Query(
151
- from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
152
- )
153
- qr = self.parent.query(query)
154
- if qr.num_rows > 0:
150
+ if self.parent._table_exists(self.alias):
155
151
  logger.info(f"Table already exists for {cd.name}")
156
152
  self._table_created = True
157
153
  self._initialized = True
158
154
  self.metadata.is_prepopulated = True
159
155
  return
156
+ # query = Query(
157
+ # from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
158
+ # )
159
+ # qr = self.parent.query(query)
160
+ # if qr.num_rows > 0:
161
+ # logger.info(f"Table already exists for {cd.name}")
162
+ # self._table_created = True
163
+ # self._initialized = True
164
+ # self.metadata.is_prepopulated = True
165
+ # return
160
166
  logger.info(f"Creating table for {cd.name}")
161
167
  t = self._sqla_table(cd)
162
168
  ct = CreateTable(t)
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import logging
3
3
  from pathlib import Path
4
- from typing import Optional, Union
4
+ from typing import Optional, Union, List
5
5
 
6
6
  import pandas as pd
7
7
  import sqlalchemy
@@ -14,7 +14,7 @@ from linkml_store.api import Database
14
14
  from linkml_store.api.queries import Query, QueryResult
15
15
  from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
16
16
  from linkml_store.utils.format_utils import Format
17
- from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
17
+ from linkml_store.utils.sql_utils import introspect_schema, query_to_sql, where_clause_to_sql
18
18
 
19
19
  TYPE_MAP = {
20
20
  "VARCHAR": "string",
@@ -62,7 +62,7 @@ class DuckDBDatabase(Database):
62
62
  def engine(self) -> sqlalchemy.Engine:
63
63
  if not self._engine:
64
64
  handle = self.handle
65
- if not handle.startswith("duckdb://") and not handle.startswith(":"):
65
+ if not handle.startswith("duckdb://") and not handle.startswith(":") and "://" not in handle:
66
66
  handle = f"duckdb:///{handle}"
67
67
  if ":memory:" not in handle:
68
68
  # TODO: investigate this; duckdb appears to be prematurely caching
@@ -71,6 +71,10 @@ class DuckDBDatabase(Database):
71
71
  self._engine = sqlalchemy.create_engine(handle)
72
72
  return self._engine
73
73
 
74
+ @property
75
+ def _is_sqlite(self) -> bool:
76
+ return self.handle and self.handle.startswith("sqlite:")
77
+
74
78
  def commit(self, **kwargs):
75
79
  with self.engine.connect() as conn:
76
80
  conn.commit()
@@ -89,34 +93,60 @@ class DuckDBDatabase(Database):
89
93
  if not missing_ok:
90
94
  raise FileNotFoundError(f"Database file not found: {path}")
91
95
 
92
- def query(self, query: Query, **kwargs) -> QueryResult:
96
+ def _table_exists(self, table: str) -> bool:
97
+ if self._is_sqlite:
98
+ if table == "sqlite_master":
99
+ return True
100
+ meta_query = Query(
101
+ from_table="sqlite_master",
102
+ where_clause={
103
+ #"type": "table",
104
+ "name": table,
105
+ }
106
+ )
107
+ else:
108
+ if table.startswith("information_schema"):
109
+ return True
110
+ meta_query = Query(
111
+ from_table="information_schema.tables",
112
+ where_clause={
113
+ "table_type": "BASE TABLE",
114
+ "table_name": table,
115
+ }
116
+ )
117
+
118
+ qr = self.query(meta_query)
119
+ if qr.num_rows == 0:
120
+ logger.debug(f"Table {self.alias} not created yet")
121
+ return False
122
+ return True
123
+
124
+ def _json_encoded_cols(self, table_name: str) -> Optional[List[str]]:
93
125
  json_encoded_cols = []
94
- if query.from_table:
95
- if not query.from_table.startswith("information_schema"):
96
- meta_query = Query(
97
- from_table="information_schema.tables", where_clause={"table_name": query.from_table}
98
- )
99
- qr = self.query(meta_query)
100
- if qr.num_rows == 0:
101
- logger.debug(f"Table {query.from_table} not created yet")
102
- return QueryResult(query=query, num_rows=0, rows=[])
103
- if not query.from_table.startswith("information_schema"):
104
- sv = self.schema_view
105
- else:
106
- sv = None
126
+ if table_name:
127
+ if table_name.startswith("information_schema") or table_name.startswith("sqlite"):
128
+ return []
129
+ sv = self.schema_view
107
130
  if sv:
108
131
  cd = None
109
132
  for c in self._collections.values():
110
- # if c.name == query.from_table or c.metadata.alias == query.from_table:
111
- if c.alias == query.from_table or c.target_class_name == query.from_table:
133
+ if c.alias == table_name or c.target_class_name == table_name:
112
134
  cd = c.class_definition()
113
135
  break
114
136
  if cd:
115
137
  for att in sv.class_induced_slots(cd.name):
116
138
  if att.inlined or att.inlined_as_list:
117
139
  json_encoded_cols.append(att.name)
140
+ return json_encoded_cols
141
+
142
+ def query(self, query: Query, **kwargs) -> QueryResult:
143
+ if not self._table_exists(query.from_table):
144
+ return QueryResult(query=query, num_rows=0, rows=[])
145
+ json_encoded_cols = self._json_encoded_cols(query.from_table)
146
+
118
147
  with self.engine.connect() as conn:
119
148
  count_query_str = text(query_to_sql(query, count=True))
149
+ logger.debug(f"count_query_str: {count_query_str}")
120
150
  num_rows = list(conn.execute(count_query_str))[0][0]
121
151
  logger.debug(f"num_rows: {num_rows}")
122
152
  query_str = query_to_sql(query, **kwargs) # include offset, limit
@@ -167,6 +197,9 @@ class DuckDBDatabase(Database):
167
197
  logger.info(f"Inducing schema view for {self.metadata.handle} // {self}")
168
198
  sb = SchemaBuilder()
169
199
  schema = sb.schema
200
+ logger.info(f"Checking if {self.metadata.handle} is sqlite: {self._is_sqlite}")
201
+ if self._is_sqlite:
202
+ return SchemaView(schema)
170
203
  query = Query(from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE"})
171
204
  qr = self.query(query)
172
205
  logger.info(f"Found {qr.num_rows} information_schema.tables // {qr.rows}")
@@ -41,6 +41,89 @@ class MongoDBCollection(Collection):
41
41
  del obj["_id"]
42
42
  self._post_insert_hook(objs)
43
43
 
44
+
45
+ def index(self,
46
+ objs: Union[OBJECT, List[OBJECT]],
47
+ index_name: Optional[str] = None,
48
+ replace: bool = False,
49
+ unique: bool = False,
50
+ **kwargs):
51
+ """
52
+ Create indexes on the collection.
53
+
54
+ :param objs: Field(s) to index.
55
+ :param index_name: Optional name for the index.
56
+ :param replace: If True, the index will be dropped and recreated.
57
+ :param unique: If True, creates a unique index (default: False).
58
+ """
59
+
60
+ if not isinstance(objs, list):
61
+ objs = [objs]
62
+
63
+ existing_indexes = self.mongo_collection.index_information()
64
+
65
+ for obj in objs:
66
+ field_exists = False
67
+ index_to_drop = None
68
+
69
+ # Extract existing index details
70
+ for index_name_existing, index_details in existing_indexes.items():
71
+ indexed_fields = [field[0] for field in index_details.get("key", [])] # Extract field names
72
+
73
+ if obj in indexed_fields: # If this field is already indexed
74
+ field_exists = True
75
+ index_to_drop = index_name_existing if replace else None
76
+
77
+ # Drop the index if replace=True and index_to_drop is valid
78
+ if index_to_drop:
79
+ self.mongo_collection.drop_index(index_to_drop)
80
+ logging.debug(f"Dropped existing index: {index_to_drop}")
81
+
82
+ # Create the new index only if it doesn't exist or was dropped
83
+ if not field_exists or replace:
84
+ self.mongo_collection.create_index(obj, name=index_name, unique=unique)
85
+ logging.debug(f"Created new index: {index_name} on field {obj}, unique={unique}")
86
+ else:
87
+ logging.debug(f"Index already exists for field {obj}, skipping creation.")
88
+
89
+ def upsert(self,
90
+ objs: Union[OBJECT, List[OBJECT]],
91
+ filter_fields: List[str],
92
+ update_fields: Optional[List[str]] = None,
93
+ **kwargs):
94
+ """
95
+ Upsert one or more documents into the MongoDB collection.
96
+
97
+ :param objs: The document(s) to insert or update.
98
+ :param filter_fields: List of field names to use as the filter for matching existing documents.
99
+ :param update_fields: List of field names to include in the update. If None, all fields are updated.
100
+ """
101
+ if not isinstance(objs, list):
102
+ objs = [objs]
103
+
104
+ for obj in objs:
105
+ # Ensure filter fields exist in the object
106
+ filter_criteria = {field: obj[field] for field in filter_fields if field in obj}
107
+ if not filter_criteria:
108
+ raise ValueError("At least one valid filter field must be present in each object.")
109
+
110
+ # Check if a document already exists
111
+ existing_doc = self.mongo_collection.find_one(filter_criteria)
112
+
113
+ if existing_doc:
114
+ # Update only changed fields
115
+ updates = {key: obj[key] for key in update_fields if key in obj and obj[key] != existing_doc.get(key)}
116
+
117
+ if updates:
118
+ self.mongo_collection.update_one(filter_criteria, {"$set": updates})
119
+ logging.debug(f"Updated existing document: {filter_criteria} with {updates}")
120
+ else:
121
+ logging.debug(f"No changes detected for document: {filter_criteria}. Skipping update.")
122
+ else:
123
+ # Insert a new document
124
+ self.mongo_collection.insert_one(obj)
125
+ logging.debug(f"Inserted new document: {obj}")
126
+
44
127
  def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
45
128
  mongo_filter = self._build_mongo_filter(query.where_clause)
46
129
  limit = limit or query.limit
@@ -3,6 +3,7 @@
3
3
  import logging
4
4
  from pathlib import Path
5
5
  from typing import Optional, Union
6
+ from urllib.parse import urlparse
6
7
 
7
8
  from pymongo import MongoClient
8
9
  from pymongo.database import Database as NativeDatabase
@@ -38,10 +39,13 @@ class MongoDBDatabase(Database):
38
39
  @property
39
40
  def _db_name(self) -> str:
40
41
  if self.handle:
41
- db = self.handle.split("/")[-1]
42
+ parsed_url = urlparse(self.handle)
43
+ path_parts = parsed_url.path.lstrip("/").split("?")[0].split("/")
44
+ print(path_parts)
45
+ db_name = path_parts[0] if path_parts else "default"
42
46
  else:
43
- db = "default"
44
- return db
47
+ db_name = "default"
48
+ return db_name
45
49
 
46
50
  @property
47
51
  def native_client(self) -> MongoClient: