linkml-store 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. linkml_store/__init__.py +7 -0
  2. linkml_store/api/__init__.py +8 -0
  3. linkml_store/api/client.py +414 -0
  4. linkml_store/api/collection.py +1280 -0
  5. linkml_store/api/config.py +187 -0
  6. linkml_store/api/database.py +862 -0
  7. linkml_store/api/queries.py +69 -0
  8. linkml_store/api/stores/__init__.py +0 -0
  9. linkml_store/api/stores/chromadb/__init__.py +7 -0
  10. linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
  11. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  12. linkml_store/api/stores/dremio/__init__.py +10 -0
  13. linkml_store/api/stores/dremio/dremio_collection.py +555 -0
  14. linkml_store/api/stores/dremio/dremio_database.py +1052 -0
  15. linkml_store/api/stores/dremio/mappings.py +105 -0
  16. linkml_store/api/stores/dremio_rest/__init__.py +11 -0
  17. linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
  18. linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
  19. linkml_store/api/stores/duckdb/__init__.py +16 -0
  20. linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
  21. linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
  22. linkml_store/api/stores/duckdb/mappings.py +8 -0
  23. linkml_store/api/stores/filesystem/__init__.py +15 -0
  24. linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
  25. linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
  26. linkml_store/api/stores/hdf5/__init__.py +7 -0
  27. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  28. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  29. linkml_store/api/stores/ibis/__init__.py +5 -0
  30. linkml_store/api/stores/ibis/ibis_collection.py +488 -0
  31. linkml_store/api/stores/ibis/ibis_database.py +328 -0
  32. linkml_store/api/stores/mongodb/__init__.py +25 -0
  33. linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
  34. linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
  35. linkml_store/api/stores/neo4j/__init__.py +0 -0
  36. linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
  37. linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
  38. linkml_store/api/stores/solr/__init__.py +3 -0
  39. linkml_store/api/stores/solr/solr_collection.py +224 -0
  40. linkml_store/api/stores/solr/solr_database.py +83 -0
  41. linkml_store/api/stores/solr/solr_utils.py +0 -0
  42. linkml_store/api/types.py +4 -0
  43. linkml_store/cli.py +1147 -0
  44. linkml_store/constants.py +7 -0
  45. linkml_store/graphs/__init__.py +0 -0
  46. linkml_store/graphs/graph_map.py +24 -0
  47. linkml_store/index/__init__.py +53 -0
  48. linkml_store/index/implementations/__init__.py +0 -0
  49. linkml_store/index/implementations/llm_indexer.py +174 -0
  50. linkml_store/index/implementations/simple_indexer.py +43 -0
  51. linkml_store/index/indexer.py +211 -0
  52. linkml_store/inference/__init__.py +13 -0
  53. linkml_store/inference/evaluation.py +195 -0
  54. linkml_store/inference/implementations/__init__.py +0 -0
  55. linkml_store/inference/implementations/llm_inference_engine.py +154 -0
  56. linkml_store/inference/implementations/rag_inference_engine.py +276 -0
  57. linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
  58. linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
  59. linkml_store/inference/inference_config.py +66 -0
  60. linkml_store/inference/inference_engine.py +209 -0
  61. linkml_store/inference/inference_engine_registry.py +74 -0
  62. linkml_store/plotting/__init__.py +5 -0
  63. linkml_store/plotting/cli.py +826 -0
  64. linkml_store/plotting/dimensionality_reduction.py +453 -0
  65. linkml_store/plotting/embedding_plot.py +489 -0
  66. linkml_store/plotting/facet_chart.py +73 -0
  67. linkml_store/plotting/heatmap.py +383 -0
  68. linkml_store/utils/__init__.py +0 -0
  69. linkml_store/utils/change_utils.py +17 -0
  70. linkml_store/utils/dat_parser.py +95 -0
  71. linkml_store/utils/embedding_matcher.py +424 -0
  72. linkml_store/utils/embedding_utils.py +299 -0
  73. linkml_store/utils/enrichment_analyzer.py +217 -0
  74. linkml_store/utils/file_utils.py +37 -0
  75. linkml_store/utils/format_utils.py +550 -0
  76. linkml_store/utils/io.py +38 -0
  77. linkml_store/utils/llm_utils.py +122 -0
  78. linkml_store/utils/mongodb_utils.py +145 -0
  79. linkml_store/utils/neo4j_utils.py +42 -0
  80. linkml_store/utils/object_utils.py +190 -0
  81. linkml_store/utils/pandas_utils.py +93 -0
  82. linkml_store/utils/patch_utils.py +126 -0
  83. linkml_store/utils/query_utils.py +89 -0
  84. linkml_store/utils/schema_utils.py +23 -0
  85. linkml_store/utils/sklearn_utils.py +193 -0
  86. linkml_store/utils/sql_utils.py +177 -0
  87. linkml_store/utils/stats_utils.py +53 -0
  88. linkml_store/utils/vector_utils.py +158 -0
  89. linkml_store/webapi/__init__.py +0 -0
  90. linkml_store/webapi/html/__init__.py +3 -0
  91. linkml_store/webapi/html/base.html.j2 +24 -0
  92. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  93. linkml_store/webapi/html/database_details.html.j2 +16 -0
  94. linkml_store/webapi/html/databases.html.j2 +14 -0
  95. linkml_store/webapi/html/generic.html.j2 +43 -0
  96. linkml_store/webapi/main.py +855 -0
  97. linkml_store-0.3.0.dist-info/METADATA +226 -0
  98. linkml_store-0.3.0.dist-info/RECORD +101 -0
  99. linkml_store-0.3.0.dist-info/WHEEL +4 -0
  100. linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
  101. linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,7 @@
1
"""Top-level ``linkml_store`` package; re-exports :class:`Client` as its public name."""

from pathlib import Path

from linkml_store.api import Client

# Directory that contains this ``__init__.py``.
THIS_DIR = Path(__file__).parent

__all__ = ["Client"]
@@ -0,0 +1,8 @@
1
# flake8: noqa: E402
# NOTE: the import order below is deliberate. ``linkml_store.api.client`` itself does
# ``from linkml_store.api import Database``, so ``Database`` has to be bound in this
# package before the ``client`` module is imported.
from linkml_store.api.collection import Collection
from linkml_store.api.database import Database
from linkml_store.api.client import Client

# flake8: noqa

__all__ = ["Client", "Database", "Collection"]
@@ -0,0 +1,414 @@
1
+ import importlib
2
+ import logging
3
+ from pathlib import Path
4
+ from typing import Dict, Optional, Union
5
+
6
+ import yaml
7
+ from linkml_runtime import SchemaView
8
+
9
+ from linkml_store.api import Database
10
+ from linkml_store.api.config import ClientConfig
11
+
12
logger = logging.getLogger(__name__)


# Maps a handle scheme (the text before the first ":" of a handle) to the dotted
# path of the Database class implementing it. Classes are named by string so that
# a backend module is only imported when its scheme is actually used.
HANDLE_MAP = {
    "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
    "sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
    "solr": "linkml_store.api.stores.solr.solr_database.SolrDatabase",
    "mongodb": "linkml_store.api.stores.mongodb.mongodb_database.MongoDBDatabase",
    "chromadb": "linkml_store.api.stores.chromadb.chromadb_database.ChromaDBDatabase",
    "neo4j": "linkml_store.api.stores.neo4j.neo4j_database.Neo4jDatabase",
    "file": "linkml_store.api.stores.filesystem.filesystem_database.FileSystemDatabase",
    "dremio": "linkml_store.api.stores.dremio.dremio_database.DremioDatabase",
    "dremio-rest": "linkml_store.api.stores.dremio_rest.dremio_rest_database.DremioRestDatabase",
    # The generic "ibis" scheme and the Ibis backend-specific schemes all share
    # a single implementation class.
    **dict.fromkeys(
        (
            "ibis",
            "ibis+duckdb",
            "ibis+sqlite",
            "ibis+postgres",
            "ibis+postgresql",
            "ibis+bigquery",
            "ibis+mysql",
            "ibis+snowflake",
            "ibis+clickhouse",
        ),
        "linkml_store.api.stores.ibis.ibis_database.IbisDatabase",
    ),
}

# Maps a file suffix to the handle template used when a bare path (no scheme)
# is given; ``{path}`` is replaced with that path.
SUFFIX_MAP = dict.fromkeys(("ddb", "duckdb", "db"), "duckdb:///{path}")
42
+
43
+
44
class Client:
    """
    A client is the top-level object for interacting with databases.

    * A client has access to one or more :class:`.Database` objects.
    * Each database consists of a number of :class:`.Collection` objects.

    Creating a client
    -----------------
    >>> client = Client()

    Attaching a database
    --------------------
    >>> db = client.attach_database("duckdb", alias="test")

    Note that normally a handle would be specified by a locator such as ``duckdb:///<PATH>``, but
    for convenience, an in-memory duckdb object can be specified without a full locator

    We can check the actual handle:

    >>> db.handle
    'duckdb:///:memory:'

    Creating a new collection
    -------------------------
    >>> collection = db.create_collection("Person")
    >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
    >>> collection.insert(objs)
    >>> qr = collection.find()
    >>> len(qr.rows)
    2
    >>> qr.rows[0]["id"]
    'P1'
    >>> qr.rows[1]["name"]
    'Alice'
    >>> qr = collection.find({"name": "John"})
    >>> len(qr.rows)
    1
    >>> qr.rows[0]["name"]
    'John'

    """

    # Client-level configuration; replaced wholesale by :meth:`from_config`.
    metadata: Optional[ClientConfig] = None
    # Attached databases keyed by alias; created on first attach or access.
    _databases: Optional[Dict[str, Database]] = None

    def __init__(self, handle: Optional[str] = None, metadata: Optional[ClientConfig] = None):
        """
        Initialize a client.

        :param handle: handle to record on the client configuration. When omitted,
            a handle already present on a supplied ``metadata`` is kept.
        :param metadata: configuration to use; a fresh :class:`ClientConfig` is
            created when none is supplied
        """
        if metadata:
            self.metadata = metadata
            if handle is not None:
                # Only overwrite when a handle was actually given, so that a handle
                # carried by the caller's config is not silently reset to None.
                self.metadata.handle = handle
        else:
            self.metadata = ClientConfig()
            self.metadata.handle = handle

    @property
    def handle(self) -> Optional[str]:
        """
        Get the handle for the client.

        Wraps metadata.handle.

        :return:
        """
        return self.metadata.handle

    @property
    def base_dir(self) -> Optional[str]:
        """
        Get the base directory for the client.

        Wraps metadata.base_dir.

        :return:
        """
        return self.metadata.base_dir

    def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, auto_attach=False, **kwargs):
        """
        Create a client from a configuration.

        Examples
        --------
        >>> from linkml_store.api.config import ClientConfig
        >>> client = Client().from_config(ClientConfig(databases={"test": {"handle": "duckdb:///:memory:"}}))
        >>> len(client.databases)
        0
        >>> client = Client().from_config(ClientConfig(databases={"test": {"handle": "duckdb:///:memory:"}}),
        ...                               auto_attach=True)
        >>> len(client.databases)
        1
        >>> "test" in client.databases
        True
        >>> client.databases["test"].handle
        'duckdb:///:memory:'

        :param config: a :class:`ClientConfig`, a dict of its fields, or the path
            (``str`` or ``Path``) of a YAML file holding those fields
        :param base_dir: value substituted for ``{base_dir}`` in database handles and
            schema locations; defaults to the parent directory of the YAML file when
            ``config`` is a path
        :param auto_attach: if true, attach every configured database immediately
        :param kwargs: forwarded to :meth:`attach_database` for auto-attached databases
        :return: this client, now carrying ``config`` as its metadata

        """
        if isinstance(config, dict):
            config = ClientConfig(**config)
        if isinstance(config, Path):
            config = str(config)
        if isinstance(config, str):
            if not base_dir:
                base_dir = Path(config).parent
            # Use a context manager so the file is closed as soon as it is parsed.
            with open(config) as stream:
                parsed_obj = yaml.safe_load(stream)
            # An empty YAML document parses to None; treat it as an empty configuration.
            config = ClientConfig(**(parsed_obj or {}))
        self.metadata = config
        if base_dir:
            self.metadata.base_dir = base_dir
        self._initialize_databases(auto_attach=auto_attach, **kwargs)
        return self

    def _initialize_databases(self, auto_attach=False, **kwargs):
        """
        Resolve the configured databases and optionally attach them.

        For every entry in ``metadata.databases`` the ``{base_dir}`` placeholder in
        its handle and schema location is filled in (using the current working
        directory when the client has no base directory) and written back to the
        config.

        :param auto_attach: if true, attach each database and apply its config
        :param kwargs: forwarded to :meth:`attach_database`
        """
        for name, db_config in self.metadata.databases.items():
            base_dir = self.base_dir
            logger.info(f"Initializing database: {name}, base_dir: {base_dir}")
            if not base_dir:
                base_dir = Path.cwd()
                logger.info(f"Using current working directory: {base_dir}")
            handle = db_config.handle.format(base_dir=base_dir)
            db_config.handle = handle
            if db_config.schema_location:
                db_config.schema_location = db_config.schema_location.format(base_dir=base_dir)
            if auto_attach:
                db = self.attach_database(handle, alias=name, **kwargs)
                db.from_config(db_config)
            # NOTE(review): placed at loop level, so a configured ``source`` is loaded
            # whether or not ``auto_attach`` is set (``get_database`` attaches on demand).
            # Confirm this nesting against the packaged file.
            if db_config.source:
                db = self.get_database(name)
                db.store(db_config.source.data)

    def _set_database_config(self, db: Database):
        """
        Apply the client-level configuration for a database, if there is one.

        The configuration is looked up in ``metadata.databases`` by the database's
        alias; nothing happens when the client has no metadata or no matching entry.

        :param db: the database to configure
        :return:
        """
        if not self.metadata:
            return
        if db.alias in self.metadata.databases:
            db.from_config(self.metadata.databases[db.alias])

    def attach_database(
        self,
        handle: str,
        alias: Optional[str] = None,
        schema_view: Optional[SchemaView] = None,
        recreate_if_exists=False,
        **kwargs,
    ) -> Database:
        """
        Associate a database with a handle.

        Examples
        --------
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="memory")
        >>> "memory" in client.databases
        True
        >>> db = client.attach_database("duckdb:///tmp/another.db", alias="disk")
        >>> len(client.databases)
        2
        >>> "disk" in client.databases
        True

        :param handle: handle for the database, e.g. duckdb:///foo.db. A bare scheme
            (e.g. ``duckdb``) or a bare path with a known suffix (see ``SUFFIX_MAP``)
            is also accepted.
        :param alias: alias for the database, e.g foo
        :param schema_view: schema view to associate with the database
        :param recreate_if_exists: passed to the database class constructor
        :param kwargs: passed to the database class constructor
        :return: the attached database
        :raises ValueError: if the scheme is not listed in ``HANDLE_MAP``
        :raises ImportError: if the module implementing the scheme cannot be imported
        :raises AssertionError: if the database already has a different alias
        """
        if ":" not in handle:
            if alias is None:
                alias = handle
            if "." in handle:
                # A bare file path: derive the full handle from its suffix.
                suffix = handle.split(".")[-1]
                if suffix in SUFFIX_MAP:
                    handle = SUFFIX_MAP[suffix].format(path=handle)
        if ":" not in handle:
            # Still no scheme separator: the whole string is the scheme and the
            # database class is constructed without an explicit handle.
            scheme = handle
            handle = None
            if alias is None:
                alias = scheme
        else:
            scheme, _ = handle.split(":", 1)
        if scheme not in HANDLE_MAP:
            raise ValueError(f"Unknown scheme: {scheme}")
        module_path, class_name = HANDLE_MAP[scheme].rsplit(".", 1)
        try:
            module = importlib.import_module(module_path)
            cls = getattr(module, class_name)
        except ImportError as e:
            # Chain the original error so the real missing module stays in the traceback.
            raise ImportError(
                f"Failed to import {scheme} database. Make sure the correct extras are installed: {e}"
            ) from e

        db = cls(handle=handle, recreate_if_exists=recreate_if_exists, **kwargs)
        if schema_view:
            db.set_schema_view(schema_view)
        if not alias:
            alias = handle
        if not self._databases:
            logger.info("Initializing databases")
            self._databases = {}
        logger.info(f"Attaching {alias}")
        self._databases[alias] = db
        db.parent = self
        if db.alias:
            if db.alias != alias:
                raise AssertionError(f"Inconsistent alias: {db.alias} != {alias}")
        else:
            db.metadata.alias = alias
        self._set_database_config(db)
        return db

    def get_database(self, name: Optional[str] = None, create_if_not_exists=True, **kwargs) -> Database:
        """
        Get a named database.

        Examples
        --------
        >>> client = Client()
        >>> db = client.attach_database("duckdb:///test.db", alias="test")
        >>> retrieved_db = client.get_database("test")
        >>> db == retrieved_db
        True

        :param name: if None, there must be a single database attached
        :param create_if_not_exists: if true, a name that is neither attached nor
            configured is passed to :meth:`attach_database` as a handle
        :param kwargs: forwarded to :meth:`attach_database` when attaching
        :return: the requested database
        :raises ValueError: if no name is given and zero or several databases are
            attached, or if the database is unknown and ``create_if_not_exists`` is false

        """
        if not name:
            if not self._databases:
                raise ValueError("No databases attached and no name provided")
            if len(self._databases) > 1:
                raise ValueError("Ambiguous: No name provided and multiple databases attached")
            return list(self._databases.values())[0]
        if not self._databases:
            self._databases = {}
        if name not in self._databases and name in self.metadata.databases:
            # Configured but not yet attached: attach it now using the configured handle.
            db_config = self.metadata.databases[name]
            db = self.attach_database(db_config.handle, alias=name, **kwargs)
            self._databases[name] = db
        if name not in self._databases:
            if create_if_not_exists:
                logger.info(f"Creating/attaching database: {name}")
                db = self.attach_database(name, **kwargs)
                # attach_database may register the database under a derived alias.
                name = db.alias
            else:
                raise ValueError(f"Database {name} does not exist")
        db = self._databases[name]
        self._set_database_config(db)
        return db

    @property
    def databases(self) -> Dict[str, Database]:
        """
        Return all attached databases

        Examples

        >>> client = Client()
        >>> _ = client.attach_database("duckdb", alias="test1")
        >>> _ = client.attach_database("duckdb", alias="test2")
        >>> len(client.databases)
        2
        >>> "test1" in client.databases
        True
        >>> "test2" in client.databases
        True
        >>> client.databases["test1"].handle
        'duckdb:///:memory:'
        >>> client.databases["test2"].handle
        'duckdb:///:memory:'

        :return: mapping from alias to attached database

        """
        if not self._databases:
            self._databases = {}
        return self._databases

    def drop_database(self, name: str, missing_ok=False, **kwargs):
        """
        Drop a database.

        Example (in-memory):

        >>> client = Client()
        >>> db1 = client.attach_database("duckdb", alias="test1")
        >>> db2 = client.attach_database("duckdb", alias="test2")
        >>> len(client.databases)
        2
        >>> client.drop_database("test1")
        >>> len(client.databases)
        1

        Databases that persist on disk:

        >>> client = Client()
        >>> path = Path("tmp/test.db")
        >>> path.parent.mkdir(parents=True, exist_ok=True)
        >>> db = client.attach_database(f"duckdb:///{path}", alias="test")
        >>> len(client.databases)
        1
        >>> db.store({"persons": [{"id": "P1", "name": "John"}]})
        >>> db.commit()
        >>> Path("tmp/test.db").exists()
        True
        >>> client.drop_database("test")
        >>> len(client.databases)
        0
        >>> Path("tmp/test.db").exists()
        False

        Dropping a non-existent database:

        >>> client = Client()
        >>> client.drop_database("duckdb:///tmp/made-up1", missing_ok=True)
        >>> client.drop_database("duckdb:///tmp/made-up2", missing_ok=False)
        Traceback (most recent call last):
        ...
        ValueError: Database duckdb:///tmp/made-up2 not found

        :param name: alias of an attached database, or a handle when nothing is attached
        :param missing_ok: if false, raise when other databases are attached but
            ``name`` is not among them
        :param kwargs: forwarded to the database's ``drop`` method
        :return:
        :raises ValueError: if ``name`` is not attached, other databases are, and
            ``missing_ok`` is false
        """
        # NOTE(review): the if/else nesting below was reconstructed from a de-indented
        # listing. As written, a client with nothing attached attaches ``name`` on demand
        # and drops it regardless of ``missing_ok``, while the last docstring example
        # expects a ValueError in that state — confirm the intended behaviour.
        if self._databases:
            if name in self._databases:
                db = self._databases[name]
                db.drop(**kwargs)
                del self._databases[name]
            else:
                if not missing_ok:
                    raise ValueError(f"Database {name} not found")
        else:
            db = self.get_database(name, create_if_not_exists=True)
            db.drop(**kwargs)
            # Remove from _databases after dropping
            if self._databases and db.alias in self._databases:
                del self._databases[db.alias]

    def drop_all_databases(self, **kwargs):
        """
        Drop all databases.

        Example (in-memory):

        >>> client = Client()
        >>> db1 = client.attach_database("duckdb", alias="test1")
        >>> assert "test1" in client.databases
        >>> db2 = client.attach_database("duckdb", alias="test2")
        >>> assert "test2" in client.databases
        >>> client.drop_all_databases()
        >>> len(client.databases)
        0


        :param kwargs: forwarded to :meth:`drop_database` for every attached database
        :return:
        """
        if not self._databases:
            return
        # Iterate over a snapshot of the keys: drop_database mutates the mapping.
        for name in list(self._databases.keys()):
            self.drop_database(name, missing_ok=False, **kwargs)
        self._databases = {}