linkml-store 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. linkml_store/__init__.py +7 -0
  2. linkml_store/api/__init__.py +8 -0
  3. linkml_store/api/client.py +414 -0
  4. linkml_store/api/collection.py +1280 -0
  5. linkml_store/api/config.py +187 -0
  6. linkml_store/api/database.py +862 -0
  7. linkml_store/api/queries.py +69 -0
  8. linkml_store/api/stores/__init__.py +0 -0
  9. linkml_store/api/stores/chromadb/__init__.py +7 -0
  10. linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
  11. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  12. linkml_store/api/stores/dremio/__init__.py +10 -0
  13. linkml_store/api/stores/dremio/dremio_collection.py +555 -0
  14. linkml_store/api/stores/dremio/dremio_database.py +1052 -0
  15. linkml_store/api/stores/dremio/mappings.py +105 -0
  16. linkml_store/api/stores/dremio_rest/__init__.py +11 -0
  17. linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
  18. linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
  19. linkml_store/api/stores/duckdb/__init__.py +16 -0
  20. linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
  21. linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
  22. linkml_store/api/stores/duckdb/mappings.py +8 -0
  23. linkml_store/api/stores/filesystem/__init__.py +15 -0
  24. linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
  25. linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
  26. linkml_store/api/stores/hdf5/__init__.py +7 -0
  27. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  28. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  29. linkml_store/api/stores/ibis/__init__.py +5 -0
  30. linkml_store/api/stores/ibis/ibis_collection.py +488 -0
  31. linkml_store/api/stores/ibis/ibis_database.py +328 -0
  32. linkml_store/api/stores/mongodb/__init__.py +25 -0
  33. linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
  34. linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
  35. linkml_store/api/stores/neo4j/__init__.py +0 -0
  36. linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
  37. linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
  38. linkml_store/api/stores/solr/__init__.py +3 -0
  39. linkml_store/api/stores/solr/solr_collection.py +224 -0
  40. linkml_store/api/stores/solr/solr_database.py +83 -0
  41. linkml_store/api/stores/solr/solr_utils.py +0 -0
  42. linkml_store/api/types.py +4 -0
  43. linkml_store/cli.py +1147 -0
  44. linkml_store/constants.py +7 -0
  45. linkml_store/graphs/__init__.py +0 -0
  46. linkml_store/graphs/graph_map.py +24 -0
  47. linkml_store/index/__init__.py +53 -0
  48. linkml_store/index/implementations/__init__.py +0 -0
  49. linkml_store/index/implementations/llm_indexer.py +174 -0
  50. linkml_store/index/implementations/simple_indexer.py +43 -0
  51. linkml_store/index/indexer.py +211 -0
  52. linkml_store/inference/__init__.py +13 -0
  53. linkml_store/inference/evaluation.py +195 -0
  54. linkml_store/inference/implementations/__init__.py +0 -0
  55. linkml_store/inference/implementations/llm_inference_engine.py +154 -0
  56. linkml_store/inference/implementations/rag_inference_engine.py +276 -0
  57. linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
  58. linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
  59. linkml_store/inference/inference_config.py +66 -0
  60. linkml_store/inference/inference_engine.py +209 -0
  61. linkml_store/inference/inference_engine_registry.py +74 -0
  62. linkml_store/plotting/__init__.py +5 -0
  63. linkml_store/plotting/cli.py +826 -0
  64. linkml_store/plotting/dimensionality_reduction.py +453 -0
  65. linkml_store/plotting/embedding_plot.py +489 -0
  66. linkml_store/plotting/facet_chart.py +73 -0
  67. linkml_store/plotting/heatmap.py +383 -0
  68. linkml_store/utils/__init__.py +0 -0
  69. linkml_store/utils/change_utils.py +17 -0
  70. linkml_store/utils/dat_parser.py +95 -0
  71. linkml_store/utils/embedding_matcher.py +424 -0
  72. linkml_store/utils/embedding_utils.py +299 -0
  73. linkml_store/utils/enrichment_analyzer.py +217 -0
  74. linkml_store/utils/file_utils.py +37 -0
  75. linkml_store/utils/format_utils.py +550 -0
  76. linkml_store/utils/io.py +38 -0
  77. linkml_store/utils/llm_utils.py +122 -0
  78. linkml_store/utils/mongodb_utils.py +145 -0
  79. linkml_store/utils/neo4j_utils.py +42 -0
  80. linkml_store/utils/object_utils.py +190 -0
  81. linkml_store/utils/pandas_utils.py +93 -0
  82. linkml_store/utils/patch_utils.py +126 -0
  83. linkml_store/utils/query_utils.py +89 -0
  84. linkml_store/utils/schema_utils.py +23 -0
  85. linkml_store/utils/sklearn_utils.py +193 -0
  86. linkml_store/utils/sql_utils.py +177 -0
  87. linkml_store/utils/stats_utils.py +53 -0
  88. linkml_store/utils/vector_utils.py +158 -0
  89. linkml_store/webapi/__init__.py +0 -0
  90. linkml_store/webapi/html/__init__.py +3 -0
  91. linkml_store/webapi/html/base.html.j2 +24 -0
  92. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  93. linkml_store/webapi/html/database_details.html.j2 +16 -0
  94. linkml_store/webapi/html/databases.html.j2 +14 -0
  95. linkml_store/webapi/html/generic.html.j2 +43 -0
  96. linkml_store/webapi/main.py +855 -0
  97. linkml_store-0.3.0.dist-info/METADATA +226 -0
  98. linkml_store-0.3.0.dist-info/RECORD +101 -0
  99. linkml_store-0.3.0.dist-info/WHEEL +4 -0
  100. linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
  101. linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,328 @@
1
+ """Ibis database adapter for linkml-store."""
2
+
3
+ import logging
4
+ from pathlib import Path
5
+ from typing import List, Optional, Union
6
+ from urllib.parse import urlparse
7
+
8
+ import pandas as pd
9
+ from linkml_runtime import SchemaView
10
+ from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
11
+ from linkml_runtime.utils.schema_builder import SchemaBuilder
12
+
13
+ from linkml_store.api import Database
14
+ from linkml_store.api.queries import Query, QueryResult
15
+ from linkml_store.api.stores.ibis.ibis_collection import IbisCollection
16
+ from linkml_store.utils.format_utils import Format
17
+
18
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Type-name lookup table.
# NOTE(review): the keys look like LinkML range names and the values like Ibis
# dtype names. The table is not referenced in the visible part of this module,
# so confirm that reading against whatever consumes it.
TYPE_MAP = {
    "string": "string",
    "integer": "int64",
    "int64": "int64",
    "boolean": "boolean",
    "date": "date",
    "datetime": "timestamp",
    "float": "float64",
    "double": "float64",
}

# Handle substituted when no handle, or the bare "ibis" / "ibis://" short form,
# is supplied: an in-memory DuckDB database.
MEMORY_HANDLE = "ibis+duckdb:///:memory:"
32
+
33
+
34
+ class IbisDatabase(Database):
35
+ """
36
+ An adapter for databases using Ibis as an abstraction layer.
37
+
38
+ Ibis provides a unified interface across multiple database backends including:
39
+ - DuckDB (default)
40
+ - PostgreSQL
41
+ - SQLite
42
+ - BigQuery
43
+ - Snowflake
44
+ - And many more
45
+
46
+ Connection strings should be in the format:
47
+ - ibis+duckdb:///:memory: (in-memory DuckDB)
48
+ - ibis+duckdb:///path/to/db.duckdb
49
+ - ibis+postgres://user:pass@host:port/dbname
50
+ - ibis+sqlite:///path/to/db.sqlite
51
+ - ibis+bigquery://project/dataset
52
+
53
+ For convenience, you can also use short forms:
54
+ - ibis:// defaults to ibis+duckdb:///:memory:
55
+ - ibis:///path.duckdb uses DuckDB (rewritten to ibis+duckdb:///path.duckdb; the path keeps its leading slash)
56
+ """
57
+
58
+ collection_class = IbisCollection
59
+
60
def __init__(self, handle: Optional[str] = None, recreate_if_exists: bool = False, **kwargs):
    """
    Create the adapter, optionally wiping an existing file-based database.

    :param handle: Ibis handle; ``None`` selects the in-memory DuckDB handle.
    :param recreate_if_exists: when true and the handle is not the in-memory
        handle, the database file and its ``.wal`` companion are deleted
        before the base class is initialised. The flag is also stored on the
        instance for later use by the ``connection`` property.
    :param kwargs: forwarded unchanged to the base class initialiser.
    """
    self._connection = None  # Created lazily by the ``connection`` property
    handle = MEMORY_HANDLE if handle is None else handle

    wipe_requested = recreate_if_exists and handle != MEMORY_HANDLE
    db_path = self._parse_handle(handle).get("path") if wipe_requested else None
    if db_path:
        # Database file first, then the write-ahead log that may sit next to it.
        for stale in (Path(db_path), Path(str(db_path) + ".wal")):
            if stale.exists():
                stale.unlink()

    super().__init__(handle=handle, **kwargs)
    self._recreate_if_exists = recreate_if_exists
78
+
79
+ def _parse_handle(self, handle: str) -> dict:
80
+ """
81
+ Parse an Ibis handle into components.
82
+
83
+ Returns a dict with keys:
84
+ - backend: The Ibis backend name (duckdb, postgres, etc.)
85
+ - connection_string: The connection string for the backend
86
+ - path: File path for file-based backends
87
+ """
88
+ if not handle:
89
+ handle = MEMORY_HANDLE
90
+
91
+ # Handle short forms
92
+ if handle == "ibis://" or handle == "ibis":
93
+ handle = MEMORY_HANDLE
94
+ elif handle.startswith("ibis:///") and not handle.startswith("ibis+"):
95
+ # Assume DuckDB for file paths
96
+ path = handle.replace("ibis:///", "")
97
+ handle = f"ibis+duckdb:///{path}"
98
+
99
+ # Parse the handle
100
+ if handle.startswith("ibis+"):
101
+ # Format: ibis+backend://rest
102
+ rest = handle[5:] # Remove 'ibis+'
103
+ parsed = urlparse(rest)
104
+ backend = parsed.scheme
105
+
106
+ # Reconstruct connection string for the specific backend
107
+ if backend == "duckdb":
108
+ if parsed.netloc == "" and parsed.path == "/:memory:":
109
+ connection_string = ":memory:"
110
+ path = None
111
+ else:
112
+ # For file:// style URLs, path includes the leading /
113
+ # e.g., ibis+duckdb:///abs/path -> parsed.path = "/abs/path"
114
+ # We keep absolute paths as-is, only strip for relative paths
115
+ path = parsed.path if parsed.path else None
116
+ connection_string = path or ":memory:"
117
+ elif backend == "sqlite":
118
+ path = parsed.path if parsed.path else None
119
+ connection_string = path
120
+ elif backend in ["postgres", "postgresql"]:
121
+ # postgres://user:pass@host:port/dbname
122
+ connection_string = f"{backend}://{parsed.netloc}{parsed.path}"
123
+ path = None
124
+ elif backend == "bigquery":
125
+ # bigquery://project/dataset
126
+ connection_string = f"{parsed.netloc}{parsed.path}"
127
+ path = None
128
+ else:
129
+ # Generic backend
130
+ connection_string = rest.replace(f"{backend}://", "")
131
+ path = None
132
+
133
+ return {
134
+ "backend": backend,
135
+ "connection_string": connection_string,
136
+ "path": path,
137
+ }
138
+ else:
139
+ raise ValueError(
140
+ f"Invalid Ibis handle: {handle}. "
141
+ f"Expected format: ibis+backend://connection_string "
142
+ f"(e.g., ibis+duckdb:///:memory:, ibis+postgres://host/db)"
143
+ )
144
+
145
@property
def connection(self):
    """
    Get or create the Ibis connection.

    The connection is created on first access from ``self.handle`` and cached
    on the instance. If the instance was constructed with
    ``recreate_if_exists``, all existing tables are dropped right after the
    first successful connect. The flag is then cleared, so reconnecting after
    ``close()`` does not wipe the database a second time.

    :return: the cached Ibis backend connection.
    :raises ImportError: if Ibis is not installed.
    :raises ConnectionError: if the backend refuses the connection.
    """
    if self._connection is None:
        try:
            import ibis
        except ImportError as e:
            raise ImportError(
                "Ibis is not installed. Install it with: pip install 'linkml-store[ibis]' "
                "or pip install 'ibis-framework[duckdb]'"
            ) from e

        parsed = self._parse_handle(self.handle)
        backend = parsed["backend"]
        connection_string = parsed["connection_string"]

        # Connection strings may embed "user:password@"; mask that part so
        # credentials never reach the log.
        loggable = connection_string
        if loggable and "@" in loggable:
            before_at, _, after_at = loggable.rpartition("@")
            scheme_part, scheme_sep, _ = before_at.rpartition("://")
            loggable = f"{scheme_part}{scheme_sep}***@{after_at}"
        logger.info(f"Connecting to Ibis backend: {backend} with connection: {loggable}")

        try:
            if backend == "duckdb":
                new_connection = ibis.duckdb.connect(connection_string)
            elif backend == "sqlite":
                new_connection = ibis.sqlite.connect(connection_string)
            elif backend in ("postgres", "postgresql"):
                # The parsed value is a full URL, while the backend-specific
                # connect() takes discrete keyword arguments; go through the
                # URL-aware ibis.connect() using the canonical scheme.
                _, _, location = connection_string.partition("://")
                new_connection = ibis.connect(f"postgres://{location}")
            elif backend == "bigquery":
                # The parsed value is "project/dataset"; pass each part to
                # the parameter it belongs to.
                project_id, _, dataset_id = connection_string.partition("/")
                new_connection = ibis.bigquery.connect(
                    project_id=project_id or None,
                    dataset_id=dataset_id,
                )
            else:
                # Try generic connect
                new_connection = ibis.connect(f"{backend}://{connection_string}")
        except Exception as e:
            raise ConnectionError(f"Failed to connect to Ibis backend {backend}: {e}") from e

        self._connection = new_connection

        # If recreate_if_exists was set, drop all existing tables, once only.
        # getattr() tolerates access before __init__ has stored the flag.
        if getattr(self, "_recreate_if_exists", False):
            self._recreate_if_exists = False
            self._drop_all_tables()

    return self._connection
183
+
184
+ def _drop_all_tables(self):
185
+ """Drop all tables in the database."""
186
+ if self._connection:
187
+ tables = self._connection.list_tables()
188
+ for table_name in tables:
189
+ try:
190
+ self._connection.drop_table(table_name)
191
+ logger.debug(f"Dropped table {table_name}")
192
+ except Exception as e:
193
+ logger.warning(f"Failed to drop table {table_name}: {e}")
194
+
195
def commit(self, **kwargs):
    """
    Commit changes (no-op for most Ibis backends).

    Most Ibis backends auto-commit; the method is kept for interface
    compatibility and ignores any keyword arguments.
    """
    return None
199
+
200
def close(self, **kwargs):
    """
    Close the Ibis connection.

    The cached connection is forgotten and, when the backend object offers a
    ``disconnect()`` method, that method is called so file handles and locks
    are released immediately rather than at garbage collection. A failure
    while disconnecting is logged and otherwise ignored. Calling ``close()``
    without an open connection is a no-op.
    """
    connection = self._connection
    if connection is None:
        return

    # Forget the connection first so the adapter can reconnect even if the
    # backend fails while shutting down.
    self._connection = None

    # Not every Ibis backend/version exposes an explicit disconnect().
    disconnect = getattr(connection, "disconnect", None)
    if callable(disconnect):
        try:
            disconnect()
        except Exception as e:
            logger.warning(f"Error while closing Ibis connection: {e}")
206
+
207
def drop(self, missing_ok=True, **kwargs):
    """
    Drop the database.

    The connection is closed first. The in-memory database needs nothing
    further. For file-based backends the database file is deleted together
    with its ``.wal`` companion, the same pair that ``__init__`` removes for
    ``recreate_if_exists``, so a database later created at the same path does
    not inherit a stale write-ahead log. Handles without a file path are left
    untouched.

    :param missing_ok: when false, a missing database file raises instead of
        being ignored.
    :raises FileNotFoundError: if the file is absent and ``missing_ok`` is false.
    """
    self.close()
    if self.handle == MEMORY_HANDLE:
        return

    path = self._parse_handle(self.handle).get("path")
    if not path:
        return

    path_obj = Path(path)
    if path_obj.exists():
        path_obj.unlink()
    elif not missing_ok:
        raise FileNotFoundError(f"Database file not found: {path}")

    # Also clean up potential WAL files
    wal_path = Path(str(path) + ".wal")
    if wal_path.exists():
        wal_path.unlink()
221
+
222
+ def _table_exists(self, table: str) -> bool:
223
+ """Check if a table exists in the database."""
224
+ try:
225
+ return table in self.connection.list_tables()
226
+ except Exception as e:
227
+ logger.warning(f"Error checking if table {table} exists: {e}")
228
+ return False
229
+
230
+ def _list_table_names(self) -> List[str]:
231
+ """List all table names in the database."""
232
+ try:
233
+ return self.connection.list_tables()
234
+ except Exception as e:
235
+ logger.error(f"Error listing tables: {e}")
236
+ return []
237
+
238
def init_collections(self):
    """
    Initialize collections from existing tables in the database.

    Creates the collection registry if it does not exist yet, then registers
    an ``IbisCollection`` for every table that has no entry so far. Entries
    that are already present are left untouched.
    """
    if self._collections is None:
        self._collections = {}

    for name in self._list_table_names():
        if name in self._collections:
            continue
        self._collections[name] = IbisCollection(name=name, parent=self)
247
+
248
def query(self, query: Union[str, Query], **kwargs) -> QueryResult:
    """
    Execute a query against the database.

    Two input kinds are accepted:

    - a SQL string, which is run directly on the connection and materialised
      as a pandas DataFrame;
    - a Query object, which is handed to the collection named by its
      ``from_table`` together with any extra keyword arguments.

    :param query: SQL text or a Query object.
    :return: the query result.
    :raises ValueError: if a Query object has no ``from_table``.
    """
    if not isinstance(query, str):
        # Structured query: the target collection does the work.
        target = query.from_table
        if not target:
            raise ValueError("Query must specify a from_table")
        return self.get_collection(target).query(query, **kwargs)

    # Direct SQL query
    try:
        frame = self.connection.sql(query).to_pandas()
        return QueryResult(
            num_rows=len(frame),
            rows=frame.to_dict("records"),
            rows_dataframe=frame,
        )
    except Exception as err:
        logger.error(f"Error executing SQL query: {err}")
        raise
276
+
277
def induce_schema_view(self) -> SchemaView:
    """
    Induce a LinkML schema from the database structure.

    Every table becomes a class and every column becomes an attribute of that
    class, with its range taken from ``_map_ibis_type_to_linkml``. A column
    name is also registered as a schema-level slot the first time it is seen.
    Later tables that reuse the name keep their own attribute but do not
    register the slot again; registering it twice would abort introspection
    of that table and leave it out of the schema.

    A table that cannot be introspected is logged as a warning and skipped.

    :return: a SchemaView over the induced schema.
    """
    sb = SchemaBuilder()

    for table_name in self._list_table_names():
        try:
            table_schema = self.connection.table(table_name).schema()

            # Create a class for this table
            class_def = ClassDefinition(name=table_name, description=f"Table: {table_name}")

            # Add attributes from columns
            for col_name, col_type in table_schema.items():
                linkml_type = self._map_ibis_type_to_linkml(str(col_type))
                slot_def = SlotDefinition(name=col_name, range=linkml_type)

                # NOTE(review): SchemaBuilder.add_slot is expected to reject a
                # name it already holds, hence the guard for column names
                # shared between tables (e.g. "id").
                if col_name not in sb.schema.slots:
                    sb.add_slot(slot_def)
                class_def.attributes[col_name] = slot_def

            sb.add_class(class_def)
        except Exception as e:
            logger.warning(f"Error introspecting table {table_name}: {e}")

    return SchemaView(sb.schema)
310
+
311
+ def _map_ibis_type_to_linkml(self, ibis_type: str) -> str:
312
+ """Map an Ibis type string to a LinkML type."""
313
+ ibis_type_lower = ibis_type.lower()
314
+
315
+ if "int" in ibis_type_lower:
316
+ return "integer"
317
+ elif "float" in ibis_type_lower or "double" in ibis_type_lower or "decimal" in ibis_type_lower:
318
+ return "float"
319
+ elif "bool" in ibis_type_lower:
320
+ return "boolean"
321
+ elif "date" in ibis_type_lower and "time" not in ibis_type_lower:
322
+ return "date"
323
+ elif "timestamp" in ibis_type_lower or "datetime" in ibis_type_lower:
324
+ return "datetime"
325
+ elif "string" in ibis_type_lower or "varchar" in ibis_type_lower or "text" in ibis_type_lower:
326
+ return "string"
327
+ else:
328
+ return "string" # Default to string for unknown types
@@ -0,0 +1,25 @@
1
+ """
2
+ Adapter for MongoDB document store.
3
+
4
+ Handles have the form: ``mongodb://<host>:<port>/<database>``
5
+
6
+ To use this, you must have the `mongodb` extra installed (it supplies the `pymongo` driver).
7
+
8
+ .. code-block:: bash
9
+
10
+ pip install linkml-store[mongodb]
11
+
12
+ or
13
+
14
+ .. code-block:: bash
15
+
16
+ pip install linkml-store[all]
17
+ """
18
+
19
+ from linkml_store.api.stores.mongodb.mongodb_collection import MongoDBCollection
20
+ from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
21
+
22
# Names re-exported as the public API of the MongoDB adapter package.
__all__ = [
    "MongoDBCollection",
    "MongoDBDatabase",
]