linkml-store 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml_store/api/client.py +19 -2
- linkml_store/api/collection.py +60 -2
- linkml_store/api/database.py +17 -12
- linkml_store/api/stores/duckdb/duckdb_collection.py +11 -5
- linkml_store/api/stores/duckdb/duckdb_database.py +52 -19
- linkml_store/api/stores/mongodb/mongodb_collection.py +83 -0
- linkml_store/api/stores/mongodb/mongodb_database.py +7 -3
- linkml_store/cli.py +23 -5
- linkml_store/inference/implementations/llm_inference_engine.py +152 -0
- linkml_store/inference/implementations/rag_inference_engine.py +20 -9
- linkml_store/inference/inference_engine.py +2 -2
- linkml_store/utils/format_utils.py +60 -1
- linkml_store/utils/llm_utils.py +15 -0
- linkml_store/utils/object_utils.py +3 -1
- linkml_store/utils/sql_utils.py +7 -1
- linkml_store/utils/vector_utils.py +1 -1
- {linkml_store-0.2.4.dist-info → linkml_store-0.2.6.dist-info}/METADATA +7 -10
- {linkml_store-0.2.4.dist-info → linkml_store-0.2.6.dist-info}/RECORD +21 -20
- {linkml_store-0.2.4.dist-info → linkml_store-0.2.6.dist-info}/WHEEL +1 -1
- {linkml_store-0.2.4.dist-info → linkml_store-0.2.6.dist-info}/LICENSE +0 -0
- {linkml_store-0.2.4.dist-info → linkml_store-0.2.6.dist-info}/entry_points.txt +0 -0
linkml_store/api/client.py
CHANGED
@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
 
 HANDLE_MAP = {
     "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
+    "sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
     "solr": "linkml_store.api.stores.solr.solr_database.SolrDatabase",
     "mongodb": "linkml_store.api.stores.mongodb.mongodb_database.MongoDBDatabase",
     "chromadb": "linkml_store.api.stores.chromadb.chromadb_database.ChromaDBDatabase",
@@ -22,6 +23,12 @@ HANDLE_MAP = {
     "file": "linkml_store.api.stores.filesystem.filesystem_database.FileSystemDatabase",
 }
 
+SUFFIX_MAP = {
+    "ddb": "duckdb:///{path}",
+    "duckdb": "duckdb:///{path}",
+    "db": "duckdb:///{path}",
+}
+
 
 class Client:
     """
@@ -197,6 +204,13 @@ class Client:
         :param kwargs:
         :return:
         """
+        if ":" not in handle:
+            if alias is None:
+                alias = handle
+            if "." in handle:
+                suffix = handle.split(".")[-1]
+                if suffix in SUFFIX_MAP:
+                    handle = SUFFIX_MAP[suffix].format(path=handle)
         if ":" not in handle:
             scheme = handle
             handle = None
@@ -220,7 +234,9 @@ class Client:
         if not alias:
             alias = handle
         if not self._databases:
+            logger.info("Initializing databases")
            self._databases = {}
+        logger.info(f"Attaching {alias}")
         self._databases[alias] = db
         db.parent = self
         if db.alias:
@@ -263,8 +279,9 @@ class Client:
             self._databases[name] = db
         if name not in self._databases:
             if create_if_not_exists:
-                logger.info(f"Creating database: {name}")
-                self.attach_database(name, **kwargs)
+                logger.info(f"Creating/attaching database: {name}")
+                db = self.attach_database(name, **kwargs)
+                name = db.alias
             else:
                 raise ValueError(f"Database {name} does not exist")
         db = self._databases[name]
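The new SUFFIX_MAP means a bare file path can now be passed as a handle: when the handle has no scheme, its suffix is looked up and the handle is expanded to a DuckDB URI. A minimal sketch of the intended behaviour (the file name and alias below are hypothetical, not taken from the diff):

```python
from linkml_store import Client

client = Client()
# "mydata.ddb" has no ":" scheme, so the ".ddb" suffix is resolved via SUFFIX_MAP
# and the handle is expanded to "duckdb:///mydata.ddb"; the alias defaults to the handle.
db = client.attach_database("mydata.ddb", alias="mydata")
```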
linkml_store/api/collection.py
CHANGED
@@ -1,6 +1,7 @@
 """A structure for representing collections of similar objects."""
 
 import hashlib
+import json
 import logging
 from collections import defaultdict
 from pathlib import Path
@@ -210,8 +211,59 @@ class Collection(Generic[DatabaseType]):
         """
         raise NotImplementedError
 
+    def index (
+        self,
+        objs: Union[OBJECT, List[OBJECT]],
+        index_name: Optional[str] = None,
+        replace: bool = False,
+        unique: bool = False,
+        **kwargs,
+    ) -> None:
+        """
+        Index objects in the collection.
+
+        :param objs:
+        :param index_name:
+        :param replace: replace the index, or not
+        :param unique: boolean used to declare the index unique or not
+        :param kwargs:
+        :return:
+        """
+        raise NotImplementedError
+
+    def upsert(self,
+               objs: Union[OBJECT, List[OBJECT]],
+               filter_fields: List[str],
+               update_fields: Union[List[str], None] = None, **kwargs):
+        """
+        Add one or more objects to the collection.
+
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("mongodb", alias="test")
+        >>> collection = db.create_collection("Person")
+        >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
+        >>> collection.upsert(objs)
+
+        :param objs:
+        :param filter_fields: List of field names to use as the filter for matching existing collections.
+        :param update_fields: List of field names to include in the update. If None, all fields are updated.
+        :param kwargs:
+
+        :return:
+        """
+        raise NotImplementedError
+
     def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
-
+        """
+        Pre-query hook.
+
+        This is called before a query is executed. It is used to materialize derivations and indexes.
+        :param query:
+        :param kwargs:
+        :return:
+        """
+        logger.debug(f"Pre-query hook (state: {self._initialized}; Q= {query}")  # if logging.info, this is very noisy.
         if not self._initialized:
             self._materialize_derivations()
             self._initialized = True
@@ -536,7 +588,13 @@ class Collection(Generic[DatabaseType]):
         qr = ix_coll.find(where=where, limit=-1, **kwargs)
         index_col = ix.index_field
         # TODO: optimize this for large indexes
-
+        def row2array(row):
+            v = row[index_col]
+            if isinstance(v, str):
+                # sqlite stores arrays as strings
+                v = json.loads(v)
+            return np.array(v, dtype=float)
+        vector_pairs = [(row, row2array(row)) for row in qr.rows]
         results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
         for r in results:
             del r[1][index_col]
linkml_store/api/database.py
CHANGED
@@ -276,14 +276,15 @@ class Database(ABC, Generic[CollectionType]):
 
         Examples:
 
-
-
-
-
-
-
-
-
+            >>> from linkml_store.api.client import Client
+            >>> client = Client()
+            >>> db = client.attach_database("duckdb", alias="test")
+            >>> collection = db.create_collection("Person", alias="persons")
+            >>> collection.alias
+            'persons'
+
+            >>> collection.target_class_name
+            'Person'
 
         If alias is not provided, it defaults to the name of the type.
 
@@ -419,7 +420,7 @@ class Database(ABC, Generic[CollectionType]):
         >>> from linkml_store.api.client import Client
         >>> from linkml_store.api.queries import Query
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person")
         >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
         >>> query = Query(from_table="Person", where_clause={"name": "John"})
@@ -451,7 +452,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person", alias="persons")
         >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
         >>> schema_view = db.schema_view
@@ -470,6 +471,7 @@ class Database(ABC, Generic[CollectionType]):
         if not self._schema_view:
             self._initialize_schema()
         if not self._schema_view:
+            logger.info("Inducing schema view")
             self._schema_view = self.induce_schema_view()
         return self._schema_view
 
@@ -505,6 +507,7 @@ class Database(ABC, Generic[CollectionType]):
         if isinstance(schema_view, str):
             schema_view = SchemaView(schema_view)
         self._schema_view = schema_view
+        logger.info(f"Setting schema view for {self.handle}")
         # self._schema_view = SchemaView(schema_view.materialize_derived_schema())
         if not self._collections:
             return
@@ -719,7 +722,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
         >>> db.list_collection_names()
         ['iris']
@@ -739,7 +742,9 @@ class Database(ABC, Generic[CollectionType]):
         # import into a test instance
         tmp_handle = source_format.value
         client = self.parent
-
+        tmp_alias = "tmp"
+        client.drop_database(tmp_alias, missing_ok=True)
+        tmp_db = client.attach_database(tmp_handle, alias=tmp_alias, recreate_if_exists=True)
         # TODO: check for infinite recursion
         tmp_db.import_database(location, source_format=source_format)
         obj = {}
linkml_store/api/stores/duckdb/duckdb_collection.py
CHANGED

@@ -147,16 +147,22 @@ class DuckDBCollection(Collection):
         if self._table_created or self.metadata.is_prepopulated:
             logger.info(f"Already have table for: {cd.name}")
             return
-        query = Query(
-            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
-        )
-        qr = self.parent.query(query)
-        if qr.num_rows > 0:
+        if self.parent._table_exists(self.alias):
             logger.info(f"Table already exists for {cd.name}")
             self._table_created = True
             self._initialized = True
             self.metadata.is_prepopulated = True
             return
+        # query = Query(
+        #    from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        # )
+        # qr = self.parent.query(query)
+        # if qr.num_rows > 0:
+        #    logger.info(f"Table already exists for {cd.name}")
+        #    self._table_created = True
+        #    self._initialized = True
+        #    self.metadata.is_prepopulated = True
+        #    return
         logger.info(f"Creating table for {cd.name}")
         t = self._sqla_table(cd)
         ct = CreateTable(t)
linkml_store/api/stores/duckdb/duckdb_database.py
CHANGED

@@ -1,7 +1,7 @@
 import json
 import logging
 from pathlib import Path
-from typing import Optional, Union
+from typing import Optional, Union, List
 
 import pandas as pd
 import sqlalchemy
@@ -14,7 +14,7 @@ from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
 from linkml_store.utils.format_utils import Format
-from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
+from linkml_store.utils.sql_utils import introspect_schema, query_to_sql, where_clause_to_sql
 
 TYPE_MAP = {
     "VARCHAR": "string",
@@ -62,7 +62,7 @@ class DuckDBDatabase(Database):
     def engine(self) -> sqlalchemy.Engine:
         if not self._engine:
             handle = self.handle
-            if not handle.startswith("duckdb://") and not handle.startswith(":"):
+            if not handle.startswith("duckdb://") and not handle.startswith(":") and "://" not in handle:
                 handle = f"duckdb:///{handle}"
             if ":memory:" not in handle:
                 # TODO: investigate this; duckdb appears to be prematurely caching
@@ -71,6 +71,10 @@ class DuckDBDatabase(Database):
             self._engine = sqlalchemy.create_engine(handle)
         return self._engine
 
+    @property
+    def _is_sqlite(self) -> bool:
+        return self.handle and self.handle.startswith("sqlite:")
+
     def commit(self, **kwargs):
         with self.engine.connect() as conn:
             conn.commit()
@@ -89,34 +93,60 @@ class DuckDBDatabase(Database):
             if not missing_ok:
                 raise FileNotFoundError(f"Database file not found: {path}")
 
-    def
+    def _table_exists(self, table: str) -> bool:
+        if self._is_sqlite:
+            if table == "sqlite_master":
+                return True
+            meta_query = Query(
+                from_table="sqlite_master",
+                where_clause={
+                    #"type": "table",
+                    "name": table,
+                }
+            )
+        else:
+            if table.startswith("information_schema"):
+                return True
+            meta_query = Query(
+                from_table="information_schema.tables",
+                where_clause={
+                    "table_type": "BASE TABLE",
+                    "table_name": table,
+                }
+            )
+
+        qr = self.query(meta_query)
+        if qr.num_rows == 0:
+            logger.debug(f"Table {self.alias} not created yet")
+            return False
+        return True
+
+    def _json_encoded_cols(self, table_name: str) -> Optional[List[str]]:
         json_encoded_cols = []
-        if
-        if
-
-
-        )
-        qr = self.query(meta_query)
-        if qr.num_rows == 0:
-            logger.debug(f"Table {query.from_table} not created yet")
-            return QueryResult(query=query, num_rows=0, rows=[])
-        if not query.from_table.startswith("information_schema"):
-            sv = self.schema_view
-        else:
-            sv = None
+        if table_name:
+            if table_name.startswith("information_schema") or table_name.startswith("sqlite"):
+                return []
+        sv = self.schema_view
         if sv:
             cd = None
             for c in self._collections.values():
-
-                if c.alias == query.from_table or c.target_class_name == query.from_table:
+                if c.alias == table_name or c.target_class_name == table_name:
                     cd = c.class_definition()
                     break
             if cd:
                 for att in sv.class_induced_slots(cd.name):
                     if att.inlined or att.inlined_as_list:
                         json_encoded_cols.append(att.name)
+        return json_encoded_cols
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        if not self._table_exists(query.from_table):
+            return QueryResult(query=query, num_rows=0, rows=[])
+        json_encoded_cols = self._json_encoded_cols(query.from_table)
+
         with self.engine.connect() as conn:
             count_query_str = text(query_to_sql(query, count=True))
+            logger.debug(f"count_query_str: {count_query_str}")
             num_rows = list(conn.execute(count_query_str))[0][0]
             logger.debug(f"num_rows: {num_rows}")
             query_str = query_to_sql(query, **kwargs)  # include offset, limit
@@ -167,6 +197,9 @@ class DuckDBDatabase(Database):
         logger.info(f"Inducing schema view for {self.metadata.handle} // {self}")
         sb = SchemaBuilder()
         schema = sb.schema
+        logger.info(f"Checking if {self.metadata.handle} is sqlite: {self._is_sqlite}")
+        if self._is_sqlite:
+            return SchemaView(schema)
         query = Query(from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE"})
         qr = self.query(query)
         logger.info(f"Found {qr.num_rows} information_schema.tables // {qr.rows}")
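Together with the new "sqlite" entry in HANDLE_MAP (see client.py above), these changes let a sqlite:/// handle be served by the DuckDB adapter over SQLAlchemy, with the `_is_sqlite` property switching table introspection from `information_schema` to `sqlite_master`. A rough usage sketch, assuming a local SQLite file (the path and data below are hypothetical):

```python
from linkml_store import Client

client = Client()
# A sqlite handle now resolves to DuckDBDatabase; _table_exists() consults
# sqlite_master rather than information_schema for this backend.
db = client.attach_database("sqlite:///people.db", alias="people")
persons = db.create_collection("Person", alias="persons")
persons.insert([{"id": "P1", "name": "John"}])
```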
linkml_store/api/stores/mongodb/mongodb_collection.py
CHANGED

@@ -41,6 +41,89 @@ class MongoDBCollection(Collection):
                 del obj["_id"]
         self._post_insert_hook(objs)
 
+
+    def index(self,
+              objs: Union[OBJECT, List[OBJECT]],
+              index_name: Optional[str] = None,
+              replace: bool = False,
+              unique: bool = False,
+              **kwargs):
+        """
+        Create indexes on the collection.
+
+        :param objs: Field(s) to index.
+        :param index_name: Optional name for the index.
+        :param replace: If True, the index will be dropped and recreated.
+        :param unique: If True, creates a unique index (default: False).
+        """
+
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        existing_indexes = self.mongo_collection.index_information()
+
+        for obj in objs:
+            field_exists = False
+            index_to_drop = None
+
+            # Extract existing index details
+            for index_name_existing, index_details in existing_indexes.items():
+                indexed_fields = [field[0] for field in index_details.get("key", [])]  # Extract field names
+
+                if obj in indexed_fields:  # If this field is already indexed
+                    field_exists = True
+                    index_to_drop = index_name_existing if replace else None
+
+            # Drop the index if replace=True and index_to_drop is valid
+            if index_to_drop:
+                self.mongo_collection.drop_index(index_to_drop)
+                logging.debug(f"Dropped existing index: {index_to_drop}")
+
+            # Create the new index only if it doesn't exist or was dropped
+            if not field_exists or replace:
+                self.mongo_collection.create_index(obj, name=index_name, unique=unique)
+                logging.debug(f"Created new index: {index_name} on field {obj}, unique={unique}")
+            else:
+                logging.debug(f"Index already exists for field {obj}, skipping creation.")
+
+    def upsert(self,
+               objs: Union[OBJECT, List[OBJECT]],
+               filter_fields: List[str],
+               update_fields: Optional[List[str]] = None,
+               **kwargs):
+        """
+        Upsert one or more documents into the MongoDB collection.
+
+        :param objs: The document(s) to insert or update.
+        :param filter_fields: List of field names to use as the filter for matching existing documents.
+        :param update_fields: List of field names to include in the update. If None, all fields are updated.
+        """
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        for obj in objs:
+            # Ensure filter fields exist in the object
+            filter_criteria = {field: obj[field] for field in filter_fields if field in obj}
+            if not filter_criteria:
+                raise ValueError("At least one valid filter field must be present in each object.")
+
+            # Check if a document already exists
+            existing_doc = self.mongo_collection.find_one(filter_criteria)
+
+            if existing_doc:
+                # Update only changed fields
+                updates = {key: obj[key] for key in update_fields if key in obj and obj[key] != existing_doc.get(key)}
+
+                if updates:
+                    self.mongo_collection.update_one(filter_criteria, {"$set": updates})
+                    logging.debug(f"Updated existing document: {filter_criteria} with {updates}")
+                else:
+                    logging.debug(f"No changes detected for document: {filter_criteria}. Skipping update.")
+            else:
+                # Insert a new document
+                self.mongo_collection.insert_one(obj)
+                logging.debug(f"Inserted new document: {obj}")
+
     def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
         mongo_filter = self._build_mongo_filter(query.where_clause)
         limit = limit or query.limit
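A sketch of how the new MongoDB `index` and `upsert` methods are meant to be called, based on the signatures above (the connection string and field names are hypothetical):

```python
from linkml_store import Client

client = Client()
db = client.attach_database("mongodb://localhost:27017/test", alias="test")
persons = db.create_collection("Person")

# Create a unique index on "id", dropping and recreating it if one already exists.
persons.index("id", unique=True, replace=True)

# Insert-or-update keyed on "id"; only "name" is compared and updated for existing documents.
persons.upsert({"id": "P1", "name": "John"}, filter_fields=["id"], update_fields=["name"])
```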
linkml_store/api/stores/mongodb/mongodb_database.py
CHANGED

@@ -3,6 +3,7 @@
 import logging
 from pathlib import Path
 from typing import Optional, Union
+from urllib.parse import urlparse
 
 from pymongo import MongoClient
 from pymongo.database import Database as NativeDatabase
@@ -38,10 +39,13 @@ class MongoDBDatabase(Database):
     @property
     def _db_name(self) -> str:
         if self.handle:
-
+            parsed_url = urlparse(self.handle)
+            path_parts = parsed_url.path.lstrip("/").split("?")[0].split("/")
+            print(path_parts)
+            db_name = path_parts[0] if path_parts else "default"
         else:
-
-            return
+            db_name = "default"
+        return db_name
 
     @property
     def native_client(self) -> MongoClient:
linkml_store/cli.py
CHANGED
@@ -99,6 +99,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
 @click.option("--database", "-d", help="Database name")
 @click.option("--collection", "-c", help="Collection name")
 @click.option("--input", "-i", help="Input file (alternative to database/collection)")
+@click.option("--schema", "-S", help="Path to schema (LinkML yaml)")
 @click.option("--config", "-C", type=click.Path(exists=True), help="Path to the configuration file")
 @click.option("--set", help="Metadata settings in the form PATHEXPR=value", multiple=True)
 @click.option("-v", "--verbose", count=True)
@@ -111,7 +112,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
     help="If set then show full stacktrace on error",
 )
 @click.pass_context
-def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, config, set, input, **kwargs):
+def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, schema, config, set, input, **kwargs):
     """A CLI for interacting with the linkml-store."""
     if not stacktrace:
         sys.tracebacklimit = 0
@@ -158,6 +159,9 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
     client = Client().from_config(config, **kwargs) if config else Client()
     settings = ContextSettings(client=client, database_name=database, collection_name=collection)
     ctx.obj["settings"] = settings
+    if schema:
+        db = settings.database
+        db.set_schema_view(schema)
     if settings.database_name:
         db = client.get_database(database)
         if set:
@@ -182,7 +186,7 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
 
 
 @cli.command()
-@click.argument("files", type=click.Path(
+@click.argument("files", type=click.Path(), nargs=-1)
 @click.option("--replace/--no-replace", default=False, show_default=True, help="Replace existing objects")
 @click.option("--format", "-f", type=format_choice, help="Input format")
 @click.option("--object", "-i", multiple=True, help="Input object as YAML")
@@ -534,6 +538,7 @@ def pivot(ctx, where, limit, index, columns, values, output_type, output):
 @click.option(
     "--feature-attributes", "-F", type=click.STRING, help="Feature attributes for inference (comma separated)"
 )
+@click.option("--training-collection", type=click.STRING,help="Collection to use for training")
 @click.option("--inference-config-file", "-Y", type=click.Path(), help="Path to inference configuration file")
 @click.option("--export-model", "-E", type=click.Path(), help="Export model to file")
 @click.option("--load-model", "-L", type=click.Path(), help="Load model from file")
@@ -555,6 +560,7 @@ def infer(
     evaluation_count,
     evaluation_match_function,
     training_test_data_split,
+    training_collection,
     predictor_type,
     target_attribute,
     feature_attributes,
@@ -617,6 +623,7 @@ def infer(
     if model_format:
         model_format = ModelSerialization(model_format)
     if load_model:
+        logger.info(f"Loading predictor from {load_model}")
         predictor = get_inference_engine(predictor_type)
         predictor = type(predictor).load_model(load_model)
     else:
@@ -627,13 +634,18 @@ def infer(
         if training_test_data_split:
             config.train_test_split = training_test_data_split
         predictor = get_inference_engine(predictor_type, config=config)
-
-
+        training_collection_obj = collection
+        if training_collection:
+            training_collection_obj = ctx.obj["settings"].database.get_collection(training_collection)
+        if training_collection_obj:
+            logger.info(f"Using collection: {training_collection_obj.alias} for inference")
+            split = training_test_data_split or (1.0, 0.0)
+            predictor.load_and_split_data(training_collection_obj, split=split)
     predictor.initialize_model()
     if export_model:
         logger.info(f"Exporting model to {export_model} in {model_format}")
         predictor.export_model(export_model, model_format)
-    if not query_obj:
+    if not query_obj and where_clause is None:
         if not export_model and not evaluation_count:
             raise ValueError("Query or evaluate must be specified if not exporting model")
     if evaluation_count:
@@ -651,6 +663,12 @@ def infer(
         result = predictor.derive(query_obj)
         dumped_obj = result.model_dump(exclude_none=True)
         write_output([dumped_obj], output_type, target=output)
+    if where_clause is not None:
+        predicted_objs = []
+        for query_obj in collection.find(where_clause).rows:
+            result = predictor.derive(query_obj)
+            predicted_objs.append(result.predicted_object)
+        write_output(predicted_objs, output_type, target=output)
 
 
 @cli.command()
linkml_store/inference/implementations/llm_inference_engine.py
ADDED

@@ -0,0 +1,152 @@
+import json
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import ClassVar, List, Optional, TextIO, Union
+
+import yaml
+from linkml_store.utils.llm_utils import parse_yaml_payload
+from llm import get_key
+from pydantic import BaseModel
+
+from linkml_store.api.collection import OBJECT, Collection
+from linkml_store.inference.inference_config import Inference, InferenceConfig, LLMConfig
+from linkml_store.inference.inference_engine import InferenceEngine, ModelSerialization
+from linkml_store.utils.object_utils import select_nested
+
+logger = logging.getLogger(__name__)
+
+MAX_ITERATIONS = 5
+DEFAULT_NUM_EXAMPLES = 20
+
+SYSTEM_PROMPT = """
+Your task is to inference the complete YAML
+object output given the YAML object input. I will provide you
+with contextual information, including the schema,
+to help with the inference. You can use the following
+
+You should return ONLY valid YAML in your response.
+"""
+
+
+class TrainedModel(BaseModel, extra="forbid"):
+    index_rows: List[OBJECT]
+    config: Optional[InferenceConfig] = None
+
+
+class LLMInference(Inference):
+    iterations: int = 0
+
+
+@dataclass
+class LLMInferenceEngine(InferenceEngine):
+    """
+    LLM based predictor.
+
+    Unlike the RAG predictor this performs few-shot inference
+    """
+
+    _model: "llm.Model" = None  # noqa: F821
+
+    PERSIST_COLS: ClassVar[List[str]] = [
+        "config",
+    ]
+
+    def __post_init__(self):
+        if not self.config:
+            self.config = InferenceConfig()
+        if not self.config.llm_config:
+            self.config.llm_config = LLMConfig()
+
+    @property
+    def model(self) -> "llm.Model":  # noqa: F821
+        import llm
+
+        if self._model is None:
+            self._model = llm.get_model(self.config.llm_config.model_name)
+            if self._model.needs_key:
+                key = get_key(None, key_alias=self._model.needs_key)
+                self._model.key = key
+
+        return self._model
+
+    def initialize_model(self, **kwargs):
+        logger.info(f"Initializing model {self.model}")
+
+    def object_to_text(self, object: OBJECT) -> str:
+        return yaml.dump(object)
+
+    def _schema_str(self) -> str:
+        db = self.training_data.base_collection.parent
+        from linkml_runtime.dumpers import json_dumper
+        schema_dict = json_dumper.to_dict(db.schema_view.schema)
+        return yaml.dump(schema_dict)
+
+    def derive(self, object: OBJECT, iteration=0, additional_prompt_texts: Optional[List[str]] = None) -> Optional[LLMInference]:
+        import llm
+
+        model: llm.Model = self.model
+        #model_name = self.config.llm_config.model_name
+        #feature_attributes = self.config.feature_attributes
+        target_attributes = self.config.target_attributes
+        query_text = self.object_to_text(object)
+
+        if not target_attributes:
+            target_attributes = [k for k, v in object.items() if v is None or v == ""]
+        #if not feature_attributes:
+        #    feature_attributes = [k for k, v in object.items() if v is not None and v != ""]
+
+        system_prompt = SYSTEM_PROMPT.format(llm_config=self.config.llm_config)
+
+        system_prompt += "\n## SCHEMA:\n\n" + self._schema_str()
+
+        stub = ", ".join([f"{k}: ..." for k in target_attributes])
+        stub = "{" + stub + "}"
+        prompt = (
+            "Provide a YAML object of the form"
+            "```yaml\n"
+            f"{stub}\n"
+            "```\n"
+            "---\nQuery:\n" f"## INCOMPLETE OBJECT:\n{query_text}\n" "## OUTPUT:\n"
+        )
+        logger.info(f"Prompt: {prompt}")
+        response = model.prompt(prompt, system=system_prompt)
+        yaml_str = response.text()
+        logger.info(f"Response: {yaml_str}")
+        predicted_object = parse_yaml_payload(yaml_str, strict=True)
+        predicted_object = {**object, **predicted_object}
+        if self.config.validate_results:
+            base_collection = self.training_data.base_collection
+            errs = list(base_collection.iter_validate_collection([predicted_object]))
+            if errs:
+                print(f"{iteration} // FAILED TO VALIDATE: {yaml_str}")
+                print(f"PARSED: {predicted_object}")
+                print(f"ERRORS: {errs}")
+                if iteration > MAX_ITERATIONS:
+                    raise ValueError(f"Validation errors: {errs}")
+                extra_texts = [
+                    "Make sure results conform to the schema. Previously you provided:\n",
+                    yaml_str,
+                    "\nThis was invalid.\n",
+                    "Validation errors:\n",
+                ] + [self.object_to_text(e) for e in errs]
+                return self.derive(object, iteration=iteration+1, additional_prompt_texts=extra_texts)
+        return LLMInference(predicted_object=predicted_object, iterations=iteration+1, query=object)
+
+
+    def export_model(
+        self, output: Optional[Union[str, Path, TextIO]], model_serialization: ModelSerialization = None, **kwargs
+    ):
+        self.save_model(output)
+
+    def save_model(self, output: Union[str, Path]) -> None:
+        """
+        Save the trained model and related data to a file.
+
+        :param output: Path to save the model
+        """
+        raise NotImplementedError("Does not make sense for this engine")
+
+    @classmethod
+    def load_model(cls, file_path: Union[str, Path]) -> "LLMInferenceEngine":
+        raise NotImplementedError("Does not make sense for this engine")
linkml_store/inference/implementations/rag_inference_engine.py
CHANGED

@@ -20,7 +20,7 @@ DEFAULT_NUM_EXAMPLES = 20
 DEFAULT_MMR_RELEVANCE_FACTOR = 0.8
 
 SYSTEM_PROMPT = """
-You are a {llm_config.role}, your task is to
+You are a {llm_config.role}, your task is to infer the YAML
 object output given the YAML object input. I will provide you
 with a collection of examples that will provide guidance both
 on the desired structure of the response, as well as the kind
@@ -130,23 +130,34 @@ class RAGInferenceEngine(InferenceEngine):
         else:
             if not self.rag_collection.indexers:
                 raise ValueError("RAG collection must have an indexer attached")
+            logger.info(f"Searching {self.rag_collection.alias} for examples for: {query_text}")
             rs = self.rag_collection.search(query_text, limit=num_examples, index_name="llm",
                                             mmr_relevance_factor=mmr_relevance_factor)
             examples = rs.rows
+            logger.info(f"Found {len(examples)} examples")
         if not examples:
             raise ValueError(f"No examples found for {query_text}; size = {self.rag_collection.size()}")
         prompt_clauses = []
-
+        this_feature_attributes = feature_attributes
+        if not this_feature_attributes:
+            this_feature_attributes = list(set(object.keys()) - set(target_attributes))
+        query_obj = select_nested(object, this_feature_attributes)
         query_text = self.object_to_text(query_obj)
         for example in examples:
-
+            this_feature_attributes = feature_attributes
+            if not this_feature_attributes:
+                this_feature_attributes = list(set(example.keys()) - set(target_attributes))
+                if not this_feature_attributes:
+                    raise ValueError(f"No feature attributes found in example {example}")
+            input_obj = select_nested(example, this_feature_attributes)
            input_obj_text = self.object_to_text(input_obj)
            if input_obj_text == query_text:
-
-
-
-
-
+                continue
+                #raise ValueError(
+                #    f"Query object {query_text} is the same as example object {input_obj_text}\n"
+                #    "This indicates possible test data leakage\n."
+                #    "TODO: allow an option that allows user to treat this as a basic lookup\n"
+                #)
             output_obj = select_nested(example, target_attributes)
             prompt_clause = (
                 "---\nExample:\n" f"## INPUT:\n{input_obj_text}\n" f"## OUTPUT:\n{self.object_to_text(output_obj)}\n"
@@ -169,7 +180,7 @@ class RAGInferenceEngine(InferenceEngine):
                                       encoding=encoding, token_limit=token_limit,
                                       additional_text=system_prompt)
         logger.info(f"Prompt: {prompt}")
-        response = model.prompt(prompt, system_prompt)
+        response = model.prompt(prompt, system=system_prompt)
         yaml_str = response.text()
         logger.info(f"Response: {yaml_str}")
         predicted_object = self._parse_yaml_payload(yaml_str, strict=True)
linkml_store/inference/inference_engine.py
CHANGED

@@ -124,7 +124,7 @@ class InferenceEngine(ABC):
         Load the data and split it into training and testing sets.
 
         :param collection:
-        :param split:
+        :param split: Tuple of training and testing split ratios.
         :param randomize:
         :return:
         """
@@ -136,7 +136,7 @@ class InferenceEngine(ABC):
             self.training_data = CollectionSlice(name="train", base_collection=collection, indices=None)
             self.testing_data = None
             return
-        logger.info(f"Loading and splitting data from collection {collection.alias}")
+        logger.info(f"Loading and splitting data {split} from collection {collection.alias}")
         size = collection.size()
         indices = range(size)
         if randomize:
linkml_store/utils/format_utils.py
CHANGED

@@ -3,6 +3,7 @@ import gzip
 import io
 import json
 import logging
+import re
 import sys
 import tarfile
 from enum import Enum
@@ -31,10 +32,13 @@ class Format(Enum):
     TSV = "tsv"
     CSV = "csv"
     XML = "xml"
+    OBO = "obo"
+    PKL = "pkl"
     PYTHON = "python"
     PARQUET = "parquet"
     FORMATTED = "formatted"
     TABLE = "table"
+    XLSX = "xlsx"
     SQLDUMP_DUCKDB = "duckdb"
     SQLDUMP_POSTGRES = "postgres"
     DUMP_MONGODB = "mongodb"
@@ -67,6 +71,9 @@ class Format(Enum):
     def is_dump_format(self):
         return self in [Format.SQLDUMP_DUCKDB, Format.SQLDUMP_POSTGRES, Format.DUMP_MONGODB]
 
+    def is_binary_format(self):
+        return self in [Format.PARQUET, Format.XLSX]
+
     def is_xsv(self):
         return self in [Format.TSV, Format.CSV]
 
@@ -95,6 +102,26 @@ def load_objects_from_url(
     return objs
 
 
+def clean_pandas_value(v):
+    """Clean a single value from pandas."""
+    import math
+
+    if isinstance(v, float):
+        if math.isnan(v) or math.isinf(v):
+            return None
+        return float(v)  # Ensures proper float type
+    return v
+
+
+def clean_nested_structure(obj):
+    """Recursively clean a nested structure of dicts/lists from pandas."""
+    if isinstance(obj, dict):
+        return {k: clean_nested_structure(v) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [clean_nested_structure(item) for item in obj]  # Fixed: using 'item' instead of 'v'
+    else:
+        return clean_pandas_value(obj)
+
 def process_file(
     f: IO, format: Format, expected_type: Optional[Type] = None, header_comment_token: Optional[str] = None
 ) -> List[Dict[str, Any]]:
@@ -128,6 +155,19 @@ def process_file(
         objs = list(reader)
     elif format == Format.XML:
         objs = xmltodict.parse(f.read())
+    elif format == Format.PKL:
+        objs = pd.read_pickle(f).to_dict(orient="records")
+    elif format == Format.XLSX:
+        xls = pd.ExcelFile(f)
+        objs = {sheet: clean_nested_structure(xls.parse(sheet).to_dict(orient="records")) for sheet in xls.sheet_names}
+    elif format == Format.OBO:
+        blocks = split_document(f.read(), "\n\n")
+        id_pattern = re.compile(r"id: (\S+)")
+        def get_id(block):
+            m = id_pattern.search(block)
+            return m.group(1) if m else None
+        objs = [{"id": get_id(block), "content": block} for block in blocks]
+        objs = [obj for obj in objs if obj["id"]]
     elif format == Format.PARQUET:
         import pyarrow.parquet as pq
 
@@ -167,6 +207,14 @@ def load_objects(
     if isinstance(file_path, Path):
         file_path = str(file_path)
 
+    for url_scheme in ["http", "https", "ftp"]:
+        if file_path.startswith(f"{url_scheme}://"):
+            return load_objects_from_url(
+                file_path,
+                format=format,
+                expected_type=expected_type,
+            )
+
     if isinstance(format, str):
         format = Format(format)
 
@@ -185,9 +233,9 @@ def load_objects(
     else:
         if Path(file_path).is_dir():
             raise ValueError(f"{file_path} is a dir, which is invalid for {format}")
-        mode = "rb" if format == Format.PARQUET or compression == "gz" else "r"
         open_func = gzip.open if compression == "gz" else open
         format = Format.guess_format(file_path) if not format else format
+        mode = "rb" if (format and format.is_binary_format()) or compression == "gz" else "r"
         with open_func(file_path, mode) if file_path != "-" else sys.stdin as f:
             if compression == "gz" and mode == "r":
                 f = io.TextIOWrapper(f)
@@ -343,3 +391,14 @@ def guess_format(path: str) -> Optional[Format]:
     :return: The guessed format.
     """
     return Format.guess_format(path)
+
+
+def split_document(doc: str, delimiter: str):
+    """
+    Split a document into parts based on a delimiter.
+
+    :param doc: The document to split.
+    :param delimiter: The delimiter.
+    :return: The parts of the document.
+    """
+    return doc.split(delimiter)
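A small sketch of the new OBO support in `load_objects` (the file name below is hypothetical): each stanza in the file, as split on blank lines by `split_document`, becomes one record of the form `{"id": ..., "content": ...}`, and stanzas without an `id:` tag are dropped.

```python
from linkml_store.utils.format_utils import Format, load_objects

# Returns a list of {"id": ..., "content": ...} dicts, one per OBO stanza.
terms = load_objects("ontology.obo", format=Format.OBO)
```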
linkml_store/utils/llm_utils.py
CHANGED
@@ -100,3 +100,18 @@ def get_token_limit(model_name: str) -> int:
         if model in model_name:
             return token_limit
     return 4096
+
+
+def parse_yaml_payload(yaml_str: str, strict=False) -> Optional[dict]:
+    import yaml
+    if "```" in yaml_str:
+        yaml_str = yaml_str.split("```")[1].strip()
+        if yaml_str.startswith("yaml"):
+            yaml_str = yaml_str[4:].strip()
+    try:
+        return yaml.safe_load(yaml_str)
+    except Exception as e:
+        if strict:
+            raise e
+        logger.error(f"Error parsing YAML: {yaml_str}\n{e}")
+        return None
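The new `parse_yaml_payload` helper strips Markdown code fences from an LLM response before YAML-parsing it; a quick sketch of its behaviour:

```python
from linkml_store.utils.llm_utils import parse_yaml_payload

response_text = "```yaml\nname: John\nage_in_years: 30\n```"
obj = parse_yaml_payload(response_text)
# -> {"name": "John", "age_in_years": 30}; with strict=False a parse failure returns None
```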
linkml_store/utils/object_utils.py
CHANGED

@@ -124,7 +124,7 @@ def select_nested(data: dict, paths: List[Union[str, List[str]]], current_path=N
 
     Args:
         data (dict): The input nested dictionary.
-
+        paths (list): A list of selector strings.
 
     Returns:
         dict: A new dictionary with the same structure, but only the selected attributes.
@@ -162,6 +162,8 @@ def select_nested(data: dict, paths: List[Union[str, List[str]]], current_path=N
     if current_path is None:
         current_path = []
     matching_paths = []
+    if not paths:
+        raise ValueError("No paths provided")
     for path in paths:
         if isinstance(path, str):
             path = path.split(".")
linkml_store/utils/sql_utils.py
CHANGED
@@ -5,7 +5,7 @@ import sqlalchemy
 import sqlalchemy.sql.sqltypes as sqlt
 from linkml_runtime.linkml_model import SchemaDefinition, SlotDefinition
 from linkml_runtime.utils.schema_builder import SchemaBuilder
-from sqlalchemy import MetaData
+from sqlalchemy import MetaData, quoted_name
 
 from linkml_store.api.queries import Query
 
@@ -115,7 +115,13 @@ def facet_count_sql(query: Query, facet_column: Union[str, Tuple[str, ...]], mul
     conditions = [cond for cond in where_clause_sql.split(" AND ") if not cond.startswith(f"{facet_column} ")]
     modified_where = " AND ".join(conditions)
 
+    def make_col_safe(col):
+        return '"' + quoted_name(col, True) + '"' if ' ' in col else col
+
+    if isinstance(facet_column, str):
+        facet_column = make_col_safe(facet_column)
     if isinstance(facet_column, tuple):
+        facet_column = [make_col_safe(col) for col in facet_column]
         if multivalued:
             raise NotImplementedError("Multivalued facets are not supported for multiple columns")
         facet_column = ", ".join(facet_column)
linkml_store/utils/vector_utils.py
CHANGED

@@ -34,7 +34,7 @@ def pairwise_cosine_similarity(vector1: np.array, vector2: np.array) -> float:
     dot_product = np.dot(vector1, vector2)
     norm1 = np.linalg.norm(vector1)
     norm2 = np.linalg.norm(vector2)
-    return dot_product / (norm1 * norm2)
+    return float(dot_product / (norm1 * norm2))
 
 
 def compute_cosine_similarity_matrix(list1: LOL, list2: LOL) -> np.ndarray:
{linkml_store-0.2.4.dist-info → linkml_store-0.2.6.dist-info}/METADATA
CHANGED

@@ -1,14 +1,13 @@
 Metadata-Version: 2.3
 Name: linkml-store
-Version: 0.2.4
+Version: 0.2.6
 Summary: linkml-store
 License: MIT
 Author: Author 1
 Author-email: author@org.org
-Requires-Python: >=3.
+Requires-Python: >=3.10,<4.0
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -20,7 +19,6 @@ Provides-Extra: bigquery
 Provides-Extra: fastapi
 Provides-Extra: frictionless
 Provides-Extra: h5py
-Provides-Extra: ibis
 Provides-Extra: llm
 Provides-Extra: map
 Provides-Extra: mongodb
@@ -36,20 +34,18 @@ Requires-Dist: duckdb (>=0.10.1)
 Requires-Dist: duckdb-engine (>=0.11.2)
 Requires-Dist: fastapi ; extra == "fastapi"
 Requires-Dist: frictionless ; extra == "frictionless"
-Requires-Dist: gcsfs ; extra == "ibis"
 Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
 Requires-Dist: h5py ; extra == "h5py"
-Requires-Dist: ibis-framework[duckdb,examples] (>=9.3.0) ; extra == "ibis"
 Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
 Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
-Requires-Dist: jsonpatch (>=1.33
+Requires-Dist: jsonpatch (>=1.33)
 Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
 Requires-Dist: linkml-runtime (>=1.8.0)
 Requires-Dist: linkml_map ; extra == "map"
 Requires-Dist: linkml_renderer ; extra == "renderer"
 Requires-Dist: llm ; extra == "llm" or extra == "all"
 Requires-Dist: matplotlib ; extra == "analytics"
-Requires-Dist: multipledispatch
+Requires-Dist: multipledispatch
 Requires-Dist: neo4j ; extra == "neo4j" or extra == "all"
 Requires-Dist: networkx ; extra == "neo4j"
 Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
@@ -57,8 +53,9 @@ Requires-Dist: plotly ; extra == "analytics"
 Requires-Dist: py2neo ; extra == "neo4j"
 Requires-Dist: pyarrow ; extra == "pyarrow"
 Requires-Dist: pydantic (>=2.0.0,<3.0.0)
-Requires-Dist: pymongo ; extra == "mongodb"
+Requires-Dist: pymongo (>=4.11,<5.0) ; extra == "mongodb"
 Requires-Dist: pystow (>=0.5.4,<0.6.0)
+Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
 Requires-Dist: ruff (>=0.6.2) ; extra == "tests"
 Requires-Dist: scikit-learn ; extra == "scipy"
 Requires-Dist: scipy ; extra == "scipy"
@@ -68,7 +65,7 @@ Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
 Requires-Dist: tabulate
 Requires-Dist: tiktoken ; extra == "llm"
 Requires-Dist: uvicorn ; extra == "fastapi"
-Requires-Dist: xmltodict (>=0.13.0
+Requires-Dist: xmltodict (>=0.13.0)
 Description-Content-Type: text/markdown
 
 # linkml-store
{linkml_store-0.2.4.dist-info → linkml_store-0.2.6.dist-info}/RECORD
CHANGED

@@ -1,17 +1,17 @@
 linkml_store/__init__.py,sha256=jlU6WOUAn8cKIhzbTULmBTWpW9gZdEt7q_RI6KZN1bY,118
 linkml_store/api/__init__.py,sha256=3CelcFEFz0y3MkQAzhQ9JxHIt1zFk6nYZxSmYTo8YZE,226
-linkml_store/api/client.py,sha256
-linkml_store/api/collection.py,sha256=
+linkml_store/api/client.py,sha256=-XX1H5dIPBCPwU3lgZLtb7JXmUQR_c-FYGSKEW1idr8,12970
+linkml_store/api/collection.py,sha256=Edwyb36D8NJjb-bkDy3O8BJbDVxtSH3yOKI_QW9_cic,41620
 linkml_store/api/config.py,sha256=pOz210JIwkEEXtfjcsZBp1UEedkBu8RkH62Qa1b4exI,5777
-linkml_store/api/database.py,sha256=
+linkml_store/api/database.py,sha256=JyQ8SuPrNiltgMH4pdFt4IgGBc9nq3mfRJ5ZUEIDEqA,29696
 linkml_store/api/queries.py,sha256=tx9fgGY5fC_2ZbIvg4BqTK_MXJwA_DI4mxr8HdQ6Vos,2075
 linkml_store/api/stores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/api/stores/chromadb/__init__.py,sha256=e9BkOPuPnVQKA5PRKDulag59yGNHDP3U2_DnPSrFAKM,132
 linkml_store/api/stores/chromadb/chromadb_collection.py,sha256=RQUZx5oeotkzNihg-dlSevkiTiKY1d9x0bS63HF80W4,4270
 linkml_store/api/stores/chromadb/chromadb_database.py,sha256=dZA3LQE8-ZMhJQOzsUFyxehnKpFF7adR182aggfkaFY,3205
 linkml_store/api/stores/duckdb/__init__.py,sha256=rbQSDgNg-fdvi6-pHGYkJTST4p1qXUZBf9sFSsO3KPk,387
-linkml_store/api/stores/duckdb/duckdb_collection.py,sha256=
-linkml_store/api/stores/duckdb/duckdb_database.py,sha256=
+linkml_store/api/stores/duckdb/duckdb_collection.py,sha256=1Jc770CR3oipfLj9iJn-dbkgtoEObLbylUQCoUWxuzs,7313
+linkml_store/api/stores/duckdb/duckdb_database.py,sha256=idIe89yqrdMKR69Xpi3cd5LStwe6FRBOm4eJGsHfOV0,10904
 linkml_store/api/stores/duckdb/mappings.py,sha256=tDce3W1Apwammhf4LS6cRJ0m4NiJ0eB7vOI_4U5ETY8,148
 linkml_store/api/stores/filesystem/__init__.py,sha256=KjvCjdttwqMHNeGyL-gr59zRz0--HFEWWUNNCJ5hITs,347
 linkml_store/api/stores/filesystem/filesystem_collection.py,sha256=9gqY2KRZsn_RWk4eKkxFd3_wcxs5YaXvcBI7GGJBMGE,6751
@@ -20,8 +20,8 @@ linkml_store/api/stores/hdf5/__init__.py,sha256=l4cIh3v7P0nPbwGIsfuCMD_serQ8q8c7
 linkml_store/api/stores/hdf5/hdf5_collection.py,sha256=mnpLMYehn3PuaIjp2dXrIWu8jh-bdQ84X2Ku83jMdEY,3805
 linkml_store/api/stores/hdf5/hdf5_database.py,sha256=EZbjrpaqiNDEFvoD5dZNcGBXA8z6HRNL81emueTZWNw,2714
 linkml_store/api/stores/mongodb/__init__.py,sha256=OSFCr7RQlDEe-O-Y0P_i912oAMK-L3pC7Cnj7sxlwAk,510
-linkml_store/api/stores/mongodb/mongodb_collection.py,sha256=
-linkml_store/api/stores/mongodb/mongodb_database.py,sha256=
+linkml_store/api/stores/mongodb/mongodb_collection.py,sha256=hKwaHHFxfWqjBNHZpzVuDVruH_SdXzoIKHdePN3JDEg,10447
+linkml_store/api/stores/mongodb/mongodb_database.py,sha256=HfVEEFCuwZ96KO3eWuSGFajRUgZPmeG-fqsrWHZhJng,4077
 linkml_store/api/stores/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/api/stores/neo4j/neo4j_collection.py,sha256=a-Az5_ypdBMgeNyhrTW7q-ik-vYPCDDONIK7N_CDA9c,17449
 linkml_store/api/stores/neo4j/neo4j_database.py,sha256=zanP_uBZO3AH0wuzbu6auK4zcZon_lMreC2vooSZwt8,5571
@@ -30,7 +30,7 @@ linkml_store/api/stores/solr/solr_collection.py,sha256=ZlxC3JbVaHfSA4HuTeJTsp6qe
 linkml_store/api/stores/solr/solr_database.py,sha256=TFjqbY7jAkdrhAchbNg0E-mChSP7ogNwFExslbvX7Yo,2877
 linkml_store/api/stores/solr/solr_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/api/types.py,sha256=3aIQtDFMvsSmjuN5qrR2vNK5sHa6yzD_rEOPA6tHwvg,176
-linkml_store/cli.py,sha256=
+linkml_store/cli.py,sha256=GtbLVMcH6rHEeEMljFGVFd8U5h71WN_ojmIp17UlJxo,31253
 linkml_store/constants.py,sha256=x4ZmDsfE9rZcL5WpA93uTKrRWzCD6GodYXviVzIvR38,112
 linkml_store/graphs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/graphs/graph_map.py,sha256=bYRxv8n1YPnFqE9d6JKNmRawb8EAhsPlHhBue0gvtZE,712
@@ -42,29 +42,30 @@ linkml_store/index/indexer.py,sha256=e5dsjh2wjOTDRsfClKJAFTbcK1UC7BOGkUCOfDg9omI
 linkml_store/inference/__init__.py,sha256=b8NAFNZjOYU_8gOvxdyCyoiHOOl5Ai2ckKs1tv7ZkkY,342
 linkml_store/inference/evaluation.py,sha256=YDFYaEu2QLSfFq4oyARrnKfTiPLtNF8irhhspgVDfdY,6013
 linkml_store/inference/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-linkml_store/inference/implementations/
+linkml_store/inference/implementations/llm_inference_engine.py,sha256=iSxiboYpgB0_yL4zlHIJx2ZbvDrJC8JioewTKgLUS0U,5443
+linkml_store/inference/implementations/rag_inference_engine.py,sha256=R3Dz-DyNx7UU3ZaV1n9homxC2nUAT5JZnd4IRkIFftk,11326
 linkml_store/inference/implementations/rule_based_inference_engine.py,sha256=0IEY_fsHJPJy6QKbYQU_qE87RRnPOXQxPuJKXCQG8jU,6250
 linkml_store/inference/implementations/sklearn_inference_engine.py,sha256=Sdi7CoRK3qoLJu3prgLy1Ck_zQ1gHWRKFybHe7XQ4_g,13192
 linkml_store/inference/inference_config.py,sha256=EFGdigxWsfTPREbgqyJVRShN0JktCEmFLLoECrLfXSg,2282
-linkml_store/inference/inference_engine.py,sha256=
+linkml_store/inference/inference_engine.py,sha256=7P9syuIwwBpCUytfqZcCR5ei61ys5LIw8YhO0iIehG4,7191
 linkml_store/inference/inference_engine_registry.py,sha256=6o66gvBYBwdeAKm62zqqvfaBlcopVP_cla3L6uXGsHA,3015
 linkml_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/utils/change_utils.py,sha256=O2rvSvgTKB60reLLz9mX5OWykAA_m93bwnUh5ZWa0EY,471
 linkml_store/utils/file_utils.py,sha256=rQ7-XpmI6_Kx_dhEnI98muFRr0MmgI_kZ_9cgJBf_0I,1411
-linkml_store/utils/format_utils.py,sha256=
+linkml_store/utils/format_utils.py,sha256=hHRFkh3cwb5shM6RO7WWuOXsHHH283M_vZjXRuzbwWI,13035
 linkml_store/utils/io.py,sha256=JHUrWDtlZC2jtN_PQZ4ypdGIyYlftZEN3JaCvEPs44w,884
-linkml_store/utils/llm_utils.py,sha256=
+linkml_store/utils/llm_utils.py,sha256=51AiwMeXm2FpiD-9AywKcbZzlUMqXRAjDFJEp5Ia0LA,3494
 linkml_store/utils/mongodb_utils.py,sha256=Rl1YmMKs1IXwSsJIViSDChbi0Oer5cBnMmjka2TeQS8,4665
 linkml_store/utils/neo4j_utils.py,sha256=y3KPmDZ8mQmePgg0lUeKkeKqzEr2rV226xxEtHc5pRg,1266
-linkml_store/utils/object_utils.py,sha256=
+linkml_store/utils/object_utils.py,sha256=V0s_ZzqAGkFUfrU-9fAPb5g3snMmgKKhR3SiYZgECXI,6353
 linkml_store/utils/pandas_utils.py,sha256=djiFPO3YbgRVo2XAZuKCtgH8QVLuUyPIsfS8e-0umsU,3182
 linkml_store/utils/patch_utils.py,sha256=q-h_v68okyruzdPTEHCe0WubbQHKpi1qy5bJ9vFWDo8,4823
 linkml_store/utils/query_utils.py,sha256=HWt46BsGWoIGiNBTtvpXGY6onPRWsQky6eu_9cYqbvo,3440
 linkml_store/utils/schema_utils.py,sha256=iJiZxo5NGr7v87h4DV6V9DrDOZHSswMRuf0N4V2rVtg,646
 linkml_store/utils/sklearn_utils.py,sha256=itPpcrsbbyOazdjmivaaZ1lyZeytm0a0hJ2AS8ziUgg,7590
-linkml_store/utils/sql_utils.py,sha256=
+linkml_store/utils/sql_utils.py,sha256=qatmrJR2u4ICaO7QhDRL1ukxJlLv0zYSGgmmFV-hdnU,6210
 linkml_store/utils/stats_utils.py,sha256=4KqBb1bqDgAmq-1fJLLu5B2paPgoZZc3A-gnyVam4bI,1799
-linkml_store/utils/vector_utils.py,sha256=
+linkml_store/utils/vector_utils.py,sha256=QcLTUQWm5z1OTtiOl0mXKJyFJcQeCtbcc-GQwHhkUYw,5456
 linkml_store/webapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/webapi/html/__init__.py,sha256=hwp5eeBJKH65Bvv1x9Z4vsT1tLSYtb9Dq4I9r1kL1q0,69
 linkml_store/webapi/html/base.html.j2,sha256=hoiV2uaSxxrQp7VuAZBOHueH7czyJMYcPBRN6dZFYhk,693
@@ -73,8 +74,8 @@ linkml_store/webapi/html/database_details.html.j2,sha256=qtXdavbZb0mohiObI9dvJtk
 linkml_store/webapi/html/databases.html.j2,sha256=a9BCWQYfPeFhdUd31CWhB0yWhTIFXQayO08JgjyqKoc,294
 linkml_store/webapi/html/generic.html.j2,sha256=KtLaO2HUEF2Opq-OwHKgRKetNWe8IWc6JuIkxRPsywk,1018
 linkml_store/webapi/main.py,sha256=B0Da575kKR7X88N9ykm99Dem8FyBAW9f-w3A_JwUzfw,29165
-linkml_store-0.2.
-linkml_store-0.2.
-linkml_store-0.2.
-linkml_store-0.2.
-linkml_store-0.2.
+linkml_store-0.2.6.dist-info/LICENSE,sha256=77mDOslUnalYnuq9xQYZKtIoNEzcH9mIjvWHOKjamnE,1086
+linkml_store-0.2.6.dist-info/METADATA,sha256=s5x6OmbGC7oVUpXunjiM42sASvsvKR8XRoJllGqF6ww,6964
+linkml_store-0.2.6.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+linkml_store-0.2.6.dist-info/entry_points.txt,sha256=gWxVsHqx-t-UKWFHFzawQTvs4is4vC1rCF5AeKyqWWk,101
+linkml_store-0.2.6.dist-info/RECORD,,
{linkml_store-0.2.4.dist-info → linkml_store-0.2.6.dist-info}/LICENSE
File without changes
{linkml_store-0.2.4.dist-info → linkml_store-0.2.6.dist-info}/entry_points.txt
File without changes