linkml-store 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


linkml_store/api/client.py CHANGED
@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
 
 HANDLE_MAP = {
     "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
+    "sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
     "solr": "linkml_store.api.stores.solr.solr_database.SolrDatabase",
     "mongodb": "linkml_store.api.stores.mongodb.mongodb_database.MongoDBDatabase",
     "chromadb": "linkml_store.api.stores.chromadb.chromadb_database.ChromaDBDatabase",
@@ -22,6 +23,12 @@ HANDLE_MAP = {
     "file": "linkml_store.api.stores.filesystem.filesystem_database.FileSystemDatabase",
 }
 
+SUFFIX_MAP = {
+    "ddb": "duckdb:///{path}",
+    "duckdb": "duckdb:///{path}",
+    "db": "duckdb:///{path}",
+}
+
 
 class Client:
     """
@@ -197,6 +204,13 @@ class Client:
         :param kwargs:
         :return:
         """
+        if ":" not in handle:
+            if alias is None:
+                alias = handle
+            if "." in handle:
+                suffix = handle.split(".")[-1]
+                if suffix in SUFFIX_MAP:
+                    handle = SUFFIX_MAP[suffix].format(path=handle)
         if ":" not in handle:
             scheme = handle
             handle = None
@@ -220,7 +234,9 @@ class Client:
         if not alias:
             alias = handle
         if not self._databases:
+            logger.info("Initializing databases")
             self._databases = {}
+        logger.info(f"Attaching {alias}")
         self._databases[alias] = db
         db.parent = self
         if db.alias:
@@ -263,8 +279,9 @@ class Client:
             self._databases[name] = db
         if name not in self._databases:
             if create_if_not_exists:
-                logger.info(f"Creating database: {name}")
-                self.attach_database(name, **kwargs)
+                logger.info(f"Creating/attaching database: {name}")
+                db = self.attach_database(name, **kwargs)
+                name = db.alias
             else:
                 raise ValueError(f"Database {name} does not exist")
         db = self._databases[name]
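The SUFFIX_MAP addition lets attach_database accept a bare file path: when the handle has no scheme, a ddb/duckdb/db suffix is expanded to a duckdb:/// URL and the original path doubles as the alias. A minimal usage sketch, assuming a writable working directory (the file name is illustrative):

    from linkml_store import Client

    client = Client()
    # "people.duckdb" has no ":" scheme, so its "duckdb" suffix is looked up in
    # SUFFIX_MAP and the handle becomes "duckdb:///people.duckdb"; the alias
    # defaults to the original path.
    db = client.attach_database("people.duckdb")
    persons = db.create_collection("Person", alias="persons")
    persons.insert([{"id": "P1", "name": "John"}])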
linkml_store/api/collection.py CHANGED
@@ -1,6 +1,7 @@
 """A structure for representing collections of similar objects."""
 
 import hashlib
+import json
 import logging
 from collections import defaultdict
 from pathlib import Path
@@ -210,8 +211,59 @@ class Collection(Generic[DatabaseType]):
         """
         raise NotImplementedError
 
+    def index (
+        self,
+        objs: Union[OBJECT, List[OBJECT]],
+        index_name: Optional[str] = None,
+        replace: bool = False,
+        unique: bool = False,
+        **kwargs,
+    ) -> None:
+        """
+        Index objects in the collection.
+
+        :param objs:
+        :param index_name:
+        :param replace: replace the index, or not
+        :param unique: boolean used to declare the index unique or not
+        :param kwargs:
+        :return:
+        """
+        raise NotImplementedError
+
+    def upsert(self,
+               objs: Union[OBJECT, List[OBJECT]],
+               filter_fields: List[str],
+               update_fields: Union[List[str], None] = None, **kwargs):
+        """
+        Add one or more objects to the collection.
+
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("mongodb", alias="test")
+        >>> collection = db.create_collection("Person")
+        >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
+        >>> collection.upsert(objs)
+
+        :param objs:
+        :param filter_fields: List of field names to use as the filter for matching existing collections.
+        :param update_fields: List of field names to include in the update. If None, all fields are updated.
+        :param kwargs:
+
+        :return:
+        """
+        raise NotImplementedError
+
     def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
-        logger.info(f"Pre-query hook (state: {self._initialized}; Q= {query}")
+        """
+        Pre-query hook.
+
+        This is called before a query is executed. It is used to materialize derivations and indexes.
+        :param query:
+        :param kwargs:
+        :return:
+        """
+        logger.debug(f"Pre-query hook (state: {self._initialized}; Q= {query}")  # if logging.info, this is very noisy.
         if not self._initialized:
             self._materialize_derivations()
             self._initialized = True
@@ -536,7 +588,13 @@ class Collection(Generic[DatabaseType]):
         qr = ix_coll.find(where=where, limit=-1, **kwargs)
         index_col = ix.index_field
         # TODO: optimize this for large indexes
-        vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
+        def row2array(row):
+            v = row[index_col]
+            if isinstance(v, str):
+                # sqlite stores arrays as strings
+                v = json.loads(v)
+            return np.array(v, dtype=float)
+        vector_pairs = [(row, row2array(row)) for row in qr.rows]
         results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
         for r in results:
             del r[1][index_col]
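The row2array helper above exists because SQLite has no native array type, so indexed vectors come back as JSON-encoded strings. A standalone sketch of the same decoding logic (plain Python, not the linkml-store API; the column name is illustrative):

    import json
    import numpy as np

    def row2array(row: dict, index_col: str) -> np.ndarray:
        v = row[index_col]
        if isinstance(v, str):
            # SQLite returns the stored vector as a JSON string, e.g. "[0.1, 0.2]"
            v = json.loads(v)
        return np.array(v, dtype=float)

    print(row2array({"__index__": "[0.1, 0.2, 0.3]"}, "__index__"))  # [0.1 0.2 0.3]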
linkml_store/api/database.py CHANGED
@@ -276,14 +276,15 @@ class Database(ABC, Generic[CollectionType]):
 
         Examples:
 
-        >>> from linkml_store.api.client import Client
-        >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
-        >>> collection = db.create_collection("Person", alias="persons")
-        >>> collection.alias
-        'persons'
-        >>> collection.target_class_name
-        'Person'
+        >>> from linkml_store.api.client import Client
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb", alias="test")
+        >>> collection = db.create_collection("Person", alias="persons")
+        >>> collection.alias
+        'persons'
+
+        >>> collection.target_class_name
+        'Person'
 
         If alias is not provided, it defaults to the name of the type.
 
@@ -419,7 +420,7 @@ class Database(ABC, Generic[CollectionType]):
         >>> from linkml_store.api.client import Client
         >>> from linkml_store.api.queries import Query
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person")
         >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
         >>> query = Query(from_table="Person", where_clause={"name": "John"})
@@ -451,7 +452,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person", alias="persons")
         >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
         >>> schema_view = db.schema_view
@@ -470,6 +471,7 @@ class Database(ABC, Generic[CollectionType]):
         if not self._schema_view:
             self._initialize_schema()
         if not self._schema_view:
+            logger.info("Inducing schema view")
             self._schema_view = self.induce_schema_view()
         return self._schema_view
 
@@ -505,6 +507,7 @@ class Database(ABC, Generic[CollectionType]):
         if isinstance(schema_view, str):
             schema_view = SchemaView(schema_view)
         self._schema_view = schema_view
+        logger.info(f"Setting schema view for {self.handle}")
         # self._schema_view = SchemaView(schema_view.materialize_derived_schema())
         if not self._collections:
             return
@@ -719,7 +722,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
         >>> db.list_collection_names()
         ['iris']
@@ -739,7 +742,9 @@ class Database(ABC, Generic[CollectionType]):
             # import into a test instance
             tmp_handle = source_format.value
             client = self.parent
-            tmp_db = client.attach_database(tmp_handle, alias="tmp")
+            tmp_alias = "tmp"
+            client.drop_database(tmp_alias, missing_ok=True)
+            tmp_db = client.attach_database(tmp_handle, alias=tmp_alias, recreate_if_exists=True)
             # TODO: check for infinite recursion
             tmp_db.import_database(location, source_format=source_format)
             obj = {}
linkml_store/api/stores/duckdb/duckdb_collection.py CHANGED
@@ -147,16 +147,22 @@ class DuckDBCollection(Collection):
         if self._table_created or self.metadata.is_prepopulated:
             logger.info(f"Already have table for: {cd.name}")
             return
-        query = Query(
-            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
-        )
-        qr = self.parent.query(query)
-        if qr.num_rows > 0:
+        if self.parent._table_exists(self.alias):
             logger.info(f"Table already exists for {cd.name}")
             self._table_created = True
             self._initialized = True
             self.metadata.is_prepopulated = True
             return
+        # query = Query(
+        #     from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        # )
+        # qr = self.parent.query(query)
+        # if qr.num_rows > 0:
+        #     logger.info(f"Table already exists for {cd.name}")
+        #     self._table_created = True
+        #     self._initialized = True
+        #     self.metadata.is_prepopulated = True
+        #     return
         logger.info(f"Creating table for {cd.name}")
         t = self._sqla_table(cd)
         ct = CreateTable(t)
linkml_store/api/stores/duckdb/duckdb_database.py CHANGED
@@ -1,7 +1,7 @@
 import json
 import logging
 from pathlib import Path
-from typing import Optional, Union
+from typing import Optional, Union, List
 
 import pandas as pd
 import sqlalchemy
@@ -14,7 +14,7 @@ from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
 from linkml_store.utils.format_utils import Format
-from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
+from linkml_store.utils.sql_utils import introspect_schema, query_to_sql, where_clause_to_sql
 
 TYPE_MAP = {
     "VARCHAR": "string",
@@ -62,7 +62,7 @@ class DuckDBDatabase(Database):
     def engine(self) -> sqlalchemy.Engine:
         if not self._engine:
             handle = self.handle
-            if not handle.startswith("duckdb://") and not handle.startswith(":"):
+            if not handle.startswith("duckdb://") and not handle.startswith(":") and "://" not in handle:
                 handle = f"duckdb:///{handle}"
             if ":memory:" not in handle:
                 # TODO: investigate this; duckdb appears to be prematurely caching
@@ -71,6 +71,10 @@ class DuckDBDatabase(Database):
             self._engine = sqlalchemy.create_engine(handle)
         return self._engine
 
+    @property
+    def _is_sqlite(self) -> bool:
+        return self.handle and self.handle.startswith("sqlite:")
+
     def commit(self, **kwargs):
         with self.engine.connect() as conn:
             conn.commit()
@@ -89,34 +93,60 @@ class DuckDBDatabase(Database):
             if not missing_ok:
                 raise FileNotFoundError(f"Database file not found: {path}")
 
-    def query(self, query: Query, **kwargs) -> QueryResult:
+    def _table_exists(self, table: str) -> bool:
+        if self._is_sqlite:
+            if table == "sqlite_master":
+                return True
+            meta_query = Query(
+                from_table="sqlite_master",
+                where_clause={
+                    #"type": "table",
+                    "name": table,
+                }
+            )
+        else:
+            if table.startswith("information_schema"):
+                return True
+            meta_query = Query(
+                from_table="information_schema.tables",
+                where_clause={
+                    "table_type": "BASE TABLE",
+                    "table_name": table,
+                }
+            )
+
+        qr = self.query(meta_query)
+        if qr.num_rows == 0:
+            logger.debug(f"Table {self.alias} not created yet")
+            return False
+        return True
+
+    def _json_encoded_cols(self, table_name: str) -> Optional[List[str]]:
         json_encoded_cols = []
-        if query.from_table:
-            if not query.from_table.startswith("information_schema"):
-                meta_query = Query(
-                    from_table="information_schema.tables", where_clause={"table_name": query.from_table}
-                )
-                qr = self.query(meta_query)
-                if qr.num_rows == 0:
-                    logger.debug(f"Table {query.from_table} not created yet")
-                    return QueryResult(query=query, num_rows=0, rows=[])
-            if not query.from_table.startswith("information_schema"):
-                sv = self.schema_view
-            else:
-                sv = None
+        if table_name:
+            if table_name.startswith("information_schema") or table_name.startswith("sqlite"):
+                return []
+            sv = self.schema_view
            if sv:
                 cd = None
                 for c in self._collections.values():
-                    # if c.name == query.from_table or c.metadata.alias == query.from_table:
-                    if c.alias == query.from_table or c.target_class_name == query.from_table:
+                    if c.alias == table_name or c.target_class_name == table_name:
                         cd = c.class_definition()
                         break
                 if cd:
                     for att in sv.class_induced_slots(cd.name):
                         if att.inlined or att.inlined_as_list:
                             json_encoded_cols.append(att.name)
+        return json_encoded_cols
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        if not self._table_exists(query.from_table):
+            return QueryResult(query=query, num_rows=0, rows=[])
+        json_encoded_cols = self._json_encoded_cols(query.from_table)
+
         with self.engine.connect() as conn:
             count_query_str = text(query_to_sql(query, count=True))
+            logger.debug(f"count_query_str: {count_query_str}")
             num_rows = list(conn.execute(count_query_str))[0][0]
             logger.debug(f"num_rows: {num_rows}")
             query_str = query_to_sql(query, **kwargs)  # include offset, limit
@@ -167,6 +197,9 @@ class DuckDBDatabase(Database):
         logger.info(f"Inducing schema view for {self.metadata.handle} // {self}")
         sb = SchemaBuilder()
         schema = sb.schema
+        logger.info(f"Checking if {self.metadata.handle} is sqlite: {self._is_sqlite}")
+        if self._is_sqlite:
+            return SchemaView(schema)
         query = Query(from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE"})
         qr = self.query(query)
         logger.info(f"Found {qr.num_rows} information_schema.tables // {qr.rows}")
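Together with the new "sqlite" entry in HANDLE_MAP, these changes route sqlite: handles through the same SQLAlchemy-backed database class, checking table existence via sqlite_master and skipping schema induction. A hedged sketch of the intended usage (file name illustrative; exact behavior depends on the installed version):

    from linkml_store import Client

    client = Client()
    # A sqlite: handle is dispatched to DuckDBDatabase; _is_sqlite switches the
    # metadata queries from information_schema.tables to sqlite_master.
    db = client.attach_database("sqlite:///people.sqlite", alias="people")
    persons = db.create_collection("Person", alias="persons")
    persons.insert([{"id": "P1", "name": "John"}])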
linkml_store/api/stores/mongodb/mongodb_collection.py CHANGED
@@ -41,6 +41,89 @@ class MongoDBCollection(Collection):
                 del obj["_id"]
         self._post_insert_hook(objs)
 
+
+    def index(self,
+              objs: Union[OBJECT, List[OBJECT]],
+              index_name: Optional[str] = None,
+              replace: bool = False,
+              unique: bool = False,
+              **kwargs):
+        """
+        Create indexes on the collection.
+
+        :param objs: Field(s) to index.
+        :param index_name: Optional name for the index.
+        :param replace: If True, the index will be dropped and recreated.
+        :param unique: If True, creates a unique index (default: False).
+        """
+
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        existing_indexes = self.mongo_collection.index_information()
+
+        for obj in objs:
+            field_exists = False
+            index_to_drop = None
+
+            # Extract existing index details
+            for index_name_existing, index_details in existing_indexes.items():
+                indexed_fields = [field[0] for field in index_details.get("key", [])]  # Extract field names
+
+                if obj in indexed_fields:  # If this field is already indexed
+                    field_exists = True
+                    index_to_drop = index_name_existing if replace else None
+
+            # Drop the index if replace=True and index_to_drop is valid
+            if index_to_drop:
+                self.mongo_collection.drop_index(index_to_drop)
+                logging.debug(f"Dropped existing index: {index_to_drop}")
+
+            # Create the new index only if it doesn't exist or was dropped
+            if not field_exists or replace:
+                self.mongo_collection.create_index(obj, name=index_name, unique=unique)
+                logging.debug(f"Created new index: {index_name} on field {obj}, unique={unique}")
+            else:
+                logging.debug(f"Index already exists for field {obj}, skipping creation.")
+
+    def upsert(self,
+               objs: Union[OBJECT, List[OBJECT]],
+               filter_fields: List[str],
+               update_fields: Optional[List[str]] = None,
+               **kwargs):
+        """
+        Upsert one or more documents into the MongoDB collection.
+
+        :param objs: The document(s) to insert or update.
+        :param filter_fields: List of field names to use as the filter for matching existing documents.
+        :param update_fields: List of field names to include in the update. If None, all fields are updated.
+        """
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        for obj in objs:
+            # Ensure filter fields exist in the object
+            filter_criteria = {field: obj[field] for field in filter_fields if field in obj}
+            if not filter_criteria:
+                raise ValueError("At least one valid filter field must be present in each object.")
+
+            # Check if a document already exists
+            existing_doc = self.mongo_collection.find_one(filter_criteria)
+
+            if existing_doc:
+                # Update only changed fields
+                updates = {key: obj[key] for key in update_fields if key in obj and obj[key] != existing_doc.get(key)}
+
+                if updates:
+                    self.mongo_collection.update_one(filter_criteria, {"$set": updates})
+                    logging.debug(f"Updated existing document: {filter_criteria} with {updates}")
+                else:
+                    logging.debug(f"No changes detected for document: {filter_criteria}. Skipping update.")
+            else:
+                # Insert a new document
+                self.mongo_collection.insert_one(obj)
+                logging.debug(f"Inserted new document: {obj}")
+
     def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
         mongo_filter = self._build_mongo_filter(query.where_clause)
         limit = limit or query.limit
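A hedged usage sketch for the new MongoDB index() and upsert() methods (assumes a MongoDB server reachable at the URI shown; names and values are illustrative):

    from linkml_store import Client

    client = Client()
    db = client.attach_database("mongodb://localhost:27017/test", alias="test")
    people = db.create_collection("Person", alias="persons")

    # Ensure a unique single-field index on "id", dropping any existing index
    # on that field first.
    people.index("id", unique=True, replace=True)

    # Insert-or-update keyed on "id"; only fields listed in update_fields that
    # actually changed are written back.
    people.upsert(
        [{"id": "P1", "name": "John", "age_in_years": 31}],
        filter_fields=["id"],
        update_fields=["name", "age_in_years"],
    )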
linkml_store/api/stores/mongodb/mongodb_database.py CHANGED
@@ -3,6 +3,7 @@
 import logging
 from pathlib import Path
 from typing import Optional, Union
+from urllib.parse import urlparse
 
 from pymongo import MongoClient
 from pymongo.database import Database as NativeDatabase
@@ -38,10 +39,13 @@ class MongoDBDatabase(Database):
     @property
     def _db_name(self) -> str:
         if self.handle:
-            db = self.handle.split("/")[-1]
+            parsed_url = urlparse(self.handle)
+            path_parts = parsed_url.path.lstrip("/").split("?")[0].split("/")
+            print(path_parts)
+            db_name = path_parts[0] if path_parts else "default"
         else:
-            db = "default"
-        return db
+            db_name = "default"
+        return db_name
 
     @property
     def native_client(self) -> MongoClient:
linkml_store/cli.py CHANGED
@@ -99,6 +99,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
 @click.option("--database", "-d", help="Database name")
 @click.option("--collection", "-c", help="Collection name")
 @click.option("--input", "-i", help="Input file (alternative to database/collection)")
+@click.option("--schema", "-S", help="Path to schema (LinkML yaml)")
 @click.option("--config", "-C", type=click.Path(exists=True), help="Path to the configuration file")
 @click.option("--set", help="Metadata settings in the form PATHEXPR=value", multiple=True)
 @click.option("-v", "--verbose", count=True)
@@ -111,7 +112,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
     help="If set then show full stacktrace on error",
 )
 @click.pass_context
-def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, config, set, input, **kwargs):
+def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, schema, config, set, input, **kwargs):
     """A CLI for interacting with the linkml-store."""
     if not stacktrace:
         sys.tracebacklimit = 0
@@ -158,6 +159,9 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
     client = Client().from_config(config, **kwargs) if config else Client()
     settings = ContextSettings(client=client, database_name=database, collection_name=collection)
     ctx.obj["settings"] = settings
+    if schema:
+        db = settings.database
+        db.set_schema_view(schema)
     if settings.database_name:
         db = client.get_database(database)
     if set:
@@ -182,7 +186,7 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
 
 
 @cli.command()
-@click.argument("files", type=click.Path(exists=True), nargs=-1)
+@click.argument("files", type=click.Path(), nargs=-1)
 @click.option("--replace/--no-replace", default=False, show_default=True, help="Replace existing objects")
 @click.option("--format", "-f", type=format_choice, help="Input format")
 @click.option("--object", "-i", multiple=True, help="Input object as YAML")
@@ -534,6 +538,7 @@ def pivot(ctx, where, limit, index, columns, values, output_type, output):
 @click.option(
     "--feature-attributes", "-F", type=click.STRING, help="Feature attributes for inference (comma separated)"
 )
+@click.option("--training-collection", type=click.STRING, help="Collection to use for training")
 @click.option("--inference-config-file", "-Y", type=click.Path(), help="Path to inference configuration file")
 @click.option("--export-model", "-E", type=click.Path(), help="Export model to file")
 @click.option("--load-model", "-L", type=click.Path(), help="Load model from file")
@@ -555,6 +560,7 @@ def infer(
     evaluation_count,
     evaluation_match_function,
     training_test_data_split,
+    training_collection,
     predictor_type,
     target_attribute,
     feature_attributes,
@@ -617,6 +623,7 @@ def infer(
     if model_format:
         model_format = ModelSerialization(model_format)
     if load_model:
+        logger.info(f"Loading predictor from {load_model}")
         predictor = get_inference_engine(predictor_type)
         predictor = type(predictor).load_model(load_model)
     else:
@@ -627,13 +634,18 @@ def infer(
         if training_test_data_split:
             config.train_test_split = training_test_data_split
         predictor = get_inference_engine(predictor_type, config=config)
-        if collection:
-            predictor.load_and_split_data(collection)
+        training_collection_obj = collection
+        if training_collection:
+            training_collection_obj = ctx.obj["settings"].database.get_collection(training_collection)
+        if training_collection_obj:
+            logger.info(f"Using collection: {training_collection_obj.alias} for inference")
+            split = training_test_data_split or (1.0, 0.0)
+            predictor.load_and_split_data(training_collection_obj, split=split)
         predictor.initialize_model()
     if export_model:
         logger.info(f"Exporting model to {export_model} in {model_format}")
         predictor.export_model(export_model, model_format)
-    if not query_obj:
+    if not query_obj and where_clause is None:
        if not export_model and not evaluation_count:
            raise ValueError("Query or evaluate must be specified if not exporting model")
        if evaluation_count:
@@ -651,6 +663,12 @@ def infer(
         result = predictor.derive(query_obj)
         dumped_obj = result.model_dump(exclude_none=True)
         write_output([dumped_obj], output_type, target=output)
+    if where_clause is not None:
+        predicted_objs = []
+        for query_obj in collection.find(where_clause).rows:
+            result = predictor.derive(query_obj)
+            predicted_objs.append(result.predicted_object)
+        write_output(predicted_objs, output_type, target=output)
 
 
 @cli.command()
linkml_store/inference/implementations/llm_inference_engine.py ADDED
@@ -0,0 +1,152 @@
+import json
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import ClassVar, List, Optional, TextIO, Union
+
+import yaml
+from linkml_store.utils.llm_utils import parse_yaml_payload
+from llm import get_key
+from pydantic import BaseModel
+
+from linkml_store.api.collection import OBJECT, Collection
+from linkml_store.inference.inference_config import Inference, InferenceConfig, LLMConfig
+from linkml_store.inference.inference_engine import InferenceEngine, ModelSerialization
+from linkml_store.utils.object_utils import select_nested
+
+logger = logging.getLogger(__name__)
+
+MAX_ITERATIONS = 5
+DEFAULT_NUM_EXAMPLES = 20
+
+SYSTEM_PROMPT = """
+Your task is to inference the complete YAML
+object output given the YAML object input. I will provide you
+with contextual information, including the schema,
+to help with the inference. You can use the following
+
+You should return ONLY valid YAML in your response.
+"""
+
+
+class TrainedModel(BaseModel, extra="forbid"):
+    index_rows: List[OBJECT]
+    config: Optional[InferenceConfig] = None
+
+
+class LLMInference(Inference):
+    iterations: int = 0
+
+
+@dataclass
+class LLMInferenceEngine(InferenceEngine):
+    """
+    LLM based predictor.
+
+    Unlike the RAG predictor this performs few-shot inference
+
+    """
+
+    _model: "llm.Model" = None  # noqa: F821
+
+    PERSIST_COLS: ClassVar[List[str]] = [
+        "config",
+    ]
+
+    def __post_init__(self):
+        if not self.config:
+            self.config = InferenceConfig()
+        if not self.config.llm_config:
+            self.config.llm_config = LLMConfig()
+
+    @property
+    def model(self) -> "llm.Model":  # noqa: F821
+        import llm
+
+        if self._model is None:
+            self._model = llm.get_model(self.config.llm_config.model_name)
+            if self._model.needs_key:
+                key = get_key(None, key_alias=self._model.needs_key)
+                self._model.key = key
+
+        return self._model
+
+    def initialize_model(self, **kwargs):
+        logger.info(f"Initializing model {self.model}")
+
+    def object_to_text(self, object: OBJECT) -> str:
+        return yaml.dump(object)
+
+    def _schema_str(self) -> str:
+        db = self.training_data.base_collection.parent
+        from linkml_runtime.dumpers import json_dumper
+        schema_dict = json_dumper.to_dict(db.schema_view.schema)
+        return yaml.dump(schema_dict)
+
+    def derive(self, object: OBJECT, iteration=0, additional_prompt_texts: Optional[List[str]] = None) -> Optional[LLMInference]:
+        import llm
+
+        model: llm.Model = self.model
+        #model_name = self.config.llm_config.model_name
+        #feature_attributes = self.config.feature_attributes
+        target_attributes = self.config.target_attributes
+        query_text = self.object_to_text(object)
+
+        if not target_attributes:
+            target_attributes = [k for k, v in object.items() if v is None or v == ""]
+        #if not feature_attributes:
+        #    feature_attributes = [k for k, v in object.items() if v is not None and v != ""]
+
+        system_prompt = SYSTEM_PROMPT.format(llm_config=self.config.llm_config)
+
+        system_prompt += "\n## SCHEMA:\n\n" + self._schema_str()
+
+        stub = ", ".join([f"{k}: ..." for k in target_attributes])
+        stub = "{" + stub + "}"
+        prompt = (
+            "Provide a YAML object of the form"
+            "```yaml\n"
+            f"{stub}\n"
+            "```\n"
+            "---\nQuery:\n" f"## INCOMPLETE OBJECT:\n{query_text}\n" "## OUTPUT:\n"
+        )
+        logger.info(f"Prompt: {prompt}")
+        response = model.prompt(prompt, system=system_prompt)
+        yaml_str = response.text()
+        logger.info(f"Response: {yaml_str}")
+        predicted_object = parse_yaml_payload(yaml_str, strict=True)
+        predicted_object = {**object, **predicted_object}
+        if self.config.validate_results:
+            base_collection = self.training_data.base_collection
+            errs = list(base_collection.iter_validate_collection([predicted_object]))
+            if errs:
+                print(f"{iteration} // FAILED TO VALIDATE: {yaml_str}")
+                print(f"PARSED: {predicted_object}")
+                print(f"ERRORS: {errs}")
+                if iteration > MAX_ITERATIONS:
+                    raise ValueError(f"Validation errors: {errs}")
+                extra_texts = [
+                    "Make sure results conform to the schema. Previously you provided:\n",
+                    yaml_str,
+                    "\nThis was invalid.\n",
+                    "Validation errors:\n",
+                ] + [self.object_to_text(e) for e in errs]
+                return self.derive(object, iteration=iteration+1, additional_prompt_texts=extra_texts)
+        return LLMInference(predicted_object=predicted_object, iterations=iteration+1, query=object)
+
+
+    def export_model(
+        self, output: Optional[Union[str, Path, TextIO]], model_serialization: ModelSerialization = None, **kwargs
+    ):
+        self.save_model(output)
+
+    def save_model(self, output: Union[str, Path]) -> None:
+        """
+        Save the trained model and related data to a file.
+
+        :param output: Path to save the model
+        """
+        raise NotImplementedError("Does not make sense for this engine")
+
+    @classmethod
+    def load_model(cls, file_path: Union[str, Path]) -> "LLMInferenceEngine":
+        raise NotImplementedError("Does not make sense for this engine")
linkml_store/inference/implementations/rag_inference_engine.py CHANGED
@@ -20,7 +20,7 @@ DEFAULT_NUM_EXAMPLES = 20
 DEFAULT_MMR_RELEVANCE_FACTOR = 0.8
 
 SYSTEM_PROMPT = """
-You are a {llm_config.role}, your task is to inference the YAML
+You are a {llm_config.role}, your task is to infer the YAML
 object output given the YAML object input. I will provide you
 with a collection of examples that will provide guidance both
 on the desired structure of the response, as well as the kind
@@ -130,23 +130,34 @@ class RAGInferenceEngine(InferenceEngine):
         else:
             if not self.rag_collection.indexers:
                 raise ValueError("RAG collection must have an indexer attached")
+            logger.info(f"Searching {self.rag_collection.alias} for examples for: {query_text}")
             rs = self.rag_collection.search(query_text, limit=num_examples, index_name="llm",
                                             mmr_relevance_factor=mmr_relevance_factor)
             examples = rs.rows
+            logger.info(f"Found {len(examples)} examples")
         if not examples:
             raise ValueError(f"No examples found for {query_text}; size = {self.rag_collection.size()}")
         prompt_clauses = []
-        query_obj = select_nested(object, feature_attributes)
+        this_feature_attributes = feature_attributes
+        if not this_feature_attributes:
+            this_feature_attributes = list(set(object.keys()) - set(target_attributes))
+        query_obj = select_nested(object, this_feature_attributes)
         query_text = self.object_to_text(query_obj)
         for example in examples:
-            input_obj = select_nested(example, feature_attributes)
+            this_feature_attributes = feature_attributes
+            if not this_feature_attributes:
+                this_feature_attributes = list(set(example.keys()) - set(target_attributes))
+            if not this_feature_attributes:
+                raise ValueError(f"No feature attributes found in example {example}")
+            input_obj = select_nested(example, this_feature_attributes)
             input_obj_text = self.object_to_text(input_obj)
             if input_obj_text == query_text:
-                raise ValueError(
-                    f"Query object {query_text} is the same as example object {input_obj_text}\n"
-                    "This indicates possible test data leakage\n."
-                    "TODO: allow an option that allows user to treat this as a basic lookup\n"
-                )
+                continue
+                #raise ValueError(
+                #    f"Query object {query_text} is the same as example object {input_obj_text}\n"
+                #    "This indicates possible test data leakage\n."
+                #    "TODO: allow an option that allows user to treat this as a basic lookup\n"
+                #)
             output_obj = select_nested(example, target_attributes)
             prompt_clause = (
                 "---\nExample:\n" f"## INPUT:\n{input_obj_text}\n" f"## OUTPUT:\n{self.object_to_text(output_obj)}\n"
@@ -169,7 +180,7 @@ class RAGInferenceEngine(InferenceEngine):
                                        encoding=encoding, token_limit=token_limit,
                                        additional_text=system_prompt)
         logger.info(f"Prompt: {prompt}")
-        response = model.prompt(prompt, system_prompt)
+        response = model.prompt(prompt, system=system_prompt)
         yaml_str = response.text()
         logger.info(f"Response: {yaml_str}")
         predicted_object = self._parse_yaml_payload(yaml_str, strict=True)
linkml_store/inference/inference_engine.py CHANGED
@@ -124,7 +124,7 @@ class InferenceEngine(ABC):
         Load the data and split it into training and testing sets.
 
         :param collection:
-        :param split:
+        :param split: Tuple of training and testing split ratios.
         :param randomize:
         :return:
         """
@@ -136,7 +136,7 @@ class InferenceEngine(ABC):
             self.training_data = CollectionSlice(name="train", base_collection=collection, indices=None)
             self.testing_data = None
             return
-        logger.info(f"Loading and splitting data from collection {collection.alias}")
+        logger.info(f"Loading and splitting data {split} from collection {collection.alias}")
         size = collection.size()
         indices = range(size)
         if randomize:
linkml_store/utils/format_utils.py CHANGED
@@ -3,6 +3,7 @@ import gzip
 import io
 import json
 import logging
+import re
 import sys
 import tarfile
 from enum import Enum
@@ -31,10 +32,13 @@ class Format(Enum):
     TSV = "tsv"
     CSV = "csv"
     XML = "xml"
+    OBO = "obo"
+    PKL = "pkl"
     PYTHON = "python"
     PARQUET = "parquet"
     FORMATTED = "formatted"
     TABLE = "table"
+    XLSX = "xlsx"
     SQLDUMP_DUCKDB = "duckdb"
     SQLDUMP_POSTGRES = "postgres"
     DUMP_MONGODB = "mongodb"
@@ -67,6 +71,9 @@ class Format(Enum):
     def is_dump_format(self):
         return self in [Format.SQLDUMP_DUCKDB, Format.SQLDUMP_POSTGRES, Format.DUMP_MONGODB]
 
+    def is_binary_format(self):
+        return self in [Format.PARQUET, Format.XLSX]
+
     def is_xsv(self):
         return self in [Format.TSV, Format.CSV]
 
@@ -95,6 +102,26 @@ def load_objects_from_url(
     return objs
 
 
+def clean_pandas_value(v):
+    """Clean a single value from pandas."""
+    import math
+
+    if isinstance(v, float):
+        if math.isnan(v) or math.isinf(v):
+            return None
+        return float(v)  # Ensures proper float type
+    return v
+
+
+def clean_nested_structure(obj):
+    """Recursively clean a nested structure of dicts/lists from pandas."""
+    if isinstance(obj, dict):
+        return {k: clean_nested_structure(v) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [clean_nested_structure(item) for item in obj]  # Fixed: using 'item' instead of 'v'
+    else:
+        return clean_pandas_value(obj)
+
 def process_file(
     f: IO, format: Format, expected_type: Optional[Type] = None, header_comment_token: Optional[str] = None
 ) -> List[Dict[str, Any]]:
@@ -128,6 +155,19 @@ def process_file(
         objs = list(reader)
     elif format == Format.XML:
         objs = xmltodict.parse(f.read())
+    elif format == Format.PKL:
+        objs = pd.read_pickle(f).to_dict(orient="records")
+    elif format == Format.XLSX:
+        xls = pd.ExcelFile(f)
+        objs = {sheet: clean_nested_structure(xls.parse(sheet).to_dict(orient="records")) for sheet in xls.sheet_names}
+    elif format == Format.OBO:
+        blocks = split_document(f.read(), "\n\n")
+        id_pattern = re.compile(r"id: (\S+)")
+        def get_id(block):
+            m = id_pattern.search(block)
+            return m.group(1) if m else None
+        objs = [{"id": get_id(block), "content": block} for block in blocks]
+        objs = [obj for obj in objs if obj["id"]]
     elif format == Format.PARQUET:
         import pyarrow.parquet as pq
 
@@ -167,6 +207,14 @@ def load_objects(
     if isinstance(file_path, Path):
         file_path = str(file_path)
 
+    for url_scheme in ["http", "https", "ftp"]:
+        if file_path.startswith(f"{url_scheme}://"):
+            return load_objects_from_url(
+                file_path,
+                format=format,
+                expected_type=expected_type,
+            )
+
     if isinstance(format, str):
         format = Format(format)
 
@@ -185,9 +233,9 @@ def load_objects(
     else:
         if Path(file_path).is_dir():
             raise ValueError(f"{file_path} is a dir, which is invalid for {format}")
-        mode = "rb" if format == Format.PARQUET or compression == "gz" else "r"
         open_func = gzip.open if compression == "gz" else open
         format = Format.guess_format(file_path) if not format else format
+        mode = "rb" if (format and format.is_binary_format()) or compression == "gz" else "r"
     with open_func(file_path, mode) if file_path != "-" else sys.stdin as f:
         if compression == "gz" and mode == "r":
             f = io.TextIOWrapper(f)
@@ -343,3 +391,14 @@ def guess_format(path: str) -> Optional[Format]:
     :return: The guessed format.
     """
     return Format.guess_format(path)
+
+
+def split_document(doc: str, delimiter: str):
+    """
+    Split a document into parts based on a delimiter.
+
+    :param doc: The document to split.
+    :param delimiter: The delimiter.
+    :return: The parts of the document.
+    """
+    return doc.split(delimiter)
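The OBO branch above splits the document on blank lines and keeps only blocks that carry an id: line. A standalone illustration of that parsing logic (plain Python, not the linkml-store API):

    import re

    doc = (
        "[Term]\nid: GO:0000001\nname: mitochondrion inheritance\n"
        "\n"
        "[Term]\nname: block without an id\n"
    )
    id_pattern = re.compile(r"id: (\S+)")
    objs = []
    for block in doc.split("\n\n"):
        m = id_pattern.search(block)
        if m:
            objs.append({"id": m.group(1), "content": block})
    print(objs)  # only the GO:0000001 block survives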
linkml_store/utils/llm_utils.py CHANGED
@@ -100,3 +100,18 @@ def get_token_limit(model_name: str) -> int:
         if model in model_name:
             return token_limit
     return 4096
+
+
+def parse_yaml_payload(yaml_str: str, strict=False) -> Optional[dict]:
+    import yaml
+    if "```" in yaml_str:
+        yaml_str = yaml_str.split("```")[1].strip()
+        if yaml_str.startswith("yaml"):
+            yaml_str = yaml_str[4:].strip()
+    try:
+        return yaml.safe_load(yaml_str)
+    except Exception as e:
+        if strict:
+            raise e
+        logger.error(f"Error parsing YAML: {yaml_str}\n{e}")
+        return None
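parse_yaml_payload strips a markdown code fence (and a leading "yaml" language tag) before handing the text to yaml.safe_load; with strict=True a parse failure re-raises instead of returning None. A small check, assuming the llm extra is installed so the module imports cleanly:

    from linkml_store.utils.llm_utils import parse_yaml_payload

    raw = "```yaml\nname: John\nage_in_years: 30\n```"
    print(parse_yaml_payload(raw, strict=True))
    # {'name': 'John', 'age_in_years': 30}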
linkml_store/utils/object_utils.py CHANGED
@@ -124,7 +124,7 @@ def select_nested(data: dict, paths: List[Union[str, List[str]]], current_path=N
 
     Args:
         data (dict): The input nested dictionary.
-        selectors (list): A list of selector strings.
+        paths (list): A list of selector strings.
 
     Returns:
         dict: A new dictionary with the same structure, but only the selected attributes.
@@ -162,6 +162,8 @@ def select_nested(data: dict, paths: List[Union[str, List[str]]], current_path=N
     if current_path is None:
         current_path = []
     matching_paths = []
+    if not paths:
+        raise ValueError("No paths provided")
     for path in paths:
         if isinstance(path, str):
             path = path.split(".")
linkml_store/utils/sql_utils.py CHANGED
@@ -5,7 +5,7 @@ import sqlalchemy
 import sqlalchemy.sql.sqltypes as sqlt
 from linkml_runtime.linkml_model import SchemaDefinition, SlotDefinition
 from linkml_runtime.utils.schema_builder import SchemaBuilder
-from sqlalchemy import MetaData
+from sqlalchemy import MetaData, quoted_name
 
 from linkml_store.api.queries import Query
 
@@ -115,7 +115,13 @@ def facet_count_sql(query: Query, facet_column: Union[str, Tuple[str, ...]], mul
         conditions = [cond for cond in where_clause_sql.split(" AND ") if not cond.startswith(f"{facet_column} ")]
         modified_where = " AND ".join(conditions)
 
+    def make_col_safe(col):
+        return '"' + quoted_name(col, True) + '"' if ' ' in col else col
+
+    if isinstance(facet_column, str):
+        facet_column = make_col_safe(facet_column)
     if isinstance(facet_column, tuple):
+        facet_column = [make_col_safe(col) for col in facet_column]
         if multivalued:
             raise NotImplementedError("Multivalued facets are not supported for multiple columns")
         facet_column = ", ".join(facet_column)
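The make_col_safe helper wraps facet columns whose names contain spaces in double quotes (via SQLAlchemy's quoted_name) so the generated facet SQL stays valid. A hedged sketch, assuming facet_count_sql returns the generated SQL string and that multivalued can be passed explicitly:

    from linkml_store.api.queries import Query
    from linkml_store.utils.sql_utils import facet_count_sql

    q = Query(from_table="persons", where_clause={"name": "John"})
    # "age in years" contains a space, so it is emitted quoted in the facet SQL.
    print(facet_count_sql(q, facet_column="age in years", multivalued=False))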
linkml_store/utils/vector_utils.py CHANGED
@@ -34,7 +34,7 @@ def pairwise_cosine_similarity(vector1: np.array, vector2: np.array) -> float:
     dot_product = np.dot(vector1, vector2)
     norm1 = np.linalg.norm(vector1)
     norm2 = np.linalg.norm(vector2)
-    return dot_product / (norm1 * norm2)
+    return float(dot_product / (norm1 * norm2))
 
 
 def compute_cosine_similarity_matrix(list1: LOL, list2: LOL) -> np.ndarray:
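The cast above makes pairwise_cosine_similarity return a plain Python float rather than a numpy scalar (which, among other things, serializes cleanly to JSON). Quick check:

    import numpy as np
    from linkml_store.utils.vector_utils import pairwise_cosine_similarity

    sim = pairwise_cosine_similarity(np.array([1.0, 0.0]), np.array([1.0, 1.0]))
    print(type(sim), round(sim, 4))  # <class 'float'> 0.7071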
linkml_store-0.2.6.dist-info/METADATA CHANGED
@@ -1,14 +1,13 @@
 Metadata-Version: 2.3
 Name: linkml-store
-Version: 0.2.4
+Version: 0.2.6
 Summary: linkml-store
 License: MIT
 Author: Author 1
 Author-email: author@org.org
-Requires-Python: >=3.9, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*, !=3.8.*
+Requires-Python: >=3.10,<4.0
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -20,7 +19,6 @@ Provides-Extra: bigquery
 Provides-Extra: fastapi
 Provides-Extra: frictionless
 Provides-Extra: h5py
-Provides-Extra: ibis
 Provides-Extra: llm
 Provides-Extra: map
 Provides-Extra: mongodb
@@ -36,20 +34,18 @@ Requires-Dist: duckdb (>=0.10.1)
 Requires-Dist: duckdb-engine (>=0.11.2)
 Requires-Dist: fastapi ; extra == "fastapi"
 Requires-Dist: frictionless ; extra == "frictionless"
-Requires-Dist: gcsfs ; extra == "ibis"
 Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
 Requires-Dist: h5py ; extra == "h5py"
-Requires-Dist: ibis-framework[duckdb,examples] (>=9.3.0) ; extra == "ibis"
 Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
 Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
-Requires-Dist: jsonpatch (>=1.33,<2.0)
+Requires-Dist: jsonpatch (>=1.33)
 Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
 Requires-Dist: linkml-runtime (>=1.8.0)
 Requires-Dist: linkml_map ; extra == "map"
 Requires-Dist: linkml_renderer ; extra == "renderer"
 Requires-Dist: llm ; extra == "llm" or extra == "all"
 Requires-Dist: matplotlib ; extra == "analytics"
-Requires-Dist: multipledispatch ; extra == "ibis"
+Requires-Dist: multipledispatch
 Requires-Dist: neo4j ; extra == "neo4j" or extra == "all"
 Requires-Dist: networkx ; extra == "neo4j"
 Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
@@ -57,8 +53,9 @@ Requires-Dist: plotly ; extra == "analytics"
 Requires-Dist: py2neo ; extra == "neo4j"
 Requires-Dist: pyarrow ; extra == "pyarrow"
 Requires-Dist: pydantic (>=2.0.0,<3.0.0)
-Requires-Dist: pymongo ; extra == "mongodb"
+Requires-Dist: pymongo (>=4.11,<5.0) ; extra == "mongodb"
 Requires-Dist: pystow (>=0.5.4,<0.6.0)
+Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
 Requires-Dist: ruff (>=0.6.2) ; extra == "tests"
 Requires-Dist: scikit-learn ; extra == "scipy"
 Requires-Dist: scipy ; extra == "scipy"
@@ -68,7 +65,7 @@ Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
 Requires-Dist: tabulate
 Requires-Dist: tiktoken ; extra == "llm"
 Requires-Dist: uvicorn ; extra == "fastapi"
-Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
+Requires-Dist: xmltodict (>=0.13.0)
 Description-Content-Type: text/markdown
 
 # linkml-store
linkml_store-0.2.6.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
 linkml_store/__init__.py,sha256=jlU6WOUAn8cKIhzbTULmBTWpW9gZdEt7q_RI6KZN1bY,118
 linkml_store/api/__init__.py,sha256=3CelcFEFz0y3MkQAzhQ9JxHIt1zFk6nYZxSmYTo8YZE,226
-linkml_store/api/client.py,sha256=wFVgl1NUovaKLqNVUEt9dsnoIzjzqFvktJVncAupdE4,12362
-linkml_store/api/collection.py,sha256=CGvWxH7HRhjDt9Cp3SGdMqyhYx7Q4fRKUtAJV74_l0g,39711
+linkml_store/api/client.py,sha256=-XX1H5dIPBCPwU3lgZLtb7JXmUQR_c-FYGSKEW1idr8,12970
+linkml_store/api/collection.py,sha256=Edwyb36D8NJjb-bkDy3O8BJbDVxtSH3yOKI_QW9_cic,41620
 linkml_store/api/config.py,sha256=pOz210JIwkEEXtfjcsZBp1UEedkBu8RkH62Qa1b4exI,5777
-linkml_store/api/database.py,sha256=nvae8jnOZsQIFCsl_lRBnKcvrpJg4A10ujIKGeMyUS8,29350
+linkml_store/api/database.py,sha256=JyQ8SuPrNiltgMH4pdFt4IgGBc9nq3mfRJ5ZUEIDEqA,29696
 linkml_store/api/queries.py,sha256=tx9fgGY5fC_2ZbIvg4BqTK_MXJwA_DI4mxr8HdQ6Vos,2075
 linkml_store/api/stores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/api/stores/chromadb/__init__.py,sha256=e9BkOPuPnVQKA5PRKDulag59yGNHDP3U2_DnPSrFAKM,132
 linkml_store/api/stores/chromadb/chromadb_collection.py,sha256=RQUZx5oeotkzNihg-dlSevkiTiKY1d9x0bS63HF80W4,4270
 linkml_store/api/stores/chromadb/chromadb_database.py,sha256=dZA3LQE8-ZMhJQOzsUFyxehnKpFF7adR182aggfkaFY,3205
 linkml_store/api/stores/duckdb/__init__.py,sha256=rbQSDgNg-fdvi6-pHGYkJTST4p1qXUZBf9sFSsO3KPk,387
-linkml_store/api/stores/duckdb/duckdb_collection.py,sha256=Rkbm_uIVIRj5576lEolsyY_3Um1h8Lf3RHn8Fy3LIgU,7036
-linkml_store/api/stores/duckdb/duckdb_database.py,sha256=GH9bcOfHpNp6r-Eu1C3W0xuYcLsqGFDH1Sh4weifGaQ,9923
+linkml_store/api/stores/duckdb/duckdb_collection.py,sha256=1Jc770CR3oipfLj9iJn-dbkgtoEObLbylUQCoUWxuzs,7313
+linkml_store/api/stores/duckdb/duckdb_database.py,sha256=idIe89yqrdMKR69Xpi3cd5LStwe6FRBOm4eJGsHfOV0,10904
 linkml_store/api/stores/duckdb/mappings.py,sha256=tDce3W1Apwammhf4LS6cRJ0m4NiJ0eB7vOI_4U5ETY8,148
 linkml_store/api/stores/filesystem/__init__.py,sha256=KjvCjdttwqMHNeGyL-gr59zRz0--HFEWWUNNCJ5hITs,347
 linkml_store/api/stores/filesystem/filesystem_collection.py,sha256=9gqY2KRZsn_RWk4eKkxFd3_wcxs5YaXvcBI7GGJBMGE,6751
@@ -20,8 +20,8 @@ linkml_store/api/stores/hdf5/__init__.py,sha256=l4cIh3v7P0nPbwGIsfuCMD_serQ8q8c7
 linkml_store/api/stores/hdf5/hdf5_collection.py,sha256=mnpLMYehn3PuaIjp2dXrIWu8jh-bdQ84X2Ku83jMdEY,3805
 linkml_store/api/stores/hdf5/hdf5_database.py,sha256=EZbjrpaqiNDEFvoD5dZNcGBXA8z6HRNL81emueTZWNw,2714
 linkml_store/api/stores/mongodb/__init__.py,sha256=OSFCr7RQlDEe-O-Y0P_i912oAMK-L3pC7Cnj7sxlwAk,510
-linkml_store/api/stores/mongodb/mongodb_collection.py,sha256=unN0v7RYlGIiJxEhbNRxZ86TVQ4ELlAsNWTwEbg7E_g,6831
-linkml_store/api/stores/mongodb/mongodb_database.py,sha256=Y9MIV0KSRGCyopz8vGEivhSuvF0vZLCDJd29cdqMIX8,3857
+linkml_store/api/stores/mongodb/mongodb_collection.py,sha256=hKwaHHFxfWqjBNHZpzVuDVruH_SdXzoIKHdePN3JDEg,10447
+linkml_store/api/stores/mongodb/mongodb_database.py,sha256=HfVEEFCuwZ96KO3eWuSGFajRUgZPmeG-fqsrWHZhJng,4077
 linkml_store/api/stores/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/api/stores/neo4j/neo4j_collection.py,sha256=a-Az5_ypdBMgeNyhrTW7q-ik-vYPCDDONIK7N_CDA9c,17449
 linkml_store/api/stores/neo4j/neo4j_database.py,sha256=zanP_uBZO3AH0wuzbu6auK4zcZon_lMreC2vooSZwt8,5571
@@ -30,7 +30,7 @@ linkml_store/api/stores/solr/solr_collection.py,sha256=ZlxC3JbVaHfSA4HuTeJTsp6qe
 linkml_store/api/stores/solr/solr_database.py,sha256=TFjqbY7jAkdrhAchbNg0E-mChSP7ogNwFExslbvX7Yo,2877
 linkml_store/api/stores/solr/solr_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/api/types.py,sha256=3aIQtDFMvsSmjuN5qrR2vNK5sHa6yzD_rEOPA6tHwvg,176
-linkml_store/cli.py,sha256=bWbWQita8KCBjzovBRzQqHtjbRrf7Ttxq0Fe8zrDuds,30235
+linkml_store/cli.py,sha256=GtbLVMcH6rHEeEMljFGVFd8U5h71WN_ojmIp17UlJxo,31253
 linkml_store/constants.py,sha256=x4ZmDsfE9rZcL5WpA93uTKrRWzCD6GodYXviVzIvR38,112
 linkml_store/graphs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/graphs/graph_map.py,sha256=bYRxv8n1YPnFqE9d6JKNmRawb8EAhsPlHhBue0gvtZE,712
@@ -42,29 +42,30 @@ linkml_store/index/indexer.py,sha256=e5dsjh2wjOTDRsfClKJAFTbcK1UC7BOGkUCOfDg9omI
 linkml_store/inference/__init__.py,sha256=b8NAFNZjOYU_8gOvxdyCyoiHOOl5Ai2ckKs1tv7ZkkY,342
 linkml_store/inference/evaluation.py,sha256=YDFYaEu2QLSfFq4oyARrnKfTiPLtNF8irhhspgVDfdY,6013
 linkml_store/inference/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-linkml_store/inference/implementations/rag_inference_engine.py,sha256=mN7YQI-BeZglsAnZnNIuAj-Nxg1su5efNaohooEmNmM,10622
+linkml_store/inference/implementations/llm_inference_engine.py,sha256=iSxiboYpgB0_yL4zlHIJx2ZbvDrJC8JioewTKgLUS0U,5443
+linkml_store/inference/implementations/rag_inference_engine.py,sha256=R3Dz-DyNx7UU3ZaV1n9homxC2nUAT5JZnd4IRkIFftk,11326
 linkml_store/inference/implementations/rule_based_inference_engine.py,sha256=0IEY_fsHJPJy6QKbYQU_qE87RRnPOXQxPuJKXCQG8jU,6250
 linkml_store/inference/implementations/sklearn_inference_engine.py,sha256=Sdi7CoRK3qoLJu3prgLy1Ck_zQ1gHWRKFybHe7XQ4_g,13192
 linkml_store/inference/inference_config.py,sha256=EFGdigxWsfTPREbgqyJVRShN0JktCEmFLLoECrLfXSg,2282
-linkml_store/inference/inference_engine.py,sha256=IxQIOgmXCDI8ilCGtoaVA_1wFROUg4uH1_yGbX78N2U,7139
+linkml_store/inference/inference_engine.py,sha256=7P9syuIwwBpCUytfqZcCR5ei61ys5LIw8YhO0iIehG4,7191
 linkml_store/inference/inference_engine_registry.py,sha256=6o66gvBYBwdeAKm62zqqvfaBlcopVP_cla3L6uXGsHA,3015
 linkml_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/utils/change_utils.py,sha256=O2rvSvgTKB60reLLz9mX5OWykAA_m93bwnUh5ZWa0EY,471
 linkml_store/utils/file_utils.py,sha256=rQ7-XpmI6_Kx_dhEnI98muFRr0MmgI_kZ_9cgJBf_0I,1411
-linkml_store/utils/format_utils.py,sha256=sjpdJJ8Ww2ilm03mQt_v4QkZvQMymqUeTiPS3U1ViKM,11067
+linkml_store/utils/format_utils.py,sha256=hHRFkh3cwb5shM6RO7WWuOXsHHH283M_vZjXRuzbwWI,13035
 linkml_store/utils/io.py,sha256=JHUrWDtlZC2jtN_PQZ4ypdGIyYlftZEN3JaCvEPs44w,884
-linkml_store/utils/llm_utils.py,sha256=0lvR_lBSDSuP-0Eum16QBUsSv8sWfDjZPz_MnDSPvn0,3048
+linkml_store/utils/llm_utils.py,sha256=51AiwMeXm2FpiD-9AywKcbZzlUMqXRAjDFJEp5Ia0LA,3494
 linkml_store/utils/mongodb_utils.py,sha256=Rl1YmMKs1IXwSsJIViSDChbi0Oer5cBnMmjka2TeQS8,4665
 linkml_store/utils/neo4j_utils.py,sha256=y3KPmDZ8mQmePgg0lUeKkeKqzEr2rV226xxEtHc5pRg,1266
-linkml_store/utils/object_utils.py,sha256=Vib-5Ip2DlRVKLZpU-008ZZI813-vfKVSCY0TksRenM,6293
+linkml_store/utils/object_utils.py,sha256=V0s_ZzqAGkFUfrU-9fAPb5g3snMmgKKhR3SiYZgECXI,6353
 linkml_store/utils/pandas_utils.py,sha256=djiFPO3YbgRVo2XAZuKCtgH8QVLuUyPIsfS8e-0umsU,3182
 linkml_store/utils/patch_utils.py,sha256=q-h_v68okyruzdPTEHCe0WubbQHKpi1qy5bJ9vFWDo8,4823
 linkml_store/utils/query_utils.py,sha256=HWt46BsGWoIGiNBTtvpXGY6onPRWsQky6eu_9cYqbvo,3440
 linkml_store/utils/schema_utils.py,sha256=iJiZxo5NGr7v87h4DV6V9DrDOZHSswMRuf0N4V2rVtg,646
 linkml_store/utils/sklearn_utils.py,sha256=itPpcrsbbyOazdjmivaaZ1lyZeytm0a0hJ2AS8ziUgg,7590
-linkml_store/utils/sql_utils.py,sha256=T41w_vsc3SauTJQkDMwid_nOtKW1YOKyUuaxEf470hk,5938
+linkml_store/utils/sql_utils.py,sha256=qatmrJR2u4ICaO7QhDRL1ukxJlLv0zYSGgmmFV-hdnU,6210
 linkml_store/utils/stats_utils.py,sha256=4KqBb1bqDgAmq-1fJLLu5B2paPgoZZc3A-gnyVam4bI,1799
-linkml_store/utils/vector_utils.py,sha256=Q1RlpDzavJAM9-H2m2XNU5BNUcfZkpIWeEZii2hK0PQ,5449
+linkml_store/utils/vector_utils.py,sha256=QcLTUQWm5z1OTtiOl0mXKJyFJcQeCtbcc-GQwHhkUYw,5456
 linkml_store/webapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/webapi/html/__init__.py,sha256=hwp5eeBJKH65Bvv1x9Z4vsT1tLSYtb9Dq4I9r1kL1q0,69
 linkml_store/webapi/html/base.html.j2,sha256=hoiV2uaSxxrQp7VuAZBOHueH7czyJMYcPBRN6dZFYhk,693
@@ -73,8 +74,8 @@ linkml_store/webapi/html/database_details.html.j2,sha256=qtXdavbZb0mohiObI9dvJtk
 linkml_store/webapi/html/databases.html.j2,sha256=a9BCWQYfPeFhdUd31CWhB0yWhTIFXQayO08JgjyqKoc,294
 linkml_store/webapi/html/generic.html.j2,sha256=KtLaO2HUEF2Opq-OwHKgRKetNWe8IWc6JuIkxRPsywk,1018
 linkml_store/webapi/main.py,sha256=B0Da575kKR7X88N9ykm99Dem8FyBAW9f-w3A_JwUzfw,29165
-linkml_store-0.2.4.dist-info/LICENSE,sha256=77mDOslUnalYnuq9xQYZKtIoNEzcH9mIjvWHOKjamnE,1086
-linkml_store-0.2.4.dist-info/METADATA,sha256=PJX-_TSPk6WDXDCmvuFDUb5649ECQc2N6zP4pWqhBvU,7204
-linkml_store-0.2.4.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-linkml_store-0.2.4.dist-info/entry_points.txt,sha256=gWxVsHqx-t-UKWFHFzawQTvs4is4vC1rCF5AeKyqWWk,101
-linkml_store-0.2.4.dist-info/RECORD,,
+linkml_store-0.2.6.dist-info/LICENSE,sha256=77mDOslUnalYnuq9xQYZKtIoNEzcH9mIjvWHOKjamnE,1086
+linkml_store-0.2.6.dist-info/METADATA,sha256=s5x6OmbGC7oVUpXunjiM42sASvsvKR8XRoJllGqF6ww,6964
+linkml_store-0.2.6.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+linkml_store-0.2.6.dist-info/entry_points.txt,sha256=gWxVsHqx-t-UKWFHFzawQTvs4is4vC1rCF5AeKyqWWk,101
+linkml_store-0.2.6.dist-info/RECORD,,
linkml_store-0.2.6.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.0.1
+Generator: poetry-core 2.1.1
 Root-Is-Purelib: true
 Tag: py3-none-any