linkml-store 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


linkml_store/api/client.py CHANGED
@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
 
 HANDLE_MAP = {
     "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
+    "sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
     "solr": "linkml_store.api.stores.solr.solr_database.SolrDatabase",
     "mongodb": "linkml_store.api.stores.mongodb.mongodb_database.MongoDBDatabase",
     "chromadb": "linkml_store.api.stores.chromadb.chromadb_database.ChromaDBDatabase",
@@ -22,6 +23,12 @@ HANDLE_MAP = {
     "file": "linkml_store.api.stores.filesystem.filesystem_database.FileSystemDatabase",
 }
 
+SUFFIX_MAP = {
+    "ddb": "duckdb:///{path}",
+    "duckdb": "duckdb:///{path}",
+    "db": "duckdb:///{path}",
+}
+
 
 class Client:
     """
@@ -197,6 +204,13 @@ class Client:
         :param kwargs:
         :return:
         """
+        if ":" not in handle:
+            if alias is None:
+                alias = handle
+            if "." in handle:
+                suffix = handle.split(".")[-1]
+                if suffix in SUFFIX_MAP:
+                    handle = SUFFIX_MAP[suffix].format(path=handle)
         if ":" not in handle:
             scheme = handle
             handle = None
@@ -220,7 +234,9 @@ class Client:
         if not alias:
             alias = handle
         if not self._databases:
+            logger.info("Initializing databases")
             self._databases = {}
+        logger.info(f"Attaching {alias}")
         self._databases[alias] = db
         db.parent = self
         if db.alias:
@@ -263,8 +279,9 @@ class Client:
             self._databases[name] = db
         if name not in self._databases:
             if create_if_not_exists:
-                logger.info(f"Creating database: {name}")
-                self.attach_database(name, **kwargs)
+                logger.info(f"Creating/attaching database: {name}")
+                db = self.attach_database(name, **kwargs)
+                name = db.alias
             else:
                 raise ValueError(f"Database {name} does not exist")
         db = self._databases[name]
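The SUFFIX_MAP addition lets attach_database accept a bare file path: when the handle has no scheme, a ddb/duckdb/db suffix is expanded to a duckdb:/// URL and the original path doubles as the alias. A minimal usage sketch, assuming a writable working directory (the file name is illustrative):

    from linkml_store import Client

    client = Client()
    # "people.duckdb" has no ":" scheme, so its "duckdb" suffix is looked up in
    # SUFFIX_MAP and the handle becomes "duckdb:///people.duckdb"; the alias
    # defaults to the original path.
    db = client.attach_database("people.duckdb")
    persons = db.create_collection("Person", alias="persons")
    persons.insert([{"id": "P1", "name": "John"}])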
linkml_store/api/collection.py CHANGED
@@ -1,6 +1,7 @@
 """A structure for representing collections of similar objects."""
 
 import hashlib
+import json
 import logging
 from collections import defaultdict
 from pathlib import Path
@@ -210,8 +211,59 @@ class Collection(Generic[DatabaseType]):
         """
         raise NotImplementedError
 
+    def index (
+        self,
+        objs: Union[OBJECT, List[OBJECT]],
+        index_name: Optional[str] = None,
+        replace: bool = False,
+        unique: bool = False,
+        **kwargs,
+    ) -> None:
+        """
+        Index objects in the collection.
+
+        :param objs:
+        :param index_name:
+        :param replace: replace the index, or not
+        :param unique: boolean used to declare the index unique or not
+        :param kwargs:
+        :return:
+        """
+        raise NotImplementedError
+
+    def upsert(self,
+               objs: Union[OBJECT, List[OBJECT]],
+               filter_fields: List[str],
+               update_fields: Union[List[str], None] = None, **kwargs):
+        """
+        Add one or more objects to the collection.
+
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("mongodb", alias="test")
+        >>> collection = db.create_collection("Person")
+        >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
+        >>> collection.upsert(objs)
+
+        :param objs:
+        :param filter_fields: List of field names to use as the filter for matching existing collections.
+        :param update_fields: List of field names to include in the update. If None, all fields are updated.
+        :param kwargs:
+
+        :return:
+        """
+        raise NotImplementedError
+
     def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
-        logger.info(f"Pre-query hook (state: {self._initialized}; Q= {query}")
+        """
+        Pre-query hook.
+
+        This is called before a query is executed. It is used to materialize derivations and indexes.
+        :param query:
+        :param kwargs:
+        :return:
+        """
+        logger.debug(f"Pre-query hook (state: {self._initialized}; Q= {query}")  # if logging.info, this is very noisy.
         if not self._initialized:
             self._materialize_derivations()
             self._initialized = True
@@ -536,7 +588,13 @@ class Collection(Generic[DatabaseType]):
         qr = ix_coll.find(where=where, limit=-1, **kwargs)
         index_col = ix.index_field
         # TODO: optimize this for large indexes
-        vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
+        def row2array(row):
+            v = row[index_col]
+            if isinstance(v, str):
+                # sqlite stores arrays as strings
+                v = json.loads(v)
+            return np.array(v, dtype=float)
+        vector_pairs = [(row, row2array(row)) for row in qr.rows]
         results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
         for r in results:
             del r[1][index_col]
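The row2array helper above exists because SQLite has no native array type, so indexed vectors come back as JSON-encoded strings. A standalone sketch of the same decoding logic (plain Python, not the linkml-store API; the column name is illustrative):

    import json
    import numpy as np

    def row2array(row: dict, index_col: str) -> np.ndarray:
        v = row[index_col]
        if isinstance(v, str):
            # SQLite returns the stored vector as a JSON string, e.g. "[0.1, 0.2]"
            v = json.loads(v)
        return np.array(v, dtype=float)

    print(row2array({"__index__": "[0.1, 0.2, 0.3]"}, "__index__"))  # [0.1 0.2 0.3]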
linkml_store/api/database.py CHANGED
@@ -276,14 +276,15 @@ class Database(ABC, Generic[CollectionType]):
 
         Examples:
 
-        >>> from linkml_store.api.client import Client
-        >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
-        >>> collection = db.create_collection("Person", alias="persons")
-        >>> collection.alias
-        'persons'
-        >>> collection.target_class_name
-        'Person'
+        >>> from linkml_store.api.client import Client
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb", alias="test")
+        >>> collection = db.create_collection("Person", alias="persons")
+        >>> collection.alias
+        'persons'
+
+        >>> collection.target_class_name
+        'Person'
 
         If alias is not provided, it defaults to the name of the type.
 
@@ -419,7 +420,7 @@ class Database(ABC, Generic[CollectionType]):
         >>> from linkml_store.api.client import Client
         >>> from linkml_store.api.queries import Query
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person")
         >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
         >>> query = Query(from_table="Person", where_clause={"name": "John"})
@@ -451,7 +452,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person", alias="persons")
         >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
         >>> schema_view = db.schema_view
@@ -470,6 +471,7 @@ class Database(ABC, Generic[CollectionType]):
         if not self._schema_view:
             self._initialize_schema()
         if not self._schema_view:
+            logger.info("Inducing schema view")
             self._schema_view = self.induce_schema_view()
         return self._schema_view
 
@@ -505,6 +507,7 @@ class Database(ABC, Generic[CollectionType]):
         if isinstance(schema_view, str):
             schema_view = SchemaView(schema_view)
         self._schema_view = schema_view
+        logger.info(f"Setting schema view for {self.handle}")
         # self._schema_view = SchemaView(schema_view.materialize_derived_schema())
         if not self._collections:
             return
@@ -719,7 +722,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
         >>> db.list_collection_names()
         ['iris']
@@ -739,7 +742,9 @@ class Database(ABC, Generic[CollectionType]):
             # import into a test instance
             tmp_handle = source_format.value
             client = self.parent
-            tmp_db = client.attach_database(tmp_handle, alias="tmp")
+            tmp_alias = "tmp"
+            client.drop_database(tmp_alias, missing_ok=True)
+            tmp_db = client.attach_database(tmp_handle, alias=tmp_alias, recreate_if_exists=True)
             # TODO: check for infinite recursion
             tmp_db.import_database(location, source_format=source_format)
             obj = {}
linkml_store/api/stores/duckdb/duckdb_collection.py CHANGED
@@ -147,16 +147,22 @@ class DuckDBCollection(Collection):
         if self._table_created or self.metadata.is_prepopulated:
             logger.info(f"Already have table for: {cd.name}")
             return
-        query = Query(
-            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
-        )
-        qr = self.parent.query(query)
-        if qr.num_rows > 0:
+        if self.parent._table_exists(self.alias):
             logger.info(f"Table already exists for {cd.name}")
             self._table_created = True
             self._initialized = True
             self.metadata.is_prepopulated = True
             return
+        # query = Query(
+        #     from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        # )
+        # qr = self.parent.query(query)
+        # if qr.num_rows > 0:
+        #     logger.info(f"Table already exists for {cd.name}")
+        #     self._table_created = True
+        #     self._initialized = True
+        #     self.metadata.is_prepopulated = True
+        #     return
         logger.info(f"Creating table for {cd.name}")
         t = self._sqla_table(cd)
         ct = CreateTable(t)
linkml_store/api/stores/duckdb/duckdb_database.py CHANGED
@@ -1,7 +1,7 @@
 import json
 import logging
 from pathlib import Path
-from typing import Optional, Union
+from typing import Optional, Union, List
 
 import pandas as pd
 import sqlalchemy
@@ -14,7 +14,7 @@ from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
 from linkml_store.utils.format_utils import Format
-from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
+from linkml_store.utils.sql_utils import introspect_schema, query_to_sql, where_clause_to_sql
 
 TYPE_MAP = {
     "VARCHAR": "string",
@@ -62,7 +62,7 @@ class DuckDBDatabase(Database):
     def engine(self) -> sqlalchemy.Engine:
         if not self._engine:
             handle = self.handle
-            if not handle.startswith("duckdb://") and not handle.startswith(":"):
+            if not handle.startswith("duckdb://") and not handle.startswith(":") and "://" not in handle:
                 handle = f"duckdb:///{handle}"
             if ":memory:" not in handle:
                 # TODO: investigate this; duckdb appears to be prematurely caching
@@ -71,6 +71,10 @@ class DuckDBDatabase(Database):
             self._engine = sqlalchemy.create_engine(handle)
         return self._engine
 
+    @property
+    def _is_sqlite(self) -> bool:
+        return self.handle and self.handle.startswith("sqlite:")
+
     def commit(self, **kwargs):
         with self.engine.connect() as conn:
             conn.commit()
@@ -89,34 +93,60 @@ class DuckDBDatabase(Database):
             if not missing_ok:
                 raise FileNotFoundError(f"Database file not found: {path}")
 
-    def query(self, query: Query, **kwargs) -> QueryResult:
+    def _table_exists(self, table: str) -> bool:
+        if self._is_sqlite:
+            if table == "sqlite_master":
+                return True
+            meta_query = Query(
+                from_table="sqlite_master",
+                where_clause={
+                    #"type": "table",
+                    "name": table,
+                }
+            )
+        else:
+            if table.startswith("information_schema"):
+                return True
+            meta_query = Query(
+                from_table="information_schema.tables",
+                where_clause={
+                    "table_type": "BASE TABLE",
+                    "table_name": table,
+                }
+            )
+
+        qr = self.query(meta_query)
+        if qr.num_rows == 0:
+            logger.debug(f"Table {self.alias} not created yet")
+            return False
+        return True
+
+    def _json_encoded_cols(self, table_name: str) -> Optional[List[str]]:
         json_encoded_cols = []
-        if query.from_table:
-            if not query.from_table.startswith("information_schema"):
-                meta_query = Query(
-                    from_table="information_schema.tables", where_clause={"table_name": query.from_table}
-                )
-                qr = self.query(meta_query)
-                if qr.num_rows == 0:
-                    logger.debug(f"Table {query.from_table} not created yet")
-                    return QueryResult(query=query, num_rows=0, rows=[])
-            if not query.from_table.startswith("information_schema"):
-                sv = self.schema_view
-            else:
-                sv = None
+        if table_name:
+            if table_name.startswith("information_schema") or table_name.startswith("sqlite"):
+                return []
+            sv = self.schema_view
            if sv:
                 cd = None
                 for c in self._collections.values():
-                    # if c.name == query.from_table or c.metadata.alias == query.from_table:
-                    if c.alias == query.from_table or c.target_class_name == query.from_table:
+                    if c.alias == table_name or c.target_class_name == table_name:
                         cd = c.class_definition()
                         break
                 if cd:
                     for att in sv.class_induced_slots(cd.name):
                         if att.inlined or att.inlined_as_list:
                             json_encoded_cols.append(att.name)
+        return json_encoded_cols
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        if not self._table_exists(query.from_table):
+            return QueryResult(query=query, num_rows=0, rows=[])
+        json_encoded_cols = self._json_encoded_cols(query.from_table)
+
         with self.engine.connect() as conn:
             count_query_str = text(query_to_sql(query, count=True))
+            logger.debug(f"count_query_str: {count_query_str}")
             num_rows = list(conn.execute(count_query_str))[0][0]
             logger.debug(f"num_rows: {num_rows}")
             query_str = query_to_sql(query, **kwargs)  # include offset, limit
@@ -167,6 +197,9 @@ class DuckDBDatabase(Database):
         logger.info(f"Inducing schema view for {self.metadata.handle} // {self}")
         sb = SchemaBuilder()
         schema = sb.schema
+        logger.info(f"Checking if {self.metadata.handle} is sqlite: {self._is_sqlite}")
+        if self._is_sqlite:
+            return SchemaView(schema)
         query = Query(from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE"})
         qr = self.query(query)
         logger.info(f"Found {qr.num_rows} information_schema.tables // {qr.rows}")
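Together with the new "sqlite" entry in HANDLE_MAP, these changes route sqlite: handles through the same SQLAlchemy-backed database class, checking table existence via sqlite_master and skipping schema induction. A hedged sketch of the intended usage (file name illustrative; exact behavior depends on the installed version):

    from linkml_store import Client

    client = Client()
    # A sqlite: handle is dispatched to DuckDBDatabase; _is_sqlite switches the
    # metadata queries from information_schema.tables to sqlite_master.
    db = client.attach_database("sqlite:///people.sqlite", alias="people")
    persons = db.create_collection("Person", alias="persons")
    persons.insert([{"id": "P1", "name": "John"}])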
linkml_store/api/stores/mongodb/mongodb_collection.py CHANGED
@@ -41,6 +41,89 @@ class MongoDBCollection(Collection):
                 del obj["_id"]
         self._post_insert_hook(objs)
 
+
+    def index(self,
+              objs: Union[OBJECT, List[OBJECT]],
+              index_name: Optional[str] = None,
+              replace: bool = False,
+              unique: bool = False,
+              **kwargs):
+        """
+        Create indexes on the collection.
+
+        :param objs: Field(s) to index.
+        :param index_name: Optional name for the index.
+        :param replace: If True, the index will be dropped and recreated.
+        :param unique: If True, creates a unique index (default: False).
+        """
+
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        existing_indexes = self.mongo_collection.index_information()
+
+        for obj in objs:
+            field_exists = False
+            index_to_drop = None
+
+            # Extract existing index details
+            for index_name_existing, index_details in existing_indexes.items():
+                indexed_fields = [field[0] for field in index_details.get("key", [])]  # Extract field names
+
+                if obj in indexed_fields:  # If this field is already indexed
+                    field_exists = True
+                    index_to_drop = index_name_existing if replace else None
+
+            # Drop the index if replace=True and index_to_drop is valid
+            if index_to_drop:
+                self.mongo_collection.drop_index(index_to_drop)
+                logging.debug(f"Dropped existing index: {index_to_drop}")
+
+            # Create the new index only if it doesn't exist or was dropped
+            if not field_exists or replace:
+                self.mongo_collection.create_index(obj, name=index_name, unique=unique)
+                logging.debug(f"Created new index: {index_name} on field {obj}, unique={unique}")
+            else:
+                logging.debug(f"Index already exists for field {obj}, skipping creation.")
+
+    def upsert(self,
+               objs: Union[OBJECT, List[OBJECT]],
+               filter_fields: List[str],
+               update_fields: Optional[List[str]] = None,
+               **kwargs):
+        """
+        Upsert one or more documents into the MongoDB collection.
+
+        :param objs: The document(s) to insert or update.
+        :param filter_fields: List of field names to use as the filter for matching existing documents.
+        :param update_fields: List of field names to include in the update. If None, all fields are updated.
+        """
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        for obj in objs:
+            # Ensure filter fields exist in the object
+            filter_criteria = {field: obj[field] for field in filter_fields if field in obj}
+            if not filter_criteria:
+                raise ValueError("At least one valid filter field must be present in each object.")
+
+            # Check if a document already exists
+            existing_doc = self.mongo_collection.find_one(filter_criteria)
+
+            if existing_doc:
+                # Update only changed fields
+                updates = {key: obj[key] for key in update_fields if key in obj and obj[key] != existing_doc.get(key)}
+
+                if updates:
+                    self.mongo_collection.update_one(filter_criteria, {"$set": updates})
+                    logging.debug(f"Updated existing document: {filter_criteria} with {updates}")
+                else:
+                    logging.debug(f"No changes detected for document: {filter_criteria}. Skipping update.")
+            else:
+                # Insert a new document
+                self.mongo_collection.insert_one(obj)
+                logging.debug(f"Inserted new document: {obj}")
+
     def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
         mongo_filter = self._build_mongo_filter(query.where_clause)
         limit = limit or query.limit
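A hedged usage sketch for the new MongoDB index() and upsert() methods (assumes a MongoDB server reachable at the URI shown; names and values are illustrative):

    from linkml_store import Client

    client = Client()
    db = client.attach_database("mongodb://localhost:27017/test", alias="test")
    people = db.create_collection("Person", alias="persons")

    # Ensure a unique single-field index on "id", dropping any existing index
    # on that field first.
    people.index("id", unique=True, replace=True)

    # Insert-or-update keyed on "id"; only fields listed in update_fields that
    # actually changed are written back.
    people.upsert(
        [{"id": "P1", "name": "John", "age_in_years": 31}],
        filter_fields=["id"],
        update_fields=["name", "age_in_years"],
    )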
linkml_store/api/stores/mongodb/mongodb_database.py CHANGED
@@ -3,6 +3,7 @@
 import logging
 from pathlib import Path
 from typing import Optional, Union
+from urllib.parse import urlparse
 
 from pymongo import MongoClient
 from pymongo.database import Database as NativeDatabase
@@ -38,10 +39,13 @@ class MongoDBDatabase(Database):
     @property
     def _db_name(self) -> str:
         if self.handle:
-            db = self.handle.split("/")[-1]
+            parsed_url = urlparse(self.handle)
+            path_parts = parsed_url.path.lstrip("/").split("?")[0].split("/")
+            print(path_parts)
+            db_name = path_parts[0] if path_parts else "default"
         else:
-            db = "default"
-        return db
+            db_name = "default"
+        return db_name
 
     @property
     def native_client(self) -> MongoClient:
linkml_store/cli.py CHANGED
@@ -99,6 +99,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
 @click.option("--database", "-d", help="Database name")
 @click.option("--collection", "-c", help="Collection name")
 @click.option("--input", "-i", help="Input file (alternative to database/collection)")
+@click.option("--schema", "-S", help="Path to schema (LinkML yaml)")
 @click.option("--config", "-C", type=click.Path(exists=True), help="Path to the configuration file")
 @click.option("--set", help="Metadata settings in the form PATHEXPR=value", multiple=True)
 @click.option("-v", "--verbose", count=True)
@@ -111,7 +112,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
     help="If set then show full stacktrace on error",
 )
 @click.pass_context
-def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, config, set, input, **kwargs):
+def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, schema, config, set, input, **kwargs):
     """A CLI for interacting with the linkml-store."""
     if not stacktrace:
         sys.tracebacklimit = 0
@@ -158,6 +159,9 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
     client = Client().from_config(config, **kwargs) if config else Client()
     settings = ContextSettings(client=client, database_name=database, collection_name=collection)
     ctx.obj["settings"] = settings
+    if schema:
+        db = settings.database
+        db.set_schema_view(schema)
     if settings.database_name:
         db = client.get_database(database)
     if set:
@@ -182,7 +186,7 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
 
 
 @cli.command()
-@click.argument("files", type=click.Path(exists=True), nargs=-1)
+@click.argument("files", type=click.Path(), nargs=-1)
 @click.option("--replace/--no-replace", default=False, show_default=True, help="Replace existing objects")
 @click.option("--format", "-f", type=format_choice, help="Input format")
 @click.option("--object", "-i", multiple=True, help="Input object as YAML")
@@ -534,6 +538,7 @@ def pivot(ctx, where, limit, index, columns, values, output_type, output):
 @click.option(
     "--feature-attributes", "-F", type=click.STRING, help="Feature attributes for inference (comma separated)"
 )
+@click.option("--training-collection", type=click.STRING, help="Collection to use for training")
 @click.option("--inference-config-file", "-Y", type=click.Path(), help="Path to inference configuration file")
 @click.option("--export-model", "-E", type=click.Path(), help="Export model to file")
 @click.option("--load-model", "-L", type=click.Path(), help="Load model from file")
@@ -555,6 +560,7 @@ def infer(
     evaluation_count,
     evaluation_match_function,
     training_test_data_split,
+    training_collection,
     predictor_type,
     target_attribute,
     feature_attributes,
@@ -617,6 +623,7 @@ def infer(
     if model_format:
         model_format = ModelSerialization(model_format)
     if load_model:
+        logger.info(f"Loading predictor from {load_model}")
         predictor = get_inference_engine(predictor_type)
         predictor = type(predictor).load_model(load_model)
     else:
@@ -627,13 +634,18 @@ def infer(
         if training_test_data_split:
             config.train_test_split = training_test_data_split
         predictor = get_inference_engine(predictor_type, config=config)
-        if collection:
-            predictor.load_and_split_data(collection)
+        training_collection_obj = collection
+        if training_collection:
+            training_collection_obj = ctx.obj["settings"].database.get_collection(training_collection)
+        if training_collection_obj:
+            logger.info(f"Using collection: {training_collection_obj.alias} for inference")
+            split = training_test_data_split or (1.0, 0.0)
+            predictor.load_and_split_data(training_collection_obj, split=split)
         predictor.initialize_model()
     if export_model:
         logger.info(f"Exporting model to {export_model} in {model_format}")
         predictor.export_model(export_model, model_format)
-    if not query_obj:
+    if not query_obj and where_clause is None:
        if not export_model and not evaluation_count:
            raise ValueError("Query or evaluate must be specified if not exporting model")
        if evaluation_count:
@@ -651,6 +663,12 @@ def infer(
         result = predictor.derive(query_obj)
         dumped_obj = result.model_dump(exclude_none=True)
         write_output([dumped_obj], output_type, target=output)
+    if where_clause is not None:
+        predicted_objs = []
+        for query_obj in collection.find(where_clause).rows:
+            result = predictor.derive(query_obj)
+            predicted_objs.append(result.predicted_object)
+        write_output(predicted_objs, output_type, target=output)
 
 
 @cli.command()
linkml_store/inference/implementations/llm_inference_engine.py ADDED
@@ -0,0 +1,152 @@
+import json
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import ClassVar, List, Optional, TextIO, Union
+
+import yaml
+from linkml_store.utils.llm_utils import parse_yaml_payload
+from llm import get_key
+from pydantic import BaseModel
+
+from linkml_store.api.collection import OBJECT, Collection
+from linkml_store.inference.inference_config import Inference, InferenceConfig, LLMConfig
+from linkml_store.inference.inference_engine import InferenceEngine, ModelSerialization
+from linkml_store.utils.object_utils import select_nested
+
+logger = logging.getLogger(__name__)
+
+MAX_ITERATIONS = 5
+DEFAULT_NUM_EXAMPLES = 20
+
+SYSTEM_PROMPT = """
+Your task is to inference the complete YAML
+object output given the YAML object input. I will provide you
+with contextual information, including the schema,
+to help with the inference. You can use the following
+
+You should return ONLY valid YAML in your response.
+"""
+
+
+class TrainedModel(BaseModel, extra="forbid"):
+    index_rows: List[OBJECT]
+    config: Optional[InferenceConfig] = None
+
+
+class LLMInference(Inference):
+    iterations: int = 0
+
+
+@dataclass
+class LLMInferenceEngine(InferenceEngine):
+    """
+    LLM based predictor.
+
+    Unlike the RAG predictor this performs few-shot inference
+
+    """
+
+    _model: "llm.Model" = None  # noqa: F821
+
+    PERSIST_COLS: ClassVar[List[str]] = [
+        "config",
+    ]
+
+    def __post_init__(self):
+        if not self.config:
+            self.config = InferenceConfig()
+        if not self.config.llm_config:
+            self.config.llm_config = LLMConfig()
+
+    @property
+    def model(self) -> "llm.Model":  # noqa: F821
+        import llm
+
+        if self._model is None:
+            self._model = llm.get_model(self.config.llm_config.model_name)
+            if self._model.needs_key:
+                key = get_key(None, key_alias=self._model.needs_key)
+                self._model.key = key
+
+        return self._model
+
+    def initialize_model(self, **kwargs):
+        logger.info(f"Initializing model {self.model}")
+
+    def object_to_text(self, object: OBJECT) -> str:
+        return yaml.dump(object)
+
+    def _schema_str(self) -> str:
+        db = self.training_data.base_collection.parent
+        from linkml_runtime.dumpers import json_dumper
+        schema_dict = json_dumper.to_dict(db.schema_view.schema)
+        return yaml.dump(schema_dict)
+
+    def derive(self, object: OBJECT, iteration=0, additional_prompt_texts: Optional[List[str]] = None) -> Optional[LLMInference]:
+        import llm
+
+        model: llm.Model = self.model
+        #model_name = self.config.llm_config.model_name
+        #feature_attributes = self.config.feature_attributes
+        target_attributes = self.config.target_attributes
+        query_text = self.object_to_text(object)
+
+        if not target_attributes:
+            target_attributes = [k for k, v in object.items() if v is None or v == ""]
+        #if not feature_attributes:
+        #    feature_attributes = [k for k, v in object.items() if v is not None and v != ""]
+
+        system_prompt = SYSTEM_PROMPT.format(llm_config=self.config.llm_config)
+
+        system_prompt += "\n## SCHEMA:\n\n" + self._schema_str()
+
+        stub = ", ".join([f"{k}: ..." for k in target_attributes])
+        stub = "{" + stub + "}"
+        prompt = (
+            "Provide a YAML object of the form"
+            "```yaml\n"
+            f"{stub}\n"
+            "```\n"
+            "---\nQuery:\n" f"## INCOMPLETE OBJECT:\n{query_text}\n" "## OUTPUT:\n"
+        )
+        logger.info(f"Prompt: {prompt}")
+        response = model.prompt(prompt, system=system_prompt)
+        yaml_str = response.text()
+        logger.info(f"Response: {yaml_str}")
+        predicted_object = parse_yaml_payload(yaml_str, strict=True)
+        predicted_object = {**object, **predicted_object}
+        if self.config.validate_results:
+            base_collection = self.training_data.base_collection
+            errs = list(base_collection.iter_validate_collection([predicted_object]))
+            if errs:
+                print(f"{iteration} // FAILED TO VALIDATE: {yaml_str}")
+                print(f"PARSED: {predicted_object}")
+                print(f"ERRORS: {errs}")
+                if iteration > MAX_ITERATIONS:
+                    raise ValueError(f"Validation errors: {errs}")
+                extra_texts = [
+                    "Make sure results conform to the schema. Previously you provided:\n",
+                    yaml_str,
+                    "\nThis was invalid.\n",
+                    "Validation errors:\n",
+                ] + [self.object_to_text(e) for e in errs]
+                return self.derive(object, iteration=iteration+1, additional_prompt_texts=extra_texts)
+        return LLMInference(predicted_object=predicted_object, iterations=iteration+1, query=object)
+
+
+    def export_model(
+        self, output: Optional[Union[str, Path, TextIO]], model_serialization: ModelSerialization = None, **kwargs
+    ):
+        self.save_model(output)
+
+    def save_model(self, output: Union[str, Path]) -> None:
+        """
+        Save the trained model and related data to a file.
+
+        :param output: Path to save the model
+        """
+        raise NotImplementedError("Does not make sense for this engine")
+
+    @classmethod
+    def load_model(cls, file_path: Union[str, Path]) -> "LLMInferenceEngine":
+        raise NotImplementedError("Does not make sense for this engine")
linkml_store/inference/implementations/rag_inference_engine.py CHANGED
@@ -20,7 +20,7 @@ DEFAULT_NUM_EXAMPLES = 20
 DEFAULT_MMR_RELEVANCE_FACTOR = 0.8
 
 SYSTEM_PROMPT = """
-You are a {llm_config.role}, your task is to inference the YAML
+You are a {llm_config.role}, your task is to infer the YAML
 object output given the YAML object input. I will provide you
 with a collection of examples that will provide guidance both
 on the desired structure of the response, as well as the kind
@@ -130,23 +130,34 @@ class RAGInferenceEngine(InferenceEngine):
         else:
             if not self.rag_collection.indexers:
                 raise ValueError("RAG collection must have an indexer attached")
+            logger.info(f"Searching {self.rag_collection.alias} for examples for: {query_text}")
             rs = self.rag_collection.search(query_text, limit=num_examples, index_name="llm",
                                             mmr_relevance_factor=mmr_relevance_factor)
             examples = rs.rows
+            logger.info(f"Found {len(examples)} examples")
         if not examples:
             raise ValueError(f"No examples found for {query_text}; size = {self.rag_collection.size()}")
         prompt_clauses = []
-        query_obj = select_nested(object, feature_attributes)
+        this_feature_attributes = feature_attributes
+        if not this_feature_attributes:
+            this_feature_attributes = list(set(object.keys()) - set(target_attributes))
+        query_obj = select_nested(object, this_feature_attributes)
         query_text = self.object_to_text(query_obj)
         for example in examples:
-            input_obj = select_nested(example, feature_attributes)
+            this_feature_attributes = feature_attributes
+            if not this_feature_attributes:
+                this_feature_attributes = list(set(example.keys()) - set(target_attributes))
+            if not this_feature_attributes:
+                raise ValueError(f"No feature attributes found in example {example}")
+            input_obj = select_nested(example, this_feature_attributes)
             input_obj_text = self.object_to_text(input_obj)
             if input_obj_text == query_text:
-                raise ValueError(
-                    f"Query object {query_text} is the same as example object {input_obj_text}\n"
-                    "This indicates possible test data leakage\n."
-                    "TODO: allow an option that allows user to treat this as a basic lookup\n"
-                )
+                continue
+                #raise ValueError(
+                #    f"Query object {query_text} is the same as example object {input_obj_text}\n"
+                #    "This indicates possible test data leakage\n."
+                #    "TODO: allow an option that allows user to treat this as a basic lookup\n"
+                #)
             output_obj = select_nested(example, target_attributes)
             prompt_clause = (
                 "---\nExample:\n" f"## INPUT:\n{input_obj_text}\n" f"## OUTPUT:\n{self.object_to_text(output_obj)}\n"
@@ -169,7 +180,7 @@ class RAGInferenceEngine(InferenceEngine):
                                        encoding=encoding, token_limit=token_limit,
                                        additional_text=system_prompt)
         logger.info(f"Prompt: {prompt}")
-        response = model.prompt(prompt, system_prompt)
+        response = model.prompt(prompt, system=system_prompt)
         yaml_str = response.text()
         logger.info(f"Response: {yaml_str}")
         predicted_object = self._parse_yaml_payload(yaml_str, strict=True)
linkml_store/inference/inference_engine.py CHANGED
@@ -124,7 +124,7 @@ class InferenceEngine(ABC):
         Load the data and split it into training and testing sets.
 
         :param collection:
-        :param split:
+        :param split: Tuple of training and testing split ratios.
         :param randomize:
         :return:
         """
@@ -136,7 +136,7 @@ class InferenceEngine(ABC):
             self.training_data = CollectionSlice(name="train", base_collection=collection, indices=None)
             self.testing_data = None
             return
-        logger.info(f"Loading and splitting data from collection {collection.alias}")
+        logger.info(f"Loading and splitting data {split} from collection {collection.alias}")
         size = collection.size()
         indices = range(size)
         if randomize:
linkml_store/utils/format_utils.py CHANGED
@@ -3,6 +3,7 @@ import gzip
 import io
 import json
 import logging
+import re
 import sys
 import tarfile
 from enum import Enum
@@ -31,10 +32,13 @@ class Format(Enum):
     TSV = "tsv"
     CSV = "csv"
     XML = "xml"
+    OBO = "obo"
+    PKL = "pkl"
     PYTHON = "python"
     PARQUET = "parquet"
     FORMATTED = "formatted"
     TABLE = "table"
+    XLSX = "xlsx"
     SQLDUMP_DUCKDB = "duckdb"
     SQLDUMP_POSTGRES = "postgres"
     DUMP_MONGODB = "mongodb"
@@ -67,6 +71,9 @@ class Format(Enum):
     def is_dump_format(self):
         return self in [Format.SQLDUMP_DUCKDB, Format.SQLDUMP_POSTGRES, Format.DUMP_MONGODB]
 
+    def is_binary_format(self):
+        return self in [Format.PARQUET, Format.XLSX]
+
     def is_xsv(self):
         return self in [Format.TSV, Format.CSV]
 
@@ -95,6 +102,26 @@ def load_objects_from_url(
     return objs
 
 
+def clean_pandas_value(v):
+    """Clean a single value from pandas."""
+    import math
+
+    if isinstance(v, float):
+        if math.isnan(v) or math.isinf(v):
+            return None
+        return float(v)  # Ensures proper float type
+    return v
+
+
+def clean_nested_structure(obj):
+    """Recursively clean a nested structure of dicts/lists from pandas."""
+    if isinstance(obj, dict):
+        return {k: clean_nested_structure(v) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [clean_nested_structure(item) for item in obj]  # Fixed: using 'item' instead of 'v'
+    else:
+        return clean_pandas_value(obj)
+
 def process_file(
     f: IO, format: Format, expected_type: Optional[Type] = None, header_comment_token: Optional[str] = None
 ) -> List[Dict[str, Any]]:
@@ -128,6 +155,19 @@ def process_file(
         objs = list(reader)
     elif format == Format.XML:
         objs = xmltodict.parse(f.read())
+    elif format == Format.PKL:
+        objs = pd.read_pickle(f).to_dict(orient="records")
+    elif format == Format.XLSX:
+        xls = pd.ExcelFile(f)
+        objs = {sheet: clean_nested_structure(xls.parse(sheet).to_dict(orient="records")) for sheet in xls.sheet_names}
+    elif format == Format.OBO:
+        blocks = split_document(f.read(), "\n\n")
+        id_pattern = re.compile(r"id: (\S+)")
+        def get_id(block):
+            m = id_pattern.search(block)
+            return m.group(1) if m else None
+        objs = [{"id": get_id(block), "content": block} for block in blocks]
+        objs = [obj for obj in objs if obj["id"]]
     elif format == Format.PARQUET:
         import pyarrow.parquet as pq
 
@@ -167,6 +207,14 @@ def load_objects(
     if isinstance(file_path, Path):
         file_path = str(file_path)
 
+    for url_scheme in ["http", "https", "ftp"]:
+        if file_path.startswith(f"{url_scheme}://"):
+            return load_objects_from_url(
+                file_path,
+                format=format,
+                expected_type=expected_type,
+            )
+
     if isinstance(format, str):
         format = Format(format)
 
@@ -185,9 +233,9 @@ def load_objects(
     else:
         if Path(file_path).is_dir():
             raise ValueError(f"{file_path} is a dir, which is invalid for {format}")
-        mode = "rb" if format == Format.PARQUET or compression == "gz" else "r"
         open_func = gzip.open if compression == "gz" else open
         format = Format.guess_format(file_path) if not format else format
+        mode = "rb" if (format and format.is_binary_format()) or compression == "gz" else "r"
     with open_func(file_path, mode) if file_path != "-" else sys.stdin as f:
         if compression == "gz" and mode == "r":
             f = io.TextIOWrapper(f)
@@ -343,3 +391,14 @@ def guess_format(path: str) -> Optional[Format]:
     :return: The guessed format.
     """
     return Format.guess_format(path)
+
+
+def split_document(doc: str, delimiter: str):
+    """
+    Split a document into parts based on a delimiter.
+
+    :param doc: The document to split.
+    :param delimiter: The delimiter.
+    :return: The parts of the document.
+    """
+    return doc.split(delimiter)
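The OBO branch above splits the document on blank lines and keeps only blocks that carry an id: line. A standalone illustration of that parsing logic (plain Python, not the linkml-store API):

    import re

    doc = (
        "[Term]\nid: GO:0000001\nname: mitochondrion inheritance\n"
        "\n"
        "[Term]\nname: block without an id\n"
    )
    id_pattern = re.compile(r"id: (\S+)")
    objs = []
    for block in doc.split("\n\n"):
        m = id_pattern.search(block)
        if m:
            objs.append({"id": m.group(1), "content": block})
    print(objs)  # only the GO:0000001 block survives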
linkml_store/utils/llm_utils.py CHANGED
@@ -100,3 +100,18 @@ def get_token_limit(model_name: str) -> int:
         if model in model_name:
             return token_limit
     return 4096
+
+
+def parse_yaml_payload(yaml_str: str, strict=False) -> Optional[dict]:
+    import yaml
+    if "```" in yaml_str:
+        yaml_str = yaml_str.split("```")[1].strip()
+        if yaml_str.startswith("yaml"):
+            yaml_str = yaml_str[4:].strip()
+    try:
+        return yaml.safe_load(yaml_str)
+    except Exception as e:
+        if strict:
+            raise e
+        logger.error(f"Error parsing YAML: {yaml_str}\n{e}")
+        return None
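parse_yaml_payload strips a markdown code fence (and a leading "yaml" language tag) before handing the text to yaml.safe_load; with strict=True a parse failure re-raises instead of returning None. A small check, assuming the llm extra is installed so the module imports cleanly:

    from linkml_store.utils.llm_utils import parse_yaml_payload

    raw = "```yaml\nname: John\nage_in_years: 30\n```"
    print(parse_yaml_payload(raw, strict=True))
    # {'name': 'John', 'age_in_years': 30}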
linkml_store/utils/object_utils.py CHANGED
@@ -124,7 +124,7 @@ def select_nested(data: dict, paths: List[Union[str, List[str]]], current_path=N
 
     Args:
         data (dict): The input nested dictionary.
-        selectors (list): A list of selector strings.
+        paths (list): A list of selector strings.
 
     Returns:
         dict: A new dictionary with the same structure, but only the selected attributes.
@@ -162,6 +162,8 @@ def select_nested(data: dict, paths: List[Union[str, List[str]]], current_path=N
     if current_path is None:
         current_path = []
     matching_paths = []
+    if not paths:
+        raise ValueError("No paths provided")
     for path in paths:
         if isinstance(path, str):
             path = path.split(".")
linkml_store/utils/sql_utils.py CHANGED
@@ -5,7 +5,7 @@ import sqlalchemy
 import sqlalchemy.sql.sqltypes as sqlt
 from linkml_runtime.linkml_model import SchemaDefinition, SlotDefinition
 from linkml_runtime.utils.schema_builder import SchemaBuilder
-from sqlalchemy import MetaData
+from sqlalchemy import MetaData, quoted_name
 
 from linkml_store.api.queries import Query
 
@@ -115,7 +115,13 @@ def facet_count_sql(query: Query, facet_column: Union[str, Tuple[str, ...]], mul
         conditions = [cond for cond in where_clause_sql.split(" AND ") if not cond.startswith(f"{facet_column} ")]
         modified_where = " AND ".join(conditions)
 
+    def make_col_safe(col):
+        return '"' + quoted_name(col, True) + '"' if ' ' in col else col
+
+    if isinstance(facet_column, str):
+        facet_column = make_col_safe(facet_column)
     if isinstance(facet_column, tuple):
+        facet_column = [make_col_safe(col) for col in facet_column]
         if multivalued:
             raise NotImplementedError("Multivalued facets are not supported for multiple columns")
         facet_column = ", ".join(facet_column)
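The make_col_safe helper wraps facet columns whose names contain spaces in double quotes (via SQLAlchemy's quoted_name) so the generated facet SQL stays valid. A hedged sketch, assuming facet_count_sql returns the generated SQL string and that multivalued can be passed explicitly:

    from linkml_store.api.queries import Query
    from linkml_store.utils.sql_utils import facet_count_sql

    q = Query(from_table="persons", where_clause={"name": "John"})
    # "age in years" contains a space, so it is emitted quoted in the facet SQL.
    print(facet_count_sql(q, facet_column="age in years", multivalued=False))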
linkml_store/utils/vector_utils.py CHANGED
@@ -34,7 +34,7 @@ def pairwise_cosine_similarity(vector1: np.array, vector2: np.array) -> float:
     dot_product = np.dot(vector1, vector2)
     norm1 = np.linalg.norm(vector1)
     norm2 = np.linalg.norm(vector2)
-    return dot_product / (norm1 * norm2)
+    return float(dot_product / (norm1 * norm2))
 
 
 def compute_cosine_similarity_matrix(list1: LOL, list2: LOL) -> np.ndarray:
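The cast above makes pairwise_cosine_similarity return a plain Python float rather than a numpy scalar (which, among other things, serializes cleanly to JSON). Quick check:

    import numpy as np
    from linkml_store.utils.vector_utils import pairwise_cosine_similarity

    sim = pairwise_cosine_similarity(np.array([1.0, 0.0]), np.array([1.0, 1.0]))
    print(type(sim), round(sim, 4))  # <class 'float'> 0.7071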
linkml_store-0.2.6.dist-info/METADATA CHANGED
@@ -1,14 +1,13 @@
 Metadata-Version: 2.3
 Name: linkml-store
-Version: 0.2.4
+Version: 0.2.6
 Summary: linkml-store
 License: MIT
 Author: Author 1
 Author-email: author@org.org
-Requires-Python: >=3.9, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*, !=3.8.*
+Requires-Python: >=3.10,<4.0
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -20,7 +19,6 @@ Provides-Extra: bigquery
 Provides-Extra: fastapi
 Provides-Extra: frictionless
 Provides-Extra: h5py
-Provides-Extra: ibis
 Provides-Extra: llm
 Provides-Extra: map
 Provides-Extra: mongodb
@@ -36,20 +34,18 @@ Requires-Dist: duckdb (>=0.10.1)
 Requires-Dist: duckdb-engine (>=0.11.2)
 Requires-Dist: fastapi ; extra == "fastapi"
 Requires-Dist: frictionless ; extra == "frictionless"
-Requires-Dist: gcsfs ; extra == "ibis"
 Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
 Requires-Dist: h5py ; extra == "h5py"
-Requires-Dist: ibis-framework[duckdb,examples] (>=9.3.0) ; extra == "ibis"
 Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
 Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
-Requires-Dist: jsonpatch (>=1.33,<2.0)
+Requires-Dist: jsonpatch (>=1.33)
 Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
 Requires-Dist: linkml-runtime (>=1.8.0)
 Requires-Dist: linkml_map ; extra == "map"
 Requires-Dist: linkml_renderer ; extra == "renderer"
 Requires-Dist: llm ; extra == "llm" or extra == "all"
 Requires-Dist: matplotlib ; extra == "analytics"
-Requires-Dist: multipledispatch ; extra == "ibis"
+Requires-Dist: multipledispatch
 Requires-Dist: neo4j ; extra == "neo4j" or extra == "all"
 Requires-Dist: networkx ; extra == "neo4j"
 Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
@@ -57,8 +53,9 @@ Requires-Dist: plotly ; extra == "analytics"
 Requires-Dist: py2neo ; extra == "neo4j"
 Requires-Dist: pyarrow ; extra == "pyarrow"
 Requires-Dist: pydantic (>=2.0.0,<3.0.0)
-Requires-Dist: pymongo ; extra == "mongodb"
+Requires-Dist: pymongo (>=4.11,<5.0) ; extra == "mongodb"
 Requires-Dist: pystow (>=0.5.4,<0.6.0)
+Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
 Requires-Dist: ruff (>=0.6.2) ; extra == "tests"
 Requires-Dist: scikit-learn ; extra == "scipy"
 Requires-Dist: scipy ; extra == "scipy"
@@ -68,7 +65,7 @@ Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
 Requires-Dist: tabulate
 Requires-Dist: tiktoken ; extra == "llm"
 Requires-Dist: uvicorn ; extra == "fastapi"
-Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
+Requires-Dist: xmltodict (>=0.13.0)
 Description-Content-Type: text/markdown
 
 # linkml-store
linkml_store-0.2.6.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
 linkml_store/__init__.py,sha256=jlU6WOUAn8cKIhzbTULmBTWpW9gZdEt7q_RI6KZN1bY,118
 linkml_store/api/__init__.py,sha256=3CelcFEFz0y3MkQAzhQ9JxHIt1zFk6nYZxSmYTo8YZE,226
-linkml_store/api/client.py,sha256=wFVgl1NUovaKLqNVUEt9dsnoIzjzqFvktJVncAupdE4,12362
-linkml_store/api/collection.py,sha256=CGvWxH7HRhjDt9Cp3SGdMqyhYx7Q4fRKUtAJV74_l0g,39711
+linkml_store/api/client.py,sha256=-XX1H5dIPBCPwU3lgZLtb7JXmUQR_c-FYGSKEW1idr8,12970
+linkml_store/api/collection.py,sha256=Edwyb36D8NJjb-bkDy3O8BJbDVxtSH3yOKI_QW9_cic,41620
 linkml_store/api/config.py,sha256=pOz210JIwkEEXtfjcsZBp1UEedkBu8RkH62Qa1b4exI,5777
-linkml_store/api/database.py,sha256=nvae8jnOZsQIFCsl_lRBnKcvrpJg4A10ujIKGeMyUS8,29350
+linkml_store/api/database.py,sha256=JyQ8SuPrNiltgMH4pdFt4IgGBc9nq3mfRJ5ZUEIDEqA,29696
 linkml_store/api/queries.py,sha256=tx9fgGY5fC_2ZbIvg4BqTK_MXJwA_DI4mxr8HdQ6Vos,2075
 linkml_store/api/stores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/api/stores/chromadb/__init__.py,sha256=e9BkOPuPnVQKA5PRKDulag59yGNHDP3U2_DnPSrFAKM,132
 linkml_store/api/stores/chromadb/chromadb_collection.py,sha256=RQUZx5oeotkzNihg-dlSevkiTiKY1d9x0bS63HF80W4,4270
 linkml_store/api/stores/chromadb/chromadb_database.py,sha256=dZA3LQE8-ZMhJQOzsUFyxehnKpFF7adR182aggfkaFY,3205
 linkml_store/api/stores/duckdb/__init__.py,sha256=rbQSDgNg-fdvi6-pHGYkJTST4p1qXUZBf9sFSsO3KPk,387
-linkml_store/api/stores/duckdb/duckdb_collection.py,sha256=Rkbm_uIVIRj5576lEolsyY_3Um1h8Lf3RHn8Fy3LIgU,7036
-linkml_store/api/stores/duckdb/duckdb_database.py,sha256=GH9bcOfHpNp6r-Eu1C3W0xuYcLsqGFDH1Sh4weifGaQ,9923
+linkml_store/api/stores/duckdb/duckdb_collection.py,sha256=1Jc770CR3oipfLj9iJn-dbkgtoEObLbylUQCoUWxuzs,7313
+linkml_store/api/stores/duckdb/duckdb_database.py,sha256=idIe89yqrdMKR69Xpi3cd5LStwe6FRBOm4eJGsHfOV0,10904
 linkml_store/api/stores/duckdb/mappings.py,sha256=tDce3W1Apwammhf4LS6cRJ0m4NiJ0eB7vOI_4U5ETY8,148
 linkml_store/api/stores/filesystem/__init__.py,sha256=KjvCjdttwqMHNeGyL-gr59zRz0--HFEWWUNNCJ5hITs,347
 linkml_store/api/stores/filesystem/filesystem_collection.py,sha256=9gqY2KRZsn_RWk4eKkxFd3_wcxs5YaXvcBI7GGJBMGE,6751
@@ -20,8 +20,8 @@ linkml_store/api/stores/hdf5/__init__.py,sha256=l4cIh3v7P0nPbwGIsfuCMD_serQ8q8c7
 linkml_store/api/stores/hdf5/hdf5_collection.py,sha256=mnpLMYehn3PuaIjp2dXrIWu8jh-bdQ84X2Ku83jMdEY,3805
 linkml_store/api/stores/hdf5/hdf5_database.py,sha256=EZbjrpaqiNDEFvoD5dZNcGBXA8z6HRNL81emueTZWNw,2714
 linkml_store/api/stores/mongodb/__init__.py,sha256=OSFCr7RQlDEe-O-Y0P_i912oAMK-L3pC7Cnj7sxlwAk,510
-linkml_store/api/stores/mongodb/mongodb_collection.py,sha256=unN0v7RYlGIiJxEhbNRxZ86TVQ4ELlAsNWTwEbg7E_g,6831
-linkml_store/api/stores/mongodb/mongodb_database.py,sha256=Y9MIV0KSRGCyopz8vGEivhSuvF0vZLCDJd29cdqMIX8,3857
+linkml_store/api/stores/mongodb/mongodb_collection.py,sha256=hKwaHHFxfWqjBNHZpzVuDVruH_SdXzoIKHdePN3JDEg,10447
+linkml_store/api/stores/mongodb/mongodb_database.py,sha256=HfVEEFCuwZ96KO3eWuSGFajRUgZPmeG-fqsrWHZhJng,4077
 linkml_store/api/stores/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/api/stores/neo4j/neo4j_collection.py,sha256=a-Az5_ypdBMgeNyhrTW7q-ik-vYPCDDONIK7N_CDA9c,17449
 linkml_store/api/stores/neo4j/neo4j_database.py,sha256=zanP_uBZO3AH0wuzbu6auK4zcZon_lMreC2vooSZwt8,5571
@@ -30,7 +30,7 @@ linkml_store/api/stores/solr/solr_collection.py,sha256=ZlxC3JbVaHfSA4HuTeJTsp6qe
 linkml_store/api/stores/solr/solr_database.py,sha256=TFjqbY7jAkdrhAchbNg0E-mChSP7ogNwFExslbvX7Yo,2877
 linkml_store/api/stores/solr/solr_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/api/types.py,sha256=3aIQtDFMvsSmjuN5qrR2vNK5sHa6yzD_rEOPA6tHwvg,176
-linkml_store/cli.py,sha256=bWbWQita8KCBjzovBRzQqHtjbRrf7Ttxq0Fe8zrDuds,30235
+linkml_store/cli.py,sha256=GtbLVMcH6rHEeEMljFGVFd8U5h71WN_ojmIp17UlJxo,31253
 linkml_store/constants.py,sha256=x4ZmDsfE9rZcL5WpA93uTKrRWzCD6GodYXviVzIvR38,112
 linkml_store/graphs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/graphs/graph_map.py,sha256=bYRxv8n1YPnFqE9d6JKNmRawb8EAhsPlHhBue0gvtZE,712
@@ -42,29 +42,30 @@ linkml_store/index/indexer.py,sha256=e5dsjh2wjOTDRsfClKJAFTbcK1UC7BOGkUCOfDg9omI
 linkml_store/inference/__init__.py,sha256=b8NAFNZjOYU_8gOvxdyCyoiHOOl5Ai2ckKs1tv7ZkkY,342
 linkml_store/inference/evaluation.py,sha256=YDFYaEu2QLSfFq4oyARrnKfTiPLtNF8irhhspgVDfdY,6013
 linkml_store/inference/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-linkml_store/inference/implementations/rag_inference_engine.py,sha256=mN7YQI-BeZglsAnZnNIuAj-Nxg1su5efNaohooEmNmM,10622
+linkml_store/inference/implementations/llm_inference_engine.py,sha256=iSxiboYpgB0_yL4zlHIJx2ZbvDrJC8JioewTKgLUS0U,5443
+linkml_store/inference/implementations/rag_inference_engine.py,sha256=R3Dz-DyNx7UU3ZaV1n9homxC2nUAT5JZnd4IRkIFftk,11326
 linkml_store/inference/implementations/rule_based_inference_engine.py,sha256=0IEY_fsHJPJy6QKbYQU_qE87RRnPOXQxPuJKXCQG8jU,6250
 linkml_store/inference/implementations/sklearn_inference_engine.py,sha256=Sdi7CoRK3qoLJu3prgLy1Ck_zQ1gHWRKFybHe7XQ4_g,13192
 linkml_store/inference/inference_config.py,sha256=EFGdigxWsfTPREbgqyJVRShN0JktCEmFLLoECrLfXSg,2282
-linkml_store/inference/inference_engine.py,sha256=IxQIOgmXCDI8ilCGtoaVA_1wFROUg4uH1_yGbX78N2U,7139
+linkml_store/inference/inference_engine.py,sha256=7P9syuIwwBpCUytfqZcCR5ei61ys5LIw8YhO0iIehG4,7191
 linkml_store/inference/inference_engine_registry.py,sha256=6o66gvBYBwdeAKm62zqqvfaBlcopVP_cla3L6uXGsHA,3015
 linkml_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/utils/change_utils.py,sha256=O2rvSvgTKB60reLLz9mX5OWykAA_m93bwnUh5ZWa0EY,471
 linkml_store/utils/file_utils.py,sha256=rQ7-XpmI6_Kx_dhEnI98muFRr0MmgI_kZ_9cgJBf_0I,1411
-linkml_store/utils/format_utils.py,sha256=sjpdJJ8Ww2ilm03mQt_v4QkZvQMymqUeTiPS3U1ViKM,11067
+linkml_store/utils/format_utils.py,sha256=hHRFkh3cwb5shM6RO7WWuOXsHHH283M_vZjXRuzbwWI,13035
 linkml_store/utils/io.py,sha256=JHUrWDtlZC2jtN_PQZ4ypdGIyYlftZEN3JaCvEPs44w,884
-linkml_store/utils/llm_utils.py,sha256=0lvR_lBSDSuP-0Eum16QBUsSv8sWfDjZPz_MnDSPvn0,3048
+linkml_store/utils/llm_utils.py,sha256=51AiwMeXm2FpiD-9AywKcbZzlUMqXRAjDFJEp5Ia0LA,3494
 linkml_store/utils/mongodb_utils.py,sha256=Rl1YmMKs1IXwSsJIViSDChbi0Oer5cBnMmjka2TeQS8,4665
 linkml_store/utils/neo4j_utils.py,sha256=y3KPmDZ8mQmePgg0lUeKkeKqzEr2rV226xxEtHc5pRg,1266
-linkml_store/utils/object_utils.py,sha256=Vib-5Ip2DlRVKLZpU-008ZZI813-vfKVSCY0TksRenM,6293
+linkml_store/utils/object_utils.py,sha256=V0s_ZzqAGkFUfrU-9fAPb5g3snMmgKKhR3SiYZgECXI,6353
 linkml_store/utils/pandas_utils.py,sha256=djiFPO3YbgRVo2XAZuKCtgH8QVLuUyPIsfS8e-0umsU,3182
 linkml_store/utils/patch_utils.py,sha256=q-h_v68okyruzdPTEHCe0WubbQHKpi1qy5bJ9vFWDo8,4823
 linkml_store/utils/query_utils.py,sha256=HWt46BsGWoIGiNBTtvpXGY6onPRWsQky6eu_9cYqbvo,3440
 linkml_store/utils/schema_utils.py,sha256=iJiZxo5NGr7v87h4DV6V9DrDOZHSswMRuf0N4V2rVtg,646
 linkml_store/utils/sklearn_utils.py,sha256=itPpcrsbbyOazdjmivaaZ1lyZeytm0a0hJ2AS8ziUgg,7590
-linkml_store/utils/sql_utils.py,sha256=T41w_vsc3SauTJQkDMwid_nOtKW1YOKyUuaxEf470hk,5938
+linkml_store/utils/sql_utils.py,sha256=qatmrJR2u4ICaO7QhDRL1ukxJlLv0zYSGgmmFV-hdnU,6210
 linkml_store/utils/stats_utils.py,sha256=4KqBb1bqDgAmq-1fJLLu5B2paPgoZZc3A-gnyVam4bI,1799
-linkml_store/utils/vector_utils.py,sha256=Q1RlpDzavJAM9-H2m2XNU5BNUcfZkpIWeEZii2hK0PQ,5449
+linkml_store/utils/vector_utils.py,sha256=QcLTUQWm5z1OTtiOl0mXKJyFJcQeCtbcc-GQwHhkUYw,5456
 linkml_store/webapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 linkml_store/webapi/html/__init__.py,sha256=hwp5eeBJKH65Bvv1x9Z4vsT1tLSYtb9Dq4I9r1kL1q0,69
 linkml_store/webapi/html/base.html.j2,sha256=hoiV2uaSxxrQp7VuAZBOHueH7czyJMYcPBRN6dZFYhk,693
@@ -73,8 +74,8 @@ linkml_store/webapi/html/database_details.html.j2,sha256=qtXdavbZb0mohiObI9dvJtk
 linkml_store/webapi/html/databases.html.j2,sha256=a9BCWQYfPeFhdUd31CWhB0yWhTIFXQayO08JgjyqKoc,294
 linkml_store/webapi/html/generic.html.j2,sha256=KtLaO2HUEF2Opq-OwHKgRKetNWe8IWc6JuIkxRPsywk,1018
 linkml_store/webapi/main.py,sha256=B0Da575kKR7X88N9ykm99Dem8FyBAW9f-w3A_JwUzfw,29165
-linkml_store-0.2.4.dist-info/LICENSE,sha256=77mDOslUnalYnuq9xQYZKtIoNEzcH9mIjvWHOKjamnE,1086
-linkml_store-0.2.4.dist-info/METADATA,sha256=PJX-_TSPk6WDXDCmvuFDUb5649ECQc2N6zP4pWqhBvU,7204
-linkml_store-0.2.4.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-linkml_store-0.2.4.dist-info/entry_points.txt,sha256=gWxVsHqx-t-UKWFHFzawQTvs4is4vC1rCF5AeKyqWWk,101
-linkml_store-0.2.4.dist-info/RECORD,,
+linkml_store-0.2.6.dist-info/LICENSE,sha256=77mDOslUnalYnuq9xQYZKtIoNEzcH9mIjvWHOKjamnE,1086
+linkml_store-0.2.6.dist-info/METADATA,sha256=s5x6OmbGC7oVUpXunjiM42sASvsvKR8XRoJllGqF6ww,6964
+linkml_store-0.2.6.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+linkml_store-0.2.6.dist-info/entry_points.txt,sha256=gWxVsHqx-t-UKWFHFzawQTvs4is4vC1rCF5AeKyqWWk,101
+linkml_store-0.2.6.dist-info/RECORD,,
linkml_store-0.2.6.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.0.1
+Generator: poetry-core 2.1.1
 Root-Is-Purelib: true
 Tag: py3-none-any