linkml-store 0.2.5__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (80)
  1. {linkml_store-0.2.5 → linkml_store-0.2.6}/PKG-INFO +2 -3
  2. {linkml_store-0.2.5 → linkml_store-0.2.6}/pyproject.toml +2 -2
  3. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/client.py +7 -3
  4. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/collection.py +60 -2
  5. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/database.py +15 -12
  6. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +11 -5
  7. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/duckdb_database.py +52 -19
  8. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +83 -0
  9. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/mongodb_database.py +7 -3
  10. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/cli.py +1 -1
  11. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/format_utils.py +60 -1
  12. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/sql_utils.py +7 -1
  13. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/vector_utils.py +1 -1
  14. {linkml_store-0.2.5 → linkml_store-0.2.6}/LICENSE +0 -0
  15. {linkml_store-0.2.5 → linkml_store-0.2.6}/README.md +0 -0
  16. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/__init__.py +0 -0
  17. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/__init__.py +0 -0
  18. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/config.py +0 -0
  19. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/queries.py +0 -0
  20. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/__init__.py +0 -0
  21. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
  22. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
  23. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
  24. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
  25. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
  26. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
  27. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
  28. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
  29. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
  30. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
  31. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
  32. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
  33. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
  34. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
  35. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/neo4j/neo4j_database.py +0 -0
  36. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/__init__.py +0 -0
  37. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
  38. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
  39. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
  40. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/types.py +0 -0
  41. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/constants.py +0 -0
  42. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/graphs/__init__.py +0 -0
  43. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/graphs/graph_map.py +0 -0
  44. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/index/__init__.py +0 -0
  45. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/index/implementations/__init__.py +0 -0
  46. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
  47. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
  48. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/index/indexer.py +0 -0
  49. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/inference/__init__.py +0 -0
  50. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/inference/evaluation.py +0 -0
  51. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/__init__.py +0 -0
  52. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/llm_inference_engine.py +0 -0
  53. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/rag_inference_engine.py +0 -0
  54. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/rule_based_inference_engine.py +0 -0
  55. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/sklearn_inference_engine.py +0 -0
  56. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/inference/inference_config.py +0 -0
  57. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/inference/inference_engine.py +0 -0
  58. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/inference/inference_engine_registry.py +0 -0
  59. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/__init__.py +0 -0
  60. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/change_utils.py +0 -0
  61. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/file_utils.py +0 -0
  62. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/io.py +0 -0
  63. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/llm_utils.py +0 -0
  64. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/mongodb_utils.py +0 -0
  65. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/neo4j_utils.py +0 -0
  66. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/object_utils.py +0 -0
  67. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/pandas_utils.py +0 -0
  68. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/patch_utils.py +0 -0
  69. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/query_utils.py +0 -0
  70. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/schema_utils.py +0 -0
  71. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/sklearn_utils.py +0 -0
  72. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/stats_utils.py +0 -0
  73. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/webapi/__init__.py +0 -0
  74. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/webapi/html/__init__.py +0 -0
  75. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/webapi/html/base.html.j2 +0 -0
  76. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
  77. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
  78. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
  79. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
  80. {linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/webapi/main.py +0 -0
{linkml_store-0.2.5 → linkml_store-0.2.6}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: linkml-store
-Version: 0.2.5
+Version: 0.2.6
 Summary: linkml-store
 License: MIT
 Author: Author 1
@@ -34,7 +34,6 @@ Requires-Dist: duckdb (>=0.10.1)
 Requires-Dist: duckdb-engine (>=0.11.2)
 Requires-Dist: fastapi ; extra == "fastapi"
 Requires-Dist: frictionless ; extra == "frictionless"
-Requires-Dist: gcsfs
 Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
 Requires-Dist: h5py ; extra == "h5py"
 Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
@@ -54,7 +53,7 @@ Requires-Dist: plotly ; extra == "analytics"
 Requires-Dist: py2neo ; extra == "neo4j"
 Requires-Dist: pyarrow ; extra == "pyarrow"
 Requires-Dist: pydantic (>=2.0.0,<3.0.0)
-Requires-Dist: pymongo ; extra == "mongodb"
+Requires-Dist: pymongo (>=4.11,<5.0) ; extra == "mongodb"
 Requires-Dist: pystow (>=0.5.4,<0.6.0)
 Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
 Requires-Dist: ruff (>=0.6.2) ; extra == "tests"
{linkml_store-0.2.5 → linkml_store-0.2.6}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "linkml-store"
-version = "0.2.5"
+version = "0.2.6"
 description = "linkml-store"
 authors = ["Author 1 <author@org.org>"]
 license = "MIT"
@@ -24,7 +24,7 @@ black = { version=">=24.0.0", optional = true }
 ruff = { version=">=0.6.2", optional = true }
 llm = { version="*", optional = true }
 tiktoken = { version="*", optional = true }
-pymongo = { version="*", optional = true }
+pymongo = "^4.11"
 neo4j = { version="*", optional = true }
 py2neo = { version="*", optional = true }
 networkx = { version="*", optional = true }
{linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/client.py
@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
 
 HANDLE_MAP = {
     "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
+    "sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
     "solr": "linkml_store.api.stores.solr.solr_database.SolrDatabase",
     "mongodb": "linkml_store.api.stores.mongodb.mongodb_database.MongoDBDatabase",
     "chromadb": "linkml_store.api.stores.chromadb.chromadb_database.ChromaDBDatabase",
@@ -24,6 +25,8 @@ HANDLE_MAP = {
 
 SUFFIX_MAP = {
     "ddb": "duckdb:///{path}",
+    "duckdb": "duckdb:///{path}",
+    "db": "duckdb:///{path}",
 }
 
 
@@ -204,9 +207,10 @@ class Client:
         if ":" not in handle:
             if alias is None:
                 alias = handle
-            suffix = handle.split(".")[-1]
-            if suffix in SUFFIX_MAP:
-                handle = SUFFIX_MAP[suffix].format(path=handle)
+            if "." in handle:
+                suffix = handle.split(".")[-1]
+                if suffix in SUFFIX_MAP:
+                    handle = SUFFIX_MAP[suffix].format(path=handle)
         if ":" not in handle:
             scheme = handle
             handle = None
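
Taken together, the client.py changes route more spellings to DuckDB: a new "sqlite" scheme in HANDLE_MAP, ".duckdb" and ".db" suffixes in SUFFIX_MAP, and a guard so dot-free handles skip the suffix lookup entirely. A rough usage sketch (file names here are hypothetical):

    from linkml_store import Client

    client = Client()
    # a bare path ending in .db or .duckdb now expands to "duckdb:///<path>",
    # as .ddb already did in 0.2.5
    db = client.attach_database("my_data.db")
    # "sqlite:..." handles are served by the same DuckDBDatabase class via SQLAlchemy
    sdb = client.attach_database("sqlite:///my_data.sqlite", alias="sqlite_test")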

{linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/collection.py
@@ -1,6 +1,7 @@
 """A structure for representing collections of similar objects."""
 
 import hashlib
+import json
 import logging
 from collections import defaultdict
 from pathlib import Path
@@ -210,8 +211,59 @@ class Collection(Generic[DatabaseType]):
         """
         raise NotImplementedError
 
+    def index(
+        self,
+        objs: Union[OBJECT, List[OBJECT]],
+        index_name: Optional[str] = None,
+        replace: bool = False,
+        unique: bool = False,
+        **kwargs,
+    ) -> None:
+        """
+        Index objects in the collection.
+
+        :param objs: object(s) to index
+        :param index_name: name of the index to create
+        :param replace: if True, replace an existing index
+        :param unique: if True, declare the index unique
+        :param kwargs: additional implementation-specific arguments
+        :return: None
+        """
+        raise NotImplementedError
+
+    def upsert(self,
+               objs: Union[OBJECT, List[OBJECT]],
+               filter_fields: List[str],
+               update_fields: Union[List[str], None] = None, **kwargs):
+        """
+        Insert objects into the collection, or update them if they already exist.
+
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("mongodb", alias="test")
+        >>> collection = db.create_collection("Person")
+        >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
+        >>> collection.upsert(objs, filter_fields=["id"])
+
+        :param objs: object(s) to insert or update
+        :param filter_fields: list of field names used to match existing objects
+        :param update_fields: list of field names to include in the update; if None, all fields are updated
+        :param kwargs: additional implementation-specific arguments
+
+        :return: None
+        """
+        raise NotImplementedError
+
     def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
-        logger.info(f"Pre-query hook (state: {self._initialized}; Q= {query}")
+        """
+        Pre-query hook.
+
+        This is called before a query is executed. It is used to materialize derivations and indexes.
+        :param query: the query about to be executed
+        :param kwargs: additional arguments
+        :return: None
+        """
+        logger.debug(f"Pre-query hook (state: {self._initialized}; Q={query})")  # at INFO level this is very noisy
         if not self._initialized:
             self._materialize_derivations()
             self._initialized = True
@@ -536,7 +588,13 @@ class Collection(Generic[DatabaseType]):
         qr = ix_coll.find(where=where, limit=-1, **kwargs)
         index_col = ix.index_field
         # TODO: optimize this for large indexes
-        vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
+        def row2array(row):
+            v = row[index_col]
+            if isinstance(v, str):
+                # sqlite stores arrays as JSON-encoded strings
+                v = json.loads(v)
+            return np.array(v, dtype=float)
+        vector_pairs = [(row, row2array(row)) for row in qr.rows]
         results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
         for r in results:
             del r[1][index_col]
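
The row2array helper exists because the two SQL backends round-trip vectors differently: DuckDB returns the indexed column as a list, while SQLite returns the JSON text it stored. An illustrative reduction of the two cases:

    import json
    import numpy as np

    for raw in ([0.1, 0.2], "[0.1, 0.2]"):  # duckdb-style list vs sqlite-style string
        v = json.loads(raw) if isinstance(raw, str) else raw
        print(np.array(v, dtype=float))  # both yield array([0.1, 0.2])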

{linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/database.py
@@ -276,14 +276,15 @@ class Database(ABC, Generic[CollectionType]):
 
     Examples:
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
         >>> db = client.attach_database("duckdb", alias="test")
         >>> collection = db.create_collection("Person", alias="persons")
         >>> collection.alias
         'persons'
+
         >>> collection.target_class_name
         'Person'
 
     If alias is not provided, it defaults to the name of the type.
 
@@ -419,7 +420,7 @@ class Database(ABC, Generic[CollectionType]):
         >>> from linkml_store.api.client import Client
         >>> from linkml_store.api.queries import Query
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person")
         >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
         >>> query = Query(from_table="Person", where_clause={"name": "John"})
@@ -451,7 +452,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person", alias="persons")
         >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
         >>> schema_view = db.schema_view
@@ -721,7 +722,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
         >>> db.list_collection_names()
         ['iris']
@@ -741,7 +742,9 @@ class Database(ABC, Generic[CollectionType]):
         # import into a test instance
         tmp_handle = source_format.value
         client = self.parent
-        tmp_db = client.attach_database(tmp_handle, alias="tmp")
+        tmp_alias = "tmp"
+        client.drop_database(tmp_alias, missing_ok=True)
+        tmp_db = client.attach_database(tmp_handle, alias=tmp_alias, recreate_if_exists=True)
         # TODO: check for infinite recursion
         tmp_db.import_database(location, source_format=source_format)
         obj = {}
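
The recreate_if_exists additions keep the doctests and the scratch "tmp" database reproducible across runs; previously a second import in the same session could collide with leftover state. A minimal sketch mirroring the updated doctest:

    from linkml_store.api.client import Client
    from linkml_store.utils.format_utils import Format

    client = Client()
    db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
    db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
    print(db.list_collection_names())  # ['iris']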

{linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/duckdb_collection.py
@@ -147,16 +147,22 @@ class DuckDBCollection(Collection):
         if self._table_created or self.metadata.is_prepopulated:
             logger.info(f"Already have table for: {cd.name}")
             return
-        query = Query(
-            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
-        )
-        qr = self.parent.query(query)
-        if qr.num_rows > 0:
+        if self.parent._table_exists(self.alias):
             logger.info(f"Table already exists for {cd.name}")
             self._table_created = True
             self._initialized = True
             self.metadata.is_prepopulated = True
             return
+        # query = Query(
+        #     from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        # )
+        # qr = self.parent.query(query)
+        # if qr.num_rows > 0:
+        #     logger.info(f"Table already exists for {cd.name}")
+        #     self._table_created = True
+        #     self._initialized = True
+        #     self.metadata.is_prepopulated = True
+        #     return
         logger.info(f"Creating table for {cd.name}")
         t = self._sqla_table(cd)
         ct = CreateTable(t)

{linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/duckdb_database.py
@@ -1,7 +1,7 @@
 import json
 import logging
 from pathlib import Path
-from typing import Optional, Union
+from typing import List, Optional, Union
 
 import pandas as pd
 import sqlalchemy
@@ -14,7 +14,7 @@ from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
 from linkml_store.utils.format_utils import Format
-from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
+from linkml_store.utils.sql_utils import introspect_schema, query_to_sql, where_clause_to_sql
 
 TYPE_MAP = {
     "VARCHAR": "string",
@@ -62,7 +62,7 @@ class DuckDBDatabase(Database):
     def engine(self) -> sqlalchemy.Engine:
         if not self._engine:
             handle = self.handle
-            if not handle.startswith("duckdb://") and not handle.startswith(":"):
+            if not handle.startswith("duckdb://") and not handle.startswith(":") and "://" not in handle:
                 handle = f"duckdb:///{handle}"
             if ":memory:" not in handle:
                 # TODO: investigate this; duckdb appears to be prematurely caching
@@ -71,6 +71,10 @@ class DuckDBDatabase(Database):
             self._engine = sqlalchemy.create_engine(handle)
         return self._engine
 
+    @property
+    def _is_sqlite(self) -> bool:
+        return bool(self.handle and self.handle.startswith("sqlite:"))
+
     def commit(self, **kwargs):
         with self.engine.connect() as conn:
             conn.commit()
@@ -89,34 +93,60 @@ class DuckDBDatabase(Database):
             if not missing_ok:
                 raise FileNotFoundError(f"Database file not found: {path}")
 
-    def query(self, query: Query, **kwargs) -> QueryResult:
+    def _table_exists(self, table: str) -> bool:
+        if self._is_sqlite:
+            if table == "sqlite_master":
+                return True
+            meta_query = Query(
+                from_table="sqlite_master",
+                where_clause={
+                    # "type": "table",
+                    "name": table,
+                },
+            )
+        else:
+            if table.startswith("information_schema"):
+                return True
+            meta_query = Query(
+                from_table="information_schema.tables",
+                where_clause={
+                    "table_type": "BASE TABLE",
+                    "table_name": table,
+                },
+            )
+
+        qr = self.query(meta_query)
+        if qr.num_rows == 0:
+            logger.debug(f"Table {table} not created yet")
+            return False
+        return True
+
+    def _json_encoded_cols(self, table_name: str) -> Optional[List[str]]:
         json_encoded_cols = []
-        if query.from_table:
-            if not query.from_table.startswith("information_schema"):
-                meta_query = Query(
-                    from_table="information_schema.tables", where_clause={"table_name": query.from_table}
-                )
-                qr = self.query(meta_query)
-                if qr.num_rows == 0:
-                    logger.debug(f"Table {query.from_table} not created yet")
-                    return QueryResult(query=query, num_rows=0, rows=[])
-            if not query.from_table.startswith("information_schema"):
-                sv = self.schema_view
-            else:
-                sv = None
+        if table_name:
+            if table_name.startswith("information_schema") or table_name.startswith("sqlite"):
+                return []
+            sv = self.schema_view
             if sv:
                 cd = None
                 for c in self._collections.values():
-                    # if c.name == query.from_table or c.metadata.alias == query.from_table:
-                    if c.alias == query.from_table or c.target_class_name == query.from_table:
+                    if c.alias == table_name or c.target_class_name == table_name:
                         cd = c.class_definition()
                         break
                 if cd:
                     for att in sv.class_induced_slots(cd.name):
                         if att.inlined or att.inlined_as_list:
                             json_encoded_cols.append(att.name)
+        return json_encoded_cols
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        if not self._table_exists(query.from_table):
+            return QueryResult(query=query, num_rows=0, rows=[])
+        json_encoded_cols = self._json_encoded_cols(query.from_table)
+
         with self.engine.connect() as conn:
             count_query_str = text(query_to_sql(query, count=True))
+            logger.debug(f"count_query_str: {count_query_str}")
             num_rows = list(conn.execute(count_query_str))[0][0]
             logger.debug(f"num_rows: {num_rows}")
             query_str = query_to_sql(query, **kwargs)  # include offset, limit
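
The refactor splits the old monolithic query() into _table_exists and _json_encoded_cols, probing sqlite_master for SQLite handles and information_schema.tables for DuckDB. Behavior is preserved: querying a table that has not been created yet still returns an empty result rather than raising. A rough sketch ("NotYetCreated" is a hypothetical collection name):

    from linkml_store import Client
    from linkml_store.api.queries import Query

    client = Client()
    db = client.attach_database("duckdb", alias="test")
    qr = db.query(Query(from_table="NotYetCreated"))
    print(qr.num_rows)  # 0; _table_exists short-circuits before any SQL is issued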
@@ -167,6 +197,9 @@ class DuckDBDatabase(Database):
         logger.info(f"Inducing schema view for {self.metadata.handle} // {self}")
         sb = SchemaBuilder()
         schema = sb.schema
+        logger.info(f"Checking if {self.metadata.handle} is sqlite: {self._is_sqlite}")
+        if self._is_sqlite:
+            return SchemaView(schema)
         query = Query(from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE"})
         qr = self.query(query)
         logger.info(f"Found {qr.num_rows} information_schema.tables // {qr.rows}")

{linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/mongodb_collection.py
@@ -41,6 +41,90 @@ class MongoDBCollection(Collection):
                 del obj["_id"]
         self._post_insert_hook(objs)
 
+
+    def index(self,
+              objs: Union[OBJECT, List[OBJECT]],
+              index_name: Optional[str] = None,
+              replace: bool = False,
+              unique: bool = False,
+              **kwargs):
+        """
+        Create indexes on the collection.
+
+        :param objs: field(s) to index
+        :param index_name: optional name for the index
+        :param replace: if True, an existing index on the field is dropped and recreated
+        :param unique: if True, create a unique index (default: False)
+        """
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        existing_indexes = self.mongo_collection.index_information()
+
+        for obj in objs:
+            field_exists = False
+            index_to_drop = None
+
+            # Check whether this field is already covered by an existing index
+            for existing_name, index_details in existing_indexes.items():
+                indexed_fields = [field[0] for field in index_details.get("key", [])]
+
+                if obj in indexed_fields:
+                    field_exists = True
+                    index_to_drop = existing_name if replace else None
+
+            # Drop the index if replace=True and a matching index was found
+            if index_to_drop:
+                self.mongo_collection.drop_index(index_to_drop)
+                logging.debug(f"Dropped existing index: {index_to_drop}")
+
+            # Create the new index only if it doesn't exist or was just dropped
+            if not field_exists or replace:
+                self.mongo_collection.create_index(obj, name=index_name, unique=unique)
+                logging.debug(f"Created new index: {index_name} on field {obj}, unique={unique}")
+            else:
+                logging.debug(f"Index already exists for field {obj}, skipping creation.")
+
+    def upsert(self,
+               objs: Union[OBJECT, List[OBJECT]],
+               filter_fields: List[str],
+               update_fields: Optional[List[str]] = None,
+               **kwargs):
+        """
+        Upsert one or more documents into the MongoDB collection.
+
+        :param objs: the document(s) to insert or update
+        :param filter_fields: list of field names to use as the filter for matching existing documents
+        :param update_fields: list of field names to include in the update; if None, all fields are updated
+        """
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        for obj in objs:
+            # Ensure at least one filter field is present in the object
+            filter_criteria = {field: obj[field] for field in filter_fields if field in obj}
+            if not filter_criteria:
+                raise ValueError("At least one valid filter field must be present in each object.")
+
+            # Check whether a matching document already exists
+            existing_doc = self.mongo_collection.find_one(filter_criteria)
+
+            if existing_doc:
+                # Update only the fields that changed; default to all fields when update_fields is None
+                fields_to_update = update_fields if update_fields is not None else list(obj.keys())
+                updates = {key: obj[key] for key in fields_to_update if key in obj and obj[key] != existing_doc.get(key)}
+
+                if updates:
+                    self.mongo_collection.update_one(filter_criteria, {"$set": updates})
+                    logging.debug(f"Updated existing document: {filter_criteria} with {updates}")
+                else:
+                    logging.debug(f"No changes detected for document: {filter_criteria}. Skipping update.")
+            else:
+                # Insert a new document
+                self.mongo_collection.insert_one(obj)
+                logging.debug(f"Inserted new document: {obj}")
+
     def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
         mongo_filter = self._build_mongo_filter(query.where_clause)
         limit = limit or query.limit
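
A hedged usage sketch for the two new MongoDB methods (field and value names are illustrative):

    collection.index("id", index_name="id_idx", unique=True)   # create a unique index on "id"
    collection.index("id", index_name="id_idx", replace=True)  # drop and recreate it
    collection.upsert(
        [{"id": "P1", "name": "John", "age_in_years": 31}],
        filter_fields=["id"],            # match existing documents on "id"
        update_fields=["age_in_years"],  # only write this field, and only if it changed
    )

Note that upsert issues a find_one plus update_one round trip per object; for large batches, pymongo's bulk_write with ReplaceOne(..., upsert=True) operations would reduce round trips, at the cost of the changed-fields comparison.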

{linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/mongodb_database.py
@@ -3,6 +3,7 @@
 import logging
 from pathlib import Path
 from typing import Optional, Union
+from urllib.parse import urlparse
 
 from pymongo import MongoClient
 from pymongo.database import Database as NativeDatabase
@@ -38,10 +39,12 @@ class MongoDBDatabase(Database):
     @property
     def _db_name(self) -> str:
         if self.handle:
-            db = self.handle.split("/")[-1]
+            parsed_url = urlparse(self.handle)
+            path_parts = parsed_url.path.lstrip("/").split("?")[0].split("/")
+            db_name = path_parts[0] if path_parts and path_parts[0] else "default"
         else:
-            db = "default"
-        return db
+            db_name = "default"
+        return db_name
 
     @property
     def native_client(self) -> MongoClient:
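
The urlparse-based _db_name copes with credentials, ports, and query parameters that the old split("/")[-1] mangled. For example (connection string is illustrative):

    from urllib.parse import urlparse

    handle = "mongodb://user:pw@localhost:27017/mydb?authSource=admin"
    path = urlparse(handle).path           # '/mydb' (the query string lands in .query)
    print(path.lstrip("/").split("/")[0])  # 'mydb'; the old code returned 'mydb?authSource=admin'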

{linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/cli.py
@@ -186,7 +186,7 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
 
 
 @cli.command()
-@click.argument("files", type=click.Path(exists=True), nargs=-1)
+@click.argument("files", type=click.Path(), nargs=-1)
 @click.option("--replace/--no-replace", default=False, show_default=True, help="Replace existing objects")
 @click.option("--format", "-f", type=format_choice, help="Input format")
 @click.option("--object", "-i", multiple=True, help="Input object as YAML")

{linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/format_utils.py
@@ -3,6 +3,7 @@ import gzip
 import io
 import json
 import logging
+import re
 import sys
 import tarfile
 from enum import Enum
@@ -31,10 +32,13 @@ class Format(Enum):
     TSV = "tsv"
     CSV = "csv"
     XML = "xml"
+    OBO = "obo"
+    PKL = "pkl"
     PYTHON = "python"
     PARQUET = "parquet"
     FORMATTED = "formatted"
     TABLE = "table"
+    XLSX = "xlsx"
     SQLDUMP_DUCKDB = "duckdb"
     SQLDUMP_POSTGRES = "postgres"
     DUMP_MONGODB = "mongodb"
@@ -67,6 +71,9 @@ class Format(Enum):
     def is_dump_format(self):
         return self in [Format.SQLDUMP_DUCKDB, Format.SQLDUMP_POSTGRES, Format.DUMP_MONGODB]
 
+    def is_binary_format(self):
+        return self in [Format.PARQUET, Format.XLSX]
+
     def is_xsv(self):
         return self in [Format.TSV, Format.CSV]
@@ -95,6 +102,26 @@ def load_objects_from_url(
     return objs
 
 
+def clean_pandas_value(v):
+    """Clean a single value from pandas."""
+    import math
+
+    if isinstance(v, float):
+        if math.isnan(v) or math.isinf(v):
+            return None
+        return float(v)  # ensure a plain Python float rather than a numpy scalar
+    return v
+
+
+def clean_nested_structure(obj):
+    """Recursively clean a nested structure of dicts/lists from pandas."""
+    if isinstance(obj, dict):
+        return {k: clean_nested_structure(v) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [clean_nested_structure(item) for item in obj]
+    else:
+        return clean_pandas_value(obj)
+
 def process_file(
     f: IO, format: Format, expected_type: Optional[Type] = None, header_comment_token: Optional[str] = None
 ) -> List[Dict[str, Any]]:
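
These helpers matter because pandas encodes missing cells as NaN (and occasionally inf), which most JSON and document stores reject. An illustrative round trip:

    rows = [{"name": "setosa", "width": 3.5}, {"name": "virginica", "width": float("nan")}]
    print(clean_nested_structure(rows))
    # [{'name': 'setosa', 'width': 3.5}, {'name': 'virginica', 'width': None}]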
@@ -128,6 +155,19 @@ def process_file(
         objs = list(reader)
     elif format == Format.XML:
         objs = xmltodict.parse(f.read())
+    elif format == Format.PKL:
+        objs = pd.read_pickle(f).to_dict(orient="records")
+    elif format == Format.XLSX:
+        xls = pd.ExcelFile(f)
+        objs = {sheet: clean_nested_structure(xls.parse(sheet).to_dict(orient="records")) for sheet in xls.sheet_names}
+    elif format == Format.OBO:
+        blocks = split_document(f.read(), "\n\n")
+        id_pattern = re.compile(r"id: (\S+)")
+        def get_id(block):
+            m = id_pattern.search(block)
+            return m.group(1) if m else None
+        objs = [{"id": get_id(block), "content": block} for block in blocks]
+        objs = [obj for obj in objs if obj["id"]]
     elif format == Format.PARQUET:
         import pyarrow.parquet as pq
 
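The OBO branch is deliberately shallow: it does not parse stanzas into key-value pairs, it just splits on blank lines and keeps blocks that carry an id. A sketch of the resulting objects for a two-term fragment (terms are illustrative):

    text = "[Term]\nid: GO:0000001\nname: mitochondrion inheritance\n\n[Term]\nid: GO:0000002"
    blocks = text.split("\n\n")
    # -> [{"id": "GO:0000001", "content": "[Term]\nid: GO:0000001\n..."},
    #     {"id": "GO:0000002", "content": "[Term]\nid: GO:0000002"}]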
@@ -167,6 +207,14 @@ def load_objects(
     if isinstance(file_path, Path):
         file_path = str(file_path)
 
+    for url_scheme in ["http", "https", "ftp"]:
+        if file_path.startswith(f"{url_scheme}://"):
+            return load_objects_from_url(
+                file_path,
+                format=format,
+                expected_type=expected_type,
+            )
+
     if isinstance(format, str):
         format = Format(format)
 
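With this guard, load_objects can be handed a remote location directly and defers to load_objects_from_url before any local-path handling runs (URL is hypothetical):

    from linkml_store.utils.format_utils import Format, load_objects

    objs = load_objects("https://example.org/data/pets.csv", format=Format.CSV)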
@@ -185,9 +233,9 @@ def load_objects(
     else:
         if Path(file_path).is_dir():
             raise ValueError(f"{file_path} is a dir, which is invalid for {format}")
-        mode = "rb" if format == Format.PARQUET or compression == "gz" else "r"
         open_func = gzip.open if compression == "gz" else open
         format = Format.guess_format(file_path) if not format else format
+        mode = "rb" if (format and format.is_binary_format()) or compression == "gz" else "r"
     with open_func(file_path, mode) if file_path != "-" else sys.stdin as f:
         if compression == "gz" and mode == "r":
             f = io.TextIOWrapper(f)
@@ -343,3 +391,14 @@ def guess_format(path: str) -> Optional[Format]:
     :return: The guessed format.
     """
     return Format.guess_format(path)
+
+
+def split_document(doc: str, delimiter: str):
+    """
+    Split a document into parts based on a delimiter.
+
+    :param doc: The document to split.
+    :param delimiter: The delimiter.
+    :return: The parts of the document.
+    """
+    return doc.split(delimiter)

{linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/sql_utils.py
@@ -5,7 +5,7 @@ import sqlalchemy
 import sqlalchemy.sql.sqltypes as sqlt
 from linkml_runtime.linkml_model import SchemaDefinition, SlotDefinition
 from linkml_runtime.utils.schema_builder import SchemaBuilder
-from sqlalchemy import MetaData
+from sqlalchemy import MetaData, quoted_name
 
 from linkml_store.api.queries import Query
 
@@ -115,7 +115,13 @@ def facet_count_sql(query: Query, facet_column: Union[str, Tuple[str, ...]], multivalued
     conditions = [cond for cond in where_clause_sql.split(" AND ") if not cond.startswith(f"{facet_column} ")]
     modified_where = " AND ".join(conditions)
 
+    def make_col_safe(col):
+        return '"' + quoted_name(col, True) + '"' if ' ' in col else col
+
+    if isinstance(facet_column, str):
+        facet_column = make_col_safe(facet_column)
     if isinstance(facet_column, tuple):
+        facet_column = [make_col_safe(col) for col in facet_column]
         if multivalued:
             raise NotImplementedError("Multivalued facets are not supported for multiple columns")
         facet_column = ", ".join(facet_column)
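
make_col_safe only rewrites column names containing spaces, which previously produced invalid facet SQL. Roughly, in terms of the helper defined above:

    make_col_safe("species")      # -> species (unchanged)
    make_col_safe("petal width")  # -> "petal width" (double-quoted for the GROUP BY)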

{linkml_store-0.2.5 → linkml_store-0.2.6}/src/linkml_store/utils/vector_utils.py
@@ -34,7 +34,7 @@ def pairwise_cosine_similarity(vector1: np.array, vector2: np.array) -> float:
     dot_product = np.dot(vector1, vector2)
     norm1 = np.linalg.norm(vector1)
     norm2 = np.linalg.norm(vector2)
-    return dot_product / (norm1 * norm2)
+    return float(dot_product / (norm1 * norm2))
 
 
 def compute_cosine_similarity_matrix(list1: LOL, list2: LOL) -> np.ndarray:
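
Wrapping the result in float() returns a plain Python float rather than a numpy scalar, matching the declared return type and keeping np.float64 out of strict serializers. For instance (requires PyYAML):

    import numpy as np
    import yaml

    score = np.dot([1.0, 0.0], [1.0, 0.0])  # np.float64, even though it prints as 1.0
    yaml.safe_dump({"sim": float(score)})   # fine
    # yaml.safe_dump({"sim": score}) raises RepresenterError: np.float64 is not a plain float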