linkml-store 0.2.9__tar.gz → 0.2.10rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of linkml-store has been flagged as a potentially problematic release.

Files changed (86)
  1. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/PKG-INFO +1 -1
  2. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/pyproject.toml +5 -1
  3. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/collection.py +2 -2
  4. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/database.py +1 -12
  5. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +25 -23
  6. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/duckdb_database.py +2 -2
  7. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +39 -25
  8. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/neo4j/neo4j_database.py +1 -1
  9. linkml_store-0.2.10rc1/src/linkml_store/api/stores/solr/solr_collection.py +222 -0
  10. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/cli.py +1 -2
  11. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/implementations/llm_indexer.py +0 -1
  12. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/indexer.py +2 -1
  13. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/llm_inference_engine.py +2 -4
  14. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/inference_config.py +1 -1
  15. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/inference_engine.py +1 -1
  16. linkml_store-0.2.10rc1/src/linkml_store/plotting/__init__.py +5 -0
  17. linkml_store-0.2.10rc1/src/linkml_store/plotting/cli.py +172 -0
  18. linkml_store-0.2.10rc1/src/linkml_store/plotting/heatmap.py +356 -0
  19. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/dat_parser.py +1 -1
  20. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/enrichment_analyzer.py +7 -7
  21. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/llm_utils.py +1 -1
  22. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/object_utils.py +9 -3
  23. linkml_store-0.2.9/src/linkml_store/api/stores/solr/solr_collection.py +0 -139
  24. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/LICENSE +0 -0
  25. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/README.md +0 -0
  26. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/__init__.py +0 -0
  27. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/__init__.py +0 -0
  28. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/client.py +0 -0
  29. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/config.py +0 -0
  30. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/queries.py +0 -0
  31. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/__init__.py +0 -0
  32. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
  33. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
  34. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
  35. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
  36. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
  37. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
  38. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
  39. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/filesystem/filesystem_database.py +1 -1
  40. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
  41. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
  42. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
  43. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
  44. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
  45. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
  46. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
  47. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/solr/__init__.py +0 -0
  48. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
  49. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
  50. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/types.py +0 -0
  51. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/constants.py +0 -0
  52. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/graphs/__init__.py +0 -0
  53. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/graphs/graph_map.py +0 -0
  54. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/__init__.py +0 -0
  55. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/implementations/__init__.py +0 -0
  56. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
  57. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/__init__.py +0 -0
  58. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/evaluation.py +0 -0
  59. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/__init__.py +0 -0
  60. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/rag_inference_engine.py +0 -0
  61. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/rule_based_inference_engine.py +0 -0
  62. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/sklearn_inference_engine.py +0 -0
  63. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/inference_engine_registry.py +0 -0
  64. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/__init__.py +0 -0
  65. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/change_utils.py +0 -0
  66. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/file_utils.py +0 -0
  67. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/format_utils.py +0 -0
  68. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/io.py +0 -0
  69. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/mongodb_utils.py +0 -0
  70. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/neo4j_utils.py +0 -0
  71. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/pandas_utils.py +0 -0
  72. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/patch_utils.py +0 -0
  73. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/query_utils.py +0 -0
  74. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/schema_utils.py +0 -0
  75. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/sklearn_utils.py +0 -0
  76. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/sql_utils.py +0 -0
  77. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/stats_utils.py +0 -0
  78. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/vector_utils.py +0 -0
  79. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/__init__.py +0 -0
  80. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/__init__.py +0 -0
  81. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/base.html.j2 +0 -0
  82. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
  83. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
  84. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
  85. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
  86. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/main.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: linkml-store
-Version: 0.2.9
+Version: 0.2.10rc1
 Summary: linkml-store
 License: MIT
 Author: Author 1
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "linkml-store"
-version = "0.2.9"
+version = "0.2.10rc1"
 description = "linkml-store"
 authors = ["Author 1 <author@org.org>"]
 license = "MIT"
@@ -67,6 +67,10 @@ jupyter = "*"
 jupysql = "*"
 papermill = "*"
 nbdime = "*"
+codespell = {version = ">=2.3.0"}
+tomli = {version = ">=2.0.1"}
+black = {version = ">=24.0.0"}
+ruff = {version = ">=0.6.2"}
 
 [tool.poetry.group.tests.dependencies]
 pytest = "^7.4.0"
src/linkml_store/api/collection.py
@@ -641,11 +641,11 @@ class Collection(Generic[DatabaseType]):
         if isinstance(group_by_fields, str):
             group_by_fields = [group_by_fields]
         df = self.find(where=where, limit=-1).rows_dataframe
-
+
         # Handle the case where agg_map is None
         if agg_map is None:
             agg_map = {}
-
+
         pk_fields = agg_map.get("first", []) + group_by_fields
         list_fields = agg_map.get("list", [])
         if not list_fields:
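For orientation, the `agg_map` argument handled in this hunk maps aggregation modes to field lists. A minimal usage sketch follows; the collection and field names are illustrative, not taken from the diff:

    # Hypothetical call: group rows by "country", nesting the remaining fields per group.
    # The "first" and "list" keys are the ones read via agg_map.get(...) in the hunk above.
    result = collection.group_by(
        group_by_fields=["country"],
        inlined_field="objects",
        agg_map={"first": ["country"], "list": ["name", "age"]},
        where={"status": "active"},
    )
    for row in result.rows:
        print(row["country"], len(row["objects"]))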
src/linkml_store/api/database.py
@@ -606,24 +606,13 @@ class Database(ABC, Generic[CollectionType]):
         """
         return list(self.iter_validate_database(**kwargs))
 
-    def validate_database(self, **kwargs) -> List["ValidationResult"]:
-        """
-        Validate the contents of the database.
-
-        As `iter_validate_database`, but returns a list of validation results.
-
-        :param kwargs:
-        :return:
-        """
-        return list(self.iter_validate_database(**kwargs))
-
     def iter_validate_database(
         self, ensure_referential_integrity: bool = None, **kwargs
     ) -> Iterator["ValidationResult"]:
         """
         Validate the contents of the database.
 
-        An an example, let's create a database with a predefined schema
+        An example, let's create a database with a predefined schema
         from the countries.linkml.yaml file:
 
         >>> from linkml_store.api.client import Client
src/linkml_store/api/stores/duckdb/duckdb_collection.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Dict, List, Optional, Union, Tuple
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import sqlalchemy as sqla
 from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
@@ -155,9 +155,9 @@ class DuckDBCollection(Collection):
     ) -> QueryResult:
         """
         Group objects in the collection by specified fields using SQLAlchemy.
-
+
         This implementation leverages DuckDB's SQL capabilities for more efficient grouping.
-
+
         :param group_by_fields: List of fields to group by
         :param inlined_field: Field name to store aggregated objects
        :param agg_map: Dictionary mapping aggregation types to fields
@@ -167,31 +167,32 @@ class DuckDBCollection(Collection):
         """
         if isinstance(group_by_fields, str):
             group_by_fields = [group_by_fields]
-
+
         cd = self.class_definition()
         if not cd:
             logger.debug(f"No class definition defined for {self.alias} {self.target_class_name}")
             return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
-
+
         # Check if the table exists
         if not self.parent._table_exists(self.alias):
             logger.debug(f"Table {self.alias} doesn't exist, falling back to parent implementation")
             return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
-
+
         # Get table definition
         table = self._sqla_table(cd)
         engine = self.parent.engine
-
+
         # Create a SQLAlchemy select statement for groups
-        from sqlalchemy import select, func, and_, or_
+        from sqlalchemy import select
+
         group_cols = [table.c[field] for field in group_by_fields if field in table.columns.keys()]
-
+
         if not group_cols:
             logger.warning(f"None of the group_by fields {group_by_fields} found in table columns")
             return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
-
+
         stmt = select(*group_cols).distinct()
-
+
         # Add where conditions if specified
         if where:
             conditions = []
@@ -219,24 +220,24 @@ class DuckDBCollection(Collection):
                 else:
                     # Direct equality comparison
                     conditions.append(table.c[k] == v)
-
+
             if conditions:
                 for condition in conditions:
                     stmt = stmt.where(condition)
-
+
         results = []
         try:
             with engine.connect() as conn:
                 # Get all distinct groups
                 group_result = conn.execute(stmt)
                 group_rows = list(group_result)
-
+
                 # For each group, get all objects
                 for group_row in group_rows:
                     # Build conditions for this group
                     group_conditions = []
                     group_dict = {}
-
+
                     for i, field in enumerate(group_by_fields):
                         if field in table.columns.keys():
                             value = group_row[i]
@@ -245,12 +246,12 @@ class DuckDBCollection(Collection):
                                 group_conditions.append(table.c[field].is_(None))
                             else:
                                 group_conditions.append(table.c[field] == value)
-
+
                     # Get all rows for this group
                     row_stmt = select(*table.columns)
                     for condition in group_conditions:
                         row_stmt = row_stmt.where(condition)
-
+
                     # Add original where conditions
                     if where:
                         for k, v in where.items():
@@ -276,10 +277,10 @@ class DuckDBCollection(Collection):
                             else:
                                 # Direct equality comparison
                                 row_stmt = row_stmt.where(table.c[k] == v)
-
+
                     row_result = conn.execute(row_stmt)
                     rows = list(row_result)
-
+
                     # Convert rows to dictionaries
                     objects = []
                     for row in rows:
@@ -287,18 +288,18 @@ class DuckDBCollection(Collection):
                         for i, col in enumerate(row._fields):
                            obj[col] = row[i]
                        objects.append(obj)
-
+
                    # Apply agg_map to filter fields if specified
                    if agg_map and "list" in agg_map:
                        list_fields = agg_map["list"]
                        if list_fields:
                            objects = [{k: obj.get(k) for k in list_fields if k in obj} for obj in objects]
-
+
                    # Create the result object
                    result_obj = group_dict.copy()
                    result_obj[inlined_field] = objects
                    results.append(result_obj)
-
+
                return QueryResult(num_rows=len(results), rows=results)
        except Exception as e:
            logger.warning(f"Error in DuckDB group_by: {e}")
@@ -316,7 +317,8 @@ class DuckDBCollection(Collection):
            self.metadata.is_prepopulated = True
            return
        # query = Query(
-       #     from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+       #     from_table="information_schema.tables",
+       #     where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
        # )
        # qr = self.parent.query(query)
        # if qr.num_rows > 0:
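The DuckDB changes above are mostly whitespace cleanup and trimming of unused SQLAlchemy imports; the grouping strategy itself is a two-pass pattern: select the distinct group-key combinations, then fetch the member rows per combination. A standalone sketch of that pattern, assuming the duckdb_engine SQLAlchemy dialect is installed; table and column names are illustrative:

    # Two-pass grouping sketch: distinct group keys first, then per-group member rows.
    from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, select

    engine = create_engine("duckdb:///:memory:")  # assumes the duckdb_engine dialect is available
    metadata = MetaData()
    persons = Table(
        "persons", metadata,
        Column("name", String), Column("country", String), Column("age", Integer),
    )
    metadata.create_all(engine)

    group_cols = [persons.c.country]
    grouped = []
    with engine.connect() as conn:
        for group_row in conn.execute(select(*group_cols).distinct()):
            stmt = select(*persons.columns)
            for col, value in zip(group_cols, group_row):
                # NULL group keys need IS NULL rather than an equality comparison
                stmt = stmt.where(col.is_(None) if value is None else col == value)
            members = [dict(r._mapping) for r in conn.execute(stmt)]
            grouped.append({"country": group_row[0], "objects": members})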
src/linkml_store/api/stores/duckdb/duckdb_database.py
@@ -1,7 +1,7 @@
 import json
 import logging
 from pathlib import Path
-from typing import Optional, Union, List
+from typing import List, Optional, Union
 
 import pandas as pd
 import sqlalchemy
@@ -14,7 +14,7 @@ from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
 from linkml_store.utils.format_utils import Format
-from linkml_store.utils.sql_utils import introspect_schema, query_to_sql, where_clause_to_sql
+from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
 
 TYPE_MAP = {
     "VARCHAR": "string",
src/linkml_store/api/stores/mongodb/mongodb_collection.py
@@ -7,6 +7,7 @@ from pymongo.collection import Collection as MongoCollection
 from linkml_store.api import Collection
 from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
 from linkml_store.api.queries import Query, QueryResult
+from linkml_store.utils.object_utils import object_path_get
 
 logger = logging.getLogger(__name__)
 
@@ -130,7 +131,15 @@ class MongoDBCollection(Collection):
     def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
         mongo_filter = self._build_mongo_filter(query.where_clause)
         limit = limit or query.limit
-        cursor = self.mongo_collection.find(mongo_filter)
+
+        # Build projection if select_cols are provided
+        projection = None
+        if query.select_cols:
+            projection = {"_id": 0}
+            for col in query.select_cols:
+                projection[col] = 1
+
+        cursor = self.mongo_collection.find(mongo_filter, projection)
         if limit and limit >= 0:
             cursor = cursor.limit(limit)
         offset = offset or query.offset
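The new projection logic above translates `select_cols` into a standard pymongo projection document before calling `find`. Shown in isolation, with hypothetical field names:

    # The projection document built above: drop Mongo's _id, include only requested columns.
    select_cols = ["name", "address.city"]
    projection = {"_id": 0}
    for col in select_cols:
        projection[col] = 1
    # projection == {"_id": 0, "name": 1, "address.city": 1}
    # cursor = mongo_collection.find({"status": "active"}, projection)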
@@ -141,9 +150,19 @@
 
         def _as_row(row: dict):
             row = copy(row)
-            del row["_id"]
+            if "_id" in row:
+                del row["_id"]
+
             if select_cols:
-                row = {k: row[k] for k in select_cols if k in row}
+                # For nested fields, ensure we handle them properly
+                result = {}
+                for col in select_cols:
+                    # If it's a nested field (contains dots)
+                    if "." in col or "[" in col:
+                        result[col] = object_path_get(row, col)
+                    elif col in row:
+                        result[col] = row[col]
+                return result
             return row
 
         rows = [_as_row(row) for row in cursor]
@@ -265,7 +284,7 @@ class MongoDBCollection(Collection):
         if deleted_rows_count == 0 and not missing_ok:
             raise ValueError(f"No rows found for {where}")
         return deleted_rows_count
-
+
     def group_by(
         self,
         group_by_fields: List[str],
@@ -276,9 +295,9 @@ class MongoDBCollection(Collection):
     ) -> QueryResult:
         """
         Group objects in the collection by specified fields using MongoDB's aggregation pipeline.
-
+
         This implementation leverages MongoDB's native aggregation capabilities for efficient grouping.
-
+
         :param group_by_fields: List of fields to group by
         :param inlined_field: Field name to store aggregated objects
         :param agg_map: Dictionary mapping aggregation types to fields
@@ -288,7 +307,7 @@ class MongoDBCollection(Collection):
         """
         if isinstance(group_by_fields, str):
             group_by_fields = [group_by_fields]
-
+
         # Build the group key for MongoDB
         if len(group_by_fields) == 1:
             # Single field grouping
@@ -296,34 +315,29 @@
         else:
             # Multi-field grouping
             group_id = {field: f"${field}" for field in group_by_fields}
-
+
         # Start building the pipeline
         pipeline = []
-
+
         # Add match stage if where clause is provided
         if where:
             pipeline.append({"$match": where})
-
+
         # Add the group stage
-        group_stage = {
-            "$group": {
-                "_id": group_id,
-                "objects": {"$push": "$$ROOT"}
-            }
-        }
+        group_stage = {"$group": {"_id": group_id, "objects": {"$push": "$$ROOT"}}}
         pipeline.append(group_stage)
-
+
         # Execute the aggregation
         logger.debug(f"MongoDB group_by pipeline: {pipeline}")
         aggregation_results = list(self.mongo_collection.aggregate(pipeline))
-
+
         # Transform the results to match the expected format
         results = []
         for result in aggregation_results:
             # Skip null groups if needed
             if result["_id"] is None and kwargs.get("skip_nulls", False):
                 continue
-
+
             # Create the group object
             if isinstance(result["_id"], dict):
                 # Multi-field grouping
@@ -331,15 +345,15 @@
             else:
                 # Single field grouping
                 group_obj = {group_by_fields[0]: result["_id"]}
-
+
             # Add the grouped objects
             objects = result["objects"]
-
+
             # Remove MongoDB _id field from each object
             for obj in objects:
                 if "_id" in obj:
                     del obj["_id"]
-
+
             # Apply any field selection or transformations based on agg_map
             if agg_map:
                 # Get first fields (fields to keep as single values)
@@ -347,7 +361,7 @@ class MongoDBCollection(Collection):
                 if first_fields:
                     # These are already in the group_obj from the _id
                     pass
-
+
                 # Get list fields (fields to aggregate as lists)
                 list_fields = agg_map.get("list", [])
                 if list_fields:
@@ -357,9 +371,9 @@
                    # If list_fields is empty but first_fields is specified,
                    # filter out first_fields from objects to avoid duplication
                    objects = [{k: v for k, v in obj.items() if k not in first_fields} for obj in objects]
-
+
             # Add the objects to the group
             group_obj[inlined_field] = objects
             results.append(group_obj)
-
+
         return QueryResult(num_rows=len(results), rows=results)
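The MongoDB `group_by` above delegates the grouping to a two-stage aggregation pipeline and then post-processes the groups in Python. The pipeline shape, sketched with illustrative field names:

    # Optional $match, then a $group that pushes each whole document into "objects".
    # skip_nulls handling and agg_map filtering happen in Python afterwards.
    where = {"status": "active"}
    group_by_fields = ["country", "city"]
    group_id = {field: f"${field}" for field in group_by_fields}

    pipeline = []
    if where:
        pipeline.append({"$match": where})
    pipeline.append({"$group": {"_id": group_id, "objects": {"$push": "$$ROOT"}}})
    # results = list(mongo_collection.aggregate(pipeline))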
src/linkml_store/api/stores/neo4j/neo4j_database.py
@@ -27,7 +27,7 @@ class Neo4jDatabase(Database):
         if handle is None:
             handle = "bolt://localhost:7687/neo4j"
         if handle.startswith("neo4j:"):
-            handle = handle.replace("neo4j:", "bolt:")
+            handle = handle.replace("neo4j:", "bolt:", 1)
         super().__init__(handle=handle, **kwargs)
 
     @property
src/linkml_store/api/stores/solr/solr_collection.py (new file)
@@ -0,0 +1,222 @@
+# solr_collection.py
+
+import logging
+from copy import copy
+from typing import Any, Dict, List, Optional, Union, Tuple
+
+import requests
+
+from linkml_store.api import Collection
+from linkml_store.api.collection import DEFAULT_FACET_LIMIT
+from linkml_store.api.queries import Query, QueryResult
+
+logger = logging.getLogger(__name__)
+
+
+class SolrCollection(Collection):
+
+    @property
+    def _collection_base(self) -> str:
+        if self.parent.use_cores:
+            base_url = f"{self.parent.base_url}/{self.alias}"
+        else:
+            base_url = self.parent.base_url
+        return base_url
+
+    def search(
+        self,
+        query: str,
+        where: Optional[Any] = None,
+        index_name: Optional[str] = None,
+        limit: Optional[int] = None,
+        **kwargs,
+    ) -> QueryResult:
+        if index_name is None:
+            index_name = "edismax"
+        qfs = self.parent.metadata.searchable_slots
+        if not qfs:
+            raise ValueError("No searchable slots configured for Solr collection")
+        solr_query = self._build_solr_query(where, search_term=query, extra={"defType": index_name, "qf": qfs})
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")
+
+        response = requests.get(f"{self._collection_base}/select", params=solr_query)
+        response.raise_for_status()
+
+        data = response.json()
+        num_rows = data["response"]["numFound"]
+        rows = data["response"]["docs"]
+        ranked_rows = [(1.0, row) for row in rows]
+        return QueryResult(query=where, search_term=query, num_rows=num_rows, rows=rows, ranked_rows=ranked_rows)
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        solr_query = self._build_solr_query(query)
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")
+
+        response = requests.get(f"{self._collection_base}/select", params=solr_query)
+        response.raise_for_status()
+
+        data = response.json()
+        logger.debug(f"Response: {data}")
+        num_rows = data["response"]["numFound"]
+        rows = data["response"]["docs"]
+
+        return QueryResult(query=query, num_rows=num_rows, rows=rows)
+
+    def query_facets(
+        self,
+        where: Optional[Dict] = None,
+        facet_columns: List[Union[str, Tuple[str, ...]]] = None,
+        facet_limit=DEFAULT_FACET_LIMIT,
+        facet_min_count: int = 1,
+        **kwargs,
+    ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
+        """
+        Query facet counts for fields or field combinations.
+
+        :param where: Filter conditions
+        :param facet_columns: List of fields to facet on. Elements can be:
+            - Simple strings for single field facets
+            - Tuples of strings for field combinations (pivot facets)
+        :param facet_limit: Maximum number of facet values to return
+        :param facet_min_count: Minimum count for facet values to be included
+        :return: Dictionary mapping fields or field tuples to lists of (value, count) tuples
+        """
+        solr_query = self._build_solr_query(where)
+
+        # Separate single fields and tuple fields
+        single_fields = []
+        tuple_fields = []
+
+        if facet_columns:
+            for field in facet_columns:
+                if isinstance(field, str):
+                    single_fields.append(field)
+                elif isinstance(field, tuple):
+                    tuple_fields.append(field)
+
+        # Process regular facets
+        results = {}
+        if single_fields:
+            solr_query["facet"] = "true"
+            solr_query["facet.field"] = single_fields
+            solr_query["facet.limit"] = facet_limit
+            solr_query["facet.mincount"] = facet_min_count
+
+            logger.info(f"Querying Solr collection {self.alias} for facets with query: {solr_query}")
+            response = requests.get(f"{self._collection_base}/select", params=solr_query)
+            response.raise_for_status()
+
+            data = response.json()
+            facet_counts = data["facet_counts"]["facet_fields"]
+
+            for facet_field, counts in facet_counts.items():
+                results[facet_field] = list(zip(counts[::2], counts[1::2]))
+
+        # Process pivot facets for tuple fields
+        if tuple_fields:
+            # TODO: Add a warning if Solr < 4.0, when this was introduced
+            for field_tuple in tuple_fields:
+                # Create a query for this specific field tuple
+                pivot_query = self._build_solr_query(where)
+                pivot_query["facet"] = "true"
+
+                # Create pivot facet
+                field_str = ','.join(field_tuple)
+                pivot_query["facet.pivot"] = field_str
+                pivot_query["facet.pivot.mincount"] = facet_min_count
+                pivot_query["facet.limit"] = facet_limit
+
+                logger.info(f"Querying Solr collection {self.alias} for pivot facets with query: {pivot_query}")
+                response = requests.get(f"{self._collection_base}/select", params=pivot_query)
+                response.raise_for_status()
+
+                data = response.json()
+                pivot_facets = data.get("facet_counts", {}).get("facet_pivot", {})
+
+                # Process pivot facets into the same format as MongoDB results
+                field_str = ','.join(field_tuple)
+                pivot_data = pivot_facets.get(field_str, [])
+
+                # Build a list of tuples (field values, count)
+                pivot_results = []
+                self._process_pivot_facets(pivot_data, [], pivot_results, field_tuple)
+
+                results[field_tuple] = pivot_results
+
+        return results
+
+    def _process_pivot_facets(self, pivot_data, current_values, results, field_tuple):
+        """
+        Recursively process pivot facet results to extract combinations of field values.
+
+        :param pivot_data: The pivot facet data from Solr
+        :param current_values: The current path of values in the recursion
+        :param results: The result list to populate
+        :param field_tuple: The original field tuple for reference
+        """
+        for item in pivot_data:
+            # Add the current field value
+            value = item.get("value")
+            count = item.get("count", 0)
+
+            # Update the current path with this value
+            values = current_values + [value]
+
+            # If we have all the fields from the tuple, add a result
+            if len(values) == len(field_tuple):
+                # Create a tuple of values corresponding to the field tuple
+                results.append((tuple(values), count))
+
+            # Process child pivot fields recursively
+            pivot = item.get("pivot", [])
+            if pivot and len(values) < len(field_tuple):
+                self._process_pivot_facets(pivot, values, results, field_tuple)
+
+    def _build_solr_query(
+        self, query: Union[Query, Dict], search_term="*:*", extra: Optional[Dict] = None
+    ) -> Dict[str, Any]:
+        solr_query = {}
+        if query is None:
+            query = {}
+
+        if isinstance(query, Query):
+            where = query.where_clause
+            solr_query["fq"] = self._build_solr_where_clause(where)
+
+            if query.select_cols:
+                solr_query["fl"] = ",".join(query.select_cols)
+
+            if query.limit:
+                solr_query["rows"] = query.limit
+
+            if query.offset:
+                solr_query["start"] = query.offset
+
+        elif isinstance(query, dict):
+            solr_query["fq"] = self._build_solr_where_clause(query)
+
+        solr_query["wt"] = "json"
+        if "q" not in solr_query:
+            solr_query["q"] = search_term
+        if extra:
+            solr_query.update(extra)
+        logger.info(f"Built Solr query: {solr_query}")
+        return solr_query
+
+    def _build_solr_where_clause(self, where_clause: Dict) -> str:
+        if where_clause is None:
+            where_clause = {}
+        conditions = []
+        if self.parent.metadata.collection_type_slot:
+            where_clause = copy(where_clause)
+            where_clause[self.parent.metadata.collection_type_slot] = self.alias
+        for field, value in where_clause.items():
+            if not isinstance(value, (list, tuple)):
+                value = [value]
+            value = [f'"{v}"' if isinstance(v, str) else str(v) for v in value]
+            if len(value) > 1:
+                conditions.append(f"{field}:({' '.join(value)})")
+            else:
+                conditions.append(f"{field}:{value[0]}")
+
+        return " AND ".join(conditions)
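A hedged usage sketch for the new `SolrCollection.query_facets`, matching its docstring: plain strings request field facets, tuples request pivot facets. The collection instance and field names here are illustrative only:

    # Given an already-configured SolrCollection instance `collection`:
    facets = collection.query_facets(
        where={"category": "gene"},
        facet_columns=["taxon", ("taxon", "chromosome")],  # str -> facet.field, tuple -> facet.pivot
        facet_limit=20,
        facet_min_count=2,
    )
    # facets["taxon"]                 -> [("Homo sapiens", 123), ("Mus musculus", 87), ...]
    # facets[("taxon", "chromosome")] -> [(("Homo sapiens", "1"), 17), ...]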
src/linkml_store/cli.py
@@ -3,8 +3,7 @@ import sys
 import warnings
 from collections import defaultdict
 from pathlib import Path
-from tokenize import group
-from typing import Optional, Tuple, Any
+from typing import Any, Optional, Tuple
 
 import click
 import yaml
src/linkml_store/index/implementations/llm_indexer.py
@@ -3,7 +3,6 @@ from pathlib import Path
 from typing import TYPE_CHECKING, List, Optional
 
 import numpy as np
-import openai
 
 from linkml_store.api.config import CollectionConfig
 from linkml_store.index.indexer import INDEX_ITEM, Indexer
src/linkml_store/index/indexer.py
@@ -3,9 +3,10 @@ from enum import Enum
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
 import numpy as np
-from linkml_store.utils.vector_utils import pairwise_cosine_similarity, mmr_diversified_search
 from pydantic import BaseModel
 
+from linkml_store.utils.vector_utils import mmr_diversified_search, pairwise_cosine_similarity
+
 INDEX_ITEM = np.ndarray
 
 logger = logging.getLogger(__name__)
src/linkml_store/inference/implementations/llm_inference_engine.py
@@ -1,18 +1,16 @@
-import json
 import logging
 from dataclasses import dataclass
 from pathlib import Path
 from typing import ClassVar, List, Optional, TextIO, Union
 
 import yaml
-from linkml_store.utils.llm_utils import parse_yaml_payload
 from llm import get_key
 from pydantic import BaseModel
 
-from linkml_store.api.collection import OBJECT, Collection
+from linkml_store.api.collection import OBJECT
 from linkml_store.inference.inference_config import Inference, InferenceConfig, LLMConfig
 from linkml_store.inference.inference_engine import InferenceEngine, ModelSerialization
-from linkml_store.utils.object_utils import select_nested
+from linkml_store.utils.llm_utils import parse_yaml_payload
 
 logger = logging.getLogger(__name__)