linkml-store 0.1.14__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of linkml-store might be problematic.
- linkml_store/api/collection.py +48 -5
- linkml_store/api/database.py +7 -1
- linkml_store/api/queries.py +3 -1
- linkml_store/api/stores/duckdb/duckdb_collection.py +8 -2
- linkml_store/cli.py +44 -18
- linkml_store/index/implementations/llm_indexer.py +20 -2
- linkml_store/index/indexer.py +51 -1
- linkml_store/inference/evaluation.py +195 -0
- linkml_store/inference/implementations/rag_inference_engine.py +120 -33
- linkml_store/inference/implementations/rule_based_inference_engine.py +15 -4
- linkml_store/inference/implementations/sklearn_inference_engine.py +20 -2
- linkml_store/inference/inference_config.py +1 -0
- linkml_store/inference/inference_engine.py +53 -19
- linkml_store/utils/format_utils.py +6 -0
- linkml_store/utils/llm_utils.py +2 -0
- linkml_store/utils/object_utils.py +100 -1
- {linkml_store-0.1.14.dist-info → linkml_store-0.2.1.dist-info}/METADATA +9 -1
- {linkml_store-0.1.14.dist-info → linkml_store-0.2.1.dist-info}/RECORD +21 -20
- {linkml_store-0.1.14.dist-info → linkml_store-0.2.1.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.14.dist-info → linkml_store-0.2.1.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.14.dist-info → linkml_store-0.2.1.dist-info}/entry_points.txt +0 -0

linkml_store/inference/implementations/rag_inference_engine.py CHANGED

````diff
@@ -1,13 +1,17 @@
+import json
 import logging
 from dataclasses import dataclass
-from …
+from pathlib import Path
+from typing import ClassVar, List, Optional, TextIO, Union

 import yaml
 from llm import get_key
+from pydantic import BaseModel

 from linkml_store.api.collection import OBJECT, Collection
 from linkml_store.inference.inference_config import Inference, InferenceConfig, LLMConfig
-from linkml_store.inference.inference_engine import InferenceEngine
+from linkml_store.inference.inference_engine import InferenceEngine, ModelSerialization
+from linkml_store.utils.object_utils import select_nested

 logger = logging.getLogger(__name__)

@@ -22,6 +26,12 @@ You should return ONLY valid YAML in your response.
 """


+class TrainedModel(BaseModel, extra="forbid"):
+    rag_collection_rows: List[OBJECT]
+    index_rows: List[OBJECT]
+    config: Optional[InferenceConfig] = None
+
+
 @dataclass
 class RAGInferenceEngine(InferenceEngine):
     """
@@ -48,14 +58,23 @@ class RAGInferenceEngine(InferenceEngine):
     >>> prediction.predicted_object
     {'capital': 'Montevideo', 'code': 'UY', 'continent': 'South America', 'languages': ['Spanish']}

+    The "model" can be saved for later use:
+
+    >>> ie.export_model("tests/output/countries.rag_model.json")
+
+    Note in this case the model is not the underlying LLM, but the "RAG Model" which is the vectorized
+    representation of training set objects.
+
     """

-    classifier: Any = None
-    encoders: dict = None
     _model: "llm.Model" = None  # noqa: F821

     rag_collection: Collection = None

+    PERSIST_COLS: ClassVar[List[str]] = [
+        "config",
+    ]
+
     def __post_init__(self):
         if not self.config:
             self.config = InferenceConfig()
@@ -75,18 +94,11 @@ class RAGInferenceEngine(InferenceEngine):
         return self._model

     def initialize_model(self, **kwargs):
-        …
-        …
-        …
-        rag_collection = …
-        …
-        base_collection = td.collection
-        objs = base_collection.find({}, offset=s[0], limit=s[1] - s[0]).rows
-        db = base_collection.parent
-        rag_collection = db.get_collection(f"{base_collection.alias}__rag_{s[0]}_{s[1]}", create_if_not_exists=True)
-        rag_collection.insert(objs)
-        rag_collection.attach_indexer("llm", auto_index=False)
-        self.rag_collection = rag_collection
+        logger.info(f"Initializing model {self.model}")
+        if self.training_data:
+            rag_collection = self.training_data.collection
+            rag_collection.attach_indexer("llm", auto_index=False)
+            self.rag_collection = rag_collection

     def object_to_text(self, object: OBJECT) -> str:
         return yaml.dump(object)
@@ -103,24 +115,34 @@ class RAGInferenceEngine(InferenceEngine):
         target_attributes = self.config.target_attributes
         num_examples = self.config.llm_config.number_of_few_shot_examples or 5
         query_text = self.object_to_text(object)
-        if not self.rag_collection…
-        …
-        …
-        …
-        …
-        …
+        if not self.rag_collection:
+            # TODO: zero-shot mode
+            examples = []
+        else:
+            if not self.rag_collection.indexers:
+                raise ValueError("RAG collection must have an indexer attached")
+            rs = self.rag_collection.search(query_text, limit=num_examples, index_name="llm")
+            examples = rs.rows
+            if not examples:
+                raise ValueError(f"No examples found for {query_text}; size = {self.rag_collection.size()}")
         prompt_clauses = []
+        query_obj = select_nested(object, feature_attributes)
+        query_text = self.object_to_text(query_obj)
         for example in examples:
-            input_obj = …
-            …
+            input_obj = select_nested(example, feature_attributes)
+            input_obj_text = self.object_to_text(input_obj)
+            if input_obj_text == query_text:
+                raise ValueError(
+                    f"Query object {query_text} is the same as example object {input_obj_text}\n"
+                    "This indicates possible test data leakage\n."
+                    "TODO: allow an option that allows user to treat this as a basic lookup\n"
+                )
+            output_obj = select_nested(example, target_attributes)
             prompt_clause = (
-                "---\nExample:\n"
-                f"## INPUT:\n{self.object_to_text(input_obj)}\n"
-                f"## OUTPUT:\n{self.object_to_text(output_obj)}\n"
+                "---\nExample:\n" f"## INPUT:\n{input_obj_text}\n" f"## OUTPUT:\n{self.object_to_text(output_obj)}\n"
             )
             prompt_clauses.append(prompt_clause)
-        …
-        query_text = self.object_to_text(query_obj)
+
         prompt_end = "---\nQuery:\n" f"## INPUT:\n{query_text}\n" "## OUTPUT:\n"
         system_prompt = SYSTEM_PROMPT.format(llm_config=self.config.llm_config)

@@ -137,9 +159,74 @@ class RAGInferenceEngine(InferenceEngine):
         response = model.prompt(prompt, system_prompt)
         yaml_str = response.text()
         logger.info(f"Response: {yaml_str}")
+        return Inference(predicted_object=self._parse_yaml_payload(yaml_str))
+
+    def _parse_yaml_payload(self, yaml_str: str, strict=False) -> Optional[OBJECT]:
+        if "```" in yaml_str:
+            yaml_str = yaml_str.split("```")[1].strip()
+            if yaml_str.startswith("yaml"):
+                yaml_str = yaml_str[4:].strip()
         try:
-            …
-            …
-            …
-            …
+            return yaml.safe_load(yaml_str)
+        except Exception as e:
+            if strict:
+                raise e
+            logger.error(f"Error parsing YAML: {yaml_str}\n{e}")
             return None
+
+    def export_model(
+        self, output: Optional[Union[str, Path, TextIO]], model_serialization: ModelSerialization = None, **kwargs
+    ):
+        self.save_model(output)
+
+    def save_model(self, output: Union[str, Path]) -> None:
+        """
+        Save the trained model and related data to a file.
+
+        :param output: Path to save the model
+        """
+
+        # trigger index
+        _qr = self.rag_collection.search("*", limit=1)
+        assert len(_qr.ranked_rows) > 0
+
+        rows = self.rag_collection.find(limit=-1).rows
+
+        indexers = self.rag_collection.indexers
+        assert len(indexers) == 1
+        ix = self.rag_collection.indexers["llm"]
+        ix_coll = self.rag_collection.parent.get_collection(self.rag_collection.get_index_collection_name(ix))
+
+        ix_rows = ix_coll.find(limit=-1).rows
+        assert len(ix_rows) > 0
+        tm = TrainedModel(rag_collection_rows=rows, index_rows=ix_rows, config=self.config)
+        # tm = TrainedModel(rag_collection_rows=rows, index_rows=ix_rows)
+        with open(output, "w", encoding="utf-8") as f:
+            json.dump(tm.model_dump(), f)
+
+    @classmethod
+    def load_model(cls, file_path: Union[str, Path]) -> "RAGInferenceEngine":
+        """
+        Load a trained model and related data from a file.
+
+        :param file_path: Path to the saved model
+        :return: SklearnInferenceEngine instance with loaded model
+        """
+        with open(file_path, "r", encoding="utf-8") as f:
+            model_data = json.load(f)
+        tm = TrainedModel(**model_data)
+        from linkml_store.api import Client
+
+        client = Client()
+        db = client.attach_database("duckdb", alias="training")
+        db.store({"data": tm.rag_collection_rows})
+        collection = db.get_collection("data")
+        ix = collection.attach_indexer("llm", auto_index=False)
+        assert ix.name
+        ix_coll_name = collection.get_index_collection_name(ix)
+        assert ix_coll_name
+        ix_coll = db.get_collection(ix_coll_name, create_if_not_exists=True)
+        ix_coll.insert(tm.index_rows)
+        ie = cls(config=tm.config)
+        ie.rag_collection = collection
+        return ie
````

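The save/load path added above serializes the vectorized RAG collection (not the LLM itself) as a `TrainedModel` JSON document. A minimal sketch of the round trip, assuming an already-trained engine `ie` such as the countries example in the class docstring (the output path is illustrative):

```python
from linkml_store.inference.implementations.rag_inference_engine import RAGInferenceEngine

# `ie` is assumed to be a trained RAGInferenceEngine (see the class docstring above)
ie.export_model("tests/output/countries.rag_model.json")  # delegates to save_model()

# load_model() rebuilds an in-memory duckdb collection from the serialized rows,
# re-attaches the "llm" indexer, and restores the index rows alongside it
restored = RAGInferenceEngine.load_model("tests/output/countries.rag_model.json")
assert restored.rag_collection is not None
```
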
linkml_store/inference/implementations/rule_based_inference_engine.py CHANGED

````diff
@@ -13,7 +13,7 @@ from linkml_runtime.utils.formatutils import underscore
 from pydantic import BaseModel

 from linkml_store.api.collection import OBJECT, Collection
-from linkml_store.inference.inference_config import Inference
+from linkml_store.inference.inference_config import Inference, InferenceConfig
 from linkml_store.inference.inference_engine import InferenceEngine, ModelSerialization

 logger = logging.getLogger(__name__)
@@ -111,11 +111,16 @@ class RuleBasedInferenceEngine(InferenceEngine):
         object = {underscore(k): v for k, v in object.items()}
         if self.slot_expressions:
             for slot, expr in self.slot_expressions.items():
-                print(f"EVAL {object}")
                 v = eval_expr(expr, **object)
                 if v is not None:
                     object[slot] = v
-        …
+        if self.config and self.config.target_attributes:
+            predicted_object = {k: object.get(k, None) for k in self.config.target_attributes}
+        else:
+            predicted_object = object
+        if all(v is None for v in predicted_object.values()):
+            return None
+        return Inference(predicted_object=predicted_object)

     def import_model_from(self, inference_engine: InferenceEngine, **kwargs):
         io = StringIO()
@@ -127,6 +132,8 @@ class RuleBasedInferenceEngine(InferenceEngine):
         if self.slot_expressions is None:
             self.slot_expressions = {}
         self.slot_expressions[target_attribute] = io.getvalue()
+        if not self.config:
+            self.config = inference_engine.config

     def save_model(self, output: Union[str, Path]) -> None:
         """
@@ -148,7 +155,11 @@ class RuleBasedInferenceEngine(InferenceEngine):
     def load_model(cls, file_path: Union[str, Path]) -> "RuleBasedInferenceEngine":
         model_data = yaml.safe_load(open(file_path))

-        …
+        if model_data["config"]:
+            config = InferenceConfig(**model_data["config"])
+        else:
+            config = None
+        engine = cls(config=config)
         for k, v in model_data.items():
             if k == "config":
                 continue
````

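The new post-processing in the rule-based engine is ordinary dictionary filtering; the same logic in isolation, with made-up attribute names:

```python
# mirror of the added logic: keep only config.target_attributes, and treat an
# all-None result as "no prediction" (the engine then returns None instead of an Inference)
evaluated = {"x": 1, "y": 2, "z": 3}   # object after eval_expr has filled in slots
target_attributes = ["z", "w"]         # "w" is absent, so it maps to None

predicted_object = {k: evaluated.get(k, None) for k in target_attributes}
assert predicted_object == {"z": 3, "w": None}
assert not all(v is None for v in predicted_object.values())  # at least one value, so a prediction is kept
```
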
linkml_store/inference/implementations/sklearn_inference_engine.py CHANGED

````diff
@@ -153,7 +153,7 @@ class SklearnInferenceEngine(InferenceEngine):
         y = y_encoder.fit_transform(y.values.ravel())  # Convert to 1D numpy array
         self.transformed_targets = y_encoder.classes_

-        …
+        # print(f"Fitting model with features: {X.columns}")
         clf = DecisionTreeClassifier(random_state=42)
         clf.fit(X, y)
         self.classifier = clf
@@ -174,6 +174,7 @@ class SklearnInferenceEngine(InferenceEngine):
             if col in self.encoders:
                 encoder = self.encoders[col]
                 if isinstance(encoder, OneHotEncoder):
+                    print(f"Encoding: {col} v={object[col]} df={new_X[[col]]} encoder={encoder}")
                     encoded = encoder.transform(new_X[[col]])
                     feature_names = encoder.get_feature_names_out([col])
                     for i, name in enumerate(feature_names):
@@ -216,7 +217,24 @@ class SklearnInferenceEngine(InferenceEngine):
         return Inference(predicted_object=predicted_object, confidence=self.confidence)

     def _normalize(self, object: OBJECT) -> OBJECT:
-        …
+        """
+        Normalize the input object to ensure it has all the expected attributes.
+
+        Also remove any numpy/pandas oddities
+
+        :param object:
+        :return:
+        """
+        np_map = {np.nan: None}
+
+        def _tr(x: Any):
+            # TODO: figure a more elegant way to do this
+            try:
+                return np_map.get(x, x)
+            except TypeError:
+                return x
+
+        return {k: _tr(object.get(k, None)) for k in self.config.feature_attributes}

     def export_model(
         self, output: Optional[Union[str, Path, TextIO]], model_serialization: ModelSerialization = None, **kwargs
````

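The new `_normalize` body hinges on a small NaN-to-None mapping; a standalone sketch of that trick using only numpy (values are illustrative):

```python
import numpy as np

np_map = {np.nan: None}  # np.nan is a single float object, so the dict lookup matches by identity

def _tr(x):
    try:
        return np_map.get(x, x)
    except TypeError:  # unhashable values (lists, dicts) pass through unchanged
        return x

assert _tr(np.nan) is None
assert _tr(5) == 5
assert _tr(["a", "b"]) == ["a", "b"]
```
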
linkml_store/inference/inference_config.py CHANGED

````diff
@@ -35,6 +35,7 @@ class InferenceConfig(BaseModel, extra="forbid"):
     feature_attributes: Optional[List[str]] = None
     train_test_split: Optional[Tuple[float, float]] = None
     llm_config: Optional[LLMConfig] = None
+    random_seed: Optional[int] = None

     @classmethod
     def from_file(cls, file_path: str, format: Optional[Format] = None) -> "InferenceConfig":
````

linkml_store/inference/inference_engine.py CHANGED

````diff
@@ -1,4 +1,5 @@
 import logging
+import random
 from abc import ABC
 from dataclasses import dataclass
 from enum import Enum
@@ -6,7 +7,7 @@ from pathlib import Path
 from typing import Optional, TextIO, Tuple, Union

 import pandas as pd
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict

 from linkml_store.api.collection import OBJECT, Collection
 from linkml_store.inference.inference_config import Inference, InferenceConfig
@@ -28,6 +29,7 @@ class ModelSerialization(str, Enum):
     PNG = "png"
     LINKML_EXPRESSION = "linkml_expression"
     RULE_BASED = "rulebased"
+    RAG_INDEX = "rag_index"

     @classmethod
     def from_filepath(cls, file_path: str) -> Optional["ModelSerialization"]:
@@ -57,11 +59,36 @@


 class CollectionSlice(BaseModel):
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-    …
-    …
-    …
-    …
+    model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid")
+
+    name: Optional[str] = None
+    base_collection: Optional[Collection] = None
+    # _dataframe: Optional[pd.DataFrame] = None
+    # slice: Tuple[Optional[int], Optional[int]] = Field(default=(None, None))
+    indices: Optional[Tuple[int, ...]] = None
+    _collection: Optional[Collection] = None
+
+    @property
+    def collection(self) -> Collection:
+        if not self._collection and not self.indices:
+            return self.base_collection
+        if not self._collection:
+            rows = self.base_collection.find({}, limit=-1).rows
+            subset = [rows[i] for i in self.indices]
+            db = self.base_collection.parent
+            subset_name = self.slice_alias
+            subset_collection = db.get_collection(subset_name, create_if_not_exists=True)
+            # ensure the collection has the same schema type as the base collection;
+            # this ensures that column/attribute types are preserved
+            subset_collection.metadata.type = self.base_collection.target_class_name
+            subset_collection.delete_where({})
+            subset_collection.insert(subset)
+            self._collection = subset_collection
+        return self._collection
+
+    @property
+    def slice_alias(self) -> str:
+        return f"{self.base_collection.alias}__rag_{self.name}"

     def as_dataframe(self, flattened=False) -> pd.DataFrame:
         """
@@ -69,17 +96,11 @@ class CollectionSlice(BaseModel):

         :return:
         """
-        …
-        …
-        return …
-        elif self.collection is not None:
-            rs = self.collection.find({}, offset=self.slice[0], limit=self.slice[1] - self.slice[0])
-            if flattened:
-                return nested_objects_to_dataframe(rs.rows)
-            else:
-                return rs.rows_dataframe
+        rs = self.collection.find({}, limit=-1)
+        if flattened:
+            return nested_objects_to_dataframe(rs.rows)
         else:
-            …
+            return rs.rows_dataframe


 @dataclass
@@ -96,21 +117,34 @@ class InferenceEngine(ABC):
     training_data: Optional[CollectionSlice] = None
     testing_data: Optional[CollectionSlice] = None

-    def load_and_split_data(self, collection: Collection, split: Optional[Tuple[float, float]] = None):
+    def load_and_split_data(self, collection: Collection, split: Optional[Tuple[float, float]] = None, randomize=True):
         """
         Load the data and split it into training and testing sets.

         :param collection:
         :param split:
+        :param randomize:
         :return:
         """
+        local_random = random.Random(self.config.random_seed) if self.config.random_seed else random.Random()
         split = split or self.config.train_test_split
         if not split:
             split = (0.7, 0.3)
+        if split[0] == 1.0:
+            self.training_data = CollectionSlice(name="train", base_collection=collection, indices=None)
+            self.testing_data = None
+            return
         logger.info(f"Loading and splitting data from collection {collection.alias}")
         size = collection.size()
-        …
-        …
+        indices = range(size)
+        if randomize:
+            train_indices = local_random.sample(indices, int(size * split[0]))
+            test_indices = set(indices) - set(train_indices)
+        else:
+            train_indices = indices[: int(size * split[0])]
+            test_indices = indices[int(size * split[0]) :]
+        self.training_data = CollectionSlice(name="train", base_collection=collection, indices=train_indices)
+        self.testing_data = CollectionSlice(name="test", base_collection=collection, indices=test_indices)

     def initialize_model(self, **kwargs):
         """
````

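Combined with the `random_seed` field added to `InferenceConfig`, `load_and_split_data` now yields reproducible index-based splits. A sketch against the in-memory duckdb backend; the database alias and the `name`/`label` attributes are invented for illustration:

```python
from linkml_store.api import Client
from linkml_store.inference.inference_config import InferenceConfig
from linkml_store.inference.implementations.rag_inference_engine import RAGInferenceEngine

client = Client()
db = client.attach_database("duckdb", alias="demo")
db.store({"data": [{"name": f"item{i}", "label": i % 2} for i in range(10)]})
collection = db.get_collection("data")

config = InferenceConfig(feature_attributes=["name"], target_attributes=["label"], random_seed=42)
ie = RAGInferenceEngine(config=config)
ie.load_and_split_data(collection, split=(0.7, 0.3), randomize=True)

# the seeded random.Random makes the 7/3 index split deterministic across runs
assert len(ie.training_data.indices) == 7
assert len(ie.testing_data.indices) == 3
```
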
linkml_store/utils/format_utils.py CHANGED

````diff
@@ -47,6 +47,7 @@ class Format(Enum):
             ".jsonl": cls.JSONL,
             ".yaml": cls.YAML,
             ".yml": cls.YAML,
+            ".yamll": cls.YAMLL,
             ".tsv": cls.TSV,
             ".csv": cls.CSV,
             ".py": cls.PYTHON,
@@ -98,6 +99,9 @@ def process_file(
     """
     Process a single file and return a list of objects.
     """
+    if format == Format.YAMLL:
+        format = Format.YAML
+        expected_type = list
     if format == Format.JSON:
         objs = json.load(f)
     elif format == Format.JSONL:
@@ -105,6 +109,8 @@ def process_file(
     elif format == Format.YAML:
         if expected_type and expected_type == list:  # noqa E721
             objs = list(yaml.safe_load_all(f))
+            # allow YAML with a `---` with no object before it
+            objs = [obj for obj in objs if obj is not None]
         else:
             objs = yaml.safe_load(f)
     elif format in [Format.TSV, Format.CSV]:
````

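The `.yamll` handling above just routes a multi-document YAML stream through `safe_load_all` and drops empty documents; roughly equivalent standalone behaviour (stream contents invented for illustration):

```python
import io
import yaml

# two `---` markers up front yield an empty first document, which is filtered out
stream = io.StringIO("---\n---\nname: a\n---\nname: b\n")

objs = list(yaml.safe_load_all(stream))
objs = [obj for obj in objs if obj is not None]  # drop documents with no object
assert objs == [{"name": "a"}, {"name": "b"}]
```
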
linkml_store/utils/llm_utils.py CHANGED

````diff
@@ -20,6 +20,7 @@ MODEL_TOKEN_MAPPING = {
     "gpt-3.5-turbo-instruct": 4096,
     "text-ada-001": 2049,
     "ada": 2049,
+    "ada-002": 8192,
     "text-babbage-001": 2040,
     "babbage": 2049,
     "text-curie-001": 2049,
@@ -32,6 +33,7 @@ MODEL_TOKEN_MAPPING = {
     "code-cushman-002": 2048,
     "code-cushman-001": 2048,
     "claude": 200_000,
+    "llama-3": 200_000,
 }


````

linkml_store/utils/object_utils.py CHANGED

````diff
@@ -1,6 +1,6 @@
 import json
 from copy import deepcopy
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Optional, Union

 from pydantic import BaseModel

@@ -60,6 +60,41 @@ def object_path_update(
     return ret_obj


+def object_path_get(obj: Union[BaseModel, Dict[str, Any]], path: str, default_value=None) -> Any:
+    """
+    Retrieves a value from a nested object based on a path description. The path to the
+    desired field is given in dot and bracket notation (e.g., 'a[0].b.c[1]').
+
+    :param obj: The dictionary object to be updated.
+    :type obj: Dict[str, Any]
+    :param path: The path string indicating where to place the value within the object.
+    :type path: str
+    :return: The value at the specified path.
+    :rtype: Any
+
+    **Example**::
+
+        >>> data = {'persons': [{'foo': {'bar': 1}}]}
+        >>> object_path_get(data, 'persons[0].foo.bar')
+        1
+        >>> object_path_get(data, 'persons[0].foo')
+        {'bar': 1}
+        >>> object_path_get({}, 'not there', "NA")
+        'NA'
+    """
+    if isinstance(obj, BaseModel):
+        obj = obj.dict()
+    parts = path.split(".")
+    for part in parts:
+        if "[" in part:
+            key, index = part[:-1].split("[")
+            index = int(index)
+            obj = obj[key][index]
+        else:
+            obj = obj.get(part, default_value)
+    return obj
+
+
 def parse_update_expression(expr: str) -> Union[tuple[str, Any], None]:
     """
     Parse a string expression of the form 'path.to.field=value' into a path and a value.
@@ -81,3 +116,67 @@ def clean_empties(value: Union[Dict, List]) -> Any:
     elif isinstance(value, list):
         value = [v for v in (clean_empties(v) for v in value) if v is not None]
     return value
+
+
+def select_nested(data: dict, paths: List[Union[str, List[str]]], current_path=None) -> Optional[dict]:
+    """
+    Select nested attributes from a complex dictionary based on selector strings.
+
+    Args:
+        data (dict): The input nested dictionary.
+        selectors (list): A list of selector strings.
+
+    Returns:
+        dict: A new dictionary with the same structure, but only the selected attributes.
+
+    Example:
+        >>> data = {
+        ...     "person": {
+        ...         "name": "John Doe",
+        ...         "age": 30,
+        ...         "address": {
+        ...             "street": "123 Main St",
+        ...             "city": "Anytown",
+        ...             "country": "USA"
+        ...         },
+        ...         "phones": [
+        ...             {"type": "home", "number": "555-1234"},
+        ...             {"type": "work", "number": "555-5678"}
+        ...         ]
+        ...     },
+        ...     "company": {
+        ...         "name": "Acme Inc",
+        ...         "location": "New York"
+        ...     }
+        ... }
+        >>> select_nested(data, ["person.address.street", "person.address.city"])
+        {'person': {'address': {'street': '123 Main St', 'city': 'Anytown'}}}
+        >>> select_nested(data, ["person.phones.number", "person.phones.type"])
+        {'person': {'phones': [{'type': 'home', 'number': '555-1234'}, {'type': 'work', 'number': '555-5678'}]}}
+        >>> select_nested(data, ["person"])
+        {'person': {'name': 'John Doe', 'age': 30, 'address': {'street': '123 Main St', 'city': 'Anytown',
+        'country': 'USA'}, 'phones': [{'type': 'home', 'number': '555-1234'}, {'type': 'work', 'number': '555-5678'}]}}
+        >>> select_nested(data, ["person.phones.type"])
+        {'person': {'phones': [{'type': 'home'}, {'type': 'work'}]}}
+    """
+    if current_path is None:
+        current_path = []
+    matching_paths = []
+    for path in paths:
+        if isinstance(path, str):
+            path = path.split(".")
+        if path == current_path:
+            return data
+        if path[: len(current_path)] == current_path:
+            matching_paths.append(path)
+    if not matching_paths:
+        return None
+    if isinstance(data, dict):
+        new_obj = {k: select_nested(v, matching_paths, current_path + [k]) for k, v in data.items()}
+        new_obj = {k: v for k, v in new_obj.items() if v is not None}
+        return new_obj
+    if isinstance(data, list):
+        new_obj = [select_nested(v, matching_paths, current_path + []) for i, v in enumerate(data)]
+        new_obj = [v for v in new_obj if v is not None]
+        return new_obj
+    return data
````

{linkml_store-0.1.14.dist-info → linkml_store-0.2.1.dist-info}/METADATA CHANGED

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: linkml-store
-Version: 0.1.14
+Version: 0.2.1
 Summary: linkml-store
 License: MIT
 Author: Author 1
@@ -18,6 +18,7 @@ Provides-Extra: chromadb
 Provides-Extra: fastapi
 Provides-Extra: frictionless
 Provides-Extra: h5py
+Provides-Extra: ibis
 Provides-Extra: llm
 Provides-Extra: map
 Provides-Extra: mongodb
@@ -34,7 +35,9 @@ Requires-Dist: duckdb (>=0.10.1)
 Requires-Dist: duckdb-engine (>=0.11.2)
 Requires-Dist: fastapi ; extra == "fastapi"
 Requires-Dist: frictionless ; extra == "frictionless"
+Requires-Dist: gcsfs ; extra == "ibis"
 Requires-Dist: h5py ; extra == "h5py"
+Requires-Dist: ibis-framework[duckdb,examples] (>=9.3.0) ; extra == "ibis"
 Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
 Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
 Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
@@ -43,6 +46,7 @@ Requires-Dist: linkml_map ; extra == "map"
 Requires-Dist: linkml_renderer ; extra == "renderer"
 Requires-Dist: llm ; extra == "llm"
 Requires-Dist: matplotlib ; extra == "analytics"
+Requires-Dist: multipledispatch ; extra == "ibis"
 Requires-Dist: neo4j ; extra == "neo4j"
 Requires-Dist: networkx ; extra == "neo4j"
 Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
@@ -52,6 +56,7 @@ Requires-Dist: pyarrow ; extra == "pyarrow"
 Requires-Dist: pydantic (>=2.0.0,<3.0.0)
 Requires-Dist: pymongo ; extra == "mongodb"
 Requires-Dist: pystow (>=0.5.4,<0.6.0)
+Requires-Dist: ruff (>=0.6.2) ; extra == "tests"
 Requires-Dist: scikit-learn ; extra == "scipy"
 Requires-Dist: scipy ; extra == "scipy"
 Requires-Dist: seaborn ; extra == "analytics"
@@ -70,6 +75,8 @@ common query, index, and storage operations.

 For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)

+See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for a high level overview.
+
 __Warning__ LinkML-Store is still undergoing changes and refactoring,
 APIs and command line options are subject to change!

@@ -196,3 +203,4 @@ make app

 See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details

+
````