linkml-store 0.1.14__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of linkml-store has been flagged as possibly problematic by the registry.
- {linkml_store-0.1.14 → linkml_store-0.2.0}/PKG-INFO +4 -1
- {linkml_store-0.1.14 → linkml_store-0.2.0}/README.md +3 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/pyproject.toml +1 -1
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +3 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/cli.py +24 -15
- linkml_store-0.2.0/src/linkml_store/inference/evaluation.py +189 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/implementations/rag_inference_engine.py +13 -13
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/implementations/rule_based_inference_engine.py +15 -4
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/implementations/sklearn_inference_engine.py +19 -1
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/inference_engine.py +44 -17
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/format_utils.py +6 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/llm_utils.py +1 -0
- linkml_store-0.2.0/src/linkml_store/utils/object_utils.py +182 -0
- linkml_store-0.1.14/src/linkml_store/utils/object_utils.py +0 -83
- {linkml_store-0.1.14 → linkml_store-0.2.0}/LICENSE +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/client.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/collection.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/config.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/database.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/duckdb/duckdb_database.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/neo4j/neo4j_database.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/types.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/graphs/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/graphs/graph_map.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/index/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/index/implementations/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/index/indexer.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/implementations/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/inference_config.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/inference_engine_registry.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/change_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/file_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/mongodb_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/neo4j_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/pandas_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/patch_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/query_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/schema_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/sklearn_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/sql_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/stats_utils.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/webapi/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/webapi/html/__init__.py +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/webapi/html/base.html.j2 +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
- {linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/webapi/main.py +0 -0
{linkml_store-0.1.14 → linkml_store-0.2.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: linkml-store
-Version: 0.1.14
+Version: 0.2.0
 Summary: linkml-store
 License: MIT
 Author: Author 1
@@ -70,6 +70,8 @@ common query, index, and storage operations.
 
 For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
 
+See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for a high level overview.
+
 __Warning__ LinkML-Store is still undergoing changes and refactoring,
 APIs and command line options are subject to change!
 
@@ -196,3 +198,4 @@ make app
 
 See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
 
+
{linkml_store-0.1.14 → linkml_store-0.2.0}/README.md
@@ -7,6 +7,8 @@ common query, index, and storage operations.
 
 For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
 
+See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for a high level overview.
+
 __Warning__ LinkML-Store is still undergoing changes and refactoring,
 APIs and command line options are subject to change!
 
@@ -132,3 +134,4 @@ make app
 ## Background
 
 See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
+
{linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/api/stores/duckdb/duckdb_collection.py
RENAMED
@@ -36,6 +36,9 @@ class DuckDBCollection(Collection):
         logger.info(f"Inserting into: {self.alias} // T={table.name}")
         engine = self.parent.engine
         col_names = [c.name for c in table.columns]
+        bad_objs = [obj for obj in objs if not isinstance(obj, dict)]
+        if bad_objs:
+            logger.error(f"Bad objects: {bad_objs}")
         objs = [{k: obj.get(k, None) for k in col_names} for obj in objs]
         with engine.connect() as conn:
             with conn.begin():
{linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/cli.py
@@ -7,6 +7,7 @@ from typing import Optional
 import click
 import yaml
 from linkml_runtime.dumpers import json_dumper
+from linkml_runtime.utils.formatutils import underscore
 from pydantic import BaseModel
 
 from linkml_store import Client
@@ -17,6 +18,7 @@ from linkml_store.index import get_indexer
 from linkml_store.index.implementations.simple_indexer import SimpleIndexer
 from linkml_store.index.indexer import Indexer
 from linkml_store.inference import get_inference_engine
+from linkml_store.inference.evaluation import evaluate_predictor, score_text_overlap
 from linkml_store.inference.inference_config import InferenceConfig
 from linkml_store.inference.inference_engine import ModelSerialization
 from linkml_store.utils.format_utils import Format, guess_format, load_objects, render_output, write_output
@@ -130,7 +132,7 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
         logger.setLevel(logging.ERROR)
     ctx.ensure_object(dict)
     if input:
-        stem = Path(input).stem
+        stem = underscore(Path(input).stem)
         database = "duckdb"
         collection = stem
        config = ClientConfig(databases={"duckdb": {"collections": {stem: {"source": {"local_path": input}}}}})
@@ -496,12 +498,14 @@ def describe(ctx, where, output_type, output, limit):
 @click.option(
     "--predictor-type", "-t", default="sklearn", show_default=True, type=click.STRING, help="Type of predictor"
 )
+@click.option("--evaluation-count", "-n", type=click.INT, help="Number of examples to evaluate over")
 @click.option("--query", "-q", type=click.STRING, help="query term")
 @click.pass_context
 def infer(
     ctx,
     inference_config_file,
     query,
+    evaluation_count,
     training_test_data_split,
     predictor_type,
     target_attribute,
@@ -546,24 +550,24 @@ def infer(
     query_obj = None
     collection = ctx.obj["settings"].collection
     atts = collection.class_definition().attributes.keys()
+    if feature_attributes:
+        features = feature_attributes.split(",")
+        features = [f.strip() for f in features]
+    else:
+        if query_obj:
+            features = query_obj.keys()
+        else:
+            features = None
+    if target_attribute:
+        target_attributes = list(target_attribute)
+    else:
+        target_attributes = [att for att in atts if att not in features]
     if model_format:
         model_format = ModelSerialization(model_format)
     if load_model:
         predictor = get_inference_engine(predictor_type)
         predictor = type(predictor).load_model(load_model)
     else:
-        if feature_attributes:
-            features = feature_attributes.split(",")
-            features = [f.strip() for f in features]
-        else:
-            if query_obj:
-                features = query_obj.keys()
-            else:
-                features = None
-        if target_attribute:
-            target_attributes = list(target_attribute)
-        else:
-            target_attributes = [att for att in atts if att not in features]
         if inference_config_file:
             config = InferenceConfig.from_file(inference_config_file)
         else:
@@ -577,8 +581,13 @@ def infer(
         logger.info(f"Exporting model to {export_model} in {model_format}")
         predictor.export_model(export_model, model_format)
     if not query_obj:
-        if not export_model:
-            raise ValueError("Query must be specified if not exporting model")
+        if not export_model and not evaluation_count:
+            raise ValueError("Query or evaluate must be specified if not exporting model")
+        if evaluation_count:
+            outcome = evaluate_predictor(
+                predictor, target_attributes, evaluation_count=evaluation_count, match_function=score_text_overlap
+            )
+            print(f"Outcome: {outcome} // accuracy: {outcome.accuracy}")
     if query_obj:
         result = predictor.derive(query_obj)
         dumped_obj = result.model_dump(exclude_none=True)
linkml_store-0.2.0/src/linkml_store/inference/evaluation.py
@@ -0,0 +1,189 @@
+import logging
+from collections.abc import Callable
+from typing import Any, List, Optional
+
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel
+
+from linkml_store.inference import InferenceEngine
+from linkml_store.utils.object_utils import select_nested
+
+logger = logging.getLogger(__name__)
+
+
+def score_match(target: Optional[Any], candidate: Optional[Any], match_function: Optional[Callable] = None) -> float:
+    """
+    Compute a score for a match between two objects
+
+    >>> score_match("a", "a")
+    1.0
+    >>> score_match("a", "b")
+    0.0
+    >>> score_match("a", None)
+    0.0
+    >>> score_match(None, "a")
+    0.0
+    >>> score_match(None, None)
+    1.0
+    >>> score_match(["a", "b"], ["a", "b"])
+    1.0
+    >>> score_match(["a", "b"], ["b", "a"])
+    1.0
+    >>> round(score_match(["a"], ["b", "a"]), 2)
+    0.67
+    >>> score_match({"a": 1}, {"a": 1})
+    1.0
+    >>> score_match({"a": 1}, {"a": 2})
+    0.0
+    >>> score_match({"a": 1, "b": None}, {"a": 1})
+    1.0
+    >>> score_match([{"a": 1, "b": 2}, {"a": 3, "b": 4}], [{"a": 1, "b": 2}, {"a": 3, "b": 4}])
+    1.0
+    >>> score_match([{"a": 1, "b": 4}, {"a": 3, "b": 2}], [{"a": 1, "b": 2}, {"a": 3, "b": 4}])
+    0.5
+    >>> def char_match(x, y):
+    ...     return len(set(x).intersection(set(y))) / len(set(x).union(set(y)))
+    >>> score_match("abcd", "abc", char_match)
+    0.75
+    >>> score_match(["abcd", "efgh"], ["ac", "gh"], char_match)
+    0.5
+
+
+    :param target:
+    :param candidate:
+    :param match_function:
+    :return:
+    """
+    if target == candidate:
+        return 1.0
+    if target is None or candidate is None:
+        return 0.0
+    if isinstance(target, (set, list)) and isinstance(candidate, (set, list)):
+        # create an all by all matrix using numpy
+        # for each pair of elements, compute the score
+        # return the average score
+        score_matrix = np.array([[score_match(t, c, match_function) for c in candidate] for t in target])
+        best_matches0 = np.max(score_matrix, axis=0)
+        best_matches1 = np.max(score_matrix, axis=1)
+        return (np.sum(best_matches0) + np.sum(best_matches1)) / (len(target) + len(candidate))
+    if isinstance(target, dict) and isinstance(candidate, dict):
+        keys = set(target.keys()).union(candidate.keys())
+        scores = [score_match(target.get(k), candidate.get(k), match_function) for k in keys]
+        return np.mean(scores)
+    if match_function:
+        return match_function(target, candidate)
+    return 0.0
+
+
+class Outcome(BaseModel):
+    true_positive_count: float
+    total_count: int
+
+    @property
+    def accuracy(self) -> float:
+        return self.true_positive_count / self.total_count
+
+
+def evaluate_predictor(
+    predictor: InferenceEngine,
+    target_attributes: List[str],
+    feature_attributes: Optional[List[str]] = None,
+    test_data: pd.DataFrame = None,
+    evaluation_count: Optional[int] = 10,
+    match_function: Optional[Callable] = None,
+) -> Outcome:
+    """
+    Evaluate a predictor by comparing its predictions to the expected values in the testing data.
+
+    :param predictor:
+    :param target_attributes:
+    :param feature_attributes:
+    :param evaluation_count:
+    :return:
+    """
+    n = 0
+    tp = 0
+    if test_data is None:
+        test_data = predictor.testing_data.as_dataframe()
+    for row in test_data.to_dict(orient="records"):
+        expected_obj = select_nested(row, target_attributes)
+        if feature_attributes:
+            test_obj = {k: v for k, v in row.items() if k not in target_attributes}
+        else:
+            test_obj = row
+        result = predictor.derive(test_obj)
+        logger.info(f"Predicted: {result.predicted_object} Expected: {expected_obj}")
+        tp += score_match(result.predicted_object, expected_obj, match_function)
+        n += 1
+        if evaluation_count is not None and n >= evaluation_count:
+            break
+    return Outcome(true_positive_count=tp, total_count=n)
+
+
+def score_text_overlap(str1: Any, str2: Any) -> float:
+    """
+    Compute the overlap score between two strings.
+
+    :param str1:
+    :param str2:
+    :return:
+    """
+    if str1 == str2:
+        return 1.0
+    if not str1 or not str2:
+        return 0.0
+    overlap, length = find_longest_overlap(str1, str2)
+    return len(overlap) / max(len(str1), len(str2))
+
+
+def find_longest_overlap(str1: str, str2: str):
+    """
+    Find the longest overlapping substring between two strings.
+
+    Args:
+        str1 (str): The first string
+        str2 (str): The second string
+
+    Returns:
+        tuple: A tuple containing the longest overlapping substring and its length
+
+    Examples:
+        >>> find_longest_overlap("hello world", "world of programming")
+        ('world', 5)
+        >>> find_longest_overlap("abcdefg", "defghi")
+        ('defg', 4)
+        >>> find_longest_overlap("python", "java")
+        ('', 0)
+        >>> find_longest_overlap("", "test")
+        ('', 0)
+        >>> find_longest_overlap("aabbcc", "ddeeff")
+        ('', 0)
+        >>> find_longest_overlap("programming", "PROGRAMMING")
+        ('', 0)
+    """
+    if not str1 or not str2:
+        return "", 0
+
+    # Create a table to store lengths of matching substrings
+    m, n = len(str1), len(str2)
+    dp = [[0] * (n + 1) for _ in range(m + 1)]
+
+    # Variables to store the maximum length and ending position
+    max_length = 0
+    end_pos = 0
+
+    # Fill the dp table
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            if str1[i - 1] == str2[j - 1]:
+                dp[i][j] = dp[i - 1][j - 1] + 1
+                if dp[i][j] > max_length:
+                    max_length = dp[i][j]
+                    end_pos = i
+
+    # Extract the longest common substring
+    start_pos = end_pos - max_length
+    longest_substring = str1[start_pos:end_pos]
+
+    return longest_substring, max_length
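The new `infer --evaluation-count` path in cli.py calls `evaluate_predictor` with `score_text_overlap` as the match function. Below is a minimal, self-contained sketch of calling the helper directly; the `EchoPredictor` class and the `name`/`occupation` columns are invented for illustration and are not part of this release:

    import pandas as pd

    from linkml_store.inference.evaluation import evaluate_predictor, score_text_overlap
    from linkml_store.inference.inference_config import Inference


    class EchoPredictor:
        """Toy stand-in for a trained engine: derive() always predicts the same value."""

        def derive(self, obj):
            return Inference(predicted_object={"occupation": "engineer"})


    # Hypothetical test rows; a real run would use predictor.testing_data populated
    # by load_and_split_data() instead of passing test_data explicitly.
    test_df = pd.DataFrame(
        [
            {"name": "a", "occupation": "engineer"},
            {"name": "b", "occupation": "teacher"},
        ]
    )
    outcome = evaluate_predictor(
        EchoPredictor(),
        ["occupation"],
        test_data=test_df,
        evaluation_count=2,
        match_function=score_text_overlap,
    )
    print(outcome.accuracy)  # exact matches score 1.0; partial text overlap scores fractionally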
{linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/implementations/rag_inference_engine.py
@@ -8,6 +8,7 @@ from llm import get_key
 from linkml_store.api.collection import OBJECT, Collection
 from linkml_store.inference.inference_config import Inference, InferenceConfig, LLMConfig
 from linkml_store.inference.inference_engine import InferenceEngine
+from linkml_store.utils.object_utils import select_nested
 
 logger = logging.getLogger(__name__)
 
@@ -22,6 +23,11 @@ You should return ONLY valid YAML in your response.
 """
 
 
+# def select_object(obj: OBJECT, key_paths: List[str]) -> OBJECT:
+#     return {k: obj.get(k, None) for k in keys}
+#     return {k: object_path_get(obj, k, None) for k in key_paths}
+
+
 @dataclass
 class RAGInferenceEngine(InferenceEngine):
     """
@@ -75,16 +81,7 @@ class RAGInferenceEngine(InferenceEngine):
         return self._model
 
     def initialize_model(self, **kwargs):
-
-        s = td.slice
-        if not s[0] and not s[1]:
-            rag_collection = td.collection
-        else:
-            base_collection = td.collection
-            objs = base_collection.find({}, offset=s[0], limit=s[1] - s[0]).rows
-            db = base_collection.parent
-            rag_collection = db.get_collection(f"{base_collection.alias}__rag_{s[0]}_{s[1]}", create_if_not_exists=True)
-            rag_collection.insert(objs)
+        rag_collection = self.training_data.collection
         rag_collection.attach_indexer("llm", auto_index=False)
         self.rag_collection = rag_collection
 
@@ -111,15 +108,18 @@ class RAGInferenceEngine(InferenceEngine):
             raise ValueError(f"No examples found for {query_text}; size = {self.rag_collection.size()}")
         prompt_clauses = []
         for example in examples:
-            input_obj = {k: example.get(k, None) for k in feature_attributes}
-
+            # input_obj = {k: example.get(k, None) for k in feature_attributes}
+            input_obj = select_nested(example, feature_attributes)
+            # output_obj = {k: example.get(k, None) for k in target_attributes}
+            output_obj = select_nested(example, target_attributes)
             prompt_clause = (
                 "---\nExample:\n"
                 f"## INPUT:\n{self.object_to_text(input_obj)}\n"
                 f"## OUTPUT:\n{self.object_to_text(output_obj)}\n"
             )
            prompt_clauses.append(prompt_clause)
-        query_obj = {k: object.get(k, None) for k in feature_attributes}
+        # query_obj = {k: object.get(k, None) for k in feature_attributes}
+        query_obj = select_nested(object, feature_attributes)
         query_text = self.object_to_text(query_obj)
         prompt_end = "---\nQuery:\n" f"## INPUT:\n{query_text}\n" "## OUTPUT:\n"
         system_prompt = SYSTEM_PROMPT.format(llm_config=self.config.llm_config)
{linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/implementations/rule_based_inference_engine.py
@@ -13,7 +13,7 @@ from linkml_runtime.utils.formatutils import underscore
 from pydantic import BaseModel
 
 from linkml_store.api.collection import OBJECT, Collection
-from linkml_store.inference.inference_config import Inference
+from linkml_store.inference.inference_config import Inference, InferenceConfig
 from linkml_store.inference.inference_engine import InferenceEngine, ModelSerialization
 
 logger = logging.getLogger(__name__)
@@ -111,11 +111,16 @@ class RuleBasedInferenceEngine(InferenceEngine):
         object = {underscore(k): v for k, v in object.items()}
         if self.slot_expressions:
             for slot, expr in self.slot_expressions.items():
-                print(f"EVAL {object}")
                 v = eval_expr(expr, **object)
                 if v is not None:
                     object[slot] = v
-
+        if self.config and self.config.target_attributes:
+            predicted_object = {k: object.get(k, None) for k in self.config.target_attributes}
+        else:
+            predicted_object = object
+        if all(v is None for v in predicted_object.values()):
+            return None
+        return Inference(predicted_object=predicted_object)
 
     def import_model_from(self, inference_engine: InferenceEngine, **kwargs):
         io = StringIO()
@@ -127,6 +132,8 @@ class RuleBasedInferenceEngine(InferenceEngine):
         if self.slot_expressions is None:
             self.slot_expressions = {}
         self.slot_expressions[target_attribute] = io.getvalue()
+        if not self.config:
+            self.config = inference_engine.config
 
     def save_model(self, output: Union[str, Path]) -> None:
         """
@@ -148,7 +155,11 @@ class RuleBasedInferenceEngine(InferenceEngine):
     def load_model(cls, file_path: Union[str, Path]) -> "RuleBasedInferenceEngine":
         model_data = yaml.safe_load(open(file_path))
 
-
+        if model_data["config"]:
+            config = InferenceConfig(**model_data["config"])
+        else:
+            config = None
+        engine = cls(config=config)
         for k, v in model_data.items():
             if k == "config":
                 continue
{linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/implementations/sklearn_inference_engine.py
@@ -174,6 +174,7 @@ class SklearnInferenceEngine(InferenceEngine):
             if col in self.encoders:
                 encoder = self.encoders[col]
                 if isinstance(encoder, OneHotEncoder):
+                    print(f"Encoding: {col} v={object[col]} df={new_X[[col]]} encoder={encoder}")
                     encoded = encoder.transform(new_X[[col]])
                     feature_names = encoder.get_feature_names_out([col])
                     for i, name in enumerate(feature_names):
@@ -216,7 +217,24 @@ class SklearnInferenceEngine(InferenceEngine):
         return Inference(predicted_object=predicted_object, confidence=self.confidence)
 
     def _normalize(self, object: OBJECT) -> OBJECT:
-
+        """
+        Normalize the input object to ensure it has all the expected attributes.
+
+        Also remove any numpy/pandas oddities
+
+        :param object:
+        :return:
+        """
+        np_map = {np.nan: None}
+
+        def _tr(x: Any):
+            # TODO: figure a more elegant way to do this
+            try:
+                return np_map.get(x, x)
+            except TypeError:
+                return x
+
+        return {k: _tr(object.get(k, None)) for k in self.config.feature_attributes}
 
     def export_model(
         self, output: Optional[Union[str, Path, TextIO]], model_serialization: ModelSerialization = None, **kwargs
{linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/inference/inference_engine.py
@@ -1,4 +1,5 @@
 import logging
+import random
 from abc import ABC
 from dataclasses import dataclass
 from enum import Enum
@@ -6,7 +7,7 @@ from pathlib import Path
 from typing import Optional, TextIO, Tuple, Union
 
 import pandas as pd
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict
 
 from linkml_store.api.collection import OBJECT, Collection
 from linkml_store.inference.inference_config import Inference, InferenceConfig
@@ -59,9 +60,25 @@ class ModelSerialization(str, Enum):
 class CollectionSlice(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
-
-
-
+    name: Optional[str] = None
+    base_collection: Optional[Collection] = None
+    # _dataframe: Optional[pd.DataFrame] = None
+    # slice: Tuple[Optional[int], Optional[int]] = Field(default=(None, None))
+    indices: Optional[Tuple[int, ...]] = None
+    _collection: Optional[Collection] = None
+
+    @property
+    def collection(self) -> Collection:
+        if not self._collection:
+            rows = self.base_collection.find({}, limit=-1).rows
+            # subset based on indices
+            subset = [rows[i] for i in self.indices]
+            db = self.base_collection.parent
+            subset_name = f"{self.base_collection.alias}__rag_{self.name}"
+            subset_collection = db.get_collection(subset_name, create_if_not_exists=True)
+            subset_collection.insert(subset)
+            self._collection = subset_collection
+        return self._collection
 
     def as_dataframe(self, flattened=False) -> pd.DataFrame:
         """
@@ -69,17 +86,11 @@ class CollectionSlice(BaseModel):
 
         :return:
         """
-
-
-            return
-        elif self.collection is not None:
-            rs = self.collection.find({}, offset=self.slice[0], limit=self.slice[1] - self.slice[0])
-            if flattened:
-                return nested_objects_to_dataframe(rs.rows)
-            else:
-                return rs.rows_dataframe
+        rs = self.collection.find({}, limit=-1)
+        if flattened:
+            return nested_objects_to_dataframe(rs.rows)
         else:
-
+            return rs.rows_dataframe
 
 
 @dataclass
@@ -96,7 +107,7 @@ class InferenceEngine(ABC):
     training_data: Optional[CollectionSlice] = None
     testing_data: Optional[CollectionSlice] = None
 
-    def load_and_split_data(self, collection: Collection, split: Optional[Tuple[float, float]] = None):
+    def load_and_split_data(self, collection: Collection, split: Optional[Tuple[float, float]] = None, randomize=True):
         """
         Load the data and split it into training and testing sets.
 
@@ -109,8 +120,24 @@ class InferenceEngine(ABC):
             split = (0.7, 0.3)
         logger.info(f"Loading and splitting data from collection {collection.alias}")
         size = collection.size()
-
-
+        indices = range(size)
+        if randomize:
+            train_indices = random.sample(indices, int(size * split[0]))
+            test_indices = set(indices) - set(train_indices)
+        else:
+            train_indices = indices[: int(size * split[0])]
+            test_indices = indices[int(size * split[0]) :]
+        self.training_data = CollectionSlice(name="train", base_collection=collection, indices=train_indices)
+        self.testing_data = CollectionSlice(name="test", base_collection=collection, indices=test_indices)
+        # all_data = collection.find({}, limit=size).rows
+        # all_data_df = nested_objects_to_dataframe(all_data)
+        # all_data_df = collection.find({}, limit=size).rows_dataframe
+        # randomize/shuffle order of rows in dataframe
+        # all_data_df = all_data_df.sample(frac=1).reset_index(drop=True)
+        # self.training_data = CollectionSlice(dataframe=all_data_df[: int(size * split[0])])
+        # self.testing_data = CollectionSlice(dataframe=all_data_df[int(size * split[0]) : size])
+        # self.training_data = CollectionSlice(base_collection=collection, slice=(0, int(size * split[0])))
+        # self.testing_data = CollectionSlice(base_collection=collection, slice=(int(size * split[0]), size))
 
     def initialize_model(self, **kwargs):
         """
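Pulling out just the index-splitting logic from `load_and_split_data` above makes the new `randomize` behaviour easy to see; this is a plain-Python illustration of the same expressions with a toy size, not an additional API:

    import random

    size, split = 10, (0.7, 0.3)
    indices = range(size)
    # randomize=True branch: sample 70% of the row indices for training, the rest for testing
    train_indices = random.sample(indices, int(size * split[0]))
    test_indices = set(indices) - set(train_indices)
    print(sorted(train_indices), sorted(test_indices))  # 7 train indices, 3 test indices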
{linkml_store-0.1.14 → linkml_store-0.2.0}/src/linkml_store/utils/format_utils.py
@@ -47,6 +47,7 @@ class Format(Enum):
         ".jsonl": cls.JSONL,
         ".yaml": cls.YAML,
         ".yml": cls.YAML,
+        ".yamll": cls.YAMLL,
         ".tsv": cls.TSV,
         ".csv": cls.CSV,
         ".py": cls.PYTHON,
@@ -98,6 +99,9 @@ def process_file(
     """
     Process a single file and return a list of objects.
     """
+    if format == Format.YAMLL:
+        format = Format.YAML
+        expected_type = list
     if format == Format.JSON:
         objs = json.load(f)
     elif format == Format.JSONL:
@@ -105,6 +109,8 @@ def process_file(
     elif format == Format.YAML:
         if expected_type and expected_type == list:  # noqa E721
             objs = list(yaml.safe_load_all(f))
+            # allow YAML with a `---` with no object before it
+            objs = [obj for obj in objs if obj is not None]
         else:
             objs = yaml.safe_load(f)
     elif format in [Format.TSV, Format.CSV]:
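The new `.yamll` suffix is handled by re-using the multi-document YAML path and then dropping empty documents, so a file that starts with a bare `---` still loads cleanly. A small sketch of the same parsing behaviour using PyYAML directly (the example document contents are arbitrary):

    import io

    import yaml

    # Multi-document stream with a leading "---" and no object before it;
    # the empty first document parses as None and is filtered out, as in process_file above.
    text = "---\n---\nname: a\n---\nname: b\n"
    objs = [obj for obj in yaml.safe_load_all(io.StringIO(text)) if obj is not None]
    print(objs)  # [{'name': 'a'}, {'name': 'b'}]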
linkml_store-0.2.0/src/linkml_store/utils/object_utils.py
@@ -0,0 +1,182 @@
+import json
+from copy import deepcopy
+from typing import Any, Dict, List, Optional, Union
+
+from pydantic import BaseModel
+
+
+def object_path_update(
+    obj: Union[BaseModel, Dict[str, Any]], path: str, value: Any
+) -> Union[BaseModel, Dict[str, Any]]:
+    """
+    Updates a nested object based on a path description and a value. The path to the
+    desired field is given in dot and bracket notation (e.g., 'a[0].b.c[1]').
+
+    :param obj: The dictionary object to be updated.
+    :type obj: Dict[str, Any]
+    :param path: The path string indicating where to place the value within the object.
+    :type path: str
+    :param value: The value to be set at the specified path.
+    :type value: Any
+    :return: None. This function modifies the object in-place.
+    :rtype: None
+
+    **Example**::
+
+        >>> data = {}
+        >>> object_path_update(data, 'persons[0].foo.bar', 1)
+        {'persons': [{'foo': {'bar': 1}}]}
+    """
+    if isinstance(obj, BaseModel):
+        typ = type(obj)
+        obj = obj.model_dump(exclude_none=True)
+        obj = object_path_update(obj, path, value)
+        return typ(**obj)
+    obj = deepcopy(obj)
+    ret_obj = obj
+    parts = path.split(".")
+    for part in parts[:-1]:
+        if "[" in part:
+            key, index = part[:-1].split("[")
+            index = int(index)
+            # obj = obj.setdefault(key, [{} for _ in range(index+1)])
+            obj = obj.setdefault(key, [])
+            while len(obj) <= index:
+                obj.append({})
+            obj = obj[index]
+        else:
+            if part in obj and obj[part] is None:
+                del obj[part]
+            obj = obj.setdefault(part, {})
+    last_part = parts[-1]
+    if "[" in last_part:
+        key, index = last_part[:-1].split("[")
+        index = int(index)
+        if key not in obj or not isinstance(obj[key], list):
+            obj[key] = [{} for _ in range(index + 1)]
+        obj[key][index] = value
+    else:
+        obj[last_part] = value
+    return ret_obj
+
+
+def object_path_get(obj: Union[BaseModel, Dict[str, Any]], path: str, default_value=None) -> Any:
+    """
+    Retrieves a value from a nested object based on a path description. The path to the
+    desired field is given in dot and bracket notation (e.g., 'a[0].b.c[1]').
+
+    :param obj: The dictionary object to be updated.
+    :type obj: Dict[str, Any]
+    :param path: The path string indicating where to place the value within the object.
+    :type path: str
+    :return: The value at the specified path.
+    :rtype: Any
+
+    **Example**::
+
+        >>> data = {'persons': [{'foo': {'bar': 1}}]}
+        >>> object_path_get(data, 'persons[0].foo.bar')
+        1
+        >>> object_path_get(data, 'persons[0].foo')
+        {'bar': 1}
+        >>> object_path_get({}, 'not there', "NA")
+        'NA'
+    """
+    if isinstance(obj, BaseModel):
+        obj = obj.dict()
+    parts = path.split(".")
+    for part in parts:
+        if "[" in part:
+            key, index = part[:-1].split("[")
+            index = int(index)
+            obj = obj[key][index]
+        else:
+            obj = obj.get(part, default_value)
+    return obj
+
+
+def parse_update_expression(expr: str) -> Union[tuple[str, Any], None]:
+    """
+    Parse a string expression of the form 'path.to.field=value' into a path and a value.
+
+    :param expr:
+    :return:
+    """
+    try:
+        path, val = expr.split("=", 1)
+        val = json.loads(val)
+    except ValueError:
+        return None
+    return path, val
+
+
+def clean_empties(value: Union[Dict, List]) -> Any:
+    if isinstance(value, dict):
+        value = {k: v for k, v in ((k, clean_empties(v)) for k, v in value.items()) if v is not None}
+    elif isinstance(value, list):
+        value = [v for v in (clean_empties(v) for v in value) if v is not None]
+    return value
+
+
+def select_nested(data: dict, paths: List[Union[str, List[str]]], current_path=None) -> Optional[dict]:
+    """
+    Select nested attributes from a complex dictionary based on selector strings.
+
+    Args:
+        data (dict): The input nested dictionary.
+        selectors (list): A list of selector strings.
+
+    Returns:
+        dict: A new dictionary with the same structure, but only the selected attributes.
+
+    Example:
+        >>> data = {
+        ...     "person": {
+        ...         "name": "John Doe",
+        ...         "age": 30,
+        ...         "address": {
+        ...             "street": "123 Main St",
+        ...             "city": "Anytown",
+        ...             "country": "USA"
+        ...         },
+        ...         "phones": [
+        ...             {"type": "home", "number": "555-1234"},
+        ...             {"type": "work", "number": "555-5678"}
+        ...         ]
+        ...     },
+        ...     "company": {
+        ...         "name": "Acme Inc",
+        ...         "location": "New York"
+        ...     }
+        ... }
+        >>> select_nested(data, ["person.address.street", "person.address.city"])
+        {'person': {'address': {'street': '123 Main St', 'city': 'Anytown'}}}
+        >>> select_nested(data, ["person.phones.number", "person.phones.type"])
+        {'person': {'phones': [{'type': 'home', 'number': '555-1234'}, {'type': 'work', 'number': '555-5678'}]}}
+        >>> select_nested(data, ["person"])
+        {'person': {'name': 'John Doe', 'age': 30, 'address': {'street': '123 Main St', 'city': 'Anytown',
+        'country': 'USA'}, 'phones': [{'type': 'home', 'number': '555-1234'}, {'type': 'work', 'number': '555-5678'}]}}
+        >>> select_nested(data, ["person.phones.type"])
+        {'person': {'phones': [{'type': 'home'}, {'type': 'work'}]}}
+    """
+    if current_path is None:
+        current_path = []
+    matching_paths = []
+    for path in paths:
+        if isinstance(path, str):
+            path = path.split(".")
+        if path == current_path:
+            return data
+        if path[: len(current_path)] == current_path:
+            matching_paths.append(path)
+    if not matching_paths:
+        return None
+    if isinstance(data, dict):
+        new_obj = {k: select_nested(v, matching_paths, current_path + [k]) for k, v in data.items()}
+        new_obj = {k: v for k, v in new_obj.items() if v is not None}
+        return new_obj
+    if isinstance(data, list):
+        new_obj = [select_nested(v, matching_paths, current_path + []) for i, v in enumerate(data)]
+        new_obj = [v for v in new_obj if v is not None]
+        return new_obj
+    return data
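These helpers are what the inference changes above lean on (`select_nested` is now used by both the RAG engine and `evaluate_predictor`). A short usage sketch that follows directly from the doctests; the example paths and values are illustrative only:

    from linkml_store.utils.object_utils import object_path_get, object_path_update, select_nested

    doc = object_path_update({}, "persons[0].name", "Alice")
    doc = object_path_update(doc, "persons[0].address.city", "Anytown")
    print(doc)  # {'persons': [{'name': 'Alice', 'address': {'city': 'Anytown'}}]}
    print(object_path_get(doc, "persons[0].address.city"))  # Anytown
    print(select_nested(doc, ["persons.name"]))  # {'persons': [{'name': 'Alice'}]}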
linkml_store-0.1.14/src/linkml_store/utils/object_utils.py
@@ -1,83 +0,0 @@
-import json
-from copy import deepcopy
-from typing import Any, Dict, List, Union
-
-from pydantic import BaseModel
-
-
-def object_path_update(
-    obj: Union[BaseModel, Dict[str, Any]], path: str, value: Any
-) -> Union[BaseModel, Dict[str, Any]]:
-    """
-    Updates a nested object based on a path description and a value. The path to the
-    desired field is given in dot and bracket notation (e.g., 'a[0].b.c[1]').
-
-    :param obj: The dictionary object to be updated.
-    :type obj: Dict[str, Any]
-    :param path: The path string indicating where to place the value within the object.
-    :type path: str
-    :param value: The value to be set at the specified path.
-    :type value: Any
-    :return: None. This function modifies the object in-place.
-    :rtype: None
-
-    **Example**::
-
-        >>> data = {}
-        >>> object_path_update(data, 'persons[0].foo.bar', 1)
-        {'persons': [{'foo': {'bar': 1}}]}
-    """
-    if isinstance(obj, BaseModel):
-        typ = type(obj)
-        obj = obj.model_dump(exclude_none=True)
-        obj = object_path_update(obj, path, value)
-        return typ(**obj)
-    obj = deepcopy(obj)
-    ret_obj = obj
-    parts = path.split(".")
-    for part in parts[:-1]:
-        if "[" in part:
-            key, index = part[:-1].split("[")
-            index = int(index)
-            # obj = obj.setdefault(key, [{} for _ in range(index+1)])
-            obj = obj.setdefault(key, [])
-            while len(obj) <= index:
-                obj.append({})
-            obj = obj[index]
-        else:
-            if part in obj and obj[part] is None:
-                del obj[part]
-            obj = obj.setdefault(part, {})
-    last_part = parts[-1]
-    if "[" in last_part:
-        key, index = last_part[:-1].split("[")
-        index = int(index)
-        if key not in obj or not isinstance(obj[key], list):
-            obj[key] = [{} for _ in range(index + 1)]
-        obj[key][index] = value
-    else:
-        obj[last_part] = value
-    return ret_obj
-
-
-def parse_update_expression(expr: str) -> Union[tuple[str, Any], None]:
-    """
-    Parse a string expression of the form 'path.to.field=value' into a path and a value.
-
-    :param expr:
-    :return:
-    """
-    try:
-        path, val = expr.split("=", 1)
-        val = json.loads(val)
-    except ValueError:
-        return None
-    return path, val
-
-
-def clean_empties(value: Union[Dict, List]) -> Any:
-    if isinstance(value, dict):
-        value = {k: v for k, v in ((k, clean_empties(v)) for k, v in value.items()) if v is not None}
-    elif isinstance(value, list):
-        value = [v for v in (clean_empties(v) for v in value) if v is not None]
-    return value
All other files are unchanged between linkml_store-0.1.14 and linkml_store-0.2.0 (see the +0 -0 entries in the file list above).