linkml-store 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml_store/__init__.py +7 -0
- linkml_store/api/__init__.py +8 -0
- linkml_store/api/client.py +414 -0
- linkml_store/api/collection.py +1280 -0
- linkml_store/api/config.py +187 -0
- linkml_store/api/database.py +862 -0
- linkml_store/api/queries.py +69 -0
- linkml_store/api/stores/__init__.py +0 -0
- linkml_store/api/stores/chromadb/__init__.py +7 -0
- linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
- linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
- linkml_store/api/stores/dremio/__init__.py +10 -0
- linkml_store/api/stores/dremio/dremio_collection.py +555 -0
- linkml_store/api/stores/dremio/dremio_database.py +1052 -0
- linkml_store/api/stores/dremio/mappings.py +105 -0
- linkml_store/api/stores/dremio_rest/__init__.py +11 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
- linkml_store/api/stores/duckdb/__init__.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
- linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
- linkml_store/api/stores/duckdb/mappings.py +8 -0
- linkml_store/api/stores/filesystem/__init__.py +15 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
- linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
- linkml_store/api/stores/ibis/__init__.py +5 -0
- linkml_store/api/stores/ibis/ibis_collection.py +488 -0
- linkml_store/api/stores/ibis/ibis_database.py +328 -0
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
- linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
- linkml_store/api/stores/neo4j/__init__.py +0 -0
- linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
- linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
- linkml_store/api/stores/solr/__init__.py +3 -0
- linkml_store/api/stores/solr/solr_collection.py +224 -0
- linkml_store/api/stores/solr/solr_database.py +83 -0
- linkml_store/api/stores/solr/solr_utils.py +0 -0
- linkml_store/api/types.py +4 -0
- linkml_store/cli.py +1147 -0
- linkml_store/constants.py +7 -0
- linkml_store/graphs/__init__.py +0 -0
- linkml_store/graphs/graph_map.py +24 -0
- linkml_store/index/__init__.py +53 -0
- linkml_store/index/implementations/__init__.py +0 -0
- linkml_store/index/implementations/llm_indexer.py +174 -0
- linkml_store/index/implementations/simple_indexer.py +43 -0
- linkml_store/index/indexer.py +211 -0
- linkml_store/inference/__init__.py +13 -0
- linkml_store/inference/evaluation.py +195 -0
- linkml_store/inference/implementations/__init__.py +0 -0
- linkml_store/inference/implementations/llm_inference_engine.py +154 -0
- linkml_store/inference/implementations/rag_inference_engine.py +276 -0
- linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
- linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
- linkml_store/inference/inference_config.py +66 -0
- linkml_store/inference/inference_engine.py +209 -0
- linkml_store/inference/inference_engine_registry.py +74 -0
- linkml_store/plotting/__init__.py +5 -0
- linkml_store/plotting/cli.py +826 -0
- linkml_store/plotting/dimensionality_reduction.py +453 -0
- linkml_store/plotting/embedding_plot.py +489 -0
- linkml_store/plotting/facet_chart.py +73 -0
- linkml_store/plotting/heatmap.py +383 -0
- linkml_store/utils/__init__.py +0 -0
- linkml_store/utils/change_utils.py +17 -0
- linkml_store/utils/dat_parser.py +95 -0
- linkml_store/utils/embedding_matcher.py +424 -0
- linkml_store/utils/embedding_utils.py +299 -0
- linkml_store/utils/enrichment_analyzer.py +217 -0
- linkml_store/utils/file_utils.py +37 -0
- linkml_store/utils/format_utils.py +550 -0
- linkml_store/utils/io.py +38 -0
- linkml_store/utils/llm_utils.py +122 -0
- linkml_store/utils/mongodb_utils.py +145 -0
- linkml_store/utils/neo4j_utils.py +42 -0
- linkml_store/utils/object_utils.py +190 -0
- linkml_store/utils/pandas_utils.py +93 -0
- linkml_store/utils/patch_utils.py +126 -0
- linkml_store/utils/query_utils.py +89 -0
- linkml_store/utils/schema_utils.py +23 -0
- linkml_store/utils/sklearn_utils.py +193 -0
- linkml_store/utils/sql_utils.py +177 -0
- linkml_store/utils/stats_utils.py +53 -0
- linkml_store/utils/vector_utils.py +158 -0
- linkml_store/webapi/__init__.py +0 -0
- linkml_store/webapi/html/__init__.py +3 -0
- linkml_store/webapi/html/base.html.j2 +24 -0
- linkml_store/webapi/html/collection_details.html.j2 +15 -0
- linkml_store/webapi/html/database_details.html.j2 +16 -0
- linkml_store/webapi/html/databases.html.j2 +14 -0
- linkml_store/webapi/html/generic.html.j2 +43 -0
- linkml_store/webapi/main.py +855 -0
- linkml_store-0.3.0.dist-info/METADATA +226 -0
- linkml_store-0.3.0.dist-info/RECORD +101 -0
- linkml_store-0.3.0.dist-info/WHEEL +4 -0
- linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
- linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import random
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Optional, TextIO, Tuple, Union
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from pydantic import BaseModel, ConfigDict
|
|
11
|
+
|
|
12
|
+
from linkml_store.api.collection import OBJECT, Collection
|
|
13
|
+
from linkml_store.inference.inference_config import Inference, InferenceConfig
|
|
14
|
+
from linkml_store.utils.pandas_utils import nested_objects_to_dataframe
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ModelSerialization(str, Enum):
    """
    Enum for model serialization types.

    Each member's value is also the file extension that identifies it in
    :meth:`from_filepath`; a few extra aliases (``pkl``, ``py``) are added there.
    """

    PICKLE = "pickle"
    ONNX = "onnx"
    PMML = "pmml"
    PFA = "pfa"
    JOBLIB = "joblib"
    PNG = "png"
    LINKML_EXPRESSION = "linkml_expression"
    RULE_BASED = "rulebased"
    RAG_INDEX = "rag_index"

    @classmethod
    def from_filepath(cls, file_path: Union[str, Path]) -> Optional["ModelSerialization"]:
        """
        Get the serialization type from the file path.

        The type is taken from the text after the last ``.`` in the path,
        compared case-insensitively.

        >>> ModelSerialization.from_filepath("model.onnx")
        <ModelSerialization.ONNX: 'onnx'>
        >>> ModelSerialization.from_filepath("model.pkl")
        <ModelSerialization.PICKLE: 'pickle'>
        >>> ModelSerialization.from_filepath("model.rulebased.yaml")
        <ModelSerialization.RULE_BASED: 'rulebased'>
        >>> ModelSerialization.from_filepath("MODEL.PKL")
        <ModelSerialization.PICKLE: 'pickle'>
        >>> from pathlib import Path
        >>> ModelSerialization.from_filepath(Path("model.joblib"))
        <ModelSerialization.JOBLIB: 'joblib'>
        >>> assert ModelSerialization.from_filepath("poor_file_name") is None

        :param file_path: path (string or ``Path``) of the serialized model
        :return: the matching serialization type, or None if the suffix is not recognized
        """
        # str() lets callers pass pathlib.Path objects, which have no .split()
        toks = str(file_path).split(".")
        suffix = toks[-1].lower()
        # Compound suffix "<name>.rulebased.yaml"; a bare "rulebased.yaml" has
        # only two tokens and is deliberately not matched here.
        if len(toks) > 2 and suffix == "yaml" and toks[-2].lower() == "rulebased":
            return cls.RULE_BASED
        # Generate mapping dynamically from the member values
        extension_mapping = {member.value.lower(): member for member in cls}
        # Add special cases whose extension differs from the member value
        extension_mapping["pkl"] = cls.PICKLE
        extension_mapping["py"] = cls.LINKML_EXPRESSION
        return extension_mapping.get(suffix)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class CollectionSlice(BaseModel):
    """
    A named view over a base collection, optionally restricted to a subset of rows.

    When ``indices`` is None the slice is the whole base collection. Otherwise the
    selected rows are copied, on first access of :attr:`collection`, into a
    collection named :attr:`slice_alias` in the base collection's parent database.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid")

    name: Optional[str] = None
    base_collection: Optional[Collection] = None
    # _dataframe: Optional[pd.DataFrame] = None
    # slice: Tuple[Optional[int], Optional[int]] = Field(default=(None, None))
    # Positions into base_collection.rows; None means "no restriction"
    indices: Optional[Tuple[int, ...]] = None
    # Cache for the materialized subset collection (filled by the collection property)
    _collection: Optional[Collection] = None
    where: Any = None

    @property
    def collection(self) -> Collection:
        """
        Return the collection holding the rows of this slice.

        With no ``indices`` this is the base collection itself. With ``indices``
        (including an empty sequence) the selected rows are written to the
        :attr:`slice_alias` collection, replacing anything it held, and that
        collection is cached for subsequent calls.

        :return: the base collection or the materialized subset collection
        """
        if self._collection is not None:
            return self._collection
        # Only None means "whole collection". An empty index list is an empty
        # slice and must not fall back to the full base collection.
        if self.indices is None:
            return self.base_collection
        rows = self.base_collection.rows
        subset = [rows[i] for i in self.indices]
        db = self.base_collection.parent
        subset_collection = db.get_collection(self.slice_alias, create_if_not_exists=True)
        # ensure the collection has the same schema type as the base collection;
        # this ensures that column/attribute types are preserved
        subset_collection.metadata.type = self.base_collection.target_class_name
        # clear any rows left over from a previous slice with the same alias
        subset_collection.delete_where({})
        if subset:
            subset_collection.insert(subset)
        self._collection = subset_collection
        return self._collection

    @property
    def slice_alias(self) -> str:
        """
        Name of the collection used to store the materialized slice.

        :return: ``<base collection alias>__rag_<slice name>``
        """
        return f"{self.base_collection.alias}__rag_{self.name}"

    def as_dataframe(self, flattened=False) -> pd.DataFrame:
        """
        Return the slice of the collection as a dataframe.

        :param flattened: flatten nested objects to give keys like foo.bar
        :return: dataframe with one row per object in the slice
        """
        # limit=-1 requests all rows rather than a default page
        rs = self.collection.find({}, limit=-1)
        if flattened:
            return nested_objects_to_dataframe(rs.rows)
        else:
            return rs.rows_dataframe
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@dataclass
class InferenceEngine(ABC):
    """
    Base class for all inference engines.

    An InferenceEngine is capable of deriving inferences from input objects and a collection.

    Subclasses are expected to override the model lifecycle methods
    (initialize/export/import/save/load) and :meth:`derive`; the base
    implementations raise NotImplementedError.
    """

    predictor_type: Optional[str] = None
    config: Optional[InferenceConfig] = None

    training_data: Optional[CollectionSlice] = None
    testing_data: Optional[CollectionSlice] = None

    def load_and_split_data(self, collection: Collection, split: Optional[Tuple[float, float]] = None, randomize=True):
        """
        Load the data and split it into training and testing sets.

        The result is stored on ``self.training_data`` and ``self.testing_data``.
        If the training ratio is exactly 1.0, the whole collection is used for
        training and ``testing_data`` is set to None.

        :param collection: collection to draw rows from
        :param split: Tuple of training and testing split ratios. Falls back to
            ``config.train_test_split`` and then to (0.7, 0.3).
        :param randomize: sample training rows at random (seeded by
            ``config.random_seed`` when set); otherwise take the leading rows
        :return:
        """
        config = self.config
        # Compare against None rather than truthiness so that a seed of 0 is honoured;
        # random.Random(None) falls back to system entropy.
        seed = config.random_seed if config is not None else None
        local_random = random.Random(seed)
        if not split and config is not None:
            split = config.train_test_split
        if not split:
            split = (0.7, 0.3)
        if split[0] == 1.0:
            self.training_data = CollectionSlice(name="train", base_collection=collection, indices=None)
            self.testing_data = None
            return
        logger.info(f"Loading and splitting data {split} from collection {collection.alias}")
        size = collection.size()
        indices = range(size)
        n_train = int(size * split[0])
        if randomize:
            train_indices = local_random.sample(indices, n_train)
            # everything not sampled for training is used for testing
            test_indices = set(indices) - set(train_indices)
        else:
            train_indices = indices[:n_train]
            test_indices = indices[n_train:]
        self.training_data = CollectionSlice(name="train", base_collection=collection, indices=train_indices)
        self.testing_data = CollectionSlice(name="test", base_collection=collection, indices=test_indices)

    def initialize_model(self, **kwargs):
        """
        Initialize the model.

        :param kwargs: implementation-specific options
        :return:
        :raises NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError("Initialize model method must be implemented by subclass")

    def export_model(
        self, output: Optional[Union[str, Path, TextIO]], model_serialization: ModelSerialization = None, **kwargs
    ):
        """
        Export the model to the given output.

        :param output: destination path or stream
        :param model_serialization: serialization format to use
        :param kwargs: implementation-specific options
        :return:
        :raises NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError("Export model method must be implemented by subclass")

    def import_model_from(self, inference_engine: "InferenceEngine", **kwargs):
        """
        Import the model from the given inference engine.

        :param inference_engine: engine to import the model from
        :param kwargs: implementation-specific options
        :return:
        :raises NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError("Import model method must be implemented by subclass")

    def save_model(self, output: Union[str, Path]) -> None:
        """
        Save the model to the given output.

        :param output: destination path
        :return:
        :raises NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError("Save model method must be implemented by subclass")

    @classmethod
    def load_model(cls, file_path: Union[str, Path]) -> "InferenceEngine":
        """
        Load the model from the given file path.

        :param file_path: path of a previously saved model
        :return: an engine wrapping the loaded model
        :raises NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError("Load model method must be implemented by subclass")

    def derive(self, object: OBJECT) -> Optional[Inference]:
        """
        Derive the prediction for the given object.

        :param object: input object to derive an inference for
        :return: the inference, or None
        :raises NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError("Predict method must be implemented by subclass")
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import inspect
|
|
3
|
+
import os
|
|
4
|
+
from typing import Dict, Type
|
|
5
|
+
|
|
6
|
+
from linkml_store.inference.inference_config import InferenceConfig
|
|
7
|
+
from linkml_store.inference.inference_engine import InferenceEngine
|
|
8
|
+
from linkml_store.utils.object_utils import object_path_update
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class InferenceEngineRegistry:
    """
    Maps engine names to InferenceEngine subclasses and instantiates them on request.
    """

    def __init__(self):
        # engine name -> engine class
        self.engines: Dict[str, Type[InferenceEngine]] = {}

    def register(self, name: str, engine_class: Type[InferenceEngine]):
        """
        Register an engine class under a name, replacing any previous entry.

        :param name: lookup key for the engine
        :param engine_class: class to instantiate for that key
        """
        self.engines[name] = engine_class

    def get_engine_class(self, name: str) -> Type[InferenceEngine]:
        """
        Look up a registered engine class.

        :param name: engine name as passed to :meth:`register`
        :return: the registered class
        :raises ValueError: if no engine is registered under ``name``
        """
        if name not in self.engines:
            raise ValueError(f"Unknown inference engine type: {name}. Known engines: {list(self.engines.keys())}")
        return self.engines[name]

    def create_engine(self, engine_type: str, config: InferenceConfig = None, **kwargs) -> InferenceEngine:
        """
        Instantiate an engine.

        ``engine_type`` may carry inline configuration after a colon, as
        comma-separated ``key=value`` pairs (``"name:key1=v1,key2=v2"``); each
        pair is applied to the config via ``object_path_update``.

        :param engine_type: registered engine name, optionally with inline configuration
        :param config: base configuration; a fresh InferenceConfig is created when
            inline configuration is given and this is None
        :param kwargs: extra constructor arguments; entries whose value is None are dropped
        :return: the new engine, with ``predictor_type`` set to the engine name
        :raises ValueError: for an unknown engine name or a malformed inline argument
        """
        kwargs = {k: v for k, v in kwargs.items() if v is not None}
        if ":" in engine_type:
            engine_type, conf_args = engine_type.split(":", 1)
            if config is None:
                config = InferenceConfig()
            for arg in conf_args.split(","):
                # tolerate "name:" and trailing commas
                if not arg.strip():
                    continue
                # split on the first "=" only, so values may themselves contain "="
                k, sep, v = arg.partition("=")
                if not sep:
                    raise ValueError(f"Invalid engine configuration argument {arg!r}; expected key=value")
                config = object_path_update(config, k, v)

        engine_class = self.get_engine_class(engine_type)
        kwargs["predictor_type"] = engine_type
        return engine_class(config=config, **kwargs)

    @classmethod
    def load_engines(cls, package_path: str):
        """
        Build a registry from every InferenceEngine subclass found in a package.

        Each non-dunder ``.py`` file in the package directory is imported and every
        InferenceEngine subclass visible in it is registered under its lower-cased
        class name with ``inferenceengine`` removed. Modules that fail with
        ImportError are reported on stdout and skipped.

        :param package_path: dotted path of the package to scan
        :return: the populated registry
        """
        registry = cls()
        package_dir = os.path.dirname(importlib.import_module(package_path).__file__)
        # sorted() makes registration order independent of the filesystem
        for filename in sorted(os.listdir(package_dir)):
            if not filename.endswith(".py") or filename.startswith("__"):
                continue
            module_name = f"{package_path}.{filename[:-3]}"
            try:
                module = importlib.import_module(module_name)
                for name, obj in inspect.getmembers(module):
                    if inspect.isclass(obj) and issubclass(obj, InferenceEngine) and obj != InferenceEngine:
                        engine_name = name.lower().replace("inferenceengine", "")
                        registry.register(engine_name, obj)
            except ImportError as e:
                print(f"Error importing {module_name}: {e}")
        return registry
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Initialize the registry
|
|
56
|
+
# Module-level registry used by get_inference_engine(). Building it imports every
# non-dunder module in the implementations package at import time of this module;
# modules that raise ImportError are reported and skipped.
registry = InferenceEngineRegistry.load_engines("linkml_store.inference.implementations")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# Function to get an inference engine (can be used as before)
|
|
60
|
+
def get_inference_engine(engine_type: str, config: InferenceConfig = None, **kwargs) -> InferenceEngine:
    """
    Create an inference engine through the module-level registry.

    >>> from linkml_store.inference import get_inference_engine
    >>> ie = get_inference_engine('sklearn')
    >>> type(ie)
    <class 'linkml_store.inference.implementations.sklearn_inference_engine.SklearnInferenceEngine'>

    :param engine_type: name of a registered engine; may be followed by ``:`` and
        comma-separated ``key=value`` configuration overrides
    :param config: optional configuration handed to the engine
    :param kwargs: additional arguments forwarded to the engine constructor
    :return: the newly created engine
    """
    engine = registry.create_engine(engine_type, config=config, **kwargs)
    return engine
|