linkml-store 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. linkml_store/__init__.py +7 -0
  2. linkml_store/api/__init__.py +8 -0
  3. linkml_store/api/client.py +414 -0
  4. linkml_store/api/collection.py +1280 -0
  5. linkml_store/api/config.py +187 -0
  6. linkml_store/api/database.py +862 -0
  7. linkml_store/api/queries.py +69 -0
  8. linkml_store/api/stores/__init__.py +0 -0
  9. linkml_store/api/stores/chromadb/__init__.py +7 -0
  10. linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
  11. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  12. linkml_store/api/stores/dremio/__init__.py +10 -0
  13. linkml_store/api/stores/dremio/dremio_collection.py +555 -0
  14. linkml_store/api/stores/dremio/dremio_database.py +1052 -0
  15. linkml_store/api/stores/dremio/mappings.py +105 -0
  16. linkml_store/api/stores/dremio_rest/__init__.py +11 -0
  17. linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
  18. linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
  19. linkml_store/api/stores/duckdb/__init__.py +16 -0
  20. linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
  21. linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
  22. linkml_store/api/stores/duckdb/mappings.py +8 -0
  23. linkml_store/api/stores/filesystem/__init__.py +15 -0
  24. linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
  25. linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
  26. linkml_store/api/stores/hdf5/__init__.py +7 -0
  27. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  28. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  29. linkml_store/api/stores/ibis/__init__.py +5 -0
  30. linkml_store/api/stores/ibis/ibis_collection.py +488 -0
  31. linkml_store/api/stores/ibis/ibis_database.py +328 -0
  32. linkml_store/api/stores/mongodb/__init__.py +25 -0
  33. linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
  34. linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
  35. linkml_store/api/stores/neo4j/__init__.py +0 -0
  36. linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
  37. linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
  38. linkml_store/api/stores/solr/__init__.py +3 -0
  39. linkml_store/api/stores/solr/solr_collection.py +224 -0
  40. linkml_store/api/stores/solr/solr_database.py +83 -0
  41. linkml_store/api/stores/solr/solr_utils.py +0 -0
  42. linkml_store/api/types.py +4 -0
  43. linkml_store/cli.py +1147 -0
  44. linkml_store/constants.py +7 -0
  45. linkml_store/graphs/__init__.py +0 -0
  46. linkml_store/graphs/graph_map.py +24 -0
  47. linkml_store/index/__init__.py +53 -0
  48. linkml_store/index/implementations/__init__.py +0 -0
  49. linkml_store/index/implementations/llm_indexer.py +174 -0
  50. linkml_store/index/implementations/simple_indexer.py +43 -0
  51. linkml_store/index/indexer.py +211 -0
  52. linkml_store/inference/__init__.py +13 -0
  53. linkml_store/inference/evaluation.py +195 -0
  54. linkml_store/inference/implementations/__init__.py +0 -0
  55. linkml_store/inference/implementations/llm_inference_engine.py +154 -0
  56. linkml_store/inference/implementations/rag_inference_engine.py +276 -0
  57. linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
  58. linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
  59. linkml_store/inference/inference_config.py +66 -0
  60. linkml_store/inference/inference_engine.py +209 -0
  61. linkml_store/inference/inference_engine_registry.py +74 -0
  62. linkml_store/plotting/__init__.py +5 -0
  63. linkml_store/plotting/cli.py +826 -0
  64. linkml_store/plotting/dimensionality_reduction.py +453 -0
  65. linkml_store/plotting/embedding_plot.py +489 -0
  66. linkml_store/plotting/facet_chart.py +73 -0
  67. linkml_store/plotting/heatmap.py +383 -0
  68. linkml_store/utils/__init__.py +0 -0
  69. linkml_store/utils/change_utils.py +17 -0
  70. linkml_store/utils/dat_parser.py +95 -0
  71. linkml_store/utils/embedding_matcher.py +424 -0
  72. linkml_store/utils/embedding_utils.py +299 -0
  73. linkml_store/utils/enrichment_analyzer.py +217 -0
  74. linkml_store/utils/file_utils.py +37 -0
  75. linkml_store/utils/format_utils.py +550 -0
  76. linkml_store/utils/io.py +38 -0
  77. linkml_store/utils/llm_utils.py +122 -0
  78. linkml_store/utils/mongodb_utils.py +145 -0
  79. linkml_store/utils/neo4j_utils.py +42 -0
  80. linkml_store/utils/object_utils.py +190 -0
  81. linkml_store/utils/pandas_utils.py +93 -0
  82. linkml_store/utils/patch_utils.py +126 -0
  83. linkml_store/utils/query_utils.py +89 -0
  84. linkml_store/utils/schema_utils.py +23 -0
  85. linkml_store/utils/sklearn_utils.py +193 -0
  86. linkml_store/utils/sql_utils.py +177 -0
  87. linkml_store/utils/stats_utils.py +53 -0
  88. linkml_store/utils/vector_utils.py +158 -0
  89. linkml_store/webapi/__init__.py +0 -0
  90. linkml_store/webapi/html/__init__.py +3 -0
  91. linkml_store/webapi/html/base.html.j2 +24 -0
  92. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  93. linkml_store/webapi/html/database_details.html.j2 +16 -0
  94. linkml_store/webapi/html/databases.html.j2 +14 -0
  95. linkml_store/webapi/html/generic.html.j2 +43 -0
  96. linkml_store/webapi/main.py +855 -0
  97. linkml_store-0.3.0.dist-info/METADATA +226 -0
  98. linkml_store-0.3.0.dist-info/RECORD +101 -0
  99. linkml_store-0.3.0.dist-info/WHEEL +4 -0
  100. linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
  101. linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,209 @@
1
+ import logging
2
+ import random
3
+ from abc import ABC
4
+ from dataclasses import dataclass
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Any, Optional, TextIO, Tuple, Union
8
+
9
+ import pandas as pd
10
+ from pydantic import BaseModel, ConfigDict
11
+
12
+ from linkml_store.api.collection import OBJECT, Collection
13
+ from linkml_store.inference.inference_config import Inference, InferenceConfig
14
+ from linkml_store.utils.pandas_utils import nested_objects_to_dataframe
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class ModelSerialization(str, Enum):
    """
    Enum for model serialization types.

    Each member's value is the canonical format name. The lower-cased value
    is also used as a recognised file extension by :meth:`from_filepath`.
    """

    PICKLE = "pickle"
    ONNX = "onnx"
    PMML = "pmml"
    PFA = "pfa"
    JOBLIB = "joblib"
    PNG = "png"
    LINKML_EXPRESSION = "linkml_expression"
    RULE_BASED = "rulebased"
    RAG_INDEX = "rag_index"

    @classmethod
    def from_filepath(cls, file_path: Union[str, Path]) -> Optional["ModelSerialization"]:
        """
        Get the serialization type from the file path.

        The decision is based on the text after the last ``.`` in the path,
        compared case-insensitively. ``*.rulebased.yaml`` is special-cased
        because its final extension alone (``yaml``) is not a known format.

        >>> ModelSerialization.from_filepath("model.onnx")
        <ModelSerialization.ONNX: 'onnx'>
        >>> ModelSerialization.from_filepath("model.pkl")
        <ModelSerialization.PICKLE: 'pickle'>
        >>> ModelSerialization.from_filepath("MODEL.PKL")
        <ModelSerialization.PICKLE: 'pickle'>
        >>> ModelSerialization.from_filepath("my.rulebased.yaml")
        <ModelSerialization.RULE_BASED: 'rulebased'>
        >>> assert ModelSerialization.from_filepath("poor_file_name") is None

        :param file_path: path (``str`` or ``pathlib.Path``) of the serialized model
        :return: the matching serialization type, or None if the extension is not recognised
        """
        # Path objects have no .split(); normalise to text first. Plain strings
        # (including str subclasses) are used as-is so existing callers are unaffected.
        path_text = file_path if isinstance(file_path, str) else str(file_path)
        toks = path_text.lower().split(".")
        suffix = toks[-1]
        # Two-part extension: "<stem>.rulebased.yaml"
        if len(toks) > 2 and suffix == "yaml" and toks[-2] == "rulebased":
            return cls.RULE_BASED
        # Generate mapping dynamically from the enum values
        extension_mapping = {member.value.lower(): member for member in cls}
        # Add special cases where the conventional extension differs from the value
        extension_mapping["pkl"] = cls.PICKLE
        extension_mapping["py"] = cls.LINKML_EXPRESSION
        return extension_mapping.get(suffix)
59
+
60
+
61
class CollectionSlice(BaseModel):
    """
    A named view over a base collection, optionally restricted to selected row positions.

    When ``indices`` is None the slice is the base collection itself. Otherwise
    the selected rows are copied into a sibling collection (named by
    :attr:`slice_alias`) in the same database the first time
    :attr:`collection` is accessed, and that collection is cached.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid")

    # Label for the slice; used to build the alias of the materialized collection.
    name: Optional[str] = None
    # Collection the rows are drawn from.
    base_collection: Optional[Collection] = None
    # Positions into ``base_collection.rows``; None means "all rows".
    indices: Optional[Tuple[int, ...]] = None
    # Cache for the materialized subset collection.
    _collection: Optional[Collection] = None
    # Not read anywhere in this class.
    where: Any = None

    @property
    def collection(self) -> Collection:
        """
        Return the collection backing this slice, materializing it on first use.

        :return: the cached subset collection, or the base collection when no indices are set
        """
        if self._collection is not None:
            return self._collection
        # Only ``None`` means "no restriction". An empty tuple is a valid (empty)
        # slice and must not fall back to the whole base collection.
        if self.indices is None:
            return self.base_collection
        rows = self.base_collection.rows
        subset = [rows[i] for i in self.indices]
        db = self.base_collection.parent
        subset_collection = db.get_collection(self.slice_alias, create_if_not_exists=True)
        # ensure the collection has the same schema type as the base collection;
        # this ensures that column/attribute types are preserved
        subset_collection.metadata.type = self.base_collection.target_class_name
        # Clear rows left over from an earlier slice with the same alias.
        subset_collection.delete_where({})
        if subset:
            subset_collection.insert(subset)
        self._collection = subset_collection
        return self._collection

    @property
    def slice_alias(self) -> str:
        """
        Alias of the collection that holds the materialized slice.

        :return: ``<base alias>__rag_<slice name>``
        """
        return f"{self.base_collection.alias}__rag_{self.name}"

    def as_dataframe(self, flattened=False) -> pd.DataFrame:
        """
        Return the slice of the collection as a dataframe.

        :param flattened: flatten nested objects to give keys like foo.bar
        :return: dataframe built from all rows of :attr:`collection`
        """
        rs = self.collection.find({}, limit=-1)
        if flattened:
            return nested_objects_to_dataframe(rs.rows)
        return rs.rows_dataframe
106
+
107
+
108
@dataclass
class InferenceEngine(ABC):
    """
    Base class for all inference engine.

    An InferenceEngine is capable of deriving inferences from input objects and a collection.

    Only :meth:`load_and_split_data` is implemented here; every other method
    raises ``NotImplementedError`` and must be overridden by subclasses.
    """

    # Name of the engine type.
    predictor_type: Optional[str] = None
    # Engine configuration; read for ``random_seed`` and ``train_test_split``.
    config: Optional[InferenceConfig] = None

    # Slices populated by load_and_split_data.
    training_data: Optional[CollectionSlice] = None
    testing_data: Optional[CollectionSlice] = None

    def load_and_split_data(self, collection: Collection, split: Optional[Tuple[float, float]] = None, randomize=True):
        """
        Load the data and split it into training and testing sets.

        Sets :attr:`training_data` and :attr:`testing_data`. If the training
        ratio is exactly 1.0 the whole collection is used for training and
        :attr:`testing_data` is set to None.

        :param collection: collection to draw rows from
        :param split: Tuple of training and testing split ratios. Falls back to
            ``config.train_test_split`` and then to ``(0.7, 0.3)``.
        :param randomize: if True, training rows are sampled at random (seeded by
            ``config.random_seed`` when set); otherwise the leading rows are used
        :return:
        """
        config = self.config
        # ``config`` is optional; treat a missing config as "no seed, no configured split".
        seed = config.random_seed if config is not None else None
        # Compare against None so that a seed of 0 is honoured; Random(None) is unseeded.
        local_random = random.Random(seed)
        split = split or (config.train_test_split if config is not None else None)
        if not split:
            split = (0.7, 0.3)
        if split[0] == 1.0:
            self.training_data = CollectionSlice(name="train", base_collection=collection, indices=None)
            self.testing_data = None
            return
        logger.info("Loading and splitting data %s from collection %s", split, collection.alias)
        size = collection.size()
        indices = range(size)
        n_train = int(size * split[0])
        if randomize:
            train_indices = local_random.sample(indices, n_train)
            selected = set(train_indices)
            # Keep the remaining rows in ascending order so the test slice is reproducible.
            test_indices = [i for i in indices if i not in selected]
        else:
            train_indices = indices[:n_train]
            test_indices = indices[n_train:]
        # CollectionSlice declares ``indices`` as a tuple of ints; convert explicitly.
        self.training_data = CollectionSlice(name="train", base_collection=collection, indices=tuple(train_indices))
        self.testing_data = CollectionSlice(name="test", base_collection=collection, indices=tuple(test_indices))

    def initialize_model(self, **kwargs):
        """
        Initialize the model.

        :param kwargs:
        :return:
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError("Initialize model method must be implemented by subclass")

    def export_model(
        self,
        output: Optional[Union[str, Path, TextIO]],
        model_serialization: Optional[ModelSerialization] = None,
        **kwargs,
    ):
        """
        Export the model to the given output.

        :param model_serialization:
        :param output:
        :param kwargs:
        :return:
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError("Export model method must be implemented by subclass")

    def import_model_from(self, inference_engine: "InferenceEngine", **kwargs):
        """
        Import the model from the given inference engine.

        :param inference_engine:
        :param kwargs:
        :return:
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError("Import model method must be implemented by subclass")

    def save_model(self, output: Union[str, Path]) -> None:
        """
        Save the model to the given output.

        :param output:
        :return:
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError("Save model method must be implemented by subclass")

    @classmethod
    def load_model(cls, file_path: Union[str, Path]) -> "InferenceEngine":
        """
        Load the model from the given file path.

        :param file_path:
        :return:
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError("Load model method must be implemented by subclass")

    def derive(self, object: OBJECT) -> Optional[Inference]:
        """
        Derive the prediction for the given object.

        :param object:
        :return:
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError("Derive method must be implemented by subclass")
@@ -0,0 +1,74 @@
1
+ import importlib
2
+ import inspect
3
+ import os
4
+ from typing import Dict, Type
5
+
6
+ from linkml_store.inference.inference_config import InferenceConfig
7
+ from linkml_store.inference.inference_engine import InferenceEngine
8
+ from linkml_store.utils.object_utils import object_path_update
9
+
10
+
11
class InferenceEngineRegistry:
    """
    Registry mapping engine type names to :class:`InferenceEngine` subclasses.
    """

    def __init__(self):
        self.engines: Dict[str, Type[InferenceEngine]] = {}

    def register(self, name: str, engine_class: Type[InferenceEngine]):
        """
        Register (or overwrite) an engine class under the given name.

        :param name: lookup key for the engine
        :param engine_class: class to instantiate for that key
        """
        self.engines[name] = engine_class

    def get_engine_class(self, name: str) -> Type[InferenceEngine]:
        """
        Look up a registered engine class.

        :param name: lookup key used at registration
        :return: the registered class
        :raises ValueError: if no engine is registered under ``name``
        """
        if name not in self.engines:
            raise ValueError(f"Unknown inference engine type: {name}. Known engines: {list(self.engines.keys())}")
        return self.engines[name]

    def create_engine(self, engine_type: str, config: InferenceConfig = None, **kwargs) -> InferenceEngine:
        """
        Instantiate an engine.

        ``engine_type`` may carry inline configuration after a colon, as
        comma-separated ``key=value`` pairs (``"name:a=1,b.c=2"``); each pair
        is applied to the config with ``object_path_update``.

        :param engine_type: registered engine name, optionally followed by ``:key=value,...``
        :param config: base configuration; a fresh ``InferenceConfig`` is created
            if None and inline arguments are present
        :param kwargs: extra constructor arguments; entries whose value is None are dropped
        :return: the new engine instance
        :raises ValueError: if the engine name is unknown or an inline argument has no ``=``
        """
        kwargs = {k: v for k, v in kwargs.items() if v is not None}
        if ":" in engine_type:
            engine_type, conf_args = engine_type.split(":", 1)
            if config is None:
                config = InferenceConfig()
            for arg in conf_args.split(","):
                # Split on the first "=" only, so values may themselves contain "=".
                key, sep, value = arg.partition("=")
                if not sep:
                    raise ValueError(f"Invalid engine config argument {arg!r}; expected key=value")
                config = object_path_update(config, key, value)

        engine_class = self.get_engine_class(engine_type)
        kwargs["predictor_type"] = engine_type
        return engine_class(config=config, **kwargs)

    @classmethod
    def load_engines(cls, package_path: str):
        """
        Build a registry by importing every module found in a package directory.

        Each non-dunder ``.py`` file in the package is imported, and every
        ``InferenceEngine`` subclass visible in it is registered under its
        lower-cased class name with ``inferenceengine`` removed. Modules that
        fail with ``ImportError`` are logged and skipped.

        :param package_path: dotted path of the package to scan
        :return: a new, populated registry
        """
        # Function-scope import: this module does not import logging at file level.
        import logging

        log = logging.getLogger(__name__)
        registry = cls()
        package_dir = os.path.dirname(importlib.import_module(package_path).__file__)
        # Sort so that registration order does not depend on directory listing order.
        for filename in sorted(os.listdir(package_dir)):
            if not filename.endswith(".py") or filename.startswith("__"):
                continue
            module_name = f"{package_path}.{filename[:-3]}"
            try:
                module = importlib.import_module(module_name)
            except ImportError as e:
                # Log rather than print, so stdout is not polluted for callers that parse it.
                log.warning("Error importing %s: %s", module_name, e)
                continue
            for name, obj in inspect.getmembers(module):
                if inspect.isclass(obj) and issubclass(obj, InferenceEngine) and obj is not InferenceEngine:
                    engine_name = name.lower().replace("inferenceengine", "")
                    registry.register(engine_name, obj)
        return registry
53
+
54
+
55
+ # Module-level registry, populated at import time by scanning the bundled implementations package
56
+ registry = InferenceEngineRegistry.load_engines("linkml_store.inference.implementations")
57
+
58
+
59
+ # Function to get an inference engine (can be used as before)
60
def get_inference_engine(engine_type: str, config: InferenceConfig = None, **kwargs) -> InferenceEngine:
    """
    Create an inference engine through the module-level ``registry``.

    >>> from linkml_store.inference import get_inference_engine
    >>> ie = get_inference_engine('sklearn')
    >>> type(ie)
    <class 'linkml_store.inference.implementations.sklearn_inference_engine.SklearnInferenceEngine'>

    :param engine_type: engine name, passed through to ``registry.create_engine``
    :param config: optional configuration, passed through unchanged
    :param kwargs: additional arguments, passed through unchanged
    :return: the engine instance produced by the registry
    """
    engine = registry.create_engine(engine_type, config, **kwargs)
    return engine
@@ -0,0 +1,5 @@
1
+ """
2
+ Visualization and plotting functions for LinkML data.
3
+ """
4
+
5
+ __version__ = "0.1.0"  # NOTE(review): differs from the 0.3.0 distribution version; presumably a separate version for this subpackage — confirm