linkml-store 0.2.4__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic.
- {linkml_store-0.2.4 → linkml_store-0.2.5}/PKG-INFO +7 -9
- {linkml_store-0.2.4 → linkml_store-0.2.5}/pyproject.toml +8 -7
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/client.py +15 -2
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/database.py +2 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/cli.py +22 -4
- linkml_store-0.2.5/src/linkml_store/inference/implementations/llm_inference_engine.py +152 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/inference/implementations/rag_inference_engine.py +20 -9
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/inference/inference_engine.py +2 -2
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/llm_utils.py +15 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/object_utils.py +3 -1
- {linkml_store-0.2.4 → linkml_store-0.2.5}/LICENSE +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/README.md +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/config.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/duckdb/duckdb_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/neo4j/neo4j_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/types.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/graphs/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/graphs/graph_map.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/index/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/index/implementations/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/index/indexer.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/inference/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/inference/evaluation.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/inference/implementations/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/inference/implementations/rule_based_inference_engine.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/inference/implementations/sklearn_inference_engine.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/inference/inference_config.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/inference/inference_engine_registry.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/change_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/file_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/format_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/mongodb_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/neo4j_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/pandas_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/patch_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/query_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/schema_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/sklearn_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/sql_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/stats_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/vector_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/webapi/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/webapi/html/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/webapi/html/base.html.j2 +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/webapi/main.py +0 -0
{linkml_store-0.2.4 → linkml_store-0.2.5}/PKG-INFO

@@ -1,14 +1,13 @@
 Metadata-Version: 2.3
 Name: linkml-store
-Version: 0.2.4
+Version: 0.2.5
 Summary: linkml-store
 License: MIT
 Author: Author 1
 Author-email: author@org.org
-Requires-Python: >=3.
+Requires-Python: >=3.10,<4.0
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -20,7 +19,6 @@ Provides-Extra: bigquery
 Provides-Extra: fastapi
 Provides-Extra: frictionless
 Provides-Extra: h5py
-Provides-Extra: ibis
 Provides-Extra: llm
 Provides-Extra: map
 Provides-Extra: mongodb
@@ -36,20 +34,19 @@ Requires-Dist: duckdb (>=0.10.1)
 Requires-Dist: duckdb-engine (>=0.11.2)
 Requires-Dist: fastapi ; extra == "fastapi"
 Requires-Dist: frictionless ; extra == "frictionless"
-Requires-Dist: gcsfs
+Requires-Dist: gcsfs
 Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
 Requires-Dist: h5py ; extra == "h5py"
-Requires-Dist: ibis-framework[duckdb,examples] (>=9.3.0) ; extra == "ibis"
 Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
 Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
-Requires-Dist: jsonpatch (>=1.33
+Requires-Dist: jsonpatch (>=1.33)
 Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
 Requires-Dist: linkml-runtime (>=1.8.0)
 Requires-Dist: linkml_map ; extra == "map"
 Requires-Dist: linkml_renderer ; extra == "renderer"
 Requires-Dist: llm ; extra == "llm" or extra == "all"
 Requires-Dist: matplotlib ; extra == "analytics"
-Requires-Dist: multipledispatch
+Requires-Dist: multipledispatch
 Requires-Dist: neo4j ; extra == "neo4j" or extra == "all"
 Requires-Dist: networkx ; extra == "neo4j"
 Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
@@ -59,6 +56,7 @@ Requires-Dist: pyarrow ; extra == "pyarrow"
 Requires-Dist: pydantic (>=2.0.0,<3.0.0)
 Requires-Dist: pymongo ; extra == "mongodb"
 Requires-Dist: pystow (>=0.5.4,<0.6.0)
+Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
 Requires-Dist: ruff (>=0.6.2) ; extra == "tests"
 Requires-Dist: scikit-learn ; extra == "scipy"
 Requires-Dist: scipy ; extra == "scipy"
@@ -68,7 +66,7 @@ Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
 Requires-Dist: tabulate
 Requires-Dist: tiktoken ; extra == "llm"
 Requires-Dist: uvicorn ; extra == "fastapi"
-Requires-Dist: xmltodict (>=0.13.0
+Requires-Dist: xmltodict (>=0.13.0)
 Description-Content-Type: text/markdown
 
 # linkml-store

{linkml_store-0.2.4 → linkml_store-0.2.5}/pyproject.toml

@@ -1,13 +1,13 @@
 [tool.poetry]
 name = "linkml-store"
-version = "0.2.4"
+version = "0.2.5"
 description = "linkml-store"
 authors = ["Author 1 <author@org.org>"]
 license = "MIT"
 readme = "README.md"
 
 [tool.poetry.dependencies]
-python = "^3.
+python = "^3.10"
 click = "*"
 pydantic = "^2.0.0"
 linkml-runtime = ">=1.8.0"
@@ -37,7 +37,7 @@ linkml = { version=">=1.8.0", optional = true }
 linkml_map = { version="*", optional = true }
 linkml_renderer = { version="*", optional = true }
 frictionless = { version="*", optional = true }
-ibis-framework = { version=">=9.3.0", extras = ["duckdb", "examples"], optional = true }
+#ibis-framework = { version=">=9.3.0", extras = ["duckdb", "examples"], optional = true }
 gcsfs = { version="*", optional = true }
 multipledispatch = { version="*" }
 tabulate = "*"
@@ -46,8 +46,9 @@ jinja2 = "^3.1.4"
 jsonlines = "^4.0.0"
 fastapi = { version="*", optional = true }
 uvicorn = { version="*", optional = true }
-xmltodict = "
-jsonpatch = "
+xmltodict = ">=0.13.0"
+jsonpatch = ">=1.33"
+python-dotenv = "^1.0.1"
 
 [tool.poetry.group.dev.dependencies]
 pytest = {version = ">=7.1.2"}
@@ -90,9 +91,9 @@ renderer = ["linkml_renderer"]
 fastapi = ["fastapi", "uvicorn"]
 frictionless = ["frictionless"]
 scipy = ["scipy", "scikit-learn"]
-ibis = ["ibis-framework", "multipledispatch", "gcsfs"]
+#ibis = ["ibis-framework", "multipledispatch", "gcsfs"]
 bigquery = ["google-cloud-bigquery"]
-all = ["llm", "mongodb", "neo4j", "validation", "map", "renderer", "
+all = ["llm", "mongodb", "neo4j", "validation", "map", "renderer", "bigquery"]
 
 [tool.poetry.scripts]
 linkml-store = "linkml_store.cli:cli"

{linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/client.py

@@ -22,6 +22,10 @@ HANDLE_MAP = {
     "file": "linkml_store.api.stores.filesystem.filesystem_database.FileSystemDatabase",
 }
 
+SUFFIX_MAP = {
+    "ddb": "duckdb:///{path}",
+}
+
 
 class Client:
     """
@@ -197,6 +201,12 @@ class Client:
         :param kwargs:
         :return:
         """
+        if ":" not in handle:
+            if alias is None:
+                alias = handle
+            suffix = handle.split(".")[-1]
+            if suffix in SUFFIX_MAP:
+                handle = SUFFIX_MAP[suffix].format(path=handle)
         if ":" not in handle:
             scheme = handle
             handle = None
@@ -220,7 +230,9 @@ class Client:
         if not alias:
             alias = handle
         if not self._databases:
+            logger.info("Initializing databases")
            self._databases = {}
+        logger.info(f"Attaching {alias}")
         self._databases[alias] = db
         db.parent = self
         if db.alias:
@@ -263,8 +275,9 @@ class Client:
             self._databases[name] = db
         if name not in self._databases:
             if create_if_not_exists:
-                logger.info(f"Creating database: {name}")
-                self.attach_database(name, **kwargs)
+                logger.info(f"Creating/attaching database: {name}")
+                db = self.attach_database(name, **kwargs)
+                name = db.alias
             else:
                 raise ValueError(f"Database {name} does not exist")
         db = self._databases[name]
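
The new SUFFIX_MAP lets a bare file name act as a database handle: when the handle has no scheme, the suffix is looked up and expanded into a full handle, and the original file name becomes the alias. A minimal sketch of how a caller might rely on this (the file name is illustrative):

```python
from linkml_store.api.client import Client

client = Client()

# "mydata.ddb" has no ":" scheme, so the "ddb" suffix is resolved via
# SUFFIX_MAP to the handle "duckdb:///mydata.ddb", keeping "mydata.ddb"
# as the database alias.
db = client.attach_database("mydata.ddb")

# get_database() with create_if_not_exists goes through the same attach
# path, so the same shorthand should work there as well.
db2 = client.get_database("mydata.ddb", create_if_not_exists=True)
```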
{linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/api/database.py

@@ -470,6 +470,7 @@ class Database(ABC, Generic[CollectionType]):
         if not self._schema_view:
             self._initialize_schema()
         if not self._schema_view:
+            logger.info("Inducing schema view")
             self._schema_view = self.induce_schema_view()
         return self._schema_view
 
@@ -505,6 +506,7 @@ class Database(ABC, Generic[CollectionType]):
         if isinstance(schema_view, str):
             schema_view = SchemaView(schema_view)
         self._schema_view = schema_view
+        logger.info(f"Setting schema view for {self.handle}")
         # self._schema_view = SchemaView(schema_view.materialize_derived_schema())
         if not self._collections:
             return
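
As the second hunk shows, set_schema_view accepts either a SchemaView or a path to a LinkML schema, with the string case wrapped in SchemaView internally. A small sketch (the handle and schema path are illustrative):

```python
from linkml_store.api.client import Client

client = Client()
db = client.attach_database("duckdb:///mydata.ddb")

# A string argument is wrapped in SchemaView(...) before being stored
# as the database's schema view.
db.set_schema_view("personinfo.yaml")
```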
{linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/cli.py

@@ -99,6 +99,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
 @click.option("--database", "-d", help="Database name")
 @click.option("--collection", "-c", help="Collection name")
 @click.option("--input", "-i", help="Input file (alternative to database/collection)")
+@click.option("--schema", "-S", help="Path to schema (LinkML yaml)")
 @click.option("--config", "-C", type=click.Path(exists=True), help="Path to the configuration file")
 @click.option("--set", help="Metadata settings in the form PATHEXPR=value", multiple=True)
 @click.option("-v", "--verbose", count=True)
@@ -111,7 +112,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
     help="If set then show full stacktrace on error",
 )
 @click.pass_context
-def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, config, set, input, **kwargs):
+def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, schema, config, set, input, **kwargs):
     """A CLI for interacting with the linkml-store."""
     if not stacktrace:
         sys.tracebacklimit = 0
@@ -158,6 +159,9 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
     client = Client().from_config(config, **kwargs) if config else Client()
     settings = ContextSettings(client=client, database_name=database, collection_name=collection)
     ctx.obj["settings"] = settings
+    if schema:
+        db = settings.database
+        db.set_schema_view(schema)
     if settings.database_name:
         db = client.get_database(database)
     if set:
@@ -534,6 +538,7 @@ def pivot(ctx, where, limit, index, columns, values, output_type, output):
 @click.option(
     "--feature-attributes", "-F", type=click.STRING, help="Feature attributes for inference (comma separated)"
 )
+@click.option("--training-collection", type=click.STRING, help="Collection to use for training")
 @click.option("--inference-config-file", "-Y", type=click.Path(), help="Path to inference configuration file")
 @click.option("--export-model", "-E", type=click.Path(), help="Export model to file")
 @click.option("--load-model", "-L", type=click.Path(), help="Load model from file")
@@ -555,6 +560,7 @@ def infer(
     evaluation_count,
     evaluation_match_function,
     training_test_data_split,
+    training_collection,
     predictor_type,
     target_attribute,
     feature_attributes,
@@ -617,6 +623,7 @@ def infer(
     if model_format:
         model_format = ModelSerialization(model_format)
     if load_model:
+        logger.info(f"Loading predictor from {load_model}")
         predictor = get_inference_engine(predictor_type)
         predictor = type(predictor).load_model(load_model)
     else:
@@ -627,13 +634,18 @@ def infer(
         if training_test_data_split:
             config.train_test_split = training_test_data_split
         predictor = get_inference_engine(predictor_type, config=config)
-
-
+        training_collection_obj = collection
+        if training_collection:
+            training_collection_obj = ctx.obj["settings"].database.get_collection(training_collection)
+        if training_collection_obj:
+            logger.info(f"Using collection: {training_collection_obj.alias} for inference")
+            split = training_test_data_split or (1.0, 0.0)
+            predictor.load_and_split_data(training_collection_obj, split=split)
         predictor.initialize_model()
     if export_model:
         logger.info(f"Exporting model to {export_model} in {model_format}")
         predictor.export_model(export_model, model_format)
-    if not query_obj:
+    if not query_obj and where_clause is None:
         if not export_model and not evaluation_count:
             raise ValueError("Query or evaluate must be specified if not exporting model")
     if evaluation_count:
@@ -651,6 +663,12 @@ def infer(
         result = predictor.derive(query_obj)
         dumped_obj = result.model_dump(exclude_none=True)
         write_output([dumped_obj], output_type, target=output)
+    if where_clause is not None:
+        predicted_objs = []
+        for query_obj in collection.find(where_clause).rows:
+            result = predictor.derive(query_obj)
+            predicted_objs.append(result.predicted_object)
+        write_output(predicted_objs, output_type, target=output)
 
 
 @cli.command()
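
Taken together, the infer command can now train on one collection and emit predictions for every row matched by a where clause. A rough Python equivalent of the new code path, as a sketch only: the import path for get_inference_engine, the "sklearn" engine name, the collection names, and the where clause are all illustrative assumptions.

```python
from linkml_store.api.client import Client
from linkml_store.inference import get_inference_engine  # import path assumed

client = Client()
db = client.attach_database("duckdb:///mydata.ddb")

# Train on a dedicated training collection (mirrors --training-collection).
predictor = get_inference_engine("sklearn")  # engine type name is illustrative
predictor.load_and_split_data(db.get_collection("training_samples"), split=(1.0, 0.0))
predictor.initialize_model()

# Derive predictions for every row matching a where clause, mirroring the
# new batch-prediction loop added to `infer`.
target = db.get_collection("samples")
predicted = [
    predictor.derive(row).predicted_object
    for row in target.find({"status": "pending"}).rows
]
```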
linkml_store-0.2.5/src/linkml_store/inference/implementations/llm_inference_engine.py (new file)

@@ -0,0 +1,152 @@
+import json
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import ClassVar, List, Optional, TextIO, Union
+
+import yaml
+from linkml_store.utils.llm_utils import parse_yaml_payload
+from llm import get_key
+from pydantic import BaseModel
+
+from linkml_store.api.collection import OBJECT, Collection
+from linkml_store.inference.inference_config import Inference, InferenceConfig, LLMConfig
+from linkml_store.inference.inference_engine import InferenceEngine, ModelSerialization
+from linkml_store.utils.object_utils import select_nested
+
+logger = logging.getLogger(__name__)
+
+MAX_ITERATIONS = 5
+DEFAULT_NUM_EXAMPLES = 20
+
+SYSTEM_PROMPT = """
+Your task is to inference the complete YAML
+object output given the YAML object input. I will provide you
+with contextual information, including the schema,
+to help with the inference. You can use the following
+
+You should return ONLY valid YAML in your response.
+"""
+
+
+class TrainedModel(BaseModel, extra="forbid"):
+    index_rows: List[OBJECT]
+    config: Optional[InferenceConfig] = None
+
+
+class LLMInference(Inference):
+    iterations: int = 0
+
+
+@dataclass
+class LLMInferenceEngine(InferenceEngine):
+    """
+    LLM based predictor.
+
+    Unlike the RAG predictor this performs few-shot inference
+    """
+
+    _model: "llm.Model" = None  # noqa: F821
+
+    PERSIST_COLS: ClassVar[List[str]] = [
+        "config",
+    ]
+
+    def __post_init__(self):
+        if not self.config:
+            self.config = InferenceConfig()
+        if not self.config.llm_config:
+            self.config.llm_config = LLMConfig()
+
+    @property
+    def model(self) -> "llm.Model":  # noqa: F821
+        import llm
+
+        if self._model is None:
+            self._model = llm.get_model(self.config.llm_config.model_name)
+            if self._model.needs_key:
+                key = get_key(None, key_alias=self._model.needs_key)
+                self._model.key = key
+
+        return self._model
+
+    def initialize_model(self, **kwargs):
+        logger.info(f"Initializing model {self.model}")
+
+    def object_to_text(self, object: OBJECT) -> str:
+        return yaml.dump(object)
+
+    def _schema_str(self) -> str:
+        db = self.training_data.base_collection.parent
+        from linkml_runtime.dumpers import json_dumper
+        schema_dict = json_dumper.to_dict(db.schema_view.schema)
+        return yaml.dump(schema_dict)
+
+    def derive(self, object: OBJECT, iteration=0, additional_prompt_texts: Optional[List[str]] = None) -> Optional[LLMInference]:
+        import llm
+
+        model: llm.Model = self.model
+        #model_name = self.config.llm_config.model_name
+        #feature_attributes = self.config.feature_attributes
+        target_attributes = self.config.target_attributes
+        query_text = self.object_to_text(object)
+
+        if not target_attributes:
+            target_attributes = [k for k, v in object.items() if v is None or v == ""]
+        #if not feature_attributes:
+        #    feature_attributes = [k for k, v in object.items() if v is not None and v != ""]
+
+        system_prompt = SYSTEM_PROMPT.format(llm_config=self.config.llm_config)
+
+        system_prompt += "\n## SCHEMA:\n\n" + self._schema_str()
+
+        stub = ", ".join([f"{k}: ..." for k in target_attributes])
+        stub = "{" + stub + "}"
+        prompt = (
+            "Provide a YAML object of the form"
+            "```yaml\n"
+            f"{stub}\n"
+            "```\n"
+            "---\nQuery:\n" f"## INCOMPLETE OBJECT:\n{query_text}\n" "## OUTPUT:\n"
+        )
+        logger.info(f"Prompt: {prompt}")
+        response = model.prompt(prompt, system=system_prompt)
+        yaml_str = response.text()
+        logger.info(f"Response: {yaml_str}")
+        predicted_object = parse_yaml_payload(yaml_str, strict=True)
+        predicted_object = {**object, **predicted_object}
+        if self.config.validate_results:
+            base_collection = self.training_data.base_collection
+            errs = list(base_collection.iter_validate_collection([predicted_object]))
+            if errs:
+                print(f"{iteration} // FAILED TO VALIDATE: {yaml_str}")
+                print(f"PARSED: {predicted_object}")
+                print(f"ERRORS: {errs}")
+                if iteration > MAX_ITERATIONS:
+                    raise ValueError(f"Validation errors: {errs}")
+                extra_texts = [
+                    "Make sure results conform to the schema. Previously you provided:\n",
+                    yaml_str,
+                    "\nThis was invalid.\n",
+                    "Validation errors:\n",
+                ] + [self.object_to_text(e) for e in errs]
+                return self.derive(object, iteration=iteration+1, additional_prompt_texts=extra_texts)
+        return LLMInference(predicted_object=predicted_object, iterations=iteration+1, query=object)
+
+
+    def export_model(
+        self, output: Optional[Union[str, Path, TextIO]], model_serialization: ModelSerialization = None, **kwargs
+    ):
+        self.save_model(output)
+
+    def save_model(self, output: Union[str, Path]) -> None:
+        """
+        Save the trained model and related data to a file.
+
+        :param output: Path to save the model
+        """
+        raise NotImplementedError("Does not make sense for this engine")
+
+    @classmethod
+    def load_model(cls, file_path: Union[str, Path]) -> "LLMInferenceEngine":
+        raise NotImplementedError("Does not make sense for this engine")
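
The new LLMInferenceEngine fills in missing attributes of an object by prompting an LLM with the database schema plus the incomplete object, and, when validate_results is set, retries with the validation errors appended. A minimal usage sketch, assuming InferenceConfig and LLMConfig accept these fields as keyword arguments; the handle, collection, model name, and partial object are illustrative:

```python
from linkml_store.api.client import Client
from linkml_store.inference.implementations.llm_inference_engine import LLMInferenceEngine
from linkml_store.inference.inference_config import InferenceConfig, LLMConfig

client = Client()
db = client.attach_database("duckdb:///mydata.ddb")
collection = db.get_collection("persons")

config = InferenceConfig(
    target_attributes=["occupation"],                 # attributes the LLM should fill in
    llm_config=LLMConfig(model_name="gpt-4o-mini"),   # any model known to the `llm` library
)
engine = LLMInferenceEngine(config=config)

# derive() builds the prompt from the schema of the training collection's
# database and merges the parsed YAML answer back into the input object.
engine.load_and_split_data(collection, split=(1.0, 0.0))
engine.initialize_model()
result = engine.derive({"name": "Alice", "occupation": None})
print(result.predicted_object)
```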
{linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/inference/implementations/rag_inference_engine.py

@@ -20,7 +20,7 @@ DEFAULT_NUM_EXAMPLES = 20
 DEFAULT_MMR_RELEVANCE_FACTOR = 0.8
 
 SYSTEM_PROMPT = """
-You are a {llm_config.role}, your task is to
+You are a {llm_config.role}, your task is to infer the YAML
 object output given the YAML object input. I will provide you
 with a collection of examples that will provide guidance both
 on the desired structure of the response, as well as the kind
@@ -130,23 +130,34 @@ class RAGInferenceEngine(InferenceEngine):
         else:
             if not self.rag_collection.indexers:
                 raise ValueError("RAG collection must have an indexer attached")
+            logger.info(f"Searching {self.rag_collection.alias} for examples for: {query_text}")
             rs = self.rag_collection.search(query_text, limit=num_examples, index_name="llm",
                                             mmr_relevance_factor=mmr_relevance_factor)
             examples = rs.rows
+            logger.info(f"Found {len(examples)} examples")
         if not examples:
             raise ValueError(f"No examples found for {query_text}; size = {self.rag_collection.size()}")
         prompt_clauses = []
-
+        this_feature_attributes = feature_attributes
+        if not this_feature_attributes:
+            this_feature_attributes = list(set(object.keys()) - set(target_attributes))
+        query_obj = select_nested(object, this_feature_attributes)
         query_text = self.object_to_text(query_obj)
         for example in examples:
-
+            this_feature_attributes = feature_attributes
+            if not this_feature_attributes:
+                this_feature_attributes = list(set(example.keys()) - set(target_attributes))
+            if not this_feature_attributes:
+                raise ValueError(f"No feature attributes found in example {example}")
+            input_obj = select_nested(example, this_feature_attributes)
             input_obj_text = self.object_to_text(input_obj)
             if input_obj_text == query_text:
-
-
-
-
-
+                continue
+                #raise ValueError(
+                #    f"Query object {query_text} is the same as example object {input_obj_text}\n"
+                #    "This indicates possible test data leakage\n."
+                #    "TODO: allow an option that allows user to treat this as a basic lookup\n"
+                #)
             output_obj = select_nested(example, target_attributes)
             prompt_clause = (
                 "---\nExample:\n" f"## INPUT:\n{input_obj_text}\n" f"## OUTPUT:\n{self.object_to_text(output_obj)}\n"
@@ -169,7 +180,7 @@ class RAGInferenceEngine(InferenceEngine):
                                     encoding=encoding, token_limit=token_limit,
                                     additional_text=system_prompt)
         logger.info(f"Prompt: {prompt}")
-        response = model.prompt(prompt, system_prompt)
+        response = model.prompt(prompt, system=system_prompt)
         yaml_str = response.text()
         logger.info(f"Response: {yaml_str}")
         predicted_object = self._parse_yaml_payload(yaml_str, strict=True)
{linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/inference/inference_engine.py

@@ -124,7 +124,7 @@ class InferenceEngine(ABC):
         Load the data and split it into training and testing sets.
 
         :param collection:
-        :param split:
+        :param split: Tuple of training and testing split ratios.
         :param randomize:
         :return:
         """
@@ -136,7 +136,7 @@ class InferenceEngine(ABC):
             self.training_data = CollectionSlice(name="train", base_collection=collection, indices=None)
             self.testing_data = None
             return
-        logger.info(f"Loading and splitting data from collection {collection.alias}")
+        logger.info(f"Loading and splitting data {split} from collection {collection.alias}")
         size = collection.size()
         indices = range(size)
         if randomize:
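
Per the updated docstring, split is a (train, test) ratio pair. A tiny sketch, assuming `engine` is any InferenceEngine subclass and `collection` is a Collection:

```python
# split=(0.7, 0.3): roughly 70% of the rows go to engine.training_data
# and 30% to engine.testing_data (both exposed as CollectionSlice objects).
engine.load_and_split_data(collection, split=(0.7, 0.3))
```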
{linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/llm_utils.py

@@ -100,3 +100,18 @@ def get_token_limit(model_name: str) -> int:
         if model in model_name:
             return token_limit
     return 4096
+
+
+def parse_yaml_payload(yaml_str: str, strict=False) -> Optional[dict]:
+    import yaml
+    if "```" in yaml_str:
+        yaml_str = yaml_str.split("```")[1].strip()
+        if yaml_str.startswith("yaml"):
+            yaml_str = yaml_str[4:].strip()
+    try:
+        return yaml.safe_load(yaml_str)
+    except Exception as e:
+        if strict:
+            raise e
+        logger.error(f"Error parsing YAML: {yaml_str}\n{e}")
+        return None
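
parse_yaml_payload strips an optional fenced yaml block from an LLM response before parsing; with strict=False it logs and returns None on malformed YAML instead of raising. A small illustration (the response text is made up; the fence is built by concatenation only to keep this example readable inside a code block):

```python
from linkml_store.utils.llm_utils import parse_yaml_payload

fence = "```"
response_text = f"Here is the object:\n{fence}yaml\nname: Alice\noccupation: engineer\n{fence}\n"

obj = parse_yaml_payload(response_text)
assert obj == {"name": "Alice", "occupation": "engineer"}

# Malformed YAML returns None unless strict=True, which re-raises the parser error.
assert parse_yaml_payload("not: [valid", strict=False) is None
```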
{linkml_store-0.2.4 → linkml_store-0.2.5}/src/linkml_store/utils/object_utils.py

@@ -124,7 +124,7 @@ def select_nested(data: dict, paths: List[Union[str, List[str]]], current_path=N
 
     Args:
         data (dict): The input nested dictionary.
-
+        paths (list): A list of selector strings.
 
     Returns:
         dict: A new dictionary with the same structure, but only the selected attributes.
@@ -162,6 +162,8 @@ def select_nested(data: dict, paths: List[Union[str, List[str]]], current_path=N
     if current_path is None:
         current_path = []
     matching_paths = []
+    if not paths:
+        raise ValueError("No paths provided")
     for path in paths:
         if isinstance(path, str):
             path = path.split(".")
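
select_nested copies only the attributes named by dotted selector paths, and with this change it raises a ValueError when no paths are given. A small illustration (the data and paths are made up; the expected result follows from the docstring):

```python
from linkml_store.utils.object_utils import select_nested

data = {"person": {"name": "Alice", "age": 33}, "id": 7, "note": "x"}

# Keep only person.name and the top-level id.
subset = select_nested(data, ["person.name", "id"])
# expected: {"person": {"name": "Alice"}, "id": 7}

select_nested(data, [])  # raises ValueError("No paths provided")
```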