linkml-store 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linkml-store might be problematic. Click here for more details.

Files changed (27) hide show
  1. linkml_store/api/client.py +35 -8
  2. linkml_store/api/collection.py +40 -5
  3. linkml_store/api/config.py +20 -3
  4. linkml_store/api/database.py +24 -3
  5. linkml_store/api/stores/duckdb/duckdb_collection.py +3 -0
  6. linkml_store/api/stores/mongodb/mongodb_collection.py +4 -0
  7. linkml_store/cli.py +149 -13
  8. linkml_store/inference/__init__.py +13 -0
  9. linkml_store/inference/evaluation.py +189 -0
  10. linkml_store/inference/implementations/__init__.py +0 -0
  11. linkml_store/inference/implementations/rag_inference_engine.py +145 -0
  12. linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
  13. linkml_store/inference/implementations/sklearn_inference_engine.py +308 -0
  14. linkml_store/inference/inference_config.py +62 -0
  15. linkml_store/inference/inference_engine.py +200 -0
  16. linkml_store/inference/inference_engine_registry.py +74 -0
  17. linkml_store/utils/format_utils.py +27 -90
  18. linkml_store/utils/llm_utils.py +96 -0
  19. linkml_store/utils/object_utils.py +103 -2
  20. linkml_store/utils/pandas_utils.py +55 -2
  21. linkml_store/utils/sklearn_utils.py +193 -0
  22. linkml_store/utils/stats_utils.py +53 -0
  23. {linkml_store-0.1.13.dist-info → linkml_store-0.2.0.dist-info}/METADATA +28 -2
  24. {linkml_store-0.1.13.dist-info → linkml_store-0.2.0.dist-info}/RECORD +27 -15
  25. {linkml_store-0.1.13.dist-info → linkml_store-0.2.0.dist-info}/LICENSE +0 -0
  26. {linkml_store-0.1.13.dist-info → linkml_store-0.2.0.dist-info}/WHEEL +0 -0
  27. {linkml_store-0.1.13.dist-info → linkml_store-0.2.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,53 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+
5
def _needs_label_encoding(series):
    """Return True when ``series`` holds text-like values that must be label-encoded.

    Covers plain ``object`` columns, pandas string dtypes, and categoricals
    whose categories are themselves text-like. Numeric, boolean and
    numeric-category columns are left untouched.
    """
    dtype = series.dtype
    if isinstance(dtype, pd.CategoricalDtype):
        # A categorical is only text-like when its categories are.
        dtype = dtype.categories.dtype
    return pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype)


def predictive_power(df, target_col, feature_cols, cv=5):
    """Score how well ``feature_cols`` predict ``target_col`` with a decision tree.

    Text-like feature columns (and a text-like target) are converted to
    integer codes with ``LabelEncoder`` after casting to ``str``; a
    ``DecisionTreeClassifier`` is then evaluated on the result.

    :param df: DataFrame holding both the target and the feature columns.
    :param target_col: Label of the column to predict.
    :param feature_cols: Labels of the columns used as predictors.
    :param cv: Requested number of cross-validation folds; capped at the
        number of distinct target values.
    :return: Mean cross-validated score, or the score on the training data
        itself when fewer than two folds are possible.
    """
    # scikit-learn is an optional dependency, so it is imported lazily.
    from sklearn.model_selection import cross_val_score
    from sklearn.preprocessing import LabelEncoder
    from sklearn.tree import DecisionTreeClassifier

    # Work on copies so the caller's DataFrame is never modified.
    X = df[feature_cols].copy()
    y = df[target_col].copy()

    # Encode text-like features. Checking only ``dtype == "object"`` would
    # miss pandas string dtypes and string categoricals, which the tree
    # cannot convert to floats.
    for col in X.columns:
        if _needs_label_encoding(X[col]):
            X[col] = LabelEncoder().fit_transform(X[col].astype(str))

    if _needs_label_encoding(y):
        y = LabelEncoder().fit_transform(y.astype(str))

    # Never request more folds than there are distinct target values.
    n_unique = len(np.unique(y))
    cv = min(cv, n_unique)

    # Fixed seed keeps the score reproducible between runs.
    clf = DecisionTreeClassifier(random_state=42)

    if cv < 2:
        # Cross-validation needs at least two folds; fall back to fitting
        # and scoring on the same data.
        clf.fit(X, y)
        return clf.score(X, y)

    scores = cross_val_score(clf, X, y, cv=cv)
    return scores.mean()
36
+
37
+
38
def analyze_predictive_power(df, columns=None, cv=5):
    """Score every column of interest as a target predicted by the others.

    For each column in ``columns``, ``predictive_power`` is called with that
    column as the target and the remaining columns as features.

    :param df: DataFrame to analyse.
    :param columns: Column labels to consider; defaults to all columns of ``df``.
    :param cv: Number of cross-validation folds passed to ``predictive_power``.
    :return: DataFrame indexed by ``columns`` with a ``predictive_power``
        column (the score, or NaN when scoring failed) and a ``features``
        column (comma-separated labels of the predictors used).
    """
    if columns is None:
        columns = df.columns
    results = pd.DataFrame(index=columns, columns=["predictive_power", "features"])

    for target_col in columns:
        feature_cols = [col for col in columns if col != target_col]
        try:
            power = predictive_power(df, target_col, feature_cols, cv)
            results.loc[target_col, "predictive_power"] = power
            # Column labels need not be strings (e.g. integer labels);
            # joining them raw would raise TypeError and the handler below
            # would then discard the score that was just computed.
            results.loc[target_col, "features"] = ", ".join(str(col) for col in feature_cols)
        except Exception as e:
            # Best effort: one failing column must not abort the whole
            # analysis, so report it and record a missing score.
            print(f"Error processing {target_col}: {str(e)}")
            results.loc[target_col, "predictive_power"] = np.nan

    return results
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: linkml-store
3
- Version: 0.1.13
3
+ Version: 0.2.0
4
4
  Summary: linkml-store
5
5
  License: MIT
6
6
  Author: Author 1
@@ -24,6 +24,7 @@ Provides-Extra: mongodb
24
24
  Provides-Extra: neo4j
25
25
  Provides-Extra: pyarrow
26
26
  Provides-Extra: renderer
27
+ Provides-Extra: scipy
27
28
  Provides-Extra: tests
28
29
  Provides-Extra: validation
29
30
  Requires-Dist: black (>=24.0.0) ; extra == "tests"
@@ -51,9 +52,12 @@ Requires-Dist: pyarrow ; extra == "pyarrow"
51
52
  Requires-Dist: pydantic (>=2.0.0,<3.0.0)
52
53
  Requires-Dist: pymongo ; extra == "mongodb"
53
54
  Requires-Dist: pystow (>=0.5.4,<0.6.0)
55
+ Requires-Dist: scikit-learn ; extra == "scipy"
56
+ Requires-Dist: scipy ; extra == "scipy"
54
57
  Requires-Dist: seaborn ; extra == "analytics"
55
58
  Requires-Dist: sqlalchemy
56
59
  Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
60
+ Requires-Dist: tiktoken ; extra == "llm"
57
61
  Requires-Dist: uvicorn ; extra == "fastapi"
58
62
  Description-Content-Type: text/markdown
59
63
 
@@ -61,11 +65,13 @@ Description-Content-Type: text/markdown
61
65
 
62
66
  An AI-ready data management and integration platform. LinkML-Store
63
67
  provides an abstraction layer over multiple different backends
64
- (including DuckDB, MongoDB, and local filesystems), allowing for
68
+ (including DuckDB, MongoDB, Neo4j, and local filesystems), allowing for
65
69
  common query, index, and storage operations.
66
70
 
67
71
  For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
68
72
 
73
+ See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for a high level overview.
74
+
69
75
  __Warning__ LinkML-Store is still undergoing changes and refactoring,
70
76
  APIs and command line options are subject to change!
71
77
 
@@ -99,6 +105,23 @@ linkml-store -d duckdb:///db/my.db -c persons validate
99
105
  * API
100
106
  * Streamlit applications
101
107
 
108
+ ## The CRUDSI pattern
109
+
110
+ Most database APIs implement the **CRUD** pattern: Create, Read, Update, Delete.
111
+ LinkML-Store adds **Search** and **Inference** to this pattern, making it **CRUDSI**.
112
+
113
+ The notion of "Search" and "Inference" is intended to be flexible and extensible,
114
+ including:
115
+
116
+ * Search
117
+ * Traditional keyword search
118
+ * Search using LLM Vector embeddings (*without* a dedicated vector database)
119
+ * Pluggable specialized search, e.g. genomic sequence (not yet implemented)
120
+ * Inference (encompassing *validation*, *repair*, and inference of missing data)
121
+ * Classic rule-based inference
122
+ * Inference using LLM Retrieval Augmented Generation (RAG)
123
+ * Statistical/ML inference
124
+
102
125
  ## Features
103
126
 
104
127
  ### Multiple Adapters
@@ -108,6 +131,8 @@ LinkML-Store is designed to work with multiple backends, giving a common abstrac
108
131
  * [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
109
132
  * [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
110
133
  * [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
134
+ * [Neo4j](https://linkml.io/linkml-store/how-to/Use-Neo4j.html)
135
+
111
136
  * Filesystem
112
137
 
113
138
  Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
@@ -173,3 +198,4 @@ make app
173
198
 
174
199
  See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
175
200
 
201
+
@@ -1,16 +1,16 @@
1
1
  linkml_store/__init__.py,sha256=jlU6WOUAn8cKIhzbTULmBTWpW9gZdEt7q_RI6KZN1bY,118
2
2
  linkml_store/api/__init__.py,sha256=3CelcFEFz0y3MkQAzhQ9JxHIt1zFk6nYZxSmYTo8YZE,226
3
- linkml_store/api/client.py,sha256=aiWhLSAY0Wj7EfoXdEXMtGId1LSbvGya8dGEKdOB0iI,10989
4
- linkml_store/api/collection.py,sha256=Qi89f-iUSC1zKUgqSMuooklteO0wbJfYbeVkpn8CbIM,36186
5
- linkml_store/api/config.py,sha256=E-ma5hXQrs5Gyet2BB6CFJyPqNjXwAr1pr9H7AT2vOc,5159
6
- linkml_store/api/database.py,sha256=W5PzpIua8w6eCUqugGqOV6Y7O15g5FSZzzaQKDOfqak,28297
3
+ linkml_store/api/client.py,sha256=3klBXenQVbLjNQF3WmYfjASt3zvKOfWaCNp5aJM81Ec,12034
4
+ linkml_store/api/collection.py,sha256=7JndC6A9r3OVbR9aB6d_bdaYN53XU4FpppUterygOaE,37800
5
+ linkml_store/api/config.py,sha256=71pxQ5jM-ETxJWU7CzmKjsH6IEJUMP5sml381u9TYVk,5654
6
+ linkml_store/api/database.py,sha256=QVvUuLQPCxB4cvsS7rXqPSfoHkhcMzP9vUcsjkuEYds,29051
7
7
  linkml_store/api/queries.py,sha256=w0qnNeCH6pC9WTGoEQYd300MF6o0G3atz2YxN3WecAs,2028
8
8
  linkml_store/api/stores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  linkml_store/api/stores/chromadb/__init__.py,sha256=e9BkOPuPnVQKA5PRKDulag59yGNHDP3U2_DnPSrFAKM,132
10
10
  linkml_store/api/stores/chromadb/chromadb_collection.py,sha256=RQUZx5oeotkzNihg-dlSevkiTiKY1d9x0bS63HF80W4,4270
11
11
  linkml_store/api/stores/chromadb/chromadb_database.py,sha256=dZA3LQE8-ZMhJQOzsUFyxehnKpFF7adR182aggfkaFY,3205
12
12
  linkml_store/api/stores/duckdb/__init__.py,sha256=rbQSDgNg-fdvi6-pHGYkJTST4p1qXUZBf9sFSsO3KPk,387
13
- linkml_store/api/stores/duckdb/duckdb_collection.py,sha256=d9aYNFt9t8itNmhb6H2PdsxHL1dkYiW0N9wF8SATk1g,6767
13
+ linkml_store/api/stores/duckdb/duckdb_collection.py,sha256=yXnJpEiGK4lMyNuJykuvlKOqaV9ntqv0m0NZMOw0auk,6911
14
14
  linkml_store/api/stores/duckdb/duckdb_database.py,sha256=GH9bcOfHpNp6r-Eu1C3W0xuYcLsqGFDH1Sh4weifGaQ,9923
15
15
  linkml_store/api/stores/duckdb/mappings.py,sha256=tDce3W1Apwammhf4LS6cRJ0m4NiJ0eB7vOI_4U5ETY8,148
16
16
  linkml_store/api/stores/filesystem/__init__.py,sha256=KjvCjdttwqMHNeGyL-gr59zRz0--HFEWWUNNCJ5hITs,347
@@ -20,7 +20,7 @@ linkml_store/api/stores/hdf5/__init__.py,sha256=l4cIh3v7P0nPbwGIsfuCMD_serQ8q8c7
20
20
  linkml_store/api/stores/hdf5/hdf5_collection.py,sha256=mnpLMYehn3PuaIjp2dXrIWu8jh-bdQ84X2Ku83jMdEY,3805
21
21
  linkml_store/api/stores/hdf5/hdf5_database.py,sha256=EZbjrpaqiNDEFvoD5dZNcGBXA8z6HRNL81emueTZWNw,2714
22
22
  linkml_store/api/stores/mongodb/__init__.py,sha256=OSFCr7RQlDEe-O-Y0P_i912oAMK-L3pC7Cnj7sxlwAk,510
23
- linkml_store/api/stores/mongodb/mongodb_collection.py,sha256=W-j1tkk8h1-zzjIRe-IYAnAOwlFSwr4fnDtgiu7RsVI,6695
23
+ linkml_store/api/stores/mongodb/mongodb_collection.py,sha256=unN0v7RYlGIiJxEhbNRxZ86TVQ4ELlAsNWTwEbg7E_g,6831
24
24
  linkml_store/api/stores/mongodb/mongodb_database.py,sha256=Y9MIV0KSRGCyopz8vGEivhSuvF0vZLCDJd29cdqMIX8,3857
25
25
  linkml_store/api/stores/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
26
  linkml_store/api/stores/neo4j/neo4j_collection.py,sha256=a-Az5_ypdBMgeNyhrTW7q-ik-vYPCDDONIK7N_CDA9c,17449
@@ -30,7 +30,7 @@ linkml_store/api/stores/solr/solr_collection.py,sha256=ZlxC3JbVaHfSA4HuTeJTsp6qe
30
30
  linkml_store/api/stores/solr/solr_database.py,sha256=TFjqbY7jAkdrhAchbNg0E-mChSP7ogNwFExslbvX7Yo,2877
31
31
  linkml_store/api/stores/solr/solr_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
32
  linkml_store/api/types.py,sha256=3aIQtDFMvsSmjuN5qrR2vNK5sHa6yzD_rEOPA6tHwvg,176
33
- linkml_store/cli.py,sha256=_oDbqElZIWKHJ6ruJyu57rbg2IYUr7U-0AqDlKz6jug,21126
33
+ linkml_store/cli.py,sha256=NIEU5dEkEKz3a2q4mpkdXxHX1mANd2z9oFIkNVz-wsw,27048
34
34
  linkml_store/constants.py,sha256=x4ZmDsfE9rZcL5WpA93uTKrRWzCD6GodYXviVzIvR38,112
35
35
  linkml_store/graphs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  linkml_store/graphs/graph_map.py,sha256=bYRxv8n1YPnFqE9d6JKNmRawb8EAhsPlHhBue0gvtZE,712
@@ -39,19 +39,31 @@ linkml_store/index/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
39
39
  linkml_store/index/implementations/llm_indexer.py,sha256=LI5f8SLF_rJY5W6wZPLaUqpyoq-VDW_KqlCBNDNm_po,4827
40
40
  linkml_store/index/implementations/simple_indexer.py,sha256=KnkFJtXTHnwjhD_D6ZK2rFhBID1dgCedcOVPEWAY2NU,1282
41
41
  linkml_store/index/indexer.py,sha256=K-TDPzdTyGFo6iG4XI_A_3IpwDbKeiTIbdr85NIL5r8,4918
42
+ linkml_store/inference/__init__.py,sha256=b8NAFNZjOYU_8gOvxdyCyoiHOOl5Ai2ckKs1tv7ZkkY,342
43
+ linkml_store/inference/evaluation.py,sha256=qvsmGDBKTZBDKhpbPDe_AkcJ2LtQ8e-oUYCUGfI6IAE,5799
44
+ linkml_store/inference/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
+ linkml_store/inference/implementations/rag_inference_engine.py,sha256=MH50-6i30Y5oKgIx47-yDjsPCojYC6-lujtHFBDqIxs,5833
46
+ linkml_store/inference/implementations/rule_based_inference_engine.py,sha256=0IEY_fsHJPJy6QKbYQU_qE87RRnPOXQxPuJKXCQG8jU,6250
47
+ linkml_store/inference/implementations/sklearn_inference_engine.py,sha256=HRhwnlpDJOijxvhLmdTSOq1S9xjBVCrgRT1C8uS0XZQ,13196
48
+ linkml_store/inference/inference_config.py,sha256=xgl3VmueErLIOnQQn4HdC2STJNY6yKoPasWmym4ltHQ,2014
49
+ linkml_store/inference/inference_engine.py,sha256=D1JlkihyNbZp7PYe5lplUbTJgyP7jL4vnxcpBio-KUs,6987
50
+ linkml_store/inference/inference_engine_registry.py,sha256=6o66gvBYBwdeAKm62zqqvfaBlcopVP_cla3L6uXGsHA,3015
42
51
  linkml_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
52
  linkml_store/utils/change_utils.py,sha256=O2rvSvgTKB60reLLz9mX5OWykAA_m93bwnUh5ZWa0EY,471
44
53
  linkml_store/utils/file_utils.py,sha256=rQ7-XpmI6_Kx_dhEnI98muFRr0MmgI_kZ_9cgJBf_0I,1411
45
- linkml_store/utils/format_utils.py,sha256=aBS0uUhvdgPEXzw72iGfSezmHZFnqHtPuFUbVR4cfbA,13174
54
+ linkml_store/utils/format_utils.py,sha256=airJ2_tFsr0dTIbSHT5y0TZbDrvBBV4_qThFPFY5k8U,10925
46
55
  linkml_store/utils/io.py,sha256=JHUrWDtlZC2jtN_PQZ4ypdGIyYlftZEN3JaCvEPs44w,884
56
+ linkml_store/utils/llm_utils.py,sha256=Wb4h_E8vrJZDAYHhOdMCSMcz-xxVia4nfuFqiYitZ98,2864
47
57
  linkml_store/utils/mongodb_utils.py,sha256=Rl1YmMKs1IXwSsJIViSDChbi0Oer5cBnMmjka2TeQS8,4665
48
58
  linkml_store/utils/neo4j_utils.py,sha256=y3KPmDZ8mQmePgg0lUeKkeKqzEr2rV226xxEtHc5pRg,1266
49
- linkml_store/utils/object_utils.py,sha256=is6T2gruvVKvWD5ZntcAl6Qi3L154FObEho_b_crTuE,2539
50
- linkml_store/utils/pandas_utils.py,sha256=INL8aZ5v2OeLg-Uzfa8P1cpnMMKA1DumiTB0q175tw8,1389
59
+ linkml_store/utils/object_utils.py,sha256=Vib-5Ip2DlRVKLZpU-008ZZI813-vfKVSCY0TksRenM,6293
60
+ linkml_store/utils/pandas_utils.py,sha256=djiFPO3YbgRVo2XAZuKCtgH8QVLuUyPIsfS8e-0umsU,3182
51
61
  linkml_store/utils/patch_utils.py,sha256=q-h_v68okyruzdPTEHCe0WubbQHKpi1qy5bJ9vFWDo8,4823
52
62
  linkml_store/utils/query_utils.py,sha256=HWt46BsGWoIGiNBTtvpXGY6onPRWsQky6eu_9cYqbvo,3440
53
63
  linkml_store/utils/schema_utils.py,sha256=iJiZxo5NGr7v87h4DV6V9DrDOZHSswMRuf0N4V2rVtg,646
64
+ linkml_store/utils/sklearn_utils.py,sha256=itPpcrsbbyOazdjmivaaZ1lyZeytm0a0hJ2AS8ziUgg,7590
54
65
  linkml_store/utils/sql_utils.py,sha256=T41w_vsc3SauTJQkDMwid_nOtKW1YOKyUuaxEf470hk,5938
66
+ linkml_store/utils/stats_utils.py,sha256=4KqBb1bqDgAmq-1fJLLu5B2paPgoZZc3A-gnyVam4bI,1799
55
67
  linkml_store/webapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
68
  linkml_store/webapi/html/__init__.py,sha256=hwp5eeBJKH65Bvv1x9Z4vsT1tLSYtb9Dq4I9r1kL1q0,69
57
69
  linkml_store/webapi/html/base.html.j2,sha256=hoiV2uaSxxrQp7VuAZBOHueH7czyJMYcPBRN6dZFYhk,693
@@ -60,8 +72,8 @@ linkml_store/webapi/html/database_details.html.j2,sha256=qtXdavbZb0mohiObI9dvJtk
60
72
  linkml_store/webapi/html/databases.html.j2,sha256=a9BCWQYfPeFhdUd31CWhB0yWhTIFXQayO08JgjyqKoc,294
61
73
  linkml_store/webapi/html/generic.html.j2,sha256=KtLaO2HUEF2Opq-OwHKgRKetNWe8IWc6JuIkxRPsywk,1018
62
74
  linkml_store/webapi/main.py,sha256=B0Da575kKR7X88N9ykm99Dem8FyBAW9f-w3A_JwUzfw,29165
63
- linkml_store-0.1.13.dist-info/LICENSE,sha256=77mDOslUnalYnuq9xQYZKtIoNEzcH9mIjvWHOKjamnE,1086
64
- linkml_store-0.1.13.dist-info/METADATA,sha256=ouYb20TN5M2D12IUx9qspIIOiI-6qmrPpe1b6xx9xhU,5609
65
- linkml_store-0.1.13.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
66
- linkml_store-0.1.13.dist-info/entry_points.txt,sha256=gWxVsHqx-t-UKWFHFzawQTvs4is4vC1rCF5AeKyqWWk,101
67
- linkml_store-0.1.13.dist-info/RECORD,,
75
+ linkml_store-0.2.0.dist-info/LICENSE,sha256=77mDOslUnalYnuq9xQYZKtIoNEzcH9mIjvWHOKjamnE,1086
76
+ linkml_store-0.2.0.dist-info/METADATA,sha256=v_KjIlu-gTOHunF0ASPHRP_utQv-ry1piX3RpfPWX1k,6743
77
+ linkml_store-0.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
78
+ linkml_store-0.2.0.dist-info/entry_points.txt,sha256=gWxVsHqx-t-UKWFHFzawQTvs4is4vC1rCF5AeKyqWWk,101
79
+ linkml_store-0.2.0.dist-info/RECORD,,