linkml-store 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. linkml_store/__init__.py +7 -0
  2. linkml_store/api/__init__.py +8 -0
  3. linkml_store/api/client.py +414 -0
  4. linkml_store/api/collection.py +1280 -0
  5. linkml_store/api/config.py +187 -0
  6. linkml_store/api/database.py +862 -0
  7. linkml_store/api/queries.py +69 -0
  8. linkml_store/api/stores/__init__.py +0 -0
  9. linkml_store/api/stores/chromadb/__init__.py +7 -0
  10. linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
  11. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  12. linkml_store/api/stores/dremio/__init__.py +10 -0
  13. linkml_store/api/stores/dremio/dremio_collection.py +555 -0
  14. linkml_store/api/stores/dremio/dremio_database.py +1052 -0
  15. linkml_store/api/stores/dremio/mappings.py +105 -0
  16. linkml_store/api/stores/dremio_rest/__init__.py +11 -0
  17. linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
  18. linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
  19. linkml_store/api/stores/duckdb/__init__.py +16 -0
  20. linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
  21. linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
  22. linkml_store/api/stores/duckdb/mappings.py +8 -0
  23. linkml_store/api/stores/filesystem/__init__.py +15 -0
  24. linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
  25. linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
  26. linkml_store/api/stores/hdf5/__init__.py +7 -0
  27. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  28. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  29. linkml_store/api/stores/ibis/__init__.py +5 -0
  30. linkml_store/api/stores/ibis/ibis_collection.py +488 -0
  31. linkml_store/api/stores/ibis/ibis_database.py +328 -0
  32. linkml_store/api/stores/mongodb/__init__.py +25 -0
  33. linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
  34. linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
  35. linkml_store/api/stores/neo4j/__init__.py +0 -0
  36. linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
  37. linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
  38. linkml_store/api/stores/solr/__init__.py +3 -0
  39. linkml_store/api/stores/solr/solr_collection.py +224 -0
  40. linkml_store/api/stores/solr/solr_database.py +83 -0
  41. linkml_store/api/stores/solr/solr_utils.py +0 -0
  42. linkml_store/api/types.py +4 -0
  43. linkml_store/cli.py +1147 -0
  44. linkml_store/constants.py +7 -0
  45. linkml_store/graphs/__init__.py +0 -0
  46. linkml_store/graphs/graph_map.py +24 -0
  47. linkml_store/index/__init__.py +53 -0
  48. linkml_store/index/implementations/__init__.py +0 -0
  49. linkml_store/index/implementations/llm_indexer.py +174 -0
  50. linkml_store/index/implementations/simple_indexer.py +43 -0
  51. linkml_store/index/indexer.py +211 -0
  52. linkml_store/inference/__init__.py +13 -0
  53. linkml_store/inference/evaluation.py +195 -0
  54. linkml_store/inference/implementations/__init__.py +0 -0
  55. linkml_store/inference/implementations/llm_inference_engine.py +154 -0
  56. linkml_store/inference/implementations/rag_inference_engine.py +276 -0
  57. linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
  58. linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
  59. linkml_store/inference/inference_config.py +66 -0
  60. linkml_store/inference/inference_engine.py +209 -0
  61. linkml_store/inference/inference_engine_registry.py +74 -0
  62. linkml_store/plotting/__init__.py +5 -0
  63. linkml_store/plotting/cli.py +826 -0
  64. linkml_store/plotting/dimensionality_reduction.py +453 -0
  65. linkml_store/plotting/embedding_plot.py +489 -0
  66. linkml_store/plotting/facet_chart.py +73 -0
  67. linkml_store/plotting/heatmap.py +383 -0
  68. linkml_store/utils/__init__.py +0 -0
  69. linkml_store/utils/change_utils.py +17 -0
  70. linkml_store/utils/dat_parser.py +95 -0
  71. linkml_store/utils/embedding_matcher.py +424 -0
  72. linkml_store/utils/embedding_utils.py +299 -0
  73. linkml_store/utils/enrichment_analyzer.py +217 -0
  74. linkml_store/utils/file_utils.py +37 -0
  75. linkml_store/utils/format_utils.py +550 -0
  76. linkml_store/utils/io.py +38 -0
  77. linkml_store/utils/llm_utils.py +122 -0
  78. linkml_store/utils/mongodb_utils.py +145 -0
  79. linkml_store/utils/neo4j_utils.py +42 -0
  80. linkml_store/utils/object_utils.py +190 -0
  81. linkml_store/utils/pandas_utils.py +93 -0
  82. linkml_store/utils/patch_utils.py +126 -0
  83. linkml_store/utils/query_utils.py +89 -0
  84. linkml_store/utils/schema_utils.py +23 -0
  85. linkml_store/utils/sklearn_utils.py +193 -0
  86. linkml_store/utils/sql_utils.py +177 -0
  87. linkml_store/utils/stats_utils.py +53 -0
  88. linkml_store/utils/vector_utils.py +158 -0
  89. linkml_store/webapi/__init__.py +0 -0
  90. linkml_store/webapi/html/__init__.py +3 -0
  91. linkml_store/webapi/html/base.html.j2 +24 -0
  92. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  93. linkml_store/webapi/html/database_details.html.j2 +16 -0
  94. linkml_store/webapi/html/databases.html.j2 +14 -0
  95. linkml_store/webapi/html/generic.html.j2 +43 -0
  96. linkml_store/webapi/main.py +855 -0
  97. linkml_store-0.3.0.dist-info/METADATA +226 -0
  98. linkml_store-0.3.0.dist-info/RECORD +101 -0
  99. linkml_store-0.3.0.dist-info/WHEEL +4 -0
  100. linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
  101. linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,224 @@
1
+ # solr_collection.py
2
+
3
+ import logging
4
+ from copy import copy
5
+ from typing import Any, Dict, List, Optional, Union, Tuple
6
+
7
+ import requests
8
+
9
+ from linkml_store.api import Collection
10
+ from linkml_store.api.collection import DEFAULT_FACET_LIMIT
11
+ from linkml_store.api.queries import Query, QueryResult
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class SolrCollection(Collection):
    """
    Collection whose data is read from a Solr ``/select`` endpoint over HTTP.

    The methods defined here cover full-text search, structured queries and
    facet counts. ``self.parent`` is expected to expose ``base_url``,
    ``use_cores`` and ``metadata``, all of which are read below.
    """

    @property
    def _collection_base(self) -> str:
        """
        Return the URL prefix that ``/select`` is appended to.

        When the parent database uses cores, the collection alias is added as
        a path segment; otherwise the parent's base URL is used unchanged.
        """
        if self.parent.use_cores:
            return f"{self.parent.base_url}/{self.alias}"
        return self.parent.base_url

    def _solr_select(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Issue a GET request against this collection's ``/select`` handler.

        :param params: Solr request parameters
        :return: The decoded JSON response body
        :raises requests.HTTPError: if Solr answers with an error status
        """
        response = requests.get(f"{self._collection_base}/select", params=params)
        response.raise_for_status()
        return response.json()

    def search(
        self,
        query: str,
        where: Optional[Any] = None,
        index_name: Optional[str] = None,
        limit: Optional[int] = None,
        **kwargs,
    ) -> QueryResult:
        """
        Run a free-text search against the collection.

        :param query: The search term, sent to Solr as ``q``
        :param where: Optional filter conditions, sent to Solr as ``fq``
        :param index_name: Value for Solr's ``defType``; defaults to ``edismax``
        :param limit: Maximum number of rows to return. When omitted, no
            ``rows`` parameter is sent by this method.
        :return: Query result in which every row is paired with a score of 1.0
        :raises requests.HTTPError: if Solr answers with an error status
        """
        if index_name is None:
            index_name = "edismax"
        qfs = self.parent.metadata.searchable_slots
        if not qfs:
            # No searchable slots configured: search every attribute of the class.
            qfs = list(self.class_definition().attributes.keys())

        extra: Dict[str, Any] = {"defType": index_name, "qf": qfs}
        # Forward the caller's limit; it used to be accepted and then ignored.
        # NOTE(review): negative limits are not forwarded, on the assumption
        # that Solr rejects a negative ``rows`` -- confirm against the server.
        if limit is not None and limit >= 0:
            extra["rows"] = limit

        solr_query = self._build_solr_query(where, search_term=query, extra=extra)
        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")

        data = self._solr_select(solr_query)
        num_rows = data["response"]["numFound"]
        rows = data["response"]["docs"]
        # Scores are not read from the response; every hit gets a constant 1.0.
        ranked_rows = [(1.0, row) for row in rows]
        return QueryResult(query=where, search_term=query, num_rows=num_rows, rows=rows, ranked_rows=ranked_rows)

    def query(self, query: Query, **kwargs) -> QueryResult:
        """
        Execute a structured query against the collection.

        :param query: Query whose where clause, selected columns, limit and
            offset are translated into Solr parameters
        :return: The matching documents plus Solr's ``numFound`` total
        :raises requests.HTTPError: if Solr answers with an error status
        """
        solr_query = self._build_solr_query(query)
        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")

        data = self._solr_select(solr_query)
        logger.debug(f"Response: {data}")
        num_rows = data["response"]["numFound"]
        rows = data["response"]["docs"]

        return QueryResult(query=query, num_rows=num_rows, rows=rows)

    def query_facets(
        self,
        where: Optional[Dict] = None,
        facet_columns: Optional[List[Union[str, Tuple[str, ...]]]] = None,
        facet_limit=DEFAULT_FACET_LIMIT,
        facet_min_count: int = 1,
        **kwargs,
    ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
        """
        Query facet counts for fields or field combinations.

        :param where: Filter conditions
        :param facet_columns: List of fields to facet on. Elements can be:
            - Simple strings for single field facets
            - Tuples of strings for field combinations (pivot facets)
            Elements of any other type are ignored.
        :param facet_limit: Maximum number of facet values to return
        :param facet_min_count: Minimum count for facet values to be included
        :return: Dictionary mapping fields or field tuples to lists of (value, count) tuples
        :raises requests.HTTPError: if Solr answers with an error status
        """
        solr_query = self._build_solr_query(where)

        # Separate single fields and tuple fields
        single_fields = [f for f in (facet_columns or []) if isinstance(f, str)]
        tuple_fields = [f for f in (facet_columns or []) if isinstance(f, tuple)]

        results: Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]] = {}

        # Process regular facets: one request covers all single fields.
        if single_fields:
            solr_query["facet"] = "true"
            solr_query["facet.field"] = single_fields
            solr_query["facet.limit"] = facet_limit
            solr_query["facet.mincount"] = facet_min_count

            logger.info(f"Querying Solr collection {self.alias} for facets with query: {solr_query}")
            data = self._solr_select(solr_query)
            facet_counts = data["facet_counts"]["facet_fields"]

            for facet_field, counts in facet_counts.items():
                # Counts arrive as a flat [value, count, value, count, ...] list.
                results[facet_field] = list(zip(counts[::2], counts[1::2]))

        # Process pivot facets for tuple fields: one request per tuple.
        # TODO: Add a warning if Solr < 4.0, when this was introduced
        for field_tuple in tuple_fields:
            field_str = ",".join(field_tuple)

            pivot_query = self._build_solr_query(where)
            pivot_query["facet"] = "true"
            pivot_query["facet.pivot"] = field_str
            pivot_query["facet.pivot.mincount"] = facet_min_count
            pivot_query["facet.limit"] = facet_limit

            logger.info(f"Querying Solr collection {self.alias} for pivot facets with query: {pivot_query}")
            data = self._solr_select(pivot_query)
            pivot_facets = data.get("facet_counts", {}).get("facet_pivot", {})
            pivot_data = pivot_facets.get(field_str, [])

            # Flatten the nested pivot tree into (value tuple, count) pairs.
            pivot_results: List[Tuple[Any, int]] = []
            self._process_pivot_facets(pivot_data, [], pivot_results, field_tuple)

            results[field_tuple] = pivot_results

        return results

    def _process_pivot_facets(self, pivot_data, current_values, results, field_tuple):
        """
        Recursively process pivot facet results to extract combinations of field values.

        :param pivot_data: The pivot facet data from Solr
        :param current_values: The current path of values in the recursion
        :param results: The result list to populate (appended to in place)
        :param field_tuple: The original field tuple for reference
        """
        depth = len(field_tuple)
        for item in pivot_data:
            value = item.get("value")
            count = item.get("count", 0)

            # Extend the path without mutating the caller's list.
            values = current_values + [value]

            # A full-length path is one complete combination of field values.
            if len(values) == depth:
                results.append((tuple(values), count))

            # Descend only while the path is still shorter than the tuple.
            pivot = item.get("pivot", [])
            if pivot and len(values) < depth:
                self._process_pivot_facets(pivot, values, results, field_tuple)

    def _build_solr_query(
        self, query: Union[Query, Dict], search_term="*:*", extra: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """
        Translate a query or filter dictionary into Solr request parameters.

        :param query: A ``Query`` object, a dictionary of filter conditions,
            or ``None`` (treated as an empty dictionary)
        :param search_term: Value for Solr's ``q`` parameter
        :param extra: Additional parameters merged in last, so they override
            anything derived from ``query``
        :return: Dictionary of Solr request parameters
        """
        solr_query: Dict[str, Any] = {}
        if query is None:
            query = {}

        if isinstance(query, Query):
            solr_query["fq"] = self._build_solr_where_clause(query.where_clause)

            if query.select_cols:
                solr_query["fl"] = ",".join(query.select_cols)

            if query.limit:
                solr_query["rows"] = query.limit

            if query.offset:
                solr_query["start"] = query.offset

        elif isinstance(query, dict):
            solr_query["fq"] = self._build_solr_where_clause(query)

        solr_query["wt"] = "json"
        if "q" not in solr_query:
            solr_query["q"] = search_term
        if extra:
            solr_query.update(extra)
        logger.info(f"Built Solr query: {solr_query}")
        return solr_query

    @staticmethod
    def _format_solr_value(value: Any) -> str:
        """
        Render one filter value as a Solr query term.

        Strings become quoted phrases, with embedded backslashes and double
        quotes escaped so that they cannot end the phrase early. Any other
        value is rendered with ``str()``.
        """
        if isinstance(value, str):
            # Escape backslashes first so the escapes added for quotes survive.
            escaped = value.replace("\\", "\\\\").replace('"', '\\"')
            return f'"{escaped}"'
        return str(value)

    def _build_solr_where_clause(self, where_clause: Dict) -> str:
        """
        Build a Solr filter query string from field/value conditions.

        :param where_clause: Mapping of field name to a single value or a
            list/tuple of alternative values; ``None`` is treated as empty
        :return: Conditions joined with ``AND``; an empty string when there
            are no conditions
        """
        if where_clause is None:
            where_clause = {}
        if self.parent.metadata.collection_type_slot:
            # Copy before adding the type condition so the caller's dict is untouched.
            where_clause = copy(where_clause)
            where_clause[self.parent.metadata.collection_type_slot] = self.alias

        conditions = []
        for field, value in where_clause.items():
            if not isinstance(value, (list, tuple)):
                value = [value]
            terms = [self._format_solr_value(v) for v in value]
            if len(terms) > 1:
                conditions.append(f"{field}:({' '.join(terms)})")
            else:
                conditions.append(f"{field}:{terms[0]}")

        return " AND ".join(conditions)
@@ -0,0 +1,83 @@
1
+ import logging
2
+ from typing import Optional
3
+
4
+ import requests
5
+
6
+ from linkml_store.api import Collection, Database
7
+ from linkml_store.api.config import CollectionConfig
8
+ from linkml_store.api.queries import Query, QueryResult
9
+ from linkml_store.api.stores.solr.solr_collection import SolrCollection
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class SolrDatabase(Database):
    """
    Database whose collections are read from a Solr server over HTTP.

    The handle is either a plain base URL or a base URL prefixed with
    ``solr:``; the prefix is stripped to obtain ``base_url``.
    """

    # URL that collection requests are built from.
    base_url: str
    collection_class = SolrCollection
    # When true, collections append their alias to ``base_url`` as a path segment.
    use_cores: bool = False

    def __init__(self, handle: Optional[str] = None, **kwargs):
        """
        Create the database and derive ``base_url`` from the handle.

        :param handle: Base URL of the Solr endpoint, optionally prefixed
            with ``solr:``
        :param kwargs: Passed through to the parent constructor
        :raises ValueError: if no handle is given
        """
        if handle is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("A handle of the form 'solr:<base URL>' or a plain base URL is required")
        prefix = "solr:"
        # Strip only the leading scheme; later occurrences belong to the URL.
        self.base_url = handle[len(prefix):] if handle.startswith(prefix) else handle
        super().__init__(handle=handle, **kwargs)

    def get_collection(self, name: str, create_if_not_exists=True, **kwargs) -> "Collection":
        """
        Return the collection registered under ``name``.

        Collections are initialised on first use if none are registered yet.

        :param name: Name the collection is registered under
        :param create_if_not_exists: Create the collection when it is missing
        :return: The existing or newly created collection
        :raises KeyError: if the collection is missing and
            ``create_if_not_exists`` is false
        """
        if not self._collections:
            self.init_collections()

        if name not in self._collections:
            if not create_if_not_exists:
                raise KeyError(f"Collection {name} does not exist")
            self._collections[name] = self.create_collection(name)

        return self._collections[name]

    def create_collection(
        self, name: str, alias: Optional[str] = None, metadata: Optional[CollectionConfig] = None, **kwargs
    ) -> Collection:
        """
        Instantiate a collection and register it on this database.

        :param name: Name of the collection; must be non-empty
        :param alias: Key to register the collection under; defaults to ``name``
        :param metadata: Optional configuration passed to the collection
        :return: The new collection
        :raises ValueError: if ``name`` is empty
        """
        if not name:
            raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")

        # The collection receives the alias exactly as the caller passed it;
        # the fallback to ``name`` below only affects the registry key.
        collection = self.collection_class(name=name, alias=alias, parent=self, metadata=metadata)

        if self._collections is None:
            self._collections = {}

        self._collections[alias or name] = collection
        return collection

    def init_collections(self):
        """
        Populate the collection registry.

        With a collection type slot configured, Solr is asked for the facet
        values of that slot and one collection is created per value.
        Otherwise a single collection named ``default`` is created.

        :raises requests.HTTPError: if Solr answers with an error status
        """
        if self._collections is None:
            self._collections = {}

        type_slot = self.metadata.collection_type_slot
        if not type_slot:
            self.create_collection("default")
            return

        response = requests.get(
            f"{self.base_url}/select",
            params={
                "q": "*:*",
                "wt": "json",
                "rows": 0,
                "facet": "true",
                "facet.field": type_slot,
                "facet.limit": -1,
            },
        )
        response.raise_for_status()
        data = response.json()
        facet_values = data["facet_counts"]["facet_fields"][type_slot]
        # Every other element, starting with the first, is used as a collection
        # name (the list is read as alternating value, count pairs).
        for coll_name in facet_values[::2]:
            self.create_collection(coll_name)

    def query(self, query: Query, **kwargs) -> QueryResult:
        """
        Run a query against the collection named by ``query.from_table``.

        :param query: Query to execute
        :param kwargs: Passed through to the collection's ``query`` method
        :return: The collection's query result
        """
        collection = self.get_collection(query.from_table)
        return collection.query(query, **kwargs)
File without changes
@@ -0,0 +1,4 @@
1
+ from typing import TypeVar
2
+
3
# Type variables for annotating code that is generic over the two API classes.
# The bounds are given as strings, so this module does not import the classes
# they name; that is also why the undefined-name lint (F821) is silenced here.
DatabaseType = TypeVar("DatabaseType", bound="Database")  # noqa: F821
CollectionType = TypeVar("CollectionType", bound="Collection")  # noqa: F821