linkml-store 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. linkml_store/__init__.py +7 -0
  2. linkml_store/api/__init__.py +8 -0
  3. linkml_store/api/client.py +414 -0
  4. linkml_store/api/collection.py +1280 -0
  5. linkml_store/api/config.py +187 -0
  6. linkml_store/api/database.py +862 -0
  7. linkml_store/api/queries.py +69 -0
  8. linkml_store/api/stores/__init__.py +0 -0
  9. linkml_store/api/stores/chromadb/__init__.py +7 -0
  10. linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
  11. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  12. linkml_store/api/stores/dremio/__init__.py +10 -0
  13. linkml_store/api/stores/dremio/dremio_collection.py +555 -0
  14. linkml_store/api/stores/dremio/dremio_database.py +1052 -0
  15. linkml_store/api/stores/dremio/mappings.py +105 -0
  16. linkml_store/api/stores/dremio_rest/__init__.py +11 -0
  17. linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
  18. linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
  19. linkml_store/api/stores/duckdb/__init__.py +16 -0
  20. linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
  21. linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
  22. linkml_store/api/stores/duckdb/mappings.py +8 -0
  23. linkml_store/api/stores/filesystem/__init__.py +15 -0
  24. linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
  25. linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
  26. linkml_store/api/stores/hdf5/__init__.py +7 -0
  27. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  28. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  29. linkml_store/api/stores/ibis/__init__.py +5 -0
  30. linkml_store/api/stores/ibis/ibis_collection.py +488 -0
  31. linkml_store/api/stores/ibis/ibis_database.py +328 -0
  32. linkml_store/api/stores/mongodb/__init__.py +25 -0
  33. linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
  34. linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
  35. linkml_store/api/stores/neo4j/__init__.py +0 -0
  36. linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
  37. linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
  38. linkml_store/api/stores/solr/__init__.py +3 -0
  39. linkml_store/api/stores/solr/solr_collection.py +224 -0
  40. linkml_store/api/stores/solr/solr_database.py +83 -0
  41. linkml_store/api/stores/solr/solr_utils.py +0 -0
  42. linkml_store/api/types.py +4 -0
  43. linkml_store/cli.py +1147 -0
  44. linkml_store/constants.py +7 -0
  45. linkml_store/graphs/__init__.py +0 -0
  46. linkml_store/graphs/graph_map.py +24 -0
  47. linkml_store/index/__init__.py +53 -0
  48. linkml_store/index/implementations/__init__.py +0 -0
  49. linkml_store/index/implementations/llm_indexer.py +174 -0
  50. linkml_store/index/implementations/simple_indexer.py +43 -0
  51. linkml_store/index/indexer.py +211 -0
  52. linkml_store/inference/__init__.py +13 -0
  53. linkml_store/inference/evaluation.py +195 -0
  54. linkml_store/inference/implementations/__init__.py +0 -0
  55. linkml_store/inference/implementations/llm_inference_engine.py +154 -0
  56. linkml_store/inference/implementations/rag_inference_engine.py +276 -0
  57. linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
  58. linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
  59. linkml_store/inference/inference_config.py +66 -0
  60. linkml_store/inference/inference_engine.py +209 -0
  61. linkml_store/inference/inference_engine_registry.py +74 -0
  62. linkml_store/plotting/__init__.py +5 -0
  63. linkml_store/plotting/cli.py +826 -0
  64. linkml_store/plotting/dimensionality_reduction.py +453 -0
  65. linkml_store/plotting/embedding_plot.py +489 -0
  66. linkml_store/plotting/facet_chart.py +73 -0
  67. linkml_store/plotting/heatmap.py +383 -0
  68. linkml_store/utils/__init__.py +0 -0
  69. linkml_store/utils/change_utils.py +17 -0
  70. linkml_store/utils/dat_parser.py +95 -0
  71. linkml_store/utils/embedding_matcher.py +424 -0
  72. linkml_store/utils/embedding_utils.py +299 -0
  73. linkml_store/utils/enrichment_analyzer.py +217 -0
  74. linkml_store/utils/file_utils.py +37 -0
  75. linkml_store/utils/format_utils.py +550 -0
  76. linkml_store/utils/io.py +38 -0
  77. linkml_store/utils/llm_utils.py +122 -0
  78. linkml_store/utils/mongodb_utils.py +145 -0
  79. linkml_store/utils/neo4j_utils.py +42 -0
  80. linkml_store/utils/object_utils.py +190 -0
  81. linkml_store/utils/pandas_utils.py +93 -0
  82. linkml_store/utils/patch_utils.py +126 -0
  83. linkml_store/utils/query_utils.py +89 -0
  84. linkml_store/utils/schema_utils.py +23 -0
  85. linkml_store/utils/sklearn_utils.py +193 -0
  86. linkml_store/utils/sql_utils.py +177 -0
  87. linkml_store/utils/stats_utils.py +53 -0
  88. linkml_store/utils/vector_utils.py +158 -0
  89. linkml_store/webapi/__init__.py +0 -0
  90. linkml_store/webapi/html/__init__.py +3 -0
  91. linkml_store/webapi/html/base.html.j2 +24 -0
  92. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  93. linkml_store/webapi/html/database_details.html.j2 +16 -0
  94. linkml_store/webapi/html/databases.html.j2 +14 -0
  95. linkml_store/webapi/html/generic.html.j2 +43 -0
  96. linkml_store/webapi/main.py +855 -0
  97. linkml_store-0.3.0.dist-info/METADATA +226 -0
  98. linkml_store-0.3.0.dist-info/RECORD +101 -0
  99. linkml_store-0.3.0.dist-info/WHEEL +4 -0
  100. linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
  101. linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
linkml_store/api/queries.py
@@ -0,0 +1,69 @@
+ from collections import namedtuple
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ import pandas as pd
+ from pydantic import BaseModel
+
+ # Define a named tuple called Between with two fields (min, max).
+ # This is used in the Query class to represent a range of values.
+ Between = namedtuple("Between", "min max")
+
+ FACET_GROUP_ATOM = Union[str, int, float, Between]
+ FACET_GROUP = Union[FACET_GROUP_ATOM, Tuple[FACET_GROUP_ATOM, ...]]
+
+
+ class Query(BaseModel):
+     """
+     A query object.
+
+     - In SQL this would be a SQL query string
+     """
+
+     from_table: Optional[str] = None
+     select_cols: Optional[List[str]] = None
+     where_clause: Optional[Union[str, List[str], Dict[str, Any]]] = None
+     sort_by: Optional[List[str]] = None
+     limit: Optional[int] = None
+     offset: Optional[int] = None
+     include_facet_counts: bool = False
+     facet_slots: Optional[List[str]] = None
+
+
+ class FacetCountResult(BaseModel):
+     """
+     A facet count result
+     """
+
+     as_dict: Dict[FACET_GROUP, List[Tuple[FACET_GROUP, int]]]
+
+
+ class QueryResult(BaseModel):
+     """
+     A query result.
+
+     TODO: make this a subclass of Collection
+     """
+
+     query: Optional[Query] = None
+     search_term: Optional[str] = None
+     num_rows: int
+     offset: Optional[int] = 0
+     rows: Optional[List[Dict[str, Any]]] = None
+     ranked_rows: Optional[List[Tuple[float, Dict[str, Any]]]] = None
+     _rows_dataframe: Optional[pd.DataFrame] = None
+     facet_counts: Optional[Dict[str, List[Tuple[FACET_GROUP, int]]]] = None
+
+     @property
+     def rows_dataframe(self) -> pd.DataFrame:
+         if self.ranked_rows is not None:
+             self._rows_dataframe = pd.DataFrame([{"score": score, **row} for score, row in self.ranked_rows])
+         if self._rows_dataframe is None and self.rows:
+             self._rows_dataframe = pd.DataFrame(self.rows)
+         return self._rows_dataframe
+
+     def set_rows(self, rows: pd.DataFrame):
+         self._rows_dataframe = rows
+
+     class Config:
+         arbitrary_types_allowed = True
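The Query and QueryResult classes above are plain Pydantic models, so they can be constructed directly without touching any backend. A minimal sketch (field values are illustrative, not taken from the package's tests):

    from linkml_store.api.queries import Query, QueryResult

    q = Query(
        from_table="persons",
        where_clause={"occupation": "nurse"},
        sort_by=["name"],
        limit=10,
    )
    # A backend adapter would normally populate this from its native results.
    qr = QueryResult(query=q, num_rows=2, rows=[{"name": "Akira"}, {"name": "Jo"}])
    df = qr.rows_dataframe  # pandas DataFrame built lazily from rows (or ranked_rows)
    assert df.shape == (2, 1)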
File without changes
linkml_store/api/stores/chromadb/__init__.py
@@ -0,0 +1,7 @@
+ """
+ Adapter for ChromaDB vector database.
+
+ .. warning::
+
+     Support for ChromaDB is experimental and may change in the future.
+ """
linkml_store/api/stores/chromadb/chromadb_collection.py
@@ -0,0 +1,121 @@
+ """
+ ChromaDB Collection
+ """
+
+ import logging
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ from chromadb.api.models.Collection import Collection as ChromaCollection
+ from linkml_runtime.linkml_model import SlotDefinition
+
+ from linkml_store.api import Collection
+ from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
+ from linkml_store.api.queries import Query, QueryResult
+ from linkml_store.index import Indexer
+
+ logger = logging.getLogger(__name__)
+
+
+ class ChromaDBCollection(Collection):
+     """
+     A wrapper for ChromaDB collections.
+     """
+
+     @property
+     def native_collection(self) -> ChromaCollection:
+         return self.parent.client.get_collection(self.name)
+
+     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
+         if not isinstance(objs, list):
+             objs = [objs]
+
+         documents = []
+         metadatas = []
+         ids = []
+         indexer = Indexer()
+
+         for obj in objs:
+             obj_id = self.object_identifier(obj)
+             ids.append(obj_id)
+             doc_text = indexer.object_to_text(obj)
+             documents.append(doc_text)
+             # TODO: handle nesting
+             metadata = {k: v for k, v in obj.items()}
+             metadatas.append(metadata)
+
+         self.native_collection.add(
+             documents=documents,
+             metadatas=metadatas,
+             ids=ids,
+         )
+
+     def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
+         if not isinstance(objs, list):
+             objs = [objs]
+         ids = [obj["id"] for obj in objs]
+         self.native_collection.delete(ids=ids)
+         return len(ids)
+
+     def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
+         logger.info(f"Deleting from {self.target_class_name} where: {where}")
+         if where is None:
+             where = {}
+         # ChromaDB's get() returns a dict of parallel lists keyed by "ids", "documents", "metadatas"
+         results = self.native_collection.get(where=where)
+         ids = results["ids"]
+         self.native_collection.delete(ids=ids)
+         return len(ids)
+
+     def query(self, query: Query, **kwargs) -> QueryResult:
+         chroma_filter = self._build_chroma_filter(query.where_clause)
+         if query.limit:
+             results = self.native_collection.get(where=chroma_filter, limit=query.limit)
+         else:
+             results = self.native_collection.get(where=chroma_filter)
+
+         # each stored object is round-tripped through its metadata entry
+         rows = results.get("metadatas") or []
+         return QueryResult(query=query, num_rows=len(rows), rows=rows)
+
+     def query_facets(
+         self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
+     ) -> Dict[str, List[Tuple[Any, int]]]:
+         results = {}
+         cd = self.class_definition()
+         if not facet_columns:
+             facet_columns = list(self.class_definition().attributes.keys())
+
+         for col in facet_columns:
+             logger.debug(f"Faceting on {col}")
+             if isinstance(col, tuple):
+                 sd = SlotDefinition(name="PLACEHOLDER")
+             else:
+                 sd = cd.attributes[col]
+
+             # NOTE: the pipelines below follow MongoDB-style aggregation; stock ChromaDB
+             # collections do not expose an aggregate() method, so faceting here is experimental.
+             if sd.multivalued:
+                 facet_results = self.native_collection.aggregate(
+                     aggregation=[
+                         {"$match": where} if where else {"$match": {}},
+                         {"$unwind": f"${col}"},
+                         {"$group": {"_id": f"${col}", "count": {"$sum": 1}}},
+                         {"$sort": {"count": -1}},
+                         {"$limit": facet_limit},
+                     ]
+                 )
+             else:
+                 facet_results = self.native_collection.aggregate(
+                     aggregation=[
+                         {"$match": where} if where else {"$match": {}},
+                         {"$group": {"_id": f"${col}", "count": {"$sum": 1}}},
+                         {"$sort": {"count": -1}},
+                         {"$limit": facet_limit},
+                     ]
+                 )
+
+             results[col] = [(result["_id"], result["count"]) for result in facet_results]
+
+         return results
+
+     def _build_chroma_filter(self, where_clause: Dict[str, Any]) -> Dict[str, Any]:
+         chroma_filter = {}
+         for field, value in where_clause.items():
+             chroma_filter[field] = value
+         return chroma_filter
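The collection wrapper is meant to be reached through its parent ChromaDBDatabase (shown in the next file) rather than instantiated directly. A rough usage sketch (assumes get_collection() on the base Database API returns the named collection and that it already exists in the Chroma store; object values are illustrative):

    from linkml_store.api.queries import Query
    from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase

    db = ChromaDBDatabase(handle=".chromadb")
    persons = db.get_collection("persons")

    # insert() flattens each object into a Chroma document plus a metadata entry
    persons.insert([{"id": "P1", "name": "Akira", "occupation": "nurse"}])

    # query() translates where_clause into a Chroma metadata filter via _build_chroma_filter()
    qr = persons.query(Query(from_table="persons", where_clause={"occupation": "nurse"}, limit=5))
    print(qr.num_rows, qr.rows)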
linkml_store/api/stores/chromadb/chromadb_database.py
@@ -0,0 +1,89 @@
+ # chromadb_database.py
+
+ import logging
+ from typing import Optional
+
+ import chromadb
+ from chromadb.config import Settings
+ from linkml_runtime import SchemaView
+ from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
+ from linkml_runtime.utils.schema_builder import SchemaBuilder
+
+ from linkml_store.api import Database
+ from linkml_store.api.queries import Query, QueryResult
+ from linkml_store.api.stores.chromadb.chromadb_collection import ChromaDBCollection
+
+ logger = logging.getLogger(__name__)
+
+
+ class ChromaDBDatabase(Database):
+     _client: chromadb.Client = None
+     collection_class = ChromaDBCollection
+
+     def __init__(self, handle: Optional[str] = None, **kwargs):
+         if handle is None:
+             handle = ".chromadb"
+         super().__init__(handle=handle, **kwargs)
+
+     @property
+     def client(self) -> chromadb.Client:
+         if self._client is None:
+             self._client = chromadb.Client(
+                 Settings(
+                     chroma_db_impl="duckdb+parquet",
+                     persist_directory=self.handle,
+                 )
+             )
+         return self._client
+
+     def commit(self, **kwargs):
+         pass
+
+     def close(self, **kwargs):
+         if self._client:
+             self._client.close()
+
+     def query(self, query: Query, **kwargs) -> QueryResult:
+         if query.from_table:
+             collection = self.get_collection(query.from_table)
+             return collection.query(query, **kwargs)
+
+     def init_collections(self):
+         if self._collections is None:
+             self._collections = {}
+
+         for collection_name in self.client.list_collections():
+             if collection_name not in self._collections:
+                 collection = ChromaDBCollection(name=collection_name, parent=self)
+                 self._collections[collection_name] = collection
+
+     def induce_schema_view(self) -> SchemaView:
+         logger.info(f"Inducing schema view for {self.handle}")
+         sb = SchemaBuilder()
+         schema = sb.schema
+
+         for collection_name in self.client.list_collections():
+             sb.add_class(collection_name)
+             chroma_collection = self.client.get_collection(collection_name)
+             sample_doc = chroma_collection.peek(1)
+             if sample_doc:
+                 for field, value in sample_doc[0].items():
+                     if field == "_id":
+                         continue
+                     sd = SlotDefinition(field)
+                     if isinstance(value, list):
+                         sd.multivalued = True
+                     if isinstance(value, dict):
+                         sd.inlined = True
+                     sb.schema.classes[collection_name].attributes[sd.name] = sd
+
+         sb.add_defaults()
+         for cls_name in schema.classes:
+             if cls_name in self.metadata.collections:
+                 collection_metadata = self.metadata.collections[cls_name]
+                 if collection_metadata.attributes:
+                     del schema.classes[cls_name]
+                     cls = ClassDefinition(name=collection_metadata.type, attributes=collection_metadata.attributes)
+                     schema.classes[cls.name] = cls
+
+         return SchemaView(schema)
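The handle passed to ChromaDBDatabase doubles as the Chroma persist_directory, and induce_schema_view() derives one LinkML class per Chroma collection by peeking at a single stored document. A small sketch (assumes an older chromadb release that still accepts chromadb.Client(Settings(chroma_db_impl="duckdb+parquet", ...)); the directory name is illustrative):

    from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase

    db = ChromaDBDatabase(handle="/tmp/demo_chromadb")  # handle becomes persist_directory
    sv = db.induce_schema_view()                        # one class per collection, slots sampled via peek(1)
    print(list(sv.schema.classes.keys()))
    db.close()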
linkml_store/api/stores/dremio/__init__.py
@@ -0,0 +1,10 @@
+ """Dremio database adapter for linkml-store.
+
+ This module provides a Dremio adapter that uses Arrow Flight SQL for high-performance
+ data access to a Dremio data lakehouse.
+ """
+
+ from linkml_store.api.stores.dremio.dremio_collection import DremioCollection
+ from linkml_store.api.stores.dremio.dremio_database import DremioDatabase
+
+ __all__ = ["DremioDatabase", "DremioCollection"]
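Because the subpackage __init__ re-exports both classes, downstream code can import the Dremio adapter from the subpackage root rather than the individual modules (a trivial sketch; connection and credential parameters are omitted because they live in the modules not shown in this diff):

    from linkml_store.api.stores.dremio import DremioCollection, DremioDatabase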