linkml-store 0.0.0__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linkml-store might be problematic. Click here for more details.

Files changed (35) hide show
  1. linkml_store/api/__init__.py +2 -2
  2. linkml_store/api/client.py +108 -7
  3. linkml_store/api/collection.py +221 -30
  4. linkml_store/api/config.py +97 -0
  5. linkml_store/api/database.py +207 -17
  6. linkml_store/api/queries.py +12 -1
  7. linkml_store/api/stores/chromadb/__init__.py +0 -0
  8. linkml_store/api/stores/chromadb/chromadb_collection.py +114 -0
  9. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  10. linkml_store/api/stores/duckdb/duckdb_collection.py +47 -14
  11. linkml_store/api/stores/duckdb/duckdb_database.py +35 -44
  12. linkml_store/api/stores/hdf5/__init__.py +0 -0
  13. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  14. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  15. linkml_store/api/stores/mongodb/mongodb_collection.py +86 -40
  16. linkml_store/api/stores/mongodb/mongodb_database.py +58 -67
  17. linkml_store/api/stores/solr/solr_collection.py +132 -0
  18. linkml_store/api/stores/solr/solr_database.py +82 -0
  19. linkml_store/api/stores/solr/solr_utils.py +0 -0
  20. linkml_store/cli.py +369 -0
  21. linkml_store/index/__init__.py +33 -0
  22. linkml_store/index/implementations/{llm_index.py → llm_indexer.py} +2 -2
  23. linkml_store/index/implementations/{simple_index.py → simple_indexer.py} +6 -3
  24. linkml_store/index/{index.py → indexer.py} +7 -4
  25. linkml_store/utils/format_utils.py +93 -0
  26. linkml_store/utils/object_utils.py +73 -0
  27. linkml_store/utils/sql_utils.py +46 -7
  28. {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/METADATA +17 -6
  29. linkml_store-0.1.6.dist-info/RECORD +41 -0
  30. linkml_store-0.1.6.dist-info/entry_points.txt +3 -0
  31. linkml_store/api/metadata.py +0 -5
  32. linkml_store-0.0.0.dist-info/RECORD +0 -29
  33. linkml_store-0.0.0.dist-info/entry_points.txt +0 -3
  34. {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/LICENSE +0 -0
  35. {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/WHEEL +0 -0
@@ -1,112 +1,103 @@
1
- from dataclasses import dataclass
1
+ # mongodb_database.py
2
+
3
+ import logging
2
4
  from typing import Optional
3
5
 
4
6
  from linkml_runtime import SchemaView
5
- from linkml_runtime.linkml_model import SlotDefinition
7
+ from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
6
8
  from linkml_runtime.utils.schema_builder import SchemaBuilder
7
9
  from pymongo import MongoClient
10
+ from pymongo.database import Database as NativeDatabase
8
11
 
9
12
  from linkml_store.api import Database
10
13
  from linkml_store.api.queries import Query, QueryResult
11
14
  from linkml_store.api.stores.mongodb.mongodb_collection import MongoDBCollection
12
15
 
16
+ logger = logging.getLogger(__name__)
17
+
13
18
 
14
- @dataclass
15
19
class MongoDBDatabase(Database):
    """
    An adapter for MongoDB databases.

    The LinkML-Store Database abstraction combines mongodb Client and Database:
    ``handle`` is passed to ``MongoClient`` and ``metadata.alias`` (falling
    back to ``"default"``) selects the database on that client.
    """

    # Lazily created pymongo client; populated on first access to ``native_client``.
    _native_client: MongoClient = None
    # Lazily resolved pymongo database; populated on first access to ``native_db``.
    _native_db = None
    collection_class = MongoDBCollection

    def __init__(self, handle: Optional[str] = None, **kwargs):
        """
        Create the adapter.

        :param handle: MongoDB connection string; defaults to
            ``mongodb://localhost:27017`` when omitted.
        :param kwargs: forwarded unchanged to ``Database.__init__``.
        """
        if handle is None:
            handle = "mongodb://localhost:27017"
        super().__init__(handle=handle, **kwargs)

    def _native_db_name(self) -> str:
        """Return the MongoDB database name: the configured alias, or ``"default"``."""
        return self.metadata.alias or "default"

    @property
    def native_client(self) -> MongoClient:
        """The underlying ``MongoClient``, created from ``handle`` on first use."""
        if self._native_client is None:
            self._native_client = MongoClient(self.handle)
        return self._native_client

    @property
    def native_db(self) -> NativeDatabase:
        """The underlying pymongo database, resolved from the client on first use."""
        if self._native_db is None:
            self._native_db = self.native_client[self._native_db_name()]
        return self._native_db

    def commit(self, **kwargs):
        """No-op for this adapter."""
        pass

    def close(self, **kwargs):
        """Close the client if one was ever created; otherwise do nothing."""
        # Identity check (not truthiness), consistent with ``native_client``.
        if self._native_client is not None:
            self._native_client.close()

    def drop(self, **kwargs):
        """Drop the same database that ``native_db`` resolves to."""
        # Use the shared name helper so an unset alias drops "default"
        # rather than passing None to pymongo.
        self.native_client.drop_database(self._native_db_name())

    def query(self, query: Query, **kwargs) -> Optional[QueryResult]:
        """
        Delegate ``query`` to the collection named by ``query.from_table``.

        :param query: the query to run.
        :param kwargs: forwarded to the collection's ``query`` method.
        :return: the collection's result, or ``None`` when the query names
            no ``from_table``.
        """
        if not query.from_table:
            return None
        collection = self.get_collection(query.from_table)
        return collection.query(query, **kwargs)

    def init_collections(self):
        """Register a ``MongoDBCollection`` for every collection present in the database."""
        if self._collections is None:
            self._collections = {}

        for collection_name in self.native_db.list_collection_names():
            if collection_name not in self._collections:
                collection = MongoDBCollection(name=collection_name, parent=self)
                self._collections[collection_name] = collection

    def induce_schema_view(self) -> SchemaView:
        """
        Build a schema by sampling one document from each collection.

        Each collection becomes a class. Every field of the sampled document
        except ``_id`` becomes an attribute, marked ``multivalued`` for list
        values and ``inlined`` for dict values. Classes whose collection
        metadata declares explicit attributes are then replaced by a class
        built from that metadata.

        :return: a ``SchemaView`` over the induced schema.
        """
        logger.info(f"Inducing schema view for {self.handle}")
        sb = SchemaBuilder()
        schema = sb.schema

        for collection_name in self.native_db.list_collection_names():
            sb.add_class(collection_name)
            mongo_collection = self.native_db[collection_name]
            sample_doc = mongo_collection.find_one()
            if not sample_doc:
                continue
            for field, value in sample_doc.items():
                if field == "_id":
                    continue
                sd = SlotDefinition(field)
                if isinstance(value, list):
                    sd.multivalued = True
                if isinstance(value, dict):
                    sd.inlined = True
                schema.classes[collection_name].attributes[sd.name] = sd

        sb.add_defaults()
        # Iterate over a snapshot: the loop body deletes and inserts keys in
        # ``schema.classes``, which is unsafe while iterating the mapping itself.
        for cls_name in list(schema.classes):
            if cls_name not in self.metadata.collections:
                continue
            collection_metadata = self.metadata.collections[cls_name]
            if not collection_metadata.attributes:
                continue
            del schema.classes[cls_name]
            # NOTE(review): falls back to the collection name when no type is
            # configured, assuming a class cannot be created without a name - confirm.
            cls = ClassDefinition(
                name=collection_metadata.type or cls_name,
                attributes=collection_metadata.attributes,
            )
            schema.classes[cls.name] = cls

        return SchemaView(schema)
@@ -0,0 +1,132 @@
1
+ # solr_collection.py
2
+
3
+ import logging
4
+ from copy import copy
5
+ from typing import Any, Dict, List, Optional, Union
6
+
7
+ import requests
8
+ from linkml_store.api import Collection
9
+ from linkml_store.api.collection import DEFAULT_FACET_LIMIT
10
+ from linkml_store.api.queries import Query, QueryResult
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class SolrCollection(Collection):
    """A collection backed by the ``select`` endpoint of a Solr server."""

    @property
    def _collection_base(self) -> str:
        """Base URL for requests: ``<base_url>/<name>`` when the parent uses cores, else ``<base_url>``."""
        if self.parent.use_cores:
            return f"{self.parent.base_url}/{self.name}"
        return self.parent.base_url

    def _select(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """GET ``<collection base>/select`` with ``params`` and return the decoded JSON body.

        Raises whatever ``response.raise_for_status()`` raises on an HTTP error status.
        """
        response = requests.get(f"{self._collection_base}/select", params=params)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _quote_solr_value(value: Any) -> str:
        """Render one filter value: strings become quoted phrases, other values use ``str()``."""
        if isinstance(value, str):
            # Escape backslashes first, then quotes, so an embedded quote
            # cannot terminate the phrase early.
            escaped = value.replace("\\", "\\\\").replace('"', '\\"')
            return f'"{escaped}"'
        return str(value)

    def search(
        self,
        query: str,
        where: Optional[Any] = None,
        index_name: Optional[str] = None,
        limit: Optional[int] = None,
        **kwargs,
    ) -> QueryResult:
        """
        Run a text search against the collection.

        :param query: the search term, sent as Solr's ``q`` parameter.
        :param where: optional filter, converted to ``fq``.
        :param index_name: value for Solr's ``defType``; defaults to ``"edismax"``.
        :param limit: maximum number of rows to return; sent as ``rows`` when
            it is a non-negative integer, otherwise the server default applies.
        :return: a ``QueryResult`` in which every row is ranked ``1.0``.
        :raises ValueError: if the parent database configures no searchable slots.
        """
        if index_name is None:
            index_name = "edismax"
        qfs = self.parent.metadata.searchable_slots
        if not qfs:
            raise ValueError("No searchable slots configured for Solr collection")
        extra: Dict[str, Any] = {"defType": index_name, "qf": qfs}
        # Previously ``limit`` was accepted but never sent to Solr.
        if limit is not None and limit >= 0:
            extra["rows"] = limit
        solr_query = self._build_solr_query(where, search_term=query, extra=extra)
        logger.info(f"Querying Solr collection {self.name} with query: {solr_query}")

        data = self._select(solr_query)
        num_rows = data["response"]["numFound"]
        rows = data["response"]["docs"]
        ranked_rows = [(1.0, row) for row in rows]
        return QueryResult(query=where, search_term=query, num_rows=num_rows, rows=rows, ranked_rows=ranked_rows)

    def query(self, query: Query, **kwargs) -> QueryResult:
        """
        Run a structured query against the collection.

        :param query: the query to translate into Solr parameters.
        :return: a ``QueryResult`` holding ``numFound`` and the returned docs.
        """
        solr_query = self._build_solr_query(query)
        logger.info(f"Querying Solr collection {self.name} with query: {solr_query}")

        data = self._select(solr_query)
        num_rows = data["response"]["numFound"]
        rows = data["response"]["docs"]

        return QueryResult(query=query, num_rows=num_rows, rows=rows)

    def query_facets(
        self,
        where: Optional[Dict] = None,
        facet_columns: Optional[List[str]] = None,
        facet_limit=DEFAULT_FACET_LIMIT,
        **kwargs,
    ) -> Dict[str, List[tuple]]:
        """
        Fetch facet counts for the given fields.

        :param where: optional filter, converted to ``fq``.
        :param facet_columns: fields to facet on, sent as ``facet.field``.
        :param facet_limit: sent as ``facet.limit``.
        :return: a mapping from each facet field to a list of ``(value, count)`` pairs.
        """
        solr_query = self._build_solr_query(where)
        solr_query["facet"] = "true"
        solr_query["facet.field"] = facet_columns
        solr_query["facet.limit"] = facet_limit

        logger.info(f"Querying Solr collection {self.name} for facets with query: {solr_query}")

        data = self._select(solr_query)
        facet_counts = data["facet_counts"]["facet_fields"]

        # Each field's counts arrive as a flat [value, count, value, count, ...]
        # list; pair the even-indexed values with the odd-indexed counts.
        return {
            facet_field: list(zip(counts[::2], counts[1::2]))
            for facet_field, counts in facet_counts.items()
        }

    def _build_solr_query(
        self, query: Union[Query, Dict], search_term="*:*", extra: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """
        Translate a ``Query`` or a plain where-dict into Solr request parameters.

        :param query: a ``Query``, a where-clause dict, or ``None`` (treated as ``{}``).
        :param search_term: value for ``q``; defaults to match-all.
        :param extra: additional parameters merged last, overriding earlier keys.
        :return: the parameter dict for the ``select`` endpoint.
        """
        solr_query: Dict[str, Any] = {}
        if query is None:
            query = {}

        fq = None
        if isinstance(query, Query):
            fq = self._build_solr_where_clause(query.where_clause)
        elif isinstance(query, dict):
            fq = self._build_solr_where_clause(query)
        # Omit an empty filter rather than sending a blank ``fq=`` parameter.
        if fq:
            solr_query["fq"] = fq

        if isinstance(query, Query):
            if query.select_cols:
                solr_query["fl"] = ",".join(query.select_cols)
            if query.limit:
                solr_query["rows"] = query.limit
            if query.offset:
                solr_query["start"] = query.offset

        solr_query["wt"] = "json"
        solr_query["q"] = search_term
        if extra:
            solr_query.update(extra)
        logger.info(f"Built Solr query: {solr_query}")
        return solr_query

    def _build_solr_where_clause(self, where_clause: Dict) -> str:
        """
        Render a where-clause dict as a Solr filter query string.

        Each ``field: value`` entry becomes ``field:value``; list or tuple
        values become ``field:(v1 v2 ...)``. Entries are joined with ``AND``.
        When the parent configures a ``collection_type_slot``, a condition on
        that slot is added (on a copy, so the caller's dict is not mutated).

        :param where_clause: the filter mapping, or ``None`` for no filter.
        :return: the filter string, empty when there are no conditions.
        """
        if where_clause is None:
            where_clause = {}
        conditions = []
        if self.parent.metadata.collection_type_slot:
            where_clause = copy(where_clause)
            where_clause[self.parent.metadata.collection_type_slot] = self._alias
        for field, value in where_clause.items():
            if not isinstance(value, (list, tuple)):
                value = [value]
            rendered = [self._quote_solr_value(v) for v in value]
            if len(rendered) > 1:
                conditions.append(f"{field}:({' '.join(rendered)})")
            else:
                conditions.append(f"{field}:{rendered[0]}")

        return " AND ".join(conditions)
@@ -0,0 +1,82 @@
1
+ import logging
2
+ from typing import Optional
3
+
4
+ import requests
5
+ from linkml_store.api import Collection, Database
6
+ from linkml_store.api.config import CollectionConfig
7
+ from linkml_store.api.queries import Query, QueryResult
8
+ from linkml_store.api.stores.solr.solr_collection import SolrCollection
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class SolrDatabase(Database):
    """A database adapter that talks to a Solr server over HTTP."""

    # URL that requests are issued against; derived from ``handle`` in ``__init__``.
    base_url: str
    collection_class = SolrCollection
    # When True, collections append their own name to ``base_url``.
    use_cores: bool = False

    def __init__(self, handle: Optional[str] = None, **kwargs):
        """
        Create the adapter.

        :param handle: the Solr URL, optionally prefixed with ``solr:``.
        :param kwargs: forwarded unchanged to ``Database.__init__``.
        :raises ValueError: if ``handle`` is not provided.
        """
        if handle is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("A handle (Solr URL, optionally prefixed with 'solr:') must be provided")
        prefix = "solr:"
        # Strip only the leading scheme marker; a later "solr:" inside the URL
        # is part of the address and must be kept.
        self.base_url = handle[len(prefix):] if handle.startswith(prefix) else handle
        super().__init__(handle=handle, **kwargs)

    def get_collection(self, name: str, create_if_not_exists=True, **kwargs) -> "Collection":
        """
        Return the collection registered under ``name``.

        :param name: the collection name.
        :param create_if_not_exists: create and register the collection when it is unknown.
        :raises KeyError: if the collection is unknown and ``create_if_not_exists`` is false.
        """
        if not self._collections:
            self.init_collections()

        if name not in self._collections:
            if not create_if_not_exists:
                raise KeyError(f"Collection {name} does not exist")
            self._collections[name] = self.create_collection(name)

        return self._collections[name]

    def create_collection(
        self, name: str, alias: Optional[str] = None, metadata: Optional[CollectionConfig] = None, **kwargs
    ) -> Collection:
        """
        Instantiate a collection and register it under ``alias`` (or ``name``).

        :param name: the collection name; must be non-empty.
        :param alias: registration key; defaults to ``name``.
        :param metadata: optional collection configuration passed to the collection.
        :raises ValueError: if ``name`` is empty.
        """
        if not name:
            raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")

        collection_cls = self.collection_class
        collection = collection_cls(name=name, alias=alias, parent=self, metadata=metadata)

        if not self._collections:
            self._collections = {}

        self._collections[alias or name] = collection
        return collection

    def init_collections(self):
        """
        Populate the collection registry.

        With a configured ``collection_type_slot``, one collection is created
        per distinct value of that slot, discovered through a facet query.
        Otherwise a single collection named ``"default"`` is created.
        """
        if self._collections is None:
            self._collections = {}
        if self.metadata.collection_type_slot:
            response = requests.get(
                f"{self.base_url}/select",
                params={
                    "q": "*:*",
                    "wt": "json",
                    "rows": 0,
                    "facet": "true",
                    "facet.field": self.metadata.collection_type_slot,
                    "facet.limit": -1,
                },
            )
            response.raise_for_status()
            data = response.json()
            facet_values = data["facet_counts"]["facet_fields"][self.metadata.collection_type_slot]
            # The facet list alternates value, count, value, count, ...;
            # the even positions are the collection names.
            for coll_name in facet_values[::2]:
                self.create_collection(coll_name)
        else:
            self.create_collection("default")

    def query(self, query: Query, **kwargs) -> QueryResult:
        """Delegate ``query`` to the collection named by ``query.from_table``."""
        collection = self.get_collection(query.from_table)
        return collection.query(query, **kwargs)
File without changes