linkml-store 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store/__init__.py +7 -0
- linkml_store/api/__init__.py +8 -0
- linkml_store/api/client.py +151 -0
- linkml_store/api/collection.py +327 -0
- linkml_store/api/database.py +215 -0
- linkml_store/api/metadata.py +5 -0
- linkml_store/api/queries.py +56 -0
- linkml_store/api/stores/__init__.py +0 -0
- linkml_store/api/stores/duckdb/__init__.py +0 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +109 -0
- linkml_store/api/stores/duckdb/duckdb_database.py +166 -0
- linkml_store/api/stores/duckdb/mappings.py +7 -0
- linkml_store/api/stores/mongodb/__init__.py +0 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +56 -0
- linkml_store/api/stores/mongodb/mongodb_database.py +112 -0
- linkml_store/constants.py +7 -0
- linkml_store/index/__init__.py +0 -0
- linkml_store/index/implementations/__init__.py +0 -0
- linkml_store/index/implementations/llm_index.py +44 -0
- linkml_store/index/implementations/simple_index.py +40 -0
- linkml_store/index/index.py +109 -0
- linkml_store/utils/__init__.py +0 -0
- linkml_store/utils/io.py +38 -0
- linkml_store/utils/sql_utils.py +126 -0
- linkml_store-0.0.0.dist-info/LICENSE +22 -0
- linkml_store-0.0.0.dist-info/METADATA +44 -0
- linkml_store-0.0.0.dist-info/RECORD +29 -0
- linkml_store-0.0.0.dist-info/WHEEL +4 -0
- linkml_store-0.0.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from linkml_runtime import SchemaView
|
|
5
|
+
from linkml_runtime.linkml_model import SlotDefinition
|
|
6
|
+
from linkml_runtime.utils.schema_builder import SchemaBuilder
|
|
7
|
+
from pymongo import MongoClient
|
|
8
|
+
|
|
9
|
+
from linkml_store.api import Database
|
|
10
|
+
from linkml_store.api.queries import Query, QueryResult
|
|
11
|
+
from linkml_store.api.stores.mongodb.mongodb_collection import MongoDBCollection
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class MongoDBDatabase(Database):
    """
    A wrapper around a MongoDB database.

    The MongoDB client and the database handle are created lazily on first
    access and cached on the instance. When no ``handle`` is supplied, the
    connection URI defaults to ``mongodb://localhost:27017``.
    """

    _client: MongoClient = None
    _database = None

    def __post_init__(self):
        """Fall back to a local MongoDB URI when no handle was supplied."""
        if not self.handle:
            self.handle = "mongodb://localhost:27017"

    @property
    def client(self) -> MongoClient:
        """
        The MongoDB client for ``handle``, created on first access.

        :return: the cached client
        """
        if self._client is None:
            self._client = MongoClient(self.handle)
        return self._client

    @property
    def database(self):
        """
        The underlying database object, looked up on first access.

        The database name is the text after the last ``/`` in ``handle``.

        :return: the cached database object
        """
        # Compare with None explicitly: PyMongo database objects do not support
        # truth-value testing and raise NotImplementedError on ``not db``.
        if self._database is None:
            # NOTE(review): for a URI without a database path (such as the default
            # handle) this yields the "host:port" segment, and any "?options" suffix
            # is kept as part of the name - confirm the intended handle format.
            db_name = self.handle.split("/")[-1]
            self._database = self.client[db_name]
        return self._database

    def commit(self, **kwargs):
        """No-op; nothing is buffered by this wrapper."""
        pass

    def close(self, **kwargs):
        """
        Close the client connection, if one was ever opened.

        The cached client and database are dropped so that a later access
        creates a fresh connection.
        """
        # Use the private attribute so that closing never opens a new connection.
        if self._client is not None:
            self._client.close()
            self._client = None
            self._database = None

    def query(self, query: Query, **kwargs) -> QueryResult:
        """
        Run a query against the collection named by ``query.from_table``.

        ``query.where_clause`` is passed to ``find`` as the filter (an empty
        filter when unset); ``limit``, ``offset`` and ``sort_by`` are applied
        when set, with every sort column ascending.

        :param query: the query to execute
        :return: the matching rows; ``num_rows`` is the number of rows returned
        """
        collection = self.database[query.from_table]
        where_clause = query.where_clause or {}
        cursor = collection.find(where_clause)
        if query.limit:
            cursor = cursor.limit(query.limit)
        if query.offset:
            cursor = cursor.skip(query.offset)
        if query.sort_by:
            sort_key = [(col, 1) for col in query.sort_by]
            cursor = cursor.sort(sort_key)
        rows = list(cursor)
        return QueryResult(query=query, num_rows=len(rows), rows=rows)

    def init_collections(self):
        """Register a ``MongoDBCollection`` for every collection in the database."""
        if self._collections is None:
            self._collections = {}
        for collection_name in self.database.list_collection_names():
            # Keep wrappers that were already registered under this name.
            if collection_name not in self._collections:
                collection = MongoDBCollection(name=collection_name, parent=self)
                self._collections[collection_name] = collection

    def create_collection(self, name: str, alias: Optional[str] = None, **kwargs) -> MongoDBCollection:
        """
        Create a collection wrapper and register it on this database.

        :param name: name of the collection
        :param alias: key to register the collection under; defaults to ``name``
        :return: the new collection wrapper
        """
        collection = MongoDBCollection(name=name, parent=self)
        if self._collections is None:
            self._collections = {}
        if not alias:
            alias = name
        self._collections[alias] = collection
        return collection

    @staticmethod
    def _infer_range(value) -> str:
        """Map a sample Python value to a LinkML range name (``string`` by default)."""
        # bool must be tested before int: bool is a subclass of int, so the
        # int test would otherwise claim every boolean.
        if isinstance(value, bool):
            return "boolean"
        if isinstance(value, str):
            return "string"
        if isinstance(value, int):
            return "integer"
        if isinstance(value, float):
            return "float"
        return "string"

    def induce_schema_view(self) -> SchemaView:
        """
        Build a schema from the current contents of the database.

        Each collection becomes a class. Its attributes are inferred from a
        single sample document (``find_one``): list values become multivalued
        slots typed by their first element, and ``_id`` is skipped.

        :return: a view over the induced schema
        """
        sb = SchemaBuilder()
        schema = sb.schema
        for collection_name in self.database.list_collection_names():
            sb.add_class(collection_name)
            sample_doc = self.database[collection_name].find_one()
            if not sample_doc:
                continue
            for key, value in sample_doc.items():
                if key == "_id":
                    continue
                multivalued = isinstance(value, list)
                if multivalued:
                    # Type the slot by the first element; an empty list falls back to string.
                    value = value[0] if value else None
                sd = SlotDefinition(key, range=self._infer_range(value), multivalued=multivalued)
                sb.schema.classes[collection_name].attributes[sd.name] = sd
        sb.add_defaults()
        return SchemaView(schema)
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING, List
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from linkml_store.index.index import INDEX_ITEM, Index
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
import llm
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LLMIndex(Index):
    """
    An index implementation that delegates text embedding to the llm library.
    """

    embedding_model_name: str = "ada-002"
    _embedding_model: "llm.EmbeddingModel" = None

    @property
    def embedding_model(self):
        """
        The embedding model named by ``embedding_model_name``.

        The model is looked up on first access and cached afterwards.

        :return: the cached embedding model
        """
        # Imported here so that llm is only needed when an embedding is requested.
        import llm

        model = self._embedding_model
        if model is None:
            model = llm.get_embedding_model(self.embedding_model_name)
            self._embedding_model = model
        return model

    def text_to_vector(self, text: str) -> INDEX_ITEM:
        """
        Embed a single text.

        :param text: the text to embed
        :return: the embedding vector for the text
        """
        vectors = self.texts_to_vectors([text])
        return vectors[0]

    def texts_to_vectors(self, texts: List[str]) -> List[INDEX_ITEM]:
        """
        Embed a batch of texts with the embedding model.

        :param texts: the texts to embed
        :return: one float array per embedding produced by the model
        """
        vectors = []
        for embedding in self.embedding_model.embed_multi(texts):
            vectors.append(np.array(embedding, dtype=float))
        return vectors
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from linkml_store.index.index import INDEX_ITEM, Index
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SimpleIndex(Index):
    """
    An index implementation that derives vectors from hashed character trigrams.

    The method is deliberately naive; it is not suitable for production use.
    """

    def text_to_vector(self, text: str) -> INDEX_ITEM:
        """
        Turn a text into a trigram-count vector (a naive method purely for testing).

        The text is lower-cased and every overlapping three-character window is
        hashed into one of ``vector_default_length`` buckets, whose count is
        incremented.

        :param text: the text to convert
        :return: a float vector of bucket counts
        """
        size = self.vector_default_length
        lowered = text.lower()
        # All overlapping three-character windows of the lower-cased text.
        trigrams = [lowered[start : start + 3] for start in range(len(lowered) - 2)]

        counts = np.zeros(size, dtype=float)
        for trigram in trigrams:
            digest = hashlib.sha1(trigram.encode("utf-8")).hexdigest()
            # Fold the SHA-1 value into the vector's index range.
            bucket = int(digest, 16) % size
            counts[bucket] += 1.0

        return counts
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
INDEX_ITEM = np.ndarray
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def cosine_similarity(vector1, vector2):
    """
    Compute the cosine similarity of two vectors.

    :param vector1: first vector
    :param vector2: second vector
    :return: the dot product divided by the product of the norms, or 0.0 when
        either vector has zero length
    """
    denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    # A zero vector has no direction; dividing would give NaN, which then
    # makes any ranking based on the score meaningless.
    if denominator == 0:
        return 0.0
    return np.dot(vector1, vector2) / denominator
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Index(BaseModel):
    """
    An index operates on a collection in order to search for objects.

    Subclasses supply ``text_to_vector``; the remaining methods build on it.
    """

    name: Optional[str] = None
    index_function: Optional[Callable] = None
    distance_function: Optional[Callable] = None
    index_attributes: Optional[str] = None
    text_template: Optional[str] = None
    filter_nulls: Optional[bool] = True
    vector_default_length: Optional[int] = 1000
    index_field: Optional[str] = "__index__"

    def object_to_vector(self, obj: Dict[str, Any]) -> INDEX_ITEM:
        """
        Convert an object to a vector via its text form.

        :param obj: the object to index
        :return: the vector for the object's text
        """
        text = self.object_to_text(obj)
        return self.text_to_vector(text)

    def objects_to_vectors(self, objs: List[Dict[str, Any]]) -> List[INDEX_ITEM]:
        """
        Convert several objects to vectors, one at a time.

        :param objs: the objects to index
        :return: one vector per object, in input order
        """
        return list(map(self.object_to_vector, objs))

    def texts_to_vectors(self, texts: List[str]) -> List[INDEX_ITEM]:
        """
        Convert several texts to vectors, one at a time.

        :param texts: the texts to index
        :return: one vector per text, in input order
        """
        return list(map(self.text_to_vector, texts))

    def text_to_vector(self, text: str) -> INDEX_ITEM:
        """
        Convert a text to a vector; must be provided by subclasses.

        :param text: the text to index
        :return: the vector for the text
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def object_to_text(self, obj: Dict[str, Any]) -> str:
        """
        Create a text from an object suitable for indexing.

        Keys are first restricted to ``index_attributes`` (when set), then
        ``None`` values are dropped (when ``filter_nulls`` is set). The result is
        rendered through ``text_template`` when one is given, otherwise with
        ``str``.

        :param obj: the object to render
        :return: the text for the object
        """
        selected = obj
        if self.index_attributes:
            # NOTE(review): index_attributes is declared as a string, so this is a
            # substring test on that string - confirm whether a list was intended.
            selected = {key: selected[key] for key in selected if key in self.index_attributes}
        if self.filter_nulls:
            selected = {key: val for key, val in selected.items() if val is not None}
        if not self.text_template:
            return str(selected)
        return self.text_template.format(**selected)

    def search(
        self, query: str, vectors: List[Tuple[str, INDEX_ITEM]], limit: Optional[int] = None
    ) -> List[Tuple[float, str]]:
        """
        Rank indexed items by cosine similarity to a query string.

        :param query: the query string to search for
        :param vectors: the indexed items, each a tuple of (id, vector)
        :param limit: the maximum number of results to return (optional)
        :return: (similarity, id) tuples, most similar first
        """
        query_vector = self.text_to_vector(query)

        # Score every indexed item against the query vector.
        scored = [(cosine_similarity(query_vector, item_vector), item_id) for item_id, item_vector in vectors]

        # Highest similarity first.
        scored.sort(key=lambda pair: -pair[0])

        return scored if limit is None else scored[:limit]
|
|
File without changes
|
linkml_store/utils/io.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Iterable, Iterator, Optional, TextIO, Union
|
|
3
|
+
|
|
4
|
+
from linkml_runtime import SchemaView
|
|
5
|
+
|
|
6
|
+
from linkml_store.api.collection import OBJECT
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def export_objects(
    objects: Iterable[OBJECT],
    location: Union[Path, str, TextIO],
    output_type: Optional[str],
    target_class: Optional[str] = None,
    schema_view: Optional[SchemaView] = None,
    **kwargs,
):
    """
    Write objects to a file or stream.

    This is a placeholder: no export is implemented yet, so every call raises.

    :param objects: the objects to write
    :param location: path or open text stream to write to
    :param output_type: currently unused
    :param target_class: currently unused
    :param schema_view: currently unused
    :param kwargs: currently unused
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def import_objects_iter(
    location: Union[Path, str, TextIO],
    schema_view: Optional[SchemaView] = None,
    **kwargs,
) -> Iterator[OBJECT]:
    """
    Read objects from a file or stream.

    This is a placeholder: no import is implemented yet, so every call raises
    immediately (the function is not a generator).

    :param location: path or open text stream to read from
    :param schema_view: currently unused
    :param kwargs: currently unused
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional, Tuple, Type, Union
|
|
3
|
+
|
|
4
|
+
import sqlalchemy
|
|
5
|
+
import sqlalchemy.sql.sqltypes as sqlt
|
|
6
|
+
from linkml_runtime.linkml_model import SchemaDefinition, SlotDefinition
|
|
7
|
+
from linkml_runtime.utils.schema_builder import SchemaBuilder
|
|
8
|
+
from sqlalchemy import MetaData
|
|
9
|
+
|
|
10
|
+
from linkml_store.api.queries import Query
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
# Maps SQLAlchemy column type classes to LinkML range names. Lookups use
# isinstance and take the first match, so the concrete SQL types come first and
# the generic base types after them catch the remaining variants (for example
# BIGINT and SMALLINT via Integer, DOUBLE and REAL via Float, VARCHAR via String).
TYPE_MAP = {
    sqlt.TEXT: "string",
    sqlt.INTEGER: "integer",
    sqlt.FLOAT: "float",
    sqlt.Boolean: "boolean",
    sqlt.Integer: "integer",
    sqlt.Float: "float",
    sqlt.String: "string",
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _map_type(typ: Type) -> str:
    """
    Translate a column type into a range name using ``TYPE_MAP``.

    :param typ: the column type instance to translate
    :return: the name for the first ``TYPE_MAP`` entry that ``typ`` is an
        instance of, or ``"string"`` when none matches
    """
    matches = (name for sql_type, name in TYPE_MAP.items() if isinstance(typ, sql_type))
    return next(matches, "string")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def where_clause_to_sql(query: Query) -> str:
    """
    Render the where clause of a query as a SQL ``WHERE ...`` fragment.

    A string clause is used verbatim, a list of conditions is joined with
    ``AND``, and a dict becomes ``column = 'value'`` equality tests joined with
    ``AND``.

    :param query: the query whose ``where_clause`` is rendered
    :return: the fragment including the ``WHERE`` keyword, or an empty string
        when the query has no where clause
    :raises ValueError: if the where clause is not a str, list or dict
    """
    where_clause = query.where_clause
    if not where_clause:
        return ""
    if isinstance(where_clause, str):
        where_clause_sql = where_clause
    elif isinstance(where_clause, list):
        where_clause_sql = " AND ".join(where_clause)
    elif isinstance(where_clause, dict):
        # Double embedded single quotes so a value cannot terminate its literal.
        # NOTE(review): column names (and str/list clauses) are still inserted
        # verbatim and must come from a trusted source.
        where_clause_sql = " AND ".join(
            "{} = '{}'".format(k, str(v).replace("'", "''")) for k, v in where_clause.items()
        )
    else:
        raise ValueError(f"Invalid where_clause type: {type(where_clause)}")
    return "WHERE " + where_clause_sql
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def query_to_sql(query: Query, count=False, limit=None, offset: Optional[int] = None):
    """
    Render a query as a SQL SELECT statement, one clause per line.

    :param query: the query to render
    :param count: when true, emit ``SELECT COUNT(*)`` and leave out ordering,
        limit and offset
    :param limit: row limit; falls back to ``query.limit`` and then to 100 when
        None, and a negative value disables the LIMIT clause
    :param offset: row offset; falls back to ``query.offset`` when not set
    :return: the SQL string
    """
    columns = query.select_cols or ["*"]
    select_line = "SELECT COUNT(*)" if count else f"SELECT {', '.join(columns)}"
    parts = [select_line, f"FROM {query.from_table}", where_clause_to_sql(query)]

    if not count:
        if query.sort_by:
            parts.append(f"ORDER BY {', '.join(query.sort_by)}")

        # Resolve the limit: explicit argument, then the query's own, then 100.
        effective_limit = query.limit if limit is None else limit
        if effective_limit is None:
            effective_limit = 100
        # A negative limit means "no limit".
        if not effective_limit < 0:
            parts.append(f" LIMIT {effective_limit}")

        effective_offset = offset or query.offset
        if effective_offset:
            parts.append(f" OFFSET {effective_offset}")

    # Drop empty fragments, such as a missing WHERE clause.
    return "\n".join(part for part in parts if part)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def facet_count_sql(query: Query, facet_column: Union[str, Tuple[str, ...]], multivalued=False) -> str:
    """
    Build a SQL statement that counts rows per distinct facet value.

    Conditions in the query's where clause that start with a facet column name
    are left out.

    :param query: the query supplying the table and the where clause
    :param facet_column: a column name, or a tuple of column names to facet on
        in combination
    :param multivalued: when true, the facet column is expanded with ``UNNEST``
        before counting
    :return: the SQL string, ordered by descending count
    :raises NotImplementedError: if ``multivalued`` is set for a tuple of columns
    """
    facet_names = facet_column if isinstance(facet_column, tuple) else (facet_column,)
    modified_where = None
    if query.where_clause:
        # Strip the leading keyword before splitting; otherwise the first
        # condition ("WHERE col = ...") can never match a facet column prefix.
        conditions_sql = where_clause_to_sql(query).removeprefix("WHERE ")
        facet_prefixes = tuple(f"{name} " for name in facet_names)
        # NOTE(review): splitting on " AND " is naive for free-text clauses
        # (string literals containing " AND ", lower-case "and", nested OR).
        conditions = [cond for cond in conditions_sql.split(" AND ") if not cond.startswith(facet_prefixes)]
        if conditions:
            modified_where = "WHERE " + " AND ".join(conditions)

    if isinstance(facet_column, tuple):
        if multivalued:
            raise NotImplementedError("Multivalued facets are not supported for multiple columns")
        facet_column = ", ".join(facet_column)
    from_table = query.from_table
    if multivalued:
        # Filter inside the subquery: after UNNEST only the facet column is
        # left, so conditions on other columns could not be resolved outside.
        inner_sql = f"SELECT UNNEST({facet_column}) as {facet_column} FROM {query.from_table}"
        if modified_where:
            inner_sql = f"{inner_sql} {modified_where}"
            modified_where = None
        from_table = f"({inner_sql})"
    sql_str = [f"SELECT {facet_column}, COUNT(*) as count", f"FROM {from_table}"]
    if modified_where:
        sql_str.append(modified_where)
    sql_str.append(f"GROUP BY {facet_column}")
    sql_str.append("ORDER BY count DESC")  # Optional, order by count for convenience
    return "\n".join(sql_str)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def introspect_schema(engine: sqlalchemy.Engine) -> SchemaDefinition:
    """
    Introspect a database schema and return a SchemaDefinition object.

    Every reflected table becomes a class and every column an attribute. A
    single-column primary key is marked as the identifier. A column with
    foreign keys takes the referenced table as its range; any other column is
    typed through ``_map_type``.

    :param engine: the engine to reflect tables from
    :return: the schema built from the reflected tables
    """
    metadata_obj = MetaData()
    logger.info("Reflecting using %s", engine)
    metadata_obj.reflect(bind=engine)
    sb = SchemaBuilder()
    schema = sb.schema
    for table in metadata_obj.sorted_tables:
        logger.info("Importing %s", table.name)
        sb.add_class(table.name)
        cls = schema.classes[table.name]
        pk_names = [column.name for column in table.columns if column.primary_key]
        # Composite primary keys have no single identifier column.
        pk = pk_names[0] if len(pk_names) == 1 else None
        for column in table.columns:
            slot = SlotDefinition(column.name)
            cls.attributes[slot.name] = slot
            if pk and pk == column.name:
                slot.identifier = True
            if column.foreign_keys:
                for fk in column.foreign_keys:
                    # Read the table name from the referenced column; splitting
                    # its dotted string form breaks on schema-qualified targets.
                    slot.range = fk.column.table.name
            else:
                slot.range = _map_type(column.type)
    return schema
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
|
|
2
|
+
The MIT License (MIT)
|
|
3
|
+
|
|
4
|
+
Copyright (c) 2024 Monarch Initiative
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: linkml-store
|
|
3
|
+
Version: 0.0.0
|
|
4
|
+
Summary: linkml-store
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Author 1
|
|
7
|
+
Author-email: author@org.org
|
|
8
|
+
Requires-Python: >=3.9, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*, !=3.8.*
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Provides-Extra: analytics
|
|
16
|
+
Provides-Extra: app
|
|
17
|
+
Provides-Extra: llm
|
|
18
|
+
Provides-Extra: mongodb
|
|
19
|
+
Provides-Extra: tests
|
|
20
|
+
Requires-Dist: black (>=24.0.0) ; extra == "tests"
|
|
21
|
+
Requires-Dist: click
|
|
22
|
+
Requires-Dist: duckdb (>=0.10.1,<0.11.0)
|
|
23
|
+
Requires-Dist: duckdb-engine (>=0.11.2,<0.12.0)
|
|
24
|
+
Requires-Dist: linkml-runtime (>=1.7.5,<2.0.0)
|
|
25
|
+
Requires-Dist: llm ; extra == "llm"
|
|
26
|
+
Requires-Dist: matplotlib ; extra == "analytics"
|
|
27
|
+
Requires-Dist: pandas (>=2.2.1,<3.0.0) ; extra == "analytics"
|
|
28
|
+
Requires-Dist: plotly ; extra == "analytics"
|
|
29
|
+
Requires-Dist: pydantic (>=2.0.0,<3.0.0)
|
|
30
|
+
Requires-Dist: pymongo ; extra == "mongodb"
|
|
31
|
+
Requires-Dist: pystow (>=0.5.4,<0.6.0)
|
|
32
|
+
Requires-Dist: seaborn ; extra == "analytics"
|
|
33
|
+
Requires-Dist: sqlalchemy
|
|
34
|
+
Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
# linkml-store
|
|
38
|
+
|
|
39
|
+
This is the project description.
|
|
40
|
+
|
|
41
|
+
# Acknowledgements
|
|
42
|
+
|
|
43
|
+
This [cookiecutter](https://cookiecutter.readthedocs.io/en/stable/README.html) project was developed from the [monarch-project-template](https://github.com/monarch-initiative/monarch-project-template) template and will be kept up-to-date using [cruft](https://cruft.github.io/cruft/).
|
|
44
|
+
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
linkml_store/__init__.py,sha256=jlU6WOUAn8cKIhzbTULmBTWpW9gZdEt7q_RI6KZN1bY,118
|
|
2
|
+
linkml_store/api/__init__.py,sha256=Il3wGkN6evgc3OOWQYL7q3J-tRBs_0yQ3Nj-s6eo-0Y,284
|
|
3
|
+
linkml_store/api/client.py,sha256=6zGzhTd6zJ-V5fzLbgljI6onq_XVvaxoeyXK5d1U9wM,4353
|
|
4
|
+
linkml_store/api/collection.py,sha256=zr9iMXCJPsBOYLwO8I0dLIl01sVG4sgXtpgVdHJIVNo,11334
|
|
5
|
+
linkml_store/api/database.py,sha256=oulUxyUGfq6dMQge___dYcevunDOIlLUBGTeewRZqDs,6682
|
|
6
|
+
linkml_store/api/metadata.py,sha256=k9F6D_nuIZ0wWocj0ew2FYSKOc06CJWvwoUpHcrs7JA,69
|
|
7
|
+
linkml_store/api/queries.py,sha256=5WgI_od_Qlpiza-u-XCrxL0F3Etf6mdeCCSSxeHT0PI,1698
|
|
8
|
+
linkml_store/api/stores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
linkml_store/api/stores/duckdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
linkml_store/api/stores/duckdb/duckdb_collection.py,sha256=3hxwnRoY8pIZuW09yi4BSRDUT_pzTo9Mj82SBTT1aNw,4194
|
|
11
|
+
linkml_store/api/stores/duckdb/duckdb_database.py,sha256=efTwzAoAc4YugBqNl9aQhlBzcPU4GDLwGQC5UPk_Uuw,6104
|
|
12
|
+
linkml_store/api/stores/duckdb/mappings.py,sha256=S4MWetLpQcxOwwedXrZTqazxdaHIQXXbq4VRq9Ok4B4,123
|
|
13
|
+
linkml_store/api/stores/mongodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
linkml_store/api/stores/mongodb/mongodb_collection.py,sha256=atFpJWTENwCBmRjtC5iWkwwV-3XnVr16A_nuXDS76Ig,2094
|
|
15
|
+
linkml_store/api/stores/mongodb/mongodb_database.py,sha256=EpiTUftXlmOC8V4lPcrGOX2xq0leqWpmPVaZGsUMESI,3984
|
|
16
|
+
linkml_store/constants.py,sha256=x4ZmDsfE9rZcL5WpA93uTKrRWzCD6GodYXviVzIvR38,112
|
|
17
|
+
linkml_store/index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
+
linkml_store/index/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
+
linkml_store/index/implementations/llm_index.py,sha256=JAxg1dIuU7W8ZtnsGcogbBUvyrPTZCbKhEFv-0lEKrk,1061
|
|
20
|
+
linkml_store/index/implementations/simple_index.py,sha256=ep73dg86QqV7B7t_VoOK1weVx5RD0xvJ7uJPsszd5I0,1134
|
|
21
|
+
linkml_store/index/index.py,sha256=uwgKtw76ch0qe5CbSj2Ft_WAhY94Qm5qLBJucl-VPWA,3420
|
|
22
|
+
linkml_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
+
linkml_store/utils/io.py,sha256=JHUrWDtlZC2jtN_PQZ4ypdGIyYlftZEN3JaCvEPs44w,884
|
|
24
|
+
linkml_store/utils/sql_utils.py,sha256=sCM9GqEW6zZ3-2n5tsW7gzi0DerP6TXx3AJDY_0kCJ4,4557
|
|
25
|
+
linkml_store-0.0.0.dist-info/LICENSE,sha256=77mDOslUnalYnuq9xQYZKtIoNEzcH9mIjvWHOKjamnE,1086
|
|
26
|
+
linkml_store-0.0.0.dist-info/METADATA,sha256=Ignb5ab4IwjIfm60g2yciUqYBb0IstFvABlOMFTygPk,1717
|
|
27
|
+
linkml_store-0.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
28
|
+
linkml_store-0.0.0.dist-info/entry_points.txt,sha256=YdXo7B96u7fP8WX9cirGelqH0kjx7vGjT-w8hq8HFSE,54
|
|
29
|
+
linkml_store-0.0.0.dist-info/RECORD,,
|