linkml-store 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store/api/client.py +63 -7
- linkml_store/api/collection.py +152 -32
- linkml_store/api/config.py +49 -6
- linkml_store/api/database.py +77 -30
- linkml_store/api/stores/duckdb/duckdb_collection.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_database.py +47 -5
- linkml_store/api/stores/filesystem/filesystem_collection.py +11 -4
- linkml_store/api/stores/filesystem/filesystem_database.py +10 -1
- linkml_store/api/stores/mongodb/mongodb_collection.py +6 -2
- linkml_store/api/stores/mongodb/mongodb_database.py +30 -35
- linkml_store/api/stores/solr/solr_collection.py +4 -4
- linkml_store/cli.py +64 -19
- linkml_store/index/__init__.py +16 -2
- linkml_store/index/implementations/llm_indexer.py +2 -1
- linkml_store/index/indexer.py +13 -2
- linkml_store/utils/file_utils.py +37 -0
- linkml_store/utils/format_utils.py +200 -21
- linkml_store/utils/mongodb_utils.py +145 -0
- linkml_store/utils/pandas_utils.py +40 -0
- linkml_store/utils/sql_utils.py +9 -3
- linkml_store/webapi/html/generic.html.j2 +25 -28
- linkml_store/webapi/main.py +346 -63
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.12.dist-info}/METADATA +36 -3
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.12.dist-info}/RECORD +27 -24
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.12.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.12.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.12.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import subprocess
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from urllib.parse import urlparse
|
|
7
|
+
|
|
8
|
+
from pymongo import MongoClient
|
|
9
|
+
from pymongo.database import Database
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def extract_connection_info(db: Database):
    """
    Summarise how a database handle is connected.

    :param db: database object exposing ``client`` and ``name``
    :return: dict with keys ``host``, ``port``, ``db_name`` and ``username``;
        ``username`` is None when the client exposes no credentials
    """
    mongo_client = db.client

    # Prefer the address reported by the client; otherwise fall back to the
    # HOST / PORT attributes of the client.
    address = mongo_client.address
    if address:
        host, port = address
    else:
        host, port = mongo_client.HOST, mongo_client.PORT

    # Credentials are optional: either attribute may be absent or empty.
    client_options = getattr(mongo_client, "options", None)
    creds = getattr(client_options, "credentials", None)
    username = creds.username if creds else None

    return {"host": host, "port": port, "db_name": db.name, "username": username}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_connection_string(client: MongoClient):
    """
    Extract a ``host:port`` connection string from the MongoClient.

    This avoids triggering truth value testing on Database objects.

    Lookup order:

    1. ``client.address`` -- a single ``(host, port)`` pair
    2. ``client.hosts`` -- an iterable of ``(host, port)`` pairs, joined with commas
    3. ``client.HOST`` -- parsed as a URI; a bare host name (no scheme) is used
       as-is, and ``client.PORT`` supplies the port when the URI has none

    Each attribute is looked up defensively, so a client lacking one of them
    falls through to the next option instead of raising AttributeError.

    :param client: the client to inspect
    :return: ``host:port`` (or a comma-separated list of them)
    :raises ValueError: if none of the attributes yields a host
    """
    address = getattr(client, "address", None)
    if address:
        host, port = address
        return f"{host}:{port}"

    hosts = getattr(client, "hosts", None)
    if hosts:
        # For replica sets, return all hosts
        return ",".join(f"{host}:{port}" for host, port in hosts)

    if hasattr(client, "HOST"):
        parsed_uri = urlparse(client.HOST)
        # urlparse() only fills in hostname/port when a scheme is present;
        # for a bare "localhost" both are None, so fall back to the raw values.
        host = parsed_uri.hostname or client.HOST
        port = parsed_uri.port if parsed_uri.port is not None else getattr(client, "PORT", None)
        return host if port is None else f"{host}:{port}"

    raise ValueError("Unable to determine connection string from client")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_connection_info(db: Database):
    """
    Extract connection information from the Database object.

    :param db: database object exposing ``name`` and ``client.nodes``
    :return: tuple of ``(host, port, db_name)`` for one known node
    :raises ValueError: if the client reports no nodes
    """
    # Get the name of the database
    db_name = db.name

    # Get the client's node collection (this should work for single nodes and replica sets)
    node_list = db.client.nodes

    if not node_list:
        raise ValueError("Unable to determine connection information from database")

    # PyMongo exposes ``nodes`` as a frozenset, which cannot be indexed, so
    # take the first element by iteration. For single node setups this is the
    # only node; for an ordered sequence it is still the first entry.
    host, port = next(iter(node_list))

    return host, port, db_name
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_auth_from_client(client: MongoClient):
    """
    Extract authentication details from MongoClient.

    :param client: the client to inspect
    :return: tuple of ``(username, password, source)``; all three are None
        when the client exposes no options or no credentials
    """
    if hasattr(client, "_MongoClient__options"):
        # For older versions of PyMongo
        options = client._MongoClient__options
    elif hasattr(client, "options"):
        # For newer versions of PyMongo
        options = client.options
    else:
        return None, None, None

    # The attribute may exist but be None (e.g. an unauthenticated client);
    # treat that the same as "no credentials" rather than dereferencing None.
    creds = getattr(options, "credentials", None)
    if creds is None:
        return None, None, None
    return creds.username, creds.password, creds.source
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def connection_from_handle(handle: str):
    """
    Split a handle of the form ``[mongodb://]host[:port]/db`` into host and database.

    :param handle: connection handle, with or without the ``mongodb://`` scheme
    :return: tuple of ``(host, db)``; ``host`` keeps any ``:port`` suffix
    :raises ValueError: if the handle does not contain exactly one ``/``
        separating host from database
    """
    scheme = "mongodb://"
    # Strip the scheme only where it is a prefix, never from the middle of the handle.
    remainder = handle[len(scheme):] if handle.startswith(scheme) else handle
    host, sep, db = remainder.partition("/")
    if not sep or "/" in db:
        raise ValueError(f"Expected a handle of the form [mongodb://]host[:port]/db, got: {handle}")
    return host, db
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def export_mongodb(handle: str, location: str, password: Optional[str] = None):
    """
    Dump a MongoDB database to disk by running the ``mongodump`` executable.

    :param handle: handle of the form ``[mongodb://]host[:port]/db``
    :param location: output directory passed to ``mongodump --out``
    :param password: accepted but not currently passed on to ``mongodump``
    :raises subprocess.CalledProcessError: if ``mongodump`` exits with a non-zero status
    """
    host, db_name = connection_from_handle(handle)
    logger.info(f"Exporting MongoDB database {db_name} from {host} to {location}")

    # Argument list (no shell) so that none of the values are shell-interpreted.
    dump_cmd = ["mongodump", f"--host={host}", f"--db={db_name}", "--out", location]
    completed = subprocess.run(dump_cmd, check=True, capture_output=True, text=True)
    logger.info(f"MongoDB export completed successfully. Output: {completed.stdout}")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def import_mongodb(handle: str, dump_dir: str, drop: bool = False):
    """
    Restore a dump into a MongoDB database by running the ``mongorestore`` executable.

    The dump directory must contain exactly one sub-directory, whose name is
    taken as the source database; its collections are renamed into the
    database named by ``handle`` via ``--nsFrom`` / ``--nsTo``.

    :param handle: handle of the form ``[mongodb://]host[:port]/db`` for the target database
    :param dump_dir: directory containing the dump
    :param drop: if True, pass ``--drop`` to ``mongorestore``
    :raises ValueError: if ``dump_dir`` is not a directory, or does not
        contain exactly one sub-directory
    :raises subprocess.CalledProcessError: if ``mongorestore`` exits with a non-zero status
    """
    host, db_name = connection_from_handle(handle)

    dir_path = Path(dump_dir)
    if not dir_path.is_dir():
        raise ValueError(f"{dir_path} is not a dir")
    # Only sub-directories count as databases; stray files in the dump
    # directory (e.g. OS metadata files) must not trip the check below.
    directories = sorted(entry.name for entry in dir_path.iterdir() if entry.is_dir())
    if len(directories) != 1:
        raise ValueError(f"Expected exactly one database in {dump_dir}, got: {directories}")
    src_db_name = directories[0]

    # Construct the mongorestore command
    cmd = [
        "mongorestore",
        f"--host={host}",
        f"--nsFrom={src_db_name}.*",
        f"--nsTo={db_name}.*",
        str(dump_dir),
    ]

    # Add drop option if specified
    if drop:
        cmd.append("--drop")
    logger.info(f"CMD={cmd}")
    # Execute mongorestore
    result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    if result.stderr:
        logger.warning(result.stderr)
    logger.info(f"MongoDB import completed successfully. Output: {result.stdout} // {result.stderr}")
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from typing import Dict, List, Tuple, Union
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def facet_summary_to_dataframe_unmelted(
    facet_summary: Dict[Union[str, Tuple[str, ...]], List[Tuple[Union[str, Tuple[str, ...]], int]]]
) -> pd.DataFrame:
    """
    Convert a facet summary into a wide ("unmelted") DataFrame.

    Each facet name becomes its own column and the count goes into a final
    ``Value`` column. Keys of ``facet_summary`` are either a single facet name
    (entries are ``(category, count)`` pairs) or a tuple of facet names
    (entries are either ``((cat1, cat2, ...), count)`` or the flattened form
    ``(cat1, cat2, ..., count)``).

    An empty summary, or facets with no entries, yields an empty DataFrame
    that still has every facet column plus ``Value``.

    :param facet_summary: mapping from facet name(s) to counted categories
    :return: DataFrame with one row per counted category (combination),
        facet columns first and ``Value`` last
    """
    rows = []

    for facet_type, facet_data in facet_summary.items():
        if isinstance(facet_type, str):
            # Single facet type
            rows.extend({facet_type: category, "Value": value} for category, value in facet_data)
            continue
        # Multiple facet types
        for cat_val_tuple in facet_data:
            if len(cat_val_tuple) == 2:
                categories, value = cat_val_tuple
            else:
                # Flattened form: everything but the last element is a category.
                categories, value = cat_val_tuple[:-1], cat_val_tuple[-1]
            row = {"Value": value}
            for i, facet in enumerate(facet_type):
                row[facet] = categories[i]
            rows.append(row)

    df = pd.DataFrame(rows)

    # Ensure all columns are present, fill with None if missing. dict.fromkeys
    # de-duplicates while keeping first-seen order, so the layout of any
    # back-filled columns is deterministic. "Value" is included because it is
    # absent whenever no rows were produced.
    expected = dict.fromkeys(
        col for facet in facet_summary for col in (facet if isinstance(facet, tuple) else [facet])
    )
    for col in [*expected, "Value"]:
        if col not in df.columns:
            df[col] = None

    # Move 'Value' to the end
    cols = [col for col in df.columns if col != "Value"] + ["Value"]
    return df[cols]
|
linkml_store/utils/sql_utils.py
CHANGED
|
@@ -19,8 +19,9 @@ TYPE_MAP = {
|
|
|
19
19
|
|
|
20
20
|
OP_MAP = {
|
|
21
21
|
"eq": "=",
|
|
22
|
+
"$in": "ARRAY_CONTAINS", ## mongodb
|
|
23
|
+
"$contains": "ARRAY_CONTAINS", ## TODO: this is chromadb-specific
|
|
22
24
|
"in": "ARRAY_CONTAINS",
|
|
23
|
-
"$contains": "ARRAY_CONTAINS",
|
|
24
25
|
}
|
|
25
26
|
|
|
26
27
|
|
|
@@ -65,9 +66,14 @@ def col_val_constraints_to_conjs(col_name: str, val_constraints: Any) -> list:
|
|
|
65
66
|
conjs = []
|
|
66
67
|
for k, v in val_constraints.items():
|
|
67
68
|
if k in OP_MAP:
|
|
68
|
-
|
|
69
|
+
if k == "$in" and isinstance(v, list):
|
|
70
|
+
v_mapped = [_quote(v1) for v1 in v]
|
|
71
|
+
t = f"{col_name} IN ({', '.join(v_mapped)})"
|
|
72
|
+
else:
|
|
73
|
+
t = f"{OP_MAP[k]}({col_name}, {_quote(v)})"
|
|
69
74
|
else:
|
|
70
|
-
|
|
75
|
+
t = f"{col_name} {k} {_quote(v)}"
|
|
76
|
+
conjs.append(t)
|
|
71
77
|
return conjs
|
|
72
78
|
else:
|
|
73
79
|
return [f"{col_name} = {_quote(val_constraints)}"]
|
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
{% extends "base.html.j2" %}
|
|
2
2
|
|
|
3
|
+
|
|
4
|
+
{# Render one link: its relation name, then the HTML page URL and the raw API URL. #}
{% macro make_link(link) %}
{{ link.rel }} [
page: <a href="/pages{{ link.href }}">/pages{{ link.href }}</a> |
API: <a href="{{ link.href }}">{{ link.href }}</a> ]
{% endmacro %}
|
|
10
|
+
|
|
3
11
|
{% block title %}{meta.path}{% endblock %}
|
|
4
12
|
|
|
5
13
|
{% block content %}
|
|
@@ -9,38 +17,27 @@
|
|
|
9
17
|
</pre>
|
|
10
18
|
|
|
11
19
|
<h1>Links</h1>
|
|
12
|
-
|
|
13
|
-
{% for link in response.links %}
|
|
14
|
-
<li>
|
|
15
|
-
|
|
16
|
-
</li>
|
|
17
|
-
{% endfor %}
|
|
20
|
+
<ul>
|
|
21
|
+
{% for link in response.links %}
|
|
22
|
+
<li> {{ make_link(link) }} </li>
|
|
23
|
+
{% endfor %}
|
|
18
24
|
</ul>
|
|
19
|
-
</ul>
|
|
20
25
|
|
|
26
|
+
{% if response.items != None and response["items"] != None %}
|
|
27
|
+
<h1>Items</h1>
|
|
28
|
+
<ul>
|
|
29
|
+
{% for item in response["items"] %}
|
|
30
|
+
<li>
|
|
31
|
+
{{ item.name }}
|
|
32
|
+
{% for link in item.links %}
|
|
33
|
+
{{ make_link(link) }}
|
|
34
|
+
{% endfor %}
|
|
35
|
+
HTML: {{ item.html | safe }}
|
|
36
|
+
</li>
|
|
37
|
+
{% endfor %}
|
|
38
|
+
{% endif %}
|
|
21
39
|
<h1>Data</h1>
|
|
22
|
-
{% if data_html %}
|
|
23
|
-
<ul>
|
|
24
|
-
{% for e in data_html %}
|
|
25
|
-
<li>{{ e|safe }}</li>
|
|
26
|
-
{% endfor %}
|
|
27
|
-
</ul>
|
|
28
|
-
{% else %}
|
|
29
|
-
|
|
30
|
-
{% if "items" in response.data %}
|
|
31
|
-
<ul>
|
|
32
|
-
{% for item in response.data['items'] %}
|
|
33
|
-
<li>
|
|
34
|
-
{{ item.name }}
|
|
35
|
-
{% for link in item.links %}
|
|
36
|
-
<a href="/pages{{ link.href }}">{{ link.rel }}</a>
|
|
37
|
-
{% endfor %}
|
|
38
|
-
</li>
|
|
39
|
-
{% endfor %}
|
|
40
|
-
</ul>
|
|
41
|
-
{% endif %}
|
|
42
40
|
<pre>
|
|
43
41
|
{{ response.data }}
|
|
44
42
|
</pre>
|
|
45
|
-
{% endif %}
|
|
46
43
|
{% endblock %}
|