linkml-store 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- linkml_store/api/client.py +76 -11
- linkml_store/api/collection.py +223 -40
- linkml_store/api/config.py +59 -9
- linkml_store/api/database.py +45 -27
- linkml_store/api/stores/duckdb/duckdb_collection.py +21 -3
- linkml_store/api/stores/duckdb/duckdb_database.py +36 -3
- linkml_store/api/stores/filesystem/filesystem_collection.py +13 -4
- linkml_store/api/stores/filesystem/filesystem_database.py +10 -1
- linkml_store/api/stores/mongodb/mongodb_collection.py +80 -34
- linkml_store/api/stores/mongodb/mongodb_database.py +1 -36
- linkml_store/api/stores/solr/solr_collection.py +4 -4
- linkml_store/cli.py +44 -18
- linkml_store/index/__init__.py +21 -5
- linkml_store/index/implementations/llm_indexer.py +2 -1
- linkml_store/index/indexer.py +20 -4
- linkml_store/utils/file_utils.py +37 -0
- linkml_store/utils/format_utils.py +69 -8
- linkml_store/utils/pandas_utils.py +40 -0
- linkml_store/utils/schema_utils.py +23 -0
- linkml_store/utils/sql_utils.py +2 -1
- linkml_store/webapi/__init__.py +0 -0
- linkml_store/webapi/html/__init__.py +3 -0
- linkml_store/webapi/html/base.html.j2 +24 -0
- linkml_store/webapi/html/collection_details.html.j2 +15 -0
- linkml_store/webapi/html/database_details.html.j2 +16 -0
- linkml_store/webapi/html/databases.html.j2 +14 -0
- linkml_store/webapi/html/generic.html.j2 +46 -0
- linkml_store/webapi/main.py +572 -0
- linkml_store-0.1.11.dist-info/METADATA +171 -0
- linkml_store-0.1.11.dist-info/RECORD +60 -0
- {linkml_store-0.1.9.dist-info → linkml_store-0.1.11.dist-info}/entry_points.txt +1 -0
- linkml_store-0.1.9.dist-info/METADATA +0 -61
- linkml_store-0.1.9.dist-info/RECORD +0 -49
- {linkml_store-0.1.9.dist-info → linkml_store-0.1.11.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.9.dist-info → linkml_store-0.1.11.dist-info}/WHEEL +0 -0
linkml_store/cli.py
CHANGED
@@ -16,6 +16,7 @@ from linkml_store.index.implementations.simple_indexer import SimpleIndexer
 from linkml_store.index.indexer import Indexer
 from linkml_store.utils.format_utils import Format, guess_format, load_objects, render_output, write_output
 from linkml_store.utils.object_utils import object_path_update
+from linkml_store.utils.pandas_utils import facet_summary_to_dataframe_unmelted

 index_type_option = click.option(
     "--index-type",
@@ -87,6 +88,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
 @click.option("--set", help="Metadata settings in the form PATHEXPR=value", multiple=True)
 @click.option("-v", "--verbose", count=True)
 @click.option("-q", "--quiet/--no-quiet")
+@click.option("--base-dir", "-B", help="Base directory for the client configuration")
 @click.option(
     "--stacktrace/--no-stacktrace",
     default=False,
@@ -94,7 +96,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
     help="If set then show full stacktrace on error",
 )
 @click.pass_context
-def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, config, set):
+def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, config, set, **kwargs):
     """A CLI for interacting with the linkml-store."""
     if not stacktrace:
         sys.tracebacklimit = 0
@@ -117,7 +119,7 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
     if quiet:
         logger.setLevel(logging.ERROR)
     ctx.ensure_object(dict)
-    client = Client().from_config(config) if config else Client()
+    client = Client().from_config(config, **kwargs) if config else Client()
     settings = ContextSettings(client=client, database_name=database, collection_name=collection)
     ctx.obj["settings"] = settings
     # DEPRECATED
@@ -150,7 +152,7 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
         # raise ValueError("Collection must be specified if there are multiple collections.")
         if settings.database and settings.database.list_collections():
             collection = settings.database.list_collections()[0]
-            settings.collection_name = collection.
+            settings.collection_name = collection.alias


 @cli.command()
@@ -159,7 +161,15 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
 @click.option("--object", "-i", multiple=True, help="Input object as YAML")
 @click.pass_context
 def insert(ctx, files, object, format):
-    """Insert objects from files (JSON, YAML, TSV) into the specified collection.
+    """Insert objects from files (JSON, YAML, TSV) into the specified collection.
+
+    Using a configuration:
+
+        linkml-store -C config.yaml -c genes insert data/genes/*.json
+
+    Note: if you don't provide a schema this will be inferred, but it is
+    usually better to provide an explicit schema
+    """
     settings = ctx.obj["settings"]
     collection = settings.collection
     if not collection:
@@ -172,15 +182,15 @@ def insert(ctx, files, object, format):
             objects = load_objects(file_path, format=format)
         else:
             objects = load_objects(file_path)
-        logger.info(f"Inserting {len(objects)} objects from {file_path} into collection '{collection.
+        logger.info(f"Inserting {len(objects)} objects from {file_path} into collection '{collection.alias}'.")
         collection.insert(objects)
-        click.echo(f"Inserted {len(objects)} objects from {file_path} into collection '{collection.
+        click.echo(f"Inserted {len(objects)} objects from {file_path} into collection '{collection.alias}'.")
     if object:
         for object_str in object:
             logger.info(f"Parsing: {object_str}")
             objects = yaml.safe_load(object_str)
             collection.insert(objects)
-            click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{collection.
+            click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{collection.alias}'.")
     collection.commit()


@@ -316,7 +326,7 @@ def query(ctx, where, limit, output_type, output):
     """
     collection = ctx.obj["settings"].collection
     where_clause = yaml.safe_load(where) if where else None
-    query = Query(from_table=collection.
+    query = Query(from_table=collection.alias, where_clause=where_clause, limit=limit)
     result = collection.query(query)
     output_data = render_output(result.rows, output_type)
     if output:
@@ -333,7 +343,7 @@ def query(ctx, where, limit, output_type, output):
 def list_collections(ctx, **kwargs):
     db = ctx.obj["settings"].database
     for collection in db.list_collections(**kwargs):
-        click.echo(collection.
+        click.echo(collection.alias)
         click.echo(render_output(collection.metadata))


@@ -343,8 +353,9 @@ def list_collections(ctx, **kwargs):
 @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
 @click.option("--output", "-o", type=click.Path(), help="Output file path")
 @click.option("--columns", "-S", help="Columns to facet on")
+@click.option("--wide/--no-wide", "-U/--no-U", default=False, show_default=True, help="Wide table")
 @click.pass_context
-def fq(ctx, where, limit, columns, output_type, output):
+def fq(ctx, where, limit, columns, output_type, wide, output):
     """
     Query facets from the specified collection.

@@ -371,11 +382,22 @@
             return "+".join([str(x) for x in key])
         return key

-
-
-
-
-
+    if wide:
+        results_obj = facet_summary_to_dataframe_unmelted(results)
+    else:
+        if output_type == Format.PYTHON.value:
+            results_obj = results
+        elif output_type in [Format.TSV.value, Format.CSV.value]:
+            results_obj = []
+            for fc, data in results.items():
+                for v, c in data:
+                    results_obj.append({"facet": fc, "value": v, "count": c})
+        else:
+            results_obj = {}
+            for key, value in results.items():
+                value_as_dict = {_untuple(v[0:-1]): v[-1] for v in value}
+                results_obj[_untuple(key)] = value_as_dict
+    output_data = render_output(results_obj, output_type)
     if output:
         with open(output, "w") as f:
             f.write(output_data)
@@ -395,14 +417,17 @@ def _get_index(index_type=None, **kwargs) -> Indexer:
 @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
 @click.option("--output-type", "-O", type=format_choice, default=Format.FORMATTED.value, help="Output format")
 @click.option("--output", "-o", type=click.Path(), help="Output file path")
+@click.option(
+    "--limit", "-l", default=-1, show_default=True, type=click.INT, help="Maximum number of results to return"
+)
 @click.pass_context
-def describe(ctx, where, output_type, output):
+def describe(ctx, where, output_type, output, limit):
     """
     Describe the collection schema.
     """
     where_clause = yaml.safe_load(where) if where else None
     collection = ctx.obj["settings"].collection
-    df = collection.find(where_clause, limit=
+    df = collection.find(where_clause, limit=limit).rows_dataframe
     write_output(df.describe(include="all").transpose(), output_type, target=output)


@@ -460,7 +485,7 @@ def search(ctx, search_term, where, limit, index_type, output_type, output, auto
     """Search objects in the specified collection."""
     collection = ctx.obj["settings"].collection
     ix = get_indexer(index_type)
-    logger.info(f"Attaching index to collection {collection.
+    logger.info(f"Attaching index to collection {collection.alias}: {ix.model_dump()}")
     collection.attach_indexer(ix, auto_index=auto_index)
     result = collection.search(search_term, where=where, limit=limit)
     output_data = render_output([{"score": row[0], **row[1]} for row in result.ranked_rows], output_type)
@@ -490,6 +515,7 @@ def indexes(ctx):
 def validate(ctx, output_type, output):
     """Validate objects in the specified collection."""
     collection = ctx.obj["settings"].collection
+    logger.info(f"Validating collection {collection.alias}")
     validation_results = [json_dumper.to_dict(x) for x in collection.iter_validate_collection()]
     output_data = render_output(validation_results, output_type)
     if output:
linkml_store/index/__init__.py
CHANGED
@@ -1,3 +1,14 @@
+"""
+Indexers package.
+
+Indexers allow indexes to be added to existing :class:`Collection` objects.
+
+Current two are supported:
+
+* simple: :class:`SimpleIndexer`
+* llm: :class:`LLMIndexer`
+"""
+
 from typing import Type

 from linkml_store.index.implementations.llm_indexer import LLMIndexer
@@ -14,7 +25,7 @@ def get_indexer_class(name: str) -> Type[Indexer]:
     """
     Get an indexer class by name.

-    :param name: the name of the indexer
+    :param name: the name of the indexer (simple, llm, ...)
     :return: the indexer class
     """
     if name not in INDEXER_CLASSES:
@@ -22,16 +33,21 @@ def get_indexer_class(name: str) -> Type[Indexer]:
     return INDEXER_CLASSES[name]


-def get_indexer(
+def get_indexer(index_type: str, **kwargs) -> Indexer:
     """
     Get an indexer by name.

-
+    >>> simple_indexer = get_indexer("simple")
+    >>> llm_indexer = get_indexer("llm")
+
+    :param name: the name of the indexer (simple, llm, ...)
     :param kwargs: additional arguments to pass to the indexer
     :return: the indexer
     """
     kwargs = {k: v for k, v in kwargs.items() if v is not None}
-    cls = get_indexer_class(
-    kwargs["
+    cls = get_indexer_class(index_type)
+    kwargs["index_type"] = index_type
     indexer = cls(**kwargs)
+    if not indexer.name:
+        indexer.name = index_type
     return indexer
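
A short sketch of the reworked get_indexer, based only on the hunk above: the index type is now an explicit argument, is recorded on the indexer, and is reused as its name when no explicit name was given.

    from linkml_store.index import get_indexer

    ix = get_indexer("simple")
    print(ix.index_type)  # "simple"
    print(ix.name)        # "simple", unless the indexer class sets its own default name
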
linkml_store/index/implementations/llm_indexer.py
CHANGED

@@ -74,7 +74,7 @@ class LLMIndexer(Indexer):

         embeddings_client = Client()
         config = CollectionConfig(
-
+            alias=coll_name,
             type="Embeddings",
             attributes={
                 "text": {"range": "string"},
@@ -116,6 +116,7 @@
             embeddings_collection.insert(
                 {"text": uncached_texts[i], "embedding": embeddings[index], "model_id": model_id}
             )
+            embeddings_collection.commit()
         else:
             logger.info(f"Embedding {len(texts)} texts")
             embeddings = model.embed_multi(texts)
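
The embeddings cache collection is now configured with an alias keyword. A hedged sketch of the config object built above; the import path for CollectionConfig and the alias value are assumptions (the hunk only shows the constructor call):

    from linkml_store.api.config import CollectionConfig  # assumed module path

    config = CollectionConfig(
        alias="llm_cache_collection",   # illustrative; the real code passes coll_name
        type="Embeddings",
        attributes={"text": {"range": "string"}},
    )
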
linkml_store/index/indexer.py
CHANGED
@@ -11,11 +11,22 @@ logger = logging.getLogger(__name__)


 class TemplateSyntaxEnum(str, Enum):
+    """
+    Template syntax types.
+    """
+
     jinja2 = "jinja2"
     fstring = "fstring"


-def cosine_similarity(vector1, vector2):
+def cosine_similarity(vector1, vector2) -> float:
+    """
+    Calculate the cosine similarity between two vectors
+
+    :param vector1:
+    :param vector2:
+    :return:
+    """
     dot_product = np.dot(vector1, vector2)
     norm1 = np.linalg.norm(vector1)
     norm2 = np.linalg.norm(vector2)
@@ -24,10 +35,11 @@

 class Indexer(BaseModel):
     """
-    An
+    An indexer operates on a collection in order to search for objects.
     """

     name: Optional[str] = None
+    index_type: Optional[str] = None
     index_function: Optional[Callable] = None
     distance_function: Optional[Callable] = None
     index_attributes: Optional[List[str]] = None
@@ -93,13 +105,17 @@
         if "{%" in self.text_template or "{{" in self.text_template:
             logger.info("Detected Jinja2 syntax in text template")
             syntax = TemplateSyntaxEnum.jinja2
-        if
+        if not syntax:
+            syntax = TemplateSyntaxEnum.fstring
+        if syntax == TemplateSyntaxEnum.jinja2:
             from jinja2 import Template

             template = Template(self.text_template)
             return template.render(**obj)
-
+        elif syntax == TemplateSyntaxEnum.fstring:
             return self.text_template.format(**obj)
+        else:
+            raise NotImplementedError(f"Cannot handle template syntax: {syntax}")
         return str(obj)

     def search(
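
For reference, a standalone version of the cosine similarity computed above; the hunk shows the dot product and the two norms, and the final division is the standard formula (inferred here, since the return line is not shown):

    import numpy as np

    def cosine_similarity(vector1, vector2) -> float:
        # dot product divided by the product of the vector norms
        dot_product = np.dot(vector1, vector2)
        norm1 = np.linalg.norm(vector1)
        norm2 = np.linalg.norm(vector2)
        return dot_product / (norm1 * norm2)

    print(cosine_similarity([1.0, 0.0], [1.0, 1.0]))  # ~0.707
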
linkml_store/utils/file_utils.py
ADDED

@@ -0,0 +1,37 @@
+import logging
+import shutil
+import tempfile
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+# Set up logging
+logger = logging.getLogger(__name__)
+
+
+def safe_remove_directory(dir_path: Path, no_backup: bool = False) -> Optional[Path]:
+    # Ensure the directory exists
+    if not dir_path.exists():
+        raise FileNotFoundError(f"Directory does not exist: {dir_path}")
+    try:
+        if no_backup:
+            # Move to a temporary directory instead of permanent removal
+            with tempfile.TemporaryDirectory() as tmpdir:
+                tmp_path = Path(tmpdir) / dir_path.name
+                shutil.move(str(dir_path), str(tmp_path))
+                logger.info(f"Directory moved to temporary location: {tmp_path}")
+            # The directory will be automatically removed when exiting the context manager
+            return None
+        else:
+            # Create a backup directory name with timestamp
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            backup_dir = dir_path.with_name(f"{dir_path.name}_backup_{timestamp}")
+
+            # Move the directory to the backup location
+            shutil.move(str(dir_path), str(backup_dir))
+            logger.info(f"Directory backed up to: {backup_dir}")
+            return backup_dir
+
+    except Exception as e:
+        logger.error(f"An error occurred: {e}")
+        return None
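
A small usage sketch of the new helper; the import path follows the file listing at the top of the diff, and the directory name is illustrative:

    from pathlib import Path
    from linkml_store.utils.file_utils import safe_remove_directory

    target = Path("scratch_dir")
    target.mkdir(exist_ok=True)

    # Default behaviour: the directory is moved to <name>_backup_<timestamp> and the
    # backup path is returned; with no_backup=True (or on error) None is returned.
    backup = safe_remove_directory(target)
    print(backup)
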
linkml_store/utils/format_utils.py
CHANGED

@@ -7,8 +7,10 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, TextIO, Type, Union

 import pandas as pd
+import pystow
 import yaml
 from pydantic import BaseModel
+from tabulate import tabulate


 class Format(Enum):
@@ -21,12 +23,40 @@ class Format(Enum):
     YAML = "yaml"
     TSV = "tsv"
     CSV = "csv"
+    PYTHON = "python"
     PARQUET = "parquet"
     FORMATTED = "formatted"
+    TABLE = "table"
+
+
+def load_objects_from_url(
+    url: str,
+    format: Union[Format, str] = None,
+    expected_type: Type = None,
+    local_path: Optional[str] = None,
+    **kwargs,
+) -> List[Dict[str, Any]]:
+    """
+    Load objects from a URL in JSON, JSONLines, YAML, CSV, or TSV format.
+
+    :param url: The URL to the file.
+    :param format: The format of the file. Can be a Format enum or a string value.
+    :param expected_type: The target type to load the objects into.
+    :param local_path: The local path to save the file to.
+    :return: A list of dictionaries representing the loaded objects.
+    """
+    local_path = pystow.ensure("linkml", "linkml-store", url=url)
+    objs = load_objects(local_path, format=format, expected_type=expected_type, **kwargs)
+    if not objs:
+        raise ValueError(f"No objects loaded from URL: {url}")
+    return objs


 def load_objects(
-    file_path: Union[str, Path],
+    file_path: Union[str, Path],
+    format: Union[Format, str] = None,
+    expected_type: Type = None,
+    header_comment_token: Optional[str] = None,
 ) -> List[Dict[str, Any]]:
     """
     Load objects from a file in JSON, JSONLines, YAML, CSV, or TSV format.
@@ -37,7 +67,7 @@ def load_objects(

     :param file_path: The path to the file.
     :param format: The format of the file. Can be a Format enum or a string value.
-    :param expected_type: The target type to load the objects into.
+    :param expected_type: The target type to load the objects into, e.g. list
     :return: A list of dictionaries representing the loaded objects.
     """
     if isinstance(format, str):
@@ -48,6 +78,12 @@ def load_objects(

     if not format and (file_path.endswith(".parquet") or file_path.endswith(".pq")):
         format = Format.PARQUET
+    if not format and file_path.endswith(".tsv"):
+        format = Format.TSV
+    if not format and file_path.endswith(".csv"):
+        format = Format.CSV
+    if not format and file_path.endswith(".py"):
+        format = Format.PYTHON

     mode = "r"
     if format == Format.PARQUET:
@@ -64,15 +100,33 @@ def load_objects(
     elif format == Format.JSONL or (not format and file_path.endswith(".jsonl")):
         objs = [json.loads(line) for line in f]
     elif format == Format.YAML or (not format and (file_path.endswith(".yaml") or file_path.endswith(".yml"))):
-        if expected_type and expected_type == list:
+        if expected_type and expected_type == list:  # noqa E721
             objs = list(yaml.safe_load_all(f))
         else:
             objs = yaml.safe_load(f)
-    elif format == Format.TSV or
-
-
-
-
+    elif format == Format.TSV or format == Format.CSV:
+        # Skip initial comment lines if comment_char is set
+        if header_comment_token:
+            # Store the original position
+            original_pos = f.tell()
+
+            # Read and store lines until we find a non-comment line
+            lines = []
+            for line in f:
+                if not line.startswith(header_comment_token):
+                    break
+                lines.append(line)
+
+            # Go back to the original position
+            f.seek(original_pos)
+
+            # Skip the comment lines we found
+            for _ in lines:
+                f.readline()
+        if format == Format.TSV:
+            reader = csv.DictReader(f, delimiter="\t")
+        else:
+            reader = csv.DictReader(f)
         objs = list(reader)
     elif format == Format.PARQUET:
         import pyarrow.parquet as pq
@@ -151,6 +205,9 @@ def render_output(
     if isinstance(data, pd.DataFrame):
         data = data.to_dict(orient="records")

+    if isinstance(data, dict) and format in [Format.TSV, Format.CSV]:
+        data = [data]
+
     if isinstance(data, BaseModel):
         data = data.model_dump()

@@ -158,6 +215,10 @@ def render_output(
         return json.dumps(data, indent=2, default=str)
     elif format == Format.JSONL:
         return "\n".join(json.dumps(obj) for obj in data)
+    elif format == Format.PYTHON:
+        return str(data)
+    elif format == Format.TABLE:
+        return tabulate(pd.DataFrame(data), headers="keys", tablefmt="psql")
     elif format == Format.YAML:
         if isinstance(data, list):
             return yaml.safe_dump_all(data, sort_keys=False)
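
A sketch of the new TSV handling and table output, based on the hunks above: the format is now inferred from the .tsv suffix, lines starting with header_comment_token are skipped before the header row, and Format.TABLE renders through tabulate. The sample file name and contents are illustrative.

    from pathlib import Path
    from linkml_store.utils.format_utils import Format, load_objects, render_output

    path = Path("genes.tsv")  # illustrative sample file
    path.write_text("# comment line skipped via header_comment_token\nsymbol\tname\nTP53\ttumor protein p53\n")

    objs = load_objects(str(path), header_comment_token="#")
    print(render_output(objs, Format.TABLE))
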
linkml_store/utils/pandas_utils.py
ADDED

@@ -0,0 +1,40 @@
+from typing import Dict, List, Tuple, Union
+
+import pandas as pd
+
+
+def facet_summary_to_dataframe_unmelted(
+    facet_summary: Dict[Union[str, Tuple[str, ...]], List[Tuple[Union[str, Tuple[str, ...]], int]]]
+) -> pd.DataFrame:
+    rows = []
+
+    for facet_type, facet_data in facet_summary.items():
+        if isinstance(facet_type, str):
+            # Single facet type
+            for category, value in facet_data:
+                rows.append({facet_type: category, "Value": value})
+        else:
+            # Multiple facet types
+            for cat_val_tuple in facet_data:
+                if len(cat_val_tuple) == 2:
+                    categories, value = cat_val_tuple
+                else:
+                    categories, value = cat_val_tuple[:-1], cat_val_tuple[-1]
+                row = {"Value": value}
+                for i, facet in enumerate(facet_type):
+                    row[facet] = categories[i]
+                rows.append(row)
+
+    df = pd.DataFrame(rows)
+
+    # Ensure all columns are present, fill with None if missing
+    all_columns = set(col for facet in facet_summary.keys() for col in (facet if isinstance(facet, tuple) else [facet]))
+    for col in all_columns:
+        if col not in df.columns:
+            df[col] = None
+
+    # Move 'Value' to the end
+    cols = [col for col in df.columns if col != "Value"] + ["Value"]
+    df = df[cols]
+
+    return df
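
A worked sketch of facet_summary_to_dataframe_unmelted, the helper behind the new fq --wide option, using a hypothetical facet summary of the shape the function expects (single-facet keys map to (value, count) pairs; tuple keys combine facets):

    from linkml_store.utils.pandas_utils import facet_summary_to_dataframe_unmelted

    facets = {
        "species": [("Homo sapiens", 10), ("Mus musculus", 4)],
        ("species", "chromosome"): [(("Homo sapiens", "1"), 6)],
    }
    df = facet_summary_to_dataframe_unmelted(facets)
    print(df)  # one column per facet plus a trailing "Value" count column
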
linkml_store/utils/schema_utils.py
ADDED

@@ -0,0 +1,23 @@
+from typing import List
+
+from linkml_runtime import SchemaView
+from linkml_runtime.linkml_model import SlotDefinition
+
+
+def path_to_attribute_list(class_name: str, path: str, schema_view: SchemaView) -> List[SlotDefinition]:
+    """
+    Convert a path to a list of attributes.
+
+    :param path:
+    :return:
+    """
+    parts = path.split(".")
+    att_list = []
+    while parts:
+        part = parts.pop(0)
+        att = schema_view.induced_slot(part, class_name)
+        if not att:
+            raise ValueError(f"Attribute {part} not found in class {class_name}")
+        att_list.append(att)
+        class_name = att.range
+    return att_list
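
A hedged sketch of path_to_attribute_list: each dotted path segment is resolved with SchemaView.induced_slot against the class reached through the previous slot's range. The schema file, class, and slot names here are illustrative.

    from linkml_runtime import SchemaView
    from linkml_store.utils.schema_utils import path_to_attribute_list

    sv = SchemaView("personinfo.yaml")  # illustrative schema with Person.address.city
    atts = path_to_attribute_list("Person", "address.city", sv)
    print([att.name for att in atts])   # e.g. ["address", "city"]
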
linkml_store/utils/sql_utils.py
CHANGED
File without changes
linkml_store/webapi/html/base.html.j2
ADDED

@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{% block title %}LinkML Store API{% endblock %}</title>
+    <style>
+        body { font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; }
+        h1 { color: #333; }
+        a { color: #0066cc; }
+        .navigation { margin-bottom: 20px; }
+        .content { margin-top: 20px; }
+    </style>
+</head>
+<body>
+    <div class="navigation">
+        <a href="/pages/">Home</a> |
+        <a href="/pages/databases">Databases</a>
+    </div>
+    <div class="content">
+        {% block content %}{% endblock %}
+    </div>
+</body>
+</html>
linkml_store/webapi/html/collection_details.html.j2
ADDED

@@ -0,0 +1,15 @@
+{% extends "base.html.j2" %}
+
+{% block content %}
+<h1>{{ response.meta.title }}</h1>
+<p>Name: {{ params.collection_name }}</p>
+
+<h2>Collections</h2>
+<ul>
+    {% for collection in response.data.collections %}
+    <li>
+        <a href="/pages{{ collection.links|selectattr('rel', 'equalto', 'self')|first|attr('href') }}">{{ collection.name }}</a>
+    </li>
+    {% endfor %}
+</ul>
+{% endblock %}
linkml_store/webapi/html/database_details.html.j2
ADDED

@@ -0,0 +1,16 @@
+{% extends "base.html.j2" %}
+
+{% block content %}
+<h1>{{ response.meta.title }}</h1>
+<p>Handle: {{ response.data.handle }}</p>
+<p>Number of collections: {{ response.data.num_collections }}</p>
+
+<h2>Collections</h2>
+<ul>
+    {% for collection in response.data.collections %}
+    <li>
+        <a href="/pages{{ collection.links|selectattr('rel', 'equalto', 'self')|first|attr('href') }}">{{ collection.name }}</a>
+    </li>
+    {% endfor %}
+</ul>
+{% endblock %}
linkml_store/webapi/html/databases.html.j2
ADDED

@@ -0,0 +1,14 @@
+{% extends "base.html.j2" %}
+
+{% block title %}LinkML Store API - Databases{% endblock %}
+
+{% block content %}
+<h1>Databases</h1>
+<ul>
+    {% for db in response.data.databases %}
+    <li>
+        <a href="/pages/databases/{{ db.name }}">{{ db.name }}</a>
+    </li>
+    {% endfor %}
+</ul>
+{% endblock %}
linkml_store/webapi/html/generic.html.j2
ADDED

@@ -0,0 +1,46 @@
+{% extends "base.html.j2" %}
+
+{% block title %}{meta.path}{% endblock %}
+
+{% block content %}
+<h1>Meta</h1>
+<pre>
+{{ response.meta }}
+</pre>
+
+<h1>Links</h1>
+<ul>
+    {% for link in response.links %}
+    <li>
+        <a href="/pages{{ link.href }}">{{ link.rel }} ({{ link.href }})</a>
+    </li>
+    {% endfor %}
+</ul>
+</ul>
+
+<h1>Data</h1>
+{% if data_html %}
+<ul>
+    {% for e in data_html %}
+    <li>{{ e|safe }}</li>
+    {% endfor %}
+</ul>
+{% else %}
+
+{% if "items" in response.data %}
+<ul>
+    {% for item in response.data['items'] %}
+    <li>
+        {{ item.name }}
+        {% for link in item.links %}
+        <a href="/pages{{ link.href }}">{{ link.rel }}</a>
+        {% endfor %}
+    </li>
+    {% endfor %}
+</ul>
+{% endif %}
+<pre>
+{{ response.data }}
+</pre>
+{% endif %}
+{% endblock %}