linkml-store 0.1.9__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store/api/client.py +13 -4
- linkml_store/api/collection.py +85 -10
- linkml_store/api/config.py +11 -3
- linkml_store/api/stores/duckdb/duckdb_collection.py +5 -3
- linkml_store/api/stores/duckdb/duckdb_database.py +20 -1
- linkml_store/api/stores/filesystem/filesystem_collection.py +2 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +74 -32
- linkml_store/cli.py +9 -1
- linkml_store/index/__init__.py +5 -3
- linkml_store/index/indexer.py +7 -2
- linkml_store/utils/format_utils.py +1 -1
- linkml_store/utils/schema_utils.py +23 -0
- linkml_store/webapi/__init__.py +0 -0
- linkml_store/webapi/html/__init__.py +3 -0
- linkml_store/webapi/html/base.html.j2 +24 -0
- linkml_store/webapi/html/collection_details.html.j2 +15 -0
- linkml_store/webapi/html/database_details.html.j2 +16 -0
- linkml_store/webapi/html/databases.html.j2 +14 -0
- linkml_store/webapi/html/generic.html.j2 +46 -0
- linkml_store/webapi/main.py +572 -0
- linkml_store-0.1.10.dist-info/METADATA +138 -0
- {linkml_store-0.1.9.dist-info → linkml_store-0.1.10.dist-info}/RECORD +25 -16
- {linkml_store-0.1.9.dist-info → linkml_store-0.1.10.dist-info}/entry_points.txt +1 -0
- linkml_store-0.1.9.dist-info/METADATA +0 -61
- {linkml_store-0.1.9.dist-info → linkml_store-0.1.10.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.9.dist-info → linkml_store-0.1.10.dist-info}/WHEEL +0 -0
linkml_store/api/client.py
CHANGED
|
@@ -98,7 +98,7 @@ class Client:
|
|
|
98
98
|
"""
|
|
99
99
|
return self.metadata.base_dir
|
|
100
100
|
|
|
101
|
-
def from_config(self, config: Union[ClientConfig, str, Path], base_dir=None, **kwargs):
|
|
101
|
+
def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, **kwargs):
|
|
102
102
|
"""
|
|
103
103
|
Create a client from a configuration.
|
|
104
104
|
|
|
@@ -118,11 +118,13 @@ class Client:
|
|
|
118
118
|
:return:
|
|
119
119
|
|
|
120
120
|
"""
|
|
121
|
+
if isinstance(config, dict):
|
|
122
|
+
config = ClientConfig(**config)
|
|
121
123
|
if isinstance(config, Path):
|
|
122
124
|
config = str(config)
|
|
123
125
|
if isinstance(config, str):
|
|
124
|
-
if not base_dir:
|
|
125
|
-
|
|
126
|
+
# if not base_dir:
|
|
127
|
+
# base_dir = Path(config).parent
|
|
126
128
|
parsed_obj = yaml.safe_load(open(config))
|
|
127
129
|
config = ClientConfig(**parsed_obj)
|
|
128
130
|
self.metadata = config
|
|
@@ -133,8 +135,15 @@ class Client:
|
|
|
133
135
|
|
|
134
136
|
def _initialize_databases(self, **kwargs):
|
|
135
137
|
for name, db_config in self.metadata.databases.items():
|
|
136
|
-
|
|
138
|
+
base_dir = self.base_dir
|
|
139
|
+
logger.info(f"Initializing database: {name}, base_dir: {base_dir}")
|
|
140
|
+
if not base_dir:
|
|
141
|
+
base_dir = Path.cwd()
|
|
142
|
+
logger.info(f"Using current working directory: {base_dir}")
|
|
143
|
+
handle = db_config.handle.format(base_dir=base_dir)
|
|
137
144
|
db_config.handle = handle
|
|
145
|
+
if db_config.schema_location:
|
|
146
|
+
db_config.schema_location = db_config.schema_location.format(base_dir=base_dir)
|
|
138
147
|
db = self.attach_database(handle, alias=name, **kwargs)
|
|
139
148
|
db.from_config(db_config)
|
|
140
149
|
|
linkml_store/api/collection.py
CHANGED
|
@@ -4,7 +4,7 @@ import hashlib
|
|
|
4
4
|
import logging
|
|
5
5
|
from collections import defaultdict
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import TYPE_CHECKING, Any, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
|
|
7
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
from linkml_runtime import SchemaView
|
|
@@ -64,6 +64,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
64
64
|
# hidden: Optional[bool] = False
|
|
65
65
|
|
|
66
66
|
metadata: Optional[CollectionConfig] = None
|
|
67
|
+
default_index_name: ClassVar[str] = "simple"
|
|
67
68
|
|
|
68
69
|
def __init__(
|
|
69
70
|
self, name: str, parent: Optional["Database"] = None, metadata: Optional[CollectionConfig] = None, **kwargs
|
|
@@ -421,7 +422,30 @@ class Collection(Generic[DatabaseType]):
|
|
|
421
422
|
**kwargs,
|
|
422
423
|
) -> QueryResult:
|
|
423
424
|
"""
|
|
424
|
-
Search the collection using a
|
|
425
|
+
Search the collection using a text-based index.
|
|
426
|
+
|
|
427
|
+
Example:
|
|
428
|
+
|
|
429
|
+
>>> from linkml_store import Client
|
|
430
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
431
|
+
>>> client = Client()
|
|
432
|
+
>>> db = client.attach_database("duckdb")
|
|
433
|
+
>>> collection = db.create_collection("Country")
|
|
434
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
435
|
+
>>> collection.insert(objs)
|
|
436
|
+
|
|
437
|
+
Now let's index, using the simple trigram-based index
|
|
438
|
+
|
|
439
|
+
>>> index = get_indexer("simple")
|
|
440
|
+
>>> collection.attach_indexer(index)
|
|
441
|
+
|
|
442
|
+
Now let's find all objects:
|
|
443
|
+
|
|
444
|
+
>>> qr = collection.search("France")
|
|
445
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
446
|
+
>>> assert score > 0.1
|
|
447
|
+
>>> top_obj["code"]
|
|
448
|
+
'FR'
|
|
425
449
|
|
|
426
450
|
:param query:
|
|
427
451
|
:param where:
|
|
@@ -431,12 +455,18 @@ class Collection(Generic[DatabaseType]):
|
|
|
431
455
|
:return:
|
|
432
456
|
"""
|
|
433
457
|
if index_name is None:
|
|
434
|
-
if len(self.
|
|
435
|
-
index_name = list(self.
|
|
458
|
+
if len(self.indexers) == 1:
|
|
459
|
+
index_name = list(self.indexers.keys())[0]
|
|
436
460
|
else:
|
|
437
|
-
|
|
461
|
+
logger.warning("Multiple indexes found. Using default index.")
|
|
462
|
+
index_name = self.default_index_name
|
|
438
463
|
ix_coll = self.parent.get_collection(self._index_collection_name(index_name))
|
|
439
|
-
|
|
464
|
+
if index_name not in self.indexers:
|
|
465
|
+
ix = get_indexer(index_name)
|
|
466
|
+
if not self._indexers:
|
|
467
|
+
self._indexers = {}
|
|
468
|
+
self._indexers[index_name] = ix
|
|
469
|
+
ix = self.indexers.get(index_name)
|
|
440
470
|
if not ix:
|
|
441
471
|
raise ValueError(f"No index named {index_name}")
|
|
442
472
|
qr = ix_coll.find(where=where, limit=-1, **kwargs)
|
|
@@ -453,7 +483,10 @@ class Collection(Generic[DatabaseType]):
|
|
|
453
483
|
@property
|
|
454
484
|
def is_internal(self) -> bool:
|
|
455
485
|
"""
|
|
456
|
-
Check if the collection is internal
|
|
486
|
+
Check if the collection is internal.
|
|
487
|
+
|
|
488
|
+
Internal collections are hidden by default. Examples of internal collections
|
|
489
|
+
include shadow "index" collections
|
|
457
490
|
|
|
458
491
|
:return:
|
|
459
492
|
"""
|
|
@@ -469,6 +502,45 @@ class Collection(Generic[DatabaseType]):
|
|
|
469
502
|
"""
|
|
470
503
|
Attach an index to the collection.
|
|
471
504
|
|
|
505
|
+
As an example, first let's create a collection in a database:
|
|
506
|
+
|
|
507
|
+
>>> from linkml_store import Client
|
|
508
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
509
|
+
>>> client = Client()
|
|
510
|
+
>>> db = client.attach_database("duckdb")
|
|
511
|
+
>>> collection = db.create_collection("Country")
|
|
512
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
513
|
+
>>> collection.insert(objs)
|
|
514
|
+
|
|
515
|
+
We will create two indexes - one that indexes the whole object
|
|
516
|
+
(default behavior), the other one indexes the name only
|
|
517
|
+
|
|
518
|
+
>>> full_index = get_indexer("simple")
|
|
519
|
+
>>> full_index.name = "full"
|
|
520
|
+
>>> name_index = get_indexer("simple", text_template="{name}")
|
|
521
|
+
>>> name_index.name = "name"
|
|
522
|
+
>>> collection.attach_indexer(full_index)
|
|
523
|
+
>>> collection.attach_indexer(name_index)
|
|
524
|
+
|
|
525
|
+
Now let's find objects using the full index, using the string "France".
|
|
526
|
+
We expect the country France to be the top hit, but the score will
|
|
527
|
+
be less than one because we did not match all fields in the object.
|
|
528
|
+
|
|
529
|
+
>>> qr = collection.search("France", index_name="full")
|
|
530
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
531
|
+
>>> assert score > 0.1
|
|
532
|
+
>>> assert score < 0.5
|
|
533
|
+
>>> top_obj["code"]
|
|
534
|
+
'FR'
|
|
535
|
+
|
|
536
|
+
Now using the name index
|
|
537
|
+
|
|
538
|
+
>>> qr = collection.search("France", index_name="name")
|
|
539
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
540
|
+
>>> assert score > 0.99
|
|
541
|
+
>>> top_obj["code"]
|
|
542
|
+
'FR'
|
|
543
|
+
|
|
472
544
|
:param index:
|
|
473
545
|
:param name:
|
|
474
546
|
:param auto_index: Automatically index all objects in the collection
|
|
@@ -504,15 +576,18 @@ class Collection(Generic[DatabaseType]):
|
|
|
504
576
|
|
|
505
577
|
def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
|
|
506
578
|
"""
|
|
507
|
-
Index a list of objects
|
|
579
|
+
Index a list of objects using a specified index.
|
|
580
|
+
|
|
581
|
+
By default, the indexed objects will be stored in a shadow
|
|
582
|
+
collection in the same database, with additional fields for the index vector
|
|
508
583
|
|
|
509
584
|
:param objs:
|
|
510
|
-
:param index_name:
|
|
585
|
+
:param index_name: e.g. simple, llm
|
|
511
586
|
:param replace:
|
|
512
587
|
:param kwargs:
|
|
513
588
|
:return:
|
|
514
589
|
"""
|
|
515
|
-
ix = self._indexers.get(index_name)
|
|
590
|
+
ix = self._indexers.get(index_name, None)
|
|
516
591
|
if not ix:
|
|
517
592
|
raise ValueError(f"No index named {index_name}")
|
|
518
593
|
ix_coll_name = self._index_collection_name(index_name)
|
linkml_store/api/config.py
CHANGED
|
@@ -3,7 +3,11 @@ from typing import Any, Dict, List, Optional
|
|
|
3
3
|
from pydantic import BaseModel, Field
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
class
|
|
6
|
+
class ConfiguredBaseModel(BaseModel, extra="forbid"):
|
|
7
|
+
pass
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CollectionConfig(ConfiguredBaseModel):
|
|
7
11
|
name: Optional[str] = Field(
|
|
8
12
|
default=None,
|
|
9
13
|
description="An optional name for the collection",
|
|
@@ -42,7 +46,7 @@ class CollectionConfig(BaseModel):
|
|
|
42
46
|
)
|
|
43
47
|
|
|
44
48
|
|
|
45
|
-
class DatabaseConfig(
|
|
49
|
+
class DatabaseConfig(ConfiguredBaseModel):
|
|
46
50
|
handle: str = Field(
|
|
47
51
|
default="duckdb:///:memory:",
|
|
48
52
|
description="The database handle, e.g., 'duckdb:///:memory:' or 'mongodb://localhost:27017'",
|
|
@@ -86,7 +90,7 @@ class DatabaseConfig(BaseModel):
|
|
|
86
90
|
)
|
|
87
91
|
|
|
88
92
|
|
|
89
|
-
class ClientConfig(
|
|
93
|
+
class ClientConfig(ConfiguredBaseModel):
|
|
90
94
|
handle: Optional[str] = Field(
|
|
91
95
|
default=None,
|
|
92
96
|
description="The client handle",
|
|
@@ -95,6 +99,10 @@ class ClientConfig(BaseModel):
|
|
|
95
99
|
default={},
|
|
96
100
|
description="A dictionary of database configurations",
|
|
97
101
|
)
|
|
102
|
+
default_database: Optional[str] = Field(
|
|
103
|
+
default=None,
|
|
104
|
+
description="The default database",
|
|
105
|
+
)
|
|
98
106
|
schema_path: Optional[str] = Field(
|
|
99
107
|
default=None,
|
|
100
108
|
description="The path to the LinkML schema file",
|
|
@@ -90,7 +90,9 @@ class DuckDBCollection(Collection):
|
|
|
90
90
|
cd = self.class_definition()
|
|
91
91
|
with self.parent.engine.connect() as conn:
|
|
92
92
|
if not facet_columns:
|
|
93
|
-
|
|
93
|
+
if not cd:
|
|
94
|
+
raise ValueError(f"No class definition found for {self.target_class_name}")
|
|
95
|
+
facet_columns = list(cd.attributes.keys())
|
|
94
96
|
for col in facet_columns:
|
|
95
97
|
logger.debug(f"Faceting on {col}")
|
|
96
98
|
if isinstance(col, tuple):
|
|
@@ -101,7 +103,7 @@ class DuckDBCollection(Collection):
|
|
|
101
103
|
facet_query_str = facet_count_sql(facet_query, col, multivalued=sd.multivalued)
|
|
102
104
|
logger.debug(f"Facet query: {facet_query_str}")
|
|
103
105
|
rows = list(conn.execute(text(facet_query_str)))
|
|
104
|
-
results[col] = rows
|
|
106
|
+
results[col] = [tuple(row) for row in rows]
|
|
105
107
|
return results
|
|
106
108
|
|
|
107
109
|
def _sqla_table(self, cd: ClassDefinition) -> Table:
|
|
@@ -110,7 +112,7 @@ class DuckDBCollection(Collection):
|
|
|
110
112
|
cols = []
|
|
111
113
|
for att in schema_view.class_induced_slots(cd.name):
|
|
112
114
|
typ = TMAP.get(att.range, sqla.String)
|
|
113
|
-
if att.inlined:
|
|
115
|
+
if att.inlined or att.inlined_as_list:
|
|
114
116
|
typ = sqla.JSON
|
|
115
117
|
if att.multivalued:
|
|
116
118
|
typ = sqla.ARRAY(typ, dimensions=1)
|
|
@@ -31,6 +31,18 @@ logger = logging.getLogger(__name__)
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class DuckDBDatabase(Database):
|
|
34
|
+
"""
|
|
35
|
+
An adapter for DuckDB databases.
|
|
36
|
+
|
|
37
|
+
Note that this adapter does not make use of a LinkML relational model transformation and
|
|
38
|
+
SQL Alchemy ORM layer. Instead, it attempts to map each collection (which is of type
|
|
39
|
+
some LinkML class) to a *single* DuckDB table. New tables are not created for nested references,
|
|
40
|
+
and linking tables are not created for many-to-many relationships.
|
|
41
|
+
|
|
42
|
+
Instead the native DuckDB ARRAY type is used to store multivalued attributes, and DuckDB JSON
|
|
43
|
+
types are used for nested inlined objects.
|
|
44
|
+
"""
|
|
45
|
+
|
|
34
46
|
_connection: DuckDBPyConnection = None
|
|
35
47
|
_engine: sqlalchemy.Engine = None
|
|
36
48
|
collection_class = DuckDBCollection
|
|
@@ -103,7 +115,14 @@ class DuckDBDatabase(Database):
|
|
|
103
115
|
if row[col]:
|
|
104
116
|
if isinstance(row[col], list):
|
|
105
117
|
for i in range(len(row[col])):
|
|
106
|
-
|
|
118
|
+
try:
|
|
119
|
+
parsed_val = json.loads(row[col][i])
|
|
120
|
+
except json.JSONDecodeError as e:
|
|
121
|
+
logger.error(f"Failed to parse col {col}[{i}] == {row[col][i]}")
|
|
122
|
+
raise e
|
|
123
|
+
row[col][i] = parsed_val
|
|
124
|
+
elif isinstance(row[col], dict):
|
|
125
|
+
pass
|
|
107
126
|
else:
|
|
108
127
|
row[col] = json.loads(row[col])
|
|
109
128
|
qr.set_rows(pd.DataFrame(rows))
|
|
@@ -2,7 +2,6 @@ import logging
|
|
|
2
2
|
from copy import copy
|
|
3
3
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
4
4
|
|
|
5
|
-
from linkml_runtime.linkml_model import SlotDefinition
|
|
6
5
|
from pymongo.collection import Collection as MongoCollection
|
|
7
6
|
|
|
8
7
|
from linkml_store.api import Collection
|
|
@@ -38,13 +37,15 @@ class MongoDBCollection(Collection):
|
|
|
38
37
|
del obj["_id"]
|
|
39
38
|
self._post_insert_hook(objs)
|
|
40
39
|
|
|
41
|
-
def query(self, query: Query, limit: Optional[int] = None, **kwargs) -> QueryResult:
|
|
40
|
+
def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
|
|
42
41
|
mongo_filter = self._build_mongo_filter(query.where_clause)
|
|
43
42
|
limit = limit or query.limit
|
|
43
|
+
cursor = self.mongo_collection.find(mongo_filter)
|
|
44
44
|
if limit and limit >= 0:
|
|
45
|
-
cursor =
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
cursor = cursor.limit(limit)
|
|
46
|
+
offset = offset or query.offset
|
|
47
|
+
if offset and offset >= 0:
|
|
48
|
+
cursor = cursor.skip(offset)
|
|
48
49
|
|
|
49
50
|
def _as_row(row: dict):
|
|
50
51
|
row = copy(row)
|
|
@@ -63,46 +64,87 @@ class MongoDBCollection(Collection):
|
|
|
63
64
|
mongo_filter[field] = value
|
|
64
65
|
return mongo_filter
|
|
65
66
|
|
|
67
|
+
from typing import Any, Dict, List, Union
|
|
68
|
+
|
|
66
69
|
def query_facets(
|
|
67
|
-
self,
|
|
68
|
-
|
|
70
|
+
self,
|
|
71
|
+
where: Dict = None,
|
|
72
|
+
facet_columns: List[Union[str, Tuple[str, ...]]] = None,
|
|
73
|
+
facet_limit=DEFAULT_FACET_LIMIT,
|
|
74
|
+
**kwargs,
|
|
75
|
+
) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
|
|
69
76
|
results = {}
|
|
70
|
-
cd = self.class_definition()
|
|
71
77
|
if not facet_columns:
|
|
72
78
|
facet_columns = list(self.class_definition().attributes.keys())
|
|
73
79
|
|
|
74
80
|
for col in facet_columns:
|
|
75
81
|
logger.debug(f"Faceting on {col}")
|
|
82
|
+
|
|
83
|
+
# Handle tuple columns
|
|
84
|
+
if isinstance(col, tuple):
|
|
85
|
+
group_id = {k.replace(".", "_"): f"${k}" for k in col}
|
|
86
|
+
all_fields = col
|
|
87
|
+
else:
|
|
88
|
+
group_id = f"${col}"
|
|
89
|
+
all_fields = [col]
|
|
90
|
+
|
|
91
|
+
# Initial pipeline without unwinding
|
|
92
|
+
facet_pipeline = [
|
|
93
|
+
{"$match": where} if where else {"$match": {}},
|
|
94
|
+
{"$group": {"_id": group_id, "count": {"$sum": 1}}},
|
|
95
|
+
{"$sort": {"count": -1}},
|
|
96
|
+
{"$limit": facet_limit},
|
|
97
|
+
]
|
|
98
|
+
|
|
99
|
+
logger.info(f"Initial facet pipeline: {facet_pipeline}")
|
|
100
|
+
initial_results = list(self.mongo_collection.aggregate(facet_pipeline))
|
|
101
|
+
|
|
102
|
+
# Check if we need to unwind based on the results
|
|
103
|
+
needs_unwinding = False
|
|
76
104
|
if isinstance(col, tuple):
|
|
77
|
-
|
|
105
|
+
needs_unwinding = any(
|
|
106
|
+
isinstance(result["_id"], dict) and any(isinstance(v, list) for v in result["_id"].values())
|
|
107
|
+
for result in initial_results
|
|
108
|
+
)
|
|
78
109
|
else:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
110
|
+
needs_unwinding = any(isinstance(result["_id"], list) for result in initial_results)
|
|
111
|
+
|
|
112
|
+
if needs_unwinding:
|
|
113
|
+
logger.info(f"Detected array values for {col}, unwinding...")
|
|
114
|
+
facet_pipeline = [{"$match": where} if where else {"$match": {}}]
|
|
115
|
+
|
|
116
|
+
# Unwind each field if needed
|
|
117
|
+
for field in all_fields:
|
|
118
|
+
field_parts = field.split(".")
|
|
119
|
+
for i in range(len(field_parts)):
|
|
120
|
+
facet_pipeline.append({"$unwind": f"${'.'.join(field_parts[:i + 1])}"})
|
|
121
|
+
|
|
122
|
+
facet_pipeline.extend(
|
|
123
|
+
[
|
|
124
|
+
{"$group": {"_id": group_id, "count": {"$sum": 1}}},
|
|
125
|
+
{"$sort": {"count": -1}},
|
|
126
|
+
{"$limit": facet_limit},
|
|
127
|
+
]
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
logger.info(f"Updated facet pipeline with unwinding: {facet_pipeline}")
|
|
131
|
+
facet_results = list(self.mongo_collection.aggregate(facet_pipeline))
|
|
132
|
+
else:
|
|
133
|
+
facet_results = initial_results
|
|
134
|
+
|
|
135
|
+
logger.info(f"Facet results: {facet_results}")
|
|
136
|
+
|
|
137
|
+
# Process results
|
|
85
138
|
if isinstance(col, tuple):
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
{"$match": where} if where else {"$match": {}},
|
|
91
|
-
{"$unwind": f"${col}"},
|
|
92
|
-
group,
|
|
93
|
-
{"$sort": {"count": -1}},
|
|
94
|
-
{"$limit": facet_limit},
|
|
139
|
+
results[col] = [
|
|
140
|
+
(tuple(result["_id"].values()), result["count"])
|
|
141
|
+
for result in facet_results
|
|
142
|
+
if result["_id"] is not None and all(v is not None for v in result["_id"].values())
|
|
95
143
|
]
|
|
96
144
|
else:
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
group,
|
|
100
|
-
{"$sort": {"count": -1}},
|
|
101
|
-
{"$limit": facet_limit},
|
|
145
|
+
results[col] = [
|
|
146
|
+
(result["_id"], result["count"]) for result in facet_results if result["_id"] is not None
|
|
102
147
|
]
|
|
103
|
-
logger.info(f"Facet pipeline: {facet_pipeline}")
|
|
104
|
-
facet_results = list(self.mongo_collection.aggregate(facet_pipeline))
|
|
105
|
-
results[col] = [(result["_id"], result["count"]) for result in facet_results]
|
|
106
148
|
|
|
107
149
|
return results
|
|
108
150
|
|
linkml_store/cli.py
CHANGED
|
@@ -159,7 +159,15 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
|
|
|
159
159
|
@click.option("--object", "-i", multiple=True, help="Input object as YAML")
|
|
160
160
|
@click.pass_context
|
|
161
161
|
def insert(ctx, files, object, format):
|
|
162
|
-
"""Insert objects from files (JSON, YAML, TSV) into the specified collection.
|
|
162
|
+
"""Insert objects from files (JSON, YAML, TSV) into the specified collection.
|
|
163
|
+
|
|
164
|
+
Using a configuration:
|
|
165
|
+
|
|
166
|
+
linkml-store -C config.yaml -c genes insert data/genes/*.json
|
|
167
|
+
|
|
168
|
+
Note: if you don't provide a schema this will be inferred, but it is
|
|
169
|
+
usually better to provide an explicit schema
|
|
170
|
+
"""
|
|
163
171
|
settings = ctx.obj["settings"]
|
|
164
172
|
collection = settings.collection
|
|
165
173
|
if not collection:
|
linkml_store/index/__init__.py
CHANGED
|
@@ -22,7 +22,7 @@ def get_indexer_class(name: str) -> Type[Indexer]:
|
|
|
22
22
|
return INDEXER_CLASSES[name]
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
def get_indexer(
|
|
25
|
+
def get_indexer(index_type: str, **kwargs) -> Indexer:
|
|
26
26
|
"""
|
|
27
27
|
Get an indexer by name.
|
|
28
28
|
|
|
@@ -31,7 +31,9 @@ def get_indexer(name: str, **kwargs) -> Indexer:
|
|
|
31
31
|
:return: the indexer
|
|
32
32
|
"""
|
|
33
33
|
kwargs = {k: v for k, v in kwargs.items() if v is not None}
|
|
34
|
-
cls = get_indexer_class(
|
|
35
|
-
kwargs["
|
|
34
|
+
cls = get_indexer_class(index_type)
|
|
35
|
+
kwargs["index_type"] = index_type
|
|
36
36
|
indexer = cls(**kwargs)
|
|
37
|
+
if not indexer.name:
|
|
38
|
+
indexer.name = index_type
|
|
37
39
|
return indexer
|
linkml_store/index/indexer.py
CHANGED
|
@@ -28,6 +28,7 @@ class Indexer(BaseModel):
|
|
|
28
28
|
"""
|
|
29
29
|
|
|
30
30
|
name: Optional[str] = None
|
|
31
|
+
index_type: Optional[str] = None
|
|
31
32
|
index_function: Optional[Callable] = None
|
|
32
33
|
distance_function: Optional[Callable] = None
|
|
33
34
|
index_attributes: Optional[List[str]] = None
|
|
@@ -93,13 +94,17 @@ class Indexer(BaseModel):
|
|
|
93
94
|
if "{%" in self.text_template or "{{" in self.text_template:
|
|
94
95
|
logger.info("Detected Jinja2 syntax in text template")
|
|
95
96
|
syntax = TemplateSyntaxEnum.jinja2
|
|
96
|
-
if
|
|
97
|
+
if not syntax:
|
|
98
|
+
syntax = TemplateSyntaxEnum.fstring
|
|
99
|
+
if syntax == TemplateSyntaxEnum.jinja2:
|
|
97
100
|
from jinja2 import Template
|
|
98
101
|
|
|
99
102
|
template = Template(self.text_template)
|
|
100
103
|
return template.render(**obj)
|
|
101
|
-
|
|
104
|
+
elif syntax == TemplateSyntaxEnum.fstring:
|
|
102
105
|
return self.text_template.format(**obj)
|
|
106
|
+
else:
|
|
107
|
+
raise NotImplementedError(f"Cannot handle template syntax: {syntax}")
|
|
103
108
|
return str(obj)
|
|
104
109
|
|
|
105
110
|
def search(
|
|
@@ -64,7 +64,7 @@ def load_objects(
|
|
|
64
64
|
elif format == Format.JSONL or (not format and file_path.endswith(".jsonl")):
|
|
65
65
|
objs = [json.loads(line) for line in f]
|
|
66
66
|
elif format == Format.YAML or (not format and (file_path.endswith(".yaml") or file_path.endswith(".yml"))):
|
|
67
|
-
if expected_type and expected_type == list:
|
|
67
|
+
if expected_type and expected_type == list: # noqa E721
|
|
68
68
|
objs = list(yaml.safe_load_all(f))
|
|
69
69
|
else:
|
|
70
70
|
objs = yaml.safe_load(f)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from linkml_runtime import SchemaView
|
|
4
|
+
from linkml_runtime.linkml_model import SlotDefinition
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def path_to_attribute_list(class_name: str, path: str, schema_view: SchemaView) -> List[SlotDefinition]:
|
|
8
|
+
"""
|
|
9
|
+
Convert a path to a list of attributes.
|
|
10
|
+
|
|
11
|
+
:param path:
|
|
12
|
+
:return:
|
|
13
|
+
"""
|
|
14
|
+
parts = path.split(".")
|
|
15
|
+
att_list = []
|
|
16
|
+
while parts:
|
|
17
|
+
part = parts.pop(0)
|
|
18
|
+
att = schema_view.induced_slot(part, class_name)
|
|
19
|
+
if not att:
|
|
20
|
+
raise ValueError(f"Attribute {part} not found in class {class_name}")
|
|
21
|
+
att_list.append(att)
|
|
22
|
+
class_name = att.range
|
|
23
|
+
return att_list
|
|
File without changes
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>{% block title %}LinkML Store API{% endblock %}</title>
|
|
7
|
+
<style>
|
|
8
|
+
body { font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; }
|
|
9
|
+
h1 { color: #333; }
|
|
10
|
+
a { color: #0066cc; }
|
|
11
|
+
.navigation { margin-bottom: 20px; }
|
|
12
|
+
.content { margin-top: 20px; }
|
|
13
|
+
</style>
|
|
14
|
+
</head>
|
|
15
|
+
<body>
|
|
16
|
+
<div class="navigation">
|
|
17
|
+
<a href="/pages/">Home</a> |
|
|
18
|
+
<a href="/pages/databases">Databases</a>
|
|
19
|
+
</div>
|
|
20
|
+
<div class="content">
|
|
21
|
+
{% block content %}{% endblock %}
|
|
22
|
+
</div>
|
|
23
|
+
</body>
|
|
24
|
+
</html>
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{% extends "base.html.j2" %}
|
|
2
|
+
|
|
3
|
+
{% block content %}
|
|
4
|
+
<h1>{{ response.meta.title }}</h1>
|
|
5
|
+
<p>Name: {{ params.collection_name }}</p>
|
|
6
|
+
|
|
7
|
+
<h2>Collections</h2>
|
|
8
|
+
<ul>
|
|
9
|
+
{% for collection in response.data.collections %}
|
|
10
|
+
<li>
|
|
11
|
+
<a href="/pages{{ collection.links|selectattr('rel', 'equalto', 'self')|first|attr('href') }}">{{ collection.name }}</a>
|
|
12
|
+
</li>
|
|
13
|
+
{% endfor %}
|
|
14
|
+
</ul>
|
|
15
|
+
{% endblock %}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{% extends "base.html.j2" %}
|
|
2
|
+
|
|
3
|
+
{% block content %}
|
|
4
|
+
<h1>{{ response.meta.title }}</h1>
|
|
5
|
+
<p>Handle: {{ response.data.handle }}</p>
|
|
6
|
+
<p>Number of collections: {{ response.data.num_collections }}</p>
|
|
7
|
+
|
|
8
|
+
<h2>Collections</h2>
|
|
9
|
+
<ul>
|
|
10
|
+
{% for collection in response.data.collections %}
|
|
11
|
+
<li>
|
|
12
|
+
<a href="/pages{{ collection.links|selectattr('rel', 'equalto', 'self')|first|attr('href') }}">{{ collection.name }}</a>
|
|
13
|
+
</li>
|
|
14
|
+
{% endfor %}
|
|
15
|
+
</ul>
|
|
16
|
+
{% endblock %}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{% extends "base.html.j2" %}
|
|
2
|
+
|
|
3
|
+
{% block title %}LinkML Store API - Databases{% endblock %}
|
|
4
|
+
|
|
5
|
+
{% block content %}
|
|
6
|
+
<h1>Databases</h1>
|
|
7
|
+
<ul>
|
|
8
|
+
{% for db in response.data.databases %}
|
|
9
|
+
<li>
|
|
10
|
+
<a href="/pages/databases/{{ db.name }}">{{ db.name }}</a>
|
|
11
|
+
</li>
|
|
12
|
+
{% endfor %}
|
|
13
|
+
</ul>
|
|
14
|
+
{% endblock %}
|