linkml-store 0.1.9__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linkml-store might be problematic. Click here for more details.

Files changed (59) hide show
  1. linkml_store-0.1.10/PKG-INFO +138 -0
  2. linkml_store-0.1.10/README.md +84 -0
  3. {linkml_store-0.1.9 → linkml_store-0.1.10}/pyproject.toml +8 -2
  4. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/client.py +13 -4
  5. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/collection.py +85 -10
  6. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/config.py +11 -3
  7. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +5 -3
  8. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/duckdb_database.py +20 -1
  9. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +2 -0
  10. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +74 -32
  11. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/cli.py +9 -1
  12. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/index/__init__.py +5 -3
  13. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/index/indexer.py +7 -2
  14. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/format_utils.py +1 -1
  15. linkml_store-0.1.10/src/linkml_store/utils/schema_utils.py +23 -0
  16. linkml_store-0.1.10/src/linkml_store/webapi/__init__.py +0 -0
  17. linkml_store-0.1.10/src/linkml_store/webapi/html/__init__.py +3 -0
  18. linkml_store-0.1.10/src/linkml_store/webapi/html/base.html.j2 +24 -0
  19. linkml_store-0.1.10/src/linkml_store/webapi/html/collection_details.html.j2 +15 -0
  20. linkml_store-0.1.10/src/linkml_store/webapi/html/database_details.html.j2 +16 -0
  21. linkml_store-0.1.10/src/linkml_store/webapi/html/databases.html.j2 +14 -0
  22. linkml_store-0.1.10/src/linkml_store/webapi/html/generic.html.j2 +46 -0
  23. linkml_store-0.1.10/src/linkml_store/webapi/main.py +572 -0
  24. linkml_store-0.1.9/PKG-INFO +0 -61
  25. linkml_store-0.1.9/README.md +0 -12
  26. {linkml_store-0.1.9 → linkml_store-0.1.10}/LICENSE +0 -0
  27. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/__init__.py +0 -0
  28. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/__init__.py +0 -0
  29. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/database.py +0 -0
  30. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/queries.py +0 -0
  31. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/__init__.py +0 -0
  32. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
  33. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
  34. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
  35. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
  36. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
  37. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
  38. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
  39. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
  40. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
  41. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
  42. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
  43. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
  44. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/__init__.py +0 -0
  45. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
  46. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
  47. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
  48. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/types.py +0 -0
  49. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/constants.py +0 -0
  50. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/index/implementations/__init__.py +0 -0
  51. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
  52. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
  53. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/__init__.py +0 -0
  54. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/change_utils.py +0 -0
  55. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/io.py +0 -0
  56. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/object_utils.py +0 -0
  57. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/patch_utils.py +0 -0
  58. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/query_utils.py +0 -0
  59. {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/sql_utils.py +0 -0
@@ -0,0 +1,138 @@
1
+ Metadata-Version: 2.1
2
+ Name: linkml-store
3
+ Version: 0.1.10
4
+ Summary: linkml-store
5
+ License: MIT
6
+ Author: Author 1
7
+ Author-email: author@org.org
8
+ Requires-Python: >=3.9, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*, !=3.8.*
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Provides-Extra: analytics
16
+ Provides-Extra: app
17
+ Provides-Extra: chromadb
18
+ Provides-Extra: fastapi
19
+ Provides-Extra: h5py
20
+ Provides-Extra: llm
21
+ Provides-Extra: map
22
+ Provides-Extra: mongodb
23
+ Provides-Extra: pyarrow
24
+ Provides-Extra: renderer
25
+ Provides-Extra: tests
26
+ Provides-Extra: validation
27
+ Requires-Dist: black (>=24.0.0) ; extra == "tests"
28
+ Requires-Dist: chromadb ; extra == "chromadb"
29
+ Requires-Dist: click
30
+ Requires-Dist: duckdb (>=0.10.1,<0.11.0)
31
+ Requires-Dist: duckdb-engine (>=0.11.2)
32
+ Requires-Dist: fastapi ; extra == "fastapi"
33
+ Requires-Dist: h5py ; extra == "h5py"
34
+ Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
35
+ Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
36
+ Requires-Dist: linkml ; extra == "validation"
37
+ Requires-Dist: linkml-runtime (>=1.8.0rc2)
38
+ Requires-Dist: linkml_map ; extra == "map"
39
+ Requires-Dist: linkml_renderer ; extra == "renderer"
40
+ Requires-Dist: llm ; extra == "llm"
41
+ Requires-Dist: matplotlib ; extra == "analytics"
42
+ Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
43
+ Requires-Dist: plotly ; extra == "analytics"
44
+ Requires-Dist: pyarrow ; extra == "pyarrow"
45
+ Requires-Dist: pydantic (>=2.0.0,<3.0.0)
46
+ Requires-Dist: pymongo ; extra == "mongodb"
47
+ Requires-Dist: pystow (>=0.5.4,<0.6.0)
48
+ Requires-Dist: seaborn ; extra == "analytics"
49
+ Requires-Dist: sqlalchemy
50
+ Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
51
+ Requires-Dist: uvicorn ; extra == "fastapi"
52
+ Description-Content-Type: text/markdown
53
+
54
+ # linkml-store
55
+
56
+ An AI-ready data management and integration platform. LinkML-Store
57
+ provides an abstraction layer over multiple different backends
58
+ (including DuckDB, MongoDB, and local filesystems), allowing for
59
+ common query, index, and storage operations.
60
+
61
+ For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
62
+
63
+ __Warning__ LinkML-Store is still undergoing changes and refactoring,
64
+ APIs and command line options are subject to change!
65
+
66
+ ## Quick Start
67
+
68
+ Install, add data, query it:
69
+
70
+ ```
71
+ pip install linkml-store[all]
72
+ linkml-store -d duckdb:///db/my.db -c persons insert data/*.json
73
+ linkml-store -d duckdb:///db/my.db -c persons query -w "occupation: Bricklayer"
74
+ ```
75
+
76
+ Index it, search it:
77
+
78
+ ```
79
+ linkml-store -d duckdb:///db/my.db -c persons index -t llm
80
+ linkml-store -d duckdb:///db/my.db -c persons search "all persons employed in construction"
81
+ ```
82
+
83
+ Validate it:
84
+
85
+ ```
86
+ linkml-store -d duckdb:///db/my.db -c persons validate
87
+ ```
88
+
89
+ ## Basic usage
90
+
91
+ * [Command Line](https://linkml.io/linkml-store/tutorials/Command-Line-Tutorial.html)
92
+ * [Python](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
93
+ * API
94
+ * Streamlit applications
95
+
96
+ ## Features
97
+
98
+ ### Multiple Adapters
99
+
100
+ LinkML-Store is designed to work with multiple backends, giving a common abstraction layer
101
+
102
+ * [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
103
+ * [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
104
+ * [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
105
+ * Filesystem
106
+
107
+ Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
108
+
109
+ The intent is to give a union of all features of each backend. For
110
+ example, analytic faceted queries are provided for *all* backends, not
111
+ just Solr.
112
+
113
+ ### Composable indexes
114
+
115
+ Many backends come with their own indexing and search
116
+ schemes. Classically this was Lucene-based indexes, now it is semantic
117
+ search using LLM embeddings.
118
+
119
+ LinkML store treats indexing as an orthogonal concern - you can
120
+ compose different indexing schemes with different backends. You don't
121
+ need to have a vector database to run embedding search!
122
+
123
+ See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
124
+
125
+ ### Validation
126
+
127
+ LinkML-Store is backed by [LinkML](https://linkml.io), which allows
128
+ for powerful expressive structural and semantic constraints.
129
+
130
+ See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.html)
131
+
132
+ and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
133
+
134
+
135
+ ## Background
136
+
137
+ See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
138
+
@@ -0,0 +1,84 @@
1
+ # linkml-store
2
+
3
+ An AI-ready data management and integration platform. LinkML-Store
4
+ provides an abstraction layer over multiple different backends
5
+ (including DuckDB, MongoDB, and local filesystems), allowing for
6
+ common query, index, and storage operations.
7
+
8
+ For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
9
+
10
+ __Warning__ LinkML-Store is still undergoing changes and refactoring,
11
+ APIs and command line options are subject to change!
12
+
13
+ ## Quick Start
14
+
15
+ Install, add data, query it:
16
+
17
+ ```
18
+ pip install linkml-store[all]
19
+ linkml-store -d duckdb:///db/my.db -c persons insert data/*.json
20
+ linkml-store -d duckdb:///db/my.db -c persons query -w "occupation: Bricklayer"
21
+ ```
22
+
23
+ Index it, search it:
24
+
25
+ ```
26
+ linkml-store -d duckdb:///db/my.db -c persons index -t llm
27
+ linkml-store -d duckdb:///db/my.db -c persons search "all persons employed in construction"
28
+ ```
29
+
30
+ Validate it:
31
+
32
+ ```
33
+ linkml-store -d duckdb:///db/my.db -c persons validate
34
+ ```
35
+
36
+ ## Basic usage
37
+
38
+ * [Command Line](https://linkml.io/linkml-store/tutorials/Command-Line-Tutorial.html)
39
+ * [Python](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
40
+ * API
41
+ * Streamlit applications
42
+
43
+ ## Features
44
+
45
+ ### Multiple Adapters
46
+
47
+ LinkML-Store is designed to work with multiple backends, giving a common abstraction layer
48
+
49
+ * [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
50
+ * [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
51
+ * [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
52
+ * Filesystem
53
+
54
+ Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
55
+
56
+ The intent is to give a union of all features of each backend. For
57
+ example, analytic faceted queries are provided for *all* backends, not
58
+ just Solr.
59
+
60
+ ### Composable indexes
61
+
62
+ Many backends come with their own indexing and search
63
+ schemes. Classically this was Lucene-based indexes, now it is semantic
64
+ search using LLM embeddings.
65
+
66
+ LinkML store treats indexing as an orthogonal concern - you can
67
+ compose different indexing schemes with different backends. You don't
68
+ need to have a vector database to run embedding search!
69
+
70
+ See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
71
+
72
+ ### Validation
73
+
74
+ LinkML-Store is backed by [LinkML](https://linkml.io), which allows
75
+ for powerful expressive structural and semantic constraints.
76
+
77
+ See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.html)
78
+
79
+ and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
80
+
81
+
82
+ ## Background
83
+
84
+ See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "linkml-store"
3
- version = "0.1.9"
3
+ version = "0.1.10"
4
4
  description = "linkml-store"
5
5
  authors = ["Author 1 <author@org.org>"]
6
6
  license = "MIT"
@@ -10,7 +10,7 @@ readme = "README.md"
10
10
  python = "^3.9, !=3.9.7"
11
11
  click = "*"
12
12
  pydantic = "^2.0.0"
13
- linkml-runtime = "^1.7.5"
13
+ linkml-runtime = ">=1.8.0rc2"
14
14
  streamlit = { version = "^1.32.2", optional = true }
15
15
  sqlalchemy = "*"
16
16
  duckdb = "^0.10.1"
@@ -27,9 +27,12 @@ pyarrow = { version="*", optional = true }
27
27
  h5py = { version="*", optional = true }
28
28
  linkml = { version="*", optional = true }
29
29
  linkml_map = { version="*", optional = true }
30
+ linkml_renderer = { version="*", optional = true }
30
31
  pandas = ">=2.2.1"
31
32
  jinja2 = "^3.1.4"
32
33
  jsonlines = "^4.0.0"
34
+ fastapi = { version="*", optional = true }
35
+ uvicorn = { version="*", optional = true }
33
36
 
34
37
  [tool.poetry.group.dev.dependencies]
35
38
  pytest = {version = ">=7.1.2"}
@@ -66,9 +69,12 @@ h5py = ["h5py"]
66
69
  pyarrow = ["pyarrow"]
67
70
  validation = ["linkml"]
68
71
  map = ["linkml_map"]
72
+ renderer = ["linkml_renderer"]
73
+ fastapi = ["fastapi", "uvicorn"]
69
74
 
70
75
  [tool.poetry.scripts]
71
76
  linkml-store = "linkml_store.cli:cli"
77
+ linkml-store-api = "linkml_store.webapi.main:start"
72
78
 
73
79
  [tool.poetry-dynamic-versioning]
74
80
  enable = false
@@ -98,7 +98,7 @@ class Client:
98
98
  """
99
99
  return self.metadata.base_dir
100
100
 
101
- def from_config(self, config: Union[ClientConfig, str, Path], base_dir=None, **kwargs):
101
+ def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, **kwargs):
102
102
  """
103
103
  Create a client from a configuration.
104
104
 
@@ -118,11 +118,13 @@ class Client:
118
118
  :return:
119
119
 
120
120
  """
121
+ if isinstance(config, dict):
122
+ config = ClientConfig(**config)
121
123
  if isinstance(config, Path):
122
124
  config = str(config)
123
125
  if isinstance(config, str):
124
- if not base_dir:
125
- base_dir = Path(config).parent
126
+ # if not base_dir:
127
+ # base_dir = Path(config).parent
126
128
  parsed_obj = yaml.safe_load(open(config))
127
129
  config = ClientConfig(**parsed_obj)
128
130
  self.metadata = config
@@ -133,8 +135,15 @@ class Client:
133
135
 
134
136
  def _initialize_databases(self, **kwargs):
135
137
  for name, db_config in self.metadata.databases.items():
136
- handle = db_config.handle.format(base_dir=self.base_dir)
138
+ base_dir = self.base_dir
139
+ logger.info(f"Initializing database: {name}, base_dir: {base_dir}")
140
+ if not base_dir:
141
+ base_dir = Path.cwd()
142
+ logger.info(f"Using current working directory: {base_dir}")
143
+ handle = db_config.handle.format(base_dir=base_dir)
137
144
  db_config.handle = handle
145
+ if db_config.schema_location:
146
+ db_config.schema_location = db_config.schema_location.format(base_dir=base_dir)
138
147
  db = self.attach_database(handle, alias=name, **kwargs)
139
148
  db.from_config(db_config)
140
149
 
@@ -4,7 +4,7 @@ import hashlib
4
4
  import logging
5
5
  from collections import defaultdict
6
6
  from pathlib import Path
7
- from typing import TYPE_CHECKING, Any, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
7
+ from typing import TYPE_CHECKING, Any, ClassVar, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
8
8
 
9
9
  import numpy as np
10
10
  from linkml_runtime import SchemaView
@@ -64,6 +64,7 @@ class Collection(Generic[DatabaseType]):
64
64
  # hidden: Optional[bool] = False
65
65
 
66
66
  metadata: Optional[CollectionConfig] = None
67
+ default_index_name: ClassVar[str] = "simple"
67
68
 
68
69
  def __init__(
69
70
  self, name: str, parent: Optional["Database"] = None, metadata: Optional[CollectionConfig] = None, **kwargs
@@ -421,7 +422,30 @@ class Collection(Generic[DatabaseType]):
421
422
  **kwargs,
422
423
  ) -> QueryResult:
423
424
  """
424
- Search the collection using a full-text search index.
425
+ Search the collection using a text-based index.
426
+
427
+ Example:
428
+
429
+ >>> from linkml_store import Client
430
+ >>> from linkml_store.utils.format_utils import load_objects
431
+ >>> client = Client()
432
+ >>> db = client.attach_database("duckdb")
433
+ >>> collection = db.create_collection("Country")
434
+ >>> objs = load_objects("tests/input/countries/countries.jsonl")
435
+ >>> collection.insert(objs)
436
+
437
+ Now let's index, using the simple trigram-based index
438
+
439
+ >>> index = get_indexer("simple")
440
+ >>> collection.attach_indexer(index)
441
+
442
+ Now let's find all objects:
443
+
444
+ >>> qr = collection.search("France")
445
+ >>> score, top_obj = qr.ranked_rows[0]
446
+ >>> assert score > 0.1
447
+ >>> top_obj["code"]
448
+ 'FR'
425
449
 
426
450
  :param query:
427
451
  :param where:
@@ -431,12 +455,18 @@ class Collection(Generic[DatabaseType]):
431
455
  :return:
432
456
  """
433
457
  if index_name is None:
434
- if len(self._indexers) == 1:
435
- index_name = list(self._indexers.keys())[0]
458
+ if len(self.indexers) == 1:
459
+ index_name = list(self.indexers.keys())[0]
436
460
  else:
437
- raise ValueError("Multiple indexes found. Please specify an index name.")
461
+ logger.warning("Multiple indexes found. Using default index.")
462
+ index_name = self.default_index_name
438
463
  ix_coll = self.parent.get_collection(self._index_collection_name(index_name))
439
- ix = self._indexers.get(index_name)
464
+ if index_name not in self.indexers:
465
+ ix = get_indexer(index_name)
466
+ if not self._indexers:
467
+ self._indexers = {}
468
+ self._indexers[index_name] = ix
469
+ ix = self.indexers.get(index_name)
440
470
  if not ix:
441
471
  raise ValueError(f"No index named {index_name}")
442
472
  qr = ix_coll.find(where=where, limit=-1, **kwargs)
@@ -453,7 +483,10 @@ class Collection(Generic[DatabaseType]):
453
483
  @property
454
484
  def is_internal(self) -> bool:
455
485
  """
456
- Check if the collection is internal
486
+ Check if the collection is internal.
487
+
488
+ Internal collections are hidden by default. Examples of internal collections
489
+ include shadow "index" collections
457
490
 
458
491
  :return:
459
492
  """
@@ -469,6 +502,45 @@ class Collection(Generic[DatabaseType]):
469
502
  """
470
503
  Attach an index to the collection.
471
504
 
505
+ As an example, first let's create a collection in a database:
506
+
507
+ >>> from linkml_store import Client
508
+ >>> from linkml_store.utils.format_utils import load_objects
509
+ >>> client = Client()
510
+ >>> db = client.attach_database("duckdb")
511
+ >>> collection = db.create_collection("Country")
512
+ >>> objs = load_objects("tests/input/countries/countries.jsonl")
513
+ >>> collection.insert(objs)
514
+
515
+ We will create two indexes - one that indexes the whole object
516
+ (default behavior), the other one indexes the name only
517
+
518
+ >>> full_index = get_indexer("simple")
519
+ >>> full_index.name = "full"
520
+ >>> name_index = get_indexer("simple", text_template="{name}")
521
+ >>> name_index.name = "name"
522
+ >>> collection.attach_indexer(full_index)
523
+ >>> collection.attach_indexer(name_index)
524
+
525
+ Now let's find objects using the full index, using the string "France".
526
+ We expect the country France to be the top hit, but the score will
527
+ be less than zero because we did not match all fields in the object.
528
+
529
+ >>> qr = collection.search("France", index_name="full")
530
+ >>> score, top_obj = qr.ranked_rows[0]
531
+ >>> assert score > 0.1
532
+ >>> assert score < 0.5
533
+ >>> top_obj["code"]
534
+ 'FR'
535
+
536
+ Now using the name index
537
+
538
+ >>> qr = collection.search("France", index_name="name")
539
+ >>> score, top_obj = qr.ranked_rows[0]
540
+ >>> assert score > 0.99
541
+ >>> top_obj["code"]
542
+ 'FR'
543
+
472
544
  :param index:
473
545
  :param name:
474
546
  :param auto_index: Automatically index all objects in the collection
@@ -504,15 +576,18 @@ class Collection(Generic[DatabaseType]):
504
576
 
505
577
  def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
506
578
  """
507
- Index a list of objects
579
+ Index a list of objects using a specified index.
580
+
581
+ By default, the indexed objects will be stored in a shadow
582
+ collection in the same database, with additional fields for the index vector
508
583
 
509
584
  :param objs:
510
- :param index_name:
585
+ :param index_name: e.g. simple, llm
511
586
  :param replace:
512
587
  :param kwargs:
513
588
  :return:
514
589
  """
515
- ix = self._indexers.get(index_name)
590
+ ix = self._indexers.get(index_name, None)
516
591
  if not ix:
517
592
  raise ValueError(f"No index named {index_name}")
518
593
  ix_coll_name = self._index_collection_name(index_name)
@@ -3,7 +3,11 @@ from typing import Any, Dict, List, Optional
3
3
  from pydantic import BaseModel, Field
4
4
 
5
5
 
6
- class CollectionConfig(BaseModel):
6
+ class ConfiguredBaseModel(BaseModel, extra="forbid"):
7
+ pass
8
+
9
+
10
+ class CollectionConfig(ConfiguredBaseModel):
7
11
  name: Optional[str] = Field(
8
12
  default=None,
9
13
  description="An optional name for the collection",
@@ -42,7 +46,7 @@ class CollectionConfig(BaseModel):
42
46
  )
43
47
 
44
48
 
45
- class DatabaseConfig(BaseModel):
49
+ class DatabaseConfig(ConfiguredBaseModel):
46
50
  handle: str = Field(
47
51
  default="duckdb:///:memory:",
48
52
  description="The database handle, e.g., 'duckdb:///:memory:' or 'mongodb://localhost:27017'",
@@ -86,7 +90,7 @@ class DatabaseConfig(BaseModel):
86
90
  )
87
91
 
88
92
 
89
- class ClientConfig(BaseModel):
93
+ class ClientConfig(ConfiguredBaseModel):
90
94
  handle: Optional[str] = Field(
91
95
  default=None,
92
96
  description="The client handle",
@@ -95,6 +99,10 @@ class ClientConfig(BaseModel):
95
99
  default={},
96
100
  description="A dictionary of database configurations",
97
101
  )
102
+ default_database: Optional[str] = Field(
103
+ default=None,
104
+ description="The default database",
105
+ )
98
106
  schema_path: Optional[str] = Field(
99
107
  default=None,
100
108
  description="The path to the LinkML schema file",
@@ -90,7 +90,9 @@ class DuckDBCollection(Collection):
90
90
  cd = self.class_definition()
91
91
  with self.parent.engine.connect() as conn:
92
92
  if not facet_columns:
93
- facet_columns = list(self.class_definition().attributes.keys())
93
+ if not cd:
94
+ raise ValueError(f"No class definition found for {self.target_class_name}")
95
+ facet_columns = list(cd.attributes.keys())
94
96
  for col in facet_columns:
95
97
  logger.debug(f"Faceting on {col}")
96
98
  if isinstance(col, tuple):
@@ -101,7 +103,7 @@ class DuckDBCollection(Collection):
101
103
  facet_query_str = facet_count_sql(facet_query, col, multivalued=sd.multivalued)
102
104
  logger.debug(f"Facet query: {facet_query_str}")
103
105
  rows = list(conn.execute(text(facet_query_str)))
104
- results[col] = rows
106
+ results[col] = [tuple(row) for row in rows]
105
107
  return results
106
108
 
107
109
  def _sqla_table(self, cd: ClassDefinition) -> Table:
@@ -110,7 +112,7 @@ class DuckDBCollection(Collection):
110
112
  cols = []
111
113
  for att in schema_view.class_induced_slots(cd.name):
112
114
  typ = TMAP.get(att.range, sqla.String)
113
- if att.inlined:
115
+ if att.inlined or att.inlined_as_list:
114
116
  typ = sqla.JSON
115
117
  if att.multivalued:
116
118
  typ = sqla.ARRAY(typ, dimensions=1)
@@ -31,6 +31,18 @@ logger = logging.getLogger(__name__)
31
31
 
32
32
 
33
33
  class DuckDBDatabase(Database):
34
+ """
35
+ An adapter for DuckDB databases.
36
+
37
+ Note that this adapter does not make use of a LinkML relational model transformation and
38
+ SQL Alchemy ORM layer. Instead, it attempts to map each collection (which is of type
39
+ some LinkML class) to a *single* DuckDB table. New tables are not created for nested references,
40
+ and linking tables are not created for many-to-many relationships.
41
+
42
+ Instead the native DuckDB ARRAY type is used to store multivalued attributes, and DuckDB JSON
43
+ types are used for nested inlined objects.
44
+ """
45
+
34
46
  _connection: DuckDBPyConnection = None
35
47
  _engine: sqlalchemy.Engine = None
36
48
  collection_class = DuckDBCollection
@@ -103,7 +115,14 @@ class DuckDBDatabase(Database):
103
115
  if row[col]:
104
116
  if isinstance(row[col], list):
105
117
  for i in range(len(row[col])):
106
- row[col][i] = json.loads(row[col][i])
118
+ try:
119
+ parsed_val = json.loads(row[col][i])
120
+ except json.JSONDecodeError as e:
121
+ logger.error(f"Failed to parse col {col}[{i}] == {row[col][i]}")
122
+ raise e
123
+ row[col][i] = parsed_val
124
+ elif isinstance(row[col], dict):
125
+ pass
107
126
  else:
108
127
  row[col] = json.loads(row[col])
109
128
  qr.set_rows(pd.DataFrame(rows))
@@ -170,6 +170,8 @@ class FileSystemCollection(Collection[DatabaseType]):
170
170
  for fc in facet_columns:
171
171
  if fc in row:
172
172
  v = row[fc]
173
+ if not isinstance(v, str):
174
+ v = str(v)
173
175
  if v not in facet_results[fc]:
174
176
  facet_results[fc][v] = 1
175
177
  else: