linkml-store 0.1.8__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)
  1. linkml_store-0.1.10/PKG-INFO +138 -0
  2. linkml_store-0.1.10/README.md +84 -0
  3. {linkml_store-0.1.8 → linkml_store-0.1.10}/pyproject.toml +13 -3
  4. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/client.py +15 -4
  5. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/collection.py +185 -15
  6. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/config.py +11 -3
  7. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/database.py +36 -5
  8. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +6 -3
  9. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/duckdb_database.py +20 -1
  10. linkml_store-0.1.10/src/linkml_store/api/stores/filesystem/__init__.py +15 -0
  11. linkml_store-0.1.10/src/linkml_store/api/stores/filesystem/filesystem_collection.py +179 -0
  12. linkml_store-0.1.10/src/linkml_store/api/stores/filesystem/filesystem_database.py +72 -0
  13. linkml_store-0.1.10/src/linkml_store/api/stores/mongodb/mongodb_collection.py +171 -0
  14. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/mongodb/mongodb_database.py +13 -2
  15. linkml_store-0.1.10/src/linkml_store/api/types.py +4 -0
  16. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/cli.py +97 -8
  17. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/index/__init__.py +5 -3
  18. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/index/indexer.py +7 -2
  19. linkml_store-0.1.10/src/linkml_store/utils/change_utils.py +17 -0
  20. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/utils/format_utils.py +89 -8
  21. linkml_store-0.1.10/src/linkml_store/utils/patch_utils.py +126 -0
  22. linkml_store-0.1.10/src/linkml_store/utils/query_utils.py +89 -0
  23. linkml_store-0.1.10/src/linkml_store/utils/schema_utils.py +23 -0
  24. linkml_store-0.1.10/src/linkml_store/webapi/__init__.py +0 -0
  25. linkml_store-0.1.10/src/linkml_store/webapi/html/__init__.py +3 -0
  26. linkml_store-0.1.10/src/linkml_store/webapi/html/base.html.j2 +24 -0
  27. linkml_store-0.1.10/src/linkml_store/webapi/html/collection_details.html.j2 +15 -0
  28. linkml_store-0.1.10/src/linkml_store/webapi/html/database_details.html.j2 +16 -0
  29. linkml_store-0.1.10/src/linkml_store/webapi/html/databases.html.j2 +14 -0
  30. linkml_store-0.1.10/src/linkml_store/webapi/html/generic.html.j2 +46 -0
  31. linkml_store-0.1.10/src/linkml_store/webapi/main.py +572 -0
  32. linkml_store-0.1.8/PKG-INFO +0 -58
  33. linkml_store-0.1.8/README.md +0 -12
  34. linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/__init__.py +0 -16
  35. linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -142
  36. linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -36
  37. linkml_store-0.1.8/src/linkml_store/api/stores/mongodb/mongodb_collection.py +0 -123
  38. {linkml_store-0.1.8 → linkml_store-0.1.10}/LICENSE +0 -0
  39. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/__init__.py +0 -0
  40. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/__init__.py +0 -0
  41. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/queries.py +0 -0
  42. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/__init__.py +0 -0
  43. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
  44. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
  45. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
  46. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
  47. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
  48. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
  49. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
  50. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
  51. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
  52. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/__init__.py +0 -0
  53. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
  54. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
  55. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
  56. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/constants.py +0 -0
  57. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/index/implementations/__init__.py +0 -0
  58. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
  59. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
  60. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/utils/__init__.py +0 -0
  61. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/utils/io.py +0 -0
  62. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/utils/object_utils.py +0 -0
  63. {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/utils/sql_utils.py +0 -0
@@ -0,0 +1,138 @@
+ Metadata-Version: 2.1
+ Name: linkml-store
+ Version: 0.1.10
+ Summary: linkml-store
+ License: MIT
+ Author: Author 1
+ Author-email: author@org.org
+ Requires-Python: >=3.9, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*, !=3.8.*
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Provides-Extra: analytics
+ Provides-Extra: app
+ Provides-Extra: chromadb
+ Provides-Extra: fastapi
+ Provides-Extra: h5py
+ Provides-Extra: llm
+ Provides-Extra: map
+ Provides-Extra: mongodb
+ Provides-Extra: pyarrow
+ Provides-Extra: renderer
+ Provides-Extra: tests
+ Provides-Extra: validation
+ Requires-Dist: black (>=24.0.0) ; extra == "tests"
+ Requires-Dist: chromadb ; extra == "chromadb"
+ Requires-Dist: click
+ Requires-Dist: duckdb (>=0.10.1,<0.11.0)
+ Requires-Dist: duckdb-engine (>=0.11.2)
+ Requires-Dist: fastapi ; extra == "fastapi"
+ Requires-Dist: h5py ; extra == "h5py"
+ Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
+ Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
+ Requires-Dist: linkml ; extra == "validation"
+ Requires-Dist: linkml-runtime (>=1.8.0rc2)
+ Requires-Dist: linkml_map ; extra == "map"
+ Requires-Dist: linkml_renderer ; extra == "renderer"
+ Requires-Dist: llm ; extra == "llm"
+ Requires-Dist: matplotlib ; extra == "analytics"
+ Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
+ Requires-Dist: plotly ; extra == "analytics"
+ Requires-Dist: pyarrow ; extra == "pyarrow"
+ Requires-Dist: pydantic (>=2.0.0,<3.0.0)
+ Requires-Dist: pymongo ; extra == "mongodb"
+ Requires-Dist: pystow (>=0.5.4,<0.6.0)
+ Requires-Dist: seaborn ; extra == "analytics"
+ Requires-Dist: sqlalchemy
+ Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
+ Requires-Dist: uvicorn ; extra == "fastapi"
+ Description-Content-Type: text/markdown
+
+ # linkml-store
+
+ An AI-ready data management and integration platform. LinkML-Store
+ provides an abstraction layer over multiple different backends
+ (including DuckDB, MongoDB, and local filesystems), allowing for
+ common query, index, and storage operations.
+
+ For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
+
+ __Warning__ LinkML-Store is still undergoing changes and refactoring,
+ APIs and command line options are subject to change!
+
+ ## Quick Start
+
+ Install, add data, query it:
+
+ ```
+ pip install linkml-store[all]
+ linkml-store -d duckdb:///db/my.db -c persons insert data/*.json
+ linkml-store -d duckdb:///db/my.db -c persons query -w "occupation: Bricklayer"
+ ```
+
+ Index it, search it:
+
+ ```
+ linkml-store -d duckdb:///db/my.db -c persons index -t llm
+ linkml-store -d duckdb:///db/my.db -c persons search "all persons employed in construction"
+ ```
+
+ Validate it:
+
+ ```
+ linkml-store -d duckdb:///db/my.db -c persons validate
+ ```
+
+ ## Basic usage
+
+ * [Command Line](https://linkml.io/linkml-store/tutorials/Command-Line-Tutorial.html)
+ * [Python](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
+ * API
+ * Streamlit applications
+
+ ## Features
+
+ ### Multiple Adapters
+
+ LinkML-Store is designed to work with multiple backends, giving a common abstraction layer
+
+ * [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
+ * [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
+ * [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
+ * Filesystem
+
+ Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
+
+ The intent is to give a union of all features of each backend. For
+ example, analytic faceted queries are provided for *all* backends, not
+ just Solr.
+
+ ### Composable indexes
+
+ Many backends come with their own indexing and search
+ schemes. Classically this was Lucene-based indexes, now it is semantic
+ search using LLM embeddings.
+
+ LinkML store treats indexing as an orthogonal concern - you can
+ compose different indexing schemes with different backends. You don't
+ need to have a vector database to run embedding search!
+
+ See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
+
+ ### Validation
+
+ LinkML-Store is backed by [LinkML](https://linkml.io), which allows
+ for powerful expressive structural and semantic constraints.
+
+ See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.html)
+
+ and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
+
+
+ ## Background
+
+ See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
+
@@ -0,0 +1,84 @@
+ # linkml-store
+
+ An AI-ready data management and integration platform. LinkML-Store
+ provides an abstraction layer over multiple different backends
+ (including DuckDB, MongoDB, and local filesystems), allowing for
+ common query, index, and storage operations.
+
+ For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
+
+ __Warning__ LinkML-Store is still undergoing changes and refactoring,
+ APIs and command line options are subject to change!
+
+ ## Quick Start
+
+ Install, add data, query it:
+
+ ```
+ pip install linkml-store[all]
+ linkml-store -d duckdb:///db/my.db -c persons insert data/*.json
+ linkml-store -d duckdb:///db/my.db -c persons query -w "occupation: Bricklayer"
+ ```
+
+ Index it, search it:
+
+ ```
+ linkml-store -d duckdb:///db/my.db -c persons index -t llm
+ linkml-store -d duckdb:///db/my.db -c persons search "all persons employed in construction"
+ ```
+
+ Validate it:
+
+ ```
+ linkml-store -d duckdb:///db/my.db -c persons validate
+ ```
+
+ ## Basic usage
+
+ * [Command Line](https://linkml.io/linkml-store/tutorials/Command-Line-Tutorial.html)
+ * [Python](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
+ * API
+ * Streamlit applications
+
+ ## Features
+
+ ### Multiple Adapters
+
+ LinkML-Store is designed to work with multiple backends, giving a common abstraction layer
+
+ * [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
+ * [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
+ * [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
+ * Filesystem
+
+ Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
+
+ The intent is to give a union of all features of each backend. For
+ example, analytic faceted queries are provided for *all* backends, not
+ just Solr.
+
+ ### Composable indexes
+
+ Many backends come with their own indexing and search
+ schemes. Classically this was Lucene-based indexes, now it is semantic
+ search using LLM embeddings.
+
+ LinkML store treats indexing as an orthogonal concern - you can
+ compose different indexing schemes with different backends. You don't
+ need to have a vector database to run embedding search!
+
+ See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
+
+ ### Validation
+
+ LinkML-Store is backed by [LinkML](https://linkml.io), which allows
+ for powerful expressive structural and semantic constraints.
+
+ See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.html)
+
+ and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
+
+
+ ## Background
+
+ See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
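
The Quick Start above is CLI-only; the collection.py changes later in this diff include doctests that exercise the same flow from Python. The following is a minimal sketch assembled from those doctests (the JSONL path is the package's own test fixture; any other newline-delimited JSON of objects would work the same way):

```python
# Mirrors the CLI Quick Start using the Python API shown in the collection.py doctests.
from linkml_store import Client
from linkml_store.index import get_indexer
from linkml_store.utils.format_utils import load_objects

client = Client()
db = client.attach_database("duckdb")          # DuckDB backend, as in the doctests
collection = db.create_collection("Country")
collection.insert(load_objects("tests/input/countries/countries.jsonl"))

# Attach the simple trigram-based indexer, then search.
collection.attach_indexer(get_indexer("simple"))
qr = collection.search("France")
score, top_obj = qr.ranked_rows[0]             # ranked (score, object) pairs
print(score, top_obj["code"])                  # the doctests expect 'FR' as the top hit
```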
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "linkml-store"
- version = "0.1.8"
+ version = "0.1.10"
  description = "linkml-store"
  authors = ["Author 1 <author@org.org>"]
  license = "MIT"
@@ -10,7 +10,7 @@ readme = "README.md"
  python = "^3.9, !=3.9.7"
  click = "*"
  pydantic = "^2.0.0"
- linkml-runtime = "^1.7.5"
+ linkml-runtime = ">=1.8.0rc2"
  streamlit = { version = "^1.32.2", optional = true }
  sqlalchemy = "*"
  duckdb = "^0.10.1"
@@ -23,11 +23,16 @@ black = { version=">=24.0.0", optional = true }
  llm = { version="*", optional = true }
  pymongo = { version="*", optional = true }
  chromadb = { version="*", optional = true }
+ pyarrow = { version="*", optional = true }
  h5py = { version="*", optional = true }
  linkml = { version="*", optional = true }
  linkml_map = { version="*", optional = true }
+ linkml_renderer = { version="*", optional = true }
  pandas = ">=2.2.1"
  jinja2 = "^3.1.4"
+ jsonlines = "^4.0.0"
+ fastapi = { version="*", optional = true }
+ uvicorn = { version="*", optional = true }

  [tool.poetry.group.dev.dependencies]
  pytest = {version = ">=7.1.2"}
@@ -43,6 +48,7 @@ furo = {version = "*"}
  nbsphinx = "*"
  jupyter = "*"
  jupysql = "*"
+ papermill = "*"

  [tool.poetry.group.tests.dependencies]
  pytest = "^7.4.0"
@@ -60,11 +66,15 @@ llm = ["llm"]
  mongodb = ["pymongo"]
  chromadb = ["chromadb"]
  h5py = ["h5py"]
+ pyarrow = ["pyarrow"]
  validation = ["linkml"]
  map = ["linkml_map"]
+ renderer = ["linkml_renderer"]
+ fastapi = ["fastapi", "uvicorn"]

  [tool.poetry.scripts]
  linkml-store = "linkml_store.cli:cli"
+ linkml-store-api = "linkml_store.webapi.main:start"

  [tool.poetry-dynamic-versioning]
  enable = false
@@ -127,7 +137,7 @@ skip = '.git,*.pdf,*.svg,./tests,pyproject.toml,*.dill,poetry.lock,*.ipynb'
  # Ignore table where words could be split across rows
  # Ignore shortcut specifications like [Ff]alse
  ignore-regex = '(\|.*\|.*\|.*\||\[[A-Z][a-z]\][a-z][a-z])'
- ignore-words-list = 'mater,connexion,infarction'
+ ignore-words-list = 'mater,connexion,infarction,nin'
  count = ""
  quiet-level = 3

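The new `linkml-store-api` console script maps to `linkml_store.webapi.main:start` and is gated behind the new `fastapi` extra (fastapi plus uvicorn). A hedged sketch of invoking the same entry point from Python; the `start` callable is taken from the `[tool.poetry.scripts]` line above, and its arguments, if any, are not visible in this diff:

```python
# Programmatic equivalent of running the new `linkml-store-api` console script.
# Requires the "fastapi" extra, which pulls in fastapi and uvicorn.
from linkml_store.webapi.main import start

if __name__ == "__main__":
    start()  # console-script entry points are invoked with no arguments
```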
@@ -9,6 +9,7 @@ from linkml_store.api import Database
  from linkml_store.api.config import ClientConfig
  from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
  from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
+ from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
  from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
  from linkml_store.api.stores.solr.solr_database import SolrDatabase

@@ -20,6 +21,7 @@ HANDLE_MAP = {
      "solr": SolrDatabase,
      "mongodb": MongoDBDatabase,
      "chromadb": ChromaDBDatabase,
+     "file": FileSystemDatabase,
  }


@@ -96,7 +98,7 @@ class Client:
          """
          return self.metadata.base_dir

-     def from_config(self, config: Union[ClientConfig, str, Path], base_dir=None, **kwargs):
+     def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, **kwargs):
          """
          Create a client from a configuration.

@@ -116,11 +118,13 @@ class Client:
          :return:

          """
+         if isinstance(config, dict):
+             config = ClientConfig(**config)
          if isinstance(config, Path):
              config = str(config)
          if isinstance(config, str):
-             if not base_dir:
-                 base_dir = Path(config).parent
+             # if not base_dir:
+             #     base_dir = Path(config).parent
              parsed_obj = yaml.safe_load(open(config))
              config = ClientConfig(**parsed_obj)
          self.metadata = config
@@ -131,8 +135,15 @@ class Client:

      def _initialize_databases(self, **kwargs):
          for name, db_config in self.metadata.databases.items():
-             handle = db_config.handle.format(base_dir=self.base_dir)
+             base_dir = self.base_dir
+             logger.info(f"Initializing database: {name}, base_dir: {base_dir}")
+             if not base_dir:
+                 base_dir = Path.cwd()
+                 logger.info(f"Using current working directory: {base_dir}")
+             handle = db_config.handle.format(base_dir=base_dir)
              db_config.handle = handle
+             if db_config.schema_location:
+                 db_config.schema_location = db_config.schema_location.format(base_dir=base_dir)
              db = self.attach_database(handle, alias=name, **kwargs)
              db.from_config(db_config)

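With the changes above, `Client.from_config` also accepts a plain dict (coerced into a `ClientConfig`), and `{base_dir}` placeholders in database handles and `schema_location` are expanded, falling back to the current working directory when no base_dir is configured. A hedged sketch; the `databases`/`handle` field names follow the attributes accessed in `_initialize_databases`, and the full `ClientConfig` model is not part of this diff:

```python
from linkml_store import Client

# Dict form of a client configuration (previously ClientConfig, str, or Path only).
config = {
    "databases": {
        "countries": {
            # {base_dir} is filled in during _initialize_databases; with no
            # configured base_dir, the current working directory is used.
            "handle": "duckdb:///{base_dir}/db/countries.db",
        }
    }
}

client = Client()
client.from_config(config)
```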
@@ -4,16 +4,19 @@ import hashlib
  import logging
  from collections import defaultdict
  from pathlib import Path
- from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, TextIO, Type, Union
+ from typing import TYPE_CHECKING, Any, ClassVar, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union

  import numpy as np
+ from linkml_runtime import SchemaView
  from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
  from linkml_runtime.linkml_model.meta import ArrayExpression
  from pydantic import BaseModel

+ from linkml_store.api.types import DatabaseType
  from linkml_store.index import get_indexer
  from linkml_store.utils.format_utils import load_objects
  from linkml_store.utils.object_utils import clean_empties
+ from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists

  try:
      from linkml.validator.report import ValidationResult
@@ -36,7 +39,7 @@ IDENTIFIER = str
  FIELD_NAME = str


- class Collection:
+ class Collection(Generic[DatabaseType]):
      """
      A collection is an organized set of objects of the same or similar type.

@@ -56,11 +59,12 @@
      """

      # name: str
-     parent: Optional["Database"] = None
+     parent: Optional[DatabaseType] = None
      _indexers: Optional[Dict[str, Indexer]] = None
      # hidden: Optional[bool] = False
      metadata: Optional[CollectionConfig] = None

+     default_index_name: ClassVar[str] = "simple"

      def __init__(
          self, name: str, parent: Optional["Database"] = None, metadata: Optional[CollectionConfig] = None, **kwargs
@@ -197,6 +201,10 @@
          """
          raise NotImplementedError

+     def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
+         patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
+         self._broadcast(patches, **kwargs)
+
      def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
          """
          Delete one or more objects from the collection.
@@ -301,7 +309,7 @@

      def query_facets(
          self, where: Optional[Dict] = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
-     ) -> Dict[str, Dict[str, int]]:
+     ) -> Dict[str, List[Tuple[Any, int]]]:
          """
          Run a query to get facet counts for one or more columns.

@@ -319,7 +327,7 @@
          :param query: A Query object representing the base query.
          :param facet_columns: A list of column names to get facet counts for.
          :param facet_limit:
-         :return: A dictionary where keys are column names and values are pandas DataFrames
+         :return: A dictionary where keys are column names and values are tuples
              containing the facet counts for each unique value in the respective column.
          """
          raise NotImplementedError
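
The `query_facets` return type changes here from nested dicts to a list of `(value, count)` tuples per column. A short sketch of consuming the new shape, continuing the `Country` collection from the earlier Quick Start sketch; the `continent` column is illustrative, and the base class shown here raises `NotImplementedError`, so this relies on a backend (such as DuckDB) that implements faceting:

```python
# facets maps each requested column name to ranked (value, count) pairs.
facets = collection.query_facets(facet_columns=["continent"])
for column, pairs in facets.items():
    for value, count in pairs:
        print(f"{column}: {value} -> {count}")
```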
@@ -414,7 +422,30 @@
          **kwargs,
      ) -> QueryResult:
          """
-         Search the collection using a full-text search index.
+         Search the collection using a text-based index index.
+
+         Example:
+
+         >>> from linkml_store import Client
+         >>> from linkml_store.utils.format_utils import load_objects
+         >>> client = Client()
+         >>> db = client.attach_database("duckdb")
+         >>> collection = db.create_collection("Country")
+         >>> objs = load_objects("tests/input/countries/countries.jsonl")
+         >>> collection.insert(objs)
+
+         Now let's index, using the simple trigram-based index
+
+         >>> index = get_indexer("simple")
+         >>> collection.attach_indexer(index)
+
+         Now let's find all objects:
+
+         >>> qr = collection.search("France")
+         >>> score, top_obj = qr.ranked_rows[0]
+         >>> assert score > 0.1
+         >>> top_obj["code"]
+         'FR'

          :param query:
          :param where:
@@ -424,12 +455,18 @@
          :return:
          """
          if index_name is None:
-             if len(self._indexers) == 1:
-                 index_name = list(self._indexers.keys())[0]
+             if len(self.indexers) == 1:
+                 index_name = list(self.indexers.keys())[0]
              else:
-                 raise ValueError("Multiple indexes found. Please specify an index name.")
+                 logger.warning("Multiple indexes found. Using default index.")
+                 index_name = self.default_index_name
          ix_coll = self.parent.get_collection(self._index_collection_name(index_name))
-         ix = self._indexers.get(index_name)
+         if index_name not in self.indexers:
+             ix = get_indexer(index_name)
+             if not self._indexers:
+                 self._indexers = {}
+             self._indexers[index_name] = ix
+         ix = self.indexers.get(index_name)
          if not ix:
              raise ValueError(f"No index named {index_name}")
          qr = ix_coll.find(where=where, limit=-1, **kwargs)
@@ -446,7 +483,10 @@
      @property
      def is_internal(self) -> bool:
          """
-         Check if the collection is internal
+         Check if the collection is internal.
+
+         Internal collections are hidden by default. Examples of internal collections
+         include shadow "index" collections

          :return:
          """
@@ -462,6 +502,45 @@
          """
          Attach an index to the collection.

+         As an example, first let's create a collection in a database:
+
+         >>> from linkml_store import Client
+         >>> from linkml_store.utils.format_utils import load_objects
+         >>> client = Client()
+         >>> db = client.attach_database("duckdb")
+         >>> collection = db.create_collection("Country")
+         >>> objs = load_objects("tests/input/countries/countries.jsonl")
+         >>> collection.insert(objs)
+
+         We will create two indexes - one that indexes the whole object
+         (default behavior), the other one indexes the name only
+
+         >>> full_index = get_indexer("simple")
+         >>> full_index.name = "full"
+         >>> name_index = get_indexer("simple", text_template="{name}")
+         >>> name_index.name = "name"
+         >>> collection.attach_indexer(full_index)
+         >>> collection.attach_indexer(name_index)
+
+         Now let's find objects using the full index, using the string "France".
+         We expect the country France to be the top hit, but the score will
+         be less than zero because we did not match all fields in the object.
+
+         >>> qr = collection.search("France", index_name="full")
+         >>> score, top_obj = qr.ranked_rows[0]
+         >>> assert score > 0.1
+         >>> assert score < 0.5
+         >>> top_obj["code"]
+         'FR'
+
+         Now using the name index
+
+         >>> qr = collection.search("France", index_name="name")
+         >>> score, top_obj = qr.ranked_rows[0]
+         >>> assert score > 0.99
+         >>> top_obj["code"]
+         'FR'
+
          :param index:
          :param name:
          :param auto_index: Automatically index all objects in the collection
@@ -497,15 +576,18 @@

      def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
          """
-         Index a list of objects
+         Index a list of objects using a specified index.
+
+         By default, the indexed objects will be stored in a shadow
+         collection in the same database, with additional fields for the index vector

          :param objs:
-         :param index_name:
+         :param index_name: e.g. simple, llm
          :param replace:
          :param kwargs:
          :return:
          """
-         ix = self._indexers.get(index_name)
+         ix = self._indexers.get(index_name, None)
          if not ix:
              raise ValueError(f"No index named {index_name}")
          ix_coll_name = self._index_collection_name(index_name)
@@ -523,6 +605,7 @@
              ix_coll.delete_where()

          ix_coll.insert(objects_with_ix, **kwargs)
+         ix_coll.commit()

      def list_index_names(self) -> List[str]:
          """
@@ -557,12 +640,22 @@

          :return:
          """
-         sv = self.parent.schema_view
+         sv: SchemaView = self.parent.schema_view
          if sv:
              cls = sv.get_class(self.target_class_name)
+             if cls and not cls.attributes:
+                 if not sv.class_induced_slots(cls.name):
+                     for att in self._induce_attributes():
+                         cls.attributes[att.name] = att
+                     sv.set_modified()
              return cls
          return None

+     def _induce_attributes(self) -> List[SlotDefinition]:
+         result = self.find({}, limit=-1)
+         cd = self.induce_class_definition_from_objects(result.rows, max_sample_size=None)
+         return list(cd.attributes.values())
+
      @property
      def identifier_attribute_name(self) -> Optional[str]:
          """
@@ -579,6 +672,37 @@
                  return att.name
          return None

+     def set_identifier_attribute_name(self, name: str):
+         """
+         Set the name of the identifier attribute for the collection.
+
+         AKA the primary key.
+
+         :param name: The name of the identifier attribute.
+         """
+         cd = self.class_definition()
+         if not cd:
+             raise ValueError(f"Cannot find class definition for {self.target_class_name}")
+         id_att = None
+         candidates = []
+         sv: SchemaView = self.parent.schema_view
+         cls = sv.get_class(cd.name)
+         existing_id_slot = sv.get_identifier_slot(cls.name)
+         if existing_id_slot:
+             if existing_id_slot.name == name:
+                 return
+             existing_id_slot.identifier = False
+         for att in cls.attributes.values():
+             candidates.append(att.name)
+             if att.name == name:
+                 att.identifier = True
+                 id_att = att
+             else:
+                 att.identifier = False
+         if not id_att:
+             raise ValueError(f"No attribute found with name {name} in {candidates}")
+         sv.set_modified()
+
      def object_identifier(self, obj: OBJECT, auto=True) -> Optional[IDENTIFIER]:
          """
          Return the identifier for an object.
@@ -622,6 +746,8 @@
              for k, v in obj.items():
                  keys[k].append(v)
          for k, vs in keys.items():
+             if k == "_id":
+                 continue
              multivalueds = []
              inlineds = []
              rngs = []
@@ -698,6 +824,39 @@
          """
          raise NotImplementedError

+     def apply_patches(self, patches: List[PatchDict], **kwargs):
+         """
+         Apply a patch to the collection.
+
+         Patches conform to the JSON Patch format,
+
+         :param patches:
+         :param kwargs:
+         :return:
+         """
+         all_objs = self.find(limit=-1).rows
+         primary_key = self.identifier_attribute_name
+         if not primary_key:
+             raise ValueError(f"No primary key for {self.target_class_name}")
+         new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
+         self.replace(new_objs)
+
+     def diff(self, other: "Collection", **kwargs):
+         """
+         Diff two collections.
+
+         :param other:
+         :param kwargs:
+         :return:
+         """
+         src_objs = self.find(limit=-1).rows
+         tgt_objs = other.find(limit=-1).rows
+         primary_key = self.identifier_attribute_name
+         if not primary_key:
+             raise ValueError(f"No primary key for {self.target_class_name}")
+         patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
+         return patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
+
      def iter_validate_collection(self, **kwargs) -> Iterator["ValidationResult"]:
          """
          Validate the contents of the collection
@@ -717,3 +876,14 @@
          for obj in result.rows:
              obj = clean_empties(obj)
              yield from validator.iter_results(obj, class_name)
+
+     def commit(self):
+         """
+         Commit changes to the collection.
+
+         :return:
+         """
+         pass
+
+     def _broadcast(self, *args, **kwargs):
+         self.parent.broadcast(self, *args, **kwargs)
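
The new `diff` and `apply_patches` methods round-trip JSON Patch-style operations keyed on the collection's identifier attribute, and `set_identifier_attribute_name` designates that key. A minimal sketch using two copies of the countries fixture; treating `code` as the primary key is an assumption about that fixture, not something this diff states:

```python
from linkml_store import Client
from linkml_store.utils.format_utils import load_objects

client = Client()
db = client.attach_database("duckdb")
objs = load_objects("tests/input/countries/countries.jsonl")

src = db.create_collection("CountryA")
tgt = db.create_collection("CountryB")
src.insert(objs)
tgt.insert(objs)

# Both diff() and apply_patches() require an identifier attribute (primary key).
src.set_identifier_attribute_name("code")

patches = src.diff(tgt)     # JSON Patch-style list; empty when the contents match
src.apply_patches(patches)  # rebuilds the collection from the patched object list
```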