linkml-store 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linkml-store might be problematic. Click here for more details.

Files changed (28) hide show
  1. linkml_store-0.0.0/LICENSE +22 -0
  2. linkml_store-0.0.0/PKG-INFO +44 -0
  3. linkml_store-0.0.0/README.md +7 -0
  4. linkml_store-0.0.0/pyproject.toml +124 -0
  5. linkml_store-0.0.0/src/linkml_store/__init__.py +7 -0
  6. linkml_store-0.0.0/src/linkml_store/api/__init__.py +8 -0
  7. linkml_store-0.0.0/src/linkml_store/api/client.py +151 -0
  8. linkml_store-0.0.0/src/linkml_store/api/collection.py +327 -0
  9. linkml_store-0.0.0/src/linkml_store/api/database.py +215 -0
  10. linkml_store-0.0.0/src/linkml_store/api/metadata.py +5 -0
  11. linkml_store-0.0.0/src/linkml_store/api/queries.py +56 -0
  12. linkml_store-0.0.0/src/linkml_store/api/stores/__init__.py +0 -0
  13. linkml_store-0.0.0/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
  14. linkml_store-0.0.0/src/linkml_store/api/stores/duckdb/duckdb_collection.py +109 -0
  15. linkml_store-0.0.0/src/linkml_store/api/stores/duckdb/duckdb_database.py +166 -0
  16. linkml_store-0.0.0/src/linkml_store/api/stores/duckdb/mappings.py +7 -0
  17. linkml_store-0.0.0/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
  18. linkml_store-0.0.0/src/linkml_store/api/stores/mongodb/mongodb_collection.py +56 -0
  19. linkml_store-0.0.0/src/linkml_store/api/stores/mongodb/mongodb_database.py +112 -0
  20. linkml_store-0.0.0/src/linkml_store/constants.py +7 -0
  21. linkml_store-0.0.0/src/linkml_store/index/__init__.py +0 -0
  22. linkml_store-0.0.0/src/linkml_store/index/implementations/__init__.py +0 -0
  23. linkml_store-0.0.0/src/linkml_store/index/implementations/llm_index.py +44 -0
  24. linkml_store-0.0.0/src/linkml_store/index/implementations/simple_index.py +40 -0
  25. linkml_store-0.0.0/src/linkml_store/index/index.py +109 -0
  26. linkml_store-0.0.0/src/linkml_store/utils/__init__.py +0 -0
  27. linkml_store-0.0.0/src/linkml_store/utils/io.py +38 -0
  28. linkml_store-0.0.0/src/linkml_store/utils/sql_utils.py +126 -0
@@ -0,0 +1,22 @@
1
+
2
+ The MIT License (MIT)
3
+
4
+ Copyright (c) 2024 Monarch Initiative
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
@@ -0,0 +1,44 @@
1
+ Metadata-Version: 2.1
2
+ Name: linkml-store
3
+ Version: 0.0.0
4
+ Summary: linkml-store
5
+ License: MIT
6
+ Author: Author 1
7
+ Author-email: author@org.org
8
+ Requires-Python: >=3.9, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*, !=3.8.*
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Provides-Extra: analytics
16
+ Provides-Extra: app
17
+ Provides-Extra: llm
18
+ Provides-Extra: mongodb
19
+ Provides-Extra: tests
20
+ Requires-Dist: black (>=24.0.0) ; extra == "tests"
21
+ Requires-Dist: click
22
+ Requires-Dist: duckdb (>=0.10.1,<0.11.0)
23
+ Requires-Dist: duckdb-engine (>=0.11.2,<0.12.0)
24
+ Requires-Dist: linkml-runtime (>=1.7.5,<2.0.0)
25
+ Requires-Dist: llm ; extra == "llm"
26
+ Requires-Dist: matplotlib ; extra == "analytics"
27
+ Requires-Dist: pandas (>=2.2.1,<3.0.0) ; extra == "analytics"
28
+ Requires-Dist: plotly ; extra == "analytics"
29
+ Requires-Dist: pydantic (>=2.0.0,<3.0.0)
30
+ Requires-Dist: pymongo ; extra == "mongodb"
31
+ Requires-Dist: pystow (>=0.5.4,<0.6.0)
32
+ Requires-Dist: seaborn ; extra == "analytics"
33
+ Requires-Dist: sqlalchemy
34
+ Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
35
+ Description-Content-Type: text/markdown
36
+
37
+ # linkml-store
38
+
39
+ This is the project description.
40
+
41
+ # Acknowledgements
42
+
43
+ This [cookiecutter](https://cookiecutter.readthedocs.io/en/stable/README.html) project was developed from the [monarch-project-template](https://github.com/monarch-initiative/monarch-project-template) template and will be kept up-to-date using [cruft](https://cruft.github.io/cruft/).
44
+
@@ -0,0 +1,7 @@
1
+ # linkml-store
2
+
3
+ This is the project description.
4
+
5
+ # Acknowledgements
6
+
7
+ This [cookiecutter](https://cookiecutter.readthedocs.io/en/stable/README.html) project was developed from the [monarch-project-template](https://github.com/monarch-initiative/monarch-project-template) template and will be kept up-to-date using [cruft](https://cruft.github.io/cruft/).
@@ -0,0 +1,124 @@
1
+ [tool.poetry]
2
+ name = "linkml-store"
3
+ version = "0.0.0"
4
+ description = "linkml-store"
5
+ authors = ["Author 1 <author@org.org>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.9, !=3.9.7"
11
+ click = "*"
12
+ pydantic = "^2.0.0"
13
+ linkml-runtime = "^1.7.5"
14
+ streamlit = { version = "^1.32.2", optional = true }
15
+ sqlalchemy = "*"
16
+ duckdb = "^0.10.1"
17
+ duckdb-engine = "^0.11.2"
18
+ matplotlib = { version = "*", optional = true }
19
+ seaborn = { version = "*", optional = true }
20
+ plotly = { version = "*", optional = true }
21
+ pystow = "^0.5.4"
22
+ black = { version=">=24.0.0", optional = true }
23
+ llm = { version="*", optional = true }
24
+ pymongo = { version="*", optional = true }
25
+ pandas = { version = "^2.2.1", optional = true }
26
+
27
+ [tool.poetry.group.dev.dependencies]
28
+ pytest = {version = ">=7.1.2"}
29
+ tox = {version = ">=3.25.1"}
30
+ pre-commit = {version = ">=3.3.3"}
31
+ sphinx = {version = ">=6.1.3"}
32
+ sphinx-rtd-theme = {version = ">=1.0.0"}
33
+ sphinx-autodoc-typehints = {version = "<2.0.0"}
34
+ sphinx-click = {version = ">=4.3.0"}
35
+ myst-parser = {version = ">=0.18.1"}
36
+ nbsphinx = "*"
37
+ jupyter = "*"
38
+
39
+ [tool.poetry.group.tests.dependencies]
40
+ pytest = "^7.4.0"
41
+ pytest-subtests = "^0.11.0"
42
+ numpy = [
43
+ { "version" = ">=1.24.3", "python" = "<3.12" },
44
+ { "version" = ">=1.25.2", "python" = ">=3.12" }
45
+ ]
46
+
47
+ [tool.poetry.extras]
48
+ analytics = ["pandas", "matplotlib", "seaborn", "plotly"]
49
+ app = ["streamlit"]
50
+ tests = ["black"]
51
+ llm = ["llm"]
52
+ mongodb = ["pymongo"]
53
+
54
+ [tool.poetry.scripts]
55
+ linkml-store = "linkml_store.cli:main"
56
+
57
+ [tool.poetry-dynamic-versioning]
58
+ enable = false
59
+ vcs = "git"
60
+ style = "pep440"
61
+
62
+ [tool.black]
63
+ line-length = 120
64
+ target-version = ["py38", "py39", "py310", "py311"]
65
+ force-exclude = '''
66
+ /(
67
+ # default exclude
68
+ \.direnv|\.eggs|\.git|\.hg|\.ipynb_checkpoints|\.mypy_cache|\.nox|\.pytest_cache|\.ruff_cache|\.tox|\.svn|\.venv|\.vscode|__pypackages__|_build|buck-out|build|dist|venv
69
+ # additional exclude
70
+ | tests.*/output
71
+ | __snapshots__
72
+ | docs
73
+ | examples
74
+ | notebooks
75
+ )/
76
+ '''
77
+
78
+ [tool.ruff]
79
+ extend-exclude = [
80
+ "tests/output",
81
+ "tests/**/output",
82
+ "tests/**/__snapshots__",
83
+ "examples/",
84
+ "docs/",
85
+ "notebooks/"
86
+ ]
87
+ force-exclude = true
88
+ line-length = 120
89
+ extend-ignore = ["E203"]
90
+ select = [
91
+ "E", # pycodestyle errors
92
+ "F", # Pyflakes
93
+ "I", # isort
94
+ ]
95
+ # Assume Python 3.8
96
+ target-version = "py38"
97
+
98
+ [tool.ruff.per-file-ignores]
99
+ # These templates can have long lines
100
+ "linkml/generators/sqlalchemy/sqlalchemy_declarative_template.py" = ["E501"]
101
+ "linkml/generators/sqlalchemy/sqlalchemy_imperative_template.py" = ["E501"]
102
+
103
+ # Notebooks can have unsorted imports
104
+ "tests/test_notebooks/input/*" = ["E402"]
105
+
106
+
107
+ [tool.ruff.mccabe]
108
+ # Unlike Flake8, default to a complexity level of 10.
109
+ max-complexity = 10
110
+
111
+
112
+ [tool.codespell]
113
+ # TODO: bring in tests in too
114
+ skip = '.git,*.pdf,*.svg,./tests,pyproject.toml,*.dill,poetry.lock,*.ipynb'
115
+ # Ignore table where words could be split across rows
116
+ # Ignore shortcut specifications like [Ff]alse
117
+ ignore-regex = '(\|.*\|.*\|.*\||\[[A-Z][a-z]\][a-z][a-z])'
118
+ ignore-words-list = 'mater,connexion,infarction'
119
+ count = ""
120
+ quiet-level = 3
121
+
122
+ [build-system]
123
+ requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"]
124
+ build-backend = "poetry_dynamic_versioning.backend"
@@ -0,0 +1,7 @@
1
"""Top-level package for linkml-store; re-exports the :class:`Client` entry point."""
from pathlib import Path

from linkml_store.api import Client

# Directory containing this package's installed source files.
THIS_DIR = Path(__file__).parent

__all__ = ["Client"]
@@ -0,0 +1,8 @@
1
# flake8: noqa: E402
# NOTE: import order matters here. client.py does
# `from linkml_store.api import Database`, so Database (and the classes it
# depends on) must be bound in this module before client is imported;
# reordering these lines can break the circular import.
from linkml_store.api.collection import Collection
from linkml_store.api.database import Database
from linkml_store.api.metadata import MetaData
from linkml_store.api.client import Client
# flake8: noqa

__all__ = ["Client", "Database", "MetaData", "Collection"]
@@ -0,0 +1,151 @@
1
+ from dataclasses import dataclass
2
+ from typing import Dict, Optional
3
+
4
+ from linkml_runtime import SchemaView
5
+
6
+ from linkml_store.api import Database
7
+ from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
8
+
9
+ HANDLE_MAP = {
10
+ "duckdb": DuckDBDatabase,
11
+ }
12
+
13
+
14
@dataclass
class Client:
    """
    A client provides access to named collections.

    Examples
    --------
    >>> client = Client()
    >>> db = client.attach_database("duckdb", alias="test")
    >>> collection = db.create_collection("Person")
    >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
    >>> collection.add(objs)
    >>> qr = collection.find()
    >>> len(qr.rows)
    2
    >>> qr.rows[0]["id"]
    'P1'
    >>> qr.rows[1]["name"]
    'Alice'
    >>> qr = collection.find({"name": "John"})
    >>> len(qr.rows)
    1
    >>> qr.rows[0]["name"]
    'John'

    """

    # Default handle for the client; NOTE(review): not currently consulted by
    # attach_database -- confirm intended use.
    handle: Optional[str] = None
    # Registry of attached databases keyed by alias; lazily initialized.
    _databases: Optional[Dict[str, Database]] = None

    def attach_database(
        self,
        handle: str,
        alias: Optional[str] = None,
        schema_view: Optional[SchemaView] = None,
        recreate_if_exists=False,
        **kwargs,
    ) -> Database:
        """
        Associate a database with a handle.

        Examples
        --------
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="memory")
        >>> "memory" in client.databases
        True
        >>> db = client.attach_database("duckdb:///tmp/another.db", alias="disk")
        >>> len(client.databases)
        2
        >>> "disk" in client.databases
        True

        :param handle: handle for the database, e.g. duckdb:///foo.db
        :param alias: alias for the database, e.g. foo; defaults to the handle as given
        :param schema_view: schema view to associate with the database
        :param recreate_if_exists: passed to the database constructor
        :param kwargs: passed through to the database constructor
        :return: the newly attached database
        :raises ValueError: if the handle's scheme is not recognized

        """
        # Remember the handle exactly as supplied: it doubles as the default
        # alias below, even when it is a bare scheme such as "duckdb".
        requested_handle = handle
        if ":" not in handle:
            # Bare scheme: let the database class choose its default location
            # (e.g. an in-memory DuckDB instance).
            scheme = handle
            handle = None
        else:
            scheme, _ = handle.split(":", 1)
        if scheme not in HANDLE_MAP:
            raise ValueError(f"Unknown scheme: {scheme}")
        cls = HANDLE_MAP[scheme]
        db = cls(handle=handle, recreate_if_exists=recreate_if_exists, **kwargs)
        if schema_view:
            db.set_schema_view(schema_view)
        if not alias:
            # BUGFIX: fall back to the handle as given by the caller. The
            # previous code used the parsed handle, which is None for a bare
            # scheme, so the database was registered under the key None and
            # get_database("duckdb") raised KeyError right after attaching it.
            alias = requested_handle
        if not self._databases:
            self._databases = {}
        self._databases[alias] = db
        return db

    def get_database(self, name: Optional[str] = None, create_if_not_exists=True, **kwargs) -> Database:
        """
        Get a named database.

        Examples
        --------
        >>> client = Client()
        >>> db = client.attach_database("duckdb:///test.db", alias="test")
        >>> retrieved_db = client.get_database("test")
        >>> db == retrieved_db
        True

        :param name: alias of the database; may be omitted when exactly one is attached
        :param create_if_not_exists: if True, attach a database on demand using *name* as its handle
        :param kwargs: passed through to :meth:`attach_database` when auto-attaching
        :return: the database registered under *name*
        :raises ValueError: if no name is given and zero or several databases are
            attached, or *name* is unknown and ``create_if_not_exists`` is False

        """
        if not name:
            if not self._databases:
                raise ValueError("No databases attached and no name provided")
            if len(self._databases) > 1:
                raise ValueError("Ambiguous: No name provided and multiple databases attached")
            # Exactly one database attached: return it.
            return next(iter(self._databases.values()))
        if not self._databases:
            self._databases = {}
        if name not in self._databases:
            if create_if_not_exists:
                # attach_database registers the new database under *name*
                # (the caller-supplied handle) when no alias is given.
                self.attach_database(name, **kwargs)
            else:
                raise ValueError(f"Database {name} does not exist")
        return self._databases[name]

    @property
    def databases(self) -> Dict[str, Database]:
        """
        Return all attached databases

        Examples
        --------
        >>> client = Client()
        >>> _ = client.attach_database("duckdb", alias="test1")
        >>> _ = client.attach_database("duckdb", alias="test2")
        >>> len(client.databases)
        2
        >>> "test1" in client.databases
        True
        >>> "test2" in client.databases
        True
        >>> client.databases["test1"].handle
        'duckdb:///:memory:'
        >>> client.databases["test2"].handle
        'duckdb:///:memory:'

        :return: mapping of alias to database

        """
        if not self._databases:
            self._databases = {}
        return self._databases
@@ -0,0 +1,327 @@
1
+ import logging
2
+ from collections import defaultdict
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, TextIO, Type, Union
6
+
7
+ import numpy as np
8
+ from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
9
+ from linkml_runtime.linkml_model.meta import ArrayExpression
10
+ from pydantic import BaseModel
11
+
12
+ from linkml_store.api.queries import Query, QueryResult
13
+ from linkml_store.index.index import Index
14
+
15
+ if TYPE_CHECKING:
16
+ from linkml_store.api.database import Database
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ OBJECT = Union[Dict[str, Any], BaseModel, Type]
21
+
22
+ IDENTIFIER = str
23
+ FIELD_NAME = str
24
+
25
+
26
@dataclass
class Collection:
    """
    A collection is an organized set of objects of the same or similar type.

    - For relational databases, a collection is typically a table
    - For document databases such as MongoDB, a collection is the native type
    - For a file system, a collection could be a single tabular file such as Parquet or CSV
    """

    # Name of the collection; doubles as the class name when a schema is induced.
    name: str
    # Owning database; query execution is delegated to it.
    parent: Optional["Database"] = None
    # Attached search indexes keyed by index name; lazily initialized.
    _indexes: Optional[Dict[str, Index]] = None
    # NOTE(review): presumably marks internal collections (e.g. index
    # side-collections) to be excluded from user-facing listings -- confirm.
    hidden: Optional[bool] = False

    def add(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
        """
        Add one or more objects to the collection.

        Backend subclasses must override this.

        :param objs: a single object or a list of objects
        :param kwargs: backend-specific options
        :return:
        """
        raise NotImplementedError

    def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
        """
        Delete one or more objects from the collection.

        Backend subclasses must override this.

        :param objs: a single object or a list of objects to delete
        :param kwargs: backend-specific options
        :return: number of objects deleted
        """
        raise NotImplementedError

    def delete_where(self, where: Optional[Dict[str, Any]] = None, **kwargs) -> int:
        """
        Delete objects that match a query.

        Backend subclasses must override this.

        :param where: attribute/value constraints; semantics of None are backend-defined
        :param kwargs: backend-specific options
        :return: number of objects deleted
        """
        raise NotImplementedError

    def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
        """
        Update one or more objects in the collection.

        Backend subclasses must override this.

        :param objs: a single object or a list of objects
        :param kwargs: backend-specific options
        :return:
        """
        raise NotImplementedError

    def _create_query(self, **kwargs) -> Query:
        # Every query against this collection selects from its own name.
        return Query(from_table=self.name, **kwargs)

    def query(self, query: Query, **kwargs) -> QueryResult:
        """
        Run a query against the collection.

        Execution is delegated to the parent database.

        :param query:
        :param kwargs:
        :return:
        """
        return self.parent.query(query, **kwargs)

    def query_facets(self, where: Optional[Dict] = None, facet_columns: List[str] = None) -> Dict[str, Dict[str, int]]:
        """
        Run a query to get facet counts for one or more columns.

        For each facet column, the occurrences of each distinct value are
        counted while the filtering conditions in ``where`` that do not bear on
        that column still apply.

        Backend subclasses must override this.

        :param where: attribute/value constraints applied before counting
        :param facet_columns: column names to get facet counts for
        :return: mapping of column name to {value: count}
        """
        raise NotImplementedError

    def get(self, ids: Optional[IDENTIFIER], **kwargs) -> QueryResult:
        """
        Fetch objects by identifier.

        :param ids: identifier value(s) matched against the identifier attribute
        :param kwargs: passed through to :meth:`query`
        :return: query result containing the matching objects
        """
        # BUGFIX: the original read ``self.identifier_field``, which is not
        # defined anywhere on this class and always raised AttributeError; the
        # identifier attribute name comes from the schema via
        # identifier_attribute_name().
        id_field = self.identifier_attribute_name()
        q = self._create_query(where_clause={id_field: ids})
        return self.query(q, **kwargs)

    def find(self, where: Optional[Any] = None, **kwargs) -> QueryResult:
        """
        Find objects matching the given constraints.

        :param where: attribute/value constraints; None matches everything
        :param kwargs: passed through to :meth:`query` (e.g. limit)
        :return:
        """
        query = self._create_query(where_clause=where)
        return self.query(query, **kwargs)

    def search(
        self,
        query: str,
        where: Optional[Any] = None,
        index_name: Optional[str] = None,
        limit: Optional[int] = None,
        **kwargs,
    ) -> QueryResult:
        """
        Search the collection using an attached search index.

        :param query: search string
        :param where: optional constraints applied when fetching indexed rows
        :param index_name: which index to use; may be omitted when exactly one is attached
        :param limit: maximum number of ranked results
        :param kwargs: passed through when fetching the indexed rows
        :return: result whose ``ranked_rows`` holds the index's ranked matches
        :raises ValueError: if no index is attached, the index name is ambiguous,
            or the named index does not exist
        """
        if index_name is None:
            if not self._indexes:
                # BUGFIX: len(None) previously raised a TypeError when no index
                # had been attached; fail with an actionable message instead.
                raise ValueError("No indexes attached; use attach_index() first")
            if len(self._indexes) == 1:
                index_name = next(iter(self._indexes))
            else:
                raise ValueError("Multiple indexes found. Please specify an index name.")
        ix_coll = self.parent.get_collection(self._index_collection_name(index_name))
        ix = self._indexes.get(index_name)
        if not ix:
            raise ValueError(f"No index named {index_name}")
        # Fetch all indexed rows (limit=-1 => no limit) and rank them in memory.
        qr = ix_coll.find(where=where, limit=-1, **kwargs)
        index_col = ix.index_field
        vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
        results = ix.search(query, vector_pairs, limit=limit)
        new_qr = QueryResult(num_rows=len(results))
        new_qr.ranked_rows = results
        return new_qr

    def attach_index(self, index: Index, auto_index=True, **kwargs):
        """
        Attach an index to the collection.

        :param index: index to attach; must have a name
        :param auto_index: if True, immediately index every current object
        :param kwargs: passed through to :meth:`index_objects`
        :return:
        :raises ValueError: if the index has no name
        """
        index_name = index.name
        if not index_name:
            raise ValueError("Index must have a name")
        if not self._indexes:
            self._indexes = {}
        self._indexes[index_name] = index
        if auto_index:
            # limit=-1 => no limit: index the entire collection.
            all_objs = self.find(limit=-1).rows
            self.index_objects(all_objs, index_name, **kwargs)

    def _index_collection_name(self, index_name: str) -> str:
        # Name of the internal side-collection holding objects plus their vectors.
        return f"index__{self.name}_{index_name}"

    def index_objects(self, objs: List[OBJECT], index_name: str, **kwargs):
        """
        Index a list of objects.

        :param objs: objects to index
        :param index_name: name of a previously attached index
        :param kwargs: passed through to the index collection's add()
        :return:
        :raises ValueError: if no index with that name is attached
        """
        # Guard against _indexes still being None (no index ever attached).
        ix = (self._indexes or {}).get(index_name)
        if not ix:
            raise ValueError(f"No index named {index_name}")
        ix_coll = self.parent.get_collection(self._index_collection_name(index_name), create_if_not_exists=True)
        # Store vectors as plain lists of Python floats so any backend can persist them.
        vectors = [list(float(e) for e in v) for v in ix.objects_to_vectors(objs)]
        objects_with_ix = []
        index_col = ix.index_field
        for obj, vector in zip(objs, vectors):
            # TODO: id field
            objects_with_ix.append({**obj, **{index_col: vector}})
        ix_coll.add(objects_with_ix, **kwargs)

    def peek(self, limit: Optional[int] = None) -> QueryResult:
        """
        Return the first objects in the collection, unfiltered.

        :param limit: maximum number of rows to return
        :return:
        """
        q = self._create_query()
        return self.query(q, limit=limit)

    def class_definition(self) -> Optional[ClassDefinition]:
        """
        Return the class definition for the collection.

        The class is looked up by the collection's name in the parent
        database's schema view, if one is attached.

        :return: the matching class definition, or None
        """
        sv = self.parent.schema_view
        if sv:
            return sv.get_class(self.name)
        return None

    def identifier_attribute_name(self) -> Optional[str]:
        """
        Return the name of the identifier attribute for the collection.

        :return: The name of the identifier attribute, if one exists.
        """
        cd = self.class_definition()
        if cd:
            for att in cd.attributes.values():
                if att.identifier:
                    return att.name
        return None

    def induce_class_definition_from_objects(self, objs: List[OBJECT], max_sample_size=10) -> ClassDefinition:
        """
        Induce a class definition from a list of objects.

        This uses a heuristic procedure to infer the class definition from a
        sample of objects. In general it is recommended you explicitly provide
        a schema.

        :param objs: objects to sample (only the first max_sample_size are inspected)
        :param max_sample_size: maximum number of objects to inspect
        :return: the induced class definition (also registered in the schema view)
        """
        cd = ClassDefinition(self.name)
        # Gather the observed values per attribute across the sample.
        keys = defaultdict(list)
        for obj in objs[0:max_sample_size]:
            if isinstance(obj, BaseModel):
                obj = obj.model_dump()
            if not isinstance(obj, dict):
                logger.warning(f"Skipping non-dict object: {obj}")
                continue
            for k, v in obj.items():
                keys[k].append(v)
        for k, vs in keys.items():
            multivalueds = []
            inlineds = []
            rngs = []
            exact_dimensions_list = []
            for v in vs:
                if v is None:
                    continue
                if isinstance(v, np.ndarray):
                    # Arrays are modeled as float slots with an ArrayExpression;
                    # the first array seen fixes the dimensionality for this key.
                    rngs.append("float")
                    exact_dimensions_list.append(v.shape)
                    break
                if isinstance(v, list):
                    # Inspect the first element to infer the element range.
                    v = v[0]
                    multivalueds.append(True)
                elif isinstance(v, dict):
                    v = list(v.values())[0]
                    multivalueds.append(True)
                else:
                    multivalueds.append(False)
                if not v:
                    # Falsy values (0, "", False, empty containers) carry no
                    # reliable type signal; skip them.
                    continue
                if isinstance(v, str):
                    rng = "string"
                elif isinstance(v, bool):
                    # bool must be tested before int: bool is a subclass of int.
                    rng = "boolean"
                elif isinstance(v, int):
                    rng = "integer"
                elif isinstance(v, float):
                    rng = "float"
                elif isinstance(v, dict):
                    rng = None
                    inlineds.append(True)
                else:
                    # raise ValueError(f"No mappings for {type(v)} // v={v}")
                    rng = None
                    inlineds.append(False)
                rngs.append(rng)
            multivalued = any(multivalueds)
            inlined = any(inlineds)
            if multivalued and False in multivalueds:
                raise ValueError(f"Mixed list non list: {vs} // inferred= {multivalueds}")
            # if not rngs:
            #     raise AssertionError(f"Empty rngs for {k} = {vs}")
            rng = rngs[0] if rngs else None
            for other_rng in rngs:
                if rng != other_rng:
                    raise ValueError(f"Conflict: {rng} != {other_rng} for {vs}")
            cd.attributes[k] = SlotDefinition(k, range=rng, multivalued=multivalued, inlined=inlined)
            if exact_dimensions_list:
                array_expr = ArrayExpression(exact_number_dimensions=len(exact_dimensions_list[0]))
                cd.attributes[k].array = array_expr
        sv = self.parent.schema_view
        sv.schema.classes[self.name] = cd
        sv.set_modified()
        return cd

    def import_data(self, location: Union[Path, str, TextIO], **kwargs):
        """
        Import data from a file or stream.

        Backend subclasses must override this.

        :param location: path or open stream to read from
        :param kwargs: backend-specific options
        :return:
        """
        raise NotImplementedError

    def export_data(self, location: Union[Path, str, TextIO], **kwargs):
        """
        Export data to a file or stream.

        Backend subclasses must override this.

        :param location: path or open stream to write to
        :param kwargs: backend-specific options
        :return:
        """
        raise NotImplementedError