linkml-store 0.0.0__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)
  1. linkml_store/api/__init__.py +2 -2
  2. linkml_store/api/client.py +113 -8
  3. linkml_store/api/collection.py +272 -34
  4. linkml_store/api/config.py +101 -0
  5. linkml_store/api/database.py +282 -18
  6. linkml_store/api/queries.py +12 -1
  7. linkml_store/api/stores/chromadb/__init__.py +3 -0
  8. linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
  9. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  10. linkml_store/api/stores/duckdb/__init__.py +7 -0
  11. linkml_store/api/stores/duckdb/duckdb_collection.py +47 -14
  12. linkml_store/api/stores/duckdb/duckdb_database.py +38 -47
  13. linkml_store/api/stores/hdf5/__init__.py +0 -0
  14. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  15. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  16. linkml_store/api/stores/mongodb/mongodb_collection.py +92 -40
  17. linkml_store/api/stores/mongodb/mongodb_database.py +58 -67
  18. linkml_store/api/stores/solr/__init__.py +3 -0
  19. linkml_store/api/stores/solr/solr_collection.py +133 -0
  20. linkml_store/api/stores/solr/solr_database.py +83 -0
  21. linkml_store/api/stores/solr/solr_utils.py +0 -0
  22. linkml_store/cli.py +369 -0
  23. linkml_store/index/__init__.py +33 -0
  24. linkml_store/index/implementations/{llm_index.py → llm_indexer.py} +2 -2
  25. linkml_store/index/implementations/{simple_index.py → simple_indexer.py} +6 -3
  26. linkml_store/index/{index.py → indexer.py} +7 -4
  27. linkml_store/utils/format_utils.py +93 -0
  28. linkml_store/utils/object_utils.py +81 -0
  29. linkml_store/utils/sql_utils.py +46 -7
  30. {linkml_store-0.0.0.dist-info → linkml_store-0.1.7.dist-info}/METADATA +17 -6
  31. linkml_store-0.1.7.dist-info/RECORD +42 -0
  32. linkml_store-0.1.7.dist-info/entry_points.txt +3 -0
  33. linkml_store/api/metadata.py +0 -5
  34. linkml_store-0.0.0.dist-info/RECORD +0 -29
  35. linkml_store-0.0.0.dist-info/entry_points.txt +0 -3
  36. {linkml_store-0.0.0.dist-info → linkml_store-0.1.7.dist-info}/LICENSE +0 -0
  37. {linkml_store-0.0.0.dist-info → linkml_store-0.1.7.dist-info}/WHEEL +0 -0
linkml_store/cli.py ADDED
@@ -0,0 +1,369 @@
+ import logging
+ import sys
+ import warnings
+ from typing import Optional
+
+ import click
+ import yaml
+ from linkml_runtime.dumpers import json_dumper
+ from pydantic import BaseModel
+
+ from linkml_store import Client
+ from linkml_store.api import Collection, Database
+ from linkml_store.api.queries import Query
+ from linkml_store.index.implementations.simple_indexer import SimpleIndexer
+ from linkml_store.index.indexer import Indexer
+ from linkml_store.utils.format_utils import Format, load_objects, render_output
+ from linkml_store.utils.object_utils import object_path_update
+
+ index_type_option = click.option("--index-type", "-t")
+
+ logger = logging.getLogger(__name__)
+
+ warnings.filterwarnings("ignore", module="duckdb_engine")
+
+
+ class ContextSettings(BaseModel):
+     """
+     Context object for CLI commands.
+     """
+
+     client: Client
+     database_name: Optional[str] = None
+     collection_name: Optional[str] = None
+
+     @property
+     def database(self) -> Optional[Database]:
+         """
+         Get the database object.
+
+         :return: the named database, or the first registered database if none is named
+         """
+         name = self.database_name
+         if name is None:
+             # if len(self.client.databases) > 1:
+             #     raise ValueError("Database must be specified if there are multiple databases.")
+             if not self.client.databases:
+                 return None
+             name = list(self.client.databases.keys())[0]
+         return self.client.get_database(name)
+
+     @property
+     def collection(self) -> Optional[Collection]:
+         """
+         Get the collection object.
+
+         :return: the named collection, or the first collection if none is named
+         """
+         name = self.collection_name
+         if name is None:
+             # if len(self.database.list_collections()) > 1:
+             #     raise ValueError("Collection must be specified if there are multiple collections.")
+             if not self.database.list_collections():
+                 return None
+             name = list(self.database.list_collections())[0]
+         return self.database.get_collection(name)
+
+     class Config:
+         arbitrary_types_allowed = True
+
+
+ # format_choice = click.Choice(["json", "yaml", "tsv"])
+ format_choice = click.Choice([f.value for f in Format])
+
+
+ @click.group()
+ @click.option("--database", "-d", help="Database name")
+ @click.option("--collection", "-c", help="Collection name")
+ @click.option("--config", "-C", type=click.Path(exists=True), help="Path to the configuration file")
+ @click.option("--set", help="Metadata settings in the form PATHEXPR=value", multiple=True)
+ @click.option("-v", "--verbose", count=True)
+ @click.option("-q", "--quiet/--no-quiet")
+ @click.option(
+     "--stacktrace/--no-stacktrace",
+     default=False,
+     show_default=True,
+     help="If set then show full stacktrace on error",
+ )
+ @click.pass_context
+ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, config, set):
+     """A CLI for interacting with the linkml-store."""
+     if not stacktrace:
+         sys.tracebacklimit = 0
+     logger = logging.getLogger()
+     if verbose >= 2:
+         logger.setLevel(logging.DEBUG)
+     elif verbose == 1:
+         logger.setLevel(logging.INFO)
+     else:
+         logger.setLevel(logging.WARNING)
+     if quiet:
+         logger.setLevel(logging.ERROR)
+     ctx.ensure_object(dict)
+     client = Client().from_config(config) if config else Client()
+     settings = ContextSettings(client=client, database_name=database, collection_name=collection)
+     ctx.obj["settings"] = settings
+     # DEPRECATED
+     ctx.obj["client"] = client
+     ctx.obj["database"] = database
+     ctx.obj["collection"] = collection
+     if settings.database_name:
+         db = client.get_database(database)
+         if set:
+             for expr in set:
+                 if "=" not in expr:
+                     raise ValueError(f"Expression must be of form PARAM=VALUE. Got: {expr}")
+                 path, val = expr.split("=", 1)
+                 val = yaml.safe_load(val)
+                 logger.info(f"Setting {path} to {val}")
+                 db.metadata = object_path_update(db.metadata, path, val)
+         # settings.database = db
+         # DEPRECATED
+         ctx.obj["database_obj"] = db
+         if collection:
+             collection_obj = db.get_collection(collection)
+             ctx.obj["collection_obj"] = collection_obj
+     if not settings.database_name:
+         # if len(client.databases) != 1:
+         #     raise ValueError("Database must be specified if there are multiple databases.")
+         if client.databases:
+             settings.database_name = list(client.databases.keys())[0]
+     if not settings.collection_name:
+         # if len(settings.database.list_collections()) != 1:
+         #     raise ValueError("Collection must be specified if there are multiple collections.")
+         if settings.database and settings.database.list_collections():
+             collection = settings.database.list_collections()[0]
+             settings.collection_name = collection.name
+
+
+ @cli.command()
+ @click.argument("files", type=click.Path(exists=True), nargs=-1)
+ @click.option("--format", "-f", type=format_choice, help="Input format")
+ @click.option("--object", "-i", multiple=True, help="Input object as YAML")
+ @click.pass_context
+ def insert(ctx, files, object, format):
+     """Insert objects from files (JSON, YAML, TSV) into the specified collection."""
+     settings = ctx.obj["settings"]
+     collection = settings.collection
+     if not collection:
+         raise ValueError("Collection must be specified.")
+     objects = []
+     if not files and not object:
+         files = ["-"]
+     for file_path in files:
+         if format:
+             objects = load_objects(file_path, format=format)
+         else:
+             objects = load_objects(file_path)
+         logger.info(f"Inserting {len(objects)} objects from {file_path} into collection '{collection.name}'.")
+         collection.insert(objects)
+         click.echo(f"Inserted {len(objects)} objects from {file_path} into collection '{collection.name}'.")
+     if object:
+         for object_str in object:
+             logger.info(f"Parsing: {object_str}")
+             objects = yaml.safe_load(object_str)
+             collection.insert(objects)
+             click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{collection.name}'.")
+
+
+ @cli.command()
+ @click.argument("files", type=click.Path(exists=True), nargs=-1)
+ @click.option("--format", "-f", type=format_choice, help="Input format")
+ @click.option("--object", "-i", multiple=True, help="Input object as YAML")
+ @click.pass_context
+ def store(ctx, files, object, format):
+     """Store objects from files (JSON, YAML, TSV) into the specified database."""
+     settings = ctx.obj["settings"]
+     db = settings.database
+     if not files and not object:
+         files = ["-"]
+     for file_path in files:
+         if format:
+             objects = load_objects(file_path, format=format)
+         else:
+             objects = load_objects(file_path)
+         logger.info(f"Inserting {len(objects)} objects from {file_path} into database '{db}'.")
+         for obj in objects:
+             db.store(obj)
+         click.echo(f"Inserted {len(objects)} objects from {file_path} into database '{db}'.")
+     if object:
+         for object_str in object:
+             logger.info(f"Parsing: {object_str}")
+             objects = yaml.safe_load(object_str)
+             for obj in objects:
+                 db.store(obj)
+             click.echo(f"Inserted {len(objects)} objects from {object_str} into database '{db.name}'.")
+
+
+ @cli.command()
+ @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
+ @click.option("--limit", "-l", type=click.INT, help="Maximum number of results to return")
+ @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
+ @click.option("--output", "-o", type=click.Path(), help="Output file path")
+ @click.pass_context
+ def query(ctx, where, limit, output_type, output):
+     """Query objects from the specified collection."""
+     collection = ctx.obj["settings"].collection
+     where_clause = yaml.safe_load(where) if where else None
+     query = Query(from_table=collection.name, where_clause=where_clause, limit=limit)
+     result = collection.query(query)
+     output_data = render_output(result.rows, output_type)
+     if output:
+         with open(output, "w") as f:
+             f.write(output_data)
+         click.echo(f"Query results saved to {output}")
+     else:
+         click.echo(output_data)
+
+
+ @cli.command()
+ @click.pass_context
+ def list_collections(ctx):
+     """List the collections in the current database, with their metadata."""
+     db = ctx.obj["settings"].database
+     for collection in db.list_collections():
+         click.echo(collection.name)
+         click.echo(render_output(collection.metadata))
+
+
+ @cli.command()
+ @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
+ @click.option("--limit", "-l", type=click.INT, help="Maximum number of results to return")
+ @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
+ @click.option("--output", "-o", type=click.Path(), help="Output file path")
+ @click.option("--columns", "-S", help="Columns to facet on")
+ @click.pass_context
+ def fq(ctx, where, limit, columns, output_type, output):
+     """
+     Query facets from the specified collection.
+
+     :param ctx: click context
+     :param where: WHERE clause, as YAML
+     :param limit: maximum number of results
+     :param columns: comma-separated facet columns; use '+' to facet on a combination
+     :param output_type: output format
+     :param output: output file path
+     :return:
+     """
+     collection = ctx.obj["settings"].collection
+     where_clause = yaml.safe_load(where) if where else None
+     columns = columns.split(",") if columns else None
+     if columns:
+         columns = [col.strip() for col in columns]
+         columns = [(tuple(col.split("+")) if "+" in col else col) for col in columns]
+         logger.info(f"Faceting on columns: {columns}")
+     results = collection.query_facets(where_clause, facet_columns=columns, limit=limit)
+     logger.info(f"Facet results: {results}")
+
+     def _untuple(key):
+         if isinstance(key, tuple):
+             return "+".join(key)
+         return key
+
+     count_dict = {}
+     for key, value in results.items():
+         value_as_dict = {_untuple(v[0:-1]): v[-1] for v in value}
+         count_dict[_untuple(key)] = value_as_dict
+     output_data = render_output(count_dict, output_type)
+     if output:
+         with open(output, "w") as f:
+             f.write(output_data)
+         click.echo(f"Query results saved to {output}")
+     else:
+         click.echo(output_data)
+
+
+ def _get_index(index_type=None, **kwargs) -> Indexer:
+     if index_type is None or index_type == "simple":
+         return SimpleIndexer(name="test", **kwargs)
+     else:
+         raise ValueError(f"Unknown index type: {index_type}")
+
+
+ @cli.command()
+ @index_type_option
+ @click.pass_context
+ def index(ctx, index_type):
+     """
+     Create an index over a collection.
+
+     :param ctx: click context
+     :param index_type: the type of index (currently only "simple")
+     :return:
+     """
+     collection = ctx.obj["settings"].collection
+     ix = _get_index(index_type)
+     collection.attach_indexer(ix)
+
+
+ @cli.command()
+ @click.pass_context
+ @click.option("--output-type", "-O", type=format_choice, default="yaml", help="Output format")
+ @click.option("--output", "-o", type=click.Path(), help="Output file path")
+ def schema(ctx, output_type, output):
+     """
+     Show the schema for a database.
+
+     :param ctx: click context
+     :param output_type: output format
+     :param output: output file path
+     :return:
+     """
+     db = ctx.obj["settings"].database
+     schema_dict = json_dumper.to_dict(db.schema_view.schema)
+     output_data = render_output(schema_dict, output_type)
+     if output:
+         with open(output, "w") as f:
+             f.write(output_data)
+         click.echo(f"Schema saved to {output}")
+     else:
+         click.echo(output_data)
+
+
+ @cli.command()
+ @click.argument("search_term")
+ @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the search")
+ @click.option("--limit", "-l", type=click.INT, help="Maximum number of search results")
+ @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
+ @click.option("--output", "-o", type=click.Path(), help="Output file path")
+ @index_type_option
+ @click.pass_context
+ def search(ctx, search_term, where, limit, index_type, output_type, output):
+     """Search objects in the specified collection."""
+     collection = ctx.obj["settings"].collection
+     ix = _get_index(index_type)
+     logger.info(f"Attaching index to collection {collection.name}: {ix.model_dump()}")
+     collection.attach_indexer(ix, auto_index=False)
+     result = collection.search(search_term, where=where, limit=limit)
+     output_data = render_output([{"score": row[0], **row[1]} for row in result.ranked_rows], output_type)
+     if output:
+         with open(output, "w") as f:
+             f.write(output_data)
+         click.echo(f"Search results saved to {output}")
+     else:
+         click.echo(output_data)
+
+
+ @cli.command()
+ @click.pass_context
+ def indexes(ctx):
+     """List the indexers attached to the current collection."""
+     collection = ctx.obj["settings"].collection
+     for name, ix in collection.indexers.items():
+         click.echo(f"{name}: {type(ix)}\n{ix.model_dump_json()}")
+
+
+ @cli.command()
+ @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
+ @click.option("--output", "-o", type=click.Path(), help="Output file path")
+ @click.pass_context
+ def validate(ctx, output_type, output):
+     """Validate objects in the specified collection."""
+     collection = ctx.obj["settings"].collection
+     validation_results = [json_dumper.to_dict(x) for x in collection.iter_validate_collection()]
+     output_data = render_output(validation_results, output_type)
+     if output:
+         with open(output, "w") as f:
+             f.write(output_data)
+         click.echo(f"Validation results saved to {output}")
+     else:
+         click.echo(output_data)
+
+
+ if __name__ == "__main__":
+     cli()
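
For orientation, a minimal sketch of driving the new CLI in-process with click's CliRunner (a standard click testing utility); the database handle and collection name are hypothetical placeholders, not values shipped with the package:

    from click.testing import CliRunner

    from linkml_store.cli import cli

    runner = CliRunner()
    # "-w" takes YAML, so "name: Alice" becomes {"name": "Alice"}
    result = runner.invoke(
        cli,
        ["-d", "duckdb:///tmp/demo.db", "-c", "persons",  # hypothetical handle/collection
         "query", "-w", "name: Alice", "-l", "5"],
    )
    print(result.output)  # rendered as JSON, per the -O default above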
linkml_store/index/__init__.py ADDED
@@ -0,0 +1,33 @@
+ from typing import Type
+
+ from linkml_store.index.implementations.llm_indexer import LLMIndexer
+ from linkml_store.index.implementations.simple_indexer import SimpleIndexer
+ from linkml_store.index.indexer import Indexer
+
+ INDEXER_CLASSES = {
+     "simple": SimpleIndexer,
+     "llm": LLMIndexer,
+ }
+
+
+ def get_indexer_class(name: str) -> Type[Indexer]:
+     """
+     Get an indexer class by name.
+
+     :param name: the name of the indexer
+     :return: the indexer class
+     """
+     if name not in INDEXER_CLASSES:
+         raise ValueError(f"Unknown indexer class: {name}")
+     return INDEXER_CLASSES[name]
+
+
+ def get_indexer(name: str, *args, **kwargs) -> Indexer:
+     """
+     Get an indexer by name.
+
+     :param name: the name of the indexer
+     :param args: positional arguments passed to the indexer constructor
+     :param kwargs: additional keyword arguments passed to the indexer constructor
+     :return: the indexer instance
+     """
+     return get_indexer_class(name)(*args, **kwargs)
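
A short usage sketch, assuming only the registry shown above ("simple" resolves to SimpleIndexer; the name keyword mirrors the SimpleIndexer(name=...) call in cli.py):

    from linkml_store.index import get_indexer

    ix = get_indexer("simple", name="my-index")  # kwargs are forwarded to SimpleIndexer
    # get_indexer("bm25") raises ValueError("Unknown indexer class: bm25")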
linkml_store/index/implementations/{llm_index.py → llm_indexer.py} RENAMED
@@ -2,13 +2,13 @@ from typing import TYPE_CHECKING, List

  import numpy as np

- from linkml_store.index.index import INDEX_ITEM, Index
+ from linkml_store.index.indexer import INDEX_ITEM, Indexer

  if TYPE_CHECKING:
      import llm


- class LLMIndex(Index):
+ class LLMIndexer(Indexer):
      """
      An index implementation that wraps the llm library
      """
linkml_store/index/implementations/{simple_index.py → simple_indexer.py} RENAMED
@@ -1,11 +1,14 @@
  import hashlib
+ import logging

  import numpy as np

- from linkml_store.index.index import INDEX_ITEM, Index
+ from linkml_store.index.indexer import INDEX_ITEM, Indexer

+ logger = logging.getLogger(__name__)

- class SimpleIndex(Index):
+
+ class SimpleIndexer(Indexer):
      """
      An index implementation that uses a hash function to generate an index from text.

@@ -36,5 +39,5 @@ class SimpleIndex(Index):

          # Increment the count at the computed index
          vector[index] += 1.0
-
+         logger.info(f"Indexed text: {text} as {vector}")
          return vector
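
The hunk above shows the tail of SimpleIndexer's vectorizer: tokens are hashed into a fixed-length count vector (the "hashing trick"). A self-contained sketch of that idea, with an assumed dimension, tokenizer, and hash function (the package's exact choices are not visible in this diff):

    import hashlib

    import numpy as np

    def text_to_vector(text: str, dim: int = 128) -> np.ndarray:
        # Hash each whitespace-separated token into one of `dim` buckets and count.
        vector = np.zeros(dim)
        for token in text.lower().split():
            index = int(hashlib.md5(token.encode()).hexdigest(), 16) % dim
            # Increment the count at the computed index
            vector[index] += 1.0
        return vector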
linkml_store/index/{index.py → indexer.py} RENAMED
@@ -13,7 +13,7 @@ def cosine_similarity(vector1, vector2):
      return dot_product / (norm1 * norm2)


- class Index(BaseModel):
+ class Indexer(BaseModel):
      """
      An index operates on a collection in order to search for objects.
      """
@@ -65,7 +65,10 @@ class Index(BaseModel):

      def object_to_text(self, obj: Dict[str, Any]) -> str:
          """
-         Create a text from an object suitable for indexing.
+         Convert an object to a text representation
+
+         :param obj:
+         :return:
          """
          if self.index_attributes:
              obj = {k: v for k, v in obj.items() if k in self.index_attributes}
@@ -77,14 +80,14 @@ class Index(BaseModel):

      def search(
          self, query: str, vectors: List[Tuple[str, INDEX_ITEM]], limit: Optional[int] = None
-     ) -> List[Tuple[float, str]]:
+     ) -> List[Tuple[float, Any]]:
          """
          Search the index for a query string

          :param query: The query string to search for
          :param vectors: A list of indexed items, where each item is a tuple of (id, vector)
          :param limit: The maximum number of results to return (optional)
-         :return: A list of item IDs that match the query
+         :return: A list of item IDs or objects that match the query
          """

          # Convert the query string to a vector
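
Given the widened List[Tuple[float, Any]] return type, search is expected to score each (id, vector) pair against the query vector with cosine similarity and rank descending. A hedged sketch of that ranking step (names are illustrative, not the package's internals):

    from typing import Any, List, Optional, Tuple

    import numpy as np

    def rank_by_cosine(
        query_vector: np.ndarray,
        vectors: List[Tuple[str, np.ndarray]],
        limit: Optional[int] = None,
    ) -> List[Tuple[float, Any]]:
        qnorm = np.linalg.norm(query_vector)
        scored = [
            (float(np.dot(query_vector, v) / (qnorm * np.linalg.norm(v))), item_id)
            for item_id, v in vectors
        ]
        scored.sort(key=lambda pair: pair[0], reverse=True)  # best matches first
        return scored[:limit] if limit else scored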
linkml_store/utils/format_utils.py ADDED
@@ -0,0 +1,93 @@
+ import csv
+ import json
+ import sys
+ from enum import Enum
+ from io import StringIO
+ from pathlib import Path
+ from typing import Any, Dict, List, Union
+
+ import yaml
+ from pydantic import BaseModel
+
+
+ class Format(Enum):
+     JSON = "json"
+     JSONL = "jsonl"
+     YAML = "yaml"
+     TSV = "tsv"
+     CSV = "csv"
+
+
+ def load_objects(file_path: Union[str, Path], format: Union[Format, str] = None) -> List[Dict[str, Any]]:
+     """
+     Load objects from a file in JSON, JSONLines, YAML, CSV, or TSV format.
+
+     :param file_path: The path to the file.
+     :param format: The format of the file. Can be a Format enum or a string value.
+     :return: A list of dictionaries representing the loaded objects.
+     """
+     if isinstance(format, str):
+         format = Format(format)
+
+     if isinstance(file_path, Path):
+         file_path = str(file_path)
+
+     if file_path == "-":
+         # read the objects from stdin
+         f = sys.stdin
+     else:
+         f = open(file_path)
+
+     if format == Format.JSON or (not format and file_path.endswith(".json")):
+         objs = json.load(f)
+     elif format == Format.JSONL or (not format and file_path.endswith(".jsonl")):
+         objs = [json.loads(line) for line in f]
+     elif format == Format.YAML or (not format and (file_path.endswith(".yaml") or file_path.endswith(".yml"))):
+         objs = yaml.safe_load(f)
+     elif format == Format.TSV or (not format and file_path.endswith(".tsv")):
+         reader = csv.DictReader(f, delimiter="\t")
+         objs = list(reader)
+     elif format == Format.CSV or (not format and file_path.endswith(".csv")):
+         reader = csv.DictReader(f)
+         objs = list(reader)
+     else:
+         raise ValueError(f"Unsupported file format: {file_path}")
+     if not isinstance(objs, list):
+         objs = [objs]
+     return objs
+
+
+ def render_output(data: List[Dict[str, Any]], format: Union[Format, str] = Format.YAML) -> str:
+     """
+     Render output data in JSON, JSONLines, YAML, CSV, or TSV format.
+
+     :param data: The data to be rendered.
+     :param format: The desired output format. Can be a Format enum or a string value.
+     :return: The rendered output as a string.
+     """
+     if isinstance(format, str):
+         format = Format(format)
+
+     if isinstance(data, BaseModel):
+         data = data.model_dump()
+
+     if format == Format.JSON:
+         return json.dumps(data, indent=2, default=str)
+     elif format == Format.JSONL:
+         return "\n".join(json.dumps(obj) for obj in data)
+     elif format == Format.YAML:
+         return yaml.safe_dump(data, sort_keys=False)
+     elif format == Format.TSV:
+         output = StringIO()
+         writer = csv.DictWriter(output, fieldnames=data[0].keys(), delimiter="\t")
+         writer.writeheader()
+         writer.writerows(data)
+         return output.getvalue()
+     elif format == Format.CSV:
+         output = StringIO()
+         writer = csv.DictWriter(output, fieldnames=data[0].keys())
+         writer.writeheader()
+         writer.writerows(data)
+         return output.getvalue()
+     else:
+         raise ValueError(f"Unsupported output format: {format}")
linkml_store/utils/object_utils.py ADDED
@@ -0,0 +1,81 @@
+ import json
+ from copy import deepcopy
+ from typing import Any, Dict, List, Union
+
+ from pydantic import BaseModel
+
+
+ def object_path_update(
+     obj: Union[BaseModel, Dict[str, Any]], path: str, value: Any
+ ) -> Union[BaseModel, Dict[str, Any]]:
+     """
+     Updates a nested object based on a path description and a value. The path to the
+     desired field is given in dot and bracket notation (e.g., 'a[0].b.c[1]').
+
+     :param obj: The object (dict or pydantic model) to be updated.
+     :type obj: Union[BaseModel, Dict[str, Any]]
+     :param path: The path string indicating where to place the value within the object.
+     :type path: str
+     :param value: The value to be set at the specified path.
+     :type value: Any
+     :return: A deep-copied object with the value set; the input object is not modified.
+     :rtype: Union[BaseModel, Dict[str, Any]]
+
+     **Example**::
+
+         >>> data = {}
+         >>> object_path_update(data, 'persons[0].foo.bar', 1)
+         {'persons': [{'foo': {'bar': 1}}]}
+     """
+     if isinstance(obj, BaseModel):
+         typ = type(obj)
+         obj = obj.dict()
+         obj = object_path_update(obj, path, value)
+         return typ(**obj)
+     obj = deepcopy(obj)
+     ret_obj = obj
+     parts = path.split(".")
+     for part in parts[:-1]:
+         if "[" in part:
+             key, index = part[:-1].split("[")
+             index = int(index)
+             # obj = obj.setdefault(key, [{} for _ in range(index+1)])
+             obj = obj.setdefault(key, [])
+             while len(obj) <= index:
+                 obj.append({})
+             obj = obj[index]
+         else:
+             obj = obj.setdefault(part, {})
+     last_part = parts[-1]
+     if "[" in last_part:
+         key, index = last_part[:-1].split("[")
+         index = int(index)
+         if key not in obj or not isinstance(obj[key], list):
+             obj[key] = [{} for _ in range(index + 1)]
+         obj[key][index] = value
+     else:
+         obj[last_part] = value
+     return ret_obj
+
+
+ def parse_update_expression(expr: str) -> Union[tuple[str, Any], None]:
+     """
+     Parse a string expression of the form 'path.to.field=value' into a path and a value.
+
+     :param expr: an expression such as 'a.b[0].c=5'; the value part is parsed as JSON
+     :return: a (path, value) tuple, or None if the expression cannot be parsed
+     """
+     try:
+         path, val = expr.split("=", 1)
+         val = json.loads(val)
+     except ValueError:
+         return None
+     return path, val
+
+
+ def clean_empties(value: Union[Dict, List]) -> Any:
+     """
+     Recursively remove None values from nested dicts and lists.
+     """
+     if isinstance(value, dict):
+         value = {k: v for k, v in ((k, clean_empties(v)) for k, v in value.items()) if v is not None}
+     elif isinstance(value, list):
+         value = [v for v in (clean_empties(v) for v in value) if v is not None]
+     return value
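
Because object_path_update deep-copies its input and returns the updated object, callers must use the return value rather than relying on mutation:

    from linkml_store.utils.object_utils import object_path_update, parse_update_expression

    data = {"persons": [{"name": "Alice"}]}
    updated = object_path_update(data, "persons[0].age", 33)
    assert updated == {"persons": [{"name": "Alice", "age": 33}]}
    assert "age" not in data["persons"][0]  # the input dict is unchanged

    assert parse_update_expression("persons[0].age=33") == ("persons[0].age", 33)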