linkml-store 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release.

This version of linkml-store might be problematic.
Files changed (35)
  1. linkml_store/api/client.py +15 -4
  2. linkml_store/api/collection.py +185 -15
  3. linkml_store/api/config.py +11 -3
  4. linkml_store/api/database.py +36 -5
  5. linkml_store/api/stores/duckdb/duckdb_collection.py +6 -3
  6. linkml_store/api/stores/duckdb/duckdb_database.py +20 -1
  7. linkml_store/api/stores/filesystem/__init__.py +7 -8
  8. linkml_store/api/stores/filesystem/filesystem_collection.py +150 -113
  9. linkml_store/api/stores/filesystem/filesystem_database.py +57 -21
  10. linkml_store/api/stores/mongodb/mongodb_collection.py +82 -34
  11. linkml_store/api/stores/mongodb/mongodb_database.py +13 -2
  12. linkml_store/api/types.py +4 -0
  13. linkml_store/cli.py +97 -8
  14. linkml_store/index/__init__.py +5 -3
  15. linkml_store/index/indexer.py +7 -2
  16. linkml_store/utils/change_utils.py +17 -0
  17. linkml_store/utils/format_utils.py +89 -8
  18. linkml_store/utils/patch_utils.py +126 -0
  19. linkml_store/utils/query_utils.py +89 -0
  20. linkml_store/utils/schema_utils.py +23 -0
  21. linkml_store/webapi/__init__.py +0 -0
  22. linkml_store/webapi/html/__init__.py +3 -0
  23. linkml_store/webapi/html/base.html.j2 +24 -0
  24. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  25. linkml_store/webapi/html/database_details.html.j2 +16 -0
  26. linkml_store/webapi/html/databases.html.j2 +14 -0
  27. linkml_store/webapi/html/generic.html.j2 +46 -0
  28. linkml_store/webapi/main.py +572 -0
  29. linkml_store-0.1.10.dist-info/METADATA +138 -0
  30. linkml_store-0.1.10.dist-info/RECORD +58 -0
  31. {linkml_store-0.1.8.dist-info → linkml_store-0.1.10.dist-info}/entry_points.txt +1 -0
  32. linkml_store-0.1.8.dist-info/METADATA +0 -58
  33. linkml_store-0.1.8.dist-info/RECORD +0 -45
  34. {linkml_store-0.1.8.dist-info → linkml_store-0.1.10.dist-info}/LICENSE +0 -0
  35. {linkml_store-0.1.8.dist-info → linkml_store-0.1.10.dist-info}/WHEEL +0 -0
linkml_store/cli.py CHANGED
@@ -14,7 +14,7 @@ from linkml_store.api.queries import Query
 from linkml_store.index import get_indexer
 from linkml_store.index.implementations.simple_indexer import SimpleIndexer
 from linkml_store.index.indexer import Indexer
-from linkml_store.utils.format_utils import Format, guess_format, load_objects, render_output
+from linkml_store.utils.format_utils import Format, guess_format, load_objects, render_output, write_output
 from linkml_store.utils.object_utils import object_path_update
 
 index_type_option = click.option(
@@ -159,7 +159,15 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
 @click.option("--object", "-i", multiple=True, help="Input object as YAML")
 @click.pass_context
 def insert(ctx, files, object, format):
-    """Insert objects from files (JSON, YAML, TSV) into the specified collection."""
+    """Insert objects from files (JSON, YAML, TSV) into the specified collection.
+
+    Using a configuration:
+
+        linkml-store -C config.yaml -c genes insert data/genes/*.json
+
+    Note: if you don't provide a schema, one will be inferred, but it is
+    usually better to provide an explicit schema.
+    """
     settings = ctx.obj["settings"]
     collection = settings.collection
     if not collection:
@@ -181,6 +189,7 @@ def insert(ctx, files, object, format):
             objects = yaml.safe_load(object_str)
             collection.insert(objects)
             click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{collection.name}'.")
+    collection.commit()
 
 
 @cli.command()
  @cli.command()
@@ -213,9 +222,9 @@ def store(ctx, files, object, format):
213
222
 
214
223
 
215
224
  @cli.command(name="import")
216
- @click.argument("files", type=click.Path(exists=True), nargs=-1)
217
225
  @click.option("--format", "-f", help="Input format")
218
226
  @click.pass_context
227
+ @click.argument("files", type=click.Path(exists=True), nargs=-1)
219
228
  def import_database(ctx, files, format):
220
229
  """Imports a database from a dump."""
221
230
  settings = ctx.obj["settings"]
@@ -242,13 +251,77 @@ def export(ctx, output_type, output):
 
 
 @cli.command()
-@click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
+@click.option("--output", "-o", type=click.Path(), help="Output file path")
+@click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
+@click.option("--other-database", "-D", required=False, help="Path to the other database")
+@click.option("--other-collection", "-X", required=True, help="Name of the other collection")
+@click.option("--identifier-attribute", "-I", required=False, help="Primary key name")
+@click.pass_context
+def diff(ctx, output, output_type, other_database, other_collection, identifier_attribute):
+    """Diffs two collections to create a patch."""
+    settings = ctx.obj["settings"]
+    db = settings.database
+    collection = settings.collection
+    if not collection:
+        raise ValueError("Collection must be specified.")
+    other_db = settings.client.get_database(other_database) if other_database else db
+    other_collection = other_db.get_collection(other_collection)
+    if identifier_attribute:
+        collection.set_identifier_attribute_name(identifier_attribute)
+        other_collection.set_identifier_attribute_name(identifier_attribute)
+    diff = collection.diff(other_collection)
+    write_output(diff, output_type, target=output)
+
+
+@cli.command()
+@click.option("--identifier-attribute", "-I", required=False, help="Primary key name")
+@click.argument("patch_files", type=click.Path(exists=True), nargs=-1)
+@click.pass_context
+def apply(ctx, patch_files, identifier_attribute):
+    """
+    Apply a patch to a collection.
+    """
+    settings = ctx.obj["settings"]
+    collection = settings.collection
+    if not collection:
+        raise ValueError("Collection must be specified.")
+    if identifier_attribute:
+        collection.set_identifier_attribute_name(identifier_attribute)
+    for patch_file in patch_files:
+        patch_objs = load_objects(patch_file, expected_type=list)
+        collection.apply_patches(patch_objs)
+
+
+@cli.command()
+@click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query, as YAML")
 @click.option("--limit", "-l", type=click.INT, help="Maximum number of results to return")
 @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
 @click.option("--output", "-o", type=click.Path(), help="Output file path")
 @click.pass_context
 def query(ctx, where, limit, output_type, output):
-    """Query objects from the specified collection."""
+    """Query objects from the specified collection.
+
+
+    Leave the query field blank to return all objects in the collection.
+
+    Examples:
+
+        linkml-store -d duckdb:///countries.db -c countries query
+
+    Queries can be specified in YAML, as basic key-value pairs.
+
+    Examples:
+
+        linkml-store -d duckdb:///countries.db -c countries query -w 'code: NZ'
+
+    More complex queries can be specified using MongoDB-style query syntax.
+
+    Examples:
+
+        linkml-store -d file:. -c persons query -w 'occupation: {$ne: Architect}'
+
+    Finds all people who are not architects.
+    """
     collection = ctx.obj["settings"].collection
     where_clause = yaml.safe_load(where) if where else None
     query = Query(from_table=collection.name, where_clause=where_clause, limit=limit)
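Taken together, the new diff and apply commands support a patch round trip: diff two collections keyed on a primary key, write the patch, then apply it elsewhere. A sketch of the intended invocation, mirroring the docstring examples above (database paths and collection names hypothetical):

linkml-store -d duckdb:///db1.db -c countries diff -D duckdb:///db2.db -X countries -I code -O yaml -o patch.yaml
linkml-store -d duckdb:///db1.db -c countries apply -I code patch.yaml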
@@ -326,6 +399,21 @@ def _get_index(index_type=None, **kwargs) -> Indexer:
     raise ValueError(f"Unknown index type: {index_type}")
 
 
+@cli.command()
+@click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
+@click.option("--output-type", "-O", type=format_choice, default=Format.FORMATTED.value, help="Output format")
+@click.option("--output", "-o", type=click.Path(), help="Output file path")
+@click.pass_context
+def describe(ctx, where, output_type, output):
+    """
+    Describe the collection schema.
+    """
+    where_clause = yaml.safe_load(where) if where else None
+    collection = ctx.obj["settings"].collection
+    df = collection.find(where_clause, limit=1).rows_dataframe
+    write_output(df.describe(include="all").transpose(), output_type, target=output)
+
+
 @cli.command()
 @index_type_option
 @click.option("--cached-embeddings-database", "-E", help="Path to the database where embeddings are cached")
@@ -335,9 +423,7 @@ def index(ctx, index_type, **kwargs):
     """
     Create an index over a collection.
 
-    :param ctx:
-    :param index_type:
-    :return:
+    By default a simple trigram index is used.
     """
     collection = ctx.obj["settings"].collection
     ix = get_indexer(index_type, **kwargs)
@@ -397,6 +483,9 @@ def search(ctx, search_term, where, limit, index_type, output_type, output, auto
 @cli.command()
 @click.pass_context
 def indexes(ctx):
+    """
+    Show the indexes for a collection.
+    """
     collection = ctx.obj["settings"].collection
     for name, ix in collection.indexers.items():
         click.echo(f"{name}: {type(ix)}\n{ix.model_json()}")
linkml_store/index/__init__.py CHANGED
@@ -22,7 +22,7 @@ def get_indexer_class(name: str) -> Type[Indexer]:
     return INDEXER_CLASSES[name]
 
 
-def get_indexer(name: str, **kwargs) -> Indexer:
+def get_indexer(index_type: str, **kwargs) -> Indexer:
     """
     Get an indexer by name.
 
@@ -31,7 +31,9 @@ def get_indexer(name: str, **kwargs) -> Indexer:
     :return: the indexer
     """
     kwargs = {k: v for k, v in kwargs.items() if v is not None}
-    cls = get_indexer_class(name)
-    kwargs["name"] = name
+    cls = get_indexer_class(index_type)
+    kwargs["index_type"] = index_type
     indexer = cls(**kwargs)
+    if not indexer.name:
+        indexer.name = index_type
     return indexer
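A usage sketch of the renamed parameter and the new name fallback (assuming SimpleIndexer is registered in INDEXER_CLASSES under "simple"):

from linkml_store.index import get_indexer

ix = get_indexer("simple")
assert ix.index_type == "simple"
assert ix.name == "simple"  # name now falls back to index_type when not supplied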
linkml_store/index/indexer.py CHANGED
@@ -28,6 +28,7 @@ class Indexer(BaseModel):
     """
 
     name: Optional[str] = None
+    index_type: Optional[str] = None
     index_function: Optional[Callable] = None
     distance_function: Optional[Callable] = None
     index_attributes: Optional[List[str]] = None
@@ -93,13 +94,17 @@ class Indexer(BaseModel):
         if "{%" in self.text_template or "{{" in self.text_template:
             logger.info("Detected Jinja2 syntax in text template")
             syntax = TemplateSyntaxEnum.jinja2
-        if syntax and syntax == TemplateSyntaxEnum.jinja2:
+        if not syntax:
+            syntax = TemplateSyntaxEnum.fstring
+        if syntax == TemplateSyntaxEnum.jinja2:
             from jinja2 import Template
 
             template = Template(self.text_template)
             return template.render(**obj)
-        else:
+        elif syntax == TemplateSyntaxEnum.fstring:
             return self.text_template.format(**obj)
+        else:
+            raise NotImplementedError(f"Cannot handle template syntax: {syntax}")
         return str(obj)
 
     def search(
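The rewritten branch makes the f-string path explicit instead of a catch-all else. A standalone sketch of the same fallback logic, not the library's API (enum values assumed):

from enum import Enum

from jinja2 import Template


class TemplateSyntaxEnum(str, Enum):
    jinja2 = "jinja2"
    fstring = "fstring"


def render(text_template: str, obj: dict, syntax=None) -> str:
    # auto-detect Jinja2 markers, as in the diff above
    if "{%" in text_template or "{{" in text_template:
        syntax = TemplateSyntaxEnum.jinja2
    if not syntax:
        syntax = TemplateSyntaxEnum.fstring
    if syntax == TemplateSyntaxEnum.jinja2:
        return Template(text_template).render(**obj)
    elif syntax == TemplateSyntaxEnum.fstring:
        return text_template.format(**obj)
    raise NotImplementedError(f"Cannot handle template syntax: {syntax}")


print(render("{{ name }} ({{ code }})", {"name": "New Zealand", "code": "NZ"}))  # Jinja2 auto-detected
print(render("{name} ({code})", {"name": "New Zealand", "code": "NZ"}))  # falls back to str.format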
linkml_store/utils/change_utils.py ADDED
@@ -0,0 +1,18 @@
+from typing import List
+
+from linkml_store.api.collection import OBJECT
+
+
+def insert_operation_to_patches(objs: List[OBJECT], **kwargs):
+    """
+    Translate a list of objects to a list of patches for insertion.
+
+    Note: inserts are always treated as being at the start of a list.
+
+    :param objs: objects to insert
+    :param kwargs: additional arguments
+    """
+    patches = []
+    for obj in objs:
+        patches.append({"op": "add", "path": "/0", "value": obj})
+    return patches
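A usage sketch (hypothetical objects), assuming the function returns the accumulated patches as above:

from linkml_store.utils.change_utils import insert_operation_to_patches

patches = insert_operation_to_patches([{"id": "F3", "name": "Milk"}])
print(patches)  # [{'op': 'add', 'path': '/0', 'value': {'id': 'F3', 'name': 'Milk'}}]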
linkml_store/utils/format_utils.py CHANGED
@@ -4,8 +4,9 @@ import sys
 from enum import Enum
 from io import StringIO
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, TextIO, Type, Union
 
+import pandas as pd
 import yaml
 from pydantic import BaseModel
 
@@ -20,9 +21,13 @@ class Format(Enum):
     YAML = "yaml"
     TSV = "tsv"
     CSV = "csv"
+    PARQUET = "parquet"
+    FORMATTED = "formatted"
 
 
-def load_objects(file_path: Union[str, Path], format: Union[Format, str] = None) -> List[Dict[str, Any]]:
+def load_objects(
+    file_path: Union[str, Path], format: Union[Format, str] = None, expected_type: Type = None
+) -> List[Dict[str, Any]]:
     """
     Load objects from a file in JSON, JSONLines, YAML, CSV, or TSV format.
 
@@ -32,6 +37,7 @@ def load_objects(file_path: Union[str, Path], format: Union[Format, str] = None)
 
     :param file_path: The path to the file.
     :param format: The format of the file. Can be a Format enum or a string value.
+    :param expected_type: The target type to load the objects into.
     :return: A list of dictionaries representing the loaded objects.
     """
     if isinstance(format, str):
@@ -40,24 +46,39 @@ def load_objects(file_path: Union[str, Path], format: Union[Format, str] = None)
     if isinstance(file_path, Path):
         file_path = str(file_path)
 
+    if not format and (file_path.endswith(".parquet") or file_path.endswith(".pq")):
+        format = Format.PARQUET
+
+    mode = "r"
+    if format == Format.PARQUET:
+        mode = "rb"
+
     if file_path == "-":
         # set file_path to be a stream from stdin
         f = sys.stdin
     else:
-        f = open(file_path)
+        f = open(file_path, mode)
 
     if format == Format.JSON or (not format and file_path.endswith(".json")):
         objs = json.load(f)
     elif format == Format.JSONL or (not format and file_path.endswith(".jsonl")):
         objs = [json.loads(line) for line in f]
     elif format == Format.YAML or (not format and (file_path.endswith(".yaml") or file_path.endswith(".yml"))):
-        objs = yaml.safe_load(f)
+        if expected_type and expected_type == list:  # noqa E721
+            objs = list(yaml.safe_load_all(f))
+        else:
+            objs = yaml.safe_load(f)
     elif format == Format.TSV or (not format and file_path.endswith(".tsv")):
         reader = csv.DictReader(f, delimiter="\t")
         objs = list(reader)
     elif format == Format.CSV or (not format and file_path.endswith(".csv")):
         reader = csv.DictReader(f)
         objs = list(reader)
+    elif format == Format.PARQUET:
+        import pyarrow.parquet as pq
+
+        table = pq.read_table(f)
+        objs = table.to_pandas().to_dict(orient="records")
     else:
         raise ValueError(f"Unsupported file format: {file_path}")
     if not isinstance(objs, list):
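A round-trip sketch for the new PARQUET branch (file name hypothetical; requires pyarrow):

import pandas as pd

from linkml_store.utils.format_utils import load_objects

pd.DataFrame([{"code": "NZ"}, {"code": "US"}]).to_parquet("countries.parquet")
objs = load_objects("countries.parquet")  # format inferred from the .parquet suffix, opened in binary mode
assert objs == [{"code": "NZ"}, {"code": "US"}]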
@@ -65,7 +86,40 @@ def load_objects(file_path: Union[str, Path], format: Union[Format, str] = None)
     return objs
 
 
-def render_output(data: Union[List[Dict[str, Any]], Dict[str, Any]], format: Union[Format, str] = Format.YAML) -> str:
+def write_output(
+    data: Union[List[Dict[str, Any]], Dict[str, Any], pd.DataFrame],
+    format: Union[Format, str] = Format.YAML,
+    target: Optional[Union[TextIO, str, Path]] = None,
+) -> None:
+    """
+    Write output data to a file in JSON, JSONLines, YAML, CSV, or TSV format.
+
+    >>> write_output([{"a": 1, "b": 2}, {"a": 3, "b": 4}], Format.JSON, sys.stdout)
+    [
+      {
+        "a": 1,
+        "b": 2
+      },
+      {
+        "a": 3,
+        "b": 4
+      }
+    ]
+    """
+    output_str = render_output(data, format)
+    if target:
+        if isinstance(target, str):
+            with open(target, "w") as target:
+                target.write(output_str)
+        else:
+            target.write(output_str)
+    else:
+        print(output_str)
+
+
+def render_output(
+    data: Union[List[Dict[str, Any]], Dict[str, Any], pd.DataFrame], format: Union[Format, str] = Format.YAML
+) -> str:
     """
     Render output data in JSON, JSONLines, YAML, CSV, or TSV format.
 
@@ -89,6 +143,14 @@ def render_output(data: Union[List[Dict[str, Any]], Dict[str, Any]], format: Uni
     if isinstance(format, str):
         format = Format(format)
 
+    if format == Format.FORMATTED:
+        if not isinstance(data, pd.DataFrame):
+            data = pd.DataFrame(data)
+        return str(data)
+
+    if isinstance(data, pd.DataFrame):
+        data = data.to_dict(orient="records")
+
     if isinstance(data, BaseModel):
         data = data.model_dump()
 
@@ -97,16 +159,19 @@ def render_output(data: Union[List[Dict[str, Any]], Dict[str, Any]], format: Uni
     elif format == Format.JSONL:
         return "\n".join(json.dumps(obj) for obj in data)
     elif format == Format.YAML:
-        return yaml.safe_dump(data, sort_keys=False)
+        if isinstance(data, list):
+            return yaml.safe_dump_all(data, sort_keys=False)
+        else:
+            return yaml.safe_dump(data, sort_keys=False)
     elif format == Format.TSV:
         output = StringIO()
-        writer = csv.DictWriter(output, fieldnames=data[0].keys(), delimiter="\t")
+        writer = csv.DictWriter(output, fieldnames=get_fieldnames(data), delimiter="\t")
         writer.writeheader()
         writer.writerows(data)
         return output.getvalue()
     elif format == Format.CSV:
         output = StringIO()
-        writer = csv.DictWriter(output, fieldnames=data[0].keys())
+        writer = csv.DictWriter(output, fieldnames=get_fieldnames(data))
         writer.writeheader()
         writer.writerows(data)
         return output.getvalue()
@@ -114,6 +179,22 @@ def render_output(data: Union[List[Dict[str, Any]], Dict[str, Any]], format: Uni
     raise ValueError(f"Unsupported output format: {format}")
 
 
+def get_fieldnames(data: List[Dict[str, Any]]) -> List[str]:
+    """
+    Get the fieldnames of a list of dictionaries.
+
+    >>> get_fieldnames([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
+    ['a', 'b']
+
+    :param data: The list of dictionaries.
+    :return: The fieldnames.
+    """
+    fieldnames = []
+    for obj in data:
+        fieldnames.extend([k for k in obj.keys() if k not in fieldnames])
+    return fieldnames
+
+
 def guess_format(path: str) -> Optional[Format]:
     """
     Guess the format of a file based on its extension.
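The switch from data[0].keys() to get_fieldnames matters for heterogeneous records, where later rows introduce columns the first row lacks. A small sketch:

from linkml_store.utils.format_utils import get_fieldnames, render_output

rows = [{"a": 1}, {"a": 2, "b": 3}]
print(get_fieldnames(rows))  # ['a', 'b']
print(render_output(rows, "csv"))  # header now includes b; the first row's b cell is left empty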
linkml_store/utils/patch_utils.py ADDED
@@ -0,0 +1,126 @@
+from typing import Any, Dict, List, Optional, TypedDict
+
+import jsonpatch
+
+
+class PatchDict(TypedDict):
+    op: str
+    path: str
+    value: Optional[Any]
+    _from: Optional[str]
+
+
+def apply_patches(obj: Any, patches: List[PatchDict], primary_key: Optional[str] = None, in_place=False) -> Any:
+    """
+    Apply a set of patches to an object.
+
+    If the object is a list, the primary key must be specified.
+
+    >>> objs = [{'id': 'F1', 'name': 'Cheese'}, {'id': 'F2', 'name': 'Bread'}]
+    >>> patches = [{'op': 'replace', 'path': '/F1/name', 'value': 'Toast'}]
+    >>> apply_patches(objs, patches, primary_key='id')
+    [{'id': 'F1', 'name': 'Toast'}, {'id': 'F2', 'name': 'Bread'}]
+
+    :param obj: object to patch
+    :param patches: list of patches, conforming to the JSON Patch format
+    :param primary_key: key to use as the primary key for the objects (if obj is a list)
+    :param in_place: whether to apply the patches in place
+    :return: the patched object
+    """
+    if isinstance(obj, dict):
+        patch_obj = jsonpatch.JsonPatch(patches)
+        return patch_obj.apply(obj, in_place=in_place)
+    elif isinstance(obj, list):
+        if not primary_key:
+            raise ValueError("Primary key must be specified for list objects")
+        return apply_patches_to_list(obj, patches, primary_key, in_place=in_place)
+    else:
+        raise ValueError(f"Unsupported object type: {type(obj)}")
+
+
+def apply_patches_to_list(
+    objects: List[Dict[str, Any]], patches: List[PatchDict], primary_key: str, in_place=False
+) -> List[Dict[str, Any]]:
+    """
+    Apply a set of patches to a list of objects.
+
+    :param objects: list of objects
+    :param patches: list of patches, conforming to the JSON Patch format
+    :param primary_key: key to use as the primary key for the objects
+    :param in_place: whether to apply the patches in place
+    :return: the patched list of objects
+    """
+    objs_as_dict = {obj[primary_key]: obj for obj in objects}
+    result = apply_patches_to_keyed_list(objs_as_dict, patches, in_place=in_place)
+    return list(result.values())
+
+
+def apply_patches_to_keyed_list(
+    objs_as_dict: Dict[str, Dict[str, Any]], patches: List[PatchDict], in_place=False
+) -> Dict[str, Dict[str, Any]]:
+    """
+    Apply a set of patches to a list of objects, where the objects are keyed by a primary key.
+
+    :param objs_as_dict: mapping from primary key to object
+    :param patches: list of patches, conforming to the JSON Patch format
+    :param in_place: whether to apply the patches in place
+    :return: the patched mapping
+    """
+    patch_obj = jsonpatch.JsonPatch(patches)
+    result = patch_obj.apply(objs_as_dict, in_place=in_place)
+    return result
+
+
+def patches_from_objects_lists(
+    src_objs: List[Dict[str, Any]], dst_objs: List[Dict[str, Any]], primary_key: str, exclude_none=True
+) -> List[PatchDict]:
+    """
+    Generate a set of patches to transform src_objs into dst_objs.
+
+    >>> src_objs = [{'id': 'F1', 'name': 'Cheese'}, {'id': 'F2', 'name': 'Bread'}]
+    >>> tgt_objs = [{'id': 'F1', 'name': 'Toast'}, {'id': 'F2', 'name': 'Bread'}]
+    >>> patches_from_objects_lists(src_objs, tgt_objs, primary_key='id')
+    [{'op': 'replace', 'path': '/F1/name', 'value': 'Toast'}]
+
+    By default exclude_none is True, so None values are excluded from the patch:
+
+    >>> tgt_objs = [{'id': 'F1', 'name': 'Toast'}, {'id': 'F2', 'name': None}]
+    >>> patches_from_objects_lists(src_objs, tgt_objs, primary_key='id')
+    [{'op': 'replace', 'path': '/F1/name', 'value': 'Toast'}, {'op': 'remove', 'path': '/F2/name'}]
+
+    If exclude_none is False, None values are treated as being set to None:
+
+    >>> patches_from_objects_lists(src_objs, tgt_objs, primary_key='id', exclude_none=False)
+    [{'op': 'replace', 'path': '/F1/name', 'value': 'Toast'}, {'op': 'replace', 'path': '/F2/name', 'value': None}]
+
+    See also: `<https://github.com/orgs/linkml/discussions/1975>`_
+
+    Note the patches are sorted deterministically, first by path, then by operation.
+    This helps ensure operations on the same object are grouped together.
+
+    :param src_objs: source objects
+    :param dst_objs: target objects
+    :param primary_key: key to use as the primary key for the objects
+    :param exclude_none: whether to exclude None values from the patch
+    :return: the sorted list of patches
+    """
+    src_objs_as_dict = {obj[primary_key]: obj for obj in src_objs}
+    dst_objs_as_dict = {obj[primary_key]: obj for obj in dst_objs}
+    if exclude_none:
+        src_objs_as_dict = {k: remove_nones(v) for k, v in src_objs_as_dict.items()}
+        dst_objs_as_dict = {k: remove_nones(v) for k, v in dst_objs_as_dict.items()}
+    patch_obj = jsonpatch.JsonPatch.from_diff(src_objs_as_dict, dst_objs_as_dict)
+    pl = patch_obj.patch
+    return sorted(pl, key=lambda x: (x["path"], x["op"]))
+
+
+def remove_nones(obj: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Remove None values from a dictionary.
+
+    :param obj: the dictionary
+    :return: the dictionary without None-valued keys
+    """
+    return {k: v for k, v in obj.items() if v is not None}
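A round-trip sketch tying the two halves of the new module together; note the patch paths address objects by primary key rather than list position:

from linkml_store.utils.patch_utils import apply_patches, patches_from_objects_lists

src = [{"id": "F1", "name": "Cheese"}, {"id": "F2", "name": "Bread"}]
dst = [{"id": "F1", "name": "Toast"}, {"id": "F2", "name": "Bread"}]

patches = patches_from_objects_lists(src, dst, primary_key="id")
print(patches)  # [{'op': 'replace', 'path': '/F1/name', 'value': 'Toast'}]
assert apply_patches(src, patches, primary_key="id") == dst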
linkml_store/utils/query_utils.py ADDED
@@ -0,0 +1,89 @@
+import operator
+from typing import Any, Callable, Dict
+
+MONGO_OPERATORS = {
+    "$eq": operator.eq,
+    "$ne": operator.ne,
+    "$gt": operator.gt,
+    "$gte": operator.ge,
+    "$lt": operator.lt,
+    "$lte": operator.le,
+    "$in": lambda a, b: any(x in b for x in (a if isinstance(a, list) else [a])),
+    "$nin": lambda a, b: all(x not in b for x in (a if isinstance(a, list) else [a])),
+}
+
+
+def mongo_query_to_match_function(where: Dict[str, Any]) -> Callable[[Dict[str, Any]], bool]:
+    """
+    Convert a MongoDB-style query to a matching function.
+
+    >>> query = {"name": "foo", "age": {"$gt": 25}}
+    >>> matcher = mongo_query_to_match_function(query)
+    >>> matcher({"name": "foo", "age": 30})
+    True
+    >>> matcher({"name": "foo", "age": 20})
+    False
+    >>> matcher({"name": "bar", "age": 30})
+    False
+
+    >>> nested_query = {"nested.job": "engineer", "skills": {"$in": ["python", "mongodb"]}}
+    >>> nested_matcher = mongo_query_to_match_function(nested_query)
+    >>> nested_matcher({"nested": {"job": "engineer"}, "skills": ["python", "javascript"]})
+    True
+    >>> nested_matcher({"nested": {"job": "designer"}, "skills": ["python", "mongodb"]})
+    False
+    >>> nested_matcher({"nested": {"job": "engineer"}, "skills": ["java", "c++"]})
+    False
+
+    >>> complex_query = {"name": "foo", "age": {"$gte": 25, "$lt": 40}, "nested.salary": {"$gt": 50000}}
+    >>> complex_matcher = mongo_query_to_match_function(complex_query)
+    >>> complex_matcher({"name": "foo", "age": 30, "nested": {"salary": 60000}})
+    True
+    >>> complex_matcher({"name": "foo", "age": 45, "nested": {"salary": 70000}})
+    False
+    >>> complex_matcher({"name": "foo", "age": 35, "nested": {"salary": 40000}})
+    False
+
+    >>> invalid_query = {"age": {"$invalid": 25}}
+    >>> invalid_matcher = mongo_query_to_match_function(invalid_query)
+    >>> invalid_matcher({"age": 30})
+    Traceback (most recent call last):
+    ...
+    ValueError: Unsupported operator: $invalid
+    """
+    if where is None:
+        where = {}
+
+    def matches(obj: Dict[str, Any]) -> bool:
+        def check_condition(key: str, condition: Any) -> bool:
+            if isinstance(condition, dict) and any(k.startswith("$") for k in condition.keys()):
+                for op, value in condition.items():
+                    if op in MONGO_OPERATORS:
+                        if not MONGO_OPERATORS[op](get_nested_value(obj, key), value):
+                            return False
+                    else:
+                        raise ValueError(f"Unsupported operator: {op}")
+            elif isinstance(condition, dict):
+                return check_nested_condition(get_nested_value(obj, key), condition)
+            else:
+                return get_nested_value(obj, key) == condition
+            return True
+
+        def check_nested_condition(nested_obj: Dict[str, Any], nested_condition: Dict[str, Any]) -> bool:
+            for k, v in nested_condition.items():
+                if not check_condition(k, v):
+                    return False
+            return True
+
+        def get_nested_value(obj: Dict[str, Any], key: str) -> Any:
+            parts = key.split(".")
+            for part in parts:
+                if isinstance(obj, dict):
+                    obj = obj.get(part)
+                else:
+                    return None
+            return obj
+
+        return all(check_condition(k, v) for k, v in where.items())
+
+    return matches
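This is the machinery behind CLI filters such as -w 'occupation: {$ne: Architect}' on non-MongoDB backends. A small sketch of that path:

import yaml

from linkml_store.utils.query_utils import mongo_query_to_match_function

where = yaml.safe_load("occupation: {$ne: Architect}")  # as parsed from the -w option
matcher = mongo_query_to_match_function(where)
assert matcher({"name": "a", "occupation": "Baker"})
assert not matcher({"name": "b", "occupation": "Architect"})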
linkml_store/utils/schema_utils.py ADDED
@@ -0,0 +1,23 @@
+from typing import List
+
+from linkml_runtime import SchemaView
+from linkml_runtime.linkml_model import SlotDefinition
+
+
+def path_to_attribute_list(class_name: str, path: str, schema_view: SchemaView) -> List[SlotDefinition]:
+    """
+    Convert a path to a list of attributes.
+
+    :param path: dot-separated attribute path, rooted at class_name
+    :return: the list of induced attributes along the path
+    """
+    parts = path.split(".")
+    att_list = []
+    while parts:
+        part = parts.pop(0)
+        att = schema_view.induced_slot(part, class_name)
+        if not att:
+            raise ValueError(f"Attribute {part} not found in class {class_name}")
+        att_list.append(att)
+        class_name = att.range
+    return att_list
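A minimal sketch of path traversal against a hypothetical two-class schema (SchemaView accepts a YAML string):

from linkml_runtime import SchemaView

from linkml_store.utils.schema_utils import path_to_attribute_list

SCHEMA = """
id: https://example.org/demo
name: demo
prefixes:
  linkml: https://w3id.org/linkml/
imports:
  - linkml:types
default_range: string
classes:
  Person:
    attributes:
      name: {}
      address:
        range: Address
  Address:
    attributes:
      city: {}
"""

sv = SchemaView(SCHEMA)
atts = path_to_attribute_list("Person", "address.city", sv)
print([att.name for att in atts])  # ['address', 'city']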
linkml_store/webapi/__init__.py (file without changes)
linkml_store/webapi/html/__init__.py ADDED
@@ -0,0 +1,3 @@
+from pathlib import Path
+
+HTML_TEMPLATES_DIR = Path(__file__).parent