odsbox-diff 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,639 @@
1
+ import hashlib
2
+ import io
3
+ import json
4
+ import logging
5
+ import os
6
+ import sys
7
+ import zipfile
8
+ from typing import Any, cast
9
+
10
+ from odsbox.model_cache import ModelCache
11
+ import odsbox.proto.ods_pb2 as ods
12
+ import pandas as pd
13
+ from odsbox.con_i import ConI
14
+ from requests import HTTPError
15
+
16
+ from .rel_to_name import RelToName
17
+
18
+ _log = logging.getLogger(__name__)
19
+
20
+
21
+ class Collector:
22
def __init__(
    self,
    con_i: ConI,
    is_null_to_nan: bool = False,
    enum_as_string: bool = True,
    cached_related_entities: list[str] | None = None,
) -> None:
    """Bind the collector to a connection and remember the query conversion flags.

    Args:
        con_i: Connection used for every query; its ``mc`` attribute supplies
            the model cache.
        is_null_to_nan: Forwarded to ``con_i.query_data`` on each query.
        enum_as_string: Forwarded to ``con_i.query_data`` on each query.
        cached_related_entities: Handed unchanged to ``RelToName``.
    """
    # Plain option flags; they are read again by ``_query_data``.
    self._enum_as_string = enum_as_string
    self._is_null_to_nan = is_null_to_nan

    # Connection-backed helpers.
    self._con_i = con_i
    self._mc: ModelCache = con_i.mc
    self.r2n: RelToName = RelToName(con_i, cached_related_entities)
34
+
35
+ def _query_data(self, query: dict[str, Any]) -> pd.DataFrame:
36
+ return self._con_i.query_data(
37
+ query=query, is_null_to_nan=self._is_null_to_nan, enum_as_string=self._enum_as_string
38
+ )
39
+
40
+ @staticmethod
41
+ def _print_progress_bar(
42
+ iteration: int,
43
+ total: int,
44
+ prefix: str = "",
45
+ suffix: str = "",
46
+ decimals: int = 1,
47
+ length: int = 50,
48
+ fill: str = "█",
49
+ ) -> None:
50
+ """
51
+ Call in a loop to create terminal progress bar
52
+ @params:
53
+ iteration - Required : current iteration (Int)
54
+ total - Required : total iterations (Int)
55
+ prefix - Optional : prefix string (Str)
56
+ suffix - Optional : suffix string (Str)
57
+ decimals - Optional : positive number of decimals in percent complete (Int)
58
+ length - Optional : character length of bar (Int)
59
+ fill - Optional : bar fill character (Str)
60
+ """
61
+ percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
62
+ filled_length = int(length * iteration // total)
63
+ bar = fill * filled_length + "-" * (length - filled_length)
64
+ sys.stdout.write(f"\r{prefix} |{bar}| {percent}% {suffix}")
65
+ sys.stdout.flush()
66
+ # Print New Line on Complete
67
+ if iteration == total:
68
+ print()
69
+
70
+ @staticmethod
71
+ def _hash_pandas_row(row: pd.Series[Any]) -> str:
72
+ row_string = "".join([str(item) for item in row.values])
73
+ return hashlib.sha256(row_string.encode()).hexdigest()
74
+
75
def _collect_bulk_data(
    self,
    lookup: dict[Any, Any],
    path_to_root: str,
    root_id: int,
    show_progress: bool,
) -> None:
    """Hash the bulk data of the local columns below the root and store the hashes in ``lookup``.

    All ``AoSubMatrix`` instances matching ``{path_to_root: root_id}`` are
    queried. For each of them the local columns are loaded including
    ``generation_parameters``, ``values`` and ``flags``; the hash of every
    row is written to the matching ``lookup`` entry under ``__BULK_HASH``.
    An ``HTTPError`` for one submatrix is recorded on the submatrix entry
    under ``__BULK_HASH_CALCULATION_ERROR`` and logged once the loop has
    finished.

    Args:
        lookup: Maps ``(entity_name, id)`` to already collected instance dicts.
        path_to_root: Query path from the submatrix to the root instance.
        root_id: Id of the root instance.
        show_progress: Whether to print a progress bar per submatrix.

    Raises:
        ValueError: If a local column or a failing submatrix is missing in ``lookup``.
    """
    submatrix_entity = self._mc.entity_by_base_name("AoSubMatrix")
    submatrix_query = {
        submatrix_entity.name: {path_to_root: root_id},
        "$attributes": {"id": 1, "measurement": 1, "number_of_rows": 1},
    }
    submatrix_df = self._query_data(submatrix_query)
    _log.debug("Found %s submatrices related to your test", submatrix_df.shape[0])
    # Use short column names so rows can be read by attribute below.
    submatrix_df.columns = ["id", "measurement", "number_of_rows"]

    column_entity = self._mc.entity_by_base_name("AoLocalColumn")
    failures = []
    _log.debug("Load bulk data from ASAM ODS server")
    for position, submatrix in enumerate(submatrix_df.itertuples(), start=1):
        if show_progress:
            self._print_progress_bar(position, submatrix_df.shape[0], "Bulk:", fill="*")
        submatrix_id = submatrix.id
        try:
            column_query = {
                column_entity.name: {"submatrix": submatrix_id},
                "$attributes": {
                    "id": 1,
                    "generation_parameters": 1,
                    "values": 1,
                    "flags": 1,
                },
            }
            column_df = self._query_data(column_query)
            column_df.columns = ["id", "generation_parameters", "values", "flags"]
            for _, column_row in column_df.iterrows():
                bulk_hash = self._hash_pandas_row(column_row)
                column_instance = lookup.get((column_entity.name, column_row.id))
                if column_instance is None:
                    raise ValueError("parent wasn't added")
                column_instance["__BULK_HASH"] = bulk_hash
        except HTTPError as e:
            # Keep going with the next submatrix; the failure is stored and logged later.
            error_text = f"Unable to retrieve bulk for Submatrix {submatrix_id}: {e}"
            failures.append(error_text)
            submatrix_instance = lookup.get((submatrix_entity.name, submatrix_id))
            if submatrix_instance is None:
                raise ValueError("parent wasn't added")
            submatrix_instance["__BULK_HASH_CALCULATION_ERROR"] = error_text
    _log.debug("Load bulk data from ASAM ODS server finished")
    for failure in failures:
        _log.error(failure)
129
+
130
def _get_descriptive(self, descriptive_lookup: dict[Any, Any], entity: ods.Model.Entity, id: int) -> dict[str, Any]:
    """Return the name-keyed hierarchy of one descriptive instance, using a cache.

    The instance is queried by ``{entity.name: id}``. For every relation of
    ``entity`` whose base name is ``children`` the child instances are
    attached through ``_add_children_ex``. The finished hierarchy is stored
    in ``descriptive_lookup`` under ``(entity.name, id)`` and served from
    there on later calls.

    Args:
        descriptive_lookup: Cache of already built descriptive hierarchies.
        entity: Entity of the descriptive instance.
        id: Id of the descriptive instance.

    Returns:
        Dict mapping the instance name to its record.
    """
    cache_key = (entity.name, id)
    cached_hierarchy = descriptive_lookup.get(cache_key)
    if cached_hierarchy is not None:
        return cast(dict[str, Any], cached_hierarchy)

    root_query = {entity.name: id}
    _log.debug("Collect descriptive by query: %s", root_query)
    roots_df = self._query_data(root_query)

    id_column = f"{entity.name}.{self._mc.attribute(entity, 'id').name}"
    name_column = f"{entity.name}.{self._mc.attribute(entity, 'name').name}"

    hierarchy: dict[str, Any] = {}
    # Separate instance lookup for this descriptive tree; children attach to these records.
    local_lookup: dict[Any, Any] = {}
    for record in roots_df.to_dict(orient="records"):
        hierarchy[f"{record[name_column]}"] = record
        local_lookup[(entity.name, record[id_column])] = record

    for relation in entity.relations.values():
        if relation.base_name != "children":
            continue
        self._add_children_ex(
            local_lookup,
            descriptive_lookup,
            relation.entity_name,
            relation.inverse_name,
            relation.inverse_name,
            id,
        )

    descriptive_lookup[cache_key] = hierarchy
    return hierarchy
166
+
167
def _collect_descriptive(
    self,
    descriptive_lookup: dict[Any, Any],
    entity: ods.Model.Entity,
    instance: dict[Any, Any],
) -> None:
    """Replace descriptive relation ids in ``instance`` by the referenced hierarchy.

    Relations of ``entity`` that point to an entity with base name
    ``AoUnitUnderTest``, ``AoTestEquipment`` or ``AoTestSequence`` are
    inspected. If ``instance`` holds a usable id (not ``None``, not NaN, not
    zero) for such a relation, the value is replaced in place by the result
    of ``_get_descriptive``.

    Args:
        descriptive_lookup: Cache passed on to ``_get_descriptive``.
        entity: Entity the instance belongs to.
        instance: Record of the instance; modified in place.
    """
    descriptive_base_names = ("AoUnitUnderTest", "AoTestEquipment", "AoTestSequence")
    for relation in entity.relations.values():
        target_entity = self._mc.entity(relation.entity_name)
        if target_entity.base_name not in descriptive_base_names:
            continue

        _log.debug(
            "Found descriptive %s relation at entity %s. Adding them.",
            target_entity.name,
            entity.name,
        )
        column = f"{entity.name}.{relation.name}"
        raw_id = instance.get(column)
        # Skip relations that are not set.
        if raw_id is None or pd.isna(raw_id) or int(raw_id) == 0:
            continue
        instance[column] = self._get_descriptive(descriptive_lookup, target_entity, int(raw_id))
192
+
193
def _add_children(
    self,
    lookup: dict[Any, Any],
    descriptive_lookup: dict[Any, Any],
    entity_name: str,
    parent_relation_name: str,
    path_to_root: str,
    iid: int,
) -> None:
    """Query and attach all child instances of ``entity_name`` under ``iid``.

    The children are grouped by their parent and stored, keyed by name, in
    the parent's dict under the inverse relation name. Duplicate child names
    are resolved by appending ``_Version:<n>`` (when a version attribute
    exists) or ``_###<index>`` as a last resort. When a version attribute
    exists the children are processed in descending version order, so the
    highest version keeps the plain name. For ``AoLocalColumn`` the
    ``measurement_quantity`` id is replaced by the measurement quantity name
    to keep diffs stable across servers.

    Args:
        lookup: Maps ``(entity_name, id)`` to collected instance dicts; the
            parents must already be contained, the children are added.
        descriptive_lookup: Cache passed on to ``_collect_descriptive``.
        entity_name: Name of the child entity to query.
        parent_relation_name: Relation of the child entity pointing to the parent.
        path_to_root: Query path from the child entity to the root instance.
        iid: Id of the root instance.

    Raises:
        KeyError: If the parent column is missing in the query result.
        ValueError: If a parent instance is not contained in ``lookup``.
    """
    log = _log

    entity = self._mc.entity(entity_name)
    jaquel_query: dict[str, Any] = {entity.name: {path_to_root: iid}}
    measurement_quantity_entity = None
    measurement_quantity_name_entry = None
    local_column_measurement_quantity_entry = None

    if "AoLocalColumn" == entity.base_name:
        # Query everything except the bulk attributes.
        attributes = {
            attribute.name: 1
            for _, attribute in entity.attributes.items()
            if attribute.base_name not in ["generation_parameters", "values", "flags"]
        }
        attributes.update({relation.name: 1 for _, relation in entity.relations.items() if 1 == relation.range_max})
        jaquel_query["$attributes"] = attributes
        measurement_quantity_entity = self._mc.entity_by_base_name("AoMeasurementQuantity")
        local_column_measurement_quantity_entry = (
            f"{entity.name}.{self._mc.relation_by_base_name(entity, 'measurement_quantity').name}"
        )
        measurement_quantity_name_attribute = self._mc.attribute_by_base_name(measurement_quantity_entity, "name")
        measurement_quantity_name_entry = (
            f"{measurement_quantity_entity.name}.{measurement_quantity_name_attribute.name}"
        )

    parent_relation = self._mc.relation(entity, parent_relation_name)
    parent_entry = f"{entity.name}.{parent_relation.name}"
    if parent_relation.range_max != 1:
        # no children relation
        related_entity = self._mc.entity(parent_relation.entity_name)
        related_entity_id_attribute = self._mc.attribute_by_base_name(related_entity, "id")
        parent_entry = f"{parent_relation.entity_name}.{related_entity_id_attribute.name}"
        jaquel_query["$attributes"] = {
            "*": 1,
            f"{parent_relation.name}.{related_entity_id_attribute.name}": 1,
        }

    log.debug("Retrieve children using query: %s", jaquel_query)
    df = self._query_data(jaquel_query)

    self._replace_cached_related(entity, df)

    id_entry = f"{entity.name}.{self._mc.attribute(entity, 'id').name}"
    name_entry = f"{entity.name}.{self._mc.attribute(entity, 'name').name}"
    dict_entry_key_entry = name_entry
    version_attribute = self._mc.attribute_no_throw(entity, "version")
    version_entry = f"{entity.name}.{version_attribute.name}" if version_attribute is not None else None

    if parent_entry not in df.columns:
        raise KeyError(f"Column '{parent_entry}' not found in query result for query: {jaquel_query}")

    for parent_id, children in df.groupby(parent_entry):
        parent_dictionary = lookup.get((parent_relation.entity_name, parent_id), None)
        if parent_dictionary is None:
            raise ValueError("parent wasn't added")

        if version_entry is not None:
            # Sort descending. ``sort_values`` returns a new frame, so the
            # result has to be kept; the stable sort keeps the original
            # order of rows with equal versions.
            children = children.sort_values(by=version_entry, ascending=False, kind="stable")

        children_result = {}
        for instance_index, instance in enumerate(children.drop(columns=[parent_entry]).to_dict(orient="records")):
            instance_id = instance[id_entry]
            self._collect_descriptive(descriptive_lookup, entity, instance)
            children_entry_key = f"{instance[dict_entry_key_entry]}"
            if children_entry_key in children_result:
                if version_entry is not None:
                    # First try to make the key unique with the version.
                    instance_version = instance[version_entry]
                    children_entry_key_with_version = f"{children_entry_key}_Version:{instance_version}"
                    if children_entry_key_with_version not in children_result:
                        children_entry_key = children_entry_key_with_version
                if children_entry_key in children_result:
                    log.warning(
                        "Name duplicate exists for children at %s.%s(%s): %s.%s(%s)",
                        parent_relation.entity_name,
                        parent_relation.name,
                        parent_id,
                        entity.name,
                        instance_id,
                        children_entry_key,
                    )
                    # Last resort: the position inside the group makes the key unique.
                    children_entry_key += f"_###{instance_index}"
            if local_column_measurement_quantity_entry is not None:
                # We replace the AoMeasurementQuantity id by the name because the parent
                # is the submatrix here and the MQ ids will differ.
                local_column_measurement_quantity_id = instance.get(local_column_measurement_quantity_entry)
                if local_column_measurement_quantity_id is not None:
                    assert measurement_quantity_entity is not None
                    local_column_measurement_quantity_dict = lookup.get(
                        (
                            measurement_quantity_entity.name,
                            local_column_measurement_quantity_id,
                        )
                    )
                    if local_column_measurement_quantity_dict is not None:
                        instance[local_column_measurement_quantity_entry] = (
                            local_column_measurement_quantity_dict.get(measurement_quantity_name_entry)
                        )

            children_result[children_entry_key] = instance
            lookup[(entity.name, instance_id)] = instance

        parent_dictionary[f"{parent_relation.inverse_name}"] = children_result
308
+
309
def _replace_cached_related(self, entity: ods.Model.Entity, df: pd.DataFrame) -> None:
    """Pass relation columns of ``df`` through ``self.r2n.map_series`` in place.

    A column is handled when its name contains a dot, the part behind the
    first dot is a relation of ``entity`` and the related entity is known to
    the model cache. All other columns are left untouched.

    Args:
        entity: Entity whose relations are matched against the column names.
        df: Query result; modified in place. Nothing happens for an empty frame.
    """
    if df.empty:
        return

    for column in df.columns:
        if "." not in column:
            continue
        relation_or_attribute_name = column.split(".", 1)[1]
        rel: ods.Model.Relation | None = entity.relations.get(relation_or_attribute_name, None)
        if rel is None:
            # The column is an attribute or otherwise not a relation of this entity.
            continue
        if self._mc.entity_no_throw(rel.entity_name) is None:
            continue
        df[column] = self.r2n.map_series(rel.entity_name, df[column])
324
+
325
def _add_related(
    self,
    lookup: dict[Any, Any],
    descriptive_lookup: dict[Any, Any],
    entity_name: str,
    path_to_root: str,
    root_id: int,
) -> None:
    """Attach parameter sets, their parameters and files related to ``entity_name``.

    Every relation of the entity is checked: targets with base name
    ``AoParameterSet`` or ``AoFile`` are collected via ``_add_children``; for
    parameter sets the instances behind their ``parameters`` relation are
    collected as well.

    Args:
        lookup: Maps ``(entity_name, id)`` to collected instance dicts.
        descriptive_lookup: Cache passed on to ``_add_children``.
        entity_name: Name of the entity whose relations are inspected.
        path_to_root: Query path from that entity to the root instance.
        root_id: Id of the root instance.
    """
    entity = self._mc.entity(entity_name)
    for relation in entity.relations.values():
        relation_entity = self._mc.entity(relation.entity_name)
        related_base_name = relation_entity.base_name
        if related_base_name == "AoParameterSet":
            _log.debug(
                "Found AoParameterSet relation at entity %s. Adding instances.",
                entity.name,
            )
        elif related_base_name == "AoFile":
            _log.debug("Found AoFile relation at entity %s. Adding instances.", entity.name)
        else:
            continue

        # Path from the related entity through this entity up to the root.
        related_path = f"{relation.inverse_name}.{path_to_root}"
        self._add_children(
            lookup,
            descriptive_lookup,
            relation_entity.name,
            relation.inverse_name,
            related_path,
            root_id,
        )

        if related_base_name == "AoParameterSet":
            param_relation = self._mc.relation(relation_entity, "parameters")
            self._add_children(
                lookup,
                descriptive_lookup,
                param_relation.entity_name,
                param_relation.inverse_name,
                f"{param_relation.inverse_name}.{related_path}",
                root_id,
            )
370
+
371
def _add_children_ex(
    self,
    lookup: dict[Any, Any],
    descriptive_lookup: dict[Any, Any],
    entity_name: str,
    parent_relation_name: str,
    path_to_root: str,
    root_id: int,
) -> None:
    """Collect the children of ``entity_name`` and afterwards their related instances.

    Calls ``_add_children`` followed by ``_add_related`` with the same
    entity, path and root id.
    """
    self._add_children(
        lookup=lookup,
        descriptive_lookup=descriptive_lookup,
        entity_name=entity_name,
        parent_relation_name=parent_relation_name,
        path_to_root=path_to_root,
        iid=root_id,
    )
    self._add_related(
        lookup=lookup,
        descriptive_lookup=descriptive_lookup,
        entity_name=entity_name,
        path_to_root=path_to_root,
        root_id=root_id,
    )
389
+
390
def _create_root(
    self,
    lookup: dict[Any, Any],
    descriptive_lookup: dict[Any, Any],
    entity: ods.Model.Entity,
    parent_relation_name: str,
    root_condition: int | str | dict[str, Any],
) -> dict[str, Any]:
    """Load the single root instance and start the result hierarchy with it.

    Args:
        lookup: Receives the root instance under ``(entity.name, id)``.
        descriptive_lookup: Cache passed on to the descriptive collection.
        entity: Entity of the root instance.
        parent_relation_name: Relation to the parent; its column is dropped
            from the root record.
        root_condition: Integer id or condition dict used as is; a string is
            parsed as JSON first.

    Returns:
        Dict with the root record stored under ``entity.name``.

    Raises:
        ValueError: If no instance matches ``root_condition`` or if more than
            one root instance is returned.
    """
    if isinstance(root_condition, (int, dict)):
        condition = root_condition
    else:
        condition = json.loads(root_condition)

    _log.debug("Retrieve instances of entity %s", entity.name)
    # Two rows are enough to detect an ambiguous condition.
    root_query = {entity.name: condition, "$options": {"$rowlimit": 2}}
    root_df = self._query_data(root_query)
    if root_df.empty:
        raise ValueError(f"Test instance with id {root_condition} does not exist.")
    self._replace_cached_related(entity, root_df)

    id_entry = f"{entity.name}.{self._mc.attribute(entity, 'id').name}"
    parent_column = f"{entity.name}.{self._mc.relation(entity, parent_relation_name).name}"
    candidates = root_df.drop(columns=[parent_column]).to_dict(orient="records")
    if len(candidates) != 1:
        raise ValueError(f"there should be only one root but {len(candidates)} have been found.")

    root_instance = candidates[0]
    result: dict[str, Any] = {entity.name: root_instance}
    lookup[(entity.name, root_instance[id_entry])] = root_instance
    self._collect_descriptive(descriptive_lookup, entity, root_instance)
    self._add_related(lookup, descriptive_lookup, entity.name, "id", root_instance[id_entry])

    return result
433
+
434
+ @staticmethod
435
+ def _join_path(part_a: str | None, part_b: str | None) -> str | None:
436
+ if part_a is None:
437
+ return part_b
438
+ if part_b is None:
439
+ return part_a
440
+ return f"{part_a}.{part_b}"
441
+
442
def collect(
    self,
    root_entity_name: str,
    root_condition: int | str | dict[str, Any],
    calculate_bulk_hash: bool = False,
    show_progress: bool = True,
) -> tuple[dict[Any, Any], dict[Any, Any]]:
    """Collect the complete instance hierarchy below one root instance.

    Starting at the root entity the chain of ``children`` relations is
    followed. Afterwards the ``AoMeasurementQuantity``, ``AoSubMatrix`` and
    ``AoLocalColumn`` instances are collected. On request a hash of the bulk
    data is added per local column for change detection.

    Args:
        root_entity_name: Name of the root entity. Its base name must be
            ``AoSubTest`` or ``AoMeasurement``.
        root_condition: Condition identifying the root instance: an integer
            id, a condition dict or a JSON string holding the condition.
        calculate_bulk_hash: Whether to hash the bulk data as well.
        show_progress: Show a textual progress bar while hashing bulk data.

    Returns:
        A tuple ``(result, lookup)``. ``result`` is the nested, name-keyed
        hierarchy dict suitable for diffing; ``lookup`` maps
        ``(entity_name, id)`` to the corresponding instance dict.

    Raises:
        ValueError: If the root entity is neither an ``AoSubTest`` nor an
            ``AoMeasurement`` derivative, or if the root instance cannot be
            determined.
    """
    lookup: dict[tuple[str, Any], Any] = {}
    descriptive_lookup: dict[tuple[str, int], Any] = {}

    entity = self._mc.entity(root_entity_name)
    root_base_name = entity.base_name
    if root_base_name == "AoSubTest":
        parent_relation = self._mc.relation(entity, "parent_test")
    elif root_base_name == "AoMeasurement":
        parent_relation = self._mc.relation(entity, "test")
    else:
        raise ValueError("Only entities derived from AoSubTest or AoMeasurement can be used as root.")

    result = self._create_root(lookup, descriptive_lookup, entity, parent_relation.name, root_condition)

    id_entry = f"{entity.name}.{self._mc.attribute(entity, 'id').name}"
    resolved_root_id: int = result[entity.name][id_entry]

    # Each step is (entity name, relation to the parent, query path to the root).
    collect_plan = []
    path_to_root_instance = None

    # Walk down the children chain; the path grows by one inverse relation per level.
    children_relation = self._mc.relation_no_throw(entity, "children")
    while children_relation is not None:
        path_to_root_instance = self._join_path(children_relation.inverse_name, path_to_root_instance)
        collect_plan.append(
            (
                children_relation.entity_name,
                children_relation.inverse_name,
                path_to_root_instance,
            )
        )
        child_entity = self._mc.entity(children_relation.entity_name)
        children_relation = self._mc.relation_no_throw(child_entity, "children")

    measurement_quantity_name = self._mc.entity_by_base_name("AoMeasurementQuantity").name
    sub_matrix_name = self._mc.entity_by_base_name("AoSubMatrix").name
    local_column_name = self._mc.entity_by_base_name("AoLocalColumn").name
    path_from_measurement = self._join_path("measurement", path_to_root_instance)
    path_from_local_column = self._join_path("submatrix.measurement", path_to_root_instance)
    collect_plan += [
        (measurement_quantity_name, "measurement", path_from_measurement),
        (sub_matrix_name, "measurement", path_from_measurement),
        (local_column_name, "submatrix", path_from_local_column),
    ]

    _log.debug("Collecting: %s", collect_plan)
    for step_entity_name, step_parent_relation, step_path in collect_plan:
        _log.info("Retrieve instances of entity %s", step_entity_name)
        assert step_path is not None
        self._add_children_ex(
            lookup,
            descriptive_lookup,
            entity_name=step_entity_name,
            parent_relation_name=step_parent_relation,
            path_to_root=step_path,
            root_id=resolved_root_id,
        )

    if calculate_bulk_hash:
        _log.info("Retrieve bulk data")
        bulk_path = self._join_path("measurement", path_to_root_instance)
        assert bulk_path is not None
        self._collect_bulk_data(
            lookup,
            path_to_root=bulk_path,
            root_id=resolved_root_id,
            show_progress=show_progress,
        )

    _log.info(
        "Collected %s instances for %s with id %s",
        len(lookup),
        entity.name,
        resolved_root_id,
    )

    return (result, lookup)
558
+
559
+
560
def collect(
    con_i: ConI,
    root_entity_name: str,
    root_condition: int | str | dict[str, Any],
    *,
    calculate_bulk_hash: bool = False,
    show_progress: bool = True,
    is_null_to_nan: bool = True,
    enum_as_string: bool = True,
    cached_related_entities: list[str] | None = None,
) -> tuple[dict[Any, Any], dict[Any, Any]]:
    """Create a ``Collector`` for ``con_i`` and collect one hierarchy with it.

    Args:
        con_i: Connection handed to the ``Collector``.
        root_entity_name: Passed to ``Collector.collect``.
        root_condition: Passed to ``Collector.collect``.
        calculate_bulk_hash: Passed to ``Collector.collect``.
        show_progress: Passed to ``Collector.collect``.
        is_null_to_nan: Passed to the ``Collector`` constructor.
        enum_as_string: Passed to the ``Collector`` constructor.
        cached_related_entities: Passed to the ``Collector`` constructor.

    Returns:
        The ``(result, lookup)`` tuple returned by ``Collector.collect``.
    """
    collector = Collector(
        con_i,
        is_null_to_nan=is_null_to_nan,
        enum_as_string=enum_as_string,
        cached_related_entities=cached_related_entities,
    )
    return collector.collect(root_entity_name, root_condition, calculate_bulk_hash, show_progress)
577
+
578
+
579
def save_collect_results(
    file_path: str,
    data: dict[Any, Any],
    additional_info_for_zip: str | dict[Any, Any] | None = None,
    additional_files_for_zip: list[str] | None = None,
) -> None:
    """Persist a collected hierarchy dict to a ``.json`` or ``.zip`` file.

    The target folder is created when needed. For ``.zip`` outputs the data
    is stored as ``result.json``, ``additional_info_for_zip`` (str or dict)
    is written as ``info.txt`` and every existing file listed in
    ``additional_files_for_zip`` is added by basename. Additional files that
    do not exist, or whose basename is already used inside the archive, are
    skipped with a warning so that they cannot shadow ``result.json`` or
    ``info.txt``. Any other extension is written as plain JSON.

    Args:
        file_path: Target file; the extension selects the output format.
        data: Hierarchy dict to store.
        additional_info_for_zip: Optional text or dict for ``info.txt``
            (zip output only; skipped when empty).
        additional_files_for_zip: Optional paths of files to add to the
            archive (zip output only).
    """
    _log.debug("Dump dictionary to file: %s", file_path)
    ext = os.path.splitext(file_path)[1].lower()

    folder = os.path.dirname(file_path)
    if folder:
        # ``exist_ok`` already covers an existing folder without a separate check.
        os.makedirs(folder, exist_ok=True)

    if ext != ".zip":
        # Save as plain JSON
        with open(file_path, "w", encoding="utf-8") as json_file:
            json.dump(data, json_file, indent=1, ensure_ascii=False)
        return

    json_str = json.dumps(data, indent=1, ensure_ascii=False)
    with zipfile.ZipFile(file_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        _log.debug("Writing JSON to zip file.")
        zf.writestr("result.json", json_str)
        # Member names already written; a second member of the same name would
        # hide the first one when the archive is read by name.
        used_names = {"result.json"}

        if additional_info_for_zip:
            if isinstance(additional_info_for_zip, str):
                info_text = additional_info_for_zip
            else:
                info_text = json.dumps(additional_info_for_zip, indent=2, ensure_ascii=False)
            zf.writestr("info.txt", info_text)
            used_names.add("info.txt")

        for additional_file_for_zip in additional_files_for_zip or []:
            if not os.path.exists(additional_file_for_zip):
                _log.warning("Additional file '%s' does not exist and is skipped.", additional_file_for_zip)
                continue
            arcname = os.path.basename(additional_file_for_zip)
            if arcname in used_names:
                _log.warning(
                    "Additional file '%s' is skipped because '%s' already exists in the zip file.",
                    additional_file_for_zip,
                    arcname,
                )
                continue
            zf.write(additional_file_for_zip, arcname=arcname)
            used_names.add(arcname)
623
+
624
+
625
def load_collect_results(file_path: str) -> dict[Any, Any]:
    """Read back a hierarchy dict written by ``save_collect_results``.

    Args:
        file_path: File to read. A ``.zip`` file is expected to contain
            ``result.json``; any other extension is read as plain JSON.

    Returns:
        The loaded dict.
    """
    _log.info("Read dictionary from file: %s", file_path)
    is_zip = os.path.splitext(file_path)[1].lower() == ".zip"

    if not is_zip:
        with open(file_path, "r", encoding="utf-8") as json_file:
            return cast(dict[Any, Any], json.load(json_file))

    with zipfile.ZipFile(file_path, "r") as archive:
        _log.debug("Reading JSON from zip file.")
        with archive.open("result.json") as raw_member:
            _log.debug("Extract zip content.")
            # The member is opened in binary mode; decode it while parsing.
            text_stream = io.TextIOWrapper(raw_member, encoding="utf-8")
            loaded = json.load(text_stream)
    return cast(dict[Any, Any], loaded)
@@ -0,0 +1,35 @@
1
+ import logging
2
+ import re
3
+ from typing import Any
4
+
5
+ from deepdiff import DeepDiff
6
+
7
+
8
def diff_dictionaries(
    result1: dict[Any, Any],
    result2: dict[Any, Any],
    custom_exclude_regex_paths: list[str],
    custom_exclude_paths: list[str],
) -> DeepDiff:
    """Compare two collected hierarchies with ``DeepDiff``.

    Paths ending in ``.Id']``, ``.DateCreated']`` or ``.Version']`` are
    always excluded; NaN values are treated as equal.

    Args:
        result1: First hierarchy dict.
        result2: Second hierarchy dict.
        custom_exclude_regex_paths: Additional regular expressions for paths
            to exclude.
        custom_exclude_paths: Additional literal paths to exclude.

    Returns:
        The ``DeepDiff`` result of the comparison.
    """
    logger = logging.getLogger(__name__)

    builtin_patterns = [r"\.(Id|DateCreated|Version)'\]$"]
    pattern_sources = builtin_patterns + custom_exclude_regex_paths
    logger.debug("Compile exclude_regex_paths_str %s", pattern_sources)
    compiled_patterns = list(map(re.compile, pattern_sources))
    # Work on a copy of the caller's list.
    excluded_paths: list[str] = [] + custom_exclude_paths

    logger.info("Start DeepDiff.")
    logger.debug(" exclude_regex_paths: %s", pattern_sources)
    logger.debug(" exclude_paths: %s", excluded_paths)
    deepdiff_options = {
        "exclude_paths": excluded_paths,
        "exclude_regex_paths": compiled_patterns,
        "ignore_nan_inequality": True,
    }
    comparison = DeepDiff(result1, result2, **deepdiff_options)
    logger.info("Finished DeepDiff.")
    return comparison
31
+
32
+
33
def dump_diff_as_json(file_path: str, diff_result: DeepDiff) -> None:
    """Write ``diff_result`` as JSON (indent 2) to ``file_path`` in UTF-8."""
    with open(file_path, mode="w", encoding="utf-8") as target:
        serialized = diff_result.to_json(indent=2)
        target.write(serialized)