odsbox-diff 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- odsbox_diff/__init__.py +32 -0
- odsbox_diff/__main__.py +5 -0
- odsbox_diff/api.py +245 -0
- odsbox_diff/connection/__init__.py +14 -0
- odsbox_diff/connection/config.py +96 -0
- odsbox_diff/connection/factory.py +52 -0
- odsbox_diff/connection/manager.py +150 -0
- odsbox_diff/diff.py +676 -0
- odsbox_diff/ods_diff_hierarchy/__init__.py +16 -0
- odsbox_diff/ods_diff_hierarchy/collect.py +639 -0
- odsbox_diff/ods_diff_hierarchy/diff.py +35 -0
- odsbox_diff/ods_diff_hierarchy/rel_to_name.py +74 -0
- odsbox_diff-1.0.0.dist-info/METADATA +274 -0
- odsbox_diff-1.0.0.dist-info/RECORD +17 -0
- odsbox_diff-1.0.0.dist-info/WHEEL +4 -0
- odsbox_diff-1.0.0.dist-info/entry_points.txt +3 -0
- odsbox_diff-1.0.0.dist-info/licenses/LICENSE +200 -0
|
@@ -0,0 +1,639 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import io
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
import zipfile
|
|
8
|
+
from typing import Any, cast
|
|
9
|
+
|
|
10
|
+
from odsbox.model_cache import ModelCache
|
|
11
|
+
import odsbox.proto.ods_pb2 as ods
|
|
12
|
+
import pandas as pd
|
|
13
|
+
from odsbox.con_i import ConI
|
|
14
|
+
from requests import HTTPError
|
|
15
|
+
|
|
16
|
+
from .rel_to_name import RelToName
|
|
17
|
+
|
|
18
|
+
_log = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Collector:
|
|
22
|
+
def __init__(
    self,
    con_i: ConI,
    is_null_to_nan: bool = False,
    enum_as_string: bool = True,
    cached_related_entities: list[str] | None = None,
) -> None:
    """Bind the collector to an ODS connection and remember its query options.

    Args:
        con_i: Connection used for all queries; its ``mc`` model cache is
            kept for model lookups.
        is_null_to_nan: Forwarded unchanged to ``con_i.query_data``.
        enum_as_string: Forwarded unchanged to ``con_i.query_data``.
        cached_related_entities: Handed to ``RelToName`` together with the
            connection.
    """
    # Options that ``_query_data`` passes along with every query.
    self._is_null_to_nan = is_null_to_nan
    self._enum_as_string = enum_as_string

    self._con_i = con_i
    self._mc: ModelCache = con_i.mc
    self.r2n: RelToName = RelToName(con_i, cached_related_entities)
|
|
34
|
+
|
|
35
|
+
def _query_data(self, query: dict[str, Any]) -> pd.DataFrame:
    """Run ``query`` on the connection using the options given at construction."""
    query_options = {
        "is_null_to_nan": self._is_null_to_nan,
        "enum_as_string": self._enum_as_string,
    }
    return self._con_i.query_data(query=query, **query_options)
|
|
39
|
+
|
|
40
|
+
@staticmethod
|
|
41
|
+
def _print_progress_bar(
|
|
42
|
+
iteration: int,
|
|
43
|
+
total: int,
|
|
44
|
+
prefix: str = "",
|
|
45
|
+
suffix: str = "",
|
|
46
|
+
decimals: int = 1,
|
|
47
|
+
length: int = 50,
|
|
48
|
+
fill: str = "█",
|
|
49
|
+
) -> None:
|
|
50
|
+
"""
|
|
51
|
+
Call in a loop to create terminal progress bar
|
|
52
|
+
@params:
|
|
53
|
+
iteration - Required : current iteration (Int)
|
|
54
|
+
total - Required : total iterations (Int)
|
|
55
|
+
prefix - Optional : prefix string (Str)
|
|
56
|
+
suffix - Optional : suffix string (Str)
|
|
57
|
+
decimals - Optional : positive number of decimals in percent complete (Int)
|
|
58
|
+
length - Optional : character length of bar (Int)
|
|
59
|
+
fill - Optional : bar fill character (Str)
|
|
60
|
+
"""
|
|
61
|
+
percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
|
|
62
|
+
filled_length = int(length * iteration // total)
|
|
63
|
+
bar = fill * filled_length + "-" * (length - filled_length)
|
|
64
|
+
sys.stdout.write(f"\r{prefix} |{bar}| {percent}% {suffix}")
|
|
65
|
+
sys.stdout.flush()
|
|
66
|
+
# Print New Line on Complete
|
|
67
|
+
if iteration == total:
|
|
68
|
+
print()
|
|
69
|
+
|
|
70
|
+
@staticmethod
|
|
71
|
+
def _hash_pandas_row(row: pd.Series[Any]) -> str:
|
|
72
|
+
row_string = "".join([str(item) for item in row.values])
|
|
73
|
+
return hashlib.sha256(row_string.encode()).hexdigest()
|
|
74
|
+
|
|
75
|
+
def _collect_bulk_data(
    self,
    lookup: dict[Any, Any],
    path_to_root: str,
    root_id: int,
    show_progress: bool,
) -> None:
    """Hash the bulk data of every local column below the root instance.

    All ``AoSubMatrix`` instances reachable through ``path_to_root`` are
    queried first. For each of them the local columns are fetched with
    their ``id``, ``generation_parameters``, ``values`` and ``flags``; the
    hash of that row is stored as ``__BULK_HASH`` on the local column's
    entry in ``lookup``.

    An ``HTTPError`` raised while loading one submatrix does not abort the
    run: the message is stored as ``__BULK_HASH_CALCULATION_ERROR`` on the
    submatrix entry and logged as an error after the loop.

    Args:
        lookup: Maps ``(entity name, id)`` to already collected instances.
        path_to_root: Query path from the submatrix entity to the root.
        root_id: Id of the root instance.
        show_progress: Print a progress bar while iterating submatrices.

    Raises:
        ValueError: If a local column (or a failing submatrix) has no
            entry in ``lookup``.
    """
    log = _log
    sub_matrix_entity = self._mc.entity_by_base_name("AoSubMatrix")
    sub_matrix_query = {
        sub_matrix_entity.name: {path_to_root: root_id},
        "$attributes": {"id": 1, "measurement": 1, "number_of_rows": 1},
    }
    sub_matrices = self._query_data(sub_matrix_query)
    sub_matrix_count = sub_matrices.shape[0]
    log.debug("Found %s submatrices related to your test", sub_matrix_count)
    # Use short, model independent column names for attribute access below.
    sub_matrices.columns = ["id", "measurement", "number_of_rows"]

    local_column_entity = self._mc.entity_by_base_name("AoLocalColumn")
    bulk_attributes = {
        "id": 1,
        "generation_parameters": 1,
        "values": 1,
        "flags": 1,
    }
    failures = []
    log.debug("Load bulk data from ASAM ODS server")
    for position, sub_matrix in enumerate(sub_matrices.itertuples(), start=1):
        if show_progress:
            self._print_progress_bar(position, sub_matrix_count, "Bulk:", fill="*")
        submatrix_id = sub_matrix.id
        try:
            bulk_data = self._query_data(
                {
                    local_column_entity.name: {"submatrix": submatrix_id},
                    "$attributes": bulk_attributes,
                }
            )
            bulk_data.columns = ["id", "generation_parameters", "values", "flags"]
            for _, column_row in bulk_data.iterrows():
                digest = self._hash_pandas_row(column_row)
                column_entry = lookup.get((local_column_entity.name, column_row["id"]))
                if column_entry is None:
                    raise ValueError("parent wasn't added")
                column_entry["__BULK_HASH"] = digest
        except HTTPError as e:
            # Remember the failure on the submatrix and continue with the next one.
            message = f"Unable to retrieve bulk for Submatrix {submatrix_id}: {e}"
            failures.append(message)
            sub_matrix_entry = lookup.get((sub_matrix_entity.name, submatrix_id))
            if sub_matrix_entry is None:
                raise ValueError("parent wasn't added")
            sub_matrix_entry["__BULK_HASH_CALCULATION_ERROR"] = message
    log.debug("Load bulk data from ASAM ODS server finished")
    for message in failures:
        log.error(message)
|
|
129
|
+
|
|
130
|
+
def _get_descriptive(self, descriptive_lookup: dict[Any, Any], entity: ods.Model.Entity, id: int) -> dict[str, Any]:
    """Return the name-keyed dict for one descriptive instance, with caching.

    The instance ``id`` of ``entity`` is queried and stored under its name.
    For every relation of ``entity`` whose base name is ``children`` the
    child instances are attached through ``_add_children_ex``. The result
    is memoised in ``descriptive_lookup`` under ``(entity.name, id)``.

    Args:
        descriptive_lookup: Cache shared between calls.
        entity: Entity of the descriptive instance.
        id: Instance id to load.

    Returns:
        Dict mapping the instance name to its attribute dict.
    """
    log = _log

    cache_key = (entity.name, id)
    cached = descriptive_lookup.get(cache_key)
    if cached is not None:
        return cast(dict[str, Any], cached)

    jaquel_query = {entity.name: id}
    log.debug("Collect descriptive by query: %s", jaquel_query)
    rows = self._query_data(jaquel_query)

    id_entry = f"{entity.name}.{self._mc.attribute(entity, 'id').name}"
    name_entry = f"{entity.name}.{self._mc.attribute(entity, 'name').name}"

    # ``local_lookup`` is private to this descriptive tree; children attach
    # themselves to the instances registered here.
    local_lookup: dict[Any, Any] = {}
    result: dict[str, Any] = {}
    for instance in rows.to_dict(orient="records"):
        result[f"{instance[name_entry]}"] = instance
        local_lookup[(entity.name, instance[id_entry])] = instance

    for relation in entity.relations.values():
        if relation.base_name != "children":
            continue
        self._add_children_ex(
            local_lookup,
            descriptive_lookup,
            relation.entity_name,
            relation.inverse_name,
            relation.inverse_name,
            id,
        )

    descriptive_lookup[cache_key] = result
    return result
|
|
166
|
+
|
|
167
|
+
def _collect_descriptive(
    self,
    descriptive_lookup: dict[Any, Any],
    entity: ods.Model.Entity,
    instance: dict[Any, Any],
) -> None:
    """Replace descriptive relation ids in ``instance`` by the related data.

    For every relation of ``entity`` that points to an entity derived from
    ``AoUnitUnderTest``, ``AoTestEquipment`` or ``AoTestSequence`` the id
    stored in ``instance`` is swapped, in place, for the dict returned by
    ``_get_descriptive``. Missing, NaN and zero ids are left untouched.

    Args:
        descriptive_lookup: Cache passed on to ``_get_descriptive``.
        entity: Entity the instance belongs to.
        instance: Attribute dict of the instance; modified in place.
    """
    log = _log
    descriptive_base_names = ("AoUnitUnderTest", "AoTestEquipment", "AoTestSequence")
    for relation in entity.relations.values():
        relation_entity = self._mc.entity(relation.entity_name)
        if relation_entity.base_name not in descriptive_base_names:
            continue

        log.debug(
            "Found descriptive %s relation at entity %s. Adding them.",
            relation_entity.name,
            entity.name,
        )
        entry_name = f"{entity.name}.{relation.name}"
        descriptive_id = instance.get(entry_name)
        if descriptive_id is None or pd.isna(descriptive_id):
            continue
        numeric_id = int(descriptive_id)
        if numeric_id == 0:
            # Zero means no related instance is set.
            continue
        instance[entry_name] = self._get_descriptive(descriptive_lookup, relation_entity, numeric_id)
|
|
192
|
+
|
|
193
|
+
def _add_children(
    self,
    lookup: dict[Any, Any],
    descriptive_lookup: dict[Any, Any],
    entity_name: str,
    parent_relation_name: str,
    path_to_root: str,
    iid: int,
) -> None:
    """Query and attach all child instances of ``entity_name`` under ``iid``.

    All instances matching ``{path_to_root: iid}`` are loaded with a single
    query, grouped by their parent id and stored as a name-keyed dict on
    the parent instance (taken from ``lookup``) under the inverse name of
    the parent relation. Every child is registered in ``lookup`` as
    ``(entity name, id)`` as well.

    Resolves duplicate child names by appending ``_Version:<n>`` (when a
    version attribute exists) or ``_###<index>`` as a last resort. Siblings
    are processed in descending version order, so the highest version keeps
    the plain name. Replaces ``AoLocalColumn``'s ``measurement_quantity`` ID
    with the MQ name to keep diffs stable across servers.

    Args:
        lookup: Maps ``(entity name, id)`` to collected instances; parents
            must already be present, children are added.
        descriptive_lookup: Cache handed to ``_collect_descriptive``.
        entity_name: Name of the child entity to load.
        parent_relation_name: Relation of the child entity pointing to its
            parent.
        path_to_root: Query path from the child entity to the root.
        iid: Id of the root instance.

    Raises:
        KeyError: If the query result has no column for the parent id.
        ValueError: If a parent instance is missing in ``lookup``.
    """
    log = _log

    entity = self._mc.entity(entity_name)
    jaquel_query = {entity.name: {path_to_root: iid}}
    measurement_quantity_entity = None
    measurement_quantity_name_entry = None
    local_column_measurement_quantity_entry = None

    if "AoLocalColumn" == entity.base_name:
        # Leave out the bulk attributes and request all single-valued
        # relations explicitly.
        attributes = {
            attribute.name: 1
            for _, attribute in entity.attributes.items()
            if attribute.base_name not in ["generation_parameters", "values", "flags"]
        }
        attributes.update({relation.name: 1 for _, relation in entity.relations.items() if 1 == relation.range_max})
        jaquel_query["$attributes"] = attributes
        measurement_quantity_entity = self._mc.entity_by_base_name("AoMeasurementQuantity")
        local_column_measurement_quantity_entry = (
            f"{entity.name}.{self._mc.relation_by_base_name(entity, 'measurement_quantity').name}"
        )
        measurement_quantity_name_entry = (
            f"{measurement_quantity_entity.name}."
            f"{self._mc.attribute_by_base_name(measurement_quantity_entity, 'name').name}"
        )

    parent_relation = self._mc.relation(entity, parent_relation_name)
    parent_entry = f"{entity.name}.{parent_relation.name}"
    if parent_relation.range_max != 1:
        # Not a single-valued parent relation: request the id of the related
        # entity explicitly and group by that column instead.
        related_entity = self._mc.entity(parent_relation.entity_name)
        related_entity_id_attribute = self._mc.attribute_by_base_name(related_entity, "id")
        parent_entry = f"{parent_relation.entity_name}.{related_entity_id_attribute.name}"
        jaquel_query["$attributes"] = {
            "*": 1,
            f"{parent_relation.name}.{related_entity_id_attribute.name}": 1,
        }

    log.debug("Retrieve children using query: %s", jaquel_query)
    df = self._query_data(jaquel_query)

    self._replace_cached_related(entity, df)

    id_entry = f"{entity.name}.{self._mc.attribute(entity, 'id').name}"
    name_entry = f"{entity.name}.{self._mc.attribute(entity, 'name').name}"
    dict_entry_key_entry = name_entry
    version_attribute = self._mc.attribute_no_throw(entity, "version")
    version_entry = f"{entity.name}.{version_attribute.name}" if version_attribute is not None else None

    if parent_entry not in df.columns:
        raise KeyError(f"Column '{parent_entry}' not found in query result for query: {jaquel_query}")

    for parent_id, children in df.groupby(parent_entry):
        parent_dictionary = lookup.get((parent_relation.entity_name, parent_id), None)
        if parent_dictionary is None:
            raise ValueError("parent wasn't added")

        if version_entry is not None:
            # Sort descending. ``sort_values`` returns a new frame, so the
            # result has to be assigned; a stable sort keeps the server
            # order for equal versions.
            children = children.sort_values(by=version_entry, ascending=False, kind="stable")

        children_result = {}
        for instance_index, instance in enumerate(children.drop(columns=[parent_entry]).to_dict(orient="records")):
            instance_id = instance[id_entry]
            self._collect_descriptive(descriptive_lookup, entity, instance)
            children_entry_key = f"{instance[dict_entry_key_entry]}"
            if children_entry_key in children_result:
                if version_entry is not None:
                    # First fallback: make the key unique with the version.
                    instance_version = instance[version_entry]
                    children_entry_key_with_version = f"{children_entry_key}_Version:{instance_version}"
                    if children_entry_key_with_version not in children_result:
                        children_entry_key = children_entry_key_with_version
                if children_entry_key in children_result:
                    # Last resort: append the position within the siblings.
                    log.warning(
                        "Name duplicate exists for children at %s.%s(%s): %s.%s(%s)",
                        parent_relation.entity_name,
                        parent_relation.name,
                        parent_id,
                        entity.name,
                        instance_id,
                        children_entry_key,
                    )
                    children_entry_key += f"_###{instance_index}"
            if local_column_measurement_quantity_entry is not None:
                # We replace the AoMeasurementQuantity id by the name because the
                # parent is submatrix here and the MQ ids will differ.
                local_column_measurement_quantity_id = instance.get(local_column_measurement_quantity_entry)
                if local_column_measurement_quantity_id is not None:
                    assert measurement_quantity_entity is not None
                    local_column_measurement_quantity_dict = lookup.get(
                        (
                            measurement_quantity_entity.name,
                            local_column_measurement_quantity_id,
                        )
                    )
                    if local_column_measurement_quantity_dict is not None:
                        instance[local_column_measurement_quantity_entry] = (
                            local_column_measurement_quantity_dict.get(measurement_quantity_name_entry)
                        )

            children_result[children_entry_key] = instance
            lookup[(entity.name, instance_id)] = instance

        parent_dictionary[f"{parent_relation.inverse_name}"] = children_result
|
|
308
|
+
|
|
309
|
+
def _replace_cached_related(self, entity: ods.Model.Entity, df: pd.DataFrame) -> None:
    """Rewrite relation columns of ``df`` in place through ``self.r2n``.

    A column is processed when the part of its name after the first ``.``
    is a relation of ``entity`` whose target entity is known to the model
    cache. Its values are replaced by ``self.r2n.map_series(...)``
    (presumably an id-to-name mapping -- see ``RelToName``).

    Args:
        entity: Entity whose relations are matched against the columns.
        df: Query result; modified in place. Empty frames are ignored.
    """
    if df.empty:
        return

    for column in df.columns:
        if "." not in column:
            continue
        relation_or_attribute_name = column.split(".", 1)[1]
        rel: ods.Model.Relation | None = entity.relations.get(relation_or_attribute_name, None)
        if rel is None or self._mc.entity_no_throw(rel.entity_name) is None:
            # Plain attribute, or a relation to an entity the model does not know.
            continue
        df[column] = self.r2n.map_series(rel.entity_name, df[column])
|
|
324
|
+
|
|
325
|
+
def _add_related(
    self,
    lookup: dict[Any, Any],
    descriptive_lookup: dict[Any, Any],
    entity_name: str,
    path_to_root: str,
    root_id: int,
) -> None:
    """Attach related ``AoParameterSet`` (and its parameters) and ``AoFile`` instances.

    Every relation of ``entity_name`` is inspected. Instances of related
    entities derived from ``AoParameterSet`` or ``AoFile`` are attached with
    ``_add_children``; for parameter sets the instances behind their
    ``parameters`` relation are attached as well.

    Args:
        lookup: Maps ``(entity name, id)`` to collected instances.
        descriptive_lookup: Cache for descriptive instances.
        entity_name: Entity whose relations are scanned.
        path_to_root: Query path from ``entity_name`` to the root.
        root_id: Id of the root instance.
    """
    log = _log
    entity = self._mc.entity(entity_name)
    for relation in entity.relations.values():
        relation_entity = self._mc.entity(relation.entity_name)
        base_name = relation_entity.base_name
        is_parameter_set = base_name == "AoParameterSet"
        if not is_parameter_set and base_name != "AoFile":
            continue

        if is_parameter_set:
            log.debug(
                "Found AoParameterSet relation at entity %s. Adding instances.",
                entity.name,
            )
        else:
            log.debug("Found AoFile relation at entity %s. Adding instances.", entity.name)

        # Path from the related entity back to the root instance.
        related_path = f"{relation.inverse_name}.{path_to_root}"
        self._add_children(
            lookup,
            descriptive_lookup,
            relation_entity.name,
            relation.inverse_name,
            related_path,
            root_id,
        )
        if not is_parameter_set:
            continue

        # Parameter sets additionally carry their parameters.
        param_relation = self._mc.relation(relation_entity, "parameters")
        self._add_children(
            lookup,
            descriptive_lookup,
            param_relation.entity_name,
            param_relation.inverse_name,
            f"{param_relation.inverse_name}.{related_path}",
            root_id,
        )
|
|
370
|
+
|
|
371
|
+
def _add_children_ex(
    self,
    lookup: dict[Any, Any],
    descriptive_lookup: dict[Any, Any],
    entity_name: str,
    parent_relation_name: str,
    path_to_root: str,
    root_id: int,
) -> None:
    """Attach the children of ``entity_name`` and then their related instances.

    Calls ``_add_children`` followed by ``_add_related`` with the same
    entity, path and root id.
    """
    self._add_children(
        lookup=lookup,
        descriptive_lookup=descriptive_lookup,
        entity_name=entity_name,
        parent_relation_name=parent_relation_name,
        path_to_root=path_to_root,
        iid=root_id,
    )
    self._add_related(
        lookup=lookup,
        descriptive_lookup=descriptive_lookup,
        entity_name=entity_name,
        path_to_root=path_to_root,
        root_id=root_id,
    )
|
|
389
|
+
|
|
390
|
+
def _create_root(
    self,
    lookup: dict[Any, Any],
    descriptive_lookup: dict[Any, Any],
    entity: ods.Model.Entity,
    parent_relation_name: str,
    root_condition: int | str | dict[str, Any],
) -> dict[str, Any]:
    """Build the result root dict for the single root instance ``root_condition``.

    The root is queried with a row limit of two so that an ambiguous
    condition is detected. The parent relation column is removed from the
    instance, which is then stored in the result under the entity name and
    registered in ``lookup``. Descriptive and related instances of the root
    are attached afterwards.

    Args:
        lookup: Receives the root instance under ``(entity name, id)``.
        descriptive_lookup: Cache for descriptive instances.
        entity: Root entity.
        parent_relation_name: Relation of the root entity to its parent.
        root_condition: Integer id, condition dict, or a JSON string that
            is decoded with ``json.loads``.

    Returns:
        Dict with one entry: the entity name mapped to the root instance.

    Raises:
        ValueError: If no instance with ``root_condition`` exists or if more than one
            root instance is returned.
    """
    result: dict[str, Any] = {}

    if isinstance(root_condition, (int, dict)):
        condition = root_condition
    else:
        condition = json.loads(root_condition)

    log = _log
    log.debug("Retrieve instances of entity %s", entity.name)
    root_query = {entity.name: condition, "$options": {"$rowlimit": 2}}
    root_df = self._query_data(root_query)
    if root_df.empty:
        raise ValueError(f"Test instance with id {root_condition} does not exist.")
    self._replace_cached_related(entity, root_df)

    id_entry = f"{entity.name}.{self._mc.attribute(entity, 'id').name}"
    parent_relation = self._mc.relation(entity, parent_relation_name)
    parent_entry = f"{entity.name}.{parent_relation.name}"
    instances = root_df.drop(columns=[parent_entry]).to_dict(orient="records")
    if len(instances) != 1:
        raise ValueError(f"there should be only one root but {len(instances)} have been found.")

    (instance,) = instances
    root_id = instance[id_entry]
    result[entity.name] = instance
    lookup[(entity.name, root_id)] = instance
    self._collect_descriptive(descriptive_lookup, entity, instance)
    self._add_related(lookup, descriptive_lookup, entity.name, "id", instance[id_entry])

    return result
|
|
433
|
+
|
|
434
|
+
@staticmethod
|
|
435
|
+
def _join_path(part_a: str | None, part_b: str | None) -> str | None:
|
|
436
|
+
if part_a is None:
|
|
437
|
+
return part_b
|
|
438
|
+
if part_b is None:
|
|
439
|
+
return part_a
|
|
440
|
+
return f"{part_a}.{part_b}"
|
|
441
|
+
|
|
442
|
+
def collect(
    self,
    root_entity_name: str,
    root_condition: int | str | dict[str, Any],
    calculate_bulk_hash: bool = False,
    show_progress: bool = True,
) -> tuple[dict[Any, Any], dict[Any, Any]]:
    """Collect a complete instance hierarchy rooted at ``root_condition``.

    Follows the ``children`` relations downwards from the root entity and
    afterwards gathers the ``AoMeasurementQuantity``, ``AoSubMatrix`` and
    ``AoLocalColumn`` instances below it. On request the bulk data of each
    local column is hashed for change detection.

    Args:
        root_entity_name: Name of the root entity. Must derive from
            ``AoSubTest`` or ``AoMeasurement``.
        root_condition: Condition to identify the root instance.
            Can be an integer ID or a JSON string representing a complex condition.
        calculate_bulk_hash: Whether to also hash bulk LocalColumn data.
        show_progress: Show a textual progress bar during bulk hashing.

    Returns:
        A tuple ``(result, lookup)`` where ``result`` is a nested name-keyed
        hierarchy dict suitable for diffing, and ``lookup`` maps
        ``(entity_name, id)`` to the corresponding instance dict.

    Raises:
        ValueError: If the root entity is not an ``AoSubTest`` or
            ``AoMeasurement`` derivative, or if the instance does not exist.
    """
    log = _log
    lookup: dict[tuple[str, Any], Any] = {}
    descriptive_lookup: dict[tuple[str, int], Any] = {}

    entity = self._mc.entity(root_entity_name)
    # Base relation that leads from each supported root type to its parent.
    parent_relation_by_base_name = {"AoSubTest": "parent_test", "AoMeasurement": "test"}
    parent_relation_base_name = parent_relation_by_base_name.get(entity.base_name)
    if parent_relation_base_name is None:
        raise ValueError("Only entities derived from AoSubTest or AoMeasurement can be used as root.")
    parent_relation = self._mc.relation(entity, parent_relation_base_name)

    result = self._create_root(lookup, descriptive_lookup, entity, parent_relation.name, root_condition)

    id_entry = f"{entity.name}.{self._mc.attribute(entity, 'id').name}"
    resolved_root_id: int = result[entity.name][id_entry]

    # Each entry: (entity name, parent relation name, query path to the root).
    instances_to_collect = []
    path_to_root_instance = None

    # Walk down the ``children`` chain; the path grows at the front because
    # it is read from the deepest entity towards the root.
    children_relation = self._mc.relation_no_throw(entity, "children")
    while children_relation is not None:
        path_to_root_instance = self._join_path(children_relation.inverse_name, path_to_root_instance)
        instances_to_collect.append(
            (
                children_relation.entity_name,
                children_relation.inverse_name,
                path_to_root_instance,
            )
        )
        child_entity = self._mc.entity(children_relation.entity_name)
        children_relation = self._mc.relation_no_throw(child_entity, "children")

    # Entities that hang below the measurement level.
    measurement_path = self._join_path("measurement", path_to_root_instance)
    below_measurement = (
        ("AoMeasurementQuantity", "measurement", measurement_path),
        ("AoSubMatrix", "measurement", measurement_path),
        (
            "AoLocalColumn",
            "submatrix",
            self._join_path("submatrix.measurement", path_to_root_instance),
        ),
    )
    for base_name, relation_name, relation_path in below_measurement:
        instances_to_collect.append((self._mc.entity_by_base_name(base_name).name, relation_name, relation_path))

    log.debug("Collecting: %s", instances_to_collect)
    for item_entity_name, item_relation_name, item_path in instances_to_collect:
        log.info("Retrieve instances of entity %s", item_entity_name)
        assert item_path is not None
        self._add_children_ex(
            lookup,
            descriptive_lookup,
            entity_name=item_entity_name,
            parent_relation_name=item_relation_name,
            path_to_root=item_path,
            root_id=resolved_root_id,
        )

    if calculate_bulk_hash:
        log.info("Retrieve bulk data")
        bulk_path = measurement_path
        assert bulk_path is not None
        self._collect_bulk_data(
            lookup,
            path_to_root=bulk_path,
            root_id=resolved_root_id,
            show_progress=show_progress,
        )

    log.info(
        "Collected %s instances for %s with id %s",
        len(lookup),
        entity.name,
        resolved_root_id,
    )

    return (result, lookup)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def collect(
    con_i: ConI,
    root_entity_name: str,
    root_condition: int | str | dict[str, Any],
    *,
    calculate_bulk_hash: bool = False,
    show_progress: bool = True,
    is_null_to_nan: bool = True,
    enum_as_string: bool = True,
    cached_related_entities: list[str] | None = None,
) -> tuple[dict[Any, Any], dict[Any, Any]]:
    """Collect an instance hierarchy with a temporary ``Collector``.

    Convenience wrapper: creates a ``Collector`` for ``con_i`` and returns
    the result of its ``collect`` method. Note that ``is_null_to_nan``
    defaults to ``True`` here, whereas the ``Collector`` constructor
    defaults it to ``False``.

    Returns:
        The ``(result, lookup)`` tuple produced by ``Collector.collect``.
    """
    collector = Collector(
        con_i,
        is_null_to_nan=is_null_to_nan,
        enum_as_string=enum_as_string,
        cached_related_entities=cached_related_entities,
    )
    return collector.collect(root_entity_name, root_condition, calculate_bulk_hash, show_progress)
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
def save_collect_results(
    file_path: str,
    data: dict[Any, Any],
    additional_info_for_zip: str | dict[Any, Any] | None = None,
    additional_files_for_zip: list[str] | None = None,
) -> None:
    """Persist a collected hierarchy dict to a ``.json`` or ``.zip`` file.

    The data is serialized to JSON before the target file is opened, so a
    serialization error cannot leave an empty or truncated file behind.
    Missing parent folders of ``file_path`` are created.

    For ``.zip`` outputs (decided by the file extension, case-insensitive)
    the JSON is stored as ``result.json``, ``additional_info_for_zip`` (str
    or dict) is written as ``info.txt`` and any existing files in
    ``additional_files_for_zip`` are added by basename. Paths that do not
    exist are skipped with a warning. Every other extension produces a plain
    UTF-8 JSON file; the two ``additional_*`` arguments are ignored then.

    Args:
        file_path: Target file.
        data: Hierarchy dict to store.
        additional_info_for_zip: Optional text, or a dict dumped as JSON.
        additional_files_for_zip: Optional paths of files to add to the zip.
    """
    _log.debug("Dump dictionary to file: %s", file_path)
    ext = os.path.splitext(file_path)[1].lower()

    # Serialize first: a failure here must not touch the file system.
    json_str = json.dumps(data, indent=1, ensure_ascii=False)

    folder = os.path.dirname(file_path)
    if folder and not os.path.exists(folder):
        os.makedirs(folder, exist_ok=True)

    if ext != ".zip":
        # Save as plain JSON
        with open(file_path, "w", encoding="utf-8") as json_file:
            json_file.write(json_str)
        return

    with zipfile.ZipFile(file_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        _log.debug("Writing JSON to zip file.")
        zf.writestr("result.json", json_str)
        if additional_info_for_zip:
            if isinstance(additional_info_for_zip, str):
                info_text = additional_info_for_zip
            else:
                info_text = json.dumps(additional_info_for_zip, indent=2, ensure_ascii=False)
            zf.writestr("info.txt", info_text)
        for additional_file_for_zip in additional_files_for_zip or []:
            if not os.path.exists(additional_file_for_zip):
                _log.warning("Skip additional file that does not exist: %s", additional_file_for_zip)
                continue
            zf.write(
                additional_file_for_zip,
                arcname=os.path.basename(additional_file_for_zip),
            )
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
def load_collect_results(file_path: str) -> dict[Any, Any]:
    """Load a collected hierarchy dict previously written by ``save_collect_results``.

    Files ending in ``.zip`` (case-insensitive) are opened as archives and
    their ``result.json`` member is decoded as UTF-8 JSON; every other file
    is read as a plain UTF-8 JSON file.
    """
    _log.info("Read dictionary from file: %s", file_path)
    is_zip = os.path.splitext(file_path)[1].lower() == ".zip"

    if not is_zip:
        with open(file_path, "r", encoding="utf-8") as json_file:
            return cast(dict[Any, Any], json.load(json_file))

    with zipfile.ZipFile(file_path, "r") as zf:
        _log.debug("Reading JSON from zip file.")
        with zf.open("result.json") as json_file:
            _log.debug("Extract zip content.")
            # The zip member is a binary stream; decode it as UTF-8 text.
            text_stream = io.TextIOWrapper(json_file, encoding="utf-8")
            data = json.load(text_stream)
    return cast(dict[Any, Any], data)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from deepdiff import DeepDiff
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def diff_dictionaries(
    result1: dict[Any, Any],
    result2: dict[Any, Any],
    custom_exclude_regex_paths: list[str],
    custom_exclude_paths: list[str],
) -> DeepDiff:
    """Compare two collected hierarchies with ``DeepDiff``.

    Paths ending in ``.Id']``, ``.DateCreated']`` or ``.Version']`` are
    always excluded; the caller's regular expressions and literal paths are
    added to that. NaN values are treated as equal to each other
    (``ignore_nan_inequality=True``).

    Args:
        result1: First hierarchy dict.
        result2: Second hierarchy dict.
        custom_exclude_regex_paths: Additional path patterns to exclude.
        custom_exclude_paths: Additional literal paths to exclude.

    Returns:
        The ``DeepDiff`` result object.
    """
    log = logging.getLogger(__name__)

    builtin_exclude_regex_paths = [r"\.(Id|DateCreated|Version)'\]$"]
    exclude_regex_paths_str = builtin_exclude_regex_paths + custom_exclude_regex_paths
    log.debug("Compile exclude_regex_paths_str %s", exclude_regex_paths_str)
    exclude_regex_paths = list(map(re.compile, exclude_regex_paths_str))
    # Work on a copy so the caller's list is never shared with DeepDiff.
    exclude_paths: list[str] = list(custom_exclude_paths)

    log.info("Start DeepDiff.")
    log.debug(" exclude_regex_paths: %s", exclude_regex_paths_str)
    log.debug(" exclude_paths: %s", exclude_paths)
    diff_options = {
        "exclude_paths": exclude_paths,
        "exclude_regex_paths": exclude_regex_paths,
        "ignore_nan_inequality": True,
    }
    diff_result = DeepDiff(result1, result2, **diff_options)
    log.info("Finished DeepDiff.")
    return diff_result
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def dump_diff_as_json(file_path: str, diff_result: DeepDiff) -> None:
    """Write ``diff_result`` to ``file_path`` as indented UTF-8 JSON.

    The diff is serialized before the file is opened, so a serialization
    error does not truncate an existing file or leave an empty one behind.

    Args:
        file_path: Target file; overwritten if it exists.
        diff_result: Diff to serialize with ``to_json(indent=2)``.
    """
    json_text = diff_result.to_json(indent=2)
    with open(file_path, "w", encoding="utf-8") as json_file:
        json_file.write(json_text)
|