esgf-qa 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgf_qa/_constants.py +42 -1
- esgf_qa/_version.py +2 -2
- esgf_qa/cluster_results.py +466 -0
- esgf_qa/con_checks.py +209 -11
- esgf_qa/run_qa.py +247 -418
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.4.0.dist-info}/METADATA +42 -28
- esgf_qa-0.4.0.dist-info/RECORD +19 -0
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.4.0.dist-info}/top_level.txt +1 -1
- tests/test_cli.py +267 -0
- tests/test_cluster_results.py +166 -0
- tests/test_con_checks.py +263 -0
- tests/test_qaviewer.py +147 -0
- tests/test_run_dummy_qa.py +191 -0
- tests/test_run_qa.py +181 -0
- docs/esgf-qa_Logo.png +0 -0
- esgf_qa-0.3.0.dist-info/RECORD +0 -13
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.4.0.dist-info}/WHEEL +0 -0
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.4.0.dist-info}/entry_points.txt +0 -0
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.4.0.dist-info}/licenses/LICENSE +0 -0
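The biggest change below is structural: the QAResultAggregator class and the checker/DRS lookup tables that previously sat at the top of esgf_qa/run_qa.py move into the new esgf_qa/cluster_results.py and into esgf_qa/_constants.py, and run_qa.py now only imports them. A minimal usage sketch of the relocated aggregator, assuming the methods visible in the removed code (update, sort, cluster_summary) keep their behaviour in the new module; the result dictionary, dataset id and file name below are invented for illustration:

from esgf_qa.cluster_results import QAResultAggregator

# Hypothetical per-file result in the structure update() expects:
# checker -> test -> {"value": (score, max_score), "weight": ..., "msgs": [...]}
result_dict = {
    "cf": {
        "standard_names": {
            "value": (0, 1),
            "weight": 3,
            "msgs": ["standard_name 'air_temp' is not in the standard name table"],
        }
    }
}

summary = QAResultAggregator()  # 0.4.0: constructor no longer takes checker_dict
summary.update(result_dict, dsid="CMIP6.CMIP.example.dataset", file_name="tas_example.nc")
summary.sort()
summary.cluster_summary(threshold=0.75)  # groups similar messages via difflib
print(summary.clustered_summary)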
esgf_qa/run_qa.py
CHANGED
@@ -1,7 +1,6 @@
 import argparse
 import csv
 import datetime
-import difflib
 import hashlib
 import json
 import multiprocessing
@@ -14,45 +13,19 @@ from pathlib import Path
 from compliance_checker import __version__ as cc_version
 from compliance_checker.runner import CheckSuite

+from esgf_qa._constants import (
+    DRS_path_parent,
+    checker_dict,
+    checker_dict_ext,
+    checker_release_versions,
+)
 from esgf_qa._version import version
+from esgf_qa.cluster_results import QAResultAggregator
 from esgf_qa.con_checks import compatibility_checks as comp  # noqa
 from esgf_qa.con_checks import consistency_checks as cons  # noqa
 from esgf_qa.con_checks import continuity_checks as cont  # noqa
 from esgf_qa.con_checks import dataset_coverage_checks, inter_dataset_consistency_checks

-checker_dict = {
-    "cc6": "CORDEX-CMIP6",
-    "cf": "CF-Conventions",
-    "mip": "MIP",
-    "plugin_cmip6": "CMIP6",
-    # "wcrp-cmip5": "CMIP5",
-    "wcrp_cmip6": "CMIP6",
-    # "wcrp_cmip7": "CMIP7-AFT",
-    # "wcrp_cmip7": "CMIP7",
-    # "wcrp_cordex": "CORDEX",
-    "wcrp_cordex_cmip6": "CORDEX-CMIP6",
-    # "obs4mips": "Obs4MIPs",
-    # "input4mips": "Input4MIPs",
-}
-DRS_path_parent = {
-    "CMIP5": "CMIP5",
-    "CMIP6": "CMIP6",
-    "CMIP7": "CMIP7",
-    "CMIP7-AFT": "CMIP7",
-    "CORDEX": "CORDEX",
-    "CORDEX-CMIP6": "CORDEX-CMIP6",
-    "Obs4MIPs": "Obs4MIPs",
-    "Input4MIPs": "Input4MIPs",
-}
-checker_release_versions = {}
-checker_dict_ext = {
-    # "pcons": "ParentConsistency"
-    "cons": "Consistency",
-    "cont": "Continuity",
-    "comp": "Compatibility",
-    **checker_dict,
-}
-
 _timestamp_with_ms = datetime.datetime.now().strftime("%Y%m%d-%H%M%S%f")
 _timestamp_filename = datetime.datetime.strptime(
     _timestamp_with_ms, "%Y%m%d-%H%M%S%f"
@@ -62,321 +35,15 @@ _timestamp_pprint = datetime.datetime.strptime(
 ).strftime("%Y-%m-%d %H:%M")


-class QAResultAggregator:
-    def __init__(self, checker_dict):
-        """
-        Initialize the aggregator with an empty summary.
-        """
-        self.summary = {
-            "error": defaultdict(
-                lambda: defaultdict(lambda: defaultdict(list))
-            ),  # No weight, just function -> error msg
-            "fail": defaultdict(
-                lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
-            ),  # weight -> test -> msg -> dsid -> filenames
-        }
-        self.checker_dict = checker_dict
-
-    def update(self, result_dict, dsid, file_name):
-        """
-        Update the summary with a single result of a cc-run.
-        """
-        for checker in result_dict:
-            for test in result_dict[checker]:
-                if test == "errors":
-                    for function_name, error_msg in result_dict[checker][
-                        "errors"
-                    ].items():
-                        self.summary["error"][
-                            f"[{checker_dict[checker]}] " + function_name
-                        ][error_msg][dsid].append(file_name)
-                else:
-                    score, max_score = result_dict[checker][test]["value"]
-                    weight = result_dict[checker][test].get("weight", 3)
-                    msgs = result_dict[checker][test].get("msgs", [])
-                    if score < max_score:  # test outcome: fail
-                        for msg in msgs:
-                            self.summary["fail"][weight][
-                                f"[{checker_dict[checker]}] " + test
-                            ][msg][dsid].append(file_name)
-
-    def update_ds(self, result_dict, dsid):
-        """
-        Update the summary with a single result of a esgf-qa run.
-        """
-        for checker in result_dict:
-            for test in result_dict[checker]:
-                if test == "errors":
-                    for function_name, errdict in result_dict[checker][
-                        "errors"
-                    ].items():
-                        for file_name in errdict["files"]:
-                            self.summary["error"][
-                                f"[{checker_dict_ext[checker]}] " + function_name
-                            ][errdict["msg"]][dsid].append(file_name)
-                else:
-                    weight = result_dict[checker][test].get("weight", 3)
-                    fails = result_dict[checker][test].get("msgs", {})
-                    for msg, file_names in fails.items():
-                        for file_name in file_names:
-                            self.summary["fail"][weight][
-                                f"[{checker_dict_ext[checker]}] " + test
-                            ][msg][dsid].append(file_name)
-
-    def sort(self):
-        """
-        Sort the summary.
-        """
-        self.summary["fail"] = dict(sorted(self.summary["fail"].items(), reverse=True))
-        for key in self.summary["fail"]:
-            self.summary["fail"][key] = dict(sorted(self.summary["fail"][key].items()))
-
-        # Sort errors by function name
-        for checker in self.summary["error"]:
-            self.summary["error"][checker] = dict(
-                sorted(self.summary["error"][checker].items())
-            )
-
-    @staticmethod
-    def cluster_messages(messages, threshold):
-        clusters = []
-        while messages:
-            base = messages.pop(0)
-            cluster = [base]
-            to_remove = []
-            for msg in messages:
-                ratio = difflib.SequenceMatcher(None, base, msg).ratio()
-                if ratio >= threshold:
-                    cluster.append(msg)
-                    to_remove.append(msg)
-            for msg in to_remove:
-                messages.remove(msg)
-            clusters.append(cluster)
-        return clusters
-
-    @staticmethod
-    def generalize_message_group(messages):
-        if len(messages) == 1:
-            return messages[0], {}
-
-        # Split messages into tokens
-        split_messages = [re.findall(r"\w+|\W", m) for m in messages]
-        transposed = list(zip(*split_messages))
-        template = []
-        placeholders = {}
-        var_index = 0
-
-        for i, tokens in enumerate(transposed):
-            unique_tokens = set(tokens)
-            if len(unique_tokens) == 1:
-                template.append(tokens[0])
-            else:
-                var_name = chr(ord("A") + var_index)
-                template.append(f"{{{var_name}}}")
-                placeholders[var_name] = tokens[0]
-                var_index += 1
-
-        # Merge placeholders if possible
-        template, placeholders = QAResultAggregator.merge_placeholders(
-            template, placeholders
-        )
-
-        # Return the generalized message and the placeholders
-        generalized = "".join(template)
-        return generalized, placeholders
-
-    @staticmethod
-    def merge_placeholders(list_of_strings, dictionary, skip=0):
-        def find_next_two_placeholders(list_of_strings, skip):
-            placeholders = [
-                s for s in list_of_strings if s.startswith("{") and s.endswith("}")
-            ]
-            if len(placeholders) < 2:
-                return None, None
-            return placeholders[skip] if len(placeholders) >= skip + 1 else None, (
-                placeholders[skip + 1] if len(placeholders) >= skip + 2 else None
-            )
-
-        def extract_text_between_placeholders(
-            list_of_strings, placeholder1, placeholder2
-        ):
-            idx1 = list_of_strings.index(placeholder1)
-            idx2 = list_of_strings.index(placeholder2)
-            return "".join(list_of_strings[idx1 + 1 : idx2])
-
-        def merge_two_placeholders(
-            placeholder1, placeholder2, text_between, dictionary
-        ):
-            new_value = (
-                dictionary[placeholder1.lstrip("{").rstrip("}")]
-                + text_between
-                + dictionary[placeholder2.lstrip("{").rstrip("}")]
-            )
-            dictionary[placeholder1.lstrip("{").rstrip("}")] = new_value
-            del dictionary[placeholder2.lstrip("{").rstrip("}")]
-            return dictionary
-
-        def update_placeholder_names(list_of_strings, dictionary):
-            old_placeholders = sorted(list(dictionary.keys()))
-            new_placeholders = [
-                chr(ord("A") + i) for i in range(0, len(old_placeholders))
-            ]
-            new_dictionary = dict(
-                zip(new_placeholders, [dictionary[val] for val in old_placeholders])
-            )
-            for old, new in zip(old_placeholders, new_placeholders):
-                list_of_strings = [
-                    s.replace("{" + old + "}", "{" + new + "}") for s in list_of_strings
-                ]
-            return list_of_strings, new_dictionary
-
-        def replace_placeholders_with_new_one(
-            list_of_strings, placeholder1, placeholder2
-        ):
-            idx1 = list_of_strings.index(placeholder1)
-            idx2 = list_of_strings.index(placeholder2)
-            list_of_strings_new = list_of_strings[:idx1] + [placeholder1]
-            if idx2 < len(list_of_strings) + 1:
-                list_of_strings_new += list_of_strings[idx2 + 1 :]
-            return list_of_strings_new
-
-        if not any(s.startswith("{") and s.endswith("}") for s in list_of_strings):
-            return list_of_strings, dictionary
-
-        placeholder1, placeholder2 = find_next_two_placeholders(list_of_strings, skip)
-        if placeholder1 is None or placeholder2 is None:
-            return list_of_strings, dictionary
-
-        text_between = extract_text_between_placeholders(
-            list_of_strings, placeholder1, placeholder2
-        )
-        if len(text_between) < 5:
-            dictionary = merge_two_placeholders(
-                placeholder1, placeholder2, text_between, dictionary
-            )
-            list_of_strings = replace_placeholders_with_new_one(
-                list_of_strings, placeholder1, placeholder2
-            )
-            list_of_strings, dictionary = update_placeholder_names(
-                list_of_strings, dictionary
-            )
-            return QAResultAggregator.merge_placeholders(
-                list_of_strings, dictionary, skip
-            )
-        else:
-            return QAResultAggregator.merge_placeholders(
-                list_of_strings, dictionary, skip + 1
-            )
-
-    def cluster_summary(self, threshold=0.75):
-        self.clustered_summary = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
-        )
-        for status in self.summary:
-            if status == "error":
-                for test_id in self.summary[status]:
-                    messages = list(self.summary[status][test_id].keys())
-                    # Pass a copy of messages to cluster_messages to generate clusters
-                    clusters = QAResultAggregator.cluster_messages(
-                        messages[:], threshold
-                    )
-
-                    for cluster in clusters:
-                        generalized, placeholders = (
-                            QAResultAggregator.generalize_message_group(cluster)
-                        )
-                        example_parts = ", ".join(
-                            [
-                                (
-                                    f"{k}='{v[0]}'"
-                                    if isinstance(v, list)
-                                    else f"{k}='{v}'"
-                                )
-                                for k, v in placeholders.items()
-                            ]
-                        )
-                        if example_parts:
-                            msg_summary = f"{generalized} ({len(cluster)} occurrences, e.g. {example_parts})"
-                        else:
-                            msg_summary = f"{generalized}{' (' + str(len(cluster)) + ' occurrences)' if len(cluster) > 1 else ''}"
-
-                        # Gather all ds_ids and filenames across the cluster
-                        combined = defaultdict(set)
-                        for message in cluster:
-                            for ds_id, files in self.summary[status][test_id][
-                                message
-                            ].items():
-                                combined[ds_id].update(files)
-
-                        # Shorten file lists to one example
-                        formatted = {
-                            ds_id
-                            + " ("
-                            + str(len(files))
-                            + f" file{'s' if len(files) > 1 else ''} affected)": (
-                                [f"e.g. '{next(iter(files))}'"]
-                                if len(files) > 1
-                                else [f"'{next(iter(files))}'"]
-                            )
-                            for ds_id, files in combined.items()
-                        }
-
-                        self.clustered_summary[status][test_id][msg_summary] = formatted
-            elif status == "fail":
-                for weight in self.summary[status]:
-                    for test_id in self.summary[status][weight]:
-                        messages = list(self.summary[status][weight][test_id].keys())
-                        # Pass a copy of messages to cluster_messages to generate clusters
-                        clusters = QAResultAggregator.cluster_messages(
-                            messages[:], threshold
-                        )
-
-                        for cluster in clusters:
-                            generalized, placeholders = (
-                                QAResultAggregator.generalize_message_group(cluster)
-                            )
-                            example_parts = ", ".join(
-                                [
-                                    (
-                                        f"{k}='{v[0]}'"
-                                        if isinstance(v, list)
-                                        else f"{k}='{v}'"
-                                    )
-                                    for k, v in placeholders.items()
-                                ]
-                            )
-                            if example_parts:
-                                msg_summary = f"{generalized} ({len(cluster)} occurrences, e.g. {example_parts})"
-                            else:
-                                msg_summary = f"{generalized}{' (' + str(len(cluster)) + ' occurrences)' if len(cluster) > 1 else ''}"
-
-                            # Gather all ds_ids and filenames across the cluster
-                            combined = defaultdict(set)
-                            for message in cluster:
-                                for ds_id, files in self.summary[status][weight][
-                                    test_id
-                                ][message].items():
-                                    combined[ds_id].update(files)
-
-                            # Shorten file lists to one example
-                            formatted = {
-                                ds_id
-                                + " ("
-                                + str(len(files))
-                                + f" file{'s' if len(files) > 1 else ''} affected)": (
-                                    [f"e.g. '{next(iter(files))}'"]
-                                    if len(files) > 1
-                                    else [f"'{next(iter(files))}'"]
-                                )
-                                for ds_id, files in combined.items()
-                            }
-
-                            self.clustered_summary[status][weight][test_id][
-                                msg_summary
-                            ] = formatted
-
-
 def get_default_result_dir():
+    """
+    Get the default result directory.
+
+    Returns
+    -------
+    str
+        Default result directory.
+    """
     global _timestamp
     global _timestamp_with_ms
     hash_object = hashlib.md5(_timestamp_with_ms.encode())
@@ -385,7 +52,27 @@ def get_default_result_dir():
         + f"/esgf-qa-results_{_timestamp_filename}_{hash_object.hexdigest()}"
     )

+
 def get_dsid(files_to_check_dict, dataset_files_map_ext, file_path, project_id):
+    """
+    Get the dataset id for a file.
+
+    Parameters
+    ----------
+    files_to_check_dict : dict
+        Dictionary of files to check.
+    dataset_files_map_ext : dict
+        Dictionary of dataset files.
+    file_path : str
+        Path to the file.
+    project_id : str
+        Project id.
+
+    Returns
+    -------
+    str
+        Dataset id.
+    """
     dir_id = files_to_check_dict[file_path]["id_dir"].split("/")
     fn_id = files_to_check_dict[file_path]["id_fn"].split("_")
     if project_id in dir_id:
@@ -397,7 +84,24 @@ def get_dsid(files_to_check_dict, dataset_files_map_ext, file_path, project_id):
     dsid += "." + ".".join(fn_id)
     return dsid

+
 def get_checker_release_versions(checkers, checker_options={}):
+    """
+    Get the release versions of the checkers.
+
+    Parameters
+    ----------
+    checkers : list
+        A list of checkers to get the release versions for.
+    checker_options : dict, optional
+        A dictionary of options for the checkers.
+        Example format: {"cf": {"check_dimension_order": True}}
+
+    Returns
+    -------
+    None
+        Updates the global dictionary ``checker_release_versions``.
+    """
     global checker_release_versions
     global checker_dict
     global checker_dict_ext
@@ -419,11 +123,20 @@ def run_compliance_checker(file_path, checkers, checker_options={}):
     """
     Run the compliance checker on a file with the specified checkers and options.

-    Parameters
-
-
-
-
+    Parameters
+    ----------
+    file_path : str
+        The path to the file to be checked.
+    checkers : list
+        A list of checkers to run.
+    checker_options : dict, optional
+        A dictionary of options for the checkers.
+        Example format: {"cf": {"check_dimension_order": True}}
+
+    Returns
+    -------
+    dict
+        A dictionary containing the results of the compliance checker.
     """
     check_suite = CheckSuite(options=checker_options)
     check_suite.load_all_available_checkers()
@@ -458,6 +171,21 @@ def run_compliance_checker(file_path, checkers, checker_options={}):


 def track_checked_datasets(checked_datasets_file, checked_datasets):
+    """
+    Track checked datasets.
+
+    Parameters
+    ----------
+    checked_datasets_file : str
+        The path to the file to track checked datasets.
+    checked_datasets : list
+        A list of checked datasets.
+
+    Returns
+    -------
+    None
+        Writes the checked datasets to the file.
+    """
     with open(checked_datasets_file, "a") as file:
         writer = csv.writer(file)
         for dataset_id in checked_datasets:
@@ -472,6 +200,29 @@ def process_file(
     processed_files,
     progress_file,
 ):
+    """
+    Runs cc checks for a single file.
+
+    Parameters
+    ----------
+    file_path : str
+        The path to the file to be checked.
+    checkers : list
+        A list of checkers to run.
+    checker_options : dict
+        A dictionary of options for the checkers.
+    files_to_check_dict : dict
+        A special dictionary mapping files to check to datasets.
+    processed_files : list
+        A list of files that have already been checked.
+    progress_file : str
+        The path to the progress file.
+
+    Returns
+    -------
+    tuple
+        A tuple containing the file path and the results of the compliance checker.
+    """
     # Read result from disk if check was run previously
     result_file = files_to_check_dict[file_path]["result_file"]
     consistency_file = files_to_check_dict[file_path]["consistency_file"]
@@ -567,6 +318,31 @@ def process_dataset(
     processed_datasets,
     progress_file,
 ):
+    """
+    Runs esgf_qa checks on a dataset.
+
+    Parameters
+    ----------
+    ds : str
+        Dataset to process.
+    ds_map : dict
+        Dictionary mapping dataset IDs to file paths.
+    checkers : list
+        List of checkers to run.
+    checker_options : dict
+        Dictionary of checker options.
+    files_to_check_dict : dict
+        A special dictionary mapping files to check to datasets.
+    processed_datasets : set
+        Set of processed datasets.
+    progress_file : str
+        Path to progress file.
+
+    Returns
+    -------
+    tuple
+        Dataset ID and check results.
+    """
     # Read result from disk if check was run previously
     result_file = files_to_check_dict[ds_map[ds][0]]["result_file_ds"]
     if ds in processed_datasets and os.path.isfile(result_file):
@@ -637,10 +413,14 @@ def parse_options(opts):
     is a colon. Adapted from
     https://github.com/ioos/compliance-checker/blob/cbb40ed1981c169b74c954f0775d5bd23005ed23/cchecker.py#L23

-    Parameters
-
+    Parameters
+    ----------
+    opts : Iterable of strings
+        Iterable of option strings

-    Returns
+    Returns
+    -------
+    dict
         Dictionary with keys as checker type (i.e. "mip").
         Each value is a dictionary where keys are checker options and values
         are checker option values or None if not provided.
@@ -649,21 +429,39 @@ def parse_options(opts):
     for opt_str in opts:
         try:
             checker_type, checker_opt, *checker_val = opt_str.split(":", 2)
-            checker_val = checker_val[0] if checker_val else
+            checker_val = checker_val[0] if checker_val else True
         except ValueError:
             raise ValueError(
                 f"Could not split option '{opt_str}', seems illegally formatted. The required format is: '<checker>:<option_name>[:<option_value>]', eg. 'mip:tables:/path/to/Tables'."
             )
-        if checker_type != "mip":
-            raise ValueError(
-                f"Currently, only options for 'mip' checker are supported, got '{checker_type}'."
-            )
         options_dict[checker_type][checker_opt] = checker_val
     return options_dict


+def _verify_options_dict(options):
+    """
+    Helper function to verify that the options dictionary is correctly formatted.
+    """
+    if not isinstance(options, dict):
+        return False
+    if options == {}:
+        return True
+    try:
+        for checker_type in options.keys():
+            for checker_opt in options[checker_type].keys():
+                checker_val = options[checker_type][checker_opt]
+                if not isinstance(checker_val, (int, float, str, bool, type(None))):
+                    return False
+    except (AttributeError, KeyError):
+        return False
+    # Seems to match the required format
+    return True
+
+
 def main():
-
+    """
+    CLI entry point.
+    """
     parser = argparse.ArgumentParser(description="Run QA checks")
     parser.add_argument(
         "parent_dir",
@@ -729,39 +527,51 @@ def main():
     # Resume information stored in a json file
     resume_info_file = Path(result_dir, ".resume_info")

+    # Do not allow arguments other than -o/--output_dir, -i/--info and -r/--resume if resuming previous QA run
+    if resume:
+        allowed_with_resume = {"output_dir", "info", "resume"}
+        # Convert Namespace to dict for easier checking
+        set_args = {k for k, v in vars(args).items() if v not in (None, False, [], "")}
+        invalid_args = set_args - allowed_with_resume
+        if invalid_args:
+            parser.error(
+                f"When using -r/--resume, only -o/--output_dir and -i/--info can be set. Invalid: {', '.join(invalid_args)}"
+            )
+
     # Deal with result_dir
     if not os.path.exists(result_dir):
         if resume:
-
-
-                "Resume is set but specified output_directory does not exist. Starting a new QA run..."
+            raise FileNotFoundError(
+                f"Resume is set but specified output_directory does not exist: '{result_dir}'."
             )
         os.mkdir(result_dir)
     elif os.listdir(result_dir) != []:
+        required_files = [progress_file, resume_info_file]
+        required_paths = [os.path.join(result_dir, p) for p in ["tables"]]
         if resume:
-            required_files = [progress_file, resume_info_file]
-            required_paths = [os.path.join(result_dir, p) for p in ["tables"]]
             if not all(os.path.isfile(rfile) for rfile in required_files) or not all(
                 os.path.isdir(rpath) for rpath in required_paths
             ):
                 raise Exception(
-                    "Resume is set but specified output_directory cannot be identified as
+                    "Resume is set but specified output_directory cannot be identified as output directory of a previous QA run."
                 )
         else:
-            if
-
-            )
+            if all(os.path.isfile(rfile) for rfile in required_files) and all(
+                os.path.isdir(rpath) for rpath in required_paths
+            ):
                 raise Exception(
-                    "Specified
+                    "Specified output directory is not empty but can be identified as output directory of a previous QA run. Use'-r' or '--resume' (together with '-o' or '--output_dir') to continue the previous QA run or choose a different output_directory instead."
                 )
             else:
-                raise Exception("Specified
+                raise Exception("Specified output directory is not empty.")
     else:
         if resume:
             resume = False
-
-                "Resume is set but specified
+            raise FileNotFoundError(
+                f"Resume is set but specified output directory is empty: '{result_dir}'."
             )
+
+    # When resuming previous QA run
     if resume:
         print(f"Resuming previous QA run in '{result_dir}'")
         with open(os.path.join(result_dir, ".resume_info")) as f:
@@ -770,58 +580,54 @@ def main():
             required_keys = ["parent_dir", "info", "tests"]
             if not all(key in resume_info for key in required_keys):
                 raise Exception(
-                    "Invalid .resume_info file. It should contain the keys 'parent_dir', 'info', and 'tests'."
+                    f"Invalid .resume_info file in '{result_dir}'. It should contain the keys 'parent_dir', 'info', and 'tests'."
                 )
             if not (
                 isinstance(resume_info["parent_dir"], str)
                 and isinstance(resume_info["info"], str)
                 and isinstance(resume_info["tests"], list)
+                and isinstance(resume_info.get("cl_checker_options", {}), dict)
+                and isinstance(
+                    resume_info.get("include_consistency_checks", False), bool
+                )
+                and _verify_options_dict(resume_info.get("cl_checker_options", {}))
                 and all(isinstance(test, str) for test in resume_info["tests"])
             ):
                 raise Exception(
-                    "Invalid .resume_info file. 'parent_dir' and 'info' should be strings, and 'tests' should be a list of strings."
+                    f"Invalid .resume_info file in '{result_dir}'. 'parent_dir' and 'info' should be strings, and 'tests' should be a list of strings. "
+                    "'cl_checker_options' (optional) should be a nested dictionary of format 'checker:option_name:option_value', and "
+                    "'include_consistency_checks' (optional) should be a boolean."
                 )
         except json.JSONDecodeError:
             raise Exception(
-                "Invalid .resume_info file. It
+                f"Invalid .resume_info file in '{result_dir}'. It needs to be a valid JSON file."
            )
-
-
-        else:
-            tests = resume_info["tests"]
+        tests = resume_info["tests"]
+        parent_dir = resume_info["parent_dir"]
         if info and info != resume_info["info"]:
             warnings.warn(
                 f"<info> argument differs from the originally specified <info> argument ('{resume_info['info']}'). Using the new specification."
             )
-
-
-
-
-                "Cannot resume a previous QA run with different <parent_dir>."
-            )
-        if cl_checker_options and cl_checker_options != resume_info.get(
-            "checker_options", {}
-        ):
-            raise Exception(
-                "Cannot resume a previous QA run with different <option> arguments."
-            )
-        else:
-            parent_dir = Path(resume_info["parent_dir"])
-        if "include_consistency_checks" in resume_info:
-            include_consistency_checks = resume_info["include_consistency_checks"]
+        cl_checker_options = resume_info.get("checker_options", {})
+        include_consistency_checks = resume_info.get(
+            "include_consistency_checks", False
+        )
     else:
         print(f"Storing check results in '{result_dir}'")

     # Deal with tests
     if not tests:
-        checkers = ["
-        checkers_versions = {"
+        checkers = ["cf"]
+        checkers_versions = {"cf": "latest"}
         checker_options = defaultdict(dict)
     else:
-
+        # Require versions to be specified:
+        # test_regex = re.compile(r"^[a-z0-9_]+:(latest|[0-9]+(\.[0-9]+)*)$")
+        # Allow versions to be ommitted:
+        test_regex = re.compile(r"^[a-z0-9_]+(?::(latest|[0-9]+(?:\.[0-9]+)*))?$")
         if not all([test_regex.match(test) for test in tests]):
             raise Exception(
-                f"Invalid test(s) specified. Please specify tests in the format 'checker_name:version'. Currently supported are: {', '.join(list(checker_dict.keys()))}, eerie."
+                f"Invalid test(s) specified. Please specify tests in the format 'checker_name' or'checker_name:version'. Currently supported are: {', '.join(list(checker_dict.keys()))}, eerie."
             )
         checkers = [test.split(":")[0] for test in tests]
         if sorted(checkers) != sorted(list(set(checkers))):
@@ -849,11 +655,9 @@ def main():
     if "eerie" in checkers_versions:
         checkers_versions["mip"] = "latest"
         del checkers_versions["eerie"]
-        if "
-            cl_checker_options["mip"]
-
-        ]
-        elif "tables" not in cl_checker_options["mip"]:
+        if "eerie" in cl_checker_options:
+            cl_checker_options["mip"] = cl_checker_options.pop("eerie")
+        if "tables" not in cl_checker_options["mip"]:
             cl_checker_options["mip"][
                 "tables"
             ] = "/work/bm0021/cmor_tables/eerie_cmor_tables/Tables"
@@ -887,7 +691,7 @@ def main():
     if cl_checker_options:
         resume_info["checker_options"] = cl_checker_options
     with open(os.path.join(result_dir, ".resume_info"), "w") as f:
-        json.dump(resume_info, f)
+        json.dump(resume_info, f, sort_keys=True, indent=4)

     # If only cf checker is selected, run cc6 time checks only
     if (
@@ -907,8 +711,9 @@ def main():

     DRS_parent = "CORDEX-CMIP6"
     for cname in checkers:
-
-
+        DRS_parent_tmp = DRS_path_parent.get(
+            checker_dict.get(cname.split(":")[0], ""), ""
+        )
         if DRS_parent_tmp:
             DRS_parent = DRS_parent_tmp
             break
@@ -1027,14 +832,14 @@ def main():
         dataset_files_map[files_to_check_dict[file_path]["id"]] = [file_path]
         checker_options[file_path] = {
             "mip": {
-                **cl_checker_options
+                **cl_checker_options.get("mip", {}),
                 "consistency_output": files_to_check_dict[file_path][
                     "consistency_file"
                 ],
                 "time_checks_only": time_checks_only,
             },
             "cc6": {
-                **cl_checker_options
+                **cl_checker_options.get("cc6", {}),
                 "consistency_output": files_to_check_dict[file_path][
                     "consistency_file"
                 ],
@@ -1046,15 +851,32 @@ def main():
                 "time_checks_only": time_checks_only,
             },
             "cf:": {
-                **cl_checker_options
+                **cl_checker_options.get("cf", {}),
                 "enable_appendix_a_checks": True,
             },
+            "wcrp_cmip6": {
+                **cl_checker_options.get("wcrp_cmip6", {}),
+                "consistency_output": files_to_check_dict[file_path][
+                    "consistency_file"
+                ],
+            },
+            "wcrp_cordex_cmip6": {
+                **cl_checker_options.get("wcrp_cordex_cmip6", {}),
+                "consistency_output": files_to_check_dict[file_path][
+                    "consistency_file"
+                ],
+                "tables_dir": result_dir + "/tables",
+                "force_table_download": file_path == files_to_check[0]
+                and (
+                    not resume or (resume and os.listdir(result_dir + "/tables") == [])
+                ),
+            },
         }
         checker_options[file_path].update(
             {
                 k: v
                 for k, v in cl_checker_options.items()
-                if k not in ["cc6", "cf", "mip"]
+                if k not in ["cc6", "cf", "mip", "wcrp_cmip6", "wcrp_cordex_cmip6"]
             }
         )

@@ -1090,7 +912,8 @@ def main():
     print()

     # Initialize the summary
-    summary = QAResultAggregator(
+    summary = QAResultAggregator()
+    reference_ds_dict = {}

     # Calculate the number of processes
     num_processes = max(multiprocessing.cpu_count() - 4, 1)
@@ -1140,8 +963,14 @@ def main():

     # Skip continuity and consistency checks if no cc6/mip checks were run
     # (and thus no consistency output file was created)
-    if
-
+    if (
+        "cc6:latest" in checkers
+        or "mip:latest" in checkers
+        or "wcrp_cmip6:1.0" in checkers
+        or "wcrp_cmip6:latest" in checkers
+        or "wcrp_cordex_cmip6:1.0" in checkers
+        or "wcrp_cordex_cmip6:latest" in checkers
+    ):
         #########################################################
         # QA Part 2 - Run all consistency & continuity checks
         #########################################################
@@ -1252,7 +1081,7 @@ def main():
         "parent_dir": str(parent_dir),
     }
     # Add reference datasets for inter-dataset consistency checks
-    if
+    if reference_ds_dict:
         summary_info["inter_ds_con_checks_ref"] = reference_ds_dict

     dsid_common_prefix = os.path.commonprefix(list(dataset_files_map.keys()))