pydpm_xl 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_dpm/__init__.py +1 -1
- py_dpm/api/__init__.py +23 -51
- py_dpm/api/dpm/__init__.py +2 -2
- py_dpm/api/dpm/instance.py +111 -0
- py_dpm/api/dpm_xl/__init__.py +10 -2
- py_dpm/api/dpm_xl/ast_generator.py +690 -10
- py_dpm/api/dpm_xl/complete_ast.py +54 -565
- py_dpm/api/{dpm → dpm_xl}/operation_scopes.py +2 -2
- py_dpm/cli/main.py +1 -1
- py_dpm/dpm/models.py +5 -1
- py_dpm/dpm/queries/tables.py +24 -4
- py_dpm/dpm_xl/ast/operands.py +3 -3
- py_dpm/instance/__init__.py +0 -0
- py_dpm/instance/instance.py +265 -0
- {pydpm_xl-0.2.1.dist-info → pydpm_xl-0.2.3.dist-info}/METADATA +1 -1
- {pydpm_xl-0.2.1.dist-info → pydpm_xl-0.2.3.dist-info}/RECORD +20 -24
- py_dpm/api/explorer.py +0 -4
- py_dpm/api/semantic.py +0 -56
- py_dpm/dpm_xl/validation/__init__.py +0 -12
- py_dpm/dpm_xl/validation/generation_utils.py +0 -428
- py_dpm/dpm_xl/validation/property_constraints.py +0 -225
- py_dpm/dpm_xl/validation/utils.py +0 -98
- py_dpm/dpm_xl/validation/variants.py +0 -359
- {pydpm_xl-0.2.1.dist-info → pydpm_xl-0.2.3.dist-info}/WHEEL +0 -0
- {pydpm_xl-0.2.1.dist-info → pydpm_xl-0.2.3.dist-info}/entry_points.txt +0 -0
- {pydpm_xl-0.2.1.dist-info → pydpm_xl-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {pydpm_xl-0.2.1.dist-info → pydpm_xl-0.2.3.dist-info}/top_level.txt +0 -0
|
@@ -1,428 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from itertools import groupby
|
|
3
|
-
|
|
4
|
-
import pandas as pd
|
|
5
|
-
|
|
6
|
-
from py_dpm.dpm_xl.utils.tokens import CELL_COMPONENTS, COLUMN, COLUMN_CODE, EXISTENCE_REPORT, \
|
|
7
|
-
HIERARCHY_REPORT, ROW, ROW_CODE, SHEET, SHEET_CODE, \
|
|
8
|
-
SIGN_REPORT
|
|
9
|
-
# from py_dpm.ValidationsGeneration.Utils import ExternalDataExistence, ExternalDataHierarchies, ExternalDataSign
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def from_generate_to_response(validations):
    """
    Merges the generated validations that share the same expression.

    The first validation found for an expression is kept (and updated in place). For every
    later validation with the same expression, its operation codes and its first
    subcategory id/code are added to the kept validation when they are not already there.

    :param validations: iterable of dictionaries with the keys 'expression', 'operation_code',
        'subcategory_id' and 'subcategory_code' (the last three are expected to be lists)
    :return a list with one validation per distinct expression, in order of first appearance
    """
    merged_by_expression = {}
    for validation in validations:
        merged = merged_by_expression.setdefault(validation['expression'], validation)
        if merged is validation:
            # First time this expression is seen: nothing to merge
            continue

        for op_code in validation['operation_code']:
            if op_code not in merged['operation_code']:
                merged['operation_code'].append(op_code)

        # Compare the subcategory id itself: testing the whole list for membership is
        # always true and would append the same subcategory again on every repetition.
        subcategory_id = validation['subcategory_id'][0]
        if subcategory_id not in merged['subcategory_id']:
            merged['subcategory_id'].append(subcategory_id)
            merged['subcategory_code'].append(validation['subcategory_code'][0])

    return list(merged_by_expression.values())
|
|
28
|
-
|
|
29
|
-
def generate_report_validation_view(validations, report_type):
    """
    Classifies the generated validations and dumps the result to './development/data/'.

    Validations are split in three buckets keyed by expression: 'review' (status other than
    'Correct'), 'matched' (correct with operation codes) and 'unmatched' (correct without
    operation codes). Each bucket is written to '<report_type>_<bucket>.json', and the
    proposed/rejected external rules whose 'ID' is not among the matched operation codes are
    written to '<report_type>_proposed_not_generated.csv' and
    '<report_type>_rejected_not_generated.csv'.

    :param validations: iterable of dictionaries with the keys 'operation_code', 'expression'
        and 'status'
    :param report_type: one of HIERARCHY_REPORT, SIGN_REPORT or EXISTENCE_REPORT
    :raises ValueError: if report_type is not one of the supported report types
    """
    report_type_list = [HIERARCHY_REPORT, SIGN_REPORT, EXISTENCE_REPORT]
    if report_type not in report_type_list:
        raise ValueError(f"report_type must be one of {report_type_list}")

    # NOTE(review): the ExternalData* classes are not imported in this module (their import
    # is commented out at the top of the file), so this function needs them restored to run.
    if report_type == HIERARCHY_REPORT:
        external_data = ExternalDataHierarchies()
        proposed_rules = external_data.proposed_rules
        rejected_rules = external_data.rejected_rules
        proposed = proposed_rules[proposed_rules['Type'] == 'Hierarchy']
        # Filter the rejected rules with their own 'Type' column; the mask used to be built
        # from proposed_rules, which belongs to a different dataframe.
        # NOTE(review): assumes rejected_rules also has a 'Type' column - confirm.
        rejected = rejected_rules[rejected_rules['Type'] == 'Hierarchy']
    elif report_type == SIGN_REPORT:
        external_data = ExternalDataSign()
        proposed = external_data.proposed_rules
        rejected = external_data.rejected_rules
    else:
        external_data = ExternalDataExistence()
        proposed = external_data.proposed_rules
        rejected = external_data.rejected_rules

    matched = {"number_validations": 0}
    unmatched = {"number_validations": 0}
    review = {"number_validations": 0}

    for validation in validations:
        valdict = {
            "operation_code": validation['operation_code'],
            "expression": validation['expression'],
            "status": validation['status']
        }

        if valdict['status'] != 'Correct':
            bucket = review
        elif valdict['operation_code'] != []:
            bucket = matched
        else:
            bucket = unmatched

        # Only the first validation of every expression is stored in its bucket
        if valdict['expression'] not in bucket:
            bucket[valdict['expression']] = valdict
        # NOTE(review): the counter is assumed to count every validation of the bucket,
        # repeated expressions included - confirm against the intended report figures.
        bucket['number_validations'] += 1

    matched_codes = []
    for expression, stored in matched.items():
        if expression != 'number_validations':
            matched_codes += stored['operation_code']

    proposed_not_generated = proposed[~proposed['ID'].isin(matched_codes)]
    rejected_not_generated = rejected[~rejected['ID'].isin(matched_codes)]

    with open('./development/data/' + report_type + '_matched.json', 'w') as fp:
        json.dump(matched, fp, indent=4)
    with open('./development/data/' + report_type + '_unmatched.json', 'w') as fp:
        json.dump(unmatched, fp, indent=4)
    with open('./development/data/' + report_type + '_review.json', 'w') as fp:
        json.dump(review, fp, indent=4)

    proposed_not_generated.to_csv('./development/data/' + report_type + '_proposed_not_generated.csv', index=False)
    rejected_not_generated.to_csv('./development/data/' + report_type + '_rejected_not_generated.csv', index=False)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
class ValidationsGenerationUtils:
    """
    Class with common methods used by the different generation processes of validations
    """

    @classmethod
    def get_components_to_group(cls, datapoints_variable: pd.DataFrame):
        """
        Gets cell components to group by
        :param datapoints_variable: dataframe with datapoints to group
        :return a list with cell components to group by
        """
        component_values = datapoints_variable[CELL_COMPONENTS]
        components_to_check = [
            cell_component for cell_component in CELL_COMPONENTS
            if not component_values[cell_component].isnull().all()
        ]
        # With fewer than two populated components there is nothing to group by
        if len(components_to_check) <= 1:
            return []

        components_to_group = []
        for cell_component in components_to_check:
            other_components = [comp for comp in components_to_check if comp != cell_component]
            duplicated = component_values.duplicated(subset=other_components, keep=False)
            # The component is kept when some combination of the other components is unique
            if not duplicated.all():
                components_to_group.append(cell_component)
        return components_to_group

    @classmethod
    def group_cells(cls, datapoints_variable: pd.DataFrame, datapoints_table: pd.DataFrame):
        """
        Get the cell groups from datapoints by grouping them when necessary
        :param datapoints_variable: datapoints of the variable
        :param datapoints_table: datapoints of the table associated which the table code of operand
        :return a list with rows, cols and sheets for each group
        """
        components_to_group = cls.get_components_to_group(datapoints_variable=datapoints_variable)

        if not components_to_group or len(components_to_group) > 2:
            return [cls.group_cell_components(datapoints_variable=datapoints_variable,
                                              datapoints_table=datapoints_table)]

        if len(components_to_group) == 1:
            return [
                cls.group_cell_components(datapoints_variable=group_df,
                                          datapoints_table=datapoints_table)
                for _, group_df in datapoints_variable.groupby(components_to_group[0], dropna=False)
            ]

        return [
            (rows, cols, sheets)
            for rows, cols, sheets, _ in cls._group_by_two_components(
                datapoints_variable, datapoints_table, components_to_group)
        ]

    @classmethod
    def group_cells_test(cls, datapoints_variable: pd.DataFrame, datapoints_table: pd.DataFrame):
        """
        Get the cell groups from datapoints by grouping them when necessary
        :param datapoints_variable: datapoints of the variable (must have a 'cell_id' column)
        :param datapoints_table: datapoints of the table associated which the table code of operand
        :return a list with rows, cols, sheets and the cell ids for each group
        """
        components_to_group = cls.get_components_to_group(datapoints_variable=datapoints_variable)

        if not components_to_group or len(components_to_group) > 2:
            is_several_vals, filtered_df = cls._several_validations_checker(datapoints_variable)
            group_frames = filtered_df if is_several_vals else [datapoints_variable]
        elif len(components_to_group) == 1:
            group_frames = [
                group_df
                for _, group_df in datapoints_variable.groupby(components_to_group[0], dropna=False)
            ]
        else:
            # Every group reports the ids of its own cells, as the other branches do
            # (this branch used to report the ids of all the datapoints for every group).
            return [
                (rows, cols, sheets, group_df['cell_id'].to_list())
                for rows, cols, sheets, group_df in cls._group_by_two_components(
                    datapoints_variable, datapoints_table, components_to_group)
            ]

        result_lst = []
        for group_df in group_frames:
            rows, cols, sheets = cls.group_cell_components(datapoints_variable=group_df,
                                                           datapoints_table=datapoints_table)
            result_lst.append((rows, cols, sheets, group_df['cell_id'].to_list()))
        return result_lst

    @staticmethod
    def _component_prefix(component):
        """
        Returns the expression prefix of a cell component column (SHEET when it is neither
        the row nor the column code)
        """
        if component == ROW_CODE:
            return ROW
        if component == COLUMN_CODE:
            return COLUMN
        return SHEET

    @classmethod
    def _group_by_two_components(cls, datapoints_variable, datapoints_table, components_to_group):
        """
        Groups the reference component values that share the same set of second component values
        :param datapoints_variable: datapoints of the variable
        :param datapoints_table: datapoints of the table associated which the table code of operand
        :param components_to_group: the two cell components to group by (reference, second)
        :return a list with rows, cols, sheets and the datapoints of each group
        """
        ref_component, second_group_component = components_to_group
        third_component = [component for component in CELL_COMPONENTS
                           if component not in components_to_group][0]

        reference_prefix = cls._component_prefix(ref_component)
        second_component_prefix = cls._component_prefix(second_group_component)
        third_component_prefix = cls._component_prefix(third_component)

        datapoints_variable = datapoints_variable.sort_values([ref_component, second_group_component])

        # group reference component values by second group component values.
        # A dictionary is used instead of itertools.groupby because groupby only merges
        # consecutive items, and the items are ordered by reference value, not by key.
        reference_values_by_key = {}
        for value in datapoints_variable[ref_component].unique().tolist():
            second_values = datapoints_variable[datapoints_variable[ref_component] == value][
                second_group_component].unique().tolist()
            reference_values_by_key.setdefault(tuple(sorted(second_values)), []).append(value)

        has_third_component = not datapoints_variable[third_component].isnull().all()

        result_lst = []
        for key, group_values in reference_values_by_key.items():
            keys_values = list(key)
            group_df = datapoints_variable[
                datapoints_variable[ref_component].isin(group_values) & datapoints_variable[
                    second_group_component].isin(keys_values)]

            reference_component_grouping = cls.group_cell_component_elements(
                reference_prefix, group_values, datapoints_table[ref_component])
            second_component_grouping = cls.group_cell_component_elements(
                second_component_prefix, keys_values, datapoints_table[second_group_component])
            third_component_grouping = None
            if has_third_component:
                third_values = group_df[third_component].unique().tolist()
                third_component_grouping = cls.group_cell_component_elements(
                    third_component_prefix, third_values, datapoints_table[third_component])

            # Later keys override earlier ones, so the reference component wins over the
            # second one; anything not found falls back to the third component grouping.
            grouping_by_prefix = {
                second_component_prefix: second_component_grouping,
                reference_prefix: reference_component_grouping,
            }
            rows = grouping_by_prefix.get(ROW, third_component_grouping)
            cols = grouping_by_prefix.get(COLUMN, third_component_grouping)
            sheets = grouping_by_prefix.get(SHEET, third_component_grouping)
            result_lst.append((rows, cols, sheets, group_df))

        return result_lst

    @classmethod
    def group_cell_components(cls, datapoints_variable, datapoints_table):
        """
        Extracts the cell components by grouping them when necessary
        :param datapoints_variable: datapoints of the variable
        :param datapoints_table: datapoints of the table associated which the table code of operand
        :return Rows, cols and sheets of operand
        """
        rows = cls.group_cell_component_elements(ROW, datapoints_variable[ROW_CODE].tolist(),
                                                 datapoints_table[ROW_CODE])
        cols = cls.group_cell_component_elements(COLUMN, datapoints_variable[COLUMN_CODE].tolist(),
                                                 datapoints_table[COLUMN_CODE])
        sheets = cls.group_cell_component_elements(SHEET, datapoints_variable[SHEET_CODE].tolist(),
                                                   datapoints_table[SHEET_CODE])
        return rows, cols, sheets

    @classmethod
    def group_cell_component_elements(cls, cell_component_prefix: str, cell_component_elements: list,
                                      datapoints: pd.Series):
        """
        Groups elements of a cell component
        :param cell_component_prefix: Cell component name to be operated on
        :param cell_component_elements: values of operand cell component associated with the cell_component_prefix
            argument (the list is not modified)
        :param datapoints: values of table cell component associated with the cell_component_prefix argument
        :return None when there is no value, '<prefix><value>' for a single value, '<prefix>*' when
            all the table values are used, '<prefix><first>-<last>' for a full range of table values
            or '(<prefix><v1>, <prefix><v2>, ...)' otherwise
        """
        unique_values = set(cell_component_elements)
        if not unique_values:
            return None
        if len(unique_values) == 1:
            if cell_component_elements[0]:
                return cell_component_prefix + str(cell_component_elements[0])
            return None

        # Sort a copy so that the list received from the caller is left untouched
        ordered_elements = sorted(cell_component_elements)
        first_element, last_element = ordered_elements[0], ordered_elements[-1]

        cell_component_all_unique_values = datapoints.drop_duplicates().tolist()
        datapoints_cell_component = datapoints[datapoints.between(first_element, last_element)]

        if len(unique_values) == len(cell_component_all_unique_values):
            return f"{cell_component_prefix}*"

        if len(unique_values) == len(datapoints_cell_component.drop_duplicates()):
            return f"{cell_component_prefix}{first_element}-{last_element}"

        return '(' + ', '.join([f"{cell_component_prefix}{component}" for component in sorted(unique_values)]) + ')'

    @staticmethod
    def _join_cell_components(table_code, rows, cols, sheets):
        """
        Joins the non-empty cell components into '{t<table>, rows, cols, sheets}'; None if all are empty
        """
        table_code = f"t{table_code}" if table_code else None
        cell_components = [components for components in (table_code, rows, cols, sheets) if components]
        if cell_components:
            return '{' + ', '.join(cell_components) + '}'
        return None

    @classmethod
    def write_cell(cls, table_code, rows, cols, sheets):
        """
        Returns a string that represents a cell expression
        :param table_code: Table code
        :param rows: Expression rows
        :param cols: Expression cols
        :param sheets: Expression sheets
        :return the cell expression, or an empty string when every component is empty
        """
        return cls._join_cell_components(table_code, rows, cols, sheets) or ""

    @classmethod
    def write_cell_with_asterisk(cls, table_code, rows, cols, sheets, reference_data):
        """
        Returns a string that represents a cell expression, replacing by an asterisk the
        ranges that cover all the values of the reference data
        :param table_code: Table code
        :param rows: Expression rows
        :param cols: Expression cols
        :param sheets: Expression sheets
        :param reference_data: data indexed by ROW_CODE, COLUMN_CODE and SHEET_CODE holding the
            values that the ranges are compared with
        :return the cell expression, or None when every component is empty
        """
        # check if * is needed
        if rows and "-" in rows:
            rows = replace_range_by_asterisk(rows, reference_data[ROW_CODE], ROW)
        if cols and "-" in cols:
            cols = replace_range_by_asterisk(cols, reference_data[COLUMN_CODE], COLUMN)
        if sheets and "-" in sheets:
            sheets = replace_range_by_asterisk(sheets, reference_data[SHEET_CODE], SHEET)

        return cls._join_cell_components(table_code, rows, cols, sheets)

    @classmethod
    def _several_validations_checker(cls, df) -> "tuple[bool, list | None]":
        """
        Checks if the dataframe has several validations
        :param df: dataframe with validations
        :return (True, list of dataframes) if the dataframe has several validations,
            (False, None) otherwise
        """
        # TODO: Check this, example F_18.00.b sign validations
        checker_component = [c_component for c_component in CELL_COMPONENTS
                             if df[c_component].nunique() > 1]
        if len(checker_component) == 2:
            results = _two_components_checker(df, checker_component)
            if results:
                return True, results
        # TODO: three varying components are not implemented, not necessary for now because
        # there are no sign validations (without components to group) with 3 components
        return False, None
|
|
379
|
-
|
|
380
|
-
def _two_components_checker(df, checker_component)->list:
|
|
381
|
-
"""
|
|
382
|
-
Checks if the dataframe has several validations
|
|
383
|
-
:param df: dataframe with validations
|
|
384
|
-
:return True if the dataframe has several validations, False otherwise
|
|
385
|
-
"""
|
|
386
|
-
results = []
|
|
387
|
-
for i in enumerate(checker_component):
|
|
388
|
-
component_group = checker_component[i[0]-1]
|
|
389
|
-
other_component = checker_component[i[0]]
|
|
390
|
-
# component_group_values = df[component_group].unique().tolist()
|
|
391
|
-
group_df = df.groupby(component_group)
|
|
392
|
-
dict_related = {}
|
|
393
|
-
dict_values = {}
|
|
394
|
-
for a, b in group_df:
|
|
395
|
-
dict_values[a] = b[other_component].unique().tolist()
|
|
396
|
-
|
|
397
|
-
for k, v in dict_values.items():
|
|
398
|
-
dict_related[k] = []
|
|
399
|
-
for i, j in dict_values.items():
|
|
400
|
-
if k != i:
|
|
401
|
-
if set(v) == set(j):
|
|
402
|
-
dict_related[k].append(i)
|
|
403
|
-
components_grouped_list = [(k, *v) for k, v in dict_related.items()]
|
|
404
|
-
components_grouped_sorted = [sorted(x) for x in components_grouped_list]
|
|
405
|
-
components_grouped_sorted = [tuple(x) for x in components_grouped_sorted]
|
|
406
|
-
components_set = set(components_grouped_sorted)
|
|
407
|
-
|
|
408
|
-
if len(components_set) > 1:
|
|
409
|
-
for elto in components_set:
|
|
410
|
-
results.append(df[df[component_group].isin(elto)])
|
|
411
|
-
|
|
412
|
-
return results
|
|
413
|
-
|
|
414
|
-
def replace_range_by_asterisk(expression, df_component, component_prefix):
    """
    Replaces range by asterisk when the range goes from the lowest to the highest value
    of the component

    :param expression: expression to be replaced, with the form '<prefix><first>-<last>'
    :param df_component: series with the values of the component to compare the range with
    :param component_prefix: prefix of the component in the expression
    :return '<prefix>*' if the range covers all the component values (and there is more
        than one), the received expression otherwise
    """
    sorted_list = sorted(df_component.drop_duplicates().to_list())
    if len(sorted_list) <= 1:
        return expression

    # Strip the whole prefix (it may be longer than one character); an expression that
    # does not start with the prefix just loses its first character, as it always did.
    if expression.startswith(component_prefix):
        range_expression = expression[len(component_prefix):]
    else:
        range_expression = expression[1:]

    # Compare the range as a whole rather than splitting on '-', which would break codes
    # containing hyphens
    if range_expression == f"{sorted_list[0]}-{sorted_list[-1]}":
        return component_prefix + "*"

    return expression
|
|
@@ -1,225 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
|
|
3
|
-
from py_dpm.dpm_xl.ast.nodes import (
|
|
4
|
-
AggregationOp,
|
|
5
|
-
BinOp,
|
|
6
|
-
ComplexNumericOp,
|
|
7
|
-
CondExpr,
|
|
8
|
-
FilterOp,
|
|
9
|
-
GetOp,
|
|
10
|
-
PropertyReference,
|
|
11
|
-
RenameOp,
|
|
12
|
-
Scalar,
|
|
13
|
-
TimeShiftOp,
|
|
14
|
-
UnaryOp,
|
|
15
|
-
VarID,
|
|
16
|
-
WhereClauseOp,
|
|
17
|
-
)
|
|
18
|
-
from py_dpm.dpm_xl.ast.template import ASTTemplate
|
|
19
|
-
from py_dpm.exceptions import exceptions
|
|
20
|
-
from py_dpm.dpm.models import ItemCategory, ViewDatapoints
|
|
21
|
-
from py_dpm.dpm_xl.validation.generation_utils import ValidationsGenerationUtils
|
|
22
|
-
from py_dpm.dpm_xl.utils.tokens import *
|
|
23
|
-
|
|
24
|
-
# Operators accepted in a property constraint: PropertiesConstraintsProcessor raises
# SemanticError "5-1-3" for any BinOp or UnaryOp whose operator is not in this list.
ALLOWED_OPERATORS = [MATCH, IN, EQ, NEQ, GT, GTE, LT, LTE, LENGTH, CONCATENATE]
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def _check_property_constraint_exists(signature: str, session):
    """
    Tells whether a property can be found for the given reference.

    References containing ':' are looked up with ItemCategory.get_property_from_signature,
    any other reference with ItemCategory.get_property_from_code.

    :param signature: signature or code of the property
    :param session: session handed over to the ItemCategory lookup
    :return True if the lookup returns something other than None, False otherwise
    """
    lookup = (
        ItemCategory.get_property_from_signature
        if ":" in signature
        else ItemCategory.get_property_from_code
    )
    return lookup(signature, session) is not None
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class PropertiesConstraintsChecker(ASTTemplate):
    """
    Visitor that tells whether an expression is a property constraint.

    The whole tree is visited on construction; the result is then available in
    ``is_property_constraint``.

    :parameter ast: Abstract Syntax Tree of expression.
    :parameter session: session handed over to the ItemCategory lookups.
    """

    def __init__(self, ast, session):
        super().__init__()
        self.has_property = False
        self.has_table = False
        self.session = session
        self.visit(ast)

    @property
    def is_property_constraint(self):
        """
        True when a property was found and no operand with a table was visited
        """
        return False if self.has_table else self.has_property

    def _find_property(self, signature):
        """
        Looks for the property with the given signature (None is treated as not found by callers)
        """
        return ItemCategory.get_property_from_signature(signature, self.session)

    def visit_PropertyReference(self, node: PropertyReference):
        # The reference is always looked up by signature, whether or not it contains ':'
        # (the former ':' check did nothing and has been removed).
        signature = node.code
        if self._find_property(signature) is None:
            raise exceptions.SemanticError("5-1-4", ref=signature)
        self.has_property = True

    def visit_VarID(self, node: VarID):
        if node.table:
            self.has_table = True

    def visit_Scalar(self, node: Scalar):
        signature = node.item
        if self.has_property:
            # A property was already found: a second scalar resolving to a property is an error
            if self._find_property(signature):
                raise exceptions.SemanticError("5-1-2")
            return

        # Only scalars of type Item are checked against the properties. Assuming that
        # the first scalar is always a property would not be correct.
        if getattr(node, "scalar_type", None) == "Item" and self._find_property(signature):
            self.has_property = True
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
class PropertiesConstraintsProcessor(ASTTemplate):
    """
    Class to generate individual validations from properties constraints

    The tree is visited on construction to extract the property constraint; the new
    expressions are stored in ``new_expressions`` by ``generate_validations``.

    :parameter expression: DPM-XL expression.
    :parameter ast: Abstract Syntax Tree of expression.
    :parameter validation_code: Code of parent validation.
    :parameter session: SQLAlchemy Session to be used to connect to the DB.
    :parameter release_id: release handed over to the property and datapoints lookups.
    """

    def __init__(self, expression, ast, validation_code, session, release_id):
        super().__init__()
        self.expression = expression
        self.AST = ast
        self.validation_code = validation_code
        self.session = session
        self.current_validation = 1
        self.property_constraint = None
        self.release_id = release_id
        self.new_expressions = []
        self.visit(self.AST)

    def generate_validations(self):
        """
        Generates individual validations using the extracted property constraint in the Abstract Syntax Tree

        :raises SemanticError: "5-1-1" if no property constraint was extracted, "1-7" if the
            property is not found
        """
        if not self.property_constraint:
            raise exceptions.SemanticError("5-1-1")

        item_category = ItemCategory.get_property_from_signature(
            signature=self.property_constraint,
            session=self.session,
            release_id=self.release_id,
        )
        if item_category is None:
            raise exceptions.SemanticError(
                "1-7", property_code=self.property_constraint
            )
        variables: pd.DataFrame = ViewDatapoints.get_from_property(
            self.session, item_category.ItemID, self.release_id
        )
        # Group by the column name, not by a one-element list: with a list pandas >= 2
        # yields 1-tuples as keys, and str(table_code) would then produce "('<code>',)".
        for table_code, group_df in variables.groupby("table_code"):
            datapoints = ViewDatapoints.get_table_data(
                session=self.session, table=str(table_code)
            )
            self.generate_expressions(table_code, group_df, datapoints)

    def generate_expressions(self, table_code, data, datapoints_table):
        """
        Generates new expressions getting their operands by grouping the cells
        :param table_code: code of the operand table
        :param data: dataframe with operand datapoints
        :param datapoints_table: table datapoints
        """
        groups = ValidationsGenerationUtils.group_cells(
            datapoints_variable=data, datapoints_table=datapoints_table
        )
        for rows, cols, sheets in groups:
            operand = ValidationsGenerationUtils.write_cell(
                table_code, rows, cols, sheets
            )
            # The bracketed property reference is replaced by the cell operand
            new_expression = self.expression.replace(
                f"[{self.property_constraint}]", operand
            )
            self.new_expressions.append(new_expression)

    def create_validation(self, expression, status):
        """
        Creates a dictionary to represent a validation from expression and status information
        :param expression: Expression of validation
        :param status: Status of validation
        :return a dictionary with validation_code, expression and status
        """
        validation_code = None
        # Only correct validations get a code (and consume a sequence number)
        if status == STATUS_CORRECT:
            validation_code = f"{self.validation_code}-{self.current_validation}"
            self.current_validation += 1
        return {
            VALIDATION_CODE: validation_code,
            EXPRESSION: expression,
            STATUS: status,
        }

    def visit_PropertyReference(self, node: PropertyReference):
        # A second property reference in the same expression is an error
        if self.property_constraint:
            raise exceptions.SemanticError("5-1-2")

        self.property_constraint = node.code
        signature = node.code
        if not _check_property_constraint_exists(signature, self.session):
            raise exceptions.SemanticError("5-1-4", ref=signature)

    def visit_Scalar(self, node: Scalar):
        if getattr(node, "scalar_type", None) != "Item":
            return
        signature = node.item
        property_query = ItemCategory.get_property_from_signature(
            signature, self.session
        )
        # The first item scalar that resolves to a property becomes the constraint
        if property_query and not self.property_constraint:
            self.property_constraint = signature

    def visit_BinOp(self, node: BinOp):
        if node.op not in ALLOWED_OPERATORS:
            raise exceptions.SemanticError("5-1-3", operator=node.op)

        self.visit(node.left)
        self.visit(node.right)

    def visit_UnaryOp(self, node: UnaryOp):
        if node.op not in ALLOWED_OPERATORS:
            raise exceptions.SemanticError("5-1-3", operator=node.op)
        self.visit(node.operand)

    # The node types below always raise SemanticError "5-1-3" when visited

    def visit_CondExpr(self, node: CondExpr):
        raise exceptions.SemanticError("5-1-3", operator=IF)

    def visit_AggregationOp(self, node: AggregationOp):
        raise exceptions.SemanticError("5-1-3", operator=node.op)

    def visit_RenameOp(self, node: RenameOp):
        raise exceptions.SemanticError("5-1-3", operator=RENAME)

    def visit_TimeShiftOp(self, node: TimeShiftOp):
        raise exceptions.SemanticError("5-1-3", operator=TIME_SHIFT)

    def visit_FilterOp(self, node: FilterOp):
        raise exceptions.SemanticError("5-1-3", operator=FILTER)

    def visit_WhereClauseOp(self, node: WhereClauseOp):
        raise exceptions.SemanticError("5-1-3", operator=WHERE)

    def visit_GetOp(self, node: GetOp):
        raise exceptions.SemanticError("5-1-3", operator=GET)

    def visit_ComplexNumericOp(self, node: ComplexNumericOp):
        raise exceptions.SemanticError("5-1-3", operator=node.op)
|