pydpm_xl 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. py_dpm/AST/ASTConstructor.py +503 -0
  2. py_dpm/AST/ASTObjects.py +827 -0
  3. py_dpm/AST/ASTTemplate.py +101 -0
  4. py_dpm/AST/ASTVisitor.py +13 -0
  5. py_dpm/AST/MLGeneration.py +588 -0
  6. py_dpm/AST/ModuleAnalyzer.py +79 -0
  7. py_dpm/AST/ModuleDependencies.py +203 -0
  8. py_dpm/AST/WhereClauseChecker.py +12 -0
  9. py_dpm/AST/__init__.py +0 -0
  10. py_dpm/AST/check_operands.py +302 -0
  11. py_dpm/DataTypes/ScalarTypes.py +324 -0
  12. py_dpm/DataTypes/TimeClasses.py +370 -0
  13. py_dpm/DataTypes/TypePromotion.py +195 -0
  14. py_dpm/DataTypes/__init__.py +0 -0
  15. py_dpm/Exceptions/__init__.py +0 -0
  16. py_dpm/Exceptions/exceptions.py +84 -0
  17. py_dpm/Exceptions/messages.py +114 -0
  18. py_dpm/OperationScopes/OperationScopeService.py +247 -0
  19. py_dpm/OperationScopes/__init__.py +0 -0
  20. py_dpm/Operators/AggregateOperators.py +138 -0
  21. py_dpm/Operators/BooleanOperators.py +30 -0
  22. py_dpm/Operators/ClauseOperators.py +159 -0
  23. py_dpm/Operators/ComparisonOperators.py +69 -0
  24. py_dpm/Operators/ConditionalOperators.py +362 -0
  25. py_dpm/Operators/NumericOperators.py +101 -0
  26. py_dpm/Operators/Operator.py +388 -0
  27. py_dpm/Operators/StringOperators.py +27 -0
  28. py_dpm/Operators/TimeOperators.py +53 -0
  29. py_dpm/Operators/__init__.py +0 -0
  30. py_dpm/Utils/ValidationsGenerationUtils.py +429 -0
  31. py_dpm/Utils/__init__.py +0 -0
  32. py_dpm/Utils/operands_mapping.py +73 -0
  33. py_dpm/Utils/operator_mapping.py +89 -0
  34. py_dpm/Utils/tokens.py +172 -0
  35. py_dpm/Utils/utils.py +2 -0
  36. py_dpm/ValidationsGeneration/PropertiesConstraintsProcessor.py +190 -0
  37. py_dpm/ValidationsGeneration/Utils.py +364 -0
  38. py_dpm/ValidationsGeneration/VariantsProcessor.py +265 -0
  39. py_dpm/ValidationsGeneration/__init__.py +0 -0
  40. py_dpm/ValidationsGeneration/auxiliary_functions.py +98 -0
  41. py_dpm/__init__.py +61 -0
  42. py_dpm/api/__init__.py +140 -0
  43. py_dpm/api/ast_generator.py +438 -0
  44. py_dpm/api/complete_ast.py +241 -0
  45. py_dpm/api/data_dictionary_validation.py +577 -0
  46. py_dpm/api/migration.py +77 -0
  47. py_dpm/api/semantic.py +224 -0
  48. py_dpm/api/syntax.py +182 -0
  49. py_dpm/client.py +106 -0
  50. py_dpm/data_handlers.py +99 -0
  51. py_dpm/db_utils.py +117 -0
  52. py_dpm/grammar/__init__.py +0 -0
  53. py_dpm/grammar/dist/__init__.py +0 -0
  54. py_dpm/grammar/dist/dpm_xlLexer.interp +428 -0
  55. py_dpm/grammar/dist/dpm_xlLexer.py +804 -0
  56. py_dpm/grammar/dist/dpm_xlLexer.tokens +106 -0
  57. py_dpm/grammar/dist/dpm_xlParser.interp +249 -0
  58. py_dpm/grammar/dist/dpm_xlParser.py +5224 -0
  59. py_dpm/grammar/dist/dpm_xlParser.tokens +106 -0
  60. py_dpm/grammar/dist/dpm_xlParserListener.py +742 -0
  61. py_dpm/grammar/dist/dpm_xlParserVisitor.py +419 -0
  62. py_dpm/grammar/dist/listeners.py +10 -0
  63. py_dpm/grammar/dpm_xlLexer.g4 +435 -0
  64. py_dpm/grammar/dpm_xlParser.g4 +260 -0
  65. py_dpm/migration.py +282 -0
  66. py_dpm/models.py +2139 -0
  67. py_dpm/semantics/DAG/DAGAnalyzer.py +158 -0
  68. py_dpm/semantics/DAG/__init__.py +0 -0
  69. py_dpm/semantics/SemanticAnalyzer.py +320 -0
  70. py_dpm/semantics/Symbols.py +223 -0
  71. py_dpm/semantics/__init__.py +0 -0
  72. py_dpm/utils/__init__.py +0 -0
  73. py_dpm/utils/ast_serialization.py +481 -0
  74. py_dpm/views/data_types.sql +12 -0
  75. py_dpm/views/datapoints.sql +65 -0
  76. py_dpm/views/hierarchy_operand_reference.sql +11 -0
  77. py_dpm/views/hierarchy_preconditions.sql +13 -0
  78. py_dpm/views/hierarchy_variables.sql +26 -0
  79. py_dpm/views/hierarchy_variables_context.sql +14 -0
  80. py_dpm/views/key_components.sql +18 -0
  81. py_dpm/views/module_from_table.sql +11 -0
  82. py_dpm/views/open_keys.sql +13 -0
  83. py_dpm/views/operation_info.sql +27 -0
  84. py_dpm/views/operation_list.sql +18 -0
  85. py_dpm/views/operations_versions_from_module_version.sql +30 -0
  86. py_dpm/views/precondition_info.sql +17 -0
  87. py_dpm/views/report_type_operand_reference_info.sql +18 -0
  88. py_dpm/views/subcategory_info.sql +17 -0
  89. py_dpm/views/table_info.sql +19 -0
  90. pydpm_xl-0.1.10.dist-info/LICENSE +674 -0
  91. pydpm_xl-0.1.10.dist-info/METADATA +50 -0
  92. pydpm_xl-0.1.10.dist-info/RECORD +94 -0
  93. pydpm_xl-0.1.10.dist-info/WHEEL +4 -0
  94. pydpm_xl-0.1.10.dist-info/entry_points.txt +3 -0
py_dpm/Utils/ValidationsGenerationUtils.py
@@ -0,0 +1,429 @@
+ import json
+ from itertools import groupby
+
+ import pandas as pd
+
+ from py_dpm.Utils.tokens import CELL_COMPONENTS, COLUMN, COLUMN_CODE, EXISTENCE_REPORT, \
+     HIERARCHY_REPORT, ROW, ROW_CODE, SHEET, SHEET_CODE, SIGN_REPORT
+ from py_dpm.ValidationsGeneration.Utils import ExternalDataExistence, ExternalDataHierarchies, \
+     ExternalDataSign
+
+
+ def from_generate_to_response(validations):
+     """
+     Deduplicates the generated validations by expression, merging the operation
+     codes and subcategories of duplicate expressions into a single entry.
+     """
+     response_validations = {}
+     for validation in validations:
+         if validation['expression'] not in response_validations:
+             response_validations[validation['expression']] = validation
+         else:
+             for op_code in validation['operation_code']:
+                 if op_code not in response_validations[validation['expression']]['operation_code']:
+                     response_validations[validation['expression']]['operation_code'].append(op_code)
+             # The original compared the whole subcategory_id list; the membership
+             # check is on its first element, matching what is appended below.
+             if validation['subcategory_id'][0] not in response_validations[validation['expression']]['subcategory_id']:
+                 response_validations[validation['expression']]['subcategory_id'].append(validation['subcategory_id'][0])
+                 response_validations[validation['expression']]['subcategory_code'].append(validation['subcategory_code'][0])
+     response_validations_list = list(response_validations.values())
+     return response_validations_list
+
+
+ def generate_report_validation_view(validations, report_type):
+     report_type_list = [HIERARCHY_REPORT, SIGN_REPORT, EXISTENCE_REPORT]
+     if report_type not in report_type_list:
+         raise ValueError(f"report_type must be one of {report_type_list}")
+
+     if report_type == HIERARCHY_REPORT:
+         external_data = ExternalDataHierarchies()
+         # TODO: Check this. The original filtered rejected_rules with the
+         # proposed_rules mask; each frame is filtered on its own 'Type' column here.
+         proposed = external_data.proposed_rules[external_data.proposed_rules['Type'] == 'Hierarchy']
+         rejected = external_data.rejected_rules[external_data.rejected_rules['Type'] == 'Hierarchy']
+     elif report_type == SIGN_REPORT:
+         external_data = ExternalDataSign()
+         proposed = external_data.proposed_rules
+         rejected = external_data.rejected_rules
+     else:
+         external_data = ExternalDataExistence()
+         proposed = external_data.proposed_rules
+         rejected = external_data.rejected_rules
+
+     matched = {"number_validations": 0}
+     unmatched = {"number_validations": 0}
+     review = {"number_validations": 0}
+
+     # subcategory tracking was present but disabled in the original source
+     for validation in validations:
+         valdict = {
+             "operation_code": validation['operation_code'],
+             "expression": validation['expression'],
+             "status": validation['status']
+         }
+
+         if valdict['status'] != 'Correct':
+             if valdict['expression'] not in review:
+                 review[valdict['expression']] = valdict
+             review['number_validations'] += 1
+         elif valdict['operation_code'] != []:
+             if valdict['expression'] not in matched:
+                 matched[valdict['expression']] = valdict
+             matched['number_validations'] += 1
+         else:
+             if valdict['expression'] not in unmatched:
+                 unmatched[valdict['expression']] = valdict
+             unmatched['number_validations'] += 1
+
+     matched_codes = []
+     for val in matched:
+         if val != 'number_validations':
+             matched_codes += matched[val]['operation_code']
+
+     proposed_not_generated = proposed[~proposed['ID'].isin(matched_codes)]
+     rejected_not_generated = rejected[~rejected['ID'].isin(matched_codes)]
+
+     with open('./development/data/' + report_type + '_matched.json', 'w') as fp:
+         json.dump(matched, fp, indent=4)
+     with open('./development/data/' + report_type + '_unmatched.json', 'w') as fp:
+         json.dump(unmatched, fp, indent=4)
+     with open('./development/data/' + report_type + '_review.json', 'w') as fp:
+         json.dump(review, fp, indent=4)
+
+     proposed_not_generated.to_csv('./development/data/' + report_type + '_proposed_not_generated.csv', index=False)
+     rejected_not_generated.to_csv('./development/data/' + report_type + '_rejected_not_generated.csv', index=False)
+
+
+ class ValidationsGenerationUtils:
+     """
+     Common methods used by the different validation generation processes.
+     """
+
+     @classmethod
+     def get_components_to_group(cls, datapoints_variable: pd.DataFrame):
+         """
+         Gets the cell components to group by.
+         :param datapoints_variable: dataframe with the datapoints to group
+         :return: a list with the cell components to group by
+         """
+         component_values = datapoints_variable[CELL_COMPONENTS]
+         components_to_group = []
+         components_to_check = []
+         for cell_component in CELL_COMPONENTS:
+             if not component_values[cell_component].isnull().all():
+                 components_to_check.append(cell_component)
+
+         if len(components_to_check):
+             if len(components_to_check) == 1:
+                 return []
+
+             for cell_component in components_to_check:
+                 duplicated = component_values.duplicated(
+                     subset=[comp for comp in components_to_check if comp != cell_component],
+                     keep=False)
+                 if not duplicated.all():
+                     components_to_group.append(cell_component)
+         return components_to_group
+
+     @classmethod
+     def group_cells(cls, datapoints_variable: pd.DataFrame, datapoints_table: pd.DataFrame):
+         """
+         Gets the cell groups from the datapoints, grouping them when necessary.
+         :param datapoints_variable: datapoints of the variable
+         :param datapoints_table: datapoints of the table associated with the table code of the operand
+         :return: a list with the rows, cols and sheets of each group
+         """
+         components_to_group = cls.get_components_to_group(datapoints_variable=datapoints_variable)
+         result_lst = []
+         if not len(components_to_group) or len(components_to_group) > 2:
+             rows, cols, sheets = cls.group_cell_components(datapoints_variable=datapoints_variable,
+                                                            datapoints_table=datapoints_table)
+             result_lst.append((rows, cols, sheets))
+
+         elif len(components_to_group) == 1:
+             for key, group_df in datapoints_variable.groupby(components_to_group[0], dropna=False):
+                 rows, cols, sheets = cls.group_cell_components(datapoints_variable=group_df,
+                                                                datapoints_table=datapoints_table)
+                 result_lst.append((rows, cols, sheets))
+         else:
+             ref_component = components_to_group[0]
+             second_group_component = components_to_group[1]
+             third_component = [component for component in CELL_COMPONENTS if component not in components_to_group][0]
+
+             reference_prefix = ROW if ref_component == ROW_CODE else COLUMN if ref_component == COLUMN_CODE else SHEET
+             second_component_prefix = ROW if second_group_component == ROW_CODE else COLUMN if second_group_component == COLUMN_CODE else SHEET
+             third_component_prefix = ROW if third_component == ROW_CODE else COLUMN if third_component == COLUMN_CODE else SHEET
+
+             datapoints_variable = datapoints_variable.sort_values([ref_component, second_group_component])
+
+             components_dict = {}
+             for value in datapoints_variable[ref_component].unique().tolist():
+                 components_dict[value] = datapoints_variable[datapoints_variable[ref_component] == value][
+                     second_group_component].unique().tolist()
+
+             # group reference component values by second group component values
+             for keys_values, group_values in groupby(components_dict.items(), key=lambda x: sorted(x[1])):
+                 group_values = [v[0] for v in group_values]
+                 reference_component_grouping = cls.group_cell_component_elements(reference_prefix, group_values,
+                                                                                  datapoints_table[ref_component])
+                 second_component_grouping = cls.group_cell_component_elements(second_component_prefix, keys_values,
+                                                                               datapoints_table[second_group_component])
+                 third_component_grouping = None
+                 if not datapoints_variable[third_component].isnull().all():
+                     third_values = datapoints_variable[
+                         datapoints_variable[ref_component].isin(group_values) & datapoints_variable[
+                             second_group_component].isin(keys_values)][third_component].unique().tolist()
+                     third_component_grouping = cls.group_cell_component_elements(third_component_prefix, third_values,
+                                                                                  datapoints_table[third_component])
+
+                 rows = reference_component_grouping if reference_prefix == ROW else second_component_grouping if second_component_prefix == ROW else third_component_grouping
+                 cols = reference_component_grouping if reference_prefix == COLUMN else second_component_grouping if second_component_prefix == COLUMN else third_component_grouping
+                 sheets = reference_component_grouping if reference_prefix == SHEET else second_component_grouping if second_component_prefix == SHEET else third_component_grouping
+                 result_lst.append((rows, cols, sheets))
+
+         return result_lst
+
+     @classmethod
+     def group_cells_test(cls, datapoints_variable: pd.DataFrame, datapoints_table: pd.DataFrame):
+         """
+         Gets the cell groups from the datapoints, grouping them when necessary.
+         :param datapoints_variable: datapoints of the variable
+         :param datapoints_table: datapoints of the table associated with the table code of the operand
+         :return: a list with the rows, cols, sheets and cell ids of each group
+         """
+         components_to_group = cls.get_components_to_group(datapoints_variable=datapoints_variable)
+         result_lst = []
+         if not len(components_to_group) or len(components_to_group) > 2:
+             is_several_vals, filtered_df = cls._several_validations_checker(datapoints_variable)
+             if is_several_vals:
+                 for df in filtered_df:
+                     rows, cols, sheets = cls.group_cell_components(datapoints_variable=df,
+                                                                    datapoints_table=datapoints_table)
+                     result_lst.append((rows, cols, sheets, df['cell_id'].to_list()))
+             else:
+                 rows, cols, sheets = cls.group_cell_components(datapoints_variable=datapoints_variable,
+                                                                datapoints_table=datapoints_table)
+                 result_lst.append((rows, cols, sheets, datapoints_variable['cell_id'].to_list()))
+
+         elif len(components_to_group) == 1:
+             for key, group_df in datapoints_variable.groupby(components_to_group[0], dropna=False):
+                 rows, cols, sheets = cls.group_cell_components(datapoints_variable=group_df,
+                                                                datapoints_table=datapoints_table)
+                 result_lst.append((rows, cols, sheets, group_df['cell_id'].to_list()))
+         else:
+             ref_component = components_to_group[0]
+             second_group_component = components_to_group[1]
+             third_component = [component for component in CELL_COMPONENTS if component not in components_to_group][0]
+
+             reference_prefix = ROW if ref_component == ROW_CODE else COLUMN if ref_component == COLUMN_CODE else SHEET
+             second_component_prefix = ROW if second_group_component == ROW_CODE else COLUMN if second_group_component == COLUMN_CODE else SHEET
+             third_component_prefix = ROW if third_component == ROW_CODE else COLUMN if third_component == COLUMN_CODE else SHEET
+
+             datapoints_variable = datapoints_variable.sort_values([ref_component, second_group_component])
+
+             components_dict = {}
+             for value in datapoints_variable[ref_component].unique().tolist():
+                 components_dict[value] = datapoints_variable[datapoints_variable[ref_component] == value][
+                     second_group_component].unique().tolist()
+
+             # group reference component values by second group component values
+             for keys_values, group_values in groupby(components_dict.items(), key=lambda x: sorted(x[1])):
+                 group_values = [v[0] for v in group_values]
+                 reference_component_grouping = cls.group_cell_component_elements(reference_prefix, group_values,
+                                                                                  datapoints_table[ref_component])
+                 second_component_grouping = cls.group_cell_component_elements(second_component_prefix, keys_values,
+                                                                               datapoints_table[second_group_component])
+                 third_component_grouping = None
+                 if not datapoints_variable[third_component].isnull().all():
+                     third_values = datapoints_variable[
+                         datapoints_variable[ref_component].isin(group_values) & datapoints_variable[
+                             second_group_component].isin(keys_values)][third_component].unique().tolist()
+                     third_component_grouping = cls.group_cell_component_elements(third_component_prefix, third_values,
+                                                                                  datapoints_table[third_component])
+
+                 rows = reference_component_grouping if reference_prefix == ROW else second_component_grouping if second_component_prefix == ROW else third_component_grouping
+                 cols = reference_component_grouping if reference_prefix == COLUMN else second_component_grouping if second_component_prefix == COLUMN else third_component_grouping
+                 sheets = reference_component_grouping if reference_prefix == SHEET else second_component_grouping if second_component_prefix == SHEET else third_component_grouping
+                 result_lst.append((rows, cols, sheets, datapoints_variable['cell_id'].to_list()))  # TODO: Check this
+
+         return result_lst
+
+     @classmethod
+     def group_cell_components(cls, datapoints_variable, datapoints_table):
+         """
+         Extracts the cell components, grouping them when necessary.
+         :param datapoints_variable: datapoints of the variable
+         :param datapoints_table: datapoints of the table associated with the table code of the operand
+         :return: rows, cols and sheets of the operand
+         """
+         rows = cls.group_cell_component_elements(ROW, datapoints_variable[ROW_CODE].tolist(),
+                                                  datapoints_table[ROW_CODE])
+         cols = cls.group_cell_component_elements(COLUMN, datapoints_variable[COLUMN_CODE].tolist(),
+                                                  datapoints_table[COLUMN_CODE])
+         sheets = cls.group_cell_component_elements(SHEET, datapoints_variable[SHEET_CODE].tolist(),
+                                                    datapoints_table[SHEET_CODE])
+         return rows, cols, sheets
+
+     @classmethod
+     def group_cell_component_elements(cls, cell_component_prefix: str, cell_component_elements: list,
+                                       datapoints: pd.Series):
+         """
+         Groups the elements of a cell component.
+         :param cell_component_prefix: cell component name to be operated on
+         :param cell_component_elements: values of the operand cell component associated with the
+             cell_component_prefix argument
+         :param datapoints: values of the table cell component associated with the cell_component_prefix argument
+         :return: the cell component, grouped when necessary
+         """
+         unique_values = set(cell_component_elements)
+         if len(unique_values) == 0:
+             return None
+         elif len(unique_values) == 1:
+             if cell_component_elements[0]:
+                 return cell_component_prefix + str(cell_component_elements[0])
+             return None
+
+         cell_component_elements.sort()
+         cell_component_all_unique_values = datapoints.drop_duplicates().tolist()
+         datapoints_cell_component = datapoints[
+             datapoints.between(cell_component_elements[0], cell_component_elements[-1])]
+
+         # every value of the table component is referenced: collapse to a wildcard
+         if len(unique_values) == len(cell_component_all_unique_values):
+             return f"{cell_component_prefix}*"
+
+         # a contiguous block of the table component is referenced: collapse to a range
+         if len(unique_values) == len(datapoints_cell_component.drop_duplicates()):
+             return f"{cell_component_prefix}{cell_component_elements[0]}-{cell_component_elements[-1]}"
+
+         return '(' + ', '.join([f"{cell_component_prefix}{component}" for component in sorted(unique_values)]) + ')'
+
+     @classmethod
+     def write_cell(cls, table_code, rows, cols, sheets):
+         """
+         Returns a string that represents a cell expression.
+         :param table_code: table code
+         :param rows: expression rows
+         :param cols: expression cols
+         :param sheets: expression sheets
+         """
+         table_code = f"t{table_code}" if table_code else None
+         cell_components = [components for components in (table_code, rows, cols, sheets) if components]
+         if len(cell_components):
+             cell_info = ', '.join(cell_components)
+             return '{' + cell_info + '}'
+         return ""
+
+     @classmethod
+     def write_cell_with_asterisk(cls, table_code, rows, cols, sheets, reference_data):
+         """
+         Returns a string that represents a cell expression, replacing full ranges with asterisks.
+         :param table_code: table code
+         :param rows: expression rows
+         :param cols: expression cols
+         :param sheets: expression sheets
+         :param reference_data: table datapoints used to decide whether a range covers the whole component
+         """
+         # check if * is needed
+         if rows and "-" in rows:
+             rows = replace_range_by_asterisk(rows, reference_data[ROW_CODE], ROW)
+         if cols and "-" in cols:
+             cols = replace_range_by_asterisk(cols, reference_data[COLUMN_CODE], COLUMN)
+         if sheets and "-" in sheets:
+             sheets = replace_range_by_asterisk(sheets, reference_data[SHEET_CODE], SHEET)
+
+         table_code = f"t{table_code}" if table_code else None
+         cell_components = [components for components in (table_code, rows, cols, sheets) if components]
+         if len(cell_components):
+             cell_info = ', '.join(cell_components)
+             return '{' + cell_info + '}'
+         return None
+
+     @classmethod
+     def _several_validations_checker(cls, df) -> (bool, pd.DataFrame):
+         """
+         Checks whether the dataframe holds several validations.
+         :param df: dataframe with validations
+         :return: a tuple (True, list of filtered dataframes) if the dataframe holds
+             several validations, (False, None) otherwise
+         """
+         # TODO: Check this, example F_18.00.b sign validations
+         checker = 0
+         checker_component = []
+         for c_component in CELL_COMPONENTS:
+             if df[c_component].nunique() > 1:
+                 checker += 1
+                 checker_component.append(c_component)
+         if checker == 2:
+             results = _two_components_checker(df, checker_component)
+             if results:
+                 return True, results
+         if checker == 3:
+             # TODO: To implement; not necessary for now because there are no sign
+             # validations (without components to group) with 3 components
+             pass
+
+         return False, None
+
+ def _two_components_checker(df, checker_component) -> list:
+     """
+     Groups the values of two cell components into sets of related validations.
+     :param df: dataframe with validations
+     :param checker_component: the two cell components that have more than one value
+     :return: a list of filtered dataframes, one per group of related component values
+     """
+     results = []
+     for idx, _ in enumerate(checker_component):
+         # pair each component with the other one (the -1 wraps around)
+         component_group = checker_component[idx - 1]
+         other_component = checker_component[idx]
+         group_df = df.groupby(component_group)
+         dict_related = {}
+         dict_values = {}
+         for group_key, group in group_df:
+             dict_values[group_key] = group[other_component].unique().tolist()
+
+         for key, values in dict_values.items():
+             dict_related[key] = []
+             for other_key, other_values in dict_values.items():
+                 if key != other_key and set(values) == set(other_values):
+                     dict_related[key].append(other_key)
+         components_grouped_list = [(k, *v) for k, v in dict_related.items()]
+         components_grouped_sorted = [sorted(x) for x in components_grouped_list]
+         components_grouped_sorted = [tuple(x) for x in components_grouped_sorted]
+         components_set = set(components_grouped_sorted)
+
+         if len(components_set) > 1:
+             for elto in components_set:
+                 results.append(df[df[component_group].isin(elto)])
+
+     return results
+
+ def replace_range_by_asterisk(expression, df_component, component_prefix):
+     """
+     Replaces a range expression with an asterisk when the range covers every value of the component.
+     :param expression: expression to be replaced
+     :param df_component: table values of the component
+     :param component_prefix: prefix of the component (row, column or sheet)
+     :return: the expression, with the range replaced by an asterisk when possible
+     """
+     sorted_list = sorted(df_component.drop_duplicates().to_list())
+     first_element_expression = expression.split("-")[0][1:]
+     last_element_expression = expression.split("-")[1]
+     if len(sorted_list) > 1 and first_element_expression == sorted_list[0] \
+             and last_element_expression == sorted_list[-1]:
+         return component_prefix + "*"
+
+     return expression
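For orientation, here is a minimal usage sketch of the grouping helpers above. It is an illustration, not part of the package diff: the table code and component values are hypothetical, and it assumes the ROW token resolves to the "r" prefix used in cell expressions.

import pandas as pd

from py_dpm.Utils.ValidationsGenerationUtils import ValidationsGenerationUtils

# Hypothetical table layout: all row codes the table defines.
table_rows = pd.Series(["010", "020", "030", "040"])

# Every row referenced -> collapsed to a wildcard.
ValidationsGenerationUtils.group_cell_component_elements("r", ["010", "020", "030", "040"], table_rows)
# -> "r*"

# A contiguous subset -> collapsed to a range.
ValidationsGenerationUtils.group_cell_component_elements("r", ["010", "020", "030"], table_rows)
# -> "r010-030"

# A non-contiguous subset -> an explicit list.
ValidationsGenerationUtils.group_cell_component_elements("r", ["010", "030"], table_rows)
# -> "(r010, r030)"

# The grouped components are then assembled into a cell expression.
ValidationsGenerationUtils.write_cell("F_18.00", "r010-030", "c010", None)
# -> "{tF_18.00, r010-030, c010}"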
py_dpm/Utils/operands_mapping.py
@@ -0,0 +1,73 @@
+ import itertools
+ import string
+ from typing import Union
+
+ from py_dpm.AST.ASTObjects import PreconditionItem, VarID
+
+
+ class LabelHandler:
+     _instance = None
+     labels = None
+     operands_labels = None
+
+     def __new__(cls):
+         if not isinstance(cls._instance, cls):
+             cls._instance = object.__new__(cls)
+             cls.labels = iter_all_strings()
+             cls.operands_labels = {}
+             cls.labels_type = {}
+         return cls._instance
+
+     @classmethod
+     def reset_instance(cls):
+         cls.labels.close()
+         cls.labels = iter_all_strings()
+         cls.operands_labels = {}
+         cls.labels_type = {}
+
+
+ def iter_all_strings():
+     # yields "A", "B", ..., "Z", "AA", "AB", ... in spreadsheet-column order
+     for size in itertools.count(1):
+         for s in itertools.product(string.ascii_uppercase, repeat=size):
+             yield "".join(s)
+
+
+ def set_operand_label(label: str, operand: Union[str, VarID, PreconditionItem]):
+     # The original annotation was Union[str, VarID]; PreconditionItem is added
+     # to match the isinstance branch below.
+     if isinstance(operand, VarID):
+         LabelHandler().operands_labels[label] = generate_operand_expression(operand)
+         LabelHandler().labels_type[label] = 'single'
+     elif isinstance(operand, PreconditionItem):
+         LabelHandler().operands_labels[label] = f"v_{operand.variable_code}"
+         LabelHandler().labels_type[label] = 'single'
+     else:
+         LabelHandler().operands_labels[label] = operand
+         LabelHandler().labels_type[label] = 'not_single'
+
+
+ def generate_operand_expression(operand: VarID):
+     operand_expression = "{ "
+     operand_expression += f"t{operand.table}"
+     if operand.rows:
+         operand_expression += f", {','.join(['r' + x for x in operand.rows])}"
+     if operand.cols:
+         operand_expression += f", {','.join(['c' + x for x in operand.cols])}"
+     if operand.sheets:
+         operand_expression += f", {','.join(['s' + x for x in operand.sheets])}"
+     operand_expression += " }"
+     return operand_expression
+
+
+ def get_operand_from_label(label: str):
+     if label in LabelHandler().operands_labels:
+         return LabelHandler().operands_labels[label]
+     return None
+
+
+ def get_type_from_label(label: str):
+     if label in LabelHandler().labels_type:
+         return LabelHandler().labels_type[label]
+     return None
+
+
+ def generate_new_label():
+     label = next(LabelHandler().labels)
+     return f"$@{label}#"
py_dpm/Utils/operator_mapping.py
@@ -0,0 +1,89 @@
+ from py_dpm.Operators.AggregateOperators import Avg, Count, MaxAggr, Median, MinAggr, Sum
+ from py_dpm.Operators.BooleanOperators import And, Not, Or, Xor
+ from py_dpm.Operators.ClauseOperators import Get, Rename, Where
+ from py_dpm.Operators.ComparisonOperators import Equal, Greater, GreaterEqual, In, IsNull, Less, LessEqual, Match, NotEqual
+ from py_dpm.Operators.ConditionalOperators import Filter, IfOperator, Nvl
+ from py_dpm.Operators.NumericOperators import AbsoluteValue, BinMinus, BinPlus, Div, Exponential, Logarithm, \
+     Max, Min, Mult, NaturalLogarithm, Power, SquareRoot, UnMinus, UnPlus
+ from py_dpm.Operators.StringOperators import Concatenate, Len
+ from py_dpm.Operators.TimeOperators import TimeShift
+ from .tokens import *
+
+ BIN_OP_MAPPING = {
+     # Boolean operators
+     AND: And,
+     OR: Or,
+     XOR: Xor,
+
+     # Comparison operators
+     EQ: Equal,
+     NEQ: NotEqual,
+     GT: Greater,
+     GTE: GreaterEqual,
+     LT: Less,
+     LTE: LessEqual,
+     IN: In,
+     MATCH: Match,
+
+     # Numeric operators
+     PLUS: BinPlus,
+     MINUS: BinMinus,
+     MULT: Mult,
+     DIV: Div,
+     POW: Power,
+     LOG: Logarithm,
+
+     # Conditional operator
+     NVL: Nvl,
+
+     # String operators
+     CONCATENATE: Concatenate
+ }
+
+ UNARY_OP_MAPPING = {
+     # Boolean operators
+     NOT: Not,
+
+     # Numeric operators
+     PLUS: UnPlus,
+     MINUS: UnMinus,
+     ABS: AbsoluteValue,
+     EXP: Exponential,
+     LN: NaturalLogarithm,
+     SQRT: SquareRoot,
+
+     # Comparison operators
+     ISNULL: IsNull,
+
+     # String operators
+     LENGTH: Len
+ }
+
+ AGGR_OP_MAPPING = {
+     MAX_AGGR: MaxAggr,
+     MIN_AGGR: MinAggr,
+     SUM: Sum,
+     COUNT: Count,
+     AVG: Avg,
+     MEDIAN: Median
+ }
+
+ CLAUSE_OP_MAPPING = {
+     WHERE: Where,
+     RENAME: Rename,
+     GET: Get
+ }
+
+ TIME_OPERATORS = {
+     TIME_SHIFT: TimeShift
+ }
+
+ CONDITIONAL_OP_MAPPING = {
+     IF: IfOperator,
+     FILTER: Filter
+ }
+
+ COMPLEX_OP_MAPPING = {
+     MAX: Max,
+     MIN: Min
+ }
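As a rough illustration of how these dictionaries are meant to be used (presumably by the AST construction code, which is an assumption): an operator token is looked up in the mapping that matches its arity, yielding the operator class. The token names come from the star-imported tokens module.

from py_dpm.Utils.operator_mapping import BIN_OP_MAPPING, UNARY_OP_MAPPING
from py_dpm.Utils.tokens import MINUS, PLUS

BIN_OP_MAPPING[PLUS]     # -> BinPlus   (binary "+")
UNARY_OP_MAPPING[MINUS]  # -> UnMinus   (unary "-")
# The same token can map to different operators depending on arity,
# which is why binary and unary lookups live in separate dictionaries.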