pydpm_xl 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_dpm/AST/ASTConstructor.py +503 -0
- py_dpm/AST/ASTObjects.py +827 -0
- py_dpm/AST/ASTTemplate.py +101 -0
- py_dpm/AST/ASTVisitor.py +13 -0
- py_dpm/AST/MLGeneration.py +588 -0
- py_dpm/AST/ModuleAnalyzer.py +79 -0
- py_dpm/AST/ModuleDependencies.py +203 -0
- py_dpm/AST/WhereClauseChecker.py +12 -0
- py_dpm/AST/__init__.py +0 -0
- py_dpm/AST/check_operands.py +302 -0
- py_dpm/DataTypes/ScalarTypes.py +324 -0
- py_dpm/DataTypes/TimeClasses.py +370 -0
- py_dpm/DataTypes/TypePromotion.py +195 -0
- py_dpm/DataTypes/__init__.py +0 -0
- py_dpm/Exceptions/__init__.py +0 -0
- py_dpm/Exceptions/exceptions.py +84 -0
- py_dpm/Exceptions/messages.py +114 -0
- py_dpm/OperationScopes/OperationScopeService.py +247 -0
- py_dpm/OperationScopes/__init__.py +0 -0
- py_dpm/Operators/AggregateOperators.py +138 -0
- py_dpm/Operators/BooleanOperators.py +30 -0
- py_dpm/Operators/ClauseOperators.py +159 -0
- py_dpm/Operators/ComparisonOperators.py +69 -0
- py_dpm/Operators/ConditionalOperators.py +362 -0
- py_dpm/Operators/NumericOperators.py +101 -0
- py_dpm/Operators/Operator.py +388 -0
- py_dpm/Operators/StringOperators.py +27 -0
- py_dpm/Operators/TimeOperators.py +53 -0
- py_dpm/Operators/__init__.py +0 -0
- py_dpm/Utils/ValidationsGenerationUtils.py +429 -0
- py_dpm/Utils/__init__.py +0 -0
- py_dpm/Utils/operands_mapping.py +73 -0
- py_dpm/Utils/operator_mapping.py +89 -0
- py_dpm/Utils/tokens.py +172 -0
- py_dpm/Utils/utils.py +2 -0
- py_dpm/ValidationsGeneration/PropertiesConstraintsProcessor.py +190 -0
- py_dpm/ValidationsGeneration/Utils.py +364 -0
- py_dpm/ValidationsGeneration/VariantsProcessor.py +265 -0
- py_dpm/ValidationsGeneration/__init__.py +0 -0
- py_dpm/ValidationsGeneration/auxiliary_functions.py +98 -0
- py_dpm/__init__.py +61 -0
- py_dpm/api/__init__.py +140 -0
- py_dpm/api/ast_generator.py +438 -0
- py_dpm/api/complete_ast.py +241 -0
- py_dpm/api/data_dictionary_validation.py +577 -0
- py_dpm/api/migration.py +77 -0
- py_dpm/api/semantic.py +224 -0
- py_dpm/api/syntax.py +182 -0
- py_dpm/client.py +106 -0
- py_dpm/data_handlers.py +99 -0
- py_dpm/db_utils.py +117 -0
- py_dpm/grammar/__init__.py +0 -0
- py_dpm/grammar/dist/__init__.py +0 -0
- py_dpm/grammar/dist/dpm_xlLexer.interp +428 -0
- py_dpm/grammar/dist/dpm_xlLexer.py +804 -0
- py_dpm/grammar/dist/dpm_xlLexer.tokens +106 -0
- py_dpm/grammar/dist/dpm_xlParser.interp +249 -0
- py_dpm/grammar/dist/dpm_xlParser.py +5224 -0
- py_dpm/grammar/dist/dpm_xlParser.tokens +106 -0
- py_dpm/grammar/dist/dpm_xlParserListener.py +742 -0
- py_dpm/grammar/dist/dpm_xlParserVisitor.py +419 -0
- py_dpm/grammar/dist/listeners.py +10 -0
- py_dpm/grammar/dpm_xlLexer.g4 +435 -0
- py_dpm/grammar/dpm_xlParser.g4 +260 -0
- py_dpm/migration.py +282 -0
- py_dpm/models.py +2139 -0
- py_dpm/semantics/DAG/DAGAnalyzer.py +158 -0
- py_dpm/semantics/DAG/__init__.py +0 -0
- py_dpm/semantics/SemanticAnalyzer.py +320 -0
- py_dpm/semantics/Symbols.py +223 -0
- py_dpm/semantics/__init__.py +0 -0
- py_dpm/utils/__init__.py +0 -0
- py_dpm/utils/ast_serialization.py +481 -0
- py_dpm/views/data_types.sql +12 -0
- py_dpm/views/datapoints.sql +65 -0
- py_dpm/views/hierarchy_operand_reference.sql +11 -0
- py_dpm/views/hierarchy_preconditions.sql +13 -0
- py_dpm/views/hierarchy_variables.sql +26 -0
- py_dpm/views/hierarchy_variables_context.sql +14 -0
- py_dpm/views/key_components.sql +18 -0
- py_dpm/views/module_from_table.sql +11 -0
- py_dpm/views/open_keys.sql +13 -0
- py_dpm/views/operation_info.sql +27 -0
- py_dpm/views/operation_list.sql +18 -0
- py_dpm/views/operations_versions_from_module_version.sql +30 -0
- py_dpm/views/precondition_info.sql +17 -0
- py_dpm/views/report_type_operand_reference_info.sql +18 -0
- py_dpm/views/subcategory_info.sql +17 -0
- py_dpm/views/table_info.sql +19 -0
- pydpm_xl-0.1.10.dist-info/LICENSE +674 -0
- pydpm_xl-0.1.10.dist-info/METADATA +50 -0
- pydpm_xl-0.1.10.dist-info/RECORD +94 -0
- pydpm_xl-0.1.10.dist-info/WHEEL +4 -0
- pydpm_xl-0.1.10.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,577 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data Dictionary Validation API
|
|
3
|
+
|
|
4
|
+
This module provides methods to identify and validate data dictionary issues
|
|
5
|
+
that can cause semantic validation failures during DPM-XL transpilation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Dict, List, Set, Optional, Any, Tuple
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from enum import Enum
|
|
11
|
+
import re
|
|
12
|
+
from sqlalchemy import text
|
|
13
|
+
|
|
14
|
+
from py_dpm.db_utils import get_session, get_engine
|
|
15
|
+
from py_dpm.models import *
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ValidationIssueType(Enum):
    """Types of data dictionary validation issues."""
    MISSING_TABLE = "missing_table"            # referenced table code not found in the dictionary
    MISSING_COLUMN = "missing_column"          # column code not present for the table
    MISSING_ROW = "missing_row"                # row code not present for the table
    MISSING_SHEET = "missing_sheet"            # sheet code not present (or no sheets for a wildcard)
    MISSING_VARIABLE = "missing_variable"      # variable ID not found in the dictionary
    INVALID_REFERENCE = "invalid_reference"    # NOTE(review): not emitted by the visible checks; apparently reserved
    TYPE_MISMATCH = "type_mismatch"            # NOTE(review): not emitted by the visible checks; apparently reserved
    CONFIGURATION_ERROR = "configuration_error"  # environment/parse/DB failure while validating
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class ValidationIssue:
    """
    Represents a data dictionary validation issue.

    Attributes:
        issue_type (ValidationIssueType): Type of the issue
        description (str): Human-readable description of the issue
        affected_element (str): The specific element that has the issue
        suggested_fix (Optional[str]): Suggested fix for the issue
        severity (str): Severity level ('error', 'warning', 'info')
        code (Optional[str]): Error code for programmatic handling
    """
    issue_type: ValidationIssueType
    description: str
    affected_element: str
    suggested_fix: Optional[str] = None  # None means no automatic remediation hint is available
    severity: str = "error"  # free-form string, not an enum — validators emit 'error' or 'warning'
    code: Optional[str] = None  # machine-readable code such as "TABLE_NOT_FOUND"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class CellReference:
    """Represents a parsed cell reference from DPM-XL expression."""
    table: str          # table code, captured after the leading 't' in {tTABLE, ...}
    rows: List[str]     # row codes; may contain wildcards ('*') or ranges ('a-b')
    columns: List[str]  # column codes; may contain wildcards or ranges
    sheets: List[str]   # sheet codes; empty list when the reference has no sheet part
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class DataDictionaryValidator:
    """
    Main class for validating data dictionary consistency and completeness.

    Detects issues (missing tables, columns, rows, sheets, variables) that
    would cause semantic validation failures during DPM-XL transpilation.
    Per-element query results are memoised on the instance so repeated
    checks of the same element hit the database only once.
    """

    def __init__(self):
        """Initialize the validator and open a database session."""
        get_engine()  # ensure the engine is configured before opening a session
        self.session = get_session()
        # Memoisation caches: element key -> previously computed issue list.
        self._table_cache = {}
        self._column_cache = {}
        self._row_cache = {}
        self._sheet_cache = {}

    def validate_expression_references(self, dpm_xl_expression: str) -> List["ValidationIssue"]:
        """
        Validate all cell references in a DPM-XL expression.

        Args:
            dpm_xl_expression (str): The DPM-XL expression to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        issues = []

        try:
            # Parse cell references from the expression.
            cell_refs = self._parse_cell_references(dpm_xl_expression)

            for cell_ref in cell_refs:
                table_issues = self.validate_table_exists(cell_ref.table)
                issues.extend(table_issues)

                # Only check dimensions when the table itself resolved;
                # otherwise every dimension lookup would fail noisily too.
                if not table_issues:
                    issues.extend(self.validate_columns_exist(cell_ref.table, cell_ref.columns))
                    issues.extend(self.validate_rows_exist(cell_ref.table, cell_ref.rows))
                    issues.extend(self.validate_sheets_exist(cell_ref.table, cell_ref.sheets))

        except Exception as e:
            # Surface parser failures as a configuration issue instead of
            # propagating, so callers always get a list back.
            truncated = (dpm_xl_expression[:50] + "...") if len(dpm_xl_expression) > 50 else dpm_xl_expression
            issues.append(ValidationIssue(
                issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                description=f"Error parsing expression: {str(e)}",
                affected_element=truncated,
                severity="error",
                code="PARSE_ERROR"
            ))

        return issues

    def validate_table_exists(self, table_name: str) -> List["ValidationIssue"]:
        """
        Validate that a table exists in the data dictionary.

        Args:
            table_name (str): Name of the table to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        # Cache lookup hoisted out of the try block: it cannot raise.
        if table_name in self._table_cache:
            return self._table_cache[table_name]

        issues = []
        try:
            # The dictionary schema identifies tables by table_code.
            tables = self.session.execute(
                text("SELECT DISTINCT table_code FROM datapoints WHERE table_code = :table_code"),
                {"table_code": table_name}
            ).fetchall()

            if not tables:
                issues.append(ValidationIssue(
                    issue_type=ValidationIssueType.MISSING_TABLE,
                    description=f"Table '{table_name}' was not found in the data dictionary",
                    affected_element=table_name,
                    suggested_fix=f"Add table '{table_name}' to the data dictionary or check the table name spelling",
                    severity="error",
                    code="TABLE_NOT_FOUND"
                ))

            # Cache only successful lookups; transient DB errors stay uncached.
            self._table_cache[table_name] = issues

        except Exception as e:
            issues.append(ValidationIssue(
                issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                description=f"Error checking table '{table_name}': {str(e)}",
                affected_element=table_name,
                severity="error",
                code="TABLE_CHECK_ERROR"
            ))

        return issues

    def validate_columns_exist(self, table_name: str, columns: List[str]) -> List["ValidationIssue"]:
        """
        Validate that columns exist for a table.

        Wildcards ('*') and ranges (values containing '-') are skipped;
        they would need pattern expansion that is not implemented yet.

        Args:
            table_name (str): Name of the table
            columns (List[str]): List of column names/patterns to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        cache_key = f"{table_name}:{':'.join(columns)}"
        if cache_key in self._column_cache:
            return self._column_cache[cache_key]

        issues = []
        try:
            for column in columns:
                # Skip wildcards and ranges for now - these need special handling.
                if column == '*' or '-' in column:
                    continue

                column_exists = self.session.execute(
                    text("SELECT COUNT(*) FROM datapoints WHERE table_code = :table_code AND column_code = :column_code"),
                    {"table_code": table_name, "column_code": column}
                ).fetchone()

                if not column_exists or column_exists[0] == 0:
                    issues.append(ValidationIssue(
                        issue_type=ValidationIssueType.MISSING_COLUMN,
                        description=f"Column '{column}' not found in table '{table_name}'",
                        affected_element=f"{table_name}.{column}",
                        suggested_fix=f"Add column '{column}' to table '{table_name}' or check the column name",
                        severity="error",
                        code="COLUMN_NOT_FOUND"
                    ))

            self._column_cache[cache_key] = issues

        except Exception as e:
            issues.append(ValidationIssue(
                issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                description=f"Error checking columns for table '{table_name}': {str(e)}",
                affected_element=f"{table_name}.[{','.join(columns)}]",
                severity="error",
                code="COLUMN_CHECK_ERROR"
            ))

        return issues

    def validate_rows_exist(self, table_name: str, rows: List[str]) -> List["ValidationIssue"]:
        """
        Validate that rows exist for a table.

        Wildcards ('*') and ranges (values containing '-') are skipped.

        Args:
            table_name (str): Name of the table
            rows (List[str]): List of row names/patterns to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        cache_key = f"{table_name}:rows:{':'.join(rows)}"
        if cache_key in self._row_cache:
            return self._row_cache[cache_key]

        issues = []
        try:
            for row in rows:
                # Skip wildcards and ranges for now.
                if row == '*' or '-' in row:
                    continue

                row_exists = self.session.execute(
                    text("SELECT COUNT(*) FROM datapoints WHERE table_code = :table_code AND row_code = :row_code"),
                    {"table_code": table_name, "row_code": row}
                ).fetchone()

                if not row_exists or row_exists[0] == 0:
                    issues.append(ValidationIssue(
                        issue_type=ValidationIssueType.MISSING_ROW,
                        description=f"Row '{row}' not found in table '{table_name}'",
                        affected_element=f"{table_name}.{row}",
                        suggested_fix=f"Add row '{row}' to table '{table_name}' or check the row name",
                        severity="warning",  # rows might be more flexible than columns
                        code="ROW_NOT_FOUND"
                    ))

            self._row_cache[cache_key] = issues

        except Exception as e:
            issues.append(ValidationIssue(
                issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                description=f"Error checking rows for table '{table_name}': {str(e)}",
                affected_element=f"{table_name}.[{','.join(rows)}]",
                severity="error",
                code="ROW_CHECK_ERROR"
            ))

        return issues

    def validate_sheets_exist(self, table_name: str, sheets: List[str]) -> List["ValidationIssue"]:
        """
        Validate that sheets exist for a table.

        A '*' entry checks that the table has at least one non-empty sheet
        code; any other value is checked for an exact match.

        Args:
            table_name (str): Name of the table
            sheets (List[str]): List of sheet names/patterns to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        cache_key = f"{table_name}:sheets:{':'.join(sheets)}"
        if cache_key in self._sheet_cache:
            return self._sheet_cache[cache_key]

        issues = []
        try:
            for sheet in sheets:
                if sheet == '*':
                    # Wildcard: the table must define at least one sheet.
                    sheet_count = self.session.execute(
                        text("SELECT COUNT(DISTINCT sheet_code) FROM datapoints WHERE table_code = :table_code AND sheet_code IS NOT NULL AND sheet_code != ''"),
                        {"table_code": table_name}
                    ).fetchone()

                    if not sheet_count or sheet_count[0] == 0:
                        issues.append(ValidationIssue(
                            issue_type=ValidationIssueType.MISSING_SHEET,
                            description=f"No sheets found for table '{table_name}' but s* wildcard used",
                            affected_element=f"{table_name}.s*",
                            suggested_fix=f"Add sheet definitions for table '{table_name}' or remove s* wildcard",
                            severity="error",
                            code="NO_SHEETS_FOR_WILDCARD"
                        ))
                    continue

                # Check if the specific sheet exists.
                sheet_exists = self.session.execute(
                    text("SELECT COUNT(*) FROM datapoints WHERE table_code = :table_code AND sheet_code = :sheet_code"),
                    {"table_code": table_name, "sheet_code": sheet}
                ).fetchone()

                if not sheet_exists or sheet_exists[0] == 0:
                    issues.append(ValidationIssue(
                        issue_type=ValidationIssueType.MISSING_SHEET,
                        description=f"Sheet '{sheet}' not found in table '{table_name}'",
                        affected_element=f"{table_name}.{sheet}",
                        suggested_fix=f"Add sheet '{sheet}' to table '{table_name}' or check the sheet name",
                        severity="error",
                        code="SHEET_NOT_FOUND"
                    ))

            self._sheet_cache[cache_key] = issues

        except Exception as e:
            issues.append(ValidationIssue(
                issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                description=f"Error checking sheets for table '{table_name}': {str(e)}",
                affected_element=f"{table_name}.[{','.join(sheets)}]",
                severity="error",
                code="SHEET_CHECK_ERROR"
            ))

        return issues

    def validate_variables_exist(self, variable_names: List[str]) -> List["ValidationIssue"]:
        """
        Validate that variables exist in the data dictionary.

        Only numeric variable IDs are checked against the Variable table;
        non-numeric names are skipped because variables in this schema
        appear to be referenced by ID, not name.

        Args:
            variable_names (List[str]): List of variable names to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        issues = []

        for var_name in variable_names:
            try:
                var_exists = None
                try:
                    var_id = int(var_name)
                    var_exists = self.session.execute(
                        text("SELECT COUNT(*) FROM Variable WHERE VariableID = :var_id"),
                        {"var_id": var_id}
                    ).fetchone()
                except ValueError:
                    # Not a numeric ID: skip validation for this entry.
                    continue

                if var_exists and var_exists[0] == 0:
                    issues.append(ValidationIssue(
                        issue_type=ValidationIssueType.MISSING_VARIABLE,
                        description=f"Variable ID '{var_name}' not found in data dictionary",
                        affected_element=var_name,
                        suggested_fix=f"Add variable ID '{var_name}' to the data dictionary or check the variable ID",
                        severity="warning",  # warning: variable structure is not fully known
                        code="VARIABLE_NOT_FOUND"
                    ))

            except Exception as e:
                issues.append(ValidationIssue(
                    issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                    description=f"Error checking variable '{var_name}': {str(e)}",
                    affected_element=var_name,
                    severity="error",
                    code="VARIABLE_CHECK_ERROR"
                ))

        return issues

    def get_validation_summary(self, issues: List["ValidationIssue"]) -> Dict[str, Any]:
        """
        Generate a summary of validation issues.

        Args:
            issues (List[ValidationIssue]): List of validation issues

        Returns:
            Dict[str, Any]: Summary with totals, per-type and per-severity
                counts, plus the fixable (has a suggested_fix) and critical
                (severity == 'error') issue lists.
        """
        summary = {
            "total_issues": len(issues),
            "by_type": {},
            "by_severity": {},
            "fixable_issues": [],
            "critical_issues": []
        }

        for issue in issues:
            issue_type = issue.issue_type.value
            summary["by_type"][issue_type] = summary["by_type"].get(issue_type, 0) + 1
            summary["by_severity"][issue.severity] = summary["by_severity"].get(issue.severity, 0) + 1

            if issue.suggested_fix:
                summary["fixable_issues"].append(issue)
            if issue.severity == "error":
                summary["critical_issues"].append(issue)

        return summary

    def _parse_cell_references(self, expression: str) -> List["CellReference"]:
        """
        Parse cell references from a DPM-XL expression.

        Matches references shaped like {tTable, rRows, cColumns, sSheets}
        where the sheet part is optional.

        NOTE(review): the [^,]+ groups cannot contain commas, so a
        parenthesized multi-value dimension such as r(010, 020) is split
        incorrectly by this pattern — confirm against real expressions.

        Args:
            expression (str): DPM-XL expression to parse

        Returns:
            List[CellReference]: List of parsed cell references
        """
        cell_refs = []

        pattern = r'\{t([^,]+),\s*([^,]+),\s*([^,]+)(?:,\s*([^}]+))?\}'

        for match in re.findall(pattern, expression):
            table = match[0].strip()
            rows = self._parse_dimension_values(match[1].strip(), 'r')
            columns = self._parse_dimension_values(match[2].strip(), 'c')

            # Sheets are optional; the fourth group is '' when absent.
            sheets = []
            if len(match) > 3 and match[3]:
                sheets = self._parse_dimension_values(match[3].strip(), 's')

            cell_refs.append(CellReference(
                table=table,
                rows=rows,
                columns=columns,
                sheets=sheets
            ))

        return cell_refs

    def _parse_dimension_values(self, dim_str: str, prefix: str) -> List[str]:
        """
        Parse dimension values (rows, columns, or sheets) from a string.

        Strips the dimension prefix and any surrounding parentheses, then
        splits the remainder on commas.

        Args:
            dim_str (str): String containing dimension values
            prefix (str): Expected prefix ('r', 'c', or 's')

        Returns:
            List[str]: List of parsed dimension values
        """
        values = []

        dim_str = dim_str.strip()
        if dim_str.startswith(prefix):
            dim_str = dim_str[1:]
        if dim_str.startswith('(') and dim_str.endswith(')'):
            dim_str = dim_str[1:-1]

        if dim_str:
            for value in dim_str.split(','):
                value = value.strip()
                if value:
                    values.append(value)

        return values

    def __del__(self):
        """Best-effort cleanup of the database session."""
        session = getattr(self, 'session', None)
        if session is not None:
            try:
                session.close()
            except Exception:
                # Never raise from a finalizer: the interpreter may be
                # shutting down and module globals may already be gone.
                pass
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
# Convenience functions for direct usage
|
|
511
|
+
def validate_dpm_xl_expression(expression: str) -> List[ValidationIssue]:
    """
    Validate a DPM-XL expression against the data dictionary.

    Convenience wrapper that builds a throwaway validator instance and
    delegates to its reference check.

    Args:
        expression (str): DPM-XL expression to validate

    Returns:
        List[ValidationIssue]: List of validation issues found
    """
    return DataDictionaryValidator().validate_expression_references(expression)
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def validate_table_references(table_names: List[str]) -> List[ValidationIssue]:
    """
    Validate a batch of table references against the data dictionary.

    Args:
        table_names (List[str]): List of table names to validate

    Returns:
        List[ValidationIssue]: Issues found across all tables, in input order
    """
    validator = DataDictionaryValidator()
    return [
        issue
        for name in table_names
        for issue in validator.validate_table_exists(name)
    ]
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
def check_data_dictionary_health() -> Dict[str, Any]:
    """
    Perform a comprehensive health check of the data dictionary.

    Currently reports the total number of distinct tables; further checks
    (orphaned references, inconsistent naming, ...) are planned.

    Returns:
        Dict[str, Any]: Health check results with 'status' ("healthy" or
            "critical"), 'total_tables', 'issues_found' and 'recommendations'
    """
    validator = DataDictionaryValidator()

    report = {
        "status": "healthy",
        "total_tables": 0,
        "issues_found": [],
        "recommendations": [],
    }

    try:
        row = validator.session.execute(
            text("SELECT COUNT(DISTINCT table_code) FROM datapoints")
        ).fetchone()
        report["total_tables"] = row[0] if row else 0
    except Exception as e:
        # A failed query means we cannot assess health at all.
        report["status"] = "critical"
        report["issues_found"].append(f"Database connection error: {str(e)}")

    return report
|
py_dpm/api/migration.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from sqlalchemy import Engine
|
|
4
|
+
|
|
5
|
+
from py_dpm.migration import run_migration as _run_migration
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MigrationAPI:
    """
    API for database migration operations.

    Wraps the package-level migration routine to move data from a
    Microsoft Access database into a SQLite database.
    """

    def __init__(self):
        """Initialize the Migration API (stateless; nothing to configure)."""
        pass

    def migrate_access_to_sqlite(
        self,
        access_file_path: str,
        sqlite_db_path: Optional[str] = None
    ) -> Engine:
        """
        Migrate data from an Access database to SQLite.

        Args:
            access_file_path (str): Path to the Access database file (.mdb or .accdb)
            sqlite_db_path (Optional[str]): Path for the SQLite database.
                When None, falls back to the SQLITE_DB_PATH environment
                variable and finally to "database.db".

        Returns:
            Engine: SQLAlchemy engine for the created SQLite database

        Raises:
            FileNotFoundError: If the Access file doesn't exist
            Exception: If migration fails

        Example:
            >>> from py_dpm.api.migration import MigrationAPI
            >>> migration = MigrationAPI()
            >>> engine = migration.migrate_access_to_sqlite("data.mdb", "output.db")
        """
        if not os.path.exists(access_file_path):
            raise FileNotFoundError(f"Access file not found: {access_file_path}")

        destination = sqlite_db_path
        if destination is None:
            destination = os.getenv("SQLITE_DB_PATH", "database.db")

        try:
            return _run_migration(access_file_path, destination)
        except Exception as e:
            raise Exception(f"Migration failed: {str(e)}") from e
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Convenience function for direct usage
|
|
58
|
+
def migrate_access_to_sqlite(
    access_file_path: str,
    sqlite_db_path: Optional[str] = None
) -> Engine:
    """
    Migrate an Access database to SQLite.

    Module-level convenience wrapper around :class:`MigrationAPI`.

    Args:
        access_file_path (str): Path to the Access database file
        sqlite_db_path (Optional[str]): Path for the SQLite database

    Returns:
        Engine: SQLAlchemy engine for the created SQLite database

    Example:
        >>> from py_dpm.api.migration import migrate_access_to_sqlite
        >>> engine = migrate_access_to_sqlite("data.mdb", "output.db")
    """
    return MigrationAPI().migrate_access_to_sqlite(access_file_path, sqlite_db_path)
|