pydpm_xl 0.1.39rc32__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. py_dpm/__init__.py +1 -1
  2. py_dpm/api/__init__.py +58 -189
  3. py_dpm/api/dpm/__init__.py +20 -0
  4. py_dpm/api/{data_dictionary.py → dpm/data_dictionary.py} +903 -984
  5. py_dpm/api/dpm/explorer.py +236 -0
  6. py_dpm/api/dpm/hierarchical_queries.py +142 -0
  7. py_dpm/api/{migration.py → dpm/migration.py} +16 -19
  8. py_dpm/api/{operation_scopes.py → dpm/operation_scopes.py} +319 -267
  9. py_dpm/api/dpm_xl/__init__.py +25 -0
  10. py_dpm/api/{ast_generator.py → dpm_xl/ast_generator.py} +3 -3
  11. py_dpm/api/{complete_ast.py → dpm_xl/complete_ast.py} +186 -284
  12. py_dpm/api/dpm_xl/semantic.py +358 -0
  13. py_dpm/api/{syntax.py → dpm_xl/syntax.py} +6 -5
  14. py_dpm/api/explorer.py +4 -0
  15. py_dpm/api/semantic.py +30 -306
  16. py_dpm/cli/__init__.py +9 -0
  17. py_dpm/{client.py → cli/main.py} +12 -10
  18. py_dpm/dpm/__init__.py +11 -0
  19. py_dpm/{models.py → dpm/models.py} +112 -88
  20. py_dpm/dpm/queries/base.py +100 -0
  21. py_dpm/dpm/queries/basic_objects.py +33 -0
  22. py_dpm/dpm/queries/explorer_queries.py +352 -0
  23. py_dpm/dpm/queries/filters.py +139 -0
  24. py_dpm/dpm/queries/glossary.py +45 -0
  25. py_dpm/dpm/queries/hierarchical_queries.py +838 -0
  26. py_dpm/dpm/queries/tables.py +133 -0
  27. py_dpm/dpm/utils.py +356 -0
  28. py_dpm/dpm_xl/__init__.py +8 -0
  29. py_dpm/dpm_xl/ast/__init__.py +14 -0
  30. py_dpm/{AST/ASTConstructor.py → dpm_xl/ast/constructor.py} +6 -6
  31. py_dpm/{AST/MLGeneration.py → dpm_xl/ast/ml_generation.py} +137 -87
  32. py_dpm/{AST/ModuleAnalyzer.py → dpm_xl/ast/module_analyzer.py} +7 -7
  33. py_dpm/{AST/ModuleDependencies.py → dpm_xl/ast/module_dependencies.py} +56 -41
  34. py_dpm/{AST/ASTObjects.py → dpm_xl/ast/nodes.py} +1 -1
  35. py_dpm/{AST/check_operands.py → dpm_xl/ast/operands.py} +16 -13
  36. py_dpm/{AST/ASTTemplate.py → dpm_xl/ast/template.py} +2 -2
  37. py_dpm/{AST/WhereClauseChecker.py → dpm_xl/ast/where_clause.py} +2 -2
  38. py_dpm/dpm_xl/grammar/__init__.py +18 -0
  39. py_dpm/dpm_xl/operators/__init__.py +19 -0
  40. py_dpm/{Operators/AggregateOperators.py → dpm_xl/operators/aggregate.py} +7 -7
  41. py_dpm/{Operators/NumericOperators.py → dpm_xl/operators/arithmetic.py} +6 -6
  42. py_dpm/{Operators/Operator.py → dpm_xl/operators/base.py} +5 -5
  43. py_dpm/{Operators/BooleanOperators.py → dpm_xl/operators/boolean.py} +5 -5
  44. py_dpm/{Operators/ClauseOperators.py → dpm_xl/operators/clause.py} +8 -8
  45. py_dpm/{Operators/ComparisonOperators.py → dpm_xl/operators/comparison.py} +5 -5
  46. py_dpm/{Operators/ConditionalOperators.py → dpm_xl/operators/conditional.py} +7 -7
  47. py_dpm/{Operators/StringOperators.py → dpm_xl/operators/string.py} +5 -5
  48. py_dpm/{Operators/TimeOperators.py → dpm_xl/operators/time.py} +6 -6
  49. py_dpm/{semantics/SemanticAnalyzer.py → dpm_xl/semantic_analyzer.py} +168 -68
  50. py_dpm/{semantics/Symbols.py → dpm_xl/symbols.py} +3 -3
  51. py_dpm/dpm_xl/types/__init__.py +13 -0
  52. py_dpm/{DataTypes/TypePromotion.py → dpm_xl/types/promotion.py} +2 -2
  53. py_dpm/{DataTypes/ScalarTypes.py → dpm_xl/types/scalar.py} +2 -2
  54. py_dpm/dpm_xl/utils/__init__.py +14 -0
  55. py_dpm/{data_handlers.py → dpm_xl/utils/data_handlers.py} +2 -2
  56. py_dpm/{Utils → dpm_xl/utils}/operands_mapping.py +1 -1
  57. py_dpm/{Utils → dpm_xl/utils}/operator_mapping.py +8 -8
  58. py_dpm/{OperationScopes/OperationScopeService.py → dpm_xl/utils/scopes_calculator.py} +148 -58
  59. py_dpm/{Utils/ast_serialization.py → dpm_xl/utils/serialization.py} +3 -4
  60. py_dpm/dpm_xl/validation/__init__.py +12 -0
  61. py_dpm/{Utils/ValidationsGenerationUtils.py → dpm_xl/validation/generation_utils.py} +2 -3
  62. py_dpm/{ValidationsGeneration/PropertiesConstraintsProcessor.py → dpm_xl/validation/property_constraints.py} +56 -21
  63. py_dpm/{ValidationsGeneration/auxiliary_functions.py → dpm_xl/validation/utils.py} +2 -2
  64. py_dpm/{ValidationsGeneration/VariantsProcessor.py → dpm_xl/validation/variants.py} +149 -55
  65. py_dpm/exceptions/__init__.py +23 -0
  66. py_dpm/{Exceptions → exceptions}/exceptions.py +7 -2
  67. pydpm_xl-0.2.1.dist-info/METADATA +278 -0
  68. pydpm_xl-0.2.1.dist-info/RECORD +88 -0
  69. pydpm_xl-0.2.1.dist-info/entry_points.txt +2 -0
  70. py_dpm/Exceptions/__init__.py +0 -0
  71. py_dpm/OperationScopes/__init__.py +0 -0
  72. py_dpm/Operators/__init__.py +0 -0
  73. py_dpm/Utils/__init__.py +0 -0
  74. py_dpm/Utils/utils.py +0 -2
  75. py_dpm/ValidationsGeneration/Utils.py +0 -364
  76. py_dpm/ValidationsGeneration/__init__.py +0 -0
  77. py_dpm/api/data_dictionary_validation.py +0 -614
  78. py_dpm/db_utils.py +0 -221
  79. py_dpm/grammar/__init__.py +0 -0
  80. py_dpm/grammar/dist/__init__.py +0 -0
  81. py_dpm/grammar/dpm_xlLexer.g4 +0 -437
  82. py_dpm/grammar/dpm_xlParser.g4 +0 -263
  83. py_dpm/semantics/DAG/DAGAnalyzer.py +0 -158
  84. py_dpm/semantics/DAG/__init__.py +0 -0
  85. py_dpm/semantics/__init__.py +0 -0
  86. py_dpm/views/data_types.sql +0 -12
  87. py_dpm/views/datapoints.sql +0 -65
  88. py_dpm/views/hierarchy_operand_reference.sql +0 -11
  89. py_dpm/views/hierarchy_preconditions.sql +0 -13
  90. py_dpm/views/hierarchy_variables.sql +0 -26
  91. py_dpm/views/hierarchy_variables_context.sql +0 -14
  92. py_dpm/views/key_components.sql +0 -18
  93. py_dpm/views/module_from_table.sql +0 -11
  94. py_dpm/views/open_keys.sql +0 -13
  95. py_dpm/views/operation_info.sql +0 -27
  96. py_dpm/views/operation_list.sql +0 -18
  97. py_dpm/views/operations_versions_from_module_version.sql +0 -30
  98. py_dpm/views/precondition_info.sql +0 -17
  99. py_dpm/views/report_type_operand_reference_info.sql +0 -18
  100. py_dpm/views/subcategory_info.sql +0 -17
  101. py_dpm/views/table_info.sql +0 -19
  102. pydpm_xl-0.1.39rc32.dist-info/METADATA +0 -53
  103. pydpm_xl-0.1.39rc32.dist-info/RECORD +0 -96
  104. pydpm_xl-0.1.39rc32.dist-info/entry_points.txt +0 -2
  105. /py_dpm/{AST → cli/commands}/__init__.py +0 -0
  106. /py_dpm/{migration.py → dpm/migration.py} +0 -0
  107. /py_dpm/{AST/ASTVisitor.py → dpm_xl/ast/visitor.py} +0 -0
  108. /py_dpm/{DataTypes → dpm_xl/grammar/generated}/__init__.py +0 -0
  109. /py_dpm/{grammar/dist → dpm_xl/grammar/generated}/dpm_xlLexer.interp +0 -0
  110. /py_dpm/{grammar/dist → dpm_xl/grammar/generated}/dpm_xlLexer.py +0 -0
  111. /py_dpm/{grammar/dist → dpm_xl/grammar/generated}/dpm_xlLexer.tokens +0 -0
  112. /py_dpm/{grammar/dist → dpm_xl/grammar/generated}/dpm_xlParser.interp +0 -0
  113. /py_dpm/{grammar/dist → dpm_xl/grammar/generated}/dpm_xlParser.py +0 -0
  114. /py_dpm/{grammar/dist → dpm_xl/grammar/generated}/dpm_xlParser.tokens +0 -0
  115. /py_dpm/{grammar/dist → dpm_xl/grammar/generated}/dpm_xlParserListener.py +0 -0
  116. /py_dpm/{grammar/dist → dpm_xl/grammar/generated}/dpm_xlParserVisitor.py +0 -0
  117. /py_dpm/{grammar/dist → dpm_xl/grammar/generated}/listeners.py +0 -0
  118. /py_dpm/{DataTypes/TimeClasses.py → dpm_xl/types/time.py} +0 -0
  119. /py_dpm/{Utils → dpm_xl/utils}/tokens.py +0 -0
  120. /py_dpm/{Exceptions → exceptions}/messages.py +0 -0
  121. {pydpm_xl-0.1.39rc32.dist-info → pydpm_xl-0.2.1.dist-info}/WHEEL +0 -0
  122. {pydpm_xl-0.1.39rc32.dist-info → pydpm_xl-0.2.1.dist-info}/licenses/LICENSE +0 -0
  123. {pydpm_xl-0.1.39rc32.dist-info → pydpm_xl-0.2.1.dist-info}/top_level.txt +0 -0
@@ -1,614 +0,0 @@
1
- """
2
- Data Dictionary Validation API
3
-
4
- This module provides methods to identify and validate data dictionary issues
5
- that can cause semantic validation failures during DPM-XL transpilation.
6
- """
7
-
8
- from typing import Dict, List, Set, Optional, Any, Tuple
9
- from dataclasses import dataclass
10
- from enum import Enum
11
- import re
12
- from sqlalchemy import text
13
-
14
- from py_dpm.db_utils import get_session, get_engine
15
- from py_dpm.models import *
16
-
17
-
18
- class ValidationIssueType(Enum):
19
- """Types of data dictionary validation issues."""
20
- MISSING_TABLE = "missing_table"
21
- MISSING_COLUMN = "missing_column"
22
- MISSING_ROW = "missing_row"
23
- MISSING_SHEET = "missing_sheet"
24
- MISSING_VARIABLE = "missing_variable"
25
- INVALID_REFERENCE = "invalid_reference"
26
- TYPE_MISMATCH = "type_mismatch"
27
- CONFIGURATION_ERROR = "configuration_error"
28
-
29
-
30
- @dataclass
31
- class ValidationIssue:
32
- """
33
- Represents a data dictionary validation issue.
34
-
35
- Attributes:
36
- issue_type (ValidationIssueType): Type of the issue
37
- description (str): Human-readable description of the issue
38
- affected_element (str): The specific element that has the issue
39
- suggested_fix (Optional[str]): Suggested fix for the issue
40
- severity (str): Severity level ('error', 'warning', 'info')
41
- code (Optional[str]): Error code for programmatic handling
42
- """
43
- issue_type: ValidationIssueType
44
- description: str
45
- affected_element: str
46
- suggested_fix: Optional[str] = None
47
- severity: str = "error"
48
- code: Optional[str] = None
49
-
50
-
51
- @dataclass
52
- class CellReference:
53
- """Represents a parsed cell reference from DPM-XL expression."""
54
- table: str
55
- rows: List[str]
56
- columns: List[str]
57
- sheets: List[str]
58
-
59
-
60
- class DataDictionaryValidator:
61
- """
62
- Main class for validating data dictionary consistency and completeness.
63
-
64
- This class provides methods to detect issues that would cause semantic
65
- validation failures during DPM-XL transpilation.
66
- """
67
-
68
- def __init__(self, database_path: Optional[str] = None, connection_url: Optional[str] = None):
69
- """
70
- Initialize the Data Dictionary Validator.
71
-
72
- Args:
73
- database_path: Path to SQLite database (optional)
74
- connection_url: SQLAlchemy connection URL for PostgreSQL (optional)
75
- """
76
- if connection_url:
77
- # Create isolated engine and session for the provided connection URL
78
- from sqlalchemy.orm import sessionmaker
79
- from py_dpm.db_utils import create_engine_from_url
80
-
81
- # Create engine for the connection URL (supports SQLite, PostgreSQL, MySQL, etc.)
82
- self.engine = create_engine_from_url(connection_url)
83
- session_maker = sessionmaker(bind=self.engine)
84
- self.session = session_maker()
85
- elif database_path:
86
- # Create isolated engine and session for this specific database
87
- from sqlalchemy import create_engine
88
- from sqlalchemy.orm import sessionmaker
89
- import os
90
-
91
- # Create the database directory if it doesn't exist
92
- db_dir = os.path.dirname(database_path)
93
- if db_dir and not os.path.exists(db_dir):
94
- os.makedirs(db_dir)
95
-
96
- # Create engine for specific database path
97
- db_connection_url = f"sqlite:///{database_path}"
98
- self.engine = create_engine(db_connection_url, pool_pre_ping=True)
99
- session_maker = sessionmaker(bind=self.engine)
100
- self.session = session_maker()
101
- else:
102
- # Use default global connection
103
- get_engine()
104
- self.session = get_session()
105
- self.engine = None
106
-
107
- self._table_cache = {}
108
- self._column_cache = {}
109
- self._row_cache = {}
110
- self._sheet_cache = {}
111
-
112
- def validate_expression_references(self, dpm_xl_expression: str) -> List[ValidationIssue]:
113
- """
114
- Validate all cell references in a DPM-XL expression.
115
-
116
- Args:
117
- dpm_xl_expression (str): The DPM-XL expression to validate
118
-
119
- Returns:
120
- List[ValidationIssue]: List of validation issues found
121
- """
122
- issues = []
123
-
124
- try:
125
- # Parse cell references from the expression
126
- cell_refs = self._parse_cell_references(dpm_xl_expression)
127
-
128
- for cell_ref in cell_refs:
129
- # Validate table exists
130
- table_issues = self.validate_table_exists(cell_ref.table)
131
- issues.extend(table_issues)
132
-
133
- # If table exists, validate other components
134
- if not table_issues:
135
- # Validate columns
136
- column_issues = self.validate_columns_exist(cell_ref.table, cell_ref.columns)
137
- issues.extend(column_issues)
138
-
139
- # Validate rows
140
- row_issues = self.validate_rows_exist(cell_ref.table, cell_ref.rows)
141
- issues.extend(row_issues)
142
-
143
- # Validate sheets
144
- sheet_issues = self.validate_sheets_exist(cell_ref.table, cell_ref.sheets)
145
- issues.extend(sheet_issues)
146
-
147
- except Exception as e:
148
- issues.append(ValidationIssue(
149
- issue_type=ValidationIssueType.CONFIGURATION_ERROR,
150
- description=f"Error parsing expression: {str(e)}",
151
- affected_element=dpm_xl_expression[:50] + "..." if len(dpm_xl_expression) > 50 else dpm_xl_expression,
152
- severity="error",
153
- code="PARSE_ERROR"
154
- ))
155
-
156
- return issues
157
-
158
- def validate_table_exists(self, table_name: str) -> List[ValidationIssue]:
159
- """
160
- Validate that a table exists in the data dictionary.
161
-
162
- Args:
163
- table_name (str): Name of the table to validate
164
-
165
- Returns:
166
- List[ValidationIssue]: List of validation issues found
167
- """
168
- issues = []
169
-
170
- try:
171
- # Check cache first
172
- if table_name in self._table_cache:
173
- return self._table_cache[table_name]
174
-
175
- # Query the database for the table using the correct schema
176
- # The actual schema uses table_code instead of table_name
177
- tables = self.session.execute(
178
- text("SELECT DISTINCT table_code FROM datapoints WHERE table_code = :table_code"),
179
- {"table_code": table_name}
180
- ).fetchall()
181
-
182
- if not tables:
183
- issue = ValidationIssue(
184
- issue_type=ValidationIssueType.MISSING_TABLE,
185
- description=f"Table '{table_name}' was not found in the data dictionary",
186
- affected_element=table_name,
187
- suggested_fix=f"Add table '{table_name}' to the data dictionary or check the table name spelling",
188
- severity="error",
189
- code="TABLE_NOT_FOUND"
190
- )
191
- issues.append(issue)
192
-
193
- # Cache the result
194
- self._table_cache[table_name] = issues
195
-
196
- except Exception as e:
197
- issues.append(ValidationIssue(
198
- issue_type=ValidationIssueType.CONFIGURATION_ERROR,
199
- description=f"Error checking table '{table_name}': {str(e)}",
200
- affected_element=table_name,
201
- severity="error",
202
- code="TABLE_CHECK_ERROR"
203
- ))
204
-
205
- return issues
206
-
207
- def validate_columns_exist(self, table_name: str, columns: List[str]) -> List[ValidationIssue]:
208
- """
209
- Validate that columns exist for a table.
210
-
211
- Args:
212
- table_name (str): Name of the table
213
- columns (List[str]): List of column names/patterns to validate
214
-
215
- Returns:
216
- List[ValidationIssue]: List of validation issues found
217
- """
218
- issues = []
219
-
220
- try:
221
- cache_key = f"{table_name}:{':'.join(columns)}"
222
- if cache_key in self._column_cache:
223
- return self._column_cache[cache_key]
224
-
225
- for column in columns:
226
- # Skip wildcards and ranges for now - these need special handling
227
- if column in ['*'] or '-' in column:
228
- continue
229
-
230
- # Check if specific column exists
231
- column_exists = self.session.execute(
232
- text("SELECT COUNT(*) FROM datapoints WHERE table_code = :table_code AND column_code = :column_code"),
233
- {"table_code": table_name, "column_code": column}
234
- ).fetchone()
235
-
236
- if not column_exists or column_exists[0] == 0:
237
- issue = ValidationIssue(
238
- issue_type=ValidationIssueType.MISSING_COLUMN,
239
- description=f"Column '{column}' not found in table '{table_name}'",
240
- affected_element=f"{table_name}.{column}",
241
- suggested_fix=f"Add column '{column}' to table '{table_name}' or check the column name",
242
- severity="error",
243
- code="COLUMN_NOT_FOUND"
244
- )
245
- issues.append(issue)
246
-
247
- self._column_cache[cache_key] = issues
248
-
249
- except Exception as e:
250
- issues.append(ValidationIssue(
251
- issue_type=ValidationIssueType.CONFIGURATION_ERROR,
252
- description=f"Error checking columns for table '{table_name}': {str(e)}",
253
- affected_element=f"{table_name}.[{','.join(columns)}]",
254
- severity="error",
255
- code="COLUMN_CHECK_ERROR"
256
- ))
257
-
258
- return issues
259
-
260
- def validate_rows_exist(self, table_name: str, rows: List[str]) -> List[ValidationIssue]:
261
- """
262
- Validate that rows exist for a table.
263
-
264
- Args:
265
- table_name (str): Name of the table
266
- rows (List[str]): List of row names/patterns to validate
267
-
268
- Returns:
269
- List[ValidationIssue]: List of validation issues found
270
- """
271
- issues = []
272
-
273
- try:
274
- cache_key = f"{table_name}:rows:{':'.join(rows)}"
275
- if cache_key in self._row_cache:
276
- return self._row_cache[cache_key]
277
-
278
- for row in rows:
279
- # Skip wildcards and ranges for now
280
- if row in ['*'] or '-' in row:
281
- continue
282
-
283
- # Check if specific row exists
284
- row_exists = self.session.execute(
285
- text("SELECT COUNT(*) FROM datapoints WHERE table_code = :table_code AND row_code = :row_code"),
286
- {"table_code": table_name, "row_code": row}
287
- ).fetchone()
288
-
289
- if not row_exists or row_exists[0] == 0:
290
- issue = ValidationIssue(
291
- issue_type=ValidationIssueType.MISSING_ROW,
292
- description=f"Row '{row}' not found in table '{table_name}'",
293
- affected_element=f"{table_name}.{row}",
294
- suggested_fix=f"Add row '{row}' to table '{table_name}' or check the row name",
295
- severity="warning", # Rows might be more flexible
296
- code="ROW_NOT_FOUND"
297
- )
298
- issues.append(issue)
299
-
300
- self._row_cache[cache_key] = issues
301
-
302
- except Exception as e:
303
- issues.append(ValidationIssue(
304
- issue_type=ValidationIssueType.CONFIGURATION_ERROR,
305
- description=f"Error checking rows for table '{table_name}': {str(e)}",
306
- affected_element=f"{table_name}.[{','.join(rows)}]",
307
- severity="error",
308
- code="ROW_CHECK_ERROR"
309
- ))
310
-
311
- return issues
312
-
313
- def validate_sheets_exist(self, table_name: str, sheets: List[str]) -> List[ValidationIssue]:
314
- """
315
- Validate that sheets exist for a table.
316
-
317
- Args:
318
- table_name (str): Name of the table
319
- sheets (List[str]): List of sheet names/patterns to validate
320
-
321
- Returns:
322
- List[ValidationIssue]: List of validation issues found
323
- """
324
- issues = []
325
-
326
- try:
327
- cache_key = f"{table_name}:sheets:{':'.join(sheets)}"
328
- if cache_key in self._sheet_cache:
329
- return self._sheet_cache[cache_key]
330
-
331
- for sheet in sheets:
332
- # Skip wildcards for now
333
- if sheet in ['*']:
334
- # Check if any sheets exist for this table
335
- sheet_count = self.session.execute(
336
- text("SELECT COUNT(DISTINCT sheet_code) FROM datapoints WHERE table_code = :table_code AND sheet_code IS NOT NULL AND sheet_code != ''"),
337
- {"table_code": table_name}
338
- ).fetchone()
339
-
340
- if not sheet_count or sheet_count[0] == 0:
341
- issue = ValidationIssue(
342
- issue_type=ValidationIssueType.MISSING_SHEET,
343
- description=f"No sheets found for table '{table_name}' but s* wildcard used",
344
- affected_element=f"{table_name}.s*",
345
- suggested_fix=f"Add sheet definitions for table '{table_name}' or remove s* wildcard",
346
- severity="error",
347
- code="NO_SHEETS_FOR_WILDCARD"
348
- )
349
- issues.append(issue)
350
- continue
351
-
352
- # Check if specific sheet exists
353
- sheet_exists = self.session.execute(
354
- text("SELECT COUNT(*) FROM datapoints WHERE table_code = :table_code AND sheet_code = :sheet_code"),
355
- {"table_code": table_name, "sheet_code": sheet}
356
- ).fetchone()
357
-
358
- if not sheet_exists or sheet_exists[0] == 0:
359
- issue = ValidationIssue(
360
- issue_type=ValidationIssueType.MISSING_SHEET,
361
- description=f"Sheet '{sheet}' not found in table '{table_name}'",
362
- affected_element=f"{table_name}.{sheet}",
363
- suggested_fix=f"Add sheet '{sheet}' to table '{table_name}' or check the sheet name",
364
- severity="error",
365
- code="SHEET_NOT_FOUND"
366
- )
367
- issues.append(issue)
368
-
369
- self._sheet_cache[cache_key] = issues
370
-
371
- except Exception as e:
372
- issues.append(ValidationIssue(
373
- issue_type=ValidationIssueType.CONFIGURATION_ERROR,
374
- description=f"Error checking sheets for table '{table_name}': {str(e)}",
375
- affected_element=f"{table_name}.[{','.join(sheets)}]",
376
- severity="error",
377
- code="SHEET_CHECK_ERROR"
378
- ))
379
-
380
- return issues
381
-
382
- def validate_variables_exist(self, variable_names: List[str]) -> List[ValidationIssue]:
383
- """
384
- Validate that variables exist in the data dictionary.
385
-
386
- Args:
387
- variable_names (List[str]): List of variable names to validate
388
-
389
- Returns:
390
- List[ValidationIssue]: List of validation issues found
391
- """
392
- issues = []
393
-
394
- for var_name in variable_names:
395
- try:
396
- # Variable validation - try multiple approaches based on the schema
397
- # First try to find it as a VariableID (numeric)
398
- var_exists = None
399
- try:
400
- var_id = int(var_name)
401
- var_exists = self.session.execute(
402
- text("SELECT COUNT(*) FROM Variable WHERE VariableID = :var_id"),
403
- {"var_id": var_id}
404
- ).fetchone()
405
- except ValueError:
406
- # Not a numeric ID, skip variable validation for now
407
- # Variables in this schema appear to be referenced by ID, not name
408
- continue
409
-
410
- if var_exists and var_exists[0] == 0:
411
- issue = ValidationIssue(
412
- issue_type=ValidationIssueType.MISSING_VARIABLE,
413
- description=f"Variable ID '{var_name}' not found in data dictionary",
414
- affected_element=var_name,
415
- suggested_fix=f"Add variable ID '{var_name}' to the data dictionary or check the variable ID",
416
- severity="warning", # Changed to warning since variable structure is unclear
417
- code="VARIABLE_NOT_FOUND"
418
- )
419
- issues.append(issue)
420
-
421
- except Exception as e:
422
- issues.append(ValidationIssue(
423
- issue_type=ValidationIssueType.CONFIGURATION_ERROR,
424
- description=f"Error checking variable '{var_name}': {str(e)}",
425
- affected_element=var_name,
426
- severity="error",
427
- code="VARIABLE_CHECK_ERROR"
428
- ))
429
-
430
- return issues
431
-
432
- def get_validation_summary(self, issues: List[ValidationIssue]) -> Dict[str, Any]:
433
- """
434
- Generate a summary of validation issues.
435
-
436
- Args:
437
- issues (List[ValidationIssue]): List of validation issues
438
-
439
- Returns:
440
- Dict[str, Any]: Summary statistics and categorized issues
441
- """
442
- summary = {
443
- "total_issues": len(issues),
444
- "by_type": {},
445
- "by_severity": {},
446
- "fixable_issues": [],
447
- "critical_issues": []
448
- }
449
-
450
- for issue in issues:
451
- # Count by type
452
- issue_type = issue.issue_type.value
453
- summary["by_type"][issue_type] = summary["by_type"].get(issue_type, 0) + 1
454
-
455
- # Count by severity
456
- summary["by_severity"][issue.severity] = summary["by_severity"].get(issue.severity, 0) + 1
457
-
458
- # Categorize issues
459
- if issue.suggested_fix:
460
- summary["fixable_issues"].append(issue)
461
-
462
- if issue.severity == "error":
463
- summary["critical_issues"].append(issue)
464
-
465
- return summary
466
-
467
- def _parse_cell_references(self, expression: str) -> List[CellReference]:
468
- """
469
- Parse cell references from a DPM-XL expression.
470
-
471
- Args:
472
- expression (str): DPM-XL expression to parse
473
-
474
- Returns:
475
- List[CellReference]: List of parsed cell references
476
- """
477
- cell_refs = []
478
-
479
- # Regex pattern to match cell references like {tTableName, rRows, cColumns, sSheets}
480
- pattern = r'\{t([^,]+),\s*([^,]+),\s*([^,]+)(?:,\s*([^}]+))?\}'
481
-
482
- matches = re.findall(pattern, expression)
483
-
484
- for match in matches:
485
- table = match[0].strip()
486
-
487
- # Parse rows
488
- rows_str = match[1].strip()
489
- rows = self._parse_dimension_values(rows_str, 'r')
490
-
491
- # Parse columns
492
- cols_str = match[2].strip()
493
- columns = self._parse_dimension_values(cols_str, 'c')
494
-
495
- # Parse sheets (optional)
496
- sheets = []
497
- if len(match) > 3 and match[3]:
498
- sheets_str = match[3].strip()
499
- sheets = self._parse_dimension_values(sheets_str, 's')
500
-
501
- cell_refs.append(CellReference(
502
- table=table,
503
- rows=rows,
504
- columns=columns,
505
- sheets=sheets
506
- ))
507
-
508
- return cell_refs
509
-
510
- def _parse_dimension_values(self, dim_str: str, prefix: str) -> List[str]:
511
- """
512
- Parse dimension values (rows, columns, or sheets) from a string.
513
-
514
- Args:
515
- dim_str (str): String containing dimension values
516
- prefix (str): Expected prefix ('r', 'c', or 's')
517
-
518
- Returns:
519
- List[str]: List of parsed dimension values
520
- """
521
- values = []
522
-
523
- # Remove prefix and parentheses if present
524
- dim_str = dim_str.strip()
525
- if dim_str.startswith(prefix):
526
- dim_str = dim_str[1:]
527
- if dim_str.startswith('(') and dim_str.endswith(')'):
528
- dim_str = dim_str[1:-1]
529
-
530
- # Split by comma and clean up
531
- if dim_str:
532
- for value in dim_str.split(','):
533
- value = value.strip()
534
- if value:
535
- values.append(value)
536
-
537
- return values
538
-
539
- def __del__(self):
540
- """Clean up resources."""
541
- if hasattr(self, 'session') and self.session:
542
- self.session.close()
543
- if hasattr(self, 'engine') and self.engine is not None:
544
- self.engine.dispose()
545
-
546
-
547
- # Convenience functions for direct usage
548
- def validate_dpm_xl_expression(expression: str) -> List[ValidationIssue]:
549
- """
550
- Convenience function to validate a DPM-XL expression.
551
-
552
- Args:
553
- expression (str): DPM-XL expression to validate
554
-
555
- Returns:
556
- List[ValidationIssue]: List of validation issues found
557
- """
558
- validator = DataDictionaryValidator()
559
- return validator.validate_expression_references(expression)
560
-
561
-
562
- def validate_table_references(table_names: List[str]) -> List[ValidationIssue]:
563
- """
564
- Convenience function to validate table references.
565
-
566
- Args:
567
- table_names (List[str]): List of table names to validate
568
-
569
- Returns:
570
- List[ValidationIssue]: List of validation issues found
571
- """
572
- validator = DataDictionaryValidator()
573
- issues = []
574
-
575
- for table_name in table_names:
576
- issues.extend(validator.validate_table_exists(table_name))
577
-
578
- return issues
579
-
580
-
581
- def check_data_dictionary_health() -> Dict[str, Any]:
582
- """
583
- Perform a comprehensive health check of the data dictionary.
584
-
585
- Returns:
586
- Dict[str, Any]: Health check results
587
- """
588
- validator = DataDictionaryValidator()
589
-
590
- # This would include various checks like:
591
- # - Missing table definitions
592
- # - Orphaned references
593
- # - Inconsistent naming
594
- # - etc.
595
-
596
- health_report = {
597
- "status": "healthy", # or "warning" or "critical"
598
- "total_tables": 0,
599
- "issues_found": [],
600
- "recommendations": []
601
- }
602
-
603
- try:
604
- # Get total table count
605
- result = validator.session.execute(text("SELECT COUNT(DISTINCT table_code) FROM datapoints")).fetchone()
606
- health_report["total_tables"] = result[0] if result else 0
607
-
608
- # Add more comprehensive checks here
609
-
610
- except Exception as e:
611
- health_report["status"] = "critical"
612
- health_report["issues_found"].append(f"Database connection error: {str(e)}")
613
-
614
- return health_report