pydpm_xl 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. py_dpm/AST/ASTConstructor.py +503 -0
  2. py_dpm/AST/ASTObjects.py +827 -0
  3. py_dpm/AST/ASTTemplate.py +101 -0
  4. py_dpm/AST/ASTVisitor.py +13 -0
  5. py_dpm/AST/MLGeneration.py +588 -0
  6. py_dpm/AST/ModuleAnalyzer.py +79 -0
  7. py_dpm/AST/ModuleDependencies.py +203 -0
  8. py_dpm/AST/WhereClauseChecker.py +12 -0
  9. py_dpm/AST/__init__.py +0 -0
  10. py_dpm/AST/check_operands.py +302 -0
  11. py_dpm/DataTypes/ScalarTypes.py +324 -0
  12. py_dpm/DataTypes/TimeClasses.py +370 -0
  13. py_dpm/DataTypes/TypePromotion.py +195 -0
  14. py_dpm/DataTypes/__init__.py +0 -0
  15. py_dpm/Exceptions/__init__.py +0 -0
  16. py_dpm/Exceptions/exceptions.py +84 -0
  17. py_dpm/Exceptions/messages.py +114 -0
  18. py_dpm/OperationScopes/OperationScopeService.py +247 -0
  19. py_dpm/OperationScopes/__init__.py +0 -0
  20. py_dpm/Operators/AggregateOperators.py +138 -0
  21. py_dpm/Operators/BooleanOperators.py +30 -0
  22. py_dpm/Operators/ClauseOperators.py +159 -0
  23. py_dpm/Operators/ComparisonOperators.py +69 -0
  24. py_dpm/Operators/ConditionalOperators.py +362 -0
  25. py_dpm/Operators/NumericOperators.py +101 -0
  26. py_dpm/Operators/Operator.py +388 -0
  27. py_dpm/Operators/StringOperators.py +27 -0
  28. py_dpm/Operators/TimeOperators.py +53 -0
  29. py_dpm/Operators/__init__.py +0 -0
  30. py_dpm/Utils/ValidationsGenerationUtils.py +429 -0
  31. py_dpm/Utils/__init__.py +0 -0
  32. py_dpm/Utils/operands_mapping.py +73 -0
  33. py_dpm/Utils/operator_mapping.py +89 -0
  34. py_dpm/Utils/tokens.py +172 -0
  35. py_dpm/Utils/utils.py +2 -0
  36. py_dpm/ValidationsGeneration/PropertiesConstraintsProcessor.py +190 -0
  37. py_dpm/ValidationsGeneration/Utils.py +364 -0
  38. py_dpm/ValidationsGeneration/VariantsProcessor.py +265 -0
  39. py_dpm/ValidationsGeneration/__init__.py +0 -0
  40. py_dpm/ValidationsGeneration/auxiliary_functions.py +98 -0
  41. py_dpm/__init__.py +61 -0
  42. py_dpm/api/__init__.py +140 -0
  43. py_dpm/api/ast_generator.py +438 -0
  44. py_dpm/api/complete_ast.py +241 -0
  45. py_dpm/api/data_dictionary_validation.py +577 -0
  46. py_dpm/api/migration.py +77 -0
  47. py_dpm/api/semantic.py +224 -0
  48. py_dpm/api/syntax.py +182 -0
  49. py_dpm/client.py +106 -0
  50. py_dpm/data_handlers.py +99 -0
  51. py_dpm/db_utils.py +117 -0
  52. py_dpm/grammar/__init__.py +0 -0
  53. py_dpm/grammar/dist/__init__.py +0 -0
  54. py_dpm/grammar/dist/dpm_xlLexer.interp +428 -0
  55. py_dpm/grammar/dist/dpm_xlLexer.py +804 -0
  56. py_dpm/grammar/dist/dpm_xlLexer.tokens +106 -0
  57. py_dpm/grammar/dist/dpm_xlParser.interp +249 -0
  58. py_dpm/grammar/dist/dpm_xlParser.py +5224 -0
  59. py_dpm/grammar/dist/dpm_xlParser.tokens +106 -0
  60. py_dpm/grammar/dist/dpm_xlParserListener.py +742 -0
  61. py_dpm/grammar/dist/dpm_xlParserVisitor.py +419 -0
  62. py_dpm/grammar/dist/listeners.py +10 -0
  63. py_dpm/grammar/dpm_xlLexer.g4 +435 -0
  64. py_dpm/grammar/dpm_xlParser.g4 +260 -0
  65. py_dpm/migration.py +282 -0
  66. py_dpm/models.py +2139 -0
  67. py_dpm/semantics/DAG/DAGAnalyzer.py +158 -0
  68. py_dpm/semantics/DAG/__init__.py +0 -0
  69. py_dpm/semantics/SemanticAnalyzer.py +320 -0
  70. py_dpm/semantics/Symbols.py +223 -0
  71. py_dpm/semantics/__init__.py +0 -0
  72. py_dpm/utils/__init__.py +0 -0
  73. py_dpm/utils/ast_serialization.py +481 -0
  74. py_dpm/views/data_types.sql +12 -0
  75. py_dpm/views/datapoints.sql +65 -0
  76. py_dpm/views/hierarchy_operand_reference.sql +11 -0
  77. py_dpm/views/hierarchy_preconditions.sql +13 -0
  78. py_dpm/views/hierarchy_variables.sql +26 -0
  79. py_dpm/views/hierarchy_variables_context.sql +14 -0
  80. py_dpm/views/key_components.sql +18 -0
  81. py_dpm/views/module_from_table.sql +11 -0
  82. py_dpm/views/open_keys.sql +13 -0
  83. py_dpm/views/operation_info.sql +27 -0
  84. py_dpm/views/operation_list.sql +18 -0
  85. py_dpm/views/operations_versions_from_module_version.sql +30 -0
  86. py_dpm/views/precondition_info.sql +17 -0
  87. py_dpm/views/report_type_operand_reference_info.sql +18 -0
  88. py_dpm/views/subcategory_info.sql +17 -0
  89. py_dpm/views/table_info.sql +19 -0
  90. pydpm_xl-0.1.10.dist-info/LICENSE +674 -0
  91. pydpm_xl-0.1.10.dist-info/METADATA +50 -0
  92. pydpm_xl-0.1.10.dist-info/RECORD +94 -0
  93. pydpm_xl-0.1.10.dist-info/WHEEL +4 -0
  94. pydpm_xl-0.1.10.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,577 @@
1
+ """
2
+ Data Dictionary Validation API
3
+
4
+ This module provides methods to identify and validate data dictionary issues
5
+ that can cause semantic validation failures during DPM-XL transpilation.
6
+ """
7
+
8
+ from typing import Dict, List, Set, Optional, Any, Tuple
9
+ from dataclasses import dataclass
10
+ from enum import Enum
11
+ import re
12
+ from sqlalchemy import text
13
+
14
+ from py_dpm.db_utils import get_session, get_engine
15
+ from py_dpm.models import *
16
+
17
+
18
class ValidationIssueType(Enum):
    """Types of data dictionary validation issues."""
    # Structural elements referenced by an expression but absent from the dictionary
    MISSING_TABLE = "missing_table"
    MISSING_COLUMN = "missing_column"
    MISSING_ROW = "missing_row"
    MISSING_SHEET = "missing_sheet"
    MISSING_VARIABLE = "missing_variable"
    # Reference is syntactically present but cannot be resolved
    INVALID_REFERENCE = "invalid_reference"
    # Resolved element has an incompatible data type
    TYPE_MISMATCH = "type_mismatch"
    # Internal/setup failures (parse errors, database errors) rather than
    # dictionary content problems
    CONFIGURATION_ERROR = "configuration_error"
28
+
29
+
30
@dataclass
class ValidationIssue:
    """
    Represents a data dictionary validation issue.

    Attributes:
        issue_type (ValidationIssueType): Type of the issue
        description (str): Human-readable description of the issue
        affected_element (str): The specific element that has the issue
        suggested_fix (Optional[str]): Suggested fix for the issue
        severity (str): Severity level ('error', 'warning', 'info')
        code (Optional[str]): Error code for programmatic handling
    """
    issue_type: ValidationIssueType
    description: str
    affected_element: str
    # Optional human-readable remediation hint; issues carrying one are
    # surfaced under "fixable_issues" in get_validation_summary().
    suggested_fix: Optional[str] = None
    # Free-form severity string; "error" entries are counted as critical
    # by get_validation_summary().
    severity: str = "error"
    # Stable machine-readable code (e.g. "TABLE_NOT_FOUND") for callers
    # that dispatch on issue kind.
    code: Optional[str] = None
49
+
50
+
51
@dataclass
class CellReference:
    """Represents a parsed cell reference from DPM-XL expression."""
    # Table code, e.g. the text after the leading 't' in "{tC_01.00, ...}".
    table: str
    # Row codes/patterns (may include wildcards like '*' or ranges).
    rows: List[str]
    # Column codes/patterns.
    columns: List[str]
    # Sheet codes/patterns; empty when the reference has no sheet part.
    sheets: List[str]
58
+
59
+
60
class DataDictionaryValidator:
    """
    Main class for validating data dictionary consistency and completeness.

    This class provides methods to detect issues that would cause semantic
    validation failures during DPM-XL transpilation.

    All existence checks query the ``datapoints`` relation via raw SQL, and
    results are memoised per validator instance so repeated lookups of the
    same element skip the database round trip.
    """

    def __init__(self):
        """Initialize the Data Dictionary Validator."""
        # Make sure the engine is initialised before a session is requested.
        get_engine()
        self.session = get_session()
        # Caches map a lookup key to the issue list previously computed for
        # it; an empty list means the element was found and is valid.
        self._table_cache: Dict[str, List[ValidationIssue]] = {}
        self._column_cache: Dict[str, List[ValidationIssue]] = {}
        self._row_cache: Dict[str, List[ValidationIssue]] = {}
        self._sheet_cache: Dict[str, List[ValidationIssue]] = {}

    def validate_expression_references(self, dpm_xl_expression: str) -> List[ValidationIssue]:
        """
        Validate all cell references in a DPM-XL expression.

        Args:
            dpm_xl_expression (str): The DPM-XL expression to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        issues = []

        try:
            # Parse cell references from the expression
            cell_refs = self._parse_cell_references(dpm_xl_expression)

            for cell_ref in cell_refs:
                # Validate table exists
                table_issues = self.validate_table_exists(cell_ref.table)
                issues.extend(table_issues)

                # If table exists, validate other components.  Column/row/sheet
                # checks are skipped for a missing table to avoid cascading
                # noise from a single root cause.
                if not table_issues:
                    # Validate columns
                    column_issues = self.validate_columns_exist(cell_ref.table, cell_ref.columns)
                    issues.extend(column_issues)

                    # Validate rows
                    row_issues = self.validate_rows_exist(cell_ref.table, cell_ref.rows)
                    issues.extend(row_issues)

                    # Validate sheets
                    sheet_issues = self.validate_sheets_exist(cell_ref.table, cell_ref.sheets)
                    issues.extend(sheet_issues)

        except Exception as e:
            # Any failure while parsing is reported as a single
            # CONFIGURATION_ERROR issue instead of propagating.
            issues.append(ValidationIssue(
                issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                description=f"Error parsing expression: {str(e)}",
                # Truncate long expressions so the report stays readable.
                affected_element=dpm_xl_expression[:50] + "..." if len(dpm_xl_expression) > 50 else dpm_xl_expression,
                severity="error",
                code="PARSE_ERROR"
            ))

        return issues

    def validate_table_exists(self, table_name: str) -> List[ValidationIssue]:
        """
        Validate that a table exists in the data dictionary.

        Args:
            table_name (str): Name of the table to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        issues = []

        try:
            # Check cache first
            if table_name in self._table_cache:
                return self._table_cache[table_name]

            # Query the database for the table using the correct schema
            # The actual schema uses table_code instead of table_name
            tables = self.session.execute(
                text("SELECT DISTINCT table_code FROM datapoints WHERE table_code = :table_code"),
                {"table_code": table_name}
            ).fetchall()

            if not tables:
                issue = ValidationIssue(
                    issue_type=ValidationIssueType.MISSING_TABLE,
                    description=f"Table '{table_name}' was not found in the data dictionary",
                    affected_element=table_name,
                    suggested_fix=f"Add table '{table_name}' to the data dictionary or check the table name spelling",
                    severity="error",
                    code="TABLE_NOT_FOUND"
                )
                issues.append(issue)

            # Cache the result
            self._table_cache[table_name] = issues

        except Exception as e:
            # NOTE(review): database errors are reported as issues (not
            # raised) and are deliberately NOT cached, so a transient DB
            # failure is retried on the next call.
            issues.append(ValidationIssue(
                issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                description=f"Error checking table '{table_name}': {str(e)}",
                affected_element=table_name,
                severity="error",
                code="TABLE_CHECK_ERROR"
            ))

        return issues

    def validate_columns_exist(self, table_name: str, columns: List[str]) -> List[ValidationIssue]:
        """
        Validate that columns exist for a table.

        Args:
            table_name (str): Name of the table
            columns (List[str]): List of column names/patterns to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        issues = []

        try:
            cache_key = f"{table_name}:{':'.join(columns)}"
            if cache_key in self._column_cache:
                return self._column_cache[cache_key]

            for column in columns:
                # Skip wildcards and ranges for now - these need special handling
                if column in ['*'] or '-' in column:
                    continue

                # Check if specific column exists
                column_exists = self.session.execute(
                    text("SELECT COUNT(*) FROM datapoints WHERE table_code = :table_code AND column_code = :column_code"),
                    {"table_code": table_name, "column_code": column}
                ).fetchone()

                if not column_exists or column_exists[0] == 0:
                    issue = ValidationIssue(
                        issue_type=ValidationIssueType.MISSING_COLUMN,
                        description=f"Column '{column}' not found in table '{table_name}'",
                        affected_element=f"{table_name}.{column}",
                        suggested_fix=f"Add column '{column}' to table '{table_name}' or check the column name",
                        severity="error",
                        code="COLUMN_NOT_FOUND"
                    )
                    issues.append(issue)

            self._column_cache[cache_key] = issues

        except Exception as e:
            issues.append(ValidationIssue(
                issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                description=f"Error checking columns for table '{table_name}': {str(e)}",
                affected_element=f"{table_name}.[{','.join(columns)}]",
                severity="error",
                code="COLUMN_CHECK_ERROR"
            ))

        return issues

    def validate_rows_exist(self, table_name: str, rows: List[str]) -> List[ValidationIssue]:
        """
        Validate that rows exist for a table.

        Args:
            table_name (str): Name of the table
            rows (List[str]): List of row names/patterns to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        issues = []

        try:
            # Namespaced cache key ("rows:") keeps row lookups distinct from
            # column/sheet lookups for the same table.
            cache_key = f"{table_name}:rows:{':'.join(rows)}"
            if cache_key in self._row_cache:
                return self._row_cache[cache_key]

            for row in rows:
                # Skip wildcards and ranges for now
                if row in ['*'] or '-' in row:
                    continue

                # Check if specific row exists
                row_exists = self.session.execute(
                    text("SELECT COUNT(*) FROM datapoints WHERE table_code = :table_code AND row_code = :row_code"),
                    {"table_code": table_name, "row_code": row}
                ).fetchone()

                if not row_exists or row_exists[0] == 0:
                    issue = ValidationIssue(
                        issue_type=ValidationIssueType.MISSING_ROW,
                        description=f"Row '{row}' not found in table '{table_name}'",
                        affected_element=f"{table_name}.{row}",
                        suggested_fix=f"Add row '{row}' to table '{table_name}' or check the row name",
                        severity="warning",  # Rows might be more flexible
                        code="ROW_NOT_FOUND"
                    )
                    issues.append(issue)

            self._row_cache[cache_key] = issues

        except Exception as e:
            issues.append(ValidationIssue(
                issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                description=f"Error checking rows for table '{table_name}': {str(e)}",
                affected_element=f"{table_name}.[{','.join(rows)}]",
                severity="error",
                code="ROW_CHECK_ERROR"
            ))

        return issues

    def validate_sheets_exist(self, table_name: str, sheets: List[str]) -> List[ValidationIssue]:
        """
        Validate that sheets exist for a table.

        Args:
            table_name (str): Name of the table
            sheets (List[str]): List of sheet names/patterns to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        issues = []

        try:
            cache_key = f"{table_name}:sheets:{':'.join(sheets)}"
            if cache_key in self._sheet_cache:
                return self._sheet_cache[cache_key]

            for sheet in sheets:
                # Unlike rows/columns, the '*' wildcard IS validated here: it
                # is an error to use s* on a table that defines no sheets.
                if sheet in ['*']:
                    # Check if any sheets exist for this table
                    sheet_count = self.session.execute(
                        text("SELECT COUNT(DISTINCT sheet_code) FROM datapoints WHERE table_code = :table_code AND sheet_code IS NOT NULL AND sheet_code != ''"),
                        {"table_code": table_name}
                    ).fetchone()

                    if not sheet_count or sheet_count[0] == 0:
                        issue = ValidationIssue(
                            issue_type=ValidationIssueType.MISSING_SHEET,
                            description=f"No sheets found for table '{table_name}' but s* wildcard used",
                            affected_element=f"{table_name}.s*",
                            suggested_fix=f"Add sheet definitions for table '{table_name}' or remove s* wildcard",
                            severity="error",
                            code="NO_SHEETS_FOR_WILDCARD"
                        )
                        issues.append(issue)
                    continue

                # Check if specific sheet exists
                sheet_exists = self.session.execute(
                    text("SELECT COUNT(*) FROM datapoints WHERE table_code = :table_code AND sheet_code = :sheet_code"),
                    {"table_code": table_name, "sheet_code": sheet}
                ).fetchone()

                if not sheet_exists or sheet_exists[0] == 0:
                    issue = ValidationIssue(
                        issue_type=ValidationIssueType.MISSING_SHEET,
                        description=f"Sheet '{sheet}' not found in table '{table_name}'",
                        affected_element=f"{table_name}.{sheet}",
                        suggested_fix=f"Add sheet '{sheet}' to table '{table_name}' or check the sheet name",
                        severity="error",
                        code="SHEET_NOT_FOUND"
                    )
                    issues.append(issue)

            self._sheet_cache[cache_key] = issues

        except Exception as e:
            issues.append(ValidationIssue(
                issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                description=f"Error checking sheets for table '{table_name}': {str(e)}",
                affected_element=f"{table_name}.[{','.join(sheets)}]",
                severity="error",
                code="SHEET_CHECK_ERROR"
            ))

        return issues

    def validate_variables_exist(self, variable_names: List[str]) -> List[ValidationIssue]:
        """
        Validate that variables exist in the data dictionary.

        Only numeric names are checked (as Variable.VariableID); non-numeric
        names are silently skipped because variables in this schema appear to
        be referenced by ID rather than by name.

        Args:
            variable_names (List[str]): List of variable names to validate

        Returns:
            List[ValidationIssue]: List of validation issues found
        """
        issues = []

        for var_name in variable_names:
            try:
                # Variable validation - try multiple approaches based on the schema
                # First try to find it as a VariableID (numeric)
                var_exists = None
                try:
                    var_id = int(var_name)
                    var_exists = self.session.execute(
                        text("SELECT COUNT(*) FROM Variable WHERE VariableID = :var_id"),
                        {"var_id": var_id}
                    ).fetchone()
                except ValueError:
                    # Not a numeric ID, skip variable validation for now
                    # Variables in this schema appear to be referenced by ID, not name
                    continue

                if var_exists and var_exists[0] == 0:
                    issue = ValidationIssue(
                        issue_type=ValidationIssueType.MISSING_VARIABLE,
                        description=f"Variable ID '{var_name}' not found in data dictionary",
                        affected_element=var_name,
                        suggested_fix=f"Add variable ID '{var_name}' to the data dictionary or check the variable ID",
                        severity="warning",  # Changed to warning since variable structure is unclear
                        code="VARIABLE_NOT_FOUND"
                    )
                    issues.append(issue)

            except Exception as e:
                issues.append(ValidationIssue(
                    issue_type=ValidationIssueType.CONFIGURATION_ERROR,
                    description=f"Error checking variable '{var_name}': {str(e)}",
                    affected_element=var_name,
                    severity="error",
                    code="VARIABLE_CHECK_ERROR"
                ))

        return issues

    def get_validation_summary(self, issues: List[ValidationIssue]) -> Dict[str, Any]:
        """
        Generate a summary of validation issues.

        Args:
            issues (List[ValidationIssue]): List of validation issues

        Returns:
            Dict[str, Any]: Summary statistics and categorized issues
        """
        summary = {
            "total_issues": len(issues),
            "by_type": {},
            "by_severity": {},
            "fixable_issues": [],
            "critical_issues": []
        }

        for issue in issues:
            # Count by type
            issue_type = issue.issue_type.value
            summary["by_type"][issue_type] = summary["by_type"].get(issue_type, 0) + 1

            # Count by severity
            summary["by_severity"][issue.severity] = summary["by_severity"].get(issue.severity, 0) + 1

            # Categorize issues (an issue may appear in both lists)
            if issue.suggested_fix:
                summary["fixable_issues"].append(issue)

            if issue.severity == "error":
                summary["critical_issues"].append(issue)

        return summary

    def _parse_cell_references(self, expression: str) -> List[CellReference]:
        """
        Parse cell references from a DPM-XL expression.

        Args:
            expression (str): DPM-XL expression to parse

        Returns:
            List[CellReference]: List of parsed cell references
        """
        cell_refs = []

        # Regex pattern to match cell references like {tTableName, rRows, cColumns, sSheets}
        # NOTE(review): the row/column groups use [^,]+, so a parenthesised
        # multi-value dimension containing commas would not match — confirm
        # against real expressions if that form is used.
        pattern = r'\{t([^,]+),\s*([^,]+),\s*([^,]+)(?:,\s*([^}]+))?\}'

        matches = re.findall(pattern, expression)

        for match in matches:
            table = match[0].strip()

            # Parse rows
            rows_str = match[1].strip()
            rows = self._parse_dimension_values(rows_str, 'r')

            # Parse columns
            cols_str = match[2].strip()
            columns = self._parse_dimension_values(cols_str, 'c')

            # Parse sheets (optional; findall yields '' for an unmatched group)
            sheets = []
            if len(match) > 3 and match[3]:
                sheets_str = match[3].strip()
                sheets = self._parse_dimension_values(sheets_str, 's')

            cell_refs.append(CellReference(
                table=table,
                rows=rows,
                columns=columns,
                sheets=sheets
            ))

        return cell_refs

    def _parse_dimension_values(self, dim_str: str, prefix: str) -> List[str]:
        """
        Parse dimension values (rows, columns, or sheets) from a string.

        Args:
            dim_str (str): String containing dimension values
            prefix (str): Expected prefix ('r', 'c', or 's')

        Returns:
            List[str]: List of parsed dimension values
        """
        values = []

        # Remove prefix and parentheses if present
        dim_str = dim_str.strip()
        if dim_str.startswith(prefix):
            # Single-character prefix, so drop exactly one character.
            dim_str = dim_str[1:]
        if dim_str.startswith('(') and dim_str.endswith(')'):
            dim_str = dim_str[1:-1]

        # Split by comma and clean up
        if dim_str:
            for value in dim_str.split(','):
                value = value.strip()
                if value:
                    values.append(value)

        return values

    def __del__(self):
        """Clean up resources."""
        # hasattr guard: __init__ may have failed before creating the session.
        if hasattr(self, 'session'):
            self.session.close()
508
+
509
+
510
+ # Convenience functions for direct usage
511
def validate_dpm_xl_expression(expression: str) -> List[ValidationIssue]:
    """
    Convenience function to validate a DPM-XL expression.

    Creates a throwaway :class:`DataDictionaryValidator` and delegates to its
    :meth:`validate_expression_references` method.

    Args:
        expression (str): DPM-XL expression to validate

    Returns:
        List[ValidationIssue]: List of validation issues found
    """
    return DataDictionaryValidator().validate_expression_references(expression)
523
+
524
+
525
def validate_table_references(table_names: List[str]) -> List[ValidationIssue]:
    """
    Convenience function to validate table references.

    Runs :meth:`DataDictionaryValidator.validate_table_exists` for every name
    and concatenates the resulting issue lists in input order.

    Args:
        table_names (List[str]): List of table names to validate

    Returns:
        List[ValidationIssue]: List of validation issues found
    """
    checker = DataDictionaryValidator()
    collected: List[ValidationIssue] = []
    for name in table_names:
        collected += checker.validate_table_exists(name)
    return collected
542
+
543
+
544
def check_data_dictionary_health() -> Dict[str, Any]:
    """
    Perform a comprehensive health check of the data dictionary.

    Currently reports the number of distinct tables; further checks
    (missing table definitions, orphaned references, inconsistent naming)
    are planned but not yet implemented.

    Returns:
        Dict[str, Any]: Health check results
    """
    validator = DataDictionaryValidator()

    report: Dict[str, Any] = {
        "status": "healthy",  # downgraded to "critical" on a database error
        "total_tables": 0,
        "issues_found": [],
        "recommendations": [],
    }

    try:
        # Count distinct tables as a basic liveness/content probe.
        row = validator.session.execute(
            text("SELECT COUNT(DISTINCT table_code) FROM datapoints")
        ).fetchone()
        if row:
            report["total_tables"] = row[0]
        # TODO: add more comprehensive checks here.
    except Exception as e:
        report["status"] = "critical"
        report["issues_found"].append(f"Database connection error: {str(e)}")

    return report
@@ -0,0 +1,77 @@
1
+ import os
2
+ from typing import Optional
3
+ from sqlalchemy import Engine
4
+
5
+ from py_dpm.migration import run_migration as _run_migration
6
+
7
+
8
class MigrationAPI:
    """
    API for database migration operations.

    This class provides methods to migrate data from Access databases to SQLite.
    It is stateless; each call delegates to :func:`py_dpm.migration.run_migration`.
    """

    def __init__(self):
        """Initialize the Migration API."""
        # No state to set up; the class exists only as an API surface.
        pass

    def migrate_access_to_sqlite(
        self,
        access_file_path: str,
        sqlite_db_path: Optional[str] = None
    ) -> Engine:
        """
        Migrate data from an Access database to SQLite.

        Args:
            access_file_path (str): Path to the Access database file (.mdb or .accdb)
            sqlite_db_path (Optional[str]): Path for the SQLite database.
                If None, falls back to the SQLITE_DB_PATH environment
                variable, then to "database.db"

        Returns:
            Engine: SQLAlchemy engine for the created SQLite database

        Raises:
            FileNotFoundError: If the Access file doesn't exist
            Exception: If migration fails (the original error is chained as
                ``__cause__``)

        Example:
            >>> from py_dpm.api import MigrationAPI
            >>> migration = MigrationAPI()
            >>> engine = migration.migrate_access_to_sqlite("data.mdb", "output.db")
        """
        # Fail fast with a clear error before attempting any migration work.
        if not os.path.exists(access_file_path):
            raise FileNotFoundError(f"Access file not found: {access_file_path}")

        if sqlite_db_path is None:
            sqlite_db_path = os.getenv("SQLITE_DB_PATH", "database.db")

        try:
            engine = _run_migration(access_file_path, sqlite_db_path)
            return engine
        except Exception as e:
            # Re-wrap with context while preserving the original traceback.
            raise Exception(f"Migration failed: {str(e)}") from e
55
+
56
+
57
+ # Convenience function for direct usage
58
def migrate_access_to_sqlite(
    access_file_path: str,
    sqlite_db_path: Optional[str] = None
) -> Engine:
    """
    Convenience function to migrate Access database to SQLite.

    Thin wrapper that instantiates :class:`MigrationAPI` and forwards both
    arguments to its method of the same name.

    Args:
        access_file_path (str): Path to the Access database file
        sqlite_db_path (Optional[str]): Path for the SQLite database

    Returns:
        Engine: SQLAlchemy engine for the created SQLite database

    Example:
        >>> from py_dpm.api.migration import migrate_access_to_sqlite
        >>> engine = migrate_access_to_sqlite("data.mdb", "output.db")
    """
    return MigrationAPI().migrate_access_to_sqlite(access_file_path, sqlite_db_path)