pydpm_xl 0.2.9__tar.gz → 0.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. {pydpm_xl-0.2.9/pydpm_xl.egg-info → pydpm_xl-0.2.10}/PKG-INFO +1 -1
  2. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/__init__.py +1 -1
  3. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/ast_generator.py +83 -38
  4. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/migration.py +40 -45
  5. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/models.py +22 -2
  6. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/operands.py +41 -32
  7. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10/pydpm_xl.egg-info}/PKG-INFO +1 -1
  8. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/SOURCES.txt +1 -0
  9. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pyproject.toml +2 -2
  10. pydpm_xl-0.2.10/tests/test_migration_type_inference.py +210 -0
  11. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/LICENSE +0 -0
  12. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/README.md +0 -0
  13. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/__init__.py +0 -0
  14. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/__init__.py +0 -0
  15. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/data_dictionary.py +0 -0
  16. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/explorer.py +0 -0
  17. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/hierarchical_queries.py +0 -0
  18. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/instance.py +0 -0
  19. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/migration.py +0 -0
  20. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/__init__.py +0 -0
  21. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/complete_ast.py +0 -0
  22. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/operation_scopes.py +0 -0
  23. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/semantic.py +0 -0
  24. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/syntax.py +0 -0
  25. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/cli/__init__.py +0 -0
  26. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/cli/commands/__init__.py +0 -0
  27. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/cli/main.py +0 -0
  28. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/__init__.py +0 -0
  29. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/base.py +0 -0
  30. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/basic_objects.py +0 -0
  31. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/explorer_queries.py +0 -0
  32. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/filters.py +0 -0
  33. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/glossary.py +0 -0
  34. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/hierarchical_queries.py +0 -0
  35. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/tables.py +0 -0
  36. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/utils.py +0 -0
  37. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/__init__.py +0 -0
  38. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/__init__.py +0 -0
  39. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/constructor.py +0 -0
  40. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/ml_generation.py +0 -0
  41. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/module_analyzer.py +0 -0
  42. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/module_dependencies.py +0 -0
  43. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/nodes.py +0 -0
  44. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/template.py +0 -0
  45. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/visitor.py +0 -0
  46. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/where_clause.py +0 -0
  47. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/__init__.py +0 -0
  48. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/__init__.py +0 -0
  49. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlLexer.interp +0 -0
  50. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlLexer.py +0 -0
  51. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlLexer.tokens +0 -0
  52. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlParser.interp +0 -0
  53. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlParser.py +0 -0
  54. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlParser.tokens +0 -0
  55. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlParserListener.py +0 -0
  56. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlParserVisitor.py +0 -0
  57. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/listeners.py +0 -0
  58. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/__init__.py +0 -0
  59. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/aggregate.py +0 -0
  60. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/arithmetic.py +0 -0
  61. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/base.py +0 -0
  62. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/boolean.py +0 -0
  63. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/clause.py +0 -0
  64. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/comparison.py +0 -0
  65. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/conditional.py +0 -0
  66. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/string.py +0 -0
  67. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/time.py +0 -0
  68. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/semantic_analyzer.py +0 -0
  69. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/symbols.py +0 -0
  70. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/types/__init__.py +0 -0
  71. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/types/promotion.py +0 -0
  72. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/types/scalar.py +0 -0
  73. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/types/time.py +0 -0
  74. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/__init__.py +0 -0
  75. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/data_handlers.py +0 -0
  76. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/operands_mapping.py +0 -0
  77. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/operator_mapping.py +0 -0
  78. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/scopes_calculator.py +0 -0
  79. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/serialization.py +0 -0
  80. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/tokens.py +0 -0
  81. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/exceptions/__init__.py +0 -0
  82. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/exceptions/exceptions.py +0 -0
  83. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/exceptions/messages.py +0 -0
  84. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/instance/__init__.py +0 -0
  85. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/instance/instance.py +0 -0
  86. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/dependency_links.txt +0 -0
  87. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/entry_points.txt +0 -0
  88. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/requires.txt +0 -0
  89. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/top_level.txt +0 -0
  90. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/setup.cfg +0 -0
  91. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_cli_semantic.py +0 -0
  92. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_data_dictionary_releases.py +0 -0
  93. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_db_connection_handling.py +0 -0
  94. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_get_table_details.py +0 -0
  95. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_get_tables_date_filter.py +0 -0
  96. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_get_tables_release_code.py +0 -0
  97. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_hierarchical_query.py +0 -0
  98. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_query_refactor.py +0 -0
  99. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_release_filters_semantic.py +0 -0
  100. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_semantic_release.py +0 -0
{pydpm_xl-0.2.9/pydpm_xl.egg-info → pydpm_xl-0.2.10}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pydpm_xl
- Version: 0.2.9
+ Version: 0.2.10
  Summary: Python library for DPM-XL data processing and analysis
  Author-email: "MeaningfulData S.L." <info@meaningfuldata.eu>
  License: GPL-3.0-or-later

{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/__init__.py
@@ -41,7 +41,7 @@ Available packages:
  - pydpm.api: Main APIs for migration, syntax, and semantic analysis
  """

- __version__ = "0.2.9"
+ __version__ = "0.2.10"
  __author__ = "MeaningfulData S.L."
  __email__ = "info@meaningfuldata.eu"
  __license__ = "GPL-3.0-or-later"
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/ast_generator.py
@@ -984,6 +984,7 @@ class ASTGeneratorAPI:
  """
  from py_dpm.dpm.utils import get_engine
  from py_dpm.api.dpm import DataDictionaryAPI
+ from py_dpm.api.dpm_xl.operation_scopes import OperationScopesAPI

  # Initialize database connection
  engine = get_engine(database_path=self.database_path, connection_url=self.connection_url)
@@ -1017,6 +1018,12 @@ class ASTGeneratorAPI:
  connection_url=self.connection_url
  )

+ # Initialize OperationScopesAPI once for all expressions (performance optimization)
+ scopes_api = OperationScopesAPI(
+ database_path=self.database_path,
+ connection_url=self.connection_url
+ )
+
  # Primary module info will be determined from the first expression or module_code
  primary_module_info = None
  namespace = None
@@ -1035,6 +1042,21 @@ class ASTGeneratorAPI:
  complete_ast = complete_result["ast"]
  context = complete_result.get("context") or table_context

+ # Get tables with modules for this expression FIRST (reuse scopes_api from outer scope)
+ # This is done before _get_primary_module_info to pass precomputed values
+ tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
+ expression=expression,
+ release_id=release_id
+ )
+
+ # Calculate scope_result once (avoid duplicate calls in other methods)
+ scope_result = scopes_api.calculate_scopes_from_expression(
+ expression=expression,
+ release_id=release_id,
+ read_only=True
+ )
+ all_tables_with_modules.extend(tables_with_modules)
+
  # Get primary module info from first expression (or use module_code)
  if primary_module_info is None:
  primary_module_info = self._get_primary_module_info(
@@ -1042,6 +1064,9 @@ class ASTGeneratorAPI:
  primary_module_vid=primary_module_vid,
  release_id=release_id,
  module_code=module_code,
+ # Performance optimization: pass precomputed values
+ tables_with_modules=tables_with_modules,
+ scopes_api=scopes_api,
  )
  namespace = primary_module_info.get("module_uri", "default_module")

@@ -1066,18 +1091,6 @@ class ASTGeneratorAPI:
  # Clean extra fields from data entries
  self._clean_ast_data_entries(ast_with_coords)

- # Get tables with modules for this expression
- from py_dpm.api.dpm_xl.operation_scopes import OperationScopesAPI
- scopes_api = OperationScopesAPI(
- database_path=self.database_path,
- connection_url=self.connection_url
- )
- tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
- expression=expression,
- release_id=release_id
- )
- all_tables_with_modules.extend(tables_with_modules)
-
  # Build mapping of table_code -> module_vid
  # Prefer the module VID that matches the detected primary module
  table_to_module = {}
@@ -1179,6 +1192,10 @@ class ASTGeneratorAPI:
  operation_code=operation_code,
  release_id=release_id,
  preferred_module_dependencies=preferred_module_dependencies,
+ # Performance optimization: pass precomputed values to avoid redundant work
+ tables_with_modules=tables_with_modules,
+ scopes_api=scopes_api,
+ scope_result=scope_result,
  )

  # Merge dependency modules (avoid table duplicates)
@@ -1313,6 +1330,8 @@ class ASTGeneratorAPI:
  primary_module_vid: Optional[int],
  release_id: Optional[int],
  module_code: Optional[str] = None,
+ tables_with_modules: Optional[List[Dict[str, Any]]] = None,
+ scopes_api: Optional[Any] = None,
  ) -> Dict[str, Any]:
  """
  Detect and return metadata for the primary module from the expression.
@@ -1323,6 +1342,10 @@ class ASTGeneratorAPI:
  release_id: Optional release ID for filtering
  module_code: Optional module code (e.g., "FINREP9") - takes precedence over
  primary_module_vid if provided
+ tables_with_modules: Optional precomputed tables with module metadata
+ (performance optimization to avoid redundant database queries)
+ scopes_api: Optional precomputed OperationScopesAPI instance
+ (performance optimization to reuse database connections)

  Returns:
  Dict with module_uri, module_code, module_version, framework_code,
@@ -1341,20 +1364,28 @@ class ASTGeneratorAPI:
  "module_vid": None,
  }

+ # Track if we created the scopes_api locally (need to close it)
+ local_scopes_api = False
+
  try:
- scopes_api = OperationScopesAPI(
- database_path=self.database_path,
- connection_url=self.connection_url
- )
+ # Reuse provided scopes_api or create a new one
+ if scopes_api is None:
+ scopes_api = OperationScopesAPI(
+ database_path=self.database_path,
+ connection_url=self.connection_url
+ )
+ local_scopes_api = True

- # Get tables with module metadata from expression
- tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
- expression=expression,
- release_id=release_id
- )
+ # Reuse provided tables_with_modules or fetch if not available
+ if tables_with_modules is None:
+ tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
+ expression=expression,
+ release_id=release_id
+ )

  if not tables_with_modules:
- scopes_api.close()
+ if local_scopes_api:
+ scopes_api.close()
  return default_info

  # Determine primary module
@@ -1408,7 +1439,8 @@ class ASTGeneratorAPI:
  to_date = module.get("to_reference_date", to_date)
  break

- scopes_api.close()
+ if local_scopes_api:
+ scopes_api.close()

  return {
  "module_uri": module_uri or "default_module",
@@ -1864,6 +1896,9 @@ class ASTGeneratorAPI:
  operation_code: str,
  release_id: Optional[int] = None,
  preferred_module_dependencies: Optional[List[str]] = None,
+ tables_with_modules: Optional[List[Dict[str, Any]]] = None,
+ scopes_api: Optional[Any] = None,
+ scope_result: Optional[Any] = None,
  ) -> tuple:
  """
  Detect cross-module dependencies for a single expression.
@@ -1879,6 +1914,12 @@ class ASTGeneratorAPI:
  release_id: Optional release ID for filtering
  preferred_module_dependencies: Optional list of module codes to prefer when
  a table belongs to multiple modules
+ tables_with_modules: Optional precomputed tables with module metadata
+ (performance optimization to avoid redundant database queries)
+ scopes_api: Optional precomputed OperationScopesAPI instance
+ (performance optimization to reuse database connections)
+ scope_result: Optional precomputed scope result from calculate_scopes_from_expression
+ (performance optimization to avoid redundant computation)

  Returns:
  Tuple of (dependency_modules, cross_instance_dependencies)
@@ -1889,24 +1930,28 @@ class ASTGeneratorAPI:
  from py_dpm.dpm.queries.explorer_queries import ExplorerQuery
  import logging

- scopes_api = OperationScopesAPI(
- database_path=self.database_path,
- connection_url=self.connection_url
- )
+ # Reuse provided scopes_api or create a new one
+ if scopes_api is None:
+ scopes_api = OperationScopesAPI(
+ database_path=self.database_path,
+ connection_url=self.connection_url
+ )

  try:
- # Get tables with module info (includes module_version)
- tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
- expression=expression,
- release_id=release_id
- )
+ # Reuse provided tables_with_modules or fetch if not available
+ if tables_with_modules is None:
+ tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
+ expression=expression,
+ release_id=release_id
+ )

- # Check if cross-module
- scope_result = scopes_api.calculate_scopes_from_expression(
- expression=expression,
- release_id=release_id,
- read_only=True
- )
+ # Reuse provided scope_result or compute if not available
+ if scope_result is None:
+ scope_result = scopes_api.calculate_scopes_from_expression(
+ expression=expression,
+ release_id=release_id,
+ read_only=True
+ )

  if scope_result.has_error or not scope_result.is_cross_module:
  return {}, []
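The recurring pattern in these hunks is worth isolating: an expensive, connection-owning object is created once by the caller, threaded through helpers as an optional parameter, and closed only by whoever created it. A minimal sketch of that ownership convention, using a hypothetical ScopesLike stand-in rather than py_dpm's real OperationScopesAPI:

from typing import Optional

class ScopesLike:
    """Stand-in for a connection-owning object (hypothetical, not py_dpm's API)."""
    def __init__(self) -> None:
        self.closed = False

    def close(self) -> None:
        self.closed = True

def analyze(expression: str, scopes_api: Optional[ScopesLike] = None) -> None:
    # Track whether we created the resource locally, so we only close what we own.
    local_scopes_api = scopes_api is None
    if local_scopes_api:
        scopes_api = ScopesLike()
    try:
        pass  # ... use scopes_api to resolve tables/scopes for `expression` ...
    finally:
        if local_scopes_api:
            scopes_api.close()

# Caller: build once, reuse across many expressions, close at the end.
shared = ScopesLike()
for expr in ["{t1} + {t2}", "{t3} > 0"]:
    analyze(expr, scopes_api=shared)  # no per-call construction/teardown
shared.close()

The helpers stay usable standalone (they fall back to creating their own instance), while batch callers pay the construction and teardown cost once instead of once per expression.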
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/migration.py
@@ -103,14 +103,16 @@ def _extract_with_pyodbc(access_file):
  import pyodbc
  except ImportError:
  raise Exception("pyodbc not available")
-
+
+ import decimal
+
  # Try different Access drivers
  drivers_to_try = [
  r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};',
  r'DRIVER={Microsoft Access Driver (*.mdb)};',
  r'DRIVER={MDBTools};'
  ]
-
+
  conn = None
  for driver in drivers_to_try:
  try:
@@ -120,10 +122,10 @@ def _extract_with_pyodbc(access_file):
  break
  except pyodbc.Error:
  continue
-
+
  if not conn:
  raise Exception("No suitable ODBC driver found for Access database")
-
+
  try:
  # Get all table names
  cursor = conn.cursor()
@@ -132,63 +134,56 @@ def _extract_with_pyodbc(access_file):
  table_name = table_info.table_name
  if not table_name.startswith('MSys'): # Skip system tables
  tables.append(table_name)
-
+
  data = {}
- STRING_COLUMNS = ["row", "column", "sheet"]
-
+
  # Extract each table
  for table_name in tables:
  print(table_name)
  try:
  cursor.execute(f"SELECT * FROM [{table_name}]")
- columns = [column[0] for column in cursor.description]
+
+ # Get column metadata from cursor.description
+ # Each entry is: (name, type_code, display_size, internal_size, precision, scale, null_ok)
+ # type_code is a Python type (str, int, float, decimal.Decimal, etc.)
+ column_info = []
+ for col_desc in cursor.description:
+ col_name = col_desc[0]
+ col_type = col_desc[1] # Python type from ODBC metadata
+ column_info.append((col_name, col_type))
+
+ columns = [info[0] for info in column_info]
  rows = cursor.fetchall()
-
+
  if rows:
  # Convert to DataFrame
  df = pd.DataFrame([list(row) for row in rows], columns=columns)

- # Apply same dtype conversion logic as mdb-tools method
- # Start with all strings, but preserve None as actual None (not string 'None')
- for col in df.columns:
- df[col] = df[col].astype(object)
- mask = df[col].notna()
- df.loc[mask, col] = df.loc[mask, col].astype(str)
-
- numeric_columns = []
- for column in df.columns:
- if column in STRING_COLUMNS:
- continue
- try:
- # Convert to numeric and check if any values start with '0' (except '0' itself)
- # Only check string values for leading zeros
- string_mask = df[column].astype(str).str.match(r'^0\d+', na=False)
- has_leading_zeros = string_mask.any()
-
- # Test numeric conversion
- numeric_series = pd.to_numeric(df[column], errors='coerce')
-
- if not has_leading_zeros and not numeric_series.isna().all():
- numeric_columns.append(column)
- except Exception:
- continue
-
- # Convert only the identified numeric columns
- for col in numeric_columns:
- try:
- df[col] = pd.to_numeric(df[col], errors='coerce')
- except (ValueError, TypeError):
- # Keep as string if conversion fails
- pass
-
+ # Use the actual column types from Access schema metadata
+ # instead of inferring from data values (fixes Windows vs Linux inconsistency)
+ numeric_types = (int, float, decimal.Decimal)
+
+ for col_name, col_type in column_info:
+ if col_type in numeric_types:
+ # Column is defined as numeric in Access schema - convert to numeric
+ try:
+ df[col_name] = pd.to_numeric(df[col_name], errors='coerce')
+ except (ValueError, TypeError):
+ pass
+ else:
+ # Column is defined as text/other in Access schema - keep as string
+ df[col_name] = df[col_name].astype(object)
+ mask = df[col_name].notna()
+ df.loc[mask, col_name] = df.loc[mask, col_name].astype(str)
+
  data[table_name] = df
-
+
  except Exception as e:
  print(f"Error processing table {table_name}: {e}", file=sys.stderr)
  continue
-
+
  return data
-
+
  finally:
  conn.close()
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/models.py
@@ -1,5 +1,5 @@
  from datetime import datetime
- from typing import List
+ from typing import Dict, Hashable, List, Tuple
  from sqlalchemy import (
  Boolean,
  Column,
@@ -36,6 +36,11 @@ class SerializationMixin:
  Base = declarative_base(cls=SerializationMixin)


+ def _get_engine_cache_key(session) -> Hashable:
+ bind = session.get_bind()
+ return getattr(bind, "url", repr(bind))
+
+
  def _read_sql_with_connection(sql, session):
  """
  Execute pd.read_sql with proper connection handling to avoid pandas warnings.
@@ -2319,6 +2324,11 @@ class ViewDatapoints(Base):
  context_id = Column(Integer)
  variable_vid = Column(String)

+ _TABLE_DATA_CACHE: Dict[
+ Tuple[Hashable, str, Tuple[str, ...] | None, Tuple[str, ...] | None, Tuple[str, ...] | None, int | None],
+ pd.DataFrame,
+ ] = {}
+
  @classmethod
  def _create_base_query_with_aliases(cls, session):
  """
@@ -2552,7 +2562,16 @@ class ViewDatapoints(Base):
  def get_table_data(
  cls, session, table, rows=None, columns=None, sheets=None, release_id=None
  ):
- # Build query using ORM for database-agnostic compatibility
+ engine_key = _get_engine_cache_key(session)
+ rows_key = tuple(rows) if rows is not None else None
+ columns_key = tuple(columns) if columns is not None else None
+ sheets_key = tuple(sheets) if sheets is not None else None
+ cache_key = (engine_key, table, rows_key, columns_key, sheets_key, release_id)
+
+ cached = cls._TABLE_DATA_CACHE.get(cache_key)
+ if cached is not None:
+ return cached
+
  query, aliases = cls._create_base_query_with_aliases(session)

  # Add column selections
@@ -2669,6 +2688,7 @@ class ViewDatapoints(Base):
  data = _check_ranges_values_are_present(data, "column_code", columns)
  data = _check_ranges_values_are_present(data, "sheet_code", sheets)

+ cls._TABLE_DATA_CACHE[cache_key] = data
  return data

  @classmethod
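The cache added here keys each result by the engine URL plus the normalized query parameters: list-valued filters become tuples so the key is hashable, and None (no filter) stays distinct from an empty selection. A minimal sketch of the same key scheme outside the ORM class, where _fetch_table_data is a hypothetical placeholder for the real query, not py_dpm code:

from typing import Any, Dict, Hashable, Optional, Tuple

_TABLE_DATA_CACHE: Dict[Tuple, Any] = {}

def _engine_cache_key(session) -> Hashable:
    # SQLAlchemy engines expose .url; fall back to repr() for other binds
    bind = session.get_bind()
    return getattr(bind, "url", repr(bind))

def _fetch_table_data(session, table, rows, columns, sheets, release_id):
    """Hypothetical placeholder for the real ORM query in get_table_data."""
    raise NotImplementedError

def get_table_data_cached(session, table: str, rows=None, columns=None,
                          sheets=None, release_id: Optional[int] = None):
    cache_key = (
        _engine_cache_key(session),  # scopes the cache per database
        table,
        tuple(rows) if rows is not None else None,      # lists -> hashable
        tuple(columns) if columns is not None else None,
        tuple(sheets) if sheets is not None else None,  # None != empty tuple
        release_id,
    )
    cached = _TABLE_DATA_CACHE.get(cache_key)
    if cached is not None:
        return cached
    data = _fetch_table_data(session, table, rows, columns, sheets, release_id)
    _TABLE_DATA_CACHE[cache_key] = data
    return data

One design consequence worth noting: the cache lives for the lifetime of the process, which suits read-mostly DPM databases but means a modified database reached through the same URL would keep serving stale frames.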
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/operands.py
@@ -2,6 +2,7 @@ from abc import ABC

  import pandas as pd
  import warnings
+ from typing import Dict, Hashable, Tuple

  # Suppress pandas UserWarning about SQLAlchemy connection types
  warnings.filterwarnings("ignore", message=".*pandas only supports SQLAlchemy.*")
@@ -43,6 +44,9 @@ from py_dpm.dpm_xl.utils.data_handlers import filter_all_data
  operand_elements = ["table", "rows", "cols", "sheets", "default", "interval"]


+ _HEADERS_CACHE: Dict[Tuple[Hashable, int, Tuple[str, ...]], pd.DataFrame] = {}
+
+
  def _create_operand_label(node):
  label = generate_new_label()
  node.label = label
@@ -185,42 +189,47 @@ class OperandsChecking(ASTTemplate, ABC):
  if len(table_codes) == 0:
  return

- # Build ORM query
- query = (
- self.session.query(
- TableVersion.code.label("Code"),
- TableVersion.startreleaseid.label("StartReleaseID"),
- TableVersion.endreleaseid.label("EndReleaseID"),
- Header.direction.label("Direction"),
- Table.hasopenrows.label("HasOpenRows"),
- Table.hasopencolumns.label("HasOpenColumns"),
- Table.hasopensheets.label("HasOpenSheets"),
- )
- .join(Table, Table.tableid == TableVersion.tableid)
- .join(
- TableVersionHeader, TableVersion.tablevid == TableVersionHeader.tablevid
+ engine = self.session.get_bind()
+ engine_key: Hashable = getattr(engine, "url", repr(engine))
+ cache_key = (engine_key, self.release_id, tuple(sorted(table_codes)))
+
+ df_headers = _HEADERS_CACHE.get(cache_key)
+ if df_headers is None:
+ query = (
+ self.session.query(
+ TableVersion.code.label("Code"),
+ TableVersion.startreleaseid.label("StartReleaseID"),
+ TableVersion.endreleaseid.label("EndReleaseID"),
+ Header.direction.label("Direction"),
+ Table.hasopenrows.label("HasOpenRows"),
+ Table.hasopencolumns.label("HasOpenColumns"),
+ Table.hasopensheets.label("HasOpenSheets"),
+ )
+ .join(Table, Table.tableid == TableVersion.tableid)
+ .join(
+ TableVersionHeader,
+ TableVersion.tablevid == TableVersionHeader.tablevid,
+ )
+ .join(Header, Header.headerid == TableVersionHeader.headerid)
+ .filter(TableVersion.code.in_(table_codes))
+ .distinct()
  )
- .join(Header, Header.headerid == TableVersionHeader.headerid)
- .filter(TableVersion.code.in_(table_codes))
- .distinct()
- )

- # Apply release filter
- query = filter_by_release(
- query,
- start_col=TableVersion.startreleaseid,
- end_col=TableVersion.endreleaseid,
- release_id=self.release_id,
- )
+ query = filter_by_release(
+ query,
+ start_col=TableVersion.startreleaseid,
+ end_col=TableVersion.endreleaseid,
+ release_id=self.release_id,
+ )

- # Execute query and convert to DataFrame
- from py_dpm.dpm.models import (
- _compile_query_for_pandas,
- _read_sql_with_connection,
- )
+ from py_dpm.dpm.models import (
+ _compile_query_for_pandas,
+ _read_sql_with_connection,
+ )

- compiled_query = _compile_query_for_pandas(query.statement, self.session)
- df_headers = _read_sql_with_connection(compiled_query, self.session)
+ compiled_query = _compile_query_for_pandas(query.statement, self.session)
+ df_headers = _read_sql_with_connection(compiled_query, self.session)
+ _HEADERS_CACHE[cache_key] = df_headers

  for table in table_codes:
  table_headers = df_headers[df_headers["Code"] == table]
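Unlike the get_table_data cache, this one sorts the table codes before building the key, so the same set of tables hits one cache entry regardless of the order the expression mentions them in. A small sketch of that normalization, with hypothetical values standing in for the real engine key:

from typing import Dict, Hashable, Tuple
import pandas as pd

_HEADERS_CACHE: Dict[Tuple[Hashable, int, Tuple[str, ...]], pd.DataFrame] = {}

def headers_cache_key(engine_key: Hashable, release_id: int,
                      table_codes: list) -> Tuple[Hashable, int, Tuple[str, ...]]:
    # Sorting makes the key order-insensitive: ["T1", "T2"] and ["T2", "T1"]
    # map to the same cache entry.
    return (engine_key, release_id, tuple(sorted(table_codes)))

# Usage: one miss populates the cache, any permutation afterwards hits it.
key = headers_cache_key("sqlite:///dpm.db", 42, ["T2", "T1"])
_HEADERS_CACHE[key] = pd.DataFrame({"Code": ["T1", "T2"]})
assert _HEADERS_CACHE.get(headers_cache_key("sqlite:///dpm.db", 42, ["T1", "T2"])) is not None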
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10/pydpm_xl.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pydpm_xl
- Version: 0.2.9
+ Version: 0.2.10
  Summary: Python library for DPM-XL data processing and analysis
  Author-email: "MeaningfulData S.L." <info@meaningfuldata.eu>
  License: GPL-3.0-or-later
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/SOURCES.txt
@@ -92,6 +92,7 @@ tests/test_get_table_details.py
  tests/test_get_tables_date_filter.py
  tests/test_get_tables_release_code.py
  tests/test_hierarchical_query.py
+ tests/test_migration_type_inference.py
  tests/test_query_refactor.py
  tests/test_release_filters_semantic.py
  tests/test_semantic_release.py
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "pydpm_xl"
- version = "0.2.9"
+ version = "0.2.10"
  description = "Python library for DPM-XL data processing and analysis"
  authors = [
  {name = "MeaningfulData S.L.", email = "info@meaningfuldata.eu"}
@@ -52,7 +52,7 @@ exclude = []

  [tool.poetry]
  name = "pydpm_xl"
- version = "0.2.9"
+ version = "0.2.10"
  description = "Python library for DPM-XL data processing and analysis"
  authors = ["MeaningfulData S.L. <info@meaningfuldata.eu>"]
  readme = "README.md"
pydpm_xl-0.2.10/tests/test_migration_type_inference.py
@@ -0,0 +1,210 @@
+ """Tests for migration type inference from Access databases.
+
+ These tests verify that the pyodbc extraction method uses actual column type
+ metadata from the Access schema instead of inferring types from data values,
+ which fixes the Windows vs Linux inconsistency issue.
+ """
+
+ import sys
+ import os
+ import pytest
+ from unittest.mock import Mock, MagicMock, patch
+ import pandas as pd
+ import decimal
+
+ # Add the project root to sys.path
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ project_root = os.path.dirname(current_dir)
+ if project_root not in sys.path:
+ sys.path.insert(0, project_root)
+
+ from py_dpm.dpm.migration import _extract_with_pyodbc
+
+
+ class TestPyodbcTypeInference:
+ """Tests for _extract_with_pyodbc type inference based on schema metadata."""
+
+ @pytest.fixture
+ def mock_pyodbc(self):
+ """Create a mock pyodbc module."""
+ mock_module = MagicMock()
+ mock_module.Error = Exception
+ return mock_module
+
+ def test_text_column_with_numeric_values_stays_text(self, mock_pyodbc):
+ """
+ Text columns containing numeric-looking values should remain as text.
+ This is the core bug fix - previously these would be converted to REAL/INTEGER.
+ """
+ # Mock cursor description: column 'product_code' is TEXT (str type) in Access
+ # but contains values like '123', '456' that look numeric
+ mock_cursor = MagicMock()
+ mock_cursor.description = [
+ ('product_code', str, None, None, None, None, None), # TEXT column
+ ('quantity', int, None, None, None, None, None), # INTEGER column
+ ]
+ mock_cursor.fetchall.return_value = [
+ ('123', 10),
+ ('456', 20),
+ ('789', 30),
+ ]
+ mock_cursor.tables.return_value = [
+ MagicMock(table_name='Products'),
+ ]
+ mock_cursor.execute = MagicMock()
+
+ mock_conn = MagicMock()
+ mock_conn.cursor.return_value = mock_cursor
+
+ mock_pyodbc.connect.return_value = mock_conn
+
+ with patch.dict(sys.modules, {'pyodbc': mock_pyodbc}):
+ result = _extract_with_pyodbc('/fake/path.accdb')
+
+ assert 'Products' in result
+ df = result['Products']
+
+ # product_code should be TEXT (object dtype), not numeric
+ assert df['product_code'].dtype == object
+ assert df['product_code'].tolist() == ['123', '456', '789']
+
+ # quantity should be numeric (it's defined as int in Access)
+ assert pd.api.types.is_numeric_dtype(df['quantity'])
+
+ def test_leading_zeros_preserved_for_text_columns(self, mock_pyodbc):
+ """
+ Text columns with leading zeros should preserve the leading zeros.
+ """
+ mock_cursor = MagicMock()
+ mock_cursor.description = [
+ ('postal_code', str, None, None, None, None, None), # TEXT column
+ ]
+ mock_cursor.fetchall.return_value = [
+ ('01234',),
+ ('00567',),
+ ('09876',),
+ ]
+ mock_cursor.tables.return_value = [
+ MagicMock(table_name='Addresses'),
+ ]
+ mock_cursor.execute = MagicMock()
+
+ mock_conn = MagicMock()
+ mock_conn.cursor.return_value = mock_cursor
+
+ mock_pyodbc.connect.return_value = mock_conn
+
+ with patch.dict(sys.modules, {'pyodbc': mock_pyodbc}):
+ result = _extract_with_pyodbc('/fake/path.accdb')
+
+ df = result['Addresses']
+
+ # Leading zeros must be preserved
+ assert df['postal_code'].tolist() == ['01234', '00567', '09876']
+
+ def test_numeric_columns_are_converted(self, mock_pyodbc):
+ """
+ Columns that are actually defined as numeric in Access should be converted.
+ """
+ mock_cursor = MagicMock()
+ mock_cursor.description = [
+ ('id', int, None, None, None, None, None),
+ ('price', float, None, None, None, None, None),
+ ('amount', decimal.Decimal, None, None, None, None, None),
+ ]
+ mock_cursor.fetchall.return_value = [
+ (1, 10.5, decimal.Decimal('100.00')),
+ (2, 20.5, decimal.Decimal('200.00')),
+ ]
+ mock_cursor.tables.return_value = [
+ MagicMock(table_name='Orders'),
+ ]
+ mock_cursor.execute = MagicMock()
+
+ mock_conn = MagicMock()
+ mock_conn.cursor.return_value = mock_cursor
+
+ mock_pyodbc.connect.return_value = mock_conn
+
+ with patch.dict(sys.modules, {'pyodbc': mock_pyodbc}):
+ result = _extract_with_pyodbc('/fake/path.accdb')
+
+ df = result['Orders']
+
+ # All numeric columns should be numeric types
+ assert pd.api.types.is_numeric_dtype(df['id'])
+ assert pd.api.types.is_numeric_dtype(df['price'])
+ assert pd.api.types.is_numeric_dtype(df['amount'])
+
+ def test_mixed_columns_respect_schema_types(self, mock_pyodbc):
+ """
+ Mixed table with both text and numeric columns should respect schema types.
+ """
+ mock_cursor = MagicMock()
+ mock_cursor.description = [
+ ('account_number', str, None, None, None, None, None), # TEXT - looks numeric
+ ('balance', float, None, None, None, None, None), # REAL - is numeric
+ ('status_code', str, None, None, None, None, None), # TEXT - looks numeric
+ ('transaction_count', int, None, None, None, None, None), # INTEGER
+ ]
+ mock_cursor.fetchall.return_value = [
+ ('1001234567', 1500.50, '200', 5),
+ ('2009876543', 2500.75, '404', 10),
+ ]
+ mock_cursor.tables.return_value = [
+ MagicMock(table_name='Accounts'),
+ ]
+ mock_cursor.execute = MagicMock()
+
+ mock_conn = MagicMock()
+ mock_conn.cursor.return_value = mock_cursor
+
+ mock_pyodbc.connect.return_value = mock_conn
+
+ with patch.dict(sys.modules, {'pyodbc': mock_pyodbc}):
+ result = _extract_with_pyodbc('/fake/path.accdb')
+
+ df = result['Accounts']
+
+ # Text columns stay as text
+ assert df['account_number'].dtype == object
+ assert df['status_code'].dtype == object
+
+ # Numeric columns are converted
+ assert pd.api.types.is_numeric_dtype(df['balance'])
+ assert pd.api.types.is_numeric_dtype(df['transaction_count'])
+
+ def test_null_values_handled_correctly(self, mock_pyodbc):
+ """
+ NULL values should be handled correctly for both text and numeric columns.
+ """
+ mock_cursor = MagicMock()
+ mock_cursor.description = [
+ ('name', str, None, None, None, None, None),
+ ('value', float, None, None, None, None, None),
+ ]
+ mock_cursor.fetchall.return_value = [
+ ('Alice', 100.0),
+ (None, 200.0),
+ ('Bob', None),
+ ]
+ mock_cursor.tables.return_value = [
+ MagicMock(table_name='Data'),
+ ]
+ mock_cursor.execute = MagicMock()
+
+ mock_conn = MagicMock()
+ mock_conn.cursor.return_value = mock_cursor
+
+ mock_pyodbc.connect.return_value = mock_conn
+
+ with patch.dict(sys.modules, {'pyodbc': mock_pyodbc}):
+ result = _extract_with_pyodbc('/fake/path.accdb')
+
+ df = result['Data']
+
+ # Check that nulls are preserved correctly
+ assert pd.isna(df.loc[1, 'name'])
+ assert pd.isna(df.loc[2, 'value'])
+ assert df.loc[0, 'name'] == 'Alice'
+ assert df.loc[0, 'value'] == 100.0
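Assuming pytest and pandas are installed, the new suite runs in isolation and needs no Access ODBC driver, since pyodbc is replaced with a mock via patch.dict(sys.modules, ...):

pytest tests/test_migration_type_inference.py -v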