pydpm_xl 0.2.9__tar.gz → 0.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. {pydpm_xl-0.2.9/pydpm_xl.egg-info → pydpm_xl-0.2.10}/PKG-INFO +1 -1
  2. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/__init__.py +1 -1
  3. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/ast_generator.py +83 -38
  4. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/migration.py +40 -45
  5. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/models.py +22 -2
  6. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/operands.py +41 -32
  7. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10/pydpm_xl.egg-info}/PKG-INFO +1 -1
  8. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/SOURCES.txt +1 -0
  9. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pyproject.toml +2 -2
  10. pydpm_xl-0.2.10/tests/test_migration_type_inference.py +210 -0
  11. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/LICENSE +0 -0
  12. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/README.md +0 -0
  13. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/__init__.py +0 -0
  14. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/__init__.py +0 -0
  15. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/data_dictionary.py +0 -0
  16. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/explorer.py +0 -0
  17. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/hierarchical_queries.py +0 -0
  18. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/instance.py +0 -0
  19. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm/migration.py +0 -0
  20. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/__init__.py +0 -0
  21. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/complete_ast.py +0 -0
  22. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/operation_scopes.py +0 -0
  23. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/semantic.py +0 -0
  24. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/syntax.py +0 -0
  25. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/cli/__init__.py +0 -0
  26. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/cli/commands/__init__.py +0 -0
  27. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/cli/main.py +0 -0
  28. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/__init__.py +0 -0
  29. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/base.py +0 -0
  30. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/basic_objects.py +0 -0
  31. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/explorer_queries.py +0 -0
  32. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/filters.py +0 -0
  33. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/glossary.py +0 -0
  34. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/hierarchical_queries.py +0 -0
  35. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/queries/tables.py +0 -0
  36. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/utils.py +0 -0
  37. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/__init__.py +0 -0
  38. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/__init__.py +0 -0
  39. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/constructor.py +0 -0
  40. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/ml_generation.py +0 -0
  41. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/module_analyzer.py +0 -0
  42. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/module_dependencies.py +0 -0
  43. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/nodes.py +0 -0
  44. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/template.py +0 -0
  45. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/visitor.py +0 -0
  46. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/where_clause.py +0 -0
  47. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/__init__.py +0 -0
  48. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/__init__.py +0 -0
  49. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlLexer.interp +0 -0
  50. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlLexer.py +0 -0
  51. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlLexer.tokens +0 -0
  52. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlParser.interp +0 -0
  53. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlParser.py +0 -0
  54. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlParser.tokens +0 -0
  55. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlParserListener.py +0 -0
  56. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/dpm_xlParserVisitor.py +0 -0
  57. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/grammar/generated/listeners.py +0 -0
  58. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/__init__.py +0 -0
  59. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/aggregate.py +0 -0
  60. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/arithmetic.py +0 -0
  61. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/base.py +0 -0
  62. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/boolean.py +0 -0
  63. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/clause.py +0 -0
  64. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/comparison.py +0 -0
  65. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/conditional.py +0 -0
  66. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/string.py +0 -0
  67. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/operators/time.py +0 -0
  68. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/semantic_analyzer.py +0 -0
  69. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/symbols.py +0 -0
  70. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/types/__init__.py +0 -0
  71. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/types/promotion.py +0 -0
  72. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/types/scalar.py +0 -0
  73. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/types/time.py +0 -0
  74. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/__init__.py +0 -0
  75. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/data_handlers.py +0 -0
  76. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/operands_mapping.py +0 -0
  77. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/operator_mapping.py +0 -0
  78. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/scopes_calculator.py +0 -0
  79. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/serialization.py +0 -0
  80. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/utils/tokens.py +0 -0
  81. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/exceptions/__init__.py +0 -0
  82. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/exceptions/exceptions.py +0 -0
  83. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/exceptions/messages.py +0 -0
  84. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/instance/__init__.py +0 -0
  85. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/instance/instance.py +0 -0
  86. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/dependency_links.txt +0 -0
  87. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/entry_points.txt +0 -0
  88. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/requires.txt +0 -0
  89. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/top_level.txt +0 -0
  90. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/setup.cfg +0 -0
  91. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_cli_semantic.py +0 -0
  92. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_data_dictionary_releases.py +0 -0
  93. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_db_connection_handling.py +0 -0
  94. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_get_table_details.py +0 -0
  95. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_get_tables_date_filter.py +0 -0
  96. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_get_tables_release_code.py +0 -0
  97. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_hierarchical_query.py +0 -0
  98. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_query_refactor.py +0 -0
  99. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_release_filters_semantic.py +0 -0
  100. {pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/tests/test_semantic_release.py +0 -0
{pydpm_xl-0.2.9/pydpm_xl.egg-info → pydpm_xl-0.2.10}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pydpm_xl
- Version: 0.2.9
+ Version: 0.2.10
  Summary: Python library for DPM-XL data processing and analysis
  Author-email: "MeaningfulData S.L." <info@meaningfuldata.eu>
  License: GPL-3.0-or-later

{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/__init__.py
@@ -41,7 +41,7 @@ Available packages:
  - pydpm.api: Main APIs for migration, syntax, and semantic analysis
  """

- __version__ = "0.2.9"
+ __version__ = "0.2.10"
  __author__ = "MeaningfulData S.L."
  __email__ = "info@meaningfuldata.eu"
  __license__ = "GPL-3.0-or-later"
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/api/dpm_xl/ast_generator.py
@@ -984,6 +984,7 @@ class ASTGeneratorAPI:
  """
  from py_dpm.dpm.utils import get_engine
  from py_dpm.api.dpm import DataDictionaryAPI
+ from py_dpm.api.dpm_xl.operation_scopes import OperationScopesAPI

  # Initialize database connection
  engine = get_engine(database_path=self.database_path, connection_url=self.connection_url)
@@ -1017,6 +1018,12 @@ class ASTGeneratorAPI:
  connection_url=self.connection_url
  )

+ # Initialize OperationScopesAPI once for all expressions (performance optimization)
+ scopes_api = OperationScopesAPI(
+ database_path=self.database_path,
+ connection_url=self.connection_url
+ )
+
  # Primary module info will be determined from the first expression or module_code
  primary_module_info = None
  namespace = None
@@ -1035,6 +1042,21 @@ class ASTGeneratorAPI:
  complete_ast = complete_result["ast"]
  context = complete_result.get("context") or table_context

+ # Get tables with modules for this expression FIRST (reuse scopes_api from outer scope)
+ # This is done before _get_primary_module_info to pass precomputed values
+ tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
+ expression=expression,
+ release_id=release_id
+ )
+
+ # Calculate scope_result once (avoid duplicate calls in other methods)
+ scope_result = scopes_api.calculate_scopes_from_expression(
+ expression=expression,
+ release_id=release_id,
+ read_only=True
+ )
+ all_tables_with_modules.extend(tables_with_modules)
+
  # Get primary module info from first expression (or use module_code)
  if primary_module_info is None:
  primary_module_info = self._get_primary_module_info(
@@ -1042,6 +1064,9 @@ class ASTGeneratorAPI:
  primary_module_vid=primary_module_vid,
  release_id=release_id,
  module_code=module_code,
+ # Performance optimization: pass precomputed values
+ tables_with_modules=tables_with_modules,
+ scopes_api=scopes_api,
  )
  namespace = primary_module_info.get("module_uri", "default_module")

@@ -1066,18 +1091,6 @@ class ASTGeneratorAPI:
  # Clean extra fields from data entries
  self._clean_ast_data_entries(ast_with_coords)

- # Get tables with modules for this expression
- from py_dpm.api.dpm_xl.operation_scopes import OperationScopesAPI
- scopes_api = OperationScopesAPI(
- database_path=self.database_path,
- connection_url=self.connection_url
- )
- tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
- expression=expression,
- release_id=release_id
- )
- all_tables_with_modules.extend(tables_with_modules)
-
  # Build mapping of table_code -> module_vid
  # Prefer the module VID that matches the detected primary module
  table_to_module = {}
@@ -1179,6 +1192,10 @@ class ASTGeneratorAPI:
  operation_code=operation_code,
  release_id=release_id,
  preferred_module_dependencies=preferred_module_dependencies,
+ # Performance optimization: pass precomputed values to avoid redundant work
+ tables_with_modules=tables_with_modules,
+ scopes_api=scopes_api,
+ scope_result=scope_result,
  )

  # Merge dependency modules (avoid table duplicates)
@@ -1313,6 +1330,8 @@ class ASTGeneratorAPI:
  primary_module_vid: Optional[int],
  release_id: Optional[int],
  module_code: Optional[str] = None,
+ tables_with_modules: Optional[List[Dict[str, Any]]] = None,
+ scopes_api: Optional[Any] = None,
  ) -> Dict[str, Any]:
  """
  Detect and return metadata for the primary module from the expression.
@@ -1323,6 +1342,10 @@ class ASTGeneratorAPI:
  release_id: Optional release ID for filtering
  module_code: Optional module code (e.g., "FINREP9") - takes precedence over
  primary_module_vid if provided
+ tables_with_modules: Optional precomputed tables with module metadata
+ (performance optimization to avoid redundant database queries)
+ scopes_api: Optional precomputed OperationScopesAPI instance
+ (performance optimization to reuse database connections)

  Returns:
  Dict with module_uri, module_code, module_version, framework_code,
@@ -1341,20 +1364,28 @@ class ASTGeneratorAPI:
  "module_vid": None,
  }

+ # Track if we created the scopes_api locally (need to close it)
+ local_scopes_api = False
+
  try:
- scopes_api = OperationScopesAPI(
- database_path=self.database_path,
- connection_url=self.connection_url
- )
+ # Reuse provided scopes_api or create a new one
+ if scopes_api is None:
+ scopes_api = OperationScopesAPI(
+ database_path=self.database_path,
+ connection_url=self.connection_url
+ )
+ local_scopes_api = True

- # Get tables with module metadata from expression
- tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
- expression=expression,
- release_id=release_id
- )
+ # Reuse provided tables_with_modules or fetch if not available
+ if tables_with_modules is None:
+ tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
+ expression=expression,
+ release_id=release_id
+ )

  if not tables_with_modules:
- scopes_api.close()
+ if local_scopes_api:
+ scopes_api.close()
  return default_info

  # Determine primary module
@@ -1408,7 +1439,8 @@ class ASTGeneratorAPI:
  to_date = module.get("to_reference_date", to_date)
  break

- scopes_api.close()
+ if local_scopes_api:
+ scopes_api.close()

  return {
  "module_uri": module_uri or "default_module",
@@ -1864,6 +1896,9 @@ class ASTGeneratorAPI:
  operation_code: str,
  release_id: Optional[int] = None,
  preferred_module_dependencies: Optional[List[str]] = None,
+ tables_with_modules: Optional[List[Dict[str, Any]]] = None,
+ scopes_api: Optional[Any] = None,
+ scope_result: Optional[Any] = None,
  ) -> tuple:
  """
  Detect cross-module dependencies for a single expression.
@@ -1879,6 +1914,12 @@ class ASTGeneratorAPI:
  release_id: Optional release ID for filtering
  preferred_module_dependencies: Optional list of module codes to prefer when
  a table belongs to multiple modules
+ tables_with_modules: Optional precomputed tables with module metadata
+ (performance optimization to avoid redundant database queries)
+ scopes_api: Optional precomputed OperationScopesAPI instance
+ (performance optimization to reuse database connections)
+ scope_result: Optional precomputed scope result from calculate_scopes_from_expression
+ (performance optimization to avoid redundant computation)

  Returns:
  Tuple of (dependency_modules, cross_instance_dependencies)
@@ -1889,24 +1930,28 @@ class ASTGeneratorAPI:
  from py_dpm.dpm.queries.explorer_queries import ExplorerQuery
  import logging

- scopes_api = OperationScopesAPI(
- database_path=self.database_path,
- connection_url=self.connection_url
- )
+ # Reuse provided scopes_api or create a new one
+ if scopes_api is None:
+ scopes_api = OperationScopesAPI(
+ database_path=self.database_path,
+ connection_url=self.connection_url
+ )

  try:
- # Get tables with module info (includes module_version)
- tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
- expression=expression,
- release_id=release_id
- )
+ # Reuse provided tables_with_modules or fetch if not available
+ if tables_with_modules is None:
+ tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
+ expression=expression,
+ release_id=release_id
+ )

- # Check if cross-module
- scope_result = scopes_api.calculate_scopes_from_expression(
- expression=expression,
- release_id=release_id,
- read_only=True
- )
+ # Reuse provided scope_result or compute if not available
+ if scope_result is None:
+ scope_result = scopes_api.calculate_scopes_from_expression(
+ expression=expression,
+ release_id=release_id,
+ read_only=True
+ )

  if scope_result.has_error or not scope_result.is_cross_module:
  return {}, []
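The recurring pattern in these hunks is worth isolating: an expensive, connection-owning object is created once by the caller, threaded through helpers as an optional parameter, and closed only by whoever created it. A minimal sketch of that ownership convention, using a hypothetical ScopesLike stand-in rather than py_dpm's real OperationScopesAPI:

from typing import Optional

class ScopesLike:
    """Stand-in for a connection-owning object (hypothetical, not py_dpm's API)."""
    def __init__(self) -> None:
        self.closed = False

    def close(self) -> None:
        self.closed = True

def analyze(expression: str, scopes_api: Optional[ScopesLike] = None) -> None:
    # Track whether we created the resource locally, so we only close what we own.
    local_scopes_api = scopes_api is None
    if local_scopes_api:
        scopes_api = ScopesLike()
    try:
        pass  # ... use scopes_api to resolve tables/scopes for `expression` ...
    finally:
        if local_scopes_api:
            scopes_api.close()

# Caller: build once, reuse across many expressions, close at the end.
shared = ScopesLike()
for expr in ["{t1} + {t2}", "{t3} > 0"]:
    analyze(expr, scopes_api=shared)  # no per-call construction/teardown
shared.close()

The helpers stay usable standalone (they fall back to creating their own instance), while batch callers pay the construction and teardown cost once instead of once per expression.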
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/migration.py
@@ -103,14 +103,16 @@ def _extract_with_pyodbc(access_file):
  import pyodbc
  except ImportError:
  raise Exception("pyodbc not available")
-
+
+ import decimal
+
  # Try different Access drivers
  drivers_to_try = [
  r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};',
  r'DRIVER={Microsoft Access Driver (*.mdb)};',
  r'DRIVER={MDBTools};'
  ]
-
+
  conn = None
  for driver in drivers_to_try:
  try:
@@ -120,10 +122,10 @@ def _extract_with_pyodbc(access_file):
  break
  except pyodbc.Error:
  continue
-
+
  if not conn:
  raise Exception("No suitable ODBC driver found for Access database")
-
+
  try:
  # Get all table names
  cursor = conn.cursor()
@@ -132,63 +134,56 @@ def _extract_with_pyodbc(access_file):
  table_name = table_info.table_name
  if not table_name.startswith('MSys'): # Skip system tables
  tables.append(table_name)
-
+
  data = {}
- STRING_COLUMNS = ["row", "column", "sheet"]
-
+
  # Extract each table
  for table_name in tables:
  print(table_name)
  try:
  cursor.execute(f"SELECT * FROM [{table_name}]")
- columns = [column[0] for column in cursor.description]
+
+ # Get column metadata from cursor.description
+ # Each entry is: (name, type_code, display_size, internal_size, precision, scale, null_ok)
+ # type_code is a Python type (str, int, float, decimal.Decimal, etc.)
+ column_info = []
+ for col_desc in cursor.description:
+ col_name = col_desc[0]
+ col_type = col_desc[1] # Python type from ODBC metadata
+ column_info.append((col_name, col_type))
+
+ columns = [info[0] for info in column_info]
  rows = cursor.fetchall()
-
+
  if rows:
  # Convert to DataFrame
  df = pd.DataFrame([list(row) for row in rows], columns=columns)

- # Apply same dtype conversion logic as mdb-tools method
- # Start with all strings, but preserve None as actual None (not string 'None')
- for col in df.columns:
- df[col] = df[col].astype(object)
- mask = df[col].notna()
- df.loc[mask, col] = df.loc[mask, col].astype(str)
-
- numeric_columns = []
- for column in df.columns:
- if column in STRING_COLUMNS:
- continue
- try:
- # Convert to numeric and check if any values start with '0' (except '0' itself)
- # Only check string values for leading zeros
- string_mask = df[column].astype(str).str.match(r'^0\d+', na=False)
- has_leading_zeros = string_mask.any()
-
- # Test numeric conversion
- numeric_series = pd.to_numeric(df[column], errors='coerce')
-
- if not has_leading_zeros and not numeric_series.isna().all():
- numeric_columns.append(column)
- except Exception:
- continue
-
- # Convert only the identified numeric columns
- for col in numeric_columns:
- try:
- df[col] = pd.to_numeric(df[col], errors='coerce')
- except (ValueError, TypeError):
- # Keep as string if conversion fails
- pass
-
+ # Use the actual column types from Access schema metadata
+ # instead of inferring from data values (fixes Windows vs Linux inconsistency)
+ numeric_types = (int, float, decimal.Decimal)
+
+ for col_name, col_type in column_info:
+ if col_type in numeric_types:
+ # Column is defined as numeric in Access schema - convert to numeric
+ try:
+ df[col_name] = pd.to_numeric(df[col_name], errors='coerce')
+ except (ValueError, TypeError):
+ pass
+ else:
+ # Column is defined as text/other in Access schema - keep as string
+ df[col_name] = df[col_name].astype(object)
+ mask = df[col_name].notna()
+ df.loc[mask, col_name] = df.loc[mask, col_name].astype(str)
+
  data[table_name] = df
-
+
  except Exception as e:
  print(f"Error processing table {table_name}: {e}", file=sys.stderr)
  continue
-
+
  return data
-
+
  finally:
  conn.close()
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm/models.py
@@ -1,5 +1,5 @@
  from datetime import datetime
- from typing import List
+ from typing import Dict, Hashable, List, Tuple
  from sqlalchemy import (
  Boolean,
  Column,
@@ -36,6 +36,11 @@ class SerializationMixin:
  Base = declarative_base(cls=SerializationMixin)


+ def _get_engine_cache_key(session) -> Hashable:
+ bind = session.get_bind()
+ return getattr(bind, "url", repr(bind))
+
+
  def _read_sql_with_connection(sql, session):
  """
  Execute pd.read_sql with proper connection handling to avoid pandas warnings.
@@ -2319,6 +2324,11 @@ class ViewDatapoints(Base):
  context_id = Column(Integer)
  variable_vid = Column(String)

+ _TABLE_DATA_CACHE: Dict[
+ Tuple[Hashable, str, Tuple[str, ...] | None, Tuple[str, ...] | None, Tuple[str, ...] | None, int | None],
+ pd.DataFrame,
+ ] = {}
+
  @classmethod
  def _create_base_query_with_aliases(cls, session):
  """
@@ -2552,7 +2562,16 @@ class ViewDatapoints(Base):
  def get_table_data(
  cls, session, table, rows=None, columns=None, sheets=None, release_id=None
  ):
- # Build query using ORM for database-agnostic compatibility
+ engine_key = _get_engine_cache_key(session)
+ rows_key = tuple(rows) if rows is not None else None
+ columns_key = tuple(columns) if columns is not None else None
+ sheets_key = tuple(sheets) if sheets is not None else None
+ cache_key = (engine_key, table, rows_key, columns_key, sheets_key, release_id)
+
+ cached = cls._TABLE_DATA_CACHE.get(cache_key)
+ if cached is not None:
+ return cached
+
  query, aliases = cls._create_base_query_with_aliases(session)

  # Add column selections
@@ -2669,6 +2688,7 @@ class ViewDatapoints(Base):
  data = _check_ranges_values_are_present(data, "column_code", columns)
  data = _check_ranges_values_are_present(data, "sheet_code", sheets)

+ cls._TABLE_DATA_CACHE[cache_key] = data
  return data

  @classmethod
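The cache added here keys each result by the engine URL plus the normalized query parameters: list-valued filters become tuples so the key is hashable, and None (no filter) stays distinct from an empty selection. A minimal sketch of the same key scheme outside the ORM class, where _fetch_table_data is a hypothetical placeholder for the real query, not py_dpm code:

from typing import Any, Dict, Hashable, Optional, Tuple

_TABLE_DATA_CACHE: Dict[Tuple, Any] = {}

def _engine_cache_key(session) -> Hashable:
    # SQLAlchemy engines expose .url; fall back to repr() for other binds
    bind = session.get_bind()
    return getattr(bind, "url", repr(bind))

def _fetch_table_data(session, table, rows, columns, sheets, release_id):
    """Hypothetical placeholder for the real ORM query in get_table_data."""
    raise NotImplementedError

def get_table_data_cached(session, table: str, rows=None, columns=None,
                          sheets=None, release_id: Optional[int] = None):
    cache_key = (
        _engine_cache_key(session),  # scopes the cache per database
        table,
        tuple(rows) if rows is not None else None,      # lists -> hashable
        tuple(columns) if columns is not None else None,
        tuple(sheets) if sheets is not None else None,  # None != empty tuple
        release_id,
    )
    cached = _TABLE_DATA_CACHE.get(cache_key)
    if cached is not None:
        return cached
    data = _fetch_table_data(session, table, rows, columns, sheets, release_id)
    _TABLE_DATA_CACHE[cache_key] = data
    return data

One design consequence worth noting: the cache lives for the lifetime of the process, which suits read-mostly DPM databases but means a modified database reached through the same URL would keep serving stale frames.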
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/py_dpm/dpm_xl/ast/operands.py
@@ -2,6 +2,7 @@ from abc import ABC

  import pandas as pd
  import warnings
+ from typing import Dict, Hashable, Tuple

  # Suppress pandas UserWarning about SQLAlchemy connection types
  warnings.filterwarnings("ignore", message=".*pandas only supports SQLAlchemy.*")
@@ -43,6 +44,9 @@ from py_dpm.dpm_xl.utils.data_handlers import filter_all_data
  operand_elements = ["table", "rows", "cols", "sheets", "default", "interval"]


+ _HEADERS_CACHE: Dict[Tuple[Hashable, int, Tuple[str, ...]], pd.DataFrame] = {}
+
+
  def _create_operand_label(node):
  label = generate_new_label()
  node.label = label
@@ -185,42 +189,47 @@ class OperandsChecking(ASTTemplate, ABC):
  if len(table_codes) == 0:
  return

- # Build ORM query
- query = (
- self.session.query(
- TableVersion.code.label("Code"),
- TableVersion.startreleaseid.label("StartReleaseID"),
- TableVersion.endreleaseid.label("EndReleaseID"),
- Header.direction.label("Direction"),
- Table.hasopenrows.label("HasOpenRows"),
- Table.hasopencolumns.label("HasOpenColumns"),
- Table.hasopensheets.label("HasOpenSheets"),
- )
- .join(Table, Table.tableid == TableVersion.tableid)
- .join(
- TableVersionHeader, TableVersion.tablevid == TableVersionHeader.tablevid
+ engine = self.session.get_bind()
+ engine_key: Hashable = getattr(engine, "url", repr(engine))
+ cache_key = (engine_key, self.release_id, tuple(sorted(table_codes)))
+
+ df_headers = _HEADERS_CACHE.get(cache_key)
+ if df_headers is None:
+ query = (
+ self.session.query(
+ TableVersion.code.label("Code"),
+ TableVersion.startreleaseid.label("StartReleaseID"),
+ TableVersion.endreleaseid.label("EndReleaseID"),
+ Header.direction.label("Direction"),
+ Table.hasopenrows.label("HasOpenRows"),
+ Table.hasopencolumns.label("HasOpenColumns"),
+ Table.hasopensheets.label("HasOpenSheets"),
+ )
+ .join(Table, Table.tableid == TableVersion.tableid)
+ .join(
+ TableVersionHeader,
+ TableVersion.tablevid == TableVersionHeader.tablevid,
+ )
+ .join(Header, Header.headerid == TableVersionHeader.headerid)
+ .filter(TableVersion.code.in_(table_codes))
+ .distinct()
  )
- .join(Header, Header.headerid == TableVersionHeader.headerid)
- .filter(TableVersion.code.in_(table_codes))
- .distinct()
- )

- # Apply release filter
- query = filter_by_release(
- query,
- start_col=TableVersion.startreleaseid,
- end_col=TableVersion.endreleaseid,
- release_id=self.release_id,
- )
+ query = filter_by_release(
+ query,
+ start_col=TableVersion.startreleaseid,
+ end_col=TableVersion.endreleaseid,
+ release_id=self.release_id,
+ )

- # Execute query and convert to DataFrame
- from py_dpm.dpm.models import (
- _compile_query_for_pandas,
- _read_sql_with_connection,
- )
+ from py_dpm.dpm.models import (
+ _compile_query_for_pandas,
+ _read_sql_with_connection,
+ )

- compiled_query = _compile_query_for_pandas(query.statement, self.session)
- df_headers = _read_sql_with_connection(compiled_query, self.session)
+ compiled_query = _compile_query_for_pandas(query.statement, self.session)
+ df_headers = _read_sql_with_connection(compiled_query, self.session)
+ _HEADERS_CACHE[cache_key] = df_headers

  for table in table_codes:
  table_headers = df_headers[df_headers["Code"] == table]
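Unlike the get_table_data cache, this one sorts the table codes before building the key, so the same set of tables hits one cache entry regardless of the order the expression mentions them in. A small sketch of that normalization, with hypothetical values standing in for the real engine key:

from typing import Dict, Hashable, Tuple
import pandas as pd

_HEADERS_CACHE: Dict[Tuple[Hashable, int, Tuple[str, ...]], pd.DataFrame] = {}

def headers_cache_key(engine_key: Hashable, release_id: int,
                      table_codes: list) -> Tuple[Hashable, int, Tuple[str, ...]]:
    # Sorting makes the key order-insensitive: ["T1", "T2"] and ["T2", "T1"]
    # map to the same cache entry.
    return (engine_key, release_id, tuple(sorted(table_codes)))

# Usage: one miss populates the cache, any permutation afterwards hits it.
key = headers_cache_key("sqlite:///dpm.db", 42, ["T2", "T1"])
_HEADERS_CACHE[key] = pd.DataFrame({"Code": ["T1", "T2"]})
assert _HEADERS_CACHE.get(headers_cache_key("sqlite:///dpm.db", 42, ["T1", "T2"])) is not None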
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10/pydpm_xl.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pydpm_xl
- Version: 0.2.9
+ Version: 0.2.10
  Summary: Python library for DPM-XL data processing and analysis
  Author-email: "MeaningfulData S.L." <info@meaningfuldata.eu>
  License: GPL-3.0-or-later
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pydpm_xl.egg-info/SOURCES.txt
@@ -92,6 +92,7 @@ tests/test_get_table_details.py
  tests/test_get_tables_date_filter.py
  tests/test_get_tables_release_code.py
  tests/test_hierarchical_query.py
+ tests/test_migration_type_inference.py
  tests/test_query_refactor.py
  tests/test_release_filters_semantic.py
  tests/test_semantic_release.py
{pydpm_xl-0.2.9 → pydpm_xl-0.2.10}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "pydpm_xl"
- version = "0.2.9"
+ version = "0.2.10"
  description = "Python library for DPM-XL data processing and analysis"
  authors = [
  {name = "MeaningfulData S.L.", email = "info@meaningfuldata.eu"}
@@ -52,7 +52,7 @@ exclude = []

  [tool.poetry]
  name = "pydpm_xl"
- version = "0.2.9"
+ version = "0.2.10"
  description = "Python library for DPM-XL data processing and analysis"
  authors = ["MeaningfulData S.L. <info@meaningfuldata.eu>"]
  readme = "README.md"
pydpm_xl-0.2.10/tests/test_migration_type_inference.py
@@ -0,0 +1,210 @@
+ """Tests for migration type inference from Access databases.
+
+ These tests verify that the pyodbc extraction method uses actual column type
+ metadata from the Access schema instead of inferring types from data values,
+ which fixes the Windows vs Linux inconsistency issue.
+ """
+
+ import sys
+ import os
+ import pytest
+ from unittest.mock import Mock, MagicMock, patch
+ import pandas as pd
+ import decimal
+
+ # Add the project root to sys.path
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ project_root = os.path.dirname(current_dir)
+ if project_root not in sys.path:
+ sys.path.insert(0, project_root)
+
+ from py_dpm.dpm.migration import _extract_with_pyodbc
+
+
+ class TestPyodbcTypeInference:
+ """Tests for _extract_with_pyodbc type inference based on schema metadata."""
+
+ @pytest.fixture
+ def mock_pyodbc(self):
+ """Create a mock pyodbc module."""
+ mock_module = MagicMock()
+ mock_module.Error = Exception
+ return mock_module
+
+ def test_text_column_with_numeric_values_stays_text(self, mock_pyodbc):
+ """
+ Text columns containing numeric-looking values should remain as text.
+ This is the core bug fix - previously these would be converted to REAL/INTEGER.
+ """
+ # Mock cursor description: column 'product_code' is TEXT (str type) in Access
+ # but contains values like '123', '456' that look numeric
+ mock_cursor = MagicMock()
+ mock_cursor.description = [
+ ('product_code', str, None, None, None, None, None), # TEXT column
+ ('quantity', int, None, None, None, None, None), # INTEGER column
+ ]
+ mock_cursor.fetchall.return_value = [
+ ('123', 10),
+ ('456', 20),
+ ('789', 30),
+ ]
+ mock_cursor.tables.return_value = [
+ MagicMock(table_name='Products'),
+ ]
+ mock_cursor.execute = MagicMock()
+
+ mock_conn = MagicMock()
+ mock_conn.cursor.return_value = mock_cursor
+
+ mock_pyodbc.connect.return_value = mock_conn
+
+ with patch.dict(sys.modules, {'pyodbc': mock_pyodbc}):
+ result = _extract_with_pyodbc('/fake/path.accdb')
+
+ assert 'Products' in result
+ df = result['Products']
+
+ # product_code should be TEXT (object dtype), not numeric
+ assert df['product_code'].dtype == object
+ assert df['product_code'].tolist() == ['123', '456', '789']
+
+ # quantity should be numeric (it's defined as int in Access)
+ assert pd.api.types.is_numeric_dtype(df['quantity'])
+
+ def test_leading_zeros_preserved_for_text_columns(self, mock_pyodbc):
+ """
+ Text columns with leading zeros should preserve the leading zeros.
+ """
+ mock_cursor = MagicMock()
+ mock_cursor.description = [
+ ('postal_code', str, None, None, None, None, None), # TEXT column
+ ]
+ mock_cursor.fetchall.return_value = [
+ ('01234',),
+ ('00567',),
+ ('09876',),
+ ]
+ mock_cursor.tables.return_value = [
+ MagicMock(table_name='Addresses'),
+ ]
+ mock_cursor.execute = MagicMock()
+
+ mock_conn = MagicMock()
+ mock_conn.cursor.return_value = mock_cursor
+
+ mock_pyodbc.connect.return_value = mock_conn
+
+ with patch.dict(sys.modules, {'pyodbc': mock_pyodbc}):
+ result = _extract_with_pyodbc('/fake/path.accdb')
+
+ df = result['Addresses']
+
+ # Leading zeros must be preserved
+ assert df['postal_code'].tolist() == ['01234', '00567', '09876']
+
+ def test_numeric_columns_are_converted(self, mock_pyodbc):
+ """
+ Columns that are actually defined as numeric in Access should be converted.
+ """
+ mock_cursor = MagicMock()
+ mock_cursor.description = [
+ ('id', int, None, None, None, None, None),
+ ('price', float, None, None, None, None, None),
+ ('amount', decimal.Decimal, None, None, None, None, None),
+ ]
+ mock_cursor.fetchall.return_value = [
+ (1, 10.5, decimal.Decimal('100.00')),
+ (2, 20.5, decimal.Decimal('200.00')),
+ ]
+ mock_cursor.tables.return_value = [
+ MagicMock(table_name='Orders'),
+ ]
+ mock_cursor.execute = MagicMock()
+
+ mock_conn = MagicMock()
+ mock_conn.cursor.return_value = mock_cursor
+
+ mock_pyodbc.connect.return_value = mock_conn
+
+ with patch.dict(sys.modules, {'pyodbc': mock_pyodbc}):
+ result = _extract_with_pyodbc('/fake/path.accdb')
+
+ df = result['Orders']
+
+ # All numeric columns should be numeric types
+ assert pd.api.types.is_numeric_dtype(df['id'])
+ assert pd.api.types.is_numeric_dtype(df['price'])
+ assert pd.api.types.is_numeric_dtype(df['amount'])
+
+ def test_mixed_columns_respect_schema_types(self, mock_pyodbc):
+ """
+ Mixed table with both text and numeric columns should respect schema types.
+ """
+ mock_cursor = MagicMock()
+ mock_cursor.description = [
+ ('account_number', str, None, None, None, None, None), # TEXT - looks numeric
+ ('balance', float, None, None, None, None, None), # REAL - is numeric
+ ('status_code', str, None, None, None, None, None), # TEXT - looks numeric
+ ('transaction_count', int, None, None, None, None, None), # INTEGER
+ ]
+ mock_cursor.fetchall.return_value = [
+ ('1001234567', 1500.50, '200', 5),
+ ('2009876543', 2500.75, '404', 10),
+ ]
+ mock_cursor.tables.return_value = [
+ MagicMock(table_name='Accounts'),
+ ]
+ mock_cursor.execute = MagicMock()
+
+ mock_conn = MagicMock()
+ mock_conn.cursor.return_value = mock_cursor
+
+ mock_pyodbc.connect.return_value = mock_conn
+
+ with patch.dict(sys.modules, {'pyodbc': mock_pyodbc}):
+ result = _extract_with_pyodbc('/fake/path.accdb')
+
+ df = result['Accounts']
+
+ # Text columns stay as text
+ assert df['account_number'].dtype == object
+ assert df['status_code'].dtype == object
+
+ # Numeric columns are converted
+ assert pd.api.types.is_numeric_dtype(df['balance'])
+ assert pd.api.types.is_numeric_dtype(df['transaction_count'])
+
+ def test_null_values_handled_correctly(self, mock_pyodbc):
+ """
+ NULL values should be handled correctly for both text and numeric columns.
+ """
+ mock_cursor = MagicMock()
+ mock_cursor.description = [
+ ('name', str, None, None, None, None, None),
+ ('value', float, None, None, None, None, None),
+ ]
+ mock_cursor.fetchall.return_value = [
+ ('Alice', 100.0),
+ (None, 200.0),
+ ('Bob', None),
+ ]
+ mock_cursor.tables.return_value = [
+ MagicMock(table_name='Data'),
+ ]
+ mock_cursor.execute = MagicMock()
+
+ mock_conn = MagicMock()
+ mock_conn.cursor.return_value = mock_cursor
+
+ mock_pyodbc.connect.return_value = mock_conn
+
+ with patch.dict(sys.modules, {'pyodbc': mock_pyodbc}):
+ result = _extract_with_pyodbc('/fake/path.accdb')
+
+ df = result['Data']
+
+ # Check that nulls are preserved correctly
+ assert pd.isna(df.loc[1, 'name'])
+ assert pd.isna(df.loc[2, 'value'])
+ assert df.loc[0, 'name'] == 'Alice'
+ assert df.loc[0, 'value'] == 100.0
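Assuming pytest and pandas are installed, the new suite runs in isolation and needs no Access ODBC driver, since pyodbc is replaced with a mock via patch.dict(sys.modules, ...):

pytest tests/test_migration_type_inference.py -v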