aio-sf 0.1.0b3__tar.gz → 0.1.0b5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/PKG-INFO +2 -2
  2. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/pyproject.toml +1 -1
  3. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/exporter/bulk_export.py +41 -42
  4. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/exporter/parquet_writer.py +107 -18
  5. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/.cursor/rules/api-structure.mdc +0 -0
  6. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/.cursor/rules/async-patterns.mdc +0 -0
  7. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/.cursor/rules/project-tooling.mdc +0 -0
  8. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/.github/workflows/publish.yml +0 -0
  9. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/.github/workflows/test.yml +0 -0
  10. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/.gitignore +0 -0
  11. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/LICENSE +0 -0
  12. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/README.md +0 -0
  13. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/RELEASE.md +0 -0
  14. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/pytest.ini +0 -0
  15. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/__init__.py +0 -0
  16. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/__init__.py +0 -0
  17. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/auth/__init__.py +0 -0
  18. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/auth/base.py +0 -0
  19. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/auth/client_credentials.py +0 -0
  20. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/auth/refresh_token.py +0 -0
  21. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/auth/sfdx_cli.py +0 -0
  22. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/auth/static_token.py +0 -0
  23. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/bulk_v2/__init__.py +0 -0
  24. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/bulk_v2/client.py +0 -0
  25. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/bulk_v2/types.py +0 -0
  26. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/client.py +0 -0
  27. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/collections/__init__.py +0 -0
  28. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/collections/client.py +0 -0
  29. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/collections/types.py +0 -0
  30. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/describe/__init__.py +0 -0
  31. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/describe/client.py +0 -0
  32. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/describe/types.py +0 -0
  33. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/query/__init__.py +0 -0
  34. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/query/client.py +0 -0
  35. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/query/types.py +0 -0
  36. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/api/types.py +0 -0
  37. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/exporter/__init__.py +0 -0
  38. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/tests/__init__.py +0 -0
  39. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/tests/conftest.py +0 -0
  40. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/tests/test_api_clients.py +0 -0
  41. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/tests/test_auth.py +0 -0
  42. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/tests/test_client.py +0 -0
  43. {aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/uv.lock +0 -0
{aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/PKG-INFO
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: aio-sf
-Version: 0.1.0b3
-Summary: Async Salesforce library for Python with Bulk API 2.0 support
+Version: 0.1.0b5
+Summary: Async Salesforce library for Python
 Project-URL: Homepage, https://github.com/callawaycloud/aio-salesforce
 Project-URL: Repository, https://github.com/callawaycloud/aio-salesforce
 Project-URL: Issues, https://github.com/callawaycloud/aio-salesforce/issues
{aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
 [project]
 name = "aio-sf"
 dynamic = ["version"]
-description = "Async Salesforce library for Python with Bulk API 2.0 support"
+description = "Async Salesforce library for Python"
 readme = "README.md"
 license = {file = "LICENSE"}
 authors = [
{aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/exporter/bulk_export.py
@@ -2,6 +2,7 @@ import logging
 from typing import Any, Dict, List, Generator, Optional
 import csv
 import asyncio
+import io
 
 from ..api.describe.types import FieldInfo
 from ..api.client import SalesforceClient
@@ -111,37 +112,50 @@ class QueryResult:
         """
         Stream CSV response and convert to record dictionaries.
 
+        Uses proper CSV parsing to handle quotes, newlines, and special characters correctly.
+
         :param response_text: CSV response text
        :yields: Individual record dictionaries
         """
-        lines = response_text.splitlines()
-
-        # Get the header row first
-        if not lines:
+        if not response_text or not response_text.strip():
             # No data in this batch
             return
 
         try:
-            header_line = lines[0]
-            fieldnames = next(csv.reader([header_line]))
-        except (IndexError, StopIteration, csv.Error):
-            # No data in this batch
-            return
+            # Create a StringIO object for proper CSV parsing
+            csv_buffer = io.StringIO(response_text)
+
+            # Use DictReader for proper CSV parsing with header detection
+            # This handles quotes, newlines in fields, and escaping correctly
+            csv_reader = csv.DictReader(
+                csv_buffer,
+                delimiter=",",
+                quotechar='"',
+                quoting=csv.QUOTE_MINIMAL,
+                skipinitialspace=True,
+            )
 
-        # Process each data row
-        for line in lines[1:]:
-            if line.strip():  # Skip empty lines
+            for row_num, record in enumerate(csv_reader, start=1):
                 try:
-                    # Parse the CSV row
-                    row_values = next(csv.reader([line]))
-                    # Convert to dictionary
-                    row = dict(zip(fieldnames, row_values))
-                    yield row
-                except (csv.Error, StopIteration):
-                    logging.warning(f"Error parsing line: {line}")
-                    # Skip malformed lines
+                    # Convert None values to empty strings for consistency
+                    cleaned_record = {
+                        key: (value if value is not None else "")
+                        for key, value in record.items()
+                    }
+                    yield cleaned_record
+                except Exception as e:
+                    logging.warning(f"Error processing CSV record {row_num}: {e}")
+                    # Continue processing other records
                     continue
 
+        except csv.Error as e:
+            logging.error(f"CSV parsing error: {e}")
+            # If CSV parsing fails completely, don't yield any records
+            return
+        except Exception as e:
+            logging.error(f"Unexpected error parsing CSV response: {e}")
+            return
+
     async def _generate_records(self):
         """Async generator that yields individual records."""
         locator = self._query_locator
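A minimal standalone sketch (not part of the package) of why this hunk replaces line-by-line splitting with csv.DictReader: Bulk API CSV payloads can contain quoted commas and embedded newlines, which splitlines() cuts apart while DictReader parses correctly. The payload below is made up for illustration.

```python
import csv
import io

# A CSV payload of the shape Bulk API 2.0 returns: the Description field
# contains a quoted comma and an embedded newline.
payload = 'Id,Name,Description\r\n"001xx","Acme, Inc.","line one\nline two"\r\n'

# Old behaviour: splitlines() breaks the quoted field into two pieces.
print(payload.splitlines())   # 3 fragments instead of header + 1 row

# New behaviour: DictReader handles quoting and embedded newlines.
for row in csv.DictReader(io.StringIO(payload)):
    print(row)
    # {'Id': '001xx', 'Name': 'Acme, Inc.', 'Description': 'line one\nline two'}
```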
@@ -170,7 +184,9 @@ class QueryResult:
 
             except Exception as e:
                 raise Exception(
-                    f"Error processing record {ctn}: {e}. Current Query Locator: {locator}"
+                    f"Error processing record {ctn}: {e}. Current Query Locator: {locator}. "
+                    f"This may indicate a CSV parsing issue - check if the response contains "
+                    f"malformed CSV data or fields with special characters."
                 )
 
 
@@ -296,32 +312,15 @@ def resume_from_locator(
 
 
 # Helper function to get all fields that can be queried by bulk API
-async def get_bulk_fields(
-    sf: SalesforceClient, object_type: str, api_version: Optional[str] = None
-) -> List[FieldInfo]:
-    """Get field metadata for queryable fields in a Salesforce object.
-
-    :param sf: Salesforce client instance
-    :param object_type: Name of the Salesforce object (e.g., 'Account', 'Contact')
-    :param api_version: API version to use (defaults to client version)
-    :returns: List of field metadata dictionaries for queryable fields
-    """
+async def get_bulk_fields(fields_metadata: List[FieldInfo]) -> List[FieldInfo]:
+    """Get field metadata for queryable fields in a Salesforce object."""
     # Use the metadata API to get object description
 
-    # Filter to only queryable fields that aren't compound fields
+    # Filter to only queryable fields that aren't compound fields (unless field is actually name)
     queryable_fields = [
         field
         for field in fields_metadata
-        if field.get("name") not in compound_field_names
+        if field.get("type") not in ["address", "location"]
     ]
 
     return queryable_fields
-    describe_data = await sf.describe.sobject(object_type, api_version)
-    fields_metadata = describe_data["fields"]
-
-    # Create a set of all compound field names to exclude
-    compound_field_names = {
-        field.get("compoundFieldName")
-        for field in fields_metadata
-        if field.get("compoundFieldName")
-    }
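Because the b5 signature no longer takes a client, callers now fetch the describe metadata themselves and pass the field list in. A hedged sketch of the new call pattern, inside an async function (`sf.describe.sobject` appears in the removed code; the rest is illustrative):

```python
# Sketch, assuming `sf` is a SalesforceClient from this package.
describe_data = await sf.describe.sobject("Account")
fields = await get_bulk_fields(describe_data["fields"])

# Build a SOQL field list from the filtered metadata.
field_names = [f["name"] for f in fields]
soql = f"SELECT {', '.join(field_names)} FROM Account"
```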
{aio_sf-0.1.0b3 → aio_sf-0.1.0b5}/src/aio_sf/exporter/parquet_writer.py
@@ -3,26 +3,37 @@ Parquet writer module for converting Salesforce QueryResult to Parquet format.
 """
 
 import logging
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Callable
 from pathlib import Path
 import pyarrow as pa
 import pandas as pd
 import pyarrow.parquet as pq
+from datetime import datetime
 
 from ..api.describe.types import FieldInfo
 
 from .bulk_export import QueryResult, batch_records_async
 
 
-def salesforce_to_arrow_type(sf_type: str) -> pa.DataType:
-    """Convert Salesforce data types to Arrow data types."""
+def salesforce_to_arrow_type(
+    sf_type: str, convert_datetime_to_timestamp: bool = True
+) -> pa.DataType:
+    """Convert Salesforce data types to Arrow data types.
+
+    :param sf_type: Salesforce field type
+    :param convert_datetime_to_timestamp: If True, datetime fields use timestamp type, otherwise string
+    """
     type_mapping = {
         "string": pa.string(),
         "boolean": pa.bool_(),
         "int": pa.int64(),
         "double": pa.float64(),
-        "date": pa.string(),  # Store as string since SF returns ISO format
-        "datetime": pa.string(),  # Store as string since SF returns ISO format
+        "date": pa.string(),  # Always store as string since SF returns ISO format
+        "datetime": (
+            pa.timestamp("us", tz="UTC")
+            if convert_datetime_to_timestamp
+            else pa.string()
+        ),
         "currency": pa.float64(),
         "reference": pa.string(),
         "picklist": pa.string(),
@@ -40,18 +51,26 @@ def salesforce_to_arrow_type(sf_type: str) -> pa.DataType:
     return type_mapping.get(sf_type.lower(), pa.string())
 
 
-def create_schema_from_metadata(fields_metadata: List[FieldInfo]) -> pa.Schema:
+def create_schema_from_metadata(
+    fields_metadata: List[FieldInfo],
+    column_formatter: Optional[Callable[[str], str]] = None,
+    convert_datetime_to_timestamp: bool = True,
+) -> pa.Schema:
     """
     Create a PyArrow schema from Salesforce field metadata.
 
     :param fields_metadata: List of field metadata dictionaries from Salesforce
+    :param column_formatter: Optional function to format column names
+    :param convert_datetime_to_timestamp: If True, datetime fields use timestamp type, otherwise string
     :returns: PyArrow schema
     """
     arrow_fields = []
     for field in fields_metadata:
-        field_name = field.get("name", "").lower()  # Normalize to lowercase
+        field_name = field.get("name", "")
+        if column_formatter:
+            field_name = column_formatter(field_name)
         sf_type = field.get("type", "string")
-        arrow_type = salesforce_to_arrow_type(sf_type)
+        arrow_type = salesforce_to_arrow_type(sf_type, convert_datetime_to_timestamp)
         # All fields are nullable since Salesforce can return empty values
         arrow_fields.append(pa.field(field_name, arrow_type, nullable=True))
 
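A small sketch of how the new parameters behave. The metadata dicts below are made up; only the "name" and "type" keys matter to this function, and the expected output assumes the default type mapping shown above (unknown types such as "id" fall back to string):

```python
fields_metadata = [
    {"name": "Id", "type": "id"},
    {"name": "CreatedDate", "type": "datetime"},
    {"name": "AnnualRevenue", "type": "currency"},
]

# b5 no longer lowercases names implicitly; pass str.lower to keep the b3 behaviour.
schema = create_schema_from_metadata(
    fields_metadata,
    column_formatter=str.lower,
    convert_datetime_to_timestamp=True,
)
print(schema)
# id: string
# createddate: timestamp[us, tz=UTC]
# annualrevenue: double
```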
@@ -70,6 +89,8 @@ class ParquetWriter:
         schema: Optional[pa.Schema] = None,
         batch_size: int = 10000,
         convert_empty_to_null: bool = True,
+        column_formatter: Optional[Callable[[str], str]] = None,
+        convert_datetime_to_timestamp: bool = True,
     ):
         """
         Initialize ParquetWriter.
@@ -78,11 +99,15 @@ class ParquetWriter:
         :param schema: Optional PyArrow schema. If None, will be inferred from first batch
         :param batch_size: Number of records to process in each batch
         :param convert_empty_to_null: Convert empty strings to null values
+        :param column_formatter: Optional function to format column names. If None, no formatting is applied
+        :param convert_datetime_to_timestamp: If True, datetime fields are converted to timestamps, otherwise stored as strings
         """
         self.file_path = file_path
         self.schema = schema
         self.batch_size = batch_size
         self.convert_empty_to_null = convert_empty_to_null
+        self.column_formatter = column_formatter
+        self.convert_datetime_to_timestamp = convert_datetime_to_timestamp
         self._writer = None
         self._schema_finalized = False
 
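The new column_formatter accepts any str -> str callable, not just str.lower. A hedged sketch of a custom formatter (the snake_case helper and the constructor arguments here are illustrative, not part of the package):

```python
import re

def snake_case(name: str) -> str:
    """Turn Salesforce-style names like 'BillingStreet' or 'Custom_Field__c'
    into 'billing_street' / 'custom_field__c'."""
    return re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", name).lower()

writer = ParquetWriter(
    file_path="contacts.parquet",
    batch_size=5_000,
    column_formatter=snake_case,          # applied to every column name
    convert_datetime_to_timestamp=False,  # keep datetimes as ISO strings
)
```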
@@ -106,10 +131,15 @@ class ParquetWriter:
         if not batch:
             return
 
-        # Convert field names to lowercase for consistency
+        # Apply column formatting if specified
         converted_batch = []
         for record in batch:
-            converted_record = {k.lower(): v for k, v in record.items()}
+            if self.column_formatter:
+                converted_record = {
+                    self.column_formatter(k): v for k, v in record.items()
+                }
+            else:
+                converted_record = record.copy()
             converted_batch.append(converted_record)
 
         # Create DataFrame
@@ -121,7 +151,7 @@ class ParquetWriter:
                 self.schema = self._infer_schema_from_dataframe(df)
             else:
                 # Filter schema to only include fields that are actually in the data
-                self.schema = self._filter_schema_to_data(self.schema, df.columns)
+                self.schema = self._filter_schema_to_data(self.schema, list(df.columns))
             self._schema_finalized = True
 
         # Apply data type conversions based on schema
@@ -181,6 +211,8 @@ class ParquetWriter:
 
     def _convert_dataframe_types(self, df: pd.DataFrame) -> None:
         """Convert DataFrame types based on the schema."""
+        if self.schema is None:
+            return
         for field in self.schema:
             field_name = field.name
             if field_name not in df.columns:
@@ -192,23 +224,72 @@ class ParquetWriter:
 
             # Apply type-specific conversions
             if pa.types.is_boolean(field.type):
-                # Convert string 'true'/'false' to boolean
-                df[field_name] = (
-                    df[field_name]
-                    .map({"true": True, "false": False, None: None})
-                    .fillna(df[field_name])
-                )  # Keep original values for non-string booleans
+                # Convert string 'true'/'false' to boolean, keeping original values for others
+                original_series = df[field_name]
+                mapped_series = original_series.map(
+                    {"true": True, "false": False, None: None}
+                )
+                # For values that weren't mapped, keep the original values
+                # This avoids the fillna FutureWarning by using boolean indexing instead
+                mask = mapped_series.notna()
+                result_series = original_series.copy()
+                result_series.loc[mask] = mapped_series.loc[mask]
+                df[field_name] = result_series
             elif pa.types.is_integer(field.type):
                 df[field_name] = pd.to_numeric(df[field_name], errors="coerce").astype(
                     "Int64"
                 )  # Nullable integer
             elif pa.types.is_floating(field.type):
                 df[field_name] = pd.to_numeric(df[field_name], errors="coerce")
+            elif pa.types.is_timestamp(field.type):
+                # Convert Salesforce ISO datetime strings to timestamps
+                datetime_series = df[field_name]
+                if isinstance(datetime_series, pd.Series):
+                    df[field_name] = self._convert_datetime_strings_to_timestamps(
+                        datetime_series
+                    )
 
             # Replace empty strings with None for non-string fields
             if not pa.types.is_string(field.type):
                 df[field_name] = df[field_name].replace("", pd.NA)
 
+    def _convert_datetime_strings_to_timestamps(self, series: pd.Series) -> pd.Series:
+        """
+        Convert Salesforce ISO datetime strings to pandas datetime objects.
+
+        Salesforce returns datetime in ISO format like '2023-12-25T10:30:00.000+0000'
+        or '2023-12-25T10:30:00Z'. This method handles various ISO formats.
+        """
+
+        def parse_sf_datetime(dt_str):
+            if pd.isna(dt_str) or dt_str == "" or dt_str is None:
+                return pd.NaT
+
+            try:
+                # Handle common Salesforce datetime formats
+                dt_str = str(dt_str).strip()
+
+                # Convert +0000 to Z for pandas compatibility
+                if dt_str.endswith("+0000"):
+                    dt_str = dt_str[:-5] + "Z"
+                elif dt_str.endswith("+00:00"):
+                    dt_str = dt_str[:-6] + "Z"
+
+                # Use pandas to_datetime with UTC parsing
+                return pd.to_datetime(dt_str, utc=True)
+
+            except (ValueError, TypeError) as e:
+                logging.warning(f"Failed to parse datetime string '{dt_str}': {e}")
+                return pd.NaT
+
+        # Apply the conversion function to the series
+        result = series.apply(parse_sf_datetime)
+        if isinstance(result, pd.Series):
+            return result
+        else:
+            # This shouldn't happen, but handle it gracefully
+            return pd.Series(result, index=series.index)
+
     def close(self) -> None:
         """Close the parquet writer."""
         if self._writer:
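A standalone sketch (values made up) of the datetime normalization this hunk introduces: the two offset spellings Salesforce emits are rewritten to a trailing "Z" before handing the string to pandas, and empty values become NaT.

```python
import pandas as pd

raw = pd.Series(["2023-12-25T10:30:00.000+0000", "2023-12-25T10:30:00Z", ""])

def normalise(dt_str):
    if not dt_str:
        return pd.NaT
    if dt_str.endswith("+0000"):
        dt_str = dt_str[:-5] + "Z"
    elif dt_str.endswith("+00:00"):
        dt_str = dt_str[:-6] + "Z"
    return pd.to_datetime(dt_str, utc=True)

print(raw.apply(normalise))
# 0    2023-12-25 10:30:00+00:00
# 1    2023-12-25 10:30:00+00:00
# 2                          NaT
```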
@@ -223,6 +304,8 @@ async def write_query_to_parquet(
     schema: Optional[pa.Schema] = None,
     batch_size: int = 10000,
     convert_empty_to_null: bool = True,
+    column_formatter: Optional[Callable[[str], str]] = None,
+    convert_datetime_to_timestamp: bool = True,
 ) -> None:
     """
     Convenience function to write a QueryResult to a parquet file (async version).
@@ -233,18 +316,24 @@ async def write_query_to_parquet(
     :param schema: Optional pre-created PyArrow schema (takes precedence over fields_metadata)
     :param batch_size: Number of records to process in each batch
     :param convert_empty_to_null: Convert empty strings to null values
+    :param column_formatter: Optional function to format column names
+    :param convert_datetime_to_timestamp: If True, datetime fields are converted to timestamps, otherwise stored as strings
     """
     effective_schema = None
     if schema:
         effective_schema = schema
     elif fields_metadata:
-        effective_schema = create_schema_from_metadata(fields_metadata)
+        effective_schema = create_schema_from_metadata(
+            fields_metadata, column_formatter, convert_datetime_to_timestamp
+        )
 
     writer = ParquetWriter(
         file_path=file_path,
         schema=effective_schema,
         batch_size=batch_size,
         convert_empty_to_null=convert_empty_to_null,
+        column_formatter=column_formatter,
+        convert_datetime_to_timestamp=convert_datetime_to_timestamp,
     )
 
     await writer.write_query_result(query_result)
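A hedged end-to-end sketch of the convenience function with the new b5 options, inside an async function. `query_result` and `fields` are assumed to come from the exporter and describe helpers shown earlier in this diff; the keyword names match the docstring, but the call as a whole is illustrative only.

```python
# Sketch: export a query result to Parquet with the new options.
await write_query_to_parquet(
    query_result=query_result,
    file_path="account_export.parquet",
    fields_metadata=fields,
    batch_size=10_000,
    column_formatter=str.lower,            # opt back in to lowercase column names
    convert_datetime_to_timestamp=True,    # datetime fields become timestamp[us, tz=UTC]
)
```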