aio-sf 0.1.0b4__tar.gz → 0.1.0b6__tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/PKG-INFO +17 -10
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/README.md +9 -5
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/RELEASE.md +0 -25
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/pyproject.toml +8 -3
- aio_sf-0.1.0b6/src/aio_sf/__init__.py +61 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/exporter/bulk_export.py +2 -9
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/exporter/parquet_writer.py +96 -12
- aio_sf-0.1.0b4/src/aio_sf/__init__.py +0 -28
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/.cursor/rules/api-structure.mdc +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/.cursor/rules/async-patterns.mdc +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/.cursor/rules/project-tooling.mdc +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/.github/workflows/publish.yml +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/.github/workflows/test.yml +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/.gitignore +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/LICENSE +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/pytest.ini +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/__init__.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/auth/__init__.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/auth/base.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/auth/client_credentials.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/auth/refresh_token.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/auth/sfdx_cli.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/auth/static_token.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/bulk_v2/__init__.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/bulk_v2/client.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/bulk_v2/types.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/client.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/collections/__init__.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/collections/client.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/collections/types.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/describe/__init__.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/describe/client.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/describe/types.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/query/__init__.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/query/client.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/query/types.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/api/types.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/src/aio_sf/exporter/__init__.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/tests/__init__.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/tests/conftest.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/tests/test_api_clients.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/tests/test_auth.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/tests/test_client.py +0 -0
- {aio_sf-0.1.0b4 → aio_sf-0.1.0b6}/uv.lock +0 -0
--- aio_sf-0.1.0b4/PKG-INFO
+++ aio_sf-0.1.0b6/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aio-sf
-Version: 0.1.0b4
+Version: 0.1.0b6
 Summary: Async Salesforce library for Python
 Project-URL: Homepage, https://github.com/callawaycloud/aio-salesforce
 Project-URL: Repository, https://github.com/callawaycloud/aio-salesforce
@@ -35,13 +35,16 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.11
+Requires-Dist: boto3>=1.34.0
 Requires-Dist: httpx>=0.25.0
+Requires-Dist: pandas>=2.0.0
+Requires-Dist: pyarrow>=10.0.0
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: python-dotenv>=1.0.0
-Provides-Extra:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
+Provides-Extra: core
+Requires-Dist: httpx>=0.25.0; extra == 'core'
+Requires-Dist: pydantic>=2.0.0; extra == 'core'
+Requires-Dist: python-dotenv>=1.0.0; extra == 'core'
 Provides-Extra: dev
 Requires-Dist: black>=23.0.0; extra == 'dev'
 Requires-Dist: mypy>=1.5.0; extra == 'dev'
@@ -88,16 +91,16 @@ An async Salesforce library for Python.

 ## Installation

-###
+### Full Package (Default - Includes Everything)
 ```bash
 uv add aio-sf
 # or: pip install aio-sf
 ```

-###
+### Core Only (Minimal Dependencies)
 ```bash
-uv add "aio-sf[
-# or: pip install "aio-sf[
+uv add "aio-sf[core]"
+# or: pip install "aio-sf[core]"
 ```

 ## Quick Start
@@ -157,7 +160,11 @@ The Exporter library contains a streamlined and "opinionated" way to export data

 ### 3. Export to Parquet
 ```python
-
+# With full installation (default), you can import directly from aio_sf
+from aio_sf import SalesforceClient, ClientCredentialsAuth, bulk_query, write_query_to_parquet
+
+# Or import from the exporter module (both work)
+# from aio_sf.exporter import bulk_query, write_query_to_parquet

 async def main():
     # ... authentication code from above ...
--- aio_sf-0.1.0b4/README.md
+++ aio_sf-0.1.0b6/README.md
@@ -28,16 +28,16 @@ An async Salesforce library for Python.

 ## Installation

-###
+### Full Package (Default - Includes Everything)
 ```bash
 uv add aio-sf
 # or: pip install aio-sf
 ```

-###
+### Core Only (Minimal Dependencies)
 ```bash
-uv add "aio-sf[
-# or: pip install "aio-sf[
+uv add "aio-sf[core]"
+# or: pip install "aio-sf[core]"
 ```

 ## Quick Start
@@ -97,7 +97,11 @@ The Exporter library contains a streamlined and "opinionated" way to export data

 ### 3. Export to Parquet
 ```python
-
+# With full installation (default), you can import directly from aio_sf
+from aio_sf import SalesforceClient, ClientCredentialsAuth, bulk_query, write_query_to_parquet
+
+# Or import from the exporter module (both work)
+# from aio_sf.exporter import bulk_query, write_query_to_parquet

 async def main():
     # ... authentication code from above ...
--- aio_sf-0.1.0b4/RELEASE.md
+++ aio_sf-0.1.0b6/RELEASE.md
@@ -39,31 +39,6 @@
 - Builds and publishes to PyPI automatically
 - Requires manual approval in the `pypi` environment

-## Manual Release (Backup)
-
-If you need to publish manually:
-
-```bash
-# Build the package
-uv build
-
-# Publish to PyPI (requires PYPI_API_TOKEN env var)
-export PYPI_API_TOKEN=your_token_here
-uv publish --token $PYPI_API_TOKEN
-```
-
-## Test Release
-
-To test on TestPyPI first:
-
-```bash
-# Get TestPyPI token from test.pypi.org
-uv publish --repository testpypi --token $TEST_PYPI_TOKEN
-
-# Test install from TestPyPI
-pip install --index-url https://test.pypi.org/simple/ aio-salesforce
-```
-
 ## Version Strategy

 ### Automatic Versioning
--- aio_sf-0.1.0b4/pyproject.toml
+++ aio_sf-0.1.0b6/pyproject.toml
@@ -24,17 +24,22 @@ dependencies = [
     "httpx>=0.25.0",
     "pydantic>=2.0.0",
     "python-dotenv>=1.0.0",
+    "pandas>=2.0.0",
+    "pyarrow>=10.0.0",
+    "boto3>=1.34.0",  # For S3 uploads (future feature)
 ]

 [project.optional-dependencies]
+core = [
+    "httpx>=0.25.0",
+    "pydantic>=2.0.0",
+    "python-dotenv>=1.0.0",
+]
 exporter = [
     "pandas>=2.0.0",
     "pyarrow>=10.0.0",
     "boto3>=1.34.0",  # For S3 uploads (future feature)
 ]
-all = [
-    "aio-sf[exporter]",
-]
 dev = [
     "pytest>=7.0.0",
     "pytest-asyncio>=0.21.0",
--- /dev/null
+++ aio_sf-0.1.0b6/src/aio_sf/__init__.py
@@ -0,0 +1,61 @@
+"""aio-salesforce: Async Salesforce library for Python with Bulk API 2.0 support."""
+
+__author__ = "Jonas"
+__email__ = "charlie@callaway.cloud"
+
+# Client functionality
+from .api.client import SalesforceClient  # noqa: F401
+from .api.auth import (  # noqa: F401
+    SalesforceAuthError,
+    AuthStrategy,
+    ClientCredentialsAuth,
+    RefreshTokenAuth,
+    StaticTokenAuth,
+    SfdxCliAuth,
+)
+
+# Core package exports client functionality
+# Exporter functionality is included by default, but gracefully handles missing deps
+__all__ = [
+    "SalesforceClient",
+    "SalesforceAuthError",
+    "AuthStrategy",
+    "ClientCredentialsAuth",
+    "RefreshTokenAuth",
+    "StaticTokenAuth",
+    "SfdxCliAuth",
+]
+
+# Try to import exporter functionality if dependencies are available
+try:
+    from .exporter import (  # noqa: F401
+        bulk_query,
+        get_bulk_fields,
+        resume_from_locator,
+        write_records_to_csv,
+        QueryResult,
+        batch_records_async,
+        ParquetWriter,
+        create_schema_from_metadata,
+        write_query_to_parquet,
+        salesforce_to_arrow_type,
+    )
+
+    __all__.extend(
+        [
+            "bulk_query",
+            "get_bulk_fields",
+            "resume_from_locator",
+            "write_records_to_csv",
+            "QueryResult",
+            "batch_records_async",
+            "ParquetWriter",
+            "create_schema_from_metadata",
+            "write_query_to_parquet",
+            "salesforce_to_arrow_type",
+        ]
+    )
+
+except ImportError:
+    # Exporter dependencies not available - this is fine for core-only installs
+    pass
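The new top-level `__init__.py` makes the exporter re-exports best-effort: the core client imports always succeed, and the exporter symbols are only added to `__all__` when the optional dependencies import cleanly. Below is a minimal consumer-side sketch of the same pattern; the `has_exporter` flag and the `None` fallbacks are illustrative, not part of the package.

```python
# Hedged sketch: feature-detecting the optional exporter API from application
# code, mirroring the try/except ImportError used inside aio_sf/__init__.py.
# `has_exporter` and the None fallbacks are illustrative names, not library API.
from aio_sf import SalesforceClient  # core import, available in every install

try:
    # Re-exported by the package only when the exporter dependencies are present
    from aio_sf import bulk_query, write_query_to_parquet
    has_exporter = True
except ImportError:
    # Exporter dependencies (pandas/pyarrow) not importable in this environment
    bulk_query = write_query_to_parquet = None
    has_exporter = False

print(f"exporter helpers available: {has_exporter}")
```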
--- aio_sf-0.1.0b4/src/aio_sf/exporter/bulk_export.py
+++ aio_sf-0.1.0b6/src/aio_sf/exporter/bulk_export.py
@@ -316,18 +316,11 @@ async def get_bulk_fields(fields_metadata: List[FieldInfo]) -> List[FieldInfo]:
     """Get field metadata for queryable fields in a Salesforce object."""
     # Use the metadata API to get object description

-    #
-    compound_field_names = {
-        field.get("compoundFieldName")
-        for field in fields_metadata
-        if field.get("compoundFieldName")
-    }
-
-    # Filter to only queryable fields that aren't compound fields
+    # Filter to only queryable fields that aren't compound fields (unless field is actually name)
     queryable_fields = [
         field
         for field in fields_metadata
-        if field.get("
+        if field.get("type") not in ["address", "location"]
     ]

     return queryable_fields
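The revised `get_bulk_fields` filter drops compound `address`/`location` fields by their type rather than collecting `compoundFieldName`s first. A standalone sketch of just that filter, using made-up sample metadata:

```python
# Standalone sketch of the new filter: compound fields are excluded by type
# ("address"/"location"); the sample metadata below is made up for illustration.
fields_metadata = [
    {"name": "Id", "type": "id"},
    {"name": "BillingAddress", "type": "address"},  # compound field, excluded
    {"name": "BillingCity", "type": "string"},      # component field, kept
    {"name": "Location__c", "type": "location"},    # compound field, excluded
]

queryable_fields = [
    field
    for field in fields_metadata
    if field.get("type") not in ["address", "location"]
]
print([f["name"] for f in queryable_fields])  # ['Id', 'BillingCity']
```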
--- aio_sf-0.1.0b4/src/aio_sf/exporter/parquet_writer.py
+++ aio_sf-0.1.0b6/src/aio_sf/exporter/parquet_writer.py
@@ -3,26 +3,37 @@ Parquet writer module for converting Salesforce QueryResult to Parquet format.
 """

 import logging
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Callable
 from pathlib import Path
 import pyarrow as pa
 import pandas as pd
 import pyarrow.parquet as pq
+from datetime import datetime

 from ..api.describe.types import FieldInfo

 from .bulk_export import QueryResult, batch_records_async


-def salesforce_to_arrow_type(sf_type: str) -> pa.DataType:
-
+def salesforce_to_arrow_type(
+    sf_type: str, convert_datetime_to_timestamp: bool = True
+) -> pa.DataType:
+    """Convert Salesforce data types to Arrow data types.
+
+    :param sf_type: Salesforce field type
+    :param convert_datetime_to_timestamp: If True, datetime fields use timestamp type, otherwise string
+    """
     type_mapping = {
         "string": pa.string(),
         "boolean": pa.bool_(),
         "int": pa.int64(),
         "double": pa.float64(),
-        "date": pa.string(),  #
-        "datetime":
+        "date": pa.string(),  # Always store as string since SF returns ISO format
+        "datetime": (
+            pa.timestamp("us", tz="UTC")
+            if convert_datetime_to_timestamp
+            else pa.string()
+        ),
         "currency": pa.float64(),
         "reference": pa.string(),
         "picklist": pa.string(),
@@ -40,18 +51,26 @@ def salesforce_to_arrow_type(sf_type: str) -> pa.DataType:
     return type_mapping.get(sf_type.lower(), pa.string())


-def create_schema_from_metadata(
+def create_schema_from_metadata(
+    fields_metadata: List[FieldInfo],
+    column_formatter: Optional[Callable[[str], str]] = None,
+    convert_datetime_to_timestamp: bool = True,
+) -> pa.Schema:
     """
     Create a PyArrow schema from Salesforce field metadata.

     :param fields_metadata: List of field metadata dictionaries from Salesforce
+    :param column_formatter: Optional function to format column names
+    :param convert_datetime_to_timestamp: If True, datetime fields use timestamp type, otherwise string
     :returns: PyArrow schema
     """
     arrow_fields = []
     for field in fields_metadata:
-        field_name = field.get("name", "")
+        field_name = field.get("name", "")
+        if column_formatter:
+            field_name = column_formatter(field_name)
         sf_type = field.get("type", "string")
-        arrow_type = salesforce_to_arrow_type(sf_type)
+        arrow_type = salesforce_to_arrow_type(sf_type, convert_datetime_to_timestamp)
         # All fields are nullable since Salesforce can return empty values
         arrow_fields.append(pa.field(field_name, arrow_type, nullable=True))

@@ -70,6 +89,8 @@ class ParquetWriter:
         schema: Optional[pa.Schema] = None,
         batch_size: int = 10000,
         convert_empty_to_null: bool = True,
+        column_formatter: Optional[Callable[[str], str]] = None,
+        convert_datetime_to_timestamp: bool = True,
     ):
         """
         Initialize ParquetWriter.
@@ -78,11 +99,15 @@ class ParquetWriter:
         :param schema: Optional PyArrow schema. If None, will be inferred from first batch
         :param batch_size: Number of records to process in each batch
         :param convert_empty_to_null: Convert empty strings to null values
+        :param column_formatter: Optional function to format column names. If None, no formatting is applied
+        :param convert_datetime_to_timestamp: If True, datetime fields are converted to timestamps, otherwise stored as strings
         """
         self.file_path = file_path
         self.schema = schema
         self.batch_size = batch_size
         self.convert_empty_to_null = convert_empty_to_null
+        self.column_formatter = column_formatter
+        self.convert_datetime_to_timestamp = convert_datetime_to_timestamp
         self._writer = None
         self._schema_finalized = False

@@ -106,10 +131,15 @@ class ParquetWriter:
         if not batch:
             return

-        #
+        # Apply column formatting if specified
         converted_batch = []
         for record in batch:
-
+            if self.column_formatter:
+                converted_record = {
+                    self.column_formatter(k): v for k, v in record.items()
+                }
+            else:
+                converted_record = record.copy()
             converted_batch.append(converted_record)

         # Create DataFrame
@@ -121,7 +151,7 @@ class ParquetWriter:
                 self.schema = self._infer_schema_from_dataframe(df)
             else:
                 # Filter schema to only include fields that are actually in the data
-                self.schema = self._filter_schema_to_data(self.schema, df.columns)
+                self.schema = self._filter_schema_to_data(self.schema, list(df.columns))
             self._schema_finalized = True

         # Apply data type conversions based on schema
@@ -181,6 +211,8 @@ class ParquetWriter:

     def _convert_dataframe_types(self, df: pd.DataFrame) -> None:
         """Convert DataFrame types based on the schema."""
+        if self.schema is None:
+            return
         for field in self.schema:
             field_name = field.name
             if field_name not in df.columns:
@@ -209,11 +241,55 @@ class ParquetWriter:
                 )  # Nullable integer
             elif pa.types.is_floating(field.type):
                 df[field_name] = pd.to_numeric(df[field_name], errors="coerce")
+            elif pa.types.is_timestamp(field.type):
+                # Convert Salesforce ISO datetime strings to timestamps
+                datetime_series = df[field_name]
+                if isinstance(datetime_series, pd.Series):
+                    df[field_name] = self._convert_datetime_strings_to_timestamps(
+                        datetime_series
+                    )

             # Replace empty strings with None for non-string fields
             if not pa.types.is_string(field.type):
                 df[field_name] = df[field_name].replace("", pd.NA)

+    def _convert_datetime_strings_to_timestamps(self, series: pd.Series) -> pd.Series:
+        """
+        Convert Salesforce ISO datetime strings to pandas datetime objects.
+
+        Salesforce returns datetime in ISO format like '2023-12-25T10:30:00.000+0000'
+        or '2023-12-25T10:30:00Z'. This method handles various ISO formats.
+        """
+
+        def parse_sf_datetime(dt_str):
+            if pd.isna(dt_str) or dt_str == "" or dt_str is None:
+                return pd.NaT
+
+            try:
+                # Handle common Salesforce datetime formats
+                dt_str = str(dt_str).strip()
+
+                # Convert +0000 to Z for pandas compatibility
+                if dt_str.endswith("+0000"):
+                    dt_str = dt_str[:-5] + "Z"
+                elif dt_str.endswith("+00:00"):
+                    dt_str = dt_str[:-6] + "Z"
+
+                # Use pandas to_datetime with UTC parsing
+                return pd.to_datetime(dt_str, utc=True)
+
+            except (ValueError, TypeError) as e:
+                logging.warning(f"Failed to parse datetime string '{dt_str}': {e}")
+                return pd.NaT
+
+        # Apply the conversion function to the series
+        result = series.apply(parse_sf_datetime)
+        if isinstance(result, pd.Series):
+            return result
+        else:
+            # This shouldn't happen, but handle it gracefully
+            return pd.Series(result, index=series.index)
+
     def close(self) -> None:
         """Close the parquet writer."""
         if self._writer:
@@ -228,6 +304,8 @@ async def write_query_to_parquet(
     schema: Optional[pa.Schema] = None,
     batch_size: int = 10000,
     convert_empty_to_null: bool = True,
+    column_formatter: Optional[Callable[[str], str]] = None,
+    convert_datetime_to_timestamp: bool = True,
 ) -> None:
     """
     Convenience function to write a QueryResult to a parquet file (async version).
@@ -238,18 +316,24 @@ async def write_query_to_parquet(
     :param schema: Optional pre-created PyArrow schema (takes precedence over fields_metadata)
     :param batch_size: Number of records to process in each batch
     :param convert_empty_to_null: Convert empty strings to null values
+    :param column_formatter: Optional function to format column names
+    :param convert_datetime_to_timestamp: If True, datetime fields are converted to timestamps, otherwise stored as strings
     """
     effective_schema = None
     if schema:
         effective_schema = schema
     elif fields_metadata:
-        effective_schema = create_schema_from_metadata(
+        effective_schema = create_schema_from_metadata(
+            fields_metadata, column_formatter, convert_datetime_to_timestamp
+        )

     writer = ParquetWriter(
         file_path=file_path,
         schema=effective_schema,
         batch_size=batch_size,
         convert_empty_to_null=convert_empty_to_null,
+        column_formatter=column_formatter,
+        convert_datetime_to_timestamp=convert_datetime_to_timestamp,
     )

     await writer.write_query_result(query_result)
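The new timestamp branch normalizes Salesforce ISO datetime strings before pandas parses them. Below is a self-contained sketch of that normalization run outside the writer (sample values are made up); it mirrors `_convert_datetime_strings_to_timestamps`, which is applied when `convert_datetime_to_timestamp=True`.

```python
# Self-contained sketch of the datetime normalization ParquetWriter now applies:
# '+0000' / '+00:00' offsets are rewritten to 'Z', values are parsed as UTC,
# and empty or unparseable strings become NaT.
import pandas as pd

def parse_sf_datetime(dt_str):
    if pd.isna(dt_str) or dt_str == "" or dt_str is None:
        return pd.NaT
    try:
        dt_str = str(dt_str).strip()
        if dt_str.endswith("+0000"):
            dt_str = dt_str[:-5] + "Z"
        elif dt_str.endswith("+00:00"):
            dt_str = dt_str[:-6] + "Z"
        return pd.to_datetime(dt_str, utc=True)
    except (ValueError, TypeError):
        return pd.NaT

series = pd.Series(["2023-12-25T10:30:00.000+0000", "2023-12-25T10:30:00Z", ""])
print(series.apply(parse_sf_datetime))
# -> two UTC timestamps (2023-12-25 10:30:00+00:00) and one NaT
```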
--- aio_sf-0.1.0b4/src/aio_sf/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-"""aio-salesforce: Async Salesforce library for Python with Bulk API 2.0 support."""
-
-__author__ = "Jonas"
-__email__ = "charlie@callaway.cloud"
-
-# Client functionality
-from .api.client import SalesforceClient  # noqa: F401
-from .api.auth import (  # noqa: F401
-    SalesforceAuthError,
-    AuthStrategy,
-    ClientCredentialsAuth,
-    RefreshTokenAuth,
-    StaticTokenAuth,
-    SfdxCliAuth,
-)
-
-# Core package only exports client functionality
-# Users import exporter functions directly: from aio_sf.exporter import bulk_query
-
-__all__ = [
-    "SalesforceClient",
-    "SalesforceAuthError",
-    "AuthStrategy",
-    "ClientCredentialsAuth",
-    "RefreshTokenAuth",
-    "StaticTokenAuth",
-    "SfdxCliAuth",
-]