aio-sf 0.1.0b5__py3-none-any.whl → 0.1.0b7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aio_sf/__init__.py +36 -3
- aio_sf/exporter/bulk_export.py +1 -1
- aio_sf/exporter/parquet_writer.py +57 -19
- {aio_sf-0.1.0b5.dist-info → aio_sf-0.1.0b7.dist-info}/METADATA +17 -10
- {aio_sf-0.1.0b5.dist-info → aio_sf-0.1.0b7.dist-info}/RECORD +7 -7
- {aio_sf-0.1.0b5.dist-info → aio_sf-0.1.0b7.dist-info}/WHEEL +0 -0
- {aio_sf-0.1.0b5.dist-info → aio_sf-0.1.0b7.dist-info}/licenses/LICENSE +0 -0
aio_sf/__init__.py
CHANGED
@@ -14,9 +14,8 @@ from .api.auth import (  # noqa: F401
     SfdxCliAuth,
 )
 
-# Core package
-#
-
+# Core package exports client functionality
+# Exporter functionality is included by default, but gracefully handles missing deps
 __all__ = [
     "SalesforceClient",
     "SalesforceAuthError",
@@ -26,3 +25,37 @@ __all__ = [
     "StaticTokenAuth",
     "SfdxCliAuth",
 ]
+
+# Try to import exporter functionality if dependencies are available
+try:
+    from .exporter import (  # noqa: F401
+        bulk_query,
+        get_bulk_fields,
+        resume_from_locator,
+        write_records_to_csv,
+        QueryResult,
+        batch_records_async,
+        ParquetWriter,
+        create_schema_from_metadata,
+        write_query_to_parquet,
+        salesforce_to_arrow_type,
+    )
+
+    __all__.extend(
+        [
+            "bulk_query",
+            "get_bulk_fields",
+            "resume_from_locator",
+            "write_records_to_csv",
+            "QueryResult",
+            "batch_records_async",
+            "ParquetWriter",
+            "create_schema_from_metadata",
+            "write_query_to_parquet",
+            "salesforce_to_arrow_type",
+        ]
+    )
+
+except ImportError:
+    # Exporter dependencies not available - this is fine for core-only installs
+    pass
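The try/except around the exporter import means downstream code can probe for the optional functionality at runtime rather than at install time. A minimal sketch of that pattern; the `HAS_EXPORTER` flag is an illustrative application-side name, not part of the package:

```python
# Probe for the optional exporter features. The attribute names mirror the
# package's public API above; HAS_EXPORTER itself is a hypothetical flag.
import aio_sf

HAS_EXPORTER = hasattr(aio_sf, "write_query_to_parquet")

if HAS_EXPORTER:
    from aio_sf import bulk_query, write_query_to_parquet
else:
    # Core-only install: the REST client still works, parquet export does not.
    print("Install the full package (pip install aio-sf) for parquet export")
```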
aio_sf/exporter/bulk_export.py
CHANGED
@@ -320,7 +320,7 @@ async def get_bulk_fields(fields_metadata: List[FieldInfo]) -> List[FieldInfo]:
     queryable_fields = [
         field
         for field in fields_metadata
-        if field.get("type") not in ["address", "location"]
+        if field.get("type") not in ["address", "location", "base64"]
     ]
 
     return queryable_fields
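The new filter drops `base64` fields (e.g. `Attachment.Body`) alongside the compound `address`/`location` fields, since the Salesforce Bulk API does not return base64 field content in query results. A small illustration of the filtering, using hand-written metadata dicts in place of a real describe call:

```python
# Illustrative only: fields_metadata normally comes from a Salesforce
# describe call; these literal dicts just show which types are dropped.
fields_metadata = [
    {"name": "Id", "type": "id"},
    {"name": "BillingAddress", "type": "address"},
    {"name": "Body", "type": "base64"},
]

queryable = [
    f for f in fields_metadata
    if f.get("type") not in ["address", "location", "base64"]
]
assert [f["name"] for f in queryable] == ["Id"]
```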
aio_sf/exporter/parquet_writer.py
CHANGED
@@ -16,24 +16,20 @@ from .bulk_export import QueryResult, batch_records_async
 
 
 def salesforce_to_arrow_type(
-    sf_type: str,
+    sf_type: str, type_mapping_overrides: Optional[Dict[str, pa.DataType]] = None
 ) -> pa.DataType:
     """Convert Salesforce data types to Arrow data types.
 
     :param sf_type: Salesforce field type
-    :param
+    :param type_mapping_overrides: Optional dict to override default type mappings
     """
-
+    default_type_mapping = {
         "string": pa.string(),
         "boolean": pa.bool_(),
         "int": pa.int64(),
         "double": pa.float64(),
-        "date": pa.
-        "datetime": (
-            pa.timestamp("us", tz="UTC")
-            if convert_datetime_to_timestamp
-            else pa.string()
-        ),
+        "date": pa.date32(),  # Store as proper date type
+        "datetime": pa.timestamp("us", tz="UTC"),
         "currency": pa.float64(),
         "reference": pa.string(),
         "picklist": pa.string(),
@@ -48,20 +44,27 @@ def salesforce_to_arrow_type(
         "base64": pa.string(),
         "anyType": pa.string(),
     }
+
+    # Apply overrides if provided
+    if type_mapping_overrides:
+        type_mapping = {**default_type_mapping, **type_mapping_overrides}
+    else:
+        type_mapping = default_type_mapping
+
     return type_mapping.get(sf_type.lower(), pa.string())
 
 
 def create_schema_from_metadata(
     fields_metadata: List[FieldInfo],
     column_formatter: Optional[Callable[[str], str]] = None,
-
+    type_mapping_overrides: Optional[Dict[str, pa.DataType]] = None,
 ) -> pa.Schema:
     """
     Create a PyArrow schema from Salesforce field metadata.
 
     :param fields_metadata: List of field metadata dictionaries from Salesforce
     :param column_formatter: Optional function to format column names
-    :param
+    :param type_mapping_overrides: Optional dict to override default type mappings
     :returns: PyArrow schema
     """
     arrow_fields = []
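With the override hook in place, callers can remap individual Salesforce types without touching the defaults. A short sketch; the particular overrides chosen here are arbitrary examples, not recommendations:

```python
import pyarrow as pa

from aio_sf import salesforce_to_arrow_type

# Default mapping: currency -> float64
assert salesforce_to_arrow_type("currency") == pa.float64()

# Example overrides: keep dates as raw strings, widen currency to decimal.
overrides = {"date": pa.string(), "currency": pa.decimal128(18, 2)}
assert salesforce_to_arrow_type("currency", overrides) == pa.decimal128(18, 2)
assert salesforce_to_arrow_type("date", overrides) == pa.string()
```

Note that the lookup lowercases `sf_type` before consulting the merged mapping, so override keys should be lowercase.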
@@ -70,7 +73,7 @@ def create_schema_from_metadata(
         if column_formatter:
             field_name = column_formatter(field_name)
         sf_type = field.get("type", "string")
-        arrow_type = salesforce_to_arrow_type(sf_type,
+        arrow_type = salesforce_to_arrow_type(sf_type, type_mapping_overrides)
         # All fields are nullable since Salesforce can return empty values
         arrow_fields.append(pa.field(field_name, arrow_type, nullable=True))
 
@@ -90,7 +93,7 @@ class ParquetWriter:
         batch_size: int = 10000,
         convert_empty_to_null: bool = True,
         column_formatter: Optional[Callable[[str], str]] = None,
-
+        type_mapping_overrides: Optional[Dict[str, pa.DataType]] = None,
     ):
         """
         Initialize ParquetWriter.
@@ -100,14 +103,14 @@ class ParquetWriter:
         :param batch_size: Number of records to process in each batch
         :param convert_empty_to_null: Convert empty strings to null values
         :param column_formatter: Optional function to format column names. If None, no formatting is applied
-        :param
+        :param type_mapping_overrides: Optional dict to override default type mappings
         """
         self.file_path = file_path
         self.schema = schema
         self.batch_size = batch_size
         self.convert_empty_to_null = convert_empty_to_null
         self.column_formatter = column_formatter
-        self.
+        self.type_mapping_overrides = type_mapping_overrides
         self._writer = None
         self._schema_finalized = False
 
@@ -248,6 +251,11 @@ class ParquetWriter:
                 df[field_name] = self._convert_datetime_strings_to_timestamps(
                     datetime_series
                 )
+            elif pa.types.is_date(field.type):
+                # Convert Salesforce ISO date strings to dates
+                date_series = df[field_name]
+                if isinstance(date_series, pd.Series):
+                    df[field_name] = self._convert_date_strings_to_dates(date_series)
 
             # Replace empty strings with None for non-string fields
             if not pa.types.is_string(field.type):
@@ -290,6 +298,36 @@ class ParquetWriter:
             # This shouldn't happen, but handle it gracefully
             return pd.Series(result, index=series.index)
 
+    def _convert_date_strings_to_dates(self, series: pd.Series) -> pd.Series:
+        """
+        Convert Salesforce ISO date strings to pandas date objects.
+
+        Salesforce returns date in ISO format like '2025-10-01'.
+        """
+
+        def parse_sf_date(date_str):
+            if pd.isna(date_str) or date_str == "" or date_str is None:
+                return pd.NaT
+
+            try:
+                # Handle Salesforce date format (YYYY-MM-DD)
+                date_str = str(date_str).strip()
+
+                # Use pandas to_datetime for date parsing, then convert to date
+                return pd.to_datetime(date_str, format="%Y-%m-%d").date()
+
+            except (ValueError, TypeError) as e:
+                logging.warning(f"Failed to parse date string '{date_str}': {e}")
+                return pd.NaT
+
+        # Apply the conversion function to the series
+        result = series.apply(parse_sf_date)
+        if isinstance(result, pd.Series):
+            return result
+        else:
+            # This shouldn't happen, but handle it gracefully
+            return pd.Series(result, index=series.index)
+
     def close(self) -> None:
         """Close the parquet writer."""
         if self._writer:
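The parsing helper is strict about the `YYYY-MM-DD` layout and degrades to `NaT` on anything else. A standalone sketch of the same logic, outside the class, to show the edge-case behavior:

```python
import pandas as pd

# Mirrors the parse_sf_date logic added above; standalone for illustration.
def parse_sf_date(date_str):
    if pd.isna(date_str) or date_str == "":
        return pd.NaT
    try:
        return pd.to_datetime(str(date_str).strip(), format="%Y-%m-%d").date()
    except (ValueError, TypeError):
        return pd.NaT

series = pd.Series(["2025-10-01", "", None, "not-a-date"])
print(series.apply(parse_sf_date))
# index 0 parses to datetime.date(2025, 10, 1); the rest fall back to NaT
```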
@@ -305,7 +343,7 @@ async def write_query_to_parquet(
     batch_size: int = 10000,
     convert_empty_to_null: bool = True,
     column_formatter: Optional[Callable[[str], str]] = None,
-
+    type_mapping_overrides: Optional[Dict[str, pa.DataType]] = None,
 ) -> None:
     """
     Convenience function to write a QueryResult to a parquet file (async version).
@@ -317,14 +355,14 @@ async def write_query_to_parquet(
     :param batch_size: Number of records to process in each batch
     :param convert_empty_to_null: Convert empty strings to null values
     :param column_formatter: Optional function to format column names
-    :param
+    :param type_mapping_overrides: Optional dict to override default type mappings
     """
     effective_schema = None
     if schema:
         effective_schema = schema
     elif fields_metadata:
         effective_schema = create_schema_from_metadata(
-            fields_metadata, column_formatter,
+            fields_metadata, column_formatter, type_mapping_overrides
         )
 
     writer = ParquetWriter(
@@ -333,7 +371,7 @@ async def write_query_to_parquet(
         batch_size=batch_size,
         convert_empty_to_null=convert_empty_to_null,
         column_formatter=column_formatter,
-
+        type_mapping_overrides=type_mapping_overrides,
     )
 
     await writer.write_query_result(query_result)
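Taken together, the parquet changes surface through `write_query_to_parquet`. A hedged end-to-end sketch: the SOQL string and output path are placeholders, and the call shapes of `bulk_query` and `write_query_to_parquet` (query result first, then output path) are assumed from the parameter names in this diff rather than confirmed signatures:

```python
import pyarrow as pa

from aio_sf import bulk_query, write_query_to_parquet

async def export_accounts(client):
    # client: an authenticated SalesforceClient (see the README Quick Start).
    result = await bulk_query(client, "SELECT Id, Name, CreatedDate FROM Account")

    # As of 0.1.0b7, date fields land as Arrow date32 and datetimes as UTC
    # microsecond timestamps by default; the override below (purely
    # illustrative) keeps dates as raw strings instead.
    await write_query_to_parquet(
        result,
        "accounts.parquet",
        type_mapping_overrides={"date": pa.string()},
    )
```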
{aio_sf-0.1.0b5.dist-info → aio_sf-0.1.0b7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aio-sf
-Version: 0.1.0b5
+Version: 0.1.0b7
 Summary: Async Salesforce library for Python
 Project-URL: Homepage, https://github.com/callawaycloud/aio-salesforce
 Project-URL: Repository, https://github.com/callawaycloud/aio-salesforce
@@ -35,13 +35,16 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.11
+Requires-Dist: boto3>=1.34.0
 Requires-Dist: httpx>=0.25.0
+Requires-Dist: pandas>=2.0.0
+Requires-Dist: pyarrow>=10.0.0
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: python-dotenv>=1.0.0
-Provides-Extra:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
+Provides-Extra: core
+Requires-Dist: httpx>=0.25.0; extra == 'core'
+Requires-Dist: pydantic>=2.0.0; extra == 'core'
+Requires-Dist: python-dotenv>=1.0.0; extra == 'core'
 Provides-Extra: dev
 Requires-Dist: black>=23.0.0; extra == 'dev'
 Requires-Dist: mypy>=1.5.0; extra == 'dev'
@@ -88,16 +91,16 @@ An async Salesforce library for Python.
 
 ## Installation
 
-###
+### Full Package (Default - Includes Everything)
 ```bash
 uv add aio-sf
 # or: pip install aio-sf
 ```
 
-###
+### Core Only (Minimal Dependencies)
 ```bash
-uv add "aio-sf[
-# or: pip install "aio-sf[
+uv add "aio-sf[core]"
+# or: pip install "aio-sf[core]"
 ```
 
 ## Quick Start
@@ -157,7 +160,11 @@ The Exporter library contains a streamlined and "opinionated" way to export data
 
 ### 3. Export to Parquet
 ```python
-
+# With full installation (default), you can import directly from aio_sf
+from aio_sf import SalesforceClient, ClientCredentialsAuth, bulk_query, write_query_to_parquet
+
+# Or import from the exporter module (both work)
+# from aio_sf.exporter import bulk_query, write_query_to_parquet
 
 async def main():
     # ... authentication code from above ...
{aio_sf-0.1.0b5.dist-info → aio_sf-0.1.0b7.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-aio_sf/__init__.py,sha256=
+aio_sf/__init__.py,sha256=jWZkLTyHyOyHDJBfZORC2KcdtTBbI9pyVoSRxMbcJ1U,1603
 aio_sf/api/__init__.py,sha256=gLa2cbEvXPMLdcZVV7vhUQ-I9yJJitSaRN9mLn3Qhas,2299
 aio_sf/api/client.py,sha256=2epNf31HN1WpAVRSV0MtYthFkXlYiwwgMJ1Qms8-LKc,9750
 aio_sf/api/types.py,sha256=hpdCfzOzq32ESStQAewJc4j4NXQluENKjJEhUeGEI1Y,7738
@@ -21,9 +21,9 @@ aio_sf/api/query/__init__.py,sha256=OMh9g9PAPBFyQtUJc4aua1nKAiiIFYouZfaa5Zw4yZU,
 aio_sf/api/query/client.py,sha256=E9NTFgfAv01SDrOvZqufsM0GkQN4joHRRUp7-WMzBdk,8108
 aio_sf/api/query/types.py,sha256=Wfk75kJpNDCGpTHonCbzjWvayy8guA3eyZp3hE7nBt0,845
 aio_sf/exporter/__init__.py,sha256=waTegrvw_SvJzREAWD4twSDldSL-HfvhLTLLT1o765o,771
-aio_sf/exporter/bulk_export.py,sha256=
-aio_sf/exporter/parquet_writer.py,sha256=
-aio_sf-0.1.
-aio_sf-0.1.
-aio_sf-0.1.
-aio_sf-0.1.
+aio_sf/exporter/bulk_export.py,sha256=2GtiwXChf7dq7dByGLPDhIJJg-yq9eyoE57H4Ekqaus,13169
+aio_sf/exporter/parquet_writer.py,sha256=jGiLooxyaqciSDUbXj5F_4uWoR_YrQaB-PrDfRuXR3Y,14495
+aio_sf-0.1.0b7.dist-info/METADATA,sha256=EB5BfuZ2Td8dNAJThB3ed1VuNHH0LbwupD32sVIzKjA,6780
+aio_sf-0.1.0b7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+aio_sf-0.1.0b7.dist-info/licenses/LICENSE,sha256=gu0Cbpiqs-vX7YgJJhGI1jH1mHup3dZMrZc-gmpEG60,1071
+aio_sf-0.1.0b7.dist-info/RECORD,,
{aio_sf-0.1.0b5.dist-info → aio_sf-0.1.0b7.dist-info}/WHEEL
File without changes
{aio_sf-0.1.0b5.dist-info → aio_sf-0.1.0b7.dist-info}/licenses/LICENSE
File without changes