aio-sf 0.1.0b11__py3-none-any.whl → 0.1.0b12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aio_sf/exporter/parquet_writer.py +18 -32
- {aio_sf-0.1.0b11.dist-info → aio_sf-0.1.0b12.dist-info}/METADATA +1 -1
- {aio_sf-0.1.0b11.dist-info → aio_sf-0.1.0b12.dist-info}/RECORD +5 -5
- {aio_sf-0.1.0b11.dist-info → aio_sf-0.1.0b12.dist-info}/WHEEL +1 -1
- {aio_sf-0.1.0b11.dist-info → aio_sf-0.1.0b12.dist-info}/licenses/LICENSE +0 -0
|
@@ -227,17 +227,11 @@ class ParquetWriter:
|
|
|
227
227
|
|
|
228
228
|
# Apply type-specific conversions
|
|
229
229
|
if pa.types.is_boolean(field.type):
|
|
230
|
-
# Convert string 'true'/'false' to boolean
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
{"true": True, "false": False,
|
|
230
|
+
# Convert string 'true'/'false' to boolean
|
|
231
|
+
# Use map directly - unmapped values become NaN which is fine for nullable boolean
|
|
232
|
+
df[field_name] = df[field_name].map(
|
|
233
|
+
{"true": True, "false": False, "True": True, "False": False}
|
|
234
234
|
)
|
|
235
|
-
# For values that weren't mapped, keep the original values
|
|
236
|
-
# This avoids the fillna FutureWarning by using boolean indexing instead
|
|
237
|
-
mask = mapped_series.notna()
|
|
238
|
-
result_series = original_series.copy()
|
|
239
|
-
result_series.loc[mask] = mapped_series.loc[mask]
|
|
240
|
-
df[field_name] = result_series
|
|
241
235
|
elif pa.types.is_integer(field.type):
|
|
242
236
|
df[field_name] = pd.to_numeric(df[field_name], errors="coerce").astype(
|
|
243
237
|
"Int64"
|
|
@@ -256,6 +250,12 @@ class ParquetWriter:
|
|
|
256
250
|
date_series = df[field_name]
|
|
257
251
|
if isinstance(date_series, pd.Series):
|
|
258
252
|
df[field_name] = self._convert_date_strings_to_dates(date_series)
|
|
253
|
+
elif pa.types.is_string(field.type):
|
|
254
|
+
# Ensure string columns contain only strings or None
|
|
255
|
+
# This handles edge cases where non-string values might be present
|
|
256
|
+
df[field_name] = df[field_name].apply(
|
|
257
|
+
lambda x: None if pd.isna(x) else str(x)
|
|
258
|
+
)
|
|
259
259
|
|
|
260
260
|
# Replace empty strings with None for non-string fields
|
|
261
261
|
if not pa.types.is_string(field.type):
|
|
@@ -300,33 +300,19 @@ class ParquetWriter:
|
|
|
300
300
|
|
|
301
301
|
def _convert_date_strings_to_dates(self, series: pd.Series) -> pd.Series:
|
|
302
302
|
"""
|
|
303
|
-
Convert Salesforce ISO date strings to pandas
|
|
303
|
+
Convert Salesforce ISO date strings to pandas datetime objects.
|
|
304
304
|
|
|
305
305
|
Salesforce returns date in ISO format like '2025-10-01'.
|
|
306
|
+
PyArrow will handle conversion from datetime64 to date32.
|
|
306
307
|
"""
|
|
308
|
+
# Replace empty strings with None first
|
|
309
|
+
series = series.replace({"": None})
|
|
307
310
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
try:
|
|
313
|
-
# Handle Salesforce date format (YYYY-MM-DD)
|
|
314
|
-
date_str = str(date_str).strip()
|
|
315
|
-
|
|
316
|
-
# Use pandas to_datetime for date parsing, then convert to date
|
|
317
|
-
return pd.to_datetime(date_str, format="%Y-%m-%d").date()
|
|
318
|
-
|
|
319
|
-
except (ValueError, TypeError) as e:
|
|
320
|
-
logging.warning(f"Failed to parse date string '{date_str}': {e}")
|
|
321
|
-
return pd.NaT
|
|
311
|
+
# Use pandas to_datetime for vectorized conversion
|
|
312
|
+
# This returns datetime64[ns] which PyArrow can convert to date32
|
|
313
|
+
result = pd.to_datetime(series, format="%Y-%m-%d", errors="coerce")
|
|
322
314
|
|
|
323
|
-
|
|
324
|
-
result = series.apply(parse_sf_date)
|
|
325
|
-
if isinstance(result, pd.Series):
|
|
326
|
-
return result
|
|
327
|
-
else:
|
|
328
|
-
# This shouldn't happen, but handle it gracefully
|
|
329
|
-
return pd.Series(result, index=series.index)
|
|
315
|
+
return result
|
|
330
316
|
|
|
331
317
|
def close(self) -> None:
|
|
332
318
|
"""Close the parquet writer."""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aio-sf
|
|
3
|
-
Version: 0.1.0b11
|
|
3
|
+
Version: 0.1.0b12
|
|
4
4
|
Summary: Async Salesforce library for Python
|
|
5
5
|
Project-URL: Homepage, https://github.com/callawaycloud/aio-salesforce
|
|
6
6
|
Project-URL: Repository, https://github.com/callawaycloud/aio-salesforce
|
|
@@ -25,8 +25,8 @@ aio_sf/api/query/client.py,sha256=E9NTFgfAv01SDrOvZqufsM0GkQN4joHRRUp7-WMzBdk,81
|
|
|
25
25
|
aio_sf/api/query/types.py,sha256=Wfk75kJpNDCGpTHonCbzjWvayy8guA3eyZp3hE7nBt0,845
|
|
26
26
|
aio_sf/exporter/__init__.py,sha256=waTegrvw_SvJzREAWD4twSDldSL-HfvhLTLLT1o765o,771
|
|
27
27
|
aio_sf/exporter/bulk_export.py,sha256=2GtiwXChf7dq7dByGLPDhIJJg-yq9eyoE57H4Ekqaus,13169
|
|
28
|
-
aio_sf/exporter/parquet_writer.py,sha256=
|
|
29
|
-
aio_sf-0.1.
|
|
30
|
-
aio_sf-0.1.
|
|
31
|
-
aio_sf-0.1.
|
|
32
|
-
aio_sf-0.1.
|
|
28
|
+
aio_sf/exporter/parquet_writer.py,sha256=11502SuAJNShP4K770taXoY4GV6PV1Fe4eDPKbvS9EE,13994
|
|
29
|
+
aio_sf-0.1.0b12.dist-info/METADATA,sha256=HLPTOlBDAHdyDIIkyS3izksCuGtU62xBSOFyvBm2n1g,8282
|
|
30
|
+
aio_sf-0.1.0b12.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
31
|
+
aio_sf-0.1.0b12.dist-info/licenses/LICENSE,sha256=gu0Cbpiqs-vX7YgJJhGI1jH1mHup3dZMrZc-gmpEG60,1071
|
|
32
|
+
aio_sf-0.1.0b12.dist-info/RECORD,,
|
|
File without changes
|