aio-sf 0.1.0b5__py3-none-any.whl → 0.1.0b7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aio_sf/__init__.py CHANGED
```diff
@@ -14,9 +14,8 @@ from .api.auth import (  # noqa: F401
     SfdxCliAuth,
 )
 
-# Core package only exports client functionality
-# Users import exporter functions directly: from aio_sf.exporter import bulk_query
-
+# Core package exports client functionality
+# Exporter functionality is included by default, but gracefully handles missing deps
 __all__ = [
     "SalesforceClient",
     "SalesforceAuthError",
@@ -26,3 +25,37 @@ __all__ = [
     "StaticTokenAuth",
     "SfdxCliAuth",
 ]
+
+# Try to import exporter functionality if dependencies are available
+try:
+    from .exporter import (  # noqa: F401
+        bulk_query,
+        get_bulk_fields,
+        resume_from_locator,
+        write_records_to_csv,
+        QueryResult,
+        batch_records_async,
+        ParquetWriter,
+        create_schema_from_metadata,
+        write_query_to_parquet,
+        salesforce_to_arrow_type,
+    )
+
+    __all__.extend(
+        [
+            "bulk_query",
+            "get_bulk_fields",
+            "resume_from_locator",
+            "write_records_to_csv",
+            "QueryResult",
+            "batch_records_async",
+            "ParquetWriter",
+            "create_schema_from_metadata",
+            "write_query_to_parquet",
+            "salesforce_to_arrow_type",
+        ]
+    )
+
+except ImportError:
+    # Exporter dependencies not available - this is fine for core-only installs
+    pass
```
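With this graceful-degradation import, a core-only install simply omits the exporter names from the top-level package. A minimal sketch of how downstream code might detect that (the feature-check style is illustrative, not from the package):

```python
# Illustrative feature check for a core-only install that lacks
# pandas/pyarrow/boto3; names come from the __init__.py diff above.
import aio_sf

if "bulk_query" in aio_sf.__all__:
    from aio_sf import bulk_query  # exporter dependencies are present
else:
    print("Exporter extras missing; install the full package: pip install aio-sf")
```
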
aio_sf/exporter/bulk_export.py CHANGED
```diff
@@ -320,7 +320,7 @@ async def get_bulk_fields(fields_metadata: List[FieldInfo]) -> List[FieldInfo]:
     queryable_fields = [
         field
         for field in fields_metadata
-        if field.get("type") not in ["address", "location"]
+        if field.get("type") not in ["address", "location", "base64"]
     ]
 
     return queryable_fields
```
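The new `base64` exclusion keeps binary blob fields out of bulk queries, presumably because Bulk API query jobs do not support the base64 field type. A hedged sketch of the filter's effect (the sample metadata dicts are invented for illustration; `FieldInfo` is treated as a plain dict with a `"type"` key, matching the `field.get("type")` call in the diff):

```python
import asyncio

from aio_sf import get_bulk_fields

# Invented sample metadata; real values come from a Salesforce describe call.
sample_fields = [
    {"name": "Id", "type": "id"},
    {"name": "BillingAddress", "type": "address"},  # excluded
    {"name": "VersionData", "type": "base64"},      # excluded as of b7
]

queryable = asyncio.run(get_bulk_fields(sample_fields))
# -> [{"name": "Id", "type": "id"}]
```
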
aio_sf/exporter/parquet_writer.py CHANGED
```diff
@@ -16,24 +16,20 @@ from .bulk_export import QueryResult, batch_records_async
 
 
 def salesforce_to_arrow_type(
-    sf_type: str, convert_datetime_to_timestamp: bool = True
+    sf_type: str, type_mapping_overrides: Optional[Dict[str, pa.DataType]] = None
 ) -> pa.DataType:
     """Convert Salesforce data types to Arrow data types.
 
     :param sf_type: Salesforce field type
-    :param convert_datetime_to_timestamp: If True, datetime fields use timestamp type, otherwise string
+    :param type_mapping_overrides: Optional dict to override default type mappings
     """
-    type_mapping = {
+    default_type_mapping = {
         "string": pa.string(),
         "boolean": pa.bool_(),
         "int": pa.int64(),
         "double": pa.float64(),
-        "date": pa.string(),  # Always store as string since SF returns ISO format
-        "datetime": (
-            pa.timestamp("us", tz="UTC")
-            if convert_datetime_to_timestamp
-            else pa.string()
-        ),
+        "date": pa.date32(),  # Store as proper date type
+        "datetime": pa.timestamp("us", tz="UTC"),
         "currency": pa.float64(),
         "reference": pa.string(),
         "picklist": pa.string(),
@@ -48,20 +44,27 @@ def salesforce_to_arrow_type(
         "base64": pa.string(),
         "anyType": pa.string(),
     }
+
+    # Apply overrides if provided
+    if type_mapping_overrides:
+        type_mapping = {**default_type_mapping, **type_mapping_overrides}
+    else:
+        type_mapping = default_type_mapping
+
     return type_mapping.get(sf_type.lower(), pa.string())
 
 
 def create_schema_from_metadata(
     fields_metadata: List[FieldInfo],
     column_formatter: Optional[Callable[[str], str]] = None,
-    convert_datetime_to_timestamp: bool = True,
+    type_mapping_overrides: Optional[Dict[str, pa.DataType]] = None,
 ) -> pa.Schema:
     """
     Create a PyArrow schema from Salesforce field metadata.
 
     :param fields_metadata: List of field metadata dictionaries from Salesforce
     :param column_formatter: Optional function to format column names
-    :param convert_datetime_to_timestamp: If True, datetime fields use timestamp type, otherwise string
+    :param type_mapping_overrides: Optional dict to override default type mappings
     :returns: PyArrow schema
     """
     arrow_fields = []
```
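With the b7 mapping, `date` and `datetime` columns become real Arrow types by default, and the old boolean flag is gone; callers who relied on string output can restore it through the new override hook. A minimal sketch, with sample metadata invented for illustration (real values come from the describe call / `get_bulk_fields`):

```python
import pyarrow as pa

from aio_sf import create_schema_from_metadata

# Invented sample metadata dicts; only the "type" key drives the mapping.
fields_metadata = [
    {"name": "Id", "type": "id"},
    {"name": "CreatedDate", "type": "datetime"},
    {"name": "CloseDate", "type": "date"},
]

# Reproduce the pre-b7 behavior of storing dates/datetimes as strings.
schema = create_schema_from_metadata(
    fields_metadata,
    type_mapping_overrides={"date": pa.string(), "datetime": pa.string()},
)
print(schema)  # all three fields map to string under these overrides
```
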
aio_sf/exporter/parquet_writer.py (continued)
```diff
@@ -70,7 +73,7 @@ def create_schema_from_metadata(
         if column_formatter:
             field_name = column_formatter(field_name)
         sf_type = field.get("type", "string")
-        arrow_type = salesforce_to_arrow_type(sf_type, convert_datetime_to_timestamp)
+        arrow_type = salesforce_to_arrow_type(sf_type, type_mapping_overrides)
         # All fields are nullable since Salesforce can return empty values
         arrow_fields.append(pa.field(field_name, arrow_type, nullable=True))
 
@@ -90,7 +93,7 @@ class ParquetWriter:
         batch_size: int = 10000,
         convert_empty_to_null: bool = True,
         column_formatter: Optional[Callable[[str], str]] = None,
-        convert_datetime_to_timestamp: bool = True,
+        type_mapping_overrides: Optional[Dict[str, pa.DataType]] = None,
     ):
         """
         Initialize ParquetWriter.
@@ -100,14 +103,14 @@ class ParquetWriter:
         :param batch_size: Number of records to process in each batch
         :param convert_empty_to_null: Convert empty strings to null values
         :param column_formatter: Optional function to format column names. If None, no formatting is applied
-        :param convert_datetime_to_timestamp: If True, datetime fields are converted to timestamps, otherwise stored as strings
+        :param type_mapping_overrides: Optional dict to override default type mappings
         """
         self.file_path = file_path
         self.schema = schema
         self.batch_size = batch_size
         self.convert_empty_to_null = convert_empty_to_null
         self.column_formatter = column_formatter
-        self.convert_datetime_to_timestamp = convert_datetime_to_timestamp
+        self.type_mapping_overrides = type_mapping_overrides
         self._writer = None
         self._schema_finalized = False
 
@@ -248,6 +251,11 @@ class ParquetWriter:
                     df[field_name] = self._convert_datetime_strings_to_timestamps(
                         datetime_series
                     )
+                elif pa.types.is_date(field.type):
+                    # Convert Salesforce ISO date strings to dates
+                    date_series = df[field_name]
+                    if isinstance(date_series, pd.Series):
+                        df[field_name] = self._convert_date_strings_to_dates(date_series)
 
                 # Replace empty strings with None for non-string fields
                 if not pa.types.is_string(field.type):
@@ -290,6 +298,36 @@ class ParquetWriter:
             # This shouldn't happen, but handle it gracefully
             return pd.Series(result, index=series.index)
 
+    def _convert_date_strings_to_dates(self, series: pd.Series) -> pd.Series:
+        """
+        Convert Salesforce ISO date strings to pandas date objects.
+
+        Salesforce returns date in ISO format like '2025-10-01'.
+        """
+
+        def parse_sf_date(date_str):
+            if pd.isna(date_str) or date_str == "" or date_str is None:
+                return pd.NaT
+
+            try:
+                # Handle Salesforce date format (YYYY-MM-DD)
+                date_str = str(date_str).strip()
+
+                # Use pandas to_datetime for date parsing, then convert to date
+                return pd.to_datetime(date_str, format="%Y-%m-%d").date()
+
+            except (ValueError, TypeError) as e:
+                logging.warning(f"Failed to parse date string '{date_str}': {e}")
+                return pd.NaT
+
+        # Apply the conversion function to the series
+        result = series.apply(parse_sf_date)
+        if isinstance(result, pd.Series):
+            return result
+        else:
+            # This shouldn't happen, but handle it gracefully
+            return pd.Series(result, index=series.index)
+
     def close(self) -> None:
         """Close the parquet writer."""
         if self._writer:
```
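The parsing rules in the new helper are easy to verify in isolation. A standalone sketch of the same behavior, using `errors="coerce"` in place of the helper's try/except (the outcome matches, the mechanism differs, and the helper additionally calls `.date()` on each result):

```python
import pandas as pd

# Blank, None, and malformed values all collapse to NaT, as in the helper.
s = pd.Series(["2025-10-01", "", None, "10/01/2025"])
parsed = s.map(
    lambda v: pd.NaT
    if pd.isna(v) or v == ""
    else pd.to_datetime(str(v).strip(), format="%Y-%m-%d", errors="coerce")
)
print(parsed.tolist())
# [Timestamp('2025-10-01 00:00:00'), NaT, NaT, NaT]
```
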
aio_sf/exporter/parquet_writer.py (continued)
```diff
@@ -305,7 +343,7 @@ async def write_query_to_parquet(
     batch_size: int = 10000,
     convert_empty_to_null: bool = True,
     column_formatter: Optional[Callable[[str], str]] = None,
-    convert_datetime_to_timestamp: bool = True,
+    type_mapping_overrides: Optional[Dict[str, pa.DataType]] = None,
 ) -> None:
     """
     Convenience function to write a QueryResult to a parquet file (async version).
@@ -317,14 +355,14 @@
     :param batch_size: Number of records to process in each batch
     :param convert_empty_to_null: Convert empty strings to null values
     :param column_formatter: Optional function to format column names
-    :param convert_datetime_to_timestamp: If True, datetime fields are converted to timestamps, otherwise stored as strings
+    :param type_mapping_overrides: Optional dict to override default type mappings
     """
     effective_schema = None
     if schema:
         effective_schema = schema
     elif fields_metadata:
         effective_schema = create_schema_from_metadata(
-            fields_metadata, column_formatter, convert_datetime_to_timestamp
+            fields_metadata, column_formatter, type_mapping_overrides
        )
 
     writer = ParquetWriter(
@@ -333,7 +371,7 @@
         batch_size=batch_size,
         convert_empty_to_null=convert_empty_to_null,
         column_formatter=column_formatter,
-        convert_datetime_to_timestamp=convert_datetime_to_timestamp,
+        type_mapping_overrides=type_mapping_overrides,
     )
 
     await writer.write_query_result(query_result)
```
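Putting the pieces together, a hedged end-to-end sketch. The `bulk_query` call shape is an assumption (its signature is not shown in this diff); the positional `query_result` and the `file_path` keyword are inferred from `writer.write_query_result(query_result)` and `self.file_path` in the hunks above:

```python
import pyarrow as pa

from aio_sf import bulk_query, write_query_to_parquet

async def export_accounts(client):
    # Assumed call shape; see the README's exporter example for the real one.
    query_result = await bulk_query(client, "SELECT Id, CreatedDate FROM Account")
    await write_query_to_parquet(
        query_result,
        file_path="accounts.parquet",
        type_mapping_overrides={"datetime": pa.string()},  # opt back into strings
    )
```
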
aio_sf-0.1.0b5.dist-info/METADATA → aio_sf-0.1.0b7.dist-info/METADATA RENAMED
````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aio-sf
-Version: 0.1.0b5
+Version: 0.1.0b7
 Summary: Async Salesforce library for Python
 Project-URL: Homepage, https://github.com/callawaycloud/aio-salesforce
 Project-URL: Repository, https://github.com/callawaycloud/aio-salesforce
@@ -35,13 +35,16 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.11
+Requires-Dist: boto3>=1.34.0
 Requires-Dist: httpx>=0.25.0
+Requires-Dist: pandas>=2.0.0
+Requires-Dist: pyarrow>=10.0.0
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: python-dotenv>=1.0.0
-Provides-Extra: all
-Requires-Dist: boto3>=1.34.0; extra == 'all'
-Requires-Dist: pandas>=2.0.0; extra == 'all'
-Requires-Dist: pyarrow>=10.0.0; extra == 'all'
+Provides-Extra: core
+Requires-Dist: httpx>=0.25.0; extra == 'core'
+Requires-Dist: pydantic>=2.0.0; extra == 'core'
+Requires-Dist: python-dotenv>=1.0.0; extra == 'core'
 Provides-Extra: dev
 Requires-Dist: black>=23.0.0; extra == 'dev'
 Requires-Dist: mypy>=1.5.0; extra == 'dev'
@@ -88,16 +91,16 @@ An async Salesforce library for Python.
 
 ## Installation
 
-### Core (Connection Only)
+### Full Package (Default - Includes Everything)
 ```bash
 uv add aio-sf
 # or: pip install aio-sf
 ```
 
-### With Export Capabilities
+### Core Only (Minimal Dependencies)
 ```bash
-uv add "aio-sf[exporter]"
-# or: pip install "aio-sf[exporter]"
+uv add "aio-sf[core]"
+# or: pip install "aio-sf[core]"
 ```
 
 ## Quick Start
@@ -157,7 +160,11 @@ The Exporter library contains a streamlined and "opinionated" way to export data
 
 ### 3. Export to Parquet
 ```python
-from aio_sf.exporter import bulk_query, write_query_to_parquet
+# With full installation (default), you can import directly from aio_sf
+from aio_sf import SalesforceClient, ClientCredentialsAuth, bulk_query, write_query_to_parquet
+
+# Or import from the exporter module (both work)
+# from aio_sf.exporter import bulk_query, write_query_to_parquet
 
 async def main():
     # ... authentication code from above ...
````
aio_sf-0.1.0b5.dist-info/RECORD → aio_sf-0.1.0b7.dist-info/RECORD RENAMED
```diff
@@ -1,4 +1,4 @@
-aio_sf/__init__.py,sha256=p1WqfaXQ0ldxDkLCK2hc07rMeq9ywenUYUBQc2SfQOE,707
+aio_sf/__init__.py,sha256=jWZkLTyHyOyHDJBfZORC2KcdtTBbI9pyVoSRxMbcJ1U,1603
 aio_sf/api/__init__.py,sha256=gLa2cbEvXPMLdcZVV7vhUQ-I9yJJitSaRN9mLn3Qhas,2299
 aio_sf/api/client.py,sha256=2epNf31HN1WpAVRSV0MtYthFkXlYiwwgMJ1Qms8-LKc,9750
 aio_sf/api/types.py,sha256=hpdCfzOzq32ESStQAewJc4j4NXQluENKjJEhUeGEI1Y,7738
@@ -21,9 +21,9 @@ aio_sf/api/query/__init__.py,sha256=OMh9g9PAPBFyQtUJc4aua1nKAiiIFYouZfaa5Zw4yZU,
 aio_sf/api/query/client.py,sha256=E9NTFgfAv01SDrOvZqufsM0GkQN4joHRRUp7-WMzBdk,8108
 aio_sf/api/query/types.py,sha256=Wfk75kJpNDCGpTHonCbzjWvayy8guA3eyZp3hE7nBt0,845
 aio_sf/exporter/__init__.py,sha256=waTegrvw_SvJzREAWD4twSDldSL-HfvhLTLLT1o765o,771
-aio_sf/exporter/bulk_export.py,sha256=JmlVwDcXqvBYYY9o7DjdfcJ9jIKiVAPrcnk5Bz4sLoU,13159
-aio_sf/exporter/parquet_writer.py,sha256=EysoeFI8TptcAfhor8FeJsJO0eNAuw9i250JdlHPoug,13078
-aio_sf-0.1.0b5.dist-info/METADATA,sha256=GL__z4LmVTaxTO_soOtE0JwR95tl2hxyDzMAMj7MkNk,6439
-aio_sf-0.1.0b5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-aio_sf-0.1.0b5.dist-info/licenses/LICENSE,sha256=gu0Cbpiqs-vX7YgJJhGI1jH1mHup3dZMrZc-gmpEG60,1071
-aio_sf-0.1.0b5.dist-info/RECORD,,
+aio_sf/exporter/bulk_export.py,sha256=2GtiwXChf7dq7dByGLPDhIJJg-yq9eyoE57H4Ekqaus,13169
+aio_sf/exporter/parquet_writer.py,sha256=jGiLooxyaqciSDUbXj5F_4uWoR_YrQaB-PrDfRuXR3Y,14495
+aio_sf-0.1.0b7.dist-info/METADATA,sha256=EB5BfuZ2Td8dNAJThB3ed1VuNHH0LbwupD32sVIzKjA,6780
+aio_sf-0.1.0b7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+aio_sf-0.1.0b7.dist-info/licenses/LICENSE,sha256=gu0Cbpiqs-vX7YgJJhGI1jH1mHup3dZMrZc-gmpEG60,1071
+aio_sf-0.1.0b7.dist-info/RECORD,,
```