aio_sf-0.1.0b1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aio_salesforce/__init__.py +27 -0
- aio_salesforce/api/README.md +107 -0
- aio_salesforce/api/__init__.py +65 -0
- aio_salesforce/api/bulk_v2/__init__.py +21 -0
- aio_salesforce/api/bulk_v2/client.py +200 -0
- aio_salesforce/api/bulk_v2/types.py +71 -0
- aio_salesforce/api/describe/__init__.py +31 -0
- aio_salesforce/api/describe/client.py +94 -0
- aio_salesforce/api/describe/types.py +303 -0
- aio_salesforce/api/query/__init__.py +18 -0
- aio_salesforce/api/query/client.py +216 -0
- aio_salesforce/api/query/types.py +38 -0
- aio_salesforce/api/types.py +303 -0
- aio_salesforce/connection.py +511 -0
- aio_salesforce/exporter/__init__.py +38 -0
- aio_salesforce/exporter/bulk_export.py +397 -0
- aio_salesforce/exporter/parquet_writer.py +296 -0
- aio_salesforce/exporter/parquet_writer.py.backup +326 -0
- aio_sf-0.1.0b1.dist-info/METADATA +198 -0
- aio_sf-0.1.0b1.dist-info/RECORD +22 -0
- aio_sf-0.1.0b1.dist-info/WHEEL +4 -0
- aio_sf-0.1.0b1.dist-info/licenses/LICENSE +21 -0

aio_salesforce/exporter/parquet_writer.py.backup
@@ -0,0 +1,326 @@
"""
Parquet writer module for converting Salesforce QueryResult to Parquet format.
"""

from __future__ import annotations

import logging
from typing import Any, Dict, List, Optional, TYPE_CHECKING
from pathlib import Path

if TYPE_CHECKING:
    import pyarrow as pa
    import pandas as pd
    import pyarrow.parquet as pq

try:
    import pyarrow as pa
    import pandas as pd
    import pyarrow.parquet as pq

    _DEPENDENCIES_AVAILABLE = True
except ImportError as e:
    _DEPENDENCIES_AVAILABLE = False
    _MISSING_DEPS = str(e)
    # Create dummy objects to prevent import errors
    pa = None
    pd = None
    pq = None

from ..api.types import FieldInfo

from .bulk_export import QueryResult, batch_records, batch_records_async


def _check_dependencies():
    """Check if required dependencies are available and raise a helpful error if not."""
    if not _DEPENDENCIES_AVAILABLE:
        raise ImportError(
            f"Missing required dependencies for exporter functionality: {_MISSING_DEPS}. "
            "Install with: pip install 'aio-salesforce[exporter]' or uv add 'aio-salesforce[exporter]'"
        )


def salesforce_to_arrow_type(sf_type: str) -> pa.DataType:
    """Convert Salesforce data types to Arrow data types."""
    _check_dependencies()
    # Keys must be lowercase: the lookup below lowercases sf_type.
    type_mapping = {
        "string": pa.string(),
        "boolean": pa.bool_(),
        "int": pa.int64(),
        "double": pa.float64(),
        "date": pa.string(),  # Store as string since SF returns ISO format
        "datetime": pa.string(),  # Store as string since SF returns ISO format
        "currency": pa.float64(),
        "reference": pa.string(),
        "picklist": pa.string(),
        "multipicklist": pa.string(),
        "textarea": pa.string(),
        "phone": pa.string(),
        "url": pa.string(),
        "email": pa.string(),
        "combobox": pa.string(),
        "percent": pa.float64(),
        "id": pa.string(),
        "base64": pa.string(),
        "anytype": pa.string(),
    }
    return type_mapping.get(sf_type.lower(), pa.string())


def create_schema_from_metadata(fields_metadata: List[FieldInfo]) -> pa.Schema:
    """
    Create a PyArrow schema from Salesforce field metadata.

    :param fields_metadata: List of field metadata dictionaries from Salesforce
    :returns: PyArrow schema
    """
    _check_dependencies()
    arrow_fields = []
    for field in fields_metadata:
        field_name = field.get("name", "").lower()  # Normalize to lowercase
        sf_type = field.get("type", "string")
        arrow_type = salesforce_to_arrow_type(sf_type)
        # All fields are nullable since Salesforce can return empty values
        arrow_fields.append(pa.field(field_name, arrow_type, nullable=True))

    return pa.schema(arrow_fields)


class ParquetWriter:
    """
    Writer class for converting Salesforce QueryResult to Parquet format.
    Supports streaming writes and optional schema from field metadata.
    """

    def __init__(
        self,
        file_path: str,
        schema: Optional[pa.Schema] = None,
        batch_size: int = 10000,
        convert_empty_to_null: bool = True,
    ):
        """
        Initialize ParquetWriter.

        :param file_path: Path to output parquet file
        :param schema: Optional PyArrow schema. If None, will be inferred from first batch
        :param batch_size: Number of records to process in each batch
        :param convert_empty_to_null: Convert empty strings to null values
        """
        _check_dependencies()
        self.file_path = file_path
        self.schema = schema
        self.batch_size = batch_size
        self.convert_empty_to_null = convert_empty_to_null
        self._writer = None
        self._schema_finalized = False

        # Ensure parent directory exists
        Path(file_path).parent.mkdir(parents=True, exist_ok=True)

    def write_query_result(self, query_result: QueryResult) -> None:
        """
        Write all records from a QueryResult to the parquet file.

        :param query_result: QueryResult to write
        """
        try:
            for batch in batch_records(query_result, self.batch_size):
                self._write_batch(batch)
        finally:
            self.close()

    async def write_query_result_async(self, query_result: QueryResult) -> None:
        """
        Write all records from a QueryResult to the parquet file (async version).

        :param query_result: QueryResult to write
        """
        try:
            async for batch in batch_records_async(query_result, self.batch_size):
                self._write_batch(batch)
        finally:
            self.close()

    def _write_batch(self, batch: List[Dict[str, Any]]) -> None:
        """Write a batch of records to the parquet file."""
        if not batch:
            return

        # Convert field names to lowercase for consistency
        converted_batch = []
        for record in batch:
            converted_record = {k.lower(): v for k, v in record.items()}
            converted_batch.append(converted_record)

        # Create DataFrame
        df = pd.DataFrame(converted_batch)

        # If schema not finalized, create it from first batch
        if not self._schema_finalized:
            if self.schema is None:
                self.schema = self._infer_schema_from_dataframe(df)
            else:
                # Filter schema to only include fields that are actually in the data
                self.schema = self._filter_schema_to_data(self.schema, df.columns)
            self._schema_finalized = True

        # Apply data type conversions based on schema
        self._convert_dataframe_types(df)

        # Create Arrow table
        table = pa.Table.from_pandas(df, schema=self.schema)

        # Initialize writer if needed
        if self._writer is None:
            self._writer = pq.ParquetWriter(self.file_path, self.schema)

        # Write the table
        self._writer.write_table(table)

    def _infer_schema_from_dataframe(self, df: pd.DataFrame) -> pa.Schema:
        """Infer schema from the first DataFrame."""
        fields = []
        for col_name, dtype in df.dtypes.items():
            if dtype == "object":
                arrow_type = pa.string()
            elif dtype == "bool":
                arrow_type = pa.bool_()
            elif dtype in ["int64", "int32"]:
                arrow_type = pa.int64()
            elif dtype in ["float64", "float32"]:
                arrow_type = pa.float64()
            else:
                arrow_type = pa.string()

            fields.append(pa.field(col_name, arrow_type, nullable=True))

        return pa.schema(fields)

    def _filter_schema_to_data(
        self, schema: pa.Schema, data_columns: List[str]
    ) -> pa.Schema:
        """Filter schema to only include fields that are present in the data."""
        # Convert data columns to set for faster lookup
        data_columns_set = set(data_columns)

        # Filter schema fields to only those present in data
        filtered_fields = []
        for field in schema:
            if field.name in data_columns_set:
                filtered_fields.append(field)

        if len(filtered_fields) != len(data_columns_set):
            # Log fields that are in data but not in schema (shouldn't happen normally)
            missing_in_schema = data_columns_set - {f.name for f in filtered_fields}
            if missing_in_schema:
                logging.warning(
                    f"Fields in data but not in schema: {missing_in_schema}"
                )

        return pa.schema(filtered_fields)

    def _convert_dataframe_types(self, df: pd.DataFrame) -> None:
        """Convert DataFrame types based on the schema."""
        for field in self.schema:
            field_name = field.name
            if field_name not in df.columns:
                continue

            # Convert empty strings to null if requested
            if self.convert_empty_to_null:
                df[field_name] = df[field_name].replace({"": None})

            # Apply type-specific conversions
            if pa.types.is_boolean(field.type):
                # Convert string 'true'/'false' to boolean
                df[field_name] = (
                    df[field_name]
                    .map({"true": True, "false": False, None: None})
                    .fillna(df[field_name])
                )  # Keep original values for non-string booleans
            elif pa.types.is_integer(field.type):
                df[field_name] = pd.to_numeric(df[field_name], errors="coerce").astype(
                    "Int64"
                )  # Nullable integer
            elif pa.types.is_floating(field.type):
                df[field_name] = pd.to_numeric(df[field_name], errors="coerce")

            # Replace empty strings with None for non-string fields
            if not pa.types.is_string(field.type):
                df[field_name] = df[field_name].replace("", pd.NA)

    def close(self) -> None:
        """Close the parquet writer."""
        if self._writer:
            self._writer.close()
            self._writer = None


def write_query_to_parquet(
    query_result: QueryResult,
    file_path: str,
    fields_metadata: Optional[List[FieldInfo]] = None,
    schema: Optional[pa.Schema] = None,
    batch_size: int = 10000,
    convert_empty_to_null: bool = True,
) -> None:
    """
    Convenience function to write a QueryResult to a parquet file.

    :param query_result: QueryResult to write
    :param file_path: Path to output parquet file
    :param fields_metadata: Optional Salesforce field metadata for schema creation
    :param schema: Optional pre-created PyArrow schema (takes precedence over fields_metadata)
    :param batch_size: Number of records to process in each batch
    :param convert_empty_to_null: Convert empty strings to null values
    """
    effective_schema = None
    if schema:
        effective_schema = schema
    elif fields_metadata:
        effective_schema = create_schema_from_metadata(fields_metadata)

    writer = ParquetWriter(
        file_path=file_path,
        schema=effective_schema,
        batch_size=batch_size,
        convert_empty_to_null=convert_empty_to_null,
    )

    writer.write_query_result(query_result)


async def write_query_to_parquet_async(
    query_result: QueryResult,
    file_path: str,
    fields_metadata: Optional[List[FieldInfo]] = None,
    schema: Optional[pa.Schema] = None,
    batch_size: int = 10000,
    convert_empty_to_null: bool = True,
) -> None:
    """
    Convenience function to write a QueryResult to a parquet file (async version).

    :param query_result: QueryResult to write
    :param file_path: Path to output parquet file
    :param fields_metadata: Optional Salesforce field metadata for schema creation
    :param schema: Optional pre-created PyArrow schema (takes precedence over fields_metadata)
    :param batch_size: Number of records to process in each batch
    :param convert_empty_to_null: Convert empty strings to null values
    """
    effective_schema = None
    if schema:
        effective_schema = schema
    elif fields_metadata:
        effective_schema = create_schema_from_metadata(fields_metadata)

    writer = ParquetWriter(
        file_path=file_path,
        schema=effective_schema,
        batch_size=batch_size,
        convert_empty_to_null=convert_empty_to_null,
    )

    await writer.write_query_result_async(query_result)

aio_sf-0.1.0b1.dist-info/METADATA
@@ -0,0 +1,198 @@
Metadata-Version: 2.4
Name: aio-sf
Version: 0.1.0b1
Summary: Async Salesforce library for Python with Bulk API 2.0 support
Project-URL: Homepage, https://github.com/callawaycloud/aio-salesforce
Project-URL: Repository, https://github.com/callawaycloud/aio-salesforce
Project-URL: Issues, https://github.com/callawaycloud/aio-salesforce/issues
Author-email: Jonas <charlie@callaway.cloud>
License: MIT License

Copyright (c) 2025 Callaway Cloud

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
License-File: LICENSE
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Requires-Python: >=3.9
Requires-Dist: httpx>=0.25.0
Requires-Dist: pydantic>=2.0.0
Requires-Dist: python-dotenv>=1.0.0
Provides-Extra: all
Requires-Dist: boto3>=1.34.0; extra == 'all'
Requires-Dist: pandas>=2.0.0; extra == 'all'
Requires-Dist: pyarrow>=10.0.0; extra == 'all'
Provides-Extra: dev
Requires-Dist: black>=23.0.0; extra == 'dev'
Requires-Dist: mypy>=1.5.0; extra == 'dev'
Requires-Dist: pre-commit>=3.0.0; extra == 'dev'
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
Requires-Dist: pytest-mock>=3.10.0; extra == 'dev'
Requires-Dist: pytest>=7.0.0; extra == 'dev'
Requires-Dist: ruff>=0.1.0; extra == 'dev'
Requires-Dist: types-requests>=2.31.0; extra == 'dev'
Provides-Extra: exporter
Requires-Dist: boto3>=1.34.0; extra == 'exporter'
Requires-Dist: pandas>=2.0.0; extra == 'exporter'
Requires-Dist: pyarrow>=10.0.0; extra == 'exporter'
Description-Content-Type: text/markdown

# aio-sf

An async Salesforce library for Python with Bulk API 2.0 support.

## Features

### ✅ Supported APIs
- [x] **OAuth Client Credentials Flow** - Automatic authentication
- [x] **Bulk API 2.0** - Efficient querying of large datasets
- [x] **Describe API** - Field metadata and object descriptions
- [x] **SOQL Query API** - Standard Salesforce queries

### 🔄 Planned APIs
- [ ] **SObjects API** - Standard CRUD operations
- [ ] **Tooling API** - Development and deployment tools
- [ ] **Bulk API 1.0** - Legacy bulk operations
- [ ] **Streaming API** - Real-time event streaming

### 🚀 Export Features
- [x] **Parquet Export** - Efficient columnar storage with schema mapping
- [x] **CSV Export** - Simple text format export
- [x] **Resume Support** - Resume interrupted queries using job IDs
- [x] **Streaming Processing** - Memory-efficient processing of large datasets
- [x] **Type Mapping** - Automatic Salesforce to PyArrow type conversion (see the sketch below)
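
The type mapping is driven by `salesforce_to_arrow_type` and `create_schema_from_metadata` in the `parquet_writer` source shown earlier in this diff. A minimal sketch of what the mapping produces, assuming the shipped `parquet_writer.py` exposes the same helpers as the `.backup` copy (the import path is an assumption):

```python
from aio_salesforce.exporter.parquet_writer import (
    create_schema_from_metadata,
    salesforce_to_arrow_type,
)

# Field metadata in the shape returned by the Describe API (see FieldInfo).
fields = [
    {"name": "Id", "type": "id"},
    {"name": "Name", "type": "string"},
    {"name": "AnnualRevenue", "type": "currency"},
    {"name": "CreatedDate", "type": "datetime"},
]

print(salesforce_to_arrow_type("currency"))  # float64 (currency maps to double)
schema = create_schema_from_metadata(fields)  # names lowercased, all nullable
print(schema)
```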

## Installation

### Core (Connection Only)
```bash
uv add aio-sf
# or: pip install aio-sf
```

### With Export Capabilities
```bash
uv add "aio-sf[exporter]"
# or: pip install "aio-sf[exporter]"
```

## Quick Start

### Authentication & Connection
```python
import asyncio
import os
from aio_salesforce import SalesforceConnection, ClientCredentialsAuth

async def main():
    auth = ClientCredentialsAuth(
        client_id=os.getenv('SF_CLIENT_ID'),
        client_secret=os.getenv('SF_CLIENT_SECRET'),
        instance_url=os.getenv('SF_INSTANCE_URL'),
    )

    async with SalesforceConnection(auth_strategy=auth) as sf:
        print(f"✅ Connected to: {sf.instance_url}")

        sobjects = await sf.describe.list_sobjects()
        print(sobjects[0]["name"])

        contact_describe = await sf.describe.sobject("Contact")

        # retrieve the first 5 "createable" fields on Contact
        creatable_fields = [
            field.get("name", "")
            for field in contact_describe["fields"]
            if field.get("createable")
        ][:5]

        query = f"SELECT {', '.join(creatable_fields)} FROM Contact LIMIT 5"
        print(query)

        query_result = await sf.query.soql(query)
        # Loop over records using async iteration
        async for record in query_result:
            print(record.get("AccountId"))

asyncio.run(main())
```

## Exporter

The Exporter library provides a streamlined, opinionated way to export data from Salesforce to various formats.

### Query Records
```python
from aio_salesforce.exporter import bulk_query

async def main():
    # ... authentication code from above ...

    async with SalesforceConnection(auth_strategy=auth) as sf:
        # Execute bulk query
        query_result = await bulk_query(
            sf=sf,
            soql_query="SELECT Id, Name, Email FROM Contact LIMIT 1000"
        )

        # Process records
        count = 0
        async for record in query_result:
            print(f"Contact: {record['Name']} - {record['Email']}")
            count += 1

        print(f"Processed {count} records")
```
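
For very large result sets, the exporter source above also imports a `batch_records_async` helper, which groups streamed records into lists instead of yielding them one at a time. A hedged sketch, assuming it is importable from `aio_salesforce.exporter.bulk_export` and takes a batch size as its second argument:

```python
# batch_records_async appears in the parquet_writer imports above;
# the import path and exact signature are assumptions.
from aio_salesforce.exporter.bulk_export import batch_records_async

async def process_in_batches(query_result, batch_size: int = 500) -> None:
    # Each batch is a list of record dicts, yielded as rows stream in,
    # so memory use stays bounded regardless of result size.
    async for batch in batch_records_async(query_result, batch_size):
        print(f"processing {len(batch)} records")
```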

### Export to Parquet
```python
from aio_salesforce.exporter import bulk_query, write_query_to_parquet

async def main():
    # ... authentication code from above ...

    async with SalesforceConnection(auth_strategy=auth) as sf:
        # Query with proper schema
        query_result = await bulk_query(
            sf=sf,
            soql_query="SELECT Id, Name, Email, CreatedDate FROM Contact"
        )

        # Export to Parquet
        write_query_to_parquet(
            query_result=query_result,
            file_path="contacts.parquet"
        )

        print("✅ Exported contacts to contacts.parquet")
```
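
The writer source above also defines `write_query_to_parquet_async`, and both convenience functions accept `fields_metadata` so the Parquet schema comes from Describe metadata rather than being inferred from the first batch. A sketch based on those signatures (that `aio_salesforce.exporter` re-exports the async variant is an assumption):

```python
from aio_salesforce.exporter import bulk_query, write_query_to_parquet_async

async def export_contacts(sf) -> None:
    # Describe Contact so the Parquet schema matches Salesforce field
    # types instead of being inferred from the first batch of records.
    describe = await sf.describe.sobject("Contact")
    fields_metadata = [
        f for f in describe["fields"]
        if f.get("name") in {"Id", "Name", "Email", "CreatedDate"}
    ]

    query_result = await bulk_query(
        sf=sf,
        soql_query="SELECT Id, Name, Email, CreatedDate FROM Contact",
    )
    await write_query_to_parquet_async(
        query_result=query_result,
        file_path="contacts.parquet",
        fields_metadata=fields_metadata,
    )
```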

## License

MIT License

aio_sf-0.1.0b1.dist-info/RECORD
@@ -0,0 +1,22 @@
aio_salesforce/__init__.py,sha256=KChsKtzT6q_p_-kRf13pibWucHOSYF2hQYv1gHePq2A,688
aio_salesforce/connection.py,sha256=-t02A45nekySuGaEwT2O_sHNzoxkYP6LzuR8fd5QjNs,18411
aio_salesforce/api/README.md,sha256=T-LWAnhq-m_Cvsj7GVJZvBdiZk5n3MAmKwuWHlW84J8,3512
aio_salesforce/api/__init__.py,sha256=kEA4UrX4DncSetO9s0agfKUnf0UxTzBfeYqP6T_SPEk,1328
aio_salesforce/api/types.py,sha256=5-HcYJVD0-qskpUXCS5EwLuoCJzYOk-hy0KvnVlG3VE,6761
aio_salesforce/api/bulk_v2/__init__.py,sha256=TxNM9dFmRX5k57Wj_JnposeEqqd9xcJx78wI-d6VH0o,315
aio_salesforce/api/bulk_v2/client.py,sha256=x9Yd_OsGaOW4FGIHTCO0LB9wmoJPcm6JhSZyD4I9QFg,6878
aio_salesforce/api/bulk_v2/types.py,sha256=18TN_VMisKJVCvo39q36moYXQhGLRUs73CKWIxBeAcs,1471
aio_salesforce/api/describe/__init__.py,sha256=Gog7UMc1v474mCOmEhnds6zUXPor-fDLNgy4vsLfBos,528
aio_salesforce/api/describe/client.py,sha256=9e3JVe35B_25UQ5RJ60NV7e3uTS_byME_MNpJjMApGI,3052
aio_salesforce/api/describe/types.py,sha256=5-HcYJVD0-qskpUXCS5EwLuoCJzYOk-hy0KvnVlG3VE,6761
aio_salesforce/api/query/__init__.py,sha256=OMh9g9PAPBFyQtUJc4aua1nKAiiIFYouZfaa5Zw4yZU,335
aio_salesforce/api/query/client.py,sha256=CS9d73cSVdhry08os__nx6YoP3bM9uQqHZjdSkWVbhY,7164
aio_salesforce/api/query/types.py,sha256=Wfk75kJpNDCGpTHonCbzjWvayy8guA3eyZp3hE7nBt0,845
aio_salesforce/exporter/__init__.py,sha256=ae2q2t9F8mSalbM-UT-fAnLV67USL0gG2u4Jky7Ou6o,881
aio_salesforce/exporter/bulk_export.py,sha256=pK2BNTL8WThLBbjWaHvzIJZoT7KdwfGi7GTguVSkShk,13563
aio_salesforce/exporter/parquet_writer.py,sha256=BD191vKi9t7qvccCugaL7Sz8qpaKOZzF6BbKsfJ1chI,10620
aio_salesforce/exporter/parquet_writer.py.backup,sha256=SyWRJyDaE9BIr8_mU2_El3x8b_lpt_Tvkebja0sq7kY,11465
aio_sf-0.1.0b1.dist-info/METADATA,sha256=T0tMOIyhbVIqwS0v2omrWidDYD5FvhmKnr1sXFXOr1s,6720
aio_sf-0.1.0b1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
aio_sf-0.1.0b1.dist-info/licenses/LICENSE,sha256=gu0Cbpiqs-vX7YgJJhGI1jH1mHup3dZMrZc-gmpEG60,1071
aio_sf-0.1.0b1.dist-info/RECORD,,

aio_sf-0.1.0b1.dist-info/licenses/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Callaway Cloud

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.