aio-sf 0.1.0b7__tar.gz → 0.1.0b9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/PKG-INFO +36 -1
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/README.md +35 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/__init__.py +0 -12
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/collections/__init__.py +6 -12
- aio_sf-0.1.0b9/src/aio_sf/api/collections/batch.py +367 -0
- aio_sf-0.1.0b9/src/aio_sf/api/collections/client.py +463 -0
- aio_sf-0.1.0b9/src/aio_sf/api/collections/records.py +138 -0
- aio_sf-0.1.0b9/src/aio_sf/api/collections/retry.py +141 -0
- aio_sf-0.1.0b9/src/aio_sf/api/collections/types.py +28 -0
- aio_sf-0.1.0b9/tests/test_retry_and_batch.py +890 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/uv.lock +14 -8
- aio_sf-0.1.0b7/src/aio_sf/api/collections/client.py +0 -660
- aio_sf-0.1.0b7/src/aio_sf/api/collections/types.py +0 -70
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/.cursor/rules/api-structure.mdc +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/.cursor/rules/async-patterns.mdc +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/.cursor/rules/project-tooling.mdc +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/.github/workflows/publish.yml +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/.github/workflows/test.yml +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/.gitignore +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/LICENSE +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/RELEASE.md +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/pyproject.toml +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/pytest.ini +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/__init__.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/auth/__init__.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/auth/base.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/auth/client_credentials.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/auth/refresh_token.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/auth/sfdx_cli.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/auth/static_token.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/bulk_v2/__init__.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/bulk_v2/client.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/bulk_v2/types.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/client.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/describe/__init__.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/describe/client.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/describe/types.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/query/__init__.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/query/client.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/query/types.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/api/types.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/exporter/__init__.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/exporter/bulk_export.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/src/aio_sf/exporter/parquet_writer.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/tests/__init__.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/tests/conftest.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/tests/test_api_clients.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/tests/test_auth.py +0 -0
- {aio_sf-0.1.0b7 → aio_sf-0.1.0b9}/tests/test_client.py +0 -0
````diff
--- aio_sf-0.1.0b7/PKG-INFO
+++ aio_sf-0.1.0b9/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aio-sf
-Version: 0.1.0b7
+Version: 0.1.0b9
 Summary: Async Salesforce library for Python
 Project-URL: Homepage, https://github.com/callawaycloud/aio-salesforce
 Project-URL: Repository, https://github.com/callawaycloud/aio-salesforce
@@ -151,6 +151,41 @@ async def main():
 asyncio.run(main())
 ```
 
+### Collections API - Batch Operations
+
+Bulk operations (insert, update, upsert, delete) with automatic batching and concurrency.
+
+**Basic Usage:**
+```python
+async with SalesforceClient(auth_strategy=auth) as sf:
+    records = [{"Name": f"Account {i}"} for i in range(1000)]
+
+    results = await sf.collections.insert(records, sobject_type="Account")
+    # Also: update(), upsert(), delete()
+```
+
+**Advanced - With Retries, Concurrency Scaling, and Progress:**
+```python
+from aio_sf.api.collections import ProgressInfo
+
+async def on_progress(info: ProgressInfo):
+    print(
+        f"Attempt {info['current_attempt']}: "
+        f"{info['records_succeeded']} succeeded, "
+        f"{info['records_failed']} failed, "
+        f"{info['records_pending']} pending"
+    )
+
+async with SalesforceClient(auth_strategy=auth) as sf:
+    results = await sf.collections.insert(
+        records=records,
+        sobject_type="Account",
+        batch_size=[200, 100, 25],  # Shrink batch size on retry
+        max_concurrent_batches=[5, 3, 1],  # Reduce concurrency on retry
+        max_attempts=5,  # Retry up to 5 times
+        on_batch_complete=on_progress,  # Progress callback
+    )
+```
 
 
 ## Exporter
````
````diff
--- aio_sf-0.1.0b7/README.md
+++ aio_sf-0.1.0b9/README.md
@@ -88,6 +88,41 @@ async def main():
 asyncio.run(main())
 ```
 
+### Collections API - Batch Operations
+
+Bulk operations (insert, update, upsert, delete) with automatic batching and concurrency.
+
+**Basic Usage:**
+```python
+async with SalesforceClient(auth_strategy=auth) as sf:
+    records = [{"Name": f"Account {i}"} for i in range(1000)]
+
+    results = await sf.collections.insert(records, sobject_type="Account")
+    # Also: update(), upsert(), delete()
+```
+
+**Advanced - With Retries, Concurrency Scaling, and Progress:**
+```python
+from aio_sf.api.collections import ProgressInfo
+
+async def on_progress(info: ProgressInfo):
+    print(
+        f"Attempt {info['current_attempt']}: "
+        f"{info['records_succeeded']} succeeded, "
+        f"{info['records_failed']} failed, "
+        f"{info['records_pending']} pending"
+    )
+
+async with SalesforceClient(auth_strategy=auth) as sf:
+    results = await sf.collections.insert(
+        records=records,
+        sobject_type="Account",
+        batch_size=[200, 100, 25],  # Shrink batch size on retry
+        max_concurrent_batches=[5, 3, 1],  # Reduce concurrency on retry
+        max_attempts=5,  # Retry up to 5 times
+        on_batch_complete=on_progress,  # Progress callback
+    )
+```
 
 
 ## Exporter
````
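The `ProgressInfo` payload in the example above carries enough state to derive an overall completion percentage, since `records_completed` counts records that are permanently done (succeeded, or failed with retries exhausted). A minimal callback sketch using only the keys defined in the new `batch.py` TypedDict further down:

```python
from aio_sf.api.collections import ProgressInfo

async def on_progress(info: ProgressInfo) -> None:
    # records_completed = permanent successes + permanent failures,
    # so completed/total tracks overall progress across retry rounds.
    pct = 100 * info["records_completed"] / info["total_records"]
    print(
        f"[attempt {info['current_attempt']}] {pct:.1f}% complete, "
        f"batch_size={info['current_batch_size']}, "
        f"concurrency={info['current_concurrency']}"
    )
```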
```diff
--- aio_sf-0.1.0b7/src/aio_sf/api/__init__.py
+++ aio_sf-0.1.0b9/src/aio_sf/api/__init__.py
@@ -19,13 +19,7 @@ from .bulk_v2 import (
 from .collections import (
     CollectionsAPI,
     CollectionError,
-    CollectionRequest,
     CollectionResult,
-    CollectionResponse,
-    InsertCollectionRequest,
-    UpdateCollectionRequest,
-    UpsertCollectionRequest,
-    DeleteCollectionRequest,
     CollectionInsertResponse,
     CollectionUpdateResponse,
     CollectionUpsertResponse,
@@ -69,13 +63,7 @@ __all__ = [
     "BulkJobError",
     # Collections Types
     "CollectionError",
-    "CollectionRequest",
     "CollectionResult",
-    "CollectionResponse",
-    "InsertCollectionRequest",
-    "UpdateCollectionRequest",
-    "UpsertCollectionRequest",
-    "DeleteCollectionRequest",
     "CollectionInsertResponse",
     "CollectionUpdateResponse",
     "CollectionUpsertResponse",
```
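For downstream code these hunks are a breaking change: the request/response wrapper types disappear from `aio_sf.api` in 0.1.0b9. A sketch of the import surface that survives, using only the names visible in the hunks above (context beyond the hunk window may export more):

```python
# Still exported from aio_sf.api in 0.1.0b9, per the hunks above.
# CollectionRequest, CollectionResponse, and the per-operation
# *CollectionRequest types no longer import after this change.
from aio_sf.api import (
    CollectionsAPI,
    CollectionError,
    CollectionResult,
    CollectionInsertResponse,
    CollectionUpdateResponse,
    CollectionUpsertResponse,
)
```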
```diff
--- aio_sf-0.1.0b7/src/aio_sf/api/collections/__init__.py
+++ aio_sf-0.1.0b9/src/aio_sf/api/collections/__init__.py
@@ -1,15 +1,11 @@
 """Salesforce Collections API module."""
 
 from .client import CollectionsAPI
+from .batch import ProgressInfo, ProgressCallback
+from .retry import ShouldRetryCallback, default_should_retry
 from .types import (
     CollectionError,
-    CollectionRequest,
     CollectionResult,
-    CollectionResponse,
-    InsertCollectionRequest,
-    UpdateCollectionRequest,
-    UpsertCollectionRequest,
-    DeleteCollectionRequest,
     CollectionInsertResponse,
     CollectionUpdateResponse,
     CollectionUpsertResponse,
@@ -18,14 +14,12 @@ from .types import (
 
 __all__ = [
     "CollectionsAPI",
+    "ProgressInfo",
+    "ProgressCallback",
+    "ShouldRetryCallback",
+    "default_should_retry",
     "CollectionError",
-    "CollectionRequest",
     "CollectionResult",
-    "CollectionResponse",
-    "InsertCollectionRequest",
-    "UpdateCollectionRequest",
-    "UpsertCollectionRequest",
-    "DeleteCollectionRequest",
     "CollectionInsertResponse",
     "CollectionUpdateResponse",
     "CollectionUpsertResponse",
```
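`ShouldRetryCallback` and `default_should_retry` join the public API here, but `retry.py` itself is not included in this section, so the callback's exact shape is an assumption. Judging by how `should_retry_record(record, result, attempt, callback)` is invoked in `batch.py` below, a custom policy might look like this sketch; check `retry.py` before relying on the signature:

```python
from typing import Any, Dict, Union

# Hypothetical custom retry policy. The (record, result-or-exception, attempt)
# shape is inferred from should_retry_record() in batch.py, not confirmed.
async def retry_lock_contention_only(
    record: Dict[str, Any],
    result: Union[Dict[str, Any], Exception],
    attempt: int,
) -> bool:
    if isinstance(result, Exception):
        return True  # transport/network failure
    # Failed CollectionResults carry Salesforce error entries with a statusCode
    errors = result.get("errors") or []
    return any(e.get("statusCode") == "UNABLE_TO_LOCK_ROW" for e in errors)
```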
```diff
--- /dev/null
+++ aio_sf-0.1.0b9/src/aio_sf/api/collections/batch.py
@@ -0,0 +1,367 @@
+"""Batch processing and concurrency management for Collections API."""
+
+import asyncio
+import logging
+from typing import Any, Awaitable, Callable, Dict, List, Optional, TypedDict, Union
+
+from .retry import (
+    RecordWithAttempt,
+    ShouldRetryCallback,
+    convert_exception_to_result,
+    get_value_for_attempt,
+    should_retry_record,
+)
+from .types import CollectionResult
+
+
+logger = logging.getLogger(__name__)
+
+
+class ProgressInfo(TypedDict):
+    """Progress information for batch operations."""
+
+    total_records: int  # Total records being processed
+    records_completed: int  # Records finished (succeeded or failed permanently)
+    records_succeeded: int  # Records that succeeded
+    records_failed: int  # Records that failed permanently (exhausted retries)
+    records_pending: int  # Records still being retried
+    current_attempt: int  # Current retry attempt number (1-indexed)
+    current_batch_size: int  # Batch size for current attempt
+    current_concurrency: int  # Concurrency level for current attempt
+
+
+# Type alias for progress callback
+ProgressCallback = Callable[[ProgressInfo], Awaitable[None]]
+
+
+def split_into_batches(
+    items: List[Any], batch_size: int, max_limit: int
+) -> List[List[Any]]:
+    """
+    Split a list of items into batches of specified size.
+
+    :param items: List of items to split
+    :param batch_size: Maximum size of each batch
+    :param max_limit: Maximum allowed batch size for the operation
+    :returns: List of batches
+    :raises ValueError: If batch_size is invalid
+    """
+    if batch_size <= 0:
+        raise ValueError("batch_size must be greater than 0")
+    if batch_size > max_limit:
+        raise ValueError(
+            f"batch_size ({batch_size}) cannot exceed Salesforce limit ({max_limit})"
+        )
+
+    batches = []
+    for i in range(0, len(items), batch_size):
+        batch = items[i : i + batch_size]
+        batches.append(batch)
+    return batches
+
+
+async def process_batches_concurrently(
+    batches: List[Any],
+    operation_func,
+    max_concurrent_batches: int,
+    total_records: int,
+    on_batch_complete: Optional[ProgressCallback] = None,
+    progress_state: Optional[Dict[str, int]] = None,
+    *args,
+    **kwargs,
+) -> List[Any]:
+    """
+    Process batches concurrently with a limit on concurrent operations.
+
+    Order preservation: Results are returned in the same order as input batches,
+    regardless of which batch completes first.
+
+    :param batches: List of batches to process
+    :param operation_func: Function to call for each batch
+    :param max_concurrent_batches: Maximum number of concurrent batch operations
+    :param total_records: Total number of records being processed
+    :param on_batch_complete: Optional callback invoked after each batch completes
+    :param progress_state: Dict with progress state (updated by caller)
+    :param args: Additional positional arguments for operation_func
+    :param kwargs: Additional keyword arguments for operation_func
+    :returns: List of results from all batches in the same order as input
+    :raises ValueError: If max_concurrent_batches is invalid
+    """
+    if max_concurrent_batches <= 0:
+        raise ValueError("max_concurrent_batches must be greater than 0")
+
+    semaphore = asyncio.Semaphore(max_concurrent_batches)
+    callback_lock = asyncio.Lock() if on_batch_complete else None
+
+    async def process_batch_with_semaphore(batch_index: int, batch):
+        async with semaphore:
+            try:
+                result = await operation_func(batch, *args, **kwargs)
+            except Exception as e:
+                # HTTP/network error - return the exception for each record
+                logger.warning(
+                    f"Batch {batch_index} failed with exception: {type(e).__name__}: {e}"
+                )
+                result = [e for _ in range(len(batch))]
+
+            # Invoke progress callback if provided
+            if on_batch_complete and callback_lock and progress_state:
+                async with callback_lock:
+                    progress_info: ProgressInfo = {
+                        "total_records": progress_state["total_records"],
+                        "records_completed": progress_state["records_completed"],
+                        "records_succeeded": progress_state["records_succeeded"],
+                        "records_failed": progress_state["records_failed"],
+                        "records_pending": progress_state["records_pending"],
+                        "current_attempt": progress_state["current_attempt"],
+                        "current_batch_size": progress_state["current_batch_size"],
+                        "current_concurrency": progress_state["current_concurrency"],
+                    }
+                    await on_batch_complete(progress_info)
+
+            return result
+
+    # Process all batches concurrently with semaphore limiting concurrency
+    tasks = [process_batch_with_semaphore(i, batch) for i, batch in enumerate(batches)]
+    # asyncio.gather() preserves order
+    results = await asyncio.gather(*tasks)
+
+    # Flatten results from all batches, maintaining order
+    flattened_results = []
+    for batch_result in results:
+        flattened_results.extend(batch_result)
+
+    return flattened_results
+
+
+async def process_with_retries(
+    records_with_attempts: List[RecordWithAttempt],
+    operation_func,
+    batch_size: Union[int, List[int]],
+    max_attempts: int,
+    should_retry_callback: Optional[ShouldRetryCallback],
+    max_concurrent_batches: Union[int, List[int]],
+    on_batch_complete: Optional[ProgressCallback],
+    max_limit: int,
+    *args,
+    **kwargs,
+) -> List[CollectionResult]:
+    """
+    Process records with retry logic, shrinking batch sizes, and scaling concurrency.
+
+    :param records_with_attempts: List of records with attempt tracking
+    :param operation_func: The single-batch operation function to call
+    :param batch_size: Batch size (int or list of ints per attempt)
+    :param max_attempts: Maximum number of attempts per record
+    :param should_retry_callback: Optional callback to determine if record should be retried
+    :param max_concurrent_batches: Maximum concurrent batches (int or list of ints per attempt)
+    :param on_batch_complete: Progress callback
+    :param max_limit: Maximum batch size limit for the operation
+    :param args: Additional args for operation_func
+    :param kwargs: Additional kwargs for operation_func
+    :returns: List of results in order of original input
+    """
+    # Initialize result array with None placeholders
+    max_index = max(r.original_index for r in records_with_attempts)
+    final_results: List[Optional[CollectionResult]] = [None] * (max_index + 1)
+    total_records_count = max_index + 1
+
+    # Initialize progress state
+    progress_state = {
+        "total_records": total_records_count,
+        "records_completed": 0,
+        "records_succeeded": 0,
+        "records_failed": 0,
+        "records_pending": total_records_count,
+        "current_attempt": 1,
+        "current_batch_size": 0,
+        "current_concurrency": 0,
+    }
+
+    current_records = records_with_attempts
+
+    while current_records:
+        current_attempt = current_records[0].attempt
+        current_batch_size = min(
+            get_value_for_attempt(current_attempt, batch_size), max_limit
+        )
+        current_concurrency = get_value_for_attempt(
+            current_attempt, max_concurrent_batches
+        )
+
+        # Update progress state for current attempt
+        progress_state["current_attempt"] = current_attempt
+        progress_state["current_batch_size"] = current_batch_size
+        progress_state["current_concurrency"] = current_concurrency
+
+        logger.debug(
+            f"Processing {len(current_records)} records on attempt {current_attempt} "
+            f"with batch_size={current_batch_size}, concurrency={current_concurrency}"
+        )
+
+        # Extract records and split into batches
+        records_to_process = [r.record for r in current_records]
+        batches = split_into_batches(records_to_process, current_batch_size, max_limit)
+
+        # Process batches with current concurrency level (no callback here)
+        batch_results = await process_batches_concurrently(
+            batches,
+            operation_func,
+            current_concurrency,
+            len(records_to_process),
+            None,  # Don't invoke callback during batch processing
+            None,
+            *args,
+            **kwargs,
+        )
+
+        # Process results and determine retries
+        records_to_retry = await _collect_records_for_retry(
+            current_records,
+            batch_results,
+            max_attempts,
+            should_retry_callback,
+            final_results,
+        )
+
+        # Update progress state based on results
+        # Count completed records (those not being retried)
+        records_completed_this_round = len(current_records) - len(records_to_retry)
+
+        # Count successes and failures in final_results so far
+        records_succeeded = sum(
+            1 for r in final_results if r is not None and r.get("success", False)
+        )
+        records_failed = sum(
+            1 for r in final_results if r is not None and not r.get("success", False)
+        )
+
+        progress_state["records_completed"] = records_succeeded + records_failed
+        progress_state["records_succeeded"] = records_succeeded
+        progress_state["records_failed"] = records_failed
+        progress_state["records_pending"] = len(records_to_retry)
+
+        # Invoke progress callback after we know the results
+        if on_batch_complete:
+            progress_info: ProgressInfo = {
+                "total_records": progress_state["total_records"],
+                "records_completed": progress_state["records_completed"],
+                "records_succeeded": progress_state["records_succeeded"],
+                "records_failed": progress_state["records_failed"],
+                "records_pending": progress_state["records_pending"],
+                "current_attempt": progress_state["current_attempt"],
+                "current_batch_size": progress_state["current_batch_size"],
+                "current_concurrency": progress_state["current_concurrency"],
+            }
+            await on_batch_complete(progress_info)
+
+        if records_to_retry:
+            logger.info(
+                f"Retrying {len(records_to_retry)} failed records "
+                f"(attempt {records_to_retry[0].attempt})"
+            )
+
+        current_records = records_to_retry
+
+    # Return results (all should be non-None at this point)
+    return [r for r in final_results if r is not None]
+
+
+async def _collect_records_for_retry(
+    current_records: List[RecordWithAttempt],
+    batch_results: List[Union[CollectionResult, Exception]],
+    max_attempts: int,
+    should_retry_callback: Optional[ShouldRetryCallback],
+    final_results: List[Optional[CollectionResult]],
+) -> List[RecordWithAttempt]:
+    """Process results and collect records that should be retried."""
+    records_to_retry: List[RecordWithAttempt] = []
+
+    for record_wrapper, result in zip(current_records, batch_results):
+        original_index = record_wrapper.original_index
+
+        if isinstance(result, Exception):
+            # HTTP/network error
+            await _handle_exception_result(
+                record_wrapper,
+                result,
+                max_attempts,
+                should_retry_callback,
+                records_to_retry,
+                final_results,
+                original_index,
+            )
+        elif result.get("success", False):
+            # Success - store the result
+            final_results[original_index] = result
+        else:
+            # Failed CollectionResult
+            await _handle_failed_result(
+                record_wrapper,
+                result,
+                max_attempts,
+                should_retry_callback,
+                records_to_retry,
+                final_results,
+                original_index,
+            )
+
+    return records_to_retry
+
+
+async def _handle_exception_result(
+    record_wrapper: RecordWithAttempt,
+    exception: Exception,
+    max_attempts: int,
+    should_retry_callback: Optional[ShouldRetryCallback],
+    records_to_retry: List[RecordWithAttempt],
+    final_results: List[Optional[CollectionResult]],
+    original_index: int,
+) -> None:
+    """Handle an exception result - either retry or convert to error result."""
+    can_retry = record_wrapper.attempt < max_attempts
+    if can_retry and await should_retry_record(
+        record_wrapper.record,
+        exception,
+        record_wrapper.attempt,
+        should_retry_callback,
+    ):
+        records_to_retry.append(
+            RecordWithAttempt(
+                record_wrapper.record,
+                original_index,
+                record_wrapper.attempt + 1,
+            )
+        )
+    else:
+        # No more retries - convert Exception to CollectionResult format
+        final_results[original_index] = convert_exception_to_result(exception)
+
+
+async def _handle_failed_result(
+    record_wrapper: RecordWithAttempt,
+    result: CollectionResult,
+    max_attempts: int,
+    should_retry_callback: Optional[ShouldRetryCallback],
+    records_to_retry: List[RecordWithAttempt],
+    final_results: List[Optional[CollectionResult]],
+    original_index: int,
+) -> None:
+    """Handle a failed CollectionResult - either retry or store the failure."""
+    can_retry = record_wrapper.attempt < max_attempts
+    if can_retry and await should_retry_record(
+        record_wrapper.record,
+        result,
+        record_wrapper.attempt,
+        should_retry_callback,
+    ):
+        records_to_retry.append(
+            RecordWithAttempt(
+                record_wrapper.record,
+                original_index,
+                record_wrapper.attempt + 1,
+            )
+        )
+    else:
+        # No more retries - store the failed result
+        final_results[original_index] = result
```