batchly 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
batchly/__init__.py ADDED
@@ -0,0 +1,28 @@
1
+ """batchly — Batch processing made simple."""
2
+
3
+ from .batch import Batch, batch
4
+ from .errors import BatchError, TimeoutError
5
+ from .filter_ import async_batch_filter, batch_filter
6
+ from .foreach import async_batch_for_each, batch_for_each
7
+ from .map_ import async_batch_map, batch_map
8
+ from .progress import ProgressBar, ProgressInfo
9
+ from .rate_limit import RateLimiter
10
+ from .result import BatchResult
11
+
12
__version__ = "0.1.0"
# Names re-exported as the package's public API (see imports above).
__all__ = [
    "batch",
    "Batch",
    "batch_map",
    "async_batch_map",
    "batch_filter",
    "async_batch_filter",
    "batch_for_each",
    "async_batch_for_each",
    "BatchResult",
    "BatchError",
    "TimeoutError",  # NOTE: intentionally shadows the builtin TimeoutError on star-import
    "ProgressBar",
    "ProgressInfo",
    "RateLimiter",
]
batchly/batch.py ADDED
@@ -0,0 +1,136 @@
1
+ """@batch decorator and Batch context class."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import functools
6
+ import inspect
7
+ from typing import Any, Callable
8
+
9
+ from .filter_ import async_batch_filter, batch_filter
10
+ from .foreach import async_batch_for_each, batch_for_each
11
+ from .map_ import async_batch_map, batch_map
12
+
13
+
14
class Batch:
    """Reusable batch-processing context.

    Bundles a set of default options (workers, retries, rate limit, ...)
    and forwards them to the module-level batch helpers, so one
    configuration can drive many calls.

    Usage:
        b = Batch(max_workers=10, retries=3)
        results = b.map(fn, items)
        filtered = b.filter(pred, items)
        b.foreach(fn, items)
    """

    def __init__(
        self,
        *,
        max_workers: int = 4,
        retries: int = 0,
        backoff: str = "exponential",
        retry_on: tuple[type[Exception], ...] = (Exception,),
        on_error: str = "skip",
        chunk_size: int | None = None,
        rate_limit: int | None = None,
        ordered: bool = True,
        timeout: float | None = None,
        progress: Callable | None = None,
    ):
        # Every option is stored verbatim and forwarded by _common_kwargs().
        self.max_workers = max_workers
        self.retries = retries
        self.backoff = backoff
        self.retry_on = retry_on
        self.on_error = on_error
        self.chunk_size = chunk_size
        self.rate_limit = rate_limit
        self.ordered = ordered
        self.timeout = timeout
        self.progress = progress

    def _common_kwargs(self) -> dict:
        """Collect the stored defaults as keyword arguments."""
        return {
            "max_workers": self.max_workers,
            "retries": self.retries,
            "backoff": self.backoff,
            "retry_on": self.retry_on,
            "on_error": self.on_error,
            "chunk_size": self.chunk_size,
            "rate_limit": self.rate_limit,
            "ordered": self.ordered,
            "timeout": self.timeout,
            "progress": self.progress,
        }

    def _merged(self, overrides: dict) -> dict:
        """Stored defaults with per-call overrides layered on top."""
        merged = self._common_kwargs()
        merged.update(overrides)
        return merged

    def map(self, fn: Callable, items, **overrides):
        """Parallel map with this context's defaults; see batch_map."""
        return batch_map(fn, items, **self._merged(overrides))

    async def amap(self, fn: Callable, items, **overrides):
        """Async parallel map; see async_batch_map."""
        return await async_batch_map(fn, items, **self._merged(overrides))

    def filter(self, fn: Callable, items, **overrides):
        """Parallel filter; see batch_filter."""
        return batch_filter(fn, items, **self._merged(overrides))

    async def afilter(self, fn: Callable, items, **overrides):
        """Async parallel filter; see async_batch_filter."""
        return await async_batch_filter(fn, items, **self._merged(overrides))

    def foreach(self, fn: Callable, items, **overrides):
        """Parallel for-each (side effects only); see batch_for_each."""
        return batch_for_each(fn, items, **self._merged(overrides))

    async def aforeach(self, fn: Callable, items, **overrides):
        """Async parallel for-each; see async_batch_for_each."""
        return await async_batch_for_each(fn, items, **self._merged(overrides))
86
+
87
+
88
def batch(*, max_workers: int = 4, retries: int = 0, **kwargs) -> Callable:
    """Decorator that turns a single-item function into a batch processor.

    The wrapped function accepts either a single item (called directly) or
    an iterable of items (processed in parallel through a Batch context).
    Strings, bytes, and bytearrays are always treated as single items even
    though they are iterable.

    Args:
        max_workers: Number of concurrent workers for batch calls.
        retries: Retry count per item.
        **kwargs: Any further Batch(...) option (backoff, on_error, ...).

    Returns:
        A decorator producing a sync or async wrapper matching fn.

    Usage:
        @batch(max_workers=10, retries=3)
        def process(item):
            ...

        results = process([1, 2, 3])  # processes all in parallel
        single = process(42)          # calls fn directly
    """

    def decorator(fn: Callable) -> Callable:
        _ctx = Batch(max_workers=max_workers, retries=retries, **kwargs)

        def _is_single(obj) -> bool:
            # Strings/bytes are iterable but always count as single items.
            # (The original code had a dead `len(obj) <= 1` branch here
            # whose two arms were identical — collapsed.)
            if isinstance(obj, (str, bytes, bytearray)):
                return True
            try:
                iter(obj)
            except TypeError:
                return True  # not iterable at all
            return False

        @functools.wraps(fn)
        def wrapper(items_or_single):
            if _is_single(items_or_single):
                return fn(items_or_single)
            # Iterable of items — batch process via the shared context.
            return _ctx.map(fn, list(items_or_single))

        @functools.wraps(fn)
        async def async_wrapper(items_or_single):
            if _is_single(items_or_single):
                return await fn(items_or_single)
            return await _ctx.amap(fn, list(items_or_single))

        return async_wrapper if inspect.iscoroutinefunction(fn) else wrapper

    return decorator
batchly/errors.py ADDED
@@ -0,0 +1,14 @@
1
+ """Custom errors for batchly."""
2
+
3
+
4
+ class BatchError(Exception):
5
+ """Raised when a batch operation fails (on_error='raise')."""
6
+
7
+ def __init__(self, message: str, item=None, original_error: Exception | None = None):
8
+ super().__init__(message)
9
+ self.item = item
10
+ self.original_error = original_error
11
+
12
+
13
class TimeoutError(BatchError):
    """Raised when processing an item exceeds the configured timeout."""
batchly/filter_.py ADDED
@@ -0,0 +1,36 @@
1
+ """batch_filter implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import inspect
6
+ from typing import Any, Callable, Iterable
7
+
8
+ from .map_ import batch_map, async_batch_map
9
+
10
+
11
def batch_filter(
    fn: Callable[..., bool],
    items: Iterable[Any],
    **kwargs,
) -> list:
    """Run fn over items in parallel and keep those where it is truthy.

    Unlike batch_map, this returns the surviving items themselves rather
    than BatchResult wrappers; items whose call failed never pass.
    """
    if inspect.iscoroutinefunction(fn):
        raise TypeError(
            "batch_filter called with async function. Use async_batch_filter."
        )

    kept = []
    for res in batch_map(fn, items, **kwargs):
        if res.ok and res.value:
            kept.append(res.item)
    return kept
27
+
28
+
29
async def async_batch_filter(
    fn: Callable[..., bool],
    items: Iterable[Any],
    **kwargs,
) -> list:
    """Async counterpart of batch_filter; returns the passing items."""
    kept = []
    for res in await async_batch_map(fn, items, **kwargs):
        if res.ok and res.value:
            kept.append(res.item)
    return kept
batchly/foreach.py ADDED
@@ -0,0 +1,31 @@
1
+ """batch_for_each implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import inspect
6
+ from typing import Any, Callable, Iterable
7
+
8
+ from .map_ import batch_map, async_batch_map
9
+
10
+
11
def batch_for_each(
    fn: Callable[..., Any],
    items: Iterable[Any],
    **kwargs,
) -> None:
    """Run fn over every item in parallel, discarding return values."""
    if inspect.iscoroutinefunction(fn):
        raise TypeError(
            "batch_for_each called with async function. Use async_batch_for_each."
        )
    # Delegate to batch_map; the results are intentionally ignored.
    batch_map(fn, items, **kwargs)
23
+
24
+
25
async def async_batch_for_each(
    fn: Callable[..., Any],
    items: Iterable[Any],
    **kwargs,
) -> None:
    """Async counterpart of batch_for_each; side effects only."""
    await async_batch_map(fn, items, **kwargs)
batchly/map_.py ADDED
@@ -0,0 +1,461 @@
1
+ """batch_map implementation — parallel map with all options."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import inspect
7
+ import time
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from typing import Any, Callable, Generator, Iterable, AsyncGenerator
10
+
11
+ from .errors import BatchError, TimeoutError
12
+ from .progress import ProgressInfo
13
+ from .rate_limit import RateLimiter
14
+ from .result import BatchResult
15
+ from .retry import async_retry_call, retry_call
16
+
17
+
18
def batch_map(
    fn: Callable[..., Any],
    items: Iterable[Any],
    *,
    max_workers: int = 4,
    retries: int = 0,
    backoff: str = "exponential",
    retry_on: tuple[type[Exception], ...] = (Exception,),
    on_error: str = "skip",
    chunk_size: int | None = None,
    rate_limit: int | None = None,
    ordered: bool = True,
    timeout: float | None = None,
    stream: bool = False,
    progress: Callable[[ProgressInfo], None] | None = None,
) -> list[BatchResult] | Generator[BatchResult, None, None]:
    """Map fn over items in parallel.

    Returns a list of BatchResult, or a generator when stream=True.
    Raises TypeError if fn is a coroutine function.
    """
    items = list(items)

    if inspect.iscoroutinefunction(fn):
        raise TypeError(
            "batch_map called with async function but not awaited. "
            "Use: results = await async_batch_map(fn, items, ...)"
        )

    # Options common to both the streaming and the list-returning paths.
    shared = dict(
        max_workers=max_workers, retries=retries, backoff=backoff,
        retry_on=retry_on, on_error=on_error, chunk_size=chunk_size,
        rate_limit=rate_limit, timeout=timeout, progress=progress,
    )

    if stream:
        # Streaming yields results in completion order as they finish.
        return _stream_sync(fn, items, **shared)

    return _map_sync(fn, items, ordered=ordered, **shared)
57
+
58
+
59
async def async_batch_map(
    fn: Callable[..., Any],
    items: Iterable[Any],
    *,
    max_workers: int = 4,
    retries: int = 0,
    backoff: str = "exponential",
    retry_on: tuple[type[Exception], ...] = (Exception,),
    on_error: str = "skip",
    chunk_size: int | None = None,
    rate_limit: int | None = None,
    ordered: bool = True,
    timeout: float | None = None,
    stream: bool = False,
    progress: Callable[[ProgressInfo], None] | None = None,
) -> list[BatchResult] | AsyncGenerator[BatchResult, None]:
    """Async version of batch_map.

    Returns a list of BatchResult, or an async generator when stream=True.
    """
    materialized = list(items)

    # Options common to both the streaming and the list-returning paths.
    shared = dict(
        max_workers=max_workers, retries=retries, backoff=backoff,
        retry_on=retry_on, on_error=on_error, chunk_size=chunk_size,
        rate_limit=rate_limit, timeout=timeout, progress=progress,
    )

    if stream:
        # Hand back the async generator without awaiting it.
        return _async_stream(fn, materialized, **shared)

    return await _async_map(fn, materialized, ordered=ordered, **shared)
90
+
91
+
92
async def _async_map(
    fn, items, *,
    max_workers=4, retries=0, backoff="exponential",
    retry_on=(Exception,), on_error="skip",
    chunk_size=None, rate_limit=None,
    ordered=True, timeout=None, progress=None,
) -> list[BatchResult]:
    """Async batch map that returns a list of BatchResult.

    Concurrency is capped by an asyncio.Semaphore(max_workers); an optional
    RateLimiter throttles task starts. When chunk_size is set, fn is called
    as fn(chunk=<slice of items>) instead of fn(item).

    Fixes vs. the previous revision:
    - with on_error="raise" the final result list could contain leftover
      None placeholders (the None-filter only ran for skip/collect);
    - with ordered=False, results were appended onto the placeholder list
      instead of a separate list, mixing Nones with real results.
    Now skipped slots are always dropped before returning.
    """
    limiter = RateLimiter(rate_limit) if rate_limit else None

    # Chunked mode: each task is a slice of `items`.
    if chunk_size is not None:
        tasks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
    else:
        tasks = items
    total_tasks = len(tasks)

    start = time.monotonic()
    completed = 0
    semaphore = asyncio.Semaphore(max_workers)

    def _tick():
        # Shared progress bookkeeping for every finished task.
        nonlocal completed
        completed += 1
        if progress:
            elapsed = time.monotonic() - start
            eta = (elapsed / completed * (total_tasks - completed)) if completed > 0 else 0
            progress(ProgressInfo(completed=completed, total=total_tasks, elapsed=elapsed, eta=eta))

    async def _process_task(task_idx: int, item: Any):
        async with semaphore:
            if limiter:
                await limiter.async_acquire()
            t0 = time.monotonic()

            args = (item,) if chunk_size is None else ()
            kw = {"chunk": item} if chunk_size is not None else {}

            try:
                call = async_retry_call(
                    fn, args=args, kwargs=kw,
                    retries=retries, backoff=backoff,
                    retry_on=retry_on, rate_limiter=None,
                )
                if timeout is not None:
                    val, err = await asyncio.wait_for(call, timeout=timeout)
                else:
                    val, err = await call
            except asyncio.TimeoutError as e:
                if on_error == "raise":
                    raise TimeoutError(f"Timed out: {item}", item=item, original_error=e) from e
                if on_error == "collect":
                    err, val = e, None
                else:
                    # "skip": count it for progress, produce no result.
                    _tick()
                    return task_idx, None

            duration = time.monotonic() - t0

            if err is not None:
                if on_error == "raise":
                    raise BatchError(f"Failed: {item}", item=item, original_error=err) from err
                elif on_error == "collect":
                    br = BatchResult(value=None, error=err, item=item, duration=duration)
                else:
                    br = None
            else:
                br = BatchResult(value=val, error=None, item=item, duration=duration)

            _tick()
            return task_idx, br

    coros = [_process_task(idx, task) for idx, task in enumerate(tasks)]
    gathered = await asyncio.gather(*coros, return_exceptions=True)

    # Assemble output: fixed slots for ordered mode, a plain list otherwise.
    slots: list = [None] * total_tasks
    extras: list = []
    for result in gathered:
        if isinstance(result, Exception):
            if on_error == "raise":
                raise result
            continue  # error already accounted for by skip/collect handling
        task_idx, br = result
        if br is None:
            continue  # skipped item
        if ordered:
            slots[task_idx] = br
        else:
            extras.append(br)

    if ordered:
        # Drop placeholders for skipped/failed tasks (always, not just
        # for skip/collect — fixes the on_error="raise" None leak).
        return [r for r in slots if r is not None]
    return extras
191
+
192
+
193
async def _async_stream(
    fn, items, *,
    max_workers=4, retries=0, backoff="exponential",
    retry_on=(Exception,), on_error="skip",
    chunk_size=None, rate_limit=None,
    timeout=None, progress=None,
) -> AsyncGenerator[BatchResult, None]:
    """Async streaming batch map — yields results as they complete.

    Results come out in completion order (not input order). Items skipped
    by on_error="skip" produce no yield; on_error="raise" propagates a
    BatchError/TimeoutError out of the generator. When chunk_size is set,
    fn is called as fn(chunk=<slice of items>) instead of fn(item).
    """
    # Optional token-bucket throttle shared by all tasks.
    limiter = RateLimiter(rate_limit) if rate_limit else None

    # Chunked mode: each task is a slice of `items`.
    if chunk_size is not None:
        chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
        total_tasks = len(chunks)
        item_mapping = list(enumerate(chunks))
    else:
        total_tasks = len(items)
        item_mapping = list(enumerate(items))

    start = time.monotonic()
    completed = 0
    # Caps how many tasks run concurrently.
    semaphore = asyncio.Semaphore(max_workers)

    async def _process_task(task_idx: int, item: Any):
        # Runs one item/chunk; returns (task_idx, BatchResult | None).
        nonlocal completed
        async with semaphore:
            if limiter:
                await limiter.async_acquire()
            t0 = time.monotonic()

            # Chunked mode passes the slice via the `chunk` keyword.
            args = (item,) if chunk_size is None else ()
            kw = {"chunk": item} if chunk_size is not None else {}

            try:
                if timeout is not None:
                    # Timeout covers the whole retry sequence, not one attempt.
                    val, err = await asyncio.wait_for(
                        async_retry_call(fn, args=args, kwargs=kw,
                                         retries=retries, backoff=backoff,
                                         retry_on=retry_on, rate_limiter=None),
                        timeout=timeout,
                    )
                else:
                    val, err = await async_retry_call(
                        fn, args=args, kwargs=kw,
                        retries=retries, backoff=backoff,
                        retry_on=retry_on, rate_limiter=None,
                    )
            except asyncio.TimeoutError as e:
                if on_error == "raise":
                    raise TimeoutError(f"Timed out: {item}", item=item, original_error=e)
                elif on_error == "collect":
                    # Fall through below with the timeout recorded as the error.
                    err = e
                    val = None
                else:
                    # "skip": count it for progress, yield nothing for it.
                    completed += 1
                    if progress:
                        elapsed = time.monotonic() - start
                        eta = (elapsed / completed * (total_tasks - completed)) if completed > 0 else 0
                        progress(ProgressInfo(completed=completed, total=total_tasks, elapsed=elapsed, eta=eta))
                    return task_idx, None

            duration = time.monotonic() - t0

            if err is not None:
                if on_error == "raise":
                    raise BatchError(f"Failed: {item}", item=item, original_error=err)
                elif on_error == "collect":
                    # Keep the failure as a BatchResult with error set.
                    br = BatchResult(value=None, error=err, item=item, duration=duration)
                else:
                    br = None  # "skip"
            else:
                br = BatchResult(value=val, error=None, item=item, duration=duration)

            completed += 1
            if progress:
                elapsed = time.monotonic() - start
                eta = (elapsed / completed * (total_tasks - completed)) if completed > 0 else 0
                progress(ProgressInfo(completed=completed, total=total_tasks, elapsed=elapsed, eta=eta))

            return task_idx, br

    # Yield each finished task as soon as it completes.
    coros = [_process_task(idx, item) for idx, item in item_mapping]
    for coro in asyncio.as_completed(coros):
        task_idx, br = await coro
        if br is not None:
            yield br
278
+
279
+
280
+ def _takes_chunk(fn) -> bool:
281
+ import inspect
282
+ sig = inspect.signature(fn)
283
+ return 'chunk' in sig.parameters
284
+
285
+
286
+ def _takes_items(fn) -> bool:
287
+ import inspect
288
+ sig = inspect.signature(fn)
289
+ return 'items' in sig.parameters
290
+
291
+
292
def _map_sync(
    fn, items, *,
    max_workers=4, retries=0, backoff="exponential",
    retry_on=(Exception,), on_error="skip",
    chunk_size=None, rate_limit=None,
    ordered=True, timeout=None, progress=None,
) -> list[BatchResult]:
    """Synchronous batch map.

    Runs fn over items (or chunks of items) on a thread pool and returns
    a list of BatchResult stored by input index.

    NOTE(review): the `ordered` parameter is accepted but never read here —
    results are always assembled in input order regardless of its value.
    """
    # Optional token-bucket throttle shared by all worker threads.
    limiter = RateLimiter(rate_limit) if rate_limit else None

    # Chunked mode: each task is a slice of `items`; _process_single then
    # calls fn(chunk=<slice>) instead of fn(item).
    if chunk_size is not None:
        chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
        work_items = list(enumerate(chunks))
    else:
        work_items = list(enumerate(items))

    total = len(work_items)
    results = [None] * total  # one slot per task, filled by input index
    completed = 0
    start = time.monotonic()

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        for idx, item in work_items:
            future = executor.submit(
                _process_single,
                fn, item, idx,
                chunk_size=chunk_size, retries=retries,
                backoff=backoff, retry_on=retry_on,
                rate_limiter=limiter, timeout=timeout,
            )
            futures[future] = idx

        for future in as_completed(futures):
            idx = futures[future]
            try:
                br = future.result()
            except BatchError as e:
                # _process_single wraps all failures (incl. timeouts, which
                # are TimeoutError, a BatchError subclass) in BatchError.
                if on_error == "raise":
                    raise
                if on_error == "collect":
                    br = BatchResult(value=None, error=e.original_error or e, item=e.item, duration=0)
                else:
                    br = None  # "skip": drop the failed item
            except Exception as e:
                # Defensive: a failure not wrapped by _process_single.
                if on_error == "raise":
                    raise
                if on_error == "collect":
                    br = BatchResult(value=None, error=e, item=work_items[idx][1], duration=0)
                else:
                    br = None

            if br is not None:
                results[idx] = br

            completed += 1
            if progress:
                elapsed = time.monotonic() - start
                eta = (elapsed / completed * (total - completed)) if completed > 0 else 0
                progress(ProgressInfo(completed=completed, total=total, elapsed=elapsed, eta=eta))

    if on_error in ("skip", "collect"):
        # Remove slots left empty by skipped failures.
        results = [r for r in results if r is not None]

    return results
357
+
358
+
359
def _stream_sync(
    fn, items, *,
    max_workers=4, retries=0, backoff="exponential",
    retry_on=(Exception,), on_error="skip",
    chunk_size=None, rate_limit=None,
    timeout=None, progress=None,
) -> Generator[BatchResult, None, None]:
    """Streaming sync batch map — yields results as they complete.

    Unlike _map_sync, results come out in completion order (not input
    order), and items skipped via on_error="skip" produce no yield.
    """
    # Optional token-bucket throttle shared by all worker threads.
    limiter = RateLimiter(rate_limit) if rate_limit else None

    # Chunked mode: each task is a slice of `items`, passed as fn(chunk=...).
    if chunk_size is not None:
        chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
        work_items = list(enumerate(chunks))
    else:
        work_items = list(enumerate(items))

    total = len(work_items)
    completed = 0
    start = time.monotonic()

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        for idx, item in work_items:
            future = executor.submit(
                _process_single,
                fn, item, idx,
                chunk_size=chunk_size, retries=retries,
                backoff=backoff, retry_on=retry_on,
                rate_limiter=limiter, timeout=timeout,
            )
            futures[future] = idx

        for future in as_completed(futures):
            idx = futures[future]
            try:
                br = future.result()
            except BatchError as e:
                # _process_single wraps all failures (incl. timeouts) in BatchError.
                if on_error == "raise":
                    raise
                if on_error == "collect":
                    br = BatchResult(value=None, error=e.original_error or e, item=e.item, duration=0)
                else:
                    br = None  # "skip": drop this item silently
            except Exception as e:
                # Defensive: a failure not wrapped by _process_single.
                if on_error == "raise":
                    raise
                if on_error == "collect":
                    br = BatchResult(value=None, error=e, item=work_items[idx][1], duration=0)
                else:
                    br = None

            completed += 1
            if progress:
                elapsed = time.monotonic() - start
                eta = (elapsed / completed * (total - completed)) if completed > 0 else 0
                progress(ProgressInfo(completed=completed, total=total, elapsed=elapsed, eta=eta))

            if br is not None:
                yield br
419
+
420
def _process_single(fn, item, idx, *, chunk_size=None, retries=0,
                    backoff="exponential", retry_on=(Exception,),
                    rate_limiter=None, timeout=None):
    """Process one item (or chunk) with retries, rate limiting, and timeout.

    Returns:
        BatchResult on success (with the measured duration — previously
        hard-coded to 0 despite the field existing for this).

    Raises:
        TimeoutError: when the call exceeds `timeout` seconds.
        BatchError: when the call ultimately fails after retries.
    """
    # Chunked mode: the callable receives the slice as fn(chunk=<list>).
    if chunk_size is not None:
        args = ()
        kwargs = {"chunk": item}
    else:
        args = (item,)
        kwargs = {}

    t0 = time.monotonic()

    if timeout is not None:
        # Run the (possibly retried) call in a one-off worker thread so the
        # wait can be bounded. NOTE: a thread already running cannot be
        # interrupted; cancel_futures only stops queued work from starting.
        from concurrent.futures import ThreadPoolExecutor as TPE, TimeoutError as FuturesTimeout
        tpe = TPE(max_workers=1)
        try:
            future = tpe.submit(
                retry_call, fn, args=args, kwargs=kwargs,
                retries=retries, backoff=backoff, retry_on=retry_on,
                rate_limiter=rate_limiter,
            )
            try:
                val, err = future.result(timeout=timeout)
            except FuturesTimeout as exc:
                future.cancel()
                # Chain and reuse the caught exception (previously a fresh,
                # unrelated FuturesTimeout was fabricated as original_error).
                raise TimeoutError(
                    f"Item {item} timed out after {timeout}s",
                    item=item,
                    original_error=exc,
                ) from exc
        finally:
            # Single shutdown point (the old code shut down twice).
            tpe.shutdown(wait=False, cancel_futures=True)
    else:
        val, err = retry_call(
            fn, args=args, kwargs=kwargs,
            retries=retries, backoff=backoff, retry_on=retry_on,
            rate_limiter=rate_limiter,
        )

    if err is not None:
        raise BatchError(f"Failed processing item: {item}", item=item, original_error=err)

    return BatchResult(value=val, error=None, item=item, duration=time.monotonic() - t0)
batchly/progress.py ADDED
@@ -0,0 +1,73 @@
1
+ """Progress tracking for batch operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ import threading
7
+ import time
8
+ from dataclasses import dataclass, field
9
+ from typing import Any, Callable
10
+
11
+
12
@dataclass
class ProgressInfo:
    """Snapshot of batch progress handed to progress callbacks."""

    completed: int = 0   # tasks finished so far
    total: int = 0       # total tasks in this batch
    elapsed: float = 0.0  # seconds since the batch started
    eta: float = 0.0      # estimated seconds remaining

    @property
    def pct(self) -> float:
        """Percent complete; 0.0 when total is zero."""
        return (self.completed / self.total * 100.0) if self.total else 0.0

    @property
    def rate(self) -> float:
        """Completed tasks per second; 0.0 before any time has elapsed."""
        return (self.completed / self.elapsed) if self.elapsed else 0.0
32
+
33
+
34
class ProgressBar:
    """Renders an in-place terminal progress bar.

    Usage:
        results = batch_map(fn, items, progress=ProgressBar())
    """

    def __init__(self, width: int = 40, file=None):
        self.width = width
        self._file = file or sys.stderr
        self._lock = threading.Lock()  # serialize writes across worker threads
        self._start = time.monotonic()

    def __call__(self, info: ProgressInfo) -> None:
        """Write one refreshed bar line for the given progress snapshot."""
        pct = info.pct
        filled = int(self.width * pct / 100)
        bar = "█" * filled + "░" * (self.width - filled)

        # ETA: minutes above a minute, seconds otherwise, dash when unknown.
        if info.eta >= 60:
            eta_str = f"{info.eta / 60:.1f}m"
        elif info.eta > 0:
            eta_str = f"{info.eta:.1f}s"
        else:
            eta_str = "—"

        elapsed_str = (
            f"{info.elapsed / 60:.1f}m" if info.elapsed >= 60 else f"{info.elapsed:.1f}s"
        )

        line = f"\r[{bar}] {pct:5.1f}% ({info.completed}/{info.total}) {elapsed_str} elapsed, {eta_str} remaining"
        with self._lock:
            self._file.write(line)
            self._file.flush()
            # Terminate the line once the batch is done.
            if info.completed >= info.total and info.total > 0:
                self._file.write("\n")
                self._file.flush()

    def reset(self) -> None:
        """Restart the internal clock (e.g. before reusing the bar)."""
        self._start = time.monotonic()
batchly/rate_limit.py ADDED
@@ -0,0 +1,60 @@
1
+ """Token bucket rate limiter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ import time
7
+
8
+
9
class RateLimiter:
    """Thread-safe token-bucket rate limiter.

    Args:
        max_per_second: Maximum number of tokens (calls) per second.
    """

    def __init__(self, max_per_second: int):
        self.max_per_second = max_per_second
        self._tokens = float(max_per_second)  # bucket starts full
        self._last_refill = time.monotonic()
        self._lock = threading.Lock()
        self._min_interval = 1.0 / max_per_second

    def _try_take(self) -> float:
        """Refill the bucket, then try to take one token.

        Returns 0.0 on success, otherwise the seconds to wait before retrying.
        """
        with self._lock:
            now = time.monotonic()
            # Refill proportionally to elapsed time, capped at bucket size.
            refilled = self._tokens + (now - self._last_refill) * self.max_per_second
            self._tokens = min(float(self.max_per_second), refilled)
            self._last_refill = now

            if self._tokens >= 1.0:
                self._tokens -= 1.0
                return 0.0
            return (1.0 - self._tokens) / self.max_per_second

    def acquire(self) -> None:
        """Block the calling thread until a token is available."""
        while True:
            wait = self._try_take()
            if wait == 0.0:
                return
            time.sleep(wait)

    async def async_acquire(self) -> None:
        """Await until a token is available (cooperative version of acquire)."""
        import asyncio

        while True:
            wait = self._try_take()
            if wait == 0.0:
                return
            await asyncio.sleep(wait)
batchly/result.py ADDED
@@ -0,0 +1,28 @@
1
+ """Result wrapper for batch operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from dataclasses import dataclass, field
7
+ from typing import Any, Generic, TypeVar
8
+
9
T = TypeVar("T")


@dataclass
class BatchResult(Generic[T]):
    """Outcome of processing one item: either a value or an error.

    Attributes:
        value: The function's return value (None on failure).
        error: The exception raised, or None on success.
        item: The original input item.
        duration: Seconds spent processing this item.
    """

    value: T | None = None
    error: Exception | None = None
    item: Any = None
    duration: float = 0.0

    @property
    def ok(self) -> bool:
        """True when processing succeeded (no error recorded)."""
        return self.error is None

    def unwrap(self) -> T:
        """Return the value on success, otherwise re-raise the stored error."""
        if self.error is None:
            return self.value
        raise self.error
batchly/retry.py ADDED
@@ -0,0 +1,80 @@
1
+ """Retry logic with backoff strategies."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ import time
7
+ from typing import Any, Callable
8
+
9
+
10
+ def _compute_backoff(attempt: int, strategy: str, base: float = 1.0) -> float:
11
+ """Compute backoff delay in seconds."""
12
+ if strategy == "fixed":
13
+ return base
14
+ elif strategy == "exponential":
15
+ return base * (2 ** (attempt - 1)) + random.uniform(0, 0.1)
16
+ elif strategy == "adaptive":
17
+ return min(base * (2 ** (attempt - 1)), 60.0) + random.uniform(0, 0.1)
18
+ else:
19
+ return base
20
+
21
+
22
def retry_call(
    fn: Callable[..., Any],
    args: tuple = (),
    kwargs: dict | None = None,
    retries: int = 0,
    backoff: str = "exponential",
    retry_on: tuple[type[Exception], ...] = (Exception,),
    rate_limiter=None,
) -> tuple[Any, Exception | None]:
    """Call fn, retrying on exceptions listed in retry_on.

    Args:
        fn: Callable to invoke.
        args: Positional arguments forwarded to fn.
        kwargs: Keyword arguments forwarded to fn (None means {}).
        retries: Extra attempts allowed after the first failure.
        backoff: Delay strategy name passed to _compute_backoff.
        retry_on: Exception types that trigger a retry; any other
            exception fails immediately without retrying.
        rate_limiter: Optional RateLimiter acquired before each attempt.

    Returns:
        (result, None) on success, (None, error) on failure.
        (The return annotation was `-> Any`, inconsistent with
        async_retry_call's tuple annotation — fixed.)
    """
    last_error = None
    for attempt in range(retries + 1):
        if rate_limiter is not None:
            rate_limiter.acquire()
        try:
            return fn(*args, **(kwargs or {})), None
        except retry_on as e:
            last_error = e
            if attempt < retries:
                # Sleep between attempts per the configured backoff strategy.
                delay = _compute_backoff(attempt + 1, backoff)
                time.sleep(delay)
            else:
                return None, e
        except Exception as e:
            # Not a retryable type — fail fast.
            return None, e
    return None, last_error
51
+
52
+
53
async def async_retry_call(
    fn: Callable[..., Any],
    args: tuple = (),
    kwargs: dict | None = None,
    retries: int = 0,
    backoff: str = "exponential",
    retry_on: tuple[type[Exception], ...] = (Exception,),
    rate_limiter=None,
) -> tuple[Any, Exception | None]:
    """Await fn with retry logic; returns (result, error) like retry_call."""
    import asyncio

    last_error = None
    attempt = 0
    while attempt <= retries:
        if rate_limiter is not None:
            await rate_limiter.async_acquire()
        try:
            result = await fn(*args, **(kwargs or {}))
            return result, None
        except retry_on as exc:
            last_error = exc
            if attempt >= retries:
                # Out of attempts — report the last failure.
                return None, exc
            # Back off before the next attempt.
            await asyncio.sleep(_compute_backoff(attempt + 1, backoff))
        except Exception as exc:
            # Not a retryable type — fail fast.
            return None, exc
        attempt += 1
    return None, last_error
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: batchly
3
+ Version: 0.1.0
4
+ Summary: Batch processing made simple — concurrency, retries, progress, and error handling
5
+ Author: Teja
6
+ License: MIT
7
+ Requires-Python: >=3.10
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=7.0; extra == "dev"
10
+ Requires-Dist: hypothesis; extra == "dev"
11
+ Requires-Dist: pytest-cov; extra == "dev"
12
+ Requires-Dist: pytest-asyncio; extra == "dev"
@@ -0,0 +1,14 @@
1
+ batchly/__init__.py,sha256=UqmQg7UOqEzQNZicgcP_elJiFMLqfTPDGJKp6UgCsHE,713
2
+ batchly/batch.py,sha256=FqCj_L_k-5n-1GwqPXNoUaQyIJWLRXdAGFIZg8FkX-Y,4486
3
+ batchly/errors.py,sha256=qFZTk0Of9ZE1jlk-SZ_OY0Ui3XoN57AOL7mM2V9VLq0,410
4
+ batchly/filter_.py,sha256=aGkMh6uu-wQ8lRg_H0kGrLP-KbWax7iIK5Lg5-0H42I,945
5
+ batchly/foreach.py,sha256=N6uCj8QQ-YcJzmBH58dMdeRiJ8io8tPyBZNJTiU2HRY,753
6
+ batchly/map_.py,sha256=EFBi16k04vhCNlov3Zc1Ly2dZwn0rhRnSxi3iZfjf_Y,16692
7
+ batchly/progress.py,sha256=PrmKOVthVhX_UArNsJYYSgfxHPkz4nh7_FicWbwAfgY,1935
8
+ batchly/rate_limit.py,sha256=tIPMC32hKHNk8U8coYMcXmAAqBvL6c2wMQAgniPaZrM,1868
9
+ batchly/result.py,sha256=KGUC0p7MmcIthCFlmEs38lf6wv2RTsx6rn5GSua_a3o,595
10
+ batchly/retry.py,sha256=OcszoLZXiGY_ZeQEJK54nfMWTt7u8IXYKOpzd1prRWE,2322
11
+ batchly-0.1.0.dist-info/METADATA,sha256=dK1-5S00Uvq2J_yNMpiWWyMZfm8JZqtdHruLuD_5toU,387
12
+ batchly-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
13
+ batchly-0.1.0.dist-info/top_level.txt,sha256=lcpXnKXKuPFalW1pqVpYhPphM9xgVa-OHyUilnvFDug,8
14
+ batchly-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ batchly