batchly 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- batchly-0.1.0/PKG-INFO +12 -0
- batchly-0.1.0/README.md +48 -0
- batchly-0.1.0/pyproject.toml +21 -0
- batchly-0.1.0/setup.cfg +4 -0
- batchly-0.1.0/src/batchly/__init__.py +28 -0
- batchly-0.1.0/src/batchly/batch.py +136 -0
- batchly-0.1.0/src/batchly/errors.py +14 -0
- batchly-0.1.0/src/batchly/filter_.py +36 -0
- batchly-0.1.0/src/batchly/foreach.py +31 -0
- batchly-0.1.0/src/batchly/map_.py +461 -0
- batchly-0.1.0/src/batchly/progress.py +73 -0
- batchly-0.1.0/src/batchly/rate_limit.py +60 -0
- batchly-0.1.0/src/batchly/result.py +28 -0
- batchly-0.1.0/src/batchly/retry.py +80 -0
- batchly-0.1.0/src/batchly.egg-info/PKG-INFO +12 -0
- batchly-0.1.0/src/batchly.egg-info/SOURCES.txt +27 -0
- batchly-0.1.0/src/batchly.egg-info/dependency_links.txt +1 -0
- batchly-0.1.0/src/batchly.egg-info/requires.txt +6 -0
- batchly-0.1.0/src/batchly.egg-info/top_level.txt +1 -0
- batchly-0.1.0/tests/test_adversarial.py +404 -0
- batchly-0.1.0/tests/test_batch.py +111 -0
- batchly-0.1.0/tests/test_filter.py +76 -0
- batchly-0.1.0/tests/test_foreach.py +73 -0
- batchly-0.1.0/tests/test_integration.py +365 -0
- batchly-0.1.0/tests/test_map.py +261 -0
- batchly-0.1.0/tests/test_progress.py +103 -0
- batchly-0.1.0/tests/test_rate_limit.py +117 -0
- batchly-0.1.0/tests/test_retry.py +144 -0
- batchly-0.1.0/tests/test_round2.py +389 -0
batchly-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: batchly
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Batch processing made simple — concurrency, retries, progress, and error handling
|
|
5
|
+
Author: Teja
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
10
|
+
Requires-Dist: hypothesis; extra == "dev"
|
|
11
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
12
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
batchly-0.1.0/README.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# batchly
|
|
2
|
+
|
|
3
|
+
Batch processing made simple — concurrency, retries, progress, and error handling.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install -e .
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from batchly import batch, batch_map, batch_filter, batch_for_each
|
|
15
|
+
|
|
16
|
+
# Decorator
|
|
17
|
+
@batch(max_workers=10, retries=3)
|
|
18
|
+
def process(item):
|
|
19
|
+
return item * 2
|
|
20
|
+
|
|
21
|
+
results = process([1, 2, 3, 4, 5])
|
|
22
|
+
|
|
23
|
+
# Functional
|
|
24
|
+
results = batch_map(transform, items, max_workers=20, retries=3)
|
|
25
|
+
keep = batch_filter(predicate, items, max_workers=10)
|
|
26
|
+
batch_for_each(side_effect, items, max_workers=5)
|
|
27
|
+
|
|
28
|
+
# Reusable context
|
|
29
|
+
b = Batch(max_workers=10, retries=3, progress=ProgressBar())
|
|
30
|
+
results = b.map(fn, items)
|
|
31
|
+
filtered = b.filter(pred, results)
|
|
32
|
+
b.foreach(save, filtered)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Features
|
|
36
|
+
|
|
37
|
+
- **Concurrency** — ThreadPoolExecutor for sync, asyncio for async
|
|
38
|
+
- **Retries** — Exponential/fixed/adaptive backoff
|
|
39
|
+
- **Error handling** — skip, raise, or collect errors
|
|
40
|
+
- **Rate limiting** — Token bucket algorithm
|
|
41
|
+
- **Progress** — Built-in ProgressBar or custom callbacks
|
|
42
|
+
- **Timeouts** — Per-item timeout support
|
|
43
|
+
- **Chunked** — Group items for bulk processing
|
|
44
|
+
- **Ordered** — Results match input order
|
|
45
|
+
- **Streaming** — Generator mode for results as they complete
|
|
46
|
+
- **Zero dependencies** — stdlib only
|
|
47
|
+
|
|
48
|
+
MIT License.
|
|
batchly-0.1.0/pyproject.toml
ADDED
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "batchly"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Batch processing made simple — concurrency, retries, progress, and error handling"
|
|
5
|
+
license = {text = "MIT"}
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
authors = [{name = "Teja"}]
|
|
8
|
+
dependencies = []
|
|
9
|
+
|
|
10
|
+
[project.optional-dependencies]
|
|
11
|
+
dev = ["pytest>=7.0", "hypothesis", "pytest-cov", "pytest-asyncio"]
|
|
12
|
+
|
|
13
|
+
[build-system]
|
|
14
|
+
requires = ["setuptools>=64"]
|
|
15
|
+
build-backend = "setuptools.build_meta"
|
|
16
|
+
|
|
17
|
+
[tool.setuptools.packages.find]
|
|
18
|
+
where = ["src"]
|
|
19
|
+
|
|
20
|
+
[tool.pytest.ini_options]
|
|
21
|
+
asyncio_mode = "auto"
|
batchly-0.1.0/src/batchly/__init__.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""batchly — Batch processing made simple."""
|
|
2
|
+
|
|
3
|
+
from .batch import Batch, batch
|
|
4
|
+
from .errors import BatchError, TimeoutError
|
|
5
|
+
from .filter_ import async_batch_filter, batch_filter
|
|
6
|
+
from .foreach import async_batch_for_each, batch_for_each
|
|
7
|
+
from .map_ import async_batch_map, batch_map
|
|
8
|
+
from .progress import ProgressBar, ProgressInfo
|
|
9
|
+
from .rate_limit import RateLimiter
|
|
10
|
+
from .result import BatchResult
|
|
11
|
+
|
|
12
|
+
__version__ = "0.1.0"
|
|
13
|
+
__all__ = [
|
|
14
|
+
"batch",
|
|
15
|
+
"Batch",
|
|
16
|
+
"batch_map",
|
|
17
|
+
"async_batch_map",
|
|
18
|
+
"batch_filter",
|
|
19
|
+
"async_batch_filter",
|
|
20
|
+
"batch_for_each",
|
|
21
|
+
"async_batch_for_each",
|
|
22
|
+
"BatchResult",
|
|
23
|
+
"BatchError",
|
|
24
|
+
"TimeoutError",
|
|
25
|
+
"ProgressBar",
|
|
26
|
+
"ProgressInfo",
|
|
27
|
+
"RateLimiter",
|
|
28
|
+
]
|
|
batchly-0.1.0/src/batchly/batch.py
ADDED
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""@batch decorator and Batch context class."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
import inspect
|
|
7
|
+
from typing import Any, Callable
|
|
8
|
+
|
|
9
|
+
from .filter_ import async_batch_filter, batch_filter
|
|
10
|
+
from .foreach import async_batch_for_each, batch_for_each
|
|
11
|
+
from .map_ import async_batch_map, batch_map
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Batch:
    """Reusable batch processing context.

    Bundles one set of batch-processing options (workers, retries, backoff,
    error policy, rate limiting, ordering, timeout, progress) so the same
    configuration can drive repeated map / filter / foreach calls, sync or
    async.

    Usage:
        b = Batch(max_workers=10, retries=3)
        results = b.map(fn, items)
        filtered = b.filter(pred, items)
        b.foreach(fn, items)
    """

    def __init__(
        self,
        *,
        max_workers: int = 4,
        retries: int = 0,
        backoff: str = "exponential",
        retry_on: tuple[type[Exception], ...] = (Exception,),
        on_error: str = "skip",
        chunk_size: int | None = None,
        rate_limit: int | None = None,
        ordered: bool = True,
        timeout: float | None = None,
        progress: Callable | None = None,
    ):
        # Every option is stored as a plain attribute so callers can
        # inspect or tweak the context after construction.
        self.max_workers = max_workers
        self.retries = retries
        self.backoff = backoff
        self.retry_on = retry_on
        self.on_error = on_error
        self.chunk_size = chunk_size
        self.rate_limit = rate_limit
        self.ordered = ordered
        self.timeout = timeout
        self.progress = progress

    def _common_kwargs(self) -> dict:
        """Snapshot the stored options as a keyword-argument dict."""
        return {
            "max_workers": self.max_workers,
            "retries": self.retries,
            "backoff": self.backoff,
            "retry_on": self.retry_on,
            "on_error": self.on_error,
            "chunk_size": self.chunk_size,
            "rate_limit": self.rate_limit,
            "ordered": self.ordered,
            "timeout": self.timeout,
            "progress": self.progress,
        }

    def map(self, fn: Callable, items, **overrides):
        """Run batch_map with this context's options; per-call overrides win."""
        return batch_map(fn, items, **{**self._common_kwargs(), **overrides})

    async def amap(self, fn: Callable, items, **overrides):
        """Async counterpart of map()."""
        return await async_batch_map(fn, items, **{**self._common_kwargs(), **overrides})

    def filter(self, fn: Callable, items, **overrides):
        """Run batch_filter with this context's options; per-call overrides win."""
        return batch_filter(fn, items, **{**self._common_kwargs(), **overrides})

    async def afilter(self, fn: Callable, items, **overrides):
        """Async counterpart of filter()."""
        return await async_batch_filter(fn, items, **{**self._common_kwargs(), **overrides})

    def foreach(self, fn: Callable, items, **overrides):
        """Run batch_for_each with this context's options; per-call overrides win."""
        return batch_for_each(fn, items, **{**self._common_kwargs(), **overrides})

    async def aforeach(self, fn: Callable, items, **overrides):
        """Async counterpart of foreach()."""
        return await async_batch_for_each(fn, items, **{**self._common_kwargs(), **overrides})
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def batch(*, max_workers: int = 4, retries: int = 0, **kwargs) -> Callable:
    """Decorator to turn a single-item function into a batch processor.

    Usage:
        @batch(max_workers=10, retries=3)
        def process(item):
            ...

        results = process([1, 2, 3])  # processes all in parallel
        single = process(42)          # calls directly for single item

    Extra keyword arguments are forwarded to the underlying Batch context.

    Fix vs. original: the str/bytes case contained a dead branch — the
    `len(...) <= 1` test returned `fn(items_or_single)` on both sides.
    The single-item check is now one shared helper used by both the
    sync and async wrappers.
    """

    def decorator(fn: Callable) -> Callable:
        # One reusable context per decorated function.
        _ctx = Batch(max_workers=max_workers, retries=retries, **kwargs)

        def _is_single(obj) -> bool:
            """True when obj should be handed to fn as-is (no fan-out)."""
            # Strings/bytes are iterable but are treated as single items.
            if isinstance(obj, (str, bytes, bytearray)):
                return True
            try:
                iter(obj)
            except TypeError:
                # Not iterable at all — single item.
                return True
            return False

        @functools.wraps(fn)
        def wrapper(items_or_single):
            if _is_single(items_or_single):
                return fn(items_or_single)
            # Iterable of items — batch process.
            return _ctx.map(fn, list(items_or_single))

        @functools.wraps(fn)
        async def async_wrapper(items_or_single):
            if _is_single(items_or_single):
                return await fn(items_or_single)
            return await _ctx.amap(fn, list(items_or_single))

        if inspect.iscoroutinefunction(fn):
            return async_wrapper
        return wrapper

    return decorator
|
|
batchly-0.1.0/src/batchly/errors.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Custom errors for batchly."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BatchError(Exception):
    """Raised when a batch operation fails (on_error='raise').

    Carries the failing input item and the underlying exception so
    callers can inspect or re-raise the root cause.
    """

    def __init__(self, message: str, item=None, original_error: Exception | None = None):
        super().__init__(message)
        # item: the input that failed, if known.
        self.item = item
        # original_error: the exception that triggered this one, if any.
        self.original_error = original_error


class TimeoutError(BatchError):
    """Raised when an item exceeds its timeout."""
|
|
batchly-0.1.0/src/batchly/filter_.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""batch_filter implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import inspect
|
|
6
|
+
from typing import Any, Callable, Iterable
|
|
7
|
+
|
|
8
|
+
from .map_ import batch_map, async_batch_map
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def batch_filter(
    fn: Callable[..., bool],
    items: Iterable[Any],
    **kwargs,
) -> list:
    """Keep the items for which fn(item) is truthy, evaluated in parallel.

    Returns a plain list of the surviving items (not a BatchResult).
    Items whose predicate call did not succeed (result not ok) are dropped.
    Keyword arguments are forwarded to batch_map unchanged.
    """
    if inspect.iscoroutinefunction(fn):
        raise TypeError(
            "batch_filter called with async function. Use async_batch_filter."
        )

    kept = []
    for outcome in batch_map(fn, items, **kwargs):
        if outcome.ok and outcome.value:
            kept.append(outcome.item)
    return kept
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def async_batch_filter(
    fn: Callable[..., bool],
    items: Iterable[Any],
    **kwargs,
) -> list:
    """Async version of batch_filter: keep items whose predicate is truthy."""
    outcomes = await async_batch_map(fn, items, **kwargs)
    # Drop items whose predicate call failed (not ok) or returned falsy.
    return [outcome.item for outcome in outcomes if outcome.ok and outcome.value]
|
|
batchly-0.1.0/src/batchly/foreach.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""batch_for_each implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import inspect
|
|
6
|
+
from typing import Any, Callable, Iterable
|
|
7
|
+
|
|
8
|
+
from .map_ import batch_map, async_batch_map
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def batch_for_each(
    fn: Callable[..., Any],
    items: Iterable[Any],
    **kwargs,
) -> None:
    """Apply fn to every item in parallel, for side effects only.

    Return values are discarded. Keyword arguments are forwarded to
    batch_map unchanged.
    """
    if inspect.iscoroutinefunction(fn):
        raise TypeError(
            "batch_for_each called with async function. Use async_batch_for_each."
        )

    # Results are intentionally ignored — callers only want the side effects.
    batch_map(fn, items, **kwargs)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
async def async_batch_for_each(
    fn: Callable[..., Any],
    items: Iterable[Any],
    **kwargs,
) -> None:
    """Async version of batch_for_each: side effects only, results discarded."""
    await async_batch_map(fn, items, **kwargs)
|