comio 0.2.0__tar.gz → 0.2.1__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {comio-0.2.0 → comio-0.2.1}/PKG-INFO +1 -1
- {comio-0.2.0 → comio-0.2.1}/comio/adapters/jsonl.py +9 -10
- {comio-0.2.0 → comio-0.2.1}/comio/io.py +24 -30
- {comio-0.2.0 → comio-0.2.1}/comio/pipe.py +3 -4
- {comio-0.2.0 → comio-0.2.1}/comio/sync/io.py +9 -5
- {comio-0.2.0 → comio-0.2.1}/pyproject.toml +1 -1
- {comio-0.2.0 → comio-0.2.1}/tests/test_jsonl.py +81 -0
- {comio-0.2.0 → comio-0.2.1}/.github/workflows/publish.yml +0 -0
- {comio-0.2.0 → comio-0.2.1}/.gitignore +0 -0
- {comio-0.2.0 → comio-0.2.1}/README.md +0 -0
- {comio-0.2.0 → comio-0.2.1}/comio/__init__.py +0 -0
- {comio-0.2.0 → comio-0.2.1}/comio/adapters/__init__.py +0 -0
- {comio-0.2.0 → comio-0.2.1}/comio/py.typed +0 -0
- {comio-0.2.0 → comio-0.2.1}/comio/sync/__init__.py +0 -0
|
@@ -11,28 +11,27 @@ class Page:
|
|
|
11
11
|
|
|
12
12
|
class JsonL:
|
|
13
13
|
|
|
14
|
-
def __init__(self, f: io.
|
|
14
|
+
def __init__(self, f: io.TextIOBase):
|
|
15
15
|
self.f = f
|
|
16
16
|
|
|
17
|
-
def read(self, cursor: Cursor | None = None, n: int | None =None) -> Page:
|
|
17
|
+
def read(self, cursor: Cursor | None = None, n: int | None = None) -> Page:
|
|
18
18
|
self.f.seek(cursor or 0)
|
|
19
|
-
|
|
20
|
-
if
|
|
19
|
+
line = self.f.readline()
|
|
20
|
+
if line == "":
|
|
21
21
|
return Page([], EOF)
|
|
22
|
-
return Page(items=[json.loads(
|
|
23
|
-
|
|
22
|
+
return Page(items=[json.loads(line)], next_cursor=self.f.tell())
|
|
24
23
|
|
|
25
24
|
def write(self, item: dict) -> None:
|
|
26
25
|
self.f.write(json.dumps(item) + "\n")
|
|
27
26
|
self.f.flush()
|
|
28
27
|
|
|
29
|
-
|
|
28
|
+
|
|
30
29
|
class AsyncJsonL:
|
|
31
30
|
|
|
32
|
-
def __init__(self, f: io.
|
|
31
|
+
def __init__(self, f: io.TextIOBase):
|
|
33
32
|
self.syncio = JsonL(f)
|
|
34
|
-
|
|
35
|
-
async def read(self, cursor: Cursor | None = None, n: int | None =None) -> Page:
|
|
33
|
+
|
|
34
|
+
async def read(self, cursor: Cursor | None = None, n: int | None = None) -> Page:
|
|
36
35
|
return self.syncio.read(cursor=cursor, n=n)
|
|
37
36
|
|
|
38
37
|
async def write(self, item: dict) -> None:
|
|
@@ -1,17 +1,18 @@
|
|
|
1
|
-
"""Composable I/O primitives.
|
|
1
|
+
"""Composable async I/O primitives.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Protocols:
|
|
4
4
|
|
|
5
|
-
Reader:
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
Reader: pull-based, paginated source (read → Page)
|
|
6
|
+
Listener: push-based, streaming source (listen → AsyncIterator)
|
|
7
|
+
Writer: single-item sink (write)
|
|
8
|
+
Batcher: multi-item sink (batch)
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
Functions:
|
|
11
11
|
|
|
12
|
-
scroll:
|
|
13
|
-
read_all:
|
|
14
|
-
copy:
|
|
12
|
+
scroll: iterate pages from a Reader
|
|
13
|
+
read_all: drain a Reader into a list
|
|
14
|
+
copy: buffer a Listener into a Batcher
|
|
15
|
+
as_listener: convert a Reader into a Listener
|
|
15
16
|
"""
|
|
16
17
|
|
|
17
18
|
from __future__ import annotations
|
|
@@ -86,7 +87,7 @@ class Listener(t.Protocol, t.Generic[T_co]):
|
|
|
86
87
|
a Listener yields items as they become available.
|
|
87
88
|
"""
|
|
88
89
|
|
|
89
|
-
|
|
90
|
+
def listen(self) -> t.AsyncIterator[T_co]:
|
|
90
91
|
"""Start listening and yield items as they arrive.
|
|
91
92
|
|
|
92
93
|
The iterator completes when the source is exhausted or closed.
|
|
@@ -117,8 +118,6 @@ class Batcher(t.Protocol, t.Generic[T_contra]):
|
|
|
117
118
|
...
|
|
118
119
|
|
|
119
120
|
|
|
120
|
-
|
|
121
|
-
|
|
122
121
|
async def scroll(r: Reader[T_co], *, cursor: Cursor = None, n: int | None = None) -> t.AsyncIterator[Page[T_co]]:
|
|
123
122
|
"""Iterate pages from a Reader until the source is exhausted.
|
|
124
123
|
|
|
@@ -167,7 +166,7 @@ async def copy(src: Listener[T_co], dst: Batcher[T_co], n: int) -> None:
|
|
|
167
166
|
n: Buffer size. Flushes every ``n`` items, plus any remainder at the end.
|
|
168
167
|
"""
|
|
169
168
|
buf: t.List[T_co] = []
|
|
170
|
-
async for item in
|
|
169
|
+
async for item in src.listen():
|
|
171
170
|
buf.append(item)
|
|
172
171
|
if len(buf) >= n:
|
|
173
172
|
await dst.batch(buf)
|
|
@@ -176,22 +175,17 @@ async def copy(src: Listener[T_co], dst: Batcher[T_co], n: int) -> None:
|
|
|
176
175
|
await dst.batch(buf)
|
|
177
176
|
|
|
178
177
|
|
|
179
|
-
|
|
180
|
-
"""Convert a Reader into
|
|
181
|
-
|
|
182
|
-
Normalizes a pull-based Reader into the same shape as Listener.listen(),
|
|
183
|
-
so downstream code can treat both sources uniformly.
|
|
184
|
-
|
|
185
|
-
Args:
|
|
186
|
-
r: The reader to convert.
|
|
187
|
-
cursor: Starting position.
|
|
188
|
-
n: Page size passed to the reader.
|
|
178
|
+
def as_listener(r: Reader[T_co], *, cursor: Cursor = None, n: int | None = None) -> Listener[T_co]:
|
|
179
|
+
"""Convert a Reader into a Listener.
|
|
189
180
|
|
|
190
|
-
|
|
191
|
-
|
|
181
|
+
Returns a Listener whose ``listen()`` yields individual items
|
|
182
|
+
from the Reader's pages, so downstream code (e.g. ``copy``, ``pipe``)
|
|
183
|
+
can treat both sources uniformly.
|
|
192
184
|
"""
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
185
|
+
class _Listener:
|
|
186
|
+
async def listen(self) -> t.AsyncIterator[T_co]:
|
|
187
|
+
async for page in scroll(r, cursor=cursor, n=n):
|
|
188
|
+
for item in page.items:
|
|
189
|
+
yield item
|
|
197
190
|
|
|
191
|
+
return _Listener()
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
"""Streaming pipe: Listener
|
|
1
|
+
"""Streaming pipe: Listener -> Handler -> Writer.
|
|
2
2
|
|
|
3
3
|
Connects a push-based source to a sink through a transformation handler,
|
|
4
4
|
using in-memory streams with configurable backpressure.
|
|
5
5
|
|
|
6
|
-
src
|
|
6
|
+
src --> [in_stream] --> handler --> [out_stream] --> dest
|
|
7
7
|
|
|
8
8
|
Three concurrent tasks (ingress / process / egress) are managed
|
|
9
9
|
by an anyio task group. When the source is exhausted, streams close
|
|
@@ -19,7 +19,6 @@ import anyio
|
|
|
19
19
|
from .io import Listener, Writer
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
|
|
23
22
|
In = t.TypeVar("In", covariant=True)
|
|
24
23
|
Out = t.TypeVar("Out", contravariant=True)
|
|
25
24
|
H_In = t.TypeVar("H_In", contravariant=True)
|
|
@@ -84,7 +83,7 @@ async def _pipe(
|
|
|
84
83
|
async def ingress() -> None:
|
|
85
84
|
"""src → in_stream. Closes in_send when the source is exhausted."""
|
|
86
85
|
async with in_send:
|
|
87
|
-
async for item in
|
|
86
|
+
async for item in src.listen():
|
|
88
87
|
await in_send.send(item)
|
|
89
88
|
|
|
90
89
|
async def process() -> None:
|
|
@@ -75,8 +75,12 @@ def copy(src: Listener[T_co], dst: Batcher[T_co], n: int) -> None:
|
|
|
75
75
|
dst.batch(buf)
|
|
76
76
|
|
|
77
77
|
|
|
78
|
-
def as_listener(r: Reader[T_co], *, cursor: Cursor = None, n: int | None = None) ->
|
|
79
|
-
"""Convert a Reader into
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
78
|
+
def as_listener(r: Reader[T_co], *, cursor: Cursor = None, n: int | None = None) -> Listener[T_co]:
|
|
79
|
+
"""Convert a sync Reader into a Listener."""
|
|
80
|
+
class _Listener:
|
|
81
|
+
def listen(self) -> t.Iterator[T_co]:
|
|
82
|
+
for page in scroll(r, cursor=cursor, n=n):
|
|
83
|
+
for item in page.items:
|
|
84
|
+
yield item
|
|
85
|
+
|
|
86
|
+
return _Listener()
|
|
@@ -2,6 +2,9 @@ import io
|
|
|
2
2
|
import json
|
|
3
3
|
import pytest
|
|
4
4
|
|
|
5
|
+
import comio
|
|
6
|
+
import typing as t
|
|
7
|
+
|
|
5
8
|
from comio import scroll, read_all, EOF
|
|
6
9
|
from comio.sync import scroll as sync_scroll, read_all as sync_read_all
|
|
7
10
|
from comio.adapters.jsonl import JsonL, AsyncJsonL
|
|
@@ -146,3 +149,81 @@ async def test_async_read_all_from_cursor(sample_file):
|
|
|
146
149
|
first = await reader.read()
|
|
147
150
|
items = await read_all(reader, cursor=first.next_cursor)
|
|
148
151
|
assert items == [{"id": 1}, {"id": 2}]
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class AsyncJsonLBatcher:
|
|
155
|
+
def __init__(self, f):
|
|
156
|
+
self.f = f
|
|
157
|
+
|
|
158
|
+
async def batch(self, items: t.Sequence[dict]) -> None:
|
|
159
|
+
for item in items:
|
|
160
|
+
self.f.write(json.dumps(item) + "\n")
|
|
161
|
+
self.f.flush()
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@pytest.mark.asyncio
|
|
165
|
+
async def test_copy_via_as_listener(sample_file):
|
|
166
|
+
reader = AsyncJsonL(sample_file)
|
|
167
|
+
out = io.StringIO()
|
|
168
|
+
batcher = AsyncJsonLBatcher(out)
|
|
169
|
+
|
|
170
|
+
listener = comio.as_listener(reader)
|
|
171
|
+
await comio.copy(listener, batcher, n=2)
|
|
172
|
+
|
|
173
|
+
out.seek(0)
|
|
174
|
+
lines = [json.loads(l) for l in out.read().strip().split("\n")]
|
|
175
|
+
assert lines == [{"id": 0}, {"id": 1}, {"id": 2}]
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
@pytest.mark.asyncio
|
|
180
|
+
async def test_pipe(sample_file):
|
|
181
|
+
from comio import pipe
|
|
182
|
+
|
|
183
|
+
reader = AsyncJsonL(sample_file)
|
|
184
|
+
out = io.StringIO()
|
|
185
|
+
writer = AsyncJsonL(out)
|
|
186
|
+
listener = comio.as_listener(reader)
|
|
187
|
+
|
|
188
|
+
async def add_flag(item: dict) -> dict:
|
|
189
|
+
return {**item, "processed": True}
|
|
190
|
+
|
|
191
|
+
await pipe(listener, writer, add_flag)
|
|
192
|
+
|
|
193
|
+
out.seek(0)
|
|
194
|
+
lines = [json.loads(l) for l in out.read().strip().split("\n")]
|
|
195
|
+
assert lines == [
|
|
196
|
+
{"id": 0, "processed": True},
|
|
197
|
+
{"id": 1, "processed": True},
|
|
198
|
+
{"id": 2, "processed": True},
|
|
199
|
+
]
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@pytest.mark.asyncio
|
|
203
|
+
async def test_pipe_with_hooks(sample_file):
|
|
204
|
+
from comio import pipe, PipeConfig
|
|
205
|
+
|
|
206
|
+
reader = AsyncJsonL(sample_file)
|
|
207
|
+
out = io.StringIO()
|
|
208
|
+
writer = AsyncJsonL(out)
|
|
209
|
+
listener = comio.as_listener(reader)
|
|
210
|
+
|
|
211
|
+
log: list[str] = []
|
|
212
|
+
|
|
213
|
+
async def on_boot():
|
|
214
|
+
log.append("boot")
|
|
215
|
+
|
|
216
|
+
async def on_close():
|
|
217
|
+
log.append("close")
|
|
218
|
+
|
|
219
|
+
cfg = PipeConfig(on_boot=[on_boot], on_close=[on_close])
|
|
220
|
+
|
|
221
|
+
async def identity(item: dict) -> dict:
|
|
222
|
+
return item
|
|
223
|
+
|
|
224
|
+
await pipe(listener, writer, identity, cfg=cfg)
|
|
225
|
+
|
|
226
|
+
assert log == ["boot", "close"]
|
|
227
|
+
out.seek(0)
|
|
228
|
+
lines = [json.loads(l) for l in out.read().strip().split("\n")]
|
|
229
|
+
assert len(lines) == 3
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|