comio 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- comio-0.1.0/.claude/projects/-Users-sungdongkim-works-cio/memory/MEMORY.md +1 -0
- comio-0.1.0/.claude/projects/-Users-sungdongkim-works-cio/memory/feedback_import_convention.md +11 -0
- comio-0.1.0/.claude/settings.local.json +10 -0
- comio-0.1.0/PKG-INFO +109 -0
- comio-0.1.0/README.md +93 -0
- comio-0.1.0/comio/__init__.py +37 -0
- comio-0.1.0/comio/adapters/__init__.py +0 -0
- comio-0.1.0/comio/adapters/jsonl.py +28 -0
- comio-0.1.0/comio/io.py +193 -0
- comio-0.1.0/comio/pipe.py +106 -0
- comio-0.1.0/comio/py.typed +0 -0
- comio-0.1.0/pyproject.toml +31 -0
- comio-0.1.0/tests/test_jsonl.py +83 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
- [Import convention](feedback_import_convention.md) — `import cio as io`, Go-like DX is a core design goal
|
comio-0.1.0/.claude/projects/-Users-sungdongkim-works-cio/memory/feedback_import_convention.md
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: import convention
|
|
3
|
+
description: User uses "import cio as io" as the standard import — Go-like DX (io.Reader, io.Writer) is a core design goal
|
|
4
|
+
type: feedback
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
Always use `import cio as io` in examples and documentation. The goal is Go-like DX where protocols are accessed as `io.Reader`, `io.Writer`, `io.scroll()`, etc.
|
|
8
|
+
|
|
9
|
+
**Why:** The user explicitly wants a Go-inspired developer experience for Python I/O primitives.
|
|
10
|
+
|
|
11
|
+
**How to apply:** Use this convention in all code examples, README, and documentation. When suggesting usage patterns, frame them as `io.X` not `cio.X`.
|
comio-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: comio
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Composable I/O primitives for async Python
|
|
5
|
+
Project-URL: Repository, https://github.com/sungdongkim/cio
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Classifier: Framework :: AsyncIO
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Typing :: Typed
|
|
10
|
+
Requires-Python: >=3.11
|
|
11
|
+
Requires-Dist: anyio>=4.0
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
14
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# cio – Composable I/O
|
|
18
|
+
|
|
19
|
+
I/O primitives for async Python. Designed to be used as:
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
import comio as io
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Giving you `io.Reader`, `io.Writer`, `io.Listener`, etc. — a Go-like DX for Python.
|
|
26
|
+
|
|
27
|
+
## Install
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install comio
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
### Implement a Reader / Writer
|
|
36
|
+
|
|
37
|
+
Any object with the right method is a valid `io.Reader` or `io.Writer` — no base class needed.
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
import comio as io
|
|
41
|
+
|
|
42
|
+
class JsonL:
|
|
43
|
+
def __init__(self, f):
|
|
44
|
+
self.f = f
|
|
45
|
+
|
|
46
|
+
async def read(self, *, cursor=None, n=None) -> io.Page:
|
|
47
|
+
self.f.seek(cursor or 0)
|
|
48
|
+
line = self.f.readline()
|
|
49
|
+
if line == "":
|
|
50
|
+
return Page([], io.EOF)
|
|
51
|
+
return Page(items=[json.loads(line)], next_cursor=self.f.tell())
|
|
52
|
+
|
|
53
|
+
async def write(self, item: dict) -> None:
|
|
54
|
+
self.f.write(json.dumps(item) + "\n")
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Read pages with `scroll`
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
reader = JsonL(open("data.jsonl"))
|
|
61
|
+
|
|
62
|
+
async for page in io.scroll(reader):
|
|
63
|
+
print(page.items)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Drain everything with `read_all`
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
items = await io.read_all(reader)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Resume from a cursor
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
items = await io.read_all(reader, cursor=saved_cursor)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Normalize a Reader into a stream with `as_listener`
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
async for item in io.as_listener(reader):
|
|
82
|
+
process(item)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Buffered copy: Listener → Batcher
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
await io.copy(listener, batcher, n=100) # flush every 100 items
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Streaming pipe with backpressure
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from comio import pipe
|
|
95
|
+
|
|
96
|
+
async def transform(item):
|
|
97
|
+
return {**item, "processed": True}
|
|
98
|
+
|
|
99
|
+
await pipe(listener, writer, transform)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Protocols
|
|
103
|
+
|
|
104
|
+
| Protocol | Method | Description |
|
|
105
|
+
|--------------|----------|----------------------------------|
|
|
106
|
+
| `Reader[T]` | `read` | Pull a page of items |
|
|
107
|
+
| `Listener[T]`| `listen` | Push items as an async iterator |
|
|
108
|
+
| `Writer[T]` | `write` | Accept a single item |
|
|
109
|
+
| `Batcher[T]` | `batch` | Accept a sequence of items |
|
comio-0.1.0/README.md
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# cio – Composable I/O
|
|
2
|
+
|
|
3
|
+
I/O primitives for async Python. Designed to be used as:
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
import comio as io
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
Giving you `io.Reader`, `io.Writer`, `io.Listener`, etc. — a Go-like DX for Python.
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install comio
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Usage
|
|
18
|
+
|
|
19
|
+
### Implement a Reader / Writer
|
|
20
|
+
|
|
21
|
+
Any object with the right method is a valid `io.Reader` or `io.Writer` — no base class needed.
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
import comio as io
|
|
25
|
+
|
|
26
|
+
class JsonL:
|
|
27
|
+
def __init__(self, f):
|
|
28
|
+
self.f = f
|
|
29
|
+
|
|
30
|
+
async def read(self, *, cursor=None, n=None) -> io.Page:
|
|
31
|
+
self.f.seek(cursor or 0)
|
|
32
|
+
line = self.f.readline()
|
|
33
|
+
if line == "":
|
|
34
|
+
return Page([], io.EOF)
|
|
35
|
+
return Page(items=[json.loads(line)], next_cursor=self.f.tell())
|
|
36
|
+
|
|
37
|
+
async def write(self, item: dict) -> None:
|
|
38
|
+
self.f.write(json.dumps(item) + "\n")
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Read pages with `scroll`
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
reader = JsonL(open("data.jsonl"))
|
|
45
|
+
|
|
46
|
+
async for page in io.scroll(reader):
|
|
47
|
+
print(page.items)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Drain everything with `read_all`
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
items = await io.read_all(reader)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Resume from a cursor
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
items = await io.read_all(reader, cursor=saved_cursor)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Normalize a Reader into a stream with `as_listener`
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
async for item in io.as_listener(reader):
|
|
66
|
+
process(item)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Buffered copy: Listener → Batcher
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
await io.copy(listener, batcher, n=100) # flush every 100 items
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Streaming pipe with backpressure
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from comio import pipe
|
|
79
|
+
|
|
80
|
+
async def transform(item):
|
|
81
|
+
return {**item, "processed": True}
|
|
82
|
+
|
|
83
|
+
await pipe(listener, writer, transform)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Protocols
|
|
87
|
+
|
|
88
|
+
| Protocol | Method | Description |
|
|
89
|
+
|--------------|----------|----------------------------------|
|
|
90
|
+
| `Reader[T]` | `read` | Pull a page of items |
|
|
91
|
+
| `Listener[T]`| `listen` | Push items as an async iterator |
|
|
92
|
+
| `Writer[T]` | `write` | Accept a single item |
|
|
93
|
+
| `Batcher[T]` | `batch` | Accept a sequence of items |
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""comio – Composable I/O primitives for async Python."""
|
|
2
|
+
|
|
3
|
+
from .io import (
|
|
4
|
+
Batcher,
|
|
5
|
+
Cursor,
|
|
6
|
+
EOF,
|
|
7
|
+
Listener,
|
|
8
|
+
Page,
|
|
9
|
+
Reader,
|
|
10
|
+
Writer,
|
|
11
|
+
copy,
|
|
12
|
+
as_listener,
|
|
13
|
+
read_all,
|
|
14
|
+
scroll,
|
|
15
|
+
)
|
|
16
|
+
from .pipe import Handler, PipeConfig, pipe
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
# Primitives
|
|
20
|
+
"Cursor",
|
|
21
|
+
"EOF",
|
|
22
|
+
# Protocols
|
|
23
|
+
"Page",
|
|
24
|
+
"Reader",
|
|
25
|
+
"Listener",
|
|
26
|
+
"Writer",
|
|
27
|
+
"Batcher",
|
|
28
|
+
"Handler",
|
|
29
|
+
# Functions
|
|
30
|
+
"scroll",
|
|
31
|
+
"read_all",
|
|
32
|
+
"copy",
|
|
33
|
+
"as_listener",
|
|
34
|
+
"pipe",
|
|
35
|
+
# Config
|
|
36
|
+
"PipeConfig",
|
|
37
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import io
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from ..io import EOF, Cursor
|
|
6
|
+
|
|
7
|
+
@dataclass
|
|
8
|
+
class Page:
|
|
9
|
+
items: list[dict]
|
|
10
|
+
next_cursor: object
|
|
11
|
+
|
|
12
|
+
class JsonL:
    """JSON Lines adapter over a seekable text file object.

    Exposes ``read`` (paginated, cursor-based) and ``write`` (single item),
    so one instance can act as both a reader and a writer. Cursors are the
    values returned by ``f.tell()``; they are passed back to ``f.seek()``
    unchanged.
    """

    def __init__(self, f: io.TextIOBase):
        """Wrap an already-open text file.

        Args:
            f: Text stream supporting ``seek``/``tell``/``readline`` for
                reading and ``write``/``flush`` for writing.
        """
        self.f = f

    async def read(self, cursor: Cursor | None = None, n: int | None = None) -> Page:
        """Read one page of decoded JSON objects starting at ``cursor``.

        Args:
            cursor: Position previously returned as ``next_cursor``.
                ``None`` (or ``0``) starts at the beginning of the file.
                Passing ``EOF`` yields an empty final page.
            n: Maximum number of lines to decode for this page. ``None``
                keeps the historical behaviour of one line per page;
                values below 1 are treated as 1.

        Returns:
            A ``Page`` whose ``items`` holds the decoded lines. When nothing
            could be read, ``items`` is empty and ``next_cursor`` is ``EOF``;
            otherwise ``next_cursor`` is the file position after the last
            line consumed.
        """
        if cursor is EOF:
            # A caller resuming from a finished cursor gets a clean EOF page
            # instead of a TypeError from seek().
            return Page([], EOF)

        self.f.seek(cursor or 0)
        limit = 1 if n is None else max(n, 1)

        items = []
        while len(items) < limit:
            line = self.f.readline()
            if line == "":
                # readline() returns "" only at end of file.
                break
            items.append(json.loads(line))

        if not items:
            return Page([], EOF)
        return Page(items=items, next_cursor=self.f.tell())

    async def write(self, item: dict) -> None:
        """Append ``item`` as one JSON line and flush the stream."""
        self.f.write(json.dumps(item) + "\n")
        self.f.flush()
|
|
28
|
+
|
comio-0.1.0/comio/io.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""Composable I/O primitives.
|
|
2
|
+
|
|
3
|
+
Defines four protocol interfaces for asynchronous data flow:
|
|
4
|
+
|
|
5
|
+
Reader: pull-based, paginated source (read → Page)
|
|
6
|
+
Writer: single-item sink (write)
|
|
7
|
+
Batcher: multi-item sink (batch)
|
|
8
|
+
Listener: push-based, streaming source (listen → AsyncIterator)
|
|
9
|
+
|
|
10
|
+
And composable functions that wire them together:
|
|
11
|
+
|
|
12
|
+
scroll: iterate pages from a Reader
|
|
13
|
+
read_all: drain a Reader into a list
|
|
14
|
+
copy: buffer items from a Listener and flush them to a Batcher
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import typing as t
|
|
20
|
+
|
|
21
|
+
T = t.TypeVar("T")
|
|
22
|
+
T_co = t.TypeVar("T_co", covariant=True)
|
|
23
|
+
T_contra = t.TypeVar("T_contra", contravariant=True)
|
|
24
|
+
|
|
25
|
+
Cursor: t.TypeAlias = t.Any
|
|
26
|
+
"""An opaque position marker within a data source.
|
|
27
|
+
|
|
28
|
+
Can be any type the data source uses to track position:
|
|
29
|
+
an integer offset, a string token, a complex object, etc.
|
|
30
|
+
Each Reader implementation defines how to interpret its cursors.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class _EOF:
    """Sentinel type marking the end of a data source.

    Only the module-level ``EOF`` instance is meant to be used; it is
    compared by identity (``next_cursor is EOF``).
    """

    def __repr__(self) -> str:
        # Readable in logs and assertion failures instead of the default
        # ``<..._EOF object at 0x...>``.
        return "EOF"


EOF = _EOF()
"""Singleton sentinel. Return this as ``next_cursor`` to signal
that there is no more data to read."""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Page(t.Protocol, t.Generic[T_co]):
|
|
45
|
+
"""A single page of results returned by a Reader.
|
|
46
|
+
|
|
47
|
+
Attributes:
|
|
48
|
+
items: The items contained in this page.
|
|
49
|
+
next_cursor: The cursor pointing to the next page,
|
|
50
|
+
or ``EOF`` if this is the last page.
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
@property
|
|
54
|
+
def items(self) -> t.Sequence[T_co]: ...
|
|
55
|
+
|
|
56
|
+
@property
|
|
57
|
+
def next_cursor(self) -> Cursor: ...
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class Reader(t.Protocol, t.Generic[T_co]):
|
|
61
|
+
"""Pull-based, paginated data source.
|
|
62
|
+
|
|
63
|
+
Implementors decide:
|
|
64
|
+
- How ``cursor`` maps to a position in the underlying store.
|
|
65
|
+
- What page size ``n`` means (item count, byte count, etc.).
|
|
66
|
+
- When to return ``EOF`` as ``next_cursor`` to signal completion.
|
|
67
|
+
"""
|
|
68
|
+
|
|
69
|
+
async def read(self, *, cursor: Cursor = None, n: int | None = None) -> Page[T_co]:
|
|
70
|
+
"""Read one page of data starting at ``cursor``.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
cursor: Position to read from. ``None`` means the beginning.
|
|
74
|
+
n: Requested page size. ``None`` lets the implementation choose.
|
|
75
|
+
|
|
76
|
+
Returns:
|
|
77
|
+
A Page whose ``next_cursor`` is ``EOF`` when no more data remains.
|
|
78
|
+
"""
|
|
79
|
+
...
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class Listener(t.Protocol, t.Generic[T_co]):
    """Push-based, streaming data source.

    Unlike Reader which requires the caller to pull pages,
    a Listener yields items as they become available.
    """

    async def listen(self) -> t.AsyncIterator[T_co]:
        """Start listening and yield items as they arrive.

        The iterator completes when the source is exhausted or closed.

        NOTE(review): because this is declared with ``async def`` and has no
        ``yield``, the declared shape is a coroutine function: calling
        ``listen()`` produces an awaitable that resolves to the async
        iterator. An implementation written as an async generator
        (``async def`` containing ``yield``) instead returns the iterator
        directly, without an ``await`` step. Confirm which shape
        implementers are expected to provide.
        """
        ...
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class Writer(t.Protocol, t.Generic[T_contra]):
|
|
98
|
+
"""Single-item sink.
|
|
99
|
+
|
|
100
|
+
Accepts one item at a time via ``write()``.
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
async def write(self, item: T_contra) -> None:
|
|
104
|
+
"""Write a single item to the destination."""
|
|
105
|
+
...
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class Batcher(t.Protocol, t.Generic[T_contra]):
|
|
109
|
+
"""Multi-item sink.
|
|
110
|
+
|
|
111
|
+
Accepts a sequence of items at once via ``batch()``,
|
|
112
|
+
enabling bulk inserts, buffered writes, etc.
|
|
113
|
+
"""
|
|
114
|
+
|
|
115
|
+
async def batch(self, items: t.Sequence[T_contra]) -> None:
|
|
116
|
+
"""Write a batch of items to the destination."""
|
|
117
|
+
...
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
async def scroll(r: Reader[T_co], *, cursor: Cursor = None, n: int | None = None) -> t.AsyncIterator[Page[T_co]]:
    """Walk a Reader page by page, yielding each page that has items.

    Args:
        r: Reader to pull from.
        cursor: Where to start; ``None`` means the reader's beginning.
        n: Page size forwarded to every ``read`` call; ``None`` leaves the
            choice to the reader.

    Yields:
        Every non-empty Page, in order. Iteration ends as soon as a page
        comes back empty, or right after yielding a page whose
        ``next_cursor`` is ``EOF``.
    """
    position = cursor
    while True:
        current = await r.read(cursor=position, n=n)

        # An empty page terminates the walk without being yielded.
        if not current.items:
            return

        yield current

        upcoming = current.next_cursor
        if upcoming is EOF:
            return
        position = upcoming
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
async def read_all(r: Reader[T_co], *, cursor: Cursor = None, n: int | None = None) -> t.Sequence[T_co]:
    """Exhaust a Reader and return all of its items as one list.

    Args:
        r: Reader to drain.
        cursor: Where to start; ``None`` means the reader's beginning.
        n: Page size forwarded to each read; ``None`` leaves the choice to
            the reader.

    Returns:
        The items of every page produced by ``scroll``, flattened in order.
    """
    return [
        item
        async for page in scroll(r, cursor=cursor, n=n)
        for item in page.items
    ]
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
async def copy(src: Listener[T_co], dst: Batcher[T_co], n: int) -> None:
    """Buffer items from a Listener and flush to a Batcher every ``n`` items.

    ``src.listen()`` may either return an awaitable that resolves to an
    async iterator (the shape declared by the ``Listener`` protocol) or
    return the async iterator directly (an async-generator implementation);
    both are accepted.

    Args:
        src: Push-based source to consume from.
        dst: Batch sink to flush into.
        n: Buffer size. Flushes every ``n`` items, plus any remainder at the end.
    """
    import inspect  # local import: only needed to normalise ``listen()``

    stream = src.listen()
    if inspect.isawaitable(stream):
        # Coroutine-style listener: resolve it to the actual iterator.
        stream = await stream

    buf: t.List[T_co] = []
    async for item in stream:
        buf.append(item)
        if len(buf) >= n:
            await dst.batch(buf)
            # Start a fresh list rather than clearing, so a sink that kept
            # a reference to the flushed batch is not affected.
            buf = []
    if buf:
        await dst.batch(buf)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
async def as_listener(r: Reader[T_co], *, cursor: Cursor = None, n: int | None = None) -> t.AsyncIterator[T_co]:
    """Flatten a Reader's pages into a stream of individual items.

    Gives a pull-based Reader the same item-by-item async-iterator shape
    that a Listener's stream has, so consumers can handle both alike.

    Args:
        r: Reader to adapt.
        cursor: Where to start reading.
        n: Page size forwarded to the reader.

    Yields:
        Each item of each page, in page order.
    """
    page_stream = scroll(r, cursor=cursor, n=n)
    async for current in page_stream:
        for entry in current.items:
            yield entry
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Streaming pipe: Listener → Handler → Writer.
|
|
2
|
+
|
|
3
|
+
Connects a push-based source to a sink through a transformation handler,
|
|
4
|
+
using in-memory streams with configurable backpressure.
|
|
5
|
+
|
|
6
|
+
src ──→ [in_stream] ──→ handler ──→ [out_stream] ──→ dest
|
|
7
|
+
|
|
8
|
+
Three concurrent tasks (ingress / process / egress) are managed
|
|
9
|
+
by an anyio task group. When the source is exhausted, streams close
|
|
10
|
+
in order and the pipe shuts down gracefully.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import typing as t
|
|
16
|
+
|
|
17
|
+
import anyio
|
|
18
|
+
|
|
19
|
+
from .io import Listener, Writer
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
In = t.TypeVar("In", covariant=True)
|
|
24
|
+
Out = t.TypeVar("Out", contravariant=True)
|
|
25
|
+
H_In = t.TypeVar("H_In", contravariant=True)
|
|
26
|
+
H_Out = t.TypeVar("H_Out", covariant=True)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Handler(t.Protocol, t.Generic[H_In, H_Out]):
|
|
30
|
+
"""Transforms a single input item into a single output item."""
|
|
31
|
+
|
|
32
|
+
async def __call__(self, item: H_In) -> H_Out: ...
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class PipeConfig(t.TypedDict):
|
|
36
|
+
buffer: t.NotRequired[int]
|
|
37
|
+
"""Buffer size for the memory streams between stages.
|
|
38
|
+
Default is 0 (rendezvous channel = strongest backpressure)."""
|
|
39
|
+
|
|
40
|
+
on_boot: t.NotRequired[t.Sequence[t.Callable[[], t.Awaitable[None]]]]
|
|
41
|
+
"""Hooks executed sequentially before starting the pipe."""
|
|
42
|
+
|
|
43
|
+
on_close: t.NotRequired[t.Sequence[t.Callable[[], t.Awaitable[None]]]]
|
|
44
|
+
"""Hooks executed sequentially after shutdown (even on failure)."""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
async def pipe(
    src: Listener[In],
    dest: Writer[Out],
    h: Handler[In, Out],
    *,
    cfg: PipeConfig | None = None,
) -> None:
    """Stream items ``src → h → dest`` with optional lifecycle hooks.

    Args:
        src: Push-based source producing the input items.
        dest: Sink receiving each transformed item.
        h: Handler applied to every input item.
        cfg: Optional settings: stream ``buffer`` size (defaults to 0) and
            ``on_boot`` / ``on_close`` hook sequences.

    ``on_boot`` hooks are awaited in order before the pipe starts.
    ``on_close`` hooks are awaited in order once the run has finished,
    including when the run raised.
    """
    settings = PipeConfig() if cfg is None else cfg

    for hook in settings.get("on_boot", []):
        await hook()

    try:
        capacity = settings.get("buffer", 0)
        await _run(src, dest, h, buffer=capacity)
    finally:
        for hook in settings.get("on_close", []):
            await hook()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
async def _run(
    src: Listener[In],
    dest: Writer[Out],
    h: Handler[In, Out],
    *,
    buffer: int,
) -> None:
    """Run the three pipe stages concurrently until the source is drained.

    Two anyio memory object streams connect the stages::

        src ──→ [in_stream] ──→ h ──→ [out_stream] ──→ dest

    Each stage closes its sending end when it finishes, which ends the
    next stage's ``async for`` loop in turn.

    ``src.listen()`` may return either an awaitable resolving to an async
    iterator (the shape declared by ``Listener``) or the async iterator
    itself (an async-generator implementation); both are accepted.

    Args:
        src: Push-based source of input items.
        dest: Sink receiving transformed items.
        h: Handler applied to each input item.
        buffer: Capacity passed to ``anyio.create_memory_object_stream``
            for both intermediate streams.
    """
    import inspect  # local import: only needed to normalise ``listen()``

    in_send, in_recv = anyio.create_memory_object_stream[In](buffer)
    out_send, out_recv = anyio.create_memory_object_stream[Out](buffer)

    async def ingress() -> None:
        """src → in_stream. Closes in_send when the source is exhausted."""
        async with in_send:
            stream = src.listen()
            if inspect.isawaitable(stream):
                # Coroutine-style listener: resolve it to the iterator.
                stream = await stream
            async for item in stream:
                await in_send.send(item)

    async def process() -> None:
        """in_stream → handler → out_stream.
        Closes out_send when in_recv is exhausted."""
        async with out_send, in_recv:
            async for item in in_recv:
                await out_send.send(await h(item))

    async def egress() -> None:
        """out_stream → dest. Finishes when out_recv is closed."""
        async with out_recv:
            async for item in out_recv:
                await dest.write(item)

    async with anyio.create_task_group() as tg:
        tg.start_soon(egress)
        tg.start_soon(process)
        tg.start_soon(ingress)
|
|
File without changes
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "comio"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Composable I/O primitives for async Python"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Programming Language :: Python :: 3",
|
|
14
|
+
"Framework :: AsyncIO",
|
|
15
|
+
"Typing :: Typed",
|
|
16
|
+
]
|
|
17
|
+
dependencies = [
|
|
18
|
+
"anyio>=4.0",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.optional-dependencies]
|
|
22
|
+
dev = [
|
|
23
|
+
"pytest>=8.0",
|
|
24
|
+
"pytest-asyncio>=0.24",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Repository = "https://github.com/sungdongkim/cio"
|
|
29
|
+
|
|
30
|
+
[tool.pytest.ini_options]
|
|
31
|
+
asyncio_mode = "auto"
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import json
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from comio import scroll, read_all, EOF
|
|
6
|
+
from comio.adapters.jsonl import JsonL
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture
|
|
10
|
+
def sample_file():
|
|
11
|
+
"""Create an in-memory JSONL file with 3 lines."""
|
|
12
|
+
buf = io.StringIO()
|
|
13
|
+
for i in range(3):
|
|
14
|
+
buf.write(json.dumps({"id": i}) + "\n")
|
|
15
|
+
buf.seek(0)
|
|
16
|
+
return buf
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@pytest.mark.asyncio
|
|
20
|
+
async def test_read_single_page(sample_file):
|
|
21
|
+
reader = JsonL(sample_file)
|
|
22
|
+
page = await reader.read()
|
|
23
|
+
assert page.items == [{"id": 0}]
|
|
24
|
+
assert page.next_cursor is not EOF
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@pytest.mark.asyncio
|
|
28
|
+
async def test_read_with_cursor(sample_file):
|
|
29
|
+
reader = JsonL(sample_file)
|
|
30
|
+
first = await reader.read()
|
|
31
|
+
second = await reader.read(cursor=first.next_cursor)
|
|
32
|
+
assert second.items == [{"id": 1}]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@pytest.mark.asyncio
|
|
36
|
+
async def test_read_eof(sample_file):
|
|
37
|
+
reader = JsonL(sample_file)
|
|
38
|
+
cursor = None
|
|
39
|
+
for _ in range(3):
|
|
40
|
+
page = await reader.read(cursor=cursor)
|
|
41
|
+
cursor = page.next_cursor
|
|
42
|
+
last = await reader.read(cursor=cursor)
|
|
43
|
+
assert last.items == []
|
|
44
|
+
assert last.next_cursor is EOF
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.mark.asyncio
|
|
48
|
+
async def test_scroll(sample_file):
|
|
49
|
+
reader = JsonL(sample_file)
|
|
50
|
+
pages = []
|
|
51
|
+
async for page in scroll(reader):
|
|
52
|
+
pages.append(page)
|
|
53
|
+
assert len(pages) == 3
|
|
54
|
+
assert [p.items[0]["id"] for p in pages] == [0, 1, 2]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@pytest.mark.asyncio
|
|
58
|
+
async def test_read_all(sample_file):
|
|
59
|
+
reader = JsonL(sample_file)
|
|
60
|
+
items = await read_all(reader)
|
|
61
|
+
assert items == [{"id": 0}, {"id": 1}, {"id": 2}]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@pytest.mark.asyncio
|
|
65
|
+
async def test_write(tmp_path):
|
|
66
|
+
path = tmp_path / "out.jsonl"
|
|
67
|
+
with open(path, "w") as f:
|
|
68
|
+
writer = JsonL(f)
|
|
69
|
+
await writer.write({"a": 1})
|
|
70
|
+
await writer.write({"b": 2})
|
|
71
|
+
|
|
72
|
+
lines = path.read_text().strip().split("\n")
|
|
73
|
+
assert len(lines) == 2
|
|
74
|
+
assert json.loads(lines[0]) == {"a": 1}
|
|
75
|
+
assert json.loads(lines[1]) == {"b": 2}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@pytest.mark.asyncio
|
|
79
|
+
async def test_read_all_from_cursor(sample_file):
|
|
80
|
+
reader = JsonL(sample_file)
|
|
81
|
+
first = await reader.read()
|
|
82
|
+
items = await read_all(reader, cursor=first.next_cursor)
|
|
83
|
+
assert items == [{"id": 1}, {"id": 2}]
|