chunkr-ai 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/api/decorators.py +5 -1
- {chunkr_ai-0.0.18.dist-info → chunkr_ai-0.0.20.dist-info}/METADATA +89 -39
- {chunkr_ai-0.0.18.dist-info → chunkr_ai-0.0.20.dist-info}/RECORD +6 -6
- {chunkr_ai-0.0.18.dist-info → chunkr_ai-0.0.20.dist-info}/LICENSE +0 -0
- {chunkr_ai-0.0.18.dist-info → chunkr_ai-0.0.20.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.0.18.dist-info → chunkr_ai-0.0.20.dist-info}/top_level.txt +0 -0
chunkr_ai/api/decorators.py
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
import functools
|
2
2
|
import asyncio
|
3
3
|
import httpx
|
4
|
-
from typing import Callable, Any, TypeVar, Awaitable, ParamSpec, Union, overload
|
4
|
+
from typing import Callable, Any, TypeVar, Awaitable, Union, overload
|
5
|
+
try:
|
6
|
+
from typing import ParamSpec
|
7
|
+
except ImportError:
|
8
|
+
from typing_extensions import ParamSpec
|
5
9
|
|
6
10
|
T = TypeVar('T')
|
7
11
|
P = ParamSpec('P')
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.0.18
|
3
|
+
Version: 0.0.20
|
4
4
|
Summary: Python client for Chunkr: open source document intelligence
|
5
5
|
Author-email: Ishaan Kapoor <ishaan@lumina.sh>
|
6
6
|
Project-URL: Homepage, https://chunkr.ai
|
@@ -33,7 +33,7 @@ pip install chunkr-ai
|
|
33
33
|
|
34
34
|
## Usage
|
35
35
|
|
36
|
-
|
36
|
+
The `Chunkr` client works seamlessly in both synchronous and asynchronous contexts.
|
37
37
|
|
38
38
|
### Synchronous Usage
|
39
39
|
|
@@ -45,62 +45,86 @@ chunkr = Chunkr()
|
|
45
45
|
|
46
46
|
# Upload a file and wait for processing
|
47
47
|
task = chunkr.upload("document.pdf")
|
48
|
+
print(task.task_id)
|
48
49
|
|
49
|
-
#
|
50
|
-
|
50
|
+
# Create task without waiting
|
51
|
+
task = chunkr.create_task("document.pdf")
|
52
|
+
result = task.poll() # Check status when needed
|
51
53
|
|
52
|
-
#
|
53
|
-
|
54
|
-
|
55
|
-
# If you want to upload without waiting for processing
|
56
|
-
task = chunkr.start_upload("document.pdf")
|
57
|
-
# ... do other things ...
|
58
|
-
task.poll() # Check status when needed
|
54
|
+
# Clean up when done
|
55
|
+
chunkr.close()
|
59
56
|
```
|
60
57
|
|
61
58
|
### Asynchronous Usage
|
62
59
|
|
63
60
|
```python
|
64
|
-
from chunkr_ai import
|
61
|
+
from chunkr_ai import Chunkr
|
62
|
+
import asyncio
|
65
63
|
|
66
64
|
async def process_document():
|
67
65
|
# Initialize client
|
68
|
-
chunkr =
|
66
|
+
chunkr = Chunkr()
|
67
|
+
|
68
|
+
try:
|
69
|
+
# Upload a file and wait for processing
|
70
|
+
task = await chunkr.upload("document.pdf")
|
71
|
+
print(task.task_id)
|
72
|
+
|
73
|
+
# Create task without waiting
|
74
|
+
task = await chunkr.create_task("document.pdf")
|
75
|
+
result = await task.poll() # Check status when needed
|
76
|
+
finally:
|
77
|
+
await chunkr.close()
|
69
78
|
|
70
|
-
|
71
|
-
|
79
|
+
# Run the async function
|
80
|
+
asyncio.run(process_document())
|
81
|
+
```
|
72
82
|
|
73
|
-
|
74
|
-
print(task)
|
83
|
+
### Concurrent Processing
|
75
84
|
|
76
|
-
|
77
|
-
output = task.output
|
85
|
+
The client supports both async concurrency and multiprocessing:
|
78
86
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
87
|
+
```python
|
88
|
+
# Async concurrency
|
89
|
+
async def process_multiple():
|
90
|
+
chunkr = Chunkr()
|
91
|
+
try:
|
92
|
+
tasks = [
|
93
|
+
chunkr.upload("doc1.pdf"),
|
94
|
+
chunkr.upload("doc2.pdf"),
|
95
|
+
chunkr.upload("doc3.pdf")
|
96
|
+
]
|
97
|
+
results = await asyncio.gather(*tasks)
|
98
|
+
finally:
|
99
|
+
await chunkr.close()
|
100
|
+
|
101
|
+
# Multiprocessing
|
102
|
+
from multiprocessing import Pool
|
103
|
+
|
104
|
+
def process_file(path):
|
105
|
+
chunkr = Chunkr()
|
106
|
+
try:
|
107
|
+
return chunkr.upload(path)
|
108
|
+
finally:
|
109
|
+
chunkr.close()
|
110
|
+
|
111
|
+
with Pool(processes=3) as pool:
|
112
|
+
results = pool.map(process_file, ["doc1.pdf", "doc2.pdf", "doc3.pdf"])
|
83
113
|
```
|
84
114
|
|
85
|
-
###
|
115
|
+
### Input Types
|
86
116
|
|
87
|
-
|
117
|
+
The client supports various input types:
|
88
118
|
|
89
119
|
```python
|
90
|
-
#
|
120
|
+
# File path
|
91
121
|
chunkr.upload("document.pdf")
|
92
122
|
|
93
|
-
#
|
123
|
+
# Opened file
|
94
124
|
with open("document.pdf", "rb") as f:
|
95
125
|
chunkr.upload(f)
|
96
126
|
|
97
|
-
#
|
98
|
-
chunkr.upload("https://example.com/document.pdf")
|
99
|
-
|
100
|
-
# Upload from base64 string
|
101
|
-
chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
|
102
|
-
|
103
|
-
# Upload an image
|
127
|
+
# PIL Image
|
104
128
|
from PIL import Image
|
105
129
|
img = Image.open("photo.jpg")
|
106
130
|
chunkr.upload(img)
|
@@ -111,9 +135,13 @@ chunkr.upload(img)
|
|
111
135
|
You can customize the processing behavior by passing a `Configuration` object:
|
112
136
|
|
113
137
|
```python
|
114
|
-
from chunkr_ai.models import
|
138
|
+
from chunkr_ai.models import (
|
139
|
+
Configuration,
|
140
|
+
OcrStrategy,
|
141
|
+
SegmentationStrategy,
|
142
|
+
GenerationStrategy
|
143
|
+
)
|
115
144
|
|
116
|
-
# Basic configuration
|
117
145
|
config = Configuration(
|
118
146
|
ocr_strategy=OcrStrategy.AUTO,
|
119
147
|
segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
|
@@ -121,8 +149,9 @@ config = Configuration(
|
|
121
149
|
expires_in=3600, # seconds
|
122
150
|
)
|
123
151
|
|
124
|
-
#
|
125
|
-
task = chunkr.upload("document.pdf", config)
|
152
|
+
# Works in both sync and async contexts
|
153
|
+
task = chunkr.upload("document.pdf", config) # sync
|
154
|
+
task = await chunkr.upload("document.pdf", config) # async
|
126
155
|
```
|
127
156
|
|
128
157
|
#### Available Configuration Examples
|
@@ -180,7 +209,7 @@ task = chunkr.upload("document.pdf", config)
|
|
180
209
|
)
|
181
210
|
```
|
182
211
|
|
183
|
-
## Environment
|
212
|
+
## Environment Setup
|
184
213
|
|
185
214
|
You can provide your API key and URL in several ways:
|
186
215
|
1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
|
@@ -192,3 +221,24 @@ chunkr = Chunkr(
|
|
192
221
|
url="https://api.chunkr.ai"
|
193
222
|
)
|
194
223
|
```
|
224
|
+
|
225
|
+
## Resource Management
|
226
|
+
|
227
|
+
It's recommended to properly close the client when you're done:
|
228
|
+
|
229
|
+
```python
|
230
|
+
# Sync context
|
231
|
+
chunkr = Chunkr()
|
232
|
+
try:
|
233
|
+
result = chunkr.upload("document.pdf")
|
234
|
+
finally:
|
235
|
+
chunkr.close()
|
236
|
+
|
237
|
+
# Async context
|
238
|
+
async def process():
|
239
|
+
chunkr = Chunkr()
|
240
|
+
try:
|
241
|
+
result = await chunkr.upload("document.pdf")
|
242
|
+
finally:
|
243
|
+
await chunkr.close()
|
244
|
+
```
|
@@ -6,12 +6,12 @@ chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
|
|
6
6
|
chunkr_ai/api/chunkr.py,sha256=V56SP8qs7J2QKRCRM9NGlyA1TtDTdFmGYZWbwbFTK_I,2674
|
7
7
|
chunkr_ai/api/chunkr_base.py,sha256=TDqEwCCfgshggi_Mzv76FhPj5z21QP8EVj7siczvfao,9826
|
8
8
|
chunkr_ai/api/config.py,sha256=NmPTsDvcjkvNx0gNzDTz-oFG5rQC7jm-H70O_crJCw8,4478
|
9
|
-
chunkr_ai/api/decorators.py,sha256=
|
9
|
+
chunkr_ai/api/decorators.py,sha256=UD3Nb0b5EKcwGH2kXb9FPn4GtnJovheoHeF_Gi7WFGk,2657
|
10
10
|
chunkr_ai/api/misc.py,sha256=wUG4SpfEEo7NcVK47gmw42dRy9zT5F9S2DtVC4T4ERs,4877
|
11
11
|
chunkr_ai/api/protocol.py,sha256=Nt8aWr4ouVwCvoLqVI5vnXJhT2cvxt0sQC-svUk2G5w,458
|
12
12
|
chunkr_ai/api/task_response.py,sha256=I0_XJ6WYYu_TwbaSF95wqRPaOm2PhgMKnarxjAx-BZI,3857
|
13
|
-
chunkr_ai-0.0.
|
14
|
-
chunkr_ai-0.0.
|
15
|
-
chunkr_ai-0.0.
|
16
|
-
chunkr_ai-0.0.
|
17
|
-
chunkr_ai-0.0.
|
13
|
+
chunkr_ai-0.0.20.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
+
chunkr_ai-0.0.20.dist-info/METADATA,sha256=owGbM-pt3qbWboqgyMgRimUtOdQrsDHx8RHebIKhFZM,5696
|
15
|
+
chunkr_ai-0.0.20.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
16
|
+
chunkr_ai-0.0.20.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
17
|
+
chunkr_ai-0.0.20.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|