chunkr-ai 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- chunkr_ai/api/decorators.py +5 -1
- {chunkr_ai-0.0.18.dist-info → chunkr_ai-0.0.20.dist-info}/METADATA +89 -39
- {chunkr_ai-0.0.18.dist-info → chunkr_ai-0.0.20.dist-info}/RECORD +6 -6
- {chunkr_ai-0.0.18.dist-info → chunkr_ai-0.0.20.dist-info}/LICENSE +0 -0
- {chunkr_ai-0.0.18.dist-info → chunkr_ai-0.0.20.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.0.18.dist-info → chunkr_ai-0.0.20.dist-info}/top_level.txt +0 -0
chunkr_ai/api/decorators.py
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
import functools
|
2
2
|
import asyncio
|
3
3
|
import httpx
|
4
|
-
from typing import Callable, Any, TypeVar, Awaitable,
|
4
|
+
from typing import Callable, Any, TypeVar, Awaitable, Union, overload
|
5
|
+
try:
|
6
|
+
from typing import ParamSpec
|
7
|
+
except ImportError:
|
8
|
+
from typing_extensions import ParamSpec
|
5
9
|
|
6
10
|
T = TypeVar('T')
|
7
11
|
P = ParamSpec('P')
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.0.18
|
3
|
+
Version: 0.0.20
|
4
4
|
Summary: Python client for Chunkr: open source document intelligence
|
5
5
|
Author-email: Ishaan Kapoor <ishaan@lumina.sh>
|
6
6
|
Project-URL: Homepage, https://chunkr.ai
|
@@ -33,7 +33,7 @@ pip install chunkr-ai
|
|
33
33
|
|
34
34
|
## Usage
|
35
35
|
|
36
|
-
|
36
|
+
The `Chunkr` client works seamlessly in both synchronous and asynchronous contexts.
|
37
37
|
|
38
38
|
### Synchronous Usage
|
39
39
|
|
@@ -45,62 +45,86 @@ chunkr = Chunkr()
|
|
45
45
|
|
46
46
|
# Upload a file and wait for processing
|
47
47
|
task = chunkr.upload("document.pdf")
|
48
|
+
print(task.task_id)
|
48
49
|
|
49
|
-
#
|
50
|
-
|
50
|
+
# Create task without waiting
|
51
|
+
task = chunkr.create_task("document.pdf")
|
52
|
+
result = task.poll() # Check status when needed
|
51
53
|
|
52
|
-
#
|
53
|
-
|
54
|
-
|
55
|
-
# If you want to upload without waiting for processing
|
56
|
-
task = chunkr.start_upload("document.pdf")
|
57
|
-
# ... do other things ...
|
58
|
-
task.poll() # Check status when needed
|
54
|
+
# Clean up when done
|
55
|
+
chunkr.close()
|
59
56
|
```
|
60
57
|
|
61
58
|
### Asynchronous Usage
|
62
59
|
|
63
60
|
```python
|
64
|
-
from chunkr_ai import
|
61
|
+
from chunkr_ai import Chunkr
|
62
|
+
import asyncio
|
65
63
|
|
66
64
|
async def process_document():
|
67
65
|
# Initialize client
|
68
|
-
chunkr =
|
66
|
+
chunkr = Chunkr()
|
67
|
+
|
68
|
+
try:
|
69
|
+
# Upload a file and wait for processing
|
70
|
+
task = await chunkr.upload("document.pdf")
|
71
|
+
print(task.task_id)
|
72
|
+
|
73
|
+
# Create task without waiting
|
74
|
+
task = await chunkr.create_task("document.pdf")
|
75
|
+
result = await task.poll() # Check status when needed
|
76
|
+
finally:
|
77
|
+
await chunkr.close()
|
69
78
|
|
70
|
-
|
71
|
-
|
79
|
+
# Run the async function
|
80
|
+
asyncio.run(process_document())
|
81
|
+
```
|
72
82
|
|
73
|
-
|
74
|
-
print(task)
|
83
|
+
### Concurrent Processing
|
75
84
|
|
76
|
-
|
77
|
-
output = task.output
|
85
|
+
The client supports both async concurrency and multiprocessing:
|
78
86
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
87
|
+
```python
|
88
|
+
# Async concurrency
|
89
|
+
async def process_multiple():
|
90
|
+
chunkr = Chunkr()
|
91
|
+
try:
|
92
|
+
tasks = [
|
93
|
+
chunkr.upload("doc1.pdf"),
|
94
|
+
chunkr.upload("doc2.pdf"),
|
95
|
+
chunkr.upload("doc3.pdf")
|
96
|
+
]
|
97
|
+
results = await asyncio.gather(*tasks)
|
98
|
+
finally:
|
99
|
+
await chunkr.close()
|
100
|
+
|
101
|
+
# Multiprocessing
|
102
|
+
from multiprocessing import Pool
|
103
|
+
|
104
|
+
def process_file(path):
|
105
|
+
chunkr = Chunkr()
|
106
|
+
try:
|
107
|
+
return chunkr.upload(path)
|
108
|
+
finally:
|
109
|
+
chunkr.close()
|
110
|
+
|
111
|
+
with Pool(processes=3) as pool:
|
112
|
+
results = pool.map(process_file, ["doc1.pdf", "doc2.pdf", "doc3.pdf"])
|
83
113
|
```
|
84
114
|
|
85
|
-
###
|
115
|
+
### Input Types
|
86
116
|
|
87
|
-
|
117
|
+
The client supports various input types:
|
88
118
|
|
89
119
|
```python
|
90
|
-
#
|
120
|
+
# File path
|
91
121
|
chunkr.upload("document.pdf")
|
92
122
|
|
93
|
-
#
|
123
|
+
# Opened file
|
94
124
|
with open("document.pdf", "rb") as f:
|
95
125
|
chunkr.upload(f)
|
96
126
|
|
97
|
-
#
|
98
|
-
chunkr.upload("https://example.com/document.pdf")
|
99
|
-
|
100
|
-
# Upload from base64 string
|
101
|
-
chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
|
102
|
-
|
103
|
-
# Upload an image
|
127
|
+
# PIL Image
|
104
128
|
from PIL import Image
|
105
129
|
img = Image.open("photo.jpg")
|
106
130
|
chunkr.upload(img)
|
@@ -111,9 +135,13 @@ chunkr.upload(img)
|
|
111
135
|
You can customize the processing behavior by passing a `Configuration` object:
|
112
136
|
|
113
137
|
```python
|
114
|
-
from chunkr_ai.models import
|
138
|
+
from chunkr_ai.models import (
|
139
|
+
Configuration,
|
140
|
+
OcrStrategy,
|
141
|
+
SegmentationStrategy,
|
142
|
+
GenerationStrategy
|
143
|
+
)
|
115
144
|
|
116
|
-
# Basic configuration
|
117
145
|
config = Configuration(
|
118
146
|
ocr_strategy=OcrStrategy.AUTO,
|
119
147
|
segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
|
@@ -121,8 +149,9 @@ config = Configuration(
|
|
121
149
|
expires_in=3600, # seconds
|
122
150
|
)
|
123
151
|
|
124
|
-
#
|
125
|
-
task = chunkr.upload("document.pdf", config)
|
152
|
+
# Works in both sync and async contexts
|
153
|
+
task = chunkr.upload("document.pdf", config) # sync
|
154
|
+
task = await chunkr.upload("document.pdf", config) # async
|
126
155
|
```
|
127
156
|
|
128
157
|
#### Available Configuration Examples
|
@@ -180,7 +209,7 @@ task = chunkr.upload("document.pdf", config)
|
|
180
209
|
)
|
181
210
|
```
|
182
211
|
|
183
|
-
## Environment
|
212
|
+
## Environment Setup
|
184
213
|
|
185
214
|
You can provide your API key and URL in several ways:
|
186
215
|
1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
|
@@ -192,3 +221,24 @@ chunkr = Chunkr(
|
|
192
221
|
url="https://api.chunkr.ai"
|
193
222
|
)
|
194
223
|
```
|
224
|
+
|
225
|
+
## Resource Management
|
226
|
+
|
227
|
+
It's recommended to properly close the client when you're done:
|
228
|
+
|
229
|
+
```python
|
230
|
+
# Sync context
|
231
|
+
chunkr = Chunkr()
|
232
|
+
try:
|
233
|
+
result = chunkr.upload("document.pdf")
|
234
|
+
finally:
|
235
|
+
chunkr.close()
|
236
|
+
|
237
|
+
# Async context
|
238
|
+
async def process():
|
239
|
+
chunkr = Chunkr()
|
240
|
+
try:
|
241
|
+
result = await chunkr.upload("document.pdf")
|
242
|
+
finally:
|
243
|
+
await chunkr.close()
|
244
|
+
```
|
@@ -6,12 +6,12 @@ chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
|
|
6
6
|
chunkr_ai/api/chunkr.py,sha256=V56SP8qs7J2QKRCRM9NGlyA1TtDTdFmGYZWbwbFTK_I,2674
|
7
7
|
chunkr_ai/api/chunkr_base.py,sha256=TDqEwCCfgshggi_Mzv76FhPj5z21QP8EVj7siczvfao,9826
|
8
8
|
chunkr_ai/api/config.py,sha256=NmPTsDvcjkvNx0gNzDTz-oFG5rQC7jm-H70O_crJCw8,4478
|
9
|
-
chunkr_ai/api/decorators.py,sha256=
|
9
|
+
chunkr_ai/api/decorators.py,sha256=UD3Nb0b5EKcwGH2kXb9FPn4GtnJovheoHeF_Gi7WFGk,2657
|
10
10
|
chunkr_ai/api/misc.py,sha256=wUG4SpfEEo7NcVK47gmw42dRy9zT5F9S2DtVC4T4ERs,4877
|
11
11
|
chunkr_ai/api/protocol.py,sha256=Nt8aWr4ouVwCvoLqVI5vnXJhT2cvxt0sQC-svUk2G5w,458
|
12
12
|
chunkr_ai/api/task_response.py,sha256=I0_XJ6WYYu_TwbaSF95wqRPaOm2PhgMKnarxjAx-BZI,3857
|
13
|
-
chunkr_ai-0.0.
|
14
|
-
chunkr_ai-0.0.
|
15
|
-
chunkr_ai-0.0.
|
16
|
-
chunkr_ai-0.0.
|
17
|
-
chunkr_ai-0.0.
|
13
|
+
chunkr_ai-0.0.20.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
+
chunkr_ai-0.0.20.dist-info/METADATA,sha256=owGbM-pt3qbWboqgyMgRimUtOdQrsDHx8RHebIKhFZM,5696
|
15
|
+
chunkr_ai-0.0.20.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
16
|
+
chunkr_ai-0.0.20.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
17
|
+
chunkr_ai-0.0.20.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|