chunkr-ai 0.0.18-py3-none-any.whl → 0.0.20-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,11 @@
1
1
  import functools
2
2
  import asyncio
3
3
  import httpx
4
- from typing import Callable, Any, TypeVar, Awaitable, ParamSpec, Union, overload
4
+ from typing import Callable, Any, TypeVar, Awaitable, Union, overload
5
+ try:
6
+ from typing import ParamSpec
7
+ except ImportError:
8
+ from typing_extensions import ParamSpec
5
9
 
6
10
  T = TypeVar('T')
7
11
  P = ParamSpec('P')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.18
3
+ Version: 0.0.20
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
@@ -33,7 +33,7 @@ pip install chunkr-ai
33
33
 
34
34
  ## Usage
35
35
 
36
- We provide two clients: `Chunkr` for synchronous operations and `ChunkrAsync` for asynchronous operations.
36
+ The `Chunkr` client works seamlessly in both synchronous and asynchronous contexts.
37
37
 
38
38
  ### Synchronous Usage
39
39
 
@@ -45,62 +45,86 @@ chunkr = Chunkr()
45
45
 
46
46
  # Upload a file and wait for processing
47
47
  task = chunkr.upload("document.pdf")
48
+ print(task.task_id)
48
49
 
49
- # Print the response
50
- print(task)
50
+ # Create task without waiting
51
+ task = chunkr.create_task("document.pdf")
52
+ result = task.poll() # Check status when needed
51
53
 
52
- # Get output from task
53
- output = task.output
54
-
55
- # If you want to upload without waiting for processing
56
- task = chunkr.start_upload("document.pdf")
57
- # ... do other things ...
58
- task.poll() # Check status when needed
54
+ # Clean up when done
55
+ chunkr.close()
59
56
  ```
60
57
 
61
58
  ### Asynchronous Usage
62
59
 
63
60
  ```python
64
- from chunkr_ai import ChunkrAsync
61
+ from chunkr_ai import Chunkr
62
+ import asyncio
65
63
 
66
64
  async def process_document():
67
65
  # Initialize client
68
- chunkr = ChunkrAsync()
66
+ chunkr = Chunkr()
67
+
68
+ try:
69
+ # Upload a file and wait for processing
70
+ task = await chunkr.upload("document.pdf")
71
+ print(task.task_id)
72
+
73
+ # Create task without waiting
74
+ task = await chunkr.create_task("document.pdf")
75
+ result = await task.poll() # Check status when needed
76
+ finally:
77
+ await chunkr.close()
69
78
 
70
- # Upload a file and wait for processing
71
- task = await chunkr.upload("document.pdf")
79
+ # Run the async function
80
+ asyncio.run(process_document())
81
+ ```
72
82
 
73
- # Print the response
74
- print(task)
83
+ ### Concurrent Processing
75
84
 
76
- # Get output from task
77
- output = task.output
85
+ The client supports both async concurrency and multiprocessing:
78
86
 
79
- # If you want to upload without waiting for processing
80
- task = await chunkr.start_upload("document.pdf")
81
- # ... do other things ...
82
- await task.poll() # Check status when needed
87
+ ```python
88
+ # Async concurrency
89
+ async def process_multiple():
90
+ chunkr = Chunkr()
91
+ try:
92
+ tasks = [
93
+ chunkr.upload("doc1.pdf"),
94
+ chunkr.upload("doc2.pdf"),
95
+ chunkr.upload("doc3.pdf")
96
+ ]
97
+ results = await asyncio.gather(*tasks)
98
+ finally:
99
+ await chunkr.close()
100
+
101
+ # Multiprocessing
102
+ from multiprocessing import Pool
103
+
104
+ def process_file(path):
105
+ chunkr = Chunkr()
106
+ try:
107
+ return chunkr.upload(path)
108
+ finally:
109
+ chunkr.close()
110
+
111
+ with Pool(processes=3) as pool:
112
+ results = pool.map(process_file, ["doc1.pdf", "doc2.pdf", "doc3.pdf"])
83
113
  ```
84
114
 
85
- ### Additional Features
115
+ ### Input Types
86
116
 
87
- Both clients support various input types:
117
+ The client supports various input types:
88
118
 
89
119
  ```python
90
- # Upload from file path
120
+ # File path
91
121
  chunkr.upload("document.pdf")
92
122
 
93
- # Upload from opened file
123
+ # Opened file
94
124
  with open("document.pdf", "rb") as f:
95
125
  chunkr.upload(f)
96
126
 
97
- # Upload from URL
98
- chunkr.upload("https://example.com/document.pdf")
99
-
100
- # Upload from base64 string
101
- chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
102
-
103
- # Upload an image
127
+ # PIL Image
104
128
  from PIL import Image
105
129
  img = Image.open("photo.jpg")
106
130
  chunkr.upload(img)
@@ -111,9 +135,13 @@ chunkr.upload(img)
111
135
  You can customize the processing behavior by passing a `Configuration` object:
112
136
 
113
137
  ```python
114
- from chunkr_ai.models import Configuration, OcrStrategy, SegmentationStrategy, GenerationStrategy
138
+ from chunkr_ai.models import (
139
+ Configuration,
140
+ OcrStrategy,
141
+ SegmentationStrategy,
142
+ GenerationStrategy
143
+ )
115
144
 
116
- # Basic configuration
117
145
  config = Configuration(
118
146
  ocr_strategy=OcrStrategy.AUTO,
119
147
  segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
@@ -121,8 +149,9 @@ config = Configuration(
121
149
  expires_in=3600, # seconds
122
150
  )
123
151
 
124
- # Upload with configuration
125
- task = chunkr.upload("document.pdf", config)
152
+ # Works in both sync and async contexts
153
+ task = chunkr.upload("document.pdf", config) # sync
154
+ task = await chunkr.upload("document.pdf", config) # async
126
155
  ```
127
156
 
128
157
  #### Available Configuration Examples
@@ -180,7 +209,7 @@ task = chunkr.upload("document.pdf", config)
180
209
  )
181
210
  ```
182
211
 
183
- ## Environment setup
212
+ ## Environment Setup
184
213
 
185
214
  You can provide your API key and URL in several ways:
186
215
  1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
@@ -192,3 +221,24 @@ chunkr = Chunkr(
192
221
  url="https://api.chunkr.ai"
193
222
  )
194
223
  ```
224
+
225
+ ## Resource Management
226
+
227
+ We recommend explicitly closing the client when you are done with it:
228
+
229
+ ```python
230
+ # Sync context
231
+ chunkr = Chunkr()
232
+ try:
233
+ result = chunkr.upload("document.pdf")
234
+ finally:
235
+ chunkr.close()
236
+
237
+ # Async context
238
+ async def process():
239
+ chunkr = Chunkr()
240
+ try:
241
+ result = await chunkr.upload("document.pdf")
242
+ finally:
243
+ await chunkr.close()
244
+ ```
@@ -6,12 +6,12 @@ chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
6
6
  chunkr_ai/api/chunkr.py,sha256=V56SP8qs7J2QKRCRM9NGlyA1TtDTdFmGYZWbwbFTK_I,2674
7
7
  chunkr_ai/api/chunkr_base.py,sha256=TDqEwCCfgshggi_Mzv76FhPj5z21QP8EVj7siczvfao,9826
8
8
  chunkr_ai/api/config.py,sha256=NmPTsDvcjkvNx0gNzDTz-oFG5rQC7jm-H70O_crJCw8,4478
9
- chunkr_ai/api/decorators.py,sha256=PzaTaPBXUMHoSLz6P0sL5JXANFSJff2vjvESKNiOGQY,2566
9
+ chunkr_ai/api/decorators.py,sha256=UD3Nb0b5EKcwGH2kXb9FPn4GtnJovheoHeF_Gi7WFGk,2657
10
10
  chunkr_ai/api/misc.py,sha256=wUG4SpfEEo7NcVK47gmw42dRy9zT5F9S2DtVC4T4ERs,4877
11
11
  chunkr_ai/api/protocol.py,sha256=Nt8aWr4ouVwCvoLqVI5vnXJhT2cvxt0sQC-svUk2G5w,458
12
12
  chunkr_ai/api/task_response.py,sha256=I0_XJ6WYYu_TwbaSF95wqRPaOm2PhgMKnarxjAx-BZI,3857
13
- chunkr_ai-0.0.18.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- chunkr_ai-0.0.18.dist-info/METADATA,sha256=U9774MiX43ALUNaQRS6y05Kcg4q8REMVu5lyds1DJUo,4807
15
- chunkr_ai-0.0.18.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
- chunkr_ai-0.0.18.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
- chunkr_ai-0.0.18.dist-info/RECORD,,
13
+ chunkr_ai-0.0.20.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ chunkr_ai-0.0.20.dist-info/METADATA,sha256=owGbM-pt3qbWboqgyMgRimUtOdQrsDHx8RHebIKhFZM,5696
15
+ chunkr_ai-0.0.20.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
16
+ chunkr_ai-0.0.20.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
17
+ chunkr_ai-0.0.20.dist-info/RECORD,,