chunkr-ai 0.0.3__tar.gz → 0.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chunkr_ai-0.0.3/src/chunkr_ai.egg-info → chunkr_ai-0.0.5}/PKG-INFO +82 -12
- chunkr_ai-0.0.5/README.md +177 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/pyproject.toml +1 -1
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/api/config.py +12 -11
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/api/task.py +8 -1
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/models.py +1 -2
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5/src/chunkr_ai.egg-info}/PKG-INFO +82 -12
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/tests/test_chunkr.py +18 -1
- chunkr_ai-0.0.3/README.md +0 -107
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/LICENSE +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/setup.cfg +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/__init__.py +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/api/__init__.py +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/api/api.py +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/api/auth.py +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/api/base.py +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/api/chunkr.py +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/api/chunkr_async.py +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/api/protocol.py +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai/main.py +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai.egg-info/requires.txt +0 -0
- {chunkr_ai-0.0.3 → chunkr_ai-0.0.5}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.0.3
|
3
|
+
Version: 0.0.5
|
4
4
|
Summary: Python client for Chunkr: open source document intelligence
|
5
5
|
Author-email: Ishaan Kapoor <ishaan@lumina.sh>
|
6
6
|
Project-URL: Homepage, https://chunkr.ai
|
@@ -17,7 +17,13 @@ Requires-Dist: pytest-xdist>=3.6.1; extra == "test"
|
|
17
17
|
|
18
18
|
# Chunkr Python Client
|
19
19
|
|
20
|
-
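This is the Python client for the Chunkr API. It provides a simple interface to interact with Chunkr's services.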
|
20
|
+
This provides a simple interface to interact with the Chunkr API.
|
21
|
+
|
22
|
+
## Getting Started
|
23
|
+
|
24
|
+
You can get an API key from [Chunkr](https://chunkr.ai) or deploy your own Chunkr instance. For self-hosted deployment options, check out our [deployment guide](https://github.com/lumina-ai-inc/chunkr/tree/main?tab=readme-ov-file#self-hosted-deployment-options).
|
25
|
+
|
26
|
+
For more information about the API and its capabilities, visit the [Chunkr API docs](https://docs.chunkr.ai).
|
21
27
|
|
22
28
|
## Installation
|
23
29
|
|
@@ -102,6 +108,80 @@ chunkr.upload(img)
|
|
102
108
|
|
103
109
|
### Configuration
|
104
110
|
|
111
|
+
You can customize the processing behavior by passing a `Configuration` object:
|
112
|
+
|
113
|
+
```python
|
114
|
+
from chunkr_ai.models import Configuration, OcrStrategy, SegmentationStrategy, GenerationStrategy
|
115
|
+
|
116
|
+
# Basic configuration
|
117
|
+
config = Configuration(
|
118
|
+
ocr_strategy=OcrStrategy.AUTO,
|
119
|
+
segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
|
120
|
+
high_resolution=True,
|
121
|
+
expires_in=3600, # seconds
|
122
|
+
)
|
123
|
+
|
124
|
+
# Upload with configuration
|
125
|
+
task = chunkr.upload("document.pdf", config)
|
126
|
+
```
|
127
|
+
|
128
|
+
#### Available Configuration Examples
|
129
|
+
|
130
|
+
- **Chunk Processing**
|
131
|
+
```python
|
132
|
+
from chunkr_ai.models import ChunkProcessing
|
133
|
+
config = Configuration(
|
134
|
+
chunk_processing=ChunkProcessing(target_length=1024)
|
135
|
+
)
|
136
|
+
```
|
137
|
+
- **Expires In**
|
138
|
+
```python
|
139
|
+
config = Configuration(expires_in=3600)
|
140
|
+
```
|
141
|
+
|
142
|
+
- **High Resolution**
|
143
|
+
```python
|
144
|
+
config = Configuration(high_resolution=True)
|
145
|
+
```
|
146
|
+
|
147
|
+
- **JSON Schema**
|
148
|
+
```python
|
149
|
+
config = Configuration(json_schema=JsonSchema(
|
150
|
+
title="Sales Data",
|
151
|
+
properties=[
|
152
|
+
Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
|
153
|
+
Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
|
154
|
+
]
|
155
|
+
))
|
156
|
+
```
|
157
|
+
|
158
|
+
- **OCR Strategy**
|
159
|
+
```python
|
160
|
+
config = Configuration(ocr_strategy=OcrStrategy.AUTO)
|
161
|
+
```
|
162
|
+
|
163
|
+
- **Segment Processing**
|
164
|
+
```python
|
165
|
+
from chunkr_ai.models import SegmentProcessing, GenerationConfig, GenerationStrategy
|
166
|
+
config = Configuration(
|
167
|
+
segment_processing=SegmentProcessing(
|
168
|
+
page=GenerationConfig(
|
169
|
+
html=GenerationStrategy.LLM,
|
170
|
+
markdown=GenerationStrategy.LLM
|
171
|
+
)
|
172
|
+
)
|
173
|
+
)
|
174
|
+
```
|
175
|
+
|
176
|
+
- **Segmentation Strategy**
|
177
|
+
```python
|
178
|
+
config = Configuration(
|
179
|
+
segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS # or SegmentationStrategy.PAGE
|
180
|
+
)
|
181
|
+
```
|
182
|
+
|
183
|
+
## Environment setup
|
184
|
+
|
105
185
|
You can provide your API key and URL in several ways:
|
106
186
|
1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
|
107
187
|
2. `.env` file
|
@@ -112,13 +192,3 @@ chunkr = Chunkr(
|
|
112
192
|
url="https://api.chunkr.ai"
|
113
193
|
)
|
114
194
|
```
|
115
|
-
|
116
|
-
## Run tests
|
117
|
-
|
118
|
-
```python
|
119
|
-
# Install dependencies
|
120
|
-
uv pip install -e ".[test]"
|
121
|
-
|
122
|
-
# Run tests
|
123
|
-
uv run pytest
|
124
|
-
```
|
@@ -0,0 +1,177 @@
|
|
1
|
+
# Chunkr Python Client
|
2
|
+
|
3
|
+
This provides a simple interface to interact with the Chunkr API.
|
4
|
+
|
5
|
+
## Getting Started
|
6
|
+
|
7
|
+
You can get an API key from [Chunkr](https://chunkr.ai) or deploy your own Chunkr instance. For self-hosted deployment options, check out our [deployment guide](https://github.com/lumina-ai-inc/chunkr/tree/main?tab=readme-ov-file#self-hosted-deployment-options).
|
8
|
+
|
9
|
+
For more information about the API and its capabilities, visit the [Chunkr API docs](https://docs.chunkr.ai).
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
```bash
|
14
|
+
pip install chunkr-ai
|
15
|
+
```
|
16
|
+
|
17
|
+
## Usage
|
18
|
+
|
19
|
+
We provide two clients: `Chunkr` for synchronous operations and `ChunkrAsync` for asynchronous operations.
|
20
|
+
|
21
|
+
### Synchronous Usage
|
22
|
+
|
23
|
+
```python
|
24
|
+
from chunkr_ai import Chunkr
|
25
|
+
|
26
|
+
# Initialize client
|
27
|
+
chunkr = Chunkr()
|
28
|
+
|
29
|
+
# Upload a file and wait for processing
|
30
|
+
task = chunkr.upload("document.pdf")
|
31
|
+
|
32
|
+
# Print the response
|
33
|
+
print(task)
|
34
|
+
|
35
|
+
# Get output from task
|
36
|
+
output = task.output
|
37
|
+
|
38
|
+
# If you want to upload without waiting for processing
|
39
|
+
task = chunkr.start_upload("document.pdf")
|
40
|
+
# ... do other things ...
|
41
|
+
task.poll() # Check status when needed
|
42
|
+
```
|
43
|
+
|
44
|
+
### Asynchronous Usage
|
45
|
+
|
46
|
+
```python
|
47
|
+
from chunkr_ai import ChunkrAsync
|
48
|
+
|
49
|
+
async def process_document():
|
50
|
+
# Initialize client
|
51
|
+
chunkr = ChunkrAsync()
|
52
|
+
|
53
|
+
# Upload a file and wait for processing
|
54
|
+
task = await chunkr.upload("document.pdf")
|
55
|
+
|
56
|
+
# Print the response
|
57
|
+
print(task)
|
58
|
+
|
59
|
+
# Get output from task
|
60
|
+
output = task.output
|
61
|
+
|
62
|
+
# If you want to upload without waiting for processing
|
63
|
+
task = await chunkr.start_upload("document.pdf")
|
64
|
+
# ... do other things ...
|
65
|
+
await task.poll_async() # Check status when needed
|
66
|
+
```
|
67
|
+
|
68
|
+
### Additional Features
|
69
|
+
|
70
|
+
Both clients support various input types:
|
71
|
+
|
72
|
+
```python
|
73
|
+
# Upload from file path
|
74
|
+
chunkr.upload("document.pdf")
|
75
|
+
|
76
|
+
# Upload from opened file
|
77
|
+
with open("document.pdf", "rb") as f:
|
78
|
+
chunkr.upload(f)
|
79
|
+
|
80
|
+
# Upload from URL
|
81
|
+
chunkr.upload("https://example.com/document.pdf")
|
82
|
+
|
83
|
+
# Upload from base64 string
|
84
|
+
chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
|
85
|
+
|
86
|
+
# Upload an image
|
87
|
+
from PIL import Image
|
88
|
+
img = Image.open("photo.jpg")
|
89
|
+
chunkr.upload(img)
|
90
|
+
```
|
91
|
+
|
92
|
+
### Configuration
|
93
|
+
|
94
|
+
You can customize the processing behavior by passing a `Configuration` object:
|
95
|
+
|
96
|
+
```python
|
97
|
+
from chunkr_ai.models import Configuration, OcrStrategy, SegmentationStrategy, GenerationStrategy
|
98
|
+
|
99
|
+
# Basic configuration
|
100
|
+
config = Configuration(
|
101
|
+
ocr_strategy=OcrStrategy.AUTO,
|
102
|
+
segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
|
103
|
+
high_resolution=True,
|
104
|
+
expires_in=3600, # seconds
|
105
|
+
)
|
106
|
+
|
107
|
+
# Upload with configuration
|
108
|
+
task = chunkr.upload("document.pdf", config)
|
109
|
+
```
|
110
|
+
|
111
|
+
#### Available Configuration Examples
|
112
|
+
|
113
|
+
- **Chunk Processing**
|
114
|
+
```python
|
115
|
+
from chunkr_ai.models import ChunkProcessing
|
116
|
+
config = Configuration(
|
117
|
+
chunk_processing=ChunkProcessing(target_length=1024)
|
118
|
+
)
|
119
|
+
```
|
120
|
+
- **Expires In**
|
121
|
+
```python
|
122
|
+
config = Configuration(expires_in=3600)
|
123
|
+
```
|
124
|
+
|
125
|
+
- **High Resolution**
|
126
|
+
```python
|
127
|
+
config = Configuration(high_resolution=True)
|
128
|
+
```
|
129
|
+
|
130
|
+
- **JSON Schema**
|
131
|
+
```python
|
132
|
+
config = Configuration(json_schema=JsonSchema(
|
133
|
+
title="Sales Data",
|
134
|
+
properties=[
|
135
|
+
Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
|
136
|
+
Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
|
137
|
+
]
|
138
|
+
))
|
139
|
+
```
|
140
|
+
|
141
|
+
- **OCR Strategy**
|
142
|
+
```python
|
143
|
+
config = Configuration(ocr_strategy=OcrStrategy.AUTO)
|
144
|
+
```
|
145
|
+
|
146
|
+
- **Segment Processing**
|
147
|
+
```python
|
148
|
+
from chunkr_ai.models import SegmentProcessing, GenerationConfig, GenerationStrategy
|
149
|
+
config = Configuration(
|
150
|
+
segment_processing=SegmentProcessing(
|
151
|
+
page=GenerationConfig(
|
152
|
+
html=GenerationStrategy.LLM,
|
153
|
+
markdown=GenerationStrategy.LLM
|
154
|
+
)
|
155
|
+
)
|
156
|
+
)
|
157
|
+
```
|
158
|
+
|
159
|
+
- **Segmentation Strategy**
|
160
|
+
```python
|
161
|
+
config = Configuration(
|
162
|
+
segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS # or SegmentationStrategy.PAGE
|
163
|
+
)
|
164
|
+
```
|
165
|
+
|
166
|
+
## Environment setup
|
167
|
+
|
168
|
+
You can provide your API key and URL in several ways:
|
169
|
+
1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
|
170
|
+
2. `.env` file
|
171
|
+
3. Direct initialization:
|
172
|
+
```python
|
173
|
+
chunkr = Chunkr(
|
174
|
+
api_key="your-api-key",
|
175
|
+
url="https://api.chunkr.ai"
|
176
|
+
)
|
177
|
+
```
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "chunkr-ai"
|
7
|
-
version = "0.0.3"
|
7
|
+
version = "0.0.5"
|
8
8
|
authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
|
9
9
|
description = "Python client for Chunkr: open source document intelligence"
|
10
10
|
readme = "README.md"
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from pydantic import BaseModel, Field
|
1
|
+
from pydantic import BaseModel, Field, model_validator
|
2
2
|
from enum import Enum
|
3
3
|
from typing import Optional, List, Dict
|
4
4
|
|
@@ -40,15 +40,14 @@ class ChunkProcessing(BaseModel):
|
|
40
40
|
|
41
41
|
class Property(BaseModel):
|
42
42
|
name: str
|
43
|
-
title: Optional[str]
|
43
|
+
title: Optional[str] = None
|
44
44
|
prop_type: str
|
45
|
-
description: Optional[str]
|
46
|
-
default: Optional[str]
|
45
|
+
description: Optional[str] = None
|
46
|
+
default: Optional[str] = None
|
47
47
|
|
48
48
|
class JsonSchema(BaseModel):
|
49
49
|
title: str
|
50
50
|
properties: List[Property]
|
51
|
-
schema_type: Optional[str]
|
52
51
|
|
53
52
|
class OcrStrategy(str, Enum):
|
54
53
|
ALL = "All"
|
@@ -121,10 +120,12 @@ class Configuration(BaseModel):
|
|
121
120
|
ocr_strategy: Optional[OcrStrategy] = Field(default=None)
|
122
121
|
segment_processing: Optional[SegmentProcessing] = Field(default=None)
|
123
122
|
segmentation_strategy: Optional[SegmentationStrategy] = Field(default=None)
|
124
|
-
target_chunk_length: Optional[int] = Field(default=None)
|
125
123
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
124
|
+
@model_validator(mode='before')
|
125
|
+
def map_deprecated_fields(cls, values: Dict) -> Dict:
|
126
|
+
if isinstance(values, dict) and "target_chunk_length" in values:
|
127
|
+
target_length = values.pop("target_chunk_length")
|
128
|
+
if target_length is not None:
|
129
|
+
values["chunk_processing"] = values.get("chunk_processing", {}) or {}
|
130
|
+
values["chunk_processing"]["target_length"] = target_length
|
131
|
+
return values
|
@@ -1,11 +1,18 @@
|
|
1
1
|
from .protocol import ChunkrClientProtocol
|
2
|
-
from .config import Configuration,
|
2
|
+
from .config import Configuration, OutputResponse
|
3
3
|
import asyncio
|
4
4
|
from datetime import datetime
|
5
|
+
from enum import Enum
|
5
6
|
from pydantic import BaseModel, PrivateAttr
|
6
7
|
import time
|
7
8
|
from typing import Optional, Union
|
8
9
|
|
10
|
+
class Status(str, Enum):
|
11
|
+
STARTING = "Starting"
|
12
|
+
PROCESSING = "Processing"
|
13
|
+
SUCCEEDED = "Succeeded"
|
14
|
+
FAILED = "Failed"
|
15
|
+
|
9
16
|
class TaskResponse(BaseModel):
|
10
17
|
configuration: Configuration
|
11
18
|
created_at: datetime
|
@@ -18,10 +18,9 @@ from .api.config import (
|
|
18
18
|
SegmentProcessing,
|
19
19
|
SegmentType,
|
20
20
|
SegmentationStrategy,
|
21
|
-
Status
|
22
21
|
)
|
23
22
|
|
24
|
-
from .api.task import TaskResponse, TaskPayload
|
23
|
+
from .api.task import TaskResponse, TaskPayload, Status
|
25
24
|
|
26
25
|
__all__ = [
|
27
26
|
'BoundingBox',
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.0.3
|
3
|
+
Version: 0.0.5
|
4
4
|
Summary: Python client for Chunkr: open source document intelligence
|
5
5
|
Author-email: Ishaan Kapoor <ishaan@lumina.sh>
|
6
6
|
Project-URL: Homepage, https://chunkr.ai
|
@@ -17,7 +17,13 @@ Requires-Dist: pytest-xdist>=3.6.1; extra == "test"
|
|
17
17
|
|
18
18
|
# Chunkr Python Client
|
19
19
|
|
20
|
-
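This is the Python client for the Chunkr API. It provides a simple interface to interact with Chunkr's services.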
|
20
|
+
This provides a simple interface to interact with the Chunkr API.
|
21
|
+
|
22
|
+
## Getting Started
|
23
|
+
|
24
|
+
You can get an API key from [Chunkr](https://chunkr.ai) or deploy your own Chunkr instance. For self-hosted deployment options, check out our [deployment guide](https://github.com/lumina-ai-inc/chunkr/tree/main?tab=readme-ov-file#self-hosted-deployment-options).
|
25
|
+
|
26
|
+
For more information about the API and its capabilities, visit the [Chunkr API docs](https://docs.chunkr.ai).
|
21
27
|
|
22
28
|
## Installation
|
23
29
|
|
@@ -102,6 +108,80 @@ chunkr.upload(img)
|
|
102
108
|
|
103
109
|
### Configuration
|
104
110
|
|
111
|
+
You can customize the processing behavior by passing a `Configuration` object:
|
112
|
+
|
113
|
+
```python
|
114
|
+
from chunkr_ai.models import Configuration, OcrStrategy, SegmentationStrategy, GenerationStrategy
|
115
|
+
|
116
|
+
# Basic configuration
|
117
|
+
config = Configuration(
|
118
|
+
ocr_strategy=OcrStrategy.AUTO,
|
119
|
+
segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
|
120
|
+
high_resolution=True,
|
121
|
+
expires_in=3600, # seconds
|
122
|
+
)
|
123
|
+
|
124
|
+
# Upload with configuration
|
125
|
+
task = chunkr.upload("document.pdf", config)
|
126
|
+
```
|
127
|
+
|
128
|
+
#### Available Configuration Examples
|
129
|
+
|
130
|
+
- **Chunk Processing**
|
131
|
+
```python
|
132
|
+
from chunkr_ai.models import ChunkProcessing
|
133
|
+
config = Configuration(
|
134
|
+
chunk_processing=ChunkProcessing(target_length=1024)
|
135
|
+
)
|
136
|
+
```
|
137
|
+
- **Expires In**
|
138
|
+
```python
|
139
|
+
config = Configuration(expires_in=3600)
|
140
|
+
```
|
141
|
+
|
142
|
+
- **High Resolution**
|
143
|
+
```python
|
144
|
+
config = Configuration(high_resolution=True)
|
145
|
+
```
|
146
|
+
|
147
|
+
- **JSON Schema**
|
148
|
+
```python
|
149
|
+
config = Configuration(json_schema=JsonSchema(
|
150
|
+
title="Sales Data",
|
151
|
+
properties=[
|
152
|
+
Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
|
153
|
+
Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
|
154
|
+
]
|
155
|
+
))
|
156
|
+
```
|
157
|
+
|
158
|
+
- **OCR Strategy**
|
159
|
+
```python
|
160
|
+
config = Configuration(ocr_strategy=OcrStrategy.AUTO)
|
161
|
+
```
|
162
|
+
|
163
|
+
- **Segment Processing**
|
164
|
+
```python
|
165
|
+
from chunkr_ai.models import SegmentProcessing, GenerationConfig, GenerationStrategy
|
166
|
+
config = Configuration(
|
167
|
+
segment_processing=SegmentProcessing(
|
168
|
+
page=GenerationConfig(
|
169
|
+
html=GenerationStrategy.LLM,
|
170
|
+
markdown=GenerationStrategy.LLM
|
171
|
+
)
|
172
|
+
)
|
173
|
+
)
|
174
|
+
```
|
175
|
+
|
176
|
+
- **Segmentation Strategy**
|
177
|
+
```python
|
178
|
+
config = Configuration(
|
179
|
+
segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS # or SegmentationStrategy.PAGE
|
180
|
+
)
|
181
|
+
```
|
182
|
+
|
183
|
+
## Environment setup
|
184
|
+
|
105
185
|
You can provide your API key and URL in several ways:
|
106
186
|
1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
|
107
187
|
2. `.env` file
|
@@ -112,13 +192,3 @@ chunkr = Chunkr(
|
|
112
192
|
url="https://api.chunkr.ai"
|
113
193
|
)
|
114
194
|
```
|
115
|
-
|
116
|
-
## Run tests
|
117
|
-
|
118
|
-
```python
|
119
|
-
# Install dependencies
|
120
|
-
uv pip install -e ".[test]"
|
121
|
-
|
122
|
-
# Run tests
|
123
|
-
uv run pytest
|
124
|
-
```
|
@@ -8,7 +8,9 @@ from chunkr_ai.models import (
|
|
8
8
|
Configuration,
|
9
9
|
GenerationStrategy,
|
10
10
|
GenerationConfig,
|
11
|
+
JsonSchema,
|
11
12
|
OcrStrategy,
|
13
|
+
Property,
|
12
14
|
SegmentationStrategy,
|
13
15
|
SegmentProcessing,
|
14
16
|
TaskResponse,
|
@@ -129,7 +131,21 @@ def test_page_llm(chunkr, sample_path):
|
|
129
131
|
assert response.task_id is not None
|
130
132
|
assert response.status == "Succeeded"
|
131
133
|
assert response.output is not None
|
132
|
-
|
134
|
+
|
135
|
+
def test_json_schema(chunkr, sample_path):
|
136
|
+
response = chunkr.upload(sample_path, Configuration(
|
137
|
+
json_schema=JsonSchema(
|
138
|
+
title="Sales Data",
|
139
|
+
properties=[
|
140
|
+
Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
|
141
|
+
Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
|
142
|
+
]
|
143
|
+
)
|
144
|
+
))
|
145
|
+
assert isinstance(response, TaskResponse)
|
146
|
+
assert response.task_id is not None
|
147
|
+
assert response.status == "Succeeded"
|
148
|
+
assert response.output is not None
|
133
149
|
|
134
150
|
async def test_async_send_file_path(async_chunkr, sample_path):
|
135
151
|
response = await async_chunkr.upload(sample_path)
|
@@ -138,4 +154,5 @@ async def test_async_send_file_path(async_chunkr, sample_path):
|
|
138
154
|
assert response.task_id is not None
|
139
155
|
assert response.status == "Succeeded"
|
140
156
|
assert response.output is not None
|
157
|
+
|
141
158
|
|
chunkr_ai-0.0.3/README.md
DELETED
@@ -1,107 +0,0 @@
|
|
1
|
-
# Chunkr Python Client
|
2
|
-
|
3
|
-
This is the Python client for the Chunkr API. It provides a simple interface to interact with Chunkr's services.
|
4
|
-
|
5
|
-
## Installation
|
6
|
-
|
7
|
-
```bash
|
8
|
-
pip install chunkr-ai
|
9
|
-
```
|
10
|
-
|
11
|
-
## Usage
|
12
|
-
|
13
|
-
We provide two clients: `Chunkr` for synchronous operations and `ChunkrAsync` for asynchronous operations.
|
14
|
-
|
15
|
-
### Synchronous Usage
|
16
|
-
|
17
|
-
```python
|
18
|
-
from chunkr_ai import Chunkr
|
19
|
-
|
20
|
-
# Initialize client
|
21
|
-
chunkr = Chunkr()
|
22
|
-
|
23
|
-
# Upload a file and wait for processing
|
24
|
-
task = chunkr.upload("document.pdf")
|
25
|
-
|
26
|
-
# Print the response
|
27
|
-
print(task)
|
28
|
-
|
29
|
-
# Get output from task
|
30
|
-
output = task.output
|
31
|
-
|
32
|
-
# If you want to upload without waiting for processing
|
33
|
-
task = chunkr.start_upload("document.pdf")
|
34
|
-
# ... do other things ...
|
35
|
-
task.poll() # Check status when needed
|
36
|
-
```
|
37
|
-
|
38
|
-
### Asynchronous Usage
|
39
|
-
|
40
|
-
```python
|
41
|
-
from chunkr_ai import ChunkrAsync
|
42
|
-
|
43
|
-
async def process_document():
|
44
|
-
# Initialize client
|
45
|
-
chunkr = ChunkrAsync()
|
46
|
-
|
47
|
-
# Upload a file and wait for processing
|
48
|
-
task = await chunkr.upload("document.pdf")
|
49
|
-
|
50
|
-
# Print the response
|
51
|
-
print(task)
|
52
|
-
|
53
|
-
# Get output from task
|
54
|
-
output = task.output
|
55
|
-
|
56
|
-
# If you want to upload without waiting for processing
|
57
|
-
task = await chunkr.start_upload("document.pdf")
|
58
|
-
# ... do other things ...
|
59
|
-
await task.poll_async() # Check status when needed
|
60
|
-
```
|
61
|
-
|
62
|
-
### Additional Features
|
63
|
-
|
64
|
-
Both clients support various input types:
|
65
|
-
|
66
|
-
```python
|
67
|
-
# Upload from file path
|
68
|
-
chunkr.upload("document.pdf")
|
69
|
-
|
70
|
-
# Upload from opened file
|
71
|
-
with open("document.pdf", "rb") as f:
|
72
|
-
chunkr.upload(f)
|
73
|
-
|
74
|
-
# Upload from URL
|
75
|
-
chunkr.upload("https://example.com/document.pdf")
|
76
|
-
|
77
|
-
# Upload from base64 string
|
78
|
-
chunkr.upload("data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmo...")
|
79
|
-
|
80
|
-
# Upload an image
|
81
|
-
from PIL import Image
|
82
|
-
img = Image.open("photo.jpg")
|
83
|
-
chunkr.upload(img)
|
84
|
-
```
|
85
|
-
|
86
|
-
### Configuration
|
87
|
-
|
88
|
-
You can provide your API key and URL in several ways:
|
89
|
-
1. Environment variables: `CHUNKR_API_KEY` and `CHUNKR_URL`
|
90
|
-
2. `.env` file
|
91
|
-
3. Direct initialization:
|
92
|
-
```python
|
93
|
-
chunkr = Chunkr(
|
94
|
-
api_key="your-api-key",
|
95
|
-
url="https://api.chunkr.ai"
|
96
|
-
)
|
97
|
-
```
|
98
|
-
|
99
|
-
## Run tests
|
100
|
-
|
101
|
-
```python
|
102
|
-
# Install dependencies
|
103
|
-
uv pip install -e ".[test]"
|
104
|
-
|
105
|
-
# Run tests
|
106
|
-
uv run pytest
|
107
|
-
```
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|