chunkr-ai 0.0.4__tar.gz → 0.0.6__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {chunkr_ai-0.0.4/src/chunkr_ai.egg-info → chunkr_ai-0.0.6}/PKG-INFO +2 -11
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/README.md +1 -11
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/pyproject.toml +3 -1
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/api/task.py +5 -14
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/models.py +1 -2
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6/src/chunkr_ai.egg-info}/PKG-INFO +2 -11
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai.egg-info/requires.txt +1 -0
- chunkr_ai-0.0.6/tests/test_chunkr.py +212 -0
- chunkr_ai-0.0.4/tests/test_chunkr.py +0 -158
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/LICENSE +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/setup.cfg +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/__init__.py +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/api/__init__.py +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/api/api.py +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/api/auth.py +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/api/base.py +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/api/chunkr.py +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/api/chunkr_async.py +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/api/config.py +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/api/protocol.py +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai/main.py +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
- {chunkr_ai-0.0.4 → chunkr_ai-0.0.6}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.0.4
|
3
|
+
Version: 0.0.6
|
4
4
|
Summary: Python client for Chunkr: open source document intelligence
|
5
5
|
Author-email: Ishaan Kapoor <ishaan@lumina.sh>
|
6
6
|
Project-URL: Homepage, https://chunkr.ai
|
@@ -9,6 +9,7 @@ License-File: LICENSE
|
|
9
9
|
Requires-Dist: httpx>=0.28.1
|
10
10
|
Requires-Dist: pillow>=11.1.0
|
11
11
|
Requires-Dist: pydantic>=2.10.4
|
12
|
+
Requires-Dist: pytest-asyncio>=0.25.2
|
12
13
|
Requires-Dist: python-dotenv>=1.0.1
|
13
14
|
Requires-Dist: requests>=2.32.3
|
14
15
|
Provides-Extra: test
|
@@ -192,13 +193,3 @@ chunkr = Chunkr(
|
|
192
193
|
url="https://api.chunkr.ai"
|
193
194
|
)
|
194
195
|
```
|
195
|
-
|
196
|
-
## Run tests
|
197
|
-
|
198
|
-
```python
|
199
|
-
# Install dependencies
|
200
|
-
uv pip install -e ".[test]"
|
201
|
-
|
202
|
-
# Run tests
|
203
|
-
uv run pytest
|
204
|
-
```
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "chunkr-ai"
|
7
|
-
version = "0.0.4"
|
7
|
+
version = "0.0.6"
|
8
8
|
authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
|
9
9
|
description = "Python client for Chunkr: open source document intelligence"
|
10
10
|
readme = "README.md"
|
@@ -14,6 +14,7 @@ dependencies = [
|
|
14
14
|
"httpx>=0.28.1",
|
15
15
|
"pillow>=11.1.0",
|
16
16
|
"pydantic>=2.10.4",
|
17
|
+
"pytest-asyncio>=0.25.2",
|
17
18
|
"python-dotenv>=1.0.1",
|
18
19
|
"requests>=2.32.3",
|
19
20
|
]
|
@@ -23,3 +24,4 @@ test = [
|
|
23
24
|
"pytest>=8.3.4",
|
24
25
|
"pytest-xdist>=3.6.1",
|
25
26
|
]
|
27
|
+
|
@@ -24,6 +24,7 @@ class TaskResponse(BaseModel):
|
|
24
24
|
output: Optional[OutputResponse]
|
25
25
|
page_count: Optional[int]
|
26
26
|
pdf_url: Optional[str]
|
27
|
+
started_at: Optional[datetime]
|
27
28
|
status: Status
|
28
29
|
task_id: str
|
29
30
|
task_url: Optional[str]
|
@@ -57,8 +58,9 @@ class TaskResponse(BaseModel):
|
|
57
58
|
while True:
|
58
59
|
try:
|
59
60
|
r = await self._client._client.get(self.task_url, headers=self._client._headers())
|
60
|
-
|
61
|
-
|
61
|
+
r.raise_for_status()
|
62
|
+
response = r.json()
|
63
|
+
return response
|
62
64
|
except (ConnectionError, TimeoutError) as _:
|
63
65
|
print("Connection error while polling the task, retrying...")
|
64
66
|
await asyncio.sleep(0.5)
|
@@ -117,15 +119,4 @@ class TaskResponse(BaseModel):
|
|
117
119
|
|
118
120
|
def content(self) -> str:
|
119
121
|
"""Get full text for the task"""
|
120
|
-
return self._get_content("content")
|
121
|
-
|
122
|
-
class TaskPayload(BaseModel):
|
123
|
-
current_configuration: Configuration
|
124
|
-
file_name: str
|
125
|
-
image_folder_location: str
|
126
|
-
input_location: str
|
127
|
-
output_location: str
|
128
|
-
pdf_location: str
|
129
|
-
previous_configuration: Optional[Configuration]
|
130
|
-
task_id: str
|
131
|
-
user_id: str
|
122
|
+
return self._get_content("content")
|
@@ -20,7 +20,7 @@ from .api.config import (
|
|
20
20
|
SegmentationStrategy,
|
21
21
|
)
|
22
22
|
|
23
|
-
from .api.task import TaskResponse, TaskPayload, Status
|
23
|
+
from .api.task import TaskResponse, Status
|
24
24
|
|
25
25
|
__all__ = [
|
26
26
|
'BoundingBox',
|
@@ -43,6 +43,5 @@ __all__ = [
|
|
43
43
|
'SegmentType',
|
44
44
|
'SegmentationStrategy',
|
45
45
|
'Status',
|
46
|
-
'TaskPayload',
|
47
46
|
'TaskResponse'
|
48
47
|
]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.0.4
|
3
|
+
Version: 0.0.6
|
4
4
|
Summary: Python client for Chunkr: open source document intelligence
|
5
5
|
Author-email: Ishaan Kapoor <ishaan@lumina.sh>
|
6
6
|
Project-URL: Homepage, https://chunkr.ai
|
@@ -9,6 +9,7 @@ License-File: LICENSE
|
|
9
9
|
Requires-Dist: httpx>=0.28.1
|
10
10
|
Requires-Dist: pillow>=11.1.0
|
11
11
|
Requires-Dist: pydantic>=2.10.4
|
12
|
+
Requires-Dist: pytest-asyncio>=0.25.2
|
12
13
|
Requires-Dist: python-dotenv>=1.0.1
|
13
14
|
Requires-Dist: requests>=2.32.3
|
14
15
|
Provides-Extra: test
|
@@ -192,13 +193,3 @@ chunkr = Chunkr(
|
|
192
193
|
url="https://api.chunkr.ai"
|
193
194
|
)
|
194
195
|
```
|
195
|
-
|
196
|
-
## Run tests
|
197
|
-
|
198
|
-
```python
|
199
|
-
# Install dependencies
|
200
|
-
uv pip install -e ".[test]"
|
201
|
-
|
202
|
-
# Run tests
|
203
|
-
uv run pytest
|
204
|
-
```
|
@@ -0,0 +1,212 @@
|
|
1
|
+
import pytest
|
2
|
+
import pytest_asyncio
|
3
|
+
from pathlib import Path
|
4
|
+
from PIL import Image
|
5
|
+
|
6
|
+
from chunkr_ai import Chunkr, ChunkrAsync
|
7
|
+
from chunkr_ai.models import (
|
8
|
+
ChunkProcessing,
|
9
|
+
Configuration,
|
10
|
+
GenerationStrategy,
|
11
|
+
GenerationConfig,
|
12
|
+
JsonSchema,
|
13
|
+
OcrStrategy,
|
14
|
+
Property,
|
15
|
+
SegmentationStrategy,
|
16
|
+
SegmentProcessing,
|
17
|
+
TaskResponse,
|
18
|
+
)
|
19
|
+
|
20
|
+
@pytest.fixture(params=[
|
21
|
+
pytest.param(("sync", Chunkr()), id="sync"),
|
22
|
+
pytest.param(("async", ChunkrAsync()), id="async")
|
23
|
+
])
|
24
|
+
def chunkr_client(request):
|
25
|
+
return request.param
|
26
|
+
|
27
|
+
@pytest.fixture
|
28
|
+
def sample_path():
|
29
|
+
return Path("tests/files/test.pdf")
|
30
|
+
|
31
|
+
@pytest.fixture
|
32
|
+
def sample_image():
|
33
|
+
img = Image.open("tests/files/test.jpg")
|
34
|
+
return img
|
35
|
+
|
36
|
+
@pytest.mark.asyncio
|
37
|
+
async def test_send_file_path(chunkr_client, sample_path):
|
38
|
+
client_type, client = chunkr_client
|
39
|
+
response = await client.upload(sample_path) if client_type == "async" else client.upload(sample_path)
|
40
|
+
|
41
|
+
assert isinstance(response, TaskResponse)
|
42
|
+
assert response.task_id is not None
|
43
|
+
assert response.status == "Succeeded"
|
44
|
+
assert response.output is not None
|
45
|
+
|
46
|
+
@pytest.mark.asyncio
|
47
|
+
async def test_send_file_path_str(chunkr_client, sample_path):
|
48
|
+
client_type, client = chunkr_client
|
49
|
+
response = await client.upload(str(sample_path)) if client_type == "async" else client.upload(str(sample_path))
|
50
|
+
|
51
|
+
assert isinstance(response, TaskResponse)
|
52
|
+
assert response.task_id is not None
|
53
|
+
assert response.status == "Succeeded"
|
54
|
+
assert response.output is not None
|
55
|
+
|
56
|
+
@pytest.mark.asyncio
|
57
|
+
async def test_send_opened_file(chunkr_client, sample_path):
|
58
|
+
client_type, client = chunkr_client
|
59
|
+
with open(sample_path, 'rb') as f:
|
60
|
+
response = await client.upload(f) if client_type == "async" else client.upload(f)
|
61
|
+
|
62
|
+
assert isinstance(response, TaskResponse)
|
63
|
+
assert response.task_id is not None
|
64
|
+
assert response.status == "Succeeded"
|
65
|
+
assert response.output is not None
|
66
|
+
|
67
|
+
@pytest.mark.asyncio
|
68
|
+
async def test_send_pil_image(chunkr_client, sample_image):
|
69
|
+
client_type, client = chunkr_client
|
70
|
+
response = await client.upload(sample_image) if client_type == "async" else client.upload(sample_image)
|
71
|
+
|
72
|
+
assert isinstance(response, TaskResponse)
|
73
|
+
assert response.task_id is not None
|
74
|
+
assert response.status == "Succeeded"
|
75
|
+
|
76
|
+
@pytest.mark.asyncio
|
77
|
+
async def test_ocr_auto(chunkr_client, sample_path):
|
78
|
+
client_type, client = chunkr_client
|
79
|
+
response = await client.upload(sample_path, Configuration(
|
80
|
+
ocr_strategy=OcrStrategy.AUTO
|
81
|
+
)) if client_type == "async" else client.upload(sample_path, Configuration(
|
82
|
+
ocr_strategy=OcrStrategy.AUTO
|
83
|
+
))
|
84
|
+
|
85
|
+
assert isinstance(response, TaskResponse)
|
86
|
+
assert response.task_id is not None
|
87
|
+
assert response.status == "Succeeded"
|
88
|
+
assert response.output is not None
|
89
|
+
|
90
|
+
@pytest.mark.asyncio
|
91
|
+
async def test_expires_in(chunkr_client, sample_path):
|
92
|
+
client_type, client = chunkr_client
|
93
|
+
response = await client.upload(sample_path, Configuration(
|
94
|
+
expires_in=10
|
95
|
+
)) if client_type == "async" else client.upload(sample_path, Configuration(
|
96
|
+
expires_in=10
|
97
|
+
))
|
98
|
+
|
99
|
+
assert isinstance(response, TaskResponse)
|
100
|
+
assert response.task_id is not None
|
101
|
+
assert response.status == "Succeeded"
|
102
|
+
assert response.output is not None
|
103
|
+
|
104
|
+
@pytest.mark.asyncio
|
105
|
+
async def test_chunk_processing(chunkr_client, sample_path):
|
106
|
+
client_type, client = chunkr_client
|
107
|
+
response = await client.upload(sample_path, Configuration(
|
108
|
+
chunk_processing=ChunkProcessing(
|
109
|
+
target_length=1024
|
110
|
+
)
|
111
|
+
)) if client_type == "async" else client.upload(sample_path, Configuration(
|
112
|
+
chunk_processing=ChunkProcessing(
|
113
|
+
target_length=1024
|
114
|
+
)
|
115
|
+
))
|
116
|
+
|
117
|
+
assert isinstance(response, TaskResponse)
|
118
|
+
assert response.task_id is not None
|
119
|
+
assert response.status == "Succeeded"
|
120
|
+
assert response.output is not None
|
121
|
+
|
122
|
+
@pytest.mark.asyncio
|
123
|
+
async def test_segmentation_strategy_page(chunkr_client, sample_path):
|
124
|
+
client_type, client = chunkr_client
|
125
|
+
response = await client.upload(sample_path, Configuration(
|
126
|
+
segmentation_strategy=SegmentationStrategy.PAGE
|
127
|
+
)) if client_type == "async" else client.upload(sample_path, Configuration(
|
128
|
+
segmentation_strategy=SegmentationStrategy.PAGE
|
129
|
+
))
|
130
|
+
|
131
|
+
assert isinstance(response, TaskResponse)
|
132
|
+
assert response.task_id is not None
|
133
|
+
assert response.status == "Succeeded"
|
134
|
+
assert response.output is not None
|
135
|
+
|
136
|
+
@pytest.mark.asyncio
|
137
|
+
async def test_page_llm_html(chunkr_client, sample_path):
|
138
|
+
client_type, client = chunkr_client
|
139
|
+
response = await client.upload(sample_path, Configuration(
|
140
|
+
segmentation_strategy=SegmentationStrategy.PAGE,
|
141
|
+
segment_processing=SegmentProcessing(
|
142
|
+
page=GenerationConfig(
|
143
|
+
html=GenerationStrategy.LLM
|
144
|
+
)
|
145
|
+
)
|
146
|
+
)) if client_type == "async" else client.upload(sample_path, Configuration(
|
147
|
+
segmentation_strategy=SegmentationStrategy.PAGE,
|
148
|
+
segment_processing=SegmentProcessing(
|
149
|
+
page=GenerationConfig(
|
150
|
+
html=GenerationStrategy.LLM
|
151
|
+
)
|
152
|
+
)
|
153
|
+
))
|
154
|
+
|
155
|
+
assert isinstance(response, TaskResponse)
|
156
|
+
assert response.task_id is not None
|
157
|
+
assert response.status == "Succeeded"
|
158
|
+
assert response.output is not None
|
159
|
+
|
160
|
+
@pytest.mark.asyncio
|
161
|
+
async def test_page_llm(chunkr_client, sample_path):
|
162
|
+
client_type, client = chunkr_client
|
163
|
+
response = await client.upload(sample_path, Configuration(
|
164
|
+
segmentation_strategy=SegmentationStrategy.PAGE,
|
165
|
+
segment_processing=SegmentProcessing(
|
166
|
+
page=GenerationConfig(
|
167
|
+
html=GenerationStrategy.LLM,
|
168
|
+
markdown=GenerationStrategy.LLM
|
169
|
+
)
|
170
|
+
)
|
171
|
+
)) if client_type == "async" else client.upload(sample_path, Configuration(
|
172
|
+
segmentation_strategy=SegmentationStrategy.PAGE,
|
173
|
+
segment_processing=SegmentProcessing(
|
174
|
+
page=GenerationConfig(
|
175
|
+
html=GenerationStrategy.LLM,
|
176
|
+
markdown=GenerationStrategy.LLM
|
177
|
+
)
|
178
|
+
)
|
179
|
+
))
|
180
|
+
|
181
|
+
assert isinstance(response, TaskResponse)
|
182
|
+
assert response.task_id is not None
|
183
|
+
assert response.status == "Succeeded"
|
184
|
+
assert response.output is not None
|
185
|
+
|
186
|
+
@pytest.mark.asyncio
|
187
|
+
async def test_json_schema(chunkr_client, sample_path):
|
188
|
+
client_type, client = chunkr_client
|
189
|
+
response = await client.upload(sample_path, Configuration(
|
190
|
+
json_schema=JsonSchema(
|
191
|
+
title="Sales Data",
|
192
|
+
properties=[
|
193
|
+
Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
|
194
|
+
Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
|
195
|
+
]
|
196
|
+
)
|
197
|
+
)) if client_type == "async" else client.upload(sample_path, Configuration(
|
198
|
+
json_schema=JsonSchema(
|
199
|
+
title="Sales Data",
|
200
|
+
properties=[
|
201
|
+
Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
|
202
|
+
Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
|
203
|
+
]
|
204
|
+
)
|
205
|
+
))
|
206
|
+
|
207
|
+
assert isinstance(response, TaskResponse)
|
208
|
+
assert response.task_id is not None
|
209
|
+
assert response.status == "Succeeded"
|
210
|
+
assert response.output is not None
|
211
|
+
|
212
|
+
|
@@ -1,158 +0,0 @@
|
|
1
|
-
import pytest
|
2
|
-
from pathlib import Path
|
3
|
-
from PIL import Image
|
4
|
-
|
5
|
-
from chunkr_ai import Chunkr, ChunkrAsync
|
6
|
-
from chunkr_ai.models import (
|
7
|
-
ChunkProcessing,
|
8
|
-
Configuration,
|
9
|
-
GenerationStrategy,
|
10
|
-
GenerationConfig,
|
11
|
-
JsonSchema,
|
12
|
-
OcrStrategy,
|
13
|
-
Property,
|
14
|
-
SegmentationStrategy,
|
15
|
-
SegmentProcessing,
|
16
|
-
TaskResponse,
|
17
|
-
)
|
18
|
-
|
19
|
-
@pytest.fixture
|
20
|
-
def chunkr():
|
21
|
-
return Chunkr()
|
22
|
-
|
23
|
-
@pytest.fixture
|
24
|
-
def async_chunkr():
|
25
|
-
return ChunkrAsync()
|
26
|
-
|
27
|
-
@pytest.fixture
|
28
|
-
def sample_path():
|
29
|
-
return Path("tests/files/test.pdf")
|
30
|
-
|
31
|
-
@pytest.fixture
|
32
|
-
def sample_image():
|
33
|
-
img = Image.open("tests/files/test.jpg")
|
34
|
-
return img
|
35
|
-
|
36
|
-
def test_send_file_path(chunkr, sample_path):
|
37
|
-
response = chunkr.upload(sample_path)
|
38
|
-
|
39
|
-
assert isinstance(response, TaskResponse)
|
40
|
-
assert response.task_id is not None
|
41
|
-
assert response.status == "Succeeded"
|
42
|
-
assert response.output is not None
|
43
|
-
|
44
|
-
def test_send_file_path_str(chunkr, sample_path):
|
45
|
-
response = chunkr.upload(str(sample_path))
|
46
|
-
|
47
|
-
assert isinstance(response, TaskResponse)
|
48
|
-
assert response.task_id is not None
|
49
|
-
assert response.status == "Succeeded"
|
50
|
-
assert response.output is not None
|
51
|
-
|
52
|
-
def test_send_opened_file(chunkr, sample_path):
|
53
|
-
with open(sample_path, 'rb') as f:
|
54
|
-
response = chunkr.upload(f)
|
55
|
-
|
56
|
-
assert isinstance(response, TaskResponse)
|
57
|
-
assert response.task_id is not None
|
58
|
-
assert response.status == "Succeeded"
|
59
|
-
assert response.output is not None
|
60
|
-
|
61
|
-
def test_send_pil_image(chunkr, sample_image):
|
62
|
-
response = chunkr.upload(sample_image)
|
63
|
-
|
64
|
-
assert isinstance(response, TaskResponse)
|
65
|
-
assert response.task_id is not None
|
66
|
-
assert response.status == "Succeeded"
|
67
|
-
|
68
|
-
def test_ocr_auto(chunkr, sample_path):
|
69
|
-
response = chunkr.upload(sample_path, Configuration(
|
70
|
-
ocr_strategy=OcrStrategy.AUTO
|
71
|
-
))
|
72
|
-
assert isinstance(response, TaskResponse)
|
73
|
-
assert response.task_id is not None
|
74
|
-
assert response.status == "Succeeded"
|
75
|
-
assert response.output is not None
|
76
|
-
|
77
|
-
def test_expires_in(chunkr, sample_path):
|
78
|
-
response = chunkr.upload(sample_path, Configuration(
|
79
|
-
expires_in=10
|
80
|
-
))
|
81
|
-
assert isinstance(response, TaskResponse)
|
82
|
-
assert response.task_id is not None
|
83
|
-
assert response.status == "Succeeded"
|
84
|
-
assert response.output is not None
|
85
|
-
|
86
|
-
def test_chunk_processing(chunkr, sample_path):
|
87
|
-
response = chunkr.upload(sample_path, Configuration(
|
88
|
-
chunk_processing=ChunkProcessing(
|
89
|
-
target_length=1024
|
90
|
-
)
|
91
|
-
))
|
92
|
-
assert isinstance(response, TaskResponse)
|
93
|
-
assert response.task_id is not None
|
94
|
-
assert response.status == "Succeeded"
|
95
|
-
assert response.output is not None
|
96
|
-
|
97
|
-
def test_segmentation_strategy_page(chunkr, sample_path):
|
98
|
-
response = chunkr.upload(sample_path, Configuration(
|
99
|
-
segmentation_strategy=SegmentationStrategy.PAGE
|
100
|
-
))
|
101
|
-
assert isinstance(response, TaskResponse)
|
102
|
-
assert response.task_id is not None
|
103
|
-
assert response.status == "Succeeded"
|
104
|
-
assert response.output is not None
|
105
|
-
|
106
|
-
def test_page_llm_html(chunkr, sample_path):
|
107
|
-
response = chunkr.upload(sample_path, Configuration(
|
108
|
-
segmentation_strategy=SegmentationStrategy.PAGE,
|
109
|
-
segment_processing=SegmentProcessing(
|
110
|
-
page=GenerationConfig(
|
111
|
-
html=GenerationStrategy.LLM
|
112
|
-
)
|
113
|
-
)
|
114
|
-
))
|
115
|
-
assert isinstance(response, TaskResponse)
|
116
|
-
assert response.task_id is not None
|
117
|
-
assert response.status == "Succeeded"
|
118
|
-
assert response.output is not None
|
119
|
-
|
120
|
-
def test_page_llm(chunkr, sample_path):
|
121
|
-
response = chunkr.upload(sample_path, Configuration(
|
122
|
-
segmentation_strategy=SegmentationStrategy.PAGE,
|
123
|
-
segment_processing=SegmentProcessing(
|
124
|
-
page=GenerationConfig(
|
125
|
-
html=GenerationStrategy.LLM,
|
126
|
-
markdown=GenerationStrategy.LLM
|
127
|
-
)
|
128
|
-
)
|
129
|
-
))
|
130
|
-
assert isinstance(response, TaskResponse)
|
131
|
-
assert response.task_id is not None
|
132
|
-
assert response.status == "Succeeded"
|
133
|
-
assert response.output is not None
|
134
|
-
|
135
|
-
def test_json_schema(chunkr, sample_path):
|
136
|
-
response = chunkr.upload(sample_path, Configuration(
|
137
|
-
json_schema=JsonSchema(
|
138
|
-
title="Sales Data",
|
139
|
-
properties=[
|
140
|
-
Property(name="Person with highest sales", prop_type="string", description="The person with the highest sales"),
|
141
|
-
Property(name="Person with lowest sales", prop_type="string", description="The person with the lowest sales"),
|
142
|
-
]
|
143
|
-
)
|
144
|
-
))
|
145
|
-
assert isinstance(response, TaskResponse)
|
146
|
-
assert response.task_id is not None
|
147
|
-
assert response.status == "Succeeded"
|
148
|
-
assert response.output is not None
|
149
|
-
|
150
|
-
async def test_async_send_file_path(async_chunkr, sample_path):
|
151
|
-
response = await async_chunkr.upload(sample_path)
|
152
|
-
|
153
|
-
assert isinstance(response, TaskResponse)
|
154
|
-
assert response.task_id is not None
|
155
|
-
assert response.status == "Succeeded"
|
156
|
-
assert response.output is not None
|
157
|
-
|
158
|
-
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|