chunkr-ai 0.0.48__tar.gz → 0.0.49__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chunkr_ai-0.0.48/src/chunkr_ai.egg-info → chunkr_ai-0.0.49}/PKG-INFO +1 -1
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/pyproject.toml +1 -1
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/configuration.py +1 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/tests/test_chunkr.py +66 -13
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/LICENSE +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/README.md +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/setup.cfg +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/__init__.py +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/__init__.py +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/auth.py +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/chunkr.py +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/chunkr_base.py +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/decorators.py +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/misc.py +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/protocol.py +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/task_response.py +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/models.py +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai.egg-info/requires.txt +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai.egg-info/top_level.txt +0 -0
- {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/tests/test_file_handling.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "chunkr-ai"
|
7
|
-
version = "0.0.48"
|
7
|
+
version = "0.0.49"
|
8
8
|
authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
|
9
9
|
description = "Python client for Chunkr: open source document intelligence"
|
10
10
|
readme = "README.md"
|
@@ -23,6 +23,7 @@ class GenerationConfig(BaseModel):
|
|
23
23
|
markdown: Optional[GenerationStrategy] = None
|
24
24
|
crop_image: Optional[CroppingStrategy] = None
|
25
25
|
embed_sources: Optional[List[EmbedSource]] = Field(default_factory=lambda: [EmbedSource.MARKDOWN])
|
26
|
+
extended_context: Optional[bool] = None
|
26
27
|
|
27
28
|
class SegmentProcessing(BaseModel):
|
28
29
|
model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
|
@@ -2,27 +2,25 @@ import pytest
|
|
2
2
|
from pathlib import Path
|
3
3
|
from PIL import Image
|
4
4
|
import asyncio
|
5
|
-
import base64
|
6
|
-
import io
|
7
|
-
import tempfile
|
8
5
|
from typing import Awaitable
|
9
6
|
|
10
7
|
from chunkr_ai import Chunkr
|
11
8
|
from chunkr_ai.models import (
|
9
|
+
ChunkProcessing,
|
12
10
|
Configuration,
|
13
|
-
|
11
|
+
EmbedSource,
|
12
|
+
ErrorHandlingStrategy,
|
13
|
+
FallbackStrategy,
|
14
14
|
GenerationConfig,
|
15
|
+
GenerationStrategy,
|
16
|
+
LlmProcessing,
|
15
17
|
OcrStrategy,
|
16
18
|
Pipeline,
|
17
19
|
SegmentationStrategy,
|
18
20
|
SegmentProcessing,
|
19
|
-
|
21
|
+
Status,
|
20
22
|
TaskResponse,
|
21
|
-
EmbedSource,
|
22
|
-
ErrorHandlingStrategy,
|
23
23
|
Tokenizer,
|
24
|
-
LlmProcessing,
|
25
|
-
FallbackStrategy,
|
26
24
|
)
|
27
25
|
|
28
26
|
@pytest.fixture
|
@@ -167,6 +165,21 @@ def model_fallback_config():
|
|
167
165
|
),
|
168
166
|
)
|
169
167
|
|
168
|
+
@pytest.fixture
|
169
|
+
def extended_context_config():
|
170
|
+
return Configuration(
|
171
|
+
segment_processing=SegmentProcessing(
|
172
|
+
picture=GenerationConfig(
|
173
|
+
extended_context=True,
|
174
|
+
html=GenerationStrategy.LLM,
|
175
|
+
),
|
176
|
+
table=GenerationConfig(
|
177
|
+
extended_context=True,
|
178
|
+
html=GenerationStrategy.LLM,
|
179
|
+
)
|
180
|
+
),
|
181
|
+
)
|
182
|
+
|
170
183
|
@pytest.mark.asyncio
|
171
184
|
async def test_ocr_auto(client, sample_path):
|
172
185
|
response = await client.upload(sample_path, Configuration(ocr_strategy=OcrStrategy.AUTO))
|
@@ -265,9 +278,18 @@ async def test_cancel_task(client, sample_path):
|
|
265
278
|
@pytest.mark.asyncio
|
266
279
|
async def test_cancel_task_direct(client, sample_path):
|
267
280
|
task = await client.create_task(sample_path)
|
268
|
-
assert isinstance(task, Awaitable) and isinstance(task, TaskResponse)
|
269
281
|
assert task.status == "Starting"
|
270
|
-
|
282
|
+
try:
|
283
|
+
await task.cancel()
|
284
|
+
except Exception as e:
|
285
|
+
task = await client.get_task(task.task_id)
|
286
|
+
print(task.status)
|
287
|
+
if task.status == Status.PROCESSING:
|
288
|
+
print("Task is processing, so it can't be cancelled")
|
289
|
+
assert True
|
290
|
+
else:
|
291
|
+
print("Task status:", task.status)
|
292
|
+
raise e
|
271
293
|
assert task.status == "Cancelled"
|
272
294
|
|
273
295
|
@pytest.mark.asyncio
|
@@ -304,6 +326,7 @@ async def test_pipeline_type_azure(client, sample_path):
|
|
304
326
|
assert response.task_id is not None
|
305
327
|
assert response.status == "Succeeded"
|
306
328
|
assert response.output is not None
|
329
|
+
assert response.configuration.pipeline == Pipeline.AZURE
|
307
330
|
|
308
331
|
@pytest.mark.asyncio
|
309
332
|
async def test_pipeline_type_chunkr(client, sample_path):
|
@@ -311,7 +334,8 @@ async def test_pipeline_type_chunkr(client, sample_path):
|
|
311
334
|
assert response.task_id is not None
|
312
335
|
assert response.status == "Succeeded"
|
313
336
|
assert response.output is not None
|
314
|
-
|
337
|
+
assert response.configuration.pipeline == Pipeline.CHUNKR
|
338
|
+
|
315
339
|
@pytest.mark.asyncio
|
316
340
|
async def test_client_lifecycle(client, sample_path):
|
317
341
|
response1 = await client.upload(sample_path)
|
@@ -528,4 +552,33 @@ async def test_fallback_strategy_serialization():
|
|
528
552
|
# Test string representation
|
529
553
|
assert str(none_strategy) == "None"
|
530
554
|
assert str(default_strategy) == "Default"
|
531
|
-
assert str(model_strategy) == "Model(gpt-4.1)"
|
555
|
+
assert str(model_strategy) == "Model(gpt-4.1)"
|
556
|
+
|
557
|
+
@pytest.mark.asyncio
|
558
|
+
async def test_extended_context(client, sample_path, extended_context_config):
|
559
|
+
"""Tests uploading with extended context enabled for pictures and tables."""
|
560
|
+
print("\nTesting extended context for Pictures and Tables...")
|
561
|
+
try:
|
562
|
+
task = await client.upload(sample_path, config=extended_context_config)
|
563
|
+
print(f"Task created with extended context config: {task.task_id}")
|
564
|
+
print(f"Initial Status: {task.status}")
|
565
|
+
|
566
|
+
# Poll the task until it finishes or fails
|
567
|
+
print(f"Final Status: {task.status}")
|
568
|
+
print(f"Message: {task.message}")
|
569
|
+
|
570
|
+
# Basic assertion: Check if the task completed (either succeeded or failed)
|
571
|
+
assert task.status in [Status.SUCCEEDED, Status.FAILED], f"Task ended in unexpected state: {task.status}"
|
572
|
+
|
573
|
+
# More specific assertions based on expected outcomes with your local server
|
574
|
+
# if task.status == Status.FAILED:
|
575
|
+
# assert "context_length_exceeded" in task.message, "Expected context length error"
|
576
|
+
# elif task.status == Status.SUCCEEDED:
|
577
|
+
# # Check if output reflects extended context usage if possible
|
578
|
+
# pass
|
579
|
+
|
580
|
+
print("Extended context test completed.")
|
581
|
+
|
582
|
+
except Exception as e:
|
583
|
+
print(f"Error during extended context test: {e}")
|
584
|
+
raise # Re-raise the exception to fail the test explicitly
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|