chunkr-ai 0.0.15__tar.gz → 0.0.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chunkr_ai-0.0.15/src/chunkr_ai.egg-info → chunkr_ai-0.0.16}/PKG-INFO +1 -1
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/pyproject.toml +1 -1
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/config.py +3 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/misc.py +1 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task_base.py +3 -1
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/models.py +2 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/tests/test_chunkr.py +23 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/LICENSE +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/README.md +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/setup.cfg +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/__init__.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/__init__.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/api.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/auth.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/base.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr_async.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr_base.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/protocol.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/schema.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task_async.py +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/requires.txt +0 -0
- {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "chunkr-ai"
|
7
|
-
version = "0.0.15"
|
7
|
+
version = "0.0.16"
|
8
8
|
authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
|
9
9
|
description = "Python client for Chunkr: open source document intelligence"
|
10
10
|
readme = "README.md"
|
@@ -127,6 +127,8 @@ class Model(str, Enum):
|
|
127
127
|
FAST = "Fast"
|
128
128
|
HIGH_QUALITY = "HighQuality"
|
129
129
|
|
130
|
+
class PipelineType(str, Enum):
|
131
|
+
AZURE = "Azure"
|
130
132
|
|
131
133
|
class Configuration(BaseModel):
|
132
134
|
chunk_processing: Optional[ChunkProcessing] = Field(default=None)
|
@@ -139,6 +141,7 @@ class Configuration(BaseModel):
|
|
139
141
|
ocr_strategy: Optional[OcrStrategy] = Field(default=None)
|
140
142
|
segment_processing: Optional[SegmentProcessing] = Field(default=None)
|
141
143
|
segmentation_strategy: Optional[SegmentationStrategy] = Field(default=None)
|
144
|
+
pipeline: Optional[PipelineType] = Field(default=None)
|
142
145
|
|
143
146
|
@model_validator(mode="before")
|
144
147
|
def map_deprecated_fields(cls, values: Dict) -> Dict:
|
@@ -7,7 +7,6 @@ from datetime import datetime
|
|
7
7
|
|
8
8
|
T = TypeVar("T", bound="TaskBase")
|
9
9
|
|
10
|
-
|
11
10
|
class TaskBase(BaseModel, ABC, Generic[T]):
|
12
11
|
configuration: Configuration
|
13
12
|
created_at: datetime
|
@@ -63,12 +62,15 @@ class TaskBase(BaseModel, ABC, Generic[T]):
|
|
63
62
|
return None
|
64
63
|
|
65
64
|
def html(self) -> str:
|
65
|
+
"""Get the full HTML of the task"""
|
66
66
|
return self._get_content("html")
|
67
67
|
|
68
68
|
def markdown(self) -> str:
|
69
|
+
"""Get the full markdown of the task"""
|
69
70
|
return self._get_content("markdown")
|
70
71
|
|
71
72
|
def content(self) -> str:
|
73
|
+
"""Get the full content of the task"""
|
72
74
|
return self._get_content("content")
|
73
75
|
|
74
76
|
def _get_content(self, t: str) -> str:
|
@@ -18,6 +18,7 @@ from .api.config import (
|
|
18
18
|
SegmentType,
|
19
19
|
SegmentationStrategy,
|
20
20
|
Status,
|
21
|
+
PipelineType,
|
21
22
|
)
|
22
23
|
|
23
24
|
from .api.task import TaskResponse
|
@@ -45,4 +46,5 @@ __all__ = [
|
|
45
46
|
"Status",
|
46
47
|
"TaskResponse",
|
47
48
|
"TaskResponseAsync",
|
49
|
+
"PipelineType",
|
48
50
|
]
|
@@ -10,6 +10,7 @@ from chunkr_ai.models import (
|
|
10
10
|
GenerationConfig,
|
11
11
|
JsonSchema,
|
12
12
|
OcrStrategy,
|
13
|
+
PipelineType,
|
13
14
|
Property,
|
14
15
|
SegmentationStrategy,
|
15
16
|
SegmentProcessing,
|
@@ -38,6 +39,14 @@ def sample_image():
|
|
38
39
|
return img
|
39
40
|
|
40
41
|
|
42
|
+
@pytest.fixture(params=[
|
43
|
+
pytest.param(None, id="none_pipeline"),
|
44
|
+
pytest.param(PipelineType.AZURE, id="azure_pipeline"),
|
45
|
+
])
|
46
|
+
def pipeline_type(request):
|
47
|
+
return request.param
|
48
|
+
|
49
|
+
|
41
50
|
@pytest.mark.asyncio
|
42
51
|
async def test_send_file_path(chunkr_client, sample_path):
|
43
52
|
client_type, client = chunkr_client
|
@@ -411,3 +420,17 @@ async def test_update_task_direct(chunkr_client, sample_path):
|
|
411
420
|
assert task.status == "Succeeded"
|
412
421
|
assert task.output is not None
|
413
422
|
assert task.configuration.segmentation_strategy == SegmentationStrategy.PAGE
|
423
|
+
|
424
|
+
|
425
|
+
@pytest.mark.asyncio
|
426
|
+
async def test_pipeline_type(chunkr_client, sample_path, pipeline_type):
|
427
|
+
client_type, client = chunkr_client
|
428
|
+
response = (
|
429
|
+
await client.upload(sample_path, Configuration(pipeline=pipeline_type))
|
430
|
+
if client_type == "async"
|
431
|
+
else client.upload(sample_path, Configuration(pipeline=pipeline_type))
|
432
|
+
)
|
433
|
+
|
434
|
+
assert response.task_id is not None
|
435
|
+
assert response.status == "Succeeded"
|
436
|
+
assert response.output is not None
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|