chunkr-ai 0.0.15__tar.gz → 0.0.16__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (27) hide show
  1. {chunkr_ai-0.0.15/src/chunkr_ai.egg-info → chunkr_ai-0.0.16}/PKG-INFO +1 -1
  2. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/pyproject.toml +1 -1
  3. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/config.py +3 -0
  4. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/misc.py +1 -0
  5. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task_base.py +3 -1
  6. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/models.py +2 -0
  7. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
  8. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/tests/test_chunkr.py +23 -0
  9. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/LICENSE +0 -0
  10. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/README.md +0 -0
  11. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/setup.cfg +0 -0
  12. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/__init__.py +0 -0
  13. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/__init__.py +0 -0
  14. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/api.py +0 -0
  15. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/auth.py +0 -0
  16. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/base.py +0 -0
  17. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr.py +0 -0
  18. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr_async.py +0 -0
  19. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/chunkr_base.py +0 -0
  20. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/protocol.py +0 -0
  21. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/schema.py +0 -0
  22. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task.py +0 -0
  23. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai/api/task_async.py +0 -0
  24. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
  25. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  26. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/requires.txt +0 -0
  27. {chunkr_ai-0.0.15 → chunkr_ai-0.0.16}/src/chunkr_ai.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.15
3
+ Version: 0.0.16
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.15"
7
+ version = "0.0.16"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
@@ -127,6 +127,8 @@ class Model(str, Enum):
127
127
  FAST = "Fast"
128
128
  HIGH_QUALITY = "HighQuality"
129
129
 
130
+ class PipelineType(str, Enum):
131
+ AZURE = "Azure"
130
132
 
131
133
  class Configuration(BaseModel):
132
134
  chunk_processing: Optional[ChunkProcessing] = Field(default=None)
@@ -139,6 +141,7 @@ class Configuration(BaseModel):
139
141
  ocr_strategy: Optional[OcrStrategy] = Field(default=None)
140
142
  segment_processing: Optional[SegmentProcessing] = Field(default=None)
141
143
  segmentation_strategy: Optional[SegmentationStrategy] = Field(default=None)
144
+ pipeline: Optional[PipelineType] = Field(default=None)
142
145
 
143
146
  @model_validator(mode="before")
144
147
  def map_deprecated_fields(cls, values: Dict) -> Dict:
@@ -3,6 +3,7 @@ import io
3
3
  import json
4
4
  from pathlib import Path
5
5
  from PIL import Image
6
+ from pydantic import BaseModel
6
7
  import requests
7
8
  from typing import Union, Tuple, BinaryIO, Optional
8
9
 
@@ -7,7 +7,6 @@ from datetime import datetime
7
7
 
8
8
  T = TypeVar("T", bound="TaskBase")
9
9
 
10
-
11
10
  class TaskBase(BaseModel, ABC, Generic[T]):
12
11
  configuration: Configuration
13
12
  created_at: datetime
@@ -63,12 +62,15 @@ class TaskBase(BaseModel, ABC, Generic[T]):
63
62
  return None
64
63
 
65
64
  def html(self) -> str:
65
+ """Get the full HTML of the task"""
66
66
  return self._get_content("html")
67
67
 
68
68
  def markdown(self) -> str:
69
+ """Get the full markdown of the task"""
69
70
  return self._get_content("markdown")
70
71
 
71
72
  def content(self) -> str:
73
+ """Get the full content of the task"""
72
74
  return self._get_content("content")
73
75
 
74
76
  def _get_content(self, t: str) -> str:
@@ -18,6 +18,7 @@ from .api.config import (
18
18
  SegmentType,
19
19
  SegmentationStrategy,
20
20
  Status,
21
+ PipelineType,
21
22
  )
22
23
 
23
24
  from .api.task import TaskResponse
@@ -45,4 +46,5 @@ __all__ = [
45
46
  "Status",
46
47
  "TaskResponse",
47
48
  "TaskResponseAsync",
49
+ "PipelineType",
48
50
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.15
3
+ Version: 0.0.16
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  Project-URL: Homepage, https://chunkr.ai
@@ -10,6 +10,7 @@ from chunkr_ai.models import (
10
10
  GenerationConfig,
11
11
  JsonSchema,
12
12
  OcrStrategy,
13
+ PipelineType,
13
14
  Property,
14
15
  SegmentationStrategy,
15
16
  SegmentProcessing,
@@ -38,6 +39,14 @@ def sample_image():
38
39
  return img
39
40
 
40
41
 
42
+ @pytest.fixture(params=[
43
+ pytest.param(None, id="none_pipeline"),
44
+ pytest.param(PipelineType.AZURE, id="azure_pipeline"),
45
+ ])
46
+ def pipeline_type(request):
47
+ return request.param
48
+
49
+
41
50
  @pytest.mark.asyncio
42
51
  async def test_send_file_path(chunkr_client, sample_path):
43
52
  client_type, client = chunkr_client
@@ -411,3 +420,17 @@ async def test_update_task_direct(chunkr_client, sample_path):
411
420
  assert task.status == "Succeeded"
412
421
  assert task.output is not None
413
422
  assert task.configuration.segmentation_strategy == SegmentationStrategy.PAGE
423
+
424
+
425
+ @pytest.mark.asyncio
426
+ async def test_pipeline_type(chunkr_client, sample_path, pipeline_type):
427
+ client_type, client = chunkr_client
428
+ response = (
429
+ await client.upload(sample_path, Configuration(pipeline=pipeline_type))
430
+ if client_type == "async"
431
+ else client.upload(sample_path, Configuration(pipeline=pipeline_type))
432
+ )
433
+
434
+ assert response.task_id is not None
435
+ assert response.status == "Succeeded"
436
+ assert response.output is not None
File without changes
File without changes
File without changes