chunkr-ai 0.0.50__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -3,27 +3,34 @@ from enum import Enum
3
3
  from typing import Any, List, Optional, Union
4
4
  from pydantic import field_validator, field_serializer
5
5
 
6
- class GenerationStrategy(str, Enum):
7
- LLM = "LLM"
8
- AUTO = "Auto"
9
-
10
6
  class CroppingStrategy(str, Enum):
11
7
  ALL = "All"
12
8
  AUTO = "Auto"
13
9
 
14
- class EmbedSource(str, Enum):
15
- HTML = "HTML"
10
+ class SegmentFormat(str, Enum):
11
+ HTML = "Html"
16
12
  MARKDOWN = "Markdown"
17
- LLM = "LLM"
13
+
14
+ class EmbedSource(str, Enum):
18
15
  CONTENT = "Content"
16
+ HTML = "HTML" # Deprecated
17
+ MARKDOWN = "Markdown" # Deprecated
18
+ LLM = "LLM"
19
+
20
+ class GenerationStrategy(str, Enum):
21
+ LLM = "LLM"
22
+ AUTO = "Auto"
19
23
 
20
24
  class GenerationConfig(BaseModel):
21
- html: Optional[GenerationStrategy] = None
25
+ format: Optional[SegmentFormat] = None
26
+ strategy: Optional[GenerationStrategy] = None
22
27
  llm: Optional[str] = None
23
- markdown: Optional[GenerationStrategy] = None
24
28
  crop_image: Optional[CroppingStrategy] = None
25
- embed_sources: Optional[List[EmbedSource]] = Field(default_factory=lambda: [EmbedSource.MARKDOWN])
29
+ embed_sources: Optional[List[EmbedSource]] = None
26
30
  extended_context: Optional[bool] = None
31
+ # Deprecated fields for backwards compatibility
32
+ html: Optional[GenerationStrategy] = None # Deprecated: Use format=SegmentFormat.HTML and strategy instead
33
+ markdown: Optional[GenerationStrategy] = None # Deprecated: Use format=SegmentFormat.MARKDOWN and strategy instead
27
34
 
28
35
  class SegmentProcessing(BaseModel):
29
36
  model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
@@ -244,9 +251,45 @@ class SegmentType(str, Enum):
244
251
  TEXT = "Text"
245
252
  TITLE = "Title"
246
253
 
254
+ class Alignment(str, Enum):
255
+ LEFT = "Left"
256
+ CENTER = "Center"
257
+ RIGHT = "Right"
258
+ JUSTIFY = "Justify"
259
+
260
+ class VerticalAlignment(str, Enum):
261
+ TOP = "Top"
262
+ MIDDLE = "Middle"
263
+ BOTTOM = "Bottom"
264
+ BASELINE = "Baseline"
265
+
266
+ class CellStyle(BaseModel):
267
+ bg_color: Optional[str] = None
268
+ text_color: Optional[str] = None
269
+ font_face: Optional[str] = None
270
+ is_bold: Optional[bool] = None
271
+ align: Optional[Alignment] = None
272
+ valign: Optional[VerticalAlignment] = None
273
+
274
+ class Cell(BaseModel):
275
+ cell_id: str
276
+ text: str
277
+ range: str
278
+ formula: Optional[str] = None
279
+ value: Optional[str] = None
280
+ hyperlink: Optional[str] = None
281
+ style: Optional[CellStyle] = None
282
+
283
+ class Page(BaseModel):
284
+ image: str
285
+ page_number: int
286
+ page_height: float
287
+ page_width: float
288
+ ss_sheet_name: Optional[str] = None
289
+
247
290
  class Segment(BaseModel):
248
291
  bbox: BoundingBox
249
- content: str
292
+ content: str = ""
250
293
  page_height: float
251
294
  llm: Optional[str] = None
252
295
  html: Optional[str] = None
@@ -258,6 +301,16 @@ class Segment(BaseModel):
258
301
  segment_id: str
259
302
  segment_type: SegmentType
260
303
  confidence: Optional[float]
304
+ text: str = ""
305
+ segment_length: Optional[int] = None
306
+ # Spreadsheet-specific fields
307
+ ss_cells: Optional[List[Cell]] = None
308
+ ss_header_bbox: Optional[BoundingBox] = None
309
+ ss_header_ocr: Optional[List[OCRResult]] = None
310
+ ss_header_text: Optional[str] = None
311
+ ss_header_range: Optional[str] = None
312
+ ss_range: Optional[str] = None
313
+ ss_sheet_name: Optional[str] = None
261
314
 
262
315
  class Chunk(BaseModel):
263
316
  chunk_id: str
@@ -268,6 +321,8 @@ class Chunk(BaseModel):
268
321
  class OutputResponse(BaseModel):
269
322
  chunks: List[Chunk]
270
323
  file_name: Optional[str]
324
+ mime_type: Optional[str] = None
325
+ pages: Optional[List[Page]] = None
271
326
  page_count: Optional[int]
272
327
  pdf_url: Optional[str]
273
328
 
chunkr_ai/models.py CHANGED
@@ -1,5 +1,8 @@
1
1
  from .api.configuration import (
2
+ Alignment,
2
3
  BoundingBox,
4
+ Cell,
5
+ CellStyle,
3
6
  Chunk,
4
7
  ChunkProcessing,
5
8
  Configuration,
@@ -14,7 +17,9 @@ from .api.configuration import (
14
17
  OCRResult,
15
18
  OcrStrategy,
16
19
  OutputResponse,
20
+ Page,
17
21
  Segment,
22
+ SegmentFormat,
18
23
  SegmentProcessing,
19
24
  SegmentType,
20
25
  SegmentationStrategy,
@@ -22,11 +27,15 @@ from .api.configuration import (
22
27
  Pipeline,
23
28
  Tokenizer,
24
29
  TokenizerType,
30
+ VerticalAlignment,
25
31
  )
26
32
  from .api.task_response import TaskResponse
27
33
 
28
34
  __all__ = [
35
+ "Alignment",
29
36
  "BoundingBox",
37
+ "Cell",
38
+ "CellStyle",
30
39
  "Chunk",
31
40
  "ChunkProcessing",
32
41
  "Configuration",
@@ -41,7 +50,9 @@ __all__ = [
41
50
  "OCRResult",
42
51
  "OcrStrategy",
43
52
  "OutputResponse",
53
+ "Page",
44
54
  "Segment",
55
+ "SegmentFormat",
45
56
  "SegmentProcessing",
46
57
  "SegmentType",
47
58
  "SegmentationStrategy",
@@ -50,4 +61,5 @@ __all__ = [
50
61
  "Pipeline",
51
62
  "Tokenizer",
52
63
  "TokenizerType",
64
+ "VerticalAlignment",
53
65
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chunkr-ai
3
- Version: 0.0.50
3
+ Version: 0.3.0
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -28,6 +28,7 @@ Project-URL: Homepage, https://chunkr.ai
28
28
  Description-Content-Type: text/markdown
29
29
  License-File: LICENSE
30
30
  Requires-Dist: httpx>=0.25.0
31
+ Requires-Dist: matplotlib>=3.10.3
31
32
  Requires-Dist: nest-asyncio>=1.6.0
32
33
  Requires-Dist: pillow>=10.0.0
33
34
  Requires-Dist: pydantic>=2.0.0
@@ -1,16 +1,16 @@
1
1
  chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
2
- chunkr_ai/models.py,sha256=L0L9CjY8SgSh9_Fzvo_nJXqKf_2urZHngMWtBVlAQAo,1006
2
+ chunkr_ai/models.py,sha256=NvFJOpsgzEyYHhE-flp7Yr9tpTDvFmF4T87jttFRquU,1202
3
3
  chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  chunkr_ai/api/auth.py,sha256=0RSNFPvHt4Nrg8qtP2xvA2KbR0J_KUe1B_tKynbq9Fc,436
5
5
  chunkr_ai/api/chunkr.py,sha256=uSNYtB_mcs4-QRKsX7wZb8yv6ayXgRrJSDNZ-EbAyvc,3857
6
6
  chunkr_ai/api/chunkr_base.py,sha256=8roSPoCADmaXM2r7zz2iHfZzIcY9NopOfa4j-dfk8RA,6310
7
- chunkr_ai/api/configuration.py,sha256=M0oAXJmGBakOAKywg_Eyoz4GLginBrXuSvCtG0_BznQ,9926
7
+ chunkr_ai/api/configuration.py,sha256=y_jd3K5GB-P8N3uym4wqHDVq-Rq-VT_bhqJqgKs0PVg,11586
8
8
  chunkr_ai/api/decorators.py,sha256=w1l_ZEkl99C-BO3qRTbi74sYwHDFspB1Bjt1Arv9lPc,4384
9
9
  chunkr_ai/api/misc.py,sha256=AaGLxZlMzNgVPwErskDRKc2UVGkC0JwxLXU-enPwzA0,5354
10
10
  chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
11
11
  chunkr_ai/api/task_response.py,sha256=VYa62E08VlZUyjn2YslnY4cohdK9e53HbEzsaYIXKXM,8028
12
- chunkr_ai-0.0.50.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
13
- chunkr_ai-0.0.50.dist-info/METADATA,sha256=ZlM0XHhykHUOLEsNCF52OmMEw2QlvgSUy972FQjgD2s,7053
14
- chunkr_ai-0.0.50.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
15
- chunkr_ai-0.0.50.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
16
- chunkr_ai-0.0.50.dist-info/RECORD,,
12
+ chunkr_ai-0.3.0.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
13
+ chunkr_ai-0.3.0.dist-info/METADATA,sha256=RCgp4cjj3CduWuM1ycLczz7iBHohtaEXLv8CZseephw,7086
14
+ chunkr_ai-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
+ chunkr_ai-0.3.0.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
16
+ chunkr_ai-0.3.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5