chunkr-ai 0.1.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -251,6 +251,42 @@ class SegmentType(str, Enum):
251
251
  TEXT = "Text"
252
252
  TITLE = "Title"
253
253
 
254
+ class Alignment(str, Enum):
255
+ LEFT = "Left"
256
+ CENTER = "Center"
257
+ RIGHT = "Right"
258
+ JUSTIFY = "Justify"
259
+
260
+ class VerticalAlignment(str, Enum):
261
+ TOP = "Top"
262
+ MIDDLE = "Middle"
263
+ BOTTOM = "Bottom"
264
+ BASELINE = "Baseline"
265
+
266
+ class CellStyle(BaseModel):
267
+ bg_color: Optional[str] = None
268
+ text_color: Optional[str] = None
269
+ font_face: Optional[str] = None
270
+ is_bold: Optional[bool] = None
271
+ align: Optional[Alignment] = None
272
+ valign: Optional[VerticalAlignment] = None
273
+
274
+ class Cell(BaseModel):
275
+ cell_id: str
276
+ text: str
277
+ range: str
278
+ formula: Optional[str] = None
279
+ value: Optional[str] = None
280
+ hyperlink: Optional[str] = None
281
+ style: Optional[CellStyle] = None
282
+
283
+ class Page(BaseModel):
284
+ image: str
285
+ page_number: int
286
+ page_height: float
287
+ page_width: float
288
+ ss_sheet_name: Optional[str] = None
289
+
254
290
  class Segment(BaseModel):
255
291
  bbox: BoundingBox
256
292
  content: str = ""
@@ -266,6 +302,15 @@ class Segment(BaseModel):
266
302
  segment_type: SegmentType
267
303
  confidence: Optional[float]
268
304
  text: str = ""
305
+ segment_length: Optional[int] = None
306
+ # Spreadsheet-specific fields
307
+ ss_cells: Optional[List[Cell]] = None
308
+ ss_header_bbox: Optional[BoundingBox] = None
309
+ ss_header_ocr: Optional[List[OCRResult]] = None
310
+ ss_header_text: Optional[str] = None
311
+ ss_header_range: Optional[str] = None
312
+ ss_range: Optional[str] = None
313
+ ss_sheet_name: Optional[str] = None
269
314
 
270
315
  class Chunk(BaseModel):
271
316
  chunk_id: str
@@ -276,6 +321,8 @@ class Chunk(BaseModel):
276
321
  class OutputResponse(BaseModel):
277
322
  chunks: List[Chunk]
278
323
  file_name: Optional[str]
324
+ mime_type: Optional[str] = None
325
+ pages: Optional[List[Page]] = None
279
326
  page_count: Optional[int]
280
327
  pdf_url: Optional[str]
281
328
 
chunkr_ai/models.py CHANGED
@@ -1,5 +1,8 @@
1
1
  from .api.configuration import (
2
+ Alignment,
2
3
  BoundingBox,
4
+ Cell,
5
+ CellStyle,
3
6
  Chunk,
4
7
  ChunkProcessing,
5
8
  Configuration,
@@ -14,6 +17,7 @@ from .api.configuration import (
14
17
  OCRResult,
15
18
  OcrStrategy,
16
19
  OutputResponse,
20
+ Page,
17
21
  Segment,
18
22
  SegmentFormat,
19
23
  SegmentProcessing,
@@ -23,11 +27,15 @@ from .api.configuration import (
23
27
  Pipeline,
24
28
  Tokenizer,
25
29
  TokenizerType,
30
+ VerticalAlignment,
26
31
  )
27
32
  from .api.task_response import TaskResponse
28
33
 
29
34
  __all__ = [
35
+ "Alignment",
30
36
  "BoundingBox",
37
+ "Cell",
38
+ "CellStyle",
31
39
  "Chunk",
32
40
  "ChunkProcessing",
33
41
  "Configuration",
@@ -42,6 +50,7 @@ __all__ = [
42
50
  "OCRResult",
43
51
  "OcrStrategy",
44
52
  "OutputResponse",
53
+ "Page",
45
54
  "Segment",
46
55
  "SegmentFormat",
47
56
  "SegmentProcessing",
@@ -52,4 +61,5 @@ __all__ = [
52
61
  "Pipeline",
53
62
  "Tokenizer",
54
63
  "TokenizerType",
64
+ "VerticalAlignment",
55
65
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chunkr-ai
3
- Version: 0.1.0
3
+ Version: 0.3.1
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -28,6 +28,7 @@ Project-URL: Homepage, https://chunkr.ai
28
28
  Description-Content-Type: text/markdown
29
29
  License-File: LICENSE
30
30
  Requires-Dist: httpx>=0.25.0
31
+ Requires-Dist: matplotlib>=3.10.3
31
32
  Requires-Dist: nest-asyncio>=1.6.0
32
33
  Requires-Dist: pillow>=10.0.0
33
34
  Requires-Dist: pydantic>=2.0.0
@@ -1,16 +1,16 @@
1
1
  chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
2
- chunkr_ai/models.py,sha256=1q4l7fSXU7cVfx2ZUcmc5EZ4K0AeUhCbvY9yaBbXL8E,1046
2
+ chunkr_ai/models.py,sha256=NvFJOpsgzEyYHhE-flp7Yr9tpTDvFmF4T87jttFRquU,1202
3
3
  chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  chunkr_ai/api/auth.py,sha256=0RSNFPvHt4Nrg8qtP2xvA2KbR0J_KUe1B_tKynbq9Fc,436
5
5
  chunkr_ai/api/chunkr.py,sha256=uSNYtB_mcs4-QRKsX7wZb8yv6ayXgRrJSDNZ-EbAyvc,3857
6
6
  chunkr_ai/api/chunkr_base.py,sha256=8roSPoCADmaXM2r7zz2iHfZzIcY9NopOfa4j-dfk8RA,6310
7
- chunkr_ai/api/configuration.py,sha256=MVAxKe8vTSUMy0AHOPWyJEOgmIL-rPTsAQ8Z83gGXew,10287
7
+ chunkr_ai/api/configuration.py,sha256=y_jd3K5GB-P8N3uym4wqHDVq-Rq-VT_bhqJqgKs0PVg,11586
8
8
  chunkr_ai/api/decorators.py,sha256=w1l_ZEkl99C-BO3qRTbi74sYwHDFspB1Bjt1Arv9lPc,4384
9
9
  chunkr_ai/api/misc.py,sha256=AaGLxZlMzNgVPwErskDRKc2UVGkC0JwxLXU-enPwzA0,5354
10
10
  chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
11
11
  chunkr_ai/api/task_response.py,sha256=VYa62E08VlZUyjn2YslnY4cohdK9e53HbEzsaYIXKXM,8028
12
- chunkr_ai-0.1.0.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
13
- chunkr_ai-0.1.0.dist-info/METADATA,sha256=nRApqY3sX17_8fqSwfgYUH7Sy4Ahg5ybpxU9ZTXvdH4,7052
14
- chunkr_ai-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- chunkr_ai-0.1.0.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
16
- chunkr_ai-0.1.0.dist-info/RECORD,,
12
+ chunkr_ai-0.3.1.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
13
+ chunkr_ai-0.3.1.dist-info/METADATA,sha256=_Lg59OcvE1hpsbc3zg20yQFGQ2bpAqOXSx_o6_1UlzY,7086
14
+ chunkr_ai-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
+ chunkr_ai-0.3.1.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
16
+ chunkr_ai-0.3.1.dist-info/RECORD,,