chunkr-ai 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/api/configuration.py +47 -0
- chunkr_ai/models.py +10 -0
- {chunkr_ai-0.1.0.dist-info → chunkr_ai-0.3.0.dist-info}/METADATA +2 -1
- {chunkr_ai-0.1.0.dist-info → chunkr_ai-0.3.0.dist-info}/RECORD +7 -7
- {chunkr_ai-0.1.0.dist-info → chunkr_ai-0.3.0.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.1.0.dist-info → chunkr_ai-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {chunkr_ai-0.1.0.dist-info → chunkr_ai-0.3.0.dist-info}/top_level.txt +0 -0
chunkr_ai/api/configuration.py
CHANGED
@@ -251,6 +251,42 @@ class SegmentType(str, Enum):
|
|
251
251
|
TEXT = "Text"
|
252
252
|
TITLE = "Title"
|
253
253
|
|
254
|
+
class Alignment(str, Enum):
|
255
|
+
LEFT = "Left"
|
256
|
+
CENTER = "Center"
|
257
|
+
RIGHT = "Right"
|
258
|
+
JUSTIFY = "Justify"
|
259
|
+
|
260
|
+
class VerticalAlignment(str, Enum):
|
261
|
+
TOP = "Top"
|
262
|
+
MIDDLE = "Middle"
|
263
|
+
BOTTOM = "Bottom"
|
264
|
+
BASELINE = "Baseline"
|
265
|
+
|
266
|
+
class CellStyle(BaseModel):
|
267
|
+
bg_color: Optional[str] = None
|
268
|
+
text_color: Optional[str] = None
|
269
|
+
font_face: Optional[str] = None
|
270
|
+
is_bold: Optional[bool] = None
|
271
|
+
align: Optional[Alignment] = None
|
272
|
+
valign: Optional[VerticalAlignment] = None
|
273
|
+
|
274
|
+
class Cell(BaseModel):
|
275
|
+
cell_id: str
|
276
|
+
text: str
|
277
|
+
range: str
|
278
|
+
formula: Optional[str] = None
|
279
|
+
value: Optional[str] = None
|
280
|
+
hyperlink: Optional[str] = None
|
281
|
+
style: Optional[CellStyle] = None
|
282
|
+
|
283
|
+
class Page(BaseModel):
|
284
|
+
image: str
|
285
|
+
page_number: int
|
286
|
+
page_height: float
|
287
|
+
page_width: float
|
288
|
+
ss_sheet_name: Optional[str] = None
|
289
|
+
|
254
290
|
class Segment(BaseModel):
|
255
291
|
bbox: BoundingBox
|
256
292
|
content: str = ""
|
@@ -266,6 +302,15 @@ class Segment(BaseModel):
|
|
266
302
|
segment_type: SegmentType
|
267
303
|
confidence: Optional[float]
|
268
304
|
text: str = ""
|
305
|
+
segment_length: Optional[int] = None
|
306
|
+
# Spreadsheet-specific fields
|
307
|
+
ss_cells: Optional[List[Cell]] = None
|
308
|
+
ss_header_bbox: Optional[BoundingBox] = None
|
309
|
+
ss_header_ocr: Optional[List[OCRResult]] = None
|
310
|
+
ss_header_text: Optional[str] = None
|
311
|
+
ss_header_range: Optional[str] = None
|
312
|
+
ss_range: Optional[str] = None
|
313
|
+
ss_sheet_name: Optional[str] = None
|
269
314
|
|
270
315
|
class Chunk(BaseModel):
|
271
316
|
chunk_id: str
|
@@ -276,6 +321,8 @@ class Chunk(BaseModel):
|
|
276
321
|
class OutputResponse(BaseModel):
|
277
322
|
chunks: List[Chunk]
|
278
323
|
file_name: Optional[str]
|
324
|
+
mime_type: Optional[str] = None
|
325
|
+
pages: Optional[List[Page]] = None
|
279
326
|
page_count: Optional[int]
|
280
327
|
pdf_url: Optional[str]
|
281
328
|
|
chunkr_ai/models.py
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
from .api.configuration import (
|
2
|
+
Alignment,
|
2
3
|
BoundingBox,
|
4
|
+
Cell,
|
5
|
+
CellStyle,
|
3
6
|
Chunk,
|
4
7
|
ChunkProcessing,
|
5
8
|
Configuration,
|
@@ -14,6 +17,7 @@ from .api.configuration import (
|
|
14
17
|
OCRResult,
|
15
18
|
OcrStrategy,
|
16
19
|
OutputResponse,
|
20
|
+
Page,
|
17
21
|
Segment,
|
18
22
|
SegmentFormat,
|
19
23
|
SegmentProcessing,
|
@@ -23,11 +27,15 @@ from .api.configuration import (
|
|
23
27
|
Pipeline,
|
24
28
|
Tokenizer,
|
25
29
|
TokenizerType,
|
30
|
+
VerticalAlignment,
|
26
31
|
)
|
27
32
|
from .api.task_response import TaskResponse
|
28
33
|
|
29
34
|
__all__ = [
|
35
|
+
"Alignment",
|
30
36
|
"BoundingBox",
|
37
|
+
"Cell",
|
38
|
+
"CellStyle",
|
31
39
|
"Chunk",
|
32
40
|
"ChunkProcessing",
|
33
41
|
"Configuration",
|
@@ -42,6 +50,7 @@ __all__ = [
|
|
42
50
|
"OCRResult",
|
43
51
|
"OcrStrategy",
|
44
52
|
"OutputResponse",
|
53
|
+
"Page",
|
45
54
|
"Segment",
|
46
55
|
"SegmentFormat",
|
47
56
|
"SegmentProcessing",
|
@@ -52,4 +61,5 @@ __all__ = [
|
|
52
61
|
"Pipeline",
|
53
62
|
"Tokenizer",
|
54
63
|
"TokenizerType",
|
64
|
+
"VerticalAlignment",
|
55
65
|
]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.1.0
|
3
|
+
Version: 0.3.0
|
4
4
|
Summary: Python client for Chunkr: open source document intelligence
|
5
5
|
Author-email: Ishaan Kapoor <ishaan@lumina.sh>
|
6
6
|
License: MIT License
|
@@ -28,6 +28,7 @@ Project-URL: Homepage, https://chunkr.ai
|
|
28
28
|
Description-Content-Type: text/markdown
|
29
29
|
License-File: LICENSE
|
30
30
|
Requires-Dist: httpx>=0.25.0
|
31
|
+
Requires-Dist: matplotlib>=3.10.3
|
31
32
|
Requires-Dist: nest-asyncio>=1.6.0
|
32
33
|
Requires-Dist: pillow>=10.0.0
|
33
34
|
Requires-Dist: pydantic>=2.0.0
|
@@ -1,16 +1,16 @@
|
|
1
1
|
chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
|
2
|
-
chunkr_ai/models.py,sha256=
|
2
|
+
chunkr_ai/models.py,sha256=NvFJOpsgzEyYHhE-flp7Yr9tpTDvFmF4T87jttFRquU,1202
|
3
3
|
chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
chunkr_ai/api/auth.py,sha256=0RSNFPvHt4Nrg8qtP2xvA2KbR0J_KUe1B_tKynbq9Fc,436
|
5
5
|
chunkr_ai/api/chunkr.py,sha256=uSNYtB_mcs4-QRKsX7wZb8yv6ayXgRrJSDNZ-EbAyvc,3857
|
6
6
|
chunkr_ai/api/chunkr_base.py,sha256=8roSPoCADmaXM2r7zz2iHfZzIcY9NopOfa4j-dfk8RA,6310
|
7
|
-
chunkr_ai/api/configuration.py,sha256=
|
7
|
+
chunkr_ai/api/configuration.py,sha256=y_jd3K5GB-P8N3uym4wqHDVq-Rq-VT_bhqJqgKs0PVg,11586
|
8
8
|
chunkr_ai/api/decorators.py,sha256=w1l_ZEkl99C-BO3qRTbi74sYwHDFspB1Bjt1Arv9lPc,4384
|
9
9
|
chunkr_ai/api/misc.py,sha256=AaGLxZlMzNgVPwErskDRKc2UVGkC0JwxLXU-enPwzA0,5354
|
10
10
|
chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
|
11
11
|
chunkr_ai/api/task_response.py,sha256=VYa62E08VlZUyjn2YslnY4cohdK9e53HbEzsaYIXKXM,8028
|
12
|
-
chunkr_ai-0.
|
13
|
-
chunkr_ai-0.
|
14
|
-
chunkr_ai-0.
|
15
|
-
chunkr_ai-0.
|
16
|
-
chunkr_ai-0.
|
12
|
+
chunkr_ai-0.3.0.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
|
13
|
+
chunkr_ai-0.3.0.dist-info/METADATA,sha256=RCgp4cjj3CduWuM1ycLczz7iBHohtaEXLv8CZseephw,7086
|
14
|
+
chunkr_ai-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
15
|
+
chunkr_ai-0.3.0.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
16
|
+
chunkr_ai-0.3.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|