chunkr-ai 0.0.50__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,27 +3,34 @@ from enum import Enum
3
3
  from typing import Any, List, Optional, Union
4
4
  from pydantic import field_validator, field_serializer
5
5
 
6
- class GenerationStrategy(str, Enum):
7
- LLM = "LLM"
8
- AUTO = "Auto"
9
-
10
6
  class CroppingStrategy(str, Enum):
11
7
  ALL = "All"
12
8
  AUTO = "Auto"
13
9
 
14
- class EmbedSource(str, Enum):
15
- HTML = "HTML"
10
+ class SegmentFormat(str, Enum):
11
+ HTML = "Html"
16
12
  MARKDOWN = "Markdown"
17
- LLM = "LLM"
13
+
14
+ class EmbedSource(str, Enum):
18
15
  CONTENT = "Content"
16
+ HTML = "HTML" # Deprecated
17
+ MARKDOWN = "Markdown" # Deprecated
18
+ LLM = "LLM"
19
+
20
+ class GenerationStrategy(str, Enum):
21
+ LLM = "LLM"
22
+ AUTO = "Auto"
19
23
 
20
24
  class GenerationConfig(BaseModel):
21
- html: Optional[GenerationStrategy] = None
25
+ format: Optional[SegmentFormat] = None
26
+ strategy: Optional[GenerationStrategy] = None
22
27
  llm: Optional[str] = None
23
- markdown: Optional[GenerationStrategy] = None
24
28
  crop_image: Optional[CroppingStrategy] = None
25
- embed_sources: Optional[List[EmbedSource]] = Field(default_factory=lambda: [EmbedSource.MARKDOWN])
29
+ embed_sources: Optional[List[EmbedSource]] = None
26
30
  extended_context: Optional[bool] = None
31
+ # Deprecated fields for backwards compatibility
32
+ html: Optional[GenerationStrategy] = None # Deprecated: Use format=SegmentFormat.HTML and strategy instead
33
+ markdown: Optional[GenerationStrategy] = None # Deprecated: Use format=SegmentFormat.MARKDOWN and strategy instead
27
34
 
28
35
  class SegmentProcessing(BaseModel):
29
36
  model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
@@ -246,7 +253,7 @@ class SegmentType(str, Enum):
246
253
 
247
254
  class Segment(BaseModel):
248
255
  bbox: BoundingBox
249
- content: str
256
+ content: str = ""
250
257
  page_height: float
251
258
  llm: Optional[str] = None
252
259
  html: Optional[str] = None
@@ -258,6 +265,7 @@ class Segment(BaseModel):
258
265
  segment_id: str
259
266
  segment_type: SegmentType
260
267
  confidence: Optional[float]
268
+ text: str = ""
261
269
 
262
270
  class Chunk(BaseModel):
263
271
  chunk_id: str
chunkr_ai/models.py CHANGED
@@ -15,6 +15,7 @@ from .api.configuration import (
15
15
  OcrStrategy,
16
16
  OutputResponse,
17
17
  Segment,
18
+ SegmentFormat,
18
19
  SegmentProcessing,
19
20
  SegmentType,
20
21
  SegmentationStrategy,
@@ -42,6 +43,7 @@ __all__ = [
42
43
  "OcrStrategy",
43
44
  "OutputResponse",
44
45
  "Segment",
46
+ "SegmentFormat",
45
47
  "SegmentProcessing",
46
48
  "SegmentType",
47
49
  "SegmentationStrategy",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chunkr-ai
3
- Version: 0.0.50
3
+ Version: 0.1.0
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -1,16 +1,16 @@
1
1
  chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
2
- chunkr_ai/models.py,sha256=L0L9CjY8SgSh9_Fzvo_nJXqKf_2urZHngMWtBVlAQAo,1006
2
+ chunkr_ai/models.py,sha256=1q4l7fSXU7cVfx2ZUcmc5EZ4K0AeUhCbvY9yaBbXL8E,1046
3
3
  chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  chunkr_ai/api/auth.py,sha256=0RSNFPvHt4Nrg8qtP2xvA2KbR0J_KUe1B_tKynbq9Fc,436
5
5
  chunkr_ai/api/chunkr.py,sha256=uSNYtB_mcs4-QRKsX7wZb8yv6ayXgRrJSDNZ-EbAyvc,3857
6
6
  chunkr_ai/api/chunkr_base.py,sha256=8roSPoCADmaXM2r7zz2iHfZzIcY9NopOfa4j-dfk8RA,6310
7
- chunkr_ai/api/configuration.py,sha256=M0oAXJmGBakOAKywg_Eyoz4GLginBrXuSvCtG0_BznQ,9926
7
+ chunkr_ai/api/configuration.py,sha256=MVAxKe8vTSUMy0AHOPWyJEOgmIL-rPTsAQ8Z83gGXew,10287
8
8
  chunkr_ai/api/decorators.py,sha256=w1l_ZEkl99C-BO3qRTbi74sYwHDFspB1Bjt1Arv9lPc,4384
9
9
  chunkr_ai/api/misc.py,sha256=AaGLxZlMzNgVPwErskDRKc2UVGkC0JwxLXU-enPwzA0,5354
10
10
  chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
11
11
  chunkr_ai/api/task_response.py,sha256=VYa62E08VlZUyjn2YslnY4cohdK9e53HbEzsaYIXKXM,8028
12
- chunkr_ai-0.0.50.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
13
- chunkr_ai-0.0.50.dist-info/METADATA,sha256=ZlM0XHhykHUOLEsNCF52OmMEw2QlvgSUy972FQjgD2s,7053
14
- chunkr_ai-0.0.50.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
15
- chunkr_ai-0.0.50.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
16
- chunkr_ai-0.0.50.dist-info/RECORD,,
12
+ chunkr_ai-0.1.0.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
13
+ chunkr_ai-0.1.0.dist-info/METADATA,sha256=nRApqY3sX17_8fqSwfgYUH7Sy4Ahg5ybpxU9ZTXvdH4,7052
14
+ chunkr_ai-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
+ chunkr_ai-0.1.0.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
16
+ chunkr_ai-0.1.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5