chunkr-ai 0.0.37__py3-none-any.whl → 0.0.39__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -19,20 +19,18 @@ class GenerationConfig(BaseModel):
19
19
  class SegmentProcessing(BaseModel):
20
20
  model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
21
21
 
22
- title: Optional[GenerationConfig] = Field(default=None, alias="Title")
23
- section_header: Optional[GenerationConfig] = Field(
24
- default=None, alias="SectionHeader"
25
- )
26
- text: Optional[GenerationConfig] = Field(default=None, alias="Text")
27
- list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
28
- table: Optional[GenerationConfig] = Field(default=None, alias="Table")
29
- picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
30
22
  caption: Optional[GenerationConfig] = Field(default=None, alias="Caption")
31
- formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
32
23
  footnote: Optional[GenerationConfig] = Field(default=None, alias="Footnote")
33
- page_header: Optional[GenerationConfig] = Field(default=None, alias="PageHeader")
34
- page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
24
+ formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
25
+ list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
35
26
  page: Optional[GenerationConfig] = Field(default=None, alias="Page")
27
+ page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
28
+ page_header: Optional[GenerationConfig] = Field(default=None, alias="PageHeader")
29
+ picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
30
+ section_header: Optional[GenerationConfig] = Field(default=None, alias="SectionHeader")
31
+ table: Optional[GenerationConfig] = Field(default=None, alias="Table")
32
+ text: Optional[GenerationConfig] = Field(default=None, alias="Text")
33
+ title: Optional[GenerationConfig] = Field(default=None, alias="Title")
36
34
 
37
35
  class ChunkProcessing(BaseModel):
38
36
  ignore_headers_and_footers: Optional[bool] = None
@@ -84,11 +82,13 @@ class Segment(BaseModel):
84
82
  page_width: float
85
83
  segment_id: str
86
84
  segment_type: SegmentType
85
+ confidence: Optional[float]
87
86
 
88
87
  class Chunk(BaseModel):
89
88
  chunk_id: str
90
89
  chunk_length: int
91
90
  segments: List[Segment]
91
+ embed: Optional[str] = None
92
92
 
93
93
  class OutputResponse(BaseModel):
94
94
  chunks: List[Chunk]
@@ -144,7 +144,7 @@ class TaskResponse(BaseModel, Generic[T]):
144
144
  Args:
145
145
  output_file (str, optional): Path to save the markdown content. Defaults to None.
146
146
  """
147
- content = self._get_content("markdown")
147
+ content = self._get_content("markdown", separator="\n\n")
148
148
  self._write_to_file(content, output_file)
149
149
  return content
150
150
 
@@ -168,7 +168,7 @@ class TaskResponse(BaseModel, Generic[T]):
168
168
  self._write_to_file(data, output_file, is_json=True)
169
169
  return data
170
170
 
171
- def _get_content(self, t: str) -> str:
171
+ def _get_content(self, t: str, separator: str = "\n") -> str:
172
172
  if not self.output:
173
173
  return ""
174
174
  parts = []
@@ -177,4 +177,4 @@ class TaskResponse(BaseModel, Generic[T]):
177
177
  v = getattr(s, t)
178
178
  if v:
179
179
  parts.append(v)
180
- return "\n".join(parts)
180
+ return separator.join(parts)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chunkr-ai
3
- Version: 0.0.37
3
+ Version: 0.0.39
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -4,13 +4,13 @@ chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
5
5
  chunkr_ai/api/chunkr.py,sha256=BzwcKNCuLfVR-HzgY8tKStsW4pIDVVjBgnEqPLyUUMM,3292
6
6
  chunkr_ai/api/chunkr_base.py,sha256=FDl0Ew8eOY4hur5FFqPENZiq9YQy0G3XWEqcKPeCO-U,6130
7
- chunkr_ai/api/configuration.py,sha256=2Bfw_c8eQVijb0EvsexiuRbF1pZUspYFBMuZ-ErJHvs,3835
7
+ chunkr_ai/api/configuration.py,sha256=KrXKcC1Yd7wfK8JMfihlWjNxlDyzKydr1Pe1_r1DTZw,3885
8
8
  chunkr_ai/api/decorators.py,sha256=VJX4qGBIL00K2zY8bh5KAMWv7SltJ38TvPJH06FnFss,4415
9
9
  chunkr_ai/api/misc.py,sha256=QN-2YWQ8e3VvvK63Ua-e8jsx6gxVxkO88Z96yWOofu0,3653
10
10
  chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
11
- chunkr_ai/api/task_response.py,sha256=E1H5Cmn9GSYHX60f3Iz8hcTItPv6DpBEEO2vP2vcKDM,6282
12
- chunkr_ai-0.0.37.dist-info/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
13
- chunkr_ai-0.0.37.dist-info/METADATA,sha256=eKMu3F3yYMO2v1i-PAgUKEz1LyU_ManhLOpZqY7RiPo,7031
14
- chunkr_ai-0.0.37.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
15
- chunkr_ai-0.0.37.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
16
- chunkr_ai-0.0.37.dist-info/RECORD,,
11
+ chunkr_ai/api/task_response.py,sha256=ti_2VTYtYS9FWyW-QIm16rp6qhs8RVy4vvgCZUkI2wA,6328
12
+ chunkr_ai-0.0.39.dist-info/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
13
+ chunkr_ai-0.0.39.dist-info/METADATA,sha256=Q99T7lpBksl0FuWYaEb6k9Qblp3gk8fnq9-yHLN2i9Y,7031
14
+ chunkr_ai-0.0.39.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
15
+ chunkr_ai-0.0.39.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
16
+ chunkr_ai-0.0.39.dist-info/RECORD,,