chunkr-ai 0.0.37__py3-none-any.whl → 0.0.38__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- chunkr_ai/api/configuration.py +11 -11
- chunkr_ai/api/task_response.py +3 -3
- {chunkr_ai-0.0.37.dist-info → chunkr_ai-0.0.38.dist-info}/METADATA +1 -1
- {chunkr_ai-0.0.37.dist-info → chunkr_ai-0.0.38.dist-info}/RECORD +7 -7
- {chunkr_ai-0.0.37.dist-info → chunkr_ai-0.0.38.dist-info}/LICENSE +0 -0
- {chunkr_ai-0.0.37.dist-info → chunkr_ai-0.0.38.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.0.37.dist-info → chunkr_ai-0.0.38.dist-info}/top_level.txt +0 -0
chunkr_ai/api/configuration.py
CHANGED
@@ -19,20 +19,18 @@ class GenerationConfig(BaseModel):
|
|
19
19
|
class SegmentProcessing(BaseModel):
|
20
20
|
model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
|
21
21
|
|
22
|
-
title: Optional[GenerationConfig] = Field(default=None, alias="Title")
|
23
|
-
section_header: Optional[GenerationConfig] = Field(
|
24
|
-
default=None, alias="SectionHeader"
|
25
|
-
)
|
26
|
-
text: Optional[GenerationConfig] = Field(default=None, alias="Text")
|
27
|
-
list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
|
28
|
-
table: Optional[GenerationConfig] = Field(default=None, alias="Table")
|
29
|
-
picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
|
30
22
|
caption: Optional[GenerationConfig] = Field(default=None, alias="Caption")
|
31
|
-
formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
|
32
23
|
footnote: Optional[GenerationConfig] = Field(default=None, alias="Footnote")
|
33
|
-
|
34
|
-
|
24
|
+
formula: Optional[GenerationConfig] = Field(default=None, alias="Formula")
|
25
|
+
list_item: Optional[GenerationConfig] = Field(default=None, alias="ListItem")
|
35
26
|
page: Optional[GenerationConfig] = Field(default=None, alias="Page")
|
27
|
+
page_footer: Optional[GenerationConfig] = Field(default=None, alias="PageFooter")
|
28
|
+
page_header: Optional[GenerationConfig] = Field(default=None, alias="PageHeader")
|
29
|
+
picture: Optional[GenerationConfig] = Field(default=None, alias="Picture")
|
30
|
+
section_header: Optional[GenerationConfig] = Field(default=None, alias="SectionHeader")
|
31
|
+
table: Optional[GenerationConfig] = Field(default=None, alias="Table")
|
32
|
+
text: Optional[GenerationConfig] = Field(default=None, alias="Text")
|
33
|
+
title: Optional[GenerationConfig] = Field(default=None, alias="Title")
|
36
34
|
|
37
35
|
class ChunkProcessing(BaseModel):
|
38
36
|
ignore_headers_and_footers: Optional[bool] = None
|
@@ -84,11 +82,13 @@ class Segment(BaseModel):
|
|
84
82
|
page_width: float
|
85
83
|
segment_id: str
|
86
84
|
segment_type: SegmentType
|
85
|
+
confidence: Optional[float]
|
87
86
|
|
88
87
|
class Chunk(BaseModel):
|
89
88
|
chunk_id: str
|
90
89
|
chunk_length: int
|
91
90
|
segments: List[Segment]
|
91
|
+
embed: Optional[str] = None
|
92
92
|
|
93
93
|
class OutputResponse(BaseModel):
|
94
94
|
chunks: List[Chunk]
|
chunkr_ai/api/task_response.py
CHANGED
@@ -144,7 +144,7 @@ class TaskResponse(BaseModel, Generic[T]):
|
|
144
144
|
Args:
|
145
145
|
output_file (str, optional): Path to save the markdown content. Defaults to None.
|
146
146
|
"""
|
147
|
-
content = self._get_content("markdown")
|
147
|
+
content = self._get_content("markdown", separator="\n\n")
|
148
148
|
self._write_to_file(content, output_file)
|
149
149
|
return content
|
150
150
|
|
@@ -168,7 +168,7 @@ class TaskResponse(BaseModel, Generic[T]):
|
|
168
168
|
self._write_to_file(data, output_file, is_json=True)
|
169
169
|
return data
|
170
170
|
|
171
|
-
def _get_content(self, t: str) -> str:
|
171
|
+
def _get_content(self, t: str, separator: str = "\n") -> str:
|
172
172
|
if not self.output:
|
173
173
|
return ""
|
174
174
|
parts = []
|
@@ -177,4 +177,4 @@ class TaskResponse(BaseModel, Generic[T]):
|
|
177
177
|
v = getattr(s, t)
|
178
178
|
if v:
|
179
179
|
parts.append(v)
|
180
|
-
return "\n".join(parts)
|
180
|
+
return separator.join(parts)
|
@@ -4,13 +4,13 @@ chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
chunkr_ai/api/auth.py,sha256=hlv0GiUmlsbFO1wLL9sslqOnsBSoBqkL_6Mk2SDvxgE,413
|
5
5
|
chunkr_ai/api/chunkr.py,sha256=BzwcKNCuLfVR-HzgY8tKStsW4pIDVVjBgnEqPLyUUMM,3292
|
6
6
|
chunkr_ai/api/chunkr_base.py,sha256=FDl0Ew8eOY4hur5FFqPENZiq9YQy0G3XWEqcKPeCO-U,6130
|
7
|
-
chunkr_ai/api/configuration.py,sha256=
|
7
|
+
chunkr_ai/api/configuration.py,sha256=KrXKcC1Yd7wfK8JMfihlWjNxlDyzKydr1Pe1_r1DTZw,3885
|
8
8
|
chunkr_ai/api/decorators.py,sha256=VJX4qGBIL00K2zY8bh5KAMWv7SltJ38TvPJH06FnFss,4415
|
9
9
|
chunkr_ai/api/misc.py,sha256=QN-2YWQ8e3VvvK63Ua-e8jsx6gxVxkO88Z96yWOofu0,3653
|
10
10
|
chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
|
11
|
-
chunkr_ai/api/task_response.py,sha256=
|
12
|
-
chunkr_ai-0.0.
|
13
|
-
chunkr_ai-0.0.
|
14
|
-
chunkr_ai-0.0.
|
15
|
-
chunkr_ai-0.0.
|
16
|
-
chunkr_ai-0.0.
|
11
|
+
chunkr_ai/api/task_response.py,sha256=ti_2VTYtYS9FWyW-QIm16rp6qhs8RVy4vvgCZUkI2wA,6328
|
12
|
+
chunkr_ai-0.0.38.dist-info/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
|
13
|
+
chunkr_ai-0.0.38.dist-info/METADATA,sha256=R_ZY3lS_hw4velP2QW-YKoK9UW9GZxr_y_qpblbZCYI,7031
|
14
|
+
chunkr_ai-0.0.38.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
15
|
+
chunkr_ai-0.0.38.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
16
|
+
chunkr_ai-0.0.38.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|