chunkr-ai 0.0.50__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/api/configuration.py +19 -11
- chunkr_ai/models.py +2 -0
- {chunkr_ai-0.0.50.dist-info → chunkr_ai-0.1.0.dist-info}/METADATA +1 -1
- {chunkr_ai-0.0.50.dist-info → chunkr_ai-0.1.0.dist-info}/RECORD +7 -7
- {chunkr_ai-0.0.50.dist-info → chunkr_ai-0.1.0.dist-info}/WHEEL +1 -1
- {chunkr_ai-0.0.50.dist-info → chunkr_ai-0.1.0.dist-info}/licenses/LICENSE +0 -0
- {chunkr_ai-0.0.50.dist-info → chunkr_ai-0.1.0.dist-info}/top_level.txt +0 -0
chunkr_ai/api/configuration.py
CHANGED
@@ -3,27 +3,34 @@ from enum import Enum
|
|
3
3
|
from typing import Any, List, Optional, Union
|
4
4
|
from pydantic import field_validator, field_serializer
|
5
5
|
|
6
|
-
class GenerationStrategy(str, Enum):
|
7
|
-
LLM = "LLM"
|
8
|
-
AUTO = "Auto"
|
9
|
-
|
10
6
|
class CroppingStrategy(str, Enum):
|
11
7
|
ALL = "All"
|
12
8
|
AUTO = "Auto"
|
13
9
|
|
14
|
-
class EmbedSource(str, Enum):
|
15
|
-
HTML = "HTML"
|
10
|
+
class SegmentFormat(str, Enum):
|
11
|
+
HTML = "Html"
|
16
12
|
MARKDOWN = "Markdown"
|
17
|
-
|
13
|
+
|
14
|
+
class EmbedSource(str, Enum):
|
18
15
|
CONTENT = "Content"
|
16
|
+
HTML = "HTML" # Deprecated
|
17
|
+
MARKDOWN = "Markdown" # Deprecated
|
18
|
+
LLM = "LLM"
|
19
|
+
|
20
|
+
class GenerationStrategy(str, Enum):
|
21
|
+
LLM = "LLM"
|
22
|
+
AUTO = "Auto"
|
19
23
|
|
20
24
|
class GenerationConfig(BaseModel):
|
21
|
-
|
25
|
+
format: Optional[SegmentFormat] = None
|
26
|
+
strategy: Optional[GenerationStrategy] = None
|
22
27
|
llm: Optional[str] = None
|
23
|
-
markdown: Optional[GenerationStrategy] = None
|
24
28
|
crop_image: Optional[CroppingStrategy] = None
|
25
|
-
embed_sources: Optional[List[EmbedSource]] =
|
29
|
+
embed_sources: Optional[List[EmbedSource]] = None
|
26
30
|
extended_context: Optional[bool] = None
|
31
|
+
# Deprecated fields for backwards compatibility
|
32
|
+
html: Optional[GenerationStrategy] = None # Deprecated: Use format=SegmentFormat.HTML and strategy instead
|
33
|
+
markdown: Optional[GenerationStrategy] = None # Deprecated: Use format=SegmentFormat.MARKDOWN and strategy instead
|
27
34
|
|
28
35
|
class SegmentProcessing(BaseModel):
|
29
36
|
model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
|
@@ -246,7 +253,7 @@ class SegmentType(str, Enum):
|
|
246
253
|
|
247
254
|
class Segment(BaseModel):
|
248
255
|
bbox: BoundingBox
|
249
|
-
content: str
|
256
|
+
content: str = ""
|
250
257
|
page_height: float
|
251
258
|
llm: Optional[str] = None
|
252
259
|
html: Optional[str] = None
|
@@ -258,6 +265,7 @@ class Segment(BaseModel):
|
|
258
265
|
segment_id: str
|
259
266
|
segment_type: SegmentType
|
260
267
|
confidence: Optional[float]
|
268
|
+
text: str = ""
|
261
269
|
|
262
270
|
class Chunk(BaseModel):
|
263
271
|
chunk_id: str
|
chunkr_ai/models.py
CHANGED
@@ -15,6 +15,7 @@ from .api.configuration import (
|
|
15
15
|
OcrStrategy,
|
16
16
|
OutputResponse,
|
17
17
|
Segment,
|
18
|
+
SegmentFormat,
|
18
19
|
SegmentProcessing,
|
19
20
|
SegmentType,
|
20
21
|
SegmentationStrategy,
|
@@ -42,6 +43,7 @@ __all__ = [
|
|
42
43
|
"OcrStrategy",
|
43
44
|
"OutputResponse",
|
44
45
|
"Segment",
|
46
|
+
"SegmentFormat",
|
45
47
|
"SegmentProcessing",
|
46
48
|
"SegmentType",
|
47
49
|
"SegmentationStrategy",
|
@@ -1,16 +1,16 @@
|
|
1
1
|
chunkr_ai/__init__.py,sha256=6KpYv2lmD6S5z2kc9pqwuLP5VDHmOuu2qDZArUIhb1s,53
|
2
|
-
chunkr_ai/models.py,sha256=
|
2
|
+
chunkr_ai/models.py,sha256=1q4l7fSXU7cVfx2ZUcmc5EZ4K0AeUhCbvY9yaBbXL8E,1046
|
3
3
|
chunkr_ai/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
chunkr_ai/api/auth.py,sha256=0RSNFPvHt4Nrg8qtP2xvA2KbR0J_KUe1B_tKynbq9Fc,436
|
5
5
|
chunkr_ai/api/chunkr.py,sha256=uSNYtB_mcs4-QRKsX7wZb8yv6ayXgRrJSDNZ-EbAyvc,3857
|
6
6
|
chunkr_ai/api/chunkr_base.py,sha256=8roSPoCADmaXM2r7zz2iHfZzIcY9NopOfa4j-dfk8RA,6310
|
7
|
-
chunkr_ai/api/configuration.py,sha256=
|
7
|
+
chunkr_ai/api/configuration.py,sha256=MVAxKe8vTSUMy0AHOPWyJEOgmIL-rPTsAQ8Z83gGXew,10287
|
8
8
|
chunkr_ai/api/decorators.py,sha256=w1l_ZEkl99C-BO3qRTbi74sYwHDFspB1Bjt1Arv9lPc,4384
|
9
9
|
chunkr_ai/api/misc.py,sha256=AaGLxZlMzNgVPwErskDRKc2UVGkC0JwxLXU-enPwzA0,5354
|
10
10
|
chunkr_ai/api/protocol.py,sha256=LjPrYSq52m1afIlAo0yVGXlGZxPRh8J6g7S4PAit3Zo,388
|
11
11
|
chunkr_ai/api/task_response.py,sha256=VYa62E08VlZUyjn2YslnY4cohdK9e53HbEzsaYIXKXM,8028
|
12
|
-
chunkr_ai-0.0.
|
13
|
-
chunkr_ai-0.0.
|
14
|
-
chunkr_ai-0.0.
|
15
|
-
chunkr_ai-0.0.
|
16
|
-
chunkr_ai-0.0.
|
12
|
+
chunkr_ai-0.1.0.dist-info/licenses/LICENSE,sha256=w3R12yNDyZpMiy2lxy_hvNbsldC75ww79sF0u11rkho,1069
|
13
|
+
chunkr_ai-0.1.0.dist-info/METADATA,sha256=nRApqY3sX17_8fqSwfgYUH7Sy4Ahg5ybpxU9ZTXvdH4,7052
|
14
|
+
chunkr_ai-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
15
|
+
chunkr_ai-0.1.0.dist-info/top_level.txt,sha256=0IZY7PZIiS8bw5r4NUQRUQ-ATi-L_3vLQVq3ZLouOW8,10
|
16
|
+
chunkr_ai-0.1.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|