chunkr-ai 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/_client.py +18 -9
- chunkr_ai/_files.py +1 -1
- chunkr_ai/_version.py +1 -1
- chunkr_ai/pagination.py +61 -1
- chunkr_ai/resources/__init__.py +27 -13
- chunkr_ai/resources/files.py +712 -0
- chunkr_ai/resources/tasks/__init__.py +33 -0
- chunkr_ai/resources/tasks/parse.py +612 -0
- chunkr_ai/resources/tasks/tasks.py +596 -0
- chunkr_ai/types/__init__.py +7 -19
- chunkr_ai/types/delete.py +10 -0
- chunkr_ai/types/file.py +30 -0
- chunkr_ai/types/file_create_params.py +17 -0
- chunkr_ai/types/file_list_params.py +28 -0
- chunkr_ai/types/file_url.py +15 -0
- chunkr_ai/types/file_url_params.py +15 -0
- chunkr_ai/types/files_page_response.py +20 -0
- chunkr_ai/types/task.py +866 -27
- chunkr_ai/types/tasks/__init__.py +6 -0
- chunkr_ai/types/tasks/parse_create_params.py +844 -0
- chunkr_ai/types/tasks/parse_update_params.py +838 -0
- {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a3.dist-info}/METADATA +39 -21
- chunkr_ai-0.1.0a3.dist-info/RECORD +52 -0
- chunkr_ai/resources/task.py +0 -1166
- chunkr_ai/types/auto_generation_config.py +0 -39
- chunkr_ai/types/auto_generation_config_param.py +0 -39
- chunkr_ai/types/bounding_box.py +0 -19
- chunkr_ai/types/chunk_processing.py +0 -40
- chunkr_ai/types/chunk_processing_param.py +0 -42
- chunkr_ai/types/ignore_generation_config.py +0 -39
- chunkr_ai/types/ignore_generation_config_param.py +0 -39
- chunkr_ai/types/llm_generation_config.py +0 -39
- chunkr_ai/types/llm_generation_config_param.py +0 -39
- chunkr_ai/types/llm_processing.py +0 -36
- chunkr_ai/types/llm_processing_param.py +0 -36
- chunkr_ai/types/picture_generation_config.py +0 -39
- chunkr_ai/types/picture_generation_config_param.py +0 -39
- chunkr_ai/types/segment_processing.py +0 -280
- chunkr_ai/types/segment_processing_param.py +0 -281
- chunkr_ai/types/table_generation_config.py +0 -39
- chunkr_ai/types/table_generation_config_param.py +0 -39
- chunkr_ai/types/task_parse_params.py +0 -90
- chunkr_ai/types/task_update_params.py +0 -90
- chunkr_ai-0.1.0a1.dist-info/RECORD +0 -58
- {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a3.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a3.dist-info}/licenses/LICENSE +0 -0
@@ -1,90 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from __future__ import annotations
|
4
|
-
|
5
|
-
from typing import Optional
|
6
|
-
from typing_extensions import Literal, Required, TypedDict
|
7
|
-
|
8
|
-
from .llm_processing_param import LlmProcessingParam
|
9
|
-
from .chunk_processing_param import ChunkProcessingParam
|
10
|
-
from .segment_processing_param import SegmentProcessingParam
|
11
|
-
|
12
|
-
__all__ = ["TaskParseParams"]
|
13
|
-
|
14
|
-
|
15
|
-
class TaskParseParams(TypedDict, total=False):
|
16
|
-
file: Required[str]
|
17
|
-
"""The file to be uploaded. Can be a URL or a base64 encoded file."""
|
18
|
-
|
19
|
-
chunk_processing: Optional[ChunkProcessingParam]
|
20
|
-
"""Controls the setting for the chunking and post-processing of each chunk."""
|
21
|
-
|
22
|
-
error_handling: Optional[Literal["Fail", "Continue"]]
|
23
|
-
"""Controls how errors are handled during processing:
|
24
|
-
|
25
|
-
- `Fail`: Stops processing and fails the task when any error occurs
|
26
|
-
- `Continue`: Attempts to continue processing despite non-critical errors (eg.
|
27
|
-
LLM refusals etc.)
|
28
|
-
"""
|
29
|
-
|
30
|
-
expires_in: Optional[int]
|
31
|
-
"""
|
32
|
-
The number of seconds until task is deleted. Expired tasks can **not** be
|
33
|
-
updated, polled or accessed via web interface.
|
34
|
-
"""
|
35
|
-
|
36
|
-
file_name: Optional[str]
|
37
|
-
"""The name of the file to be uploaded. If not set a name will be generated."""
|
38
|
-
|
39
|
-
llm_processing: Optional[LlmProcessingParam]
|
40
|
-
"""Controls the LLM used for the task."""
|
41
|
-
|
42
|
-
ocr_strategy: Optional[Literal["All", "Auto"]]
|
43
|
-
"""Controls the Optical Character Recognition (OCR) strategy.
|
44
|
-
|
45
|
-
- `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
|
46
|
-
- `Auto`: Selectively applies OCR only to pages with missing or low-quality
|
47
|
-
text. When text layer is present the bounding boxes from the text layer are
|
48
|
-
used.
|
49
|
-
"""
|
50
|
-
|
51
|
-
pipeline: Optional[Literal["Azure", "Chunkr"]]
|
52
|
-
"""
|
53
|
-
Choose the provider whose models will be used for segmentation and OCR. The
|
54
|
-
output will be unified to the Chunkr `output` format.
|
55
|
-
"""
|
56
|
-
|
57
|
-
segment_processing: Optional[SegmentProcessingParam]
|
58
|
-
"""Defines how each segment type is handled when generating the final output.
|
59
|
-
|
60
|
-
Each segment uses one of three strategies. The chosen strategy controls: •
|
61
|
-
Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`). • How the
|
62
|
-
content is produced (rule-based vs. LLM). • The output format (`Html` or
|
63
|
-
`Markdown`).
|
64
|
-
|
65
|
-
Optional flags such as image **cropping**, **extended context**, and **LLM
|
66
|
-
descriptions** further refine behaviour.
|
67
|
-
|
68
|
-
---
|
69
|
-
|
70
|
-
**Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
|
71
|
-
`Caption`, `Footnote` → **Auto** (Markdown) • `Table` → **LLM** (HTML,
|
72
|
-
description on) • `Picture` → **LLM** (Markdown, description on, cropping _All_)
|
73
|
-
• `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
|
74
|
-
**Ignore** (removed from output)
|
75
|
-
|
76
|
-
---
|
77
|
-
|
78
|
-
**Strategy reference** • **Auto** – rule-based content generation. • **LLM** –
|
79
|
-
generate content with an LLM. • **Ignore** – exclude the segment entirely.
|
80
|
-
"""
|
81
|
-
|
82
|
-
segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]]
|
83
|
-
"""Controls the segmentation strategy:
|
84
|
-
|
85
|
-
- `LayoutAnalysis`: Analyzes pages for layout elements (e.g., `Table`,
|
86
|
-
`Picture`, `Formula`, etc.) using bounding boxes. Provides fine-grained
|
87
|
-
segmentation and better chunking. (Latency penalty: ~TBD seconds per page).
|
88
|
-
- `Page`: Treats each page as a single segment. Faster processing, but without
|
89
|
-
layout element detection and only simple chunking.
|
90
|
-
"""
|
@@ -1,90 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from __future__ import annotations
|
4
|
-
|
5
|
-
from typing import Optional
|
6
|
-
from typing_extensions import Literal, TypedDict
|
7
|
-
|
8
|
-
from .llm_processing_param import LlmProcessingParam
|
9
|
-
from .chunk_processing_param import ChunkProcessingParam
|
10
|
-
from .segment_processing_param import SegmentProcessingParam
|
11
|
-
|
12
|
-
__all__ = ["TaskUpdateParams"]
|
13
|
-
|
14
|
-
|
15
|
-
class TaskUpdateParams(TypedDict, total=False):
|
16
|
-
chunk_processing: Optional[ChunkProcessingParam]
|
17
|
-
"""Controls the setting for the chunking and post-processing of each chunk."""
|
18
|
-
|
19
|
-
error_handling: Optional[Literal["Fail", "Continue"]]
|
20
|
-
"""Controls how errors are handled during processing:
|
21
|
-
|
22
|
-
- `Fail`: Stops processing and fails the task when any error occurs
|
23
|
-
- `Continue`: Attempts to continue processing despite non-critical errors (eg.
|
24
|
-
LLM refusals etc.)
|
25
|
-
"""
|
26
|
-
|
27
|
-
expires_in: Optional[int]
|
28
|
-
"""
|
29
|
-
The number of seconds until task is deleted. Expired tasks can **not** be
|
30
|
-
updated, polled or accessed via web interface.
|
31
|
-
"""
|
32
|
-
|
33
|
-
high_resolution: Optional[bool]
|
34
|
-
"""Whether to use high-resolution images for cropping and post-processing.
|
35
|
-
|
36
|
-
(Latency penalty: ~7 seconds per page)
|
37
|
-
"""
|
38
|
-
|
39
|
-
llm_processing: Optional[LlmProcessingParam]
|
40
|
-
"""Controls the LLM used for the task."""
|
41
|
-
|
42
|
-
ocr_strategy: Optional[Literal["All", "Auto"]]
|
43
|
-
"""Controls the Optical Character Recognition (OCR) strategy.
|
44
|
-
|
45
|
-
- `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
|
46
|
-
- `Auto`: Selectively applies OCR only to pages with missing or low-quality
|
47
|
-
text. When text layer is present the bounding boxes from the text layer are
|
48
|
-
used.
|
49
|
-
"""
|
50
|
-
|
51
|
-
pipeline: Optional[Literal["Azure", "Chunkr"]]
|
52
|
-
"""
|
53
|
-
Choose the provider whose models will be used for segmentation and OCR. The
|
54
|
-
output will be unified to the Chunkr `output` format.
|
55
|
-
"""
|
56
|
-
|
57
|
-
segment_processing: Optional[SegmentProcessingParam]
|
58
|
-
"""Defines how each segment type is handled when generating the final output.
|
59
|
-
|
60
|
-
Each segment uses one of three strategies. The chosen strategy controls: •
|
61
|
-
Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`). • How the
|
62
|
-
content is produced (rule-based vs. LLM). • The output format (`Html` or
|
63
|
-
`Markdown`).
|
64
|
-
|
65
|
-
Optional flags such as image **cropping**, **extended context**, and **LLM
|
66
|
-
descriptions** further refine behaviour.
|
67
|
-
|
68
|
-
---
|
69
|
-
|
70
|
-
**Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
|
71
|
-
`Caption`, `Footnote` → **Auto** (Markdown) • `Table` → **LLM** (HTML,
|
72
|
-
description on) • `Picture` → **LLM** (Markdown, description on, cropping _All_)
|
73
|
-
• `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
|
74
|
-
**Ignore** (removed from output)
|
75
|
-
|
76
|
-
---
|
77
|
-
|
78
|
-
**Strategy reference** • **Auto** – rule-based content generation. • **LLM** –
|
79
|
-
generate content with an LLM. • **Ignore** – exclude the segment entirely.
|
80
|
-
"""
|
81
|
-
|
82
|
-
segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]]
|
83
|
-
"""Controls the segmentation strategy:
|
84
|
-
|
85
|
-
- `LayoutAnalysis`: Analyzes pages for layout elements (e.g., `Table`,
|
86
|
-
`Picture`, `Formula`, etc.) using bounding boxes. Provides fine-grained
|
87
|
-
segmentation and better chunking. (Latency penalty: ~TBD seconds per page).
|
88
|
-
- `Page`: Treats each page as a single segment. Faster processing, but without
|
89
|
-
layout element detection and only simple chunking.
|
90
|
-
"""
|
@@ -1,58 +0,0 @@
|
|
1
|
-
chunkr_ai/__init__.py,sha256=scS30uHiCpLbaalKTAJSCFSTqnu_b9R5JCkTu2hmbzU,2587
|
2
|
-
chunkr_ai/_base_client.py,sha256=Nv5b_rmVdmmPbF42mlOfymbSC6lxcYsrsvBhKSBDXWQ,67038
|
3
|
-
chunkr_ai/_client.py,sha256=FHxLInwFr7bHnlt2oiZZCdGrYul-6uzkQk1byBxGTdE,15335
|
4
|
-
chunkr_ai/_compat.py,sha256=VWemUKbj6DDkQ-O4baSpHVLJafotzeXmCQGJugfVTIw,6580
|
5
|
-
chunkr_ai/_constants.py,sha256=S14PFzyN9-I31wiV7SmIlL5Ga0MLHxdvegInGdXH7tM,462
|
6
|
-
chunkr_ai/_exceptions.py,sha256=ClgXUcwf4qhBTXnK4LzUPQCFdFldRxAlcYdOFFgpTxA,3220
|
7
|
-
chunkr_ai/_files.py,sha256=KnEzGi_O756MvKyJ4fOCW_u3JhOeWPQ4RsmDvqihDQU,3545
|
8
|
-
chunkr_ai/_models.py,sha256=KvjsMfb88XZlFUKVoOxr8OyDj47MhoH2OKqWNEbBhk4,30010
|
9
|
-
chunkr_ai/_qs.py,sha256=AOkSz4rHtK4YI3ZU_kzea-zpwBUgEY8WniGmTPyEimc,4846
|
10
|
-
chunkr_ai/_resource.py,sha256=f5tiwjxcKdbeMor8idoHtMFTUhqD9yc2xXtq5rqeLLk,1100
|
11
|
-
chunkr_ai/_response.py,sha256=xXNpF53hiYARmAW7npKuxQ5UHAEjgAzm7ME_L3eIstY,28800
|
12
|
-
chunkr_ai/_streaming.py,sha256=ZmyrVWk7-AWkLAATR55WgNxnyFzYmaqJt2LthA_PTqQ,10100
|
13
|
-
chunkr_ai/_types.py,sha256=dnzU2Q2tLcuk29QFEcnPC1wp0-4XB4Cpef_3AnRhV5Y,6200
|
14
|
-
chunkr_ai/_version.py,sha256=DjH8N3RuL6wzCqGACuOApaeMK7oF2_r00cZzxQ5fn4I,169
|
15
|
-
chunkr_ai/pagination.py,sha256=mKx7wg1MEeJT-stWQ60VUHotL6Y3QdDmTr1fjG9scP4,1924
|
16
|
-
chunkr_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
-
chunkr_ai/_utils/__init__.py,sha256=PNZ_QJuzZEgyYXqkO1HVhGkj5IU9bglVUcw7H-Knjzw,2062
|
18
|
-
chunkr_ai/_utils/_logs.py,sha256=ylZvP2JTPNlFCbxYajpsnWkA253kDFgnFYDWWuvgf_Q,780
|
19
|
-
chunkr_ai/_utils/_proxy.py,sha256=aglnj2yBTDyGX9Akk2crZHrl10oqRmceUy2Zp008XEs,1975
|
20
|
-
chunkr_ai/_utils/_reflection.py,sha256=ZmGkIgT_PuwedyNBrrKGbxoWtkpytJNU1uU4QHnmEMU,1364
|
21
|
-
chunkr_ai/_utils/_resources_proxy.py,sha256=3KbSCApjaz7x_frFAxJe9ltY-dIJBQUVnIhR2GvVRY8,604
|
22
|
-
chunkr_ai/_utils/_streams.py,sha256=SMC90diFFecpEg_zgDRVbdR3hSEIgVVij4taD-noMLM,289
|
23
|
-
chunkr_ai/_utils/_sync.py,sha256=TpGLrrhRNWTJtODNE6Fup3_k7zrWm1j2RlirzBwre-0,2862
|
24
|
-
chunkr_ai/_utils/_transform.py,sha256=n7kskEWz6o__aoNvhFoGVyDoalNe6mJwp-g7BWkdj88,15617
|
25
|
-
chunkr_ai/_utils/_typing.py,sha256=D0DbbNu8GnYQTSICnTSHDGsYXj8TcAKyhejb0XcnjtY,4602
|
26
|
-
chunkr_ai/_utils/_utils.py,sha256=ts4CiiuNpFiGB6YMdkQRh2SZvYvsl7mAF-JWHCcLDf4,12312
|
27
|
-
chunkr_ai/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224
|
28
|
-
chunkr_ai/resources/__init__.py,sha256=zhuIEAnBOs0bLyNTeWngJbEFhlevoTx0NzTZMlX_cs8,976
|
29
|
-
chunkr_ai/resources/health.py,sha256=XTvUtRs5hEK-uccb_40mcIex85eEUo1a171nQUjpSOs,4965
|
30
|
-
chunkr_ai/resources/task.py,sha256=Is4Z8lhKfxAiITBmSv7E6oebCiZEoNBA4xiHl-L1yEU,48182
|
31
|
-
chunkr_ai/types/__init__.py,sha256=_SppK-MG3ZexF2C3d89XcrQWcnShexkVYAIBcHA5Qb0,1901
|
32
|
-
chunkr_ai/types/auto_generation_config.py,sha256=6j9Kbj05FObYmp2g8oPOY69AxlPr6-TLC9p91Qx_SDo,1337
|
33
|
-
chunkr_ai/types/auto_generation_config_param.py,sha256=SIp5SSE060DiKQW0QOHXgqYbKZisPqn6koeowehvJCI,1251
|
34
|
-
chunkr_ai/types/bounding_box.py,sha256=JDZlhJJl4lg6RYGf8VpC46soQfQ10-K8YwHHA6XBFkM,431
|
35
|
-
chunkr_ai/types/chunk_processing.py,sha256=KWnebuSLIwSWPaHSmAGUPZAMrkbDKA6RYDq9TwrQZJk,1217
|
36
|
-
chunkr_ai/types/chunk_processing_param.py,sha256=7Yq6ZpuMNnyqc08GLow4lsGA_vpI7S5tWMac5T4Hyak,1294
|
37
|
-
chunkr_ai/types/health_check_response.py,sha256=6Zn5YYHCQf2RgMjDlf39mtiTPqfaBfC9Vv599U_rKCI,200
|
38
|
-
chunkr_ai/types/ignore_generation_config.py,sha256=lU2qjsombMTlsQHGtvs2G-FaNBL1mHmF8BThYMd1UYk,1341
|
39
|
-
chunkr_ai/types/ignore_generation_config_param.py,sha256=-gdBQy4HffH7GQwEXMvSfeHC5fgiMry3jHVEwrLUPrM,1255
|
40
|
-
chunkr_ai/types/llm_generation_config.py,sha256=ifPhOvYrC968WfaI3z0qwQyUewyWdtEDUnuWEhNJN7o,1335
|
41
|
-
chunkr_ai/types/llm_generation_config_param.py,sha256=s3iWDYtQ04FWubWWl22cIFTNiwhLzluAyt6zmNcNCd4,1249
|
42
|
-
chunkr_ai/types/llm_processing.py,sha256=f6w52vkvQKHu1HxWD0r9xZ9BlufMcRiY47hBVpeFPTw,1132
|
43
|
-
chunkr_ai/types/llm_processing_param.py,sha256=GlvSYRc-_1ec1TgZiybY7G5unzdDpIJiKcs7Ou4cbPo,1131
|
44
|
-
chunkr_ai/types/picture_generation_config.py,sha256=-W4SkGwuK6DmMoA_CbgHz-dfYKq2bF1vlZUcWiTFKsc,1343
|
45
|
-
chunkr_ai/types/picture_generation_config_param.py,sha256=G5czQb5jbuYajBPojYah8_QFl7Hw0gXNojxtwixy8Ao,1257
|
46
|
-
chunkr_ai/types/segment_processing.py,sha256=3K50PMRUeTLZ7rUzLXsQyqFga0lYjGAer7xotHFFZn8,14132
|
47
|
-
chunkr_ai/types/segment_processing_param.py,sha256=AV9PRcdXRVcrjMXNNhTxnJflCFsib_shmb0MSYHgrjs,14306
|
48
|
-
chunkr_ai/types/table_generation_config.py,sha256=TQqAKji9Bf7N8UBN9nM0cyEp3g3e7tbN-Ehjr2uHTVE,1339
|
49
|
-
chunkr_ai/types/table_generation_config_param.py,sha256=KLgSwuA2bB_ASg8vIhsBJTkMDPqBZJQcwl75u7NaQm8,1253
|
50
|
-
chunkr_ai/types/task.py,sha256=CqrrvA_wPGALVxLQXN4m6cXAavXh1DoLnLljPVQjef4,11992
|
51
|
-
chunkr_ai/types/task_get_params.py,sha256=Nx2luhebcoaiuRln4KP4FarWvBPd1OYi__efi56zHPM,460
|
52
|
-
chunkr_ai/types/task_list_params.py,sha256=fCku42QW6QUsLmZgKJBaxisGvUcmcQ5fa6LgHHRIwiQ,1043
|
53
|
-
chunkr_ai/types/task_parse_params.py,sha256=3IpiYdCi54DlROXaB_vx-hIZ5pk3tkSBRiftAcwq8h8,3585
|
54
|
-
chunkr_ai/types/task_update_params.py,sha256=Vfgoshoig9MWtkBv0VeDFfBmtgktap6-Mm9R1SwHw68,3532
|
55
|
-
chunkr_ai-0.1.0a1.dist-info/METADATA,sha256=_KVY6caTB5upWNyQfhJYScFOlkBySVutRplRq_JGn4A,15782
|
56
|
-
chunkr_ai-0.1.0a1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
57
|
-
chunkr_ai-0.1.0a1.dist-info/licenses/LICENSE,sha256=3FDRL-L-DFkrFy8yJpb1Nxhuztm0PB2kawcCgK5utFg,11336
|
58
|
-
chunkr_ai-0.1.0a1.dist-info/RECORD,,
|
File without changes
|
File without changes
|