chunkr-ai 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. chunkr_ai/_client.py +18 -9
  2. chunkr_ai/_files.py +1 -1
  3. chunkr_ai/_version.py +1 -1
  4. chunkr_ai/pagination.py +61 -1
  5. chunkr_ai/resources/__init__.py +27 -13
  6. chunkr_ai/resources/files.py +712 -0
  7. chunkr_ai/resources/tasks/__init__.py +33 -0
  8. chunkr_ai/resources/tasks/parse.py +612 -0
  9. chunkr_ai/resources/tasks/tasks.py +596 -0
  10. chunkr_ai/types/__init__.py +7 -19
  11. chunkr_ai/types/delete.py +10 -0
  12. chunkr_ai/types/file.py +30 -0
  13. chunkr_ai/types/file_create_params.py +17 -0
  14. chunkr_ai/types/file_list_params.py +28 -0
  15. chunkr_ai/types/file_url.py +15 -0
  16. chunkr_ai/types/file_url_params.py +15 -0
  17. chunkr_ai/types/files_page_response.py +20 -0
  18. chunkr_ai/types/task.py +866 -27
  19. chunkr_ai/types/tasks/__init__.py +6 -0
  20. chunkr_ai/types/tasks/parse_create_params.py +844 -0
  21. chunkr_ai/types/tasks/parse_update_params.py +838 -0
  22. {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a3.dist-info}/METADATA +39 -21
  23. chunkr_ai-0.1.0a3.dist-info/RECORD +52 -0
  24. chunkr_ai/resources/task.py +0 -1166
  25. chunkr_ai/types/auto_generation_config.py +0 -39
  26. chunkr_ai/types/auto_generation_config_param.py +0 -39
  27. chunkr_ai/types/bounding_box.py +0 -19
  28. chunkr_ai/types/chunk_processing.py +0 -40
  29. chunkr_ai/types/chunk_processing_param.py +0 -42
  30. chunkr_ai/types/ignore_generation_config.py +0 -39
  31. chunkr_ai/types/ignore_generation_config_param.py +0 -39
  32. chunkr_ai/types/llm_generation_config.py +0 -39
  33. chunkr_ai/types/llm_generation_config_param.py +0 -39
  34. chunkr_ai/types/llm_processing.py +0 -36
  35. chunkr_ai/types/llm_processing_param.py +0 -36
  36. chunkr_ai/types/picture_generation_config.py +0 -39
  37. chunkr_ai/types/picture_generation_config_param.py +0 -39
  38. chunkr_ai/types/segment_processing.py +0 -280
  39. chunkr_ai/types/segment_processing_param.py +0 -281
  40. chunkr_ai/types/table_generation_config.py +0 -39
  41. chunkr_ai/types/table_generation_config_param.py +0 -39
  42. chunkr_ai/types/task_parse_params.py +0 -90
  43. chunkr_ai/types/task_update_params.py +0 -90
  44. chunkr_ai-0.1.0a1.dist-info/RECORD +0 -58
  45. {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a3.dist-info}/WHEEL +0 -0
  46. {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a3.dist-info}/licenses/LICENSE +0 -0
@@ -1,90 +0,0 @@
1
- # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
-
3
- from __future__ import annotations
4
-
5
- from typing import Optional
6
- from typing_extensions import Literal, Required, TypedDict
7
-
8
- from .llm_processing_param import LlmProcessingParam
9
- from .chunk_processing_param import ChunkProcessingParam
10
- from .segment_processing_param import SegmentProcessingParam
11
-
12
- __all__ = ["TaskParseParams"]
13
-
14
-
15
- class TaskParseParams(TypedDict, total=False):
16
- file: Required[str]
17
- """The file to be uploaded. Can be a URL or a base64 encoded file."""
18
-
19
- chunk_processing: Optional[ChunkProcessingParam]
20
- """Controls the setting for the chunking and post-processing of each chunk."""
21
-
22
- error_handling: Optional[Literal["Fail", "Continue"]]
23
- """Controls how errors are handled during processing:
24
-
25
- - `Fail`: Stops processing and fails the task when any error occurs
26
- - `Continue`: Attempts to continue processing despite non-critical errors (eg.
27
- LLM refusals etc.)
28
- """
29
-
30
- expires_in: Optional[int]
31
- """
32
- The number of seconds until task is deleted. Expired tasks can **not** be
33
- updated, polled or accessed via web interface.
34
- """
35
-
36
- file_name: Optional[str]
37
- """The name of the file to be uploaded. If not set a name will be generated."""
38
-
39
- llm_processing: Optional[LlmProcessingParam]
40
- """Controls the LLM used for the task."""
41
-
42
- ocr_strategy: Optional[Literal["All", "Auto"]]
43
- """Controls the Optical Character Recognition (OCR) strategy.
44
-
45
- - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
46
- - `Auto`: Selectively applies OCR only to pages with missing or low-quality
47
- text. When text layer is present the bounding boxes from the text layer are
48
- used.
49
- """
50
-
51
- pipeline: Optional[Literal["Azure", "Chunkr"]]
52
- """
53
- Choose the provider whose models will be used for segmentation and OCR. The
54
- output will be unified to the Chunkr `output` format.
55
- """
56
-
57
- segment_processing: Optional[SegmentProcessingParam]
58
- """Defines how each segment type is handled when generating the final output.
59
-
60
- Each segment uses one of three strategies. The chosen strategy controls: •
61
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`). • How the
62
- content is produced (rule-based vs. LLM). • The output format (`Html` or
63
- `Markdown`).
64
-
65
- Optional flags such as image **cropping**, **extended context**, and **LLM
66
- descriptions** further refine behaviour.
67
-
68
- ---
69
-
70
- **Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
71
- `Caption`, `Footnote` → **Auto** (Markdown) • `Table` → **LLM** (HTML,
72
- description on) • `Picture` → **LLM** (Markdown, description on, cropping _All_)
73
- • `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
74
- **Ignore** (removed from output)
75
-
76
- ---
77
-
78
- **Strategy reference** • **Auto** – rule-based content generation. • **LLM** –
79
- generate content with an LLM. • **Ignore** – exclude the segment entirely.
80
- """
81
-
82
- segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]]
83
- """Controls the segmentation strategy:
84
-
85
- - `LayoutAnalysis`: Analyzes pages for layout elements (e.g., `Table`,
86
- `Picture`, `Formula`, etc.) using bounding boxes. Provides fine-grained
87
- segmentation and better chunking. (Latency penalty: ~TBD seconds per page).
88
- - `Page`: Treats each page as a single segment. Faster processing, but without
89
- layout element detection and only simple chunking.
90
- """
@@ -1,90 +0,0 @@
1
- # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
-
3
- from __future__ import annotations
4
-
5
- from typing import Optional
6
- from typing_extensions import Literal, TypedDict
7
-
8
- from .llm_processing_param import LlmProcessingParam
9
- from .chunk_processing_param import ChunkProcessingParam
10
- from .segment_processing_param import SegmentProcessingParam
11
-
12
- __all__ = ["TaskUpdateParams"]
13
-
14
-
15
- class TaskUpdateParams(TypedDict, total=False):
16
- chunk_processing: Optional[ChunkProcessingParam]
17
- """Controls the setting for the chunking and post-processing of each chunk."""
18
-
19
- error_handling: Optional[Literal["Fail", "Continue"]]
20
- """Controls how errors are handled during processing:
21
-
22
- - `Fail`: Stops processing and fails the task when any error occurs
23
- - `Continue`: Attempts to continue processing despite non-critical errors (eg.
24
- LLM refusals etc.)
25
- """
26
-
27
- expires_in: Optional[int]
28
- """
29
- The number of seconds until task is deleted. Expired tasks can **not** be
30
- updated, polled or accessed via web interface.
31
- """
32
-
33
- high_resolution: Optional[bool]
34
- """Whether to use high-resolution images for cropping and post-processing.
35
-
36
- (Latency penalty: ~7 seconds per page)
37
- """
38
-
39
- llm_processing: Optional[LlmProcessingParam]
40
- """Controls the LLM used for the task."""
41
-
42
- ocr_strategy: Optional[Literal["All", "Auto"]]
43
- """Controls the Optical Character Recognition (OCR) strategy.
44
-
45
- - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds per page)
46
- - `Auto`: Selectively applies OCR only to pages with missing or low-quality
47
- text. When text layer is present the bounding boxes from the text layer are
48
- used.
49
- """
50
-
51
- pipeline: Optional[Literal["Azure", "Chunkr"]]
52
- """
53
- Choose the provider whose models will be used for segmentation and OCR. The
54
- output will be unified to the Chunkr `output` format.
55
- """
56
-
57
- segment_processing: Optional[SegmentProcessingParam]
58
- """Defines how each segment type is handled when generating the final output.
59
-
60
- Each segment uses one of three strategies. The chosen strategy controls: •
61
- Whether the segment is kept (`Auto`, `LLM`) or skipped (`Ignore`). • How the
62
- content is produced (rule-based vs. LLM). • The output format (`Html` or
63
- `Markdown`).
64
-
65
- Optional flags such as image **cropping**, **extended context**, and **LLM
66
- descriptions** further refine behaviour.
67
-
68
- ---
69
-
70
- **Default strategy per segment** • `Title`, `SectionHeader`, `Text`, `ListItem`,
71
- `Caption`, `Footnote` → **Auto** (Markdown) • `Table` → **LLM** (HTML,
72
- description on) • `Picture` → **LLM** (Markdown, description on, cropping _All_)
73
- • `Formula`, `Page` → **LLM** (Markdown) • `PageHeader`, `PageFooter` →
74
- **Ignore** (removed from output)
75
-
76
- ---
77
-
78
- **Strategy reference** • **Auto** – rule-based content generation. • **LLM** –
79
- generate content with an LLM. • **Ignore** – exclude the segment entirely.
80
- """
81
-
82
- segmentation_strategy: Optional[Literal["LayoutAnalysis", "Page"]]
83
- """Controls the segmentation strategy:
84
-
85
- - `LayoutAnalysis`: Analyzes pages for layout elements (e.g., `Table`,
86
- `Picture`, `Formula`, etc.) using bounding boxes. Provides fine-grained
87
- segmentation and better chunking. (Latency penalty: ~TBD seconds per page).
88
- - `Page`: Treats each page as a single segment. Faster processing, but without
89
- layout element detection and only simple chunking.
90
- """
@@ -1,58 +0,0 @@
1
- chunkr_ai/__init__.py,sha256=scS30uHiCpLbaalKTAJSCFSTqnu_b9R5JCkTu2hmbzU,2587
2
- chunkr_ai/_base_client.py,sha256=Nv5b_rmVdmmPbF42mlOfymbSC6lxcYsrsvBhKSBDXWQ,67038
3
- chunkr_ai/_client.py,sha256=FHxLInwFr7bHnlt2oiZZCdGrYul-6uzkQk1byBxGTdE,15335
4
- chunkr_ai/_compat.py,sha256=VWemUKbj6DDkQ-O4baSpHVLJafotzeXmCQGJugfVTIw,6580
5
- chunkr_ai/_constants.py,sha256=S14PFzyN9-I31wiV7SmIlL5Ga0MLHxdvegInGdXH7tM,462
6
- chunkr_ai/_exceptions.py,sha256=ClgXUcwf4qhBTXnK4LzUPQCFdFldRxAlcYdOFFgpTxA,3220
7
- chunkr_ai/_files.py,sha256=KnEzGi_O756MvKyJ4fOCW_u3JhOeWPQ4RsmDvqihDQU,3545
8
- chunkr_ai/_models.py,sha256=KvjsMfb88XZlFUKVoOxr8OyDj47MhoH2OKqWNEbBhk4,30010
9
- chunkr_ai/_qs.py,sha256=AOkSz4rHtK4YI3ZU_kzea-zpwBUgEY8WniGmTPyEimc,4846
10
- chunkr_ai/_resource.py,sha256=f5tiwjxcKdbeMor8idoHtMFTUhqD9yc2xXtq5rqeLLk,1100
11
- chunkr_ai/_response.py,sha256=xXNpF53hiYARmAW7npKuxQ5UHAEjgAzm7ME_L3eIstY,28800
12
- chunkr_ai/_streaming.py,sha256=ZmyrVWk7-AWkLAATR55WgNxnyFzYmaqJt2LthA_PTqQ,10100
13
- chunkr_ai/_types.py,sha256=dnzU2Q2tLcuk29QFEcnPC1wp0-4XB4Cpef_3AnRhV5Y,6200
14
- chunkr_ai/_version.py,sha256=DjH8N3RuL6wzCqGACuOApaeMK7oF2_r00cZzxQ5fn4I,169
15
- chunkr_ai/pagination.py,sha256=mKx7wg1MEeJT-stWQ60VUHotL6Y3QdDmTr1fjG9scP4,1924
16
- chunkr_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- chunkr_ai/_utils/__init__.py,sha256=PNZ_QJuzZEgyYXqkO1HVhGkj5IU9bglVUcw7H-Knjzw,2062
18
- chunkr_ai/_utils/_logs.py,sha256=ylZvP2JTPNlFCbxYajpsnWkA253kDFgnFYDWWuvgf_Q,780
19
- chunkr_ai/_utils/_proxy.py,sha256=aglnj2yBTDyGX9Akk2crZHrl10oqRmceUy2Zp008XEs,1975
20
- chunkr_ai/_utils/_reflection.py,sha256=ZmGkIgT_PuwedyNBrrKGbxoWtkpytJNU1uU4QHnmEMU,1364
21
- chunkr_ai/_utils/_resources_proxy.py,sha256=3KbSCApjaz7x_frFAxJe9ltY-dIJBQUVnIhR2GvVRY8,604
22
- chunkr_ai/_utils/_streams.py,sha256=SMC90diFFecpEg_zgDRVbdR3hSEIgVVij4taD-noMLM,289
23
- chunkr_ai/_utils/_sync.py,sha256=TpGLrrhRNWTJtODNE6Fup3_k7zrWm1j2RlirzBwre-0,2862
24
- chunkr_ai/_utils/_transform.py,sha256=n7kskEWz6o__aoNvhFoGVyDoalNe6mJwp-g7BWkdj88,15617
25
- chunkr_ai/_utils/_typing.py,sha256=D0DbbNu8GnYQTSICnTSHDGsYXj8TcAKyhejb0XcnjtY,4602
26
- chunkr_ai/_utils/_utils.py,sha256=ts4CiiuNpFiGB6YMdkQRh2SZvYvsl7mAF-JWHCcLDf4,12312
27
- chunkr_ai/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224
28
- chunkr_ai/resources/__init__.py,sha256=zhuIEAnBOs0bLyNTeWngJbEFhlevoTx0NzTZMlX_cs8,976
29
- chunkr_ai/resources/health.py,sha256=XTvUtRs5hEK-uccb_40mcIex85eEUo1a171nQUjpSOs,4965
30
- chunkr_ai/resources/task.py,sha256=Is4Z8lhKfxAiITBmSv7E6oebCiZEoNBA4xiHl-L1yEU,48182
31
- chunkr_ai/types/__init__.py,sha256=_SppK-MG3ZexF2C3d89XcrQWcnShexkVYAIBcHA5Qb0,1901
32
- chunkr_ai/types/auto_generation_config.py,sha256=6j9Kbj05FObYmp2g8oPOY69AxlPr6-TLC9p91Qx_SDo,1337
33
- chunkr_ai/types/auto_generation_config_param.py,sha256=SIp5SSE060DiKQW0QOHXgqYbKZisPqn6koeowehvJCI,1251
34
- chunkr_ai/types/bounding_box.py,sha256=JDZlhJJl4lg6RYGf8VpC46soQfQ10-K8YwHHA6XBFkM,431
35
- chunkr_ai/types/chunk_processing.py,sha256=KWnebuSLIwSWPaHSmAGUPZAMrkbDKA6RYDq9TwrQZJk,1217
36
- chunkr_ai/types/chunk_processing_param.py,sha256=7Yq6ZpuMNnyqc08GLow4lsGA_vpI7S5tWMac5T4Hyak,1294
37
- chunkr_ai/types/health_check_response.py,sha256=6Zn5YYHCQf2RgMjDlf39mtiTPqfaBfC9Vv599U_rKCI,200
38
- chunkr_ai/types/ignore_generation_config.py,sha256=lU2qjsombMTlsQHGtvs2G-FaNBL1mHmF8BThYMd1UYk,1341
39
- chunkr_ai/types/ignore_generation_config_param.py,sha256=-gdBQy4HffH7GQwEXMvSfeHC5fgiMry3jHVEwrLUPrM,1255
40
- chunkr_ai/types/llm_generation_config.py,sha256=ifPhOvYrC968WfaI3z0qwQyUewyWdtEDUnuWEhNJN7o,1335
41
- chunkr_ai/types/llm_generation_config_param.py,sha256=s3iWDYtQ04FWubWWl22cIFTNiwhLzluAyt6zmNcNCd4,1249
42
- chunkr_ai/types/llm_processing.py,sha256=f6w52vkvQKHu1HxWD0r9xZ9BlufMcRiY47hBVpeFPTw,1132
43
- chunkr_ai/types/llm_processing_param.py,sha256=GlvSYRc-_1ec1TgZiybY7G5unzdDpIJiKcs7Ou4cbPo,1131
44
- chunkr_ai/types/picture_generation_config.py,sha256=-W4SkGwuK6DmMoA_CbgHz-dfYKq2bF1vlZUcWiTFKsc,1343
45
- chunkr_ai/types/picture_generation_config_param.py,sha256=G5czQb5jbuYajBPojYah8_QFl7Hw0gXNojxtwixy8Ao,1257
46
- chunkr_ai/types/segment_processing.py,sha256=3K50PMRUeTLZ7rUzLXsQyqFga0lYjGAer7xotHFFZn8,14132
47
- chunkr_ai/types/segment_processing_param.py,sha256=AV9PRcdXRVcrjMXNNhTxnJflCFsib_shmb0MSYHgrjs,14306
48
- chunkr_ai/types/table_generation_config.py,sha256=TQqAKji9Bf7N8UBN9nM0cyEp3g3e7tbN-Ehjr2uHTVE,1339
49
- chunkr_ai/types/table_generation_config_param.py,sha256=KLgSwuA2bB_ASg8vIhsBJTkMDPqBZJQcwl75u7NaQm8,1253
50
- chunkr_ai/types/task.py,sha256=CqrrvA_wPGALVxLQXN4m6cXAavXh1DoLnLljPVQjef4,11992
51
- chunkr_ai/types/task_get_params.py,sha256=Nx2luhebcoaiuRln4KP4FarWvBPd1OYi__efi56zHPM,460
52
- chunkr_ai/types/task_list_params.py,sha256=fCku42QW6QUsLmZgKJBaxisGvUcmcQ5fa6LgHHRIwiQ,1043
53
- chunkr_ai/types/task_parse_params.py,sha256=3IpiYdCi54DlROXaB_vx-hIZ5pk3tkSBRiftAcwq8h8,3585
54
- chunkr_ai/types/task_update_params.py,sha256=Vfgoshoig9MWtkBv0VeDFfBmtgktap6-Mm9R1SwHw68,3532
55
- chunkr_ai-0.1.0a1.dist-info/METADATA,sha256=_KVY6caTB5upWNyQfhJYScFOlkBySVutRplRq_JGn4A,15782
56
- chunkr_ai-0.1.0a1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
57
- chunkr_ai-0.1.0a1.dist-info/licenses/LICENSE,sha256=3FDRL-L-DFkrFy8yJpb1Nxhuztm0PB2kawcCgK5utFg,11336
58
- chunkr_ai-0.1.0a1.dist-info/RECORD,,