chunkr-ai 0.1.0a1__py3-none-any.whl → 0.1.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/_client.py +2 -1
- chunkr_ai/_version.py +1 -1
- chunkr_ai/resources/task/__init__.py +33 -0
- chunkr_ai/resources/{task.py → task/parse.py} +146 -696
- chunkr_ai/resources/task/task.py +664 -0
- chunkr_ai/types/__init__.py +0 -19
- chunkr_ai/types/task/__init__.py +7 -0
- chunkr_ai/types/task/parse_create_params.py +806 -0
- chunkr_ai/types/task/parse_update_params.py +806 -0
- chunkr_ai/types/task/task.py +1186 -0
- {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a2.dist-info}/METADATA +12 -12
- {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a2.dist-info}/RECORD +14 -28
- chunkr_ai/types/auto_generation_config.py +0 -39
- chunkr_ai/types/auto_generation_config_param.py +0 -39
- chunkr_ai/types/bounding_box.py +0 -19
- chunkr_ai/types/chunk_processing.py +0 -40
- chunkr_ai/types/chunk_processing_param.py +0 -42
- chunkr_ai/types/ignore_generation_config.py +0 -39
- chunkr_ai/types/ignore_generation_config_param.py +0 -39
- chunkr_ai/types/llm_generation_config.py +0 -39
- chunkr_ai/types/llm_generation_config_param.py +0 -39
- chunkr_ai/types/llm_processing.py +0 -36
- chunkr_ai/types/llm_processing_param.py +0 -36
- chunkr_ai/types/picture_generation_config.py +0 -39
- chunkr_ai/types/picture_generation_config_param.py +0 -39
- chunkr_ai/types/segment_processing.py +0 -280
- chunkr_ai/types/segment_processing_param.py +0 -281
- chunkr_ai/types/table_generation_config.py +0 -39
- chunkr_ai/types/table_generation_config_param.py +0 -39
- chunkr_ai/types/task.py +0 -379
- chunkr_ai/types/task_parse_params.py +0 -90
- chunkr_ai/types/task_update_params.py +0 -90
- {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a2.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.1.0a1.dist-info → chunkr_ai-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: chunkr-ai
|
3
|
-
Version: 0.1.0a1
|
3
|
+
Version: 0.1.0a2
|
4
4
|
Summary: The official Python library for the chunkr API
|
5
5
|
Project-URL: Homepage, https://github.com/lumina-ai-inc/chunkr-python
|
6
6
|
Project-URL: Repository, https://github.com/lumina-ai-inc/chunkr-python
|
@@ -70,7 +70,7 @@ client = Chunkr(
|
|
70
70
|
api_key=os.environ.get("CHUNKR_API_KEY"), # This is the default and can be omitted
|
71
71
|
)
|
72
72
|
|
73
|
-
task = client.task.parse(
|
73
|
+
task = client.task.parse.create(
|
74
74
|
file="string",
|
75
75
|
)
|
76
76
|
print(task.task_id)
|
@@ -96,7 +96,7 @@ client = AsyncChunkr(
|
|
96
96
|
|
97
97
|
|
98
98
|
async def main() -> None:
|
99
|
-
task = await client.task.parse(
|
99
|
+
task = await client.task.parse.create(
|
100
100
|
file="string",
|
101
101
|
)
|
102
102
|
print(task.task_id)
|
@@ -131,7 +131,7 @@ async def main() -> None:
|
|
131
131
|
api_key="My API Key",
|
132
132
|
http_client=DefaultAioHttpClient(),
|
133
133
|
) as client:
|
134
|
-
task = await client.task.parse(
|
134
|
+
task = await client.task.parse.create(
|
135
135
|
file="string",
|
136
136
|
)
|
137
137
|
print(task.task_id)
|
@@ -229,7 +229,7 @@ from chunkr_ai import Chunkr
|
|
229
229
|
|
230
230
|
client = Chunkr()
|
231
231
|
|
232
|
-
task = client.task.parse(
|
232
|
+
task = client.task.parse.create(
|
233
233
|
file="file",
|
234
234
|
chunk_processing={},
|
235
235
|
)
|
@@ -252,7 +252,7 @@ from chunkr_ai import Chunkr
|
|
252
252
|
client = Chunkr()
|
253
253
|
|
254
254
|
try:
|
255
|
-
client.task.parse(
|
255
|
+
client.task.parse.create(
|
256
256
|
file="base64 or url",
|
257
257
|
)
|
258
258
|
except chunkr_ai.APIConnectionError as e:
|
@@ -297,7 +297,7 @@ client = Chunkr(
|
|
297
297
|
)
|
298
298
|
|
299
299
|
# Or, configure per-request:
|
300
|
-
client.with_options(max_retries=5).task.parse(
|
300
|
+
client.with_options(max_retries=5).task.parse.create(
|
301
301
|
file="base64 or url",
|
302
302
|
)
|
303
303
|
```
|
@@ -322,7 +322,7 @@ client = Chunkr(
|
|
322
322
|
)
|
323
323
|
|
324
324
|
# Override per-request:
|
325
|
-
client.with_options(timeout=5.0).task.parse(
|
325
|
+
client.with_options(timeout=5.0).task.parse.create(
|
326
326
|
file="base64 or url",
|
327
327
|
)
|
328
328
|
```
|
@@ -365,13 +365,13 @@ The "raw" Response object can be accessed by prefixing `.with_raw_response.` to
|
|
365
365
|
from chunkr_ai import Chunkr
|
366
366
|
|
367
367
|
client = Chunkr()
|
368
|
-
response = client.task.with_raw_response.parse(
|
368
|
+
response = client.task.parse.with_raw_response.create(
|
369
369
|
file="base64 or url",
|
370
370
|
)
|
371
371
|
print(response.headers.get('X-My-Header'))
|
372
372
|
|
373
|
-
|
374
|
-
print(
|
373
|
+
parse = response.parse() # get the object that `task.parse.create()` would have returned
|
374
|
+
print(parse.task_id)
|
375
375
|
```
|
376
376
|
|
377
377
|
These methods return an [`APIResponse`](https://github.com/lumina-ai-inc/chunkr-python/tree/main/src/chunkr_ai/_response.py) object.
|
@@ -385,7 +385,7 @@ The above interface eagerly reads the full response body when you make the reque
|
|
385
385
|
To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods.
|
386
386
|
|
387
387
|
```python
|
388
|
-
with client.task.with_streaming_response.parse(
|
388
|
+
with client.task.parse.with_streaming_response.create(
|
389
389
|
file="base64 or url",
|
390
390
|
) as response:
|
391
391
|
print(response.headers.get("X-My-Header"))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
chunkr_ai/__init__.py,sha256=scS30uHiCpLbaalKTAJSCFSTqnu_b9R5JCkTu2hmbzU,2587
|
2
2
|
chunkr_ai/_base_client.py,sha256=Nv5b_rmVdmmPbF42mlOfymbSC6lxcYsrsvBhKSBDXWQ,67038
|
3
|
-
chunkr_ai/_client.py,sha256=
|
3
|
+
chunkr_ai/_client.py,sha256=6Dmn7QJXjRXrP9TbOZUhTylnN9adREdxLmobyHrhnbo,15362
|
4
4
|
chunkr_ai/_compat.py,sha256=VWemUKbj6DDkQ-O4baSpHVLJafotzeXmCQGJugfVTIw,6580
|
5
5
|
chunkr_ai/_constants.py,sha256=S14PFzyN9-I31wiV7SmIlL5Ga0MLHxdvegInGdXH7tM,462
|
6
6
|
chunkr_ai/_exceptions.py,sha256=ClgXUcwf4qhBTXnK4LzUPQCFdFldRxAlcYdOFFgpTxA,3220
|
@@ -11,7 +11,7 @@ chunkr_ai/_resource.py,sha256=f5tiwjxcKdbeMor8idoHtMFTUhqD9yc2xXtq5rqeLLk,1100
|
|
11
11
|
chunkr_ai/_response.py,sha256=xXNpF53hiYARmAW7npKuxQ5UHAEjgAzm7ME_L3eIstY,28800
|
12
12
|
chunkr_ai/_streaming.py,sha256=ZmyrVWk7-AWkLAATR55WgNxnyFzYmaqJt2LthA_PTqQ,10100
|
13
13
|
chunkr_ai/_types.py,sha256=dnzU2Q2tLcuk29QFEcnPC1wp0-4XB4Cpef_3AnRhV5Y,6200
|
14
|
-
chunkr_ai/_version.py,sha256=
|
14
|
+
chunkr_ai/_version.py,sha256=aZMksQV_irE9ZNKGRmM5QMcr3SxXmViodBDpjr3crag,169
|
15
15
|
chunkr_ai/pagination.py,sha256=mKx7wg1MEeJT-stWQ60VUHotL6Y3QdDmTr1fjG9scP4,1924
|
16
16
|
chunkr_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
chunkr_ai/_utils/__init__.py,sha256=PNZ_QJuzZEgyYXqkO1HVhGkj5IU9bglVUcw7H-Knjzw,2062
|
@@ -27,32 +27,18 @@ chunkr_ai/_utils/_utils.py,sha256=ts4CiiuNpFiGB6YMdkQRh2SZvYvsl7mAF-JWHCcLDf4,12
|
|
27
27
|
chunkr_ai/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224
|
28
28
|
chunkr_ai/resources/__init__.py,sha256=zhuIEAnBOs0bLyNTeWngJbEFhlevoTx0NzTZMlX_cs8,976
|
29
29
|
chunkr_ai/resources/health.py,sha256=XTvUtRs5hEK-uccb_40mcIex85eEUo1a171nQUjpSOs,4965
|
30
|
-
chunkr_ai/resources/task.py,sha256=
|
31
|
-
chunkr_ai/
|
32
|
-
chunkr_ai/
|
33
|
-
chunkr_ai/types/
|
34
|
-
chunkr_ai/types/bounding_box.py,sha256=JDZlhJJl4lg6RYGf8VpC46soQfQ10-K8YwHHA6XBFkM,431
|
35
|
-
chunkr_ai/types/chunk_processing.py,sha256=KWnebuSLIwSWPaHSmAGUPZAMrkbDKA6RYDq9TwrQZJk,1217
|
36
|
-
chunkr_ai/types/chunk_processing_param.py,sha256=7Yq6ZpuMNnyqc08GLow4lsGA_vpI7S5tWMac5T4Hyak,1294
|
30
|
+
chunkr_ai/resources/task/__init__.py,sha256=TrjBrfPuVqlcYULga_NosiVxZETfe8dKdUJHjIDJ5zE,963
|
31
|
+
chunkr_ai/resources/task/parse.py,sha256=Sv4aIP6LEOPg_4lTEZBKpSxQrhRSN0daBKubLMg1dV0,28057
|
32
|
+
chunkr_ai/resources/task/task.py,sha256=Rn2zfndPYOtFa5LacvGcNURJkEa7TCjgOtavKWmHO04,24172
|
33
|
+
chunkr_ai/types/__init__.py,sha256=uDFPzVZGkt8tLxw7DID6EVKQQnemw_j4qZnzpK7WZng,355
|
37
34
|
chunkr_ai/types/health_check_response.py,sha256=6Zn5YYHCQf2RgMjDlf39mtiTPqfaBfC9Vv599U_rKCI,200
|
38
|
-
chunkr_ai/types/ignore_generation_config.py,sha256=lU2qjsombMTlsQHGtvs2G-FaNBL1mHmF8BThYMd1UYk,1341
|
39
|
-
chunkr_ai/types/ignore_generation_config_param.py,sha256=-gdBQy4HffH7GQwEXMvSfeHC5fgiMry3jHVEwrLUPrM,1255
|
40
|
-
chunkr_ai/types/llm_generation_config.py,sha256=ifPhOvYrC968WfaI3z0qwQyUewyWdtEDUnuWEhNJN7o,1335
|
41
|
-
chunkr_ai/types/llm_generation_config_param.py,sha256=s3iWDYtQ04FWubWWl22cIFTNiwhLzluAyt6zmNcNCd4,1249
|
42
|
-
chunkr_ai/types/llm_processing.py,sha256=f6w52vkvQKHu1HxWD0r9xZ9BlufMcRiY47hBVpeFPTw,1132
|
43
|
-
chunkr_ai/types/llm_processing_param.py,sha256=GlvSYRc-_1ec1TgZiybY7G5unzdDpIJiKcs7Ou4cbPo,1131
|
44
|
-
chunkr_ai/types/picture_generation_config.py,sha256=-W4SkGwuK6DmMoA_CbgHz-dfYKq2bF1vlZUcWiTFKsc,1343
|
45
|
-
chunkr_ai/types/picture_generation_config_param.py,sha256=G5czQb5jbuYajBPojYah8_QFl7Hw0gXNojxtwixy8Ao,1257
|
46
|
-
chunkr_ai/types/segment_processing.py,sha256=3K50PMRUeTLZ7rUzLXsQyqFga0lYjGAer7xotHFFZn8,14132
|
47
|
-
chunkr_ai/types/segment_processing_param.py,sha256=AV9PRcdXRVcrjMXNNhTxnJflCFsib_shmb0MSYHgrjs,14306
|
48
|
-
chunkr_ai/types/table_generation_config.py,sha256=TQqAKji9Bf7N8UBN9nM0cyEp3g3e7tbN-Ehjr2uHTVE,1339
|
49
|
-
chunkr_ai/types/table_generation_config_param.py,sha256=KLgSwuA2bB_ASg8vIhsBJTkMDPqBZJQcwl75u7NaQm8,1253
|
50
|
-
chunkr_ai/types/task.py,sha256=CqrrvA_wPGALVxLQXN4m6cXAavXh1DoLnLljPVQjef4,11992
|
51
35
|
chunkr_ai/types/task_get_params.py,sha256=Nx2luhebcoaiuRln4KP4FarWvBPd1OYi__efi56zHPM,460
|
52
36
|
chunkr_ai/types/task_list_params.py,sha256=fCku42QW6QUsLmZgKJBaxisGvUcmcQ5fa6LgHHRIwiQ,1043
|
53
|
-
chunkr_ai/types/
|
54
|
-
chunkr_ai/types/
|
55
|
-
chunkr_ai
|
56
|
-
chunkr_ai
|
57
|
-
chunkr_ai-0.1.
|
58
|
-
chunkr_ai-0.1.
|
37
|
+
chunkr_ai/types/task/__init__.py,sha256=dglHZXlnA7NvA-Bz5O2aLS7Ug4NP6KuQDeYlQuEx5S8,298
|
38
|
+
chunkr_ai/types/task/parse_create_params.py,sha256=YT4_p3k5eamPOud_e8BP3EailaDrTuGrG17gwVyHvr8,31794
|
39
|
+
chunkr_ai/types/task/parse_update_params.py,sha256=GpIpT74YY44J_RoLksyAAWmSG39MlTxq2YR3WrdHrOc,31749
|
40
|
+
chunkr_ai/types/task/task.py,sha256=p8HWN1usu35R-KS1sk24BZlS2pwsR7HBZWUkGUAEoXg,44340
|
41
|
+
chunkr_ai-0.1.0a2.dist-info/METADATA,sha256=M88Syly-5DGBErrj2n0By9XYLY6wb6q4QtN2ff1IAC8,15854
|
42
|
+
chunkr_ai-0.1.0a2.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
43
|
+
chunkr_ai-0.1.0a2.dist-info/licenses/LICENSE,sha256=3FDRL-L-DFkrFy8yJpb1Nxhuztm0PB2kawcCgK5utFg,11336
|
44
|
+
chunkr_ai-0.1.0a2.dist-info/RECORD,,
|
@@ -1,39 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from typing import List, Optional
|
4
|
-
from typing_extensions import Literal
|
5
|
-
|
6
|
-
from .._models import BaseModel
|
7
|
-
|
8
|
-
__all__ = ["AutoGenerationConfig"]
|
9
|
-
|
10
|
-
|
11
|
-
class AutoGenerationConfig(BaseModel):
|
12
|
-
crop_image: Optional[Literal["All", "Auto"]] = None
|
13
|
-
"""Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
|
14
|
-
|
15
|
-
- `All` crops all images in the item
|
16
|
-
- `Auto` crops images only if required for post-processing
|
17
|
-
"""
|
18
|
-
|
19
|
-
description: Optional[bool] = None
|
20
|
-
"""Generate LLM descriptions for this segment"""
|
21
|
-
|
22
|
-
embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
|
23
|
-
"""**DEPRECATED**: `embed` field is auto populated"""
|
24
|
-
|
25
|
-
extended_context: Optional[bool] = None
|
26
|
-
"""Use the full page image as context for LLM generation"""
|
27
|
-
|
28
|
-
format: Optional[Literal["Html", "Markdown"]] = None
|
29
|
-
|
30
|
-
html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
31
|
-
"""**DEPRECATED**: Use `format: html` and `strategy` instead."""
|
32
|
-
|
33
|
-
llm: Optional[str] = None
|
34
|
-
"""**DEPRECATED**: use description instead"""
|
35
|
-
|
36
|
-
markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
37
|
-
"""**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
|
38
|
-
|
39
|
-
strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
@@ -1,39 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from __future__ import annotations
|
4
|
-
|
5
|
-
from typing import List, Optional
|
6
|
-
from typing_extensions import Literal, TypedDict
|
7
|
-
|
8
|
-
__all__ = ["AutoGenerationConfigParam"]
|
9
|
-
|
10
|
-
|
11
|
-
class AutoGenerationConfigParam(TypedDict, total=False):
|
12
|
-
crop_image: Literal["All", "Auto"]
|
13
|
-
"""Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
|
14
|
-
|
15
|
-
- `All` crops all images in the item
|
16
|
-
- `Auto` crops images only if required for post-processing
|
17
|
-
"""
|
18
|
-
|
19
|
-
description: bool
|
20
|
-
"""Generate LLM descriptions for this segment"""
|
21
|
-
|
22
|
-
embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
|
23
|
-
"""**DEPRECATED**: `embed` field is auto populated"""
|
24
|
-
|
25
|
-
extended_context: bool
|
26
|
-
"""Use the full page image as context for LLM generation"""
|
27
|
-
|
28
|
-
format: Literal["Html", "Markdown"]
|
29
|
-
|
30
|
-
html: Optional[Literal["LLM", "Auto", "Ignore"]]
|
31
|
-
"""**DEPRECATED**: Use `format: html` and `strategy` instead."""
|
32
|
-
|
33
|
-
llm: Optional[str]
|
34
|
-
"""**DEPRECATED**: use description instead"""
|
35
|
-
|
36
|
-
markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
|
37
|
-
"""**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
|
38
|
-
|
39
|
-
strategy: Literal["LLM", "Auto", "Ignore"]
|
chunkr_ai/types/bounding_box.py
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from .._models import BaseModel
|
4
|
-
|
5
|
-
__all__ = ["BoundingBox"]
|
6
|
-
|
7
|
-
|
8
|
-
class BoundingBox(BaseModel):
|
9
|
-
height: float
|
10
|
-
"""The height of the bounding box."""
|
11
|
-
|
12
|
-
left: float
|
13
|
-
"""The left coordinate of the bounding box."""
|
14
|
-
|
15
|
-
top: float
|
16
|
-
"""The top coordinate of the bounding box."""
|
17
|
-
|
18
|
-
width: float
|
19
|
-
"""The width of the bounding box."""
|
@@ -1,40 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from typing import Union, Optional
|
4
|
-
from typing_extensions import Literal, TypeAlias
|
5
|
-
|
6
|
-
from pydantic import Field as FieldInfo
|
7
|
-
|
8
|
-
from .._models import BaseModel
|
9
|
-
|
10
|
-
__all__ = ["ChunkProcessing", "Tokenizer", "TokenizerEnum", "TokenizerString"]
|
11
|
-
|
12
|
-
|
13
|
-
class TokenizerEnum(BaseModel):
|
14
|
-
enum: Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"] = FieldInfo(alias="Enum")
|
15
|
-
"""Use one of the predefined tokenizer types"""
|
16
|
-
|
17
|
-
|
18
|
-
class TokenizerString(BaseModel):
|
19
|
-
string: str = FieldInfo(alias="String")
|
20
|
-
"""
|
21
|
-
Use any Hugging Face tokenizer by specifying its model ID Examples:
|
22
|
-
"Qwen/Qwen-tokenizer", "facebook/bart-large"
|
23
|
-
"""
|
24
|
-
|
25
|
-
|
26
|
-
Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
|
27
|
-
|
28
|
-
|
29
|
-
class ChunkProcessing(BaseModel):
|
30
|
-
ignore_headers_and_footers: Optional[bool] = None
|
31
|
-
"""DEPRECATED: use `segment_processing.ignore` This value will not be used"""
|
32
|
-
|
33
|
-
target_length: Optional[int] = None
|
34
|
-
"""The target number of words in each chunk.
|
35
|
-
|
36
|
-
If 0, each chunk will contain a single segment.
|
37
|
-
"""
|
38
|
-
|
39
|
-
tokenizer: Optional[Tokenizer] = None
|
40
|
-
"""The tokenizer to use for the chunking process."""
|
@@ -1,42 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from __future__ import annotations
|
4
|
-
|
5
|
-
from typing import Union
|
6
|
-
from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
|
7
|
-
|
8
|
-
from .._utils import PropertyInfo
|
9
|
-
|
10
|
-
__all__ = ["ChunkProcessingParam", "Tokenizer", "TokenizerEnum", "TokenizerString"]
|
11
|
-
|
12
|
-
|
13
|
-
class TokenizerEnum(TypedDict, total=False):
|
14
|
-
enum: Required[
|
15
|
-
Annotated[Literal["Word", "Cl100kBase", "XlmRobertaBase", "BertBaseUncased"], PropertyInfo(alias="Enum")]
|
16
|
-
]
|
17
|
-
"""Use one of the predefined tokenizer types"""
|
18
|
-
|
19
|
-
|
20
|
-
class TokenizerString(TypedDict, total=False):
|
21
|
-
string: Required[Annotated[str, PropertyInfo(alias="String")]]
|
22
|
-
"""
|
23
|
-
Use any Hugging Face tokenizer by specifying its model ID Examples:
|
24
|
-
"Qwen/Qwen-tokenizer", "facebook/bart-large"
|
25
|
-
"""
|
26
|
-
|
27
|
-
|
28
|
-
Tokenizer: TypeAlias = Union[TokenizerEnum, TokenizerString]
|
29
|
-
|
30
|
-
|
31
|
-
class ChunkProcessingParam(TypedDict, total=False):
|
32
|
-
ignore_headers_and_footers: bool
|
33
|
-
"""DEPRECATED: use `segment_processing.ignore` This value will not be used"""
|
34
|
-
|
35
|
-
target_length: int
|
36
|
-
"""The target number of words in each chunk.
|
37
|
-
|
38
|
-
If 0, each chunk will contain a single segment.
|
39
|
-
"""
|
40
|
-
|
41
|
-
tokenizer: Tokenizer
|
42
|
-
"""The tokenizer to use for the chunking process."""
|
@@ -1,39 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from typing import List, Optional
|
4
|
-
from typing_extensions import Literal
|
5
|
-
|
6
|
-
from .._models import BaseModel
|
7
|
-
|
8
|
-
__all__ = ["IgnoreGenerationConfig"]
|
9
|
-
|
10
|
-
|
11
|
-
class IgnoreGenerationConfig(BaseModel):
|
12
|
-
crop_image: Optional[Literal["All", "Auto"]] = None
|
13
|
-
"""Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
|
14
|
-
|
15
|
-
- `All` crops all images in the item
|
16
|
-
- `Auto` crops images only if required for post-processing
|
17
|
-
"""
|
18
|
-
|
19
|
-
description: Optional[bool] = None
|
20
|
-
"""Generate LLM descriptions for this segment"""
|
21
|
-
|
22
|
-
embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
|
23
|
-
"""**DEPRECATED**: `embed` field is auto populated"""
|
24
|
-
|
25
|
-
extended_context: Optional[bool] = None
|
26
|
-
"""Use the full page image as context for LLM generation"""
|
27
|
-
|
28
|
-
format: Optional[Literal["Html", "Markdown"]] = None
|
29
|
-
|
30
|
-
html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
31
|
-
"""**DEPRECATED**: Use `format: html` and `strategy` instead."""
|
32
|
-
|
33
|
-
llm: Optional[str] = None
|
34
|
-
"""**DEPRECATED**: use description instead"""
|
35
|
-
|
36
|
-
markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
37
|
-
"""**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
|
38
|
-
|
39
|
-
strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
@@ -1,39 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from __future__ import annotations
|
4
|
-
|
5
|
-
from typing import List, Optional
|
6
|
-
from typing_extensions import Literal, TypedDict
|
7
|
-
|
8
|
-
__all__ = ["IgnoreGenerationConfigParam"]
|
9
|
-
|
10
|
-
|
11
|
-
class IgnoreGenerationConfigParam(TypedDict, total=False):
|
12
|
-
crop_image: Literal["All", "Auto"]
|
13
|
-
"""Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
|
14
|
-
|
15
|
-
- `All` crops all images in the item
|
16
|
-
- `Auto` crops images only if required for post-processing
|
17
|
-
"""
|
18
|
-
|
19
|
-
description: bool
|
20
|
-
"""Generate LLM descriptions for this segment"""
|
21
|
-
|
22
|
-
embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
|
23
|
-
"""**DEPRECATED**: `embed` field is auto populated"""
|
24
|
-
|
25
|
-
extended_context: bool
|
26
|
-
"""Use the full page image as context for LLM generation"""
|
27
|
-
|
28
|
-
format: Literal["Html", "Markdown"]
|
29
|
-
|
30
|
-
html: Optional[Literal["LLM", "Auto", "Ignore"]]
|
31
|
-
"""**DEPRECATED**: Use `format: html` and `strategy` instead."""
|
32
|
-
|
33
|
-
llm: Optional[str]
|
34
|
-
"""**DEPRECATED**: use description instead"""
|
35
|
-
|
36
|
-
markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
|
37
|
-
"""**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
|
38
|
-
|
39
|
-
strategy: Literal["LLM", "Auto", "Ignore"]
|
@@ -1,39 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from typing import List, Optional
|
4
|
-
from typing_extensions import Literal
|
5
|
-
|
6
|
-
from .._models import BaseModel
|
7
|
-
|
8
|
-
__all__ = ["LlmGenerationConfig"]
|
9
|
-
|
10
|
-
|
11
|
-
class LlmGenerationConfig(BaseModel):
|
12
|
-
crop_image: Optional[Literal["All", "Auto"]] = None
|
13
|
-
"""Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
|
14
|
-
|
15
|
-
- `All` crops all images in the item
|
16
|
-
- `Auto` crops images only if required for post-processing
|
17
|
-
"""
|
18
|
-
|
19
|
-
description: Optional[bool] = None
|
20
|
-
"""Generate LLM descriptions for this segment"""
|
21
|
-
|
22
|
-
embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
|
23
|
-
"""**DEPRECATED**: `embed` field is auto populated"""
|
24
|
-
|
25
|
-
extended_context: Optional[bool] = None
|
26
|
-
"""Use the full page image as context for LLM generation"""
|
27
|
-
|
28
|
-
format: Optional[Literal["Html", "Markdown"]] = None
|
29
|
-
|
30
|
-
html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
31
|
-
"""**DEPRECATED**: Use `format: html` and `strategy` instead."""
|
32
|
-
|
33
|
-
llm: Optional[str] = None
|
34
|
-
"""**DEPRECATED**: use description instead"""
|
35
|
-
|
36
|
-
markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
37
|
-
"""**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
|
38
|
-
|
39
|
-
strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
@@ -1,39 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from __future__ import annotations
|
4
|
-
|
5
|
-
from typing import List, Optional
|
6
|
-
from typing_extensions import Literal, TypedDict
|
7
|
-
|
8
|
-
__all__ = ["LlmGenerationConfigParam"]
|
9
|
-
|
10
|
-
|
11
|
-
class LlmGenerationConfigParam(TypedDict, total=False):
|
12
|
-
crop_image: Literal["All", "Auto"]
|
13
|
-
"""Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
|
14
|
-
|
15
|
-
- `All` crops all images in the item
|
16
|
-
- `Auto` crops images only if required for post-processing
|
17
|
-
"""
|
18
|
-
|
19
|
-
description: bool
|
20
|
-
"""Generate LLM descriptions for this segment"""
|
21
|
-
|
22
|
-
embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
|
23
|
-
"""**DEPRECATED**: `embed` field is auto populated"""
|
24
|
-
|
25
|
-
extended_context: bool
|
26
|
-
"""Use the full page image as context for LLM generation"""
|
27
|
-
|
28
|
-
format: Literal["Html", "Markdown"]
|
29
|
-
|
30
|
-
html: Optional[Literal["LLM", "Auto", "Ignore"]]
|
31
|
-
"""**DEPRECATED**: Use `format: html` and `strategy` instead."""
|
32
|
-
|
33
|
-
llm: Optional[str]
|
34
|
-
"""**DEPRECATED**: use description instead"""
|
35
|
-
|
36
|
-
markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
|
37
|
-
"""**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
|
38
|
-
|
39
|
-
strategy: Literal["LLM", "Auto", "Ignore"]
|
@@ -1,36 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from typing import Union, Optional
|
4
|
-
from typing_extensions import Literal, TypeAlias
|
5
|
-
|
6
|
-
from pydantic import Field as FieldInfo
|
7
|
-
|
8
|
-
from .._models import BaseModel
|
9
|
-
|
10
|
-
__all__ = ["LlmProcessing", "FallbackStrategy", "FallbackStrategyModel"]
|
11
|
-
|
12
|
-
|
13
|
-
class FallbackStrategyModel(BaseModel):
|
14
|
-
model: str = FieldInfo(alias="Model")
|
15
|
-
"""Use a specific model as fallback"""
|
16
|
-
|
17
|
-
|
18
|
-
FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
|
19
|
-
|
20
|
-
|
21
|
-
class LlmProcessing(BaseModel):
|
22
|
-
fallback_strategy: Optional[FallbackStrategy] = None
|
23
|
-
"""The fallback strategy to use for the LLMs in the task."""
|
24
|
-
|
25
|
-
max_completion_tokens: Optional[int] = None
|
26
|
-
"""The maximum number of tokens to generate."""
|
27
|
-
|
28
|
-
api_model_id: Optional[str] = FieldInfo(alias="model_id", default=None)
|
29
|
-
"""The ID of the model to use for the task.
|
30
|
-
|
31
|
-
If not provided, the default model will be used. Please check the documentation
|
32
|
-
for the model you want to use.
|
33
|
-
"""
|
34
|
-
|
35
|
-
temperature: Optional[float] = None
|
36
|
-
"""The temperature to use for the LLM."""
|
@@ -1,36 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from __future__ import annotations
|
4
|
-
|
5
|
-
from typing import Union, Optional
|
6
|
-
from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
|
7
|
-
|
8
|
-
from .._utils import PropertyInfo
|
9
|
-
|
10
|
-
__all__ = ["LlmProcessingParam", "FallbackStrategy", "FallbackStrategyModel"]
|
11
|
-
|
12
|
-
|
13
|
-
class FallbackStrategyModel(TypedDict, total=False):
|
14
|
-
model: Required[Annotated[str, PropertyInfo(alias="Model")]]
|
15
|
-
"""Use a specific model as fallback"""
|
16
|
-
|
17
|
-
|
18
|
-
FallbackStrategy: TypeAlias = Union[Literal["None", "Default"], FallbackStrategyModel]
|
19
|
-
|
20
|
-
|
21
|
-
class LlmProcessingParam(TypedDict, total=False):
|
22
|
-
fallback_strategy: FallbackStrategy
|
23
|
-
"""The fallback strategy to use for the LLMs in the task."""
|
24
|
-
|
25
|
-
max_completion_tokens: Optional[int]
|
26
|
-
"""The maximum number of tokens to generate."""
|
27
|
-
|
28
|
-
model_id: Optional[str]
|
29
|
-
"""The ID of the model to use for the task.
|
30
|
-
|
31
|
-
If not provided, the default model will be used. Please check the documentation
|
32
|
-
for the model you want to use.
|
33
|
-
"""
|
34
|
-
|
35
|
-
temperature: float
|
36
|
-
"""The temperature to use for the LLM."""
|
@@ -1,39 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from typing import List, Optional
|
4
|
-
from typing_extensions import Literal
|
5
|
-
|
6
|
-
from .._models import BaseModel
|
7
|
-
|
8
|
-
__all__ = ["PictureGenerationConfig"]
|
9
|
-
|
10
|
-
|
11
|
-
class PictureGenerationConfig(BaseModel):
|
12
|
-
crop_image: Optional[Literal["All", "Auto"]] = None
|
13
|
-
"""Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
|
14
|
-
|
15
|
-
- `All` crops all images in the item
|
16
|
-
- `Auto` crops images only if required for post-processing
|
17
|
-
"""
|
18
|
-
|
19
|
-
description: Optional[bool] = None
|
20
|
-
"""Generate LLM descriptions for this segment"""
|
21
|
-
|
22
|
-
embed_sources: Optional[List[Literal["Content", "HTML", "Markdown", "LLM"]]] = None
|
23
|
-
"""**DEPRECATED**: `embed` field is auto populated"""
|
24
|
-
|
25
|
-
extended_context: Optional[bool] = None
|
26
|
-
"""Use the full page image as context for LLM generation"""
|
27
|
-
|
28
|
-
format: Optional[Literal["Html", "Markdown"]] = None
|
29
|
-
|
30
|
-
html: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
31
|
-
"""**DEPRECATED**: Use `format: html` and `strategy` instead."""
|
32
|
-
|
33
|
-
llm: Optional[str] = None
|
34
|
-
"""**DEPRECATED**: use description instead"""
|
35
|
-
|
36
|
-
markdown: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
37
|
-
"""**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
|
38
|
-
|
39
|
-
strategy: Optional[Literal["LLM", "Auto", "Ignore"]] = None
|
@@ -1,39 +0,0 @@
|
|
1
|
-
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
2
|
-
|
3
|
-
from __future__ import annotations
|
4
|
-
|
5
|
-
from typing import List, Optional
|
6
|
-
from typing_extensions import Literal, TypedDict
|
7
|
-
|
8
|
-
__all__ = ["PictureGenerationConfigParam"]
|
9
|
-
|
10
|
-
|
11
|
-
class PictureGenerationConfigParam(TypedDict, total=False):
|
12
|
-
crop_image: Literal["All", "Auto"]
|
13
|
-
"""Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
|
14
|
-
|
15
|
-
- `All` crops all images in the item
|
16
|
-
- `Auto` crops images only if required for post-processing
|
17
|
-
"""
|
18
|
-
|
19
|
-
description: bool
|
20
|
-
"""Generate LLM descriptions for this segment"""
|
21
|
-
|
22
|
-
embed_sources: List[Literal["Content", "HTML", "Markdown", "LLM"]]
|
23
|
-
"""**DEPRECATED**: `embed` field is auto populated"""
|
24
|
-
|
25
|
-
extended_context: bool
|
26
|
-
"""Use the full page image as context for LLM generation"""
|
27
|
-
|
28
|
-
format: Literal["Html", "Markdown"]
|
29
|
-
|
30
|
-
html: Optional[Literal["LLM", "Auto", "Ignore"]]
|
31
|
-
"""**DEPRECATED**: Use `format: html` and `strategy` instead."""
|
32
|
-
|
33
|
-
llm: Optional[str]
|
34
|
-
"""**DEPRECATED**: use description instead"""
|
35
|
-
|
36
|
-
markdown: Optional[Literal["LLM", "Auto", "Ignore"]]
|
37
|
-
"""**DEPRECATED**: Use `format: markdown` and `strategy` instead."""
|
38
|
-
|
39
|
-
strategy: Literal["LLM", "Auto", "Ignore"]
|