llama-cloud 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Files changed (55)
  1. llama_cloud/__init__.py +36 -18
  2. llama_cloud/client.py +3 -0
  3. llama_cloud/resources/__init__.py +20 -0
  4. llama_cloud/resources/beta/__init__.py +2 -0
  5. llama_cloud/resources/beta/client.py +371 -0
  6. llama_cloud/resources/chat_apps/client.py +4 -4
  7. llama_cloud/resources/embedding_model_configs/client.py +82 -22
  8. llama_cloud/resources/llama_extract/__init__.py +21 -0
  9. llama_cloud/resources/llama_extract/client.py +223 -114
  10. llama_cloud/resources/llama_extract/types/__init__.py +21 -0
  11. llama_cloud/resources/parsing/client.py +83 -29
  12. llama_cloud/resources/pipelines/client.py +107 -2
  13. llama_cloud/resources/projects/client.py +70 -0
  14. llama_cloud/types/__init__.py +26 -26
  15. llama_cloud/types/{parsing_usage.py → audio_block.py} +5 -3
  16. llama_cloud/types/batch.py +47 -0
  17. llama_cloud/types/batch_item.py +40 -0
  18. llama_cloud/types/{extract_agent_update.py → batch_paginated_list.py} +6 -9
  19. llama_cloud/types/{extract_schema_validate_request.py → batch_public_output.py} +7 -3
  20. llama_cloud/types/cloud_confluence_data_source.py +1 -0
  21. llama_cloud/types/cloud_postgres_vector_store.py +2 -0
  22. llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
  23. llama_cloud/types/extract_config.py +2 -0
  24. llama_cloud/types/fail_page_mode.py +29 -0
  25. llama_cloud/types/{extract_agent_create.py → file_count_by_status_response.py} +8 -10
  26. llama_cloud/types/file_parse_public.py +36 -0
  27. llama_cloud/types/job_names.py +8 -12
  28. llama_cloud/types/llama_extract_settings.py +2 -2
  29. llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +13 -1
  30. llama_cloud/types/llama_parse_parameters.py +10 -2
  31. llama_cloud/types/markdown_node_parser.py +4 -0
  32. llama_cloud/types/message_role.py +4 -0
  33. llama_cloud/types/pg_vector_distance_method.py +43 -0
  34. llama_cloud/types/{extract_job_create_batch.py → pg_vector_hnsw_settings.py} +12 -9
  35. llama_cloud/types/pg_vector_vector_type.py +35 -0
  36. llama_cloud/types/pipeline_create.py +1 -0
  37. llama_cloud/types/pipeline_data_source.py +3 -0
  38. llama_cloud/types/pipeline_data_source_status.py +33 -0
  39. llama_cloud/types/pipeline_file.py +1 -0
  40. llama_cloud/types/prompt_conf.py +3 -0
  41. llama_cloud/types/struct_parse_conf.py +4 -1
  42. llama_cloud/types/supported_llm_model_names.py +0 -12
  43. llama_cloud/types/token_text_splitter.py +3 -0
  44. {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/METADATA +1 -1
  45. {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/RECORD +55 -45
  46. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema.py +0 -0
  47. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema_zero_value.py +0 -0
  48. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema.py +0 -0
  49. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema_zero_value.py +0 -0
  50. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override.py +0 -0
  51. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override_zero_value.py +0 -0
  52. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema.py +0 -0
  53. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema_zero_value.py +0 -0
  54. {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/LICENSE +0 -0
  55. {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/WHEEL +0 -0

llama_cloud/__init__.py

@@ -16,6 +16,7 @@ from .advanced_mode_transform_config_segmentation_config import (
     AdvancedModeTransformConfigSegmentationConfig_Page,
 )
 from .app_schema_chat_chat_message import AppSchemaChatChatMessage
+from .audio_block import AudioBlock
 from .auto_transform_config import AutoTransformConfig
 from .azure_open_ai_embedding import AzureOpenAiEmbedding
 from .azure_open_ai_embedding_config import AzureOpenAiEmbeddingConfig
@@ -24,6 +25,10 @@ from .base_plan_metronome_plan_type import BasePlanMetronomePlanType
 from .base_plan_name import BasePlanName
 from .base_plan_plan_frequency import BasePlanPlanFrequency
 from .base_prompt_template import BasePromptTemplate
+from .batch import Batch
+from .batch_item import BatchItem
+from .batch_paginated_list import BatchPaginatedList
+from .batch_public_output import BatchPublicOutput
 from .bedrock_embedding import BedrockEmbedding
 from .bedrock_embedding_config import BedrockEmbeddingConfig
 from .billing_period import BillingPeriod
@@ -105,19 +110,10 @@ from .embedding_model_config_update_embedding_config import (
 )
 from .eval_execution_params import EvalExecutionParams
 from .extract_agent import ExtractAgent
-from .extract_agent_create import ExtractAgentCreate
-from .extract_agent_create_data_schema import ExtractAgentCreateDataSchema
-from .extract_agent_create_data_schema_zero_value import ExtractAgentCreateDataSchemaZeroValue
 from .extract_agent_data_schema_value import ExtractAgentDataSchemaValue
-from .extract_agent_update import ExtractAgentUpdate
-from .extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
-from .extract_agent_update_data_schema_zero_value import ExtractAgentUpdateDataSchemaZeroValue
 from .extract_config import ExtractConfig
 from .extract_job import ExtractJob
 from .extract_job_create import ExtractJobCreate
-from .extract_job_create_batch import ExtractJobCreateBatch
-from .extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
-from .extract_job_create_batch_data_schema_override_zero_value import ExtractJobCreateBatchDataSchemaOverrideZeroValue
 from .extract_job_create_data_schema_override import ExtractJobCreateDataSchemaOverride
 from .extract_job_create_data_schema_override_zero_value import ExtractJobCreateDataSchemaOverrideZeroValue
 from .extract_mode import ExtractMode
@@ -132,14 +128,14 @@ from .extract_run_data_item_value import ExtractRunDataItemValue
 from .extract_run_data_schema_value import ExtractRunDataSchemaValue
 from .extract_run_data_zero_value import ExtractRunDataZeroValue
 from .extract_run_extraction_metadata_value import ExtractRunExtractionMetadataValue
-from .extract_schema_validate_request import ExtractSchemaValidateRequest
-from .extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
-from .extract_schema_validate_request_data_schema_zero_value import ExtractSchemaValidateRequestDataSchemaZeroValue
 from .extract_schema_validate_response import ExtractSchemaValidateResponse
 from .extract_schema_validate_response_data_schema_value import ExtractSchemaValidateResponseDataSchemaValue
 from .extract_state import ExtractState
 from .extract_target import ExtractTarget
+from .fail_page_mode import FailPageMode
 from .file import File
+from .file_count_by_status_response import FileCountByStatusResponse
+from .file_parse_public import FileParsePublic
 from .file_permission_info_value import FilePermissionInfoValue
 from .file_resource_info_value import FileResourceInfoValue
 from .filter_condition import FilterCondition
@@ -162,6 +158,7 @@ from .llama_extract_settings import LlamaExtractSettings
 from .llama_index_core_base_llms_types_chat_message import LlamaIndexCoreBaseLlmsTypesChatMessage
 from .llama_index_core_base_llms_types_chat_message_blocks_item import (
     LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem,
+    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio,
     LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image,
     LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text,
 )
@@ -208,9 +205,11 @@ from .parsing_job_markdown_result import ParsingJobMarkdownResult
 from .parsing_job_structured_result import ParsingJobStructuredResult
 from .parsing_job_text_result import ParsingJobTextResult
 from .parsing_mode import ParsingMode
-from .parsing_usage import ParsingUsage
 from .partition_names import PartitionNames
 from .permission import Permission
+from .pg_vector_distance_method import PgVectorDistanceMethod
+from .pg_vector_hnsw_settings import PgVectorHnswSettings
+from .pg_vector_vector_type import PgVectorVectorType
 from .pipeline import Pipeline
 from .pipeline_configuration_hashes import PipelineConfigurationHashes
 from .pipeline_create import PipelineCreate
@@ -229,6 +228,7 @@ from .pipeline_data_source import PipelineDataSource
 from .pipeline_data_source_component import PipelineDataSourceComponent
 from .pipeline_data_source_create import PipelineDataSourceCreate
 from .pipeline_data_source_custom_metadata_value import PipelineDataSourceCustomMetadataValue
+from .pipeline_data_source_status import PipelineDataSourceStatus
 from .pipeline_deployment import PipelineDeployment
 from .pipeline_embedding_config import (
     PipelineEmbeddingConfig,
@@ -341,6 +341,7 @@ __all__ = [
     "AdvancedModeTransformConfigSegmentationConfig_None",
     "AdvancedModeTransformConfigSegmentationConfig_Page",
     "AppSchemaChatChatMessage",
+    "AudioBlock",
     "AutoTransformConfig",
     "AzureOpenAiEmbedding",
     "AzureOpenAiEmbeddingConfig",
@@ -349,6 +350,10 @@ __all__ = [
     "BasePlanName",
     "BasePlanPlanFrequency",
     "BasePromptTemplate",
+    "Batch",
+    "BatchItem",
+    "BatchPaginatedList",
+    "BatchPublicOutput",
     "BedrockEmbedding",
     "BedrockEmbeddingConfig",
     "BillingPeriod",
@@ -426,19 +431,10 @@ __all__ = [
     "EmbeddingModelConfigUpdateEmbeddingConfig_VertexaiEmbedding",
     "EvalExecutionParams",
     "ExtractAgent",
-    "ExtractAgentCreate",
-    "ExtractAgentCreateDataSchema",
-    "ExtractAgentCreateDataSchemaZeroValue",
     "ExtractAgentDataSchemaValue",
-    "ExtractAgentUpdate",
-    "ExtractAgentUpdateDataSchema",
-    "ExtractAgentUpdateDataSchemaZeroValue",
     "ExtractConfig",
     "ExtractJob",
     "ExtractJobCreate",
-    "ExtractJobCreateBatch",
-    "ExtractJobCreateBatchDataSchemaOverride",
-    "ExtractJobCreateBatchDataSchemaOverrideZeroValue",
     "ExtractJobCreateDataSchemaOverride",
     "ExtractJobCreateDataSchemaOverrideZeroValue",
     "ExtractMode",
@@ -453,14 +449,14 @@ __all__ = [
     "ExtractRunDataSchemaValue",
     "ExtractRunDataZeroValue",
     "ExtractRunExtractionMetadataValue",
-    "ExtractSchemaValidateRequest",
-    "ExtractSchemaValidateRequestDataSchema",
-    "ExtractSchemaValidateRequestDataSchemaZeroValue",
     "ExtractSchemaValidateResponse",
     "ExtractSchemaValidateResponseDataSchemaValue",
     "ExtractState",
     "ExtractTarget",
+    "FailPageMode",
     "File",
+    "FileCountByStatusResponse",
+    "FileParsePublic",
     "FilePermissionInfoValue",
     "FileResourceInfoValue",
     "FilterCondition",
@@ -482,6 +478,7 @@ __all__ = [
    "LlamaExtractSettings",
     "LlamaIndexCoreBaseLlmsTypesChatMessage",
     "LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem",
+    "LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio",
     "LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image",
     "LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text",
     "LlamaParseParameters",
@@ -527,9 +524,11 @@ __all__ = [
     "ParsingJobStructuredResult",
     "ParsingJobTextResult",
     "ParsingMode",
-    "ParsingUsage",
     "PartitionNames",
     "Permission",
+    "PgVectorDistanceMethod",
+    "PgVectorHnswSettings",
+    "PgVectorVectorType",
     "Pipeline",
     "PipelineConfigurationHashes",
     "PipelineCreate",
@@ -546,6 +545,7 @@ __all__ = [
     "PipelineDataSourceComponent",
     "PipelineDataSourceCreate",
     "PipelineDataSourceCustomMetadataValue",
+    "PipelineDataSourceStatus",
     "PipelineDeployment",
     "PipelineEmbeddingConfig",
     "PipelineEmbeddingConfig_AzureEmbedding",

llama_cloud/types/{parsing_usage.py → audio_block.py}

@@ -14,9 +14,11 @@ except ImportError:
     import pydantic  # type: ignore


-class ParsingUsage(pydantic.BaseModel):
-    usage_pdf_pages: int
-    max_pdf_pages: typing.Optional[int]
+class AudioBlock(pydantic.BaseModel):
+    audio: typing.Optional[str]
+    path: typing.Optional[str]
+    url: typing.Optional[str]
+    format: typing.Optional[str]

     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
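
The new models above are re-exported from the package root (see the llama_cloud/__init__.py changes), and AudioBlock itself is a small model with four optional string fields. A minimal usage sketch; the URL and format values are illustrative, not taken from the API:

    from llama_cloud import AudioBlock

    # Any of audio, path, or url may be supplied; all four fields are optional.
    block = AudioBlock(url="https://example.com/clip.mp3", format="mp3")
    print(block.json())  # by_alias=True and exclude_unset=True, so only the set fields appear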

llama_cloud/types/batch.py (new file)

@@ -0,0 +1,47 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .llama_parse_parameters import LlamaParseParameters
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class Batch(pydantic.BaseModel):
+    tool: str = pydantic.Field(description="The tool to be used for all requests in the batch.")
+    tool_data: typing.Optional[LlamaParseParameters]
+    input_type: str = pydantic.Field(description="The type of input file. Currently only 'datasource' is supported.")
+    input_id: str = pydantic.Field(description="The ID of the input file for the batch.")
+    output_type: typing.Optional[str]
+    output_id: typing.Optional[str]
+    id: str = pydantic.Field(description="Unique identifier for the batch")
+    project_id: str = pydantic.Field(description="The ID of the project to which the batch belongs")
+    organization_id: str = pydantic.Field(description="The ID of the organization to which the batch belongs")
+    user_id: str = pydantic.Field(description="The ID of the user who created the batch")
+    external_id: typing.Optional[str]
+    completion_window: int = pydantic.Field(description="The time frame within which the batch should be processed")
+    pipeline_id: str = pydantic.Field(description="The ID of the pipeline to which the batch belongs")
+    status: str = pydantic.Field(description="The current status of the batch")
+    created_at: typing.Optional[dt.datetime]
+    updated_at: typing.Optional[dt.datetime]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
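
A rough construction sketch for the new Batch model; every identifier and value below is a placeholder, not real API output:

    import datetime as dt

    from llama_cloud import Batch

    batch = Batch(
        tool="parse",  # placeholder tool name
        input_type="datasource",  # per the field description, only 'datasource' is currently supported
        input_id="ds-123",
        id="batch-123",
        project_id="proj-123",
        organization_id="org-123",
        user_id="user-123",
        completion_window=86400,  # illustrative value
        pipeline_id="pipeline-123",
        status="pending",  # placeholder status string
        created_at=dt.datetime.now(dt.timezone.utc),
    )
    print(batch.json())  # datetimes are serialized through serialize_datetime via json_encoders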

llama_cloud/types/batch_item.py (new file)

@@ -0,0 +1,40 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .file_parse_public import FileParsePublic
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class BatchItem(pydantic.BaseModel):
+    id: str = pydantic.Field(description="Unique identifier for the batch item")
+    batch_id: str = pydantic.Field(description="The ID of the batch to which the item belongs")
+    status: str = pydantic.Field(description="The current status of the batch item")
+    status_updated_at: typing.Optional[dt.datetime]
+    created_at: typing.Optional[dt.datetime]
+    updated_at: typing.Optional[dt.datetime]
+    input_file: str = pydantic.Field(description="The input file associated with the batch item")
+    output_file: typing.Optional[str]
+    task: typing.Optional[FileParsePublic]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}

llama_cloud/types/{extract_agent_update.py → batch_paginated_list.py}

@@ -4,8 +4,7 @@ import datetime as dt
 import typing

 from ..core.datetime_utils import serialize_datetime
-from .extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
-from .extract_config import ExtractConfig
+from .batch import Batch

 try:
     import pydantic
@@ -16,13 +15,11 @@ except ImportError:
     import pydantic  # type: ignore


-class ExtractAgentUpdate(pydantic.BaseModel):
-    """
-    Settings for updating an extraction schema.
-    """
-
-    data_schema: ExtractAgentUpdateDataSchema = pydantic.Field(description="The schema of the data")
-    config: ExtractConfig = pydantic.Field(description="The configuration parameters for the extraction agent.")
+class BatchPaginatedList(pydantic.BaseModel):
+    data: typing.List[Batch] = pydantic.Field(description="List of batches")
+    limit: int = pydantic.Field(description="Pagination limit")
+    offset: int = pydantic.Field(description="Pagination offset")
+    total_count: int = pydantic.Field(description="Total number of batches")

     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
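
BatchPaginatedList is a plain pagination envelope over Batch: a data list plus limit, offset, and total_count, so callers can page by advancing offset until it reaches total_count. A runnable sketch with an empty, illustrative page (a real one would come from the API):

    from llama_cloud import BatchPaginatedList

    page = BatchPaginatedList(data=[], limit=50, offset=0, total_count=0)
    next_offset = page.offset + page.limit
    has_more = next_offset < page.total_count
    print(f"{len(page.data)} of {page.total_count} batches; more pages: {has_more}")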

llama_cloud/types/{extract_schema_validate_request.py → batch_public_output.py}

@@ -4,7 +4,9 @@ import datetime as dt
 import typing

 from ..core.datetime_utils import serialize_datetime
-from .extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
+from .batch import Batch
+from .batch_item import BatchItem
+from .managed_ingestion_status_response import ManagedIngestionStatusResponse

 try:
     import pydantic
@@ -15,8 +17,10 @@ except ImportError:
     import pydantic  # type: ignore


-class ExtractSchemaValidateRequest(pydantic.BaseModel):
-    data_schema: ExtractSchemaValidateRequestDataSchema
+class BatchPublicOutput(pydantic.BaseModel):
+    batch: Batch
+    batch_items: typing.List[BatchItem]
+    ingestion_status: ManagedIngestionStatusResponse

     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}

llama_cloud/types/cloud_confluence_data_source.py

@@ -27,6 +27,7 @@ class CloudConfluenceDataSource(pydantic.BaseModel):
     cql: typing.Optional[str]
     label: typing.Optional[str]
     index_restricted_pages: typing.Optional[bool] = pydantic.Field(description="Whether to index restricted pages.")
+    keep_markdown_format: typing.Optional[bool] = pydantic.Field(description="Whether to keep the markdown format.")
     class_name: typing.Optional[str]

     def json(self, **kwargs: typing.Any) -> str:

llama_cloud/types/cloud_postgres_vector_store.py

@@ -4,6 +4,7 @@ import datetime as dt
 import typing

 from ..core.datetime_utils import serialize_datetime
+from .pg_vector_hnsw_settings import PgVectorHnswSettings

 try:
     import pydantic
@@ -26,6 +27,7 @@ class CloudPostgresVectorStore(pydantic.BaseModel):
     embed_dim: int
     hybrid_search: typing.Optional[bool]
     perform_setup: typing.Optional[bool]
+    hnsw_settings: typing.Optional[PgVectorHnswSettings]
     class_name: typing.Optional[str]

     def json(self, **kwargs: typing.Any) -> str:

llama_cloud/types/cloud_sharepoint_data_source.py

@@ -25,6 +25,7 @@ class CloudSharepointDataSource(pydantic.BaseModel):
     client_secret: str = pydantic.Field(description="The client secret to use for authentication.")
     tenant_id: str = pydantic.Field(description="The tenant ID to use for authentication.")
     required_exts: typing.Optional[typing.List[str]]
+    get_permissions: typing.Optional[bool]
     class_name: typing.Optional[str]

     def json(self, **kwargs: typing.Any) -> str:

llama_cloud/types/extract_config.py

@@ -24,6 +24,8 @@ class ExtractConfig(pydantic.BaseModel):
     extraction_target: typing.Optional[ExtractTarget] = pydantic.Field(description="The extraction target specified.")
     extraction_mode: typing.Optional[ExtractMode] = pydantic.Field(description="The extraction mode specified.")
     system_prompt: typing.Optional[str]
+    use_reasoning: typing.Optional[bool] = pydantic.Field(description="Whether to use reasoning for the extraction.")
+    cite_sources: typing.Optional[bool] = pydantic.Field(description="Whether to cite sources for the extraction.")

     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
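
Both new ExtractConfig fields are optional booleans, so opting in is a one-liner. A minimal sketch, assuming the remaining config fields are likewise optional as the ones shown here are:

    from llama_cloud import ExtractConfig

    config = ExtractConfig(use_reasoning=True, cite_sources=True)
    print(config.dict())  # exclude_unset=True, so only the two flags are emitted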

llama_cloud/types/fail_page_mode.py (new file)

@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class FailPageMode(str, enum.Enum):
+    """
+    Enum for representing the different available page error handling modes
+    """
+
+    RAW_TEXT = "raw_text"
+    BLANK_PAGE = "blank_page"
+    ERROR_MESSAGE = "error_message"
+
+    def visit(
+        self,
+        raw_text: typing.Callable[[], T_Result],
+        blank_page: typing.Callable[[], T_Result],
+        error_message: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is FailPageMode.RAW_TEXT:
+            return raw_text()
+        if self is FailPageMode.BLANK_PAGE:
+            return blank_page()
+        if self is FailPageMode.ERROR_MESSAGE:
+            return error_message()
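
FailPageMode follows the same pattern as the other Fern enums: a str-valued Enum with an exhaustive visit() dispatcher. A small usage sketch based on the code above:

    from llama_cloud import FailPageMode

    mode = FailPageMode.ERROR_MESSAGE
    replacement = mode.visit(
        raw_text=lambda: "keep whatever raw text was recovered",
        blank_page=lambda: "",
        error_message=lambda: "[this page failed to parse]",
    )
    print(mode.value, replacement)  # "error_message" plus the chosen replacement text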

llama_cloud/types/{extract_agent_create.py → file_count_by_status_response.py}

@@ -4,8 +4,6 @@ import datetime as dt
 import typing

 from ..core.datetime_utils import serialize_datetime
-from .extract_agent_create_data_schema import ExtractAgentCreateDataSchema
-from .extract_config import ExtractConfig

 try:
     import pydantic
@@ -16,14 +14,14 @@ except ImportError:
     import pydantic  # type: ignore


-class ExtractAgentCreate(pydantic.BaseModel):
-    """
-    Settings for creating an extraction agent.
-    """
-
-    name: str = pydantic.Field(description="The name of the extraction schema")
-    data_schema: ExtractAgentCreateDataSchema = pydantic.Field(description="The schema of the data.")
-    config: ExtractConfig = pydantic.Field(description="The configuration parameters for the extraction agent.")
+class FileCountByStatusResponse(pydantic.BaseModel):
+    counts: typing.Dict[str, int] = pydantic.Field(description="The counts of files by status")
+    total_count: int = pydantic.Field(description="The total number of files")
+    pipeline_id: typing.Optional[str]
+    data_source_id: typing.Optional[str]
+    only_manually_uploaded: typing.Optional[bool] = pydantic.Field(
+        description="Whether to only count manually uploaded files"
+    )

     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}

llama_cloud/types/file_parse_public.py (new file)

@@ -0,0 +1,36 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class FileParsePublic(pydantic.BaseModel):
+    created_at: dt.datetime = pydantic.Field(description="The date and time when the file was parsed.")
+    status: str = pydantic.Field(description="The status of the parse task.")
+    started_at: typing.Optional[dt.datetime]
+    ended_at: typing.Optional[dt.datetime]
+    input_path: str = pydantic.Field(description="The path to the input file.")
+    data_path: str = pydantic.Field(description="The path to the data file.")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}

llama_cloud/types/job_names.py

@@ -14,7 +14,6 @@ class JobNames(str, enum.Enum):
     LOAD_DOCUMENTS_JOB = "load_documents_job"
     LOAD_FILES_JOB = "load_files_job"
     PLAYGROUND_JOB = "playground_job"
-    EVAL_DATASET_JOB = "eval_dataset_job"
     PIPELINE_MANAGED_INGESTION_JOB = "pipeline_managed_ingestion_job"
     DATA_SOURCE_MANAGED_INGESTION_JOB = "data_source_managed_ingestion_job"
     DATA_SOURCE_UPDATE_DISPATCHER_JOB = "data_source_update_dispatcher_job"
@@ -22,20 +21,19 @@ class JobNames(str, enum.Enum):
     PIPELINE_FILE_UPDATER_JOB = "pipeline_file_updater_job"
     FILE_MANAGED_INGESTION_JOB = "file_managed_ingestion_job"
     DOCUMENT_INGESTION_JOB = "document_ingestion_job"
-    PARSE_RAW_FILE_JOB = "parse_raw_file_job"
-    LLAMA_PARSE_TRANSFORM_JOB = "llama_parse_transform_job"
     METADATA_UPDATE_JOB = "metadata_update_job"
     PARSE_RAW_FILE_JOB_CACHED = "parse_raw_file_job_cached"
     EXTRACTION_JOB = "extraction_job"
     EXTRACT_JOB = "extract_job"
     ASYNCIO_TEST_JOB = "asyncio_test_job"
+    PARSE_RAW_FILE_JOB = "parse_raw_file_job"
+    LLAMA_PARSE_TRANSFORM_JOB = "llama_parse_transform_job"

     def visit(
         self,
         load_documents_job: typing.Callable[[], T_Result],
         load_files_job: typing.Callable[[], T_Result],
         playground_job: typing.Callable[[], T_Result],
-        eval_dataset_job: typing.Callable[[], T_Result],
         pipeline_managed_ingestion_job: typing.Callable[[], T_Result],
         data_source_managed_ingestion_job: typing.Callable[[], T_Result],
         data_source_update_dispatcher_job: typing.Callable[[], T_Result],
@@ -43,13 +41,13 @@ class JobNames(str, enum.Enum):
         pipeline_file_updater_job: typing.Callable[[], T_Result],
         file_managed_ingestion_job: typing.Callable[[], T_Result],
         document_ingestion_job: typing.Callable[[], T_Result],
-        parse_raw_file_job: typing.Callable[[], T_Result],
-        llama_parse_transform_job: typing.Callable[[], T_Result],
         metadata_update_job: typing.Callable[[], T_Result],
         parse_raw_file_job_cached: typing.Callable[[], T_Result],
         extraction_job: typing.Callable[[], T_Result],
         extract_job: typing.Callable[[], T_Result],
         asyncio_test_job: typing.Callable[[], T_Result],
+        parse_raw_file_job: typing.Callable[[], T_Result],
+        llama_parse_transform_job: typing.Callable[[], T_Result],
     ) -> T_Result:
         if self is JobNames.LOAD_DOCUMENTS_JOB:
             return load_documents_job()
@@ -57,8 +55,6 @@ class JobNames(str, enum.Enum):
             return load_files_job()
         if self is JobNames.PLAYGROUND_JOB:
             return playground_job()
-        if self is JobNames.EVAL_DATASET_JOB:
-            return eval_dataset_job()
         if self is JobNames.PIPELINE_MANAGED_INGESTION_JOB:
             return pipeline_managed_ingestion_job()
         if self is JobNames.DATA_SOURCE_MANAGED_INGESTION_JOB:
@@ -73,10 +69,6 @@ class JobNames(str, enum.Enum):
             return file_managed_ingestion_job()
         if self is JobNames.DOCUMENT_INGESTION_JOB:
             return document_ingestion_job()
-        if self is JobNames.PARSE_RAW_FILE_JOB:
-            return parse_raw_file_job()
-        if self is JobNames.LLAMA_PARSE_TRANSFORM_JOB:
-            return llama_parse_transform_job()
         if self is JobNames.METADATA_UPDATE_JOB:
             return metadata_update_job()
         if self is JobNames.PARSE_RAW_FILE_JOB_CACHED:
@@ -87,3 +79,7 @@ class JobNames(str, enum.Enum):
             return extract_job()
         if self is JobNames.ASYNCIO_TEST_JOB:
             return asyncio_test_job()
+        if self is JobNames.PARSE_RAW_FILE_JOB:
+            return parse_raw_file_job()
+        if self is JobNames.LLAMA_PARSE_TRANSFORM_JOB:
+            return llama_parse_transform_job()

llama_cloud/types/llama_extract_settings.py

@@ -26,8 +26,8 @@ class LlamaExtractSettings(pydantic.BaseModel):
     max_file_size: typing.Optional[int] = pydantic.Field(
         description="The maximum file size (in bytes) allowed for the document."
     )
-    max_tokens: typing.Optional[int] = pydantic.Field(
-        description="The maximum number of tokens allowed for the document."
+    max_file_size_ui: typing.Optional[int] = pydantic.Field(
+        description="The maximum file size (in bytes) allowed for the document."
     )
     max_pages: typing.Optional[int] = pydantic.Field(
         description="The maximum number of pages allowed for the document."

llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py

@@ -6,10 +6,20 @@ import typing

 import typing_extensions

+from .audio_block import AudioBlock
 from .image_block import ImageBlock
 from .text_block import TextBlock


+class LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio(AudioBlock):
+    block_type: typing_extensions.Literal["audio"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
 class LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image(ImageBlock):
     block_type: typing_extensions.Literal["image"]

@@ -29,5 +39,7 @@ class LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text(TextBlock):


 LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem = typing.Union[
-    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image, LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text
+    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio,
+    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image,
+    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text,
 ]
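
The blocks union is discriminated by block_type, so an audio block for a chat message can be built through the new variant class; the URL below is illustrative:

    from llama_cloud import LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio

    block = LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio(
        block_type="audio",
        url="https://example.com/clip.mp3",  # inherited from AudioBlock; audio, path, and format are also accepted
    )
    print(block.json())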

llama_cloud/types/llama_parse_parameters.py

@@ -4,6 +4,7 @@ import datetime as dt
 import typing

 from ..core.datetime_utils import serialize_datetime
+from .fail_page_mode import FailPageMode
 from .parser_languages import ParserLanguages
 from .parsing_mode import ParsingMode

@@ -26,6 +27,7 @@ class LlamaParseParameters(pydantic.BaseModel):
     disable_ocr: typing.Optional[bool]
     annotate_links: typing.Optional[bool]
     adaptive_long_table: typing.Optional[bool]
+    compact_markdown_table: typing.Optional[bool]
     disable_reconstruction: typing.Optional[bool]
     disable_image_extraction: typing.Optional[bool]
     invalidate_cache: typing.Optional[bool]
@@ -61,9 +63,9 @@ class LlamaParseParameters(pydantic.BaseModel):
     is_formatting_instruction: typing.Optional[bool]
     premium_mode: typing.Optional[bool]
     continuous_mode: typing.Optional[bool]
-    s_3_input_path: typing.Optional[str] = pydantic.Field(alias="s3_input_path")
+    input_s_3_path: typing.Optional[str] = pydantic.Field(alias="input_s3_path")
     input_s_3_region: typing.Optional[str] = pydantic.Field(alias="input_s3_region")
-    s_3_output_path_prefix: typing.Optional[str] = pydantic.Field(alias="s3_output_path_prefix")
+    output_s_3_path_prefix: typing.Optional[str] = pydantic.Field(alias="output_s3_path_prefix")
     output_s_3_region: typing.Optional[str] = pydantic.Field(alias="output_s3_region")
     project_id: typing.Optional[str]
     azure_openai_deployment_name: typing.Optional[str]
@@ -93,6 +95,7 @@ class LlamaParseParameters(pydantic.BaseModel):
     strict_mode_image_ocr: typing.Optional[bool]
     strict_mode_reconstruction: typing.Optional[bool]
     strict_mode_buggy_font: typing.Optional[bool]
+    save_images: typing.Optional[bool]
     ignore_document_elements_for_layout_detection: typing.Optional[bool]
     output_tables_as_html: typing.Optional[bool] = pydantic.Field(alias="output_tables_as_HTML")
     internal_is_screenshot_job: typing.Optional[bool]
@@ -100,6 +103,11 @@ class LlamaParseParameters(pydantic.BaseModel):
     system_prompt: typing.Optional[str]
     system_prompt_append: typing.Optional[str]
     user_prompt: typing.Optional[str]
+    page_error_tolerance: typing.Optional[float]
+    replace_failed_page_mode: typing.Optional[FailPageMode]
+    replace_failed_page_with_error_message_prefix: typing.Optional[str]
+    replace_failed_page_with_error_message_suffix: typing.Optional[str]
+    markdown_table_multiline_header_separator: typing.Optional[str]

     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
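
Taken together with FailPageMode, the new fields let a caller choose what to substitute for pages that fail to parse. Note also that s_3_input_path and s_3_output_path_prefix were renamed to input_s_3_path and output_s_3_path_prefix (aliases input_s3_path and output_s3_path_prefix). A hedged sketch, assuming the remaining parameters are optional as the fields shown in this diff are; the tolerance value is illustrative:

    from llama_cloud import FailPageMode, LlamaParseParameters

    params = LlamaParseParameters(
        page_error_tolerance=0.1,  # illustrative threshold
        replace_failed_page_mode=FailPageMode.ERROR_MESSAGE,
        replace_failed_page_with_error_message_prefix="[[",
        replace_failed_page_with_error_message_suffix="]]",
    )
    print(params.json())  # by_alias=True, so aliased fields serialize under their alias names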

llama_cloud/types/markdown_node_parser.py

@@ -24,6 +24,7 @@ class MarkdownNodeParser(pydantic.BaseModel):
     Args:
         include_metadata (bool): whether to include metadata in nodes
         include_prev_next_rel (bool): whether to include prev/next relationships
+        header_path_separator (str): separator char used for section header path metadata
     """

     include_metadata: typing.Optional[bool] = pydantic.Field(
@@ -32,6 +33,9 @@ class MarkdownNodeParser(pydantic.BaseModel):
     include_prev_next_rel: typing.Optional[bool] = pydantic.Field(description="Include prev/next node relationships.")
     callback_manager: typing.Optional[typing.Any]
     id_func: typing.Optional[str]
+    header_path_separator: typing.Optional[str] = pydantic.Field(
+        description="Separator char used for section header path metadata."
+    )
     class_name: typing.Optional[str]

     def json(self, **kwargs: typing.Any) -> str:
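
A minimal sketch of the new MarkdownNodeParser option, assuming the type is re-exported from the package root like the others; the separator value is illustrative:

    from llama_cloud import MarkdownNodeParser

    parser = MarkdownNodeParser(header_path_separator=" > ")
    print(parser.dict())  # as with the other models here, serialization defaults to exclude_unset=True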