llama-cloud 0.1.18__py3-none-any.whl → 0.1.20__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release: this version of llama-cloud might be problematic.

Files changed (92)
  1. llama_cloud/__init__.py +202 -42
  2. llama_cloud/client.py +3 -0
  3. llama_cloud/resources/__init__.py +61 -2
  4. llama_cloud/resources/beta/__init__.py +2 -0
  5. llama_cloud/resources/beta/client.py +371 -0
  6. llama_cloud/resources/data_sinks/__init__.py +18 -2
  7. llama_cloud/resources/data_sinks/client.py +2 -94
  8. llama_cloud/resources/data_sinks/types/__init__.py +18 -2
  9. llama_cloud/resources/data_sinks/types/data_sink_update_component.py +65 -7
  10. llama_cloud/resources/data_sources/__init__.py +30 -2
  11. llama_cloud/resources/data_sources/types/__init__.py +28 -1
  12. llama_cloud/resources/data_sources/types/data_source_update_component.py +2 -23
  13. llama_cloud/resources/data_sources/types/data_source_update_component_one.py +122 -0
  14. llama_cloud/resources/embedding_model_configs/client.py +82 -22
  15. llama_cloud/resources/files/client.py +18 -4
  16. llama_cloud/resources/llama_extract/__init__.py +21 -0
  17. llama_cloud/resources/llama_extract/client.py +227 -114
  18. llama_cloud/resources/llama_extract/types/__init__.py +21 -0
  19. llama_cloud/resources/parsing/client.py +123 -4
  20. llama_cloud/resources/pipelines/client.py +116 -11
  21. llama_cloud/types/__init__.py +172 -52
  22. llama_cloud/types/{extract_schema_validate_request.py → audio_block.py} +5 -3
  23. llama_cloud/types/batch.py +47 -0
  24. llama_cloud/types/batch_item.py +40 -0
  25. llama_cloud/types/batch_paginated_list.py +35 -0
  26. llama_cloud/types/{base_prompt_template.py → batch_public_output.py} +7 -7
  27. llama_cloud/types/cloud_confluence_data_source.py +1 -0
  28. llama_cloud/types/cloud_jira_data_source.py +0 -4
  29. llama_cloud/types/cloud_postgres_vector_store.py +2 -0
  30. llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
  31. llama_cloud/types/data_sink_component.py +65 -7
  32. llama_cloud/types/data_sink_create_component.py +65 -7
  33. llama_cloud/types/data_source_component.py +2 -23
  34. llama_cloud/types/data_source_component_one.py +122 -0
  35. llama_cloud/types/data_source_create_component.py +2 -23
  36. llama_cloud/types/data_source_create_component_one.py +122 -0
  37. llama_cloud/types/{extract_agent_update.py → data_source_update_dispatcher_config.py} +6 -6
  38. llama_cloud/types/{node_parser.py → delete_params.py} +7 -9
  39. llama_cloud/types/{extract_agent_create.py → document_ingestion_job_params.py} +11 -7
  40. llama_cloud/types/extract_config.py +2 -0
  41. llama_cloud/types/extract_job_create.py +1 -2
  42. llama_cloud/types/fail_page_mode.py +29 -0
  43. llama_cloud/types/file_count_by_status_response.py +37 -0
  44. llama_cloud/types/file_parse_public.py +36 -0
  45. llama_cloud/types/job_names.py +8 -12
  46. llama_cloud/types/job_record.py +2 -2
  47. llama_cloud/types/job_record_parameters.py +111 -0
  48. llama_cloud/types/l_lama_parse_transform_config.py +37 -0
  49. llama_cloud/types/legacy_parse_job_config.py +189 -0
  50. llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +13 -1
  51. llama_cloud/types/llama_parse_parameters.py +8 -0
  52. llama_cloud/types/load_files_job_config.py +35 -0
  53. llama_cloud/types/message_role.py +4 -0
  54. llama_cloud/types/parse_job_config.py +134 -0
  55. llama_cloud/types/pg_vector_distance_method.py +43 -0
  56. llama_cloud/types/{extract_job_create_batch.py → pg_vector_hnsw_settings.py} +12 -9
  57. llama_cloud/types/pg_vector_vector_type.py +35 -0
  58. llama_cloud/types/pipeline.py +2 -4
  59. llama_cloud/types/pipeline_create.py +3 -2
  60. llama_cloud/types/pipeline_data_source.py +3 -0
  61. llama_cloud/types/pipeline_data_source_component.py +2 -23
  62. llama_cloud/types/pipeline_data_source_component_one.py +122 -0
  63. llama_cloud/types/pipeline_data_source_status.py +33 -0
  64. llama_cloud/types/pipeline_file.py +1 -0
  65. llama_cloud/types/pipeline_file_update_dispatcher_config.py +38 -0
  66. llama_cloud/types/{markdown_node_parser.py → pipeline_file_updater_config.py} +14 -15
  67. llama_cloud/types/pipeline_managed_ingestion_job_params.py +37 -0
  68. llama_cloud/types/pipeline_metadata_config.py +36 -0
  69. llama_cloud/types/prompt_conf.py +3 -0
  70. llama_cloud/types/struct_parse_conf.py +4 -1
  71. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/METADATA +4 -2
  72. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/RECORD +82 -68
  73. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/WHEEL +1 -1
  74. llama_cloud/types/character_splitter.py +0 -46
  75. llama_cloud/types/code_splitter.py +0 -50
  76. llama_cloud/types/configured_transformation_item.py +0 -46
  77. llama_cloud/types/configured_transformation_item_component.py +0 -22
  78. llama_cloud/types/llm.py +0 -60
  79. llama_cloud/types/markdown_element_node_parser.py +0 -51
  80. llama_cloud/types/page_splitter_node_parser.py +0 -42
  81. llama_cloud/types/pydantic_program_mode.py +0 -41
  82. llama_cloud/types/sentence_splitter.py +0 -50
  83. llama_cloud/types/token_text_splitter.py +0 -47
  84. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema.py +0 -0
  85. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema_zero_value.py +0 -0
  86. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema.py +0 -0
  87. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema_zero_value.py +0 -0
  88. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override.py +0 -0
  89. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override_zero_value.py +0 -0
  90. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema.py +0 -0
  91. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema_zero_value.py +0 -0
  92. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/LICENSE +0 -0
llama_cloud/types/parse_job_config.py
@@ -0,0 +1,134 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import datetime as dt
+ import typing
+
+ from ..core.datetime_utils import serialize_datetime
+ from .fail_page_mode import FailPageMode
+ from .parser_languages import ParserLanguages
+ from .parsing_mode import ParsingMode
+
+ try:
+     import pydantic
+     if pydantic.__version__.startswith("1."):
+         raise ImportError
+     import pydantic.v1 as pydantic  # type: ignore
+ except ImportError:
+     import pydantic  # type: ignore
+
+
+ class ParseJobConfig(pydantic.BaseModel):
+     """
+     Configuration for llamaparse job
+     """
+
+     custom_metadata: typing.Optional[typing.Dict[str, typing.Any]]
+     resource_info: typing.Optional[typing.Dict[str, typing.Any]]
+     languages: typing.Optional[typing.List[ParserLanguages]]
+     parsing_instruction: typing.Optional[str]
+     disable_ocr: typing.Optional[bool]
+     annotate_links: typing.Optional[bool]
+     adaptive_long_table: typing.Optional[bool]
+     compact_markdown_table: typing.Optional[bool]
+     disable_reconstruction: typing.Optional[bool]
+     disable_image_extraction: typing.Optional[bool]
+     invalidate_cache: typing.Optional[bool]
+     output_pdf_of_document: typing.Optional[bool]
+     do_not_cache: typing.Optional[bool]
+     fast_mode: typing.Optional[bool]
+     skip_diagonal_text: typing.Optional[bool]
+     preserve_layout_alignment_across_pages: typing.Optional[bool]
+     gpt_4_o_mode: typing.Optional[bool] = pydantic.Field(alias="gpt4o_mode")
+     gpt_4_o_api_key: typing.Optional[str] = pydantic.Field(alias="gpt4o_api_key")
+     do_not_unroll_columns: typing.Optional[bool]
+     extract_layout: typing.Optional[bool]
+     html_make_all_elements_visible: typing.Optional[bool]
+     html_remove_navigation_elements: typing.Optional[bool]
+     html_remove_fixed_elements: typing.Optional[bool]
+     guess_xlsx_sheet_name: typing.Optional[bool]
+     page_separator: typing.Optional[str]
+     bounding_box: typing.Optional[str]
+     bbox_top: typing.Optional[float]
+     bbox_right: typing.Optional[float]
+     bbox_bottom: typing.Optional[float]
+     bbox_left: typing.Optional[float]
+     target_pages: typing.Optional[str]
+     use_vendor_multimodal_model: typing.Optional[bool]
+     vendor_multimodal_model_name: typing.Optional[str]
+     model: typing.Optional[str]
+     vendor_multimodal_api_key: typing.Optional[str]
+     page_prefix: typing.Optional[str]
+     page_suffix: typing.Optional[str]
+     webhook_url: typing.Optional[str]
+     preset: typing.Optional[str]
+     take_screenshot: typing.Optional[bool]
+     is_formatting_instruction: typing.Optional[bool]
+     premium_mode: typing.Optional[bool]
+     continuous_mode: typing.Optional[bool]
+     input_s_3_path: typing.Optional[str] = pydantic.Field(alias="input_s3_path")
+     input_s_3_region: typing.Optional[str] = pydantic.Field(alias="input_s3_region")
+     output_s_3_path_prefix: typing.Optional[str] = pydantic.Field(alias="output_s3_path_prefix")
+     output_s_3_region: typing.Optional[str] = pydantic.Field(alias="output_s3_region")
+     project_id: typing.Optional[str]
+     azure_openai_deployment_name: typing.Optional[str]
+     azure_openai_endpoint: typing.Optional[str]
+     azure_openai_api_version: typing.Optional[str]
+     azure_openai_key: typing.Optional[str]
+     input_url: typing.Optional[str]
+     http_proxy: typing.Optional[str]
+     auto_mode: typing.Optional[bool]
+     auto_mode_trigger_on_regexp_in_page: typing.Optional[str]
+     auto_mode_trigger_on_text_in_page: typing.Optional[str]
+     auto_mode_trigger_on_table_in_page: typing.Optional[bool]
+     auto_mode_trigger_on_image_in_page: typing.Optional[bool]
+     auto_mode_configuration_json: typing.Optional[str]
+     structured_output: typing.Optional[bool]
+     structured_output_json_schema: typing.Optional[str]
+     structured_output_json_schema_name: typing.Optional[str]
+     max_pages: typing.Optional[int]
+     max_pages_enforced: typing.Optional[int]
+     extract_charts: typing.Optional[bool]
+     formatting_instruction: typing.Optional[str]
+     complemental_formatting_instruction: typing.Optional[str]
+     content_guideline_instruction: typing.Optional[str]
+     spreadsheet_extract_sub_tables: typing.Optional[bool]
+     job_timeout_in_seconds: typing.Optional[float]
+     job_timeout_extra_time_per_page_in_seconds: typing.Optional[float]
+     strict_mode_image_extraction: typing.Optional[bool]
+     strict_mode_image_ocr: typing.Optional[bool]
+     strict_mode_reconstruction: typing.Optional[bool]
+     strict_mode_buggy_font: typing.Optional[bool]
+     save_images: typing.Optional[bool]
+     ignore_document_elements_for_layout_detection: typing.Optional[bool]
+     output_tables_as_html: typing.Optional[bool] = pydantic.Field(alias="output_tables_as_HTML")
+     internal_is_screenshot_job: typing.Optional[bool]
+     parse_mode: typing.Optional[ParsingMode]
+     system_prompt: typing.Optional[str]
+     system_prompt_append: typing.Optional[str]
+     user_prompt: typing.Optional[str]
+     page_error_tolerance: typing.Optional[float]
+     replace_failed_page_mode: typing.Optional[FailPageMode]
+     replace_failed_page_with_error_message_prefix: typing.Optional[str]
+     replace_failed_page_with_error_message_suffix: typing.Optional[str]
+     markdown_table_multiline_header_separator: typing.Optional[str]
+     file_name: str = pydantic.Field(description="The file name.")
+     original_file_name: str = pydantic.Field(description="The original file name.")
+     file_key: str = pydantic.Field(description="The file key.")
+     lang: str = pydantic.Field(description="The language.")
+     output_bucket: typing.Optional[str] = pydantic.Field(alias="outputBucket")
+     file_id: typing.Optional[str]
+     pipeline_id: typing.Optional[str]
+
+     def json(self, **kwargs: typing.Any) -> str:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().json(**kwargs_with_defaults)
+
+     def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().dict(**kwargs_with_defaults)
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+         json_encoders = {dt.datetime: serialize_datetime}
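For reference, a minimal usage sketch of the new ParseJobConfig model (not part of the diff; the import path follows the file list above, and the field values are purely illustrative):

    from llama_cloud.types.parse_job_config import ParseJobConfig

    # Only file_name, original_file_name, file_key and lang are required;
    # every other field is Optional and defaults to None.
    config = ParseJobConfig(
        file_name="report.pdf",
        original_file_name="report.pdf",
        file_key="files/report.pdf",
        lang="en",
        premium_mode=True,
        target_pages="0-4",
    )
    # The by_alias/exclude_unset defaults mean unset fields are dropped and
    # aliases such as output_tables_as_HTML are used when serializing.
    print(config.json())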
llama_cloud/types/pg_vector_distance_method.py
@@ -0,0 +1,43 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import enum
+ import typing
+
+ T_Result = typing.TypeVar("T_Result")
+
+
+ class PgVectorDistanceMethod(str, enum.Enum):
+     """
+     Distance methods for PGVector.
+     Docs:
+     https://github.com/pgvector/pgvector?tab=readme-ov-file#query-options
+     """
+
+     L_2 = "l2"
+     IP = "ip"
+     COSINE = "cosine"
+     L_1 = "l1"
+     HAMMING = "hamming"
+     JACCARD = "jaccard"
+
+     def visit(
+         self,
+         l_2: typing.Callable[[], T_Result],
+         ip: typing.Callable[[], T_Result],
+         cosine: typing.Callable[[], T_Result],
+         l_1: typing.Callable[[], T_Result],
+         hamming: typing.Callable[[], T_Result],
+         jaccard: typing.Callable[[], T_Result],
+     ) -> T_Result:
+         if self is PgVectorDistanceMethod.L_2:
+             return l_2()
+         if self is PgVectorDistanceMethod.IP:
+             return ip()
+         if self is PgVectorDistanceMethod.COSINE:
+             return cosine()
+         if self is PgVectorDistanceMethod.L_1:
+             return l_1()
+         if self is PgVectorDistanceMethod.HAMMING:
+             return hamming()
+         if self is PgVectorDistanceMethod.JACCARD:
+             return jaccard()
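The generated visit() helper dispatches on the enum member without if/else chains at the call site. A small sketch (not SDK code) that maps each distance method to the pgvector query operator documented at the URL above; the operator strings come from the pgvector README, not from this package:

    from llama_cloud.types.pg_vector_distance_method import PgVectorDistanceMethod

    def to_operator(method: PgVectorDistanceMethod) -> str:
        # One callable per enum member, each returning a pgvector operator.
        return method.visit(
            l_2=lambda: "<->",
            ip=lambda: "<#>",
            cosine=lambda: "<=>",
            l_1=lambda: "<+>",
            hamming=lambda: "<~>",
            jaccard=lambda: "<%>",
        )

    print(to_operator(PgVectorDistanceMethod.COSINE))  # <=>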
llama_cloud/types/{extract_job_create_batch.py → pg_vector_hnsw_settings.py}
@@ -4,8 +4,8 @@ import datetime as dt
  import typing
 
  from ..core.datetime_utils import serialize_datetime
- from .extract_config import ExtractConfig
- from .extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
+ from .pg_vector_distance_method import PgVectorDistanceMethod
+ from .pg_vector_vector_type import PgVectorVectorType
 
  try:
      import pydantic
@@ -16,17 +16,20 @@ except ImportError:
      import pydantic  # type: ignore
 
 
- class ExtractJobCreateBatch(pydantic.BaseModel):
+ class PgVectorHnswSettings(pydantic.BaseModel):
      """
-     Schema for creating extraction jobs in batch.
+     HNSW settings for PGVector.
      """
 
-     extraction_agent_id: str = pydantic.Field(description="The id of the extraction agent")
-     file_ids: typing.List[str] = pydantic.Field(description="The ids of the files")
-     data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = pydantic.Field(
-         description="The data schema to override the extraction agent's data schema with"
+     ef_construction: typing.Optional[int] = pydantic.Field(
+         description="The number of edges to use during the construction phase."
      )
-     config_override: typing.Optional[ExtractConfig]
+     ef_search: typing.Optional[int] = pydantic.Field(description="The number of edges to use during the search phase.")
+     m: typing.Optional[int] = pydantic.Field(
+         description="The number of bi-directional links created for each new element."
+     )
+     vector_type: typing.Optional[PgVectorVectorType] = pydantic.Field(description="The type of vector to use.")
+     distance_method: typing.Optional[PgVectorDistanceMethod] = pydantic.Field(description="The distance method to use.")
 
      def json(self, **kwargs: typing.Any) -> str:
          kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/pg_vector_vector_type.py
@@ -0,0 +1,35 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import enum
+ import typing
+
+ T_Result = typing.TypeVar("T_Result")
+
+
+ class PgVectorVectorType(str, enum.Enum):
+     """
+     Vector storage formats for PGVector.
+     Docs:
+     https://github.com/pgvector/pgvector?tab=readme-ov-file#query-options
+     """
+
+     VECTOR = "vector"
+     HALF_VEC = "half_vec"
+     BIT = "bit"
+     SPARSE_VEC = "sparse_vec"
+
+     def visit(
+         self,
+         vector: typing.Callable[[], T_Result],
+         half_vec: typing.Callable[[], T_Result],
+         bit: typing.Callable[[], T_Result],
+         sparse_vec: typing.Callable[[], T_Result],
+     ) -> T_Result:
+         if self is PgVectorVectorType.VECTOR:
+             return vector()
+         if self is PgVectorVectorType.HALF_VEC:
+             return half_vec()
+         if self is PgVectorVectorType.BIT:
+             return bit()
+         if self is PgVectorVectorType.SPARSE_VEC:
+             return sparse_vec()
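Together, the two enums feed the new PgVectorHnswSettings model introduced in the rename above. A construction sketch (illustrative only; the numeric values mirror common pgvector defaults and are assumptions, not SDK defaults):

    from llama_cloud.types.pg_vector_distance_method import PgVectorDistanceMethod
    from llama_cloud.types.pg_vector_hnsw_settings import PgVectorHnswSettings
    from llama_cloud.types.pg_vector_vector_type import PgVectorVectorType

    hnsw = PgVectorHnswSettings(
        m=16,                # bi-directional links created per new element
        ef_construction=64,  # edges considered while building the index
        ef_search=40,        # edges considered at query time
        vector_type=PgVectorVectorType.HALF_VEC,
        distance_method=PgVectorDistanceMethod.COSINE,
    )
    print(hnsw.json())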
llama_cloud/types/pipeline.py
@@ -4,12 +4,12 @@ import datetime as dt
  import typing
 
  from ..core.datetime_utils import serialize_datetime
- from .configured_transformation_item import ConfiguredTransformationItem
  from .data_sink import DataSink
  from .eval_execution_params import EvalExecutionParams
  from .llama_parse_parameters import LlamaParseParameters
  from .pipeline_configuration_hashes import PipelineConfigurationHashes
  from .pipeline_embedding_config import PipelineEmbeddingConfig
+ from .pipeline_metadata_config import PipelineMetadataConfig
  from .pipeline_transform_config import PipelineTransformConfig
  from .pipeline_type import PipelineType
  from .preset_retrieval_params import PresetRetrievalParams
@@ -39,9 +39,6 @@ class Pipeline(pydantic.BaseModel):
      )
      managed_pipeline_id: typing.Optional[str]
      embedding_config: PipelineEmbeddingConfig
-     configured_transformations: typing.Optional[typing.List[ConfiguredTransformationItem]] = pydantic.Field(
-         description="Deprecated don't use it, List of configured transformations."
-     )
      config_hash: typing.Optional[PipelineConfigurationHashes]
      transform_config: typing.Optional[PipelineTransformConfig] = pydantic.Field(
          description="Configuration for the transformation."
@@ -54,6 +51,7 @@
      )
      llama_parse_parameters: typing.Optional[LlamaParseParameters]
      data_sink: typing.Optional[DataSink]
+     metadata_config: typing.Optional[PipelineMetadataConfig]
 
      def json(self, **kwargs: typing.Any) -> str:
          kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/pipeline_create.py
@@ -4,12 +4,12 @@ import datetime as dt
  import typing
 
  from ..core.datetime_utils import serialize_datetime
- from .configured_transformation_item import ConfiguredTransformationItem
  from .data_sink_create import DataSinkCreate
  from .eval_execution_params import EvalExecutionParams
  from .llama_parse_parameters import LlamaParseParameters
  from .pipeline_create_embedding_config import PipelineCreateEmbeddingConfig
  from .pipeline_create_transform_config import PipelineCreateTransformConfig
+ from .pipeline_metadata_config import PipelineMetadataConfig
  from .pipeline_type import PipelineType
  from .preset_retrieval_params import PresetRetrievalParams
 
@@ -31,7 +31,6 @@ class PipelineCreate(pydantic.BaseModel):
      transform_config: typing.Optional[PipelineCreateTransformConfig] = pydantic.Field(
          description="Configuration for the transformation."
      )
-     configured_transformations: typing.Optional[typing.List[ConfiguredTransformationItem]]
      data_sink_id: typing.Optional[str]
      embedding_model_config_id: typing.Optional[str]
      data_sink: typing.Optional[DataSinkCreate]
@@ -44,6 +43,8 @@
      llama_parse_parameters: typing.Optional[LlamaParseParameters] = pydantic.Field(
          description="Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline."
      )
+     status: typing.Optional[str]
+     metadata_config: typing.Optional[PipelineMetadataConfig]
      name: str
      pipeline_type: typing.Optional[PipelineType] = pydantic.Field(
          description="Type of pipeline. Either PLAYGROUND or MANAGED."
llama_cloud/types/pipeline_data_source.py
@@ -7,6 +7,7 @@ from ..core.datetime_utils import serialize_datetime
  from .configurable_data_source_names import ConfigurableDataSourceNames
  from .pipeline_data_source_component import PipelineDataSourceComponent
  from .pipeline_data_source_custom_metadata_value import PipelineDataSourceCustomMetadataValue
+ from .pipeline_data_source_status import PipelineDataSourceStatus
 
  try:
      import pydantic
@@ -36,6 +37,8 @@ class PipelineDataSource(pydantic.BaseModel):
      last_synced_at: dt.datetime = pydantic.Field(description="The last time the data source was automatically synced.")
      sync_interval: typing.Optional[float]
      sync_schedule_set_by: typing.Optional[str]
+     status: typing.Optional[PipelineDataSourceStatus]
+     status_updated_at: typing.Optional[dt.datetime]
 
      def json(self, **kwargs: typing.Any) -> str:
          kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/pipeline_data_source_component.py
@@ -2,27 +2,6 @@
 
  import typing
 
- from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
- from .cloud_box_data_source import CloudBoxDataSource
- from .cloud_confluence_data_source import CloudConfluenceDataSource
- from .cloud_google_drive_data_source import CloudGoogleDriveDataSource
- from .cloud_jira_data_source import CloudJiraDataSource
- from .cloud_notion_page_data_source import CloudNotionPageDataSource
- from .cloud_one_drive_data_source import CloudOneDriveDataSource
- from .cloud_s_3_data_source import CloudS3DataSource
- from .cloud_sharepoint_data_source import CloudSharepointDataSource
- from .cloud_slack_data_source import CloudSlackDataSource
+ from .pipeline_data_source_component_one import PipelineDataSourceComponentOne
 
- PipelineDataSourceComponent = typing.Union[
-     typing.Dict[str, typing.Any],
-     CloudS3DataSource,
-     CloudAzStorageBlobDataSource,
-     CloudGoogleDriveDataSource,
-     CloudOneDriveDataSource,
-     CloudSharepointDataSource,
-     CloudSlackDataSource,
-     CloudNotionPageDataSource,
-     CloudConfluenceDataSource,
-     CloudJiraDataSource,
-     CloudBoxDataSource,
- ]
+ PipelineDataSourceComponent = typing.Union[typing.Dict[str, typing.Any], PipelineDataSourceComponentOne]
llama_cloud/types/pipeline_data_source_component_one.py
@@ -0,0 +1,122 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ from __future__ import annotations
+
+ import typing
+
+ import typing_extensions
+
+ from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
+ from .cloud_box_data_source import CloudBoxDataSource
+ from .cloud_confluence_data_source import CloudConfluenceDataSource
+ from .cloud_google_drive_data_source import CloudGoogleDriveDataSource
+ from .cloud_jira_data_source import CloudJiraDataSource
+ from .cloud_notion_page_data_source import CloudNotionPageDataSource
+ from .cloud_one_drive_data_source import CloudOneDriveDataSource
+ from .cloud_s_3_data_source import CloudS3DataSource
+ from .cloud_sharepoint_data_source import CloudSharepointDataSource
+ from .cloud_slack_data_source import CloudSlackDataSource
+
+
+ class PipelineDataSourceComponentOne_AzureStorageBlob(CloudAzStorageBlobDataSource):
+     type: typing_extensions.Literal["AZURE_STORAGE_BLOB"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class PipelineDataSourceComponentOne_Box(CloudBoxDataSource):
+     type: typing_extensions.Literal["BOX"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class PipelineDataSourceComponentOne_Confluence(CloudConfluenceDataSource):
+     type: typing_extensions.Literal["CONFLUENCE"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class PipelineDataSourceComponentOne_GoogleDrive(CloudGoogleDriveDataSource):
+     type: typing_extensions.Literal["GOOGLE_DRIVE"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class PipelineDataSourceComponentOne_Jira(CloudJiraDataSource):
+     type: typing_extensions.Literal["JIRA"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class PipelineDataSourceComponentOne_MicrosoftOnedrive(CloudOneDriveDataSource):
+     type: typing_extensions.Literal["MICROSOFT_ONEDRIVE"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class PipelineDataSourceComponentOne_MicrosoftSharepoint(CloudSharepointDataSource):
+     type: typing_extensions.Literal["MICROSOFT_SHAREPOINT"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class PipelineDataSourceComponentOne_NotionPage(CloudNotionPageDataSource):
+     type: typing_extensions.Literal["NOTION_PAGE"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class PipelineDataSourceComponentOne_S3(CloudS3DataSource):
+     type: typing_extensions.Literal["S3"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class PipelineDataSourceComponentOne_Slack(CloudSlackDataSource):
+     type: typing_extensions.Literal["SLACK"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ PipelineDataSourceComponentOne = typing.Union[
+     PipelineDataSourceComponentOne_AzureStorageBlob,
+     PipelineDataSourceComponentOne_Box,
+     PipelineDataSourceComponentOne_Confluence,
+     PipelineDataSourceComponentOne_GoogleDrive,
+     PipelineDataSourceComponentOne_Jira,
+     PipelineDataSourceComponentOne_MicrosoftOnedrive,
+     PipelineDataSourceComponentOne_MicrosoftSharepoint,
+     PipelineDataSourceComponentOne_NotionPage,
+     PipelineDataSourceComponentOne_S3,
+     PipelineDataSourceComponentOne_Slack,
+ ]
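Each union member layers a typing_extensions.Literal tag on top of the corresponding cloud data source model, so callers can branch on the type field instead of isinstance checks against ten classes. A hedged sketch of that dispatch (the function is illustrative, not SDK code; the component value is assumed to come from an API response such as a PipelineDataSource):

    from llama_cloud.types.pipeline_data_source_component_one import (
        PipelineDataSourceComponentOne,
    )

    def describe(component: PipelineDataSourceComponentOne) -> str:
        # Every variant carries a literal tag such as "S3", "SLACK",
        # or "MICROSOFT_SHAREPOINT" in its `type` field.
        return f"data source of type {component.type}"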
llama_cloud/types/pipeline_data_source_status.py
@@ -0,0 +1,33 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import enum
+ import typing
+
+ T_Result = typing.TypeVar("T_Result")
+
+
+ class PipelineDataSourceStatus(str, enum.Enum):
+     NOT_STARTED = "NOT_STARTED"
+     IN_PROGRESS = "IN_PROGRESS"
+     SUCCESS = "SUCCESS"
+     ERROR = "ERROR"
+     CANCELLED = "CANCELLED"
+
+     def visit(
+         self,
+         not_started: typing.Callable[[], T_Result],
+         in_progress: typing.Callable[[], T_Result],
+         success: typing.Callable[[], T_Result],
+         error: typing.Callable[[], T_Result],
+         cancelled: typing.Callable[[], T_Result],
+     ) -> T_Result:
+         if self is PipelineDataSourceStatus.NOT_STARTED:
+             return not_started()
+         if self is PipelineDataSourceStatus.IN_PROGRESS:
+             return in_progress()
+         if self is PipelineDataSourceStatus.SUCCESS:
+             return success()
+         if self is PipelineDataSourceStatus.ERROR:
+             return error()
+         if self is PipelineDataSourceStatus.CANCELLED:
+             return cancelled()
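This enum backs the new status/status_updated_at fields on PipelineDataSource shown earlier. A small sketch (not SDK code) of a helper that uses visit() to tell terminal states from in-flight ones:

    from llama_cloud.types.pipeline_data_source_status import PipelineDataSourceStatus

    def is_terminal(status: PipelineDataSourceStatus) -> bool:
        # NOT_STARTED and IN_PROGRESS mean a sync may still complete;
        # the other three states are final.
        return status.visit(
            not_started=lambda: False,
            in_progress=lambda: False,
            success=lambda: True,
            error=lambda: True,
            cancelled=lambda: True,
        )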
llama_cloud/types/pipeline_file.py
@@ -42,6 +42,7 @@ class PipelineFile(pydantic.BaseModel):
      config_hash: typing.Optional[typing.Dict[str, typing.Optional[PipelineFileConfigHashValue]]]
      indexed_page_count: typing.Optional[int]
      status: typing.Optional[PipelineFileStatus]
+     status_updated_at: typing.Optional[dt.datetime]
 
      def json(self, **kwargs: typing.Any) -> str:
          kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/pipeline_file_update_dispatcher_config.py
@@ -0,0 +1,38 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import datetime as dt
+ import typing
+
+ from ..core.datetime_utils import serialize_datetime
+ from .delete_params import DeleteParams
+
+ try:
+     import pydantic
+     if pydantic.__version__.startswith("1."):
+         raise ImportError
+     import pydantic.v1 as pydantic  # type: ignore
+ except ImportError:
+     import pydantic  # type: ignore
+
+
+ class PipelineFileUpdateDispatcherConfig(pydantic.BaseModel):
+     """
+     Schema for the parameters of a load files job.
+     """
+
+     pipeline_file_ids: typing.Optional[typing.List[str]]
+     should_delete: typing.Optional[bool]
+     delete_info: typing.Optional[DeleteParams]
+
+     def json(self, **kwargs: typing.Any) -> str:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().json(**kwargs_with_defaults)
+
+     def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().dict(**kwargs_with_defaults)
+
+     class Config:
+         frozen = True
+         smart_union = True
+         json_encoders = {dt.datetime: serialize_datetime}
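For completeness, these job-parameter models are plain pydantic models and can be constructed directly; the ID below is a placeholder, not a real value:

    from llama_cloud.types.pipeline_file_update_dispatcher_config import (
        PipelineFileUpdateDispatcherConfig,
    )

    cfg = PipelineFileUpdateDispatcherConfig(
        pipeline_file_ids=["<pipeline-file-id>"],
        should_delete=False,
    )
    print(cfg.json())  # unset fields such as delete_info are omitted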
llama_cloud/types/{markdown_node_parser.py → pipeline_file_updater_config.py}
@@ -4,6 +4,7 @@ import datetime as dt
  import typing
 
  from ..core.datetime_utils import serialize_datetime
+ from .delete_params import DeleteParams
 
  try:
      import pydantic
@@ -14,25 +15,23 @@ except ImportError:
      import pydantic  # type: ignore
 
 
- class MarkdownNodeParser(pydantic.BaseModel):
+ class PipelineFileUpdaterConfig(pydantic.BaseModel):
      """
-     Markdown node parser.
-
-     Splits a document into Nodes using Markdown header-based splitting logic.
-     Each node contains its text content and the path of headers leading to it.
-
-     Args:
-         include_metadata (bool): whether to include metadata in nodes
-         include_prev_next_rel (bool): whether to include prev/next relationships
+     Schema for the parameters of a load files job.
      """
 
-     include_metadata: typing.Optional[bool] = pydantic.Field(
-         description="Whether or not to consider metadata when splitting."
+     custom_metadata: typing.Optional[typing.Dict[str, typing.Any]]
+     resource_info: typing.Optional[typing.Dict[str, typing.Any]]
+     should_delete: typing.Optional[bool]
+     should_parse: typing.Optional[bool]
+     delete_info: typing.Optional[DeleteParams]
+     is_new_file: typing.Optional[bool] = pydantic.Field(description="Whether the file is new")
+     data_source_project_file_changed: typing.Optional[bool] = pydantic.Field(
+         description="Whether the data source project file has changed"
+     )
+     should_migrate_pipeline_file_to_external_file_id: typing.Optional[bool] = pydantic.Field(
+         description="Whether to migrate the pipeline file to the external file id"
      )
-     include_prev_next_rel: typing.Optional[bool] = pydantic.Field(description="Include prev/next node relationships.")
-     callback_manager: typing.Optional[typing.Any]
-     id_func: typing.Optional[str]
-     class_name: typing.Optional[str]
 
      def json(self, **kwargs: typing.Any) -> str:
          kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/pipeline_managed_ingestion_job_params.py
@@ -0,0 +1,37 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import datetime as dt
+ import typing
+
+ from ..core.datetime_utils import serialize_datetime
+ from .delete_params import DeleteParams
+
+ try:
+     import pydantic
+     if pydantic.__version__.startswith("1."):
+         raise ImportError
+     import pydantic.v1 as pydantic  # type: ignore
+ except ImportError:
+     import pydantic  # type: ignore
+
+
+ class PipelineManagedIngestionJobParams(pydantic.BaseModel):
+     """
+     Schema for the parameters of a managed pipeline ingestion job.
+     """
+
+     should_delete: typing.Optional[bool]
+     delete_info: typing.Optional[DeleteParams]
+
+     def json(self, **kwargs: typing.Any) -> str:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().json(**kwargs_with_defaults)
+
+     def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().dict(**kwargs_with_defaults)
+
+     class Config:
+         frozen = True
+         smart_union = True
+         json_encoders = {dt.datetime: serialize_datetime}