llama-cloud 0.1.18__py3-none-any.whl → 0.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of llama-cloud might be problematic.
Files changed (52)
  1. llama_cloud/__init__.py +36 -16
  2. llama_cloud/client.py +3 -0
  3. llama_cloud/resources/__init__.py +20 -0
  4. llama_cloud/resources/beta/__init__.py +2 -0
  5. llama_cloud/resources/beta/client.py +371 -0
  6. llama_cloud/resources/embedding_model_configs/client.py +82 -22
  7. llama_cloud/resources/llama_extract/__init__.py +21 -0
  8. llama_cloud/resources/llama_extract/client.py +227 -114
  9. llama_cloud/resources/llama_extract/types/__init__.py +21 -0
  10. llama_cloud/resources/parsing/client.py +115 -4
  11. llama_cloud/resources/pipelines/client.py +105 -0
  12. llama_cloud/types/__init__.py +26 -24
  13. llama_cloud/types/{extract_schema_validate_request.py → audio_block.py} +5 -3
  14. llama_cloud/types/batch.py +47 -0
  15. llama_cloud/types/batch_item.py +40 -0
  16. llama_cloud/types/{extract_agent_update.py → batch_paginated_list.py} +6 -9
  17. llama_cloud/types/{extract_agent_create.py → batch_public_output.py} +7 -10
  18. llama_cloud/types/cloud_confluence_data_source.py +1 -0
  19. llama_cloud/types/cloud_postgres_vector_store.py +2 -0
  20. llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
  21. llama_cloud/types/extract_config.py +2 -0
  22. llama_cloud/types/extract_job_create.py +1 -2
  23. llama_cloud/types/fail_page_mode.py +29 -0
  24. llama_cloud/types/{extract_job_create_batch.py → file_count_by_status_response.py} +7 -12
  25. llama_cloud/types/file_parse_public.py +36 -0
  26. llama_cloud/types/job_names.py +8 -12
  27. llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +13 -1
  28. llama_cloud/types/llama_parse_parameters.py +7 -0
  29. llama_cloud/types/markdown_node_parser.py +4 -0
  30. llama_cloud/types/message_role.py +4 -0
  31. llama_cloud/types/pg_vector_distance_method.py +43 -0
  32. llama_cloud/types/pg_vector_hnsw_settings.py +45 -0
  33. llama_cloud/types/pg_vector_vector_type.py +35 -0
  34. llama_cloud/types/pipeline_create.py +1 -0
  35. llama_cloud/types/pipeline_data_source.py +3 -0
  36. llama_cloud/types/pipeline_data_source_status.py +33 -0
  37. llama_cloud/types/pipeline_file.py +1 -0
  38. llama_cloud/types/prompt_conf.py +3 -0
  39. llama_cloud/types/struct_parse_conf.py +4 -1
  40. llama_cloud/types/token_text_splitter.py +3 -0
  41. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/METADATA +1 -1
  42. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/RECORD +52 -41
  43. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema.py +0 -0
  44. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema_zero_value.py +0 -0
  45. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema.py +0 -0
  46. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema_zero_value.py +0 -0
  47. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override.py +0 -0
  48. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override_zero_value.py +0 -0
  49. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema.py +0 -0
  50. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema_zero_value.py +0 -0
  51. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/LICENSE +0 -0
  52. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/WHEEL +0 -0

llama_cloud/types/batch_item.py (new file)
@@ -0,0 +1,40 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .file_parse_public import FileParsePublic
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class BatchItem(pydantic.BaseModel):
+    id: str = pydantic.Field(description="Unique identifier for the batch item")
+    batch_id: str = pydantic.Field(description="The ID of the batch to which the item belongs")
+    status: str = pydantic.Field(description="The current status of the batch item")
+    status_updated_at: typing.Optional[dt.datetime]
+    created_at: typing.Optional[dt.datetime]
+    updated_at: typing.Optional[dt.datetime]
+    input_file: str = pydantic.Field(description="The input file associated with the batch item")
+    output_file: typing.Optional[str]
+    task: typing.Optional[FileParsePublic]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
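
The new BatchItem model mirrors the per-file records returned by the new batch endpoints. A minimal sketch of hydrating one from a raw payload with the pydantic v1-compatible API the generated models use; the payload values are placeholders, not real IDs:

from llama_cloud.types.batch_item import BatchItem

# Hypothetical payload shaped like the fields declared on BatchItem above.
raw = {
    "id": "item-123",
    "batch_id": "batch-456",
    "status": "SUCCESS",
    "input_file": "s3://bucket/invoices/0001.pdf",
    "output_file": "s3://bucket/parsed/0001.json",
}

item = BatchItem.parse_obj(raw)  # pydantic v1-style validation
print(item.status, item.input_file)
print(item.json())               # serializes with the by_alias/exclude_unset defaults shown above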

llama_cloud/types/extract_agent_update.py → llama_cloud/types/batch_paginated_list.py
@@ -4,8 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
-from .extract_config import ExtractConfig
+from .batch import Batch
 
 try:
     import pydantic
@@ -16,13 +15,11 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class ExtractAgentUpdate(pydantic.BaseModel):
-    """
-    Settings for updating an extraction schema.
-    """
-
-    data_schema: ExtractAgentUpdateDataSchema = pydantic.Field(description="The schema of the data")
-    config: ExtractConfig = pydantic.Field(description="The configuration parameters for the extraction agent.")
+class BatchPaginatedList(pydantic.BaseModel):
+    data: typing.List[Batch] = pydantic.Field(description="List of batches")
+    limit: int = pydantic.Field(description="Pagination limit")
+    offset: int = pydantic.Field(description="Pagination offset")
+    total_count: int = pydantic.Field(description="Total number of batches")
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}

llama_cloud/types/extract_agent_create.py → llama_cloud/types/batch_public_output.py
@@ -4,8 +4,9 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .extract_agent_create_data_schema import ExtractAgentCreateDataSchema
-from .extract_config import ExtractConfig
+from .batch import Batch
+from .batch_item import BatchItem
+from .managed_ingestion_status_response import ManagedIngestionStatusResponse
 
 try:
     import pydantic
@@ -16,14 +17,10 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class ExtractAgentCreate(pydantic.BaseModel):
-    """
-    Settings for creating an extraction agent.
-    """
-
-    name: str = pydantic.Field(description="The name of the extraction schema")
-    data_schema: ExtractAgentCreateDataSchema = pydantic.Field(description="The schema of the data.")
-    config: ExtractConfig = pydantic.Field(description="The configuration parameters for the extraction agent.")
+class BatchPublicOutput(pydantic.BaseModel):
+    batch: Batch
+    batch_items: typing.List[BatchItem]
+    ingestion_status: ManagedIngestionStatusResponse
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}

llama_cloud/types/cloud_confluence_data_source.py
@@ -27,6 +27,7 @@ class CloudConfluenceDataSource(pydantic.BaseModel):
     cql: typing.Optional[str]
     label: typing.Optional[str]
     index_restricted_pages: typing.Optional[bool] = pydantic.Field(description="Whether to index restricted pages.")
+    keep_markdown_format: typing.Optional[bool] = pydantic.Field(description="Whether to keep the markdown format.")
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:

llama_cloud/types/cloud_postgres_vector_store.py
@@ -4,6 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
+from .pg_vector_hnsw_settings import PgVectorHnswSettings
 
 try:
     import pydantic
@@ -26,6 +27,7 @@ class CloudPostgresVectorStore(pydantic.BaseModel):
     embed_dim: int
     hybrid_search: typing.Optional[bool]
     perform_setup: typing.Optional[bool]
+    hnsw_settings: typing.Optional[PgVectorHnswSettings]
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:

llama_cloud/types/cloud_sharepoint_data_source.py
@@ -25,6 +25,7 @@ class CloudSharepointDataSource(pydantic.BaseModel):
     client_secret: str = pydantic.Field(description="The client secret to use for authentication.")
     tenant_id: str = pydantic.Field(description="The tenant ID to use for authentication.")
     required_exts: typing.Optional[typing.List[str]]
+    get_permissions: typing.Optional[bool]
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:

llama_cloud/types/extract_config.py
@@ -24,6 +24,8 @@ class ExtractConfig(pydantic.BaseModel):
     extraction_target: typing.Optional[ExtractTarget] = pydantic.Field(description="The extraction target specified.")
     extraction_mode: typing.Optional[ExtractMode] = pydantic.Field(description="The extraction mode specified.")
     system_prompt: typing.Optional[str]
+    use_reasoning: typing.Optional[bool] = pydantic.Field(description="Whether to use reasoning for the extraction.")
+    cite_sources: typing.Optional[bool] = pydantic.Field(description="Whether to cite sources for the extraction.")
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
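
ExtractConfig gains use_reasoning and cite_sources flags. A hedged sketch of setting them, assuming the other ExtractConfig fields keep their defaults; whether a given extraction mode honors both flags is not established by this diff:

from llama_cloud.types.extract_config import ExtractConfig

# Both new flags are optional booleans per the model definition above.
config = ExtractConfig(
    use_reasoning=True,  # ask the extractor to reason before filling fields
    cite_sources=True,   # attach source citations to extracted values
)
print(config.dict())     # only explicitly-set fields are emitted (exclude_unset=True)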

llama_cloud/types/extract_job_create.py
@@ -22,8 +22,7 @@ class ExtractJobCreate(pydantic.BaseModel):
     """
 
     extraction_agent_id: str = pydantic.Field(description="The id of the extraction agent")
-    file_id: typing.Optional[str]
-    file: typing.Optional[str]
+    file_id: str = pydantic.Field(description="The id of the file")
     data_schema_override: typing.Optional[ExtractJobCreateDataSchemaOverride] = pydantic.Field(
         description="The data schema to override the extraction agent's data schema with"
    )
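
ExtractJobCreate now requires file_id and drops the inline file field, so job payloads must reference an already-uploaded file. A minimal sketch; the IDs are placeholders:

from llama_cloud.types.extract_job_create import ExtractJobCreate

job = ExtractJobCreate(
    extraction_agent_id="agent-abc",  # placeholder id
    file_id="file-def",               # now required; the removed `file` field is no longer accepted
)
print(job.json())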

llama_cloud/types/fail_page_mode.py (new file)
@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class FailPageMode(str, enum.Enum):
+    """
+    Enum for representing the different available page error handling modes
+    """
+
+    RAW_TEXT = "raw_text"
+    BLANK_PAGE = "blank_page"
+    ERROR_MESSAGE = "error_message"
+
+    def visit(
+        self,
+        raw_text: typing.Callable[[], T_Result],
+        blank_page: typing.Callable[[], T_Result],
+        error_message: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is FailPageMode.RAW_TEXT:
+            return raw_text()
+        if self is FailPageMode.BLANK_PAGE:
+            return blank_page()
+        if self is FailPageMode.ERROR_MESSAGE:
+            return error_message()
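
FailPageMode is a Fern-style enum with a visit() dispatcher; it backs the new replace_failed_page_mode parsing option further down. A small sketch of the visitor pattern it exposes:

from llama_cloud.types.fail_page_mode import FailPageMode

mode = FailPageMode.ERROR_MESSAGE

# visit() forces a branch per enum member and returns whatever the chosen callable returns.
placeholder = mode.visit(
    raw_text=lambda: "keep whatever raw text was recovered",
    blank_page=lambda: "",
    error_message=lambda: "[PARSE FAILED]",
)
print(placeholder)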

llama_cloud/types/extract_job_create_batch.py → llama_cloud/types/file_count_by_status_response.py
@@ -4,8 +4,6 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .extract_config import ExtractConfig
-from .extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
 
 try:
     import pydantic
@@ -16,17 +14,14 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class ExtractJobCreateBatch(pydantic.BaseModel):
-    """
-    Schema for creating extraction jobs in batch.
-    """
-
-    extraction_agent_id: str = pydantic.Field(description="The id of the extraction agent")
-    file_ids: typing.List[str] = pydantic.Field(description="The ids of the files")
-    data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = pydantic.Field(
-        description="The data schema to override the extraction agent's data schema with"
+class FileCountByStatusResponse(pydantic.BaseModel):
+    counts: typing.Dict[str, int] = pydantic.Field(description="The counts of files by status")
+    total_count: int = pydantic.Field(description="The total number of files")
+    pipeline_id: typing.Optional[str]
+    data_source_id: typing.Optional[str]
+    only_manually_uploaded: typing.Optional[bool] = pydantic.Field(
+        description="Whether to only count manually uploaded files"
     )
-    config_override: typing.Optional[ExtractConfig]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}

llama_cloud/types/file_parse_public.py (new file)
@@ -0,0 +1,36 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class FileParsePublic(pydantic.BaseModel):
+    created_at: dt.datetime = pydantic.Field(description="The date and time when the file was parsed.")
+    status: str = pydantic.Field(description="The status of the parse task.")
+    started_at: typing.Optional[dt.datetime]
+    ended_at: typing.Optional[dt.datetime]
+    input_path: str = pydantic.Field(description="The path to the input file.")
+    data_path: str = pydantic.Field(description="The path to the data file.")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}

llama_cloud/types/job_names.py
@@ -14,7 +14,6 @@ class JobNames(str, enum.Enum):
     LOAD_DOCUMENTS_JOB = "load_documents_job"
     LOAD_FILES_JOB = "load_files_job"
     PLAYGROUND_JOB = "playground_job"
-    EVAL_DATASET_JOB = "eval_dataset_job"
     PIPELINE_MANAGED_INGESTION_JOB = "pipeline_managed_ingestion_job"
     DATA_SOURCE_MANAGED_INGESTION_JOB = "data_source_managed_ingestion_job"
     DATA_SOURCE_UPDATE_DISPATCHER_JOB = "data_source_update_dispatcher_job"
@@ -22,20 +21,19 @@ class JobNames(str, enum.Enum):
     PIPELINE_FILE_UPDATER_JOB = "pipeline_file_updater_job"
     FILE_MANAGED_INGESTION_JOB = "file_managed_ingestion_job"
     DOCUMENT_INGESTION_JOB = "document_ingestion_job"
-    PARSE_RAW_FILE_JOB = "parse_raw_file_job"
-    LLAMA_PARSE_TRANSFORM_JOB = "llama_parse_transform_job"
     METADATA_UPDATE_JOB = "metadata_update_job"
     PARSE_RAW_FILE_JOB_CACHED = "parse_raw_file_job_cached"
     EXTRACTION_JOB = "extraction_job"
     EXTRACT_JOB = "extract_job"
     ASYNCIO_TEST_JOB = "asyncio_test_job"
+    PARSE_RAW_FILE_JOB = "parse_raw_file_job"
+    LLAMA_PARSE_TRANSFORM_JOB = "llama_parse_transform_job"
 
     def visit(
         self,
         load_documents_job: typing.Callable[[], T_Result],
         load_files_job: typing.Callable[[], T_Result],
         playground_job: typing.Callable[[], T_Result],
-        eval_dataset_job: typing.Callable[[], T_Result],
         pipeline_managed_ingestion_job: typing.Callable[[], T_Result],
         data_source_managed_ingestion_job: typing.Callable[[], T_Result],
         data_source_update_dispatcher_job: typing.Callable[[], T_Result],
@@ -43,13 +41,13 @@ class JobNames(str, enum.Enum):
         pipeline_file_updater_job: typing.Callable[[], T_Result],
         file_managed_ingestion_job: typing.Callable[[], T_Result],
         document_ingestion_job: typing.Callable[[], T_Result],
-        parse_raw_file_job: typing.Callable[[], T_Result],
-        llama_parse_transform_job: typing.Callable[[], T_Result],
         metadata_update_job: typing.Callable[[], T_Result],
         parse_raw_file_job_cached: typing.Callable[[], T_Result],
         extraction_job: typing.Callable[[], T_Result],
         extract_job: typing.Callable[[], T_Result],
         asyncio_test_job: typing.Callable[[], T_Result],
+        parse_raw_file_job: typing.Callable[[], T_Result],
+        llama_parse_transform_job: typing.Callable[[], T_Result],
     ) -> T_Result:
         if self is JobNames.LOAD_DOCUMENTS_JOB:
             return load_documents_job()
@@ -57,8 +55,6 @@ class JobNames(str, enum.Enum):
             return load_files_job()
         if self is JobNames.PLAYGROUND_JOB:
             return playground_job()
-        if self is JobNames.EVAL_DATASET_JOB:
-            return eval_dataset_job()
         if self is JobNames.PIPELINE_MANAGED_INGESTION_JOB:
             return pipeline_managed_ingestion_job()
         if self is JobNames.DATA_SOURCE_MANAGED_INGESTION_JOB:
@@ -73,10 +69,6 @@ class JobNames(str, enum.Enum):
             return file_managed_ingestion_job()
         if self is JobNames.DOCUMENT_INGESTION_JOB:
             return document_ingestion_job()
-        if self is JobNames.PARSE_RAW_FILE_JOB:
-            return parse_raw_file_job()
-        if self is JobNames.LLAMA_PARSE_TRANSFORM_JOB:
-            return llama_parse_transform_job()
         if self is JobNames.METADATA_UPDATE_JOB:
             return metadata_update_job()
         if self is JobNames.PARSE_RAW_FILE_JOB_CACHED:
@@ -87,3 +79,7 @@ class JobNames(str, enum.Enum):
             return extract_job()
         if self is JobNames.ASYNCIO_TEST_JOB:
             return asyncio_test_job()
+        if self is JobNames.PARSE_RAW_FILE_JOB:
+            return parse_raw_file_job()
+        if self is JobNames.LLAMA_PARSE_TRANSFORM_JOB:
+            return llama_parse_transform_job()

llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py
@@ -6,10 +6,20 @@ import typing
 
 import typing_extensions
 
+from .audio_block import AudioBlock
 from .image_block import ImageBlock
 from .text_block import TextBlock
 
 
+class LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio(AudioBlock):
+    block_type: typing_extensions.Literal["audio"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
 class LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image(ImageBlock):
     block_type: typing_extensions.Literal["image"]
 
@@ -29,5 +39,7 @@ class LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text(TextBlock):
 
 
 LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem = typing.Union[
-    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image, LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text
+    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio,
+    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image,
+    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text,
 ]
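
The chat-message block union now includes an audio variant alongside image and text. A hedged sketch of dispatching on the wrapper classes; the field shapes of AudioBlock, ImageBlock, and TextBlock are not shown in this diff, so only the wrappers and the union are referenced:

from llama_cloud.types.llama_index_core_base_llms_types_chat_message_blocks_item import (
    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem,
    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio,
    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image,
)


def describe_block(block: "LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem") -> str:
    # Each wrapper carries a Literal block_type discriminator; isinstance works on the wrappers.
    if isinstance(block, LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio):
        return "audio block"
    if isinstance(block, LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image):
        return "image block"
    return "text block"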

llama_cloud/types/llama_parse_parameters.py
@@ -4,6 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
+from .fail_page_mode import FailPageMode
 from .parser_languages import ParserLanguages
 from .parsing_mode import ParsingMode
 
@@ -94,6 +95,7 @@ class LlamaParseParameters(pydantic.BaseModel):
     strict_mode_image_ocr: typing.Optional[bool]
     strict_mode_reconstruction: typing.Optional[bool]
     strict_mode_buggy_font: typing.Optional[bool]
+    save_images: typing.Optional[bool]
     ignore_document_elements_for_layout_detection: typing.Optional[bool]
     output_tables_as_html: typing.Optional[bool] = pydantic.Field(alias="output_tables_as_HTML")
     internal_is_screenshot_job: typing.Optional[bool]
@@ -101,6 +103,11 @@ class LlamaParseParameters(pydantic.BaseModel):
     system_prompt: typing.Optional[str]
     system_prompt_append: typing.Optional[str]
     user_prompt: typing.Optional[str]
+    page_error_tolerance: typing.Optional[float]
+    replace_failed_page_mode: typing.Optional[FailPageMode]
+    replace_failed_page_with_error_message_prefix: typing.Optional[str]
+    replace_failed_page_with_error_message_suffix: typing.Optional[str]
+    markdown_table_multiline_header_separator: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
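
LlamaParseParameters picks up the failed-page handling knobs that pair with FailPageMode, plus save_images and a markdown table header separator. A sketch of setting them, assuming the remaining parameters keep their defaults; the field names come straight from the diff, but the server-side semantics (for example whether page_error_tolerance is a fraction) are assumptions:

from llama_cloud.types.fail_page_mode import FailPageMode
from llama_cloud.types.llama_parse_parameters import LlamaParseParameters

params = LlamaParseParameters(
    page_error_tolerance=0.1,  # assumed to mean the share of pages allowed to fail
    replace_failed_page_mode=FailPageMode.ERROR_MESSAGE,
    replace_failed_page_with_error_message_prefix="[[PAGE FAILED: ",
    replace_failed_page_with_error_message_suffix="]]",
    save_images=True,
)
print(params.json())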

llama_cloud/types/markdown_node_parser.py
@@ -24,6 +24,7 @@ class MarkdownNodeParser(pydantic.BaseModel):
     Args:
         include_metadata (bool): whether to include metadata in nodes
         include_prev_next_rel (bool): whether to include prev/next relationships
+        header_path_separator (str): separator char used for section header path metadata
     """
 
     include_metadata: typing.Optional[bool] = pydantic.Field(
@@ -32,6 +33,9 @@ class MarkdownNodeParser(pydantic.BaseModel):
     include_prev_next_rel: typing.Optional[bool] = pydantic.Field(description="Include prev/next node relationships.")
     callback_manager: typing.Optional[typing.Any]
     id_func: typing.Optional[str]
+    header_path_separator: typing.Optional[str] = pydantic.Field(
+        description="Separator char used for section header path metadata."
+    )
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:

llama_cloud/types/message_role.py
@@ -12,6 +12,7 @@ class MessageRole(str, enum.Enum):
     """
 
     SYSTEM = "system"
+    DEVELOPER = "developer"
     USER = "user"
     ASSISTANT = "assistant"
     FUNCTION = "function"
@@ -22,6 +23,7 @@ class MessageRole(str, enum.Enum):
     def visit(
         self,
         system: typing.Callable[[], T_Result],
+        developer: typing.Callable[[], T_Result],
         user: typing.Callable[[], T_Result],
         assistant: typing.Callable[[], T_Result],
         function: typing.Callable[[], T_Result],
@@ -31,6 +33,8 @@ class MessageRole(str, enum.Enum):
     ) -> T_Result:
         if self is MessageRole.SYSTEM:
             return system()
+        if self is MessageRole.DEVELOPER:
+            return developer()
         if self is MessageRole.USER:
             return user()
         if self is MessageRole.ASSISTANT:
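
MessageRole adds a DEVELOPER member with matching visit() support. A tiny sketch; the full visit() signature includes more roles than this hunk shows, so only the enum member itself is exercised here:

from llama_cloud.types.message_role import MessageRole

role = MessageRole.DEVELOPER
print(role.value)                   # "developer"
print(role is MessageRole.SYSTEM)   # False; developer is a distinct role, not an alias for system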

llama_cloud/types/pg_vector_distance_method.py (new file)
@@ -0,0 +1,43 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class PgVectorDistanceMethod(str, enum.Enum):
+    """
+    Distance methods for PGVector.
+    Docs:
+    https://github.com/pgvector/pgvector?tab=readme-ov-file#query-options
+    """
+
+    L_2 = "l2"
+    IP = "ip"
+    COSINE = "cosine"
+    L_1 = "l1"
+    HAMMING = "hamming"
+    JACCARD = "jaccard"
+
+    def visit(
+        self,
+        l_2: typing.Callable[[], T_Result],
+        ip: typing.Callable[[], T_Result],
+        cosine: typing.Callable[[], T_Result],
+        l_1: typing.Callable[[], T_Result],
+        hamming: typing.Callable[[], T_Result],
+        jaccard: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is PgVectorDistanceMethod.L_2:
+            return l_2()
+        if self is PgVectorDistanceMethod.IP:
+            return ip()
+        if self is PgVectorDistanceMethod.COSINE:
+            return cosine()
+        if self is PgVectorDistanceMethod.L_1:
+            return l_1()
+        if self is PgVectorDistanceMethod.HAMMING:
+            return hamming()
+        if self is PgVectorDistanceMethod.JACCARD:
+            return jaccard()

llama_cloud/types/pg_vector_hnsw_settings.py (new file)
@@ -0,0 +1,45 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .pg_vector_distance_method import PgVectorDistanceMethod
+from .pg_vector_vector_type import PgVectorVectorType
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class PgVectorHnswSettings(pydantic.BaseModel):
+    """
+    HNSW settings for PGVector.
+    """
+
+    ef_construction: typing.Optional[int] = pydantic.Field(
+        description="The number of edges to use during the construction phase."
+    )
+    ef_search: typing.Optional[int] = pydantic.Field(description="The number of edges to use during the search phase.")
+    m: typing.Optional[int] = pydantic.Field(
+        description="The number of bi-directional links created for each new element."
+    )
+    vector_type: typing.Optional[PgVectorVectorType] = pydantic.Field(description="The type of vector to use.")
+    distance_method: typing.Optional[PgVectorDistanceMethod] = pydantic.Field(description="The distance method to use.")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}

llama_cloud/types/pg_vector_vector_type.py (new file)
@@ -0,0 +1,35 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class PgVectorVectorType(str, enum.Enum):
+    """
+    Vector storage formats for PGVector.
+    Docs:
+    https://github.com/pgvector/pgvector?tab=readme-ov-file#query-options
+    """
+
+    VECTOR = "vector"
+    HALF_VEC = "half_vec"
+    BIT = "bit"
+    SPARSE_VEC = "sparse_vec"
+
+    def visit(
+        self,
+        vector: typing.Callable[[], T_Result],
+        half_vec: typing.Callable[[], T_Result],
+        bit: typing.Callable[[], T_Result],
+        sparse_vec: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is PgVectorVectorType.VECTOR:
+            return vector()
+        if self is PgVectorVectorType.HALF_VEC:
+            return half_vec()
+        if self is PgVectorVectorType.BIT:
+            return bit()
+        if self is PgVectorVectorType.SPARSE_VEC:
+            return sparse_vec()
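
Taken together, the three new pg_vector_* modules let CloudPostgresVectorStore carry explicit HNSW index settings. A hedged sketch composing them; every field is optional per the model above, and the numeric values are illustrative only (pgvector's own documentation governs sensible ef/m choices):

from llama_cloud.types.pg_vector_distance_method import PgVectorDistanceMethod
from llama_cloud.types.pg_vector_hnsw_settings import PgVectorHnswSettings
from llama_cloud.types.pg_vector_vector_type import PgVectorVectorType

hnsw = PgVectorHnswSettings(
    ef_construction=128,  # build-time candidate list size (illustrative)
    ef_search=64,         # query-time candidate list size (illustrative)
    m=16,                 # bi-directional links per element (illustrative)
    vector_type=PgVectorVectorType.HALF_VEC,
    distance_method=PgVectorDistanceMethod.COSINE,
)
print(hnsw.json())
# Per the cloud_postgres_vector_store.py hunk above, this plugs into
# CloudPostgresVectorStore(..., hnsw_settings=hnsw).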

llama_cloud/types/pipeline_create.py
@@ -44,6 +44,7 @@ class PipelineCreate(pydantic.BaseModel):
     llama_parse_parameters: typing.Optional[LlamaParseParameters] = pydantic.Field(
         description="Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline."
     )
+    status: typing.Optional[str]
     name: str
     pipeline_type: typing.Optional[PipelineType] = pydantic.Field(
         description="Type of pipeline. Either PLAYGROUND or MANAGED."

llama_cloud/types/pipeline_data_source.py
@@ -7,6 +7,7 @@ from ..core.datetime_utils import serialize_datetime
 from .configurable_data_source_names import ConfigurableDataSourceNames
 from .pipeline_data_source_component import PipelineDataSourceComponent
 from .pipeline_data_source_custom_metadata_value import PipelineDataSourceCustomMetadataValue
+from .pipeline_data_source_status import PipelineDataSourceStatus
 
 try:
     import pydantic
@@ -36,6 +37,8 @@ class PipelineDataSource(pydantic.BaseModel):
     last_synced_at: dt.datetime = pydantic.Field(description="The last time the data source was automatically synced.")
     sync_interval: typing.Optional[float]
     sync_schedule_set_by: typing.Optional[str]
+    status: typing.Optional[PipelineDataSourceStatus]
+    status_updated_at: typing.Optional[dt.datetime]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}

llama_cloud/types/pipeline_data_source_status.py (new file)
@@ -0,0 +1,33 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class PipelineDataSourceStatus(str, enum.Enum):
+    NOT_STARTED = "NOT_STARTED"
+    IN_PROGRESS = "IN_PROGRESS"
+    SUCCESS = "SUCCESS"
+    ERROR = "ERROR"
+    CANCELLED = "CANCELLED"
+
+    def visit(
+        self,
+        not_started: typing.Callable[[], T_Result],
+        in_progress: typing.Callable[[], T_Result],
+        success: typing.Callable[[], T_Result],
+        error: typing.Callable[[], T_Result],
+        cancelled: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is PipelineDataSourceStatus.NOT_STARTED:
+            return not_started()
+        if self is PipelineDataSourceStatus.IN_PROGRESS:
+            return in_progress()
+        if self is PipelineDataSourceStatus.SUCCESS:
+            return success()
+        if self is PipelineDataSourceStatus.ERROR:
+            return error()
+        if self is PipelineDataSourceStatus.CANCELLED:
+            return cancelled()
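
PipelineDataSourceStatus backs the new status/status_updated_at fields on PipelineDataSource. A small visit() sketch mapping each state to a display label (the labels themselves are arbitrary):

from llama_cloud.types.pipeline_data_source_status import PipelineDataSourceStatus

status = PipelineDataSourceStatus.IN_PROGRESS

label = status.visit(
    not_started=lambda: "queued",
    in_progress=lambda: "syncing",
    success=lambda: "synced",
    error=lambda: "failed",
    cancelled=lambda: "cancelled",
)
print(label)  # "syncing"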

llama_cloud/types/pipeline_file.py
@@ -42,6 +42,7 @@ class PipelineFile(pydantic.BaseModel):
     config_hash: typing.Optional[typing.Dict[str, typing.Optional[PipelineFileConfigHashValue]]]
     indexed_page_count: typing.Optional[int]
     status: typing.Optional[PipelineFileStatus]
+    status_updated_at: typing.Optional[dt.datetime]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}

llama_cloud/types/prompt_conf.py
@@ -19,6 +19,9 @@ class PromptConf(pydantic.BaseModel):
     extraction_prompt: typing.Optional[str] = pydantic.Field(description="The prompt to use for the extraction.")
     error_handling_prompt: typing.Optional[str] = pydantic.Field(description="The prompt to use for error handling.")
     reasoning_prompt: typing.Optional[str] = pydantic.Field(description="The prompt to use for reasoning.")
+    cite_sources_prompt: typing.Optional[typing.Dict[str, str]] = pydantic.Field(
+        description="The prompt to use for citing sources."
+    )
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}

llama_cloud/types/struct_parse_conf.py
@@ -36,7 +36,10 @@ class StructParseConf(pydantic.BaseModel):
         description="Whether to handle missing fields in the schema."
     )
     use_reasoning: typing.Optional[bool] = pydantic.Field(
-        description="Whether to use reasoning for the structured parsing."
+        description="Whether to use reasoning for the structured extraction."
+    )
+    cite_sources: typing.Optional[bool] = pydantic.Field(
+        description="Whether to cite sources for the structured extraction."
     )
     prompt_conf: typing.Optional[PromptConf] = pydantic.Field(
         description="The prompt configuration for the structured parsing."