llama-cloud: llama_cloud-0.1.18-py3-none-any.whl → llama_cloud-0.1.19-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-cloud might be problematic. (The source page linked to further details at this point; the link is not preserved in this text.)

Files changed (52):
  1. llama_cloud/__init__.py +36 -16
  2. llama_cloud/client.py +3 -0
  3. llama_cloud/resources/__init__.py +20 -0
  4. llama_cloud/resources/beta/__init__.py +2 -0
  5. llama_cloud/resources/beta/client.py +371 -0
  6. llama_cloud/resources/embedding_model_configs/client.py +82 -22
  7. llama_cloud/resources/llama_extract/__init__.py +21 -0
  8. llama_cloud/resources/llama_extract/client.py +227 -114
  9. llama_cloud/resources/llama_extract/types/__init__.py +21 -0
  10. llama_cloud/resources/parsing/client.py +115 -4
  11. llama_cloud/resources/pipelines/client.py +105 -0
  12. llama_cloud/types/__init__.py +26 -24
  13. llama_cloud/types/{extract_schema_validate_request.py → audio_block.py} +5 -3
  14. llama_cloud/types/batch.py +47 -0
  15. llama_cloud/types/batch_item.py +40 -0
  16. llama_cloud/types/{extract_agent_update.py → batch_paginated_list.py} +6 -9
  17. llama_cloud/types/{extract_agent_create.py → batch_public_output.py} +7 -10
  18. llama_cloud/types/cloud_confluence_data_source.py +1 -0
  19. llama_cloud/types/cloud_postgres_vector_store.py +2 -0
  20. llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
  21. llama_cloud/types/extract_config.py +2 -0
  22. llama_cloud/types/extract_job_create.py +1 -2
  23. llama_cloud/types/fail_page_mode.py +29 -0
  24. llama_cloud/types/{extract_job_create_batch.py → file_count_by_status_response.py} +7 -12
  25. llama_cloud/types/file_parse_public.py +36 -0
  26. llama_cloud/types/job_names.py +8 -12
  27. llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +13 -1
  28. llama_cloud/types/llama_parse_parameters.py +7 -0
  29. llama_cloud/types/markdown_node_parser.py +4 -0
  30. llama_cloud/types/message_role.py +4 -0
  31. llama_cloud/types/pg_vector_distance_method.py +43 -0
  32. llama_cloud/types/pg_vector_hnsw_settings.py +45 -0
  33. llama_cloud/types/pg_vector_vector_type.py +35 -0
  34. llama_cloud/types/pipeline_create.py +1 -0
  35. llama_cloud/types/pipeline_data_source.py +3 -0
  36. llama_cloud/types/pipeline_data_source_status.py +33 -0
  37. llama_cloud/types/pipeline_file.py +1 -0
  38. llama_cloud/types/prompt_conf.py +3 -0
  39. llama_cloud/types/struct_parse_conf.py +4 -1
  40. llama_cloud/types/token_text_splitter.py +3 -0
  41. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/METADATA +1 -1
  42. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/RECORD +52 -41
  43. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema.py +0 -0
  44. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema_zero_value.py +0 -0
  45. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema.py +0 -0
  46. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema_zero_value.py +0 -0
  47. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override.py +0 -0
  48. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override_zero_value.py +0 -0
  49. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema.py +0 -0
  50. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema_zero_value.py +0 -0
  51. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/LICENSE +0 -0
  52. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/WHEEL +0 -0
@@ -9,6 +9,7 @@ from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
9
9
  from ...core.jsonable_encoder import jsonable_encoder
10
10
  from ...core.remove_none_from_dict import remove_none_from_dict
11
11
  from ...errors.unprocessable_entity_error import UnprocessableEntityError
12
+ from ...types.fail_page_mode import FailPageMode
12
13
  from ...types.http_validation_error import HttpValidationError
13
14
  from ...types.llama_parse_supported_file_extensions import LlamaParseSupportedFileExtensions
14
15
  from ...types.parser_languages import ParserLanguages
@@ -250,6 +251,10 @@ class ParsingClient:
250
251
  webhook_url: str,
251
252
  preset: str,
252
253
  parse_mode: typing.Optional[ParsingMode] = OMIT,
254
+ page_error_tolerance: float,
255
+ replace_failed_page_mode: typing.Optional[FailPageMode] = OMIT,
256
+ replace_failed_page_with_error_message_prefix: str,
257
+ replace_failed_page_with_error_message_suffix: str,
253
258
  system_prompt: str,
254
259
  system_prompt_append: str,
255
260
  user_prompt: str,
@@ -259,8 +264,10 @@ class ParsingClient:
259
264
  strict_mode_image_ocr: bool,
260
265
  strict_mode_reconstruction: bool,
261
266
  strict_mode_buggy_font: bool,
267
+ save_images: bool,
262
268
  ignore_document_elements_for_layout_detection: bool,
263
269
  output_tables_as_html: bool,
270
+ markdown_table_multiline_header_separator: str,
264
271
  use_vendor_multimodal_model: bool,
265
272
  bounding_box: str,
266
273
  gpt_4_o_mode: bool,
@@ -275,8 +282,6 @@ class ParsingClient:
275
282
  formatting_instruction: str,
276
283
  ) -> ParsingJob:
277
284
  """
278
- Upload a file to s3 and create a job. return a job id
279
-
280
285
  Parameters:
281
286
  - organization_id: typing.Optional[str].
282
287
 
@@ -392,6 +397,14 @@ class ParsingClient:
392
397
 
393
398
  - parse_mode: typing.Optional[ParsingMode].
394
399
 
400
+ - page_error_tolerance: float.
401
+
402
+ - replace_failed_page_mode: typing.Optional[FailPageMode].
403
+
404
+ - replace_failed_page_with_error_message_prefix: str.
405
+
406
+ - replace_failed_page_with_error_message_suffix: str.
407
+
395
408
  - system_prompt: str.
396
409
 
397
410
  - system_prompt_append: str.
@@ -410,10 +423,14 @@ class ParsingClient:
410
423
 
411
424
  - strict_mode_buggy_font: bool.
412
425
 
426
+ - save_images: bool.
427
+
413
428
  - ignore_document_elements_for_layout_detection: bool.
414
429
 
415
430
  - output_tables_as_html: bool.
416
431
 
432
+ - markdown_table_multiline_header_separator: str.
433
+
417
434
  - use_vendor_multimodal_model: bool.
418
435
 
419
436
  - bounding_box: str.
@@ -491,6 +508,9 @@ class ParsingClient:
491
508
  "model": model,
492
509
  "webhook_url": webhook_url,
493
510
  "preset": preset,
511
+ "page_error_tolerance": page_error_tolerance,
512
+ "replace_failed_page_with_error_message_prefix": replace_failed_page_with_error_message_prefix,
513
+ "replace_failed_page_with_error_message_suffix": replace_failed_page_with_error_message_suffix,
494
514
  "system_prompt": system_prompt,
495
515
  "system_prompt_append": system_prompt_append,
496
516
  "user_prompt": user_prompt,
@@ -500,8 +520,10 @@ class ParsingClient:
500
520
  "strict_mode_image_ocr": strict_mode_image_ocr,
501
521
  "strict_mode_reconstruction": strict_mode_reconstruction,
502
522
  "strict_mode_buggy_font": strict_mode_buggy_font,
523
+ "save_images": save_images,
503
524
  "ignore_document_elements_for_layout_detection": ignore_document_elements_for_layout_detection,
504
525
  "output_tables_as_HTML": output_tables_as_html,
526
+ "markdown_table_multiline_header_separator": markdown_table_multiline_header_separator,
505
527
  "use_vendor_multimodal_model": use_vendor_multimodal_model,
506
528
  "bounding_box": bounding_box,
507
529
  "gpt4o_mode": gpt_4_o_mode,
@@ -521,6 +543,8 @@ class ParsingClient:
521
543
  _request["max_pages"] = max_pages
522
544
  if parse_mode is not OMIT:
523
545
  _request["parse_mode"] = parse_mode
546
+ if replace_failed_page_mode is not OMIT:
547
+ _request["replace_failed_page_mode"] = replace_failed_page_mode
524
548
  _response = self._client_wrapper.httpx_client.request(
525
549
  "POST",
526
550
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
@@ -571,6 +595,38 @@ class ParsingClient:
571
595
  raise ApiError(status_code=_response.status_code, body=_response.text)
572
596
  raise ApiError(status_code=_response.status_code, body=_response_json)
573
597
 
598
+ def get_job_parameters(self, job_id: str) -> typing.Any:
599
+ """
600
+ Get a job by id
601
+
602
+ Parameters:
603
+ - job_id: str.
604
+ ---
605
+ from llama_cloud.client import LlamaCloud
606
+
607
+ client = LlamaCloud(
608
+ token="YOUR_TOKEN",
609
+ )
610
+ client.parsing.get_job_parameters(
611
+ job_id="string",
612
+ )
613
+ """
614
+ _response = self._client_wrapper.httpx_client.request(
615
+ "GET",
616
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/parameters"),
617
+ headers=self._client_wrapper.get_headers(),
618
+ timeout=60,
619
+ )
620
+ if 200 <= _response.status_code < 300:
621
+ return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
622
+ if _response.status_code == 422:
623
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
624
+ try:
625
+ _response_json = _response.json()
626
+ except JSONDecodeError:
627
+ raise ApiError(status_code=_response.status_code, body=_response.text)
628
+ raise ApiError(status_code=_response.status_code, body=_response_json)
629
+
574
630
  def get_parsing_job_details(self, job_id: str) -> typing.Any:
575
631
  """
576
632
  Get a job by id
@@ -1202,6 +1258,10 @@ class AsyncParsingClient:
1202
1258
  webhook_url: str,
1203
1259
  preset: str,
1204
1260
  parse_mode: typing.Optional[ParsingMode] = OMIT,
1261
+ page_error_tolerance: float,
1262
+ replace_failed_page_mode: typing.Optional[FailPageMode] = OMIT,
1263
+ replace_failed_page_with_error_message_prefix: str,
1264
+ replace_failed_page_with_error_message_suffix: str,
1205
1265
  system_prompt: str,
1206
1266
  system_prompt_append: str,
1207
1267
  user_prompt: str,
@@ -1211,8 +1271,10 @@ class AsyncParsingClient:
1211
1271
  strict_mode_image_ocr: bool,
1212
1272
  strict_mode_reconstruction: bool,
1213
1273
  strict_mode_buggy_font: bool,
1274
+ save_images: bool,
1214
1275
  ignore_document_elements_for_layout_detection: bool,
1215
1276
  output_tables_as_html: bool,
1277
+ markdown_table_multiline_header_separator: str,
1216
1278
  use_vendor_multimodal_model: bool,
1217
1279
  bounding_box: str,
1218
1280
  gpt_4_o_mode: bool,
@@ -1227,8 +1289,6 @@ class AsyncParsingClient:
1227
1289
  formatting_instruction: str,
1228
1290
  ) -> ParsingJob:
1229
1291
  """
1230
- Upload a file to s3 and create a job. return a job id
1231
-
1232
1292
  Parameters:
1233
1293
  - organization_id: typing.Optional[str].
1234
1294
 
@@ -1344,6 +1404,14 @@ class AsyncParsingClient:
1344
1404
 
1345
1405
  - parse_mode: typing.Optional[ParsingMode].
1346
1406
 
1407
+ - page_error_tolerance: float.
1408
+
1409
+ - replace_failed_page_mode: typing.Optional[FailPageMode].
1410
+
1411
+ - replace_failed_page_with_error_message_prefix: str.
1412
+
1413
+ - replace_failed_page_with_error_message_suffix: str.
1414
+
1347
1415
  - system_prompt: str.
1348
1416
 
1349
1417
  - system_prompt_append: str.
@@ -1362,10 +1430,14 @@ class AsyncParsingClient:
1362
1430
 
1363
1431
  - strict_mode_buggy_font: bool.
1364
1432
 
1433
+ - save_images: bool.
1434
+
1365
1435
  - ignore_document_elements_for_layout_detection: bool.
1366
1436
 
1367
1437
  - output_tables_as_html: bool.
1368
1438
 
1439
+ - markdown_table_multiline_header_separator: str.
1440
+
1369
1441
  - use_vendor_multimodal_model: bool.
1370
1442
 
1371
1443
  - bounding_box: str.
@@ -1443,6 +1515,9 @@ class AsyncParsingClient:
1443
1515
  "model": model,
1444
1516
  "webhook_url": webhook_url,
1445
1517
  "preset": preset,
1518
+ "page_error_tolerance": page_error_tolerance,
1519
+ "replace_failed_page_with_error_message_prefix": replace_failed_page_with_error_message_prefix,
1520
+ "replace_failed_page_with_error_message_suffix": replace_failed_page_with_error_message_suffix,
1446
1521
  "system_prompt": system_prompt,
1447
1522
  "system_prompt_append": system_prompt_append,
1448
1523
  "user_prompt": user_prompt,
@@ -1452,8 +1527,10 @@ class AsyncParsingClient:
1452
1527
  "strict_mode_image_ocr": strict_mode_image_ocr,
1453
1528
  "strict_mode_reconstruction": strict_mode_reconstruction,
1454
1529
  "strict_mode_buggy_font": strict_mode_buggy_font,
1530
+ "save_images": save_images,
1455
1531
  "ignore_document_elements_for_layout_detection": ignore_document_elements_for_layout_detection,
1456
1532
  "output_tables_as_HTML": output_tables_as_html,
1533
+ "markdown_table_multiline_header_separator": markdown_table_multiline_header_separator,
1457
1534
  "use_vendor_multimodal_model": use_vendor_multimodal_model,
1458
1535
  "bounding_box": bounding_box,
1459
1536
  "gpt4o_mode": gpt_4_o_mode,
@@ -1473,6 +1550,8 @@ class AsyncParsingClient:
1473
1550
  _request["max_pages"] = max_pages
1474
1551
  if parse_mode is not OMIT:
1475
1552
  _request["parse_mode"] = parse_mode
1553
+ if replace_failed_page_mode is not OMIT:
1554
+ _request["replace_failed_page_mode"] = replace_failed_page_mode
1476
1555
  _response = await self._client_wrapper.httpx_client.request(
1477
1556
  "POST",
1478
1557
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
@@ -1523,6 +1602,38 @@ class AsyncParsingClient:
1523
1602
  raise ApiError(status_code=_response.status_code, body=_response.text)
1524
1603
  raise ApiError(status_code=_response.status_code, body=_response_json)
1525
1604
 
1605
+ async def get_job_parameters(self, job_id: str) -> typing.Any:
1606
+ """
1607
+ Get a job by id
1608
+
1609
+ Parameters:
1610
+ - job_id: str.
1611
+ ---
1612
+ from llama_cloud.client import AsyncLlamaCloud
1613
+
1614
+ client = AsyncLlamaCloud(
1615
+ token="YOUR_TOKEN",
1616
+ )
1617
+ await client.parsing.get_job_parameters(
1618
+ job_id="string",
1619
+ )
1620
+ """
1621
+ _response = await self._client_wrapper.httpx_client.request(
1622
+ "GET",
1623
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/parameters"),
1624
+ headers=self._client_wrapper.get_headers(),
1625
+ timeout=60,
1626
+ )
1627
+ if 200 <= _response.status_code < 300:
1628
+ return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
1629
+ if _response.status_code == 422:
1630
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1631
+ try:
1632
+ _response_json = _response.json()
1633
+ except JSONDecodeError:
1634
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1635
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1636
+
1526
1637
  async def get_parsing_job_details(self, job_id: str) -> typing.Any:
1527
1638
  """
1528
1639
  Get a job by id
@@ -15,6 +15,7 @@ from ...types.cloud_document_create import CloudDocumentCreate
15
15
  from ...types.configured_transformation_item import ConfiguredTransformationItem
16
16
  from ...types.data_sink_create import DataSinkCreate
17
17
  from ...types.eval_execution_params import EvalExecutionParams
18
+ from ...types.file_count_by_status_response import FileCountByStatusResponse
18
19
  from ...types.http_validation_error import HttpValidationError
19
20
  from ...types.input_message import InputMessage
20
21
  from ...types.llama_parse_parameters import LlamaParseParameters
@@ -216,6 +217,7 @@ class PipelinesClient:
216
217
  preset_retrieval_parameters: typing.Optional[PresetRetrievalParams] = OMIT,
217
218
  eval_parameters: typing.Optional[EvalExecutionParams] = OMIT,
218
219
  llama_parse_parameters: typing.Optional[LlamaParseParameters] = OMIT,
220
+ status: typing.Optional[str] = OMIT,
219
221
  name: typing.Optional[str] = OMIT,
220
222
  managed_pipeline_id: typing.Optional[str] = OMIT,
221
223
  ) -> Pipeline:
@@ -243,6 +245,8 @@ class PipelinesClient:
243
245
 
244
246
  - llama_parse_parameters: typing.Optional[LlamaParseParameters].
245
247
 
248
+ - status: typing.Optional[str].
249
+
246
250
  - name: typing.Optional[str].
247
251
 
248
252
  - managed_pipeline_id: typing.Optional[str].
@@ -266,6 +270,8 @@ class PipelinesClient:
266
270
  _request["eval_parameters"] = eval_parameters
267
271
  if llama_parse_parameters is not OMIT:
268
272
  _request["llama_parse_parameters"] = llama_parse_parameters
273
+ if status is not OMIT:
274
+ _request["status"] = status
269
275
  if name is not OMIT:
270
276
  _request["name"] = name
271
277
  if managed_pipeline_id is not OMIT:
@@ -559,6 +565,53 @@ class PipelinesClient:
559
565
  raise ApiError(status_code=_response.status_code, body=_response.text)
560
566
  raise ApiError(status_code=_response.status_code, body=_response_json)
561
567
 
568
+ def get_pipeline_file_status_counts(
569
+ self,
570
+ pipeline_id: str,
571
+ *,
572
+ data_source_id: typing.Optional[str] = None,
573
+ only_manually_uploaded: typing.Optional[bool] = None,
574
+ ) -> FileCountByStatusResponse:
575
+ """
576
+ Get files for a pipeline.
577
+
578
+ Parameters:
579
+ - pipeline_id: str.
580
+
581
+ - data_source_id: typing.Optional[str].
582
+
583
+ - only_manually_uploaded: typing.Optional[bool].
584
+ ---
585
+ from llama_cloud.client import LlamaCloud
586
+
587
+ client = LlamaCloud(
588
+ token="YOUR_TOKEN",
589
+ )
590
+ client.pipelines.get_pipeline_file_status_counts(
591
+ pipeline_id="string",
592
+ )
593
+ """
594
+ _response = self._client_wrapper.httpx_client.request(
595
+ "GET",
596
+ urllib.parse.urljoin(
597
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/files/status-counts"
598
+ ),
599
+ params=remove_none_from_dict(
600
+ {"data_source_id": data_source_id, "only_manually_uploaded": only_manually_uploaded}
601
+ ),
602
+ headers=self._client_wrapper.get_headers(),
603
+ timeout=60,
604
+ )
605
+ if 200 <= _response.status_code < 300:
606
+ return pydantic.parse_obj_as(FileCountByStatusResponse, _response.json()) # type: ignore
607
+ if _response.status_code == 422:
608
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
609
+ try:
610
+ _response_json = _response.json()
611
+ except JSONDecodeError:
612
+ raise ApiError(status_code=_response.status_code, body=_response.text)
613
+ raise ApiError(status_code=_response.status_code, body=_response_json)
614
+
562
615
  def get_pipeline_file_status(self, file_id: str, pipeline_id: str) -> ManagedIngestionStatusResponse:
563
616
  """
564
617
  Get status of a file for a pipeline.
@@ -1771,6 +1824,7 @@ class AsyncPipelinesClient:
1771
1824
  preset_retrieval_parameters: typing.Optional[PresetRetrievalParams] = OMIT,
1772
1825
  eval_parameters: typing.Optional[EvalExecutionParams] = OMIT,
1773
1826
  llama_parse_parameters: typing.Optional[LlamaParseParameters] = OMIT,
1827
+ status: typing.Optional[str] = OMIT,
1774
1828
  name: typing.Optional[str] = OMIT,
1775
1829
  managed_pipeline_id: typing.Optional[str] = OMIT,
1776
1830
  ) -> Pipeline:
@@ -1798,6 +1852,8 @@ class AsyncPipelinesClient:
1798
1852
 
1799
1853
  - llama_parse_parameters: typing.Optional[LlamaParseParameters].
1800
1854
 
1855
+ - status: typing.Optional[str].
1856
+
1801
1857
  - name: typing.Optional[str].
1802
1858
 
1803
1859
  - managed_pipeline_id: typing.Optional[str].
@@ -1821,6 +1877,8 @@ class AsyncPipelinesClient:
1821
1877
  _request["eval_parameters"] = eval_parameters
1822
1878
  if llama_parse_parameters is not OMIT:
1823
1879
  _request["llama_parse_parameters"] = llama_parse_parameters
1880
+ if status is not OMIT:
1881
+ _request["status"] = status
1824
1882
  if name is not OMIT:
1825
1883
  _request["name"] = name
1826
1884
  if managed_pipeline_id is not OMIT:
@@ -2114,6 +2172,53 @@ class AsyncPipelinesClient:
2114
2172
  raise ApiError(status_code=_response.status_code, body=_response.text)
2115
2173
  raise ApiError(status_code=_response.status_code, body=_response_json)
2116
2174
 
2175
+ async def get_pipeline_file_status_counts(
2176
+ self,
2177
+ pipeline_id: str,
2178
+ *,
2179
+ data_source_id: typing.Optional[str] = None,
2180
+ only_manually_uploaded: typing.Optional[bool] = None,
2181
+ ) -> FileCountByStatusResponse:
2182
+ """
2183
+ Get files for a pipeline.
2184
+
2185
+ Parameters:
2186
+ - pipeline_id: str.
2187
+
2188
+ - data_source_id: typing.Optional[str].
2189
+
2190
+ - only_manually_uploaded: typing.Optional[bool].
2191
+ ---
2192
+ from llama_cloud.client import AsyncLlamaCloud
2193
+
2194
+ client = AsyncLlamaCloud(
2195
+ token="YOUR_TOKEN",
2196
+ )
2197
+ await client.pipelines.get_pipeline_file_status_counts(
2198
+ pipeline_id="string",
2199
+ )
2200
+ """
2201
+ _response = await self._client_wrapper.httpx_client.request(
2202
+ "GET",
2203
+ urllib.parse.urljoin(
2204
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/files/status-counts"
2205
+ ),
2206
+ params=remove_none_from_dict(
2207
+ {"data_source_id": data_source_id, "only_manually_uploaded": only_manually_uploaded}
2208
+ ),
2209
+ headers=self._client_wrapper.get_headers(),
2210
+ timeout=60,
2211
+ )
2212
+ if 200 <= _response.status_code < 300:
2213
+ return pydantic.parse_obj_as(FileCountByStatusResponse, _response.json()) # type: ignore
2214
+ if _response.status_code == 422:
2215
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
2216
+ try:
2217
+ _response_json = _response.json()
2218
+ except JSONDecodeError:
2219
+ raise ApiError(status_code=_response.status_code, body=_response.text)
2220
+ raise ApiError(status_code=_response.status_code, body=_response_json)
2221
+
2117
2222
  async def get_pipeline_file_status(self, file_id: str, pipeline_id: str) -> ManagedIngestionStatusResponse:
2118
2223
  """
2119
2224
  Get status of a file for a pipeline.
@@ -16,6 +16,7 @@ from .advanced_mode_transform_config_segmentation_config import (
16
16
  AdvancedModeTransformConfigSegmentationConfig_Page,
17
17
  )
18
18
  from .app_schema_chat_chat_message import AppSchemaChatChatMessage
19
+ from .audio_block import AudioBlock
19
20
  from .auto_transform_config import AutoTransformConfig
20
21
  from .azure_open_ai_embedding import AzureOpenAiEmbedding
21
22
  from .azure_open_ai_embedding_config import AzureOpenAiEmbeddingConfig
@@ -24,6 +25,10 @@ from .base_plan_metronome_plan_type import BasePlanMetronomePlanType
24
25
  from .base_plan_name import BasePlanName
25
26
  from .base_plan_plan_frequency import BasePlanPlanFrequency
26
27
  from .base_prompt_template import BasePromptTemplate
28
+ from .batch import Batch
29
+ from .batch_item import BatchItem
30
+ from .batch_paginated_list import BatchPaginatedList
31
+ from .batch_public_output import BatchPublicOutput
27
32
  from .bedrock_embedding import BedrockEmbedding
28
33
  from .bedrock_embedding_config import BedrockEmbeddingConfig
29
34
  from .billing_period import BillingPeriod
@@ -105,19 +110,10 @@ from .embedding_model_config_update_embedding_config import (
105
110
  )
106
111
  from .eval_execution_params import EvalExecutionParams
107
112
  from .extract_agent import ExtractAgent
108
- from .extract_agent_create import ExtractAgentCreate
109
- from .extract_agent_create_data_schema import ExtractAgentCreateDataSchema
110
- from .extract_agent_create_data_schema_zero_value import ExtractAgentCreateDataSchemaZeroValue
111
113
  from .extract_agent_data_schema_value import ExtractAgentDataSchemaValue
112
- from .extract_agent_update import ExtractAgentUpdate
113
- from .extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
114
- from .extract_agent_update_data_schema_zero_value import ExtractAgentUpdateDataSchemaZeroValue
115
114
  from .extract_config import ExtractConfig
116
115
  from .extract_job import ExtractJob
117
116
  from .extract_job_create import ExtractJobCreate
118
- from .extract_job_create_batch import ExtractJobCreateBatch
119
- from .extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
120
- from .extract_job_create_batch_data_schema_override_zero_value import ExtractJobCreateBatchDataSchemaOverrideZeroValue
121
117
  from .extract_job_create_data_schema_override import ExtractJobCreateDataSchemaOverride
122
118
  from .extract_job_create_data_schema_override_zero_value import ExtractJobCreateDataSchemaOverrideZeroValue
123
119
  from .extract_mode import ExtractMode
@@ -132,14 +128,14 @@ from .extract_run_data_item_value import ExtractRunDataItemValue
132
128
  from .extract_run_data_schema_value import ExtractRunDataSchemaValue
133
129
  from .extract_run_data_zero_value import ExtractRunDataZeroValue
134
130
  from .extract_run_extraction_metadata_value import ExtractRunExtractionMetadataValue
135
- from .extract_schema_validate_request import ExtractSchemaValidateRequest
136
- from .extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
137
- from .extract_schema_validate_request_data_schema_zero_value import ExtractSchemaValidateRequestDataSchemaZeroValue
138
131
  from .extract_schema_validate_response import ExtractSchemaValidateResponse
139
132
  from .extract_schema_validate_response_data_schema_value import ExtractSchemaValidateResponseDataSchemaValue
140
133
  from .extract_state import ExtractState
141
134
  from .extract_target import ExtractTarget
135
+ from .fail_page_mode import FailPageMode
142
136
  from .file import File
137
+ from .file_count_by_status_response import FileCountByStatusResponse
138
+ from .file_parse_public import FileParsePublic
143
139
  from .file_permission_info_value import FilePermissionInfoValue
144
140
  from .file_resource_info_value import FileResourceInfoValue
145
141
  from .filter_condition import FilterCondition
@@ -162,6 +158,7 @@ from .llama_extract_settings import LlamaExtractSettings
162
158
  from .llama_index_core_base_llms_types_chat_message import LlamaIndexCoreBaseLlmsTypesChatMessage
163
159
  from .llama_index_core_base_llms_types_chat_message_blocks_item import (
164
160
  LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem,
161
+ LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio,
165
162
  LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image,
166
163
  LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text,
167
164
  )
@@ -210,6 +207,9 @@ from .parsing_job_text_result import ParsingJobTextResult
210
207
  from .parsing_mode import ParsingMode
211
208
  from .partition_names import PartitionNames
212
209
  from .permission import Permission
210
+ from .pg_vector_distance_method import PgVectorDistanceMethod
211
+ from .pg_vector_hnsw_settings import PgVectorHnswSettings
212
+ from .pg_vector_vector_type import PgVectorVectorType
213
213
  from .pipeline import Pipeline
214
214
  from .pipeline_configuration_hashes import PipelineConfigurationHashes
215
215
  from .pipeline_create import PipelineCreate
@@ -228,6 +228,7 @@ from .pipeline_data_source import PipelineDataSource
228
228
  from .pipeline_data_source_component import PipelineDataSourceComponent
229
229
  from .pipeline_data_source_create import PipelineDataSourceCreate
230
230
  from .pipeline_data_source_custom_metadata_value import PipelineDataSourceCustomMetadataValue
231
+ from .pipeline_data_source_status import PipelineDataSourceStatus
231
232
  from .pipeline_deployment import PipelineDeployment
232
233
  from .pipeline_embedding_config import (
233
234
  PipelineEmbeddingConfig,
@@ -340,6 +341,7 @@ __all__ = [
340
341
  "AdvancedModeTransformConfigSegmentationConfig_None",
341
342
  "AdvancedModeTransformConfigSegmentationConfig_Page",
342
343
  "AppSchemaChatChatMessage",
344
+ "AudioBlock",
343
345
  "AutoTransformConfig",
344
346
  "AzureOpenAiEmbedding",
345
347
  "AzureOpenAiEmbeddingConfig",
@@ -348,6 +350,10 @@ __all__ = [
348
350
  "BasePlanName",
349
351
  "BasePlanPlanFrequency",
350
352
  "BasePromptTemplate",
353
+ "Batch",
354
+ "BatchItem",
355
+ "BatchPaginatedList",
356
+ "BatchPublicOutput",
351
357
  "BedrockEmbedding",
352
358
  "BedrockEmbeddingConfig",
353
359
  "BillingPeriod",
@@ -425,19 +431,10 @@ __all__ = [
425
431
  "EmbeddingModelConfigUpdateEmbeddingConfig_VertexaiEmbedding",
426
432
  "EvalExecutionParams",
427
433
  "ExtractAgent",
428
- "ExtractAgentCreate",
429
- "ExtractAgentCreateDataSchema",
430
- "ExtractAgentCreateDataSchemaZeroValue",
431
434
  "ExtractAgentDataSchemaValue",
432
- "ExtractAgentUpdate",
433
- "ExtractAgentUpdateDataSchema",
434
- "ExtractAgentUpdateDataSchemaZeroValue",
435
435
  "ExtractConfig",
436
436
  "ExtractJob",
437
437
  "ExtractJobCreate",
438
- "ExtractJobCreateBatch",
439
- "ExtractJobCreateBatchDataSchemaOverride",
440
- "ExtractJobCreateBatchDataSchemaOverrideZeroValue",
441
438
  "ExtractJobCreateDataSchemaOverride",
442
439
  "ExtractJobCreateDataSchemaOverrideZeroValue",
443
440
  "ExtractMode",
@@ -452,14 +449,14 @@ __all__ = [
452
449
  "ExtractRunDataSchemaValue",
453
450
  "ExtractRunDataZeroValue",
454
451
  "ExtractRunExtractionMetadataValue",
455
- "ExtractSchemaValidateRequest",
456
- "ExtractSchemaValidateRequestDataSchema",
457
- "ExtractSchemaValidateRequestDataSchemaZeroValue",
458
452
  "ExtractSchemaValidateResponse",
459
453
  "ExtractSchemaValidateResponseDataSchemaValue",
460
454
  "ExtractState",
461
455
  "ExtractTarget",
456
+ "FailPageMode",
462
457
  "File",
458
+ "FileCountByStatusResponse",
459
+ "FileParsePublic",
463
460
  "FilePermissionInfoValue",
464
461
  "FileResourceInfoValue",
465
462
  "FilterCondition",
@@ -481,6 +478,7 @@ __all__ = [
481
478
  "LlamaExtractSettings",
482
479
  "LlamaIndexCoreBaseLlmsTypesChatMessage",
483
480
  "LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem",
481
+ "LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio",
484
482
  "LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image",
485
483
  "LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text",
486
484
  "LlamaParseParameters",
@@ -528,6 +526,9 @@ __all__ = [
528
526
  "ParsingMode",
529
527
  "PartitionNames",
530
528
  "Permission",
529
+ "PgVectorDistanceMethod",
530
+ "PgVectorHnswSettings",
531
+ "PgVectorVectorType",
531
532
  "Pipeline",
532
533
  "PipelineConfigurationHashes",
533
534
  "PipelineCreate",
@@ -544,6 +545,7 @@ __all__ = [
544
545
  "PipelineDataSourceComponent",
545
546
  "PipelineDataSourceCreate",
546
547
  "PipelineDataSourceCustomMetadataValue",
548
+ "PipelineDataSourceStatus",
547
549
  "PipelineDeployment",
548
550
  "PipelineEmbeddingConfig",
549
551
  "PipelineEmbeddingConfig_AzureEmbedding",
@@ -4,7 +4,6 @@ import datetime as dt
4
4
  import typing
5
5
 
6
6
  from ..core.datetime_utils import serialize_datetime
7
- from .extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
8
7
 
9
8
  try:
10
9
  import pydantic
@@ -15,8 +14,11 @@ except ImportError:
15
14
  import pydantic # type: ignore
16
15
 
17
16
 
18
- class ExtractSchemaValidateRequest(pydantic.BaseModel):
19
- data_schema: ExtractSchemaValidateRequestDataSchema
17
+ class AudioBlock(pydantic.BaseModel):
18
+ audio: typing.Optional[str]
19
+ path: typing.Optional[str]
20
+ url: typing.Optional[str]
21
+ format: typing.Optional[str]
20
22
 
21
23
  def json(self, **kwargs: typing.Any) -> str:
22
24
  kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
@@ -0,0 +1,47 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+ import typing
5
+
6
+ from ..core.datetime_utils import serialize_datetime
7
+ from .llama_parse_parameters import LlamaParseParameters
8
+
9
+ try:
10
+ import pydantic
11
+ if pydantic.__version__.startswith("1."):
12
+ raise ImportError
13
+ import pydantic.v1 as pydantic # type: ignore
14
+ except ImportError:
15
+ import pydantic # type: ignore
16
+
17
+
18
+ class Batch(pydantic.BaseModel):
19
+ tool: str = pydantic.Field(description="The tool to be used for all requests in the batch.")
20
+ tool_data: typing.Optional[LlamaParseParameters]
21
+ input_type: str = pydantic.Field(description="The type of input file. Currently only 'datasource' is supported.")
22
+ input_id: str = pydantic.Field(description="The ID of the input file for the batch.")
23
+ output_type: typing.Optional[str]
24
+ output_id: typing.Optional[str]
25
+ id: str = pydantic.Field(description="Unique identifier for the batch")
26
+ project_id: str = pydantic.Field(description="The ID of the project to which the batch belongs")
27
+ organization_id: str = pydantic.Field(description="The ID of the organization to which the batch belongs")
28
+ user_id: str = pydantic.Field(description="The ID of the user who created the batch")
29
+ external_id: typing.Optional[str]
30
+ completion_window: int = pydantic.Field(description="The time frame within which the batch should be processed")
31
+ pipeline_id: str = pydantic.Field(description="The ID of the pipeline to which the batch belongs")
32
+ status: str = pydantic.Field(description="The current status of the batch")
33
+ created_at: typing.Optional[dt.datetime]
34
+ updated_at: typing.Optional[dt.datetime]
35
+
36
+ def json(self, **kwargs: typing.Any) -> str:
37
+ kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
38
+ return super().json(**kwargs_with_defaults)
39
+
40
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
41
+ kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
42
+ return super().dict(**kwargs_with_defaults)
43
+
44
+ class Config:
45
+ frozen = True
46
+ smart_union = True
47
+ json_encoders = {dt.datetime: serialize_datetime}