PyPI - h2ogpte - Versions diffs - 1.6.43rc2__py3-none-any.whl → 1.6.43rc5__py3-none-any.whl - Mend

h2ogpte 1.6.43rc2-py3-none-any.whl → 1.6.43rc5-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (25)

h2ogpte/__init__.py +1 -1
h2ogpte/connectors.py +11 -0
h2ogpte/h2ogpte.py +86 -0
h2ogpte/h2ogpte_async.py +87 -0
h2ogpte/rest_async/__init__.py +3 -1
h2ogpte/rest_async/api/document_ingestion_api.py +1365 -436
h2ogpte/rest_async/api_client.py +1 -1
h2ogpte/rest_async/configuration.py +1 -1
h2ogpte/rest_async/models/__init__.py +2 -0
h2ogpte/rest_async/models/confluence_credentials.py +89 -0
h2ogpte/rest_async/models/ingest_from_confluence_body.py +97 -0
h2ogpte/rest_sync/__init__.py +3 -1
h2ogpte/rest_sync/api/document_ingestion_api.py +1365 -436
h2ogpte/rest_sync/api_client.py +1 -1
h2ogpte/rest_sync/configuration.py +1 -1
h2ogpte/rest_sync/models/__init__.py +2 -0
h2ogpte/rest_sync/models/confluence_credentials.py +89 -0
h2ogpte/rest_sync/models/ingest_from_confluence_body.py +97 -0
h2ogpte/session.py +8 -0
h2ogpte/session_async.py +8 -0
{h2ogpte-1.6.43rc2.dist-info → h2ogpte-1.6.43rc5.dist-info}/METADATA +1 -1
{h2ogpte-1.6.43rc2.dist-info → h2ogpte-1.6.43rc5.dist-info}/RECORD +25 -21
{h2ogpte-1.6.43rc2.dist-info → h2ogpte-1.6.43rc5.dist-info}/WHEEL +0 -0
{h2ogpte-1.6.43rc2.dist-info → h2ogpte-1.6.43rc5.dist-info}/entry_points.txt +0 -0
{h2ogpte-1.6.43rc2.dist-info → h2ogpte-1.6.43rc5.dist-info}/top_level.txt +0 -0

h2ogpte/rest_sync/api/document_ingestion_api.py CHANGED Viewed

@@ -20,6 +20,7 @@ from pydantic import Field, StrictBool, StrictBytes, StrictFloat, StrictInt, Str
 from typing import List, Optional, Tuple, Union
 from typing_extensions import Annotated
 from h2ogpte.rest_sync.models.ingest_from_azure_blob_storage_body import IngestFromAzureBlobStorageBody
+from h2ogpte.rest_sync.models.ingest_from_confluence_body import IngestFromConfluenceBody
 from h2ogpte.rest_sync.models.ingest_from_file_system_body import IngestFromFileSystemBody
 from h2ogpte.rest_sync.models.ingest_from_gcs_body import IngestFromGcsBody
 from h2ogpte.rest_sync.models.ingest_from_s3_body import IngestFromS3Body
@@ -982,10 +983,10 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_file_system_job(
+    def create_ingest_from_confluence_job(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_file_system_body: IngestFromFileSystemBody,
+        ingest_from_confluence_body: IngestFromConfluenceBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -1009,14 +1010,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> JobDetails:
-        """Creates a job to add files from the local system into a collection.
+        """Creates a job to ingest confluence pages into collection.
-        Creates a job to add files from the local system into a collection.
+        Creates a job to confluence pages into collection. If an ingested page has sub-pages, the subpages are also ingested.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_file_system_body: (required)
-        :type ingest_from_file_system_body: IngestFromFileSystemBody
+        :param ingest_from_confluence_body: (required)
+        :type ingest_from_confluence_body: IngestFromConfluenceBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -1059,9 +1060,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_file_system_job_serialize(
+        _param = self._create_ingest_from_confluence_job_serialize(
             collection_id=collection_id,
-            ingest_from_file_system_body=ingest_from_file_system_body,
+            ingest_from_confluence_body=ingest_from_confluence_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -1094,10 +1095,10 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_file_system_job_with_http_info(
+    def create_ingest_from_confluence_job_with_http_info(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_file_system_body: IngestFromFileSystemBody,
+        ingest_from_confluence_body: IngestFromConfluenceBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -1121,14 +1122,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> ApiResponse[JobDetails]:
-        """Creates a job to add files from the local system into a collection.
+        """Creates a job to ingest confluence pages into collection.
-        Creates a job to add files from the local system into a collection.
+        Creates a job to confluence pages into collection. If an ingested page has sub-pages, the subpages are also ingested.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_file_system_body: (required)
-        :type ingest_from_file_system_body: IngestFromFileSystemBody
+        :param ingest_from_confluence_body: (required)
+        :type ingest_from_confluence_body: IngestFromConfluenceBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -1171,9 +1172,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_file_system_job_serialize(
+        _param = self._create_ingest_from_confluence_job_serialize(
             collection_id=collection_id,
-            ingest_from_file_system_body=ingest_from_file_system_body,
+            ingest_from_confluence_body=ingest_from_confluence_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -1206,10 +1207,10 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_file_system_job_without_preload_content(
+    def create_ingest_from_confluence_job_without_preload_content(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_file_system_body: IngestFromFileSystemBody,
+        ingest_from_confluence_body: IngestFromConfluenceBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -1233,14 +1234,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> RESTResponseType:
-        """Creates a job to add files from the local system into a collection.
+        """Creates a job to ingest confluence pages into collection.
-        Creates a job to add files from the local system into a collection.
+        Creates a job to confluence pages into collection. If an ingested page has sub-pages, the subpages are also ingested.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_file_system_body: (required)
-        :type ingest_from_file_system_body: IngestFromFileSystemBody
+        :param ingest_from_confluence_body: (required)
+        :type ingest_from_confluence_body: IngestFromConfluenceBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -1283,9 +1284,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_file_system_job_serialize(
+        _param = self._create_ingest_from_confluence_job_serialize(
             collection_id=collection_id,
-            ingest_from_file_system_body=ingest_from_file_system_body,
+            ingest_from_confluence_body=ingest_from_confluence_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -1313,10 +1314,10 @@ class DocumentIngestionApi:
         return response_data.response
-    def _create_ingest_from_file_system_job_serialize(
+    def _create_ingest_from_confluence_job_serialize(
         self,
         collection_id,
-        ingest_from_file_system_body,
+        ingest_from_confluence_body,
         gen_doc_summaries,
         gen_doc_questions,
         audio_input_language,
@@ -1396,8 +1397,8 @@ class DocumentIngestionApi:
         # process the header parameters
         # process the form parameters
         # process the body parameter
-        if ingest_from_file_system_body is not None:
-            _body_params = ingest_from_file_system_body
+        if ingest_from_confluence_body is not None:
+            _body_params = ingest_from_confluence_body
         # set the HTTP header `Accept`
@@ -1429,7 +1430,7 @@ class DocumentIngestionApi:
         return self.api_client.param_serialize(
             method='POST',
-            resource_path='/ingest/file_system/job',
+            resource_path='/ingest/confluence/job',
             path_params=_path_params,
             query_params=_query_params,
             header_params=_header_params,
@@ -1446,10 +1447,10 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_gcs_job(
+    def create_ingest_from_file_system_job(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_gcs_body: IngestFromGcsBody,
+        ingest_from_file_system_body: IngestFromFileSystemBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -1473,14 +1474,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> JobDetails:
-        """Creates a job to add files from the Google Cloud Storage into a collection.
+        """Creates a job to add files from the local system into a collection.
-        Creates a job to add files from the Google Cloud Storage into a collection.
+        Creates a job to add files from the local system into a collection.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_gcs_body: (required)
-        :type ingest_from_gcs_body: IngestFromGcsBody
+        :param ingest_from_file_system_body: (required)
+        :type ingest_from_file_system_body: IngestFromFileSystemBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -1523,9 +1524,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_gcs_job_serialize(
+        _param = self._create_ingest_from_file_system_job_serialize(
             collection_id=collection_id,
-            ingest_from_gcs_body=ingest_from_gcs_body,
+            ingest_from_file_system_body=ingest_from_file_system_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -1558,10 +1559,10 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_gcs_job_with_http_info(
+    def create_ingest_from_file_system_job_with_http_info(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_gcs_body: IngestFromGcsBody,
+        ingest_from_file_system_body: IngestFromFileSystemBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -1585,14 +1586,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> ApiResponse[JobDetails]:
-        """Creates a job to add files from the Google Cloud Storage into a collection.
+        """Creates a job to add files from the local system into a collection.
-        Creates a job to add files from the Google Cloud Storage into a collection.
+        Creates a job to add files from the local system into a collection.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_gcs_body: (required)
-        :type ingest_from_gcs_body: IngestFromGcsBody
+        :param ingest_from_file_system_body: (required)
+        :type ingest_from_file_system_body: IngestFromFileSystemBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -1635,9 +1636,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_gcs_job_serialize(
+        _param = self._create_ingest_from_file_system_job_serialize(
             collection_id=collection_id,
-            ingest_from_gcs_body=ingest_from_gcs_body,
+            ingest_from_file_system_body=ingest_from_file_system_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -1670,10 +1671,10 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_gcs_job_without_preload_content(
+    def create_ingest_from_file_system_job_without_preload_content(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_gcs_body: IngestFromGcsBody,
+        ingest_from_file_system_body: IngestFromFileSystemBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -1697,14 +1698,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> RESTResponseType:
-        """Creates a job to add files from the Google Cloud Storage into a collection.
+        """Creates a job to add files from the local system into a collection.
-        Creates a job to add files from the Google Cloud Storage into a collection.
+        Creates a job to add files from the local system into a collection.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_gcs_body: (required)
-        :type ingest_from_gcs_body: IngestFromGcsBody
+        :param ingest_from_file_system_body: (required)
+        :type ingest_from_file_system_body: IngestFromFileSystemBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -1747,9 +1748,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_gcs_job_serialize(
+        _param = self._create_ingest_from_file_system_job_serialize(
             collection_id=collection_id,
-            ingest_from_gcs_body=ingest_from_gcs_body,
+            ingest_from_file_system_body=ingest_from_file_system_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -1777,10 +1778,10 @@ class DocumentIngestionApi:
         return response_data.response
-    def _create_ingest_from_gcs_job_serialize(
+    def _create_ingest_from_file_system_job_serialize(
         self,
         collection_id,
-        ingest_from_gcs_body,
+        ingest_from_file_system_body,
         gen_doc_summaries,
         gen_doc_questions,
         audio_input_language,
@@ -1860,8 +1861,8 @@ class DocumentIngestionApi:
         # process the header parameters
         # process the form parameters
         # process the body parameter
-        if ingest_from_gcs_body is not None:
-            _body_params = ingest_from_gcs_body
+        if ingest_from_file_system_body is not None:
+            _body_params = ingest_from_file_system_body
         # set the HTTP header `Accept`
@@ -1893,7 +1894,7 @@ class DocumentIngestionApi:
         return self.api_client.param_serialize(
             method='POST',
-            resource_path='/ingest/gcs/job',
+            resource_path='/ingest/file_system/job',
             path_params=_path_params,
             query_params=_query_params,
             header_params=_header_params,
@@ -1910,14 +1911,19 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_plain_text_job(
+    def create_ingest_from_gcs_job(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        file_name: Annotated[StrictStr, Field(description="String of the file name to use for the document.")],
-        body: Annotated[StrictStr, Field(description="The text that will ingested into a collection.")],
+        ingest_from_gcs_body: IngestFromGcsBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
-        metadata: Annotated[Optional[StrictStr], Field(description="String with json-encoded metadata for the document.")] = None,
+        audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
+        ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
+        tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
+        keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
+        chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
+        handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
+        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
         _request_timeout: Union[
             None,
@@ -1932,22 +1938,32 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> JobDetails:
-        """Creates a job to add plain text to a collection.
+        """Creates a job to add files from the Google Cloud Storage into a collection.
-        Creates a job to add plain text to a collection.
+        Creates a job to add files from the Google Cloud Storage into a collection.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param file_name: String of the file name to use for the document. (required)
-        :type file_name: str
-        :param body: The text that will ingested into a collection. (required)
-        :type body: str
+        :param ingest_from_gcs_body: (required)
+        :type ingest_from_gcs_body: IngestFromGcsBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
         :type gen_doc_questions: bool
-        :param metadata: String with json-encoded metadata for the document.
-        :type metadata: str
+        :param audio_input_language: Language of audio files.
+        :type audio_input_language: str
+        :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
+        :type ocr_model: str
+        :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
+        :type tesseract_lang: str
+        :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
+        :type keep_tables_as_one_chunk: bool
+        :param chunk_by_page: Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.
+        :type chunk_by_page: bool
+        :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
+        :type handwriting_check: bool
+        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
+        :type ingest_mode: str
         :param timeout: Timeout in seconds
         :type timeout: float
         :param _request_timeout: timeout setting for this request. If one
@@ -1972,13 +1988,18 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_plain_text_job_serialize(
+        _param = self._create_ingest_from_gcs_job_serialize(
             collection_id=collection_id,
-            file_name=file_name,
-            body=body,
+            ingest_from_gcs_body=ingest_from_gcs_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
-            metadata=metadata,
+            audio_input_language=audio_input_language,
+            ocr_model=ocr_model,
+            tesseract_lang=tesseract_lang,
+            keep_tables_as_one_chunk=keep_tables_as_one_chunk,
+            chunk_by_page=chunk_by_page,
+            handwriting_check=handwriting_check,
+            ingest_mode=ingest_mode,
             timeout=timeout,
             _request_auth=_request_auth,
             _content_type=_content_type,
@@ -2002,14 +2023,19 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_plain_text_job_with_http_info(
+    def create_ingest_from_gcs_job_with_http_info(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        file_name: Annotated[StrictStr, Field(description="String of the file name to use for the document.")],
-        body: Annotated[StrictStr, Field(description="The text that will ingested into a collection.")],
+        ingest_from_gcs_body: IngestFromGcsBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
-        metadata: Annotated[Optional[StrictStr], Field(description="String with json-encoded metadata for the document.")] = None,
+        audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
+        ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
+        tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
+        keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
+        chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
+        handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
+        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
         _request_timeout: Union[
             None,
@@ -2024,22 +2050,32 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> ApiResponse[JobDetails]:
-        """Creates a job to add plain text to a collection.
+        """Creates a job to add files from the Google Cloud Storage into a collection.
-        Creates a job to add plain text to a collection.
+        Creates a job to add files from the Google Cloud Storage into a collection.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param file_name: String of the file name to use for the document. (required)
-        :type file_name: str
-        :param body: The text that will ingested into a collection. (required)
-        :type body: str
+        :param ingest_from_gcs_body: (required)
+        :type ingest_from_gcs_body: IngestFromGcsBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
         :type gen_doc_questions: bool
-        :param metadata: String with json-encoded metadata for the document.
-        :type metadata: str
+        :param audio_input_language: Language of audio files.
+        :type audio_input_language: str
+        :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
+        :type ocr_model: str
+        :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
+        :type tesseract_lang: str
+        :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
+        :type keep_tables_as_one_chunk: bool
+        :param chunk_by_page: Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.
+        :type chunk_by_page: bool
+        :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
+        :type handwriting_check: bool
+        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
+        :type ingest_mode: str
         :param timeout: Timeout in seconds
         :type timeout: float
         :param _request_timeout: timeout setting for this request. If one
@@ -2064,13 +2100,18 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_plain_text_job_serialize(
+        _param = self._create_ingest_from_gcs_job_serialize(
             collection_id=collection_id,
-            file_name=file_name,
-            body=body,
+            ingest_from_gcs_body=ingest_from_gcs_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
-            metadata=metadata,
+            audio_input_language=audio_input_language,
+            ocr_model=ocr_model,
+            tesseract_lang=tesseract_lang,
+            keep_tables_as_one_chunk=keep_tables_as_one_chunk,
+            chunk_by_page=chunk_by_page,
+            handwriting_check=handwriting_check,
+            ingest_mode=ingest_mode,
             timeout=timeout,
             _request_auth=_request_auth,
             _content_type=_content_type,
@@ -2094,14 +2135,19 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_plain_text_job_without_preload_content(
+    def create_ingest_from_gcs_job_without_preload_content(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        file_name: Annotated[StrictStr, Field(description="String of the file name to use for the document.")],
-        body: Annotated[StrictStr, Field(description="The text that will ingested into a collection.")],
+        ingest_from_gcs_body: IngestFromGcsBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
-        metadata: Annotated[Optional[StrictStr], Field(description="String with json-encoded metadata for the document.")] = None,
+        audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
+        ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
+        tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
+        keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
+        chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
+        handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
+        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
         _request_timeout: Union[
             None,
@@ -2116,22 +2162,32 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> RESTResponseType:
-        """Creates a job to add plain text to a collection.
+        """Creates a job to add files from the Google Cloud Storage into a collection.
-        Creates a job to add plain text to a collection.
+        Creates a job to add files from the Google Cloud Storage into a collection.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param file_name: String of the file name to use for the document. (required)
-        :type file_name: str
-        :param body: The text that will ingested into a collection. (required)
-        :type body: str
+        :param ingest_from_gcs_body: (required)
+        :type ingest_from_gcs_body: IngestFromGcsBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
         :type gen_doc_questions: bool
-        :param metadata: String with json-encoded metadata for the document.
-        :type metadata: str
+        :param audio_input_language: Language of audio files.
+        :type audio_input_language: str
+        :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
+        :type ocr_model: str
+        :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
+        :type tesseract_lang: str
+        :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
+        :type keep_tables_as_one_chunk: bool
+        :param chunk_by_page: Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.
+        :type chunk_by_page: bool
+        :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
+        :type handwriting_check: bool
+        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
+        :type ingest_mode: str
         :param timeout: Timeout in seconds
         :type timeout: float
         :param _request_timeout: timeout setting for this request. If one
@@ -2156,13 +2212,18 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_plain_text_job_serialize(
+        _param = self._create_ingest_from_gcs_job_serialize(
             collection_id=collection_id,
-            file_name=file_name,
-            body=body,
+            ingest_from_gcs_body=ingest_from_gcs_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
-            metadata=metadata,
+            audio_input_language=audio_input_language,
+            ocr_model=ocr_model,
+            tesseract_lang=tesseract_lang,
+            keep_tables_as_one_chunk=keep_tables_as_one_chunk,
+            chunk_by_page=chunk_by_page,
+            handwriting_check=handwriting_check,
+            ingest_mode=ingest_mode,
             timeout=timeout,
             _request_auth=_request_auth,
             _content_type=_content_type,
@@ -2181,14 +2242,418 @@ class DocumentIngestionApi:
         return response_data.response
-    def _create_ingest_from_plain_text_job_serialize(
+    def _create_ingest_from_gcs_job_serialize(
         self,
         collection_id,
-        file_name,
-        body,
+        ingest_from_gcs_body,
         gen_doc_summaries,
         gen_doc_questions,
-        metadata,
+        audio_input_language,
+        ocr_model,
+        tesseract_lang,
+        keep_tables_as_one_chunk,
+        chunk_by_page,
+        handwriting_check,
+        ingest_mode,
+        timeout,
+        _request_auth,
+        _content_type,
+        _headers,
+        _host_index,
+    ) -> RequestSerialized:
+        _host = None
+        _collection_formats: Dict[str, str] = {
+        }
+        _path_params: Dict[str, str] = {}
+        _query_params: List[Tuple[str, str]] = []
+        _header_params: Dict[str, Optional[str]] = _headers or {}
+        _form_params: List[Tuple[str, str]] = []
+        _files: Dict[
+            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
+        ] = {}
+        _body_params: Optional[bytes] = None
+        # process the path parameters
+        # process the query parameters
+        if collection_id is not None:
+            _query_params.append(('collection_id', collection_id))
+        if gen_doc_summaries is not None:
+            _query_params.append(('gen_doc_summaries', gen_doc_summaries))
+        if gen_doc_questions is not None:
+            _query_params.append(('gen_doc_questions', gen_doc_questions))
+        if audio_input_language is not None:
+            _query_params.append(('audio_input_language', audio_input_language))
+        if ocr_model is not None:
+            _query_params.append(('ocr_model', ocr_model))
+        if tesseract_lang is not None:
+            _query_params.append(('tesseract_lang', tesseract_lang))
+        if keep_tables_as_one_chunk is not None:
+            _query_params.append(('keep_tables_as_one_chunk', keep_tables_as_one_chunk))
+        if chunk_by_page is not None:
+            _query_params.append(('chunk_by_page', chunk_by_page))
+        if handwriting_check is not None:
+            _query_params.append(('handwriting_check', handwriting_check))
+        if ingest_mode is not None:
+            _query_params.append(('ingest_mode', ingest_mode))
+        if timeout is not None:
+            _query_params.append(('timeout', timeout))
+        # process the header parameters
+        # process the form parameters
+        # process the body parameter
+        if ingest_from_gcs_body is not None:
+            _body_params = ingest_from_gcs_body
+        # set the HTTP header `Accept`
+        if 'Accept' not in _header_params:
+            _header_params['Accept'] = self.api_client.select_header_accept(
+                [
+                    'application/json'
+                ]
+            )
+        # set the HTTP header `Content-Type`
+        if _content_type:
+            _header_params['Content-Type'] = _content_type
+        else:
+            _default_content_type = (
+                self.api_client.select_header_content_type(
+                    [
+                        'application/json'
+                    ]
+                )
+            )
+            if _default_content_type is not None:
+                _header_params['Content-Type'] = _default_content_type
+        # authentication setting
+        _auth_settings: List[str] = [
+            'bearerAuth'
+        ]
+        return self.api_client.param_serialize(
+            method='POST',
+            resource_path='/ingest/gcs/job',
+            path_params=_path_params,
+            query_params=_query_params,
+            header_params=_header_params,
+            body=_body_params,
+            post_params=_form_params,
+            files=_files,
+            auth_settings=_auth_settings,
+            collection_formats=_collection_formats,
+            _host=_host,
+            _request_auth=_request_auth
+        )
+    @validate_call
+    def create_ingest_from_plain_text_job(
+        self,
+        collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
+        file_name: Annotated[StrictStr, Field(description="String of the file name to use for the document.")],
+        body: Annotated[StrictStr, Field(description="The text that will ingested into a collection.")],
+        gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
+        gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
+        metadata: Annotated[Optional[StrictStr], Field(description="String with json-encoded metadata for the document.")] = None,
+        timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
+        _request_timeout: Union[
+            None,
+            Annotated[StrictFloat, Field(gt=0)],
+            Tuple[
+                Annotated[StrictFloat, Field(gt=0)],
+                Annotated[StrictFloat, Field(gt=0)]
+            ]
+        ] = None,
+        _request_auth: Optional[Dict[StrictStr, Any]] = None,
+        _content_type: Optional[StrictStr] = None,
+        _headers: Optional[Dict[StrictStr, Any]] = None,
+        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
+    ) -> JobDetails:
+        """Creates a job to add plain text to a collection.
+        Creates a job to add plain text to a collection.
+        :param collection_id: String id of the collection to add the ingested documents into. (required)
+        :type collection_id: str
+        :param file_name: String of the file name to use for the document. (required)
+        :type file_name: str
+        :param body: The text that will ingested into a collection. (required)
+        :type body: str
+        :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
+        :type gen_doc_summaries: bool
+        :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
+        :type gen_doc_questions: bool
+        :param metadata: String with json-encoded metadata for the document.
+        :type metadata: str
+        :param timeout: Timeout in seconds
+        :type timeout: float
+        :param _request_timeout: timeout setting for this request. If one
+                                 number provided, it will be total request
+                                 timeout. It can also be a pair (tuple) of
+                                 (connection, read) timeouts.
+        :type _request_timeout: int, tuple(int, int), optional
+        :param _request_auth: set to override the auth_settings for an a single
+                              request; this effectively ignores the
+                              authentication in the spec for a single request.
+        :type _request_auth: dict, optional
+        :param _content_type: force content-type for the request.
+        :type _content_type: str, Optional
+        :param _headers: set to override the headers for a single
+                         request; this effectively ignores the headers
+                         in the spec for a single request.
+        :type _headers: dict, optional
+        :param _host_index: set to override the host_index for a single
+                            request; this effectively ignores the host_index
+                            in the spec for a single request.
+        :type _host_index: int, optional
+        :return: Returns the result object.
+        """ # noqa: E501
+        _param = self._create_ingest_from_plain_text_job_serialize(
+            collection_id=collection_id,
+            file_name=file_name,
+            body=body,
+            gen_doc_summaries=gen_doc_summaries,
+            gen_doc_questions=gen_doc_questions,
+            metadata=metadata,
+            timeout=timeout,
+            _request_auth=_request_auth,
+            _content_type=_content_type,
+            _headers=_headers,
+            _host_index=_host_index
+        )
+        _response_types_map: Dict[str, Optional[str]] = {
+            '201': "JobDetails",
+            '401': "EndpointError",
+        }
+        response_data = self.api_client.call_api(
+            *_param,
+            _request_timeout=_request_timeout
+        )
+        response_data.read()
+        return self.api_client.response_deserialize(
+            response_data=response_data,
+            response_types_map=_response_types_map,
+        ).data
+    @validate_call
+    def create_ingest_from_plain_text_job_with_http_info(
+        self,
+        collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
+        file_name: Annotated[StrictStr, Field(description="String of the file name to use for the document.")],
+        body: Annotated[StrictStr, Field(description="The text that will ingested into a collection.")],
+        gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
+        gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
+        metadata: Annotated[Optional[StrictStr], Field(description="String with json-encoded metadata for the document.")] = None,
+        timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
+        _request_timeout: Union[
+            None,
+            Annotated[StrictFloat, Field(gt=0)],
+            Tuple[
+                Annotated[StrictFloat, Field(gt=0)],
+                Annotated[StrictFloat, Field(gt=0)]
+            ]
+        ] = None,
+        _request_auth: Optional[Dict[StrictStr, Any]] = None,
+        _content_type: Optional[StrictStr] = None,
+        _headers: Optional[Dict[StrictStr, Any]] = None,
+        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
+    ) -> ApiResponse[JobDetails]:
+        """Creates a job to add plain text to a collection.
+        Creates a job to add plain text to a collection.
+        :param collection_id: String id of the collection to add the ingested documents into. (required)
+        :type collection_id: str
+        :param file_name: String of the file name to use for the document. (required)
+        :type file_name: str
+        :param body: The text that will ingested into a collection. (required)
+        :type body: str
+        :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
+        :type gen_doc_summaries: bool
+        :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
+        :type gen_doc_questions: bool
+        :param metadata: String with json-encoded metadata for the document.
+        :type metadata: str
+        :param timeout: Timeout in seconds
+        :type timeout: float
+        :param _request_timeout: timeout setting for this request. If one
+                                 number provided, it will be total request
+                                 timeout. It can also be a pair (tuple) of
+                                 (connection, read) timeouts.
+        :type _request_timeout: int, tuple(int, int), optional
+        :param _request_auth: set to override the auth_settings for an a single
+                              request; this effectively ignores the
+                              authentication in the spec for a single request.
+        :type _request_auth: dict, optional
+        :param _content_type: force content-type for the request.
+        :type _content_type: str, Optional
+        :param _headers: set to override the headers for a single
+                         request; this effectively ignores the headers
+                         in the spec for a single request.
+        :type _headers: dict, optional
+        :param _host_index: set to override the host_index for a single
+                            request; this effectively ignores the host_index
+                            in the spec for a single request.
+        :type _host_index: int, optional
+        :return: Returns the result object.
+        """ # noqa: E501
+        _param = self._create_ingest_from_plain_text_job_serialize(
+            collection_id=collection_id,
+            file_name=file_name,
+            body=body,
+            gen_doc_summaries=gen_doc_summaries,
+            gen_doc_questions=gen_doc_questions,
+            metadata=metadata,
+            timeout=timeout,
+            _request_auth=_request_auth,
+            _content_type=_content_type,
+            _headers=_headers,
+            _host_index=_host_index
+        )
+        _response_types_map: Dict[str, Optional[str]] = {
+            '201': "JobDetails",
+            '401': "EndpointError",
+        }
+        response_data = self.api_client.call_api(
+            *_param,
+            _request_timeout=_request_timeout
+        )
+        response_data.read()
+        return self.api_client.response_deserialize(
+            response_data=response_data,
+            response_types_map=_response_types_map,
+        )
+    @validate_call
+    def create_ingest_from_plain_text_job_without_preload_content(
+        self,
+        collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
+        file_name: Annotated[StrictStr, Field(description="String of the file name to use for the document.")],
+        body: Annotated[StrictStr, Field(description="The text that will ingested into a collection.")],
+        gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
+        gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
+        metadata: Annotated[Optional[StrictStr], Field(description="String with json-encoded metadata for the document.")] = None,
+        timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
+        _request_timeout: Union[
+            None,
+            Annotated[StrictFloat, Field(gt=0)],
+            Tuple[
+                Annotated[StrictFloat, Field(gt=0)],
+                Annotated[StrictFloat, Field(gt=0)]
+            ]
+        ] = None,
+        _request_auth: Optional[Dict[StrictStr, Any]] = None,
+        _content_type: Optional[StrictStr] = None,
+        _headers: Optional[Dict[StrictStr, Any]] = None,
+        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
+    ) -> RESTResponseType:
+        """Creates a job to add plain text to a collection.
+        Creates a job to add plain text to a collection.
+        :param collection_id: String id of the collection to add the ingested documents into. (required)
+        :type collection_id: str
+        :param file_name: String of the file name to use for the document. (required)
+        :type file_name: str
+        :param body: The text that will ingested into a collection. (required)
+        :type body: str
+        :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
+        :type gen_doc_summaries: bool
+        :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
+        :type gen_doc_questions: bool
+        :param metadata: String with json-encoded metadata for the document.
+        :type metadata: str
+        :param timeout: Timeout in seconds
+        :type timeout: float
+        :param _request_timeout: timeout setting for this request. If one
+                                 number provided, it will be total request
+                                 timeout. It can also be a pair (tuple) of
+                                 (connection, read) timeouts.
+        :type _request_timeout: int, tuple(int, int), optional
+        :param _request_auth: set to override the auth_settings for an a single
+                              request; this effectively ignores the
+                              authentication in the spec for a single request.
+        :type _request_auth: dict, optional
+        :param _content_type: force content-type for the request.
+        :type _content_type: str, Optional
+        :param _headers: set to override the headers for a single
+                         request; this effectively ignores the headers
+                         in the spec for a single request.
+        :type _headers: dict, optional
+        :param _host_index: set to override the host_index for a single
+                            request; this effectively ignores the host_index
+                            in the spec for a single request.
+        :type _host_index: int, optional
+        :return: Returns the result object.
+        """ # noqa: E501
+        _param = self._create_ingest_from_plain_text_job_serialize(
+            collection_id=collection_id,
+            file_name=file_name,
+            body=body,
+            gen_doc_summaries=gen_doc_summaries,
+            gen_doc_questions=gen_doc_questions,
+            metadata=metadata,
+            timeout=timeout,
+            _request_auth=_request_auth,
+            _content_type=_content_type,
+            _headers=_headers,
+            _host_index=_host_index
+        )
+        _response_types_map: Dict[str, Optional[str]] = {
+            '201': "JobDetails",
+            '401': "EndpointError",
+        }
+        response_data = self.api_client.call_api(
+            *_param,
+            _request_timeout=_request_timeout
+        )
+        return response_data.response
+    def _create_ingest_from_plain_text_job_serialize(
+        self,
+        collection_id,
+        file_name,
+        body,
+        gen_doc_summaries,
+        gen_doc_questions,
+        metadata,
         timeout,
         _request_auth,
         _content_type,
@@ -2218,7 +2683,447 @@ class DocumentIngestionApi:
         if file_name is not None:
-            _query_params.append(('file_name', file_name))
+            _query_params.append(('file_name', file_name))
+        if gen_doc_summaries is not None:
+            _query_params.append(('gen_doc_summaries', gen_doc_summaries))
+        if gen_doc_questions is not None:
+            _query_params.append(('gen_doc_questions', gen_doc_questions))
+        if metadata is not None:
+            _query_params.append(('metadata', metadata))
+        if timeout is not None:
+            _query_params.append(('timeout', timeout))
+        # process the header parameters
+        # process the form parameters
+        # process the body parameter
+        if body is not None:
+            _body_params = body
+        # set the HTTP header `Accept`
+        if 'Accept' not in _header_params:
+            _header_params['Accept'] = self.api_client.select_header_accept(
+                [
+                    'application/json'
+                ]
+            )
+        # set the HTTP header `Content-Type`
+        if _content_type:
+            _header_params['Content-Type'] = _content_type
+        else:
+            _default_content_type = (
+                self.api_client.select_header_content_type(
+                    [
+                        'text/plain'
+                    ]
+                )
+            )
+            if _default_content_type is not None:
+                _header_params['Content-Type'] = _default_content_type
+        # authentication setting
+        _auth_settings: List[str] = [
+            'bearerAuth'
+        ]
+        return self.api_client.param_serialize(
+            method='POST',
+            resource_path='/ingest/plain_text/job',
+            path_params=_path_params,
+            query_params=_query_params,
+            header_params=_header_params,
+            body=_body_params,
+            post_params=_form_params,
+            files=_files,
+            auth_settings=_auth_settings,
+            collection_formats=_collection_formats,
+            _host=_host,
+            _request_auth=_request_auth
+        )
+    @validate_call
+    def create_ingest_from_s3_job(
+        self,
+        collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
+        ingest_from_s3_body: IngestFromS3Body,
+        gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
+        gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
+        audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
+        ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
+        tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
+        keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
+        chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
+        handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
+        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
+        timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
+        _request_timeout: Union[
+            None,
+            Annotated[StrictFloat, Field(gt=0)],
+            Tuple[
+                Annotated[StrictFloat, Field(gt=0)],
+                Annotated[StrictFloat, Field(gt=0)]
+            ]
+        ] = None,
+        _request_auth: Optional[Dict[StrictStr, Any]] = None,
+        _content_type: Optional[StrictStr] = None,
+        _headers: Optional[Dict[StrictStr, Any]] = None,
+        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
+    ) -> JobDetails:
+        """Creates a job to add files from the AWS S3 storage into a collection.
+        Creates a job to add files from the AWS S3 storage into a collection.
+        :param collection_id: String id of the collection to add the ingested documents into. (required)
+        :type collection_id: str
+        :param ingest_from_s3_body: (required)
+        :type ingest_from_s3_body: IngestFromS3Body
+        :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
+        :type gen_doc_summaries: bool
+        :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
+        :type gen_doc_questions: bool
+        :param audio_input_language: Language of audio files.
+        :type audio_input_language: str
+        :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
+        :type ocr_model: str
+        :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
+        :type tesseract_lang: str
+        :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
+        :type keep_tables_as_one_chunk: bool
+        :param chunk_by_page: Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.
+        :type chunk_by_page: bool
+        :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
+        :type handwriting_check: bool
+        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
+        :type ingest_mode: str
+        :param timeout: Timeout in seconds
+        :type timeout: float
+        :param _request_timeout: timeout setting for this request. If one
+                                 number provided, it will be total request
+                                 timeout. It can also be a pair (tuple) of
+                                 (connection, read) timeouts.
+        :type _request_timeout: int, tuple(int, int), optional
+        :param _request_auth: set to override the auth_settings for an a single
+                              request; this effectively ignores the
+                              authentication in the spec for a single request.
+        :type _request_auth: dict, optional
+        :param _content_type: force content-type for the request.
+        :type _content_type: str, Optional
+        :param _headers: set to override the headers for a single
+                         request; this effectively ignores the headers
+                         in the spec for a single request.
+        :type _headers: dict, optional
+        :param _host_index: set to override the host_index for a single
+                            request; this effectively ignores the host_index
+                            in the spec for a single request.
+        :type _host_index: int, optional
+        :return: Returns the result object.
+        """ # noqa: E501
+        _param = self._create_ingest_from_s3_job_serialize(
+            collection_id=collection_id,
+            ingest_from_s3_body=ingest_from_s3_body,
+            gen_doc_summaries=gen_doc_summaries,
+            gen_doc_questions=gen_doc_questions,
+            audio_input_language=audio_input_language,
+            ocr_model=ocr_model,
+            tesseract_lang=tesseract_lang,
+            keep_tables_as_one_chunk=keep_tables_as_one_chunk,
+            chunk_by_page=chunk_by_page,
+            handwriting_check=handwriting_check,
+            ingest_mode=ingest_mode,
+            timeout=timeout,
+            _request_auth=_request_auth,
+            _content_type=_content_type,
+            _headers=_headers,
+            _host_index=_host_index
+        )
+        _response_types_map: Dict[str, Optional[str]] = {
+            '201': "JobDetails",
+            '401': "EndpointError",
+        }
+        response_data = self.api_client.call_api(
+            *_param,
+            _request_timeout=_request_timeout
+        )
+        response_data.read()
+        return self.api_client.response_deserialize(
+            response_data=response_data,
+            response_types_map=_response_types_map,
+        ).data
+    @validate_call
+    def create_ingest_from_s3_job_with_http_info(
+        self,
+        collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
+        ingest_from_s3_body: IngestFromS3Body,
+        gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
+        gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
+        audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
+        ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
+        tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
+        keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
+        chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
+        handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
+        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
+        timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
+        _request_timeout: Union[
+            None,
+            Annotated[StrictFloat, Field(gt=0)],
+            Tuple[
+                Annotated[StrictFloat, Field(gt=0)],
+                Annotated[StrictFloat, Field(gt=0)]
+            ]
+        ] = None,
+        _request_auth: Optional[Dict[StrictStr, Any]] = None,
+        _content_type: Optional[StrictStr] = None,
+        _headers: Optional[Dict[StrictStr, Any]] = None,
+        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
+    ) -> ApiResponse[JobDetails]:
+        """Creates a job to add files from the AWS S3 storage into a collection.
+        Creates a job to add files from the AWS S3 storage into a collection.
+        :param collection_id: String id of the collection to add the ingested documents into. (required)
+        :type collection_id: str
+        :param ingest_from_s3_body: (required)
+        :type ingest_from_s3_body: IngestFromS3Body
+        :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
+        :type gen_doc_summaries: bool
+        :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
+        :type gen_doc_questions: bool
+        :param audio_input_language: Language of audio files.
+        :type audio_input_language: str
+        :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
+        :type ocr_model: str
+        :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
+        :type tesseract_lang: str
+        :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
+        :type keep_tables_as_one_chunk: bool
+        :param chunk_by_page: Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.
+        :type chunk_by_page: bool
+        :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
+        :type handwriting_check: bool
+        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
+        :type ingest_mode: str
+        :param timeout: Timeout in seconds
+        :type timeout: float
+        :param _request_timeout: timeout setting for this request. If one
+                                 number provided, it will be total request
+                                 timeout. It can also be a pair (tuple) of
+                                 (connection, read) timeouts.
+        :type _request_timeout: int, tuple(int, int), optional
+        :param _request_auth: set to override the auth_settings for an a single
+                              request; this effectively ignores the
+                              authentication in the spec for a single request.
+        :type _request_auth: dict, optional
+        :param _content_type: force content-type for the request.
+        :type _content_type: str, Optional
+        :param _headers: set to override the headers for a single
+                         request; this effectively ignores the headers
+                         in the spec for a single request.
+        :type _headers: dict, optional
+        :param _host_index: set to override the host_index for a single
+                            request; this effectively ignores the host_index
+                            in the spec for a single request.
+        :type _host_index: int, optional
+        :return: Returns the result object.
+        """ # noqa: E501
+        _param = self._create_ingest_from_s3_job_serialize(
+            collection_id=collection_id,
+            ingest_from_s3_body=ingest_from_s3_body,
+            gen_doc_summaries=gen_doc_summaries,
+            gen_doc_questions=gen_doc_questions,
+            audio_input_language=audio_input_language,
+            ocr_model=ocr_model,
+            tesseract_lang=tesseract_lang,
+            keep_tables_as_one_chunk=keep_tables_as_one_chunk,
+            chunk_by_page=chunk_by_page,
+            handwriting_check=handwriting_check,
+            ingest_mode=ingest_mode,
+            timeout=timeout,
+            _request_auth=_request_auth,
+            _content_type=_content_type,
+            _headers=_headers,
+            _host_index=_host_index
+        )
+        _response_types_map: Dict[str, Optional[str]] = {
+            '201': "JobDetails",
+            '401': "EndpointError",
+        }
+        response_data = self.api_client.call_api(
+            *_param,
+            _request_timeout=_request_timeout
+        )
+        response_data.read()
+        return self.api_client.response_deserialize(
+            response_data=response_data,
+            response_types_map=_response_types_map,
+        )
+    @validate_call
+    def create_ingest_from_s3_job_without_preload_content(
+        self,
+        collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
+        ingest_from_s3_body: IngestFromS3Body,
+        gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
+        gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
+        audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
+        ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
+        tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
+        keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
+        chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
+        handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
+        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
+        timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
+        _request_timeout: Union[
+            None,
+            Annotated[StrictFloat, Field(gt=0)],
+            Tuple[
+                Annotated[StrictFloat, Field(gt=0)],
+                Annotated[StrictFloat, Field(gt=0)]
+            ]
+        ] = None,
+        _request_auth: Optional[Dict[StrictStr, Any]] = None,
+        _content_type: Optional[StrictStr] = None,
+        _headers: Optional[Dict[StrictStr, Any]] = None,
+        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
+    ) -> RESTResponseType:
+        """Creates a job to add files from the AWS S3 storage into a collection.
+        Creates a job to add files from the AWS S3 storage into a collection.
+        :param collection_id: String id of the collection to add the ingested documents into. (required)
+        :type collection_id: str
+        :param ingest_from_s3_body: (required)
+        :type ingest_from_s3_body: IngestFromS3Body
+        :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
+        :type gen_doc_summaries: bool
+        :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
+        :type gen_doc_questions: bool
+        :param audio_input_language: Language of audio files.
+        :type audio_input_language: str
+        :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
+        :type ocr_model: str
+        :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
+        :type tesseract_lang: str
+        :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
+        :type keep_tables_as_one_chunk: bool
+        :param chunk_by_page: Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.
+        :type chunk_by_page: bool
+        :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
+        :type handwriting_check: bool
+        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
+        :type ingest_mode: str
+        :param timeout: Timeout in seconds
+        :type timeout: float
+        :param _request_timeout: timeout setting for this request. If one
+                                 number provided, it will be total request
+                                 timeout. It can also be a pair (tuple) of
+                                 (connection, read) timeouts.
+        :type _request_timeout: int, tuple(int, int), optional
+        :param _request_auth: set to override the auth_settings for an a single
+                              request; this effectively ignores the
+                              authentication in the spec for a single request.
+        :type _request_auth: dict, optional
+        :param _content_type: force content-type for the request.
+        :type _content_type: str, Optional
+        :param _headers: set to override the headers for a single
+                         request; this effectively ignores the headers
+                         in the spec for a single request.
+        :type _headers: dict, optional
+        :param _host_index: set to override the host_index for a single
+                            request; this effectively ignores the host_index
+                            in the spec for a single request.
+        :type _host_index: int, optional
+        :return: Returns the result object.
+        """ # noqa: E501
+        _param = self._create_ingest_from_s3_job_serialize(
+            collection_id=collection_id,
+            ingest_from_s3_body=ingest_from_s3_body,
+            gen_doc_summaries=gen_doc_summaries,
+            gen_doc_questions=gen_doc_questions,
+            audio_input_language=audio_input_language,
+            ocr_model=ocr_model,
+            tesseract_lang=tesseract_lang,
+            keep_tables_as_one_chunk=keep_tables_as_one_chunk,
+            chunk_by_page=chunk_by_page,
+            handwriting_check=handwriting_check,
+            ingest_mode=ingest_mode,
+            timeout=timeout,
+            _request_auth=_request_auth,
+            _content_type=_content_type,
+            _headers=_headers,
+            _host_index=_host_index
+        )
+        _response_types_map: Dict[str, Optional[str]] = {
+            '201': "JobDetails",
+            '401': "EndpointError",
+        }
+        response_data = self.api_client.call_api(
+            *_param,
+            _request_timeout=_request_timeout
+        )
+        return response_data.response
+    def _create_ingest_from_s3_job_serialize(
+        self,
+        collection_id,
+        ingest_from_s3_body,
+        gen_doc_summaries,
+        gen_doc_questions,
+        audio_input_language,
+        ocr_model,
+        tesseract_lang,
+        keep_tables_as_one_chunk,
+        chunk_by_page,
+        handwriting_check,
+        ingest_mode,
+        timeout,
+        _request_auth,
+        _content_type,
+        _headers,
+        _host_index,
+    ) -> RequestSerialized:
+        _host = None
+        _collection_formats: Dict[str, str] = {
+        }
+        _path_params: Dict[str, str] = {}
+        _query_params: List[Tuple[str, str]] = []
+        _header_params: Dict[str, Optional[str]] = _headers or {}
+        _form_params: List[Tuple[str, str]] = []
+        _files: Dict[
+            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
+        ] = {}
+        _body_params: Optional[bytes] = None
+        # process the path parameters
+        # process the query parameters
+        if collection_id is not None:
+            _query_params.append(('collection_id', collection_id))
         if gen_doc_summaries is not None:
@@ -2228,9 +3133,33 @@ class DocumentIngestionApi:
             _query_params.append(('gen_doc_questions', gen_doc_questions))
-        if metadata is not None:
+        if audio_input_language is not None:
-            _query_params.append(('metadata', metadata))
+            _query_params.append(('audio_input_language', audio_input_language))
+        if ocr_model is not None:
+            _query_params.append(('ocr_model', ocr_model))
+        if tesseract_lang is not None:
+            _query_params.append(('tesseract_lang', tesseract_lang))
+        if keep_tables_as_one_chunk is not None:
+            _query_params.append(('keep_tables_as_one_chunk', keep_tables_as_one_chunk))
+        if chunk_by_page is not None:
+            _query_params.append(('chunk_by_page', chunk_by_page))
+        if handwriting_check is not None:
+            _query_params.append(('handwriting_check', handwriting_check))
+        if ingest_mode is not None:
+            _query_params.append(('ingest_mode', ingest_mode))
         if timeout is not None:
@@ -2239,8 +3168,8 @@ class DocumentIngestionApi:
         # process the header parameters
         # process the form parameters
         # process the body parameter
-        if body is not None:
-            _body_params = body
+        if ingest_from_s3_body is not None:
+            _body_params = ingest_from_s3_body
         # set the HTTP header `Accept`
@@ -2258,7 +3187,7 @@ class DocumentIngestionApi:
             _default_content_type = (
                 self.api_client.select_header_content_type(
                     [
-                        'text/plain'
+                        'application/json'
                     ]
                 )
             )
@@ -2272,7 +3201,7 @@ class DocumentIngestionApi:
         return self.api_client.param_serialize(
             method='POST',
-            resource_path='/ingest/plain_text/job',
+            resource_path='/ingest/s3/job',
             path_params=_path_params,
             query_params=_query_params,
             header_params=_header_params,
@@ -2289,10 +3218,13 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_s3_job(
+    def create_ingest_from_website_job(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_s3_body: IngestFromS3Body,
+        ingest_from_website_body: IngestFromWebsiteBody,
+        follow_links: Annotated[Optional[StrictBool], Field(description="Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.")] = None,
+        max_depth: Annotated[Optional[StrictInt], Field(description="Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).")] = None,
+        max_documents: Annotated[Optional[StrictInt], Field(description="Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).")] = None,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -2316,14 +3248,20 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> JobDetails:
-        """Creates a job to add files from the AWS S3 storage into a collection.
+        """Creates a job to crawl and ingest a URL into a collection.
-        Creates a job to add files from the AWS S3 storage into a collection.
+        Creates a job to crawl and ingest a URL into a collection. The web page or document linked from this URL will be imported.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_s3_body: (required)
-        :type ingest_from_s3_body: IngestFromS3Body
+        :param ingest_from_website_body: (required)
+        :type ingest_from_website_body: IngestFromWebsiteBody
+        :param follow_links: Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.
+        :type follow_links: bool
+        :param max_depth: Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).
+        :type max_depth: int
+        :param max_documents: Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).
+        :type max_documents: int
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -2366,9 +3304,12 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_s3_job_serialize(
+        _param = self._create_ingest_from_website_job_serialize(
             collection_id=collection_id,
-            ingest_from_s3_body=ingest_from_s3_body,
+            ingest_from_website_body=ingest_from_website_body,
+            follow_links=follow_links,
+            max_depth=max_depth,
+            max_documents=max_documents,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -2401,10 +3342,13 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_s3_job_with_http_info(
+    def create_ingest_from_website_job_with_http_info(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_s3_body: IngestFromS3Body,
+        ingest_from_website_body: IngestFromWebsiteBody,
+        follow_links: Annotated[Optional[StrictBool], Field(description="Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.")] = None,
+        max_depth: Annotated[Optional[StrictInt], Field(description="Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).")] = None,
+        max_documents: Annotated[Optional[StrictInt], Field(description="Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).")] = None,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -2428,14 +3372,20 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> ApiResponse[JobDetails]:
-        """Creates a job to add files from the AWS S3 storage into a collection.
+        """Creates a job to crawl and ingest a URL into a collection.
-        Creates a job to add files from the AWS S3 storage into a collection.
+        Creates a job to crawl and ingest a URL into a collection. The web page or document linked from this URL will be imported.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_s3_body: (required)
-        :type ingest_from_s3_body: IngestFromS3Body
+        :param ingest_from_website_body: (required)
+        :type ingest_from_website_body: IngestFromWebsiteBody
+        :param follow_links: Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.
+        :type follow_links: bool
+        :param max_depth: Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).
+        :type max_depth: int
+        :param max_documents: Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).
+        :type max_documents: int
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -2478,9 +3428,12 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_s3_job_serialize(
+        _param = self._create_ingest_from_website_job_serialize(
             collection_id=collection_id,
-            ingest_from_s3_body=ingest_from_s3_body,
+            ingest_from_website_body=ingest_from_website_body,
+            follow_links=follow_links,
+            max_depth=max_depth,
+            max_documents=max_documents,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -2513,10 +3466,13 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_s3_job_without_preload_content(
+    def create_ingest_from_website_job_without_preload_content(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_s3_body: IngestFromS3Body,
+        ingest_from_website_body: IngestFromWebsiteBody,
+        follow_links: Annotated[Optional[StrictBool], Field(description="Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.")] = None,
+        max_depth: Annotated[Optional[StrictInt], Field(description="Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).")] = None,
+        max_documents: Annotated[Optional[StrictInt], Field(description="Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).")] = None,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -2540,14 +3496,20 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> RESTResponseType:
-        """Creates a job to add files from the AWS S3 storage into a collection.
+        """Creates a job to crawl and ingest a URL into a collection.
-        Creates a job to add files from the AWS S3 storage into a collection.
+        Creates a job to crawl and ingest a URL into a collection. The web page or document linked from this URL will be imported.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_s3_body: (required)
-        :type ingest_from_s3_body: IngestFromS3Body
+        :param ingest_from_website_body: (required)
+        :type ingest_from_website_body: IngestFromWebsiteBody
+        :param follow_links: Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.
+        :type follow_links: bool
+        :param max_depth: Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).
+        :type max_depth: int
+        :param max_documents: Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).
+        :type max_documents: int
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -2590,9 +3552,12 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_s3_job_serialize(
+        _param = self._create_ingest_from_website_job_serialize(
             collection_id=collection_id,
-            ingest_from_s3_body=ingest_from_s3_body,
+            ingest_from_website_body=ingest_from_website_body,
+            follow_links=follow_links,
+            max_depth=max_depth,
+            max_documents=max_documents,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -2620,10 +3585,13 @@ class DocumentIngestionApi:
         return response_data.response
-    def _create_ingest_from_s3_job_serialize(
+    def _create_ingest_from_website_job_serialize(
         self,
         collection_id,
-        ingest_from_s3_body,
+        ingest_from_website_body,
+        follow_links,
+        max_depth,
+        max_documents,
         gen_doc_summaries,
         gen_doc_questions,
         audio_input_language,
@@ -2658,7 +3626,19 @@ class DocumentIngestionApi:
         # process the query parameters
         if collection_id is not None:
-            _query_params.append(('collection_id', collection_id))
+            _query_params.append(('collection_id', collection_id))
+        if follow_links is not None:
+            _query_params.append(('follow_links', follow_links))
+        if max_depth is not None:
+            _query_params.append(('max_depth', max_depth))
+        if max_documents is not None:
+            _query_params.append(('max_documents', max_documents))
         if gen_doc_summaries is not None:
@@ -2703,8 +3683,8 @@ class DocumentIngestionApi:
         # process the header parameters
         # process the form parameters
         # process the body parameter
-        if ingest_from_s3_body is not None:
-            _body_params = ingest_from_s3_body
+        if ingest_from_website_body is not None:
+            _body_params = ingest_from_website_body
         # set the HTTP header `Accept`
@@ -2736,7 +3716,7 @@ class DocumentIngestionApi:
         return self.api_client.param_serialize(
             method='POST',
-            resource_path='/ingest/s3/job',
+            resource_path='/ingest/website/job',
             path_params=_path_params,
             query_params=_query_params,
             header_params=_header_params,
@@ -2753,13 +3733,10 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_website_job(
+    def create_ingest_upload_job(
         self,
+        upload_ids: Annotated[List[StrictStr], Field(description="Id of uploaded document")],
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_website_body: IngestFromWebsiteBody,
-        follow_links: Annotated[Optional[StrictBool], Field(description="Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.")] = None,
-        max_depth: Annotated[Optional[StrictInt], Field(description="Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).")] = None,
-        max_documents: Annotated[Optional[StrictInt], Field(description="Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).")] = None,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -2769,7 +3746,10 @@ class DocumentIngestionApi:
         chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
         handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
         ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
+        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
+        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
+        ingest_upload_body: Optional[IngestUploadBody] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -2783,20 +3763,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> JobDetails:
-        """Creates a job to crawl and ingest a URL into a collection.
+        """Creates a job to ingest uploaded document
-        Creates a job to crawl and ingest a URL into a collection. The web page or document linked from this URL will be imported.
+        Creates a job to ingest uploaded document identified to a given collection
+        :param upload_ids: Id of uploaded document (required)
+        :type upload_ids: List[str]
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_website_body: (required)
-        :type ingest_from_website_body: IngestFromWebsiteBody
-        :param follow_links: Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.
-        :type follow_links: bool
-        :param max_depth: Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).
-        :type max_depth: int
-        :param max_documents: Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).
-        :type max_documents: int
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -2815,8 +3789,14 @@ class DocumentIngestionApi:
         :type handwriting_check: bool
         :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
         :type ingest_mode: str
+        :param restricted: Whether the document should be restricted only to certain users.
+        :type restricted: bool
+        :param permissions: The list of usernames having permissions to the document.
+        :type permissions: List[str]
         :param timeout: Timeout in seconds
         :type timeout: float
+        :param ingest_upload_body:
+        :type ingest_upload_body: IngestUploadBody
         :param _request_timeout: timeout setting for this request. If one
                                  number provided, it will be total request
                                  timeout. It can also be a pair (tuple) of
@@ -2839,12 +3819,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_website_job_serialize(
+        _param = self._create_ingest_upload_job_serialize(
+            upload_ids=upload_ids,
             collection_id=collection_id,
-            ingest_from_website_body=ingest_from_website_body,
-            follow_links=follow_links,
-            max_depth=max_depth,
-            max_documents=max_documents,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -2854,7 +3831,10 @@ class DocumentIngestionApi:
             chunk_by_page=chunk_by_page,
             handwriting_check=handwriting_check,
             ingest_mode=ingest_mode,
+            restricted=restricted,
+            permissions=permissions,
             timeout=timeout,
+            ingest_upload_body=ingest_upload_body,
             _request_auth=_request_auth,
             _content_type=_content_type,
             _headers=_headers,
@@ -2877,13 +3857,10 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_website_job_with_http_info(
+    def create_ingest_upload_job_with_http_info(
         self,
+        upload_ids: Annotated[List[StrictStr], Field(description="Id of uploaded document")],
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_website_body: IngestFromWebsiteBody,
-        follow_links: Annotated[Optional[StrictBool], Field(description="Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.")] = None,
-        max_depth: Annotated[Optional[StrictInt], Field(description="Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).")] = None,
-        max_documents: Annotated[Optional[StrictInt], Field(description="Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).")] = None,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -2893,7 +3870,10 @@ class DocumentIngestionApi:
         chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
         handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
         ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
+        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
+        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
+        ingest_upload_body: Optional[IngestUploadBody] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -2907,20 +3887,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> ApiResponse[JobDetails]:
-        """Creates a job to crawl and ingest a URL into a collection.
+        """Creates a job to ingest uploaded document
-        Creates a job to crawl and ingest a URL into a collection. The web page or document linked from this URL will be imported.
+        Creates a job to ingest uploaded document identified to a given collection
+        :param upload_ids: Id of uploaded document (required)
+        :type upload_ids: List[str]
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_website_body: (required)
-        :type ingest_from_website_body: IngestFromWebsiteBody
-        :param follow_links: Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.
-        :type follow_links: bool
-        :param max_depth: Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).
-        :type max_depth: int
-        :param max_documents: Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).
-        :type max_documents: int
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -2939,8 +3913,14 @@ class DocumentIngestionApi:
         :type handwriting_check: bool
         :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
         :type ingest_mode: str
+        :param restricted: Whether the document should be restricted only to certain users.
+        :type restricted: bool
+        :param permissions: The list of usernames having permissions to the document.
+        :type permissions: List[str]
         :param timeout: Timeout in seconds
         :type timeout: float
+        :param ingest_upload_body:
+        :type ingest_upload_body: IngestUploadBody
         :param _request_timeout: timeout setting for this request. If one
                                  number provided, it will be total request
                                  timeout. It can also be a pair (tuple) of
@@ -2963,12 +3943,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_website_job_serialize(
+        _param = self._create_ingest_upload_job_serialize(
+            upload_ids=upload_ids,
             collection_id=collection_id,
-            ingest_from_website_body=ingest_from_website_body,
-            follow_links=follow_links,
-            max_depth=max_depth,
-            max_documents=max_documents,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -2978,7 +3955,10 @@ class DocumentIngestionApi:
             chunk_by_page=chunk_by_page,
             handwriting_check=handwriting_check,
             ingest_mode=ingest_mode,
+            restricted=restricted,
+            permissions=permissions,
             timeout=timeout,
+            ingest_upload_body=ingest_upload_body,
             _request_auth=_request_auth,
             _content_type=_content_type,
             _headers=_headers,
@@ -3001,13 +3981,10 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_from_website_job_without_preload_content(
+    def create_ingest_upload_job_without_preload_content(
         self,
+        upload_ids: Annotated[List[StrictStr], Field(description="Id of uploaded document")],
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_website_body: IngestFromWebsiteBody,
-        follow_links: Annotated[Optional[StrictBool], Field(description="Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.")] = None,
-        max_depth: Annotated[Optional[StrictInt], Field(description="Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).")] = None,
-        max_documents: Annotated[Optional[StrictInt], Field(description="Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).")] = None,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -3017,7 +3994,10 @@ class DocumentIngestionApi:
         chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
         handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
         ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
+        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
+        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
+        ingest_upload_body: Optional[IngestUploadBody] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -3031,20 +4011,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> RESTResponseType:
-        """Creates a job to crawl and ingest a URL into a collection.
+        """Creates a job to ingest uploaded document
-        Creates a job to crawl and ingest a URL into a collection. The web page or document linked from this URL will be imported.
+        Creates a job to ingest uploaded document identified to a given collection
+        :param upload_ids: Id of uploaded document (required)
+        :type upload_ids: List[str]
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_website_body: (required)
-        :type ingest_from_website_body: IngestFromWebsiteBody
-        :param follow_links: Whether to import all web pages linked from this URL will be imported. External links will be ignored. Links to other pages on the same domain will be followed as long as they are at the same level or below the URL you specify. Each page will be transformed into a PDF document.
-        :type follow_links: bool
-        :param max_depth: Max depth of recursion when following links, only when follow_links is `true`. Max_depth of 0 means don't follow any links, max_depth of 1 means follow only top-level links, etc. Use -1 for automatic (system settings).
-        :type max_depth: int
-        :param max_documents: Max number of documents when following links, only when follow_links is `true`. Use None for automatic (system defaults). Use -1 for max (system limit).
-        :type max_documents: int
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -3063,8 +4037,14 @@ class DocumentIngestionApi:
         :type handwriting_check: bool
         :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
         :type ingest_mode: str
+        :param restricted: Whether the document should be restricted only to certain users.
+        :type restricted: bool
+        :param permissions: The list of usernames having permissions to the document.
+        :type permissions: List[str]
         :param timeout: Timeout in seconds
         :type timeout: float
+        :param ingest_upload_body:
+        :type ingest_upload_body: IngestUploadBody
         :param _request_timeout: timeout setting for this request. If one
                                  number provided, it will be total request
                                  timeout. It can also be a pair (tuple) of
@@ -3087,12 +4067,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_from_website_job_serialize(
+        _param = self._create_ingest_upload_job_serialize(
+            upload_ids=upload_ids,
             collection_id=collection_id,
-            ingest_from_website_body=ingest_from_website_body,
-            follow_links=follow_links,
-            max_depth=max_depth,
-            max_documents=max_documents,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -3102,7 +4079,10 @@ class DocumentIngestionApi:
             chunk_by_page=chunk_by_page,
             handwriting_check=handwriting_check,
             ingest_mode=ingest_mode,
+            restricted=restricted,
+            permissions=permissions,
             timeout=timeout,
+            ingest_upload_body=ingest_upload_body,
             _request_auth=_request_auth,
             _content_type=_content_type,
             _headers=_headers,
@@ -3120,13 +4100,10 @@ class DocumentIngestionApi:
         return response_data.response
-    def _create_ingest_from_website_job_serialize(
+    def _create_ingest_upload_job_serialize(
         self,
+        upload_ids,
         collection_id,
-        ingest_from_website_body,
-        follow_links,
-        max_depth,
-        max_documents,
         gen_doc_summaries,
         gen_doc_questions,
         audio_input_language,
@@ -3136,7 +4113,10 @@ class DocumentIngestionApi:
         chunk_by_page,
         handwriting_check,
         ingest_mode,
+        restricted,
+        permissions,
         timeout,
+        ingest_upload_body,
         _request_auth,
         _content_type,
         _headers,
@@ -3146,6 +4126,8 @@ class DocumentIngestionApi:
         _host = None
         _collection_formats: Dict[str, str] = {
+            'upload_ids': 'csv',
+            'permissions': 'multi',
         }
         _path_params: Dict[str, str] = {}
@@ -3158,23 +4140,13 @@ class DocumentIngestionApi:
         _body_params: Optional[bytes] = None
         # process the path parameters
+        if upload_ids is not None:
+            _path_params['upload_ids'] = upload_ids
         # process the query parameters
         if collection_id is not None:
             _query_params.append(('collection_id', collection_id))
-        if follow_links is not None:
-            _query_params.append(('follow_links', follow_links))
-        if max_depth is not None:
-            _query_params.append(('max_depth', max_depth))
-        if max_documents is not None:
-            _query_params.append(('max_documents', max_documents))
         if gen_doc_summaries is not None:
             _query_params.append(('gen_doc_summaries', gen_doc_summaries))
@@ -3211,6 +4183,14 @@ class DocumentIngestionApi:
             _query_params.append(('ingest_mode', ingest_mode))
+        if restricted is not None:
+            _query_params.append(('restricted', restricted))
+        if permissions is not None:
+            _query_params.append(('permissions', permissions))
         if timeout is not None:
             _query_params.append(('timeout', timeout))
@@ -3218,8 +4198,8 @@ class DocumentIngestionApi:
         # process the header parameters
         # process the form parameters
         # process the body parameter
-        if ingest_from_website_body is not None:
-            _body_params = ingest_from_website_body
+        if ingest_upload_body is not None:
+            _body_params = ingest_upload_body
         # set the HTTP header `Accept`
@@ -3251,7 +4231,7 @@ class DocumentIngestionApi:
         return self.api_client.param_serialize(
             method='POST',
-            resource_path='/ingest/website/job',
+            resource_path='/uploads/{upload_ids}/ingest/job',
             path_params=_path_params,
             query_params=_query_params,
             header_params=_header_params,
@@ -3268,23 +4248,21 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_upload_job(
+    def ingest_agent_only_to_standard(
         self,
-        upload_ids: Annotated[List[StrictStr], Field(description="Id of uploaded document")],
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
+        document_id: Annotated[StrictStr, Field(description="String id of the document to be parsed.")],
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
         ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
+        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
+        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
         keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
         chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
         handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
-        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
-        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
-        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
-        ingest_upload_body: Optional[IngestUploadBody] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -3297,15 +4275,15 @@ class DocumentIngestionApi:
         _content_type: Optional[StrictStr] = None,
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
-    ) -> JobDetails:
-        """Creates a job to ingest uploaded document
+    ) -> None:
+        """Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
-        Creates a job to ingest uploaded document identified to a given collection
+        Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
-        :param upload_ids: Id of uploaded document (required)
-        :type upload_ids: List[str]
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
+        :param document_id: String id of the document to be parsed. (required)
+        :type document_id: str
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -3314,6 +4292,10 @@ class DocumentIngestionApi:
         :type audio_input_language: str
         :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
         :type ocr_model: str
+        :param restricted: Whether the document should be restricted only to certain users.
+        :type restricted: bool
+        :param permissions: The list of usernames having permissions to the document.
+        :type permissions: List[str]
         :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
         :type tesseract_lang: str
         :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
@@ -3322,16 +4304,8 @@ class DocumentIngestionApi:
         :type chunk_by_page: bool
         :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
         :type handwriting_check: bool
-        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
-        :type ingest_mode: str
-        :param restricted: Whether the document should be restricted only to certain users.
-        :type restricted: bool
-        :param permissions: The list of usernames having permissions to the document.
-        :type permissions: List[str]
         :param timeout: Timeout in seconds
         :type timeout: float
-        :param ingest_upload_body:
-        :type ingest_upload_body: IngestUploadBody
         :param _request_timeout: timeout setting for this request. If one
                                  number provided, it will be total request
                                  timeout. It can also be a pair (tuple) of
@@ -3354,22 +4328,20 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_upload_job_serialize(
-            upload_ids=upload_ids,
+        _param = self._ingest_agent_only_to_standard_serialize(
             collection_id=collection_id,
+            document_id=document_id,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
             ocr_model=ocr_model,
+            restricted=restricted,
+            permissions=permissions,
             tesseract_lang=tesseract_lang,
             keep_tables_as_one_chunk=keep_tables_as_one_chunk,
             chunk_by_page=chunk_by_page,
             handwriting_check=handwriting_check,
-            ingest_mode=ingest_mode,
-            restricted=restricted,
-            permissions=permissions,
             timeout=timeout,
-            ingest_upload_body=ingest_upload_body,
             _request_auth=_request_auth,
             _content_type=_content_type,
             _headers=_headers,
@@ -3377,7 +4349,7 @@ class DocumentIngestionApi:
         )
         _response_types_map: Dict[str, Optional[str]] = {
-            '201': "JobDetails",
+            '204': None,
             '401': "EndpointError",
         }
         response_data = self.api_client.call_api(
@@ -3392,23 +4364,21 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_upload_job_with_http_info(
+    def ingest_agent_only_to_standard_with_http_info(
         self,
-        upload_ids: Annotated[List[StrictStr], Field(description="Id of uploaded document")],
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
+        document_id: Annotated[StrictStr, Field(description="String id of the document to be parsed.")],
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
         ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
+        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
+        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
         keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
         chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
         handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
-        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
-        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
-        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
-        ingest_upload_body: Optional[IngestUploadBody] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -3421,15 +4391,15 @@ class DocumentIngestionApi:
         _content_type: Optional[StrictStr] = None,
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
-    ) -> ApiResponse[JobDetails]:
-        """Creates a job to ingest uploaded document
+    ) -> ApiResponse[None]:
+        """Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
-        Creates a job to ingest uploaded document identified to a given collection
+        Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
-        :param upload_ids: Id of uploaded document (required)
-        :type upload_ids: List[str]
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
+        :param document_id: String id of the document to be parsed. (required)
+        :type document_id: str
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -3438,6 +4408,10 @@ class DocumentIngestionApi:
         :type audio_input_language: str
         :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
         :type ocr_model: str
+        :param restricted: Whether the document should be restricted only to certain users.
+        :type restricted: bool
+        :param permissions: The list of usernames having permissions to the document.
+        :type permissions: List[str]
         :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
         :type tesseract_lang: str
         :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
@@ -3446,16 +4420,8 @@ class DocumentIngestionApi:
         :type chunk_by_page: bool
         :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
         :type handwriting_check: bool
-        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
-        :type ingest_mode: str
-        :param restricted: Whether the document should be restricted only to certain users.
-        :type restricted: bool
-        :param permissions: The list of usernames having permissions to the document.
-        :type permissions: List[str]
         :param timeout: Timeout in seconds
         :type timeout: float
-        :param ingest_upload_body:
-        :type ingest_upload_body: IngestUploadBody
         :param _request_timeout: timeout setting for this request. If one
                                  number provided, it will be total request
                                  timeout. It can also be a pair (tuple) of
@@ -3478,22 +4444,20 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_upload_job_serialize(
-            upload_ids=upload_ids,
+        _param = self._ingest_agent_only_to_standard_serialize(
             collection_id=collection_id,
+            document_id=document_id,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
             ocr_model=ocr_model,
+            restricted=restricted,
+            permissions=permissions,
             tesseract_lang=tesseract_lang,
             keep_tables_as_one_chunk=keep_tables_as_one_chunk,
             chunk_by_page=chunk_by_page,
             handwriting_check=handwriting_check,
-            ingest_mode=ingest_mode,
-            restricted=restricted,
-            permissions=permissions,
             timeout=timeout,
-            ingest_upload_body=ingest_upload_body,
             _request_auth=_request_auth,
             _content_type=_content_type,
             _headers=_headers,
@@ -3501,7 +4465,7 @@ class DocumentIngestionApi:
         )
         _response_types_map: Dict[str, Optional[str]] = {
-            '201': "JobDetails",
+            '204': None,
             '401': "EndpointError",
         }
         response_data = self.api_client.call_api(
@@ -3516,23 +4480,21 @@ class DocumentIngestionApi:
     @validate_call
-    def create_ingest_upload_job_without_preload_content(
+    def ingest_agent_only_to_standard_without_preload_content(
         self,
-        upload_ids: Annotated[List[StrictStr], Field(description="Id of uploaded document")],
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
+        document_id: Annotated[StrictStr, Field(description="String id of the document to be parsed.")],
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
         ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
+        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
+        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
         keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
         chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
         handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
-        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
-        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
-        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
-        ingest_upload_body: Optional[IngestUploadBody] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -3546,14 +4508,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> RESTResponseType:
-        """Creates a job to ingest uploaded document
+        """Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
-        Creates a job to ingest uploaded document identified to a given collection
+        Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
-        :param upload_ids: Id of uploaded document (required)
-        :type upload_ids: List[str]
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
+        :param document_id: String id of the document to be parsed. (required)
+        :type document_id: str
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -3562,6 +4524,10 @@ class DocumentIngestionApi:
         :type audio_input_language: str
         :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
         :type ocr_model: str
+        :param restricted: Whether the document should be restricted only to certain users.
+        :type restricted: bool
+        :param permissions: The list of usernames having permissions to the document.
+        :type permissions: List[str]
         :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
         :type tesseract_lang: str
         :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
@@ -3570,16 +4536,8 @@ class DocumentIngestionApi:
         :type chunk_by_page: bool
         :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
         :type handwriting_check: bool
-        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
-        :type ingest_mode: str
-        :param restricted: Whether the document should be restricted only to certain users.
-        :type restricted: bool
-        :param permissions: The list of usernames having permissions to the document.
-        :type permissions: List[str]
         :param timeout: Timeout in seconds
         :type timeout: float
-        :param ingest_upload_body:
-        :type ingest_upload_body: IngestUploadBody
         :param _request_timeout: timeout setting for this request. If one
                                  number provided, it will be total request
                                  timeout. It can also be a pair (tuple) of
@@ -3602,22 +4560,20 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._create_ingest_upload_job_serialize(
-            upload_ids=upload_ids,
+        _param = self._ingest_agent_only_to_standard_serialize(
             collection_id=collection_id,
+            document_id=document_id,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
             ocr_model=ocr_model,
+            restricted=restricted,
+            permissions=permissions,
             tesseract_lang=tesseract_lang,
             keep_tables_as_one_chunk=keep_tables_as_one_chunk,
             chunk_by_page=chunk_by_page,
             handwriting_check=handwriting_check,
-            ingest_mode=ingest_mode,
-            restricted=restricted,
-            permissions=permissions,
             timeout=timeout,
-            ingest_upload_body=ingest_upload_body,
             _request_auth=_request_auth,
             _content_type=_content_type,
             _headers=_headers,
@@ -3625,7 +4581,7 @@ class DocumentIngestionApi:
         )
         _response_types_map: Dict[str, Optional[str]] = {
-            '201': "JobDetails",
+            '204': None,
             '401': "EndpointError",
         }
         response_data = self.api_client.call_api(
@@ -3635,23 +4591,21 @@ class DocumentIngestionApi:
         return response_data.response
-    def _create_ingest_upload_job_serialize(
+    def _ingest_agent_only_to_standard_serialize(
         self,
-        upload_ids,
         collection_id,
+        document_id,
         gen_doc_summaries,
         gen_doc_questions,
         audio_input_language,
         ocr_model,
+        restricted,
+        permissions,
         tesseract_lang,
         keep_tables_as_one_chunk,
         chunk_by_page,
         handwriting_check,
-        ingest_mode,
-        restricted,
-        permissions,
         timeout,
-        ingest_upload_body,
         _request_auth,
         _content_type,
         _headers,
@@ -3661,7 +4615,6 @@ class DocumentIngestionApi:
         _host = None
         _collection_formats: Dict[str, str] = {
-            'upload_ids': 'csv',
             'permissions': 'multi',
         }
@@ -3675,13 +4628,15 @@ class DocumentIngestionApi:
         _body_params: Optional[bytes] = None
         # process the path parameters
-        if upload_ids is not None:
-            _path_params['upload_ids'] = upload_ids
         # process the query parameters
         if collection_id is not None:
             _query_params.append(('collection_id', collection_id))
+        if document_id is not None:
+            _query_params.append(('document_id', document_id))
         if gen_doc_summaries is not None:
             _query_params.append(('gen_doc_summaries', gen_doc_summaries))
@@ -3698,6 +4653,14 @@ class DocumentIngestionApi:
             _query_params.append(('ocr_model', ocr_model))
+        if restricted is not None:
+            _query_params.append(('restricted', restricted))
+        if permissions is not None:
+            _query_params.append(('permissions', permissions))
         if tesseract_lang is not None:
             _query_params.append(('tesseract_lang', tesseract_lang))
@@ -3714,18 +4677,6 @@ class DocumentIngestionApi:
             _query_params.append(('handwriting_check', handwriting_check))
-        if ingest_mode is not None:
-            _query_params.append(('ingest_mode', ingest_mode))
-        if restricted is not None:
-            _query_params.append(('restricted', restricted))
-        if permissions is not None:
-            _query_params.append(('permissions', permissions))
         if timeout is not None:
             _query_params.append(('timeout', timeout))
@@ -3733,8 +4684,6 @@ class DocumentIngestionApi:
         # process the header parameters
         # process the form parameters
         # process the body parameter
-        if ingest_upload_body is not None:
-            _body_params = ingest_upload_body
         # set the HTTP header `Accept`
@@ -3745,19 +4694,6 @@ class DocumentIngestionApi:
                 ]
             )
-        # set the HTTP header `Content-Type`
-        if _content_type:
-            _header_params['Content-Type'] = _content_type
-        else:
-            _default_content_type = (
-                self.api_client.select_header_content_type(
-                    [
-                        'application/json'
-                    ]
-                )
-            )
-            if _default_content_type is not None:
-                _header_params['Content-Type'] = _default_content_type
         # authentication setting
         _auth_settings: List[str] = [
@@ -3766,7 +4702,7 @@ class DocumentIngestionApi:
         return self.api_client.param_serialize(
             method='POST',
-            resource_path='/uploads/{upload_ids}/ingest/job',
+            resource_path='/ingest/agent_only_to_standard',
             path_params=_path_params,
             query_params=_query_params,
             header_params=_header_params,
@@ -3783,20 +4719,19 @@ class DocumentIngestionApi:
     @validate_call
-    def ingest_agent_only_to_standard(
+    def ingest_from_azure_blob_storage(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        document_id: Annotated[StrictStr, Field(description="String id of the document to be parsed.")],
+        ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
         ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
-        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
-        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
         keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
         chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
         handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
+        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
         _request_timeout: Union[
             None,
@@ -3811,14 +4746,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> None:
-        """Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
+        """Adds files from the Azure Blob Storage into a collection.
-        Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
+        Adds files from the Azure Blob Storage into a collection.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param document_id: String id of the document to be parsed. (required)
-        :type document_id: str
+        :param ingest_from_azure_blob_storage_body: (required)
+        :type ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -3827,10 +4762,6 @@ class DocumentIngestionApi:
         :type audio_input_language: str
         :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
         :type ocr_model: str
-        :param restricted: Whether the document should be restricted only to certain users.
-        :type restricted: bool
-        :param permissions: The list of usernames having permissions to the document.
-        :type permissions: List[str]
         :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
         :type tesseract_lang: str
         :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
@@ -3839,6 +4770,8 @@ class DocumentIngestionApi:
         :type chunk_by_page: bool
         :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
         :type handwriting_check: bool
+        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
+        :type ingest_mode: str
         :param timeout: Timeout in seconds
         :type timeout: float
         :param _request_timeout: timeout setting for this request. If one
@@ -3863,19 +4796,18 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._ingest_agent_only_to_standard_serialize(
+        _param = self._ingest_from_azure_blob_storage_serialize(
             collection_id=collection_id,
-            document_id=document_id,
+            ingest_from_azure_blob_storage_body=ingest_from_azure_blob_storage_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
             ocr_model=ocr_model,
-            restricted=restricted,
-            permissions=permissions,
             tesseract_lang=tesseract_lang,
             keep_tables_as_one_chunk=keep_tables_as_one_chunk,
             chunk_by_page=chunk_by_page,
             handwriting_check=handwriting_check,
+            ingest_mode=ingest_mode,
             timeout=timeout,
             _request_auth=_request_auth,
             _content_type=_content_type,
@@ -3899,20 +4831,19 @@ class DocumentIngestionApi:
     @validate_call
-    def ingest_agent_only_to_standard_with_http_info(
+    def ingest_from_azure_blob_storage_with_http_info(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        document_id: Annotated[StrictStr, Field(description="String id of the document to be parsed.")],
+        ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
         ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
-        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
-        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
         keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
         chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
         handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
+        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
         _request_timeout: Union[
             None,
@@ -3927,14 +4858,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> ApiResponse[None]:
-        """Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
+        """Adds files from the Azure Blob Storage into a collection.
-        Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
+        Adds files from the Azure Blob Storage into a collection.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param document_id: String id of the document to be parsed. (required)
-        :type document_id: str
+        :param ingest_from_azure_blob_storage_body: (required)
+        :type ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -3943,10 +4874,6 @@ class DocumentIngestionApi:
         :type audio_input_language: str
         :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
         :type ocr_model: str
-        :param restricted: Whether the document should be restricted only to certain users.
-        :type restricted: bool
-        :param permissions: The list of usernames having permissions to the document.
-        :type permissions: List[str]
         :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
         :type tesseract_lang: str
         :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
@@ -3955,6 +4882,8 @@ class DocumentIngestionApi:
         :type chunk_by_page: bool
         :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
         :type handwriting_check: bool
+        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
+        :type ingest_mode: str
         :param timeout: Timeout in seconds
         :type timeout: float
         :param _request_timeout: timeout setting for this request. If one
@@ -3979,19 +4908,18 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._ingest_agent_only_to_standard_serialize(
+        _param = self._ingest_from_azure_blob_storage_serialize(
             collection_id=collection_id,
-            document_id=document_id,
+            ingest_from_azure_blob_storage_body=ingest_from_azure_blob_storage_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
             ocr_model=ocr_model,
-            restricted=restricted,
-            permissions=permissions,
             tesseract_lang=tesseract_lang,
             keep_tables_as_one_chunk=keep_tables_as_one_chunk,
             chunk_by_page=chunk_by_page,
             handwriting_check=handwriting_check,
+            ingest_mode=ingest_mode,
             timeout=timeout,
             _request_auth=_request_auth,
             _content_type=_content_type,
@@ -4015,20 +4943,19 @@ class DocumentIngestionApi:
     @validate_call
-    def ingest_agent_only_to_standard_without_preload_content(
+    def ingest_from_azure_blob_storage_without_preload_content(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        document_id: Annotated[StrictStr, Field(description="String id of the document to be parsed.")],
+        ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
         ocr_model: Annotated[Optional[StrictStr], Field(description="Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).")] = None,
-        restricted: Annotated[Optional[StrictBool], Field(description="Whether the document should be restricted only to certain users.")] = None,
-        permissions: Annotated[Optional[List[StrictStr]], Field(description="The list of usernames having permissions to the document.")] = None,
         tesseract_lang: Annotated[Optional[StrictStr], Field(description="Which language to use when using ocr_model=\"tesseract\".")] = None,
         keep_tables_as_one_chunk: Annotated[Optional[StrictBool], Field(description="When tables are identified by the table parser the table tokens will be kept in a single chunk.")] = None,
         chunk_by_page: Annotated[Optional[StrictBool], Field(description="Each page will be a chunk. `keep_tables_as_one_chunk` will be ignored if this is `true`.")] = None,
         handwriting_check: Annotated[Optional[StrictBool], Field(description="Check pages for handwriting. Will use specialized models if handwriting is found.")] = None,
+        ingest_mode: Annotated[Optional[StrictStr], Field(description="Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.")] = None,
         timeout: Annotated[Optional[Union[StrictFloat, StrictInt]], Field(description="Timeout in seconds")] = None,
         _request_timeout: Union[
             None,
@@ -4043,14 +4970,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> RESTResponseType:
-        """Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
+        """Adds files from the Azure Blob Storage into a collection.
-        Converts files uploaded in \"agent_only\" ingest mode  to PDF and parses them.
+        Adds files from the Azure Blob Storage into a collection.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param document_id: String id of the document to be parsed. (required)
-        :type document_id: str
+        :param ingest_from_azure_blob_storage_body: (required)
+        :type ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -4059,10 +4986,6 @@ class DocumentIngestionApi:
         :type audio_input_language: str
         :param ocr_model: Which method to use to extract text from images using AI-enabled optical character recognition (OCR) models. docTR is best for Latin text, PaddleOCR is best for certain non-Latin languages, Tesseract covers a wide range of languages. Mississippi works well on handwriting. - `auto` - Automatic will auto-select the best OCR model for every page. - `off` - Disable OCR for speed, but all images will then be skipped (also no image captions will be made).
         :type ocr_model: str
-        :param restricted: Whether the document should be restricted only to certain users.
-        :type restricted: bool
-        :param permissions: The list of usernames having permissions to the document.
-        :type permissions: List[str]
         :param tesseract_lang: Which language to use when using ocr_model=\"tesseract\".
         :type tesseract_lang: str
         :param keep_tables_as_one_chunk: When tables are identified by the table parser the table tokens will be kept in a single chunk.
@@ -4071,6 +4994,8 @@ class DocumentIngestionApi:
         :type chunk_by_page: bool
         :param handwriting_check: Check pages for handwriting. Will use specialized models if handwriting is found.
         :type handwriting_check: bool
+        :param ingest_mode: Ingest mode to use. - `standard` - Files will be ingested for use with RAG - `agent_only` - Bypasses standard ingestion. Files can only be used with agents.
+        :type ingest_mode: str
         :param timeout: Timeout in seconds
         :type timeout: float
         :param _request_timeout: timeout setting for this request. If one
@@ -4095,19 +5020,18 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._ingest_agent_only_to_standard_serialize(
+        _param = self._ingest_from_azure_blob_storage_serialize(
             collection_id=collection_id,
-            document_id=document_id,
+            ingest_from_azure_blob_storage_body=ingest_from_azure_blob_storage_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
             ocr_model=ocr_model,
-            restricted=restricted,
-            permissions=permissions,
             tesseract_lang=tesseract_lang,
             keep_tables_as_one_chunk=keep_tables_as_one_chunk,
             chunk_by_page=chunk_by_page,
             handwriting_check=handwriting_check,
+            ingest_mode=ingest_mode,
             timeout=timeout,
             _request_auth=_request_auth,
             _content_type=_content_type,
@@ -4126,20 +5050,19 @@ class DocumentIngestionApi:
         return response_data.response
-    def _ingest_agent_only_to_standard_serialize(
+    def _ingest_from_azure_blob_storage_serialize(
         self,
         collection_id,
-        document_id,
+        ingest_from_azure_blob_storage_body,
         gen_doc_summaries,
         gen_doc_questions,
         audio_input_language,
         ocr_model,
-        restricted,
-        permissions,
         tesseract_lang,
         keep_tables_as_one_chunk,
         chunk_by_page,
         handwriting_check,
+        ingest_mode,
         timeout,
         _request_auth,
         _content_type,
@@ -4150,7 +5073,6 @@ class DocumentIngestionApi:
         _host = None
         _collection_formats: Dict[str, str] = {
-            'permissions': 'multi',
         }
         _path_params: Dict[str, str] = {}
@@ -4168,10 +5090,6 @@ class DocumentIngestionApi:
             _query_params.append(('collection_id', collection_id))
-        if document_id is not None:
-            _query_params.append(('document_id', document_id))
         if gen_doc_summaries is not None:
             _query_params.append(('gen_doc_summaries', gen_doc_summaries))
@@ -4188,14 +5106,6 @@ class DocumentIngestionApi:
             _query_params.append(('ocr_model', ocr_model))
-        if restricted is not None:
-            _query_params.append(('restricted', restricted))
-        if permissions is not None:
-            _query_params.append(('permissions', permissions))
         if tesseract_lang is not None:
             _query_params.append(('tesseract_lang', tesseract_lang))
@@ -4212,6 +5122,10 @@ class DocumentIngestionApi:
             _query_params.append(('handwriting_check', handwriting_check))
+        if ingest_mode is not None:
+            _query_params.append(('ingest_mode', ingest_mode))
         if timeout is not None:
             _query_params.append(('timeout', timeout))
@@ -4219,6 +5133,8 @@ class DocumentIngestionApi:
         # process the header parameters
         # process the form parameters
         # process the body parameter
+        if ingest_from_azure_blob_storage_body is not None:
+            _body_params = ingest_from_azure_blob_storage_body
         # set the HTTP header `Accept`
@@ -4229,6 +5145,19 @@ class DocumentIngestionApi:
                 ]
             )
+        # set the HTTP header `Content-Type`
+        if _content_type:
+            _header_params['Content-Type'] = _content_type
+        else:
+            _default_content_type = (
+                self.api_client.select_header_content_type(
+                    [
+                        'application/json'
+                    ]
+                )
+            )
+            if _default_content_type is not None:
+                _header_params['Content-Type'] = _default_content_type
         # authentication setting
         _auth_settings: List[str] = [
@@ -4237,7 +5166,7 @@ class DocumentIngestionApi:
         return self.api_client.param_serialize(
             method='POST',
-            resource_path='/ingest/agent_only_to_standard',
+            resource_path='/ingest/azure_blob_storage',
             path_params=_path_params,
             query_params=_query_params,
             header_params=_header_params,
@@ -4254,10 +5183,10 @@ class DocumentIngestionApi:
     @validate_call
-    def ingest_from_azure_blob_storage(
+    def ingest_from_confluence(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody,
+        ingest_from_confluence_body: IngestFromConfluenceBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -4281,14 +5210,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> None:
-        """Adds files from the Azure Blob Storage into a collection.
+        """Ingests confluence pages into collection.
-        Adds files from the Azure Blob Storage into a collection.
+        Ingests confluence pages into collection. If an ingested page has sub-pages, the subpages are also ingested.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_azure_blob_storage_body: (required)
-        :type ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody
+        :param ingest_from_confluence_body: (required)
+        :type ingest_from_confluence_body: IngestFromConfluenceBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -4331,9 +5260,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._ingest_from_azure_blob_storage_serialize(
+        _param = self._ingest_from_confluence_serialize(
             collection_id=collection_id,
-            ingest_from_azure_blob_storage_body=ingest_from_azure_blob_storage_body,
+            ingest_from_confluence_body=ingest_from_confluence_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -4366,10 +5295,10 @@ class DocumentIngestionApi:
     @validate_call
-    def ingest_from_azure_blob_storage_with_http_info(
+    def ingest_from_confluence_with_http_info(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody,
+        ingest_from_confluence_body: IngestFromConfluenceBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -4393,14 +5322,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> ApiResponse[None]:
-        """Adds files from the Azure Blob Storage into a collection.
+        """Ingests confluence pages into collection.
-        Adds files from the Azure Blob Storage into a collection.
+        Ingests confluence pages into collection. If an ingested page has sub-pages, the subpages are also ingested.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_azure_blob_storage_body: (required)
-        :type ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody
+        :param ingest_from_confluence_body: (required)
+        :type ingest_from_confluence_body: IngestFromConfluenceBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -4443,9 +5372,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._ingest_from_azure_blob_storage_serialize(
+        _param = self._ingest_from_confluence_serialize(
             collection_id=collection_id,
-            ingest_from_azure_blob_storage_body=ingest_from_azure_blob_storage_body,
+            ingest_from_confluence_body=ingest_from_confluence_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -4478,10 +5407,10 @@ class DocumentIngestionApi:
     @validate_call
-    def ingest_from_azure_blob_storage_without_preload_content(
+    def ingest_from_confluence_without_preload_content(
         self,
         collection_id: Annotated[StrictStr, Field(description="String id of the collection to add the ingested documents into.")],
-        ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody,
+        ingest_from_confluence_body: IngestFromConfluenceBody,
         gen_doc_summaries: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate document summaries (uses LLM).")] = None,
         gen_doc_questions: Annotated[Optional[StrictBool], Field(description="Whether to auto-generate sample questions for each document (uses LLM).")] = None,
         audio_input_language: Annotated[Optional[StrictStr], Field(description="Language of audio files.")] = None,
@@ -4505,14 +5434,14 @@ class DocumentIngestionApi:
         _headers: Optional[Dict[StrictStr, Any]] = None,
         _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
     ) -> RESTResponseType:
-        """Adds files from the Azure Blob Storage into a collection.
+        """Ingests confluence pages into collection.
-        Adds files from the Azure Blob Storage into a collection.
+        Ingests confluence pages into collection. If an ingested page has sub-pages, the subpages are also ingested.
         :param collection_id: String id of the collection to add the ingested documents into. (required)
         :type collection_id: str
-        :param ingest_from_azure_blob_storage_body: (required)
-        :type ingest_from_azure_blob_storage_body: IngestFromAzureBlobStorageBody
+        :param ingest_from_confluence_body: (required)
+        :type ingest_from_confluence_body: IngestFromConfluenceBody
         :param gen_doc_summaries: Whether to auto-generate document summaries (uses LLM).
         :type gen_doc_summaries: bool
         :param gen_doc_questions: Whether to auto-generate sample questions for each document (uses LLM).
@@ -4555,9 +5484,9 @@ class DocumentIngestionApi:
         :return: Returns the result object.
         """ # noqa: E501
-        _param = self._ingest_from_azure_blob_storage_serialize(
+        _param = self._ingest_from_confluence_serialize(
             collection_id=collection_id,
-            ingest_from_azure_blob_storage_body=ingest_from_azure_blob_storage_body,
+            ingest_from_confluence_body=ingest_from_confluence_body,
             gen_doc_summaries=gen_doc_summaries,
             gen_doc_questions=gen_doc_questions,
             audio_input_language=audio_input_language,
@@ -4585,10 +5514,10 @@ class DocumentIngestionApi:
         return response_data.response
-    def _ingest_from_azure_blob_storage_serialize(
+    def _ingest_from_confluence_serialize(
         self,
         collection_id,
-        ingest_from_azure_blob_storage_body,
+        ingest_from_confluence_body,
         gen_doc_summaries,
         gen_doc_questions,
         audio_input_language,
@@ -4668,8 +5597,8 @@ class DocumentIngestionApi:
         # process the header parameters
         # process the form parameters
         # process the body parameter
-        if ingest_from_azure_blob_storage_body is not None:
-            _body_params = ingest_from_azure_blob_storage_body
+        if ingest_from_confluence_body is not None:
+            _body_params = ingest_from_confluence_body
         # set the HTTP header `Accept`
@@ -4701,7 +5630,7 @@ class DocumentIngestionApi:
         return self.api_client.param_serialize(
             method='POST',
-            resource_path='/ingest/azure_blob_storage',
+            resource_path='/ingest/confluence',
             path_params=_path_params,
             query_params=_query_params,
             header_params=_header_params,

h2ogpte 1.6.43rc2__py3-none-any.whl → 1.6.43rc5__py3-none-any.whl

h2ogpte 1.6.43rc2__py3-none-any.whl → 1.6.43rc5__py3-none-any.whl