RubyGems - carbon_ruby_sdk - Versions diffs - 0.2.14 → 0.2.15 - Mend

carbon_ruby_sdk 0.2.14 → 0.2.15

Files changed (13) hide show

checksums.yaml +4 -4
data/Gemfile.lock +1 -1
data/README.md +19 -7
data/lib/carbon_ruby_sdk/api/integrations_api.rb +20 -12
data/lib/carbon_ruby_sdk/api/utilities_api.rb +6 -2
data/lib/carbon_ruby_sdk/models/o_auth_url_request.rb +2 -2
data/lib/carbon_ruby_sdk/models/sitemap_scrape_request.rb +18 -5
data/lib/carbon_ruby_sdk/models/sync_files_request.rb +1 -1
data/lib/carbon_ruby_sdk/models/sync_options.rb +1 -1
data/lib/carbon_ruby_sdk/version.rb +1 -1
data/spec/api/integrations_api_spec.rb +2 -2
data/spec/models/sitemap_scrape_request_spec.rb +6 -0
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 984e5089a3db27f2a5612d3bdf0dcfd39fe085da968e42eb3478209784f585a8
-  data.tar.gz: ff2177a599a167616aa47081b6116e417e83322dda52cd53bfa49b2e2a82578a
+  metadata.gz: 06d33414be65f6bd0cd426ab4a944e8de9dcc7dca64b35d4ea90fa2ce3bf0474
+  data.tar.gz: 3a9d7272878fa2303e3bd0f2cf41796646f9f1689c034a5a16770db3e3fdcc02
 SHA512:
-  metadata.gz: 128fde907d3414ddf759bbc45edc6a6328193af688d789dc43f759e2646c6ed194ce3951094cc4ec8f5728e071d62108fce87d51e73e70f0ddd0be9a443c458d
-  data.tar.gz: e656ca1e4059b26a0ea3b3ac387bd5ec1dcfa8e55cd831c5efead211d5fd68b10db1f717c7fb1ff59a7fb1222bbbb3aa2310086b267ba2a622e1d6309fb8795c
+  metadata.gz: 41d5d17eb9b5f4bf0dedccadba7b29d796baddca4fe74bf4f1844b5bb52b440242bb917d6c49d074ca83598097b57483fe2cc5dc6a1acaade61cc9f5c1fb9813
+  data.tar.gz: 19f659c16c5845b371f99d82daf3cb8d37a5ffa88b7166e9afb7386a5e2991c2caed36c1d494a6a2237be4f0e0754b01b7bdd7e7ec6e8410f25c65754e20a436

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    carbon_ruby_sdk (0.2.13)
+    carbon_ruby_sdk (0.2.14)
       faraday (>= 1.0.1, < 3.0)
       faraday-multipart (~> 1.0, >= 1.0.4)

data/README.md CHANGED Viewed

@@ -6,7 +6,7 @@
 Connect external data to LLMs, no matter the source.
-[![npm](https://img.shields.io/badge/gem-v0.2.14-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.14)
+[![npm](https://img.shields.io/badge/gem-v0.2.15-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.15)
 </div>
@@ -91,7 +91,7 @@ Connect external data to LLMs, no matter the source.
 Add to Gemfile:
 ```ruby
-gem 'carbon_ruby_sdk', '~> 0.2.14'
+gem 'carbon_ruby_sdk', '~> 0.2.15'
 ```
 ## Getting Started<a id="getting-started"></a>
@@ -1300,7 +1300,7 @@ result = carbon.integrations.connect_data_source(
         "prepend_filename_to_chunks" => false,
         "sync_files_on_connection" => true,
         "set_page_as_boundary" => false,
-        "request_id" => "3a0195db-42f0-48ed-b809-d253f436a8e0",
+        "request_id" => "07b02a24-9429-4a3c-aa98-27ff63503082",
         "enable_file_picker" => true,
         "sync_source_items" => true,
         "incremental_sync" => false,
@@ -1520,7 +1520,7 @@ result = carbon.integrations.get_oauth_url(
   set_page_as_boundary: false,
   data_source_id: 1,
   connecting_new_account: false,
-  request_id: "b2935b7f-ee64-4d76-8864-8b6731211938",
+  request_id: "d29ec177-bbc6-43c7-9b16-0ee340804c99",
   use_ocr: false,
   parse_pdf_tables_with_ocr: false,
   enable_file_picker: true,
@@ -1581,7 +1581,7 @@ Enable OCR for files that support it. Supported formats: pdf
 ##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
 ##### enable_file_picker: `Boolean`<a id="enable_file_picker-boolean"></a>
 Enable integration's file picker for sources that support it. Supported sources:
-SHAREPOINT, DROPBOX, ONEDRIVE, GOOGLE_DRIVE, BOX
+BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
 ##### sync_source_items: `Boolean`<a id="sync_source_items-boolean"></a>
 Enabling this flag will fetch all available content from the source to be listed
@@ -1608,6 +1608,9 @@ other data sources.
 ### `carbon.integrations.list_confluence_pages`<a id="carbonintegrationslist_confluence_pages"></a>
+![Deprecated](https://img.shields.io/badge/deprecated-yellow)
+This endpoint has been deprecated. Use /integrations/items/list instead.
 To begin listing a user's Confluence pages, at least a `data_source_id` of a connected
 Confluence account must be specified. This base request returns a list of root pages for
@@ -1858,6 +1861,9 @@ p result
 ### `carbon.integrations.sync_confluence`<a id="carbonintegrationssync_confluence"></a>
+![Deprecated](https://img.shields.io/badge/deprecated-yellow)
+This endpoint has been deprecated. Use /integrations/files/sync instead.
 After listing pages in a user's Confluence account, the set of selected page `ids` and the
 connected account's `data_source_id` can be passed into this endpoint to sync them into
@@ -1881,7 +1887,7 @@ result = carbon.integrations.sync_confluence(
   prepend_filename_to_chunks: false,
   max_items_per_chunk: 1,
   set_page_as_boundary: false,
-  request_id: "50140a8f-c06f-40df-9b91-d10d074bcb67",
+  request_id: "ca60b474-8b43-4b44-9deb-adb701e40610",
   use_ocr: false,
   parse_pdf_tables_with_ocr: false,
   incremental_sync: false,
@@ -1986,7 +1992,7 @@ result = carbon.integrations.sync_files(
   prepend_filename_to_chunks: false,
   max_items_per_chunk: 1,
   set_page_as_boundary: false,
-  request_id: "50140a8f-c06f-40df-9b91-d10d074bcb67",
+  request_id: "ca60b474-8b43-4b44-9deb-adb701e40610",
   use_ocr: false,
   parse_pdf_tables_with_ocr: false,
   incremental_sync: false,
@@ -2966,6 +2972,7 @@ result = carbon.utilities.scrape_sitemap(
   embedding_model: "OPENAI",
   url_paths_to_include: [],
   url_paths_to_exclude: [],
+  urls_to_scrape: [],
 )
 p result
 ```
@@ -2995,6 +3002,11 @@ URL subpaths or directories that you want to exclude. For example if you want to
 exclude URLs that start with /questions in stackoverflow.com, you will add
 /questions/ in this input
+##### urls_to_scrape: Array<`String`><a id="urls_to_scrape-array"></a>
+You can submit a subset of URLs from the sitemap that should be scraped. To get
+the list of URLs, you can check out /process_sitemap endpoint. If left empty,
+all URLs from the sitemap will be scraped.
 #### 🌐 Endpoint<a id="🌐-endpoint"></a>
 `/scrape_sitemap` `POST`

data/lib/carbon_ruby_sdk/api/integrations_api.rb CHANGED Viewed

@@ -653,13 +653,13 @@ module Carbon
     # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
     # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
     # @param parse_pdf_tables_with_ocr [Boolean]
-    # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, DROPBOX, ONEDRIVE, GOOGLE_DRIVE, BOX
+    # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
     # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
     # @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX, INTERCOM, GMAIL, OUTLOOK. It will be ignored for other data sources.
     # @param file_sync_config [FileSyncConfigNullable]
     # @param body [OAuthURLRequest]
     # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
-    def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'b2935b7f-ee64-4d76-8864-8b6731211938', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
+    def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'd29ec177-bbc6-43c7-9b16-0ee340804c99', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
       _body = {}
       _body[:tags] = tags if tags != SENTINEL
       _body[:scope] = scope if scope != SENTINEL
@@ -721,13 +721,13 @@ module Carbon
     # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
     # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
     # @param parse_pdf_tables_with_ocr [Boolean]
-    # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, DROPBOX, ONEDRIVE, GOOGLE_DRIVE, BOX
+    # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
     # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
     # @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX, INTERCOM, GMAIL, OUTLOOK. It will be ignored for other data sources.
     # @param file_sync_config [FileSyncConfigNullable]
     # @param body [OAuthURLRequest]
     # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
-    def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'b2935b7f-ee64-4d76-8864-8b6731211938', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
+    def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'd29ec177-bbc6-43c7-9b16-0ee340804c99', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
       _body = {}
       _body[:tags] = tags if tags != SENTINEL
       _body[:scope] = scope if scope != SENTINEL
@@ -830,6 +830,8 @@ module Carbon
     # Confluence List
     #
+    # This endpoint has been deprecated. Use /integrations/items/list instead.
+    #
     # To begin listing a user's Confluence pages, at least a `data_source_id` of a connected
     # Confluence account must be specified. This base request returns a list of root pages for
     # every space the user has access to in a Confluence instance. To traverse further down
@@ -853,6 +855,8 @@ module Carbon
     # Confluence List
     #
+    # This endpoint has been deprecated. Use /integrations/items/list instead.
+    #
     # To begin listing a user's Confluence pages, at least a `data_source_id` of a connected
     # Confluence account must be specified. This base request returns a list of root pages for
     # every space the user has access to in a Confluence instance. To traverse further down
@@ -874,7 +878,7 @@ module Carbon
     end
     # Confluence List
-    # To begin listing a user's Confluence pages, at least a `data_source_id` of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user's page directory, additional requests to this endpoint can be made with the same `data_source_id` and with `parent_id` set to the id of page from a previous request. For convenience, the `has_children` property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the `parent_id`.
+    # This endpoint has been deprecated. Use /integrations/items/list instead.  To begin listing a user's Confluence pages, at least a `data_source_id` of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user's page directory, additional requests to this endpoint can be made with the same `data_source_id` and with `parent_id` set to the id of page from a previous request. For convenience, the `has_children` property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the `parent_id`.
     # @param list_request [ListRequest]
     # @param [Hash] opts the optional parameters
     # @return [ListResponse]
@@ -884,7 +888,7 @@ module Carbon
     end
     # Confluence List
-    # To begin listing a user&#39;s Confluence pages, at least a &#x60;data_source_id&#x60; of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user&#39;s page directory, additional requests to this endpoint can be made with the same &#x60;data_source_id&#x60; and with &#x60;parent_id&#x60; set to the id of page from a previous request. For convenience, the &#x60;has_children&#x60; property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the &#x60;parent_id&#x60;.
+    # This endpoint has been deprecated. Use /integrations/items/list instead.  To begin listing a user&#39;s Confluence pages, at least a &#x60;data_source_id&#x60; of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user&#39;s page directory, additional requests to this endpoint can be made with the same &#x60;data_source_id&#x60; and with &#x60;parent_id&#x60; set to the id of page from a previous request. For convenience, the &#x60;has_children&#x60; property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the &#x60;parent_id&#x60;.
     # @param list_request [ListRequest]
     # @param [Hash] opts the optional parameters
     # @return [APIResponse] data is ListResponse, status code, headers and response
@@ -1617,6 +1621,8 @@ module Carbon
     # Confluence Sync
     #
+    # This endpoint has been deprecated. Use /integrations/files/sync instead.
+    #
     # After listing pages in a user's Confluence account, the set of selected page `ids` and the
     # connected account's `data_source_id` can be passed into this endpoint to sync them into
     # Carbon. Additional parameters listed below can be used to associate data to the selected
@@ -1640,7 +1646,7 @@ module Carbon
     # @param file_sync_config [FileSyncConfigNullable]
     # @param body [SyncFilesRequest]
     # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
-    def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '50140a8f-c06f-40df-9b91-d10d074bcb67', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
+    def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
       _body = {}
       _body[:tags] = tags if tags != SENTINEL
       _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1665,6 +1671,8 @@ module Carbon
     # Confluence Sync
     #
+    # This endpoint has been deprecated. Use /integrations/files/sync instead.
+    #
     # After listing pages in a user's Confluence account, the set of selected page `ids` and the
     # connected account's `data_source_id` can be passed into this endpoint to sync them into
     # Carbon. Additional parameters listed below can be used to associate data to the selected
@@ -1688,7 +1696,7 @@ module Carbon
     # @param file_sync_config [FileSyncConfigNullable]
     # @param body [SyncFilesRequest]
     # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
-    def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '50140a8f-c06f-40df-9b91-d10d074bcb67', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
+    def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
       _body = {}
       _body[:tags] = tags if tags != SENTINEL
       _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1711,7 +1719,7 @@ module Carbon
     end
     # Confluence Sync
-    # After listing pages in a user's Confluence account, the set of selected page `ids` and the connected account's `data_source_id` can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
+    # This endpoint has been deprecated. Use /integrations/files/sync instead.  After listing pages in a user's Confluence account, the set of selected page `ids` and the connected account's `data_source_id` can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
     # @param sync_files_request [SyncFilesRequest]
     # @param [Hash] opts the optional parameters
     # @return [GenericSuccessResponse]
@@ -1721,7 +1729,7 @@ module Carbon
     end
     # Confluence Sync
-    # After listing pages in a user&#39;s Confluence account, the set of selected page &#x60;ids&#x60; and the connected account&#39;s &#x60;data_source_id&#x60; can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
+    # This endpoint has been deprecated. Use /integrations/files/sync instead.  After listing pages in a user&#39;s Confluence account, the set of selected page &#x60;ids&#x60; and the connected account&#39;s &#x60;data_source_id&#x60; can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
     # @param sync_files_request [SyncFilesRequest]
     # @param [Hash] opts the optional parameters
     # @return [APIResponse] data is GenericSuccessResponse, status code, headers and response
@@ -1896,7 +1904,7 @@ module Carbon
     # @param file_sync_config [FileSyncConfigNullable]
     # @param body [SyncFilesRequest]
     # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
-    def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '50140a8f-c06f-40df-9b91-d10d074bcb67', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
+    def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
       _body = {}
       _body[:tags] = tags if tags != SENTINEL
       _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1944,7 +1952,7 @@ module Carbon
     # @param file_sync_config [FileSyncConfigNullable]
     # @param body [SyncFilesRequest]
     # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
-    def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '50140a8f-c06f-40df-9b91-d10d074bcb67', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
+    def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
       _body = {}
       _body[:tags] = tags if tags != SENTINEL
       _body[:data_source_id] = data_source_id if data_source_id != SENTINEL

data/lib/carbon_ruby_sdk/api/utilities_api.rb CHANGED Viewed

@@ -436,9 +436,10 @@ module Carbon
     # @param embedding_model [EmbeddingGenerators]
     # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
     # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
+    # @param urls_to_scrape [Array<String>] You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs, you can check out /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
     # @param body [SitemapScrapeRequest]
     # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
-    def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
+    def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, urls_to_scrape: SENTINEL, extra: {})
       _body = {}
       _body[:tags] = tags if tags != SENTINEL
       _body[:url] = url if url != SENTINEL
@@ -455,6 +456,7 @@ module Carbon
       _body[:embedding_model] = embedding_model if embedding_model != SENTINEL
       _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
       _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
+      _body[:urls_to_scrape] = urls_to_scrape if urls_to_scrape != SENTINEL
       sitemap_scrape_request = _body
       api_response = scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
       api_response.data
@@ -485,9 +487,10 @@ module Carbon
     # @param embedding_model [EmbeddingGenerators]
     # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
     # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
+    # @param urls_to_scrape [Array<String>] You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs, you can check out /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
     # @param body [SitemapScrapeRequest]
     # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
-    def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
+    def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, urls_to_scrape: SENTINEL, extra: {})
       _body = {}
       _body[:tags] = tags if tags != SENTINEL
       _body[:url] = url if url != SENTINEL
@@ -504,6 +507,7 @@ module Carbon
       _body[:embedding_model] = embedding_model if embedding_model != SENTINEL
       _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
       _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
+      _body[:urls_to_scrape] = urls_to_scrape if urls_to_scrape != SENTINEL
       sitemap_scrape_request = _body
       scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
     end

data/lib/carbon_ruby_sdk/models/o_auth_url_request.rb CHANGED Viewed

@@ -61,7 +61,7 @@ module Carbon
     attr_accessor :parse_pdf_tables_with_ocr
-    # Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, DROPBOX, ONEDRIVE, GOOGLE_DRIVE, BOX
+    # Enable integration's file picker for sources that support it. Supported sources: BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
     attr_accessor :enable_file_picker
     # Enabling this flag will fetch all available content from the source to be listed via list items endpoint
@@ -279,7 +279,7 @@ module Carbon
       if attributes.key?(:'request_id')
         self.request_id = attributes[:'request_id']
       else
-        self.request_id = 'b2935b7f-ee64-4d76-8864-8b6731211938'
+        self.request_id = 'd29ec177-bbc6-43c7-9b16-0ee340804c99'
       end
       if attributes.key?(:'use_ocr')

data/lib/carbon_ruby_sdk/models/sitemap_scrape_request.rb CHANGED Viewed

@@ -43,6 +43,9 @@ module Carbon
     # URL subpaths or directories that you want to exclude. For example if you want to exclude         URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
     attr_accessor :url_paths_to_exclude
+    # You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs,           you can check out /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
+    attr_accessor :urls_to_scrape
     # Attribute mapping from ruby-style variable name to JSON key.
     def self.attribute_map
       {
@@ -60,7 +63,8 @@ module Carbon
         :'css_selectors_to_skip' => :'css_selectors_to_skip',
         :'embedding_model' => :'embedding_model',
         :'url_paths_to_include' => :'url_paths_to_include',
-        :'url_paths_to_exclude' => :'url_paths_to_exclude'
+        :'url_paths_to_exclude' => :'url_paths_to_exclude',
+        :'urls_to_scrape' => :'urls_to_scrape'
       }
     end
@@ -86,7 +90,8 @@ module Carbon
         :'css_selectors_to_skip' => :'Array<String>',
         :'embedding_model' => :'EmbeddingGenerators',
         :'url_paths_to_include' => :'Array<String>',
-        :'url_paths_to_exclude' => :'Array<String>'
+        :'url_paths_to_exclude' => :'Array<String>',
+        :'urls_to_scrape' => :'Array<String>'
       }
     end
@@ -105,7 +110,8 @@ module Carbon
         :'css_classes_to_skip',
         :'css_selectors_to_skip',
         :'url_paths_to_include',
-        :'url_paths_to_exclude'
+        :'url_paths_to_exclude',
+        :'urls_to_scrape'
       ])
     end
@@ -209,6 +215,12 @@ module Carbon
           self.url_paths_to_exclude = value
         end
       end
+      if attributes.key?(:'urls_to_scrape')
+        if (value = attributes[:'urls_to_scrape']).is_a?(Array)
+          self.urls_to_scrape = value
+        end
+      end
     end
     # Show invalid properties with the reasons. Usually used together with valid?
@@ -293,7 +305,8 @@ module Carbon
           css_selectors_to_skip == o.css_selectors_to_skip &&
           embedding_model == o.embedding_model &&
           url_paths_to_include == o.url_paths_to_include &&
-          url_paths_to_exclude == o.url_paths_to_exclude
+          url_paths_to_exclude == o.url_paths_to_exclude &&
+          urls_to_scrape == o.urls_to_scrape
     end
     # @see the `==` method
@@ -305,7 +318,7 @@ module Carbon
     # Calculates hash code according to all attributes.
     # @return [Integer] Hash code
     def hash
-      [tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude].hash
+      [tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude, urls_to_scrape].hash
     end
     # Builds the object from hash

data/lib/carbon_ruby_sdk/models/sync_files_request.rb CHANGED Viewed

@@ -187,7 +187,7 @@ module Carbon
       if attributes.key?(:'request_id')
         self.request_id = attributes[:'request_id']
       else
-        self.request_id = '50140a8f-c06f-40df-9b91-d10d074bcb67'
+        self.request_id = 'ca60b474-8b43-4b44-9deb-adb701e40610'
       end
       if attributes.key?(:'use_ocr')

data/lib/carbon_ruby_sdk/models/sync_options.rb CHANGED Viewed

@@ -182,7 +182,7 @@ module Carbon
       if attributes.key?(:'request_id')
         self.request_id = attributes[:'request_id']
       else
-        self.request_id = '3a0195db-42f0-48ed-b809-d253f436a8e0'
+        self.request_id = '07b02a24-9429-4a3c-aa98-27ff63503082'
       end
       if attributes.key?(:'enable_file_picker')

data/lib/carbon_ruby_sdk/version.rb CHANGED Viewed

@@ -7,5 +7,5 @@ The version of the OpenAPI document: 1.0.0
 =end
 module Carbon
-  VERSION = '0.2.14'
+  VERSION = '0.2.15'
 end

data/spec/api/integrations_api_spec.rb CHANGED Viewed

@@ -98,7 +98,7 @@ describe 'IntegrationsApi' do
   # unit tests for list_confluence_pages
   # Confluence List
-  # To begin listing a user&#39;s Confluence pages, at least a &#x60;data_source_id&#x60; of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user&#39;s page directory, additional requests to this endpoint can be made with the same &#x60;data_source_id&#x60; and with &#x60;parent_id&#x60; set to the id of page from a previous request. For convenience, the &#x60;has_children&#x60; property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the &#x60;parent_id&#x60;.
+  # This endpoint has been deprecated. Use /integrations/items/list instead.  To begin listing a user&#39;s Confluence pages, at least a &#x60;data_source_id&#x60; of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user&#39;s page directory, additional requests to this endpoint can be made with the same &#x60;data_source_id&#x60; and with &#x60;parent_id&#x60; set to the id of page from a previous request. For convenience, the &#x60;has_children&#x60; property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the &#x60;parent_id&#x60;.
   # @param list_request
   # @param [Hash] opts the optional parameters
   # @return [ListResponse]
@@ -198,7 +198,7 @@ describe 'IntegrationsApi' do
   # unit tests for sync_confluence
   # Confluence Sync
-  # After listing pages in a user&#39;s Confluence account, the set of selected page &#x60;ids&#x60; and the connected account&#39;s &#x60;data_source_id&#x60; can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
+  # This endpoint has been deprecated. Use /integrations/files/sync instead.  After listing pages in a user&#39;s Confluence account, the set of selected page &#x60;ids&#x60; and the connected account&#39;s &#x60;data_source_id&#x60; can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
   # @param sync_files_request
   # @param [Hash] opts the optional parameters
   # @return [GenericSuccessResponse]

data/spec/models/sitemap_scrape_request_spec.rb CHANGED Viewed

@@ -109,4 +109,10 @@ describe Carbon::SitemapScrapeRequest do
     end
   end
+  describe 'test attribute "urls_to_scrape"' do
+    it 'should work' do
+      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
+    end
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: carbon_ruby_sdk
 version: !ruby/object:Gem::Version
-  version: 0.2.14
+  version: 0.2.15
 platform: ruby
 authors:
 - Konfig
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-07-17 00:00:00.000000000 Z
+date: 2024-07-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: faraday