carbon_ruby_sdk 0.2.14 → 0.2.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 984e5089a3db27f2a5612d3bdf0dcfd39fe085da968e42eb3478209784f585a8
4
- data.tar.gz: ff2177a599a167616aa47081b6116e417e83322dda52cd53bfa49b2e2a82578a
3
+ metadata.gz: 06d33414be65f6bd0cd426ab4a944e8de9dcc7dca64b35d4ea90fa2ce3bf0474
4
+ data.tar.gz: 3a9d7272878fa2303e3bd0f2cf41796646f9f1689c034a5a16770db3e3fdcc02
5
5
  SHA512:
6
- metadata.gz: 128fde907d3414ddf759bbc45edc6a6328193af688d789dc43f759e2646c6ed194ce3951094cc4ec8f5728e071d62108fce87d51e73e70f0ddd0be9a443c458d
7
- data.tar.gz: e656ca1e4059b26a0ea3b3ac387bd5ec1dcfa8e55cd831c5efead211d5fd68b10db1f717c7fb1ff59a7fb1222bbbb3aa2310086b267ba2a622e1d6309fb8795c
6
+ metadata.gz: 41d5d17eb9b5f4bf0dedccadba7b29d796baddca4fe74bf4f1844b5bb52b440242bb917d6c49d074ca83598097b57483fe2cc5dc6a1acaade61cc9f5c1fb9813
7
+ data.tar.gz: 19f659c16c5845b371f99d82daf3cb8d37a5ffa88b7166e9afb7386a5e2991c2caed36c1d494a6a2237be4f0e0754b01b7bdd7e7ec6e8410f25c65754e20a436
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- carbon_ruby_sdk (0.2.13)
4
+ carbon_ruby_sdk (0.2.14)
5
5
  faraday (>= 1.0.1, < 3.0)
6
6
  faraday-multipart (~> 1.0, >= 1.0.4)
7
7
 
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  Connect external data to LLMs, no matter the source.
8
8
 
9
- [![npm](https://img.shields.io/badge/gem-v0.2.14-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.14)
9
+ [![npm](https://img.shields.io/badge/gem-v0.2.15-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.15)
10
10
 
11
11
  </div>
12
12
 
@@ -91,7 +91,7 @@ Connect external data to LLMs, no matter the source.
91
91
  Add to Gemfile:
92
92
 
93
93
  ```ruby
94
- gem 'carbon_ruby_sdk', '~> 0.2.14'
94
+ gem 'carbon_ruby_sdk', '~> 0.2.15'
95
95
  ```
96
96
 
97
97
  ## Getting Started<a id="getting-started"></a>
@@ -1300,7 +1300,7 @@ result = carbon.integrations.connect_data_source(
1300
1300
  "prepend_filename_to_chunks" => false,
1301
1301
  "sync_files_on_connection" => true,
1302
1302
  "set_page_as_boundary" => false,
1303
- "request_id" => "3a0195db-42f0-48ed-b809-d253f436a8e0",
1303
+ "request_id" => "07b02a24-9429-4a3c-aa98-27ff63503082",
1304
1304
  "enable_file_picker" => true,
1305
1305
  "sync_source_items" => true,
1306
1306
  "incremental_sync" => false,
@@ -1520,7 +1520,7 @@ result = carbon.integrations.get_oauth_url(
1520
1520
  set_page_as_boundary: false,
1521
1521
  data_source_id: 1,
1522
1522
  connecting_new_account: false,
1523
- request_id: "b2935b7f-ee64-4d76-8864-8b6731211938",
1523
+ request_id: "d29ec177-bbc6-43c7-9b16-0ee340804c99",
1524
1524
  use_ocr: false,
1525
1525
  parse_pdf_tables_with_ocr: false,
1526
1526
  enable_file_picker: true,
@@ -1581,7 +1581,7 @@ Enable OCR for files that support it. Supported formats: pdf
1581
1581
  ##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
1582
1582
  ##### enable_file_picker: `Boolean`<a id="enable_file_picker-boolean"></a>
1583
1583
  Enable integration's file picker for sources that support it. Supported sources:
1584
- SHAREPOINT, DROPBOX, ONEDRIVE, GOOGLE_DRIVE, BOX
1584
+ BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
1585
1585
 
1586
1586
  ##### sync_source_items: `Boolean`<a id="sync_source_items-boolean"></a>
1587
1587
  Enabling this flag will fetch all available content from the source to be listed
@@ -1608,6 +1608,9 @@ other data sources.
1608
1608
 
1609
1609
 
1610
1610
  ### `carbon.integrations.list_confluence_pages`<a id="carbonintegrationslist_confluence_pages"></a>
1611
+ ![Deprecated](https://img.shields.io/badge/deprecated-yellow)
1612
+
1613
+ This endpoint has been deprecated. Use /integrations/items/list instead.
1611
1614
 
1612
1615
  To begin listing a user's Confluence pages, at least a `data_source_id` of a connected
1613
1616
  Confluence account must be specified. This base request returns a list of root pages for
@@ -1858,6 +1861,9 @@ p result
1858
1861
 
1859
1862
 
1860
1863
  ### `carbon.integrations.sync_confluence`<a id="carbonintegrationssync_confluence"></a>
1864
+ ![Deprecated](https://img.shields.io/badge/deprecated-yellow)
1865
+
1866
+ This endpoint has been deprecated. Use /integrations/files/sync instead.
1861
1867
 
1862
1868
  After listing pages in a user's Confluence account, the set of selected page `ids` and the
1863
1869
  connected account's `data_source_id` can be passed into this endpoint to sync them into
@@ -1881,7 +1887,7 @@ result = carbon.integrations.sync_confluence(
1881
1887
  prepend_filename_to_chunks: false,
1882
1888
  max_items_per_chunk: 1,
1883
1889
  set_page_as_boundary: false,
1884
- request_id: "50140a8f-c06f-40df-9b91-d10d074bcb67",
1890
+ request_id: "ca60b474-8b43-4b44-9deb-adb701e40610",
1885
1891
  use_ocr: false,
1886
1892
  parse_pdf_tables_with_ocr: false,
1887
1893
  incremental_sync: false,
@@ -1986,7 +1992,7 @@ result = carbon.integrations.sync_files(
1986
1992
  prepend_filename_to_chunks: false,
1987
1993
  max_items_per_chunk: 1,
1988
1994
  set_page_as_boundary: false,
1989
- request_id: "50140a8f-c06f-40df-9b91-d10d074bcb67",
1995
+ request_id: "ca60b474-8b43-4b44-9deb-adb701e40610",
1990
1996
  use_ocr: false,
1991
1997
  parse_pdf_tables_with_ocr: false,
1992
1998
  incremental_sync: false,
@@ -2966,6 +2972,7 @@ result = carbon.utilities.scrape_sitemap(
2966
2972
  embedding_model: "OPENAI",
2967
2973
  url_paths_to_include: [],
2968
2974
  url_paths_to_exclude: [],
2975
+ urls_to_scrape: [],
2969
2976
  )
2970
2977
  p result
2971
2978
  ```
@@ -2995,6 +3002,11 @@ URL subpaths or directories that you want to exclude. For example if you want to
2995
3002
  exclude URLs that start with /questions in stackoverflow.com, you will add
2996
3003
  /questions/ in this input
2997
3004
 
3005
+ ##### urls_to_scrape: Array<`String`><a id="urls_to_scrape-array"></a>
3006
+ You can submit a subset of URLs from the sitemap that should be scraped. To get
3007
+ the list of URLs, you can check out the /process_sitemap endpoint. If left empty,
3008
+ all URLs from the sitemap will be scraped.
3009
+
2998
3010
  #### 🌐 Endpoint<a id="🌐-endpoint"></a>
2999
3011
 
3000
3012
  `/scrape_sitemap` `POST`
@@ -653,13 +653,13 @@ module Carbon
653
653
  # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
654
654
  # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
655
655
  # @param parse_pdf_tables_with_ocr [Boolean]
656
- # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, DROPBOX, ONEDRIVE, GOOGLE_DRIVE, BOX
656
+ # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
657
657
  # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
658
658
  # @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX, INTERCOM, GMAIL, OUTLOOK. It will be ignored for other data sources.
659
659
  # @param file_sync_config [FileSyncConfigNullable]
660
660
  # @param body [OAuthURLRequest]
661
661
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
662
- def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'b2935b7f-ee64-4d76-8864-8b6731211938', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
662
+ def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'd29ec177-bbc6-43c7-9b16-0ee340804c99', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
663
663
  _body = {}
664
664
  _body[:tags] = tags if tags != SENTINEL
665
665
  _body[:scope] = scope if scope != SENTINEL
@@ -721,13 +721,13 @@ module Carbon
721
721
  # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
722
722
  # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
723
723
  # @param parse_pdf_tables_with_ocr [Boolean]
724
- # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, DROPBOX, ONEDRIVE, GOOGLE_DRIVE, BOX
724
+ # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
725
725
  # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
726
726
  # @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX, INTERCOM, GMAIL, OUTLOOK. It will be ignored for other data sources.
727
727
  # @param file_sync_config [FileSyncConfigNullable]
728
728
  # @param body [OAuthURLRequest]
729
729
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
730
- def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'b2935b7f-ee64-4d76-8864-8b6731211938', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
730
+ def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'd29ec177-bbc6-43c7-9b16-0ee340804c99', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
731
731
  _body = {}
732
732
  _body[:tags] = tags if tags != SENTINEL
733
733
  _body[:scope] = scope if scope != SENTINEL
@@ -830,6 +830,8 @@ module Carbon
830
830
 
831
831
  # Confluence List
832
832
  #
833
+ # This endpoint has been deprecated. Use /integrations/items/list instead.
834
+ #
833
835
  # To begin listing a user's Confluence pages, at least a `data_source_id` of a connected
834
836
  # Confluence account must be specified. This base request returns a list of root pages for
835
837
  # every space the user has access to in a Confluence instance. To traverse further down
@@ -853,6 +855,8 @@ module Carbon
853
855
 
854
856
  # Confluence List
855
857
  #
858
+ # This endpoint has been deprecated. Use /integrations/items/list instead.
859
+ #
856
860
  # To begin listing a user's Confluence pages, at least a `data_source_id` of a connected
857
861
  # Confluence account must be specified. This base request returns a list of root pages for
858
862
  # every space the user has access to in a Confluence instance. To traverse further down
@@ -874,7 +878,7 @@ module Carbon
874
878
  end
875
879
 
876
880
  # Confluence List
877
- # To begin listing a user's Confluence pages, at least a `data_source_id` of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user's page directory, additional requests to this endpoint can be made with the same `data_source_id` and with `parent_id` set to the id of page from a previous request. For convenience, the `has_children` property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the `parent_id`.
881
+ # This endpoint has been deprecated. Use /integrations/items/list instead. To begin listing a user's Confluence pages, at least a `data_source_id` of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user's page directory, additional requests to this endpoint can be made with the same `data_source_id` and with `parent_id` set to the id of page from a previous request. For convenience, the `has_children` property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the `parent_id`.
878
882
  # @param list_request [ListRequest]
879
883
  # @param [Hash] opts the optional parameters
880
884
  # @return [ListResponse]
@@ -884,7 +888,7 @@ module Carbon
884
888
  end
885
889
 
886
890
  # Confluence List
887
- # To begin listing a user&#39;s Confluence pages, at least a &#x60;data_source_id&#x60; of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user&#39;s page directory, additional requests to this endpoint can be made with the same &#x60;data_source_id&#x60; and with &#x60;parent_id&#x60; set to the id of page from a previous request. For convenience, the &#x60;has_children&#x60; property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the &#x60;parent_id&#x60;.
891
+ # This endpoint has been deprecated. Use /integrations/items/list instead. To begin listing a user&#39;s Confluence pages, at least a &#x60;data_source_id&#x60; of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user&#39;s page directory, additional requests to this endpoint can be made with the same &#x60;data_source_id&#x60; and with &#x60;parent_id&#x60; set to the id of page from a previous request. For convenience, the &#x60;has_children&#x60; property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the &#x60;parent_id&#x60;.
888
892
  # @param list_request [ListRequest]
889
893
  # @param [Hash] opts the optional parameters
890
894
  # @return [APIResponse] data is ListResponse, status code, headers and response
@@ -1617,6 +1621,8 @@ module Carbon
1617
1621
 
1618
1622
  # Confluence Sync
1619
1623
  #
1624
+ # This endpoint has been deprecated. Use /integrations/files/sync instead.
1625
+ #
1620
1626
  # After listing pages in a user's Confluence account, the set of selected page `ids` and the
1621
1627
  # connected account's `data_source_id` can be passed into this endpoint to sync them into
1622
1628
  # Carbon. Additional parameters listed below can be used to associate data to the selected
@@ -1640,7 +1646,7 @@ module Carbon
1640
1646
  # @param file_sync_config [FileSyncConfigNullable]
1641
1647
  # @param body [SyncFilesRequest]
1642
1648
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1643
- def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '50140a8f-c06f-40df-9b91-d10d074bcb67', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1649
+ def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1644
1650
  _body = {}
1645
1651
  _body[:tags] = tags if tags != SENTINEL
1646
1652
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1665,6 +1671,8 @@ module Carbon
1665
1671
 
1666
1672
  # Confluence Sync
1667
1673
  #
1674
+ # This endpoint has been deprecated. Use /integrations/files/sync instead.
1675
+ #
1668
1676
  # After listing pages in a user's Confluence account, the set of selected page `ids` and the
1669
1677
  # connected account's `data_source_id` can be passed into this endpoint to sync them into
1670
1678
  # Carbon. Additional parameters listed below can be used to associate data to the selected
@@ -1688,7 +1696,7 @@ module Carbon
1688
1696
  # @param file_sync_config [FileSyncConfigNullable]
1689
1697
  # @param body [SyncFilesRequest]
1690
1698
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1691
- def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '50140a8f-c06f-40df-9b91-d10d074bcb67', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1699
+ def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1692
1700
  _body = {}
1693
1701
  _body[:tags] = tags if tags != SENTINEL
1694
1702
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1711,7 +1719,7 @@ module Carbon
1711
1719
  end
1712
1720
 
1713
1721
  # Confluence Sync
1714
- # After listing pages in a user's Confluence account, the set of selected page `ids` and the connected account's `data_source_id` can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
1722
+ # This endpoint has been deprecated. Use /integrations/files/sync instead. After listing pages in a user's Confluence account, the set of selected page `ids` and the connected account's `data_source_id` can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
1715
1723
  # @param sync_files_request [SyncFilesRequest]
1716
1724
  # @param [Hash] opts the optional parameters
1717
1725
  # @return [GenericSuccessResponse]
@@ -1721,7 +1729,7 @@ module Carbon
1721
1729
  end
1722
1730
 
1723
1731
  # Confluence Sync
1724
- # After listing pages in a user&#39;s Confluence account, the set of selected page &#x60;ids&#x60; and the connected account&#39;s &#x60;data_source_id&#x60; can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
1732
+ # This endpoint has been deprecated. Use /integrations/files/sync instead. After listing pages in a user&#39;s Confluence account, the set of selected page &#x60;ids&#x60; and the connected account&#39;s &#x60;data_source_id&#x60; can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
1725
1733
  # @param sync_files_request [SyncFilesRequest]
1726
1734
  # @param [Hash] opts the optional parameters
1727
1735
  # @return [APIResponse] data is GenericSuccessResponse, status code, headers and response
@@ -1896,7 +1904,7 @@ module Carbon
1896
1904
  # @param file_sync_config [FileSyncConfigNullable]
1897
1905
  # @param body [SyncFilesRequest]
1898
1906
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1899
- def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '50140a8f-c06f-40df-9b91-d10d074bcb67', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1907
+ def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1900
1908
  _body = {}
1901
1909
  _body[:tags] = tags if tags != SENTINEL
1902
1910
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1944,7 +1952,7 @@ module Carbon
1944
1952
  # @param file_sync_config [FileSyncConfigNullable]
1945
1953
  # @param body [SyncFilesRequest]
1946
1954
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1947
- def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '50140a8f-c06f-40df-9b91-d10d074bcb67', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1955
+ def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'ca60b474-8b43-4b44-9deb-adb701e40610', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1948
1956
  _body = {}
1949
1957
  _body[:tags] = tags if tags != SENTINEL
1950
1958
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -436,9 +436,10 @@ module Carbon
436
436
  # @param embedding_model [EmbeddingGenerators]
437
437
  # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
438
438
  # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
439
+ # @param urls_to_scrape [Array<String>] You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs, you can check out the /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
439
440
  # @param body [SitemapScrapeRequest]
440
441
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
441
- def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
442
+ def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, urls_to_scrape: SENTINEL, extra: {})
442
443
  _body = {}
443
444
  _body[:tags] = tags if tags != SENTINEL
444
445
  _body[:url] = url if url != SENTINEL
@@ -455,6 +456,7 @@ module Carbon
455
456
  _body[:embedding_model] = embedding_model if embedding_model != SENTINEL
456
457
  _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
457
458
  _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
459
+ _body[:urls_to_scrape] = urls_to_scrape if urls_to_scrape != SENTINEL
458
460
  sitemap_scrape_request = _body
459
461
  api_response = scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
460
462
  api_response.data
@@ -485,9 +487,10 @@ module Carbon
485
487
  # @param embedding_model [EmbeddingGenerators]
486
488
  # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
487
489
  # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
490
+ # @param urls_to_scrape [Array<String>] You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs, you can check out the /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
488
491
  # @param body [SitemapScrapeRequest]
489
492
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
490
- def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
493
+ def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, urls_to_scrape: SENTINEL, extra: {})
491
494
  _body = {}
492
495
  _body[:tags] = tags if tags != SENTINEL
493
496
  _body[:url] = url if url != SENTINEL
@@ -504,6 +507,7 @@ module Carbon
504
507
  _body[:embedding_model] = embedding_model if embedding_model != SENTINEL
505
508
  _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
506
509
  _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
510
+ _body[:urls_to_scrape] = urls_to_scrape if urls_to_scrape != SENTINEL
507
511
  sitemap_scrape_request = _body
508
512
  scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
509
513
  end
@@ -61,7 +61,7 @@ module Carbon
61
61
 
62
62
  attr_accessor :parse_pdf_tables_with_ocr
63
63
 
64
- # Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, DROPBOX, ONEDRIVE, GOOGLE_DRIVE, BOX
64
+ # Enable integration's file picker for sources that support it. Supported sources: BOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, DROPBOX
65
65
  attr_accessor :enable_file_picker
66
66
 
67
67
  # Enabling this flag will fetch all available content from the source to be listed via list items endpoint
@@ -279,7 +279,7 @@ module Carbon
279
279
  if attributes.key?(:'request_id')
280
280
  self.request_id = attributes[:'request_id']
281
281
  else
282
- self.request_id = 'b2935b7f-ee64-4d76-8864-8b6731211938'
282
+ self.request_id = 'd29ec177-bbc6-43c7-9b16-0ee340804c99'
283
283
  end
284
284
 
285
285
  if attributes.key?(:'use_ocr')
@@ -43,6 +43,9 @@ module Carbon
43
43
  # URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
44
44
  attr_accessor :url_paths_to_exclude
45
45
 
46
+ # You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs, you can check out the /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
47
+ attr_accessor :urls_to_scrape
48
+
46
49
  # Attribute mapping from ruby-style variable name to JSON key.
47
50
  def self.attribute_map
48
51
  {
@@ -60,7 +63,8 @@ module Carbon
60
63
  :'css_selectors_to_skip' => :'css_selectors_to_skip',
61
64
  :'embedding_model' => :'embedding_model',
62
65
  :'url_paths_to_include' => :'url_paths_to_include',
63
- :'url_paths_to_exclude' => :'url_paths_to_exclude'
66
+ :'url_paths_to_exclude' => :'url_paths_to_exclude',
67
+ :'urls_to_scrape' => :'urls_to_scrape'
64
68
  }
65
69
  end
66
70
 
@@ -86,7 +90,8 @@ module Carbon
86
90
  :'css_selectors_to_skip' => :'Array<String>',
87
91
  :'embedding_model' => :'EmbeddingGenerators',
88
92
  :'url_paths_to_include' => :'Array<String>',
89
- :'url_paths_to_exclude' => :'Array<String>'
93
+ :'url_paths_to_exclude' => :'Array<String>',
94
+ :'urls_to_scrape' => :'Array<String>'
90
95
  }
91
96
  end
92
97
 
@@ -105,7 +110,8 @@ module Carbon
105
110
  :'css_classes_to_skip',
106
111
  :'css_selectors_to_skip',
107
112
  :'url_paths_to_include',
108
- :'url_paths_to_exclude'
113
+ :'url_paths_to_exclude',
114
+ :'urls_to_scrape'
109
115
  ])
110
116
  end
111
117
 
@@ -209,6 +215,12 @@ module Carbon
209
215
  self.url_paths_to_exclude = value
210
216
  end
211
217
  end
218
+
219
+ if attributes.key?(:'urls_to_scrape')
220
+ if (value = attributes[:'urls_to_scrape']).is_a?(Array)
221
+ self.urls_to_scrape = value
222
+ end
223
+ end
212
224
  end
213
225
 
214
226
  # Show invalid properties with the reasons. Usually used together with valid?
@@ -293,7 +305,8 @@ module Carbon
293
305
  css_selectors_to_skip == o.css_selectors_to_skip &&
294
306
  embedding_model == o.embedding_model &&
295
307
  url_paths_to_include == o.url_paths_to_include &&
296
- url_paths_to_exclude == o.url_paths_to_exclude
308
+ url_paths_to_exclude == o.url_paths_to_exclude &&
309
+ urls_to_scrape == o.urls_to_scrape
297
310
  end
298
311
 
299
312
  # @see the `==` method
@@ -305,7 +318,7 @@ module Carbon
305
318
  # Calculates hash code according to all attributes.
306
319
  # @return [Integer] Hash code
307
320
  def hash
308
- [tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude].hash
321
+ [tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude, urls_to_scrape].hash
309
322
  end
310
323
 
311
324
  # Builds the object from hash
@@ -187,7 +187,7 @@ module Carbon
187
187
  if attributes.key?(:'request_id')
188
188
  self.request_id = attributes[:'request_id']
189
189
  else
190
- self.request_id = '50140a8f-c06f-40df-9b91-d10d074bcb67'
190
+ self.request_id = 'ca60b474-8b43-4b44-9deb-adb701e40610'
191
191
  end
192
192
 
193
193
  if attributes.key?(:'use_ocr')
@@ -182,7 +182,7 @@ module Carbon
182
182
  if attributes.key?(:'request_id')
183
183
  self.request_id = attributes[:'request_id']
184
184
  else
185
- self.request_id = '3a0195db-42f0-48ed-b809-d253f436a8e0'
185
+ self.request_id = '07b02a24-9429-4a3c-aa98-27ff63503082'
186
186
  end
187
187
 
188
188
  if attributes.key?(:'enable_file_picker')
@@ -7,5 +7,5 @@ The version of the OpenAPI document: 1.0.0
7
7
  =end
8
8
 
9
9
  module Carbon
10
- VERSION = '0.2.14'
10
+ VERSION = '0.2.15'
11
11
  end
@@ -98,7 +98,7 @@ describe 'IntegrationsApi' do
98
98
 
99
99
  # unit tests for list_confluence_pages
100
100
  # Confluence List
101
- # To begin listing a user&#39;s Confluence pages, at least a &#x60;data_source_id&#x60; of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user&#39;s page directory, additional requests to this endpoint can be made with the same &#x60;data_source_id&#x60; and with &#x60;parent_id&#x60; set to the id of page from a previous request. For convenience, the &#x60;has_children&#x60; property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the &#x60;parent_id&#x60;.
101
+ # This endpoint has been deprecated. Use /integrations/items/list instead. To begin listing a user&#39;s Confluence pages, at least a &#x60;data_source_id&#x60; of a connected Confluence account must be specified. This base request returns a list of root pages for every space the user has access to in a Confluence instance. To traverse further down the user&#39;s page directory, additional requests to this endpoint can be made with the same &#x60;data_source_id&#x60; and with &#x60;parent_id&#x60; set to the id of page from a previous request. For convenience, the &#x60;has_children&#x60; property in each directory item in the response list will flag which pages will return non-empty lists of pages when set as the &#x60;parent_id&#x60;.
102
102
  # @param list_request
103
103
  # @param [Hash] opts the optional parameters
104
104
  # @return [ListResponse]
@@ -198,7 +198,7 @@ describe 'IntegrationsApi' do
198
198
 
199
199
  # unit tests for sync_confluence
200
200
  # Confluence Sync
201
- # After listing pages in a user&#39;s Confluence account, the set of selected page &#x60;ids&#x60; and the connected account&#39;s &#x60;data_source_id&#x60; can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
201
+ # This endpoint has been deprecated. Use /integrations/files/sync instead. After listing pages in a user&#39;s Confluence account, the set of selected page &#x60;ids&#x60; and the connected account&#39;s &#x60;data_source_id&#x60; can be passed into this endpoint to sync them into Carbon. Additional parameters listed below can be used to associate data to the selected pages or alter the behavior of the sync.
202
202
  # @param sync_files_request
203
203
  # @param [Hash] opts the optional parameters
204
204
  # @return [GenericSuccessResponse]
@@ -109,4 +109,10 @@ describe Carbon::SitemapScrapeRequest do
109
109
  end
110
110
  end
111
111
 
112
+ describe 'test attribute "urls_to_scrape"' do
113
+ it 'should work' do
114
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
115
+ end
116
+ end
117
+
112
118
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: carbon_ruby_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.14
4
+ version: 0.2.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Konfig
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-17 00:00:00.000000000 Z
11
+ date: 2024-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday