carbon_ruby_sdk 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 48a58cba1f17ea688e80ef5572f4944be16851ffe4ab0bed12010fe0f8b12f3e
4
- data.tar.gz: 637e68cdedc5092c2c3de53c87f99b732cb2ba2eea49f6e0296220cd741dc217
3
+ metadata.gz: af28a3b256d49d38a6aca558d12a49a6e3f2888587dedeef54311ad7d0bd0ac9
4
+ data.tar.gz: a685b15e3ad3ab32463c4bd03c92432c21bc3c7cf3b8f29bcf340cde6e468377
5
5
  SHA512:
6
- metadata.gz: a3ca1057eaaa76eac67308c1f22a46828e1abc53732e8ab309d4d9fe53f87a59cd9712e9d541d2ef7f3b05854dbef675ab80fbf38ea7cd2b54b1cb2cab41f002
7
- data.tar.gz: 4354aafdeb4fdd30b24c74b266b0c839eadcba4776590a412b77f54bff85465e49f734e538bc634f3d176d20c1157932a9dc7470acb3cbebc3572619bdceb0e2
6
+ metadata.gz: 023c5d51386e0e76ecd6954f25c69a8562302f89de2a1a15cd72fbfc0e3491c6129ad496da1ad9dbfd80a0030c30ba501cb2e8552cbdd92c07513a20f0257211
7
+ data.tar.gz: 465fbc642ce7bf817b5c5d647d34c1c963e254a21facef003efc0a60a5074dad6c1b642a1093e3f0e9228505bde79b9f5c55e263827987395734718b019c7a06
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- carbon_ruby_sdk (0.2.1)
4
+ carbon_ruby_sdk (0.2.2)
5
5
  faraday (>= 1.0.1, < 3.0)
6
6
  faraday-multipart (~> 1.0, >= 1.0.4)
7
7
 
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  Connect external data to LLMs, no matter the source.
8
8
 
9
- [![npm](https://img.shields.io/badge/gem-v0.2.2-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.2)
9
+ [![npm](https://img.shields.io/badge/gem-v0.2.3-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.3)
10
10
 
11
11
  </div>
12
12
 
@@ -86,7 +86,7 @@ Connect external data to LLMs, no matter the source.
86
86
  Add to Gemfile:
87
87
 
88
88
  ```ruby
89
- gem 'carbon_ruby_sdk', '~> 0.2.2'
89
+ gem 'carbon_ruby_sdk', '~> 0.2.3'
90
90
  ```
91
91
 
92
92
  ## Getting Started<a id="getting-started"></a>
@@ -1240,7 +1240,7 @@ result = carbon.integrations.connect_data_source(
1240
1240
  "prepend_filename_to_chunks" => false,
1241
1241
  "sync_files_on_connection" => true,
1242
1242
  "set_page_as_boundary" => false,
1243
- "request_id" => "368135ce-5cca-4fb5-a19d-42b9a409af35",
1243
+ "request_id" => "652297b9-0f55-46d8-869d-13a36e89e5da",
1244
1244
  "enable_file_picker" => true,
1245
1245
  "sync_source_items" => true,
1246
1246
  "incremental_sync" => false,
@@ -1459,7 +1459,7 @@ result = carbon.integrations.get_oauth_url(
1459
1459
  set_page_as_boundary: false,
1460
1460
  data_source_id: 1,
1461
1461
  connecting_new_account: false,
1462
- request_id: "2e662fad-1193-4482-a2d7-ec7b821a9d2b",
1462
+ request_id: "71f214fa-2155-41cb-9336-9b3070e86897",
1463
1463
  use_ocr: false,
1464
1464
  parse_pdf_tables_with_ocr: false,
1465
1465
  enable_file_picker: true,
@@ -1519,7 +1519,7 @@ Enable OCR for files that support it. Supported formats: pdf
1519
1519
  ##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
1520
1520
  ##### enable_file_picker: `Boolean`<a id="enable_file_picker-boolean"></a>
1521
1521
  Enable integration's file picker for sources that support it. Supported sources:
1522
- SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
1522
+ DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
1523
1523
 
1524
1524
  ##### sync_source_items: `Boolean`<a id="sync_source_items-boolean"></a>
1525
1525
  Enabling this flag will fetch all available content from the source to be listed
@@ -1781,7 +1781,7 @@ result = carbon.integrations.sync_confluence(
1781
1781
  prepend_filename_to_chunks: false,
1782
1782
  max_items_per_chunk: 1,
1783
1783
  set_page_as_boundary: false,
1784
- request_id: "dd2130b5-0f9f-4f3a-b450-f3fa458763ae",
1784
+ request_id: "6136b467-242e-49df-9478-d3e0cfdde299",
1785
1785
  use_ocr: false,
1786
1786
  parse_pdf_tables_with_ocr: false,
1787
1787
  incremental_sync: false,
@@ -1884,7 +1884,7 @@ result = carbon.integrations.sync_files(
1884
1884
  prepend_filename_to_chunks: false,
1885
1885
  max_items_per_chunk: 1,
1886
1886
  set_page_as_boundary: false,
1887
- request_id: "dd2130b5-0f9f-4f3a-b450-f3fa458763ae",
1887
+ request_id: "6136b467-242e-49df-9478-d3e0cfdde299",
1888
1888
  use_ocr: false,
1889
1889
  parse_pdf_tables_with_ocr: false,
1890
1890
  incremental_sync: false,
@@ -2741,6 +2741,8 @@ result = carbon.utilities.scrape_sitemap(
2741
2741
  css_classes_to_skip: [],
2742
2742
  css_selectors_to_skip: [],
2743
2743
  embedding_model: "OPENAI",
2744
+ url_paths_to_include: [],
2745
+ url_paths_to_exclude: [],
2744
2746
  )
2745
2747
  p result
2746
2748
  ```
@@ -2760,6 +2762,16 @@ p result
2760
2762
  ##### css_classes_to_skip: Array<`String`><a id="css_classes_to_skip-array"></a>
2761
2763
  ##### css_selectors_to_skip: Array<`String`><a id="css_selectors_to_skip-array"></a>
2762
2764
  ##### embedding_model: [`EmbeddingGenerators`](./lib/carbon_ruby_sdk/models/embedding_generators.rb)<a id="embedding_model-embeddinggeneratorslibcarbon_ruby_sdkmodelsembedding_generatorsrb"></a>
2765
+ ##### url_paths_to_include: Array<`String`><a id="url_paths_to_include-array"></a>
2766
+ URL subpaths or directories that you want to include. For example if you want to
2767
+ only include URLs that start with /questions in stackoverflow.com, you will add
2768
+ /questions/ in this input
2769
+
2770
+ ##### url_paths_to_exclude: Array<`String`><a id="url_paths_to_exclude-array"></a>
2771
+ URL subpaths or directories that you want to exclude. For example if you want to
2772
+ exclude URLs that start with /questions in stackoverflow.com, you will add
2773
+ /questions/ in this input
2774
+
2763
2775
  #### 🌐 Endpoint<a id="🌐-endpoint"></a>
2764
2776
 
2765
2777
  `/scrape_sitemap` `POST`
@@ -2799,6 +2811,7 @@ result = carbon.utilities.scrape_web(
2799
2811
  "css_classes_to_skip" => [],
2800
2812
  "css_selectors_to_skip" => [],
2801
2813
  "embedding_model" => "OPENAI",
2814
+ "url_paths_to_include" => [],
2802
2815
  }
2803
2816
  ],
2804
2817
  )
@@ -653,13 +653,13 @@ module Carbon
653
653
  # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
654
654
  # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
655
655
  # @param parse_pdf_tables_with_ocr [Boolean]
656
- # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
656
+ # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
657
657
  # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
658
658
  # @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
659
659
  # @param file_sync_config [FileSyncConfigNullable]
660
660
  # @param body [OAuthURLRequest]
661
661
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
662
- def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '2e662fad-1193-4482-a2d7-ec7b821a9d2b', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
662
+ def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '71f214fa-2155-41cb-9336-9b3070e86897', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
663
663
  _body = {}
664
664
  _body[:tags] = tags if tags != SENTINEL
665
665
  _body[:scope] = scope if scope != SENTINEL
@@ -721,13 +721,13 @@ module Carbon
721
721
  # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
722
722
  # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
723
723
  # @param parse_pdf_tables_with_ocr [Boolean]
724
- # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
724
+ # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
725
725
  # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
726
726
  # @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
727
727
  # @param file_sync_config [FileSyncConfigNullable]
728
728
  # @param body [OAuthURLRequest]
729
729
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
730
- def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '2e662fad-1193-4482-a2d7-ec7b821a9d2b', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
730
+ def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '71f214fa-2155-41cb-9336-9b3070e86897', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
731
731
  _body = {}
732
732
  _body[:tags] = tags if tags != SENTINEL
733
733
  _body[:scope] = scope if scope != SENTINEL
@@ -1523,7 +1523,7 @@ module Carbon
1523
1523
  # @param file_sync_config [FileSyncConfigNullable]
1524
1524
  # @param body [SyncFilesRequest]
1525
1525
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1526
- def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1526
+ def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1527
1527
  _body = {}
1528
1528
  _body[:tags] = tags if tags != SENTINEL
1529
1529
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1571,7 +1571,7 @@ module Carbon
1571
1571
  # @param file_sync_config [FileSyncConfigNullable]
1572
1572
  # @param body [SyncFilesRequest]
1573
1573
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1574
- def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1574
+ def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1575
1575
  _body = {}
1576
1576
  _body[:tags] = tags if tags != SENTINEL
1577
1577
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1779,7 +1779,7 @@ module Carbon
1779
1779
  # @param file_sync_config [FileSyncConfigNullable]
1780
1780
  # @param body [SyncFilesRequest]
1781
1781
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1782
- def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1782
+ def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1783
1783
  _body = {}
1784
1784
  _body[:tags] = tags if tags != SENTINEL
1785
1785
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1827,7 +1827,7 @@ module Carbon
1827
1827
  # @param file_sync_config [FileSyncConfigNullable]
1828
1828
  # @param body [SyncFilesRequest]
1829
1829
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1830
- def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1830
+ def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1831
1831
  _body = {}
1832
1832
  _body[:tags] = tags if tags != SENTINEL
1833
1833
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -342,9 +342,11 @@ module Carbon
342
342
  # @param css_classes_to_skip [Array<String>]
343
343
  # @param css_selectors_to_skip [Array<String>]
344
344
  # @param embedding_model [EmbeddingGenerators]
345
+ # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
346
+ # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
345
347
  # @param body [SitemapScrapeRequest]
346
348
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
347
- def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
349
+ def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
348
350
  _body = {}
349
351
  _body[:tags] = tags if tags != SENTINEL
350
352
  _body[:url] = url if url != SENTINEL
@@ -359,6 +361,8 @@ module Carbon
359
361
  _body[:css_classes_to_skip] = css_classes_to_skip if css_classes_to_skip != SENTINEL
360
362
  _body[:css_selectors_to_skip] = css_selectors_to_skip if css_selectors_to_skip != SENTINEL
361
363
  _body[:embedding_model] = embedding_model if embedding_model != SENTINEL
364
+ _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
365
+ _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
362
366
  sitemap_scrape_request = _body
363
367
  api_response = scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
364
368
  api_response.data
@@ -387,9 +391,11 @@ module Carbon
387
391
  # @param css_classes_to_skip [Array<String>]
388
392
  # @param css_selectors_to_skip [Array<String>]
389
393
  # @param embedding_model [EmbeddingGenerators]
394
+ # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
395
+ # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
390
396
  # @param body [SitemapScrapeRequest]
391
397
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
392
- def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
398
+ def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
393
399
  _body = {}
394
400
  _body[:tags] = tags if tags != SENTINEL
395
401
  _body[:url] = url if url != SENTINEL
@@ -404,6 +410,8 @@ module Carbon
404
410
  _body[:css_classes_to_skip] = css_classes_to_skip if css_classes_to_skip != SENTINEL
405
411
  _body[:css_selectors_to_skip] = css_selectors_to_skip if css_selectors_to_skip != SENTINEL
406
412
  _body[:embedding_model] = embedding_model if embedding_model != SENTINEL
413
+ _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
414
+ _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
407
415
  sitemap_scrape_request = _body
408
416
  scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
409
417
  end
@@ -61,7 +61,7 @@ module Carbon
61
61
 
62
62
  attr_accessor :parse_pdf_tables_with_ocr
63
63
 
64
- # Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
64
+ # Enable integration's file picker for sources that support it. Supported sources: DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
65
65
  attr_accessor :enable_file_picker
66
66
 
67
67
  # Enabling this flag will fetch all available content from the source to be listed via list items endpoint
@@ -279,7 +279,7 @@ module Carbon
279
279
  if attributes.key?(:'request_id')
280
280
  self.request_id = attributes[:'request_id']
281
281
  else
282
- self.request_id = '2e662fad-1193-4482-a2d7-ec7b821a9d2b'
282
+ self.request_id = '71f214fa-2155-41cb-9336-9b3070e86897'
283
283
  end
284
284
 
285
285
  if attributes.key?(:'use_ocr')
@@ -37,6 +37,12 @@ module Carbon
37
37
 
38
38
  attr_accessor :embedding_model
39
39
 
40
+ # URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
41
+ attr_accessor :url_paths_to_include
42
+
43
+ # URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
44
+ attr_accessor :url_paths_to_exclude
45
+
40
46
  # Attribute mapping from ruby-style variable name to JSON key.
41
47
  def self.attribute_map
42
48
  {
@@ -52,7 +58,9 @@ module Carbon
52
58
  :'html_tags_to_skip' => :'html_tags_to_skip',
53
59
  :'css_classes_to_skip' => :'css_classes_to_skip',
54
60
  :'css_selectors_to_skip' => :'css_selectors_to_skip',
55
- :'embedding_model' => :'embedding_model'
61
+ :'embedding_model' => :'embedding_model',
62
+ :'url_paths_to_include' => :'url_paths_to_include',
63
+ :'url_paths_to_exclude' => :'url_paths_to_exclude'
56
64
  }
57
65
  end
58
66
 
@@ -76,7 +84,9 @@ module Carbon
76
84
  :'html_tags_to_skip' => :'Array<String>',
77
85
  :'css_classes_to_skip' => :'Array<String>',
78
86
  :'css_selectors_to_skip' => :'Array<String>',
79
- :'embedding_model' => :'EmbeddingGenerators'
87
+ :'embedding_model' => :'EmbeddingGenerators',
88
+ :'url_paths_to_include' => :'Array<String>',
89
+ :'url_paths_to_exclude' => :'Array<String>'
80
90
  }
81
91
  end
82
92
 
@@ -94,6 +104,8 @@ module Carbon
94
104
  :'html_tags_to_skip',
95
105
  :'css_classes_to_skip',
96
106
  :'css_selectors_to_skip',
107
+ :'url_paths_to_include',
108
+ :'url_paths_to_exclude'
97
109
  ])
98
110
  end
99
111
 
@@ -185,6 +197,18 @@ module Carbon
185
197
  else
186
198
  self.embedding_model = 'OPENAI'
187
199
  end
200
+
201
+ if attributes.key?(:'url_paths_to_include')
202
+ if (value = attributes[:'url_paths_to_include']).is_a?(Array)
203
+ self.url_paths_to_include = value
204
+ end
205
+ end
206
+
207
+ if attributes.key?(:'url_paths_to_exclude')
208
+ if (value = attributes[:'url_paths_to_exclude']).is_a?(Array)
209
+ self.url_paths_to_exclude = value
210
+ end
211
+ end
188
212
  end
189
213
 
190
214
  # Show invalid properties with the reasons. Usually used together with valid?
@@ -199,6 +223,14 @@ module Carbon
199
223
  invalid_properties.push('invalid value for "max_pages_to_scrape", must be greater than or equal to 1.')
200
224
  end
201
225
 
226
+ if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
227
+ invalid_properties.push('invalid value for "url_paths_to_include", number of items must be less than or equal to 10.')
228
+ end
229
+
230
+ if !@url_paths_to_exclude.nil? && @url_paths_to_exclude.length > 10
231
+ invalid_properties.push('invalid value for "url_paths_to_exclude", number of items must be less than or equal to 10.')
232
+ end
233
+
202
234
  invalid_properties
203
235
  end
204
236
 
@@ -207,6 +239,8 @@ module Carbon
207
239
  def valid?
208
240
  return false if @url.nil?
209
241
  return false if !@max_pages_to_scrape.nil? && @max_pages_to_scrape < 1
242
+ return false if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
243
+ return false if !@url_paths_to_exclude.nil? && @url_paths_to_exclude.length > 10
210
244
  true
211
245
  end
212
246
 
@@ -220,6 +254,26 @@ module Carbon
220
254
  @max_pages_to_scrape = max_pages_to_scrape
221
255
  end
222
256
 
257
+ # Custom attribute writer method with validation
258
+ # @param [Object] url_paths_to_include Value to be assigned
259
+ def url_paths_to_include=(url_paths_to_include)
260
+ if !url_paths_to_include.nil? && url_paths_to_include.length > 10
261
+ fail ArgumentError, 'invalid value for "url_paths_to_include", number of items must be less than or equal to 10.'
262
+ end
263
+
264
+ @url_paths_to_include = url_paths_to_include
265
+ end
266
+
267
+ # Custom attribute writer method with validation
268
+ # @param [Object] url_paths_to_exclude Value to be assigned
269
+ def url_paths_to_exclude=(url_paths_to_exclude)
270
+ if !url_paths_to_exclude.nil? && url_paths_to_exclude.length > 10
271
+ fail ArgumentError, 'invalid value for "url_paths_to_exclude", number of items must be less than or equal to 10.'
272
+ end
273
+
274
+ @url_paths_to_exclude = url_paths_to_exclude
275
+ end
276
+
223
277
  # Checks equality by comparing each attribute.
224
278
  # @param [Object] Object to be compared
225
279
  def ==(o)
@@ -237,7 +291,9 @@ module Carbon
237
291
  html_tags_to_skip == o.html_tags_to_skip &&
238
292
  css_classes_to_skip == o.css_classes_to_skip &&
239
293
  css_selectors_to_skip == o.css_selectors_to_skip &&
240
- embedding_model == o.embedding_model
294
+ embedding_model == o.embedding_model &&
295
+ url_paths_to_include == o.url_paths_to_include &&
296
+ url_paths_to_exclude == o.url_paths_to_exclude
241
297
  end
242
298
 
243
299
  # @see the `==` method
@@ -249,7 +305,7 @@ module Carbon
249
305
  # Calculates hash code according to all attributes.
250
306
  # @return [Integer] Hash code
251
307
  def hash
252
- [tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model].hash
308
+ [tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude].hash
253
309
  end
254
310
 
255
311
  # Builds the object from hash
@@ -187,7 +187,7 @@ module Carbon
187
187
  if attributes.key?(:'request_id')
188
188
  self.request_id = attributes[:'request_id']
189
189
  else
190
- self.request_id = 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae'
190
+ self.request_id = '6136b467-242e-49df-9478-d3e0cfdde299'
191
191
  end
192
192
 
193
193
  if attributes.key?(:'use_ocr')
@@ -182,7 +182,7 @@ module Carbon
182
182
  if attributes.key?(:'request_id')
183
183
  self.request_id = attributes[:'request_id']
184
184
  else
185
- self.request_id = '368135ce-5cca-4fb5-a19d-42b9a409af35'
185
+ self.request_id = '652297b9-0f55-46d8-869d-13a36e89e5da'
186
186
  end
187
187
 
188
188
  if attributes.key?(:'enable_file_picker')
@@ -39,6 +39,9 @@ module Carbon
39
39
 
40
40
  attr_accessor :embedding_model
41
41
 
42
+ # URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
43
+ attr_accessor :url_paths_to_include
44
+
42
45
  # Attribute mapping from ruby-style variable name to JSON key.
43
46
  def self.attribute_map
44
47
  {
@@ -55,7 +58,8 @@ module Carbon
55
58
  :'html_tags_to_skip' => :'html_tags_to_skip',
56
59
  :'css_classes_to_skip' => :'css_classes_to_skip',
57
60
  :'css_selectors_to_skip' => :'css_selectors_to_skip',
58
- :'embedding_model' => :'embedding_model'
61
+ :'embedding_model' => :'embedding_model',
62
+ :'url_paths_to_include' => :'url_paths_to_include'
59
63
  }
60
64
  end
61
65
 
@@ -80,7 +84,8 @@ module Carbon
80
84
  :'html_tags_to_skip' => :'Array<String>',
81
85
  :'css_classes_to_skip' => :'Array<String>',
82
86
  :'css_selectors_to_skip' => :'Array<String>',
83
- :'embedding_model' => :'EmbeddingGenerators'
87
+ :'embedding_model' => :'EmbeddingGenerators',
88
+ :'url_paths_to_include' => :'Array<String>'
84
89
  }
85
90
  end
86
91
 
@@ -99,6 +104,7 @@ module Carbon
99
104
  :'html_tags_to_skip',
100
105
  :'css_classes_to_skip',
101
106
  :'css_selectors_to_skip',
107
+ :'url_paths_to_include'
102
108
  ])
103
109
  end
104
110
 
@@ -198,6 +204,12 @@ module Carbon
198
204
  else
199
205
  self.embedding_model = 'OPENAI'
200
206
  end
207
+
208
+ if attributes.key?(:'url_paths_to_include')
209
+ if (value = attributes[:'url_paths_to_include']).is_a?(Array)
210
+ self.url_paths_to_include = value
211
+ end
212
+ end
201
213
  end
202
214
 
203
215
  # Show invalid properties with the reasons. Usually used together with valid?
@@ -216,6 +228,10 @@ module Carbon
216
228
  invalid_properties.push('invalid value for "max_pages_to_scrape", must be greater than or equal to 1.')
217
229
  end
218
230
 
231
+ if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
232
+ invalid_properties.push('invalid value for "url_paths_to_include", number of items must be less than or equal to 10.')
233
+ end
234
+
219
235
  invalid_properties
220
236
  end
221
237
 
@@ -225,6 +241,7 @@ module Carbon
225
241
  return false if @url.nil?
226
242
  return false if !@recursion_depth.nil? && @recursion_depth < 0
227
243
  return false if !@max_pages_to_scrape.nil? && @max_pages_to_scrape < 1
244
+ return false if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
228
245
  true
229
246
  end
230
247
 
@@ -248,6 +265,16 @@ module Carbon
248
265
  @max_pages_to_scrape = max_pages_to_scrape
249
266
  end
250
267
 
268
+ # Custom attribute writer method with validation
269
+ # @param [Object] url_paths_to_include Value to be assigned
270
+ def url_paths_to_include=(url_paths_to_include)
271
+ if !url_paths_to_include.nil? && url_paths_to_include.length > 10
272
+ fail ArgumentError, 'invalid value for "url_paths_to_include", number of items must be less than or equal to 10.'
273
+ end
274
+
275
+ @url_paths_to_include = url_paths_to_include
276
+ end
277
+
251
278
  # Checks equality by comparing each attribute.
252
279
  # @param [Object] Object to be compared
253
280
  def ==(o)
@@ -266,7 +293,8 @@ module Carbon
266
293
  html_tags_to_skip == o.html_tags_to_skip &&
267
294
  css_classes_to_skip == o.css_classes_to_skip &&
268
295
  css_selectors_to_skip == o.css_selectors_to_skip &&
269
- embedding_model == o.embedding_model
296
+ embedding_model == o.embedding_model &&
297
+ url_paths_to_include == o.url_paths_to_include
270
298
  end
271
299
 
272
300
  # @see the `==` method
@@ -278,7 +306,7 @@ module Carbon
278
306
  # Calculates hash code according to all attributes.
279
307
  # @return [Integer] Hash code
280
308
  def hash
281
- [tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model].hash
309
+ [tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include].hash
282
310
  end
283
311
 
284
312
  # Builds the object from hash
@@ -7,5 +7,5 @@ The version of the OpenAPI document: 1.0.0
7
7
  =end
8
8
 
9
9
  module Carbon
10
- VERSION = '0.2.2'
10
+ VERSION = '0.2.3'
11
11
  end
@@ -97,4 +97,16 @@ describe Carbon::SitemapScrapeRequest do
97
97
  end
98
98
  end
99
99
 
100
+ describe 'test attribute "url_paths_to_include"' do
101
+ it 'should work' do
102
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
103
+ end
104
+ end
105
+
106
+ describe 'test attribute "url_paths_to_exclude"' do
107
+ it 'should work' do
108
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
109
+ end
110
+ end
111
+
100
112
  end
@@ -103,4 +103,10 @@ describe Carbon::WebscrapeRequest do
103
103
  end
104
104
  end
105
105
 
106
+ describe 'test attribute "url_paths_to_include"' do
107
+ it 'should work' do
108
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
109
+ end
110
+ end
111
+
106
112
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: carbon_ruby_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Konfig
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-06-01 00:00:00.000000000 Z
11
+ date: 2024-06-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday