carbon_ruby_sdk 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 48a58cba1f17ea688e80ef5572f4944be16851ffe4ab0bed12010fe0f8b12f3e
4
- data.tar.gz: 637e68cdedc5092c2c3de53c87f99b732cb2ba2eea49f6e0296220cd741dc217
3
+ metadata.gz: af28a3b256d49d38a6aca558d12a49a6e3f2888587dedeef54311ad7d0bd0ac9
4
+ data.tar.gz: a685b15e3ad3ab32463c4bd03c92432c21bc3c7cf3b8f29bcf340cde6e468377
5
5
  SHA512:
6
- metadata.gz: a3ca1057eaaa76eac67308c1f22a46828e1abc53732e8ab309d4d9fe53f87a59cd9712e9d541d2ef7f3b05854dbef675ab80fbf38ea7cd2b54b1cb2cab41f002
7
- data.tar.gz: 4354aafdeb4fdd30b24c74b266b0c839eadcba4776590a412b77f54bff85465e49f734e538bc634f3d176d20c1157932a9dc7470acb3cbebc3572619bdceb0e2
6
+ metadata.gz: 023c5d51386e0e76ecd6954f25c69a8562302f89de2a1a15cd72fbfc0e3491c6129ad496da1ad9dbfd80a0030c30ba501cb2e8552cbdd92c07513a20f0257211
7
+ data.tar.gz: 465fbc642ce7bf817b5c5d647d34c1c963e254a21facef003efc0a60a5074dad6c1b642a1093e3f0e9228505bde79b9f5c55e263827987395734718b019c7a06
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- carbon_ruby_sdk (0.2.1)
4
+ carbon_ruby_sdk (0.2.2)
5
5
  faraday (>= 1.0.1, < 3.0)
6
6
  faraday-multipart (~> 1.0, >= 1.0.4)
7
7
 
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  Connect external data to LLMs, no matter the source.
8
8
 
9
- [![npm](https://img.shields.io/badge/gem-v0.2.2-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.2)
9
+ [![npm](https://img.shields.io/badge/gem-v0.2.3-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.3)
10
10
 
11
11
  </div>
12
12
 
@@ -86,7 +86,7 @@ Connect external data to LLMs, no matter the source.
86
86
  Add to Gemfile:
87
87
 
88
88
  ```ruby
89
- gem 'carbon_ruby_sdk', '~> 0.2.2'
89
+ gem 'carbon_ruby_sdk', '~> 0.2.3'
90
90
  ```
91
91
 
92
92
  ## Getting Started<a id="getting-started"></a>
@@ -1240,7 +1240,7 @@ result = carbon.integrations.connect_data_source(
1240
1240
  "prepend_filename_to_chunks" => false,
1241
1241
  "sync_files_on_connection" => true,
1242
1242
  "set_page_as_boundary" => false,
1243
- "request_id" => "368135ce-5cca-4fb5-a19d-42b9a409af35",
1243
+ "request_id" => "652297b9-0f55-46d8-869d-13a36e89e5da",
1244
1244
  "enable_file_picker" => true,
1245
1245
  "sync_source_items" => true,
1246
1246
  "incremental_sync" => false,
@@ -1459,7 +1459,7 @@ result = carbon.integrations.get_oauth_url(
1459
1459
  set_page_as_boundary: false,
1460
1460
  data_source_id: 1,
1461
1461
  connecting_new_account: false,
1462
- request_id: "2e662fad-1193-4482-a2d7-ec7b821a9d2b",
1462
+ request_id: "71f214fa-2155-41cb-9336-9b3070e86897",
1463
1463
  use_ocr: false,
1464
1464
  parse_pdf_tables_with_ocr: false,
1465
1465
  enable_file_picker: true,
@@ -1519,7 +1519,7 @@ Enable OCR for files that support it. Supported formats: pdf
1519
1519
  ##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
1520
1520
  ##### enable_file_picker: `Boolean`<a id="enable_file_picker-boolean"></a>
1521
1521
  Enable integration's file picker for sources that support it. Supported sources:
1522
- SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
1522
+ DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
1523
1523
 
1524
1524
  ##### sync_source_items: `Boolean`<a id="sync_source_items-boolean"></a>
1525
1525
  Enabling this flag will fetch all available content from the source to be listed
@@ -1781,7 +1781,7 @@ result = carbon.integrations.sync_confluence(
1781
1781
  prepend_filename_to_chunks: false,
1782
1782
  max_items_per_chunk: 1,
1783
1783
  set_page_as_boundary: false,
1784
- request_id: "dd2130b5-0f9f-4f3a-b450-f3fa458763ae",
1784
+ request_id: "6136b467-242e-49df-9478-d3e0cfdde299",
1785
1785
  use_ocr: false,
1786
1786
  parse_pdf_tables_with_ocr: false,
1787
1787
  incremental_sync: false,
@@ -1884,7 +1884,7 @@ result = carbon.integrations.sync_files(
1884
1884
  prepend_filename_to_chunks: false,
1885
1885
  max_items_per_chunk: 1,
1886
1886
  set_page_as_boundary: false,
1887
- request_id: "dd2130b5-0f9f-4f3a-b450-f3fa458763ae",
1887
+ request_id: "6136b467-242e-49df-9478-d3e0cfdde299",
1888
1888
  use_ocr: false,
1889
1889
  parse_pdf_tables_with_ocr: false,
1890
1890
  incremental_sync: false,
@@ -2741,6 +2741,8 @@ result = carbon.utilities.scrape_sitemap(
2741
2741
  css_classes_to_skip: [],
2742
2742
  css_selectors_to_skip: [],
2743
2743
  embedding_model: "OPENAI",
2744
+ url_paths_to_include: [],
2745
+ url_paths_to_exclude: [],
2744
2746
  )
2745
2747
  p result
2746
2748
  ```
@@ -2760,6 +2762,16 @@ p result
2760
2762
  ##### css_classes_to_skip: Array<`String`><a id="css_classes_to_skip-array"></a>
2761
2763
  ##### css_selectors_to_skip: Array<`String`><a id="css_selectors_to_skip-array"></a>
2762
2764
  ##### embedding_model: [`EmbeddingGenerators`](./lib/carbon_ruby_sdk/models/embedding_generators.rb)<a id="embedding_model-embeddinggeneratorslibcarbon_ruby_sdkmodelsembedding_generatorsrb"></a>
2765
+ ##### url_paths_to_include: Array<`String`><a id="url_paths_to_include-array"></a>
2766
+ URL subpaths or directories that you want to include. For example if you want to
2767
+ only include URLs that start with /questions in stackoverflow.com, you will add
2768
+ /questions/ in this input
2769
+
2770
+ ##### url_paths_to_exclude: Array<`String`><a id="url_paths_to_exclude-array"></a>
2771
+ URL subpaths or directories that you want to exclude. For example if you want to
2772
+ exclude URLs that start with /questions in stackoverflow.com, you will add
2773
+ /questions/ in this input
2774
+
2763
2775
  #### 🌐 Endpoint<a id="🌐-endpoint"></a>
2764
2776
 
2765
2777
  `/scrape_sitemap` `POST`
@@ -2799,6 +2811,7 @@ result = carbon.utilities.scrape_web(
2799
2811
  "css_classes_to_skip" => [],
2800
2812
  "css_selectors_to_skip" => [],
2801
2813
  "embedding_model" => "OPENAI",
2814
+ "url_paths_to_include" => [],
2802
2815
  }
2803
2816
  ],
2804
2817
  )
@@ -653,13 +653,13 @@ module Carbon
653
653
  # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
654
654
  # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
655
655
  # @param parse_pdf_tables_with_ocr [Boolean]
656
- # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
656
+ # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
657
657
  # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
658
658
  # @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
659
659
  # @param file_sync_config [FileSyncConfigNullable]
660
660
  # @param body [OAuthURLRequest]
661
661
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
662
- def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '2e662fad-1193-4482-a2d7-ec7b821a9d2b', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
662
+ def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '71f214fa-2155-41cb-9336-9b3070e86897', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
663
663
  _body = {}
664
664
  _body[:tags] = tags if tags != SENTINEL
665
665
  _body[:scope] = scope if scope != SENTINEL
@@ -721,13 +721,13 @@ module Carbon
721
721
  # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
722
722
  # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
723
723
  # @param parse_pdf_tables_with_ocr [Boolean]
724
- # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
724
+ # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
725
725
  # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
726
726
  # @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
727
727
  # @param file_sync_config [FileSyncConfigNullable]
728
728
  # @param body [OAuthURLRequest]
729
729
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
730
- def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '2e662fad-1193-4482-a2d7-ec7b821a9d2b', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
730
+ def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '71f214fa-2155-41cb-9336-9b3070e86897', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
731
731
  _body = {}
732
732
  _body[:tags] = tags if tags != SENTINEL
733
733
  _body[:scope] = scope if scope != SENTINEL
@@ -1523,7 +1523,7 @@ module Carbon
1523
1523
  # @param file_sync_config [FileSyncConfigNullable]
1524
1524
  # @param body [SyncFilesRequest]
1525
1525
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1526
- def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1526
+ def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1527
1527
  _body = {}
1528
1528
  _body[:tags] = tags if tags != SENTINEL
1529
1529
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1571,7 +1571,7 @@ module Carbon
1571
1571
  # @param file_sync_config [FileSyncConfigNullable]
1572
1572
  # @param body [SyncFilesRequest]
1573
1573
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1574
- def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1574
+ def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1575
1575
  _body = {}
1576
1576
  _body[:tags] = tags if tags != SENTINEL
1577
1577
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1779,7 +1779,7 @@ module Carbon
1779
1779
  # @param file_sync_config [FileSyncConfigNullable]
1780
1780
  # @param body [SyncFilesRequest]
1781
1781
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1782
- def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1782
+ def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1783
1783
  _body = {}
1784
1784
  _body[:tags] = tags if tags != SENTINEL
1785
1785
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1827,7 +1827,7 @@ module Carbon
1827
1827
  # @param file_sync_config [FileSyncConfigNullable]
1828
1828
  # @param body [SyncFilesRequest]
1829
1829
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1830
- def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1830
+ def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1831
1831
  _body = {}
1832
1832
  _body[:tags] = tags if tags != SENTINEL
1833
1833
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -342,9 +342,11 @@ module Carbon
342
342
  # @param css_classes_to_skip [Array<String>]
343
343
  # @param css_selectors_to_skip [Array<String>]
344
344
  # @param embedding_model [EmbeddingGenerators]
345
+ # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
346
+ # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
345
347
  # @param body [SitemapScrapeRequest]
346
348
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
347
- def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
349
+ def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
348
350
  _body = {}
349
351
  _body[:tags] = tags if tags != SENTINEL
350
352
  _body[:url] = url if url != SENTINEL
@@ -359,6 +361,8 @@ module Carbon
359
361
  _body[:css_classes_to_skip] = css_classes_to_skip if css_classes_to_skip != SENTINEL
360
362
  _body[:css_selectors_to_skip] = css_selectors_to_skip if css_selectors_to_skip != SENTINEL
361
363
  _body[:embedding_model] = embedding_model if embedding_model != SENTINEL
364
+ _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
365
+ _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
362
366
  sitemap_scrape_request = _body
363
367
  api_response = scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
364
368
  api_response.data
@@ -387,9 +391,11 @@ module Carbon
387
391
  # @param css_classes_to_skip [Array<String>]
388
392
  # @param css_selectors_to_skip [Array<String>]
389
393
  # @param embedding_model [EmbeddingGenerators]
394
+ # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
395
+ # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
390
396
  # @param body [SitemapScrapeRequest]
391
397
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
392
- def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
398
+ def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
393
399
  _body = {}
394
400
  _body[:tags] = tags if tags != SENTINEL
395
401
  _body[:url] = url if url != SENTINEL
@@ -404,6 +410,8 @@ module Carbon
404
410
  _body[:css_classes_to_skip] = css_classes_to_skip if css_classes_to_skip != SENTINEL
405
411
  _body[:css_selectors_to_skip] = css_selectors_to_skip if css_selectors_to_skip != SENTINEL
406
412
  _body[:embedding_model] = embedding_model if embedding_model != SENTINEL
413
+ _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
414
+ _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
407
415
  sitemap_scrape_request = _body
408
416
  scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
409
417
  end
@@ -61,7 +61,7 @@ module Carbon
61
61
 
62
62
  attr_accessor :parse_pdf_tables_with_ocr
63
63
 
64
- # Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
64
+ # Enable integration's file picker for sources that support it. Supported sources: DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
65
65
  attr_accessor :enable_file_picker
66
66
 
67
67
  # Enabling this flag will fetch all available content from the source to be listed via list items endpoint
@@ -279,7 +279,7 @@ module Carbon
279
279
  if attributes.key?(:'request_id')
280
280
  self.request_id = attributes[:'request_id']
281
281
  else
282
- self.request_id = '2e662fad-1193-4482-a2d7-ec7b821a9d2b'
282
+ self.request_id = '71f214fa-2155-41cb-9336-9b3070e86897'
283
283
  end
284
284
 
285
285
  if attributes.key?(:'use_ocr')
@@ -37,6 +37,12 @@ module Carbon
37
37
 
38
38
  attr_accessor :embedding_model
39
39
 
40
+ # URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
41
+ attr_accessor :url_paths_to_include
42
+
43
+ # URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
44
+ attr_accessor :url_paths_to_exclude
45
+
40
46
  # Attribute mapping from ruby-style variable name to JSON key.
41
47
  def self.attribute_map
42
48
  {
@@ -52,7 +58,9 @@ module Carbon
52
58
  :'html_tags_to_skip' => :'html_tags_to_skip',
53
59
  :'css_classes_to_skip' => :'css_classes_to_skip',
54
60
  :'css_selectors_to_skip' => :'css_selectors_to_skip',
55
- :'embedding_model' => :'embedding_model'
61
+ :'embedding_model' => :'embedding_model',
62
+ :'url_paths_to_include' => :'url_paths_to_include',
63
+ :'url_paths_to_exclude' => :'url_paths_to_exclude'
56
64
  }
57
65
  end
58
66
 
@@ -76,7 +84,9 @@ module Carbon
76
84
  :'html_tags_to_skip' => :'Array<String>',
77
85
  :'css_classes_to_skip' => :'Array<String>',
78
86
  :'css_selectors_to_skip' => :'Array<String>',
79
- :'embedding_model' => :'EmbeddingGenerators'
87
+ :'embedding_model' => :'EmbeddingGenerators',
88
+ :'url_paths_to_include' => :'Array<String>',
89
+ :'url_paths_to_exclude' => :'Array<String>'
80
90
  }
81
91
  end
82
92
 
@@ -94,6 +104,8 @@ module Carbon
94
104
  :'html_tags_to_skip',
95
105
  :'css_classes_to_skip',
96
106
  :'css_selectors_to_skip',
107
+ :'url_paths_to_include',
108
+ :'url_paths_to_exclude'
97
109
  ])
98
110
  end
99
111
 
@@ -185,6 +197,18 @@ module Carbon
185
197
  else
186
198
  self.embedding_model = 'OPENAI'
187
199
  end
200
+
201
+ if attributes.key?(:'url_paths_to_include')
202
+ if (value = attributes[:'url_paths_to_include']).is_a?(Array)
203
+ self.url_paths_to_include = value
204
+ end
205
+ end
206
+
207
+ if attributes.key?(:'url_paths_to_exclude')
208
+ if (value = attributes[:'url_paths_to_exclude']).is_a?(Array)
209
+ self.url_paths_to_exclude = value
210
+ end
211
+ end
188
212
  end
189
213
 
190
214
  # Show invalid properties with the reasons. Usually used together with valid?
@@ -199,6 +223,14 @@ module Carbon
199
223
  invalid_properties.push('invalid value for "max_pages_to_scrape", must be greater than or equal to 1.')
200
224
  end
201
225
 
226
+ if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
227
+ invalid_properties.push('invalid value for "url_paths_to_include", number of items must be less than or equal to 10.')
228
+ end
229
+
230
+ if !@url_paths_to_exclude.nil? && @url_paths_to_exclude.length > 10
231
+ invalid_properties.push('invalid value for "url_paths_to_exclude", number of items must be less than or equal to 10.')
232
+ end
233
+
202
234
  invalid_properties
203
235
  end
204
236
 
@@ -207,6 +239,8 @@ module Carbon
207
239
  def valid?
208
240
  return false if @url.nil?
209
241
  return false if !@max_pages_to_scrape.nil? && @max_pages_to_scrape < 1
242
+ return false if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
243
+ return false if !@url_paths_to_exclude.nil? && @url_paths_to_exclude.length > 10
210
244
  true
211
245
  end
212
246
 
@@ -220,6 +254,26 @@ module Carbon
220
254
  @max_pages_to_scrape = max_pages_to_scrape
221
255
  end
222
256
 
257
+ # Custom attribute writer method with validation
258
+ # @param [Object] url_paths_to_include Value to be assigned
259
+ def url_paths_to_include=(url_paths_to_include)
260
+ if !url_paths_to_include.nil? && url_paths_to_include.length > 10
261
+ fail ArgumentError, 'invalid value for "url_paths_to_include", number of items must be less than or equal to 10.'
262
+ end
263
+
264
+ @url_paths_to_include = url_paths_to_include
265
+ end
266
+
267
+ # Custom attribute writer method with validation
268
+ # @param [Object] url_paths_to_exclude Value to be assigned
269
+ def url_paths_to_exclude=(url_paths_to_exclude)
270
+ if !url_paths_to_exclude.nil? && url_paths_to_exclude.length > 10
271
+ fail ArgumentError, 'invalid value for "url_paths_to_exclude", number of items must be less than or equal to 10.'
272
+ end
273
+
274
+ @url_paths_to_exclude = url_paths_to_exclude
275
+ end
276
+
223
277
  # Checks equality by comparing each attribute.
224
278
  # @param [Object] Object to be compared
225
279
  def ==(o)
@@ -237,7 +291,9 @@ module Carbon
237
291
  html_tags_to_skip == o.html_tags_to_skip &&
238
292
  css_classes_to_skip == o.css_classes_to_skip &&
239
293
  css_selectors_to_skip == o.css_selectors_to_skip &&
240
- embedding_model == o.embedding_model
294
+ embedding_model == o.embedding_model &&
295
+ url_paths_to_include == o.url_paths_to_include &&
296
+ url_paths_to_exclude == o.url_paths_to_exclude
241
297
  end
242
298
 
243
299
  # @see the `==` method
@@ -249,7 +305,7 @@ module Carbon
249
305
  # Calculates hash code according to all attributes.
250
306
  # @return [Integer] Hash code
251
307
  def hash
252
- [tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model].hash
308
+ [tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude].hash
253
309
  end
254
310
 
255
311
  # Builds the object from hash
@@ -187,7 +187,7 @@ module Carbon
187
187
  if attributes.key?(:'request_id')
188
188
  self.request_id = attributes[:'request_id']
189
189
  else
190
- self.request_id = 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae'
190
+ self.request_id = '6136b467-242e-49df-9478-d3e0cfdde299'
191
191
  end
192
192
 
193
193
  if attributes.key?(:'use_ocr')
@@ -182,7 +182,7 @@ module Carbon
182
182
  if attributes.key?(:'request_id')
183
183
  self.request_id = attributes[:'request_id']
184
184
  else
185
- self.request_id = '368135ce-5cca-4fb5-a19d-42b9a409af35'
185
+ self.request_id = '652297b9-0f55-46d8-869d-13a36e89e5da'
186
186
  end
187
187
 
188
188
  if attributes.key?(:'enable_file_picker')
@@ -39,6 +39,9 @@ module Carbon
39
39
 
40
40
  attr_accessor :embedding_model
41
41
 
42
+ # URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
43
+ attr_accessor :url_paths_to_include
44
+
42
45
  # Attribute mapping from ruby-style variable name to JSON key.
43
46
  def self.attribute_map
44
47
  {
@@ -55,7 +58,8 @@ module Carbon
55
58
  :'html_tags_to_skip' => :'html_tags_to_skip',
56
59
  :'css_classes_to_skip' => :'css_classes_to_skip',
57
60
  :'css_selectors_to_skip' => :'css_selectors_to_skip',
58
- :'embedding_model' => :'embedding_model'
61
+ :'embedding_model' => :'embedding_model',
62
+ :'url_paths_to_include' => :'url_paths_to_include'
59
63
  }
60
64
  end
61
65
 
@@ -80,7 +84,8 @@ module Carbon
80
84
  :'html_tags_to_skip' => :'Array<String>',
81
85
  :'css_classes_to_skip' => :'Array<String>',
82
86
  :'css_selectors_to_skip' => :'Array<String>',
83
- :'embedding_model' => :'EmbeddingGenerators'
87
+ :'embedding_model' => :'EmbeddingGenerators',
88
+ :'url_paths_to_include' => :'Array<String>'
84
89
  }
85
90
  end
86
91
 
@@ -99,6 +104,7 @@ module Carbon
99
104
  :'html_tags_to_skip',
100
105
  :'css_classes_to_skip',
101
106
  :'css_selectors_to_skip',
107
+ :'url_paths_to_include'
102
108
  ])
103
109
  end
104
110
 
@@ -198,6 +204,12 @@ module Carbon
198
204
  else
199
205
  self.embedding_model = 'OPENAI'
200
206
  end
207
+
208
+ if attributes.key?(:'url_paths_to_include')
209
+ if (value = attributes[:'url_paths_to_include']).is_a?(Array)
210
+ self.url_paths_to_include = value
211
+ end
212
+ end
201
213
  end
202
214
 
203
215
  # Show invalid properties with the reasons. Usually used together with valid?
@@ -216,6 +228,10 @@ module Carbon
216
228
  invalid_properties.push('invalid value for "max_pages_to_scrape", must be greater than or equal to 1.')
217
229
  end
218
230
 
231
+ if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
232
+ invalid_properties.push('invalid value for "url_paths_to_include", number of items must be less than or equal to 10.')
233
+ end
234
+
219
235
  invalid_properties
220
236
  end
221
237
 
@@ -225,6 +241,7 @@ module Carbon
225
241
  return false if @url.nil?
226
242
  return false if !@recursion_depth.nil? && @recursion_depth < 0
227
243
  return false if !@max_pages_to_scrape.nil? && @max_pages_to_scrape < 1
244
+ return false if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
228
245
  true
229
246
  end
230
247
 
@@ -248,6 +265,16 @@ module Carbon
248
265
  @max_pages_to_scrape = max_pages_to_scrape
249
266
  end
250
267
 
268
+ # Custom attribute writer method with validation
269
+ # @param [Object] url_paths_to_include Value to be assigned
270
+ def url_paths_to_include=(url_paths_to_include)
271
+ if !url_paths_to_include.nil? && url_paths_to_include.length > 10
272
+ fail ArgumentError, 'invalid value for "url_paths_to_include", number of items must be less than or equal to 10.'
273
+ end
274
+
275
+ @url_paths_to_include = url_paths_to_include
276
+ end
277
+
251
278
  # Checks equality by comparing each attribute.
252
279
  # @param [Object] Object to be compared
253
280
  def ==(o)
@@ -266,7 +293,8 @@ module Carbon
266
293
  html_tags_to_skip == o.html_tags_to_skip &&
267
294
  css_classes_to_skip == o.css_classes_to_skip &&
268
295
  css_selectors_to_skip == o.css_selectors_to_skip &&
269
- embedding_model == o.embedding_model
296
+ embedding_model == o.embedding_model &&
297
+ url_paths_to_include == o.url_paths_to_include
270
298
  end
271
299
 
272
300
  # @see the `==` method
@@ -278,7 +306,7 @@ module Carbon
278
306
  # Calculates hash code according to all attributes.
279
307
  # @return [Integer] Hash code
280
308
  def hash
281
- [tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model].hash
309
+ [tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include].hash
282
310
  end
283
311
 
284
312
  # Builds the object from hash
@@ -7,5 +7,5 @@ The version of the OpenAPI document: 1.0.0
7
7
  =end
8
8
 
9
9
  module Carbon
10
- VERSION = '0.2.2'
10
+ VERSION = '0.2.3'
11
11
  end
@@ -97,4 +97,16 @@ describe Carbon::SitemapScrapeRequest do
97
97
  end
98
98
  end
99
99
 
100
+ describe 'test attribute "url_paths_to_include"' do
101
+ it 'should work' do
102
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
103
+ end
104
+ end
105
+
106
+ describe 'test attribute "url_paths_to_exclude"' do
107
+ it 'should work' do
108
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
109
+ end
110
+ end
111
+
100
112
  end
@@ -103,4 +103,10 @@ describe Carbon::WebscrapeRequest do
103
103
  end
104
104
  end
105
105
 
106
+ describe 'test attribute "url_paths_to_include"' do
107
+ it 'should work' do
108
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
109
+ end
110
+ end
111
+
106
112
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: carbon_ruby_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Konfig
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-06-01 00:00:00.000000000 Z
11
+ date: 2024-06-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday