carbon_ruby_sdk 0.2.2 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 48a58cba1f17ea688e80ef5572f4944be16851ffe4ab0bed12010fe0f8b12f3e
4
- data.tar.gz: 637e68cdedc5092c2c3de53c87f99b732cb2ba2eea49f6e0296220cd741dc217
3
+ metadata.gz: f36c299546586666be2828957b23ea39f49cffb5f16a0934aab82aaf29d0b1c7
4
+ data.tar.gz: f3299f1ee7db27666209ef10411d43190e8769fba5a852aa609c6ab4721d4dec
5
5
  SHA512:
6
- metadata.gz: a3ca1057eaaa76eac67308c1f22a46828e1abc53732e8ab309d4d9fe53f87a59cd9712e9d541d2ef7f3b05854dbef675ab80fbf38ea7cd2b54b1cb2cab41f002
7
- data.tar.gz: 4354aafdeb4fdd30b24c74b266b0c839eadcba4776590a412b77f54bff85465e49f734e538bc634f3d176d20c1157932a9dc7470acb3cbebc3572619bdceb0e2
6
+ metadata.gz: 42689eebf092d715da57445f2ec9a2e98dd367a6f610b34de15928f8cf7e2f0333a9492bddb40ec5996ff84cbe04907bbad0b83e9e742140af25fd524b208a0e
7
+ data.tar.gz: 22d020e4ea8811d8e5595b62eb3ea3917df41a10e5c1ee6e0d251eb946061b4a24ff1f2f20b579e3be4ae1e91dd7c319c0107e55290b88ad64c96f3662425293
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- carbon_ruby_sdk (0.2.1)
4
+ carbon_ruby_sdk (0.2.4)
5
5
  faraday (>= 1.0.1, < 3.0)
6
6
  faraday-multipart (~> 1.0, >= 1.0.4)
7
7
 
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  Connect external data to LLMs, no matter the source.
8
8
 
9
- [![npm](https://img.shields.io/badge/gem-v0.2.2-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.2)
9
+ [![gem](https://img.shields.io/badge/gem-v0.2.4-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.4)
10
10
 
11
11
  </div>
12
12
 
@@ -86,7 +86,7 @@ Connect external data to LLMs, no matter the source.
86
86
  Add to Gemfile:
87
87
 
88
88
  ```ruby
89
- gem 'carbon_ruby_sdk', '~> 0.2.2'
89
+ gem 'carbon_ruby_sdk', '~> 0.2.4'
90
90
  ```
91
91
 
92
92
  ## Getting Started<a id="getting-started"></a>
@@ -999,6 +999,7 @@ result = carbon.files.upload(
999
999
  parse_pdf_tables_with_ocr: false,
1000
1000
  detect_audio_language: false,
1001
1001
  media_type: "TEXT",
1002
+ split_rows: false,
1002
1003
  )
1003
1004
  p result
1004
1005
  ```
@@ -1048,6 +1049,10 @@ Whether to automatically detect the language of the uploaded audio file.
1048
1049
  The media type of the file. If not provided, it will be inferred from the file
1049
1050
  extension.
1050
1051
 
1052
+ ##### split_rows: `Boolean`<a id="split_rows-boolean"></a>
1053
+ Whether to split tabular rows into chunks. Currently only valid for CSV, TSV,
1054
+ and XLSX files.
1055
+
1051
1056
  #### 🔄 Return<a id="🔄-return"></a>
1052
1057
 
1053
1058
  [UserFile](./lib/carbon_ruby_sdk/models/user_file.rb)
@@ -1083,6 +1088,7 @@ result = carbon.files.upload_from_url(
1083
1088
  parse_pdf_tables_with_ocr: false,
1084
1089
  detect_audio_language: false,
1085
1090
  media_type: "TEXT",
1091
+ split_rows: false,
1086
1092
  )
1087
1093
  p result
1088
1094
  ```
@@ -1105,6 +1111,7 @@ Number of objects per chunk. For csv, tsv, xlsx, and json files only.
1105
1111
  ##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
1106
1112
  ##### detect_audio_language: `Boolean`<a id="detect_audio_language-boolean"></a>
1107
1113
  ##### media_type: [`FileContentTypesNullable`](./lib/carbon_ruby_sdk/models/file_content_types_nullable.rb)<a id="media_type-filecontenttypesnullablelibcarbon_ruby_sdkmodelsfile_content_types_nullablerb"></a>
1114
+ ##### split_rows: `Boolean`<a id="split_rows-boolean"></a>
1108
1115
  #### 🔄 Return<a id="🔄-return"></a>
1109
1116
 
1110
1117
  [UserFile](./lib/carbon_ruby_sdk/models/user_file.rb)
@@ -1240,7 +1247,7 @@ result = carbon.integrations.connect_data_source(
1240
1247
  "prepend_filename_to_chunks" => false,
1241
1248
  "sync_files_on_connection" => true,
1242
1249
  "set_page_as_boundary" => false,
1243
- "request_id" => "368135ce-5cca-4fb5-a19d-42b9a409af35",
1250
+ "request_id" => "07144230-657d-40ab-9fb5-89095bf3fc65",
1244
1251
  "enable_file_picker" => true,
1245
1252
  "sync_source_items" => true,
1246
1253
  "incremental_sync" => false,
@@ -1459,7 +1466,7 @@ result = carbon.integrations.get_oauth_url(
1459
1466
  set_page_as_boundary: false,
1460
1467
  data_source_id: 1,
1461
1468
  connecting_new_account: false,
1462
- request_id: "2e662fad-1193-4482-a2d7-ec7b821a9d2b",
1469
+ request_id: "b7620173-662c-4ae7-bb61-2e6ffd8619f5",
1463
1470
  use_ocr: false,
1464
1471
  parse_pdf_tables_with_ocr: false,
1465
1472
  enable_file_picker: true,
@@ -1519,7 +1526,7 @@ Enable OCR for files that support it. Supported formats: pdf
1519
1526
  ##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
1520
1527
  ##### enable_file_picker: `Boolean`<a id="enable_file_picker-boolean"></a>
1521
1528
  Enable integration's file picker for sources that support it. Supported sources:
1522
- SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
1529
+ DROPBOX, ONEDRIVE, BOX, GOOGLE_DRIVE, SHAREPOINT
1523
1530
 
1524
1531
  ##### sync_source_items: `Boolean`<a id="sync_source_items-boolean"></a>
1525
1532
  Enabling this flag will fetch all available content from the source to be listed
@@ -1781,7 +1788,7 @@ result = carbon.integrations.sync_confluence(
1781
1788
  prepend_filename_to_chunks: false,
1782
1789
  max_items_per_chunk: 1,
1783
1790
  set_page_as_boundary: false,
1784
- request_id: "dd2130b5-0f9f-4f3a-b450-f3fa458763ae",
1791
+ request_id: "b2c5c595-0cfb-4ec3-96ff-87158c2b6207",
1785
1792
  use_ocr: false,
1786
1793
  parse_pdf_tables_with_ocr: false,
1787
1794
  incremental_sync: false,
@@ -1884,7 +1891,7 @@ result = carbon.integrations.sync_files(
1884
1891
  prepend_filename_to_chunks: false,
1885
1892
  max_items_per_chunk: 1,
1886
1893
  set_page_as_boundary: false,
1887
- request_id: "dd2130b5-0f9f-4f3a-b450-f3fa458763ae",
1894
+ request_id: "b2c5c595-0cfb-4ec3-96ff-87158c2b6207",
1888
1895
  use_ocr: false,
1889
1896
  parse_pdf_tables_with_ocr: false,
1890
1897
  incremental_sync: false,
@@ -2741,6 +2748,8 @@ result = carbon.utilities.scrape_sitemap(
2741
2748
  css_classes_to_skip: [],
2742
2749
  css_selectors_to_skip: [],
2743
2750
  embedding_model: "OPENAI",
2751
+ url_paths_to_include: [],
2752
+ url_paths_to_exclude: [],
2744
2753
  )
2745
2754
  p result
2746
2755
  ```
@@ -2760,6 +2769,16 @@ p result
2760
2769
  ##### css_classes_to_skip: Array<`String`><a id="css_classes_to_skip-array"></a>
2761
2770
  ##### css_selectors_to_skip: Array<`String`><a id="css_selectors_to_skip-array"></a>
2762
2771
  ##### embedding_model: [`EmbeddingGenerators`](./lib/carbon_ruby_sdk/models/embedding_generators.rb)<a id="embedding_model-embeddinggeneratorslibcarbon_ruby_sdkmodelsembedding_generatorsrb"></a>
2772
+ ##### url_paths_to_include: Array<`String`><a id="url_paths_to_include-array"></a>
2773
+ URL subpaths or directories that you want to include. For example, if you want to
2774
+ only include URLs that start with /questions in stackoverflow.com, you will add
2775
+ /questions/ in this input
2776
+
2777
+ ##### url_paths_to_exclude: Array<`String`><a id="url_paths_to_exclude-array"></a>
2778
+ URL subpaths or directories that you want to exclude. For example, if you want to
2779
+ exclude URLs that start with /questions in stackoverflow.com, you will add
2780
+ /questions/ in this input
2781
+
2763
2782
  #### 🌐 Endpoint<a id="🌐-endpoint"></a>
2764
2783
 
2765
2784
  `/scrape_sitemap` `POST`
@@ -2799,6 +2818,7 @@ result = carbon.utilities.scrape_web(
2799
2818
  "css_classes_to_skip" => [],
2800
2819
  "css_selectors_to_skip" => [],
2801
2820
  "embedding_model" => "OPENAI",
2821
+ "url_paths_to_include" => [],
2802
2822
  }
2803
2823
  ],
2804
2824
  )
@@ -1174,9 +1174,10 @@ module Carbon
1174
1174
  # @param parse_pdf_tables_with_ocr [Boolean] Whether to use rich table parsing when `use_ocr` is enabled.
1175
1175
  # @param detect_audio_language [Boolean] Whether to automatically detect the language of the uploaded audio file.
1176
1176
  # @param media_type [FileContentTypesNullable] The media type of the file. If not provided, it will be inferred from the file extension.
1177
+ # @param split_rows [Boolean] Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files.
1177
1178
  # @param body [BodyCreateUploadFileUploadfilePost]
1178
1179
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1179
- def upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, extra: {})
1180
+ def upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, split_rows: false, extra: {})
1180
1181
  _body = {}
1181
1182
  _body[:file] = file if file != SENTINEL
1182
1183
  body_create_upload_file_uploadfile_post = _body
@@ -1192,6 +1193,7 @@ module Carbon
1192
1193
  extra[:parse_pdf_tables_with_ocr] = parse_pdf_tables_with_ocr if parse_pdf_tables_with_ocr != SENTINEL
1193
1194
  extra[:detect_audio_language] = detect_audio_language if detect_audio_language != SENTINEL
1194
1195
  extra[:media_type] = media_type if media_type != SENTINEL
1196
+ extra[:split_rows] = split_rows if split_rows != SENTINEL
1195
1197
  api_response = upload_with_http_info_impl(file, body_create_upload_file_uploadfile_post, extra)
1196
1198
  api_response.data
1197
1199
  end
@@ -1237,9 +1239,10 @@ module Carbon
1237
1239
  # @param parse_pdf_tables_with_ocr [Boolean] Whether to use rich table parsing when `use_ocr` is enabled.
1238
1240
  # @param detect_audio_language [Boolean] Whether to automatically detect the language of the uploaded audio file.
1239
1241
  # @param media_type [FileContentTypesNullable] The media type of the file. If not provided, it will be inferred from the file extension.
1242
+ # @param split_rows [Boolean] Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files.
1240
1243
  # @param body [BodyCreateUploadFileUploadfilePost]
1241
1244
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1242
- def upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, extra: {})
1245
+ def upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, split_rows: false, extra: {})
1243
1246
  _body = {}
1244
1247
  _body[:file] = file if file != SENTINEL
1245
1248
  body_create_upload_file_uploadfile_post = _body
@@ -1255,6 +1258,7 @@ module Carbon
1255
1258
  extra[:parse_pdf_tables_with_ocr] = parse_pdf_tables_with_ocr if parse_pdf_tables_with_ocr != SENTINEL
1256
1259
  extra[:detect_audio_language] = detect_audio_language if detect_audio_language != SENTINEL
1257
1260
  extra[:media_type] = media_type if media_type != SENTINEL
1261
+ extra[:split_rows] = split_rows if split_rows != SENTINEL
1258
1262
  upload_with_http_info_impl(file, body_create_upload_file_uploadfile_post, extra)
1259
1263
  end
1260
1264
 
@@ -1275,6 +1279,7 @@ module Carbon
1275
1279
  # @option opts [Boolean] :parse_pdf_tables_with_ocr Whether to use rich table parsing when `use_ocr` is enabled. (default to false)
1276
1280
  # @option opts [Boolean] :detect_audio_language Whether to automatically detect the language of the uploaded audio file. (default to false)
1277
1281
  # @option opts [FileContentTypesNullable] :media_type The media type of the file. If not provided, it will be inferred from the file extension.
1282
+ # @option opts [Boolean] :split_rows Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files. (default to false)
1278
1283
  # @return [UserFile]
1279
1284
  private def upload_impl(file, body_create_upload_file_uploadfile_post, opts = {})
1280
1285
  data, _status_code, _headers = upload_with_http_info(file, body_create_upload_file_uploadfile_post, opts)
@@ -1298,6 +1303,7 @@ module Carbon
1298
1303
  # @option opts [Boolean] :parse_pdf_tables_with_ocr Whether to use rich table parsing when `use_ocr` is enabled. (default to false)
1299
1304
  # @option opts [Boolean] :detect_audio_language Whether to automatically detect the language of the uploaded audio file. (default to false)
1300
1305
  # @option opts [FileContentTypesNullable] :media_type The media type of the file. If not provided, it will be inferred from the file extension.
1306
+ # @option opts [Boolean] :split_rows Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files. (default to false)
1301
1307
  # @return [APIResponse] data is UserFile, status code, headers and response
1302
1308
  private def upload_with_http_info_impl(file, body_create_upload_file_uploadfile_post, opts = {})
1303
1309
  if @api_client.config.debugging
@@ -1328,6 +1334,7 @@ module Carbon
1328
1334
  query_params[:'parse_pdf_tables_with_ocr'] = opts[:'parse_pdf_tables_with_ocr'] if !opts[:'parse_pdf_tables_with_ocr'].nil?
1329
1335
  query_params[:'detect_audio_language'] = opts[:'detect_audio_language'] if !opts[:'detect_audio_language'].nil?
1330
1336
  query_params[:'media_type'] = opts[:'media_type'] if !opts[:'media_type'].nil?
1337
+ query_params[:'split_rows'] = opts[:'split_rows'] if !opts[:'split_rows'].nil?
1331
1338
 
1332
1339
  # header parameters
1333
1340
  header_params = opts[:header_params] || {}
@@ -1386,9 +1393,10 @@ module Carbon
1386
1393
  # @param parse_pdf_tables_with_ocr [Boolean]
1387
1394
  # @param detect_audio_language [Boolean]
1388
1395
  # @param media_type [FileContentTypesNullable]
1396
+ # @param split_rows [Boolean]
1389
1397
  # @param body [UploadFileFromUrlInput]
1390
1398
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1391
- def upload_from_url(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, extra: {})
1399
+ def upload_from_url(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, split_rows: false, extra: {})
1392
1400
  _body = {}
1393
1401
  _body[:url] = url if url != SENTINEL
1394
1402
  _body[:file_name] = file_name if file_name != SENTINEL
@@ -1404,6 +1412,7 @@ module Carbon
1404
1412
  _body[:parse_pdf_tables_with_ocr] = parse_pdf_tables_with_ocr if parse_pdf_tables_with_ocr != SENTINEL
1405
1413
  _body[:detect_audio_language] = detect_audio_language if detect_audio_language != SENTINEL
1406
1414
  _body[:media_type] = media_type if media_type != SENTINEL
1415
+ _body[:split_rows] = split_rows if split_rows != SENTINEL
1407
1416
  upload_file_from_url_input = _body
1408
1417
  api_response = upload_from_url_with_http_info_impl(upload_file_from_url_input, extra)
1409
1418
  api_response.data
@@ -1425,9 +1434,10 @@ module Carbon
1425
1434
  # @param parse_pdf_tables_with_ocr [Boolean]
1426
1435
  # @param detect_audio_language [Boolean]
1427
1436
  # @param media_type [FileContentTypesNullable]
1437
+ # @param split_rows [Boolean]
1428
1438
  # @param body [UploadFileFromUrlInput]
1429
1439
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1430
- def upload_from_url_with_http_info(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, extra: {})
1440
+ def upload_from_url_with_http_info(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, split_rows: false, extra: {})
1431
1441
  _body = {}
1432
1442
  _body[:url] = url if url != SENTINEL
1433
1443
  _body[:file_name] = file_name if file_name != SENTINEL
@@ -1443,6 +1453,7 @@ module Carbon
1443
1453
  _body[:parse_pdf_tables_with_ocr] = parse_pdf_tables_with_ocr if parse_pdf_tables_with_ocr != SENTINEL
1444
1454
  _body[:detect_audio_language] = detect_audio_language if detect_audio_language != SENTINEL
1445
1455
  _body[:media_type] = media_type if media_type != SENTINEL
1456
+ _body[:split_rows] = split_rows if split_rows != SENTINEL
1446
1457
  upload_file_from_url_input = _body
1447
1458
  upload_from_url_with_http_info_impl(upload_file_from_url_input, extra)
1448
1459
  end
@@ -653,13 +653,13 @@ module Carbon
653
653
  # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
654
654
  # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
655
655
  # @param parse_pdf_tables_with_ocr [Boolean]
656
- # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
656
+ # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, ONEDRIVE, BOX, GOOGLE_DRIVE, SHAREPOINT
657
657
  # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
658
658
  # @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
659
659
  # @param file_sync_config [FileSyncConfigNullable]
660
660
  # @param body [OAuthURLRequest]
661
661
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
662
- def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '2e662fad-1193-4482-a2d7-ec7b821a9d2b', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
662
+ def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'b7620173-662c-4ae7-bb61-2e6ffd8619f5', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
663
663
  _body = {}
664
664
  _body[:tags] = tags if tags != SENTINEL
665
665
  _body[:scope] = scope if scope != SENTINEL
@@ -721,13 +721,13 @@ module Carbon
721
721
  # @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
722
722
  # @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
723
723
  # @param parse_pdf_tables_with_ocr [Boolean]
724
- # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
724
+ # @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, ONEDRIVE, BOX, GOOGLE_DRIVE, SHAREPOINT
725
725
  # @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
726
726
  # @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
727
727
  # @param file_sync_config [FileSyncConfigNullable]
728
728
  # @param body [OAuthURLRequest]
729
729
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
730
- def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '2e662fad-1193-4482-a2d7-ec7b821a9d2b', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
730
+ def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'b7620173-662c-4ae7-bb61-2e6ffd8619f5', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
731
731
  _body = {}
732
732
  _body[:tags] = tags if tags != SENTINEL
733
733
  _body[:scope] = scope if scope != SENTINEL
@@ -1523,7 +1523,7 @@ module Carbon
1523
1523
  # @param file_sync_config [FileSyncConfigNullable]
1524
1524
  # @param body [SyncFilesRequest]
1525
1525
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1526
- def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1526
+ def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'b2c5c595-0cfb-4ec3-96ff-87158c2b6207', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1527
1527
  _body = {}
1528
1528
  _body[:tags] = tags if tags != SENTINEL
1529
1529
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1571,7 +1571,7 @@ module Carbon
1571
1571
  # @param file_sync_config [FileSyncConfigNullable]
1572
1572
  # @param body [SyncFilesRequest]
1573
1573
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1574
- def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1574
+ def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'b2c5c595-0cfb-4ec3-96ff-87158c2b6207', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1575
1575
  _body = {}
1576
1576
  _body[:tags] = tags if tags != SENTINEL
1577
1577
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1779,7 +1779,7 @@ module Carbon
1779
1779
  # @param file_sync_config [FileSyncConfigNullable]
1780
1780
  # @param body [SyncFilesRequest]
1781
1781
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1782
- def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1782
+ def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'b2c5c595-0cfb-4ec3-96ff-87158c2b6207', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1783
1783
  _body = {}
1784
1784
  _body[:tags] = tags if tags != SENTINEL
1785
1785
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -1827,7 +1827,7 @@ module Carbon
1827
1827
  # @param file_sync_config [FileSyncConfigNullable]
1828
1828
  # @param body [SyncFilesRequest]
1829
1829
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
1830
- def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1830
+ def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'b2c5c595-0cfb-4ec3-96ff-87158c2b6207', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
1831
1831
  _body = {}
1832
1832
  _body[:tags] = tags if tags != SENTINEL
1833
1833
  _body[:data_source_id] = data_source_id if data_source_id != SENTINEL
@@ -342,9 +342,11 @@ module Carbon
342
342
  # @param css_classes_to_skip [Array<String>]
343
343
  # @param css_selectors_to_skip [Array<String>]
344
344
  # @param embedding_model [EmbeddingGenerators]
345
+ # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example, to include only URLs that start with /questions on stackoverflow.com, add /questions/ to this input.
346
+ # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example, to exclude URLs that start with /questions on stackoverflow.com, add /questions/ to this input.
345
347
  # @param body [SitemapScrapeRequest]
346
348
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
347
- def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
349
+ def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
348
350
  _body = {}
349
351
  _body[:tags] = tags if tags != SENTINEL
350
352
  _body[:url] = url if url != SENTINEL
@@ -359,6 +361,8 @@ module Carbon
359
361
  _body[:css_classes_to_skip] = css_classes_to_skip if css_classes_to_skip != SENTINEL
360
362
  _body[:css_selectors_to_skip] = css_selectors_to_skip if css_selectors_to_skip != SENTINEL
361
363
  _body[:embedding_model] = embedding_model if embedding_model != SENTINEL
364
+ _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
365
+ _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
362
366
  sitemap_scrape_request = _body
363
367
  api_response = scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
364
368
  api_response.data
@@ -387,9 +391,11 @@ module Carbon
387
391
  # @param css_classes_to_skip [Array<String>]
388
392
  # @param css_selectors_to_skip [Array<String>]
389
393
  # @param embedding_model [EmbeddingGenerators]
394
+ # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example, to include only URLs that start with /questions on stackoverflow.com, add /questions/ to this input.
395
+ # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example, to exclude URLs that start with /questions on stackoverflow.com, add /questions/ to this input.
390
396
  # @param body [SitemapScrapeRequest]
391
397
  # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
392
- def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
398
+ def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
393
399
  _body = {}
394
400
  _body[:tags] = tags if tags != SENTINEL
395
401
  _body[:url] = url if url != SENTINEL
@@ -404,6 +410,8 @@ module Carbon
404
410
  _body[:css_classes_to_skip] = css_classes_to_skip if css_classes_to_skip != SENTINEL
405
411
  _body[:css_selectors_to_skip] = css_selectors_to_skip if css_selectors_to_skip != SENTINEL
406
412
  _body[:embedding_model] = embedding_model if embedding_model != SENTINEL
413
+ _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
414
+ _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
407
415
  sitemap_scrape_request = _body
408
416
  scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
409
417
  end
@@ -61,7 +61,7 @@ module Carbon
61
61
 
62
62
  attr_accessor :parse_pdf_tables_with_ocr
63
63
 
64
- # Enable integration's file picker for sources that support it. Supported sources: SHAREPOINT, BOX, ONEDRIVE, GOOGLE_DRIVE, DROPBOX
64
+ # Enable integration's file picker for sources that support it. Supported sources: DROPBOX, ONEDRIVE, BOX, GOOGLE_DRIVE, SHAREPOINT
65
65
  attr_accessor :enable_file_picker
66
66
 
67
67
  # Enabling this flag will fetch all available content from the source to be listed via list items endpoint
@@ -279,7 +279,7 @@ module Carbon
279
279
  if attributes.key?(:'request_id')
280
280
  self.request_id = attributes[:'request_id']
281
281
  else
282
- self.request_id = '2e662fad-1193-4482-a2d7-ec7b821a9d2b'
282
+ self.request_id = 'b7620173-662c-4ae7-bb61-2e6ffd8619f5'
283
283
  end
284
284
 
285
285
  if attributes.key?(:'use_ocr')
@@ -37,6 +37,12 @@ module Carbon
37
37
 
38
38
  attr_accessor :embedding_model
39
39
 
40
+ # URL subpaths or directories that you want to include. For example, to include only URLs that start with /questions on stackoverflow.com, add /questions/ to this input.
41
+ attr_accessor :url_paths_to_include
42
+
43
+ # URL subpaths or directories that you want to exclude. For example, to exclude URLs that start with /questions on stackoverflow.com, add /questions/ to this input.
44
+ attr_accessor :url_paths_to_exclude
45
+
40
46
  # Attribute mapping from ruby-style variable name to JSON key.
41
47
  def self.attribute_map
42
48
  {
@@ -52,7 +58,9 @@ module Carbon
52
58
  :'html_tags_to_skip' => :'html_tags_to_skip',
53
59
  :'css_classes_to_skip' => :'css_classes_to_skip',
54
60
  :'css_selectors_to_skip' => :'css_selectors_to_skip',
55
- :'embedding_model' => :'embedding_model'
61
+ :'embedding_model' => :'embedding_model',
62
+ :'url_paths_to_include' => :'url_paths_to_include',
63
+ :'url_paths_to_exclude' => :'url_paths_to_exclude'
56
64
  }
57
65
  end
58
66
 
@@ -76,7 +84,9 @@ module Carbon
76
84
  :'html_tags_to_skip' => :'Array<String>',
77
85
  :'css_classes_to_skip' => :'Array<String>',
78
86
  :'css_selectors_to_skip' => :'Array<String>',
79
- :'embedding_model' => :'EmbeddingGenerators'
87
+ :'embedding_model' => :'EmbeddingGenerators',
88
+ :'url_paths_to_include' => :'Array<String>',
89
+ :'url_paths_to_exclude' => :'Array<String>'
80
90
  }
81
91
  end
82
92
 
@@ -94,6 +104,8 @@ module Carbon
94
104
  :'html_tags_to_skip',
95
105
  :'css_classes_to_skip',
96
106
  :'css_selectors_to_skip',
107
+ :'url_paths_to_include',
108
+ :'url_paths_to_exclude'
97
109
  ])
98
110
  end
99
111
 
@@ -185,6 +197,18 @@ module Carbon
185
197
  else
186
198
  self.embedding_model = 'OPENAI'
187
199
  end
200
+
201
+ if attributes.key?(:'url_paths_to_include')
202
+ if (value = attributes[:'url_paths_to_include']).is_a?(Array)
203
+ self.url_paths_to_include = value
204
+ end
205
+ end
206
+
207
+ if attributes.key?(:'url_paths_to_exclude')
208
+ if (value = attributes[:'url_paths_to_exclude']).is_a?(Array)
209
+ self.url_paths_to_exclude = value
210
+ end
211
+ end
188
212
  end
189
213
 
190
214
  # Show invalid properties with the reasons. Usually used together with valid?
@@ -199,6 +223,14 @@ module Carbon
199
223
  invalid_properties.push('invalid value for "max_pages_to_scrape", must be greater than or equal to 1.')
200
224
  end
201
225
 
226
+ if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
227
+ invalid_properties.push('invalid value for "url_paths_to_include", number of items must be less than or equal to 10.')
228
+ end
229
+
230
+ if !@url_paths_to_exclude.nil? && @url_paths_to_exclude.length > 10
231
+ invalid_properties.push('invalid value for "url_paths_to_exclude", number of items must be less than or equal to 10.')
232
+ end
233
+
202
234
  invalid_properties
203
235
  end
204
236
 
@@ -207,6 +239,8 @@ module Carbon
207
239
  def valid?
208
240
  return false if @url.nil?
209
241
  return false if !@max_pages_to_scrape.nil? && @max_pages_to_scrape < 1
242
+ return false if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
243
+ return false if !@url_paths_to_exclude.nil? && @url_paths_to_exclude.length > 10
210
244
  true
211
245
  end
212
246
 
@@ -220,6 +254,26 @@ module Carbon
220
254
  @max_pages_to_scrape = max_pages_to_scrape
221
255
  end
222
256
 
257
+ # Custom attribute writer method with validation
258
+ # @param [Object] url_paths_to_include Value to be assigned
259
+ def url_paths_to_include=(url_paths_to_include)
260
+ if !url_paths_to_include.nil? && url_paths_to_include.length > 10
261
+ fail ArgumentError, 'invalid value for "url_paths_to_include", number of items must be less than or equal to 10.'
262
+ end
263
+
264
+ @url_paths_to_include = url_paths_to_include
265
+ end
266
+
267
+ # Custom attribute writer method with validation
268
+ # @param [Object] url_paths_to_exclude Value to be assigned
269
+ def url_paths_to_exclude=(url_paths_to_exclude)
270
+ if !url_paths_to_exclude.nil? && url_paths_to_exclude.length > 10
271
+ fail ArgumentError, 'invalid value for "url_paths_to_exclude", number of items must be less than or equal to 10.'
272
+ end
273
+
274
+ @url_paths_to_exclude = url_paths_to_exclude
275
+ end
276
+
223
277
  # Checks equality by comparing each attribute.
224
278
  # @param [Object] Object to be compared
225
279
  def ==(o)
@@ -237,7 +291,9 @@ module Carbon
237
291
  html_tags_to_skip == o.html_tags_to_skip &&
238
292
  css_classes_to_skip == o.css_classes_to_skip &&
239
293
  css_selectors_to_skip == o.css_selectors_to_skip &&
240
- embedding_model == o.embedding_model
294
+ embedding_model == o.embedding_model &&
295
+ url_paths_to_include == o.url_paths_to_include &&
296
+ url_paths_to_exclude == o.url_paths_to_exclude
241
297
  end
242
298
 
243
299
  # @see the `==` method
@@ -249,7 +305,7 @@ module Carbon
249
305
  # Calculates hash code according to all attributes.
250
306
  # @return [Integer] Hash code
251
307
  def hash
252
- [tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model].hash
308
+ [tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude].hash
253
309
  end
254
310
 
255
311
  # Builds the object from hash
@@ -187,7 +187,7 @@ module Carbon
187
187
  if attributes.key?(:'request_id')
188
188
  self.request_id = attributes[:'request_id']
189
189
  else
190
- self.request_id = 'dd2130b5-0f9f-4f3a-b450-f3fa458763ae'
190
+ self.request_id = 'b2c5c595-0cfb-4ec3-96ff-87158c2b6207'
191
191
  end
192
192
 
193
193
  if attributes.key?(:'use_ocr')
@@ -182,7 +182,7 @@ module Carbon
182
182
  if attributes.key?(:'request_id')
183
183
  self.request_id = attributes[:'request_id']
184
184
  else
185
- self.request_id = '368135ce-5cca-4fb5-a19d-42b9a409af35'
185
+ self.request_id = '07144230-657d-40ab-9fb5-89095bf3fc65'
186
186
  end
187
187
 
188
188
  if attributes.key?(:'enable_file_picker')
@@ -40,6 +40,8 @@ module Carbon
40
40
 
41
41
  attr_accessor :media_type
42
42
 
43
+ attr_accessor :split_rows
44
+
43
45
  # Attribute mapping from ruby-style variable name to JSON key.
44
46
  def self.attribute_map
45
47
  {
@@ -56,7 +58,8 @@ module Carbon
56
58
  :'max_items_per_chunk' => :'max_items_per_chunk',
57
59
  :'parse_pdf_tables_with_ocr' => :'parse_pdf_tables_with_ocr',
58
60
  :'detect_audio_language' => :'detect_audio_language',
59
- :'media_type' => :'media_type'
61
+ :'media_type' => :'media_type',
62
+ :'split_rows' => :'split_rows'
60
63
  }
61
64
  end
62
65
 
@@ -81,7 +84,8 @@ module Carbon
81
84
  :'max_items_per_chunk' => :'Integer',
82
85
  :'parse_pdf_tables_with_ocr' => :'Boolean',
83
86
  :'detect_audio_language' => :'Boolean',
84
- :'media_type' => :'FileContentTypesNullable'
87
+ :'media_type' => :'FileContentTypesNullable',
88
+ :'split_rows' => :'Boolean'
85
89
  }
86
90
  end
87
91
 
@@ -92,7 +96,7 @@ module Carbon
92
96
  :'chunk_size',
93
97
  :'chunk_overlap',
94
98
  :'max_items_per_chunk',
95
- :'media_type'
99
+ :'media_type',
96
100
  ])
97
101
  end
98
102
 
@@ -182,6 +186,12 @@ module Carbon
182
186
  if attributes.key?(:'media_type')
183
187
  self.media_type = attributes[:'media_type']
184
188
  end
189
+
190
+ if attributes.key?(:'split_rows')
191
+ self.split_rows = attributes[:'split_rows']
192
+ else
193
+ self.split_rows = false
194
+ end
185
195
  end
186
196
 
187
197
  # Show invalid properties with the reasons. Usually used together with valid?
@@ -220,7 +230,8 @@ module Carbon
220
230
  max_items_per_chunk == o.max_items_per_chunk &&
221
231
  parse_pdf_tables_with_ocr == o.parse_pdf_tables_with_ocr &&
222
232
  detect_audio_language == o.detect_audio_language &&
223
- media_type == o.media_type
233
+ media_type == o.media_type &&
234
+ split_rows == o.split_rows
224
235
  end
225
236
 
226
237
  # @see the `==` method
@@ -232,7 +243,7 @@ module Carbon
232
243
  # Calculates hash code according to all attributes.
233
244
  # @return [Integer] Hash code
234
245
  def hash
235
- [url, file_name, chunk_size, chunk_overlap, skip_embedding_generation, set_page_as_boundary, embedding_model, generate_sparse_vectors, use_textract, prepend_filename_to_chunks, max_items_per_chunk, parse_pdf_tables_with_ocr, detect_audio_language, media_type].hash
246
+ [url, file_name, chunk_size, chunk_overlap, skip_embedding_generation, set_page_as_boundary, embedding_model, generate_sparse_vectors, use_textract, prepend_filename_to_chunks, max_items_per_chunk, parse_pdf_tables_with_ocr, detect_audio_language, media_type, split_rows].hash
236
247
  end
237
248
 
238
249
  # Builds the object from hash
@@ -39,6 +39,9 @@ module Carbon
39
39
 
40
40
  attr_accessor :embedding_model
41
41
 
42
+ # URL subpaths or directories that you want to include. For example, to include only URLs that start with /questions on stackoverflow.com, add /questions/ to this input.
43
+ attr_accessor :url_paths_to_include
44
+
42
45
  # Attribute mapping from ruby-style variable name to JSON key.
43
46
  def self.attribute_map
44
47
  {
@@ -55,7 +58,8 @@ module Carbon
55
58
  :'html_tags_to_skip' => :'html_tags_to_skip',
56
59
  :'css_classes_to_skip' => :'css_classes_to_skip',
57
60
  :'css_selectors_to_skip' => :'css_selectors_to_skip',
58
- :'embedding_model' => :'embedding_model'
61
+ :'embedding_model' => :'embedding_model',
62
+ :'url_paths_to_include' => :'url_paths_to_include'
59
63
  }
60
64
  end
61
65
 
@@ -80,7 +84,8 @@ module Carbon
80
84
  :'html_tags_to_skip' => :'Array<String>',
81
85
  :'css_classes_to_skip' => :'Array<String>',
82
86
  :'css_selectors_to_skip' => :'Array<String>',
83
- :'embedding_model' => :'EmbeddingGenerators'
87
+ :'embedding_model' => :'EmbeddingGenerators',
88
+ :'url_paths_to_include' => :'Array<String>'
84
89
  }
85
90
  end
86
91
 
@@ -99,6 +104,7 @@ module Carbon
99
104
  :'html_tags_to_skip',
100
105
  :'css_classes_to_skip',
101
106
  :'css_selectors_to_skip',
107
+ :'url_paths_to_include'
102
108
  ])
103
109
  end
104
110
 
@@ -198,6 +204,12 @@ module Carbon
198
204
  else
199
205
  self.embedding_model = 'OPENAI'
200
206
  end
207
+
208
+ if attributes.key?(:'url_paths_to_include')
209
+ if (value = attributes[:'url_paths_to_include']).is_a?(Array)
210
+ self.url_paths_to_include = value
211
+ end
212
+ end
201
213
  end
202
214
 
203
215
  # Show invalid properties with the reasons. Usually used together with valid?
@@ -216,6 +228,10 @@ module Carbon
216
228
  invalid_properties.push('invalid value for "max_pages_to_scrape", must be greater than or equal to 1.')
217
229
  end
218
230
 
231
+ if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
232
+ invalid_properties.push('invalid value for "url_paths_to_include", number of items must be less than or equal to 10.')
233
+ end
234
+
219
235
  invalid_properties
220
236
  end
221
237
 
@@ -225,6 +241,7 @@ module Carbon
225
241
  return false if @url.nil?
226
242
  return false if !@recursion_depth.nil? && @recursion_depth < 0
227
243
  return false if !@max_pages_to_scrape.nil? && @max_pages_to_scrape < 1
244
+ return false if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
228
245
  true
229
246
  end
230
247
 
@@ -248,6 +265,16 @@ module Carbon
248
265
  @max_pages_to_scrape = max_pages_to_scrape
249
266
  end
250
267
 
268
+ # Custom attribute writer method with validation
269
+ # @param [Object] url_paths_to_include Value to be assigned
270
+ def url_paths_to_include=(url_paths_to_include)
271
+ if !url_paths_to_include.nil? && url_paths_to_include.length > 10
272
+ fail ArgumentError, 'invalid value for "url_paths_to_include", number of items must be less than or equal to 10.'
273
+ end
274
+
275
+ @url_paths_to_include = url_paths_to_include
276
+ end
277
+
251
278
  # Checks equality by comparing each attribute.
252
279
  # @param [Object] Object to be compared
253
280
  def ==(o)
@@ -266,7 +293,8 @@ module Carbon
266
293
  html_tags_to_skip == o.html_tags_to_skip &&
267
294
  css_classes_to_skip == o.css_classes_to_skip &&
268
295
  css_selectors_to_skip == o.css_selectors_to_skip &&
269
- embedding_model == o.embedding_model
296
+ embedding_model == o.embedding_model &&
297
+ url_paths_to_include == o.url_paths_to_include
270
298
  end
271
299
 
272
300
  # @see the `==` method
@@ -278,7 +306,7 @@ module Carbon
278
306
  # Calculates hash code according to all attributes.
279
307
  # @return [Integer] Hash code
280
308
  def hash
281
- [tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model].hash
309
+ [tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include].hash
282
310
  end
283
311
 
284
312
  # Builds the object from hash
@@ -7,5 +7,5 @@ The version of the OpenAPI document: 1.0.0
7
7
  =end
8
8
 
9
9
  module Carbon
10
- VERSION = '0.2.2'
10
+ VERSION = '0.2.4'
11
11
  end
@@ -159,6 +159,7 @@ describe 'FilesApi' do
159
159
  # @option opts [Boolean] :parse_pdf_tables_with_ocr Whether to use rich table parsing when &#x60;use_ocr&#x60; is enabled.
160
160
  # @option opts [Boolean] :detect_audio_language Whether to automatically detect the language of the uploaded audio file.
161
161
  # @option opts [FileContentTypesNullable] :media_type The media type of the file. If not provided, it will be inferred from the file extension.
162
+ # @option opts [Boolean] :split_rows Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files.
162
163
  # @return [UserFile]
163
164
  describe 'upload test' do
164
165
  it 'should work' do
@@ -97,4 +97,16 @@ describe Carbon::SitemapScrapeRequest do
97
97
  end
98
98
  end
99
99
 
100
+ describe 'test attribute "url_paths_to_include"' do
101
+ it 'should work' do
102
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
103
+ end
104
+ end
105
+
106
+ describe 'test attribute "url_paths_to_exclude"' do
107
+ it 'should work' do
108
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
109
+ end
110
+ end
111
+
100
112
  end
@@ -103,4 +103,10 @@ describe Carbon::UploadFileFromUrlInput do
103
103
  end
104
104
  end
105
105
 
106
+ describe 'test attribute "split_rows"' do
107
+ it 'should work' do
108
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
109
+ end
110
+ end
111
+
106
112
  end
@@ -103,4 +103,10 @@ describe Carbon::WebscrapeRequest do
103
103
  end
104
104
  end
105
105
 
106
+ describe 'test attribute "url_paths_to_include"' do
107
+ it 'should work' do
108
+ # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
109
+ end
110
+ end
111
+
106
112
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: carbon_ruby_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Konfig
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-06-01 00:00:00.000000000 Z
11
+ date: 2024-06-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday