carbon_ruby_sdk 0.2.2 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +27 -7
- data/lib/carbon_ruby_sdk/api/files_api.rb +15 -4
- data/lib/carbon_ruby_sdk/api/integrations_api.rb +8 -8
- data/lib/carbon_ruby_sdk/api/utilities_api.rb +10 -2
- data/lib/carbon_ruby_sdk/models/o_auth_url_request.rb +2 -2
- data/lib/carbon_ruby_sdk/models/sitemap_scrape_request.rb +60 -4
- data/lib/carbon_ruby_sdk/models/sync_files_request.rb +1 -1
- data/lib/carbon_ruby_sdk/models/sync_options.rb +1 -1
- data/lib/carbon_ruby_sdk/models/upload_file_from_url_input.rb +16 -5
- data/lib/carbon_ruby_sdk/models/webscrape_request.rb +32 -4
- data/lib/carbon_ruby_sdk/version.rb +1 -1
- data/spec/api/files_api_spec.rb +1 -0
- data/spec/models/sitemap_scrape_request_spec.rb +12 -0
- data/spec/models/upload_file_from_url_input_spec.rb +6 -0
- data/spec/models/webscrape_request_spec.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f36c299546586666be2828957b23ea39f49cffb5f16a0934aab82aaf29d0b1c7
|
4
|
+
data.tar.gz: f3299f1ee7db27666209ef10411d43190e8769fba5a852aa609c6ab4721d4dec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 42689eebf092d715da57445f2ec9a2e98dd367a6f610b34de15928f8cf7e2f0333a9492bddb40ec5996ff84cbe04907bbad0b83e9e742140af25fd524b208a0e
|
7
|
+
data.tar.gz: 22d020e4ea8811d8e5595b62eb3ea3917df41a10e5c1ee6e0d251eb946061b4a24ff1f2f20b579e3be4ae1e91dd7c319c0107e55290b88ad64c96f3662425293
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
|
7
7
|
Connect external data to LLMs, no matter the source.
|
8
8
|
|
9
|
-
[![npm](https://img.shields.io/badge/gem-v0.2.2-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.2)
|
9
|
+
[![npm](https://img.shields.io/badge/gem-v0.2.4-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.4)
|
10
10
|
|
11
11
|
</div>
|
12
12
|
|
@@ -86,7 +86,7 @@ Connect external data to LLMs, no matter the source.
|
|
86
86
|
Add to Gemfile:
|
87
87
|
|
88
88
|
```ruby
|
89
|
-
gem 'carbon_ruby_sdk', '~> 0.2.2'
|
89
|
+
gem 'carbon_ruby_sdk', '~> 0.2.4'
|
90
90
|
```
|
91
91
|
|
92
92
|
## Getting Started<a id="getting-started"></a>
|
@@ -999,6 +999,7 @@ result = carbon.files.upload(
|
|
999
999
|
parse_pdf_tables_with_ocr: false,
|
1000
1000
|
detect_audio_language: false,
|
1001
1001
|
media_type: "TEXT",
|
1002
|
+
split_rows: false,
|
1002
1003
|
)
|
1003
1004
|
p result
|
1004
1005
|
```
|
@@ -1048,6 +1049,10 @@ Whether to automatically detect the language of the uploaded audio file.
|
|
1048
1049
|
The media type of the file. If not provided, it will be inferred from the file
|
1049
1050
|
extension.
|
1050
1051
|
|
1052
|
+
##### split_rows: `Boolean`<a id="split_rows-boolean"></a>
|
1053
|
+
Whether to split tabular rows into chunks. Currently only valid for CSV, TSV,
|
1054
|
+
and XLSX files.
|
1055
|
+
|
1051
1056
|
#### 🔄 Return<a id="🔄-return"></a>
|
1052
1057
|
|
1053
1058
|
[UserFile](./lib/carbon_ruby_sdk/models/user_file.rb)
|
@@ -1083,6 +1088,7 @@ result = carbon.files.upload_from_url(
|
|
1083
1088
|
parse_pdf_tables_with_ocr: false,
|
1084
1089
|
detect_audio_language: false,
|
1085
1090
|
media_type: "TEXT",
|
1091
|
+
split_rows: false,
|
1086
1092
|
)
|
1087
1093
|
p result
|
1088
1094
|
```
|
@@ -1105,6 +1111,7 @@ Number of objects per chunk. For csv, tsv, xlsx, and json files only.
|
|
1105
1111
|
##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
|
1106
1112
|
##### detect_audio_language: `Boolean`<a id="detect_audio_language-boolean"></a>
|
1107
1113
|
##### media_type: [`FileContentTypesNullable`](./lib/carbon_ruby_sdk/models/file_content_types_nullable.rb)<a id="media_type-filecontenttypesnullablelibcarbon_ruby_sdkmodelsfile_content_types_nullablerb"></a>
|
1114
|
+
##### split_rows: `Boolean`<a id="split_rows-boolean"></a>
|
1108
1115
|
#### 🔄 Return<a id="🔄-return"></a>
|
1109
1116
|
|
1110
1117
|
[UserFile](./lib/carbon_ruby_sdk/models/user_file.rb)
|
@@ -1240,7 +1247,7 @@ result = carbon.integrations.connect_data_source(
|
|
1240
1247
|
"prepend_filename_to_chunks" => false,
|
1241
1248
|
"sync_files_on_connection" => true,
|
1242
1249
|
"set_page_as_boundary" => false,
|
1243
|
-
"request_id" => "
|
1250
|
+
"request_id" => "07144230-657d-40ab-9fb5-89095bf3fc65",
|
1244
1251
|
"enable_file_picker" => true,
|
1245
1252
|
"sync_source_items" => true,
|
1246
1253
|
"incremental_sync" => false,
|
@@ -1459,7 +1466,7 @@ result = carbon.integrations.get_oauth_url(
|
|
1459
1466
|
set_page_as_boundary: false,
|
1460
1467
|
data_source_id: 1,
|
1461
1468
|
connecting_new_account: false,
|
1462
|
-
request_id: "
|
1469
|
+
request_id: "b7620173-662c-4ae7-bb61-2e6ffd8619f5",
|
1463
1470
|
use_ocr: false,
|
1464
1471
|
parse_pdf_tables_with_ocr: false,
|
1465
1472
|
enable_file_picker: true,
|
@@ -1519,7 +1526,7 @@ Enable OCR for files that support it. Supported formats: pdf
|
|
1519
1526
|
##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
|
1520
1527
|
##### enable_file_picker: `Boolean`<a id="enable_file_picker-boolean"></a>
|
1521
1528
|
Enable integration's file picker for sources that support it. Supported sources:
|
1522
|
-
|
1529
|
+
DROPBOX, ONEDRIVE, BOX, GOOGLE_DRIVE, SHAREPOINT
|
1523
1530
|
|
1524
1531
|
##### sync_source_items: `Boolean`<a id="sync_source_items-boolean"></a>
|
1525
1532
|
Enabling this flag will fetch all available content from the source to be listed
|
@@ -1781,7 +1788,7 @@ result = carbon.integrations.sync_confluence(
|
|
1781
1788
|
prepend_filename_to_chunks: false,
|
1782
1789
|
max_items_per_chunk: 1,
|
1783
1790
|
set_page_as_boundary: false,
|
1784
|
-
request_id: "
|
1791
|
+
request_id: "b2c5c595-0cfb-4ec3-96ff-87158c2b6207",
|
1785
1792
|
use_ocr: false,
|
1786
1793
|
parse_pdf_tables_with_ocr: false,
|
1787
1794
|
incremental_sync: false,
|
@@ -1884,7 +1891,7 @@ result = carbon.integrations.sync_files(
|
|
1884
1891
|
prepend_filename_to_chunks: false,
|
1885
1892
|
max_items_per_chunk: 1,
|
1886
1893
|
set_page_as_boundary: false,
|
1887
|
-
request_id: "
|
1894
|
+
request_id: "b2c5c595-0cfb-4ec3-96ff-87158c2b6207",
|
1888
1895
|
use_ocr: false,
|
1889
1896
|
parse_pdf_tables_with_ocr: false,
|
1890
1897
|
incremental_sync: false,
|
@@ -2741,6 +2748,8 @@ result = carbon.utilities.scrape_sitemap(
|
|
2741
2748
|
css_classes_to_skip: [],
|
2742
2749
|
css_selectors_to_skip: [],
|
2743
2750
|
embedding_model: "OPENAI",
|
2751
|
+
url_paths_to_include: [],
|
2752
|
+
url_paths_to_exclude: [],
|
2744
2753
|
)
|
2745
2754
|
p result
|
2746
2755
|
```
|
@@ -2760,6 +2769,16 @@ p result
|
|
2760
2769
|
##### css_classes_to_skip: Array<`String`><a id="css_classes_to_skip-array"></a>
|
2761
2770
|
##### css_selectors_to_skip: Array<`String`><a id="css_selectors_to_skip-array"></a>
|
2762
2771
|
##### embedding_model: [`EmbeddingGenerators`](./lib/carbon_ruby_sdk/models/embedding_generators.rb)<a id="embedding_model-embeddinggeneratorslibcarbon_ruby_sdkmodelsembedding_generatorsrb"></a>
|
2772
|
+
##### url_paths_to_include: Array<`String`><a id="url_paths_to_include-array"></a>
|
2773
|
+
URL subpaths or directories that you want to include. For example if you want to
|
2774
|
+
only include URLs that start with /questions in stackoverflow.com, you will add
|
2775
|
+
/questions/ in this input
|
2776
|
+
|
2777
|
+
##### url_paths_to_exclude: Array<`String`><a id="url_paths_to_exclude-array"></a>
|
2778
|
+
URL subpaths or directories that you want to exclude. For example if you want to
|
2779
|
+
exclude URLs that start with /questions in stackoverflow.com, you will add
|
2780
|
+
/questions/ in this input
|
2781
|
+
|
2763
2782
|
#### 🌐 Endpoint<a id="🌐-endpoint"></a>
|
2764
2783
|
|
2765
2784
|
`/scrape_sitemap` `POST`
|
@@ -2799,6 +2818,7 @@ result = carbon.utilities.scrape_web(
|
|
2799
2818
|
"css_classes_to_skip" => [],
|
2800
2819
|
"css_selectors_to_skip" => [],
|
2801
2820
|
"embedding_model" => "OPENAI",
|
2821
|
+
"url_paths_to_include" => [],
|
2802
2822
|
}
|
2803
2823
|
],
|
2804
2824
|
)
|
@@ -1174,9 +1174,10 @@ module Carbon
|
|
1174
1174
|
# @param parse_pdf_tables_with_ocr [Boolean] Whether to use rich table parsing when `use_ocr` is enabled.
|
1175
1175
|
# @param detect_audio_language [Boolean] Whether to automatically detect the language of the uploaded audio file.
|
1176
1176
|
# @param media_type [FileContentTypesNullable] The media type of the file. If not provided, it will be inferred from the file extension.
|
1177
|
+
# @param split_rows [Boolean] Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files.
|
1177
1178
|
# @param body [BodyCreateUploadFileUploadfilePost]
|
1178
1179
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1179
|
-
def upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, extra: {})
|
1180
|
+
def upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, split_rows: false, extra: {})
|
1180
1181
|
_body = {}
|
1181
1182
|
_body[:file] = file if file != SENTINEL
|
1182
1183
|
body_create_upload_file_uploadfile_post = _body
|
@@ -1192,6 +1193,7 @@ module Carbon
|
|
1192
1193
|
extra[:parse_pdf_tables_with_ocr] = parse_pdf_tables_with_ocr if parse_pdf_tables_with_ocr != SENTINEL
|
1193
1194
|
extra[:detect_audio_language] = detect_audio_language if detect_audio_language != SENTINEL
|
1194
1195
|
extra[:media_type] = media_type if media_type != SENTINEL
|
1196
|
+
extra[:split_rows] = split_rows if split_rows != SENTINEL
|
1195
1197
|
api_response = upload_with_http_info_impl(file, body_create_upload_file_uploadfile_post, extra)
|
1196
1198
|
api_response.data
|
1197
1199
|
end
|
@@ -1237,9 +1239,10 @@ module Carbon
|
|
1237
1239
|
# @param parse_pdf_tables_with_ocr [Boolean] Whether to use rich table parsing when `use_ocr` is enabled.
|
1238
1240
|
# @param detect_audio_language [Boolean] Whether to automatically detect the language of the uploaded audio file.
|
1239
1241
|
# @param media_type [FileContentTypesNullable] The media type of the file. If not provided, it will be inferred from the file extension.
|
1242
|
+
# @param split_rows [Boolean] Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files.
|
1240
1243
|
# @param body [BodyCreateUploadFileUploadfilePost]
|
1241
1244
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1242
|
-
def upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, extra: {})
|
1245
|
+
def upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, split_rows: false, extra: {})
|
1243
1246
|
_body = {}
|
1244
1247
|
_body[:file] = file if file != SENTINEL
|
1245
1248
|
body_create_upload_file_uploadfile_post = _body
|
@@ -1255,6 +1258,7 @@ module Carbon
|
|
1255
1258
|
extra[:parse_pdf_tables_with_ocr] = parse_pdf_tables_with_ocr if parse_pdf_tables_with_ocr != SENTINEL
|
1256
1259
|
extra[:detect_audio_language] = detect_audio_language if detect_audio_language != SENTINEL
|
1257
1260
|
extra[:media_type] = media_type if media_type != SENTINEL
|
1261
|
+
extra[:split_rows] = split_rows if split_rows != SENTINEL
|
1258
1262
|
upload_with_http_info_impl(file, body_create_upload_file_uploadfile_post, extra)
|
1259
1263
|
end
|
1260
1264
|
|
@@ -1275,6 +1279,7 @@ module Carbon
|
|
1275
1279
|
# @option opts [Boolean] :parse_pdf_tables_with_ocr Whether to use rich table parsing when `use_ocr` is enabled. (default to false)
|
1276
1280
|
# @option opts [Boolean] :detect_audio_language Whether to automatically detect the language of the uploaded audio file. (default to false)
|
1277
1281
|
# @option opts [FileContentTypesNullable] :media_type The media type of the file. If not provided, it will be inferred from the file extension.
|
1282
|
+
# @option opts [Boolean] :split_rows Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files. (default to false)
|
1278
1283
|
# @return [UserFile]
|
1279
1284
|
private def upload_impl(file, body_create_upload_file_uploadfile_post, opts = {})
|
1280
1285
|
data, _status_code, _headers = upload_with_http_info(file, body_create_upload_file_uploadfile_post, opts)
|
@@ -1298,6 +1303,7 @@ module Carbon
|
|
1298
1303
|
# @option opts [Boolean] :parse_pdf_tables_with_ocr Whether to use rich table parsing when `use_ocr` is enabled. (default to false)
|
1299
1304
|
# @option opts [Boolean] :detect_audio_language Whether to automatically detect the language of the uploaded audio file. (default to false)
|
1300
1305
|
# @option opts [FileContentTypesNullable] :media_type The media type of the file. If not provided, it will be inferred from the file extension.
|
1306
|
+
# @option opts [Boolean] :split_rows Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files. (default to false)
|
1301
1307
|
# @return [APIResponse] data is UserFile, status code, headers and response
|
1302
1308
|
private def upload_with_http_info_impl(file, body_create_upload_file_uploadfile_post, opts = {})
|
1303
1309
|
if @api_client.config.debugging
|
@@ -1328,6 +1334,7 @@ module Carbon
|
|
1328
1334
|
query_params[:'parse_pdf_tables_with_ocr'] = opts[:'parse_pdf_tables_with_ocr'] if !opts[:'parse_pdf_tables_with_ocr'].nil?
|
1329
1335
|
query_params[:'detect_audio_language'] = opts[:'detect_audio_language'] if !opts[:'detect_audio_language'].nil?
|
1330
1336
|
query_params[:'media_type'] = opts[:'media_type'] if !opts[:'media_type'].nil?
|
1337
|
+
query_params[:'split_rows'] = opts[:'split_rows'] if !opts[:'split_rows'].nil?
|
1331
1338
|
|
1332
1339
|
# header parameters
|
1333
1340
|
header_params = opts[:header_params] || {}
|
@@ -1386,9 +1393,10 @@ module Carbon
|
|
1386
1393
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
1387
1394
|
# @param detect_audio_language [Boolean]
|
1388
1395
|
# @param media_type [FileContentTypesNullable]
|
1396
|
+
# @param split_rows [Boolean]
|
1389
1397
|
# @param body [UploadFileFromUrlInput]
|
1390
1398
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1391
|
-
def upload_from_url(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, extra: {})
|
1399
|
+
def upload_from_url(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, split_rows: false, extra: {})
|
1392
1400
|
_body = {}
|
1393
1401
|
_body[:url] = url if url != SENTINEL
|
1394
1402
|
_body[:file_name] = file_name if file_name != SENTINEL
|
@@ -1404,6 +1412,7 @@ module Carbon
|
|
1404
1412
|
_body[:parse_pdf_tables_with_ocr] = parse_pdf_tables_with_ocr if parse_pdf_tables_with_ocr != SENTINEL
|
1405
1413
|
_body[:detect_audio_language] = detect_audio_language if detect_audio_language != SENTINEL
|
1406
1414
|
_body[:media_type] = media_type if media_type != SENTINEL
|
1415
|
+
_body[:split_rows] = split_rows if split_rows != SENTINEL
|
1407
1416
|
upload_file_from_url_input = _body
|
1408
1417
|
api_response = upload_from_url_with_http_info_impl(upload_file_from_url_input, extra)
|
1409
1418
|
api_response.data
|
@@ -1425,9 +1434,10 @@ module Carbon
|
|
1425
1434
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
1426
1435
|
# @param detect_audio_language [Boolean]
|
1427
1436
|
# @param media_type [FileContentTypesNullable]
|
1437
|
+
# @param split_rows [Boolean]
|
1428
1438
|
# @param body [UploadFileFromUrlInput]
|
1429
1439
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1430
|
-
def upload_from_url_with_http_info(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, extra: {})
|
1440
|
+
def upload_from_url_with_http_info(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, media_type: SENTINEL, split_rows: false, extra: {})
|
1431
1441
|
_body = {}
|
1432
1442
|
_body[:url] = url if url != SENTINEL
|
1433
1443
|
_body[:file_name] = file_name if file_name != SENTINEL
|
@@ -1443,6 +1453,7 @@ module Carbon
|
|
1443
1453
|
_body[:parse_pdf_tables_with_ocr] = parse_pdf_tables_with_ocr if parse_pdf_tables_with_ocr != SENTINEL
|
1444
1454
|
_body[:detect_audio_language] = detect_audio_language if detect_audio_language != SENTINEL
|
1445
1455
|
_body[:media_type] = media_type if media_type != SENTINEL
|
1456
|
+
_body[:split_rows] = split_rows if split_rows != SENTINEL
|
1446
1457
|
upload_file_from_url_input = _body
|
1447
1458
|
upload_from_url_with_http_info_impl(upload_file_from_url_input, extra)
|
1448
1459
|
end
|
@@ -653,13 +653,13 @@ module Carbon
|
|
653
653
|
# @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
|
654
654
|
# @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
|
655
655
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
656
|
-
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources:
|
656
|
+
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, ONEDRIVE, BOX, GOOGLE_DRIVE, SHAREPOINT
|
657
657
|
# @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
658
658
|
# @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
|
659
659
|
# @param file_sync_config [FileSyncConfigNullable]
|
660
660
|
# @param body [OAuthURLRequest]
|
661
661
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
662
|
-
def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '
|
662
|
+
def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'b7620173-662c-4ae7-bb61-2e6ffd8619f5', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
663
663
|
_body = {}
|
664
664
|
_body[:tags] = tags if tags != SENTINEL
|
665
665
|
_body[:scope] = scope if scope != SENTINEL
|
@@ -721,13 +721,13 @@ module Carbon
|
|
721
721
|
# @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
|
722
722
|
# @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
|
723
723
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
724
|
-
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources:
|
724
|
+
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, ONEDRIVE, BOX, GOOGLE_DRIVE, SHAREPOINT
|
725
725
|
# @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
726
726
|
# @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
|
727
727
|
# @param file_sync_config [FileSyncConfigNullable]
|
728
728
|
# @param body [OAuthURLRequest]
|
729
729
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
730
|
-
def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '
|
730
|
+
def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: 'b7620173-662c-4ae7-bb61-2e6ffd8619f5', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
731
731
|
_body = {}
|
732
732
|
_body[:tags] = tags if tags != SENTINEL
|
733
733
|
_body[:scope] = scope if scope != SENTINEL
|
@@ -1523,7 +1523,7 @@ module Carbon
|
|
1523
1523
|
# @param file_sync_config [FileSyncConfigNullable]
|
1524
1524
|
# @param body [SyncFilesRequest]
|
1525
1525
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1526
|
-
def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1526
|
+
def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'b2c5c595-0cfb-4ec3-96ff-87158c2b6207', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1527
1527
|
_body = {}
|
1528
1528
|
_body[:tags] = tags if tags != SENTINEL
|
1529
1529
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1571,7 +1571,7 @@ module Carbon
|
|
1571
1571
|
# @param file_sync_config [FileSyncConfigNullable]
|
1572
1572
|
# @param body [SyncFilesRequest]
|
1573
1573
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1574
|
-
def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1574
|
+
def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'b2c5c595-0cfb-4ec3-96ff-87158c2b6207', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1575
1575
|
_body = {}
|
1576
1576
|
_body[:tags] = tags if tags != SENTINEL
|
1577
1577
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1779,7 +1779,7 @@ module Carbon
|
|
1779
1779
|
# @param file_sync_config [FileSyncConfigNullable]
|
1780
1780
|
# @param body [SyncFilesRequest]
|
1781
1781
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1782
|
-
def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1782
|
+
def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'b2c5c595-0cfb-4ec3-96ff-87158c2b6207', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1783
1783
|
_body = {}
|
1784
1784
|
_body[:tags] = tags if tags != SENTINEL
|
1785
1785
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1827,7 +1827,7 @@ module Carbon
|
|
1827
1827
|
# @param file_sync_config [FileSyncConfigNullable]
|
1828
1828
|
# @param body [SyncFilesRequest]
|
1829
1829
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1830
|
-
def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1830
|
+
def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: 'b2c5c595-0cfb-4ec3-96ff-87158c2b6207', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1831
1831
|
_body = {}
|
1832
1832
|
_body[:tags] = tags if tags != SENTINEL
|
1833
1833
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -342,9 +342,11 @@ module Carbon
|
|
342
342
|
# @param css_classes_to_skip [Array<String>]
|
343
343
|
# @param css_selectors_to_skip [Array<String>]
|
344
344
|
# @param embedding_model [EmbeddingGenerators]
|
345
|
+
# @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
346
|
+
# @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
345
347
|
# @param body [SitemapScrapeRequest]
|
346
348
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
347
|
-
def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
|
349
|
+
def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
|
348
350
|
_body = {}
|
349
351
|
_body[:tags] = tags if tags != SENTINEL
|
350
352
|
_body[:url] = url if url != SENTINEL
|
@@ -359,6 +361,8 @@ module Carbon
|
|
359
361
|
_body[:css_classes_to_skip] = css_classes_to_skip if css_classes_to_skip != SENTINEL
|
360
362
|
_body[:css_selectors_to_skip] = css_selectors_to_skip if css_selectors_to_skip != SENTINEL
|
361
363
|
_body[:embedding_model] = embedding_model if embedding_model != SENTINEL
|
364
|
+
_body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
|
365
|
+
_body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
|
362
366
|
sitemap_scrape_request = _body
|
363
367
|
api_response = scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
|
364
368
|
api_response.data
|
@@ -387,9 +391,11 @@ module Carbon
|
|
387
391
|
# @param css_classes_to_skip [Array<String>]
|
388
392
|
# @param css_selectors_to_skip [Array<String>]
|
389
393
|
# @param embedding_model [EmbeddingGenerators]
|
394
|
+
# @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
395
|
+
# @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
390
396
|
# @param body [SitemapScrapeRequest]
|
391
397
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
392
|
-
def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
|
398
|
+
def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
|
393
399
|
_body = {}
|
394
400
|
_body[:tags] = tags if tags != SENTINEL
|
395
401
|
_body[:url] = url if url != SENTINEL
|
@@ -404,6 +410,8 @@ module Carbon
|
|
404
410
|
_body[:css_classes_to_skip] = css_classes_to_skip if css_classes_to_skip != SENTINEL
|
405
411
|
_body[:css_selectors_to_skip] = css_selectors_to_skip if css_selectors_to_skip != SENTINEL
|
406
412
|
_body[:embedding_model] = embedding_model if embedding_model != SENTINEL
|
413
|
+
_body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
|
414
|
+
_body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
|
407
415
|
sitemap_scrape_request = _body
|
408
416
|
scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
|
409
417
|
end
|
@@ -61,7 +61,7 @@ module Carbon
|
|
61
61
|
|
62
62
|
attr_accessor :parse_pdf_tables_with_ocr
|
63
63
|
|
64
|
-
# Enable integration's file picker for sources that support it. Supported sources:
|
64
|
+
# Enable integration's file picker for sources that support it. Supported sources: DROPBOX, ONEDRIVE, BOX, GOOGLE_DRIVE, SHAREPOINT
|
65
65
|
attr_accessor :enable_file_picker
|
66
66
|
|
67
67
|
# Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
@@ -279,7 +279,7 @@ module Carbon
|
|
279
279
|
if attributes.key?(:'request_id')
|
280
280
|
self.request_id = attributes[:'request_id']
|
281
281
|
else
|
282
|
-
self.request_id = '
|
282
|
+
self.request_id = 'b7620173-662c-4ae7-bb61-2e6ffd8619f5'
|
283
283
|
end
|
284
284
|
|
285
285
|
if attributes.key?(:'use_ocr')
|
@@ -37,6 +37,12 @@ module Carbon
|
|
37
37
|
|
38
38
|
attr_accessor :embedding_model
|
39
39
|
|
40
|
+
# URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
41
|
+
attr_accessor :url_paths_to_include
|
42
|
+
|
43
|
+
# URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
44
|
+
attr_accessor :url_paths_to_exclude
|
45
|
+
|
40
46
|
# Attribute mapping from ruby-style variable name to JSON key.
|
41
47
|
def self.attribute_map
|
42
48
|
{
|
@@ -52,7 +58,9 @@ module Carbon
|
|
52
58
|
:'html_tags_to_skip' => :'html_tags_to_skip',
|
53
59
|
:'css_classes_to_skip' => :'css_classes_to_skip',
|
54
60
|
:'css_selectors_to_skip' => :'css_selectors_to_skip',
|
55
|
-
:'embedding_model' => :'embedding_model'
|
61
|
+
:'embedding_model' => :'embedding_model',
|
62
|
+
:'url_paths_to_include' => :'url_paths_to_include',
|
63
|
+
:'url_paths_to_exclude' => :'url_paths_to_exclude'
|
56
64
|
}
|
57
65
|
end
|
58
66
|
|
@@ -76,7 +84,9 @@ module Carbon
|
|
76
84
|
:'html_tags_to_skip' => :'Array<String>',
|
77
85
|
:'css_classes_to_skip' => :'Array<String>',
|
78
86
|
:'css_selectors_to_skip' => :'Array<String>',
|
79
|
-
:'embedding_model' => :'EmbeddingGenerators'
|
87
|
+
:'embedding_model' => :'EmbeddingGenerators',
|
88
|
+
:'url_paths_to_include' => :'Array<String>',
|
89
|
+
:'url_paths_to_exclude' => :'Array<String>'
|
80
90
|
}
|
81
91
|
end
|
82
92
|
|
@@ -94,6 +104,8 @@ module Carbon
|
|
94
104
|
:'html_tags_to_skip',
|
95
105
|
:'css_classes_to_skip',
|
96
106
|
:'css_selectors_to_skip',
|
107
|
+
:'url_paths_to_include',
|
108
|
+
:'url_paths_to_exclude'
|
97
109
|
])
|
98
110
|
end
|
99
111
|
|
@@ -185,6 +197,18 @@ module Carbon
|
|
185
197
|
else
|
186
198
|
self.embedding_model = 'OPENAI'
|
187
199
|
end
|
200
|
+
|
201
|
+
if attributes.key?(:'url_paths_to_include')
|
202
|
+
if (value = attributes[:'url_paths_to_include']).is_a?(Array)
|
203
|
+
self.url_paths_to_include = value
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
if attributes.key?(:'url_paths_to_exclude')
|
208
|
+
if (value = attributes[:'url_paths_to_exclude']).is_a?(Array)
|
209
|
+
self.url_paths_to_exclude = value
|
210
|
+
end
|
211
|
+
end
|
188
212
|
end
|
189
213
|
|
190
214
|
# Show invalid properties with the reasons. Usually used together with valid?
|
@@ -199,6 +223,14 @@ module Carbon
|
|
199
223
|
invalid_properties.push('invalid value for "max_pages_to_scrape", must be greater than or equal to 1.')
|
200
224
|
end
|
201
225
|
|
226
|
+
if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
|
227
|
+
invalid_properties.push('invalid value for "url_paths_to_include", number of items must be less than or equal to 10.')
|
228
|
+
end
|
229
|
+
|
230
|
+
if !@url_paths_to_exclude.nil? && @url_paths_to_exclude.length > 10
|
231
|
+
invalid_properties.push('invalid value for "url_paths_to_exclude", number of items must be less than or equal to 10.')
|
232
|
+
end
|
233
|
+
|
202
234
|
invalid_properties
|
203
235
|
end
|
204
236
|
|
@@ -207,6 +239,8 @@ module Carbon
|
|
207
239
|
def valid?
|
208
240
|
return false if @url.nil?
|
209
241
|
return false if !@max_pages_to_scrape.nil? && @max_pages_to_scrape < 1
|
242
|
+
return false if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
|
243
|
+
return false if !@url_paths_to_exclude.nil? && @url_paths_to_exclude.length > 10
|
210
244
|
true
|
211
245
|
end
|
212
246
|
|
@@ -220,6 +254,26 @@ module Carbon
|
|
220
254
|
@max_pages_to_scrape = max_pages_to_scrape
|
221
255
|
end
|
222
256
|
|
257
|
+
# Custom attribute writer method with validation
|
258
|
+
# @param [Object] url_paths_to_include Value to be assigned
|
259
|
+
def url_paths_to_include=(url_paths_to_include)
|
260
|
+
if !url_paths_to_include.nil? && url_paths_to_include.length > 10
|
261
|
+
fail ArgumentError, 'invalid value for "url_paths_to_include", number of items must be less than or equal to 10.'
|
262
|
+
end
|
263
|
+
|
264
|
+
@url_paths_to_include = url_paths_to_include
|
265
|
+
end
|
266
|
+
|
267
|
+
# Custom attribute writer method with validation
|
268
|
+
# @param [Object] url_paths_to_exclude Value to be assigned
|
269
|
+
def url_paths_to_exclude=(url_paths_to_exclude)
|
270
|
+
if !url_paths_to_exclude.nil? && url_paths_to_exclude.length > 10
|
271
|
+
fail ArgumentError, 'invalid value for "url_paths_to_exclude", number of items must be less than or equal to 10.'
|
272
|
+
end
|
273
|
+
|
274
|
+
@url_paths_to_exclude = url_paths_to_exclude
|
275
|
+
end
|
276
|
+
|
223
277
|
# Checks equality by comparing each attribute.
|
224
278
|
# @param [Object] Object to be compared
|
225
279
|
def ==(o)
|
@@ -237,7 +291,9 @@ module Carbon
|
|
237
291
|
html_tags_to_skip == o.html_tags_to_skip &&
|
238
292
|
css_classes_to_skip == o.css_classes_to_skip &&
|
239
293
|
css_selectors_to_skip == o.css_selectors_to_skip &&
|
240
|
-
embedding_model == o.embedding_model
|
294
|
+
embedding_model == o.embedding_model &&
|
295
|
+
url_paths_to_include == o.url_paths_to_include &&
|
296
|
+
url_paths_to_exclude == o.url_paths_to_exclude
|
241
297
|
end
|
242
298
|
|
243
299
|
# @see the `==` method
|
@@ -249,7 +305,7 @@ module Carbon
|
|
249
305
|
# Calculates hash code according to all attributes.
|
250
306
|
# @return [Integer] Hash code
|
251
307
|
def hash
|
252
|
-
[tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model].hash
|
308
|
+
[tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude].hash
|
253
309
|
end
|
254
310
|
|
255
311
|
# Builds the object from hash
|
@@ -187,7 +187,7 @@ module Carbon
|
|
187
187
|
if attributes.key?(:'request_id')
|
188
188
|
self.request_id = attributes[:'request_id']
|
189
189
|
else
|
190
|
-
self.request_id = '
|
190
|
+
self.request_id = 'b2c5c595-0cfb-4ec3-96ff-87158c2b6207'
|
191
191
|
end
|
192
192
|
|
193
193
|
if attributes.key?(:'use_ocr')
|
@@ -182,7 +182,7 @@ module Carbon
|
|
182
182
|
if attributes.key?(:'request_id')
|
183
183
|
self.request_id = attributes[:'request_id']
|
184
184
|
else
|
185
|
-
self.request_id = '
|
185
|
+
self.request_id = '07144230-657d-40ab-9fb5-89095bf3fc65'
|
186
186
|
end
|
187
187
|
|
188
188
|
if attributes.key?(:'enable_file_picker')
|
@@ -40,6 +40,8 @@ module Carbon
|
|
40
40
|
|
41
41
|
attr_accessor :media_type
|
42
42
|
|
43
|
+
attr_accessor :split_rows
|
44
|
+
|
43
45
|
# Attribute mapping from ruby-style variable name to JSON key.
|
44
46
|
def self.attribute_map
|
45
47
|
{
|
@@ -56,7 +58,8 @@ module Carbon
|
|
56
58
|
:'max_items_per_chunk' => :'max_items_per_chunk',
|
57
59
|
:'parse_pdf_tables_with_ocr' => :'parse_pdf_tables_with_ocr',
|
58
60
|
:'detect_audio_language' => :'detect_audio_language',
|
59
|
-
:'media_type' => :'media_type'
|
61
|
+
:'media_type' => :'media_type',
|
62
|
+
:'split_rows' => :'split_rows'
|
60
63
|
}
|
61
64
|
end
|
62
65
|
|
@@ -81,7 +84,8 @@ module Carbon
|
|
81
84
|
:'max_items_per_chunk' => :'Integer',
|
82
85
|
:'parse_pdf_tables_with_ocr' => :'Boolean',
|
83
86
|
:'detect_audio_language' => :'Boolean',
|
84
|
-
:'media_type' => :'FileContentTypesNullable'
|
87
|
+
:'media_type' => :'FileContentTypesNullable',
|
88
|
+
:'split_rows' => :'Boolean'
|
85
89
|
}
|
86
90
|
end
|
87
91
|
|
@@ -92,7 +96,7 @@ module Carbon
|
|
92
96
|
:'chunk_size',
|
93
97
|
:'chunk_overlap',
|
94
98
|
:'max_items_per_chunk',
|
95
|
-
:'media_type'
|
99
|
+
:'media_type',
|
96
100
|
])
|
97
101
|
end
|
98
102
|
|
@@ -182,6 +186,12 @@ module Carbon
|
|
182
186
|
if attributes.key?(:'media_type')
|
183
187
|
self.media_type = attributes[:'media_type']
|
184
188
|
end
|
189
|
+
|
190
|
+
if attributes.key?(:'split_rows')
|
191
|
+
self.split_rows = attributes[:'split_rows']
|
192
|
+
else
|
193
|
+
self.split_rows = false
|
194
|
+
end
|
185
195
|
end
|
186
196
|
|
187
197
|
# Show invalid properties with the reasons. Usually used together with valid?
|
@@ -220,7 +230,8 @@ module Carbon
|
|
220
230
|
max_items_per_chunk == o.max_items_per_chunk &&
|
221
231
|
parse_pdf_tables_with_ocr == o.parse_pdf_tables_with_ocr &&
|
222
232
|
detect_audio_language == o.detect_audio_language &&
|
223
|
-
media_type == o.media_type
|
233
|
+
media_type == o.media_type &&
|
234
|
+
split_rows == o.split_rows
|
224
235
|
end
|
225
236
|
|
226
237
|
# @see the `==` method
|
@@ -232,7 +243,7 @@ module Carbon
|
|
232
243
|
# Calculates hash code according to all attributes.
|
233
244
|
# @return [Integer] Hash code
|
234
245
|
def hash
|
235
|
-
[url, file_name, chunk_size, chunk_overlap, skip_embedding_generation, set_page_as_boundary, embedding_model, generate_sparse_vectors, use_textract, prepend_filename_to_chunks, max_items_per_chunk, parse_pdf_tables_with_ocr, detect_audio_language, media_type].hash
|
246
|
+
[url, file_name, chunk_size, chunk_overlap, skip_embedding_generation, set_page_as_boundary, embedding_model, generate_sparse_vectors, use_textract, prepend_filename_to_chunks, max_items_per_chunk, parse_pdf_tables_with_ocr, detect_audio_language, media_type, split_rows].hash
|
236
247
|
end
|
237
248
|
|
238
249
|
# Builds the object from hash
|
@@ -39,6 +39,9 @@ module Carbon
|
|
39
39
|
|
40
40
|
attr_accessor :embedding_model
|
41
41
|
|
42
|
+
# URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
43
|
+
attr_accessor :url_paths_to_include
|
44
|
+
|
42
45
|
# Attribute mapping from ruby-style variable name to JSON key.
|
43
46
|
def self.attribute_map
|
44
47
|
{
|
@@ -55,7 +58,8 @@ module Carbon
|
|
55
58
|
:'html_tags_to_skip' => :'html_tags_to_skip',
|
56
59
|
:'css_classes_to_skip' => :'css_classes_to_skip',
|
57
60
|
:'css_selectors_to_skip' => :'css_selectors_to_skip',
|
58
|
-
:'embedding_model' => :'embedding_model'
|
61
|
+
:'embedding_model' => :'embedding_model',
|
62
|
+
:'url_paths_to_include' => :'url_paths_to_include'
|
59
63
|
}
|
60
64
|
end
|
61
65
|
|
@@ -80,7 +84,8 @@ module Carbon
|
|
80
84
|
:'html_tags_to_skip' => :'Array<String>',
|
81
85
|
:'css_classes_to_skip' => :'Array<String>',
|
82
86
|
:'css_selectors_to_skip' => :'Array<String>',
|
83
|
-
:'embedding_model' => :'EmbeddingGenerators'
|
87
|
+
:'embedding_model' => :'EmbeddingGenerators',
|
88
|
+
:'url_paths_to_include' => :'Array<String>'
|
84
89
|
}
|
85
90
|
end
|
86
91
|
|
@@ -99,6 +104,7 @@ module Carbon
|
|
99
104
|
:'html_tags_to_skip',
|
100
105
|
:'css_classes_to_skip',
|
101
106
|
:'css_selectors_to_skip',
|
107
|
+
:'url_paths_to_include'
|
102
108
|
])
|
103
109
|
end
|
104
110
|
|
@@ -198,6 +204,12 @@ module Carbon
|
|
198
204
|
else
|
199
205
|
self.embedding_model = 'OPENAI'
|
200
206
|
end
|
207
|
+
|
208
|
+
if attributes.key?(:'url_paths_to_include')
|
209
|
+
if (value = attributes[:'url_paths_to_include']).is_a?(Array)
|
210
|
+
self.url_paths_to_include = value
|
211
|
+
end
|
212
|
+
end
|
201
213
|
end
|
202
214
|
|
203
215
|
# Show invalid properties with the reasons. Usually used together with valid?
|
@@ -216,6 +228,10 @@ module Carbon
|
|
216
228
|
invalid_properties.push('invalid value for "max_pages_to_scrape", must be greater than or equal to 1.')
|
217
229
|
end
|
218
230
|
|
231
|
+
if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
|
232
|
+
invalid_properties.push('invalid value for "url_paths_to_include", number of items must be less than or equal to 10.')
|
233
|
+
end
|
234
|
+
|
219
235
|
invalid_properties
|
220
236
|
end
|
221
237
|
|
@@ -225,6 +241,7 @@ module Carbon
|
|
225
241
|
return false if @url.nil?
|
226
242
|
return false if !@recursion_depth.nil? && @recursion_depth < 0
|
227
243
|
return false if !@max_pages_to_scrape.nil? && @max_pages_to_scrape < 1
|
244
|
+
return false if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
|
228
245
|
true
|
229
246
|
end
|
230
247
|
|
@@ -248,6 +265,16 @@ module Carbon
|
|
248
265
|
@max_pages_to_scrape = max_pages_to_scrape
|
249
266
|
end
|
250
267
|
|
268
|
+
# Custom attribute writer method with validation
|
269
|
+
# @param [Object] url_paths_to_include Value to be assigned
|
270
|
+
def url_paths_to_include=(url_paths_to_include)
|
271
|
+
if !url_paths_to_include.nil? && url_paths_to_include.length > 10
|
272
|
+
fail ArgumentError, 'invalid value for "url_paths_to_include", number of items must be less than or equal to 10.'
|
273
|
+
end
|
274
|
+
|
275
|
+
@url_paths_to_include = url_paths_to_include
|
276
|
+
end
|
277
|
+
|
251
278
|
# Checks equality by comparing each attribute.
|
252
279
|
# @param [Object] Object to be compared
|
253
280
|
def ==(o)
|
@@ -266,7 +293,8 @@ module Carbon
|
|
266
293
|
html_tags_to_skip == o.html_tags_to_skip &&
|
267
294
|
css_classes_to_skip == o.css_classes_to_skip &&
|
268
295
|
css_selectors_to_skip == o.css_selectors_to_skip &&
|
269
|
-
embedding_model == o.embedding_model
|
296
|
+
embedding_model == o.embedding_model &&
|
297
|
+
url_paths_to_include == o.url_paths_to_include
|
270
298
|
end
|
271
299
|
|
272
300
|
# @see the `==` method
|
@@ -278,7 +306,7 @@ module Carbon
|
|
278
306
|
# Calculates hash code according to all attributes.
|
279
307
|
# @return [Integer] Hash code
|
280
308
|
def hash
|
281
|
-
[tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model].hash
|
309
|
+
[tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include].hash
|
282
310
|
end
|
283
311
|
|
284
312
|
# Builds the object from hash
|
data/spec/api/files_api_spec.rb
CHANGED
@@ -159,6 +159,7 @@ describe 'FilesApi' do
|
|
159
159
|
# @option opts [Boolean] :parse_pdf_tables_with_ocr Whether to use rich table parsing when `use_ocr` is enabled.
|
160
160
|
# @option opts [Boolean] :detect_audio_language Whether to automatically detect the language of the uploaded audio file.
|
161
161
|
# @option opts [FileContentTypesNullable] :media_type The media type of the file. If not provided, it will be inferred from the file extension.
|
162
|
+
# @option opts [Boolean] :split_rows Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files.
|
162
163
|
# @return [UserFile]
|
163
164
|
describe 'upload test' do
|
164
165
|
it 'should work' do
|
@@ -97,4 +97,16 @@ describe Carbon::SitemapScrapeRequest do
|
|
97
97
|
end
|
98
98
|
end
|
99
99
|
|
100
|
+
describe 'test attribute "url_paths_to_include"' do
|
101
|
+
it 'should work' do
|
102
|
+
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
describe 'test attribute "url_paths_to_exclude"' do
|
107
|
+
it 'should work' do
|
108
|
+
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
100
112
|
end
|
@@ -103,4 +103,10 @@ describe Carbon::UploadFileFromUrlInput do
|
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
106
|
+
describe 'test attribute "split_rows"' do
|
107
|
+
it 'should work' do
|
108
|
+
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
106
112
|
end
|
@@ -103,4 +103,10 @@ describe Carbon::WebscrapeRequest do
|
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
106
|
+
describe 'test attribute "url_paths_to_include"' do
|
107
|
+
it 'should work' do
|
108
|
+
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
106
112
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: carbon_ruby_sdk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Konfig
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-06-
|
11
|
+
date: 2024-06-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|