carbon_ruby_sdk 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +20 -7
- data/lib/carbon_ruby_sdk/api/integrations_api.rb +8 -8
- data/lib/carbon_ruby_sdk/api/utilities_api.rb +10 -2
- data/lib/carbon_ruby_sdk/models/o_auth_url_request.rb +2 -2
- data/lib/carbon_ruby_sdk/models/sitemap_scrape_request.rb +60 -4
- data/lib/carbon_ruby_sdk/models/sync_files_request.rb +1 -1
- data/lib/carbon_ruby_sdk/models/sync_options.rb +1 -1
- data/lib/carbon_ruby_sdk/models/webscrape_request.rb +32 -4
- data/lib/carbon_ruby_sdk/version.rb +1 -1
- data/spec/models/sitemap_scrape_request_spec.rb +12 -0
- data/spec/models/webscrape_request_spec.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af28a3b256d49d38a6aca558d12a49a6e3f2888587dedeef54311ad7d0bd0ac9
|
4
|
+
data.tar.gz: a685b15e3ad3ab32463c4bd03c92432c21bc3c7cf3b8f29bcf340cde6e468377
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 023c5d51386e0e76ecd6954f25c69a8562302f89de2a1a15cd72fbfc0e3491c6129ad496da1ad9dbfd80a0030c30ba501cb2e8552cbdd92c07513a20f0257211
|
7
|
+
data.tar.gz: 465fbc642ce7bf817b5c5d647d34c1c963e254a21facef003efc0a60a5074dad6c1b642a1093e3f0e9228505bde79b9f5c55e263827987395734718b019c7a06
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
|
7
7
|
Connect external data to LLMs, no matter the source.
|
8
8
|
|
9
|
-
[![npm](https://img.shields.io/badge/gem-v0.2.2-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.2)
|
9
|
+
[![npm](https://img.shields.io/badge/gem-v0.2.3-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.3)
|
10
10
|
|
11
11
|
</div>
|
12
12
|
|
@@ -86,7 +86,7 @@ Connect external data to LLMs, no matter the source.
|
|
86
86
|
Add to Gemfile:
|
87
87
|
|
88
88
|
```ruby
|
89
|
-
gem 'carbon_ruby_sdk', '~> 0.2.2'
|
89
|
+
gem 'carbon_ruby_sdk', '~> 0.2.3'
|
90
90
|
```
|
91
91
|
|
92
92
|
## Getting Started<a id="getting-started"></a>
|
@@ -1240,7 +1240,7 @@ result = carbon.integrations.connect_data_source(
|
|
1240
1240
|
"prepend_filename_to_chunks" => false,
|
1241
1241
|
"sync_files_on_connection" => true,
|
1242
1242
|
"set_page_as_boundary" => false,
|
1243
|
-
"request_id" => "
|
1243
|
+
"request_id" => "652297b9-0f55-46d8-869d-13a36e89e5da",
|
1244
1244
|
"enable_file_picker" => true,
|
1245
1245
|
"sync_source_items" => true,
|
1246
1246
|
"incremental_sync" => false,
|
@@ -1459,7 +1459,7 @@ result = carbon.integrations.get_oauth_url(
|
|
1459
1459
|
set_page_as_boundary: false,
|
1460
1460
|
data_source_id: 1,
|
1461
1461
|
connecting_new_account: false,
|
1462
|
-
request_id: "
|
1462
|
+
request_id: "71f214fa-2155-41cb-9336-9b3070e86897",
|
1463
1463
|
use_ocr: false,
|
1464
1464
|
parse_pdf_tables_with_ocr: false,
|
1465
1465
|
enable_file_picker: true,
|
@@ -1519,7 +1519,7 @@ Enable OCR for files that support it. Supported formats: pdf
|
|
1519
1519
|
##### parse_pdf_tables_with_ocr: `Boolean`<a id="parse_pdf_tables_with_ocr-boolean"></a>
|
1520
1520
|
##### enable_file_picker: `Boolean`<a id="enable_file_picker-boolean"></a>
|
1521
1521
|
Enable integration's file picker for sources that support it. Supported sources:
|
1522
|
-
|
1522
|
+
DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
|
1523
1523
|
|
1524
1524
|
##### sync_source_items: `Boolean`<a id="sync_source_items-boolean"></a>
|
1525
1525
|
Enabling this flag will fetch all available content from the source to be listed
|
@@ -1781,7 +1781,7 @@ result = carbon.integrations.sync_confluence(
|
|
1781
1781
|
prepend_filename_to_chunks: false,
|
1782
1782
|
max_items_per_chunk: 1,
|
1783
1783
|
set_page_as_boundary: false,
|
1784
|
-
request_id: "
|
1784
|
+
request_id: "6136b467-242e-49df-9478-d3e0cfdde299",
|
1785
1785
|
use_ocr: false,
|
1786
1786
|
parse_pdf_tables_with_ocr: false,
|
1787
1787
|
incremental_sync: false,
|
@@ -1884,7 +1884,7 @@ result = carbon.integrations.sync_files(
|
|
1884
1884
|
prepend_filename_to_chunks: false,
|
1885
1885
|
max_items_per_chunk: 1,
|
1886
1886
|
set_page_as_boundary: false,
|
1887
|
-
request_id: "
|
1887
|
+
request_id: "6136b467-242e-49df-9478-d3e0cfdde299",
|
1888
1888
|
use_ocr: false,
|
1889
1889
|
parse_pdf_tables_with_ocr: false,
|
1890
1890
|
incremental_sync: false,
|
@@ -2741,6 +2741,8 @@ result = carbon.utilities.scrape_sitemap(
|
|
2741
2741
|
css_classes_to_skip: [],
|
2742
2742
|
css_selectors_to_skip: [],
|
2743
2743
|
embedding_model: "OPENAI",
|
2744
|
+
url_paths_to_include: [],
|
2745
|
+
url_paths_to_exclude: [],
|
2744
2746
|
)
|
2745
2747
|
p result
|
2746
2748
|
```
|
@@ -2760,6 +2762,16 @@ p result
|
|
2760
2762
|
##### css_classes_to_skip: Array<`String`><a id="css_classes_to_skip-array"></a>
|
2761
2763
|
##### css_selectors_to_skip: Array<`String`><a id="css_selectors_to_skip-array"></a>
|
2762
2764
|
##### embedding_model: [`EmbeddingGenerators`](./lib/carbon_ruby_sdk/models/embedding_generators.rb)<a id="embedding_model-embeddinggeneratorslibcarbon_ruby_sdkmodelsembedding_generatorsrb"></a>
|
2765
|
+
##### url_paths_to_include: Array<`String`><a id="url_paths_to_include-array"></a>
|
2766
|
+
URL subpaths or directories that you want to include. For example if you want to
|
2767
|
+
only include URLs that start with /questions in stackoverflow.com, you will add
|
2768
|
+
/questions/ in this input
|
2769
|
+
|
2770
|
+
##### url_paths_to_exclude: Array<`String`><a id="url_paths_to_exclude-array"></a>
|
2771
|
+
URL subpaths or directories that you want to exclude. For example if you want to
|
2772
|
+
exclude URLs that start with /questions in stackoverflow.com, you will add
|
2773
|
+
/questions/ in this input
|
2774
|
+
|
2763
2775
|
#### 🌐 Endpoint<a id="🌐-endpoint"></a>
|
2764
2776
|
|
2765
2777
|
`/scrape_sitemap` `POST`
|
@@ -2799,6 +2811,7 @@ result = carbon.utilities.scrape_web(
|
|
2799
2811
|
"css_classes_to_skip" => [],
|
2800
2812
|
"css_selectors_to_skip" => [],
|
2801
2813
|
"embedding_model" => "OPENAI",
|
2814
|
+
"url_paths_to_include" => [],
|
2802
2815
|
}
|
2803
2816
|
],
|
2804
2817
|
)
|
@@ -653,13 +653,13 @@ module Carbon
|
|
653
653
|
# @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
|
654
654
|
# @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
|
655
655
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
656
|
-
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources:
|
656
|
+
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
|
657
657
|
# @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
658
658
|
# @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
|
659
659
|
# @param file_sync_config [FileSyncConfigNullable]
|
660
660
|
# @param body [OAuthURLRequest]
|
661
661
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
662
|
-
def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '
|
662
|
+
def get_oauth_url(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '71f214fa-2155-41cb-9336-9b3070e86897', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
663
663
|
_body = {}
|
664
664
|
_body[:tags] = tags if tags != SENTINEL
|
665
665
|
_body[:scope] = scope if scope != SENTINEL
|
@@ -721,13 +721,13 @@ module Carbon
|
|
721
721
|
# @param request_id [String] This request id will be added to all files that get synced using the generated OAuth URL
|
722
722
|
# @param use_ocr [Boolean] Enable OCR for files that support it. Supported formats: pdf
|
723
723
|
# @param parse_pdf_tables_with_ocr [Boolean]
|
724
|
-
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources:
|
724
|
+
# @param enable_file_picker [Boolean] Enable integration's file picker for sources that support it. Supported sources: DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
|
725
725
|
# @param sync_source_items [Boolean] Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
726
726
|
# @param incremental_sync [Boolean] Only sync files if they have not already been synced or if the embedding properties have changed. This flag is currently supported by ONEDRIVE, GOOGLE_DRIVE, BOX, DROPBOX. It will be ignored for other data sources.
|
727
727
|
# @param file_sync_config [FileSyncConfigNullable]
|
728
728
|
# @param body [OAuthURLRequest]
|
729
729
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
730
|
-
def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '
|
730
|
+
def get_oauth_url_with_http_info(service:, tags: SENTINEL, scope: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', zendesk_subdomain: SENTINEL, microsoft_tenant: SENTINEL, sharepoint_site_name: SENTINEL, confluence_subdomain: SENTINEL, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, salesforce_domain: SENTINEL, sync_files_on_connection: true, set_page_as_boundary: false, data_source_id: SENTINEL, connecting_new_account: false, request_id: '71f214fa-2155-41cb-9336-9b3070e86897', use_ocr: false, parse_pdf_tables_with_ocr: false, enable_file_picker: true, sync_source_items: true, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
731
731
|
_body = {}
|
732
732
|
_body[:tags] = tags if tags != SENTINEL
|
733
733
|
_body[:scope] = scope if scope != SENTINEL
|
@@ -1523,7 +1523,7 @@ module Carbon
|
|
1523
1523
|
# @param file_sync_config [FileSyncConfigNullable]
|
1524
1524
|
# @param body [SyncFilesRequest]
|
1525
1525
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1526
|
-
def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1526
|
+
def sync_confluence(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1527
1527
|
_body = {}
|
1528
1528
|
_body[:tags] = tags if tags != SENTINEL
|
1529
1529
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1571,7 +1571,7 @@ module Carbon
|
|
1571
1571
|
# @param file_sync_config [FileSyncConfigNullable]
|
1572
1572
|
# @param body [SyncFilesRequest]
|
1573
1573
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1574
|
-
def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1574
|
+
def sync_confluence_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1575
1575
|
_body = {}
|
1576
1576
|
_body[:tags] = tags if tags != SENTINEL
|
1577
1577
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1779,7 +1779,7 @@ module Carbon
|
|
1779
1779
|
# @param file_sync_config [FileSyncConfigNullable]
|
1780
1780
|
# @param body [SyncFilesRequest]
|
1781
1781
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1782
|
-
def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1782
|
+
def sync_files(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1783
1783
|
_body = {}
|
1784
1784
|
_body[:tags] = tags if tags != SENTINEL
|
1785
1785
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -1827,7 +1827,7 @@ module Carbon
|
|
1827
1827
|
# @param file_sync_config [FileSyncConfigNullable]
|
1828
1828
|
# @param body [SyncFilesRequest]
|
1829
1829
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
1830
|
-
def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '
|
1830
|
+
def sync_files_with_http_info(data_source_id:, ids:, tags: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, embedding_model: 'OPENAI', generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, set_page_as_boundary: false, request_id: '6136b467-242e-49df-9478-d3e0cfdde299', use_ocr: false, parse_pdf_tables_with_ocr: false, incremental_sync: false, file_sync_config: SENTINEL, extra: {})
|
1831
1831
|
_body = {}
|
1832
1832
|
_body[:tags] = tags if tags != SENTINEL
|
1833
1833
|
_body[:data_source_id] = data_source_id if data_source_id != SENTINEL
|
@@ -342,9 +342,11 @@ module Carbon
|
|
342
342
|
# @param css_classes_to_skip [Array<String>]
|
343
343
|
# @param css_selectors_to_skip [Array<String>]
|
344
344
|
# @param embedding_model [EmbeddingGenerators]
|
345
|
+
# @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
346
|
+
# @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
345
347
|
# @param body [SitemapScrapeRequest]
|
346
348
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
347
|
-
def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
|
349
|
+
def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
|
348
350
|
_body = {}
|
349
351
|
_body[:tags] = tags if tags != SENTINEL
|
350
352
|
_body[:url] = url if url != SENTINEL
|
@@ -359,6 +361,8 @@ module Carbon
|
|
359
361
|
_body[:css_classes_to_skip] = css_classes_to_skip if css_classes_to_skip != SENTINEL
|
360
362
|
_body[:css_selectors_to_skip] = css_selectors_to_skip if css_selectors_to_skip != SENTINEL
|
361
363
|
_body[:embedding_model] = embedding_model if embedding_model != SENTINEL
|
364
|
+
_body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
|
365
|
+
_body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
|
362
366
|
sitemap_scrape_request = _body
|
363
367
|
api_response = scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
|
364
368
|
api_response.data
|
@@ -387,9 +391,11 @@ module Carbon
|
|
387
391
|
# @param css_classes_to_skip [Array<String>]
|
388
392
|
# @param css_selectors_to_skip [Array<String>]
|
389
393
|
# @param embedding_model [EmbeddingGenerators]
|
394
|
+
# @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
395
|
+
# @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
390
396
|
# @param body [SitemapScrapeRequest]
|
391
397
|
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
392
|
-
def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
|
398
|
+
def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, extra: {})
|
393
399
|
_body = {}
|
394
400
|
_body[:tags] = tags if tags != SENTINEL
|
395
401
|
_body[:url] = url if url != SENTINEL
|
@@ -404,6 +410,8 @@ module Carbon
|
|
404
410
|
_body[:css_classes_to_skip] = css_classes_to_skip if css_classes_to_skip != SENTINEL
|
405
411
|
_body[:css_selectors_to_skip] = css_selectors_to_skip if css_selectors_to_skip != SENTINEL
|
406
412
|
_body[:embedding_model] = embedding_model if embedding_model != SENTINEL
|
413
|
+
_body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
|
414
|
+
_body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
|
407
415
|
sitemap_scrape_request = _body
|
408
416
|
scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
|
409
417
|
end
|
@@ -61,7 +61,7 @@ module Carbon
|
|
61
61
|
|
62
62
|
attr_accessor :parse_pdf_tables_with_ocr
|
63
63
|
|
64
|
-
# Enable integration's file picker for sources that support it. Supported sources:
|
64
|
+
# Enable integration's file picker for sources that support it. Supported sources: DROPBOX, GOOGLE_DRIVE, SHAREPOINT, ONEDRIVE, BOX
|
65
65
|
attr_accessor :enable_file_picker
|
66
66
|
|
67
67
|
# Enabling this flag will fetch all available content from the source to be listed via list items endpoint
|
@@ -279,7 +279,7 @@ module Carbon
|
|
279
279
|
if attributes.key?(:'request_id')
|
280
280
|
self.request_id = attributes[:'request_id']
|
281
281
|
else
|
282
|
-
self.request_id = '
|
282
|
+
self.request_id = '71f214fa-2155-41cb-9336-9b3070e86897'
|
283
283
|
end
|
284
284
|
|
285
285
|
if attributes.key?(:'use_ocr')
|
@@ -37,6 +37,12 @@ module Carbon
|
|
37
37
|
|
38
38
|
attr_accessor :embedding_model
|
39
39
|
|
40
|
+
# URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
41
|
+
attr_accessor :url_paths_to_include
|
42
|
+
|
43
|
+
# URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
44
|
+
attr_accessor :url_paths_to_exclude
|
45
|
+
|
40
46
|
# Attribute mapping from ruby-style variable name to JSON key.
|
41
47
|
def self.attribute_map
|
42
48
|
{
|
@@ -52,7 +58,9 @@ module Carbon
|
|
52
58
|
:'html_tags_to_skip' => :'html_tags_to_skip',
|
53
59
|
:'css_classes_to_skip' => :'css_classes_to_skip',
|
54
60
|
:'css_selectors_to_skip' => :'css_selectors_to_skip',
|
55
|
-
:'embedding_model' => :'embedding_model'
|
61
|
+
:'embedding_model' => :'embedding_model',
|
62
|
+
:'url_paths_to_include' => :'url_paths_to_include',
|
63
|
+
:'url_paths_to_exclude' => :'url_paths_to_exclude'
|
56
64
|
}
|
57
65
|
end
|
58
66
|
|
@@ -76,7 +84,9 @@ module Carbon
|
|
76
84
|
:'html_tags_to_skip' => :'Array<String>',
|
77
85
|
:'css_classes_to_skip' => :'Array<String>',
|
78
86
|
:'css_selectors_to_skip' => :'Array<String>',
|
79
|
-
:'embedding_model' => :'EmbeddingGenerators'
|
87
|
+
:'embedding_model' => :'EmbeddingGenerators',
|
88
|
+
:'url_paths_to_include' => :'Array<String>',
|
89
|
+
:'url_paths_to_exclude' => :'Array<String>'
|
80
90
|
}
|
81
91
|
end
|
82
92
|
|
@@ -94,6 +104,8 @@ module Carbon
|
|
94
104
|
:'html_tags_to_skip',
|
95
105
|
:'css_classes_to_skip',
|
96
106
|
:'css_selectors_to_skip',
|
107
|
+
:'url_paths_to_include',
|
108
|
+
:'url_paths_to_exclude'
|
97
109
|
])
|
98
110
|
end
|
99
111
|
|
@@ -185,6 +197,18 @@ module Carbon
|
|
185
197
|
else
|
186
198
|
self.embedding_model = 'OPENAI'
|
187
199
|
end
|
200
|
+
|
201
|
+
if attributes.key?(:'url_paths_to_include')
|
202
|
+
if (value = attributes[:'url_paths_to_include']).is_a?(Array)
|
203
|
+
self.url_paths_to_include = value
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
if attributes.key?(:'url_paths_to_exclude')
|
208
|
+
if (value = attributes[:'url_paths_to_exclude']).is_a?(Array)
|
209
|
+
self.url_paths_to_exclude = value
|
210
|
+
end
|
211
|
+
end
|
188
212
|
end
|
189
213
|
|
190
214
|
# Show invalid properties with the reasons. Usually used together with valid?
|
@@ -199,6 +223,14 @@ module Carbon
|
|
199
223
|
invalid_properties.push('invalid value for "max_pages_to_scrape", must be greater than or equal to 1.')
|
200
224
|
end
|
201
225
|
|
226
|
+
if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
|
227
|
+
invalid_properties.push('invalid value for "url_paths_to_include", number of items must be less than or equal to 10.')
|
228
|
+
end
|
229
|
+
|
230
|
+
if !@url_paths_to_exclude.nil? && @url_paths_to_exclude.length > 10
|
231
|
+
invalid_properties.push('invalid value for "url_paths_to_exclude", number of items must be less than or equal to 10.')
|
232
|
+
end
|
233
|
+
|
202
234
|
invalid_properties
|
203
235
|
end
|
204
236
|
|
@@ -207,6 +239,8 @@ module Carbon
|
|
207
239
|
def valid?
|
208
240
|
return false if @url.nil?
|
209
241
|
return false if !@max_pages_to_scrape.nil? && @max_pages_to_scrape < 1
|
242
|
+
return false if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
|
243
|
+
return false if !@url_paths_to_exclude.nil? && @url_paths_to_exclude.length > 10
|
210
244
|
true
|
211
245
|
end
|
212
246
|
|
@@ -220,6 +254,26 @@ module Carbon
|
|
220
254
|
@max_pages_to_scrape = max_pages_to_scrape
|
221
255
|
end
|
222
256
|
|
257
|
+
# Custom attribute writer method with validation
|
258
|
+
# @param [Object] url_paths_to_include Value to be assigned
|
259
|
+
def url_paths_to_include=(url_paths_to_include)
|
260
|
+
if !url_paths_to_include.nil? && url_paths_to_include.length > 10
|
261
|
+
fail ArgumentError, 'invalid value for "url_paths_to_include", number of items must be less than or equal to 10.'
|
262
|
+
end
|
263
|
+
|
264
|
+
@url_paths_to_include = url_paths_to_include
|
265
|
+
end
|
266
|
+
|
267
|
+
# Custom attribute writer method with validation
|
268
|
+
# @param [Object] url_paths_to_exclude Value to be assigned
|
269
|
+
def url_paths_to_exclude=(url_paths_to_exclude)
|
270
|
+
if !url_paths_to_exclude.nil? && url_paths_to_exclude.length > 10
|
271
|
+
fail ArgumentError, 'invalid value for "url_paths_to_exclude", number of items must be less than or equal to 10.'
|
272
|
+
end
|
273
|
+
|
274
|
+
@url_paths_to_exclude = url_paths_to_exclude
|
275
|
+
end
|
276
|
+
|
223
277
|
# Checks equality by comparing each attribute.
|
224
278
|
# @param [Object] Object to be compared
|
225
279
|
def ==(o)
|
@@ -237,7 +291,9 @@ module Carbon
|
|
237
291
|
html_tags_to_skip == o.html_tags_to_skip &&
|
238
292
|
css_classes_to_skip == o.css_classes_to_skip &&
|
239
293
|
css_selectors_to_skip == o.css_selectors_to_skip &&
|
240
|
-
embedding_model == o.embedding_model
|
294
|
+
embedding_model == o.embedding_model &&
|
295
|
+
url_paths_to_include == o.url_paths_to_include &&
|
296
|
+
url_paths_to_exclude == o.url_paths_to_exclude
|
241
297
|
end
|
242
298
|
|
243
299
|
# @see the `==` method
|
@@ -249,7 +305,7 @@ module Carbon
|
|
249
305
|
# Calculates hash code according to all attributes.
|
250
306
|
# @return [Integer] Hash code
|
251
307
|
def hash
|
252
|
-
[tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model].hash
|
308
|
+
[tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude].hash
|
253
309
|
end
|
254
310
|
|
255
311
|
# Builds the object from hash
|
@@ -187,7 +187,7 @@ module Carbon
|
|
187
187
|
if attributes.key?(:'request_id')
|
188
188
|
self.request_id = attributes[:'request_id']
|
189
189
|
else
|
190
|
-
self.request_id = '
|
190
|
+
self.request_id = '6136b467-242e-49df-9478-d3e0cfdde299'
|
191
191
|
end
|
192
192
|
|
193
193
|
if attributes.key?(:'use_ocr')
|
@@ -182,7 +182,7 @@ module Carbon
|
|
182
182
|
if attributes.key?(:'request_id')
|
183
183
|
self.request_id = attributes[:'request_id']
|
184
184
|
else
|
185
|
-
self.request_id = '
|
185
|
+
self.request_id = '652297b9-0f55-46d8-869d-13a36e89e5da'
|
186
186
|
end
|
187
187
|
|
188
188
|
if attributes.key?(:'enable_file_picker')
|
@@ -39,6 +39,9 @@ module Carbon
|
|
39
39
|
|
40
40
|
attr_accessor :embedding_model
|
41
41
|
|
42
|
+
# URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
|
43
|
+
attr_accessor :url_paths_to_include
|
44
|
+
|
42
45
|
# Attribute mapping from ruby-style variable name to JSON key.
|
43
46
|
def self.attribute_map
|
44
47
|
{
|
@@ -55,7 +58,8 @@ module Carbon
|
|
55
58
|
:'html_tags_to_skip' => :'html_tags_to_skip',
|
56
59
|
:'css_classes_to_skip' => :'css_classes_to_skip',
|
57
60
|
:'css_selectors_to_skip' => :'css_selectors_to_skip',
|
58
|
-
:'embedding_model' => :'embedding_model'
|
61
|
+
:'embedding_model' => :'embedding_model',
|
62
|
+
:'url_paths_to_include' => :'url_paths_to_include'
|
59
63
|
}
|
60
64
|
end
|
61
65
|
|
@@ -80,7 +84,8 @@ module Carbon
|
|
80
84
|
:'html_tags_to_skip' => :'Array<String>',
|
81
85
|
:'css_classes_to_skip' => :'Array<String>',
|
82
86
|
:'css_selectors_to_skip' => :'Array<String>',
|
83
|
-
:'embedding_model' => :'EmbeddingGenerators'
|
87
|
+
:'embedding_model' => :'EmbeddingGenerators',
|
88
|
+
:'url_paths_to_include' => :'Array<String>'
|
84
89
|
}
|
85
90
|
end
|
86
91
|
|
@@ -99,6 +104,7 @@ module Carbon
|
|
99
104
|
:'html_tags_to_skip',
|
100
105
|
:'css_classes_to_skip',
|
101
106
|
:'css_selectors_to_skip',
|
107
|
+
:'url_paths_to_include'
|
102
108
|
])
|
103
109
|
end
|
104
110
|
|
@@ -198,6 +204,12 @@ module Carbon
|
|
198
204
|
else
|
199
205
|
self.embedding_model = 'OPENAI'
|
200
206
|
end
|
207
|
+
|
208
|
+
if attributes.key?(:'url_paths_to_include')
|
209
|
+
if (value = attributes[:'url_paths_to_include']).is_a?(Array)
|
210
|
+
self.url_paths_to_include = value
|
211
|
+
end
|
212
|
+
end
|
201
213
|
end
|
202
214
|
|
203
215
|
# Show invalid properties with the reasons. Usually used together with valid?
|
@@ -216,6 +228,10 @@ module Carbon
|
|
216
228
|
invalid_properties.push('invalid value for "max_pages_to_scrape", must be greater than or equal to 1.')
|
217
229
|
end
|
218
230
|
|
231
|
+
if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
|
232
|
+
invalid_properties.push('invalid value for "url_paths_to_include", number of items must be less than or equal to 10.')
|
233
|
+
end
|
234
|
+
|
219
235
|
invalid_properties
|
220
236
|
end
|
221
237
|
|
@@ -225,6 +241,7 @@ module Carbon
|
|
225
241
|
return false if @url.nil?
|
226
242
|
return false if !@recursion_depth.nil? && @recursion_depth < 0
|
227
243
|
return false if !@max_pages_to_scrape.nil? && @max_pages_to_scrape < 1
|
244
|
+
return false if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
|
228
245
|
true
|
229
246
|
end
|
230
247
|
|
@@ -248,6 +265,16 @@ module Carbon
|
|
248
265
|
@max_pages_to_scrape = max_pages_to_scrape
|
249
266
|
end
|
250
267
|
|
268
|
+
# Custom attribute writer method with validation
|
269
|
+
# @param [Object] url_paths_to_include Value to be assigned
|
270
|
+
def url_paths_to_include=(url_paths_to_include)
|
271
|
+
if !url_paths_to_include.nil? && url_paths_to_include.length > 10
|
272
|
+
fail ArgumentError, 'invalid value for "url_paths_to_include", number of items must be less than or equal to 10.'
|
273
|
+
end
|
274
|
+
|
275
|
+
@url_paths_to_include = url_paths_to_include
|
276
|
+
end
|
277
|
+
|
251
278
|
# Checks equality by comparing each attribute.
|
252
279
|
# @param [Object] Object to be compared
|
253
280
|
def ==(o)
|
@@ -266,7 +293,8 @@ module Carbon
|
|
266
293
|
html_tags_to_skip == o.html_tags_to_skip &&
|
267
294
|
css_classes_to_skip == o.css_classes_to_skip &&
|
268
295
|
css_selectors_to_skip == o.css_selectors_to_skip &&
|
269
|
-
embedding_model == o.embedding_model
|
296
|
+
embedding_model == o.embedding_model &&
|
297
|
+
url_paths_to_include == o.url_paths_to_include
|
270
298
|
end
|
271
299
|
|
272
300
|
# @see the `==` method
|
@@ -278,7 +306,7 @@ module Carbon
|
|
278
306
|
# Calculates hash code according to all attributes.
|
279
307
|
# @return [Integer] Hash code
|
280
308
|
def hash
|
281
|
-
[tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model].hash
|
309
|
+
[tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include].hash
|
282
310
|
end
|
283
311
|
|
284
312
|
# Builds the object from hash
|
@@ -97,4 +97,16 @@ describe Carbon::SitemapScrapeRequest do
|
|
97
97
|
end
|
98
98
|
end
|
99
99
|
|
100
|
+
describe 'test attribute "url_paths_to_include"' do
|
101
|
+
it 'should work' do
|
102
|
+
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
describe 'test attribute "url_paths_to_exclude"' do
|
107
|
+
it 'should work' do
|
108
|
+
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
100
112
|
end
|
@@ -103,4 +103,10 @@ describe Carbon::WebscrapeRequest do
|
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
106
|
+
describe 'test attribute "url_paths_to_include"' do
|
107
|
+
it 'should work' do
|
108
|
+
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
106
112
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: carbon_ruby_sdk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Konfig
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-06-
|
11
|
+
date: 2024-06-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|