carbon_ruby_sdk 0.2.26 → 0.2.28
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +10 -3
- data/lib/carbon_ruby_sdk/api/integrations_api.rb +4 -4
- data/lib/carbon_ruby_sdk/api/utilities_api.rb +6 -2
- data/lib/carbon_ruby_sdk/models/sitemap_scrape_request.rb +18 -5
- data/lib/carbon_ruby_sdk/models/webscrape_request.rb +18 -5
- data/lib/carbon_ruby_sdk/version.rb +1 -1
- data/spec/api/integrations_api_spec.rb +1 -1
- data/spec/models/sitemap_scrape_request_spec.rb +6 -0
- data/spec/models/webscrape_request_spec.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 631a1a24ddb1e9ee7ae6cf1bfe231dabaa4a9236ddb585a1cc234f5ad7a6a738
+  data.tar.gz: 5515b1db4929dc0cfa12e99c1a7a216a969c84d59d70d0ccf410f371f797c0ce
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ec3740b74b7cf918778880dcfdafc8054d8d012c297a18d1c9d127d086b55692d94142bd2a0b20da80616d7b717ae4798dc1b0871a5e9baebbb19bb548a8d7ce
+  data.tar.gz: be240b0e3bc839bf3c741a98a7ced2453601e690c512f7a8ea883f32a797841218e9205498fcb6a0e2dc60a661d6d9eb22401f7792dab5adc82a5fd3c9982874
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    carbon_ruby_sdk (0.2.
+    carbon_ruby_sdk (0.2.28)
     faraday (>= 1.0.1, < 3.0)
     faraday-multipart (~> 1.0, >= 1.0.4)

@@ -44,7 +44,7 @@ GEM
     regexp_parser (2.9.2)
     reline (0.5.9)
       io-console (~> 0.5)
-    rexml (3.3.
+    rexml (3.3.6)
       strscan
     rspec (3.13.0)
       rspec-core (~> 3.13.0)
data/README.md
CHANGED
@@ -6,7 +6,7 @@

 Connect external data to LLMs, no matter the source.

-[![npm](https://img.shields.io/badge/gem-v0.2.
+[![npm](https://img.shields.io/badge/gem-v0.2.28-blue)](https://rubygems.org/gems/carbon_ruby_sdk/versions/0.2.28)

 </div>

@@ -93,7 +93,7 @@ Connect external data to LLMs, no matter the source.
 Add to Gemfile:

 ```ruby
-gem 'carbon_ruby_sdk', '~> 0.2.
+gem 'carbon_ruby_sdk', '~> 0.2.28'
 ```

 ## Getting Started<a id="getting-started"></a>

@@ -1566,7 +1566,7 @@ the same permissions.</li>
 </ol>
 Once created, generate an access key for this user and share the credentials with us. We recommend testing this key beforehand.
 For Digital Ocean Spaces, generate the above credentials in your Applications and API page here https://cloud.digitalocean.com/account/api/spaces.
-Endpoint URL is required to connect Digital Ocean Spaces.
+Endpoint URL is required to connect Digital Ocean Spaces. It should look like <<region>>.digitaloceanspaces.com

 #### 🛠️ Usage<a id="🛠️-usage"></a>

@@ -3116,6 +3116,7 @@ result = carbon.utilities.scrape_sitemap(
   url_paths_to_include: [],
   url_paths_to_exclude: [],
   urls_to_scrape: [],
+  download_css_and_media: false,
 )
 p result
 ```
@@ -3150,6 +3151,11 @@ You can submit a subset of URLs from the sitemap that should be scraped. To get
 the list of URLs, you can check out /process_sitemap endpoint. If left empty,
 all URLs from the sitemap will be scraped.

+##### download_css_and_media: `Boolean`<a id="download_css_and_media-boolean"></a>
+Whether the scraper should download css and media from the page (images, fonts,
+etc). Scrapes might take longer to finish with this flag enabled, but the
+success rate is improved.
+
 #### 🌐 Endpoint<a id="🌐-endpoint"></a>

 `/scrape_sitemap` `POST`
@@ -3190,6 +3196,7 @@ result = carbon.utilities.scrape_web(
       "css_selectors_to_skip" => [],
       "embedding_model" => "OPENAI",
       "url_paths_to_include" => [],
+      "download_css_and_media" => false,
     }
   ],
 )
data/lib/carbon_ruby_sdk/api/integrations_api.rb
CHANGED
@@ -517,7 +517,7 @@ module Carbon
 # </ol>
 # Once created, generate an access key for this user and share the credentials with us. We recommend testing this key beforehand.
 # For Digital Ocean Spaces, generate the above credentials in your Applications and API page here https://cloud.digitalocean.com/account/api/spaces.
-# Endpoint URL is required to connect Digital Ocean Spaces.
+# Endpoint URL is required to connect Digital Ocean Spaces. It should look like <<region>>.digitaloceanspaces.com
 #
 # @param access_key [String]
 # @param access_key_secret [String]
@@ -547,7 +547,7 @@ module Carbon
 # </ol>
 # Once created, generate an access key for this user and share the credentials with us. We recommend testing this key beforehand.
 # For Digital Ocean Spaces, generate the above credentials in your Applications and API page here https://cloud.digitalocean.com/account/api/spaces.
-# Endpoint URL is required to connect Digital Ocean Spaces.
+# Endpoint URL is required to connect Digital Ocean Spaces. It should look like <<region>>.digitaloceanspaces.com
 #
 # @param access_key [String]
 # @param access_key_secret [String]
@@ -566,7 +566,7 @@ module Carbon
 end

 # S3 Auth
-# This endpoint can be used to connect S3 as well as Digital Ocean Spaces (S3 compatible) For S3, create a new IAM user with permissions to: <ol> <li>List all buckets.</li> <li>Read from the specific buckets and objects to sync with Carbon. Ensure any future buckets or objects carry the same permissions.</li> </ol> Once created, generate an access key for this user and share the credentials with us. We recommend testing this key beforehand. For Digital Ocean Spaces, generate the above credentials in your Applications and API page here https://cloud.digitalocean.com/account/api/spaces. Endpoint URL is required to connect Digital Ocean Spaces.
+# This endpoint can be used to connect S3 as well as Digital Ocean Spaces (S3 compatible) For S3, create a new IAM user with permissions to: <ol> <li>List all buckets.</li> <li>Read from the specific buckets and objects to sync with Carbon. Ensure any future buckets or objects carry the same permissions.</li> </ol> Once created, generate an access key for this user and share the credentials with us. We recommend testing this key beforehand. For Digital Ocean Spaces, generate the above credentials in your Applications and API page here https://cloud.digitalocean.com/account/api/spaces. Endpoint URL is required to connect Digital Ocean Spaces. It should look like <<region>>.digitaloceanspaces.com
 # @param s3_auth_request [S3AuthRequest]
 # @param [Hash] opts the optional parameters
 # @return [OrganizationUserDataSourceAPI]
@@ -576,7 +576,7 @@ module Carbon
 end

 # S3 Auth
-# This endpoint can be used to connect S3 as well as Digital Ocean Spaces (S3 compatible) For S3, create a new IAM user with permissions to: <ol> <li>List all buckets.</li> <li>Read from the specific buckets and objects to sync with Carbon. Ensure any future buckets or objects carry the same permissions.</li> </ol> Once created, generate an access key for this user and share the credentials with us. We recommend testing this key beforehand. For Digital Ocean Spaces, generate the above credentials in your Applications and API page here https://cloud.digitalocean.com/account/api/spaces. Endpoint URL is required to connect Digital Ocean Spaces.
+# This endpoint can be used to connect S3 as well as Digital Ocean Spaces (S3 compatible) For S3, create a new IAM user with permissions to: <ol> <li>List all buckets.</li> <li>Read from the specific buckets and objects to sync with Carbon. Ensure any future buckets or objects carry the same permissions.</li> </ol> Once created, generate an access key for this user and share the credentials with us. We recommend testing this key beforehand. For Digital Ocean Spaces, generate the above credentials in your Applications and API page here https://cloud.digitalocean.com/account/api/spaces. Endpoint URL is required to connect Digital Ocean Spaces. It should look like <<region>>.digitaloceanspaces.com
 # @param s3_auth_request [S3AuthRequest]
 # @param [Hash] opts the optional parameters
 # @return [APIResponse] data is OrganizationUserDataSourceAPI, status code, headers and response
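The doc-comment change above only clarifies the Spaces endpoint URL format. As a hedged sketch, not taken from this diff: the method name (`connect_s3`) and the endpoint-URL field name (`endpoint_url`) are assumptions here; only `access_key`, `access_key_secret`, and `s3_auth_request` appear in the @param docs above, so check the generated IntegrationsApi and S3AuthRequest for the exact names. `carbon` is assumed to be an already-configured client.

```ruby
# Sketch only: connecting Digital Ocean Spaces through the S3-compatible auth endpoint.
result = carbon.integrations.connect_s3(               # method name assumed
  access_key: "DO00EXAMPLEKEY",                        # hypothetical Spaces access key
  access_key_secret: "example-secret",                 # hypothetical Spaces secret
  endpoint_url: "nyc3.digitaloceanspaces.com"          # assumed field; format per the doc change: <<region>>.digitaloceanspaces.com
)
p result
```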
data/lib/carbon_ruby_sdk/api/utilities_api.rb
CHANGED
@@ -437,9 +437,10 @@ module Carbon
 # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
 # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
 # @param urls_to_scrape [Array<String>] You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs, you can check out /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
+# @param download_css_and_media [Boolean] Whether the scraper should download css and media from the page (images, fonts, etc). Scrapes might take longer to finish with this flag enabled, but the success rate is improved.
 # @param body [SitemapScrapeRequest]
 # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
-def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, urls_to_scrape: SENTINEL, extra: {})
+def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, urls_to_scrape: SENTINEL, download_css_and_media: false, extra: {})
 _body = {}
 _body[:tags] = tags if tags != SENTINEL
 _body[:url] = url if url != SENTINEL
@@ -457,6 +458,7 @@ module Carbon
 _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
 _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
 _body[:urls_to_scrape] = urls_to_scrape if urls_to_scrape != SENTINEL
+_body[:download_css_and_media] = download_css_and_media if download_css_and_media != SENTINEL
 sitemap_scrape_request = _body
 api_response = scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
 api_response.data
@@ -488,9 +490,10 @@ module Carbon
 # @param url_paths_to_include [Array<String>] URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
 # @param url_paths_to_exclude [Array<String>] URL subpaths or directories that you want to exclude. For example if you want to exclude URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
 # @param urls_to_scrape [Array<String>] You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs, you can check out /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
+# @param download_css_and_media [Boolean] Whether the scraper should download css and media from the page (images, fonts, etc). Scrapes might take longer to finish with this flag enabled, but the success rate is improved.
 # @param body [SitemapScrapeRequest]
 # @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
-def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, urls_to_scrape: SENTINEL, extra: {})
+def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', url_paths_to_include: SENTINEL, url_paths_to_exclude: SENTINEL, urls_to_scrape: SENTINEL, download_css_and_media: false, extra: {})
 _body = {}
 _body[:tags] = tags if tags != SENTINEL
 _body[:url] = url if url != SENTINEL
@@ -508,6 +511,7 @@ module Carbon
 _body[:url_paths_to_include] = url_paths_to_include if url_paths_to_include != SENTINEL
 _body[:url_paths_to_exclude] = url_paths_to_exclude if url_paths_to_exclude != SENTINEL
 _body[:urls_to_scrape] = urls_to_scrape if urls_to_scrape != SENTINEL
+_body[:download_css_and_media] = download_css_and_media if download_css_and_media != SENTINEL
 sitemap_scrape_request = _body
 scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
 end
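The new keyword argument above passes straight through to the request body, so the flag can be enabled per call. A minimal sketch (not part of this diff), assuming `carbon` is an already-configured client as shown in the README's Getting Started section; the sitemap URL is a placeholder.

```ruby
# Sketch only: enable the new flag on a sitemap scrape.
result = carbon.utilities.scrape_sitemap(
  url: "https://example.com/sitemap.xml",  # placeholder URL
  download_css_and_media: true             # new in 0.2.28; omitting it keeps the previous behavior (false)
)
p result
```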
data/lib/carbon_ruby_sdk/models/sitemap_scrape_request.rb
CHANGED
@@ -46,6 +46,9 @@ module Carbon
 # You can submit a subset of URLs from the sitemap that should be scraped. To get the list of URLs, you can check out /process_sitemap endpoint. If left empty, all URLs from the sitemap will be scraped.
 attr_accessor :urls_to_scrape

+# Whether the scraper should download css and media from the page (images, fonts, etc). Scrapes might take longer to finish with this flag enabled, but the success rate is improved.
+attr_accessor :download_css_and_media
+
 # Attribute mapping from ruby-style variable name to JSON key.
 def self.attribute_map
 {
@@ -64,7 +67,8 @@ module Carbon
 :'embedding_model' => :'embedding_model',
 :'url_paths_to_include' => :'url_paths_to_include',
 :'url_paths_to_exclude' => :'url_paths_to_exclude',
-:'urls_to_scrape' => :'urls_to_scrape'
+:'urls_to_scrape' => :'urls_to_scrape',
+:'download_css_and_media' => :'download_css_and_media'
 }
 end

@@ -91,7 +95,8 @@ module Carbon
 :'embedding_model' => :'EmbeddingGenerators',
 :'url_paths_to_include' => :'Array<String>',
 :'url_paths_to_exclude' => :'Array<String>',
-:'urls_to_scrape' => :'Array<String>'
+:'urls_to_scrape' => :'Array<String>',
+:'download_css_and_media' => :'Boolean'
 }
 end

@@ -111,7 +116,8 @@ module Carbon
 :'css_selectors_to_skip',
 :'url_paths_to_include',
 :'url_paths_to_exclude',
-:'urls_to_scrape'
+:'urls_to_scrape',
+:'download_css_and_media'
 ])
 end

@@ -221,6 +227,12 @@ module Carbon
 self.urls_to_scrape = value
 end
 end
+
+if attributes.key?(:'download_css_and_media')
+self.download_css_and_media = attributes[:'download_css_and_media']
+else
+self.download_css_and_media = false
+end
 end

 # Show invalid properties with the reasons. Usually used together with valid?
@@ -306,7 +318,8 @@ module Carbon
 embedding_model == o.embedding_model &&
 url_paths_to_include == o.url_paths_to_include &&
 url_paths_to_exclude == o.url_paths_to_exclude &&
-urls_to_scrape == o.urls_to_scrape
+urls_to_scrape == o.urls_to_scrape &&
+download_css_and_media == o.download_css_and_media
 end

 # @see the `==` method
@@ -318,7 +331,7 @@ module Carbon
 # Calculates hash code according to all attributes.
 # @return [Integer] Hash code
 def hash
-[tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude, urls_to_scrape].hash
+[tags, url, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, url_paths_to_exclude, urls_to_scrape, download_css_and_media].hash
 end

 # Builds the object from hash
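Per the constructor change above, the model falls back to `false` when the attribute is omitted. A small sketch (not from this diff) of that behavior; constructing the model with an empty attributes hash is an assumption about the generated initializer.

```ruby
# Sketch only: default and explicit values for the new attribute.
req = Carbon::SitemapScrapeRequest.new({})
req.download_css_and_media        # => false (default applied by the initializer)

req.download_css_and_media = true
req.download_css_and_media        # => true
```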
data/lib/carbon_ruby_sdk/models/webscrape_request.rb
CHANGED
@@ -42,6 +42,9 @@ module Carbon
 # URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
 attr_accessor :url_paths_to_include

+# Whether the scraper should download css and media from the page (images, fonts, etc). Scrapes might take longer to finish with this flag enabled, but the success rate is improved.
+attr_accessor :download_css_and_media
+
 # Attribute mapping from ruby-style variable name to JSON key.
 def self.attribute_map
 {
@@ -59,7 +62,8 @@ module Carbon
 :'css_classes_to_skip' => :'css_classes_to_skip',
 :'css_selectors_to_skip' => :'css_selectors_to_skip',
 :'embedding_model' => :'embedding_model',
-:'url_paths_to_include' => :'url_paths_to_include'
+:'url_paths_to_include' => :'url_paths_to_include',
+:'download_css_and_media' => :'download_css_and_media'
 }
 end

@@ -85,7 +89,8 @@ module Carbon
 :'css_classes_to_skip' => :'Array<String>',
 :'css_selectors_to_skip' => :'Array<String>',
 :'embedding_model' => :'EmbeddingGenerators',
-:'url_paths_to_include' => :'Array<String>'
+:'url_paths_to_include' => :'Array<String>',
+:'download_css_and_media' => :'Boolean'
 }
 end

@@ -104,7 +109,8 @@ module Carbon
 :'html_tags_to_skip',
 :'css_classes_to_skip',
 :'css_selectors_to_skip',
-:'url_paths_to_include'
+:'url_paths_to_include',
+:'download_css_and_media'
 ])
 end

@@ -210,6 +216,12 @@ module Carbon
 self.url_paths_to_include = value
 end
 end
+
+if attributes.key?(:'download_css_and_media')
+self.download_css_and_media = attributes[:'download_css_and_media']
+else
+self.download_css_and_media = false
+end
 end

 # Show invalid properties with the reasons. Usually used together with valid?
@@ -294,7 +306,8 @@ module Carbon
 css_classes_to_skip == o.css_classes_to_skip &&
 css_selectors_to_skip == o.css_selectors_to_skip &&
 embedding_model == o.embedding_model &&
-url_paths_to_include == o.url_paths_to_include
+url_paths_to_include == o.url_paths_to_include &&
+download_css_and_media == o.download_css_and_media
 end

 # @see the `==` method
@@ -306,7 +319,7 @@ module Carbon
 # Calculates hash code according to all attributes.
 # @return [Integer] Hash code
 def hash
-[tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include].hash
+[tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, download_css_and_media].hash
 end

 # Builds the object from hash
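Because `download_css_and_media` is now part of both `==` and `hash` above, the flag distinguishes otherwise-identical requests. A brief sketch (not from this diff) illustrating that; passing keyword-style attributes to the generated initializer is an assumption.

```ruby
# Sketch only: the new flag participates in equality like every other attribute.
a = Carbon::WebscrapeRequest.new(download_css_and_media: false)
b = Carbon::WebscrapeRequest.new(download_css_and_media: true)
a == b                                   # => false
a == Carbon::WebscrapeRequest.new({})   # => true (omitted flag defaults to false)
```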
data/spec/api/integrations_api_spec.rb
CHANGED
@@ -74,7 +74,7 @@ describe 'IntegrationsApi' do

 # unit tests for create_aws_iam_user
 # S3 Auth
-# This endpoint can be used to connect S3 as well as Digital Ocean Spaces (S3 compatible) For S3, create a new IAM user with permissions to: <ol> <li>List all buckets.</li> <li>Read from the specific buckets and objects to sync with Carbon. Ensure any future buckets or objects carry the same permissions.</li> </ol> Once created, generate an access key for this user and share the credentials with us. We recommend testing this key beforehand. For Digital Ocean Spaces, generate the above credentials in your Applications and API page here https://cloud.digitalocean.com/account/api/spaces. Endpoint URL is required to connect Digital Ocean Spaces.
+# This endpoint can be used to connect S3 as well as Digital Ocean Spaces (S3 compatible) For S3, create a new IAM user with permissions to: <ol> <li>List all buckets.</li> <li>Read from the specific buckets and objects to sync with Carbon. Ensure any future buckets or objects carry the same permissions.</li> </ol> Once created, generate an access key for this user and share the credentials with us. We recommend testing this key beforehand. For Digital Ocean Spaces, generate the above credentials in your Applications and API page here https://cloud.digitalocean.com/account/api/spaces. Endpoint URL is required to connect Digital Ocean Spaces. It should look like <<region>>.digitaloceanspaces.com
 # @param s3_auth_request
 # @param [Hash] opts the optional parameters
 # @return [OrganizationUserDataSourceAPI]
data/spec/models/sitemap_scrape_request_spec.rb
CHANGED
@@ -115,4 +115,10 @@ describe Carbon::SitemapScrapeRequest do
 end
 end

+describe 'test attribute "download_css_and_media"' do
+it 'should work' do
+# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
+end
+end
+
 end
data/spec/models/webscrape_request_spec.rb
CHANGED
@@ -109,4 +109,10 @@ describe Carbon::WebscrapeRequest do
 end
 end

+describe 'test attribute "download_css_and_media"' do
+it 'should work' do
+# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
+end
+end
+
 end
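The generated specs above ship with empty assertion bodies. A hedged sketch of what one of them could check, based only on the default and setter behavior visible in the model changes; this is not part of the release.

```ruby
# Sketch only: a possible assertion for the new attribute's default and setter.
describe 'test attribute "download_css_and_media"' do
  it 'should work' do
    instance = Carbon::WebscrapeRequest.new({})
    expect(instance.download_css_and_media).to eq(false)   # initializer default

    instance.download_css_and_media = true
    expect(instance.download_css_and_media).to eq(true)
  end
end
```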
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: carbon_ruby_sdk
 version: !ruby/object:Gem::Version
-  version: 0.2.
+  version: 0.2.28
 platform: ruby
 authors:
 - Konfig
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-08-
+date: 2024-08-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: faraday