carbon_ruby_sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +89 -0
- data/README.md +2325 -0
- data/Rakefile +10 -0
- data/carbon_ruby_sdk.gemspec +39 -0
- data/header.png +0 -0
- data/lib/carbon_ruby_sdk/api/auth_api.rb +177 -0
- data/lib/carbon_ruby_sdk/api/data_sources_api.rb +217 -0
- data/lib/carbon_ruby_sdk/api/embeddings_api.rb +565 -0
- data/lib/carbon_ruby_sdk/api/files_api.rb +1526 -0
- data/lib/carbon_ruby_sdk/api/health_api.rb +92 -0
- data/lib/carbon_ruby_sdk/api/integrations_api.rb +2201 -0
- data/lib/carbon_ruby_sdk/api/organizations_api.rb +92 -0
- data/lib/carbon_ruby_sdk/api/users_api.rb +209 -0
- data/lib/carbon_ruby_sdk/api/utilities_api.rb +710 -0
- data/lib/carbon_ruby_sdk/api/webhooks_api.rb +296 -0
- data/lib/carbon_ruby_sdk/api_client.rb +436 -0
- data/lib/carbon_ruby_sdk/api_client_custom.rb +14 -0
- data/lib/carbon_ruby_sdk/api_error.rb +53 -0
- data/lib/carbon_ruby_sdk/configuration.rb +354 -0
- data/lib/carbon_ruby_sdk/models/add_webhook_props.rb +220 -0
- data/lib/carbon_ruby_sdk/models/body_create_upload_file_uploadfile_post.rb +220 -0
- data/lib/carbon_ruby_sdk/models/chunk_properties.rb +248 -0
- data/lib/carbon_ruby_sdk/models/chunk_properties_nullable.rb +248 -0
- data/lib/carbon_ruby_sdk/models/chunks_and_embeddings.rb +246 -0
- data/lib/carbon_ruby_sdk/models/chunks_and_embeddings_upload_input.rb +247 -0
- data/lib/carbon_ruby_sdk/models/data_source_last_sync_actions.rb +38 -0
- data/lib/carbon_ruby_sdk/models/data_source_sync_statuses.rb +38 -0
- data/lib/carbon_ruby_sdk/models/data_source_type.rb +76 -0
- data/lib/carbon_ruby_sdk/models/data_source_type_nullable.rb +76 -0
- data/lib/carbon_ruby_sdk/models/delete_files_query_input.rb +252 -0
- data/lib/carbon_ruby_sdk/models/directory_item.rb +266 -0
- data/lib/carbon_ruby_sdk/models/document_response.rb +328 -0
- data/lib/carbon_ruby_sdk/models/document_response_list.rb +222 -0
- data/lib/carbon_ruby_sdk/models/embedding_and_chunk.rb +256 -0
- data/lib/carbon_ruby_sdk/models/embedding_generators.rb +43 -0
- data/lib/carbon_ruby_sdk/models/embedding_generators_nullable.rb +43 -0
- data/lib/carbon_ruby_sdk/models/embedding_properties.rb +226 -0
- data/lib/carbon_ruby_sdk/models/embeddings_and_chunks_filters.rb +232 -0
- data/lib/carbon_ruby_sdk/models/embeddings_and_chunks_order_by_columns.rb +37 -0
- data/lib/carbon_ruby_sdk/models/embeddings_and_chunks_query_input.rb +262 -0
- data/lib/carbon_ruby_sdk/models/embeddings_and_chunks_response.rb +236 -0
- data/lib/carbon_ruby_sdk/models/external_file_sync_statuses.rb +43 -0
- data/lib/carbon_ruby_sdk/models/external_source_item.rb +420 -0
- data/lib/carbon_ruby_sdk/models/fetch_urls_response.rb +236 -0
- data/lib/carbon_ruby_sdk/models/file_content_types.rb +36 -0
- data/lib/carbon_ruby_sdk/models/file_content_types_nullable.rb +36 -0
- data/lib/carbon_ruby_sdk/models/file_formats.rb +76 -0
- data/lib/carbon_ruby_sdk/models/file_formats_nullable.rb +76 -0
- data/lib/carbon_ruby_sdk/models/file_statistics.rb +256 -0
- data/lib/carbon_ruby_sdk/models/file_statistics_nullable.rb +256 -0
- data/lib/carbon_ruby_sdk/models/fresh_desk_connect_request.rb +316 -0
- data/lib/carbon_ruby_sdk/models/generic_success_response.rb +220 -0
- data/lib/carbon_ruby_sdk/models/get_embedding_documents_body.rb +416 -0
- data/lib/carbon_ruby_sdk/models/gitbook_connect_request.rb +234 -0
- data/lib/carbon_ruby_sdk/models/gitbook_sync_request.rb +336 -0
- data/lib/carbon_ruby_sdk/models/gmail_sync_input.rb +301 -0
- data/lib/carbon_ruby_sdk/models/http_validation_error.rb +217 -0
- data/lib/carbon_ruby_sdk/models/hybrid_search_tuning_params.rb +234 -0
- data/lib/carbon_ruby_sdk/models/hybrid_search_tuning_params_nullable.rb +235 -0
- data/lib/carbon_ruby_sdk/models/list_data_source_items_request.rb +239 -0
- data/lib/carbon_ruby_sdk/models/list_data_source_items_response.rb +236 -0
- data/lib/carbon_ruby_sdk/models/list_request.rb +230 -0
- data/lib/carbon_ruby_sdk/models/list_response.rb +222 -0
- data/lib/carbon_ruby_sdk/models/location_property_inner.rb +226 -0
- data/lib/carbon_ruby_sdk/models/modify_user_configuration_input.rb +234 -0
- data/lib/carbon_ruby_sdk/models/o_auth_url_request.rb +362 -0
- data/lib/carbon_ruby_sdk/models/order_dir.rb +36 -0
- data/lib/carbon_ruby_sdk/models/organization_response.rb +362 -0
- data/lib/carbon_ruby_sdk/models/organization_user_data_source_api.rb +380 -0
- data/lib/carbon_ruby_sdk/models/organization_user_data_source_filters.rb +238 -0
- data/lib/carbon_ruby_sdk/models/organization_user_data_source_order_by_columns.rb +36 -0
- data/lib/carbon_ruby_sdk/models/organization_user_data_source_query_input.rb +246 -0
- data/lib/carbon_ruby_sdk/models/organization_user_data_source_response.rb +236 -0
- data/lib/carbon_ruby_sdk/models/organization_user_file_tag_create.rb +236 -0
- data/lib/carbon_ruby_sdk/models/organization_user_file_tags_remove.rb +236 -0
- data/lib/carbon_ruby_sdk/models/organization_user_files_to_sync_filters.rb +330 -0
- data/lib/carbon_ruby_sdk/models/organization_user_files_to_sync_order_by_types.rb +39 -0
- data/lib/carbon_ruby_sdk/models/organization_user_files_to_sync_query_input.rb +276 -0
- data/lib/carbon_ruby_sdk/models/outlook_sync_input.rb +313 -0
- data/lib/carbon_ruby_sdk/models/pagination.rb +228 -0
- data/lib/carbon_ruby_sdk/models/presigned_url_response.rb +220 -0
- data/lib/carbon_ruby_sdk/models/rank_property.rb +226 -0
- data/lib/carbon_ruby_sdk/models/raw_text_input.rb +295 -0
- data/lib/carbon_ruby_sdk/models/raw_transcript_property_inner_value.rb +226 -0
- data/lib/carbon_ruby_sdk/models/resync_file_query_input.rb +240 -0
- data/lib/carbon_ruby_sdk/models/revoke_access_token_input.rb +220 -0
- data/lib/carbon_ruby_sdk/models/rss_feed_input.rb +301 -0
- data/lib/carbon_ruby_sdk/models/s3_auth_request.rb +234 -0
- data/lib/carbon_ruby_sdk/models/s3_file_sync_input.rb +313 -0
- data/lib/carbon_ruby_sdk/models/s3_get_file_input.rb +226 -0
- data/lib/carbon_ruby_sdk/models/single_chunks_and_embeddings_upload_input.rb +256 -0
- data/lib/carbon_ruby_sdk/models/sitemap_scrape_request.rb +361 -0
- data/lib/carbon_ruby_sdk/models/source_property.rb +226 -0
- data/lib/carbon_ruby_sdk/models/sync_directory_request.rb +220 -0
- data/lib/carbon_ruby_sdk/models/sync_files_request.rb +328 -0
- data/lib/carbon_ruby_sdk/models/tags.rb +232 -0
- data/lib/carbon_ruby_sdk/models/tags1.rb +230 -0
- data/lib/carbon_ruby_sdk/models/text_embedding_generators.rb +42 -0
- data/lib/carbon_ruby_sdk/models/token_response.rb +234 -0
- data/lib/carbon_ruby_sdk/models/upload_file_from_url_input.rb +326 -0
- data/lib/carbon_ruby_sdk/models/user_file.rb +524 -0
- data/lib/carbon_ruby_sdk/models/user_files_v2.rb +236 -0
- data/lib/carbon_ruby_sdk/models/user_request_content.rb +220 -0
- data/lib/carbon_ruby_sdk/models/user_response.rb +354 -0
- data/lib/carbon_ruby_sdk/models/validation_error.rb +250 -0
- data/lib/carbon_ruby_sdk/models/webhook.rb +290 -0
- data/lib/carbon_ruby_sdk/models/webhook_filters.rb +218 -0
- data/lib/carbon_ruby_sdk/models/webhook_no_key.rb +276 -0
- data/lib/carbon_ruby_sdk/models/webhook_order_by_columns.rb +36 -0
- data/lib/carbon_ruby_sdk/models/webhook_query_input.rb +246 -0
- data/lib/carbon_ruby_sdk/models/webhook_query_response.rb +236 -0
- data/lib/carbon_ruby_sdk/models/webscrape_request.rb +375 -0
- data/lib/carbon_ruby_sdk/models/white_labeling_response.rb +234 -0
- data/lib/carbon_ruby_sdk/models/youtube_transcript_response.rb +252 -0
- data/lib/carbon_ruby_sdk/version.rb +11 -0
- data/lib/carbon_ruby_sdk.rb +207 -0
- data/spec/api/auth_api_spec.rb +50 -0
- data/spec/api/data_sources_api_spec.rb +51 -0
- data/spec/api/embeddings_api_spec.rb +63 -0
- data/spec/api/files_api_spec.rb +178 -0
- data/spec/api/health_api_spec.rb +39 -0
- data/spec/api/integrations_api_spec.rb +226 -0
- data/spec/api/organizations_api_spec.rb +39 -0
- data/spec/api/users_api_spec.rb +51 -0
- data/spec/api/utilities_api_spec.rb +102 -0
- data/spec/api/webhooks_api_spec.rb +62 -0
- data/spec/api_client_spec.rb +191 -0
- data/spec/configuration_spec.rb +38 -0
- data/spec/getting_started_spec.rb +196 -0
- data/spec/models/add_webhook_props_spec.rb +28 -0
- data/spec/models/body_create_upload_file_uploadfile_post_spec.rb +28 -0
- data/spec/models/chunk_properties_nullable_spec.rb +40 -0
- data/spec/models/chunk_properties_spec.rb +40 -0
- data/spec/models/chunks_and_embeddings_spec.rb +40 -0
- data/spec/models/chunks_and_embeddings_upload_input_spec.rb +40 -0
- data/spec/models/data_source_last_sync_actions_spec.rb +22 -0
- data/spec/models/data_source_sync_statuses_spec.rb +22 -0
- data/spec/models/data_source_type_nullable_spec.rb +22 -0
- data/spec/models/data_source_type_spec.rb +22 -0
- data/spec/models/delete_files_query_input_spec.rb +46 -0
- data/spec/models/directory_item_spec.rb +46 -0
- data/spec/models/document_response_list_spec.rb +28 -0
- data/spec/models/document_response_spec.rb +88 -0
- data/spec/models/embedding_and_chunk_spec.rb +46 -0
- data/spec/models/embedding_generators_nullable_spec.rb +22 -0
- data/spec/models/embedding_generators_spec.rb +22 -0
- data/spec/models/embedding_properties_spec.rb +34 -0
- data/spec/models/embeddings_and_chunks_filters_spec.rb +34 -0
- data/spec/models/embeddings_and_chunks_order_by_columns_spec.rb +22 -0
- data/spec/models/embeddings_and_chunks_query_input_spec.rb +52 -0
- data/spec/models/embeddings_and_chunks_response_spec.rb +34 -0
- data/spec/models/external_file_sync_statuses_spec.rb +22 -0
- data/spec/models/external_source_item_spec.rb +124 -0
- data/spec/models/fetch_urls_response_spec.rb +34 -0
- data/spec/models/file_content_types_nullable_spec.rb +22 -0
- data/spec/models/file_content_types_spec.rb +22 -0
- data/spec/models/file_formats_nullable_spec.rb +22 -0
- data/spec/models/file_formats_spec.rb +22 -0
- data/spec/models/file_statistics_nullable_spec.rb +52 -0
- data/spec/models/file_statistics_spec.rb +52 -0
- data/spec/models/fresh_desk_connect_request_spec.rb +76 -0
- data/spec/models/generic_success_response_spec.rb +28 -0
- data/spec/models/get_embedding_documents_body_spec.rb +106 -0
- data/spec/models/gitbook_connect_request_spec.rb +34 -0
- data/spec/models/gitbook_sync_request_spec.rb +76 -0
- data/spec/models/gmail_sync_input_spec.rb +70 -0
- data/spec/models/http_validation_error_spec.rb +28 -0
- data/spec/models/hybrid_search_tuning_params_nullable_spec.rb +34 -0
- data/spec/models/hybrid_search_tuning_params_spec.rb +34 -0
- data/spec/models/list_data_source_items_request_spec.rb +40 -0
- data/spec/models/list_data_source_items_response_spec.rb +34 -0
- data/spec/models/list_request_spec.rb +34 -0
- data/spec/models/list_response_spec.rb +28 -0
- data/spec/models/location_property_inner_spec.rb +22 -0
- data/spec/models/modify_user_configuration_input_spec.rb +34 -0
- data/spec/models/o_auth_url_request_spec.rb +106 -0
- data/spec/models/order_dir_spec.rb +22 -0
- data/spec/models/organization_response_spec.rb +94 -0
- data/spec/models/organization_user_data_source_api_spec.rb +100 -0
- data/spec/models/organization_user_data_source_filters_spec.rb +40 -0
- data/spec/models/organization_user_data_source_order_by_columns_spec.rb +22 -0
- data/spec/models/organization_user_data_source_query_input_spec.rb +46 -0
- data/spec/models/organization_user_data_source_response_spec.rb +34 -0
- data/spec/models/organization_user_file_tag_create_spec.rb +34 -0
- data/spec/models/organization_user_file_tags_remove_spec.rb +34 -0
- data/spec/models/organization_user_files_to_sync_filters_spec.rb +88 -0
- data/spec/models/organization_user_files_to_sync_order_by_types_spec.rb +22 -0
- data/spec/models/organization_user_files_to_sync_query_input_spec.rb +64 -0
- data/spec/models/outlook_sync_input_spec.rb +76 -0
- data/spec/models/pagination_spec.rb +34 -0
- data/spec/models/presigned_url_response_spec.rb +28 -0
- data/spec/models/rank_property_spec.rb +22 -0
- data/spec/models/raw_text_input_spec.rb +70 -0
- data/spec/models/raw_transcript_property_inner_value_spec.rb +22 -0
- data/spec/models/resync_file_query_input_spec.rb +40 -0
- data/spec/models/revoke_access_token_input_spec.rb +28 -0
- data/spec/models/rss_feed_input_spec.rb +70 -0
- data/spec/models/s3_auth_request_spec.rb +34 -0
- data/spec/models/s3_file_sync_input_spec.rb +76 -0
- data/spec/models/s3_get_file_input_spec.rb +34 -0
- data/spec/models/single_chunks_and_embeddings_upload_input_spec.rb +46 -0
- data/spec/models/sitemap_scrape_request_spec.rb +100 -0
- data/spec/models/source_property_spec.rb +22 -0
- data/spec/models/sync_directory_request_spec.rb +28 -0
- data/spec/models/sync_files_request_spec.rb +82 -0
- data/spec/models/tags1_spec.rb +22 -0
- data/spec/models/tags_spec.rb +22 -0
- data/spec/models/text_embedding_generators_spec.rb +22 -0
- data/spec/models/token_response_spec.rb +34 -0
- data/spec/models/upload_file_from_url_input_spec.rb +88 -0
- data/spec/models/user_file_spec.rb +190 -0
- data/spec/models/user_files_v2_spec.rb +34 -0
- data/spec/models/user_request_content_spec.rb +28 -0
- data/spec/models/user_response_spec.rb +88 -0
- data/spec/models/validation_error_spec.rb +40 -0
- data/spec/models/webhook_filters_spec.rb +28 -0
- data/spec/models/webhook_no_key_spec.rb +52 -0
- data/spec/models/webhook_order_by_columns_spec.rb +22 -0
- data/spec/models/webhook_query_input_spec.rb +46 -0
- data/spec/models/webhook_query_response_spec.rb +34 -0
- data/spec/models/webhook_spec.rb +58 -0
- data/spec/models/webscrape_request_spec.rb +106 -0
- data/spec/models/white_labeling_response_spec.rb +34 -0
- data/spec/models/youtube_transcript_response_spec.rb +46 -0
- data/spec/spec_helper.rb +107 -0
- metadata +440 -0
|
@@ -0,0 +1,710 @@
|
|
|
1
|
+
=begin
|
|
2
|
+
#Carbon
|
|
3
|
+
|
|
4
|
+
#Connect external data to LLMs, no matter the source.
|
|
5
|
+
|
|
6
|
+
The version of the OpenAPI document: 1.0.0
|
|
7
|
+
=end
|
|
8
|
+
|
|
9
|
+
require 'cgi'
|
|
10
|
+
|
|
11
|
+
module Carbon
|
|
12
|
+
class UtilitiesApi
|
|
13
|
+
attr_accessor :api_client
|
|
14
|
+
|
|
15
|
+
# Builds a UtilitiesApi bound to an API client.
#
# @param api_client [ApiClient] client used to perform the HTTP calls;
#   falls back to ApiClient.default when omitted
def initialize(api_client = ApiClient.default)
  self.api_client = api_client
end
|
|
18
|
+
|
|
19
|
+
# Fetch Urls
#
# Extracts all URLs from a webpage and returns only the response payload
# (the `data` of the underlying APIResponse).
#
# Per the API description, the payload is a FetchURLsResponse holding the list
# of URLs extracted from the webpage together with the webpage content.
#
# @param url [String] URL of the webpage
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
def fetch_urls(url:, extra: {})
  fetch_urls_with_http_info_impl(url, extra).data
end
|
|
35
|
+
|
|
36
|
+
# Fetch Urls (full response variant)
#
# Extracts all URLs from a webpage. Unlike #fetch_urls, the complete result of
# fetch_urls_with_http_info_impl is handed back instead of only its `data`.
#
# @param url [String] URL of the webpage
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
def fetch_urls_with_http_info(url:, extra: {})
  full_response = fetch_urls_with_http_info_impl(url, extra)
  full_response
end
|
|
51
|
+
|
|
52
|
+
# Fetch Urls
# Extracts all URLs from a webpage and returns only the response payload.
#
# Delegates to fetch_urls_with_http_info_impl, which accepts the same
# positional (url, opts) arguments. The public fetch_urls_with_http_info takes
# keyword arguments only, so it cannot be called positionally from here.
# @param url [String] URL of the webpage
# @param [Hash] opts the optional parameters
# @return [FetchURLsResponse]
private def fetch_urls_impl(url, opts = {})
  fetch_urls_with_http_info_impl(url, opts).data
end
|
|
61
|
+
|
|
62
|
+
# Fetch Urls
# Extracts all URLs from a webpage by issuing `GET /fetch_urls` through the API client.
#
# The :query_params and :header_params hashes found in +opts+ are copied before
# `url` / `Accept` are written into them, so the caller's hashes are never
# modified and may be frozen or reused across calls.
# @param url [String] URL of the webpage; sent as the `url` query parameter
# @param [Hash] opts the optional parameters
# @option opts [Hash] :query_params additional query parameters
# @option opts [Hash] :header_params additional request headers
# @option opts [Hash] :form_params form parameters
# @option opts [Object] :debug_body request body override
# @option opts [String] :debug_return_type return type override (default 'FetchURLsResponse')
# @option opts [Array<String>] :debug_auth_names auth scheme names override
# @return [APIResponse] data is FetchURLsResponse, status code, headers and response
# @raise [ArgumentError] when client-side validation is enabled and +url+ is nil
private def fetch_urls_with_http_info_impl(url, opts = {})
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: UtilitiesApi.fetch_urls ...'
  end
  # verify the required parameter 'url' is set
  if @api_client.config.client_side_validation && url.nil?
    fail ArgumentError, "Missing the required parameter 'url' when calling UtilitiesApi.fetch_urls"
  end
  # resource path
  local_var_path = '/fetch_urls'

  # query parameters (copied: the caller's hash must not gain a :url entry)
  query_params = (opts[:query_params] || {}).dup
  query_params[:'url'] = url

  # header parameters (copied for the same reason)
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:debug_body]

  # return_type
  return_type = opts[:debug_return_type] || 'FetchURLsResponse'

  # auth_names
  auth_names = opts[:debug_auth_names] || ['accessToken', 'apiKey', 'customerId']

  # opts.merge returns a new hash, so +opts+ itself stays untouched
  new_options = opts.merge(
    :operation => :"UtilitiesApi.fetch_urls",
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type
  )

  data, status_code, headers, response = @api_client.call_api(:GET, local_var_path, new_options)
  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: UtilitiesApi#fetch_urls\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  APIResponse::new(data, status_code, headers, response)
end
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Fetch Youtube Transcripts
#
# Fetches English transcripts from YouTube videos and returns only the response
# payload (the `data` of the underlying APIResponse).
#
# The +raw+ flag is forwarded through a copy of +extra+, so the hash supplied
# by the caller is never modified.
#
# @param id [String] The ID of the YouTube video.
# @param raw [Boolean] Whether to return the raw transcript or not. Defaults to false.
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
def fetch_youtube_transcripts(id:, raw: false, extra: {})
  # merge builds a new hash; assigning extra[:raw] would write into the caller's object
  opts = raw != SENTINEL ? extra.merge(raw: raw) : extra
  api_response = fetch_youtube_transcripts_with_http_info_impl(id, opts)
  api_response.data
end
|
|
136
|
+
|
|
137
|
+
# Fetch Youtube Transcripts (full response variant)
#
# Fetches English transcripts from YouTube videos and returns the complete
# result of fetch_youtube_transcripts_with_http_info_impl rather than only its `data`.
#
# The +raw+ flag is forwarded through a copy of +extra+, so the hash supplied
# by the caller is never modified.
#
# @param id [String] The ID of the YouTube video.
# @param raw [Boolean] Whether to return the raw transcript or not. Defaults to false.
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
def fetch_youtube_transcripts_with_http_info(id:, raw: false, extra: {})
  # merge builds a new hash; assigning extra[:raw] would write into the caller's object
  opts = raw != SENTINEL ? extra.merge(raw: raw) : extra
  fetch_youtube_transcripts_with_http_info_impl(id, opts)
end
|
|
155
|
+
|
|
156
|
+
# Fetch Youtube Transcripts
# Fetches English transcripts from YouTube videos and returns only the response payload.
#
# Delegates to fetch_youtube_transcripts_with_http_info_impl, which accepts the
# same positional (id, opts) arguments. The public
# fetch_youtube_transcripts_with_http_info takes keyword arguments only, so it
# cannot be called positionally from here.
# @param id [String] The ID of the YouTube video.
# @param [Hash] opts the optional parameters
# @option opts [Boolean] :raw (default to false)
# @return [YoutubeTranscriptResponse]
private def fetch_youtube_transcripts_impl(id, opts = {})
  fetch_youtube_transcripts_with_http_info_impl(id, opts).data
end
|
|
166
|
+
|
|
167
|
+
# Fetch Youtube Transcripts
# Fetches English transcripts from YouTube videos by issuing
# `GET /fetch_youtube_transcript` through the API client.
#
# The :query_params and :header_params hashes found in +opts+ are copied before
# `id` / `raw` / `Accept` are written into them, so the caller's hashes are
# never modified and may be frozen or reused across calls.
# @param id [String] The ID of the YouTube video; sent as the `id` query parameter
# @param [Hash] opts the optional parameters
# @option opts [Boolean] :raw sent as the `raw` query parameter when not nil
# @option opts [Hash] :query_params additional query parameters
# @option opts [Hash] :header_params additional request headers
# @option opts [Hash] :form_params form parameters
# @option opts [Object] :debug_body request body override
# @option opts [String] :debug_return_type return type override (default 'YoutubeTranscriptResponse')
# @option opts [Array<String>] :debug_auth_names auth scheme names override
# @return [APIResponse] data is YoutubeTranscriptResponse, status code, headers and response
# @raise [ArgumentError] when client-side validation is enabled and +id+ is nil
private def fetch_youtube_transcripts_with_http_info_impl(id, opts = {})
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: UtilitiesApi.fetch_youtube_transcripts ...'
  end
  # verify the required parameter 'id' is set
  if @api_client.config.client_side_validation && id.nil?
    fail ArgumentError, "Missing the required parameter 'id' when calling UtilitiesApi.fetch_youtube_transcripts"
  end
  # resource path
  local_var_path = '/fetch_youtube_transcript'

  # query parameters (copied: the caller's hash must not gain :id / :raw entries)
  query_params = (opts[:query_params] || {}).dup
  query_params[:'id'] = id
  # an explicit false is still forwarded; only nil means "not supplied"
  query_params[:'raw'] = opts[:'raw'] unless opts[:'raw'].nil?

  # header parameters (copied for the same reason)
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:debug_body]

  # return_type
  return_type = opts[:debug_return_type] || 'YoutubeTranscriptResponse'

  # auth_names
  auth_names = opts[:debug_auth_names] || ['accessToken', 'apiKey', 'customerId']

  # opts.merge returns a new hash, so +opts+ itself stays untouched
  new_options = opts.merge(
    :operation => :"UtilitiesApi.fetch_youtube_transcripts",
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type
  )

  data, status_code, headers, response = @api_client.call_api(:GET, local_var_path, new_options)
  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: UtilitiesApi#fetch_youtube_transcripts\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  APIResponse::new(data, status_code, headers, response)
end
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
# Sitemap
#
# Retrieves all URLs from a sitemap, which can subsequently be utilized with
# our `web_scrape` endpoint. Only the response payload (the `data` of the
# underlying APIResponse) is returned.
#
# @param url [String] URL of the sitemap
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
def process_sitemap(url:, extra: {})
  process_sitemap_with_http_info_impl(url, extra).data
end
|
|
240
|
+
|
|
241
|
+
# Sitemap (full response variant)
#
# Retrieves all URLs from a sitemap, which can subsequently be utilized with
# our `web_scrape` endpoint. Unlike #process_sitemap, the complete result of
# process_sitemap_with_http_info_impl is handed back instead of only its `data`.
#
# @param url [String] URL of the sitemap
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
def process_sitemap_with_http_info(url:, extra: {})
  full_response = process_sitemap_with_http_info_impl(url, extra)
  full_response
end
|
|
256
|
+
|
|
257
|
+
# Sitemap
# Retrieves all URLs from a sitemap, which can subsequently be utilized with
# our `web_scrape` endpoint, and returns only the response payload.
#
# Delegates to process_sitemap_with_http_info_impl, which accepts the same
# positional (url, opts) arguments. The public process_sitemap_with_http_info
# takes keyword arguments only, so it cannot be called positionally from here.
# @param url [String] URL of the sitemap
# @param [Hash] opts the optional parameters
# @return [Object]
private def process_sitemap_impl(url, opts = {})
  process_sitemap_with_http_info_impl(url, opts).data
end
|
|
266
|
+
|
|
267
|
+
# Sitemap
# Retrieves all URLs from a sitemap by issuing `GET /process_sitemap` through
# the API client. The URLs can subsequently be utilized with the `web_scrape` endpoint.
#
# The :query_params and :header_params hashes found in +opts+ are copied before
# `url` / `Accept` are written into them, so the caller's hashes are never
# modified and may be frozen or reused across calls.
# @param url [String] URL of the sitemap; sent as the `url` query parameter
# @param [Hash] opts the optional parameters
# @option opts [Hash] :query_params additional query parameters
# @option opts [Hash] :header_params additional request headers
# @option opts [Hash] :form_params form parameters
# @option opts [Object] :debug_body request body override
# @option opts [String] :debug_return_type return type override (default 'Object')
# @option opts [Array<String>] :debug_auth_names auth scheme names override
# @return [APIResponse] data is Object, status code, headers and response
# @raise [ArgumentError] when client-side validation is enabled and +url+ is nil
private def process_sitemap_with_http_info_impl(url, opts = {})
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: UtilitiesApi.process_sitemap ...'
  end
  # verify the required parameter 'url' is set
  if @api_client.config.client_side_validation && url.nil?
    fail ArgumentError, "Missing the required parameter 'url' when calling UtilitiesApi.process_sitemap"
  end
  # resource path
  local_var_path = '/process_sitemap'

  # query parameters (copied: the caller's hash must not gain a :url entry)
  query_params = (opts[:query_params] || {}).dup
  query_params[:'url'] = url

  # header parameters (copied for the same reason)
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:debug_body]

  # return_type
  return_type = opts[:debug_return_type] || 'Object'

  # auth_names
  auth_names = opts[:debug_auth_names] || ['accessToken', 'apiKey', 'customerId']

  # opts.merge returns a new hash, so +opts+ itself stays untouched
  new_options = opts.merge(
    :operation => :"UtilitiesApi.process_sitemap",
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type
  )

  data, status_code, headers, response = @api_client.call_api(:GET, local_var_path, new_options)
  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: UtilitiesApi#process_sitemap\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  APIResponse::new(data, status_code, headers, response)
end
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
# Scrape Sitemap
#
# Extracts all URLs from a sitemap and performs a web scrape on each of them.
# Per the API description, the response reports the status of the scraping job.
#
# Every keyword below is copied into the request body under its own name,
# except those left at SENTINEL, which are omitted. Only the response payload
# (the `data` of the underlying APIResponse) is returned.
#
# @param url [String] URL of the sitemap
# @param tags [Hash<String, Tags1>]
# @param max_pages_to_scrape [Integer]
# @param chunk_size [Integer] defaults to 1500
# @param chunk_overlap [Integer] defaults to 20
# @param skip_embedding_generation [Boolean] defaults to false
# @param enable_auto_sync [Boolean] defaults to false
# @param generate_sparse_vectors [Boolean] defaults to false
# @param prepend_filename_to_chunks [Boolean] defaults to false
# @param html_tags_to_skip [Array<String>]
# @param css_classes_to_skip [Array<String>]
# @param css_selectors_to_skip [Array<String>]
# @param embedding_model [EmbeddingGenerators] defaults to 'OPENAI'
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
def scrape_sitemap(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
  # Listed in the order the keys must appear in the request body.
  candidates = {
    tags: tags,
    url: url,
    max_pages_to_scrape: max_pages_to_scrape,
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    skip_embedding_generation: skip_embedding_generation,
    enable_auto_sync: enable_auto_sync,
    generate_sparse_vectors: generate_sparse_vectors,
    prepend_filename_to_chunks: prepend_filename_to_chunks,
    html_tags_to_skip: html_tags_to_skip,
    css_classes_to_skip: css_classes_to_skip,
    css_selectors_to_skip: css_selectors_to_skip,
    embedding_model: embedding_model
  }
  # Drop everything the caller left at the SENTINEL placeholder.
  sitemap_scrape_request = candidates.select { |_name, value| value != SENTINEL }
  scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra).data
end
|
|
366
|
+
|
|
367
|
+
# Scrape Sitemap
|
|
368
|
+
#
|
|
369
|
+
# Extracts all URLs from a sitemap and performs a web scrape on each of them.
|
|
370
|
+
#
|
|
371
|
+
# Args:
|
|
372
|
+
# sitemap_url (str): URL of the sitemap
|
|
373
|
+
#
|
|
374
|
+
# Returns:
|
|
375
|
+
# dict: A response object with the status of the scraping job message.-->
|
|
376
|
+
#
|
|
377
|
+
# @param url [String]
|
|
378
|
+
# @param tags [Hash<String, Tags1>]
|
|
379
|
+
# @param max_pages_to_scrape [Integer]
|
|
380
|
+
# @param chunk_size [Integer]
|
|
381
|
+
# @param chunk_overlap [Integer]
|
|
382
|
+
# @param skip_embedding_generation [Boolean]
|
|
383
|
+
# @param enable_auto_sync [Boolean]
|
|
384
|
+
# @param generate_sparse_vectors [Boolean]
|
|
385
|
+
# @param prepend_filename_to_chunks [Boolean]
|
|
386
|
+
# @param html_tags_to_skip [Array<String>]
|
|
387
|
+
# @param css_classes_to_skip [Array<String>]
|
|
388
|
+
# @param css_selectors_to_skip [Array<String>]
|
|
389
|
+
# @param embedding_model [EmbeddingGenerators]
|
|
390
|
+
# @param body [SitemapScrapeRequest]
|
|
391
|
+
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
|
392
|
+
def scrape_sitemap_with_http_info(url:, tags: SENTINEL, max_pages_to_scrape: SENTINEL, chunk_size: 1500, chunk_overlap: 20, skip_embedding_generation: false, enable_auto_sync: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, html_tags_to_skip: SENTINEL, css_classes_to_skip: SENTINEL, css_selectors_to_skip: SENTINEL, embedding_model: 'OPENAI', extra: {})
  # Gather every keyword argument in the order the request body lists them.
  candidates = {
    tags: tags,
    url: url,
    max_pages_to_scrape: max_pages_to_scrape,
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    skip_embedding_generation: skip_embedding_generation,
    enable_auto_sync: enable_auto_sync,
    generate_sparse_vectors: generate_sparse_vectors,
    prepend_filename_to_chunks: prepend_filename_to_chunks,
    html_tags_to_skip: html_tags_to_skip,
    css_classes_to_skip: css_classes_to_skip,
    css_selectors_to_skip: css_selectors_to_skip,
    embedding_model: embedding_model
  }

  # Arguments still equal to SENTINEL were never supplied; leave them out of the body.
  sitemap_scrape_request = candidates.select { |_name, value| value != SENTINEL }

  # Hand back the full response object (not just its data) from the shared implementation.
  scrape_sitemap_with_http_info_impl(sitemap_scrape_request, extra)
end
|
|
410
|
+
|
|
411
|
+
# Scrape Sitemap
|
|
412
|
+
# Extracts all URLs from a sitemap and performs a web scrape on each of them. Args: sitemap_url (str): URL of the sitemap Returns: dict: A response object with the status of the scraping job message.
|
|
413
|
+
# @param sitemap_scrape_request [SitemapScrapeRequest]
|
|
414
|
+
# @param [Hash] opts the optional parameters
|
|
415
|
+
# @return [Object]
|
|
416
|
+
private def scrape_sitemap_impl(sitemap_scrape_request, opts = {})
  # Positional-argument convenience wrapper: performs the Scrape Sitemap call
  # and returns only the response data.
  #
  # sitemap_scrape_request - request body passed to the HTTP-level implementation.
  # opts                   - optional parameters forwarded unchanged.
  #
  # Delegates to the positional `_impl` variant. The public
  # `scrape_sitemap_with_http_info` accepts keyword arguments only, so calling
  # it with positional arguments (as this method previously did) raised
  # ArgumentError. The `_impl` variant returns an APIResponse, so the payload
  # is read through `#data` rather than array destructuring.
  scrape_sitemap_with_http_info_impl(sitemap_scrape_request, opts).data
end
|
|
420
|
+
|
|
421
|
+
# Scrape Sitemap
|
|
422
|
+
# Extracts all URLs from a sitemap and performs a web scrape on each of them. Args: sitemap_url (str): URL of the sitemap Returns: dict: A response object with the status of the scraping job message.
|
|
423
|
+
# @param sitemap_scrape_request [SitemapScrapeRequest]
|
|
424
|
+
# @param [Hash] opts the optional parameters
|
|
425
|
+
# @return [APIResponse] data is Object, status code, headers and response
|
|
426
|
+
private def scrape_sitemap_with_http_info_impl(sitemap_scrape_request, opts = {})
  @api_client.config.logger.debug 'Calling API: UtilitiesApi.scrape_sitemap ...' if @api_client.config.debugging

  # The request body is mandatory; reject nil when client-side validation is enabled.
  if @api_client.config.client_side_validation && sitemap_scrape_request.nil?
    raise ArgumentError, "Missing the required parameter 'sitemap_scrape_request' when calling UtilitiesApi.scrape_sitemap"
  end

  endpoint = '/scrape_sitemap'

  # Caller-supplied query/header/form hashes are reused when present.
  queries = opts[:query_params] || {}
  outgoing_headers = opts[:header_params] || {}
  outgoing_headers['Accept'] = @api_client.select_header_accept(['application/json'])
  negotiated_type = @api_client.select_header_content_type(['application/json'])
  outgoing_headers['Content-Type'] = negotiated_type unless negotiated_type.nil?
  form_fields = opts[:form_params] || {}

  # The :debug_* options, when given, override the body, return type and auth names.
  payload = opts[:debug_body] || @api_client.object_to_http_body(sitemap_scrape_request)
  result_type = opts[:debug_return_type] || 'Object'
  auth_schemes = opts[:debug_auth_names] || ['accessToken', 'apiKey', 'customerId']

  call_options = opts.merge(
    operation: :'UtilitiesApi.scrape_sitemap',
    header_params: outgoing_headers,
    query_params: queries,
    form_params: form_fields,
    body: payload,
    auth_names: auth_schemes,
    return_type: result_type
  )

  data, status_code, headers, response = @api_client.call_api(:POST, endpoint, call_options)
  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: UtilitiesApi#scrape_sitemap\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end

  APIResponse.new(data, status_code, headers, response)
end
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
# Web Scrape
|
|
481
|
+
#
|
|
482
|
+
# Conduct a web scrape on a given webpage URL. Our web scraper is fully compatible with JavaScript and supports recursion depth, enabling you to efficiently extract all content from the target website.
|
|
483
|
+
#
|
|
484
|
+
# <!--Args:
|
|
485
|
+
# scraping_requests (List[WebscrapeRequest]): A list of WebscrapeRequest objects.
|
|
486
|
+
#
|
|
487
|
+
#
|
|
488
|
+
# Returns:
|
|
489
|
+
# dict: A response object with the status of the scraping job message.-->
|
|
490
|
+
#
|
|
491
|
+
# @param body [Array<WebscrapeRequest>]
|
|
492
|
+
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
|
493
|
+
def scrape_web(body:, extra: {})
  # `body` is used as the request payload as-is; only the response data
  # (not status code or headers) is returned to the caller.
  scrape_web_with_http_info_impl(body, extra).data
end
|
|
498
|
+
|
|
499
|
+
# Web Scrape
|
|
500
|
+
#
|
|
501
|
+
# Conduct a web scrape on a given webpage URL. Our web scraper is fully compatible with JavaScript and supports recursion depth, enabling you to efficiently extract all content from the target website.
|
|
502
|
+
#
|
|
503
|
+
# <!--Args:
|
|
504
|
+
# scraping_requests (List[WebscrapeRequest]): A list of WebscrapeRequest objects.
|
|
505
|
+
#
|
|
506
|
+
#
|
|
507
|
+
# Returns:
|
|
508
|
+
# dict: A response object with the status of the scraping job message.-->
|
|
509
|
+
#
|
|
510
|
+
# @param body [Array<WebscrapeRequest>]
|
|
511
|
+
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
|
512
|
+
def scrape_web_with_http_info(body:, extra: {})
  # Same call as `scrape_web`, but the complete response object is returned
  # instead of only its data.
  full_response = scrape_web_with_http_info_impl(body, extra)
  full_response
end
|
|
516
|
+
|
|
517
|
+
# Web Scrape
|
|
518
|
+
# Conduct a web scrape on a given webpage URL. Our web scraper is fully compatible with JavaScript and supports recursion depth, enabling you to efficiently extract all content from the target website. <!--Args: scraping_requests (List[WebscrapeRequest]): A list of WebscrapeRequest objects. Returns: dict: A response object with the status of the scraping job message.-->
|
|
519
|
+
# @param webscrape_request [Array<WebscrapeRequest>]
|
|
520
|
+
# @param [Hash] opts the optional parameters
|
|
521
|
+
# @return [Object]
|
|
522
|
+
private def scrape_web_impl(webscrape_request, opts = {})
  # Positional-argument convenience wrapper: performs the Web Scrape call and
  # returns only the response data.
  #
  # webscrape_request - request body passed to the HTTP-level implementation.
  # opts              - optional parameters forwarded unchanged.
  #
  # Delegates to the positional `_impl` variant. The public
  # `scrape_web_with_http_info` accepts keyword arguments only, so calling it
  # with positional arguments (as this method previously did) raised
  # ArgumentError. The `_impl` variant returns an APIResponse, so the payload
  # is read through `#data` rather than array destructuring.
  scrape_web_with_http_info_impl(webscrape_request, opts).data
end
|
|
526
|
+
|
|
527
|
+
# Web Scrape
|
|
528
|
+
# Conduct a web scrape on a given webpage URL. Our web scraper is fully compatible with JavaScript and supports recursion depth, enabling you to efficiently extract all content from the target website. <!--Args: scraping_requests (List[WebscrapeRequest]): A list of WebscrapeRequest objects. Returns: dict: A response object with the status of the scraping job message.-->
|
|
529
|
+
# @param webscrape_request [Array<WebscrapeRequest>]
|
|
530
|
+
# @param [Hash] opts the optional parameters
|
|
531
|
+
# @return [APIResponse] data is Object, status code, headers and response
|
|
532
|
+
private def scrape_web_with_http_info_impl(webscrape_request, opts = {})
  @api_client.config.logger.debug 'Calling API: UtilitiesApi.scrape_web ...' if @api_client.config.debugging

  # The request body is mandatory; reject nil when client-side validation is enabled.
  if @api_client.config.client_side_validation && webscrape_request.nil?
    raise ArgumentError, "Missing the required parameter 'webscrape_request' when calling UtilitiesApi.scrape_web"
  end

  endpoint = '/web_scrape'

  # Caller-supplied query/header/form hashes are reused when present.
  queries = opts[:query_params] || {}
  outgoing_headers = opts[:header_params] || {}
  outgoing_headers['Accept'] = @api_client.select_header_accept(['application/json'])
  negotiated_type = @api_client.select_header_content_type(['application/json'])
  outgoing_headers['Content-Type'] = negotiated_type unless negotiated_type.nil?
  form_fields = opts[:form_params] || {}

  # The :debug_* options, when given, override the body, return type and auth names.
  payload = opts[:debug_body] || @api_client.object_to_http_body(webscrape_request)
  result_type = opts[:debug_return_type] || 'Object'
  auth_schemes = opts[:debug_auth_names] || ['accessToken', 'apiKey', 'customerId']

  call_options = opts.merge(
    operation: :'UtilitiesApi.scrape_web',
    header_params: outgoing_headers,
    query_params: queries,
    form_params: form_fields,
    body: payload,
    auth_names: auth_schemes,
    return_type: result_type
  )

  data, status_code, headers, response = @api_client.call_api(:POST, endpoint, call_options)
  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: UtilitiesApi#scrape_web\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end

  APIResponse.new(data, status_code, headers, response)
end
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
# Search Urls
|
|
587
|
+
#
|
|
588
|
+
# Perform a web search and obtain a list of relevant URLs.
|
|
589
|
+
#
|
|
590
|
+
# As an illustration, when you perform a search for “content related to MRNA,” you will receive a list of links such as the following:
|
|
591
|
+
#
|
|
592
|
+
# - https://tomrenz.substack.com/p/mrna-and-why-it-matters
|
|
593
|
+
#
|
|
594
|
+
# - https://www.statnews.com/2020/11/10/the-story-of-mrna-how-a-once-dismissed-idea-became-a-leading-technology-in-the-covid-vaccine-race/
|
|
595
|
+
#
|
|
596
|
+
# - https://www.statnews.com/2022/11/16/covid-19-vaccines-were-a-success-but-mrna-still-has-a-delivery-problem/
|
|
597
|
+
#
|
|
598
|
+
# - https://joomi.substack.com/p/were-still-being-misled-about-how
|
|
599
|
+
#
|
|
600
|
+
# Subsequently, you can submit these links to the web_scrape endpoint in order to retrieve the content of the respective web pages.
|
|
601
|
+
#
|
|
602
|
+
# Args:
|
|
603
|
+
# query (str): Query to search for
|
|
604
|
+
#
|
|
605
|
+
# Returns:
|
|
606
|
+
# FetchURLsResponse: A response object with a list of URLs for a given search query.
|
|
607
|
+
#
|
|
608
|
+
# @param query [String]
|
|
609
|
+
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
|
610
|
+
def search_urls(query:, extra: {})
  # Run the search through the shared implementation and return only the
  # response data (not status code or headers).
  search_urls_with_http_info_impl(query, extra).data
end
|
|
614
|
+
|
|
615
|
+
# Search Urls
|
|
616
|
+
#
|
|
617
|
+
# Perform a web search and obtain a list of relevant URLs.
|
|
618
|
+
#
|
|
619
|
+
# As an illustration, when you perform a search for “content related to MRNA,” you will receive a list of links such as the following:
|
|
620
|
+
#
|
|
621
|
+
# - https://tomrenz.substack.com/p/mrna-and-why-it-matters
|
|
622
|
+
#
|
|
623
|
+
# - https://www.statnews.com/2020/11/10/the-story-of-mrna-how-a-once-dismissed-idea-became-a-leading-technology-in-the-covid-vaccine-race/
|
|
624
|
+
#
|
|
625
|
+
# - https://www.statnews.com/2022/11/16/covid-19-vaccines-were-a-success-but-mrna-still-has-a-delivery-problem/
|
|
626
|
+
#
|
|
627
|
+
# - https://joomi.substack.com/p/were-still-being-misled-about-how
|
|
628
|
+
#
|
|
629
|
+
# Subsequently, you can submit these links to the web_scrape endpoint in order to retrieve the content of the respective web pages.
|
|
630
|
+
#
|
|
631
|
+
# Args:
|
|
632
|
+
# query (str): Query to search for
|
|
633
|
+
#
|
|
634
|
+
# Returns:
|
|
635
|
+
# FetchURLsResponse: A response object with a list of URLs for a given search query.
|
|
636
|
+
#
|
|
637
|
+
# @param query [String]
|
|
638
|
+
# @param [Hash] extra additional parameters to pass along through :header_params, :query_params, or parameter name
|
|
639
|
+
def search_urls_with_http_info(query:, extra: {})
  # Same call as `search_urls`, but the complete response object is returned
  # instead of only its data.
  full_response = search_urls_with_http_info_impl(query, extra)
  full_response
end
|
|
642
|
+
|
|
643
|
+
# Search Urls
|
|
644
|
+
# Perform a web search and obtain a list of relevant URLs. As an illustration, when you perform a search for “content related to MRNA,” you will receive a list of links such as the following: - https://tomrenz.substack.com/p/mrna-and-why-it-matters - https://www.statnews.com/2020/11/10/the-story-of-mrna-how-a-once-dismissed-idea-became-a-leading-technology-in-the-covid-vaccine-race/ - https://www.statnews.com/2022/11/16/covid-19-vaccines-were-a-success-but-mrna-still-has-a-delivery-problem/ - https://joomi.substack.com/p/were-still-being-misled-about-how Subsequently, you can submit these links to the web_scrape endpoint in order to retrieve the content of the respective web pages. Args: query (str): Query to search for Returns: FetchURLsResponse: A response object with a list of URLs for a given search query.
|
|
645
|
+
# @param query [String]
|
|
646
|
+
# @param [Hash] opts the optional parameters
|
|
647
|
+
# @return [FetchURLsResponse]
|
|
648
|
+
private def search_urls_impl(query, opts = {})
  # Positional-argument convenience wrapper: performs the Search Urls call and
  # returns only the response data.
  #
  # query - search query passed to the HTTP-level implementation.
  # opts  - optional parameters forwarded unchanged.
  #
  # Delegates to the positional `_impl` variant. The public
  # `search_urls_with_http_info` accepts keyword arguments only, so calling it
  # with positional arguments (as this method previously did) raised
  # ArgumentError. The `_impl` variant returns an APIResponse, so the payload
  # is read through `#data` rather than array destructuring.
  search_urls_with_http_info_impl(query, opts).data
end
|
|
652
|
+
|
|
653
|
+
# Search Urls
|
|
654
|
+
# Perform a web search and obtain a list of relevant URLs. As an illustration, when you perform a search for “content related to MRNA,” you will receive a list of links such as the following: - https://tomrenz.substack.com/p/mrna-and-why-it-matters - https://www.statnews.com/2020/11/10/the-story-of-mrna-how-a-once-dismissed-idea-became-a-leading-technology-in-the-covid-vaccine-race/ - https://www.statnews.com/2022/11/16/covid-19-vaccines-were-a-success-but-mrna-still-has-a-delivery-problem/ - https://joomi.substack.com/p/were-still-being-misled-about-how Subsequently, you can submit these links to the web_scrape endpoint in order to retrieve the content of the respective web pages. Args: query (str): Query to search for Returns: FetchURLsResponse: A response object with a list of URLs for a given search query.
|
|
655
|
+
# @param query [String]
|
|
656
|
+
# @param [Hash] opts the optional parameters
|
|
657
|
+
# @return [APIResponse] data is FetchURLsResponse, status code, headers and response
|
|
658
|
+
private def search_urls_with_http_info_impl(query, opts = {})
  @api_client.config.logger.debug 'Calling API: UtilitiesApi.search_urls ...' if @api_client.config.debugging

  # The search query is mandatory; reject nil when client-side validation is enabled.
  if @api_client.config.client_side_validation && query.nil?
    raise ArgumentError, "Missing the required parameter 'query' when calling UtilitiesApi.search_urls"
  end

  endpoint = '/search_urls'

  # The search term is sent as the `query` query-string parameter.
  queries = opts[:query_params] || {}
  queries[:query] = query

  outgoing_headers = opts[:header_params] || {}
  outgoing_headers['Accept'] = @api_client.select_header_accept(['application/json'])

  form_fields = opts[:form_params] || {}

  # No body is built here; one is sent only if the caller passes :debug_body.
  payload = opts[:debug_body]
  result_type = opts[:debug_return_type] || 'FetchURLsResponse'
  auth_schemes = opts[:debug_auth_names] || ['accessToken', 'apiKey', 'customerId']

  call_options = opts.merge(
    operation: :'UtilitiesApi.search_urls',
    header_params: outgoing_headers,
    query_params: queries,
    form_params: form_fields,
    body: payload,
    auth_names: auth_schemes,
    return_type: result_type
  )

  data, status_code, headers, response = @api_client.call_api(:GET, endpoint, call_options)
  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: UtilitiesApi#search_urls\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end

  APIResponse.new(data, status_code, headers, response)
end
|
|
706
|
+
end
|
|
707
|
+
|
|
708
|
+
# Top-level client access so that users do not have to instantiate their own API instances
|
|
709
|
+
Utilities = UtilitiesApi::new
|
|
710
|
+
end
|