context.dev 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/README.md +1 -1
- data/lib/context_dev/internal/util.rb +3 -1
- data/lib/context_dev/models/brand_identify_from_transaction_params.rb +65 -0
- data/lib/context_dev/models/brand_identify_from_transaction_response.rb +190 -5
- data/lib/context_dev/models/brand_retrieve_by_email_params.rb +65 -0
- data/lib/context_dev/models/brand_retrieve_by_email_response.rb +190 -5
- data/lib/context_dev/models/brand_retrieve_by_isin_params.rb +65 -0
- data/lib/context_dev/models/brand_retrieve_by_isin_response.rb +190 -5
- data/lib/context_dev/models/brand_retrieve_by_name_params.rb +65 -0
- data/lib/context_dev/models/brand_retrieve_by_name_response.rb +190 -5
- data/lib/context_dev/models/brand_retrieve_by_ticker_params.rb +65 -0
- data/lib/context_dev/models/brand_retrieve_by_ticker_response.rb +190 -5
- data/lib/context_dev/models/brand_retrieve_params.rb +68 -5
- data/lib/context_dev/models/brand_retrieve_response.rb +190 -5
- data/lib/context_dev/models/web_screenshot_params.rb +17 -5
- data/lib/context_dev/models/web_web_crawl_md_params.rb +92 -0
- data/lib/context_dev/models/web_web_crawl_md_response.rb +121 -0
- data/lib/context_dev/models.rb +2 -0
- data/lib/context_dev/resources/brand.rb +1 -1
- data/lib/context_dev/resources/web.rb +51 -5
- data/lib/context_dev/version.rb +1 -1
- data/lib/context_dev.rb +2 -0
- data/rbi/context_dev/models/brand_identify_from_transaction_params.rbi +325 -0
- data/rbi/context_dev/models/brand_identify_from_transaction_response.rbi +859 -6
- data/rbi/context_dev/models/brand_retrieve_by_email_params.rbi +325 -0
- data/rbi/context_dev/models/brand_retrieve_by_email_response.rbi +859 -6
- data/rbi/context_dev/models/brand_retrieve_by_isin_params.rbi +325 -0
- data/rbi/context_dev/models/brand_retrieve_by_isin_response.rbi +859 -6
- data/rbi/context_dev/models/brand_retrieve_by_name_params.rbi +325 -0
- data/rbi/context_dev/models/brand_retrieve_by_name_response.rbi +859 -6
- data/rbi/context_dev/models/brand_retrieve_by_ticker_params.rbi +325 -0
- data/rbi/context_dev/models/brand_retrieve_by_ticker_response.rbi +859 -6
- data/rbi/context_dev/models/brand_retrieve_params.rbi +328 -6
- data/rbi/context_dev/models/brand_retrieve_response.rbi +859 -6
- data/rbi/context_dev/models/web_screenshot_params.rbi +27 -6
- data/rbi/context_dev/models/web_web_crawl_md_params.rbi +137 -0
- data/rbi/context_dev/models/web_web_crawl_md_response.rbi +230 -0
- data/rbi/context_dev/models.rbi +2 -0
- data/rbi/context_dev/resources/brand.rbi +1 -2
- data/rbi/context_dev/resources/web.rbi +54 -4
- data/sig/context_dev/models/brand_identify_from_transaction_params.rbs +131 -1
- data/sig/context_dev/models/brand_identify_from_transaction_response.rbs +342 -5
- data/sig/context_dev/models/brand_retrieve_by_email_params.rbs +131 -1
- data/sig/context_dev/models/brand_retrieve_by_email_response.rbs +342 -5
- data/sig/context_dev/models/brand_retrieve_by_isin_params.rbs +131 -1
- data/sig/context_dev/models/brand_retrieve_by_isin_response.rbs +342 -5
- data/sig/context_dev/models/brand_retrieve_by_name_params.rbs +131 -1
- data/sig/context_dev/models/brand_retrieve_by_name_response.rbs +342 -5
- data/sig/context_dev/models/brand_retrieve_by_ticker_params.rbs +131 -1
- data/sig/context_dev/models/brand_retrieve_by_ticker_response.rbs +342 -5
- data/sig/context_dev/models/brand_retrieve_params.rbs +131 -1
- data/sig/context_dev/models/brand_retrieve_response.rbs +342 -5
- data/sig/context_dev/models/web_screenshot_params.rbs +11 -2
- data/sig/context_dev/models/web_web_crawl_md_params.rbs +82 -0
- data/sig/context_dev/models/web_web_crawl_md_response.rbs +116 -0
- data/sig/context_dev/models.rbs +2 -0
- data/sig/context_dev/resources/web.rbs +15 -1
- metadata +8 -2
|
@@ -11,10 +11,22 @@ module ContextDev
|
|
|
11
11
|
T.any(ContextDev::WebScreenshotParams, ContextDev::Internal::AnyHash)
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
+
# A specific URL to screenshot directly, bypassing domain resolution (e.g.,
|
|
15
|
+
# 'https://example.com/pricing'). When provided, the screenshot is taken of this
|
|
16
|
+
# exact URL.
|
|
17
|
+
sig { returns(T.nilable(String)) }
|
|
18
|
+
attr_reader :direct_url
|
|
19
|
+
|
|
20
|
+
sig { params(direct_url: String).void }
|
|
21
|
+
attr_writer :direct_url
|
|
22
|
+
|
|
14
23
|
# Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
|
|
15
24
|
# domain will be automatically normalized and validated.
|
|
16
|
-
sig { returns(String) }
|
|
17
|
-
|
|
25
|
+
sig { returns(T.nilable(String)) }
|
|
26
|
+
attr_reader :domain
|
|
27
|
+
|
|
28
|
+
sig { params(domain: String).void }
|
|
29
|
+
attr_writer :domain
|
|
18
30
|
|
|
19
31
|
# Optional parameter to determine screenshot type. If 'true', takes a full page
|
|
20
32
|
# screenshot capturing all content. If 'false' or not provided, takes a viewport
|
|
@@ -37,7 +49,8 @@ module ContextDev
|
|
|
37
49
|
# Optional parameter to specify which page type to screenshot. If provided, the
|
|
38
50
|
# system will scrape the domain's links and use heuristics to find the most
|
|
39
51
|
# appropriate URL for the specified page type (30 supported languages). If not
|
|
40
|
-
# provided, screenshots the main domain landing page.
|
|
52
|
+
# provided, screenshots the main domain landing page. Only applicable when using
|
|
53
|
+
# 'domain', not 'directUrl'.
|
|
41
54
|
sig do
|
|
42
55
|
returns(T.nilable(ContextDev::WebScreenshotParams::Page::OrSymbol))
|
|
43
56
|
end
|
|
@@ -65,6 +78,7 @@ module ContextDev
|
|
|
65
78
|
|
|
66
79
|
sig do
|
|
67
80
|
params(
|
|
81
|
+
direct_url: String,
|
|
68
82
|
domain: String,
|
|
69
83
|
full_screenshot:
|
|
70
84
|
ContextDev::WebScreenshotParams::FullScreenshot::OrSymbol,
|
|
@@ -74,9 +88,13 @@ module ContextDev
|
|
|
74
88
|
).returns(T.attached_class)
|
|
75
89
|
end
|
|
76
90
|
def self.new(
|
|
91
|
+
# A specific URL to screenshot directly, bypassing domain resolution (e.g.,
|
|
92
|
+
# 'https://example.com/pricing'). When provided, the screenshot is taken of this
|
|
93
|
+
# exact URL.
|
|
94
|
+
direct_url: nil,
|
|
77
95
|
# Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
|
|
78
96
|
# domain will be automatically normalized and validated.
|
|
79
|
-
domain
|
|
97
|
+
domain: nil,
|
|
80
98
|
# Optional parameter to determine screenshot type. If 'true', takes a full page
|
|
81
99
|
# screenshot capturing all content. If 'false' or not provided, takes a viewport
|
|
82
100
|
# screenshot (standard browser view).
|
|
@@ -84,7 +102,8 @@ module ContextDev
|
|
|
84
102
|
# Optional parameter to specify which page type to screenshot. If provided, the
|
|
85
103
|
# system will scrape the domain's links and use heuristics to find the most
|
|
86
104
|
# appropriate URL for the specified page type (30 supported languages). If not
|
|
87
|
-
# provided, screenshots the main domain landing page.
|
|
105
|
+
# provided, screenshots the main domain landing page. Only applicable when using
|
|
106
|
+
# 'domain', not 'directUrl'.
|
|
88
107
|
page: nil,
|
|
89
108
|
# Optional parameter to prioritize screenshot capture. If 'speed', optimizes for
|
|
90
109
|
# faster capture with basic quality. If 'quality', optimizes for higher quality
|
|
@@ -97,6 +116,7 @@ module ContextDev
|
|
|
97
116
|
sig do
|
|
98
117
|
override.returns(
|
|
99
118
|
{
|
|
119
|
+
direct_url: String,
|
|
100
120
|
domain: String,
|
|
101
121
|
full_screenshot:
|
|
102
122
|
ContextDev::WebScreenshotParams::FullScreenshot::OrSymbol,
|
|
@@ -146,7 +166,8 @@ module ContextDev
|
|
|
146
166
|
# Optional parameter to specify which page type to screenshot. If provided, the
|
|
147
167
|
# system will scrape the domain's links and use heuristics to find the most
|
|
148
168
|
# appropriate URL for the specified page type (30 supported languages). If not
|
|
149
|
-
# provided, screenshots the main domain landing page.
|
|
169
|
+
# provided, screenshots the main domain landing page. Only applicable when using
|
|
170
|
+
# 'domain', not 'directUrl'.
|
|
150
171
|
module Page
|
|
151
172
|
extend ContextDev::Internal::Type::Enum
|
|
152
173
|
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# typed: strong
|
|
2
|
+
|
|
3
|
+
module ContextDev
|
|
4
|
+
module Models
|
|
5
|
+
class WebWebCrawlMdParams < ContextDev::Internal::Type::BaseModel
|
|
6
|
+
extend ContextDev::Internal::Type::RequestParameters::Converter
|
|
7
|
+
include ContextDev::Internal::Type::RequestParameters
|
|
8
|
+
|
|
9
|
+
OrHash =
|
|
10
|
+
T.type_alias do
|
|
11
|
+
T.any(ContextDev::WebWebCrawlMdParams, ContextDev::Internal::AnyHash)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# The starting URL for the crawl (must include http:// or https:// protocol)
|
|
15
|
+
sig { returns(String) }
|
|
16
|
+
attr_accessor :url
|
|
17
|
+
|
|
18
|
+
# When true, follow links on subdomains of the starting URL's domain (e.g.
|
|
19
|
+
# docs.example.com when starting from example.com). www and apex are always
|
|
20
|
+
# treated as equivalent.
|
|
21
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
22
|
+
attr_reader :follow_subdomains
|
|
23
|
+
|
|
24
|
+
sig { params(follow_subdomains: T::Boolean).void }
|
|
25
|
+
attr_writer :follow_subdomains
|
|
26
|
+
|
|
27
|
+
# Include image references in the Markdown output
|
|
28
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
29
|
+
attr_reader :include_images
|
|
30
|
+
|
|
31
|
+
sig { params(include_images: T::Boolean).void }
|
|
32
|
+
attr_writer :include_images
|
|
33
|
+
|
|
34
|
+
# Preserve hyperlinks in the Markdown output
|
|
35
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
36
|
+
attr_reader :include_links
|
|
37
|
+
|
|
38
|
+
sig { params(include_links: T::Boolean).void }
|
|
39
|
+
attr_writer :include_links
|
|
40
|
+
|
|
41
|
+
# Maximum link depth from the starting URL (0 = only the starting page)
|
|
42
|
+
sig { returns(T.nilable(Integer)) }
|
|
43
|
+
attr_reader :max_depth
|
|
44
|
+
|
|
45
|
+
sig { params(max_depth: Integer).void }
|
|
46
|
+
attr_writer :max_depth
|
|
47
|
+
|
|
48
|
+
# Maximum number of pages to crawl. Hard cap: 500.
|
|
49
|
+
sig { returns(T.nilable(Integer)) }
|
|
50
|
+
attr_reader :max_pages
|
|
51
|
+
|
|
52
|
+
sig { params(max_pages: Integer).void }
|
|
53
|
+
attr_writer :max_pages
|
|
54
|
+
|
|
55
|
+
# Truncate base64-encoded image data in the Markdown output
|
|
56
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
57
|
+
attr_reader :shorten_base64_images
|
|
58
|
+
|
|
59
|
+
sig { params(shorten_base64_images: T::Boolean).void }
|
|
60
|
+
attr_writer :shorten_base64_images
|
|
61
|
+
|
|
62
|
+
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
63
|
+
sig { returns(T.nilable(String)) }
|
|
64
|
+
attr_reader :url_regex
|
|
65
|
+
|
|
66
|
+
sig { params(url_regex: String).void }
|
|
67
|
+
attr_writer :url_regex
|
|
68
|
+
|
|
69
|
+
# Extract only the main content, stripping headers, footers, sidebars, and
|
|
70
|
+
# navigation
|
|
71
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
72
|
+
attr_reader :use_main_content_only
|
|
73
|
+
|
|
74
|
+
sig { params(use_main_content_only: T::Boolean).void }
|
|
75
|
+
attr_writer :use_main_content_only
|
|
76
|
+
|
|
77
|
+
sig do
|
|
78
|
+
params(
|
|
79
|
+
url: String,
|
|
80
|
+
follow_subdomains: T::Boolean,
|
|
81
|
+
include_images: T::Boolean,
|
|
82
|
+
include_links: T::Boolean,
|
|
83
|
+
max_depth: Integer,
|
|
84
|
+
max_pages: Integer,
|
|
85
|
+
shorten_base64_images: T::Boolean,
|
|
86
|
+
url_regex: String,
|
|
87
|
+
use_main_content_only: T::Boolean,
|
|
88
|
+
request_options: ContextDev::RequestOptions::OrHash
|
|
89
|
+
).returns(T.attached_class)
|
|
90
|
+
end
|
|
91
|
+
def self.new(
|
|
92
|
+
# The starting URL for the crawl (must include http:// or https:// protocol)
|
|
93
|
+
url:,
|
|
94
|
+
# When true, follow links on subdomains of the starting URL's domain (e.g.
|
|
95
|
+
# docs.example.com when starting from example.com). www and apex are always
|
|
96
|
+
# treated as equivalent.
|
|
97
|
+
follow_subdomains: nil,
|
|
98
|
+
# Include image references in the Markdown output
|
|
99
|
+
include_images: nil,
|
|
100
|
+
# Preserve hyperlinks in the Markdown output
|
|
101
|
+
include_links: nil,
|
|
102
|
+
# Maximum link depth from the starting URL (0 = only the starting page)
|
|
103
|
+
max_depth: nil,
|
|
104
|
+
# Maximum number of pages to crawl. Hard cap: 500.
|
|
105
|
+
max_pages: nil,
|
|
106
|
+
# Truncate base64-encoded image data in the Markdown output
|
|
107
|
+
shorten_base64_images: nil,
|
|
108
|
+
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
109
|
+
url_regex: nil,
|
|
110
|
+
# Extract only the main content, stripping headers, footers, sidebars, and
|
|
111
|
+
# navigation
|
|
112
|
+
use_main_content_only: nil,
|
|
113
|
+
request_options: {}
|
|
114
|
+
)
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
sig do
|
|
118
|
+
override.returns(
|
|
119
|
+
{
|
|
120
|
+
url: String,
|
|
121
|
+
follow_subdomains: T::Boolean,
|
|
122
|
+
include_images: T::Boolean,
|
|
123
|
+
include_links: T::Boolean,
|
|
124
|
+
max_depth: Integer,
|
|
125
|
+
max_pages: Integer,
|
|
126
|
+
shorten_base64_images: T::Boolean,
|
|
127
|
+
url_regex: String,
|
|
128
|
+
use_main_content_only: T::Boolean,
|
|
129
|
+
request_options: ContextDev::RequestOptions
|
|
130
|
+
}
|
|
131
|
+
)
|
|
132
|
+
end
|
|
133
|
+
def to_hash
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# typed: strong
|
|
2
|
+
|
|
3
|
+
module ContextDev
|
|
4
|
+
module Models
|
|
5
|
+
class WebWebCrawlMdResponse < ContextDev::Internal::Type::BaseModel
|
|
6
|
+
OrHash =
|
|
7
|
+
T.type_alias do
|
|
8
|
+
T.any(
|
|
9
|
+
ContextDev::Models::WebWebCrawlMdResponse,
|
|
10
|
+
ContextDev::Internal::AnyHash
|
|
11
|
+
)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
sig { returns(ContextDev::Models::WebWebCrawlMdResponse::Metadata) }
|
|
15
|
+
attr_reader :metadata
|
|
16
|
+
|
|
17
|
+
sig do
|
|
18
|
+
params(
|
|
19
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata::OrHash
|
|
20
|
+
).void
|
|
21
|
+
end
|
|
22
|
+
attr_writer :metadata
|
|
23
|
+
|
|
24
|
+
sig do
|
|
25
|
+
returns(T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result])
|
|
26
|
+
end
|
|
27
|
+
attr_accessor :results
|
|
28
|
+
|
|
29
|
+
sig do
|
|
30
|
+
params(
|
|
31
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata::OrHash,
|
|
32
|
+
results:
|
|
33
|
+
T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result::OrHash]
|
|
34
|
+
).returns(T.attached_class)
|
|
35
|
+
end
|
|
36
|
+
def self.new(metadata:, results:)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
sig do
|
|
40
|
+
override.returns(
|
|
41
|
+
{
|
|
42
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata,
|
|
43
|
+
results: T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result]
|
|
44
|
+
}
|
|
45
|
+
)
|
|
46
|
+
end
|
|
47
|
+
def to_hash
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
class Metadata < ContextDev::Internal::Type::BaseModel
|
|
51
|
+
OrHash =
|
|
52
|
+
T.type_alias do
|
|
53
|
+
T.any(
|
|
54
|
+
ContextDev::Models::WebWebCrawlMdResponse::Metadata,
|
|
55
|
+
ContextDev::Internal::AnyHash
|
|
56
|
+
)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Maximum crawl depth reached during the crawl
|
|
60
|
+
sig { returns(Integer) }
|
|
61
|
+
attr_accessor :max_crawl_depth
|
|
62
|
+
|
|
63
|
+
# Number of pages that failed to crawl
|
|
64
|
+
sig { returns(Integer) }
|
|
65
|
+
attr_accessor :num_failed
|
|
66
|
+
|
|
67
|
+
# Number of pages successfully crawled
|
|
68
|
+
sig { returns(Integer) }
|
|
69
|
+
attr_accessor :num_succeeded
|
|
70
|
+
|
|
71
|
+
# Total number of URLs crawled
|
|
72
|
+
sig { returns(Integer) }
|
|
73
|
+
attr_accessor :num_urls
|
|
74
|
+
|
|
75
|
+
sig do
|
|
76
|
+
params(
|
|
77
|
+
max_crawl_depth: Integer,
|
|
78
|
+
num_failed: Integer,
|
|
79
|
+
num_succeeded: Integer,
|
|
80
|
+
num_urls: Integer
|
|
81
|
+
).returns(T.attached_class)
|
|
82
|
+
end
|
|
83
|
+
def self.new(
|
|
84
|
+
# Maximum crawl depth reached during the crawl
|
|
85
|
+
max_crawl_depth:,
|
|
86
|
+
# Number of pages that failed to crawl
|
|
87
|
+
num_failed:,
|
|
88
|
+
# Number of pages successfully crawled
|
|
89
|
+
num_succeeded:,
|
|
90
|
+
# Total number of URLs crawled
|
|
91
|
+
num_urls:
|
|
92
|
+
)
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
sig do
|
|
96
|
+
override.returns(
|
|
97
|
+
{
|
|
98
|
+
max_crawl_depth: Integer,
|
|
99
|
+
num_failed: Integer,
|
|
100
|
+
num_succeeded: Integer,
|
|
101
|
+
num_urls: Integer
|
|
102
|
+
}
|
|
103
|
+
)
|
|
104
|
+
end
|
|
105
|
+
def to_hash
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
class Result < ContextDev::Internal::Type::BaseModel
|
|
110
|
+
OrHash =
|
|
111
|
+
T.type_alias do
|
|
112
|
+
T.any(
|
|
113
|
+
ContextDev::Models::WebWebCrawlMdResponse::Result,
|
|
114
|
+
ContextDev::Internal::AnyHash
|
|
115
|
+
)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Extracted page content as Markdown (empty string on failure)
|
|
119
|
+
sig { returns(String) }
|
|
120
|
+
attr_accessor :markdown
|
|
121
|
+
|
|
122
|
+
sig do
|
|
123
|
+
returns(ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata)
|
|
124
|
+
end
|
|
125
|
+
attr_reader :metadata
|
|
126
|
+
|
|
127
|
+
sig do
|
|
128
|
+
params(
|
|
129
|
+
metadata:
|
|
130
|
+
ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata::OrHash
|
|
131
|
+
).void
|
|
132
|
+
end
|
|
133
|
+
attr_writer :metadata
|
|
134
|
+
|
|
135
|
+
sig do
|
|
136
|
+
params(
|
|
137
|
+
markdown: String,
|
|
138
|
+
metadata:
|
|
139
|
+
ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata::OrHash
|
|
140
|
+
).returns(T.attached_class)
|
|
141
|
+
end
|
|
142
|
+
def self.new(
|
|
143
|
+
# Extracted page content as Markdown (empty string on failure)
|
|
144
|
+
markdown:,
|
|
145
|
+
metadata:
|
|
146
|
+
)
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
sig do
|
|
150
|
+
override.returns(
|
|
151
|
+
{
|
|
152
|
+
markdown: String,
|
|
153
|
+
metadata:
|
|
154
|
+
ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata
|
|
155
|
+
}
|
|
156
|
+
)
|
|
157
|
+
end
|
|
158
|
+
def to_hash
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
class Metadata < ContextDev::Internal::Type::BaseModel
|
|
162
|
+
OrHash =
|
|
163
|
+
T.type_alias do
|
|
164
|
+
T.any(
|
|
165
|
+
ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata,
|
|
166
|
+
ContextDev::Internal::AnyHash
|
|
167
|
+
)
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
# Depth relative to the start URL. 0 = start URL, 1 = one link away.
|
|
171
|
+
sig { returns(Integer) }
|
|
172
|
+
attr_accessor :crawl_depth
|
|
173
|
+
|
|
174
|
+
# HTTP status code of the response
|
|
175
|
+
sig { returns(Integer) }
|
|
176
|
+
attr_accessor :status_code
|
|
177
|
+
|
|
178
|
+
# true if the page was fetched and parsed successfully
|
|
179
|
+
sig { returns(T::Boolean) }
|
|
180
|
+
attr_accessor :success
|
|
181
|
+
|
|
182
|
+
# The page's <title> content (empty string if unavailable)
|
|
183
|
+
sig { returns(String) }
|
|
184
|
+
attr_accessor :title
|
|
185
|
+
|
|
186
|
+
# The URL that was fetched
|
|
187
|
+
sig { returns(String) }
|
|
188
|
+
attr_accessor :url
|
|
189
|
+
|
|
190
|
+
sig do
|
|
191
|
+
params(
|
|
192
|
+
crawl_depth: Integer,
|
|
193
|
+
status_code: Integer,
|
|
194
|
+
success: T::Boolean,
|
|
195
|
+
title: String,
|
|
196
|
+
url: String
|
|
197
|
+
).returns(T.attached_class)
|
|
198
|
+
end
|
|
199
|
+
def self.new(
|
|
200
|
+
# Depth relative to the start URL. 0 = start URL, 1 = one link away.
|
|
201
|
+
crawl_depth:,
|
|
202
|
+
# HTTP status code of the response
|
|
203
|
+
status_code:,
|
|
204
|
+
# true if the page was fetched and parsed successfully
|
|
205
|
+
success:,
|
|
206
|
+
# The page's <title> content (empty string if unavailable)
|
|
207
|
+
title:,
|
|
208
|
+
# The URL that was fetched
|
|
209
|
+
url:
|
|
210
|
+
)
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
sig do
|
|
214
|
+
override.returns(
|
|
215
|
+
{
|
|
216
|
+
crawl_depth: Integer,
|
|
217
|
+
status_code: Integer,
|
|
218
|
+
success: T::Boolean,
|
|
219
|
+
title: String,
|
|
220
|
+
url: String
|
|
221
|
+
}
|
|
222
|
+
)
|
|
223
|
+
end
|
|
224
|
+
def to_hash
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
end
|
data/rbi/context_dev/models.rbi
CHANGED
|
@@ -37,6 +37,8 @@ module ContextDev
|
|
|
37
37
|
|
|
38
38
|
WebScreenshotParams = ContextDev::Models::WebScreenshotParams
|
|
39
39
|
|
|
40
|
+
WebWebCrawlMdParams = ContextDev::Models::WebWebCrawlMdParams
|
|
41
|
+
|
|
40
42
|
WebWebScrapeHTMLParams = ContextDev::Models::WebWebScrapeHTMLParams
|
|
41
43
|
|
|
42
44
|
WebWebScrapeImagesParams = ContextDev::Models::WebWebScrapeImagesParams
|
|
@@ -19,8 +19,7 @@ module ContextDev
|
|
|
19
19
|
# Domain name to retrieve brand data for (e.g., 'example.com', 'google.com').
|
|
20
20
|
# Cannot be used with name or ticker parameters.
|
|
21
21
|
domain:,
|
|
22
|
-
# Optional parameter to force the language of the retrieved brand data.
|
|
23
|
-
# all three lookup methods.
|
|
22
|
+
# Optional parameter to force the language of the retrieved brand data.
|
|
24
23
|
force_language: nil,
|
|
25
24
|
# Optional parameter to optimize the API call for maximum speed. When set to true,
|
|
26
25
|
# the API will skip time-consuming operations for faster response at the cost of
|
|
@@ -5,10 +5,12 @@ module ContextDev
|
|
|
5
5
|
class Web
|
|
6
6
|
# Capture a screenshot of a website. Supports both viewport (standard browser
|
|
7
7
|
# view) and full-page screenshots. Can also screenshot specific page types (login,
|
|
8
|
-
# pricing, etc.) by using heuristics to find the appropriate URL.
|
|
9
|
-
#
|
|
8
|
+
# pricing, etc.) by using heuristics to find the appropriate URL. Either 'domain'
|
|
9
|
+
# or 'directUrl' must be provided as a query parameter, but not both. Returns a
|
|
10
|
+
# URL to the uploaded screenshot image hosted on our CDN.
|
|
10
11
|
sig do
|
|
11
12
|
params(
|
|
13
|
+
direct_url: String,
|
|
12
14
|
domain: String,
|
|
13
15
|
full_screenshot:
|
|
14
16
|
ContextDev::WebScreenshotParams::FullScreenshot::OrSymbol,
|
|
@@ -18,9 +20,13 @@ module ContextDev
|
|
|
18
20
|
).returns(ContextDev::Models::WebScreenshotResponse)
|
|
19
21
|
end
|
|
20
22
|
def screenshot(
|
|
23
|
+
# A specific URL to screenshot directly, bypassing domain resolution (e.g.,
|
|
24
|
+
# 'https://example.com/pricing'). When provided, the screenshot is taken of this
|
|
25
|
+
# exact URL.
|
|
26
|
+
direct_url: nil,
|
|
21
27
|
# Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
|
|
22
28
|
# domain will be automatically normalized and validated.
|
|
23
|
-
domain
|
|
29
|
+
domain: nil,
|
|
24
30
|
# Optional parameter to determine screenshot type. If 'true', takes a full page
|
|
25
31
|
# screenshot capturing all content. If 'false' or not provided, takes a viewport
|
|
26
32
|
# screenshot (standard browser view).
|
|
@@ -28,7 +34,8 @@ module ContextDev
|
|
|
28
34
|
# Optional parameter to specify which page type to screenshot. If provided, the
|
|
29
35
|
# system will scrape the domain's links and use heuristics to find the most
|
|
30
36
|
# appropriate URL for the specified page type (30 supported languages). If not
|
|
31
|
-
# provided, screenshots the main domain landing page.
|
|
37
|
+
# provided, screenshots the main domain landing page. Only applicable when using
|
|
38
|
+
# 'domain', not 'directUrl'.
|
|
32
39
|
page: nil,
|
|
33
40
|
# Optional parameter to prioritize screenshot capture. If 'speed', optimizes for
|
|
34
41
|
# faster capture with basic quality. If 'quality', optimizes for higher quality
|
|
@@ -38,6 +45,49 @@ module ContextDev
|
|
|
38
45
|
)
|
|
39
46
|
end
|
|
40
47
|
|
|
48
|
+
# Performs a crawl starting from a given URL, extracts page content as Markdown,
|
|
49
|
+
# and returns results for all crawled pages. Only follows links within the same
|
|
50
|
+
# domain as the starting URL. Costs 1 credit per successful page crawled.
|
|
51
|
+
sig do
|
|
52
|
+
params(
|
|
53
|
+
url: String,
|
|
54
|
+
follow_subdomains: T::Boolean,
|
|
55
|
+
include_images: T::Boolean,
|
|
56
|
+
include_links: T::Boolean,
|
|
57
|
+
max_depth: Integer,
|
|
58
|
+
max_pages: Integer,
|
|
59
|
+
shorten_base64_images: T::Boolean,
|
|
60
|
+
url_regex: String,
|
|
61
|
+
use_main_content_only: T::Boolean,
|
|
62
|
+
request_options: ContextDev::RequestOptions::OrHash
|
|
63
|
+
).returns(ContextDev::Models::WebWebCrawlMdResponse)
|
|
64
|
+
end
|
|
65
|
+
def web_crawl_md(
|
|
66
|
+
# The starting URL for the crawl (must include http:// or https:// protocol)
|
|
67
|
+
url:,
|
|
68
|
+
# When true, follow links on subdomains of the starting URL's domain (e.g.
|
|
69
|
+
# docs.example.com when starting from example.com). www and apex are always
|
|
70
|
+
# treated as equivalent.
|
|
71
|
+
follow_subdomains: nil,
|
|
72
|
+
# Include image references in the Markdown output
|
|
73
|
+
include_images: nil,
|
|
74
|
+
# Preserve hyperlinks in the Markdown output
|
|
75
|
+
include_links: nil,
|
|
76
|
+
# Maximum link depth from the starting URL (0 = only the starting page)
|
|
77
|
+
max_depth: nil,
|
|
78
|
+
# Maximum number of pages to crawl. Hard cap: 500.
|
|
79
|
+
max_pages: nil,
|
|
80
|
+
# Truncate base64-encoded image data in the Markdown output
|
|
81
|
+
shorten_base64_images: nil,
|
|
82
|
+
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
83
|
+
url_regex: nil,
|
|
84
|
+
# Extract only the main content, stripping headers, footers, sidebars, and
|
|
85
|
+
# navigation
|
|
86
|
+
use_main_content_only: nil,
|
|
87
|
+
request_options: {}
|
|
88
|
+
)
|
|
89
|
+
end
|
|
90
|
+
|
|
41
91
|
# Scrapes the given URL and returns the raw HTML content of the page.
|
|
42
92
|
sig do
|
|
43
93
|
params(
|