context.dev 1.4.0 → 1.6.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/README.md +1 -1
- data/lib/context_dev/client.rb +0 -4
- data/lib/context_dev/internal/type/base_model.rb +3 -3
- data/lib/context_dev/models/brand_identify_from_transaction_params.rb +0 -1
- data/lib/context_dev/models/brand_retrieve_by_name_params.rb +5 -5
- data/lib/context_dev/models/industry_retrieve_naics_params.rb +4 -4
- data/lib/context_dev/models/{style_extract_fonts_params.rb → web_extract_fonts_params.rb} +18 -7
- data/lib/context_dev/models/web_extract_fonts_response.rb +168 -0
- data/lib/context_dev/models/{style_extract_styleguide_params.rb → web_extract_styleguide_params.rb} +8 -5
- data/lib/context_dev/models/{style_extract_styleguide_response.rb → web_extract_styleguide_response.rb} +147 -80
- data/lib/context_dev/models/web_screenshot_params.rb +3 -2
- data/lib/context_dev/models/web_web_crawl_md_params.rb +11 -1
- data/lib/context_dev/models/web_web_scrape_html_params.rb +14 -1
- data/lib/context_dev/models/web_web_scrape_md_params.rb +13 -3
- data/lib/context_dev/models/web_web_scrape_sitemap_params.rb +12 -4
- data/lib/context_dev/models.rb +4 -4
- data/lib/context_dev/resources/brand.rb +8 -12
- data/lib/context_dev/resources/industry.rb +3 -3
- data/lib/context_dev/resources/web.rb +87 -21
- data/lib/context_dev/version.rb +1 -1
- data/lib/context_dev.rb +4 -5
- data/rbi/context_dev/client.rbi +0 -3
- data/rbi/context_dev/models/brand_identify_from_transaction_params.rbi +0 -2
- data/rbi/context_dev/models/brand_retrieve_by_name_params.rbi +6 -6
- data/rbi/context_dev/models/industry_retrieve_naics_params.rbi +6 -6
- data/rbi/context_dev/models/{style_extract_fonts_params.rbi → web_extract_fonts_params.rbi} +27 -7
- data/rbi/context_dev/models/web_extract_fonts_response.rbi +301 -0
- data/rbi/context_dev/models/{style_extract_styleguide_params.rbi → web_extract_styleguide_params.rbi} +12 -6
- data/rbi/context_dev/models/{style_extract_styleguide_response.rbi → web_extract_styleguide_response.rbi} +232 -94
- data/rbi/context_dev/models/web_screenshot_params.rbi +6 -4
- data/rbi/context_dev/models/web_web_crawl_md_params.rbi +15 -0
- data/rbi/context_dev/models/web_web_scrape_html_params.rbi +19 -1
- data/rbi/context_dev/models/web_web_scrape_md_params.rbi +17 -2
- data/rbi/context_dev/models/web_web_scrape_sitemap_params.rbi +15 -4
- data/rbi/context_dev/models.rbi +4 -5
- data/rbi/context_dev/resources/brand.rbi +9 -14
- data/rbi/context_dev/resources/industry.rbi +4 -4
- data/rbi/context_dev/resources/web.rbi +83 -17
- data/sig/context_dev/client.rbs +0 -2
- data/sig/context_dev/models/{style_extract_styleguide_params.rbs → web_extract_fonts_params.rbs} +2 -2
- data/sig/context_dev/models/web_extract_fonts_response.rbs +138 -0
- data/sig/context_dev/models/{style_extract_fonts_params.rbs → web_extract_styleguide_params.rbs} +13 -5
- data/sig/context_dev/models/{style_extract_styleguide_response.rbs → web_extract_styleguide_response.rbs} +147 -95
- data/sig/context_dev/models/web_web_crawl_md_params.rbs +7 -0
- data/sig/context_dev/models/web_web_scrape_html_params.rbs +8 -1
- data/sig/context_dev/models/web_web_scrape_md_params.rbs +7 -0
- data/sig/context_dev/models/web_web_scrape_sitemap_params.rbs +7 -1
- data/sig/context_dev/models.rbs +4 -4
- data/sig/context_dev/resources/web.rbs +18 -0
- metadata +14 -17
- data/lib/context_dev/models/style_extract_fonts_response.rb +0 -101
- data/lib/context_dev/resources/style.rb +0 -76
- data/rbi/context_dev/models/style_extract_fonts_response.rbi +0 -153
- data/rbi/context_dev/resources/style.rbi +0 -60
- data/sig/context_dev/models/style_extract_fonts_response.rbs +0 -82
- data/sig/context_dev/resources/style.rbs +0 -20
|
@@ -13,7 +13,7 @@ module ContextDev
|
|
|
13
13
|
|
|
14
14
|
# A specific URL to screenshot directly, bypassing domain resolution (e.g.,
|
|
15
15
|
# 'https://example.com/pricing'). When provided, the screenshot is taken of this
|
|
16
|
-
# exact URL.
|
|
16
|
+
# exact URL. You must provide either 'domain' or 'directUrl', but not both.
|
|
17
17
|
sig { returns(T.nilable(String)) }
|
|
18
18
|
attr_reader :direct_url
|
|
19
19
|
|
|
@@ -21,7 +21,8 @@ module ContextDev
|
|
|
21
21
|
attr_writer :direct_url
|
|
22
22
|
|
|
23
23
|
# Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
|
|
24
|
-
# domain will be automatically normalized and validated.
|
|
24
|
+
# domain will be automatically normalized and validated. You must provide either
|
|
25
|
+
# 'domain' or 'directUrl', but not both.
|
|
25
26
|
sig { returns(T.nilable(String)) }
|
|
26
27
|
attr_reader :domain
|
|
27
28
|
|
|
@@ -90,10 +91,11 @@ module ContextDev
|
|
|
90
91
|
def self.new(
|
|
91
92
|
# A specific URL to screenshot directly, bypassing domain resolution (e.g.,
|
|
92
93
|
# 'https://example.com/pricing'). When provided, the screenshot is taken of this
|
|
93
|
-
# exact URL.
|
|
94
|
+
# exact URL. You must provide either 'domain' or 'directUrl', but not both.
|
|
94
95
|
direct_url: nil,
|
|
95
96
|
# Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
|
|
96
|
-
# domain will be automatically normalized and validated.
|
|
97
|
+
# domain will be automatically normalized and validated. You must provide either
|
|
98
|
+
# 'domain' or 'directUrl', but not both.
|
|
97
99
|
domain: nil,
|
|
98
100
|
# Optional parameter to determine screenshot type. If 'true', takes a full page
|
|
99
101
|
# screenshot capturing all content. If 'false' or not provided, takes a viewport
|
|
@@ -38,6 +38,15 @@ module ContextDev
|
|
|
38
38
|
sig { params(include_links: T::Boolean).void }
|
|
39
39
|
attr_writer :include_links
|
|
40
40
|
|
|
41
|
+
# Return a cached result if a prior scrape for the same parameters exists and is
|
|
42
|
+
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
43
|
+
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
44
|
+
sig { returns(T.nilable(Integer)) }
|
|
45
|
+
attr_reader :max_age_ms
|
|
46
|
+
|
|
47
|
+
sig { params(max_age_ms: Integer).void }
|
|
48
|
+
attr_writer :max_age_ms
|
|
49
|
+
|
|
41
50
|
# Maximum link depth from the starting URL (0 = only the starting page)
|
|
42
51
|
sig { returns(T.nilable(Integer)) }
|
|
43
52
|
attr_reader :max_depth
|
|
@@ -80,6 +89,7 @@ module ContextDev
|
|
|
80
89
|
follow_subdomains: T::Boolean,
|
|
81
90
|
include_images: T::Boolean,
|
|
82
91
|
include_links: T::Boolean,
|
|
92
|
+
max_age_ms: Integer,
|
|
83
93
|
max_depth: Integer,
|
|
84
94
|
max_pages: Integer,
|
|
85
95
|
shorten_base64_images: T::Boolean,
|
|
@@ -99,6 +109,10 @@ module ContextDev
|
|
|
99
109
|
include_images: nil,
|
|
100
110
|
# Preserve hyperlinks in the Markdown output
|
|
101
111
|
include_links: nil,
|
|
112
|
+
# Return a cached result if a prior scrape for the same parameters exists and is
|
|
113
|
+
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
114
|
+
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
115
|
+
max_age_ms: nil,
|
|
102
116
|
# Maximum link depth from the starting URL (0 = only the starting page)
|
|
103
117
|
max_depth: nil,
|
|
104
118
|
# Maximum number of pages to crawl. Hard cap: 500.
|
|
@@ -121,6 +135,7 @@ module ContextDev
|
|
|
121
135
|
follow_subdomains: T::Boolean,
|
|
122
136
|
include_images: T::Boolean,
|
|
123
137
|
include_links: T::Boolean,
|
|
138
|
+
max_age_ms: Integer,
|
|
124
139
|
max_depth: Integer,
|
|
125
140
|
max_pages: Integer,
|
|
126
141
|
shorten_base64_images: T::Boolean,
|
|
@@ -18,22 +18,40 @@ module ContextDev
|
|
|
18
18
|
sig { returns(String) }
|
|
19
19
|
attr_accessor :url
|
|
20
20
|
|
|
21
|
+
# Return a cached result if a prior scrape for the same parameters exists and is
|
|
22
|
+
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
23
|
+
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
24
|
+
sig { returns(T.nilable(Integer)) }
|
|
25
|
+
attr_reader :max_age_ms
|
|
26
|
+
|
|
27
|
+
sig { params(max_age_ms: Integer).void }
|
|
28
|
+
attr_writer :max_age_ms
|
|
29
|
+
|
|
21
30
|
sig do
|
|
22
31
|
params(
|
|
23
32
|
url: String,
|
|
33
|
+
max_age_ms: Integer,
|
|
24
34
|
request_options: ContextDev::RequestOptions::OrHash
|
|
25
35
|
).returns(T.attached_class)
|
|
26
36
|
end
|
|
27
37
|
def self.new(
|
|
28
38
|
# Full URL to scrape (must include http:// or https:// protocol)
|
|
29
39
|
url:,
|
|
40
|
+
# Return a cached result if a prior scrape for the same parameters exists and is
|
|
41
|
+
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
42
|
+
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
43
|
+
max_age_ms: nil,
|
|
30
44
|
request_options: {}
|
|
31
45
|
)
|
|
32
46
|
end
|
|
33
47
|
|
|
34
48
|
sig do
|
|
35
49
|
override.returns(
|
|
36
|
-
{
|
|
50
|
+
{
|
|
51
|
+
url: String,
|
|
52
|
+
max_age_ms: Integer,
|
|
53
|
+
request_options: ContextDev::RequestOptions
|
|
54
|
+
}
|
|
37
55
|
)
|
|
38
56
|
end
|
|
39
57
|
def to_hash
|
|
@@ -11,7 +11,7 @@ module ContextDev
|
|
|
11
11
|
T.any(ContextDev::WebWebScrapeMdParams, ContextDev::Internal::AnyHash)
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
-
# Full URL to scrape
|
|
14
|
+
# Full URL to scrape into LLM usable Markdown (must include http:// or https://
|
|
15
15
|
# protocol)
|
|
16
16
|
sig { returns(String) }
|
|
17
17
|
attr_accessor :url
|
|
@@ -30,6 +30,15 @@ module ContextDev
|
|
|
30
30
|
sig { params(include_links: T::Boolean).void }
|
|
31
31
|
attr_writer :include_links
|
|
32
32
|
|
|
33
|
+
# Return a cached result if a prior scrape for the same parameters exists and is
|
|
34
|
+
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
35
|
+
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
36
|
+
sig { returns(T.nilable(Integer)) }
|
|
37
|
+
attr_reader :max_age_ms
|
|
38
|
+
|
|
39
|
+
sig { params(max_age_ms: Integer).void }
|
|
40
|
+
attr_writer :max_age_ms
|
|
41
|
+
|
|
33
42
|
# Shorten base64-encoded image data in the Markdown output
|
|
34
43
|
sig { returns(T.nilable(T::Boolean)) }
|
|
35
44
|
attr_reader :shorten_base64_images
|
|
@@ -50,19 +59,24 @@ module ContextDev
|
|
|
50
59
|
url: String,
|
|
51
60
|
include_images: T::Boolean,
|
|
52
61
|
include_links: T::Boolean,
|
|
62
|
+
max_age_ms: Integer,
|
|
53
63
|
shorten_base64_images: T::Boolean,
|
|
54
64
|
use_main_content_only: T::Boolean,
|
|
55
65
|
request_options: ContextDev::RequestOptions::OrHash
|
|
56
66
|
).returns(T.attached_class)
|
|
57
67
|
end
|
|
58
68
|
def self.new(
|
|
59
|
-
# Full URL to scrape
|
|
69
|
+
# Full URL to scrape into LLM usable Markdown (must include http:// or https://
|
|
60
70
|
# protocol)
|
|
61
71
|
url:,
|
|
62
72
|
# Include image references in Markdown output
|
|
63
73
|
include_images: nil,
|
|
64
74
|
# Preserve hyperlinks in Markdown output
|
|
65
75
|
include_links: nil,
|
|
76
|
+
# Return a cached result if a prior scrape for the same parameters exists and is
|
|
77
|
+
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
78
|
+
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
79
|
+
max_age_ms: nil,
|
|
66
80
|
# Shorten base64-encoded image data in the Markdown output
|
|
67
81
|
shorten_base64_images: nil,
|
|
68
82
|
# Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
@@ -78,6 +92,7 @@ module ContextDev
|
|
|
78
92
|
url: String,
|
|
79
93
|
include_images: T::Boolean,
|
|
80
94
|
include_links: T::Boolean,
|
|
95
|
+
max_age_ms: Integer,
|
|
81
96
|
shorten_base64_images: T::Boolean,
|
|
82
97
|
use_main_content_only: T::Boolean,
|
|
83
98
|
request_options: ContextDev::RequestOptions
|
|
@@ -14,8 +14,7 @@ module ContextDev
|
|
|
14
14
|
)
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
-
# Domain
|
|
18
|
-
# automatically normalized and validated.
|
|
17
|
+
# Domain to build a sitemap for
|
|
19
18
|
sig { returns(String) }
|
|
20
19
|
attr_accessor :domain
|
|
21
20
|
|
|
@@ -27,20 +26,31 @@ module ContextDev
|
|
|
27
26
|
sig { params(max_links: Integer).void }
|
|
28
27
|
attr_writer :max_links
|
|
29
28
|
|
|
29
|
+
# Optional RE2-compatible regex pattern. Only URLs matching this pattern are
|
|
30
|
+
# returned and counted against maxLinks.
|
|
31
|
+
sig { returns(T.nilable(String)) }
|
|
32
|
+
attr_reader :url_regex
|
|
33
|
+
|
|
34
|
+
sig { params(url_regex: String).void }
|
|
35
|
+
attr_writer :url_regex
|
|
36
|
+
|
|
30
37
|
sig do
|
|
31
38
|
params(
|
|
32
39
|
domain: String,
|
|
33
40
|
max_links: Integer,
|
|
41
|
+
url_regex: String,
|
|
34
42
|
request_options: ContextDev::RequestOptions::OrHash
|
|
35
43
|
).returns(T.attached_class)
|
|
36
44
|
end
|
|
37
45
|
def self.new(
|
|
38
|
-
# Domain
|
|
39
|
-
# automatically normalized and validated.
|
|
46
|
+
# Domain to build a sitemap for
|
|
40
47
|
domain:,
|
|
41
48
|
# Maximum number of links to return from the sitemap crawl. Defaults to 10,000.
|
|
42
49
|
# Minimum is 1, maximum is 100,000.
|
|
43
50
|
max_links: nil,
|
|
51
|
+
# Optional RE2-compatible regex pattern. Only URLs matching this pattern are
|
|
52
|
+
# returned and counted against maxLinks.
|
|
53
|
+
url_regex: nil,
|
|
44
54
|
request_options: {}
|
|
45
55
|
)
|
|
46
56
|
end
|
|
@@ -50,6 +60,7 @@ module ContextDev
|
|
|
50
60
|
{
|
|
51
61
|
domain: String,
|
|
52
62
|
max_links: Integer,
|
|
63
|
+
url_regex: String,
|
|
53
64
|
request_options: ContextDev::RequestOptions
|
|
54
65
|
}
|
|
55
66
|
)
|
data/rbi/context_dev/models.rbi
CHANGED
|
@@ -25,16 +25,15 @@ module ContextDev
|
|
|
25
25
|
|
|
26
26
|
IndustryRetrieveNaicsParams = ContextDev::Models::IndustryRetrieveNaicsParams
|
|
27
27
|
|
|
28
|
-
StyleExtractFontsParams = ContextDev::Models::StyleExtractFontsParams
|
|
29
|
-
|
|
30
|
-
StyleExtractStyleguideParams =
|
|
31
|
-
ContextDev::Models::StyleExtractStyleguideParams
|
|
32
|
-
|
|
33
28
|
UtilityPrefetchByEmailParams =
|
|
34
29
|
ContextDev::Models::UtilityPrefetchByEmailParams
|
|
35
30
|
|
|
36
31
|
UtilityPrefetchParams = ContextDev::Models::UtilityPrefetchParams
|
|
37
32
|
|
|
33
|
+
WebExtractFontsParams = ContextDev::Models::WebExtractFontsParams
|
|
34
|
+
|
|
35
|
+
WebExtractStyleguideParams = ContextDev::Models::WebExtractStyleguideParams
|
|
36
|
+
|
|
38
37
|
WebScreenshotParams = ContextDev::Models::WebScreenshotParams
|
|
39
38
|
|
|
40
39
|
WebWebCrawlMdParams = ContextDev::Models::WebWebCrawlMdParams
|
|
@@ -63,7 +63,6 @@ module ContextDev
|
|
|
63
63
|
force_language: nil,
|
|
64
64
|
# When set to true, the API will perform additional verification steps to
|
|
65
65
|
# ensure the identified brand matches the transaction with high confidence.
|
|
66
|
-
# Defaults to false.
|
|
67
66
|
high_confidence_only: nil,
|
|
68
67
|
# Optional parameter to optimize the API call for maximum speed. When set to true,
|
|
69
68
|
# the API will skip time-consuming operations for faster response at the cost of
|
|
@@ -83,9 +82,8 @@ module ContextDev
|
|
|
83
82
|
end
|
|
84
83
|
|
|
85
84
|
# Retrieve brand information using an email address while detecting disposable and
|
|
86
|
-
# free email addresses.
|
|
87
|
-
#
|
|
88
|
-
# (like gmail.com, yahoo.com) will throw a 422 error.
|
|
85
|
+
# free email addresses. Disposable and free email addresses (like gmail.com,
|
|
86
|
+
# yahoo.com) will throw a 422 error.
|
|
89
87
|
sig do
|
|
90
88
|
params(
|
|
91
89
|
email: String,
|
|
@@ -116,8 +114,7 @@ module ContextDev
|
|
|
116
114
|
end
|
|
117
115
|
|
|
118
116
|
# Retrieve brand information using an ISIN (International Securities
|
|
119
|
-
# Identification Number).
|
|
120
|
-
# ISIN and returns its brand data.
|
|
117
|
+
# Identification Number).
|
|
121
118
|
sig do
|
|
122
119
|
params(
|
|
123
120
|
isin: String,
|
|
@@ -147,8 +144,7 @@ module ContextDev
|
|
|
147
144
|
)
|
|
148
145
|
end
|
|
149
146
|
|
|
150
|
-
# Retrieve brand information using a company name.
|
|
151
|
-
# company by name and returns its brand data.
|
|
147
|
+
# Retrieve brand information using a company name.
|
|
152
148
|
sig do
|
|
153
149
|
params(
|
|
154
150
|
name: String,
|
|
@@ -165,8 +161,8 @@ module ContextDev
|
|
|
165
161
|
# Company name to retrieve brand data for (e.g., 'Apple Inc', 'Microsoft
|
|
166
162
|
# Corporation'). Must be 3-30 characters.
|
|
167
163
|
name:,
|
|
168
|
-
# Optional country code (GL parameter) to specify the country
|
|
169
|
-
#
|
|
164
|
+
# Optional country code hint (GL parameter) to specify the country for the company
|
|
165
|
+
# name.
|
|
170
166
|
country_gl: nil,
|
|
171
167
|
# Optional parameter to force the language of the retrieved brand data.
|
|
172
168
|
force_language: nil,
|
|
@@ -182,8 +178,7 @@ module ContextDev
|
|
|
182
178
|
)
|
|
183
179
|
end
|
|
184
180
|
|
|
185
|
-
# Retrieve brand information using a stock ticker symbol.
|
|
186
|
-
# the company associated with the ticker and returns its brand data.
|
|
181
|
+
# Retrieve brand information using a stock ticker symbol.
|
|
187
182
|
sig do
|
|
188
183
|
params(
|
|
189
184
|
ticker: String,
|
|
@@ -217,8 +212,8 @@ module ContextDev
|
|
|
217
212
|
end
|
|
218
213
|
|
|
219
214
|
# Returns a simplified version of brand data containing only essential
|
|
220
|
-
# information: domain, title, colors, logos, and backdrops.
|
|
221
|
-
#
|
|
215
|
+
# information: domain, title, colors, logos, and backdrops. Optimized for faster
|
|
216
|
+
# responses and reduced data transfer.
|
|
222
217
|
sig do
|
|
223
218
|
params(
|
|
224
219
|
domain: String,
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module ContextDev
|
|
4
4
|
module Resources
|
|
5
5
|
class Industry
|
|
6
|
-
#
|
|
6
|
+
# Classify any brand into 2022 NAICS industry codes from its domain or name.
|
|
7
7
|
sig do
|
|
8
8
|
params(
|
|
9
9
|
input: String,
|
|
@@ -14,9 +14,9 @@ module ContextDev
|
|
|
14
14
|
).returns(ContextDev::Models::IndustryRetrieveNaicsResponse)
|
|
15
15
|
end
|
|
16
16
|
def retrieve_naics(
|
|
17
|
-
# Brand domain or title to retrieve NAICS code for. If a valid domain is provided
|
|
18
|
-
#
|
|
19
|
-
#
|
|
17
|
+
# Brand domain or title to retrieve NAICS code for. If a valid domain is provided,
|
|
18
|
+
# it will be used for classification, otherwise, we will search for the brand
|
|
19
|
+
# using the provided title.
|
|
20
20
|
input:,
|
|
21
21
|
# Maximum number of NAICS codes to return. Must be between 1 and 10. Defaults
|
|
22
22
|
# to 5.
|
|
@@ -3,11 +3,62 @@
|
|
|
3
3
|
module ContextDev
|
|
4
4
|
module Resources
|
|
5
5
|
class Web
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
6
|
+
# Scrape font information from a website including font families, usage
|
|
7
|
+
# statistics, fallbacks, and element/word counts.
|
|
8
|
+
sig do
|
|
9
|
+
params(
|
|
10
|
+
direct_url: String,
|
|
11
|
+
domain: String,
|
|
12
|
+
timeout_ms: Integer,
|
|
13
|
+
request_options: ContextDev::RequestOptions::OrHash
|
|
14
|
+
).returns(ContextDev::Models::WebExtractFontsResponse)
|
|
15
|
+
end
|
|
16
|
+
def extract_fonts(
|
|
17
|
+
# A specific URL to fetch fonts from directly, bypassing domain resolution (e.g.,
|
|
18
|
+
# 'https://example.com/design-system'). When provided, fonts are extracted from
|
|
19
|
+
# this exact URL. You must provide either 'domain' or 'directUrl', but not both.
|
|
20
|
+
direct_url: nil,
|
|
21
|
+
# Domain name to extract fonts from (e.g., 'example.com', 'google.com'). The
|
|
22
|
+
# domain will be automatically normalized and validated. You must provide either
|
|
23
|
+
# 'domain' or 'directUrl', but not both.
|
|
24
|
+
domain: nil,
|
|
25
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
26
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
27
|
+
# value is 300000ms (5 minutes).
|
|
28
|
+
timeout_ms: nil,
|
|
29
|
+
request_options: {}
|
|
30
|
+
)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Extract a comprehensive design system from a website including colors,
|
|
34
|
+
# typography, spacing, shadows, and UI components.
|
|
35
|
+
sig do
|
|
36
|
+
params(
|
|
37
|
+
direct_url: String,
|
|
38
|
+
domain: String,
|
|
39
|
+
timeout_ms: Integer,
|
|
40
|
+
request_options: ContextDev::RequestOptions::OrHash
|
|
41
|
+
).returns(ContextDev::Models::WebExtractStyleguideResponse)
|
|
42
|
+
end
|
|
43
|
+
def extract_styleguide(
|
|
44
|
+
# A specific URL to fetch the styleguide from directly, bypassing domain
|
|
45
|
+
# resolution (e.g., 'https://example.com/design-system'). When provided, the
|
|
46
|
+
# styleguide is extracted from this exact URL. You must provide either 'domain' or
|
|
47
|
+
# 'directUrl', but not both.
|
|
48
|
+
direct_url: nil,
|
|
49
|
+
# Domain name to extract styleguide from (e.g., 'example.com', 'google.com'). The
|
|
50
|
+
# domain will be automatically normalized and validated. You must provide either
|
|
51
|
+
# 'domain' or 'directUrl', but not both.
|
|
52
|
+
domain: nil,
|
|
53
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
54
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
55
|
+
# value is 300000ms (5 minutes).
|
|
56
|
+
timeout_ms: nil,
|
|
57
|
+
request_options: {}
|
|
58
|
+
)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Capture a screenshot of a website.
|
|
11
62
|
sig do
|
|
12
63
|
params(
|
|
13
64
|
direct_url: String,
|
|
@@ -22,10 +73,11 @@ module ContextDev
|
|
|
22
73
|
def screenshot(
|
|
23
74
|
# A specific URL to screenshot directly, bypassing domain resolution (e.g.,
|
|
24
75
|
# 'https://example.com/pricing'). When provided, the screenshot is taken of this
|
|
25
|
-
# exact URL.
|
|
76
|
+
# exact URL. You must provide either 'domain' or 'directUrl', but not both.
|
|
26
77
|
direct_url: nil,
|
|
27
78
|
# Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
|
|
28
|
-
# domain will be automatically normalized and validated.
|
|
79
|
+
# domain will be automatically normalized and validated. You must provide either
|
|
80
|
+
# 'domain' or 'directUrl', but not both.
|
|
29
81
|
domain: nil,
|
|
30
82
|
# Optional parameter to determine screenshot type. If 'true', takes a full page
|
|
31
83
|
# screenshot capturing all content. If 'false' or not provided, takes a viewport
|
|
@@ -46,14 +98,14 @@ module ContextDev
|
|
|
46
98
|
end
|
|
47
99
|
|
|
48
100
|
# Performs a crawl starting from a given URL, extracts page content as Markdown,
|
|
49
|
-
# and returns results for all crawled pages.
|
|
50
|
-
# domain as the starting URL. Costs 1 credit per successful page crawled.
|
|
101
|
+
# and returns results for all crawled pages.
|
|
51
102
|
sig do
|
|
52
103
|
params(
|
|
53
104
|
url: String,
|
|
54
105
|
follow_subdomains: T::Boolean,
|
|
55
106
|
include_images: T::Boolean,
|
|
56
107
|
include_links: T::Boolean,
|
|
108
|
+
max_age_ms: Integer,
|
|
57
109
|
max_depth: Integer,
|
|
58
110
|
max_pages: Integer,
|
|
59
111
|
shorten_base64_images: T::Boolean,
|
|
@@ -73,6 +125,10 @@ module ContextDev
|
|
|
73
125
|
include_images: nil,
|
|
74
126
|
# Preserve hyperlinks in the Markdown output
|
|
75
127
|
include_links: nil,
|
|
128
|
+
# Return a cached result if a prior scrape for the same parameters exists and is
|
|
129
|
+
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
130
|
+
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
131
|
+
max_age_ms: nil,
|
|
76
132
|
# Maximum link depth from the starting URL (0 = only the starting page)
|
|
77
133
|
max_depth: nil,
|
|
78
134
|
# Maximum number of pages to crawl. Hard cap: 500.
|
|
@@ -92,12 +148,17 @@ module ContextDev
|
|
|
92
148
|
sig do
|
|
93
149
|
params(
|
|
94
150
|
url: String,
|
|
151
|
+
max_age_ms: Integer,
|
|
95
152
|
request_options: ContextDev::RequestOptions::OrHash
|
|
96
153
|
).returns(ContextDev::Models::WebWebScrapeHTMLResponse)
|
|
97
154
|
end
|
|
98
155
|
def web_scrape_html(
|
|
99
156
|
# Full URL to scrape (must include http:// or https:// protocol)
|
|
100
157
|
url:,
|
|
158
|
+
# Return a cached result if a prior scrape for the same parameters exists and is
|
|
159
|
+
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
160
|
+
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
161
|
+
max_age_ms: nil,
|
|
101
162
|
request_options: {}
|
|
102
163
|
)
|
|
103
164
|
end
|
|
@@ -118,26 +179,30 @@ module ContextDev
|
|
|
118
179
|
)
|
|
119
180
|
end
|
|
120
181
|
|
|
121
|
-
# Scrapes the given URL
|
|
122
|
-
# result.
|
|
182
|
+
# Scrapes the given URL into LLM usable Markdown.
|
|
123
183
|
sig do
|
|
124
184
|
params(
|
|
125
185
|
url: String,
|
|
126
186
|
include_images: T::Boolean,
|
|
127
187
|
include_links: T::Boolean,
|
|
188
|
+
max_age_ms: Integer,
|
|
128
189
|
shorten_base64_images: T::Boolean,
|
|
129
190
|
use_main_content_only: T::Boolean,
|
|
130
191
|
request_options: ContextDev::RequestOptions::OrHash
|
|
131
192
|
).returns(ContextDev::Models::WebWebScrapeMdResponse)
|
|
132
193
|
end
|
|
133
194
|
def web_scrape_md(
|
|
134
|
-
# Full URL to scrape
|
|
195
|
+
# Full URL to scrape into LLM usable Markdown (must include http:// or https://
|
|
135
196
|
# protocol)
|
|
136
197
|
url:,
|
|
137
198
|
# Include image references in Markdown output
|
|
138
199
|
include_images: nil,
|
|
139
200
|
# Preserve hyperlinks in Markdown output
|
|
140
201
|
include_links: nil,
|
|
202
|
+
# Return a cached result if a prior scrape for the same parameters exists and is
|
|
203
|
+
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
204
|
+
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
205
|
+
max_age_ms: nil,
|
|
141
206
|
# Shorten base64-encoded image data in the Markdown output
|
|
142
207
|
shorten_base64_images: nil,
|
|
143
208
|
# Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
@@ -147,23 +212,24 @@ module ContextDev
|
|
|
147
212
|
)
|
|
148
213
|
end
|
|
149
214
|
|
|
150
|
-
#
|
|
151
|
-
# Supports sitemap index files (recursive), parallel fetching with concurrency
|
|
152
|
-
# control, deduplication, and filters out non-page resources (images, PDFs, etc.).
|
|
215
|
+
# Crawl an entire website's sitemap and return all discovered page URLs.
|
|
153
216
|
sig do
|
|
154
217
|
params(
|
|
155
218
|
domain: String,
|
|
156
219
|
max_links: Integer,
|
|
220
|
+
url_regex: String,
|
|
157
221
|
request_options: ContextDev::RequestOptions::OrHash
|
|
158
222
|
).returns(ContextDev::Models::WebWebScrapeSitemapResponse)
|
|
159
223
|
end
|
|
160
224
|
def web_scrape_sitemap(
|
|
161
|
-
# Domain
|
|
162
|
-
# automatically normalized and validated.
|
|
225
|
+
# Domain to build a sitemap for
|
|
163
226
|
domain:,
|
|
164
227
|
# Maximum number of links to return from the sitemap crawl. Defaults to 10,000.
|
|
165
228
|
# Minimum is 1, maximum is 100,000.
|
|
166
229
|
max_links: nil,
|
|
230
|
+
# Optional RE2-compatible regex pattern. Only URLs matching this pattern are
|
|
231
|
+
# returned and counted against maxLinks.
|
|
232
|
+
url_regex: nil,
|
|
167
233
|
request_options: {}
|
|
168
234
|
)
|
|
169
235
|
end
|
data/sig/context_dev/client.rbs
CHANGED
data/sig/context_dev/models/{style_extract_styleguide_params.rbs → web_extract_fonts_params.rbs}
RENAMED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
module ContextDev
|
|
2
2
|
module Models
|
|
3
|
-
type
|
|
3
|
+
type web_extract_fonts_params =
|
|
4
4
|
{ direct_url: String, domain: String, timeout_ms: Integer }
|
|
5
5
|
& ContextDev::Internal::Type::request_parameters
|
|
6
6
|
|
|
7
|
-
class
|
|
7
|
+
class WebExtractFontsParams < ContextDev::Internal::Type::BaseModel
|
|
8
8
|
extend ContextDev::Internal::Type::RequestParameters::Converter
|
|
9
9
|
include ContextDev::Internal::Type::RequestParameters
|
|
10
10
|
|