context.dev 1.1.0 → 1.3.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +22 -0
  3. data/README.md +1 -1
  4. data/lib/context_dev/internal/util.rb +18 -4
  5. data/lib/context_dev/models/style_extract_styleguide_params.rb +1 -26
  6. data/lib/context_dev/models/style_extract_styleguide_response.rb +495 -246
  7. data/lib/context_dev/models/web_web_crawl_md_params.rb +92 -0
  8. data/lib/context_dev/models/web_web_crawl_md_response.rb +121 -0
  9. data/lib/context_dev/models.rb +2 -0
  10. data/lib/context_dev/resources/style.rb +1 -3
  11. data/lib/context_dev/resources/web.rb +43 -0
  12. data/lib/context_dev/version.rb +1 -1
  13. data/lib/context_dev.rb +2 -0
  14. data/rbi/context_dev/internal/util.rbi +8 -0
  15. data/rbi/context_dev/models/style_extract_styleguide_params.rbi +0 -65
  16. data/rbi/context_dev/models/style_extract_styleguide_response.rbi +471 -529
  17. data/rbi/context_dev/models/web_web_crawl_md_params.rbi +137 -0
  18. data/rbi/context_dev/models/web_web_crawl_md_response.rbi +230 -0
  19. data/rbi/context_dev/models.rbi +2 -0
  20. data/rbi/context_dev/resources/style.rbi +0 -7
  21. data/rbi/context_dev/resources/web.rbi +43 -0
  22. data/sig/context_dev/internal/util.rbs +4 -0
  23. data/sig/context_dev/models/style_extract_styleguide_params.rbs +1 -25
  24. data/sig/context_dev/models/style_extract_styleguide_response.rbs +285 -337
  25. data/sig/context_dev/models/web_web_crawl_md_params.rbs +82 -0
  26. data/sig/context_dev/models/web_web_crawl_md_response.rbs +116 -0
  27. data/sig/context_dev/models.rbs +2 -0
  28. data/sig/context_dev/resources/style.rbs +0 -1
  29. data/sig/context_dev/resources/web.rbs +13 -0
  30. metadata +8 -2
@@ -0,0 +1,137 @@
1
+ # typed: strong
2
+
3
+ module ContextDev
4
+ module Models
5
+ class WebWebCrawlMdParams < ContextDev::Internal::Type::BaseModel
6
+ extend ContextDev::Internal::Type::RequestParameters::Converter
7
+ include ContextDev::Internal::Type::RequestParameters
8
+
9
+ OrHash =
10
+ T.type_alias do
11
+ T.any(ContextDev::WebWebCrawlMdParams, ContextDev::Internal::AnyHash)
12
+ end
13
+
14
+ # The starting URL for the crawl (must include http:// or https:// protocol)
15
+ sig { returns(String) }
16
+ attr_accessor :url
17
+
18
+ # When true, follow links on subdomains of the starting URL's domain (e.g.
19
+ # docs.example.com when starting from example.com). www and apex are always
20
+ # treated as equivalent.
21
+ sig { returns(T.nilable(T::Boolean)) }
22
+ attr_reader :follow_subdomains
23
+
24
+ sig { params(follow_subdomains: T::Boolean).void }
25
+ attr_writer :follow_subdomains
26
+
27
+ # Include image references in the Markdown output
28
+ sig { returns(T.nilable(T::Boolean)) }
29
+ attr_reader :include_images
30
+
31
+ sig { params(include_images: T::Boolean).void }
32
+ attr_writer :include_images
33
+
34
+ # Preserve hyperlinks in the Markdown output
35
+ sig { returns(T.nilable(T::Boolean)) }
36
+ attr_reader :include_links
37
+
38
+ sig { params(include_links: T::Boolean).void }
39
+ attr_writer :include_links
40
+
41
+ # Maximum link depth from the starting URL (0 = only the starting page)
42
+ sig { returns(T.nilable(Integer)) }
43
+ attr_reader :max_depth
44
+
45
+ sig { params(max_depth: Integer).void }
46
+ attr_writer :max_depth
47
+
48
+ # Maximum number of pages to crawl. Hard cap: 500.
49
+ sig { returns(T.nilable(Integer)) }
50
+ attr_reader :max_pages
51
+
52
+ sig { params(max_pages: Integer).void }
53
+ attr_writer :max_pages
54
+
55
+ # Truncate base64-encoded image data in the Markdown output
56
+ sig { returns(T.nilable(T::Boolean)) }
57
+ attr_reader :shorten_base64_images
58
+
59
+ sig { params(shorten_base64_images: T::Boolean).void }
60
+ attr_writer :shorten_base64_images
61
+
62
+ # Regex pattern. Only URLs matching this pattern will be followed and scraped.
63
+ sig { returns(T.nilable(String)) }
64
+ attr_reader :url_regex
65
+
66
+ sig { params(url_regex: String).void }
67
+ attr_writer :url_regex
68
+
69
+ # Extract only the main content, stripping headers, footers, sidebars, and
70
+ # navigation
71
+ sig { returns(T.nilable(T::Boolean)) }
72
+ attr_reader :use_main_content_only
73
+
74
+ sig { params(use_main_content_only: T::Boolean).void }
75
+ attr_writer :use_main_content_only
76
+
77
+ sig do
78
+ params(
79
+ url: String,
80
+ follow_subdomains: T::Boolean,
81
+ include_images: T::Boolean,
82
+ include_links: T::Boolean,
83
+ max_depth: Integer,
84
+ max_pages: Integer,
85
+ shorten_base64_images: T::Boolean,
86
+ url_regex: String,
87
+ use_main_content_only: T::Boolean,
88
+ request_options: ContextDev::RequestOptions::OrHash
89
+ ).returns(T.attached_class)
90
+ end
91
+ def self.new(
92
+ # The starting URL for the crawl (must include http:// or https:// protocol)
93
+ url:,
94
+ # When true, follow links on subdomains of the starting URL's domain (e.g.
95
+ # docs.example.com when starting from example.com). www and apex are always
96
+ # treated as equivalent.
97
+ follow_subdomains: nil,
98
+ # Include image references in the Markdown output
99
+ include_images: nil,
100
+ # Preserve hyperlinks in the Markdown output
101
+ include_links: nil,
102
+ # Maximum link depth from the starting URL (0 = only the starting page)
103
+ max_depth: nil,
104
+ # Maximum number of pages to crawl. Hard cap: 500.
105
+ max_pages: nil,
106
+ # Truncate base64-encoded image data in the Markdown output
107
+ shorten_base64_images: nil,
108
+ # Regex pattern. Only URLs matching this pattern will be followed and scraped.
109
+ url_regex: nil,
110
+ # Extract only the main content, stripping headers, footers, sidebars, and
111
+ # navigation
112
+ use_main_content_only: nil,
113
+ request_options: {}
114
+ )
115
+ end
116
+
117
+ sig do
118
+ override.returns(
119
+ {
120
+ url: String,
121
+ follow_subdomains: T::Boolean,
122
+ include_images: T::Boolean,
123
+ include_links: T::Boolean,
124
+ max_depth: Integer,
125
+ max_pages: Integer,
126
+ shorten_base64_images: T::Boolean,
127
+ url_regex: String,
128
+ use_main_content_only: T::Boolean,
129
+ request_options: ContextDev::RequestOptions
130
+ }
131
+ )
132
+ end
133
+ def to_hash
134
+ end
135
+ end
136
+ end
137
+ end
@@ -0,0 +1,230 @@
1
+ # typed: strong
2
+
3
+ module ContextDev
4
+ module Models
5
+ class WebWebCrawlMdResponse < ContextDev::Internal::Type::BaseModel
6
+ OrHash =
7
+ T.type_alias do
8
+ T.any(
9
+ ContextDev::Models::WebWebCrawlMdResponse,
10
+ ContextDev::Internal::AnyHash
11
+ )
12
+ end
13
+
14
+ sig { returns(ContextDev::Models::WebWebCrawlMdResponse::Metadata) }
15
+ attr_reader :metadata
16
+
17
+ sig do
18
+ params(
19
+ metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata::OrHash
20
+ ).void
21
+ end
22
+ attr_writer :metadata
23
+
24
+ sig do
25
+ returns(T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result])
26
+ end
27
+ attr_accessor :results
28
+
29
+ sig do
30
+ params(
31
+ metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata::OrHash,
32
+ results:
33
+ T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result::OrHash]
34
+ ).returns(T.attached_class)
35
+ end
36
+ def self.new(metadata:, results:)
37
+ end
38
+
39
+ sig do
40
+ override.returns(
41
+ {
42
+ metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata,
43
+ results: T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result]
44
+ }
45
+ )
46
+ end
47
+ def to_hash
48
+ end
49
+
50
+ class Metadata < ContextDev::Internal::Type::BaseModel
51
+ OrHash =
52
+ T.type_alias do
53
+ T.any(
54
+ ContextDev::Models::WebWebCrawlMdResponse::Metadata,
55
+ ContextDev::Internal::AnyHash
56
+ )
57
+ end
58
+
59
+ # Maximum crawl depth reached during the crawl
60
+ sig { returns(Integer) }
61
+ attr_accessor :max_crawl_depth
62
+
63
+ # Number of pages that failed to crawl
64
+ sig { returns(Integer) }
65
+ attr_accessor :num_failed
66
+
67
+ # Number of pages successfully crawled
68
+ sig { returns(Integer) }
69
+ attr_accessor :num_succeeded
70
+
71
+ # Total number of URLs crawled
72
+ sig { returns(Integer) }
73
+ attr_accessor :num_urls
74
+
75
+ sig do
76
+ params(
77
+ max_crawl_depth: Integer,
78
+ num_failed: Integer,
79
+ num_succeeded: Integer,
80
+ num_urls: Integer
81
+ ).returns(T.attached_class)
82
+ end
83
+ def self.new(
84
+ # Maximum crawl depth reached during the crawl
85
+ max_crawl_depth:,
86
+ # Number of pages that failed to crawl
87
+ num_failed:,
88
+ # Number of pages successfully crawled
89
+ num_succeeded:,
90
+ # Total number of URLs crawled
91
+ num_urls:
92
+ )
93
+ end
94
+
95
+ sig do
96
+ override.returns(
97
+ {
98
+ max_crawl_depth: Integer,
99
+ num_failed: Integer,
100
+ num_succeeded: Integer,
101
+ num_urls: Integer
102
+ }
103
+ )
104
+ end
105
+ def to_hash
106
+ end
107
+ end
108
+
109
+ class Result < ContextDev::Internal::Type::BaseModel
110
+ OrHash =
111
+ T.type_alias do
112
+ T.any(
113
+ ContextDev::Models::WebWebCrawlMdResponse::Result,
114
+ ContextDev::Internal::AnyHash
115
+ )
116
+ end
117
+
118
+ # Extracted page content as Markdown (empty string on failure)
119
+ sig { returns(String) }
120
+ attr_accessor :markdown
121
+
122
+ sig do
123
+ returns(ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata)
124
+ end
125
+ attr_reader :metadata
126
+
127
+ sig do
128
+ params(
129
+ metadata:
130
+ ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata::OrHash
131
+ ).void
132
+ end
133
+ attr_writer :metadata
134
+
135
+ sig do
136
+ params(
137
+ markdown: String,
138
+ metadata:
139
+ ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata::OrHash
140
+ ).returns(T.attached_class)
141
+ end
142
+ def self.new(
143
+ # Extracted page content as Markdown (empty string on failure)
144
+ markdown:,
145
+ metadata:
146
+ )
147
+ end
148
+
149
+ sig do
150
+ override.returns(
151
+ {
152
+ markdown: String,
153
+ metadata:
154
+ ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata
155
+ }
156
+ )
157
+ end
158
+ def to_hash
159
+ end
160
+
161
+ class Metadata < ContextDev::Internal::Type::BaseModel
162
+ OrHash =
163
+ T.type_alias do
164
+ T.any(
165
+ ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata,
166
+ ContextDev::Internal::AnyHash
167
+ )
168
+ end
169
+
170
+ # Depth relative to the start URL. 0 = start URL, 1 = one link away.
171
+ sig { returns(Integer) }
172
+ attr_accessor :crawl_depth
173
+
174
+ # HTTP status code of the response
175
+ sig { returns(Integer) }
176
+ attr_accessor :status_code
177
+
178
+ # true if the page was fetched and parsed successfully
179
+ sig { returns(T::Boolean) }
180
+ attr_accessor :success
181
+
182
+ # The page's <title> content (empty string if unavailable)
183
+ sig { returns(String) }
184
+ attr_accessor :title
185
+
186
+ # The URL that was fetched
187
+ sig { returns(String) }
188
+ attr_accessor :url
189
+
190
+ sig do
191
+ params(
192
+ crawl_depth: Integer,
193
+ status_code: Integer,
194
+ success: T::Boolean,
195
+ title: String,
196
+ url: String
197
+ ).returns(T.attached_class)
198
+ end
199
+ def self.new(
200
+ # Depth relative to the start URL. 0 = start URL, 1 = one link away.
201
+ crawl_depth:,
202
+ # HTTP status code of the response
203
+ status_code:,
204
+ # true if the page was fetched and parsed successfully
205
+ success:,
206
+ # The page's <title> content (empty string if unavailable)
207
+ title:,
208
+ # The URL that was fetched
209
+ url:
210
+ )
211
+ end
212
+
213
+ sig do
214
+ override.returns(
215
+ {
216
+ crawl_depth: Integer,
217
+ status_code: Integer,
218
+ success: T::Boolean,
219
+ title: String,
220
+ url: String
221
+ }
222
+ )
223
+ end
224
+ def to_hash
225
+ end
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end
@@ -37,6 +37,8 @@ module ContextDev
37
37
 
38
38
  WebScreenshotParams = ContextDev::Models::WebScreenshotParams
39
39
 
40
+ WebWebCrawlMdParams = ContextDev::Models::WebWebCrawlMdParams
41
+
40
42
  WebWebScrapeHTMLParams = ContextDev::Models::WebWebScrapeHTMLParams
41
43
 
42
44
  WebWebScrapeImagesParams = ContextDev::Models::WebWebScrapeImagesParams
@@ -32,8 +32,6 @@ module ContextDev
32
32
  params(
33
33
  direct_url: String,
34
34
  domain: String,
35
- prioritize:
36
- ContextDev::StyleExtractStyleguideParams::Prioritize::OrSymbol,
37
35
  timeout_ms: Integer,
38
36
  request_options: ContextDev::RequestOptions::OrHash
39
37
  ).returns(ContextDev::Models::StyleExtractStyleguideResponse)
@@ -45,11 +43,6 @@ module ContextDev
45
43
  # Domain name to extract styleguide from (e.g., 'example.com', 'google.com'). The
46
44
  # domain will be automatically normalized and validated.
47
45
  domain: nil,
48
- # Optional parameter to prioritize screenshot capture for styleguide extraction.
49
- # If 'speed', optimizes for faster capture with basic quality. If 'quality',
50
- # optimizes for higher quality with longer wait times. Defaults to 'quality' if
51
- # not provided.
52
- prioritize: nil,
53
46
  # Optional timeout in milliseconds for the request. If the request takes longer
54
47
  # than this value, it will be aborted with a 408 status code. Maximum allowed
55
48
  # value is 300000ms (5 minutes).
@@ -38,6 +38,49 @@ module ContextDev
38
38
  )
39
39
  end
40
40
 
41
+ # Performs a crawl starting from a given URL, extracts page content as Markdown,
42
+ # and returns results for all crawled pages. Only follows links within the same
43
+ # domain as the starting URL. Costs 1 credit per successful page crawled.
44
+ sig do
45
+ params(
46
+ url: String,
47
+ follow_subdomains: T::Boolean,
48
+ include_images: T::Boolean,
49
+ include_links: T::Boolean,
50
+ max_depth: Integer,
51
+ max_pages: Integer,
52
+ shorten_base64_images: T::Boolean,
53
+ url_regex: String,
54
+ use_main_content_only: T::Boolean,
55
+ request_options: ContextDev::RequestOptions::OrHash
56
+ ).returns(ContextDev::Models::WebWebCrawlMdResponse)
57
+ end
58
+ def web_crawl_md(
59
+ # The starting URL for the crawl (must include http:// or https:// protocol)
60
+ url:,
61
+ # When true, follow links on subdomains of the starting URL's domain (e.g.
62
+ # docs.example.com when starting from example.com). www and apex are always
63
+ # treated as equivalent.
64
+ follow_subdomains: nil,
65
+ # Include image references in the Markdown output
66
+ include_images: nil,
67
+ # Preserve hyperlinks in the Markdown output
68
+ include_links: nil,
69
+ # Maximum link depth from the starting URL (0 = only the starting page)
70
+ max_depth: nil,
71
+ # Maximum number of pages to crawl. Hard cap: 500.
72
+ max_pages: nil,
73
+ # Truncate base64-encoded image data in the Markdown output
74
+ shorten_base64_images: nil,
75
+ # Regex pattern. Only URLs matching this pattern will be followed and scraped.
76
+ url_regex: nil,
77
+ # Extract only the main content, stripping headers, footers, sidebars, and
78
+ # navigation
79
+ use_main_content_only: nil,
80
+ request_options: {}
81
+ )
82
+ end
83
+
41
84
  # Scrapes the given URL and returns the raw HTML content of the page.
42
85
  sig do
43
86
  params(
@@ -45,8 +45,12 @@ module ContextDev
45
45
  -> top?
46
46
  } -> top?
47
47
 
48
+ RFC_3986_NOT_PCHARS: Regexp
49
+
48
50
  def self?.uri_origin: (URI::Generic uri) -> String
49
51
 
52
+ def self?.encode_path: (String | Integer path) -> String
53
+
50
54
  def self?.interpolate_path: (String | ::Array[String] path) -> String
51
55
 
52
56
  def self?.decode_query: (String? query) -> ::Hash[String, ::Array[String]]
@@ -1,12 +1,7 @@
1
1
  module ContextDev
2
2
  module Models
3
3
  type style_extract_styleguide_params =
4
- {
5
- direct_url: String,
6
- domain: String,
7
- prioritize: ContextDev::Models::StyleExtractStyleguideParams::prioritize,
8
- timeout_ms: Integer
9
- }
4
+ { direct_url: String, domain: String, timeout_ms: Integer }
10
5
  & ContextDev::Internal::Type::request_parameters
11
6
 
12
7
  class StyleExtractStyleguideParams < ContextDev::Internal::Type::BaseModel
@@ -21,12 +16,6 @@ module ContextDev
21
16
 
22
17
  def domain=: (String) -> String
23
18
 
24
- attr_reader prioritize: ContextDev::Models::StyleExtractStyleguideParams::prioritize?
25
-
26
- def prioritize=: (
27
- ContextDev::Models::StyleExtractStyleguideParams::prioritize
28
- ) -> ContextDev::Models::StyleExtractStyleguideParams::prioritize
29
-
30
19
  attr_reader timeout_ms: Integer?
31
20
 
32
21
  def timeout_ms=: (Integer) -> Integer
@@ -34,7 +23,6 @@ module ContextDev
34
23
  def initialize: (
35
24
  ?direct_url: String,
36
25
  ?domain: String,
37
- ?prioritize: ContextDev::Models::StyleExtractStyleguideParams::prioritize,
38
26
  ?timeout_ms: Integer,
39
27
  ?request_options: ContextDev::request_opts
40
28
  ) -> void
@@ -42,21 +30,9 @@ module ContextDev
42
30
  def to_hash: -> {
43
31
  direct_url: String,
44
32
  domain: String,
45
- prioritize: ContextDev::Models::StyleExtractStyleguideParams::prioritize,
46
33
  timeout_ms: Integer,
47
34
  request_options: ContextDev::RequestOptions
48
35
  }
49
-
50
- type prioritize = :speed | :quality
51
-
52
- module Prioritize
53
- extend ContextDev::Internal::Type::Enum
54
-
55
- SPEED: :speed
56
- QUALITY: :quality
57
-
58
- def self?.values: -> ::Array[ContextDev::Models::StyleExtractStyleguideParams::prioritize]
59
- end
60
36
  end
61
37
  end
62
38
  end