context.dev 1.1.0 → 1.3.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +22 -0
  3. data/README.md +1 -1
  4. data/lib/context_dev/internal/util.rb +18 -4
  5. data/lib/context_dev/models/style_extract_styleguide_params.rb +1 -26
  6. data/lib/context_dev/models/style_extract_styleguide_response.rb +495 -246
  7. data/lib/context_dev/models/web_web_crawl_md_params.rb +92 -0
  8. data/lib/context_dev/models/web_web_crawl_md_response.rb +121 -0
  9. data/lib/context_dev/models.rb +2 -0
  10. data/lib/context_dev/resources/style.rb +1 -3
  11. data/lib/context_dev/resources/web.rb +43 -0
  12. data/lib/context_dev/version.rb +1 -1
  13. data/lib/context_dev.rb +2 -0
  14. data/rbi/context_dev/internal/util.rbi +8 -0
  15. data/rbi/context_dev/models/style_extract_styleguide_params.rbi +0 -65
  16. data/rbi/context_dev/models/style_extract_styleguide_response.rbi +471 -529
  17. data/rbi/context_dev/models/web_web_crawl_md_params.rbi +137 -0
  18. data/rbi/context_dev/models/web_web_crawl_md_response.rbi +230 -0
  19. data/rbi/context_dev/models.rbi +2 -0
  20. data/rbi/context_dev/resources/style.rbi +0 -7
  21. data/rbi/context_dev/resources/web.rbi +43 -0
  22. data/sig/context_dev/internal/util.rbs +4 -0
  23. data/sig/context_dev/models/style_extract_styleguide_params.rbs +1 -25
  24. data/sig/context_dev/models/style_extract_styleguide_response.rbs +285 -337
  25. data/sig/context_dev/models/web_web_crawl_md_params.rbs +82 -0
  26. data/sig/context_dev/models/web_web_crawl_md_response.rbs +116 -0
  27. data/sig/context_dev/models.rbs +2 -0
  28. data/sig/context_dev/resources/style.rbs +0 -1
  29. data/sig/context_dev/resources/web.rbs +13 -0
  30. metadata +8 -2
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ContextDev
4
+ module Models
5
+ # @see ContextDev::Resources::Web#web_crawl_md
6
+ class WebWebCrawlMdParams < ContextDev::Internal::Type::BaseModel
7
+ extend ContextDev::Internal::Type::RequestParameters::Converter
8
+ include ContextDev::Internal::Type::RequestParameters
9
+
10
+ # @!attribute url
11
+ # The starting URL for the crawl (must include http:// or https:// protocol)
12
+ #
13
+ # @return [String]
14
+ required :url, String
15
+
16
+ # @!attribute follow_subdomains
17
+ # When true, follow links on subdomains of the starting URL's domain (e.g.
18
+ # docs.example.com when starting from example.com). www and apex are always
19
+ # treated as equivalent.
20
+ #
21
+ # @return [Boolean, nil]
22
+ optional :follow_subdomains, ContextDev::Internal::Type::Boolean, api_name: :followSubdomains
23
+
24
+ # @!attribute include_images
25
+ # Include image references in the Markdown output
26
+ #
27
+ # @return [Boolean, nil]
28
+ optional :include_images, ContextDev::Internal::Type::Boolean, api_name: :includeImages
29
+
30
+ # @!attribute include_links
31
+ # Preserve hyperlinks in the Markdown output
32
+ #
33
+ # @return [Boolean, nil]
34
+ optional :include_links, ContextDev::Internal::Type::Boolean, api_name: :includeLinks
35
+
36
+ # @!attribute max_depth
37
+ # Maximum link depth from the starting URL (0 = only the starting page)
38
+ #
39
+ # @return [Integer, nil]
40
+ optional :max_depth, Integer, api_name: :maxDepth
41
+
42
+ # @!attribute max_pages
43
+ # Maximum number of pages to crawl. Hard cap: 500.
44
+ #
45
+ # @return [Integer, nil]
46
+ optional :max_pages, Integer, api_name: :maxPages
47
+
48
+ # @!attribute shorten_base64_images
49
+ # Truncate base64-encoded image data in the Markdown output
50
+ #
51
+ # @return [Boolean, nil]
52
+ optional :shorten_base64_images, ContextDev::Internal::Type::Boolean, api_name: :shortenBase64Images
53
+
54
+ # @!attribute url_regex
55
+ # Regex pattern. Only URLs matching this pattern will be followed and scraped.
56
+ #
57
+ # @return [String, nil]
58
+ optional :url_regex, String, api_name: :urlRegex
59
+
60
+ # @!attribute use_main_content_only
61
+ # Extract only the main content, stripping headers, footers, sidebars, and
62
+ # navigation
63
+ #
64
+ # @return [Boolean, nil]
65
+ optional :use_main_content_only, ContextDev::Internal::Type::Boolean, api_name: :useMainContentOnly
66
+
67
+ # @!method initialize(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
68
+ # Some parameter documentation has been truncated, see
69
+ # {ContextDev::Models::WebWebCrawlMdParams} for more details.
70
+ #
71
+ # @param url [String] The starting URL for the crawl (must include http:// or https:// protocol)
72
+ #
73
+ # @param follow_subdomains [Boolean] When true, follow links on subdomains of the starting URL's domain (e.g. docs.ex
74
+ #
75
+ # @param include_images [Boolean] Include image references in the Markdown output
76
+ #
77
+ # @param include_links [Boolean] Preserve hyperlinks in the Markdown output
78
+ #
79
+ # @param max_depth [Integer] Maximum link depth from the starting URL (0 = only the starting page)
80
+ #
81
+ # @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
82
+ #
83
+ # @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
84
+ #
85
+ # @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
86
+ #
87
+ # @param use_main_content_only [Boolean] Extract only the main content, stripping headers, footers, sidebars, and navigat
88
+ #
89
+ # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ContextDev
4
+ module Models
5
+ # @see ContextDev::Resources::Web#web_crawl_md
6
+ class WebWebCrawlMdResponse < ContextDev::Internal::Type::BaseModel
7
+ # @!attribute metadata
8
+ #
9
+ # @return [ContextDev::Models::WebWebCrawlMdResponse::Metadata]
10
+ required :metadata, -> { ContextDev::Models::WebWebCrawlMdResponse::Metadata }
11
+
12
+ # @!attribute results
13
+ #
14
+ # @return [Array<ContextDev::Models::WebWebCrawlMdResponse::Result>]
15
+ required :results,
16
+ -> { ContextDev::Internal::Type::ArrayOf[ContextDev::Models::WebWebCrawlMdResponse::Result] }
17
+
18
+ # @!method initialize(metadata:, results:)
19
+ # @param metadata [ContextDev::Models::WebWebCrawlMdResponse::Metadata]
20
+ # @param results [Array<ContextDev::Models::WebWebCrawlMdResponse::Result>]
21
+
22
+ # @see ContextDev::Models::WebWebCrawlMdResponse#metadata
23
+ class Metadata < ContextDev::Internal::Type::BaseModel
24
+ # @!attribute max_crawl_depth
25
+ # Maximum crawl depth reached during the crawl
26
+ #
27
+ # @return [Integer]
28
+ required :max_crawl_depth, Integer, api_name: :maxCrawlDepth
29
+
30
+ # @!attribute num_failed
31
+ # Number of pages that failed to crawl
32
+ #
33
+ # @return [Integer]
34
+ required :num_failed, Integer, api_name: :numFailed
35
+
36
+ # @!attribute num_succeeded
37
+ # Number of pages successfully crawled
38
+ #
39
+ # @return [Integer]
40
+ required :num_succeeded, Integer, api_name: :numSucceeded
41
+
42
+ # @!attribute num_urls
43
+ # Total number of URLs crawled
44
+ #
45
+ # @return [Integer]
46
+ required :num_urls, Integer, api_name: :numUrls
47
+
48
+ # @!method initialize(max_crawl_depth:, num_failed:, num_succeeded:, num_urls:)
49
+ # @param max_crawl_depth [Integer] Maximum crawl depth reached during the crawl
50
+ #
51
+ # @param num_failed [Integer] Number of pages that failed to crawl
52
+ #
53
+ # @param num_succeeded [Integer] Number of pages successfully crawled
54
+ #
55
+ # @param num_urls [Integer] Total number of URLs crawled
56
+ end
57
+
58
+ class Result < ContextDev::Internal::Type::BaseModel
59
+ # @!attribute markdown
60
+ # Extracted page content as Markdown (empty string on failure)
61
+ #
62
+ # @return [String]
63
+ required :markdown, String
64
+
65
+ # @!attribute metadata
66
+ #
67
+ # @return [ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata]
68
+ required :metadata, -> { ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata }
69
+
70
+ # @!method initialize(markdown:, metadata:)
71
+ # @param markdown [String] Extracted page content as Markdown (empty string on failure)
72
+ #
73
+ # @param metadata [ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata]
74
+
75
+ # @see ContextDev::Models::WebWebCrawlMdResponse::Result#metadata
76
+ class Metadata < ContextDev::Internal::Type::BaseModel
77
+ # @!attribute crawl_depth
78
+ # Depth relative to the start URL. 0 = start URL, 1 = one link away.
79
+ #
80
+ # @return [Integer]
81
+ required :crawl_depth, Integer, api_name: :crawlDepth
82
+
83
+ # @!attribute status_code
84
+ # HTTP status code of the response
85
+ #
86
+ # @return [Integer]
87
+ required :status_code, Integer, api_name: :statusCode
88
+
89
+ # @!attribute success
90
+ # true if the page was fetched and parsed successfully
91
+ #
92
+ # @return [Boolean]
93
+ required :success, ContextDev::Internal::Type::Boolean
94
+
95
+ # @!attribute title
96
+ # The page's <title> content (empty string if unavailable)
97
+ #
98
+ # @return [String]
99
+ required :title, String
100
+
101
+ # @!attribute url
102
+ # The URL that was fetched
103
+ #
104
+ # @return [String]
105
+ required :url, String
106
+
107
+ # @!method initialize(crawl_depth:, status_code:, success:, title:, url:)
108
+ # @param crawl_depth [Integer] Depth relative to the start URL. 0 = start URL, 1 = one link away.
109
+ #
110
+ # @param status_code [Integer] HTTP status code of the response
111
+ #
112
+ # @param success [Boolean] true if the page was fetched and parsed successfully
113
+ #
114
+ # @param title [String] The page's <title> content (empty string if unavailable)
115
+ #
116
+ # @param url [String] The URL that was fetched
117
+ end
118
+ end
119
+ end
120
+ end
121
+ end
@@ -71,6 +71,8 @@ module ContextDev
71
71
 
72
72
  WebScreenshotParams = ContextDev::Models::WebScreenshotParams
73
73
 
74
+ WebWebCrawlMdParams = ContextDev::Models::WebWebCrawlMdParams
75
+
74
76
  WebWebScrapeHTMLParams = ContextDev::Models::WebWebScrapeHTMLParams
75
77
 
76
78
  WebWebScrapeImagesParams = ContextDev::Models::WebWebScrapeImagesParams
@@ -40,14 +40,12 @@ module ContextDev
40
40
  # Either 'domain' or 'directUrl' must be provided as a query parameter, but not
41
41
  # both.
42
42
  #
43
- # @overload extract_styleguide(direct_url: nil, domain: nil, prioritize: nil, timeout_ms: nil, request_options: {})
43
+ # @overload extract_styleguide(direct_url: nil, domain: nil, timeout_ms: nil, request_options: {})
44
44
  #
45
45
  # @param direct_url [String] A specific URL to fetch the styleguide from directly, bypassing domain resolutio
46
46
  #
47
47
  # @param domain [String] Domain name to extract styleguide from (e.g., 'example.com', 'google.com'). The
48
48
  #
49
- # @param prioritize [Symbol, ContextDev::Models::StyleExtractStyleguideParams::Prioritize] Optional parameter to prioritize screenshot capture for styleguide extraction. I
50
- #
51
49
  # @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
52
50
  #
53
51
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
@@ -38,6 +38,49 @@ module ContextDev
38
38
  )
39
39
  end
40
40
 
41
+ # Some parameter documentation has been truncated, see
42
+ # {ContextDev::Models::WebWebCrawlMdParams} for more details.
43
+ #
44
+ # Performs a crawl starting from a given URL, extracts page content as Markdown,
45
+ # and returns results for all crawled pages. Only follows links within the same
46
+ # domain as the starting URL. Costs 1 credit per successful page crawled.
47
+ #
48
+ # @overload web_crawl_md(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
49
+ #
50
+ # @param url [String] The starting URL for the crawl (must include http:// or https:// protocol)
51
+ #
52
+ # @param follow_subdomains [Boolean] When true, follow links on subdomains of the starting URL's domain (e.g. docs.ex
53
+ #
54
+ # @param include_images [Boolean] Include image references in the Markdown output
55
+ #
56
+ # @param include_links [Boolean] Preserve hyperlinks in the Markdown output
57
+ #
58
+ # @param max_depth [Integer] Maximum link depth from the starting URL (0 = only the starting page)
59
+ #
60
+ # @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
61
+ #
62
+ # @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
63
+ #
64
+ # @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
65
+ #
66
+ # @param use_main_content_only [Boolean] Extract only the main content, stripping headers, footers, sidebars, and navigat
67
+ #
68
+ # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
69
+ #
70
+ # @return [ContextDev::Models::WebWebCrawlMdResponse]
71
+ #
72
+ # @see ContextDev::Models::WebWebCrawlMdParams
73
+ def web_crawl_md(params)
74
+ parsed, options = ContextDev::WebWebCrawlMdParams.dump_request(params)
75
+ @client.request(
76
+ method: :post,
77
+ path: "web/crawl",
78
+ body: parsed,
79
+ model: ContextDev::Models::WebWebCrawlMdResponse,
80
+ options: options
81
+ )
82
+ end
83
+
41
84
  # Scrapes the given URL and returns the raw HTML content of the page.
42
85
  #
43
86
  # @overload web_scrape_html(url:, request_options: {})
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ContextDev
4
- VERSION = "1.1.0"
4
+ VERSION = "1.3.0"
5
5
  end
data/lib/context_dev.rb CHANGED
@@ -84,6 +84,8 @@ require_relative "context_dev/models/utility_prefetch_params"
84
84
  require_relative "context_dev/models/utility_prefetch_response"
85
85
  require_relative "context_dev/models/web_screenshot_params"
86
86
  require_relative "context_dev/models/web_screenshot_response"
87
+ require_relative "context_dev/models/web_web_crawl_md_params"
88
+ require_relative "context_dev/models/web_web_crawl_md_response"
87
89
  require_relative "context_dev/models/web_web_scrape_html_params"
88
90
  require_relative "context_dev/models/web_web_scrape_html_response"
89
91
  require_relative "context_dev/models/web_web_scrape_images_params"
@@ -148,12 +148,20 @@ module ContextDev
148
148
  end
149
149
  end
150
150
 
151
+ # https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3
152
+ RFC_3986_NOT_PCHARS = T.let(/[^A-Za-z0-9\-._~!$&'()*+,;=:@]+/, Regexp)
153
+
151
154
  class << self
152
155
  # @api private
153
156
  sig { params(uri: URI::Generic).returns(String) }
154
157
  def uri_origin(uri)
155
158
  end
156
159
 
160
+ # @api private
161
+ sig { params(path: T.any(String, Integer)).returns(String) }
162
+ def encode_path(path)
163
+ end
164
+
157
165
  # @api private
158
166
  sig { params(path: T.any(String, T::Array[String])).returns(String) }
159
167
  def interpolate_path(path)
@@ -30,27 +30,6 @@ module ContextDev
30
30
  sig { params(domain: String).void }
31
31
  attr_writer :domain
32
32
 
33
- # Optional parameter to prioritize screenshot capture for styleguide extraction.
34
- # If 'speed', optimizes for faster capture with basic quality. If 'quality',
35
- # optimizes for higher quality with longer wait times. Defaults to 'quality' if
36
- # not provided.
37
- sig do
38
- returns(
39
- T.nilable(
40
- ContextDev::StyleExtractStyleguideParams::Prioritize::OrSymbol
41
- )
42
- )
43
- end
44
- attr_reader :prioritize
45
-
46
- sig do
47
- params(
48
- prioritize:
49
- ContextDev::StyleExtractStyleguideParams::Prioritize::OrSymbol
50
- ).void
51
- end
52
- attr_writer :prioritize
53
-
54
33
  # Optional timeout in milliseconds for the request. If the request takes longer
55
34
  # than this value, it will be aborted with a 408 status code. Maximum allowed
56
35
  # value is 300000ms (5 minutes).
@@ -64,8 +43,6 @@ module ContextDev
64
43
  params(
65
44
  direct_url: String,
66
45
  domain: String,
67
- prioritize:
68
- ContextDev::StyleExtractStyleguideParams::Prioritize::OrSymbol,
69
46
  timeout_ms: Integer,
70
47
  request_options: ContextDev::RequestOptions::OrHash
71
48
  ).returns(T.attached_class)
@@ -77,11 +54,6 @@ module ContextDev
77
54
  # Domain name to extract styleguide from (e.g., 'example.com', 'google.com'). The
78
55
  # domain will be automatically normalized and validated.
79
56
  domain: nil,
80
- # Optional parameter to prioritize screenshot capture for styleguide extraction.
81
- # If 'speed', optimizes for faster capture with basic quality. If 'quality',
82
- # optimizes for higher quality with longer wait times. Defaults to 'quality' if
83
- # not provided.
84
- prioritize: nil,
85
57
  # Optional timeout in milliseconds for the request. If the request takes longer
86
58
  # than this value, it will be aborted with a 408 status code. Maximum allowed
87
59
  # value is 300000ms (5 minutes).
@@ -95,8 +67,6 @@ module ContextDev
95
67
  {
96
68
  direct_url: String,
97
69
  domain: String,
98
- prioritize:
99
- ContextDev::StyleExtractStyleguideParams::Prioritize::OrSymbol,
100
70
  timeout_ms: Integer,
101
71
  request_options: ContextDev::RequestOptions
102
72
  }
@@ -104,41 +74,6 @@ module ContextDev
104
74
  end
105
75
  def to_hash
106
76
  end
107
-
108
- # Optional parameter to prioritize screenshot capture for styleguide extraction.
109
- # If 'speed', optimizes for faster capture with basic quality. If 'quality',
110
- # optimizes for higher quality with longer wait times. Defaults to 'quality' if
111
- # not provided.
112
- module Prioritize
113
- extend ContextDev::Internal::Type::Enum
114
-
115
- TaggedSymbol =
116
- T.type_alias do
117
- T.all(Symbol, ContextDev::StyleExtractStyleguideParams::Prioritize)
118
- end
119
- OrSymbol = T.type_alias { T.any(Symbol, String) }
120
-
121
- SPEED =
122
- T.let(
123
- :speed,
124
- ContextDev::StyleExtractStyleguideParams::Prioritize::TaggedSymbol
125
- )
126
- QUALITY =
127
- T.let(
128
- :quality,
129
- ContextDev::StyleExtractStyleguideParams::Prioritize::TaggedSymbol
130
- )
131
-
132
- sig do
133
- override.returns(
134
- T::Array[
135
- ContextDev::StyleExtractStyleguideParams::Prioritize::TaggedSymbol
136
- ]
137
- )
138
- end
139
- def self.values
140
- end
141
- end
142
77
  end
143
78
  end
144
79
  end