context.dev 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +1 -1
- data/lib/context_dev/models/web_web_crawl_md_params.rb +92 -0
- data/lib/context_dev/models/web_web_crawl_md_response.rb +121 -0
- data/lib/context_dev/models.rb +2 -0
- data/lib/context_dev/resources/web.rb +43 -0
- data/lib/context_dev/version.rb +1 -1
- data/lib/context_dev.rb +2 -0
- data/rbi/context_dev/models/web_web_crawl_md_params.rbi +137 -0
- data/rbi/context_dev/models/web_web_crawl_md_response.rbi +230 -0
- data/rbi/context_dev/models.rbi +2 -0
- data/rbi/context_dev/resources/web.rbi +43 -0
- data/sig/context_dev/models/web_web_crawl_md_params.rbs +82 -0
- data/sig/context_dev/models/web_web_crawl_md_response.rbs +116 -0
- data/sig/context_dev/models.rbs +2 -0
- data/sig/context_dev/resources/web.rbs +13 -0
- metadata +8 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 97e6cffa5978a220c51dbfd57f2dc0f2b0fd80d79519c0e9496053b98b014140
|
|
4
|
+
data.tar.gz: 76c2b5464a39a8fd2a527873d8453c1d2ece17eeb6e20161e16d34ea388a364c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 43b8b053b4f1edf7d1a15f8208de0150a324c76e61ae68e7c16a045f21cc851907c80d2b4af30da2a94b14f0883d024938da22c9d34a2d8322715613f014206c
|
|
7
|
+
data.tar.gz: f6fa15cd06b6da296aa5aa1c6d0df03ffaeec1adf15d138b6c1df3b33093c30885d012c65c0014e40febd0a503bcd22705ac9a2ef8d10c238220390f111fe406
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.3.0 (2026-04-04)
|
|
4
|
+
|
|
5
|
+
Full Changelog: [v1.2.0...v1.3.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.2.0...v1.3.0)
|
|
6
|
+
|
|
7
|
+
### Features
|
|
8
|
+
|
|
9
|
+
* **api:** manual updates ([8e8fcc2](https://github.com/context-dot-dev/context-ruby-sdk/commit/8e8fcc26f2fbbb2bdcca9713fa3b9f8518303586))
|
|
10
|
+
|
|
3
11
|
## 1.2.0 (2026-04-03)
|
|
4
12
|
|
|
5
13
|
Full Changelog: [v1.1.0...v1.2.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.1.0...v1.2.0)
|
data/README.md
CHANGED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ContextDev
|
|
4
|
+
module Models
|
|
5
|
+
# @see ContextDev::Resources::Web#web_crawl_md
|
|
6
|
+
class WebWebCrawlMdParams < ContextDev::Internal::Type::BaseModel
|
|
7
|
+
extend ContextDev::Internal::Type::RequestParameters::Converter
|
|
8
|
+
include ContextDev::Internal::Type::RequestParameters
|
|
9
|
+
|
|
10
|
+
# @!attribute url
|
|
11
|
+
# The starting URL for the crawl (must include http:// or https:// protocol)
|
|
12
|
+
#
|
|
13
|
+
# @return [String]
|
|
14
|
+
required :url, String
|
|
15
|
+
|
|
16
|
+
# @!attribute follow_subdomains
|
|
17
|
+
# When true, follow links on subdomains of the starting URL's domain (e.g.
|
|
18
|
+
# docs.example.com when starting from example.com). www and apex are always
|
|
19
|
+
# treated as equivalent.
|
|
20
|
+
#
|
|
21
|
+
# @return [Boolean, nil]
|
|
22
|
+
optional :follow_subdomains, ContextDev::Internal::Type::Boolean, api_name: :followSubdomains
|
|
23
|
+
|
|
24
|
+
# @!attribute include_images
|
|
25
|
+
# Include image references in the Markdown output
|
|
26
|
+
#
|
|
27
|
+
# @return [Boolean, nil]
|
|
28
|
+
optional :include_images, ContextDev::Internal::Type::Boolean, api_name: :includeImages
|
|
29
|
+
|
|
30
|
+
# @!attribute include_links
|
|
31
|
+
# Preserve hyperlinks in the Markdown output
|
|
32
|
+
#
|
|
33
|
+
# @return [Boolean, nil]
|
|
34
|
+
optional :include_links, ContextDev::Internal::Type::Boolean, api_name: :includeLinks
|
|
35
|
+
|
|
36
|
+
# @!attribute max_depth
|
|
37
|
+
# Maximum link depth from the starting URL (0 = only the starting page)
|
|
38
|
+
#
|
|
39
|
+
# @return [Integer, nil]
|
|
40
|
+
optional :max_depth, Integer, api_name: :maxDepth
|
|
41
|
+
|
|
42
|
+
# @!attribute max_pages
|
|
43
|
+
# Maximum number of pages to crawl. Hard cap: 500.
|
|
44
|
+
#
|
|
45
|
+
# @return [Integer, nil]
|
|
46
|
+
optional :max_pages, Integer, api_name: :maxPages
|
|
47
|
+
|
|
48
|
+
# @!attribute shorten_base64_images
|
|
49
|
+
# Truncate base64-encoded image data in the Markdown output
|
|
50
|
+
#
|
|
51
|
+
# @return [Boolean, nil]
|
|
52
|
+
optional :shorten_base64_images, ContextDev::Internal::Type::Boolean, api_name: :shortenBase64Images
|
|
53
|
+
|
|
54
|
+
# @!attribute url_regex
|
|
55
|
+
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
56
|
+
#
|
|
57
|
+
# @return [String, nil]
|
|
58
|
+
optional :url_regex, String, api_name: :urlRegex
|
|
59
|
+
|
|
60
|
+
# @!attribute use_main_content_only
|
|
61
|
+
# Extract only the main content, stripping headers, footers, sidebars, and
|
|
62
|
+
# navigation
|
|
63
|
+
#
|
|
64
|
+
# @return [Boolean, nil]
|
|
65
|
+
optional :use_main_content_only, ContextDev::Internal::Type::Boolean, api_name: :useMainContentOnly
|
|
66
|
+
|
|
67
|
+
# @!method initialize(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
|
|
68
|
+
# Some parameter documentation has been truncated, see
|
|
69
|
+
# {ContextDev::Models::WebWebCrawlMdParams} for more details.
|
|
70
|
+
#
|
|
71
|
+
# @param url [String] The starting URL for the crawl (must include http:// or https:// protocol)
|
|
72
|
+
#
|
|
73
|
+
# @param follow_subdomains [Boolean] When true, follow links on subdomains of the starting URL's domain (e.g. docs.ex
|
|
74
|
+
#
|
|
75
|
+
# @param include_images [Boolean] Include image references in the Markdown output
|
|
76
|
+
#
|
|
77
|
+
# @param include_links [Boolean] Preserve hyperlinks in the Markdown output
|
|
78
|
+
#
|
|
79
|
+
# @param max_depth [Integer] Maximum link depth from the starting URL (0 = only the starting page)
|
|
80
|
+
#
|
|
81
|
+
# @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
|
|
82
|
+
#
|
|
83
|
+
# @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
|
|
84
|
+
#
|
|
85
|
+
# @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
86
|
+
#
|
|
87
|
+
# @param use_main_content_only [Boolean] Extract only the main content, stripping headers, footers, sidebars, and navigat
|
|
88
|
+
#
|
|
89
|
+
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ContextDev
|
|
4
|
+
module Models
|
|
5
|
+
# @see ContextDev::Resources::Web#web_crawl_md
|
|
6
|
+
class WebWebCrawlMdResponse < ContextDev::Internal::Type::BaseModel
|
|
7
|
+
# @!attribute metadata
|
|
8
|
+
#
|
|
9
|
+
# @return [ContextDev::Models::WebWebCrawlMdResponse::Metadata]
|
|
10
|
+
required :metadata, -> { ContextDev::Models::WebWebCrawlMdResponse::Metadata }
|
|
11
|
+
|
|
12
|
+
# @!attribute results
|
|
13
|
+
#
|
|
14
|
+
# @return [Array<ContextDev::Models::WebWebCrawlMdResponse::Result>]
|
|
15
|
+
required :results,
|
|
16
|
+
-> { ContextDev::Internal::Type::ArrayOf[ContextDev::Models::WebWebCrawlMdResponse::Result] }
|
|
17
|
+
|
|
18
|
+
# @!method initialize(metadata:, results:)
|
|
19
|
+
# @param metadata [ContextDev::Models::WebWebCrawlMdResponse::Metadata]
|
|
20
|
+
# @param results [Array<ContextDev::Models::WebWebCrawlMdResponse::Result>]
|
|
21
|
+
|
|
22
|
+
# @see ContextDev::Models::WebWebCrawlMdResponse#metadata
|
|
23
|
+
class Metadata < ContextDev::Internal::Type::BaseModel
|
|
24
|
+
# @!attribute max_crawl_depth
|
|
25
|
+
# Maximum crawl depth reached during the crawl
|
|
26
|
+
#
|
|
27
|
+
# @return [Integer]
|
|
28
|
+
required :max_crawl_depth, Integer, api_name: :maxCrawlDepth
|
|
29
|
+
|
|
30
|
+
# @!attribute num_failed
|
|
31
|
+
# Number of pages that failed to crawl
|
|
32
|
+
#
|
|
33
|
+
# @return [Integer]
|
|
34
|
+
required :num_failed, Integer, api_name: :numFailed
|
|
35
|
+
|
|
36
|
+
# @!attribute num_succeeded
|
|
37
|
+
# Number of pages successfully crawled
|
|
38
|
+
#
|
|
39
|
+
# @return [Integer]
|
|
40
|
+
required :num_succeeded, Integer, api_name: :numSucceeded
|
|
41
|
+
|
|
42
|
+
# @!attribute num_urls
|
|
43
|
+
# Total number of URLs crawled
|
|
44
|
+
#
|
|
45
|
+
# @return [Integer]
|
|
46
|
+
required :num_urls, Integer, api_name: :numUrls
|
|
47
|
+
|
|
48
|
+
# @!method initialize(max_crawl_depth:, num_failed:, num_succeeded:, num_urls:)
|
|
49
|
+
# @param max_crawl_depth [Integer] Maximum crawl depth reached during the crawl
|
|
50
|
+
#
|
|
51
|
+
# @param num_failed [Integer] Number of pages that failed to crawl
|
|
52
|
+
#
|
|
53
|
+
# @param num_succeeded [Integer] Number of pages successfully crawled
|
|
54
|
+
#
|
|
55
|
+
# @param num_urls [Integer] Total number of URLs crawled
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
class Result < ContextDev::Internal::Type::BaseModel
|
|
59
|
+
# @!attribute markdown
|
|
60
|
+
# Extracted page content as Markdown (empty string on failure)
|
|
61
|
+
#
|
|
62
|
+
# @return [String]
|
|
63
|
+
required :markdown, String
|
|
64
|
+
|
|
65
|
+
# @!attribute metadata
|
|
66
|
+
#
|
|
67
|
+
# @return [ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata]
|
|
68
|
+
required :metadata, -> { ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata }
|
|
69
|
+
|
|
70
|
+
# @!method initialize(markdown:, metadata:)
|
|
71
|
+
# @param markdown [String] Extracted page content as Markdown (empty string on failure)
|
|
72
|
+
#
|
|
73
|
+
# @param metadata [ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata]
|
|
74
|
+
|
|
75
|
+
# @see ContextDev::Models::WebWebCrawlMdResponse::Result#metadata
|
|
76
|
+
class Metadata < ContextDev::Internal::Type::BaseModel
|
|
77
|
+
# @!attribute crawl_depth
|
|
78
|
+
# Depth relative to the start URL. 0 = start URL, 1 = one link away.
|
|
79
|
+
#
|
|
80
|
+
# @return [Integer]
|
|
81
|
+
required :crawl_depth, Integer, api_name: :crawlDepth
|
|
82
|
+
|
|
83
|
+
# @!attribute status_code
|
|
84
|
+
# HTTP status code of the response
|
|
85
|
+
#
|
|
86
|
+
# @return [Integer]
|
|
87
|
+
required :status_code, Integer, api_name: :statusCode
|
|
88
|
+
|
|
89
|
+
# @!attribute success
|
|
90
|
+
# true if the page was fetched and parsed successfully
|
|
91
|
+
#
|
|
92
|
+
# @return [Boolean]
|
|
93
|
+
required :success, ContextDev::Internal::Type::Boolean
|
|
94
|
+
|
|
95
|
+
# @!attribute title
|
|
96
|
+
# The page's <title> content (empty string if unavailable)
|
|
97
|
+
#
|
|
98
|
+
# @return [String]
|
|
99
|
+
required :title, String
|
|
100
|
+
|
|
101
|
+
# @!attribute url
|
|
102
|
+
# The URL that was fetched
|
|
103
|
+
#
|
|
104
|
+
# @return [String]
|
|
105
|
+
required :url, String
|
|
106
|
+
|
|
107
|
+
# @!method initialize(crawl_depth:, status_code:, success:, title:, url:)
|
|
108
|
+
# @param crawl_depth [Integer] Depth relative to the start URL. 0 = start URL, 1 = one link away.
|
|
109
|
+
#
|
|
110
|
+
# @param status_code [Integer] HTTP status code of the response
|
|
111
|
+
#
|
|
112
|
+
# @param success [Boolean] true if the page was fetched and parsed successfully
|
|
113
|
+
#
|
|
114
|
+
# @param title [String] The page's <title> content (empty string if unavailable)
|
|
115
|
+
#
|
|
116
|
+
# @param url [String] The URL that was fetched
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
end
|
data/lib/context_dev/models.rb
CHANGED
|
@@ -71,6 +71,8 @@ module ContextDev
|
|
|
71
71
|
|
|
72
72
|
WebScreenshotParams = ContextDev::Models::WebScreenshotParams
|
|
73
73
|
|
|
74
|
+
WebWebCrawlMdParams = ContextDev::Models::WebWebCrawlMdParams
|
|
75
|
+
|
|
74
76
|
WebWebScrapeHTMLParams = ContextDev::Models::WebWebScrapeHTMLParams
|
|
75
77
|
|
|
76
78
|
WebWebScrapeImagesParams = ContextDev::Models::WebWebScrapeImagesParams
|
|
@@ -38,6 +38,49 @@ module ContextDev
|
|
|
38
38
|
)
|
|
39
39
|
end
|
|
40
40
|
|
|
41
|
+
# Some parameter documentation has been truncated, see
|
|
42
|
+
# {ContextDev::Models::WebWebCrawlMdParams} for more details.
|
|
43
|
+
#
|
|
44
|
+
# Performs a crawl starting from a given URL, extracts page content as Markdown,
|
|
45
|
+
# and returns results for all crawled pages. Only follows links within the same
|
|
46
|
+
# domain as the starting URL. Costs 1 credit per successful page crawled.
|
|
47
|
+
#
|
|
48
|
+
# @overload web_crawl_md(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
|
|
49
|
+
#
|
|
50
|
+
# @param url [String] The starting URL for the crawl (must include http:// or https:// protocol)
|
|
51
|
+
#
|
|
52
|
+
# @param follow_subdomains [Boolean] When true, follow links on subdomains of the starting URL's domain (e.g. docs.ex
|
|
53
|
+
#
|
|
54
|
+
# @param include_images [Boolean] Include image references in the Markdown output
|
|
55
|
+
#
|
|
56
|
+
# @param include_links [Boolean] Preserve hyperlinks in the Markdown output
|
|
57
|
+
#
|
|
58
|
+
# @param max_depth [Integer] Maximum link depth from the starting URL (0 = only the starting page)
|
|
59
|
+
#
|
|
60
|
+
# @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
|
|
61
|
+
#
|
|
62
|
+
# @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
|
|
63
|
+
#
|
|
64
|
+
# @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
65
|
+
#
|
|
66
|
+
# @param use_main_content_only [Boolean] Extract only the main content, stripping headers, footers, sidebars, and navigat
|
|
67
|
+
#
|
|
68
|
+
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
|
|
69
|
+
#
|
|
70
|
+
# @return [ContextDev::Models::WebWebCrawlMdResponse]
|
|
71
|
+
#
|
|
72
|
+
# @see ContextDev::Models::WebWebCrawlMdParams
|
|
73
|
+
def web_crawl_md(params)
|
|
74
|
+
parsed, options = ContextDev::WebWebCrawlMdParams.dump_request(params)
|
|
75
|
+
@client.request(
|
|
76
|
+
method: :post,
|
|
77
|
+
path: "web/crawl",
|
|
78
|
+
body: parsed,
|
|
79
|
+
model: ContextDev::Models::WebWebCrawlMdResponse,
|
|
80
|
+
options: options
|
|
81
|
+
)
|
|
82
|
+
end
|
|
83
|
+
|
|
41
84
|
# Scrapes the given URL and returns the raw HTML content of the page.
|
|
42
85
|
#
|
|
43
86
|
# @overload web_scrape_html(url:, request_options: {})
|
data/lib/context_dev/version.rb
CHANGED
data/lib/context_dev.rb
CHANGED
|
@@ -84,6 +84,8 @@ require_relative "context_dev/models/utility_prefetch_params"
|
|
|
84
84
|
require_relative "context_dev/models/utility_prefetch_response"
|
|
85
85
|
require_relative "context_dev/models/web_screenshot_params"
|
|
86
86
|
require_relative "context_dev/models/web_screenshot_response"
|
|
87
|
+
require_relative "context_dev/models/web_web_crawl_md_params"
|
|
88
|
+
require_relative "context_dev/models/web_web_crawl_md_response"
|
|
87
89
|
require_relative "context_dev/models/web_web_scrape_html_params"
|
|
88
90
|
require_relative "context_dev/models/web_web_scrape_html_response"
|
|
89
91
|
require_relative "context_dev/models/web_web_scrape_images_params"
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# typed: strong
|
|
2
|
+
|
|
3
|
+
module ContextDev
|
|
4
|
+
module Models
|
|
5
|
+
class WebWebCrawlMdParams < ContextDev::Internal::Type::BaseModel
|
|
6
|
+
extend ContextDev::Internal::Type::RequestParameters::Converter
|
|
7
|
+
include ContextDev::Internal::Type::RequestParameters
|
|
8
|
+
|
|
9
|
+
OrHash =
|
|
10
|
+
T.type_alias do
|
|
11
|
+
T.any(ContextDev::WebWebCrawlMdParams, ContextDev::Internal::AnyHash)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# The starting URL for the crawl (must include http:// or https:// protocol)
|
|
15
|
+
sig { returns(String) }
|
|
16
|
+
attr_accessor :url
|
|
17
|
+
|
|
18
|
+
# When true, follow links on subdomains of the starting URL's domain (e.g.
|
|
19
|
+
# docs.example.com when starting from example.com). www and apex are always
|
|
20
|
+
# treated as equivalent.
|
|
21
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
22
|
+
attr_reader :follow_subdomains
|
|
23
|
+
|
|
24
|
+
sig { params(follow_subdomains: T::Boolean).void }
|
|
25
|
+
attr_writer :follow_subdomains
|
|
26
|
+
|
|
27
|
+
# Include image references in the Markdown output
|
|
28
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
29
|
+
attr_reader :include_images
|
|
30
|
+
|
|
31
|
+
sig { params(include_images: T::Boolean).void }
|
|
32
|
+
attr_writer :include_images
|
|
33
|
+
|
|
34
|
+
# Preserve hyperlinks in the Markdown output
|
|
35
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
36
|
+
attr_reader :include_links
|
|
37
|
+
|
|
38
|
+
sig { params(include_links: T::Boolean).void }
|
|
39
|
+
attr_writer :include_links
|
|
40
|
+
|
|
41
|
+
# Maximum link depth from the starting URL (0 = only the starting page)
|
|
42
|
+
sig { returns(T.nilable(Integer)) }
|
|
43
|
+
attr_reader :max_depth
|
|
44
|
+
|
|
45
|
+
sig { params(max_depth: Integer).void }
|
|
46
|
+
attr_writer :max_depth
|
|
47
|
+
|
|
48
|
+
# Maximum number of pages to crawl. Hard cap: 500.
|
|
49
|
+
sig { returns(T.nilable(Integer)) }
|
|
50
|
+
attr_reader :max_pages
|
|
51
|
+
|
|
52
|
+
sig { params(max_pages: Integer).void }
|
|
53
|
+
attr_writer :max_pages
|
|
54
|
+
|
|
55
|
+
# Truncate base64-encoded image data in the Markdown output
|
|
56
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
57
|
+
attr_reader :shorten_base64_images
|
|
58
|
+
|
|
59
|
+
sig { params(shorten_base64_images: T::Boolean).void }
|
|
60
|
+
attr_writer :shorten_base64_images
|
|
61
|
+
|
|
62
|
+
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
63
|
+
sig { returns(T.nilable(String)) }
|
|
64
|
+
attr_reader :url_regex
|
|
65
|
+
|
|
66
|
+
sig { params(url_regex: String).void }
|
|
67
|
+
attr_writer :url_regex
|
|
68
|
+
|
|
69
|
+
# Extract only the main content, stripping headers, footers, sidebars, and
|
|
70
|
+
# navigation
|
|
71
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
72
|
+
attr_reader :use_main_content_only
|
|
73
|
+
|
|
74
|
+
sig { params(use_main_content_only: T::Boolean).void }
|
|
75
|
+
attr_writer :use_main_content_only
|
|
76
|
+
|
|
77
|
+
sig do
|
|
78
|
+
params(
|
|
79
|
+
url: String,
|
|
80
|
+
follow_subdomains: T::Boolean,
|
|
81
|
+
include_images: T::Boolean,
|
|
82
|
+
include_links: T::Boolean,
|
|
83
|
+
max_depth: Integer,
|
|
84
|
+
max_pages: Integer,
|
|
85
|
+
shorten_base64_images: T::Boolean,
|
|
86
|
+
url_regex: String,
|
|
87
|
+
use_main_content_only: T::Boolean,
|
|
88
|
+
request_options: ContextDev::RequestOptions::OrHash
|
|
89
|
+
).returns(T.attached_class)
|
|
90
|
+
end
|
|
91
|
+
def self.new(
|
|
92
|
+
# The starting URL for the crawl (must include http:// or https:// protocol)
|
|
93
|
+
url:,
|
|
94
|
+
# When true, follow links on subdomains of the starting URL's domain (e.g.
|
|
95
|
+
# docs.example.com when starting from example.com). www and apex are always
|
|
96
|
+
# treated as equivalent.
|
|
97
|
+
follow_subdomains: nil,
|
|
98
|
+
# Include image references in the Markdown output
|
|
99
|
+
include_images: nil,
|
|
100
|
+
# Preserve hyperlinks in the Markdown output
|
|
101
|
+
include_links: nil,
|
|
102
|
+
# Maximum link depth from the starting URL (0 = only the starting page)
|
|
103
|
+
max_depth: nil,
|
|
104
|
+
# Maximum number of pages to crawl. Hard cap: 500.
|
|
105
|
+
max_pages: nil,
|
|
106
|
+
# Truncate base64-encoded image data in the Markdown output
|
|
107
|
+
shorten_base64_images: nil,
|
|
108
|
+
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
109
|
+
url_regex: nil,
|
|
110
|
+
# Extract only the main content, stripping headers, footers, sidebars, and
|
|
111
|
+
# navigation
|
|
112
|
+
use_main_content_only: nil,
|
|
113
|
+
request_options: {}
|
|
114
|
+
)
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
sig do
|
|
118
|
+
override.returns(
|
|
119
|
+
{
|
|
120
|
+
url: String,
|
|
121
|
+
follow_subdomains: T::Boolean,
|
|
122
|
+
include_images: T::Boolean,
|
|
123
|
+
include_links: T::Boolean,
|
|
124
|
+
max_depth: Integer,
|
|
125
|
+
max_pages: Integer,
|
|
126
|
+
shorten_base64_images: T::Boolean,
|
|
127
|
+
url_regex: String,
|
|
128
|
+
use_main_content_only: T::Boolean,
|
|
129
|
+
request_options: ContextDev::RequestOptions
|
|
130
|
+
}
|
|
131
|
+
)
|
|
132
|
+
end
|
|
133
|
+
def to_hash
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# typed: strong
|
|
2
|
+
|
|
3
|
+
module ContextDev
|
|
4
|
+
module Models
|
|
5
|
+
class WebWebCrawlMdResponse < ContextDev::Internal::Type::BaseModel
|
|
6
|
+
OrHash =
|
|
7
|
+
T.type_alias do
|
|
8
|
+
T.any(
|
|
9
|
+
ContextDev::Models::WebWebCrawlMdResponse,
|
|
10
|
+
ContextDev::Internal::AnyHash
|
|
11
|
+
)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
sig { returns(ContextDev::Models::WebWebCrawlMdResponse::Metadata) }
|
|
15
|
+
attr_reader :metadata
|
|
16
|
+
|
|
17
|
+
sig do
|
|
18
|
+
params(
|
|
19
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata::OrHash
|
|
20
|
+
).void
|
|
21
|
+
end
|
|
22
|
+
attr_writer :metadata
|
|
23
|
+
|
|
24
|
+
sig do
|
|
25
|
+
returns(T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result])
|
|
26
|
+
end
|
|
27
|
+
attr_accessor :results
|
|
28
|
+
|
|
29
|
+
sig do
|
|
30
|
+
params(
|
|
31
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata::OrHash,
|
|
32
|
+
results:
|
|
33
|
+
T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result::OrHash]
|
|
34
|
+
).returns(T.attached_class)
|
|
35
|
+
end
|
|
36
|
+
def self.new(metadata:, results:)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
sig do
|
|
40
|
+
override.returns(
|
|
41
|
+
{
|
|
42
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata,
|
|
43
|
+
results: T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result]
|
|
44
|
+
}
|
|
45
|
+
)
|
|
46
|
+
end
|
|
47
|
+
def to_hash
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
class Metadata < ContextDev::Internal::Type::BaseModel
|
|
51
|
+
OrHash =
|
|
52
|
+
T.type_alias do
|
|
53
|
+
T.any(
|
|
54
|
+
ContextDev::Models::WebWebCrawlMdResponse::Metadata,
|
|
55
|
+
ContextDev::Internal::AnyHash
|
|
56
|
+
)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Maximum crawl depth reached during the crawl
|
|
60
|
+
sig { returns(Integer) }
|
|
61
|
+
attr_accessor :max_crawl_depth
|
|
62
|
+
|
|
63
|
+
# Number of pages that failed to crawl
|
|
64
|
+
sig { returns(Integer) }
|
|
65
|
+
attr_accessor :num_failed
|
|
66
|
+
|
|
67
|
+
# Number of pages successfully crawled
|
|
68
|
+
sig { returns(Integer) }
|
|
69
|
+
attr_accessor :num_succeeded
|
|
70
|
+
|
|
71
|
+
# Total number of URLs crawled
|
|
72
|
+
sig { returns(Integer) }
|
|
73
|
+
attr_accessor :num_urls
|
|
74
|
+
|
|
75
|
+
sig do
|
|
76
|
+
params(
|
|
77
|
+
max_crawl_depth: Integer,
|
|
78
|
+
num_failed: Integer,
|
|
79
|
+
num_succeeded: Integer,
|
|
80
|
+
num_urls: Integer
|
|
81
|
+
).returns(T.attached_class)
|
|
82
|
+
end
|
|
83
|
+
def self.new(
|
|
84
|
+
# Maximum crawl depth reached during the crawl
|
|
85
|
+
max_crawl_depth:,
|
|
86
|
+
# Number of pages that failed to crawl
|
|
87
|
+
num_failed:,
|
|
88
|
+
# Number of pages successfully crawled
|
|
89
|
+
num_succeeded:,
|
|
90
|
+
# Total number of URLs crawled
|
|
91
|
+
num_urls:
|
|
92
|
+
)
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
sig do
|
|
96
|
+
override.returns(
|
|
97
|
+
{
|
|
98
|
+
max_crawl_depth: Integer,
|
|
99
|
+
num_failed: Integer,
|
|
100
|
+
num_succeeded: Integer,
|
|
101
|
+
num_urls: Integer
|
|
102
|
+
}
|
|
103
|
+
)
|
|
104
|
+
end
|
|
105
|
+
def to_hash
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
class Result < ContextDev::Internal::Type::BaseModel
|
|
110
|
+
OrHash =
|
|
111
|
+
T.type_alias do
|
|
112
|
+
T.any(
|
|
113
|
+
ContextDev::Models::WebWebCrawlMdResponse::Result,
|
|
114
|
+
ContextDev::Internal::AnyHash
|
|
115
|
+
)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Extracted page content as Markdown (empty string on failure)
|
|
119
|
+
sig { returns(String) }
|
|
120
|
+
attr_accessor :markdown
|
|
121
|
+
|
|
122
|
+
sig do
|
|
123
|
+
returns(ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata)
|
|
124
|
+
end
|
|
125
|
+
attr_reader :metadata
|
|
126
|
+
|
|
127
|
+
sig do
|
|
128
|
+
params(
|
|
129
|
+
metadata:
|
|
130
|
+
ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata::OrHash
|
|
131
|
+
).void
|
|
132
|
+
end
|
|
133
|
+
attr_writer :metadata
|
|
134
|
+
|
|
135
|
+
sig do
|
|
136
|
+
params(
|
|
137
|
+
markdown: String,
|
|
138
|
+
metadata:
|
|
139
|
+
ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata::OrHash
|
|
140
|
+
).returns(T.attached_class)
|
|
141
|
+
end
|
|
142
|
+
def self.new(
|
|
143
|
+
# Extracted page content as Markdown (empty string on failure)
|
|
144
|
+
markdown:,
|
|
145
|
+
metadata:
|
|
146
|
+
)
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
sig do
|
|
150
|
+
override.returns(
|
|
151
|
+
{
|
|
152
|
+
markdown: String,
|
|
153
|
+
metadata:
|
|
154
|
+
ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata
|
|
155
|
+
}
|
|
156
|
+
)
|
|
157
|
+
end
|
|
158
|
+
def to_hash
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
class Metadata < ContextDev::Internal::Type::BaseModel
|
|
162
|
+
OrHash =
|
|
163
|
+
T.type_alias do
|
|
164
|
+
T.any(
|
|
165
|
+
ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata,
|
|
166
|
+
ContextDev::Internal::AnyHash
|
|
167
|
+
)
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
# Depth relative to the start URL. 0 = start URL, 1 = one link away.
|
|
171
|
+
sig { returns(Integer) }
|
|
172
|
+
attr_accessor :crawl_depth
|
|
173
|
+
|
|
174
|
+
# HTTP status code of the response
|
|
175
|
+
sig { returns(Integer) }
|
|
176
|
+
attr_accessor :status_code
|
|
177
|
+
|
|
178
|
+
# true if the page was fetched and parsed successfully
|
|
179
|
+
sig { returns(T::Boolean) }
|
|
180
|
+
attr_accessor :success
|
|
181
|
+
|
|
182
|
+
# The page's <title> content (empty string if unavailable)
|
|
183
|
+
sig { returns(String) }
|
|
184
|
+
attr_accessor :title
|
|
185
|
+
|
|
186
|
+
# The URL that was fetched
|
|
187
|
+
sig { returns(String) }
|
|
188
|
+
attr_accessor :url
|
|
189
|
+
|
|
190
|
+
sig do
|
|
191
|
+
params(
|
|
192
|
+
crawl_depth: Integer,
|
|
193
|
+
status_code: Integer,
|
|
194
|
+
success: T::Boolean,
|
|
195
|
+
title: String,
|
|
196
|
+
url: String
|
|
197
|
+
).returns(T.attached_class)
|
|
198
|
+
end
|
|
199
|
+
def self.new(
|
|
200
|
+
# Depth relative to the start URL. 0 = start URL, 1 = one link away.
|
|
201
|
+
crawl_depth:,
|
|
202
|
+
# HTTP status code of the response
|
|
203
|
+
status_code:,
|
|
204
|
+
# true if the page was fetched and parsed successfully
|
|
205
|
+
success:,
|
|
206
|
+
# The page's <title> content (empty string if unavailable)
|
|
207
|
+
title:,
|
|
208
|
+
# The URL that was fetched
|
|
209
|
+
url:
|
|
210
|
+
)
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
sig do
|
|
214
|
+
override.returns(
|
|
215
|
+
{
|
|
216
|
+
crawl_depth: Integer,
|
|
217
|
+
status_code: Integer,
|
|
218
|
+
success: T::Boolean,
|
|
219
|
+
title: String,
|
|
220
|
+
url: String
|
|
221
|
+
}
|
|
222
|
+
)
|
|
223
|
+
end
|
|
224
|
+
def to_hash
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
end
|
data/rbi/context_dev/models.rbi
CHANGED
|
@@ -37,6 +37,8 @@ module ContextDev
|
|
|
37
37
|
|
|
38
38
|
WebScreenshotParams = ContextDev::Models::WebScreenshotParams
|
|
39
39
|
|
|
40
|
+
WebWebCrawlMdParams = ContextDev::Models::WebWebCrawlMdParams
|
|
41
|
+
|
|
40
42
|
WebWebScrapeHTMLParams = ContextDev::Models::WebWebScrapeHTMLParams
|
|
41
43
|
|
|
42
44
|
WebWebScrapeImagesParams = ContextDev::Models::WebWebScrapeImagesParams
|
|
@@ -38,6 +38,49 @@ module ContextDev
|
|
|
38
38
|
)
|
|
39
39
|
end
|
|
40
40
|
|
|
41
|
+
# Performs a crawl starting from a given URL, extracts page content as Markdown,
|
|
42
|
+
# and returns results for all crawled pages. Only follows links within the same
|
|
43
|
+
# domain as the starting URL. Costs 1 credit per successful page crawled.
|
|
44
|
+
sig do
|
|
45
|
+
params(
|
|
46
|
+
url: String,
|
|
47
|
+
follow_subdomains: T::Boolean,
|
|
48
|
+
include_images: T::Boolean,
|
|
49
|
+
include_links: T::Boolean,
|
|
50
|
+
max_depth: Integer,
|
|
51
|
+
max_pages: Integer,
|
|
52
|
+
shorten_base64_images: T::Boolean,
|
|
53
|
+
url_regex: String,
|
|
54
|
+
use_main_content_only: T::Boolean,
|
|
55
|
+
request_options: ContextDev::RequestOptions::OrHash
|
|
56
|
+
).returns(ContextDev::Models::WebWebCrawlMdResponse)
|
|
57
|
+
end
|
|
58
|
+
def web_crawl_md(
|
|
59
|
+
# The starting URL for the crawl (must include http:// or https:// protocol)
|
|
60
|
+
url:,
|
|
61
|
+
# When true, follow links on subdomains of the starting URL's domain (e.g.
|
|
62
|
+
# docs.example.com when starting from example.com). www and apex are always
|
|
63
|
+
# treated as equivalent.
|
|
64
|
+
follow_subdomains: nil,
|
|
65
|
+
# Include image references in the Markdown output
|
|
66
|
+
include_images: nil,
|
|
67
|
+
# Preserve hyperlinks in the Markdown output
|
|
68
|
+
include_links: nil,
|
|
69
|
+
# Maximum link depth from the starting URL (0 = only the starting page)
|
|
70
|
+
max_depth: nil,
|
|
71
|
+
# Maximum number of pages to crawl. Hard cap: 500.
|
|
72
|
+
max_pages: nil,
|
|
73
|
+
# Truncate base64-encoded image data in the Markdown output
|
|
74
|
+
shorten_base64_images: nil,
|
|
75
|
+
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
76
|
+
url_regex: nil,
|
|
77
|
+
# Extract only the main content, stripping headers, footers, sidebars, and
|
|
78
|
+
# navigation
|
|
79
|
+
use_main_content_only: nil,
|
|
80
|
+
request_options: {}
|
|
81
|
+
)
|
|
82
|
+
end
|
|
83
|
+
|
|
41
84
|
# Scrapes the given URL and returns the raw HTML content of the page.
|
|
42
85
|
sig do
|
|
43
86
|
params(
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
module ContextDev
|
|
2
|
+
module Models
|
|
3
|
+
type web_web_crawl_md_params =
|
|
4
|
+
{
|
|
5
|
+
url: String,
|
|
6
|
+
follow_subdomains: bool,
|
|
7
|
+
include_images: bool,
|
|
8
|
+
include_links: bool,
|
|
9
|
+
max_depth: Integer,
|
|
10
|
+
max_pages: Integer,
|
|
11
|
+
:shorten_base64_images => bool,
|
|
12
|
+
url_regex: String,
|
|
13
|
+
use_main_content_only: bool
|
|
14
|
+
}
|
|
15
|
+
& ContextDev::Internal::Type::request_parameters
|
|
16
|
+
|
|
17
|
+
class WebWebCrawlMdParams < ContextDev::Internal::Type::BaseModel
|
|
18
|
+
extend ContextDev::Internal::Type::RequestParameters::Converter
|
|
19
|
+
include ContextDev::Internal::Type::RequestParameters
|
|
20
|
+
|
|
21
|
+
attr_accessor url: String
|
|
22
|
+
|
|
23
|
+
attr_reader follow_subdomains: bool?
|
|
24
|
+
|
|
25
|
+
def follow_subdomains=: (bool) -> bool
|
|
26
|
+
|
|
27
|
+
attr_reader include_images: bool?
|
|
28
|
+
|
|
29
|
+
def include_images=: (bool) -> bool
|
|
30
|
+
|
|
31
|
+
attr_reader include_links: bool?
|
|
32
|
+
|
|
33
|
+
def include_links=: (bool) -> bool
|
|
34
|
+
|
|
35
|
+
attr_reader max_depth: Integer?
|
|
36
|
+
|
|
37
|
+
def max_depth=: (Integer) -> Integer
|
|
38
|
+
|
|
39
|
+
attr_reader max_pages: Integer?
|
|
40
|
+
|
|
41
|
+
def max_pages=: (Integer) -> Integer
|
|
42
|
+
|
|
43
|
+
attr_reader shorten_base64_images: bool?
|
|
44
|
+
|
|
45
|
+
def shorten_base64_images=: (bool) -> bool
|
|
46
|
+
|
|
47
|
+
attr_reader url_regex: String?
|
|
48
|
+
|
|
49
|
+
def url_regex=: (String) -> String
|
|
50
|
+
|
|
51
|
+
attr_reader use_main_content_only: bool?
|
|
52
|
+
|
|
53
|
+
def use_main_content_only=: (bool) -> bool
|
|
54
|
+
|
|
55
|
+
def initialize: (
|
|
56
|
+
url: String,
|
|
57
|
+
?follow_subdomains: bool,
|
|
58
|
+
?include_images: bool,
|
|
59
|
+
?include_links: bool,
|
|
60
|
+
?max_depth: Integer,
|
|
61
|
+
?max_pages: Integer,
|
|
62
|
+
?shorten_base64_images: bool,
|
|
63
|
+
?url_regex: String,
|
|
64
|
+
?use_main_content_only: bool,
|
|
65
|
+
?request_options: ContextDev::request_opts
|
|
66
|
+
) -> void
|
|
67
|
+
|
|
68
|
+
def to_hash: -> {
|
|
69
|
+
url: String,
|
|
70
|
+
follow_subdomains: bool,
|
|
71
|
+
include_images: bool,
|
|
72
|
+
include_links: bool,
|
|
73
|
+
max_depth: Integer,
|
|
74
|
+
max_pages: Integer,
|
|
75
|
+
:shorten_base64_images => bool,
|
|
76
|
+
url_regex: String,
|
|
77
|
+
use_main_content_only: bool,
|
|
78
|
+
request_options: ContextDev::RequestOptions
|
|
79
|
+
}
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
module ContextDev
|
|
2
|
+
module Models
|
|
3
|
+
type web_web_crawl_md_response =
|
|
4
|
+
{
|
|
5
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata,
|
|
6
|
+
results: ::Array[ContextDev::Models::WebWebCrawlMdResponse::Result]
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
class WebWebCrawlMdResponse < ContextDev::Internal::Type::BaseModel
|
|
10
|
+
attr_accessor metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata
|
|
11
|
+
|
|
12
|
+
attr_accessor results: ::Array[ContextDev::Models::WebWebCrawlMdResponse::Result]
|
|
13
|
+
|
|
14
|
+
def initialize: (
|
|
15
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata,
|
|
16
|
+
results: ::Array[ContextDev::Models::WebWebCrawlMdResponse::Result]
|
|
17
|
+
) -> void
|
|
18
|
+
|
|
19
|
+
def to_hash: -> {
|
|
20
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata,
|
|
21
|
+
results: ::Array[ContextDev::Models::WebWebCrawlMdResponse::Result]
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
type metadata =
|
|
25
|
+
{
|
|
26
|
+
max_crawl_depth: Integer,
|
|
27
|
+
num_failed: Integer,
|
|
28
|
+
num_succeeded: Integer,
|
|
29
|
+
num_urls: Integer
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
class Metadata < ContextDev::Internal::Type::BaseModel
|
|
33
|
+
attr_accessor max_crawl_depth: Integer
|
|
34
|
+
|
|
35
|
+
attr_accessor num_failed: Integer
|
|
36
|
+
|
|
37
|
+
attr_accessor num_succeeded: Integer
|
|
38
|
+
|
|
39
|
+
attr_accessor num_urls: Integer
|
|
40
|
+
|
|
41
|
+
def initialize: (
|
|
42
|
+
max_crawl_depth: Integer,
|
|
43
|
+
num_failed: Integer,
|
|
44
|
+
num_succeeded: Integer,
|
|
45
|
+
num_urls: Integer
|
|
46
|
+
) -> void
|
|
47
|
+
|
|
48
|
+
def to_hash: -> {
|
|
49
|
+
max_crawl_depth: Integer,
|
|
50
|
+
num_failed: Integer,
|
|
51
|
+
num_succeeded: Integer,
|
|
52
|
+
num_urls: Integer
|
|
53
|
+
}
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
type result =
|
|
57
|
+
{
|
|
58
|
+
markdown: String,
|
|
59
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
class Result < ContextDev::Internal::Type::BaseModel
|
|
63
|
+
attr_accessor markdown: String
|
|
64
|
+
|
|
65
|
+
attr_accessor metadata: ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata
|
|
66
|
+
|
|
67
|
+
def initialize: (
|
|
68
|
+
markdown: String,
|
|
69
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata
|
|
70
|
+
) -> void
|
|
71
|
+
|
|
72
|
+
def to_hash: -> {
|
|
73
|
+
markdown: String,
|
|
74
|
+
metadata: ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
type metadata =
|
|
78
|
+
{
|
|
79
|
+
crawl_depth: Integer,
|
|
80
|
+
status_code: Integer,
|
|
81
|
+
success: bool,
|
|
82
|
+
title: String,
|
|
83
|
+
url: String
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
class Metadata < ContextDev::Internal::Type::BaseModel
|
|
87
|
+
attr_accessor crawl_depth: Integer
|
|
88
|
+
|
|
89
|
+
attr_accessor status_code: Integer
|
|
90
|
+
|
|
91
|
+
attr_accessor success: bool
|
|
92
|
+
|
|
93
|
+
attr_accessor title: String
|
|
94
|
+
|
|
95
|
+
attr_accessor url: String
|
|
96
|
+
|
|
97
|
+
def initialize: (
|
|
98
|
+
crawl_depth: Integer,
|
|
99
|
+
status_code: Integer,
|
|
100
|
+
success: bool,
|
|
101
|
+
title: String,
|
|
102
|
+
url: String
|
|
103
|
+
) -> void
|
|
104
|
+
|
|
105
|
+
def to_hash: -> {
|
|
106
|
+
crawl_depth: Integer,
|
|
107
|
+
status_code: Integer,
|
|
108
|
+
success: bool,
|
|
109
|
+
title: String,
|
|
110
|
+
url: String
|
|
111
|
+
}
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
end
|
data/sig/context_dev/models.rbs
CHANGED
|
@@ -31,6 +31,8 @@ module ContextDev
|
|
|
31
31
|
|
|
32
32
|
class WebScreenshotParams = ContextDev::Models::WebScreenshotParams
|
|
33
33
|
|
|
34
|
+
class WebWebCrawlMdParams = ContextDev::Models::WebWebCrawlMdParams
|
|
35
|
+
|
|
34
36
|
class WebWebScrapeHTMLParams = ContextDev::Models::WebWebScrapeHTMLParams
|
|
35
37
|
|
|
36
38
|
class WebWebScrapeImagesParams = ContextDev::Models::WebWebScrapeImagesParams
|
|
@@ -9,6 +9,19 @@ module ContextDev
|
|
|
9
9
|
?request_options: ContextDev::request_opts
|
|
10
10
|
) -> ContextDev::Models::WebScreenshotResponse
|
|
11
11
|
|
|
12
|
+
def web_crawl_md: (
|
|
13
|
+
url: String,
|
|
14
|
+
?follow_subdomains: bool,
|
|
15
|
+
?include_images: bool,
|
|
16
|
+
?include_links: bool,
|
|
17
|
+
?max_depth: Integer,
|
|
18
|
+
?max_pages: Integer,
|
|
19
|
+
?shorten_base64_images: bool,
|
|
20
|
+
?url_regex: String,
|
|
21
|
+
?use_main_content_only: bool,
|
|
22
|
+
?request_options: ContextDev::request_opts
|
|
23
|
+
) -> ContextDev::Models::WebWebCrawlMdResponse
|
|
24
|
+
|
|
12
25
|
def web_scrape_html: (
|
|
13
26
|
url: String,
|
|
14
27
|
?request_options: ContextDev::request_opts
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: context.dev
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.2.0
|
|
4
|
+
version: 1.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Context Dev
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: cgi
|
|
@@ -101,6 +101,8 @@ files:
|
|
|
101
101
|
- lib/context_dev/models/utility_prefetch_response.rb
|
|
102
102
|
- lib/context_dev/models/web_screenshot_params.rb
|
|
103
103
|
- lib/context_dev/models/web_screenshot_response.rb
|
|
104
|
+
- lib/context_dev/models/web_web_crawl_md_params.rb
|
|
105
|
+
- lib/context_dev/models/web_web_crawl_md_response.rb
|
|
104
106
|
- lib/context_dev/models/web_web_scrape_html_params.rb
|
|
105
107
|
- lib/context_dev/models/web_web_scrape_html_response.rb
|
|
106
108
|
- lib/context_dev/models/web_web_scrape_images_params.rb
|
|
@@ -169,6 +171,8 @@ files:
|
|
|
169
171
|
- rbi/context_dev/models/utility_prefetch_response.rbi
|
|
170
172
|
- rbi/context_dev/models/web_screenshot_params.rbi
|
|
171
173
|
- rbi/context_dev/models/web_screenshot_response.rbi
|
|
174
|
+
- rbi/context_dev/models/web_web_crawl_md_params.rbi
|
|
175
|
+
- rbi/context_dev/models/web_web_crawl_md_response.rbi
|
|
172
176
|
- rbi/context_dev/models/web_web_scrape_html_params.rbi
|
|
173
177
|
- rbi/context_dev/models/web_web_scrape_html_response.rbi
|
|
174
178
|
- rbi/context_dev/models/web_web_scrape_images_params.rbi
|
|
@@ -236,6 +240,8 @@ files:
|
|
|
236
240
|
- sig/context_dev/models/utility_prefetch_response.rbs
|
|
237
241
|
- sig/context_dev/models/web_screenshot_params.rbs
|
|
238
242
|
- sig/context_dev/models/web_screenshot_response.rbs
|
|
243
|
+
- sig/context_dev/models/web_web_crawl_md_params.rbs
|
|
244
|
+
- sig/context_dev/models/web_web_crawl_md_response.rbs
|
|
239
245
|
- sig/context_dev/models/web_web_scrape_html_params.rbs
|
|
240
246
|
- sig/context_dev/models/web_web_scrape_html_response.rbs
|
|
241
247
|
- sig/context_dev/models/web_web_scrape_images_params.rbs
|