reaxar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/reaxar/client.rb +155 -0
- data/lib/reaxar/element/a.rb +41 -0
- data/lib/reaxar/element/iframe.rb +8 -0
- data/lib/reaxar/element/img.rb +8 -0
- data/lib/reaxar/middleware/log.rb +76 -0
- data/lib/reaxar/middleware/middleware_abstract.rb +30 -0
- data/lib/reaxar/middleware/middleware_stack.rb +49 -0
- data/lib/reaxar/middleware/redirect.rb +63 -0
- data/lib/reaxar/page.rb +96 -0
- data/lib/reaxar/version.rb +5 -0
- data/lib/reaxar.rb +10 -0
- metadata +82 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 6b2fd9a3dbf706503da293b66d5837c30f93625db9fc37d74b7bc7acfe342340
|
|
4
|
+
data.tar.gz: a9959db54d8cc2a0c64ef02dce21023c6118fc673df8cd9f0dfbc5cbc434d23b
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: '00081eadeb2c21f70cb2382d8a6c0a472d350383cf8dc10d6143ec8b35de1da7bc2d4b059cd2e3ba7f8f1fd81df6535d564b23185fb896b8c8a0f94842b73a8f'
|
|
7
|
+
data.tar.gz: d118de9ec7ddc0c3676ad93057a16322548a6bd95e200e36ab125bcf7c8560dadad390d4356d29d3a0a0a5db538b89ea3a660ec0f12249f75a38a41602c576ca
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'async/http/internet'
|
|
4
|
+
require_relative 'middleware/middleware_stack'
|
|
5
|
+
require_relative 'middleware/redirect'
|
|
6
|
+
require_relative 'middleware/log'
|
|
7
|
+
|
|
8
|
+
module Reaxar
|
|
9
|
+
# HTTP client for performing asynchronous web requests with middleware support.
|
|
10
|
+
#
|
|
11
|
+
# @example Basic usage
|
|
12
|
+
# client = Reaxar::Client.new(Logger.new($stdout))
|
|
13
|
+
# response = client.get('https://example.com')
|
|
14
|
+
# puts response.read
|
|
15
|
+
#
|
|
16
|
+
# @!attribute [r] cookies
|
|
17
|
+
# @return [Hash] The cookies stored by the client, keyed by cookie name.
|
|
18
|
+
class Client
  attr_reader :cookies

  # Initializes a new Client instance.
  #
  # Builds the cookie jar, the underlying Async::HTTP::Internet connection
  # pool and a middleware stack that starts with the Log middleware.
  #
  # @param logger [Logger, nil] Logger used by the Log middleware. When nil
  #   (or omitted) a Logger writing to $stdout is created.
  def initialize(logger = nil)
    @cookies = {}
    @logger = logger || Logger.new($stdout)
    @internet = Async::HTTP::Internet.new
    @middleware = Reaxar::Middleware::MiddlewareStack.new

    @middleware.use Reaxar::Middleware::Log.new(@logger)
  end

  # Performs an HTTP GET request.
  # @param uri [String] The URI to request.
  # @return [Object] The response after middleware processing.
  def get(uri)
    request(uri, :get)
  end

  # Performs an HTTP POST request with a form-encoded body.
  # @param uri [String] The URI to request.
  # @param form_data [Hash] The form data to send in the POST body.
  # @return [Object] The response after middleware processing.
  def post(uri, form_data = {})
    request(uri, :post, form_data)
  end

  # Closes the underlying Async::HTTP::Internet instance.
  # @return [void]
  def close
    @internet.close
  end

  # Instantiates a middleware class and appends it to the middleware stack.
  # @param middleware_class [Class] The middleware class to add.
  # @param args [Array] Arguments passed to the middleware initializer.
  # @yield Optional block forwarded to the middleware initializer.
  # @return [void]
  def use(middleware_class, *args, &block)
    @middleware.use(middleware_class.new(*args, &block))
  end

  private

  # Runs one logical request through the middleware stack, repeating it
  # for as long as the response middleware answers :retry_request.
  #
  # @param uri [String] The URI to request.
  # @param method [Symbol] The HTTP method (:get or :post).
  # @param body [Object, nil] The request body (form data for POST).
  # @return [Object] The processed HTTP response.
  def request(uri, method, body = nil) # rubocop:disable Metrics/MethodLength
    request_env = {
      uri: uri,
      method: method,
      body: body,
      headers: {},
      cookies: @cookies
    }

    loop do
      # Request middleware may rewrite any field of the environment.
      processed_request = @middleware.run(request_env)

      response = execute_http_request(
        processed_request[:uri],
        processed_request[:method],
        processed_request[:body],
        processed_request[:headers]
      )

      middleware_result = @middleware.process_response(response, processed_request)

      # A middleware (e.g. redirect handling) asked for another attempt;
      # it has already updated the request hash in place.
      if middleware_result == :retry_request
        request_env = processed_request
        next
      end

      # NOTE(review): cookie capture is not wired in. update_cookies is
      # never called, so @cookies only changes if a caller mutates it.

      return middleware_result
    end
  end

  # Executes the actual HTTP request using Async::HTTP::Internet.
  #
  # @param uri [String] The URI to request.
  # @param method [Symbol] The HTTP method (:get or :post).
  # @param body [Hash, nil] Form data for POST; nil is sent as an empty form.
  # @param headers [Hash] The request headers (not mutated).
  # @return [Object] The HTTP response.
  # @raise [ArgumentError] If the method is neither :get nor :post.
  def execute_http_request(uri, method, body, headers)
    url = URI(uri)
    # merge returns a new Hash, so the caller's headers stay untouched.
    headers = headers.merge(headers_with_cookies(url))

    case method
    when :get
      @internet.get(url, headers)
    when :post
      headers['Content-Type'] = 'application/x-www-form-urlencoded'
      @internet.post(url, headers, URI.encode_www_form(body || {}))
    else
      # Fail loudly instead of returning nil into the response middleware.
      raise ArgumentError, "unsupported HTTP method: #{method.inspect}"
    end
  end

  # Builds the 'Cookie' header for the given URL.
  #
  # Only cookies whose :domain equals the URL host exactly are sent.
  #
  # @param url [URI] The URI object.
  # @return [Hash] A Hash with a 'Cookie' entry, or an empty Hash when no
  #   stored cookie matches the host.
  def headers_with_cookies(url)
    domain_cookies = @cookies.select { |_key, cookie| cookie[:domain] == url.host }
    # Avoid sending an empty "Cookie:" header when nothing matches.
    return {} if domain_cookies.empty?

    cookie_string = domain_cookies.map { |key, cookie| "#{key}=#{cookie[:value]}" }.join('; ')

    { 'Cookie' => cookie_string }
  end

  # Stores the cookies announced by a response in the client's jar.
  #
  # Only the name and value of each cookie are kept; attributes after the
  # first ';' are discarded and the path is always recorded as '/'.
  # Lines without a name or without '=' are skipped.
  #
  # @param response [Object] The HTTP response object.
  # @param host [String, nil] Domain to record for the cookies. When nil,
  #   response.endpoint.host is used (assumes the response exposes
  #   #endpoint - TODO confirm against the HTTP library).
  # @return [void]
  def update_cookies(response, host = nil)
    raw = response.headers['set-cookie']
    return unless raw

    # Accept either one newline-joined String or a list of header values.
    Array(raw).flat_map { |line| line.to_s.split("\n") }.each do |cookie|
      name, value = cookie.split('=', 2).map(&:strip)
      next if name.nil? || name.empty? || value.nil?

      @cookies[name] = {
        value: value.split(';').first.to_s,
        domain: host || response.endpoint.host,
        path: '/'
      }
    end
  end
end
|
|
155
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Reaxar
|
|
4
|
+
module Element
|
|
5
|
+
# Represents an HTML anchor (`<a>`) element on a page.
|
|
6
|
+
#
|
|
7
|
+
# @example Accessing link attributes and clicking
|
|
8
|
+
# link = Reaxar::Element::A.new(nokogiri_element, page)
|
|
9
|
+
# puts link.href
|
|
10
|
+
# link.click { |new_page| puts new_page.title }
|
|
11
|
+
#
|
|
12
|
+
# @!attribute [r] element
|
|
13
|
+
# @return [Nokogiri::XML::Element] The underlying Nokogiri element.
|
|
14
|
+
# @!attribute [r] page
|
|
15
|
+
# @return [Reaxar::Page] The page this element belongs to.
|
|
16
|
+
class A
  attr_reader :element, :page

  # Wraps an anchor node together with the page it was found on.
  # @param element [Nokogiri::XML::Element] The anchor node; only
  #   element[:href] is read from it.
  # @param page [Reaxar::Page] The parent page (supplies #url and #client).
  def initialize(element, page)
    @element = element
    @page = page
  end

  # Returns the absolute URL for the link.
  #
  # The raw attribute is stripped before resolution because surrounding
  # whitespace or newlines would make URI.join raise. A missing href is
  # treated as an empty reference.
  #
  # @return [String] The href resolved against the page URL.
  # @raise [URI::InvalidURIError] If the stripped href is still not a
  #   parseable URI reference.
  def href
    URI.join(page.url, element[:href].to_s.strip).to_s
  end

  # Opens the link target as a new page, reusing the parent page's client.
  # @yield [page] Optional block receiving the new page instance.
  # @return [Async::Task] The async task wrapping the new page.
  def click(&block)
    Page.open(href, page.client, &block)
  end
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'logger'
|
|
4
|
+
require 'time'
|
|
5
|
+
|
|
6
|
+
module Reaxar
|
|
7
|
+
module Middleware
|
|
8
|
+
# Middleware for logging HTTP requests and responses.
|
|
9
|
+
#
|
|
10
|
+
# @example Usage
|
|
11
|
+
# logger = Logger.new($stdout)
|
|
12
|
+
# middleware = Reaxar::Middleware::Log.new(logger)
|
|
13
|
+
#
|
|
14
|
+
# @!attribute [r] logger
|
|
15
|
+
# @return [Logger] The logger used for output.
|
|
16
|
+
class Log < MiddlewareAbstract
  # Initializes the Log middleware.
  # @param logger [Logger, nil] Logger instance for output. When nil, a
  #   Logger writing to $stdout is created.
  def initialize(logger = nil)
    super()
    @logger = logger || Logger.new($stdout)
  end

  # Logs the request line and headers and records when the request started.
  #
  # The start time is stored on the request hash under :log_started_at so
  # each in-flight request keeps its own timestamp; @start_time is still
  # set as a fallback.
  #
  # @param request [Hash] The request data (reads :method, :uri, :headers).
  # @return [Hash] The same request object.
  def process_request(request)
    started_at = Time.now
    @start_time = started_at
    request[:log_started_at] = started_at
    log_request_info(request, started_at)
    log_request_headers(request)
    request
  end

  # Logs the status, duration and headers of the HTTP response.
  #
  # @param response [Object] The HTTP response.
  # @param request [Hash] The original request.
  # @return [Object] The unchanged HTTP response.
  def process_response(response, request)
    started_at = request[:log_started_at] || @start_time
    # No recorded start (process_request never ran): report 0s, do not crash.
    duration = started_at ? Time.now - started_at : 0.0
    status = response.respond_to?(:status) ? response.status : '???'
    log_response_info(request, response, status, duration)
    log_response_headers(response)
    response
  end

  private

  # Writes the "outgoing request" line.
  # @param request [Hash] The request data.
  # @param start_time [Time] Timestamp printed in the log line.
  def log_request_info(request, start_time)
    @logger.info("➡️ [#{start_time.iso8601}] #{request[:method].upcase} #{request[:uri]}")
  end

  # Writes one line per request header; silent when there are none.
  # @param request [Hash] The request data.
  def log_request_headers(request)
    return unless request[:headers] && !request[:headers].empty?

    @logger.info('Request headers:')
    request[:headers].each do |key, value|
      @logger.info("  #{key}: #{value}")
    end
  end

  # Writes the "incoming response" line with status and elapsed seconds.
  # @param request [Hash] The request data.
  # @param _response [Object] Unused; kept for a uniform helper signature.
  # @param status [Object] Response status, or '???' when unavailable.
  # @param duration [Float] Elapsed time in seconds.
  def log_response_info(request, _response, status, duration)
    @logger.info("⬅️ [#{Time.now.iso8601}] #{request[:method].upcase} #{request[:uri]} - " \
                 "#{status} (#{duration.round(2)}s)")
  end

  # Writes one line per response header; silent when the response has no
  # headers or does not expose them.
  # @param response [Object] The HTTP response.
  def log_response_headers(response)
    return unless response.respond_to?(:headers) && response.headers && !response.headers.empty?

    @logger.info('Response headers:')
    response.headers.each do |key, value|
      @logger.info("  #{key}: #{value}")
    end
  end
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Reaxar
|
|
4
|
+
module Middleware
|
|
5
|
+
# Abstract base class for HTTP middleware.
|
|
6
|
+
#
|
|
7
|
+
# All middleware should inherit from this class and implement
|
|
8
|
+
# the {#process_request} and {#process_response} methods.
|
|
9
|
+
#
|
|
10
|
+
# @abstract
|
|
11
|
+
class MiddlewareAbstract
  # Hook invoked before a request is sent. Subclasses must override it.
  # @param request [Hash] The request data.
  # @return [Hash] The (possibly modified) request.
  # @raise [NotImplementedError] Always, unless overridden in a subclass.
  def process_request(request)
    message = format("%<owner>s has not implemented method '%<name>s'",
                     owner: self.class, name: __method__)
    raise NotImplementedError, message
  end

  # Hook invoked after a response is received. Subclasses must override it.
  # @param response [Object] The HTTP response.
  # @param request [Hash] The request that produced the response.
  # @return [Object] The (possibly modified) response.
  # @raise [NotImplementedError] Always, unless overridden in a subclass.
  def process_response(response, request)
    message = format("%<owner>s has not implemented method '%<name>s'",
                     owner: self.class, name: __method__)
    raise NotImplementedError, message
  end
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Reaxar
|
|
4
|
+
module Middleware
|
|
5
|
+
# Manages a stack of HTTP middleware and applies them to requests and responses.
|
|
6
|
+
#
|
|
7
|
+
# @example
|
|
8
|
+
# stack = Reaxar::Middleware::MiddlewareStack.new
|
|
9
|
+
# stack.use(MyMiddleware.new)
|
|
10
|
+
# request = stack.run(request)
|
|
11
|
+
# response = stack.process_response(response, request)
|
|
12
|
+
class MiddlewareStack
  # Creates a stack with no middleware registered.
  def initialize
    @stack = []
  end

  # Appends a middleware instance to the end of the stack.
  # @param middleware [Object] Responds to #process_request and #process_response.
  # @return [void]
  def use(middleware)
    @stack.push(middleware)
  end

  # Passes the request through every middleware in registration order.
  # A middleware that returns nil/false leaves the current request as is.
  # @param request [Hash] The request data.
  # @return [Hash] The processed request.
  def run(request)
    current = request
    @stack.each do |middleware|
      outcome = middleware.process_request(current)
      current = outcome if outcome
    end
    current
  end

  # Passes the response through every middleware in reverse registration
  # order. Stops immediately when a middleware answers :retry_request.
  # A middleware that returns nil/false leaves the current response as is.
  # @param response [Object] The HTTP response.
  # @param request [Hash] The original request.
  # @return [Object, Symbol] The processed response or :retry_request.
  def process_response(response, request)
    current = response
    @stack.reverse.each do |middleware|
      outcome = middleware.process_response(current, request)
      return :retry_request if outcome == :retry_request

      current = outcome if outcome
    end
    current
  end
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'middleware_abstract'
|
|
4
|
+
|
|
5
|
+
module Reaxar
|
|
6
|
+
module Middleware
|
|
7
|
+
# Middleware for handling HTTP redirects.
|
|
8
|
+
#
|
|
9
|
+
# @example
|
|
10
|
+
# middleware = Reaxar::Middleware::Redirect.new
|
|
11
|
+
#
|
|
12
|
+
# @!attribute [r] redirect_count
|
|
13
|
+
# @return [Integer] The number of redirects for the request.
|
|
14
|
+
class Redirect < MiddlewareAbstract
  # The maximum number of allowed redirects.
  MAX_REDIRECTS = 5

  # Status codes that are followed when a Location header is present.
  REDIRECT_STATUSES = [301, 302, 303, 307, 308].freeze

  # Status codes after which the follow-up request is issued as GET.
  GET_DOWNGRADE_STATUSES = [301, 302, 303].freeze

  # Initializes the Redirect middleware.
  def initialize
    super
    # Not read anywhere in this class; the per-request counter lives in
    # request[:redirect_count].
    @redirect_count = 0
  end

  # Ensures the request carries a redirect counter.
  # @param request [Hash] The request data.
  # @return [Hash] The same request, with :redirect_count set.
  def process_request(request)
    request[:redirect_count] ||= 0
    request
  end

  # Follows a redirect by rewriting the request in place.
  #
  # A response is followed only when its status is a redirect status and
  # it has a non-empty Location header. After MAX_REDIRECTS hops the
  # redirect response itself is returned to the caller.
  #
  # @param response [Object] The HTTP response (reads #status and #headers).
  # @param request [Hash] The original request; :uri, :method and
  #   :redirect_count are updated when the redirect is followed.
  # @return [Object, Symbol] The response, or :retry_request for another attempt.
  # @raise [URI::InvalidURIError] If the Location value cannot be resolved.
  def process_response(response, request)
    return response unless redirect?(response)
    # to_i tolerates a request that never went through process_request.
    return response if request[:redirect_count].to_i >= MAX_REDIRECTS

    request[:redirect_count] = request[:redirect_count].to_i + 1
    request[:uri] = process_location_url(request, response)
    request[:method] = :get if GET_DOWNGRADE_STATUSES.include?(response.status)

    # NOTE(review): the redirect response is dropped once :retry_request is
    # returned; close it when possible so its body does not stay open.
    # Confirm #close semantics against the HTTP library in use.
    response.close if response.respond_to?(:close)

    :retry_request
  end

  private

  # @param response [Object] The HTTP response.
  # @return [Boolean] True when the response should be followed.
  def redirect?(response)
    REDIRECT_STATUSES.include?(response.status) &&
      !response.headers['location'].to_s.strip.empty?
  end

  # Resolves the Location header against the current request URI, so
  # absolute, host-relative, path-relative and protocol-relative values
  # all work and the port of the base URI is preserved.
  # @param request [Hash] The request data (reads :uri).
  # @param response [Object] The HTTP response.
  # @return [String] The absolute redirect target.
  def process_location_url(request, response)
    location = response.headers['location'].to_s.strip
    URI.join(request[:uri].to_s, location).to_s
  end
end
|
|
62
|
+
end
|
|
63
|
+
end
|
data/lib/reaxar/page.rb
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'nokogiri'
|
|
4
|
+
require_relative 'element/a'
|
|
5
|
+
|
|
6
|
+
module Reaxar
|
|
7
|
+
# Represents a web page and provides methods for interacting with its content.
|
|
8
|
+
#
|
|
9
|
+
# @example Open a page and print its title
|
|
10
|
+
# page = Reaxar::Page.open('https://example.com').wait
|
|
11
|
+
# puts page.title
|
|
12
|
+
#
|
|
13
|
+
# @!attribute [r] url
|
|
14
|
+
# @return [String] The URL of the page.
|
|
15
|
+
# @!attribute [r] client
|
|
16
|
+
# @return [Client] The HTTP client used to fetch the page.
|
|
17
|
+
# @!attribute [r] response
|
|
18
|
+
# @return [Object] The HTTP response object.
|
|
19
|
+
# @!attribute [r] document
|
|
20
|
+
# @return [Nokogiri::HTML::Document] The parsed HTML document.
|
|
21
|
+
class Page
  # Clients that already received the Redirect middleware from a Page.
  # Weakly keyed so tracked clients can still be garbage collected.
  REDIRECT_READY_CLIENTS = ObjectSpace::WeakMap.new
  private_constant :REDIRECT_READY_CLIENTS

  attr_reader :url, :client, :response, :document

  class << self
    # @!attribute [rw] logger
    #   @return [Logger, nil] The logger handed to clients created by Page.
    attr_accessor :logger

    # Configures the logger for the Page class.
    # @param logger [Logger] The logger to use.
    # @return [void]
    def configure(logger:)
      self.logger = logger
    end
  end

  # Opens a page inside an Async block.
  # @param url [String] The URL to open.
  # @param client [Client, nil] Optional HTTP client to reuse.
  # @yield [page] Optional block to yield the page instance.
  # @yieldparam page [Page] The page instance.
  # @return [Async::Task] The async task wrapping the page.
  def self.open(url, client = nil, &block)
    Async { new(url, client, &block) }
  end

  # Fetches the URL and parses the response body as HTML.
  #
  # When no client is given, a new one is created with the class-level
  # logger. The Redirect middleware is installed once per client, so
  # reusing a client across pages does not grow its middleware stack.
  #
  # @param url [String] The URL of the page.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [self] Optional block to yield the page instance.
  def initialize(url, client = nil)
    @url = url
    @client = client || Client.new(self.class.logger)
    ensure_redirect_middleware(@client)
    @response = @client.get(url)
    @document = Nokogiri::HTML(@response.read)

    yield self if block_given?
  end

  # Returns the title of the page.
  # @return [String, nil] The page title or nil if not found.
  def title
    document.title
  end

  # Returns all links (<a> elements with an href) on the page.
  # The result is memoized.
  # @return [Array<Reaxar::Element::A>] The array of link elements.
  def links
    @links ||= document.css('a[href]').map do |link|
      Reaxar::Element::A.new(link, self)
    end
  end

  # Returns the HTML content of the page as serialized by the parser.
  # @return [String] The HTML content.
  def html
    document.to_html
  end

  # Finds a form on the page.
  # @param selector [String] CSS selector for the form (default: 'form').
  # @return [nil] Always nil for now.
  def form(selector = 'form')
    # TODO: form handling is not implemented yet (can be extended).
    nil
  end

  # Submits a form on the page.
  # @param selector [String] CSS selector for the form.
  # @param data [Hash] Data to submit with the form.
  # @return [nil] Always nil for now.
  def submit_form(selector, data = {})
    # TODO: form submission is not implemented yet.
    nil
  end

  private

  # Installs the Redirect middleware on the client unless a Page has
  # already done so for this exact client object.
  # @param client [Client] The HTTP client to prepare.
  # @return [void]
  def ensure_redirect_middleware(client)
    return if REDIRECT_READY_CLIENTS.key?(client)

    client.use Reaxar::Middleware::Redirect
    REDIRECT_READY_CLIENTS[client] = true
  end
end
|
|
96
|
+
end
|
data/lib/reaxar.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: reaxar
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Petr Ustyugov
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2025-08-05 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: async-http
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: 0.89.0
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: 0.89.0
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: nokogiri
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '1.18'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '1.18'
|
|
41
|
+
description: REST client with cookie support and page navigation
|
|
42
|
+
email:
|
|
43
|
+
- peter.ustyugov@gmail.com
|
|
44
|
+
executables: []
|
|
45
|
+
extensions: []
|
|
46
|
+
extra_rdoc_files: []
|
|
47
|
+
files:
|
|
48
|
+
- lib/reaxar.rb
|
|
49
|
+
- lib/reaxar/client.rb
|
|
50
|
+
- lib/reaxar/element/a.rb
|
|
51
|
+
- lib/reaxar/element/iframe.rb
|
|
52
|
+
- lib/reaxar/element/img.rb
|
|
53
|
+
- lib/reaxar/middleware/log.rb
|
|
54
|
+
- lib/reaxar/middleware/middleware_abstract.rb
|
|
55
|
+
- lib/reaxar/middleware/middleware_stack.rb
|
|
56
|
+
- lib/reaxar/middleware/redirect.rb
|
|
57
|
+
- lib/reaxar/page.rb
|
|
58
|
+
- lib/reaxar/version.rb
|
|
59
|
+
homepage: https://github.com/senap/reaxar
|
|
60
|
+
licenses:
|
|
61
|
+
- MIT
|
|
62
|
+
metadata: {}
|
|
63
|
+
post_install_message:
|
|
64
|
+
rdoc_options: []
|
|
65
|
+
require_paths:
|
|
66
|
+
- lib
|
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
68
|
+
requirements:
|
|
69
|
+
- - ">="
|
|
70
|
+
- !ruby/object:Gem::Version
|
|
71
|
+
version: '3.2'
|
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
73
|
+
requirements:
|
|
74
|
+
- - ">="
|
|
75
|
+
- !ruby/object:Gem::Version
|
|
76
|
+
version: '0'
|
|
77
|
+
requirements: []
|
|
78
|
+
rubygems_version: 3.4.19
|
|
79
|
+
signing_key:
|
|
80
|
+
specification_version: 4
|
|
81
|
+
summary: Simple HTTP browser with cookie persistence
|
|
82
|
+
test_files: []
|