reaxar 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/reaxar/client.rb +7 -39
- data/lib/reaxar/middleware/cookies.rb +60 -0
- data/lib/reaxar/page.rb +31 -1
- data/lib/reaxar/parser/base_parser.rb +31 -0
- data/lib/reaxar/parser/factory_parser.rb +73 -0
- data/lib/reaxar/parser/html_parser.rb +26 -0
- data/lib/reaxar/parser/xml_parser.rb +26 -0
- data/lib/reaxar/version.rb +1 -1
- data/lib/reaxar.rb +5 -0
- metadata +21 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77d79539eb20d915f08e88a6f913ec33e0941590735546d388e2d9cf10d6fd46
|
4
|
+
data.tar.gz: 2afcb3488c6c786b8ecd30ff1cc956ab70a6cae37be002794a03c7cade539541
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f38c65236f0a2c9e6a5162b7b903f909b4acf3db191a7a0c463bc70845b24e0fee79d60ba6056a580a04c32f1aac3a41077786227955299b837311168ed3bc2
|
7
|
+
data.tar.gz: 6349cb5b45f988829a41ee3b3bb718e1dd18143a025bba7080e55b356cd5c6da822901ceef9b916e6be21a944b590e0eae355bc6c711d0a2daee08ab43263f41
|
data/lib/reaxar/client.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'ostruct'
|
3
4
|
require 'async/http/internet'
|
4
5
|
require_relative 'middleware/middleware_stack'
|
5
6
|
require_relative 'middleware/redirect'
|
7
|
+
require_relative 'middleware/cookies'
|
6
8
|
require_relative 'middleware/log'
|
7
9
|
|
8
10
|
module Reaxar
|
@@ -67,13 +69,11 @@ module Reaxar
|
|
67
69
|
# @param body [Object, nil] The request body (for POST, etc.).
|
68
70
|
# @return [Object] The processed HTTP response.
|
69
71
|
def request(uri, method, body = nil) # rubocop:disable Metrics/MethodLength
|
70
|
-
request_env =
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
cookies: @cookies
|
76
|
-
}
|
72
|
+
request_env = OpenStruct.new(uri: uri,
|
73
|
+
method: method,
|
74
|
+
body: body,
|
75
|
+
headers: {},
|
76
|
+
cookies:)
|
77
77
|
|
78
78
|
loop do
|
79
79
|
# Process request through middleware
|
@@ -111,8 +111,6 @@ module Reaxar
|
|
111
111
|
# @return [Object] The HTTP response.
|
112
112
|
def execute_http_request(uri, method, body, headers)
|
113
113
|
url = URI(uri)
|
114
|
-
headers = headers.merge(headers_with_cookies(url))
|
115
|
-
|
116
114
|
case method
|
117
115
|
when :get
|
118
116
|
@internet.get(url, headers)
|
@@ -121,35 +119,5 @@ module Reaxar
|
|
121
119
|
@internet.post(url, headers, URI.encode_www_form(body))
|
122
120
|
end
|
123
121
|
end
|
124
|
-
|
125
|
-
# Builds headers including cookies for the given URL.
|
126
|
-
# @param url [URI] The URI object.
|
127
|
-
# @return [Hash] The headers including the 'Cookie' header if cookies are present.
|
128
|
-
def headers_with_cookies(url)
|
129
|
-
return {} if @cookies.empty?
|
130
|
-
|
131
|
-
domain_cookies = @cookies.select { |_key, cookie| cookie[:domain] == url.host }
|
132
|
-
cookie_string = domain_cookies.map { |key, cookie| "#{key}=#{cookie[:value]}" }.join('; ')
|
133
|
-
|
134
|
-
{ 'Cookie' => cookie_string }
|
135
|
-
end
|
136
|
-
|
137
|
-
# Updates the client's cookies from the response.
|
138
|
-
# @param response [Object] The HTTP response object.
|
139
|
-
# @return [void]
|
140
|
-
def update_cookies(response)
|
141
|
-
return unless response.headers['set-cookie']
|
142
|
-
|
143
|
-
response.headers['set-cookie'].split("\n").each do |cookie|
|
144
|
-
name, value = cookie.split('=', 2).map(&:strip)
|
145
|
-
value = value.split(';').first
|
146
|
-
|
147
|
-
@cookies[name] = {
|
148
|
-
value:,
|
149
|
-
domain: response.endpoint.host,
|
150
|
-
path: '/'
|
151
|
-
}
|
152
|
-
end
|
153
|
-
end
|
154
122
|
end
|
155
123
|
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'http-cookie'
|
4
|
+
|
5
|
+
module Reaxar
|
6
|
+
module Middleware
|
7
|
+
# Middleware to manage HTTP cookies for requests and responses.
|
8
|
+
#
|
9
|
+
# This middleware loads cookies from a cookie jar file on initialization,
|
10
|
+
# attaches cookies to outgoing requests, updates the cookie jar with any
|
11
|
+
# `Set-Cookie` headers received in responses, and saves the updated jar back to file.
|
12
|
+
#
|
13
|
+
# @example Usage in HTTP client middleware stack
|
14
|
+
# client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
|
15
|
+
#
|
16
|
+
class Cookies < MiddlewareAbstract
  # Builds the middleware and restores previously persisted cookies.
  #
  # The jar is loaded from +:jar_file+ only when a path was given and the
  # file already exists. Without a path the jar lives in memory only.
  #
  # @param args [Hash] Arguments for configuration.
  # @option args [String, nil] :jar_file Path to the cookie jar file.
  #
  # @return [void]
  def initialize(args) # rubocop:disable Lint/MissingSuper
    @jar_file = args[:jar_file]
    @jar = HTTP::CookieJar.new
    # File.exist?(nil) raises TypeError, so check the path is present first.
    @jar.load(@jar_file) if @jar_file && File.exist?(@jar_file)
  end

  # Attaches the jar's cookies for the request URI to the outgoing request.
  #
  # The `cookie` header is always assigned (possibly to an empty string) so
  # that a value left over from an earlier pass over the same request
  # object is overwritten rather than reused.
  #
  # @param request [#uri, #headers] The outgoing request environment.
  # @return [Object] The same request, with its `cookie` header set.
  def process_request(request)
    request.headers['cookie'] = HTTP::Cookie.cookie_value(@jar.cookies(request.uri))
    request
  end

  # Records every `Set-Cookie` value of the response in the jar and
  # persists the jar when a jar file was configured.
  #
  # @param response [#headers] The HTTP response received.
  # @param request [#[]] The request environment the response belongs to.
  # @return [Object] The unmodified response.
  def process_response(response, request)
    response.headers['set-cookie']&.each do |value|
      @jar.parse(value, request[:uri])
    end
    # No jar file means in-memory operation: nothing to write.
    @jar.save(@jar_file) if @jar_file
    response
  end
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/reaxar/page.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'nokogiri'
|
4
4
|
require_relative 'element/a'
|
5
|
+
require_relative 'parser/factory_parser'
|
5
6
|
|
6
7
|
module Reaxar
|
7
8
|
# Represents a web page and provides methods for interacting with its content.
|
@@ -44,6 +45,13 @@ module Reaxar
|
|
44
45
|
Async { new(url, client, &block) }
|
45
46
|
end
|
46
47
|
|
48
|
+
# Closes the page's HTTP client.
|
49
|
+
#
|
50
|
+
# @return [void]
|
51
|
+
def close = @client.close
|
54
|
+
|
47
55
|
# Initializes a new Page instance.
|
48
56
|
# @param url [String] The URL of the page.
|
49
57
|
# @param client [Client, nil] Optional HTTP client.
|
@@ -51,13 +59,35 @@ module Reaxar
|
|
51
59
|
def initialize(url, client = nil)
|
52
60
|
@url = url
|
53
61
|
@client = client || Client.new(self.class.logger)
|
62
|
+
@client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
|
54
63
|
@client.use Reaxar::Middleware::Redirect
|
55
64
|
@response = @client.get(url)
|
56
|
-
@document =
|
65
|
+
@document = Reaxar::Parser::FactoryParser.call(content: @response.read,
|
66
|
+
mime_type: @response.headers['content-type'])
|
57
67
|
|
58
68
|
yield self if block_given?
|
59
69
|
end
|
60
70
|
|
71
|
+
# @return [Hash{String => String}, nil]
|
72
|
+
# The response headers as a hash with lowercase keys, or nil if no response is set.
|
73
|
+
# Headers are memoized after the first call.
|
74
|
+
def headers
  # Serve the memoized hash once it has been computed.
  return @headers if @headers

  raw_headers = @response&.headers
  @headers = raw_headers&.to_h&.transform_keys(&:downcase)
end
|
77
|
+
|
78
|
+
# @return [String, nil]
|
79
|
+
# The `Content-Type` header value from the response, or nil if not present.
|
80
|
+
def content_type
  response_headers = headers
  # No response headers available: nothing to look up.
  return if response_headers.nil?

  response_headers['content-type']
end
|
83
|
+
|
84
|
+
# @return [String, Integer]
|
85
|
+
# The `Content-Length` header value from the response.
|
86
|
+
# Falls back to the byte size of the HTML content if the header is missing.
|
87
|
+
def content_length
  declared = headers&.[]('content-length')
  return declared if declared

  # Header absent: fall back to the byte size of the HTML content.
  html.bytesize
end
|
90
|
+
|
61
91
|
# Returns the title of the page.
|
62
92
|
# @return [String, nil] The page title or nil if not found.
|
63
93
|
def title
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Reaxar
|
4
|
+
module Parser
|
5
|
+
# Base class for all parsers in the Reaxar system.
|
6
|
+
#
|
7
|
+
# This class provides a common interface for parsing operations.
|
8
|
+
# Subclasses must implement the {#parse} method to define
|
9
|
+
# specific parsing behavior for the given content.
|
10
|
+
#
|
11
|
+
# @abstract
|
12
|
+
class BaseParser
  # Stores the raw content for a later {#parse} call.
  #
  # @param content [String] The raw content to be parsed.
  def initialize(content)
    @content = content
  end

  # Template method: concrete parsers override this to turn the stored
  # content into a parsed representation.
  #
  # @return [Object] The parse result, as defined by the subclass.
  #
  # @raise [NotImplementedError] Always, unless overridden by a subclass.
  #
  # @abstract
  def parse
    message = format("%s has not implemented method '%s'", self.class, __method__)
    raise NotImplementedError, message
  end
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'html_parser'
|
4
|
+
require_relative 'xml_parser'
|
5
|
+
|
6
|
+
module Reaxar
|
7
|
+
module Parser
|
8
|
+
# Factory class for selecting and invoking the correct parser based on MIME type.
|
9
|
+
#
|
10
|
+
# This class maintains a registry of MIME type to parser class mappings.
|
11
|
+
# Parsers must respond to `#parse` and accept the content in their initializer.
|
12
|
+
#
|
13
|
+
# @example Parsing HTML content
|
14
|
+
# Reaxar::Parser::FactoryParser.call(
|
15
|
+
# content: "<html><body>Hello</body></html>",
|
16
|
+
# mime_type: "text/html"
|
17
|
+
# )
|
18
|
+
#
|
19
|
+
# @example Registering a custom parser
|
20
|
+
# Reaxar::Parser::FactoryParser.register(
|
21
|
+
# "application/json",
|
22
|
+
# MyJsonParser
|
23
|
+
# )
|
24
|
+
#
|
25
|
+
class FactoryParser
  class << self
    # Selects the parser registered for the given MIME type and parses the content.
    #
    # The MIME type is normalized first (parameters stripped, lowercased), so
    # values such as "Text/HTML; charset=utf-8" resolve to the HTML parser.
    #
    # @param content [String] The raw content to parse.
    # @param mime_type [String, Symbol, nil] The MIME type of the content.
    #
    # @return [Object] The result of the parsing process.
    #
    # @raise [NotImplementedError] If no parser is registered for the given
    #   MIME type (including a missing/blank MIME type).
    def call(content:, mime_type:)
      parser = registry[normalize_mime_type(mime_type)]
      raise NotImplementedError, "No parser for MIME type: #{mime_type}" unless parser

      parser.new(content).parse
    end

    # Normalizes a MIME type: drops parameters (everything from the first
    # ';'), trims surrounding whitespace and lowercases the result, since
    # media types are case-insensitive.
    #
    # @param mime_type [String, Symbol, nil] The MIME type to normalize.
    # @return [String] The normalized MIME type; empty when none was given.
    def normalize_mime_type(mime_type)
      # "".split(';') is [], whose #first is nil; the second to_s keeps a
      # nil or blank MIME type from raising NoMethodError.
      mime_type.to_s.split(';', 2).first.to_s.strip.downcase
    end

    # Registers a parser class for a given MIME type.
    #
    # The key is stored in normalized form so that it can be matched by
    # {call}, which normalizes the MIME type it looks up.
    #
    # @param mime_type [String, Symbol] The MIME type to associate with the parser.
    # @param klass [Class<#parse>] The parser class to use.
    # @return [void]
    def register(mime_type, klass)
      registry[normalize_mime_type(mime_type)] = klass
    end

    # Returns the registry mapping MIME types to parser classes, created
    # with the built-in HTML and XML parsers on first access.
    #
    # @return [Hash{String => Class}] The MIME type → parser class mapping.
    def registry
      @registry ||= {
        'text/html' => Reaxar::Parser::HtmlParser,
        'text/xml' => Reaxar::Parser::XmlParser,
        'application/xml' => Reaxar::Parser::XmlParser
      }
    end
  end
end
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base_parser'
|
4
|
+
|
5
|
+
module Reaxar
|
6
|
+
module Parser
|
7
|
+
# Parses HTML content into a Nokogiri document.
|
8
|
+
#
|
9
|
+
# This parser uses {https://nokogiri.org/ Nokogiri} to parse the provided
|
10
|
+
# HTML string into a `Nokogiri::HTML::Document` object for further processing.
|
11
|
+
#
|
12
|
+
# @example Parsing HTML
|
13
|
+
# parser = Reaxar::Parser::HtmlParser.new("<html><body>Hello</body></html>")
|
14
|
+
# doc = parser.parse
|
15
|
+
# doc.at('body').text # => "Hello"
|
16
|
+
#
|
17
|
+
class HtmlParser < BaseParser
  # Hands the content given to the constructor to Nokogiri's HTML parser.
  #
  # @return [Nokogiri::HTML::Document] The parsed HTML document.
  def parse = Nokogiri::HTML(@content)
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base_parser'
|
4
|
+
|
5
|
+
module Reaxar
|
6
|
+
module Parser
|
7
|
+
# Parses XML content into a Nokogiri XML document.
|
8
|
+
#
|
9
|
+
# This parser uses {https://nokogiri.org/ Nokogiri} to parse the provided
|
10
|
+
# XML string into a `Nokogiri::XML::Document` object for further processing.
|
11
|
+
#
|
12
|
+
# @example Parsing XML
|
13
|
+
# parser = Reaxar::Parser::XmlParser.new("<root><message>Hello</message></root>")
|
14
|
+
# doc = parser.parse
|
15
|
+
# doc.at('message').text # => "Hello"
|
16
|
+
#
|
17
|
+
class XmlParser < BaseParser
  # Hands the content given to the constructor to Nokogiri's XML parser.
  #
  # @return [Nokogiri::XML::Document] The parsed XML document.
  def parse = Nokogiri::XML(@content)
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/reaxar/version.rb
CHANGED
data/lib/reaxar.rb
CHANGED
@@ -5,6 +5,11 @@ require_relative 'reaxar/page'
|
|
5
5
|
require_relative 'reaxar/element/a'
|
6
6
|
require_relative 'reaxar/middleware/redirect'
|
7
7
|
|
8
|
+
require_relative 'reaxar/middleware/cookies'
|
9
|
+
require_relative 'reaxar/parser/base_parser'
|
10
|
+
require_relative 'reaxar/parser/html_parser'
|
11
|
+
require_relative 'reaxar/parser/factory_parser'
|
12
|
+
|
8
13
|
module Reaxar
|
9
14
|
class Error < StandardError; end
|
10
15
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: reaxar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Petr Ustyugov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-08-
|
11
|
+
date: 2025-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: async-http
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '1.18'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: http-cookie
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.0.8
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.8
|
41
55
|
description: |
|
42
56
|
Reaxar is a lightweight asynchronous HTTP/REST client for Ruby, built on top of async-http.
|
43
57
|
It features automatic cookie management, support for redirects and relative navigation,
|
@@ -55,11 +69,16 @@ files:
|
|
55
69
|
- lib/reaxar/element/a.rb
|
56
70
|
- lib/reaxar/element/iframe.rb
|
57
71
|
- lib/reaxar/element/img.rb
|
72
|
+
- lib/reaxar/middleware/cookies.rb
|
58
73
|
- lib/reaxar/middleware/log.rb
|
59
74
|
- lib/reaxar/middleware/middleware_abstract.rb
|
60
75
|
- lib/reaxar/middleware/middleware_stack.rb
|
61
76
|
- lib/reaxar/middleware/redirect.rb
|
62
77
|
- lib/reaxar/page.rb
|
78
|
+
- lib/reaxar/parser/base_parser.rb
|
79
|
+
- lib/reaxar/parser/factory_parser.rb
|
80
|
+
- lib/reaxar/parser/html_parser.rb
|
81
|
+
- lib/reaxar/parser/xml_parser.rb
|
63
82
|
- lib/reaxar/version.rb
|
64
83
|
homepage: https://github.com/senap/reaxar
|
65
84
|
licenses:
|