reaxar 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 01e89fabba1ce9ad462932c803b4b2b1e6a53e7b3ed15ad341623ba680603a76
4
- data.tar.gz: 6d53ae34f6e18b0575a04233c0e9a859db3840b0a41f68615c3e26c22041e86e
3
+ metadata.gz: 77d79539eb20d915f08e88a6f913ec33e0941590735546d388e2d9cf10d6fd46
4
+ data.tar.gz: 2afcb3488c6c786b8ecd30ff1cc956ab70a6cae37be002794a03c7cade539541
5
5
  SHA512:
6
- metadata.gz: 74fd153a23aea5ece45bdd6b673e49e500033ea51e7c029e09c3c9970472d779cf83faadd2ccc5449c504c02b076eb13ea7ad544b1385c35682513c08210936a
7
- data.tar.gz: ed241bd8d808511990df8b62beb7ee664f857e5feb476fa11461c91252800fa2f5d545f71570ce65abca6826dfd4d50238c57931bed9f4fe43062192630d8cbb
6
+ metadata.gz: 4f38c65236f0a2c9e6a5162b7b903f909b4acf3db191a7a0c463bc70845b24e0fee79d60ba6056a580a04c32f1aac3a41077786227955299b837311168ed3bc2
7
+ data.tar.gz: 6349cb5b45f988829a41ee3b3bb718e1dd18143a025bba7080e55b356cd5c6da822901ceef9b916e6be21a944b590e0eae355bc6c711d0a2daee08ab43263f41
data/lib/reaxar/client.rb CHANGED
@@ -1,8 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'ostruct'
3
4
  require 'async/http/internet'
4
5
  require_relative 'middleware/middleware_stack'
5
6
  require_relative 'middleware/redirect'
7
+ require_relative 'middleware/cookies'
6
8
  require_relative 'middleware/log'
7
9
 
8
10
  module Reaxar
@@ -67,13 +69,11 @@ module Reaxar
67
69
  # @param body [Object, nil] The request body (for POST, etc.).
68
70
  # @return [Object] The processed HTTP response.
69
71
  def request(uri, method, body = nil) # rubocop:disable Metrics/MethodLength
70
- request_env = {
71
- uri:,
72
- method:,
73
- body:,
74
- headers: {},
75
- cookies: @cookies
76
- }
72
+ request_env = OpenStruct.new(uri: uri,
73
+ method: method,
74
+ body: body,
75
+ headers: {},
76
+ cookies:)
77
77
 
78
78
  loop do
79
79
  # Process request through middleware
@@ -111,8 +111,6 @@ module Reaxar
111
111
  # @return [Object] The HTTP response.
112
112
  def execute_http_request(uri, method, body, headers)
113
113
  url = URI(uri)
114
- headers = headers.merge(headers_with_cookies(url))
115
-
116
114
  case method
117
115
  when :get
118
116
  @internet.get(url, headers)
@@ -121,35 +119,5 @@ module Reaxar
121
119
  @internet.post(url, headers, URI.encode_www_form(body))
122
120
  end
123
121
  end
124
-
125
- # Builds headers including cookies for the given URL.
126
- # @param url [URI] The URI object.
127
- # @return [Hash] The headers including the 'Cookie' header if cookies are present.
128
- def headers_with_cookies(url)
129
- return {} if @cookies.empty?
130
-
131
- domain_cookies = @cookies.select { |_key, cookie| cookie[:domain] == url.host }
132
- cookie_string = domain_cookies.map { |key, cookie| "#{key}=#{cookie[:value]}" }.join('; ')
133
-
134
- { 'Cookie' => cookie_string }
135
- end
136
-
137
- # Updates the client's cookies from the response.
138
- # @param response [Object] The HTTP response object.
139
- # @return [void]
140
- def update_cookies(response)
141
- return unless response.headers['set-cookie']
142
-
143
- response.headers['set-cookie'].split("\n").each do |cookie|
144
- name, value = cookie.split('=', 2).map(&:strip)
145
- value = value.split(';').first
146
-
147
- @cookies[name] = {
148
- value:,
149
- domain: response.endpoint.host,
150
- path: '/'
151
- }
152
- end
153
- end
154
122
  end
155
123
  end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'http-cookie'
4
+
5
module Reaxar
  module Middleware
    # Middleware that keeps an HTTP cookie jar in sync with requests and responses.
    #
    # On construction the jar is loaded from the configured file when that file
    # exists. Outgoing requests get a `cookie` header built from the jar entries
    # that match the request URI; incoming `set-cookie` values are parsed into
    # the jar, which is then written back to the jar file.
    #
    # @example Usage in HTTP client middleware stack
    #   client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
    #
    class Cookies < MiddlewareAbstract
      # Builds the middleware and restores a previously saved jar, if any.
      #
      # @param args [Hash] Configuration options.
      # @option args [String, nil] :jar_file Path to the cookie jar file.
      #   When omitted, cookies are kept in memory only and never persisted.
      #
      # @return [void]
      def initialize(args = {}) # rubocop:disable Lint/MissingSuper
        @jar_file = args[:jar_file]
        @jar = HTTP::CookieJar.new
        # File.exist?(nil) raises TypeError, so guard against a missing path first.
        @jar.load(@jar_file) if @jar_file && File.exist?(@jar_file)
      end

      # Attaches the cookies matching the request URI to the outgoing request.
      #
      # When no cookie matches, the `cookie` header is removed rather than set
      # to an empty string, so a value left over from a previous hop that reused
      # the same headers object is never sent to a different host.
      #
      # @param request [#uri, #headers] The request environment.
      #   NOTE(review): presumably the OpenStruct built by the client - confirm.
      # @return [Object] The same request object, with its headers updated.
      def process_request(request)
        cookie_header = HTTP::Cookie.cookie_value(@jar.cookies(request.uri))

        if cookie_header.empty?
          request.headers.delete('cookie')
        else
          request.headers['cookie'] = cookie_header
        end

        request
      end

      # Stores every `set-cookie` value of the response in the jar.
      #
      # The jar file is only rewritten when the response actually carried
      # cookies and a jar file was configured.
      #
      # @param response [#headers] The HTTP response received.
      # @param request [#[]] The request environment; `request[:uri]` is used
      #   as the origin of the received cookies.
      # @return [Object] The unmodified response.
      def process_response(response, request)
        # Array() accepts nil, a single String, or an Array-like header value.
        received = Array(response.headers['set-cookie'])
        return response if received.empty?

        received.each { |value| @jar.parse(value, request[:uri]) }
        @jar.save(@jar_file) if @jar_file
        response
      end
    end
  end
end
data/lib/reaxar/page.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'nokogiri'
4
4
  require_relative 'element/a'
5
+ require_relative 'parser/factory_parser'
5
6
 
6
7
  module Reaxar
7
8
  # Represents a web page and provides methods for interacting with its content.
@@ -44,6 +45,13 @@ module Reaxar
44
45
  Async { new(url, client, &block) }
45
46
  end
46
47
 
48
# Shuts down the HTTP client held by this page by delegating to `@client.close`.
#
# @return [void]
def close = @client.close
54
+
47
55
  # Initializes a new Page instance.
48
56
  # @param url [String] The URL of the page.
49
57
  # @param client [Client, nil] Optional HTTP client.
@@ -51,13 +59,35 @@ module Reaxar
51
59
  def initialize(url, client = nil)
52
60
  @url = url
53
61
  @client = client || Client.new(self.class.logger)
62
+ @client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
54
63
  @client.use Reaxar::Middleware::Redirect
55
64
  @response = @client.get(url)
56
- @document = Nokogiri::HTML(@response.read)
65
+ @document = Reaxar::Parser::FactoryParser.call(content: @response.read,
66
+ mime_type: @response.headers['content-type'])
57
67
 
58
68
  yield self if block_given?
59
69
  end
60
70
 
71
# Response headers as a plain hash whose keys are downcased.
#
# The result is computed once and cached in `@headers`; while there is no
# response the method returns nil and nothing is cached.
#
# @return [Hash{String => String}, nil]
def headers
  return @headers if @headers

  raw_headers = @response&.headers
  @headers = raw_headers&.to_h&.transform_keys { |name| name.downcase }
end
77
+
78
# Looks up the `content-type` entry of the response headers.
#
# @return [String, nil] The header value, or nil when there are no headers
#   or the header is absent.
def content_type
  response_headers = headers
  return if response_headers.nil?

  response_headers['content-type']
end
83
+
84
# Size of the response body.
#
# Prefers the `content-length` header value as received; when that header is
# missing, the byte size of `html` is used instead.
#
# @return [String, Integer] The header value, or the computed byte size.
def content_length
  declared = headers&.[]('content-length')
  declared || html.bytesize
end
90
+
61
91
  # Returns the title of the page.
62
92
  # @return [String, nil] The page title or nil if not found.
63
93
  def title
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module Reaxar
  module Parser
    # Common ancestor of every Reaxar parser.
    #
    # It stores the raw content and declares the {#parse} contract; concrete
    # parsers override {#parse} to turn that content into a parsed object.
    #
    # @abstract
    class BaseParser
      # @param content [String] The raw content to be parsed.
      def initialize(content)
        @content = content
      end

      # Parses the stored content. Must be overridden by subclasses.
      #
      # @return [Object] The parsed result, as defined by the subclass.
      #
      # @raise [NotImplementedError] When the subclass does not override this method.
      #
      # @abstract
      def parse
        message = format("%s has not implemented method '%s'", self.class, __method__)
        raise NotImplementedError, message
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'html_parser'
4
+ require_relative 'xml_parser'
5
+
6
module Reaxar
  module Parser
    # Factory that picks a parser class by MIME type and runs it.
    #
    # A registry maps normalized MIME types (parameters stripped, lowercased)
    # to parser classes. A parser class must accept the content in its
    # initializer and respond to `#parse`.
    #
    # @example Parsing HTML content
    #   Reaxar::Parser::FactoryParser.call(
    #     content: "<html><body>Hello</body></html>",
    #     mime_type: "text/html"
    #   )
    #
    # @example Registering a custom parser
    #   Reaxar::Parser::FactoryParser.register(
    #     "application/json",
    #     MyJsonParser
    #   )
    #
    class FactoryParser
      class << self
        # Selects the parser registered for the MIME type and parses the content.
        #
        # @param content [String] The raw content to parse.
        # @param mime_type [String, Symbol, nil] The MIME type of the content,
        #   optionally followed by parameters such as `; charset=utf-8`.
        #
        # @return [Object] The result of the selected parser's `#parse`.
        #
        # @raise [NotImplementedError] If no parser is registered for the MIME
        #   type (including a missing or blank MIME type).
        def call(content:, mime_type:)
          parser = registry[normalize_mime_type(mime_type)]
          raise NotImplementedError, "No parser for MIME type: #{mime_type}" unless parser

          parser.new(content).parse
        end

        # Normalizes a MIME type: drops parameters, trims whitespace, lowercases.
        #
        # @param mime_type [String, Symbol, nil] The MIME type to normalize.
        # @return [String] The normalized MIME type; empty for nil or blank input.
        def normalize_mime_type(mime_type)
          # ''.split(';') is [], so `first` may be nil; `to_s` keeps nil and
          # blank input from raising NoMethodError on `strip`.
          mime_type.to_s.split(';', 2).first.to_s.strip.downcase
        end

        # Registers a parser class for a MIME type.
        #
        # The key is normalized the same way lookups are, so a type registered
        # with different casing or with parameters is still found by {call}.
        #
        # @param mime_type [String, Symbol] The MIME type to associate with the parser.
        # @param klass [Class<#parse>] The parser class to use.
        # @return [void]
        def register(mime_type, klass)
          registry[normalize_mime_type(mime_type)] = klass
        end

        # Returns the registry, created with the built-in parsers on first use.
        #
        # @return [Hash{String => Class}] The MIME type to parser class mapping.
        def registry
          @registry ||= {
            'text/html' => Reaxar::Parser::HtmlParser,
            'text/xml' => Reaxar::Parser::XmlParser,
            'application/xml' => Reaxar::Parser::XmlParser
          }
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base_parser'
4
+
5
module Reaxar
  module Parser
    # HTML parser backed by {https://nokogiri.org/ Nokogiri}.
    #
    # The content handed to the constructor is passed to `Nokogiri::HTML`,
    # and the resulting document is returned for further processing.
    #
    # @example Parsing HTML
    #   parser = Reaxar::Parser::HtmlParser.new("<html><body>Hello</body></html>")
    #   doc = parser.parse
    #   doc.at('body').text # => "Hello"
    #
    class HtmlParser < BaseParser
      # Builds a Nokogiri HTML document from the stored content.
      #
      # @return [Nokogiri::HTML::Document] The parsed HTML document.
      def parse = Nokogiri::HTML(@content)
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base_parser'
4
+
5
module Reaxar
  module Parser
    # XML parser backed by {https://nokogiri.org/ Nokogiri}.
    #
    # The content handed to the constructor is passed to `Nokogiri::XML`,
    # and the resulting document is returned for further processing.
    #
    # @example Parsing XML
    #   parser = Reaxar::Parser::XmlParser.new("<root><message>Hello</message></root>")
    #   doc = parser.parse
    #   doc.at('message').text # => "Hello"
    #
    class XmlParser < BaseParser
      # Builds a Nokogiri XML document from the stored content.
      #
      # @return [Nokogiri::XML::Document] The parsed XML document.
      def parse = Nokogiri::XML(@content)
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Reaxar
4
- VERSION = '0.2.0'
4
+ VERSION = '0.2.1'
5
5
  end
data/lib/reaxar.rb CHANGED
@@ -5,6 +5,11 @@ require_relative 'reaxar/page'
5
5
  require_relative 'reaxar/element/a'
6
6
  require_relative 'reaxar/middleware/redirect'
7
7
 
8
+ require_relative 'reaxar/middleware/cookies'
9
+ require_relative 'reaxar/parser/base_parser'
10
+ require_relative 'reaxar/parser/html_parser'
11
+ require_relative 'reaxar/parser/factory_parser'
12
+
8
13
  module Reaxar
9
14
  class Error < StandardError; end
10
15
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: reaxar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Petr Ustyugov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-08-05 00:00:00.000000000 Z
11
+ date: 2025-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: async-http
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '1.18'
41
+ - !ruby/object:Gem::Dependency
42
+ name: http-cookie
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 1.0.8
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.8
41
55
  description: |
42
56
  Reaxar is a lightweight asynchronous HTTP/REST client for Ruby, built on top of async-http.
43
57
  It features automatic cookie management, support for redirects and relative navigation,
@@ -55,11 +69,16 @@ files:
55
69
  - lib/reaxar/element/a.rb
56
70
  - lib/reaxar/element/iframe.rb
57
71
  - lib/reaxar/element/img.rb
72
+ - lib/reaxar/middleware/cookies.rb
58
73
  - lib/reaxar/middleware/log.rb
59
74
  - lib/reaxar/middleware/middleware_abstract.rb
60
75
  - lib/reaxar/middleware/middleware_stack.rb
61
76
  - lib/reaxar/middleware/redirect.rb
62
77
  - lib/reaxar/page.rb
78
+ - lib/reaxar/parser/base_parser.rb
79
+ - lib/reaxar/parser/factory_parser.rb
80
+ - lib/reaxar/parser/html_parser.rb
81
+ - lib/reaxar/parser/xml_parser.rb
63
82
  - lib/reaxar/version.rb
64
83
  homepage: https://github.com/senap/reaxar
65
84
  licenses: