aranha-parsers 0.23.1 → 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6a199311d583a58c2c57bd9440cf4427cacb84b2dea969c576f1be3e0fba1a33
4
- data.tar.gz: 6f8ec7116bdeca022910267fcc27ebd64295fd70f93f99ceb0d409859c64601e
3
+ metadata.gz: 0e9f2435af805f1aaac850b47ade4df067af4517e9a8d44e39cc0aadc12ef8fe
4
+ data.tar.gz: 6e1146e598ca952c5f15f0d0c83deaec7382b6487996b489727642ec3ba5a553
5
5
  SHA512:
6
- metadata.gz: 75994fd4861bf9dab93c2fa9196ebb022004c9dcdc2d88702385d62d0b55fb81b985df82b42d980decd410e5c8d0dd8664e0e3bfe9b83c27f056f560b61da247
7
- data.tar.gz: 5798eebdd9d63c3cd6717bbc33a58feec336e0d033df20181db42a49fb19b70e01b7264d8cdaa303bb1c649ed950d64793b612425ada4d65f29afeb1c84b7f64
6
+ metadata.gz: 72d12c157d957b8238d2c9f8d39a545d06aa974e9ac49e7103b64c69648e946ecd8c439b626f1196b789a8eb30c17bc3fdf0fb46956999ddacdbbf08d37f4de0
7
+ data.tar.gz: ee5fe7392f18a318a64ad35bd16341118c4254d978288c64a62ddbaa85e6ff7997018311317614e5c48f1c27f159d547c33151f8ae1c5f768b3a191e343a5faa
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ module Firefox
8
+ class RequestFromFirefox
9
+ BASE_URL_SUBPATH = 'base_url'
10
+ BODY_SUBPATH = 'body'
11
+ REQUEST_SUBPATH = 'request'
12
+
13
+ class << self
14
+ def from_directory(path)
15
+ path = path.to_pathname
16
+ body_path = path.join(BODY_SUBPATH)
17
+ new(
18
+ path.join(BASE_URL_SUBPATH).read.strip,
19
+ ::Aranha::Parsers::Firefox::RequestHeaderFromFirefox
20
+ .from_file(path.join(REQUEST_SUBPATH)),
21
+ body_path.file? ? body_path.read : nil
22
+ )
23
+ end
24
+ end
25
+
26
+ enable_simple_cache
27
+ common_constructor :the_base_uri, :header, :body, default: [nil] do
28
+ self.the_base_uri = the_base_uri.to_uri
29
+ end
30
+
31
+ def to_uri_source
32
+ {
33
+ method: header.verb,
34
+ url: url,
35
+ headers: header.headers,
36
+ body: body
37
+ }
38
+ end
39
+
40
+ def url
41
+ (the_base_uri + header.uri).to_s
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ module Parsers
5
+ module Firefox
6
+ class RequestHeaderFromFirefox
7
+ class << self
8
+ def from_file(path)
9
+ new(path.to_pathname.read)
10
+ end
11
+ end
12
+
13
+ FIRST_LINE_PARSER = /\A(\S+)\s(\S+)\s(\S+)\z/.to_parser do |m|
14
+ { verb: m[1], uri: m[2], version: m[3] }
15
+ end
16
+
17
+ HEADER_LINE_PARSER = /\A([^:]+):\s+(.+)\z/.to_parser do |m|
18
+ m[1..2]
19
+ end
20
+
21
+ enable_simple_cache
22
+
23
+ common_constructor :string
24
+
25
+ def to_h
26
+ %w[verb uri headers].index_with { |m| send(m) }
27
+ end
28
+
29
+ def headers
30
+ all_except_first_line.map { |line| HEADER_LINE_PARSER.parse!(line) }.to_h # rubocop:disable Style/MapToHash
31
+ end
32
+
33
+ def verb
34
+ parsed_first_line.fetch(:verb)
35
+ end
36
+
37
+ def uri
38
+ parsed_first_line.fetch(:uri)
39
+ end
40
+
41
+ private
42
+
43
+ def all_lines_uncached
44
+ string.each_line.map(&:strip)
45
+ end
46
+
47
+ def parsed_first_line_uncached
48
+ FIRST_LINE_PARSER.parse!(all_lines.first)
49
+ end
50
+
51
+ def all_except_first_line
52
+ all_lines[1..-1] # rubocop:disable Style/SlicingWithRange
53
+ end
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ module Firefox
8
+ class UriFromHar
9
+ class << self
10
+ def from_file(path)
11
+ new(::JSON.parse(path.to_pathname.read))
12
+ end
13
+ end
14
+
15
+ common_constructor :data
16
+
17
+ def result
18
+ data.fetch('log').fetch('entries').map { |e| e.fetch('request').fetch('url') }
19
+ end
20
+
21
+ def request_data; end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/require_sub'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ module Firefox
8
+ ::EacRubyUtils.require_sub __FILE__
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ class SourceAddress
8
+ class Base
9
+ acts_as_abstract
10
+ common_constructor :source
11
+ compare_by :source
12
+
13
+ # @return [String]
14
+ def content
15
+ raise_abstract_method __method__
16
+ end
17
+
18
+ # @return [Addressable::URI]
19
+ def uri
20
+ raise_abstract_method __method__
21
+ end
22
+
23
+ # @return [String]
24
+ def url
25
+ uri.to_s
26
+ end
27
+
28
+ # @return [Hash]
29
+ def source_as_hash
30
+ source_as_hash? ? source.with_indifferent_access : raise('source is not a Hash')
31
+ end
32
+
33
+ # @return [Boolean]
34
+ def source_as_hash?
35
+ source.is_a?(::Hash)
36
+ end
37
+
38
+ # @return [Addressable::URI]
39
+ def source_as_uri
40
+ source_as_uri? ? source.to_uri : raise('source is not a URI')
41
+ end
42
+
43
+ # @return [Boolean]
44
+ def source_as_uri?
45
+ source.to_uri.scheme.present?
46
+ end
47
+
48
+ # @return [Boolean]
49
+ def valid?
50
+ raise_abstract_method __method__
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'aranha/parsers/source_address/http_get'
4
+ require 'eac_ruby_utils/core_ext'
4
5
 
5
6
  module Aranha
6
7
  module Parsers
@@ -8,23 +9,23 @@ module Aranha
8
9
  class File < ::Aranha::Parsers::SourceAddress::HttpGet
9
10
  SCHEME = 'file://'
10
11
 
11
- class << self
12
- def valid_source?(source)
13
- source.to_s.start_with?("#{SCHEME}/", '/')
14
- end
15
- end
16
-
17
12
  def initialize(source)
18
13
  super(source.to_s.gsub(/\A#{Regexp.quote(SCHEME)}/, ''))
19
14
  end
20
15
 
21
- def url
22
- "#{SCHEME}#{source}"
23
- end
24
-
25
16
  def content
26
17
  ::File.read(source)
27
18
  end
19
+
20
+ # @return [Addressable::URI]
21
+ def uri
22
+ source_as_uri? ? source_as_uri : "#{SCHEME}#{source}".to_uri
23
+ end
24
+
25
+ # @return [Boolean]
26
+ def valid?
27
+ source.to_s.start_with?("#{SCHEME}/", '/')
28
+ end
28
29
  end
29
30
  end
30
31
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'aranha/parsers/source_address/fetch_content_error'
4
- require 'aranha/parsers/source_address/hash_http_base'
4
+ require 'aranha/parsers/source_address/base'
5
5
  require 'eac_envs/http/error'
6
6
  require 'eac_envs/http/request'
7
7
  require 'eac_ruby_utils/core_ext'
@@ -10,16 +10,11 @@ require 'yaml'
10
10
  module Aranha
11
11
  module Parsers
12
12
  class SourceAddress
13
- class HashHttpBase
13
+ class HashHttpBase < ::Aranha::Parsers::SourceAddress::Base
14
14
  class << self
15
15
  def http_method
16
16
  const_get 'HTTP_METHOD'
17
17
  end
18
-
19
- def valid_source?(source)
20
- source.is_a?(::Hash) &&
21
- source.with_indifferent_access[:method].to_s.downcase.strip == http_method.to_s
22
- end
23
18
  end
24
19
 
25
20
  DEFAULT_BODY = ''
@@ -31,11 +26,6 @@ module Aranha
31
26
 
32
27
  enable_simple_cache
33
28
 
34
- common_constructor :source do
35
- self.source = source.with_indifferent_access
36
- end
37
- compare_by :source
38
-
39
29
  def body
40
30
  param(:body, DEFAULT_BODY)
41
31
  end
@@ -48,12 +38,8 @@ module Aranha
48
38
  param(:headers, DEFAULT_HEADERS)
49
39
  end
50
40
 
51
- def url
52
- source.fetch(:url)
53
- end
54
-
55
41
  def serialize
56
- source.to_yaml
42
+ source_as_hash.to_yaml
57
43
  end
58
44
 
59
45
  def content
@@ -64,11 +50,22 @@ module Aranha
64
50
  end
65
51
 
66
52
  def param(key, default_value)
67
- source[key] || params[key] || default_value
53
+ source_as_hash[key] || params[key] || default_value
68
54
  end
69
55
 
70
56
  def params
71
- source[:params].if_present(DEFAULT_PARAMS)
57
+ source_as_hash[:params].if_present(DEFAULT_PARAMS)
58
+ end
59
+
60
+ # @return [Addressable::URI]
61
+ def uri
62
+ ::Addressable::URI.parse(source_as_hash.fetch(:url))
63
+ end
64
+
65
+ # @return [Boolean]
66
+ def valid?
67
+ source_as_hash? &&
68
+ source_as_hash[:method].to_s.downcase.strip == self.class.http_method.to_s
72
69
  end
73
70
 
74
71
  private
@@ -1,38 +1,26 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'addressable'
4
- require 'aranha/parsers/source_address/fetch_content_error'
4
+ require 'aranha/parsers/source_address/base'
5
5
  require 'eac_envs/http/error'
6
6
  require 'eac_envs/http/request'
7
7
 
8
8
  module Aranha
9
9
  module Parsers
10
10
  class SourceAddress
11
- class HttpGet
11
+ class HttpGet < ::Aranha::Parsers::SourceAddress::Base
12
12
  class << self
13
13
  def location_uri(source_uri, location)
14
14
  ::Addressable::URI.join(source_uri, location).to_s
15
15
  end
16
-
17
- def valid_source?(source)
18
- source.to_s =~ %r{\Ahttps?://}
19
- end
20
16
  end
21
17
 
22
- attr_reader :source
23
-
24
- def initialize(source)
25
- @source = source.to_s
26
- end
18
+ common_constructor :source, super_args: -> { [source.to_s] }
27
19
 
28
20
  def ==(other)
29
21
  self.class == other.class && source == other.source
30
22
  end
31
23
 
32
- def url
33
- source
34
- end
35
-
36
24
  def final_url
37
25
  content unless @final_url
38
26
  @final_url
@@ -51,6 +39,16 @@ module Aranha
51
39
  def serialize
52
40
  url
53
41
  end
42
+
43
+ # @return [Addressable::URI]
44
+ def uri
45
+ source_as_uri
46
+ end
47
+
48
+ # @return [Boolean]
49
+ def valid?
50
+ source.to_s =~ %r{\Ahttps?://}
51
+ end
54
52
  end
55
53
  end
56
54
  end
@@ -20,8 +20,10 @@ module Aranha
20
20
  def detect_sub(source)
21
21
  return source.sub if source.is_a?(self)
22
22
 
23
- SUBS.each do |sub|
24
- return sub.new(source) if sub.valid_source?(source)
23
+ SUBS.each do |sub_class|
24
+ sub_class.new(source).then do |sub|
25
+ return sub if sub.valid?
26
+ end
25
27
  end
26
28
  raise "No content fetcher found for source \"#{source}\""
27
29
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Aranha
4
4
  module Parsers
5
- VERSION = '0.23.1'
5
+ VERSION = '0.24.0'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha-parsers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.23.1
4
+ version: 0.24.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esquilo Azul Company
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-03 00:00:00.000000000 Z
11
+ date: 2024-10-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -51,6 +51,9 @@ dependencies:
51
51
  - - "~>"
52
52
  - !ruby/object:Gem::Version
53
53
  version: '0.6'
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 0.6.1
54
57
  type: :runtime
55
58
  prerelease: false
56
59
  version_requirements: !ruby/object:Gem::Requirement
@@ -58,6 +61,9 @@ dependencies:
58
61
  - - "~>"
59
62
  - !ruby/object:Gem::Version
60
63
  version: '0.6'
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 0.6.1
61
67
  - !ruby/object:Gem::Dependency
62
68
  name: eac_ruby_utils
63
69
  requirement: !ruby/object:Gem::Requirement
@@ -128,6 +134,10 @@ extra_rdoc_files: []
128
134
  files:
129
135
  - lib/aranha/parsers.rb
130
136
  - lib/aranha/parsers/base.rb
137
+ - lib/aranha/parsers/firefox.rb
138
+ - lib/aranha/parsers/firefox/request_from_firefox.rb
139
+ - lib/aranha/parsers/firefox/request_header_from_firefox.rb
140
+ - lib/aranha/parsers/firefox/uri_from_har.rb
131
141
  - lib/aranha/parsers/html.rb
132
142
  - lib/aranha/parsers/html/base.rb
133
143
  - lib/aranha/parsers/html/item.rb
@@ -145,6 +155,7 @@ files:
145
155
  - lib/aranha/parsers/patches.rb
146
156
  - lib/aranha/parsers/patches/ofx_parser.rb
147
157
  - lib/aranha/parsers/source_address.rb
158
+ - lib/aranha/parsers/source_address/base.rb
148
159
  - lib/aranha/parsers/source_address/fetch_content_error.rb
149
160
  - lib/aranha/parsers/source_address/file.rb
150
161
  - lib/aranha/parsers/source_address/hash_http_base.rb