aranha-parsers 0.10.0 → 0.12.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f6ec27959167dfbae56e1fb00bfdbedad5bfe022a0708e493c623ca259df5c5d
4
- data.tar.gz: c87db56022bfdf05f3f5cc99f58fbba4f0e73a5e8ddb7683d2569aabbf7bf28a
3
+ metadata.gz: 46d8f5c357e5bedf69c56ee590df3efec25f5eaf814af3456442d9b3d6323e9d
4
+ data.tar.gz: 1c0f0f1642003a1bb3d742b61f6d1d796fa27bc3518ac72c0bac83cbe43104f8
5
5
  SHA512:
6
- metadata.gz: 9116a522005c0a01b650bc889ce1a4c70f7096b478febc2e1b24f6596a4d94c77e750d81e616bd70b6ca557a1c61e56d55fcfe2066ff4f62aea3a9a14237ed01
7
- data.tar.gz: 5080f643d1da251f05afb094a1c7ac9c5cbbe140a15f0d05b77ec87591e700b148ba4766615ee1853284d24674e2720149f25e6858b485e0123ebd3add74c4db
6
+ metadata.gz: 61aae78c8114ba83cc4b4d9c81731dd478b1f0df851abcd9a0068309d79488ac4e639cdea78a358aebc66034547211ec46761aaa366b0d48c2cfb4162fe8f26e
7
+ data.tar.gz: a8fab9c04756953d1fd680f636067f14d4eff15396c99de585da710362a5d88e42bdf83be84169524e821a67fb23fae2617249ed9cf51e54bcaa125927071b37
@@ -12,7 +12,7 @@ module Aranha
12
12
  def from_content(content)
13
13
  ::EacRubyUtils::Fs::Temp.on_file do |path|
14
14
  ::File.open(path.to_s, 'w:UTF-8') do |f|
15
- f.write content.force_encoding('UTF-8')
15
+ f.write content.dup.force_encoding('UTF-8')
16
16
  end
17
17
  r = new(path.to_path)
18
18
  r.content
@@ -37,21 +37,21 @@ module Aranha
37
37
  delegate :url, to: :source_address
38
38
 
39
39
  def content
40
- @content ||= begin
41
- s = source_address.content
42
- log_content(s)
43
- s
44
- end
40
+ @content ||= log_content(source_address_content)
45
41
  end
46
42
 
43
+ # @return [String]
44
+ delegate :content, to: :source_address, prefix: true
45
+
47
46
  private
48
47
 
48
+ # @return [String]
49
49
  def log_content(content, suffix = '')
50
50
  path = log_file(suffix)
51
51
 
52
- return unless path
52
+ File.open(path, 'wb') { |file| file.write(content) } if path
53
53
 
54
- File.open(path, 'wb') { |file| file.write(content) }
54
+ content
55
55
  end
56
56
 
57
57
  def log_file(suffix)
@@ -10,11 +10,15 @@ module Aranha
10
10
  items_data
11
11
  end
12
12
 
13
+ def item_data(item)
14
+ item
15
+ end
16
+
13
17
  def items_data
14
18
  count = 0
15
19
  @data ||= nokogiri.xpath(items_xpath).map do |m|
16
20
  count += 1
17
- node_parser.parse(m)
21
+ item_data(node_parser.parse(m))
18
22
  end
19
23
  rescue StandardError => e
20
24
  raise StandardError, "#{e.message} (Count: #{count})"
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'aranha/parsers/source_address/fetch_content_error'
4
+ require 'aranha/parsers/source_address/hash_http_base'
5
+ require 'eac_ruby_utils/core_ext'
6
+ require 'faraday_middleware'
7
+ require 'yaml'
8
+
9
+ module Aranha
10
+ module Parsers
11
+ class SourceAddress
12
+ class HashHttpBase
13
+ class << self
14
+ def http_method
15
+ const_get 'HTTP_METHOD'
16
+ end
17
+
18
+ def valid_source?(source)
19
+ source.is_a?(::Hash) &&
20
+ source.with_indifferent_access[:method].to_s.downcase.strip == http_method.to_s
21
+ end
22
+ end
23
+
24
+ DEFAULT_BODY = ''
25
+ DEFAULT_FOLLOW_REDIRECT = true
26
+ DEFAULT_HEADERS = {}.freeze
27
+ DEFAULT_PARAMS = {}.freeze
28
+
29
+ common_constructor :source do
30
+ self.source = source.with_indifferent_access
31
+ end
32
+ compare_by :source
33
+
34
+ def body
35
+ param(:body, DEFAULT_BODY)
36
+ end
37
+
38
+ def follow_redirect?
39
+ param(:follow_redirect, DEFAULT_FOLLOW_REDIRECT)
40
+ end
41
+
42
+ def headers
43
+ param(:headers, DEFAULT_HEADERS)
44
+ end
45
+
46
+ def url
47
+ source.fetch(:url)
48
+ end
49
+
50
+ def serialize
51
+ source.to_yaml
52
+ end
53
+
54
+ # @return [Faraday]
55
+ def faraday_connection
56
+ ::Faraday.new do |f|
57
+ f.response :follow_redirects if follow_redirect?
58
+ end
59
+ end
60
+
61
+ def faraday_request
62
+ faraday_connection.send(self.class.http_method, url) do |req|
63
+ headers.if_present { |v| req.headers = v }
64
+ body.if_present { |v| req.body = v }
65
+ end
66
+ end
67
+
68
+ def content
69
+ req = faraday_request
70
+ return req.body if req.status == 200
71
+
72
+ raise ::Aranha::Parsers::SourceAddress::FetchContentError,
73
+ "Get #{url} returned #{req.status.to_i}"
74
+ end
75
+
76
+ def param(key, default_value)
77
+ source[key] || params[key] || default_value
78
+ end
79
+
80
+ def params
81
+ source[:params].if_present(DEFAULT_PARAMS)
82
+ end
83
+ end
84
+ end
85
+ end
86
+ end
@@ -1,24 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'aranha/parsers/source_address/hash_http_post'
3
+ require 'aranha/parsers/source_address/hash_http_base'
4
4
 
5
5
  module Aranha
6
6
  module Parsers
7
7
  class SourceAddress
8
- class HashHttpGet < ::Aranha::Parsers::SourceAddress::HashHttpPost
9
- class << self
10
- def valid_source?(source)
11
- source.is_a?(::Hash) &&
12
- source.with_indifferent_access[:method].to_s.downcase.strip == 'get'
13
- end
14
- end
15
-
16
- def content
17
- HTTPClient.new.get_content(
18
- source[:url],
19
- source[:params]
20
- )
21
- end
8
+ class HashHttpGet < ::Aranha::Parsers::SourceAddress::HashHttpBase
9
+ HTTP_METHOD = :get
22
10
  end
23
11
  end
24
12
  end
@@ -1,44 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'active_support/core_ext/hash/indifferent_access'
4
- require 'httpclient'
5
- require 'yaml'
3
+ require 'aranha/parsers/source_address/hash_http_base'
6
4
 
7
5
  module Aranha
8
6
  module Parsers
9
7
  class SourceAddress
10
- class HashHttpPost
11
- class << self
12
- def valid_source?(source)
13
- source.is_a?(::Hash) &&
14
- source.with_indifferent_access[:method].to_s.downcase.strip == 'post'
15
- end
16
- end
17
-
18
- attr_reader :source
19
-
20
- def initialize(source)
21
- @source = source.with_indifferent_access
22
- end
23
-
24
- def ==(other)
25
- self.class == other.class && source == other.source
26
- end
27
-
28
- def url
29
- source.fetch(:url)
30
- end
31
-
32
- def serialize
33
- source.to_yaml
34
- end
35
-
36
- def content
37
- HTTPClient.new.post_content(
38
- source[:url],
39
- source[:params].merge(follow_redirect: true)
40
- )
41
- end
8
+ class HashHttpPost < ::Aranha::Parsers::SourceAddress::HashHttpBase
9
+ HTTP_METHOD = :post
42
10
  end
43
11
  end
44
12
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Aranha
4
4
  module Parsers
5
- VERSION = '0.10.0'
5
+ VERSION = '0.12.1'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aranha-parsers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.12.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esquilo Azul Company
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-27 00:00:00.000000000 Z
11
+ date: 2022-05-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -58,14 +58,20 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0.74'
61
+ version: '0.92'
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 0.92.1
62
65
  type: :runtime
63
66
  prerelease: false
64
67
  version_requirements: !ruby/object:Gem::Requirement
65
68
  requirements:
66
69
  - - "~>"
67
70
  - !ruby/object:Gem::Version
68
- version: '0.74'
71
+ version: '0.92'
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 0.92.1
69
75
  - !ruby/object:Gem::Dependency
70
76
  name: faraday_middleware
71
77
  requirement: !ruby/object:Gem::Requirement
@@ -80,26 +86,6 @@ dependencies:
80
86
  - - ">="
81
87
  - !ruby/object:Gem::Version
82
88
  version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: httpclient
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - "~>"
88
- - !ruby/object:Gem::Version
89
- version: '2.8'
90
- - - ">="
91
- - !ruby/object:Gem::Version
92
- version: 2.8.3
93
- type: :runtime
94
- prerelease: false
95
- version_requirements: !ruby/object:Gem::Requirement
96
- requirements:
97
- - - "~>"
98
- - !ruby/object:Gem::Version
99
- version: '2.8'
100
- - - ">="
101
- - !ruby/object:Gem::Version
102
- version: 2.8.3
103
89
  - !ruby/object:Gem::Dependency
104
90
  name: nokogiri
105
91
  requirement: !ruby/object:Gem::Requirement
@@ -183,6 +169,7 @@ files:
183
169
  - lib/aranha/parsers/source_address.rb
184
170
  - lib/aranha/parsers/source_address/fetch_content_error.rb
185
171
  - lib/aranha/parsers/source_address/file.rb
172
+ - lib/aranha/parsers/source_address/hash_http_base.rb
186
173
  - lib/aranha/parsers/source_address/hash_http_get.rb
187
174
  - lib/aranha/parsers/source_address/hash_http_post.rb
188
175
  - lib/aranha/parsers/source_address/http_get.rb