aranha-parsers 0.2.1 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/aranha/parsers.rb +5 -5
- data/lib/aranha/parsers/base.rb +25 -4
- data/lib/aranha/parsers/html.rb +4 -4
- data/lib/aranha/parsers/html/item.rb +1 -0
- data/lib/aranha/parsers/html/item_list.rb +1 -1
- data/lib/aranha/parsers/html/node.rb +4 -2
- data/lib/aranha/parsers/html/node/default.rb +4 -1
- data/lib/aranha/parsers/patches.rb +11 -0
- data/lib/aranha/parsers/patches/ofx_parser.rb +38 -0
- data/lib/aranha/parsers/source_address.rb +6 -6
- data/lib/aranha/parsers/source_address/http_get.rb +1 -0
- data/lib/aranha/parsers/source_target_fixtures.rb +11 -4
- data/lib/aranha/parsers/spec/source_target_fixtures_example.rb +16 -11
- data/lib/aranha/parsers/version.rb +3 -1
- metadata +42 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b28eaf4a05a75966ce8828c5dcf3f86d885653e8b8abd815bba944371b9bcebc
|
4
|
+
data.tar.gz: '08f37d7c3a325af408368c338a386ca8f244a4ce36702e172f49b11e8b653256'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b7c286fea023c0f148c7a56931fa844555008edb1767ee3e8d3912969561257aba285cd67921282e4c3d9dd9155e27f6f4aa2fd61e5ed8030cd2b8c23e5f103
|
7
|
+
data.tar.gz: b616cb0faf36dfff89f615c123a4f4ddfaf45b42a0302473f92fbb893d282bf4cf958cb46b1047608d5cf58a70723485e00f2fd8601d2843e4a206e0dadbf143
|
data/lib/aranha/parsers.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/require_sub'
|
4
|
+
|
1
5
|
module Aranha
|
2
6
|
module Parsers
|
3
|
-
|
4
|
-
require 'aranha/parsers/html'
|
5
|
-
require 'aranha/parsers/invalid_state_exception'
|
6
|
-
require 'aranha/parsers/source_address'
|
7
|
-
require 'aranha/parsers/version'
|
7
|
+
::EacRubyUtils.require_sub __FILE__
|
8
8
|
end
|
9
9
|
end
|
data/lib/aranha/parsers/base.rb
CHANGED
@@ -3,10 +3,26 @@
|
|
3
3
|
require 'open-uri'
|
4
4
|
require 'fileutils'
|
5
5
|
require 'aranha/parsers/source_address'
|
6
|
+
require 'eac_ruby_utils/fs/temp'
|
6
7
|
|
7
8
|
module Aranha
|
8
9
|
module Parsers
|
9
10
|
class Base
|
11
|
+
class << self
|
12
|
+
def from_content(content)
|
13
|
+
::EacRubyUtils::Fs::Temp.on_file do |path|
|
14
|
+
path.write(content)
|
15
|
+
r = new(path.to_path)
|
16
|
+
r.content
|
17
|
+
r
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def parse_content(content)
|
22
|
+
from_content(content).data
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
10
26
|
LOG_DIR_ENVVAR = 'ARANHA_PARSERS_LOG_DIR'
|
11
27
|
|
12
28
|
attr_reader :source_address
|
@@ -19,9 +35,11 @@ module Aranha
|
|
19
35
|
delegate :url, to: :source_address
|
20
36
|
|
21
37
|
def content
|
22
|
-
|
23
|
-
|
24
|
-
|
38
|
+
@content ||= begin
|
39
|
+
s = source_address.content
|
40
|
+
log_content(s)
|
41
|
+
s
|
42
|
+
end
|
25
43
|
end
|
26
44
|
|
27
45
|
private
|
@@ -30,12 +48,14 @@ module Aranha
|
|
30
48
|
path = log_file(suffix)
|
31
49
|
|
32
50
|
return unless path
|
51
|
+
|
33
52
|
File.open(path, 'wb') { |file| file.write(content) }
|
34
53
|
end
|
35
54
|
|
36
55
|
def log_file(suffix)
|
37
56
|
dir = log_parsers_dir
|
38
57
|
return nil unless dir
|
58
|
+
|
39
59
|
f = ::File.join(dir, "#{self.class.name.parameterize}#{suffix}.log")
|
40
60
|
FileUtils.mkdir_p(File.dirname(f))
|
41
61
|
f
|
@@ -44,6 +64,7 @@ module Aranha
|
|
44
64
|
def log_parsers_dir
|
45
65
|
return ENV[LOG_DIR_ENVVAR] if ENV[LOG_DIR_ENVVAR]
|
46
66
|
return ::Rails.root.join('log', 'parsers') if rails_root_exist?
|
67
|
+
|
47
68
|
nil
|
48
69
|
end
|
49
70
|
|
@@ -51,7 +72,7 @@ module Aranha
|
|
51
72
|
::Rails.root
|
52
73
|
true
|
53
74
|
rescue NameError
|
54
|
-
|
75
|
+
false
|
55
76
|
end
|
56
77
|
end
|
57
78
|
end
|
data/lib/aranha/parsers/html.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/require_sub'
|
4
|
+
|
2
5
|
module Aranha
|
3
6
|
module Parsers
|
4
7
|
module Html
|
5
|
-
|
6
|
-
require 'aranha/parsers/html/item'
|
7
|
-
require 'aranha/parsers/html/item_list'
|
8
|
-
require 'aranha/parsers/html/node'
|
8
|
+
::EacRubyUtils.require_sub __FILE__
|
9
9
|
end
|
10
10
|
end
|
11
11
|
end
|
@@ -1,10 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/require_sub'
|
4
|
+
|
2
5
|
module Aranha
|
3
6
|
module Parsers
|
4
7
|
module Html
|
5
8
|
module Node
|
6
|
-
|
7
|
-
require 'aranha/parsers/html/node/default'
|
9
|
+
::EacRubyUtils.require_sub __FILE__
|
8
10
|
end
|
9
11
|
end
|
10
12
|
end
|
@@ -19,11 +19,13 @@ module Aranha
|
|
19
19
|
root = node.at_xpath(xpath)
|
20
20
|
if root.blank?
|
21
21
|
return nil unless required
|
22
|
+
|
22
23
|
raise "No node found (Xpath: #{xpath})"
|
23
24
|
end
|
24
25
|
result = string_recursive(root)
|
25
|
-
return result
|
26
|
+
return result if result.present?
|
26
27
|
return nil unless required
|
28
|
+
|
27
29
|
raise "String blank (Xpath: #{xpath})"
|
28
30
|
end
|
29
31
|
|
@@ -109,6 +111,7 @@ module Aranha
|
|
109
111
|
|
110
112
|
def string_recursive(node)
|
111
113
|
return sanitize_string(node.text) if node.is_a?(::Nokogiri::XML::Text)
|
114
|
+
|
112
115
|
s = ''
|
113
116
|
node.children.each do |child|
|
114
117
|
child_s = string_recursive(child)
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ofx-parser'
|
4
|
+
|
5
|
+
module Aranha
|
6
|
+
module Parsers
|
7
|
+
module Patches
|
8
|
+
module OfxParser
|
9
|
+
module OfxParser
|
10
|
+
def self.included(base)
|
11
|
+
base.class_eval do
|
12
|
+
class << self
|
13
|
+
prepend ClassMethods
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
module ClassMethods
|
19
|
+
def build_transaction(transaction)
|
20
|
+
r = super
|
21
|
+
r.currate = (transaction / 'CURRENCY/CURRATE').inner_text
|
22
|
+
r
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
module Transaction
|
28
|
+
attr_accessor :currate, :cursym
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
require 'eac_ruby_utils/patch'
|
36
|
+
|
37
|
+
::EacRubyUtils.patch(::OfxParser::OfxParser, ::Aranha::Parsers::Patches::OfxParser::OfxParser)
|
38
|
+
::EacRubyUtils.patch(::OfxParser::Transaction, ::Aranha::Parsers::Patches::OfxParser::Transaction)
|
@@ -1,15 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'yaml'
|
4
3
|
require 'active_support/core_ext/module/delegation'
|
5
|
-
require '
|
6
|
-
require '
|
7
|
-
require 'aranha/parsers/source_address/http_get'
|
8
|
-
require 'aranha/parsers/source_address/file'
|
4
|
+
require 'eac_ruby_utils/require_sub'
|
5
|
+
require 'yaml'
|
9
6
|
|
10
7
|
module Aranha
|
11
8
|
module Parsers
|
12
9
|
class SourceAddress
|
10
|
+
::EacRubyUtils.require_sub __FILE__
|
11
|
+
|
13
12
|
class << self
|
14
13
|
SUBS = [
|
15
14
|
::Aranha::Parsers::SourceAddress::HashHttpGet,
|
@@ -20,6 +19,7 @@ module Aranha
|
|
20
19
|
|
21
20
|
def detect_sub(source)
|
22
21
|
return source.sub if source.is_a?(self)
|
22
|
+
|
23
23
|
SUBS.each do |sub|
|
24
24
|
return sub.new(source) if sub.valid_source?(source)
|
25
25
|
end
|
@@ -27,7 +27,7 @@ module Aranha
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def deserialize(string)
|
30
|
-
new(string =~ %r{\A[a-z]+://} ? string.strip : ::YAML.load(string))
|
30
|
+
new(string =~ %r{\A[a-z]+://} ? string.strip : ::YAML.load(string)) # rubocop:disable Security/YAMLLoad
|
31
31
|
end
|
32
32
|
|
33
33
|
def from_file(path)
|
@@ -1,9 +1,7 @@
|
|
1
|
-
# encoding: UTF-8
|
2
1
|
# frozen_string_literal: true
|
3
2
|
|
4
3
|
require 'active_support/core_ext/object'
|
5
4
|
require 'active_support/core_ext/string'
|
6
|
-
require 'yaml'
|
7
5
|
|
8
6
|
module Aranha
|
9
7
|
module Parsers
|
@@ -37,6 +35,14 @@ module Aranha
|
|
37
35
|
r
|
38
36
|
end
|
39
37
|
|
38
|
+
def target_files
|
39
|
+
r = []
|
40
|
+
source_target_files.each do |st|
|
41
|
+
r << st.target if st.target
|
42
|
+
end
|
43
|
+
r
|
44
|
+
end
|
45
|
+
|
40
46
|
def target_file(basename)
|
41
47
|
fixture_file(basename, 'target')
|
42
48
|
end
|
@@ -50,7 +56,7 @@ module Aranha
|
|
50
56
|
def fixture_file(basename, suffix)
|
51
57
|
prefix = "#{basename}.#{suffix}"
|
52
58
|
Dir.foreach(fixtures_directory) do |item|
|
53
|
-
next if
|
59
|
+
next if %w[. ..].include?(item)
|
54
60
|
return File.expand_path(item, fixtures_directory) if item.starts_with?(prefix)
|
55
61
|
end
|
56
62
|
nil
|
@@ -59,7 +65,8 @@ module Aranha
|
|
59
65
|
def sources_targets_basenames
|
60
66
|
basenames = Set.new
|
61
67
|
Dir.foreach(fixtures_directory) do |item|
|
62
|
-
next if
|
68
|
+
next if %w[. ..].include?(item)
|
69
|
+
|
63
70
|
b = self.class.source_target_basename(item)
|
64
71
|
basenames << b if b.present?
|
65
72
|
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'aranha/parsers/source_target_fixtures'
|
4
|
+
require 'yaml'
|
4
5
|
|
5
6
|
RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable Metrics/BlockLength
|
6
7
|
let(:spec_file) { spec_file }
|
@@ -9,13 +10,13 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
9
10
|
expect(::File.directory?(fixtures_dir)).to be true
|
10
11
|
end
|
11
12
|
|
12
|
-
context '
|
13
|
-
it '
|
14
|
-
expect(source_target_fixtures.source_target_files.count).to be > 0
|
13
|
+
context 'with fixtures directory' do
|
14
|
+
it 'has at least one file' do
|
15
|
+
expect(source_target_fixtures.source_target_files.count).to be > 0 # rubocop:disable Style/NumericPredicate
|
15
16
|
end
|
16
17
|
|
17
18
|
if ENV['WRITE_TARGET_FIXTURES']
|
18
|
-
it '
|
19
|
+
it 'writes target data for all files' do
|
19
20
|
source_target_fixtures.source_files.each do |source_file|
|
20
21
|
sd = sort_results(source_data(source_file))
|
21
22
|
basename = ::Aranha::Parsers::SourceTargetFixtures.source_target_basename(source_file)
|
@@ -24,11 +25,11 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
24
25
|
end
|
25
26
|
end
|
26
27
|
else
|
27
|
-
it '
|
28
|
+
it 'parses data for all files' do
|
28
29
|
source_target_fixtures.source_target_files.each do |st|
|
29
30
|
assert_source_target_complete(st)
|
30
31
|
sd = source_data(st.source)
|
31
|
-
td =
|
32
|
+
td = target_data(st.target)
|
32
33
|
expect(sort_results(sd)).to eq(sort_results(td))
|
33
34
|
end
|
34
35
|
end
|
@@ -39,9 +40,9 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
39
40
|
@source_target_fixtures ||= ::Aranha::Parsers::SourceTargetFixtures.new(fixtures_dir)
|
40
41
|
end
|
41
42
|
|
42
|
-
def assert_source_target_complete(
|
43
|
-
expect(
|
44
|
-
expect(
|
43
|
+
def assert_source_target_complete(source_target)
|
44
|
+
expect(source_target.source).to(be_truthy, "Source not found (Target: #{source_target.target})")
|
45
|
+
expect(source_target.target).to(be_truthy, "Target not found (Source: #{source_target.source})")
|
45
46
|
end
|
46
47
|
|
47
48
|
def source_data(source_file)
|
@@ -55,7 +56,11 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
55
56
|
)
|
56
57
|
end
|
57
58
|
|
58
|
-
def sort_results(
|
59
|
-
|
59
|
+
def sort_results(results)
|
60
|
+
results
|
61
|
+
end
|
62
|
+
|
63
|
+
def target_data(target_file)
|
64
|
+
::YAML.load_file(target_file)
|
60
65
|
end
|
61
66
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha-parsers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Esquilo Azul Company
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-09-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -52,6 +52,26 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.9.10
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: eac_ruby_utils
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.33'
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 0.33.1
|
65
|
+
type: :runtime
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - "~>"
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0.33'
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 0.33.1
|
55
75
|
- !ruby/object:Gem::Dependency
|
56
76
|
name: httpclient
|
57
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -73,19 +93,33 @@ dependencies:
|
|
73
93
|
- !ruby/object:Gem::Version
|
74
94
|
version: 2.8.3
|
75
95
|
- !ruby/object:Gem::Dependency
|
76
|
-
name:
|
96
|
+
name: ofx-parser
|
97
|
+
requirement: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - "~>"
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: 1.1.0
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
requirements:
|
106
|
+
- - "~>"
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: 1.1.0
|
109
|
+
- !ruby/object:Gem::Dependency
|
110
|
+
name: eac_ruby_gem_support
|
77
111
|
requirement: !ruby/object:Gem::Requirement
|
78
112
|
requirements:
|
79
113
|
- - "~>"
|
80
114
|
- !ruby/object:Gem::Version
|
81
|
-
version: '
|
115
|
+
version: '0.1'
|
82
116
|
type: :development
|
83
117
|
prerelease: false
|
84
118
|
version_requirements: !ruby/object:Gem::Requirement
|
85
119
|
requirements:
|
86
120
|
- - "~>"
|
87
121
|
- !ruby/object:Gem::Version
|
88
|
-
version: '
|
122
|
+
version: '0.1'
|
89
123
|
description:
|
90
124
|
email:
|
91
125
|
executables: []
|
@@ -103,6 +137,8 @@ files:
|
|
103
137
|
- lib/aranha/parsers/html/node/base.rb
|
104
138
|
- lib/aranha/parsers/html/node/default.rb
|
105
139
|
- lib/aranha/parsers/invalid_state_exception.rb
|
140
|
+
- lib/aranha/parsers/patches.rb
|
141
|
+
- lib/aranha/parsers/patches/ofx_parser.rb
|
106
142
|
- lib/aranha/parsers/source_address.rb
|
107
143
|
- lib/aranha/parsers/source_address/file.rb
|
108
144
|
- lib/aranha/parsers/source_address/hash_http_get.rb
|
@@ -129,8 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
129
165
|
- !ruby/object:Gem::Version
|
130
166
|
version: '0'
|
131
167
|
requirements: []
|
132
|
-
|
133
|
-
rubygems_version: 2.7.7
|
168
|
+
rubygems_version: 3.0.8
|
134
169
|
signing_key:
|
135
170
|
specification_version: 4
|
136
171
|
summary: Parsers' utilities for Ruby.
|