aranha-parsers 0.2.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/aranha/parsers.rb +5 -5
- data/lib/aranha/parsers/base.rb +25 -4
- data/lib/aranha/parsers/html.rb +4 -4
- data/lib/aranha/parsers/html/item.rb +1 -0
- data/lib/aranha/parsers/html/item_list.rb +1 -1
- data/lib/aranha/parsers/html/node.rb +4 -2
- data/lib/aranha/parsers/html/node/default.rb +4 -1
- data/lib/aranha/parsers/patches.rb +11 -0
- data/lib/aranha/parsers/patches/ofx_parser.rb +38 -0
- data/lib/aranha/parsers/source_address.rb +6 -6
- data/lib/aranha/parsers/source_address/http_get.rb +1 -0
- data/lib/aranha/parsers/source_target_fixtures.rb +11 -4
- data/lib/aranha/parsers/spec/source_target_fixtures_example.rb +16 -11
- data/lib/aranha/parsers/version.rb +3 -1
- metadata +42 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b28eaf4a05a75966ce8828c5dcf3f86d885653e8b8abd815bba944371b9bcebc
|
4
|
+
data.tar.gz: '08f37d7c3a325af408368c338a386ca8f244a4ce36702e172f49b11e8b653256'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b7c286fea023c0f148c7a56931fa844555008edb1767ee3e8d3912969561257aba285cd67921282e4c3d9dd9155e27f6f4aa2fd61e5ed8030cd2b8c23e5f103
|
7
|
+
data.tar.gz: b616cb0faf36dfff89f615c123a4f4ddfaf45b42a0302473f92fbb893d282bf4cf958cb46b1047608d5cf58a70723485e00f2fd8601d2843e4a206e0dadbf143
|
data/lib/aranha/parsers.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/require_sub'
|
4
|
+
|
1
5
|
module Aranha
|
2
6
|
module Parsers
|
3
|
-
|
4
|
-
require 'aranha/parsers/html'
|
5
|
-
require 'aranha/parsers/invalid_state_exception'
|
6
|
-
require 'aranha/parsers/source_address'
|
7
|
-
require 'aranha/parsers/version'
|
7
|
+
::EacRubyUtils.require_sub __FILE__
|
8
8
|
end
|
9
9
|
end
|
data/lib/aranha/parsers/base.rb
CHANGED
@@ -3,10 +3,26 @@
|
|
3
3
|
require 'open-uri'
|
4
4
|
require 'fileutils'
|
5
5
|
require 'aranha/parsers/source_address'
|
6
|
+
require 'eac_ruby_utils/fs/temp'
|
6
7
|
|
7
8
|
module Aranha
|
8
9
|
module Parsers
|
9
10
|
class Base
|
11
|
+
class << self
|
12
|
+
def from_content(content)
|
13
|
+
::EacRubyUtils::Fs::Temp.on_file do |path|
|
14
|
+
path.write(content)
|
15
|
+
r = new(path.to_path)
|
16
|
+
r.content
|
17
|
+
r
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def parse_content(content)
|
22
|
+
from_content(content).data
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
10
26
|
LOG_DIR_ENVVAR = 'ARANHA_PARSERS_LOG_DIR'
|
11
27
|
|
12
28
|
attr_reader :source_address
|
@@ -19,9 +35,11 @@ module Aranha
|
|
19
35
|
delegate :url, to: :source_address
|
20
36
|
|
21
37
|
def content
|
22
|
-
|
23
|
-
|
24
|
-
|
38
|
+
@content ||= begin
|
39
|
+
s = source_address.content
|
40
|
+
log_content(s)
|
41
|
+
s
|
42
|
+
end
|
25
43
|
end
|
26
44
|
|
27
45
|
private
|
@@ -30,12 +48,14 @@ module Aranha
|
|
30
48
|
path = log_file(suffix)
|
31
49
|
|
32
50
|
return unless path
|
51
|
+
|
33
52
|
File.open(path, 'wb') { |file| file.write(content) }
|
34
53
|
end
|
35
54
|
|
36
55
|
def log_file(suffix)
|
37
56
|
dir = log_parsers_dir
|
38
57
|
return nil unless dir
|
58
|
+
|
39
59
|
f = ::File.join(dir, "#{self.class.name.parameterize}#{suffix}.log")
|
40
60
|
FileUtils.mkdir_p(File.dirname(f))
|
41
61
|
f
|
@@ -44,6 +64,7 @@ module Aranha
|
|
44
64
|
def log_parsers_dir
|
45
65
|
return ENV[LOG_DIR_ENVVAR] if ENV[LOG_DIR_ENVVAR]
|
46
66
|
return ::Rails.root.join('log', 'parsers') if rails_root_exist?
|
67
|
+
|
47
68
|
nil
|
48
69
|
end
|
49
70
|
|
@@ -51,7 +72,7 @@ module Aranha
|
|
51
72
|
::Rails.root
|
52
73
|
true
|
53
74
|
rescue NameError
|
54
|
-
|
75
|
+
false
|
55
76
|
end
|
56
77
|
end
|
57
78
|
end
|
data/lib/aranha/parsers/html.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/require_sub'
|
4
|
+
|
2
5
|
module Aranha
|
3
6
|
module Parsers
|
4
7
|
module Html
|
5
|
-
|
6
|
-
require 'aranha/parsers/html/item'
|
7
|
-
require 'aranha/parsers/html/item_list'
|
8
|
-
require 'aranha/parsers/html/node'
|
8
|
+
::EacRubyUtils.require_sub __FILE__
|
9
9
|
end
|
10
10
|
end
|
11
11
|
end
|
data/lib/aranha/parsers/html/node.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/require_sub'
|
4
|
+
|
2
5
|
module Aranha
|
3
6
|
module Parsers
|
4
7
|
module Html
|
5
8
|
module Node
|
6
|
-
|
7
|
-
require 'aranha/parsers/html/node/default'
|
9
|
+
::EacRubyUtils.require_sub __FILE__
|
8
10
|
end
|
9
11
|
end
|
10
12
|
end
|
data/lib/aranha/parsers/html/node/default.rb
CHANGED
@@ -19,11 +19,13 @@ module Aranha
|
|
19
19
|
root = node.at_xpath(xpath)
|
20
20
|
if root.blank?
|
21
21
|
return nil unless required
|
22
|
+
|
22
23
|
raise "No node found (Xpath: #{xpath})"
|
23
24
|
end
|
24
25
|
result = string_recursive(root)
|
25
|
-
return result
|
26
|
+
return result if result.present?
|
26
27
|
return nil unless required
|
28
|
+
|
27
29
|
raise "String blank (Xpath: #{xpath})"
|
28
30
|
end
|
29
31
|
|
@@ -109,6 +111,7 @@ module Aranha
|
|
109
111
|
|
110
112
|
def string_recursive(node)
|
111
113
|
return sanitize_string(node.text) if node.is_a?(::Nokogiri::XML::Text)
|
114
|
+
|
112
115
|
s = ''
|
113
116
|
node.children.each do |child|
|
114
117
|
child_s = string_recursive(child)
|
data/lib/aranha/parsers/patches/ofx_parser.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ofx-parser'
|
4
|
+
|
5
|
+
module Aranha
|
6
|
+
module Parsers
|
7
|
+
module Patches
|
8
|
+
module OfxParser
|
9
|
+
module OfxParser
|
10
|
+
def self.included(base)
|
11
|
+
base.class_eval do
|
12
|
+
class << self
|
13
|
+
prepend ClassMethods
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
module ClassMethods
|
19
|
+
def build_transaction(transaction)
|
20
|
+
r = super
|
21
|
+
r.currate = (transaction / 'CURRENCY/CURRATE').inner_text
|
22
|
+
r
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
module Transaction
|
28
|
+
attr_accessor :currate, :cursym
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
require 'eac_ruby_utils/patch'
|
36
|
+
|
37
|
+
::EacRubyUtils.patch(::OfxParser::OfxParser, ::Aranha::Parsers::Patches::OfxParser::OfxParser)
|
38
|
+
::EacRubyUtils.patch(::OfxParser::Transaction, ::Aranha::Parsers::Patches::OfxParser::Transaction)
|
data/lib/aranha/parsers/source_address.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'yaml'
|
4
3
|
require 'active_support/core_ext/module/delegation'
|
5
|
-
require '
|
6
|
-
require '
|
7
|
-
require 'aranha/parsers/source_address/http_get'
|
8
|
-
require 'aranha/parsers/source_address/file'
|
4
|
+
require 'eac_ruby_utils/require_sub'
|
5
|
+
require 'yaml'
|
9
6
|
|
10
7
|
module Aranha
|
11
8
|
module Parsers
|
12
9
|
class SourceAddress
|
10
|
+
::EacRubyUtils.require_sub __FILE__
|
11
|
+
|
13
12
|
class << self
|
14
13
|
SUBS = [
|
15
14
|
::Aranha::Parsers::SourceAddress::HashHttpGet,
|
@@ -20,6 +19,7 @@ module Aranha
|
|
20
19
|
|
21
20
|
def detect_sub(source)
|
22
21
|
return source.sub if source.is_a?(self)
|
22
|
+
|
23
23
|
SUBS.each do |sub|
|
24
24
|
return sub.new(source) if sub.valid_source?(source)
|
25
25
|
end
|
@@ -27,7 +27,7 @@ module Aranha
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def deserialize(string)
|
30
|
-
new(string =~ %r{\A[a-z]+://} ? string.strip : ::YAML.load(string))
|
30
|
+
new(string =~ %r{\A[a-z]+://} ? string.strip : ::YAML.load(string)) # rubocop:disable Security/YAMLLoad
|
31
31
|
end
|
32
32
|
|
33
33
|
def from_file(path)
|
data/lib/aranha/parsers/source_target_fixtures.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
# encoding: UTF-8
|
2
1
|
# frozen_string_literal: true
|
3
2
|
|
4
3
|
require 'active_support/core_ext/object'
|
5
4
|
require 'active_support/core_ext/string'
|
6
|
-
require 'yaml'
|
7
5
|
|
8
6
|
module Aranha
|
9
7
|
module Parsers
|
@@ -37,6 +35,14 @@ module Aranha
|
|
37
35
|
r
|
38
36
|
end
|
39
37
|
|
38
|
+
def target_files
|
39
|
+
r = []
|
40
|
+
source_target_files.each do |st|
|
41
|
+
r << st.target if st.target
|
42
|
+
end
|
43
|
+
r
|
44
|
+
end
|
45
|
+
|
40
46
|
def target_file(basename)
|
41
47
|
fixture_file(basename, 'target')
|
42
48
|
end
|
@@ -50,7 +56,7 @@ module Aranha
|
|
50
56
|
def fixture_file(basename, suffix)
|
51
57
|
prefix = "#{basename}.#{suffix}"
|
52
58
|
Dir.foreach(fixtures_directory) do |item|
|
53
|
-
next if
|
59
|
+
next if %w[. ..].include?(item)
|
54
60
|
return File.expand_path(item, fixtures_directory) if item.starts_with?(prefix)
|
55
61
|
end
|
56
62
|
nil
|
@@ -59,7 +65,8 @@ module Aranha
|
|
59
65
|
def sources_targets_basenames
|
60
66
|
basenames = Set.new
|
61
67
|
Dir.foreach(fixtures_directory) do |item|
|
62
|
-
next if
|
68
|
+
next if %w[. ..].include?(item)
|
69
|
+
|
63
70
|
b = self.class.source_target_basename(item)
|
64
71
|
basenames << b if b.present?
|
65
72
|
end
|
data/lib/aranha/parsers/spec/source_target_fixtures_example.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'aranha/parsers/source_target_fixtures'
|
4
|
+
require 'yaml'
|
4
5
|
|
5
6
|
RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable Metrics/BlockLength
|
6
7
|
let(:spec_file) { spec_file }
|
@@ -9,13 +10,13 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
9
10
|
expect(::File.directory?(fixtures_dir)).to be true
|
10
11
|
end
|
11
12
|
|
12
|
-
context '
|
13
|
-
it '
|
14
|
-
expect(source_target_fixtures.source_target_files.count).to be > 0
|
13
|
+
context 'with fixtures directory' do
|
14
|
+
it 'has at least one file' do
|
15
|
+
expect(source_target_fixtures.source_target_files.count).to be > 0 # rubocop:disable Style/NumericPredicate
|
15
16
|
end
|
16
17
|
|
17
18
|
if ENV['WRITE_TARGET_FIXTURES']
|
18
|
-
it '
|
19
|
+
it 'writes target data for all files' do
|
19
20
|
source_target_fixtures.source_files.each do |source_file|
|
20
21
|
sd = sort_results(source_data(source_file))
|
21
22
|
basename = ::Aranha::Parsers::SourceTargetFixtures.source_target_basename(source_file)
|
@@ -24,11 +25,11 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
24
25
|
end
|
25
26
|
end
|
26
27
|
else
|
27
|
-
it '
|
28
|
+
it 'parses data for all files' do
|
28
29
|
source_target_fixtures.source_target_files.each do |st|
|
29
30
|
assert_source_target_complete(st)
|
30
31
|
sd = source_data(st.source)
|
31
|
-
td =
|
32
|
+
td = target_data(st.target)
|
32
33
|
expect(sort_results(sd)).to eq(sort_results(td))
|
33
34
|
end
|
34
35
|
end
|
@@ -39,9 +40,9 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
39
40
|
@source_target_fixtures ||= ::Aranha::Parsers::SourceTargetFixtures.new(fixtures_dir)
|
40
41
|
end
|
41
42
|
|
42
|
-
def assert_source_target_complete(
|
43
|
-
expect(
|
44
|
-
expect(
|
43
|
+
def assert_source_target_complete(source_target)
|
44
|
+
expect(source_target.source).to(be_truthy, "Source not found (Target: #{source_target.target})")
|
45
|
+
expect(source_target.target).to(be_truthy, "Target not found (Source: #{source_target.source})")
|
45
46
|
end
|
46
47
|
|
47
48
|
def source_data(source_file)
|
@@ -55,7 +56,11 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
55
56
|
)
|
56
57
|
end
|
57
58
|
|
58
|
-
def sort_results(
|
59
|
-
|
59
|
+
def sort_results(results)
|
60
|
+
results
|
61
|
+
end
|
62
|
+
|
63
|
+
def target_data(target_file)
|
64
|
+
::YAML.load_file(target_file)
|
60
65
|
end
|
61
66
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha-parsers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Esquilo Azul Company
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-09-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -52,6 +52,26 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.9.10
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: eac_ruby_utils
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.33'
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 0.33.1
|
65
|
+
type: :runtime
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - "~>"
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0.33'
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 0.33.1
|
55
75
|
- !ruby/object:Gem::Dependency
|
56
76
|
name: httpclient
|
57
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -73,19 +93,33 @@ dependencies:
|
|
73
93
|
- !ruby/object:Gem::Version
|
74
94
|
version: 2.8.3
|
75
95
|
- !ruby/object:Gem::Dependency
|
76
|
-
name:
|
96
|
+
name: ofx-parser
|
97
|
+
requirement: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - "~>"
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: 1.1.0
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
requirements:
|
106
|
+
- - "~>"
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: 1.1.0
|
109
|
+
- !ruby/object:Gem::Dependency
|
110
|
+
name: eac_ruby_gem_support
|
77
111
|
requirement: !ruby/object:Gem::Requirement
|
78
112
|
requirements:
|
79
113
|
- - "~>"
|
80
114
|
- !ruby/object:Gem::Version
|
81
|
-
version: '
|
115
|
+
version: '0.1'
|
82
116
|
type: :development
|
83
117
|
prerelease: false
|
84
118
|
version_requirements: !ruby/object:Gem::Requirement
|
85
119
|
requirements:
|
86
120
|
- - "~>"
|
87
121
|
- !ruby/object:Gem::Version
|
88
|
-
version: '
|
122
|
+
version: '0.1'
|
89
123
|
description:
|
90
124
|
email:
|
91
125
|
executables: []
|
@@ -103,6 +137,8 @@ files:
|
|
103
137
|
- lib/aranha/parsers/html/node/base.rb
|
104
138
|
- lib/aranha/parsers/html/node/default.rb
|
105
139
|
- lib/aranha/parsers/invalid_state_exception.rb
|
140
|
+
- lib/aranha/parsers/patches.rb
|
141
|
+
- lib/aranha/parsers/patches/ofx_parser.rb
|
106
142
|
- lib/aranha/parsers/source_address.rb
|
107
143
|
- lib/aranha/parsers/source_address/file.rb
|
108
144
|
- lib/aranha/parsers/source_address/hash_http_get.rb
|
@@ -129,8 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
129
165
|
- !ruby/object:Gem::Version
|
130
166
|
version: '0'
|
131
167
|
requirements: []
|
132
|
-
|
133
|
-
rubygems_version: 2.7.7
|
168
|
+
rubygems_version: 3.0.8
|
134
169
|
signing_key:
|
135
170
|
specification_version: 4
|
136
171
|
summary: Parsers' utilities for Ruby.
|