aranha-parsers 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/aranha/parsers.rb +2 -0
- data/lib/aranha/parsers/base.rb +4 -1
- data/lib/aranha/parsers/html.rb +1 -0
- data/lib/aranha/parsers/html/item.rb +1 -0
- data/lib/aranha/parsers/html/node.rb +1 -0
- data/lib/aranha/parsers/html/node/default.rb +4 -1
- data/lib/aranha/parsers/source_address.rb +2 -1
- data/lib/aranha/parsers/source_address/http_get.rb +1 -0
- data/lib/aranha/parsers/source_target_fixtures.rb +3 -3
- data/lib/aranha/parsers/spec/source_target_fixtures_example.rb +9 -9
- data/lib/aranha/parsers/version.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b5dea73c22dac3845360b773ca5b07d3c122dbc5d108c1e24c9bd778187aedd3
|
4
|
+
data.tar.gz: 836cae17dea5908018ca41e0a73566bebc35c2df67709b0c8ea2ea3509eaaeb9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 199a2471fd78740fa54ef4f31446e8ff7f4ea74ea03d32328a5b9ee56ab99be31db87c217a544d070ce5d743cab3211847d570363d41acbe820169031a6b2715
|
7
|
+
data.tar.gz: b4a9d90a266fedd2a11d497565d0ac9a10295f8ce87835689d10ab4a71b5c0bae4e2d654afe0089a9dfe190861b0bc77d77f26bcac188b772b05dc491d18b660
|
data/lib/aranha/parsers.rb
CHANGED
data/lib/aranha/parsers/base.rb
CHANGED
@@ -30,12 +30,14 @@ module Aranha
|
|
30
30
|
path = log_file(suffix)
|
31
31
|
|
32
32
|
return unless path
|
33
|
+
|
33
34
|
File.open(path, 'wb') { |file| file.write(content) }
|
34
35
|
end
|
35
36
|
|
36
37
|
def log_file(suffix)
|
37
38
|
dir = log_parsers_dir
|
38
39
|
return nil unless dir
|
40
|
+
|
39
41
|
f = ::File.join(dir, "#{self.class.name.parameterize}#{suffix}.log")
|
40
42
|
FileUtils.mkdir_p(File.dirname(f))
|
41
43
|
f
|
@@ -44,6 +46,7 @@ module Aranha
|
|
44
46
|
def log_parsers_dir
|
45
47
|
return ENV[LOG_DIR_ENVVAR] if ENV[LOG_DIR_ENVVAR]
|
46
48
|
return ::Rails.root.join('log', 'parsers') if rails_root_exist?
|
49
|
+
|
47
50
|
nil
|
48
51
|
end
|
49
52
|
|
@@ -51,7 +54,7 @@ module Aranha
|
|
51
54
|
::Rails.root
|
52
55
|
true
|
53
56
|
rescue NameError
|
54
|
-
|
57
|
+
false
|
55
58
|
end
|
56
59
|
end
|
57
60
|
end
|
data/lib/aranha/parsers/html.rb
CHANGED
@@ -19,11 +19,13 @@ module Aranha
|
|
19
19
|
root = node.at_xpath(xpath)
|
20
20
|
if root.blank?
|
21
21
|
return nil unless required
|
22
|
+
|
22
23
|
raise "No node found (Xpath: #{xpath})"
|
23
24
|
end
|
24
25
|
result = string_recursive(root)
|
25
|
-
return result
|
26
|
+
return result if result.present?
|
26
27
|
return nil unless required
|
28
|
+
|
27
29
|
raise "String blank (Xpath: #{xpath})"
|
28
30
|
end
|
29
31
|
|
@@ -109,6 +111,7 @@ module Aranha
|
|
109
111
|
|
110
112
|
def string_recursive(node)
|
111
113
|
return sanitize_string(node.text) if node.is_a?(::Nokogiri::XML::Text)
|
114
|
+
|
112
115
|
s = ''
|
113
116
|
node.children.each do |child|
|
114
117
|
child_s = string_recursive(child)
|
@@ -20,6 +20,7 @@ module Aranha
|
|
20
20
|
|
21
21
|
def detect_sub(source)
|
22
22
|
return source.sub if source.is_a?(self)
|
23
|
+
|
23
24
|
SUBS.each do |sub|
|
24
25
|
return sub.new(source) if sub.valid_source?(source)
|
25
26
|
end
|
@@ -27,7 +28,7 @@ module Aranha
|
|
27
28
|
end
|
28
29
|
|
29
30
|
def deserialize(string)
|
30
|
-
new(string =~ %r{\A[a-z]+://} ? string.strip : ::YAML.load(string))
|
31
|
+
new(string =~ %r{\A[a-z]+://} ? string.strip : ::YAML.load(string)) # rubocop:disable Security/YAMLLoad
|
31
32
|
end
|
32
33
|
|
33
34
|
def from_file(path)
|
@@ -1,4 +1,3 @@
|
|
1
|
-
# encoding: UTF-8
|
2
1
|
# frozen_string_literal: true
|
3
2
|
|
4
3
|
require 'active_support/core_ext/object'
|
@@ -50,7 +49,7 @@ module Aranha
|
|
50
49
|
def fixture_file(basename, suffix)
|
51
50
|
prefix = "#{basename}.#{suffix}"
|
52
51
|
Dir.foreach(fixtures_directory) do |item|
|
53
|
-
next if
|
52
|
+
next if %w[. ..].include?(item)
|
54
53
|
return File.expand_path(item, fixtures_directory) if item.starts_with?(prefix)
|
55
54
|
end
|
56
55
|
nil
|
@@ -59,7 +58,8 @@ module Aranha
|
|
59
58
|
def sources_targets_basenames
|
60
59
|
basenames = Set.new
|
61
60
|
Dir.foreach(fixtures_directory) do |item|
|
62
|
-
next if
|
61
|
+
next if %w[. ..].include?(item)
|
62
|
+
|
63
63
|
b = self.class.source_target_basename(item)
|
64
64
|
basenames << b if b.present?
|
65
65
|
end
|
@@ -10,12 +10,12 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
10
10
|
end
|
11
11
|
|
12
12
|
context 'in fixtures directory' do
|
13
|
-
it '
|
14
|
-
expect(source_target_fixtures.source_target_files.count).to be > 0
|
13
|
+
it 'has at least one file' do
|
14
|
+
expect(source_target_fixtures.source_target_files.count).to be > 0 # rubocop:disable Style/NumericPredicate
|
15
15
|
end
|
16
16
|
|
17
17
|
if ENV['WRITE_TARGET_FIXTURES']
|
18
|
-
it '
|
18
|
+
it 'writes target data for all files' do
|
19
19
|
source_target_fixtures.source_files.each do |source_file|
|
20
20
|
sd = sort_results(source_data(source_file))
|
21
21
|
basename = ::Aranha::Parsers::SourceTargetFixtures.source_target_basename(source_file)
|
@@ -24,7 +24,7 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
else
|
27
|
-
it '
|
27
|
+
it 'parses data for all files' do
|
28
28
|
source_target_fixtures.source_target_files.each do |st|
|
29
29
|
assert_source_target_complete(st)
|
30
30
|
sd = source_data(st.source)
|
@@ -39,9 +39,9 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
39
39
|
@source_target_fixtures ||= ::Aranha::Parsers::SourceTargetFixtures.new(fixtures_dir)
|
40
40
|
end
|
41
41
|
|
42
|
-
def assert_source_target_complete(
|
43
|
-
expect(
|
44
|
-
expect(
|
42
|
+
def assert_source_target_complete(source_target)
|
43
|
+
expect(source_target.source).to(be_truthy, "Source not found (Target: #{source_target.target})")
|
44
|
+
expect(source_target.target).to(be_truthy, "Target not found (Source: #{source_target.source})")
|
45
45
|
end
|
46
46
|
|
47
47
|
def source_data(source_file)
|
@@ -55,7 +55,7 @@ RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable
|
|
55
55
|
)
|
56
56
|
end
|
57
57
|
|
58
|
-
def sort_results(
|
59
|
-
|
58
|
+
def sort_results(results)
|
59
|
+
results
|
60
60
|
end
|
61
61
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha-parsers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Esquilo Azul Company
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|