ehbrs-tools 0.15.0 → 0.16.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ehbrs/runner.rb +4 -7
- data/lib/ehbrs/runner/finances.rb +4 -7
- data/lib/ehbrs/runner/finances/bb_browser.rb +5 -10
- data/lib/ehbrs/runner/fs.rb +4 -7
- data/lib/ehbrs/runner/fs/used_space.rb +4 -5
- data/lib/ehbrs/runner/google.rb +4 -7
- data/lib/ehbrs/runner/google/translate.rb +7 -12
- data/lib/ehbrs/runner/self.rb +4 -7
- data/lib/ehbrs/runner/self/test.rb +5 -10
- data/lib/ehbrs/runner/vg.rb +4 -7
- data/lib/ehbrs/runner/vg/ips.rb +9 -14
- data/lib/ehbrs/runner/vg/wii.rb +10 -13
- data/lib/ehbrs/runner/videos.rb +3 -5
- data/lib/ehbrs/runner/videos/extract.rb +10 -13
- data/lib/ehbrs/runner/videos/probe.rb +6 -9
- data/lib/ehbrs/runner/videos/series.rb +5 -8
- data/lib/ehbrs/runner/videos/series/rename.rb +9 -12
- data/lib/ehbrs/runner/videos/unsupported.rb +9 -13
- data/lib/ehbrs/runner/web_utils.rb +5 -7
- data/lib/ehbrs/runner/web_utils/videos.rb +4 -6
- data/lib/ehbrs/runner/web_utils/videos/download.rb +8 -11
- data/lib/ehbrs/runner/web_utils/videos/upload.rb +8 -11
- data/lib/ehbrs/tools/version.rb +1 -1
- data/lib/ehbrs/vg/wii/wit/parsers/dump.rb +10 -5
- data/lib/ehbrs/videos/file.rb +4 -8
- data/lib/ehbrs/videos/series/rename/file/options.rb +3 -13
- data/lib/ehbrs/videos/track.rb +20 -10
- data/lib/ehbrs/videos/unsupported/checks/codec_extra_unlisted.rb +2 -0
- data/lib/ehbrs/videos/unsupported/checks/codec_extra_unsupported.rb +2 -0
- data/lib/ehbrs/videos/unsupported/checks/codec_unlisted.rb +2 -0
- data/lib/ehbrs/videos/unsupported/checks/codec_unsupported.rb +2 -0
- data/lib/ehbrs/videos/unsupported/checks/invalid_extension.rb +2 -0
- data/lib/ehbrs/videos/unsupported/fixes/supported_codec.rb +1 -1
- data/lib/ehbrs/videos/unsupported/profiles/base.rb +17 -3
- data/lib/ehbrs/videos/unsupported/profiles/philco.rb +5 -4
- data/vendor/aranha-parsers/Gemfile +5 -0
- data/vendor/aranha-parsers/aranha-parsers.gemspec +23 -0
- data/vendor/aranha-parsers/lib/aranha/parsers.rb +9 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/base.rb +79 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/html.rb +11 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/html/base.rb +47 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/html/item.rb +24 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/html/item_list.rb +29 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/html/node.rb +13 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/html/node/base.rb +36 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/html/node/default.rb +126 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/invalid_state_exception.rb +8 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/patches.rb +11 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/patches/ofx_parser.rb +38 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/source_address.rb +55 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/source_address/file.rb +31 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/source_address/hash_http_get.rb +25 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/source_address/hash_http_post.rb +45 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/source_address/http_get.rb +49 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/source_target_fixtures.rb +77 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/spec/source_target_fixtures_example.rb +78 -0
- data/vendor/aranha-parsers/lib/aranha/parsers/version.rb +7 -0
- data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_address/http_get_spec.rb +21 -0
- data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_address_spec.rb +74 -0
- data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_target_fixtures_spec.rb +27 -0
- data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_target_fixtures_spec_files/stub1.source.txt +1 -0
- data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_target_fixtures_spec_files/stub1.target.html +1 -0
- data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_target_fixtures_spec_files/stub2.source.html +1 -0
- data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_target_fixtures_spec_files/stub3.target.yaml +1 -0
- data/vendor/aranha-parsers/spec/lib/rubocop_check_spec.rb +7 -0
- data/vendor/aranha-parsers/spec/spec_helper.rb +8 -0
- data/vendor/eac_cli/eac_cli.gemspec +1 -1
- data/vendor/eac_cli/lib/eac_cli/definition.rb +49 -22
- data/vendor/eac_cli/lib/eac_cli/definition/alternative.rb +83 -0
- data/vendor/eac_cli/lib/eac_cli/definition/base_option.rb +17 -1
- data/vendor/eac_cli/lib/eac_cli/{parser/options_collection.rb → definition/help_formatter.rb} +20 -49
- data/vendor/eac_cli/lib/eac_cli/definition/positional_argument.rb +21 -4
- data/vendor/eac_cli/lib/eac_cli/docopt/doc_builder.rb +18 -40
- data/vendor/eac_cli/lib/eac_cli/docopt/doc_builder/alternative.rb +50 -0
- data/vendor/eac_cli/lib/eac_cli/docopt/runner_extension.rb +1 -0
- data/vendor/eac_cli/lib/eac_cli/parser.rb +23 -3
- data/vendor/eac_cli/lib/eac_cli/parser/alternative.rb +92 -0
- data/vendor/eac_cli/lib/eac_cli/parser/alternative/argv.rb +17 -0
- data/vendor/eac_cli/lib/eac_cli/parser/alternative/double_dash.rb +24 -0
- data/vendor/eac_cli/lib/eac_cli/parser/alternative/options.rb +58 -0
- data/vendor/eac_cli/lib/eac_cli/parser/alternative/positionals.rb +30 -0
- data/vendor/eac_cli/lib/eac_cli/parser/collector.rb +4 -0
- data/vendor/eac_cli/lib/eac_cli/patches/object/runner_with.rb +2 -1
- data/vendor/eac_cli/lib/eac_cli/runner.rb +17 -5
- data/vendor/eac_cli/lib/eac_cli/runner/context.rb +19 -2
- data/vendor/eac_cli/lib/eac_cli/runner/exit.rb +13 -0
- data/vendor/eac_cli/lib/eac_cli/runner_with/help.rb +18 -1
- data/vendor/eac_cli/lib/eac_cli/runner_with/output_file.rb +5 -1
- data/vendor/eac_cli/lib/eac_cli/runner_with/subcommands.rb +101 -0
- data/vendor/eac_cli/lib/eac_cli/version.rb +1 -1
- data/vendor/eac_cli/spec/lib/eac_cli/definition/alternative_spec.rb +14 -0
- data/vendor/eac_cli/spec/lib/eac_cli/docopt/runner_extension_spec.rb +35 -0
- data/vendor/eac_cli/spec/lib/eac_cli/parser/alternative_spec.rb +140 -0
- data/vendor/eac_cli/spec/lib/eac_cli/runner_spec.rb +59 -39
- data/vendor/eac_cli/spec/lib/eac_cli/runner_with/help_spec.rb +42 -0
- data/vendor/eac_cli/spec/lib/eac_cli/runner_with/output_file_spec.rb +53 -0
- data/vendor/eac_cli/spec/lib/eac_cli/runner_with/subcommands_spec.rb +85 -0
- data/vendor/eac_docker/lib/eac_docker/container.rb +24 -0
- data/vendor/eac_docker/lib/eac_docker/images/coded.rb +39 -0
- data/vendor/eac_docker/lib/eac_docker/images/templatized.rb +26 -0
- data/vendor/eac_docker/lib/eac_docker/registry.rb +17 -0
- data/vendor/eac_docker/lib/eac_docker/version.rb +1 -1
- data/vendor/eac_docker/spec/lib/eac_docker/images/coded_spec.rb +12 -0
- data/vendor/eac_docker/spec/lib/eac_docker/images/coded_spec_files/image1/Dockerfile +1 -0
- data/vendor/eac_docker/spec/lib/eac_docker/images/templatized_spec.rb +17 -0
- data/vendor/eac_docker/spec/lib/eac_docker/images/templatized_spec_files/stub_docker_image/Dockerfile +1 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/abstract_methods.rb +60 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/blank_not_blank.rb +19 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/common_concern.rb +2 -50
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/common_concern/class_setup.rb +52 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/common_concern/module_setup.rb +31 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/common_constructor.rb +53 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/configs/base.rb +43 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/configs/file.rb +12 -31
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/console/configs.rb +7 -104
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/console/configs/entry_reader.rb +81 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/console/configs/password_entry_reader.rb +18 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/console/configs/read_entry_options.rb +46 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/console/configs/store_passwords_entry_reader.rb +27 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/envs/command.rb +4 -6
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/envs/command/concat.rb +33 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/envs/command/envvars.rb +24 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/envs/command/extra_options.rb +0 -21
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/fs/clearable_directory.rb +57 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/patches/enumerator.rb +4 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/patches/enumerator/current.rb +9 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/patches/enumerator/stopped.rb +14 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/patches/module/abstract_methods.rb +10 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/patches/object/debug.rb +17 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/paths_hash.rb +21 -58
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/paths_hash/entry_key_error.rb +8 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/paths_hash/node.rb +67 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/paths_hash/path_search.rb +39 -0
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/ruby/command.rb +2 -1
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/struct.rb +11 -1
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/version.rb +1 -1
- data/vendor/eac_ruby_utils/lib/eac_ruby_utils/yaml.rb +3 -2
- data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/abstract_methods_spec.rb +28 -0
- data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/blank_not_blank_spec.rb +16 -0
- data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/common_concern_spec.rb +30 -17
- data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/common_constructor_spec.rb +66 -8
- data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/configs_spec.rb +15 -0
- data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/patches/enumerator/current_spec.rb +26 -0
- data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/patches/enumerator/stopped_spec.rb +32 -0
- data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/paths_hash_spec.rb +52 -13
- data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/struct_spec.rb +12 -1
- data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/version.rb +1 -1
- data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/videos/container.rb +30 -2
- data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/videos/convert_job.rb +91 -0
- data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/videos/stream.rb +51 -0
- metadata +108 -13
- data/vendor/eac_cli/lib/eac_cli/parser/parse_result.rb +0 -21
- data/vendor/eac_cli/lib/eac_cli/parser/positional_collection.rb +0 -49
- data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/videos/container/file.rb +0 -31
- data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/videos/container/info.rb +0 -21
@@ -19,13 +19,21 @@ module Ehbrs
|
|
19
19
|
added_checks << check_path.camelize.constantize.new(*args)
|
20
20
|
end
|
21
21
|
|
22
|
+
def base_checks
|
23
|
+
[unlisted_codec_check] + unsupported_codec_checks +
|
24
|
+
supported_codecs.flat_map { |codec| codec_extra_checks(codec) }
|
25
|
+
end
|
26
|
+
|
27
|
+
def checks
|
28
|
+
base_checks + added_checks
|
29
|
+
end
|
30
|
+
|
22
31
|
def file_checks
|
23
|
-
|
32
|
+
checks.select { |c| check_type(c) == :container }
|
24
33
|
end
|
25
34
|
|
26
35
|
def track_checks
|
27
|
-
|
28
|
-
supported_codecs.flat_map { |codec| codec_extra_checks(codec) }
|
36
|
+
checks.select { |c| check_type(c) == :stream }
|
29
37
|
end
|
30
38
|
|
31
39
|
def codec_extra_checks(codec)
|
@@ -95,6 +103,12 @@ module Ehbrs
|
|
95
103
|
def codec_supported_extras(codec)
|
96
104
|
codec_extras(codec, 'supported')
|
97
105
|
end
|
106
|
+
|
107
|
+
private
|
108
|
+
|
109
|
+
def check_type(check)
|
110
|
+
check.class.const_get(:TYPE)
|
111
|
+
end
|
98
112
|
end
|
99
113
|
end
|
100
114
|
end
|
@@ -7,19 +7,20 @@ module Ehbrs
|
|
7
7
|
module Unsupported
|
8
8
|
module Profiles
|
9
9
|
class Philco < ::Ehbrs::Videos::Unsupported::Profiles::Base
|
10
|
-
AUDIO_SUPPORTED_CODECS = %w[aac ac3 eac3 mp3].freeze
|
11
|
-
AUDIO_UNSUPPORTED_CODECS = %w[dts].freeze
|
10
|
+
AUDIO_SUPPORTED_CODECS = %w[aac ac3 eac3 mp3 vorbis wmav2].freeze
|
11
|
+
AUDIO_UNSUPPORTED_CODECS = %w[dts opus].freeze
|
12
12
|
|
13
13
|
VIDEO_SUPPORTED_CODECS = %w[h264 mpeg4].freeze
|
14
14
|
VIDEO_UNSUPPORTED_CODECS = %w[hevc msmpeg4v3].freeze
|
15
15
|
|
16
|
-
SUBTITLE_SUPPORTED_CODECS = %w[ass dvd
|
16
|
+
SUBTITLE_SUPPORTED_CODECS = %w[ass dvd dvd_subtitle hdmv_pgs_subtitle mov_text
|
17
|
+
subrip].freeze
|
17
18
|
SUBTITLE_UNSUPPORTED_CODECS = %w[mov].freeze
|
18
19
|
|
19
20
|
OTHER_SUPPORTED_CODECS = %w[png ttf].freeze
|
20
21
|
|
21
22
|
MPEG4_EXTRA_SUPPORTED = %w[xvid].freeze
|
22
|
-
MPEG4_EXTRA_UNSUPPORTED = %w[dx50].freeze
|
23
|
+
MPEG4_EXTRA_UNSUPPORTED = %w[divx dx50].freeze
|
23
24
|
end
|
24
25
|
end
|
25
26
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
$LOAD_PATH.push File.expand_path('lib', __dir__)
|
4
|
+
|
5
|
+
require 'aranha/parsers/version'
|
6
|
+
|
7
|
+
# Gem specification for the aranha-parsers library.
Gem::Specification.new do |spec|
  spec.name = 'aranha-parsers'
  spec.version = ::Aranha::Parsers::VERSION
  spec.authors = ['Esquilo Azul Company']
  spec.summary = 'Parsers\' utilities for Ruby.'

  # Only the library sources and the Gemfile are packaged.
  spec.files = Dir['{lib}/**/*', 'Gemfile']

  # Runtime dependencies, declared in the same order as before.
  runtime_dependencies = {
    'activesupport' => ['>= 4.0.0'],
    'addressable' => ['~> 2.7'],
    'curb' => ['~> 0.9.10'],
    'eac_ruby_utils' => ['~> 0.33', '>= 0.33.1'],
    'httpclient' => ['~> 2.8', '>= 2.8.3'],
    'ofx-parser' => ['~> 1.1.0']
  }
  runtime_dependencies.each do |gem_name, requirements|
    spec.add_dependency gem_name, *requirements
  end

  spec.add_development_dependency 'eac_ruby_gem_support', '~> 0.1'
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'open-uri'
|
4
|
+
require 'fileutils'
|
5
|
+
require 'aranha/parsers/source_address'
|
6
|
+
require 'eac_ruby_utils/fs/temp'
|
7
|
+
|
8
|
+
module Aranha
|
9
|
+
module Parsers
|
10
|
+
class Base
|
11
|
+
class << self
  # Builds a parser instance whose source is the given in-memory content.
  #
  # The content is written to a temporary file, a parser is created for that
  # file's path and its content is read before the block returns.
  # NOTE(review): presumably the temporary file only exists while the block
  # runs, which is why the content is loaded eagerly - confirm against
  # EacRubyUtils::Fs::Temp.on_file.
  #
  # @param content data written to the temporary source file
  # @return the value of the Temp.on_file block (the parser instance)
  def from_content(content)
    ::EacRubyUtils::Fs::Temp.on_file do |temp_file|
      temp_file.write(content)
      new(temp_file.to_path).tap(&:content)
    end
  end

  # Parses the given content and returns the parser's +data+.
  #
  # @param content data to be parsed
  def parse_content(content)
    parser = from_content(content)
    parser.data
  end
end
|
25
|
+
|
26
|
+
LOG_DIR_ENVVAR = 'ARANHA_PARSERS_LOG_DIR'
|
27
|
+
|
28
|
+
attr_reader :source_address
|
29
|
+
|
30
|
+
# Wraps the given source descriptor in a SourceAddress and logs its
# serialized form (log suffix "-source-address").
#
# @param url value handed to ::Aranha::Parsers::SourceAddress.new
def initialize(url)
  @source_address = ::Aranha::Parsers::SourceAddress.new(url)
  serialized_address = source_address.serialize
  log_content(serialized_address, '-source-address')
end

delegate :url, to: :source_address

# Content read from the source address. The first truthy result is logged
# and memoized; later calls return the memoized value.
def content
  return @content if @content

  fetched = source_address.content
  log_content(fetched)
  @content = fetched
end
|
44
|
+
|
45
|
+
private
|
46
|
+
|
47
|
+
# Writes +content+ in binary mode to the log file selected by +suffix+.
# Nothing is written when log_file returns a falsy path.
#
# @param content data to be written
# @param suffix [String] text appended to the log file base name
def log_content(content, suffix = '')
  target = log_file(suffix)
  File.open(target, 'wb') { |io| io.write(content) } if target
end
|
54
|
+
|
55
|
+
# Builds the log file path for this parser class and makes sure its
# directory exists.
#
# @param suffix [String] text placed between the class-derived base name and ".log"
# @return the log file path, or nil when no log directory is available
def log_file(suffix)
  base_dir = log_parsers_dir
  return nil unless base_dir

  file_name = "#{self.class.name.parameterize}#{suffix}.log"
  ::File.join(base_dir, file_name).tap do |log_path|
    FileUtils.mkdir_p(File.dirname(log_path))
  end
end
|
63
|
+
|
64
|
+
# Directory where parser logs are stored.
#
# The environment variable named by LOG_DIR_ENVVAR takes precedence; otherwise
# "log/parsers" under the Rails root is used when rails_root_exist? is true.
#
# @return the directory, or nil when neither source is available
def log_parsers_dir
  from_env = ENV[LOG_DIR_ENVVAR]
  if from_env
    from_env
  elsif rails_root_exist?
    ::Rails.root.join('log', 'parsers')
  end
end
|
70
|
+
|
71
|
+
# Tells whether a usable Rails root is available.
#
# Returns false when the Rails constant is not defined, when it does not
# respond to +root+, or when +root+ is nil (Rails loaded without an
# application). The explicit checks replace a +rescue NameError+ that
# reported true for a nil root.
#
# @return [Boolean]
def rails_root_exist?
  return false unless defined?(::Rails) && ::Rails.respond_to?(:root)

  !::Rails.root.nil?
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'aranha/parsers/base'
|
5
|
+
require 'aranha/parsers/html/node/default'
|
6
|
+
|
7
|
+
module Aranha
|
8
|
+
module Parsers
|
9
|
+
module Html
|
10
|
+
class Base < ::Aranha::Parsers::Base
|
11
|
+
class << self
  # Declaration of one field: its name, its value type and the XPath used
  # to locate it.
  Field = Struct.new(:name, :type, :xpath)

  # Fields declared so far on this class.
  #
  # @return [Array<Field>] a copy, so callers cannot change the declared list
  def fields
    registered_fields.dup
  end

  # Declares a field on this class.
  #
  # @param name field name
  # @param type field type
  # @param xpath XPath expression of the field
  # @return [Array<Field>] the internal list after the addition
  def field(name, type, xpath)
    registered_fields << Field.new(name, type, xpath)
  end

  private

  # Lazily created internal list of declared fields.
  def registered_fields
    @fields ||= []
  end
end
|
24
|
+
|
25
|
+
# Document built by Nokogiri::HTML from +content+, memoized after the first
# call. +noblanks+ is invoked on the object yielded by Nokogiri::HTML.
def nokogiri
  return @nokogiri if @nokogiri

  @nokogiri = Nokogiri::HTML(content) { |config| config.noblanks }
end
|
28
|
+
|
29
|
+
protected
|
30
|
+
|
31
|
+
# Class instantiated by node_parser.
def node_parser_class
  ::Aranha::Parsers::Html::Node::Default
end

private

# Node parser built from node_parser_class and this parser's fields,
# memoized after the first call.
def node_parser
  return @node_parser if @node_parser

  @node_parser = node_parser_class.new(fields)
end

# Class-level field declarations converted to [name, type, xpath] triples.
def fields
  self.class.fields.map do |declaration|
    %i[name type xpath].map { |attribute| declaration.public_send(attribute) }
  end
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'aranha/parsers/html/base'
|
4
|
+
|
5
|
+
module Aranha
|
6
|
+
module Parsers
|
7
|
+
module Html
|
8
|
+
class Item < Base
|
9
|
+
# Result of parsing item_node with node_parser, memoized after the first
# truthy result.
def data
  return @data if @data

  @data = node_parser.parse(item_node)
end
|
12
|
+
|
13
|
+
def item_node
|
14
|
+
@item_node ||= begin
|
15
|
+
r = item_xpath ? nokogiri.at_xpath(item_xpath) : nokogiri
|
16
|
+
raise "Item node not found (Item xpath: #{item_xpath})" unless r
|
17
|
+
|
18
|
+
r
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'aranha/parsers/html/base'
|
4
|
+
|
5
|
+
module Aranha
|
6
|
+
module Parsers
|
7
|
+
module Html
|
8
|
+
class ItemList < Base
|
9
|
+
# Parsed data of every item; same as items_data.
def data
  items_data
end

# Parses each node matched by +items_xpath+ with node_parser, memoizing the
# resulting array.
#
# @raise [StandardError] any failure is re-raised with the number of items
#   started so far appended to the message ("(Count: N)")
def items_data
  started_items = 0
  @data ||= begin
    item_nodes = nokogiri.xpath(items_xpath)
    item_nodes.map do |item_node|
      started_items += 1
      node_parser.parse(item_node)
    end
  end
rescue StandardError => e
  raise StandardError, "#{e.message} (Count: #{started_items})"
end
|
22
|
+
|
23
|
+
# XPath that selects the item nodes. This default implementation always
# raises; the error message asks for the method to be implemented.
#
# @raise [RuntimeError] always
def items_xpath
  missing_method = __method__
  raise "Class #{self.class} has no method \"#{missing_method}\". Implement it"
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Aranha
|
4
|
+
module Parsers
|
5
|
+
module Html
|
6
|
+
module Node
|
7
|
+
# Parses a node into a hash, one entry per configured field.
#
# Each field is an indexable triple: [0] is the result key, [1] the parser
# type and [2] the XPath. The value is produced by the public method named
# "<type>_value", called with the node and the XPath.
class Base
  attr_reader :fields

  # @param fields list of [key, type, xpath] triples
  def initialize(fields)
    @fields = fields
  end

  # Builds a hash with one entry per field.
  #
  # @param node node handed to every "<type>_value" method
  # @return [Hash] field key => parsed value
  # @raise [StandardError] any failure, with the offending field appended to
  #   the message
  def parse(node)
    fields.each_with_object({}) do |field, parsed|
      begin
        parsed[field[0]] = parse_field(node, field[2], field[1])
      rescue StandardError => e
        raise StandardError, "#{e.message}\nFields: #{field}"
      end
    end
  end

  private

  # Dispatches to the "<parser_method>_value" method of this object.
  #
  # @raise [RuntimeError] when this object does not respond to that method
  def parse_field(node, xpath, parser_method)
    value_method = "#{parser_method}_value"
    unless respond_to?(value_method)
      raise "Method \"#{value_method}\" not found in #{self.class}"
    end

    send(value_method, node, xpath)
  end
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'aranha/parsers/html/node/base'
|
4
|
+
|
5
|
+
module Aranha
|
6
|
+
module Parsers
|
7
|
+
module Html
|
8
|
+
module Node
|
9
|
+
class Default < ::Aranha::Parsers::Html::Node::Base
|
10
|
+
# Sanitized text of the first node matching +xpath+ under +node+.
#
# The XPath is evaluated once and the result reused, instead of running the
# same lookup a second time to read the text.
#
# @param node object responding to +at_xpath+
# @param xpath [String] XPath expression
# @return [String] the sanitized text, or '' when nothing matches
def string_value(node, xpath)
  found = node.at_xpath(xpath)
  found ? sanitize_string(found.text) : ''
end
|
17
|
+
|
18
|
+
# Text collected recursively (see string_recursive) from the first node
# matching +xpath+.
#
# @param node object responding to +at_xpath+
# @param xpath [String] XPath expression
# @param required [Boolean] whether a missing node or blank text is an error
# @return the collected text, or nil when nothing was found and +required+
#   is false
# @raise [RuntimeError] when +required+ and the node is missing or its text
#   is blank
def string_recursive_value(node, xpath, required = true)
  root = node.at_xpath(xpath)
  node_missing = root.blank?
  text = node_missing ? nil : string_recursive(root)
  return text if text.present?
  return nil unless required

  raise "No node found (Xpath: #{xpath})" if node_missing

  raise "String blank (Xpath: #{xpath})"
end

# Same as string_recursive_value, but returns nil instead of raising when
# nothing is found.
def string_recursive_optional_value(node, xpath)
  required = false
  string_recursive_value(node, xpath, required)
end
|
35
|
+
|
36
|
+
# First non-empty double-quoted fragment of the string value at +xpath+.
#
# @return [String] the text between the quotes, or '' when there is none
def quoted_value(node, xpath)
  text = string_value(node, xpath)
  quoted = text && /\"([^\"]+)\"/.match(text)
  quoted ? quoted[1] : ''
end
|
45
|
+
|
46
|
+
# First run of digits in the string value at +xpath+, as an Integer.
#
# @return [Integer, nil] nil when the string value is blank
# @raise [RuntimeError] when the string is not blank but has no digits
def integer_value(node, xpath)
  text = string_value(node, xpath)
  return nil if text.blank?

  digits = /\d+/.match(text)
  return digits[0].to_i if digits

  raise "Integer not found in \"#{text}\""
end
|
55
|
+
|
56
|
+
# First run of digits in the string value at +xpath+, as an Integer.
#
# @return [Integer, nil] nil when the string has no digits
def integer_optional_value(node, xpath)
  digits = /\d+/.match(string_value(node, xpath))
  return nil unless digits

  digits[0].to_i
end
|
61
|
+
|
62
|
+
# Float parsed from the string value at +xpath+ (see parse_float); a
# missing number is an error.
def float_value(node, xpath)
  required = true
  parse_float(node, xpath, required)
end

# Float parsed from the string value at +xpath+ (see parse_float); a
# missing number yields nil.
def float_optional_value(node, xpath)
  required = false
  parse_float(node, xpath, required)
end
|
69
|
+
|
70
|
+
# Stripped texts of every node matching +xpath+, joined with "|".
#
# @return [String]
def array_value(node, xpath)
  texts = node.xpath(xpath).map(&:text)
  texts.map(&:strip).join('|')
end
|
74
|
+
|
75
|
+
# Stripped texts of every node matching +xpath+, concatenated with no
# separator.
#
# The result is built with map/join rather than by appending to a string
# literal: this file enables frozen string literals, so appending to ''
# raised FrozenError as soon as one node matched.
#
# @param node object responding to +xpath+
# @param xpath [String] XPath expression
# @return [String] a new, unfrozen string ('' when nothing matches)
def join_value(node, xpath)
  node.xpath(xpath).map { |found| found.text.strip }.join
end
|
82
|
+
|
83
|
+
# Integer that precedes " m" in the joined text at +xpath+ (see join_value).
#
# @return [Integer, nil] nil when the pattern is not present
def duration_value(node, xpath)
  found = /(\d+) m/.match(join_value(node, xpath))
  return nil unless found

  found[1].to_i
end
|
87
|
+
|
88
|
+
# Matches +pattern+ against the string value at +xpath+.
#
# @param pattern object responding to +match+
# @return the match result
# @raise [RuntimeError] when the pattern does not match
def regxep(node, xpath, pattern)
  text = string_value(node, xpath)
  matched = pattern.match(text)
  raise "Pattern \"#{pattern}\" not found in string \"#{text}\"" unless matched

  matched
end
|
95
|
+
|
96
|
+
private
|
97
|
+
|
98
|
+
# Parses the first number in the string value at +xpath+ as a Float.
#
# Dots in the matched number are removed and a comma is treated as the
# decimal mark, so "1.234,56" yields 1234.56 and "12,5" yields 12.5. The
# pattern accepts any number of dot-separated groups followed by an optional
# comma part. The previous pattern accepted a single separator only, so
# "1.234,56" was cut at "1.234" and lost its decimals.
#
# @param node node handed to string_value
# @param xpath [String] XPath expression
# @param required [Boolean] whether a missing number is an error
# @return [Float, nil] nil when no number is found and +required+ is false
# @raise [RuntimeError] when no number is found and +required+ is true
def parse_float(node, xpath, required)
  text = string_value(node, xpath)
  number = /\d+(?:\.\d+)*(?:,\d+)?/.match(text)
  return number[0].delete('.').tr(',', '.').to_f if number

  raise "Float value not found in \"#{text}\"" if required

  nil
end
|
107
|
+
|
108
|
+
# String form of +obj+ with non-breaking spaces (U+00A0) replaced by regular
# spaces and surrounding whitespace removed.
#
# @return [String]
def sanitize_string(obj)
  text = obj.to_s
  without_nbsp = text.tr("\u00A0", ' ')
  without_nbsp.strip
end
|
111
|
+
|
112
|
+
# Collects the sanitized text of +node+ and its descendants.
#
# A Nokogiri text node yields its own sanitized text. Any other node yields
# the non-blank texts of its children, each prefixed with a space, and the
# combined string is sanitized again.
#
# @return [String]
def string_recursive(node)
  return sanitize_string(node.text) if node.is_a?(::Nokogiri::XML::Text)

  pieces = node.children.map { |child| string_recursive(child) }.select(&:present?)
  combined = pieces.inject('') { |collected, piece| collected + ' ' + piece }
  sanitize_string(combined)
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|