ehbrs-tools 0.14.0 → 0.16.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (140) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ehbrs/runner.rb +4 -7
  3. data/lib/ehbrs/runner/finances.rb +4 -7
  4. data/lib/ehbrs/runner/finances/bb_browser.rb +5 -10
  5. data/lib/ehbrs/runner/fs.rb +4 -7
  6. data/lib/ehbrs/runner/fs/used_space.rb +4 -5
  7. data/lib/ehbrs/runner/google.rb +4 -7
  8. data/lib/ehbrs/runner/google/translate.rb +7 -12
  9. data/lib/ehbrs/runner/self.rb +4 -7
  10. data/lib/ehbrs/runner/self/test.rb +5 -10
  11. data/lib/ehbrs/runner/vg.rb +4 -7
  12. data/lib/ehbrs/runner/vg/ips.rb +9 -14
  13. data/lib/ehbrs/runner/vg/wii.rb +10 -13
  14. data/lib/ehbrs/runner/videos.rb +3 -5
  15. data/lib/ehbrs/runner/videos/extract.rb +10 -13
  16. data/lib/ehbrs/runner/videos/probe.rb +6 -9
  17. data/lib/ehbrs/runner/videos/series.rb +5 -8
  18. data/lib/ehbrs/runner/videos/series/rename.rb +9 -12
  19. data/lib/ehbrs/runner/videos/unsupported.rb +9 -13
  20. data/lib/ehbrs/runner/web_utils.rb +24 -0
  21. data/lib/ehbrs/runner/web_utils/videos.rb +18 -0
  22. data/lib/ehbrs/runner/web_utils/videos/download.rb +63 -0
  23. data/lib/ehbrs/runner/web_utils/videos/upload.rb +75 -0
  24. data/lib/ehbrs/tools/version.rb +1 -1
  25. data/lib/ehbrs/vg/wii/wit/parsers/dump.rb +10 -5
  26. data/lib/ehbrs/videos/file.rb +4 -8
  27. data/lib/ehbrs/videos/series/rename/file/options.rb +3 -13
  28. data/lib/ehbrs/videos/track.rb +20 -10
  29. data/lib/ehbrs/videos/unsupported/checks/codec_extra_unlisted.rb +2 -0
  30. data/lib/ehbrs/videos/unsupported/checks/codec_extra_unsupported.rb +2 -0
  31. data/lib/ehbrs/videos/unsupported/checks/codec_unlisted.rb +2 -0
  32. data/lib/ehbrs/videos/unsupported/checks/codec_unsupported.rb +2 -0
  33. data/lib/ehbrs/videos/unsupported/checks/invalid_extension.rb +2 -0
  34. data/lib/ehbrs/videos/unsupported/fix_profile.rb +1 -0
  35. data/lib/ehbrs/videos/unsupported/fixes/supported_codec.rb +1 -1
  36. data/lib/ehbrs/videos/unsupported/profiles/base.rb +17 -3
  37. data/lib/ehbrs/videos/unsupported/profiles/philco.rb +5 -4
  38. data/vendor/aranha-parsers/Gemfile +5 -0
  39. data/vendor/aranha-parsers/aranha-parsers.gemspec +23 -0
  40. data/vendor/aranha-parsers/lib/aranha/parsers.rb +9 -0
  41. data/vendor/aranha-parsers/lib/aranha/parsers/base.rb +79 -0
  42. data/vendor/aranha-parsers/lib/aranha/parsers/html.rb +11 -0
  43. data/vendor/aranha-parsers/lib/aranha/parsers/html/base.rb +47 -0
  44. data/vendor/aranha-parsers/lib/aranha/parsers/html/item.rb +24 -0
  45. data/vendor/aranha-parsers/lib/aranha/parsers/html/item_list.rb +29 -0
  46. data/vendor/aranha-parsers/lib/aranha/parsers/html/node.rb +13 -0
  47. data/vendor/aranha-parsers/lib/aranha/parsers/html/node/base.rb +36 -0
  48. data/vendor/aranha-parsers/lib/aranha/parsers/html/node/default.rb +126 -0
  49. data/vendor/aranha-parsers/lib/aranha/parsers/invalid_state_exception.rb +8 -0
  50. data/vendor/aranha-parsers/lib/aranha/parsers/patches.rb +11 -0
  51. data/vendor/aranha-parsers/lib/aranha/parsers/patches/ofx_parser.rb +38 -0
  52. data/vendor/aranha-parsers/lib/aranha/parsers/source_address.rb +55 -0
  53. data/vendor/aranha-parsers/lib/aranha/parsers/source_address/file.rb +31 -0
  54. data/vendor/aranha-parsers/lib/aranha/parsers/source_address/hash_http_get.rb +25 -0
  55. data/vendor/aranha-parsers/lib/aranha/parsers/source_address/hash_http_post.rb +45 -0
  56. data/vendor/aranha-parsers/lib/aranha/parsers/source_address/http_get.rb +49 -0
  57. data/vendor/aranha-parsers/lib/aranha/parsers/source_target_fixtures.rb +77 -0
  58. data/vendor/aranha-parsers/lib/aranha/parsers/spec/source_target_fixtures_example.rb +78 -0
  59. data/vendor/aranha-parsers/lib/aranha/parsers/version.rb +7 -0
  60. data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_address/http_get_spec.rb +21 -0
  61. data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_address_spec.rb +74 -0
  62. data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_target_fixtures_spec.rb +27 -0
  63. data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_target_fixtures_spec_files/stub1.source.txt +1 -0
  64. data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_target_fixtures_spec_files/stub1.target.html +1 -0
  65. data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_target_fixtures_spec_files/stub2.source.html +1 -0
  66. data/vendor/aranha-parsers/spec/lib/aranha/parsers/source_target_fixtures_spec_files/stub3.target.yaml +1 -0
  67. data/vendor/aranha-parsers/spec/lib/rubocop_check_spec.rb +7 -0
  68. data/vendor/aranha-parsers/spec/spec_helper.rb +8 -0
  69. data/vendor/eac_cli/eac_cli.gemspec +1 -1
  70. data/vendor/eac_cli/lib/eac_cli/definition.rb +49 -22
  71. data/vendor/eac_cli/lib/eac_cli/definition/alternative.rb +83 -0
  72. data/vendor/eac_cli/lib/eac_cli/definition/base_option.rb +17 -1
  73. data/vendor/eac_cli/lib/eac_cli/{parser/options_collection.rb → definition/help_formatter.rb} +20 -49
  74. data/vendor/eac_cli/lib/eac_cli/definition/positional_argument.rb +21 -4
  75. data/vendor/eac_cli/lib/eac_cli/docopt/doc_builder.rb +18 -40
  76. data/vendor/eac_cli/lib/eac_cli/docopt/doc_builder/alternative.rb +50 -0
  77. data/vendor/eac_cli/lib/eac_cli/docopt/runner_extension.rb +1 -0
  78. data/vendor/eac_cli/lib/eac_cli/parser.rb +21 -3
  79. data/vendor/eac_cli/lib/eac_cli/parser/alternative.rb +88 -0
  80. data/vendor/eac_cli/lib/eac_cli/parser/alternative/argv.rb +17 -0
  81. data/vendor/eac_cli/lib/eac_cli/parser/alternative/double_dash.rb +24 -0
  82. data/vendor/eac_cli/lib/eac_cli/parser/alternative/options.rb +58 -0
  83. data/vendor/eac_cli/lib/eac_cli/parser/alternative/positionals.rb +30 -0
  84. data/vendor/eac_cli/lib/eac_cli/parser/collector.rb +4 -0
  85. data/vendor/eac_cli/lib/eac_cli/patches/object/runner_with.rb +2 -1
  86. data/vendor/eac_cli/lib/eac_cli/runner.rb +7 -3
  87. data/vendor/eac_cli/lib/eac_cli/runner/context.rb +19 -2
  88. data/vendor/eac_cli/lib/eac_cli/runner_with/help.rb +1 -1
  89. data/vendor/eac_cli/lib/eac_cli/runner_with/output_file.rb +5 -1
  90. data/vendor/eac_cli/lib/eac_cli/runner_with/subcommands.rb +96 -0
  91. data/vendor/eac_cli/lib/eac_cli/version.rb +1 -1
  92. data/vendor/eac_cli/spec/lib/eac_cli/definition/alternative_spec.rb +14 -0
  93. data/vendor/eac_cli/spec/lib/eac_cli/docopt/runner_extension_spec.rb +35 -0
  94. data/vendor/eac_cli/spec/lib/eac_cli/parser/alternative_spec.rb +140 -0
  95. data/vendor/eac_cli/spec/lib/eac_cli/runner_spec.rb +57 -40
  96. data/vendor/eac_cli/spec/lib/eac_cli/runner_with/output_file_spec.rb +53 -0
  97. data/vendor/eac_cli/spec/lib/eac_cli/runner_with/subcommands_spec.rb +57 -0
  98. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/abstract_methods.rb +60 -0
  99. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/blank_not_blank.rb +19 -0
  100. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/configs/base.rb +43 -0
  101. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/configs/file.rb +12 -31
  102. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/console/configs.rb +7 -104
  103. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/console/configs/entry_reader.rb +81 -0
  104. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/console/configs/password_entry_reader.rb +18 -0
  105. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/console/configs/read_entry_options.rb +46 -0
  106. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/console/configs/store_passwords_entry_reader.rb +27 -0
  107. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/fs/clearable_directory.rb +57 -0
  108. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/patches/enumerator.rb +4 -0
  109. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/patches/enumerator/current.rb +9 -0
  110. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/patches/enumerator/stopped.rb +14 -0
  111. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/patches/module/abstract_methods.rb +10 -0
  112. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/patches/object/debug.rb +17 -0
  113. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/paths_hash.rb +21 -58
  114. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/paths_hash/entry_key_error.rb +8 -0
  115. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/paths_hash/node.rb +67 -0
  116. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/paths_hash/path_search.rb +39 -0
  117. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/ruby/command.rb +2 -1
  118. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/struct.rb +4 -0
  119. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/version.rb +1 -1
  120. data/vendor/eac_ruby_utils/lib/eac_ruby_utils/yaml.rb +3 -2
  121. data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/abstract_methods_spec.rb +28 -0
  122. data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/blank_not_blank_spec.rb +16 -0
  123. data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/configs_spec.rb +15 -0
  124. data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/patches/enumerator/current_spec.rb +26 -0
  125. data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/patches/enumerator/stopped_spec.rb +32 -0
  126. data/vendor/eac_ruby_utils/spec/lib/eac_ruby_utils/paths_hash_spec.rb +52 -13
  127. data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/version.rb +1 -1
  128. data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/videos/container.rb +30 -2
  129. data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/videos/convert_job.rb +91 -0
  130. data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/videos/stream.rb +51 -0
  131. data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/web_utils.rb +9 -0
  132. data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/web_utils/instance.rb +32 -0
  133. data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/web_utils/videos.rb +11 -0
  134. data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/web_utils/videos/file.rb +40 -0
  135. data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/web_utils/videos/files_list.rb +76 -0
  136. metadata +78 -7
  137. data/vendor/eac_cli/lib/eac_cli/parser/parse_result.rb +0 -21
  138. data/vendor/eac_cli/lib/eac_cli/parser/positional_collection.rb +0 -49
  139. data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/videos/container/file.rb +0 -31
  140. data/vendor/ehbrs_ruby_utils/lib/ehbrs_ruby_utils/videos/container/info.rb +0 -21
@@ -5,6 +5,8 @@ module Ehbrs
5
5
  module Unsupported
6
6
  module Checks
7
7
  class CodecExtraUnlisted
8
+ TYPE = :stream
9
+
8
10
  common_constructor :codec, :listed_extras
9
11
 
10
12
  def check(track)
@@ -7,6 +7,8 @@ module Ehbrs
7
7
  module Unsupported
8
8
  module Checks
9
9
  class CodecExtraUnsupported
10
+ TYPE = :stream
11
+
10
12
  common_constructor :codec, :extra
11
13
 
12
14
  def check(track)
@@ -5,6 +5,8 @@ module Ehbrs
5
5
  module Unsupported
6
6
  module Checks
7
7
  class CodecUnlisted
8
+ TYPE = :stream
9
+
8
10
  common_constructor :listed_codecs
9
11
 
10
12
  def check(track)
@@ -7,6 +7,8 @@ module Ehbrs
7
7
  module Unsupported
8
8
  module Checks
9
9
  class CodecUnsupported
10
+ TYPE = :stream
11
+
10
12
  common_constructor :codec
11
13
 
12
14
  def check(track)
@@ -7,6 +7,8 @@ module Ehbrs
7
7
  module Unsupported
8
8
  module Checks
9
9
  class InvalidExtension
10
+ TYPE = :container
11
+
10
12
  common_constructor :extension
11
13
 
12
14
  def check(video)
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'ehbrs/videos/profiles/same_quality'
4
+ require 'ehbrs/videos/unsupported/fixes/supported_container'
4
5
 
5
6
  module Ehbrs
6
7
  module Videos
@@ -12,7 +12,7 @@ module Ehbrs
12
12
  }.freeze
13
13
 
14
14
  TRACK_TYPE_FIX_CODECS = {
15
- 'Audio' => 'aac',
15
+ 'Audio' => 'libvorbis',
16
16
  'Video' => 'libx264',
17
17
  'Subtitle' => 'ass'
18
18
  }.freeze
@@ -19,13 +19,21 @@ module Ehbrs
19
19
  added_checks << check_path.camelize.constantize.new(*args)
20
20
  end
21
21
 
22
+ def base_checks
23
+ [unlisted_codec_check] + unsupported_codec_checks +
24
+ supported_codecs.flat_map { |codec| codec_extra_checks(codec) }
25
+ end
26
+
27
+ def checks
28
+ base_checks + added_checks
29
+ end
30
+
22
31
  def file_checks
23
- added_checks
32
+ checks.select { |c| check_type(c) == :container }
24
33
  end
25
34
 
26
35
  def track_checks
27
- [unlisted_codec_check] + unsupported_codec_checks +
28
- supported_codecs.flat_map { |codec| codec_extra_checks(codec) }
36
+ checks.select { |c| check_type(c) == :stream }
29
37
  end
30
38
 
31
39
  def codec_extra_checks(codec)
@@ -95,6 +103,12 @@ module Ehbrs
95
103
  def codec_supported_extras(codec)
96
104
  codec_extras(codec, 'supported')
97
105
  end
106
+
107
+ private
108
+
109
+ def check_type(check)
110
+ check.class.const_get(:TYPE)
111
+ end
98
112
  end
99
113
  end
100
114
  end
@@ -7,19 +7,20 @@ module Ehbrs
7
7
  module Unsupported
8
8
  module Profiles
9
9
  class Philco < ::Ehbrs::Videos::Unsupported::Profiles::Base
10
- AUDIO_SUPPORTED_CODECS = %w[aac ac3 eac3 mp3].freeze
11
- AUDIO_UNSUPPORTED_CODECS = %w[dts].freeze
10
+ AUDIO_SUPPORTED_CODECS = %w[aac ac3 eac3 mp3 vorbis wmav2].freeze
11
+ AUDIO_UNSUPPORTED_CODECS = %w[dts opus].freeze
12
12
 
13
13
  VIDEO_SUPPORTED_CODECS = %w[h264 mpeg4].freeze
14
14
  VIDEO_UNSUPPORTED_CODECS = %w[hevc msmpeg4v3].freeze
15
15
 
16
- SUBTITLE_SUPPORTED_CODECS = %w[ass dvd subrip].freeze
16
+ SUBTITLE_SUPPORTED_CODECS = %w[ass dvd dvd_subtitle hdmv_pgs_subtitle mov_text
17
+ subrip].freeze
17
18
  SUBTITLE_UNSUPPORTED_CODECS = %w[mov].freeze
18
19
 
19
20
  OTHER_SUPPORTED_CODECS = %w[png ttf].freeze
20
21
 
21
22
  MPEG4_EXTRA_SUPPORTED = %w[xvid].freeze
22
- MPEG4_EXTRA_UNSUPPORTED = %w[dx50].freeze
23
+ MPEG4_EXTRA_UNSUPPORTED = %w[divx dx50].freeze
23
24
  end
24
25
  end
25
26
  end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ gemspec
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.push File.expand_path('lib', __dir__)
4
+
5
+ require 'aranha/parsers/version'
6
+
7
+ Gem::Specification.new do |s|
8
+ s.name = 'aranha-parsers'
9
+ s.version = ::Aranha::Parsers::VERSION
10
+ s.authors = ['Esquilo Azul Company']
11
+ s.summary = 'Parsers\' utilities for Ruby.'
12
+
13
+ s.files = Dir['{lib}/**/*', 'Gemfile']
14
+
15
+ s.add_dependency 'activesupport', '>= 4.0.0'
16
+ s.add_dependency 'addressable', '~> 2.7'
17
+ s.add_dependency 'curb', '~> 0.9.10'
18
+ s.add_dependency 'eac_ruby_utils', '~> 0.33', '>= 0.33.1'
19
+ s.add_dependency 'httpclient', '~> 2.8', '>= 2.8.3'
20
+ s.add_dependency 'ofx-parser', '~> 1.1.0'
21
+
22
+ s.add_development_dependency 'eac_ruby_gem_support', '~> 0.1'
23
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/require_sub'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ ::EacRubyUtils.require_sub __FILE__
8
+ end
9
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'open-uri'
4
+ require 'fileutils'
5
+ require 'aranha/parsers/source_address'
6
+ require 'eac_ruby_utils/fs/temp'
7
+
8
+ module Aranha
9
+ module Parsers
10
+ class Base
11
+ class << self
12
# Builds a parser from an in-memory string: writes +content+ to a temporary
# file and instantiates the parser on that file's path; the block evaluates
# to the new instance.
# NOTE(review): r.content is called inside the block, presumably so the
# content is read and memoized while the temporary file still exists —
# confirm against EacRubyUtils::Fs::Temp.on_file.
+ def from_content(content)
13
+ ::EacRubyUtils::Fs::Temp.on_file do |path|
14
+ path.write(content)
15
+ r = new(path.to_path)
16
+ r.content
17
+ r
18
+ end
19
+ end
20
+
21
+ def parse_content(content)
22
+ from_content(content).data
23
+ end
24
+ end
25
+
26
+ LOG_DIR_ENVVAR = 'ARANHA_PARSERS_LOG_DIR'
27
+
28
+ attr_reader :source_address
29
+
30
+ def initialize(url)
31
+ @source_address = ::Aranha::Parsers::SourceAddress.new(url)
32
+ log_content(source_address.serialize, '-source-address')
33
+ end
34
+
35
+ delegate :url, to: :source_address
36
+
37
+ def content
38
+ @content ||= begin
39
+ s = source_address.content
40
+ log_content(s)
41
+ s
42
+ end
43
+ end
44
+
45
+ private
46
+
47
+ def log_content(content, suffix = '')
48
+ path = log_file(suffix)
49
+
50
+ return unless path
51
+
52
+ File.open(path, 'wb') { |file| file.write(content) }
53
+ end
54
+
55
+ def log_file(suffix)
56
+ dir = log_parsers_dir
57
+ return nil unless dir
58
+
59
+ f = ::File.join(dir, "#{self.class.name.parameterize}#{suffix}.log")
60
+ FileUtils.mkdir_p(File.dirname(f))
61
+ f
62
+ end
63
+
64
+ def log_parsers_dir
65
+ return ENV[LOG_DIR_ENVVAR] if ENV[LOG_DIR_ENVVAR]
66
+ return ::Rails.root.join('log', 'parsers') if rails_root_exist?
67
+
68
+ nil
69
+ end
70
+
71
# Tells whether a Rails application root is available for locating the
# parsers' log directory.
#
# Returns false when the Rails constant (or its +root+ method) is missing —
# both raise NameError, of which NoMethodError is a subclass — and also when
# +Rails.root+ is nil, so that log_parsers_dir never calls +join+ on nil.
#
# @return [Boolean]
def rails_root_exist?
  !::Rails.root.nil?
rescue NameError
  false
end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/require_sub'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ module Html
8
+ ::EacRubyUtils.require_sub __FILE__
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'aranha/parsers/base'
5
+ require 'aranha/parsers/html/node/default'
6
+
7
+ module Aranha
8
+ module Parsers
9
+ module Html
10
+ class Base < ::Aranha::Parsers::Base
11
+ class << self
12
+ def fields
13
+ @fields ||= []
14
+ @fields.dup
15
+ end
16
+
17
+ def field(name, type, xpath)
18
+ @fields ||= []
19
+ @fields << Field.new(name, type, xpath)
20
+ end
21
+
22
+ Field = Struct.new(:name, :type, :xpath)
23
+ end
24
+
25
+ def nokogiri
26
+ @nokogiri ||= Nokogiri::HTML(content, &:noblanks)
27
+ end
28
+
29
+ protected
30
+
31
+ def node_parser_class
32
+ ::Aranha::Parsers::Html::Node::Default
33
+ end
34
+
35
+ private
36
+
37
+ def node_parser
38
+ @node_parser ||= node_parser_class.new(fields)
39
+ end
40
+
41
+ def fields
42
+ self.class.fields.map { |f| [f.name, f.type, f.xpath] }
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'aranha/parsers/html/base'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ module Html
8
+ class Item < Base
9
+ def data
10
+ @data ||= node_parser.parse(item_node)
11
+ end
12
+
13
+ def item_node
14
+ @item_node ||= begin
15
+ r = item_xpath ? nokogiri.at_xpath(item_xpath) : nokogiri
16
+ raise "Item node not found (Item xpath: #{item_xpath})" unless r
17
+
18
+ r
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'aranha/parsers/html/base'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ module Html
8
+ class ItemList < Base
9
+ def data
10
+ items_data
11
+ end
12
+
13
+ def items_data
14
+ count = 0
15
+ @data ||= nokogiri.xpath(items_xpath).map do |m|
16
+ count += 1
17
+ node_parser.parse(m)
18
+ end
19
+ rescue StandardError => e
20
+ raise StandardError, "#{e.message} (Count: #{count})"
21
+ end
22
+
23
+ def items_xpath
24
+ raise "Class #{self.class} has no method \"#{__method__}\". Implement it"
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/require_sub'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ module Html
8
+ module Node
9
+ ::EacRubyUtils.require_sub __FILE__
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aranha
4
+ module Parsers
5
+ module Html
6
+ module Node
7
+ class Base
8
+ attr_reader :fields
9
+
10
+ def initialize(fields)
11
+ @fields = fields
12
+ end
13
+
14
+ def parse(node)
15
+ fields.map do |f|
16
+ begin
17
+ [f[0], parse_field(node, f[2], f[1])]
18
+ rescue StandardError => e
19
+ raise StandardError, "#{e.message}\nFields: #{f}"
20
+ end
21
+ end.to_h
22
+ end
23
+
24
+ private
25
+
26
+ def parse_field(node, xpath, parser_method)
27
+ value_method = "#{parser_method}_value"
28
+ return send(value_method, node, xpath) if respond_to?(value_method)
29
+
30
+ raise "Method \"#{value_method}\" not found in #{self.class}"
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'aranha/parsers/html/node/base'
4
+
5
+ module Aranha
6
+ module Parsers
7
+ module Html
8
+ module Node
9
+ class Default < ::Aranha::Parsers::Html::Node::Base
10
# Returns the sanitized text of the first node matched by +xpath+.
#
# The XPath query is evaluated once and its result reused, instead of once
# for the presence check and a second time to read the text.
#
# @param node [#at_xpath] node to search from
# @param xpath [String] XPath expression
# @return [String] sanitized text, or '' when no node matches
def string_value(node, xpath)
  found = node.at_xpath(xpath)
  found ? sanitize_string(found.text) : ''
end
17
+
18
# Returns the text gathered by string_recursive from the first node matched
# by +xpath+.
# When that node is blank, or the gathered text is not present, the result
# is nil if +required+ is false; otherwise a RuntimeError naming the xpath
# is raised.
+ def string_recursive_value(node, xpath, required = true)
19
+ root = node.at_xpath(xpath)
20
+ if root.blank?
21
+ return nil unless required
22
+
23
+ raise "No node found (Xpath: #{xpath})"
24
+ end
25
+ result = string_recursive(root)
26
+ return result if result.present?
27
+ return nil unless required
28
+
29
+ raise "String blank (Xpath: #{xpath})"
30
+ end
31
+
32
# Non-raising variant of string_recursive_value: calls it with +required+
# set to false, so a missing node or blank text yields nil.
+ def string_recursive_optional_value(node, xpath)
33
+ string_recursive_value(node, xpath, false)
34
+ end
35
+
36
# Returns the first non-empty run of characters enclosed in double quotes
# inside the string_value of +xpath+, without the quotes.
# Returns '' when string_value is falsy or contains no such quoted run.
+ def quoted_value(node, xpath)
37
+ s = string_value(node, xpath)
38
+ return '' unless s
39
+
40
+ m = /\"([^\"]+)\"/.match(s)
41
+ return m[1] if m
42
+
43
+ ''
44
+ end
45
+
46
# Returns the first run of digits found in the string_value of +xpath+ as an
# Integer.
# Returns nil when that string is blank; raises a RuntimeError when the
# string is not blank but contains no digits.
+ def integer_value(node, xpath)
47
+ r = string_value(node, xpath)
48
+ return nil if r.blank?
49
+
50
+ m = /\d+/.match(r)
51
+ raise "Integer not found in \"#{r}\"" unless m
52
+
53
+ m[0].to_i
54
+ end
55
+
56
# Returns the first run of digits found in the string_value of +xpath+ as an
# Integer, or nil when the string contains no digits (never raises for a
# missing number).
+ def integer_optional_value(node, xpath)
57
+ r = string_value(node, xpath)
58
+ m = /\d+/.match(r)
59
+ m ? m[0].to_i : nil
60
+ end
61
+
62
# Required float field: delegates to parse_float with +required+ = true, so
# a RuntimeError is raised when no number is found.
+ def float_value(node, xpath)
63
+ parse_float(node, xpath, true)
64
+ end
65
+
66
# Optional float field: delegates to parse_float with +required+ = false, so
# nil is returned when no number is found.
+ def float_optional_value(node, xpath)
67
+ parse_float(node, xpath, false)
68
+ end
69
+
70
# Collects the stripped text of every node matched by +xpath+ and returns
# the pieces joined with '|'. Despite the name, the result is a single
# String, not an Array.
+ def array_value(node, xpath)
71
+ r = node.xpath(xpath).map { |n| n.text.strip }
72
+ r.join('|')
73
+ end
74
+
75
# Concatenates the stripped text of every node matched by +xpath+, with no
# separator between the pieces.
#
# The previous implementation appended with << to a '' literal, which raises
# FrozenError as soon as one node matches because this file enables
# frozen_string_literal; map + join builds a fresh string instead.
#
# @param node [#xpath] node to search from
# @param xpath [String] XPath expression selecting the nodes to join
# @return [String] the joined text ('' when nothing matches)
def join_value(node, xpath)
  node.xpath(xpath).map { |n| n.text.strip }.join
end
82
+
83
# Looks for digits followed by a space and the letter "m" in the result of
# join_value and returns those digits as an Integer, or nil when the pattern
# is absent.
# NOTE(review): presumably a duration expressed in minutes — confirm.
+ def duration_value(node, xpath)
84
+ m = /(\d+) m/.match(join_value(node, xpath))
85
+ m ? m[1].to_i : nil
86
+ end
87
+
88
# Matches +pattern+ against the string_value of +xpath+ and returns the
# resulting match object; raises a RuntimeError quoting both the pattern and
# the string when there is no match.
# NOTE(review): the method name looks like a misspelling of "regexp"; it is
# left unchanged because callers may depend on it.
+ def regxep(node, xpath, pattern)
89
+ s = string_value(node, xpath)
90
+ m = pattern.match(s)
91
+ return m if m
92
+
93
+ raise "Pattern \"#{pattern}\" not found in string \"#{s}\""
94
+ end
95
+
96
+ private
97
+
98
# Extracts the first number from the string_value of +xpath+ and returns it
# as a Float. The pattern accepts digits optionally followed by ONE '.' or
# ',' and more digits; every '.' in the match is then removed and ',' is
# turned into '.' before calling to_f.
# When nothing matches: raises a RuntimeError if +required+ is true,
# otherwise returns nil.
# NOTE(review): this appears to assume ',' is the decimal separator — "1.5"
# becomes 15.0, and "1.234,56" only matches "1.234" (giving 1234.0) because
# a single separator group is allowed. Confirm the intended number format.
+ def parse_float(node, xpath, required)
99
+ s = string_value(node, xpath)
100
+ m = /\d+(?:[\.\,](\d+))?/.match(s)
101
+ if m
102
+ m[0].delete('.').tr(',', '.').to_f
103
+ elsif required
104
+ raise "Float value not found in \"#{s}\""
105
+ end
106
+ end
107
+
108
# Normalizes text: converts +obj+ to a String, replaces every U+00A0
# (no-break space) with a regular space and strips leading/trailing
# whitespace.
+ def sanitize_string(obj)
109
+ obj.to_s.tr("\u00A0", ' ').strip
110
+ end
111
+
112
# Gathers the text found beneath +node+, depth-first, as one sanitized
# string with the non-blank pieces separated by single spaces.
#
# A text node yields its own sanitized text. Any other node yields the
# results of its children, skipping blank ones. The pieces are collected and
# joined once rather than re-allocating the accumulator with += on every
# child; the returned value is the same as before.
#
# @param node [Nokogiri::XML::Node] node whose text should be collected
# @return [String] sanitized text ('' when there is none)
def string_recursive(node)
  return sanitize_string(node.text) if node.is_a?(::Nokogiri::XML::Text)

  pieces = node.children.map { |child| string_recursive(child) }.select(&:present?)
  sanitize_string(pieces.join(' '))
end
122
+ end
123
+ end
124
+ end
125
+ end
126
+ end