ehbrs_ruby_utils 0.35.0 → 0.36.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 763112ae8325274f9c7a277a78e6984bb2c3e89632214f89d3bc3bced8e1acde
4
- data.tar.gz: 61448a174beb1981acaa3615a923a61fc39c9bd1c615839abef486a8c716b2a4
3
+ metadata.gz: 50cae2381085d9ecdd71d98619b6c8cb7927ee977c1d856d604edddd56ccb39a
4
+ data.tar.gz: 9ef89d0c3e2927f842c4f8d18e597470fb1452ae28b9d1efc975544375eb6761
5
5
  SHA512:
6
- metadata.gz: 7a6948ad99b05f1f9742c1a79391ea3a8f7138420efaa6b6f4b966b86d3460f74e01027311170ba0b051a7ac66e27f7c08998344374bded000cb9ba0d93a5a31
7
- data.tar.gz: 6515790504ba6e0c848f0c0941de5f3f5fd6f1b9bb396e356af85ea66957f857bfc3865b7b16258d2d5c54fffb55dac5439944884920e4e236766cba3407ff29
6
+ metadata.gz: b3cc8c03801f13eb8a7a4e729fe8a67d13102e667fe98c8997f1f71ad1d44e4621f59204a992fe0072b144e0f0b1b8400fc889550cd452cd5f2624f91329fb5d
7
+ data.tar.gz: 3a965527bef8c654a67ce2f05b24ad40d6ef8ed4da58aa86543c591685c84e66871e874a5e92f6ab4288271db23befb46390dde3058a9bbb03175d5117956b84
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_fs/patches'
4
+ require 'eac_ruby_utils/core_ext'
5
+
6
+ module EhbrsRubyUtils
7
+ module Fs
8
+ class ToFileFormat
9
+ enable_abstract_methods
10
+ enable_simple_cache
11
+ abstract_methods :convert, :convert?, :target_encoding
12
+
13
+ common_constructor(:file) do
14
+ self.file = file.to_pathname
15
+ end
16
+
17
+ class << self
18
+ def convert_self(file)
19
+ new(file).run
20
+ end
21
+
22
+ def convert_to_file(source, target)
23
+ ::FileUtils.cp(source, target)
24
+ convert_self(target)
25
+ end
26
+
27
+ def convert_to_string(source)
28
+ ::EacRubyUtils::Fs::Temp.on_file do |target|
29
+ convert_to_file(source, target)
30
+ target.open('rb', &:read)
31
+ end
32
+ end
33
+ end
34
+
35
+ def run
36
+ return false unless convert?
37
+
38
+ convert
39
+ true
40
+ end
41
+
42
+ protected
43
+
44
+ def convert_to_target_encoding
45
+ ::EacRubyUtils::Fs::Temp.on_file do |temp|
46
+ ::EacRubyUtils::Envs.local.command(
47
+ 'iconv', '-c', '-f', source_encoding, '-t', target_encoding, '-o', temp, file
48
+ ).execute!
49
+ ::FileUtils.mv(temp, file)
50
+ end
51
+ reset_cache
52
+ end
53
+
54
+ def crlf?
55
+ file_type?('CRLF')
56
+ end
57
+
58
+ def file_info_uncached
59
+ ::EacFs::FileInfo.new(file)
60
+ end
61
+
62
+ def file_type?(*include)
63
+ return false unless ::File.file?(file)
64
+
65
+ include.any? { |i| file_type.include?(i) }
66
+ end
67
+
68
+ def file_type_uncached
69
+ ::EacRubyUtils::Envs.local.command('file', '-b', file).execute!.strip
70
+ end
71
+
72
+ def source_encoding
73
+ r = file.info.charset
74
+ r = 'iso-8859-15' if r == 'unknown-8bit'
75
+ r
76
+ end
77
+
78
+ def text?
79
+ file.info.content_type.type == 'text'
80
+ end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ehbrs_ruby_utils/fs/to_file_format'
4
+
5
+ module EhbrsRubyUtils
6
+ module Fs
7
+ class ToUtf8Unix < ::EhbrsRubyUtils::Fs::ToFileFormat
8
+ UTF8_ENCODINGS = %w[us-ascii utf-8].freeze
9
+ ISO885915_ENCODINGS = %w[iso-8859-1].freeze
10
+
11
+ protected
12
+
13
+ def convert
14
+ check_utf8
15
+ check_crlf
16
+ end
17
+
18
+ def check_utf8
19
+ return if utf8?
20
+
21
+ convert_to_target_encoding
22
+ reset_cache
23
+ end
24
+
25
+ def file_attr(option)
26
+ ::EacRubyUtils::Envs.local.command('file', '--brief', option, file).execute!
27
+ end
28
+
29
+ def mime_encoding_uncached
30
+ file_attr('--mime-encoding')
31
+ end
32
+
33
+ def check_crlf
34
+ return unless crlf?
35
+
36
+ convert_crlf
37
+ end
38
+
39
+ def convert_crlf
40
+ ::EacRubyUtils::Envs.local.command('dos2unix', file).execute!
41
+ end
42
+
43
+ def convert?
44
+ text? && (!utf8? || crlf?)
45
+ end
46
+
47
+ def utf8?
48
+ UTF8_ENCODINGS.include?(mime_encoding)
49
+ end
50
+
51
+ def iso885915?
52
+ ISO885915_ENCODINGS.include?(mime_encoding)
53
+ end
54
+
55
+ def target_encoding
56
+ 'utf-8'
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+ require 'ehbrs_ruby_utils/fs/to_file_format'
5
+
6
+ module EhbrsRubyUtils
7
+ module Fs
8
+ class ToWindowsPtBr < ::EhbrsRubyUtils::Fs::ToFileFormat
9
+ TARGET_CHARSETS = %w[ISO-8859].freeze
10
+ ICONV_TO = 'ISO-8859-1'
11
+
12
+ protected
13
+
14
+ def convert
15
+ check_bom
16
+ check_target_charset
17
+ check_crlf
18
+ end
19
+
20
+ private
21
+
22
+ def check_bom
23
+ ::EacRubyUtils::Envs.local.command(
24
+ 'sed', '-i', '1s/^\\xEF\\xBB\\xBF//', file
25
+ ).system!
26
+ reset_cache
27
+ end
28
+
29
+ def check_target_charset
30
+ return if target_charset?
31
+
32
+ convert_to_target_encoding
33
+ end
34
+
35
+ def check_crlf
36
+ return if crlf?
37
+
38
+ convert_crlf
39
+ end
40
+
41
+ def convert_crlf
42
+ ::EacRubyUtils::Envs.local.command('unix2dos', file).execute!
43
+ reset_cache
44
+ end
45
+
46
+ def convert?
47
+ text? && (!target_charset? || !crlf?)
48
+ end
49
+
50
+ def target_charset?
51
+ file_type?(*TARGET_CHARSETS)
52
+ end
53
+
54
+ def target_encoding
55
+ ICONV_TO
56
+ end
57
+ end
58
+ end
59
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EhbrsRubyUtils
4
- VERSION = '0.35.0'
4
+ VERSION = '0.36.1'
5
5
  end
@@ -10,7 +10,7 @@ module EhbrsRubyUtils
10
10
  attr_reader :name, :children
11
11
 
12
12
  def initialize(name, files)
13
- super
13
+ super()
14
14
  @name = name
15
15
  @children = build_children(files)
16
16
  end
@@ -10,7 +10,7 @@ module EhbrsRubyUtils
10
10
  attr_reader :season, :files
11
11
 
12
12
  def initialize(season, files)
13
- super
13
+ super()
14
14
  @season = season
15
15
  @files = files.sort_by { |f| [f.episode] }
16
16
  end
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+ require 'ehbrs_ruby_utils/fs/to_file_format'
5
+ require 'ehbrs_ruby_utils/videos/subtitles/sanitize/with_pattern_matcher'
6
+ require 'ehbrs_ruby_utils/videos/subtitles/sanitize/with_term_matcher'
7
+ require 'srt'
8
+
9
+ module EhbrsRubyUtils
10
+ module Videos
11
+ module Subtitles
12
+ class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
13
+ class ContentSanitizer
14
+ class << self
15
+ def build_pattern(slim, elim)
16
+ /#{::Regexp.quote(slim)}[^#{::Regexp.quote(elim)}]*#{::Regexp.quote(elim)}/
17
+ end
18
+ end
19
+
20
+ REMOVE_PATTERNS = [%w[< >], %w[( )], ['[', ']']].map do |args|
21
+ build_pattern(*args)
22
+ end.freeze
23
+ REMOVE_TERMS = %w[subtitle osdb legenda @ united4ever unitedteam pt-subs capejuna maniacs
24
+ |]
25
+ .map(&:downcase)
26
+
27
+ common_constructor :input
28
+
29
+ def line_processors
30
+ REMOVE_PATTERNS.map do |pattern|
31
+ ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::WithPatternMatcher.new(pattern)
32
+ end
33
+ end
34
+
35
+ def output
36
+ output_lines.join("\n")
37
+ end
38
+
39
+ def output_lines
40
+ r = []
41
+ last_output_line = nil
42
+ ::SRT::File.parse_string(input).lines.each do |input_line|
43
+ output_line(input_line, last_output_line).if_present do |v|
44
+ r << v
45
+ last_output_line = v
46
+ end
47
+ end
48
+ r
49
+ end
50
+
51
+ def output_line(input_line, last_output_line)
52
+ text = output_line_text(input_line.text)
53
+ return nil if text.blank?
54
+
55
+ r = input_line.dup
56
+ r.sequence = last_output_line.if_present(1) { |v| v.sequence + 1 }
57
+ r.text = text
58
+ r
59
+ end
60
+
61
+ def output_line_text(text)
62
+ text_processors.each do |term|
63
+ text = term.process(text)
64
+ return nil if text.blank?
65
+ end
66
+
67
+ text.map { |line| process_line(line) }.compact_blank
68
+ end
69
+
70
+ def process_line(line)
71
+ remove_tags(line)
72
+ end
73
+
74
+ def remove_tags(line)
75
+ line_processors.inject(line) { |a, e| e.process(a) }.strip
76
+ end
77
+
78
+ def text_processors
79
+ REMOVE_TERMS.map do |term|
80
+ ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::WithTermMatcher.new(term)
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+ require 'ehbrs_ruby_utils/fs/to_file_format'
5
+
6
+ module EhbrsRubyUtils
7
+ module Videos
8
+ module Subtitles
9
+ class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
10
+ class WithPatternMatcher
11
+ common_constructor :pattern
12
+
13
+ def process(line)
14
+ line.gsub(pattern, '')
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+ require 'ehbrs_ruby_utils/fs/to_file_format'
5
+
6
+ module EhbrsRubyUtils
7
+ module Videos
8
+ module Subtitles
9
+ class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
10
+ class WithTermMatcher
11
+ common_constructor :term
12
+
13
+ def process(lines)
14
+ lines.map(&:downcase).any? { |line| line.include?(term) } ? nil : lines
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'avm/file_formats/utf8_assert'
4
+ require 'ehbrs_ruby_utils/fs/to_file_format'
5
+ require 'ehbrs_ruby_utils/fs/to_windows_pt_br'
6
+
7
+ module EhbrsRubyUtils
8
+ module Videos
9
+ module Subtitles
10
+ class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
11
+ require_sub __FILE__
12
+
13
+ def run
14
+ sanitize_content
15
+ convert_to_windows_ptbr
16
+ end
17
+
18
+ def subtitle?
19
+ text? && file.extname == '.srt'
20
+ end
21
+
22
+ private
23
+
24
+ def convert_to_windows_ptbr
25
+ ::EhbrsRubyUtils::Fs::ToWindowsPtBr.convert_self(file)
26
+ end
27
+
28
+ def sanitize_content
29
+ ::Avm::FileFormats::Utf8Assert.assert_files([file]) do
30
+ sanitize_content_on_utf8
31
+ end
32
+ end
33
+
34
+ def sanitize_content_on_utf8
35
+ input = file.read
36
+ output = ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::ContentSanitizer.new(input).output
37
+ file.write(output) if input != output
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ehbrs_ruby_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.35.0
4
+ version: 0.36.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo H. Bogoni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-28 00:00:00.000000000 Z
11
+ date: 2023-10-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aranha
@@ -154,6 +154,26 @@ dependencies:
154
154
  - - "~>"
155
155
  - !ruby/object:Gem::Version
156
156
  version: '0.5'
157
+ - !ruby/object:Gem::Dependency
158
+ name: srt
159
+ requirement: !ruby/object:Gem::Requirement
160
+ requirements:
161
+ - - "~>"
162
+ - !ruby/object:Gem::Version
163
+ version: '0.1'
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: 0.1.5
167
+ type: :runtime
168
+ prerelease: false
169
+ version_requirements: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '0.1'
174
+ - - ">="
175
+ - !ruby/object:Gem::Version
176
+ version: 0.1.5
157
177
  - !ruby/object:Gem::Dependency
158
178
  name: taglib-ruby
159
179
  requirement: !ruby/object:Gem::Requirement
@@ -289,6 +309,9 @@ files:
289
309
  - lib/ehbrs_ruby_utils/fs/selected.rb
290
310
  - lib/ehbrs_ruby_utils/fs/selected/build.rb
291
311
  - lib/ehbrs_ruby_utils/fs/selected/build_file.rb
312
+ - lib/ehbrs_ruby_utils/fs/to_file_format.rb
313
+ - lib/ehbrs_ruby_utils/fs/to_utf8_unix.rb
314
+ - lib/ehbrs_ruby_utils/fs/to_windows_pt_br.rb
292
315
  - lib/ehbrs_ruby_utils/gjt1.rb
293
316
  - lib/ehbrs_ruby_utils/gjt1/manager.rb
294
317
  - lib/ehbrs_ruby_utils/mudslide.rb
@@ -341,6 +364,10 @@ files:
341
364
  - lib/ehbrs_ruby_utils/videos/series/rename/results_builder.rb
342
365
  - lib/ehbrs_ruby_utils/videos/series/rename/season_group.rb
343
366
  - lib/ehbrs_ruby_utils/videos/stream.rb
367
+ - lib/ehbrs_ruby_utils/videos/subtitles/sanitize.rb
368
+ - lib/ehbrs_ruby_utils/videos/subtitles/sanitize/content_sanitizer.rb
369
+ - lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_pattern_matcher.rb
370
+ - lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_term_matcher.rb
344
371
  - lib/ehbrs_ruby_utils/web_utils.rb
345
372
  - lib/ehbrs_ruby_utils/web_utils/instance.rb
346
373
  - lib/ehbrs_ruby_utils/web_utils/instance/finances.rb