ehbrs_ruby_utils 0.35.0 → 0.36.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 763112ae8325274f9c7a277a78e6984bb2c3e89632214f89d3bc3bced8e1acde
4
- data.tar.gz: 61448a174beb1981acaa3615a923a61fc39c9bd1c615839abef486a8c716b2a4
3
+ metadata.gz: a51803bc2fc98c7439c7e0e8808e00866d7130f898e1f9738c1c9f1483b326aa
4
+ data.tar.gz: a4146dc9718ed5356187a725e1537895c679301ca63d67619f9f3edaa8374223
5
5
  SHA512:
6
- metadata.gz: 7a6948ad99b05f1f9742c1a79391ea3a8f7138420efaa6b6f4b966b86d3460f74e01027311170ba0b051a7ac66e27f7c08998344374bded000cb9ba0d93a5a31
7
- data.tar.gz: 6515790504ba6e0c848f0c0941de5f3f5fd6f1b9bb396e356af85ea66957f857bfc3865b7b16258d2d5c54fffb55dac5439944884920e4e236766cba3407ff29
6
+ metadata.gz: 91760450578b621f60892d253f686ee1c5f8662c70a0177bace04b9f3cead3b4148999f0c8b7492f0de473be71e15e52e79d58957fd7fbcc04f59c939105e7ae
7
+ data.tar.gz: 2b9b4af1d0b6a47e99930adc678487721da804b6978d23700d5c33f2cdb04bf5757401c598c4ddc06212309c2bb3ac68bded943aaf4365a618bc169e4217ddac
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_fs/patches'
4
+ require 'eac_ruby_utils/core_ext'
5
+
6
module EhbrsRubyUtils
  module Fs
    # Abstract base for converters that rewrite a file in place.
    #
    # Subclasses must supply +convert+, +convert?+ and +target_encoding+
    # (declared abstract below). The constructor coerces +file+ through
    # +to_pathname+.
    #
    # NOTE(review): the +*_uncached+ methods are presumably memoized by
    # +enable_simple_cache+ under the name without the suffix (this class
    # calls +file_type+ but only defines +file_type_uncached+) and
    # invalidated by +reset_cache+ -- confirm against eac_ruby_utils.
    class ToFileFormat
      enable_abstract_methods
      enable_simple_cache
      abstract_methods :convert, :convert?, :target_encoding

      common_constructor(:file) do
        self.file = file.to_pathname
      end

      # Instantiates a converter for +file+ and runs it.
      #
      # @return whatever {#run} returns
      def self.convert_self(file)
        new(file).run
      end

      # Copies +source+ over +target+ and then converts +target+ in place,
      # leaving +source+ untouched.
      #
      # @return whatever {#run} returns for the copy
      def self.convert_to_file(source, target)
        ::FileUtils.cp(source, target)
        convert_self(target)
      end

      # Converts a copy of +source+ made in a temporary file and returns
      # the bytes of that copy, read in binary mode.
      def self.convert_to_string(source)
        ::EacRubyUtils::Fs::Temp.on_file do |scratch|
          convert_to_file(source, scratch)
          scratch.open('rb') { |io| io.read }
        end
      end

      # Performs the conversion when +convert?+ says it is needed.
      #
      # @return [Boolean] true when +convert+ was invoked, false otherwise
      def run
        if convert?
          convert
          true
        else
          false
        end
      end

      protected

      # Re-encodes the file from +source_encoding+ to +target_encoding+
      # with +iconv -c+, writing to a temporary file that is then moved
      # over the original. The cache is reset afterwards because the file
      # content has changed.
      def convert_to_target_encoding
        ::EacRubyUtils::Fs::Temp.on_file do |converted|
          iconv_args = ['iconv', '-c', '-f', source_encoding, '-t', target_encoding,
                        '-o', converted, file]
          ::EacRubyUtils::Envs.local.command(*iconv_args).execute!
          ::FileUtils.mv(converted, file)
        end
        reset_cache
      end

      # True when the +file+ description mentions "CRLF".
      def crlf?
        file_type?('CRLF')
      end

      # Builds the +EacFs::FileInfo+ wrapper for the file.
      def file_info_uncached
        ::EacFs::FileInfo.new(file)
      end

      # Tells whether the +file+ description contains any of the given
      # fragments. Always false when the path is not a regular file.
      def file_type?(*include)
        ::File.file?(file) && include.any? { |fragment| file_type.include?(fragment) }
      end

      # Stripped output of +file -b+ for the file.
      def file_type_uncached
        description = ::EacRubyUtils::Envs.local.command('file', '-b', file).execute!
        description.strip
      end

      # Charset reported by +file.info+, with 'unknown-8bit' replaced by
      # 'iso-8859-15' so that a usable name is handed to iconv.
      def source_encoding
        charset = file.info.charset
        charset == 'unknown-8bit' ? 'iso-8859-15' : charset
      end

      # True when the content type reported by +file.info+ is of type 'text'.
      def text?
        content_type = file.info.content_type
        content_type.type == 'text'
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ehbrs_ruby_utils/fs/to_file_format'
4
+
5
module EhbrsRubyUtils
  module Fs
    # Converts a text file in place to UTF-8 with Unix (LF) line endings.
    #
    # A conversion is attempted only for text files that are either not
    # already UTF-8/US-ASCII or that contain CRLF line terminators.
    class ToUtf8Unix < ::EhbrsRubyUtils::Fs::ToFileFormat
      # MIME encodings that need no re-encoding.
      UTF8_ENCODINGS = %w[us-ascii utf-8].freeze
      ISO885915_ENCODINGS = %w[iso-8859-1].freeze

      protected

      # Re-encodes first, then normalizes the line endings.
      def convert
        check_utf8
        check_crlf
      end

      # Re-encodes the file to +target_encoding+ unless it already is UTF-8.
      def check_utf8
        return if utf8?

        convert_to_target_encoding
        reset_cache
      end

      # Runs +file --brief <option>+ on the file and returns the raw command
      # output (callers are responsible for stripping it).
      def file_attr(option)
        ::EacRubyUtils::Envs.local.command('file', '--brief', option, file).execute!
      end

      # MIME encoding name reported by +file+.
      #
      # The command output is newline-terminated, so it is stripped before it
      # is compared against the encoding lists; without that, +utf8?+ and
      # +iso885915?+ could never match.
      def mime_encoding_uncached
        file_attr('--mime-encoding').strip
      end

      # Converts CRLF to LF only when CRLF terminators were detected.
      def check_crlf
        return unless crlf?

        convert_crlf
      end

      # Rewrites the file with +dos2unix+ and drops cached file attributes,
      # which describe the content as it was before the rewrite.
      def convert_crlf
        ::EacRubyUtils::Envs.local.command('dos2unix', file).execute!
        reset_cache
      end

      # A conversion is needed for text files that are not UTF-8 or have CRLF.
      def convert?
        text? && (!utf8? || crlf?)
      end

      # True when the detected MIME encoding is one of UTF8_ENCODINGS.
      def utf8?
        UTF8_ENCODINGS.include?(mime_encoding)
      end

      # True when the detected MIME encoding is one of ISO885915_ENCODINGS.
      def iso885915?
        ISO885915_ENCODINGS.include?(mime_encoding)
      end

      # Encoding name handed to iconv as the conversion target.
      def target_encoding
        'utf-8'
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+ require 'ehbrs_ruby_utils/fs/to_file_format'
5
+
6
module EhbrsRubyUtils
  module Fs
    # Converts a text file in place to ISO-8859-1 with CRLF line endings.
    class ToWindowsPtBr < ::EhbrsRubyUtils::Fs::ToFileFormat
      # Fragments looked for in the +file+ description to recognize a file
      # that is already in the target charset.
      TARGET_CHARSETS = %w[ISO-8859].freeze
      # Encoding name handed to iconv as the conversion target.
      ICONV_TO = 'ISO-8859-1'

      protected

      # Strips a UTF-8 BOM, re-encodes if needed, then enforces CRLF.
      def convert
        check_bom
        check_target_charset
        check_crlf
      end

      private

      # Removes a leading UTF-8 byte-order mark (EF BB BF) from the first
      # line with an in-place +sed+, then resets the cache because the
      # file may have changed.
      def check_bom
        sed_args = ['sed', '-i', '1s/^\\xEF\\xBB\\xBF//', file]
        ::EacRubyUtils::Envs.local.command(*sed_args).system!
        reset_cache
      end

      # Re-encodes the file unless it is already in the target charset.
      def check_target_charset
        convert_to_target_encoding unless target_charset?
      end

      # Converts line endings to CRLF unless they already are CRLF.
      def check_crlf
        convert_crlf unless crlf?
      end

      # Rewrites the file with +unix2dos+ and resets the cache.
      def convert_crlf
        ::EacRubyUtils::Envs.local.command('unix2dos', file).execute!
        reset_cache
      end

      # A conversion is needed for any text file that is not already both
      # in the target charset and CRLF-terminated.
      def convert?
        text? && !(target_charset? && crlf?)
      end

      # True when the +file+ description mentions one of TARGET_CHARSETS.
      def target_charset?
        file_type?(*TARGET_CHARSETS)
      end

      def target_encoding
        ICONV_TO
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EhbrsRubyUtils
4
- VERSION = '0.35.0'
4
+ VERSION = '0.36.0'
5
5
  end
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+ require 'ehbrs_ruby_utils/fs/to_file_format'
5
+ require 'ehbrs_ruby_utils/videos/subtitles/sanitize/with_pattern_matcher'
6
+ require 'ehbrs_ruby_utils/videos/subtitles/sanitize/with_term_matcher'
7
+ require 'srt'
8
+
9
module EhbrsRubyUtils
  module Videos
    module Subtitles
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        # Filters the content of an SRT document given as a string.
        #
        # Entries whose text contains any of REMOVE_TERMS are dropped, spans
        # matching REMOVE_PATTERNS are removed from each remaining text line,
        # and the surviving entries are renumbered starting at 1.
        class ContentSanitizer
          # Builds a regexp matching +slim+, then any run of characters other
          # than +elim+, then +elim+. Both delimiters are regexp-quoted.
          def self.build_pattern(slim, elim)
            opening = ::Regexp.quote(slim)
            closing = ::Regexp.quote(elim)
            /#{opening}[^#{closing}]*#{closing}/
          end

          # Delimited spans removed from every text line: <...>, (...), [...].
          REMOVE_PATTERNS = [%w[< >], %w[( )], ['[', ']']].map do |(slim, elim)|
            build_pattern(slim, elim)
          end.freeze
          # Lower-cased terms; an entry containing any of them is discarded.
          REMOVE_TERMS = %w[
            subtitle osdb legenda @ united4ever unitedteam pt-subs capejuna maniacs |
          ].map(&:downcase)

          common_constructor :input

          # One pattern-based processor per entry of REMOVE_PATTERNS.
          def line_processors
            REMOVE_PATTERNS.map do |pattern|
              ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::WithPatternMatcher.new(pattern)
            end
          end

          # The kept entries joined with newlines.
          def output
            output_lines.join("\n")
          end

          # Parses +input+ as SRT and collects the entries that survive
          # sanitization, passing each kept entry on so that the next one
          # can be numbered after it.
          def output_lines
            last_output_line = nil
            ::SRT::File.parse_string(input).lines.each_with_object([]) do |input_line, kept|
              output_line(input_line, last_output_line).if_present do |sanitized|
                kept << sanitized
                last_output_line = sanitized
              end
            end
          end

          # Builds the sanitized copy of +input_line+, or nil when no text is
          # left. The sequence is 1 when there is no previous kept entry,
          # otherwise the previous entry's sequence plus one.
          def output_line(input_line, last_output_line)
            text = output_line_text(input_line.text)
            return nil if text.blank?

            input_line.dup.tap do |copy|
              copy.sequence = last_output_line.if_present(1) { |previous| previous.sequence + 1 }
              copy.text = text
            end
          end

          # Runs the term processors over the entry's text lines, stopping
          # with nil as soon as one of them leaves nothing, then cleans each
          # remaining line and discards the blank ones.
          def output_line_text(text)
            remaining = text
            text_processors.each do |processor|
              remaining = processor.process(remaining)
              return nil if remaining.blank?
            end

            cleaned = remaining.map { |line| process_line(line) }
            cleaned.compact_blank
          end

          def process_line(line)
            remove_tags(line)
          end

          # Applies every pattern processor in turn and strips the result.
          def remove_tags(line)
            untagged = line_processors.reduce(line) do |current, processor|
              processor.process(current)
            end
            untagged.strip
          end

          # One term-based processor per entry of REMOVE_TERMS.
          def text_processors
            REMOVE_TERMS.map do |term|
              ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::WithTermMatcher.new(term)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+ require 'ehbrs_ruby_utils/fs/to_file_format'
5
+
6
module EhbrsRubyUtils
  module Videos
    module Subtitles
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        # Line processor that deletes every occurrence of +pattern+.
        class WithPatternMatcher
          common_constructor :pattern

          # @param line text line to clean
          # @return a copy of +line+ with all matches of +pattern+ removed
          def process(line)
            line.gsub(pattern) { '' }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eac_ruby_utils/core_ext'
4
+ require 'ehbrs_ruby_utils/fs/to_file_format'
5
+
6
module EhbrsRubyUtils
  module Videos
    module Subtitles
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        # Text processor that rejects a whole group of lines when any of
        # them contains +term+. Lines are lower-cased before the check;
        # +term+ itself is compared as given.
        class WithTermMatcher
          common_constructor :term

          # @param lines collection of text lines
          # @return nil when some line contains +term+, otherwise +lines+
          #   unchanged
          def process(lines)
            lowered = lines.map(&:downcase)
            return nil if lowered.any? { |candidate| candidate.include?(term) }

            lines
          end
        end
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'avm/file_formats/utf8_assert'
4
+ require 'ehbrs_ruby_utils/fs/to_file_format'
5
+ require 'ehbrs_ruby_utils/fs/to_windows_pt_br'
6
+
7
module EhbrsRubyUtils
  module Videos
    module Subtitles
      # Sanitizes a subtitle file in place: its content is filtered through
      # ContentSanitizer and the file is then handed to
      # +Fs::ToWindowsPtBr+ for conversion.
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        require_sub __FILE__

        # Overrides the base +run+: always sanitizes the content and then
        # converts the file, without consulting +convert?+.
        #
        # @return the result of the +ToWindowsPtBr+ conversion
        def run
          sanitize_content
          convert_to_windows_ptbr
        end

        # True for text files whose extension is '.srt'.
        def subtitle?
          text? && file.extname == '.srt'
        end

        private

        def convert_to_windows_ptbr
          ::EhbrsRubyUtils::Fs::ToWindowsPtBr.convert_self(file)
        end

        # Runs the content sanitization inside the block given to
        # +Utf8Assert.assert_files+.
        # NOTE(review): presumably the file is UTF-8 while the block runs and
        # is restored afterwards -- confirm against avm's Utf8Assert.
        def sanitize_content
          ::Avm::FileFormats::Utf8Assert.assert_files([file]) do
            sanitize_content_on_utf8
          end
        end

        # Rewrites the file only when sanitization changed its content.
        def sanitize_content_on_utf8
          original = file.read
          sanitized =
            ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::ContentSanitizer.new(original).output
          file.write(sanitized) if original != sanitized
        end
      end
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ehbrs_ruby_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.35.0
4
+ version: 0.36.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo H. Bogoni
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-28 00:00:00.000000000 Z
11
+ date: 2023-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aranha
@@ -154,6 +154,26 @@ dependencies:
154
154
  - - "~>"
155
155
  - !ruby/object:Gem::Version
156
156
  version: '0.5'
157
+ - !ruby/object:Gem::Dependency
158
+ name: srt
159
+ requirement: !ruby/object:Gem::Requirement
160
+ requirements:
161
+ - - "~>"
162
+ - !ruby/object:Gem::Version
163
+ version: '0.1'
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: 0.1.5
167
+ type: :runtime
168
+ prerelease: false
169
+ version_requirements: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '0.1'
174
+ - - ">="
175
+ - !ruby/object:Gem::Version
176
+ version: 0.1.5
157
177
  - !ruby/object:Gem::Dependency
158
178
  name: taglib-ruby
159
179
  requirement: !ruby/object:Gem::Requirement
@@ -289,6 +309,9 @@ files:
289
309
  - lib/ehbrs_ruby_utils/fs/selected.rb
290
310
  - lib/ehbrs_ruby_utils/fs/selected/build.rb
291
311
  - lib/ehbrs_ruby_utils/fs/selected/build_file.rb
312
+ - lib/ehbrs_ruby_utils/fs/to_file_format.rb
313
+ - lib/ehbrs_ruby_utils/fs/to_utf8_unix.rb
314
+ - lib/ehbrs_ruby_utils/fs/to_windows_pt_br.rb
292
315
  - lib/ehbrs_ruby_utils/gjt1.rb
293
316
  - lib/ehbrs_ruby_utils/gjt1/manager.rb
294
317
  - lib/ehbrs_ruby_utils/mudslide.rb
@@ -341,6 +364,10 @@ files:
341
364
  - lib/ehbrs_ruby_utils/videos/series/rename/results_builder.rb
342
365
  - lib/ehbrs_ruby_utils/videos/series/rename/season_group.rb
343
366
  - lib/ehbrs_ruby_utils/videos/stream.rb
367
+ - lib/ehbrs_ruby_utils/videos/subtitles/sanitize.rb
368
+ - lib/ehbrs_ruby_utils/videos/subtitles/sanitize/content_sanitizer.rb
369
+ - lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_pattern_matcher.rb
370
+ - lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_term_matcher.rb
344
371
  - lib/ehbrs_ruby_utils/web_utils.rb
345
372
  - lib/ehbrs_ruby_utils/web_utils/instance.rb
346
373
  - lib/ehbrs_ruby_utils/web_utils/instance/finances.rb