ehbrs_ruby_utils 0.35.0 → 0.36.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ehbrs_ruby_utils/fs/to_file_format.rb +83 -0
- data/lib/ehbrs_ruby_utils/fs/to_utf8_unix.rb +60 -0
- data/lib/ehbrs_ruby_utils/fs/to_windows_pt_br.rb +59 -0
- data/lib/ehbrs_ruby_utils/version.rb +1 -1
- data/lib/ehbrs_ruby_utils/videos/series/rename/line_result_group.rb +1 -1
- data/lib/ehbrs_ruby_utils/videos/series/rename/season_group.rb +1 -1
- data/lib/ehbrs_ruby_utils/videos/subtitles/sanitize/content_sanitizer.rb +87 -0
- data/lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_pattern_matcher.rb +20 -0
- data/lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_term_matcher.rb +20 -0
- data/lib/ehbrs_ruby_utils/videos/subtitles/sanitize.rb +42 -0
- metadata +29 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 50cae2381085d9ecdd71d98619b6c8cb7927ee977c1d856d604edddd56ccb39a
|
4
|
+
data.tar.gz: 9ef89d0c3e2927f842c4f8d18e597470fb1452ae28b9d1efc975544375eb6761
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b3cc8c03801f13eb8a7a4e729fe8a67d13102e667fe98c8997f1f71ad1d44e4621f59204a992fe0072b144e0f0b1b8400fc889550cd452cd5f2624f91329fb5d
|
7
|
+
data.tar.gz: 3a965527bef8c654a67ce2f05b24ad40d6ef8ed4da58aa86543c591685c84e66871e874a5e92f6ab4288271db23befb46390dde3058a9bbb03175d5117956b84
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_fs/patches'
|
4
|
+
require 'eac_ruby_utils/core_ext'
|
5
|
+
|
6
|
+
module EhbrsRubyUtils
  module Fs
    # Abstract base for converters that rewrite a file in place.
    #
    # Subclasses are expected to provide +convert+, +convert?+ and +target_encoding+
    # (declared abstract below).
    #
    # NOTE(review): +file_type+ and +reset_cache+ are not defined in this class; they are
    # presumably generated by +enable_simple_cache+ from the +*_uncached+ methods -- confirm
    # against eac_ruby_utils.
    class ToFileFormat
      enable_abstract_methods
      enable_simple_cache
      abstract_methods :convert, :convert?, :target_encoding

      # The given file is normalized through +to_pathname+ before being stored.
      common_constructor(:file) do
        self.file = file.to_pathname
      end

      class << self
        # Builds a converter for +file+ and runs it.
        #
        # @return the value of +#run+
        def convert_self(file)
          new(file).run
        end

        # Copies +source+ over +target+ and then converts +target+ in place, leaving
        # +source+ untouched.
        #
        # @return the value of +#run+ for the copy
        def convert_to_file(source, target)
          ::FileUtils.cp(source, target)
          convert_self(target)
        end

        # Converts a temporary copy of +source+ and reads that copy back in binary mode.
        #
        # NOTE(review): the return value is whatever +Temp.on_file+ returns, presumably the
        # block result (the converted content) -- confirm.
        def convert_to_string(source)
          ::EacRubyUtils::Fs::Temp.on_file do |converted|
            convert_to_file(source, converted)
            converted.open('rb', &:read)
          end
        end
      end

      # Performs the conversion only when +convert?+ says it is needed.
      #
      # @return [Boolean] +true+ when +convert+ was called, +false+ otherwise
      def run
        if convert?
          convert
          true
        else
          false
        end
      end

      protected

      # Re-encodes the file from +source_encoding+ to +target_encoding+ with +iconv+, writing to
      # a temporary file that is then moved over the original. Cached values are reset afterwards
      # because the file content has changed.
      def convert_to_target_encoding
        ::EacRubyUtils::Fs::Temp.on_file do |reencoded|
          ::EacRubyUtils::Envs.local.command(
            'iconv', '-c', '-f', source_encoding, '-t', target_encoding, '-o', reencoded, file
          ).execute!
          ::FileUtils.mv(reencoded, file)
        end
        reset_cache
      end

      # True when the +file+ utility description of the file mentions "CRLF".
      def crlf?
        file_type?('CRLF')
      end

      def file_info_uncached
        ::EacFs::FileInfo.new(file)
      end

      # True when the path is a regular file and its +file_type+ description contains at least
      # one of the given fragments.
      def file_type?(*fragments)
        ::File.file?(file) && fragments.any? { |fragment| file_type.include?(fragment) }
      end

      # Output of +file -b+ for the file, with surrounding whitespace removed.
      def file_type_uncached
        ::EacRubyUtils::Envs.local.command('file', '-b', file).execute!.strip
      end

      # Charset reported for the file; "unknown-8bit" is replaced by "iso-8859-15".
      def source_encoding
        charset = file.info.charset
        charset == 'unknown-8bit' ? 'iso-8859-15' : charset
      end

      # True when the detected content type of the file has the type "text".
      def text?
        file.info.content_type.type == 'text'
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
4
|
+
|
5
|
+
module EhbrsRubyUtils
  module Fs
    # Converts a text file in place to UTF-8 (via the inherited +convert_to_target_encoding+)
    # and to Unix line endings (via +dos2unix+).
    class ToUtf8Unix < ::EhbrsRubyUtils::Fs::ToFileFormat
      # MIME encodings that need no re-encoding to be valid UTF-8.
      UTF8_ENCODINGS = %w[us-ascii utf-8].freeze
      ISO885915_ENCODINGS = %w[iso-8859-1].freeze

      protected

      # Fixes the encoding first and the line endings second.
      def convert
        check_utf8
        check_crlf
      end

      # Re-encodes the file unless it is already UTF-8 compatible.
      def check_utf8
        return if utf8?

        convert_to_target_encoding
        # The file content changed, so previously cached probes are stale.
        reset_cache
      end

      # Runs +file --brief+ with the given extra option on the file.
      #
      # @return the raw command output (not stripped)
      def file_attr(option)
        ::EacRubyUtils::Envs.local.command('file', '--brief', option, file).execute!
      end

      # MIME encoding reported by +file+. The output is stripped so that the exact-match
      # lookups in +utf8?+ and +iso885915?+ are not defeated by a trailing newline (the base
      # class strips the output of +file -b+ the same way).
      def mime_encoding_uncached
        file_attr('--mime-encoding').strip
      end

      # Normalizes line endings only when the file is reported as CRLF.
      def check_crlf
        return unless crlf?

        convert_crlf
      end

      # Rewrites the file with +dos2unix+ and drops cached probes, which described the file
      # before the rewrite.
      def convert_crlf
        ::EacRubyUtils::Envs.local.command('dos2unix', file).execute!
        reset_cache
      end

      # A conversion is needed for text files that are not UTF-8 compatible or that use CRLF.
      def convert?
        text? && (!utf8? || crlf?)
      end

      def utf8?
        UTF8_ENCODINGS.include?(mime_encoding)
      end

      def iso885915?
        ISO885915_ENCODINGS.include?(mime_encoding)
      end

      def target_encoding
        'utf-8'
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/core_ext'
|
4
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
5
|
+
|
6
|
+
module EhbrsRubyUtils
  module Fs
    # Converts a text file in place to ISO-8859-1 with CRLF line endings, removing a leading
    # UTF-8 byte order mark first.
    class ToWindowsPtBr < ::EhbrsRubyUtils::Fs::ToFileFormat
      # Fragments looked up in the +file+ description to decide that the charset is already
      # the wanted one.
      TARGET_CHARSETS = %w[ISO-8859].freeze
      # Encoding name handed to +iconv+ as the conversion target.
      ICONV_TO = 'ISO-8859-1'

      protected

      # Runs the three fix-ups in order: BOM, charset, line endings.
      def convert
        check_bom
        check_target_charset
        check_crlf
      end

      private

      # Deletes the bytes EF BB BF from the start of the first line with +sed -i+, then resets
      # cached values because the file may have changed.
      def check_bom
        ::EacRubyUtils::Envs.local.command(
          'sed', '-i', '1s/^\\xEF\\xBB\\xBF//', file
        ).system!
        reset_cache
      end

      # Re-encodes the file unless its description already mentions a target charset.
      def check_target_charset
        convert_to_target_encoding unless target_charset?
      end

      # Adds CRLF line endings unless the file already has them.
      def check_crlf
        convert_crlf unless crlf?
      end

      # Rewrites the file with +unix2dos+ and resets cached values.
      def convert_crlf
        ::EacRubyUtils::Envs.local.command('unix2dos', file).execute!
        reset_cache
      end

      # A conversion is needed for text files unless both the charset and the line endings
      # are already the wanted ones.
      def convert?
        text? && !(target_charset? && crlf?)
      end

      def target_charset?
        file_type?(*TARGET_CHARSETS)
      end

      def target_encoding
        ICONV_TO
      end
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/core_ext'
|
4
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
5
|
+
require 'ehbrs_ruby_utils/videos/subtitles/sanitize/with_pattern_matcher'
|
6
|
+
require 'ehbrs_ruby_utils/videos/subtitles/sanitize/with_term_matcher'
|
7
|
+
require 'srt'
|
8
|
+
|
9
|
+
module EhbrsRubyUtils
  module Videos
    module Subtitles
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        # Filters the content of an SRT document given as a string (+input+):
        #
        # * entries whose text contains any of REMOVE_TERMS are dropped;
        # * spans matching REMOVE_PATTERNS are erased from each remaining text line;
        # * entries left without text are dropped and the kept ones are renumbered from 1.
        class ContentSanitizer
          class << self
            # Builds a regexp matching +slim+, then any run of characters other than +elim+,
            # then +elim+. Both delimiters are escaped.
            def build_pattern(slim, elim)
              /#{::Regexp.quote(slim)}[^#{::Regexp.quote(elim)}]*#{::Regexp.quote(elim)}/
            end
          end

          # Delimited spans erased from every text line: <...>, (...) and [...].
          REMOVE_PATTERNS = [%w[< >], %w[( )], ['[', ']']].map do |args|
            build_pattern(*args)
          end.freeze
          # Lower-cased terms; an entry whose text contains one of them is discarded.
          REMOVE_TERMS = %w[subtitle osdb legenda @ united4ever unitedteam pt-subs capejuna maniacs
                            |]
                         .map(&:downcase)

          common_constructor :input

          # One pattern matcher per REMOVE_PATTERNS item, built anew on every call.
          def line_processors
            REMOVE_PATTERNS.map do |pattern|
              ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::WithPatternMatcher.new(pattern)
            end
          end

          # @return [String] the kept entries joined with a newline
          def output
            output_lines.join("\n")
          end

          # Parses +input+ with the srt gem and collects the entries that survive
          # +output_line+. Each entry is built knowing the previously kept one (the last
          # element collected so far) so that sequence numbers stay contiguous.
          def output_lines
            ::SRT::File.parse_string(input).lines.each_with_object([]) do |input_line, result|
              output_line(input_line, result.last).if_present { |kept| result << kept }
            end
          end

          # Returns a sanitized copy of +input_line+, or +nil+ when no text is left.
          # The sequence is 1 when there is no previous kept entry, otherwise the previous
          # sequence plus one.
          def output_line(input_line, last_output_line)
            text = output_line_text(input_line.text)
            return nil if text.blank?

            sanitized = input_line.dup
            sanitized.sequence = last_output_line.if_present(1) { |previous| previous.sequence + 1 }
            sanitized.text = text
            sanitized
          end

          # Applies every term matcher to the whole entry text, giving up with +nil+ as soon
          # as one of them leaves nothing; then cleans each line and discards blank ones.
          def output_line_text(text)
            remaining = text
            text_processors.each do |processor|
              remaining = processor.process(remaining)
              return nil if remaining.blank?
            end

            remaining.map { |line| process_line(line) }.compact_blank
          end

          def process_line(line)
            remove_tags(line)
          end

          # Passes the line through every pattern matcher in turn and strips the result.
          def remove_tags(line)
            line_processors.reduce(line) { |current, processor| processor.process(current) }.strip
          end

          # One term matcher per REMOVE_TERMS item, built anew on every call.
          def text_processors
            REMOVE_TERMS.map do |term|
              ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::WithTermMatcher.new(term)
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/core_ext'
|
4
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
5
|
+
|
6
|
+
module EhbrsRubyUtils
  module Videos
    module Subtitles
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        # Line processor that erases every occurrence of +pattern+ from a line.
        class WithPatternMatcher
          common_constructor :pattern

          # @param line [String] a single text line
          # @return [String] a copy of +line+ with all matches of +pattern+ removed
          def process(line)
            line.gsub(pattern) { '' }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/core_ext'
|
4
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
5
|
+
|
6
|
+
module EhbrsRubyUtils
  module Videos
    module Subtitles
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        # Text processor that rejects a group of lines when any of them contains +term+.
        # The lines are lower-cased before the comparison; +term+ itself is used as given.
        class WithTermMatcher
          common_constructor :term

          # @param lines [Array<String>] the text lines of one entry
          # @return [Array<String>, nil] +lines+ unchanged, or +nil+ when +term+ was found
          def process(lines)
            lowered = lines.map(&:downcase)
            return nil if lowered.any? { |line| line.include?(term) }

            lines
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'avm/file_formats/utf8_assert'
|
4
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
5
|
+
require 'ehbrs_ruby_utils/fs/to_windows_pt_br'
|
6
|
+
|
7
|
+
module EhbrsRubyUtils
  module Videos
    module Subtitles
      # Sanitizes a subtitle file in place: first its content is filtered by ContentSanitizer,
      # then the file is handed to ::EhbrsRubyUtils::Fs::ToWindowsPtBr.
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        require_sub __FILE__

        # Replaces the inherited +run+: both steps are always executed, without consulting
        # +convert?+.
        #
        # @return the value of the ToWindowsPtBr conversion
        def run
          sanitize_content
          convert_to_windows_ptbr
        end

        # True for text files whose extension is ".srt".
        def subtitle?
          return false unless text?

          file.extname == '.srt'
        end

        private

        def convert_to_windows_ptbr
          ::EhbrsRubyUtils::Fs::ToWindowsPtBr.convert_self(file)
        end

        # Runs the content filter inside +Utf8Assert.assert_files+.
        # NOTE(review): presumably this guarantees the file is UTF-8 while the block runs --
        # confirm against avm.
        def sanitize_content
          ::Avm::FileFormats::Utf8Assert.assert_files([file]) { sanitize_content_on_utf8 }
        end

        # Reads the file, filters its content and writes it back only when something changed.
        def sanitize_content_on_utf8
          original = file.read
          sanitized =
            ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::ContentSanitizer.new(original).output
          file.write(sanitized) unless original == sanitized
        end
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ehbrs_ruby_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.35.0
|
4
|
+
version: 0.36.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo H. Bogoni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aranha
|
@@ -154,6 +154,26 @@ dependencies:
|
|
154
154
|
- - "~>"
|
155
155
|
- !ruby/object:Gem::Version
|
156
156
|
version: '0.5'
|
157
|
+
- !ruby/object:Gem::Dependency
|
158
|
+
name: srt
|
159
|
+
requirement: !ruby/object:Gem::Requirement
|
160
|
+
requirements:
|
161
|
+
- - "~>"
|
162
|
+
- !ruby/object:Gem::Version
|
163
|
+
version: '0.1'
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 0.1.5
|
167
|
+
type: :runtime
|
168
|
+
prerelease: false
|
169
|
+
version_requirements: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0.1'
|
174
|
+
- - ">="
|
175
|
+
- !ruby/object:Gem::Version
|
176
|
+
version: 0.1.5
|
157
177
|
- !ruby/object:Gem::Dependency
|
158
178
|
name: taglib-ruby
|
159
179
|
requirement: !ruby/object:Gem::Requirement
|
@@ -289,6 +309,9 @@ files:
|
|
289
309
|
- lib/ehbrs_ruby_utils/fs/selected.rb
|
290
310
|
- lib/ehbrs_ruby_utils/fs/selected/build.rb
|
291
311
|
- lib/ehbrs_ruby_utils/fs/selected/build_file.rb
|
312
|
+
- lib/ehbrs_ruby_utils/fs/to_file_format.rb
|
313
|
+
- lib/ehbrs_ruby_utils/fs/to_utf8_unix.rb
|
314
|
+
- lib/ehbrs_ruby_utils/fs/to_windows_pt_br.rb
|
292
315
|
- lib/ehbrs_ruby_utils/gjt1.rb
|
293
316
|
- lib/ehbrs_ruby_utils/gjt1/manager.rb
|
294
317
|
- lib/ehbrs_ruby_utils/mudslide.rb
|
@@ -341,6 +364,10 @@ files:
|
|
341
364
|
- lib/ehbrs_ruby_utils/videos/series/rename/results_builder.rb
|
342
365
|
- lib/ehbrs_ruby_utils/videos/series/rename/season_group.rb
|
343
366
|
- lib/ehbrs_ruby_utils/videos/stream.rb
|
367
|
+
- lib/ehbrs_ruby_utils/videos/subtitles/sanitize.rb
|
368
|
+
- lib/ehbrs_ruby_utils/videos/subtitles/sanitize/content_sanitizer.rb
|
369
|
+
- lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_pattern_matcher.rb
|
370
|
+
- lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_term_matcher.rb
|
344
371
|
- lib/ehbrs_ruby_utils/web_utils.rb
|
345
372
|
- lib/ehbrs_ruby_utils/web_utils/instance.rb
|
346
373
|
- lib/ehbrs_ruby_utils/web_utils/instance/finances.rb
|