ehbrs_ruby_utils 0.35.0 → 0.36.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ehbrs_ruby_utils/fs/to_file_format.rb +83 -0
- data/lib/ehbrs_ruby_utils/fs/to_utf8_unix.rb +60 -0
- data/lib/ehbrs_ruby_utils/fs/to_windows_pt_br.rb +59 -0
- data/lib/ehbrs_ruby_utils/version.rb +1 -1
- data/lib/ehbrs_ruby_utils/videos/series/rename/line_result_group.rb +1 -1
- data/lib/ehbrs_ruby_utils/videos/series/rename/season_group.rb +1 -1
- data/lib/ehbrs_ruby_utils/videos/subtitles/sanitize/content_sanitizer.rb +87 -0
- data/lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_pattern_matcher.rb +20 -0
- data/lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_term_matcher.rb +20 -0
- data/lib/ehbrs_ruby_utils/videos/subtitles/sanitize.rb +42 -0
- metadata +29 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 50cae2381085d9ecdd71d98619b6c8cb7927ee977c1d856d604edddd56ccb39a
|
4
|
+
data.tar.gz: 9ef89d0c3e2927f842c4f8d18e597470fb1452ae28b9d1efc975544375eb6761
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b3cc8c03801f13eb8a7a4e729fe8a67d13102e667fe98c8997f1f71ad1d44e4621f59204a992fe0072b144e0f0b1b8400fc889550cd452cd5f2624f91329fb5d
|
7
|
+
data.tar.gz: 3a965527bef8c654a67ce2f05b24ad40d6ef8ed4da58aa86543c591685c84e66871e874a5e92f6ab4288271db23befb46390dde3058a9bbb03175d5117956b84
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_fs/patches'
|
4
|
+
require 'eac_ruby_utils/core_ext'
|
5
|
+
|
6
|
+
module EhbrsRubyUtils
  module Fs
    # Base class for in-place file format converters.
    #
    # Subclasses supply +convert+, +convert?+ and +target_encoding+ (declared
    # abstract below); this class provides the entry points and shared probes.
    # NOTE(review): +enable_abstract_methods+, +enable_simple_cache+ and
    # +common_constructor+ come from eac_ruby_utils; the cache presumably exposes
    # each +*_uncached+ method under its unsuffixed name (e.g. +file_type+) and
    # +reset_cache+ drops the memoized values — confirm against that gem.
    class ToFileFormat
      enable_abstract_methods
      enable_simple_cache
      abstract_methods :convert, :convert?, :target_encoding

      # The given file is normalized through +to_pathname+.
      common_constructor(:file) do
        self.file = file.to_pathname
      end

      # Converts +file+ in place.
      #
      # @return [Boolean] the result of {#run}
      def self.convert_self(file)
        new(file).run
      end

      # Copies +source+ over +target+ and then converts +target+ in place,
      # leaving +source+ untouched.
      #
      # @return [Boolean] the result of {#run} on the copy
      def self.convert_to_file(source, target)
        ::FileUtils.cp(source, target)
        convert_self(target)
      end

      # Converts a temporary copy of +source+ and returns the copy's content,
      # read in binary mode.
      def self.convert_to_string(source)
        ::EacRubyUtils::Fs::Temp.on_file do |target|
          convert_to_file(source, target)
          target.open('rb', &:read)
        end
      end

      # Runs the conversion when +convert?+ says it is needed.
      #
      # @return [Boolean] true when +convert+ was called, false otherwise
      def run
        if convert?
          convert
          true
        else
          false
        end
      end

      protected

      # Re-encodes the file from +source_encoding+ to +target_encoding+ with
      # iconv, writing to a temporary file that then replaces the original.
      # Cached probes are discarded afterwards because the content changed.
      def convert_to_target_encoding
        ::EacRubyUtils::Fs::Temp.on_file do |temp|
          iconv = ::EacRubyUtils::Envs.local.command(
            'iconv', '-c', '-f', source_encoding, '-t', target_encoding, '-o', temp, file
          )
          iconv.execute!
          ::FileUtils.mv(temp, file)
        end
        reset_cache
      end

      # @return [Boolean] whether the +file+ description mentions "CRLF"
      def crlf?
        file_type?('CRLF')
      end

      def file_info_uncached
        ::EacFs::FileInfo.new(file)
      end

      # @param include [Array<String>] fragments to look for
      # @return [Boolean] true when the path is a regular file and its type
      #   description contains at least one of the fragments
      def file_type?(*include)
        ::File.file?(file) && include.any? { |fragment| file_type.include?(fragment) }
      end

      # Brief description printed by `file -b`, without surrounding whitespace.
      def file_type_uncached
        ::EacRubyUtils::Envs.local.command('file', '-b', file).execute!.strip
      end

      # Charset reported for the file; "unknown-8bit" is replaced by
      # "iso-8859-15" so iconv receives a usable source encoding.
      def source_encoding
        charset = file.info.charset
        charset == 'unknown-8bit' ? 'iso-8859-15' : charset
      end

      # @return [Boolean] whether the detected content type is of type "text"
      def text?
        file.info.content_type.type == 'text'
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
4
|
+
|
5
|
+
module EhbrsRubyUtils
  module Fs
    # Converts a text file in place to UTF-8 with Unix (LF) line endings.
    class ToUtf8Unix < ::EhbrsRubyUtils::Fs::ToFileFormat
      # MIME encodings that need no re-encoding.
      UTF8_ENCODINGS = %w[us-ascii utf-8].freeze
      ISO885915_ENCODINGS = %w[iso-8859-1].freeze

      protected

      # Fixes the encoding first, then the line endings.
      def convert
        check_utf8
        check_crlf
      end

      # Re-encodes the file unless it is already reported as UTF-8/ASCII.
      def check_utf8
        return if utf8?

        convert_to_target_encoding
        reset_cache
      end

      # Output of `file --brief <option>` for the file, as returned by the
      # command (not stripped).
      def file_attr(option)
        ::EacRubyUtils::Envs.local.command('file', '--brief', option, file).execute!
      end

      # MIME encoding reported by `file`, stripped so it can be compared against
      # the encoding lists above.
      # NOTE(review): assumes +execute!+ returns raw stdout with a trailing
      # newline, as the +.strip+ in ToFileFormat#file_type_uncached suggests;
      # without stripping, +utf8?+ would never match.
      def mime_encoding_uncached
        file_attr('--mime-encoding').strip
      end

      # Converts line endings only when the file is reported as CRLF.
      def check_crlf
        return unless crlf?

        convert_crlf
      end

      # Rewrites the file with dos2unix and drops the cached probes, since the
      # file type description is stale once the line endings change.
      def convert_crlf
        ::EacRubyUtils::Envs.local.command('dos2unix', file).execute!
        reset_cache
      end

      # @return [Boolean] true for text files that are not UTF-8 or use CRLF
      def convert?
        text? && (!utf8? || crlf?)
      end

      def utf8?
        UTF8_ENCODINGS.include?(mime_encoding)
      end

      def iso885915?
        ISO885915_ENCODINGS.include?(mime_encoding)
      end

      # Encoding name passed to iconv as the conversion target.
      def target_encoding
        'utf-8'
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/core_ext'
|
4
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
5
|
+
|
6
|
+
module EhbrsRubyUtils
  module Fs
    # Converts a text file in place to ISO-8859-1 with CRLF line endings.
    class ToWindowsPtBr < ::EhbrsRubyUtils::Fs::ToFileFormat
      # Fragments of the `file` description accepted as already converted.
      TARGET_CHARSETS = %w[ISO-8859].freeze
      # Encoding name passed to iconv as the conversion target.
      ICONV_TO = 'ISO-8859-1'

      protected

      # Order matters: the BOM is removed before the charset is checked, and
      # line endings are fixed last.
      def convert
        check_bom
        check_target_charset
        check_crlf
      end

      private

      # Deletes a leading UTF-8 byte order mark (EF BB BF) from the first line
      # using sed, then drops cached probes because the file was rewritten.
      def check_bom
        sed = ::EacRubyUtils::Envs.local.command(
          'sed', '-i', '1s/^\\xEF\\xBB\\xBF//', file
        )
        sed.system!
        reset_cache
      end

      # Re-encodes the file unless it is already in a target charset.
      def check_target_charset
        convert_to_target_encoding unless target_charset?
      end

      # Adds CRLF line endings unless the file already has them.
      def check_crlf
        convert_crlf unless crlf?
      end

      # Rewrites the file with unix2dos and drops cached probes.
      def convert_crlf
        ::EacRubyUtils::Envs.local.command('unix2dos', file).execute!
        reset_cache
      end

      # @return [Boolean] true for text files that are not yet both in a target
      #   charset and CRLF-terminated
      def convert?
        text? && !(target_charset? && crlf?)
      end

      def target_charset?
        file_type?(*TARGET_CHARSETS)
      end

      def target_encoding
        ICONV_TO
      end
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/core_ext'
|
4
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
5
|
+
require 'ehbrs_ruby_utils/videos/subtitles/sanitize/with_pattern_matcher'
|
6
|
+
require 'ehbrs_ruby_utils/videos/subtitles/sanitize/with_term_matcher'
|
7
|
+
require 'srt'
|
8
|
+
|
9
|
+
module EhbrsRubyUtils
  module Videos
    module Subtitles
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        # Cleans the content of an SRT subtitle: drops entries that mention an
        # unwanted term, strips delimited tags from the remaining text lines and
        # renumbers the surviving entries starting at 1.
        class ContentSanitizer
          class << self
            # Builds a regexp matching +slim+, any run of characters other than
            # +elim+, and then +elim+ (e.g. "<i>", "(laughs)", "[music]").
            #
            # @param slim [String] single-character start delimiter
            # @param elim [String] single-character end delimiter
            # @return [Regexp]
            def build_pattern(slim, elim)
              opening = ::Regexp.quote(slim)
              closing = ::Regexp.quote(elim)
              /#{opening}[^#{closing}]*#{closing}/
            end
          end

          # Delimited spans removed from every text line.
          REMOVE_PATTERNS = [%w[< >], %w[( )], ['[', ']']].map do |args|
            build_pattern(*args)
          end.freeze
          # Lowercase terms; an entry containing any of them is discarded.
          REMOVE_TERMS = %w[subtitle osdb legenda @ united4ever unitedteam pt-subs capejuna maniacs
                            |]
                         .map(&:downcase).freeze

          common_constructor :input

          # Per-line processors, one per removal pattern. Built once per
          # sanitizer because this is consulted for every text line.
          def line_processors
            @line_processors ||= REMOVE_PATTERNS.map do |pattern|
              ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::WithPatternMatcher.new(pattern)
            end
          end

          # @return [String] the sanitized entries joined by newlines (relies on
          #   the string form of the SRT line objects)
          def output
            output_lines.join("\n")
          end

          # Parses +input+ as SRT and returns the surviving entries in order.
          # Each entry is numbered relative to the previously kept one.
          def output_lines
            ::SRT::File.parse_string(input).lines.each_with_object([]) do |input_line, kept|
              output_line(input_line, kept.last).if_present { |line| kept << line }
            end
          end

          # @param input_line [Object] parsed SRT entry
          # @param last_output_line [Object, nil] previously kept entry, if any
          # @return [Object, nil] a renumbered copy of +input_line+ carrying the
          #   sanitized text, or nil when no text is left
          def output_line(input_line, last_output_line)
            text = output_line_text(input_line.text)
            return nil if text.blank?

            r = input_line.dup
            r.sequence = last_output_line.if_present(1) { |v| v.sequence + 1 }
            r.text = text
            r
          end

          # Applies the term filters to the whole entry text, then cleans each
          # remaining line.
          #
          # @param text [Array<String>] text lines of one entry
          # @return [Array<String>, nil] non-blank cleaned lines, or nil as soon
          #   as a filter leaves the text blank
          def output_line_text(text)
            text_processors.each do |processor|
              text = processor.process(text)
              return nil if text.blank?
            end

            text.map { |line| process_line(line) }.compact_blank
          end

          def process_line(line)
            remove_tags(line)
          end

          # Runs +line+ through every pattern processor and trims the result.
          def remove_tags(line)
            line_processors.inject(line) { |a, e| e.process(a) }.strip
          end

          # Entry-level processors, one per unwanted term. Built once per
          # sanitizer because this is consulted for every entry.
          def text_processors
            @text_processors ||= REMOVE_TERMS.map do |term|
              ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::WithTermMatcher.new(term)
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/core_ext'
|
4
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
5
|
+
|
6
|
+
module EhbrsRubyUtils
  module Videos
    module Subtitles
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        # Line processor that deletes every match of a pattern.
        class WithPatternMatcher
          common_constructor :pattern

          # @param line [String] a single text line
          # @return [String] a copy of +line+ with all matches of +pattern+
          #   replaced by the empty string
          def process(line)
            line.gsub(pattern) { '' }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'eac_ruby_utils/core_ext'
|
4
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
5
|
+
|
6
|
+
module EhbrsRubyUtils
  module Videos
    module Subtitles
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        # Text processor that rejects a group of lines when any of them contains
        # a term. Lines are lowercased before the comparison; +term+ itself is
        # compared as given.
        class WithTermMatcher
          common_constructor :term

          # @param lines [Array<String>] text lines of one subtitle entry
          # @return [Array<String>, nil] +lines+ unchanged, or nil when any
          #   lowercased line includes +term+
          def process(lines)
            lowered = lines.map(&:downcase)
            return nil if lowered.any? { |line| line.include?(term) }

            lines
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'avm/file_formats/utf8_assert'
|
4
|
+
require 'ehbrs_ruby_utils/fs/to_file_format'
|
5
|
+
require 'ehbrs_ruby_utils/fs/to_windows_pt_br'
|
6
|
+
|
7
|
+
module EhbrsRubyUtils
  module Videos
    module Subtitles
      # Sanitizes a subtitle file in place: cleans its content and then converts
      # it with ToWindowsPtBr.
      class Sanitize < ::EhbrsRubyUtils::Fs::ToFileFormat
        require_sub __FILE__

        # Overrides the base entry point: the content is always sanitized and
        # the file is then handed to the Windows/pt-BR converter.
        #
        # @return [Boolean] the result of the ToWindowsPtBr conversion
        def run
          sanitize_content
          convert_to_windows_ptbr
        end

        # @return [Boolean] whether the file is text and has the ".srt" extension
        def subtitle?
          text? && file.extname == '.srt'
        end

        private

        def convert_to_windows_ptbr
          ::EhbrsRubyUtils::Fs::ToWindowsPtBr.convert_self(file)
        end

        # Runs the content sanitization inside Utf8Assert.assert_files.
        # NOTE(review): presumably that helper guarantees the file is UTF-8
        # while the block runs — confirm against avm.
        def sanitize_content
          ::Avm::FileFormats::Utf8Assert.assert_files([file]) { sanitize_content_on_utf8 }
        end

        # Rewrites the file only when sanitizing actually changed its content.
        def sanitize_content_on_utf8
          original = file.read
          sanitized =
            ::EhbrsRubyUtils::Videos::Subtitles::Sanitize::ContentSanitizer.new(original).output
          file.write(sanitized) unless original == sanitized
        end
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ehbrs_ruby_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.35.0
|
4
|
+
version: 0.36.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo H. Bogoni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aranha
|
@@ -154,6 +154,26 @@ dependencies:
|
|
154
154
|
- - "~>"
|
155
155
|
- !ruby/object:Gem::Version
|
156
156
|
version: '0.5'
|
157
|
+
- !ruby/object:Gem::Dependency
|
158
|
+
name: srt
|
159
|
+
requirement: !ruby/object:Gem::Requirement
|
160
|
+
requirements:
|
161
|
+
- - "~>"
|
162
|
+
- !ruby/object:Gem::Version
|
163
|
+
version: '0.1'
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 0.1.5
|
167
|
+
type: :runtime
|
168
|
+
prerelease: false
|
169
|
+
version_requirements: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0.1'
|
174
|
+
- - ">="
|
175
|
+
- !ruby/object:Gem::Version
|
176
|
+
version: 0.1.5
|
157
177
|
- !ruby/object:Gem::Dependency
|
158
178
|
name: taglib-ruby
|
159
179
|
requirement: !ruby/object:Gem::Requirement
|
@@ -289,6 +309,9 @@ files:
|
|
289
309
|
- lib/ehbrs_ruby_utils/fs/selected.rb
|
290
310
|
- lib/ehbrs_ruby_utils/fs/selected/build.rb
|
291
311
|
- lib/ehbrs_ruby_utils/fs/selected/build_file.rb
|
312
|
+
- lib/ehbrs_ruby_utils/fs/to_file_format.rb
|
313
|
+
- lib/ehbrs_ruby_utils/fs/to_utf8_unix.rb
|
314
|
+
- lib/ehbrs_ruby_utils/fs/to_windows_pt_br.rb
|
292
315
|
- lib/ehbrs_ruby_utils/gjt1.rb
|
293
316
|
- lib/ehbrs_ruby_utils/gjt1/manager.rb
|
294
317
|
- lib/ehbrs_ruby_utils/mudslide.rb
|
@@ -341,6 +364,10 @@ files:
|
|
341
364
|
- lib/ehbrs_ruby_utils/videos/series/rename/results_builder.rb
|
342
365
|
- lib/ehbrs_ruby_utils/videos/series/rename/season_group.rb
|
343
366
|
- lib/ehbrs_ruby_utils/videos/stream.rb
|
367
|
+
- lib/ehbrs_ruby_utils/videos/subtitles/sanitize.rb
|
368
|
+
- lib/ehbrs_ruby_utils/videos/subtitles/sanitize/content_sanitizer.rb
|
369
|
+
- lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_pattern_matcher.rb
|
370
|
+
- lib/ehbrs_ruby_utils/videos/subtitles/sanitize/with_term_matcher.rb
|
344
371
|
- lib/ehbrs_ruby_utils/web_utils.rb
|
345
372
|
- lib/ehbrs_ruby_utils/web_utils/instance.rb
|
346
373
|
- lib/ehbrs_ruby_utils/web_utils/instance/finances.rb
|