subtitle 0.2.6 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/allfather.rb +75 -1
- data/lib/dfxp.rb +3 -3
- data/lib/engines/aws.rb +3 -9
- data/lib/scc.rb +76 -18
- data/lib/srt.rb +72 -4
- data/lib/subtitle.rb +55 -13
- data/lib/ttml.rb +8 -5
- data/lib/utils/common_utils.rb +329 -0
- data/lib/utils/cue_info.rb +40 -0
- data/lib/vtt.rb +74 -3
- metadata +32 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49e45fb2713aedd5d6d7d6d290fe4874a292df3249cfad7259913e90b0cb7fd8
|
4
|
+
data.tar.gz: 7f4535875a19028db4ec08de90903daba7b906b659571c5a921850071bf3154c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b53144f0a627a545c0a989d664f3611078a7afc11e9f00b479065a0d3a1b2bc9bf68e10706bd89b85c0e73ff53d7c4627a2c5e29d38867ec2882c99ea56eda0
|
7
|
+
data.tar.gz: 6999ae152b2f5904a2061944522b11387280df6e02cbf4d36d7d5ae27ba12eb3b6c36357bba791c5a7b4f06575531ce76df904dcbb76b0e5f79bd62b05988704
|
data/lib/allfather.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require_relative "engines/translator"
|
3
|
+
|
1
4
|
#
|
2
5
|
# A Module that kind of acts as an interface where the generic methods
|
3
6
|
# that applies to each caption type can be defined
|
@@ -12,6 +15,15 @@ module AllFather
|
|
12
15
|
#
|
13
16
|
VALID_FILES = [".scc", ".srt", ".vtt", ".ttml", ".dfxp"]
|
14
17
|
|
18
|
+
#
|
19
|
+
# Caption type constants
|
20
|
+
#
|
21
|
+
TYPE_SCC = 1
|
22
|
+
TYPE_SRT = 2
|
23
|
+
TYPE_VTT = 3
|
24
|
+
TYPE_TTML = 4
|
25
|
+
TYPE_DFXP = 5
|
26
|
+
|
15
27
|
#
|
16
28
|
# Generic exception class that is raised for validation errors
|
17
29
|
#
|
@@ -45,12 +57,23 @@ module AllFather
|
|
45
57
|
raise "Not Implemented. Class #{self.class.name} doesn't implement infer_languages"
|
46
58
|
end
|
47
59
|
|
60
|
+
|
61
|
+
#
|
62
|
+
# Method to set a translation engine
|
63
|
+
#
|
64
|
+
# * +translator+ - Instance of translation engine. Refer to `engines/aws` for example
|
65
|
+
#
|
66
|
+
def set_translator(translator)
|
67
|
+
if translator && !(translator.is_a? Translator)
|
68
|
+
raise "Argument is not an instance of Translator"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
48
72
|
#
|
49
73
|
# Method to translate the caption from one language to another
|
50
74
|
#
|
51
75
|
# :args: src_lang, target_lang, output_file
|
52
76
|
#
|
53
|
-
# * +input_caption+ - A Valid input caption file. Refer to #is_valid?
|
54
77
|
# * +src_lang+ - can be inferred using #infer_language method
|
55
78
|
# * +target_lang+ - Target 2 letter ISO language code to which the source needs to be translated in to.
|
56
79
|
# * +output_file+ - Output file. Can be a fully qualified path or just file name
|
@@ -80,4 +103,55 @@ module AllFather
|
|
80
103
|
# Further checks can be done only in caption specific implementations
|
81
104
|
# or translation engine specific implementation
|
82
105
|
end
|
106
|
+
|
107
|
+
#
|
108
|
+
# Method to convert from one caption type to other types. If the src_lang is not provided
|
109
|
+
# then all source languages will be converted to target types. For example, if a ttml file
|
110
|
+
# has "en" and "es" and target_type is vtt and no src_lang is provided 2 vtt files would be
|
111
|
+
# created one per language in the source. If a target_lang is provided then one of the lang
|
112
|
+
# from source would be picked for creating the output file with target_lang
|
113
|
+
#
|
114
|
+
# If no target_lang is provided, no translations are applied. The output_file is created
|
115
|
+
# without any need for any language translation services. Hence doesn't incur any cost !!
|
116
|
+
#
|
117
|
+
# * +types+ - An array of valid target caption type(s). Refer to the +TYPE_+ constants
|
118
|
+
# * +src_lang+ - can be inferred using #infer_language method
|
119
|
+
# * +target_lang+ - Target 2 letter ISO language code to which the source needs to be translated in to.
|
120
|
+
# * +output_dir+ - Output Directory. Generated files would be dumped here
|
121
|
+
#
|
122
|
+
# ==== Raises
|
123
|
+
#
|
124
|
+
# InvalidInputException shall be raised if
|
125
|
+
# 1. The input file doesn't exist or is unreadable or is invalid caption
|
126
|
+
# 2. The output dir doesn't exist
|
127
|
+
# 3. Invalid lang codes for a given caption type
|
128
|
+
# 4. Unsupported type to which conversion is requested for
|
129
|
+
#
|
130
|
+
def transform_to(types, src_lang, target_lang, output_dir)
|
131
|
+
if (types - supported_transformations).size != 0
|
132
|
+
raise InvalidInputException.new("Unknown types provided for conversion in input #{types}")
|
133
|
+
end
|
134
|
+
unless File.directory?(output_dir)
|
135
|
+
FileUtils.mkdir_p(output_dir)
|
136
|
+
end
|
137
|
+
# Basic validations
|
138
|
+
if types.include?(TYPE_SCC)
|
139
|
+
if target_lang && !target_lang.eql?("en")
|
140
|
+
raise InvalidInputException.new("SCC can be generated only in en. #{target_lang} is unsupported")
|
141
|
+
end
|
142
|
+
end
|
143
|
+
if target_lang && !target_lang.empty?
|
144
|
+
raise InvalidInputException.new("Translation to other language as part of transform is yet to be implemented")
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
#
|
149
|
+
# Method to report on the supported transformations. Each implementor is free to return
|
150
|
+
# the types to which it can convert itself to
|
151
|
+
#
|
152
|
+
# Returns an array of one or more types defined as +TYPE_+ constants here
|
153
|
+
#
|
154
|
+
def supported_transformations
|
155
|
+
raise "Not Implemented. Class #{self.class.name} doesn't implement supported_transformations"
|
156
|
+
end
|
83
157
|
end
|
data/lib/dfxp.rb
CHANGED
@@ -10,10 +10,10 @@ require_relative "ttml"
|
|
10
10
|
#
|
11
11
|
class DFXP < TTML
|
12
12
|
|
13
|
-
def initialize(cc_file
|
13
|
+
def initialize(cc_file)
|
14
14
|
@cc_file = cc_file
|
15
|
-
|
16
|
-
|
15
|
+
#@translator = translator
|
16
|
+
#@force_detect = opts[:force_detect] || false
|
17
17
|
raise "Invalid TTML file provided" unless is_valid?
|
18
18
|
end
|
19
19
|
|
data/lib/engines/aws.rb
CHANGED
@@ -10,10 +10,9 @@ require_relative 'translator'
|
|
10
10
|
# == Credential Referencing Order
|
11
11
|
#
|
12
12
|
# * [Arguments] - Pass the credentials access_key_id and secret_access_key as arguments
|
13
|
-
# * [Environment route] - AWS_ACCESS_KEY_ID & AWS_SECRET_ACCESS_KEY can be exposed as
|
14
|
-
# environment variables
|
13
|
+
# * [Environment route] - AWS_ACCESS_KEY_ID & AWS_SECRET_ACCESS_KEY can be exposed as environment variables
|
15
14
|
# * [Profile Name] - The application uses the credentials of the system and picks the
|
16
|
-
#
|
15
|
+
# credentials referred to by the profile
|
17
16
|
#
|
18
17
|
class AwsEngine
|
19
18
|
include Translator
|
@@ -71,9 +70,6 @@ class AwsEngine
|
|
71
70
|
# Invokes the language detection API of AWS and returns only the language
|
72
71
|
# of the highest score and returns the ISO 639-1 code
|
73
72
|
#
|
74
|
-
# :args: text
|
75
|
-
#
|
76
|
-
# ===== Arguments
|
77
73
|
# * +text+ - The text for which the language is to be inferred
|
78
74
|
#
|
79
75
|
def infer_language(text)
|
@@ -83,12 +79,10 @@ class AwsEngine
|
|
83
79
|
|
84
80
|
#
|
85
81
|
# Invokes the translation API of AWS and returns the translated text
|
86
|
-
# as per the arguments provided
|
82
|
+
# as per the arguments provided.
|
87
83
|
# Will Raise exception if a translation cannot be made between the source
|
88
84
|
# and target language codes or if the lang code is invalid
|
89
85
|
#
|
90
|
-
# :args: input_text, src_lang, target_lang
|
91
|
-
#
|
92
86
|
# * +input_text+ - The text that needs to be translated
|
93
87
|
# * +src_lang+ - The source language of the text
|
94
88
|
# * +target_lang+ - The target language to which the input_text needs to be translated to
|
data/lib/scc.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require_relative "engines/translator"
|
2
|
+
require_relative "utils/common_utils"
|
3
|
+
require_relative "utils/cue_info"
|
2
4
|
require_relative "allfather"
|
3
5
|
|
4
6
|
#
|
@@ -10,10 +12,12 @@ require_relative "allfather"
|
|
10
12
|
class SCC
|
11
13
|
|
12
14
|
include AllFather
|
15
|
+
include CommonUtils
|
13
16
|
|
14
|
-
|
17
|
+
SUPPORTED_TRANSFORMATIONS = [TYPE_SRT, TYPE_VTT, TYPE_TTML, TYPE_DFXP]
|
18
|
+
|
19
|
+
def initialize(cc_file)
|
15
20
|
@cc_file = cc_file
|
16
|
-
@translator = translator
|
17
21
|
raise "Invalid SCC file provided" unless is_valid?
|
18
22
|
end
|
19
23
|
|
@@ -25,6 +29,11 @@ class SCC
|
|
25
29
|
return false
|
26
30
|
end
|
27
31
|
|
32
|
+
def set_translator(translator)
|
33
|
+
super(translator)
|
34
|
+
@translator = translator
|
35
|
+
end
|
36
|
+
|
28
37
|
def infer_languages
|
29
38
|
lang = nil
|
30
39
|
begin
|
@@ -40,6 +49,71 @@ class SCC
|
|
40
49
|
raise "Not Implemented. Class #{self.class.name} doesn't implement translate yet !!"
|
41
50
|
end
|
42
51
|
|
52
|
+
def supported_transformations
|
53
|
+
return SUPPORTED_TRANSFORMATIONS
|
54
|
+
end
|
55
|
+
|
56
|
+
def transform_to(types, src_lang, target_lang, output_dir)
|
57
|
+
# Let's start off with some validations
|
58
|
+
super(types, src_lang, target_lang, output_dir)
|
59
|
+
|
60
|
+
# Suffix output dir with File separator
|
61
|
+
output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
|
62
|
+
|
63
|
+
# Prepare the output files for each type
|
64
|
+
file_map = {}
|
65
|
+
types.each do |type|
|
66
|
+
output_file = File.basename(@cc_file, File.extname(@cc_file)) + extension_from_type(type)
|
67
|
+
out_file = "#{output_dir}#{output_file}"
|
68
|
+
if create_file(TYPE_SCC, type, out_file, target_lang)
|
69
|
+
file_map[type] = out_file
|
70
|
+
else
|
71
|
+
raise StandardError.new("Failed to create output file for type #{type}")
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Read the file and prepare the cue model
|
76
|
+
prev_cue_info = cur_cue_info = nil
|
77
|
+
ccfile = File.open(@cc_file, 'r:UTF-8', &:read)
|
78
|
+
cue_index = 1
|
79
|
+
ccfile.each_line do | line |
|
80
|
+
time_point = line.scan(/(^\d\d:\d\d:\d\d:\d\d\s)(.*)/)
|
81
|
+
unless time_point.empty?
|
82
|
+
scc_text_code = time_point[0][1].strip
|
83
|
+
message = decode(scc_text_code)
|
84
|
+
# Replace \u0000 with empty as this causes the ttml / dfxp outputs
|
85
|
+
# to treat them as end and terminates the xml the moment this is encountered
|
86
|
+
# https://github.com/sparklemotion/nokogiri/issues/1535
|
87
|
+
message = message.gsub(/\u0000/, '')
|
88
|
+
if prev_cue_info.nil?
|
89
|
+
prev_cue_info = CueInfo.new(TYPE_SCC)
|
90
|
+
prev_cue_info.index = cue_index
|
91
|
+
prev_cue_info.message = message
|
92
|
+
prev_cue_info.start = time_point[0][0].strip
|
93
|
+
else
|
94
|
+
cur_cue_info = CueInfo.new(TYPE_SCC)
|
95
|
+
cur_cue_info.index = cue_index
|
96
|
+
cur_cue_info.message = message
|
97
|
+
cur_cue_info.start = time_point[0][0].strip
|
98
|
+
# Set the previous cue info's end time to current cue's start time
|
99
|
+
# TODO: Need to see if we need to reduce at least 1 fps or 1s
|
100
|
+
prev_cue_info.end = cur_cue_info.start
|
101
|
+
prev_cue_info.start_time_units = time_details(prev_cue_info.start, TYPE_SCC)
|
102
|
+
prev_cue_info.end_time_units = time_details(prev_cue_info.end, TYPE_SCC)
|
103
|
+
write_cue(prev_cue_info, file_map)
|
104
|
+
prev_cue_info = cur_cue_info
|
105
|
+
end
|
106
|
+
cue_index += 1
|
107
|
+
end
|
108
|
+
end
|
109
|
+
# we need to set some end time, but don't know the same !!
|
110
|
+
# for now setting the start time itself
|
111
|
+
cur_cue_info.end = cur_cue_info.start
|
112
|
+
cur_cue_info.start_time_units = time_details(cur_cue_info.start, TYPE_SCC)
|
113
|
+
cur_cue_info.end_time_units = time_details(cur_cue_info.end, TYPE_SCC)
|
114
|
+
write_cue(cur_cue_info, file_map, true)
|
115
|
+
end
|
116
|
+
|
43
117
|
private
|
44
118
|
|
45
119
|
def get_text(srt_file, num_chars)
|
@@ -78,20 +152,4 @@ class SCC
|
|
78
152
|
end
|
79
153
|
decoded_text
|
80
154
|
end
|
81
|
-
|
82
|
-
def encode(free_text)
|
83
|
-
encoded_str = ""
|
84
|
-
count = 0
|
85
|
-
free_text.each_byte do |char|
|
86
|
-
count += 1
|
87
|
-
binval = char.to_s(2).count("1") % 2 == 0 ? (char.to_i | 128 ).to_s(2) : char.to_s(2)
|
88
|
-
encode_char = binval.to_i(2).to_s(16)
|
89
|
-
if ((count > 0) && (count % 2 == 0))
|
90
|
-
encoded_str << encode_char << " "
|
91
|
-
else
|
92
|
-
encoded_str << encode_char
|
93
|
-
end
|
94
|
-
end
|
95
|
-
encoded_str
|
96
|
-
end
|
97
155
|
end
|
data/lib/srt.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require_relative "engines/translator"
|
2
|
+
require_relative "utils/common_utils"
|
3
|
+
require_relative "utils/cue_info"
|
2
4
|
require_relative "allfather"
|
3
5
|
|
4
6
|
#
|
@@ -10,10 +12,12 @@ require_relative "allfather"
|
|
10
12
|
class SRT
|
11
13
|
|
12
14
|
include AllFather
|
15
|
+
include CommonUtils
|
13
16
|
|
14
|
-
|
17
|
+
SUPPORTED_TRANSFORMATIONS = [TYPE_SCC, TYPE_VTT, TYPE_TTML, TYPE_DFXP]
|
18
|
+
|
19
|
+
def initialize(cc_file)
|
15
20
|
@cc_file = cc_file
|
16
|
-
@translator = translator
|
17
21
|
raise "Invalid SRT file provided" unless is_valid?
|
18
22
|
end
|
19
23
|
|
@@ -25,6 +29,11 @@ class SRT
|
|
25
29
|
return false
|
26
30
|
end
|
27
31
|
|
32
|
+
def set_translator(translator)
|
33
|
+
super(translator)
|
34
|
+
@translator = translator
|
35
|
+
end
|
36
|
+
|
28
37
|
def translate(src_lang, dest_lang, out_file)
|
29
38
|
super(src_lang, dest_lang, out_file)
|
30
39
|
begin
|
@@ -60,7 +69,6 @@ class SRT
|
|
60
69
|
outfile.puts
|
61
70
|
end
|
62
71
|
ensure
|
63
|
-
ccfile.close rescue nil
|
64
72
|
outfile.close
|
65
73
|
end
|
66
74
|
end
|
@@ -76,6 +84,66 @@ class SRT
|
|
76
84
|
[lang]
|
77
85
|
end
|
78
86
|
|
87
|
+
def supported_transformations
|
88
|
+
return SUPPORTED_TRANSFORMATIONS
|
89
|
+
end
|
90
|
+
|
91
|
+
def transform_to(types, src_lang, target_lang, output_dir)
|
92
|
+
# Let's start off with some validations
|
93
|
+
super(types, src_lang, target_lang, output_dir)
|
94
|
+
|
95
|
+
# Suffix output dir with File separator
|
96
|
+
output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
|
97
|
+
|
98
|
+
# Prepare the output files for each type
|
99
|
+
file_map = {}
|
100
|
+
types.each do |type|
|
101
|
+
output_file = File.basename(@cc_file, File.extname(@cc_file)) + extension_from_type(type)
|
102
|
+
out_file = "#{output_dir}#{output_file}"
|
103
|
+
if create_file(TYPE_SRT, type, out_file, target_lang)
|
104
|
+
file_map[type] = out_file
|
105
|
+
else
|
106
|
+
raise StandardError.new("Failed to create output file for type #{type}")
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Read the file and prepare the cue model
|
111
|
+
cue_info = nil
|
112
|
+
ccfile = File.open(@cc_file, 'r:UTF-8', &:read)
|
113
|
+
message = ""
|
114
|
+
ccfile.each_line do | line |
|
115
|
+
# p line
|
116
|
+
next if line.strip.empty?
|
117
|
+
time_points = line.scan(/^((\d\d:)\d\d:\d\d[,.]\d\d\d.*)-->.*((\d\d:)\d\d:\d\d[,.]\d\d\d)/)
|
118
|
+
if time_points.empty?
|
119
|
+
# This is not a time point
|
120
|
+
seq = line.strip
|
121
|
+
if seq.to_i > 0
|
122
|
+
cue_info.message = message unless message.empty?
|
123
|
+
write_cue(cue_info, file_map) if cue_info
|
124
|
+
cue_info = CueInfo.new(TYPE_SRT)
|
125
|
+
cue_info.sequence = seq
|
126
|
+
# Reset the message
|
127
|
+
message = ""
|
128
|
+
else
|
129
|
+
# This is neither a sequence number nor a time point
|
130
|
+
# Grab the details until we find next cue point
|
131
|
+
message << line
|
132
|
+
end
|
133
|
+
else
|
134
|
+
# This is a cue point. Fetch timestamps
|
135
|
+
cue_info.start = time_points[0][0]
|
136
|
+
cue_info.end = time_points[0][2]
|
137
|
+
start_units = time_details(cue_info.start, TYPE_SRT)
|
138
|
+
end_units = time_details(cue_info.end, TYPE_SRT)
|
139
|
+
cue_info.start_time_units = start_units
|
140
|
+
cue_info.end_time_units = end_units
|
141
|
+
end
|
142
|
+
end
|
143
|
+
cue_info.message = message unless message.empty?
|
144
|
+
write_cue(cue_info, file_map, true)
|
145
|
+
end
|
146
|
+
|
79
147
|
private
|
80
148
|
|
81
149
|
#
|
@@ -103,4 +171,4 @@ class SRT
|
|
103
171
|
end
|
104
172
|
return text_sample[0, num_chars]
|
105
173
|
end
|
106
|
-
end
|
174
|
+
end
|
data/lib/subtitle.rb
CHANGED
@@ -7,21 +7,29 @@ require_relative "allfather"
|
|
7
7
|
require_relative "engines/translator"
|
8
8
|
require_relative "engines/aws"
|
9
9
|
|
10
|
-
|
10
|
+
#
|
11
|
+
# Facade that wraps all the complexities surrounding which translation
|
12
|
+
# engine to use or which caption instances to be instantiated.
|
13
|
+
#
|
11
14
|
class Subtitle
|
12
|
-
|
15
|
+
|
16
|
+
TYPE_MAP = {"scc" => AllFather::TYPE_SCC, "srt" => AllFather::TYPE_SRT, "vtt" => AllFather::TYPE_VTT,
|
17
|
+
"ttml" => AllFather::TYPE_TTML, "dfxp" => AllFather::TYPE_DFXP}
|
18
|
+
|
19
|
+
def initialize(file, options = nil)
|
13
20
|
# Infer the caption handler from the extension
|
14
|
-
@cc_file =
|
21
|
+
@cc_file = file
|
15
22
|
raise "Input caption not provided. Please provide the same in :cc_file option" if @cc_file.nil?
|
16
|
-
|
17
|
-
@handler = get_caption_handler(options, translator)
|
23
|
+
initialize_handler(options) unless options.nil?
|
18
24
|
end
|
19
25
|
|
20
|
-
def detect_language
|
26
|
+
def detect_language(options = nil)
|
27
|
+
initialize_handler(options) if @handler.nil?
|
21
28
|
@handler.infer_languages
|
22
29
|
end
|
23
30
|
|
24
|
-
def translate(dest_lang, src_lang = nil, outfile = nil)
|
31
|
+
def translate(dest_lang, src_lang = nil, outfile = nil, options = nil)
|
32
|
+
initialize_handler(options) if @handler.nil?
|
25
33
|
if outfile.nil?
|
26
34
|
outfile = "#{@cc_file}_#{dest_lang}"
|
27
35
|
end
|
@@ -33,13 +41,40 @@ class Subtitle
|
|
33
41
|
outfile
|
34
42
|
end
|
35
43
|
|
44
|
+
def transform(types, src_lang = nil, target_lang = nil, options = nil)
|
45
|
+
# A quick validation & translation to expected arguments
|
46
|
+
vals = []
|
47
|
+
invalid_vals = []
|
48
|
+
types.each do |type|
|
49
|
+
type_val = TYPE_MAP[type]
|
50
|
+
if type_val.nil?
|
51
|
+
invalid_vals << type
|
52
|
+
next
|
53
|
+
end
|
54
|
+
vals << type_val
|
55
|
+
end
|
56
|
+
unless invalid_vals.empty?
|
57
|
+
raise "Invalid types #{invalid_vals} provided"
|
58
|
+
end
|
59
|
+
# Translator not required if target_lang is nil
|
60
|
+
if @handler.nil?
|
61
|
+
if target_lang.nil?
|
62
|
+
@handler = get_caption_handler(options, nil)
|
63
|
+
else
|
64
|
+
initialize_handler(options)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
output_dir = options[:outfile]
|
68
|
+
@handler.transform_to(vals, src_lang, target_lang, output_dir)
|
69
|
+
end
|
70
|
+
|
36
71
|
def type
|
37
72
|
type = nil
|
38
73
|
ccfile = File.open(@cc_file, 'r:UTF-8', &:read)
|
39
74
|
ccfile.each_line do | line |
|
40
75
|
if line =~ /^(\d\d:)\d\d:\d\d[,]\d\d\d.*-->.*(\d\d:)\d\d:\d\d[,]\d\d\d/
|
41
76
|
type = "srt"
|
42
|
-
elsif line =~
|
77
|
+
elsif line =~ /^((\d\d:)+\d\d[.,]\d\d\d)\s-->\s((\d\d:)+\d\d[.,]\d\d\d)|(^WEBVTT$)/
|
43
78
|
type = "vtt"
|
44
79
|
elsif line =~ /(^\d\d:\d\d:\d\d:\d\d\t(([0-9a-fA-F]{4})\s)*)+|(^Scenarist_SCC V(\d.\d)$)/
|
45
80
|
type = "scc"
|
@@ -63,6 +98,11 @@ class Subtitle
|
|
63
98
|
|
64
99
|
private
|
65
100
|
|
101
|
+
def initialize_handler(options)
|
102
|
+
translator = get_translator(options)
|
103
|
+
@handler = get_caption_handler(options, translator)
|
104
|
+
end
|
105
|
+
|
66
106
|
def get_translator(options)
|
67
107
|
translator = nil
|
68
108
|
# Try to infer the engine based on the passed options
|
@@ -93,24 +133,26 @@ class Subtitle
|
|
93
133
|
def get_caption_handler(options, translator)
|
94
134
|
caption_file = options[:cc_file]
|
95
135
|
extension = File.extname(caption_file)
|
136
|
+
extension = ".#{type}" if extension.nil?
|
96
137
|
unless AllFather::VALID_FILES.include?(extension)
|
97
138
|
raise "Caption support for #{caption_file} of type #{extension} is not supported yet"
|
98
139
|
end
|
99
140
|
handler = nil
|
100
141
|
case extension.downcase
|
101
142
|
when ".scc"
|
102
|
-
handler = SCC.new(caption_file
|
143
|
+
handler = SCC.new(caption_file)
|
103
144
|
when ".srt"
|
104
|
-
handler = SRT.new(caption_file
|
145
|
+
handler = SRT.new(caption_file)
|
105
146
|
when ".vtt"
|
106
|
-
handler = VTT.new(caption_file
|
147
|
+
handler = VTT.new(caption_file)
|
107
148
|
when ".ttml"
|
108
|
-
handler = TTML.new(caption_file
|
149
|
+
handler = TTML.new(caption_file)
|
109
150
|
when ".dfxp"
|
110
|
-
handler = DFXP.new(caption_file
|
151
|
+
handler = DFXP.new(caption_file)
|
111
152
|
else
|
112
153
|
raise "Cannot handle file type .#{extension}"
|
113
154
|
end
|
155
|
+
handler.set_translator(translator)
|
114
156
|
handler
|
115
157
|
end
|
116
158
|
end
|
data/lib/ttml.rb
CHANGED
@@ -13,10 +13,8 @@ class TTML
|
|
13
13
|
|
14
14
|
include AllFather
|
15
15
|
|
16
|
-
def initialize(cc_file
|
16
|
+
def initialize(cc_file)
|
17
17
|
@cc_file = cc_file
|
18
|
-
@translator = translator
|
19
|
-
@force_detect = opts[:force_detect] || false
|
20
18
|
raise "Invalid TTML file provided" unless is_valid?
|
21
19
|
end
|
22
20
|
|
@@ -30,7 +28,12 @@ class TTML
|
|
30
28
|
return false
|
31
29
|
end
|
32
30
|
|
31
|
+
def set_translator(translator)
|
32
|
+
@translator = translator
|
33
|
+
end
|
34
|
+
|
33
35
|
def infer_languages
|
36
|
+
force_detect = false
|
34
37
|
lang = []
|
35
38
|
begin
|
36
39
|
xml_file = File.open(@cc_file)
|
@@ -43,9 +46,9 @@ class TTML
|
|
43
46
|
if inferred_lang.nil?
|
44
47
|
# If lang is not provided in the caption, then override
|
45
48
|
# force detect for inference
|
46
|
-
|
49
|
+
force_detect = true
|
47
50
|
end
|
48
|
-
if
|
51
|
+
if force_detect
|
49
52
|
sample_text = get_text(div, 100)
|
50
53
|
inferred_lang = @translator.infer_language(sample_text) rescue nil
|
51
54
|
if inferred_lang.nil?
|
@@ -0,0 +1,329 @@
|
|
1
|
+
require_relative "../allfather"
|
2
|
+
require "nokogiri"
|
3
|
+
|
4
|
+
module CommonUtils
|
5
|
+
|
6
|
+
CREDITS = "Credits: Autogenerated by subtitle Rubygem".freeze
|
7
|
+
|
8
|
+
SCC_DEFAULT_FRAME_RATE = ENV["SCC_DEFAULT_FRAME_RATE"] || 23.976
|
9
|
+
|
10
|
+
#
|
11
|
+
# Method to create the file with basic header information which can be
|
12
|
+
# further updated with the transformed caption details by respective
|
13
|
+
# implementations
|
14
|
+
#
|
15
|
+
# * +src_type+ - Source caption type. Refer to AllFather::TYPE_SCC type constants
|
16
|
+
# * +dest_type+ - Target caption type. Refer to AllFather::TYPE_SCC type constants
|
17
|
+
# * +output_file+ - Creates this output_file to which type specific
|
18
|
+
# information would be dumped into
|
19
|
+
# * +target_lang+ - Target lang of the output_file
|
20
|
+
#
|
21
|
+
# ==== Returns
|
22
|
+
# true if the file is created with right headers and false otherwise
|
23
|
+
#
|
24
|
+
def create_file(src_type, dest_type, output_file, target_lang)
|
25
|
+
file = nil
|
26
|
+
done = false
|
27
|
+
begin
|
28
|
+
# Create the file in overwrite mode
|
29
|
+
file = File.open(output_file, "w")
|
30
|
+
|
31
|
+
# Dump the initial info into the file to start off with
|
32
|
+
case dest_type
|
33
|
+
when AllFather::TYPE_SCC
|
34
|
+
file.write("Scenarist_SCC V1.0\n\n")
|
35
|
+
|
36
|
+
when AllFather::TYPE_SRT
|
37
|
+
file.write("NOTE #{CREDITS}\n\n")
|
38
|
+
|
39
|
+
when AllFather::TYPE_VTT
|
40
|
+
file.write("WEBVTT\n\n")
|
41
|
+
file.write("NOTE #{CREDITS}\n\n")
|
42
|
+
|
43
|
+
when AllFather::TYPE_TTML
|
44
|
+
target_lang ||= ""
|
45
|
+
# TODO: Move this to a template file and load from there !!
|
46
|
+
data = <<-EOF
|
47
|
+
<tt xml:lang="" xmlns="http://www.w3.org/ns/ttml">
|
48
|
+
<head>
|
49
|
+
<metadata xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
|
50
|
+
<ttm:desc>#{CREDITS}</ttm:desc>
|
51
|
+
</metadata>
|
52
|
+
</head>
|
53
|
+
<body>
|
54
|
+
<div xml:lang=\"#{target_lang}\">
|
55
|
+
EOF
|
56
|
+
file.write(data)
|
57
|
+
|
58
|
+
when AllFather::TYPE_DFXP
|
59
|
+
target_lang ||= ""
|
60
|
+
data = <<-EOF
|
61
|
+
<tt xml:lang="" xmlns="http://www.w3.org/2004/11/ttaf1">
|
62
|
+
<head>
|
63
|
+
<meta xmlns:ttm="http://www.w3.org/2004/11/ttaf1#metadata">
|
64
|
+
<ttm:desc>#{CREDITS}</ttm:desc>
|
65
|
+
</meta>
|
66
|
+
</head>
|
67
|
+
<body>
|
68
|
+
<div xml:lang=\"#{target_lang}\">
|
69
|
+
EOF
|
70
|
+
file.write(data)
|
71
|
+
else
|
72
|
+
raise AllFather::InvalidInputException.new("Not a valid type; Failed to create output file for type #{type}")
|
73
|
+
end
|
74
|
+
done = true
|
75
|
+
ensure
|
76
|
+
file.close if file rescue nil
|
77
|
+
end
|
78
|
+
done
|
79
|
+
end
|
80
|
+
|
81
|
+
#
|
82
|
+
# Method to return a valid extension for a given caption type
|
83
|
+
# Refer to `AllFather#VALID_FILES`
|
84
|
+
#
|
85
|
+
# * +type+ - Must be one of the valid type defined in `AllFather`
|
86
|
+
#
|
87
|
+
# ====Raises
|
88
|
+
# InvalidInputException if a valid type is not provided
|
89
|
+
#
|
90
|
+
def extension_from_type(type)
|
91
|
+
case type
|
92
|
+
when AllFather::TYPE_SCC
|
93
|
+
return AllFather::VALID_FILES[0]
|
94
|
+
when AllFather::TYPE_SRT
|
95
|
+
return AllFather::VALID_FILES[1]
|
96
|
+
when AllFather::TYPE_VTT
|
97
|
+
return AllFather::VALID_FILES[2]
|
98
|
+
when AllFather::TYPE_TTML
|
99
|
+
return AllFather::VALID_FILES[3]
|
100
|
+
when AllFather::TYPE_DFXP
|
101
|
+
return AllFather::VALID_FILES[4]
|
102
|
+
else
|
103
|
+
raise AllFather::InvalidInputException.new("Not a valid type; Failed to create output file for type #{type}")
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
#
|
108
|
+
# Method to encode a text to SCC format
|
109
|
+
#
|
110
|
+
# * +free_text+ - Text that needs to be encoded
|
111
|
+
#
|
112
|
+
# ===== Returns
|
113
|
+
# The encoded string that can be added to SCC file
|
114
|
+
#
|
115
|
+
def scc_encode(free_text)
|
116
|
+
encoded_str = ""
|
117
|
+
count = 0
|
118
|
+
free_text.each_byte do |char|
|
119
|
+
count += 1
|
120
|
+
binval = char.to_s(2).count("1") % 2 == 0 ? (char.to_i | 128 ).to_s(2) : char.to_s(2)
|
121
|
+
encode_char = binval.to_i(2).to_s(16)
|
122
|
+
if ((count > 0) && (count % 2 == 0))
|
123
|
+
encoded_str << encode_char << " "
|
124
|
+
else
|
125
|
+
encoded_str << encode_char
|
126
|
+
end
|
127
|
+
end
|
128
|
+
encoded_str
|
129
|
+
end
|
130
|
+
|
131
|
+
#
|
132
|
+
# Method to return the cue info of the caption based on the model
|
133
|
+
# and target caption type which can be used by the caller's transformation routine
|
134
|
+
#
|
135
|
+
# * +model+ - `CueInfo` instance which is caption agnostic details of a cue
|
136
|
+
# * +target_type+ - The target type to which the new cue is to be generated
|
137
|
+
# * +last_cue+ - true for last cue and false otherwise.
|
138
|
+
#
|
139
|
+
def new_cue(model, target_type, last_cue = false)
|
140
|
+
message = nil
|
141
|
+
case target_type
|
142
|
+
when AllFather::TYPE_SCC
|
143
|
+
start_unit = model.start_time_units
|
144
|
+
h = start_unit[0].to_s.rjust(2, "0")
|
145
|
+
m = start_unit[1].to_s.rjust(2, "0")
|
146
|
+
s = start_unit[2].to_s.rjust(2, "0")
|
147
|
+
ms = start_unit[3]
|
148
|
+
# Convert to frames using SCC_DEFAULT_FRAME_RATE (defaults to 23.976)
|
149
|
+
# Pad 0 if frames is <= 9
|
150
|
+
frames = ((ms.to_f * SCC_DEFAULT_FRAME_RATE) / 1000.0).to_i.to_s.rjust(2, "0").to_i
|
151
|
+
# TODO: Might have to strip off non-english characters here
|
152
|
+
message = "#{h}:#{m}:#{s}:#{frames} " + scc_encode(model.message)
|
153
|
+
when AllFather::TYPE_VTT, AllFather::TYPE_SRT
|
154
|
+
start_unit = model.start_time_units
|
155
|
+
end_unit = model.end_time_units
|
156
|
+
message = ""
|
157
|
+
if model.sequence
|
158
|
+
message = model.sequence + "\n"
|
159
|
+
else
|
160
|
+
message = model.index.to_s + "\n"
|
161
|
+
end
|
162
|
+
delimiter_added = false
|
163
|
+
[start_unit, end_unit].each do |unit|
|
164
|
+
h = unit[0].to_s.rjust(2, "0")
|
165
|
+
m = unit[1].to_s.rjust(2, "0")
|
166
|
+
s = unit[2].to_s.rjust(2, "0")
|
167
|
+
ms = unit[3]
|
168
|
+
if ms < 100
|
169
|
+
ms = ms.to_s.rjust(3, "0")
|
170
|
+
end
|
171
|
+
if target_type == AllFather::TYPE_VTT
|
172
|
+
message << "#{h}:#{m}:#{s}:#{ms}"
|
173
|
+
else
|
174
|
+
message << "#{h}:#{m}:#{s},#{ms}"
|
175
|
+
end
|
176
|
+
unless delimiter_added
|
177
|
+
message << " --> "
|
178
|
+
delimiter_added = true
|
179
|
+
end
|
180
|
+
end
|
181
|
+
message << "\n"
|
182
|
+
message << model.message
|
183
|
+
message << "\n"
|
184
|
+
message << "\n" unless model.message.end_with?("\n")
|
185
|
+
when AllFather::TYPE_TTML, AllFather::TYPE_DFXP
|
186
|
+
start_unit = model.start_time_units
|
187
|
+
end_unit = model.end_time_units
|
188
|
+
h = start_unit[0].to_s.rjust(2, "0")
|
189
|
+
m = start_unit[1].to_s.rjust(2, "0")
|
190
|
+
s = start_unit[2].to_s.rjust(2, "0")
|
191
|
+
ms = start_unit[3]
|
192
|
+
begin_time = "#{h}:#{m}:#{s}"
|
193
|
+
begin_time << ".#{ms.to_s.rjust(3, "0")}" if ms > 0
|
194
|
+
h = end_unit[0].to_s.rjust(2, "0")
|
195
|
+
m = end_unit[1].to_s.rjust(2, "0")
|
196
|
+
s = end_unit[2].to_s.rjust(2, "0")
|
197
|
+
ms = end_unit[3]
|
198
|
+
end_time = "#{h}:#{m}:#{s}"
|
199
|
+
end_time << ".#{ms.to_s.rjust(3, "0")}" if ms > 0
|
200
|
+
message = "<p begin=\"#{begin_time}\" end=\"#{end_time}\">#{model.message.encode(:xml => :text)}</p>"
|
201
|
+
message << "</div>\n</body>\n</tt>" if last_cue
|
202
|
+
end
|
203
|
+
message
|
204
|
+
end
|
205
|
+
|
206
|
+
#
|
207
|
+
# Method that normalizes the timestamps from various different caption formats into
|
208
|
+
# a caption agnostic format
|
209
|
+
#
|
210
|
+
# * +time_stamp+ - The timestamp parsed from the caption file for a given caption type
|
211
|
+
# * +type+ - A valid caption type. Refer to `AllFather` for valid types
|
212
|
+
#
|
213
|
+
def time_details(time_stamp, type)
|
214
|
+
h = m = s = ms = nil
|
215
|
+
elapsed_seconds = nil
|
216
|
+
case type
|
217
|
+
when AllFather::TYPE_SCC
|
218
|
+
tokens = time_stamp.split(":")
|
219
|
+
h = tokens[0].to_i
|
220
|
+
m = tokens[1].to_i
|
221
|
+
s = tokens[2].to_i
|
222
|
+
frames = tokens[3].to_i
|
223
|
+
ms = (frames * 1000 / SCC_DEFAULT_FRAME_RATE).round(0).to_s.rjust(3, "0").to_i
|
224
|
+
if ms >= 1000
|
225
|
+
ms = 999
|
226
|
+
end
|
227
|
+
when AllFather::TYPE_SRT
|
228
|
+
tokens = time_stamp.split(",")
|
229
|
+
ms = tokens[1].to_i
|
230
|
+
tokens = tokens[0].split(":")
|
231
|
+
h = tokens[0].to_i
|
232
|
+
m = tokens[1].to_i
|
233
|
+
s = tokens[2].to_i
|
234
|
+
when AllFather::TYPE_VTT
|
235
|
+
tokens = time_stamp.split(".")
|
236
|
+
ms = tokens[1].to_i
|
237
|
+
tokens = tokens[0].split(":")
|
238
|
+
if tokens.size == 2
|
239
|
+
h = 0
|
240
|
+
m = tokens[0].to_i
|
241
|
+
s = tokens[1].to_i
|
242
|
+
else
|
243
|
+
h = tokens[0].to_i
|
244
|
+
m = tokens[1].to_i
|
245
|
+
s = tokens[2].to_i
|
246
|
+
end
|
247
|
+
when AllFather::TYPE_TTML, AllFather::TYPE_DFXP
|
248
|
+
# We support only clock-time without framerate / tickrate and only media timebase
|
249
|
+
# For offset-time, frames / ticks are likewise unsupported
|
250
|
+
tokens = time_stamp.split(":")
|
251
|
+
if tokens.size > 1
|
252
|
+
if tokens.size > 3
|
253
|
+
# This is specified with frames and/or subframes. Unsupported
|
254
|
+
raise AllFather::InvalidInputException.new("TTML file with clock-time referencing frames / ticks is unsupported")
|
255
|
+
end
|
256
|
+
h = tokens[0].to_i
|
257
|
+
m = tokens[1].to_i
|
258
|
+
ms_tokens = tokens[2].split(".")
|
259
|
+
if ms_tokens.size == 1
|
260
|
+
ms = 0
|
261
|
+
else
|
262
|
+
ms = ms_tokens[1].to_i
|
263
|
+
end
|
264
|
+
s = ms_tokens[0].to_i
|
265
|
+
else
|
266
|
+
# Parsing in offset mode
|
267
|
+
if time_stamp.end_with?("ms")
|
268
|
+
unit = "ms"
|
269
|
+
time_with_no_unit = time_stamp[0, time_stamp.size - 2]
|
270
|
+
else
|
271
|
+
unit = time_stamp[time_stamp.size - 1]
|
272
|
+
time_with_no_unit = time_stamp[0, time_stamp.size - 1]
|
273
|
+
end
|
274
|
+
case unit
|
275
|
+
when "m"
|
276
|
+
time_with_no_unit = time_with_no_unit.to_f * 60
|
277
|
+
when "h"
|
278
|
+
time_with_no_unit = time_with_no_unit.to_f * (60 * 60)
|
279
|
+
when "s"
|
280
|
+
# do nothing
|
281
|
+
when "ms"
|
282
|
+
time_with_no_unit = time_with_no_unit.to_f / 1000.0
|
283
|
+
else
|
284
|
+
# Fail out f / t
|
285
|
+
raise AllFather::InvalidInputException.new("TTML file with offset-time referencing frames / ticks is unsupported")
|
286
|
+
end
|
287
|
+
tokens = time_with_no_unit.to_s.split(".")
|
288
|
+
h = m = 0
|
289
|
+
if tokens.size == 1
|
290
|
+
s = time_with_no_unit
|
291
|
+
ms = 0
|
292
|
+
else
|
293
|
+
s = tokens[0].to_i
|
294
|
+
ms = tokens[1].to_i
|
295
|
+
end
|
296
|
+
h = s / 3600
|
297
|
+
m = (s / 60) % 60
|
298
|
+
s = s % 60
|
299
|
+
end
|
300
|
+
end
|
301
|
+
elapsed_seconds = (h * 60 * 60) + (m * 60) + s
|
302
|
+
return [h, m, s, ms, elapsed_seconds]
|
303
|
+
end
|
304
|
+
|
305
|
+
|
306
|
+
#
|
307
|
+
# Method to write the cue details to the output files
|
308
|
+
#
|
309
|
+
# * +model+ - Cue instance
|
310
|
+
# * +file_map+ - Hash of files for each caption type
|
311
|
+
# * +last_cue+ - true for last cue and false otherwise
|
312
|
+
#
|
313
|
+
def write_cue(model, file_map, last_cue = false)
  # Append this cue to every output file; new_cue produces the text to
  # write for the given caption type.
  file_map.each do |type, file_path|
    File.open(file_path, "a") do |f|
      f.puts new_cue(model, type, last_cue)
    end
  end
  return unless last_cue

  # Once the last cue is written, pretty print the XML based outputs
  # (ttml & dfxp). Other caption types are left exactly as appended.
  xml_types = [AllFather::TYPE_DFXP, AllFather::TYPE_TTML]
  file_map.each do |type, file_path|
    next unless xml_types.include?(type)
    # Block form of File.open closes the read handle as soon as parsing is
    # done, before the same path is overwritten below.
    xml_doc = File.open(file_path, "r") { |file| Nokogiri::XML(file, &:noblanks) }
    File.write(file_path, xml_doc.to_s)
  end
end
|
329
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#
# Plain data holder for a single caption cue.
#
# +start+ / +end+ and +start_time_units+ / +end_time_units+ are stored as
# given by the caller; this class does no parsing or validation of them.
#
class CueInfo
  # Caption type the cue was created with. Read-only: it is only set
  # through the constructor.
  attr_reader :type

  # Remaining cue attributes are simple read / write fields
  attr_accessor :start, :end, :sequence, :message,
                :start_time_units, :end_time_units, :index

  #
  # * +type+ - caption type identifier for this cue
  #
  # Every other field starts with a neutral default: nil timestamps and
  # sequence, an empty message, empty time unit lists and an index of 1.
  #
  def initialize(type)
    @type = type
    @start = @end = @sequence = nil
    @message = ""
    @start_time_units = []
    @end_time_units = []
    @index = 1
  end
end
|
data/lib/vtt.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require_relative "engines/translator"
|
2
|
+
require_relative "utils/common_utils"
|
3
|
+
require_relative "utils/cue_info"
|
2
4
|
require_relative "allfather"
|
3
5
|
|
4
6
|
#
|
@@ -10,13 +12,20 @@ require_relative "allfather"
|
|
10
12
|
class VTT
|
11
13
|
|
12
14
|
include AllFather
|
15
|
+
include CommonUtils
|
13
16
|
|
14
|
-
|
17
|
+
SUPPORTED_TRANSFORMATIONS = [TYPE_SCC, TYPE_SRT, TYPE_TTML, TYPE_DFXP]
|
18
|
+
|
19
|
+
def initialize(cc_file)
|
15
20
|
@cc_file = cc_file
|
16
|
-
@translator = translator
|
17
21
|
raise "Invalid VTT file provided" unless is_valid?
|
18
22
|
end
|
19
23
|
|
24
|
+
#
# Sets the translation engine for this caption file
#
# * +translator+ - engine instance; it is handed to the inherited
#   set_translator first and then kept on this object as @translator
#
def set_translator(translator)
  # Bare super forwards the same argument to the inherited implementation
  super
  @translator = translator
end
|
28
|
+
|
20
29
|
def translate(src_lang, dest_lang, out_file)
|
21
30
|
super(src_lang, dest_lang, out_file)
|
22
31
|
begin
|
@@ -53,7 +62,6 @@ class VTT
|
|
53
62
|
outfile.puts
|
54
63
|
end
|
55
64
|
ensure
|
56
|
-
ccfile.close rescue nil
|
57
65
|
outfile.close
|
58
66
|
end
|
59
67
|
end
|
@@ -85,6 +93,69 @@ class VTT
|
|
85
93
|
return false
|
86
94
|
end
|
87
95
|
|
96
|
+
#
# Returns the SUPPORTED_TRANSFORMATIONS constant as is (no copy is made)
#
def supported_transformations
  SUPPORTED_TRANSFORMATIONS
end
|
99
|
+
|
100
|
+
#
# Transforms this VTT file into each of the requested caption types
#
# * +types+       - caption types to generate; one output file per entry
# * +src_lang+    - only forwarded to the inherited transform_to
# * +target_lang+ - forwarded to the inherited transform_to and to create_file
# * +output_dir+  - directory that receives the generated files
#
# Output files are named after the source file with the extension that
# extension_from_type gives for the type. Raises StandardError when
# create_file reports failure for a type. Returns whatever the final
# write_cue call returns.
#
def transform_to(types, src_lang, target_lang, output_dir)
  # The inherited implementation runs first (validations)
  super(types, src_lang, target_lang, output_dir)

  # Make sure the output dir ends with the file separator
  output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)

  # Create one output file per requested type and remember its path
  file_map = {}
  types.each do |type|
    target_name = File.basename(@cc_file, File.extname(@cc_file)) + extension_from_type(type)
    target_path = "#{output_dir}#{target_name}"
    unless create_file(TYPE_VTT, type, target_path, target_lang)
      raise StandardError, "Failed to create output file for type #{type}"
    end
    file_map[type] = target_path
  end

  # Walk the source line by line and emit a cue model per timing line
  cue = nil
  text = ""
  collecting = false
  cue_number = 1
  contents = File.open(@cc_file, 'r:UTF-8', &:read)
  contents.each_line do |line|
    # A blank line ends the text of the current cue
    if line.strip.empty?
      collecting = false
      next
    end

    timing = line.match(/^((\d\d:)+\d\d[.,]\d\d\d)\s-->\s((\d\d:)+\d\d[.,]\d\d\d)/)
    if timing.nil?
      # Not a timing line: it is cue text only while a cue is open
      text << line if collecting
      next
    end

    # A new timing line: flush the previous cue if it gathered any text.
    # A fresh string is assigned (not cleared) because the flushed cue
    # still references the old one.
    unless text.empty?
      cue.message = text
      write_cue(cue, file_map)
      text = ""
      cue_number += 1
    end

    # Start the next cue; captures 1 and 3 are the start and end timestamps
    cue = CueInfo.new(AllFather::TYPE_VTT)
    cue.index = cue_number
    cue.start = timing[1]
    cue.end = timing[3]
    cue.start_time_units = time_details(cue.start, TYPE_VTT)
    cue.end_time_units = time_details(cue.end, TYPE_VTT)
    collecting = true
  end

  # NOTE(review): cue is still nil here when the file had no timing line at
  # all - presumably the validation done at construction rules that out; confirm.
  cue.message = text unless text.empty?
  write_cue(cue, file_map, true)
end
|
158
|
+
|
88
159
|
private
|
89
160
|
|
90
161
|
#
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: subtitle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maheshwaran G
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-11-
|
12
|
+
date: 2019-11-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -67,6 +67,34 @@ dependencies:
|
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '10.0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: minitest
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: optimist
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
70
98
|
description: Subtitle gem helps you to detect language and translate closed caption
|
71
99
|
to required language.
|
72
100
|
email:
|
@@ -85,6 +113,8 @@ files:
|
|
85
113
|
- lib/srt.rb
|
86
114
|
- lib/subtitle.rb
|
87
115
|
- lib/ttml.rb
|
116
|
+
- lib/utils/common_utils.rb
|
117
|
+
- lib/utils/cue_info.rb
|
88
118
|
- lib/vtt.rb
|
89
119
|
homepage: https://github.com/cloudaffair/subtitle
|
90
120
|
licenses:
|