subtitle 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/allfather.rb +24 -7
- data/lib/dfxp.rb +15 -7
- data/lib/engines/aws.rb +2 -2
- data/lib/srt.rb +43 -3
- data/lib/subtitle.rb +3 -3
- data/lib/ttml.rb +172 -4
- data/lib/vtt.rb +43 -4
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd5a110529cb49076b699028d4aba501f92d1b9af517363ecf9d0066ca67bfcd
|
4
|
+
data.tar.gz: 6e499c8e56be6748699f1e9210eaa14c982eac3ea6c425460e75e45ee941a1dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 027ee941cfd582de1c98f7d7aae4a7673bcaf51b7af4cba9df0bef0f3fb16437ad90e760bd61adcea0a4487094d3f7ade675635aff69e40881b36240ec17253c
|
7
|
+
data.tar.gz: b0b3763969201e7f98883463292f59c9583b103251a14879853ae923a5a22259e2d677c175fefbfd25f45ecfe589eb157fec5d7b891364b30cb9714f8ec65e36
|
data/lib/allfather.rb
CHANGED
@@ -63,17 +63,18 @@ module AllFather
|
|
63
63
|
#
|
64
64
|
# * +translator+ - Instance of translation engine. Refer to `engines/aws` for example
|
65
65
|
#
|
66
|
+
# ==== Raises
|
67
|
+
# * `InvalidInputException` when the argument `translator` is not an instance of Translator class
|
68
|
+
#
|
66
69
|
def set_translator(translator)
|
67
70
|
if translator && !(translator.is_a? Translator)
|
68
|
-
raise "Argument is not an instance of Translator"
|
71
|
+
raise InvalidInputException.new("Argument is not an instance of Translator")
|
69
72
|
end
|
70
73
|
end
|
71
74
|
|
72
75
|
#
|
73
76
|
# Method to translate the caption from one language to another
|
74
77
|
#
|
75
|
-
# :args: src_lang, target_lang, output_file
|
76
|
-
#
|
77
78
|
# * +src_lang+ - can be inferred using #infer_language method
|
78
79
|
# * +target_lang+ - Target 2 letter ISO language code into which the source needs to be translated.
|
79
80
|
# * +output_file+ - Output file. Can be a fully qualified path or just file name
|
@@ -114,6 +115,9 @@ module AllFather
|
|
114
115
|
# If no target_lang is provided, no translations are applied. output_file is created using
|
115
116
|
# without any need for any language translation services. Hence doesn't incur any cost !!
|
116
117
|
#
|
118
|
+
# Note: +src_lang+ makes sense only for caption types that can hold multi lingual captions
|
119
|
+
# like dfxp and ttml. For other caption sources this field is ignored
|
120
|
+
#
|
117
121
|
# * +types+ - An array of Valid input caption type(s). Refer to `#CaptionType`
|
118
122
|
# * +src_lang+ - can be inferred using #infer_language method
|
119
123
|
# * +target_lang+ - Target 2 letter ISO language code into which the source needs to be translated.
|
@@ -140,18 +144,31 @@ module AllFather
|
|
140
144
|
raise InvalidInputException.new("SCC can be generated only in en. #{target_lang} is unsupported")
|
141
145
|
end
|
142
146
|
end
|
143
|
-
if target_lang && !target_lang.empty?
|
144
|
-
raise InvalidInputException.new("Translation to other language as part of transform is yet to be implemented")
|
145
|
-
end
|
146
147
|
end
|
147
148
|
|
148
149
|
#
|
149
150
|
# Method to report on the supported transformations. Each implementor is free to return
|
150
151
|
# the types to which it can convert itself to
|
151
152
|
#
|
152
|
-
# Returns
|
153
|
+
# ==== Returns
|
154
|
+
#
|
155
|
+
# * An array of one or more types defined as +TYPE_+ constants here
|
153
156
|
#
|
154
157
|
def supported_transformations
|
155
158
|
raise "Not Implemented. Class #{self.class.name} doesn't implement supported_transformations"
|
156
159
|
end
|
160
|
+
|
161
|
+
#
|
162
|
+
# While the logic of abstracting stuff to callers has its benefits, sometimes it's required
|
163
|
+
# to identify which instance we are specifically operating on. This method returns the instance
|
164
|
+
# currently being operated on and returns one of the +TYPE_+ constants defined here
|
165
|
+
# Implement this only if it's absolutely necessary and there is no other easy way to do things
|
166
|
+
#
|
167
|
+
# ===== Returns
|
168
|
+
#
|
169
|
+
# * the call sign of the instance
|
170
|
+
#
|
171
|
+
def callsign
|
172
|
+
raise "Not Implemented. Class #{self.class.name} doesn't implement callsign"
|
173
|
+
end
|
157
174
|
end
|
data/lib/dfxp.rb
CHANGED
@@ -5,16 +5,17 @@ require_relative "ttml"
|
|
5
5
|
#
|
6
6
|
# Library to handle DFXP Files
|
7
7
|
#
|
8
|
-
#
|
9
|
-
#
|
8
|
+
# Extends the TTML Class as except for namespace differences there isn't
|
9
|
+
# much to choose between ttml and dfxp
|
10
10
|
#
|
11
11
|
class DFXP < TTML
|
12
12
|
|
13
|
-
|
13
|
+
SUPPORTED_TRANSFORMATIONS = [TYPE_SCC, TYPE_SRT, TYPE_VTT, TYPE_TTML]
|
14
|
+
|
15
|
+
def initialize(cc_file, opts=nil)
|
14
16
|
@cc_file = cc_file
|
15
|
-
|
16
|
-
|
17
|
-
raise "Invalid TTML file provided" unless is_valid?
|
17
|
+
@force_detect = opts ? (opts[:force_detect] || false) : false
|
18
|
+
raise "Invalid DFXP file provided" unless is_valid?
|
18
19
|
end
|
19
20
|
|
20
21
|
def is_valid?
|
@@ -26,5 +27,12 @@ class DFXP < TTML
|
|
26
27
|
# a well-formed XML. Another is to see if lang is available in each div
|
27
28
|
return false
|
28
29
|
end
|
29
|
-
|
30
|
+
|
31
|
+
def callsign
|
32
|
+
TYPE_DFXP
|
33
|
+
end
|
34
|
+
|
35
|
+
def supported_transformations
|
36
|
+
return SUPPORTED_TRANSFORMATIONS
|
37
|
+
end
|
30
38
|
end
|
data/lib/engines/aws.rb
CHANGED
@@ -11,8 +11,8 @@ require_relative 'translator'
|
|
11
11
|
#
|
12
12
|
# * [Arguments] - Pass the credentials access_key_id and secret_access_key as arguments
|
13
13
|
# * [Environment route] - AWS_ACCESS_KEY_ID & AWS_SECRET_ACCESS_KEY can be exposed as environment variables
|
14
|
-
# * [Profile Name] - The application uses the credentials of the system and picks the
|
15
|
-
#
|
14
|
+
# * [Profile Name] - The application uses the credentials of the system and picks the credentials
|
15
|
+
# referred to by the profile
|
16
16
|
#
|
17
17
|
class AwsEngine
|
18
18
|
include Translator
|
data/lib/srt.rb
CHANGED
@@ -94,11 +94,21 @@ class SRT
|
|
94
94
|
|
95
95
|
# Suffix output dir with File separator
|
96
96
|
output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
|
97
|
+
|
98
|
+
translate = false
|
99
|
+
if target_lang && !target_lang.empty?
|
100
|
+
translate = true
|
101
|
+
if @translator.nil?
|
102
|
+
raise StandardError.new("Cannot infer language as engine options are not provided")
|
103
|
+
end
|
104
|
+
end
|
97
105
|
|
98
106
|
# Prepare the output files for each type
|
99
107
|
file_map = {}
|
100
108
|
types.each do |type|
|
101
|
-
output_file = File.basename(@cc_file, File.extname(@cc_file))
|
109
|
+
output_file = File.basename(@cc_file, File.extname(@cc_file))
|
110
|
+
output_file << "_#{target_lang}" if translate
|
111
|
+
output_file << extension_from_type(type)
|
102
112
|
out_file = "#{output_dir}#{output_file}"
|
103
113
|
if create_file(TYPE_SRT, type, out_file, target_lang)
|
104
114
|
file_map[type] = out_file
|
@@ -119,7 +129,7 @@ class SRT
|
|
119
129
|
# This is not a time point
|
120
130
|
seq = line.strip
|
121
131
|
if seq.to_i > 0
|
122
|
-
cue_info.message = message unless message.empty?
|
132
|
+
cue_info.message = translated_msg(translate, message, src_lang, target_lang) unless message.empty?
|
123
133
|
write_cue(cue_info, file_map) if cue_info
|
124
134
|
cue_info = CueInfo.new(TYPE_SRT)
|
125
135
|
cue_info.sequence = seq
|
@@ -140,12 +150,42 @@ class SRT
|
|
140
150
|
cue_info.end_time_units = end_units
|
141
151
|
end
|
142
152
|
end
|
143
|
-
cue_info.message = message unless message.empty?
|
153
|
+
cue_info.message = translated_msg(translate, message, src_lang, target_lang) unless message.empty?
|
144
154
|
write_cue(cue_info, file_map, true)
|
145
155
|
end
|
146
156
|
|
147
157
|
private
|
148
158
|
|
159
|
+
#
|
160
|
+
# Method to translate a given text message based on following conditions
|
161
|
+
#
|
162
|
+
# * If translate is false, the message is returned as is
|
163
|
+
# * If +src_lang+ and +target_lang+ are same then the message is returned as is
|
164
|
+
# * If +src_lang+ is nil or empty then this caption file will be inspected to infer language
|
165
|
+
# and if it's same as target_lang, then again the message shall be returned as is
|
166
|
+
# * Otherwise, returns a translated text
|
167
|
+
#
|
168
|
+
# ==== Raise
|
169
|
+
# * LangDetectionFailureException - If failed to infer the language
|
170
|
+
#
|
171
|
+
def translated_msg(translate, message, src_lang, target_lang)
|
172
|
+
return message unless translate
|
173
|
+
use_src = nil
|
174
|
+
if (src_lang.nil? || src_lang.empty?)
|
175
|
+
# We don't need to infer again and again
|
176
|
+
begin
|
177
|
+
@inferred_src_lang ||= infer_languages.first
|
178
|
+
rescue StandardError => e
|
179
|
+
raise LangDetectionFailureException.new("Failed to infer language due to #{e.message}")
|
180
|
+
end
|
181
|
+
use_src = @inferred_src_lang
|
182
|
+
else
|
183
|
+
use_src = src_lang
|
184
|
+
end
|
185
|
+
return message if use_src.eql?(target_lang)
|
186
|
+
@translator.translate(message, use_src, target_lang)
|
187
|
+
end
|
188
|
+
|
149
189
|
#
|
150
190
|
# Method to get a minimal amount of key text that excludes any tags
|
151
191
|
# or control information for the engine to meaningfully and
|
data/lib/subtitle.rb
CHANGED
@@ -58,7 +58,7 @@ class Subtitle
|
|
58
58
|
end
|
59
59
|
# Translator not required if target_lang is nil
|
60
60
|
if @handler.nil?
|
61
|
-
if target_lang.nil?
|
61
|
+
if target_lang.nil? && src_lang.nil?
|
62
62
|
@handler = get_caption_handler(options, nil)
|
63
63
|
else
|
64
64
|
initialize_handler(options)
|
@@ -146,9 +146,9 @@ class Subtitle
|
|
146
146
|
when ".vtt"
|
147
147
|
handler = VTT.new(caption_file)
|
148
148
|
when ".ttml"
|
149
|
-
handler = TTML.new(caption_file)
|
149
|
+
handler = TTML.new(caption_file, options)
|
150
150
|
when ".dfxp"
|
151
|
-
handler = DFXP.new(caption_file)
|
151
|
+
handler = DFXP.new(caption_file, options)
|
152
152
|
else
|
153
153
|
raise "Cannot handle file type .#{extension}"
|
154
154
|
end
|
data/lib/ttml.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require_relative "engines/translator"
|
2
|
+
require_relative "utils/common_utils"
|
3
|
+
require_relative "utils/cue_info"
|
2
4
|
require_relative "allfather"
|
3
5
|
|
4
6
|
require "nokogiri"
|
@@ -12,12 +14,20 @@ require "nokogiri"
|
|
12
14
|
class TTML
|
13
15
|
|
14
16
|
include AllFather
|
17
|
+
include CommonUtils
|
15
18
|
|
16
|
-
|
19
|
+
SUPPORTED_TRANSFORMATIONS = [TYPE_SCC, TYPE_SRT, TYPE_VTT, TYPE_DFXP]
|
20
|
+
|
21
|
+
def initialize(cc_file, opts=nil)
|
17
22
|
@cc_file = cc_file
|
23
|
+
@force_detect = opts ? (opts[:force_detect] || false) : false
|
18
24
|
raise "Invalid TTML file provided" unless is_valid?
|
19
25
|
end
|
20
26
|
|
27
|
+
def callsign
|
28
|
+
TYPE_TTML
|
29
|
+
end
|
30
|
+
|
21
31
|
def is_valid?
|
22
32
|
# Do any TTML specific validations here
|
23
33
|
if @cc_file =~ /^.*\.(ttml)$/
|
@@ -33,12 +43,12 @@ class TTML
|
|
33
43
|
end
|
34
44
|
|
35
45
|
def infer_languages
|
36
|
-
force_detect = false
|
37
46
|
lang = []
|
38
47
|
begin
|
39
48
|
xml_file = File.open(@cc_file)
|
40
49
|
xml_doc = Nokogiri::XML(xml_file)
|
41
50
|
div_objects = xml_doc.css("/tt/body/div")
|
51
|
+
local_force_detect = false
|
42
52
|
div_objects.each_with_index do |div, index|
|
43
53
|
# By default, return the lang if specified in the div and
|
44
54
|
# force detect is false
|
@@ -46,9 +56,10 @@ class TTML
|
|
46
56
|
if inferred_lang.nil?
|
47
57
|
# If lang is not provided in the caption, then override
|
48
58
|
# force detect for inference
|
49
|
-
|
59
|
+
local_force_detect = true
|
50
60
|
end
|
51
|
-
if force_detect
|
61
|
+
if @force_detect || local_force_detect
|
62
|
+
local_force_detect = false
|
52
63
|
sample_text = get_text(div, 100)
|
53
64
|
inferred_lang = @translator.infer_language(sample_text) rescue nil
|
54
65
|
if inferred_lang.nil?
|
@@ -115,8 +126,165 @@ class TTML
|
|
115
126
|
out_file
|
116
127
|
end
|
117
128
|
|
129
|
+
def supported_transformations
|
130
|
+
return SUPPORTED_TRANSFORMATIONS
|
131
|
+
end
|
132
|
+
|
133
|
+
def transform_to(types, src_lang, target_lang, output_dir)
|
134
|
+
# Let's start off with some validations
|
135
|
+
super(types, src_lang, target_lang, output_dir)
|
136
|
+
|
137
|
+
# Suffix output dir with File separator
|
138
|
+
output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
|
139
|
+
|
140
|
+
begin
|
141
|
+
xml_file = File.open(@cc_file, 'r')
|
142
|
+
xml_doc = Nokogiri::XML(xml_file)
|
143
|
+
div_objects = xml_doc.css("/tt/body/div")
|
144
|
+
langs = div_objects.map {|div| div.attributes['lang'].value rescue nil}
|
145
|
+
translate = false
|
146
|
+
matching_divs = []
|
147
|
+
inferred_src_lang = nil
|
148
|
+
if src_lang.nil? || src_lang.empty?
|
149
|
+
if target_lang && !target_lang.empty?
|
150
|
+
# Find if any of our div matches this. Else pick first and translate to target lang
|
151
|
+
div_objects.each_with_index do |div, j|
|
152
|
+
if matching_lang?(div, target_lang)
|
153
|
+
matching_divs << div
|
154
|
+
break
|
155
|
+
end
|
156
|
+
end
|
157
|
+
if matching_divs.empty?
|
158
|
+
# Let's pick the first div for target translation
|
159
|
+
selected_div = div_objects.first
|
160
|
+
inferred_src_lang = selected_div.lang
|
161
|
+
matching_divs << selected_div
|
162
|
+
translate = true
|
163
|
+
end
|
164
|
+
else
|
165
|
+
# Then we will have to create output files for each lang
|
166
|
+
matching_divs = div_objects
|
167
|
+
end
|
168
|
+
else
|
169
|
+
# Find the matching lang div and create the outputs
|
170
|
+
available_divs = langs.select { |lang| lang.eql?(src_lang) }
|
171
|
+
if available_divs.length > 1
|
172
|
+
raise InvalidInputException.new("More than one section in Caption file specifies lang as #{src_lang}. This file is unsupported")
|
173
|
+
end
|
174
|
+
div_objects.each_with_index do |div, j|
|
175
|
+
if matching_lang?(div, src_lang)
|
176
|
+
matching_divs << div
|
177
|
+
break
|
178
|
+
end
|
179
|
+
end
|
180
|
+
if matching_divs.empty?
|
181
|
+
raise InvalidInputException.new("Given Caption file #{@cc_file} doesn't contain #{src_lang} lang. Available langs are #{langs}")
|
182
|
+
end
|
183
|
+
if matching_divs.length > 1
|
184
|
+
raise InvalidInputException.new("More than one section in Caption file specifies lang as #{src_lang}. This file is unsupported")
|
185
|
+
end
|
186
|
+
if target_lang && !target_lang.empty? && !src_lang.eql?(target_lang)
|
187
|
+
translate = true
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
div_index = 1
|
192
|
+
multiple_outputs = matching_divs.size > 1
|
193
|
+
matching_divs.each do |div|
|
194
|
+
div_lang = div.attributes['lang'].value rescue nil
|
195
|
+
# Override div lang if translate is required
|
196
|
+
div_lang = target_lang if translate
|
197
|
+
file_map = {}
|
198
|
+
# Prepare the output files for each type and for each lang in the file
|
199
|
+
types.each do |type|
|
200
|
+
output_file = File.basename(@cc_file, File.extname(@cc_file))
|
201
|
+
# Suffix div index when multiple outputs are created
|
202
|
+
output_file << "_#{div_index}" if multiple_outputs
|
203
|
+
if target_lang.nil? && !src_lang.nil?
|
204
|
+
output_file << "_#{src_lang}"
|
205
|
+
end
|
206
|
+
# Suffix lang to filename if provided
|
207
|
+
if target_lang && !target_lang.empty?
|
208
|
+
output_file << "_#{target_lang}"
|
209
|
+
end
|
210
|
+
output_file << extension_from_type(type)
|
211
|
+
out_file = "#{output_dir}#{output_file}"
|
212
|
+
if create_file(TYPE_TTML, type, out_file, div_lang)
|
213
|
+
file_map[type] = out_file
|
214
|
+
else
|
215
|
+
raise StandardError.new("Failed to create output file for type #{type}")
|
216
|
+
end
|
217
|
+
end
|
218
|
+
blocks = div.css("p")
|
219
|
+
cue_index = 1
|
220
|
+
total_blocks = blocks.size
|
221
|
+
blocks.each_with_index do |block, index|
|
222
|
+
start_time = block.attributes['begin'].value
|
223
|
+
end_time = block.attributes['end'].value
|
224
|
+
text = block.inner_html.strip.gsub(/(\s){2,}/, '')
|
225
|
+
message = ""
|
226
|
+
text_blocks = get_block_text(text)
|
227
|
+
text_blocks.each do |text_block|
|
228
|
+
next if text_block.start_with?('<') || text_block.empty?
|
229
|
+
message << text_block
|
230
|
+
end
|
231
|
+
cue_info = CueInfo.new(callsign)
|
232
|
+
cue_info.index = cue_index
|
233
|
+
cue_index += 1
|
234
|
+
cue_info.message = translated_msg(translate, message, src_lang, inferred_src_lang, target_lang)
|
235
|
+
cue_info.start = start_time
|
236
|
+
cue_info.end = end_time
|
237
|
+
cue_info.start_time_units = time_details(start_time, callsign)
|
238
|
+
cue_info.end_time_units = time_details(end_time, callsign)
|
239
|
+
write_cue(cue_info, file_map, index == (total_blocks - 1))
|
240
|
+
end
|
241
|
+
div_index += 1
|
242
|
+
end
|
243
|
+
ensure
|
244
|
+
xml_file.close if xml_file
|
245
|
+
end
|
246
|
+
end
|
247
|
+
|
118
248
|
private
|
119
249
|
|
250
|
+
def translated_msg(translate, message, src_lang, inferred_src_lang, target_lang)
|
251
|
+
return message unless translate
|
252
|
+
use_src = nil
|
253
|
+
if (src_lang.nil? || src_lang.empty?)
|
254
|
+
if inferred_src_lang.nil?
|
255
|
+
raise LangDetectionFailureException.new("Unable to deduce source lang for translation")
|
256
|
+
end
|
257
|
+
use_src = inferred_src_lang
|
258
|
+
else
|
259
|
+
use_src = src_lang
|
260
|
+
end
|
261
|
+
return message if use_src.eql?(target_lang)
|
262
|
+
@translator.translate(message, use_src, target_lang)
|
263
|
+
end
|
264
|
+
|
265
|
+
def matching_lang?(div, target_lang)
|
266
|
+
lang = div.attributes['lang'].value rescue nil
|
267
|
+
if lang.nil?
|
268
|
+
# Let's infer the lang
|
269
|
+
if @translator.nil?
|
270
|
+
raise StandardError.new("Cannot infer language as engine options are not provided")
|
271
|
+
end
|
272
|
+
reference_text = get_text(div, 100)
|
273
|
+
inferred_lang = @translator.infer_language(reference_text) rescue nil
|
274
|
+
if inferred_lang.nil?
|
275
|
+
raise LangDetectionFailureException.new("Failed to infer language for div block #{j} of caption file")
|
276
|
+
end
|
277
|
+
# Store this lang in the div
|
278
|
+
div.lang = inferred_lang
|
279
|
+
if inferred_lang.eql?(target_lang)
|
280
|
+
return true
|
281
|
+
end
|
282
|
+
elsif lang.eql?(target_lang)
|
283
|
+
return true
|
284
|
+
end
|
285
|
+
return false
|
286
|
+
end
|
287
|
+
|
120
288
|
#
|
121
289
|
# Method to segregate the data from markups as markups don't need
|
122
290
|
# translations.
|
data/lib/vtt.rb
CHANGED
@@ -104,10 +104,19 @@ class VTT
|
|
104
104
|
# Suffix output dir with File separator
|
105
105
|
output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
|
106
106
|
|
107
|
+
translate = false
|
108
|
+
if target_lang && !target_lang.empty?
|
109
|
+
translate = true
|
110
|
+
if @translator.nil?
|
111
|
+
raise StandardError.new("Cannot infer language as engine options are not provided")
|
112
|
+
end
|
113
|
+
end
|
107
114
|
# Prepare the output files for each type
|
108
115
|
file_map = {}
|
109
116
|
types.each do |type|
|
110
|
-
output_file = File.basename(@cc_file, File.extname(@cc_file))
|
117
|
+
output_file = File.basename(@cc_file, File.extname(@cc_file))
|
118
|
+
output_file << "_#{target_lang}" if translate
|
119
|
+
output_file << extension_from_type(type)
|
111
120
|
out_file = "#{output_dir}#{output_file}"
|
112
121
|
if create_file(TYPE_VTT, type, out_file, target_lang)
|
113
122
|
file_map[type] = out_file
|
@@ -135,7 +144,7 @@ class VTT
|
|
135
144
|
else
|
136
145
|
collect_msg = false
|
137
146
|
unless message.empty?
|
138
|
-
cue_info.message = message
|
147
|
+
cue_info.message = translated_msg(translate, message, src_lang, target_lang)
|
139
148
|
write_cue(cue_info, file_map)
|
140
149
|
message = ""
|
141
150
|
cue_index += 1
|
@@ -152,11 +161,41 @@ class VTT
|
|
152
161
|
collect_msg = true
|
153
162
|
end
|
154
163
|
end
|
155
|
-
cue_info.message = message unless message.empty?
|
164
|
+
cue_info.message = translated_msg(translate, message, src_lang, target_lang) unless message.empty?
|
156
165
|
write_cue(cue_info, file_map, true)
|
157
166
|
end
|
158
167
|
|
159
|
-
private
|
168
|
+
private
|
169
|
+
|
170
|
+
#
|
171
|
+
# Method to translate a given text message based on following conditions
|
172
|
+
#
|
173
|
+
# * If translate is false, the message is returned as is
|
174
|
+
# * If +src_lang+ and +target_lang+ are same then the message is returned as is
|
175
|
+
# * If +src_lang+ is nil or empty then this caption file will be inspected to infer language
|
176
|
+
# and if it's same as target_lang, then again the message shall be returned as is
|
177
|
+
# * Otherwise, returns a translated text
|
178
|
+
#
|
179
|
+
# ==== Raise
|
180
|
+
# * LangDetectionFailureException - If failed to infer the language
|
181
|
+
#
|
182
|
+
def translated_msg(translate, message, src_lang, target_lang)
|
183
|
+
return message unless translate
|
184
|
+
use_src = nil
|
185
|
+
if (src_lang.nil? || src_lang.empty?)
|
186
|
+
# We don't need to infer again and again
|
187
|
+
begin
|
188
|
+
@inferred_src_lang ||= infer_languages.first
|
189
|
+
rescue StandardError => e
|
190
|
+
raise LangDetectionFailureException.new("Failed to infer language due to #{e.message}")
|
191
|
+
end
|
192
|
+
use_src = @inferred_src_lang
|
193
|
+
else
|
194
|
+
use_src = src_lang
|
195
|
+
end
|
196
|
+
return message if use_src.eql?(target_lang)
|
197
|
+
@translator.translate(message, use_src, target_lang)
|
198
|
+
end
|
160
199
|
|
161
200
|
#
|
162
201
|
# Method to get a minimal amount of key text that excludes any tags
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: subtitle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maheshwaran G
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-11-
|
12
|
+
date: 2019-11-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -95,8 +95,10 @@ dependencies:
|
|
95
95
|
- - ">="
|
96
96
|
- !ruby/object:Gem::Version
|
97
97
|
version: '0'
|
98
|
-
description: Subtitle gem helps you to detect language
|
99
|
-
to
|
98
|
+
description: " Subtitle gem helps you to detect the language(s)
|
99
|
+
of the caption file, translate closed caption \n to another
|
100
|
+
language and also supports transforming from one format to another. \n Say
|
101
|
+
for example from dfxp to srt or vtt or to all supported formats.\"\n"
|
100
102
|
email:
|
101
103
|
- pgmaheshwaran@gmail.com
|
102
104
|
- arunjeyaprasad@gmail.com
|