subtitle 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/allfather.rb +24 -7
- data/lib/dfxp.rb +15 -7
- data/lib/engines/aws.rb +2 -2
- data/lib/srt.rb +43 -3
- data/lib/subtitle.rb +3 -3
- data/lib/ttml.rb +172 -4
- data/lib/vtt.rb +43 -4
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd5a110529cb49076b699028d4aba501f92d1b9af517363ecf9d0066ca67bfcd
|
4
|
+
data.tar.gz: 6e499c8e56be6748699f1e9210eaa14c982eac3ea6c425460e75e45ee941a1dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 027ee941cfd582de1c98f7d7aae4a7673bcaf51b7af4cba9df0bef0f3fb16437ad90e760bd61adcea0a4487094d3f7ade675635aff69e40881b36240ec17253c
|
7
|
+
data.tar.gz: b0b3763969201e7f98883463292f59c9583b103251a14879853ae923a5a22259e2d677c175fefbfd25f45ecfe589eb157fec5d7b891364b30cb9714f8ec65e36
|
data/lib/allfather.rb
CHANGED
@@ -63,17 +63,18 @@ module AllFather
|
|
63
63
|
#
|
64
64
|
# * +translator+ - Instance of translation engine. Refer to `engines/aws` for example
|
65
65
|
#
|
66
|
+
# ==== Raises
|
67
|
+
# * `InvalidInputException` when the argument `translator` is not an instance of Translator class
|
68
|
+
#
|
66
69
|
def set_translator(translator)
|
67
70
|
if translator && !(translator.is_a? Translator)
|
68
|
-
raise "Argument is not an instance of Translator"
|
71
|
+
raise InvalidInputException.new("Argument is not an instance of Translator")
|
69
72
|
end
|
70
73
|
end
|
71
74
|
|
72
75
|
#
|
73
76
|
# Method to translate the caption from one language to another
|
74
77
|
#
|
75
|
-
# :args: src_lang, target_lang, output_file
|
76
|
-
#
|
77
78
|
# * +src_lang+ - can be inferred using #infer_language method
|
78
79
|
# * +target_lang+ - Target 2 letter ISO language code to which the source needs to be translated in to.
|
79
80
|
# * +output_file+ - Output file. Can be a fully qualified path or just file name
|
@@ -114,6 +115,9 @@ module AllFather
|
|
114
115
|
# If no target_lang is provided, no translations are applied. output_file is created using
|
115
116
|
# without any need for any language translation services. Hence doesn't incur any cost !!
|
116
117
|
#
|
118
|
+
# Note: +src_lang+ makes sense only for caption types that can hold multi lingual captions
|
119
|
+
# like dfxp and ttml. For other caption sources this field is ignored
|
120
|
+
#
|
117
121
|
# * +types+ - An array of Valid input caption type(s). Refer to `#CaptionType`
|
118
122
|
# * +src_lang+ - can be inferred using #infer_language method
|
119
123
|
# * +target_lang+ - Target 2 letter ISO language code to which the source needs to be translated in to.
|
@@ -140,18 +144,31 @@ module AllFather
|
|
140
144
|
raise InvalidInputException.new("SCC can be generated only in en. #{target_lang} is unsupported")
|
141
145
|
end
|
142
146
|
end
|
143
|
-
if target_lang && !target_lang.empty?
|
144
|
-
raise InvalidInputException.new("Translation to other language as part of transform is yet to be implemented")
|
145
|
-
end
|
146
147
|
end
|
147
148
|
|
148
149
|
#
|
149
150
|
# Method to report on the supported transformations. Each implementor is free to return
|
150
151
|
# the types to which it can convert itself to
|
151
152
|
#
|
152
|
-
# Returns
|
153
|
+
# ==== Returns
|
154
|
+
#
|
155
|
+
# * An array of one or more types defined as +TYPE_+ constants here
|
153
156
|
#
|
154
157
|
def supported_transformations
|
155
158
|
raise "Not Implemented. Class #{self.class.name} doesn't implement supported_transformations"
|
156
159
|
end
|
160
|
+
|
161
|
+
#
|
162
|
+
# While the logic of abstracting stuff to callers has its benefits, sometimes it's required
|
163
|
+
# to identify which instance we are specifically operating on. This method returns the instance
|
164
|
+
# currently being operated on and returns one of the +TYPE_+ constants defined here
|
165
|
+
# Do not implement this unless it's absolutely necessary and there is no other easy way to do things
|
166
|
+
#
|
167
|
+
# ==== Returns
|
168
|
+
#
|
169
|
+
# * the call sign of the instance
|
170
|
+
#
|
171
|
+
def callsign
|
172
|
+
raise "Not Implemented. Class #{self.class.name} doesn't implement callsign"
|
173
|
+
end
|
157
174
|
end
|
data/lib/dfxp.rb
CHANGED
@@ -5,16 +5,17 @@ require_relative "ttml"
|
|
5
5
|
#
|
6
6
|
# Library to handle DFXP Files
|
7
7
|
#
|
8
|
-
#
|
9
|
-
#
|
8
|
+
# Extends the TTML Class as except for namespace differences there isn't
|
9
|
+
# much to tell apart between ttml and dfxp
|
10
10
|
#
|
11
11
|
class DFXP < TTML
|
12
12
|
|
13
|
-
|
13
|
+
SUPPORTED_TRANSFORMATIONS = [TYPE_SCC, TYPE_SRT, TYPE_VTT, TYPE_TTML]
|
14
|
+
|
15
|
+
def initialize(cc_file, opts=nil)
|
14
16
|
@cc_file = cc_file
|
15
|
-
|
16
|
-
|
17
|
-
raise "Invalid TTML file provided" unless is_valid?
|
17
|
+
@force_detect = opts ? (opts[:force_detect] || false) : false
|
18
|
+
raise "Invalid DFXP file provided" unless is_valid?
|
18
19
|
end
|
19
20
|
|
20
21
|
def is_valid?
|
@@ -26,5 +27,12 @@ class DFXP < TTML
|
|
26
27
|
# a well-formed XML. Another is to see if lang is available in each div
|
27
28
|
return false
|
28
29
|
end
|
29
|
-
|
30
|
+
|
31
|
+
def callsign
|
32
|
+
TYPE_DFXP
|
33
|
+
end
|
34
|
+
|
35
|
+
def supported_transformations
|
36
|
+
return SUPPORTED_TRANSFORMATIONS
|
37
|
+
end
|
30
38
|
end
|
data/lib/engines/aws.rb
CHANGED
@@ -11,8 +11,8 @@ require_relative 'translator'
|
|
11
11
|
#
|
12
12
|
# * [Arguments] - Pass the credentials access_key_id and secret_access_key as arguments
|
13
13
|
# * [Environment route] - AWS_ACCESS_KEY_ID & AWS_SECRET_ACCESS_KEY can be exposed as environment variables
|
14
|
-
# * [Profile Name] - The application uses the credentials of the system and picks the
|
15
|
-
#
|
14
|
+
# * [Profile Name] - The application uses the credentials of the system and picks the credentials
|
15
|
+
# referred to by the profile
|
16
16
|
#
|
17
17
|
class AwsEngine
|
18
18
|
include Translator
|
data/lib/srt.rb
CHANGED
@@ -94,11 +94,21 @@ class SRT
|
|
94
94
|
|
95
95
|
# Suffix output dir with File separator
|
96
96
|
output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
|
97
|
+
|
98
|
+
translate = false
|
99
|
+
if target_lang && !target_lang.empty?
|
100
|
+
translate = true
|
101
|
+
if @translator.nil?
|
102
|
+
raise StandardError.new("Cannot infer language as engine options are not provided")
|
103
|
+
end
|
104
|
+
end
|
97
105
|
|
98
106
|
# Prepare the output files for each type
|
99
107
|
file_map = {}
|
100
108
|
types.each do |type|
|
101
|
-
output_file = File.basename(@cc_file, File.extname(@cc_file))
|
109
|
+
output_file = File.basename(@cc_file, File.extname(@cc_file))
|
110
|
+
output_file << "_#{target_lang}" if translate
|
111
|
+
output_file << extension_from_type(type)
|
102
112
|
out_file = "#{output_dir}#{output_file}"
|
103
113
|
if create_file(TYPE_SRT, type, out_file, target_lang)
|
104
114
|
file_map[type] = out_file
|
@@ -119,7 +129,7 @@ class SRT
|
|
119
129
|
# This is not a time point
|
120
130
|
seq = line.strip
|
121
131
|
if seq.to_i > 0
|
122
|
-
cue_info.message = message unless message.empty?
|
132
|
+
cue_info.message = translated_msg(translate, message, src_lang, target_lang) unless message.empty?
|
123
133
|
write_cue(cue_info, file_map) if cue_info
|
124
134
|
cue_info = CueInfo.new(TYPE_SRT)
|
125
135
|
cue_info.sequence = seq
|
@@ -140,12 +150,42 @@ class SRT
|
|
140
150
|
cue_info.end_time_units = end_units
|
141
151
|
end
|
142
152
|
end
|
143
|
-
cue_info.message = message unless message.empty?
|
153
|
+
cue_info.message = translated_msg(translate, message, src_lang, target_lang) unless message.empty?
|
144
154
|
write_cue(cue_info, file_map, true)
|
145
155
|
end
|
146
156
|
|
147
157
|
private
|
148
158
|
|
159
|
+
#
|
160
|
+
# Method to translate a given text message based on following conditions
|
161
|
+
#
|
162
|
+
# * If translate is false, the message is returned as is
|
163
|
+
# * If +src_lang+ and +target_lang+ are same then the message is returned as is
|
164
|
+
# * If +src_lang+ is nil or empty then this caption file will be inspected to infer language
|
165
|
+
# and if it's same as target_lang, then again the message shall be returned as is
|
166
|
+
# * Otherwise, returns a translated text
|
167
|
+
#
|
168
|
+
# ==== Raise
|
169
|
+
# * LangDetectionFailureException - If failed to infer the language
|
170
|
+
#
|
171
|
+
def translated_msg(translate, message, src_lang, target_lang)
|
172
|
+
return message unless translate
|
173
|
+
use_src = nil
|
174
|
+
if (src_lang.nil? || src_lang.empty?)
|
175
|
+
# We don't need to infer again and again
|
176
|
+
begin
|
177
|
+
@inferred_src_lang ||= infer_languages.first
|
178
|
+
rescue StandardError => e
|
179
|
+
raise LangDetectionFailureException.new("Failed to infer language due to #{e.message}")
|
180
|
+
end
|
181
|
+
use_src = @inferred_src_lang
|
182
|
+
else
|
183
|
+
use_src = src_lang
|
184
|
+
end
|
185
|
+
return message if use_src.eql?(target_lang)
|
186
|
+
@translator.translate(message, use_src, target_lang)
|
187
|
+
end
|
188
|
+
|
149
189
|
#
|
150
190
|
# Method to get a minimal amount of key text that excludes any tags
|
151
191
|
# or control information for the engine to meaningfully and
|
data/lib/subtitle.rb
CHANGED
@@ -58,7 +58,7 @@ class Subtitle
|
|
58
58
|
end
|
59
59
|
# Translator not required if target_lang is nil
|
60
60
|
if @handler.nil?
|
61
|
-
if target_lang.nil?
|
61
|
+
if target_lang.nil? && src_lang.nil?
|
62
62
|
@handler = get_caption_handler(options, nil)
|
63
63
|
else
|
64
64
|
initialize_handler(options)
|
@@ -146,9 +146,9 @@ class Subtitle
|
|
146
146
|
when ".vtt"
|
147
147
|
handler = VTT.new(caption_file)
|
148
148
|
when ".ttml"
|
149
|
-
handler = TTML.new(caption_file)
|
149
|
+
handler = TTML.new(caption_file, options)
|
150
150
|
when ".dfxp"
|
151
|
-
handler = DFXP.new(caption_file)
|
151
|
+
handler = DFXP.new(caption_file, options)
|
152
152
|
else
|
153
153
|
raise "Cannot handle file type .#{extension}"
|
154
154
|
end
|
data/lib/ttml.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require_relative "engines/translator"
|
2
|
+
require_relative "utils/common_utils"
|
3
|
+
require_relative "utils/cue_info"
|
2
4
|
require_relative "allfather"
|
3
5
|
|
4
6
|
require "nokogiri"
|
@@ -12,12 +14,20 @@ require "nokogiri"
|
|
12
14
|
class TTML
|
13
15
|
|
14
16
|
include AllFather
|
17
|
+
include CommonUtils
|
15
18
|
|
16
|
-
|
19
|
+
SUPPORTED_TRANSFORMATIONS = [TYPE_SCC, TYPE_SRT, TYPE_VTT, TYPE_DFXP]
|
20
|
+
|
21
|
+
def initialize(cc_file, opts=nil)
|
17
22
|
@cc_file = cc_file
|
23
|
+
@force_detect = opts ? (opts[:force_detect] || false) : false
|
18
24
|
raise "Invalid TTML file provided" unless is_valid?
|
19
25
|
end
|
20
26
|
|
27
|
+
def callsign
|
28
|
+
TYPE_TTML
|
29
|
+
end
|
30
|
+
|
21
31
|
def is_valid?
|
22
32
|
# Do any TTML specific validations here
|
23
33
|
if @cc_file =~ /^.*\.(ttml)$/
|
@@ -33,12 +43,12 @@ class TTML
|
|
33
43
|
end
|
34
44
|
|
35
45
|
def infer_languages
|
36
|
-
force_detect = false
|
37
46
|
lang = []
|
38
47
|
begin
|
39
48
|
xml_file = File.open(@cc_file)
|
40
49
|
xml_doc = Nokogiri::XML(xml_file)
|
41
50
|
div_objects = xml_doc.css("/tt/body/div")
|
51
|
+
local_force_detect = false
|
42
52
|
div_objects.each_with_index do |div, index|
|
43
53
|
# By default, return the lang if specified in the div and
|
44
54
|
# force detect is false
|
@@ -46,9 +56,10 @@ class TTML
|
|
46
56
|
if inferred_lang.nil?
|
47
57
|
# If lang is not provided in the caption, then override
|
48
58
|
# force detect for inferrence
|
49
|
-
|
59
|
+
local_force_detect = true
|
50
60
|
end
|
51
|
-
if force_detect
|
61
|
+
if @force_detect || local_force_detect
|
62
|
+
local_force_detect = false
|
52
63
|
sample_text = get_text(div, 100)
|
53
64
|
inferred_lang = @translator.infer_language(sample_text) rescue nil
|
54
65
|
if inferred_lang.nil?
|
@@ -115,8 +126,165 @@ class TTML
|
|
115
126
|
out_file
|
116
127
|
end
|
117
128
|
|
129
|
+
def supported_transformations
|
130
|
+
return SUPPORTED_TRANSFORMATIONS
|
131
|
+
end
|
132
|
+
|
133
|
+
def transform_to(types, src_lang, target_lang, output_dir)
|
134
|
+
# Let's start off with some validations
|
135
|
+
super(types, src_lang, target_lang, output_dir)
|
136
|
+
|
137
|
+
# Suffix output dir with File separator
|
138
|
+
output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
|
139
|
+
|
140
|
+
begin
|
141
|
+
xml_file = File.open(@cc_file, 'r')
|
142
|
+
xml_doc = Nokogiri::XML(xml_file)
|
143
|
+
div_objects = xml_doc.css("/tt/body/div")
|
144
|
+
langs = div_objects.map {|div| div.attributes['lang'].value rescue nil}
|
145
|
+
translate = false
|
146
|
+
matching_divs = []
|
147
|
+
inferred_src_lang = nil
|
148
|
+
if src_lang.nil? || src_lang.empty?
|
149
|
+
if target_lang && !target_lang.empty?
|
150
|
+
# Find if any of our div matches this. Else pick first and translate to target lang
|
151
|
+
div_objects.each_with_index do |div, j|
|
152
|
+
if matching_lang?(div, target_lang)
|
153
|
+
matching_divs << div
|
154
|
+
break
|
155
|
+
end
|
156
|
+
end
|
157
|
+
if matching_divs.empty?
|
158
|
+
# Let's pick the first div for target translation
|
159
|
+
selected_div = div_objects.first
|
160
|
+
inferred_src_lang = selected_div.lang
|
161
|
+
matching_divs << selected_div
|
162
|
+
translate = true
|
163
|
+
end
|
164
|
+
else
|
165
|
+
# Then we will have to create output files for each lang
|
166
|
+
matching_divs = div_objects
|
167
|
+
end
|
168
|
+
else
|
169
|
+
# Find the matching lang div and create the outputs
|
170
|
+
available_divs = langs.select { |lang| lang.eql?(src_lang) }
|
171
|
+
if available_divs.length > 1
|
172
|
+
raise InvalidInputException.new("More than one section in Caption file specifies lang as #{src_lang}. This file is unsupported")
|
173
|
+
end
|
174
|
+
div_objects.each_with_index do |div, j|
|
175
|
+
if matching_lang?(div, src_lang)
|
176
|
+
matching_divs << div
|
177
|
+
break
|
178
|
+
end
|
179
|
+
end
|
180
|
+
if matching_divs.empty?
|
181
|
+
raise InvalidInputException.new("Given Caption file #{@cc_file} doesn't contain #{src_lang} lang. Available langs are #{langs}")
|
182
|
+
end
|
183
|
+
if matching_divs.length > 1
|
184
|
+
raise InvalidInputException.new("More than one section in Caption file specifies lang as #{src_lang}. This file is unsupported")
|
185
|
+
end
|
186
|
+
if target_lang && !target_lang.empty? && !src_lang.eql?(target_lang)
|
187
|
+
translate = true
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
div_index = 1
|
192
|
+
multiple_outputs = matching_divs.size > 1
|
193
|
+
matching_divs.each do |div|
|
194
|
+
div_lang = div.attributes['lang'].value rescue nil
|
195
|
+
# Override div lang if translate is required
|
196
|
+
div_lang = target_lang if translate
|
197
|
+
file_map = {}
|
198
|
+
# Prepare the output files for each type and for each lang in the file
|
199
|
+
types.each do |type|
|
200
|
+
output_file = File.basename(@cc_file, File.extname(@cc_file))
|
201
|
+
# Suffix div index when multiple outputs are created
|
202
|
+
output_file << "_#{div_index}" if multiple_outputs
|
203
|
+
if target_lang.nil? && !src_lang.nil?
|
204
|
+
output_file << "_#{src_lang}"
|
205
|
+
end
|
206
|
+
# Suffix lang to filename if provided
|
207
|
+
if target_lang && !target_lang.empty?
|
208
|
+
output_file << "_#{target_lang}"
|
209
|
+
end
|
210
|
+
output_file << extension_from_type(type)
|
211
|
+
out_file = "#{output_dir}#{output_file}"
|
212
|
+
if create_file(TYPE_TTML, type, out_file, div_lang)
|
213
|
+
file_map[type] = out_file
|
214
|
+
else
|
215
|
+
raise StandardError.new("Failed to create output file for type #{type}")
|
216
|
+
end
|
217
|
+
end
|
218
|
+
blocks = div.css("p")
|
219
|
+
cue_index = 1
|
220
|
+
total_blocks = blocks.size
|
221
|
+
blocks.each_with_index do |block, index|
|
222
|
+
start_time = block.attributes['begin'].value
|
223
|
+
end_time = block.attributes['end'].value
|
224
|
+
text = block.inner_html.strip.gsub(/(\s){2,}/, '')
|
225
|
+
message = ""
|
226
|
+
text_blocks = get_block_text(text)
|
227
|
+
text_blocks.each do |text_block|
|
228
|
+
next if text_block.start_with?('<') || text_block.empty?
|
229
|
+
message << text_block
|
230
|
+
end
|
231
|
+
cue_info = CueInfo.new(callsign)
|
232
|
+
cue_info.index = cue_index
|
233
|
+
cue_index += 1
|
234
|
+
cue_info.message = translated_msg(translate, message, src_lang, inferred_src_lang, target_lang)
|
235
|
+
cue_info.start = start_time
|
236
|
+
cue_info.end = end_time
|
237
|
+
cue_info.start_time_units = time_details(start_time, callsign)
|
238
|
+
cue_info.end_time_units = time_details(end_time, callsign)
|
239
|
+
write_cue(cue_info, file_map, index == (total_blocks - 1))
|
240
|
+
end
|
241
|
+
div_index += 1
|
242
|
+
end
|
243
|
+
ensure
|
244
|
+
xml_file.close if xml_file
|
245
|
+
end
|
246
|
+
end
|
247
|
+
|
118
248
|
private
|
119
249
|
|
250
|
+
def translated_msg(translate, message, src_lang, inferred_src_lang, target_lang)
|
251
|
+
return message unless translate
|
252
|
+
use_src = nil
|
253
|
+
if (src_lang.nil? || src_lang.empty?)
|
254
|
+
if inferred_src_lang.nil?
|
255
|
+
raise LangDetectionFailureException.new("Unable to deduce source lang for translation")
|
256
|
+
end
|
257
|
+
use_src = inferred_src_lang
|
258
|
+
else
|
259
|
+
use_src = src_lang
|
260
|
+
end
|
261
|
+
return message if use_src.eql?(target_lang)
|
262
|
+
@translator.translate(message, use_src, target_lang)
|
263
|
+
end
|
264
|
+
|
265
|
+
def matching_lang?(div, target_lang)
|
266
|
+
lang = div.attributes['lang'].value rescue nil
|
267
|
+
if lang.nil?
|
268
|
+
# Let's infer the lang
|
269
|
+
if @translator.nil?
|
270
|
+
raise StandardError.new("Cannot infer language as engine options are not provided")
|
271
|
+
end
|
272
|
+
reference_text = get_text(div, 100)
|
273
|
+
inferred_lang = @translator.infer_language(reference_text) rescue nil
|
274
|
+
if inferred_lang.nil?
|
275
|
+
raise LangDetectionFailureException.new("Failed to infer language for div block #{j} of caption file")
|
276
|
+
end
|
277
|
+
# Store this lang in the div
|
278
|
+
div.lang = inferred_lang
|
279
|
+
if inferred_lang.eql?(target_lang)
|
280
|
+
return true
|
281
|
+
end
|
282
|
+
elsif lang.eql?(target_lang)
|
283
|
+
return true
|
284
|
+
end
|
285
|
+
return false
|
286
|
+
end
|
287
|
+
|
120
288
|
#
|
121
289
|
# Method to segregate the data from markups as markups don't need
|
122
290
|
# translations.
|
data/lib/vtt.rb
CHANGED
@@ -104,10 +104,19 @@ class VTT
|
|
104
104
|
# Suffix output dir with File separator
|
105
105
|
output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
|
106
106
|
|
107
|
+
translate = false
|
108
|
+
if target_lang && !target_lang.empty?
|
109
|
+
translate = true
|
110
|
+
if @translator.nil?
|
111
|
+
raise StandardError.new("Cannot infer language as engine options are not provided")
|
112
|
+
end
|
113
|
+
end
|
107
114
|
# Prepare the output files for each type
|
108
115
|
file_map = {}
|
109
116
|
types.each do |type|
|
110
|
-
output_file = File.basename(@cc_file, File.extname(@cc_file))
|
117
|
+
output_file = File.basename(@cc_file, File.extname(@cc_file))
|
118
|
+
output_file << "_#{target_lang}" if translate
|
119
|
+
output_file << extension_from_type(type)
|
111
120
|
out_file = "#{output_dir}#{output_file}"
|
112
121
|
if create_file(TYPE_VTT, type, out_file, target_lang)
|
113
122
|
file_map[type] = out_file
|
@@ -135,7 +144,7 @@ class VTT
|
|
135
144
|
else
|
136
145
|
collect_msg = false
|
137
146
|
unless message.empty?
|
138
|
-
cue_info.message = message
|
147
|
+
cue_info.message = translated_msg(translate, message, src_lang, target_lang)
|
139
148
|
write_cue(cue_info, file_map)
|
140
149
|
message = ""
|
141
150
|
cue_index += 1
|
@@ -152,11 +161,41 @@ class VTT
|
|
152
161
|
collect_msg = true
|
153
162
|
end
|
154
163
|
end
|
155
|
-
cue_info.message = message unless message.empty?
|
164
|
+
cue_info.message = translated_msg(translate, message, src_lang, target_lang) unless message.empty?
|
156
165
|
write_cue(cue_info, file_map, true)
|
157
166
|
end
|
158
167
|
|
159
|
-
private
|
168
|
+
private
|
169
|
+
|
170
|
+
#
|
171
|
+
# Method to translate a given text message based on following conditions
|
172
|
+
#
|
173
|
+
# * If translate is false, the message is returned as is
|
174
|
+
# * If +src_lang+ and +target_lang+ are same then the message is returned as is
|
175
|
+
# * If +src_lang+ is nil or empty then this caption file will be inspected to infer language
|
176
|
+
# and if it's same as target_lang, then again the message shall be returned as is
|
177
|
+
# * Otherwise, returns a translated text
|
178
|
+
#
|
179
|
+
# ==== Raise
|
180
|
+
# * LangDetectionFailureException - If failed to infer the language
|
181
|
+
#
|
182
|
+
def translated_msg(translate, message, src_lang, target_lang)
|
183
|
+
return message unless translate
|
184
|
+
use_src = nil
|
185
|
+
if (src_lang.nil? || src_lang.empty?)
|
186
|
+
# We don't need to infer again and again
|
187
|
+
begin
|
188
|
+
@inferred_src_lang ||= infer_languages.first
|
189
|
+
rescue StandardError => e
|
190
|
+
raise LangDetectionFailureException.new("Failed to infer language due to #{e.message}")
|
191
|
+
end
|
192
|
+
use_src = @inferred_src_lang
|
193
|
+
else
|
194
|
+
use_src = src_lang
|
195
|
+
end
|
196
|
+
return message if use_src.eql?(target_lang)
|
197
|
+
@translator.translate(message, use_src, target_lang)
|
198
|
+
end
|
160
199
|
|
161
200
|
#
|
162
201
|
# Method to get a minimal amount of key text that excludes any tags
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: subtitle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maheshwaran G
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-11-
|
12
|
+
date: 2019-11-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -95,8 +95,10 @@ dependencies:
|
|
95
95
|
- - ">="
|
96
96
|
- !ruby/object:Gem::Version
|
97
97
|
version: '0'
|
98
|
-
description: Subtitle gem helps you to detect language
|
99
|
-
to
|
98
|
+
description: " Subtitle gem helps you to detect the language(s)
|
99
|
+
of the caption file, translate closed caption \n to another
|
100
|
+
language and also supports transforming from one format to another. \n Say
|
101
|
+
for example from dfxp to srt or vtt or to all supported formats.\"\n"
|
100
102
|
email:
|
101
103
|
- pgmaheshwaran@gmail.com
|
102
104
|
- arunjeyaprasad@gmail.com
|