subtitle 0.2.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/allfather.rb +75 -1
- data/lib/dfxp.rb +3 -3
- data/lib/engines/aws.rb +3 -9
- data/lib/scc.rb +76 -18
- data/lib/srt.rb +72 -4
- data/lib/subtitle.rb +55 -13
- data/lib/ttml.rb +8 -5
- data/lib/utils/common_utils.rb +329 -0
- data/lib/utils/cue_info.rb +40 -0
- data/lib/vtt.rb +74 -3
- metadata +32 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49e45fb2713aedd5d6d7d6d290fe4874a292df3249cfad7259913e90b0cb7fd8
|
4
|
+
data.tar.gz: 7f4535875a19028db4ec08de90903daba7b906b659571c5a921850071bf3154c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b53144f0a627a545c0a989d664f3611078a7afc11e9f00b479065a0d3a1b2bc9bf68e10706bd89b85c0e73ff53d7c4627a2c5e29d38867ec2882c99ea56eda0
|
7
|
+
data.tar.gz: 6999ae152b2f5904a2061944522b11387280df6e02cbf4d36d7d5ae27ba12eb3b6c36357bba791c5a7b4f06575531ce76df904dcbb76b0e5f79bd62b05988704
|
data/lib/allfather.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require_relative "engines/translator"
|
3
|
+
|
1
4
|
#
|
2
5
|
# A Module that kind of acts as an interface where the generic methods
|
3
6
|
# that applies to each caption type can be defined
|
@@ -12,6 +15,15 @@ module AllFather
|
|
12
15
|
#
|
13
16
|
VALID_FILES = [".scc", ".srt", ".vtt", ".ttml", ".dfxp"]
|
14
17
|
|
18
|
+
#
|
19
|
+
# Caption type constants
|
20
|
+
#
|
21
|
+
TYPE_SCC = 1
|
22
|
+
TYPE_SRT = 2
|
23
|
+
TYPE_VTT = 3
|
24
|
+
TYPE_TTML = 4
|
25
|
+
TYPE_DFXP = 5
|
26
|
+
|
15
27
|
#
|
16
28
|
# Generic exception class that is raised for validation errors
|
17
29
|
#
|
@@ -45,12 +57,23 @@ module AllFather
|
|
45
57
|
raise "Not Implemented. Class #{self.class.name} doesn't implement infer_languages"
|
46
58
|
end
|
47
59
|
|
60
|
+
|
61
|
+
#
|
62
|
+
# Method to set a translation engine
|
63
|
+
#
|
64
|
+
# * +translator+ - Instance of translation engine. Refer to `engines/aws` for example
|
65
|
+
#
|
66
|
+
def set_translator(translator)
|
67
|
+
if translator && !(translator.is_a? Translator)
|
68
|
+
raise "Argument is not an instance of Translator"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
48
72
|
#
|
49
73
|
# Method to translate the caption from one language to another
|
50
74
|
#
|
51
75
|
# :args: src_lang, target_lang, output_file
|
52
76
|
#
|
53
|
-
# * +input_caption+ - A Valid input caption file. Refer to #is_valid?
|
54
77
|
# * +src_lang+ - can be inferred using #infer_language method
|
55
78
|
# * +target_lang+ - Target 2 letter ISO language code to which the source needs to be translated into.
|
56
79
|
# * +output_file+ - Output file. Can be a fully qualified path or just file name
|
@@ -80,4 +103,55 @@ module AllFather
|
|
80
103
|
# Further checks can be done only in caption specific implementations
|
81
104
|
# or translation engine specific implementation
|
82
105
|
end
|
106
|
+
|
107
|
+
#
|
108
|
+
# Method to convert from one caption type to other types. If the src_lang is not provided
|
109
|
+
# then all source languages will be converted to target types. For example, if a ttml file
|
110
|
+
# has "en" and "es" and target_type is vtt and no src_lang is provided 2 vtt files would be
|
111
|
+
# created one per language in the source. if a target_lang is provided then one of the lang
|
112
|
+
# from source would be picked for creating the output file with target_lang
|
113
|
+
#
|
114
|
+
# If no target_lang is provided, no translations are applied. The output files are created
|
115
|
+
# without any need for any language translation services. Hence doesn't incur any cost !!
|
116
|
+
#
|
117
|
+
# * +types+ - An array of valid target caption type(s). Refer to the +TYPE_+ constants defined in this module
|
118
|
+
# * +src_lang+ - can be inferred using #infer_language method
|
119
|
+
# * +target_lang+ - Target 2 letter ISO language code to which the source needs to be translated into.
|
120
|
+
# * +output_dir+ - Output Directory. Generated files would be dumped here
|
121
|
+
#
|
122
|
+
# ==== Raises
|
123
|
+
#
|
124
|
+
# InvalidInputException shall be raised if
|
125
|
+
# 1. The input file doesn't exist or is unreadable or is invalid caption
|
126
|
+
# 2. The output dir doesn't exist
|
127
|
+
# 3. Invalid lang codes for a given caption type
|
128
|
+
# 4. Unsupported type to which conversion is requested for
|
129
|
+
#
|
130
|
+
def transform_to(types, src_lang, target_lang, output_dir)
|
131
|
+
if (types - supported_transformations).size != 0
|
132
|
+
raise InvalidInputException.new("Unknown types provided for conversion in input #{types}")
|
133
|
+
end
|
134
|
+
unless File.directory?(output_dir)
|
135
|
+
FileUtils.mkdir_p(output_dir)
|
136
|
+
end
|
137
|
+
# Basic validations
|
138
|
+
if types.include?(TYPE_SCC)
|
139
|
+
if target_lang && !target_lang.eql?("en")
|
140
|
+
raise InvalidInputException.new("SCC can be generated only in en. #{target_lang} is unsupported")
|
141
|
+
end
|
142
|
+
end
|
143
|
+
if target_lang && !target_lang.empty?
|
144
|
+
raise InvalidInputException.new("Translation to other language as part of transform is yet to be implemented")
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
#
|
149
|
+
# Method to report on the supported transformations. Each implementor is free to return
|
150
|
+
# the types to which it can convert itself to
|
151
|
+
#
|
152
|
+
# Returns an array of one or more types defined as +TYPE_+ constants here
|
153
|
+
#
|
154
|
+
def supported_transformations
|
155
|
+
raise "Not Implemented. Class #{self.class.name} doesn't implement supported_transformations"
|
156
|
+
end
|
83
157
|
end
|
data/lib/dfxp.rb
CHANGED
@@ -10,10 +10,10 @@ require_relative "ttml"
|
|
10
10
|
#
|
11
11
|
class DFXP < TTML
|
12
12
|
|
13
|
-
def initialize(cc_file
|
13
|
+
def initialize(cc_file)
|
14
14
|
@cc_file = cc_file
|
15
|
-
|
16
|
-
|
15
|
+
#@translator = translator
|
16
|
+
#@force_detect = opts[:force_detect] || false
|
17
17
|
raise "Invalid TTML file provided" unless is_valid?
|
18
18
|
end
|
19
19
|
|
data/lib/engines/aws.rb
CHANGED
@@ -10,10 +10,9 @@ require_relative 'translator'
|
|
10
10
|
# == Credential Referencing Order
|
11
11
|
#
|
12
12
|
# * [Arguments] - Pass the credentials access_key_id and secret_access_key as arguments
|
13
|
-
# * [Environment route] - AWS_ACCESS_KEY_ID & AWS_SECRET_ACCESS_KEY can be exposed as
|
14
|
-
# environment variables
|
13
|
+
# * [Environment route] - AWS_ACCESS_KEY_ID & AWS_SECRET_ACCESS_KEY can be exposed as environment variables
|
15
14
|
# * [Profile Name] - The application uses the credentials of the system and picks the
|
16
|
-
#
|
15
|
+
# credentials referred to by the profile
|
17
16
|
#
|
18
17
|
class AwsEngine
|
19
18
|
include Translator
|
@@ -71,9 +70,6 @@ class AwsEngine
|
|
71
70
|
# Invokes the language detection API of AWS and returns only the language
|
72
71
|
# of the highest score and returns the ISO 639-1 code
|
73
72
|
#
|
74
|
-
# :args: text
|
75
|
-
#
|
76
|
-
# ===== Arguments
|
77
73
|
# * +text+ - The text for which the language is to be inferred
|
78
74
|
#
|
79
75
|
def infer_language(text)
|
@@ -83,12 +79,10 @@ class AwsEngine
|
|
83
79
|
|
84
80
|
#
|
85
81
|
# Invokes the translation API of AWS and returns the translated text
|
86
|
-
# as per the arguments provided
|
82
|
+
# as per the arguments provided.
|
87
83
|
# Will Raise exception if a translation cannot be made between the source
|
88
84
|
# and target language codes or if the lang code is invalid
|
89
85
|
#
|
90
|
-
# :args: input_text, src_lang, target_lang
|
91
|
-
#
|
92
86
|
# * +input_text+ - The text that needs to be translated
|
93
87
|
# * +src_lang+ - The source language of the text
|
94
88
|
# * +target_lang+ - The target language to which the input_text needs to be translated to
|
data/lib/scc.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require_relative "engines/translator"
|
2
|
+
require_relative "utils/common_utils"
|
3
|
+
require_relative "utils/cue_info"
|
2
4
|
require_relative "allfather"
|
3
5
|
|
4
6
|
#
|
@@ -10,10 +12,12 @@ require_relative "allfather"
|
|
10
12
|
class SCC
|
11
13
|
|
12
14
|
include AllFather
|
15
|
+
include CommonUtils
|
13
16
|
|
14
|
-
|
17
|
+
SUPPORTED_TRANSFORMATIONS = [TYPE_SRT, TYPE_VTT, TYPE_TTML, TYPE_DFXP]
|
18
|
+
|
19
|
+
def initialize(cc_file)
|
15
20
|
@cc_file = cc_file
|
16
|
-
@translator = translator
|
17
21
|
raise "Invalid SCC file provided" unless is_valid?
|
18
22
|
end
|
19
23
|
|
@@ -25,6 +29,11 @@ class SCC
|
|
25
29
|
return false
|
26
30
|
end
|
27
31
|
|
32
|
+
def set_translator(translator)
|
33
|
+
super(translator)
|
34
|
+
@translator = translator
|
35
|
+
end
|
36
|
+
|
28
37
|
def infer_languages
|
29
38
|
lang = nil
|
30
39
|
begin
|
@@ -40,6 +49,71 @@ class SCC
|
|
40
49
|
raise "Not Implemented. Class #{self.class.name} doesn't implement translate yet !!"
|
41
50
|
end
|
42
51
|
|
52
|
+
def supported_transformations
|
53
|
+
return SUPPORTED_TRANSFORMATIONS
|
54
|
+
end
|
55
|
+
|
56
|
+
def transform_to(types, src_lang, target_lang, output_dir)
|
57
|
+
# Let's start off with some validations
|
58
|
+
super(types, src_lang, target_lang, output_dir)
|
59
|
+
|
60
|
+
# Suffix output dir with File separator
|
61
|
+
output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
|
62
|
+
|
63
|
+
# Prepare the output files for each type
|
64
|
+
file_map = {}
|
65
|
+
types.each do |type|
|
66
|
+
output_file = File.basename(@cc_file, File.extname(@cc_file)) + extension_from_type(type)
|
67
|
+
out_file = "#{output_dir}#{output_file}"
|
68
|
+
if create_file(TYPE_SCC, type, out_file, target_lang)
|
69
|
+
file_map[type] = out_file
|
70
|
+
else
|
71
|
+
raise StandardError.new("Failed to create output file for type #{type}")
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Read the file and prepare the cue model
|
76
|
+
prev_cue_info = cur_cue_info = nil
|
77
|
+
ccfile = File.open(@cc_file, 'r:UTF-8', &:read)
|
78
|
+
cue_index = 1
|
79
|
+
ccfile.each_line do | line |
|
80
|
+
time_point = line.scan(/(^\d\d:\d\d:\d\d:\d\d\s)(.*)/)
|
81
|
+
unless time_point.empty?
|
82
|
+
scc_text_code = time_point[0][1].strip
|
83
|
+
message = decode(scc_text_code)
|
84
|
+
# Replace \u0000 with empty as this causes the ttml / dfxp outputs
|
85
|
+
# to treat them as end and terminates the xml the moment this is encountered
|
86
|
+
# https://github.com/sparklemotion/nokogiri/issues/1535
|
87
|
+
message = message.gsub(/\u0000/, '')
|
88
|
+
if prev_cue_info.nil?
|
89
|
+
prev_cue_info = CueInfo.new(TYPE_SCC)
|
90
|
+
prev_cue_info.index = cue_index
|
91
|
+
prev_cue_info.message = message
|
92
|
+
prev_cue_info.start = time_point[0][0].strip
|
93
|
+
else
|
94
|
+
cur_cue_info = CueInfo.new(TYPE_SCC)
|
95
|
+
cur_cue_info.index = cue_index
|
96
|
+
cur_cue_info.message = message
|
97
|
+
cur_cue_info.start = time_point[0][0].strip
|
98
|
+
# Set the previous cue info's end time to current cue's start time
|
99
|
+
# TODO: Need to see if we need to reduce at least 1 fps or 1s
|
100
|
+
prev_cue_info.end = cur_cue_info.start
|
101
|
+
prev_cue_info.start_time_units = time_details(prev_cue_info.start, TYPE_SCC)
|
102
|
+
prev_cue_info.end_time_units = time_details(prev_cue_info.end, TYPE_SCC)
|
103
|
+
write_cue(prev_cue_info, file_map)
|
104
|
+
prev_cue_info = cur_cue_info
|
105
|
+
end
|
106
|
+
cue_index += 1
|
107
|
+
end
|
108
|
+
end
|
109
|
+
# we need to set some end time, but don't know the same !!
|
110
|
+
# for now setting the start time itself
|
111
|
+
cur_cue_info.end = cur_cue_info.start
|
112
|
+
cur_cue_info.start_time_units = time_details(cur_cue_info.start, TYPE_SCC)
|
113
|
+
cur_cue_info.end_time_units = time_details(cur_cue_info.end, TYPE_SCC)
|
114
|
+
write_cue(cur_cue_info, file_map, true)
|
115
|
+
end
|
116
|
+
|
43
117
|
private
|
44
118
|
|
45
119
|
def get_text(srt_file, num_chars)
|
@@ -78,20 +152,4 @@ class SCC
|
|
78
152
|
end
|
79
153
|
decoded_text
|
80
154
|
end
|
81
|
-
|
82
|
-
def encode(free_text)
|
83
|
-
encoded_str = ""
|
84
|
-
count = 0
|
85
|
-
free_text.each_byte do |char|
|
86
|
-
count += 1
|
87
|
-
binval = char.to_s(2).count("1") % 2 == 0 ? (char.to_i | 128 ).to_s(2) : char.to_s(2)
|
88
|
-
encode_char = binval.to_i(2).to_s(16)
|
89
|
-
if ((count > 0) && (count % 2 == 0))
|
90
|
-
encoded_str << encode_char << " "
|
91
|
-
else
|
92
|
-
encoded_str << encode_char
|
93
|
-
end
|
94
|
-
end
|
95
|
-
encoded_str
|
96
|
-
end
|
97
155
|
end
|
data/lib/srt.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require_relative "engines/translator"
|
2
|
+
require_relative "utils/common_utils"
|
3
|
+
require_relative "utils/cue_info"
|
2
4
|
require_relative "allfather"
|
3
5
|
|
4
6
|
#
|
@@ -10,10 +12,12 @@ require_relative "allfather"
|
|
10
12
|
class SRT
|
11
13
|
|
12
14
|
include AllFather
|
15
|
+
include CommonUtils
|
13
16
|
|
14
|
-
|
17
|
+
SUPPORTED_TRANSFORMATIONS = [TYPE_SCC, TYPE_VTT, TYPE_TTML, TYPE_DFXP]
|
18
|
+
|
19
|
+
def initialize(cc_file)
|
15
20
|
@cc_file = cc_file
|
16
|
-
@translator = translator
|
17
21
|
raise "Invalid SRT file provided" unless is_valid?
|
18
22
|
end
|
19
23
|
|
@@ -25,6 +29,11 @@ class SRT
|
|
25
29
|
return false
|
26
30
|
end
|
27
31
|
|
32
|
+
def set_translator(translator)
|
33
|
+
super(translator)
|
34
|
+
@translator = translator
|
35
|
+
end
|
36
|
+
|
28
37
|
def translate(src_lang, dest_lang, out_file)
|
29
38
|
super(src_lang, dest_lang, out_file)
|
30
39
|
begin
|
@@ -60,7 +69,6 @@ class SRT
|
|
60
69
|
outfile.puts
|
61
70
|
end
|
62
71
|
ensure
|
63
|
-
ccfile.close rescue nil
|
64
72
|
outfile.close
|
65
73
|
end
|
66
74
|
end
|
@@ -76,6 +84,66 @@ class SRT
|
|
76
84
|
[lang]
|
77
85
|
end
|
78
86
|
|
87
|
+
def supported_transformations
|
88
|
+
return SUPPORTED_TRANSFORMATIONS
|
89
|
+
end
|
90
|
+
|
91
|
+
def transform_to(types, src_lang, target_lang, output_dir)
|
92
|
+
# Let's start off with some validations
|
93
|
+
super(types, src_lang, target_lang, output_dir)
|
94
|
+
|
95
|
+
# Suffix output dir with File separator
|
96
|
+
output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
|
97
|
+
|
98
|
+
# Prepare the output files for each type
|
99
|
+
file_map = {}
|
100
|
+
types.each do |type|
|
101
|
+
output_file = File.basename(@cc_file, File.extname(@cc_file)) + extension_from_type(type)
|
102
|
+
out_file = "#{output_dir}#{output_file}"
|
103
|
+
if create_file(TYPE_SRT, type, out_file, target_lang)
|
104
|
+
file_map[type] = out_file
|
105
|
+
else
|
106
|
+
raise StandardError.new("Failed to create output file for type #{type}")
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Read the file and prepare the cue model
|
111
|
+
cue_info = nil
|
112
|
+
ccfile = File.open(@cc_file, 'r:UTF-8', &:read)
|
113
|
+
message = ""
|
114
|
+
ccfile.each_line do | line |
|
115
|
+
# p line
|
116
|
+
next if line.strip.empty?
|
117
|
+
time_points = line.scan(/^((\d\d:)\d\d:\d\d[,.]\d\d\d.*)-->.*((\d\d:)\d\d:\d\d[,.]\d\d\d)/)
|
118
|
+
if time_points.empty?
|
119
|
+
# This is not a time point
|
120
|
+
seq = line.strip
|
121
|
+
if seq.to_i > 0
|
122
|
+
cue_info.message = message unless message.empty?
|
123
|
+
write_cue(cue_info, file_map) if cue_info
|
124
|
+
cue_info = CueInfo.new(TYPE_SRT)
|
125
|
+
cue_info.sequence = seq
|
126
|
+
# Reset the message
|
127
|
+
message = ""
|
128
|
+
else
|
129
|
+
# This is neither a sequence number nor a time point line
|
130
|
+
# Grab the details until we find next cue point
|
131
|
+
message << line
|
132
|
+
end
|
133
|
+
else
|
134
|
+
# This is a cue point. Fetch timestamps
|
135
|
+
cue_info.start = time_points[0][0]
|
136
|
+
cue_info.end = time_points[0][2]
|
137
|
+
start_units = time_details(cue_info.start, TYPE_SRT)
|
138
|
+
end_units = time_details(cue_info.end, TYPE_SRT)
|
139
|
+
cue_info.start_time_units = start_units
|
140
|
+
cue_info.end_time_units = end_units
|
141
|
+
end
|
142
|
+
end
|
143
|
+
cue_info.message = message unless message.empty?
|
144
|
+
write_cue(cue_info, file_map, true)
|
145
|
+
end
|
146
|
+
|
79
147
|
private
|
80
148
|
|
81
149
|
#
|
@@ -103,4 +171,4 @@ class SRT
|
|
103
171
|
end
|
104
172
|
return text_sample[0, num_chars]
|
105
173
|
end
|
106
|
-
end
|
174
|
+
end
|
data/lib/subtitle.rb
CHANGED
@@ -7,21 +7,29 @@ require_relative "allfather"
|
|
7
7
|
require_relative "engines/translator"
|
8
8
|
require_relative "engines/aws"
|
9
9
|
|
10
|
-
|
10
|
+
#
|
11
|
+
# Facade that wraps all the complexities surrounding which translation
|
12
|
+
# engine to use or which caption instances to be instantiated.
|
13
|
+
#
|
11
14
|
class Subtitle
|
12
|
-
|
15
|
+
|
16
|
+
TYPE_MAP = {"scc" => AllFather::TYPE_SCC, "srt" => AllFather::TYPE_SRT, "vtt" => AllFather::TYPE_VTT,
|
17
|
+
"ttml" => AllFather::TYPE_TTML, "dfxp" => AllFather::TYPE_DFXP}
|
18
|
+
|
19
|
+
def initialize(file, options = nil)
|
13
20
|
# Infer the caption handler from the extension
|
14
|
-
@cc_file =
|
21
|
+
@cc_file = file
|
15
22
|
raise "Input caption not provided. Please provide the same in :cc_file option" if @cc_file.nil?
|
16
|
-
|
17
|
-
@handler = get_caption_handler(options, translator)
|
23
|
+
initialize_handler(options) unless options.nil?
|
18
24
|
end
|
19
25
|
|
20
|
-
def detect_language
|
26
|
+
def detect_language(options = nil)
|
27
|
+
initialize_handler(options) if @handler.nil?
|
21
28
|
@handler.infer_languages
|
22
29
|
end
|
23
30
|
|
24
|
-
def translate(dest_lang, src_lang = nil, outfile = nil)
|
31
|
+
def translate(dest_lang, src_lang = nil, outfile = nil, options = nil)
|
32
|
+
initialize_handler(options) if @handler.nil?
|
25
33
|
if outfile.nil?
|
26
34
|
outfile = "#{@cc_file}_#{dest_lang}"
|
27
35
|
end
|
@@ -33,13 +41,40 @@ class Subtitle
|
|
33
41
|
outfile
|
34
42
|
end
|
35
43
|
|
44
|
+
def transform(types, src_lang = nil, target_lang = nil, options = nil)
|
45
|
+
# A quick validation & translation to expected arguments
|
46
|
+
vals = []
|
47
|
+
invalid_vals = []
|
48
|
+
types.each do |type|
|
49
|
+
type_val = TYPE_MAP[type]
|
50
|
+
if type_val.nil?
|
51
|
+
invalid_vals << type
|
52
|
+
next
|
53
|
+
end
|
54
|
+
vals << type_val
|
55
|
+
end
|
56
|
+
unless invalid_vals.empty?
|
57
|
+
raise "Invalid types #{invalid_vals} provided"
|
58
|
+
end
|
59
|
+
# Translator not required if target_lang is nil
|
60
|
+
if @handler.nil?
|
61
|
+
if target_lang.nil?
|
62
|
+
@handler = get_caption_handler(options, nil)
|
63
|
+
else
|
64
|
+
initialize_handler(options)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
output_dir = options[:outfile]
|
68
|
+
@handler.transform_to(vals, src_lang, target_lang, output_dir)
|
69
|
+
end
|
70
|
+
|
36
71
|
def type
|
37
72
|
type = nil
|
38
73
|
ccfile = File.open(@cc_file, 'r:UTF-8', &:read)
|
39
74
|
ccfile.each_line do | line |
|
40
75
|
if line =~ /^(\d\d:)\d\d:\d\d[,]\d\d\d.*-->.*(\d\d:)\d\d:\d\d[,]\d\d\d/
|
41
76
|
type = "srt"
|
42
|
-
elsif line =~
|
77
|
+
elsif line =~ /^((\d\d:)+\d\d[.,]\d\d\d)\s-->\s((\d\d:)+\d\d[.,]\d\d\d)|(^WEBVTT$)/
|
43
78
|
type = "vtt"
|
44
79
|
elsif line =~ /(^\d\d:\d\d:\d\d:\d\d\t(([0-9a-fA-F]{4})\s)*)+|(^Scenarist_SCC V(\d.\d)$)/
|
45
80
|
type = "scc"
|
@@ -63,6 +98,11 @@ class Subtitle
|
|
63
98
|
|
64
99
|
private
|
65
100
|
|
101
|
+
def initialize_handler(options)
|
102
|
+
translator = get_translator(options)
|
103
|
+
@handler = get_caption_handler(options, translator)
|
104
|
+
end
|
105
|
+
|
66
106
|
def get_translator(options)
|
67
107
|
translator = nil
|
68
108
|
# Try to infer the engine based on the passed options
|
@@ -93,24 +133,26 @@ class Subtitle
|
|
93
133
|
def get_caption_handler(options, translator)
|
94
134
|
caption_file = options[:cc_file]
|
95
135
|
extension = File.extname(caption_file)
|
136
|
+
extension = ".#{type}" if extension.nil?
|
96
137
|
unless AllFather::VALID_FILES.include?(extension)
|
97
138
|
raise "Caption support for #{caption_file} of type #{extension} is not supported yet"
|
98
139
|
end
|
99
140
|
handler = nil
|
100
141
|
case extension.downcase
|
101
142
|
when ".scc"
|
102
|
-
handler = SCC.new(caption_file
|
143
|
+
handler = SCC.new(caption_file)
|
103
144
|
when ".srt"
|
104
|
-
handler = SRT.new(caption_file
|
145
|
+
handler = SRT.new(caption_file)
|
105
146
|
when ".vtt"
|
106
|
-
handler = VTT.new(caption_file
|
147
|
+
handler = VTT.new(caption_file)
|
107
148
|
when ".ttml"
|
108
|
-
handler = TTML.new(caption_file
|
149
|
+
handler = TTML.new(caption_file)
|
109
150
|
when ".dfxp"
|
110
|
-
handler = DFXP.new(caption_file
|
151
|
+
handler = DFXP.new(caption_file)
|
111
152
|
else
|
112
153
|
raise "Cannot handle file type .#{extension}"
|
113
154
|
end
|
155
|
+
handler.set_translator(translator)
|
114
156
|
handler
|
115
157
|
end
|
116
158
|
end
|
data/lib/ttml.rb
CHANGED
@@ -13,10 +13,8 @@ class TTML
|
|
13
13
|
|
14
14
|
include AllFather
|
15
15
|
|
16
|
-
def initialize(cc_file
|
16
|
+
def initialize(cc_file)
|
17
17
|
@cc_file = cc_file
|
18
|
-
@translator = translator
|
19
|
-
@force_detect = opts[:force_detect] || false
|
20
18
|
raise "Invalid TTML file provided" unless is_valid?
|
21
19
|
end
|
22
20
|
|
@@ -30,7 +28,12 @@ class TTML
|
|
30
28
|
return false
|
31
29
|
end
|
32
30
|
|
31
|
+
def set_translator(translator)
|
32
|
+
@translator = translator
|
33
|
+
end
|
34
|
+
|
33
35
|
def infer_languages
|
36
|
+
force_detect = false
|
34
37
|
lang = []
|
35
38
|
begin
|
36
39
|
xml_file = File.open(@cc_file)
|
@@ -43,9 +46,9 @@ class TTML
|
|
43
46
|
if inferred_lang.nil?
|
44
47
|
# If lang is not provided in the caption, then override
|
45
48
|
# force detect for inferrence
|
46
|
-
|
49
|
+
force_detect = true
|
47
50
|
end
|
48
|
-
if
|
51
|
+
if force_detect
|
49
52
|
sample_text = get_text(div, 100)
|
50
53
|
inferred_lang = @translator.infer_language(sample_text) rescue nil
|
51
54
|
if inferred_lang.nil?
|
@@ -0,0 +1,329 @@
|
|
1
|
+
require_relative "../allfather"
|
2
|
+
require "nokogiri"
|
3
|
+
|
4
|
+
module CommonUtils
|
5
|
+
|
6
|
+
CREDITS = "Credits: Autogenerated by subtitle Rubygem".freeze
|
7
|
+
|
8
|
+
SCC_DEFAULT_FRAME_RATE = ENV["SCC_DEFAULT_FRAME_RATE"] || 23.976
|
9
|
+
|
10
|
+
#
|
11
|
+
# Method to create the file with basic header information which can be
|
12
|
+
# further updated with the transformed caption details by respective
|
13
|
+
# implementations
|
14
|
+
#
|
15
|
+
# * +src_type+ - Source caption type. Refer to AllFather::TYPE_SCC type constants
|
16
|
+
# * +dest_type+ - Target caption type. Refer to AllFather::TYPE_SCC type constants
|
17
|
+
# * +output_file+ - Creates this output_file to which type specific
|
18
|
+
# information would be dumped into
|
19
|
+
# * +target_lang+ - Target lang of the output_file
|
20
|
+
#
|
21
|
+
# ==== Returns
|
22
|
+
# true if the file is created with right headers and false otherwise
|
23
|
+
#
|
24
|
+
def create_file(src_type, dest_type, output_file, target_lang)
|
25
|
+
file = nil
|
26
|
+
done = false
|
27
|
+
begin
|
28
|
+
# Create the file in overwrite mode
|
29
|
+
file = File.open(output_file, "w")
|
30
|
+
|
31
|
+
# Dump the initial info into the file to start off with
|
32
|
+
case dest_type
|
33
|
+
when AllFather::TYPE_SCC
|
34
|
+
file.write("Scenarist_SCC V1.0\n\n")
|
35
|
+
|
36
|
+
when AllFather::TYPE_SRT
|
37
|
+
file.write("NOTE #{CREDITS}\n\n")
|
38
|
+
|
39
|
+
when AllFather::TYPE_VTT
|
40
|
+
file.write("WEBVTT\n\n")
|
41
|
+
file.write("NOTE #{CREDITS}\n\n")
|
42
|
+
|
43
|
+
when AllFather::TYPE_TTML
|
44
|
+
target_lang ||= ""
|
45
|
+
# TODO: Move this to a template file and load from there !!
|
46
|
+
data = <<-EOF
|
47
|
+
<tt xml:lang="" xmlns="http://www.w3.org/ns/ttml">
|
48
|
+
<head>
|
49
|
+
<metadata xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
|
50
|
+
<ttm:desc>#{CREDITS}</ttm:desc>
|
51
|
+
</metadata>
|
52
|
+
</head>
|
53
|
+
<body>
|
54
|
+
<div xml:lang=\"#{target_lang}\">
|
55
|
+
EOF
|
56
|
+
file.write(data)
|
57
|
+
|
58
|
+
when AllFather::TYPE_DFXP
|
59
|
+
target_lang ||= ""
|
60
|
+
data = <<-EOF
|
61
|
+
<tt xml:lang="" xmlns="http://www.w3.org/2004/11/ttaf1">
|
62
|
+
<head>
|
63
|
+
<meta xmlns:ttm="http://www.w3.org/2004/11/ttaf1#metadata">
|
64
|
+
<ttm:desc>#{CREDITS}</ttm:desc>
|
65
|
+
</meta>
|
66
|
+
</head>
|
67
|
+
<body>
|
68
|
+
<div xml:lang=\"#{target_lang}\">
|
69
|
+
EOF
|
70
|
+
file.write(data)
|
71
|
+
else
|
72
|
+
raise AllFather::InvalidInputException.new("Not a valid type; Failed to create output file for type #{type}")
|
73
|
+
end
|
74
|
+
done = true
|
75
|
+
ensure
|
76
|
+
file.close if file rescue nil
|
77
|
+
end
|
78
|
+
done
|
79
|
+
end
|
80
|
+
|
81
|
+
#
|
82
|
+
# Method to return a valid extension for a given caption type
|
83
|
+
# Refer to `AllFather#VALID_FILES`
|
84
|
+
#
|
85
|
+
# * +type+ - Must be one of the valid type defined in `AllFather`
|
86
|
+
#
|
87
|
+
# ====Raises
|
88
|
+
# InvalidInputException if a valid type is not provided
|
89
|
+
#
|
90
|
+
def extension_from_type(type)
|
91
|
+
case type
|
92
|
+
when AllFather::TYPE_SCC
|
93
|
+
return AllFather::VALID_FILES[0]
|
94
|
+
when AllFather::TYPE_SRT
|
95
|
+
return AllFather::VALID_FILES[1]
|
96
|
+
when AllFather::TYPE_VTT
|
97
|
+
return AllFather::VALID_FILES[2]
|
98
|
+
when AllFather::TYPE_TTML
|
99
|
+
return AllFather::VALID_FILES[3]
|
100
|
+
when AllFather::TYPE_DFXP
|
101
|
+
return AllFather::VALID_FILES[4]
|
102
|
+
else
|
103
|
+
raise AllFather::InvalidInputException.new("Not a valid type; Failed to create output file for type #{type}")
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
#
|
108
|
+
# Method to encode a text to SCC format
|
109
|
+
#
|
110
|
+
# * +free_text+ - Text that needs to be encoded
|
111
|
+
#
|
112
|
+
# ===== Returns
|
113
|
+
# The encoded string that can be added to SCC file
|
114
|
+
#
|
115
|
+
def scc_encode(free_text)
|
116
|
+
encoded_str = ""
|
117
|
+
count = 0
|
118
|
+
free_text.each_byte do |char|
|
119
|
+
count += 1
|
120
|
+
binval = char.to_s(2).count("1") % 2 == 0 ? (char.to_i | 128 ).to_s(2) : char.to_s(2)
|
121
|
+
encode_char = binval.to_i(2).to_s(16)
|
122
|
+
if ((count > 0) && (count % 2 == 0))
|
123
|
+
encoded_str << encode_char << " "
|
124
|
+
else
|
125
|
+
encoded_str << encode_char
|
126
|
+
end
|
127
|
+
end
|
128
|
+
encoded_str
|
129
|
+
end
|
130
|
+
|
131
|
+
#
|
132
|
+
# Method to return the cue info of the caption based on the model
|
133
|
+
# and target caption type which can be used by the caller's transformation routine
|
134
|
+
#
|
135
|
+
# * +model+ - `CueInfo` instance which is caption agnostic details of a cue
|
136
|
+
# * +target_type+ - The target type to which the new cue is to be generated
|
137
|
+
# * +last_cue+ - true for last cue and false otherwise.
|
138
|
+
#
|
139
|
+
def new_cue(model, target_type, last_cue = false)
|
140
|
+
message = nil
|
141
|
+
case target_type
|
142
|
+
when AllFather::TYPE_SCC
|
143
|
+
start_unit = model.start_time_units
|
144
|
+
h = start_unit[0].to_s.rjust(2, "0")
|
145
|
+
m = start_unit[1].to_s.rjust(2, "0")
|
146
|
+
s = start_unit[2].to_s.rjust(2, "0")
|
147
|
+
ms = start_unit[3]
|
148
|
+
# Convert to Frames assuming a framerate of 23.976
|
149
|
+
# Pad 0 if frames is <= 9
|
150
|
+
frames = ((ms.to_f * SCC_DEFAULT_FRAME_RATE) / 1000.0).to_i.to_s.rjust(2, "0").to_i
|
151
|
+
# TODO: Might have to strip off non-english characters here
|
152
|
+
message = "#{h}:#{m}:#{s}:#{frames} " + scc_encode(model.message)
|
153
|
+
when AllFather::TYPE_VTT, AllFather::TYPE_SRT
|
154
|
+
start_unit = model.start_time_units
|
155
|
+
end_unit = model.end_time_units
|
156
|
+
message = ""
|
157
|
+
if model.sequence
|
158
|
+
message = model.sequence + "\n"
|
159
|
+
else
|
160
|
+
message = model.index.to_s + "\n"
|
161
|
+
end
|
162
|
+
delimiter_added = false
|
163
|
+
[start_unit, end_unit].each do |unit|
|
164
|
+
h = unit[0].to_s.rjust(2, "0")
|
165
|
+
m = unit[1].to_s.rjust(2, "0")
|
166
|
+
s = unit[2].to_s.rjust(2, "0")
|
167
|
+
ms = unit[3]
|
168
|
+
if ms < 100
|
169
|
+
ms = ms.to_s.rjust(3, "0")
|
170
|
+
end
|
171
|
+
if target_type == AllFather::TYPE_VTT
|
172
|
+
message << "#{h}:#{m}:#{s}:#{ms}"
|
173
|
+
else
|
174
|
+
message << "#{h}:#{m}:#{s},#{ms}"
|
175
|
+
end
|
176
|
+
unless delimiter_added
|
177
|
+
message << " --> "
|
178
|
+
delimiter_added = true
|
179
|
+
end
|
180
|
+
end
|
181
|
+
message << "\n"
|
182
|
+
message << model.message
|
183
|
+
message << "\n"
|
184
|
+
message << "\n" unless model.message.end_with?("\n")
|
185
|
+
when AllFather::TYPE_TTML, AllFather::TYPE_DFXP
|
186
|
+
start_unit = model.start_time_units
|
187
|
+
end_unit = model.end_time_units
|
188
|
+
h = start_unit[0].to_s.rjust(2, "0")
|
189
|
+
m = start_unit[1].to_s.rjust(2, "0")
|
190
|
+
s = start_unit[2].to_s.rjust(2, "0")
|
191
|
+
ms = start_unit[3]
|
192
|
+
begin_time = "#{h}:#{m}:#{s}"
|
193
|
+
begin_time << ".#{ms.to_s.rjust(3, "0")}" if ms > 0
|
194
|
+
h = end_unit[0].to_s.rjust(2, "0")
|
195
|
+
m = end_unit[1].to_s.rjust(2, "0")
|
196
|
+
s = end_unit[2].to_s.rjust(2, "0")
|
197
|
+
ms = end_unit[3]
|
198
|
+
end_time = "#{h}:#{m}:#{s}"
|
199
|
+
end_time << ".#{ms.to_s.rjust(3, "0")}" if ms > 0
|
200
|
+
message = "<p begin=\"#{begin_time}\" end=\"#{end_time}\">#{model.message.encode(:xml => :text)}</p>"
|
201
|
+
message << "</div>\n</body>\n</tt>" if last_cue
|
202
|
+
end
|
203
|
+
message
|
204
|
+
end
|
205
|
+
|
206
|
+
#
|
207
|
+
# Method that normalizes the timestamps from various different caption formats into
|
208
|
+
# a caption agnostic format
|
209
|
+
#
|
210
|
+
# * +time_stamp+ - The timestamp parsed from the caption file for a given caption type
|
211
|
+
# * +type+ - A valid caption type. Refer to `AllFather` for valid types
|
212
|
+
#
|
213
|
+
def time_details(time_stamp, type)
|
214
|
+
h = m = s = ms = nil
|
215
|
+
elapsed_seconds = nil
|
216
|
+
case type
|
217
|
+
when AllFather::TYPE_SCC
|
218
|
+
tokens = time_stamp.split(":")
|
219
|
+
h = tokens[0].to_i
|
220
|
+
m = tokens[1].to_i
|
221
|
+
s = tokens[2].to_i
|
222
|
+
frames = tokens[3].to_i
|
223
|
+
ms = (frames * 1000 / SCC_DEFAULT_FRAME_RATE).round(0).to_s.rjust(3, "0").to_i
|
224
|
+
if ms >= 1000
|
225
|
+
ms = 999
|
226
|
+
end
|
227
|
+
when AllFather::TYPE_SRT
|
228
|
+
tokens = time_stamp.split(",")
|
229
|
+
ms = tokens[1].to_i
|
230
|
+
tokens = tokens[0].split(":")
|
231
|
+
h = tokens[0].to_i
|
232
|
+
m = tokens[1].to_i
|
233
|
+
s = tokens[2].to_i
|
234
|
+
when AllFather::TYPE_VTT
|
235
|
+
tokens = time_stamp.split(".")
|
236
|
+
ms = tokens[1].to_i
|
237
|
+
tokens = tokens[0].split(":")
|
238
|
+
if tokens.size == 2
|
239
|
+
h = 0
|
240
|
+
m = tokens[0].to_i
|
241
|
+
s = tokens[1].to_i
|
242
|
+
else
|
243
|
+
h = tokens[0].to_i
|
244
|
+
m = tokens[1].to_i
|
245
|
+
s = tokens[2].to_i
|
246
|
+
end
|
247
|
+
when AllFather::TYPE_TTML, AllFather::TYPE_DFXP
|
248
|
+
# We support only clock-time without framerate / tickrate and only media timebase
|
249
|
+
# Likewise, for offset-time we don't support frames / ticks
|
250
|
+
tokens = time_stamp.split(":")
|
251
|
+
if tokens.size > 1
|
252
|
+
if tokens.size > 3
|
253
|
+
# This is specified with frames and/or subframes. Unsupported
|
254
|
+
raise AllFather::InvalidInputException.new("TTML file with clock-time referencing frames / ticks is unsupported")
|
255
|
+
end
|
256
|
+
h = tokens[0].to_i
|
257
|
+
m = tokens[1].to_i
|
258
|
+
ms_tokens = tokens[2].split(".")
|
259
|
+
if ms_tokens.size == 1
|
260
|
+
ms = 0
|
261
|
+
else
|
262
|
+
ms = ms_tokens[1].to_i
|
263
|
+
end
|
264
|
+
s = ms_tokens[0].to_i
|
265
|
+
else
|
266
|
+
# Parsing in offset mode
|
267
|
+
if time_stamp.end_with?("ms")
|
268
|
+
unit = "ms"
|
269
|
+
time_with_no_unit = time_stamp[0, time_stamp.size - 2]
|
270
|
+
else
|
271
|
+
unit = time_stamp[time_stamp.size - 1]
|
272
|
+
time_with_no_unit = time_stamp[0, time_stamp.size - 1]
|
273
|
+
end
|
274
|
+
case unit
|
275
|
+
when "m"
|
276
|
+
time_with_no_unit = time_with_no_unit.to_f * 60
|
277
|
+
when "h"
|
278
|
+
time_with_no_unit = time_with_no_unit.to_f * (60 * 60)
|
279
|
+
when "s"
|
280
|
+
# do nothing
|
281
|
+
when "ms"
|
282
|
+
time_with_no_unit = time_with_no_unit.to_f / 1000.0
|
283
|
+
else
|
284
|
+
# Fail out f / t
|
285
|
+
raise AllFather::InvalidInputException.new("TTML file with offset-time referencing frames / ticks is unsupported")
|
286
|
+
end
|
287
|
+
tokens = time_with_no_unit.to_s.split(".")
|
288
|
+
h = m = 0
|
289
|
+
if tokens.size == 1
|
290
|
+
s = time_with_no_unit
|
291
|
+
ms = 0
|
292
|
+
else
|
293
|
+
s = tokens[0].to_i
|
294
|
+
ms = tokens[1].to_i
|
295
|
+
end
|
296
|
+
h = s / 3600
|
297
|
+
m = (s / 60) % 60
|
298
|
+
s = s % 60
|
299
|
+
end
|
300
|
+
end
|
301
|
+
elapsed_seconds = (h * 60 * 60) + (m * 60) + s
|
302
|
+
return [h, m, s, ms, elapsed_seconds]
|
303
|
+
end
|
304
|
+
|
305
|
+
|
306
|
+
#
# Method to write the cue details to the output files
#
# The text returned by +new_cue+ for +model+ is appended to every file
# listed in +file_map+. When +last_cue+ is true, the TTML and DFXP
# outputs are additionally re-read, parsed with Nokogiri (blank nodes
# dropped) and written back so the final XML is pretty printed.
#
# * +model+ - Cue instance
# * +file_map+ - Hash of files for each caption type
# * +last_cue+ - true for last cue and false otherwise
#
# Returns nil unless +last_cue+ is true, in which case +file_map+ is returned
#
def write_cue(model, file_map, last_cue = false)
  file_map.each do |type, file_path|
    File.open(file_path, "a") do |f|
      f.puts new_cue(model, type, last_cue)
    end
  end
  return unless last_cue

  # Pretty print the output for ttml & dfxp
  file_map.each do |type, file_path|
    next unless [AllFather::TYPE_DFXP, AllFather::TYPE_TTML].include?(type)
    # The block form closes the read handle before the same path is
    # rewritten below; the handle used to be left open.
    xml_doc = File.open(file_path, "r") { |file| Nokogiri::XML(file, &:noblanks) }
    File.write(file_path, xml_doc.to_s)
  end
end
|
329
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#
# Plain value holder describing a single caption cue.
#
# * +type+ - caption type given at construction time (read-only)
# * +start+ / +end+ - start and end points of the cue, nil until assigned
# * +sequence+ - nil until assigned
# * +message+ - text of the cue, empty string by default
# * +start_time_units+ / +end_time_units+ - empty arrays by default;
#   presumably the broken-down time values for start / end (verify against callers)
# * +index+ - position of the cue, 1 by default
#
class CueInfo
  # The caption type is fixed once the cue is created
  attr_reader :type

  # Everything else is filled in by whoever builds the cue
  attr_accessor :start, :end, :sequence, :message,
                :start_time_units, :end_time_units, :index

  def initialize(type)
    @type = type
    @start = @end = @sequence = nil
    @message = ""
    @start_time_units = []
    @end_time_units = []
    @index = 1
  end
end
|
data/lib/vtt.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require_relative "engines/translator"
|
2
|
+
require_relative "utils/common_utils"
|
3
|
+
require_relative "utils/cue_info"
|
2
4
|
require_relative "allfather"
|
3
5
|
|
4
6
|
#
|
@@ -10,13 +12,20 @@ require_relative "allfather"
|
|
10
12
|
class VTT
|
11
13
|
|
12
14
|
include AllFather
|
15
|
+
include CommonUtils
|
13
16
|
|
14
|
-
|
17
|
+
SUPPORTED_TRANSFORMATIONS = [TYPE_SCC, TYPE_SRT, TYPE_TTML, TYPE_DFXP]
|
18
|
+
|
19
|
+
def initialize(cc_file)
|
15
20
|
@cc_file = cc_file
|
16
|
-
@translator = translator
|
17
21
|
raise "Invalid VTT file provided" unless is_valid?
|
18
22
|
end
|
19
23
|
|
24
|
+
#
# Registers the translator to be used by this instance.
# The inherited implementation is invoked first with the same argument,
# after which the translator is stored on the instance.
#
# * +translator+ - translator instance to use
#
def set_translator(translator)
  super
  @translator = translator
end
|
28
|
+
|
20
29
|
def translate(src_lang, dest_lang, out_file)
|
21
30
|
super(src_lang, dest_lang, out_file)
|
22
31
|
begin
|
@@ -53,7 +62,6 @@ class VTT
|
|
53
62
|
outfile.puts
|
54
63
|
end
|
55
64
|
ensure
|
56
|
-
ccfile.close rescue nil
|
57
65
|
outfile.close
|
58
66
|
end
|
59
67
|
end
|
@@ -85,6 +93,69 @@ class VTT
|
|
85
93
|
return false
|
86
94
|
end
|
87
95
|
|
96
|
+
#
# Caption types this class can be transformed to.
#
# Returns the SUPPORTED_TRANSFORMATIONS constant
#
def supported_transformations
  SUPPORTED_TRANSFORMATIONS
end
|
99
|
+
|
100
|
+
#
# Transforms the VTT file into each of the requested caption types.
#
# After the inherited validations, one output file per type is created in
# +output_dir+ (named after the source file with the extension of the
# target type). The source is then read line by line: a timestamp line
# starts a new cue, the non-blank lines that follow are collected as its
# message, and each completed cue is handed to +write_cue+.
#
# * +types+ - caption types to transform to
# * +src_lang+ - passed through to the inherited validation
# * +target_lang+ - passed to the inherited validation and to +create_file+
# * +output_dir+ - directory in which the output files are created
#
# Raises StandardError when an output file cannot be created
#
def transform_to(types, src_lang, target_lang, output_dir)
  # Let's start off with some validations
  super

  # Suffix output dir with File separator
  dir_prefix = output_dir.end_with?(File::Separator) ? output_dir : "#{output_dir}#{File::Separator}"

  # Prepare the output files for each type
  file_map = types.each_with_object({}) do |type, created|
    file_name = File.basename(@cc_file, File.extname(@cc_file)) + extension_from_type(type)
    target = "#{dir_prefix}#{file_name}"
    unless create_file(TYPE_VTT, type, target, target_lang)
      raise StandardError.new("Failed to create output file for type #{type}")
    end
    created[type] = target
  end

  # Read the file and prepare the cue model
  cue_info = nil
  text = ""
  collecting = false
  cue_index = 1
  File.read(@cc_file, encoding: 'UTF-8').each_line do |line|
    # A blank line terminates the message of the current cue
    if line.strip.empty?
      collecting = false
      next
    end

    timing = line.match(/^((\d\d:)+\d\d[.,]\d\d\d)\s-->\s((\d\d:)+\d\d[.,]\d\d\d)/)
    if timing.nil?
      # Not a timestamp line; keep it only while inside a cue
      text << line if collecting
      next
    end

    # A new cue starts here, so flush the one collected so far
    unless text.empty?
      cue_info.message = text
      write_cue(cue_info, file_map)
      text = ""
      cue_index += 1
    end

    # This is a cue point. Fetch timestamps
    cue_info = CueInfo.new(AllFather::TYPE_VTT)
    cue_info.index = cue_index
    cue_info.start = timing[1]
    cue_info.end = timing[3]
    cue_info.start_time_units = time_details(cue_info.start, TYPE_VTT)
    cue_info.end_time_units = time_details(cue_info.end, TYPE_VTT)
    collecting = true
  end

  # NOTE(review): cue_info is still nil at this point when no timestamp line
  # was matched; it is handed to write_cue unchanged - confirm this is intended.
  cue_info.message = text unless text.empty?
  write_cue(cue_info, file_map, true)
end
|
158
|
+
|
88
159
|
private
|
89
160
|
|
90
161
|
#
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: subtitle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maheshwaran G
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-11-
|
12
|
+
date: 2019-11-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -67,6 +67,34 @@ dependencies:
|
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '10.0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: minitest
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: optimist
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
70
98
|
description: Subtitle gem helps you to detect language and translate closed caption
|
71
99
|
to required language.
|
72
100
|
email:
|
@@ -85,6 +113,8 @@ files:
|
|
85
113
|
- lib/srt.rb
|
86
114
|
- lib/subtitle.rb
|
87
115
|
- lib/ttml.rb
|
116
|
+
- lib/utils/common_utils.rb
|
117
|
+
- lib/utils/cue_info.rb
|
88
118
|
- lib/vtt.rb
|
89
119
|
homepage: https://github.com/cloudaffair/subtitle
|
90
120
|
licenses:
|