amagi-captions 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +145 -0
- data/Rakefile +2 -0
- data/amagi-captions.gemspec +27 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/captions/base.rb +179 -0
- data/lib/captions/cue.rb +93 -0
- data/lib/captions/error.rb +27 -0
- data/lib/captions/export.rb +25 -0
- data/lib/captions/formats/srt.rb +83 -0
- data/lib/captions/formats/vtt.rb +122 -0
- data/lib/captions/list.rb +53 -0
- data/lib/captions/util.rb +87 -0
- data/lib/captions/version.rb +3 -0
- data/lib/captions.rb +10 -0
- data/robots.txt +1375 -0
- metadata +106 -0
@@ -0,0 +1,122 @@
|
|
1
|
+
module Captions
  # Parser and exporter for WebVTT (.vtt) subtitle files.
  #
  # This class calls +base_parser+, +base_dump+ and +msec_to_timecode+ and
  # reads <tt>@file</tt> and <tt>@cue_list</tt>; none of them are defined
  # here, so they are presumably supplied by Base (verify against base.rb).
  class VTT < Base

    # Header used for all VTT files
    VTT_HEADER = "WEBVTT".freeze

    # VTT file comments/style section
    VTT_METADATA = /^NOTE|^STYLE/

    # Auto Keyword used in Alignment
    AUTO_KEYWORD = "auto".freeze

    # Alignment Data: maps the keyword found in the file to the value
    # stored on the cue ("center" is normalised to "middle").
    ALIGNMENT_VALUES = {
      "middle" => "middle",
      "center" => "middle",
      "left"   => "left",
      "right"  => "right",
      "start"  => "start",
      "end"    => "end",
    }.freeze

    # Parse VTT file and update CueList
    #
    # Reads <tt>@file</tt> line by line. The first line must carry the
    # WEBVTT signature. Every timing line starts a new cue; the non-empty
    # lines that follow it, up to the next blank line, become its text.
    #
    # Raises InvalidSubtitle when the signature is missing (including an
    # empty file) or when a cue setting is invalid.
    def parse
      base_parser do
        cue_count = 0
        meta_data_section = false
        in_cue = false # true between a timing line and the next blank line
        cue = nil
        raise InvalidSubtitle, "Invalid VTT Signature" unless validate_header(@file.gets)
        while (line = @file.gets)
          line = line.strip
          if line.empty?
            # A blank line terminates both NOTE/STYLE blocks and cue payloads.
            meta_data_section = false
            in_cue = false
          elsif !in_cue && is_meta_data?(line)
            # Only a line that opens a block can start NOTE/STYLE; a payload
            # line that merely begins with "NOTE" stays part of the cue text.
            meta_data_section = true
          elsif is_time?(line)
            @cue_list.append(cue) if cue
            cue_count += 1
            cue = Cue.new
            cue.number = cue_count
            fields = line.split
            # fields: [start, "-->", end, setting, setting, ...]
            cue.set_time(fields[0], fields[2])
            set_properties(cue, fields[3..-1])
            in_cue = true
          elsif in_cue && !meta_data_section && is_text?(line)
            # Text outside a cue (e.g. a cue identifier placed before the
            # timing line) is skipped: previously it crashed on a nil cue or
            # was appended to the preceding cue.
            cue.add_text(line)
          end
        end
        @cue_list.append(cue) if cue
      end
    end

    # Export CueList to VTT file
    #
    # Writes the header, then for every cue a blank line, the
    # "start --> end" timing line and the cue text.
    def dump(file)
      base_dump(file) do |out|
        out.write(VTT_HEADER)
        @cue_list.each do |cue|
          out.write("\n\n")
          out.write(msec_to_timecode(cue.start_time))
          out.write(" --> ")
          out.write(msec_to_timecode(cue.end_time))
          out.write("\n")
          out.write(cue.text)
        end
      end
    end

    # Check whether its a VTT_HEADER or not
    #
    # Returns false for nil (what IO#gets yields on an empty file) instead
    # of raising NoMethodError.
    def validate_header(line)
      return false if line.nil?
      line.strip.start_with?(VTT_HEADER)
    end

    # Check whether its a meta-data or not
    def is_meta_data?(text)
      !!(text =~ VTT_METADATA)
    end

    # Timecode format used in VTT file
    #
    # True when the line starts with a [HH:]MM:SS.mmm timestamp and contains
    # a second one. The millisecond separator must be a literal dot and the
    # hour field may have more than two digits.
    def is_time?(text)
      !!(text =~ /^(\d{2,}:)?\d{2}:\d{2}\.\d{3}.*(\d{2,}:)?\d{2}:\d{2}\.\d{3}/)
    end

    # Check whether its subtitle text or not
    def is_text?(text)
      text.is_a?(String) && !text.empty? && text != VTT_HEADER
    end

    # Applies the cue settings found after the timestamps of a timing line.
    #
    # properties - Array of "name:value" tokens (nil is treated as empty).
    #
    # Only "align" and "line" are handled; any other setting, and any token
    # without a value, is ignored. Raises InvalidSubtitle for invalid values.
    def set_properties(cue, properties)
      Array(properties).each do |property|
        name, value = property.split(":")
        next if value.nil? # token without ":" or with an empty value
        value = value.delete("%") # non-mutating; percentages are kept as plain numbers
        case name
        when "align"
          cue.alignment = get_alignment(value)
        when "line"
          # "line:<offset>,<alignment>" - only the offset is used
          cue.position = get_line(value.split(",")[0])
        end
      end
    end

    # Returns the normalised alignment for +value+.
    # Raises InvalidSubtitle when the keyword is not in ALIGNMENT_VALUES.
    def get_alignment(value)
      ALIGNMENT_VALUES.fetch(value) do
        raise InvalidSubtitle, "Invalid VTT Alignment Property"
      end
    end

    # Returns the line setting as an Integer.
    # The "auto" keyword is accepted and yields 0 (String#to_i of a
    # non-numeric string). Raises InvalidSubtitle for anything else that is
    # not an integer string.
    def get_line(value)
      unless is_integer?(value) || value == AUTO_KEYWORD
        raise InvalidSubtitle, "VTT Line property should be a valid number"
      end
      value.to_i
    end

    # Returns +value+ as an Integer between 0 and 100.
    # Raises InvalidSubtitle when it is not an integer string or is out of
    # range. Not called from within this class.
    def get_position(value)
      raise InvalidSubtitle, "VTT Position should be a valid number" unless is_integer?(value)
      position = value.to_i
      if position < 0 || position > 100
        raise InvalidSubtitle, "VTT Position should be a number between 0 to 100"
      end
      position
    end

    # True when +val+ is the canonical string form of an integer
    # ("12", "-3"); false for "012", "1.5", "" or nil.
    def is_integer?(val)
      val.to_i.to_s == val
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Captions
  # Enumerable collection of cues that all share one frame rate.
  class CueList
    include Enumerable

    # Returns frame-rate of the list
    attr_reader :fps

    # Creates a new CueList
    #
    # frame_rate - frame rate handed to every cue when it is appended.
    # list       - object used as backing store (kept by reference).
    def initialize(frame_rate, list=[])
      @list = list
      @fps = frame_rate
    end

    # Returns the parsed subtitles (the backing store itself, not a copy)
    def entries
      @list
    end

    # Compact summary: object id, frame rate and number of cues,
    # rather than a dump of every cue.
    def inspect
      format("#<Captions::CueList:%s @fps=%s @cues=%s>", object_id, fps, @list.count)
    end

    # Changes the frame rate of CueList
    # Each stored cue is asked to convert itself from the current rate
    # to the new one before the new rate is recorded.
    def frame_rate=(rate)
      previous_rate = @fps
      @list.each do |cue|
        cue.change_frame_rate(previous_rate, rate)
      end
      @fps = rate
    end

    # Inserts the subtitle into the CueList
    # The cue is serialized with the list's frame rate first.
    def append(cue)
      cue.serialize(fps)
      @list << cue
    end

    # Iterate through CueList
    # Without a block an Enumerator is returned.
    def each
      if block_given?
        @list.each { |cue| yield cue }
      else
        to_enum(:each)
      end
    end

    # Array-style index access, delegated to the backing store
    def [](index)
      @list[index]
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
module Captions
  # Timecode conversion helpers. They are plain instance methods, meant to
  # be mixed into another class or object.
  module Util

    # TC should be HH:MM:SS:FF (frames) or HH:MM:SS.MSC (milliseconds).
    # FF(2 digits) and MSC(3 digits) are optional.
    TIMECODE_REGEX = /^-?([01]\d|2[0-3]):[0-5]\d:[0-5]\d(:\d{2}|\.\d{3})?$/

    # Converts time-code in HH:MM:SS.MSEC (or) HH:MM:SS:FF (or) MM:SS.MSEC
    # to milliseconds.
    #
    # tc           - timecode String, optionally prefixed with "-".
    # ms_per_frame - duration of one frame, used only for the FF form.
    #                Defaults to 40, i.e. a frame rate of 25.
    #
    # Returns a (possibly negative) Integer number of milliseconds.
    def convert_to_msec(tc, ms_per_frame=40)
      sign = 1
      if tc[0] == '-'
        tc = tc[1..-1] # remove -ve sign
        sign = -1
      end

      clock, fraction = tc.split('.')
      parts = clock.split(':')

      # To handle MM:SS.MSEC format
      parts.unshift('00') if parts.length == 2

      msec = 0
      if fraction # msec component exists
        # Pad with trailing 0s to 3 digits and drop anything finer than a
        # millisecond, so ".5" is 500 ms and ".1234" is 123 ms (not 1234).
        msec = fraction.ljust(3, '0')[0, 3].to_i
      elsif parts.length == 4 # FF (frame) component exists
        # pop so that the seconds field becomes the last element again
        msec = parts.pop.to_i * ms_per_frame.to_f
      end

      # Negative indices keep this working for short inputs: a missing
      # field is nil and nil.to_i is 0.
      seconds = parts[-1].to_i + parts[-2].to_i * 60 + parts[-3].to_i * 3600

      # round to be consistent with tc_to_frames which also rounds
      sign * (msec + seconds * 1000).round
    end

    # Converts milliseconds calculated in one frame-rate to another frame-rate
    #
    # The time is split into whole seconds plus leftover frames at the old
    # rate; the whole seconds are re-expressed at the new rate and the
    # leftover frame count is carried over unchanged.
    #
    # Returns the rounded number of milliseconds at +new_fps+.
    def convert_frame_rate(msec, old_fps, new_fps)
      old_ms_per_frame = 1000.0 / old_fps
      new_ms_per_frame = 1000.0 / new_fps
      frames = (msec / old_ms_per_frame).round # Number of frames in old fps

      # divmod floors the quotient even when old_fps is a Float; plain "/"
      # produced a fractional second count there and counted frames twice.
      whole_seconds, leftover_frames = frames.divmod(old_fps)

      new_frames = whole_seconds * new_fps + leftover_frames # Number of frames in new fps
      (new_frames * new_ms_per_frame).round # MSEC in new fps
    end

    # Converts milliseconds to timecode format
    # Returns [-]HH:MM:SS.MSEC. The hour field is not wrapped, so it grows
    # beyond two digits when needed; negative input gets a leading "-",
    # matching what convert_to_msec accepts.
    def msec_to_timecode(milliseconds)
      sign = milliseconds < 0 ? "-" : ""
      total_seconds, msec = milliseconds.abs.divmod(1000)
      total_minutes, secs = total_seconds.divmod(60)
      hours, mins = total_minutes.divmod(60)

      format("%s%02d:%02d:%02d.%03d", sign, hours, mins, secs, msec)
    end

    # Parses time-code and converts it to milliseconds.
    # If time cannot be converted to milliseconds,
    # it throws InvalidInput Error
    #
    # time       - Integer milliseconds, or a String matching TIMECODE_REGEX.
    # frame_rate - forwarded as-is as the second argument of convert_to_msec.
    #              NOTE(review): that parameter is named ms_per_frame there,
    #              so confirm callers pass milliseconds-per-frame, not fps.
    def sanitize(time, frame_rate)
      if time.is_a?(String) && TIMECODE_REGEX.match(time)
        time = convert_to_msec(time, frame_rate)
      end
      # Integer instead of Fixnum: Fixnum no longer exists in current Ruby.
      raise InvalidInput, 'Input should be in Milliseconds or Timecode' unless time.is_a?(Integer)
      time
    end
  end
end
|
data/lib/captions.rb
ADDED