webvtt-ruby 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/Gemfile +0 -0
- data/LICENSE +22 -0
- data/README.md +121 -0
- data/bin/webvtt-segmenter +53 -0
- data/lib/parser.rb +143 -0
- data/lib/segmenter.rb +127 -0
- data/lib/webvtt.rb +8 -0
- data/tests/parser.rb +152 -0
- data/tests/segmenter.rb +76 -0
- data/tests/subtitles/big_srt.srt +4406 -0
- data/tests/subtitles/big_srt.webvtt +4408 -0
- data/tests/subtitles/notvalid.webvtt +3 -0
- data/tests/subtitles/test.webvtt +70 -0
- data/tests/subtitles/test_from_srt.srt +7 -0
- data/tests/subtitles/test_from_srt.webvtt +9 -0
- data/tests/subtitles/withnote.webvtt +21 -0
- data/webvtt-ruby.gemspecs +15 -0
- metadata +63 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c0edb293642afb992e4650a9835de04a619c527b
|
4
|
+
data.tar.gz: 17ac19588e370f2f8035196c4152f930a264ea32
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5236eb0fed07d03d983864f74a1dc8155a0221c2ce26ecb9ebb49736a8202acc0860f5d0a4f9b83134840c413c462ebc90617beb723afd9e813220c5702063d6
|
7
|
+
data.tar.gz: 0d047592a007a532315cd4131e44d649171662acfd07818863dcc6bf94b548a57eef6a9210104bca732031622ef95b0e4f45fa48b6c6761f5c83a123a3610cec
|
data/.gitignore
ADDED
data/Gemfile
ADDED
File without changes
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Bruno Celeste
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
# WebVTT Ruby parser and segmenter
|
2
|
+
|
3
|
+
The [WebVTT format](http://dev.w3.org/html5/webvtt/) is a standard captioning format used for HTML5 videos and HTTP Live Streaming (HLS).
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'webvtt-ruby'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install webvtt-ruby
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
To parse a webvtt file:
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require "webvtt"
|
25
|
+
|
26
|
+
webvtt = WebVTT.read("path/sub.webvtt")
|
27
|
+
webvtt.cues.each do |cue|
|
28
|
+
puts "identifier: #{cue.identifier}"
|
29
|
+
puts "Start: #{cue.start}"
|
30
|
+
puts "End: #{cue.end}"
|
31
|
+
puts "Style: #{cue.style.inspect}"
|
32
|
+
puts "Text: #{cue.text}"
|
33
|
+
puts "--"
|
34
|
+
end
|
35
|
+
```
|
36
|
+
|
37
|
+
## Converting from SRT
|
38
|
+
|
39
|
+
You can also convert an SRT file to a standard WebVTT file:
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
webvtt = WebVTT.convert_from_srt("path/sub.srt", "path/sub.webvtt")
|
43
|
+
puts webvtt.to_webvtt
|
44
|
+
```
|
45
|
+
|
46
|
+
## Segmenting for HTTP Live Streaming (HLS)
|
47
|
+
|
48
|
+
Segmenting is required to work with HLS videos.
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
WebVTT.segment("subtitles/en.webvtt", :length => 10, :output => "subtitles/en-%05d.webvtt", :playlist => "subtitles/en.m3u8")
|
52
|
+
```
|
53
|
+
|
54
|
+
It will also generate the playlist in `m3u8`:
|
55
|
+
|
56
|
+
```
|
57
|
+
#EXTM3U
|
58
|
+
#EXT-X-TARGETDURATION:17
|
59
|
+
#EXT-X-VERSION:3
|
60
|
+
#EXT-X-MEDIA-SEQUENCE:0
|
61
|
+
#EXT-X-PLAYLIST-TYPE:VOD
|
62
|
+
#EXTINF:13,
|
63
|
+
en-00000.webvtt
|
64
|
+
#EXTINF:17,
|
65
|
+
en-00001.webvtt
|
66
|
+
#EXTINF:12,
|
67
|
+
en-00002.webvtt
|
68
|
+
#EXT-X-ENDLIST
|
69
|
+
```
|
70
|
+
|
71
|
+
To use the segmented webvtt files with your HLS playlist:
|
72
|
+
|
73
|
+
```
|
74
|
+
#EXTM3U
|
75
|
+
|
76
|
+
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",URI="subtitles/en.m3u8"
|
77
|
+
|
78
|
+
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,SUBTITLES="subs"
|
79
|
+
demo-300000.m3u8
|
80
|
+
|
81
|
+
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,SUBTITLES="subs"
|
82
|
+
demo-600000.m3u8
|
83
|
+
```
|
84
|
+
|
85
|
+
## CLI
|
86
|
+
|
87
|
+
You can also segment webvtt files using the command line `webvtt-segmenter`:
|
88
|
+
|
89
|
+
```
|
90
|
+
$ webvtt-segmenter -i subtitles/en.webvtt -t 10 -m subtitles/en.m3u8 -o "subtitles/en-%05d.webvtt"
|
91
|
+
```
|
92
|
+
|
93
|
+
```
|
94
|
+
$ webvtt-segmenter -h
|
95
|
+
Usage: bin/webvtt-segmenter [--arg]
|
96
|
+
-i, --input [PATH] WebVTT or SRT file
|
97
|
+
-b, --base-url [URL] Base URL
|
98
|
+
-t, --target-duration [DUR] Duration of each segments. Default: 10
|
99
|
+
-o, --output [PATH] Path where WebVTT segments will be saved. Default: fileSequence-%05d.webvtt
|
100
|
+
-m, --playlist [PATH] Path where the playlist in m3u8 will be saved. Default: prog_index.m3u8
|
101
|
+
```
|
102
|
+
|
103
|
+
## Note
|
104
|
+
|
105
|
+
`webvtt-ruby` was written in a few hours because there was no open source tool to segment webvtt files. It's not perfect at all but it does the job.
|
106
|
+
|
107
|
+
## Contributing
|
108
|
+
|
109
|
+
1. Fork it
|
110
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
111
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
112
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
113
|
+
5. Create new Pull Request
|
114
|
+
|
115
|
+
## Author
|
116
|
+
|
117
|
+
**Bruno Celeste**
|
118
|
+
|
119
|
+
* http://www.heywatchencoding.com
|
120
|
+
* bruno@heywatch.com
|
121
|
+
* [@sadikzzz](http://twitter.com/sadikzzz)
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line front end for WebVTT.segment: splits a WebVTT (or SRT) file
# into fixed-length segments and writes the matching m3u8 playlist.

$LOAD_PATH << File.join(File.dirname(__FILE__), "..", "lib")
require "optparse"
require "webvtt"

options = {}
parser = OptionParser.new do |opts|
  opts.banner = %(Usage: webvtt-segmenter [--arg])

  opts.on("-i", "--input [PATH]", "WebVTT or SRT file") do |path|
    options[:input] = path
  end

  opts.on("-b", "--base-url [URL]", "Base URL") do |url|
    options[:base_url] = url
  end

  opts.on("-t", "--target-duration [DUR]", "Duration of each segments. Default: 10") do |dur|
    options[:length] = dur.to_i
  end

  opts.on("-o", "--output [PATH]", "Path where WebVTT segments will be saved. Default: fileSequence-%05d.webvtt") do |path|
    options[:output] = path
  end

  opts.on("-m", "--playlist [PATH]", "Path where the playlist in m3u8 will be saved. Default: prog_index.m3u8") do |path|
    options[:playlist] = path
  end
end

begin
  parser.parse!
  # --input is the only mandatory option; this also covers the case where
  # no option at all was given.
  raise OptionParser::MissingArgument, "--input" unless options[:input]
rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
  $stderr.puts e.to_s
  $stderr.puts parser
  # A usage error must not report success to the calling shell.
  exit 1
end

input = options.delete(:input)

# convert srt to webvtt first
if File.extname(input) == ".srt"
  puts "Converting srt to webvtt format..."
  input = WebVTT.convert_from_srt(input)
end

# The remaining options (:base_url, :length, :output, :playlist) are passed
# straight through to the segmenter.
WebVTT.segment(input, options)
puts "Done"
|
data/lib/parser.rb
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
module WebVTT

  # Raised when the content does not begin with the "WEBVTT" header line.
  class MalformedFile < RuntimeError; end
  # Raised when an input path does not exist.
  class InputError < RuntimeError; end

  # Parses the WebVTT file at +file+ and returns a WebVTT::File.
  #
  # Raises InputError if the path does not exist and MalformedFile if the
  # content does not start with "WEBVTT".
  def self.read(file)
    File.new(file)
  end

  # Converts the SRT file at +srt_file+ into a WebVTT file and parses it.
  #
  # srt_file - path of the SRT source.
  # output   - destination path; defaults to +srt_file+ with its ".srt"
  #            extension replaced by ".webvtt".
  #
  # Returns the WebVTT::File parsed from the written output.
  # Raises InputError if +srt_file+ does not exist.
  def self.convert_from_srt(srt_file, output=nil)
    unless ::File.exist?(srt_file)
      raise InputError, "SRT file not found"
    end

    srt = ::File.read(srt_file)
    # Only the trailing extension is rewritten, so a ".srt" occurring in a
    # directory name is left alone.
    output ||= srt_file.sub(/\.srt\z/, ".webvtt")

    # convert timestamps (SRT uses a comma before the milliseconds)
    srt = srt.gsub(/([0-9]{2}:[0-9]{2}:[0-9]{2})([,])([0-9]{3})/, '\1.\3')
    # normalize new line character
    srt = srt.gsub("\r\n", "\n")

    ::File.open(output, "w") { |f| f.write("WEBVTT\n\n#{srt}") }

    File.new(output)
  end

  # A parsed WebVTT document: the header block plus the list of cues.
  # Inside this module the bare name File refers to this class, so the
  # core class is always written ::File.
  class File
    attr_reader :header, :path, :filename
    attr_accessor :cues

    # Reads and parses +webvtt_file+.
    #
    # Raises InputError if the path does not exist and MalformedFile if the
    # first line does not start with "WEBVTT".
    def initialize(webvtt_file)
      unless ::File.exist?(webvtt_file)
        raise InputError, "WebVTT file not found"
      end

      @path = webvtt_file
      @filename = ::File.basename(@path)
      @content = ::File.read(webvtt_file).gsub("\r\n", "\n") # normalizing new line character
      parse
    end

    # Serializes the header and every cue, separated by blank lines.
    def to_webvtt
      [@header, @cues.map(&:to_webvtt)].flatten.join("\n\n")
    end

    # End time of the last cue in seconds (0 when there are no cues).
    def total_length
      return 0 if @cues.empty?
      @cues.last.end_in_sec
    end

    # Seconds between the start of the first cue and the end of the last
    # cue (0 when there are no cues).
    def actual_total_length
      return 0 if @cues.empty?
      @cues.last.end_in_sec - @cues.first.start_in_sec
    end

    # Writes the serialized document to +output+ and returns that path.
    # Without an argument the source path is used, with a trailing ".srt"
    # replaced by ".webvtt".
    def save(output=nil)
      output ||= @path.sub(/\.srt\z/, ".webvtt")

      # ::File is required here: a bare File is WebVTT::File, which has no .open
      ::File.open(output, "w") { |f| f.write(to_webvtt) }
      output
    end

    # Splits the content into blank-line separated blocks, validates the
    # header and builds the cue list. Blocks that do not yield a cue
    # (notes, blocks without a timing line) are skipped.
    #
    # Returns the array of cues. Raises MalformedFile on a bad header.
    def parse
      # remove bom first
      @content.gsub!("\uFEFF", '')

      blocks = @content.split("\n\n")
      @header = blocks.shift
      first_line = @header.to_s.split("\n").first.to_s.strip
      unless first_line.start_with?("WEBVTT")
        raise MalformedFile, "Not a valid WebVTT file"
      end

      @cues = blocks.map { |block| Cue.new(block) }.reject { |cue| cue.text.nil? }
    end
  end

  # A single cue: optional identifier, start/end timestamps, cue settings
  # (as a Hash of setting name to value) and the text payload.
  class Cue
    # A WebVTT timestamp; the hours field is optional.
    TIMESTAMP = /(?:[0-9]{2,}:)?[0-9]{2}:[0-9]{2}\.[0-9]{3}/
    # Timing line: start, arrow, end, then the raw cue settings.
    TIMING_LINE = /(#{TIMESTAMP}) -+> (#{TIMESTAMP})(.*)/

    attr_accessor :identifier, :start, :end, :style, :text

    # cue - the raw text of one blank-line separated block.
    def initialize(cue)
      @content = cue
      parse
    end

    # Serializes the cue: identifier line (when present), timing line with
    # settings, then the text.
    def to_webvtt
      settings = (@style || {}).map { |k, v| "#{k}:#{v}" }.join(" ")

      out = []
      out << @identifier if @identifier
      out << "#{@start} --> #{@end} #{settings}".strip
      out << @text.to_s
      out.join("\n")
    end

    # Converts a "HH:MM:SS.mmm" or "MM:SS.mmm" timestamp to seconds (Float).
    #
    # Raises ArgumentError when +timestamp+ contains no valid timestamp.
    def self.timestamp_in_sec(timestamp)
      mres = timestamp.to_s.match(/(?:([0-9]{2,}):)?([0-9]{2}):([0-9]{2}\.[0-9]{3})/)
      raise ArgumentError, "Invalid timestamp: #{timestamp.inspect}" if mres.nil?

      sec = mres[3].to_f # seconds and subseconds
      sec += mres[2].to_f * 60 # minutes
      sec += mres[1].to_f * 60 * 60 # hours (nil.to_f is 0.0 when omitted)
      sec
    end

    # Start time in seconds.
    def start_in_sec
      Cue.timestamp_in_sec(@start)
    end

    # End time in seconds.
    def end_in_sec
      Cue.timestamp_in_sec(@end)
    end

    # Duration of the cue in seconds.
    def length
      end_in_sec - start_in_sec
    end

    # Fills identifier, start, end, style and text from the raw block.
    # +text+ stays nil when the block is not a cue (empty block, NOTE
    # comment, or no timing line), which is how WebVTT::File filters it out.
    def parse
      lines = @content.to_s.split("\n").map(&:strip)
      # extra blank lines between blocks leave empty leading lines behind
      lines.shift while lines.first && lines.first.empty?
      return if lines.empty?

      # it's a note, ignore ("NOTE" must be the whole first word, so an
      # identifier that merely contains it is still a cue)
      return if lines[0] =~ /\ANOTE(?:[ \t]|\z)/

      unless lines[0].include?("-->")
        @identifier = lines.shift
      end

      timing = lines[0] && lines[0].match(TIMING_LINE)
      return if timing.nil?

      @start = timing[1]
      @end = timing[2]
      # split on the first colon only and ignore settings without a value
      pairs = timing[3].strip.split(" ").map { |s| s.split(":", 2).map(&:strip) }
      @style = Hash[pairs.select { |pair| pair.size == 2 }]
      @text = lines[1..-1].join("\n")
    end
  end
end
|
data/lib/segmenter.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
module WebVTT

  # Raised when the segmenter is given unusable input or options.
  class InputError < RuntimeError; end

  # Splits a WebVTT document into segment files and writes an m3u8 playlist.
  #
  # input   - a path to a WebVTT file, or an object responding to
  #           +to_webvtt+ (such as a parsed WebVTT::File).
  # options - :length   segment duration in seconds (default 10)
  #           :output   sprintf pattern for segment paths
  #                     (default "fileSequence-%05d.webvtt")
  #           :playlist playlist path (default "prog_index.m3u8")
  #           :base_url optional prefix for the segment URLs in the playlist
  #
  # Returns [playlist_path, segment_files].
  # Raises InputError when +input+ is neither a path nor a WebVTT object.
  def self.segment(input, options={})
    if input.is_a?(String)
      input = File.new(input)
    end

    unless input.respond_to?(:to_webvtt)
      raise InputError, "Input must be a WebVTT instance or a path"
    end

    segmenter = Segmenter.new(input, options)
    subs = segmenter.split_to_files
    playlist = segmenter.generate_playlist(subs)

    [playlist, subs]
  end

  # Distributes the cues of one WebVTT document over fixed-length segments.
  class Segmenter
    attr_reader :webvtt

    # webvtt  - object exposing +cues+ and +header+.
    # options - see WebVTT.segment. The hash is copied, so the caller's
    #           hash is not modified when defaults are filled in.
    #
    # Raises InputError when :length is not a positive number.
    def initialize(webvtt, options={})
      @webvtt = webvtt
      @options = options.dup
      @options[:length] ||= 10
      @options[:output] ||= "fileSequence-%05d.webvtt"
      @options[:playlist] ||= "prog_index.m3u8"

      # a zero length would otherwise surface later as a FloatDomainError
      unless @options[:length].is_a?(Numeric) && @options[:length] > 0
        raise InputError, "Segment length must be a positive number"
      end
    end

    # Segment indexes for +cue+, shifted so that the segment containing the
    # first cue of the document is index 0.
    def find_segment_files(cue)
      # we need to find out how many segments we
      # have to remove from our calculation
      # in the case of first cue not starting at 0
      first_start = @webvtt.cues[0].start_in_sec
      to_remove = (first_start / @options[:length]).floor
      find_segments(cue).map { |s| s - to_remove }
    end

    # Absolute segment indexes (counted from time 0) in which +cue+ is placed.
    def find_segments(cue)
      length = @options[:length]
      seg = [(cue.start_in_sec / length).floor]
      end_seg = seg[0] * length + length

      # if the cue length is > than desired length
      # or if cue end in sec is > end of the segment in sec
      # we display it in the next segment as well
      if cue.length > length || cue.end_in_sec > end_seg
        (cue.length / length).ceil.to_i.times { seg << seg.last + 1 }
      end

      seg
    end

    # Writes the m3u8 playlist for +files+ (objects exposing +filename+,
    # +total_length+ and +actual_total_length+) to the :playlist path.
    #
    # Returns the playlist path.
    def generate_playlist(files)
      target_duration = 0

      entries = files.each_with_index.map do |file, i|
        # the first segment is measured from 0 sec, the others from the
        # start of their own first cue
        total_length = i == 0 ? file.total_length : file.actual_total_length
        target_duration = total_length if total_length > target_duration

        url = if @options[:base_url].nil?
          file.filename
        else
          ::File.join(@options[:base_url], file.filename)
        end

        "#EXTINF:#{total_length.round},\n#{url}"
      end

      playlist = [
        "#EXTM3U",
        "#EXT-X-TARGETDURATION:#{target_duration.ceil}",
        "#EXT-X-VERSION:3",
        "#EXT-X-MEDIA-SEQUENCE:0",
        "#EXT-X-PLAYLIST-TYPE:VOD"
      ]
      playlist.concat(entries)
      playlist << "#EXT-X-ENDLIST"

      ::File.open(@options[:playlist], "w") { |io| io.write(playlist.join("\n")) }
      @options[:playlist]
    end

    # Writes one WebVTT file per non-empty segment, named by applying the
    # :output pattern to the running segment number, and returns them
    # re-opened through File.new.
    def split_to_files
      segment_files = []

      @webvtt.cues.each do |cue|
        find_segment_files(cue).each do |seg|
          (segment_files[seg] ||= []) << cue
        end
      end

      # segments that received no cue are dropped
      groups = segment_files.compact
      return [] if groups.empty?

      # Build a new string instead of appending to @webvtt.header, so the
      # parsed input is left untouched.
      header = @webvtt.header
      unless header.include?("X-TIMESTAMP-MAP=MPEGTS")
        header = "#{header}\nX-TIMESTAMP-MAP=MPEGTS:900000,LOCAL:00:00:00.000"
      end

      filenames = groups.each_with_index.map do |cues, i|
        filename = sprintf(@options[:output], i)
        content = [header, cues.map { |c| c.to_webvtt }.join("\n\n")].join("\n\n")
        ::File.open(filename, "w") { |io| io.write(content) }
        filename
      end

      filenames.map { |f| File.new(f) }
    end
  end
end
|