webvtt-ruby 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/Gemfile +0 -0
- data/LICENSE +22 -0
- data/README.md +121 -0
- data/bin/webvtt-segmenter +53 -0
- data/lib/parser.rb +143 -0
- data/lib/segmenter.rb +127 -0
- data/lib/webvtt.rb +8 -0
- data/tests/parser.rb +152 -0
- data/tests/segmenter.rb +76 -0
- data/tests/subtitles/big_srt.srt +4406 -0
- data/tests/subtitles/big_srt.webvtt +4408 -0
- data/tests/subtitles/notvalid.webvtt +3 -0
- data/tests/subtitles/test.webvtt +70 -0
- data/tests/subtitles/test_from_srt.srt +7 -0
- data/tests/subtitles/test_from_srt.webvtt +9 -0
- data/tests/subtitles/withnote.webvtt +21 -0
- data/webvtt-ruby.gemspecs +15 -0
- metadata +63 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c0edb293642afb992e4650a9835de04a619c527b
|
4
|
+
data.tar.gz: 17ac19588e370f2f8035196c4152f930a264ea32
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5236eb0fed07d03d983864f74a1dc8155a0221c2ce26ecb9ebb49736a8202acc0860f5d0a4f9b83134840c413c462ebc90617beb723afd9e813220c5702063d6
|
7
|
+
data.tar.gz: 0d047592a007a532315cd4131e44d649171662acfd07818863dcc6bf94b548a57eef6a9210104bca732031622ef95b0e4f45fa48b6c6761f5c83a123a3610cec
|
data/.gitignore
ADDED
data/Gemfile
ADDED
File without changes
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Bruno Celeste
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
# WebVTT Ruby parser and segmenter
|
2
|
+
|
3
|
+
The [WebVTT format](http://dev.w3.org/html5/webvtt/) is a standard captioning format used for HTML5 videos and HTTP Live Streaming (HLS).
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'webvtt-ruby'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install webvtt-ruby
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
To parse a webvtt file:
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require "webvtt"
|
25
|
+
|
26
|
+
webvtt = WebVTT.read("path/sub.webvtt")
|
27
|
+
webvtt.cues.each do |cue|
|
28
|
+
puts "identifier: #{cue.identifier}"
|
29
|
+
puts "Start: #{cue.start}"
|
30
|
+
puts "End: #{cue.end}"
|
31
|
+
puts "Style: #{cue.style.inspect}"
|
32
|
+
puts "Text: #{cue.text}"
|
33
|
+
puts "--"
|
34
|
+
end
|
35
|
+
```
|
36
|
+
|
37
|
+
## Converting from SRT
|
38
|
+
|
39
|
+
You can also convert an SRT file to a standard WebVTT file:
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
webvtt = WebVTT.convert_from_srt("path/sub.srt", "path/sub.webvtt")
|
43
|
+
puts webvtt.to_webvtt
|
44
|
+
```
|
45
|
+
|
46
|
+
## Segmenting for HTTP Live Streaming (HLS)
|
47
|
+
|
48
|
+
Segmenting is required to work with HLS videos.
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
WebVTT.segment("subtitles/en.webvtt", :length => 10, :output => "subtitles/en-%05d.webvtt", :playlist => "subtitles/en.m3u8")
|
52
|
+
```
|
53
|
+
|
54
|
+
It will also generate the playlist in `m3u8`:
|
55
|
+
|
56
|
+
```
|
57
|
+
#EXTM3U
|
58
|
+
#EXT-X-TARGETDURATION:17
|
59
|
+
#EXT-X-VERSION:3
|
60
|
+
#EXT-X-MEDIA-SEQUENCE:0
|
61
|
+
#EXT-X-PLAYLIST-TYPE:VOD
|
62
|
+
#EXTINF:13,
|
63
|
+
en-00000.webvtt
|
64
|
+
#EXTINF:17,
|
65
|
+
en-00001.webvtt
|
66
|
+
#EXTINF:12,
|
67
|
+
en-00002.webvtt
|
68
|
+
#EXT-X-ENDLIST
|
69
|
+
```
|
70
|
+
|
71
|
+
To use the segmented webvtt files with your HLS playlist:
|
72
|
+
|
73
|
+
```
|
74
|
+
#EXTM3U
|
75
|
+
|
76
|
+
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",URI="subtitles/en.m3u8"
|
77
|
+
|
78
|
+
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,SUBTITLES="subs"
|
79
|
+
demo-300000.m3u8
|
80
|
+
|
81
|
+
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,SUBTITLES="subs"
|
82
|
+
demo-600000.m3u8
|
83
|
+
```
|
84
|
+
|
85
|
+
## CLI
|
86
|
+
|
87
|
+
You can also segment webvtt files using the command line `webvtt-segmenter`:
|
88
|
+
|
89
|
+
```
|
90
|
+
$ webvtt-segmenter -i subtitles/en.webvtt -t 10 -m subtitles/en.m3u8 -o "subtitles/en-%05d.webvtt"
|
91
|
+
```
|
92
|
+
|
93
|
+
```
|
94
|
+
$ webvtt-segmenter -h
|
95
|
+
Usage: bin/webvtt-segmenter [--arg]
|
96
|
+
-i, --input [PATH] WebVTT or SRT file
|
97
|
+
-b, --base-url [URL] Base URL
|
98
|
+
-t, --target-duration [DUR] Duration of each segments. Default: 10
|
99
|
+
-o, --output [PATH] Path where WebVTT segments will be saved. Default: fileSequence-%05d.webvtt
|
100
|
+
-m, --playlist [PATH] Path where the playlist in m3u8 will be saved. Default: prog_index.m3u8
|
101
|
+
```
|
102
|
+
|
103
|
+
## Note
|
104
|
+
|
105
|
+
`webvtt-ruby` was written in a few hours because there was no open source tool to segment webvtt files. It's not perfect at all but it does the job.
|
106
|
+
|
107
|
+
## Contributing
|
108
|
+
|
109
|
+
1. Fork it
|
110
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
111
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
112
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
113
|
+
5. Create new Pull Request
|
114
|
+
|
115
|
+
## Author
|
116
|
+
|
117
|
+
**Bruno Celeste**
|
118
|
+
|
119
|
+
* http://www.heywatchencoding.com
|
120
|
+
* bruno@heywatch.com
|
121
|
+
* [@sadikzzz](http://twitter.com/sadikzzz)
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line front-end for WebVTT.segment.
#
# Reads a WebVTT (or SRT) file, splits it into segments and writes an m3u8
# playlist. An SRT input is first converted with WebVTT.convert_from_srt.

$LOAD_PATH << File.join(File.dirname(__FILE__), "..", "lib")
require "optparse"
require "webvtt"

# Options collected from the command line; everything except :input is
# passed through to WebVTT.segment unchanged.
@options = {}
parser = OptionParser.new do |opts|
  opts.banner = %(Usage: webvtt-segmenter [--arg])

  opts.on("-i", "--input [PATH]", "WebVTT or SRT file") do |path|
    @options[:input] = path
  end

  opts.on("-b", "--base-url [URL]", "Base URL") do |url|
    @options[:base_url] = url
  end

  opts.on("-t", "--target-duration [DUR]", "Duration of each segments. Default: 10") do |dur|
    @options[:length] = dur.to_i
  end

  opts.on("-o", "--output [PATH]", "Path where WebVTT segments will be saved. Default: fileSequence-%05d.webvtt") do |path|
    @options[:output] = path
  end

  opts.on("-m", "--playlist [PATH]", "Path where the playlist in m3u8 will be saved. Default: prog_index.m3u8") do |path|
    @options[:playlist] = path
  end
end

begin
  parser.parse!
  raise OptionParser::MissingArgument if @options.empty?
  unless @options[:input]
    raise OptionParser::InvalidOption, "Missing argument --input"
  end
rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
  $stderr.puts e.to_s
  $stderr.puts parser
  # A usage error must not look like success to the calling shell.
  exit 1
end

input = @options.delete(:input)

# convert srt to webvtt first
if File.extname(input) == ".srt"
  puts "Converting srt to webvtt format..."
  input = WebVTT.convert_from_srt(input)
end

WebVTT.segment(input, @options)
puts "Done"
|
data/lib/parser.rb
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
# Parsing of WebVTT subtitle files and conversion of SRT files to WebVTT.
module WebVTT

  # Raised when the parsed content does not begin with a WEBVTT header.
  class MalformedFile < RuntimeError; end
  # Raised when an input file does not exist.
  class InputError < RuntimeError; end

  # Parses the WebVTT file at +file+.
  #
  # @param file [String] path of the WebVTT file
  # @return [WebVTT::File]
  # @raise [InputError] if the file does not exist
  # @raise [MalformedFile] if the file has no WEBVTT header
  def self.read(file)
    File.new(file)
  end

  # Converts an SRT file to WebVTT, writes the result and parses it.
  #
  # @param srt_file [String] path of the SRT file
  # @param output [String, nil] destination path; defaults to +srt_file+
  #   with every ".srt" replaced by ".webvtt"
  # @return [WebVTT::File] the parsed converted file
  # @raise [InputError] if +srt_file+ does not exist
  def self.convert_from_srt(srt_file, output=nil)
    # File.exists? was removed in Ruby 3.2; exist? works on every version.
    unless ::File.exist?(srt_file)
      raise InputError, "SRT file not found"
    end

    srt = ::File.read(srt_file)
    output ||= srt_file.gsub(".srt", ".webvtt")

    # SRT separates milliseconds with a comma, WebVTT with a dot
    srt.gsub!(/([0-9]{2}:[0-9]{2}:[0-9]{2})([,])([0-9]{3})/, '\1.\3')
    # normalize new line character
    srt.gsub!("\r\n", "\n")

    srt = "WEBVTT\n\n#{srt}"
    ::File.open(output, "w") { |f| f.write(srt) }

    File.new(output)
  end

  # A parsed WebVTT file: its header block and its cues.
  class File
    attr_reader :header, :path, :filename
    attr_accessor :cues

    # Reads and parses +webvtt_file+.
    #
    # @param webvtt_file [String] path of the WebVTT file
    # @raise [InputError] if the file does not exist
    # @raise [MalformedFile] if the first line does not start with WEBVTT
    def initialize(webvtt_file)
      unless ::File.exist?(webvtt_file)
        raise InputError, "WebVTT file not found"
      end

      @path = webvtt_file
      @filename = ::File.basename(@path)
      # normalizing new line character
      @content = ::File.read(webvtt_file).gsub("\r\n", "\n")
      parse
    end

    # @return [String] header and cues joined by blank lines
    def to_webvtt
      [@header, @cues.map(&:to_webvtt)].flatten.join("\n\n")
    end

    # @return [Float] end time of the last cue, in seconds
    def total_length
      @cues.last.end_in_sec
    end

    # @return [Float] seconds between the start of the first cue and the
    #   end of the last cue
    def actual_total_length
      @cues.last.end_in_sec - @cues.first.start_in_sec
    end

    # Writes the serialized file to disk.
    #
    # @param output [String, nil] destination path; defaults to the source
    #   path with every ".srt" replaced by ".webvtt"
    # @return [String] the path written to
    def save(output=nil)
      output ||= @path.gsub(".srt", ".webvtt")

      # Must be ::File: a bare File here is WebVTT::File, which has no .open.
      ::File.open(output, "w") { |f| f.write(to_webvtt) }
      output
    end

    # Splits the content on blank lines into a header and cues.
    # Chunks that do not yield a cue text (notes, empty chunks) are skipped.
    #
    # @return [Array<Cue>] the parsed cues
    # @raise [MalformedFile] if the first line does not start with WEBVTT
    def parse
      # remove bom first
      @content.gsub!("\uFEFF", '')

      chunks = @content.split("\n\n")
      # to_s: an empty file has no chunk at all and must be reported as
      # malformed rather than crash on nil.
      @header = chunks.shift.to_s
      first_line = @header.split("\n").map(&:strip).first.to_s
      unless first_line.start_with?("WEBVTT")
        raise MalformedFile, "Not a valid WebVTT file"
      end

      @cues = chunks.map { |chunk| Cue.new(chunk) }.reject { |cue| cue.text.nil? }
    end
  end

  # One cue: optional identifier, timing line with settings, and text.
  class Cue
    attr_accessor :identifier, :start, :end, :style, :text

    # @param cue [String] raw text of one blank-line-delimited chunk
    def initialize(cue)
      @content = cue
      parse
    end

    # @return [String] the cue serialized as identifier (if any), timing
    #   line with settings, and text
    def to_webvtt
      settings = @style.map { |k, v| "#{k}:#{v}" }.join(" ")
      res = ""
      res << "#{@identifier}\n" if @identifier
      res << "#{@start} --> #{@end} #{settings}".strip + "\n"
      res << @text
      res
    end

    # Converts a "hh:mm:ss.ttt" timestamp to seconds.
    #
    # @param timestamp [String]
    # @return [Float]
    def self.timestamp_in_sec(timestamp)
      mres = timestamp.match(/([0-9]{2}):([0-9]{2}):([0-9]{2}\.[0-9]{3})/)
      sec = mres[3].to_f # seconds and subseconds
      sec += mres[2].to_f * 60 # minutes
      sec += mres[1].to_f * 60 * 60 # hours
      sec
    end

    # @return [Float] start time in seconds
    def start_in_sec
      Cue.timestamp_in_sec(@start)
    end

    # @return [Float] end time in seconds
    def end_in_sec
      Cue.timestamp_in_sec(@end)
    end

    # @return [Float] duration of the cue in seconds
    def length
      end_in_sec - start_in_sec
    end

    # Parses the raw chunk. @text stays nil for chunks that are not cues,
    # which is how File#parse recognizes and skips them.
    def parse
      lines = @content.split("\n").map(&:strip)

      # consecutive blank lines produce empty chunks; nothing to parse
      return if lines.empty?

      # it's a note, ignore. Only a chunk that *starts* with NOTE is a note;
      # an identifier that merely contains the word is a regular cue.
      return if lines[0].start_with?("NOTE")

      @identifier = lines.shift unless lines[0].include?("-->")

      # identifier without a timing line: not a cue
      return if lines.empty?

      timing = lines[0].match(/([0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3}) -+> ([0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3})(.*)/)
      if timing
        @start = timing[1]
        @end = timing[2]
        # cue settings are space separated "name:value" pairs
        @style = Hash[timing[3].strip.split(" ").map { |s| s.split(":").map(&:strip) }]
      end
      @text = lines[1..-1].join("\n")
    end
  end
end
|
data/lib/segmenter.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
# Splitting of a parsed WebVTT file into fixed-length segment files plus an
# m3u8 playlist that lists them.
module WebVTT

  # Raised when the input is neither a path nor an object with #to_webvtt.
  class InputError < RuntimeError; end

  # Segments +input+ and writes the segment files and the playlist.
  #
  # @param input [String, #to_webvtt] a path (parsed with File.new) or an
  #   already parsed WebVTT object
  # @param options [Hash] see Segmenter#initialize
  # @return [Array] two elements: the playlist path and the segment files
  # @raise [InputError] if +input+ does not respond to #to_webvtt
  def self.segment(input, options={})
    input = File.new(input) if input.is_a?(String)

    unless input.respond_to?(:to_webvtt)
      raise InputError, "Input must be a WebVTT instance or a path"
    end

    segmenter = Segmenter.new(input, options)
    subs = segmenter.split_to_files
    playlist = segmenter.generate_playlist(subs)

    [playlist, subs]
  end

  # Distributes the cues of one WebVTT object over segment files.
  class Segmenter
    # Header line added to every segment that does not already carry one.
    TIMESTAMP_MAP = "X-TIMESTAMP-MAP=MPEGTS:900000,LOCAL:00:00:00.000"

    attr_reader :webvtt

    # @param webvtt [#cues, #header] the parsed WebVTT object to segment
    # @param options [Hash]
    # @option options [Numeric] :length segment length in seconds (default 10)
    # @option options [String] :output sprintf pattern for segment file
    #   names (default "fileSequence-%05d.webvtt")
    # @option options [String] :playlist playlist path
    #   (default "prog_index.m3u8")
    # @option options [String] :base_url prefix joined to each file name in
    #   the playlist
    def initialize(webvtt, options={})
      @webvtt = webvtt
      # work on a copy so the defaults are not written into the caller's hash
      @options = options.dup
      @options[:length] ||= 10
      @options[:output] ||= "fileSequence-%05d.webvtt"
      @options[:playlist] ||= "prog_index.m3u8"
    end

    # Segment file indexes for +cue+.
    #
    # Same as #find_segments, shifted so that the segment containing the
    # start of the first cue gets index 0 (the first cue may not start at 0).
    #
    # @return [Array<Integer>]
    def find_segment_files(cue)
      first_start = @webvtt.cues[0].start_in_sec
      to_remove = (first_start / @options[:length]).floor
      find_segments(cue).map { |s| s - to_remove }
    end

    # Absolute segment indexes (counted from time 0) for +cue+.
    #
    # The cue always belongs to the segment containing its start. If the cue
    # is longer than a segment, or ends after that segment does, it is also
    # listed in ceil(cue length / segment length) following segments.
    #
    # @return [Array<Integer>]
    def find_segments(cue)
      length = @options[:length]
      segments = [(cue.start_in_sec / length).floor]
      segment_end = segments[0] * length + length

      if cue.length > length || cue.end_in_sec > segment_end
        (cue.length / length).ceil.to_i.times { segments << segments.last + 1 }
      end

      segments
    end

    # Writes the m3u8 playlist for the given segment files.
    #
    # The duration of the first file is counted from 0 sec (#total_length);
    # later files use #actual_total_length. The target duration is the
    # largest of those durations, rounded up.
    #
    # @param files [Array<#filename, #total_length, #actual_total_length>]
    # @return [String] the playlist path
    def generate_playlist(files)
      entries = []
      target_duration = 0

      files.each_with_index do |file, i|
        duration = i.zero? ? file.total_length : file.actual_total_length
        target_duration = duration if duration > target_duration

        url = if @options[:base_url].nil?
          file.filename
        else
          ::File.join(@options[:base_url], file.filename)
        end
        entries << "#EXTINF:#{duration.round},\n#{url}"
      end

      playlist = [
        "#EXTM3U",
        "#EXT-X-TARGETDURATION:#{target_duration.ceil}",
        "#EXT-X-VERSION:3",
        "#EXT-X-MEDIA-SEQUENCE:0",
        "#EXT-X-PLAYLIST-TYPE:VOD"
      ]
      playlist.concat(entries)
      playlist << "#EXT-X-ENDLIST"

      ::File.open(@options[:playlist], "w") { |f| f.write(playlist.join("\n")) }
      @options[:playlist]
    end

    # Writes one file per non-empty segment, named from the :output pattern
    # and numbered consecutively from 0.
    #
    # @return [Array] the written files, each re-opened with File.new
    def split_to_files
      buckets = []
      @webvtt.cues.each do |cue|
        find_segment_files(cue).each do |seg|
          (buckets[seg] ||= []) << cue
        end
      end

      header = segment_header
      # compact: segments without any cue produce no file
      filenames = buckets.compact.each_with_index.map do |cues, i|
        filename = sprintf(@options[:output], i)
        content = [header, cues.map(&:to_webvtt).join("\n\n")].join("\n\n")
        ::File.open(filename, "w") { |io| io.write(content) }
        filename
      end

      filenames.map { |name| File.new(name) }
    end

    private

    # Header for segment files: the source header, plus the timestamp map
    # when it is missing. Builds a new string so the source object's header
    # is left untouched.
    def segment_header
      header = @webvtt.header
      return header if header.include?("X-TIMESTAMP-MAP=MPEGTS")

      "#{header}\n#{TIMESTAMP_MAP}"
    end
  end
end
|