webvtt-ruby 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c0edb293642afb992e4650a9835de04a619c527b
4
+ data.tar.gz: 17ac19588e370f2f8035196c4152f930a264ea32
5
+ SHA512:
6
+ metadata.gz: 5236eb0fed07d03d983864f74a1dc8155a0221c2ce26ecb9ebb49736a8202acc0860f5d0a4f9b83134840c413c462ebc90617beb723afd9e813220c5702063d6
7
+ data.tar.gz: 0d047592a007a532315cd4131e44d649171662acfd07818863dcc6bf94b548a57eef6a9210104bca732031622ef95b0e4f45fa48b6c6761f5c83a123a3610cec
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ .DS_Store
2
+ *.m3u8
data/Gemfile ADDED
File without changes
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Bruno Celeste
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,121 @@
1
+ # WebVTT Ruby parser and segmenter
2
+
3
+ The [WebVTT format](http://dev.w3.org/html5/webvtt/) is a standard captioning format used for HTML5 videos and HTTP Live Streaming (HLS).
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'webvtt-ruby'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install webvtt-ruby
18
+
19
+ ## Usage
20
+
21
+ To parse a webvtt file:
22
+
23
+ ```ruby
24
+ require "webvtt"
25
+
26
+ webvtt = WebVTT.read("path/sub.webvtt")
27
+ webvtt.cues.each do |cue|
28
+ puts "identifier: #{cue.identifier}"
29
+ puts "Start: #{cue.start}"
30
+ puts "End: #{cue.end}"
31
+ puts "Style: #{cue.style.inspect}"
32
+ puts "Text: #{cue.text}"
33
+ puts "--"
34
+ end
35
+ ```
36
+
37
+ ## Converting from SRT
38
+
39
+ You can also convert an SRT file to a standard WebVTT file:
40
+
41
+ ```ruby
42
+ webvtt = WebVTT.convert_from_srt("path/sub.srt", "path/sub.webvtt")
43
+ puts webvtt.to_webvtt
44
+ ```
45
+
46
+ ## Segmenting for HTTP Live Streaming (HLS)
47
+
48
+ Segmenting is required to work with HLS videos.
49
+
50
+ ```ruby
51
+ WebVTT.segment("subtitles/en.webvtt", :length => 10, :output => "subtitles/en-%05d.webvtt", :playlist => "subtitles/en.m3u8")
52
+ ```
53
+
54
+ It will also generate the playlist in `m3u8`:
55
+
56
+ ```
57
+ #EXTM3U
58
+ #EXT-X-TARGETDURATION:17
59
+ #EXT-X-VERSION:3
60
+ #EXT-X-MEDIA-SEQUENCE:0
61
+ #EXT-X-PLAYLIST-TYPE:VOD
62
+ #EXTINF:13,
63
+ en-00000.webvtt
64
+ #EXTINF:17,
65
+ en-00001.webvtt
66
+ #EXTINF:12,
67
+ en-00002.webvtt
68
+ #EXT-X-ENDLIST
69
+ ```
70
+
71
+ To use the segmented webvtt files with your HLS playlist:
72
+
73
+ ```
74
+ #EXTM3U
75
+
76
+ #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",URI="subtitles/en.m3u8"
77
+
78
+ #EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,SUBTITLES="subs"
79
+ demo-300000.m3u8
80
+
81
+ #EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,SUBTITLES="subs"
82
+ demo-600000.m3u8
83
+ ```
84
+
85
+ ## CLI
86
+
87
+ You can also segment webvtt files using the command line `webvtt-segmenter`:
88
+
89
+ ```
90
+ $ webvtt-segmenter -i subtitles/en.webvtt -t 10 -m subtitles/en.m3u8 -o "subtitles/en-%05d.webvtt"
91
+ ```
92
+
93
+ ```
94
+ $ webvtt-segmenter -h
95
+ Usage: bin/webvtt-segmenter [--arg]
96
+ -i, --input [PATH] WebVTT or SRT file
97
+ -b, --base-url [URL] Base URL
98
+ -t, --target-duration [DUR] Duration of each segments. Default: 10
99
+ -o, --output [PATH] Path where WebVTT segments will be saved. Default: fileSequence-%05d.webvtt
100
+ -m, --playlist [PATH] Path where the playlist in m3u8 will be saved. Default: prog_index.m3u8
101
+ ```
102
+
103
+ ## Note
104
+
105
+ `webvtt-ruby` was written in a few hours because there was no open source tool to segment webvtt files. It's not perfect at all but it does the job.
106
+
107
+ ## Contributing
108
+
109
+ 1. Fork it
110
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
111
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
112
+ 4. Push to the branch (`git push origin my-new-feature`)
113
+ 5. Create new Pull Request
114
+
115
+ ## Author
116
+
117
+ **Bruno Celeste**
118
+
119
+ * http://www.heywatchencoding.com
120
+ * bruno@heywatch.com
121
+ * [@sadikzzz](http://twitter.com/sadikzzz)
@@ -0,0 +1,53 @@
1
#!/usr/bin/env ruby

# Command-line front end for WebVTT.segment.
#
# Reads a WebVTT file (or an SRT file, which is converted first), splits it
# into segments and writes an m3u8 playlist. Options that are not given on the
# command line are left out of the options hash and passed on as-is.
#
# Exits with status 1 and prints the usage text on any option error.

$LOAD_PATH << File.join(File.dirname(__FILE__), "..", "lib")
require "optparse"
require "webvtt"

@options = {}
parser = OptionParser.new do |opts|
  opts.banner = %(Usage: webvtt-segmenter [--arg])

  opts.on("-i", "--input [PATH]", "WebVTT or SRT file") do |path|
    @options[:input] = path
  end

  opts.on("-b", "--base-url [URL]", "Base URL") do |url|
    @options[:base_url] = url
  end

  opts.on("-t", "--target-duration [DUR]", "Duration of each segments. Default: 10") do |dur|
    # to_i maps a missing or non-numeric value to 0, which is rejected below.
    @options[:length] = dur.to_i
  end

  opts.on("-o", "--output [PATH]", "Path where WebVTT segments will be saved. Default: fileSequence-%05d.webvtt") do |path|
    @options[:output] = path
  end

  opts.on("-m", "--playlist [PATH]", "Path where the playlist in m3u8 will be saved. Default: prog_index.m3u8") do |path|
    @options[:playlist] = path
  end
end

begin
  parser.parse!

  unless @options[:input]
    raise OptionParser::MissingArgument, "--input"
  end

  # A zero or negative duration cannot be used to slice the timeline.
  if @options.key?(:length) && @options[:length] <= 0
    raise OptionParser::InvalidArgument, "--target-duration must be a positive integer"
  end
rescue OptionParser::ParseError => e
  # ParseError is the common ancestor of every option-parsing failure.
  $stderr.puts e.message
  $stderr.puts parser
  exit 1
end

input = @options.delete(:input)

# convert srt to webvtt first
if File.extname(input) == ".srt"
  puts "Converting srt to webvtt format..."
  input = WebVTT.convert_from_srt(input)
end

WebVTT.segment(input, @options)
puts "Done"
data/lib/parser.rb ADDED
@@ -0,0 +1,143 @@
1
# Parser for WebVTT caption files, plus an SRT-to-WebVTT converter.
module WebVTT

  # Raised when a file does not start with a WEBVTT header or contains an
  # unparsable timestamp.
  class MalformedFile < RuntimeError; end
  # Raised when an input file cannot be found.
  class InputError < RuntimeError; end

  # Parses the WebVTT file at +file+ and returns a WebVTT::File.
  def self.read(file)
    File.new(file)
  end

  # Converts the SRT file at +srt_file+ to WebVTT, writes the result to
  # +output+ and returns the parsed WebVTT::File.
  #
  # When +output+ is omitted, the destination is +srt_file+ with ".srt"
  # replaced by ".webvtt".
  #
  # Raises InputError when +srt_file+ does not exist.
  def self.convert_from_srt(srt_file, output=nil)
    # File.exists? was removed in Ruby 3.2; exist? works on every version.
    unless ::File.exist?(srt_file)
      raise InputError, "SRT file not found"
    end

    srt = ::File.read(srt_file)
    output ||= srt_file.gsub(".srt", ".webvtt")

    # SRT separates milliseconds with a comma, WebVTT with a dot
    srt.gsub!(/([0-9]{2}:[0-9]{2}:[0-9]{2})([,])([0-9]{3})/, '\1.\3')
    # normalize new line character
    srt.gsub!("\r\n", "\n")

    srt = "WEBVTT\n\n#{srt}"
    ::File.open(output, "w") {|f| f.write(srt)}

    File.new(output)
  end

  # A parsed WebVTT document: the header block and the list of cues.
  class File
    attr_reader :header, :path, :filename
    attr_accessor :cues

    # Reads and parses +webvtt_file+.
    #
    # Raises InputError when the file does not exist and MalformedFile when
    # it does not begin with a WEBVTT header.
    def initialize(webvtt_file)
      unless ::File.exist?(webvtt_file)
        raise InputError, "WebVTT file not found"
      end

      @path = webvtt_file
      @filename = ::File.basename(@path)
      @content = ::File.read(webvtt_file).gsub("\r\n", "\n") # normalizing new line character
      parse
    end

    # Serializes the header and every cue, separated by blank lines.
    def to_webvtt
      [@header, @cues.map(&:to_webvtt)].flatten.join("\n\n")
    end

    # End time of the last cue in seconds (0.0 when there are no cues).
    def total_length
      return 0.0 if @cues.empty?
      @cues.last.end_in_sec
    end

    # Seconds between the start of the first cue and the end of the last
    # cue (0.0 when there are no cues).
    def actual_total_length
      return 0.0 if @cues.empty?
      @cues.last.end_in_sec - @cues.first.start_in_sec
    end

    # Writes the document to +output+ (default: the source path with ".srt"
    # replaced by ".webvtt") and returns the path written.
    def save(output=nil)
      output ||= @path.gsub(".srt", ".webvtt")

      # Inside this class a bare `File` is WebVTT::File, which has no
      # `open`; the core class must be referenced explicitly.
      ::File.open(output, "w") {|f| f.write(to_webvtt)}
      output
    end

    # Splits the content into blank-line separated blocks, keeps the first
    # one as the header and turns the remaining ones into cues. Blocks that
    # are not cues (notes, blocks without a timing line) are dropped.
    def parse
      # remove bom first
      @content.gsub!("\uFEFF", '')

      # Any run of blank lines separates blocks, so extra blank lines do
      # not produce empty blocks.
      blocks = @content.split(/\n{2,}/)
      @header = blocks.shift
      first_line = @header.to_s.split("\n").map(&:strip).first
      if first_line.to_s !~ /\AWEBVTT/
        raise MalformedFile, "Not a valid WebVTT file"
      end

      @cues = blocks.map {|block| Cue.new(block) }.reject {|cue| cue.text.nil? }
    end
  end

  # A single cue: optional identifier, start/end timestamps, settings
  # ("style") and the caption text.
  class Cue
    # A WebVTT timestamp; the hours field is optional.
    TIMESTAMP = /(?:[0-9]{2,}:)?[0-9]{2}:[0-9]{2}\.[0-9]{3}/
    # Timing line: start, arrow, end, then the optional cue settings.
    TIMING_LINE = /(#{TIMESTAMP})[ \t]+-+>[ \t]+(#{TIMESTAMP})(.*)/
    # A comment block starts with NOTE followed by whitespace or nothing.
    NOTE_LINE = /\ANOTE(?:\s|\z)/

    attr_accessor :identifier, :start, :end, :style, :text

    # Parses the raw text of one block. +text+ stays nil when the block is
    # not a cue.
    def initialize(cue)
      @content = cue
      @style = {}
      parse
    end

    # Serializes the cue: identifier line (if any), timing line with
    # settings, then the text.
    def to_webvtt
      settings = (@style || {}).map {|k, v| "#{k}:#{v}" }.join(" ")

      lines = []
      lines << @identifier if @identifier
      lines << "#{@start} --> #{@end} #{settings}".strip
      lines << @text
      lines.join("\n")
    end

    # Converts a "hh:mm:ss.ttt" or "mm:ss.ttt" timestamp to seconds.
    #
    # Raises MalformedFile when +timestamp+ contains no timestamp.
    def self.timestamp_in_sec(timestamp)
      mres = timestamp.to_s.match(/(?:([0-9]{2,}):)?([0-9]{2}):([0-9]{2}\.[0-9]{3})/)
      if mres.nil?
        raise MalformedFile, "Invalid timestamp: #{timestamp.inspect}"
      end

      sec = mres[3].to_f # seconds and subseconds
      sec += mres[2].to_f * 60 # minutes
      sec += mres[1].to_f * 60 * 60 # hours (nil.to_f is 0.0 when omitted)
      sec
    end

    # Start time in seconds.
    def start_in_sec
      Cue.timestamp_in_sec(@start)
    end

    # End time in seconds.
    def end_in_sec
      Cue.timestamp_in_sec(@end)
    end

    # Duration in seconds.
    def length
      end_in_sec - start_in_sec
    end

    # Fills identifier, start, end, style and text from the raw block.
    # Returns early, leaving +text+ nil, for notes and for blocks that
    # carry no valid timing line.
    def parse
      lines = @content.split("\n").map(&:strip)
      return if lines.empty?

      # it's a note, ignore
      return if lines.first =~ NOTE_LINE

      # a first line without an arrow is the cue identifier
      unless lines.first.include?("-->")
        @identifier = lines.shift
      end

      timing = lines.first && lines.first.match(TIMING_LINE)
      return if timing.nil?

      @start = timing[1]
      @end = timing[2]
      @style = parse_settings(timing[3])
      @text = lines[1..-1].join("\n")
    end

    private

    # Turns "name:value name:value" into a hash. Tokens without a colon are
    # skipped and only the first colon separates name from value.
    def parse_settings(raw)
      raw.strip.split(" ").each_with_object({}) do |setting, settings|
        name, value = setting.split(":", 2).map(&:strip)
        settings[name] = value unless value.nil?
      end
    end
  end
end
data/lib/segmenter.rb ADDED
@@ -0,0 +1,127 @@
1
# Splits a WebVTT document into fixed-length segment files and writes the
# matching m3u8 playlist.
module WebVTT

  # Raised when the input or the segmenting options are unusable.
  class InputError < RuntimeError; end

  # Segments +input+ and writes the playlist.
  #
  # +input+ is either a path to a WebVTT file or an object responding to
  # +to_webvtt+. +options+ are passed to Segmenter.new.
  #
  # Returns [playlist_path, segment_files].
  # Raises InputError when +input+ is neither a path nor a WebVTT object.
  def self.segment(input, options={})
    input = File.new(input) if input.is_a?(String)

    unless input.respond_to?(:to_webvtt)
      raise InputError, "Input must be a WebVTT instance or a path"
    end

    segmenter = Segmenter.new(input, options)
    subs = segmenter.split_to_files
    playlist = segmenter.generate_playlist(subs)

    [playlist, subs]
  end

  # Distributes the cues of one WebVTT document over segment files.
  class Segmenter
    # Header line added to segments that do not already carry one.
    TIMESTAMP_MAP = "X-TIMESTAMP-MAP=MPEGTS:900000,LOCAL:00:00:00.000"

    attr_reader :webvtt

    # +webvtt+ must expose +cues+ and +header+.
    #
    # Options:
    # :length   - segment duration in seconds (default 10)
    # :output   - sprintf pattern for segment files
    #             (default "fileSequence-%05d.webvtt")
    # :playlist - playlist path (default "prog_index.m3u8")
    # :base_url - optional prefix for the segment URLs in the playlist
    #
    # Raises InputError when :length is not a positive number.
    def initialize(webvtt, options={})
      @webvtt = webvtt
      # Work on a copy so the defaults are not written into the caller's hash.
      @options = options.dup
      @options[:length] ||= 10
      @options[:output] ||= "fileSequence-%05d.webvtt"
      @options[:playlist] ||= "prog_index.m3u8"

      unless @options[:length].is_a?(Numeric) && @options[:length] > 0
        raise InputError, "Segment length must be a positive number"
      end
    end

    # Segment indexes for +cue+, shifted so that the segment holding the
    # first cue of the document becomes index 0.
    def find_segment_files(cue)
      # we need to find out how many segments we
      # have to remove from our calculation
      # in the case of first cue not starting at 0
      first_start = @webvtt.cues[0].start_in_sec
      to_remove = (first_start / @options[:length].to_f).floor
      find_segments(cue).map {|s| s - to_remove }
    end

    # Absolute indexes of every segment that +cue+ overlaps.
    #
    # Segment k covers [k * length, (k + 1) * length). A cue that ends
    # exactly on a boundary is not copied into the segment that starts
    # there.
    def find_segments(cue)
      length = @options[:length].to_f
      first = (cue.start_in_sec / length).floor
      last = (cue.end_in_sec / length).ceil - 1
      # zero-length cues sitting on a boundary still belong to one segment
      last = first if last < first
      (first..last).to_a
    end

    # Writes the m3u8 playlist listing +files+ and returns its path.
    #
    # The first entry is measured from 0 seconds (+total_length+), the
    # others from their first cue (+actual_total_length+).
    def generate_playlist(files)
      entries = []
      target_duration = 0

      files.each_with_index do |file, index|
        # if first cue ever we calculate from 0 sec
        duration = index == 0 ? file.total_length : file.actual_total_length
        target_duration = duration if duration > target_duration

        if @options[:base_url].nil?
          url = file.filename
        else
          url = ::File.join(@options[:base_url], file.filename)
        end
        entries << "#EXTINF:#{duration.round},\n#{url}"
      end

      playlist = [
        "#EXTM3U",
        "#EXT-X-TARGETDURATION:#{target_duration.ceil}",
        "#EXT-X-VERSION:3",
        "#EXT-X-MEDIA-SEQUENCE:0",
        "#EXT-X-PLAYLIST-TYPE:VOD"
      ]
      playlist.concat(entries)
      playlist << "#EXT-X-ENDLIST"

      ::File.open(@options[:playlist], "w") {|io| io.write(playlist.join("\n")) }
      @options[:playlist]
    end

    # Writes one file per non-empty segment, named by the :output pattern
    # with consecutive numbers, and returns them parsed as File objects.
    #
    # NOTE(review): assumes the cues are ordered by start time, so no cue
    # maps to a negative segment index - confirm against callers.
    def split_to_files
      buckets = []
      @webvtt.cues.each do |cue|
        find_segment_files(cue).each do |seg|
          (buckets[seg] ||= []) << cue
        end
      end

      # Build a new header string rather than appending to the source
      # document's header, which would change it as a side effect.
      header = @webvtt.header
      unless header.include?("X-TIMESTAMP-MAP=MPEGTS")
        header = "#{header}\n#{TIMESTAMP_MAP}"
      end

      filenames = []
      buckets.compact.each_with_index do |cues, index|
        filename = sprintf(@options[:output], index)
        content = [header, cues.map {|c| c.to_webvtt }.join("\n\n")].join("\n\n")

        ::File.open(filename, "w") {|io| io.write(content) }
        filenames << filename
      end

      filenames.map {|name| File.new(name) }
    end
  end
end