caption_crunch 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: caf3a7a71c735e100811ff9357ba35b07b0facfb
4
+ data.tar.gz: 470f3e1ad973f32f94eaf04c59b6b25aedfde7c6
5
+ SHA512:
6
+ metadata.gz: faa7aaebfb7b28c603edba951aab611446426f926d7dd5410dcb3f2740b817ed9147947205fb35036d9025fb003c8d6b3132a158912165f3030598206dac2652
7
+ data.tar.gz: edb3692d122205d637c2a4f64e7a6fe7deb44002aee9dde51556b75c52592536073710f8b576795605715b874a9f85b6f7d69f39daa3e31b3750af0c22ce2e89
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in caption_crunch.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Kickstarter
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # CaptionCrunch
2
+
3
+ Praise @noopkat for the name.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'caption_crunch'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install caption_crunch
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+ require 'caption_crunch'
25
+ track = CaptionCrunch.parse(File.new('sample.vtt')) # returns a CaptionCrunch::Track instance
26
+ # or
27
+ track = CaptionCrunch.parse('WEBVTT')
28
+
29
+ # track.cues is an array of CaptionCrunch::Cue instances
30
+ track.cues.first.start_time
31
+ track.cues.first.end_time
32
+ track.cues.first.payload
33
+ ```
34
+
35
+ ## Contributing
36
+
37
+ 1. Fork it ( https://github.com/kickstarter/caption_crunch/fork )
38
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
39
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
40
+ 4. Push to the branch (`git push origin my-new-feature`)
41
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
+ require "rubygems"
2
+ require "bundler/setup"
3
+ require 'rake/testtask'
4
+
5
+ namespace :test do
6
+ Rake::TestTask.new(:units) do |t|
7
+ t.pattern = "spec/*_spec.rb"
8
+ end
9
+ end
10
+
11
+ desc 'Run tests'
12
+ task :test => %w[test:units]
13
+
14
+ task :default => :test
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'caption_crunch/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "caption_crunch"
8
+ spec.version = CaptionCrunch::VERSION
9
+ spec.authors = ["David Peter"]
10
+ spec.email = ["dpeter@kickstarter.com"]
11
+ spec.summary = %q{A collection of parsers for various caption/subtitle files}
12
+ spec.homepage = ""
13
+ spec.license = "MIT"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0")
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ["lib"]
19
+
20
+ spec.required_ruby_version = '>= 2.1.2'
21
+
22
+ spec.add_development_dependency "bundler", "~> 1.7"
23
+ spec.add_development_dependency "rake", "~> 10.0"
24
+ spec.add_development_dependency 'minitest'
25
+ spec.add_development_dependency 'rack-test'
26
+ end
@@ -0,0 +1,103 @@
1
+ module CaptionCrunch
2
+ module Adapters
3
+ class VTT
4
+ SIGNATURE_REGEX = /\AWEBVTT\Z|\AWEBVTT[ \t]/.freeze
5
+ COMMENT_REGEX = /\ANOTE\Z|\ANOTE[ \t\n]/.freeze
6
+ ARROW_REGEX = /-->/.freeze
7
+ # Format: hour:minute:second.milliseconds
8
+ # hour: is optional.
9
+ # 11:22.333
10
+ # 00:11:22.333
11
+ # 102:01:43.204
12
+ # http://dev.w3.org/html5/webvtt/#dfn-collect-a-webvtt-timestamp
13
+ TIME_REGEX = /\A(?:(\d\d+):)?([0-5]\d):([0-5]\d)\.(\d\d\d)\Z/.freeze
14
+ NEWLINE_REGEX = /\r\n|\r|\n/.freeze
15
+
16
+ class << self
17
+ # Reads a file (or string) and returns a CaptionCrunch::Track instance.
18
+ # Raises CaptionCrunch::ParseError if the input is malformed.
19
+ def parse(file)
20
+ contents = remove_bom(read_file(file))
21
+ segments = split_segments(contents)
22
+ ensure_signature(segments.shift)
23
+
24
+ Track.new.tap do |track|
25
+ segments.each do |segment|
26
+ next if comment?(segment)
27
+ track.cues << parse_cue(segment)
28
+ end
29
+ end
30
+ end
31
+
32
+ protected
33
+
34
+ # Returns the contents of a File instance as a string, stripped of surrounding whitespace.
35
+ # If the argument is not a File, calls `.to_s` on it instead (the result is stripped as well).
36
+ def read_file(file)
37
+ case file
38
+ when File then file.read
39
+ else file.to_s
40
+ end.strip
41
+ end
42
+
43
+ def remove_bom(string)
44
+ if string[0] == "\uFEFF"
45
+ string.slice(1..-1)
46
+ else
47
+ string
48
+ end
49
+ end
50
+
51
+ # The WebVTT spec separates segments by two newlines or more.
52
+ def split_segments(string)
53
+ string.split(/#{NEWLINE_REGEX}{2,}/)
54
+ end
55
+
56
+ def ensure_signature(segment)
57
+ if segment !~ SIGNATURE_REGEX
58
+ raise ParseError, 'File must start with WEBVTT'
59
+ end
60
+ end
61
+
62
+ def comment?(segment)
63
+ segment =~ COMMENT_REGEX
64
+ end
65
+
66
+ # Turns a segment into a new CaptionCrunch::Cue instance.
67
+ def parse_cue(segment)
68
+ parts = segment.split(NEWLINE_REGEX)
69
+ # ignore optional identifier for now
70
+ parts.shift unless parts[0] =~ ARROW_REGEX
71
+
72
+ # parse time and cue settings
73
+ times = parts.shift.to_s.split(ARROW_REGEX)
74
+ raise ParseError, "Cue timings missing: #{segment}" if times.size != 2
75
+ start_time = times.first.strip
76
+ end_time, settings = times.last.strip.split(/\s+/, 2)
77
+
78
+ # parse payload
79
+ payload = parts.map(&:strip).join("\n")
80
+
81
+ Cue.new.tap do |cue|
82
+ cue.start_time = parse_time(start_time)
83
+ cue.end_time = parse_time(end_time)
84
+ cue.payload = payload
85
+ end
86
+ end
87
+
88
+ # Converts a timestamp into its total number of milliseconds, as an integer.
89
+ def parse_time(timestamp)
90
+ match = TIME_REGEX.match(timestamp.strip)
91
+ raise ParseError, "Invalid timestamp: #{timestamp}" unless match
92
+ captures = match.captures
93
+ integer = 0
94
+ integer += captures.pop.to_i # msecs
95
+ integer += captures.pop.to_i * 1000 # secs
96
+ integer += captures.pop.to_i * 1000 * 60 # mins
97
+ integer += captures.pop.to_i * 1000 * 60 * 60 # hours
98
+ integer
99
+ end
100
+ end
101
+ end
102
+ end
103
+ end
@@ -0,0 +1 @@
1
+ require 'caption_crunch/adapters/vtt'
@@ -0,0 +1,5 @@
1
+ module CaptionCrunch
2
+ class Cue
3
+ attr_accessor :start_time, :end_time, :payload
4
+ end
5
+ end
@@ -0,0 +1,4 @@
1
+ module CaptionCrunch
2
+ class ParseError < StandardError
3
+ end
4
+ end
@@ -0,0 +1,9 @@
1
+ module CaptionCrunch
2
+ class Track
3
+ attr_reader :cues
4
+
5
+ def initialize()
6
+ @cues = []
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,3 @@
1
+ module CaptionCrunch
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,13 @@
1
+ module CaptionCrunch
2
+ # Delegates to Adapters::VTT.parse.
3
+ # Returns a CaptionCrunch::Track instance
4
+ def self.parse(file)
5
+ Adapters::VTT.parse(file)
6
+ end
7
+ end
8
+
9
+ require "caption_crunch/version"
10
+ require 'caption_crunch/parse_error'
11
+ require 'caption_crunch/track'
12
+ require 'caption_crunch/cue'
13
+ require 'caption_crunch/adapters'
@@ -0,0 +1,131 @@
1
+ require_relative './spec_helper'
2
+
3
+ describe 'CaptionCrunch' do
4
+ describe 'parse' do
5
+ describe 'parsing entirely wrong signature' do
6
+ it 'should raise CaptionCrunch::ParseError' do
7
+ error = ->{
8
+ CaptionCrunch.parse('SPIDERWEBVTT')
9
+ }.must_raise CaptionCrunch::ParseError
10
+ error.message.must_match /File must start with WEBVTT/
11
+ end
12
+ end
13
+
14
+ describe 'parsing valid signature' do
15
+ it 'should not raise CaptionCrunch::ParseError' do
16
+ CaptionCrunch.parse("WEBVTT\t-- here is some stuff")
17
+ end
18
+ end
19
+
20
+ describe 'parsing signature starting with WEBVTT but no space after' do
21
+ it 'should raise CaptionCrunch::ParseError' do
22
+ error = ->{
23
+ CaptionCrunch.parse('WEBVTTINVALID')
24
+ }.must_raise CaptionCrunch::ParseError
25
+ error.message.must_match /File must start with WEBVTT/
26
+ end
27
+ end
28
+
29
+ describe 'parsing signature starting and ending with WEBVTT' do
30
+ it 'should not raise CaptionCrunch::ParseError' do
31
+ CaptionCrunch.parse('WEBVTT')
32
+ end
33
+ end
34
+
35
+ describe 'parsing signature with a BOM' do
36
+ it 'should not raise CaptionCrunch::ParseError' do
37
+ CaptionCrunch.parse("\uFEFFWEBVTT")
38
+ end
39
+ end
40
+
41
+ describe 'parsing a track with three cues' do
42
+ subject { CaptionCrunch.parse(fixture('three_cues.vtt')) }
43
+ it 'should return an object with three cues' do
44
+ subject.cues.size.must_equal(3)
45
+ end
46
+
47
+ it 'should parse cue start times' do
48
+ start_time = 1 * 60 * 60 * 1000 + 2 * 60 * 1000 + 25 * 1000 + 500
49
+ subject.cues.last.start_time.must_equal(start_time)
50
+ end
51
+
52
+ it 'should parse cue end times' do
53
+ subject.cues.first.end_time.must_equal(2000)
54
+ end
55
+
56
+ it 'should parse cue end times with >=100 hours' do
57
+ end_time = 201 * 60 * 60 * 1000 + 3 * 60 * 1000 + 15 * 1000
58
+ subject.cues.last.end_time.must_equal(end_time)
59
+ end
60
+
61
+ it 'should parse cue end times with settings after' do
62
+ end_time = 15 * 1000
63
+ subject.cues[1].end_time.must_equal(end_time)
64
+ end
65
+
66
+ it 'should parse cue payloads' do
67
+ subject.cues.last.payload.must_equal('TTYL.')
68
+ end
69
+
70
+ it 'should support multi-line cue payloads' do
71
+ subject.cues[1].payload.must_equal(%Q{
72
+ - I don't like shrimp, but I like ramen. All kinds of ramen. Miso ramen, tonkotsu ramen, shoyu ramen, Totto ramen...
73
+ - That's nice.
74
+ }.strip)
75
+ end
76
+ end
77
+
78
+ describe 'parsing cue with invalid minutes' do
79
+ it 'should raise CaptionCrunch::ParseError' do
80
+ error = ->{
81
+ CaptionCrunch.parse(%Q{WEBVTT
82
+
83
+ 00:60:00.000 --> 01:02:00.000
84
+ I'm invalid!
85
+ })
86
+ }.must_raise CaptionCrunch::ParseError
87
+ error.message.must_match /Invalid timestamp/
88
+ end
89
+ end
90
+
91
+ describe 'parsing cue without timings' do
92
+ it 'should raise CaptionCrunch::ParseError' do
93
+ error = ->{
94
+ CaptionCrunch.parse(%Q{WEBVTT
95
+
96
+ I'm invalid!
97
+ Woot!
98
+ })
99
+ }.must_raise CaptionCrunch::ParseError
100
+ error.message.must_match /Cue timings missing/
101
+ end
102
+ end
103
+
104
+ describe 'parsing cue with invalid seconds' do
105
+ it 'should raise CaptionCrunch::ParseError' do
106
+ error = ->{
107
+ CaptionCrunch.parse(%Q{WEBVTT
108
+
109
+ 00:00:60.000 --> 01:02:00.000
110
+ I'm invalid!
111
+ })
112
+ }.must_raise CaptionCrunch::ParseError
113
+ error.message.must_match /Invalid timestamp/
114
+ end
115
+ end
116
+
117
+ describe 'parsing cue with invalid milliseconds' do
118
+ it 'should raise CaptionCrunch::ParseError' do
119
+ error = ->{
120
+ CaptionCrunch.parse(%Q{WEBVTT
121
+
122
+ 00:00:00.1000 --> 01:02:00.000
123
+ I'm invalid!
124
+ })
125
+ }.must_raise CaptionCrunch::ParseError
126
+ error.message.must_match /Invalid timestamp/
127
+ end
128
+ end
129
+
130
+ end
131
+ end
@@ -0,0 +1,15 @@
1
+ WEBVTT - Lots of ramen
2
+
3
+ 1
4
+ 00:00:00.000 --> 00:02.000
5
+ - Here are some things I like.
6
+ - Uh, I don't really want to know.
7
+
8
+ 2
9
+ 00:00:02.001 --> 00:00:15.000 align:start line:0 position:50%
10
+ - I don't like shrimp, but I like ramen. All kinds of ramen. Miso ramen, tonkotsu ramen, shoyu ramen, Totto ramen...
11
+ - That's nice.
12
+
13
+ 3
14
+ 01:02:25.500 --> 201:03:15.000
15
+ TTYL.
@@ -0,0 +1,13 @@
1
+ require "rubygems"
2
+ require "bundler/setup"
3
+
4
+ require "minitest/autorun"
5
+ require "minitest/pride"
6
+ require "caption_crunch"
7
+
8
+ class MiniTest::Spec
9
+ def fixture(path)
10
+ path = File.join(File.dirname(__FILE__), 'fixtures', path)
11
+ File.open(path)
12
+ end
13
+ end
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: caption_crunch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - David Peter
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-03-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rack-test
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description:
70
+ email:
71
+ - dpeter@kickstarter.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - .gitignore
77
+ - Gemfile
78
+ - LICENSE
79
+ - README.md
80
+ - Rakefile
81
+ - caption_crunch.gemspec
82
+ - lib/caption_crunch.rb
83
+ - lib/caption_crunch/adapters.rb
84
+ - lib/caption_crunch/adapters/vtt.rb
85
+ - lib/caption_crunch/cue.rb
86
+ - lib/caption_crunch/parse_error.rb
87
+ - lib/caption_crunch/track.rb
88
+ - lib/caption_crunch/version.rb
89
+ - spec/caption_crunch_spec.rb
90
+ - spec/fixtures/three_cues.vtt
91
+ - spec/spec_helper.rb
92
+ homepage: ''
93
+ licenses:
94
+ - MIT
95
+ metadata: {}
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ! '>='
103
+ - !ruby/object:Gem::Version
104
+ version: 2.1.2
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubyforge_project:
112
+ rubygems_version: 2.4.5
113
+ signing_key:
114
+ specification_version: 4
115
+ summary: A collection of parsers for various caption/subtitle files
116
+ test_files:
117
+ - spec/caption_crunch_spec.rb
118
+ - spec/fixtures/three_cues.vtt
119
+ - spec/spec_helper.rb