vid-skim 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,51 @@
1
+ =
2
+
3
+ --- ---
4
+ +-------------+
5
+ | +----+ ---- | ---
6
+ | | :) | ---- | ---
7
+ | +----+ ---- | ---
8
+ +-------------+
9
+
10
+
11
+
12
+ ~ Video Skimmer
13
+
14
+
15
+ # Transcripts and commentary for long boring videos on YouTube! #
16
+
17
+ * Present your videos with transcripts and running commentary.
18
+ * Let your users skip to the good parts.
19
+
20
+ Designed for:
21
+
22
+ * News organizations
23
+ * Producers comfortable with the command line
24
+ * Raw video from court transcripts, Political Speeches, Uncut Interviews. In
25
+ short: lengthy video.
26
+
27
+ ~ Documentation
28
+
29
+ #Wiki: https://github.com/propublica/vid-skim/wikis
30
+ #RDoc: http://rdoc.info/projects/propublica/vid-skim
31
+
32
+ ~ Getting Started
33
+
34
+ Install the gem
35
+
36
+ >> sudo gem install vid-skim
37
+
38
+ Install the directory structure.
39
+
40
+ >> vidskim install video-skimmer
41
+
42
+ Under ./video-skimmer/ you'll see an html and a videos directory.
43
+ Put your vidskim json or expanded files in ./video-skimmer/videos/ (see the
44
+ wiki for formatting info).
45
+
46
+ Once your json is complete run:
47
+
48
+ >> vidskim build video-skimmer
49
+
50
+ And you'll see some html files that look something like this:
51
+ http://projects.propublica.org/skimmer/ron_boline
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Entry point for the `vidskim` command-line tool.
#
# The library is required through an absolute path. A relative path such as
# "bin/../lib/vid_skim" does not start with "./", so `require` would look it
# up on $LOAD_PATH instead of next to this script.
require File.expand_path("../lib/vid_skim", File.dirname(__FILE__))

# Building the command object parses ARGV and runs the requested command.
VidSkim::Command.new
@@ -0,0 +1,57 @@
1
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
2
+
3
+ require 'rubygems'
4
+ gem 'nokogiri'
5
+ gem 'json'
6
+
7
+ autoload :JSON, 'json'
8
+ autoload :ERB, 'erb'
9
+ autoload :FileUtils, 'fileutils'
10
+ autoload :Set, 'set'
11
+ autoload :OptionParser, 'optparse'
12
+
13
+
14
# Top-level namespace of the Video Skimmer library. It registers the
# autoloads for the library's classes and keeps the directory configuration
# (working, build, output and parser paths) that the other classes read.
module VidSkim
  autoload :Command,    'vid_skim/command'
  autoload :Transcript, "vid_skim/transcript"
  autoload :Inflector,  "vid_skim/inflector"
  autoload :Compiler,   "vid_skim/compiler"
  autoload :Parser,     "vid_skim/parser"
  autoload :Files,      "vid_skim/files"

  # Directory one level above the directory that contains this file.
  ROOT = File.expand_path(File.dirname(__FILE__) + '/..')

  class << self
    attr_reader :working_path, :build_path, :output_path, :parser_path

    # Set the paths for each of the directories VidSkim works with.
    #
    # working_path - String path of the directory that holds the videos/,
    #                html/ and parsers/ sub-directories.
    def configure(working_path)
      # The parser list is derived from parser_path, so a cached list from a
      # previous configuration must not survive a reconfiguration.
      @parsers      = nil
      @working_path = working_path
      @build_path   = working_path + '/videos/'
      @output_path  = working_path + '/html/'
      @parser_path  = working_path + '/parsers/'
    end

    # Borrowed from Jeremy Ashkenas's wonderful cloud-crowd gem.
    # Build a list of parsers from both VidSkim's defaults and
    # those installed in the working directory.
    #
    # Returns a Hash that maps the parser file's base name (for example
    # "json_parser") to the constant named after it ("JsonParser"). The
    # result is memoized after the first successful call.
    #
    # Raises NameError when a parser file does not define the constant that
    # its file name implies.
    def parsers
      return @parsers if @parsers

      # Without a configured parser_path the pattern would degrade to "*.rb"
      # and pick up every Ruby file in the current directory.
      installed = @parser_path ? Dir["#{@parser_path}*.rb"] : []
      default   = Dir["#{ROOT}/parsers/*.rb"]

      loaded = {}
      (installed + default).each do |path|
        name = File.basename(path, File.extname(path))
        # Require by absolute path: a relative path like "vids/parsers/x.rb"
        # would otherwise be searched for on $LOAD_PATH.
        require File.expand_path(path)
        loaded[name] = Object.const_get(Inflector.camelize(name))
      end

      # Memoize only once every parser has loaded, so that a failure does not
      # leave a partially filled cache behind for the next call.
      @parsers = loaded
    rescue NameError => e
      adjusted_message = "One of your parsers failed to load. Please ensure that the name of your parser class can be deduced from the name of the file. ex: 'json_parser.rb' => 'JsonParser'\n#{e.message}"
      raise NameError.new(adjusted_message, e.name)
    end
  end
end
@@ -0,0 +1,119 @@
1
module VidSkim

  # Command-line `vidskim` client. Handles commands for initial installation
  # and building out the exported HTML files.
  class Command

    # Command-line banner for the usage message.
    BANNER = <<-EOS
Usage: vidskim COMMAND path/to/directory OPTIONS

Commands:
install Install the VidSkim configuration to the specified directory
build Build all videos in a VidSkim directory into HTML pages
parse Parse a file using a parser into the VidSkim directory
compile Compiles and builds each json file from an expanded format

parse path/to/directory -f <input_file> -p <parser_name>
Parse an <input_file> using the parser in <parser_name>

Example: vid-skim parse ./vids -f edit.edl -p edl_parser will parse an
EDL file using the edl_parser

Options:
    EOS

    # Creating a VidSkim::Command parses all command-line arguments and
    # options, configures the VidSkim paths and immediately runs the
    # requested command. The first remaining argument is the command, the
    # second the working directory (defaults to '.'). Unknown or missing
    # commands print the usage message.
    def initialize
      @options = {}
      parse_options
      @command   = ARGV.shift
      @directory = ARGV.shift || '.'
      configure
      case @command
      when 'install' then run_install
      when 'build'   then run_build
      when 'parse'   then run_parse
      when 'compile' then run_compile
      else                usage
      end
    end

    # Parse the options from the command line. Recognized switches are
    # removed from ARGV; everything else is left in place for #initialize.
    def parse_options
      @option_parser = OptionParser.new do |opts|
        opts.on('-p', '--parser NAME', 'Name of parser') do |parser_name|
          @options[:parser_name] = parser_name
        end

        opts.on('-f', '--file FILE', 'Input file to parse') do |parser_file|
          @options[:parser_file] = parser_file
        end

        opts.on('--force', 'Force overwriting of files') do
          Files.force = true
        end

        opts.on_tail('-v', '--version', 'Show version') do
          # NOTE(review): VERSION is not defined in the code visible here;
          # confirm it is defined elsewhere in the library.
          puts "VidSkim version #{VERSION}"
          exit
        end
      end
      @option_parser.banner = BANNER
      @option_parser.parse!(ARGV)
    end

    # Install the example VidSkim folder to a location of your choosing.
    def run_install
      # mkdir_p is a no-op for an existing directory, so no existence check
      # is needed.
      FileUtils.mkdir_p(VidSkim.working_path)
      Files.install_dir "#{VidSkim::ROOT}/template/html", "#{VidSkim.output_path}"
      Files.install_dir "#{VidSkim::ROOT}/template/videos", "#{VidSkim.build_path}"
      Files.install_dir "#{VidSkim::ROOT}/template/parsers", "#{VidSkim.parser_path}"
    end

    # Build the html files from the json in the videos directory.
    #
    # The template is rendered with this method's binding, so it can see
    # @transcript as well as the locals `f`, `template` and `str`.
    def run_build
      template = nil
      Files.walk_build_path(".json").each do |f|
        # Read the template once, on first use, and close the file right away.
        template ||= ERB.new(File.read(VidSkim::ROOT + '/views/template.html.erb'))
        @transcript = Transcript.find(f)
        str = template.result(binding)
        Files.create_file(VidSkim.output_path + "#{@transcript.slug}.html", str)
      end
    end

    # Run a parser to build the files in the videos directory.
    #
    # Raises ArgumentError when either the -p or the -f flag is missing, or
    # when no parser with the requested name is available.
    def run_parse
      unless @options[:parser_name] && @options[:parser_file]
        raise ArgumentError, "To run a parser you must use both the -p and -f flags."
      end

      parser_class = VidSkim.parsers[@options[:parser_name]]
      unless parser_class
        raise ArgumentError, "No parser named '#{@options[:parser_name]}' was found."
      end

      parser = parser_class.new
      parser.load(@options[:parser_file])
      parser.parse
    end

    # Run the compiler to compile and build the files in each directory
    # created by a parser or by hand.
    def run_compile
      VidSkim::Compiler.new.compile
    end

    # Print out `vidskim` usage.
    def usage
      puts "\n#{@option_parser}\n"
    end


    private

    # Make sure that everyone knows where to put any files they generate
    def configure
      VidSkim.configure(@directory)
    end

  end
end
@@ -0,0 +1,106 @@
1
module VidSkim
  # The compiler handles both the compiling to json of an expanded directory,
  # and the creation of expanded directories.
  class Compiler

    # Title used when a transcript has none; the same text the .trans
    # template falls back to.
    DEFAULT_TITLE = "TITLE OF VIDEO"

    # Set up the erb templates for .entry, .div and .trans files. Each
    # template writes one attribute per line; unset attributes are replaced
    # with a placeholder.
    def initialize
      @transcript_t = ERB.new <<-EOS
<%= skim.title || "TITLE OF VIDEO" %>
<%= skim.youtube_id || "YOUTUBE_ID" %>
<%= skim.duration || "DURATION IN SECONDS" %>
<%= skim.default || "DEFAULT TAB" %>
      EOS
      @division_t = ERB.new <<-EOS
<%= division.name || "DIVISION ID" %>
<%= division.color || "COLOR IN #XXXXXX FORMAT" %>
<%= division.hover || "HOVER COLOR IN #XXXXXX FORMAT" %>
      EOS
      # The range placeholder is valid JSON because compile_entry reads the
      # line back with JSON.parse.
      @entry_t = ERB.new <<-EOS
<%= division.name %>
<%= entry.title || "TITLE HERE" %>
<%= entry.range ? entry.range.collect.to_json : '["00:00:00", "00:00:00"]' %>
<%= entry.transcript || "<p>HTML HERE (CAN BE MULTIPLE LINES)</p>" %>
      EOS
    end

    # Create an expanded directory from a VidSkim::Transcript.
    #
    # skim - the transcript to expand. The templates are rendered with this
    #        method's binding and refer to the locals `skim`, `division` and
    #        `entry` by name.
    #
    # One .trans file, one .div file per division and one .entry file per
    # entry are handed to Files.create_tree.
    def explode(skim)
      # Fall back to the placeholder title so that a transcript without a
      # title still gets a directory name.
      working_dir = Inflector.parameterize(skim.title || DEFAULT_TITLE)
      files = [["/#{working_dir}.trans", @transcript_t.result(binding)]]

      skim.divisions.each do |_key, division|
        files << ["/#{division.name}.div", @division_t.result(binding)]

        division.entries.each_with_index do |entry, i|
          files << ["/#{division.name}-#{i}.entry", @entry_t.result(binding)]
        end
      end

      Files.create_tree(working_dir => files)
    end

    # Create a VidSkim::Transcript and compile it to json from an expanded
    # directory. Every directory directly under the build path is compiled
    # into a json file named after the transcript's title.
    #
    # Raises NameError (with an explanatory message) when a build file leads
    # to a NameError or NoMethodError, e.g. an entry naming an unknown
    # division.
    def compile
      Dir[VidSkim.build_path + "**"].each do |dir|
        next unless File.directory?(dir)
        @skim = VidSkim::Transcript.new({})
        # NOTE(review): entries must be compiled after their division; this
        # relies on the glob listing .trans, .div, .entry in pattern order.
        Dir["#{dir}/*.{trans,div,entry}"].each do |path|
          kind = File.extname(path).delete(".")
          send("compile_#{kind}", File.read(path).split("\n"))
        end
        Files.create_file(VidSkim.build_path + Inflector.parameterize(@skim.title) + ".json", @skim.to_json)
      end
    rescue NameError => boom
      message = "One of your build files failed, are you sure everything's in the right order and the right format?\n\nThis might help:\n#{boom.message}"
      raise NameError.new(message, boom.name)
    end

    private

    # Compile a trans file: title, youtube id, duration and default tab, one
    # per line.
    def compile_trans(arr)
      assign(@skim, [:title=, :youtube_id=, :duration=, :default=], arr)
      @skim.duration = @skim.duration.to_i
    end

    # Compile a division file: name, color and hover color, one per line.
    def compile_div(arr)
      name = arr.shift
      division = Transcript::Division.new(name)
      assign(division, [:color=, :hover=], arr)
      @skim.divisions[name] = division
    end

    # Compile an entry file: division name, title and JSON range on the first
    # three lines, followed by the transcript HTML.
    def compile_entry(arr)
      division_name = arr.shift
      entry = Transcript::Entry.new
      assign(entry, [:title=], arr)
      entry.range = JSON.parse(arr.shift)
      # The file was split on newlines, so put them back; joining with ""
      # would glue the last word of a line to the first word of the next.
      entry.transcript = arr.join("\n")
      @skim.divisions[division_name].entries << entry
    end

    # Call each setter in +dest+ on +obj+, consuming one element of +values+
    # per setter.
    def assign(obj, dest, values)
      dest.each do |attribute|
        obj.send(attribute, values.shift)
      end
    end

  end

end
@@ -0,0 +1,61 @@
1
module VidSkim

  # File-system helpers used by the command-line client and the compiler:
  # writing files and trees of files, copying template directories and
  # asking before anything is overwritten.
  class Files

    class << self
      # When true, existing files are overwritten without asking.
      attr_accessor :force
    end

    # Write out a tree of files below the working path's videos directory.
    #
    # tree - Hash of directory name => Array of [filename, contents] pairs:
    #          {"path" => [["/filename", contents], ...], ...}
    #        An element of the Array may itself be such a Hash, in which case
    #        it is written as a sub-tree of that directory.
    # base - directory the tree is created in. Defaults to
    #        <working_path>/videos; used internally for nested trees.
    #
    # Allows for escape opportunities if we're about to overwrite something.
    def self.create_tree(tree, base = nil)
      base ||= File.join(VidSkim.working_path, "videos")
      tree.each_pair do |dir, files|
        path = File.join(base, dir)
        FileUtils.mkdir_p path
        files.each do |filename, contents|
          if filename.respond_to? :each_pair
            create_tree filename, path
          else
            create_file(path + filename, contents)
          end
        end
      end
    end

    # Walk the build path and return files with a given extension.
    def self.walk_build_path(ext)
      Dir[VidSkim.build_path + "**/*#{ext}"]
    end

    # Check if a file exists and ask the user if they want to overwrite it.
    #
    # Returns true when +dest+ may be written: it does not exist, force is
    # set, VID_SKIM_ENV is 'test', or the user answered y/yes/ok. Returns
    # false otherwise, including when standard input is at end of file.
    def self.check_file(dest)
      return true unless File.exist?(dest)
      return true if force || ENV["VID_SKIM_ENV"] == 'test'

      print "#{dest} already exists. Overwrite it? (yes/no) "
      # Read from $stdin explicitly: a bare `gets` reads from the files named
      # in ARGV when ARGV is not empty.
      answer = $stdin.gets
      ['y', 'yes', 'ok'].include?(answer.to_s.strip.downcase)
    end

    # Install a file and log the installation. Allow opportunities to back out
    # of overwriting existing files.
    def self.install_dir(source, dest)
      return unless check_file(dest)
      FileUtils.cp_r(source, dest)
      puts "installed #{dest}" unless ENV["VID_SKIM_ENV"] == 'test'
    end

    # Create a file and underlying directories if needed and log the creation.
    def self.create_file(dest, str)
      return unless check_file(dest)
      FileUtils.mkdir_p(File.dirname(dest))
      # The block form closes the file, so the contents are flushed to disk
      # before the method returns.
      File.open(dest, "w") { |file| file.write(str) }
      puts "created #{dest}" unless ENV["VID_SKIM_ENV"] == 'test'
    end

  end

end
@@ -0,0 +1,16 @@
1
module VidSkim
  # Various string utilities.
  class Inflector
    # From rails
    # Return the camelized form of the word. Useful for loading parsers.
    #
    # word - anything that responds to to_s, e.g. "json_parser" or
    #        "edl/my_parser".
    #
    # Underscore-separated words become CamelCase and "/" becomes "::":
    #   camelize("json_parser")   # => "JsonParser"
    #   camelize("edl/my_parser") # => "Edl::MyParser"
    def self.camelize(word)
      word.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
    end

    # Replace every run of characters other than letters, digits, "-", "_"
    # and "+" with the separator +sep+ and downcase the result.
    #
    # string - the text to convert; nil is treated as an empty string.
    # sep    - the replacement for each run of other characters.
    #
    #   parameterize("Hello, World!") # => "hello-world-"
    def self.parameterize(string, sep = '-')
      string.to_s.gsub(/[^a-z0-9\-_\+]+/i, sep).downcase
    end
  end

end
@@ -0,0 +1,21 @@
1
module VidSkim
  # Each parser you define in your parser directory needs to set the
  # +@transcript+ attribute with a VidSkim::Transcript instance. You can see a
  # fully fleshed out example in parsers/edl_parser.rb.
  #
  # A key point to remember: every unset attribute of the @transcript will be
  # replaced with a sane default when the Compiler expands @transcript.
  class Parser
    attr_accessor :transcript

    # The load method takes a name of a file and must return a
    # VidSkim::Transcript object. Subclasses have to override it.
    #
    # Raises NotImplementedError unless overridden.
    def load(file)
      raise NotImplementedError, "Parsers must define a load method that takes the name of the file to read from."
    end

    # Hand the loaded transcript to the Compiler, which expands it.
    #
    # Raises RuntimeError when no transcript has been set, instead of failing
    # later inside the compiler on a nil.
    def parse
      unless @transcript
        raise "No transcript to parse: the parser's load method must set @transcript first."
      end
      Compiler.new.explode(@transcript)
    end

  end
end
@@ -0,0 +1,195 @@
1
module VidSkim

  # Transcript is a json parser/updater which parses a Video Skimmer formatted
  # json file.
  class Transcript
    # divisions has a hand-written writer below, so only the reader is
    # generated here.
    attr_reader :divisions
    attr_accessor :title, :youtube_id, :duration, :default

    # hash - Hash with the String keys "youtube_id", "title", "default",
    #        "duration" and "divisions"; all of them are optional. The
    #        duration is converted with to_i.
    def initialize(hash)
      @divisions  = {}
      @youtube_id = hash["youtube_id"]
      @title      = hash["title"]
      @default    = hash["default"]
      @duration   = hash["duration"].to_i
      self.divisions = hash["divisions"] if hash["divisions"]
    end

    # Set each division from a hash of divisions.
    #
    # hash - Hash of division name => Hash of attribute name => value. Each
    #        attribute is assigned through the Division's public writer of
    #        the same name. Divisions are added to the ones already present.
    def divisions=(hash)
      hash.each_pair do |key, attributes|
        division = Transcript::Division.new(key)
        attributes.each_pair do |attribute, value|
          # public_send keeps attribute names read from a file from reaching
          # private methods.
          division.public_send("#{attribute}=", value)
        end
        @divisions["#{key}"] = division
      end
    end

    # Return the json representation. Accepts and forwards the generator
    # arguments that the json library passes to to_json, so a Transcript can
    # also be serialized as part of a larger structure.
    def to_json(*args)
      to_hash.to_json(*args)
    end

    # Return the hash representation
    def to_hash
      divisions = {}
      @divisions.each_value { |division| divisions.merge!(division.collect) }
      {
        "youtube_id" => @youtube_id,
        "title"      => @title,
        "default"    => @default,
        "duration"   => @duration,
        "divisions"  => divisions
      }
    end


    # Return a parameterized version of the title for creating the actual
    # html file
    def slug
      Inflector.parameterize(@title, "_")
    end


    class << self
      # Build a Transcript from the json file at path +f+.
      def find(f)
        # File.read closes the file again, unlike File.open(...).read.
        Transcript.new(JSON.parse(File.read("#{f}")))
      end
    end


    # The building blocks of transcripts: each Transcript::Division is a
    # different view to the video
    class Division

      # Matches the dynamic finder names handled by method_missing; the
      # capture is the Entry attribute.
      UNIQUE_FINDER = /\Aunique_entries_by_([_a-zA-Z]\w*)\z/

      attr_accessor :name, :color, :hover

      def initialize(name)
        @name = name || ""
        @entries = []
      end

      # Set each individual Entry from a straight hash of +entries+, which
      # are synced to the video
      def entries=(entries)
        @entries = entries.map do |attributes|
          entry = Transcript::Entry.new
          attributes.each_pair { |key, value| entry.public_send("#{key}=", value) }
          entry
        end
      end

      # Return the array of entries, ensuring that they're sorted by the low
      # end of each Range. The array is sorted in place and returned itself,
      # so entries appended to the result are kept.
      def entries
        @entries.sort! { |a, b| a.range.low <=> b.range.low }
      end

      # Collect each Entry and return a hash of the form
      # { name => { "color" => ..., "hover" => ..., "entries" => [...] } }
      def collect
        {
          @name => {
            "color"   => @color,
            "hover"   => @hover,
            "entries" => entries.map { |entry| entry.collect }
          }
        }
      end

      # Build a dynamic finder (<tt>unique_entries_by_attribute</tt> where
      # attribute is an Entry attribute) so that filters returns unique
      # entries you can do things like:
      #  >> entries = [{'title'=>'Hamm', 'range'=>["00:00:00", "00:00:00"]},
      #                {'title'=> 'Clove', 'range'=>["00:00:00", "00:00:00"]},
      #                {'title'=>'Hamm', 'range'=>["00:00:00", "00:00:00"]}]
      #  >> d = Transcript::Division.new('Endgame')
      #  >> d.entries = entries
      #  >> uniq = d.unique_entries_by_title
      #  >> uniq.each {|u| p u.title }
      #  "Hamm"
      #  "Clove"
      def method_missing(method_id, *args)
        finder = UNIQUE_FINDER.match(method_id.to_s)
        if finder
          unique_entries_by_(finder[1].to_sym) #just having a bit of fun
        else
          super
        end
      end

      # Keep respond_to? in step with the finders method_missing handles.
      def respond_to_missing?(method_id, include_private = false)
        UNIQUE_FINDER.match(method_id.to_s) ? true : super
      end

      private

      # Use a set to build the unique entries returned by method missing.
      # Only the first entry for each value of +key+ is kept; the result
      # stays in the sorted order of #entries.
      def unique_entries_by_(key)
        seen = Set.new
        entries.select { |entry| seen.add?(entry.public_send(key)) }
      end

    end

    # An Transcript::Entry is an individual section of video
    class Entry
      attr_accessor :title, :transcript
      attr_reader :range

      # Set a Transcript::Entry::Range object based on a +range+ of the format
      # ['hh:mm:ss', 'hh:mm:ss'].
      def range=(range)
        @range = Range.new(range)
      end

      # Return the original hash representation of this object
      def collect
        {
          "title"      => @title,
          "range"      => @range.collect,
          "transcript" => @transcript
        }
      end

      # A Transcript::Entry::Range parses a timecode.
      class Range
        # The low and high end of the range, as passed to the constructor.
        attr_reader :low, :high

        # +range+ should be of the format ['hh:mm:ss', 'hh:mm:ss']
        def initialize(range)
          @low  = range.first
          @high = range.last
        end

        # Convert a Transcript::Entry::Range into seconds, the
        # argument can either be :low or :high
        def to_seconds(sym)
          # Each further colon-separated field is worth 60 times the next.
          public_send(sym).split(':').inject(0) do |seconds, field|
            seconds * 60 + field.to_i
          end
        end

        # Return the original array representation of this object
        def collect
          [@low, @high]
        end
      end

    end

  end
end