vid-skim 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,51 @@
1
+ =
2
+
3
+ --- ---
4
+ +-------------+
5
+ | +----+ ---- | ---
6
+ | | :) | ---- | ---
7
+ | +----+ ---- | ---
8
+ +-------------+
9
+
10
+
11
+
12
+ ~ Video Skimmer
13
+
14
+
15
+ # Transcripts and commentary for long boring videos on YouTube! #
16
+
17
+ * Present your videos with transcripts and running commentary.
18
+ * Let your users skip to the good parts.
19
+
20
+ Designed for:
21
+
22
+ * News organizations
23
+ * Producers comfortable with the command line
24
+ * Raw video from court transcripts, Political Speeches, Uncut Interviews. In
25
+ short: lengthy video.
26
+
27
+ ~ Documentation
28
+
29
+ #Wiki: https://github.com/propublica/vid-skim/wikis
30
+ #RDoc: http://rdoc.info/projects/propublica/vid-skim
31
+
32
+ ~ Getting Started
33
+
34
+ Install the gem
35
+
36
+ >> sudo gem install vid-skim
37
+
38
+ Install the directory structure.
39
+
40
+ >> vidskim install video-skimmer
41
+
42
+ Under ./video-skimmer/ you'll see an html and a videos directory.
43
+ Put your vidskim json or expanded files in ./video-skimmer/videos/ (see the
44
+ wiki for formatting info).
45
+
46
+ Once your json is complete run:
47
+
48
+ >> vidskim build video-skimmer
49
+
50
+ And you'll see some html files that look something like this:
51
+ http://projects.propublica.org/skimmer/ron_boline
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Load the VidSkim library from the lib directory next to this executable,
# then start the command-line client (VidSkim::Command reads ARGV itself).
require File.join(File.dirname(__FILE__), '..', 'lib', 'vid_skim')

VidSkim::Command.new
@@ -0,0 +1,57 @@
1
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
2
+
3
+ require 'rubygems'
4
+ gem 'nokogiri'
5
+ gem 'json'
6
+
7
+ autoload :JSON, 'json'
8
+ autoload :ERB, 'erb'
9
+ autoload :FileUtils, 'fileutils'
10
+ autoload :Set, 'set'
11
+ autoload :OptionParser, 'optparse'
12
+
13
+
14
# Top-level namespace of the Video Skimmer library. Holds the lazily loaded
# component classes, the location of the library on disk (ROOT) and the
# paths of the working directory currently being operated on.
module VidSkim
  autoload :Command,    'vid_skim/command'
  autoload :Transcript, "vid_skim/transcript"
  autoload :Inflector,  "vid_skim/inflector"
  autoload :Compiler,   "vid_skim/compiler"
  autoload :Parser,     "vid_skim/parser"
  autoload :Files,      "vid_skim/files"

  # Version reported by `vidskim --version`. Guarded so that a definition
  # made elsewhere (e.g. a version file) is not overwritten.
  VERSION = '0.0.1' unless const_defined?(:VERSION, false)

  # Root directory of the library (the parent of this file's directory).
  ROOT = File.expand_path(File.dirname(__FILE__) + '/..')


  class << self
    attr_reader :working_path, :build_path, :output_path, :parser_path


    # Set the paths for each of the directories VidSkim works with.
    #
    # working_path - the directory that contains (or will contain) the
    #                videos/, html/ and parsers/ sub-directories.
    #
    # Every derived path ends with a single trailing slash, whether or not
    # +working_path+ itself does. Returns the parser path.
    def configure(working_path)
      # A different working directory may ship different parsers, so drop
      # the list memoized by #parsers.
      @parsers      = nil
      @working_path = working_path
      @build_path   = File.join(working_path, 'videos', '')
      @output_path  = File.join(working_path, 'html', '')
      @parser_path  = File.join(working_path, 'parsers', '')
    end

    # Borrowed from Jeremy Ashkenas's wonderful cloud-crowd gem.
    # Build a list of parsers from both VidSkim's defaults and
    # those installed in the working directory.
    #
    # Returns a Hash of file base name (e.g. "edl_parser") => parser class.
    # The result is memoized, but only once every parser loaded successfully,
    # so a failed load is not remembered as a half-filled list.
    #
    # Raises NameError when a parser file does not define the class whose
    # name is derived from its file name.
    def parsers
      return @parsers if @parsers
      loaded    = {}
      installed = Dir["#{@parser_path}*.rb"]
      default   = Dir["#{ROOT}/parsers/*.rb"]

      (installed + default).each do |path|
        name = File.basename(path, File.extname(path))
        # Expand first: a relative path without a leading "./" would be
        # looked up in $LOAD_PATH instead of the current directory.
        require File.expand_path(path)
        loaded[name] = Module.const_get(Inflector.camelize(name))
      end
      @parsers = loaded
    rescue NameError => e
      adjusted_message = "One of your parsers failed to load. Please ensure that the name of your parser class can be deduced from the name of the file. ex: 'json_parser.rb' => 'JsonParser'\n#{e.message}"
      raise NameError.new(adjusted_message, e.name)
    end
  end
end
@@ -0,0 +1,119 @@
1
module VidSkim

  # Command-line `vidskim` client. Handles commands for initial installation
  # and building out the exported HTML files.
  class Command

    # Raised when the command line is incomplete or names an unknown parser.
    class Error < StandardError; end

    # Command-line banner for the usage message.
    BANNER = <<-EOS
Usage: vidskim COMMAND path/to/directory OPTIONS

Commands:
install Install the VidSkim configuration to the specified directory
build Build all videos in a VidSkim directory into HTML pages
parse Parse a file using a parser into the VidSkim directory
compile Compiles and builds each json file from an expanded format

parse path/to/directory -f <input_file> -p <parser_name>
Parse an <input_file> using the parser in <parser_name>

Example: vid-skim parse ./vids -f edit.edl -p edl_parser will parse an
EDL file using the edl_parser

Options:
    EOS

    # Creating a VidSkim::Command parses all command-line arguments and
    # options, configures VidSkim for the requested directory (the current
    # directory when none is given) and runs the requested command. An
    # unknown or missing command prints the usage message.
    def initialize
      @options = {}
      parse_options
      @command   = ARGV.shift
      @directory = ARGV.shift || '.'
      configure
      case @command
      when 'install' then run_install
      when 'build'   then run_build
      when 'parse'   then run_parse
      when 'compile' then run_compile
      else                usage
      end
    end


    # Parse the options from the command line. Recognized options are
    # removed from ARGV, leaving the command and the directory behind.
    def parse_options
      @option_parser = OptionParser.new do |opts|
        opts.on('-p', '--parser NAME', 'Name of parser') do |parser_name|
          @options[:parser_name] = parser_name
        end

        opts.on('-f', '--file FILE', 'Input file to parse') do |parser_file|
          @options[:parser_file] = parser_file
        end

        opts.on('--force', 'Force overwriting of files') do
          Files.force = true
        end

        opts.on_tail('-v', '--version', 'Show version') do
          puts "VidSkim version #{VERSION}"
          exit
        end
      end
      @option_parser.banner = BANNER
      @option_parser.parse!(ARGV)
    end

    # Install the example VidSkim folder to a location of your choosing.
    def run_install
      # mkdir_p is a no-op for an existing directory, so no existence check
      # is needed.
      FileUtils.mkdir_p(VidSkim.working_path)
      Files.install_dir "#{VidSkim::ROOT}/template/html", "#{VidSkim.output_path}"
      Files.install_dir "#{VidSkim::ROOT}/template/videos", "#{VidSkim.build_path}"
      Files.install_dir "#{VidSkim::ROOT}/template/parsers", "#{VidSkim.parser_path}"
    end

    # Build the html files from the json in the videos directory. Each
    # transcript is rendered through views/template.html.erb with this
    # object's binding, so the template can read +@transcript+.
    def run_build
      # Read and compile the template once; File.read also closes the file.
      template = ERB.new(File.read(VidSkim::ROOT + '/views/template.html.erb'))
      Files.walk_build_path(".json").each do |f|
        @transcript = Transcript.find(f)
        str = template.result(binding)
        Files.create_file(VidSkim.output_path + "#{@transcript.slug}.html", str)
      end
    end

    # Run a parser to build the files in the videos directory. Allow an
    # escape hatch if the directory exists.
    #
    # Raises Command::Error when either -p or -f is missing, or when no
    # parser is registered under the given name.
    def run_parse
      unless @options[:parser_name] && @options[:parser_file]
        raise Error, "To run a parser you must use both the -p and -f flags."
      end
      parser_class = VidSkim.parsers[@options[:parser_name]]
      unless parser_class
        known = VidSkim.parsers.keys.sort.join(', ')
        raise Error, "Unknown parser '#{@options[:parser_name]}'. Available parsers: #{known}"
      end
      parser = parser_class.new
      parser.load(@options[:parser_file])
      parser.parse
    end

    # Run the compiler to compile and build the files in each directory
    # created by a parser or by hand.
    def run_compile
      compiler = VidSkim::Compiler.new
      compiler.compile
    end

    # Print out `vidskim` usage.
    def usage
      puts "\n#{@option_parser}\n"
    end


    private

    # Make sure that everyone knows where to put any files they generate
    def configure
      VidSkim.configure(@directory)
    end


  end
end
@@ -0,0 +1,106 @@
1
module VidSkim
  # The compiler handles both the compiling to json of an expanded directory,
  # and the creation of expanded directories.
  class Compiler

    # Order in which the files of an expanded directory are compiled: the
    # transcript first, then the divisions, then the entries that are
    # attached to those divisions.
    COMPILE_ORDER = %w[trans div entry].freeze

    # Set up the erb templates for .entry, .div and .trans files. Each
    # template writes one attribute per line, in the order the matching
    # compile_* method reads them back.
    def initialize
      @transcript_t = ERB.new <<-EOS
<%= skim.title || "TITLE OF VIDEO" %>
<%= skim.youtube_id || "YOUTUBE_ID" %>
<%= skim.duration || "DURATION IN SECONDS" %>
<%= skim.default || "DEFAULT TAB" %>
      EOS
      @division_t = ERB.new <<-EOS
<%= division.name || "DIVISION ID" %>
<%= division.color || "COLOR IN #XXXXXX FORMAT" %>
<%= division.hover || "HOVER COLOR IN #XXXXXX FORMAT" %>
      EOS
      # The range line is read back with JSON.parse, so the placeholder has
      # to be valid JSON as well. JSON.generate (rather than #to_json) makes
      # sure the autoloaded JSON library is actually loaded.
      @entry_t = ERB.new <<-EOS
<%= division.name %>
<%= entry.title || "TITLE HERE" %>
<%= entry.range ? JSON.generate(entry.range.collect) : '["00:00:00", "00:00:00"]' %>
<%= entry.transcript || "<p>HTML HERE (CAN BE MULTIPLE LINES)</p>" %>
      EOS
    end

    # Create an expanded directory from a VidSkim::Transcript.
    #
    # skim - the transcript to expand; unset attributes are written out as
    #        placeholder text.
    #
    # Writes one .trans file, one .div file per division and one .entry file
    # per entry through Files.create_tree.
    def explode(skim)
      working_dir = Inflector.parameterize(skim.title)
      # The templates read the locals `skim`, `division` and `entry` through
      # `binding`, so those names must not be changed.
      files = [["/#{working_dir}.trans", @transcript_t.result(binding)]]
      skim.divisions.each do |_title, division|
        files << ["/#{division.name}.div", @division_t.result(binding)]

        division.entries.each_with_index do |entry, i|
          files << ["/#{division.name}-#{i}.entry", @entry_t.result(binding)]
        end
      end
      Files.create_tree(working_dir => files)
    end

    # Create a VidSkim::Transcript and compile it to json from an expanded
    # directory. Every directory directly under the build path is compiled
    # into "<parameterized title>.json" in the build path.
    #
    # Raises NameError (with a hint prepended) when a build file cannot be
    # applied, e.g. an entry that names a division without a .div file.
    def compile
      Dir[VidSkim.build_path + "**"].each do |dir|
        next unless File.directory?(dir)
        @skim = VidSkim::Transcript.new({})
        ordered_build_files(dir).each do |kind, path|
          send("compile_#{kind}", File.read(path).split("\n"))
        end
        Files.create_file(VidSkim.build_path + Inflector.parameterize(@skim.title) + ".json", @skim.to_json)
      end
    rescue NameError => boom
      message = "One of your build files failed, are you sure everything's in the right order and the right format?\n\nThis might help:\n#{boom.message}"
      raise NameError.new(message, boom.name)
    end

    private

    # List the [kind, path] pairs of the build files in +dir+, grouped by
    # COMPILE_ORDER. Plain glob order is not usable: "name-0.entry" sorts
    # before "name.div", so entries would be compiled before their division.
    def ordered_build_files(dir)
      COMPILE_ORDER.flat_map do |kind|
        Dir["#{dir}/*.#{kind}"].sort.map { |path| [kind, path] }
      end
    end

    # Compile a trans file: title, youtube id, duration and default tab, one
    # per line. The duration is stored as an Integer.
    def compile_trans(arr)
      assign(@skim, [:title=, :youtube_id=, :duration=, :default=], arr)
      @skim.duration = @skim.duration.to_i
    end

    # Compile a division file: name, color and hover color, one per line.
    def compile_div(arr)
      name = arr.shift
      division = Transcript::Division.new(name)
      assign(division, [:color=, :hover=], arr)
      @skim.divisions[name] = division
    end

    # Compile an entry file: division name, title and JSON range on the
    # first three lines, followed by the transcript HTML.
    def compile_entry(arr)
      entry = Transcript::Entry.new
      division_name = arr.shift
      assign(entry, [:title=], arr)
      entry.range = JSON.parse(arr.shift)
      # Keep the line breaks: joining without a separator would glue the
      # last word of one line to the first word of the next.
      entry.transcript = arr.join("\n")
      division = @skim.divisions[division_name]
      unless division
        raise NameError, "an entry refers to the division #{division_name.inspect}, but there is no .div file for it"
      end
      division.entries << entry
    end

    # Assign the leading +values+ to +obj+ through the setters in +dest+,
    # removing each consumed value from +values+.
    def assign(obj, dest, values)
      dest.each do |attribute|
        obj.send(attribute, values.shift)
      end
    end

  end

end
@@ -0,0 +1,61 @@
1
module VidSkim

  # File-system helpers shared by the commands: writing files and directory
  # trees, copying template directories, and asking before overwriting.
  class Files

    class << self
      # When true, existing files are overwritten without asking.
      attr_accessor :force
    end

    # To be refactored soon. Takes a hash of arrays
    #   => puts tree
    #   >> {"path" => [["filename", contents], ...], ... }
    # which allows us to write out the files and underlying directories.
    # Allows for escape opportunities if we're about to overwrite something.
    #
    # tree - Hash of directory name => Array of [filename, contents] pairs.
    #        An element that is itself a Hash is written as a nested tree
    #        below that directory.
    # base - directory the tree is created in; defaults to the videos
    #        directory of the working path.
    def self.create_tree(tree, base = nil)
      base ||= File.join(VidSkim.working_path, 'videos')
      tree.each_pair do |dir, files|
        path = File.join(base, dir)
        FileUtils.mkdir_p(path)
        files.each do |filename, contents|
          if filename.respond_to?(:each_pair)
            create_tree(filename, path)
          else
            # File names carry their own leading slash.
            create_file(path + filename, contents)
          end
        end
      end
    end

    # Walk the build path and return files with a given extension.
    def self.walk_build_path(ext)
      Dir[VidSkim.build_path + "**/*#{ext}"]
    end

    # Check if a file exists and ask the user if they want to overwrite it.
    # Returns false if they say no (or if there is no answer to read), true
    # otherwise. Never asks when +force+ is set or VID_SKIM_ENV is "test".
    def self.check_file(dest)
      return true unless File.exist?(dest)
      return true if force || ENV["VID_SKIM_ENV"] == 'test'
      print "#{dest} already exists. Overwrite it? (yes/no) "
      $stdout.flush
      # Kernel#gets would read from the files named in ARGV; the answer
      # has to come from the terminal.
      answer = $stdin.gets
      return false unless answer
      ['y', 'yes', 'ok'].include?(answer.strip.downcase)
    end

    # Install a directory and log the installation. Allow opportunities to
    # back out of overwriting existing files.
    def self.install_dir(source, dest)
      return unless check_file(dest)
      FileUtils.cp_r(source, dest)
      puts "installed #{dest}" unless ENV["VID_SKIM_ENV"] == 'test'
    end

    # Create a file and underlying directories if needed and log the creation.
    def self.create_file(dest, str)
      return unless check_file(dest)
      FileUtils.mkdir_p(File.dirname(dest))
      # The block form closes (and thereby flushes) the file.
      File.open(dest, "w") { |file| file.write(str) }
      puts "created #{dest}" unless ENV["VID_SKIM_ENV"] == 'test'
    end

  end

end
@@ -0,0 +1,16 @@
1
module VidSkim
  # Various string utilities.
  class Inflector
    # From rails
    # Return the camelized form of the word. Useful for loading parsers.
    #
    #   camelize("json_parser")      # => "JsonParser"
    #   camelize("admin/edl_parser") # => "Admin::EdlParser"
    def self.camelize(word)
      word.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
    end

    # Replace every run of characters other than letters, digits, "-", "_"
    # and "+" with the separator +sep+ and downcase the result. Like
    # camelize, any object is accepted and converted with to_s first (so nil
    # yields an empty string).
    #
    #   parameterize("Ron Boline")      # => "ron-boline"
    #   parameterize("Ron Boline", "_") # => "ron_boline"
    def self.parameterize(string, sep = '-')
      string.to_s.gsub(/[^a-z0-9\-_\+]+/i, sep).downcase
    end
  end

end
@@ -0,0 +1,21 @@
1
module VidSkim
  # Base class for parsers. A parser placed in the parser directory has to
  # store a VidSkim::Transcript instance in +@transcript+; see
  # parsers/edl_parser.rb for a fully fleshed out example.
  #
  # Keep in mind that the Compiler fills in a sane default for every
  # attribute of +@transcript+ that was left unset when it expands it.
  class Parser
    attr_accessor :transcript

    # Read the file called +file+. Subclasses have to override this; the
    # version here only raises NotImplementedError. An implementation takes
    # the name of a file and must return a VidSkim::Transcript object.
    def load(file)
      raise NotImplementedError.new("Parsers must define a load method that takes the name of the file to read from.")
    end

    # Hand +@transcript+ to a new Compiler, which expands it.
    def parse
      compiler = Compiler.new
      compiler.explode(@transcript)
    end

  end
end
@@ -0,0 +1,195 @@
1
module VidSkim

  # Transcript is a json parser/updater which parses a Video Skimmer formatted
  # json file.
  class Transcript
    attr_reader :divisions
    attr_accessor :title, :youtube_id, :duration, :default

    # Build a transcript from a hash with the string keys "youtube_id",
    # "title", "default", "duration" and (optionally) "divisions". The
    # duration is converted to an Integer.
    def initialize(hash)
      @divisions  = {}
      @youtube_id = hash["youtube_id"]
      @title      = hash["title"]
      @default    = hash["default"]
      @duration   = hash["duration"].to_i
      self.divisions = hash["divisions"] if hash["divisions"]
    end

    # Set each division from a hash of divisions, keyed by division name.
    # Every key of a division's hash is assigned through the matching public
    # setter of Transcript::Division. Divisions are added to (or replace)
    # those already present.
    def divisions=(hash)
      hash.each_pair do |key, attributes|
        division = Transcript::Division.new(key)
        attributes.each_pair do |attribute, value|
          division.public_send("#{attribute}=", value)
        end
        @divisions["#{key}"] = division
      end
    end

    # Return the json representation. Accepts (and passes on) the arguments
    # a JSON generator hands to #to_json, so a transcript can also be
    # serialized as part of a larger structure.
    def to_json(*args)
      # Without arguments go through the JSON constant, which makes sure the
      # JSON library is loaded even when it is only registered for autoload.
      args.empty? ? JSON.generate(to_hash) : to_hash.to_json(*args)
    end

    # Return the hash representation
    def to_hash
      {
        "youtube_id" => @youtube_id,
        "title"      => @title,
        "default"    => @default,
        "duration"   => @duration,
        "divisions"  => @divisions.values.inject({}) { |all, division| all.merge!(division.collect) }
      }
    end


    # Return a parameterized version of the title for creating the actual
    # html file
    def slug
      Inflector.parameterize(@title, "_")
    end


    class << self
      # Load the Transcript stored as json in the file at path +f+.
      def find(f)
        # File.read closes the file again, unlike File.open(...).read.
        new(JSON.parse(File.read("#{f}")))
      end
    end


    # The building blocks of transcripts: each Transcript::Division is a
    # different view to the video
    class Division
      # Names handled by method_missing; the capture is the Entry attribute.
      DYNAMIC_FINDER = /\Aunique_entries_by_([_a-zA-Z]\w*)\z/

      attr_accessor :name, :color, :hover

      def initialize(name)
        @name    = name || ""
        @entries = []
      end

      # Set each individual Entry from a straight array of hashes, which
      # are synced to the video. Every key is assigned through the matching
      # public setter of Transcript::Entry.
      def entries=(entries)
        @entries = entries.map do |attributes|
          entry = Transcript::Entry.new
          attributes.each_pair { |key, value| entry.public_send("#{key}=", value) }
          entry
        end
      end

      # Return the array of entries ensuring that they're sorted by the low
      # end of each Range. The internal array itself is sorted and returned,
      # so entries appended to the result are kept. Entries without a range
      # sort first.
      def entries
        @entries.sort! { |a, b| range_low(a) <=> range_low(b) }
      end

      # Collect each Entry and return a hash of the form
      # { name => { "color" => ..., "hover" => ..., "entries" => [...] } }.
      def collect
        {
          @name => {
            "color"   => @color,
            "hover"   => @hover,
            "entries" => entries.map { |entry| entry.collect }
          }
        }
      end

      # Build a dynamic finder (<tt>unique_entries_by_attribute</tt> where
      # attribute is an Entry attribute) so that filters returns unique
      # entries you can do things like:
      #   >> entries = [{'title'=>'Hamm', 'range'=>["00:00:00", "00:00:00"]},
      #                 {'title'=> 'Clove', 'range'=>["00:00:00", "00:00:00"]},
      #                 {'title'=>'Hamm', 'range'=>["00:00:00", "00:00:00"]}]
      #   >> d = Transcript::Division.new('Endgame')
      #   >> d.entries = entries
      #   >> uniq = d.unique_entries_by_title
      #   >> uniq.each {|u| p u.title }
      #   "Hamm"
      #   "Clove"
      def method_missing(method_id, *args)
        finder = DYNAMIC_FINDER.match(method_id.to_s)
        return super unless finder
        unique_entries_by_(finder[1].to_sym) #just having a bit of fun
      end

      # Keep respond_to? in agreement with method_missing.
      def respond_to_missing?(method_id, include_private = false)
        DYNAMIC_FINDER.match(method_id.to_s) ? true : super
      end

      private

      # Use a set to build the unique entries returned by method missing:
      # of the entries sharing a value for +key+ only the first (in sorted
      # order) is kept. Only public Entry methods can be used as +key+.
      def unique_entries_by_(key)
        seen = Set.new
        entries.select { |entry| seen.add?(entry.public_send(key)) }
      end

      # Sort key of an entry: the low end of its range as a string, or an
      # empty string when the entry has no range (yet).
      def range_low(entry)
        entry.range ? entry.range.low.to_s : ""
      end

    end

    # A Transcript::Entry is an individual section of video
    class Entry
      attr_accessor :title, :transcript
      attr_reader :range

      # Set a Transcript::Entry::Range object based on a +range+ of the format
      # ['hh:mm:ss', 'hh:mm:ss'].
      def range=(range)
        @range = Range.new(range)
      end

      # Return the original hash representation of this object. The range is
      # nil when none has been set.
      def collect
        {
          "title"      => @title,
          "range"      => (@range && @range.collect),
          "transcript" => @transcript
        }
      end

      # A Transcript::Entry::Range parses a timecode.
      class Range

        # +range+ should be of the format ['hh:mm:ss', 'hh:mm:ss']
        def initialize(range)
          @range_low  = range.first
          @range_high = range.last
        end

        # Return the low end of the Transcript::Entry::Range
        def low
          @range_low
        end

        # Return the high end of the Transcript::Entry::Range
        def high
          @range_high
        end

        # Convert a Transcript::Entry::Range into seconds, the
        # argument can either be :low or :high
        def to_seconds(sym)
          # Walk the timecode from the right: seconds, minutes, hours are
          # worth 60**0, 60**1 and 60**2 seconds each.
          parts = public_send(sym).split(':').reverse
          parts.each_with_index.inject(0) do |seconds, (part, power)|
            seconds + part.to_i * 60**power
          end
        end

        # Return the original array representation of this object
        def collect
          [@range_low, @range_high]
        end
      end

    end

  end
end