typingpool 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. data/Rakefile +23 -0
  2. data/bin/tp-assign +240 -0
  3. data/bin/tp-collect +50 -0
  4. data/bin/tp-config +114 -0
  5. data/bin/tp-finish +101 -0
  6. data/bin/tp-make +169 -0
  7. data/bin/tp-review +175 -0
  8. data/lib/typingpool/amazon.rb +732 -0
  9. data/lib/typingpool/app.rb +634 -0
  10. data/lib/typingpool/config.rb +344 -0
  11. data/lib/typingpool/error.rb +22 -0
  12. data/lib/typingpool/filer.rb +396 -0
  13. data/lib/typingpool/project.rb +593 -0
  14. data/lib/typingpool/template.rb +175 -0
  15. data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
  16. data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
  17. data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
  18. data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
  19. data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
  20. data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
  21. data/lib/typingpool/templates/assignment/main.css +20 -0
  22. data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
  23. data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
  24. data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
  25. data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
  26. data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
  27. data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
  28. data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
  29. data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
  30. data/lib/typingpool/templates/config.yml +21 -0
  31. data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
  32. data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
  33. data/lib/typingpool/templates/project/data/.empty_directory +0 -0
  34. data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
  35. data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
  36. data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
  37. data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
  38. data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
  39. data/lib/typingpool/templates/project/etc/transcript.css +49 -0
  40. data/lib/typingpool/templates/transcript.html.erb +23 -0
  41. data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
  42. data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
  43. data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
  44. data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
  45. data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
  46. data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
  47. data/lib/typingpool/test/fixtures/config-1 +20 -0
  48. data/lib/typingpool/test/fixtures/config-2 +25 -0
  49. data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
  50. data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
  51. data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
  52. data/lib/typingpool/test/fixtures/template.html.erb +10 -0
  53. data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
  54. data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
  55. data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
  56. data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
  57. data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
  58. data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
  59. data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
  60. data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
  61. data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
  62. data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
  63. data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
  64. data/lib/typingpool/test.rb +418 -0
  65. data/lib/typingpool/transcript.rb +181 -0
  66. data/lib/typingpool/utility.rb +272 -0
  67. data/lib/typingpool.rb +500 -0
  68. data/test/make_amazon_question_fixture.rb +24 -0
  69. data/test/make_tp_collect_fixture_1.rb +26 -0
  70. data/test/make_tp_collect_fixture_2.rb +16 -0
  71. data/test/make_tp_collect_fixture_3.rb +15 -0
  72. data/test/make_tp_collect_fixture_4.rb +17 -0
  73. data/test/make_tp_review_fixture_1.rb +26 -0
  74. data/test/make_tp_review_fixture_2.rb +30 -0
  75. data/test/make_transcript_chunks_fixture.rb +53 -0
  76. data/test/test_integration_script_1_tp_config.rb +108 -0
  77. data/test/test_integration_script_2_tp_make.rb +119 -0
  78. data/test/test_integration_script_3_tp_assign.rb +152 -0
  79. data/test/test_integration_script_4_tp_review.rb +72 -0
  80. data/test/test_integration_script_5_tp_collect.rb +44 -0
  81. data/test/test_integration_script_6_tp_finish.rb +123 -0
  82. data/test/test_unit_amazon.rb +153 -0
  83. data/test/test_unit_config.rb +94 -0
  84. data/test/test_unit_filer.rb +202 -0
  85. data/test/test_unit_project.rb +168 -0
  86. data/test/test_unit_project_local.rb +68 -0
  87. data/test/test_unit_project_remote.rb +157 -0
  88. data/test/test_unit_template.rb +111 -0
  89. data/test/test_unit_transcript.rb +77 -0
  90. metadata +234 -0
data/bin/tp-make ADDED
@@ -0,0 +1,169 @@
#!/usr/bin/env ruby

# tp-make: build a new Typingpool project from one or more audio files.
#
# Steps performed by this script, in order:
#   1. Parse command-line options (title, files, chunk length, voices,
#      unusual words, bitrate, config path).
#   2. Abort if Typingpool reports missing external dependencies.
#   3. Validate that every input file has an extension, exists and is a
#      regular file.
#   4. Create the local project, convert the originals to mp3, merge
#      them, split the merged audio into chunks and write the
#      assignment CSV -- or, when a local project with the same title
#      already exists and its assignment CSV still has rows marked
#      audio_uploaded == 'maybe', resume that project's upload instead.
#   5. Upload the audio, then print the project path on STDOUT.
#
# Progress messages go to STDERR; only the project path goes to STDOUT.

require 'optparse'
require 'typingpool'
require 'fileutils'
require 'tempfile'
require 'tmpdir'
include Typingpool::App::FriendlyExceptions

options = {
  :files => [],
  :voices => [],
  :unusual => [],
  :chunk => '1:00',
}
OptionParser.new do |opts|
  options[:banner] = "USAGE: #{File.basename($PROGRAM_NAME)} PROJECTNAME FILE [FILE [FILE...]]\n"
  options[:banner] += " [--chunks 1:00] [--subtitle 'Hack Day interview']\n"
  options[:banner] += " [--voice 'John' --voice 'Pat Foo, British female'...]\n"
  options[:banner] += " [--unusual 'Hack Day' --unusual 'Sunnyvale, Chad Dickerson'...]\n"
  options[:banner] += " [--bitrate 256] [--config PATH]\n"
  options[:banner] += " [--title PROJECTNAME] [--file foo.mp3 [--file bar.mp3...]]\n"
  opts.banner = options[:banner]

  opts.on('--title TITLE',
          'Required. For file names and transcript.',
          'Also accepted as first argument') do |title|
    options[:title] = title
  end

  opts.on('--file FILE',
          'Required. Audio for transcribing.',
          'Repeatable (sorting is by name).',
          'Also accepted as second and later arguments') do |file|
    options[:files].push(file)
  end

  opts.on('--subtitle SUBTITLE',
          'For transcript') do |subtitle|
    options[:subtitle] = subtitle
  end

  opts.on('--chunks MM:SS',
          'Default: 1:00. Audio divided thusly for',
          'transcribing. Try also HH:MM:SS and SSS') do |chunk|
    options[:chunk] = chunk
  end

  opts.on('--voice "NAME[, DESCR]"',
          'Name, optional description of recorded',
          'person, to aid transcriber. Repeatable') do |voice|
    options[:voices].push(voice)
  end

  opts.on('--unusual WORD[,WORD,]',
          'Unusual word within recording, to aid',
          'transcriber. Commas for multiple.',
          'Repeatable') do |word|
    options[:unusual].push(word)
  end

  opts.on('--config PATH',
          'Default: ~/.typingpool. A config file') do |path|
    options[:config] = path
  end

  opts.on('--bitrate KBPS',
          'Default: Mirror input. Output bitrate in',
          'kb/s. Only applies if/when converting to',
          'MP3') do |kbps|
    options[:bitrate] = kbps
  end

  opts.on('--help',
          'Display this screen.') do
    puts opts
    exit
  end
end.parse!

Typingpool::App.if_missing_dependencies do |missing|
  missing.map!{|cmd| "`#{cmd}`" }
  them = missing.count > 1 ? 'them' : 'it'
  abort "It looks like you're missing #{Typingpool::Utility.join_in_english(missing)}. You'll need to install #{them} before Typingpool can run."
end

# Positional arguments: the first is the title (unless --title was
# given), everything that remains is treated as an audio file.
options[:title] ||= ARGV.shift unless ARGV.empty?
options[:files].concat(ARGV)
options[:banner] += "\n#{Typingpool::App::CLI.help_arg_explanation}\n"
abort "No files specified\n\n#{options[:banner]}" if options[:files].empty?
abort "No title specified\n\n#{options[:banner]}" if options[:title].to_s.empty?
options[:files].sort!
options[:files].each do |file|
  # File.extname returns "" (which is truthy) when there is no
  # extension, so the result must be tested with empty?.
  abort "You need a file extension on the file '#{file}'" if File.extname(file).empty?
  abort "There is no file '#{file}'" unless File.exist?(file)
  abort "Not a file: '#{file}'" unless File.file?(file)
end

# Each --unusual value may itself be a comma-separated list.
options[:unusual] = options[:unusual].flat_map{|unusual| unusual.split(/\s*,\s*/) }

# Each --voice value is "NAME[, DESCR]". Split on the first comma only
# so that a description containing commas is kept whole.
options[:voices].map! do |voice|
  name, description = voice.split(/\s*,\s*/, 2)
  {
    :name => name,
    :description => (description || '')
  }
end

config = Typingpool::App::CLI.config_from_arg(options[:config])
abort "No config file at '#{options[:config]}'" unless config

project = with_friendly_exceptions('project title', options[:title]) do
  Typingpool::Project.new(options[:title], config)
end

with_friendly_exceptions('--chunks argument', options[:chunk]) do
  project.interval = options[:chunk] if options[:chunk]
end

with_friendly_exceptions('--bitrate argument', options[:bitrate]) do
  project.bitrate = options[:bitrate] if options[:bitrate]
end

if project.local
  assignment_csv = project.local.file('data', 'assignment.csv')
  upload_incomplete = File.exist?(assignment_csv) &&
    assignment_csv.as(:csv).read.any?{|assignment| assignment['audio_uploaded'] == 'maybe' }
  if upload_incomplete
    # project where the upload died partway through
    STDERR.puts "Fixing incomplete project"
    STDERR.puts "Determining which mp3s need uploading"
  else
    abort "The title '#{options[:title]}' is taken"
  end
else
  project.create_local
  project.local.subtitle = options[:subtitle] if options[:subtitle]
  options[:files].each{|path| FileUtils.cp(path, project.local.subdir('audio', 'originals')) }

  temp_conversion_dir = Typingpool::Filer::Dir.new(Dir.mktmpdir)
  temp_merge_file = nil
  begin
    converted_files = project.local.subdir('audio', 'originals').as(:audio).to_mp3(temp_conversion_dir, project.bitrate) do |file|
      STDERR.puts "Converting #{File.basename(file)} to mp3"
    end

    STDERR.puts "Merging audio" if converted_files.count > 1
    temp_merge_file = Tempfile.new(["#{project.name}.all.", ".mp3"])
    temp_merge_filer = Typingpool::Filer::Audio.new(temp_merge_file.path)
    converted_files.merge(temp_merge_filer)

    STDERR.puts "Splitting audio into uniform bits"
    chunks = temp_merge_filer.split(project.interval_as_min_dot_sec, project.name, project.local.subdir('audio','chunks'))
    urls = project.create_remote_names(chunks).map{|file| project.remote.file_to_url(file) }

    # Write mp3 URLs before uploading, so we can rollback if uploading
    # dies halfway through
    project.create_assignment_csv(:path => ['data', 'assignment.csv'], :urls => urls, :unusual => options[:unusual], :voices => options[:voices])
  ensure
    # Remove the temporary merge file and conversion directory even
    # when conversion, merging or splitting raised.
    if temp_merge_file
      temp_merge_file.close
      temp_merge_file.unlink
    end
    FileUtils.remove_entry_secure(temp_conversion_dir)
  end
end # if project.local

Typingpool::App.upload_audio_for_project(project) do |file, as|
  STDERR.puts "Uploading #{File.basename(file)} to #{project.remote.host}/#{project.remote.path} as #{as}"
end

if STDOUT.tty? && Typingpool::Utility.os_x?
  STDERR.puts "Opening project folder #{project.local.path}"
  project.local.finder_open
end

STDERR.puts "Done. Project at:"
puts project.local.path
data/bin/tp-review ADDED
@@ -0,0 +1,175 @@
#!/usr/bin/env ruby

# tp-review: interactively approve or reject transcription submissions
# fetched from Amazon Mechanical Turk.
#
# With a PROJECT (first argument or --project) only that project's
# submissions are reviewed; without one, pending submissions for all
# projects are reviewed. For each submission the transcript text is
# shown and the user picks approve / reject / quit / skip (an empty
# answer selects the last choice, skip).
#
# When a project was specified its local path is printed on STDOUT at
# the end so the script can be pipelined.

require 'typingpool'
require 'highline/import'
require 'optparse'
include Typingpool::App::CLI::Formatter

options = {}
OptionParser.new do |parser|
  options[:banner] = "USAGE: #{File.basename($PROGRAM_NAME)} [PROJECT]\n"
  options[:banner] += " [--config PATH] [--sandbox]\n"
  parser.banner = options[:banner]

  parser.on('--project=PROJECT',
            'Path or name within dir $config_file:transcripts.',
            'Also accepted as first argument to script.',
            'If not specified, reviews pending results',
            'for ALL projects') { |value| options[:project] = value }

  parser.on('--config=PATH',
            'Default: ~/.typingpool') { |value| options[:config] = value }

  parser.on('--sandbox',
            "Test in Mechanical Turk's sandbox") { |_flag| options[:sandbox] = true }

  parser.on('--fixture=PATH',
            'Optional. For testing purposes only.',
            'A VCR fixture for running with mock data') { |value| options[:fixture] = value }

  parser.on('--help',
            'Display this screen') do
    STDERR.puts parser
    exit
  end
end.parse!

config = Typingpool::App::CLI.config_from_arg(options[:config])
abort "No config file at '#{options[:config]}'" unless config
options[:banner] += "\n#{Typingpool::App::CLI.help_arg_explanation}\n"

# The project may be given positionally or via --project, not both.
unless ARGV.empty?
  abort "Duplicate project values ('#{ARGV[0]}' and '#{options[:project]}')" if options[:project]
  options[:project] = ARGV.shift
end
abort "Unexpected argument(s): #{ARGV.join(';')}" unless ARGV.empty?

# Swap the project argument for the object returned by the CLI helper.
options[:project] = Typingpool::App::CLI.project_from_arg_and_config(options[:project], config) if options[:project]

Typingpool::App.vcr_record(options[:fixture], config) if options[:fixture]

Typingpool::Amazon.setup(:sandbox => options[:sandbox], :config => config)

assignments = Typingpool::App.assignments_file_for_sandbox_status(options[:sandbox], options[:project]) if options[:project]
STDERR.puts "Gathering submissions from Amazon"

# HIT ids recorded in the project's assignments file, if there is a
# project and it has any.
recorded_hit_ids =
  if options[:project]
    assignments.map { |assignment| assignment['hit_id'] }.select { |hit_id| hit_id }
  else
    []
  end

if recorded_hit_ids.count > 0
  hits = Typingpool::Amazon::HIT.with_ids(recorded_hit_ids).select { |hit| hit.submitted? }
else
  hits = Typingpool::Amazon::HIT.all_reviewable { |hit| hit.submitted? && hit.ours? }
  # code path for projects assigned through RUI
  # NOTE(review): "RUI" presumably means Amazon's Requester User
  # Interface -- confirm.
  hits.select! { |hit| hit.project_id == options[:project].local.id } if options[:project]
end

# Message pattern checked on RTurk::InvalidRequest below to recognise
# an assignment that was auto approved since the session started.
auto_approved_pattern = /AWS.MechanicalTurk.InvalidAssignmentState\b.+\bstatus of:\s*Submitted/i

STDERR.puts "Matching submissions with local projects"
choices = %w(approve reject quit skip)
catch :quitting do
  project_hits = Typingpool::App.find_projects_waiting_for_hits(hits, config)
  total_hits = project_hits.map { |entry| entry[:hits] }.flatten.count
  reviewed = 0
  project_hits.each do |entry|
    pending = entry[:hits]
    project = entry[:project]
    assignments = Typingpool::App.assignments_file_for_sandbox_status(options[:sandbox], project)
    pending.sort! { |a, b| a.url <=> b.url }
    pending.each do |hit|
      reviewed += 1

      # Show the submission.
      say(cli_bold("\nTranscript for: ") + hit.url)
      project_info = cli_bold("Project: ") + project.name
      project_info += ": #{project.local.subtitle}" if project.local.subtitle
      say(project_info)
      say(cli_bold("Submitted at: ") +
          hit.assignment.submitted_at.localtime.to_s +
          cli_bold(" by ") +
          hit.assignment.worker_id)
      say(hit.transcript.body_as_text(4, 68))

      # Build the prompt: the first letter of each choice is bold and
      # parenthesised; the last choice (the default) sits in brackets.
      labels = choices.map do |word|
        cli_reverse('(') +
          cli_reverse(cli_bold(word[0].upcase)) +
          cli_reverse(")#{word[1..-1]}")
      end
      default_label = labels.pop
      labels << (cli_reverse('[') + default_label + cli_reverse(']'))
      prompt = labels.join(cli_reverse(', ')) +
               cli_reverse('? ') +
               cli_reverse("(#{reviewed}/#{total_hits}) ")

      # Keep asking until the answer is blank (default) or its first
      # letter matches the first letter of a choice.
      choice = nil
      until choice
        input = ask(prompt)
        choice =
          if input.to_s.match(/^\s*$/)
            choices.last
          else
            choices.detect { |possible| possible[0] == input.downcase[0] }
          end
        say("Invalid selection '#{input}'.") unless choice
      end

      case choice
      when 'approve'
        begin
          hit.assignment.at_amazon.approve!
        rescue RTurk::InvalidRequest => exception
          # The assignment may have passed its approval deadline since
          # the session started and been auto approved. In that case,
          # add to the transcript; no need to tell the user. Anything
          # else is a different issue and is re-raised.
          raise exception unless exception.message.match(auto_approved_pattern)
        end
        Typingpool::App.record_approved_hits_in_assignments_file(assignments, [hit])
        Typingpool::App.create_transcript(project, assignments)
        say(cli_bold("Approved. ") + "#{project.name} transcript updated.\n")
      when 'reject'
        reason = nil
        reason = ask("Rejection reason, for worker: ") until reason.to_s.match(/\S/)
        begin
          hit.assignment.at_amazon.reject!(reason)
          hit.at_amazon.dispose!
        rescue RTurk::InvalidRequest => exception
          # Same auto-approval situation as in the 'approve' branch.
          raise exception unless exception.message.match(auto_approved_pattern)
          say(cli_bold("Not rejected:") + " This assignment passed its approval deadline since this session began and the assignment has been auto-approved. It will be deleted, but you've been charged for this assignment and your rejection message will not be delivered.")
          hit.remove_from_amazon
        else
          say(cli_bold("Rejected\n"))
        end
        Typingpool::App.unrecord_hits_in_assignments_file(assignments, [hit])
      when 'quit'
        say(cli_bold("Quitting"))
        throw :quitting
      when 'skip'
        say(cli_bold("Skipping\n"))
        next
      end # case choice
    end # pending.each
  end # project_hits.each
end # catch :quitting

# Put project path on STDOUT so script can be pipelined, e.g.
# `tp-review Foo | tp-finish`
puts options[:project].local.path if options[:project]

Typingpool::App.vcr_stop if options[:fixture]