typingpool 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. data/Rakefile +23 -0
  2. data/bin/tp-assign +240 -0
  3. data/bin/tp-collect +50 -0
  4. data/bin/tp-config +114 -0
  5. data/bin/tp-finish +101 -0
  6. data/bin/tp-make +169 -0
  7. data/bin/tp-review +175 -0
  8. data/lib/typingpool/amazon.rb +732 -0
  9. data/lib/typingpool/app.rb +634 -0
  10. data/lib/typingpool/config.rb +344 -0
  11. data/lib/typingpool/error.rb +22 -0
  12. data/lib/typingpool/filer.rb +396 -0
  13. data/lib/typingpool/project.rb +593 -0
  14. data/lib/typingpool/template.rb +175 -0
  15. data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
  16. data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
  17. data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
  18. data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
  19. data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
  20. data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
  21. data/lib/typingpool/templates/assignment/main.css +20 -0
  22. data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
  23. data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
  24. data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
  25. data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
  26. data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
  27. data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
  28. data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
  29. data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
  30. data/lib/typingpool/templates/config.yml +21 -0
  31. data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
  32. data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
  33. data/lib/typingpool/templates/project/data/.empty_directory +0 -0
  34. data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
  35. data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
  36. data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
  37. data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
  38. data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
  39. data/lib/typingpool/templates/project/etc/transcript.css +49 -0
  40. data/lib/typingpool/templates/transcript.html.erb +23 -0
  41. data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
  42. data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
  43. data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
  44. data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
  45. data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
  46. data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
  47. data/lib/typingpool/test/fixtures/config-1 +20 -0
  48. data/lib/typingpool/test/fixtures/config-2 +25 -0
  49. data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
  50. data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
  51. data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
  52. data/lib/typingpool/test/fixtures/template.html.erb +10 -0
  53. data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
  54. data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
  55. data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
  56. data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
  57. data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
  58. data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
  59. data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
  60. data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
  61. data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
  62. data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
  63. data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
  64. data/lib/typingpool/test.rb +418 -0
  65. data/lib/typingpool/transcript.rb +181 -0
  66. data/lib/typingpool/utility.rb +272 -0
  67. data/lib/typingpool.rb +500 -0
  68. data/test/make_amazon_question_fixture.rb +24 -0
  69. data/test/make_tp_collect_fixture_1.rb +26 -0
  70. data/test/make_tp_collect_fixture_2.rb +16 -0
  71. data/test/make_tp_collect_fixture_3.rb +15 -0
  72. data/test/make_tp_collect_fixture_4.rb +17 -0
  73. data/test/make_tp_review_fixture_1.rb +26 -0
  74. data/test/make_tp_review_fixture_2.rb +30 -0
  75. data/test/make_transcript_chunks_fixture.rb +53 -0
  76. data/test/test_integration_script_1_tp_config.rb +108 -0
  77. data/test/test_integration_script_2_tp_make.rb +119 -0
  78. data/test/test_integration_script_3_tp_assign.rb +152 -0
  79. data/test/test_integration_script_4_tp_review.rb +72 -0
  80. data/test/test_integration_script_5_tp_collect.rb +44 -0
  81. data/test/test_integration_script_6_tp_finish.rb +123 -0
  82. data/test/test_unit_amazon.rb +153 -0
  83. data/test/test_unit_config.rb +94 -0
  84. data/test/test_unit_filer.rb +202 -0
  85. data/test/test_unit_project.rb +168 -0
  86. data/test/test_unit_project_local.rb +68 -0
  87. data/test/test_unit_project_remote.rb +157 -0
  88. data/test/test_unit_template.rb +111 -0
  89. data/test/test_unit_transcript.rb +77 -0
  90. metadata +234 -0
data/bin/tp-make ADDED
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'typingpool'
5
+ require 'fileutils'
6
+ require 'tempfile'
7
+ require 'tmpdir'
8
+ include Typingpool::App::FriendlyExceptions
9
+
10
+ options = {
11
+ :files => [],
12
+ :voices => [],
13
+ :unusual => [],
14
+ :chunk => '1:00',
15
+ }
16
+ OptionParser.new do |opts|
17
+ options[:banner] = "USAGE: #{File.basename($PROGRAM_NAME)} PROJECTNAME FILE [FILE [FILE...]]\n"
18
+ options[:banner] += " [--chunks 1:00] [--subtitle 'Hack Day interview']\n"
19
+ options[:banner] += " [--voice 'John' --voice 'Pat Foo, British female'...]\n"
20
+ options[:banner] += " [--unusual 'Hack Day' --unusual 'Sunnyvale, Chad Dickerson'...]\n"
21
+ options[:banner] += " [--bitrate 256] [--config PATH]\n"
22
+ options[:banner] += " [--title PROJECTNAME] [--file foo.mp3 [--file bar.mp3...]]\n"
23
+ opts.banner = options[:banner]
24
+
25
+ opts.on('--title TITLE',
26
+ 'Required. For file names and transcript.',
27
+ 'Also accepted as first argument') do |title|
28
+ options[:title] = title
29
+ end
30
+
31
+ opts.on('--file FILE',
32
+ 'Required. Audio for transcribing.',
33
+ 'Repeatable (sorting is by name).',
34
+ 'Also accepted as second and later arguments') do |file|
35
+ options[:files].push(file)
36
+ end
37
+
38
+ opts.on('--subtitle SUBTITLE',
39
+ 'For transcript') do |subtitle|
40
+ options[:subtitle] = subtitle
41
+ end
42
+
43
+ opts.on('--chunks MM:SS',
44
+ 'Default: 1:00. Audio divided thusly for',
45
+ 'transcribing. Try also HH:MM:SS and SSS') do |chunk|
46
+ options[:chunk] = chunk
47
+ end
48
+
49
+ opts.on('--voice "NAME[, DESCR]"',
50
+ 'Name, optional description of recorded',
51
+ 'person, to aid transcriber. Repeatable') do |voice|
52
+ options[:voices].push(voice)
53
+ end
54
+
55
+ opts.on('--unusual WORD[,WORD,]',
56
+ 'Unusual word within recording, to aid',
57
+ 'transcriber. Commas for multiple.',
58
+ 'Repeatable') do |word|
59
+ options[:unusual].push(word)
60
+ end
61
+
62
+ opts.on('--config PATH',
63
+ 'Default: ~/.typingpool. A config file') do |path|
64
+ options[:config] = path
65
+ end
66
+
67
+ opts.on('--bitrate KBPS',
68
+ 'Default: Mirror input. Output bitrate in',
69
+ 'kb/s. Only applies if/when converting to',
70
+ 'MP3') do |kbps|
71
+ options[:bitrate] = kbps
72
+ end
73
+
74
+ opts.on('--help',
75
+ 'Display this screen.') do
76
+ puts opts
77
+ exit
78
+ end
79
+ end.parse!
80
+
81
+ Typingpool::App.if_missing_dependencies do |missing|
82
+ missing.map!{|cmd| "`#{cmd}`" }
83
+ them = missing.count > 1 ? 'them' : 'it'
84
+ abort "It looks like you're missing #{Typingpool::Utility.join_in_english(missing)}. You'll need to install #{them} before Typingpool can run."
85
+ end
86
+
87
+ options[:title] ||= ARGV.shift if ARGV.count > 0
88
+ options[:files].push(ARGV).flatten! if ARGV.count > 0
89
+ options[:banner] += "\n#{Typingpool::App::CLI.help_arg_explanation}\n"
90
+ abort "No files specified\n\n#{options[:banner]}" if options[:files].empty?
91
+ abort "No title specified\n\n#{options[:banner]}" if options[:title].to_s.empty?
92
+ options[:files].sort!
93
+ options[:files].each do |file|
94
+ File.extname(file) or abort "You need a file extension on the file '#{file}'"
95
+ File.exists?(file) or abort "There is no file '#{file}'"
96
+ File.file?(file) or abort "Not a file: '#{file}'"
97
+ end
98
+ options[:unusual].map!{|unusual| unusual.split(/\s*,\s*/)}.flatten!
99
+ options[:voices].map! do |voice|
100
+ name, description = voice.split(/\s*,\s*/)
101
+ {
102
+ :name => name,
103
+ :description => (description || '')
104
+ }
105
+ end
106
+
107
+ config = Typingpool::App::CLI.config_from_arg(options[:config]) or abort "No config file at '#{options[:config]}'"
108
+
109
+ project = with_friendly_exceptions('project title', options[:title]) do
110
+ Typingpool::Project.new(options[:title], config)
111
+ end
112
+
113
+ with_friendly_exceptions('--chunk argument', options[:chunk]) do
114
+ project.interval = options[:chunk] if options[:chunk]
115
+ end
116
+
117
+ with_friendly_exceptions('--bitrate argument', options[:bitrate]) do
118
+ project.bitrate = options[:bitrate] if options[:bitrate]
119
+ end
120
+
121
+ if project.local
122
+ if (File.exists?(project.local.file('data', 'assignment.csv')) &&
123
+ project.local.file('data', 'assignment.csv').as(:csv).read.select{|assignment| assignment['audio_uploaded'] == 'maybe' }.count > 0)
124
+ #project where the upload died partway through
125
+ STDERR.puts "Fixing incomplete project"
126
+ STDERR.puts "Determining which mp3s need uploading"
127
+ else
128
+ abort "The title '#{options[:title]}' is taken"
129
+ end #if(File.exists(project.local.file('data', 'assignment.csv') &&...
130
+ else
131
+ project.create_local
132
+ project.local.subtitle = options[:subtitle] if options[:subtitle]
133
+ options[:files].each{|path| FileUtils.cp(path, project.local.subdir('audio', 'originals')) }
134
+
135
+ temp_conversion_dir = Typingpool::Filer::Dir.new(Dir.mktmpdir)
136
+ converted_files = project.local.subdir('audio', 'originals').as(:audio).to_mp3(temp_conversion_dir, project.bitrate) do |file|
137
+ STDERR.puts "Converting #{File.basename(file) } to mp3"
138
+ end
139
+
140
+ STDERR.puts "Merging audio" if converted_files.count > 1
141
+ temp_merge_file = Tempfile.new(["#{project.name}.all.", ".mp3"])
142
+ temp_merge_filer = Typingpool::Filer::Audio.new(temp_merge_file.path)
143
+ converted_files.merge(temp_merge_filer)
144
+
145
+ STDERR.puts "Splitting audio into uniform bits"
146
+ chunks = temp_merge_filer.split(project.interval_as_min_dot_sec, project.name, project.local.subdir('audio','chunks'))
147
+ urls = project.create_remote_names(chunks).map{|file| project.remote.file_to_url(file) }
148
+
149
+ #Write mp3 URLs before uploading, so we can rollback if uploading
150
+ #dies halfway through
151
+ assignment_path = project.create_assignment_csv(:path => ['data', 'assignment.csv'], :urls => urls, :unusual => options[:unusual], :voices => options[:voices])
152
+
153
+ temp_merge_file.close
154
+ temp_merge_file.unlink
155
+ FileUtils.remove_entry_secure(temp_conversion_dir)
156
+ end #if project.local
157
+
158
+ Typingpool::App.upload_audio_for_project(project) do |file, as|
159
+ STDERR.puts "Uploading #{File.basename(file)} to #{project.remote.host}/#{project.remote.path} as #{as}"
160
+ end
161
+
162
+ if STDOUT.tty? && Typingpool::Utility.os_x?
163
+ STDERR.puts "Opening project folder #{project.local.path}"
164
+ project.local.finder_open
165
+ end
166
+
167
+ STDERR.puts "Done. Project at:"
168
+ puts project.local.path
169
+
data/bin/tp-review ADDED
@@ -0,0 +1,175 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'typingpool'
4
+ require 'highline/import'
5
+ require 'optparse'
6
+ include Typingpool::App::CLI::Formatter
7
+
8
+ options = {}
9
+ OptionParser.new do |commands|
10
+ options[:banner] = "USAGE: #{File.basename($PROGRAM_NAME) } [PROJECT]\n"
11
+ options[:banner] += " [--config PATH] [--sandbox]\n"
12
+ commands.banner = options[:banner]
13
+
14
+ commands.on('--project=PROJECT',
15
+ 'Path or name within dir $config_file:transcripts.',
16
+ 'Also accepted as first argument to script.',
17
+ 'If not specified, reviews pending results',
18
+ 'for ALL projects') do |project|
19
+ options[:project] = project
20
+ end
21
+ commands.on('--config=PATH',
22
+ 'Default: ~/.typingpool') do |path|
23
+ options[:config] = path
24
+ end
25
+ commands.on('--sandbox',
26
+ 'Test in Mechanical Turk\'s sandbox') do |sandbox|
27
+ options[:sandbox] = true
28
+ end
29
+ commands.on('--fixture=PATH',
30
+ "Optional. For testing purposes only.",
31
+ "A VCR fixture for running with mock data") do |fixture|
32
+ options[:fixture] = fixture
33
+ end
34
+ commands.on('--help',
35
+ "Display this screen") do
36
+ STDERR.puts commands
37
+ exit
38
+ end
39
+ end.parse!
40
+
41
+ config = Typingpool::App::CLI.config_from_arg(options[:config]) or abort "No config file at '#{options[:config]}'"
42
+ options[:banner] += "\n#{Typingpool::App::CLI.help_arg_explanation}\n"
43
+
44
+ if ARGV.count > 0
45
+ if options[:project]
46
+ abort "Duplicate project values ('#{ARGV[0]}' and '#{options[:project]}')"
47
+ end
48
+ options[:project] = ARGV.shift
49
+ end
50
+ abort "Unexpected argument(s): #{ARGV.join(';')}" if ARGV.count > 0
51
+
52
+ if options[:project]
53
+ options[:project] = Typingpool::App::CLI.project_from_arg_and_config(options[:project], config)
54
+ end
55
+
56
+ if options[:fixture]
57
+ Typingpool::App.vcr_record(options[:fixture], config)
58
+ end
59
+
60
+ Typingpool::Amazon.setup(:sandbox => options[:sandbox], :config => config)
61
+
62
+ assignments = Typingpool::App.assignments_file_for_sandbox_status(options[:sandbox], options[:project]) if options[:project]
63
+ STDERR.puts "Gathering submissions from Amazon"
64
+ hits = if options[:project] && ((hit_ids = assignments.map{|assignment| assignment['hit_id'] }.select{|hit_id| hit_id }).count > 0)
65
+ Typingpool::Amazon::HIT.with_ids(hit_ids).select{|hit| hit.submitted? }
66
+ else
67
+ hits = Typingpool::Amazon::HIT.all_reviewable{|hit| hit.submitted? && hit.ours? }
68
+ if options[:project]
69
+ #code path for projects assigned through RUI
70
+ hits.select!{|hit| hit.project_id == options[:project].local.id }
71
+ end
72
+ hits
73
+ end
74
+
75
+ STDERR.puts "Matching submissions with local projects"
76
+ choices = %w(approve reject quit skip)
77
+ catch :quitting do
78
+ project_hits = Typingpool::App.find_projects_waiting_for_hits(hits, config)
79
+ all_hits = project_hits.map{|hash| hash[:hits] }.flatten
80
+ i = 0
81
+ project_hits.each do |hash|
82
+ hits = hash[:hits]
83
+ project = hash[:project]
84
+ assignments = Typingpool::App.assignments_file_for_sandbox_status(options[:sandbox], project)
85
+ hits.sort!{|a, b| a.url <=> b.url }
86
+ hits.each do |hit|
87
+ i += 1
88
+ say(cli_bold("\nTranscript for: ") + hit.url)
89
+ project_info = cli_bold("Project: ") + project.name
90
+ if project.local.subtitle
91
+ project_info += ": #{project.local.subtitle}"
92
+ end
93
+ say(project_info)
94
+ say(cli_bold("Submitted at: ") +
95
+ hit.assignment.submitted_at.localtime.to_s +
96
+ cli_bold(" by ") +
97
+ hit.assignment.worker_id)
98
+ say(hit.transcript.body_as_text(4, 68))
99
+ prompt = choices.map do |c|
100
+ cli_reverse('(') +
101
+ cli_reverse(cli_bold(c.slice(0).upcase)) +
102
+ cli_reverse(")#{c.slice(1, c.size)}")
103
+ end
104
+ prompt << cli_reverse('[') + prompt.pop + cli_reverse(']')
105
+ prompt = prompt.join(cli_reverse(', '))
106
+ prompt += cli_reverse('? ')
107
+ prompt += cli_reverse("(#{i}/#{all_hits.count}) ")
108
+
109
+ choice=nil
110
+ until choice
111
+ input = ask(prompt)
112
+ if input.to_s.match(/^\s*$/)
113
+ choice = choices.last
114
+ elsif not(choice = choices.detect{|possible| possible[0] == input.downcase[0] })
115
+ say("Invalid selection '#{input}'.")
116
+ end
117
+ end #until choice
118
+
119
+ case choice
120
+ when 'approve'
121
+ begin
122
+ hit.assignment.at_amazon.approve!
123
+ rescue RTurk::InvalidRequest => exception
124
+ #assignment may have passed its approval deadline since
125
+ #the session started and has been auto approved. In that
126
+ #case, add to the transcript; no need to tell the user.
127
+ unless exception.message.match(/AWS.MechanicalTurk.InvalidAssignmentState\b.+\bstatus of:\s*Submitted/i)
128
+ #different issue than auto approval; raise exception
129
+ raise exception
130
+ end
131
+ end #begin
132
+ Typingpool::App.record_approved_hits_in_assignments_file(assignments, [hit])
133
+ Typingpool::App.create_transcript(project, assignments)
134
+ say(cli_bold("Approved. ") + "#{project.name} transcript updated.\n")
135
+ when 'reject'
136
+ reason=nil
137
+ until reason.to_s.match(/\S/)
138
+ reason = ask("Rejection reason, for worker: ")
139
+ end
140
+ begin
141
+ hit.assignment.at_amazon.reject!(reason)
142
+ hit.at_amazon.dispose!
143
+ rescue RTurk::InvalidRequest => exception
144
+ #see comment under 'approve' case above
145
+ if exception.message.match(/AWS.MechanicalTurk.InvalidAssignmentState\b.+\bstatus of:\s*Submitted/i)
146
+ say(cli_bold("Not rejected:") + " This assignment passed its approval deadline since this session began and the assignment has been auto-approved. It will be deleted, but you've been charged for this assignment and your rejection message will not be delivered.")
147
+ hit.remove_from_amazon
148
+ else
149
+ raise exception
150
+ end
151
+ else
152
+ say(cli_bold("Rejected\n"))
153
+ end #begin
154
+ Typingpool::App.unrecord_hits_in_assignments_file(assignments, [hit])
155
+ when 'quit'
156
+ say(cli_bold("Quitting"))
157
+ throw :quitting
158
+ when 'skip'
159
+ say(cli_bold("Skipping\n"))
160
+ next
161
+ end #case choice
162
+ end #hits.each do |hit|
163
+
164
+ end #project_hits.each do |hash|
165
+ end #catch :quitting do
166
+
167
+ #Put project path on STDOUT so script can be pipelined, e.g.
168
+ #`tp-review Foo | tp-finish`
169
+ if options[:project]
170
+ puts options[:project].local.path
171
+ end
172
+
173
+ if options[:fixture]
174
+ Typingpool::App.vcr_stop
175
+ end