typingpool 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +23 -0
- data/bin/tp-assign +240 -0
- data/bin/tp-collect +50 -0
- data/bin/tp-config +114 -0
- data/bin/tp-finish +101 -0
- data/bin/tp-make +169 -0
- data/bin/tp-review +175 -0
- data/lib/typingpool/amazon.rb +732 -0
- data/lib/typingpool/app.rb +634 -0
- data/lib/typingpool/config.rb +344 -0
- data/lib/typingpool/error.rb +22 -0
- data/lib/typingpool/filer.rb +396 -0
- data/lib/typingpool/project.rb +593 -0
- data/lib/typingpool/template.rb +175 -0
- data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
- data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
- data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
- data/lib/typingpool/templates/assignment/main.css +20 -0
- data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
- data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
- data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
- data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
- data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
- data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
- data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
- data/lib/typingpool/templates/config.yml +21 -0
- data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
- data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
- data/lib/typingpool/templates/project/data/.empty_directory +0 -0
- data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
- data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
- data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
- data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
- data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
- data/lib/typingpool/templates/project/etc/transcript.css +49 -0
- data/lib/typingpool/templates/transcript.html.erb +23 -0
- data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
- data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
- data/lib/typingpool/test/fixtures/config-1 +20 -0
- data/lib/typingpool/test/fixtures/config-2 +25 -0
- data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
- data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
- data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
- data/lib/typingpool/test/fixtures/template.html.erb +10 -0
- data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
- data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
- data/lib/typingpool/test.rb +418 -0
- data/lib/typingpool/transcript.rb +181 -0
- data/lib/typingpool/utility.rb +272 -0
- data/lib/typingpool.rb +500 -0
- data/test/make_amazon_question_fixture.rb +24 -0
- data/test/make_tp_collect_fixture_1.rb +26 -0
- data/test/make_tp_collect_fixture_2.rb +16 -0
- data/test/make_tp_collect_fixture_3.rb +15 -0
- data/test/make_tp_collect_fixture_4.rb +17 -0
- data/test/make_tp_review_fixture_1.rb +26 -0
- data/test/make_tp_review_fixture_2.rb +30 -0
- data/test/make_transcript_chunks_fixture.rb +53 -0
- data/test/test_integration_script_1_tp_config.rb +108 -0
- data/test/test_integration_script_2_tp_make.rb +119 -0
- data/test/test_integration_script_3_tp_assign.rb +152 -0
- data/test/test_integration_script_4_tp_review.rb +72 -0
- data/test/test_integration_script_5_tp_collect.rb +44 -0
- data/test/test_integration_script_6_tp_finish.rb +123 -0
- data/test/test_unit_amazon.rb +153 -0
- data/test/test_unit_config.rb +94 -0
- data/test/test_unit_filer.rb +202 -0
- data/test/test_unit_project.rb +168 -0
- data/test/test_unit_project_local.rb +68 -0
- data/test/test_unit_project_remote.rb +157 -0
- data/test/test_unit_template.rb +111 -0
- data/test/test_unit_transcript.rb +77 -0
- metadata +234 -0
@@ -0,0 +1,634 @@
|
|
1
|
+
module Typingpool
|
2
|
+
#Module encapsulating high-level Typingpool procedures and called
|
3
|
+
#from the various tp-* scripts. Control layer type code.
|
4
|
+
#
|
5
|
+
#This is the least mature Typingpool class. At present, all methods
|
6
|
+
#are class methods. This will likely change to a model in which
|
7
|
+
#different subclasses of App instances do everything from parsing
|
8
|
+
#and validating command-line input to completing core functionality
|
9
|
+
#to outputting context-dependent result summaries.
|
10
|
+
#
|
11
|
+
#As such, all App methods should be considered fluid and likely to
|
12
|
+
#change in subsequent releases.
|
13
|
+
module App
|
14
|
+
require 'vcr'
|
15
|
+
require 'stringio'
|
16
|
+
require 'open3'
|
17
|
+
class << self
|
18
|
+
|
19
|
+
# Given a Project instance, figures out which audio chunks, if any,
# still need to be uploaded and uploads them.
#
# Note that this method is sensitive to the possibility of
# interrupted batch uploads. It checks for previously interrupted
# uploads at the start to see if it needs to re-try them, and writes
# out what uploads it is attempting prior to beginning the upload in
# case the upload is interrupted by an exception.
#
# As such, any script calling this method can usually be simply
# re-run to re-attempt the upload.
#
# Reads from and writes to the Filer::CSV obtained from
# Project#local#file('data', 'assignment.csv').
# ==== Params
# [project] A Project instance.
# [&block]  Optional. A block that will be called at the
#           beginning of each file upload and passed
#           the local path to the file and the remote
#           name of the file.
# ==== Returns
# An array of URLs of the uploaded audio files. If no files needed
# uploading, the array will be empty.
def upload_audio_for_project(project)
  # We don't make any provision for reading/writing from
  # sandbox-assignment.csv because audio upload data in such files is
  # effectively ignored.
  assignments_file = project.local.file('data', 'assignment.csv').as(:csv)
  check_interrupted_uploads(assignments_file, 'audio')
  uploading = assignments_file.reject { |assignment| assignment['audio_uploaded'] == 'yes' }
  return uploading if uploading.empty?
  files = uploading.map { |assignment| Typingpool::Project.local_basename_from_url(assignment['audio_url']) }
  files.map! { |basename| project.local.file('audio', 'chunks', basename).as(:audio) }
  files = Typingpool::Filer::Files.new(files)
  remote_files = with_abort_on_url_mismatch('audio') do
    uploading.map { |assignment| project.remote.url_basename(assignment['audio_url']) }
  end
  # Record that we're uploading so we'll know later if something
  # goes wrong.
  record_assignment_upload_status(assignments_file, uploading, ['audio'], 'maybe')
  project.remote.put(files.to_streams, remote_files) do |file, as|
    yield(file, as) if block_given?
  end
  record_assignment_upload_status(assignments_file, uploading, ['audio'], 'yes')
  uploading.map { |assignment| assignment['audio_url'] }
end
|
69
|
+
|
70
|
+
# For a subset of a Project instance's chunks/assignments, uploads
# assignment html that is used as the external question for a
# Mechanical Turk HIT.
#
# Takes the same precautions around interrupted network uploads as
# upload_audio_for_project.
#
# The URL of each uploaded assignment is written into
# assignments_file, along with metadata confirming that the upload
# completed.
#
# ==== Params
# [project]               A Project instance.
# [assignments_file]      A Filer::CSV instance from which
#                         assignments_uploading were drawn. The
#                         upload status will be written and
#                         tracked here.
# [assignments_uploading] An enumerable collection of hashes
#                         corresponding to rows in
#                         Project#local.file('data',
#                         'assignment.csv'). Only assignments
#                         whose URLs are contained in these
#                         hashes will be uploaded.
# [template]              A Template::Assignment instance. Used to
#                         render assignments_uploading into HTML
#                         prior to uploading.
# ==== Returns
# An array of URLs of the uploaded assignments.
def upload_html_for_project_assignments(project, assignments_file, assignments_uploading, template)
  ios = assignments_uploading.map { |assignment| StringIO.new(template.render(assignment)) }
  # Each HTML file is named after its audio chunk, with the extension
  # swapped for '.html'.
  remote_basenames = assignments_uploading.map do |assignment|
    File.basename(project.class.local_basename_from_url(assignment['audio_url']), '.*') + '.html'
  end
  remote_names = project.create_remote_names(remote_basenames)
  urls = remote_names.map { |name| project.remote.file_to_url(name) }
  # Record upload URLs ahead of time so we can roll back later if the
  # upload fails halfway through.
  record_assignment_urls(assignments_file, assignments_uploading, 'assignment', urls)
  record_assignment_upload_status(assignments_file, assignments_uploading, ['assignment'], 'maybe')
  project.remote.put(ios, remote_names)
  record_assignment_upload_status(assignments_file, assignments_uploading, ['assignment'], 'yes')
  urls
end
|
113
|
+
|
114
|
+
# Removes one or more types of remote files -- audio, assignment
# html, etc. -- associated with a subset of a Project instance's
# chunks/assignments.
#
# Writes to assignments_file to reflect these changes.
#
# As with upload_audio_for_project, this method is sensitive to the
# possibility of interrupted batch operations over the network. This
# means
#  1. It handles deleting files that *might* have been uploaded,
#     trapping "no such file" errors reported for files that do not
#     exist on the remote server.
#  2. It writes out what deletions it is attempting before
#     attempting them, so that if the deletion operation is
#     interrupted by an exception, the files will be clearly marked
#     in an unknown state.
#
# ==== Params
# [project]                A Project instance.
# [assignments_file]       A Filer::CSV instance from which
#                          assignments_updeleting were
#                          drawn. The upload status will be
#                          written and tracked here.
# [assignments_updeleting] An enumerable collection of hashes
#                          corresponding to selected rows in
#                          Project#local#file('data',
#                          'assignment.csv'). Only assets whose
#                          URLs are contained in these hashes
#                          will be deleted. Defaults to
#                          assignments_file.
# [types]                  Optional. An array of asset
#                          'types'. The default, ['audio',
#                          'assignment'], means assets at
#                          assignment['audio_url'] and
#                          assignment['assignment_url'] will be
#                          deleted for each assignment hash in
#                          assignments_updeleting and that
#                          upload status will be tracked in
#                          assignment['audio_uploaded'] and
#                          assignment['assignment_uploaded'].
# [&block]                 Optional. A code block that is handed
#                          to Project#remote#remove_urls and
#                          passed the file argument that method
#                          yields.
# ==== Returns
# A count of how many items were actually removed from the
# server.
def updelete_assignment_assets(project, assignments_file, assignments_updeleting=assignments_file, types=['audio', 'assignment'])
  # An asset is a deletion candidate if it was, or might have been,
  # uploaded.
  possibly_remote = lambda do |assignment, type|
    %w(yes maybe).include?(assignment["#{type}_uploaded"])
  end
  assignments_updeleting = assignments_updeleting.select do |assignment|
    types.any? { |type| possibly_remote.call(assignment, type) }
  end
  urls_updeleting = assignments_updeleting.flat_map do |assignment|
    candidates = types.select { |type| possibly_remote.call(assignment, type) }
    candidates.map { |type| assignment["#{type}_url"] }.compact
  end
  return 0 if urls_updeleting.empty?
  missing = []
  record_assignment_upload_status(assignments_file, assignments_updeleting, types, 'maybe')
  begin
    with_abort_on_url_mismatch do
      project.remote.remove_urls(urls_updeleting) { |file| yield(file) if block_given? }
    end
  rescue Typingpool::Error::File::Remote => exception
    # The exception message is split on '; ' into individual
    # messages. Those reporting a missing file are tolerated (the
    # upload may never have happened); anything else is fatal.
    missing, others = exception.message.split('; ').partition { |message| message =~ /no such file/i }
    raise Error, "Can't remove files: #{others.join('; ')}" unless others.empty?
  end
  record_assignment_upload_status(assignments_file, assignments_updeleting, types, 'no')
  urls_updeleting.count - missing.count
end
|
193
|
+
|
194
|
+
# Given a collection of Amazon::HITs, looks for Project folders on
# the local system waiting to "receive" those HITs. Such folders are
# kept in Config#transcripts. Returns Project instances associated
# with those folders, bundled together with the related HITs (see
# below for the exact format of the return value).
#
# A project is skipped when it has no local folder, when the local
# folder's id differs from the HIT's project id, or when the
# finished transcript file already exists in the folder.
# ==== Params
# [hits]   An enumerable collection of Amazon::HIT instances.
# [config] A Config instance.
# [&block] Optional. A block, if supplied, will be called
#          repeatedly, each time being passed a different
#          Project instance and an array of Amazon::HIT
#          instances, corresponding to the subset of [hits]
#          belonging to the Project.
# ==== Returns
# An array of hashes of the form {:project => project, :hits
# =>[hit1,hit2...]}, ordered by project id.
def find_projects_waiting_for_hits(hits, config)
  # Maps project id => true (project wants hits) or false (project
  # was already examined and rejected). Absent means not yet examined.
  need = {}
  by_project_id = {}
  hits.each do |hit|
    project_id = hit.project_id
    if need.key?(project_id)
      by_project_id[project_id][:hits].push(hit) if need[project_id]
      next
    end
    need[project_id] = false
    project = Typingpool::Project.new(hit.project_title_from_url, config)
    next unless project.local && (project.local.id == project_id)
    # File.exist? rather than File.exists?, which Ruby 3.2 removed.
    next if File.exist?(project.local.file(transcript_filename[:done]))
    by_project_id[project_id] = {
      :project => project,
      :hits => [hit]
    }
    need[project_id] = true
  end
  if block_given?
    by_project_id.values.each { |hash| yield(hash[:project], hash[:hits]) }
  end
  by_project_id.keys.sort.map { |key| by_project_id[key] }
end
|
236
|
+
|
237
|
+
# Given a Project and an assignments file like
# Project#local#file('data', 'assignment.csv'), writes an HTML
# transcript for that project within the local project folder
# (Project#local).
#
# The transcript is written to 'transcript.html' when the number of
# transcribed chunks equals the number of entries in the project's
# audio/chunks subdir, and to 'transcript_in_progress.html'
# otherwise. Any existing in-progress transcript is deleted first.
# ==== Params
# [project]          A Project instance.
# [assignments_file] A Filer::CSV instance
#                    corresponding to a file like
#                    Project#local#file('data',
#                    'assignment.csv').
# [config]           Optional. A Config instance. If not supplied,
#                    will use Project#config. Used to find the
#                    transcript template.
# ==== Errors
# Aborts if the transcript template cannot be loaded.
# ==== Returns
# The file name of the resulting HTML transcript (one of the
# transcript_filename values), relative to the local project folder.
def create_transcript(project, assignments_file, config=project.config)
  transcribed = assignments_file.select { |assignment| assignment['transcript'] }
  transcript_chunks = transcribed.map do |assignment|
    chunk = Typingpool::Transcript::Chunk.new(assignment['transcript'])
    chunk.url = assignment['audio_url']
    chunk.project = assignment['project_id']
    chunk.worker = assignment['worker']
    chunk.hit = assignment['hit_id']
    chunk
  end
  transcript = Typingpool::Transcript.new(project.name, transcript_chunks)
  transcript.subtitle = project.local.subtitle
  done = (transcript.to_a.length == project.local.subdir('audio', 'chunks').to_a.size)
  out_file = done ? transcript_filename[:done] : transcript_filename[:working]
  begin
    template = Template.from_config('transcript', config)
  rescue Error::File::NotExists => e
    abort "Couldn't find the template dir in your config file: #{e}"
  rescue Error => e
    abort "There was a fatal error with the transcript template: #{e}"
  end
  # File.exist? rather than File.exists?, which Ruby 3.2 removed.
  working_path = project.local.file(transcript_filename[:working])
  File.delete(working_path) if File.exist?(working_path)
  File.open(project.local.file(out_file), 'w') do |out|
    out << template.render({:transcript => transcript})
  end
  out_file
end
|
281
|
+
|
282
|
+
# Creates the file Project#local#file('data',
# 'sandbox-assignment.csv') if it doesn't exist. Populates the file
# by copying over Project#local#file('data', 'assignment.csv') and
# stripping it of HIT, assignment_url and assignment_uploaded data.
#
# Raises Error if sandbox-assignment.csv needs to be created but
# assignment.csv does not exist.
#
# Always returns a Filer::CSV instance linked to
# sandbox-assignment.csv.
def ensure_sandbox_assignment_csv(project)
  csv = project.local.file('data', 'sandbox-assignment.csv').as(:csv)
  # File.exist? rather than File.exists?, which Ruby 3.2 removed.
  return csv if File.exist?(csv)
  source = project.local.file('data', 'assignment.csv')
  raise Error, "No assignment CSV to copy" unless File.exist?(source)
  sandbox_rows = source.as(:csv).map do |assignment|
    unrecord_hit_in_csv_row(assignment)
    assignment.delete('assignment_url')
    assignment.delete('assignment_uploaded')
    assignment
  end
  csv.write(sandbox_rows)
  csv
end
|
304
|
+
|
305
|
+
# Takes a boolean indicating whether we're working in the Amazon
# sandbox and a Project instance. Returns a Filer::CSV instance
# corresponding to the appropriate assignments file: the result of
# ensure_sandbox_assignment_csv when sandbox is truthy, otherwise
# Project#local#file('data', 'assignment.csv')#as(:csv).
def assignments_file_for_sandbox_status(sandbox, project)
  return ensure_sandbox_assignment_csv(project) if sandbox
  project.local.file('data', 'assignment.csv').as(:csv)
end
|
316
|
+
|
317
|
+
# Extracts relevant information from a collection of just-assigned
# Amazon::HITs and writes it into the Project's assignment CSV file
# for future use. For each matching row, records the HIT id, its
# expiration time and its assignments duration (the latter two as
# strings).
# ==== Params
# [assignments_file] A Filer::CSV instance
#                    corresponding to a file like
#                    Project#local#file('data',
#                    'assignment.csv').
# [hits]             An enumerable collection of Amazon::HIT
#                    instances that were just assigned (that is,
#                    that have one assignment, which has a blank
#                    status).
def record_assigned_hits_in_assignments_file(assignments_file, hits)
  record_hits_in_assignments_file(assignments_file, hits) do |hit, csv_row|
    csv_row['hit_id'] = hit.id
    csv_row['hit_expires_at'] = hit.full.expires_at.to_s
    csv_row['hit_assignments_duration'] = hit.full.assignments_duration.to_s
  end
end
|
335
|
+
|
336
|
+
# Extracts relevant information from a collection of just-approved
# Amazon::HITs and writes it into the Project's assignment CSV file
# (Project#local#file('data', 'assignment.csv')) for future use.
#
# Rows that already have a transcript are left untouched.
# ==== Params
# [assignments_file] A Filer::CSV instance
#                    corresponding to a file like
#                    Project#local#file('data',
#                    'assignment.csv').
# [hits]             An enumerable collection of Amazon::HIT
#                    instances whose one assignment has the status
#                    'Approved'.
def record_approved_hits_in_assignments_file(assignments_file, hits)
  record_hits_in_assignments_file(assignments_file, hits) do |hit, csv_row|
    # Never overwrite a transcript that was recorded earlier.
    next if csv_row['transcript']
    csv_row['transcript'] = hit.transcript.body
    csv_row['worker'] = hit.transcript.worker
    csv_row['hit_id'] = hit.id
  end
end
|
355
|
+
|
356
|
+
#Given a Project instance and an array of modified assignment
|
357
|
+
#hashes previously retrieved from the Project's assignment CSV
|
358
|
+
#(Project#local#file('data', 'assignment.csv')), writes the
|
359
|
+
#'assignment_url' property of each modified hash back to the
|
360
|
+
#corresponding row in the original CSV.
|
361
|
+
# def record_assignment_urls_in_project(project, assignments)
|
362
|
+
# assignments_by_audio_url = Hash[ *assignments.map{|assignment| [assignment['audio_url'], assignment] }.flatten ]
|
363
|
+
# project.local.file('data', 'assignment.csv').as(:csv).each! do |csv_row|
|
364
|
+
# assignment = assignments_by_audio_url[csv_row['audio_url']] or next
|
365
|
+
# csv_row['assignment_url'] = assignment['assignment_url']
|
366
|
+
# end
|
367
|
+
# end
|
368
|
+
|
369
|
+
# Erases all mention of the given Amazon::HITs from one of the
# Project's assignment CSV files. Typically used when rejecting a
# HIT assignment.
# ==== Params
# [assignments_file] A Filer::CSV instance
#                    corresponding to a file like
#                    Project#local#file('data',
#                    'assignment.csv').
# [hits]             An enumerable collection of Amazon::HIT
#                    instances to be deleted.
def unrecord_hits_in_assignments_file(assignments_file, hits)
  record_hits_in_assignments_file(assignments_file, hits) do |_hit, csv_row|
    unrecord_hit_in_csv_row(csv_row)
  end
end
|
384
|
+
|
385
|
+
# Erases particular details of a subset or all of a Project's
# Amazon::HITs from one of the Project's assignment CSV files.
#
# Specifically, deletes information about the HIT's expires_at and
# assignments_duration.
#
# Typically used when some or all of a Project's HITs have been
# processed and incorporated into a transcript and are not needed
# any more as Amazon::HITs on Amazon servers, but when we still
# want to retain the HIT ids in the Project assignment CSV.
# ==== Params
# [assignments_file] A Filer::CSV instance
#                    corresponding to a file like
#                    Project#local#file('data',
#                    'assignment.csv').
# [hits]             Optional. An enumerable collection of
#                    Amazon::HIT instances whose details are to be
#                    deleted. If not supplied, details for ALL
#                    HITs in the Project assignment CSV will be
#                    deleted.
def unrecord_hits_details_in_assignments_file(assignments_file, hits=nil)
  record_hits_in_assignments_file(assignments_file, hits) do |_hit, csv_row|
    unrecord_hit_details_in_csv_row(csv_row)
  end
end
|
410
|
+
|
411
|
+
# Checks for Typingpool's external dependencies by trying to run
# each command (ffmpeg, mp3splt, mp3wrap). If any appear to be
# missing, yields to the passed block an array containing the names
# of the missing commands. The block is not called when every
# command could be started.
def if_missing_dependencies
  #TODO: Test on Linux
  missing = []
  [['ffmpeg', '-version'], ['mp3splt', '-v'], ['mp3wrap']].each do |cmdline|
    begin
      # Only whether the command can be started matters; its output
      # and exit status are ignored.
      Open3.capture3(*cmdline)
    rescue SystemCallError
      # A command that cannot be spawned surfaces as an Errno::*
      # error (e.g. Errno::ENOENT); other errors are not swallowed.
      missing.push(cmdline.first)
    end
  end
  yield(missing) unless missing.empty?
end
|
426
|
+
|
427
|
+
# Begins recording of an HTTP mock fixture (for automated testing)
# using the VCR gem. Registers Config#amazon#key and
# Config#amazon#secret as sensitive data to be filtered from the
# recorded fixture, and determines the "cassette" name and "cassette
# library" dir from the supplied path.
# ==== Params
# [fixture_path] Path to where you want the HTTP fixture
#                recorded, including filename. The directory
#                becomes the cassette library dir; the filename
#                minus its extension becomes the cassette name.
# [config]       A Config instance, used to extract the
#                Config#amazon#secret and Config#amazon#key that
#                will be filtered from the fixture.
# ==== Returns
# Result of calling VCR.insert_cassette.
def vcr_record(fixture_path, config)
  VCR.configure do |c|
    c.cassette_library_dir = File.dirname(fixture_path)
    c.hook_into :webmock
    c.filter_sensitive_data('<AWS_KEY>') { config.amazon.key }
    c.filter_sensitive_data('<AWS_SECRET>') { config.amazon.secret }
  end
  VCR.insert_cassette(File.basename(fixture_path, '.*'), :record => :new_episodes)
end
|
449
|
+
|
450
|
+
# Stops the recording started by the last call to vcr_record.
# Returns the result of VCR.eject_cassette.
def vcr_stop
  VCR.eject_cassette
end
|
455
|
+
|
456
|
+
#protected
|
457
|
+
|
458
|
+
# Runs the passed block and returns its value. If the block raises a
# Typingpool::Error whose message matches "not find base url"
# (case-insensitive), aborts with a hint that the wrong config file
# may be in use. Any other Typingpool::Error is re-raised untouched.
# ==== Params
# [url_type] Optional. A label such as 'audio', inserted before
#            "URLs" in the abort message.
def with_abort_on_url_mismatch(url_type='')
  # Only add the separating space when there is a label, so the
  # default message does not contain a double space.
  label = url_type.empty? ? '' : "#{url_type} "
  begin
    yield
  rescue Typingpool::Error => exception
    raise unless exception.message =~ /not find base url/i
    abort "Previously recorded #{label}URLs don't look right. Are you using the right config file? You may have passed in a --config argument to a previous script and forgotten to do so now."
  end
end
|
470
|
+
|
471
|
+
# Iterates over every row of assignments_file via each!, yielding
# (hit, csv_row) so the block can modify the row.
#
# When hits is supplied, only rows whose 'audio_url' matches the url
# of one of the hits are yielded, together with that hit. When hits
# is nil (or false), every row is yielded with a nil hit.
def record_hits_in_assignments_file(assignments_file, hits)
  lookup = hits ? hits_by_url(hits) : nil
  assignments_file.each! do |csv_row|
    hit = nil
    if lookup
      hit = lookup[csv_row['audio_url']]
      next unless hit
    end
    yield(hit, csv_row)
  end
end
|
481
|
+
|
482
|
+
# For every row in assignments that corresponds to one of the
# uploading assignments, sets "#{type}_uploaded" to status for each
# of the given types -- but only for types where the row has a
# "#{type}_url" value.
def record_assignment_upload_status(assignments, uploading, types, status)
  record_in_selected_assignments(assignments, uploading) do |assignment|
    types.each do |type|
      # No URL means there is no remote asset whose status to track.
      next unless assignment["#{type}_url"]
      assignment["#{type}_uploaded"] = status
    end
  end
end
|
489
|
+
|
490
|
+
# Writes urls into the "#{type}_url" field of the rows in assignments
# that correspond to the uploading assignments.
#
# urls[i] is taken to belong to uploading[i]. The pairing is done by
# 'audio_url' rather than by iteration order, so each row gets the
# right URL even if the rows in assignments are ordered differently
# from uploading.
def record_assignment_urls(assignments, uploading, type, urls)
  url_for_audio = Hash[uploading.map { |assignment| assignment['audio_url'] }.zip(urls)]
  record_in_selected_assignments(assignments, uploading) do |assignment|
    assignment["#{type}_url"] = url_for_audio[assignment['audio_url']]
  end
end
|
497
|
+
|
498
|
+
# Iterates over assignments via each!, yielding only those rows
# whose 'audio_url' equals the 'audio_url' of one of the selected
# assignments.
def record_in_selected_assignments(assignments, selected)
  # Only membership is needed, so index the selected audio URLs
  # directly instead of building a hash through a deep flatten and
  # splat.
  selected_urls = {}
  selected.each { |assignment| selected_urls[assignment['audio_url']] = true }
  assignments.each! do |assignment|
    yield(assignment) if selected_urls[assignment['audio_url']]
  end
end
|
506
|
+
|
507
|
+
# Deletes the 'hit_expires_at' and 'hit_assignments_duration'
# entries from csv_row, leaving any 'hit_id' in place.
def unrecord_hit_details_in_csv_row(csv_row)
  %w(hit_expires_at hit_assignments_duration).each { |key| csv_row.delete(key) }
end
|
510
|
+
|
511
|
+
# Deletes every HIT-related entry from csv_row: the details removed
# by unrecord_hit_details_in_csv_row plus 'hit_id'.
def unrecord_hit_in_csv_row(csv_row)
  unrecord_hit_details_in_csv_row(csv_row)
  csv_row.delete('hit_id')
end
|
515
|
+
|
516
|
+
# File names used for a project's HTML transcript: :done for a
# complete transcript, :working for one still in progress.
def transcript_filename
  {
    :done => 'transcript.html',
    :working => 'transcript_in_progress.html'
  }
end
|
522
|
+
|
523
|
+
# Returns a hash mapping each hit's url to the hit itself. If
# several hits share a url, the last one wins.
def hits_by_url(hits)
  # Build from the pairs directly; no deep flatten or splat.
  Hash[hits.map { |hit| [hit.url, hit] }]
end
|
526
|
+
|
527
|
+
|
528
|
+
# Resolves rows left in an unknown upload state. For every row in
# assignments whose "#{property}_uploaded" is 'maybe', asks
# Typingpool::Utility.working_url? about the row's
# "#{property}_url" and rewrites the status to 'yes' or 'no'
# accordingly. Other rows are left as they are.
def check_interrupted_uploads(assignments, property)
  status_key = "#{property}_uploaded"
  url_key = "#{property}_url"
  assignments.each! do |assignment|
    next unless assignment[status_key].to_s == 'maybe'
    assignment[status_key] = Typingpool::Utility.working_url?(assignment[url_key]) ? 'yes' : 'no'
  end
end
|
535
|
+
end #class << self
|
536
|
+
module FriendlyExceptions
  # Massages terse exceptions from our model layer into a
  # human-friendly message suitable for an abort message from a
  # command-line script.
  # ==== Params
  # [name]   A string used to refer to the input. For example
  #          'project title' or '--config argument'. Used in the
  #          goodbye message.
  # [*input] One or more values. The user input that will cause
  #          any exceptions. Used in the goodbye message.
  # [&block] The block to execute and monitor for
  #          exceptions. Will be passed [*input].
  # ==== Errors
  # Will abort with a friendly message on any exception of the
  # type Typingpool::Error::Argument.
  # ==== Returns
  # The return value of &block.
  def with_friendly_exceptions(name, *input)
    begin
      yield(*input)
    rescue Typingpool::Error::Argument => exception
      quoted_input = input.map { |value| "'#{value}'" }.join(', ')
      goodbye = "Could not make sense of #{name.to_s} #{quoted_input}. #{exception.message}"
      # Make sure the message ends with exactly one trailing period.
      goodbye += '.' unless goodbye.match(/\.$/)
      abort goodbye
    end
  end
end #FriendlyExceptions
|
565
|
+
module CLI
|
566
|
+
class << self
|
567
|
+
include App::FriendlyExceptions
|
568
|
+
# Optionally takes an ostensible path to a config file, as passed as
# a command-line option. Checks to make sure the path (after
# expansion) is an existing regular file; returns nil if it is not,
# returns the result of Config.file(path) if it is. If no path is
# passed, the default config is returned (as retrieved by
# Config.file with no args).
def config_from_arg(arg=nil)
  return Config.file unless arg
  path = File.expand_path(arg)
  # File.file? is false for paths that do not exist, so no separate
  # existence check is needed (File.exists? was removed in Ruby 3.2).
  return unless File.file?(path)
  Config.file(path)
end
|
582
|
+
|
583
|
+
# Returns a friendly explanation of the --help option, built from
# the basename of the running program, for appending to script usage
# banners.
def help_arg_explanation
  program = File.basename($PROGRAM_NAME)
  "`#{program} --help` for more information."
end
|
588
|
+
|
589
|
+
# Converts a user arg into a Project instance, setting up or
# consulting a Config along the way.
# ==== Params
# [arg]    A user-supplied argument specifying either a path to
#          an existing Project folder (Project#local) or the
#          name of a project folder within
#          [config]#transcripts.
# [config] A Config instance. If [arg] is a path to an existing
#          directory, will be modified -- Config#transcripts will
#          be changed to the directory containing [arg].
# ==== Errors
# Will abort with a friendly message on any errors.
# ==== Returns
# A Project instance.
def project_from_arg_and_config(arg, config)
  # File.directory? is false for paths that do not exist, so it
  # covers the existence check too (File.exists? was removed in
  # Ruby 3.2).
  path = if File.directory?(arg)
    config.transcripts = File.dirname(arg)
    arg
  else
    abort "No 'transcripts' dir specified in your config file and '#{arg}' is not a valid path" unless config.transcripts
    candidate = File.join(config.transcripts, arg)
    abort "No such project '#{arg}' in dir '#{config.transcripts}'" unless File.exist?(candidate)
    abort "'#{arg}' is not a directory at '#{candidate}'" unless File.directory?(candidate)
    candidate
  end
  project = with_friendly_exceptions('project name', File.basename(path)) do
    Typingpool::Project.new(File.basename(path), config)
  end
  abort "Not a project directory at '#{path}'" unless project.local
  project
end
|
620
|
+
|
621
|
+
end #class << self
|
622
|
+
module Formatter
|
623
|
+
require 'highline/import'
|
624
|
+
# Returns text run through HighLine.color with the :bold style.
def cli_bold(text)
  HighLine.color(text, :bold)
end
|
627
|
+
|
628
|
+
# Returns text run through HighLine.color with the :reverse style.
def cli_reverse(text)
  HighLine.color(text, :reverse)
end
|
631
|
+
end #Formatter
|
632
|
+
end #CLI
|
633
|
+
end #App
|
634
|
+
end #Typingpool
|