typingpool 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. data/Rakefile +23 -0
  2. data/bin/tp-assign +240 -0
  3. data/bin/tp-collect +50 -0
  4. data/bin/tp-config +114 -0
  5. data/bin/tp-finish +101 -0
  6. data/bin/tp-make +169 -0
  7. data/bin/tp-review +175 -0
  8. data/lib/typingpool/amazon.rb +732 -0
  9. data/lib/typingpool/app.rb +634 -0
  10. data/lib/typingpool/config.rb +344 -0
  11. data/lib/typingpool/error.rb +22 -0
  12. data/lib/typingpool/filer.rb +396 -0
  13. data/lib/typingpool/project.rb +593 -0
  14. data/lib/typingpool/template.rb +175 -0
  15. data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
  16. data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
  17. data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
  18. data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
  19. data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
  20. data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
  21. data/lib/typingpool/templates/assignment/main.css +20 -0
  22. data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
  23. data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
  24. data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
  25. data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
  26. data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
  27. data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
  28. data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
  29. data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
  30. data/lib/typingpool/templates/config.yml +21 -0
  31. data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
  32. data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
  33. data/lib/typingpool/templates/project/data/.empty_directory +0 -0
  34. data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
  35. data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
  36. data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
  37. data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
  38. data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
  39. data/lib/typingpool/templates/project/etc/transcript.css +49 -0
  40. data/lib/typingpool/templates/transcript.html.erb +23 -0
  41. data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
  42. data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
  43. data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
  44. data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
  45. data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
  46. data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
  47. data/lib/typingpool/test/fixtures/config-1 +20 -0
  48. data/lib/typingpool/test/fixtures/config-2 +25 -0
  49. data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
  50. data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
  51. data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
  52. data/lib/typingpool/test/fixtures/template.html.erb +10 -0
  53. data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
  54. data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
  55. data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
  56. data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
  57. data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
  58. data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
  59. data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
  60. data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
  61. data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
  62. data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
  63. data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
  64. data/lib/typingpool/test.rb +418 -0
  65. data/lib/typingpool/transcript.rb +181 -0
  66. data/lib/typingpool/utility.rb +272 -0
  67. data/lib/typingpool.rb +500 -0
  68. data/test/make_amazon_question_fixture.rb +24 -0
  69. data/test/make_tp_collect_fixture_1.rb +26 -0
  70. data/test/make_tp_collect_fixture_2.rb +16 -0
  71. data/test/make_tp_collect_fixture_3.rb +15 -0
  72. data/test/make_tp_collect_fixture_4.rb +17 -0
  73. data/test/make_tp_review_fixture_1.rb +26 -0
  74. data/test/make_tp_review_fixture_2.rb +30 -0
  75. data/test/make_transcript_chunks_fixture.rb +53 -0
  76. data/test/test_integration_script_1_tp_config.rb +108 -0
  77. data/test/test_integration_script_2_tp_make.rb +119 -0
  78. data/test/test_integration_script_3_tp_assign.rb +152 -0
  79. data/test/test_integration_script_4_tp_review.rb +72 -0
  80. data/test/test_integration_script_5_tp_collect.rb +44 -0
  81. data/test/test_integration_script_6_tp_finish.rb +123 -0
  82. data/test/test_unit_amazon.rb +153 -0
  83. data/test/test_unit_config.rb +94 -0
  84. data/test/test_unit_filer.rb +202 -0
  85. data/test/test_unit_project.rb +168 -0
  86. data/test/test_unit_project_local.rb +68 -0
  87. data/test/test_unit_project_remote.rb +157 -0
  88. data/test/test_unit_template.rb +111 -0
  89. data/test/test_unit_transcript.rb +77 -0
  90. metadata +234 -0
@@ -0,0 +1,634 @@
1
+ module Typingpool
2
+ #Module encapsulating high-level Typingpool procedures and called
3
+ #from the various tp-* scripts. Control layer type code.
4
+ #
5
+ #This is the least mature Typingpool class. At present, all methods
6
+ #are class methods. This will likely change to a model in which
7
+ #different subclasses of App instances do everything from parsing
8
+ #and validating command-line input to completing core functionality
9
+ #to outputting context-dependent result summaries.
10
+ #
11
+ #As such, all App methods should be considered fluid and likely to
12
+ #change in subsequent releases.
13
+ module App
14
+ require 'vcr'
15
+ require 'stringio'
16
+ require 'open3'
17
+ class << self
18
+
19
+ #Given a Project instance, figures out which audio chunks, if
20
+ #any, need to be uploaded and uploads them.
21
+ #
22
+ #Note that this method is sensitive to the possibility of
23
+ #interrupted batch uploads. It checks for previously interrupted
24
+ #uploads at the start to see if it needs to re-try them, and
25
+ #writes out what uploads it is attempting prior to beginning the
26
+ #upload in case the upload is interrupted by an exception.
27
+ #
28
+ #As such, any script calling this method can usually be simply
29
+ #re-run to re-attempt the upload.
30
+ #
31
+ #Reads from and writes to the project's own assignment CSV,
32
+ #which it looks up itself as
33
+ #Project#local#file('data', 'assignment.csv').
34
+ #
35
+ #Returns an array of urls corresponding to uploaded files. If no
36
+ #files were uploaded, the array will be empty
37
+ # ==== Params
38
+ # [project] A Project instance.
39
+ # [&block] Optional. A block that will be called at the
40
+ # beginning of each file upload and passed
41
+ # the local path to the file and the remote
42
+ # name of the file.
43
+ # ==== Returns
44
+ # An array of URLs of the uploaded audio files.
45
def upload_audio_for_project(project)
  #Audio upload state is tracked only in assignment.csv. No provision
  #is made for sandbox-assignment.csv because audio upload data in
  #that file is effectively ignored.
  assignments_file = project.local.file('data', 'assignment.csv').as(:csv)
  #Settle any rows a previously interrupted run left marked 'maybe'
  #before working out what still needs to go up.
  check_interrupted_uploads(assignments_file, 'audio')
  uploading = assignments_file.reject { |assignment| assignment['audio_uploaded'] == 'yes' }
  return uploading if uploading.empty?
  local_files = uploading.map do |assignment|
    basename = Typingpool::Project.local_basename_from_url(assignment['audio_url'])
    project.local.file('audio', 'chunks', basename).as(:audio)
  end
  files = Typingpool::Filer::Files.new(local_files)
  remote_files = with_abort_on_url_mismatch('audio') do
    uploading.map { |assignment| project.remote.url_basename(assignment['audio_url']) }
  end
  #Mark the rows 'maybe' before uploading so that an interrupted
  #upload is detectable on the next run.
  record_assignment_upload_status(assignments_file, uploading, ['audio'], 'maybe')
  project.remote.put(files.to_streams, remote_files) do |file, as|
    yield(file, as) if block_given?
  end
  record_assignment_upload_status(assignments_file, uploading, ['audio'], 'yes')
  uploading.map { |assignment| assignment['audio_url'] }
end
69
+
70
+ #For a subset of a Project instance's chunks/assignments,
71
+ #uploads assignment html that is used as the external question
72
+ #for a Mechanical Turk HIT.
73
+ #
74
+ #Takes the same precautions around interrupted network uploads
75
+ #as upload_audio_for_project.
76
+ #
77
+ #The URL of each uploaded assignment is written into
78
+ #Project#local.file('data', 'assignment.csv'), along with
79
+ #metadata confirming that the upload completed.
80
+ #
81
+ # ==== Params
82
+ # [project] A Project instance.
83
+ # [assignments_file] A Filer::CSV instance from which
84
+ # assignments_uploading were drawn. The
85
+ # upload status will be written and
86
+ # tracked here.
87
+ # [assignments_uploading] An enumerable collection of hashes
88
+ # corresponding to rows in
89
+ # Project#local.file('data',
90
+ # 'assignment.csv'). Only assignments
91
+ # whose URLs are contained in these
92
+ # hashes will be uploaded.
93
+ # [template] A Template::Assignment instance. Used to render
94
+ # assignments_uploading into HTML prior
95
+ # to uploading.
96
+ # ==== Returns
97
+ # An array of URLs of the uploaded assignments
98
def upload_html_for_project_assignments(project, assignments_file, assignments_uploading, template)
  #Render every assignment up front, each wrapped in a StringIO so it
  #can be handed to the remote like an open file.
  rendered = assignments_uploading.map do |assignment|
    StringIO.new(template.render(assignment))
  end
  #The HTML file is named after the audio chunk, with the audio
  #extension swapped for '.html'.
  html_basenames = assignments_uploading.map do |assignment|
    audio_basename = project.class.local_basename_from_url(assignment['audio_url'])
    File.basename(audio_basename, '.*') + '.html'
  end
  remote_names = project.create_remote_names(html_basenames)
  urls = remote_names.map { |remote_name| project.remote.file_to_url(remote_name) }
  #Write the URLs and a 'maybe' status before uploading, so a
  #half-finished upload can be rolled back later.
  record_assignment_urls(assignments_file, assignments_uploading, 'assignment', urls)
  record_assignment_upload_status(assignments_file, assignments_uploading, ['assignment'], 'maybe')
  project.remote.put(rendered, remote_names)
  record_assignment_upload_status(assignments_file, assignments_uploading, ['assignment'], 'yes')
  urls
end
113
+
114
+ #Removes one or more types of remote files -- audio, assignment
115
+ #html, etc. -- associated with a subset of a Project instance's
116
+ #chunks/assignments.
117
+ #
118
+ #Writes to Project#local.file('data', 'assignment.csv') to
119
+ #reflect these changes.
120
+ #
121
+ #As with upload_audio_for_project, this method is sensitive to
122
+ #the possibility of interrupted batch operations over the
123
+ #network. This means
124
+ # 1. It handles deleting files that *might* have been uploaded,
125
+ # trapping any exceptions that arise if such files do not exist
126
+ # on the remote server.
127
+ # 2. It writes out what deletions it is attempting before
128
+ # attempting them, so that if the deletion operation is
129
+ # interrupted by an exception, the files will be clearly marked
130
+ # in an unknown state.
131
+ #
132
+ # ==== Params
133
+ # [project] A Project instance.
134
+ # [assignments_file] A Filer::CSV instance from which
135
+ # assignments_updeleting were
136
+ # drawn. The upload status will be
137
+ # written and tracked here.
138
+ # [assignments_updeleting] An enumerable collection of hashes
139
+ # corresponding to selected rows in
140
+ # Project#local#file('data',
141
+ # 'assignment.csv'). Only assets whose
142
+ # URLs are contained in these hashes
143
+ # will be deleted.
144
+ # [types] Optional. An array of asset
145
+ # 'types'. The default, ['audio',
146
+ # 'assignment'], means assets at
147
+ # assignment['audio_url'] and
148
+ # assignment['assignment_url'] will be
149
+ # deleted for each assignment hash in
150
+ # assignments_updeleting and that
151
+ # upload status will be tracked in
152
+ # assignment['audio_uploaded'] and
153
+ # assignment['assignment_uploaded'].
154
+ # [&block] Optional. A code block that will be
155
+ # called with the name of the remote
156
+ # file just before the delete is
157
+ # carried out.
158
+ # ==== Returns
159
+ # A count of how many items were actually removed from the
160
+ # server.
161
def updelete_assignment_assets(project, assignments_file, assignments_updeleting=assignments_file, types=['audio', 'assignment'])
  #An asset may exist on the remote when its status is 'yes'
  #(uploaded) or 'maybe' (upload or deletion was interrupted).
  possibly_uploaded = lambda do |assignment, type|
    status = assignment["#{type}_uploaded"]
    status == 'yes' || status == 'maybe'
  end
  assignments_updeleting = assignments_updeleting.select do |assignment|
    types.any? { |type| possibly_uploaded.call(assignment, type) }
  end.flatten
  urls_updeleting = assignments_updeleting.map do |assignment|
    deletable_types = types.select { |type| possibly_uploaded.call(assignment, type) }
    deletable_types.map { |type| assignment["#{type}_url"] }.select { |url| url }
  end.flatten
  return 0 if urls_updeleting.empty?
  missing = []
  #Flag everything as 'maybe' first: if the removal is interrupted the
  #rows are left clearly marked as being in an unknown state.
  record_assignment_upload_status(assignments_file, assignments_updeleting, types, 'maybe')
  begin
    with_abort_on_url_mismatch do
      project.remote.remove_urls(urls_updeleting) { |file| yield(file) if block_given? }
    end
  rescue Typingpool::Error::File::Remote => exception
    #The exception message is split on '; '. "No such file" parts are
    #tolerated (the file was never there); anything else is fatal.
    not_found, others = exception.message.split('; ').partition do |message|
      message.match(/no such file/i)
    end
    missing.concat(not_found)
    raise Error, "Can't remove files: #{others.join('; ')}" unless others.empty?
  end
  record_assignment_upload_status(assignments_file, assignments_updeleting, types, 'no')
  #Files reported as missing were not actually removed, so leave them
  #out of the count.
  urls_updeleting.count - missing.count
end
193
+
194
+ #Given a collection of Amazon::HITs, looks for Project folders
195
+ #on the local system waiting to "receive" those HITs. Such
196
+ #folders are kept in Config#transcripts. Returns Project
197
+ #instances associated with those folders, bundled together
198
+ #with the related HITs (see below for the exact format of the
199
+ #return value).
200
+ # ==== Params
201
+ # [hits] An enumerable collection of Amazon::HIT instances.
202
+ # [config] A Config instance.
203
+ # [&block] Optional. A block, if supplied, will be called
204
+ # repeatedly, each time being passed a different
205
+ # Project instance and an array of Amazon::HIT
206
+ # instances, corresponding to the subset of [hits]
207
+ # belonging to the Project.
208
+ # ==== Returns
209
+ # An array of hashes of the form {:project => project, :hits
210
+ # =>[hit1,hit2...]}.
211
def find_projects_waiting_for_hits(hits, config)
  #need[project_id] is tri-state: unset (project not examined yet),
  #false (examined and rejected) or true (examined and waiting).
  need = {}
  by_project_id = {}
  hits.each do |hit|
    if need[hit.project_id]
      by_project_id[hit.project_id][:hits].push(hit)
      next
    end
    next if need[hit.project_id] == false
    #Assume the project is not waiting until every check below passes.
    need[hit.project_id] = false
    project = Typingpool::Project.new(hit.project_title_from_url, config)
    next unless project.local && (project.local.id == hit.project_id)
    #A finished transcript means the project no longer needs HITs.
    #File.exist? is used because File.exists? was removed in Ruby 3.2.
    next if File.exist?(project.local.file(transcript_filename[:done]))
    by_project_id[hit.project_id] = {
      :project => project,
      :hits => [hit]
    }
    need[hit.project_id] = true
  end
  if block_given?
    by_project_id.values.each { |hash| yield(hash[:project], hash[:hits]) }
  end
  #The return value is ordered by project id.
  by_project_id.keys.sort.map { |key| by_project_id[key] }
end
236
+
237
+ #Given a Project and assignments file like
238
+ #Project#local#file('data', 'assignment.csv'), writes an HTML
239
+ #transcript for that project within the local project folder
240
+ #(Project#local). To do so, uses data from within Project#local,
241
+ #in particular the data dir and in particular within that the
242
+ #assignment.csv file.
243
+ # ==== Params
244
+ # [project] A Project instance.
245
+ # [assignments_file] A Filer::CSV instance
246
+ # corresponding to a file like
247
+ # Project#local#file('data',
248
+ # 'assignment.csv').
249
+ # [config] Optional. A Config instance. If not supplied, will
250
+ # use Project#config. Used to find the
251
+ # transcript template (Config#templates is
252
+ # examined).
253
+ # ==== Returns
254
+ # Path to the resulting HTML transcript file.
255
def create_transcript(project, assignments_file, config=project.config)
  #Only rows that already carry a transcript become chunks.
  transcribed = assignments_file.select { |assignment| assignment['transcript'] }
  transcript_chunks = transcribed.map do |assignment|
    chunk = Typingpool::Transcript::Chunk.new(assignment['transcript'])
    chunk.url = assignment['audio_url']
    chunk.project = assignment['project_id']
    chunk.worker = assignment['worker']
    chunk.hit = assignment['hit_id']
    chunk
  end
  transcript = Typingpool::Transcript.new(project.name, transcript_chunks)
  transcript.subtitle = project.local.subtitle
  #The transcript is complete when it has as many entries as there
  #are files in the project's audio/chunks dir.
  done = (transcript.to_a.length == project.local.subdir('audio', 'chunks').to_a.size)
  out_file = done ? transcript_filename[:done] : transcript_filename[:working]
  begin
    template = Template.from_config('transcript', config)
  rescue Error::File::NotExists => e
    abort "Couldn't find the template dir in your config file: #{e}"
  rescue Error => e
    abort "There was a fatal error with the transcript template: #{e}"
  end
  #Any stale in-progress transcript is removed before writing.
  #File.exist? is used because File.exists? was removed in Ruby 3.2.
  working_path = project.local.file(transcript_filename[:working])
  File.delete(working_path) if File.exist?(working_path)
  File.open(project.local.file(out_file), 'w') do |out|
    out << template.render({:transcript => transcript})
  end
  #Note: this is the transcript's file name, not a full path.
  out_file
end
281
+
282
+ #Creates the file Project#local#file('data',
283
+ #'sandbox-assignment.csv') if it doesn't exist. Populates the
284
+ #file by copying over Project#local#file('data',
285
+ #'assignment.csv') and stripping it of HIT and assignment_url
286
+ #data.
287
+ #
288
+ #Always returns a Filer::CSV instance linked to
289
+ #sandbox-assignment.csv.
290
def ensure_sandbox_assignment_csv(project)
  csv = project.local.file('data', 'sandbox-assignment.csv').as(:csv)
  #File.exist? is used because File.exists? was removed in Ruby 3.2.
  return csv if File.exist?(csv)
  source = project.local.file('data', 'assignment.csv')
  raise Error, "No assignment CSV to copy" unless File.exist?(source)
  #Copy every row, stripped of HIT data and of the assignment URL and
  #upload status.
  csv.write(
    source.as(:csv).map do |assignment|
      unrecord_hit_in_csv_row(assignment)
      assignment.delete('assignment_url')
      assignment.delete('assignment_uploaded')
      assignment
    end
  )
  csv
end
304
+
305
+ #Takes Project instance and a boolean indicating whether we're
306
+ #working in the Amazon sandbox. Returns a Filer::CSV instance
307
+ #corresponding to the appropriate assignments file,
308
+ #e.g. Project#local#file('data', 'assignment.csv')#as(:csv).
309
def assignments_file_for_sandbox_status(sandbox, project)
  #Sandbox work gets its own CSV, created on demand.
  return ensure_sandbox_assignment_csv(project) if sandbox
  project.local.file('data', 'assignment.csv').as(:csv)
end
316
+
317
+ #Extracts relevant information from a collection of
318
+ #just-assigned Amazon::HITs and writes it into the Project's
319
+ #assignment CSV file for future use.
320
+ # ==== Params
321
+ # [assignments_file] A Filer::CSV instance
322
+ # corresponding to a file like
323
+ # Project#local#file('data',
324
+ # 'assignment.csv').
325
+ # [hits] An enumerable collection of Amazon::HIT instances that
326
+ # were just assigned (that is, that have one
327
+ # assignment, which has a blank status).
328
def record_assigned_hits_in_assignments_file(assignments_file, hits)
  #For each CSV row matched to a HIT, store the HIT's id plus its
  #expiry and assignment duration (both stringified).
  record_hits_in_assignments_file(assignments_file, hits) do |assigned_hit, row|
    row['hit_id'] = assigned_hit.id
    row['hit_expires_at'] = assigned_hit.full.expires_at.to_s
    row['hit_assignments_duration'] = assigned_hit.full.assignments_duration.to_s
  end
end
335
+
336
+ #Extracts relevant information from a collection of
337
+ #just-approved Amazon::HITs and writes it into the Project's
338
+ #assignment CSV file (Project#local#file('data', 'assignment.csv')) for
339
+ #future use.
340
+ # ==== Params
341
+ # [assignments_file] A Filer::CSV instance
342
+ # corresponding to a file like
343
+ # Project#local#file('data',
344
+ # 'assignment.csv').
345
+ # [hits] An enumerable collection of Amazon::HIT instances whose
346
+ # one assignment has the status 'Approved'.
347
def record_approved_hits_in_assignments_file(assignments_file, hits)
  record_hits_in_assignments_file(assignments_file, hits) do |approved_hit, row|
    #A row that already has a transcript is left untouched.
    unless row['transcript']
      row['transcript'] = approved_hit.transcript.body
      row['worker'] = approved_hit.transcript.worker
      row['hit_id'] = approved_hit.id
    end
  end
end
355
+
356
+ #Given a Project instance and an array of modified assignment
357
+ #hashes previously retrieved from the Project's assignment CSV
358
+ #(Project#local#file('data', 'assignment.csv')), writes the
359
+ #'assignment_url' property of each modified hash back to the
360
+ #corresponding row in the original CSV.
361
+ # def record_assignment_urls_in_project(project, assignments)
362
+ # assignments_by_audio_url = Hash[ *assignments.map{|assignment| [assignment['audio_url'], assignment] }.flatten ]
363
+ # project.local.file('data', 'assignment.csv').as(:csv).each! do |csv_row|
364
+ # assignment = assignments_by_audio_url[csv_row['audio_url']] or next
365
+ # csv_row['assignment_url'] = assignment['assignment_url']
366
+ # end
367
+ # end
368
+
369
+ #Erases all mention of the given Amazon::HITs from one of the
370
+ #Project's assignment CSV files. Typically used when rejecting a
371
+ #HIT assignment.
372
+ # ==== Params
373
+ # [assignments_file] A Filer::CSV instance
374
+ # corresponding to a file like
375
+ # Project#local#file('data',
376
+ # 'assignment.csv').
377
+ # [hits] An enumerable collection of Amazon::HIT instances to be
378
+ # deleted.
379
def unrecord_hits_in_assignments_file(assignments_file, hits)
  #Strip all HIT columns, the HIT id included, from each matched row.
  record_hits_in_assignments_file(assignments_file, hits) { |_hit, row| unrecord_hit_in_csv_row(row) }
end
384
+
385
+ #Erases particular details of a subset or all of a Project's
386
+ #Amazon::HITs from one of the Project's assignment CSV files.
387
+ #
388
+ #Specifically, deletes information about the HIT's
389
+ #expires_at, and assignments_duration.
390
+ #
391
+ #Typically used when some or all of a Project's HITs have been
392
+ #processed and incorporated into a transcript and are not needed
393
+ #any more as Amazon::HITs on Amazon servers, but when we still
394
+ #want to retain the HIT ids in the Project assignment CSV.
395
+ # ==== Params
396
+ # [assignments_file] A Filer::CSV instance
397
+ # corresponding to a file like
398
+ # Project#local#file('data',
399
+ # 'assignment.csv').
400
+ # [hits] Optional. An enumerable collection of Amazon::HIT
401
+ # instances whose details are to be
402
+ # deleted. If not supplied, details for ALL
403
+ # HITs in the Project assignment CSV will be
404
+ # deleted.
405
def unrecord_hits_details_in_assignments_file(assignments_file, hits=nil)
  #Strip the HIT detail columns but keep the HIT id. With hits=nil,
  #every row in the file is processed.
  record_hits_in_assignments_file(assignments_file, hits) { |_hit, row| unrecord_hit_details_in_csv_row(row) }
end
410
+
411
+ #Checks for Typingpool's external dependencies. If they appear
412
+ #to be missing, yields to the passed block an array containing the
413
+ #names of the missing commands/packages (e.g. ffmpeg).
414
def if_missing_dependencies
  #TODO: Test on Linux
  probes = [['ffmpeg', '-version'], ['mp3splt', '-v'], ['mp3wrap']]
  #A command counts as missing only when it cannot be launched at all.
  #Its output and exit status are deliberately not inspected.
  missing = probes.reject do |cmdline|
    begin
      Open3.capture3(*cmdline)
      true
    rescue StandardError
      false
    end
  end.map { |cmdline| cmdline.first }
  yield(missing) unless missing.empty?
end
426
+
427
+ #Begins recording of an HTTP mock fixture (for automated
428
+ #testing) using the great VCR gem. Automatically filters your
429
+ #Config#amazon#key and Config#amazon#secret from the recorded
430
+ #fixture, and automatically determines the "cassette" name and
431
+ #"cassette library" dir from the supplied path.
432
+ # ==== Params
433
+ # [fixture_path] Path to where you want the HTTP fixture
434
+ # recorded, including filename.
435
+ # [config] A Config instance, used to extract the
436
+ # Config#amazon#secret and Config#amazon#key that
437
+ # will be filtered from the fixture.
438
+ # ==== Returns
439
+ # Result of calling VCR.insert_cassette.
440
def vcr_record(fixture_path, config)
  #The fixture's directory becomes the cassette library, and its file
  #name minus the extension becomes the cassette name.
  library_dir = File.dirname(fixture_path)
  cassette_name = File.basename(fixture_path, '.*')
  VCR.configure do |vcr|
    vcr.cassette_library_dir = library_dir
    vcr.hook_into :webmock
    #Keep the Amazon credentials out of the recorded fixture.
    vcr.filter_sensitive_data('<AWS_KEY>') { config.amazon.key }
    vcr.filter_sensitive_data('<AWS_SECRET>') { config.amazon.secret }
  end
  VCR.insert_cassette(cassette_name, :record => :new_episodes)
end
449
+
450
+ #Stops recording of the last call to vcr_record. Returns the
451
+ #result of VCR.eject_cassette.
452
def vcr_stop
  #Ejects the current cassette and returns whatever VCR returns.
  VCR.eject_cassette()
end
455
+
456
+ #protected
457
+
458
#Runs the block. If it raises a Typingpool::Error whose message
#mentions "not find base url", aborts the script with a hint about
#the config file. Any other Typingpool::Error is re-raised untouched.
# ==== Params
# [url_type] Optional. A word such as 'audio' inserted before
#            "URLs" in the abort message. When blank, nothing is
#            inserted.
# ==== Returns
# The return value of the block.
def with_abort_on_url_mismatch(url_type='')
  #Add the separating space only when there is a label, so the
  #message never contains a double space.
  label = url_type.to_s.empty? ? '' : "#{url_type} "
  begin
    yield
  rescue Typingpool::Error => exception
    raise unless exception.message.match(/not find base url/i)
    abort "Previously recorded #{label}URLs don't look right. Are you using the right config file? You may have passed in a --config argument to a previous script and forgotten to do so now."
  end
end
470
+
471
#Walks every row of the assignments file and yields (hit, csv_row)
#pairs to the block. Rows are matched to hits by audio URL, and rows
#with no matching hit are skipped. When hits is nil, every row is
#yielded with a nil hit.
def record_hits_in_assignments_file(assignments_file, hits)
  lookup = hits ? hits_by_url(hits) : nil
  assignments_file.each! do |csv_row|
    if hits
      matching_hit = lookup[csv_row['audio_url']]
      next unless matching_hit
      yield(matching_hit, csv_row)
    else
      yield(nil, csv_row)
    end
  end
end
481
+
482
#For each selected row, sets "<type>_uploaded" to status for every
#type whose "<type>_url" is present. Types without a URL are left
#alone.
def record_assignment_upload_status(assignments, uploading, types, status)
  record_in_selected_assignments(assignments, uploading) do |row|
    types.each do |type|
      next unless row["#{type}_url"]
      row["#{type}_uploaded"] = status
    end
  end
end
489
+
490
+ def record_assignment_urls(assignments, uploading, type, urls)
491
+ i = 0
492
+ record_in_selected_assignments(assignments, uploading) do |assignment|
493
+ assignment["#{type}_url"] = urls[i]
494
+ i += 1
495
+ end #record_in_selected_assignments...
496
+ end
497
+
498
#Yields each row of assignments whose 'audio_url' matches the
#'audio_url' of some entry in selected. Rows are visited via each!.
def record_in_selected_assignments(assignments, selected)
  #Build the lookup pair by pair. No flatten or splat is used, so
  #array-like entries cannot be broken apart and the size of
  #selected is not limited by argument-count limits.
  selected_by_url = {}
  selected.each { |chosen| selected_by_url[chosen['audio_url']] = chosen }
  assignments.each! do |assignment|
    yield(assignment) if selected_by_url[assignment['audio_url']]
  end
end
506
+
507
#Deletes the HIT detail columns (expiry and assignment duration)
#from the row, leaving 'hit_id' in place.
def unrecord_hit_details_in_csv_row(csv_row)
  ['hit_expires_at', 'hit_assignments_duration'].each do |column|
    csv_row.delete(column)
  end
end
510
+
511
#Deletes every HIT column from the row: the detail columns first,
#then 'hit_id'. Returns the result of deleting 'hit_id'.
def unrecord_hit_in_csv_row(csv_row)
  unrecord_hit_details_in_csv_row(csv_row)
  removed_id = csv_row.delete('hit_id')
  removed_id
end
515
+
516
#File names used for a project's transcript: :done for the finished
#transcript, :working for one still in progress. A fresh hash is
#returned on every call.
def transcript_filename
  names = {}
  names[:done] = 'transcript.html'
  names[:working] = 'transcript_in_progress.html'
  names
end
522
+
523
#Indexes the hits by their url. If two hits share a url, the later
#one wins.
def hits_by_url(hits)
  pairs = hits.map { |hit| [hit.url, hit] }
  Hash[pairs]
end
526
+
527
+
528
#Resolves rows whose "<property>_uploaded" status is 'maybe'. The
#status becomes 'yes' when Typingpool::Utility.working_url? accepts
#the row's "<property>_url", otherwise 'no'. Other rows are left
#untouched.
def check_interrupted_uploads(assignments, property)
  status_key = "#{property}_uploaded"
  url_key = "#{property}_url"
  assignments.each! do |assignment|
    next unless assignment[status_key].to_s == 'maybe'
    reachable = Typingpool::Utility.working_url?(assignment[url_key])
    assignment[status_key] = reachable ? 'yes' : 'no'
  end
end
535
+ end #class << self
536
+ module FriendlyExceptions
537
+ #Massages terse exceptions from our model layer into a
538
+ #human-friendly message suitable for an abort message from a
539
+ #command-line script.
540
+ # ==== Params
541
+ # [name] A string used to refer to the input. For example
542
+ # 'project title' or '--config argument'. Used in the
543
+ # goodbye message.
544
+ # [*input] One or more values. The user input that will cause
545
+ # any exceptions. Used in the goodbye message.
546
+ # [&block] The block to execute and monitor for
547
+ # exceptions. Will be passed [*input].
548
+ # ==== Errors
549
+ # Will abort with a friendly message on any exception of the
550
+ # type Typingpool::Error::Argument.
551
+ # ==== Returns
552
+ # The return value of &block.
553
def with_friendly_exceptions(name, *input)
  yield(*input)
rescue Typingpool::Error::Argument => exception
  #Quote each offending value. The block parameter is deliberately
  #not called `input`, so it cannot shadow the method's own *input.
  quoted = input.map { |value| "'#{value}'" }.join(', ')
  goodbye = "Could not make sense of #{name} #{quoted}. #{exception.message}"
  #End with a full stop unless the message already has one.
  goodbye << '.' unless goodbye.match(/\.$/)
  abort goodbye
end
564
+ end #FriendlyExceptions
565
+ module CLI
566
+ class << self
567
+ include App::FriendlyExceptions
568
+ #Optionally takes an ostensible path to a config file, as passed
569
+ #as a command-line option. Checks to make sure the file exists;
570
+ #returns nil if it does not, returns a Config instance if it
571
+ #does. If no path is passed, the default config file is returned
572
+ #(as retrieved by Config.file with no args).
573
def config_from_arg(arg=nil)
  #No path given: fall back to the default config file.
  return Config.file unless arg
  path = File.expand_path(arg)
  #File.file? is false for paths that do not exist, so it covers the
  #existence check too. File.exists? was removed in Ruby 3.2.
  return unless File.file?(path)
  Config.file(path)
end
582
+
583
+ #Outputs a friendly explanation of the --help option for
584
+ #appending to script usage banners.
585
def help_arg_explanation
  #Use the running script's own file name, so the hint reads
  #correctly from any script.
  script_name = File.basename($PROGRAM_NAME)
  "`#{script_name} --help` for more information."
end
588
+
589
+ #Converts a user arg into a Project instance, setting up or
590
+ #consulting a Config along the way.
591
+ # ==== Params
592
+ # [arg] A user-supplied argument specifying either an absolute
593
+ # path to a Project folder (Project#local) or the
594
+ # name of a project folder within
595
+ # [config]#transcripts.
596
+ # [config] A Config instance. If [arg] is an absolute path,
597
+ # will be modified -- Config#transcripts will be
598
+ # changed to match the implied transcripts dir.
599
+ # ==== Errors
600
+ # Will abort with a friendly message on any errors.
601
+ # ==== Returns
602
+ # A Project instance.
603
def project_from_arg_and_config(arg, config)
  #File.directory? is false for paths that do not exist, so no
  #separate existence check is needed. File.exists? was removed in
  #Ruby 3.2.
  if File.directory?(arg)
    #arg is itself a directory path: its parent becomes the
    #transcripts dir.
    config.transcripts = File.dirname(arg)
    path = arg
  else
    #Otherwise arg must name a folder inside the configured
    #transcripts dir.
    abort "No 'transcripts' dir specified in your config file and '#{arg}' is not a valid path" unless config.transcripts
    path = File.join(config.transcripts, arg)
    abort "No such project '#{arg}' in dir '#{config.transcripts}'" unless File.exist?(path)
    abort "'#{arg}' is not a directory at '#{path}'" unless File.directory?(path)
  end
  project = with_friendly_exceptions('project name', File.basename(path)) do
    Typingpool::Project.new(File.basename(path), config)
  end
  abort "Not a project directory at '#{path}'" unless project.local
  project
end
620
+
621
+ end #class << self
622
#Mixin with small helpers for styling command-line output via
#HighLine.
module Formatter
  require 'highline/import'

  #Returns text colored with HighLine's :bold.
  def cli_bold(text)
    HighLine.color(text, :bold)
  end

  #Returns text colored with HighLine's :reverse.
  def cli_reverse(text)
    HighLine.color(text, :reverse)
  end
end #Formatter
632
+ end #CLI
633
+ end #App
634
+ end #Typingpool