typingpool 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. data/Rakefile +23 -0
  2. data/bin/tp-assign +240 -0
  3. data/bin/tp-collect +50 -0
  4. data/bin/tp-config +114 -0
  5. data/bin/tp-finish +101 -0
  6. data/bin/tp-make +169 -0
  7. data/bin/tp-review +175 -0
  8. data/lib/typingpool/amazon.rb +732 -0
  9. data/lib/typingpool/app.rb +634 -0
  10. data/lib/typingpool/config.rb +344 -0
  11. data/lib/typingpool/error.rb +22 -0
  12. data/lib/typingpool/filer.rb +396 -0
  13. data/lib/typingpool/project.rb +593 -0
  14. data/lib/typingpool/template.rb +175 -0
  15. data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
  16. data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
  17. data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
  18. data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
  19. data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
  20. data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
  21. data/lib/typingpool/templates/assignment/main.css +20 -0
  22. data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
  23. data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
  24. data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
  25. data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
  26. data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
  27. data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
  28. data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
  29. data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
  30. data/lib/typingpool/templates/config.yml +21 -0
  31. data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
  32. data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
  33. data/lib/typingpool/templates/project/data/.empty_directory +0 -0
  34. data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
  35. data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
  36. data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
  37. data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
  38. data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
  39. data/lib/typingpool/templates/project/etc/transcript.css +49 -0
  40. data/lib/typingpool/templates/transcript.html.erb +23 -0
  41. data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
  42. data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
  43. data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
  44. data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
  45. data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
  46. data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
  47. data/lib/typingpool/test/fixtures/config-1 +20 -0
  48. data/lib/typingpool/test/fixtures/config-2 +25 -0
  49. data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
  50. data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
  51. data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
  52. data/lib/typingpool/test/fixtures/template.html.erb +10 -0
  53. data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
  54. data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
  55. data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
  56. data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
  57. data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
  58. data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
  59. data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
  60. data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
  61. data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
  62. data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
  63. data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
  64. data/lib/typingpool/test.rb +418 -0
  65. data/lib/typingpool/transcript.rb +181 -0
  66. data/lib/typingpool/utility.rb +272 -0
  67. data/lib/typingpool.rb +500 -0
  68. data/test/make_amazon_question_fixture.rb +24 -0
  69. data/test/make_tp_collect_fixture_1.rb +26 -0
  70. data/test/make_tp_collect_fixture_2.rb +16 -0
  71. data/test/make_tp_collect_fixture_3.rb +15 -0
  72. data/test/make_tp_collect_fixture_4.rb +17 -0
  73. data/test/make_tp_review_fixture_1.rb +26 -0
  74. data/test/make_tp_review_fixture_2.rb +30 -0
  75. data/test/make_transcript_chunks_fixture.rb +53 -0
  76. data/test/test_integration_script_1_tp_config.rb +108 -0
  77. data/test/test_integration_script_2_tp_make.rb +119 -0
  78. data/test/test_integration_script_3_tp_assign.rb +152 -0
  79. data/test/test_integration_script_4_tp_review.rb +72 -0
  80. data/test/test_integration_script_5_tp_collect.rb +44 -0
  81. data/test/test_integration_script_6_tp_finish.rb +123 -0
  82. data/test/test_unit_amazon.rb +153 -0
  83. data/test/test_unit_config.rb +94 -0
  84. data/test/test_unit_filer.rb +202 -0
  85. data/test/test_unit_project.rb +168 -0
  86. data/test/test_unit_project_local.rb +68 -0
  87. data/test/test_unit_project_remote.rb +157 -0
  88. data/test/test_unit_template.rb +111 -0
  89. data/test/test_unit_transcript.rb +77 -0
  90. metadata +234 -0
@@ -0,0 +1,634 @@
1
+ module Typingpool
2
+ #Module encapsulating high-level Typingpool procedures and called
3
+ #from the various tp-* scripts. Control layer type code.
4
+ #
5
+ #This is the least mature Typingpool class. At present, all methods
6
+ #are class methods. This will likely change to a model in which
7
+ #different subclasses of App instances do everything from parsing
8
+ #and validating command-line input to completing core functionality
9
+ #to outputting context-dependent result summaries.
10
+ #
11
+ #As such, all App methods should be considered fluid and likely to
12
+ #change in subsequent releases.
13
+ module App
14
+ require 'vcr'
15
+ require 'stringio'
16
+ require 'open3'
17
+ class << self
18
+
19
+ #Given a Project instance, figures out which audio chunks, if
20
+ #any, need to be uploaded and uploads them.
21
+ #
22
+ #Note that this method is sensitive to the possibility of
23
+ #interrupted batch uploads. It checks for previously interrupted
24
+ #uploads at the start to see if it needs to re-try them, and
25
+ #writes out what uploads it is attempting prior to beginning the
26
+ #upload in case the upload is interrupted by an exception.
27
+ #
28
+ #As such, any script calling this method can usually be simply
29
+ #re-run to re-attempt the upload.
30
+ #
31
+ #Reads from and writes to the Filer::CSV instance linked to
32
+ #Project#local#file('data', 'assignment.csv'). No provision is
33
+ #made for sandbox-assignment.csv.
34
+ #
35
+ #Returns an array of urls corresponding to uploaded files. If no
36
+ #files were uploaded, the array will be empty
37
+ # ==== Params
38
+ # [project] A Project instance.
39
+ # [&block] Optional. A block that will be called at the
40
+ # beginning of each file upload and passed
41
+ # the local path to the file and the remote
42
+ # name of the file.
43
+ # ==== Returns
44
+ # An array of URLs of the uploaded audio files.
45
def upload_audio_for_project(project)
  # We don't make any provision for reading/writing from
  # sandbox-assignment.csv because audio upload data in such files is
  # effectively ignored.
  assignments_file = project.local.file('data', 'assignment.csv').as(:csv)
  # Settle any 'maybe' statuses left over from a previous run before
  # deciding what still needs uploading.
  check_interrupted_uploads(assignments_file, 'audio')
  uploading = assignments_file.reject { |assignment| assignment['audio_uploaded'] == 'yes' }
  return uploading if uploading.empty?
  basenames = uploading.map do |assignment|
    Typingpool::Project.local_basename_from_url(assignment['audio_url'])
  end
  local_files = basenames.map do |basename|
    project.local.file('audio', 'chunks', basename).as(:audio)
  end
  files = Typingpool::Filer::Files.new(local_files)
  remote_files = with_abort_on_url_mismatch('audio') do
    uploading.map { |assignment| project.remote.url_basename(assignment['audio_url']) }
  end
  # Record that we're uploading so we'll know later if something goes wrong.
  record_assignment_upload_status(assignments_file, uploading, ['audio'], 'maybe')
  project.remote.put(files.to_streams, remote_files) do |file, as|
    yield(file, as) if block_given?
  end
  record_assignment_upload_status(assignments_file, uploading, ['audio'], 'yes')
  uploading.map { |assignment| assignment['audio_url'] }
end
69
+
70
+ #For a subset of a Project instance's chunks/assignments,
71
+ #uploads assignment html that is used as the external question
72
+ #for a Mechanical Turk HIT.
73
+ #
74
+ #Takes the same precautions around interrupted network uploads
75
+ #as upload_audio_for_project.
76
+ #
77
+ #The URL of each uploaded assignment is written into
78
+ #Project#local.file('data', 'assignment.csv'), along with
79
+ #metadata confirming that the upload completed.
80
+ #
81
+ # ==== Params
82
+ # [project] A Project instance.
83
+ # [assignments_file] A Filer::CSV instance from which
84
+ # assignments_uploading were drawn. The
85
+ # upload status will be written and
86
+ # tracked here.
87
+ # [assignments_uploading] An enumerable collection of hashes
88
+ # corresponding to rows in
89
+ # Project#local.file('data',
90
+ # 'assignment.csv'). Only assignments
91
+ # whose URLs are contained in these
92
+ # hashes will be uploaded.
93
+ # [template] A Template::Assignment instance. Used to render
94
+ # assignments_uploading into HTML prior
95
+ # to uploading.
96
+ # ==== Returns
97
+ # An array of URLs of the uploaded assignments
98
def upload_html_for_project_assignments(project, assignments_file, assignments_uploading, template)
  # Render each assignment to HTML and hold it in memory as an IO object.
  rendered_pages = assignments_uploading.map do |assignment|
    html = template.render(assignment)
    StringIO.new(html)
  end
  # Each page is named after its audio chunk, with the extension swapped
  # for '.html'.
  html_basenames = assignments_uploading.map do |assignment|
    audio_basename = project.class.local_basename_from_url(assignment['audio_url'])
    File.basename(audio_basename, '.*') + '.html'
  end
  remote_names = project.create_remote_names(html_basenames)
  urls = remote_names.map { |remote_name| project.remote.file_to_url(remote_name) }
  # Record upload URLs (and a 'maybe' status) ahead of time so we can roll
  # back later if the upload fails halfway through.
  record_assignment_urls(assignments_file, assignments_uploading, 'assignment', urls)
  record_assignment_upload_status(assignments_file, assignments_uploading, ['assignment'], 'maybe')
  project.remote.put(rendered_pages, remote_names)
  record_assignment_upload_status(assignments_file, assignments_uploading, ['assignment'], 'yes')
  urls
end
113
+
114
+ #Removes one or more types of remote files -- audio, assignment
115
+ #html, etc. -- associated with a subset of a Project instance's
116
+ #chunks/assignments.
117
+ #
118
+ #Writes to Project#local.file('data', 'assignment.csv') to
119
+ #reflect these changes.
120
+ #
121
+ #As with upload_audio_for_project, this method is sensitive to
122
+ #the possibility of interrupted batch operations over the
123
+ #network. This means
124
+ # 1. It handles deleting files that *might* have been uploaded,
125
+ # trapping any exceptions that arise if such files do not exist
126
+ # on the remote server.
127
+ # 2. It writes out what deletions it is attempting before
128
+ # attempting them, so that if the deletion operation is
129
+ # interrupted by an exception, the files will be clearly marked
130
+ # in an unknown state.
131
+ #
132
+ # ==== Params
133
+ # [project] A Project instance.
134
+ # [assignments_file] A Filer::CSV instance from which
135
+ # assignments_updeleting were
136
+ # drawn. The upload status will be
137
+ # written and tracked here.
138
+ # [assignments_updeleting] An enumerable collection of hashes
139
+ # corresponding to selected rows in
140
+ # Project#local#file('data',
141
+ # 'assignment.csv'). Only assets whose
142
+ # URLs are contained in these hashes
143
+ # will be deleted.
144
+ # [types] Optional. An array of asset
145
+ # 'types'. The default, ['audio',
146
+ # 'assignment'], means assets at
147
+ # assignment['audio_url'] and
148
+ # assignment['assignment_url'] will be
149
+ # deleted for each assignment hash in
150
+ # assignments_updeleting and that
151
+ # upload status will be tracked in
152
+ # assignment['audio_uploaded'] and
153
+ # assignment['assignment_uploaded'].
154
+ # [&block] Optional. A code block that will be
155
+ # called with the name of the remote
156
+ # file just before the delete is
157
+ # carried out.
158
+ # ==== Returns
159
+ # A count of how many items were actually removed from the
160
+ # server.
161
def updelete_assignment_assets(project, assignments_file, assignments_updeleting=assignments_file, types=['audio', 'assignment'])
  # Asset types of one assignment that are, or might be, on the remote server.
  removable_types = lambda do |assignment|
    types.select do |type|
      assignment["#{type}_uploaded"] == 'yes' || assignment["#{type}_uploaded"] == 'maybe'
    end
  end
  assignments_updeleting = assignments_updeleting.select do |assignment|
    !removable_types.call(assignment).empty?
  end
  urls_updeleting = assignments_updeleting.map do |assignment|
    removable_types.call(assignment).map { |type| assignment["#{type}_url"] }.select { |url| url }
  end.flatten
  return 0 if urls_updeleting.empty?
  missing = []
  # Mark the assets as being in an unknown state before touching the server,
  # in case the deletion is interrupted by an exception.
  record_assignment_upload_status(assignments_file, assignments_updeleting, types, 'maybe')
  begin
    with_abort_on_url_mismatch do
      project.remote.remove_urls(urls_updeleting) { |file| yield(file) if block_given? }
    end
  rescue Typingpool::Error::File::Remote => exception
    # The message is treated as a '; '-separated list of failures. Entries
    # matching "no such file" are tolerated (they are only subtracted from
    # the returned count); anything else is re-raised.
    missing, others = exception.message.split('; ').partition do |message|
      message.match(/no such file/i)
    end
    raise Error, "Can't remove files: #{others.join('; ')}" if others.count > 0
  end
  record_assignment_upload_status(assignments_file, assignments_updeleting, types, 'no')
  urls_updeleting.count - missing.count
end
193
+
194
+ #Given a collection of Amazon::HITs, looks for Project folders
195
+ #on the local system waiting to "receive" those HITs. Such
196
+ #folders are kept in Config#transcripts. Returns Project
197
+ #instances associated with those folders, bundled together
198
+ #with the related HITs (see below for the exact format of the
199
+ #return value).
200
+ # ==== Params
201
+ # [hits] An enumerable collection of Amazon::HIT instances.
202
+ # [config] A Config instance.
203
+ # [&block] Optional. A block, if supplied, will be called
204
+ # repeatedly, each time being passed a different
205
+ # Project instance and an array of Amazon::HIT
206
+ # instances, corresponding to the subset of [hits]
207
+ # belonging to the Project.
208
+ # ==== Returns
209
+ # An array of hashes of the form {:project => project, :hits
210
+ # =>[hit1,hit2...]}.
211
def find_projects_waiting_for_hits(hits, config)
  # need[project_id] is true once a project has been found to be waiting
  # for HITs, false once it has been examined and ruled out, and absent
  # while the project has not been examined yet.
  need = {}
  by_project_id = {}
  hits.each do |hit|
    project_id = hit.project_id
    if need.key?(project_id)
      by_project_id[project_id][:hits].push(hit) if need[project_id]
      next
    end
    need[project_id] = false
    project = Typingpool::Project.new(hit.project_title_from_url, config)
    next unless project.local && (project.local.id == project_id)
    # A finished transcript means the project is no longer waiting.
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    next if File.exist?(project.local.file(transcript_filename[:done]))
    by_project_id[project_id] = {
      :project => project,
      :hits => [hit]
    }
    need[project_id] = true
  end
  if block_given?
    by_project_id.values.each { |hash| yield(hash[:project], hash[:hits]) }
  end
  by_project_id.keys.sort.map { |key| by_project_id[key] }
end
236
+
237
+ #Given a Project and assignments file like
238
+ #Project#local#file('data', 'assignment.csv'), writes an HTML
239
+ #transcript for that project within the local project folder
240
+ #(Project#local). To do so, uses data from within Project#local,
241
+ #in particular the data dir and in particular within that the
242
+ #assignment.csv file.
243
+ # ==== Params
244
+ # [project] A Project instance.
245
+ # [assignments_file] A Filer::CSV instance
246
+ # corresponding to a file like
247
+ # Project#local#file('data',
248
+ # 'assignment.csv').
249
+ # [config] Optional. A Config instance. If not supplied, will
250
+ # use Project#config. Used to find the
251
+ # transcript template (Config#templates is
252
+ # examined).
253
+ # ==== Returns
254
+ # Path to the resulting HTML transcript file.
255
def create_transcript(project, assignments_file, config=project.config)
  # Only rows that already carry a transcript become chunks.
  transcribed = assignments_file.select { |assignment| assignment['transcript'] }
  transcript_chunks = transcribed.map do |assignment|
    chunk = Typingpool::Transcript::Chunk.new(assignment['transcript'])
    chunk.url = assignment['audio_url']
    chunk.project = assignment['project_id']
    chunk.worker = assignment['worker']
    chunk.hit = assignment['hit_id']
    chunk
  end
  transcript = Typingpool::Transcript.new(project.name, transcript_chunks)
  transcript.subtitle = project.local.subtitle
  # The transcript is complete once it has as many entries as there are
  # items in the project's audio/chunks subdir.
  done = (transcript.to_a.length == project.local.subdir('audio', 'chunks').to_a.size)
  out_file = done ? transcript_filename[:done] : transcript_filename[:working]
  begin
    template = Template.from_config('transcript', config)
  rescue Error::File::NotExists => e
    abort "Couldn't find the template dir in your config file: #{e}"
  rescue Error => e
    abort "There was a fatal error with the transcript template: #{e}"
  end
  # Clear any earlier in-progress transcript before writing the new file.
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  working_path = project.local.file(transcript_filename[:working])
  File.delete(working_path) if File.exist?(working_path)
  File.open(project.local.file(out_file), 'w') do |out|
    out << template.render({:transcript => transcript})
  end
  # Note: this is the transcript's file name, not a full path.
  out_file
end
281
+
282
+ #Creates the file Project#local#file('data',
283
+ #'sandbox-assignment.csv') if it doesn't exist. Populates the
284
+ #file by copying over Project#local#file('data',
285
+ #'assignment.csv') and stripping it of HIT and assignment_url
286
+ #data.
287
+ #
288
+ #Always returns a Filer::CSV instance linked to
289
+ #sandbox-assignment.csv.
290
def ensure_sandbox_assignment_csv(project)
  csv = project.local.file('data', 'sandbox-assignment.csv').as(:csv)
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  return csv if File.exist?(csv)
  source = project.local.file('data', 'assignment.csv')
  raise Error, "No assignment CSV to copy" unless File.exist?(source)
  # Copy every row, dropping HIT data and the assignment upload columns.
  sandbox_rows = source.as(:csv).map do |assignment|
    unrecord_hit_in_csv_row(assignment)
    assignment.delete('assignment_url')
    assignment.delete('assignment_uploaded')
    assignment
  end
  csv.write(sandbox_rows)
  csv
end
304
+
305
+ #Takes Project instance and a boolean indicating whether we're
306
+ #working in the Amazon sandbox. Returns a Filer::CSV instance
307
+ #corresponding to the appropriate assignments file,
308
+ #e.g. Project#local#file('data', 'assignment.csv')#as(:csv).
309
def assignments_file_for_sandbox_status(sandbox, project)
  # Sandbox runs use their own CSV, which is created on demand.
  return ensure_sandbox_assignment_csv(project) if sandbox
  live_file = project.local.file('data', 'assignment.csv')
  live_file.as(:csv)
end
316
+
317
+ #Extracts relevant information from a collection of
318
+ #just-assigned Amazon::HITs and writes it into the Project's
319
+ #assignment CSV file for future use.
320
+ # ==== Params
321
+ # [assignments_file] A Filer::CSV instance
322
+ # corresponding to a file like
323
+ # Project#local#file('data',
324
+ # 'assignment.csv').
325
+ # [hits] An enumerable collection of Amazon::HIT instances that
326
+ # were just assigned (that is, that have one
327
+ # assignment, which has a blank status).
328
def record_assigned_hits_in_assignments_file(assignments_file, hits)
  # For each row matched to a HIT, store the HIT id plus its expiry and
  # assignment duration (both stringified).
  record_hits_in_assignments_file(assignments_file, hits) { |assigned_hit, row|
    row['hit_id'] = assigned_hit.id
    row['hit_expires_at'] = assigned_hit.full.expires_at.to_s
    row['hit_assignments_duration'] = assigned_hit.full.assignments_duration.to_s
  }
end
335
+
336
+ #Extracts relevant information from a collection of
337
+ #just-approved Amazon::HITs and writes it into the Project's
338
+ #assignment CSV file (Project#local#file('data', 'assignment.csv')) for
339
+ #future use.
340
+ # ==== Params
341
+ # [assignments_file] A Filer::CSV instance
342
+ # corresponding to a file like
343
+ # Project#local#file('data',
344
+ # 'assignment.csv').
345
+ # [hits] An enumerable collection of Amazon::HIT instances whose
346
+ # one assignment has the status 'Approved'.
347
def record_approved_hits_in_assignments_file(assignments_file, hits)
  record_hits_in_assignments_file(assignments_file, hits) { |approved_hit, row|
    # Rows that already hold a transcript are left untouched.
    unless row['transcript']
      row['transcript'] = approved_hit.transcript.body
      row['worker'] = approved_hit.transcript.worker
      row['hit_id'] = approved_hit.id
    end
  }
end
355
+
356
+ #Given a Project instance and an array of modified assignment
357
+ #hashes previously retrieved from the Project's assignment CSV
358
+ #(Project#local#file('data', 'assignment.csv')), writes the
359
+ #'assignment_url' property of each modified hash back to the
360
+ #corresponding row in the original CSV.
361
+ # def record_assignment_urls_in_project(project, assignments)
362
+ # assignments_by_audio_url = Hash[ *assignments.map{|assignment| [assignment['aud#io_url'], assignment] }.flatten ]
363
+ # project.local.file('data', 'assignment.csv').as(:csv).each! do |csv_row|
364
+ # assignment = assignments_by_audio_url[csv_row['audio_url']] or next
365
+ # csv_row['assignment_url'] = assignment['assignment_url']
366
+ # end
367
+ # end
368
+
369
+ #Erases all mention of the given Amazon::HITs from one of the
370
+ #Project's assignment CSV files. Typically used when rejecting a
371
+ #HIT assignment.
372
+ # ==== Params
373
+ # [assignments_file] A Filer::CSV instance
374
+ # corresponding to a file like
375
+ # Project#local#file('data',
376
+ # 'assignment.csv').
377
+ # [hits] An enumerable collection of Amazon::HIT instances to be
378
+ # deleted.
379
def unrecord_hits_in_assignments_file(assignments_file, hits)
  # The HIT itself is not needed here; only its matching row is scrubbed.
  record_hits_in_assignments_file(assignments_file, hits) { |_hit, row| unrecord_hit_in_csv_row(row) }
end
384
+
385
+ #Erases particular details of a subset or all of a Project's
386
+ #Amazon::HITs from one of the Project's assignment CSV files.
387
+ #
388
+ #Specifically, deletes information about the HIT's
389
+ #expires_at, and assignments_duration.
390
+ #
391
+ #Typically used when some or all of a Project's HITs have been
392
+ #processed and incorporated into a transcript and are not needed
393
+ #any more as Amazon::HITs on Amazon servers, but when we still
394
+ #want to retain the HIT ids in the Project assignment CSV.
395
+ # ==== Params
396
+ # [assignments_file] A Filer::CSV instance
397
+ # corresponding to a file like
398
+ # Project#local#file('data',
399
+ # 'assignment.csv').
400
+ # [hits] Optional. An enumerable collection of Amazon::HIT
401
+ # instances whose details are to be
402
+ # deleted. If not supplied, details for ALL
403
+ # HITs in the Project assignment CSV will be
404
+ # deleted.
405
def unrecord_hits_details_in_assignments_file(assignments_file, hits=nil)
  # With hits=nil the helper yields every row, so all rows are scrubbed.
  record_hits_in_assignments_file(assignments_file, hits) { |_hit, row| unrecord_hit_details_in_csv_row(row) }
end
410
+
411
+ #Checks for Typingpool's external dependencies. If they appear
412
+ #to missing, yields to the passed block an array containing the
413
+ #name of missing commands/packages (e.g. ffmpeg).
414
def if_missing_dependencies
  #TODO: Test on Linux
  missing = []
  [['ffmpeg','-version'], ['mp3splt', '-v'], ['mp3wrap']].each do |cmdline|
    begin
      # Only whether the command can be launched matters; its output and
      # exit status are deliberately ignored.
      Open3.capture3(*cmdline)
    rescue StandardError
      missing.push(cmdline.first)
    end
  end
  yield(missing) unless missing.empty?
end
426
+
427
+ #Begins recording of an HTTP mock fixture (for automated
428
+ #testing) using the great VCR gem. Automatically filters your
429
+ #Config#amazon#key and Config#amazon#secret from the recorded
430
+ #fixture, and automatically determines the "cassette" name and
431
+ #"cassette library" dir from the supplied path.
432
+ # ==== Params
433
+ # [fixture_path] Path to where you want the HTTP fixture
434
+ # recorded, including filename.
435
+ # [config] A Config instance, used to extract the
436
+ # Config#amazon#secret and Config#amazon#key that
437
+ # will be filtered from the fixture.
438
+ # ==== Returns
439
+ # Result of calling VCR.insert_cassette.
440
def vcr_record(fixture_path, config)
  # The fixture's directory becomes the cassette library; its basename
  # without extension becomes the cassette name.
  library_dir = File.dirname(fixture_path)
  cassette_name = File.basename(fixture_path, '.*')
  VCR.configure do |vcr|
    vcr.cassette_library_dir = library_dir
    vcr.hook_into(:webmock)
    # Keep the AWS credentials out of the recorded fixture.
    vcr.filter_sensitive_data('<AWS_KEY>') { config.amazon.key }
    vcr.filter_sensitive_data('<AWS_SECRET>') { config.amazon.secret }
  end
  VCR.insert_cassette(cassette_name, :record => :new_episodes)
end
449
+
450
+ #Stops recording of the last call to vcr_record. Returns the
451
+ #result of VCR.eject_cassette.
452
def vcr_stop
  # Hand back whatever VCR returns when the current cassette is ejected.
  ejected = VCR.eject_cassette
  ejected
end
455
+
456
+ #protected
457
+
458
# Runs the given block and returns its value. If the block raises a
# Typingpool::Error whose message matches "not find base url", aborts
# with a hint about using the wrong config file; any other
# Typingpool::Error is re-raised unchanged.
# ==== Params
# [url_type] Optional. A word describing the kind of URL involved
#            (e.g. 'audio'), inserted into the abort message.
def with_abort_on_url_mismatch(url_type='')
  # Only add the separating space when there is a label, so the default
  # message does not read "recorded  URLs" with a doubled space.
  label = url_type.to_s.empty? ? '' : "#{url_type} "
  begin
    yield
  rescue Typingpool::Error => exception
    raise unless exception.message.match(/not find base url/i)
    abort "Previously recorded #{label}URLs don't look right. Are you using the right config file? You may have passed in a --config argument to a previous script and forgotten to do so now."
  end
end
470
+
471
# Walks every row of assignments_file via each! and yields (hit, row).
# When hits is given, only rows whose 'audio_url' matches a HIT's url are
# yielded; when hits is nil, every row is yielded with a nil hit.
def record_hits_in_assignments_file(assignments_file, hits)
  lookup = hits ? hits_by_url(hits) : nil
  assignments_file.each! do |row|
    if hits
      matched = lookup[row['audio_url']]
      next unless matched
      yield(matched, row)
    else
      yield(nil, row)
    end
  end
end
481
+
482
# For each selected row, sets "<type>_uploaded" to status for every type
# whose "<type>_url" is present in that row.
def record_assignment_upload_status(assignments, uploading, types, status)
  record_in_selected_assignments(assignments, uploading) do |row|
    types.each do |type|
      next unless row["#{type}_url"]
      row["#{type}_uploaded"] = status
    end
  end
end
489
+
490
# Writes urls into the "<type>_url" column of the selected rows.
# ==== Params
# [assignments] The collection being updated (walked by
#               record_in_selected_assignments).
# [uploading]   The selected assignment hashes; urls[i] belongs to
#               uploading[i].
# [type]        Column prefix, e.g. 'assignment'.
# [urls]        URLs in the same order as uploading.
def record_assignment_urls(assignments, uploading, type, urls)
  # Pair each URL with its assignment's audio_url up front. Rows are
  # visited in file order, which need not match the order of uploading,
  # so a running index could hand a row another assignment's URL.
  url_by_audio_url = Hash[uploading.map { |assignment| assignment['audio_url'] }.zip(urls)]
  record_in_selected_assignments(assignments, uploading) do |assignment|
    assignment["#{type}_url"] = url_by_audio_url[assignment['audio_url']]
  end
end
497
+
498
# Walks assignments via each! and yields every row whose 'audio_url'
# equals the 'audio_url' of one of the selected hashes.
def record_in_selected_assignments(assignments, selected)
  # Build the lookup incrementally rather than via Hash[*pairs.flatten],
  # which passes every key and value as a separate method argument.
  selected_urls = {}
  selected.each { |assignment| selected_urls[assignment['audio_url']] = true }
  assignments.each! do |assignment|
    yield(assignment) if selected_urls[assignment['audio_url']]
  end
end
506
+
507
# Removes the HIT expiry and assignment-duration entries from csv_row.
def unrecord_hit_details_in_csv_row(csv_row)
  detail_keys = ['hit_expires_at', 'hit_assignments_duration']
  detail_keys.each do |detail_key|
    csv_row.delete(detail_key)
  end
end
510
+
511
# Removes the HIT detail entries and then 'hit_id' from csv_row. Returns
# the result of deleting 'hit_id'.
def unrecord_hit_in_csv_row(csv_row)
  unrecord_hit_details_in_csv_row(csv_row)
  removed_hit_id = csv_row.delete('hit_id')
  removed_hit_id
end
515
+
516
# Names of the transcript file: :done for the finished transcript,
# :working for the in-progress one. A new hash is built on each call.
def transcript_filename
  names = {}
  names[:done] = 'transcript.html'
  names[:working] = 'transcript_in_progress.html'
  names
end
522
+
523
# Indexes hits by their url. Returns a hash of url => hit; when several
# hits share a url, the last one wins.
def hits_by_url(hits)
  # Hash[] is given the pair list directly instead of a flattened splat,
  # so the collection is not expanded into individual method arguments.
  Hash[hits.map { |hit| [hit.url, hit] }]
end
526
+
527
+
528
# Resolves rows left in the 'maybe' upload state: each such row's
# "<property>_url" is tested with Typingpool::Utility.working_url? and
# its "<property>_uploaded" is set to 'yes' or 'no' accordingly.
def check_interrupted_uploads(assignments, property)
  status_key = "#{property}_uploaded"
  url_key = "#{property}_url"
  assignments.each! do |row|
    next unless row[status_key].to_s == 'maybe'
    reachable = Typingpool::Utility.working_url?(row[url_key])
    row[status_key] = reachable ? 'yes' : 'no'
  end
end
535
+ end #class << self
536
module FriendlyExceptions
  #Runs a block and converts Typingpool::Error::Argument exceptions
  #raised inside it into a human-friendly abort message suitable for
  #a command-line script.
  # ==== Params
  # [name]   A string used to refer to the input, for example
  #          'project title' or '--config argument'. Appears in the
  #          goodbye message.
  # [*input] One or more values: the user input that may trigger the
  #          exception. Passed to the block and quoted in the
  #          goodbye message.
  # [&block] The block to execute and monitor. Receives [*input].
  # ==== Errors
  # Aborts with a friendly message on any exception of the type
  # Typingpool::Error::Argument.
  # ==== Returns
  # The return value of &block.
  def with_friendly_exceptions(name, *input)
    yield(*input)
  rescue Typingpool::Error::Argument => exception
    quoted_input = input.map { |value| "'#{value}'" }.join(', ')
    goodbye = "Could not make sense of #{name} #{quoted_input}. #{exception.message}"
    # Finish with a full stop unless the message already supplies one.
    goodbye += '.' unless goodbye.match(/\.$/)
    abort goodbye
  end
end #FriendlyExceptions
565
+ module CLI
566
+ class << self
567
+ include App::FriendlyExceptions
568
+ #Optionally takes an ostensible path to a config file, as passed
569
+ #as a command-line option. Checks to make sure the file exists;
570
+ #returns nil if it does not, returns a Config instance if it
571
+ #does. If no path is passed, the default config file is returned
572
+ #(as retrieved by Config.file with no args).
573
def config_from_arg(arg=nil)
  # No path given: fall back to the default config file.
  return Config.file unless arg
  path = File.expand_path(arg)
  # File.file? is already false for paths that don't exist, so no
  # separate existence check is needed (File.exists? was removed in
  # Ruby 3.2).
  return unless File.file?(path)
  Config.file(path)
end
582
+
583
+ #Outputs a friendly explanation of the --help option for
584
+ #appending to script usage banners.
585
def help_arg_explanation
  # Name the running script by its basename only.
  script_name = File.basename($PROGRAM_NAME)
  "`#{script_name} --help` for more information."
end
588
+
589
+ #Converts a user arg into a Project instance, setting up or
590
+ #consulting a Config along the way.
591
+ # ==== Params
592
+ # [arg] A user-supplied argument specifying either an absolute
593
+ # path to a Project folder (Project#local) or the
594
+ # name of a project folder within
595
+ # [config]#transcripts.
596
+ # [config] A Config instance. If [arg] is an absolute path,
597
+ # will be modified -- Config#transcripts will be
598
+ # changed to match the implied transcripts dir.
599
+ # ==== Errors
600
+ # Will abort with a friendly message on any errors.
601
+ # ==== Returns
602
+ # A Project instance.
603
def project_from_arg_and_config(arg, config)
  # File.directory? is already false for paths that don't exist, and
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  path = if File.directory?(arg)
           # arg is itself a directory: treat its parent as the transcripts dir.
           config.transcripts = File.dirname(arg)
           arg
         else
           abort "No 'transcripts' dir specified in your config file and '#{arg}' is not a valid path" unless config.transcripts
           candidate = File.join(config.transcripts, arg)
           abort "No such project '#{arg}' in dir '#{config.transcripts}'" unless File.exist?(candidate)
           abort "'#{arg}' is not a directory at '#{candidate}'" unless File.directory?(candidate)
           candidate
         end
  project = with_friendly_exceptions('project name', File.basename(path)) do
    Typingpool::Project.new(File.basename(path), config)
  end
  abort "Not a project directory at '#{path}'" unless project.local
  project
end
620
+
621
+ end #class << self
622
module Formatter
  require 'highline/import'

  #Returns text wrapped by HighLine.color with the :bold style.
  def cli_bold(text)
    bold_text = HighLine.color(text, :bold)
    bold_text
  end

  #Returns text wrapped by HighLine.color with the :reverse style.
  def cli_reverse(text)
    reversed_text = HighLine.color(text, :reverse)
    reversed_text
  end
end #Formatter
632
+ end #CLI
633
+ end #App
634
+ end #Typingpool