typingpool 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +23 -0
- data/bin/tp-assign +240 -0
- data/bin/tp-collect +50 -0
- data/bin/tp-config +114 -0
- data/bin/tp-finish +101 -0
- data/bin/tp-make +169 -0
- data/bin/tp-review +175 -0
- data/lib/typingpool/amazon.rb +732 -0
- data/lib/typingpool/app.rb +634 -0
- data/lib/typingpool/config.rb +344 -0
- data/lib/typingpool/error.rb +22 -0
- data/lib/typingpool/filer.rb +396 -0
- data/lib/typingpool/project.rb +593 -0
- data/lib/typingpool/template.rb +175 -0
- data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
- data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
- data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
- data/lib/typingpool/templates/assignment/main.css +20 -0
- data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
- data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
- data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
- data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
- data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
- data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
- data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
- data/lib/typingpool/templates/config.yml +21 -0
- data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
- data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
- data/lib/typingpool/templates/project/data/.empty_directory +0 -0
- data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
- data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
- data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
- data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
- data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
- data/lib/typingpool/templates/project/etc/transcript.css +49 -0
- data/lib/typingpool/templates/transcript.html.erb +23 -0
- data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
- data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
- data/lib/typingpool/test/fixtures/config-1 +20 -0
- data/lib/typingpool/test/fixtures/config-2 +25 -0
- data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
- data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
- data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
- data/lib/typingpool/test/fixtures/template.html.erb +10 -0
- data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
- data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
- data/lib/typingpool/test.rb +418 -0
- data/lib/typingpool/transcript.rb +181 -0
- data/lib/typingpool/utility.rb +272 -0
- data/lib/typingpool.rb +500 -0
- data/test/make_amazon_question_fixture.rb +24 -0
- data/test/make_tp_collect_fixture_1.rb +26 -0
- data/test/make_tp_collect_fixture_2.rb +16 -0
- data/test/make_tp_collect_fixture_3.rb +15 -0
- data/test/make_tp_collect_fixture_4.rb +17 -0
- data/test/make_tp_review_fixture_1.rb +26 -0
- data/test/make_tp_review_fixture_2.rb +30 -0
- data/test/make_transcript_chunks_fixture.rb +53 -0
- data/test/test_integration_script_1_tp_config.rb +108 -0
- data/test/test_integration_script_2_tp_make.rb +119 -0
- data/test/test_integration_script_3_tp_assign.rb +152 -0
- data/test/test_integration_script_4_tp_review.rb +72 -0
- data/test/test_integration_script_5_tp_collect.rb +44 -0
- data/test/test_integration_script_6_tp_finish.rb +123 -0
- data/test/test_unit_amazon.rb +153 -0
- data/test/test_unit_config.rb +94 -0
- data/test/test_unit_filer.rb +202 -0
- data/test/test_unit_project.rb +168 -0
- data/test/test_unit_project_local.rb +68 -0
- data/test/test_unit_project_remote.rb +157 -0
- data/test/test_unit_template.rb +111 -0
- data/test/test_unit_transcript.rb +77 -0
- metadata +234 -0
|
@@ -0,0 +1,634 @@
|
|
|
1
|
+
module Typingpool
|
|
2
|
+
#Module encapsulating high-level Typingpool procedures and called
|
|
3
|
+
#from the various tp-* scripts. Control layer type code.
|
|
4
|
+
#
|
|
5
|
+
#This is the least mature Typingpool class. At present, all methods
|
|
6
|
+
#are class methods. This will likely change to a model in which
|
|
7
|
+
#different subclasses of App instances do everything from parsing
|
|
8
|
+
#and validating command-line input to completing core functionality
|
|
9
|
+
#to outputting context-dependent result summaries.
|
|
10
|
+
#
|
|
11
|
+
#As such, all App methods should be considered fluid and likely to
|
|
12
|
+
#change in subsequent releases.
|
|
13
|
+
module App
|
|
14
|
+
require 'vcr'
|
|
15
|
+
require 'stringio'
|
|
16
|
+
require 'open3'
|
|
17
|
+
class << self
|
|
18
|
+
|
|
19
|
+
#Given a Project instance, figures out which audio chunks, if
#any, still need to be uploaded and uploads them.
#
#This method is written to survive interrupted batch uploads. It
#first re-checks every chunk whose status was left as 'maybe' by
#an earlier run, and it records the uploads it is about to
#attempt (again as 'maybe') before starting, in case the upload
#is interrupted by an exception.
#
#As such, any script calling this method can usually be simply
#re-run to re-attempt the upload.
#
#Upload status is read from and written to
#Project#local#file('data', 'assignment.csv').
# ==== Params
# [project] A Project instance.
# [&block]  Optional. Receives the two arguments that
#           Project#remote#put yields for each upload
#           (originally documented as the local path to the
#           file and the remote name of the file).
# ==== Returns
# An array of URLs of the uploaded audio files. If no files
# needed uploading, the array is empty.
def upload_audio_for_project(project)
  #we don't make any provision for reading/writing from
  #sandbox-assignment.csv because audio upload data in such files is
  #effectively ignored
  assignments_file = project.local.file('data', 'assignment.csv').as(:csv)
  #Resolve any 'maybe' left behind by an interrupted run
  check_interrupted_uploads(assignments_file, 'audio')
  uploading = assignments_file.reject{|assignment| assignment['audio_uploaded'] == 'yes' }
  return uploading if uploading.empty?
  files = uploading.map{|assignment| Typingpool::Project.local_basename_from_url(assignment['audio_url']) }
  files.map!{|basename| project.local.file('audio', 'chunks', basename).as(:audio) }
  files = Typingpool::Filer::Files.new(files)
  remote_files = with_abort_on_url_mismatch('audio') do
    uploading.map{|assignment| project.remote.url_basename(assignment['audio_url']) }
  end
  #Record that we're uploading so we'll know later if something
  #goes wrong
  record_assignment_upload_status(assignments_file, uploading, ['audio'], 'maybe')
  project.remote.put(files.to_streams, remote_files) do |file, as|
    yield(file, as) if block_given?
  end
  record_assignment_upload_status(assignments_file, uploading, ['audio'], 'yes')
  uploading.map{|assignment| assignment['audio_url'] }
end
|
|
69
|
+
|
|
70
|
+
#Renders and uploads the assignment HTML (the external question
#of a Mechanical Turk HIT) for a subset of a Project instance's
#chunks/assignments.
#
#Like upload_audio_for_project, this guards against interrupted
#network uploads: the target URLs and a 'maybe' upload status are
#written to the assignments file before the upload starts, and
#the status is switched to 'yes' once the upload returns.
#
# ==== Params
# [project]               A Project instance.
# [assignments_file]      A Filer::CSV instance from which
#                         assignments_uploading were drawn. The
#                         URLs and upload status are written and
#                         tracked here.
# [assignments_uploading] An enumerable collection of hashes
#                         corresponding to rows in
#                         Project#local.file('data',
#                         'assignment.csv'). Only these
#                         assignments are rendered and uploaded.
# [template]              A Template::Assignment instance. Used
#                         to render each assignment into HTML
#                         prior to uploading.
# ==== Returns
# An array of URLs of the uploaded assignments
def upload_html_for_project_assignments(project, assignments_file, assignments_uploading, template)
  rendered = assignments_uploading.map do |assignment|
    StringIO.new(template.render(assignment))
  end
  #Each HTML file is named after its audio chunk, with the
  #extension swapped for .html
  html_basenames = assignments_uploading.map do |assignment|
    audio_basename = project.class.local_basename_from_url(assignment['audio_url'])
    "#{File.basename(audio_basename, '.*')}.html"
  end
  remote_names = project.create_remote_names(html_basenames)
  urls = remote_names.map{|remote_name| project.remote.file_to_url(remote_name) }
  #record upload URLs ahead of time so we can roll back later if the
  #upload fails halfway through
  record_assignment_urls(assignments_file, assignments_uploading, 'assignment', urls)
  record_assignment_upload_status(assignments_file, assignments_uploading, ['assignment'], 'maybe')
  project.remote.put(rendered, remote_names)
  record_assignment_upload_status(assignments_file, assignments_uploading, ['assignment'], 'yes')
  urls
end
|
|
113
|
+
|
|
114
|
+
#Removes one or more types of remote files -- audio, assignment
#html, etc. -- associated with a subset of a Project instance's
#chunks/assignments.
#
#Writes to the passed assignments file (e.g.
#Project#local.file('data', 'assignment.csv')) to reflect these
#changes.
#
#As with upload_audio_for_project, this method is sensitive to
#the possibility of interrupted batch operations over the
#network. This means
# 1. It also deletes files that *might* have been uploaded
#    (status 'maybe'), tolerating "no such file" errors reported
#    by the remote server for them.
# 2. It marks the assets as 'maybe' before attempting the
#    deletion, so that if the operation is interrupted by an
#    exception, the files are clearly marked as being in an
#    unknown state.
#
# ==== Params
# [project]                A Project instance.
# [assignments_file]       A Filer::CSV instance from which
#                          assignments_updeleting were
#                          drawn. The upload status will be
#                          written and tracked here.
# [assignments_updeleting] Optional. An enumerable collection of
#                          hashes corresponding to selected rows
#                          in Project#local#file('data',
#                          'assignment.csv'). Only assets whose
#                          URLs are contained in these hashes
#                          will be deleted. Defaults to every row
#                          of assignments_file.
# [types]                  Optional. An array of asset
#                          'types'. The default, ['audio',
#                          'assignment'], means assets at
#                          assignment['audio_url'] and
#                          assignment['assignment_url'] will be
#                          deleted for each assignment hash in
#                          assignments_updeleting and that
#                          upload status will be tracked in
#                          assignment['audio_uploaded'] and
#                          assignment['assignment_uploaded'].
# [&block]                 Optional. A code block that is
#                          forwarded whatever
#                          Project#remote#remove_urls yields
#                          (originally documented as the name of
#                          the remote file, just before the
#                          delete is carried out).
# ==== Errors
# Raises Error if the remote reports any failure other than
# "no such file".
# ==== Returns
# A count of how many items were actually removed from the
# server: the number of URLs submitted for deletion minus the
# number reported as missing.
def updelete_assignment_assets(project, assignments_file, assignments_updeleting=assignments_file, types=['audio', 'assignment'])
  #An asset may exist remotely when its status is 'yes' or 'maybe'
  possibly_remote = lambda do |assignment, type|
    %w(yes maybe).include?(assignment["#{type}_uploaded"])
  end
  assignments_updeleting = assignments_updeleting.select do |assignment|
    types.any?{|type| possibly_remote.call(assignment, type) }
  end
  urls_updeleting = assignments_updeleting.flat_map do |assignment|
    types.select{|type| possibly_remote.call(assignment, type) }.map{|type| assignment["#{type}_url"] }.select{|url| url }
  end
  return 0 if urls_updeleting.empty?
  missing = []
  #Flag the assets as uncertain before touching the server
  record_assignment_upload_status(assignments_file, assignments_updeleting, types, 'maybe')
  begin
    with_abort_on_url_mismatch do
      project.remote.remove_urls(urls_updeleting){|file| yield(file) if block_given? }
    end
  rescue Typingpool::Error::File::Remote => exception
    #The remote error message bundles one '; '-separated entry per
    #failure. Missing files are fine; anything else is fatal.
    missing, others = exception.message.split('; ').partition{|message| message.match(/no such file/i) }
    raise Error, "Can't remove files: #{others.join('; ')}" unless others.empty?
  end #begin
  record_assignment_upload_status(assignments_file, assignments_updeleting, types, 'no')
  urls_updeleting.count - missing.count
end
|
|
193
|
+
|
|
194
|
+
#Given a collection of Amazon::HITs, looks for Project folders
#on the local system waiting to "receive" those HITs. Such
#folders are kept in Config#transcripts. Returns Project
#instances associated with those folders, bundled together
#with the related HITs (see below for the exact format of the
#return value).
#
#A project is considered to be waiting when it has a local
#folder whose id matches the HIT's project id and no finished
#transcript ('transcript.html') exists in that folder yet.
# ==== Params
# [hits]   An enumerable collection of Amazon::HIT instances.
# [config] A Config instance.
# [&block] Optional. A block, if supplied, will be called
#          repeatedly, each time being passed a different
#          Project instance and an array of Amazon::HIT
#          instances, corresponding to the subset of [hits]
#          belonging to the Project. Projects are yielded in the
#          order they were first encountered in [hits].
# ==== Returns
# An array of hashes of the form {:project => project, :hits
# =>[hit1,hit2...]}, sorted by project id.
def find_projects_waiting_for_hits(hits, config)
  #need[id] is true when the project is waiting, false when it was
  #checked and rejected, and absent when not yet checked
  need = {}
  by_project_id = {}
  hits.each do |hit|
    if need[hit.project_id]
      by_project_id[hit.project_id][:hits].push(hit)
    elsif need[hit.project_id] == false
      next
    else
      #Assume rejection until every check below has passed
      need[hit.project_id] = false
      project = Typingpool::Project.new(hit.project_title_from_url, config)
      next unless project.local && (project.local.id == hit.project_id)
      #File.exists? was removed in Ruby 3.2; File.exist? is the
      #supported spelling
      next if File.exist? project.local.file(transcript_filename[:done])
      by_project_id[hit.project_id] = {
        :project => project,
        :hits => [hit]
      }
      need[hit.project_id] = true
    end #if need[hit.project_id]
  end #hits.each do...
  if block_given?
    by_project_id.each_value{|hash| yield(hash[:project], hash[:hits]) }
  end
  by_project_id.keys.sort.map{|key| by_project_id[key] }
end
|
|
236
|
+
|
|
237
|
+
#Given a Project and an assignments file like
#Project#local#file('data', 'assignment.csv'), writes an HTML
#transcript for that project within the local project folder
#(Project#local), built from every row of the assignments file
#that has a transcript.
#
#The transcript is written as 'transcript.html' when it contains
#as many chunks as there are entries in the project's
#audio/chunks dir, and as 'transcript_in_progress.html'
#otherwise. Any existing 'transcript_in_progress.html' is deleted
#first.
# ==== Params
# [project]          A Project instance.
# [assignments_file] A Filer::CSV instance
#                    corresponding to a file like
#                    Project#local#file('data',
#                    'assignment.csv').
# [config]           Optional. A Config instance. If not
#                    supplied, will use Project#config. Used to
#                    find the transcript template
#                    (Config#templates is examined).
# ==== Errors
# Aborts the process with a message if the transcript template
# cannot be loaded.
# ==== Returns
# The file name of the resulting HTML transcript, i.e. the name
# that was passed to Project#local#file. NOTE(review): earlier
# docs called this a "path", but the bare name is what is
# returned -- confirm what callers expect.
def create_transcript(project, assignments_file, config=project.config)
  transcript_chunks = assignments_file.select{|assignment| assignment['transcript']}.map do |assignment|
    chunk = Typingpool::Transcript::Chunk.new(assignment['transcript'])
    chunk.url = assignment['audio_url']
    chunk.project = assignment['project_id']
    chunk.worker = assignment['worker']
    chunk.hit = assignment['hit_id']
    chunk
  end #...map do |assignment|
  transcript = Typingpool::Transcript.new(project.name, transcript_chunks)
  transcript.subtitle = project.local.subtitle
  done = (transcript.to_a.length == project.local.subdir('audio', 'chunks').to_a.size)
  out_file = done ? transcript_filename[:done] : transcript_filename[:working]
  begin
    template = Template.from_config('transcript', config)
  rescue Error::File::NotExists => e
    abort "Couldn't find the template dir in your config file: #{e}"
  rescue Error => e
    abort "There was a fatal error with the transcript template: #{e}"
  end #begin
  #File.exists? was removed in Ruby 3.2; File.exist? is the
  #supported spelling
  working_path = project.local.file(transcript_filename[:working])
  File.delete(working_path) if File.exist?(working_path)
  File.open(project.local.file(out_file), 'w') do |out|
    out << template.render({:transcript => transcript})
  end #File.open...
  out_file
end
|
|
281
|
+
|
|
282
|
+
#Creates the file Project#local#file('data',
#'sandbox-assignment.csv') if it doesn't exist. Populates the
#file by copying over Project#local#file('data',
#'assignment.csv') and stripping each row of its HIT data
#(hit_id, hit_expires_at, hit_assignments_duration) and of its
#assignment_url and assignment_uploaded values.
#
# ==== Errors
# Raises Error if the sandbox file has to be created but
# assignment.csv does not exist.
# ==== Returns
# Always returns a Filer::CSV instance linked to
# sandbox-assignment.csv.
def ensure_sandbox_assignment_csv(project)
  csv = project.local.file('data', 'sandbox-assignment.csv').as(:csv)
  #File.exists? was removed in Ruby 3.2; File.exist? is the
  #supported spelling
  return csv if File.exist? csv
  raise Error, "No assignment CSV to copy" unless File.exist? project.local.file('data', 'assignment.csv')
  csv.write(
    project.local.file('data', 'assignment.csv').as(:csv).map do |assignment|
      unrecord_hit_in_csv_row(assignment)
      assignment.delete('assignment_url')
      assignment.delete('assignment_uploaded')
      assignment
    end #project.local.file('data', 'assignment.csv').as(:csv) map...
  )
  csv
end
|
|
304
|
+
|
|
305
|
+
#Picks the assignments file that matches the current Amazon
#environment.
# ==== Params
# [sandbox] Truthy when working in the Amazon sandbox.
# [project] A Project instance.
# ==== Returns
# A Filer::CSV instance: the sandbox copy (created on demand by
# ensure_sandbox_assignment_csv) when sandbox is truthy, otherwise
# Project#local#file('data', 'assignment.csv')#as(:csv).
def assignments_file_for_sandbox_status(sandbox, project)
  return ensure_sandbox_assignment_csv(project) if sandbox
  project.local.file('data', 'assignment.csv').as(:csv)
end
|
|
316
|
+
|
|
317
|
+
#Copies the id, expiry time and assignment duration of each
#just-assigned Amazon::HIT into the matching row of the
#Project's assignment CSV file for future use. Expiry and
#duration are stored as strings.
# ==== Params
# [assignments_file] A Filer::CSV instance
#                    corresponding to a file like
#                    Project#local#file('data',
#                    'assignment.csv').
# [hits]             An enumerable collection of Amazon::HIT
#                    instances that were just assigned (that is,
#                    that have one assignment, which has a blank
#                    status).
def record_assigned_hits_in_assignments_file(assignments_file, hits)
  record_hits_in_assignments_file(assignments_file, hits) do |hit, row|
    row['hit_id'] = hit.id
    %w(expires_at assignments_duration).each do |detail|
      row["hit_#{detail}"] = hit.full.public_send(detail).to_s
    end
  end
end
|
|
335
|
+
|
|
336
|
+
#Copies the transcript body, the worker and the HIT id of each
#just-approved Amazon::HIT into the matching row of the
#Project's assignment CSV file (Project#local#file('data',
#'assignment.csv')) for future use. Rows that already hold a
#transcript are left untouched.
# ==== Params
# [assignments_file] A Filer::CSV instance
#                    corresponding to a file like
#                    Project#local#file('data',
#                    'assignment.csv').
# [hits]             An enumerable collection of Amazon::HIT
#                    instances whose one assignment has the
#                    status 'Approved'.
def record_approved_hits_in_assignments_file(assignments_file, hits)
  record_hits_in_assignments_file(assignments_file, hits) do |hit, row|
    unless row['transcript']
      row['transcript'] = hit.transcript.body
      row['worker'] = hit.transcript.worker
      row['hit_id'] = hit.id
    end
  end
end
|
|
355
|
+
|
|
356
|
+
#Given a Project instance and an array of modified assignment
|
|
357
|
+
#hashes previously retrieved from the Project's assignment CSV
|
|
358
|
+
#(Project#local#file('data', 'assignment.csv')), writes the
|
|
359
|
+
#'assignment_url' property of each modified hash back to the
|
|
360
|
+
#corresponding row in the original CSV.
|
|
361
|
+
# def record_assignment_urls_in_project(project, assignments)
|
|
362
|
+
# assignments_by_audio_url = Hash[ *assignments.map{|assignment| [assignment['audio_url'], assignment] }.flatten ]
|
|
363
|
+
# project.local.file('data', 'assignment.csv').as(:csv).each! do |csv_row|
|
|
364
|
+
# assignment = assignments_by_audio_url[csv_row['audio_url']] or next
|
|
365
|
+
# csv_row['assignment_url'] = assignment['assignment_url']
|
|
366
|
+
# end
|
|
367
|
+
# end
|
|
368
|
+
|
|
369
|
+
#Strips every trace of the given Amazon::HITs (id, expiry and
#assignment duration) from the matching rows of one of the
#Project's assignment CSV files. Typically used when rejecting a
#HIT assignment.
# ==== Params
# [assignments_file] A Filer::CSV instance
#                    corresponding to a file like
#                    Project#local#file('data',
#                    'assignment.csv').
# [hits]             An enumerable collection of Amazon::HIT
#                    instances to be deleted.
def unrecord_hits_in_assignments_file(assignments_file, hits)
  record_hits_in_assignments_file(assignments_file, hits){|_hit, row| unrecord_hit_in_csv_row(row) }
end
|
|
384
|
+
|
|
385
|
+
#Removes the expires_at and assignments_duration details of some
#or all of a Project's Amazon::HITs from one of the Project's
#assignment CSV files, while leaving the HIT ids in place.
#
#Typically used when some or all of a Project's HITs have been
#processed and incorporated into a transcript and are not needed
#any more as Amazon::HITs on Amazon servers, but when we still
#want to retain the HIT ids in the Project assignment CSV.
# ==== Params
# [assignments_file] A Filer::CSV instance
#                    corresponding to a file like
#                    Project#local#file('data',
#                    'assignment.csv').
# [hits]             Optional. An enumerable collection of
#                    Amazon::HIT instances whose details are to
#                    be deleted. If not supplied, details for ALL
#                    HITs in the Project assignment CSV will be
#                    deleted.
def unrecord_hits_details_in_assignments_file(assignments_file, hits=nil)
  record_hits_in_assignments_file(assignments_file, hits){|_hit, row| unrecord_hit_details_in_csv_row(row) }
end
|
|
410
|
+
|
|
411
|
+
#Checks for Typingpool's external dependencies (ffmpeg, mp3splt
#and mp3wrap) by trying to run each command. If any appear to be
#missing, yields to the passed block an array containing the
#names of the missing commands/packages (e.g. ffmpeg). The block
#is not called when every command could be started.
# ==== Returns
# The block's return value, or nil when nothing is missing.
def if_missing_dependencies
  #TODO: Test on Linux
  missing = []
  [['ffmpeg','-version'], ['mp3splt', '-v'], ['mp3wrap']].each do |cmdline|
    begin
      #Only whether the command can be started matters; its
      #output and exit status are ignored
      Open3.capture3(*cmdline)
    rescue SystemCallError
      #A command that cannot be started surfaces as an Errno::*
      #error, e.g. Errno::ENOENT when it is not installed
      missing.push(cmdline.first)
    end #begin
  end #...].each do |cmdline|
  yield(missing) unless missing.empty?
end
|
|
426
|
+
|
|
427
|
+
#Starts recording an HTTP mock fixture (for automated testing)
#with the VCR gem, hooked into webmock. Config#amazon#key and
#Config#amazon#secret are filtered out of the recorded fixture
#and replaced by the placeholders <AWS_KEY> and <AWS_SECRET>.
#The "cassette library" dir is the directory part of the
#supplied path and the "cassette" name is its file name without
#extension. The cassette is inserted in :new_episodes record
#mode.
# ==== Params
# [fixture_path] Path to where you want the HTTP fixture
#                recorded, including filename.
# [config]       A Config instance, used to extract the
#                Config#amazon#secret and Config#amazon#key that
#                will be filtered from the fixture.
# ==== Returns
# Result of calling VCR.insert_cassette.
def vcr_record(fixture_path, config)
  library_dir = File.dirname(fixture_path)
  cassette_name = File.basename(fixture_path, '.*')
  VCR.configure do |vcr|
    vcr.cassette_library_dir = library_dir
    vcr.hook_into :webmock
    vcr.filter_sensitive_data('<AWS_KEY>'){ config.amazon.key }
    vcr.filter_sensitive_data('<AWS_SECRET>'){ config.amazon.secret }
  end
  VCR.insert_cassette(cassette_name, :record => :new_episodes)
end
|
|
449
|
+
|
|
450
|
+
#Ends the recording started by the most recent call to
#vcr_record by ejecting the current VCR cassette.
# ==== Returns
# Result of calling VCR.eject_cassette.
def vcr_stop
  VCR.eject_cassette
end
|
|
455
|
+
|
|
456
|
+
#protected
|
|
457
|
+
|
|
458
|
+
#Runs the passed block and turns one specific failure into a
#friendly abort: a Typingpool::Error whose message contains "not
#find base url" (case-insensitive), which here is taken to mean
#that previously recorded URLs do not match the current config.
#Any other Typingpool::Error is re-raised untouched.
# ==== Params
# [url_type] Optional. A word describing the kind of URL (e.g.
#            'audio'), inserted into the abort message. When
#            blank, the message simply reads "Previously recorded
#            URLs ...".
# [&block]   The code to run and monitor.
# ==== Returns
# The return value of &block.
def with_abort_on_url_mismatch(url_type='')
  #Only add the separating space when there is a label, so a blank
  #url_type does not leave a double space in the message
  label = url_type.to_s.empty? ? '' : "#{url_type} "
  begin
    yield
  rescue Typingpool::Error => exception
    raise unless exception.message.match(/not find base url/i)
    abort "Previously recorded #{label}URLs don't look right. Are you using the right config file? You may have passed in a --config argument to a previous script and forgotten to do so now."
  end #begin
end
|
|
470
|
+
|
|
471
|
+
#Walks the rows of an assignments file via each! and yields each
#row that should be updated, together with its HIT.
#
#When hits is supplied, rows are matched to HITs by comparing the
#row's 'audio_url' with the HIT's url; rows without a matching
#HIT are skipped. When hits is nil (or false), every row is
#yielded with nil in place of the HIT.
# ==== Params
# [assignments_file] An object responding to each!, e.g. a
#                    Filer::CSV instance.
# [hits]             An enumerable collection of Amazon::HIT
#                    instances, or nil for "all rows".
# [&block]           Called with (hit, csv_row).
# ==== Returns
# Whatever assignments_file.each! returns.
def record_hits_in_assignments_file(assignments_file, hits)
  lookup = hits ? hits_by_url(hits) : nil
  assignments_file.each! do |row|
    if lookup
      matched = lookup[row['audio_url']]
      next unless matched
      yield(matched, row)
    else
      yield(nil, row)
    end
  end
end
|
|
481
|
+
|
|
482
|
+
#Writes an upload status into the selected rows of an
#assignments file. For every selected row and every type, sets
#row["<type>_uploaded"] to status, but only when the row has a
#value for "<type>_url".
# ==== Params
# [assignments] The assignments file to update (passed on to
#               record_in_selected_assignments).
# [uploading]   The rows to update, identified by 'audio_url'.
# [types]       An array of asset types, e.g. ['audio'].
# [status]      The status to record, e.g. 'yes', 'no', 'maybe'.
def record_assignment_upload_status(assignments, uploading, types, status)
  record_in_selected_assignments(assignments, uploading) do |row|
    types.each do |kind|
      next unless row["#{kind}_url"]
      row["#{kind}_uploaded"] = status
    end
  end #record_in_selected_assignments...
end
|
|
489
|
+
|
|
490
|
+
def record_assignment_urls(assignments, uploading, type, urls)
|
|
491
|
+
i = 0
|
|
492
|
+
record_in_selected_assignments(assignments, uploading) do |assignment|
|
|
493
|
+
assignment["#{type}_url"] = urls[i]
|
|
494
|
+
i += 1
|
|
495
|
+
end #record_in_selected_assignments...
|
|
496
|
+
end
|
|
497
|
+
|
|
498
|
+
#Iterates over an assignments file with each! and yields only
#those rows whose 'audio_url' matches the 'audio_url' of one of
#the selected assignments.
# ==== Params
# [assignments] An object responding to each!, e.g. a Filer::CSV
#               instance.
# [selected]    An enumerable collection of assignment hashes
#               identifying the rows of interest.
# [&block]      Called with each matching row.
# ==== Returns
# Whatever assignments.each! returns.
def record_in_selected_assignments(assignments, selected)
  #Index the selected URLs directly rather than via
  #Hash[*pairs.flatten], which deep-flattens array-like rows and
  #splats the whole list into a single argument list
  selected_urls = {}
  selected.each{|assignment| selected_urls[assignment['audio_url']] = true }
  assignments.each! do |assignment|
    yield(assignment) if selected_urls[assignment['audio_url']]
  end #assignments.each!...
end
|
|
506
|
+
|
|
507
|
+
#Deletes the 'hit_expires_at' and 'hit_assignments_duration'
#entries from a single CSV row, leaving 'hit_id' in place.
# ==== Returns
# The array of the two deleted key names.
def unrecord_hit_details_in_csv_row(csv_row)
  ['hit_expires_at', 'hit_assignments_duration'].each do |column|
    csv_row.delete(column)
  end
end
|
|
510
|
+
|
|
511
|
+
#Deletes all HIT data from a single CSV row: first the HIT
#details (see unrecord_hit_details_in_csv_row), then 'hit_id'.
# ==== Returns
# The deleted 'hit_id' value, or nil if the row had none.
def unrecord_hit_in_csv_row(csv_row)
  unrecord_hit_details_in_csv_row(csv_row)
  csv_row.delete('hit_id')
end
|
|
515
|
+
|
|
516
|
+
#Names of the transcript files kept in a project folder.
# ==== Returns
# A new hash with the file name of the finished transcript under
# :done and of the partial transcript under :working.
def transcript_filename
  names = {}
  names[:done] = 'transcript.html'
  names[:working] = 'transcript_in_progress.html'
  names
end
|
|
522
|
+
|
|
523
|
+
#Indexes a collection of HITs by their url.
# ==== Params
# [hits] An enumerable collection of objects responding to url,
#        e.g. Amazon::HIT instances.
# ==== Returns
# A hash mapping each url to its HIT. If several HITs share a
# url, the last one wins.
def hits_by_url(hits)
  #Built entry by entry rather than via Hash[*pairs.flatten],
  #which deep-flattens array-like HITs and splats the whole list
  #into a single argument list
  hits.each_with_object({}){|hit, by_url| by_url[hit.url] = hit }
end
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
#Resolves uncertain upload states left behind by an interrupted
#run. For every row of the assignments file whose
#"<property>_uploaded" value is 'maybe', asks
#Typingpool::Utility.working_url? about the row's
#"<property>_url" and rewrites the status to 'yes' or 'no'
#accordingly. Other rows are left alone.
# ==== Params
# [assignments] An object responding to each!, e.g. a Filer::CSV
#               instance.
# [property]    The asset type to check, e.g. 'audio'.
def check_interrupted_uploads(assignments, property)
  status_key = "#{property}_uploaded"
  url_key = "#{property}_url"
  assignments.each! do |row|
    next unless row[status_key].to_s == 'maybe'
    reachable = Typingpool::Utility.working_url?(row[url_key])
    row[status_key] = reachable ? 'yes' : 'no'
  end #assignments.each!...
end
|
|
535
|
+
end #class << self
|
|
536
|
+
module FriendlyExceptions
  #Runs a block on some user input and converts terse argument
  #errors from our model layer into a human-friendly abort
  #message suitable for a command-line script. The message reads
  #"Could not make sense of <name> '<input>', ... . <error
  #message>." (a closing period is appended when missing).
  # ==== Params
  # [name]   A string used to refer to the input. For example
  #          'project title' or '--config argument'. Used in the
  #          goodbye message.
  # [*input] One or more values. The user input that will cause
  #          any exceptions. Used in the goodbye message.
  # [&block] The block to execute and monitor for
  #          exceptions. Will be passed [*input].
  # ==== Errors
  # Will abort with a friendly message on any exception of the
  # type Typingpool::Error::Argument.
  # ==== Returns
  # The return value of &block.
  def with_friendly_exceptions(name, *input)
    yield(*input)
  rescue Typingpool::Error::Argument => exception
    quoted = input.map{|value| "'#{value}'" }.join(', ')
    goodbye = "Could not make sense of #{name} #{quoted}. #{exception.message}"
    goodbye << '.' unless goodbye =~ /\.$/
    abort goodbye
  end
end #FriendlyExceptions
|
|
565
|
+
module CLI
|
|
566
|
+
class << self
|
|
567
|
+
include App::FriendlyExceptions
|
|
568
|
+
#Optionally takes an ostensible path to a config file, as passed
|
|
569
|
+
#as a command-line option. Checks to make sure the file exists;
|
|
570
|
+
#returns nil if it does not, returns a Config instance if it
|
|
571
|
+
#does. If no path is passed, the default config file is returned
|
|
572
|
+
#(as retrieved by Config.file with no args).
|
|
573
|
+
def config_from_arg(arg=nil)
  #No explicit path given: fall back to the default config file.
  return Config.file unless arg
  path = File.expand_path(arg)
  #File.file? is already false for a missing path, so no separate
  #existence check is needed (File.exists? is gone as of Ruby 3.2).
  return unless File.file?(path)
  Config.file(path)
end
|
|
582
|
+
|
|
583
|
+
#Outputs a friendly explanation of the --help option for
|
|
584
|
+
#appending to script usage banners.
|
|
585
|
+
def help_arg_explanation
  #Use only the script's file name, not its full path.
  script_name = File.basename($PROGRAM_NAME)
  "`#{script_name} --help` for more information."
end
|
|
588
|
+
|
|
589
|
+
#Converts a user arg into a Project instance, setting up or
|
|
590
|
+
#consulting a Config along the way.
|
|
591
|
+
# ==== Params
|
|
592
|
+
# [arg] A user-supplied argument specifying either an absolute
|
|
593
|
+
# path to a Project folder (Project#local) or the
|
|
594
|
+
# name of a project folder within
|
|
595
|
+
# [config]#transcripts.
|
|
596
|
+
# [config] A Config instance. If [arg] is an absolute path,
|
|
597
|
+
# will be modified -- Config#transcripts will be
|
|
598
|
+
# changed to match the implied transcripts dir.
|
|
599
|
+
# ==== Errors
|
|
600
|
+
# Will abort with a friendly message on any errors.
|
|
601
|
+
# ==== Returns
|
|
602
|
+
# A Project instance.
|
|
603
|
+
def project_from_arg_and_config(arg, config)
  path = if File.directory?(arg)
           #arg is itself a project folder: its parent becomes the
           #transcripts dir for this config.
           config.transcripts = File.dirname(arg)
           arg
         else
           #Otherwise arg names a project inside config.transcripts.
           abort "No 'transcripts' dir specified in your config file and '#{arg}' is not a valid path" unless config.transcripts
           candidate = File.join(config.transcripts, arg)
           #File.exist? replaces File.exists?, which was removed in
           #Ruby 3.2.
           abort "No such project '#{arg}' in dir '#{config.transcripts}'" unless File.exist?(candidate)
           abort "'#{arg}' is not a directory at '#{candidate}'" unless File.directory?(candidate)
           candidate
         end
  project = with_friendly_exceptions('project name', File.basename(path)) do
    Typingpool::Project.new(File.basename(path), config)
  end
  abort "Not a project directory at '#{path}'" unless project.local
  project
end
|
|
620
|
+
|
|
621
|
+
end #class << self
|
|
622
|
+
module Formatter
  require 'highline/import'

  #Passes text to HighLine.color with the :bold style and returns
  #the result.
  def cli_bold(text)
    bold_text = HighLine.color(text, :bold)
    bold_text
  end

  #Passes text to HighLine.color with the :reverse style and
  #returns the result.
  def cli_reverse(text)
    reversed_text = HighLine.color(text, :reverse)
    reversed_text
  end
end #Formatter
|
|
632
|
+
end #CLI
|
|
633
|
+
end #App
|
|
634
|
+
end #Typingpool
|