typingpool 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +23 -0
- data/bin/tp-assign +240 -0
- data/bin/tp-collect +50 -0
- data/bin/tp-config +114 -0
- data/bin/tp-finish +101 -0
- data/bin/tp-make +169 -0
- data/bin/tp-review +175 -0
- data/lib/typingpool/amazon.rb +732 -0
- data/lib/typingpool/app.rb +634 -0
- data/lib/typingpool/config.rb +344 -0
- data/lib/typingpool/error.rb +22 -0
- data/lib/typingpool/filer.rb +396 -0
- data/lib/typingpool/project.rb +593 -0
- data/lib/typingpool/template.rb +175 -0
- data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
- data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
- data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
- data/lib/typingpool/templates/assignment/main.css +20 -0
- data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
- data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
- data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
- data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
- data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
- data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
- data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
- data/lib/typingpool/templates/config.yml +21 -0
- data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
- data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
- data/lib/typingpool/templates/project/data/.empty_directory +0 -0
- data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
- data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
- data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
- data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
- data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
- data/lib/typingpool/templates/project/etc/transcript.css +49 -0
- data/lib/typingpool/templates/transcript.html.erb +23 -0
- data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
- data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
- data/lib/typingpool/test/fixtures/config-1 +20 -0
- data/lib/typingpool/test/fixtures/config-2 +25 -0
- data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
- data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
- data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
- data/lib/typingpool/test/fixtures/template.html.erb +10 -0
- data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
- data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
- data/lib/typingpool/test.rb +418 -0
- data/lib/typingpool/transcript.rb +181 -0
- data/lib/typingpool/utility.rb +272 -0
- data/lib/typingpool.rb +500 -0
- data/test/make_amazon_question_fixture.rb +24 -0
- data/test/make_tp_collect_fixture_1.rb +26 -0
- data/test/make_tp_collect_fixture_2.rb +16 -0
- data/test/make_tp_collect_fixture_3.rb +15 -0
- data/test/make_tp_collect_fixture_4.rb +17 -0
- data/test/make_tp_review_fixture_1.rb +26 -0
- data/test/make_tp_review_fixture_2.rb +30 -0
- data/test/make_transcript_chunks_fixture.rb +53 -0
- data/test/test_integration_script_1_tp_config.rb +108 -0
- data/test/test_integration_script_2_tp_make.rb +119 -0
- data/test/test_integration_script_3_tp_assign.rb +152 -0
- data/test/test_integration_script_4_tp_review.rb +72 -0
- data/test/test_integration_script_5_tp_collect.rb +44 -0
- data/test/test_integration_script_6_tp_finish.rb +123 -0
- data/test/test_unit_amazon.rb +153 -0
- data/test/test_unit_config.rb +94 -0
- data/test/test_unit_filer.rb +202 -0
- data/test/test_unit_project.rb +168 -0
- data/test/test_unit_project_local.rb +68 -0
- data/test/test_unit_project_remote.rb +157 -0
- data/test/test_unit_template.rb +111 -0
- data/test/test_unit_transcript.rb +77 -0
- metadata +234 -0
@@ -0,0 +1,418 @@
|
|
1
|
+
module Typingpool
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
class Test < ::Test::Unit::TestCase
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
def MiniTest.filter_backtrace(bt)
|
9
|
+
bt
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.app_dir
|
13
|
+
File.dirname(File.dirname(File.dirname(__FILE__)))
|
14
|
+
end
|
15
|
+
|
16
|
+
def fixtures_dir
|
17
|
+
File.join(Utility.lib_dir, 'test', 'fixtures')
|
18
|
+
end
|
19
|
+
|
20
|
+
def audio_dir
|
21
|
+
File.join(fixtures_dir, 'audio')
|
22
|
+
end
|
23
|
+
|
24
|
+
#Returns the Config instance the tests should run against. When a
#file exists at Config.default_file (after path expansion) the
#result of Config.file is returned; otherwise the result of
#Config.from_bundled_template is returned.
def config
  #File.exist? rather than File.exists?: the plural alias was
  #deprecated for years and removed in Ruby 3.2.
  if File.exist?(File.expand_path(Config.default_file))
    Config.file
  else
    Config.from_bundled_template
  end
end
|
31
|
+
|
32
|
+
def amazon_credentials?(config=self.config)
|
33
|
+
config.amazon && config.amazon.key && config.amazon.secret
|
34
|
+
end
|
35
|
+
|
36
|
+
#Skips the current test with a message of the form
#"Skipping <skipping_what>: <reason>" (or "Skipping: <reason>" when
#skipping_what is empty).
#
#Returns true if skip returns control to this method.
#NOTE(review): the test framework's skip presumably raises, in which
#case the trailing true is never reached - confirm before relying on
#the return value.
def skip_with_message(reason, skipping_what='')
  skipping_what = " #{skipping_what}" unless skipping_what.empty?
  #No space between the method name and the paren: `skip (...)`
  #triggers Ruby's ambiguous-argument warning.
  skip("Skipping#{skipping_what}: #{reason}")
  true
end
|
41
|
+
|
42
|
+
def skip_if_no_amazon_credentials(skipping_what='', config=self.config)
|
43
|
+
if not (amazon_credentials?(config))
|
44
|
+
skip_with_message('Missing or incomplete Amazon credentials', skipping_what)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def s3_credentials?(config)
|
49
|
+
amazon_credentials?(config) && config.amazon.bucket
|
50
|
+
end
|
51
|
+
|
52
|
+
def skip_if_no_s3_credentials(skipping_what='', config=self.config)
|
53
|
+
if not (skip_if_no_amazon_credentials(skipping_what, config))
|
54
|
+
if not(s3_credentials?(config))
|
55
|
+
skip_with_message('No Amazon S3 credentials', skipping_what)
|
56
|
+
end #if not(s3_credentials?...)
|
57
|
+
end #if not(skip_if_no_amazon_credentials...)
|
58
|
+
end
|
59
|
+
|
60
|
+
def sftp_credentials?(config)
|
61
|
+
config.sftp && config.sftp.user && config.sftp.host && config.sftp.url
|
62
|
+
end
|
63
|
+
|
64
|
+
def skip_if_no_sftp_credentials(skipping_what='', config=self.config)
|
65
|
+
if not(sftp_credentials?(config))
|
66
|
+
skip_with_message('No SFTP credentials', skipping_what)
|
67
|
+
end #if not(sftp_credentials?...
|
68
|
+
end
|
69
|
+
|
70
|
+
def skip_if_no_upload_credentials(skipping_what='', config=self.config)
|
71
|
+
if not(s3_credentials?(config) || sftp_credentials?(config))
|
72
|
+
skip_with_message("No S3 or SFTP credentials in config", skipping_what)
|
73
|
+
end #if not(s3_credentials?...
|
74
|
+
end
|
75
|
+
|
76
|
+
def add_goodbye_message(msg)
|
77
|
+
at_exit do
|
78
|
+
STDERR.puts msg
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def dummy_config(number=1)
|
83
|
+
Typingpool::Config.file(File.join(fixtures_dir, "config-#{number}"))
|
84
|
+
end
|
85
|
+
|
86
|
+
|
87
|
+
def project_default
|
88
|
+
Hash[
|
89
|
+
:config_filename => '.config',
|
90
|
+
:subtitle => "Typingpool's test interview transcription",
|
91
|
+
:title => "Typingpool's Test & Interview",
|
92
|
+
:chunks => '0:20',
|
93
|
+
:unusual => ['Hack Day', 'Sunnyvale', 'Chad D'],
|
94
|
+
:voice => ['Ryan', 'Havi, hacker'],
|
95
|
+
]
|
96
|
+
end
|
97
|
+
|
98
|
+
|
99
|
+
def in_temp_dir
|
100
|
+
Typingpool::Utility.in_temp_dir{|dir| yield(dir) }
|
101
|
+
end
|
102
|
+
|
103
|
+
def working_url?(*args)
|
104
|
+
Typingpool::Utility.working_url?(*args)
|
105
|
+
end
|
106
|
+
|
107
|
+
def fetch_url(*args)
|
108
|
+
Typingpool::Utility.fetch_url(*args)
|
109
|
+
end
|
110
|
+
|
111
|
+
class Script < Test
|
112
|
+
require 'typingpool'
|
113
|
+
require 'yaml'
|
114
|
+
require 'open3'
|
115
|
+
|
116
|
+
|
117
|
+
def audio_files(subdir='mp3')
|
118
|
+
dir = File.join(audio_dir, subdir)
|
119
|
+
Dir.entries(dir).reject{|entry| entry.match(/^\./) }.map{|entry| File.join(dir, entry)}.select{|path| File.file?(path) }
|
120
|
+
end
|
121
|
+
|
122
|
+
def config_path(dir)
|
123
|
+
::File.join(dir, project_default[:config_filename])
|
124
|
+
end
|
125
|
+
|
126
|
+
def config_from_dir(dir)
|
127
|
+
Config.file(config_path(dir))
|
128
|
+
end
|
129
|
+
|
130
|
+
|
131
|
+
def setup_amazon(dir)
|
132
|
+
Amazon.setup(:sandbox => true, :config => config_from_dir(dir))
|
133
|
+
end
|
134
|
+
|
135
|
+
|
136
|
+
def in_temp_tp_dir
|
137
|
+
::Dir.mktmpdir('typingpool_') do |dir|
|
138
|
+
setup_temp_tp_dir(dir)
|
139
|
+
yield(dir)
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
def setup_temp_tp_dir(dir)
|
144
|
+
make_temp_tp_dir_config(dir)
|
145
|
+
Dir.mkdir(File.join(dir, 'projects'))
|
146
|
+
end
|
147
|
+
|
148
|
+
def setup_s3_config(dir, config=config_from_dir(dir), filename='.config_s3')
|
149
|
+
return unless s3_credentials?(config)
|
150
|
+
config.to_hash.delete('sftp')
|
151
|
+
write_config(config, dir, filename)
|
152
|
+
end
|
153
|
+
|
154
|
+
def setup_s3_config_with_bad_password(dir, config=config_from_dir(dir))
|
155
|
+
bad_password = 'f'
|
156
|
+
refute_equal(config.to_hash['amazon']['secret'], bad_password)
|
157
|
+
config.to_hash['amazon']['secret'] = bad_password
|
158
|
+
setup_s3_config(dir, config, '.config_s3_bad')
|
159
|
+
end
|
160
|
+
|
161
|
+
def make_temp_tp_dir_config(dir, config=self.config)
|
162
|
+
config.transcripts = File.join(dir, 'projects')
|
163
|
+
config.cache = File.join(dir, '.cache')
|
164
|
+
config['assign']['reward'] = '0.02'
|
165
|
+
config.assign.to_hash.delete('qualify')
|
166
|
+
write_config(config, dir, project_default[:config_filename])
|
167
|
+
end
|
168
|
+
|
169
|
+
def write_config(config, dir, filename=project_default[:config_filename])
|
170
|
+
path = ::File.join(dir, filename)
|
171
|
+
::File.open(path, 'w') do |out|
|
172
|
+
out << YAML.dump(config.to_hash)
|
173
|
+
end
|
174
|
+
path
|
175
|
+
end
|
176
|
+
|
177
|
+
def temp_tp_dir_project_dir(temp_tp_dir)
|
178
|
+
::File.join(temp_tp_dir, 'projects', project_default[:title])
|
179
|
+
end
|
180
|
+
|
181
|
+
def temp_tp_dir_project(dir, config=config_from_dir(dir))
|
182
|
+
Project.new(project_default[:title], config)
|
183
|
+
end
|
184
|
+
|
185
|
+
def call_script(*args)
|
186
|
+
Utility.system_quietly(*args)
|
187
|
+
end
|
188
|
+
|
189
|
+
def path_to_tp_make
|
190
|
+
::File.join(self.class.app_dir, 'bin', 'tp-make')
|
191
|
+
end
|
192
|
+
|
193
|
+
def call_tp_make(*args)
|
194
|
+
call_script(path_to_tp_make, *args)
|
195
|
+
end
|
196
|
+
|
197
|
+
def tp_make(in_dir, config=config_path(in_dir), audio_subdir='mp3')
|
198
|
+
call_tp_make(
|
199
|
+
'--config', config,
|
200
|
+
'--chunks', project_default[:chunks],
|
201
|
+
*[:title, :subtitle].map{|param| ["--#{param}", project_default[param]] }.flatten,
|
202
|
+
*[:voice, :unusual].map{|param| project_default[param].map{|value| ["--#{param}", value] } }.flatten,
|
203
|
+
*audio_files(audio_subdir).map{|path| ['--file', path]}.flatten
|
204
|
+
)
|
205
|
+
end
|
206
|
+
|
207
|
+
def path_to_tp_finish
|
208
|
+
::File.join(self.class.app_dir, 'bin', 'tp-finish')
|
209
|
+
end
|
210
|
+
|
211
|
+
def call_tp_finish(*args)
|
212
|
+
call_script(path_to_tp_finish, *args)
|
213
|
+
end
|
214
|
+
|
215
|
+
def tp_finish(dir, config_path=self.config_path(dir))
|
216
|
+
tp_finish_inside_sandbox(dir, config_path)
|
217
|
+
tp_finish_outside_sandbox(dir, config_path)
|
218
|
+
end
|
219
|
+
|
220
|
+
|
221
|
+
def tp_finish_inside_sandbox(dir, config_path=self.config_path(dir))
|
222
|
+
tp_finish_outside_sandbox(dir, config_path, '--sandbox')
|
223
|
+
end
|
224
|
+
|
225
|
+
def tp_finish_outside_sandbox(dir, config_path=self.config_path(dir), *args)
|
226
|
+
call_tp_finish(
|
227
|
+
project_default[:title],
|
228
|
+
'--config', config_path,
|
229
|
+
*args
|
230
|
+
)
|
231
|
+
end
|
232
|
+
|
233
|
+
def path_to_tp_assign
|
234
|
+
File.join(self.class.app_dir, 'bin', 'tp-assign')
|
235
|
+
end
|
236
|
+
|
237
|
+
def call_tp_assign(*args)
|
238
|
+
call_script(path_to_tp_assign, '--sandbox', *args)
|
239
|
+
end
|
240
|
+
|
241
|
+
def assign_default
|
242
|
+
Hash[
|
243
|
+
:template => 'interview/phone',
|
244
|
+
:deadline => '5h',
|
245
|
+
:lifetime => '10h',
|
246
|
+
:approval => '10h',
|
247
|
+
:qualify => ['approval_rate >= 90', 'hits_approved > 10'],
|
248
|
+
:keyword => ['test', 'mp3', 'typingpooltest']
|
249
|
+
]
|
250
|
+
end
|
251
|
+
|
252
|
+
def tp_assign(dir, config_path=config_path(dir))
|
253
|
+
call_tp_assign(
|
254
|
+
project_default[:title],
|
255
|
+
assign_default[:template],
|
256
|
+
'--config', config_path,
|
257
|
+
*[:deadline, :lifetime, :approval].map{|param| ["--#{param}", assign_default[param]] }.flatten,
|
258
|
+
*[:qualify, :keyword].map{|param| assign_default[param].map{|value| ["--#{param}", value] } }.flatten
|
259
|
+
)
|
260
|
+
end
|
261
|
+
|
262
|
+
def path_to_tp_collect
|
263
|
+
File.join(self.class.app_dir, 'bin', 'tp-collect')
|
264
|
+
end
|
265
|
+
|
266
|
+
def call_tp_collect(fixture_path, *args)
|
267
|
+
call_script(path_to_tp_collect, '--sandbox', '--fixture', fixture_path, *args)
|
268
|
+
end
|
269
|
+
|
270
|
+
def tp_collect_with_fixture(dir, fixture_path)
|
271
|
+
call_tp_collect(
|
272
|
+
fixture_path,
|
273
|
+
'--config', config_path(dir)
|
274
|
+
)
|
275
|
+
end
|
276
|
+
|
277
|
+
|
278
|
+
def path_to_tp_review
|
279
|
+
File.join(self.class.app_dir, 'bin', 'tp-review')
|
280
|
+
end
|
281
|
+
|
282
|
+
#Runs tp-review in sandbox mode against the given VCR fixture,
#feeding it each entry of choices on standard input. Any choice that
#starts with "r" (case-insensitive, ignoring surrounding whitespace)
#is followed by a canned rejection reason line.
#
#Returns a hash with :out (everything the script wrote to stdout, or
#nil if nothing), :err (same for stderr) and :status (the
#Process::Status of the script).
def tp_review_with_fixture(dir, fixture_path, choices)
  output = {}
  Open3.popen3(path_to_tp_review, '--sandbox', '--fixture', fixture_path, '--config', config_path(dir), project_default[:title]) do |stdin, stdout, stderr, wait_thr|
    #Drain stderr on its own thread so a script that writes a lot to
    #stderr cannot block on a full pipe while we are reading stdout.
    err_reader = Thread.new { stderr.gets(nil) }
    choices.each do |choice|
      stdin.puts(choice)
      stdin.puts("No reason - this is a test") if choice.strip.match(/^r/i)
    end
    #Close stdin before reading: a script that waits for EOF on its
    #input would otherwise never finish, and stdout.gets(nil) would
    #block forever.
    stdin.close
    output[:out] = stdout.gets(nil)
    output[:err] = err_reader.value
    output[:status] = wait_thr.value
  end
  output
end
|
298
|
+
|
299
|
+
def path_to_tp_config
|
300
|
+
File.join(self.class.app_dir, 'bin', 'tp-config')
|
301
|
+
end
|
302
|
+
|
303
|
+
def tp_config(*args)
|
304
|
+
call_script(path_to_tp_config, *args)
|
305
|
+
end
|
306
|
+
|
307
|
+
#Runs tp-config with the given array of command-line args, writing
#each entry of input to the script's standard input as a line.
#
#Returns a hash with :out (everything the script wrote to stdout, or
#nil if nothing), :err (same for stderr) and :status (the
#Process::Status of the script).
def tp_config_with_input(args, input)
  output = {}
  Open3.popen3(path_to_tp_config, *args) do |stdin, stdout, stderr, wait_thr|
    #Drain stderr on its own thread so a script that writes a lot to
    #stderr cannot block on a full pipe while we are reading stdout.
    err_reader = Thread.new { stderr.gets(nil) }
    input.each { |sending| stdin.puts(sending) }
    #Close stdin before reading: a script that waits for EOF on its
    #input would otherwise never finish, and stdout.gets(nil) would
    #block forever.
    stdin.close
    output[:out] = stdout.gets(nil)
    output[:err] = err_reader.value
    output[:status] = wait_thr.value
  end #Open3.popen3...
  output
end
|
320
|
+
|
321
|
+
def fixture_project_dir(name)
|
322
|
+
File.join(fixtures_dir, name)
|
323
|
+
end
|
324
|
+
|
325
|
+
#Creates the fixture project directory for name (as located by
#fixture_project_dir) and returns its path. Raises Error::Test if
#something already exists at that path.
def make_fixture_project_dir(name)
  dir = fixture_project_dir(name)
  #File.exist? rather than File.exists?, which was removed in Ruby 3.2.
  if File.exist?(dir)
    raise Error::Test, "Fixture project already exists for #{name} at #{dir}"
  end
  ::Dir.mkdir(dir)
  dir
end
|
333
|
+
|
334
|
+
#Recursively deletes the fixture project directory for name (as
#located by fixture_project_dir).
#
#The second positional parameter of FileUtils.remove_entry_secure is
#`force`, not an options hash. The `:secure => true` hash previously
#passed here was truthy, so it silently turned on force mode and
#swallowed every removal error. It is omitted so that failures
#surface.
def remove_fixture_project_dir(name)
  FileUtils.remove_entry_secure(fixture_project_dir(name))
end
|
337
|
+
|
338
|
+
#Finds every regular file in fixtures_dir whose name starts with
#fixture_prefix and yields a pair of paths for each: the fixture's
#own path, and the path under the temp project's 'data' directory
#named after the fixture with the prefix stripped.
#
#Returns the array of prefix-stripped fixture names.
def with_fixtures_in_temp_tp_dir(dir, fixture_prefix)
  fixtures = Dir.entries(fixtures_dir).select do |entry|
    entry.start_with?(fixture_prefix) && File.file?(File.join(fixtures_dir, entry))
  end
  fixtures = fixtures.map { |entry| entry[fixture_prefix.size .. -1] }
  fixtures.each do |fixture|
    project_path = File.join(temp_tp_dir_project_dir(dir), 'data', fixture)
    fixture_path = File.join(fixtures_dir, [fixture_prefix, fixture].join)
    yield(fixture_path, project_path)
  end
end
|
347
|
+
|
348
|
+
#Copies each fixture matched by with_fixtures_in_temp_tp_dir into its
#place in the temp project. If a file is already present at the
#destination it is first moved aside to "orig_<basename>" in the same
#directory, so that rm_fixtures_from_temp_tp_dir can restore it.
def copy_fixtures_to_temp_tp_dir(dir, fixture_prefix)
  with_fixtures_in_temp_tp_dir(dir, fixture_prefix) do |fixture_path, project_path|
    #File.exist? rather than File.exists?, which was removed in Ruby 3.2.
    if File.exist?(project_path)
      backup_path = File.join(File.dirname(project_path), "orig_#{File.basename(project_path)}")
      FileUtils.mv(project_path, backup_path)
    end
    FileUtils.cp(fixture_path, project_path)
  end
end
|
356
|
+
|
357
|
+
#Removes each fixture matched by with_fixtures_in_temp_tp_dir from
#the temp project. If an "orig_<basename>" file sits next to the
#removed file, it is moved back into place.
def rm_fixtures_from_temp_tp_dir(dir, fixture_prefix)
  with_fixtures_in_temp_tp_dir(dir, fixture_prefix) do |fixture_path, project_path|
    FileUtils.rm(project_path)
    path_to_orig = File.join(File.dirname(project_path), "orig_#{File.basename(project_path)}")
    #File.exist? rather than File.exists?, which was removed in Ruby 3.2.
    FileUtils.mv(path_to_orig, project_path) if File.exist?(path_to_orig)
  end
end
|
366
|
+
|
367
|
+
#Asserts that transcript_file exists in the temp project directory
#(as located by temp_tp_dir_project_dir) and is not empty. Returns
#the file's contents.
def assert_has_transcript(dir, transcript_file='transcript.html')
  transcript_path = File.join(temp_tp_dir_project_dir(dir), transcript_file)
  #File.exist? rather than File.exists?, which was removed in Ruby 3.2.
  assert(File.exist?(transcript_path))
  transcript = IO.read(transcript_path)
  assert(!transcript.empty?)
  transcript
end
|
373
|
+
|
374
|
+
def assert_has_partial_transcript(dir)
|
375
|
+
assert_has_transcript(dir, 'transcript_in_progress.html')
|
376
|
+
end
|
377
|
+
|
378
|
+
def assert_assignment_csv_has_transcription_count(count, project, which_csv='assignment.csv')
|
379
|
+
assert_equal(count, project.local.file('data', which_csv).as(:csv).reject{|assignment| assignment['transcript'].to_s.empty?}.size)
|
380
|
+
end
|
381
|
+
|
382
|
+
def assert_html_has_audio_count(count, html)
|
383
|
+
assert_equal(count, noko(html).css('audio').size)
|
384
|
+
end
|
385
|
+
|
386
|
+
def assert_all_assets_have_upload_status(assignment_csv, types, status)
|
387
|
+
types.each do |type|
|
388
|
+
recorded_uploads = assignment_csv.map{|assignment| assignment["#{type}_uploaded"] }
|
389
|
+
refute_empty(recorded_uploads)
|
390
|
+
assert_equal(recorded_uploads.count, recorded_uploads.select{|uploaded| uploaded == status }.count)
|
391
|
+
end
|
392
|
+
end
|
393
|
+
|
394
|
+
#Asserts that the given block raises Typingpool::Error::Shell and
#that the exception's message matches regex.
def assert_shell_error_match(regex)
  exception = assert_raise(Typingpool::Error::Shell) do
    yield
  end
  #assert_match takes the pattern first and the string under test
  #second; the arguments were previously swapped.
  assert_match(regex, exception.message)
end
|
400
|
+
|
401
|
+
def assert_script_abort_match(args, regex)
|
402
|
+
in_temp_tp_dir do |dir|
|
403
|
+
assert_shell_error_match(regex) do
|
404
|
+
yield([*args, '--config', config_path(dir)])
|
405
|
+
end
|
406
|
+
end #in_temp_tp_dir do...
|
407
|
+
end
|
408
|
+
|
409
|
+
def noko(html)
|
410
|
+
Nokogiri::HTML(html)
|
411
|
+
end
|
412
|
+
|
413
|
+
def vcr_dir
|
414
|
+
File.join(fixtures_dir, 'vcr')
|
415
|
+
end
|
416
|
+
end #Script
|
417
|
+
end #Test
|
418
|
+
end #Typingpool
|
@@ -0,0 +1,181 @@
|
|
1
|
+
module Typingpool
|
2
|
+
#This is the model class for Typingpool's final and most important
|
3
|
+
#output, a transcript of the Project audio in HTML format, with
|
4
|
+
#embedded audio. A Transcript instance is actually an enumerable
|
5
|
+
#container for Transcript::Chunk instances. Each Transcript::Chunk
|
6
|
+
#corresponds to an Amazon::HIT and to an audio "chunk" (file) that
|
7
|
+
#has been transcribed and which is part of a larger recording.
|
8
|
+
#
|
9
|
+
#This class is likely to be done away with in the next few point
|
10
|
+
#versions of Typingpool. Functionality and data unique to
|
11
|
+
#Transcript::Chunk can probably be rolled into
|
12
|
+
#Amazon::HIT. Transcript itself can probably be folded into Project,
|
13
|
+
#which would become a HIT container, and then we'd pass Project
|
14
|
+
#instances to the output template.
|
15
|
+
class Transcript
|
16
|
+
include Enumerable
|
17
|
+
|
18
|
+
#Get/set the title of the transcript, typically corresponds to the name of the
|
19
|
+
#associated Project
|
20
|
+
attr_accessor :title
|
21
|
+
|
22
|
+
#Get/set the subtitle of the transcript, corresponds to Project#local#subtitle
|
23
|
+
#(a.k.a data/subtitle.txt in the project dir)
|
24
|
+
attr_accessor :subtitle
|
25
|
+
|
26
|
+
#Constructor. Takes an optional title (see above for explanation
|
27
|
+
#of title) and an optional array of Transcript::Chunk instances.
|
28
|
+
def initialize(title=nil, chunks=[])
|
29
|
+
@title = title
|
30
|
+
@chunks = chunks
|
31
|
+
end
|
32
|
+
|
33
|
+
#Iterate over the Transcript::Chunk instances
|
34
|
+
#Yields each chunk held by this instance, in stored order. When
#called without a block, returns an Enumerator over the chunks
#instead of raising LocalJumpError.
def each
  return enum_for(:each) unless block_given?
  @chunks.each do |chunk|
    yield chunk
  end
end
|
39
|
+
|
40
|
+
#Takes an index, returns the Transcript::Chunk at that index.
|
41
|
+
def [](index)
|
42
|
+
@chunks[index]
|
43
|
+
end
|
44
|
+
|
45
|
+
#Returns chunks joined by double newlines
|
46
|
+
def to_s
|
47
|
+
@chunks.join("\n\n")
|
48
|
+
end
|
49
|
+
|
50
|
+
#Takes a Transcript::Chunk instance and adds it to the Transcript instance.
|
51
|
+
def add_chunk(chunk)
|
52
|
+
@chunks.push(chunk)
|
53
|
+
end
|
54
|
+
|
55
|
+
#Transcript::Chunk is the model class for one transcription by one
|
56
|
+
#Mechanical Turk worker of one "chunk" (a file) of audio, which in
|
57
|
+
#turn is a portion of a larger recording (for example, one minute
|
58
|
+
#of a 60 minute interview). It is basically parallel and similar
|
59
|
+
#to an Amazon::HIT instance. Transcript is a container for these
|
60
|
+
#chunks, which know how to render themselves as text and HTML.
|
61
|
+
class Chunk
|
62
|
+
require 'cgi'
|
63
|
+
require 'rubygems/text'
|
64
|
+
include Gem::Text
|
65
|
+
|
66
|
+
#Get/set the raw text of the transcript
|
67
|
+
attr_accessor :body
|
68
|
+
|
69
|
+
#Get/set the Amazon ID of the Mechanical Turk worker who
|
70
|
+
#transcribed the audio into text
|
71
|
+
attr_accessor :worker
|
72
|
+
|
73
|
+
#Get/set the id of the Amazon::HIT associated with this chunk
|
74
|
+
attr_accessor :hit
|
75
|
+
|
76
|
+
#Get/set the id of the Project#local associated with this chunk
|
77
|
+
attr_accessor :project
|
78
|
+
|
79
|
+
#Return the offset associated with the chunk, in MM:SS
|
80
|
+
#format. This corresponds to the associated audio file, which is
|
81
|
+
#a chunk of a larger recording and which starts at a particular
|
82
|
+
#time offset, for example from 1:00 (the offset) to 2:00 (the
|
83
|
+
#next offset).
|
84
|
+
#
|
85
|
+
#
|
86
|
+
#This should be updated to return HH:MM:SS and MM:SS.sss when
|
87
|
+
#appropriate, since in Project#interval we use that format and
|
88
|
+
#allow audio to be divided into such units. (TODO)
|
89
|
+
attr_reader :offset
|
90
|
+
|
91
|
+
#Returns the offset in seconds. So for an offset of 1:00 would return 60.
|
92
|
+
attr_reader :offset_seconds
|
93
|
+
|
94
|
+
#Returns the name of the remote audio file corresponding to this
|
95
|
+
#chunk. The remote file has the project ID and pseudo random
|
96
|
+
#characters added to it.
|
97
|
+
attr_reader :filename
|
98
|
+
|
99
|
+
#Returns the name of the local audio file corresponding to this
|
100
|
+
#chunk.
|
101
|
+
attr_reader :filename_local
|
102
|
+
|
103
|
+
#Returns the URL of the remote audio transcribed in the body of
|
104
|
+
#this chunk.
|
105
|
+
attr_reader :url
|
106
|
+
|
107
|
+
#Constructor. Takes the raw text of the transcription.
|
108
|
+
def initialize(body)
|
109
|
+
@body = body
|
110
|
+
end
|
111
|
+
|
112
|
+
#Sorts by offset seconds.
|
113
|
+
def <=>(other)
|
114
|
+
self.offset_seconds <=> other.offset_seconds
|
115
|
+
end
|
116
|
+
|
117
|
+
#Takes an URL. As an important side effect, sets various
|
118
|
+
#attributes, including url, filename, filename_local, offset and
|
119
|
+
#offset_seconds. So setting Chunk#url= http://whateverwhatever
|
120
|
+
#is an important step in populating the instance.
|
121
|
+
def url=(url)
|
122
|
+
#http://ryantate.com/transfer/Speech.01.00.ede9b0f2aed0d35a26cef7160bc9e35e.ISEAOM.mp3
|
123
|
+
matches = Project.url_regex.match(url) or raise Error::Argument::Format, "Unexpected format to url '#{url}'"
|
124
|
+
@url = matches[0]
|
125
|
+
@filename = matches[1]
|
126
|
+
@filename_local = Project.local_basename_from_url(@url)
|
127
|
+
@offset = "#{matches[3]}:#{matches[4]}"
|
128
|
+
@offset_seconds = (matches[3].to_i * 60) + matches[4].to_i
|
129
|
+
end
|
130
|
+
|
131
|
+
#Takes an optional specification of how many spaces to indent
|
132
|
+
#the text by (default 0) and an optional specification of how
|
133
|
+
#many characters to wrap at (default no wrapping).
|
134
|
+
#
|
135
|
+
#Returns the text with newlines normalized to Unix format, runs
|
136
|
+
#of newlines shortened to a maximum of two newlines, leading and
|
137
|
+
#trailing whitespace removed from each line, and the text
|
138
|
+
#wrapped/indented as specified.
|
139
|
+
#Takes an optional specification of how many spaces to indent the
#text by (default: no indenting) and an optional specification of
#how many characters to wrap at (default: no wrapping).
#
#Returns the body after it has been passed through
#Utility.normalize_newlines, with runs of two or more newlines
#collapsed to exactly two, leading and trailing whitespace stripped
#from each line, and then wrapped and/or indented as requested.
def body_as_text(indent=nil, wrap=nil)
  text = Utility.normalize_newlines(body)
  #Non-destructive gsub: if normalize_newlines hands back the very
  #string it was given, gsub! would edit the stored body in place.
  text = text.gsub(/\n\n+/, "\n\n")
  text = text.split("\n").map { |line| line.strip }.join("\n")
  text = wrap_text(text, wrap) if wrap
  text = indent_text(text, indent) if indent
  text
end
|
148
|
+
alias :to_s :body_as_text
|
149
|
+
alias :to_str :body_as_text
|
150
|
+
|
151
|
+
#Takes an optional count of how many characters to wrap at
|
152
|
+
#(default 72). Returns the body, presumed to be raw text, as
|
153
|
+
#HTML. Any HTML tags in the body are escaped. Text blocks
|
154
|
+
#separated by double newlines are converted to HTML paragraphs,
|
155
|
+
#while single newlines are converted to HTML BR tags. Newlines
|
156
|
+
#are normalized as in body_as_text, and lines in the HTML source
|
157
|
+
#are automatically wrapped as specified.
|
158
|
+
#Takes an optional count of how many characters to wrap at
#(default 72). Returns the body, presumed to be raw text, as HTML:
#the result of body_as_text is HTML-escaped, passed through
#Utility.newlines_to_html, and then each line of the resulting HTML
#source is wrapped at the requested width.
def body_as_html(wrap=72)
  text = body_as_text
  text = CGI::escapeHTML(text)
  text = Utility.newlines_to_html(text)
  #Honor the caller's wrap width; this was previously hard-coded to
  #72, which made the parameter a no-op.
  text.split("\n").map { |line| wrap_text(line, wrap).chomp }.join("\n")
end
|
167
|
+
|
168
|
+
protected
|
169
|
+
|
170
|
+
def indent_text(text, indent)
|
171
|
+
text.gsub!(/^/, " " * indent)
|
172
|
+
text
|
173
|
+
end
|
174
|
+
|
175
|
+
def wrap_text(text, wrap=72)
|
176
|
+
format_text(text, wrap)
|
177
|
+
end
|
178
|
+
|
179
|
+
end #Chunk
|
180
|
+
end #Transcript
|
181
|
+
end #Typingpool
|