typingpool 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +23 -0
- data/bin/tp-assign +240 -0
- data/bin/tp-collect +50 -0
- data/bin/tp-config +114 -0
- data/bin/tp-finish +101 -0
- data/bin/tp-make +169 -0
- data/bin/tp-review +175 -0
- data/lib/typingpool/amazon.rb +732 -0
- data/lib/typingpool/app.rb +634 -0
- data/lib/typingpool/config.rb +344 -0
- data/lib/typingpool/error.rb +22 -0
- data/lib/typingpool/filer.rb +396 -0
- data/lib/typingpool/project.rb +593 -0
- data/lib/typingpool/template.rb +175 -0
- data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
- data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
- data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
- data/lib/typingpool/templates/assignment/main.css +20 -0
- data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
- data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
- data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
- data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
- data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
- data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
- data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
- data/lib/typingpool/templates/config.yml +21 -0
- data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
- data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
- data/lib/typingpool/templates/project/data/.empty_directory +0 -0
- data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
- data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
- data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
- data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
- data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
- data/lib/typingpool/templates/project/etc/transcript.css +49 -0
- data/lib/typingpool/templates/transcript.html.erb +23 -0
- data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
- data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
- data/lib/typingpool/test/fixtures/config-1 +20 -0
- data/lib/typingpool/test/fixtures/config-2 +25 -0
- data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
- data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
- data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
- data/lib/typingpool/test/fixtures/template.html.erb +10 -0
- data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
- data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
- data/lib/typingpool/test.rb +418 -0
- data/lib/typingpool/transcript.rb +181 -0
- data/lib/typingpool/utility.rb +272 -0
- data/lib/typingpool.rb +500 -0
- data/test/make_amazon_question_fixture.rb +24 -0
- data/test/make_tp_collect_fixture_1.rb +26 -0
- data/test/make_tp_collect_fixture_2.rb +16 -0
- data/test/make_tp_collect_fixture_3.rb +15 -0
- data/test/make_tp_collect_fixture_4.rb +17 -0
- data/test/make_tp_review_fixture_1.rb +26 -0
- data/test/make_tp_review_fixture_2.rb +30 -0
- data/test/make_transcript_chunks_fixture.rb +53 -0
- data/test/test_integration_script_1_tp_config.rb +108 -0
- data/test/test_integration_script_2_tp_make.rb +119 -0
- data/test/test_integration_script_3_tp_assign.rb +152 -0
- data/test/test_integration_script_4_tp_review.rb +72 -0
- data/test/test_integration_script_5_tp_collect.rb +44 -0
- data/test/test_integration_script_6_tp_finish.rb +123 -0
- data/test/test_unit_amazon.rb +153 -0
- data/test/test_unit_config.rb +94 -0
- data/test/test_unit_filer.rb +202 -0
- data/test/test_unit_project.rb +168 -0
- data/test/test_unit_project_local.rb +68 -0
- data/test/test_unit_project_remote.rb +157 -0
- data/test/test_unit_template.rb +111 -0
- data/test/test_unit_transcript.rb +77 -0
- metadata +234 -0
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
module Typingpool
|
|
2
|
+
require 'test/unit'
|
|
3
|
+
|
|
4
|
+
class Test < ::Test::Unit::TestCase
|
|
5
|
+
require 'nokogiri'
|
|
6
|
+
require 'fileutils'
|
|
7
|
+
|
|
8
|
+
#Replaces MiniTest's backtrace filter with a pass-through: the
#backtrace handed in is returned exactly as received.
def MiniTest.filter_backtrace(bt)
  bt
end
|
|
11
|
+
|
|
12
|
+
#Returns the directory three levels above this source file.
def self.app_dir
  here = File.dirname(__FILE__)
  parent = File.dirname(here)
  File.dirname(parent)
end
|
|
15
|
+
|
|
16
|
+
#Returns the path of the test/fixtures directory under
#Utility.lib_dir.
def fixtures_dir
  File.join(Utility.lib_dir, *%w[test fixtures])
end
|
|
19
|
+
|
|
20
|
+
#Returns the path of the audio subdirectory of fixtures_dir.
def audio_dir
  fixtures = fixtures_dir
  File.join(fixtures, 'audio')
end
|
|
23
|
+
|
|
24
|
+
#Returns the config used by the tests: Config.file when a file
#exists at the expanded Config.default_file path, otherwise
#Config.from_bundled_template.
def config
  #File.exist? replaces File.exists?, which was deprecated and then
  #removed in Ruby 3.2.
  if File.exist?(File.expand_path(Config.default_file))
    Config.file
  else
    Config.from_bundled_template
  end
end
|
|
31
|
+
|
|
32
|
+
#Truthy when the config (default: self.config) has an amazon section
#with both a key and a secret. Returns the secret in that case,
#otherwise the first falsy value encountered.
def amazon_credentials?(config=self.config)
  amazon = config.amazon
  amazon && amazon.key && amazon.secret
end
|
|
35
|
+
|
|
36
|
+
#Calls skip with a message of the form "Skipping <what>: <reason>"
#(or "Skipping: <reason>" when skipping_what is empty), then
#returns true if skip returns.
def skip_with_message(reason, skipping_what='')
  subject = skipping_what.empty? ? skipping_what : " #{skipping_what}"
  skip("Skipping#{subject}: #{reason}")
  true
end
|
|
41
|
+
|
|
42
|
+
#Skips (via skip_with_message) unless amazon_credentials? is truthy
#for the config. Returns nil when credentials are present.
def skip_if_no_amazon_credentials(skipping_what='', config=self.config)
  return if amazon_credentials?(config)
  skip_with_message('Missing or incomplete Amazon credentials', skipping_what)
end
|
|
47
|
+
|
|
48
|
+
#Truthy when amazon_credentials? holds for the config and the
#amazon section also names a bucket.
def s3_credentials?(config)
  has_amazon = amazon_credentials?(config)
  has_amazon && config.amazon.bucket
end
|
|
51
|
+
|
|
52
|
+
#Skips when Amazon credentials are missing; failing that, skips
#when s3_credentials? is falsy. Returns nil when nothing more is
#done here.
def skip_if_no_s3_credentials(skipping_what='', config=self.config)
  return if skip_if_no_amazon_credentials(skipping_what, config)
  return if s3_credentials?(config)
  skip_with_message('No Amazon S3 credentials', skipping_what)
end
|
|
59
|
+
|
|
60
|
+
#Truthy when the config has an sftp section with a user, a host and
#a url.
def sftp_credentials?(config)
  sftp = config.sftp
  sftp && sftp.user && sftp.host && sftp.url
end
|
|
63
|
+
|
|
64
|
+
#Skips (via skip_with_message) unless sftp_credentials? is truthy
#for the config. Returns nil when credentials are present.
def skip_if_no_sftp_credentials(skipping_what='', config=self.config)
  return if sftp_credentials?(config)
  skip_with_message('No SFTP credentials', skipping_what)
end
|
|
69
|
+
|
|
70
|
+
#Skips (via skip_with_message) when the config has neither S3 nor
#SFTP credentials. Returns nil when either kind is present.
def skip_if_no_upload_credentials(skipping_what='', config=self.config)
  return if s3_credentials?(config) || sftp_credentials?(config)
  skip_with_message("No S3 or SFTP credentials in config", skipping_what)
end
|
|
75
|
+
|
|
76
|
+
#Registers an at_exit hook that prints msg to STDERR when the
#process exits.
def add_goodbye_message(msg)
  at_exit { STDERR.puts msg }
end
|
|
81
|
+
|
|
82
|
+
#Loads the fixture config file named "config-<number>" (default 1)
#from fixtures_dir via Typingpool::Config.file.
def dummy_config(number=1)
  fixture_path = File.join(fixtures_dir, "config-#{number}")
  Typingpool::Config.file(fixture_path)
end
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
#Returns a fresh hash of the default project parameters used by the
#test scripts (config filename, title, subtitle, chunk length,
#unusual words and voices).
def project_default
  {
    :config_filename => '.config',
    :subtitle => "Typingpool's test interview transcription",
    :title => "Typingpool's Test & Interview",
    :chunks => '0:20',
    :unusual => ['Hack Day', 'Sunnyvale', 'Chad D'],
    :voice => ['Ryan', 'Havi, hacker']
  }
end
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
#Delegates to Typingpool::Utility.in_temp_dir, yielding the
#directory it provides to the caller's block.
def in_temp_dir
  Typingpool::Utility.in_temp_dir do |tmp|
    yield(tmp)
  end
end
|
|
102
|
+
|
|
103
|
+
#Convenience wrapper: forwards all arguments to
#Typingpool::Utility.working_url? and returns its result.
def working_url?(*args)
  Typingpool::Utility.working_url?(*args)
end
|
|
106
|
+
|
|
107
|
+
#Convenience wrapper: forwards all arguments to
#Typingpool::Utility.fetch_url and returns its result.
def fetch_url(*args)
  Typingpool::Utility.fetch_url(*args)
end
|
|
110
|
+
|
|
111
|
+
class Script < Test
|
|
112
|
+
require 'typingpool'
|
|
113
|
+
require 'yaml'
|
|
114
|
+
require 'open3'
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
#Returns the full paths of the regular files in the given
#subdirectory of audio_dir (default 'mp3'), ignoring entries whose
#name matches /^\./.
def audio_files(subdir='mp3')
  dir = File.join(audio_dir, subdir)
  visible = Dir.entries(dir).reject { |entry| entry =~ /^\./ }
  paths = visible.map { |entry| File.join(dir, entry) }
  paths.select { |path| File.file?(path) }
end
|
|
121
|
+
|
|
122
|
+
#Returns the path of the project config file inside dir, using the
#filename from project_default[:config_filename].
def config_path(dir)
  filename = project_default[:config_filename]
  ::File.join(dir, filename)
end
|
|
125
|
+
|
|
126
|
+
#Loads, via Config.file, the config file located at
#config_path(dir).
def config_from_dir(dir)
  path = config_path(dir)
  Config.file(path)
end
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
#Calls Amazon.setup in sandbox mode with the config loaded from
#dir.
def setup_amazon(dir)
  project_config = config_from_dir(dir)
  Amazon.setup(:sandbox => true, :config => project_config)
end
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
#Creates a temporary directory (prefix 'typingpool_'), prepares it
#with setup_temp_tp_dir and yields its path to the block.
def in_temp_tp_dir
  ::Dir.mktmpdir('typingpool_') do |tmp|
    setup_temp_tp_dir(tmp)
    yield(tmp)
  end
end
|
|
142
|
+
|
|
143
|
+
#Writes the test config into dir (make_temp_tp_dir_config) and
#creates the 'projects' subdirectory.
def setup_temp_tp_dir(dir)
  make_temp_tp_dir_config(dir)
  projects_dir = File.join(dir, 'projects')
  Dir.mkdir(projects_dir)
end
|
|
147
|
+
|
|
148
|
+
#When the config has S3 credentials, removes its 'sftp' section and
#writes it to dir under filename (default '.config_s3'), returning
#whatever write_config returns. Returns nil without writing
#anything when S3 credentials are missing.
def setup_s3_config(dir, config=config_from_dir(dir), filename='.config_s3')
  if s3_credentials?(config)
    config.to_hash.delete('sftp')
    write_config(config, dir, filename)
  end
end
|
|
153
|
+
|
|
154
|
+
#Overwrites the Amazon secret in the config with a bogus value
#(after asserting the real secret differs from it) and writes the
#result to '.config_s3_bad' via setup_s3_config.
def setup_s3_config_with_bad_password(dir, config=config_from_dir(dir))
  bogus_secret = 'f'
  amazon = config.to_hash['amazon']
  refute_equal(amazon['secret'], bogus_secret)
  amazon['secret'] = bogus_secret
  setup_s3_config(dir, config, '.config_s3_bad')
end
|
|
160
|
+
|
|
161
|
+
#Points the config's transcripts and cache settings into dir, sets
#the assignment reward to '0.02', removes the 'qualify' assignment
#setting and writes the config to dir under the default config
#filename. Returns whatever write_config returns.
def make_temp_tp_dir_config(dir, config=self.config)
  projects_dir = File.join(dir, 'projects')
  cache_path = File.join(dir, '.cache')
  config.transcripts = projects_dir
  config.cache = cache_path
  config['assign']['reward'] = '0.02'
  config.assign.to_hash.delete('qualify')
  write_config(config, dir, project_default[:config_filename])
end
|
|
168
|
+
|
|
169
|
+
#Dumps config.to_hash as YAML into the file dir/filename (default:
#the project's default config filename) and returns that path.
def write_config(config, dir, filename=project_default[:config_filename])
  path = ::File.join(dir, filename)
  ::File.open(path, 'w') { |out| out.write(YAML.dump(config.to_hash)) }
  path
end
|
|
176
|
+
|
|
177
|
+
#Returns the path of the default test project's directory inside
#the 'projects' directory of temp_tp_dir.
def temp_tp_dir_project_dir(temp_tp_dir)
  title = project_default[:title]
  ::File.join(temp_tp_dir, 'projects', title)
end
|
|
180
|
+
|
|
181
|
+
#Builds a Project for the default test title using the given config
#(default: the config stored in dir).
def temp_tp_dir_project(dir, config=config_from_dir(dir))
  title = project_default[:title]
  Project.new(title, config)
end
|
|
184
|
+
|
|
185
|
+
#Runs a script by forwarding all arguments to
#Utility.system_quietly and returns its result.
def call_script(*args)
  Utility.system_quietly(*args)
end
|
|
188
|
+
|
|
189
|
+
#Returns the path of the tp-make script in the app's bin directory.
def path_to_tp_make
  bin_dir = ::File.join(self.class.app_dir, 'bin')
  ::File.join(bin_dir, 'tp-make')
end
|
|
192
|
+
|
|
193
|
+
#Runs the tp-make script with the given arguments via call_script.
def call_tp_make(*args)
  script = path_to_tp_make
  call_script(script, *args)
end
|
|
196
|
+
|
|
197
|
+
#Runs tp-make with the default project parameters: the config path
#(default: the config inside in_dir), the chunk length, title and
#subtitle, one --voice/--unusual flag per default value, and one
#--file flag per audio file in audio_subdir (default 'mp3').
def tp_make(in_dir, config=config_path(in_dir), audio_subdir='mp3')
  defaults = project_default
  args = ['--config', config, '--chunks', defaults[:chunks]]
  args.concat([:title, :subtitle].map { |param| ["--#{param}", defaults[param]] }.flatten)
  args.concat([:voice, :unusual].map { |param| defaults[param].map { |value| ["--#{param}", value] } }.flatten)
  args.concat(audio_files(audio_subdir).map { |path| ['--file', path] }.flatten)
  call_tp_make(*args)
end
|
|
206
|
+
|
|
207
|
+
#Returns the path of the tp-finish script in the app's bin
#directory.
def path_to_tp_finish
  bin_dir = ::File.join(self.class.app_dir, 'bin')
  ::File.join(bin_dir, 'tp-finish')
end
|
|
210
|
+
|
|
211
|
+
#Runs the tp-finish script with the given arguments via
#call_script.
def call_tp_finish(*args)
  script = path_to_tp_finish
  call_script(script, *args)
end
|
|
214
|
+
|
|
215
|
+
#Finishes the default test project twice: first inside the sandbox,
#then outside it. Returns the result of the second call.
def tp_finish(dir, config_path=self.config_path(dir))
  tp_finish_inside_sandbox(dir, config_path)
  tp_finish_outside_sandbox(dir, config_path)
end
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
#Runs tp-finish for the default test project with the --sandbox
#flag added.
def tp_finish_inside_sandbox(dir, config_path=self.config_path(dir))
  sandbox_flag = '--sandbox'
  tp_finish_outside_sandbox(dir, config_path, sandbox_flag)
end
|
|
224
|
+
|
|
225
|
+
#Runs tp-finish for the default test project title with the given
#config path, followed by any extra arguments.
def tp_finish_outside_sandbox(dir, config_path=self.config_path(dir), *args)
  title = project_default[:title]
  call_tp_finish(title, '--config', config_path, *args)
end
|
|
232
|
+
|
|
233
|
+
#Returns the path of the tp-assign script in the app's bin
#directory.
def path_to_tp_assign
  bin_dir = File.join(self.class.app_dir, 'bin')
  File.join(bin_dir, 'tp-assign')
end
|
|
236
|
+
|
|
237
|
+
#Runs the tp-assign script via call_script, always passing
#--sandbox ahead of the given arguments.
def call_tp_assign(*args)
  script = path_to_tp_assign
  call_script(script, '--sandbox', *args)
end
|
|
240
|
+
|
|
241
|
+
#Returns a fresh hash of the default tp-assign parameters used by
#the test scripts (template, deadline, lifetime, approval,
#qualifications and keywords).
def assign_default
  {
    :template => 'interview/phone',
    :deadline => '5h',
    :lifetime => '10h',
    :approval => '10h',
    :qualify => ['approval_rate >= 90', 'hits_approved > 10'],
    :keyword => ['test', 'mp3', 'typingpooltest']
  }
end
|
|
251
|
+
|
|
252
|
+
#Runs tp-assign for the default test project and template with the
#given config path, the default deadline/lifetime/approval values,
#and one --qualify/--keyword flag per default value.
def tp_assign(dir, config_path=config_path(dir))
  defaults = assign_default
  single_valued = [:deadline, :lifetime, :approval].map { |param| ["--#{param}", defaults[param]] }.flatten
  multi_valued = [:qualify, :keyword].map { |param| defaults[param].map { |value| ["--#{param}", value] } }.flatten
  call_tp_assign(
    project_default[:title],
    defaults[:template],
    '--config', config_path,
    *single_valued,
    *multi_valued
  )
end
|
|
261
|
+
|
|
262
|
+
#Returns the path of the tp-collect script in the app's bin
#directory.
def path_to_tp_collect
  bin_dir = File.join(self.class.app_dir, 'bin')
  File.join(bin_dir, 'tp-collect')
end
|
|
265
|
+
|
|
266
|
+
#Runs the tp-collect script via call_script with --sandbox and
#--fixture <fixture_path>, followed by the given arguments.
def call_tp_collect(fixture_path, *args)
  script = path_to_tp_collect
  call_script(script, '--sandbox', '--fixture', fixture_path, *args)
end
|
|
269
|
+
|
|
270
|
+
#Runs tp-collect against the given fixture, using the config file
#inside dir.
def tp_collect_with_fixture(dir, fixture_path)
  call_tp_collect(fixture_path, '--config', config_path(dir))
end
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
#Returns the path of the tp-review script in the app's bin
#directory.
def path_to_tp_review
  bin_dir = File.join(self.class.app_dir, 'bin')
  File.join(bin_dir, 'tp-review')
end
|
|
281
|
+
|
|
282
|
+
#Runs tp-review in the sandbox against the given fixture for the
#default test project, feeding each entry of choices to the
#script's stdin. A choice starting with 'r' (case-insensitive,
#after stripping) is followed by a canned rejection reason.
#
#Returns a hash with :out (everything written to stdout, or nil if
#nothing), :err (likewise for stderr) and :status (the
#Process::Status of the script).
def tp_review_with_fixture(dir, fixture_path, choices)
  output = {}
  Open3.popen3(path_to_tp_review, '--sandbox', '--fixture', fixture_path, '--config', config_path(dir), project_default[:title]) do |stdin, stdout, stderr, wait_thr|
    #Drain stderr in its own thread. Reading stdout to EOF first
    #deadlocks as soon as the child fills the stderr pipe buffer.
    err_reader = Thread.new { stderr.gets(nil) }
    choices.each do |choice|
      stdin.puts(choice)
      stdin.puts("No reason - this is a test") if choice.strip.match(/^r/i)
    end
    #Close stdin before reading so a child that asks for more input
    #than was supplied sees EOF instead of blocking forever.
    stdin.close
    output[:out] = stdout.gets(nil)
    output[:err] = err_reader.value
    [stdout, stderr].each { |stream| stream.close }
    output[:status] = wait_thr.value
  end
  output
end
|
|
298
|
+
|
|
299
|
+
#Returns the path of the tp-config script in the app's bin
#directory.
def path_to_tp_config
  bin_dir = File.join(self.class.app_dir, 'bin')
  File.join(bin_dir, 'tp-config')
end
|
|
302
|
+
|
|
303
|
+
#Runs the tp-config script with the given arguments via
#call_script.
def tp_config(*args)
  script = path_to_tp_config
  call_script(script, *args)
end
|
|
306
|
+
|
|
307
|
+
#Runs tp-config with the given argument array, writing each entry
#of input to the script's stdin as a line.
#
#Returns a hash with :out (everything written to stdout, or nil if
#nothing), :err (likewise for stderr) and :status (the
#Process::Status of the script).
def tp_config_with_input(args, input)
  output = {}
  Open3.popen3(path_to_tp_config, *args) do |stdin, stdout, stderr, wait_thr|
    #Drain stderr in its own thread. Reading stdout to EOF first
    #deadlocks as soon as the child fills the stderr pipe buffer.
    err_reader = Thread.new { stderr.gets(nil) }
    input.each { |sending| stdin.puts(sending) }
    #Close stdin before reading so a child that asks for more input
    #than was supplied sees EOF instead of blocking forever.
    stdin.close
    output[:out] = stdout.gets(nil)
    output[:err] = err_reader.value
    [stdout, stderr].each { |stream| stream.close }
    output[:status] = wait_thr.value
  end #Open3.popen3...
  output
end
|
|
320
|
+
|
|
321
|
+
#Returns the path of the fixture project called name inside
#fixtures_dir.
def fixture_project_dir(name)
  fixtures = fixtures_dir
  File.join(fixtures, name)
end
|
|
324
|
+
|
|
325
|
+
#Creates the fixture project directory for name and returns its
#path. Raises Error::Test if something already exists at that
#path.
def make_fixture_project_dir(name)
  dir = fixture_project_dir(name)
  #File.exist? replaces File.exists?, which was deprecated and then
  #removed in Ruby 3.2.
  if File.exist?(dir)
    raise Error::Test, "Fixture project already exists for #{name} at #{dir}"
  end
  ::Dir.mkdir(dir)
  dir
end
|
|
333
|
+
|
|
334
|
+
#Recursively removes the fixture project directory for name.
#
#FileUtils.remove_entry_secure takes (path, force = false). The
#previous `:secure => true` options hash was received as that
#positional force argument and, being truthy, enabled it. The flag
#is now passed explicitly so the call says what it does: errors
#during removal (for example a missing directory) are ignored.
def remove_fixture_project_dir(name)
  FileUtils.remove_entry_secure(fixture_project_dir(name), true)
end
|
|
337
|
+
|
|
338
|
+
#Finds every regular file in fixtures_dir whose name starts with
#fixture_prefix and, for each, yields two paths: the fixture file
#itself and the matching destination inside the test project's
#'data' directory (the fixture name with the prefix removed).
#Returns the array of prefix-stripped names.
def with_fixtures_in_temp_tp_dir(dir, fixture_prefix)
  source_dir = fixtures_dir
  matching = Dir.entries(source_dir).select do |entry|
    entry.start_with?(fixture_prefix) && File.file?(File.join(source_dir, entry))
  end
  suffixes = matching.map { |entry| entry[fixture_prefix.size..-1] }
  suffixes.each do |suffix|
    project_path = File.join(temp_tp_dir_project_dir(dir), 'data', suffix)
    fixture_path = File.join(source_dir, "#{fixture_prefix}#{suffix}")
    yield(fixture_path, project_path)
  end
end
|
|
347
|
+
|
|
348
|
+
#Copies every fixture matching fixture_prefix into the test
#project's data directory. A file already present at the
#destination is first moved aside to "orig_<name>" in the same
#directory so rm_fixtures_from_temp_tp_dir can restore it.
def copy_fixtures_to_temp_tp_dir(dir, fixture_prefix)
  with_fixtures_in_temp_tp_dir(dir, fixture_prefix) do |fixture_path, project_path|
    #File.exist? replaces File.exists?, which was deprecated and
    #then removed in Ruby 3.2.
    if File.exist?(project_path)
      FileUtils.mv(project_path, File.join(File.dirname(project_path), "orig_#{File.basename(project_path)}"))
    end
    FileUtils.cp(fixture_path, project_path)
  end
end
|
|
356
|
+
|
|
357
|
+
#Undoes copy_fixtures_to_temp_tp_dir: removes each copied fixture
#from the test project's data directory and, where an
#"orig_<name>" backup exists, moves it back into place.
def rm_fixtures_from_temp_tp_dir(dir, fixture_prefix)
  with_fixtures_in_temp_tp_dir(dir, fixture_prefix) do |fixture_path, project_path|
    FileUtils.rm(project_path)
    path_to_orig = File.join(File.dirname(project_path), "orig_#{File.basename(project_path)}")
    #File.exist? replaces File.exists?, which was deprecated and
    #then removed in Ruby 3.2.
    if File.exist?(path_to_orig)
      FileUtils.mv(path_to_orig, project_path)
    end
  end
end
|
|
366
|
+
|
|
367
|
+
#Asserts that the test project in dir contains a non-empty
#transcript file (default 'transcript.html') and returns the
#file's content.
def assert_has_transcript(dir, transcript_file='transcript.html')
  transcript_path = File.join(temp_tp_dir_project_dir(dir), transcript_file)
  #File.exist? replaces File.exists?, which was deprecated and then
  #removed in Ruby 3.2.
  assert(File.exist?(transcript_path))
  transcript = IO.read(transcript_path)
  assert(!transcript.empty?)
  transcript
end
|
|
373
|
+
|
|
374
|
+
#Same as assert_has_transcript, but for the in-progress transcript
#file 'transcript_in_progress.html'.
def assert_has_partial_transcript(dir)
  partial_file = 'transcript_in_progress.html'
  assert_has_transcript(dir, partial_file)
end
|
|
377
|
+
|
|
378
|
+
#Asserts that exactly count rows of the project's data CSV
#(default 'assignment.csv') have a non-empty 'transcript' value.
def assert_assignment_csv_has_transcription_count(count, project, which_csv='assignment.csv')
  assignments = project.local.file('data', which_csv).as(:csv)
  transcribed = assignments.reject { |assignment| assignment['transcript'].to_s.empty? }
  assert_equal(count, transcribed.size)
end
|
|
381
|
+
|
|
382
|
+
#Asserts that the parsed HTML contains exactly count elements
#matching the CSS selector 'audio'.
def assert_html_has_audio_count(count, html)
  audio_tags = noko(html).css('audio')
  assert_equal(count, audio_tags.size)
end
|
|
385
|
+
|
|
386
|
+
#For each asset type, collects the "<type>_uploaded" value of every
#assignment row, asserts that the collection is not empty and that
#every value equals status.
def assert_all_assets_have_upload_status(assignment_csv, types, status)
  types.each do |type|
    column = "#{type}_uploaded"
    recorded_uploads = assignment_csv.map { |assignment| assignment[column] }
    refute_empty(recorded_uploads)
    matching = recorded_uploads.count { |uploaded| uploaded == status }
    assert_equal(recorded_uploads.count, matching)
  end
end
|
|
393
|
+
|
|
394
|
+
#Asserts that the block raises Typingpool::Error::Shell and that
#the exception's message matches regex.
def assert_shell_error_match(regex)
  exception = assert_raise(Typingpool::Error::Shell) do
    yield
  end
  #assert_match takes the pattern first and the string under test
  #second. With the arguments reversed, current Minitest escapes the
  #message into a Regexp and then evaluates Regexp =~ Regexp, which
  #raises TypeError instead of performing the intended match.
  assert_match(regex, exception.message)
end
|
|
400
|
+
|
|
401
|
+
#Inside a fresh temp Typingpool dir, yields args extended with
#'--config' and that dir's config path, and asserts that the block
#raises a shell error whose message matches regex.
def assert_script_abort_match(args, regex)
  in_temp_tp_dir do |dir|
    assert_shell_error_match(regex) { yield([*args, '--config', config_path(dir)]) }
  end #in_temp_tp_dir do...
end
|
|
408
|
+
|
|
409
|
+
#Parses the given HTML with Nokogiri::HTML and returns the result.
def noko(html)
  Nokogiri::HTML(html)
end
|
|
412
|
+
|
|
413
|
+
#Returns the path of the 'vcr' subdirectory of fixtures_dir.
def vcr_dir
  fixtures = fixtures_dir
  File.join(fixtures, 'vcr')
end
|
|
416
|
+
end #Script
|
|
417
|
+
end #Test
|
|
418
|
+
end #Typingpool
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
module Typingpool
|
|
2
|
+
#This is the model class for Typingpool's final and most important
|
|
3
|
+
#output, a transcript of the Project audio in HTML format, with
|
|
4
|
+
#embedded audio. A Transcript instance is actually an enumerable
|
|
5
|
+
#container for Transcript::Chunk instances. Each Transcript::Chunk
|
|
6
|
+
#corresponds to an Amazon::HIT and to an audio "chunk" (file) that
|
|
7
|
+
#has been transcribed and which is part of a larger recording.
|
|
8
|
+
#
|
|
9
|
+
#This class is likely to be done away with in the next few point
|
|
10
|
+
#versions of Typingpool. Functionality and data unique to
|
|
11
|
+
#Transcript::Chunk can probably be rolled into
|
|
12
|
+
#Amazon::HIT. Transcript itself can probably be folded into Project,
|
|
13
|
+
#which would become a HIT container, and then we'd pass Project
|
|
14
|
+
#instances to the output template.
|
|
15
|
+
class Transcript
|
|
16
|
+
include Enumerable
|
|
17
|
+
|
|
18
|
+
#Get/set the title of the transcript, typically corresponds to the name of the
|
|
19
|
+
#associated Project
|
|
20
|
+
attr_accessor :title
|
|
21
|
+
|
|
22
|
+
#Get/set the subtitle of the transcript, corresponds to Project#local#subtitle
|
|
23
|
+
#(a.k.a data/subtitle.txt in the project dir)
|
|
24
|
+
attr_accessor :subtitle
|
|
25
|
+
|
|
26
|
+
#Constructor. Both arguments are optional: a title (see above for
#explanation of title) and an array of Transcript::Chunk instances
#to start with (default: a new empty array).
def initialize(title=nil, chunks=[])
  @title, @chunks = title, chunks
end
|
|
32
|
+
|
|
33
|
+
#Iterates over the Transcript::Chunk instances, yielding each in
#turn. Returns an Enumerator when called without a block, as the
#core collections do, instead of raising LocalJumpError.
def each
  return enum_for(:each) unless block_given?
  @chunks.each do |chunk|
    yield chunk
  end
end
|
|
39
|
+
|
|
40
|
+
#Indexes into the underlying chunk array: takes an index and
#returns the Transcript::Chunk stored there.
def [](index)
  @chunks[index]
end
|
|
44
|
+
|
|
45
|
+
#Returns the whole transcript as one string: every chunk, in
#order, separated by a blank line (two newlines).
def to_s
  separator = "\n\n"
  @chunks.join(separator)
end
|
|
49
|
+
|
|
50
|
+
#Appends the given Transcript::Chunk instance to the Transcript
#instance. Returns the underlying chunk array.
def add_chunk(chunk)
  @chunks << chunk
end
|
|
54
|
+
|
|
55
|
+
#Transcript::Chunk is the model class for one transcription by one
|
|
56
|
+
#Mechanical Turk worker of one "chunk" (a file) of audio, which in
|
|
57
|
+
#turn is a portion of a larger recording (for example, one minute
|
|
58
|
+
#of a 60 minute interview). It is basically parallel and similar
|
|
59
|
+
#to an Amazon::HIT instance. Transcript is a container for these
|
|
60
|
+
#chunks, which know how to render themselves as text and HTML.
|
|
61
|
+
class Chunk
|
|
62
|
+
require 'cgi'
|
|
63
|
+
require 'rubygems/text'
|
|
64
|
+
include Gem::Text
|
|
65
|
+
|
|
66
|
+
#Get/set the raw text of the transcript
|
|
67
|
+
attr_accessor :body
|
|
68
|
+
|
|
69
|
+
#Get/set the Amazon ID of the Mechanical Turk worker who
|
|
70
|
+
#transcribed the audio into text
|
|
71
|
+
attr_accessor :worker
|
|
72
|
+
|
|
73
|
+
#Get/set the id of the Amazon::HIT associated with this chunk
|
|
74
|
+
attr_accessor :hit
|
|
75
|
+
|
|
76
|
+
#Get/set the id of the Project#local associated with this chunk
|
|
77
|
+
attr_accessor :project
|
|
78
|
+
|
|
79
|
+
#Return the offset associated with the chunk, in MM:SS
|
|
80
|
+
#format. This corresponds to the associated audio file, which is
|
|
81
|
+
#a chunk of a larger recording and which starts at a particular
|
|
82
|
+
#time offset, for example from 1:00 (the offset) to 2:00 (the
|
|
83
|
+
#next offset).
|
|
84
|
+
#
|
|
85
|
+
#
|
|
86
|
+
#This should be updated to return HH:MM:SS and MM:SS.sss when
|
|
87
|
+
#appropriate, since in Project#interval we use that format and
|
|
88
|
+
#allow audio to be divided into such units. (TODO)
|
|
89
|
+
attr_reader :offset
|
|
90
|
+
|
|
91
|
+
#Returns the offset in seconds. So an offset of 1:00 would return 60.
|
|
92
|
+
attr_reader :offset_seconds
|
|
93
|
+
|
|
94
|
+
#Returns the name of the remote audio file corresponding to this
|
|
95
|
+
#chunk. The remote file has the project ID and pseudo random
|
|
96
|
+
#characters added to it.
|
|
97
|
+
attr_reader :filename
|
|
98
|
+
|
|
99
|
+
#Returns the name of the local audio file corresponding to this
|
|
100
|
+
#chunk.
|
|
101
|
+
attr_reader :filename_local
|
|
102
|
+
|
|
103
|
+
#Returns the URL of the remote audio transcribed in the body of
|
|
104
|
+
#this chunk.
|
|
105
|
+
attr_reader :url
|
|
106
|
+
|
|
107
|
+
#Constructor. Stores the raw text of the transcription as the
#chunk's body.
def initialize(body)
  @body = body
end
|
|
111
|
+
|
|
112
|
+
#Orders chunks by comparing their offset_seconds values.
def <=>(other)
  offset_seconds <=> other.offset_seconds
end
|
|
116
|
+
|
|
117
|
+
#Takes a URL. As an important side effect, sets various
#attributes, including url, filename, filename_local, offset and
#offset_seconds. So setting Chunk#url= http://whateverwhatever
#is an important step in populating the instance.
#
#Raises Error::Argument::Format when the URL does not match
#Project.url_regex.
def url=(url)
  #http://ryantate.com/transfer/Speech.01.00.ede9b0f2aed0d35a26cef7160bc9e35e.ISEAOM.mp3
  matches = Project.url_regex.match(url)
  raise Error::Argument::Format, "Unexpected format to url '#{url}'" unless matches
  #Captures 3 and 4 are the two parts of the offset (see the offset
  #attribute): the first is multiplied by 60 to get seconds.
  minutes, seconds = matches[3], matches[4]
  @url = matches[0]
  @filename = matches[1]
  @filename_local = Project.local_basename_from_url(@url)
  @offset = "#{minutes}:#{seconds}"
  @offset_seconds = (minutes.to_i * 60) + seconds.to_i
end
|
|
130
|
+
|
|
131
|
+
#Takes an optional specification of how many spaces to indent
#the text by (default: no indenting) and an optional specification
#of how many characters to wrap at (default: no wrapping).
#
#Returns the text with newlines normalized by
#Utility.normalize_newlines, runs of newlines shortened to a
#maximum of two newlines, leading and trailing whitespace removed
#from each line, and the text wrapped/indented as specified.
#
#The stored body is never modified: the newline collapsing uses a
#non-mutating gsub, so the method neither depends on
#Utility.normalize_newlines returning a fresh string nor fails
#when the body is frozen.
def body_as_text(indent=nil, wrap=nil)
  text = Utility.normalize_newlines(body)
  text = text.gsub(/\n\n+/, "\n\n")
  text = text.split("\n").map { |line| line.strip }.join("\n")
  text = wrap_text(text, wrap) if wrap
  text = indent_text(text, indent) if indent
  text
end
|
|
148
|
+
alias :to_s :body_as_text
|
|
149
|
+
alias :to_str :body_as_text
|
|
150
|
+
|
|
151
|
+
#Takes an optional count of how many characters to wrap at
#(default 72; pass nil to disable wrapping). Returns the body,
#presumed to be raw text, as HTML. Any HTML tags in the body are
#escaped. The escaped text is then passed through
#Utility.newlines_to_html, and each line of the resulting HTML
#source is wrapped at the requested width.
#
#The wrap argument used to be ignored in favour of a hard-coded
#72; it is now honoured.
def body_as_html(wrap=72)
  text = body_as_text
  text = CGI::escapeHTML(text)
  text = Utility.newlines_to_html(text)
  text = text.split("\n").map do |line|
    wrap ? wrap_text(line, wrap).chomp : line
  end.join("\n")
  text
end
|
|
167
|
+
|
|
168
|
+
protected
|
|
169
|
+
|
|
170
|
+
#Prefixes every line of text with indent spaces. Modifies the
#given string in place and returns it.
def indent_text(text, indent)
  padding = " " * indent
  text.gsub!(/^/, padding)
  text
end
|
|
174
|
+
|
|
175
|
+
#Wraps text at wrap characters (default 72) by delegating to
#format_text, which Chunk gets from Gem::Text.
def wrap_text(text, wrap=72)
  width = wrap
  format_text(text, width)
end
|
|
178
|
+
|
|
179
|
+
end #Chunk
|
|
180
|
+
end #Transcript
|
|
181
|
+
end #Typingpool
|