typingpool 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. data/Rakefile +23 -0
  2. data/bin/tp-assign +240 -0
  3. data/bin/tp-collect +50 -0
  4. data/bin/tp-config +114 -0
  5. data/bin/tp-finish +101 -0
  6. data/bin/tp-make +169 -0
  7. data/bin/tp-review +175 -0
  8. data/lib/typingpool/amazon.rb +732 -0
  9. data/lib/typingpool/app.rb +634 -0
  10. data/lib/typingpool/config.rb +344 -0
  11. data/lib/typingpool/error.rb +22 -0
  12. data/lib/typingpool/filer.rb +396 -0
  13. data/lib/typingpool/project.rb +593 -0
  14. data/lib/typingpool/template.rb +175 -0
  15. data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
  16. data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
  17. data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
  18. data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
  19. data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
  20. data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
  21. data/lib/typingpool/templates/assignment/main.css +20 -0
  22. data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
  23. data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
  24. data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
  25. data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
  26. data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
  27. data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
  28. data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
  29. data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
  30. data/lib/typingpool/templates/config.yml +21 -0
  31. data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
  32. data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
  33. data/lib/typingpool/templates/project/data/.empty_directory +0 -0
  34. data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
  35. data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
  36. data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
  37. data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
  38. data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
  39. data/lib/typingpool/templates/project/etc/transcript.css +49 -0
  40. data/lib/typingpool/templates/transcript.html.erb +23 -0
  41. data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
  42. data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
  43. data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
  44. data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
  45. data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
  46. data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
  47. data/lib/typingpool/test/fixtures/config-1 +20 -0
  48. data/lib/typingpool/test/fixtures/config-2 +25 -0
  49. data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
  50. data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
  51. data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
  52. data/lib/typingpool/test/fixtures/template.html.erb +10 -0
  53. data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
  54. data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
  55. data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
  56. data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
  57. data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
  58. data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
  59. data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
  60. data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
  61. data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
  62. data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
  63. data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
  64. data/lib/typingpool/test.rb +418 -0
  65. data/lib/typingpool/transcript.rb +181 -0
  66. data/lib/typingpool/utility.rb +272 -0
  67. data/lib/typingpool.rb +500 -0
  68. data/test/make_amazon_question_fixture.rb +24 -0
  69. data/test/make_tp_collect_fixture_1.rb +26 -0
  70. data/test/make_tp_collect_fixture_2.rb +16 -0
  71. data/test/make_tp_collect_fixture_3.rb +15 -0
  72. data/test/make_tp_collect_fixture_4.rb +17 -0
  73. data/test/make_tp_review_fixture_1.rb +26 -0
  74. data/test/make_tp_review_fixture_2.rb +30 -0
  75. data/test/make_transcript_chunks_fixture.rb +53 -0
  76. data/test/test_integration_script_1_tp_config.rb +108 -0
  77. data/test/test_integration_script_2_tp_make.rb +119 -0
  78. data/test/test_integration_script_3_tp_assign.rb +152 -0
  79. data/test/test_integration_script_4_tp_review.rb +72 -0
  80. data/test/test_integration_script_5_tp_collect.rb +44 -0
  81. data/test/test_integration_script_6_tp_finish.rb +123 -0
  82. data/test/test_unit_amazon.rb +153 -0
  83. data/test/test_unit_config.rb +94 -0
  84. data/test/test_unit_filer.rb +202 -0
  85. data/test/test_unit_project.rb +168 -0
  86. data/test/test_unit_project_local.rb +68 -0
  87. data/test/test_unit_project_remote.rb +157 -0
  88. data/test/test_unit_template.rb +111 -0
  89. data/test/test_unit_transcript.rb +77 -0
  90. metadata +234 -0
@@ -0,0 +1,418 @@
1
module Typingpool
  require 'test/unit'

  #Shared base class for Typingpool's tests. Supplies helpers for
  #locating fixtures, loading a config, and skipping tests when the
  #config lacks the credentials a test needs.
  class Test < ::Test::Unit::TestCase
    require 'nokogiri'
    require 'fileutils'

    #Hand backtraces back unmodified rather than letting MiniTest
    #filter them.
    def MiniTest.filter_backtrace(bt)
      bt
    end

    #Returns the directory three levels above this file.
    def self.app_dir
      File.dirname(File.dirname(File.dirname(__FILE__)))
    end

    #Returns the path to the test/fixtures dir under Utility.lib_dir.
    def fixtures_dir
      File.join(Utility.lib_dir, 'test', 'fixtures')
    end

    #Returns the path to the audio dir within the fixtures dir.
    def audio_dir
      File.join(fixtures_dir, 'audio')
    end

    #Returns a Config read from the default config file when that
    #file exists, otherwise a Config built from the bundled template.
    def config
      if File.exist?(File.expand_path(Config.default_file))
        Config.file
      else
        Config.from_bundled_template
      end
    end

    #Truthy when the config has an amazon section with both a key
    #and a secret.
    def amazon_credentials?(config=self.config)
      config.amazon && config.amazon.key && config.amazon.secret
    end

    #Skips the current test with a message built from reason and an
    #optional description of what is being skipped.
    def skip_with_message(reason, skipping_what='')
      skipping_what = " #{skipping_what}" unless skipping_what.empty?
      skip("Skipping#{skipping_what}: #{reason}")
      true
    end

    #Skips the current test unless amazon_credentials? holds for the
    #config. Returns nil when nothing was skipped.
    def skip_if_no_amazon_credentials(skipping_what='', config=self.config)
      return if amazon_credentials?(config)
      skip_with_message('Missing or incomplete Amazon credentials', skipping_what)
    end

    #Truthy when the config has Amazon credentials and a bucket. The
    #config param defaults to self.config, as in amazon_credentials?.
    def s3_credentials?(config=self.config)
      amazon_credentials?(config) && config.amazon.bucket
    end

    #Skips the current test when the config lacks Amazon credentials
    #or lacks an S3 bucket.
    def skip_if_no_s3_credentials(skipping_what='', config=self.config)
      return if skip_if_no_amazon_credentials(skipping_what, config)
      return if s3_credentials?(config)
      skip_with_message('No Amazon S3 credentials', skipping_what)
    end

    #Truthy when the config has an sftp section with a user, a host
    #and a url. The config param defaults to self.config, as in
    #amazon_credentials?.
    def sftp_credentials?(config=self.config)
      config.sftp && config.sftp.user && config.sftp.host && config.sftp.url
    end

    #Skips the current test unless sftp_credentials? holds for the config.
    def skip_if_no_sftp_credentials(skipping_what='', config=self.config)
      return if sftp_credentials?(config)
      skip_with_message('No SFTP credentials', skipping_what)
    end

    #Skips the current test when the config has neither S3 nor SFTP
    #credentials.
    def skip_if_no_upload_credentials(skipping_what='', config=self.config)
      return if s3_credentials?(config) || sftp_credentials?(config)
      skip_with_message("No S3 or SFTP credentials in config", skipping_what)
    end

    #Registers an at_exit hook that prints msg to STDERR.
    def add_goodbye_message(msg)
      at_exit do
        STDERR.puts msg
      end
    end

    #Returns a Config read from the fixture file config-<number>.
    def dummy_config(number=1)
      Typingpool::Config.file(File.join(fixtures_dir, "config-#{number}"))
    end

    #Returns the default project attributes shared by the tests.
    def project_default
      {
        :config_filename => '.config',
        :subtitle => "Typingpool's test interview transcription",
        :title => "Typingpool's Test & Interview",
        :chunks => '0:20',
        :unusual => ['Hack Day', 'Sunnyvale', 'Chad D'],
        :voice => ['Ryan', 'Havi, hacker'],
      }
    end

    #Yields the dir supplied by Typingpool::Utility.in_temp_dir.
    def in_temp_dir
      Typingpool::Utility.in_temp_dir{|dir| yield(dir) }
    end

    #Delegates to Typingpool::Utility.working_url?
    def working_url?(*args)
      Typingpool::Utility.working_url?(*args)
    end

    #Delegates to Typingpool::Utility.fetch_url
    def fetch_url(*args)
      Typingpool::Utility.fetch_url(*args)
    end

    #Base class for tests that run the tp-* scripts in bin/ against
    #a temporary dir holding a config file and a projects dir.
    class Script < Test
      require 'typingpool'
      require 'yaml'
      require 'open3'
      #Dir.mktmpdir (see in_temp_tp_dir) lives in tmpdir.
      require 'tmpdir'

      #Returns paths to the regular, non-dot files in the given
      #subdir of the audio fixtures dir.
      def audio_files(subdir='mp3')
        dir = File.join(audio_dir, subdir)
        visible = Dir.entries(dir).reject{|entry| entry.match(/^\./) }
        visible.map{|entry| File.join(dir, entry) }.select{|path| File.file?(path) }
      end

      #Returns the path of the default config file within dir.
      def config_path(dir)
        ::File.join(dir, project_default[:config_filename])
      end

      #Returns a Config read from the default config file within dir.
      def config_from_dir(dir)
        Config.file(config_path(dir))
      end

      #Calls Amazon.setup in sandbox mode with the config found in dir.
      def setup_amazon(dir)
        Amazon.setup(:sandbox => true, :config => config_from_dir(dir))
      end

      #Creates a temp dir, populates it via setup_temp_tp_dir and
      #yields its path.
      def in_temp_tp_dir
        ::Dir.mktmpdir('typingpool_') do |dir|
          setup_temp_tp_dir(dir)
          yield(dir)
        end
      end

      #Writes a config file into dir and creates dir/projects.
      def setup_temp_tp_dir(dir)
        make_temp_tp_dir_config(dir)
        Dir.mkdir(File.join(dir, 'projects'))
      end

      #Writes a copy of the config with its sftp section removed to
      #dir/filename. Does nothing (returns nil) when the config has
      #no S3 credentials; otherwise returns the path written.
      def setup_s3_config(dir, config=config_from_dir(dir), filename='.config_s3')
        return unless s3_credentials?(config)
        config.to_hash.delete('sftp')
        write_config(config, dir, filename)
      end

      #Like setup_s3_config, but first replaces the Amazon secret
      #with a bad one and writes to '.config_s3_bad'.
      def setup_s3_config_with_bad_password(dir, config=config_from_dir(dir))
        bad_password = 'f'
        refute_equal(config.to_hash['amazon']['secret'], bad_password)
        config.to_hash['amazon']['secret'] = bad_password
        setup_s3_config(dir, config, '.config_s3_bad')
      end

      #Points the config's transcripts and cache dirs inside dir,
      #sets the assign reward to 0.02, drops the assign 'qualify'
      #entry and writes the result to the default config file in dir.
      def make_temp_tp_dir_config(dir, config=self.config)
        config.transcripts = File.join(dir, 'projects')
        config.cache = File.join(dir, '.cache')
        config['assign']['reward'] = '0.02'
        config.assign.to_hash.delete('qualify')
        write_config(config, dir, project_default[:config_filename])
      end

      #Dumps the config's hash as YAML to dir/filename. Returns the
      #path written.
      def write_config(config, dir, filename=project_default[:config_filename])
        path = ::File.join(dir, filename)
        ::File.open(path, 'w') do |out|
          out << YAML.dump(config.to_hash)
        end
        path
      end

      #Returns the path of the default project's dir within a temp tp dir.
      def temp_tp_dir_project_dir(temp_tp_dir)
        ::File.join(temp_tp_dir, 'projects', project_default[:title])
      end

      #Returns a Project with the default title, using the config in dir.
      def temp_tp_dir_project(dir, config=config_from_dir(dir))
        Project.new(project_default[:title], config)
      end

      #Delegates to Utility.system_quietly.
      def call_script(*args)
        Utility.system_quietly(*args)
      end

      def path_to_tp_make
        ::File.join(self.class.app_dir, 'bin', 'tp-make')
      end

      def call_tp_make(*args)
        call_script(path_to_tp_make, *args)
      end

      #Runs tp-make with the default project attributes and every
      #audio fixture in audio_subdir.
      def tp_make(in_dir, config=config_path(in_dir), audio_subdir='mp3')
        call_tp_make(
                     '--config', config,
                     '--chunks', project_default[:chunks],
                     *[:title, :subtitle].map{|param| ["--#{param}", project_default[param]] }.flatten,
                     *[:voice, :unusual].map{|param| project_default[param].map{|value| ["--#{param}", value] } }.flatten,
                     *audio_files(audio_subdir).map{|path| ['--file', path]}.flatten
                     )
      end

      def path_to_tp_finish
        ::File.join(self.class.app_dir, 'bin', 'tp-finish')
      end

      def call_tp_finish(*args)
        call_script(path_to_tp_finish, *args)
      end

      #Runs tp-finish for the default project twice: first with
      #--sandbox, then without.
      def tp_finish(dir, config_path=self.config_path(dir))
        tp_finish_inside_sandbox(dir, config_path)
        tp_finish_outside_sandbox(dir, config_path)
      end

      #Runs tp-finish for the default project with --sandbox.
      def tp_finish_inside_sandbox(dir, config_path=self.config_path(dir))
        tp_finish_outside_sandbox(dir, config_path, '--sandbox')
      end

      #Runs tp-finish for the default project with any extra args.
      def tp_finish_outside_sandbox(dir, config_path=self.config_path(dir), *args)
        call_tp_finish(
                       project_default[:title],
                       '--config', config_path,
                       *args
                       )
      end

      def path_to_tp_assign
        File.join(self.class.app_dir, 'bin', 'tp-assign')
      end

      #Runs tp-assign, always with --sandbox.
      def call_tp_assign(*args)
        call_script(path_to_tp_assign, '--sandbox', *args)
      end

      #Returns the default tp-assign options shared by the tests.
      def assign_default
        {
          :template => 'interview/phone',
          :deadline => '5h',
          :lifetime => '10h',
          :approval => '10h',
          :qualify => ['approval_rate >= 90', 'hits_approved > 10'],
          :keyword => ['test', 'mp3', 'typingpooltest']
        }
      end

      #Runs tp-assign for the default project with the options from
      #assign_default.
      def tp_assign(dir, config_path=self.config_path(dir))
        call_tp_assign(
                       project_default[:title],
                       assign_default[:template],
                       '--config', config_path,
                       *[:deadline, :lifetime, :approval].map{|param| ["--#{param}", assign_default[param]] }.flatten,
                       *[:qualify, :keyword].map{|param| assign_default[param].map{|value| ["--#{param}", value] } }.flatten
                       )
      end

      def path_to_tp_collect
        File.join(self.class.app_dir, 'bin', 'tp-collect')
      end

      #Runs tp-collect with --sandbox and the given --fixture.
      def call_tp_collect(fixture_path, *args)
        call_script(path_to_tp_collect, '--sandbox', '--fixture', fixture_path, *args)
      end

      def tp_collect_with_fixture(dir, fixture_path)
        call_tp_collect(
                        fixture_path,
                        '--config', config_path(dir)
                        )
      end

      def path_to_tp_review
        File.join(self.class.app_dir, 'bin', 'tp-review')
      end

      #Runs tp-review for the default project against a fixture,
      #feeding it each choice on stdin. Any choice starting with 'r'
      #(case-insensitive) is followed by a canned reason line.
      #
      #Returns a hash with keys :out, :err and :status.
      def tp_review_with_fixture(dir, fixture_path, choices)
        input = []
        choices.each do |choice|
          input.push(choice)
          input.push("No reason - this is a test") if choice.strip.match(/^r/i)
        end
        command = [
                   path_to_tp_review,
                   '--sandbox',
                   '--fixture', fixture_path,
                   '--config', config_path(dir),
                   project_default[:title]
                  ]
        run_script_with_input(command, input)
      end

      def path_to_tp_config
        File.join(self.class.app_dir, 'bin', 'tp-config')
      end

      def tp_config(*args)
        call_script(path_to_tp_config, *args)
      end

      #Runs tp-config with the given args, feeding it each element of
      #input as a line on stdin.
      #
      #Returns a hash with keys :out, :err and :status.
      def tp_config_with_input(args, input)
        run_script_with_input([path_to_tp_config, *args], input)
      end

      #Returns the path of the named dir within the fixtures dir.
      def fixture_project_dir(name)
        File.join(fixtures_dir, name)
      end

      #Creates and returns the fixture project dir for name. Raises
      #Error::Test if it already exists.
      def make_fixture_project_dir(name)
        dir = fixture_project_dir(name)
        if File.exist?(dir)
          raise Error::Test, "Fixture project already exists for #{name} at #{dir}"
        end
        ::Dir.mkdir(dir)
        dir
      end

      #Recursively removes the fixture project dir for name.
      def remove_fixture_project_dir(name)
        #The second positional param of remove_entry_secure is
        #'force'. This call used to pass an options hash there, which
        #is truthy, so force is spelled out to keep that behaviour.
        FileUtils.remove_entry_secure(fixture_project_dir(name), true)
      end

      #For each regular file in the fixtures dir whose name starts
      #with fixture_prefix, yields the fixture's path and the path
      #the fixture maps to in the default project's data dir (the
      #file name with the prefix stripped).
      def with_fixtures_in_temp_tp_dir(dir, fixture_prefix)
        entries = Dir.entries(fixtures_dir).select do |entry|
          entry.start_with?(fixture_prefix) && File.file?(File.join(fixtures_dir, entry))
        end
        suffixes = entries.map{|entry| entry[fixture_prefix.size .. -1] }
        suffixes.each do |suffix|
          project_path = File.join(temp_tp_dir_project_dir(dir), 'data', suffix)
          fixture_path = File.join(fixtures_dir, [fixture_prefix, suffix].join)
          yield(fixture_path, project_path)
        end
      end

      #Copies each matching fixture into the project's data dir. A
      #file already at the destination is first moved aside to
      #orig_<name>.
      def copy_fixtures_to_temp_tp_dir(dir, fixture_prefix)
        with_fixtures_in_temp_tp_dir(dir, fixture_prefix) do |fixture_path, project_path|
          if File.exist?(project_path)
            FileUtils.mv(project_path, File.join(File.dirname(project_path), "orig_#{File.basename(project_path)}"))
          end
          FileUtils.cp(fixture_path, project_path)
        end
      end

      #Removes each copied fixture from the project's data dir and
      #moves any orig_<name> file back into place.
      def rm_fixtures_from_temp_tp_dir(dir, fixture_prefix)
        with_fixtures_in_temp_tp_dir(dir, fixture_prefix) do |fixture_path, project_path|
          FileUtils.rm(project_path)
          path_to_orig = File.join(File.dirname(project_path), "orig_#{File.basename(project_path)}")
          if File.exist?(path_to_orig)
            FileUtils.mv(path_to_orig, project_path)
          end
        end
      end

      #Asserts that the project dir holds a non-empty transcript
      #file. Returns the file's content.
      def assert_has_transcript(dir, transcript_file='transcript.html')
        transcript_path = File.join(temp_tp_dir_project_dir(dir), transcript_file)
        assert(File.exist?(transcript_path))
        transcript = IO.read(transcript_path)
        assert(not(transcript.empty?))
        transcript
      end

      def assert_has_partial_transcript(dir)
        assert_has_transcript(dir, 'transcript_in_progress.html')
      end

      #Asserts that exactly count rows of the project's data CSV have
      #a non-empty 'transcript' value.
      def assert_assignment_csv_has_transcription_count(count, project, which_csv='assignment.csv')
        assert_equal(count, project.local.file('data', which_csv).as(:csv).reject{|assignment| assignment['transcript'].to_s.empty?}.size)
      end

      #Asserts that the html contains exactly count audio elements.
      def assert_html_has_audio_count(count, html)
        assert_equal(count, noko(html).css('audio').size)
      end

      #For each type, asserts that the CSV is non-empty and that every
      #row's "<type>_uploaded" value equals status.
      def assert_all_assets_have_upload_status(assignment_csv, types, status)
        types.each do |type|
          recorded_uploads = assignment_csv.map{|assignment| assignment["#{type}_uploaded"] }
          refute_empty(recorded_uploads)
          assert_equal(recorded_uploads.count, recorded_uploads.select{|uploaded| uploaded == status }.count)
        end
      end

      #Asserts that the block raises Typingpool::Error::Shell with a
      #message matching regex.
      def assert_shell_error_match(regex)
        exception = assert_raise(Typingpool::Error::Shell) do
          yield
        end
        #assert_match takes the pattern first, then the string.
        assert_match(regex, exception.message)
      end

      #Inside a fresh temp tp dir, yields args plus a --config option
      #pointing at that dir's config, and asserts that the block
      #raises a shell error matching regex.
      def assert_script_abort_match(args, regex)
        in_temp_tp_dir do |dir|
          assert_shell_error_match(regex) do
            yield([*args, '--config', config_path(dir)])
          end
        end #in_temp_tp_dir do...
      end

      #Parses html with Nokogiri.
      def noko(html)
        Nokogiri::HTML(html)
      end

      #Returns the path to the vcr dir within the fixtures dir.
      def vcr_dir
        File.join(fixtures_dir, 'vcr')
      end

      private

      #Runs command (an array: program followed by its args), writes
      #each element of input to its stdin as a line, then closes
      #stdin so the child sees EOF.
      #
      #stdout and stderr are drained in separate threads that start
      #before anything is written, so the child cannot block on a
      #full pipe while this process waits on the other stream.
      #
      #Returns a hash with :out and :err (each the stream's complete
      #content, or nil when the stream was empty) and :status.
      def run_script_with_input(command, input)
        output = {}
        Open3.popen3(*command) do |stdin, stdout, stderr, wait_thr|
          readers = {
            :out => Thread.new{ stdout.gets(nil) },
            :err => Thread.new{ stderr.gets(nil) },
          }
          input.each{|line| stdin.puts(line) }
          stdin.close
          readers.each{|key, reader| output[key] = reader.value }
          output[:status] = wait_thr.value
        end #Open3.popen3...
        output
      end
    end #Script
  end #Test
end #Typingpool
@@ -0,0 +1,181 @@
1
module Typingpool
  #This is the model class for Typingpool's final and most important
  #output, a transcript of the Project audio in HTML format, with
  #embedded audio. A Transcript instance is actually an enumerable
  #container for Transcript::Chunk instances. Each Transcript::Chunk
  #corresponds to an Amazon::HIT and to an audio "chunk" (file) that
  #has been transcribed and which is part of a larger recording.
  #
  #This class is likely to be done away with in the next few point
  #versions of Typingpool. Functionality and data unique to
  #Transcript::Chunk can probably be rolled into
  #Amazon::HIT. Transcript itself can probably be folded into Project,
  #which would become a HIT container, and then we'd pass Project
  #instances to the output template.
  class Transcript
    include Enumerable

    #Get/set the title of the transcript, typically corresponds to the name of the
    #associated Project
    attr_accessor :title

    #Get/set the subtitle of the transcript, corresponds to Project#local#subtitle
    #(a.k.a data/subtitle.txt in the project dir)
    attr_accessor :subtitle

    #Constructor. Takes an optional title (see above for explanation
    #of title) and an optional array of Transcript::Chunk instances.
    def initialize(title=nil, chunks=[])
      @title = title
      @chunks = chunks
    end

    #Iterate over the Transcript::Chunk instances. Returns an
    #Enumerator when called without a block.
    def each
      return enum_for(:each) unless block_given?
      @chunks.each do |chunk|
        yield chunk
      end
    end

    #Takes an index, returns the Transcript::Chunk at that index.
    def [](index)
      @chunks[index]
    end

    #Returns chunks joined by double newlines
    def to_s
      @chunks.join("\n\n")
    end

    #Takes a Transcript::Chunk instance and adds it to the Transcript instance.
    def add_chunk(chunk)
      @chunks.push(chunk)
    end

    #Transcript::Chunk is the model class for one transcription by one
    #Mechanical Turk worker of one "chunk" (a file) of audio, which in
    #turn is a portion of a larger recording (for example, one minute
    #of a 60 minute interview). It is basically parallel and similar
    #to an Amazon::HIT instance. Transcript is a container for these
    #chunks, which know how to render themselves as text and HTML.
    class Chunk
      require 'cgi'
      require 'rubygems/text'
      include Gem::Text

      #Get/set the raw text of the transcript
      attr_accessor :body

      #Get/set the Amazon ID of the Mechanical Turk worker who
      #transcribed the audio into text
      attr_accessor :worker

      #Get/set the id of the Amazon::HIT associated with this chunk
      attr_accessor :hit

      #Get/set the id of the Project#local associated with this chunk
      attr_accessor :project

      #Return the offset associated with the chunk, in MM:SS
      #format. This corresponds to the associated audio file, which is
      #a chunk of a larger recording and which starts at a particular
      #time offset, for example from 1:00 (the offset) to 2:00 (the
      #next offset).
      #
      #
      #This should be updated to return HH:MM:SS and MM:SS.sss when
      #appropriate, since in Project#interval we use that format and
      #allow audio to be divided into such units. (TODO)
      attr_reader :offset

      #Returns the offset in seconds. So for an offset of 1:00 would return 60.
      attr_reader :offset_seconds

      #Returns the name of the remote audio file corresponding to this
      #chunk. The remote file has the project ID and pseudo random
      #characters added to it.
      attr_reader :filename

      #Returns the name of the local audio file corresponding to this
      #chunk.
      attr_reader :filename_local

      #Returns the URL of the remote audio transcribed in the body of
      #this chunk.
      attr_reader :url

      #Constructor. Takes the raw text of the transcription.
      def initialize(body)
        @body = body
      end

      #Sorts by offset seconds.
      def <=>(other)
        self.offset_seconds <=> other.offset_seconds
      end

      #Takes an URL. As an important side effect, sets various
      #attributes, including url, filename, filename_local, offset and
      #offset_seconds. So setting Chunk#url= http://whateverwhatever
      #is an important step in populating the instance.
      #
      #Raises Error::Argument::Format when the URL does not match
      #Project.url_regex.
      def url=(url)
        #http://ryantate.com/transfer/Speech.01.00.ede9b0f2aed0d35a26cef7160bc9e35e.ISEAOM.mp3
        matches = Project.url_regex.match(url) or raise Error::Argument::Format, "Unexpected format to url '#{url}'"
        @url = matches[0]
        @filename = matches[1]
        @filename_local = Project.local_basename_from_url(@url)
        #Captures 3 and 4 are read as the minutes and seconds of the offset.
        @offset = "#{matches[3]}:#{matches[4]}"
        @offset_seconds = (matches[3].to_i * 60) + matches[4].to_i
      end

      #Takes an optional specification of how many spaces to indent
      #the text by (default 0) and an optional specification of how
      #many characters to wrap at (default no wrapping).
      #
      #Returns the text with newlines normalized to Unix format, runs
      #of newlines shortened to a maximum of two newlines, leading and
      #trailing whitespace removed from each line, and the text
      #wrapped/indented as specified. The body itself is never
      #modified.
      def body_as_text(indent=nil, wrap=nil)
        text = Utility.normalize_newlines(self.body)
        #Non-destructive gsub: normalize_newlines may hand back the
        #very string stored in body.
        text = text.gsub(/\n\n+/, "\n\n")
        text = text.split("\n").map{|line| line.strip }.join("\n")
        text = wrap_text(text, wrap) if wrap
        text = indent_text(text, indent) if indent
        text
      end
      alias :to_s :body_as_text
      alias :to_str :body_as_text

      #Takes an optional count of how many characters to wrap at
      #(default 72). Returns the body, presumed to be raw text, as
      #HTML. Any HTML tags in the body are escaped. Text blocks
      #separated by double newlines are converted to HTML paragraphs,
      #while single newlines are converted to HTML BR tags. Newlines
      #are normalized as in body_as_text, and lines in the HTML source
      #are automatically wrapped as specified.
      def body_as_html(wrap=72)
        text = body_as_text
        text = CGI::escapeHTML(text)
        text = Utility.newlines_to_html(text)
        text.split("\n").map do |line|
          wrap_text(line, wrap).chomp
        end.join("\n")
      end

      protected

      #Returns a copy of text with every line prefixed by indent spaces.
      def indent_text(text, indent)
        text.gsub(/^/, " " * indent)
      end

      #Returns text wrapped at wrap characters via Gem::Text#format_text.
      def wrap_text(text, wrap=72)
        format_text(text, wrap)
      end

    end #Chunk
  end #Transcript
end #Typingpool