typingpool 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. data/Rakefile +23 -0
  2. data/bin/tp-assign +240 -0
  3. data/bin/tp-collect +50 -0
  4. data/bin/tp-config +114 -0
  5. data/bin/tp-finish +101 -0
  6. data/bin/tp-make +169 -0
  7. data/bin/tp-review +175 -0
  8. data/lib/typingpool/amazon.rb +732 -0
  9. data/lib/typingpool/app.rb +634 -0
  10. data/lib/typingpool/config.rb +344 -0
  11. data/lib/typingpool/error.rb +22 -0
  12. data/lib/typingpool/filer.rb +396 -0
  13. data/lib/typingpool/project.rb +593 -0
  14. data/lib/typingpool/template.rb +175 -0
  15. data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
  16. data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
  17. data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
  18. data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
  19. data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
  20. data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
  21. data/lib/typingpool/templates/assignment/main.css +20 -0
  22. data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
  23. data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
  24. data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
  25. data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
  26. data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
  27. data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
  28. data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
  29. data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
  30. data/lib/typingpool/templates/config.yml +21 -0
  31. data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
  32. data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
  33. data/lib/typingpool/templates/project/data/.empty_directory +0 -0
  34. data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
  35. data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
  36. data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
  37. data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
  38. data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
  39. data/lib/typingpool/templates/project/etc/transcript.css +49 -0
  40. data/lib/typingpool/templates/transcript.html.erb +23 -0
  41. data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
  42. data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
  43. data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
  44. data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
  45. data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
  46. data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
  47. data/lib/typingpool/test/fixtures/config-1 +20 -0
  48. data/lib/typingpool/test/fixtures/config-2 +25 -0
  49. data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
  50. data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
  51. data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
  52. data/lib/typingpool/test/fixtures/template.html.erb +10 -0
  53. data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
  54. data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
  55. data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
  56. data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
  57. data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
  58. data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
  59. data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
  60. data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
  61. data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
  62. data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
  63. data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
  64. data/lib/typingpool/test.rb +418 -0
  65. data/lib/typingpool/transcript.rb +181 -0
  66. data/lib/typingpool/utility.rb +272 -0
  67. data/lib/typingpool.rb +500 -0
  68. data/test/make_amazon_question_fixture.rb +24 -0
  69. data/test/make_tp_collect_fixture_1.rb +26 -0
  70. data/test/make_tp_collect_fixture_2.rb +16 -0
  71. data/test/make_tp_collect_fixture_3.rb +15 -0
  72. data/test/make_tp_collect_fixture_4.rb +17 -0
  73. data/test/make_tp_review_fixture_1.rb +26 -0
  74. data/test/make_tp_review_fixture_2.rb +30 -0
  75. data/test/make_transcript_chunks_fixture.rb +53 -0
  76. data/test/test_integration_script_1_tp_config.rb +108 -0
  77. data/test/test_integration_script_2_tp_make.rb +119 -0
  78. data/test/test_integration_script_3_tp_assign.rb +152 -0
  79. data/test/test_integration_script_4_tp_review.rb +72 -0
  80. data/test/test_integration_script_5_tp_collect.rb +44 -0
  81. data/test/test_integration_script_6_tp_finish.rb +123 -0
  82. data/test/test_unit_amazon.rb +153 -0
  83. data/test/test_unit_config.rb +94 -0
  84. data/test/test_unit_filer.rb +202 -0
  85. data/test/test_unit_project.rb +168 -0
  86. data/test/test_unit_project_local.rb +68 -0
  87. data/test/test_unit_project_remote.rb +157 -0
  88. data/test/test_unit_template.rb +111 -0
  89. data/test/test_unit_transcript.rb +77 -0
  90. metadata +234 -0
@@ -0,0 +1,418 @@
1
+ module Typingpool
2
+ require 'test/unit'
3
+
4
+ class Test < ::Test::Unit::TestCase
5
+ require 'nokogiri'
6
+ require 'fileutils'
7
+
8
#Overrides MiniTest's backtrace filter so failures show the complete,
#unfiltered backtrace: whatever is passed in is handed straight back.
def MiniTest.filter_backtrace(bt); bt; end
11
+
12
#Returns the directory three levels up from this file's path.
def self.app_dir
  containing_dir = File.dirname(__FILE__)
  File.dirname(File.dirname(containing_dir))
end
15
+
16
#Returns the path of the test/fixtures directory under Utility.lib_dir.
def fixtures_dir
  File.join(Utility.lib_dir, *%w[test fixtures])
end
19
+
20
#Returns the path of the 'audio' directory inside fixtures_dir.
def audio_dir
  parent = fixtures_dir
  File.join(parent, 'audio')
end
23
+
24
#Returns Config.file when a file exists at the (expanded)
#Config.default_file path; otherwise returns
#Config.from_bundled_template.
def config
  #File.exist? rather than File.exists?: the latter was deprecated
  #and then removed in Ruby 3.2.
  if File.exist?(File.expand_path(Config.default_file))
    Config.file
  else
    Config.from_bundled_template
  end
end
31
+
32
#Truthy when the config has an amazon section with both a key and a
#secret. Returns the secret in that case, otherwise the first
#nil/false value encountered.
def amazon_credentials?(config=self.config)
  amazon = config.amazon
  amazon && amazon.key && amazon.secret
end
35
+
36
#Calls skip with a message of the form "Skipping <what>: <reason>",
#where the " <what>" part is left out when skipping_what is empty.
#Returns true if skip returns.
def skip_with_message(reason, skipping_what='')
  label = skipping_what.empty? ? skipping_what : " #{skipping_what}"
  skip("Skipping#{label}: #{reason}")
  true
end
41
+
42
#Skips (via skip_with_message) unless amazon_credentials?(config) is
#truthy. Returns nil when credentials are present.
def skip_if_no_amazon_credentials(skipping_what='', config=self.config)
  return if amazon_credentials?(config)
  skip_with_message('Missing or incomplete Amazon credentials', skipping_what)
end
47
+
48
#Truthy when amazon_credentials?(config) is truthy and the config's
#amazon section also has a bucket. Returns the bucket in that case.
def s3_credentials?(config)
  has_amazon = amazon_credentials?(config)
  has_amazon ? config.amazon.bucket : has_amazon
end
51
+
52
#First runs skip_if_no_amazon_credentials; if that returns a falsy
#value and s3_credentials?(config) is also falsy, skips with an S3
#specific message. Returns nil in every other case.
def skip_if_no_s3_credentials(skipping_what='', config=self.config)
  return if skip_if_no_amazon_credentials(skipping_what, config)
  return if s3_credentials?(config)
  skip_with_message('No Amazon S3 credentials', skipping_what)
end
59
+
60
#Truthy when the config has an sftp section with a user, a host and
#a url. Returns the url in that case, otherwise the first nil/false
#value encountered.
def sftp_credentials?(config)
  sftp = config.sftp
  sftp && sftp.user && sftp.host && sftp.url
end
63
+
64
#Skips (via skip_with_message) unless sftp_credentials?(config) is
#truthy. Returns nil when credentials are present.
def skip_if_no_sftp_credentials(skipping_what='', config=self.config)
  return if sftp_credentials?(config)
  skip_with_message('No SFTP credentials', skipping_what)
end
69
+
70
#Skips (via skip_with_message) when the config has neither S3 nor
#SFTP credentials. Returns nil when either kind is present.
def skip_if_no_upload_credentials(skipping_what='', config=self.config)
  return if s3_credentials?(config) || sftp_credentials?(config)
  skip_with_message("No S3 or SFTP credentials in config", skipping_what)
end
75
+
76
#Registers an at_exit handler that prints msg to STDERR when the
#process exits.
def add_goodbye_message(msg)
  at_exit { STDERR.puts msg }
end
81
+
82
#Loads the fixture config file named "config-<number>" from
#fixtures_dir via Typingpool::Config.file.
def dummy_config(number=1)
  fixture_path = File.join(fixtures_dir, "config-#{number}")
  Typingpool::Config.file(fixture_path)
end
85
+
86
+
87
#Returns a fresh hash of the default parameters used for the test
#project: config filename, title, subtitle, chunk length, unusual
#words and voices.
def project_default
  {
    :config_filename => '.config',
    :subtitle => "Typingpool's test interview transcription",
    :title => "Typingpool's Test & Interview",
    :chunks => '0:20',
    :unusual => ['Hack Day', 'Sunnyvale', 'Chad D'],
    :voice => ['Ryan', 'Havi, hacker'],
  }
end
97
+
98
+
99
#Delegates to Typingpool::Utility.in_temp_dir, yielding the directory
#it provides to the caller's block.
def in_temp_dir
  Typingpool::Utility.in_temp_dir do |dir|
    yield(dir)
  end
end
102
+
103
#Convenience wrapper: forwards all arguments to
#Typingpool::Utility.working_url? and returns its result.
def working_url?(*args)
  Typingpool::Utility.working_url?(*args)
end
106
+
107
#Convenience wrapper: forwards all arguments to
#Typingpool::Utility.fetch_url and returns its result.
def fetch_url(*args)
  Typingpool::Utility.fetch_url(*args)
end
110
+
111
+ class Script < Test
112
+ require 'typingpool'
113
+ require 'yaml'
114
+ require 'open3'
115
+
116
+
117
#Returns the full paths of the regular files in the given
#subdirectory of audio_dir, leaving out entries whose names start
#with a dot.
def audio_files(subdir='mp3')
  dir = File.join(audio_dir, subdir)
  visible = Dir.entries(dir).reject { |entry| entry.match(/^\./) }
  paths = visible.map { |entry| File.join(dir, entry) }
  paths.select { |path| File.file?(path) }
end
121
+
122
#Returns the path of the test config file inside dir, named by
#project_default[:config_filename].
def config_path(dir)
  filename = project_default[:config_filename]
  ::File.join(dir, filename)
end
125
+
126
#Loads, via Config.file, the config file found at config_path(dir).
def config_from_dir(dir)
  path = config_path(dir)
  Config.file(path)
end
129
+
130
+
131
#Calls Amazon.setup in sandbox mode with the config loaded from dir.
def setup_amazon(dir)
  dir_config = config_from_dir(dir)
  Amazon.setup(:sandbox => true, :config => dir_config)
end
134
+
135
+
136
#Creates a temporary directory (prefix 'typingpool_'), prepares it
#with setup_temp_tp_dir, then yields its path. Dir.mktmpdir removes
#the directory once the block finishes.
def in_temp_tp_dir
  ::Dir.mktmpdir('typingpool_') do |tmp|
    setup_temp_tp_dir(tmp)
    yield(tmp)
  end
end
142
+
143
#Writes the test config into dir and creates an empty 'projects'
#subdirectory there.
def setup_temp_tp_dir(dir)
  make_temp_tp_dir_config(dir)
  projects_dir = File.join(dir, 'projects')
  Dir.mkdir(projects_dir)
end
147
+
148
#When the config has S3 credentials, removes its 'sftp' section and
#writes it to filename inside dir, returning whatever write_config
#returns. Does nothing and returns nil when there are no S3
#credentials.
def setup_s3_config(dir, config=config_from_dir(dir), filename='.config_s3')
  if s3_credentials?(config)
    config.to_hash.delete('sftp')
    write_config(config, dir, filename)
  end
end
153
+
154
#Overwrites the config's Amazon secret with a deliberately wrong
#value (after asserting it differs from the current one) and writes
#the result as '.config_s3_bad' through setup_s3_config.
def setup_s3_config_with_bad_password(dir, config=config_from_dir(dir))
  bad_password = 'f'
  amazon = config.to_hash['amazon']
  refute_equal(amazon['secret'], bad_password)
  amazon['secret'] = bad_password
  setup_s3_config(dir, config, '.config_s3_bad')
end
160
+
161
#Points the config's transcripts and cache directories into dir,
#sets the assignment reward to '0.02', drops the 'qualify' entry from
#the assign section, and writes the result into dir under
#project_default[:config_filename].
def make_temp_tp_dir_config(dir, config=self.config)
  config.transcripts = File.join(dir, 'projects')
  config.cache = File.join(dir, '.cache')
  assign_section = config['assign']
  assign_section['reward'] = '0.02'
  config.assign.to_hash.delete('qualify')
  write_config(config, dir, project_default[:config_filename])
end
168
+
169
#Dumps config.to_hash as YAML into the file named filename inside
#dir, overwriting any existing file. Returns the path written.
def write_config(config, dir, filename=project_default[:config_filename])
  path = ::File.join(dir, filename)
  ::File.open(path, 'w') { |out| out << YAML.dump(config.to_hash) }
  path
end
176
+
177
#Returns the path of the default test project's directory, i.e.
#<temp_tp_dir>/projects/<project_default[:title]>.
def temp_tp_dir_project_dir(temp_tp_dir)
  title = project_default[:title]
  ::File.join(temp_tp_dir, 'projects', title)
end
180
+
181
#Builds a Project for project_default[:title] using the given config,
#which defaults to the config stored in dir.
def temp_tp_dir_project(dir, config=config_from_dir(dir))
  title = project_default[:title]
  Project.new(title, config)
end
184
+
185
#Runs a command by forwarding all arguments to
#Utility.system_quietly, returning its result.
def call_script(*args)
  Utility.system_quietly(*args)
end
188
+
189
#Returns the path of the tp-make script under the app's bin directory.
def path_to_tp_make
  ::File.join(self.class.app_dir, *%w[bin tp-make])
end
192
+
193
#Runs tp-make with the given command-line arguments.
def call_tp_make(*args)
  script = path_to_tp_make
  call_script(script, *args)
end
196
+
197
#Runs tp-make with the default project parameters: the given config
#path, the default chunk length, title, subtitle, one --voice and
#--unusual flag per default value, and one --file flag per audio
#fixture in audio_subdir.
def tp_make(in_dir, config=config_path(in_dir), audio_subdir='mp3')
  single_flags = [:title, :subtitle].flat_map do |param|
    ["--#{param}", project_default[param]]
  end
  repeated_flags = [:voice, :unusual].flat_map do |param|
    project_default[param].flat_map { |value| ["--#{param}", value] }
  end
  file_flags = audio_files(audio_subdir).flat_map { |path| ['--file', path] }
  call_tp_make(
    '--config', config,
    '--chunks', project_default[:chunks],
    *single_flags,
    *repeated_flags,
    *file_flags
  )
end
206
+
207
#Returns the path of the tp-finish script under the app's bin directory.
def path_to_tp_finish
  ::File.join(self.class.app_dir, *%w[bin tp-finish])
end
210
+
211
#Runs tp-finish with the given command-line arguments.
def call_tp_finish(*args)
  script = path_to_tp_finish
  call_script(script, *args)
end
214
+
215
#Runs tp-finish on the default project twice: first with --sandbox,
#then without it. Returns the result of the second run.
def tp_finish(dir, config_path=self.config_path(dir))
  [:tp_finish_inside_sandbox, :tp_finish_outside_sandbox].map do |finisher|
    send(finisher, dir, config_path)
  end.last
end
219
+
220
+
221
#Runs tp-finish on the default project with the --sandbox flag.
def tp_finish_inside_sandbox(dir, config_path=self.config_path(dir))
  sandbox_flag = '--sandbox'
  tp_finish_outside_sandbox(dir, config_path, sandbox_flag)
end
224
+
225
#Runs tp-finish on the default project title with the given config
#path, appending any extra command-line arguments.
def tp_finish_outside_sandbox(dir, config_path=self.config_path(dir), *args)
  title = project_default[:title]
  call_tp_finish(title, '--config', config_path, *args)
end
232
+
233
#Returns the path of the tp-assign script under the app's bin directory.
def path_to_tp_assign
  File.join(self.class.app_dir, *%w[bin tp-assign])
end
236
+
237
#Runs tp-assign, always in sandbox mode, with the given arguments.
def call_tp_assign(*args)
  script = path_to_tp_assign
  call_script(script, '--sandbox', *args)
end
240
+
241
#Returns a fresh hash of the default tp-assign parameters used by
#the tests: template, deadline, lifetime, approval delay,
#qualifications and keywords.
def assign_default
  {
    :template => 'interview/phone',
    :deadline => '5h',
    :lifetime => '10h',
    :approval => '10h',
    :qualify => ['approval_rate >= 90', 'hits_approved > 10'],
    :keyword => ['test', 'mp3', 'typingpooltest']
  }
end
251
+
252
#Runs tp-assign (in sandbox mode, via call_tp_assign) on the default
#project with the default template, the given config path, and the
#default deadline, lifetime, approval, qualification and keyword
#flags from assign_default.
#
#The default for config_path is spelled self.config_path(dir), as in
#the tp_finish helpers, so the parameter name can't be confused with
#the method it shadows.
def tp_assign(dir, config_path=self.config_path(dir))
  single_flags = [:deadline, :lifetime, :approval].map{|param| ["--#{param}", assign_default[param]] }.flatten
  repeated_flags = [:qualify, :keyword].map{|param| assign_default[param].map{|value| ["--#{param}", value] } }.flatten
  call_tp_assign(
    project_default[:title],
    assign_default[:template],
    '--config', config_path,
    *single_flags,
    *repeated_flags
  )
end
261
+
262
#Returns the path of the tp-collect script under the app's bin directory.
def path_to_tp_collect
  File.join(self.class.app_dir, *%w[bin tp-collect])
end
265
+
266
#Runs tp-collect in sandbox mode against the given fixture file,
#appending any extra command-line arguments.
def call_tp_collect(fixture_path, *args)
  fixed_args = ['--sandbox', '--fixture', fixture_path]
  call_script(path_to_tp_collect, *fixed_args, *args)
end
269
+
270
#Runs tp-collect against the given fixture using the config file
#stored in dir.
def tp_collect_with_fixture(dir, fixture_path)
  call_tp_collect(fixture_path, '--config', config_path(dir))
end
276
+
277
+
278
#Returns the path of the tp-review script under the app's bin directory.
def path_to_tp_review
  File.join(self.class.app_dir, *%w[bin tp-review])
end
281
+
282
#Runs tp-review in sandbox mode on the default project against the
#given fixture, feeding it each entry of choices on stdin. Any choice
#starting with "r" (case-insensitive, after stripping) is followed by
#a canned reason line.
#
#Returns a hash with :out and :err (the script's complete stdout and
#stderr, or nil for a stream that produced nothing) and :status (the
#Process::Status of the script).
def tp_review_with_fixture(dir, fixture_path, choices)
  output = {}
  command = [path_to_tp_review, '--sandbox', '--fixture', fixture_path, '--config', config_path(dir), project_default[:title]]
  Open3.popen3(*command) do |stdin, stdout, stderr, wait_thr|
    #Drain both pipes concurrently. Reading stdout to EOF before
    #touching stderr can deadlock once the script fills the stderr
    #pipe buffer.
    out_reader = Thread.new { stdout.gets(nil) }
    err_reader = Thread.new { stderr.gets(nil) }
    begin
      choices.each do |choice|
        stdin.puts(choice)
        stdin.puts("No reason - this is a test") if choice.strip.match(/^r/i)
      end
    ensure
      #Signal end-of-input so a script waiting for further answers
      #fails fast instead of hanging the test run.
      stdin.close unless stdin.closed?
    end
    output[:out] = out_reader.value
    output[:err] = err_reader.value
    output[:status] = wait_thr.value
  end
  output
end
298
+
299
#Returns the path of the tp-config script under the app's bin directory.
def path_to_tp_config
  File.join(self.class.app_dir, *%w[bin tp-config])
end
302
+
303
#Runs tp-config with the given command-line arguments.
def tp_config(*args)
  script = path_to_tp_config
  call_script(script, *args)
end
306
+
307
#Runs tp-config with the given argument array, writing each entry of
#input to the script's stdin as one line.
#
#Returns a hash with :out and :err (the script's complete stdout and
#stderr, or nil for a stream that produced nothing) and :status (the
#Process::Status of the script).
def tp_config_with_input(args, input)
  output = {}
  Open3.popen3(path_to_tp_config, *args) do |stdin, stdout, stderr, wait_thr|
    #Drain both pipes concurrently. Reading stdout to EOF before
    #touching stderr can deadlock once the script fills the stderr
    #pipe buffer.
    out_reader = Thread.new { stdout.gets(nil) }
    err_reader = Thread.new { stderr.gets(nil) }
    begin
      input.each { |sending| stdin.puts(sending) }
    ensure
      #Signal end-of-input so a script waiting for further answers
      #fails fast instead of hanging the test run.
      stdin.close unless stdin.closed?
    end
    output[:out] = out_reader.value
    output[:err] = err_reader.value
    output[:status] = wait_thr.value
  end
  output
end
320
+
321
#Returns the path of the fixture project directory called name
#inside fixtures_dir.
def fixture_project_dir(name)
  parent = fixtures_dir
  File.join(parent, name)
end
324
+
325
#Creates the fixture project directory for name and returns its
#path. Raises Error::Test if something already exists at that path.
def make_fixture_project_dir(name)
  dir = fixture_project_dir(name)
  #File.exist? rather than File.exists?: the latter was deprecated
  #and then removed in Ruby 3.2.
  if File.exist?(dir)
    raise Error::Test, "Fixture project already exists for #{name} at #{dir}"
  end
  ::Dir.mkdir(dir)
  dir
end
333
+
334
#Securely and recursively removes the fixture project directory for
#name.
#
#FileUtils.remove_entry_secure takes (path, force) and has no
#:secure option. The options hash passed here previously was
#silently used as a truthy force flag, so force is now passed
#explicitly to keep the same behaviour (errors during removal, such
#as a missing directory, are ignored).
def remove_fixture_project_dir(name)
  FileUtils.remove_entry_secure(fixture_project_dir(name), true)
end
337
+
338
#Finds every regular file in fixtures_dir whose name starts with
#fixture_prefix and yields, for each, the fixture's full path and the
#path it maps to inside the temp project's data directory (the
#fixture name with the prefix stripped). Returns the array of
#stripped names.
def with_fixtures_in_temp_tp_dir(dir, fixture_prefix)
  matching = Dir.entries(fixtures_dir).select do |entry|
    entry.start_with?(fixture_prefix) && File.file?(File.join(fixtures_dir, entry))
  end
  stripped = matching.map { |entry| entry[fixture_prefix.size .. -1] }
  stripped.each do |fixture|
    project_path = File.join(temp_tp_dir_project_dir(dir), 'data', fixture)
    fixture_path = File.join(fixtures_dir, "#{fixture_prefix}#{fixture}")
    yield(fixture_path, project_path)
  end
end
347
+
348
#Copies every fixture matching fixture_prefix into the temp project's
#data directory. A file already present at the destination is first
#moved aside to "orig_<name>" in the same directory.
def copy_fixtures_to_temp_tp_dir(dir, fixture_prefix)
  with_fixtures_in_temp_tp_dir(dir, fixture_prefix) do |fixture_path, project_path|
    #File.exist? rather than File.exists?: the latter was deprecated
    #and then removed in Ruby 3.2.
    if File.exist?(project_path)
      FileUtils.mv(project_path, File.join(File.dirname(project_path), "orig_#{File.basename(project_path)}"))
    end
    FileUtils.cp(fixture_path, project_path)
  end
end
356
+
357
#Undoes copy_fixtures_to_temp_tp_dir: removes each copied fixture
#from the temp project's data directory and, where an "orig_<name>"
#file is found next to it, moves that file back into place.
def rm_fixtures_from_temp_tp_dir(dir, fixture_prefix)
  with_fixtures_in_temp_tp_dir(dir, fixture_prefix) do |fixture_path, project_path|
    FileUtils.rm(project_path)
    path_to_orig = File.join(File.dirname(project_path), "orig_#{File.basename(project_path)}")
    #File.exist? rather than File.exists?: the latter was deprecated
    #and then removed in Ruby 3.2.
    if File.exist?(path_to_orig)
      FileUtils.mv(path_to_orig, project_path)
    end
  end
end
366
+
367
#Asserts that transcript_file exists in the temp project directory
#and is not empty. Returns the file's contents.
def assert_has_transcript(dir, transcript_file='transcript.html')
  transcript_path = File.join(temp_tp_dir_project_dir(dir), transcript_file)
  #File.exist? rather than File.exists?: the latter was deprecated
  #and then removed in Ruby 3.2.
  assert(File.exist?(transcript_path))
  transcript = IO.read(transcript_path)
  assert(!transcript.empty?)
  transcript
end
373
+
374
#Same check as assert_has_transcript, applied to the in-progress
#transcript file ('transcript_in_progress.html').
def assert_has_partial_transcript(dir)
  in_progress_file = 'transcript_in_progress.html'
  assert_has_transcript(dir, in_progress_file)
end
377
+
378
#Asserts that the project's data/<which_csv> file, read as CSV, has
#exactly count rows whose 'transcript' value is non-empty.
def assert_assignment_csv_has_transcription_count(count, project, which_csv='assignment.csv')
  assignments = project.local.file('data', which_csv).as(:csv)
  transcribed = assignments.reject { |assignment| assignment['transcript'].to_s.empty? }
  assert_equal(count, transcribed.size)
end
381
+
382
#Asserts that the HTML, once parsed by noko, contains exactly count
#elements matching the CSS selector 'audio'.
def assert_html_has_audio_count(count, html)
  audio_elements = noko(html).css('audio')
  assert_equal(count, audio_elements.size)
end
385
+
386
#For every type in types, asserts that the "<type>_uploaded" column
#of assignment_csv is non-empty and that every value in it equals
#status.
def assert_all_assets_have_upload_status(assignment_csv, types, status)
  types.each do |type|
    column = "#{type}_uploaded"
    recorded_uploads = assignment_csv.map { |assignment| assignment[column] }
    refute_empty(recorded_uploads)
    matching = recorded_uploads.select { |uploaded| uploaded == status }
    assert_equal(recorded_uploads.count, matching.count)
  end
end
393
+
394
#Asserts that the block raises Typingpool::Error::Shell and that the
#exception's message matches regex.
def assert_shell_error_match(regex)
  exception = assert_raise(Typingpool::Error::Shell) do
    yield
  end
  #assert_match takes the pattern first and the string second. With
  #the arguments swapped, the message is turned into a pattern and
  #matched against a Regexp, which current Minitest rejects with a
  #TypeError.
  assert_match(regex, exception.message)
end
400
+
401
#Inside a fresh temp Typingpool directory, yields args extended with
#'--config' and that directory's config path, and asserts that the
#block fails with a shell error whose message matches regex.
def assert_script_abort_match(args, regex)
  in_temp_tp_dir do |dir|
    assert_shell_error_match(regex) { yield([*args, '--config', config_path(dir)]) }
  end
end
408
+
409
#Parses the given HTML with Nokogiri::HTML and returns the result.
def noko(html)
  Nokogiri::HTML(html)
end
412
+
413
#Returns the path of the 'vcr' directory inside fixtures_dir.
def vcr_dir
  parent = fixtures_dir
  File.join(parent, 'vcr')
end
416
+ end #Script
417
+ end #Test
418
+ end #Typingpool
@@ -0,0 +1,181 @@
1
+ module Typingpool
2
+ #This is the model class for Typingpool's final and most important
3
+ #output, a transcript of the Project audio in HTML format, with
4
+ #embedded audio. A Transcript instance is actually an enumerable
5
+ #container for Transcript::Chunk instances. Each Transcript::Chunk
6
+ #corresponds to an Amazon::HIT and to an audio "chunk" (file) that
7
+ #has been transcribed and which is part of a larger recording.
8
+ #
9
+ #This class is likely to be done away with in the next few point
10
+ #versions of Typingpool. Functionality and data unique to
11
+ #Transcript::Chunk can probably be rolled into
12
+ #Amazon::HIT. Transcript itself can probably be folded into Project,
13
+ #which would become a HIT container, and then we'd pass Project
14
+ #instances to the output template.
15
+ class Transcript
16
+ include Enumerable
17
+
18
+ #Get/set the title of the transcript, typically corresponds to the name of the
19
+ #associated Project
20
+ attr_accessor :title
21
+
22
+ #Get/set the subtitle of the transcript, corresponds to Project#local#subtitle
23
+ #(a.k.a data/subtitle.txt in the project dir)
24
+ attr_accessor :subtitle
25
+
26
#Constructor. Both arguments are optional: a title and an array of
#Transcript::Chunk instances (an empty array by default). The array
#is stored as given, not copied.
def initialize(title=nil, chunks=[])
  @title, @chunks = title, chunks
end
32
+
33
#Iterates over the Transcript::Chunk instances, yielding each in
#turn. When called without a block, returns an Enumerator over the
#chunks instead of raising.
def each
  return enum_for(:each) unless block_given?
  @chunks.each do |chunk|
    yield chunk
  end
end
39
+
40
#Returns the Transcript::Chunk stored at the given index (anything
#Array#[] accepts), or nil when nothing is stored there.
def [](index)
  @chunks.slice(index)
end
44
+
45
+ #Returns chunks joined by double newlines
46
+ def to_s
47
+ @chunks.join("\n\n")
48
+ end
49
+
50
#Appends the given Transcript::Chunk to this transcript. Returns the
#internal array of chunks.
def add_chunk(chunk)
  @chunks << chunk
end
54
+
55
+ #Transcript::Chunk is the model class for one transcription by one
56
+ #Mechanical Turk worker of one "chunk" (a file) of audio, which in
57
+ #turn is a portion of a larger recording (for example, one minute
58
+ #of a 60 minute interview). It is basically parallel and similar
59
+ #to an Amazon::HIT instance. Transcript is a container for these
60
+ #chunks, which know how to render themselves as text and HTML.
61
+ class Chunk
62
+ require 'cgi'
63
+ require 'rubygems/text'
64
+ include Gem::Text
65
+
66
+ #Get/set the raw text of the transcript
67
+ attr_accessor :body
68
+
69
+ #Get/set the Amazon ID of the Mechanical Turk worker who
70
+ #transcribed the audio into text
71
+ attr_accessor :worker
72
+
73
+ #Get/set the id of the Amazon::HIT associated with this chunk
74
+ attr_accessor :hit
75
+
76
+ #Get/set the id of the Project#local associated with this chunk
77
+ attr_accessor :project
78
+
79
+ #Return the offset associated with the chunk, in MM:SS
80
+ #format. This corresponds to the associated audio file, which is
81
+ #a chunk of a larger recording and which starts at a particular
82
+ #time offset, for example from 1:00 (the offset) to 2:00 (the
83
+ #next offset).
84
+ #
85
+ #
86
+ #This should be updated to return HH:MM:SS and MM:SS.sss when
87
+ #appropriate, since in Project#interval we use that format and
88
+ #allow audio to be divided into such units. (TODO)
89
+ attr_reader :offset
90
+
91
+ #Returns the offset in seconds. So for an offset of 1:00 would return 60.
92
+ attr_reader :offset_seconds
93
+
94
+ #Returns the name of the remote audio file corresponding to this
95
+ #chunk. The remote file has the project ID and pseudo random
96
+ #characters added to it.
97
+ attr_reader :filename
98
+
99
+ #Returns the name of the local audio file corresponding to this
100
+ #chunk.
101
+ attr_reader :filename_local
102
+
103
+ #Returns the URL of the remote audio transcribed in the body of
104
+ #this chunk.
105
+ attr_reader :url
106
+
107
#Constructor. Stores the raw transcription text as the chunk's body.
#All other attributes are populated later (see url=).
def initialize(body)
  @body = body
end
111
+
112
#Orders chunks by their offset in seconds, so sorting puts them in
#chronological order.
def <=>(other)
  offset_seconds <=> other.offset_seconds
end
116
+
117
#Takes the URL of the chunk's remote audio file, for example
#http://ryantate.com/transfer/Speech.01.00.ede9b0f2aed0d35a26cef7160bc9e35e.ISEAOM.mp3
#
#Setting the URL is an important step in populating the instance: it
#is matched against Project.url_regex and the match is used to set
#url, filename, filename_local, offset and offset_seconds.
#
#Raises Error::Argument::Format when the URL does not match.
def url=(url)
  matches = Project.url_regex.match(url)
  raise Error::Argument::Format, "Unexpected format to url '#{url}'" unless matches
  minutes, seconds = matches[3], matches[4]
  @url = matches[0]
  @filename = matches[1]
  @filename_local = Project.local_basename_from_url(@url)
  @offset = "#{minutes}:#{seconds}"
  @offset_seconds = (minutes.to_i * 60) + seconds.to_i
end
130
+
131
#Takes an optional number of spaces to indent the text by (default:
#no indenting) and an optional number of characters to wrap at
#(default: no wrapping).
#
#Returns the body with newlines normalized by
#Utility.normalize_newlines, runs of newlines shortened to a maximum
#of two, leading and trailing whitespace removed from each line, and
#the text wrapped/indented as specified.
#
#The body itself is never modified: every step works on a new
#string, even if Utility.normalize_newlines hands back the very
#string it was given.
def body_as_text(indent=nil, wrap=nil)
  text = Utility.normalize_newlines(body)
  text = text.gsub(/\n\n+/, "\n\n")
  text = text.split("\n").map{|line| line.strip }.join("\n")
  text = wrap_text(text, wrap) if wrap
  text = indent_text(text, indent) if indent
  text
end
148
+ alias :to_s :body_as_text
149
+ alias :to_str :body_as_text
150
+
151
#Takes an optional count of how many characters to wrap at (default
#72). Returns the body, presumed to be raw text, as HTML.
#
#The text is first cleaned up by body_as_text, then any HTML special
#characters are escaped, then newlines are converted to HTML by
#Utility.newlines_to_html. Finally each line of the HTML source is
#wrapped at the requested width.
def body_as_html(wrap=72)
  text = CGI::escapeHTML(body_as_text)
  text = Utility.newlines_to_html(text)
  #Honour the wrap argument; the width used to be hard-coded to 72
  #here, so callers passing another value were silently ignored.
  text.split("\n").map{|line| wrap_text(line, wrap).chomp }.join("\n")
end
167
+
168
+ protected
169
+
170
#Returns a copy of text with every line prefixed by indent spaces.
#The string passed in is left untouched (it used to be modified in
#place).
def indent_text(text, indent)
  text.gsub(/^/, " " * indent)
end
174
+
175
#Wraps text at the given number of characters (default 72) by
#delegating to format_text.
def wrap_text(text, wrap=72)
  width = wrap
  format_text(text, width)
end
178
+
179
+ end #Chunk
180
+ end #Transcript
181
+ end #Typingpool