typingpool 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. data/Rakefile +23 -0
  2. data/bin/tp-assign +240 -0
  3. data/bin/tp-collect +50 -0
  4. data/bin/tp-config +114 -0
  5. data/bin/tp-finish +101 -0
  6. data/bin/tp-make +169 -0
  7. data/bin/tp-review +175 -0
  8. data/lib/typingpool/amazon.rb +732 -0
  9. data/lib/typingpool/app.rb +634 -0
  10. data/lib/typingpool/config.rb +344 -0
  11. data/lib/typingpool/error.rb +22 -0
  12. data/lib/typingpool/filer.rb +396 -0
  13. data/lib/typingpool/project.rb +593 -0
  14. data/lib/typingpool/template.rb +175 -0
  15. data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
  16. data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
  17. data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
  18. data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
  19. data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
  20. data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
  21. data/lib/typingpool/templates/assignment/main.css +20 -0
  22. data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
  23. data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
  24. data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
  25. data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
  26. data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
  27. data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
  28. data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
  29. data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
  30. data/lib/typingpool/templates/config.yml +21 -0
  31. data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
  32. data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
  33. data/lib/typingpool/templates/project/data/.empty_directory +0 -0
  34. data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
  35. data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
  36. data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
  37. data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
  38. data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
  39. data/lib/typingpool/templates/project/etc/transcript.css +49 -0
  40. data/lib/typingpool/templates/transcript.html.erb +23 -0
  41. data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
  42. data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
  43. data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
  44. data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
  45. data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
  46. data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
  47. data/lib/typingpool/test/fixtures/config-1 +20 -0
  48. data/lib/typingpool/test/fixtures/config-2 +25 -0
  49. data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
  50. data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
  51. data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
  52. data/lib/typingpool/test/fixtures/template.html.erb +10 -0
  53. data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
  54. data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
  55. data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
  56. data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
  57. data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
  58. data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
  59. data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
  60. data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
  61. data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
  62. data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
  63. data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
  64. data/lib/typingpool/test.rb +418 -0
  65. data/lib/typingpool/transcript.rb +181 -0
  66. data/lib/typingpool/utility.rb +272 -0
  67. data/lib/typingpool.rb +500 -0
  68. data/test/make_amazon_question_fixture.rb +24 -0
  69. data/test/make_tp_collect_fixture_1.rb +26 -0
  70. data/test/make_tp_collect_fixture_2.rb +16 -0
  71. data/test/make_tp_collect_fixture_3.rb +15 -0
  72. data/test/make_tp_collect_fixture_4.rb +17 -0
  73. data/test/make_tp_review_fixture_1.rb +26 -0
  74. data/test/make_tp_review_fixture_2.rb +30 -0
  75. data/test/make_transcript_chunks_fixture.rb +53 -0
  76. data/test/test_integration_script_1_tp_config.rb +108 -0
  77. data/test/test_integration_script_2_tp_make.rb +119 -0
  78. data/test/test_integration_script_3_tp_assign.rb +152 -0
  79. data/test/test_integration_script_4_tp_review.rb +72 -0
  80. data/test/test_integration_script_5_tp_collect.rb +44 -0
  81. data/test/test_integration_script_6_tp_finish.rb +123 -0
  82. data/test/test_unit_amazon.rb +153 -0
  83. data/test/test_unit_config.rb +94 -0
  84. data/test/test_unit_filer.rb +202 -0
  85. data/test/test_unit_project.rb +168 -0
  86. data/test/test_unit_project_local.rb +68 -0
  87. data/test/test_unit_project_remote.rb +157 -0
  88. data/test/test_unit_template.rb +111 -0
  89. data/test/test_unit_transcript.rb +77 -0
  90. metadata +234 -0
data/lib/typingpool.rb ADDED
@@ -0,0 +1,500 @@
1
+ # -*- coding: utf-8 -*-
2
+ #= Typingpool
3
+ #
4
+ #Typingpool is an app for easily making transcripts of audio using
5
+ #Amazon's labor marketplace, Mechanical Turk.
6
+ #
7
+ #Typingpool is distributed as a Ruby gem. It is made up of a handful
8
+ #of scripts for users and a collection of library files for
9
+ #developers.
10
+ #
11
+ #Typingpool also includes a collection of ERB templates for
12
+ #generating Mechanical Turk assignments and the final transcript HTML
13
+ #file.
14
+ #
15
+ #== Dependencies
16
+ #
17
+ #Typingpool depends on these command-line tools, which are not
18
+ #included in the gem since they are external to Ruby:
19
+ #
20
+ # [ffmpeg] A powerhouse audio/video converter.
21
+ # [libmp3lame] An mp3 encoder/decoder, used by ffmpeg.
22
+ # [mp3splt] An audio file-splitting utility.
23
+ # [mp3wrap] An audio file-merging utility.
24
+ #
25
+ #== User overview
26
+ #
27
+ #=== Setup
28
+ #
29
+ #After installing the gem and its dependencies, run tp-config from the
30
+ #command line to create your config file (~/.typingpool). At the
31
+ #prompts, you will need to supply your Amazon Web Services Access Key
32
+ #ID and your Amazon Web Services Secret Access key.
33
+ #
34
+ #The config file is in YAML format and may be customized using any
35
+ #text editor. For more details on configuration options, see the
36
+ #documentation for Typingpool::Config.
37
+ #
38
+ #=== Workflow
39
+ #
40
+ #A typical workflow will use the bundled scripts in this order:
41
+ #
42
+ # tp-make -> tp-assign -> [wait] -> tp-review -> tp-finish
43
+ #
44
+ #tp-review may be called repeatedly, until transcripts for all audio
45
+ #chunks have been processed. Similarly, tp-assign may be called
46
+ #repeatedly, for example to re-assign chunks rejected using tp-review,
47
+ #or to re-assign chunks that have expired.
48
+ #
49
+ #An alternate workflow would go like this:
50
+ #
51
+ # tp-make -> [manually upload assignments.csv to Amazon RUI] ->
52
+ # [wait] -> [approve/reject assignments via RUI] -> tp-collect ->
53
+ # tp-finish
54
+ #
55
+ #=== Examples
56
+ #
57
+ #Typical usage scenario:
58
+ #
59
+ # tp-make 'Chad Interview' chad1.WMA chad2.WMA --unusual 'Hack Day,
60
+ # Yahoo' --subtitle 'Phone interview re Yahoo Hack Day'
61
+ #
62
+ # # => Converting chad1.WMA to mp3
63
+ # # => Converting chad2.WMA to mp3
64
+ # # => Merging audio
65
+ # # => Splitting audio into uniform bits
66
+ # # => Uploading Chad Interview.00.00.mp3 to
67
+ # ryantate42.s3.amazonaws.com as Chad
68
+ # Interview.00.00.33ca7f2cceba9f8031bf4fb7c3f819f4.LHFJEM.mp3
69
+ # # => Uploading Chad Interview.01.00.mp3 to
70
+ # ryantate42.s3.amazonaws.com as Chad
71
+ # Interview.01.00.33ca7f2cceba9f8031bf4fb7c3f819f4.XMWNYW.mp3
72
+ # # => Uploading Chad Interview.02.00.mp3 to
73
+ # ryantate42.s3.amazonaws.com as Chad
74
+ # Interview.02.00.33ca7f2cceba9f8031bf4fb7c3f819f4.FNEIWN.mp3
75
+ # # => ... [snip]
76
+ # # => Done. Project at:
77
+ # # => /Users/ryantate/Desktop/Transcripts/Chad Interview
78
+ #
79
+ #
80
+ # tp-assign 'Chad Interview' interview/nameless --reward 1.00
81
+ # --deadline 90m --approval 6h --lifetime 2d
82
+ #
83
+ # # => Figuring out what needs to be assigned
84
+ # # => 85 assignments total
85
+ # # => 85 assignments to assign
86
+ # # => Deleting old assignment HTML from ryantate42.s3.amazonaws.com
87
+ # # => Uploading assignment HTML to ryantate42.s3.amazonaws.com
88
+ # # => Assigning
89
+ # # => Assigned 85 transcription jobs for $85
90
+ # # => Remaining balance: $115.00
91
+ #
92
+ # [Wait...]
93
+ #
94
+ #
95
+ # tp-review 'Chad Interview'
96
+ #
97
+ # # => Gathering submissions from Amazon
98
+ # # => Matching submissions with local projects
99
+ # # =>
100
+ # # => Transcript for: https://ryantate42.s3.amazonaws.com/
101
+ # Chad%20Interview.29.00.263d492275a81afb005c8231d8d8afdb.
102
+ # UEMOCN.mp3
103
+ # # => Project: Chad Interview: Phone interview re Yahoo Hack Day
104
+ # # => Submitted at: 2012-08-11 17:00:36 -0700 by A9S0AOAI8HO9P
105
+ # # =>
106
+ # # => Chad: ... so it had sort of some geek history. And the
107
+ # # => weather was really bad. But it was an indoor event,
108
+ # # => right? So people were staying indoors. And like very
109
+ # # => early... And there was all this really expensive gear
110
+ # # => that the BBC had. Like these cameras that guys were like
111
+ # # => riding around on and stuff, huge sound stage, bigger than
112
+ # # => the one we had in Sunnyvale.
113
+ # # =>
114
+ # # => Two hours into the event, we heard this big lightning
115
+ # # => strike, because we were up on a hill in London. And all
116
+ # # => the lights went out and the roof opened up in the
117
+ # # => building. What we didn't know is the fire supression
118
+ # # => system in that building which got blown up by the
119
+ # # => lightning during a fire would cause the roof to open
120
+ # # => up. So we had all these geeks with equipment and all this
121
+ # # => BBC equipment and it was literally raining on them.
122
+ # # =>
123
+ # # => (A)pprove, (R)eject, (Q)uit, [(S)kip]? (1/20)
124
+ #
125
+ # a
126
+ #
127
+ # # => Approved. Chad Interview transcript updated.
128
+ # # =>
129
+ # # => Transcript for: https://ryantate42.s3.amazonaws.com/
130
+ # Chad%20Interview.30.00.263d492275a81afb005c8231d8d8afdb.
131
+ # RXNKRN.mp3
132
+ # # => Project: Chad Interview: Phone interview re Yahoo Hack Day
133
+ # # => Submitted at: 2012-08-11 17:00:58 -0700 by A9S0AOAI8HO9P
134
+ # # =>
135
+ # # => Blah blah blah blah okay I am done typing byeeeeeeee
136
+ # # =>
137
+ # # => (A)pprove, (R)eject, (Q)uit, [(S)kip]? (2/20)
138
+ #
139
+ # r
140
+ #
141
+ # # => Rejection reason, for worker:
142
+ #
143
+ # There's no transcription at all, just nonsense
144
+ #
145
+ # # => Rejected
146
+ # # =>
147
+ # # => Transcript for...
148
+ # # => ... [snip]
149
+ #
150
+ #
151
+ # tp-finish 'Chad Interview'
152
+ #
153
+ # # => Removing from Amazon
154
+ # # => Collecting all results
155
+ # # => Removing HIT 2GKMIKMN9U8PNHKK58NXL3SU4TCBSN (Reviewable)
156
+ # # => Removing from data/assignment.csv
157
+ # # => Removing from local cache
158
+ # # => Removing HIT 2CFX2Q45UUKQ2HXZU8SNV8OG6CQBTC (Assignable)
159
+ # # => Removing from data/assignment.csv
160
+ # # => Removing from local cache
161
+ # # => Removing HIT 294EZZ2MIKMNNDP1LAU8WWWXOEI7O0...
162
+ # # => ... [snip]
163
+ # # => Removing Chad Interview.00.00.
164
+ # 263d492275a81afb005c8231d8d8afdb.ORSENE.html from
165
+ # ryantate42.s3.amazonaws.com
166
+ # # => Removing Chad Interview.01.00...
167
+ # # => ... [snip]
168
+ # # => Removing Chad Interview.00.00.
169
+ # 263d492275a81afb005c8231d8d8afdb.RNTVLN.mp3 from
170
+ # ryantate42.s3.amazonaws.com
171
+ # # => Removing Chad Interview.01.00....
172
+ # # => ... [snip]
173
+ #
174
+ #=== Output
175
+ #
176
+ #The final output of Typingpool is a project directory containing a
177
+ #transcript file.
178
+ #
179
+ #The transcript file is HTML with audio chunks embedded alongside each
180
+ #associated transcript chunk.
181
+ #
182
+ #The transcript file is called transcript.html when complete. A
183
+ #partial transcript file is called transcript_in_progress.html.
184
+ #
185
+ #The project directory also includes supporting files, including a CSV
186
+ #data file used to store raw transcript chunks, Amazon Mechanical Turk
187
+ #HIT information, and other metadata; JavaScript code that swaps in
188
+ #Flash players on browsers that don't support mp3 files in audio tags;
189
+ #the original audio files and the audio chunks generated from them;
190
+ #and a CSS file.
191
+ #
192
+ #The directory is laid out like so:
193
+ #
194
+ # Chad Interview/
195
+ # -> transcript.html | transcript_in_progress.html
196
+ # -> audio/
197
+ # -> chunks/
198
+ # -> Chad Interview.00.00.mp3
199
+ # -> Chad Interview.01.00.mp3
200
+ # -> ... [snip]
201
+ # -> originals/
202
+ # -> chad1.WMA
203
+ # -> chad2.WMA
204
+ # -> data/
205
+ # -> assignment.csv
206
+ # -> id.txt
207
+ # -> subtitle.txt
208
+ # -> etc/
209
+ # -> audio-compat.js
210
+ # -> transcript.css
211
+ # -> About these files - readme.txt
212
+ # -> player/
213
+ # -> audio-player.js
214
+ # -> license.txt
215
+ # -> player.swf
216
+ #
217
+ #You may safely edit the files transcript.html, etc/transcript.css,
218
+ #and data/subtitle.txt, and you may safely delete the files in
219
+ #audio/originals and any .txt files in etc/. Editing or deleting other
220
+ #files may interfere with the operation of Typingpool or render the
221
+ #transcript inoperative. Do not edit transcript_in_progress.html as
222
+ #your changes will be overwritten if/when the transcript is next
223
+ #updated.
224
+ #
225
+ #
226
+ #=== Workflow (additional)
227
+ # * When you want to preview your assignments, run tp-assign with the
228
+ # option --sandbox and with --qualify 'rejection_rate < 100' (to
229
+ # make sure you qualify to view your own HITs). Then visit
230
+ # http://workersandbox.mturk.com and find your assignments (a search
231
+ # for "mp3" works if you left mp3 set as a keyword in your config
232
+ # file). When you are done previewing, run tp-finish with the
233
+ # name/path of your project and the --sandbox option.
234
+ #
235
+ # * When you assign your transcription jobs via tp-assign, you must
236
+ # supply a template name or relative path as the second
237
+ # argument. In the example above, the named template is
238
+ # “interview/nameless.”
239
+ #
240
+ # The template “interview/nameless” is a great general purpose
241
+ # template. It instructs the transcriber not to worry about the
242
+ # names of the speakers, and instead to use labels like “male 1,”
243
+ # “male 2,” etc. This allows the transcriber to work quickly and
244
+ # usually results in a viable transcript, since you can consult
245
+ # your memory or the original audio to figure out who is who.
246
+ #
247
+ # To find what other templates are available, navigate to the
248
+ # directory where typingpool is installed (`gem which typingpool`)
249
+ # and then go into typingpool/templates/assignment and its
250
+ # subdirectories. Anything that ends in ‘.html.erb’ is an available
251
+ # template. You may also create your own templates in the directory
252
+ # listed in the “templates” param of your config file.
253
+ #
254
+ # The templates interview, interview/phone, and interview/noisy
255
+ # require you to have passed the names of two voices to tp-make
256
+ # when you created your project. The first voice should be the name
257
+ # (and optional title) of the interviewer, and the second the name
258
+ # (and title) of the interviewee, like so:
259
+ #
260
+ # tp-make 'Chad Interview' chad1.WMA chad2.WMA --voice 'Ryan,
261
+ # hack reporter' --voice 'Chad, a software engineer' --unusual
262
+ # 'Hack Day, Yahoo' --subtitle 'Phone interview re Yahoo Hack
263
+ # Day'
264
+ #
265
+ #
266
+ # * When you've rejected some submissions in tp-review and need to
267
+ # re-assign these chunks to be transcribed, simply re-run tp-assign
268
+ # with the name (or path) of your project. You may select the same
269
+ # template, reward, deadlines, etc., or pick new ones. tp-assign
270
+ # will be careful not to re-assign chunks for which you have
271
+ # approved a transcript, or which are pending on Mechanical Turk.
272
+ #
273
+ # * When some chunks previously assigned via tp-assign have expired
274
+ # without attracting submissions, simply re-run tp-assign as
275
+ # described above to re-assign these chunks. Consider increasing
276
+ # the dollar amount specified in your --reward argument.
277
+ #
278
+ # * When some chunks previously assigned via tp-assign have been
279
+ # submitted by workers but not approved or rejected in time for the
280
+ # approval deadline (assign/approval in your config file or
281
+ # --approval as passed to tp-assign), Mechanical Turk has
282
+ # automatically approved these submissions for you and you'll need
283
+ # to run tp-collect to collect them. (Yes, it’s silly you need to run
284
+ # a whole different script instead of just calling tp-review as
285
+ # usual. I’ll fix this in a future version.)
286
+ #
287
+ # * When you want to cancel outstanding assignments, simply run
288
+ # tp-finish with the name of your project. If your assignments have
289
+ # already attracted submissions, you may be prompted to run
290
+ # tp-review first.
291
+ #
292
+ # * When tp-make, tp-assign, or tp-finish tells you it failed an
293
+ # upload, deletion, or Amazon command, simply re-run the script
294
+ # with the same arguments to re-attempt the upload, deletion or
295
+ # Amazon command. Typingpool carefully records which network
296
+ # operations it is attempting and which network operations have
297
+ # completed. It can robustly handle network errors, including
298
+ # uncaught exceptions.
299
+ ##
300
+ #
301
+ #=== Maintenance
302
+ #
303
+ # [cache] If the cache file grows too large, you'll need to delete it
304
+ # manually. It may be safely deleted as long as no
305
+ # Typingpool scripts are running. Its location is
306
+ # specified in the 'cache' param in the config file. (The
307
+ # config file is at ~/.typingpool and the cache, by
308
+ # default, is at ~/.typingpool.cache.)
309
+ #
310
+ # Typingpool takes no steps to limit the size of the
311
+ # cache file. It prunes the cache of project-specific
312
+ # entries when you run tp-finish on a project, but the
313
+ # cache may grow large if you work on many active
314
+ # projects in parallel, or if you fail to run tp-finish
315
+ # on projects when you are done with them.
316
+ #
317
+ # [tp-finish] You should run tp-finish PROJECT each time you finish a
318
+ # project, where PROJECT may be either the project name
319
+ # or path. Assuming you have no submissions pending or
320
+ # awaiting approval, this clears all traces of the
321
+ # project from Amazon Mechanical Turk, from Amazon S3 or
322
+ # your SFTP server, and from the local cache. This will
323
+ # keep your local cache from ballooning in size and will
324
+ # minimize your S3 charges or SFTP disk usage. It will
325
+ # also help Typingpool scripts run faster by reducing the
326
+ # number of HITs you have on Amazon Mechanical Turk; many
327
+ # Typingpool operations involve iterating through all of
328
+ # your HITs.
329
+ #
330
+ #
331
+ #=== See also
332
+ #
333
+ # * Run any script with the --help option for further details on how
334
+ # to run the script.
335
+ #
336
+ # * See the docs for Typingpool::Config for details of the config
337
+ # file format.
338
+ #
339
+ # * See Amazon's Mechanical Turk documentation for guides and
340
+ # overviews on how Mechanical Turk
341
+ # works. https://requester.mturk.com/help
342
+ #
343
+ # * See the documentation on ffmpeg and related libraries for clues
344
+ # as to how to make Typingpool support additional file
345
+ # formats. Typingpool can work with any file format that ffmpeg can
346
+ # convert to mp3 (libmp3lame).
347
+ #
348
+ # * For an overview of the concepts on which Typingpool is built, see
349
+ # Andy Baio’s guide to using Mechanical Turk for transcription:
350
+ # http://waxy.org/2008/09/audio_transcription_with_mechanical_turk/
351
+ #
352
+ #== Developer overview
353
+ #
354
+ #Views, used for the final transcript and for rendering HTML
355
+ #assignments for Amazon Mechanical Turk workers, are contained in a
356
+ #series of templates in lib/typingpool/templates, particularly
357
+ #transcript.html.erb and assignment/*. The control layer lives in the
358
+ #App class (lib/typingpool/app.rb) and within the individual
359
+ #scripts. The models constitute the other Typingpool classes,
360
+ #including most importantly and in rough order of importance the
361
+ #Project, Transcript, Amazon, Config and Filer classes (the latter of
362
+ #interest mainly because of Filer::Audio, which handles splitting,
363
+ #merging, and conversion).
364
+ #
365
+ #The models in particular, along with the App class, are
366
+ #underdeveloped and not particularly clear or fully thought
367
+ #through. The Transcript model, for example, should almost certainly
368
+ #be folded into the Project model. Dividing Project into
369
+ #Project::Local and Project::Remote only makes sense on a superficial
370
+ #level; Project::Remote could probably be its own class or even part
371
+ #of Utility. Amazon will probably be simpler if I can get some patches
372
+ #into RTurk, and Amazon::HIT should probably be integrated more closely
373
+ #with Project.
374
+ #
375
+ #One of the most frustrating things about the code is that there are
376
+ #so many subtly different ways a "chunk" of a transcript/project is
377
+ #represented: As a simple hash derived from a row in
378
+ #data/assignment.csv within a project folder, as an Amazon::HIT, as a
379
+ #Transcript::Chunk, as an audio file on a remote server, and as a
380
+ #local audio file (which has a different name from the remote
381
+ #file). So in future versions I'll probably reduce the number of
382
+ #different ways to represent a chunk.
383
+ #
384
+ #Also in the future, it's very likely that App will evolve from a
385
+ #simple collection of class methods into a real class with a simple
386
+ #set of instance methods called in a particular order by a "run"
387
+ #method or similar. Subclasses for particular scripts/commands will
388
+ #then override these methods.
389
+ #
390
+ #
391
+ #===Examples
392
+ #
393
+ #The most comprehensive examples of how the Typingpool classes
394
+ #actually work and interact are the tp-* scripts themselves, in
395
+ #particular tp-make, tp-assign, tp-review, and tp-finish.
396
+ #
397
+ #More concise examples follow below, to give you a sense of what the
398
+ #various classes actually do:
399
+ #
400
+ # require 'typingpool'
401
+ #
402
+ # #new Project instance
403
+ # project = Typingpool::Project.new('Chad Interview')
404
+ #
405
+ # #check if project exists on disk
406
+ # unless project.local
407
+ # #make a skeleton project folder in Config#transcripts dir
408
+ # project.create_local
409
+ # #make subtitle record in project folder
410
+ # project.local.subtitle = 'Interview about Hack Day Jan 21'
411
+ # end
412
+ #
413
+ # id = project.local.id
414
+ #
415
+ # #Wrap file in Typingpool::Filer
416
+ # wma = Typingpool::Filer::Audio.new('/foo/bar.wma')
417
+ #
418
+ # #convert file to mp3
419
+ # mp3 = wma.to_mp3
420
+ # other_mp3 = Typingpool::Filer::Audio.new('/foo/bar2.wma').to_mp3
421
+ #
422
+ # #merge audio
423
+ # combined_mp3 = Typingpool::Filer::Files::Audio.new([mp3,
424
+ # other_mp3]).merge(Typingpool::Filer.new('/foo/combined.mp3'))
425
+ #
426
+ # #split audio every 1 minute
427
+ # chunks = combined_mp3.split('1.00')
428
+ #
429
+ # #upload mp3s
430
+ # urls = project.remote.put(chunks.to_streams,
431
+ # project.create_remote_names(chunks))
432
+ #
433
+ # #remove mp3s
434
+ # project.remote.remove_urls(urls)
435
+ #
436
+ # #new Template instance
437
+ # template = Typingpool::Template::Assignment.from_config('interview/nameless')
438
+ # html = template.render({
439
+ # 'audio_url' => urls[0],
440
+ # 'unusual' => ['Hack Day', 'Yahoo', 'Atlassian'],
441
+ # 'chunk_minutes' => 1,
442
+ # 'project_id' => project.local.id
443
+ # })
444
+ #
445
+ # question = Typingpool::Amazon::Question.new(urls[0], html)
446
+ #
447
+ # Typingpool::Amazon.setup
448
+ #
449
+ # #Assign a transcription job (1 chunk)
450
+ # hit = Typingpool::Amazon::HIT.create(question, Typingpool::Config.file.assign)
451
+ #
452
+ # #Find all Typingpool HITs on Amazon Mechanical Turk
453
+ # all = Typingpool::Amazon::HIT.all
454
+ # #Find all reviewable Typingpool HITs
455
+ # reviewable = Typingpool::Amazon::HIT.all_reviewable
456
+ # #Find all approved Typingpool HITs
457
+ # approved = Typingpool::Amazon::HIT.all_approved
458
+ # #Find all HITs for our project
459
+ # project_hits = Typingpool::Amazon::HIT.all_for_project(project.local.id)
460
+ # #Filter all HITs (not just Typingpool HITs) arbitrarily
461
+ # safe_to_delete = Typingpool::Amazon::HIT.all{|hit| hit.ours? && hit.full.expired_and_overdue? }
462
+ # #Filter all approved HITs arbitrarily
463
+ # ready_for_judgment = Typingpool::Amazon::HIT.all_approved{|hit| hit.submitted? && hit.ours? }
464
+ #
465
+ # #Approve a HIT
466
+ # ready_for_judgment[0].at_amazon.approve! #at_amazon is an rturk instance
467
+ # #Reject a HIT
468
+ # ready_for_judgment[1].at_amazon.reject!('Your transcription is just random gibberish')
469
+ # #Delete a HIT from Amazon
470
+ # safe_to_delete[0].remove_from_amazon
471
+ #
472
+ # #Get text of transcript chunk (Typingpool::Transcript::Chunk)
473
+ # transcript_chunk = approved[0].transcript
474
+ # puts transcript_chunk.body
475
+ # #Get formatted text of transcript chunk
476
+ # puts transcript_chunk.body_as_text
477
+ # #Get transcript chunk as HTML
478
+ # puts transcript_chunk.body_as_html
479
+ # #Get transcript chunk metadata
480
+ # puts "--#{transcript_chunk.url} (audio at #{transcript_chunk.offset})"
481
+ #
482
+ #==Author
483
+ # Ryan Tate - ryantate@ryantate.com
484
+ #
485
+ #==License
486
+ # Copyright (c) 2011-2012 Ryan Tate. Released under the terms of the MIT
487
+ # license. See LICENSE for details.
488
+
489
# Top-level namespace for the Typingpool library. Declares the gem version
# and loads each library component.
module Typingpool
  VERSION = '0.7.0'

  # Components are required in exactly this order.
  %w(error utility config filer amazon project transcript template app).each do |component|
    require "typingpool/#{component}"
  end
end #Typingpool
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool'
6
+ require 'typingpool/test'
7
+ require 'fileutils'
8
+
9
# Fixture generator for the Amazon question fixtures.
#
# After running tp_make in a temp dir, renders the default assignment
# template against the first row read from the project's
# data/assignment.csv, then writes the rendered HTML and a placeholder
# question URL into the fixtures directory as amazon-question-html.html
# and amazon-question-url.txt.
class MakeAmazonQuestion < Typingpool::Test::Script
  def test_make_amazon_question_fixture
    in_temp_tp_dir do |dir|
      tp_make(dir)
      template = Typingpool::Template::Assignment.from_config(assign_default[:template], config_from_dir(dir))
      assignment = temp_tp_dir_project(dir).local.file('data', 'assignment.csv').as(:csv).read.first
      question_html = template.render(assignment)
      question_url = 'http://example.com/assignments/101.html'
      # assert_match takes the pattern first and the string under test
      # second (assuming the standard Test::Unit/Minitest signature).
      # The reversed order raises on current versions of those libraries.
      assert_match(/\S/, question_html)
      assert_match(/http/i, question_url)
      File.open(File.join(fixtures_dir, 'amazon-question-html.html'), 'w'){|f| f << question_html}
      File.open(File.join(fixtures_dir, 'amazon-question-url.txt'), 'w'){|f| f << question_url}
    end #in_temp_tp_dir
    add_goodbye_message("Amazon question fixtures created.")
  end
end #MakeAmazonQuestion
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool'
6
+ require 'typingpool/test'
7
+ require 'fileutils'
8
+
9
# First step of generating the tp-collect fixtures: creates the
# 'tp_collect_project_temp' fixture project dir, runs tp_make and tp_assign
# against it, and copies the files yielded by with_fixtures_in_temp_tp_dir
# to their fixture paths.
class MakeAndAssignProject < Typingpool::Test::Script
  def test_prep_for_fixture
    project_dir = make_fixture_project_dir('tp_collect_project_temp')
    setup_temp_tp_dir(project_dir)
    begin
      tp_make(project_dir)
      tp_assign(project_dir)
    rescue
      # Remove the half-built project dir, then re-raise the original error.
      FileUtils.remove_entry_secure(project_dir)
      raise
    end
    #copy key files over to permanent locations within fixture dir
    with_fixtures_in_temp_tp_dir(project_dir, 'tp_collect_') do |fixture_file, project_file|
      FileUtils.cp(project_file, fixture_file)
    end
    next_steps = "Temp project assigned in Mechanical Turk sandbox. Complete and approve TWO assignments and run make_tp_collect_fixture_2.rb. Check for assignments at\nhttps://workersandbox.mturk.com/mturk/searchbar?minReward=0.00&searchWords=typingpooltest&selectedSearchType=hitgroups\n...and then approve them at\nhttps://requestersandbox.mturk.com/mturk/manageHITs?hitSortType=CREATION_DESCENDING&%2Fsort.x=11&%2Fsort.y=7"
    add_goodbye_message(next_steps)
  end
end #MakeAndAssignProject
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+
6
+ require 'typingpool/test'
7
+ require 'fileutils'
8
+
9
# Second step of generating the tp-collect fixtures: runs
# tp_collect_with_fixture against the 'tp_collect_project_temp' project,
# using the fixture path vcr/tp-collect-1, and checks that the matching
# .yml file now exists.
class CollectProjectFixtureGen2 < Typingpool::Test::Script
  def test_populate_fixture
    fixture_path = File.join(fixtures_dir, 'vcr', 'tp-collect-1')
    tp_collect_with_fixture(fixture_project_dir('tp_collect_project_temp'), fixture_path)
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    assert(File.exist?("#{fixture_path}.yml"))
    add_goodbye_message("Initial tp-collect recorded. Please complete and approve TWO more assignments and run make_tp_collect_fixture_3.rb. Check for assignments at\nhttps://workersandbox.mturk.com/mturk/searchbar?minReward=0.00&searchWords=typingpooltest&selectedSearchType=hitgroups\n...and then approve them at\nhttps://requestersandbox.mturk.com/mturk/manageHITs?hitSortType=CREATION_DESCENDING&%2Fsort.x=11&%2Fsort.y=7")
  end
end
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool/test'
6
+ require 'fileutils'
7
+
8
# Third step of generating the tp-collect fixtures: runs
# tp_collect_with_fixture against the 'tp_collect_project_temp' project,
# using the fixture path vcr/tp-collect-2, and checks that the matching
# .yml file now exists.
class CollectProjectFixtureGen3 < Typingpool::Test::Script
  def test_populate_fixture2
    fixture_path = File.join(fixtures_dir, 'vcr', 'tp-collect-2')
    tp_collect_with_fixture(fixture_project_dir('tp_collect_project_temp'), fixture_path)
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    assert(File.exist?("#{fixture_path}.yml"))
    add_goodbye_message("Second tp-collect recorded. Please complete and approve THREE more assignments and run make_tp_collect_fixture_4.rb. Check for assignments at\nhttps://workersandbox.mturk.com/mturk/searchbar?minReward=0.00&searchWords=typingpooltest&selectedSearchType=hitgroups\n...and then approve them at\nhttps://requestersandbox.mturk.com/mturk/manageHITs?hitSortType=CREATION_DESCENDING&%2Fsort.x=11&%2Fsort.y=7")
  end
end
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool/test'
6
+ require 'fileutils'
7
+
8
# Final step of generating the tp-collect fixtures: runs
# tp_collect_with_fixture against the 'tp_collect_project_temp' project,
# using the fixture path vcr/tp-collect-3, checks that the matching .yml
# file exists, then runs tp_finish on the project and removes the temp
# fixture project dir.
class CollectProjectFixtureGen4 < Typingpool::Test::Script
  def test_populate_fixture3
    fixture_path = File.join(fixtures_dir, 'vcr', 'tp-collect-3')
    tp_collect_with_fixture(fixture_project_dir('tp_collect_project_temp'), fixture_path)
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    assert(File.exist?("#{fixture_path}.yml"))
    tp_finish(fixture_project_dir('tp_collect_project_temp'))
    remove_fixture_project_dir('tp_collect_project_temp')
    add_goodbye_message("Third and final tp-collect recorded. Fixtures for tp-collect testing successfully generated in #{File.dirname(fixture_path)}!")
  end
end
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool'
6
+ require 'typingpool/test'
7
+ require 'fileutils'
8
+
9
# First step of generating the tp-review fixtures: creates the
# 'tp_review_project_temp' fixture project dir, runs tp_make and tp_assign
# against it, and copies the files yielded by with_fixtures_in_temp_tp_dir
# to their fixture paths.
class ReviewProjectFixtureGen1 < Typingpool::Test::Script
  def test_prep_for_fixture
    project_dir = make_fixture_project_dir('tp_review_project_temp')
    setup_temp_tp_dir(project_dir)
    begin
      tp_make(project_dir)
      tp_assign(project_dir)
    rescue
      # Remove the half-built project dir, then re-raise the original error.
      FileUtils.remove_entry_secure(project_dir)
      raise
    end
    #copy key files over to permanent locations within fixture dir
    with_fixtures_in_temp_tp_dir(project_dir, 'tp_review_') do |fixture_file, project_file|
      FileUtils.cp(project_file, fixture_file)
    end
    next_steps = "Temp project assigned in Mechanical Turk sandbox. Complete SIX assignments and run make_tp_review_fixture_2.rb. Check for assignments at\nhttps://workersandbox.mturk.com/mturk/searchbar?minReward=0.00&searchWords=typingpooltest&selectedSearchType=hitgroups\n"
    add_goodbye_message(next_steps)
  end
end #ReviewProjectFixtureGen1
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool/test'
6
+ require 'fileutils'
7
+
8
# Final step of generating the tp-review fixtures: runs
# tp_review_with_fixture twice against the 'tp_review_project_temp'
# project (fixture paths vcr/tp-review-1 and vcr/tp-review-2), passing a
# scripted list of review choices each time. After each run it checks for
# a zero exit status and for the matching .yml file, then runs tp_finish
# and removes the temp fixture project dir.
class ReviewProjectFixtureGen2 < Typingpool::Test::Script
  def test_populate_fixture
    fixture_path = File.join(fixtures_dir, 'vcr', 'tp-review-1')
    dir = fixture_project_dir('tp_review_project_temp')
    output = nil
    assert_nothing_raised do
      output = tp_review_with_fixture(dir, fixture_path, %w(a r a r s q ))
    end
    assert_equal(0, output[:status].to_i, "Bad exit code: #{output[:status]} err: #{output[:err]}")
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    assert(File.exist?("#{fixture_path}.yml"))

    fixture_path = File.join(fixtures_dir, 'vcr', 'tp-review-2')
    assert_nothing_raised do
      output = tp_review_with_fixture(dir, fixture_path, %w(a r))
    end
    assert_equal(0, output[:status].to_i, "Bad exit code: #{output[:status]} err: #{output[:err]}")
    assert(File.exist?("#{fixture_path}.yml"))

    tp_finish(dir)
    remove_fixture_project_dir('tp_review_project_temp')
    add_goodbye_message("All done!")
  end
end