typingpool 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. data/Rakefile +23 -0
  2. data/bin/tp-assign +240 -0
  3. data/bin/tp-collect +50 -0
  4. data/bin/tp-config +114 -0
  5. data/bin/tp-finish +101 -0
  6. data/bin/tp-make +169 -0
  7. data/bin/tp-review +175 -0
  8. data/lib/typingpool/amazon.rb +732 -0
  9. data/lib/typingpool/app.rb +634 -0
  10. data/lib/typingpool/config.rb +344 -0
  11. data/lib/typingpool/error.rb +22 -0
  12. data/lib/typingpool/filer.rb +396 -0
  13. data/lib/typingpool/project.rb +593 -0
  14. data/lib/typingpool/template.rb +175 -0
  15. data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
  16. data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
  17. data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
  18. data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
  19. data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
  20. data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
  21. data/lib/typingpool/templates/assignment/main.css +20 -0
  22. data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
  23. data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
  24. data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
  25. data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
  26. data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
  27. data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
  28. data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
  29. data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
  30. data/lib/typingpool/templates/config.yml +21 -0
  31. data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
  32. data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
  33. data/lib/typingpool/templates/project/data/.empty_directory +0 -0
  34. data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
  35. data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
  36. data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
  37. data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
  38. data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
  39. data/lib/typingpool/templates/project/etc/transcript.css +49 -0
  40. data/lib/typingpool/templates/transcript.html.erb +23 -0
  41. data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
  42. data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
  43. data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
  44. data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
  45. data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
  46. data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
  47. data/lib/typingpool/test/fixtures/config-1 +20 -0
  48. data/lib/typingpool/test/fixtures/config-2 +25 -0
  49. data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
  50. data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
  51. data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
  52. data/lib/typingpool/test/fixtures/template.html.erb +10 -0
  53. data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
  54. data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
  55. data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
  56. data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
  57. data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
  58. data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
  59. data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
  60. data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
  61. data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
  62. data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
  63. data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
  64. data/lib/typingpool/test.rb +418 -0
  65. data/lib/typingpool/transcript.rb +181 -0
  66. data/lib/typingpool/utility.rb +272 -0
  67. data/lib/typingpool.rb +500 -0
  68. data/test/make_amazon_question_fixture.rb +24 -0
  69. data/test/make_tp_collect_fixture_1.rb +26 -0
  70. data/test/make_tp_collect_fixture_2.rb +16 -0
  71. data/test/make_tp_collect_fixture_3.rb +15 -0
  72. data/test/make_tp_collect_fixture_4.rb +17 -0
  73. data/test/make_tp_review_fixture_1.rb +26 -0
  74. data/test/make_tp_review_fixture_2.rb +30 -0
  75. data/test/make_transcript_chunks_fixture.rb +53 -0
  76. data/test/test_integration_script_1_tp_config.rb +108 -0
  77. data/test/test_integration_script_2_tp_make.rb +119 -0
  78. data/test/test_integration_script_3_tp_assign.rb +152 -0
  79. data/test/test_integration_script_4_tp_review.rb +72 -0
  80. data/test/test_integration_script_5_tp_collect.rb +44 -0
  81. data/test/test_integration_script_6_tp_finish.rb +123 -0
  82. data/test/test_unit_amazon.rb +153 -0
  83. data/test/test_unit_config.rb +94 -0
  84. data/test/test_unit_filer.rb +202 -0
  85. data/test/test_unit_project.rb +168 -0
  86. data/test/test_unit_project_local.rb +68 -0
  87. data/test/test_unit_project_remote.rb +157 -0
  88. data/test/test_unit_template.rb +111 -0
  89. data/test/test_unit_transcript.rb +77 -0
  90. metadata +234 -0
data/lib/typingpool.rb ADDED
@@ -0,0 +1,500 @@
1
+ # -*- coding: utf-8 -*-
2
+ #= Typingpool
3
+ #
4
+ #Typingpool is an app for easily making transcripts of audio using
5
+ #Amazon's labor marketplace, Mechanical Turk.
6
+ #
7
+ #Typingpool is distributed as a Ruby gem. It is made up of a handful
8
+ #of scripts for users and a collection of library files for
9
+ #developers.
10
+ #
11
+ #Typingpool also includes a collection of ERB templates for
12
+ #generating Mechanical Turk assignments and the final transcript HTML
13
+ #file.
14
+ #
15
+ #== Dependencies
16
+ #
17
+ #Typingpool depends on these command-line tools, which are not
18
+ #included in the gem since they are external to Ruby:
19
+ #
20
+ # [ffmpeg] A powerhouse audio/video converter.
21
+ # [libmp3lame] An mp3 encoder/decoder, used by ffmpeg.
22
+ # [mp3splt] An audio file-splitting utility.
23
+ # [mp3wrap] An audio file-merging utility.
24
+ #
25
+ #== User overview
26
+ #
27
+ #=== Setup
28
+ #
29
+ #After installing the gem and its dependencies, run tp-config from the
30
+ #command line to create your config file (~/.typingpool). At the
31
+ #prompts, you will need to supply your Amazon Web Services Access Key
32
+ #ID and your Amazon Web Services Secret Access key.
33
+ #
34
+ #The config file is in YAML format and may be customized using any
35
+ #text editor. For more details on configuration options, see the
36
+ #documentation for Typingpool::Config.
37
+ #
38
+ #=== Workflow
39
+ #
40
+ #A typical workflow will use the bundled scripts in this order:
41
+ #
42
+ # tp-make -> tp-assign -> [wait] -> tp-review -> tp-finish
43
+ #
44
+ #tp-review may be called repeatedly, until transcripts for all audio
45
+ #chunks have been processed. Similarly, tp-assign may be called
46
+ #repeatedly, for example to re-assign chunks rejected using tp-review,
47
+ #or to re-assign chunks that have expired.
48
+ #
49
+ #An alternate workflow would go like this:
50
+ #
51
+ # tp-make -> [manually upload assignments.csv to Amazon RUI] ->
52
+ # [wait] -> [approve/reject assignments via RUI] -> tp-collect ->
53
+ # tp-finish
54
+ #
55
+ #=== Examples
56
+ #
57
+ #Typical usage scenario:
58
+ #
59
+ # tp-make 'Chad Interview' chad1.WMA chad2.WMA --unusual 'Hack Day,
60
+ # Yahoo' --subtitle 'Phone interview re Yahoo Hack Day'
61
+ #
62
+ # # => Converting chad1.WMA to mp3
63
+ # # => Converting chad2.WMA to mp3
64
+ # # => Merging audio
65
+ # # => Splitting audio into uniform bits
66
+ # # => Uploading Chad Interview.00.00.mp3 to
67
+ # ryantate42.s3.amazonaws.com as Chad
68
+ # Interview.00.00.33ca7f2cceba9f8031bf4fb7c3f819f4.LHFJEM.mp3
69
+ # # => Uploading Chad Interview.01.00.mp3 to
70
+ # ryantate42.s3.amazonaws.com as Chad
71
+ # Interview.01.00.33ca7f2cceba9f8031bf4fb7c3f819f4.XMWNYW.mp3
72
+ # # => Uploading Chad Interview.02.00.mp3 to
73
+ # ryantate42.s3.amazonaws.com as Chad
74
+ # Interview.02.00.33ca7f2cceba9f8031bf4fb7c3f819f4.FNEIWN.mp3
75
+ # # => ... [snip]
76
+ # # => Done. Project at:
77
+ # # => /Users/ryantate/Desktop/Transcripts/Chad Interview
78
+ #
79
+ #
80
+ # tp-assign 'Chad Interview' interview/nameless --reward 1.00
81
+ # --deadline 90m --approval 6h --lifetime 2d
82
+ #
83
+ # # => Figuring out what needs to be assigned
84
+ # # => 85 assignments total
85
+ # # => 85 assignments to assign
86
+ # # => Deleting old assignment HTML from ryantate42.s3.amazonaws.com
87
+ # # => Uploading assignment HTML to ryantate42.s3.amazonaws.com
88
+ # # => Assigning
89
+ # # => Assigned 85 transcription jobs for $85
90
+ # # => Remaining balance: $115.00
91
+ #
92
+ # [Wait...]
93
+ #
94
+ #
95
+ # tp-review 'Chad Interview'
96
+ #
97
+ # # => Gathering submissions from Amazon
98
+ # # => Matching submissions with local projects
99
+ # # =>
100
+ # # => Transcript for: https://ryantate42.s3.amazonaws.com/
101
+ # Chad%20Interview.29.00.263d492275a81afb005c8231d8d8afdb.
102
+ # UEMOCN.mp3
103
+ # # => Project: Chad Interview: Phone interview re Yahoo Hack Day
104
+ # # => Submitted at: 2012-08-11 17:00:36 -0700 by A9S0AOAI8HO9P
105
+ # # =>
106
+ # # => Chad: ... so it had sort of some geek history. And the
107
+ # # => weather was really bad. But it was an indoor event,
108
+ # # => right? So people were staying indoors. And like very
109
+ # # => early... And there was all this really expensive gear
110
+ # # => that the BBC had. Like these cameras that guys were like
111
+ # # => riding around on and stuff, huge sound stage, bigger than
112
+ # # => the one we had in Sunnyvale.
113
+ # # =>
114
+ # # => Two hours into the event, we heard this big lightning
115
+ # # => strike, because we were up on a hill in London. And all
116
+ # # => the lights went out and the roof opened up in the
117
+ # # => building. What we didn't know is the fire supression
118
+ # # => system in that building which got blown up by the
119
+ # # => lightning during a fire would cause the roof to open
120
+ # # => up. So we had all these geeks with equipment and all this
121
+ # # => BBC equipment and it was literally raining on them.
122
+ # # =>
123
+ # # => (A)pprove, (R)eject, (Q)uit, [(S)kip]? (1/20)
124
+ #
125
+ # a
126
+ #
127
+ # # => Approved. Chad Interview transcript updated.
128
+ # # =>
129
+ # # => Transcript for: https://ryantate42.s3.amazonaws.com/
130
+ # Chad%20Interview.30.00.263d492275a81afb005c8231d8d8afdb.
131
+ # RXNKRN.mp3
132
+ # # => Project: Chad Interview: Phone interview re Yahoo Hack Day
133
+ # # => Submitted at: 2012-08-11 17:00:58 -0700 by A9S0AOAI8HO9P
134
+ # # =>
135
+ # # => Blah blah blah blah okay I am done typing byeeeeeeee
136
+ # # =>
137
+ # # => (A)pprove, (R)eject, (Q)uit, [(S)kip]? (2/20)
138
+ #
139
+ # r
140
+ #
141
+ # # => Rejection reason, for worker:
142
+ #
143
+ # There's no transcription at all, just nonsense
144
+ #
145
+ # # => Rejected
146
+ # # =>
147
+ # # => Transcript for...
148
+ # # => ... [snip]
149
+ #
150
+ #
151
+ # tp-finish 'Chad Interview'
152
+ #
153
+ # # => Removing from Amazon
154
+ # # => Collecting all results
155
+ # # => Removing HIT 2GKMIKMN9U8PNHKK58NXL3SU4TCBSN (Reviewable)
156
+ # # => Removing from data/assignment.csv
157
+ # # => Removing from local cache
158
+ # # => Removing HIT 2CFX2Q45UUKQ2HXZU8SNV8OG6CQBTC (Assignable)
159
+ # # => Removing from data/assignment.csv
160
+ # # => Removing from local cache
161
+ # # => Removing HIT 294EZZ2MIKMNNDP1LAU8WWWXOEI7O0...
162
+ # # => ... [snip]
163
+ # # => Removing Chad Interview.00.00.
164
+ # 263d492275a81afb005c8231d8d8afdb.ORSENE.html from
165
+ # ryantate42.s3.amazonaws.com
166
+ # # => Removing Chad Interview.01.00...
167
+ # # => ... [snip]
168
+ # # => Removing Chad Interview.00.00.
169
+ # 263d492275a81afb005c8231d8d8afdb.RNTVLN.mp3 from
170
+ # ryantate42.s3.amazonaws.com
171
+ # # => Removing Chad Interview.01.00....
172
+ # # => ... [snip]
173
+ #
174
+ #=== Output
175
+ #
176
+ #The final output of Typingpool is a project directory containing a
177
+ #transcript file.
178
+ #
179
+ #The transcript file is HTML with audio chunks embedded alongside each
180
+ #associated transcript chunk.
181
+ #
182
+ #The transcript file is called transcript.html when complete. A
183
+ #partial transcript file is called transcript_in_progress.html.
184
+ #
185
+ #The project directory also includes supporting files, including a CSV
186
+ #data file used to store raw transcript chunks, Amazon Mechanical Turk
187
+ #HIT information, and other metadata; JavaScript code that swaps in
188
+ #Flash players on browsers that don't support mp3 files in audio tags;
189
+ #the original audio files and the audio chunks generated from them;
190
+ #and a CSS file.
191
+ #
192
+ #The directory is laid out like so:
193
+ #
194
+ # Chad Interview/
195
+ # -> transcript.html | transcript_in_progress.html
196
+ # -> audio/
197
+ # -> chunks/
198
+ # -> Chad Interview.00.00.mp3
199
+ # -> Chad Interview.01.00.mp3
200
+ # -> ... [snip]
201
+ # -> originals/
202
+ # -> chad1.WMA
203
+ # -> chad2.WMA
204
+ # -> data/
205
+ # -> assignment.csv
206
+ # -> id.txt
207
+ # -> subtitle.txt
208
+ # -> etc/
209
+ # -> audio-compat.js
210
+ # -> transcript.css
211
+ # -> About these files - read me.txt
212
+ # -> player/
213
+ # -> audio-player.js
214
+ # -> license.txt
215
+ # -> player.swf
216
+ #
217
+ #You may safely edit the files transcript.html, etc/transcript.css,
218
+ #and data/subtitle.txt, and you may safely delete the files in
219
+ #audio/originals and any .txt files in etc/. Editing or deleting other
220
+ #files may interfere with the operation of Typingpool or render the
221
+ #transcript inoperative. Do not edit transcript_in_progress.html as
222
+ #your changes will be overwritten if/when the transcript is next
223
+ #updated.
224
+ #
225
+ #
226
+ #=== Workflow (additional)
227
+ # * When you want to preview your assignments, run tp-assign with the
228
+ # option --sandbox and with --qualify 'rejection_rate < 100' (to
229
+ # make sure you qualify to view your own HITs). Then visit
230
+ # http://workersandbox.mturk.com and find your assignments (a search
231
+ # for "mp3" works if you left mp3 set as a keyword in your config
232
+ # file). When you are done previewing, run tp-finish with the
233
+ # name/path of your project and the --sandbox option.
234
+ #
235
+ # * When you assign your transcription jobs via tp-assign, you must
236
+ # supply a template name or relative path as the second
237
+ # argument. In the example above, the named template is
238
+ # “interview/nameless.”
239
+ #
240
+ # The template “interview/nameless” is a great general purpose
241
+ # template. It instructs the transcriber not to worry about the
242
+ # names of the speakers, and instead to use labels like “male 1,”
243
+ # “male 2,” etc. This allows the transcriber to work quickly and
244
+ # usually results in a viable transcript, since you can consult
245
+ # your memory or the original audio to figure out who is who.
246
+ #
247
+ # To find what other templates are available, navigate to the
248
+ # directory where typingpool is installed (`gem which typingpool`)
249
+ # and then go into typingpool/templates/assignment and its
250
+ # subdirectories. Anything that ends in ‘.html.erb’ is an available
251
+ # template. You may also create your own templates in the directory
252
+ # listed in the “templates” param of your config file.
253
+ #
254
+ # The templates interview, interview/phone, and interview/noisy
255
+ # require you to have passed the names of two voices to tp-make
256
+ # when you created your project. The first voice should be the name
257
+ # (and optional title) of the interviewer, and the second the name
258
+ # (and title) of the interviewee, like so:
259
+ #
260
+ # tp-make 'Chad Interview' chad1.WMA chad2.WMA --voice 'Ryan,
260
+ # hack reporter' --voice 'Chad, a software engineer' --unusual
262
+ # 'Hack Day, Yahoo' --subtitle 'Phone interview re Yahoo Hack
263
+ # Day'
264
+ #
265
+ #
266
+ # * When you've rejected some submissions in tp-review and need to
267
+ # re-assign these chunks to be transcribed, simply re-run tp-assign
268
+ # with the name (or path) of your project. You may select the same
269
+ # template, reward, deadlines, etc., or pick new ones. tp-assign
270
+ # will be careful not to re-assign chunks for which you have
271
+ # approved a transcript, or which are pending on Mechanical Turk.
272
+ #
273
+ # * When some chunks previously assigned via tp-assign have expired
274
+ # without attracting submissions, simply re-run tp-assign as
275
+ # described above to re-assign these chunks. Consider increasing
276
+ # the dollar amount specified in your --reward argument.
277
+ #
278
+ # * When some chunks previously assigned via tp-assign have been
279
+ # submitted by workers but not approved or rejected in time for the
280
+ # approval deadline (assign/approval in your config file or
281
+ # --approval as passed to tp-assign), Mechanical Turk has
282
+ # automatically approved these submissions for you and you'll need
283
+ # to run tp-collect to collect them. (Yes, it’s silly you need to run
284
+ # a whole different script instead of just calling tp-review as
285
+ # usual. I’ll fix this in a future version.)
286
+ #
287
+ # * When you want to cancel outstanding assignments, simply run
288
+ # tp-finish with the name of your project. If your assignments have
289
+ # already attracted submissions, you may be prompted to run
290
+ # tp-review first.
291
+ #
292
+ # * When tp-make, tp-assign, or tp-finish tells you it failed an
293
+ # upload, deletion, or Amazon command, simply re-run the script
294
+ # with the same arguments to re-attempt the upload, deletion or
295
+ # Amazon command. Typingpool carefully records which network
296
+ # operations it is attempting and which network operations have
297
+ # completed. It can robustly handle network errors, including
298
+ # uncaught exceptions.
299
+ ##
300
+ #
301
+ #=== Maintenance
302
+ #
303
+ # [cache] If the cache file grows too large, you'll need to delete it
304
+ # manually. It may be safely deleted as long as no
305
+ # Typingpool scripts are running. Its location is
306
+ # specified in the 'cache' param in the config file. (The
307
+ # config file is at ~/.typingpool and the cache, by
308
+ # default, is at ~/.typingpool.cache.)
309
+ #
310
+ # Typingpool takes no steps to limit the size of the
311
+ # cache file. It prunes the cache of project-specific
312
+ # entries when you run tp-finish on a project, but the
313
+ # cache may grow large if you work on many active
314
+ # projects in parallel, or if you fail to run tp-finish
315
+ # on projects when you are done with them.
316
+ #
317
+ # [tp-finish] You should run tp-finish PROJECT each time you finish a
318
+ # project, where PROJECT may be either the project name
319
+ # or path. Assuming you have no submissions pending or
320
+ # awaiting approval, this clears all traces of the
321
+ # project from Amazon Mechanical Turk, from Amazon S3 or
322
+ # your SFTP server, and from the local cache. This will
323
+ # keep your local cache from ballooning in size and will
324
+ # minimize your S3 charges or SFTP disk usage. It will
325
+ # also help Typingpool scripts run faster by reducing the
326
+ # number of HITs you have on Amazon Mechanical Turk; many
327
+ # Typingpool operations involve iterating through all of
328
+ # your HITs.
329
+ #
330
+ #
331
+ #=== See also
332
+ #
333
+ # * Run any script with the --help option for further details on how
334
+ # to run the script.
335
+ #
336
+ # * See the docs for Typingpool::Config for details of the config
337
+ # file format.
338
+ #
339
+ # * See Amazon's Mechanical Turk documentation for guides and
340
+ # overviews on how Mechanical Turk
341
+ # works. https://requester.mturk.com/help
342
+ #
343
+ # * See the documentation on ffmpeg and related libraries for clues
344
+ # as to how to make Typingpool support additional file
345
+ # formats. Typingpool can work with any file format that ffmpeg can
346
+ # convert to mp3 (libmp3lame).
347
+ #
348
+ # * For an overview of the concepts on which Typingpool is built, see
349
+ # Andy Baio’s guide to using Mechanical Turk for transcription:
350
+ # http://waxy.org/2008/09/audio_transcription_with_mechanical_turk/
351
+ #
352
+ #== Developer overview
353
+ #
354
+ #Views, used for the final transcript and for rendering HTML
355
+ #assignments for Amazon Mechanical Turk workers, are contained in a
356
+ #series of templates in lib/typingpool/templates, particularly
357
+ #transcript.html.erb and assignment/*. The control layer lives in the
358
+ #App class (lib/typingpool/app.rb) and within the individual
359
+ #scripts. The models constitute the other Typingpool classes,
360
+ #including most importantly and in rough order of importance the
361
+ #Project, Transcript, Amazon, Config and Filer classes (the latter of
362
+ #interest mainly because of Filer::Audio, which handles splitting,
363
+ #merging, and conversion).
364
+ #
365
+ #The models in particular, along with the App class, are
366
+ #underdeveloped and not particularly clear or fully thought
367
+ #through. The Transcript model, for example, should almost certainly
368
+ #be folded into the Project model. Dividing Project into
369
+ #Project::Local and Project::Remote only makes sense on a superficial
370
+ #level; Project::Remote could probably be its own class or even part
371
+ #of Utility. Amazon will probably be simpler if I can get some patches
372
+ #into RTurk, and Amazon::HIT should probably be integrated more closely
373
+ #with Project.
374
+ #
375
+ #One of the most frustrating things about the code is that there are
376
+ #so many subtly different ways a "chunk" of a transcript/project is
377
+ #represented: As a simple hash derived from a row in
378
+ #data/assignment.csv within a project folder, as an Amazon::HIT, as a
379
+ #Transcript::Chunk, as an audio file on a remote server, and as a
380
+ #local audio file (which has a different name from the remote
381
+ #file). So in future versions I'll probably reduce the number of
382
+ #different ways to represent a chunk.
383
+ #
384
+ #Also in the future, it's very likely that App will evolve from a
385
+ #simple collection of class methods into a real class with a simple
386
+ #set of instance methods called in a particular order by a "run"
387
+ #method or similar. Subclasses for particular scripts/commands will
388
+ #then override these methods.
389
+ #
390
+ #
391
+ #===Examples
392
+ #
393
+ #The most comprehensive examples of how the Typingpool classes
394
+ #actually work and interact are the tp-* scripts themselves, in
395
+ #particular tp-make, tp-assign, tp-review, and tp-finish.
396
+ #
397
+ #More concise examples follow below, to give you a sense of what the
398
+ #various classes actually do:
399
+ #
400
+ # require 'typingpool'
401
+ #
402
+ # #new Project instance
403
+ # project = Typingpool::Project.new('Chad Interview')
404
+ #
405
+ # #check if project exists on disk
406
+ # unless project.local
407
+ # #make a skeleton project folder in Config#transcripts dir
408
+ # project.create_local
409
+ # #make subtitle record in project folder
410
+ # project.local.subtitle = 'Interview about Hack Day Jan 21'
411
+ # end
412
+ #
413
+ # id = project.local.id
414
+ #
415
+ # #Wrap file in Typingpool::Filer
416
+ # wma = Typingpool::Filer::Audio.new('/foo/bar.wma')
417
+ #
418
+ # #convert file to mp3
419
+ # mp3 = wma.to_mp3
420
+ # other_mp3 = Typingpool::Filer::Audio.new('/foo/bar2.wma').to_mp3
421
+ #
422
+ # #merge audio
423
+ # combined_mp3 = Typingpool::Filer::Files::Audio.new([mp3,
424
+ # other_mp3]).merge(Typingpool::Filer.new('/foo/combined.mp3'))
425
+ #
426
+ # #split audio every 1 minute
427
+ # chunks = combined_mp3.split('1.00')
428
+ #
429
+ # #upload mp3s
430
+ # urls = project.remote.put(chunks.to_streams,
431
+ # project.create_remote_names(chunks))
432
+ #
433
+ # #remove mp3s
434
+ # project.remote.remove_urls(urls)
435
+ #
436
+ # #new Template instance
437
+ # template = Typingpool::Template::Assignment.from_config('interview/nameless')
438
+ # html = template.render({
439
+ # 'audio_url' => urls[0],
440
+ # 'unusual' => ['Hack Day', 'Yahoo', 'Atlassian'],
441
+ # 'chunk_minutes' => 1,
442
+ # 'project_id' => project.local.id
443
+ # })
444
+ #
445
+ # question = Typingpool::Amazon::Question.new(urls[0], html)
446
+ #
447
+ # Typingpool::Amazon.setup
448
+ #
449
+ # #Assign a transcription job (1 chunk)
450
+ # hit = Typingpool::Amazon::HIT.create(question, Typingpool::Config.file.assign)
451
+ #
452
+ # #Find all Typingpool HITs on Amazon Mechanical Turk
453
+ # all = Typingpool::Amazon::HIT.all
454
+ # #Find all reviewable Typingpool HITs
455
+ # reviewable = Typingpool::Amazon::HIT.all_reviewable
456
+ # #Find all approved Typingpool HITs
457
+ # approved = Typingpool::Amazon::HIT.all_approved
458
+ # #Find all HITs for our project
459
+ # project_hits = Typingpool::Amazon::HIT.all_for_project(project.local.id)
460
+ # #Filter all HITs (not just Typingpool HITs) arbitrarily
461
+ # safe_to_delete = Typingpool::Amazon::HIT.all{|hit| hit.ours? && hit.full.expired_and_overdue? }
462
+ # #Filter all approved HITs arbitrarily
463
+ # ready_for_judgment = Typingpool::Amazon::HIT.all_approved{|hit| hit.submitted? && hit.ours? }
464
+ #
465
+ # #Approve a HIT
466
+ # ready_for_judgment[0].at_amazon.approve! #at_amazon is an rturk instance
467
+ # #Reject a HIT
468
+ # ready_for_judgment[1].at_amazon.reject!('Your transcription is just random gibberish')
469
+ # #Delete a HIT from Amazon
470
+ # safe_to_delete[0].remove_from_amazon
471
+ #
472
+ # #Get text of transcript chunk (Typingpool::Transcript::Chunk)
473
+ # transcript_chunk = approved[0].transcript
474
+ # puts transcript_chunk.body
475
+ # #Get formatted text of transcript chunk
476
+ # puts transcript_chunk.body_as_text
477
+ # #Get transcript chunk as HTML
478
+ # puts transcript_chunk.body_as_html
479
+ # #Get transcript chunk metadata
480
+ # puts "--#{transcript_chunk.url} (audio at #{transcript_chunk.offset})"
481
+ #
482
+ #==Author
483
+ # Ryan Tate - ryantate@ryantate.com
484
+ #
485
+ #==License
486
+ # Copyright (c) 2011-2012 Ryan Tate. Released under the terms of the MIT
487
+ # license. See LICENSE for details.
488
+
489
+ module Typingpool
490
+ VERSION = '0.7.0' #release version of this library
491
+ require 'typingpool/error' #the library's component files are required from inside the module body
492
+ require 'typingpool/utility'
493
+ require 'typingpool/config'
494
+ require 'typingpool/filer'
495
+ require 'typingpool/amazon'
496
+ require 'typingpool/project'
497
+ require 'typingpool/transcript'
498
+ require 'typingpool/template'
499
+ require 'typingpool/app'
500
+ end #Typingpool
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool'
6
+ require 'typingpool/test'
7
+ require 'fileutils'
8
+
9
+ class MakeAmazonQuestion < Typingpool::Test::Script
10
+ def test_make_amazon_question_fixture
11
+ in_temp_tp_dir do |dir|
12
+ tp_make(dir)
13
+ template = Typingpool::Template::Assignment.from_config(assign_default[:template], config_from_dir(dir))
14
+ assignment = temp_tp_dir_project(dir).local.file('data', 'assignment.csv').as(:csv).read.first
15
+ question_html = template.render(assignment)
16
+ question_url = 'http://example.com/assignments/101.html'
17
+ assert_match(question_html, /\S/)
18
+ assert_match(question_url, /http/i)
19
+ File.open(File.join(fixtures_dir, 'amazon-question-html.html'), 'w'){|f| f << question_html}
20
+ File.open(File.join(fixtures_dir, 'amazon-question-url.txt'), 'w'){|f| f << question_url}
21
+ end #in_temp_tp_dir
22
+ add_goodbye_message("Amazon question fixtures created.")
23
+ end
24
+ end #MakeAmazonQuestion
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool'
6
+ require 'typingpool/test'
7
+ require 'fileutils'
8
+
9
+ class MakeAndAssignProject < Typingpool::Test::Script #Step 1 of the tp-collect fixture scripts; make_tp_collect_fixture_2.rb is run next
10
+ def test_prep_for_fixture
11
+ dir = make_fixture_project_dir('tp_collect_project_temp')
12
+ setup_temp_tp_dir(dir)
13
+ begin
14
+ tp_make(dir)
15
+ tp_assign(dir)
16
+ rescue #on any failure, remove the fixture project dir created above, then re-raise
17
+ FileUtils.remove_entry_secure(dir)
18
+ raise
19
+ end
20
+ #copy key files over to permanent locations within fixture dir (copies are named with the 'tp_collect_' prefix)
21
+ with_fixtures_in_temp_tp_dir(dir, 'tp_collect_') do |fixture_path, project_path|
22
+ FileUtils.cp(project_path, fixture_path)
23
+ end
24
+ add_goodbye_message("Temp project assigned in Mechanical Turk sandbox. Complete and approve TWO assignments and run make_tp_collect_fixture_2.rb. Check for assignments at\nhttps://workersandbox.mturk.com/mturk/searchbar?minReward=0.00&searchWords=typingpooltest&selectedSearchType=hitgroups\n...and then approve them at\nhttps://requestersandbox.mturk.com/mturk/manageHITs?hitSortType=CREATION_DESCENDING&%2Fsort.x=11&%2Fsort.y=7")
25
+ end
26
+ end #MakeAndAssignProject
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+
6
+ require 'typingpool/test'
7
+ require 'fileutils'
8
+
9
+ class CollectProjectFixtureGen2 < Typingpool::Test::Script
10
+ def test_populate_fixture
11
+ fixture_path = File.join(fixtures_dir, 'vcr', 'tp-collect-1')
12
+ tp_collect_with_fixture(fixture_project_dir('tp_collect_project_temp'), fixture_path)
13
+ assert(File.exists?("#{fixture_path}.yml"))
14
+ add_goodbye_message("Initial tp-collect recorded. Please complete and approve TWO more assignments and run make_tp_collect_fixture_3.rb. Check for assignments at\nhttps://workersandbox.mturk.com/mturk/searchbar?minReward=0.00&searchWords=typingpooltest&selectedSearchType=hitgroups\n...and then approve them at\nhttps://requestersandbox.mturk.com/mturk/manageHITs?hitSortType=CREATION_DESCENDING&%2Fsort.x=11&%2Fsort.y=7")
15
+ end
16
+ end
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool/test'
6
+ require 'fileutils'
7
+
8
+ class CollectProjectFixtureGen3 < Typingpool::Test::Script
9
+ def test_populate_fixture2
10
+ fixture_path = File.join(fixtures_dir, 'vcr', 'tp-collect-2')
11
+ tp_collect_with_fixture(fixture_project_dir('tp_collect_project_temp'), fixture_path)
12
+ assert(File.exists?("#{fixture_path}.yml"))
13
+ add_goodbye_message("Second tp-collect recorded. Please complete and approve THREE more assignments and run make_tp_collect_fixture_4.rb. Check for assignments at\nhttps://workersandbox.mturk.com/mturk/searchbar?minReward=0.00&searchWords=typingpooltest&selectedSearchType=hitgroups\n...and then approve them at\nhttps://requestersandbox.mturk.com/mturk/manageHITs?hitSortType=CREATION_DESCENDING&%2Fsort.x=11&%2Fsort.y=7")
14
+ end
15
+ end
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool/test'
6
+ require 'fileutils'
7
+
8
+ class CollectProjectFixtureGen4 < Typingpool::Test::Script
9
+ def test_populate_fixture3
10
+ fixture_path = File.join(fixtures_dir, 'vcr', 'tp-collect-3')
11
+ tp_collect_with_fixture(fixture_project_dir('tp_collect_project_temp'), fixture_path)
12
+ assert(File.exists?("#{fixture_path}.yml"))
13
+ tp_finish(fixture_project_dir('tp_collect_project_temp'))
14
+ remove_fixture_project_dir('tp_collect_project_temp')
15
+ add_goodbye_message("Third and final tp-collect recorded. Fixtures for tp-collect testing successfully generated in #{File.dirname(fixture_path)}!")
16
+ end
17
+ end
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool'
6
+ require 'typingpool/test'
7
+ require 'fileutils'
8
+
9
+ class ReviewProjectFixtureGen1 < Typingpool::Test::Script #Step 1 of the tp-review fixture scripts; make_tp_review_fixture_2.rb is run next
10
+ def test_prep_for_fixture
11
+ dir = make_fixture_project_dir('tp_review_project_temp')
12
+ setup_temp_tp_dir(dir)
13
+ begin
14
+ tp_make(dir)
15
+ tp_assign(dir)
16
+ rescue #on any failure, remove the fixture project dir created above, then re-raise
17
+ FileUtils.remove_entry_secure(dir)
18
+ raise
19
+ end
20
+ #copy key files over to permanent locations within fixture dir (copies are named with the 'tp_review_' prefix)
21
+ with_fixtures_in_temp_tp_dir(dir, 'tp_review_') do |fixture_path, project_path|
22
+ FileUtils.cp(project_path, fixture_path)
23
+ end
24
+ add_goodbye_message("Temp project assigned in Mechanical Turk sandbox. Complete SIX assignments and run make_tp_review_fixture_2.rb. Check for assignments at\nhttps://workersandbox.mturk.com/mturk/searchbar?minReward=0.00&searchWords=typingpooltest&selectedSearchType=hitgroups\n")
25
+ end
26
+ end #ReviewProjectFixtureGen1
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.join(File.dirname(File.dirname($0)), 'lib')
4
+
5
+ require 'typingpool/test'
6
+ require 'fileutils'
7
+
8
+ class ReviewProjectFixtureGen2 < Typingpool::Test::Script
9
+ def test_populate_fixture
10
+ fixture_path = File.join(fixtures_dir, 'vcr', 'tp-review-1')
11
+ dir = fixture_project_dir('tp_review_project_temp')
12
+ output = nil
13
+ assert_nothing_raised do
14
+ output = tp_review_with_fixture(dir, fixture_path, %w(a r a r s q ))
15
+ end
16
+ assert_equal(0, output[:status].to_i, "Bad exit code: #{output[:status]} err: #{output[:err]}")
17
+ assert(File.exists?("#{fixture_path}.yml"))
18
+
19
+ fixture_path = File.join(fixtures_dir, 'vcr', 'tp-review-2')
20
+ assert_nothing_raised do
21
+ output = tp_review_with_fixture(dir, fixture_path, %w(a r))
22
+ end
23
+ assert_equal(0, output[:status].to_i, "Bad exit code: #{output[:status]} err: #{output[:err]}")
24
+ assert(File.exists?("#{fixture_path}.yml"))
25
+
26
+ tp_finish(dir)
27
+ remove_fixture_project_dir('tp_review_project_temp')
28
+ add_goodbye_message("All done!")
29
+ end
30
+ end