typingpool 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. data/Rakefile +23 -0
  2. data/bin/tp-assign +240 -0
  3. data/bin/tp-collect +50 -0
  4. data/bin/tp-config +114 -0
  5. data/bin/tp-finish +101 -0
  6. data/bin/tp-make +169 -0
  7. data/bin/tp-review +175 -0
  8. data/lib/typingpool/amazon.rb +732 -0
  9. data/lib/typingpool/app.rb +634 -0
  10. data/lib/typingpool/config.rb +344 -0
  11. data/lib/typingpool/error.rb +22 -0
  12. data/lib/typingpool/filer.rb +396 -0
  13. data/lib/typingpool/project.rb +593 -0
  14. data/lib/typingpool/template.rb +175 -0
  15. data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
  16. data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
  17. data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
  18. data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
  19. data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
  20. data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
  21. data/lib/typingpool/templates/assignment/main.css +20 -0
  22. data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
  23. data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
  24. data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
  25. data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
  26. data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
  27. data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
  28. data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
  29. data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
  30. data/lib/typingpool/templates/config.yml +21 -0
  31. data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
  32. data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
  33. data/lib/typingpool/templates/project/data/.empty_directory +0 -0
  34. data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
  35. data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
  36. data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
  37. data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
  38. data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
  39. data/lib/typingpool/templates/project/etc/transcript.css +49 -0
  40. data/lib/typingpool/templates/transcript.html.erb +23 -0
  41. data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
  42. data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
  43. data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
  44. data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
  45. data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
  46. data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
  47. data/lib/typingpool/test/fixtures/config-1 +20 -0
  48. data/lib/typingpool/test/fixtures/config-2 +25 -0
  49. data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
  50. data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
  51. data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
  52. data/lib/typingpool/test/fixtures/template.html.erb +10 -0
  53. data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
  54. data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
  55. data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
  56. data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
  57. data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
  58. data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
  59. data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
  60. data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
  61. data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
  62. data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
  63. data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
  64. data/lib/typingpool/test.rb +418 -0
  65. data/lib/typingpool/transcript.rb +181 -0
  66. data/lib/typingpool/utility.rb +272 -0
  67. data/lib/typingpool.rb +500 -0
  68. data/test/make_amazon_question_fixture.rb +24 -0
  69. data/test/make_tp_collect_fixture_1.rb +26 -0
  70. data/test/make_tp_collect_fixture_2.rb +16 -0
  71. data/test/make_tp_collect_fixture_3.rb +15 -0
  72. data/test/make_tp_collect_fixture_4.rb +17 -0
  73. data/test/make_tp_review_fixture_1.rb +26 -0
  74. data/test/make_tp_review_fixture_2.rb +30 -0
  75. data/test/make_transcript_chunks_fixture.rb +53 -0
  76. data/test/test_integration_script_1_tp_config.rb +108 -0
  77. data/test/test_integration_script_2_tp_make.rb +119 -0
  78. data/test/test_integration_script_3_tp_assign.rb +152 -0
  79. data/test/test_integration_script_4_tp_review.rb +72 -0
  80. data/test/test_integration_script_5_tp_collect.rb +44 -0
  81. data/test/test_integration_script_6_tp_finish.rb +123 -0
  82. data/test/test_unit_amazon.rb +153 -0
  83. data/test/test_unit_config.rb +94 -0
  84. data/test/test_unit_filer.rb +202 -0
  85. data/test/test_unit_project.rb +168 -0
  86. data/test/test_unit_project_local.rb +68 -0
  87. data/test/test_unit_project_remote.rb +157 -0
  88. data/test/test_unit_template.rb +111 -0
  89. data/test/test_unit_transcript.rb +77 -0
  90. metadata +234 -0
@@ -0,0 +1,344 @@
1
module Typingpool

  #Hierarchical config object. Can be read from a YAML file and is
  #often modified at runtime, for example in response to script flags.
  #
  #==Fields
  # All listed defaults are populated when you run tp-config.
  #===Required
  # [transcripts] Unexpanded path to working directory for
  #               transcripts. This is where tp-make creates new
  #               transcript projects, and where other scripts like
  #               tp-assign, tp-review and tp-finish look for
  #               them. Default: On systems with a ~/Desktop (like OS
  #               X), ~/Desktop/Transcripts. Elsewhere,
  #               ~/transcripts.
  #====amazon
  # [key]    An Amazon Web Services "Access Key ID." Default: none.
  # [secret] An Amazon Web Services "Secret Access Key." Default: none.
  # [bucket] The name of the "bucket" on Amazon S3 where your uploads
  #          will be stored. Not required if you specify SFTP config
  #          instead (see below). Default: Generated for you when you
  #          run tp-config.
  #
  #===Optional
  # [cache]     Unexpanded path to the cache file (pstore). Default:
  #             ~/.typingpool.cache
  # [templates] Unexpanded path to directory for user-created
  #             templates. Will be searched before looking in the
  #             template dir within the app. Default: 'templates' or
  #             'Templates' (OS X) dir inside the transcripts dir.
  #====amazon
  # [url] Base URL to use when linking to files uploaded to S3. You
  #       may want to use this if you do custom domain mapping on
  #       S3. Default is https://$bucket.s3.amazonaws.com.
  #====sftp
  #If you provide SFTP config, the specified SFTP server will be used
  #to host remote mp3 and html files rather than Amazon S3. At
  #minimum, you must provide a user, host, and URL. SFTP will work
  #fine with public-key authentication (passwordless login). In fact,
  #I've not bothered to add password support yet.
  # [user] SFTP username
  # [host] SFTP server
  # [path] Files will be uploaded into this path. Optional.
  # [url]  Base URL to use when linking to files uploaded using the
  #        preceding config.
  #====assign
  #Defaults for tp-assign.
  # [reward]   Pay per transcription chunk in U.S. dollars. Default: 0.75.
  # [deadline] Length of time a worker has to complete a
  #            transcription job after accepting it (HIT
  #            'AssignmentDurationInSeconds' in the Mechanical Turk
  #            API). For details on the format, see docs for
  #            Utility.timespec_to_seconds. Default: 3h.
  # [approval] Length of time before a submitted transcription job is
  #            automatically approved (HIT
  #            'AutoApprovalDelayInSeconds' in the Mechanical Turk
  #            API). For details on the format, see docs for
  #            Utility.timespec_to_seconds. Default: 1d.
  # [lifetime] Length of time before a transcription job is no longer
  #            available to be accepted (HIT 'LifetimeInSeconds' in
  #            the Mechanical Turk API). For details on the format,
  #            see docs for Utility.timespec_to_seconds. Default: 2d.
  # [qualify]  An array of qualifications with which to filter workers
  #            who may accept a transcript job. The first part of the
  #            qualification should be the string form of a key in
  #            RTurk::Qualification::TYPES (see
  #            https://github.com/mdp/rturk/blob/master/lib/rturk/builders/qualification_builder.rb
  #            ). The second part should be one of the following
  #            comparators: > >= < <= == != exists. The optional
  #            third part is a value. Default: ['approval_rate >=
  #            95'].
  # [keywords] An array of keywords with which to tag each
  #            transcription job. Default: ['transcription', 'audio',
  #            'mp3'].
  #
  #==API
  #Values are read via same-named methods and set via same-named equals methods, like so:
  #  transcript_path = config.transcripts
  #  config.transcripts = new_path
  #
  #Nested sections are created simply by declaring a nested class
  #(which should typically inherit from Config, even if nested several
  #levels lower).
  #
  #Fields can be assigned special behaviors:
  #
  #  class Config
  #    class Root < Config
  #      local_path_reader :transcripts
  #      class SFTP < Config
  #        never_ends_in_slash_reader :url
  #      end
  #    end
  #  end
  #
  #  conf = Typingpool::Config.file
  #  conf.transcripts = '~/Documents/Transcripts'
  #  puts conf.transcripts #'/Volumes/Redsector/Users/chad/Documents/Transcripts'
  #  conf.sftp.url = 'http://luvrecording.s3.amazonaws.com/'
  #  puts conf.sftp.url #'http://luvrecording.s3.amazonaws.com'
  #
  class Config

    require 'yaml'
    @@default_file = "~/.typingpool"

    #Wraps an already-parsed hash of settings (string keys). The hash
    #is stored by reference, not copied, so writes through this object
    #are visible to anything else holding the same hash.
    def initialize(params)
      @param = params
    end

    class << self
      #Constructor.
      # ==== Params
      #[path] Fully expanded path to YAML file.
      # ==== Returns
      #Config instance (a Root). An empty YAML file yields an empty
      #config rather than an object that fails on first access.
      def file(path=File.expand_path(default_file))
        #File.read rather than IO.read: IO.read treats a path starting
        #with '|' as a command to run.
        #NOTE(review): YAML.load can instantiate arbitrary objects on
        #older Psych versions. Fine for the user's own config file;
        #do not point this at untrusted input.
        Root.new(YAML.load(File.read(path)) || {})
      end

      #Will always return ~/.typingpool unless you subclass. Will be
      #handed to File.expand_path before use.
      def default_file
        @@default_file
      end

      #Returns a new Config instance, empty except for the values
      #included in lib/templates/config.yml
      def from_bundled_template
        file(File.join(Utility.lib_dir, 'templates', 'config.yml'))
      end

      #protected

      #Define a field in a Config subclass as a local path. Reads on
      #that field will be filtered through File.expand_path. A missing
      #(nil) value is returned as nil.
      def local_path_reader(*syms)
        define_reader(*syms) do |value|
          File.expand_path(value) if value
        end
      end

      #Define a field in a Config subclass as never ending in
      #'/'. Useful for URLs and SFTP path specs. The stored value is
      #left untouched; a single trailing '/' is stripped each time the
      #field is read.
      def never_ends_in_slash_reader(*syms)
        define_reader(*syms) do |value|
          value.sub(/\/$/, '') if value
        end
      end

      #Define a field in a Config subclass as a time-length
      #specification. For format details, see docs for
      #Utility.timespec_to_seconds. Both reads and writes are run
      #through the conversion; a value that cannot be converted raises
      #Error::Argument::Format.
      def time_accessor(*syms)
        define_accessor(*syms) do |value|
          Utility.timespec_to_seconds(value) or raise Error::Argument::Format, "Can't convert '#{value}' to time"
        end
      end

      #Define a reader for each named field. The raw stored value
      #(possibly nil) is passed to the block, and the block's result
      #is what the reader returns.
      def define_reader(*syms)
        syms.each do |sym|
          define_method(sym) do
            value = @param[sym.to_s]
            yield(value)
          end
        end
      end

      #Define a writer (field=) for each named field. The assigned
      #value is passed to the block, and the block's result is what
      #gets stored.
      def define_writer(*syms)
        syms.each do |sym|
          define_method("#{sym.to_s}=".to_sym) do |value|
            @param[sym.to_s] = yield(value)
          end
        end
      end

      #Define both a reader and a writer sharing one filter block. The
      #reader skips the block (and returns nil) when nothing is
      #stored; the writer always runs it.
      def define_accessor(*syms)
        define_reader(*syms) do |value|
          yield(value) if value
        end
        define_writer(*syms) do |value|
          yield(value)
        end
      end

      #Records every named subclass under its downcased, fully
      #qualified name so method_missing can map a section key such as
      #'sftp' to Root::SFTP. Anonymous classes (Class.new(Config))
      #have no name yet and are simply not registered.
      def inherited(subklass)
        super
        @@subklasses ||= {}
        name = subklass.name or return
        @@subklasses[name.downcase] = subklass
      end

      #Returns the class nested directly under the receiver whose name
      #matches param case-insensitively, or nil if there is none (or
      #if the receiver is anonymous).
      def subklass?(param)
        own_name = self.name or return
        @@subklasses["#{own_name.downcase}::#{param.downcase}"]
      end
    end #class << self

    #All fields as raw key-value pairs. For nested subclasses, the
    #value is another hash.
    def to_hash
      @param
    end

    #Read the raw data for a field
    def [](key)
      @param[key]
    end

    #Set the raw data for a field
    def []=(key, value)
      @param[key] = value
    end

    #Fallback accessor for fields with no explicitly defined
    #reader/writer. 'foo=' stores the single argument under 'foo'. A
    #bare 'foo' returns the raw value, wrapped in the matching nested
    #Config subclass when one exists and the value is non-nil.
    #Raises Error::Argument on a wrong argument count.
    def method_missing(meth, *args)
      equals_param = equals_method?(meth)
      if equals_param
        args.count == 1 or raise Error::Argument, "Wrong number of args (#{args.count} for 1)"
        return @param[equals_param] = args[0]
      end
      args.empty? or raise Error::Argument, "Too many args #{meth} #{args.join('|')}"
      value = @param[meth.to_s]
      klass = self.class.subklass?(meth.to_s)
      return klass.new(value) if klass && value
      value
    end

    #Keeps respond_to? in step with method_missing: any setter is
    #accepted, and a getter is reported only for a key that is
    #actually stored. Instances not backed by a hash-like object
    #defer to the default answer.
    def respond_to_missing?(meth, include_private=false)
      return super unless @param.respond_to?(:key?)
      return true if equals_method?(meth)
      @param.key?(meth.to_s) || super
    end

    protected

    #Returns the field name when meth looks like a setter ('foo='),
    #otherwise nil.
    def equals_method?(meth)
      match = meth.to_s.match(/([^=]+)=$/) or return
      return match[1]
    end

    #The root level of the config file and all full config
    #objects. Kept distinct from Config because other subclasses need
    #to inherit from Config, and we don't want them inheriting the
    #root level fields.
    class Root < Config
      local_path_reader :transcripts, :cache, :templates

      class SFTP < Config
        never_ends_in_slash_reader :path, :url
      end

      class Amazon < Config
        never_ends_in_slash_reader :url
      end

      class Assign < Config
        local_path_reader :templates
        time_accessor :deadline, :approval, :lifetime

        #Anchored so the whole value must be a plain decimal number
        #(optionally surrounded by whitespace). Unanchored, anything
        #merely containing a digit -- 'abc1' -- was accepted.
        define_accessor(:reward) do |value|
          value.to_s.match(/\A\s*(\d+(\.\d*)?|\.\d+)\s*\z/) or raise Error::Argument::Format, "Format should be N.NN"
          value
        end

        #Reads as true/false for yes/no-style values, nil when unset
        #or empty, and raises Error::Argument::Format otherwise.
        define_reader(:confirm) do |value|
          next false if value.to_s.match(/(^n)|(^0)|(^false)/i)
          next true if value.to_s.match(/(^y)|(^1)|(^true)/i)
          next if value.to_s.empty?
          raise Error::Argument::Format, "Format should be 'yes' or 'no'"
        end

        #Array of Qualification objects, built from the raw 'qualify'
        #specs on first access and memoized on this instance.
        def qualify
          self.qualify = (@param['qualify'] || []) unless @qualify
          @qualify
        end

        #Replaces the qualifications with ones parsed from the given
        #spec strings. Raises Error::Argument if any spec is invalid.
        def qualify=(specs)
          @qualify = specs.map{|spec| Qualification.new(spec) }
        end

        #Parses one spec string and appends it to the qualifications.
        def add_qualification(spec)
          self.qualify.push(Qualification.new(spec))
        end

        #Array of keywords; an empty array is stored and returned
        #when none are set.
        def keywords
          @param['keywords'] ||= []
        end

        def keywords=(array)
          @param['keywords'] = array
        end

        #A single worker qualification parsed from a spec string such
        #as 'approval_rate >= 95'. The spec is validated on
        #construction.
        class Qualification < Config
          #Spec comparator token => comparator symbol used in the
          #options hash returned by to_arg.
          COMPARATORS = {
            '>' => :gt,
            '>=' => :gte,
            '<' => :lt,
            '<=' => :lte,
            '==' => :eql,
            '!=' => :not,
            'true' => :eql,
            'exists' => :exists
          }.freeze

          def initialize(spec)
            @raw = spec
            to_arg #make sure value parses
          end

          #The spec exactly as given to the constructor.
          def to_s
            @raw
          end

          #Returns [type, {comparator => value}]. Raises
          #Error::Argument if the spec cannot be parsed.
          def to_arg
            [type, opts]
          end

          protected

          #Whitespace-separated parts of the spec. Surrounding
          #whitespace is ignored so it cannot produce an empty
          #leading token.
          def tokens
            @raw.to_s.strip.split(/\s+/)
          end

          #First token of the spec: a Symbol when it is a key of
          #RTurk::Qualification::TYPES, otherwise the raw string when
          #it looks like a qualification type ID.
          def type
            type = tokens.first or raise Error::Argument, "Unexpected number of qualification tokens: #{@raw}"
            if RTurk::Qualification::TYPES[type.to_sym]
              return type.to_sym
            elsif (type.match(/\d/) || type.size >= 25)
              return type
            else
              #Seems likely to be qualification typo: Not a known
              #system qualification, all letters and less than 25
              #chars
              raise Error::Argument, "Unknown qualification type and does not appear to be a raw qualification type ID: '#{type.to_s}'"
            end
          end

          #Single-pair hash of comparator symbol => value. The value
          #is the third token when present, otherwise 1.
          def opts
            args = tokens
            if (args.count > 3) || (args.count < 2)
              raise Error::Argument, "Unexpected number of qualification tokens: #{@raw}"
            end
            args.shift
            operator = comparator(args[0]) or raise Error::Argument, "Unknown comparator '#{args[0]}'"
            value = args.count == 2 ? args[1] : 1
            return {operator => value}
          end

          #Comparator symbol for a spec token, or nil if unknown.
          def comparator(value)
            COMPARATORS[value]
          end
        end #Qualification
      end #Assign
    end #Root
  end #Config
end #Typingpool
@@ -0,0 +1,22 @@
1
module Typingpool
  #Base class for every Typingpool-specific exception; rescue this to
  #catch anything the library raises on purpose. The nested classes
  #form a hierarchy so callers can rescue as broadly or as narrowly
  #as they like:
  #
  #  Error
  #    Test
  #    Shell
  #    Argument
  #      Format
  #    File
  #      NotExists
  #      Remote
  #        SFTP
  #        S3
  #          Credentials
  #    Amazon
  #      UnreviewedContent
  #    HTTP
  #
  #NOTE(review): apart from Argument and Argument::Format (raised by
  #Config for bad arguments and badly formatted values), the raise
  #sites live in other files; the meaning of the remaining classes is
  #inferred from their names only -- confirm before relying on it.
  #
  #Inside this namespace the bare constant File refers to Error::File,
  #not Ruby's ::File.
  class Error < StandardError
    class Test < Error; end
    class Shell < Error; end

    #Bad argument passed to a method or script.
    class Argument < Error
      #Argument present but not in the expected format.
      class Format < Argument; end
    end

    class File < Error
      class NotExists < File; end
      class Remote < File
        class SFTP < Remote; end
        class S3 < Remote
          class Credentials < S3; end
        end #S3
      end #Remote
    end #File

    class Amazon < Error
      class UnreviewedContent < Amazon; end
    end #Amazon

    class HTTP < Error; end
  end #Error
end #Typingpool