typingpool 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. data/Rakefile +23 -0
  2. data/bin/tp-assign +240 -0
  3. data/bin/tp-collect +50 -0
  4. data/bin/tp-config +114 -0
  5. data/bin/tp-finish +101 -0
  6. data/bin/tp-make +169 -0
  7. data/bin/tp-review +175 -0
  8. data/lib/typingpool/amazon.rb +732 -0
  9. data/lib/typingpool/app.rb +634 -0
  10. data/lib/typingpool/config.rb +344 -0
  11. data/lib/typingpool/error.rb +22 -0
  12. data/lib/typingpool/filer.rb +396 -0
  13. data/lib/typingpool/project.rb +593 -0
  14. data/lib/typingpool/template.rb +175 -0
  15. data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
  16. data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
  17. data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
  18. data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
  19. data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
  20. data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
  21. data/lib/typingpool/templates/assignment/main.css +20 -0
  22. data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
  23. data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
  24. data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
  25. data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
  26. data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
  27. data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
  28. data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
  29. data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
  30. data/lib/typingpool/templates/config.yml +21 -0
  31. data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
  32. data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
  33. data/lib/typingpool/templates/project/data/.empty_directory +0 -0
  34. data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
  35. data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
  36. data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
  37. data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
  38. data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
  39. data/lib/typingpool/templates/project/etc/transcript.css +49 -0
  40. data/lib/typingpool/templates/transcript.html.erb +23 -0
  41. data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
  42. data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
  43. data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
  44. data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
  45. data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
  46. data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
  47. data/lib/typingpool/test/fixtures/config-1 +20 -0
  48. data/lib/typingpool/test/fixtures/config-2 +25 -0
  49. data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
  50. data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
  51. data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
  52. data/lib/typingpool/test/fixtures/template.html.erb +10 -0
  53. data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
  54. data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
  55. data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
  56. data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
  57. data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
  58. data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
  59. data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
  60. data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
  61. data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
  62. data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
  63. data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
  64. data/lib/typingpool/test.rb +418 -0
  65. data/lib/typingpool/transcript.rb +181 -0
  66. data/lib/typingpool/utility.rb +272 -0
  67. data/lib/typingpool.rb +500 -0
  68. data/test/make_amazon_question_fixture.rb +24 -0
  69. data/test/make_tp_collect_fixture_1.rb +26 -0
  70. data/test/make_tp_collect_fixture_2.rb +16 -0
  71. data/test/make_tp_collect_fixture_3.rb +15 -0
  72. data/test/make_tp_collect_fixture_4.rb +17 -0
  73. data/test/make_tp_review_fixture_1.rb +26 -0
  74. data/test/make_tp_review_fixture_2.rb +30 -0
  75. data/test/make_transcript_chunks_fixture.rb +53 -0
  76. data/test/test_integration_script_1_tp_config.rb +108 -0
  77. data/test/test_integration_script_2_tp_make.rb +119 -0
  78. data/test/test_integration_script_3_tp_assign.rb +152 -0
  79. data/test/test_integration_script_4_tp_review.rb +72 -0
  80. data/test/test_integration_script_5_tp_collect.rb +44 -0
  81. data/test/test_integration_script_6_tp_finish.rb +123 -0
  82. data/test/test_unit_amazon.rb +153 -0
  83. data/test/test_unit_config.rb +94 -0
  84. data/test/test_unit_filer.rb +202 -0
  85. data/test/test_unit_project.rb +168 -0
  86. data/test/test_unit_project_local.rb +68 -0
  87. data/test/test_unit_project_remote.rb +157 -0
  88. data/test/test_unit_template.rb +111 -0
  89. data/test/test_unit_transcript.rb +77 -0
  90. metadata +234 -0
@@ -0,0 +1,344 @@
1
+ module Typingpool
2
+
3
+ #Hierarchical config object. Can be read from a YAML file and is
4
+ #often modified at runtime, for example in response to script flags.
5
+ #
6
+ #==Fields
7
+ # All listed defaults are populated when you run tp-install.
8
+ #===Required
9
+ # [transcripts] Unexpanded path to working directory for
10
+ # transcripts. This is where tp-make creates new
11
+ # transcript projects, and where other scripts like
12
+ # tp-assign, tp-review and tp-finish look for
13
+ # them. Default: On systems with a ~/Desktop (like OS
14
+ # X), ~/Desktop/Transcripts. Elsewhere,
15
+ # ~/transcripts.
16
+ #====amazon
17
+ # [key] An Amazon Web Services "Access Key ID." Default: none.
18
+ # [secret] An Amazon Web Services "Secret Access Key." Default: none.
19
+ # [bucket] The name of the "bucket" on Amazon S3 where your uploads
20
+ # will be stored. Not required if you specify SFTP config
21
+ # instead (see below). Default: Generated for you when you
22
+ # run tp-install.
23
+ #
24
+ #===Optional
25
+ # [cache] Unexpanded path to the cache file (pstore). Default:
26
+ # ~/.typingpool.cache
27
+ # [templates] Unexpanded path to directory for user-created
28
+ # templates. Will be searched before looking in the
29
+ # template dir within the app. Default: 'templates' or
30
+ # 'Templates' (OS X) dir inside the transcripts dir.
31
+ #====amazon
32
+ # [url] Base URL to use when linking to files uploaded to S3. You
33
+ # may want to use this if you do custom domain mapping on
34
+ # S3. Default is https://$bucket.s3.amazonaws.com.
35
+ #====sftp
36
+ #If you provide SFTP config, the specified SFTP server will be used
37
+ #to host remote mp3 and html files rather than Amazon S3. At
38
+ #minimum, you must provide a user, host, and URL. SFTP will work
39
+ #fine with public-key authentication (passwordless login). In fact,
40
+ #I've not bothered to add password support yet.
41
+ # [user] SFTP username
42
+ # [host] SFTP server
43
+ # [path] Files will be uploaded into this path. Optional.
44
+ # [url] Base URL to use when linking to files uploaded using the
45
+ # preceding config.
46
+ #====assign
47
+ #Defaults for tp-assign.
48
+ # [reward] Pay per transcription chunk in U.S. dollars. Default: 0.75.
49
+ # [deadline] Length of time a worker has to complete a
50
+ # transcription job after accepting it (HIT
51
+ # 'AssignmentDurationInSeconds' in the Mechanical Turk
52
+ # API). For details on the format, see docs for
53
+ # Utility.timespec_to_seconds. Default: 3h.
54
+ # [approval] Length of time before a submitted transcription job is
55
+ # automatically approved (HIT
56
+ # 'AutoApprovalDelayInSeconds' in the Mechanical Turk
57
+ # API). For details on the format, see docs for
58
+ # Utility.timespec_to_seconds. Default: 1d.
59
+ # [lifetime] Length of time before a transcription job is no longer
60
+ # available to be accepted (HIT 'LifetimeInSeconds' in
61
+ # the Mechanical Turk API). For details on the format,
62
+ # see docs for Utility.timespec_to_seconds. Default: 2d.
63
+ # [qualify] An array of qualifications with which to filter workers
64
+ # who may accept a transcript job. The first part of the
65
+ # qualification should be the string form of a key in
66
+ # RTurk::Qualifications::TYPES (see
67
+ # https://github.com/mdp/rturk/blob/master/lib/rturk/builders/qualification_builder.rb
68
+ # ). The second part should be one of the following
69
+ # comparators: > >= < <= == != exists. The optional
70
+ # third part is a value. Default: ['approval_rate >=
71
+ # 95'].
72
+ # [keywords] An array of keywords with which to tag each
73
+ # transcription job. Default: ['transcription', 'audio',
74
+ # 'mp3'].
75
+ #
76
+ #==API
77
+ #Values are read via same-named methods and set via same-named equals methods, like so:
78
+ # transcript_path = config.transcripts
79
+ # config.transcripts = new_path
80
+ #
81
+ #Nested sections are created simply by declaring a nested class
82
+ #(which should typically inherit from Config, even if nested several
83
+ #levels lower).
84
+ #
85
+ #Fields can be assigned special behaviors:
86
+ #
87
+ # class Config
88
+ # class Root < Config
89
+ # local_path_reader :transcripts
90
+ # class SFTP < Config
91
+ # never_ends_in_slash_reader :url
92
+ # end
93
+ # end
94
+ # end
95
+ #
96
+ # conf = Typingpool::Config.file
97
+ # conf.transcripts = '~/Documents/Transcripts'
98
+ # puts conf.transcripts #'/Volumes/Redsector/Users/chad/Documents/Transcripts'
99
+ # conf.sftp.url = 'http://luvrecording.s3.amazonaws.com/'
100
+ # puts conf.sftp.url #'http://luvrecording.s3.amazonaws.com'
101
+ #
102
+ class Config
103
+
104
+ require 'yaml'
105
+ @@default_file = "~/.typingpool"
106
+
107
+ def initialize(params)
108
+ @param = params
109
+ end
110
+
111
+ class << self
112
+ #Constructor.
113
+ # ==== Params
114
+ #[path] Fully expanded path to YAML file.
115
+ # ==== Returns
116
+ #Config instance.
117
+ def file(path=File.expand_path(default_file))
118
+ Root.new(YAML.load(IO.read((path))))
119
+ end
120
+
121
+ #Will always return ~/.typingpool unless you subclass. Will be
122
+ #handed to File.expand_path before use.
123
+ def default_file
124
+ @@default_file
125
+ end
126
+
127
+ #Returns a new Config instance, empty except for the values
128
+ #included in lib/templates/config.yml
129
+ def from_bundled_template
130
+ file(File.join(Utility.lib_dir, 'templates', 'config.yml'))
131
+ end
132
+
133
+ #protected
134
+
135
+ #Define a field in a Config subclass as a local path. Reads on
136
+ #that field will be filtered through File.expand_path.
137
+ def local_path_reader(*syms)
138
+ define_reader(*syms) do |value|
139
+ File.expand_path(value) if value
140
+ end
141
+ end
142
+
143
+ #Define a field in a Config subclass as never ending in
144
+ #'/'. Useful for URLs and SFTP path specs. When a field is set
145
+ #to a value ending in '/', the last character is stripped.
146
+ def never_ends_in_slash_reader(*syms)
147
+ define_reader(*syms) do |value|
148
+ value.sub(/\/$/, '') if value
149
+ end
150
+ end
151
+
152
+ #Define a field in a Config subclass as a time-length
153
+ #specification. For format details, see docs for
154
+ #Utility.timespec_to_seconds.
155
+ def time_accessor(*syms)
156
+ define_accessor(*syms) do |value|
157
+ Utility.timespec_to_seconds(value) or raise Error::Argument::Format, "Can't convert '#{value}' to time"
158
+ end
159
+ end
160
+
161
+ def define_reader(*syms)
162
+ syms.each do |sym|
163
+ define_method(sym) do
164
+ value = @param[sym.to_s]
165
+ yield(value)
166
+ end
167
+ end
168
+ end
169
+
170
+ def define_writer(*syms)
171
+ syms.each do |sym|
172
+ define_method("#{sym.to_s}=".to_sym) do |value|
173
+ @param[sym.to_s] = yield(value)
174
+ end
175
+ end
176
+ end
177
+
178
+ def define_accessor(*syms)
179
+ define_reader(*syms) do |value|
180
+ yield(value) if value
181
+ end
182
+ define_writer(*syms) do |value|
183
+ yield(value)
184
+ end
185
+ end
186
+
187
+ def inherited(subklass)
188
+ @@subklasses ||= {}
189
+ @@subklasses[subklass.name.downcase] = subklass
190
+ end
191
+
192
+ def subklass?(param)
193
+ @@subklasses["#{self.name.downcase}::#{param.downcase}"]
194
+ end
195
+ end #class << self
196
+
197
+ #All fields as raw key-value pairs. For nested subclasses, the
198
+ #value is another hash.
199
+ def to_hash
200
+ @param
201
+ end
202
+
203
+ #Read the raw data for a field
204
+ def [](key)
205
+ @param[key]
206
+ end
207
+
208
+ #Set the raw data for a field
209
+ def []=(key, value)
210
+ @param[key] = value
211
+ end
212
+
213
+ def method_missing(meth, *args)
214
+ equals_param = equals_method?(meth)
215
+ if equals_param
216
+ args.count == 1 or raise Error::Argument, "Wrong number of args (#{args.count} for 1)"
217
+ return @param[equals_param] = args[0]
218
+ end
219
+ args.empty? or raise Error::Argument, "Too many args #{meth} #{args.join('|')}"
220
+ value = @param[meth.to_s]
221
+ if self.class.subklass?(meth.to_s) && value
222
+ return self.class.subklass?(meth.to_s).new(value)
223
+ end
224
+ value
225
+ end
226
+
227
+ protected
228
+
229
+ def equals_method?(meth)
230
+ match = meth.to_s.match(/([^=]+)=$/) or return
231
+ return match[1]
232
+ end
233
+
234
+ #The root level of the config file and all full config
235
+ #objects. Kept distinct from Config because other subclasses need
236
+ #to inherit from Config, and we don't want them inheriting the
237
+ #root level fields.
238
+ class Root < Config
239
+ local_path_reader :transcripts, :cache, :templates
240
+
241
+ class SFTP < Config
242
+ never_ends_in_slash_reader :path, :url
243
+ end
244
+
245
+ class Amazon < Config
246
+ never_ends_in_slash_reader :url
247
+ end
248
+
249
+ class Assign < Config
250
+ local_path_reader :templates
251
+ time_accessor :deadline, :approval, :lifetime
252
+
253
+ define_accessor(:reward) do |value|
254
+ value.to_s.match(/(\d+(\.\d+)?)|(\d*\.\d+)/) or raise Error::Argument::Format, "Format should be N.NN"
255
+ value
256
+ end
257
+
258
+ define_reader(:confirm) do |value|
259
+ next false if value.to_s.match(/(^n)|(^0)|(^false)/i)
260
+ next true if value.to_s.match(/(^y)|(^1)|(^true)/i)
261
+ next if value.to_s.empty?
262
+ raise Error::Argument::Format, "Format should be 'yes' or 'no'"
263
+ end
264
+
265
+ def qualify
266
+ self.qualify = (@param['qualify'] || []) unless @qualify
267
+ @qualify
268
+ end
269
+
270
+ def qualify=(specs)
271
+ @qualify = specs.map{|spec| Qualification.new(spec) }
272
+ end
273
+
274
+ def add_qualification(spec)
275
+ self.qualify.push(Qualification.new(spec))
276
+ end
277
+
278
+ def keywords
279
+ @param['keywords'] ||= []
280
+ end
281
+
282
+ def keywords=(array)
283
+ @param['keywords'] = array
284
+ end
285
+
286
+ class Qualification < Config
287
+ def initialize(spec)
288
+ @raw = spec
289
+ to_arg #make sure value parses
290
+ end
291
+
292
+ def to_s
293
+ @raw
294
+ end
295
+
296
+ def to_arg
297
+ [type, opts]
298
+ end
299
+
300
+ protected
301
+
302
+ def type
303
+ type = @raw.split(/\s+/)[0]
304
+ if RTurk::Qualification::TYPES[type.to_sym]
305
+ return type.to_sym
306
+ elsif (type.match(/\d/) || type.size >= 25)
307
+ return type
308
+ else
309
+ #Seems likely to be qualification typo: Not a known
310
+ #system qualification, all letters and less than 25
311
+ #chars
312
+ raise Error::Argument, "Unknown qualification type and does not appear to be a raw qualification type ID: '#{type.to_s}'"
313
+ end
314
+ end
315
+
316
+ def opts
317
+ args = @raw.split(/\s+/)
318
+ if (args.count > 3) || (args.count < 2)
319
+ raise Error::Argument, "Unexpected number of qualification tokens: #{@raw}"
320
+ end
321
+ args.shift
322
+ comparator(args[0]) or raise Error::Argument, "Unknown comparator '#{args[0]}'"
323
+ value = 1
324
+ value = args[1] if args.count == 2
325
+ return {comparator(args[0]) => value}
326
+ end
327
+
328
+ def comparator(value)
329
+ Hash[
330
+ '>' => :gt,
331
+ '>=' => :gte,
332
+ '<' => :lt,
333
+ '<=' => :lte,
334
+ '==' => :eql,
335
+ '!=' => :not,
336
+ 'true' => :eql,
337
+ 'exists' => :exists
338
+ ][value]
339
+ end
340
+ end #Qualification
341
+ end #Assign
342
+ end #Root
343
+ end #Config
344
+ end #Typingpool
@@ -0,0 +1,22 @@
1
module Typingpool
  #Root of the Typingpool exception hierarchy. Every error class
  #below descends from it, so rescuing Typingpool::Error catches them
  #all.
  class Error < StandardError

    class Test < Error
    end

    class Shell < Error
    end

    #Argument errors; Format is the more specific subclass.
    class Argument < Error
      class Format < Argument
      end
    end

    #File-related errors. Inside Typingpool::Error the bare name
    #File refers to this class rather than to ::File.
    class File < Error

      class NotExists < File
      end

      #Errors for remote files, with a subclass per backend.
      class Remote < File

        class SFTP < Remote
        end

        class S3 < Remote
          class Credentials < S3
          end
        end #S3

      end #Remote

    end #File

    class Amazon < Error
      class UnreviewedContent < Amazon
      end
    end #Amazon

    class HTTP < Error
    end

  end #Error
end #Typingpool