typingpool 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +23 -0
- data/bin/tp-assign +240 -0
- data/bin/tp-collect +50 -0
- data/bin/tp-config +114 -0
- data/bin/tp-finish +101 -0
- data/bin/tp-make +169 -0
- data/bin/tp-review +175 -0
- data/lib/typingpool/amazon.rb +732 -0
- data/lib/typingpool/app.rb +634 -0
- data/lib/typingpool/config.rb +344 -0
- data/lib/typingpool/error.rb +22 -0
- data/lib/typingpool/filer.rb +396 -0
- data/lib/typingpool/project.rb +593 -0
- data/lib/typingpool/template.rb +175 -0
- data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
- data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
- data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
- data/lib/typingpool/templates/assignment/main.css +20 -0
- data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
- data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
- data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
- data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
- data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
- data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
- data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
- data/lib/typingpool/templates/config.yml +21 -0
- data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
- data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
- data/lib/typingpool/templates/project/data/.empty_directory +0 -0
- data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
- data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
- data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
- data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
- data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
- data/lib/typingpool/templates/project/etc/transcript.css +49 -0
- data/lib/typingpool/templates/transcript.html.erb +23 -0
- data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
- data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
- data/lib/typingpool/test/fixtures/config-1 +20 -0
- data/lib/typingpool/test/fixtures/config-2 +25 -0
- data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
- data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
- data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
- data/lib/typingpool/test/fixtures/template.html.erb +10 -0
- data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
- data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
- data/lib/typingpool/test.rb +418 -0
- data/lib/typingpool/transcript.rb +181 -0
- data/lib/typingpool/utility.rb +272 -0
- data/lib/typingpool.rb +500 -0
- data/test/make_amazon_question_fixture.rb +24 -0
- data/test/make_tp_collect_fixture_1.rb +26 -0
- data/test/make_tp_collect_fixture_2.rb +16 -0
- data/test/make_tp_collect_fixture_3.rb +15 -0
- data/test/make_tp_collect_fixture_4.rb +17 -0
- data/test/make_tp_review_fixture_1.rb +26 -0
- data/test/make_tp_review_fixture_2.rb +30 -0
- data/test/make_transcript_chunks_fixture.rb +53 -0
- data/test/test_integration_script_1_tp_config.rb +108 -0
- data/test/test_integration_script_2_tp_make.rb +119 -0
- data/test/test_integration_script_3_tp_assign.rb +152 -0
- data/test/test_integration_script_4_tp_review.rb +72 -0
- data/test/test_integration_script_5_tp_collect.rb +44 -0
- data/test/test_integration_script_6_tp_finish.rb +123 -0
- data/test/test_unit_amazon.rb +153 -0
- data/test/test_unit_config.rb +94 -0
- data/test/test_unit_filer.rb +202 -0
- data/test/test_unit_project.rb +168 -0
- data/test/test_unit_project_local.rb +68 -0
- data/test/test_unit_project_remote.rb +157 -0
- data/test/test_unit_template.rb +111 -0
- data/test/test_unit_transcript.rb +77 -0
- metadata +234 -0
@@ -0,0 +1,344 @@
|
|
1
|
+
module Typingpool
|
2
|
+
|
3
|
+
#Hierarchical config object. Can be read from a YAML file and is
|
4
|
+
#often modified at runtime, for example in response to script flags.
|
5
|
+
#
|
6
|
+
#==Fields
|
7
|
+
# All listed defaults are populated when you run tp-config.
|
8
|
+
#===Required
|
9
|
+
# [transcripts] Unexpanded path to working directory for
|
10
|
+
# transcripts. This is where tp-make creates new
|
11
|
+
# transcript projects, and where other scripts like
|
12
|
+
# tp-assign, tp-review and tp-finish look for
|
13
|
+
# them. Default: On systems with a ~/Desktop (like OS
|
14
|
+
# X), ~/Desktop/Transcripts. Elsewhere,
|
15
|
+
# ~/transcripts.
|
16
|
+
#====amazon
|
17
|
+
# [key] An Amazon Web Services "Access Key ID." Default: none.
|
18
|
+
# [secret] An Amazon Web Services "Secret Access Key." Default: none.
|
19
|
+
# [bucket] The name of the "bucket" on Amazon S3 where your uploads
|
20
|
+
# will be stored. Not required if you specify SFTP config
|
21
|
+
# instead (see below). Default: Generated for you when you
|
22
|
+
# run tp-config.
|
23
|
+
#
|
24
|
+
#===Optional
|
25
|
+
# [cache] Unexpanded path to the cache file (pstore). Default:
|
26
|
+
# ~/.typingpool.cache
|
27
|
+
# [templates] Unexpanded path to directory for user-created
|
28
|
+
# templates. Will be searched before looking in the
|
29
|
+
# template dir within the app. Default: 'templates' or
|
30
|
+
# 'Templates' (OS X) dir inside the transcripts dir.
|
31
|
+
#====amazon
|
32
|
+
# [url] Base URL to use when linking to files uploaded to S3. You
|
33
|
+
# may want to use this if you do custom domain mapping on
|
34
|
+
# S3. Default is https://$bucket.s3.amazonaws.com.
|
35
|
+
#====sftp
|
36
|
+
#If you provide SFTP config, the specified SFTP server will be used
|
37
|
+
#to host remote mp3 and html files rather than Amazon S3. At
|
38
|
+
#minimum, you must provide a user, host, and URL. SFTP will work
|
39
|
+
#fine with public-key authentication (passwordless login). In fact,
|
40
|
+
#I've not bothered to add password support yet.
|
41
|
+
# [user] SFTP username
|
42
|
+
# [host] SFTP server
|
43
|
+
# [path] Files will be uploaded into this path. Optional.
|
44
|
+
# [url] Base URL to use when linking to files uploaded using the
|
45
|
+
# preceding config.
|
46
|
+
#====assign
|
47
|
+
#Defaults for tp-assign.
|
48
|
+
# [reward] Pay per transcription chunk in U.S. dollars. Default: 0.75.
|
49
|
+
# [deadline] Length of time a worker has to complete a
|
50
|
+
# transcription job after accepting it (HIT
|
51
|
+
# 'AssignmentDurationInSeconds' in the Mechanical Turk
|
52
|
+
# API). For details on the format, see docs for
|
53
|
+
# Utility.timespec_to_seconds. Default: 3h.
|
54
|
+
# [approval] Length of time before a submitted transcription job is
|
55
|
+
# automatically approved (HIT
|
56
|
+
# 'AutoApprovalDelayInSeconds' in the Mechanical Turk
|
57
|
+
# API). For details on the format, see docs for
|
58
|
+
# Utility.timespec_to_seconds. Default: 1d.
|
59
|
+
# [lifetime] Length of time before a transcription job is no longer
|
60
|
+
# available to be accepted (HIT 'LifetimeInSeconds' in
|
61
|
+
# the Mechanical Turk API). For details on the format,
|
62
|
+
# see docs for Utility.timespec_to_seconds. Default: 2d.
|
63
|
+
# [qualify] An array of qualifications with which to filter workers
|
64
|
+
# who may accept a transcript job. The first part of the
|
65
|
+
# qualification should be the string form of a key in
|
66
|
+
# RTurk::Qualification::TYPES (see
|
67
|
+
# https://github.com/mdp/rturk/blob/master/lib/rturk/builders/qualification_builder.rb
|
68
|
+
# ). The second part should be one of the following
|
69
|
+
# comparators: > >= < <= == != exists. The optional
|
70
|
+
# third part is a value. Default: ['approval_rate >=
|
71
|
+
# 95'].
|
72
|
+
# [keywords] An array of keywords with which to tag each
|
73
|
+
# transcription job. Default: ['transcription', 'audio',
|
74
|
+
# 'mp3'].
|
75
|
+
#
|
76
|
+
#==API
|
77
|
+
#Values are read via same-named methods and set via same-named equals methods, like so:
|
78
|
+
# transcript_path = config.transcripts
|
79
|
+
# config.transcripts = new_path
|
80
|
+
#
|
81
|
+
#Nested sections are created simply by declaring a nested class
|
82
|
+
#(which should typically inherit from Config, even if nested several
|
83
|
+
#levels lower).
|
84
|
+
#
|
85
|
+
#Fields can be assigned special behaviors:
|
86
|
+
#
|
87
|
+
# class Config
|
88
|
+
# class Root < Config
|
89
|
+
# local_path_reader :transcripts
|
90
|
+
# class SFTP < Config
|
91
|
+
# never_ends_in_slash_reader :url
|
92
|
+
# end
|
93
|
+
# end
|
94
|
+
# end
|
95
|
+
#
|
96
|
+
# conf = Typingpool::Config.file
|
97
|
+
# conf.transcripts = '~/Documents/Transcripts'
|
98
|
+
# puts conf.transcripts #'/Volumes/Redsector/Users/chad/Documents/Transcripts'
|
99
|
+
# conf.sftp.url = 'http://luvrecording.s3.amazonaws.com/'
|
100
|
+
# puts conf.sftp.url #'http://luvrecording.s3.amazonaws.com'
|
101
|
+
#
|
102
|
+
class Config

  require 'yaml'
  @@default_file = "~/.typingpool"

  #Wraps a hash of raw settings.
  # ==== Params
  #[params] Hash of raw values keyed by field name (strings). The
  #         hash is stored as-is, not copied, so writes through this
  #         object are visible to anyone else holding the same hash.
  def initialize(params)
    @param = params
  end

  class << self
    #Constructor.
    # ==== Params
    #[path] Fully expanded path to YAML file. Defaults to the
    #       expanded form of default_file.
    # ==== Returns
    #Config instance (a Root). An empty YAML file yields an empty
    #config rather than one wrapping +false+.
    def file(path=File.expand_path(default_file))
      #File.read rather than IO.read: IO.read treats a path that
      #starts with '|' as a command to execute.
      Root.new(YAML.load(File.read(path)) || {})
    end

    #Will always return ~/.typingpool unless you subclass. Will be
    #handed to File.expand_path before use.
    def default_file
      @@default_file
    end

    #Returns a new Config instance, empty except for the values
    #included in templates/config.yml under Utility.lib_dir.
    def from_bundled_template
      file(File.join(Utility.lib_dir, 'templates', 'config.yml'))
    end

    #protected

    #Define a field in a Config subclass as a local path. Reads on
    #that field will be filtered through File.expand_path. A missing
    #(nil/false) value is read back as nil.
    def local_path_reader(*syms)
      define_reader(*syms) { |value| File.expand_path(value) if value }
    end

    #Define a field in a Config subclass as never ending in
    #'/'. Useful for URLs and SFTP path specs. When the stored value
    #ends in '/', that one trailing character is stripped on
    #read. The stored value itself is left untouched.
    def never_ends_in_slash_reader(*syms)
      #\z, not $: only a slash at the very end of the string counts,
      #never one that merely precedes a newline.
      define_reader(*syms) { |value| value.sub(%r{/\z}, '') if value }
    end

    #Define a field in a Config subclass as a time-length
    #specification. For format details, see docs for
    #Utility.timespec_to_seconds. Both reads and writes are passed
    #through that conversion; a falsy conversion result raises
    #Error::Argument::Format.
    def time_accessor(*syms)
      define_accessor(*syms) do |value|
        Utility.timespec_to_seconds(value) or raise Error::Argument::Format, "Can't convert '#{value}' to time"
      end
    end

    #Define a reader for each named field. On every read the raw
    #stored value (possibly nil) is handed to the block, and the
    #block's result is what the reader returns.
    def define_reader(*syms, &filter)
      syms.each do |sym|
        key = sym.to_s
        define_method(sym) { filter.call(@param[key]) }
      end
    end

    #Define a writer ("field=") for each named field. The assigned
    #value is handed to the block and the block's result is what
    #gets stored.
    def define_writer(*syms, &filter)
      syms.each do |sym|
        key = sym.to_s
        define_method("#{key}=") { |value| @param[key] = filter.call(value) }
      end
    end

    #Define both a reader and a writer sharing one block. The reader
    #skips the block (and returns nil) when nothing truthy is
    #stored; the writer always runs it.
    def define_accessor(*syms, &filter)
      define_reader(*syms) { |value| filter.call(value) if value }
      define_writer(*syms, &filter)
    end

    #Ruby hook: records every named subclass under its downcased
    #full name so method_missing can wrap nested sections in the
    #right class. Anonymous classes have no name and are not
    #registered.
    def inherited(subklass)
      super
      @@subklasses ||= {}
      return unless subklass.name
      @@subklasses[subklass.name.downcase] = subklass
    end

    #Returns the class nested directly inside the receiver whose
    #name matches param case-insensitively, or nil if there is none.
    def subklass?(param)
      return unless name
      @@subklasses["#{name.downcase}::#{param.downcase}"]
    end
  end #class << self

  #All fields as raw key-value pairs. For nested subclasses, the
  #value is another hash.
  def to_hash
    @param
  end

  #Read the raw data for a field
  def [](key)
    @param[key]
  end

  #Set the raw data for a field
  def []=(key, value)
    @param[key] = value
  end

  #Generic accessors for any field without an explicit
  #reader/writer. "field=" stores exactly one raw value. "field"
  #returns the raw value, wrapped in the matching nested Config
  #subclass when one exists and the value is truthy. Raises
  #Error::Argument on a wrong argument count.
  def method_missing(meth, *args)
    if (field = equals_method?(meth))
      args.count == 1 or raise Error::Argument, "Wrong number of args (#{args.count} for 1)"
      return @param[field] = args[0]
    end
    args.empty? or raise Error::Argument, "Too many args #{meth} #{args.join('|')}"
    value = @param[meth.to_s]
    nested = value && self.class.subklass?(meth.to_s)
    nested ? nested.new(value) : value
  end

  protected

  #Returns the field name when meth looks like a setter ("name="),
  #otherwise nil.
  def equals_method?(meth)
    match = meth.to_s.match(/([^=]+)=$/) or return
    match[1]
  end

  #The root level of the config file and all full config
  #objects. Kept distinct from Config because other subclasses need
  #to inherit from Config, and we don't want them inheriting the
  #root level fields.
  class Root < Config
    local_path_reader :transcripts, :cache, :templates

    #The 'sftp' section.
    class SFTP < Config
      never_ends_in_slash_reader :path, :url
    end

    #The 'amazon' section.
    class Amazon < Config
      never_ends_in_slash_reader :url
    end

    #The 'assign' section.
    class Assign < Config
      local_path_reader :templates
      time_accessor :deadline, :approval, :lifetime

      #The value is stored and returned unchanged; the block only
      #validates it. The pattern is anchored so that the whole value
      #(optionally padded with whitespace) must be a plain decimal
      #number, not merely contain a digit somewhere.
      define_accessor(:reward) do |value|
        value.to_s.match(/\A\s*(?:\d+(?:\.\d+)?|\.\d+)\s*\z/) or raise Error::Argument::Format, "Format should be N.NN"
        value
      end

      #Reads as false for values starting with n/0/false, true for
      #values starting with y/1/true (case-insensitive), nil when
      #unset or empty. Anything else raises Error::Argument::Format.
      define_reader(:confirm) do |value|
        next false if value.to_s.match(/(^n)|(^0)|(^false)/i)
        next true if value.to_s.match(/(^y)|(^1)|(^true)/i)
        next if value.to_s.empty?
        raise Error::Argument::Format, "Format should be 'yes' or 'no'"
      end

      #Array of Qualification instances. Built from the raw
      #'qualify' field (or an empty list) on first access and
      #remembered on this instance afterwards.
      def qualify
        self.qualify = @param['qualify'] || [] unless @qualify
        @qualify
      end

      #Replaces the qualifications with ones parsed from an array of
      #spec strings. Raises Error::Argument if any spec fails to
      #parse.
      def qualify=(specs)
        @qualify = specs.map { |spec| Qualification.new(spec) }
      end

      #Parses one more spec string and appends it to qualify.
      def add_qualification(spec)
        qualify.push(Qualification.new(spec))
      end

      #The raw 'keywords' array, initialized to an empty array (and
      #stored) when absent.
      def keywords
        @param['keywords'] ||= []
      end

      def keywords=(array)
        @param['keywords'] = array
      end

      #A single worker qualification, parsed from a spec string
      #such as 'approval_rate >= 95'.
      class Qualification < Config
        #Comparator tokens accepted in a spec, mapped to the symbols
        #returned as the option key by to_arg.
        COMPARATORS = {
          '>' => :gt,
          '>=' => :gte,
          '<' => :lt,
          '<=' => :lte,
          '==' => :eql,
          '!=' => :not,
          'true' => :eql,
          'exists' => :exists
        }.freeze

        # ==== Params
        #[spec] String: a type, a comparator and an optional value,
        #       separated by whitespace.
        #Raises Error::Argument if the spec does not parse.
        def initialize(spec)
          @raw = spec
          to_arg #make sure value parses
        end

        #The spec exactly as it was given.
        def to_s
          @raw
        end

        #Returns [type, {comparator => value}]. The value is 1 when
        #the spec supplies none.
        def to_arg
          [type, opts]
        end

        protected

        #The spec split on whitespace. Leading whitespace is
        #ignored; a nil spec yields no tokens.
        def tokens
          @raw.to_s.split
        end

        #A symbol when the first token is a key of
        #RTurk::Qualification::TYPES; otherwise the token itself,
        #provided it contains a digit or has at least 25 characters
        #(taken to be a raw qualification type ID).
        def type
          name = tokens.first
          name or raise Error::Argument, "Unexpected number of qualification tokens: #{@raw}"
          return name.to_sym if RTurk::Qualification::TYPES[name.to_sym]
          return name if name.match(/\d/) || name.size >= 25
          #Seems likely to be qualification typo: Not a known
          #system qualification, all letters and less than 25
          #chars
          raise Error::Argument, "Unknown qualification type and does not appear to be a raw qualification type ID: '#{name}'"
        end

        #Single-entry hash of comparator symbol => value.
        def opts
          args = tokens
          unless (2..3).cover?(args.count)
            raise Error::Argument, "Unexpected number of qualification tokens: #{@raw}"
          end
          operator, operand = args.drop(1)
          key = comparator(operator) or raise Error::Argument, "Unknown comparator '#{operator}'"
          { key => operand || 1 }
        end

        #Symbol for a comparator token, or nil if the token is not
        #recognized.
        def comparator(value)
          COMPARATORS[value]
        end
      end #Qualification
    end #Assign
  end #Root
end #Config
|
344
|
+
end #Typingpool
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Typingpool
|
2
|
+
#Base class for the library's exceptions. Every class nested below
#descends from it, so rescuing Error catches all of them, while
#rescuing an intermediate class (e.g. Error::File) catches just that
#branch.
class Error < StandardError
  class Test < Error
  end

  class Shell < Error
  end

  class Argument < Error
    class Format < Argument
    end
  end

  #Note: within this namespace the bare constant File refers to this
  #class, not to Ruby's ::File.
  class File < Error
    class NotExists < File
    end

    class Remote < File
      class SFTP < Remote
      end

      class S3 < Remote
        class Credentials < S3
        end
      end
    end
  end

  class Amazon < Error
    class UnreviewedContent < Amazon
    end
  end

  class HTTP < Error
  end
end
|
22
|
+
end #Typingpool
|