typingpool 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +23 -0
- data/bin/tp-assign +240 -0
- data/bin/tp-collect +50 -0
- data/bin/tp-config +114 -0
- data/bin/tp-finish +101 -0
- data/bin/tp-make +169 -0
- data/bin/tp-review +175 -0
- data/lib/typingpool/amazon.rb +732 -0
- data/lib/typingpool/app.rb +634 -0
- data/lib/typingpool/config.rb +344 -0
- data/lib/typingpool/error.rb +22 -0
- data/lib/typingpool/filer.rb +396 -0
- data/lib/typingpool/project.rb +593 -0
- data/lib/typingpool/template.rb +175 -0
- data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
- data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
- data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
- data/lib/typingpool/templates/assignment/main.css +20 -0
- data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
- data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
- data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
- data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
- data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
- data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
- data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
- data/lib/typingpool/templates/config.yml +21 -0
- data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
- data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
- data/lib/typingpool/templates/project/data/.empty_directory +0 -0
- data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
- data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
- data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
- data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
- data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
- data/lib/typingpool/templates/project/etc/transcript.css +49 -0
- data/lib/typingpool/templates/transcript.html.erb +23 -0
- data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
- data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
- data/lib/typingpool/test/fixtures/config-1 +20 -0
- data/lib/typingpool/test/fixtures/config-2 +25 -0
- data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
- data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
- data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
- data/lib/typingpool/test/fixtures/template.html.erb +10 -0
- data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
- data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
- data/lib/typingpool/test.rb +418 -0
- data/lib/typingpool/transcript.rb +181 -0
- data/lib/typingpool/utility.rb +272 -0
- data/lib/typingpool.rb +500 -0
- data/test/make_amazon_question_fixture.rb +24 -0
- data/test/make_tp_collect_fixture_1.rb +26 -0
- data/test/make_tp_collect_fixture_2.rb +16 -0
- data/test/make_tp_collect_fixture_3.rb +15 -0
- data/test/make_tp_collect_fixture_4.rb +17 -0
- data/test/make_tp_review_fixture_1.rb +26 -0
- data/test/make_tp_review_fixture_2.rb +30 -0
- data/test/make_transcript_chunks_fixture.rb +53 -0
- data/test/test_integration_script_1_tp_config.rb +108 -0
- data/test/test_integration_script_2_tp_make.rb +119 -0
- data/test/test_integration_script_3_tp_assign.rb +152 -0
- data/test/test_integration_script_4_tp_review.rb +72 -0
- data/test/test_integration_script_5_tp_collect.rb +44 -0
- data/test/test_integration_script_6_tp_finish.rb +123 -0
- data/test/test_unit_amazon.rb +153 -0
- data/test/test_unit_config.rb +94 -0
- data/test/test_unit_filer.rb +202 -0
- data/test/test_unit_project.rb +168 -0
- data/test/test_unit_project_local.rb +68 -0
- data/test/test_unit_project_remote.rb +157 -0
- data/test/test_unit_template.rb +111 -0
- data/test/test_unit_transcript.rb +77 -0
- metadata +234 -0
@@ -0,0 +1,593 @@
|
|
1
|
+
module Typingpool
|
2
|
+
|
3
|
+
#Class representing a transcription job, a job typically associated
|
4
|
+
#with a single interview or other event and with one or more audio
|
5
|
+
#files containing recordings of that event. A project is
|
6
|
+
#associated, locally, with a filesystem directory. On Amazon
|
7
|
+
#Mechanical Turk, a Project is associated with various HITs. A
|
8
|
+
#project is also associated with audio files on a remote server.
|
9
|
+
class Project
|
10
|
+
require 'uri'
|
11
|
+
|
12
|
+
#Returns a time interval corresponding to the length of each audio
|
13
|
+
#chunk within the project. (Each chunk may be transcribed
|
14
|
+
#separately.)
|
15
|
+
attr_reader :interval
|
16
|
+
|
17
|
+
#Returns the desired bitrate of processed audio files.
|
18
|
+
attr_reader :bitrate
|
19
|
+
|
20
|
+
#Accessor for the name of the project (sometimes referred to as
|
21
|
+
#the 'title' in command line code)
|
22
|
+
attr_accessor :name
|
23
|
+
|
24
|
+
#Accessor for the Config object associated with the project.
|
25
|
+
attr_accessor :config
|
26
|
+
|
27
|
+
#Constructor. Builds a Project from a project name and a Config
#instance; when no Config is passed, Config.file supplies it. The
#project does not have to exist locally or remotely.
#
#Raises Error::Argument::Format when Local.valid_name? rejects the
#name.
def initialize(name, config=Config.file)
  unless Local.valid_name?(name)
    raise Error::Argument::Format, "Must be a valid name for a directory in the local filesystem. Eliminate '/' or any other illegal character."
  end
  @name = name
  @config = config
end
|
35
|
+
|
36
|
+
#Builds and returns the Project::Remote instance for this project
#by handing the project name to Remote.from_config. An alternate
#Config instance may be passed; the default is project.config.
def remote(config=@config)
  project_name = @name
  Remote.from_config(project_name, config)
end
|
42
|
+
|
43
|
+
#Looks the project up by name through Local.named and returns
#whatever that returns (a Project::Local instance when the project
#exists at the expected place in the filesystem). An alternate base
#directory may be passed; the default is project.config.transcripts.
def local(dir=@config.transcripts)
  project_name = @name
  Local.named(project_name, dir)
end
|
50
|
+
|
51
|
+
#Creates the project's local directory through Local.create, seeding
#it from the 'templates/project' directory under Utility.lib_dir,
#and returns the result of Local.create. An alternate base directory
#may be passed; the default is project.config.transcripts.
def create_local(basedir=@config.transcripts)
  template_dir = File.join(Utility.lib_dir, 'templates', 'project')
  Local.create(@name, basedir, template_dir)
end
|
59
|
+
|
60
|
+
#Takes a time specification for setting the project.interval. The
#time specification may be an integer number of seconds or a
#colon-delimited time of the format [HH:]MM:SS, where the hour
#component is optional. The value is stored as an integer number of
#seconds.
#
#Raises Error::Argument::Format when the specification matches none
#of those formats.
def interval=(mmss)
  #\A and \Z anchor the whole string (a single trailing newline is
  #tolerated), so input such as "10\nfoo" is rejected rather than
  #matched on its first line only.
  formatted = mmss.to_s.match(
    /\A((\d+)|((\d+:)?(\d+):(\d\d)))\Z/
  ) or raise Error::Argument::Format, "Required format is SS, or MM:SS, or HH:MM:SS"
  #formatted[2] is the plain-seconds form; formatted[4..6] are hours
  #(with trailing colon, ignored by to_i), minutes and seconds.
  @interval = (formatted[2] || ((formatted[4].to_i * 60 * 60) + (formatted[5].to_i * 60) + formatted[6].to_i)).to_i
end
|
70
|
+
|
71
|
+
#Returns the project.interval in a format understood by the Unix
#utility mp3splt: $min.$sec[.01-99].
def interval_as_min_dot_sec
  seconds = @interval % 60
  #mp3splt takes fractions of a second to hundredths of a second
  #precision, so only round when there is a fractional part
  seconds = seconds.round(2) if seconds > seconds.to_i
  "#{(@interval.to_i / 60).floor}.#{seconds}"
end
|
81
|
+
|
82
|
+
#Sets project.bitrate. The value should be an integer number of
#kilobits per second (kbit/s or kbps); it is used as a target when
#converting to mp3 (when it's necessary to do so). The value is
#stored exactly as passed.
#
#Raises Error::Argument::Format when kbps.to_i is zero.
def bitrate=(kbps)
  if kbps.to_i == 0
    raise Error::Argument::Format, 'Should be an integer corresponding to kb/s'
  end
  @bitrate = kbps
end
|
90
|
+
|
91
|
+
|
92
|
+
#Writes a CSV file into project.local directory, storing information about the specified files.
# ==== Params
# [:path]    Relative path where the file will be written. Array of
#            relative path elements. See Filer::Dir#file docs
#            for details.
# [:urls]    Array of URLs corresponding to project files.
# [:unusual] Optional. Array of unusual words spoken in the
#            audio to be transcribed. This list is ultimately
#            provided to transcribers to aid in their work.
# [:voices]  Optional. Array of hashes, with each having a :name and
#            :description element. Each hash corresponds to a
#            person whose voice is on the audio. These
#            details are ultimately provided to transcribers
#            to allow them to correctly label sections of the
#            transcript.
# ==== Returns
# Path to the resulting CSV file.
# ==== Raises
# Error::Argument when :path or :urls is missing.
def create_assignment_csv(args)
  [:path, :urls].each{|arg| args[arg] or raise Error::Argument, "Missing arg '#{arg}'" }
  #:unusual and :voices are optional, so a missing value is treated
  #as an empty list instead of failing on nil
  unusual = args[:unusual] || []
  voices = args[:voices] || []
  headers = ['audio_url',
             'project_id',
             'unusual',
             'chunk',
             'chunk_hours',
             'chunk_minutes',
             'chunk_seconds',
             'voices_count',
             (1 .. voices.count).map{|n| ["voice#{n}", "voice#{n}title"]}
            ].flatten
  #Resolve the local project and its id once rather than once per row
  project_dir = local
  project_id = project_dir.id
  unusual_words = unusual.join(', ')
  voice_columns = voices.map{|v| [v[:name], v[:description]]}
  csv = args[:urls].map do |url|
    [url,
     project_id,
     unusual_words,
     interval_as_time_string,
     #zero components are written as empty cells
     interval_as_hours_minutes_seconds.map{|n| (n == 0) ? nil : n },
     voices.count,
     voice_columns
    ].flatten
  end
  project_dir.file(*args[:path]).as(:csv).write_arrays(csv, headers)
  project_dir.file_path(*args[:path])
end
|
134
|
+
|
135
|
+
#Takes an array of file paths, file names, or Filer
#instances. Returns an array of file basenames. The returned
#basenames will be the original basenames with the project id and
#a pseudo-random string inserted between the root basename and the
#file extension. The purpose of this is to make it difficult to
#guess the name of one remote file after seeing another, thus
#significantly complicating any attempt to download the entirety of
#a project (such as a journalistic interview) after seeing a single
#assignment on Amazon Mechanical Turk. (This should be considered
#an effort at obfuscation. It is not any guarantee of true
#security.)
def create_remote_names(files)
  #The project id is the same for every file, so it is looked up at
  #most once (and not at all for an empty list).
  project_id = nil
  files.map do |file|
    project_id ||= local.id
    name = [File.basename(file, '.*'), project_id, pseudo_random_uppercase_string].join('.')
    extension = File.extname(file).to_s
    name += extension unless extension.empty?
    name
  end
end
|
153
|
+
|
154
|
+
#Returns a Regexp for breaking an URL down into the original
#project basename as well as the audio chunk offset. Capture 1 is
#the whole remote basename, capture 2 the original root basename,
#captures 3 and 4 the two numeric offset components and capture 5
#the file extension (including the dot). This probably shouldn't
#need to exist. (TODO: make this unnecessary.)
def self.url_regex
  root = '(.+)'
  offset = '(\d+)\.(\d\d)'
  project_id = '[a-fA-F0-9]{32}'
  random_tag = '[A-Z]{6}'
  extension = '(\.\w+)'
  basename = [root, offset, project_id, random_tag].join('\.') + extension
  Regexp.new('.+\/(' + basename + ')')
end
|
160
|
+
|
161
|
+
#Takes an url. Returns the basename of the associated
#project.local file: the root basename, the two offset components
#and the extension captured by Project.url_regex, URL-unescaped.
#This probably shouldn't need to exist. (TODO: Make this
#unnecessary.)
#
#Raises Error::Argument::Format when the url does not match
#Project.url_regex.
def self.local_basename_from_url(url)
  matches = Project.url_regex.match(url) or raise Error::Argument::Format, "Unexpected format to url '#{url}'"
  #URI.unescape was removed in Ruby 3.0; the parser object provides
  #the same unescaping on old and new Rubies.
  URI::DEFAULT_PARSER.unescape([matches[2..4].join('.'), matches[5]].join)
end
|
168
|
+
|
169
|
+
protected
|
170
|
+
|
171
|
+
#Takes an optional string length (default 6). Returns a string of
#pseudo-random uppercase letters (A-Z) of the specified length.
#Should probably move this into Utility. TODO
def pseudo_random_uppercase_string(length=6)
  #rand(26) yields 0..25, covering all of 'A'..'Z' (rand(25) would
  #never produce 'Z')
  (0...length).map{ ('A'.ord + rand(26)).chr }.join
end
|
177
|
+
|
178
|
+
#Splits project.interval (a number of seconds) into an
#[hours, minutes, seconds] array. Returns nil when no interval has
#been set.
def interval_as_hours_minutes_seconds
  total = interval
  return unless total
  hours = total / (60 * 60)
  remainder = total % (60 * 60)
  [hours, remainder / 60, remainder % 60]
end
|
186
|
+
|
187
|
+
#Returns interval as [H:]MM:SS. The hours column is dropped when it
#is zero. Every column after the first is zero-padded to two digits;
#the leading column is left unpadded.
def interval_as_time_string
  parts = interval_as_hours_minutes_seconds
  parts.shift if parts.first == 0
  leading, *rest = parts
  padded = rest.map{|part| part.to_s.rjust(2, '0') }
  ([leading] + padded).join(":")
end
|
196
|
+
|
197
|
+
#Representation of the Project instance on remote servers. This is
|
198
|
+
#basically a collection of audio files to be transcribed and HTML
|
199
|
+
#files containing instructions and a form for the
|
200
|
+
#transcribers. The backend can be Amazon S3 (the default) or an
|
201
|
+
#SFTP server. Each backend is encapsulated in its own subclass. A
|
202
|
+
#backend subclass must provide a 'put' method, which takes an
|
203
|
+
#array of IO streams and an optional array of remote file
|
204
|
+
#basenames; a 'remove' method, which takes an array of remote file
|
205
|
+
#basenames; and the methods 'host' and 'path', which return the
|
206
|
+
#location of the destination server and destination directory,
|
207
|
+
#respectively.
|
208
|
+
#
|
209
|
+
#Thus, there will always be 'put', 'remove', 'host' and 'path'
|
210
|
+
#methods available, in addition to the Project::Remote methods
|
211
|
+
#outlined below.
|
212
|
+
class Remote
|
213
|
+
|
214
|
+
#The project name
|
215
|
+
attr_accessor :name
|
216
|
+
|
217
|
+
#Constructor. Takes the project name and a Config
#instance. Returns a Project::Remote::SFTP instance when the Config
#has an sftp section, otherwise a Project::Remote::S3 instance when
#the Config has an amazon section with a bucket. If there are
#sufficient config params for EITHER subclass, SFTP is preferred.
#
#Raises Error when neither set of params is present.
def self.from_config(name, config)
  return SFTP.new(name, config.sftp) if config.sftp
  amazon = config.amazon
  return S3.new(name, amazon) if amazon && amazon.bucket
  raise Error, "No valid upload params found in config file (SFTP or Amazon info)"
end
|
232
|
+
|
233
|
+
#Like project.remote.remove, except it takes an array of URLs
#instead of an array of remote basenames, saving you from having to
#manually extract basenames from the URL. Each URL is converted with
#url_basename; a block, if given, is called with each basename that
#remove yields.
def remove_urls(urls)
  remove(urls.map{|url| url_basename(url) }) do |file|
    yield(file) if block_given?
  end
end
|
240
|
+
|
241
|
+
#Given a file path, returns the URL to the file path were it to
#be uploaded by this instance: the instance's base URL, a slash,
#and the URL-escaped file path.
def file_to_url(file)
  #URI.escape was removed in Ruby 3.0; the parser object provides
  #the same escaping on old and new Rubies.
  "#{@url}/#{URI::DEFAULT_PARSER.escape(file)}"
end
|
246
|
+
|
247
|
+
#Given an URL, returns the file portion of the path, given the
#configuration of this instance: whatever follows the instance's
#base URL and a slash, URL-unescaped.
#
#Raises Error when the base URL cannot be found within the URL.
def url_basename(url)
  basename = url.split("#{self.url}/")[1] or raise Error, "Could not find base url '#{self.url}' within longer url '#{url}'"
  #URI.unescape was removed in Ruby 3.0; the parser object provides
  #the same unescaping on old and new Rubies.
  URI::DEFAULT_PARSER.unescape(basename)
end
|
253
|
+
|
254
|
+
#Subclass for storing remote files on Amazon Simple Storage
|
255
|
+
#Service (S3)
|
256
|
+
class S3 < Remote
|
257
|
+
require 'aws/s3'
|
258
|
+
|
259
|
+
#An Amazon Web Services "Access Key ID." Set from the
|
260
|
+
#Config#amazon value passed to Project::Remote::S3.new, but
|
261
|
+
#changeable.
|
262
|
+
attr_accessor :key
|
263
|
+
|
264
|
+
#An Amazon Web Services "Secret Access Key." Set from the
|
265
|
+
#Config#amazon value passed to Project::Remote::S3.new, but
|
266
|
+
#changeable.
|
267
|
+
attr_accessor :secret
|
268
|
+
|
269
|
+
#The S3 "bucket" where uploads will be stored. Set from the
|
270
|
+
#Config#amazon value passed to Project::Remote::S3.new, but
|
271
|
+
#changeable.
|
272
|
+
attr_accessor :bucket
|
273
|
+
|
274
|
+
#Returns the base URL, which is prepended to the remote
|
275
|
+
#files. This is either the 'url' attribute of the
|
276
|
+
#Config#amazon value passed to Project::Remote::S3.new or, if
|
277
|
+
#that attribute is not set, the value returned by
|
278
|
+
#'default_url' (e.g. "https://bucketname.s3.amazonaws.com").
|
279
|
+
attr_reader :url
|
280
|
+
|
281
|
+
#Constructor. Takes the project name and the result of calling
#the 'amazon' method on a Config instance (i.e. the amazon
#section of a Config file). The base URL comes from the config's
#'url' value or, when that is not set, from default_url.
#
#Raises Error::File::Remote::S3 when the key, secret or bucket is
#missing from the config.
def initialize(name, amazon_config)
  @name = name
  @config = amazon_config
  @key = @config.key
  raise Error::File::Remote::S3, "Missing Amazon key in config" unless @key
  @secret = @config.secret
  raise Error::File::Remote::S3, "Missing Amazon secret in config" unless @secret
  @bucket = @config.bucket
  raise Error::File::Remote::S3, "Missing Amazon bucket in config" unless @bucket
  @url = @config.url || default_url
end
|
292
|
+
|
293
|
+
#The remote host (server) name, parsed from #url
def host
  parsed = URI.parse(@url)
  parsed.host
end
|
297
|
+
|
298
|
+
#The remote path (directory), parsed from #url
def path
  parsed = URI.parse(@url)
  parsed.path
end
|
302
|
+
|
303
|
+
#Upload files/strings to S3, optionally changing the names in the process.
# ==== Params
#[io_streams] Enumerable collection of IO objects, like a File
#             or StringIO instance.
#[as]         Optional if the io_streams are File instances. Array of
#             file basenames, used to name the destination
#             files. Default is the basename of the Files
#             passed in as io_streams.
# ==== Returns
#Array of URLs corresponding to the uploaded files.
#
#If a block is given, it is called with each stream and its
#destination name before that stream is uploaded. When S3 reports
#that the bucket does not exist, the bucket is created and the
#upload is retried once.
def put(io_streams, as=io_streams.map{|file| File.basename(file)})
  batch(io_streams) do |stream, i|
    dest = as[i]
    yield(stream, dest) if block_given?
    bucket_created = false
    begin
      AWS::S3::S3Object.store(dest, stream, @bucket, :access => :public_read)
    rescue AWS::S3::NoSuchBucket
      #Retry only once per file: if the bucket is still reported
      #missing after creating it, re-raise instead of looping forever.
      raise if bucket_created
      make_bucket
      bucket_created = true
      retry
    end
    file_to_url(dest)
  end #batch
end
|
326
|
+
|
327
|
+
#Delete objects from S3.
# ==== Params
#[files] Enumerable collection of file names. Should NOT
#        include the bucket name (path).
# ==== Returns
#Array holding the result of each AWS::S3::S3Object.delete call, in
#the order the files were given.
#
#If a block is given, it is called with each file name before that
#file is deleted.
def remove(files)
  batch(files) do |name, _index|
    yield(name) if block_given?
    AWS::S3::S3Object.delete(name, @bucket)
  end
end
|
340
|
+
|
341
|
+
protected
|
342
|
+
|
343
|
+
#Runs the given block once per element of io_streams, passing the
#element and its index, and returns an array of the block's
#results. A connection is opened before the first element is
#processed and closed afterwards, even when an operation fails;
#nothing is opened or closed for an empty collection.
#
#Raises Error::File::Remote::S3::Credentials when Amazon reports a
#signature mismatch, and Error::File::Remote::S3 for any other
#AWS::S3::S3Exception.
def batch(io_streams)
  results = []
  connected = false
  begin
    io_streams.each_with_index do |stream, i|
      unless connected
        connect
        connected = true
      end
      begin
        results.push(yield(stream, i))
      rescue AWS::S3::S3Exception => e
        if e.message.match(/AWS::S3::SignatureDoesNotMatch/)
          raise Error::File::Remote::S3::Credentials, "S3 operation failed with a signature error. This likely means your AWS key or secret is wrong. Error: #{e}"
        else
          raise Error::File::Remote::S3, "Your S3 operation failed with an Amazon error: #{e}"
        end #if
      end #begin
    end #io_streams.each_with_index
  ensure
    #Close the connection on the failure path too, and only if one
    #was actually opened
    disconnect if connected
  end
  results
end
|
360
|
+
|
361
|
+
#Opens the connection to S3 through
#AWS::S3::Base.establish_connection!, using this instance's key and
#secret and requesting a persistent SSL connection.
def connect
  options = {
    :access_key_id => @key,
    :secret_access_key => @secret,
    :persistent => true,
    :use_ssl => true
  }
  AWS::S3::Base.establish_connection!(options)
end
|
369
|
+
|
370
|
+
#Closes the connection to S3 through AWS::S3::Base.disconnect.
def disconnect
  AWS::S3::Base.disconnect
end
|
373
|
+
|
374
|
+
#Creates this instance's bucket through AWS::S3::Bucket.create.
def make_bucket
  bucket_name = @bucket
  AWS::S3::Bucket.create(bucket_name)
end
|
377
|
+
|
378
|
+
#Returns the base URL used when the config does not supply one: the
#bucket name as a subdomain of s3.amazonaws.com, over https.
def default_url
  ['https://', @bucket, '.s3.amazonaws.com'].join
end
|
381
|
+
end #S3
|
382
|
+
|
383
|
+
#Subclass for storing remote files on an SFTP server. Only
|
384
|
+
#public/private key authentication has been tested. There is not
|
385
|
+
#yet any provision for password-based authentication, though
|
386
|
+
#adding it should be trivial.
|
387
|
+
class SFTP < Remote
|
388
|
+
require 'net/sftp'
|
389
|
+
|
390
|
+
#Returns the remote host (server) name. This is set from
|
391
|
+
#Config#sftp#host.
|
392
|
+
attr_reader :host
|
393
|
+
|
394
|
+
#Returns the remote path (directory). This is set from
|
395
|
+
#Config#sftp#path.
|
396
|
+
attr_reader :path
|
397
|
+
|
398
|
+
#Returns the name of the user used to log in to the SFTP
|
399
|
+
#server. This is set from Config#sftp#user.
|
400
|
+
attr_reader :user
|
401
|
+
|
402
|
+
#Returns the base URL, which is prepended to the remote
|
403
|
+
#files. This is set from Config#sftp#url.
|
404
|
+
attr_reader :url
|
405
|
+
|
406
|
+
#Constructor. Takes the project name and a Config#sftp. The remote
#path defaults to an empty string when the config has none.
#
#Raises Error::File::Remote::SFTP when the user, host or url is
#missing from the config.
def initialize(name, sftp_config)
  @name = name
  @config = sftp_config
  @user = @config.user
  raise Error::File::Remote::SFTP, "No SFTP user specified in config" unless @user
  @host = @config.host
  raise Error::File::Remote::SFTP, "No SFTP host specified in config" unless @host
  @url = @config.url
  raise Error::File::Remote::SFTP, "No SFTP url specified in config" unless @url
  @path = @config.path || ''
end
|
415
|
+
|
416
|
+
#See docs for Project::Remote::S3#put. Each stream is uploaded to
#its destination name joined with the remote path.
#
#Raises Error::File::Remote::SFTP when the upload fails with a
#Net::SFTP::StatusException.
def put(io_streams, as=io_streams.map{|file| File.basename(file)})
  begin
    position = 0
    batch(io_streams) do |stream, session|
      destination = as[position]
      position += 1
      yield(stream, destination) if block_given?
      session.upload(stream, join_with_path(destination))
      file_to_url(destination)
    end
  rescue Net::SFTP::StatusException => e
    raise Error::File::Remote::SFTP, "SFTP upload failed: #{e.description}"
  end
end
|
431
|
+
|
432
|
+
#See docs for Project::Remote::S3#remove. Each file name is joined
#with the remote path before removal. Returns nil when every
#removal succeeded.
#
#Raises Error::File::Remote::SFTP, listing the failed responses,
#when any removal did not succeed.
def remove(files)
  requests = batch(files) do |file, session|
    yield(file) if block_given?
    session.remove(join_with_path(file))
  end
  failed = requests.select{|request| !request.response.ok? }
  return if failed.empty?
  summary = failed.map{|request| request.response.to_s }.join('; ')
  raise Error::File::Remote::SFTP, "SFTP removal failed: #{summary}"
end
|
444
|
+
|
445
|
+
protected
|
446
|
+
|
447
|
+
#Opens an SFTP session to the configured host as the configured
#user, yields the session to the given block, and then runs the
#session's loop before the session is closed.
#
#Raises Error::File::Remote::SFTP when authentication fails.
def connection
  begin
    Net::SFTP.start(@host, @user) do |connection|
      yield(connection)
      connection.loop
    end
  rescue Net::SSH::AuthenticationFailed => e
    #Report the exception itself; $? is the status of the last child
    #process and says nothing about the SSH failure.
    raise Error::File::Remote::SFTP, "SFTP authentication failed: #{e.message}"
  end
end
|
457
|
+
|
458
|
+
#Opens a single connection and runs the given block once per file,
#passing the file and the connection. Returns an array of the
#block's results.
def batch(files)
  collected = []
  connection do |session|
    files.each{|file| collected.push(yield(file, session)) }
  end
  collected
end
|
467
|
+
|
468
|
+
#Returns the remote location for a file basename: the basename
#prefixed with the configured remote path, or the bare basename when
#no path is configured.
def join_with_path(file)
  #An unset path is stored as '' (which is truthy), so test for
  #emptiness; joining '' with the file would yield "/file", i.e. the
  #filesystem root instead of a location relative to the login dir.
  if @path.to_s.empty?
    file
  else
    [@path, file].join('/')
  end
end
|
475
|
+
|
476
|
+
end #SFTP
|
477
|
+
end #Remote
|
478
|
+
|
479
|
+
#Representation of the Project instance in the local
|
480
|
+
#filesystem. Subclass of Filer::Dir; see Filer::Dir docs for
|
481
|
+
#additional details.
|
482
|
+
#
|
483
|
+
#This is basically a local dir with various subdirs and files
|
484
|
+
#containing the canonical representation of the project, including
|
485
|
+
#data on remote resources, the project ID and subtitle, the audio files
|
486
|
+
#themselves, and, when complete, an HTML transcript of that audio,
|
487
|
+
#along with supporting CSS and Javascript files.
|
488
|
+
class Local < Filer::Dir
|
489
|
+
require 'fileutils'
|
490
|
+
require 'securerandom'
|
491
|
+
|
492
|
+
#Returns the dir path.
|
493
|
+
attr_reader :path
|
494
|
+
|
495
|
+
class << self
|
496
|
+
#Constructor. Creates a directory in the filesystem for the
#project, copies the contents of the template dir into it and
#creates the project id.
#
# ==== Params
# [name]         Name of the associated project.
# [base_dir]     Path to the local directory into which the project
#                dir should be placed.
# [template_dir] Path to the dir which will be used as a base
#                template for new projects.
# ==== Returns
# Project::Local instance.
def create(name, base_dir, template_dir)
  super(File.join(base_dir, name)).tap do |project_dir|
    #a trailing '.' copies the template dir's contents rather than
    #the dir itself
    FileUtils.cp_r(File.join(template_dir, '.'), project_dir)
    project_dir.create_id
  end
end
|
513
|
+
|
514
|
+
#Takes the name of a project and a path. If the superclass lookup
#finds a directory with a matching name in the given path and its
#file layout indicates it is a Project::Local instance (see 'ours?'
#docs), returns the corresponding Project::Local instance.
#Otherwise returns nil.
def named(string, path)
  candidate = super
  return unless candidate
  ours?(candidate) ? candidate : nil
end
|
525
|
+
|
526
|
+
#Takes a Filer::Dir instance. Returns true or false depending on whether
#the file layout inside the dir indicates it is a
#Project::Local instance, i.e. whether both the 'audio' and
#'audio/originals' subdirs exist.
def ours?(dir)
  #File.exist? replaces File.exists?, which was removed in Ruby 3.2
  File.exist?(dir.subdir('audio')) && File.exist?(dir.subdir('audio', 'originals'))
end
|
532
|
+
|
533
|
+
#Takes the name of a project and returns true if it is a valid
#name for a directory in the local filesystem, false if not. The
#check is made by trying to create a directory with that name
#inside a temporary directory.
def valid_name?(name)
  Utility.in_temp_dir do |dir|
    begin
      candidate = File.join(dir, name)
      FileUtils.mkdir(candidate)
    rescue Errno::ENOENT, Errno::EEXIST, Errno::ENOTDIR, Errno::EINVAL, Errno::ENAMETOOLONG, ArgumentError
      #ENOENT: the name contains a path separator. EEXIST: the name
      #resolves to an existing dir ('', '.', '..'). ArgumentError:
      #the name contains a null byte.
      return false
    end #begin
    #File.exist? replaces File.exists?, which was removed in Ruby 3.2
    return File.exist?(candidate)
  end #Utility.in_temp_dir do...
end
|
545
|
+
|
546
|
+
#Takes one or more symbols. Adds corresponding getter/setter
#and delete method(s) to Project::Local, which read (getter)
#and write (setter) and delete corresponding text files in the
#data directory.
#
#So, for example, 'data_file_accessor :name' would allow you
#to later create the file 'data/name.txt' in the project dir by
#calling 'project.local.name = "Foo"', read that same file via
#'project.local.name', and delete the file via
#'project.local.delete_name'. The delete method does nothing when
#the file does not exist.
def data_file_accessor(*syms)
  syms.each do |sym|
    define_method(sym) do
      file('data',"#{sym.to_s}.txt").read
    end
    define_method("#{sym.to_s}=".to_sym) do |value|
      file('data',"#{sym.to_s}.txt").write(value)
    end
    define_method("delete_#{sym.to_s}".to_sym) do
      #File.exist? replaces File.exists?, which was removed in Ruby 3.2
      if File.exist? file('data',"#{sym.to_s}.txt")
        File.delete(file('data',"#{sym.to_s}.txt"))
      end
    end
  end
end
|
571
|
+
end #class << self
|
572
|
+
|
573
|
+
#Calling 'subtitle' will read 'data/subtitle.txt'; calling
|
574
|
+
#'subtitle=' will write 'data/subtitle.txt'; calling
|
575
|
+
#'delete_subtitle' will delete 'data/subtitle.txt'.
|
576
|
+
data_file_accessor :subtitle
|
577
|
+
|
578
|
+
#Returns the ID of the project, as read from 'data/id.txt'.
def id
  id_file = file('data','id.txt')
  id_file.read
end
|
582
|
+
|
583
|
+
#Creates a file storing the canonical ID of the project in
#'data/id.txt'. The ID is a freshly generated string of 32
#hexadecimal characters.
#
#Raises Error if the project already has an id.
def create_id
  raise Error, "id already exists" if id
  file('data','id.txt').write(SecureRandom.hex(16))
end
|
591
|
+
end #Local
|
592
|
+
end #Project
|
593
|
+
end #Typingpool
|