typingpool 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +23 -0
- data/bin/tp-assign +240 -0
- data/bin/tp-collect +50 -0
- data/bin/tp-config +114 -0
- data/bin/tp-finish +101 -0
- data/bin/tp-make +169 -0
- data/bin/tp-review +175 -0
- data/lib/typingpool/amazon.rb +732 -0
- data/lib/typingpool/app.rb +634 -0
- data/lib/typingpool/config.rb +344 -0
- data/lib/typingpool/error.rb +22 -0
- data/lib/typingpool/filer.rb +396 -0
- data/lib/typingpool/project.rb +593 -0
- data/lib/typingpool/template.rb +175 -0
- data/lib/typingpool/templates/assignment/amazon-init.js +38 -0
- data/lib/typingpool/templates/assignment/interview/nameless.html.erb +13 -0
- data/lib/typingpool/templates/assignment/interview/noisy.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/interview/phone.html.erb +12 -0
- data/lib/typingpool/templates/assignment/interview.html.erb +11 -0
- data/lib/typingpool/templates/assignment/main.css +20 -0
- data/lib/typingpool/templates/assignment/partials/entry.html.erb +19 -0
- data/lib/typingpool/templates/assignment/partials/footer.html.erb +3 -0
- data/lib/typingpool/templates/assignment/partials/header.html.erb +11 -0
- data/lib/typingpool/templates/assignment/partials/labeling-example.html.erb +4 -0
- data/lib/typingpool/templates/assignment/partials/labeling.html.erb +5 -0
- data/lib/typingpool/templates/assignment/partials/length-description.html.erb +6 -0
- data/lib/typingpool/templates/assignment/partials/voices.html.erb +10 -0
- data/lib/typingpool/templates/assignment/speech.html.erb +11 -0
- data/lib/typingpool/templates/config.yml +21 -0
- data/lib/typingpool/templates/project/audio/chunks/.empty_directory +0 -0
- data/lib/typingpool/templates/project/audio/originals/.empty_directory +0 -0
- data/lib/typingpool/templates/project/data/.empty_directory +0 -0
- data/lib/typingpool/templates/project/etc/ About these files - read me.txt +8 -0
- data/lib/typingpool/templates/project/etc/audio-compat.js +25 -0
- data/lib/typingpool/templates/project/etc/player/audio-player.js +4 -0
- data/lib/typingpool/templates/project/etc/player/license.txt +19 -0
- data/lib/typingpool/templates/project/etc/player/player.swf +0 -0
- data/lib/typingpool/templates/project/etc/transcript.css +49 -0
- data/lib/typingpool/templates/transcript.html.erb +23 -0
- data/lib/typingpool/test/fixtures/amazon-question-html.html +95 -0
- data/lib/typingpool/test/fixtures/amazon-question-url.txt +1 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.1.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/mp3/interview.2.mp3 +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620007.WMA +0 -0
- data/lib/typingpool/test/fixtures/audio/wma/VN620052.WMA +0 -0
- data/lib/typingpool/test/fixtures/config-1 +20 -0
- data/lib/typingpool/test/fixtures/config-2 +25 -0
- data/lib/typingpool/test/fixtures/not_yaml.txt +4 -0
- data/lib/typingpool/test/fixtures/template-2.html.erb +10 -0
- data/lib/typingpool/test/fixtures/template-3.html.erb +22 -0
- data/lib/typingpool/test/fixtures/template.html.erb +10 -0
- data/lib/typingpool/test/fixtures/tp_collect_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_collect_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/tp_review_id.txt +1 -0
- data/lib/typingpool/test/fixtures/tp_review_sandbox-assignment.csv +8 -0
- data/lib/typingpool/test/fixtures/transcript-chunks.csv +226 -0
- data/lib/typingpool/test/fixtures/utf8_transcript.txt +7 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-1.yml +2712 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-2.yml +2718 -0
- data/lib/typingpool/test/fixtures/vcr/tp-collect-3.yml +2768 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-1.yml +570 -0
- data/lib/typingpool/test/fixtures/vcr/tp-review-2.yml +351 -0
- data/lib/typingpool/test.rb +418 -0
- data/lib/typingpool/transcript.rb +181 -0
- data/lib/typingpool/utility.rb +272 -0
- data/lib/typingpool.rb +500 -0
- data/test/make_amazon_question_fixture.rb +24 -0
- data/test/make_tp_collect_fixture_1.rb +26 -0
- data/test/make_tp_collect_fixture_2.rb +16 -0
- data/test/make_tp_collect_fixture_3.rb +15 -0
- data/test/make_tp_collect_fixture_4.rb +17 -0
- data/test/make_tp_review_fixture_1.rb +26 -0
- data/test/make_tp_review_fixture_2.rb +30 -0
- data/test/make_transcript_chunks_fixture.rb +53 -0
- data/test/test_integration_script_1_tp_config.rb +108 -0
- data/test/test_integration_script_2_tp_make.rb +119 -0
- data/test/test_integration_script_3_tp_assign.rb +152 -0
- data/test/test_integration_script_4_tp_review.rb +72 -0
- data/test/test_integration_script_5_tp_collect.rb +44 -0
- data/test/test_integration_script_6_tp_finish.rb +123 -0
- data/test/test_unit_amazon.rb +153 -0
- data/test/test_unit_config.rb +94 -0
- data/test/test_unit_filer.rb +202 -0
- data/test/test_unit_project.rb +168 -0
- data/test/test_unit_project_local.rb +68 -0
- data/test/test_unit_project_remote.rb +157 -0
- data/test/test_unit_template.rb +111 -0
- data/test/test_unit_transcript.rb +77 -0
- metadata +234 -0
|
@@ -0,0 +1,593 @@
|
|
|
1
|
+
module Typingpool
|
|
2
|
+
|
|
3
|
+
#Class representing a transcription job, a job typically associated
|
|
4
|
+
#with a single interview or other event and with one or more audio
|
|
5
|
+
#files containing recordings of that event. A project is
|
|
6
|
+
#associated, locally, with a filesystem directory. On Amazon
|
|
7
|
+
#Mechanical Turk, a Project is associated with various HITs. A
|
|
8
|
+
#project is also associated with audio files on a remote server.
|
|
9
|
+
class Project
|
|
10
|
+
require 'uri'
|
|
11
|
+
|
|
12
|
+
#Returns a time interval corresponding to the length of each audio
|
|
13
|
+
#chunk within the project. (Each chunk may be transcribed
|
|
14
|
+
#separately.)
|
|
15
|
+
attr_reader :interval
|
|
16
|
+
|
|
17
|
+
#Returns the desired bitrate of processed audio files.
|
|
18
|
+
attr_reader :bitrate
|
|
19
|
+
|
|
20
|
+
#Accessor for the name of the project (sometimes referred to as
|
|
21
|
+
#the 'title' in command line code)
|
|
22
|
+
attr_accessor :name
|
|
23
|
+
|
|
24
|
+
#Accessor for the Config object associated with the project.
|
|
25
|
+
attr_accessor :config
|
|
26
|
+
|
|
27
|
+
#Constructor. Takes the project name and an optional Config
|
|
28
|
+
#instance (default is the default Config.file). Project does not
|
|
29
|
+
#have to exist locally or remotely.
|
|
30
|
+
def initialize(name, config=Config.file)
  # The name doubles as a directory name, so refuse anything the local
  # filesystem cannot store before any state is recorded.
  unless Local.valid_name?(name)
    raise Error::Argument::Format, "Must be a valid name for a directory in the local filesystem. Eliminate '/' or any other illegal character."
  end
  @config = config
  @name = name
end
|
|
35
|
+
|
|
36
|
+
#Constructs and returns a Project::Remote instance associated with
|
|
37
|
+
#this Project instance. Takes an optional Config instance; default
|
|
38
|
+
#is project.config.
|
|
39
|
+
def remote(config=@config)
  # Delegates backend selection to Remote.from_config.
  project_name = @name
  Remote.from_config(project_name, config)
end
|
|
42
|
+
|
|
43
|
+
#Constructs and returns a Project::Local instance associated with
|
|
44
|
+
#this Project instance IF the project exists at the appropriate
|
|
45
|
+
#location in the filesystem. Takes an optional path to a base
|
|
46
|
+
#directory to look in; default is project.config.transcripts.
|
|
47
|
+
def local(dir=@config.transcripts)
  # Looks the project up by name under +dir+ via Local.named.
  project_name = @name
  Local.named(project_name, dir)
end
|
|
50
|
+
|
|
51
|
+
#Creates a local filesystem directory corresponding to the project
|
|
52
|
+
#and constructs and returns a Project::Local instance associated
|
|
53
|
+
#with that directory and with this Project instance. Takes an
|
|
54
|
+
#optional path to a base directory in which to create the project
|
|
55
|
+
#directory; default is project.config.transcripts.
|
|
56
|
+
def create_local(basedir=@config.transcripts)
  # The bundled 'templates/project' directory is the skeleton handed to
  # Local.create for every new project.
  template_dir = File.join(Utility.lib_dir, 'templates', 'project')
  Local.create(@name, basedir, template_dir)
end
|
|
59
|
+
|
|
60
|
+
#Takes a time specification for setting the project.interval. The
|
|
61
|
+
#time specification may be an integer corresponding to the number
#of seconds or a colon-delimited time of the format HH:MM:SS,
#where the hour component is optional.
|
|
64
|
+
def interval=(mmss)
  # Accepts a bare number of seconds ("90") or a clock-style value
  # ("1:30", "0:01:30") and stores the total in @interval as integer seconds.
  # Raises Error::Argument::Format when the value matches neither form.
  #
  # The pattern is anchored with \A/\z rather than ^/$: the line anchors
  # would let a multi-line string through as long as any one line matched.
  # Surrounding whitespace (e.g. a trailing newline) is stripped first.
  formatted = mmss.to_s.strip.match(
    /\A((\d+)|((\d+:)?(\d+):(\d\d)))\z/
  ) or raise Error::Argument::Format, "Required format is SS, or MM:SS, or HH:MM:SS"
  @interval =
    if formatted[2]
      # Plain seconds.
      formatted[2].to_i
    else
      # formatted[4] is "HH:" (or nil); String#to_i ignores the trailing colon.
      (formatted[4].to_i * 60 * 60) + (formatted[5].to_i * 60) + formatted[6].to_i
    end
end
|
|
70
|
+
|
|
71
|
+
#Returns the project.interval in a format understood by the Unix
|
|
72
|
+
#utility mp3splt: $min.$sec[.01-99].
|
|
73
|
+
def interval_as_min_dot_sec
  # Formats @interval as "min.sec" or, when there is a fractional part,
  # "min.sec.hh" with hh being zero-padded hundredths of a second.
  # Returns nil when no interval has been set, like
  # interval_as_hours_minutes_seconds does.
  return unless @interval
  # Work in whole hundredths so rounding carries into seconds/minutes
  # correctly and the fraction can be zero-padded (90.5 => "1.30.50";
  # an unpadded "1.30.5" would read as five hundredths).
  total_hundredths = (@interval * 100).round
  minutes, remainder = total_hundredths.divmod(60 * 100)
  seconds, hundredths = remainder.divmod(100)
  return "#{minutes}.#{seconds}" if hundredths.zero?
  format('%d.%d.%02d', minutes, seconds, hundredths)
end
|
|
81
|
+
|
|
82
|
+
#Takes an integer for setting the project.bitrate. The integer
|
|
83
|
+
#should correspond to kilobits per second (kbit/s or kbps). This
|
|
84
|
+
#is used as a target when converting to mp3 (when it's necessary
|
|
85
|
+
#to do so).
|
|
86
|
+
def bitrate=(kbps)
  # Stores the target bitrate exactly as given (no type conversion).
  # Raises Error::Argument::Format unless the value converts to a positive
  # integer; a zero or negative bitrate is meaningless, and nil or
  # non-numeric strings convert to 0.
  raise Error::Argument::Format, 'Should be an integer corresponding to kb/s' if kbps.to_i <= 0
  @bitrate = kbps
end
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
#Writes a CSV file into project.local directory, storing information about the specified files.
|
|
93
|
+
# ==== Params
|
|
94
|
+
# [:path] Relative path where the file will be written. Array of
|
|
95
|
+
# relative path elements. See Filer::Dir#file docs
|
|
96
|
+
# for details.
|
|
97
|
+
# [:urls] Array of URLs corresponding to project files.
|
|
98
|
+
# [:unusual] Optional. Array of unusual words spoken in the
|
|
99
|
+
# audio to be transcribed. This list is ultimately
|
|
100
|
+
# provided to transcribers to aid in their work.
|
|
101
|
+
# [:voices] Optional. Array of hashes, with each having a :name and
|
|
102
|
+
# :description element. Each hash corresponds to a
|
|
103
|
+
# person whose voice is on the audio. These
|
|
104
|
+
# details are ultimately provided to transcribers
|
|
105
|
+
# to allow them to correctly label sections of the
|
|
106
|
+
# transcript
|
|
107
|
+
# ==== Returns
|
|
108
|
+
# Path to the resulting CSV file.
|
|
109
|
+
def create_assignment_csv(args)
  # Writes one CSV row per URL in args[:urls] to the project-local file at
  # args[:path] and returns that file's path.
  # Raises Error::Argument when :path or :urls is missing.
  [:path, :urls].each{|arg| args[arg] or raise Error::Argument, "Missing arg '#{arg}'" }
  # :unusual and :voices are optional; treat an omitted value as an empty
  # list instead of calling methods on nil.
  unusual = args[:unusual] || []
  voices = args[:voices] || []
  headers = ['audio_url',
             'project_id',
             'unusual',
             'chunk',
             'chunk_hours',
             'chunk_minutes',
             'chunk_seconds',
             'voices_count',
             # Two columns per voice: name, then description.
             (1 .. voices.count).map{|n| ["voice#{n}", "voice#{n}title"]}
            ].flatten
  csv = args[:urls].map do |url|
    [url,
     local.id,
     unusual.join(', '),
     interval_as_time_string,
     # Zero components become empty cells (nil).
     interval_as_hours_minutes_seconds.map{|n| (n == 0) ? nil : n },
     voices.count,
     voices.map{|v| [v[:name], v[:description]]}
    ].flatten
  end
  local.file(*args[:path]).as(:csv).write_arrays(csv, headers)
  local.file_path(*args[:path])
end
|
|
134
|
+
|
|
135
|
+
#Takes an array of file paths, file names, or Filer
|
|
136
|
+
#instances. Returns an array of file basenames. The return
|
|
137
|
+
#basenames will be the original basenames with the project id and
|
|
138
|
+
#a random or pseudo-random string inserted between the root
|
|
139
|
+
#basename and the file extension. The purpose of this is to make
|
|
140
|
+
#it difficult to guess the name of one remote file after seeing
|
|
141
|
+
#another, thus significantly complicating any attempt to download
|
|
142
|
+
#the entirety of a project (such as a journalistic interview)
|
|
143
|
+
#after seeing a single assignment on Amazon Mechanical Turk. (This
|
|
144
|
+
#should be considered an effort at obfuscation. It is not any
|
|
145
|
+
#guarantee of true security.)
|
|
146
|
+
def create_remote_names(files)
  # For each input, builds "<root>.<project id>.<random letters><ext>".
  files.map do |file|
    root = File.basename(file, '.*')
    remote_name = [root, local.id, pseudo_random_uppercase_string].join('.')
    extension = File.extname(file)
    # Files without an extension keep the bare three-part name.
    extension.to_s.empty? ? remote_name : remote_name + extension
  end
end
|
|
153
|
+
|
|
154
|
+
#Returns a Regexp for breaking an URL down into the original
|
|
155
|
+
#project basename as well as the audio chunk offset. This probably
|
|
156
|
+
#shouldn't need to exist. (TODO: make this unnecessary.)
|
|
157
|
+
def self.url_regex
  # Capture groups: 1 = whole basename, 2 = original root name,
  # 3 and 4 = the two numeric offset fields, 5 = file extension.
  # The uncaptured 32 hex chars and 6 capitals sit between 4 and 5.
  pattern = '.+\/((.+)\.(\d+)\.(\d\d)\.[a-fA-F0-9]{32}\.[A-Z]{6}(\.\w+))'
  Regexp.new(pattern)
end
|
|
160
|
+
|
|
161
|
+
#Takes an url. Returns the basename of the associated
|
|
162
|
+
#project.local file. This probably shouldn't need to exist. (TODO:
|
|
163
|
+
#Make this unnecessary.)
|
|
164
|
+
def self.local_basename_from_url(url)
  # Rebuilds "<root>.<offset>.<offset><ext>" from the pieces captured by
  # Project.url_regex, dropping the project id and random letters.
  # Raises Error::Argument::Format when the URL does not match.
  matches = Project.url_regex.match(url) or raise Error::Argument::Format, "Unexpected format to url '#{url}'"
  escaped = [matches[2..4].join('.'), matches[5]].join
  # URI.unescape was removed in Ruby 3.0; the parser object provides the
  # same percent-decoding (and, unlike CGI.unescape, leaves '+' alone).
  URI::DEFAULT_PARSER.unescape(escaped)
end
|
|
168
|
+
|
|
169
|
+
protected
|
|
170
|
+
|
|
171
|
+
#Takes an optional string length (default 6). Returns a string of
|
|
172
|
+
#pseudo-random uppercase letters of the specified length. Should
|
|
173
|
+
#probably move this into Utility. TODO
|
|
174
|
+
def pseudo_random_uppercase_string(length=6)
  # Returns +length+ pseudo-random capitals drawn from 'A'..'Z'.
  # rand(26) yields 0..25, covering the whole alphabet; rand(25) stopped
  # at 24 and could never emit 'Z'.
  (0...length).map{ (65 + rand(26)).chr }.join
end
|
|
177
|
+
|
|
178
|
+
def interval_as_hours_minutes_seconds
  # Splits the interval (seconds) into [hours, minutes, seconds].
  # Returns nil when no interval is set.
  total = interval
  return unless total
  per_hour = 60 * 60
  within_hour = total % per_hour
  [total / per_hour, within_hour / 60, within_hour % 60]
end
|
|
186
|
+
|
|
187
|
+
#Returns interval as [HH:]MM:SS.
|
|
188
|
+
def interval_as_time_string
  parts = interval_as_hours_minutes_seconds
  # Leave the hours column off entirely when it is zero.
  parts = parts.drop(1) if parts.first == 0
  # Every column except the leading one is zero-padded to two digits.
  columns = parts.each_with_index.map do |value, position|
    position.zero? ? value : value.to_s.rjust(2, '0')
  end
  columns.join(":")
end
|
|
196
|
+
|
|
197
|
+
#Representation of the Project instance on remote servers. This is
|
|
198
|
+
#basically a collection of audio files to be transcribed and HTML
|
|
199
|
+
#files containing instructions and a form for the
|
|
200
|
+
#transcribers. The backend can be Amazon S3 (the default) or an
|
|
201
|
+
#SFTP server. Each backend is encapsulated in its own subclass. A
|
|
202
|
+
#backend subclass must provide a 'put' method, which takes an
|
|
203
|
+
#array of IO streams and an optional array of remote file
|
|
204
|
+
#basenames; a 'remove' method, which takes an array of remote file
|
|
205
|
+
#basenames; and the methods 'host' and 'path', which return the
|
|
206
|
+
#location of the destination server and destination directory,
|
|
207
|
+
#respectively.
|
|
208
|
+
#
|
|
209
|
+
#Thus, there will always be 'put', 'remove', 'host' and 'path'
|
|
210
|
+
#methods available, in addition to the Project::Remote methods
|
|
211
|
+
#outlined below.
|
|
212
|
+
class Remote
|
|
213
|
+
|
|
214
|
+
#The project name
|
|
215
|
+
attr_accessor :name
|
|
216
|
+
|
|
217
|
+
#Constructor. Takes the project name and a Config
|
|
218
|
+
#instance. Returns a Project::Remote::S3 or
|
|
219
|
+
#Project::Remote::SFTP instance, depending on the particulars of
|
|
220
|
+
#the Config. If there are sufficient config params to return
|
|
221
|
+
#EITHER an S3 or SFTP subclass, it will prefer the SFTP
|
|
222
|
+
#subclass.
|
|
223
|
+
def self.from_config(name, config)
  # SFTP settings win when both backends are configured.
  return SFTP.new(name, config.sftp) if config.sftp
  # S3 is only usable when a bucket has been named.
  return S3.new(name, config.amazon) if config.amazon && config.amazon.bucket
  raise Error, "No valid upload params found in config file (SFTP or Amazon info)"
end
|
|
232
|
+
|
|
233
|
+
#Like project.remote.remove, except it takes an array of URLs
|
|
234
|
+
#instead of an array of remote basenames, saving you from having to
|
|
235
|
+
#manually extract basenames from the URL.
|
|
236
|
+
def remove_urls(urls)
  # Map each URL back to its remote basename, then hand the list to the
  # backend's remove, relaying each file to the caller's block if any.
  remote_names = urls.map do |url|
    url_basename(url)
  end
  remove(remote_names) do |file|
    yield(file) if block_given?
  end
end
|
|
240
|
+
|
|
241
|
+
#Given a file path, returns the URL to the file path were it to
|
|
242
|
+
#be uploaded by this instance.
|
|
243
|
+
def file_to_url(file)
  # Appends the percent-escaped file name to the instance's base URL.
  # URI.escape was removed in Ruby 3.0; the parser object offers the same
  # escaping rules.
  "#{@url}/#{URI::DEFAULT_PARSER.escape(file)}"
end
|
|
246
|
+
|
|
247
|
+
#Given an URL, returns the file portion of the path, given the
|
|
248
|
+
#configuration of this instance.
|
|
249
|
+
def url_basename(url)
  # Returns whatever follows "<base url>/" in +url+, percent-decoded.
  # Raises Error when the base URL is not part of +url+.
  basename = url.split("#{self.url}/")[1] or raise Error, "Could not find base url '#{self.url}' within longer url '#{url}'"
  # URI.unescape was removed in Ruby 3.0; the parser object provides the
  # same decoding.
  URI::DEFAULT_PARSER.unescape(basename)
end
|
|
253
|
+
|
|
254
|
+
#Subclass for storing remote files on Amazon Simple Storage
|
|
255
|
+
#Service (S3)
|
|
256
|
+
class S3 < Remote
|
|
257
|
+
require 'aws/s3'
|
|
258
|
+
|
|
259
|
+
#An Amazon Web Services "Access Key ID." Set from the
|
|
260
|
+
#Config#amazon value passed to Project::Remote::S3.new, but
|
|
261
|
+
#changeable.
|
|
262
|
+
attr_accessor :key
|
|
263
|
+
|
|
264
|
+
#An Amazon Web Services "Secret Access Key." Set from the
|
|
265
|
+
#Config#amazon value passed to Project::Remote::S3.new, but
|
|
266
|
+
#changeable.
|
|
267
|
+
attr_accessor :secret
|
|
268
|
+
|
|
269
|
+
#The S3 "bucket" where uploads will be stored. Set from the
|
|
270
|
+
#Config#amazon value passed to Project::Remote::S3.new, but
|
|
271
|
+
#changeable.
|
|
272
|
+
attr_accessor :bucket
|
|
273
|
+
|
|
274
|
+
#Returns the base URL, which is prepended to the remote
|
|
275
|
+
#files. This is either the 'url' attribute of the
|
|
276
|
+
#Config#amazon value passed to Project::Remote::S3.new or, if
|
|
277
|
+
#that attribute is not set, the value returned by
|
|
278
|
+
#'default_url' (e.g. "https://bucketname.s3.amazonaws.com").
|
|
279
|
+
attr_reader :url
|
|
280
|
+
|
|
281
|
+
#Constructor. Takes the project name and the result of calling
|
|
282
|
+
#the 'amazon' method on a Config instance (i.e. the amazon
|
|
283
|
+
#section of a Config file).
|
|
284
|
+
def initialize(name, amazon_config)
  @name = name
  @config = amazon_config
  # Key, secret and bucket are mandatory; each is checked right after it
  # is read.
  @key = @config.key
  raise Error::File::Remote::S3, "Missing Amazon key in config" unless @key
  @secret = @config.secret
  raise Error::File::Remote::S3, "Missing Amazon secret in config" unless @secret
  @bucket = @config.bucket
  raise Error::File::Remote::S3, "Missing Amazon bucket in config" unless @bucket
  # An explicit url in the config overrides the bucket-derived default.
  @url = @config.url || default_url
end
|
|
292
|
+
|
|
293
|
+
#The remote host (server) name, parsed from #url
|
|
294
|
+
def host
  # Host component of the base URL.
  base = URI.parse(@url)
  base.host
end
|
|
297
|
+
|
|
298
|
+
#The remote path (directory), parsed from #url
|
|
299
|
+
def path
  # Path component of the base URL.
  base = URI.parse(@url)
  base.path
end
|
|
302
|
+
|
|
303
|
+
#Upload files/strings to S3, optionally changing the names in the process.
|
|
304
|
+
# ==== Params
|
|
305
|
+
#[io_streams] Enumerable collection of IO objects, like a File
|
|
306
|
+
# or StringIO instance.
|
|
307
|
+
#[as] Optional if the io_streams are File instances. Array of
|
|
308
|
+
# file basenames, used to name the destination
|
|
309
|
+
# files. Default is the basename of the Files
|
|
310
|
+
# passed in as io_streams.
|
|
311
|
+
# ==== Returns
|
|
312
|
+
#Array of URLs corresponding to the uploaded files.
|
|
313
|
+
def put(io_streams, as=io_streams.map{|file| File.basename(file)})
  # Stores each stream under the matching name in +as+, yielding
  # (stream, destination) to an optional block before each upload.
  # Returns the URLs of the uploaded files.
  batch(io_streams) do |stream, i|
    dest = as[i]
    yield(stream, dest) if block_given?
    bucket_created = false
    begin
      AWS::S3::S3Object.store(dest, stream, @bucket, :access => :public_read)
    rescue AWS::S3::NoSuchBucket
      # Create the bucket and try again, but only once per file: if the
      # bucket is still reported missing, re-raise instead of looping
      # forever.
      raise if bucket_created
      make_bucket
      bucket_created = true
      retry
    end
    file_to_url(dest)
  end #batch
end
|
|
326
|
+
|
|
327
|
+
#Delete objects from S3.
|
|
328
|
+
# ==== Params
|
|
329
|
+
#[files] Enumerable collection of file names. Should NOT
|
|
330
|
+
# include the bucket name (path).
|
|
331
|
+
# ==== Returns
|
|
332
|
+
#Array of booleans corresponding to whether the delete call
|
|
333
|
+
#succeeded.
|
|
334
|
+
def remove(files)
  # Deletes each named object from the bucket, yielding the name to an
  # optional block first. Returns the per-file results collected by batch.
  batch(files) do |remote_name, _position|
    yield(remote_name) if block_given?
    AWS::S3::S3Object.delete(remote_name, @bucket)
  end
end
|
|
340
|
+
|
|
341
|
+
protected
|
|
342
|
+
|
|
343
|
+
def batch(io_streams)
  # Yields (item, index) for every element while an S3 connection is open
  # and returns the block results as an array. The connection is opened
  # lazily before the first item, so an empty collection never connects.
  # Amazon errors are re-raised as Error::File::Remote::S3 (or its
  # Credentials subclass for signature failures).
  results = []
  connected = false
  begin
    io_streams.each_with_index do |stream, i|
      unless connected
        connect
        connected = true
      end
      begin
        results.push(yield(stream, i))
      rescue AWS::S3::S3Exception => e
        if e.message.match(/AWS::S3::SignatureDoesNotMatch/)
          raise Error::File::Remote::S3::Credentials, "S3 operation failed with a signature error. This likely means your AWS key or secret is wrong. Error: #{e}"
        else
          raise Error::File::Remote::S3, "Your S3 operation failed with an Amazon error: #{e}"
        end #if
      end #begin
    end #each_with_index
  ensure
    # Close the connection even when an operation raised; previously a
    # failure skipped the disconnect.
    disconnect if connected
  end
  results
end
|
|
360
|
+
|
|
361
|
+
def connect
  # Opens a persistent SSL connection using this instance's key and secret.
  AWS::S3::Base.establish_connection!(
    :access_key_id => @key,
    :secret_access_key => @secret,
    :persistent => true,
    :use_ssl => true
  )
end
|
|
369
|
+
|
|
370
|
+
def disconnect
  # Closes the connection opened by #connect.
  AWS::S3::Base.disconnect
end
|
|
373
|
+
|
|
374
|
+
def make_bucket
  # Creates the configured bucket.
  bucket_name = @bucket
  AWS::S3::Bucket.create(bucket_name)
end
|
|
377
|
+
|
|
378
|
+
def default_url
  # Base URL derived from the bucket name.
  ['https://', @bucket, '.s3.amazonaws.com'].join
end
|
|
381
|
+
end #S3
|
|
382
|
+
|
|
383
|
+
#Subclass for storing remote files on an SFTP server. Only
|
|
384
|
+
#public/private key authentication has been tested. There is not
|
|
385
|
+
#yet any provision for password-based authentication, though
|
|
386
|
+
#adding it should be trivial.
|
|
387
|
+
class SFTP < Remote
|
|
388
|
+
require 'net/sftp'
|
|
389
|
+
|
|
390
|
+
#Returns the remote host (server) name. This is set from
|
|
391
|
+
#Config#sftp#host.
|
|
392
|
+
attr_reader :host
|
|
393
|
+
|
|
394
|
+
#Returns the remote path (directory). This is set from
|
|
395
|
+
#Config#sftp#path.
|
|
396
|
+
attr_reader :path
|
|
397
|
+
|
|
398
|
+
#Returns the name of the user used to log in to the SFTP
|
|
399
|
+
#server. This is set from Config#sftp#user.
|
|
400
|
+
attr_reader :user
|
|
401
|
+
|
|
402
|
+
#Returns the base URL, which is prepended to the remote
|
|
403
|
+
#files. This is set from Config#sftp#url.
|
|
404
|
+
attr_reader :url
|
|
405
|
+
|
|
406
|
+
#Constructor. Takes the project name and a Config#sftp.
|
|
407
|
+
def initialize(name, sftp_config)
  @name = name
  @config = sftp_config
  # User, host and url are mandatory; each is checked right after it is read.
  @user = @config.user
  raise Error::File::Remote::SFTP, "No SFTP user specified in config" unless @user
  @host = @config.host
  raise Error::File::Remote::SFTP, "No SFTP host specified in config" unless @host
  @url = @config.url
  raise Error::File::Remote::SFTP, "No SFTP url specified in config" unless @url
  # The remote directory is optional and falls back to an empty string.
  @path = @config.path || ''
end
|
|
415
|
+
|
|
416
|
+
#See docs for Project::Remote::S3#put.
|
|
417
|
+
def put(io_streams, as=io_streams.map{|file| File.basename(file)})
  begin
    # batch yields (stream, connection) without an index, so the position
    # in +as+ is tracked by hand.
    position = 0
    batch(io_streams) do |stream, sftp|
      dest = as[position]
      position += 1
      yield(stream, dest) if block_given?
      remote_path = join_with_path(dest)
      sftp.upload(stream, remote_path)
      file_to_url(dest)
    end
  rescue Net::SFTP::StatusException => failure
    raise Error::File::Remote::SFTP, "SFTP upload failed: #{failure.description}"
  end
end
|
|
431
|
+
|
|
432
|
+
#See docs for Project::Remote::S3#remove.
|
|
433
|
+
def remove(files)
  # Queue one removal per file, yielding each name to an optional block.
  requests = batch(files) do |file, sftp|
    yield(file) if block_given?
    sftp.remove(join_with_path(file))
  end
  # Collect every request whose response is not ok and report them together.
  failed = requests.select{|request| !request.response.ok? }
  unless failed.empty?
    summary = failed.map{|request| request.response.to_s }.join('; ')
    raise Error::File::Remote::SFTP, "SFTP removal failed: #{summary}"
  end
end
|
|
444
|
+
|
|
445
|
+
protected
|
|
446
|
+
|
|
447
|
+
def connection
  # Opens an SFTP session to @host as @user, yields it, then runs the
  # session's loop before the session block ends.
  # Raises Error::File::Remote::SFTP when authentication fails.
  begin
    Net::SFTP.start(@host, @user) do |sftp|
      yield(sftp)
      sftp.loop
    end
  rescue Net::SSH::AuthenticationFailed => e
    # Report the exception itself; $? is the status of the last child
    # process and has nothing to do with this failure.
    raise Error::File::Remote::SFTP, "SFTP authentication failed: #{e.message}"
  end
end
|
|
457
|
+
|
|
458
|
+
def batch(files)
  # Runs the block once per file inside a single SFTP session and returns
  # the block results in order.
  collected = []
  connection do |sftp|
    files.each{|file| collected.push(yield(file, sftp)) }
  end
  collected
end
|
|
467
|
+
|
|
468
|
+
def join_with_path(file)
  # Prefixes +file+ with the configured remote directory, if there is one.
  # The constructor defaults @path to '', which is truthy in Ruby, so the
  # check must be for emptiness; otherwise every upload would target
  # "/<file>" (the server root) instead of the login directory.
  return file if @path.to_s.empty?
  [@path, file].join('/')
end
|
|
475
|
+
|
|
476
|
+
end #SFTP
|
|
477
|
+
end #Remote
|
|
478
|
+
|
|
479
|
+
#Representation of the Project instance in the local
|
|
480
|
+
#filesystem. Subclass of Filer::Dir; see Filer::Dir docs for
|
|
481
|
+
#additional details.
|
|
482
|
+
#
|
|
483
|
+
#This is basically a local dir with various subdirs and files
|
|
484
|
+
#containing the canonical representation of the project, including
|
|
485
|
+
#data on remote resources, the project ID and subtitle, the audio files
|
|
486
|
+
#themselves, and, when complete, an HTML transcript of that audio,
|
|
487
|
+
#along with supporting CSS and Javascript files.
|
|
488
|
+
class Local < Filer::Dir
|
|
489
|
+
require 'fileutils'
|
|
490
|
+
require 'securerandom'
|
|
491
|
+
|
|
492
|
+
#Returns the dir path.
|
|
493
|
+
attr_reader :path
|
|
494
|
+
|
|
495
|
+
class << self
|
|
496
|
+
#Constructor. Creates a directory in the filesystem for the
|
|
497
|
+
#project.
|
|
498
|
+
#
|
|
499
|
+
# ==== Params
|
|
500
|
+
# [name] Name of the associated project.
|
|
501
|
+
# [base_dir] Path to the local directory into which the project
|
|
502
|
+
# dir should be placed.
|
|
503
|
+
# [template_dir] Path to the dir which will be used as a base
|
|
504
|
+
# template for new projects.
|
|
505
|
+
# ==== Returns
|
|
506
|
+
# Project::Local instance.
|
|
507
|
+
def create(name, base_dir, template_dir)
  project_dir = super(File.join(base_dir, name))
  # Joining with '.' makes cp_r copy the template's contents rather than
  # the template directory itself.
  template_contents = File.join(template_dir, '.')
  FileUtils.cp_r(template_contents, project_dir)
  project_dir.create_id
  project_dir
end
|
|
513
|
+
|
|
514
|
+
#Takes the name of a project and a path. If there's a
|
|
515
|
+
#directory with a matching name in the given path whose file
|
|
516
|
+
#layout indicates it is a Project::Local instance (see 'ours?'
|
|
517
|
+
#docs), returns a corresponding Project::Local instance.
|
|
518
|
+
def named(string, path)
  # Bare super forwards both arguments to the parent lookup.
  candidate = super
  return unless candidate
  # Only hand back directories that look like one of our projects.
  ours?(candidate) ? candidate : nil
end
|
|
525
|
+
|
|
526
|
+
#Takes a Filer::Dir instance. Returns true or false depending on whether
|
|
527
|
+
#the file layout inside the dir indicates it is a
|
|
528
|
+
#Project::Local instance.
|
|
529
|
+
def ours?(dir)
  # A project dir is recognised by its 'audio' and 'audio/originals'
  # subdirectories. File.exists? was removed in Ruby 3.2; File.exist? is
  # the supported spelling.
  File.exist?(dir.subdir('audio')) && File.exist?(dir.subdir('audio', 'originals'))
end
|
|
532
|
+
|
|
533
|
+
#Takes the name of a project and returns true if it is a valid
|
|
534
|
+
#name for a directory in the local filesystem, false if not.
|
|
535
|
+
def valid_name?(name)
  # Probes validity by actually creating a directory with this name
  # inside a temp dir. Returns true if it could be created, false
  # otherwise.
  Utility.in_temp_dir do |dir|
    begin
      candidate = File.join(dir, name)
      FileUtils.mkdir(candidate)
    rescue SystemCallError, ArgumentError, TypeError
      # Any failure to build or create the path means the name is unusable:
      # missing parent ('a/b'), already exists ('' or '.'), name too long,
      # embedded NUL byte (ArgumentError), non-string name (TypeError).
      # A predicate should answer false here rather than raise.
      return false
    end #begin
    # File.exists? was removed in Ruby 3.2.
    return File.exist?(candidate)
  end #Utility.in_temp_dir do...
end
|
|
545
|
+
|
|
546
|
+
#Takes one or more symbols. Adds corresponding getter/setter
|
|
547
|
+
#and delete method(s) to Project::Local, which read (getter)
|
|
548
|
+
#and write (setter) and delete corresponding text files in the
|
|
549
|
+
#data directory.
|
|
550
|
+
#
|
|
551
|
+
#So, for example, 'data_file_accessor :name' would allow you
|
|
552
|
+
#to later create the file 'data/name.txt' in the project dir by
|
|
553
|
+
#calling 'project.local.name = "Foo"', read that same file via
|
|
554
|
+
#'project.local.name', and delete the file via
|
|
555
|
+
#'project.local.delete_name'
|
|
556
|
+
def data_file_accessor(*syms)
  # For each symbol, defines three instance methods backed by
  # 'data/<sym>.txt': <sym> (read), <sym>= (write) and delete_<sym>.
  syms.each do |sym|
    filename = "#{sym}.txt"
    define_method(sym) do
      file('data', filename).read
    end
    define_method("#{sym}=".to_sym) do |value|
      file('data', filename).write(value)
    end
    define_method("delete_#{sym}".to_sym) do
      target = file('data', filename)
      # File.exists? was removed in Ruby 3.2. Deleting a file that is
      # already absent is a no-op.
      File.delete(target) if File.exist?(target)
    end
  end
end
|
|
571
|
+
end #class << self
|
|
572
|
+
|
|
573
|
+
#Calling 'subtitle' will read 'data/subtitle.txt'; calling
|
|
574
|
+
#'subtitle=' will write 'data/subtitle.txt'; calling
|
|
575
|
+
#'delete_subtitle' will delete 'data/subtitle.txt'.
|
|
576
|
+
data_file_accessor :subtitle
|
|
577
|
+
|
|
578
|
+
#Returns the ID of the project, as stored in 'data/id.txt'.
|
|
579
|
+
def id
  # Reads the stored project ID.
  id_file = file('data','id.txt')
  id_file.read
end
|
|
582
|
+
|
|
583
|
+
#Creates a file storing the canonical ID of the project in
|
|
584
|
+
#'data/id.txt'. Raises an exception if the file already exists.
|
|
585
|
+
def create_id
  # An ID is written once and never overwritten.
  raise Error, "id already exists" if id
  # 16 random bytes rendered as 32 hex characters.
  fresh_id = SecureRandom.hex(16)
  file('data','id.txt').write(fresh_id)
end
|
|
591
|
+
end #Local
|
|
592
|
+
end #Project
|
|
593
|
+
end #Typingpool
|