typingpool 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/LICENSE +20 -0
  2. data/README.markdown +452 -0
  3. data/lib/typingpool/amazon/hit/assignment/empty.rb +19 -0
  4. data/lib/typingpool/amazon/hit/assignment.rb +43 -0
  5. data/lib/typingpool/amazon/hit/full/fromsearchhits.rb +44 -0
  6. data/lib/typingpool/amazon/hit/full.rb +105 -0
  7. data/lib/typingpool/amazon/hit.rb +458 -0
  8. data/lib/typingpool/amazon/question.rb +45 -0
  9. data/lib/typingpool/amazon.rb +3 -677
  10. data/lib/typingpool/app/cli/formatter.rb +16 -0
  11. data/lib/typingpool/app/cli.rb +64 -0
  12. data/lib/typingpool/app/friendlyexceptions.rb +34 -0
  13. data/lib/typingpool/app.rb +2 -97
  14. data/lib/typingpool/config/root.rb +114 -0
  15. data/lib/typingpool/config.rb +13 -119
  16. data/lib/typingpool/filer/audio.rb +84 -0
  17. data/lib/typingpool/filer/csv.rb +57 -0
  18. data/lib/typingpool/filer/dir.rb +76 -0
  19. data/lib/typingpool/filer/files/audio.rb +63 -0
  20. data/lib/typingpool/filer/files.rb +55 -0
  21. data/lib/typingpool/filer.rb +4 -313
  22. data/lib/typingpool/project/local.rb +117 -0
  23. data/lib/typingpool/project/remote/s3.rb +135 -0
  24. data/lib/typingpool/project/remote/sftp.rb +100 -0
  25. data/lib/typingpool/project/remote.rb +65 -0
  26. data/lib/typingpool/project.rb +2 -396
  27. data/lib/typingpool/template/assignment.rb +17 -0
  28. data/lib/typingpool/template/env.rb +77 -0
  29. data/lib/typingpool/template.rb +2 -87
  30. data/lib/typingpool/test/script.rb +310 -0
  31. data/lib/typingpool/test.rb +1 -306
  32. data/lib/typingpool/transcript/chunk.rb +129 -0
  33. data/lib/typingpool/transcript.rb +1 -125
  34. data/lib/typingpool/utility/castable.rb +65 -0
  35. data/lib/typingpool/utility.rb +1 -61
  36. data/test/test_integration_script_6_tp_finish.rb +1 -0
  37. metadata +135 -81
@@ -0,0 +1,129 @@
1
+ module Typingpool
2
+ class Transcript
3
+
4
+ #Transcript::Chunk is the model class for one transcription by one
5
+ #Mechanical Turk worker of one "chunk" (a file) of audio, which in
6
+ #turn is a portion of a larger recording (for example, one minute
7
+ #of a 60 minute interview). It is basically parallel and similar
8
+ #to an Amazon::HIT instance. Transcript is a container for these
9
+ #chunks, which know how to render themselves as text and HTML.
10
+ class Chunk
11
+ require 'cgi'
12
+ require 'rubygems/text'
13
+ include Gem::Text
14
+
15
+ #Get/set the raw text of the transcript
16
+ attr_accessor :body
17
+
18
+ #Get/set the Amazon ID of the Mechanical Turk worker who
19
+ #transcribed the audio into text
20
+ attr_accessor :worker
21
+
22
+ #Get/set the id of the Amazon::HIT associated with this chunk
23
+ attr_accessor :hit
24
+
25
+ #Get/set the id of the Project#local associated with this chunk
26
+ attr_accessor :project
27
+
28
+ #Return the offset associated with the chunk, in MM:SS
29
+ #format. This corresponds to the associated audio file, which is
30
+ #a chunk of a larger recording and which starts at a particular
31
+ #time offset, for example from 1:00 (the offset) to 2:00 (the
32
+ #next offset).
33
+ #
34
+ #
35
+ #This should be updated to return HH:MM:SS and MM:SS.sss when
36
+ #appropriate, since in Project#interval we use that format and
37
+ #allow audio to be divided into such units. (TODO)
38
+ attr_reader :offset
39
+
40
+ #Returns the offset in seconds. For example, an offset of 1:00 returns 60.
41
+ attr_reader :offset_seconds
42
+
43
+ #Returns the name of the remote audio file corresponding to this
44
+ #chunk. The remote file has the project ID and pseudo random
45
+ #characters added to it.
46
+ attr_reader :filename
47
+
48
+ #Returns the name of the local audio file corresponding to this
49
+ #chunk.
50
+ attr_reader :filename_local
51
+
52
+ #Returns the URL of the remote audio transcribed in the body of
53
+ #this chunk.
54
+ attr_reader :url
55
+
56
+ #Constructor. Takes the raw text of the transcription.
57
+ def initialize(body)
58
+ @body = body
59
+ end
60
+
61
+ #Sorts by offset seconds.
62
+ def <=>(other)
63
+ self.offset_seconds <=> other.offset_seconds
64
+ end
65
+
66
+ #Takes an URL. As an important side effect, sets various
67
+ #attributes, including url, filename, filename_local, offset and
68
+ #offset_seconds. So setting Chunk#url= http://whateverwhatever
69
+ #is an important step in populating the instance.
70
+ def url=(url)
71
+ #http://ryantate.com/transfer/Speech.01.00.ede9b0f2aed0d35a26cef7160bc9e35e.ISEAOM.mp3
72
+ matches = Project.url_regex.match(url) or raise Error::Argument::Format, "Unexpected format to url '#{url}'"
73
+ @url = matches[0]
74
+ @filename = matches[1]
75
+ @filename_local = Project.local_basename_from_url(@url)
76
+ @offset = "#{matches[3]}:#{matches[4]}"
77
+ @offset_seconds = (matches[3].to_i * 60) + matches[4].to_i
78
+ end
79
+
80
+ #Takes an optional specification of how many spaces to indent
81
+ #the text by (default 0) and an optional specification of how
82
+ #many characters to wrap at (default no wrapping).
83
+ #
84
+ #Returns the text with newlines normalized to Unix format, runs
85
+ #of newlines shortened to a maximum of two newlines, leading and
86
+ #trailing whitespace removed from each line, and the text
87
+ #wrapped/indented as specified.
88
+ def body_as_text(indent=nil, wrap=nil)
89
+ text = self.body
90
+ text = Utility.normalize_newlines(text)
91
+ text.gsub!(/\n\n+/, "\n\n")
92
+ text = text.split("\n").map{|line| line.strip }.join("\n")
93
+ text = wrap_text(text, wrap) if wrap
94
+ text = indent_text(text, indent) if indent
95
+ text
96
+ end
97
+ alias :to_s :body_as_text
98
+ alias :to_str :body_as_text
99
+
100
+ #Takes an optional count of how many characters to wrap at
101
+ #(default 72). Returns the body, presumed to be raw text, as
102
+ #HTML. Any HTML tags in the body are escaped. Text blocks
103
+ #separated by double newlines are converted to HTML paragraphs,
104
+ #while single newlines are converted to HTML BR tags. Newlines
105
+ #are normalized as in body_as_text, and lines in the HTML source
106
+ #are automatically wrapped as specified.
107
+ def body_as_html(wrap=72)
108
+ text = body_as_text
109
+ text = CGI::escapeHTML(text)
110
+ text = Utility.newlines_to_html(text)
111
+ text = text.split("\n").map do |line|
112
+ wrap_text(line, 72).chomp
113
+ end.join("\n")
114
+ text
115
+ end
116
+
117
+ protected
118
+
119
+ def indent_text(text, indent)
120
+ text.gsub!(/^/, " " * indent)
121
+ text
122
+ end
123
+
124
+ def wrap_text(text, wrap=72)
125
+ format_text(text, wrap)
126
+ end
127
+ end #Chunk
128
+ end #Transcript
129
+ end #Typingpool
@@ -52,130 +52,6 @@ module Typingpool
52
52
  @chunks.push(chunk)
53
53
  end
54
54
 
55
- #Transcript::Chunk is the model class for one transcription by one
56
- #Mechanical Turk worker of one "chunk" (a file) of audio, which in
57
- #turn is a portion of a larger recording (for example, one minute
58
- #of a 60 minute interview). It is basically parallel and similar
59
- #to an Amazon::HIT instance. Transcript is a container for these
60
- #chunks, which know how to render themselves as text and HTML.
61
- class Chunk
62
- require 'cgi'
63
- require 'rubygems/text'
64
- include Gem::Text
65
-
66
- #Get/set the raw text of the transcript
67
- attr_accessor :body
68
-
69
- #Get/set the Amazon ID of the Mechanical Turk worker who
70
- #transcribed the audio into text
71
- attr_accessor :worker
72
-
73
- #Get/set the id of the Amazon::HIT associated with this chunk
74
- attr_accessor :hit
75
-
76
- #Get/set the id of the Project#local associated with this chunk
77
- attr_accessor :project
78
-
79
- #Return the offset associated with the chunk, in MM:SS
80
- #format. This corresponds to the associated audio file, which is
81
- #a chunk of a larger recording and which starts at a particular
82
- #time offset, for example from 1:00 (the offset) to 2:00 (the
83
- #next offset).
84
- #
85
- #
86
- #This should be updated to return HH:MM:SS and MM:SS.sss when
87
- #appropriate, since in Project#interval we use that format and
88
- #allow audio to be divided into such units. (TODO)
89
- attr_reader :offset
90
-
91
- #Returns the offset in seconds. For example, an offset of 1:00 returns 60.
92
- attr_reader :offset_seconds
93
-
94
- #Returns the name of the remote audio file corresponding to this
95
- #chunk. The remote file has the project ID and pseudo random
96
- #characters added to it.
97
- attr_reader :filename
98
-
99
- #Returns the name of the local audio file corresponding to this
100
- #chunk.
101
- attr_reader :filename_local
102
-
103
- #Returns the URL of the remote audio transcribed in the body of
104
- #this chunk.
105
- attr_reader :url
106
-
107
- #Constructor. Takes the raw text of the transcription.
108
- def initialize(body)
109
- @body = body
110
- end
111
-
112
- #Sorts by offset seconds.
113
- def <=>(other)
114
- self.offset_seconds <=> other.offset_seconds
115
- end
116
-
117
- #Takes an URL. As an important side effect, sets various
118
- #attributes, including url, filename, filename_local, offset and
119
- #offset_seconds. So setting Chunk#url= http://whateverwhatever
120
- #is an important step in populating the instance.
121
- def url=(url)
122
- #http://ryantate.com/transfer/Speech.01.00.ede9b0f2aed0d35a26cef7160bc9e35e.ISEAOM.mp3
123
- matches = Project.url_regex.match(url) or raise Error::Argument::Format, "Unexpected format to url '#{url}'"
124
- @url = matches[0]
125
- @filename = matches[1]
126
- @filename_local = Project.local_basename_from_url(@url)
127
- @offset = "#{matches[3]}:#{matches[4]}"
128
- @offset_seconds = (matches[3].to_i * 60) + matches[4].to_i
129
- end
130
-
131
- #Takes an optional specification of how many spaces to indent
132
- #the text by (default 0) and an optional specification of how
133
- #many characters to wrap at (default no wrapping).
134
- #
135
- #Returns the text with newlines normalized to Unix format, runs
136
- #of newlines shortened to a maximum of two newlines, leading and
137
- #trailing whitespace removed from each line, and the text
138
- #wrapped/indented as specified.
139
- def body_as_text(indent=nil, wrap=nil)
140
- text = self.body
141
- text = Utility.normalize_newlines(text)
142
- text.gsub!(/\n\n+/, "\n\n")
143
- text = text.split("\n").map{|line| line.strip }.join("\n")
144
- text = wrap_text(text, wrap) if wrap
145
- text = indent_text(text, indent) if indent
146
- text
147
- end
148
- alias :to_s :body_as_text
149
- alias :to_str :body_as_text
150
-
151
- #Takes an optional count of how many characters to wrap at
152
- #(default 72). Returns the body, presumed to be raw text, as
153
- #HTML. Any HTML tags in the body are escaped. Text blocks
154
- #separated by double newlines are converted to HTML paragraphs,
155
- #while single newlines are converted to HTML BR tags. Newlines
156
- #are normalized as in body_as_text, and lines in the HTML source
157
- #are automatically wrapped as specified.
158
- def body_as_html(wrap=72)
159
- text = body_as_text
160
- text = CGI::escapeHTML(text)
161
- text = Utility.newlines_to_html(text)
162
- text = text.split("\n").map do |line|
163
- wrap_text(line, 72).chomp
164
- end.join("\n")
165
- text
166
- end
167
-
168
- protected
169
-
170
- def indent_text(text, indent)
171
- text.gsub!(/^/, " " * indent)
172
- text
173
- end
174
-
175
- def wrap_text(text, wrap=72)
176
- format_text(text, wrap)
177
- end
178
-
179
- end #Chunk
55
+ require 'typingpool/transcript/chunk'
180
56
  end #Transcript
181
57
  end #Typingpool
@@ -0,0 +1,65 @@
1
+ module Typingpool
2
+ module Utility
3
+ module Castable
4
+
5
+ #Cast this object instance to a relative class. Call this from
6
+ #super in your own class if you want to pass args to the
7
+ #relative class constructor. All args after the first will be
8
+ #passed to new.
9
+ #
10
+ #A relative class can be a subclass and in some cases a sibling
11
+ #class, parent class, parent sibling class, grandparent class,
12
+ #grandparent sibling class, and so on. A relative class will
13
+ #never be higher up the inheritance tree than the subclasses of
14
+ #the class where Castable was included.
15
+ # ==== Params
16
+ # [sym] Symbol corresponding to relative class to cast into. For
17
+ # example, Class#as(:audio) will cast into a Class::Audio
18
+ # and Class#as(:csv) will cast into Class::CSV. Casting
19
+ # is case insensitive, which means you can't have class
20
+ # CSV and class Csv. To cast into a related class whose
21
+ # name is not directly under that of its parent, you
22
+ # must either specify the full name,
23
+ # e.g. Class#as(:foo_bar_baz) to cast to Foo::Bar::Baz,
24
+ # or a name relative to the parent,
25
+ # e.g. Class#as(:remote_html), where Class::Remote does
26
+ # not inherit from Class but Class::Remote::HTML does.
27
+ # ==== Returns
28
+ # New instance of subclass
29
+ def as(sym, *args)
30
+ if klass = self.class.relative_klass(sym.to_s.downcase)
31
+ klass.new(*args)
32
+ else
33
+ raise Error, "Can't find class '#{sym.to_s}' to cast to"
34
+ end #if subklass =...
35
+ end
36
+
37
+ def self.included(receiver)
38
+ receiver.extend(ClassMethods)
39
+ end
40
+
41
+ module ClassMethods
42
+ def inherited(subklass)
43
+ subklasses[subklass.to_s.split("#{self.name}::").last.downcase.gsub(/::/, '_')] = subklass
44
+ end
45
+
46
+ def subklasses
47
+ @subklasses ||= {}
48
+ end
49
+
50
+ def subklass(subklass_key)
51
+ subklasses[subklass_key]
52
+ end
53
+
54
+ def relative_klass(key)
55
+ if subklasses[key]
56
+ subklasses[key]
57
+ elsif self.superclass.respond_to? :relative_klass
58
+ self.superclass.relative_klass(key)
59
+ end
60
+ end
61
+
62
+ end #module ClassMethods
63
+ end #Castable
64
+ end #Utility
65
+ end #Typingpool
@@ -207,66 +207,6 @@ module Typingpool
207
207
  end
208
208
 
209
209
  end #class << self
210
-
211
- module Castable
212
- #Cast this object instance to a relative class. Call this from
213
- #super in your own class if you want to pass args to the
214
- #relative class constructor. All args after the first will be
215
- #passed to new.
216
- #
217
- #A relative class can be a subclass and in some cases a sibling
218
- #class, parent class, parent sibling class, grandparent class,
219
- #grandparent sibling class, and so on. A relative class will
220
- #never be higher up the inheritance tree than the subclasses of
221
- #the class where Castable was included.
222
- # ==== Params
223
- # [sym] Symbol corresponding to relative class to cast into. For
224
- # example, Class#as(:audio) will cast into a Class::Audio
225
- # and Class#as(:csv) will cast into Class::CSV. Casting
226
- # is case insensitive, which means you can't have class
227
- # CSV and class Csv. To cast into a related class whose
228
- # name is not directly under that of its parent, you
229
- # must either specify the full name,
230
- # e.g. Class#as(:foo_bar_baz) to cast to Foo::Bar::Baz,
231
- # or a name relative to the parent,
232
- # e.g. Class#as(:remote_html), where Class::Remote does
233
- # not inherit from Class but Class::Remote::HTML does.
234
- # ==== Returns
235
- # New instance of subclass
236
- def as(sym, *args)
237
- if klass = self.class.relative_klass(sym.to_s.downcase)
238
- klass.new(*args)
239
- else
240
- raise Error, "Can't find class '#{sym.to_s}' to cast to"
241
- end #if subklass =...
242
- end
243
-
244
- def self.included(receiver)
245
- receiver.extend(ClassMethods)
246
- end
247
-
248
- module ClassMethods
249
- def inherited(subklass)
250
- subklasses[subklass.to_s.split("#{self.name}::").last.downcase.gsub(/::/, '_')] = subklass
251
- end
252
-
253
- def subklasses
254
- @subklasses ||= {}
255
- end
256
-
257
- def subklass(subklass_key)
258
- subklasses[subklass_key]
259
- end
260
-
261
- def relative_klass(key)
262
- if subklasses[key]
263
- subklasses[key]
264
- elsif self.superclass.respond_to? :relative_klass
265
- self.superclass.relative_klass(key)
266
- end
267
- end
268
-
269
- end #module ClassMethods
270
- end #Castable
210
+ require 'typingpool/utility/castable'
271
211
  end #Utility
272
212
  end #Typingpool
@@ -19,6 +19,7 @@ class TestTpFinish < Typingpool::Test::Script
19
19
  assert_nothing_raised do
20
20
  tp_finish_outside_sandbox(dir, config_path)
21
21
  end
22
+ sleep 1 #pause before checking URLs so remote server has time to fully delete
22
23
  assert_empty(urls.select{|url| working_url? url })
23
24
  assert_all_assets_have_upload_status(csv, ['audio'], 'no')
24
25
  end