typingpool 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. data/LICENSE +20 -0
  2. data/README.markdown +452 -0
  3. data/lib/typingpool/amazon/hit/assignment/empty.rb +19 -0
  4. data/lib/typingpool/amazon/hit/assignment.rb +43 -0
  5. data/lib/typingpool/amazon/hit/full/fromsearchhits.rb +44 -0
  6. data/lib/typingpool/amazon/hit/full.rb +105 -0
  7. data/lib/typingpool/amazon/hit.rb +458 -0
  8. data/lib/typingpool/amazon/question.rb +45 -0
  9. data/lib/typingpool/amazon.rb +3 -677
  10. data/lib/typingpool/app/cli/formatter.rb +16 -0
  11. data/lib/typingpool/app/cli.rb +64 -0
  12. data/lib/typingpool/app/friendlyexceptions.rb +34 -0
  13. data/lib/typingpool/app.rb +2 -97
  14. data/lib/typingpool/config/root.rb +114 -0
  15. data/lib/typingpool/config.rb +13 -119
  16. data/lib/typingpool/filer/audio.rb +84 -0
  17. data/lib/typingpool/filer/csv.rb +57 -0
  18. data/lib/typingpool/filer/dir.rb +76 -0
  19. data/lib/typingpool/filer/files/audio.rb +63 -0
  20. data/lib/typingpool/filer/files.rb +55 -0
  21. data/lib/typingpool/filer.rb +4 -313
  22. data/lib/typingpool/project/local.rb +117 -0
  23. data/lib/typingpool/project/remote/s3.rb +135 -0
  24. data/lib/typingpool/project/remote/sftp.rb +100 -0
  25. data/lib/typingpool/project/remote.rb +65 -0
  26. data/lib/typingpool/project.rb +2 -396
  27. data/lib/typingpool/template/assignment.rb +17 -0
  28. data/lib/typingpool/template/env.rb +77 -0
  29. data/lib/typingpool/template.rb +2 -87
  30. data/lib/typingpool/test/script.rb +310 -0
  31. data/lib/typingpool/test.rb +1 -306
  32. data/lib/typingpool/transcript/chunk.rb +129 -0
  33. data/lib/typingpool/transcript.rb +1 -125
  34. data/lib/typingpool/utility/castable.rb +65 -0
  35. data/lib/typingpool/utility.rb +1 -61
  36. data/test/test_integration_script_6_tp_finish.rb +1 -0
  37. metadata +135 -81
@@ -0,0 +1,129 @@
1
module Typingpool
  class Transcript

    #Transcript::Chunk is the model class for one transcription by one
    #Mechanical Turk worker of one "chunk" (a file) of audio, which in
    #turn is a portion of a larger recording (for example, one minute
    #of a 60 minute interview). It is basically parallel and similar
    #to an Amazon::HIT instance. Transcript is a container for these
    #chunks, which know how to render themselves as text and HTML.
    class Chunk
      require 'cgi'
      require 'rubygems/text'
      include Gem::Text

      #Get/set the raw text of the transcript
      attr_accessor :body

      #Get/set the Amazon ID of the Mechanical Turk worker who
      #transcribed the audio into text
      attr_accessor :worker

      #Get/set the id of the Amazon::HIT associated with this chunk
      attr_accessor :hit

      #Get/set the id of the Project#local associated with this chunk
      attr_accessor :project

      #Return the offset associated with the chunk, in MM:SS
      #format. This corresponds to the associated audio file, which is
      #a chunk of a larger recording and which starts at a particular
      #time offset, for example from 1:00 (the offset) to 2:00 (the
      #next offset).
      #
      #This should be updated to return HH:MM:SS and MM:SS.sss when
      #appropriate, since in Project#interval we use that format and
      #allow audio to be divided into such units. (TODO)
      attr_reader :offset

      #Returns the offset in seconds. So an offset of 1:00 would
      #return 60. Only populated once url= has been called.
      attr_reader :offset_seconds

      #Returns the name of the remote audio file corresponding to this
      #chunk. The remote file has the project ID and pseudo random
      #characters added to it.
      attr_reader :filename

      #Returns the name of the local audio file corresponding to this
      #chunk.
      attr_reader :filename_local

      #Returns the URL of the remote audio transcribed in the body of
      #this chunk.
      attr_reader :url

      #Constructor. Takes the raw text of the transcription.
      def initialize(body)
        @body = body
      end

      #Sorts by offset seconds. Returns nil, per the usual <=>
      #convention, when other does not expose offset_seconds, rather
      #than raising NoMethodError.
      def <=>(other)
        return nil unless other.respond_to?(:offset_seconds)
        offset_seconds <=> other.offset_seconds
      end

      #Takes an URL. As an important side effect, sets various
      #attributes, including url, filename, filename_local, offset and
      #offset_seconds. So setting Chunk#url= http://whateverwhatever
      #is an important step in populating the instance.
      #
      #Raises Error::Argument::Format when the URL does not match
      #Project.url_regex.
      def url=(url)
        #http://ryantate.com/transfer/Speech.01.00.ede9b0f2aed0d35a26cef7160bc9e35e.ISEAOM.mp3
        matches = Project.url_regex.match(url)
        raise Error::Argument::Format, "Unexpected format to url '#{url}'" unless matches
        @url = matches[0]
        @filename = matches[1]
        @filename_local = Project.local_basename_from_url(@url)
        #Captures 3 and 4 are used as the minutes and seconds of the offset.
        minutes = matches[3]
        seconds = matches[4]
        @offset = "#{minutes}:#{seconds}"
        @offset_seconds = (minutes.to_i * 60) + seconds.to_i
      end

      #Takes an optional specification of how many spaces to indent
      #the text by (default nil, meaning no indent) and an optional
      #specification of how many characters to wrap at (default nil,
      #meaning no wrapping).
      #
      #Returns the text with newlines normalized via
      #Utility.normalize_newlines, runs of newlines shortened to a
      #maximum of two newlines, leading and trailing whitespace
      #removed from each line, and the text wrapped/indented as
      #specified. The stored body is never modified.
      def body_as_text(indent=nil, wrap=nil)
        text = Utility.normalize_newlines(self.body)
        #Non-destructive gsub: if the normalizer hands back the very
        #same String object, a gsub! here would silently rewrite @body
        #(or raise on a frozen body).
        text = text.gsub(/\n\n+/, "\n\n")
        text = text.split("\n").map{|line| line.strip }.join("\n")
        text = wrap_text(text, wrap) if wrap
        text = indent_text(text, indent) if indent
        text
      end
      alias :to_s :body_as_text
      alias :to_str :body_as_text

      #Takes an optional count of how many characters to wrap at
      #(default 72; pass nil to disable wrapping). Returns the body,
      #presumed to be raw text, as HTML. Any HTML tags in the body are
      #escaped, then newlines are converted to markup by
      #Utility.newlines_to_html. Newlines are normalized as in
      #body_as_text, and lines in the HTML source are wrapped at the
      #requested width.
      def body_as_html(wrap=72)
        text = body_as_text
        text = CGI::escapeHTML(text)
        text = Utility.newlines_to_html(text)
        text.split("\n").map do |line|
          #Honor the caller's width; previously 72 was hard-coded here
          #and the wrap argument was ignored.
          wrap ? wrap_text(line, wrap).chomp : line
        end.join("\n")
      end

      protected

      #Returns a copy of text with every line prefixed by indent
      #spaces. The argument itself is left untouched.
      def indent_text(text, indent)
        text.gsub(/^/, " " * indent)
      end

      #Wraps text at wrap characters (default 72) using
      #Gem::Text#format_text.
      def wrap_text(text, wrap=72)
        format_text(text, wrap)
      end
    end #Chunk
  end #Transcript
end #Typingpool
@@ -52,130 +52,6 @@ module Typingpool
52
52
  @chunks.push(chunk)
53
53
  end
54
54
 
55
- #Transcript::Chunk is the model class for one transcription by one
56
- #Mechanical Turk worker of one "chunk" (a file) of audio, which in
57
- #turn is a portion of a larger recording (for example, one minute
58
- #of a 60 minute interview). It is basically parallel and similar
59
- #to an Amazon::HIT instance. Transcript is a container for these
60
- #chunks, which know how to render themselves as text and HTML.
61
- class Chunk
62
- require 'cgi'
63
- require 'rubygems/text'
64
- include Gem::Text
65
-
66
- #Get/set the raw text of the transcript
67
- attr_accessor :body
68
-
69
- #Get/set the Amazon ID of the Mechanical Turk worker who
70
- #transcribed the audio into text
71
- attr_accessor :worker
72
-
73
- #Get/set the id of the Amazon::HIT associated with this chunk
74
- attr_accessor :hit
75
-
76
- #Get/set the id of the Project#local associated with this chunk
77
- attr_accessor :project
78
-
79
- #Return the offset associated with the chunk, in MM:SS
80
- #format. This corresponds to the associated audio file, which is
81
- #a chunk of a larger recording and which starts at a particular
82
- #time offset, for example from 1:00 (the offset) to 2:00 (the
83
- #next offset).
84
- #
85
- #
86
- #This should be updated to return HH:MM:SS and MM:SS.sss when
87
- #appropriate, since in Project#interval we use that format and
88
- #allow audio to be divided into such units. (TODO)
89
- attr_reader :offset
90
-
91
- #Returns the offset in seconds. So for an offset of 1:00 would return 60.
92
- attr_reader :offset_seconds
93
-
94
- #Returns the name of the remote audio file corresponding to this
95
- #chunk. The remote file has the project ID and pseudo random
96
- #characters added to it.
97
- attr_reader :filename
98
-
99
- #Returns the name of the local audio file corresponding to this
100
- #chunk.
101
- attr_reader :filename_local
102
-
103
- #Returns the URL of the remote audio transcribed in the body of
104
- #this chunk.
105
- attr_reader :url
106
-
107
- #Constructor. Takes the raw text of the transcription.
108
- def initialize(body)
109
- @body = body
110
- end
111
-
112
- #Sorts by offset seconds.
113
- def <=>(other)
114
- self.offset_seconds <=> other.offset_seconds
115
- end
116
-
117
- #Takes an URL. As an important side effect, sets various
118
- #attributes, including url, filename, filename_local, offset and
119
- #offset_seconds. So setting Chunk#url= http://whateverwhatever
120
- #is an important step in populating the instance.
121
- def url=(url)
122
- #http://ryantate.com/transfer/Speech.01.00.ede9b0f2aed0d35a26cef7160bc9e35e.ISEAOM.mp3
123
- matches = Project.url_regex.match(url) or raise Error::Argument::Format, "Unexpected format to url '#{url}'"
124
- @url = matches[0]
125
- @filename = matches[1]
126
- @filename_local = Project.local_basename_from_url(@url)
127
- @offset = "#{matches[3]}:#{matches[4]}"
128
- @offset_seconds = (matches[3].to_i * 60) + matches[4].to_i
129
- end
130
-
131
- #Takes an optional specification of how many spaces to indent
132
- #the text by (default 0) and an optional specification of how
133
- #many characters to wrap at (default no wrapping).
134
- #
135
- #Returns the text with newlines normalized to Unix format, runs
136
- #of newlines shortened to a maximum of two newlines, leading and
137
- #trailing whitespace removed from each line, and the text
138
- #wrapped/indented as specified.
139
- def body_as_text(indent=nil, wrap=nil)
140
- text = self.body
141
- text = Utility.normalize_newlines(text)
142
- text.gsub!(/\n\n+/, "\n\n")
143
- text = text.split("\n").map{|line| line.strip }.join("\n")
144
- text = wrap_text(text, wrap) if wrap
145
- text = indent_text(text, indent) if indent
146
- text
147
- end
148
- alias :to_s :body_as_text
149
- alias :to_str :body_as_text
150
-
151
- #Takes an optional count of how many characters to wrap at
152
- #(default 72). Returns the body, presumed to be raw text, as
153
- #HTML. Any HTML tags in the body are escaped. Text blocks
154
- #separated by double newlines are converted to HTML paragraphs,
155
- #while single newlines are converted to HTML BR tags. Newlines
156
- #are normalized as in body_as_text, and lines in the HTML source
157
- #are automatically wrapped as specified.
158
- def body_as_html(wrap=72)
159
- text = body_as_text
160
- text = CGI::escapeHTML(text)
161
- text = Utility.newlines_to_html(text)
162
- text = text.split("\n").map do |line|
163
- wrap_text(line, 72).chomp
164
- end.join("\n")
165
- text
166
- end
167
-
168
- protected
169
-
170
- def indent_text(text, indent)
171
- text.gsub!(/^/, " " * indent)
172
- text
173
- end
174
-
175
- def wrap_text(text, wrap=72)
176
- format_text(text, wrap)
177
- end
178
-
179
- end #Chunk
55
+ require 'typingpool/transcript/chunk'
180
56
  end #Transcript
181
57
  end #Typingpool
@@ -0,0 +1,65 @@
1
module Typingpool
  module Utility
    module Castable

      #Cast this object instance to a relative class. Call this from
      #super in your own class if you want to pass args to the
      #relative class constructor. All args after the first will be
      #passed to new.
      #
      #A relative class can be a subclass and in some cases a sibling
      #class, parent class, parent sibling class, grandparent class,
      #grandparent sibling class, and so on. A relative class will
      #never be higher up the inheritance tree than the subclasses of
      #the class where Castable was included.
      # ==== Params
      # [sym] Symbol corresponding to relative class to cast into. For
      #       example, Class#as(:audio) will cast into a Class::Audio
      #       and Class#as(:csv) will cast into Class::CSV. Casting
      #       is case insensitive, which means you can't have class
      #       CSV and class Csv. To cast into a related class whose
      #       name is not directly under that of its parent, you
      #       must either specify the full name,
      #       e.g. Class#as(:foo_bar_baz) to cast to Foo::Bar::Baz,
      #       or a name relative to the parent,
      #       e.g. Class#as(:remote_html), where Class::Remote does
      #       not inherit from Class but Class::Remote::HTML does.
      # ==== Returns
      # New instance of the relative class
      # ==== Raises
      # Error when no relative class is registered under sym
      def as(sym, *args)
        klass = self.class.relative_klass(sym.to_s.downcase)
        raise Error, "Can't find class '#{sym}' to cast to" unless klass
        klass.new(*args)
      end

      #Hook run when Castable is included: gives the including class
      #(and therefore its subclasses) the registry methods below.
      def self.included(receiver)
        receiver.extend(ClassMethods)
      end

      module ClassMethods
        #Class-level hook run whenever a subclass is created. Records
        #the subclass in this class's registry under a lowercased key:
        #the part of the subclass name following "<this class>::",
        #with any remaining "::" turned into "_".
        def inherited(subklass)
          #Keep the hook chain intact so other inherited hooks (from
          #other extended modules, or Class#inherited) still run.
          super
          relative_name = subklass.to_s.split("#{self.name}::").last
          subklasses[relative_name.downcase.gsub(/::/, '_')] = subklass
        end

        #Registry of direct subclasses, keyed as described at inherited.
        #Stored per class, in a class-level instance variable.
        def subklasses
          @subklasses ||= {}
        end

        #Looks up a direct subclass by its registry key. Returns nil
        #when there is none.
        def subklass(subklass_key)
          subklasses[subklass_key]
        end

        #Looks up key among this class's direct subclasses, then
        #walks up through each superclass that also has a registry.
        #Returns nil when nothing matches.
        def relative_klass(key)
          if subklasses[key]
            subklasses[key]
          elsif self.superclass.respond_to? :relative_klass
            self.superclass.relative_klass(key)
          end
        end

      end #module ClassMethods
    end #Castable
  end #Utility
end #Typingpool
@@ -207,66 +207,6 @@ module Typingpool
207
207
  end
208
208
 
209
209
  end #class << self
210
-
211
- module Castable
212
- #Cast this object instance to a relative class. Call this from
213
- #super in your own class if you want to pass args to the
214
- #relative class constructor. All args after the first will be
215
- #passed to new.
216
- #
217
- #A relative class can be a subclass and in some cases a sibling
218
- #class, parent class, parent sibling class, grandparent class,
219
- #grandparent sibling class, and so on. A relative class will
220
- #never be higher up the inheritance tree than the subclasses of
221
- #the class where Castable was included.
222
- # ==== Params
223
- # [sym] Symbol corresponding to relative class to cast into. For
224
- # example, Class#as(:audio) will cast into a Class::Audio
225
- # and Class#as(:csv) will cast into Class::CSV. Casting
226
- # is class insensitive, which means you can't have class
227
- # CSV and class Csv. To cast into a related class whose
228
- # name is not not directly under that of its parent, you
229
- # must either specify the full name,
230
- # e.g. Class#as(:foo_bar_baz) to cast to Foo::Bar::Baz,
231
- # or a name relative to the parent,
232
- # e.g. Class#as(:remote_html), where Class::Remote does
233
- # not inherit from Class but Class::Remote::HTML does.
234
- # ==== Returns
235
- # New instance of subclass
236
- def as(sym, *args)
237
- if klass = self.class.relative_klass(sym.to_s.downcase)
238
- klass.new(*args)
239
- else
240
- raise Error, "Can't find class '#{sym.to_s}' to cast to"
241
- end #if subklass =...
242
- end
243
-
244
- def self.included(receiver)
245
- receiver.extend(ClassMethods)
246
- end
247
-
248
- module ClassMethods
249
- def inherited(subklass)
250
- subklasses[subklass.to_s.split("#{self.name}::").last.downcase.gsub(/::/, '_')] = subklass
251
- end
252
-
253
- def subklasses
254
- @subklasses ||= {}
255
- end
256
-
257
- def subklass(subklass_key)
258
- subklasses[subklass_key]
259
- end
260
-
261
- def relative_klass(key)
262
- if subklasses[key]
263
- subklasses[key]
264
- elsif self.superclass.respond_to? :relative_klass
265
- self.superclass.relative_klass(key)
266
- end
267
- end
268
-
269
- end #module ClassMethods
270
- end #Castable
210
+ require 'typingpool/utility/castable'
271
211
  end #Utility
272
212
  end #Typingpool
@@ -19,6 +19,7 @@ class TestTpFinish < Typingpool::Test::Script
19
19
  assert_nothing_raised do
20
20
  tp_finish_outside_sandbox(dir, config_path)
21
21
  end
22
+ sleep 1 #pause before checking URLs so remote server has time to fully delete
22
23
  assert_empty(urls.select{|url| working_url? url })
23
24
  assert_all_assets_have_upload_status(csv, ['audio'], 'no')
24
25
  end