mturk 1.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +2 -0
- data/.gemtest +0 -0
- data/History.md +105 -0
- data/LICENSE.txt +202 -0
- data/Manifest.txt +72 -0
- data/NOTICE.txt +4 -0
- data/README.md +100 -0
- data/Rakefile +33 -0
- data/bin/mturk +9 -0
- data/lib/amazon/util.rb +10 -0
- data/lib/amazon/util/binder.rb +48 -0
- data/lib/amazon/util/data_reader.rb +169 -0
- data/lib/amazon/util/filter_chain.rb +79 -0
- data/lib/amazon/util/hash_nesting.rb +93 -0
- data/lib/amazon/util/lazy_results.rb +59 -0
- data/lib/amazon/util/logging.rb +23 -0
- data/lib/amazon/util/paginated_iterator.rb +70 -0
- data/lib/amazon/util/proactive_results.rb +116 -0
- data/lib/amazon/util/threadpool.rb +129 -0
- data/lib/amazon/util/user_data_store.rb +100 -0
- data/lib/amazon/webservices/mechanical_turk.rb +123 -0
- data/lib/amazon/webservices/mechanical_turk_requester.rb +285 -0
- data/lib/amazon/webservices/mturk/mechanical_turk_error_handler.rb +153 -0
- data/lib/amazon/webservices/mturk/question_generator.rb +58 -0
- data/lib/amazon/webservices/util/amazon_authentication_relay.rb +72 -0
- data/lib/amazon/webservices/util/command_line.rb +155 -0
- data/lib/amazon/webservices/util/convenience_wrapper.rb +90 -0
- data/lib/amazon/webservices/util/filter_proxy.rb +45 -0
- data/lib/amazon/webservices/util/mock_transport.rb +70 -0
- data/lib/amazon/webservices/util/request_signer.rb +42 -0
- data/lib/amazon/webservices/util/rest_transport.rb +120 -0
- data/lib/amazon/webservices/util/soap_simplifier.rb +48 -0
- data/lib/amazon/webservices/util/soap_transport.rb +20 -0
- data/lib/amazon/webservices/util/soap_transport_header_handler.rb +27 -0
- data/lib/amazon/webservices/util/unknown_result_exception.rb +27 -0
- data/lib/amazon/webservices/util/validation_exception.rb +55 -0
- data/lib/amazon/webservices/util/xml_simplifier.rb +61 -0
- data/lib/mturk.rb +19 -0
- data/lib/mturk/version.rb +6 -0
- data/run_rcov.sh +1 -0
- data/samples/best_image/BestImage.rb +61 -0
- data/samples/best_image/best_image.properties +39 -0
- data/samples/best_image/best_image.question +82 -0
- data/samples/blank_slate/BlankSlate.rb +63 -0
- data/samples/blank_slate/BlankSlate_multithreaded.rb +67 -0
- data/samples/helloworld/MTurkHelloWorld.rb +56 -0
- data/samples/helloworld/mturk.yml +8 -0
- data/samples/review_policy/ReviewPolicy.rb +139 -0
- data/samples/review_policy/review_policy.question +30 -0
- data/samples/reviewer/Reviewer.rb +103 -0
- data/samples/reviewer/mturk.yml +8 -0
- data/samples/simple_survey/SimpleSurvey.rb +98 -0
- data/samples/simple_survey/simple_survey.question +30 -0
- data/samples/site_category/SiteCategory.rb +87 -0
- data/samples/site_category/externalpage.htm +71 -0
- data/samples/site_category/site_category.input +6 -0
- data/samples/site_category/site_category.properties +56 -0
- data/samples/site_category/site_category.question +9 -0
- data/test/mturk/test_changehittypeofhit.rb +130 -0
- data/test/mturk/test_error_handler.rb +403 -0
- data/test/mturk/test_mechanical_turk_requester.rb +178 -0
- data/test/mturk/test_mock_mechanical_turk_requester.rb +205 -0
- data/test/test_mturk.rb +21 -0
- data/test/unit/test_binder.rb +89 -0
- data/test/unit/test_data_reader.rb +135 -0
- data/test/unit/test_exceptions.rb +32 -0
- data/test/unit/test_hash_nesting.rb +99 -0
- data/test/unit/test_lazy_results.rb +89 -0
- data/test/unit/test_mock_transport.rb +132 -0
- data/test/unit/test_paginated_iterator.rb +58 -0
- data/test/unit/test_proactive_results.rb +108 -0
- data/test/unit/test_question_generator.rb +55 -0
- data/test/unit/test_threadpool.rb +50 -0
- data/test/unit/test_user_data_store.rb +80 -0
- metadata +225 -0
- metadata.gz.sig +0 -0
@@ -0,0 +1,59 @@
|
|
1
|
+
# Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
|
2
|
+
# License:: Apache License, Version 2.0
|
3
|
+
|
4
|
+
require 'amazon/util/paginated_iterator'
|
5
|
+
|
6
|
+
module Amazon
|
7
|
+
module Util
|
8
|
+
|
9
|
+
# This class provides a wrapper for lazy evaluation of results.
# The constructor takes a block which should accept a pagenumber
# and return a page worth of results. Items that have already been
# pulled from the underlying iterator are cached in @truth, so the
# result set can be enumerated or indexed repeatedly without asking
# the feeder for the same items again.
class LazyResults
  include Enumerable

  # feeder is handed straight to a PaginatedIterator; nothing is
  # fetched until results are actually requested.
  def initialize( &feeder )
    @iterator = PaginatedIterator.new( &feeder )
    flush
  end

  # clear the result set and start over again
  def flush
    @truth = []
    @iterator.restart
  end

  # iterate over entire result set, loading lazily.
  # Already-loaded items are yielded first, then the remaining items are
  # pulled from the iterator (and cached) one at a time.
  # Returns an Enumerator when called without a block.
  def each( &block ) # :yields: item
    return enum_for( :each ) unless block_given?
    @truth.each {|e| yield e }
    @iterator.each {|e| @truth << e ; yield e }
  end

  # index into the array set. if requested index has not been loaded, will load up to that index.
  # Returns nil when the index lies beyond the end of the result set.
  def []( index )
    feedme while !@iterator.done && index >= @truth.size
    @truth[index]
  end

  # fully populate the result set and return a true array
  # (a copy, so callers cannot modify the internal cache)
  def to_a
    feedme until @iterator.done
    @truth.dup
  end

  def inspect # :nodoc:
    # The paging state belongs to the iterator, not to this object. The
    # iterator offers no public reader for its page counter, so it is read
    # directly; this is debug output only.
    page = @iterator.instance_variable_get( :@page )
    "#<Amazon::Util::LazyResults truth_size=#{@truth.size} page=#{page} done=#{@iterator.done}>"
  end

  private

  # fetch the next item from the iterator and stick it in @truth.
  # The iterator answers nil once it has run dry; that nil is not stored.
  def feedme
    item = @iterator.next
    @truth << item unless item.nil?
  end

end # LazyResults
|
57
|
+
|
58
|
+
end # Amazon::Util
|
59
|
+
end # Amazon
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
|
2
|
+
# License:: Apache License, Version 2.0
|
3
|
+
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
module Amazon
|
7
|
+
module Util
|
8
|
+
# Mixin that gives the including object a +log+ method backed by a
# single Logger shared by everything that mixes this module in.
module Logging

  # the shared logger; created on first use or by set_log
  @@AmazonLogger = nil

  # Point the shared logger at +filename+ (handed directly to Logger.new)
  # and return the new Logger.
  def set_log( filename )
    @@AmazonLogger = Logger.new( filename )
  end

  # Write +str+ at debug level. When no logger has been configured yet,
  # one writing to 'mturk.log' is set up first.
  def log( str )
    set_log( 'mturk.log' ) unless @@AmazonLogger
    @@AmazonLogger.debug( str )
  end

end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
|
2
|
+
# License:: Apache License, Version 2.0
|
3
|
+
|
4
|
+
module Amazon
|
5
|
+
module Util
|
6
|
+
|
7
|
+
# PaginatedIterator provides an iterator interface to a paginated
# dataset, buffering the current page. It can be used to stream
# large result sets which would not fit into memory or only need
# to be processed in a single pass.
class PaginatedIterator

  # feeder should be a block that accepts a pagenumber and
  # returns an array containing the corresponding page
  # worth of results. It should return an empty array when
  # there are no more results in the dataset.
  # (A nil or a single non-array value is tolerated as well; see fetchpage.)
  def initialize( &feeder )
    @feeder = feeder
    restart
  end

  # resets the iterator to start pulling from the first page
  def restart
    @buffer = []
    @page = 1
    @done = false
  end

  # returns the next item, or nil if there are no more items
  def next
    fetchpage if @buffer.empty?
    @buffer.shift
  end

  # checks if we have another item available
  # (may fetch the next page in order to find out)
  def hasNext
    fetchpage if @buffer.empty?
    !@buffer.empty?
  end

  # iterates over the remaining items.
  # Returns an Enumerator when called without a block.
  def each( &block ) # :yields: item
    return enum_for( :each ) unless block_given?
    until @done
      item = self.next
      yield item unless item.nil?
    end
  end

  # true once the feeder has reported that the dataset is exhausted
  attr_reader :done

  private

  # Ask the feeder for the current page and append it to the buffer.
  # The feeder's answer is normalised into a flat array without nils, so
  # a nil, a bare value or a nested array are all accepted. An empty page
  # marks the iterator as done. Returns the normalised page.
  def fetchpage
    return [] if @done
    res = [@feeder.call( @page )].flatten.compact
    if res.empty?
      @done = true
    else
      @page += 1
      @buffer.concat( res )
    end
    res
  end

end # PaginatedIterator
|
68
|
+
|
69
|
+
end # Amazon::Util
|
70
|
+
end # Amazon
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
|
2
|
+
# License:: Apache License, Version 2.0
|
3
|
+
|
4
|
+
require 'monitor'
|
5
|
+
require 'amazon/util/threadpool'
|
6
|
+
|
7
|
+
module Amazon
|
8
|
+
module Util
|
9
|
+
|
10
|
+
# ProactiveResults is not as lazy as LazyResults
# The constructor takes a block which should accept a pagenumber
# and return a page worth of results. Pages are requested ahead of
# time through a ThreadPool and are consumed strictly in page order.
class ProactiveResults
  include Enumerable

  # number of worker threads, and therefore of pages requested up front
  THREADPOOL_SIZE = 3

  # exception_handler (optional) is passed on to the ThreadPool;
  # feeder is called with a page number, from a pool thread.
  def initialize( exception_handler=nil, &feeder )
    @feeder = feeder
    @eh = exception_handler
    @tp = nil
    self.flush
  end

  # clear the result set and start over again
  def flush
    @tp.finish unless @tp.nil?
    @tp = ThreadPool.new(THREADPOOL_SIZE, @eh)
    @done = false
    # @inflight[page] holds the work item for a page that has been
    # requested but not yet consumed; the monitor guards it because
    # worker threads register follow-up pages themselves
    @inflight = [].extend(MonitorMixin)
    @current_page = 1
    @truth = []
    1.upto(THREADPOOL_SIZE) do |page|
      getPage(page)
    end
  end

  # iterate over entire result set, waiting for
  # threads to finish where necessary.
  # Returns an Enumerator when called without a block.
  def each( &block ) # :yields: item
    return enum_for( :each ) unless block_given?
    index = 0
    while true
      if index >= @truth.size
        break if @done
        feedme
      else
        yield @truth[index]
        index += 1
      end
    end
  end

  # index into the result set. if we haven't
  # loaded enough, will wait until we have
  def []( index )
    feedme while !@done && index >= @truth.size
    @truth[index]
  end

  # wait for the entire results set to be populated,
  # then return an array of the results
  def to_a
    feedme until @done
    @truth.dup
  end

  def inspect # :nodoc:
    # pending pages = pages that were requested but not consumed yet
    pending = @inflight.synchronize { @inflight.compact.size }
    "#<Amazon::Util::ProactiveResults truth_size=#{@truth.size} pending_pages=#{pending}>"
  end

  private

  # queue a fetch of page +num+ and remember its work item
  def getPage(num)
    @inflight.synchronize do
      workitem = @tp.addWork(num) { |n| worker(n) }
      @inflight[num] = workitem
    end
  end

  # runs on a pool thread: fetch one page, and if it was not empty
  # schedule the page THREADPOOL_SIZE further on. The follow-up lives in
  # +ensure+, so when the feeder raises, res is still [] and nothing
  # further is scheduled for this worker's sequence of pages.
  # NOTE(review): after such a failure the other sequences keep going, so
  # feedme may later hit its "last page" check with pages still in flight
  # -- confirm whether that is intended.
  def worker(page)
    res = []
    begin
      res = @feeder.call( page )
    ensure
      getPage( page + THREADPOOL_SIZE ) unless (res.nil? || res.empty?)
    end
  end

  # consume the next page in order, blocking until its result is ready,
  # and append its items to @truth. Marks the set as done once a page
  # turns up that was never requested.
  def feedme
    return if @done
    item = nil
    @inflight.synchronize do
      if @inflight[@current_page].nil?
        raise "This should be the last page! #{@current_page} #{@inflight.inspect}" unless @inflight.compact.empty?
        @done = true
        return
      end
      item = @inflight[@current_page]
      @inflight[@current_page] = nil # clear out our references
      @current_page += 1
    end
    res = item.getResult
    case res
    when Array
      @truth.concat( res )
    when Exception, NilClass
      # ignore: a failed or empty page contributes no items
    else
      raise "Unexpected result type: #{res.class}"
    end
  end

end
|
114
|
+
|
115
|
+
end # Amazon::Util
|
116
|
+
end # Amazon
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
|
2
|
+
# License:: Apache License, Version 2.0
|
3
|
+
|
4
|
+
require 'thread'
|
5
|
+
require 'set'
|
6
|
+
|
7
|
+
module Amazon
|
8
|
+
module Util
|
9
|
+
|
10
|
+
# ThreadPool is a generic threadpooling class that enables
# easier multithreaded workflows. Initialize with a thread count,
# then addWork to queue up tasks. You can +sync+ to ensure the current
# workload is complete, or +finish+ to flush the threads when you're done.
class ThreadPool

  # First arg is the thread count. Threads will be created once and wait
  # for work (no performance penalty, since they're waiting on a Queue).
  # Second arg (optional) is a proc to be used as an exception handler. If
  # this argument is passed in and the thread encounters an uncaught
  # exception, the proc will be called with the failed WorkItem as the only
  # argument; the exception itself is available through WorkItem#getResult.
  def initialize( num_threads, exception_handler=nil )
    @work = Queue.new
    @threads = ThreadGroup.new
    num_threads.times do
      worker_thread = Thread.new { workerProcess(exception_handler) }
      @threads.add worker_thread
    end
  end

  # add work to the queue
  # pass any number of arguments, they will be passed on to the block.
  # Returns the WorkItem, whose getResult blocks until the work has run.
  def addWork( *args, &block )
    item = WorkItem.new( args, &block )
    @work.push( item )
    item
  end

  # how many worker threads are there?
  def threadcount
    @threads.list.length
  end

  # request thread completion
  # No more work will be performed
  def noMoreWork
    # one :Finish marker per thread; each worker exits when it pops one
    threadcount.times { @work << :Finish }
  end

  # request thread completion and wait for them to finish
  def finish
    noMoreWork
    @threads.list.each(&:join)
  end

  # wait for the currently queued work to finish
  # (This freezes up the entire pool, temporarily)
  def sync
    t = threadcount

    # with at most one worker, work runs in queue order, so a single
    # marker item is enough
    if t < 2
      item = addWork { :sync }
      return item.getResult
    end

    q = Queue.new
    items = []

    # the first item parks one thread until the release item below has run
    items << addWork { q.pop }

    # each of the next t-2 items parks another thread by waiting on the
    # item queued just before it
    (t-2).times do |z|
      items << addWork(z) { |i| items[i].getResult }
    end

    # the release item needs the one remaining thread; once it runs, the
    # chain of waiting items unwinds
    addWork { q.push :sync }

    items.last.getResult
  end

  private

  # body of every worker thread: run queued items until a :Finish marker
  # arrives. Exceptions are caught here so that the thread survives them.
  def workerProcess( exception_handler=nil )
    loop do
      workitem = @work.pop
      break if workitem == :Finish
      begin
        workitem.run
      rescue Exception => e
        if exception_handler.nil?
          print "Worker thread has thrown an exception: #{e}\n"
        else
          exception_handler.call(workitem)
        end
      end
    end
  end

  # One unit of queued work: the block, its arguments and, eventually,
  # its result (or the exception it raised).
  class WorkItem
    attr_reader :args, :block

    def initialize( args, &block )
      @args = args
      @block = block
      @result = Queue.new
      @lock = Mutex.new
      @resolved = false
      @value = nil
    end

    # Call the block and publish its return value. An exception is
    # published as the result too, and then re-raised to the caller.
    def run
      res = @block.call( *@args )
      @result.push res
    rescue Exception => e
      @result.push e
      raise
    end

    # Block until the work has run, then return its result. The value is
    # remembered, so repeated calls return it again, and the lock lets
    # several threads wait on the same item without one of them hanging.
    def getResult
      @lock.synchronize do
        unless @resolved
          @value = @result.pop
          @resolved = true
        end
        @value
      end
    end
  end

end # ThreadPool
|
127
|
+
|
128
|
+
end # Amazon::Util
|
129
|
+
end # Amazon
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
|
2
|
+
# License:: Apache License, Version 2.0
|
3
|
+
|
4
|
+
require 'amazon/util/data_reader'
|
5
|
+
|
6
|
+
module Amazon
|
7
|
+
module Util
|
8
|
+
|
9
|
+
# The UserDataStore is a platform-independent class intended to store application configuration information in a human-readable per-user location.
# Data is organised as namespaces of properties; each namespace is kept
# in its own file inside a ".<app_name>" folder under the home directory.
class UserDataStore

  # Locate (creating it if needed) the store folder for app_name and
  # load every namespace file found there.
  def initialize(app_name)
    @app = sanitizeKey(app_name)
    @base = findBaseStore(@app)
    @dirty = []
    @data = Hash.new {|h,a| h[a] = {} }
    loadConfig
  end

  # value of property in namespace, or nil when it is not set
  def get(namespace,property)
    ns = sanitizeKey(namespace)
    @data[ns][property]
  end

  # set property in namespace; nothing is written to disk until +save+
  def set(namespace,property,value)
    ns = sanitizeKey(namespace)
    @dirty << ns unless @dirty.member? ns
    @data[ns][property] = value
  end

  # remove one property from namespace, or the whole namespace when no
  # property is given; nothing is written to disk until +save+
  def clear(namespace,property = nil)
    ns = sanitizeKey(namespace)
    @dirty << ns unless @dirty.member? ns
    if property.nil?
      @data[ns] = {}
    else
      @data[ns].delete_if {|k,_v| k == property }
    end
  end

  # write every modified namespace to disk. A namespace stops being
  # tracked as modified once saveNamespace reports success for it.
  def save
    @dirty.delete_if do |name|
      saveNamespace( name )
    end
  end

  private

  # load every non-hidden file in the store folder as a namespace
  def loadConfig
    # Dir.foreach closes the directory handle when it is finished
    Dir.foreach(@base) do |filename|
      next if filename.start_with?('.')
      loadNamespace( filename )
    end
  end

  # namespace and application names are case-insensitive
  def sanitizeKey(ns)
    ns.to_s.downcase
  end

  def loadNamespace(name)
    @data[name] = DataReader.load( File.join( @base, name ), :Properties )
  end

  # Rewrite the file of one namespace. Returns a true value when the
  # namespace no longer needs saving.
  def saveNamespace(name)
    filename = File.join( @base, name )
    # kill old config before saving
    File.delete filename if File.exist? filename
    # an emptied namespace is fully saved once its file is gone
    return true if @data[name].empty?
    # now save out the data
    # NOTE(review): the result of DataReader.save decides whether the
    # namespace stays marked as modified -- confirm it is true on success
    DataReader.save( filename, @data[name], :Properties )
  end

  # path of the store folder for app_name, created if it does not exist
  def findBaseStore(app_name)
    home = findHomeDir
    folder = findAppFolderName(app_name)
    base = File.join( home, folder )
    Dir.mkdir( base ) unless File.exist?( base )
    base
  end

  # Work out the user's home directory, trying in order: the
  # TEST_HOME_OVERRIDE environment variable, RubyGems, HOME, USERPROFILE,
  # HOMEDRIVE+HOMEPATH on Windows, and finally the current directory.
  def findHomeDir
    return ENV['TEST_HOME_OVERRIDE'] unless ENV['TEST_HOME_OVERRIDE'].nil?
    return Gem.user_home if defined? Gem
    return ENV['HOME'] unless ENV['HOME'].nil?
    return ENV['USERPROFILE'] unless ENV['USERPROFILE'].nil?
    # RUBY_PLATFORM replaces the PLATFORM constant, which current Rubies
    # no longer define
    if RUBY_PLATFORM =~ /win32|mswin|mingw/ && ENV['HOMEDRIVE'] && ENV['HOMEPATH']
      return ENV['HOMEDRIVE'] + ENV['HOMEPATH']
    end
    '.'
  end

  # the store is a hidden folder named after the application
  def findAppFolderName(app_name)
    "." + app_name
  end

end # UserDataStore
|
98
|
+
|
99
|
+
end # Amazon::Util
|
100
|
+
end # Amazon
|