mturk 1.8.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +2 -0
  4. data/.gemtest +0 -0
  5. data/History.md +105 -0
  6. data/LICENSE.txt +202 -0
  7. data/Manifest.txt +72 -0
  8. data/NOTICE.txt +4 -0
  9. data/README.md +100 -0
  10. data/Rakefile +33 -0
  11. data/bin/mturk +9 -0
  12. data/lib/amazon/util.rb +10 -0
  13. data/lib/amazon/util/binder.rb +48 -0
  14. data/lib/amazon/util/data_reader.rb +169 -0
  15. data/lib/amazon/util/filter_chain.rb +79 -0
  16. data/lib/amazon/util/hash_nesting.rb +93 -0
  17. data/lib/amazon/util/lazy_results.rb +59 -0
  18. data/lib/amazon/util/logging.rb +23 -0
  19. data/lib/amazon/util/paginated_iterator.rb +70 -0
  20. data/lib/amazon/util/proactive_results.rb +116 -0
  21. data/lib/amazon/util/threadpool.rb +129 -0
  22. data/lib/amazon/util/user_data_store.rb +100 -0
  23. data/lib/amazon/webservices/mechanical_turk.rb +123 -0
  24. data/lib/amazon/webservices/mechanical_turk_requester.rb +285 -0
  25. data/lib/amazon/webservices/mturk/mechanical_turk_error_handler.rb +153 -0
  26. data/lib/amazon/webservices/mturk/question_generator.rb +58 -0
  27. data/lib/amazon/webservices/util/amazon_authentication_relay.rb +72 -0
  28. data/lib/amazon/webservices/util/command_line.rb +155 -0
  29. data/lib/amazon/webservices/util/convenience_wrapper.rb +90 -0
  30. data/lib/amazon/webservices/util/filter_proxy.rb +45 -0
  31. data/lib/amazon/webservices/util/mock_transport.rb +70 -0
  32. data/lib/amazon/webservices/util/request_signer.rb +42 -0
  33. data/lib/amazon/webservices/util/rest_transport.rb +120 -0
  34. data/lib/amazon/webservices/util/soap_simplifier.rb +48 -0
  35. data/lib/amazon/webservices/util/soap_transport.rb +20 -0
  36. data/lib/amazon/webservices/util/soap_transport_header_handler.rb +27 -0
  37. data/lib/amazon/webservices/util/unknown_result_exception.rb +27 -0
  38. data/lib/amazon/webservices/util/validation_exception.rb +55 -0
  39. data/lib/amazon/webservices/util/xml_simplifier.rb +61 -0
  40. data/lib/mturk.rb +19 -0
  41. data/lib/mturk/version.rb +6 -0
  42. data/run_rcov.sh +1 -0
  43. data/samples/best_image/BestImage.rb +61 -0
  44. data/samples/best_image/best_image.properties +39 -0
  45. data/samples/best_image/best_image.question +82 -0
  46. data/samples/blank_slate/BlankSlate.rb +63 -0
  47. data/samples/blank_slate/BlankSlate_multithreaded.rb +67 -0
  48. data/samples/helloworld/MTurkHelloWorld.rb +56 -0
  49. data/samples/helloworld/mturk.yml +8 -0
  50. data/samples/review_policy/ReviewPolicy.rb +139 -0
  51. data/samples/review_policy/review_policy.question +30 -0
  52. data/samples/reviewer/Reviewer.rb +103 -0
  53. data/samples/reviewer/mturk.yml +8 -0
  54. data/samples/simple_survey/SimpleSurvey.rb +98 -0
  55. data/samples/simple_survey/simple_survey.question +30 -0
  56. data/samples/site_category/SiteCategory.rb +87 -0
  57. data/samples/site_category/externalpage.htm +71 -0
  58. data/samples/site_category/site_category.input +6 -0
  59. data/samples/site_category/site_category.properties +56 -0
  60. data/samples/site_category/site_category.question +9 -0
  61. data/test/mturk/test_changehittypeofhit.rb +130 -0
  62. data/test/mturk/test_error_handler.rb +403 -0
  63. data/test/mturk/test_mechanical_turk_requester.rb +178 -0
  64. data/test/mturk/test_mock_mechanical_turk_requester.rb +205 -0
  65. data/test/test_mturk.rb +21 -0
  66. data/test/unit/test_binder.rb +89 -0
  67. data/test/unit/test_data_reader.rb +135 -0
  68. data/test/unit/test_exceptions.rb +32 -0
  69. data/test/unit/test_hash_nesting.rb +99 -0
  70. data/test/unit/test_lazy_results.rb +89 -0
  71. data/test/unit/test_mock_transport.rb +132 -0
  72. data/test/unit/test_paginated_iterator.rb +58 -0
  73. data/test/unit/test_proactive_results.rb +108 -0
  74. data/test/unit/test_question_generator.rb +55 -0
  75. data/test/unit/test_threadpool.rb +50 -0
  76. data/test/unit/test_user_data_store.rb +80 -0
  77. metadata +225 -0
  78. metadata.gz.sig +0 -0
@@ -0,0 +1,59 @@
1
+ # Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
2
+ # License:: Apache License, Version 2.0
3
+
4
+ require 'amazon/util/paginated_iterator'
5
+
6
+ module Amazon
7
+ module Util
8
+
9
# LazyResults wraps a paginated data source and evaluates it lazily.
# The constructor takes a block which should accept a page number
# and return a page worth of results. Items are pulled through a
# PaginatedIterator only when first needed and are cached in memory,
# so items already loaded are served from the cache until +flush+.
class LazyResults
  include Enumerable

  # feeder:: block that accepts a page number and returns that page of results
  def initialize( &feeder )
    @iterator = PaginatedIterator.new( &feeder )
    flush
  end

  # clear the result set and start over again from the first page
  def flush
    @truth = []
    @iterator.restart
  end

  # iterate over the entire result set, loading lazily.
  # Items already cached are yielded first, then the remainder is pulled
  # from the iterator (and cached) as it is yielded.
  # Returns an Enumerator when called without a block.
  def each( &block ) # :yields: item
    return enum_for( :each ) unless block
    @truth.each { |item| yield item }
    @iterator.each do |item|
      @truth << item
      yield item
    end
  end

  # index into the result set. If the requested index has not been
  # loaded yet, loads items until it is available or the source is
  # exhausted. Returns nil when the index is past the end.
  def []( index )
    feedme while !@iterator.done && index >= @truth.size
    @truth[index]
  end

  # fully populate the result set and return a true array (a copy,
  # so callers cannot mutate the internal cache)
  def to_a
    feedme until @iterator.done
    @truth.dup
  end

  def inspect # :nodoc:
    # The paging state lives in the iterator, not in this object, so
    # ask the iterator whether it is exhausted.
    "#<Amazon::Util::LazyResults truth_size=#{@truth.size} done=#{@iterator.done}>"
  end

  private

  # fetch the next item from the iterator and stick it in @truth.
  # A nil item means the iterator had nothing to give and is skipped.
  def feedme
    item = @iterator.next
    @truth << item unless item.nil?
  end

end # LazyResults
57
+
58
+ end # Amazon::Util
59
+ end # Amazon
@@ -0,0 +1,23 @@
1
+ # Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
2
+ # License:: Apache License, Version 2.0
3
+
4
+ require 'logger'
5
+
6
+ module Amazon
7
+ module Util
8
# Mixin giving any object a shared debug logger.
# The logger is held in a class variable of this module, so every
# includer writes to the same Logger instance.
module Logging

  @@AmazonLogger = nil

  # Point the shared logger at +filename+ (anything Logger.new accepts).
  def set_log( filename )
    @@AmazonLogger = Logger.new( filename )
  end

  # Write +str+ at debug level, creating the default 'mturk.log'
  # logger first if none has been configured yet.
  def log( str )
    set_log( 'mturk.log' ) unless @@AmazonLogger
    @@AmazonLogger.debug( str )
  end

end
22
+ end
23
+ end
@@ -0,0 +1,70 @@
1
+ # Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
2
+ # License:: Apache License, Version 2.0
3
+
4
+ module Amazon
5
+ module Util
6
+
7
# PaginatedIterator provides an iterator interface to a paginated
# dataset, buffering the current page. It can be used to stream
# large result sets which would not fit into memory or only need
# to be processed in a single pass.
class PaginatedIterator

  # true once the feeder has returned a page with no items
  attr_reader :done

  # feeder should be a block that accepts a page number and
  # returns an array containing the corresponding page
  # worth of results. It should return an empty array (or nil) when
  # there are no more results in the dataset.
  def initialize( &feeder )
    @feeder = feeder
    restart
  end

  # resets the iterator to start pulling from the first page
  def restart
    @buffer = []
    @page = 1
    @done = false
  end

  # returns the next item, or nil if there are no more items
  def next
    fetchpage if @buffer.empty?
    @buffer.shift
  end

  # checks if we have another item available (may fetch a page)
  def hasNext
    fetchpage if @buffer.empty?
    !@buffer.empty?
  end

  # iterates over the remaining items.
  # Returns an Enumerator when called without a block.
  def each( &block ) # :yields: item
    return enum_for( :each ) unless block
    until @done
      item = self.next
      yield item unless item.nil?
    end
  end

  private

  # Pull the current page from the feeder into the buffer and return
  # the items that were added. The feeder's answer is normalised:
  # a single value is wrapped, nested arrays are flattened and nils
  # are dropped. An empty page marks the iterator as done.
  def fetchpage
    return [] if @done
    items = [@feeder.call( @page )].flatten.compact
    if items.empty?
      @done = true
    else
      @page += 1
      @buffer.concat( items )
    end
    items
  end

end # PaginatedIterator
68
+
69
+ end # Amazon::Util
70
+ end # Amazon
@@ -0,0 +1,116 @@
1
+ # Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
2
+ # License:: Apache License, Version 2.0
3
+
4
+ require 'monitor'
5
+ require 'amazon/util/threadpool'
6
+
7
+ module Amazon
8
+ module Util
9
+
10
# ProactiveResults is not as lazy as LazyResults: pages are requested
# ahead of time through a ThreadPool instead of on first access.
# The constructor takes a block which should accept a page number
# and return a page worth of results.
class ProactiveResults
  include Enumerable

  # number of worker threads, and also the number of pages requested up front
  THREADPOOL_SIZE = 3

  # exception_handler:: optional handler handed straight to the ThreadPool
  # feeder:: block that accepts a page number and returns that page of results
  def initialize( exception_handler=nil, &feeder )
    @feeder = feeder
    @eh = exception_handler
    @tp = nil
    self.flush
  end

  # clear the result set and start over again.
  # Any existing pool is finished first, then a fresh pool is created
  # and the first THREADPOOL_SIZE pages are requested.
  def flush
    @tp.finish unless @tp.nil?
    @tp = ThreadPool.new( THREADPOOL_SIZE, @eh )
    @done = false
    # work items indexed by page number; guarded by its own monitor
    @inflight = [].extend( MonitorMixin )
    @current_page = 1
    @truth = []
    1.upto( THREADPOOL_SIZE ) { |page| getPage( page ) }
  end

  # iterate over entire result set, waiting for
  # threads to finish where necessary.
  # Returns an Enumerator when called without a block.
  def each( &block ) # :yields: item
    return enum_for( :each ) unless block
    index = 0
    until @done && index >= @truth.size
      if index < @truth.size
        yield @truth[index]
        index += 1
      else
        feedme
      end
    end
  end

  # index into the result set. if we haven't
  # loaded enough, will wait until we have
  def []( index )
    feedme while !@done && index >= @truth.size
    @truth[index]
  end

  # wait for the entire results set to be populated,
  # then return an array (a copy) of the results
  def to_a
    feedme until @done
    @truth.dup
  end

  def inspect # :nodoc:
    # pending pages are the work items still held in @inflight
    pending = @inflight.synchronize { @inflight.compact.size }
    "#<Amazon::Util::ProactiveResults truth_size=#{@truth.size} pending_pages=#{pending}>"
  end

  private

  # queue a request for page +num+ and remember its work item
  def getPage(num)
    @inflight.synchronize do
      workitem = @tp.addWork(num) { |n| worker(n) }
      @inflight[num] = workitem
    end
  end

  # runs on a pool thread: fetch one page and, if it had any results,
  # queue the page THREADPOOL_SIZE further on. If the feeder raises,
  # res is still the empty array, so no further page is queued.
  def worker(page)
    res = []
    begin
      res = @feeder.call( page )
    ensure
      getPage( page + THREADPOOL_SIZE ) unless res.nil? || res.empty?
    end
  end

  # take the work item for the current page, wait for its result and
  # append it to @truth. When no work item exists for the current
  # page the result set is complete.
  def feedme
    return if @done
    item = nil
    @inflight.synchronize do
      if @inflight[@current_page].nil?
        # nothing queued for this page: every other slot must be consumed too
        raise "This should be the last page! #{@current_page} #{@inflight.inspect}" unless @inflight.compact.empty?
        @done = true
        return
      end
      item = @inflight[@current_page]
      @inflight[@current_page] = nil # clear out our references
      @current_page += 1
    end
    res = item.getResult
    case res
    when Array
      @truth += res
    when Exception, NilClass
      # ignore
    else
      raise "Unexpected result type: #{res.class}"
    end
  end

end
114
+
115
+ end # Amazon::Util
116
+ end # Amazon
@@ -0,0 +1,129 @@
1
+ # Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
2
+ # License:: Apache License, Version 2.0
3
+
4
+ require 'thread'
5
+ require 'set'
6
+
7
+ module Amazon
8
+ module Util
9
+
10
# ThreadPool is a generic threadpooling class that enables
# easier multithreaded workflows. Initialize with a thread count,
# then addWork to queue up tasks. You can +sync+ to ensure the current
# workload is complete, or +finish+ to flush the threads when you're done.
class ThreadPool

  # num_threads:: how many worker threads to start. They are created once,
  #               here, and then block waiting on the work Queue.
  # exception_handler:: optional callable. When a work item raises, it is
  #                     called with the failed WorkItem as its only argument
  #                     (the exception itself is what WorkItem#getResult
  #                     returns). Without a handler a message is printed.
  def initialize( num_threads, exception_handler=nil )
    @work = Queue.new
    @threads = ThreadGroup.new
    num_threads.times do
      @threads.add( Thread.new { workerProcess( exception_handler ) } )
    end
  end

  # add work to the queue and return its WorkItem.
  # pass any number of arguments, they will be passed on to the block.
  def addWork( *args, &block )
    WorkItem.new( args, &block ).tap { |item| @work.push( item ) }
  end

  # how many worker threads are there?
  def threadcount
    @threads.list.size
  end

  # request thread completion by queueing one :Finish marker per thread.
  # Work queued after the markers will not be performed.
  def noMoreWork
    threadcount.times { @work.push( :Finish ) }
  end

  # request thread completion and wait for them to finish
  def finish
    noMoreWork
    @threads.list.each( &:join )
  end

  # wait for the currently queued work to finish
  # (This freezes up the entire pool, temporarily)
  def sync
    pool_size = threadcount

    # with a single thread, one marker item at the back of the queue is enough
    return addWork { :sync }.getResult if pool_size < 2

    gate = Queue.new
    chain = []

    # first item parks a thread until the gate is opened
    chain << addWork { gate.pop }

    # each further item parks a thread waiting on the previous chain item
    0.upto( pool_size - 3 ) do |position|
      chain << addWork( position ) { |i| chain[i].getResult }
    end

    # the last thread to become free opens the gate
    addWork { gate.push :sync }

    chain.last.getResult
  end

  private

  # body of every worker thread: run queued items until :Finish arrives
  def workerProcess( exception_handler=nil )
    until ( workitem = @work.pop ) == :Finish
      begin
        workitem.run
      rescue Exception => e
        reportFailure( exception_handler, workitem, e )
      end
    end
  end

  # hand a failed item to the handler, or print the error if there is none
  def reportFailure( exception_handler, workitem, error )
    return exception_handler.call( workitem ) unless exception_handler.nil?
    print "Worker thread has thrown an exception: "+error.to_s+"\n"
  end

  # One queued unit of work: a block, its arguments and a slot for the result.
  class WorkItem
    attr_reader :args, :block

    def initialize( args, &block )
      @args = args
      @block = block
      @result = Queue.new
    end

    # call the block; the return value, or the exception it raised, becomes
    # the result. Exceptions are re-raised after being recorded.
    def run
      @result.push( @block.call( *@args ) )
    rescue Exception => e
      @result.push e
      raise e
    end

    # block until the result is available and return it. The value is
    # put back in a one-element array so later calls return it again.
    def getResult
      @result.pop.tap { |value| @result = [value] }
    end
  end

end # ThreadPool
127
+
128
+ end # Amazon::Util
129
+ end # Amazon
@@ -0,0 +1,100 @@
1
+ # Copyright:: Copyright (c) 2007 Amazon Technologies, Inc.
2
+ # License:: Apache License, Version 2.0
3
+
4
+ require 'amazon/util/data_reader'
5
+
6
+ module Amazon
7
+ module Util
8
+
9
# The UserDataStore is a platform-independent class intended to store
# application configuration information in a human-readable per-user location.
# Data is grouped into namespaces; each namespace is kept in its own file
# inside a dot-folder named after the application, under the user's home.
class UserDataStore

  # app_name:: application name; lower-cased and used for the folder name.
  # Creates the folder if needed and loads every namespace file found in it.
  def initialize(app_name)
    @app = sanitizeKey(app_name)
    @base = findBaseStore(@app)
    @dirty = []
    @data = Hash.new {|h,a| h[a] = {} }
    loadConfig
  end

  # returns the value stored for +property+ in +namespace+, or nil
  def get(namespace,property)
    ns = sanitizeKey(namespace)
    @data[ns][property]
  end

  # stores +value+ in memory and marks the namespace as needing a save
  def set(namespace,property,value)
    ns = sanitizeKey(namespace)
    @dirty << ns unless @dirty.member? ns
    @data[ns][property] = value
  end

  # removes one property, or the whole namespace when +property+ is nil,
  # and marks the namespace as needing a save
  def clear(namespace,property = nil)
    ns = sanitizeKey(namespace)
    @dirty << ns unless @dirty.member? ns
    if property.nil?
      @data[ns] = {}
    else
      @data[ns].delete_if {|k,v| k == property }
    end
  end

  # writes every dirty namespace to disk. A namespace is dropped from the
  # dirty list only when saveNamespace returns a truthy value.
  def save
    @dirty.delete_if do |name|
      saveNamespace( name )
    end
  end

  private

  # load every non-hidden entry of the base folder as a namespace
  def loadConfig
    # Dir.foreach closes the directory handle for us
    Dir.foreach(@base) do |filename|
      next if filename.start_with?( '.' )
      loadNamespace( filename )
    end
  end

  # namespace and application keys are case-insensitive
  def sanitizeKey(ns)
    ns.to_s.downcase
  end

  def loadNamespace(name)
    @data[name] = DataReader.load( File.join( @base, name ), :Properties )
  end

  # Rewrite the file for +name+. An emptied namespace is persisted by
  # deleting its file, which counts as a successful save.
  # Otherwise returns whatever DataReader.save returns.
  # NOTE(review): presumably DataReader.save is truthy on success - confirm.
  def saveNamespace(name)
    filename = File.join( @base, name )
    # kill old config before saving
    File.delete filename if File.exist? filename
    return true if @data[name].keys.empty?
    # now save out the data
    DataReader.save( filename, @data[name], :Properties )
  end

  # returns the path of the application's folder, creating it if the
  # home directory has no entry of that name yet
  def findBaseStore(app_name)
    home = findHomeDir
    folder = findAppFolderName(app_name)
    base = File.join( home, folder )
    Dir.mkdir( base ) unless Dir.entries( home ).include?( folder )
    base
  end

  # Work out the user's home directory, trying in order: the
  # TEST_HOME_OVERRIDE variable, RubyGems, HOME, USERPROFILE, the Windows
  # HOMEDRIVE/HOMEPATH pair, and finally the current directory.
  def findHomeDir
    override = ENV['TEST_HOME_OVERRIDE']
    return override unless override.nil?
    return Gem.user_home if defined? Gem
    return ENV['HOME'] unless ENV['HOME'].nil?
    return ENV['USERPROFILE'] unless ENV['USERPROFILE'].nil?
    # RUBY_PLATFORM replaces the PLATFORM constant, which was removed in Ruby 1.9
    if RUBY_PLATFORM =~ /win32|mingw/ && ENV['HOMEDRIVE'] && ENV['HOMEPATH']
      return ENV['HOMEDRIVE'] + ENV['HOMEPATH']
    end
    '.'
  end

  def findAppFolderName(app_name)
    "." + app_name
  end

end # UserDataStore
98
+
99
+ end # Amazon::Util
100
+ end # Amazon