appengine-mapreduce 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/appengine-mapreduce.rb +4 -1
- data/lib/appengine-mapreduce/job.rb +45 -9
- data/lib/appengine-mapreduce/mappable.rb +16 -0
- data/lib/appengine-mapreduce/proc_source.rb +141 -0
- data/lib/appengine-mapreduce/string_helpers.rb +29 -0
- data/target/appengine-mapreduce-jruby-0.0.1-SNAPSHOT.jar +0 -0
- metadata +6 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.2
|
1
|
+
0.0.3
|
data/lib/appengine-mapreduce.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
2
2
|
|
3
|
+
require 'appengine-mapreduce/job'
|
4
|
+
require 'appengine-mapreduce/mappable'
|
5
|
+
|
3
6
|
module AppEngine
|
4
7
|
module MapReduce
|
5
|
-
VERSION = '0.0.2'
|
8
|
+
VERSION = '0.0.3'
|
6
9
|
end
|
7
10
|
end
|
@@ -1,12 +1,19 @@
|
|
1
1
|
require 'appengine-apis/labs/taskqueue'
|
2
|
+
require File.join(File.dirname(__FILE__), 'proc_source')
|
3
|
+
require File.join(File.dirname(__FILE__), 'string_helpers')
|
2
4
|
|
3
5
|
module AppEngine
|
4
6
|
module MapReduce
|
7
|
+
JRUBY_PROXY_MAPPER_CLASS = "com.jetthoughts.appengine.tools.mapreduce.JRubyMapper"
|
8
|
+
|
5
9
|
BLOBSTORE_INPUT_FORMAT = "com.google.appengine.tools.mapreduce.BlobstoreInputFormat"
|
6
10
|
DATASTORE_INPUT_FORMAT = "com.google.appengine.tools.mapreduce.DatastoreInputFormat"
|
7
11
|
|
12
|
+
OUTPUT_ENTITY_KIND_KEY = "mapreduce.mapper.outputformat.datastoreoutputformat.entitykind"
|
13
|
+
MAPREDUCE_SCRIPT_KEY = "mapreduce.ruby.script"
|
14
|
+
|
8
15
|
CALLBACK_KEY = "mapreduce.appengine.donecallback.url"
|
9
|
-
|
16
|
+
|
10
17
|
import com.google.appengine.tools.mapreduce.ConfigurationXmlUtil
|
11
18
|
import com.google.appengine.tools.mapreduce.DatastoreInputFormat
|
12
19
|
|
@@ -16,15 +23,14 @@ module AppEngine
|
|
16
23
|
attr_accessor :map, :reduce, :input_kind, :output_kind, :properties, :input_class
|
17
24
|
|
18
25
|
def initialize options = {}
|
19
|
-
puts "Job Options: #{options.inspect}"
|
20
|
-
|
21
26
|
@properties = {}
|
22
27
|
|
23
28
|
%w(map reduce input_class input_kind output_kind).each do |attr_n|
|
24
|
-
|
29
|
+
key = (attr_n.to_sym rescue attr_n) || attr_n
|
30
|
+
self.send("#{attr_n}=", options.delete(key)) if options[key]
|
25
31
|
end
|
26
32
|
|
27
|
-
@properties["mapreduce.map.class"] =
|
33
|
+
@properties["mapreduce.map.class"] = JRUBY_PROXY_MAPPER_CLASS
|
28
34
|
|
29
35
|
end
|
30
36
|
|
@@ -34,16 +40,30 @@ module AppEngine
|
|
34
40
|
queue.add({:method => 'POST',
|
35
41
|
:params => {'configuration' => conf_as_string},
|
36
42
|
:url => "/mapreduce/start"})
|
37
|
-
|
38
|
-
puts "Conf: #{conf_as_string}"
|
39
43
|
end
|
40
44
|
|
41
45
|
# Writer for the job's input kind.
#
# aKind - value recorded in the job properties under
#         DatastoreInputFormat::ENTITY_KIND_KEY.
def input_kind=(aKind)
  key = DatastoreInputFormat::ENTITY_KIND_KEY
  @properties[key] = aKind
end
|
44
48
|
|
49
|
+
# Writer for the job's output kind.
#
# aKind - value recorded in the job properties under OUTPUT_ENTITY_KIND_KEY.
def output_kind=(aKind)
  key = OUTPUT_ENTITY_KIND_KEY
  @properties[key] = aKind
end
|
52
|
+
|
53
|
+
# Combined reader/writer for the map script.
#
# When a block is passed it is handed to +map=+ first; in every case the
# value currently stored under MAPREDUCE_SCRIPT_KEY is returned.
def map(&block)
  self.map = block unless block.nil?
  @properties[MAPREDUCE_SCRIPT_KEY]
end
|
60
|
+
|
45
61
|
# Stores the map script in the job properties under MAPREDUCE_SCRIPT_KEY.
#
# aMap - either a Proc, which is first converted to script text via
#        +serialize_block+, or any other value, which is stored as given.
def map=(aMap)
  script = aMap.is_a?(Proc) ? serialize_block(aMap) : aMap
  @properties[MAPREDUCE_SCRIPT_KEY] = script
end
|
48
68
|
|
49
69
|
def input_class= klass
|
@@ -51,7 +71,15 @@ module AppEngine
|
|
51
71
|
end
|
52
72
|
|
53
73
|
def use_blobstore_input_class
|
54
|
-
input_class = BLOBSTORE_INPUT_FORMAT
|
74
|
+
self.input_class = BLOBSTORE_INPUT_FORMAT
|
75
|
+
end
|
76
|
+
|
77
|
+
# Builds a Job whose map script loads and includes a named mapper module.
#
# mapper_name - String or Symbol; "word_count" produces a script that
#               requires 'lib/mappers/word_count_mapper' and includes
#               WordCountMapper.
#
# Returns the new Job.
def self.create_job_for(mapper_name)
  mapper = mapper_name.to_s
  script = [
    "require 'lib/mappers/#{mapper}_mapper'",
    "include #{mapper.camelize}Mapper"
  ].map { |statement| "#{statement}\n" }.join
  Job.new(:map => script)
end
|
56
84
|
|
57
85
|
protected
|
@@ -60,6 +88,14 @@ module AppEngine
|
|
60
88
|
@properties.each {|k,v| @conf.set(k, v)}
|
61
89
|
ConfigurationXmlUtil.convertConfigurationToXml(@conf)
|
62
90
|
end
|
91
|
+
|
92
|
+
# Converts a Ruby block into the script text stored as the job's map script.
#
# The block's source is recovered with ProcSource.handle and wrapped in a
# snippet that defines a +map+ method from it.
#
# block - the Proc to serialize.
#
# Returns the script as a String.
# Raises ArgumentError when the block's source cannot be recovered
# (ProcSource.handle returns nil, e.g. for several procs on one line);
# previously this silently produced a mapper with an empty body.
def serialize_block block
  source = ProcSource.handle(block)
  if source.nil?
    raise ArgumentError, "unable to extract the source code of the map block"
  end

  <<MAP
self.class.class_eval do
define_method(:map, lambda{ #{source} })
end
MAP
end
|
63
99
|
end
|
64
100
|
end
|
65
101
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module AppEngine
  # Mixin that gives the including class an +async_map+ class method for
  # starting a map job over that class.
  module Mappable
    # Hook run on +include+: adds ClassMethods to the including class.
    def self.included(base)
      #base.property :name, String
      base.extend ClassMethods
    end

    module ClassMethods
      # Creates an AppEngine::MapReduce::Job for this class, uses the given
      # block as its map step and runs it.
      #
      # The job's input kind is the class name, falling back to
      # storage_names[:default] when the class has no name
      # (NOTE(review): storage_names is presumably supplied by the ORM of
      # the including class -- confirm).
      #
      # Returns whatever Job#run returns.
      # Raises ArgumentError when called without a block, instead of
      # pushing a nil map script into the job.
      def async_map &block
        raise ArgumentError, "async_map requires a block" unless block

        job = AppEngine::MapReduce::Job.new(:input_kind => name || storage_names[:default])
        job.map = block
        job.run
      end
    end
  end
end
|
@@ -0,0 +1,141 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'irb/ruby-lex'
|
3
|
+
|
4
|
+
SCRIPT_LINES__ = {} unless defined? SCRIPT_LINES__
|
5
|
+
|
6
|
+
# Recovers the source text of a Proc's body: locates the file and line where
# the proc was created, then lexes that text with IRB's RubyLex to find the
# matching block delimiters. Needs 'stringio' and 'irb/ruby-lex' (RubyLex and
# RubyToken) to be loaded by the surrounding file.
module ProcSource #:nodoc:
  # Returns the source lines of +filename+ from +start_line+ (1-based) to the
  # end, or nil when they cannot be obtained.
  #
  # filename   - a path, or one of the "(irb)" / "(eval...)" descriptors.
  # start_line - first line wanted; values below 1 mean "from the first line"
  #              (the old default of 0 produced index -1, i.e. only the last
  #              line).
  def get_lines(filename, start_line = 0)
    return nil unless filename

    case filename
    # special "(irb)" descriptor?
    when "(irb)"
      IRB.conf[:MAIN_CONTEXT].io.line(start_line .. -1)
    # special "(eval...)" descriptor? (".*" so that a bare "(eval)" matches too)
    when /^\(eval.*\)$/
      # EVAL_LINES__ is not defined by this file; only consult it when some
      # other code provides it, rather than raising NameError.
      recorded = defined?(EVAL_LINES__) ? EVAL_LINES__[filename] : nil
      recorded && recorded[start_line .. -1]
    # regular file
    else
      first_index = [start_line - 1, 0].max
      # Ruby already recorded this file's lines in SCRIPT_LINES__?
      if defined?(SCRIPT_LINES__) && (lines = SCRIPT_LINES__[filename])
        lines[first_index .. -1]
      # If the file exists we're going to try reading it in
      elsif File.exist?(filename)
        begin
          File.readlines(filename)[first_index .. -1]
        rescue
          # best effort: an unreadable file simply yields no source
          nil
        end
      end
    end
  end

  # Returns the source code between the delimiters of +proc+'s block as a
  # String, or nil when it cannot be determined (source lines unavailable,
  # no block found, or more than one proc constructor on the same line).
  def handle(proc)
    filename, line = source_descriptor proc
    lines = get_lines(filename, line) || []
    # Nothing to lex: same nil result as before, without needing the lexer.
    return nil if lines.empty?

    lexer = RubyLex.new
    lexer.set_input(StringIO.new(lines.join))

    state = :before_constructor
    nesting_level = 1
    start_token, end_token = nil, nil
    while token = lexer.token
      # we've not yet found any proc-constructor -- we'll try to find one.
      if [:before_constructor, :check_more].include?(state)
        # checking more and newline? -> done
        if token.is_a?(RubyToken::TkNL) and state == :check_more
          state = :done
          break
        end
        # token is Proc?
        if token.is_a?(RubyToken::TkCONSTANT) and
           token.instance_variable_get(:@name) == "Proc"
          # method call?
          if lexer.token.is_a?(RubyToken::TkDOT)
            method = lexer.token
            # constructor?
            if method.is_a?(RubyToken::TkIDENTIFIER) and
               method.instance_variable_get(:@name) == "new"
              if state == :check_more
                # multiple procs on one line
                return
              else
                # okay, code will follow soon.
                state = :before_code
              end
            end
          end
        # token is lambda or proc call?
        elsif token.is_a?(RubyToken::TkIDENTIFIER) and
              %w{proc lambda}.include?(token.instance_variable_get(:@name))
          if state == :check_more
            # multiple procs on one line
            return
          else
            # okay, code will follow soon.
            state = :before_code
          end
        elsif token.is_a?(RubyToken::TkfLBRACE) or token.is_a?(RubyToken::TkDO)
          # found the code start, update state and remember current token
          state = :in_code
          start_token = token
        end

      # we're waiting for the code start to appear.
      elsif state == :before_code
        if token.is_a?(RubyToken::TkfLBRACE) or token.is_a?(RubyToken::TkDO)
          # found the code start, update state and remember current token
          state = :in_code
          start_token = token
        end

      # okay, we're inside code
      elsif state == :in_code
        if token.is_a?(RubyToken::TkRBRACE) or token.is_a?(RubyToken::TkEND)
          nesting_level -= 1
          if nesting_level == 0
            # we're done!
            end_token = token
            # parse another time to check if there are multiple procs on one line
            # we can't handle that case correctly so we return no source code at all
            state = :check_more
          end
        elsif token.is_a?(RubyToken::TkfLBRACE) or token.is_a?(RubyToken::TkDO) or
              token.is_a?(RubyToken::TkBEGIN) or token.is_a?(RubyToken::TkCASE) or
              token.is_a?(RubyToken::TkCLASS) or token.is_a?(RubyToken::TkDEF) or
              token.is_a?(RubyToken::TkFOR) or token.is_a?(RubyToken::TkIF) or
              token.is_a?(RubyToken::TkMODULE) or token.is_a?(RubyToken::TkUNLESS) or
              token.is_a?(RubyToken::TkUNTIL) or token.is_a?(RubyToken::TkWHILE) or
              token.is_a?(RubyToken::TklBEGIN)
          nesting_level += 1
        end
      end
    end

    if start_token and end_token
      start_line, end_line = start_token.line_no - 1, end_token.line_no - 1
      # Copy the lines before trimming: they may be the very String objects
      # cached in SCRIPT_LINES__, and slice! would corrupt that cache for
      # every later lookup.
      source = lines[start_line .. end_line].map { |text| text.dup }
      start_offset = start_token.char_no
      start_offset += 1 if start_token.is_a?(RubyToken::TkDO)
      # computed before trimming the front, as a negative offset from the end
      end_offset = -(source.last.length - end_token.char_no)
      source.first.slice!(0 .. start_offset)
      source.last.slice!(end_offset .. -1)

      # Can't use .strip because newline at end of code might be important
      # (Stuff would break when somebody does proc { ... #foo\n})
      source.join.gsub(/^ | $/, "")
    end
  end

  # Returns [filename, line] describing where +p+ was created, or nil when
  # that is unknown.
  #
  # Proc#source_location is used when the runtime provides it. The fallback
  # parses Proc#inspect and accepts both the "@file:line" and " file:line"
  # layouts, with or without a trailing " (lambda)".
  def source_descriptor p
    if p.respond_to?(:source_location)
      filename, line = p.source_location
      return filename, line if filename
    end

    if md = /^#<Proc:0x[0-9A-Fa-f]+[@ ](.+):(\d+)(?: \(lambda\))?>$/.match(p.inspect)
      filename, line = md.captures
      return filename, line.to_i
    end
  end


  module_function :handle, :get_lines, :source_descriptor
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Small, dependency-free inflection helpers (camelize / underscore) used to
# derive mapper constant names from underscored names.
module StringHelper
  extend self

  # Converts an underscored, slash-separated name to CamelCase.
  #
  #   camelize("admin/word_count")    # => "Admin::WordCount"
  #   camelize("word_count", false)   # => "wordCount"
  #
  # lower_case_and_underscored_word - String or Symbol to convert.
  # first_letter_in_uppercase       - when false the very first character is
  #                                   lower-cased (this flag used to be
  #                                   accepted but ignored).
  #
  # Returns a new String.
  def camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
    camelized = lower_case_and_underscored_word.to_s.
      gsub(/\/(.?)/) { "::#{$1.upcase}" }.
      gsub(/(?:^|_)(.)/) { $1.upcase }
    return camelized if first_letter_in_uppercase

    # \A. does not match an empty string, so "" is returned unchanged
    camelized.sub(/\A./) { |first| first.downcase }
  end

  # Converts a CamelCase constant name to its underscored, slash-separated
  # form.
  #
  #   underscore("Admin::WordCount")   # => "admin/word_count"
  #
  # Returns a new String; the argument is not modified.
  def underscore(camel_cased_word)
    word = camel_cased_word.to_s.dup
    word.gsub!(/::/, '/')
    word.gsub!(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
    word.gsub!(/([a-z\d])([A-Z])/,'\1_\2')
    word.tr!("-", "_")
    word.downcase!
    word
  end
end

class ::String
  # Only added when no other library has already provided String#camelize,
  # so an existing implementation is not overwritten.
  unless method_defined?(:camelize)
    # See StringHelper.camelize.
    def camelize(first_letter_in_uppercase = true)
      StringHelper.camelize(self, first_letter_in_uppercase)
    end
  end
end

class ::Symbol
  # Only added when no other library has already provided Symbol#camelize.
  unless method_defined?(:camelize)
    # See StringHelper.camelize; returns a String.
    def camelize(first_letter_in_uppercase = true)
      StringHelper.camelize(self, first_letter_in_uppercase)
    end
  end
end
|
Binary file
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: appengine-mapreduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 25
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 2
|
10
|
-
version: 0.0.2
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Paul Nikitochkin
|
@@ -49,6 +49,9 @@ files:
|
|
49
49
|
- lib/appengine-mapreduce/boot.rb
|
50
50
|
- lib/appengine-mapreduce/datastore_input_format.rb
|
51
51
|
- lib/appengine-mapreduce/job.rb
|
52
|
+
- lib/appengine-mapreduce/mappable.rb
|
53
|
+
- lib/appengine-mapreduce/proc_source.rb
|
54
|
+
- lib/appengine-mapreduce/string_helpers.rb
|
52
55
|
- target/appengine-mapreduce-jruby-0.0.1-SNAPSHOT.jar
|
53
56
|
- target/dependency/aopalliance-1.0.jar
|
54
57
|
- target/dependency/appengine-mapper-0.0.1-SNAPSHOT.jar
|