appengine-mapreduce 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/appengine-mapreduce.rb +4 -1
- data/lib/appengine-mapreduce/job.rb +45 -9
- data/lib/appengine-mapreduce/mappable.rb +16 -0
- data/lib/appengine-mapreduce/proc_source.rb +141 -0
- data/lib/appengine-mapreduce/string_helpers.rb +29 -0
- data/target/appengine-mapreduce-jruby-0.0.1-SNAPSHOT.jar +0 -0
- metadata +6 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.2
|
1
|
+
0.0.3
|
data/lib/appengine-mapreduce.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
2
2
|
|
3
|
+
require 'appengine-mapreduce/job'
|
4
|
+
require 'appengine-mapreduce/mappable'
|
5
|
+
|
3
6
|
module AppEngine
|
4
7
|
module MapReduce
|
5
|
-
VERSION = '0.0.2'
|
8
|
+
VERSION = '0.0.3'
|
6
9
|
end
|
7
10
|
end
|
@@ -1,12 +1,19 @@
|
|
1
1
|
require 'appengine-apis/labs/taskqueue'
|
2
|
+
require File.join(File.dirname(__FILE__), 'proc_source')
|
3
|
+
require File.join(File.dirname(__FILE__), 'string_helpers')
|
2
4
|
|
3
5
|
module AppEngine
|
4
6
|
module MapReduce
|
7
|
+
JRUBY_PROXY_MAPPER_CLASS = "com.jetthoughts.appengine.tools.mapreduce.JRubyMapper"
|
8
|
+
|
5
9
|
BLOBSTORE_INPUT_FORMAT = "com.google.appengine.tools.mapreduce.BlobstoreInputFormat"
|
6
10
|
DATASTORE_INPUT_FORMAT = "com.google.appengine.tools.mapreduce.DatastoreInputFormat"
|
7
11
|
|
12
|
+
OUTPUT_ENTITY_KIND_KEY = "mapreduce.mapper.outputformat.datastoreoutputformat.entitykind"
|
13
|
+
MAPREDUCE_SCRIPT_KEY = "mapreduce.ruby.script"
|
14
|
+
|
8
15
|
CALLBACK_KEY = "mapreduce.appengine.donecallback.url"
|
9
|
-
|
16
|
+
|
10
17
|
import com.google.appengine.tools.mapreduce.ConfigurationXmlUtil
|
11
18
|
import com.google.appengine.tools.mapreduce.DatastoreInputFormat
|
12
19
|
|
@@ -16,15 +23,14 @@ module AppEngine
|
|
16
23
|
attr_accessor :map, :reduce, :input_kind, :output_kind, :properties, :input_class
|
17
24
|
|
18
25
|
# Builds a job from an options hash.
#
# Recognised options are :map, :reduce, :input_class, :input_kind and
# :output_kind; each one that is present with a truthy value is handed to the
# attribute writer of the same name. The option may be given under either its
# Symbol or its String name. The caller's hash is only read, never modified.
#
# The "mapreduce.map.class" property is always set to
# JRUBY_PROXY_MAPPER_CLASS.
def initialize options = {}
  @properties = {}

  %w(map reduce input_class input_kind output_kind).each do |attr_n|
    # Look the option up by Symbol first, then by String, so that hashes
    # built from request parameters work as well as literal hashes.
    value = options[attr_n.to_sym] || options[attr_n]
    send("#{attr_n}=", value) if value
  end

  @properties["mapreduce.map.class"] = JRUBY_PROXY_MAPPER_CLASS
end
|
30
36
|
|
@@ -34,16 +40,30 @@ module AppEngine
|
|
34
40
|
queue.add({:method => 'POST',
|
35
41
|
:params => {'configuration' => conf_as_string},
|
36
42
|
:url => "/mapreduce/start"})
|
37
|
-
|
38
|
-
puts "Conf: #{conf_as_string}"
|
39
43
|
end
|
40
44
|
|
41
45
|
# Stores +aKind+ in the job properties under
# DatastoreInputFormat::ENTITY_KIND_KEY.
def input_kind=(aKind)
  @properties.store(DatastoreInputFormat::ENTITY_KIND_KEY, aKind)
end
|
44
48
|
|
49
|
+
# Stores +aKind+ in the job properties under OUTPUT_ENTITY_KIND_KEY.
def output_kind=(aKind)
  @properties.store(OUTPUT_ENTITY_KIND_KEY, aKind)
end
|
52
|
+
|
53
|
+
# Reader/writer hybrid for the map script.
#
# When called with a block, the block is first assigned through #map=.
# In every case the value currently stored under MAPREDUCE_SCRIPT_KEY is
# returned.
def map(&block)
  self.map = block if block
  @properties[MAPREDUCE_SCRIPT_KEY]
end
|
60
|
+
|
45
61
|
# Sets the map script stored under MAPREDUCE_SCRIPT_KEY.
#
# A Proc is converted with serialize_block before being stored; any other
# value is stored as given.
def map=(aMap)
  script = aMap.is_a?(Proc) ? serialize_block(aMap) : aMap
  @properties[MAPREDUCE_SCRIPT_KEY] = script
end
|
48
68
|
|
49
69
|
def input_class= klass
|
@@ -51,7 +71,15 @@ module AppEngine
|
|
51
71
|
end
|
52
72
|
|
53
73
|
# Switches the job's input class to BLOBSTORE_INPUT_FORMAT.
#
# The explicit +self.+ receiver is required: without it Ruby would assign a
# local variable instead of calling the input_class= writer.
def use_blobstore_input_class
  self.input_class = BLOBSTORE_INPUT_FORMAT
end
|
76
|
+
|
77
|
+
# Creates a Job whose map script loads and includes a named mapper module.
#
# The generated script requires 'lib/mappers/<mapper_name>_mapper' and then
# includes the constant <CamelizedMapperName>Mapper.
def self.create_job_for(mapper_name)
  mapper = mapper_name.to_s
  script = "require 'lib/mappers/#{mapper}_mapper'\n" \
           "include #{mapper.camelize}Mapper\n"
  Job.new(:map => script)
end
|
56
84
|
|
57
85
|
protected
|
@@ -60,6 +88,14 @@ module AppEngine
|
|
60
88
|
@properties.each {|k,v| @conf.set(k, v)}
|
61
89
|
ConfigurationXmlUtil.convertConfigurationToXml(@conf)
|
62
90
|
end
|
91
|
+
|
92
|
+
# Turns a map block into a Ruby script string.
#
# The block's source text is obtained from ProcSource.handle and wrapped in a
# script that defines a +map+ method from a lambda with that source as its
# body.
#
# Raises ArgumentError when ProcSource.handle returns nil (it found no source
# for the block). Interpolating nil would otherwise produce a script whose
# mapper silently does nothing.
def serialize_block block
  body = ProcSource.handle(block)
  if body.nil?
    raise ArgumentError,
          "could not extract the source of the map block; define it in a file, one block per line"
  end

  <<MAP
self.class.class_eval do
  define_method(:map, lambda{ #{body} })
end
MAP
end
|
63
99
|
end
|
64
100
|
end
|
65
101
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module AppEngine
  # Mixin that adds a class-level +async_map+ to whatever includes it.
  module Mappable
    # Class-level API added to the including class.
    module ClassMethods
      # Creates a MapReduce job with +block+ as its map step and runs it.
      #
      # The input kind is the receiver's +name+, falling back to
      # storage_names[:default] when +name+ is nil.
      # NOTE(review): storage_names looks like a DataMapper model method;
      # confirm against the including classes.
      def async_map(&block)
        kind = name || storage_names[:default]
        job = AppEngine::MapReduce::Job.new(:input_kind => kind)
        job.map = block
        job.run
      end
    end

    # Include hook: extends the including class with ClassMethods.
    def self.included(base)
      #base.property :name, String
      base.extend(ClassMethods)
    end
  end
end
|
@@ -0,0 +1,141 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'irb/ruby-lex'
|
3
|
+
|
4
|
+
SCRIPT_LINES__ = {} unless defined? SCRIPT_LINES__
|
5
|
+
|
6
|
+
# Recovers the source text of a Proc.
#
# The file name and line where the Proc was created are determined first, the
# text from that line onward is fed to RubyLex, and tokens are scanned until
# the block's closing token is found.
module ProcSource #:nodoc:
  # Returns the source lines of +filename+ starting at +start_line+, or nil
  # when they cannot be obtained.
  #
  # filename   - a path, the "(irb)" descriptor, or an "(eval...)" descriptor.
  # start_line - 1-based line number for regular files; values below 1 are
  #              treated as "from the first line".
  def get_lines(filename, start_line = 0)
    return nil unless filename

    case filename
    # special "(irb)" descriptor?
    when "(irb)"
      IRB.conf[:MAIN_CONTEXT].io.line(start_line .. -1)
    # special "(eval...)" descriptor?
    when /^\(eval.+\)$/
      # EVAL_LINES__ is not defined anywhere in this file; without the guard
      # an eval'd proc would raise NameError instead of yielding "no source".
      return nil unless defined?(EVAL_LINES__)

      recorded = EVAL_LINES__[filename]
      recorded && recorded[start_line .. -1]
    # regular file
    else
      # Clamp so the default start_line of 0 does not become index -1, which
      # would select only the last line of the file.
      first_index = [start_line - 1, 0].max

      # Ruby already recorded this file's lines in SCRIPT_LINES__?
      if lines = SCRIPT_LINES__[filename]
        lines[first_index .. -1]
      # If the file exists we're going to try reading it in
      elsif File.exist?(filename)
        begin
          File.readlines(filename)[first_index .. -1]
        rescue
          # Best effort: an unreadable file simply means "no source".
          nil
        end
      end
    end
  end

  # Returns the text between the opening and closing tokens of +proc+'s
  # block, or nil when it cannot be determined (no source lines available,
  # no block found, or another proc constructor follows on the same line).
  def handle(proc)
    filename, line = source_descriptor proc
    lines = get_lines(filename, line) || []

    lexer = RubyLex.new
    lexer.set_input(StringIO.new(lines.join))

    state = :before_constructor
    nesting_level = 1
    start_token, end_token = nil, nil
    while token = lexer.token
      # we've not yet found any proc-constructor -- we'll try to find one.
      if [:before_constructor, :check_more].include?(state)
        # checking more and newline? -> done
        if token.is_a?(RubyToken::TkNL) && state == :check_more
          state = :done
          break
        end
        # token is Proc?
        if token.is_a?(RubyToken::TkCONSTANT) &&
           token.instance_variable_get(:@name) == "Proc"
          # method call? (this consumes the next token from the lexer)
          if lexer.token.is_a?(RubyToken::TkDOT)
            method = lexer.token
            # constructor?
            if method.is_a?(RubyToken::TkIDENTIFIER) &&
               method.instance_variable_get(:@name) == "new"
              # multiple procs on one line -> give up
              return if state == :check_more

              # okay, code will follow soon.
              state = :before_code
            end
          end
        # token is lambda or proc call?
        elsif token.is_a?(RubyToken::TkIDENTIFIER) &&
              %w{proc lambda}.include?(token.instance_variable_get(:@name))
          # multiple procs on one line -> give up
          return if state == :check_more

          # okay, code will follow soon.
          state = :before_code
        elsif token.is_a?(RubyToken::TkfLBRACE) || token.is_a?(RubyToken::TkDO)
          # found the code start, update state and remember current token
          state = :in_code
          start_token = token
        end

      # we're waiting for the code start to appear.
      elsif state == :before_code
        if token.is_a?(RubyToken::TkfLBRACE) || token.is_a?(RubyToken::TkDO)
          # found the code start, update state and remember current token
          state = :in_code
          start_token = token
        end

      # okay, we're inside code
      elsif state == :in_code
        case token
        when RubyToken::TkRBRACE, RubyToken::TkEND
          nesting_level -= 1
          if nesting_level == 0
            # we're done!
            end_token = token
            # parse another time to check if there are multiple procs on one line
            # we can't handle that case correctly so we return no source code at all
            state = :check_more
          end
        when RubyToken::TkfLBRACE, RubyToken::TkDO,
             RubyToken::TkBEGIN, RubyToken::TkCASE,
             RubyToken::TkCLASS, RubyToken::TkDEF,
             RubyToken::TkFOR, RubyToken::TkIF,
             RubyToken::TkMODULE, RubyToken::TkUNLESS,
             RubyToken::TkUNTIL, RubyToken::TkWHILE,
             RubyToken::TklBEGIN
          nesting_level += 1
        end
      end
    end

    if start_token && end_token
      start_line, end_line = start_token.line_no - 1, end_token.line_no - 1
      # Copy the strings before trimming them: the line objects may be the
      # very ones held in SCRIPT_LINES__, and slice! on them would corrupt
      # that cache for every later lookup.
      source = lines[start_line .. end_line].map { |text| text.dup }
      start_offset = start_token.char_no
      start_offset += 1 if start_token.is_a?(RubyToken::TkDO)
      end_offset = -(source.last.length - end_token.char_no)
      source.first.slice!(0 .. start_offset)
      source.last.slice!(end_offset .. -1)

      # Can't use .strip because newline at end of code might be important
      # (Stuff would break when somebody does proc { ... #foo\n})
      source.join.gsub(/^ | $/, "")
    end
  end

  # Returns [filename, line] describing where +p+ was created, or nil when
  # that cannot be determined.
  #
  # Proc#source_location is used when the Proc provides it; otherwise the
  # location is parsed from the "#<Proc:0x...@file:line>" form of
  # Proc#inspect.
  def source_descriptor p
    location = p.source_location if p.respond_to?(:source_location)
    return location if location

    if md = /^#<Proc:0x[0-9A-Fa-f]+@(.+):(\d+)>$/.match(p.inspect)
      filename, line = md.captures
      return filename, line.to_i
    end
  end


  module_function :handle, :get_lines, :source_descriptor
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Small inflection helpers for converting between underscored names and
# CamelCase constant names. Methods can be called directly on the module.
module StringHelper
  extend self

  # Converts an underscored word to CamelCase; "/" becomes "::".
  #
  # lower_case_and_underscored_word - String or Symbol to convert.
  # first_letter_in_uppercase       - when false, the first character of the
  #                                   result is downcased ("foo_bar" => "fooBar").
  #
  # Returns a new String.
  def camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
    camelized = lower_case_and_underscored_word.to_s.
      gsub(/\/(.?)/) { "::#{$1.upcase}" }.
      gsub(/(?:^|_)(.)/) { $1.upcase }
    return camelized if first_letter_in_uppercase || camelized.empty?

    camelized[0, 1].downcase + camelized[1..-1]
  end

  # Converts a CamelCase word to its underscored, lower-case form;
  # "::" becomes "/" and "-" becomes "_".
  #
  # Returns a new String.
  def underscore(camel_cased_word)
    camel_cased_word.to_s.
      gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
      gsub(/([a-z\d])([A-Z])/, '\1_\2').
      tr("-", "_").
      downcase
  end
end

class ::String
  # Returns the CamelCase form of this string; see StringHelper.camelize.
  def camelize(first_letter_in_uppercase = true)
    StringHelper.camelize(self, first_letter_in_uppercase)
  end
end

class ::Symbol
  # Returns the CamelCase form of this symbol as a String; see
  # StringHelper.camelize.
  def camelize(first_letter_in_uppercase = true)
    StringHelper.camelize(self, first_letter_in_uppercase)
  end
end
|
Binary file
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: appengine-mapreduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 25
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 2
|
10
|
-
version: 0.0.2
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Paul Nikitochkin
|
@@ -49,6 +49,9 @@ files:
|
|
49
49
|
- lib/appengine-mapreduce/boot.rb
|
50
50
|
- lib/appengine-mapreduce/datastore_input_format.rb
|
51
51
|
- lib/appengine-mapreduce/job.rb
|
52
|
+
- lib/appengine-mapreduce/mappable.rb
|
53
|
+
- lib/appengine-mapreduce/proc_source.rb
|
54
|
+
- lib/appengine-mapreduce/string_helpers.rb
|
52
55
|
- target/appengine-mapreduce-jruby-0.0.1-SNAPSHOT.jar
|
53
56
|
- target/dependency/aopalliance-1.0.jar
|
54
57
|
- target/dependency/appengine-mapper-0.0.1-SNAPSHOT.jar
|