appengine-mapreduce 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
@@ -1,7 +1,10 @@
1
1
  $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
2
 
3
+ require 'appengine-mapreduce/job'
4
+ require 'appengine-mapreduce/mappable'
5
+
3
6
  module AppEngine
4
7
  module MapReduce
5
- VERSION = '0.0.1'
8
+ VERSION = '0.0.3'
6
9
  end
7
10
  end
@@ -1,12 +1,19 @@
1
1
  require 'appengine-apis/labs/taskqueue'
2
+ require File.join(File.dirname(__FILE__), 'proc_source')
3
+ require File.join(File.dirname(__FILE__), 'string_helpers')
2
4
 
3
5
  module AppEngine
4
6
  module MapReduce
7
+ JRUBY_PROXY_MAPPER_CLASS = "com.jetthoughts.appengine.tools.mapreduce.JRubyMapper"
8
+
5
9
  BLOBSTORE_INPUT_FORMAT = "com.google.appengine.tools.mapreduce.BlobstoreInputFormat"
6
10
  DATASTORE_INPUT_FORMAT = "com.google.appengine.tools.mapreduce.DatastoreInputFormat"
7
11
 
12
+ OUTPUT_ENTITY_KIND_KEY = "mapreduce.mapper.outputformat.datastoreoutputformat.entitykind"
13
+ MAPREDUCE_SCRIPT_KEY = "mapreduce.ruby.script"
14
+
8
15
  CALLBACK_KEY = "mapreduce.appengine.donecallback.url"
9
-
16
+
10
17
  import com.google.appengine.tools.mapreduce.ConfigurationXmlUtil
11
18
  import com.google.appengine.tools.mapreduce.DatastoreInputFormat
12
19
 
@@ -16,15 +23,14 @@ module AppEngine
16
23
  attr_accessor :map, :reduce, :input_kind, :output_kind, :properties, :input_class
17
24
 
18
25
  def initialize options = {}
19
- puts "Job Options: #{options.inspect}"
20
-
21
26
  @properties = {}
22
27
 
23
28
  %w(map reduce input_class input_kind output_kind).each do |attr_n|
24
- self.send("#{attr_n}=", options.delete(attr_n)) if options[attr_n]
29
+ key = (attr_n.to_sym rescue attr_n) || attr_n
30
+ self.send("#{attr_n}=", options.delete(key)) if options[key]
25
31
  end
26
32
 
27
- @properties["mapreduce.map.class"] = "com.jetthoughts.appengine.tools.mapreduce.JRubyMapper"
33
+ @properties["mapreduce.map.class"] = JRUBY_PROXY_MAPPER_CLASS
28
34
 
29
35
  end
30
36
 
@@ -34,16 +40,30 @@ module AppEngine
34
40
  queue.add({:method => 'POST',
35
41
  :params => {'configuration' => conf_as_string},
36
42
  :url => "/mapreduce/start"})
37
-
38
- puts "Conf: #{conf_as_string}"
39
43
  end
40
44
 
41
45
  def input_kind= aKind
42
46
  @properties[DatastoreInputFormat::ENTITY_KIND_KEY] = aKind
43
47
  end
44
48
 
49
# Records +aKind+ in the job properties under OUTPUT_ENTITY_KIND_KEY.
def output_kind= aKind
  @properties.store(OUTPUT_ENTITY_KIND_KEY, aKind)
end
52
+
53
# Reader for the map script that doubles as a block-style writer.
#
# When a block is given it is first handed to +map=+; in every case the
# value currently stored under MAPREDUCE_SCRIPT_KEY is returned.
def map &block
  self.map = block if block
  @properties[MAPREDUCE_SCRIPT_KEY]
end
60
+
45
61
  def map= aMap
46
- @properties["mapreduce.ruby.script"] = aMap
62
+ if aMap.is_a?(Proc)
63
+ aMap = serialize_block aMap
64
+ end
65
+
66
+ @properties[MAPREDUCE_SCRIPT_KEY] = aMap
47
67
  end
48
68
 
49
69
  def input_class= klass
@@ -51,7 +71,15 @@ module AppEngine
51
71
  end
52
72
 
53
73
  def use_blobstore_input_class
54
- input_class = BLOBSTORE_INPUT_FORMAT
74
+ self.input_class = BLOBSTORE_INPUT_FORMAT
75
+ end
76
+
77
# Builds a Job whose map script requires 'lib/mappers/<name>_mapper' and
# includes the <CamelizedName>Mapper module.
#
# NOTE(review): +mapper_name+ is interpolated straight into Ruby source that
# is stored as the job's script, so only trusted identifiers should be
# passed here.
def self.create_job_for mapper_name
  mapper = mapper_name.to_s
  script = <<MAP
require 'lib/mappers/#{mapper}_mapper'
include #{mapper.camelize}Mapper
MAP
  Job.new(:map => script)
end
56
84
 
57
85
  protected
@@ -60,6 +88,14 @@ module AppEngine
60
88
  @properties.each {|k,v| @conf.set(k, v)}
61
89
  ConfigurationXmlUtil.convertConfigurationToXml(@conf)
62
90
  end
91
+
92
# Turns a map block into script text.
#
# The block's source is recovered with ProcSource.handle and wrapped in a
# +class_eval+ that defines a +map+ method from a lambda with that source
# as its body.
#
# Raises ArgumentError when ProcSource.handle returns nil; otherwise the
# generated script would silently define a mapper with an empty body.
def serialize_block block
  body = ProcSource.handle(block)
  raise ArgumentError, "unable to extract source for map block" if body.nil?

  <<MAP
self.class.class_eval do
define_method(:map, lambda{ #{body} })
end
MAP
end
63
99
  end
64
100
  end
65
101
  end
@@ -0,0 +1,16 @@
1
module AppEngine
  # Mixin that adds a class-level +async_map+ to the including class.
  module Mappable
    # Hook run on +include+: extends the including class with ClassMethods.
    def self.included(base)
      #base.property :name, String
      base.extend ClassMethods
    end

    module ClassMethods
      # Creates a MapReduce job over this class and runs it with +block+ as
      # the map step.
      #
      # The job's input kind is the class +name+, falling back to
      # +storage_names[:default]+ when +name+ is nil.
      # NOTE(review): +storage_names+ is not defined here; presumably it is
      # supplied by the including model class -- confirm.
      #
      # Raises ArgumentError when called without a block, rather than
      # storing a nil map script on the job.
      # Returns whatever Job#run returns.
      def async_map &block
        raise ArgumentError, "async_map requires a block" unless block

        job = AppEngine::MapReduce::Job.new(:input_kind => name || storage_names[:default])
        job.map = block
        job.run
      end
    end
  end
end
@@ -0,0 +1,141 @@
1
+ require 'stringio'
2
+ require 'irb/ruby-lex'
3
+
4
+ SCRIPT_LINES__ = {} unless defined? SCRIPT_LINES__
5
+
6
# Recovers the source text of a Proc by locating the file and line it was
# created on and lexing forward from there to find the block's delimiters.
module ProcSource #:nodoc:
  # Returns the source lines for +filename+ beginning at +start_line+, or
  # nil when the source cannot be obtained.
  #
  # * "(irb)" is read from the IRB main context's io.
  # * "(eval...)" descriptors are looked up in EVAL_LINES__.
  #   NOTE(review): EVAL_LINES__ is not defined in this file; confirm it is
  #   provided elsewhere before relying on this branch.
  # * Anything else is treated as a file: the cached SCRIPT_LINES__ entry is
  #   used when present, otherwise the file is read from disk. +start_line+
  #   is 1-based here; values below 1 mean "from the first line".
  def get_lines(filename, start_line = 0)
    return nil unless filename

    case filename
      # special "(irb)" descriptor?
      when "(irb)"
        IRB.conf[:MAIN_CONTEXT].io.line(start_line .. -1)
      # special "(eval...)" descriptor?
      when /^\(eval.+\)$/
        EVAL_LINES__[filename][start_line .. -1]
      # regular file
      else
        # Clamp so the default start_line of 0 does not become index -1,
        # which would return only the last line.
        first_index = [start_line - 1, 0].max
        # Ruby already recorded this file's lines in SCRIPT_LINES__?
        if lines = SCRIPT_LINES__[filename]
          lines[first_index .. -1]
        # If the file exists we're going to try reading it in
        elsif File.exist?(filename)
          begin
            File.readlines(filename)[first_index .. -1]
          rescue
            nil
          end
        end
    end
  end

  # Returns the text between the delimiters of +proc+'s block
  # ({ ... } or do ... end), or nil when no complete block was found or
  # when another proc constructor follows on the closing line (that case
  # cannot be told apart reliably).
  def handle(proc)
    filename, line = source_descriptor proc
    lines = get_lines(filename, line) || []

    lexer = RubyLex.new
    lexer.set_input(StringIO.new(lines.join))

    state = :before_constructor
    nesting_level = 1
    start_token, end_token = nil, nil
    while token = lexer.token
      # we've not yet found any proc-constructor -- we'll try to find one.
      if [:before_constructor, :check_more].include?(state)
        # checking more and newline? -> done
        if token.is_a?(RubyToken::TkNL) and state == :check_more
          state = :done
          break
        end
        # token is Proc?
        if token.is_a?(RubyToken::TkCONSTANT) and
           token.instance_variable_get(:@name) == "Proc"
          # method call?
          if lexer.token.is_a?(RubyToken::TkDOT)
            method = lexer.token
            # constructor?
            if method.is_a?(RubyToken::TkIDENTIFIER) and
               method.instance_variable_get(:@name) == "new"
              unless state == :check_more
                # okay, code will follow soon.
                state = :before_code
              else
                # multiple procs on one line
                return
              end
            end
          end
        # token is lambda or proc call?
        elsif token.is_a?(RubyToken::TkIDENTIFIER) and
              %w{proc lambda}.include?(token.instance_variable_get(:@name))
          unless state == :check_more
            # okay, code will follow soon.
            state = :before_code
          else
            # multiple procs on one line
            return
          end
        elsif token.is_a?(RubyToken::TkfLBRACE) or token.is_a?(RubyToken::TkDO)
          # found the code start, update state and remember current token
          state = :in_code
          start_token = token
        end

      # we're waiting for the code start to appear.
      elsif state == :before_code
        if token.is_a?(RubyToken::TkfLBRACE) or token.is_a?(RubyToken::TkDO)
          # found the code start, update state and remember current token
          state = :in_code
          start_token = token
        end

      # okay, we're inside code
      elsif state == :in_code
        if token.is_a?(RubyToken::TkRBRACE) or token.is_a?(RubyToken::TkEND)
          nesting_level -= 1
          if nesting_level == 0
            # we're done!
            end_token = token
            # parse another time to check if there are multiple procs on one line
            # we can't handle that case correctly so we return no source code at all
            state = :check_more
          end
        elsif token.is_a?(RubyToken::TkfLBRACE) or token.is_a?(RubyToken::TkDO) or
              token.is_a?(RubyToken::TkBEGIN) or token.is_a?(RubyToken::TkCASE) or
              token.is_a?(RubyToken::TkCLASS) or token.is_a?(RubyToken::TkDEF) or
              token.is_a?(RubyToken::TkFOR) or token.is_a?(RubyToken::TkIF) or
              token.is_a?(RubyToken::TkMODULE) or token.is_a?(RubyToken::TkUNLESS) or
              token.is_a?(RubyToken::TkUNTIL) or token.is_a?(RubyToken::TkWHILE) or
              token.is_a?(RubyToken::TklBEGIN)
          nesting_level += 1
        end
      end
    end

    if start_token and end_token
      start_line, end_line = start_token.line_no - 1, end_token.line_no - 1
      # Work on copies: the strings in +lines+ may be the very objects held
      # in SCRIPT_LINES__, and slice! below would otherwise truncate the
      # cached source for every later lookup.
      source = lines[start_line .. end_line].map { |src_line| src_line.dup }
      start_offset = start_token.char_no
      start_offset += 1 if start_token.is_a?(RubyToken::TkDO)
      end_offset = -(source.last.length - end_token.char_no)
      source.first.slice!(0 .. start_offset)
      source.last.slice!(end_offset .. -1)

      # Can't use .strip because newline at end of code might be important
      # (Stuff would break when somebody does proc { ... #foo\n})
      source.join.gsub(/^ | $/, "")
    end
  end

  # Returns [filename, line] describing where +p+ was created, or nil when
  # that cannot be determined.
  #
  # Proc#source_location is used when the runtime provides it; otherwise the
  # location is parsed out of the "#<Proc:0x...@file:line>" inspect string.
  def source_descriptor p
    location = p.respond_to?(:source_location) && p.source_location
    return location if location

    if md = /^#<Proc:0x[0-9A-Fa-f]+@(.+):(\d+)>$/.match(p.inspect)
      filename, line = md.captures
      return filename, line.to_i
    end
  end


  module_function :handle, :get_lines, :source_descriptor
end
@@ -0,0 +1,29 @@
1
# Case-conversion helpers for identifiers and constant paths.
module StringHelper
  extend self

  # Converts an underscored word to CamelCase; "/" becomes "::".
  #
  #   camelize("active_record/errors")  # => "ActiveRecord::Errors"
  #   camelize("active_record", false)  # => "activeRecord"
  #
  # +first_letter_in_uppercase+ - when false, the first character of the
  # result is lower-cased. Defaults to true.
  #
  # Accepts anything responding to +to_s+. Returns a new String.
  def camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
    camelized = lower_case_and_underscored_word.to_s.
      gsub(/\/(.?)/) { "::#{$1.upcase}" }.
      gsub(/(?:^|_)(.)/) { $1.upcase }
    return camelized if first_letter_in_uppercase

    # to_s guards the empty-string case, where [1..-1] is nil.
    camelized[0, 1].downcase + camelized[1..-1].to_s
  end

  # Converts a CamelCase word to underscored lower case; "::" becomes "/"
  # and "-" becomes "_".
  #
  #   underscore("ActiveRecord::Errors")  # => "active_record/errors"
  #
  # Works on a copy, so the argument itself is not modified.
  def underscore(camel_cased_word)
    word = camel_cased_word.to_s.dup
    word.gsub!(/::/, '/')
    word.gsub!(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
    word.gsub!(/([a-z\d])([A-Z])/,'\1_\2')
    word.tr!("-", "_")
    word.downcase!
    word
  end
end
18
+
19
class ::String
  # Returns StringHelper.camelize applied to this string.
  #
  # +first_letter_in_uppercase+ is forwarded to the helper; it defaults to
  # true, so existing zero-argument calls are unaffected.
  def camelize(first_letter_in_uppercase = true)
    StringHelper.camelize(self, first_letter_in_uppercase)
  end
end
24
+
25
class ::Symbol
  # Returns StringHelper.camelize applied to this symbol (result is a
  # String).
  #
  # +first_letter_in_uppercase+ is forwarded to the helper; it defaults to
  # true, so existing zero-argument calls are unaffected.
  def camelize(first_letter_in_uppercase = true)
    StringHelper.camelize(self, first_letter_in_uppercase)
  end
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: appengine-mapreduce
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Paul Nikitochkin
@@ -49,6 +49,9 @@ files:
49
49
  - lib/appengine-mapreduce/boot.rb
50
50
  - lib/appengine-mapreduce/datastore_input_format.rb
51
51
  - lib/appengine-mapreduce/job.rb
52
+ - lib/appengine-mapreduce/mappable.rb
53
+ - lib/appengine-mapreduce/proc_source.rb
54
+ - lib/appengine-mapreduce/string_helpers.rb
52
55
  - target/appengine-mapreduce-jruby-0.0.1-SNAPSHOT.jar
53
56
  - target/dependency/aopalliance-1.0.jar
54
57
  - target/dependency/appengine-mapper-0.0.1-SNAPSHOT.jar