appengine-mapreduce 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
@@ -1,7 +1,10 @@
1
1
  $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
2
 
3
+ require 'appengine-mapreduce/job'
4
+ require 'appengine-mapreduce/mappable'
5
+
3
6
  module AppEngine
4
7
  module MapReduce
5
- VERSION = '0.0.1'
8
+ VERSION = '0.0.3'
6
9
  end
7
10
  end
@@ -1,12 +1,19 @@
1
1
  require 'appengine-apis/labs/taskqueue'
2
+ require File.join(File.dirname(__FILE__), 'proc_source')
3
+ require File.join(File.dirname(__FILE__), 'string_helpers')
2
4
 
3
5
  module AppEngine
4
6
  module MapReduce
7
+ JRUBY_PROXY_MAPPER_CLASS = "com.jetthoughts.appengine.tools.mapreduce.JRubyMapper"
8
+
5
9
  BLOBSTORE_INPUT_FORMAT = "com.google.appengine.tools.mapreduce.BlobstoreInputFormat"
6
10
  DATASTORE_INPUT_FORMAT = "com.google.appengine.tools.mapreduce.DatastoreInputFormat"
7
11
 
12
+ OUTPUT_ENTITY_KIND_KEY = "mapreduce.mapper.outputformat.datastoreoutputformat.entitykind"
13
+ MAPREDUCE_SCRIPT_KEY = "mapreduce.ruby.script"
14
+
8
15
  CALLBACK_KEY = "mapreduce.appengine.donecallback.url"
9
-
16
+
10
17
  import com.google.appengine.tools.mapreduce.ConfigurationXmlUtil
11
18
  import com.google.appengine.tools.mapreduce.DatastoreInputFormat
12
19
 
@@ -16,15 +23,14 @@ module AppEngine
16
23
  attr_accessor :map, :reduce, :input_kind, :output_kind, :properties, :input_class
17
24
 
18
25
  def initialize options = {}
19
- puts "Job Options: #{options.inspect}"
20
-
21
26
  @properties = {}
22
27
 
23
28
  %w(map reduce input_class input_kind output_kind).each do |attr_n|
24
- self.send("#{attr_n}=", options.delete(attr_n)) if options[attr_n]
29
+ key = (attr_n.to_sym rescue attr_n) || attr_n
30
+ self.send("#{attr_n}=", options.delete(key)) if options[key]
25
31
  end
26
32
 
27
- @properties["mapreduce.map.class"] = "com.jetthoughts.appengine.tools.mapreduce.JRubyMapper"
33
+ @properties["mapreduce.map.class"] = JRUBY_PROXY_MAPPER_CLASS
28
34
 
29
35
  end
30
36
 
@@ -34,16 +40,30 @@ module AppEngine
34
40
  queue.add({:method => 'POST',
35
41
  :params => {'configuration' => conf_as_string},
36
42
  :url => "/mapreduce/start"})
37
-
38
- puts "Conf: #{conf_as_string}"
39
43
  end
40
44
 
41
45
  def input_kind= aKind
42
46
  @properties[DatastoreInputFormat::ENTITY_KIND_KEY] = aKind
43
47
  end
44
48
 
49
+ def output_kind= aKind
50
+ @properties[OUTPUT_ENTITY_KIND_KEY] = aKind
51
+ end
52
+
53
+ def map &block
54
+ if block
55
+ self.map = block
56
+ end
57
+
58
+ @properties[MAPREDUCE_SCRIPT_KEY]
59
+ end
60
+
45
61
  def map= aMap
46
- @properties["mapreduce.ruby.script"] = aMap
62
+ if aMap.is_a?(Proc)
63
+ aMap = serialize_block aMap
64
+ end
65
+
66
+ @properties[MAPREDUCE_SCRIPT_KEY] = aMap
47
67
  end
48
68
 
49
69
  def input_class= klass
@@ -51,7 +71,15 @@ module AppEngine
51
71
  end
52
72
 
53
73
  def use_blobstore_input_class
54
- input_class = BLOBSTORE_INPUT_FORMAT
74
+ self.input_class = BLOBSTORE_INPUT_FORMAT
75
+ end
76
+
77
+ def self.create_job_for mapper_name
78
+ map = <<MAP
79
+ require 'lib/mappers/#{mapper_name.to_s}_mapper'
80
+ include #{mapper_name.to_s.camelize}Mapper
81
+ MAP
82
+ Job.new(:map => map)
55
83
  end
56
84
 
57
85
  protected
@@ -60,6 +88,14 @@ module AppEngine
60
88
  @properties.each {|k,v| @conf.set(k, v)}
61
89
  ConfigurationXmlUtil.convertConfigurationToXml(@conf)
62
90
  end
91
+
92
+ def serialize_block block
93
+ <<MAP
94
+ self.class.class_eval do
95
+ define_method(:map, lambda{ #{ProcSource.handle(block)} })
96
+ end
97
+ MAP
98
+ end
63
99
  end
64
100
  end
65
101
  end
@@ -0,0 +1,16 @@
1
module AppEngine
  # Mixin that gives a model class a class-level +async_map+ method which
  # starts a MapReduce job over that class's own entity kind.
  #
  # Including the module extends the host class with ClassMethods.
  module Mappable
    # Ruby hook called when the module is included; adds the class-level API
    # to +base+.
    def self.included(base)
      #base.property :name, String
      base.extend ClassMethods
    end

    module ClassMethods
      # Creates a Job whose input kind is this class's +name+ (falling back to
      # +storage_names[:default]+ when the class is anonymous), uses +block+
      # as the map step and runs the job.
      #
      # NOTE(review): +storage_names+ is expected to be supplied by the host
      # class (presumably a DataMapper-style model) -- confirm against callers.
      #
      # Raises ArgumentError when no block is given, instead of building and
      # enqueueing a job whose map script is nil.
      #
      # Returns whatever Job#run returns.
      def async_map(&block)
        raise ArgumentError, 'async_map requires a block' unless block

        job = AppEngine::MapReduce::Job.new(:input_kind => name || storage_names[:default])
        job.map = block
        job.run
      end
    end
  end
end
@@ -0,0 +1,141 @@
1
+ require 'stringio'
2
+ require 'irb/ruby-lex'
3
+
4
+ SCRIPT_LINES__ = {} unless defined? SCRIPT_LINES__
5
+
6
# Recovers the source text of a Proc: finds the file and line where the proc
# was created, then lexes forward from that line to find the matching
# block delimiters ({ ... } or do ... end).
module ProcSource #:nodoc:
  # Returns the lines of +filename+ starting at +start_line+, or nil when the
  # source is not available.
  #
  # filename   - "(irb)", an "(eval...)" descriptor, or a regular file path.
  # start_line - index of the first wanted line. For "(irb)" and "(eval...)"
  #              it is used as-is; for regular files it is 1-based, and any
  #              value below 1 (including the default 0) means "from the
  #              first line".
  def get_lines(filename, start_line = 0)
    return nil unless filename

    case filename
    # special "(irb)" descriptor?
    when "(irb)"
      IRB.conf[:MAIN_CONTEXT].io.line(start_line .. -1)
    # special "(eval...)" descriptor?
    when /^\(eval.+\)$/
      # EVAL_LINES__ is only present when something else records eval'd
      # source; without it there is nothing to look up.
      return nil unless defined?(EVAL_LINES__)

      recorded = EVAL_LINES__[filename]
      recorded && recorded[start_line .. -1]
    # regular file
    else
      # Clamp so that start_line <= 0 does not turn into a negative index
      # (which would return only the tail of the file).
      first = [start_line - 1, 0].max

      # Ruby already parsed this file and kept its lines?
      if defined?(SCRIPT_LINES__) && (lines = SCRIPT_LINES__[filename])
        lines[first .. -1]
      # If the file exists we're going to try reading it in
      elsif File.exist?(filename)
        begin
          File.readlines(filename)[first .. -1]
        rescue StandardError
          nil
        end
      end
    end
  end

  # Returns the source code between the delimiters of +proc+'s block as a
  # String, or nil when it cannot be determined (source unavailable, no block
  # found, or several procs defined on the same line).
  def handle(proc)
    filename, line = source_descriptor proc
    lines = get_lines(filename, line) || []

    lexer = RubyLex.new
    lexer.set_input(StringIO.new(lines.join))

    state = :before_constructor
    nesting_level = 1
    start_token, end_token = nil, nil
    while token = lexer.token
      # we've not yet found any proc-constructor -- we'll try to find one.
      if [:before_constructor, :check_more].include?(state)
        # checking more and newline? -> done
        if token.is_a?(RubyToken::TkNL) && state == :check_more
          state = :done
          break
        end
        # token is Proc?
        if token.is_a?(RubyToken::TkCONSTANT) &&
           token.instance_variable_get(:@name) == "Proc"
          # method call? (note: this consumes the next token)
          if lexer.token.is_a?(RubyToken::TkDOT)
            method = lexer.token
            # constructor?
            if method.is_a?(RubyToken::TkIDENTIFIER) &&
               method.instance_variable_get(:@name) == "new"
              # multiple procs on one line -- can't tell them apart
              return if state == :check_more

              # okay, code will follow soon.
              state = :before_code
            end
          end
        # token is lambda or proc call?
        elsif token.is_a?(RubyToken::TkIDENTIFIER) &&
              %w{proc lambda}.include?(token.instance_variable_get(:@name))
          # multiple procs on one line -- can't tell them apart
          return if state == :check_more

          # okay, code will follow soon.
          state = :before_code
        elsif token.is_a?(RubyToken::TkfLBRACE) || token.is_a?(RubyToken::TkDO)
          # found the code start, update state and remember current token
          state = :in_code
          start_token = token
        end

      # we're waiting for the code start to appear.
      elsif state == :before_code
        if token.is_a?(RubyToken::TkfLBRACE) || token.is_a?(RubyToken::TkDO)
          # found the code start, update state and remember current token
          state = :in_code
          start_token = token
        end

      # okay, we're inside code
      elsif state == :in_code
        if token.is_a?(RubyToken::TkRBRACE) || token.is_a?(RubyToken::TkEND)
          nesting_level -= 1
          if nesting_level == 0
            # we're done!
            end_token = token
            # keep lexing to check if there are multiple procs on one line;
            # we can't handle that case correctly so we return no source code at all
            state = :check_more
          end
        elsif token.is_a?(RubyToken::TkfLBRACE) || token.is_a?(RubyToken::TkDO) ||
              token.is_a?(RubyToken::TkBEGIN) || token.is_a?(RubyToken::TkCASE) ||
              token.is_a?(RubyToken::TkCLASS) || token.is_a?(RubyToken::TkDEF) ||
              token.is_a?(RubyToken::TkFOR) || token.is_a?(RubyToken::TkIF) ||
              token.is_a?(RubyToken::TkMODULE) || token.is_a?(RubyToken::TkUNLESS) ||
              token.is_a?(RubyToken::TkUNTIL) || token.is_a?(RubyToken::TkWHILE) ||
              token.is_a?(RubyToken::TklBEGIN)
          nesting_level += 1
        end
      end
    end

    if start_token && end_token
      start_line, end_line = start_token.line_no - 1, end_token.line_no - 1
      # Work on copies: the strings in +lines+ may be the very objects cached
      # in SCRIPT_LINES__, and slice! below would otherwise corrupt that cache
      # for every later lookup.
      source = lines[start_line .. end_line].map { |text| text.dup }
      start_offset = start_token.char_no
      start_offset += 1 if start_token.is_a?(RubyToken::TkDO)
      end_offset = -(source.last.length - end_token.char_no)
      source.first.slice!(0 .. start_offset)
      source.last.slice!(end_offset .. -1)

      # Can't use .strip because newline at end of code might be important
      # (Stuff would break when somebody does proc { ... #foo\n})
      source.join.gsub(/^ | $/, "")
    end
  end

  # Returns [filename, line_number] describing where +p+ was created, or nil
  # when that cannot be determined.
  #
  # Prefers Proc#source_location where the runtime provides it; otherwise
  # falls back to parsing the "#<Proc:0x...@file:line>" form of Proc#inspect.
  def source_descriptor(p)
    if p.respond_to?(:source_location)
      location = p.source_location
      return location.first(2) if location
    end

    if md = /^#<Proc:0x[0-9A-Fa-f]+@(.+):(\d+)>$/.match(p.inspect)
      filename, line = md.captures
      return filename, line.to_i
    end
  end


  module_function :handle, :get_lines, :source_descriptor
end
@@ -0,0 +1,29 @@
1
# Small inflection helpers for converting between under_scored and CamelCased
# names. All methods are callable directly on the module
# (e.g. StringHelper.camelize("foo_bar")).
module StringHelper
  extend self

  # Converts an under_scored word to CamelCase; "/" separators become "::".
  #
  # lower_case_and_underscored_word - String or Symbol to convert.
  # first_letter_in_uppercase       - when true (default) the result starts
  #                                   with a capital ("FooBar"); when false
  #                                   the first character is lower-cased
  #                                   ("fooBar").
  #
  # Returns a new String.
  def camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
    camelized = lower_case_and_underscored_word.to_s.
      gsub(/\/(.?)/) { "::#{$1.upcase}" }.
      gsub(/(?:^|_)(.)/) { $1.upcase }
    return camelized if first_letter_in_uppercase

    # Only the leading character changes; to_s guards the empty-string case
    # where the tail slice is nil.
    camelized[0, 1].downcase + camelized[1..-1].to_s
  end

  # Converts a CamelCased word to under_scored form; "::" becomes "/" and
  # dashes become underscores.
  #
  # camel_cased_word - String or Symbol to convert.
  #
  # Returns a new String; the argument is not modified.
  def underscore(camel_cased_word)
    camel_cased_word.to_s.
      gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2'). # "HTMLParser" -> "HTML_Parser"
      gsub(/([a-z\d])([A-Z])/, '\1_\2').     # "fooBar"     -> "foo_Bar"
      tr("-", "_").
      downcase
  end
end
18
+
19
# Core extension: lets any String be camelized directly by delegating to
# StringHelper.camelize with the string itself as the word.
::String.class_eval do
  def camelize
    StringHelper.camelize(self)
  end
end
24
+
25
# Core extension: lets any Symbol be camelized directly by delegating to
# StringHelper.camelize with the symbol itself as the word.
::Symbol.class_eval do
  def camelize
    StringHelper.camelize(self)
  end
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: appengine-mapreduce
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Paul Nikitochkin
@@ -49,6 +49,9 @@ files:
49
49
  - lib/appengine-mapreduce/boot.rb
50
50
  - lib/appengine-mapreduce/datastore_input_format.rb
51
51
  - lib/appengine-mapreduce/job.rb
52
+ - lib/appengine-mapreduce/mappable.rb
53
+ - lib/appengine-mapreduce/proc_source.rb
54
+ - lib/appengine-mapreduce/string_helpers.rb
52
55
  - target/appengine-mapreduce-jruby-0.0.1-SNAPSHOT.jar
53
56
  - target/dependency/aopalliance-1.0.jar
54
57
  - target/dependency/appengine-mapper-0.0.1-SNAPSHOT.jar