RubyGems - trafficbroker-mandy - Versions diffs - 0.1.0 - Mend

trafficbroker-mandy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

data/Rakefile ADDED Viewed

@@ -0,0 +1,11 @@
+# Build tasks for Mandy. The default task is :spec.
+require 'rubygems'
+require 'rake'
+require File.expand_path('lib/mandy', File.dirname(__FILE__))
+require 'spec/rake/spectask'
+# Define the :spec task over every *_spec.rb file found under spec/lib.
+Spec::Rake::SpecTask.new(:spec) do |spec_task|
+  spec_task.spec_files = FileList['spec/lib/**/*_spec.rb']
+  spec_task.spec_opts  = ['-f', 's', '-c', '-L', 'mtime']
+end
+task :default => :spec

data/bin/mandy-hadoop ADDED Viewed

@@ -0,0 +1,20 @@
+#!/usr/bin/env ruby
+# Submits a Mandy job to Hadoop streaming, with mandy-map and mandy-reduce
+# as the mapper and reducer commands.
+#
+# Usage: mandy-hadoop <job_file> <input> <output> [config]
+require 'shellwords'
+# Returns +path+ unchanged when it starts with "/", otherwise joins it onto
+# the current working directory.
+def absolute_path(path)
+  path =~ /^\// ? path : File.join(Dir.pwd, path)
+end
+# Fail with a usage message instead of a TypeError from File.join(Dir.pwd, nil).
+abort 'Usage: mandy-hadoop <job_file> <input> <output> [config]' if ARGV.size < 3
+file   = absolute_path(ARGV[0])
+input  = ARGV[1]
+output = ARGV[2]
+config = ARGV[3]
+# The job file is loaded in this process before anything is submitted
+# (presumably so a broken job fails early -- confirm).
+require file
+# Every interpolated value is shell-escaped so that paths containing spaces,
+# quotes or shell metacharacters reach hadoop as single arguments.
+`$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar \
+                -additionalconfspec #{Shellwords.escape(config.to_s)} \
+                -input #{Shellwords.escape(input)}  \
+                -mapper #{Shellwords.escape("mandy-map #{file}")}  \
+                -reducer #{Shellwords.escape("mandy-reduce #{file}")}  \
+                -file #{Shellwords.escape(file)} \
+                -output #{Shellwords.escape(output)}`

data/bin/mandy-local ADDED Viewed

@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+# Runs a Mandy job on the local machine by piping the input file through
+# mandy-map, sort and mandy-reduce, and writing the result to the output file.
+#
+# Usage: mandy-local <job_file> <input_file> <output_file>
+require 'shellwords'
+# Returns +path+ unchanged when it starts with "/", otherwise joins it onto
+# the current working directory.
+def absolute_path(path)
+  path =~ /^\// ? path : File.join(Dir.pwd, path)
+end
+# Fail with a usage message instead of a TypeError from File.join(Dir.pwd, nil).
+abort 'Usage: mandy-local <job_file> <input_file> <output_file>' if ARGV.size < 3
+# Paths are shell-escaped before interpolation so that names containing
+# spaces or shell metacharacters are passed as single arguments.
+file   = Shellwords.escape(absolute_path(ARGV[0]))
+input  = Shellwords.escape(absolute_path(ARGV[1]))
+output = Shellwords.escape(absolute_path(ARGV[2]))
+`cat #{input} | mandy-map #{file} | sort | mandy-reduce #{file} > #{output}`

data/bin/mandy-map ADDED Viewed

@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+# Map-phase entry point: loads the job script named by the first argument and
+# calls run_map on Mandy::Job.default with no arguments.
+# Returns +path+ as given when it matches /^\//, otherwise the path joined
+# onto the current working directory.
+def absolute_path(path)
+  return path if path =~ %r{^/}
+  File.join(Dir.pwd, path)
+end
+job_script = absolute_path(ARGV[0])
+require job_script
+Mandy::Job.default.run_map

data/bin/mandy-put ADDED Viewed

@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+# Copies a local file to a Hadoop filesystem with `hadoop fs -copyFromLocal`,
+# using the given configuration file.
+#
+# Usage: mandy-put <local_source> <destination> <config_file>
+require 'shellwords'
+# Returns +path+ unchanged when it starts with "/", otherwise joins it onto
+# the current working directory.
+def absolute_path(path)
+  path =~ /^\// ? path : File.join(Dir.pwd, path)
+end
+# Fail with a usage message instead of a TypeError from File.join(Dir.pwd, nil).
+abort 'Usage: mandy-put <local_source> <destination> <config_file>' if ARGV.size < 3
+# Arguments are shell-escaped before interpolation so that paths containing
+# spaces or shell metacharacters are passed to hadoop as single arguments.
+source = Shellwords.escape(absolute_path(ARGV[0]))
+dest   = Shellwords.escape(ARGV[1])
+config = Shellwords.escape(absolute_path(ARGV[2]))
+`$HADOOP_HOME/bin/hadoop fs -conf #{config} -copyFromLocal #{source} #{dest}`

data/bin/mandy-reduce ADDED Viewed

@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+# Reduce-phase entry point: loads the job script named by the first argument
+# and calls run_reduce on Mandy::Job.default with no arguments.
+# Returns +path+ as given when it matches /^\//, otherwise the path joined
+# onto the current working directory.
+def absolute_path(path)
+  return path if path =~ %r{^/}
+  File.join(Dir.pwd, path)
+end
+job_script = absolute_path(ARGV[0])
+require job_script
+Mandy::Job.default.run_reduce

data/lib/array_serializer.rb ADDED Viewed

@@ -0,0 +1,22 @@
+module Mandy
+  # Converts between a list of items and a single SEPERATOR-delimited string.
+  class ArraySerializer
+    SEPERATOR = '|' unless defined?(SEPERATOR)
+    class << self
+      # Splits +str+ on SEPERATOR and returns the resulting pieces.
+      def from_s(str)
+        str.split(SEPERATOR)
+      end
+      # Splits +str+ on SEPERATOR and builds one Tuple per piece via Tuple.from_s.
+      def tuples_from_s(str)
+        pieces = from_s(str)
+        pieces.map { |piece| Tuple.from_s(piece) }
+      end
+    end
+    # items - the collection to serialize; nil or false is replaced by an empty list.
+    def initialize(items)
+      @items = items ? items : []
+    end
+    # Joins the items with SEPERATOR.
+    def to_s
+      @items.join(SEPERATOR)
+    end
+  end
+end

data/lib/dsl.rb ADDED Viewed

@@ -0,0 +1,16 @@
+module Mandy
+  # Mixin exposing +map+ and +reduce+, each of which forwards its block to
+  # Mandy::Job.default.
+  module DSL
+    # Hook run when the module is included: replaces Mandy::Job.default with
+    # a new job named 'Untitled Job'.
+    def self.included(klass)
+      untitled = Mandy::Job.new('Untitled Job')
+      Mandy::Job.default = untitled
+    end
+    # Forwards the block to the default job's #map.
+    def map(&blk)
+      default_job = Mandy::Job.default
+      default_job.map(&blk)
+    end
+    # Forwards the block to the default job's #reduce.
+    def reduce(&blk)
+      default_job = Mandy::Job.default
+      default_job.reduce(&blk)
+    end
+  end
+end

data/lib/job.rb ADDED Viewed

@@ -0,0 +1,32 @@
+module Mandy
+  # A named map/reduce job. Holds the mapper and reducer classes compiled from
+  # the blocks passed to #map and #reduce, and runs them against IO streams.
+  class Job
+    class << self
+      # Class-level slot holding the job used when none is named explicitly.
+      attr_accessor :default
+    end
+    # name - label for the job (stored only; not used elsewhere in this class).
+    # blk  - optional block evaluated in the context of the new job, so it can
+    #        call #map and #reduce directly.
+    def initialize(name, &blk)
+      @name = name
+      instance_eval(&blk) if blk
+    end
+    # Compiles the block into a mapper class and stores it for #run_map.
+    def map(&blk)
+      @mapper_class = Mandy::Mapper.compile(&blk)
+    end
+    # Compiles the block into a reducer class and stores it for #run_reduce.
+    def reduce(&blk)
+      @reducer_class = Mandy::Reducer.compile(&blk)
+    end
+    # Instantiates the mapper on the given streams, yields it to the optional
+    # block (e.g. for test setup), then executes it.
+    def run_map(input=STDIN, output=STDOUT, &blk)
+      # With no map step declared, fall back to the base Mandy::Mapper rather
+      # than calling .new on nil.
+      mapper = (@mapper_class || Mandy::Mapper).new(input, output)
+      yield(mapper) if blk
+      mapper.execute
+    end
+    # Instantiates the reducer on the given streams, yields it to the optional
+    # block, then executes it.
+    def run_reduce(input=STDIN, output=STDOUT, &blk)
+      # With no reduce step declared, fall back to the base Mandy::Reducer
+      # rather than calling .new on nil.
+      reducer = (@reducer_class || Mandy::Reducer).new(input, output)
+      yield(reducer) if blk
+      reducer.execute
+    end
+  end
+end

data/lib/mandy.rb ADDED Viewed

	@@ -0,0 +1 @@
+# Load every Mandy component that sits alongside this file.
+%w(tuple array_serializer mapper reducer dsl job test_runner).each { |file| require File.join(File.dirname(__FILE__), file) }

data/lib/mapper.rb ADDED Viewed

@@ -0,0 +1,40 @@
+module Mandy
+  # Base class for the map step. Reads lines from an input stream, splits each
+  # into a key and a value, and passes them to #mapper, which may call #emit
+  # to write records to the output stream.
+  class Mapper
+    KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
+    # input  - object responding to each_line (defaults to STDIN).
+    # output - object responding to puts (defaults to STDOUT).
+    def initialize(input=STDIN, output=STDOUT)
+      @input, @output = input, output
+    end
+    # Builds an anonymous Mapper subclass whose #mapper is the given block.
+    # Without a block the subclass keeps the inherited pass-through #mapper.
+    def self.compile(&blk)
+      Class.new(Mandy::Mapper) do
+        define_method(:mapper, blk) if blk
+      end
+    end
+    # Feeds every input line to #mapper as a (key, value) pair.
+    def execute
+      @input.each_line do |line|
+        # Split on the first separator only, so that tabs inside the value
+        # are kept instead of being silently dropped.
+        key, value = line.chomp.split(KEY_VALUE_SEPERATOR, 2)
+        # A line with no separator is treated as a bare value with a nil key.
+        key, value = nil, key.to_s if value.nil?
+        mapper(key, value)
+      end
+    end
+    # Writes one record. A nil key is written as the string 'nil'; a nil
+    # value produces a key-only line.
+    def emit(key, value=nil)
+      key = 'nil' if key.nil?
+      @output.puts(value.nil? ? key.to_s : "#{key}#{KEY_VALUE_SEPERATOR}#{value}")
+    end
+    private
+    def mapper(key,value)
+      # default map is simply a pass-through
+      emit(key, value)
+    end
+  end
+end

data/lib/reducer.rb ADDED Viewed

@@ -0,0 +1,44 @@
+module Mandy
+  # Base class for the reduce step. Reads key/value lines from an input
+  # stream, groups consecutive lines that share a key, and passes each group
+  # to #reducer, which may call #emit to write records to the output stream.
+  class Reducer
+    KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
+    # input  - object responding to each_line (defaults to STDIN).
+    # output - object responding to puts (defaults to STDOUT).
+    def initialize(input=STDIN, output=STDOUT)
+      @input, @output = input, output
+    end
+    # Builds an anonymous Reducer subclass whose #reducer is the given block.
+    # Without a block the subclass keeps the inherited default #reducer.
+    def self.compile(&blk)
+      Class.new(Mandy::Reducer) do
+        define_method(:reducer, blk) if blk
+      end
+    end
+    # Calls #reducer once per run of consecutive lines sharing a key, passing
+    # the key and the array of that run's values. Input is expected to arrive
+    # already ordered by key.
+    def execute
+      current_key, values, seen_any = nil, [], false
+      @input.each_line do |line|
+        # Split on the first separator only, so tabs inside the value survive.
+        key, value = line.chomp.split(KEY_VALUE_SEPERATOR, 2)
+        if seen_any && key != current_key
+          reducer(current_key, values)
+          values = []
+        end
+        current_key, seen_any = key, true
+        # A key-only line (no separator) contributes an empty value.
+        values << value.to_s
+      end
+      # Flush the final group; without this the last key is never reduced.
+      reducer(current_key, values) if seen_any
+    end
+    # Writes one record. A nil key is written as the string 'nil'; a nil
+    # value produces a key-only line.
+    def emit(key, value=nil)
+      key = 'nil' if key.nil?
+      @output.puts(value.nil? ? key.to_s : "#{key}#{KEY_VALUE_SEPERATOR}#{value}")
+    end
+    private
+    def reducer(key,value)
+      # default reducer is simply a pass-through
+      # (+value+ here is the whole array of values collected for +key+)
+      emit(key, value)
+    end
+  end
+end

data/lib/test_runner.rb ADDED Viewed

@@ -0,0 +1,38 @@
+# StringIO is used for the in-memory streams below and is not loaded by default.
+require 'stringio'
+module Mandy
+  # Runs a job's map and reduce steps against in-memory streams, for tests.
+  class TestRunner
+    # job - the job to exercise (defaults to Mandy::Job.default).
+    def initialize(job=Mandy::Job.default)
+      @job = job
+    end
+    # Runs the job's map step.
+    # input         - an Array of lines (joined with "\n") or any object whose
+    #                 to_s is the raw input text.
+    # output_stream - stream the step writes to (a fresh StringIO by default).
+    # blk           - optional block forwarded to Job#run_map.
+    # Returns output_stream, rewound to the start.
+    def map(input, output_stream=StringIO.new(''), &blk)
+      input = input_from_array(input) if input.is_a?(Array)
+      input_stream = StringIO.new(input.to_s)
+      @job.run_map(input_stream, output_stream, &blk)
+      output_stream.rewind
+      output_stream
+    end
+    # Runs the job's reduce step.
+    # input         - a Hash of key => value or key => Array of values
+    #                 (expanded to sorted "key\tvalue" lines), or any object
+    #                 whose to_s is the raw input text.
+    # output_stream - stream the step writes to (a fresh StringIO by default).
+    # blk           - optional block forwarded to Job#run_reduce.
+    # Returns output_stream, rewound to the start.
+    def reduce(input, output_stream=StringIO.new(''), &blk)
+      input = input_from_hash(input) if input.is_a?(Hash)
+      input_stream = StringIO.new(input.to_s)
+      @job.run_reduce(input_stream, output_stream, &blk)
+      output_stream.rewind
+      output_stream
+    end
+    private
+    # Joins the lines with newlines (no trailing newline).
+    def input_from_array(input)
+      input.join("\n")
+    end
+    # Expands the hash into one "key\tvalue" line per value and sorts the
+    # lines, so that lines sharing a key are adjacent.
+    def input_from_hash(input)
+      lines = []
+      input.each do |key, values|
+        if values.is_a?(Array)
+          values.each { |value| lines << "#{key}\t#{value}" }
+        else
+          lines << "#{key}\t#{values}"
+        end
+      end
+      input_from_array(lines.sort)
+    end
+  end
+end

data/lib/tuple.rb ADDED Viewed

@@ -0,0 +1,24 @@
+module Mandy
+  # A name/value pair that serializes to "name<SEPERATOR>value".
+  class Tuple
+    SEPERATOR = ',' unless defined?(SEPERATOR)
+    attr_accessor :name, :value
+    def initialize(name, value)
+      @name, @value = name, value
+    end
+    # Returns the name and value joined by SEPERATOR.
+    def to_s
+      %(#{@name}#{SEPERATOR}#{@value})
+    end
+    # Parses a string produced by #to_s. Only the first SEPERATOR splits the
+    # string, so a value that itself contains the separator is kept whole;
+    # a string with no separator yields a nil value.
+    def self.from_s(str)
+      name, value = str.split(SEPERATOR, 2)
+      new(name, value)
+    end
+    # Two tuples are equal when both name and value match. Objects that do
+    # not expose name and value (nil included) compare as unequal instead of
+    # raising NoMethodError.
+    def ==(other)
+      return false unless other.respond_to?(:name) && other.respond_to?(:value)
+      self.name == other.name && self.value == other.value
+    end
+  end
+end

data/readme.md ADDED Viewed

@@ -0,0 +1,9 @@
+Mandy - Simplified Hadoop distribution for Ruby code
+====================================================
+Mandy hides the differences between, and the complexity of, running map/reduce tasks locally, distributed on Hadoop, or in a test environment.
+It provides a simple DSL to define new jobs for distribution. See examples/word_count.rb for a very simple demo.
+Run the word count example locally with...
+    bin/mandy-local examples/word_count.rb examples/alice.txt examples/output.txt

metadata ADDED Viewed

@@ -0,0 +1,72 @@
+--- !ruby/object:Gem::Specification
+name: trafficbroker-mandy
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Andy Kent
+- Paul Ingles
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2009-07-09 00:00:00 -07:00
+default_executable:
+dependencies: []
+description: Map/Reduce
+email: andy.kent@me.com
+executables:
+- mandy-hadoop
+- mandy-local
+- mandy-map
+- mandy-put
+- mandy-reduce
+extensions: []
+extra_rdoc_files: []
+files:
+- bin/mandy-hadoop
+- bin/mandy-local
+- bin/mandy-map
+- bin/mandy-put
+- bin/mandy-reduce
+- readme.md
+- Rakefile
+- lib/mandy.rb
+- lib/array_serializer.rb
+- lib/dsl.rb
+- lib/job.rb
+- lib/mapper.rb
+- lib/reducer.rb
+- lib/test_runner.rb
+- lib/tuple.rb
+has_rdoc: false
+homepage:
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+rubyforge_project:
+rubygems_version: 1.2.0
+signing_key:
+specification_version: 2
+summary: Map/Reduce
+test_files: []