mongo-hadoop 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ require 'bson'
2
+ require 'mongo-hadoop/input'
3
+ require 'mongo-hadoop/output'
4
+ require 'mongo-hadoop/mapper'
5
+ require 'mongo-hadoop/reducer'
@@ -0,0 +1,29 @@
1
# Reads consecutive BSON documents from an IO-like stream and exposes them
# through Enumerable.
class BSONInput
  include Enumerable

  # stream - IO-like object handed to BSON.read_bson_document; $stdin is used
  #          when nil (or omitted).
  def initialize(stream=nil)
    @stream = stream || $stdin
  end

  # Reads the next document from the stream.
  #
  # Returns whatever BSON.read_bson_document returns, or nil when no further
  # document can be read.
  def read
    # Detect end of input explicitly when the stream can report it, instead of
    # relying on an exception for the normal termination path.
    return nil if @stream.respond_to?(:eof?) && @stream.eof?

    begin
      BSON.read_bson_document(@stream)
    rescue NoMethodError
      # Fallback retained from the original implementation for streams that
      # cannot report EOF. NOTE(review): presumably the reader raises this
      # when the stream's read returns nil at end of input -- confirm against
      # the bson gem version in use.
      nil
    end
  end

  # Yields each document in stream order until #read returns nil.
  #
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    while (doc = read)
      yield doc
    end
  end
end
22
+
23
# BSONInput variant that yields each document together with its '_id' field.
class BSONKeyValueInput < BSONInput
  # Yields two values per document: doc['_id'] and the document itself.
  #
  # Returns an Enumerator when called without a block, so the class can be
  # chained like any other Enumerable source.
  def each
    return enum_for(:each) unless block_given?

    while (doc = read)
      yield doc['_id'], doc
    end
  end
end
@@ -0,0 +1,29 @@
1
# Mapper entry points: read documents from a BSONInput / BSONKeyValueInput,
# pass them to the caller's block and write the block's results out.
class MongoHadoop
  # Runs the given block once per input document and writes its result(s)
  # through a BSONOutput.
  #
  # The block receives the document. Its return value may be:
  #   * a single document (Hash)    - written as-is
  #   * a collection of documents   - each non-nil element is written
  #   * nil / false                 - nothing is written
  def self.map
    input = BSONInput.new
    output = BSONOutput.new

    input.each do |doc|
      result = yield doc
      # A Hash responds to #each, but it is one document, not a list of
      # documents; iterating it would write its key/value pairs instead.
      result = [result] if result.is_a?(Hash) || !result.respond_to?(:each)

      result.each do |item|
        output.write item if item
      end
    end
  end

  # Runs the given block once per (key, document) pair from a
  # BSONKeyValueInput and writes its result(s) through a BSONKeyValueOutput.
  #
  # The block's return value is iterated when it responds to #each (every
  # non-nil element is passed to BSONKeyValueOutput#write); any other value is
  # treated as a single element, and nil / false writes nothing.
  def self.kvmap
    kvinput = BSONKeyValueInput.new
    kvoutput = BSONKeyValueOutput.new

    kvinput.each do |key, value|
      result = yield key, value
      result = [result] unless result.respond_to?(:each)

      result.each do |item|
        kvoutput.write item if item
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
# Serializes documents with BSON.serialize and writes them to an IO-like
# stream, flushing after every document.
class BSONOutput
  # stream - object responding to #write and #flush; $stdout is used when nil
  #          (or omitted).
  def initialize(stream=nil)
    @stream = stream ? stream : $stdout
  end

  # Serializes doc, writes the result to the stream and flushes it.
  #
  # Returns the result of the stream's #flush.
  def write(doc)
    @stream.write(BSON.serialize(doc))
    @stream.flush
  end
end
12
+
13
# BSONOutput variant that accepts [key, value] pairs and writes each pair as
# one document whose '_id' is the key.
class BSONKeyValueOutput < BSONOutput
  # pair - a two-element [key, value] collection (it is splatted).
  #
  # When value is a Hash it is used as the document body; any other value is
  # wrapped as { :value => value }. In both cases '_id' is set to key.
  def write(pair)
    key, value = *pair

    # Build a new Hash rather than assigning into `value`, so the caller's
    # object is not modified as a side effect of writing it.
    doc = if value.is_a?(Hash)
      value.merge('_id' => key)
    else
      { :value => value, '_id' => key }
    end

    super(doc)
  end
end
@@ -0,0 +1,28 @@
1
# Reducer entry points: read all input, group it by key, pass every group to
# the caller's block and write the block's result.
class MongoHadoop
  # Groups all input documents by doc['_id'] and calls the block once per
  # group with (key, documents). A truthy block result is written through a
  # BSONOutput; nil / false writes nothing, matching the mapper's behavior.
  #
  # The whole input is read before the first group is yielded.
  def self.reduce
    input = BSONInput.new
    output = BSONOutput.new

    input.group_by { |doc| doc['_id'] }.each do |key, values|
      reduced = yield key, values
      output.write(reduced) if reduced
    end
  end

  # Groups the documents from a BSONKeyValueInput by key and calls the block
  # once per group with (key, documents). A truthy block result is passed to
  # BSONKeyValueOutput#write; nil / false writes nothing.
  #
  # The whole input is read before the first group is yielded.
  def self.kvreduce
    kvinput = BSONKeyValueInput.new
    kvoutput = BSONKeyValueOutput.new

    # The default block gives every new key its own array.
    grouped = Hash.new { |hash, key| hash[key] = [] }
    kvinput.each { |key, value| grouped[key] << value }

    grouped.each do |key, values|
      reduced = yield key, values
      kvoutput.write(reduced) if reduced
    end
  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mongo-hadoop
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Tyler Brock
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-20 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bson
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Ruby MongoDB Hadoop streaming support
31
+ email: tyler.brock@gmail.com
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - lib/mongo-hadoop.rb
37
+ - lib/mongo-hadoop/input.rb
38
+ - lib/mongo-hadoop/output.rb
39
+ - lib/mongo-hadoop/mapper.rb
40
+ - lib/mongo-hadoop/reducer.rb
41
+ homepage: http://github.com/mongodb/mongo-hadoop
42
+ licenses: []
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ! '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubyforge_project:
61
+ rubygems_version: 1.8.24
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: MongoDB Hadoop streaming support
65
+ test_files: []