mongo-hadoop 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ require 'bson'
2
+ require 'mongo-hadoop/input'
3
+ require 'mongo-hadoop/output'
4
+ require 'mongo-hadoop/mapper'
5
+ require 'mongo-hadoop/reducer'
@@ -0,0 +1,29 @@
1
# Reads BSON documents one at a time from a stream.
#
# Mixes in Enumerable, so map/select/group_by etc. work on top of #each.
class BSONInput
  include Enumerable

  # @param stream [Object, nil] object handed to BSON.read_bson_document;
  #   $stdin is used when nil (or false) is given.
  def initialize(stream = nil)
    @stream = stream || $stdin
  end

  # Reads the next document from the stream.
  #
  # @return [Object, nil] the value returned by BSON.read_bson_document, or
  #   nil when that call raises NoMethodError.
  def read
    BSON.read_bson_document(@stream)
  rescue NoMethodError
    # NOTE(review): presumably this is how end-of-stream surfaces from the BSON
    # library (a method call on a nil read result) — confirm against the bson
    # gem. Any other NoMethodError raised during the read is reported as nil too.
    nil
  end

  # Yields every document until #read returns nil (or false).
  #
  # @yieldparam doc [Object] a document returned by #read
  # @return [Enumerator, nil] an Enumerator when called without a block,
  #   otherwise nil once the stream is exhausted.
  def each
    # Without a block, hand back an Enumerator instead of raising LocalJumpError.
    return enum_for(:each) unless block_given?

    while (doc = read)
      yield doc
    end
  end
end

# Variant of BSONInput whose #each yields (key, document) pairs, where the
# key is the document's '_id' field.
class BSONKeyValueInput < BSONInput
  # Yields the '_id' value and the document itself for every document read.
  #
  # @yieldparam key [Object] the value stored under '_id' in the document
  # @yieldparam doc [Object] the document returned by #read
  # @return [Enumerator, nil] an Enumerator when called without a block,
  #   otherwise nil once the stream is exhausted.
  def each
    # Without a block, hand back an Enumerator instead of raising LocalJumpError.
    return enum_for(:each) unless block_given?

    while (doc = read)
      yield doc['_id'], doc
    end
  end
end
@@ -0,0 +1,29 @@
1
# Map-side entry points: read documents via BSONInput / BSONKeyValueInput,
# pass each one to the caller's block, and write the results via
# BSONOutput / BSONKeyValueOutput.
class MongoHadoop
  # Runs the given block once per input document and writes what it returns.
  #
  # The block may return a single document, a collection of documents, or
  # nil/false to emit nothing. nil/false entries inside a collection are
  # skipped as well.
  #
  # @yieldparam doc [Object] a document yielded by BSONInput#each
  # @yieldreturn [Object, #each, nil] document(s) to write
  def self.map
    input = BSONInput.new
    output = BSONOutput.new

    input.each do |doc|
      result = yield doc
      # A Hash responds to :each but is one document, not a list of documents;
      # iterating it would write [key, value] arrays instead of the document.
      single = result.is_a?(Hash) || !result.respond_to?(:each)
      results = single ? [result] : result

      results.each do |item|
        output.write(item) if item
      end
    end
  end

  # Key/value flavour of .map: the block receives (key, value) and its
  # results are written through BSONKeyValueOutput.
  #
  # Anything that responds to :each is iterated and each element is written
  # as one pair; any other value is written as-is (nil/false is skipped).
  # NOTE(review): a bare [key, value] Array therefore gets iterated element by
  # element — callers presumably return a list of pairs (or a Hash); confirm.
  #
  # @yieldparam key [Object] first value yielded by BSONKeyValueInput#each
  # @yieldparam value [Object] second value yielded by BSONKeyValueInput#each
  # @yieldreturn [#each, Object, nil] pair(s) to write
  def self.kvmap
    kvinput = BSONKeyValueInput.new
    kvoutput = BSONKeyValueOutput.new

    kvinput.each do |key, value|
      result = yield key, value
      # Was `profiles.respond_to(:each)`: an undefined local and a misspelled
      # predicate, which raised on the first record.
      results = result.respond_to?(:each) ? result : [result]

      results.each do |pair|
        kvoutput.write(pair) if pair
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
# Serializes documents with BSON.serialize and writes them to a stream.
class BSONOutput
  # @param stream [#write, #flush, nil] destination; $stdout is used when
  #   nil (or false) is given.
  def initialize(stream = nil)
    @stream = stream || $stdout
  end

  # Serializes +doc+ and writes it to the stream, flushing after every
  # document.
  #
  # @param doc [Object] value passed to BSON.serialize
  # @return [Object] whatever the stream's #flush returns
  def write(doc)
    @stream.write(BSON.serialize(doc))
    @stream.flush
  end
end

# Writes (key, value) pairs as documents whose '_id' is the key.
class BSONKeyValueOutput < BSONOutput
  # Builds a document from the pair and writes it via BSONOutput#write.
  #
  # A Hash value becomes the document itself; any other value is wrapped as
  # { :value => value }. In both cases '_id' is set to the key.
  #
  # @param pair [Array, Object] splatted into (key, value)
  # @return [Object] whatever BSONOutput#write returns
  def write(pair)
    key, value = *pair

    # Copy a Hash value before adding '_id' so the caller's object is not
    # modified (and so a frozen Hash can still be written).
    doc = value.is_a?(Hash) ? value.dup : { :value => value }

    doc['_id'] = key
    super(doc)
  end
end
@@ -0,0 +1,28 @@
1
# Reduce-side entry points: read every input document, group them by key,
# pass each group to the caller's block, and write the results.
class MongoHadoop
  # Groups all documents from BSONInput by their '_id' field and yields each
  # (key, documents) group to the block. The whole input is read and held in
  # memory before the first group is yielded.
  #
  # A nil/false block result is skipped rather than written, matching how
  # the map-side methods treat nil/false results.
  #
  # @yieldparam key [Object] the shared '_id' value of the group
  # @yieldparam values [Array] the documents carrying that '_id'
  # @yieldreturn [Object, nil] document to write, or nil/false to emit nothing
  # @return [Hash] the grouping of key => documents
  def self.reduce
    input = BSONInput.new
    output = BSONOutput.new

    grouped = input.group_by { |doc| doc['_id'] }

    grouped.each do |key, values|
      reduced = yield key, values
      output.write(reduced) if reduced
    end
  end

  # Key/value flavour of .reduce: collects the values yielded by
  # BSONKeyValueInput under their key, yields each (key, values) group, and
  # writes the block's result through BSONKeyValueOutput.
  #
  # A nil/false block result is skipped rather than written.
  #
  # @yieldparam key [Object] the grouping key
  # @yieldparam values [Array] every value seen for that key, in input order
  # @yieldreturn [Object, nil] pair to write, or nil/false to emit nothing
  # @return [Hash] the grouping of key => values
  def self.kvreduce
    kvinput = BSONKeyValueInput.new
    kvoutput = BSONKeyValueOutput.new

    grouped = {}
    kvinput.each do |key, value|
      (grouped[key] ||= []) << value
    end

    grouped.each do |key, values|
      reduced = yield key, values
      kvoutput.write(reduced) if reduced
    end
  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mongo-hadoop
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Tyler Brock
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-20 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bson
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Ruby MongoDB Hadoop streaming support
31
+ email: tyler.brock@gmail.com
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - lib/mongo-hadoop.rb
37
+ - lib/mongo-hadoop/input.rb
38
+ - lib/mongo-hadoop/output.rb
39
+ - lib/mongo-hadoop/mapper.rb
40
+ - lib/mongo-hadoop/reducer.rb
41
+ homepage: http://github.com/mongodb/mongo-hadoop
42
+ licenses: []
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ! '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubyforge_project:
61
+ rubygems_version: 1.8.24
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: MongoDB Hadoop streaming support
65
+ test_files: []