mongo-hadoop 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/mongo-hadoop.rb +5 -0
- data/lib/mongo-hadoop/input.rb +29 -0
- data/lib/mongo-hadoop/mapper.rb +29 -0
- data/lib/mongo-hadoop/output.rb +22 -0
- data/lib/mongo-hadoop/reducer.rb +28 -0
- metadata +65 -0
data/lib/mongo-hadoop.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Enumerable reader that pulls BSON documents, one at a time, from an
# IO-like stream.
class BSONInput
  include Enumerable

  # stream - object to read documents from; $stdin is used when it is
  #          nil or false.
  def initialize(stream=nil)
    @stream = stream || $stdin
  end

  # Reads the next document from the stream.
  #
  # Returns the value produced by BSON.read_bson_document, or nil once the
  # stream has no more data.
  def read
    # Detect a clean end-of-stream up front so that normal termination does
    # not depend on an exception being raised inside the BSON library.
    return nil if @stream.respond_to?(:eof?) && @stream.eof?

    BSON.read_bson_document(@stream)
  rescue NoMethodError
    # Preserved from the original implementation, which treated a
    # NoMethodError from the reader as "no more documents".
    # NOTE(review): presumably raised when the stream ends mid-document or
    # the stream has no eof? -- this also hides genuine NoMethodErrors.
    nil
  end

  # Yields each document read from the stream until #read returns nil.
  #
  # Returns an Enumerator when called without a block, as Enumerable
  # collections conventionally do.
  def each
    return enum_for(:each) unless block_given?

    while (doc = read)
      yield doc
    end
  end
end
|
22
|
+
|
23
|
+
# Variant of BSONInput that yields each document together with its '_id'.
class BSONKeyValueInput < BSONInput
  # Yields two values per document read: doc['_id'] and the document itself.
  #
  # Returns an Enumerator when called without a block; each element of that
  # enumerator is the [id, document] pair.
  def each
    return enum_for(:each) unless block_given?

    while (doc = read)
      yield doc['_id'], doc
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Map-side entry points. Each method reads records through a freshly
# constructed input object, hands them to the caller's block, and writes
# whatever the block returns through a freshly constructed output object.
class MongoHadoop
  # Yields every document from a BSONInput to the block and writes the
  # block's result(s) to a BSONOutput.
  #
  # The block may return a single document, a collection of documents, or
  # nil/false to emit nothing. Falsy entries inside a collection are skipped.
  def self.map
    input = BSONInput.new
    output = BSONOutput.new

    input.each do |doc|
      results = yield doc
      # A Hash responds to :each, but it is one document, not a list of
      # documents; iterating it would hand [key, value] arrays to the writer.
      results = [results] if results.is_a?(Hash) || !results.respond_to?(:each)

      results.each do |result|
        output.write(result) if result
      end
    end
  end

  # Yields every (key, value) from a BSONKeyValueInput to the block and
  # writes the block's result(s) to a BSONKeyValueOutput.
  #
  # Each emitted element is passed to BSONKeyValueOutput#write as one pair,
  # so the block should return a collection of [key, value] pairs (a Hash
  # of key => value iterates as such pairs). Falsy elements are skipped.
  # NOTE(review): a bare [key, value] array is itself iterated element by
  # element, so wrap a single pair as [[key, value]].
  def self.kvmap
    kvinput = BSONKeyValueInput.new
    kvoutput = BSONKeyValueOutput.new

    kvinput.each do |key, value|
      pairs = yield key, value
      # Previously this line referenced an undefined `profiles` local and a
      # misspelled `respond_to`, so every record raised.
      pairs = [pairs] unless pairs.respond_to?(:each)

      pairs.each do |pair|
        kvoutput.write(pair) if pair
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Writer that serializes documents with BSON and pushes them to a stream.
class BSONOutput
  # stream - destination object; $stdout is used when it is nil or false.
  def initialize(stream=nil)
    @stream = stream ? stream : $stdout
  end

  # Serializes doc with BSON.serialize, writes the result to the stream and
  # flushes the stream straight away.
  #
  # Returns whatever the stream's flush returns.
  def write(doc)
    @stream.write(BSON.serialize(doc))
    @stream.flush
  end
end
|
12
|
+
|
13
|
+
# Writer that turns a [key, value] pair into a single document keyed by
# '_id' and hands it to BSONOutput#write.
class BSONKeyValueOutput < BSONOutput
  # pair - a [key, value] array (it is splatted into key and value).
  #
  # A Hash value becomes the document itself; any other value is wrapped as
  # { :value => value }. The key is then stored under '_id', replacing any
  # existing '_id' entry.
  def write(pair)
    key, value = *pair

    # Work on a copy so the caller's Hash is not modified (and so a frozen
    # Hash can be written).
    doc = value.is_a?(Hash) ? value.dup : { :value => value }

    doc['_id'] = key
    super(doc)
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Reduce-side entry points. Both methods read the entire input, group it by
# key in memory, then call the block once per key and write its result.
class MongoHadoop
  # Groups every document from a BSONInput by doc['_id'], yields
  # (key, documents) per group, and writes the block's result to a
  # BSONOutput.
  #
  # A nil/false block result emits nothing for that key, matching how
  # map/kvmap treat falsy results.
  #
  # Returns the Hash of grouped documents.
  def self.reduce
    input = BSONInput.new
    output = BSONOutput.new

    grouped = input.group_by { |doc| doc['_id'] }

    grouped.each do |key, values|
      reduced = yield key, values
      output.write(reduced) if reduced
    end
  end

  # Collects the values from a BSONKeyValueInput under their keys, yields
  # (key, values) per key, and writes the block's result to a
  # BSONKeyValueOutput, which treats it as a [key, value] pair.
  #
  # A nil/false block result emits nothing for that key; previously nil was
  # handed to the writer and came out as a record with a nil key and value.
  #
  # Returns the Hash of grouped values.
  def self.kvreduce
    kvinput = BSONKeyValueInput.new
    kvoutput = BSONKeyValueOutput.new

    grouped = kvinput.inject({}) do |hash, pair|
      key, value = *pair
      (hash[key] ||= []) << value
      hash
    end

    grouped.each do |key, values|
      reduced = yield key, values
      kvoutput.write(reduced) if reduced
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mongo-hadoop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Tyler Brock
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-20 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bson
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Ruby MongoDB Hadoop streaming support
|
31
|
+
email: tyler.brock@gmail.com
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- lib/mongo-hadoop.rb
|
37
|
+
- lib/mongo-hadoop/input.rb
|
38
|
+
- lib/mongo-hadoop/output.rb
|
39
|
+
- lib/mongo-hadoop/mapper.rb
|
40
|
+
- lib/mongo-hadoop/reducer.rb
|
41
|
+
homepage: http://github.com/mongodb/mongo-hadoop
|
42
|
+
licenses: []
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
none: false
|
49
|
+
requirements:
|
50
|
+
- - ! '>='
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
requirements: []
|
60
|
+
rubyforge_project:
|
61
|
+
rubygems_version: 1.8.24
|
62
|
+
signing_key:
|
63
|
+
specification_version: 3
|
64
|
+
summary: MongoDB Hadoop streaming support
|
65
|
+
test_files: []
|