mongo-hadoop 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/mongo-hadoop.rb +5 -0
- data/lib/mongo-hadoop/input.rb +29 -0
- data/lib/mongo-hadoop/mapper.rb +29 -0
- data/lib/mongo-hadoop/output.rb +22 -0
- data/lib/mongo-hadoop/reducer.rb +28 -0
- metadata +65 -0
data/lib/mongo-hadoop.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Enumerable reader that pulls BSON documents, one at a time, from an IO-like
# stream. Mixing in Enumerable makes map/select/group_by/etc. available on top
# of #each.
class BSONInput
  include Enumerable

  # stream - IO-like object to read from. Defaults to $stdin when omitted
  #          (or when nil/false is passed).
  def initialize(stream=nil)
    @stream = stream || $stdin
  end

  # Reads the next document from the stream.
  #
  # Returns whatever BSON.read_bson_document returns, or nil when that call
  # raises NoMethodError.
  #
  # NOTE(review): presumably the bson gem raises NoMethodError once the stream
  # is exhausted (e.g. by calling a method on the nil returned by a read at
  # EOF), which is why that exception is treated as end-of-input here. Confirm
  # against the bson version in use; be aware this also hides any unrelated
  # NoMethodError raised while decoding.
  def read
    BSON.read_bson_document(@stream)
  rescue NoMethodError
    nil
  end

  # Yields each document until #read returns nil.
  #
  # Returns an Enumerator when called without a block, as Enumerable classes
  # conventionally do, instead of failing with LocalJumpError.
  def each
    return to_enum(:each) unless block_given?

    while (doc = read)
      yield doc
    end
  end
end
|
22
|
+
|
23
|
+
# Variant of BSONInput whose #each yields two values per document: the
# document's '_id' field and the document itself.
class BSONKeyValueInput < BSONInput
  # Yields (doc['_id'], doc) for every document until #read returns nil.
  #
  # Returns an Enumerator when called without a block instead of failing
  # with LocalJumpError.
  def each
    return to_enum(:each) unless block_given?

    while (doc = read)
      yield doc['_id'], doc
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Map-side helpers: each method reads every input record, hands it to the
# caller's block and writes whatever the block returns.
class MongoHadoop
  # Runs the given block once per document read from a new BSONInput and
  # writes the results to a new BSONOutput.
  #
  # The block receives one document and may return:
  #   * a single document  - written as-is,
  #   * a collection       - every non-nil/false element is written,
  #   * nil / false        - nothing is written for that input.
  def self.map
    input = BSONInput.new
    output = BSONOutput.new

    input.each do |doc|
      mapped = yield doc
      # A Hash responds to :each, but iterating it would split one document
      # into [key, value] arrays, so it is wrapped like any other scalar.
      mapped = [mapped] if mapped.is_a?(Hash) || !mapped.respond_to?(:each)

      mapped.each do |result|
        output.write result if result
      end
    end
  end

  # Key/value flavour of .map: the block receives (key, value) from a new
  # BSONKeyValueInput and every non-nil/false element of its result is passed
  # to BSONKeyValueOutput#write.
  #
  # NOTE(review): because any result that responds to :each is iterated
  # element by element, the block presumably has to return a collection of
  # [key, value] pairs (or nil); a bare [key, value] pair would itself be
  # iterated. Confirm the intended contract with callers.
  def self.kvmap
    kvinput = BSONKeyValueInput.new
    kvoutput = BSONKeyValueOutput.new

    kvinput.each do |key, value|
      mapped = yield key, value
      mapped = [mapped] unless mapped.respond_to?(:each)

      mapped.each do |pair|
        kvoutput.write pair if pair
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Writer that serializes documents with BSON.serialize and pushes the result
# to an IO-like stream.
class BSONOutput
  # stream - IO-like object to write to. $stdout is used when the argument is
  #          omitted (or nil/false).
  def initialize(stream=nil)
    @stream = stream ? stream : $stdout
  end

  # Serializes +doc+, writes it to the stream and flushes immediately so the
  # document is not left sitting in the stream's buffer.
  #
  # Returns the result of the stream's #flush.
  def write(doc)
    @stream.write(BSON.serialize(doc))
    @stream.flush
  end
end
|
12
|
+
|
13
|
+
# Writer for [key, value] pairs: turns each pair into a single document whose
# '_id' is the key, then delegates to BSONOutput#write.
class BSONKeyValueOutput < BSONOutput
  # pair - a two-element [key, value] collection.
  #
  # A Hash value becomes the document itself; any other value is wrapped as
  # { :value => value }. The key is stored under '_id', overwriting an
  # existing '_id' entry.
  #
  # The Hash is shallow-copied first so the caller's object is not modified
  # as a side effect of writing it.
  def write(pair)
    key, value = *pair

    doc = value.is_a?(Hash) ? value.dup : { :value => value }

    doc['_id'] = key
    super(doc)
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Reduce-side helpers: each method groups all input records by key, hands
# every group to the caller's block and writes what the block returns.
class MongoHadoop
  # Groups every document read from a new BSONInput by its '_id' field, then
  # yields (key, documents) per group and writes the block's result to a new
  # BSONOutput.
  #
  # The whole input is collected in memory before the first group is yielded.
  # A nil/false result is skipped rather than handed to the writer, matching
  # how .map treats empty results.
  def self.reduce
    input = BSONInput.new
    output = BSONOutput.new

    grouped = input.group_by { |doc| doc['_id'] }

    grouped.each do |key, values|
      reduced = yield key, values
      output.write reduced if reduced
    end
  end

  # Key/value flavour of .reduce: collects the values yielded by a new
  # BSONKeyValueInput under their key, yields (key, values) per group and
  # passes the block's result to BSONKeyValueOutput#write.
  #
  # A nil/false result is skipped; writing it would otherwise produce a
  # document with a nil key and a nil value.
  def self.kvreduce
    kvinput = BSONKeyValueInput.new
    kvoutput = BSONKeyValueOutput.new

    # The block form gives every key its own array (Hash.new([]) would share one).
    grouped = Hash.new { |hash, key| hash[key] = [] }
    kvinput.each { |key, value| grouped[key] << value }

    grouped.each do |key, values|
      reduced = yield key, values
      kvoutput.write reduced if reduced
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mongo-hadoop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Tyler Brock
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-20 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bson
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Ruby MongoDB Hadoop streaming support
|
31
|
+
email: tyler.brock@gmail.com
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- lib/mongo-hadoop.rb
|
37
|
+
- lib/mongo-hadoop/input.rb
|
38
|
+
- lib/mongo-hadoop/output.rb
|
39
|
+
- lib/mongo-hadoop/mapper.rb
|
40
|
+
- lib/mongo-hadoop/reducer.rb
|
41
|
+
homepage: http://github.com/mongodb/mongo-hadoop
|
42
|
+
licenses: []
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
none: false
|
49
|
+
requirements:
|
50
|
+
- - ! '>='
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
requirements: []
|
60
|
+
rubyforge_project:
|
61
|
+
rubygems_version: 1.8.24
|
62
|
+
signing_key:
|
63
|
+
specification_version: 3
|
64
|
+
summary: MongoDB Hadoop streaming support
|
65
|
+
test_files: []
|