jmapreduce 0.3 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -12,7 +12,33 @@ JMapReduce.job 'Count' do
12
12
  reduce do |key, values|
13
13
  sum = 0
14
14
  values.each {|v| sum += v }
15
- emit(key, sum)
15
+ emit(key, {'sum' => sum})
16
16
  end
17
17
  end
18
18
 
19
+ JMapReduce.job "Histogram" do
20
+ setup do
21
+ RANGES = [0..1, 2..3, 4..5, 6..10, 11..20, 21..30, 31..40, 41..50, 51..100, 101..200, 201..300, 301..10_000, 10_001..99_999]
22
+ end
23
+
24
+ map do |word, count|
25
+ range = RANGES.find {|range| range.include?(count['sum']) }
26
+ emit("#{range.first.to_s.rjust(5,'0')}-#{range.last.to_s.rjust(5,'0')}", 1)
27
+ end
28
+
29
+ reduce do |range, counts|
30
+ total = counts.inject(0) {|sum,count| sum+count }
31
+ emit(range, '|'*(total/20))
32
+ end
33
+ end
34
+
35
+ # this job is just a pass-through which takes advantage of the map/reduce shuffle to get ordered keys
36
+ JMapReduce.job "Sort" do
37
+ reduce_tasks 1
38
+ end
39
+
40
+ __END__
41
+
42
+ To run:
43
+
44
+ ./bin/jmapreduce examples/wordcount.rb examples/alice.txt /tmp/output
Binary file
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jmapreduce
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- version: "0.3"
9
+ - 1
10
+ version: 0.3.1
10
11
  platform: ruby
11
12
  authors:
12
13
  - Abhinay Mehta