mrtoolkit 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +1 -1
- data/lib/mrtoolkit.rb +4 -4
- data/test/utest.rb +11 -12
- metadata +13 -7
data/VERSION.yml
CHANGED
data/lib/mrtoolkit.rb
CHANGED
@@ -779,10 +779,10 @@ class JobBase
|
|
779
779
|
# This gathers the declarations and stores in a stage record.
|
780
780
|
def add_stage
|
781
781
|
case
|
782
|
-
when @map_class.nil
|
783
|
-
when @reduce_class.nil
|
784
|
-
when @in_dirs.empty
|
785
|
-
when @out_dir.nil
|
782
|
+
when @map_class.nil? then raise "Map class not specified"
|
783
|
+
when @reduce_class.nil? then raise "Reduce class not specified"
|
784
|
+
when @in_dirs.empty? then raise "Indir not speficied"
|
785
|
+
when @out_dir.nil? then raise "Outdir not specified"
|
786
786
|
end
|
787
787
|
@stages << [@map_class, @map_args, @map_opts,
|
788
788
|
@reduce_class, @reduce_args, @reduce_opts,
|
data/test/utest.rb
CHANGED
@@ -15,7 +15,7 @@ class LogMap < MapBase
|
|
15
15
|
field :date
|
16
16
|
field :time
|
17
17
|
field :url
|
18
|
-
|
18
|
+
|
19
19
|
emit :date_time
|
20
20
|
emit :url
|
21
21
|
end
|
@@ -25,7 +25,7 @@ class LogMap < MapBase
|
|
25
25
|
output.url = input.url
|
26
26
|
output
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
end
|
30
30
|
|
31
31
|
class LogReduce < ReduceBase
|
@@ -58,7 +58,7 @@ class TestMRToolkit < Test::Unit::TestCase
|
|
58
58
|
def test_log
|
59
59
|
LogJob.run_command
|
60
60
|
out = File.read("test-out")
|
61
|
-
expected = "2008-10-01T10:30:00\t1.2.3.4\tx\n" +
|
61
|
+
expected = "2008-10-01T10:30:00\t1.2.3.4\tx\n" +
|
62
62
|
"2008-10-02T11:30:00\t1.2.3.5\tx\n"
|
63
63
|
assert_equal(expected, out)
|
64
64
|
end
|
@@ -72,7 +72,7 @@ end
|
|
72
72
|
class SumMap < MapBase
|
73
73
|
def declare
|
74
74
|
field :value
|
75
|
-
|
75
|
+
|
76
76
|
emit :count
|
77
77
|
emit :total
|
78
78
|
emit :sum_of_squares
|
@@ -85,7 +85,7 @@ class SumMap < MapBase
|
|
85
85
|
output.sum_of_squares = v * v
|
86
86
|
output
|
87
87
|
end
|
88
|
-
|
88
|
+
|
89
89
|
end
|
90
90
|
|
91
91
|
# This could be done with canned reducer
|
@@ -144,26 +144,25 @@ end
|
|
144
144
|
# Grops times into one-minute buckets
|
145
145
|
# Calculates counts for each bucket
|
146
146
|
|
147
|
-
require '
|
147
|
+
require 'time'
|
148
148
|
|
149
149
|
class MinMap < MapBase
|
150
150
|
def declare
|
151
151
|
field :dt
|
152
152
|
field :tm
|
153
|
-
|
153
|
+
|
154
154
|
emit :minute
|
155
155
|
emit :count
|
156
156
|
end
|
157
157
|
|
158
158
|
def process(input, output)
|
159
|
-
|
160
|
-
t = Time.local(*res)
|
159
|
+
t = Time.parse(input.dt + " " + input.tm)
|
161
160
|
min = t.min + 60 * (t.hour + 24 * t.wday)
|
162
161
|
output.count = 1
|
163
162
|
output.minute = min
|
164
163
|
output
|
165
164
|
end
|
166
|
-
|
165
|
+
|
167
166
|
end
|
168
167
|
|
169
168
|
class MyMinReduce < ReduceBase
|
@@ -211,7 +210,7 @@ class TestMRToolkit < Test::Unit::TestCase
|
|
211
210
|
end
|
212
211
|
|
213
212
|
#################################
|
214
|
-
#
|
213
|
+
#
|
215
214
|
# This is the previous one, but with a standard reducer.
|
216
215
|
|
217
216
|
class CollectJob < JobBase
|
@@ -237,7 +236,7 @@ class TestMRToolkit < Test::Unit::TestCase
|
|
237
236
|
end
|
238
237
|
|
239
238
|
#################################
|
240
|
-
#
|
239
|
+
#
|
241
240
|
# This is the previous one, but with adifferent
|
242
241
|
# standard reducer. This produces the same output
|
243
242
|
# as the custom reducer.
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: mrtoolkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.1.
|
5
|
+
version: 0.1.5
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- cchayden
|
@@ -12,8 +12,7 @@ autorequire:
|
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
14
|
|
15
|
-
date: 2011-
|
16
|
-
default_executable:
|
15
|
+
date: 2011-07-06 00:00:00 Z
|
17
16
|
dependencies: []
|
18
17
|
|
19
18
|
description:
|
@@ -57,7 +56,6 @@ files:
|
|
57
56
|
- test/test-in/test8-in
|
58
57
|
- test/test-in/test9-in
|
59
58
|
- test/utest.rb
|
60
|
-
has_rdoc: true
|
61
59
|
homepage: http://github.com/jashmenn/mrtoolkit
|
62
60
|
licenses: []
|
63
61
|
|
@@ -81,9 +79,17 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
81
79
|
requirements: []
|
82
80
|
|
83
81
|
rubyforge_project:
|
84
|
-
rubygems_version: 1.
|
82
|
+
rubygems_version: 1.8.5
|
85
83
|
signing_key:
|
86
84
|
specification_version: 3
|
87
85
|
summary: Simplify the creation of Hadoop Map/Reduce jobs
|
88
|
-
test_files:
|
89
|
-
|
86
|
+
test_files:
|
87
|
+
- examples/hour.rb
|
88
|
+
- examples/import.rb
|
89
|
+
- examples/ip-result.rb
|
90
|
+
- examples/ip-size.rb
|
91
|
+
- examples/ip-ua.rb
|
92
|
+
- examples/ip.rb
|
93
|
+
- examples/section.rb
|
94
|
+
- examples/top-file.rb
|
95
|
+
- test/utest.rb
|