wukong 3.0.0 → 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +2 -2
- data/lib/wukong.rb +4 -2
- data/lib/wukong/dataflow.rb +5 -0
- data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +7 -0
- data/lib/wukong/version.rb +1 -2
- data/lib/wukong/widget/reducers.rb +1 -0
- data/lib/wukong/widget/reducers/uniq.rb +90 -0
- metadata +10 -3
data/README.md
CHANGED
@@ -19,8 +19,8 @@ Here is a list of various other projects which you may also want to
|
|
19
19
|
peruse when trying to understand the full Wukong experience:
|
20
20
|
|
21
21
|
* <a href="http://github.com/infochimps-labs/wukong-hadoop">wukong-hadoop</a>: Run Wukong processors as mappers and reducers within the Hadoop framework. Model Hadoop jobs locally before you run them.
|
22
|
-
* <a href="http://github.com/infochimps-labs/wukong-storm>wukong-storm</a>: Run Wukong processors within the Storm framework. Model flows locally before you run them.
|
23
|
-
* <a href="http://github.com/infochimps-labs/wukong-load>wukong-load</a>: Load the output data from your local Wukong jobs and flows into a variety of different data stores.
|
22
|
+
* <a href="http://github.com/infochimps-labs/wukong-storm">wukong-storm</a>: Run Wukong processors within the Storm framework. Model flows locally before you run them.
|
23
|
+
* <a href="http://github.com/infochimps-labs/wukong-load">wukong-load</a>: Load the output data from your local Wukong jobs and flows into a variety of different data stores.
|
24
24
|
* <a href="http://github.com/infochimps-labs/wonderdog">wonderdog</a>: Connect Wukong processors running within Hadoop to Elasticsearch as either a source or sink for data.
|
25
25
|
* <a href="http://github.com/infochimps-labs/wukong-deploy">wukong-deploy</a>: Orchestrate Wukong and other wu-tools together to support an application running on the Infochimps Platform.
|
26
26
|
|
data/lib/wukong.rb
CHANGED
@@ -35,7 +35,7 @@ module Wukong
|
|
35
35
|
|
36
36
|
add_shortcut_method_for(:processor, ProcessorBuilder)
|
37
37
|
add_shortcut_method_for(:dataflow, DataflowBuilder)
|
38
|
-
|
38
|
+
|
39
39
|
end
|
40
40
|
|
41
41
|
# Alias module name for shorter namespaces
|
@@ -44,4 +44,6 @@ Wu = Wukong
|
|
44
44
|
require_relative 'wukong/widgets'
|
45
45
|
require_relative 'wukong/local'
|
46
46
|
|
47
|
-
|
47
|
+
module Wukong
|
48
|
+
BUILTINS = Set.new(Wukong.registry.show.keys)
|
49
|
+
end
|
data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb
CHANGED
@@ -44,6 +44,13 @@ module Wukong
|
|
44
44
|
@driver ||= UnitTestDriver.new(processor, settings)
|
45
45
|
end
|
46
46
|
|
47
|
+
# No need to load commandline arguments when we are testing
|
48
|
+
# There are other mechanisms for passing them in, plus
|
49
|
+
# RSpec goes into an infinite loop if you load a spec file
|
50
|
+
# from within a spec file
|
51
|
+
def load_args
|
52
|
+
end
|
53
|
+
|
47
54
|
# Do nothing. This prevents control flow within the Ruby
|
48
55
|
# interpreter from staying within this runner, as it would
|
49
56
|
# ordinarily do for `wu-local`.
|
data/lib/wukong/widget/reducers/uniq.rb
CHANGED
@@ -0,0 +1,90 @@
|
|
1
|
+
require_relative("accumulator")
|
2
|
+
|
3
|
+
module Wukong
|
4
|
+
class Processor
|
5
|
+
|
6
|
+
# A processor which emits only unique records from its input.
|
7
|
+
# It's intended to work just like `uniq`.
|
8
|
+
#
|
9
|
+
# @example Emit unique elements from the input (like `uniq`).
|
10
|
+
#
|
11
|
+
# $ uniq input
|
12
|
+
# apple
|
13
|
+
# banana
|
14
|
+
# pear
|
15
|
+
# $ cat input | wu-local uniq
|
16
|
+
# apple
|
17
|
+
# banana
|
18
|
+
# pear
|
19
|
+
#
|
20
|
+
# @example Emit unique elements from the input with counts (like `uniq -c`).
|
21
|
+
#
|
22
|
+
# $ uniq -c input
|
23
|
+
# 3 apple
|
24
|
+
# 2 banana
|
25
|
+
# 3 pear
|
26
|
+
# $ cat input | wu-local uniq --count --to=tsv
|
27
|
+
# apple 3
|
28
|
+
# banana 5
|
29
|
+
# pear 8
|
30
|
+
|
31
|
+
class Uniq < Accumulator
|
32
|
+
|
33
|
+
field :count, :boolean, doc: "Emit a count for each group of input records", default: false
|
34
|
+
|
35
|
+
description <<EOF
|
36
|
+
This processor uniq's its inputs.
|
37
|
+
|
38
|
+
$ uniq input
|
39
|
+
apple
|
40
|
+
banana
|
41
|
+
pear
|
42
|
+
$ cat input | wu-local uniq
|
43
|
+
apple
|
44
|
+
banana
|
45
|
+
pear
|
46
|
+
|
47
|
+
And it can count as well:
|
48
|
+
|
49
|
+
$ uniq -c input
|
50
|
+
3 apple
|
51
|
+
2 banana
|
52
|
+
3 pear
|
53
|
+
$ cat input | wu-local uniq --count --to=tsv
|
54
|
+
apple 3
|
55
|
+
banana 5
|
56
|
+
pear 8
|
57
|
+
EOF
|
58
|
+
|
59
|
+
# The total size of the input records.
|
60
|
+
attr_accessor :size
|
61
|
+
|
62
|
+
# Initializes the count to 0.
|
63
|
+
def setup
|
64
|
+
super()
|
65
|
+
@size = 0
|
66
|
+
end
|
67
|
+
|
68
|
+
# Accumulate a `record` by incrementing the total size.
|
69
|
+
#
|
70
|
+
# @param [Object] record
|
71
|
+
def accumulate record
|
72
|
+
self.size += 1
|
73
|
+
end
|
74
|
+
|
75
|
+
# Yields the total size.
|
76
|
+
#
|
77
|
+
# @yield [size]
|
78
|
+
# @yieldparam [Integer] size
|
79
|
+
def finalize
|
80
|
+
if count
|
81
|
+
yield [key, self.size]
|
82
|
+
else
|
83
|
+
yield key
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
register
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.0.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2013-
|
14
|
+
date: 2013-03-07 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: configliere
|
@@ -337,6 +337,7 @@ files:
|
|
337
337
|
- lib/wukong/widget/reducers/group_concat.rb
|
338
338
|
- lib/wukong/widget/reducers/moments.rb
|
339
339
|
- lib/wukong/widget/reducers/sort.rb
|
340
|
+
- lib/wukong/widget/reducers/uniq.rb
|
340
341
|
- lib/wukong/widget/serializers.rb
|
341
342
|
- lib/wukong/widget/utils.rb
|
342
343
|
- lib/wukong/widgets.rb
|
@@ -399,15 +400,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
399
400
|
- - ! '>='
|
400
401
|
- !ruby/object:Gem::Version
|
401
402
|
version: '0'
|
403
|
+
segments:
|
404
|
+
- 0
|
405
|
+
hash: 719389029987495852
|
402
406
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
403
407
|
none: false
|
404
408
|
requirements:
|
405
409
|
- - ! '>='
|
406
410
|
- !ruby/object:Gem::Version
|
407
411
|
version: '0'
|
412
|
+
segments:
|
413
|
+
- 0
|
414
|
+
hash: 719389029987495852
|
408
415
|
requirements: []
|
409
416
|
rubyforge_project:
|
410
|
-
rubygems_version: 1.8.
|
417
|
+
rubygems_version: 1.8.24
|
411
418
|
signing_key:
|
412
419
|
specification_version: 3
|
413
420
|
summary: Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use
|