wukong 3.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -2
- data/lib/wukong.rb +4 -2
- data/lib/wukong/dataflow.rb +5 -0
- data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +7 -0
- data/lib/wukong/version.rb +1 -2
- data/lib/wukong/widget/reducers.rb +1 -0
- data/lib/wukong/widget/reducers/uniq.rb +90 -0
- metadata +10 -3
data/README.md
CHANGED
@@ -19,8 +19,8 @@ Here is a list of various other projects which you may also want to
|
|
19
19
|
peruse when trying to understand the full Wukong experience:
|
20
20
|
|
21
21
|
* <a href="http://github.com/infochimps-labs/wukong-hadoop">wukong-hadoop</a>: Run Wukong processors as mappers and reducers within the Hadoop framework. Model Hadoop jobs locally before you run them.
|
22
|
-
* <a href="http://github.com/infochimps-labs/wukong-storm>wukong-storm</a>: Run Wukong processors within the Storm framework. Model flows locally before you run them.
|
23
|
-
* <a href="http://github.com/infochimps-labs/wukong-load>wukong-load</a>: Load the output data from your local Wukong jobs and flows into a variety of different data stores.
|
22
|
+
* <a href="http://github.com/infochimps-labs/wukong-storm">wukong-storm</a>: Run Wukong processors within the Storm framework. Model flows locally before you run them.
|
23
|
+
* <a href="http://github.com/infochimps-labs/wukong-load">wukong-load</a>: Load the output data from your local Wukong jobs and flows into a variety of different data stores.
|
24
24
|
* <a href="http://github.com/infochimps-labs/wonderdog">wonderdog</a>: Connect Wukong processors running within Hadoop to Elasticsearch as either a source or sink for data.
|
25
25
|
* <a href="http://github.com/infochimps-labs/wukong-deploy">wukong-deploy</a>: Orchestrate Wukong and other wu-tools together to support an application running on the Infochimps Platform.
|
26
26
|
|
data/lib/wukong.rb
CHANGED
@@ -35,7 +35,7 @@ module Wukong
|
|
35
35
|
|
36
36
|
add_shortcut_method_for(:processor, ProcessorBuilder)
|
37
37
|
add_shortcut_method_for(:dataflow, DataflowBuilder)
|
38
|
-
|
38
|
+
|
39
39
|
end
|
40
40
|
|
41
41
|
# Alias module name for shorter namespaces
|
@@ -44,4 +44,6 @@ Wu = Wukong
|
|
44
44
|
require_relative 'wukong/widgets'
|
45
45
|
require_relative 'wukong/local'
|
46
46
|
|
47
|
-
|
47
|
+
module Wukong
|
48
|
+
BUILTINS = Set.new(Wukong.registry.show.keys)
|
49
|
+
end
|
data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb
CHANGED
@@ -44,6 +44,13 @@ module Wukong
|
|
44
44
|
@driver ||= UnitTestDriver.new(processor, settings)
|
45
45
|
end
|
46
46
|
|
47
|
+
# No need to load commandline arguments when we are testing
|
48
|
+
# There are other mechanisms for passing them in, plus
|
49
|
+
# RSpec goes into an infinite loop if you load a spec file
|
50
|
+
# from within a spec file
|
51
|
+
def load_args
|
52
|
+
end
|
53
|
+
|
47
54
|
# Do nothing. This prevents control flow within the Ruby
|
48
55
|
# interpreter from staying within this runner, as it would
|
49
56
|
# ordinarily do for `wu-local`.
|
data/lib/wukong/widget/reducers/uniq.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
require_relative("accumulator")
|
2
|
+
|
3
|
+
module Wukong
|
4
|
+
class Processor
|
5
|
+
|
6
|
+
# A processor which emits only unique records from its input.
|
7
|
+
# It's intended to work just like `uniq`.
|
8
|
+
#
|
9
|
+
# @example Emit unique elements from the input (like `uniq`).
|
10
|
+
#
|
11
|
+
# $ uniq input
|
12
|
+
# apple
|
13
|
+
# banana
|
14
|
+
# pear
|
15
|
+
# $ cat input | wu-local uniq
|
16
|
+
# apple
|
17
|
+
# banana
|
18
|
+
# pear
|
19
|
+
#
|
20
|
+
# @example Emit unique elements from the input with counts (like `uniq -c`).
|
21
|
+
#
|
22
|
+
# $ uniq -c input
|
23
|
+
# 3 apple
|
24
|
+
# 2 banana
|
25
|
+
# 3 pear
|
26
|
+
# $ cat input | wu-local uniq --count --to=tsv
|
27
|
+
# apple 3
|
28
|
+
# banana 2
|
29
|
+
# pear 3
|
30
|
+
|
31
|
+
class Uniq < Accumulator
|
32
|
+
|
33
|
+
field :count, :boolean, doc: "Emit a count for each group of input records", default: false
|
34
|
+
|
35
|
+
description <<EOF
|
36
|
+
This processor uniq's its inputs.
|
37
|
+
|
38
|
+
$ uniq input
|
39
|
+
apple
|
40
|
+
banana
|
41
|
+
pear
|
42
|
+
$ cat input | wu-local uniq
|
43
|
+
apple
|
44
|
+
banana
|
45
|
+
pear
|
46
|
+
|
47
|
+
And it can count as well:
|
48
|
+
|
49
|
+
$ uniq -c input
|
50
|
+
3 apple
|
51
|
+
2 banana
|
52
|
+
3 pear
|
53
|
+
$ cat input | wu-local uniq --count --to=tsv
|
54
|
+
apple 3
|
55
|
+
banana 2
|
56
|
+
pear 3
|
57
|
+
EOF
|
58
|
+
|
59
|
+
# The total size of the input records.
|
60
|
+
attr_accessor :size
|
61
|
+
|
62
|
+
# Initializes the count to 0.
|
63
|
+
def setup
|
64
|
+
super()
|
65
|
+
@size = 0
|
66
|
+
end
|
67
|
+
|
68
|
+
# Accumulate a `record` by incrementing the total size.
|
69
|
+
#
|
70
|
+
# @param [Object] record
|
71
|
+
def accumulate record
|
72
|
+
self.size += 1
|
73
|
+
end
|
74
|
+
|
75
|
+
# Yields the total size.
|
76
|
+
#
|
77
|
+
# @yield [size]
|
78
|
+
# @yieldparam [Integer] size
|
79
|
+
def finalize
|
80
|
+
if count
|
81
|
+
yield [key, self.size]
|
82
|
+
else
|
83
|
+
yield key
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
register
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.0.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2013-
|
14
|
+
date: 2013-03-07 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: configliere
|
@@ -337,6 +337,7 @@ files:
|
|
337
337
|
- lib/wukong/widget/reducers/group_concat.rb
|
338
338
|
- lib/wukong/widget/reducers/moments.rb
|
339
339
|
- lib/wukong/widget/reducers/sort.rb
|
340
|
+
- lib/wukong/widget/reducers/uniq.rb
|
340
341
|
- lib/wukong/widget/serializers.rb
|
341
342
|
- lib/wukong/widget/utils.rb
|
342
343
|
- lib/wukong/widgets.rb
|
@@ -399,15 +400,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
399
400
|
- - ! '>='
|
400
401
|
- !ruby/object:Gem::Version
|
401
402
|
version: '0'
|
403
|
+
segments:
|
404
|
+
- 0
|
405
|
+
hash: 719389029987495852
|
402
406
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
403
407
|
none: false
|
404
408
|
requirements:
|
405
409
|
- - ! '>='
|
406
410
|
- !ruby/object:Gem::Version
|
407
411
|
version: '0'
|
412
|
+
segments:
|
413
|
+
- 0
|
414
|
+
hash: 719389029987495852
|
408
415
|
requirements: []
|
409
416
|
rubyforge_project:
|
410
|
-
rubygems_version: 1.8.
|
417
|
+
rubygems_version: 1.8.24
|
411
418
|
signing_key:
|
412
419
|
specification_version: 3
|
413
420
|
summary: Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use
|