wukong 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -19,8 +19,8 @@ Here is a list of various other projects which you may also want to
19
19
  peruse when trying to understand the full Wukong experience:
20
20
 
21
21
  * <a href="http://github.com/infochimps-labs/wukong-hadoop">wukong-hadoop</a>: Run Wukong processors as mappers and reducers within the Hadoop framework. Model Hadoop jobs locally before you run them.
22
- * <a href="http://github.com/infochimps-labs/wukong-storm>wukong-storm</a>: Run Wukong processors within the Storm framework. Model flows locally before you run them.
23
- * <a href="http://github.com/infochimps-labs/wukong-load>wukong-load</a>: Load the output data from your local Wukong jobs and flows into a variety of different data stores.
22
+ * <a href="http://github.com/infochimps-labs/wukong-storm">wukong-storm</a>: Run Wukong processors within the Storm framework. Model flows locally before you run them.
23
+ * <a href="http://github.com/infochimps-labs/wukong-load">wukong-load</a>: Load the output data from your local Wukong jobs and flows into a variety of different data stores.
24
24
  * <a href="http://github.com/infochimps-labs/wonderdog">wonderdog</a>: Connect Wukong processors running within Hadoop to Elasticsearch as either a source or sink for data.
25
25
  * <a href="http://github.com/infochimps-labs/wukong-deploy">wukong-deploy</a>: Orchestrate Wukong and other wu-tools together to support an application running on the Infochimps Platform.
26
26
 
@@ -35,7 +35,7 @@ module Wukong
35
35
 
36
36
  add_shortcut_method_for(:processor, ProcessorBuilder)
37
37
  add_shortcut_method_for(:dataflow, DataflowBuilder)
38
-
38
+
39
39
  end
40
40
 
41
41
  # Alias module name for shorter namespaces
@@ -44,4 +44,6 @@ Wu = Wukong
44
44
  require_relative 'wukong/widgets'
45
45
  require_relative 'wukong/local'
46
46
 
47
-
47
+ module Wukong
48
+ BUILTINS = Set.new(Wukong.registry.show.keys)
49
+ end
@@ -33,6 +33,11 @@ module Wukong
33
33
 
34
34
  class Dataflow < Hanuman::Graph
35
35
 
36
+ def self.description desc=nil
37
+ @description = desc if desc
38
+ @description
39
+ end
40
+
36
41
  def has_input?(stage)
37
42
  links.any?{ |link| link.into == stage }
38
43
  end
@@ -44,6 +44,13 @@ module Wukong
44
44
  @driver ||= UnitTestDriver.new(processor, settings)
45
45
  end
46
46
 
47
+ # No need to load commandline arguments when we are testing
48
+ # There are other mechanisms for passing them in, plus
49
+ # RSpec goes into an infinite loop if you load a spec file
50
+ # from within a spec file
51
+ def load_args
52
+ end
53
+
47
54
  # Do nothing. This prevents control flow within the Ruby
48
55
  # interpreter from staying within this runner, as it would
49
56
  # ordinarily do for `wu-local`.
@@ -1,4 +1,3 @@
1
1
  module Wukong
2
- # The current version of Wukong.
3
- VERSION = '3.0.0'
2
+ VERSION = '3.0.1'
4
3
  end
@@ -5,3 +5,4 @@ require_relative("reducers/group")
5
5
  require_relative("reducers/group_concat")
6
6
  require_relative("reducers/moments")
7
7
  require_relative("reducers/bin")
8
+ require_relative("reducers/uniq")
@@ -0,0 +1,90 @@
1
+ require_relative("accumulator")
2
+
3
+ module Wukong
4
+ class Processor
5
+
6
+ # A processor which emits only unique records from its input.
7
+ # It's intended to work just like `uniq`.
8
+ #
9
+ # @example Emit unique elements from the input (like `uniq`).
10
+ #
11
+ # $ uniq input
12
+ # apple
13
+ # banana
14
+ # pear
15
+ # $ cat input | wu-local uniq
16
+ # apple
17
+ # banana
18
+ # pear
19
+ #
20
+ # @example Emit unique elements from the input with counts (like `uniq -c`).
21
+ #
22
+ # $ uniq -c input
23
+ # 3 apple
24
+ # 2 banana
25
+ # 3 pear
26
+ # $ cat input | wu-local uniq --count --to=tsv
27
+ # apple 3
28
+ # banana 5
29
+ # pear 8
30
+
31
+ class Uniq < Accumulator
32
+
33
+ field :count, :boolean, doc: "Emit a count for each group of input records", default: false
34
+
35
+ description <<EOF
36
+ This processor uniq's its inputs.
37
+
38
+ $ uniq input
39
+ apple
40
+ banana
41
+ pear
42
+ $ cat input | wu-local uniq
43
+ apple
44
+ banana
45
+ pear
46
+
47
+ And it can count as well:
48
+
49
+ $ uniq -c input
50
+ 3 apple
51
+ 2 banana
52
+ 3 pear
53
+ $ cat input | wu-local uniq --count --to=tsv
54
+ apple 3
55
+ banana 5
56
+ pear 8
57
+ EOF
58
+
59
+ # The total size of the input records.
60
+ attr_accessor :size
61
+
62
+ # Initializes the count to 0.
63
+ def setup
64
+ super()
65
+ @size = 0
66
+ end
67
+
68
+ # Accumulate a `record` by incrementing the total size.
69
+ #
70
+ # @param [Object] record
71
+ def accumulate record
72
+ self.size += 1
73
+ end
74
+
75
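+ # Yields the key, paired with the size when `count` is set.
76
+ #
77
+ # @yield [key] or [key, size] when `count` is set
78
+ # @yieldparam [Integer] size the tally incremented by `accumulate`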
79
+ def finalize
80
+ if count
81
+ yield [key, self.size]
82
+ else
83
+ yield key
84
+ end
85
+ end
86
+
87
+ register
88
+ end
89
+ end
90
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2013-02-20 00:00:00.000000000 Z
14
+ date: 2013-03-07 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: configliere
@@ -337,6 +337,7 @@ files:
337
337
  - lib/wukong/widget/reducers/group_concat.rb
338
338
  - lib/wukong/widget/reducers/moments.rb
339
339
  - lib/wukong/widget/reducers/sort.rb
340
+ - lib/wukong/widget/reducers/uniq.rb
340
341
  - lib/wukong/widget/serializers.rb
341
342
  - lib/wukong/widget/utils.rb
342
343
  - lib/wukong/widgets.rb
@@ -399,15 +400,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
399
400
  - - ! '>='
400
401
  - !ruby/object:Gem::Version
401
402
  version: '0'
403
+ segments:
404
+ - 0
405
+ hash: 719389029987495852
402
406
  required_rubygems_version: !ruby/object:Gem::Requirement
403
407
  none: false
404
408
  requirements:
405
409
  - - ! '>='
406
410
  - !ruby/object:Gem::Version
407
411
  version: '0'
412
+ segments:
413
+ - 0
414
+ hash: 719389029987495852
408
415
  requirements: []
409
416
  rubyforge_project:
410
- rubygems_version: 1.8.23
417
+ rubygems_version: 1.8.24
411
418
  signing_key:
412
419
  specification_version: 3
413
420
  summary: Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use