cascading.jruby 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. data/History.txt +15 -0
  2. data/lib/cascading/assembly.rb +138 -17
  3. data/lib/cascading/base.rb +0 -4
  4. data/lib/cascading/cascade.rb +25 -16
  5. data/lib/cascading/cascading.rb +25 -5
  6. data/lib/cascading/ext/array.rb +1 -7
  7. data/lib/cascading/flow.rb +18 -19
  8. data/lib/cascading/mode.rb +5 -1
  9. data/lib/cascading/operations.rb +11 -4
  10. data/lib/cascading/tap.rb +4 -0
  11. data/lib/cascading.rb +1 -5
  12. data/test/test_assembly.rb +135 -29
  13. data/test/test_cascade.rb +80 -0
  14. data/test/test_flow.rb +20 -0
  15. data/test/test_operations.rb +3 -2
  16. metadata +6 -76
  17. data/.travis.yml +0 -6
  18. data/Gemfile +0 -6
  19. data/Gemfile.lock +0 -12
  20. data/HACKING.md +0 -23
  21. data/README.md +0 -9
  22. data/Rakefile +0 -46
  23. data/TODO +0 -13
  24. data/bin/make_job +0 -81
  25. data/ivy.xml +0 -25
  26. data/ivysettings.xml +0 -7
  27. data/samples/branch.rb +0 -30
  28. data/samples/copy.rb +0 -20
  29. data/samples/data/data2.txt +0 -88799
  30. data/samples/data/data_group_by.txt +0 -7
  31. data/samples/data/data_join1.txt +0 -3
  32. data/samples/data/data_join2.txt +0 -3
  33. data/samples/data/data_join3.txt +0 -3
  34. data/samples/data/genealogy/names/dist.all.last +0 -88799
  35. data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
  36. data/samples/group_by.rb +0 -61
  37. data/samples/join.rb +0 -31
  38. data/samples/logwordcount.rb +0 -22
  39. data/samples/project.rb +0 -23
  40. data/samples/rename.rb +0 -20
  41. data/samples/scorenames.rb +0 -20
  42. data/samples/splitter.rb +0 -19
  43. data/samples/sub_assembly.rb +0 -30
  44. data/samples/union.rb +0 -36
  45. data/spec/cascading_spec.rb +0 -105
  46. data/spec/expr_spec.rb +0 -230
  47. data/spec/jruby_version_spec.rb +0 -72
  48. data/spec/resource/join_input.txt +0 -3
  49. data/spec/resource/test_input.txt +0 -4
  50. data/spec/scope_spec.rb +0 -149
  51. data/spec/spec.opts +0 -6
  52. data/spec/spec_helper.rb +0 -5
  53. data/spec/spec_util.rb +0 -92
  54. data/src/cascading/jruby/Main.java +0 -38
  55. data/src/cascading/jruby/runner.rb +0 -6
  56. data/tags +0 -342
  57. data/tasks/ann.rake +0 -80
  58. data/tasks/ant.rake +0 -23
  59. data/tasks/bones.rake +0 -20
  60. data/tasks/gem.rake +0 -206
  61. data/tasks/git.rake +0 -40
  62. data/tasks/notes.rake +0 -27
  63. data/tasks/post_load.rake +0 -34
  64. data/tasks/rdoc.rake +0 -50
  65. data/tasks/rubyforge.rake +0 -55
  66. data/tasks/samples.rake +0 -19
  67. data/tasks/setup.rb +0 -300
  68. data/tasks/spec.rake +0 -59
  69. data/tasks/svn.rake +0 -47
  70. data/tasks/test.rake +0 -42
  71. data/test/data/data1.txt +0 -14
  72. data/test/data/data2.txt +0 -14
  73. data/test/mock_assemblies.rb +0 -55
data/History.txt CHANGED
@@ -1,3 +1,18 @@
1
+ 0.0.10 - Introduce Jading, cleanup, and library standardization
2
+
3
+ This release removes the code associated with running jobs on a Hadoop cluster
4
+ as that functionality is now moved to Jading. This separates the DSL library (a
5
+ gem) from the repository required to build and package job jars (Jading).
6
+ Additionally, this release removes a lot of the auto-generated Rake garbage
7
+ inherited from the original version and greatly streamlines the packaged gem.
8
+ Finally, it starts the precedent for making the contract of operations idiomatic
9
+ Ruby, removing the unnecessary Operations module, and eventually eliminating
10
+ global cascade and flow registries. A non-backwards compatible change was made
11
+ to the contract of complete and draw which allowed the mechanism for propagating
12
+ properties through a job to be righted and made more Hadoop-idiomatic.
13
+
14
+ This release removes all the cruft associated with running jobs
15
+
1
16
  0.0.9 - Cascading local mode and upgrade to Cascading 2.0.0
2
17
 
3
18
  This release upgrades to Cascading 2.0.0 (final) and introduces Cascading local
@@ -1,8 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
3
- #
4
- # This is free software. Please see the LICENSE and COPYING files for details.
5
-
6
1
  require 'cascading/base'
7
2
  require 'cascading/operations'
8
3
  require 'cascading/aggregations'
@@ -110,9 +105,7 @@ module Cascading
110
105
  "#{name} : head pipe : #{head_pipe} - tail pipe: #{tail_pipe}"
111
106
  end
112
107
 
113
- # Builds a join (CoGroup) pipe. Requires a list of assembly names to join
114
- # and :on to specify the group_fields.
115
- def join(*args, &block)
108
+ def prepare_join(*args, &block)
116
109
  options = args.extract_options!
117
110
 
118
111
  pipes, _ = populate_incoming_scopes(args)
@@ -140,6 +133,7 @@ module Cascading
140
133
  incoming_fields = @incoming_scopes.map{ |s| s.values_fields }
141
134
  declared_fields = fields(options[:declared_fields] || dedup_fields(*incoming_fields))
142
135
  joiner = options[:joiner]
136
+ is_hash_join = options[:hash] || false
143
137
 
144
138
  case joiner
145
139
  when :inner, 'inner', nil
@@ -160,15 +154,48 @@ module Cascading
160
154
  end
161
155
  joiner = Java::CascadingPipeJoiner::MixedJoin.new(joiner.to_java(:boolean))
162
156
  end
163
- result_group_fields = dedup_fields(*group_fields)
164
- parameters = [
165
- pipes.to_java(Java::CascadingPipe::Pipe),
166
- group_fields,
167
- declared_fields,
168
- result_group_fields,
169
- joiner
170
- ]
171
- apply_aggregations(Java::CascadingPipe::CoGroup.new(*parameters), @incoming_scopes, &block)
157
+
158
+ if is_hash_join
159
+ raise ArgumentError, "hash joins don't support aggregations" if block_given?
160
+ parameters = [
161
+ pipes.to_java(Java::CascadingPipe::Pipe),
162
+ group_fields,
163
+ declared_fields,
164
+ joiner
165
+ ]
166
+ group_assembly = Java::CascadingPipe::HashJoin.new(*parameters)
167
+ else
168
+ result_group_fields = dedup_fields(*group_fields)
169
+ parameters = [
170
+ pipes.to_java(Java::CascadingPipe::Pipe),
171
+ group_fields,
172
+ declared_fields,
173
+ result_group_fields,
174
+ joiner
175
+ ]
176
+ group_assembly = Java::CascadingPipe::CoGroup.new(*parameters)
177
+ end
178
+ apply_aggregations(group_assembly, @incoming_scopes, &block)
179
+ end
180
+ private :prepare_join
181
+
182
+ # Builds a HashJoin pipe. This should be used carefully, as the right side
183
+ # of the join is accumulated entirely in memory. Requires a list of assembly
184
+ # names to join and :on to specify the join_fields.
185
+ def hash_join(*args, &block)
186
+ options = args.extract_options!
187
+ options[:hash] = true
188
+ args << options
189
+ prepare_join(*args, &block)
190
+ end
191
+
192
+ # Builds a join (CoGroup) pipe. Requires a list of assembly names to join
193
+ # and :on to specify the group_fields.
194
+ def join(*args, &block)
195
+ options = args.extract_options!
196
+ options[:hash] = false
197
+ args << options
198
+ prepare_join(*args, &block)
172
199
  end
173
200
  alias co_group join
174
201
 
@@ -650,5 +677,99 @@ module Cascading
650
677
 
651
678
  each args, :function => field_joiner(options), :output => output
652
679
  end
680
+
681
+ # Ungroups, or unpivots, a tuple (see Cascading's UnGroup at http://docs.cascading.org/cascading/2.0/javadoc/cascading/operation/function/UnGroup.html).
682
+ #
683
+ # You must provide :key and you must provide only one of :value_selectors
684
+ # and :num_values.
685
+ #
686
+ # The named options are:
687
+ # * <tt>:key</tt> required array of field names to replicate on every
688
+ # output row in an ungrouped group.
689
+ # * <tt>:value_selectors</tt> an array of field names to ungroup. Each
690
+ # field will be ungrouped into an output tuple along with the key fields
691
+ # in the order provided.
692
+ # * <tt>:num_values</tt> an integer specifying the number of fields to
693
+ # ungroup into each output tuple (excluding the key fields). All input
694
+ # fields will be ungrouped.
695
+ # * <tt>:input</tt> an array of field names that specifies the fields to
696
+ # input to UnGroup. Defaults to all_fields.
697
+ # * <tt>:into</tt> an array of field names. Default set by UnGroup.
698
+ # * <tt>:output</tt> an array of field names that specifies the fields to
699
+ # produce as output of UnGroup. Defaults to all_fields.
700
+ def ungroup(*args)
701
+ options = args.extract_options!
702
+ input = options[:input] || all_fields
703
+ into = fields(options[:into])
704
+ output = options[:output] || all_fields
705
+ key = fields(options[:key])
706
+
707
+ raise 'You must provide exactly one of :value_selectors or :num_values to ungroup' unless options.has_key?(:value_selectors) ^ options.has_key?(:num_values)
708
+ value_selectors = options[:value_selectors].map{ |vs| fields(vs) }.to_java(Java::CascadingTuple::Fields) if options.has_key?(:value_selectors)
709
+ num_values = options[:num_values] if options.has_key?(:num_values)
710
+
711
+ parameters = [into, key, value_selectors, num_values].compact
712
+ each input, :function => Java::CascadingOperationFunction::UnGroup.new(*parameters), :output => output
713
+ end
714
+
715
+ # Inserts one of two values into the dataflow based upon the result of the
716
+ # supplied filter on the input fields. This is primarily useful for
717
+ # creating indicators from filters.
718
+ #
719
+ # Parameters:
720
+ # * <tt>input</tt> name of field to apply the filter.
721
+ # * <tt>filter</tt> Cascading Filter to apply.
722
+ # * <tt>keep_value</tt> Java value to produce when the filter would keep
723
+ # the given input.
724
+ # * <tt>remove_value</tt> Java value to produce when the filter would
725
+ # remove the given input.
726
+ #
727
+ # The named options are:
728
+ # * <tt>:into</tt> an output field name, defaulting to 'filter_value'.
729
+ # * <tt>:output</tt> an array of field names that specifies the fields to
730
+ # retain in the output tuple. Defaults to all_fields.
731
+ def set_value(input, filter, keep_value, remove_value, params = {})
732
+ into = fields(params[:into] || 'filter_value')
733
+ output = params[:output] || all_fields
734
+ each input, :function => Java::CascadingOperationFunction::SetValue.new(into, filter, keep_value, remove_value), :output => output
735
+ end
736
+
737
+ # Efficient way of inserting a null indicator for any field, even one that
738
+ # cannot be coerced to a string. This is accomplished using Cascading's
739
+ # FilterNull and SetValue operators rather than Janino. 1 is produced if
740
+ # the field is null and 0 otherwise.
741
+ #
742
+ # Parameters:
743
+ # * <tt>input</tt> name of field to check for null.
744
+ #
745
+ # The named options are:
746
+ # * <tt>:into</tt> an output field name, defaulting to 'is_null'.
747
+ # * <tt>:output</tt> an array of field names that specifies the fields to
748
+ # retain in the output tuple. Defaults to all_fields.
749
+ def null_indicator(input, params = {})
750
+ into = fields(params[:into] || 'is_null')
751
+ output = params[:output] || all_fields
752
+ set_value input, Java::CascadingOperationFilter::FilterNull.new, 1.to_java, 0.to_java, :into => into, :output => output
753
+ end
754
+
755
+ # Given a field and a regex, returns an indicator that is 1 if the string
756
+ # contains at least 1 match and 0 otherwise.
757
+ #
758
+ # Parameters:
759
+ # * <tt>input</tt> field name or names that specifies the fields over which
760
+ # to perform the match.
761
+ # * <tt>pattern</tt> regex to apply to the input.
762
+ #
763
+ # The named options are:
764
+ # * <tt>:into</tt> an output field name, defaulting to 'regex_contains'.
765
+ # * <tt>:output</tt> an array of field names that specifies the fields to
766
+ # retain in the output tuple. Defaults to all_fields.
767
+ def regex_contains(input, pattern, params = {})
768
+ input = fields(input)
769
+ pattern = pattern.to_s # Supports JRuby regexes
770
+ into = fields(params[:into] || 'regex_contains')
771
+ output = params[:output] || all_fields
772
+ set_value input, Java::CascadingOperationRegex::RegexFilter.new(pattern), 1.to_java, 0.to_java, :into => into, :output => output
773
+ end
653
774
  end
654
775
  end
@@ -1,7 +1,3 @@
1
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
2
- #
3
- # This is free software. Please see the LICENSE and COPYING files for details.
4
-
5
1
  module Cascading
6
2
  class Node
7
3
  attr_accessor :name, :parent, :children, :child_names, :last_child
@@ -1,7 +1,3 @@
1
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
2
- #
3
- # This is free software. Please see the LICENSE and COPYING files for details.
4
-
5
1
  require 'cascading/base'
6
2
  require 'yaml'
7
3
 
@@ -9,22 +5,35 @@ module Cascading
9
5
  class Cascade < Cascading::Node
10
6
  extend Registerable
11
7
 
12
- attr_reader :mode
8
+ attr_reader :properties, :mode
13
9
 
14
- # Builds a cascade given the specified name. Optionally accepts a :mode
15
- # which will be used as the default mode for all child flows. See
16
- # Cascading::Mode.parse for details.
10
+ # Do not use this constructor directly; instead, use Cascading::cascade to
11
+ # build cascades.
12
+ #
13
+ # Builds a cascade given the specified name. Optionally accepts
14
+ # :properties which will be used as the default properties for all child
15
+ # flows. Properties must be a Ruby Hash with string keys and values and
16
+ # will be copied before being passed into each flow in the cascade. See
17
+ # Cascading::Flow#initialize for details on how flows handle properties.
18
+ # Optionally accepts a :mode which will be used as the default mode for all
19
+ # child flows. See Cascading::Mode.parse for details.
17
20
  def initialize(name, params = {})
21
+ @properties = params[:properties] || {}
18
22
  @mode = params[:mode]
19
23
  super(name, nil) # A Cascade cannot have a parent
20
24
  self.class.add(name, self)
21
25
  end
22
26
 
23
- # Builds a child flow given a name and block. Optionally accepts a :mode,
24
- # which will override the default mode stored in this cascade.
27
+ # Builds a child flow given a name and block. Optionally accepts
28
+ # :properties which will override the default properties stored in this
29
+ # cascade. Optionally accepts a :mode, which will override the default
30
+ # mode stored in this cascade.
25
31
  def flow(name, params = {}, &block)
26
32
  raise "Could not build flow '#{name}'; block required" unless block_given?
33
+
34
+ params[:properties] ||= properties.dup
27
35
  params[:mode] ||= mode
36
+
28
37
  flow = Flow.new(name, self, params)
29
38
  add_child(flow)
30
39
  flow.instance_eval(&block)
@@ -35,9 +44,9 @@ module Cascading
35
44
  "#{offset}#{name}:cascade\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
36
45
  end
37
46
 
38
- def draw(dir, properties = nil)
47
+ def draw(dir)
39
48
  @children.each do |name, flow|
40
- flow.connect(properties).writeDOT("#{dir}/#{name}.dot")
49
+ flow.connect.writeDOT("#{dir}/#{name}.dot")
41
50
  end
42
51
  end
43
52
 
@@ -54,9 +63,9 @@ module Cascading
54
63
  end
55
64
  end
56
65
 
57
- def complete(properties = nil)
66
+ def complete
58
67
  begin
59
- Java::CascadingCascade::CascadeConnector.new.connect(name, make_flows(@children, properties)).complete
68
+ Java::CascadingCascade::CascadeConnector.new.connect(name, make_flows(@children)).complete
60
69
  rescue NativeException => e
61
70
  raise CascadingException.new(e, 'Error completing cascade')
62
71
  end
@@ -64,9 +73,9 @@ module Cascading
64
73
 
65
74
  private
66
75
 
67
- def make_flows(flows, properties)
76
+ def make_flows(flows)
68
77
  flow_instances = flows.map do |name, flow|
69
- cascading_flow = flow.connect(properties)
78
+ cascading_flow = flow.connect
70
79
  flow.listeners.each { |l| cascading_flow.addListener(l) }
71
80
  cascading_flow
72
81
  end
@@ -1,8 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
3
- #
4
- # This is free software. Please see the LICENSE and COPYING files for details.
5
-
6
1
  require 'cascading/expr_stub'
7
2
 
8
3
  module Cascading
@@ -12,10 +7,31 @@ module Cascading
12
7
  :float => java.lang.Float.java_class, :string => java.lang.String.java_class,
13
8
  }
14
9
 
10
+ # FIXME: I consider $jobconf_properties to be a hack forced on us by the lack
11
+ # of properties handling in earlier versions of the gem. Fully removing the
12
+ # hack would look like introducing a Job abstraction which instantiates user
13
+ # code, and allowing jading's runner to pass properties into that. I've
14
+ # already taken the step to thread properties through cascades and flows
15
+ # rather than merge properties before connect, but we still require the
16
+ # global properties hack to integrate with external runner code (jading).
17
+ #
18
+ # Note that this would also mean we can get rid of the global "registries" of
19
+ # cascades and flows. I've already eliminated most uses of these registries,
20
+ # but they are still required for the runner to find user code required in a
21
+ # previous step. A Job abstraction would clean this up, as well.
22
+ #
23
+ # For now, it is important that people use these constructors rather than
24
+ # directly building their own cascades and flows so that jading can send them
25
+ # default properties.
26
+
15
27
  # Builds a top-level cascade given a name and a block. Optionally accepts a
16
28
  # :mode, as explained in Cascading::Cascade#initialize.
17
29
  def cascade(name, params = {}, &block)
18
30
  raise "Could not build cascade '#{name}'; block required" unless block_given?
31
+ raise 'Cascading::cascade does not accept the :properties param only the global $jobconf_properties' if params[:properties]
32
+
33
+ params[:properties] = $jobconf_properties.dup if $jobconf_properties
34
+
19
35
  cascade = Cascade.new(name, params)
20
36
  cascade.instance_eval(&block)
21
37
  cascade
@@ -26,6 +42,10 @@ module Cascading
26
42
  # Cascading::Flow#initialize.
27
43
  def flow(name, params = {}, &block)
28
44
  raise "Could not build flow '#{name}'; block required" unless block_given?
45
+ raise 'Cascading::flow does not accept the :properties param only the global $jobconf_properties' if params[:properties]
46
+
47
+ params[:properties] = $jobconf_properties.dup if $jobconf_properties
48
+
29
49
  flow = Flow.new(name, nil, params)
30
50
  flow.instance_eval(&block)
31
51
  flow
@@ -1,9 +1,3 @@
1
- # ext.rb : some extensions to basic types
2
- #
3
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
4
- #
5
- # This is free software. Please see the LICENSE and COPYING files for details.
6
-
7
1
  class Array
8
2
  def extract_options!
9
3
  last.is_a?(::Hash) ? pop : {}
@@ -12,4 +6,4 @@ class Array
12
6
  def extract_options
13
7
  last.is_a?(::Hash) ? last : {}
14
8
  end
15
- end
9
+ end
@@ -1,22 +1,26 @@
1
- # -*- coding: utf-8 -*-
2
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
3
- #
4
- # This is free software. Please see the LICENSE and COPYING files for details.
5
-
6
1
  require 'cascading/assembly'
7
2
 
8
3
  module Cascading
9
4
  class Flow < Cascading::Node
10
5
  extend Registerable
11
6
 
12
- attr_accessor :properties, :sources, :sinks, :incoming_scopes, :outgoing_scopes, :listeners
13
- attr_reader :mode
7
+ attr_accessor :sources, :sinks, :incoming_scopes, :outgoing_scopes, :listeners
8
+ attr_reader :properties, :mode
14
9
 
10
+ # Do not use this constructor directly. Instead, use Cascading::flow to
11
+ # build top-level flows and Cascade#flow to build flows within a Cascade.
12
+ #
15
13
  # Builds a flow given a name and a parent node (a cascade or nil).
16
- # Optionally accepts a :mode which will determine the execution mode of
17
- # this flow. See Cascading::Mode.parse for details.
14
+ # Optionally accepts :properties which allows external configuration of
15
+ # this flow. The flow will side-effect the properties during composition,
16
+ # then pass the modified properties along to the FlowConnector for
17
+ # execution. See Cascading::Cascade#initialize for details on how
18
+ # properties are propagated through cascades. Optionally accepts a :mode
19
+ # which will determine the execution mode of this flow. See
20
+ # Cascading::Mode.parse for details.
18
21
  def initialize(name, parent, params = {})
19
- @properties, @sources, @sinks, @incoming_scopes, @outgoing_scopes, @listeners = {}, {}, {}, {}, {}, []
22
+ @sources, @sinks, @incoming_scopes, @outgoing_scopes, @listeners = {}, {}, {}, {}, []
23
+ @properties = params[:properties] || {}
20
24
  @mode = Mode.parse(params[:mode])
21
25
  @flow_scope = Scope.flow_scope(name)
22
26
  super(name, parent)
@@ -128,14 +132,9 @@ module Cascading
128
132
  end
129
133
  end
130
134
 
131
- def connect(properties = nil)
132
- # This ensures we have a hash, and that it is a Ruby Hash (because we
133
- # also accept java.util.HashMap), then merges it with Flow properties
134
- properties ||= {}
135
- properties = java.util.HashMap.new(@properties.merge(Hash[*properties.to_a.flatten]))
136
-
135
+ def connect
137
136
  puts "Connecting flow '#{name}' with properties:"
138
- properties.key_set.to_a.sort.each do |key|
137
+ properties.keys.sort.each do |key|
139
138
  puts "#{key}=#{properties[key]}"
140
139
  end
141
140
 
@@ -149,9 +148,9 @@ module Cascading
149
148
  mode.connect_flow(properties, name, sources, sinks, pipes)
150
149
  end
151
150
 
152
- def complete(properties = nil)
151
+ def complete
153
152
  begin
154
- flow = connect(properties)
153
+ flow = connect
155
154
  @listeners.each { |l| flow.addListener(l) }
156
155
  flow.complete
157
156
  rescue NativeException => e
@@ -41,7 +41,11 @@ module Cascading
41
41
  update_local_mode(sources, sinks)
42
42
  sources = select_taps(sources)
43
43
  sinks = select_taps(sinks)
44
- flow_connector_class.new(properties).connect(name, sources, sinks, pipes)
44
+
45
+ # Report execution mode to stdout before connecting
46
+ puts "Connecting flow '#{name}' in #{local ? 'Cascading local mode' : 'Hadoop mode'}"
47
+
48
+ flow_connector_class.new(java.util.HashMap.new(properties)).connect(name, sources, sinks, pipes)
45
49
  end
46
50
 
47
51
  private
@@ -1,8 +1,15 @@
1
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
2
- #
3
- # This is free software. Please see the LICENSE and COPYING files for details.
4
-
5
1
  module Cascading
2
+ # The Cascading::Operations module is deprecated. The original idea from long
3
+ # ago is that it would be useful to mixin operator wrappers to places other
4
+ # than Cascading::Assembly, but this is not true. Instead, put Eaches in
5
+ # Cascading::Assembly, Everies in Cascading::Aggregations, and any more
6
+ # generally useful utility code directly in the Cascading module
7
+ # (cascading/cascading.rb).
8
+ #
9
+ # Further, the entire *args pattern should be deprecated as it leads to
10
+ # functions that can only be understood by reading their code. Instead,
11
+ # idiomatic Ruby (positional required params and a params hash for optional
12
+ # args) should be used. See Cascading::Assembly#set_value for an example.
6
13
  module Operations
7
14
  def identity
8
15
  Java::CascadingOperation::Identity.new
data/lib/cascading/tap.rb CHANGED
@@ -9,6 +9,10 @@ module Cascading
9
9
  @hadoop_tap = hadoop_tap
10
10
  end
11
11
 
12
+ def to_s
13
+ "Local: #{local_tap}, Hadoop: #{hadoop_tap}"
14
+ end
15
+
12
16
  def local?
13
17
  !local_tap.nil?
14
18
  end
data/lib/cascading.rb CHANGED
@@ -1,12 +1,8 @@
1
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
2
- #
3
- # This is free software. Please see the LICENSE and COPYING files for details.
4
-
5
1
  require 'java'
6
2
 
7
3
  module Cascading
8
4
  # :stopdoc:
9
- VERSION = '0.0.9'
5
+ VERSION = '0.0.10'
10
6
  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
11
7
  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
12
8
  CASCADING_HOME = ENV['CASCADING_HOME']