cascading.jruby 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/History.txt +15 -0
  2. data/lib/cascading/assembly.rb +138 -17
  3. data/lib/cascading/base.rb +0 -4
  4. data/lib/cascading/cascade.rb +25 -16
  5. data/lib/cascading/cascading.rb +25 -5
  6. data/lib/cascading/ext/array.rb +1 -7
  7. data/lib/cascading/flow.rb +18 -19
  8. data/lib/cascading/mode.rb +5 -1
  9. data/lib/cascading/operations.rb +11 -4
  10. data/lib/cascading/tap.rb +4 -0
  11. data/lib/cascading.rb +1 -5
  12. data/test/test_assembly.rb +135 -29
  13. data/test/test_cascade.rb +80 -0
  14. data/test/test_flow.rb +20 -0
  15. data/test/test_operations.rb +3 -2
  16. metadata +6 -76
  17. data/.travis.yml +0 -6
  18. data/Gemfile +0 -6
  19. data/Gemfile.lock +0 -12
  20. data/HACKING.md +0 -23
  21. data/README.md +0 -9
  22. data/Rakefile +0 -46
  23. data/TODO +0 -13
  24. data/bin/make_job +0 -81
  25. data/ivy.xml +0 -25
  26. data/ivysettings.xml +0 -7
  27. data/samples/branch.rb +0 -30
  28. data/samples/copy.rb +0 -20
  29. data/samples/data/data2.txt +0 -88799
  30. data/samples/data/data_group_by.txt +0 -7
  31. data/samples/data/data_join1.txt +0 -3
  32. data/samples/data/data_join2.txt +0 -3
  33. data/samples/data/data_join3.txt +0 -3
  34. data/samples/data/genealogy/names/dist.all.last +0 -88799
  35. data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
  36. data/samples/group_by.rb +0 -61
  37. data/samples/join.rb +0 -31
  38. data/samples/logwordcount.rb +0 -22
  39. data/samples/project.rb +0 -23
  40. data/samples/rename.rb +0 -20
  41. data/samples/scorenames.rb +0 -20
  42. data/samples/splitter.rb +0 -19
  43. data/samples/sub_assembly.rb +0 -30
  44. data/samples/union.rb +0 -36
  45. data/spec/cascading_spec.rb +0 -105
  46. data/spec/expr_spec.rb +0 -230
  47. data/spec/jruby_version_spec.rb +0 -72
  48. data/spec/resource/join_input.txt +0 -3
  49. data/spec/resource/test_input.txt +0 -4
  50. data/spec/scope_spec.rb +0 -149
  51. data/spec/spec.opts +0 -6
  52. data/spec/spec_helper.rb +0 -5
  53. data/spec/spec_util.rb +0 -92
  54. data/src/cascading/jruby/Main.java +0 -38
  55. data/src/cascading/jruby/runner.rb +0 -6
  56. data/tags +0 -342
  57. data/tasks/ann.rake +0 -80
  58. data/tasks/ant.rake +0 -23
  59. data/tasks/bones.rake +0 -20
  60. data/tasks/gem.rake +0 -206
  61. data/tasks/git.rake +0 -40
  62. data/tasks/notes.rake +0 -27
  63. data/tasks/post_load.rake +0 -34
  64. data/tasks/rdoc.rake +0 -50
  65. data/tasks/rubyforge.rake +0 -55
  66. data/tasks/samples.rake +0 -19
  67. data/tasks/setup.rb +0 -300
  68. data/tasks/spec.rake +0 -59
  69. data/tasks/svn.rake +0 -47
  70. data/tasks/test.rake +0 -42
  71. data/test/data/data1.txt +0 -14
  72. data/test/data/data2.txt +0 -14
  73. data/test/mock_assemblies.rb +0 -55
data/History.txt CHANGED
@@ -1,3 +1,18 @@
1
+ 0.0.10 - Introduce Jading, cleanup, and library standardization
2
+
3
+ This release removes the code associated with running jobs on a Hadoop cluster
4
+ as that functionality is now moved to Jading. This separates the DSL library (a
5
+ gem) from the repository required to build and package job jars (Jading).
6
+ Additionally, this release removes a lot of the auto-generated Rake garbage
7
+ inherited from the original version and greatly streamlines the packaged gem.
8
+ Finally, it starts the precedent for making the contract of operations idiomatic
9
+ Ruby, removing the unnecessary Operations module, and eventually eliminating
10
+ global cascade and flow registries. A non-backwards compatible change was made
11
+ to the contract of complete and draw which allowed the mechanism for propagating
12
+ properties through a job to be righted and made more Hadoop-idiomatic.
13
+
14
+ This release removes all the cruft associated with running jobs
15
+
1
16
  0.0.9 - Cascading local mode and upgrade to Cascading 2.0.0
2
17
 
3
18
  This release upgrades to Cascading 2.0.0 (final) and introduces Cascading local
@@ -1,8 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
3
- #
4
- # This is free software. Please see the LICENSE and COPYING files for details.
5
-
6
1
  require 'cascading/base'
7
2
  require 'cascading/operations'
8
3
  require 'cascading/aggregations'
@@ -110,9 +105,7 @@ module Cascading
110
105
  "#{name} : head pipe : #{head_pipe} - tail pipe: #{tail_pipe}"
111
106
  end
112
107
 
113
- # Builds a join (CoGroup) pipe. Requires a list of assembly names to join
114
- # and :on to specify the group_fields.
115
- def join(*args, &block)
108
+ def prepare_join(*args, &block)
116
109
  options = args.extract_options!
117
110
 
118
111
  pipes, _ = populate_incoming_scopes(args)
@@ -140,6 +133,7 @@ module Cascading
140
133
  incoming_fields = @incoming_scopes.map{ |s| s.values_fields }
141
134
  declared_fields = fields(options[:declared_fields] || dedup_fields(*incoming_fields))
142
135
  joiner = options[:joiner]
136
+ is_hash_join = options[:hash] || false
143
137
 
144
138
  case joiner
145
139
  when :inner, 'inner', nil
@@ -160,15 +154,48 @@ module Cascading
160
154
  end
161
155
  joiner = Java::CascadingPipeJoiner::MixedJoin.new(joiner.to_java(:boolean))
162
156
  end
163
- result_group_fields = dedup_fields(*group_fields)
164
- parameters = [
165
- pipes.to_java(Java::CascadingPipe::Pipe),
166
- group_fields,
167
- declared_fields,
168
- result_group_fields,
169
- joiner
170
- ]
171
- apply_aggregations(Java::CascadingPipe::CoGroup.new(*parameters), @incoming_scopes, &block)
157
+
158
+ if is_hash_join
159
+ raise ArgumentError, "hash joins don't support aggregations" if block_given?
160
+ parameters = [
161
+ pipes.to_java(Java::CascadingPipe::Pipe),
162
+ group_fields,
163
+ declared_fields,
164
+ joiner
165
+ ]
166
+ group_assembly = Java::CascadingPipe::HashJoin.new(*parameters)
167
+ else
168
+ result_group_fields = dedup_fields(*group_fields)
169
+ parameters = [
170
+ pipes.to_java(Java::CascadingPipe::Pipe),
171
+ group_fields,
172
+ declared_fields,
173
+ result_group_fields,
174
+ joiner
175
+ ]
176
+ group_assembly = Java::CascadingPipe::CoGroup.new(*parameters)
177
+ end
178
+ apply_aggregations(group_assembly, @incoming_scopes, &block)
179
+ end
180
+ private :prepare_join
181
+
182
+ # Builds a HashJoin pipe. This should be used carefully, as the right side
183
+ # of the join is accumulated entirely in memory. Requires a list of assembly
184
+ # names to join and :on to specify the join_fields.
185
+ def hash_join(*args, &block)
186
+ options = args.extract_options!
187
+ options[:hash] = true
188
+ args << options
189
+ prepare_join(*args, &block)
190
+ end
191
+
192
+ # Builds a join (CoGroup) pipe. Requires a list of assembly names to join
193
+ # and :on to specify the group_fields.
194
+ def join(*args, &block)
195
+ options = args.extract_options!
196
+ options[:hash] = false
197
+ args << options
198
+ prepare_join(*args, &block)
172
199
  end
173
200
  alias co_group join
174
201
 
@@ -650,5 +677,99 @@ module Cascading
650
677
 
651
678
  each args, :function => field_joiner(options), :output => output
652
679
  end
680
+
681
+ # Ungroups, or unpivots, a tuple (see Cascading's UnGroup at http://docs.cascading.org/cascading/2.0/javadoc/cascading/operation/function/UnGroup.html).
682
+ #
683
+ # You must provide :key and you must provide only one of :value_selectors
684
+ # and :num_values.
685
+ #
686
+ # The named options are:
687
+ # * <tt>:key</tt> required array of field names to replicate on every
688
+ # output row in an ungrouped group.
689
+ # * <tt>:value_selectors</tt> an array of field names to ungroup. Each
690
+ # field will be ungrouped into an output tuple along with the key fields
691
+ # in the order provided.
692
+ # * <tt>:num_values</tt> an integer specifying the number of fields to
693
+ # ungroup into each output tuple (excluding the key fields). All input
694
+ # fields will be ungrouped.
695
+ # * <tt>:input</tt> an array of field names that specifies the fields to
696
+ # input to UnGroup. Defaults to all_fields.
697
+ # * <tt>:into</tt> an array of field names. Default set by UnGroup.
698
+ # * <tt>:output</tt> an array of field names that specifies the fields to
699
+ # produce as output of UnGroup. Defaults to all_fields.
700
+ def ungroup(*args)
701
+ options = args.extract_options!
702
+ input = options[:input] || all_fields
703
+ into = fields(options[:into])
704
+ output = options[:output] || all_fields
705
+ key = fields(options[:key])
706
+
707
+ raise 'You must provide exactly one of :value_selectors or :num_values to ungroup' unless options.has_key?(:value_selectors) ^ options.has_key?(:num_values)
708
+ value_selectors = options[:value_selectors].map{ |vs| fields(vs) }.to_java(Java::CascadingTuple::Fields) if options.has_key?(:value_selectors)
709
+ num_values = options[:num_values] if options.has_key?(:num_values)
710
+
711
+ parameters = [into, key, value_selectors, num_values].compact
712
+ each input, :function => Java::CascadingOperationFunction::UnGroup.new(*parameters), :output => output
713
+ end
714
+
715
+ # Inserts one of two values into the dataflow based upon the result of the
716
+ # supplied filter on the input fields. This is primarily useful for
717
+ # creating indicators from filters.
718
+ #
719
+ # Parameters:
720
+ # * <tt>input</tt> name of field to apply the filter.
721
+ # * <tt>filter</tt> Cascading Filter to apply.
722
+ # * <tt>keep_value</tt> Java value to produce when the filter would keep
723
+ # the given input.
724
+ # * <tt>remove_value</tt> Java value to produce when the filter would
725
+ # remove the given input.
726
+ #
727
+ # The named options are:
728
+ # * <tt>:into</tt> an output field name, defaulting to 'filter_value'.
729
+ # * <tt>:output</tt> an array of field names that specifies the fields to
730
+ # retain in the output tuple. Defaults to all_fields.
731
+ def set_value(input, filter, keep_value, remove_value, params = {})
732
+ into = fields(params[:into] || 'filter_value')
733
+ output = params[:output] || all_fields
734
+ each input, :function => Java::CascadingOperationFunction::SetValue.new(into, filter, keep_value, remove_value), :output => output
735
+ end
736
+
737
+ # Efficient way of inserting a null indicator for any field, even one that
738
+ # cannot be coerced to a string. This is accomplished using Cascading's
739
+ # FilterNull and SetValue operators rather than Janino. 1 is produced if
740
+ # the field is null and 0 otherwise.
741
+ #
742
+ # Parameters:
743
+ # * <tt>input</tt> name of field to check for null.
744
+ #
745
+ # The named options are:
746
+ # * <tt>:into</tt> an output field name, defaulting to 'is_null'.
747
+ # * <tt>:output</tt> an array of field names that specifies the fields to
748
+ # retain in the output tuple. Defaults to all_fields.
749
+ def null_indicator(input, params = {})
750
+ into = fields(params[:into] || 'is_null')
751
+ output = params[:output] || all_fields
752
+ set_value input, Java::CascadingOperationFilter::FilterNull.new, 1.to_java, 0.to_java, :into => into, :output => output
753
+ end
754
+
755
+ # Given a field and a regex, returns an indicator that is 1 if the string
756
+ # contains at least 1 match and 0 otherwise.
757
+ #
758
+ # Parameters:
759
+ # * <tt>input</tt> field name or names that specifies the fields over which
760
+ # to perform the match.
761
+ # * <tt>pattern</tt> regex to apply to the input.
762
+ #
763
+ # The named options are:
764
+ # * <tt>:into</tt> an output field name, defaulting to 'regex_contains'.
765
+ # * <tt>:output</tt> an array of field names that specifies the fields to
766
+ # retain in the output tuple. Defaults to all_fields.
767
+ def regex_contains(input, pattern, params = {})
768
+ input = fields(input)
769
+ pattern = pattern.to_s # Supports JRuby regexes
770
+ into = fields(params[:into] || 'regex_contains')
771
+ output = params[:output] || all_fields
772
+ set_value input, Java::CascadingOperationRegex::RegexFilter.new(pattern), 1.to_java, 0.to_java, :into => into, :output => output
773
+ end
653
774
  end
654
775
  end
@@ -1,7 +1,3 @@
1
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
2
- #
3
- # This is free software. Please see the LICENSE and COPYING files for details.
4
-
5
1
  module Cascading
6
2
  class Node
7
3
  attr_accessor :name, :parent, :children, :child_names, :last_child
@@ -1,7 +1,3 @@
1
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
2
- #
3
- # This is free software. Please see the LICENSE and COPYING files for details.
4
-
5
1
  require 'cascading/base'
6
2
  require 'yaml'
7
3
 
@@ -9,22 +5,35 @@ module Cascading
9
5
  class Cascade < Cascading::Node
10
6
  extend Registerable
11
7
 
12
- attr_reader :mode
8
+ attr_reader :properties, :mode
13
9
 
14
- # Builds a cascade given the specified name. Optionally accepts a :mode
15
- # which will be used as the default mode for all child flows. See
16
- # Cascading::Mode.parse for details.
10
+ # Do not use this constructor directly; instead, use Cascading::cascade to
11
+ # build cascades.
12
+ #
13
+ # Builds a cascade given the specified name. Optionally accepts
14
+ # :properties which will be used as the default properties for all child
15
+ # flows. Properties must be a Ruby Hash with string keys and values and
16
+ # will be copied before being passed into each flow in the cascade. See
17
+ # Cascading::Flow#initialize for details on how flows handle properties.
18
+ # Optionally accepts a :mode which will be used as the default mode for all
19
+ # child flows. See Cascading::Mode.parse for details.
17
20
  def initialize(name, params = {})
21
+ @properties = params[:properties] || {}
18
22
  @mode = params[:mode]
19
23
  super(name, nil) # A Cascade cannot have a parent
20
24
  self.class.add(name, self)
21
25
  end
22
26
 
23
- # Builds a child flow given a name and block. Optionally accepts a :mode,
24
- # which will override the default mode stored in this cascade.
27
+ # Builds a child flow given a name and block. Optionally accepts
28
+ # :properties which will override the default properties stored in this
29
+ # cascade. Optionally accepts a :mode, which will override the default
30
+ # mode stored in this cascade.
25
31
  def flow(name, params = {}, &block)
26
32
  raise "Could not build flow '#{name}'; block required" unless block_given?
33
+
34
+ params[:properties] ||= properties.dup
27
35
  params[:mode] ||= mode
36
+
28
37
  flow = Flow.new(name, self, params)
29
38
  add_child(flow)
30
39
  flow.instance_eval(&block)
@@ -35,9 +44,9 @@ module Cascading
35
44
  "#{offset}#{name}:cascade\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
36
45
  end
37
46
 
38
- def draw(dir, properties = nil)
47
+ def draw(dir)
39
48
  @children.each do |name, flow|
40
- flow.connect(properties).writeDOT("#{dir}/#{name}.dot")
49
+ flow.connect.writeDOT("#{dir}/#{name}.dot")
41
50
  end
42
51
  end
43
52
 
@@ -54,9 +63,9 @@ module Cascading
54
63
  end
55
64
  end
56
65
 
57
- def complete(properties = nil)
66
+ def complete
58
67
  begin
59
- Java::CascadingCascade::CascadeConnector.new.connect(name, make_flows(@children, properties)).complete
68
+ Java::CascadingCascade::CascadeConnector.new.connect(name, make_flows(@children)).complete
60
69
  rescue NativeException => e
61
70
  raise CascadingException.new(e, 'Error completing cascade')
62
71
  end
@@ -64,9 +73,9 @@ module Cascading
64
73
 
65
74
  private
66
75
 
67
- def make_flows(flows, properties)
76
+ def make_flows(flows)
68
77
  flow_instances = flows.map do |name, flow|
69
- cascading_flow = flow.connect(properties)
78
+ cascading_flow = flow.connect
70
79
  flow.listeners.each { |l| cascading_flow.addListener(l) }
71
80
  cascading_flow
72
81
  end
@@ -1,8 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
3
- #
4
- # This is free software. Please see the LICENSE and COPYING files for details.
5
-
6
1
  require 'cascading/expr_stub'
7
2
 
8
3
  module Cascading
@@ -12,10 +7,31 @@ module Cascading
12
7
  :float => java.lang.Float.java_class, :string => java.lang.String.java_class,
13
8
  }
14
9
 
10
+ # FIXME: I consider $jobconf_properties to be a hack forced on us by the lack
11
+ # of properties handling in earlier versions of the gem. Fully removing the
12
+ # hack would look like introducing a Job abstraction which instantiates user
13
+ # code, and allowing jading's runner to pass properties into that. I've
14
+ # already taken the step to thread properties through cascades and flows
15
+ # rather than merge properties before connect, but we still require the
16
+ # global properties hack to integrate with external runner code (jading).
17
+ #
18
+ # Note that this would also mean we can get rid of the global "registries" of
19
+ # cascades and flows. I've already eliminated most uses of these registries,
20
+ # but they are still required for the runner to find user code required in a
21
+ # previous step. A Job abstraction would clean this up, as well.
22
+ #
23
+ # For now, it is important that people use these constructors rather than
24
+ # directly building their own cascades and flows so that jading can send them
25
+ # default properties.
26
+
15
27
  # Builds a top-level cascade given a name and a block. Optionally accepts a
16
28
  # :mode, as explained in Cascading::Cascade#initialize.
17
29
  def cascade(name, params = {}, &block)
18
30
  raise "Could not build cascade '#{name}'; block required" unless block_given?
31
+ raise 'Cascading::cascade does not accept the :properties param only the global $jobconf_properties' if params[:properties]
32
+
33
+ params[:properties] = $jobconf_properties.dup if $jobconf_properties
34
+
19
35
  cascade = Cascade.new(name, params)
20
36
  cascade.instance_eval(&block)
21
37
  cascade
@@ -26,6 +42,10 @@ module Cascading
26
42
  # Cascading::Flow#initialize.
27
43
  def flow(name, params = {}, &block)
28
44
  raise "Could not build flow '#{name}'; block required" unless block_given?
45
+ raise 'Cascading::flow does not accept the :properties param only the global $jobconf_properties' if params[:properties]
46
+
47
+ params[:properties] = $jobconf_properties.dup if $jobconf_properties
48
+
29
49
  flow = Flow.new(name, nil, params)
30
50
  flow.instance_eval(&block)
31
51
  flow
@@ -1,9 +1,3 @@
1
- # ext.rb : some extensions to basic types
2
- #
3
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
4
- #
5
- # This is free software. Please see the LICENSE and COPYING files for details.
6
-
7
1
  class Array
8
2
  def extract_options!
9
3
  last.is_a?(::Hash) ? pop : {}
@@ -12,4 +6,4 @@ class Array
12
6
  def extract_options
13
7
  last.is_a?(::Hash) ? last : {}
14
8
  end
15
- end
9
+ end
@@ -1,22 +1,26 @@
1
- # -*- coding: utf-8 -*-
2
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
3
- #
4
- # This is free software. Please see the LICENSE and COPYING files for details.
5
-
6
1
  require 'cascading/assembly'
7
2
 
8
3
  module Cascading
9
4
  class Flow < Cascading::Node
10
5
  extend Registerable
11
6
 
12
- attr_accessor :properties, :sources, :sinks, :incoming_scopes, :outgoing_scopes, :listeners
13
- attr_reader :mode
7
+ attr_accessor :sources, :sinks, :incoming_scopes, :outgoing_scopes, :listeners
8
+ attr_reader :properties, :mode
14
9
 
10
+ # Do not use this constructor directly. Instead, use Cascading::flow to
11
+ # build top-level flows and Cascade#flow to build flows within a Cascade.
12
+ #
15
13
  # Builds a flow given a name and a parent node (a cascade or nil).
16
- # Optionally accepts a :mode which will determine the execution mode of
17
- # this flow. See Cascading::Mode.parse for details.
14
+ # Optionally accepts :properties which allows external configuration of
15
+ # this flow. The flow will side-effect the properties during composition,
16
+ # then pass the modified properties along to the FlowConnector for
17
+ # execution. See Cascading::Cascade#initialize for details on how
18
+ # properties are propagated through cascades. Optionally accepts a :mode
19
+ # which will determine the execution mode of this flow. See
20
+ # Cascading::Mode.parse for details.
18
21
  def initialize(name, parent, params = {})
19
- @properties, @sources, @sinks, @incoming_scopes, @outgoing_scopes, @listeners = {}, {}, {}, {}, {}, []
22
+ @sources, @sinks, @incoming_scopes, @outgoing_scopes, @listeners = {}, {}, {}, {}, []
23
+ @properties = params[:properties] || {}
20
24
  @mode = Mode.parse(params[:mode])
21
25
  @flow_scope = Scope.flow_scope(name)
22
26
  super(name, parent)
@@ -128,14 +132,9 @@ module Cascading
128
132
  end
129
133
  end
130
134
 
131
- def connect(properties = nil)
132
- # This ensures we have a hash, and that it is a Ruby Hash (because we
133
- # also accept java.util.HashMap), then merges it with Flow properties
134
- properties ||= {}
135
- properties = java.util.HashMap.new(@properties.merge(Hash[*properties.to_a.flatten]))
136
-
135
+ def connect
137
136
  puts "Connecting flow '#{name}' with properties:"
138
- properties.key_set.to_a.sort.each do |key|
137
+ properties.keys.sort.each do |key|
139
138
  puts "#{key}=#{properties[key]}"
140
139
  end
141
140
 
@@ -149,9 +148,9 @@ module Cascading
149
148
  mode.connect_flow(properties, name, sources, sinks, pipes)
150
149
  end
151
150
 
152
- def complete(properties = nil)
151
+ def complete
153
152
  begin
154
- flow = connect(properties)
153
+ flow = connect
155
154
  @listeners.each { |l| flow.addListener(l) }
156
155
  flow.complete
157
156
  rescue NativeException => e
@@ -41,7 +41,11 @@ module Cascading
41
41
  update_local_mode(sources, sinks)
42
42
  sources = select_taps(sources)
43
43
  sinks = select_taps(sinks)
44
- flow_connector_class.new(properties).connect(name, sources, sinks, pipes)
44
+
45
+ # Report execution mode to stdout before connecting
46
+ puts "Connecting flow '#{name}' in #{local ? 'Cascading local mode' : 'Hadoop mode'}"
47
+
48
+ flow_connector_class.new(java.util.HashMap.new(properties)).connect(name, sources, sinks, pipes)
45
49
  end
46
50
 
47
51
  private
@@ -1,8 +1,15 @@
1
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
2
- #
3
- # This is free software. Please see the LICENSE and COPYING files for details.
4
-
5
1
  module Cascading
2
+ # The Cascading::Operations module is deprecated. The original idea from long
3
+ # ago is that it would be useful to mix in operator wrappers to places other
4
+ # than Cascading::Assembly, but this is not true. Instead, put Eaches in
5
+ # Cascading::Assembly, Everies in Cascading::Aggregations, and any more
6
+ # generally useful utility code directly in the Cascading module
7
+ # (cascading/cascading.rb).
8
+ #
9
+ # Further, the entire *args pattern should be deprecated as it leads to
10
+ # functions that can only be understood by reading their code. Instead,
11
+ # idiomatic Ruby (positional required params and a params hash for optional
12
+ # args) should be used. See Cascading::Assembly#set_value for an example.
6
13
  module Operations
7
14
  def identity
8
15
  Java::CascadingOperation::Identity.new
data/lib/cascading/tap.rb CHANGED
@@ -9,6 +9,10 @@ module Cascading
9
9
  @hadoop_tap = hadoop_tap
10
10
  end
11
11
 
12
+ def to_s
13
+ "Local: #{local_tap}, Hadoop: #{hadoop_tap}"
14
+ end
15
+
12
16
  def local?
13
17
  !local_tap.nil?
14
18
  end
data/lib/cascading.rb CHANGED
@@ -1,12 +1,8 @@
1
- # Copyright 2009, Grégoire Marabout. All Rights Reserved.
2
- #
3
- # This is free software. Please see the LICENSE and COPYING files for details.
4
-
5
1
  require 'java'
6
2
 
7
3
  module Cascading
8
4
  # :stopdoc:
9
- VERSION = '0.0.9'
5
+ VERSION = '0.0.10'
10
6
  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
11
7
  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
12
8
  CASCADING_HOME = ENV['CASCADING_HOME']