cascading.jruby 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/HACKING.md +15 -0
  2. data/History.txt +0 -0
  3. data/LICENSE.txt +165 -0
  4. data/README.md +7 -0
  5. data/Rakefile +45 -0
  6. data/bin/make_job +81 -0
  7. data/lib/cascading/assembly.rb +726 -0
  8. data/lib/cascading/base.rb +63 -0
  9. data/lib/cascading/cascade.rb +63 -0
  10. data/lib/cascading/cascading.rb +134 -0
  11. data/lib/cascading/cascading_exception.rb +30 -0
  12. data/lib/cascading/expr_stub.rb +33 -0
  13. data/lib/cascading/ext/array.rb +15 -0
  14. data/lib/cascading/flow.rb +168 -0
  15. data/lib/cascading/operations.rb +204 -0
  16. data/lib/cascading/scope.rb +160 -0
  17. data/lib/cascading.rb +63 -0
  18. data/samples/branch.rb +31 -0
  19. data/samples/cascading.rb +41 -0
  20. data/samples/copy.rb +18 -0
  21. data/samples/data/data2.txt +88799 -0
  22. data/samples/data/data_join1.txt +3 -0
  23. data/samples/data/data_join2.txt +3 -0
  24. data/samples/data/data_join3.txt +3 -0
  25. data/samples/join.rb +32 -0
  26. data/samples/logwordcount.rb +22 -0
  27. data/samples/project.rb +24 -0
  28. data/samples/rename.rb +21 -0
  29. data/samples/scorenames.rb +20 -0
  30. data/samples/splitter.rb +20 -0
  31. data/samples/union.rb +35 -0
  32. data/spec/cascading_spec.rb +100 -0
  33. data/spec/expr_spec.rb +10 -0
  34. data/spec/primary_key_spec.rb +119 -0
  35. data/spec/resource/join_input.txt +3 -0
  36. data/spec/resource/test_input.txt +4 -0
  37. data/spec/scope_spec.rb +174 -0
  38. data/spec/spec.opts +6 -0
  39. data/spec/spec_helper.rb +5 -0
  40. data/spec/spec_util.rb +188 -0
  41. data/src/cascading/jruby/Main.java +38 -0
  42. data/src/cascading/jruby/runner.rb +6 -0
  43. data/tags +238 -0
  44. data/tasks/ann.rake +80 -0
  45. data/tasks/ant.rake +11 -0
  46. data/tasks/bones.rake +20 -0
  47. data/tasks/gem.rake +206 -0
  48. data/tasks/git.rake +40 -0
  49. data/tasks/notes.rake +27 -0
  50. data/tasks/post_load.rake +34 -0
  51. data/tasks/rdoc.rake +50 -0
  52. data/tasks/rubyforge.rake +55 -0
  53. data/tasks/samples.rake +13 -0
  54. data/tasks/setup.rb +300 -0
  55. data/tasks/spec.rake +59 -0
  56. data/tasks/svn.rake +47 -0
  57. data/tasks/test.rake +42 -0
  58. data/test/data/data1.txt +14 -0
  59. data/test/data/data2.txt +14 -0
  60. data/test/test_assembly.rb +321 -0
  61. data/test/test_cascading.rb +49 -0
  62. data/test/test_flow.rb +15 -0
  63. metadata +137 -0
data/spec/spec_util.rb ADDED
@@ -0,0 +1,188 @@
1
# Directory that the spec flows write their sink output into.
OUTPUT_DIR = 'output'.freeze
# Root directory for the build/tmp/log directories created by cascading_properties.
BUILD_DIR = 'build/spec'.freeze
3
+
4
# Mixin providing scope assertions; it relies on the including class to
# supply +scope+ and +debug_scope+.
module ScopeTests
  # Asserts that the scope returned by +scope+ carries the expected fields.
  #
  # Keys read from +params+:
  #   :source                      - name forwarded to +scope+ / +debug_scope+
  #                                  (left out of the call when nil)
  #   :values_fields               - expected values fields
  #   :grouping_fields             - expected grouping fields; falls back to
  #                                  :values_fields
  #   :primary_key_fields          - expected primary key; checked only when
  #                                  the key is present, and may be nil
  #   :grouping_primary_key_fields - expected grouping primary key; falls back
  #                                  to :primary_key_fields and is checked only
  #                                  when :primary_key_fields is present
  #   :debug                       - when truthy, +debug_scope+ is called
  #                                  before the assertions
  def check_scope(params = {})
    scope_args = [params[:source]].compact
    current = scope(*scope_args)

    expected_values = params[:values_fields]
    expected_grouping = params[:grouping_fields] || expected_values
    expected_pk = params[:primary_key_fields]
    expected_grouping_pk =
      if params.has_key?(:grouping_primary_key_fields)
        params[:grouping_primary_key_fields]
      else
        expected_pk
      end

    debug_scope(*scope_args) if params[:debug]

    current.values_fields.to_a.should == expected_values
    current.grouping_fields.to_a.should == expected_grouping

    # Presence of the key, not its value, enables these checks so that a nil
    # primary key can be asserted explicitly.
    return unless params.has_key?(:primary_key_fields)

    if expected_pk.nil?
      current.primary_key_fields.should == nil
    else
      current.primary_key_fields.to_a.should == expected_pk
    end

    if expected_grouping_pk.nil?
      current.grouping_primary_key_fields.should == nil
      nil
    else
      current.grouping_primary_key_fields.to_a.should == expected_grouping_pk
    end
  end
end
28
+
29
# Reopen the DSL classes so check_scope can be called inside flow and
# assembly blocks in the specs.
module Cascading
  class Flow
    include ScopeTests
  end

  class Assembly
    include ScopeTests
  end
end
33
+
34
# Wraps +block+ in a flow named 'test' inside a cascade named 'test_app' and
# completes that cascade with the properties from cascading_properties.
def test_flow(&block)
  test_cascade = cascade('test_app') { flow 'test', &block }
  test_cascade.complete(cascading_properties)
end
40
+
41
# Runs +block+ as the body of an assembly named 'input' that reads
# spec/resource/test_input.txt and is sunk to OUTPUT_DIR/out.txt.
#
# params[:branches] - optional list of branch names; each one is sunk to
#                     OUTPUT_DIR/<branch>_out.txt.
def test_assembly(params = {}, &block)
  branch_names = params[:branches] || []

  test_flow do
    input_tap = tap('spec/resource/test_input.txt', :kind => :lfs, :scheme => text_line_scheme)
    source 'input', input_tap

    # Default Fields defined by TextLineScheme
    check_scope :source => 'input', :values_fields => ['offset', 'line']

    assembly 'input', &block

    output_tap = tap("#{OUTPUT_DIR}/out.txt", :kind => :lfs, :sink_mode => :replace)
    sink 'input', output_tap

    # Branches must be sunk so that they (and their assertions) will be run
    branch_names.each do |branch_name|
      branch_tap = tap("#{OUTPUT_DIR}/#{branch_name}_out.txt", :kind => :lfs, :sink_mode => :replace)
      sink branch_name, branch_tap
    end
  end
end
60
+
61
# Builds a flow with 'left' and 'right' sources (both reading
# spec/resource/join_input.txt), splits each line into x, y, z, left-joins
# the two on 'x' inside an assembly named 'join', and then evaluates +block+
# inside that assembly. The join is sunk to OUTPUT_DIR/join_out.txt.
#
# params[:branches] - optional list of branch names; each one is sunk to
#                     OUTPUT_DIR/<branch>_out.txt.
def test_join_assembly(params = {}, &block)
  branch_names = params[:branches] || []
  sides = ['left', 'right']

  test_flow do
    sides.each do |side|
      source side, tap('spec/resource/join_input.txt', :kind => :lfs, :scheme => text_line_scheme)
    end

    # Default Fields defined by TextLineScheme
    sides.each do |side|
      check_scope :source => side, :values_fields => ['offset', 'line']
    end

    sides.each do |side|
      assembly side do
        check_scope :values_fields => ['offset', 'line']
        split 'line', ['x', 'y', 'z'], :pattern => /,/
        check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z']
      end
    end

    assembly 'join' do
      # Empty scope because there is no 'join' source or assembly
      check_scope :values_fields => []

      left_join 'left', 'right', :on => ['x']
      check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
                  :grouping_fields => ['x']

      instance_eval(&block)
    end

    sink 'join', tap("#{OUTPUT_DIR}/join_out.txt", :kind => :lfs, :sink_mode => :replace)

    # Branches must be sunk so that they (and their assertions) will be run
    branch_names.each do |branch_name|
      sink branch_name, tap("#{OUTPUT_DIR}/#{branch_name}_out.txt", :kind => :lfs, :sink_mode => :replace)
    end
  end
end
103
+
104
require 'fileutils'

# Creates the build, tmp and log directories under BUILD_DIR and returns a
# java.util.HashMap that has a Hadoop JobConf registered on it through
# MultiMapReducePlanner.set_job_conf.
#
# The directories are created with FileUtils.mkdir_p instead of shelling out
# to `mkdir -p`, so no external command is needed and a failure to create a
# directory raises instead of being silently ignored.
def cascading_properties
  build_dir = "#{BUILD_DIR}/build"
  tmp_dir = "#{BUILD_DIR}/tmp"
  log_dir = "#{BUILD_DIR}/log"
  FileUtils.mkdir_p([build_dir, tmp_dir, log_dir])

  # Local cluster settings
  #java.lang.System.set_property("test.build.data", build_dir)
  #java.lang.System.set_property("hadoop.tmp.dir", tmp_dir)
  #java.lang.System.set_property("hadoop.log.dir", log_dir)
  #conf = Java::OrgApacheHadoopConf::Configuration.new
  #dfs = Java::OrgApacheHadoopDfs::MiniDFSCluster.new(conf, 4, true, nil);
  #file_sys = dfs.file_system
  #mr = Java::OrgApacheHadoopMapred::MiniMRCluster.new(4, file_sys.uri.to_string, 1)
  #job_conf = mr.create_job_conf
  #job_conf.set("mapred.child.java.opts", "-Xmx512m")
  #job_conf.set("mapred.map.tasks.speculative.execution", "false")
  #job_conf.set("mapred.reduce.tasks.speculative.execution", "false")

  job_conf = Java::OrgApacheHadoopMapred::JobConf.new
  job_conf.jar = build_dir
  job_conf.set("test.build.data", build_dir)
  job_conf.set("hadoop.tmp.dir", tmp_dir)
  job_conf.set("hadoop.log.dir", log_dir)

  job_conf.num_map_tasks = 4
  job_conf.num_reduce_tasks = 1

  properties = java.util.HashMap.new({})
  Java::CascadingFlow::MultiMapReducePlanner.set_job_conf(properties, job_conf)
  properties
end
138
+
139
require 'fileutils'

# Runs the assembly called +assembly_name+ in a throwaway cascade, sinks its
# output to the local 'spec_output' directory, and checks the result.
#
# params[:source] - first argument given to +tap+ for the source
# params[:scheme] - optional; forwarded to the source tap only when supplied
# params[:length] - optional; expected number of output lines
#
# When a block is given, it is yielded one Hash per output line. Each Hash
# maps the assembly's values field names (as symbols) to the tab-separated
# values of that line.
def verify_assembly_output(assembly_name, params, &block)
  FileUtils.rm_rf('spec_output')

  # Plain-Ruby equivalent of params.slice(:scheme), so this helper does not
  # depend on Hash#slice being available.
  tap_options = params.has_key?(:scheme) ? { :scheme => params[:scheme] } : {}

  # Declared outside the blocks: a variable first assigned inside the flow
  # block would be block-local and gone by the time the field names are read.
  built_assembly = nil

  Cascade.new("foo") do
    flow("bar") do
      source assembly_name, tap(params[:source], tap_options)
      built_assembly = assembly(assembly_name)
      sink assembly_name, tap("spec_output", :kind => :lfs, :sink_mode => :replace)
    end
  end.complete(@properties)

  output_data = File.readlines("spec_output/part-00000")

  if params[:length]
    output_data.size.should == params[:length]
  end

  keys = built_assembly.scope.values_fields
  if block_given?
    output_data.each do |line|
      values = line.chomp.split(/\t/)

      yield(keys.zip(values).inject({}) do |map, kv|
        map[kv[0].to_sym] = kv[1]
        map
      end)
    end
  end
end
172
+
173
# Opens a `describe Object` group for the job in
# lib/jobs/<job_file>/<job_file>.rb and evaluates +block+ in that group.
# Before each example the spec properties are stored in @properties, ARGV is
# refilled with placeholder values, and the job file is loaded.
def describe_job(job_file, &block)
  describe Object do
    before(:each) do
      @properties = cascading_properties
      # Must artificially fill ARGV to prevent errors when creating multi-taps
      # in ETL cascade
      ARGV.clear
      # Dummy value, required for 3rd arg
      ARGV.concat(Array.new(10) { 'text_line_scheme' })
      load "lib/jobs/#{job_file}/#{job_file}.rb"
    end

    class_eval(&block)
  end
end
@@ -0,0 +1,38 @@
1
+ package cascading.jruby;
2
+
3
+ import org.jruby.Ruby;
4
+ import org.jruby.RubyInstanceConfig;
5
+
6
+ public class Main {
7
+ private final static String JRUBY_HOME = "/opt/jruby";
8
+
9
+ /**
10
+ * Starts a Hadoop job by reading the specified JRuby script.
11
+ *
12
+ * @param args
13
+ */
14
+ public static void main(String[] args) {
15
+ String name = args[0]; // c.j script name
16
+ if (!name.startsWith("/"))
17
+ name = "/" + name;
18
+
19
+ // c.j script args
20
+ String[] newArgs = new String[args.length - 1];
21
+ System.arraycopy(args, 1, newArgs, 0, args.length - 1);
22
+ RubyInstanceConfig config = new RubyInstanceConfig();
23
+ config.setJRubyHome(JRUBY_HOME); // mwalker
24
+ config.processArguments(newArgs);
25
+
26
+ System.out.println("Arguments: ");
27
+ for (String arg : config.getArgv())
28
+ System.out.println(arg);
29
+
30
+ Ruby runtime = Ruby.newInstance(config);
31
+
32
+ System.out.println("Requiring '" + name + "'");
33
+ runtime.executeScript("require '" + name + "'", name);
34
+
35
+ System.out.println("Requiring 'cascading/jruby/runner'");
36
+ runtime.executeScript("require 'cascading/jruby/runner'", "runner"); // gfodor
37
+ }
38
+ }
@@ -0,0 +1,6 @@
1
# Reports how many Cascades Cascading::Cascade.all returns, then completes
# each of them in turn.
registry = Cascading::Cascade
puts "Found #{registry.all.size} Cascades in global registry"

registry.all.each do |job|
  puts "runner.rb running '#{job.name}' Cascade"
  job.complete
end
data/tags ADDED
@@ -0,0 +1,238 @@
1
+ !_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
2
+ !_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
3
+ !_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/
4
+ !_TAG_PROGRAM_NAME Exuberant Ctags //
5
+ !_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/
6
+ !_TAG_PROGRAM_VERSION 5.8 //
7
+ Array lib/cascading/ext/array.rb /^class Array$/;" c
8
+ Assembly lib/cascading/assembly.rb /^ class Assembly < Cascading::Node$/;" c class:Cascading
9
+ Assembly spec/spec_util.rb /^ class Assembly; include ScopeTests; end$/;" c class:Cascading
10
+ Cascade lib/cascading/cascade.rb /^ class Cascade < Cascading::Node$/;" c class:Cascading
11
+ Cascading lib/cascading.rb /^module Cascading$/;" m
12
+ Cascading lib/cascading/assembly.rb /^module Cascading$/;" m
13
+ Cascading lib/cascading/base.rb /^module Cascading$/;" m
14
+ Cascading lib/cascading/cascade.rb /^module Cascading$/;" m
15
+ Cascading lib/cascading/cascading.rb /^module Cascading$/;" m
16
+ Cascading lib/cascading/flow.rb /^module Cascading$/;" m
17
+ Cascading lib/cascading/operations.rb /^module Cascading$/;" m
18
+ Cascading lib/cascading/scope.rb /^module Cascading$/;" m
19
+ Cascading samples/cascading.rb /^module Cascading$/;" m
20
+ Cascading spec/spec_util.rb /^module Cascading$/;" m
21
+ CascadingException lib/cascading/cascading_exception.rb /^class CascadingException < StandardError$/;" c
22
+ ExprStub lib/cascading/expr_stub.rb /^class ExprStub$/;" c
23
+ Flow lib/cascading/flow.rb /^ class Flow < Cascading::Node$/;" c class:Cascading
24
+ Flow spec/spec_util.rb /^ class Flow; include ScopeTests; end$/;" c class:Cascading
25
+ JRUBY_HOME src/cascading/jruby/Main.java /^ private final static String JRUBY_HOME = "\/opt\/jruby";$/;" f class:Main file:
26
+ Main src/cascading/jruby/Main.java /^public class Main {$/;" c
27
+ Node lib/cascading/base.rb /^ class Node$/;" c class:Cascading
28
+ Object tasks/setup.rb /^class Object$/;" c
29
+ OpenStruct tasks/setup.rb /^class OpenStruct; undef :gem; end$/;" c
30
+ Operations lib/cascading/operations.rb /^ module Operations$/;" m class:Cascading
31
+ Registerable lib/cascading/base.rb /^ module Registerable$/;" m class:Cascading
32
+ Scope lib/cascading/scope.rb /^ class Scope$/;" c class:Cascading
33
+ ScopeTests spec/spec_util.rb /^module ScopeTests$/;" m
34
+ TC_Assembly test/test_assembly.rb /^class TC_Assembly < Test::Unit::TestCase$/;" c
35
+ TC_AssemblyScenarii test/test_assembly.rb /^class TC_AssemblyScenarii < Test::Unit::TestCase$/;" c
36
+ TC_Cascading test/test_cascading.rb /^class TC_Cascading < Test::Unit::TestCase$/;" c
37
+ TC_Flow test/test_flow.rb /^class TC_Flow < Test::Unit::TestCase$/;" c
38
+ add lib/cascading/base.rb /^ def add(name, instance)$/;" f class:Cascading.Registerable
39
+ add_archive_to_distributed_cache lib/cascading/flow.rb /^ def add_archive_to_distributed_cache(file)$/;" f
40
+ add_child lib/cascading/base.rb /^ def add_child(node)$/;" f class:Cascading.Node
41
+ add_file_to_distributed_cache lib/cascading/flow.rb /^ def add_file_to_distributed_cache(file)$/;" f
42
+ add_listener lib/cascading/flow.rb /^ def add_listener(listener)$/;" f
43
+ add_to_distributed_cache lib/cascading/flow.rb /^ def add_to_distributed_cache(file, property)$/;" f
44
+ aggregator_function lib/cascading/operations.rb /^ def aggregator_function(args, aggregator_klass)$/;" f class:Cascading.Operations
45
+ all lib/cascading/base.rb /^ def all$/;" f class:Cascading.Registerable
46
+ all_fields lib/cascading/cascading.rb /^ def all_fields$/;" f class:Cascading
47
+ assembly lib/cascading/flow.rb /^ def assembly(name, &block)$/;" f class:Cascading.Flow
48
+ assembly test/test_assembly.rb /^def assembly(name, &block)$/;" f
49
+ assert lib/cascading/assembly.rb /^ def assert(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
50
+ assert_group lib/cascading/assembly.rb /^ def assert_group(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
51
+ assert_group_size_equals lib/cascading/assembly.rb /^ def assert_group_size_equals(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
52
+ assert_not_null lib/cascading/assembly.rb /^ def assert_not_null(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
53
+ assert_size_equals lib/cascading/assembly.rb /^ def assert_size_equals(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
54
+ average lib/cascading/assembly.rb /^ def average(*args); composite_aggregator(args, :average_function); end$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
55
+ average_function lib/cascading/operations.rb /^ def average_function(*args)$/;" f class:Cascading.Operations
56
+ bind_names lib/cascading/assembly.rb /^ def bind_names(*new_names)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
57
+ branch lib/cascading/assembly.rb /^ def branch(name, &block)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
58
+ build_connect_parameter lib/cascading/flow.rb /^ def build_connect_parameter$/;" f
59
+ cascade lib/cascading/cascading.rb /^ def cascade(name, &block)$/;" f class:Cascading
60
+ cascading.jruby src/cascading/jruby/Main.java /^package cascading.jruby;$/;" p
61
+ cascading_properties spec/spec_util.rb /^def cascading_properties$/;" f
62
+ cast lib/cascading/assembly.rb /^ def cast(type_map)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
63
+ cause lib/cascading/cascading_exception.rb /^ def cause(depth)$/;" f class:CascadingException
64
+ check_scope spec/spec_util.rb /^ def check_scope(params = {})$/;" f class:ScopeTests
65
+ compare_with_references test/test_assembly.rb /^def compare_with_references(test_name)$/;" f
66
+ complete lib/cascading/cascade.rb /^ def complete(properties = nil)$/;" f class:Cascading.Cascade
67
+ complete lib/cascading/flow.rb /^ def complete(properties = nil)$/;" f
68
+ composite_aggregator lib/cascading/assembly.rb /^ def composite_aggregator(args, function)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
69
+ compress_output lib/cascading/flow.rb /^ def compress_output(codec, type)$/;" f class:Cascading.Flow
70
+ connect lib/cascading/flow.rb /^ def connect(properties = nil)$/;" f
71
+ copy lib/cascading/assembly.rb /^ def copy(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
72
+ copy lib/cascading/scope.rb /^ def copy$/;" f class:Cascading.Scope
73
+ copy_fields lib/cascading/cascading.rb /^ def copy_fields(fields)$/;" f class:Cascading
74
+ count lib/cascading/assembly.rb /^ def count(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
75
+ count_function lib/cascading/operations.rb /^ def count_function(*args)$/;" f class:Cascading.Operations
76
+ date_formatter lib/cascading/operations.rb /^ def date_formatter(fields, format, timezone=nil)$/;" f class:Cascading.Operations.to_java_comparable_array
77
+ date_parser lib/cascading/operations.rb /^ def date_parser(field, format)$/;" f class:Cascading.Operations.to_java_comparable_array
78
+ debug lib/cascading/assembly.rb /^ def debug(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
79
+ debug_scope lib/cascading/assembly.rb /^ def debug_scope$/;" f class:Cascading.Assembly
80
+ debug_scope lib/cascading/flow.rb /^ def debug_scope(name = nil)$/;" f class:Cascading.Flow
81
+ dedup_field_names lib/cascading/cascading.rb /^ def dedup_field_names(*names)$/;" f class:Cascading
82
+ dedup_fields lib/cascading/cascading.rb /^ def dedup_fields(*fields)$/;" f class:Cascading
83
+ depend_on tasks/setup.rb /^def depend_on( name, version = nil )$/;" f
84
+ describe_job spec/spec_util.rb /^def describe_job(job_file, &block)$/;" f
85
+ difference_fields lib/cascading/cascading.rb /^ def difference_fields(*fields)$/;" f class:Cascading
86
+ discard lib/cascading/assembly.rb /^ def discard(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
87
+ distinct lib/cascading/assembly.rb /^ def distinct(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
88
+ do_every_block_and_rename_fields lib/cascading/assembly.rb /^ def do_every_block_and_rename_fields(group_fields, incoming_scopes, &block)$/;" f class:Cascading.Assembly
89
+ draw lib/cascading/cascade.rb /^ def draw(dir, properties = nil)$/;" f class:Cascading.Cascade
90
+ each lib/cascading/assembly.rb /^ def each(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
91
+ empty_scope lib/cascading/scope.rb /^ def self.empty_scope(name)$/;" F class:Cascading.Scope
92
+ emr_local_path_for_distributed_cache_file lib/cascading/flow.rb /^ def emr_local_path_for_distributed_cache_file(file)$/;" f
93
+ ensure_in_path tasks/setup.rb /^def ensure_in_path( *args )$/;" f
94
+ eval_expression lib/cascading/assembly.rb /^ def eval_expression(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
95
+ every lib/cascading/assembly.rb /^ def every(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
96
+ every_applied? lib/cascading/assembly.rb /^ def every_applied?$/;" f class:Cascading.Assembly
97
+ expr lib/cascading/cascading.rb /^ def expr(s)$/;" f class:Cascading
98
+ expression_filter lib/cascading/operations.rb /^ def expression_filter(*args)$/;" f class:Cascading.Operations.to_java_comparable_array
99
+ expression_function lib/cascading/operations.rb /^ def expression_function(*args)$/;" f class:Cascading.Operations
100
+ extract_options lib/cascading/ext/array.rb /^ def extract_options$/;" f class:Array
101
+ extract_options! lib/cascading/ext/array.rb /^ def extract_options!$/;" f class:Array
102
+ fetch_cause lib/cascading/cascading_exception.rb /^ def fetch_cause(ne, depth)$/;" f class:CascadingException
103
+ field_joiner lib/cascading/operations.rb /^ def field_joiner(*args)$/;" f class:Cascading.Operations.to_java_comparable_array
104
+ fields lib/cascading/cascading.rb /^ def fields(fields)$/;" f class:Cascading
105
+ filter lib/cascading/assembly.rb /^ def filter(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
106
+ filter_not_null lib/cascading/assembly.rb /^ def filter_not_null(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
107
+ filter_null lib/cascading/assembly.rb /^ def filter_null(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
108
+ find_child lib/cascading/base.rb /^ def find_child(name)$/;" f class:Cascading.Node
109
+ first lib/cascading/assembly.rb /^ def first(*args); composite_aggregator(args, :first_function); end$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
110
+ first_function lib/cascading/operations.rb /^ def first_function(*args)$/;" f class:Cascading.Operations
111
+ flow lib/cascading/cascade.rb /^ def flow(name, &block)$/;" f class:Cascading.Cascade
112
+ flow lib/cascading/cascading.rb /^ def flow(name, &block)$/;" f class:Cascading
113
+ format_date lib/cascading/assembly.rb /^ def format_date(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
114
+ get lib/cascading/base.rb /^ def get(key)$/;" f class:Cascading.Registerable
115
+ group_by lib/cascading/assembly.rb /^ def group_by(*args, &block)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
116
+ grouping_fields lib/cascading/scope.rb /^ def grouping_fields$/;" f class:Cascading.Scope
117
+ grouping_primary_key_fields lib/cascading/scope.rb /^ def self.grouping_primary_key_fields(flow_element, incoming_scopes, scope)$/;" F class:Cascading.Scope
118
+ identity lib/cascading/operations.rb /^ def identity$/;" f class:Cascading.Operations
119
+ in_directory tasks/setup.rb /^def in_directory( dir, &block )$/;" f
120
+ initialize lib/cascading/assembly.rb /^ def initialize(name, parent, outgoing_scopes = {})$/;" f class:Cascading.Assembly
121
+ initialize lib/cascading/base.rb /^ def initialize(name, parent)$/;" f class:Cascading.Node
122
+ initialize lib/cascading/cascade.rb /^ def initialize(name)$/;" f class:Cascading.Cascade
123
+ initialize lib/cascading/cascading_exception.rb /^ def initialize(native_exception, message)$/;" f class:CascadingException
124
+ initialize lib/cascading/expr_stub.rb /^ def initialize(st)$/;" f class:ExprStub
125
+ initialize lib/cascading/flow.rb /^ def initialize(name, parent)$/;" f class:Cascading.Flow
126
+ initialize lib/cascading/scope.rb /^ def initialize(scope, params = {})$/;" f class:Cascading.Scope
127
+ inner_join lib/cascading/assembly.rb /^ def inner_join(*args, &block)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
128
+ insert lib/cascading/assembly.rb /^ def insert(args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
129
+ insert_function lib/cascading/operations.rb /^ def insert_function(*args)$/;" f class:Cascading.Operations
130
+ join lib/cascading/assembly.rb /^ def join(*args, &block)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
131
+ join_fields lib/cascading/assembly.rb /^ def join_fields(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
132
+ last lib/cascading/assembly.rb /^ def last(*args); composite_aggregator(args, :last_function); end$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
133
+ last_function lib/cascading/operations.rb /^ def last_function(*args)$/;" f class:Cascading.Operations
134
+ last_grouping_fields lib/cascading/cascading.rb /^ def last_grouping_fields$/;" f class:Cascading
135
+ left_join lib/cascading/assembly.rb /^ def left_join(*args, &block)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
136
+ libpath lib/cascading.rb /^ def self.libpath( *args )$/;" F class:Cascading
137
+ main src/cascading/jruby/Main.java /^ public static void main(String[] args) {$/;" m class:Main
138
+ make_each lib/cascading/assembly.rb /^ def make_each(type, *parameters)$/;" f class:Cascading.Assembly
139
+ make_every lib/cascading/assembly.rb /^ def make_every(type, *parameters)$/;" f class:Cascading.Assembly
140
+ make_flows lib/cascading/cascade.rb /^ def make_flows(flows, properties)$/;" f class:Cascading.Cascade
141
+ make_pipe lib/cascading/assembly.rb /^ def make_pipe(type, parameters, grouping_key_fields = [], incoming_scopes = [scope])$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
142
+ make_pipes lib/cascading/flow.rb /^ def make_pipes$/;" f
143
+ make_tap_parameter lib/cascading/flow.rb /^ def make_tap_parameter(taps)$/;" f
144
+ manifest tasks/setup.rb /^def manifest$/;" f
145
+ match_rows lib/cascading/assembly.rb /^ def match_rows(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
146
+ max lib/cascading/assembly.rb /^ def max(*args); composite_aggregator(args, :max_function); end$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
147
+ max_function lib/cascading/operations.rb /^ def max_function(*args)$/;" f class:Cascading.Operations
148
+ min lib/cascading/assembly.rb /^ def min(*args); composite_aggregator(args, :min_function); end$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
149
+ min_function lib/cascading/operations.rb /^ def min_function(*args)$/;" f class:Cascading.Operations
150
+ mock_assembly test/test_assembly.rb /^ def mock_assembly(&block)$/;" f class:TC_Assembly
151
+ multi_tap lib/cascading/cascading.rb /^ def multi_tap(*taps)$/;" f class:Cascading
152
+ outer_join lib/cascading/assembly.rb /^ def outer_join(*args, &block)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
153
+ outgoing_scope lib/cascading/scope.rb /^ def self.outgoing_scope(flow_element, incoming_scopes, grouping_key_fields, every_applied)$/;" F class:Cascading.Scope
154
+ outgoing_scope_for lib/cascading/scope.rb /^ def self.outgoing_scope_for(flow_element, incoming_scopes)$/;" F class:Cascading.Scope
155
+ paragraphs_of tasks/setup.rb /^def paragraphs_of( path, *paragraphs )$/;" f
156
+ parent_flow lib/cascading/assembly.rb /^ def parent_flow$/;" f class:Cascading.Assembly
157
+ parse lib/cascading/assembly.rb /^ def parse(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
158
+ parse_date lib/cascading/assembly.rb /^ def parse_date(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
159
+ pass lib/cascading/assembly.rb /^ def pass(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
160
+ path lib/cascading.rb /^ def self.path( *args )$/;" F class:Cascading
161
+ primary lib/cascading/assembly.rb /^ def primary(*args)$/;" f class:Cascading.Assembly
162
+ primary_key_fields lib/cascading/scope.rb /^ def self.primary_key_fields(flow_element, incoming_scopes, scope)$/;" F class:Cascading.Scope
163
+ project lib/cascading/assembly.rb /^ def project(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
164
+ project_primary_key lib/cascading/scope.rb /^ def self.project_primary_key(primary_key, old_fields, new_fields)$/;" F class:Cascading.Scope
165
+ quiet tasks/setup.rb /^def quiet( &block )$/;" f
166
+ regex_filter lib/cascading/operations.rb /^ def regex_filter(*args)$/;" f class:Cascading.Operations.to_java_comparable_array
167
+ regex_generator lib/cascading/operations.rb /^ def regex_generator(*args)$/;" f class:Cascading.Operations
168
+ regex_parser lib/cascading/operations.rb /^ def regex_parser(*args)$/;" f class:Cascading.Operations
169
+ regex_replace lib/cascading/operations.rb /^ def regex_replace(*args)$/;" f class:Cascading.Operations.to_java_comparable_array
170
+ regex_split_generator lib/cascading/operations.rb /^ def regex_split_generator(*args)$/;" f class:Cascading.Operations
171
+ regex_splitter lib/cascading/operations.rb /^ def regex_splitter(*args)$/;" f class:Cascading.Operations
172
+ register_scheme_key lib/cascading/scope.rb /^ def self.register_scheme_key(scheme, primary_key)$/;" F class:Cascading.Scope
173
+ registered lib/cascading/base.rb /^ def registered$/;" f class:Cascading.Registerable
174
+ reject lib/cascading/assembly.rb /^ def reject(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
175
+ remove_desc_for_task tasks/setup.rb /^def remove_desc_for_task( names )$/;" f
176
+ rename lib/cascading/assembly.rb /^ def rename(name_map)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
177
+ replace lib/cascading/assembly.rb /^ def replace(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
178
+ require_all_jars lib/cascading.rb /^ def self.require_all_jars(from = ::File.join(::File.dirname(__FILE__), "..", "jars"))$/;" F class:Cascading
179
+ require_all_libs_relative_to lib/cascading.rb /^ def self.require_all_libs_relative_to( fname, dir = nil )$/;" F class:Cascading
180
+ reset lib/cascading/base.rb /^ def reset$/;" f class:Cascading.Registerable
181
+ results_fields lib/cascading/cascading.rb /^ def results_fields$/;" f class:Cascading
182
+ right_join lib/cascading/assembly.rb /^ def right_join(*args, &block)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
183
+ sample_properties samples/cascading.rb /^ def sample_properties$/;" f class:Cascading
184
+ scope lib/cascading/assembly.rb /^ def scope$/;" f class:Cascading.Assembly
185
+ scope lib/cascading/flow.rb /^ def scope(name = nil)$/;" f class:Cascading.Flow
186
+ search_field_name lib/cascading/cascading.rb /^ def search_field_name(names, candidate)$/;" f class:Cascading
187
+ sequence_file_scheme lib/cascading/cascading.rb /^ def sequence_file_scheme(*fields)$/;" f class:Cascading
188
+ set_spill_threshold lib/cascading/flow.rb /^ def set_spill_threshold(threshold)$/;" f
189
+ sink lib/cascading/flow.rb /^ def sink(*args)$/;" f class:Cascading.Flow
190
+ sink_metadata lib/cascading/cascade.rb /^ def sink_metadata$/;" f class:Cascading.Cascade
191
+ sink_metadata lib/cascading/flow.rb /^ def sink_metadata$/;" f class:Cascading.Flow
192
+ source lib/cascading/flow.rb /^ def source(*args)$/;" f class:Cascading.Flow
193
+ split lib/cascading/assembly.rb /^ def split(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
194
+ split_hash lib/cascading/expr_stub.rb /^ def self.split_hash(h)$/;" F
195
+ split_names_and_types lib/cascading/expr_stub.rb /^ def self.split_names_and_types(expr_types)$/;" F
196
+ split_rows lib/cascading/assembly.rb /^ def split_rows(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
197
+ sum lib/cascading/assembly.rb /^ def sum(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
198
+ sum_function lib/cascading/operations.rb /^ def sum_function(*args)$/;" f class:Cascading.Operations
199
+ tap lib/cascading/cascading.rb /^ def tap(*args)$/;" f class:Cascading
200
+ tap_scope lib/cascading/scope.rb /^ def self.tap_scope(tap, name)$/;" F class:Cascading.Scope
201
+ test_assembly spec/spec_util.rb /^def test_assembly(params = {}, &block)$/;" f
202
+ test_assembly test/test_flow.rb /^ def test_assembly$/;" f class:TC_Flow
203
+ test_branch_empty test/test_assembly.rb /^ def test_branch_empty$/;" f
204
+ test_branch_single test/test_assembly.rb /^ def test_branch_single$/;" f
205
+ test_branch_unique test/test_assembly.rb /^ def test_branch_unique$/;" f
206
+ test_create_assembly_simple test/test_assembly.rb /^ def test_create_assembly_simple$/;" f class:TC_Assembly
207
+ test_create_each test/test_assembly.rb /^ def test_create_each$/;" f
208
+ test_create_every test/test_assembly.rb /^ def test_create_every$/;" f
209
+ test_create_group_by test/test_assembly.rb /^ def test_create_group_by$/;" f
210
+ test_create_group_by_many_fields test/test_assembly.rb /^ def test_create_group_by_many_fields$/;" f
211
+ test_create_group_by_reverse test/test_assembly.rb /^ def test_create_group_by_reverse$/;" f
212
+ test_create_group_by_with_sort test/test_assembly.rb /^ def test_create_group_by_with_sort$/;" f
213
+ test_create_group_by_with_sort_reverse test/test_assembly.rb /^ def test_create_group_by_with_sort_reverse$/;" f
214
+ test_each_identity test/test_assembly.rb /^ def test_each_identity$/;" f
215
+ test_fields_field test/test_cascading.rb /^ def test_fields_field$/;" f class:TC_Cascading
216
+ test_fields_multiple test/test_cascading.rb /^ def test_fields_multiple$/;" f class:TC_Cascading
217
+ test_fields_single test/test_cascading.rb /^ def test_fields_single$/;" f class:TC_Cascading
218
+ test_flow spec/spec_util.rb /^def test_flow(&block)$/;" f
219
+ test_full_assembly test/test_assembly.rb /^ def test_full_assembly$/;" f
220
+ test_join1 test/test_assembly.rb /^ def test_join1$/;" f
221
+ test_join2 test/test_assembly.rb /^ def test_join2$/;" f
222
+ test_join_assembly spec/spec_util.rb /^def test_join_assembly(params = {}, &block)$/;" f
223
+ test_splitter test/test_assembly.rb /^ def test_splitter$/;" f class:TC_AssemblyScenarii
224
+ test_tap test/test_cascading.rb /^ def test_tap$/;" f class:TC_Cascading
225
+ text_line_scheme lib/cascading/cascading.rb /^ def text_line_scheme(*fields)$/;" f class:Cascading
226
+ to_java_comparable_array lib/cascading/operations.rb /^ def to_java_comparable_array(arr)$/;" f class:Cascading.Operations
227
+ to_s lib/cascading/assembly.rb /^ def to_s$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
228
+ to_s lib/cascading/scope.rb /^ def to_s$/;" f class:Cascading.Scope
229
+ trace_causes lib/cascading/cascading_exception.rb /^ def trace_causes(ne, depth)$/;" f class:CascadingException
230
+ union lib/cascading/assembly.rb /^ def union(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
231
+ union_fields lib/cascading/cascading.rb /^ def union_fields(*fields)$/;" f class:Cascading
232
+ union_pipes lib/cascading/assembly.rb /^ def union_pipes(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
233
+ valid? tasks/setup.rb /^ def valid?$/;" f class:Object
234
+ values_fields lib/cascading/scope.rb /^ def values_fields$/;" f class:Cascading.Scope
235
+ verify_assembly_output spec/spec_util.rb /^def verify_assembly_output(assembly_name, params, &block)$/;" f
236
+ version lib/cascading.rb /^ def self.version$/;" F class:Cascading
237
+ where lib/cascading/assembly.rb /^ def where(*args)$/;" f class:Cascading.Assembly.do_every_block_and_rename_fields
238
+ write_sink_metadata lib/cascading/cascade.rb /^ def write_sink_metadata(file_name)$/;" f class:Cascading.Cascade
data/tasks/ann.rake ADDED
@@ -0,0 +1,80 @@
1
+
2
+ begin
3
+ require 'bones/smtp_tls'
4
+ rescue LoadError
5
+ require 'net/smtp'
6
+ end
7
+ require 'time'
8
+
9
namespace :ann do

  # Empty hook task; the announcement and email tasks list it as a prerequisite.
  task :prereqs

  # Writes the announcement text to PROJ.ann.file.
  file PROJ.ann.file do
    announcement = PROJ.ann
    puts "Generating #{announcement.file}"

    File.open(announcement.file, 'w') do |out|
      # Title block: name and version, then the optional author, URL and
      # release-name lines.
      out.puts("#{PROJ.name} version #{PROJ.version}")
      if PROJ.authors
        out.puts(" by #{Array(PROJ.authors).first}")
      end
      if PROJ.url.valid?
        out.puts(" #{PROJ.url}")
      end
      if PROJ.release_name
        out.puts(" (the \"#{PROJ.release_name}\" release)")
      end
      out.puts

      out.puts("== DESCRIPTION")
      out.puts
      out.puts(PROJ.description)
      out.puts

      # The first line of the change text is swapped for a fixed heading.
      out.puts(PROJ.changes.sub(%r/^.*$/, '== CHANGES'))
      out.puts

      # One section per configured paragraph name, taken from the readme file.
      announcement.paragraphs.each do |title|
        out.puts "== #{title.upcase}"
        out.puts
        out.puts paragraphs_of(PROJ.readme_file, title).join("\n\n")
        out.puts
      end

      out.puts announcement.text if announcement.text
    end
  end

  desc "Create an announcement file"
  task :announcement => ['ann:prereqs', PROJ.ann.file]

  desc "Send an email announcement"
  task :email => ['ann:prereqs', PROJ.ann.file] do
    announcement = PROJ.ann
    settings     = announcement.email
    sender       = settings[:from] || Array(PROJ.authors).first || PROJ.email
    recipients   = Array(settings[:to])

    # Subject line, with the release name appended when one is set.
    subject = "[ANN] #{PROJ.name} #{PROJ.version}"
    subject += " (#{PROJ.release_name})" if PROJ.release_name

    # RFC 822 style header, a blank line, then the announcement file as body.
    message = [
      "From: #{sender}\n",
      "To: #{recipients.join(',')}\n",
      "Subject: #{subject}\n",
      "Date: #{Time.now.rfc822}\n",
      "Message-Id: <#{format("%.8f", Time.now.to_f)}@#{settings[:domain]}>\n\n",
      File.read(announcement.file)
    ].join

    # Connection settings, in the positional order handed to Net::SMTP.start.
    server, port, domain, account, password, authtype =
      [:server, :port, :domain, :acct, :passwd, :authtype].map { |key| settings[key] }

    # Fall back to the project e-mail address when no account is configured.
    account = PROJ.email if account.nil?

    # Ask on the console when no password is configured.
    if password.nil?
      STDOUT.write "Please enter your e-mail password (#{account}): "
      password = STDIN.gets.chomp
    end

    Net::SMTP.start(server, port, domain, account, password, authtype) do |smtp|
      smtp.sendmail(message, sender, recipients)
    end
  end
end  # namespace :ann
74
+
75
# Top-level shortcut so that the bare task name `ann` runs ann:announcement.
desc 'Alias to ann:announcement'
task :ann => ['ann:announcement']

# Add the announcement file to the CLOBBER list.
CLOBBER << PROJ.ann.file
79
+
80
+ # EOF
data/tasks/ant.rake ADDED
@@ -0,0 +1,11 @@
1
# Tasks that shell out to Ant for the Java sources included in the gem.
namespace :ant do
  desc 'Builds Java source for inclusion in gem'
  task :build do
    # `sh` echoes the command, lets Ant's output through, and fails the task
    # on a non-zero exit status. The backtick form captured the output and
    # threw it away, so a broken build passed silently.
    sh 'ant build'
  end

  desc 'Cleans Java build files'
  task :clean do
    # Same reasoning as :build -- surface Ant's output and failures.
    sh 'ant clean'
  end
end
data/tasks/bones.rake ADDED
@@ -0,0 +1,20 @@
1
+
2
# The bones tasks are only defined when HAVE_BONES is truthy.
if HAVE_BONES

  namespace :bones do

    desc 'Show the PROJ open struct'
    task :debug do |t|
      # When exactly two top-level tasks were requested, the last one is
      # removed from the list and used as the attribute name to display.
      requested = t.application.top_level_tasks
      attribute = requested.length == 2 ? requested.pop : nil

      if attribute
        Bones::Debug.show_attr(PROJ, attribute)
      else
        Bones::Debug.show(PROJ)
      end
    end

  end  # namespace :bones

end  # HAVE_BONES
19
+
20
+ # EOF