cascading.jruby 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/History.txt +15 -0
  2. data/lib/cascading/assembly.rb +138 -17
  3. data/lib/cascading/base.rb +0 -4
  4. data/lib/cascading/cascade.rb +25 -16
  5. data/lib/cascading/cascading.rb +25 -5
  6. data/lib/cascading/ext/array.rb +1 -7
  7. data/lib/cascading/flow.rb +18 -19
  8. data/lib/cascading/mode.rb +5 -1
  9. data/lib/cascading/operations.rb +11 -4
  10. data/lib/cascading/tap.rb +4 -0
  11. data/lib/cascading.rb +1 -5
  12. data/test/test_assembly.rb +135 -29
  13. data/test/test_cascade.rb +80 -0
  14. data/test/test_flow.rb +20 -0
  15. data/test/test_operations.rb +3 -2
  16. metadata +6 -76
  17. data/.travis.yml +0 -6
  18. data/Gemfile +0 -6
  19. data/Gemfile.lock +0 -12
  20. data/HACKING.md +0 -23
  21. data/README.md +0 -9
  22. data/Rakefile +0 -46
  23. data/TODO +0 -13
  24. data/bin/make_job +0 -81
  25. data/ivy.xml +0 -25
  26. data/ivysettings.xml +0 -7
  27. data/samples/branch.rb +0 -30
  28. data/samples/copy.rb +0 -20
  29. data/samples/data/data2.txt +0 -88799
  30. data/samples/data/data_group_by.txt +0 -7
  31. data/samples/data/data_join1.txt +0 -3
  32. data/samples/data/data_join2.txt +0 -3
  33. data/samples/data/data_join3.txt +0 -3
  34. data/samples/data/genealogy/names/dist.all.last +0 -88799
  35. data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
  36. data/samples/group_by.rb +0 -61
  37. data/samples/join.rb +0 -31
  38. data/samples/logwordcount.rb +0 -22
  39. data/samples/project.rb +0 -23
  40. data/samples/rename.rb +0 -20
  41. data/samples/scorenames.rb +0 -20
  42. data/samples/splitter.rb +0 -19
  43. data/samples/sub_assembly.rb +0 -30
  44. data/samples/union.rb +0 -36
  45. data/spec/cascading_spec.rb +0 -105
  46. data/spec/expr_spec.rb +0 -230
  47. data/spec/jruby_version_spec.rb +0 -72
  48. data/spec/resource/join_input.txt +0 -3
  49. data/spec/resource/test_input.txt +0 -4
  50. data/spec/scope_spec.rb +0 -149
  51. data/spec/spec.opts +0 -6
  52. data/spec/spec_helper.rb +0 -5
  53. data/spec/spec_util.rb +0 -92
  54. data/src/cascading/jruby/Main.java +0 -38
  55. data/src/cascading/jruby/runner.rb +0 -6
  56. data/tags +0 -342
  57. data/tasks/ann.rake +0 -80
  58. data/tasks/ant.rake +0 -23
  59. data/tasks/bones.rake +0 -20
  60. data/tasks/gem.rake +0 -206
  61. data/tasks/git.rake +0 -40
  62. data/tasks/notes.rake +0 -27
  63. data/tasks/post_load.rake +0 -34
  64. data/tasks/rdoc.rake +0 -50
  65. data/tasks/rubyforge.rake +0 -55
  66. data/tasks/samples.rake +0 -19
  67. data/tasks/setup.rb +0 -300
  68. data/tasks/spec.rake +0 -59
  69. data/tasks/svn.rake +0 -47
  70. data/tasks/test.rake +0 -42
  71. data/test/data/data1.txt +0 -14
  72. data/test/data/data2.txt +0 -14
  73. data/test/mock_assemblies.rb +0 -55
@@ -1,4 +0,0 @@
1
- hello,world
2
- foo,bar
3
- foo,bar
4
- biz,baz
data/spec/scope_spec.rb DELETED
@@ -1,149 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
-
3
- context Cascading::Scope do
4
- it 'should match Cascading fields names from source tap scheme' do
5
- test_assembly do
6
- # Pass that uses our scope instead of all_fields
7
- each scope.values_fields, :function => Java::CascadingOperation::Identity.new
8
- check_scope :values_fields => ['offset', 'line']
9
- end
10
- end
11
-
12
- it 'should pick up names from source tap scheme' do
13
- test_assembly do
14
- pass
15
-
16
- check_scope :values_fields => ['offset', 'line']
17
- end
18
- end
19
-
20
- it 'should propagate names through Each' do
21
- test_assembly do
22
- check_scope :values_fields => ['offset', 'line']
23
- assert_size_equals 2
24
-
25
- split 'line', ['x', 'y'], :pattern => /,/
26
- check_scope :values_fields => ['offset', 'line', 'x', 'y']
27
- assert_size_equals 4
28
- end
29
- end
30
-
31
- it 'should allow field filtration at Each' do
32
- test_assembly do
33
- check_scope :values_fields => ['offset', 'line']
34
- assert_size_equals 2
35
-
36
- split 'line', ['x', 'y'], :pattern => /,/, :output => ['x', 'y']
37
- check_scope :values_fields => ['x', 'y']
38
- assert_size_equals 2
39
- end
40
- end
41
-
42
- it 'should propagate names through CoGroup' do
43
- test_join_assembly do
44
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
45
- :grouping_fields => ['x', 'x_']
46
- end
47
- end
48
-
49
- it 'should propagate names through CoGroup with no Aggregations' do
50
- post_join_block = lambda do
51
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
52
- :grouping_fields => ['x', 'x_']
53
- end
54
-
55
- test_join_assembly(:post_join_block => post_join_block)
56
- end
57
-
58
- it 'should pass grouping fields to Every' do
59
- test_join_assembly do
60
- sum :mapping => {'x' => 'x_sum'}, :type => :int
61
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
62
- :grouping_fields => ['x', 'x_', 'x_sum']
63
- assert_group_size_equals 1
64
- end
65
- end
66
-
67
- it 'should pass grouping fields through chained Every' do
68
- test_join_assembly do
69
- sum :mapping => {'x' => 'x_sum'}, :type => :int
70
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
71
- :grouping_fields => ['x', 'x_', 'x_sum']
72
- assert_group_size_equals 1
73
-
74
- sum :mapping => {'y' => 'y_sum'}, :type => :int
75
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
76
- :grouping_fields => ['x', 'x_', 'x_sum', 'y_sum']
77
- assert_group_size_equals 1
78
- end
79
- end
80
-
81
- it 'should propagate names through Every' do
82
- post_join_block = lambda do
83
- check_scope :values_fields => ['x', 'x_', 'x_sum', 'y_sum']
84
- assert_size_equals 4
85
- end
86
-
87
- test_join_assembly :post_join_block => post_join_block do
88
- sum :mapping => {'x' => 'x_sum'}, :type => :int
89
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
90
- :grouping_fields => ['x', 'x_', 'x_sum']
91
- assert_group_size_equals 1
92
-
93
- sum :mapping => {'y' => 'y_sum'}, :type => :int
94
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
95
- :grouping_fields => ['x', 'x_', 'x_sum', 'y_sum']
96
- assert_group_size_equals 1
97
- end
98
- end
99
-
100
- it 'should pass values fields to Each immediately following CoGroup and remove grouping fields' do
101
- post_join_block = lambda do
102
- assert_size_equals 10
103
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
104
- end
105
- test_join_assembly(:post_join_block => post_join_block)
106
- end
107
-
108
- it 'should fail to pass grouping fields to Every immediately following Each' do
109
- post_join_block = lambda do
110
- pass
111
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
112
- sum :mapping => {'x' => 'x_sum'}, :type => :int
113
- end
114
-
115
- lambda do # Composition fails
116
- test_join_assembly(:post_join_block => post_join_block)
117
- # sum doesn't exist outside of Aggregations (where block of join is
118
- # evaluated)
119
- end.should raise_error NoMethodError
120
- end
121
-
122
- it 'should propagate values fields and field names into branch' do
123
- post_join_block = lambda do
124
- branch 'data_tuple' do
125
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
126
- :grouping_fields => ['x', 'x_']
127
- assert_size_equals 10
128
- end
129
- end
130
-
131
- test_join_assembly(:branches => ['data_tuple'], :post_join_block => post_join_block)
132
- end
133
-
134
- it 'should propagate names through GroupBy' do
135
- test_assembly do
136
- group_by 'line' do
137
- count
138
- end
139
- check_scope :values_fields => ['line', 'count']
140
- end
141
- end
142
-
143
- it 'should propagate names through blockless GroupBy' do
144
- test_assembly do
145
- group_by 'line'
146
- check_scope :values_fields => ['offset', 'line'], :grouping_fields => ['line']
147
- end
148
- end
149
- end
data/spec/spec.opts DELETED
@@ -1,6 +0,0 @@
1
- --colour
2
- --format
3
- progress
4
- --loadby
5
- mtime
6
- --reverse
data/spec/spec_helper.rb DELETED
@@ -1,5 +0,0 @@
1
- require 'spec'
2
- require 'rubygems'
3
- require 'cascading'
4
-
5
- require File.expand_path(File.dirname(__FILE__) + '/spec_util')
data/spec/spec_util.rb DELETED
@@ -1,92 +0,0 @@
1
- OUTPUT_DIR = 'output'
2
- BUILD_DIR = 'build/spec'
3
-
4
- module ScopeTests
5
- def check_scope(params = {})
6
- name_params = [params[:source]].compact
7
- scope = scope(*name_params)
8
- values_fields = params[:values_fields]
9
- grouping_fields = params[:grouping_fields] || values_fields
10
-
11
- debug = params[:debug]
12
- debug_scope(*name_params) if debug
13
-
14
- scope.values_fields.to_a.should == values_fields
15
- scope.grouping_fields.to_a.should == grouping_fields
16
- end
17
- end
18
-
19
- module Cascading
20
- class Flow; include ScopeTests; end
21
- class Assembly; include ScopeTests; end
22
- class Aggregations; include ScopeTests; end
23
- end
24
-
25
- def test_flow(&block)
26
- cascade = cascade 'test_app', :mode => :local do
27
- flow 'test', &block
28
- end
29
- cascade.complete
30
- end
31
-
32
- def test_assembly(params = {}, &block)
33
- branches = params[:branches] || []
34
-
35
- test_flow do
36
- source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
37
-
38
- # Default Fields defined by TextLineScheme
39
- check_scope :source => 'input', :values_fields => ['offset', 'line']
40
-
41
- assembly 'input', &block
42
-
43
- sink 'input', tap("#{OUTPUT_DIR}/out.txt", :sink_mode => :replace)
44
-
45
- # Branches must be sunk so that they (and their assertions) will be run
46
- branches.each do |branch|
47
- sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
48
- end
49
- end
50
- end
51
-
52
- def test_join_assembly(params = {}, &block)
53
- branches = params[:branches] || []
54
- post_join_block = params[:post_join_block]
55
-
56
- test_flow do
57
- source 'left', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
58
- source 'right', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
59
-
60
- # Default Fields defined by TextLineScheme
61
- check_scope :source => 'left', :values_fields => ['offset', 'line']
62
- check_scope :source => 'right', :values_fields => ['offset', 'line']
63
-
64
- assembly 'left' do
65
- check_scope :values_fields => ['offset', 'line']
66
- split 'line', ['x', 'y', 'z'], :pattern => /,/
67
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z']
68
- end
69
-
70
- assembly 'right' do
71
- check_scope :values_fields => ['offset', 'line']
72
- split 'line', ['x', 'y', 'z'], :pattern => /,/
73
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z']
74
- end
75
-
76
- assembly 'join' do
77
- # Empty scope because there is no 'join' source or assembly
78
- check_scope :values_fields => []
79
-
80
- left_join 'left', 'right', :on => ['x'], &block
81
-
82
- instance_eval &post_join_block if post_join_block
83
- end
84
-
85
- sink 'join', tap("#{OUTPUT_DIR}/join_out.txt", :sink_mode => :replace)
86
-
87
- # Branches must be sunk so that they (and their assertions) will be run
88
- branches.each do |branch|
89
- sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
90
- end
91
- end
92
- end
@@ -1,38 +0,0 @@
1
- package cascading.jruby;
2
-
3
- import org.jruby.Ruby;
4
- import org.jruby.RubyInstanceConfig;
5
-
6
- public class Main {
7
- private final static String JRUBY_HOME = "/opt/jruby";
8
-
9
- /**
10
- * Starts a Hadoop job by reading the specified JRuby script.
11
- *
12
- * @param args
13
- */
14
- public static void main(String[] args) {
15
- String name = args[0]; // c.j script name
16
- if (!name.startsWith("/"))
17
- name = "/" + name;
18
-
19
- // c.j script args
20
- String[] newArgs = new String[args.length - 1];
21
- System.arraycopy(args, 1, newArgs, 0, args.length - 1);
22
- RubyInstanceConfig config = new RubyInstanceConfig();
23
- config.setJRubyHome(JRUBY_HOME); // mwalker
24
- config.processArguments(newArgs);
25
-
26
- System.out.println("Arguments: ");
27
- for (String arg : config.getArgv())
28
- System.out.println(arg);
29
-
30
- Ruby runtime = Ruby.newInstance(config);
31
-
32
- System.out.println("Requiring '" + name + "'");
33
- runtime.executeScript("require '" + name + "'", name);
34
-
35
- System.out.println("Requiring 'cascading/jruby/runner'");
36
- runtime.executeScript("require 'cascading/jruby/runner'", "runner"); // gfodor
37
- }
38
- }
@@ -1,6 +0,0 @@
1
- puts "Found #{Cascading::Cascade.all.size} Cascades in global registry"
2
-
3
- Cascading::Cascade.all.each do |cascade|
4
- puts "runner.rb running '#{cascade.name}' Cascade"
5
- cascade.complete
6
- end