cascading.jruby 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. data/History.txt +15 -0
  2. data/lib/cascading/assembly.rb +138 -17
  3. data/lib/cascading/base.rb +0 -4
  4. data/lib/cascading/cascade.rb +25 -16
  5. data/lib/cascading/cascading.rb +25 -5
  6. data/lib/cascading/ext/array.rb +1 -7
  7. data/lib/cascading/flow.rb +18 -19
  8. data/lib/cascading/mode.rb +5 -1
  9. data/lib/cascading/operations.rb +11 -4
  10. data/lib/cascading/tap.rb +4 -0
  11. data/lib/cascading.rb +1 -5
  12. data/test/test_assembly.rb +135 -29
  13. data/test/test_cascade.rb +80 -0
  14. data/test/test_flow.rb +20 -0
  15. data/test/test_operations.rb +3 -2
  16. metadata +6 -76
  17. data/.travis.yml +0 -6
  18. data/Gemfile +0 -6
  19. data/Gemfile.lock +0 -12
  20. data/HACKING.md +0 -23
  21. data/README.md +0 -9
  22. data/Rakefile +0 -46
  23. data/TODO +0 -13
  24. data/bin/make_job +0 -81
  25. data/ivy.xml +0 -25
  26. data/ivysettings.xml +0 -7
  27. data/samples/branch.rb +0 -30
  28. data/samples/copy.rb +0 -20
  29. data/samples/data/data2.txt +0 -88799
  30. data/samples/data/data_group_by.txt +0 -7
  31. data/samples/data/data_join1.txt +0 -3
  32. data/samples/data/data_join2.txt +0 -3
  33. data/samples/data/data_join3.txt +0 -3
  34. data/samples/data/genealogy/names/dist.all.last +0 -88799
  35. data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
  36. data/samples/group_by.rb +0 -61
  37. data/samples/join.rb +0 -31
  38. data/samples/logwordcount.rb +0 -22
  39. data/samples/project.rb +0 -23
  40. data/samples/rename.rb +0 -20
  41. data/samples/scorenames.rb +0 -20
  42. data/samples/splitter.rb +0 -19
  43. data/samples/sub_assembly.rb +0 -30
  44. data/samples/union.rb +0 -36
  45. data/spec/cascading_spec.rb +0 -105
  46. data/spec/expr_spec.rb +0 -230
  47. data/spec/jruby_version_spec.rb +0 -72
  48. data/spec/resource/join_input.txt +0 -3
  49. data/spec/resource/test_input.txt +0 -4
  50. data/spec/scope_spec.rb +0 -149
  51. data/spec/spec.opts +0 -6
  52. data/spec/spec_helper.rb +0 -5
  53. data/spec/spec_util.rb +0 -92
  54. data/src/cascading/jruby/Main.java +0 -38
  55. data/src/cascading/jruby/runner.rb +0 -6
  56. data/tags +0 -342
  57. data/tasks/ann.rake +0 -80
  58. data/tasks/ant.rake +0 -23
  59. data/tasks/bones.rake +0 -20
  60. data/tasks/gem.rake +0 -206
  61. data/tasks/git.rake +0 -40
  62. data/tasks/notes.rake +0 -27
  63. data/tasks/post_load.rake +0 -34
  64. data/tasks/rdoc.rake +0 -50
  65. data/tasks/rubyforge.rake +0 -55
  66. data/tasks/samples.rake +0 -19
  67. data/tasks/setup.rb +0 -300
  68. data/tasks/spec.rake +0 -59
  69. data/tasks/svn.rake +0 -47
  70. data/tasks/test.rake +0 -42
  71. data/test/data/data1.txt +0 -14
  72. data/test/data/data2.txt +0 -14
  73. data/test/mock_assemblies.rb +0 -55
@@ -1,4 +0,0 @@
1
- hello,world
2
- foo,bar
3
- foo,bar
4
- biz,baz
data/spec/scope_spec.rb DELETED
@@ -1,149 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
-
3
- context Cascading::Scope do
4
- it 'should match Cascading fields names from source tap scheme' do
5
- test_assembly do
6
- # Pass that uses our scope instead of all_fields
7
- each scope.values_fields, :function => Java::CascadingOperation::Identity.new
8
- check_scope :values_fields => ['offset', 'line']
9
- end
10
- end
11
-
12
- it 'should pick up names from source tap scheme' do
13
- test_assembly do
14
- pass
15
-
16
- check_scope :values_fields => ['offset', 'line']
17
- end
18
- end
19
-
20
- it 'should propagate names through Each' do
21
- test_assembly do
22
- check_scope :values_fields => ['offset', 'line']
23
- assert_size_equals 2
24
-
25
- split 'line', ['x', 'y'], :pattern => /,/
26
- check_scope :values_fields => ['offset', 'line', 'x', 'y']
27
- assert_size_equals 4
28
- end
29
- end
30
-
31
- it 'should allow field filtration at Each' do
32
- test_assembly do
33
- check_scope :values_fields => ['offset', 'line']
34
- assert_size_equals 2
35
-
36
- split 'line', ['x', 'y'], :pattern => /,/, :output => ['x', 'y']
37
- check_scope :values_fields => ['x', 'y']
38
- assert_size_equals 2
39
- end
40
- end
41
-
42
- it 'should propagate names through CoGroup' do
43
- test_join_assembly do
44
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
45
- :grouping_fields => ['x', 'x_']
46
- end
47
- end
48
-
49
- it 'should propagate names through CoGroup with no Aggregations' do
50
- post_join_block = lambda do
51
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
52
- :grouping_fields => ['x', 'x_']
53
- end
54
-
55
- test_join_assembly(:post_join_block => post_join_block)
56
- end
57
-
58
- it 'should pass grouping fields to Every' do
59
- test_join_assembly do
60
- sum :mapping => {'x' => 'x_sum'}, :type => :int
61
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
62
- :grouping_fields => ['x', 'x_', 'x_sum']
63
- assert_group_size_equals 1
64
- end
65
- end
66
-
67
- it 'should pass grouping fields through chained Every' do
68
- test_join_assembly do
69
- sum :mapping => {'x' => 'x_sum'}, :type => :int
70
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
71
- :grouping_fields => ['x', 'x_', 'x_sum']
72
- assert_group_size_equals 1
73
-
74
- sum :mapping => {'y' => 'y_sum'}, :type => :int
75
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
76
- :grouping_fields => ['x', 'x_', 'x_sum', 'y_sum']
77
- assert_group_size_equals 1
78
- end
79
- end
80
-
81
- it 'should propagate names through Every' do
82
- post_join_block = lambda do
83
- check_scope :values_fields => ['x', 'x_', 'x_sum', 'y_sum']
84
- assert_size_equals 4
85
- end
86
-
87
- test_join_assembly :post_join_block => post_join_block do
88
- sum :mapping => {'x' => 'x_sum'}, :type => :int
89
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
90
- :grouping_fields => ['x', 'x_', 'x_sum']
91
- assert_group_size_equals 1
92
-
93
- sum :mapping => {'y' => 'y_sum'}, :type => :int
94
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
95
- :grouping_fields => ['x', 'x_', 'x_sum', 'y_sum']
96
- assert_group_size_equals 1
97
- end
98
- end
99
-
100
- it 'should pass values fields to Each immediately following CoGroup and remove grouping fields' do
101
- post_join_block = lambda do
102
- assert_size_equals 10
103
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
104
- end
105
- test_join_assembly(:post_join_block => post_join_block)
106
- end
107
-
108
- it 'should fail to pass grouping fields to Every immediately following Each' do
109
- post_join_block = lambda do
110
- pass
111
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_']
112
- sum :mapping => {'x' => 'x_sum'}, :type => :int
113
- end
114
-
115
- lambda do # Composition fails
116
- test_join_assembly(:post_join_block => post_join_block)
117
- # sum doesn't exist outside of Aggregations (where block of join is
118
- # evaluated)
119
- end.should raise_error NoMethodError
120
- end
121
-
122
- it 'should propagate values fields and field names into branch' do
123
- post_join_block = lambda do
124
- branch 'data_tuple' do
125
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z', 'offset_', 'line_', 'x_', 'y_', 'z_'],
126
- :grouping_fields => ['x', 'x_']
127
- assert_size_equals 10
128
- end
129
- end
130
-
131
- test_join_assembly(:branches => ['data_tuple'], :post_join_block => post_join_block)
132
- end
133
-
134
- it 'should propagate names through GroupBy' do
135
- test_assembly do
136
- group_by 'line' do
137
- count
138
- end
139
- check_scope :values_fields => ['line', 'count']
140
- end
141
- end
142
-
143
- it 'should propagate names through blockless GroupBy' do
144
- test_assembly do
145
- group_by 'line'
146
- check_scope :values_fields => ['offset', 'line'], :grouping_fields => ['line']
147
- end
148
- end
149
- end
data/spec/spec.opts DELETED
@@ -1,6 +0,0 @@
1
- --colour
2
- --format
3
- progress
4
- --loadby
5
- mtime
6
- --reverse
data/spec/spec_helper.rb DELETED
@@ -1,5 +0,0 @@
1
- require 'spec'
2
- require 'rubygems'
3
- require 'cascading'
4
-
5
- require File.expand_path(File.dirname(__FILE__) + '/spec_util')
data/spec/spec_util.rb DELETED
@@ -1,92 +0,0 @@
1
- OUTPUT_DIR = 'output'
2
- BUILD_DIR = 'build/spec'
3
-
4
- module ScopeTests
5
- def check_scope(params = {})
6
- name_params = [params[:source]].compact
7
- scope = scope(*name_params)
8
- values_fields = params[:values_fields]
9
- grouping_fields = params[:grouping_fields] || values_fields
10
-
11
- debug = params[:debug]
12
- debug_scope(*name_params) if debug
13
-
14
- scope.values_fields.to_a.should == values_fields
15
- scope.grouping_fields.to_a.should == grouping_fields
16
- end
17
- end
18
-
19
- module Cascading
20
- class Flow; include ScopeTests; end
21
- class Assembly; include ScopeTests; end
22
- class Aggregations; include ScopeTests; end
23
- end
24
-
25
- def test_flow(&block)
26
- cascade = cascade 'test_app', :mode => :local do
27
- flow 'test', &block
28
- end
29
- cascade.complete
30
- end
31
-
32
- def test_assembly(params = {}, &block)
33
- branches = params[:branches] || []
34
-
35
- test_flow do
36
- source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
37
-
38
- # Default Fields defined by TextLineScheme
39
- check_scope :source => 'input', :values_fields => ['offset', 'line']
40
-
41
- assembly 'input', &block
42
-
43
- sink 'input', tap("#{OUTPUT_DIR}/out.txt", :sink_mode => :replace)
44
-
45
- # Branches must be sunk so that they (and their assertions) will be run
46
- branches.each do |branch|
47
- sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
48
- end
49
- end
50
- end
51
-
52
- def test_join_assembly(params = {}, &block)
53
- branches = params[:branches] || []
54
- post_join_block = params[:post_join_block]
55
-
56
- test_flow do
57
- source 'left', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
58
- source 'right', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
59
-
60
- # Default Fields defined by TextLineScheme
61
- check_scope :source => 'left', :values_fields => ['offset', 'line']
62
- check_scope :source => 'right', :values_fields => ['offset', 'line']
63
-
64
- assembly 'left' do
65
- check_scope :values_fields => ['offset', 'line']
66
- split 'line', ['x', 'y', 'z'], :pattern => /,/
67
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z']
68
- end
69
-
70
- assembly 'right' do
71
- check_scope :values_fields => ['offset', 'line']
72
- split 'line', ['x', 'y', 'z'], :pattern => /,/
73
- check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z']
74
- end
75
-
76
- assembly 'join' do
77
- # Empty scope because there is no 'join' source or assembly
78
- check_scope :values_fields => []
79
-
80
- left_join 'left', 'right', :on => ['x'], &block
81
-
82
- instance_eval &post_join_block if post_join_block
83
- end
84
-
85
- sink 'join', tap("#{OUTPUT_DIR}/join_out.txt", :sink_mode => :replace)
86
-
87
- # Branches must be sunk so that they (and their assertions) will be run
88
- branches.each do |branch|
89
- sink branch, tap("#{OUTPUT_DIR}/#{branch}_out.txt", :sink_mode => :replace)
90
- end
91
- end
92
- end
@@ -1,38 +0,0 @@
1
- package cascading.jruby;
2
-
3
- import org.jruby.Ruby;
4
- import org.jruby.RubyInstanceConfig;
5
-
6
- public class Main {
7
- private final static String JRUBY_HOME = "/opt/jruby";
8
-
9
- /**
10
- * Starts a Hadoop job by reading the specified JRuby script.
11
- *
12
- * @param args
13
- */
14
- public static void main(String[] args) {
15
- String name = args[0]; // c.j script name
16
- if (!name.startsWith("/"))
17
- name = "/" + name;
18
-
19
- // c.j script args
20
- String[] newArgs = new String[args.length - 1];
21
- System.arraycopy(args, 1, newArgs, 0, args.length - 1);
22
- RubyInstanceConfig config = new RubyInstanceConfig();
23
- config.setJRubyHome(JRUBY_HOME); // mwalker
24
- config.processArguments(newArgs);
25
-
26
- System.out.println("Arguments: ");
27
- for (String arg : config.getArgv())
28
- System.out.println(arg);
29
-
30
- Ruby runtime = Ruby.newInstance(config);
31
-
32
- System.out.println("Requiring '" + name + "'");
33
- runtime.executeScript("require '" + name + "'", name);
34
-
35
- System.out.println("Requiring 'cascading/jruby/runner'");
36
- runtime.executeScript("require 'cascading/jruby/runner'", "runner"); // gfodor
37
- }
38
- }
@@ -1,6 +0,0 @@
1
- puts "Found #{Cascading::Cascade.all.size} Cascades in global registry"
2
-
3
- Cascading::Cascade.all.each do |cascade|
4
- puts "runner.rb running '#{cascade.name}' Cascade"
5
- cascade.complete
6
- end