cascading.jruby 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/HACKING.md +15 -0
  2. data/History.txt +0 -0
  3. data/LICENSE.txt +165 -0
  4. data/README.md +7 -0
  5. data/Rakefile +45 -0
  6. data/bin/make_job +81 -0
  7. data/lib/cascading/assembly.rb +726 -0
  8. data/lib/cascading/base.rb +63 -0
  9. data/lib/cascading/cascade.rb +63 -0
  10. data/lib/cascading/cascading.rb +134 -0
  11. data/lib/cascading/cascading_exception.rb +30 -0
  12. data/lib/cascading/expr_stub.rb +33 -0
  13. data/lib/cascading/ext/array.rb +15 -0
  14. data/lib/cascading/flow.rb +168 -0
  15. data/lib/cascading/operations.rb +204 -0
  16. data/lib/cascading/scope.rb +160 -0
  17. data/lib/cascading.rb +63 -0
  18. data/samples/branch.rb +31 -0
  19. data/samples/cascading.rb +41 -0
  20. data/samples/copy.rb +18 -0
  21. data/samples/data/data2.txt +88799 -0
  22. data/samples/data/data_join1.txt +3 -0
  23. data/samples/data/data_join2.txt +3 -0
  24. data/samples/data/data_join3.txt +3 -0
  25. data/samples/join.rb +32 -0
  26. data/samples/logwordcount.rb +22 -0
  27. data/samples/project.rb +24 -0
  28. data/samples/rename.rb +21 -0
  29. data/samples/scorenames.rb +20 -0
  30. data/samples/splitter.rb +20 -0
  31. data/samples/union.rb +35 -0
  32. data/spec/cascading_spec.rb +100 -0
  33. data/spec/expr_spec.rb +10 -0
  34. data/spec/primary_key_spec.rb +119 -0
  35. data/spec/resource/join_input.txt +3 -0
  36. data/spec/resource/test_input.txt +4 -0
  37. data/spec/scope_spec.rb +174 -0
  38. data/spec/spec.opts +6 -0
  39. data/spec/spec_helper.rb +5 -0
  40. data/spec/spec_util.rb +188 -0
  41. data/src/cascading/jruby/Main.java +38 -0
  42. data/src/cascading/jruby/runner.rb +6 -0
  43. data/tags +238 -0
  44. data/tasks/ann.rake +80 -0
  45. data/tasks/ant.rake +11 -0
  46. data/tasks/bones.rake +20 -0
  47. data/tasks/gem.rake +206 -0
  48. data/tasks/git.rake +40 -0
  49. data/tasks/notes.rake +27 -0
  50. data/tasks/post_load.rake +34 -0
  51. data/tasks/rdoc.rake +50 -0
  52. data/tasks/rubyforge.rake +55 -0
  53. data/tasks/samples.rake +13 -0
  54. data/tasks/setup.rb +300 -0
  55. data/tasks/spec.rake +59 -0
  56. data/tasks/svn.rake +47 -0
  57. data/tasks/test.rake +42 -0
  58. data/test/data/data1.txt +14 -0
  59. data/test/data/data2.txt +14 -0
  60. data/test/test_assembly.rb +321 -0
  61. data/test/test_cascading.rb +49 -0
  62. data/test/test_flow.rb +15 -0
  63. metadata +137 -0
data/tasks/setup.rb ADDED
@@ -0,0 +1,300 @@
1
+
2
+ require 'rubygems'
3
+ require 'rake'
4
+ require 'rake/clean'
5
+ require 'fileutils'
6
+ require 'ostruct'
7
+ require 'find'
8
+
9
+ class OpenStruct; undef :gem; end
10
+
11
+ # TODO: make my own openstruct type object that includes descriptions
12
+ # TODO: use the descriptions to output help on the available bones options
13
+
14
+ # PROJ is the project-wide configuration object: an OpenStruct of defaults,
+ # with one nested OpenStruct per group of settings listed below.
+ PROJ = OpenStruct.new(
15
+ # Project Defaults ("\000" is a placeholder value that Object#valid? rejects)
16
+ :name => nil,
17
+ :summary => nil,
18
+ :description => nil,
19
+ :changes => nil,
20
+ :authors => nil,
21
+ :email => nil,
22
+ :url => "\000",
23
+ :version => ENV['VERSION'] || '0.0.0',
24
+ :exclude => %w(tmp$ bak$ ~$ CVS \.svn/ \.git/ \.gitignore ^pkg/ \.swp \.swo output build classes),
25
+ :release_name => ENV['RELEASE'],
26
+
27
+ # System Defaults (:ignore_file is the file read by the manifest method)
28
+ :ruby_opts => %w(-w),
29
+ :libs => [],
30
+ :history_file => 'History.txt',
31
+ :readme_file => 'README.md',
32
+ :ignore_file => '.bnsignore',
33
+
34
+ # Announce (announcement file plus outgoing e-mail settings)
35
+ :ann => OpenStruct.new(
36
+ :file => 'announcement.txt',
37
+ :text => nil,
38
+ :paragraphs => [],
39
+ :email => {
40
+ :from => nil,
41
+ :to => %w(ruby-talk@ruby-lang.org),
42
+ :server => 'localhost',
43
+ :port => 25,
44
+ :domain => ENV['HOSTNAME'],
45
+ :acct => nil,
46
+ :passwd => nil,
47
+ :authtype => :plain
48
+ }
49
+ ),
50
+
51
+ # Gem Packaging (depend_on appends to :dependencies)
52
+ :gem => OpenStruct.new(
53
+ :dependencies => [],
54
+ :development_dependencies => [],
55
+ :executables => nil,
56
+ :extensions => FileList['ext/**/extconf.rb'],
57
+ :files => nil,
58
+ :need_tar => true,
59
+ :need_zip => false,
60
+ :extras => {}
61
+ ),
62
+
63
+ # File Annotations (excluded paths, file extensions and annotation tags)
64
+ :notes => OpenStruct.new(
65
+ :exclude => %w(^tasks/setup\.rb$),
66
+ :extensions => %w(.txt .rb .erb .rdoc) << '',
67
+ :tags => %w(FIXME OPTIMIZE TODO)
68
+ ),
69
+
70
+ # Rcov (output directory, command-line options and coverage threshold)
71
+ :rcov => OpenStruct.new(
72
+ :dir => 'coverage',
73
+ :opts => %w[--sort coverage -T],
74
+ :threshold => 90.0,
75
+ :threshold_exact => false
76
+ ),
77
+
78
+ # Rdoc
79
+ :rdoc => OpenStruct.new(
80
+ :opts => [],
81
+ :include => %w(^lib/ ^bin/ ^ext/ \.txt$ \.rdoc$),
82
+ :exclude => %w(extconf\.rb$),
83
+ :main => nil,
84
+ :dir => 'doc',
85
+ :remote_dir => nil
86
+ ),
87
+
88
+ # Rubyforge ("\000" is the same unset placeholder used for :url above)
89
+ :rubyforge => OpenStruct.new(
90
+ :name => "\000"
91
+ ),
92
+
93
+ # Rspec
94
+ :spec => OpenStruct.new(
95
+ :files => FileList['spec/**/*_spec.rb'],
96
+ :opts => []
97
+ ),
98
+
99
+ # Subversion Repository (:root is filled in by svn.rake when left nil)
100
+ :svn => OpenStruct.new(
101
+ :root => nil,
102
+ :path => '',
103
+ :trunk => 'trunk',
104
+ :tags => 'tags',
105
+ :branches => 'branches'
106
+ ),
107
+
108
+ # Test::Unit (test.rake runs :file when it exists, otherwise :files)
109
+ :test => OpenStruct.new(
110
+ :files => FileList['test/**/test_*.rb'],
111
+ :file => 'test/all.rb',
112
+ :opts => []
113
+ )
114
+ )
115
+
116
+ # Load the other rake files in the tasks folder; post_load.rake, when present, is moved to the front of the import list
117
+ tasks_dir = File.expand_path(File.dirname(__FILE__))
118
+ post_load_fn = File.join(tasks_dir, 'post_load.rake')
119
+ rakefiles = Dir.glob(File.join(tasks_dir, '*.rake')).sort
120
+ rakefiles.unshift(rakefiles.delete(post_load_fn)).compact!
121
+ import(*rakefiles)
122
+
123
+ # Setup the project libraries: lib and ext are added to PROJ.libs when those directories exist
124
+ %w(lib ext).each {|dir| PROJ.libs << dir if test ?d, dir}
125
+
126
+ # Setup some constants: WIN32 is truthy when RUBY_PLATFORM names a Windows toolchain
127
+ WIN32 = %r/djgpp|(cyg|ms|bcc)win|mingw/ =~ RUBY_PLATFORM unless defined? WIN32
128
+
129
+ DEV_NULL = WIN32 ? 'NUL:' : '/dev/null'
130
+
131
# Runs the given block with STDOUT and STDERR redirected to DEV_NULL and
# returns the block's value. The original streams are restored afterwards,
# even when the block raises.
#
# The duplicated handles used to remember the original streams are closed
# once the streams are restored, so repeated calls do not leak descriptors.
#
def quiet( &block )
  saved = [STDOUT.dup, STDERR.dup]
  STDOUT.reopen DEV_NULL
  STDERR.reopen DEV_NULL
  block.call
ensure
  # saved is nil only when duplicating the streams itself failed
  if saved
    STDOUT.reopen saved.first
    STDERR.reopen saved.last
    saved.each {|stream| stream.close unless stream.closed?}
  end
  $stdout, $stderr = STDOUT, STDERR
end
141
+
142
# Name of the diff executable: diff.exe on Windows; elsewhere gdiff when it
# can be run successfully, plain diff when it cannot.
DIFF = if WIN32 then 'diff.exe'
       else
         if quiet {system "gdiff", __FILE__, __FILE__} then 'gdiff'
         else 'diff' end
       end unless defined? DIFF

# 'sudo' when `which sudo` succeeds on a non-Windows platform, '' otherwise.
SUDO = if WIN32 then ''
       else
         if quiet {system 'which sudo'} then 'sudo'
         else '' end
       end

# Command names for the external tools (the .bat wrappers on Windows).
RCOV = WIN32 ? 'rcov.bat' : 'rcov'
RDOC = WIN32 ? 'rdoc.bat' : 'rdoc'
GEM  = WIN32 ? 'gem.bat'  : 'gem'

# Try to load each optional library and record the outcome in a HAVE_<LIB>
# constant ('/' in the library name becomes '_'), e.g. HAVE_SPEC_RAKE_SPECTASK.
%w(rcov spec/rake/spectask rubyforge bones facets/ansicode).each do |lib|
  begin
    require lib
    Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", true}
  rescue LoadError
    Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", false}
  end
end

# A version control system counts as available when its metadata directory is
# in the current directory and its client runs. stdout is redirected first and
# stderr is then pointed at it, so both streams go to DEV_NULL (the reverse
# order would leave stderr on the terminal).
HAVE_SVN = (Dir.entries(Dir.pwd).include?('.svn') and
            system("svn --version > #{DEV_NULL} 2>&1"))
HAVE_GIT = (Dir.entries(Dir.pwd).include?('.git') and
            system("git --version > #{DEV_NULL} 2>&1"))

# Add bones as a development dependency
#
if HAVE_BONES
  PROJ.gem.development_dependencies << ['bones', ">= #{Bones::VERSION}"]
end
176
+
177
# Reads the file at +path+, splits it into paragraphs on blank lines (carriage
# returns are dropped first) and returns the +paragraphs+ selected by the
# given indexes and/or ranges.
#
# When the first selector is a String it is taken as a section title: only
# the paragraphs between the heading line "=... <title>" and the next "="
# heading are considered, and all of them are returned if no further
# selector is given.
#
#    changes = paragraphs_of('History.txt', 0..1).join("\n\n")
#    summary, *description = paragraphs_of('README.md', 3, 3..8)
#
def paragraphs_of( path, *paragraphs )
  title = paragraphs.shift if paragraphs.first.is_a?(String)
  chunks = File.read(path).delete("\r").split(/\n\n+/)
  return chunks.values_at(*paragraphs) unless title

  paragraphs << (0..-1) if paragraphs.empty?

  heading   = %r/^=+\s*#{Regexp.escape(title)}/i
  section   = []
  in_section = false

  chunks.each do |chunk|
    if chunk =~ heading
      # the second heading seen closes the section
      break if in_section
      in_section = true
      heading = %r/^=+/i
    elsif in_section
      section << chunk
    end
  end

  section.values_at(*paragraphs)
end
206
+
207
# Adds the given gem _name_ to the current project's dependency list. An
# optional gem _version_ can be given. If omitted, the newest installed
# version of the gem is used; if the gem is not installed at all, the
# dependency is recorded without a version constraint.
#
# A _version_ starting with a digit is recorded as ">= version"; any other
# string (e.g. "~> 1.2") is recorded as given. Returns PROJ.gem.dependencies.
#
def depend_on( name, version = nil )
  if version.nil?
    # Gem.source_index is only consulted on RubyGems versions that predate
    # Gem::Specification.find_all_by_name.
    specs = if Gem::Specification.respond_to?(:find_all_by_name)
      Gem::Specification.find_all_by_name(name)
    else
      Gem.source_index.find_name(name)
    end
    # pick the highest version explicitly instead of relying on list order
    newest = specs.max_by {|spec| spec.version}
    version = newest.version.to_s unless newest.nil?
  end

  PROJ.gem.dependencies << case version
    when nil; [name]
    when %r/^\d/; [name, ">= #{version}"]
    else [name, version] end
end
220
+
221
# Prepends each of the given paths (expanded to an absolute path) to the
# load path, skipping paths that are not directories and paths that are
# already on the load path.
#
def ensure_in_path( *args )
  args.each do |entry|
    dir = File.expand_path(entry)
    next unless File.directory?(dir)
    $:.unshift(dir) unless $:.include?(dir)
  end
end
229
+
230
# Find each rake task named in _names_ (a single name or a list of names)
# and remove its description text. This will prevent the task from being
# displayed in the list of available tasks. Unknown task names are ignored.
#
def remove_desc_for_task( names )
  Array(names).each do |task_name|
    task = Rake.application.tasks.find {|t| t.name == task_name}
    next if task.nil?

    # Prefer the public API; writing @comment directly only has an effect on
    # task objects that keep their description in that instance variable.
    if task.respond_to?(:clear_comments)
      task.clear_comments
    else
      task.instance_variable_set :@comment, nil
    end
  end
end
240
+
241
# Runs the _block_ with _dir_ as the working directory and returns the
# block's value. The directory that was current when this method was called
# is restored afterwards, whether or not the block raises.
#
def in_directory( dir, &block )
  origin = pwd
  begin
    cd dir
    block.call
  ensure
    cd origin
  end
end
254
+
255
# Scans the current working directory and creates a list of files that are
# candidates to be in the manifest.
#
# A file is left out when its relative path matches any of the regular
# expression fragments in PROJ.exclude, or when it is matched by one of the
# glob patterns listed in PROJ.ignore_file (blank lines and lines starting
# with '#' in that file are skipped). Returns the sorted list of relative
# file paths.
#
def manifest
  patterns = PROJ.exclude.dup
  comment = %r/^\s*#/

  # process the ignore file and add the items there to the exclude list
  if test(?f, PROJ.ignore_file)
    File.readlines(PROJ.ignore_file).each do |line|
      next if line =~ comment
      line = line.strip
      next if line.empty?

      # patterns such as "*.ext" are applied in every subdirectory
      glob = line =~ %r/\*\./ ? File.join('**', line) : line
      Dir.glob(glob).each {|fn| patterns << "^#{Regexp.escape(fn)}"}
    end
  end

  # An empty exclude list must exclude nothing: Regexp.new('') would match
  # every path and leave the manifest empty.
  exclude = patterns.empty? ? nil : Regexp.new(patterns.join('|'))

  files = []
  Find.find '.' do |path|
    path = path.sub(%r/^(\.\/|\/)/, '')
    next unless test(?f, path)
    next if exclude and path =~ exclude
    files << path
  end
  files.sort
end
289
+
290
# We need a "valid" method that determines if a string is suitable for use
# in the gem specification: only string-like objects (those responding to
# to_str) that are neither empty nor the "\000" placeholder are valid.
#
class Object
  def valid?
    return false unless respond_to?(:to_str)
    !(empty? || self == "\000")
  end
end
299
+
300
+ # EOF
data/tasks/spec.rake ADDED
@@ -0,0 +1,59 @@
1
+
2
+ # RSpec tasks. They are only defined when HAVE_SPEC_RAKE_SPECTASK is set
+ # and PROJ.spec.files is not empty.
+ if HAVE_SPEC_RAKE_SPECTASK and not PROJ.spec.files.to_a.empty?
3
+ require 'spec/rake/verify_rcov'
4
+
5
+ namespace :spec do
6
+
7
+ desc 'Run all specs with basic output'
8
+ Spec::Rake::SpecTask.new(:run) do |t|
9
+ # Allow user to specify specs to run at command line: the second ARGV entry, when given, replaces PROJ.spec.files
10
+ _, spec_files = ARGV
11
+ spec_files ||= PROJ.spec.files
12
+ t.verbose = true
13
+
14
+ t.ruby_opts = PROJ.ruby_opts
15
+ t.spec_opts = PROJ.spec.opts
16
+ t.spec_files = spec_files
17
+ t.libs += PROJ.libs
18
+ end
19
+
20
+ desc 'Run all specs with text output'
21
+ Spec::Rake::SpecTask.new(:specdoc) do |t|
22
+ t.ruby_opts = PROJ.ruby_opts
23
+ t.spec_opts = PROJ.spec.opts + ['--format', 'specdoc']
24
+ t.spec_files = PROJ.spec.files
25
+ t.libs += PROJ.libs
26
+ end
27
+
28
+ if HAVE_RCOV
29
+ desc 'Run all specs with RCov'
30
+ Spec::Rake::SpecTask.new(:rcov) do |t|
31
+ t.ruby_opts = PROJ.ruby_opts
32
+ t.spec_opts = PROJ.spec.opts
33
+ t.spec_files = PROJ.spec.files
34
+ t.libs += PROJ.libs
35
+ t.rcov = true
36
+ t.rcov_dir = PROJ.rcov.dir
37
+ t.rcov_opts = PROJ.rcov.opts + ['--exclude', 'spec']
38
+ end
39
+
40
+ RCov::VerifyTask.new(:verify) do |t|
41
+ t.threshold = PROJ.rcov.threshold
42
+ t.index_html = File.join(PROJ.rcov.dir, 'index.html')
43
+ t.require_exact_threshold = PROJ.rcov.threshold_exact
44
+ end
45
+
46
+ task :verify => :rcov
47
+ remove_desc_for_task %w(spec:clobber_rcov)
48
+ end
49
+
50
+ end # namespace :spec
51
+
52
+ desc 'Alias to spec:run'
53
+ task :spec => 'spec:run'
54
+
55
+ task :clobber => 'spec:clobber_rcov' if HAVE_RCOV
56
+
57
+ end # if HAVE_SPEC_RAKE_SPECTASK
58
+
59
+ # EOF
data/tasks/svn.rake ADDED
@@ -0,0 +1,47 @@
1
+
2
# Subversion tasks. They are only defined when HAVE_SVN is set.
if HAVE_SVN

  # Unless a repository root was configured, take it from `svn info`
  # (an empty string when the output has no "Repository Root:" line).
  unless PROJ.svn.root
    info = %x/svn info ./
    m = %r/^Repository Root:\s+(.*)$/.match(info)
    PROJ.svn.root = (m.nil? ? '' : m[1])
  end
  PROJ.svn.root = File.join(PROJ.svn.root, PROJ.svn.path) unless PROJ.svn.path.empty?

  namespace :svn do

    # A prerequisites task that all other tasks depend upon
    task :prereqs

    desc 'Show tags from the SVN repository'
    task :show_tags => 'svn:prereqs' do |t|
      tags = %x/svn list #{File.join(PROJ.svn.root, PROJ.svn.tags)}/
      tags.gsub!(%r/\/$/, '')
      # newest (highest sorting) tag first
      tags = tags.split("\n").sort {|a,b| b <=> a}
      puts tags
    end

    desc 'Create a new tag in the SVN repository'
    task :create_tag => 'svn:prereqs' do |t|
      v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
      abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version

      svn = PROJ.svn
      trunk = File.join(svn.root, svn.trunk)
      tag = "%s-%s" % [PROJ.name, PROJ.version]
      tag = File.join(svn.root, svn.tags, tag)
      msg = "Creating tag for #{PROJ.name} version #{PROJ.version}"

      puts "Creating SVN tag '#{tag}'"
      # Pass the arguments as a list so no shell is involved: a project name
      # or path containing quotes or spaces cannot break the command line.
      unless system 'svn', 'cp', '-m', msg, trunk, tag
        abort "Tag creation failed"
      end
    end

  end  # namespace :svn

  task 'gem:release' => 'svn:create_tag'

end  # if HAVE_SVN
46
+
47
+ # EOF
data/tasks/test.rake ADDED
@@ -0,0 +1,42 @@
1
+
2
# Test::Unit tasks. They are only defined when PROJ.test.file exists or
# PROJ.test.files is not empty.
if test(?e, PROJ.test.file) or not PROJ.test.files.to_a.empty?
  require 'rake/testtask'

  namespace :test do

    # Runs PROJ.test.file when it is a regular file, otherwise every file
    # in PROJ.test.files.
    Rake::TestTask.new(:run) do |t|
      t.libs = PROJ.libs
      t.test_files = if test(?f, PROJ.test.file) then [PROJ.test.file]
                     else PROJ.test.files end
      t.ruby_opts += PROJ.ruby_opts
      t.ruby_opts += PROJ.test.opts
    end

    if HAVE_RCOV
      desc 'Run rcov on the unit tests'
      task :rcov => :clobber_rcov do
        opts = PROJ.rcov.opts.dup << '-o' << PROJ.rcov.dir
        opts = opts.join(' ')
        files = if test(?f, PROJ.test.file) then [PROJ.test.file]
                else PROJ.test.files end
        files = files.join(' ')
        sh "#{RCOV} #{files} #{opts}"
      end

      # Remove the directory the rcov task writes to (its '-o' argument),
      # not a hard-coded name; rm_rf does not fail when it is missing.
      task :clobber_rcov do
        rm_rf PROJ.rcov.dir
      end
    end

  end  # namespace :test

  desc 'Alias to test:run'
  task :test => 'test:run'

  task :clobber => 'test:clobber_rcov' if HAVE_RCOV

end
41
+
42
+ # EOF
@@ -0,0 +1,14 @@
1
+ SMITH 1.006 1.006 1
2
+ JOHNSON 0.810 1.816 2
3
+ WILLIAMS 0.699 2.515 3
4
+ JONES 0.621 3.136 4
5
+ BROWN 0.621 3.757 5
6
+ DAVIS 0.480 4.237 6
7
+ MILLER 0.424 4.660 7
8
+ WILSON 0.339 5.000 8
9
+ MOORE 0.312 5.312 9
10
+ TAYLOR 0.311 5.623 10
11
+ ANDERSON 0.311 5.934 11
12
+ THOMAS 0.311 6.245 12
13
+ JACKSON 0.310 6.554 13
14
+ WHITE 0.279 6.834 14
@@ -0,0 +1,14 @@
1
+ SMITH 1 Paris
2
+ JOHNSON 2 New-York
3
+ WILLIAMS 3 London
4
+ JONES 4 San Francisco
5
+ BROWN 5 Dublin
6
+ DAVIS 6 Rome
7
+ MILLER 7 Munich
8
+ WILSON 8 Berlin
9
+ MOORE 9 Amsterdam
10
+ TAYLOR 10 Bruxelles
11
+ ANDERSON 11 Paris
12
+ THOMAS 12 Lyon
13
+ JACKSON 13 Barcelona
14
+ WHITE 14 Athene