cascading.jruby 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/History.txt +15 -0
  2. data/lib/cascading/assembly.rb +138 -17
  3. data/lib/cascading/base.rb +0 -4
  4. data/lib/cascading/cascade.rb +25 -16
  5. data/lib/cascading/cascading.rb +25 -5
  6. data/lib/cascading/ext/array.rb +1 -7
  7. data/lib/cascading/flow.rb +18 -19
  8. data/lib/cascading/mode.rb +5 -1
  9. data/lib/cascading/operations.rb +11 -4
  10. data/lib/cascading/tap.rb +4 -0
  11. data/lib/cascading.rb +1 -5
  12. data/test/test_assembly.rb +135 -29
  13. data/test/test_cascade.rb +80 -0
  14. data/test/test_flow.rb +20 -0
  15. data/test/test_operations.rb +3 -2
  16. metadata +6 -76
  17. data/.travis.yml +0 -6
  18. data/Gemfile +0 -6
  19. data/Gemfile.lock +0 -12
  20. data/HACKING.md +0 -23
  21. data/README.md +0 -9
  22. data/Rakefile +0 -46
  23. data/TODO +0 -13
  24. data/bin/make_job +0 -81
  25. data/ivy.xml +0 -25
  26. data/ivysettings.xml +0 -7
  27. data/samples/branch.rb +0 -30
  28. data/samples/copy.rb +0 -20
  29. data/samples/data/data2.txt +0 -88799
  30. data/samples/data/data_group_by.txt +0 -7
  31. data/samples/data/data_join1.txt +0 -3
  32. data/samples/data/data_join2.txt +0 -3
  33. data/samples/data/data_join3.txt +0 -3
  34. data/samples/data/genealogy/names/dist.all.last +0 -88799
  35. data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
  36. data/samples/group_by.rb +0 -61
  37. data/samples/join.rb +0 -31
  38. data/samples/logwordcount.rb +0 -22
  39. data/samples/project.rb +0 -23
  40. data/samples/rename.rb +0 -20
  41. data/samples/scorenames.rb +0 -20
  42. data/samples/splitter.rb +0 -19
  43. data/samples/sub_assembly.rb +0 -30
  44. data/samples/union.rb +0 -36
  45. data/spec/cascading_spec.rb +0 -105
  46. data/spec/expr_spec.rb +0 -230
  47. data/spec/jruby_version_spec.rb +0 -72
  48. data/spec/resource/join_input.txt +0 -3
  49. data/spec/resource/test_input.txt +0 -4
  50. data/spec/scope_spec.rb +0 -149
  51. data/spec/spec.opts +0 -6
  52. data/spec/spec_helper.rb +0 -5
  53. data/spec/spec_util.rb +0 -92
  54. data/src/cascading/jruby/Main.java +0 -38
  55. data/src/cascading/jruby/runner.rb +0 -6
  56. data/tags +0 -342
  57. data/tasks/ann.rake +0 -80
  58. data/tasks/ant.rake +0 -23
  59. data/tasks/bones.rake +0 -20
  60. data/tasks/gem.rake +0 -206
  61. data/tasks/git.rake +0 -40
  62. data/tasks/notes.rake +0 -27
  63. data/tasks/post_load.rake +0 -34
  64. data/tasks/rdoc.rake +0 -50
  65. data/tasks/rubyforge.rake +0 -55
  66. data/tasks/samples.rake +0 -19
  67. data/tasks/setup.rb +0 -300
  68. data/tasks/spec.rake +0 -59
  69. data/tasks/svn.rake +0 -47
  70. data/tasks/test.rake +0 -42
  71. data/test/data/data1.txt +0 -14
  72. data/test/data/data2.txt +0 -14
  73. data/test/mock_assemblies.rb +0 -55
data/tasks/setup.rb DELETED
@@ -1,300 +0,0 @@
1
-
2
- require 'rubygems'
3
- require 'rake'
4
- require 'rake/clean'
5
- require 'fileutils'
6
- require 'ostruct'
7
- require 'find'
8
-
9
- class OpenStruct; undef :gem; end
10
-
11
- # TODO: make my own openstruct type object that includes descriptions
12
- # TODO: use the descriptions to output help on the available bones options
13
-
14
- PROJ = OpenStruct.new(
15
- # Project Defaults
16
- :name => nil,
17
- :summary => nil,
18
- :description => nil,
19
- :changes => nil,
20
- :authors => nil,
21
- :email => nil,
22
- :url => "\000",
23
- :version => ENV['VERSION'] || '0.0.0',
24
- :exclude => %w(tmp$ bak$ ~$ CVS \.svn/ \.git/ \.gitignore ^pkg/ \.swp \.swo output build classes),
25
- :release_name => ENV['RELEASE'],
26
-
27
- # System Defaults
28
- :ruby_opts => %w(-w),
29
- :libs => [],
30
- :history_file => 'History.txt',
31
- :readme_file => 'README.md',
32
- :ignore_file => '.bnsignore',
33
-
34
- # Announce
35
- :ann => OpenStruct.new(
36
- :file => 'announcement.txt',
37
- :text => nil,
38
- :paragraphs => [],
39
- :email => {
40
- :from => nil,
41
- :to => %w(ruby-talk@ruby-lang.org),
42
- :server => 'localhost',
43
- :port => 25,
44
- :domain => ENV['HOSTNAME'],
45
- :acct => nil,
46
- :passwd => nil,
47
- :authtype => :plain
48
- }
49
- ),
50
-
51
- # Gem Packaging
52
- :gem => OpenStruct.new(
53
- :dependencies => [],
54
- :development_dependencies => [],
55
- :executables => nil,
56
- :extensions => FileList['ext/**/extconf.rb'],
57
- :files => nil,
58
- :need_tar => true,
59
- :need_zip => false,
60
- :extras => {}
61
- ),
62
-
63
- # File Annotations
64
- :notes => OpenStruct.new(
65
- :exclude => %w(^tasks/setup\.rb$),
66
- :extensions => %w(.txt .rb .erb .rdoc) << '',
67
- :tags => %w(FIXME OPTIMIZE TODO)
68
- ),
69
-
70
- # Rcov
71
- :rcov => OpenStruct.new(
72
- :dir => 'coverage',
73
- :opts => %w[--sort coverage -T],
74
- :threshold => 90.0,
75
- :threshold_exact => false
76
- ),
77
-
78
- # Rdoc
79
- :rdoc => OpenStruct.new(
80
- :opts => [],
81
- :include => %w(^lib/ ^bin/ ^ext/ \.txt$ \.rdoc$),
82
- :exclude => %w(extconf\.rb$),
83
- :main => nil,
84
- :dir => 'doc',
85
- :remote_dir => nil
86
- ),
87
-
88
- # Rubyforge
89
- :rubyforge => OpenStruct.new(
90
- :name => "\000"
91
- ),
92
-
93
- # Rspec
94
- :spec => OpenStruct.new(
95
- :files => FileList['spec/**/*_spec.rb'],
96
- :opts => []
97
- ),
98
-
99
- # Subversion Repository
100
- :svn => OpenStruct.new(
101
- :root => nil,
102
- :path => '',
103
- :trunk => 'trunk',
104
- :tags => 'tags',
105
- :branches => 'branches'
106
- ),
107
-
108
- # Test::Unit
109
- :test => OpenStruct.new(
110
- :files => FileList['test/**/test_*.rb'],
111
- :file => 'test/all.rb',
112
- :opts => []
113
- )
114
- )
115
-
116
- # Load the other rake files in the tasks folder
117
- tasks_dir = File.expand_path(File.dirname(__FILE__))
118
- post_load_fn = File.join(tasks_dir, 'post_load.rake')
119
- rakefiles = Dir.glob(File.join(tasks_dir, '*.rake')).sort
120
- rakefiles.unshift(rakefiles.delete(post_load_fn)).compact!
121
- import(*rakefiles)
122
-
123
- # Setup the project libraries
124
- %w(lib ext).each {|dir| PROJ.libs << dir if test ?d, dir}
125
-
126
- # Setup some constants
127
- WIN32 = %r/djgpp|(cyg|ms|bcc)win|mingw/ =~ RUBY_PLATFORM unless defined? WIN32
128
-
129
- DEV_NULL = WIN32 ? 'NUL:' : '/dev/null'
130
-
131
- def quiet( &block )
132
- io = [STDOUT.dup, STDERR.dup]
133
- STDOUT.reopen DEV_NULL
134
- STDERR.reopen DEV_NULL
135
- block.call
136
- ensure
137
- STDOUT.reopen io.first
138
- STDERR.reopen io.last
139
- $stdout, $stderr = STDOUT, STDERR
140
- end
141
-
142
- DIFF = if WIN32 then 'diff.exe'
143
- else
144
- if quiet {system "gdiff", __FILE__, __FILE__} then 'gdiff'
145
- else 'diff' end
146
- end unless defined? DIFF
147
-
148
- SUDO = if WIN32 then ''
149
- else
150
- if quiet {system 'which sudo'} then 'sudo'
151
- else '' end
152
- end
153
-
154
- RCOV = WIN32 ? 'rcov.bat' : 'rcov'
155
- RDOC = WIN32 ? 'rdoc.bat' : 'rdoc'
156
- GEM = WIN32 ? 'gem.bat' : 'gem'
157
-
158
- %w(rcov spec/rake/spectask rubyforge bones facets/ansicode).each do |lib|
159
- begin
160
- require lib
161
- Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", true}
162
- rescue LoadError
163
- Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", false}
164
- end
165
- end
166
- HAVE_SVN = (Dir.entries(Dir.pwd).include?('.svn') and
167
- system("svn --version 2>&1 > #{DEV_NULL}"))
168
- HAVE_GIT = (Dir.entries(Dir.pwd).include?('.git') and
169
- system("git --version 2>&1 > #{DEV_NULL}"))
170
-
171
- # Add bones as a development dependency
172
- #
173
- if HAVE_BONES
174
- PROJ.gem.development_dependencies << ['bones', ">= #{Bones::VERSION}"]
175
- end
176
-
177
- # Reads a file at +path+ and spits out an array of the +paragraphs+
178
- # specified.
179
- #
180
- # changes = paragraphs_of('History.txt', 0..1).join("\n\n")
181
- # summary, *description = paragraphs_of('README.md', 3, 3..8)
182
- #
183
- def paragraphs_of( path, *paragraphs )
184
- title = String === paragraphs.first ? paragraphs.shift : nil
185
- ary = File.read(path).delete("\r").split(/\n\n+/)
186
-
187
- result = if title
188
- tmp, matching = [], false
189
- rgxp = %r/^=+\s*#{Regexp.escape(title)}/i
190
- paragraphs << (0..-1) if paragraphs.empty?
191
-
192
- ary.each do |val|
193
- if val =~ rgxp
194
- break if matching
195
- matching = true
196
- rgxp = %r/^=+/i
197
- elsif matching
198
- tmp << val
199
- end
200
- end
201
- tmp
202
- else ary end
203
-
204
- result.values_at(*paragraphs)
205
- end
206
-
207
- # Adds the given gem _name_ to the current project's dependency list. An
208
- # optional gem _version_ can be given. If omitted, the newest gem version
209
- # will be used.
210
- #
211
- def depend_on( name, version = nil )
212
- spec = Gem.source_index.find_name(name).last
213
- version = spec.version.to_s if version.nil? and !spec.nil?
214
-
215
- PROJ.gem.dependencies << case version
216
- when nil; [name]
217
- when %r/^\d/; [name, ">= #{version}"]
218
- else [name, version] end
219
- end
220
-
221
- # Adds the given arguments to the include path if they are not already there
222
- #
223
- def ensure_in_path( *args )
224
- args.each do |path|
225
- path = File.expand_path(path)
226
- $:.unshift(path) if test(?d, path) and not $:.include?(path)
227
- end
228
- end
229
-
230
- # Find a rake task using the task name and remove any description text. This
231
- # will prevent the task from being displayed in the list of available tasks.
232
- #
233
- def remove_desc_for_task( names )
234
- Array(names).each do |task_name|
235
- task = Rake.application.tasks.find {|t| t.name == task_name}
236
- next if task.nil?
237
- task.instance_variable_set :@comment, nil
238
- end
239
- end
240
-
241
- # Change working directories to _dir_, call the _block_ of code, and then
242
- # change back to the original working directory (the current directory when
243
- # this method was called).
244
- #
245
- def in_directory( dir, &block )
246
- curdir = pwd
247
- begin
248
- cd dir
249
- return block.call
250
- ensure
251
- cd curdir
252
- end
253
- end
254
-
255
- # Scans the current working directory and creates a list of files that are
256
- # candidates to be in the manifest.
257
- #
258
- def manifest
259
- files = []
260
- exclude = PROJ.exclude.dup
261
- comment = %r/^\s*#/
262
-
263
- # process the ignore file and add the items there to the exclude list
264
- if test(?f, PROJ.ignore_file)
265
- ary = []
266
- File.readlines(PROJ.ignore_file).each do |line|
267
- next if line =~ comment
268
- line.chomp!
269
- line.strip!
270
- next if line.nil? or line.empty?
271
-
272
- glob = line =~ %r/\*\./ ? File.join('**', line) : line
273
- Dir.glob(glob).each {|fn| ary << "^#{Regexp.escape(fn)}"}
274
- end
275
- exclude.concat ary
276
- end
277
-
278
- # generate a regular expression from the exclude list
279
- exclude = Regexp.new(exclude.join('|'))
280
-
281
- Find.find '.' do |path|
282
- path.sub! %r/^(\.\/|\/)/o, ''
283
- next unless test ?f, path
284
- next if path =~ exclude
285
- files << path
286
- end
287
- files.sort!
288
- end
289
-
290
- # We need a "valid" method thtat determines if a string is suitable for use
291
- # in the gem specification.
292
- #
293
- class Object
294
- def valid?
295
- return !(self.empty? or self == "\000") if self.respond_to?(:to_str)
296
- return false
297
- end
298
- end
299
-
300
- # EOF
data/tasks/spec.rake DELETED
@@ -1,59 +0,0 @@
1
-
2
- if HAVE_SPEC_RAKE_SPECTASK and not PROJ.spec.files.to_a.empty?
3
- require 'spec/rake/verify_rcov'
4
-
5
- namespace :spec do
6
-
7
- desc 'Run all specs with basic output'
8
- Spec::Rake::SpecTask.new(:run) do |t|
9
- # Allow user to specify specs to run at command line
10
- _, spec_files = ARGV
11
- spec_files ||= PROJ.spec.files
12
- t.verbose = true
13
-
14
- t.ruby_opts = PROJ.ruby_opts
15
- t.spec_opts = PROJ.spec.opts
16
- t.spec_files = spec_files
17
- t.libs += PROJ.libs
18
- end
19
-
20
- desc 'Run all specs with text output'
21
- Spec::Rake::SpecTask.new(:specdoc) do |t|
22
- t.ruby_opts = PROJ.ruby_opts
23
- t.spec_opts = PROJ.spec.opts + ['--format', 'specdoc']
24
- t.spec_files = PROJ.spec.files
25
- t.libs += PROJ.libs
26
- end
27
-
28
- if HAVE_RCOV
29
- desc 'Run all specs with RCov'
30
- Spec::Rake::SpecTask.new(:rcov) do |t|
31
- t.ruby_opts = PROJ.ruby_opts
32
- t.spec_opts = PROJ.spec.opts
33
- t.spec_files = PROJ.spec.files
34
- t.libs += PROJ.libs
35
- t.rcov = true
36
- t.rcov_dir = PROJ.rcov.dir
37
- t.rcov_opts = PROJ.rcov.opts + ['--exclude', 'spec']
38
- end
39
-
40
- RCov::VerifyTask.new(:verify) do |t|
41
- t.threshold = PROJ.rcov.threshold
42
- t.index_html = File.join(PROJ.rcov.dir, 'index.html')
43
- t.require_exact_threshold = PROJ.rcov.threshold_exact
44
- end
45
-
46
- task :verify => :rcov
47
- remove_desc_for_task %w(spec:clobber_rcov)
48
- end
49
-
50
- end # namespace :spec
51
-
52
- desc 'Alias to spec:run'
53
- task :spec => 'spec:run'
54
-
55
- task :clobber => 'spec:clobber_rcov' if HAVE_RCOV
56
-
57
- end # if HAVE_SPEC_RAKE_SPECTASK
58
-
59
- # EOF
data/tasks/svn.rake DELETED
@@ -1,47 +0,0 @@
1
-
2
- if HAVE_SVN
3
-
4
- unless PROJ.svn.root
5
- info = %x/svn info ./
6
- m = %r/^Repository Root:\s+(.*)$/.match(info)
7
- PROJ.svn.root = (m.nil? ? '' : m[1])
8
- end
9
- PROJ.svn.root = File.join(PROJ.svn.root, PROJ.svn.path) unless PROJ.svn.path.empty?
10
-
11
- namespace :svn do
12
-
13
- # A prerequisites task that all other tasks depend upon
14
- task :prereqs
15
-
16
- desc 'Show tags from the SVN repository'
17
- task :show_tags => 'svn:prereqs' do |t|
18
- tags = %x/svn list #{File.join(PROJ.svn.root, PROJ.svn.tags)}/
19
- tags.gsub!(%r/\/$/, '')
20
- tags = tags.split("\n").sort {|a,b| b <=> a}
21
- puts tags
22
- end
23
-
24
- desc 'Create a new tag in the SVN repository'
25
- task :create_tag => 'svn:prereqs' do |t|
26
- v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
27
- abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version
28
-
29
- svn = PROJ.svn
30
- trunk = File.join(svn.root, svn.trunk)
31
- tag = "%s-%s" % [PROJ.name, PROJ.version]
32
- tag = File.join(svn.root, svn.tags, tag)
33
- msg = "Creating tag for #{PROJ.name} version #{PROJ.version}"
34
-
35
- puts "Creating SVN tag '#{tag}'"
36
- unless system "svn cp -m '#{msg}' #{trunk} #{tag}"
37
- abort "Tag creation failed"
38
- end
39
- end
40
-
41
- end # namespace :svn
42
-
43
- task 'gem:release' => 'svn:create_tag'
44
-
45
- end # if PROJ.svn.path
46
-
47
- # EOF
data/tasks/test.rake DELETED
@@ -1,42 +0,0 @@
1
-
2
- if test(?e, PROJ.test.file) or not PROJ.test.files.to_a.empty?
3
- require 'rake/testtask'
4
-
5
-
6
-
7
- namespace :test do
8
-
9
- Rake::TestTask.new(:run) do |t|
10
- t.libs = PROJ.libs
11
- t.test_files = if test(?f, PROJ.test.file) then [PROJ.test.file]
12
- else PROJ.test.files end
13
- t.ruby_opts += PROJ.ruby_opts
14
- t.ruby_opts += PROJ.test.opts
15
- end
16
-
17
- if HAVE_RCOV
18
- desc 'Run rcov on the unit tests'
19
- task :rcov => :clobber_rcov do
20
- opts = PROJ.rcov.opts.dup << '-o' << PROJ.rcov.dir
21
- opts = opts.join(' ')
22
- files = if test(?f, PROJ.test.file) then [PROJ.test.file]
23
- else PROJ.test.files end
24
- files = files.join(' ')
25
- sh "#{RCOV} #{files} #{opts}"
26
- end
27
-
28
- task :clobber_rcov do
29
- rm_r 'coverage' rescue nil
30
- end
31
- end
32
-
33
- end # namespace :test
34
-
35
- desc 'Alias to test:run'
36
- task :test => ['ant:retrieve', 'test:run']
37
-
38
- task :clobber => 'test:clobber_rcov' if HAVE_RCOV
39
-
40
- end
41
-
42
- # EOF
data/test/data/data1.txt DELETED
@@ -1,14 +0,0 @@
1
- SMITH 1.006 1.006 1
2
- JOHNSON 0.810 1.816 2
3
- WILLIAMS 0.699 2.515 3
4
- JONES 0.621 3.136 4
5
- BROWN 0.621 3.757 5
6
- DAVIS 0.480 4.237 6
7
- MILLER 0.424 4.660 7
8
- WILSON 0.339 5.000 8
9
- MOORE 0.312 5.312 9
10
- TAYLOR 0.311 5.623 10
11
- ANDERSON 0.311 5.934 11
12
- THOMAS 0.311 6.245 12
13
- JACKSON 0.310 6.554 13
14
- WHITE 0.279 6.834 14
data/test/data/data2.txt DELETED
@@ -1,14 +0,0 @@
1
- SMITH 1 Paris
2
- JOHNSON 2 New-York
3
- WILLIAMS 3 London
4
- JONES 4 San Francisco
5
- BROWN 5 Dublin
6
- DAVIS 6 Rome
7
- MILLER 7 Munich
8
- WILSON 8 Berlin
9
- MOORE 9 Amsterdam
10
- TAYLOR 10 Bruxelles
11
- ANDERSON 11 Paris
12
- THOMAS 12 Lyon
13
- JACKSON 13 Barcelona
14
- WHITE 14 Athene
data/test/mock_assemblies.rb DELETED
@@ -1,55 +0,0 @@
1
- require 'cascading'
2
-
3
- module MockAssemblies
4
- def mock_assembly(&block)
5
- assembly = nil
6
- flow 'test' do
7
- source 'test', tap('test/data/data1.txt')
8
- assembly = assembly 'test', &block
9
- sink 'test', tap('output/test_mock_assembly')
10
- end
11
- assembly
12
- end
13
-
14
- def mock_branched_assembly(&block)
15
- assembly = nil
16
- flow 'mock_branched_assembly' do
17
- source 'data1', tap('test/data/data1.txt')
18
-
19
- assembly 'data1' do
20
- branch 'test1' do
21
- pass
22
- end
23
- branch 'test2' do
24
- pass
25
- end
26
- end
27
-
28
- assembly = assembly 'test', &block
29
-
30
- sink 'test', tap('output/test_mock_branched_assembly')
31
- end
32
- assembly
33
- end
34
-
35
- def mock_two_input_assembly(&block)
36
- assembly = nil
37
- flow 'mock_two_input_assembly' do
38
- source 'test1', tap('test/data/data1.txt')
39
- source 'test2', tap('test/data/data2.txt')
40
-
41
- assembly 'test1' do
42
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
43
- end
44
-
45
- assembly 'test2' do
46
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'id', 'town'], :output => ['name', 'id', 'town']
47
- end
48
-
49
- assembly = assembly 'test', &block
50
-
51
- sink 'test', tap('output/test_mock_two_input_assembly')
52
- end
53
- assembly
54
- end
55
- end