cascading.jruby 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. data/History.txt +15 -0
  2. data/lib/cascading/assembly.rb +138 -17
  3. data/lib/cascading/base.rb +0 -4
  4. data/lib/cascading/cascade.rb +25 -16
  5. data/lib/cascading/cascading.rb +25 -5
  6. data/lib/cascading/ext/array.rb +1 -7
  7. data/lib/cascading/flow.rb +18 -19
  8. data/lib/cascading/mode.rb +5 -1
  9. data/lib/cascading/operations.rb +11 -4
  10. data/lib/cascading/tap.rb +4 -0
  11. data/lib/cascading.rb +1 -5
  12. data/test/test_assembly.rb +135 -29
  13. data/test/test_cascade.rb +80 -0
  14. data/test/test_flow.rb +20 -0
  15. data/test/test_operations.rb +3 -2
  16. metadata +6 -76
  17. data/.travis.yml +0 -6
  18. data/Gemfile +0 -6
  19. data/Gemfile.lock +0 -12
  20. data/HACKING.md +0 -23
  21. data/README.md +0 -9
  22. data/Rakefile +0 -46
  23. data/TODO +0 -13
  24. data/bin/make_job +0 -81
  25. data/ivy.xml +0 -25
  26. data/ivysettings.xml +0 -7
  27. data/samples/branch.rb +0 -30
  28. data/samples/copy.rb +0 -20
  29. data/samples/data/data2.txt +0 -88799
  30. data/samples/data/data_group_by.txt +0 -7
  31. data/samples/data/data_join1.txt +0 -3
  32. data/samples/data/data_join2.txt +0 -3
  33. data/samples/data/data_join3.txt +0 -3
  34. data/samples/data/genealogy/names/dist.all.last +0 -88799
  35. data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
  36. data/samples/group_by.rb +0 -61
  37. data/samples/join.rb +0 -31
  38. data/samples/logwordcount.rb +0 -22
  39. data/samples/project.rb +0 -23
  40. data/samples/rename.rb +0 -20
  41. data/samples/scorenames.rb +0 -20
  42. data/samples/splitter.rb +0 -19
  43. data/samples/sub_assembly.rb +0 -30
  44. data/samples/union.rb +0 -36
  45. data/spec/cascading_spec.rb +0 -105
  46. data/spec/expr_spec.rb +0 -230
  47. data/spec/jruby_version_spec.rb +0 -72
  48. data/spec/resource/join_input.txt +0 -3
  49. data/spec/resource/test_input.txt +0 -4
  50. data/spec/scope_spec.rb +0 -149
  51. data/spec/spec.opts +0 -6
  52. data/spec/spec_helper.rb +0 -5
  53. data/spec/spec_util.rb +0 -92
  54. data/src/cascading/jruby/Main.java +0 -38
  55. data/src/cascading/jruby/runner.rb +0 -6
  56. data/tags +0 -342
  57. data/tasks/ann.rake +0 -80
  58. data/tasks/ant.rake +0 -23
  59. data/tasks/bones.rake +0 -20
  60. data/tasks/gem.rake +0 -206
  61. data/tasks/git.rake +0 -40
  62. data/tasks/notes.rake +0 -27
  63. data/tasks/post_load.rake +0 -34
  64. data/tasks/rdoc.rake +0 -50
  65. data/tasks/rubyforge.rake +0 -55
  66. data/tasks/samples.rake +0 -19
  67. data/tasks/setup.rb +0 -300
  68. data/tasks/spec.rake +0 -59
  69. data/tasks/svn.rake +0 -47
  70. data/tasks/test.rake +0 -42
  71. data/test/data/data1.txt +0 -14
  72. data/test/data/data2.txt +0 -14
  73. data/test/mock_assemblies.rb +0 -55
data/tasks/setup.rb DELETED
@@ -1,300 +0,0 @@
1
-
2
- require 'rubygems'
3
- require 'rake'
4
- require 'rake/clean'
5
- require 'fileutils'
6
- require 'ostruct'
7
- require 'find'
8
-
9
- class OpenStruct; undef :gem; end
10
-
11
- # TODO: make my own openstruct type object that includes descriptions
12
- # TODO: use the descriptions to output help on the available bones options
13
-
14
- PROJ = OpenStruct.new(
15
- # Project Defaults
16
- :name => nil,
17
- :summary => nil,
18
- :description => nil,
19
- :changes => nil,
20
- :authors => nil,
21
- :email => nil,
22
- :url => "\000",
23
- :version => ENV['VERSION'] || '0.0.0',
24
- :exclude => %w(tmp$ bak$ ~$ CVS \.svn/ \.git/ \.gitignore ^pkg/ \.swp \.swo output build classes),
25
- :release_name => ENV['RELEASE'],
26
-
27
- # System Defaults
28
- :ruby_opts => %w(-w),
29
- :libs => [],
30
- :history_file => 'History.txt',
31
- :readme_file => 'README.md',
32
- :ignore_file => '.bnsignore',
33
-
34
- # Announce
35
- :ann => OpenStruct.new(
36
- :file => 'announcement.txt',
37
- :text => nil,
38
- :paragraphs => [],
39
- :email => {
40
- :from => nil,
41
- :to => %w(ruby-talk@ruby-lang.org),
42
- :server => 'localhost',
43
- :port => 25,
44
- :domain => ENV['HOSTNAME'],
45
- :acct => nil,
46
- :passwd => nil,
47
- :authtype => :plain
48
- }
49
- ),
50
-
51
- # Gem Packaging
52
- :gem => OpenStruct.new(
53
- :dependencies => [],
54
- :development_dependencies => [],
55
- :executables => nil,
56
- :extensions => FileList['ext/**/extconf.rb'],
57
- :files => nil,
58
- :need_tar => true,
59
- :need_zip => false,
60
- :extras => {}
61
- ),
62
-
63
- # File Annotations
64
- :notes => OpenStruct.new(
65
- :exclude => %w(^tasks/setup\.rb$),
66
- :extensions => %w(.txt .rb .erb .rdoc) << '',
67
- :tags => %w(FIXME OPTIMIZE TODO)
68
- ),
69
-
70
- # Rcov
71
- :rcov => OpenStruct.new(
72
- :dir => 'coverage',
73
- :opts => %w[--sort coverage -T],
74
- :threshold => 90.0,
75
- :threshold_exact => false
76
- ),
77
-
78
- # Rdoc
79
- :rdoc => OpenStruct.new(
80
- :opts => [],
81
- :include => %w(^lib/ ^bin/ ^ext/ \.txt$ \.rdoc$),
82
- :exclude => %w(extconf\.rb$),
83
- :main => nil,
84
- :dir => 'doc',
85
- :remote_dir => nil
86
- ),
87
-
88
- # Rubyforge
89
- :rubyforge => OpenStruct.new(
90
- :name => "\000"
91
- ),
92
-
93
- # Rspec
94
- :spec => OpenStruct.new(
95
- :files => FileList['spec/**/*_spec.rb'],
96
- :opts => []
97
- ),
98
-
99
- # Subversion Repository
100
- :svn => OpenStruct.new(
101
- :root => nil,
102
- :path => '',
103
- :trunk => 'trunk',
104
- :tags => 'tags',
105
- :branches => 'branches'
106
- ),
107
-
108
- # Test::Unit
109
- :test => OpenStruct.new(
110
- :files => FileList['test/**/test_*.rb'],
111
- :file => 'test/all.rb',
112
- :opts => []
113
- )
114
- )
115
-
116
- # Load the other rake files in the tasks folder
117
- tasks_dir = File.expand_path(File.dirname(__FILE__))
118
- post_load_fn = File.join(tasks_dir, 'post_load.rake')
119
- rakefiles = Dir.glob(File.join(tasks_dir, '*.rake')).sort
120
- rakefiles.unshift(rakefiles.delete(post_load_fn)).compact!
121
- import(*rakefiles)
122
-
123
- # Setup the project libraries
124
- %w(lib ext).each {|dir| PROJ.libs << dir if test ?d, dir}
125
-
126
- # Setup some constants
127
- WIN32 = %r/djgpp|(cyg|ms|bcc)win|mingw/ =~ RUBY_PLATFORM unless defined? WIN32
128
-
129
- DEV_NULL = WIN32 ? 'NUL:' : '/dev/null'
130
-
131
- def quiet( &block )
132
- io = [STDOUT.dup, STDERR.dup]
133
- STDOUT.reopen DEV_NULL
134
- STDERR.reopen DEV_NULL
135
- block.call
136
- ensure
137
- STDOUT.reopen io.first
138
- STDERR.reopen io.last
139
- $stdout, $stderr = STDOUT, STDERR
140
- end
141
-
142
- DIFF = if WIN32 then 'diff.exe'
143
- else
144
- if quiet {system "gdiff", __FILE__, __FILE__} then 'gdiff'
145
- else 'diff' end
146
- end unless defined? DIFF
147
-
148
- SUDO = if WIN32 then ''
149
- else
150
- if quiet {system 'which sudo'} then 'sudo'
151
- else '' end
152
- end
153
-
154
- RCOV = WIN32 ? 'rcov.bat' : 'rcov'
155
- RDOC = WIN32 ? 'rdoc.bat' : 'rdoc'
156
- GEM = WIN32 ? 'gem.bat' : 'gem'
157
-
158
- %w(rcov spec/rake/spectask rubyforge bones facets/ansicode).each do |lib|
159
- begin
160
- require lib
161
- Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", true}
162
- rescue LoadError
163
- Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", false}
164
- end
165
- end
166
- HAVE_SVN = (Dir.entries(Dir.pwd).include?('.svn') and
167
- system("svn --version 2>&1 > #{DEV_NULL}"))
168
- HAVE_GIT = (Dir.entries(Dir.pwd).include?('.git') and
169
- system("git --version 2>&1 > #{DEV_NULL}"))
170
-
171
- # Add bones as a development dependency
172
- #
173
- if HAVE_BONES
174
- PROJ.gem.development_dependencies << ['bones', ">= #{Bones::VERSION}"]
175
- end
176
-
177
- # Reads a file at +path+ and spits out an array of the +paragraphs+
178
- # specified.
179
- #
180
- # changes = paragraphs_of('History.txt', 0..1).join("\n\n")
181
- # summary, *description = paragraphs_of('README.md', 3, 3..8)
182
- #
183
- def paragraphs_of( path, *paragraphs )
184
- title = String === paragraphs.first ? paragraphs.shift : nil
185
- ary = File.read(path).delete("\r").split(/\n\n+/)
186
-
187
- result = if title
188
- tmp, matching = [], false
189
- rgxp = %r/^=+\s*#{Regexp.escape(title)}/i
190
- paragraphs << (0..-1) if paragraphs.empty?
191
-
192
- ary.each do |val|
193
- if val =~ rgxp
194
- break if matching
195
- matching = true
196
- rgxp = %r/^=+/i
197
- elsif matching
198
- tmp << val
199
- end
200
- end
201
- tmp
202
- else ary end
203
-
204
- result.values_at(*paragraphs)
205
- end
206
-
207
- # Adds the given gem _name_ to the current project's dependency list. An
208
- # optional gem _version_ can be given. If omitted, the newest gem version
209
- # will be used.
210
- #
211
- def depend_on( name, version = nil )
212
- spec = Gem.source_index.find_name(name).last
213
- version = spec.version.to_s if version.nil? and !spec.nil?
214
-
215
- PROJ.gem.dependencies << case version
216
- when nil; [name]
217
- when %r/^\d/; [name, ">= #{version}"]
218
- else [name, version] end
219
- end
220
-
221
- # Adds the given arguments to the include path if they are not already there
222
- #
223
- def ensure_in_path( *args )
224
- args.each do |path|
225
- path = File.expand_path(path)
226
- $:.unshift(path) if test(?d, path) and not $:.include?(path)
227
- end
228
- end
229
-
230
- # Find a rake task using the task name and remove any description text. This
231
- # will prevent the task from being displayed in the list of available tasks.
232
- #
233
- def remove_desc_for_task( names )
234
- Array(names).each do |task_name|
235
- task = Rake.application.tasks.find {|t| t.name == task_name}
236
- next if task.nil?
237
- task.instance_variable_set :@comment, nil
238
- end
239
- end
240
-
241
- # Change working directories to _dir_, call the _block_ of code, and then
242
- # change back to the original working directory (the current directory when
243
- # this method was called).
244
- #
245
- def in_directory( dir, &block )
246
- curdir = pwd
247
- begin
248
- cd dir
249
- return block.call
250
- ensure
251
- cd curdir
252
- end
253
- end
254
-
255
- # Scans the current working directory and creates a list of files that are
256
- # candidates to be in the manifest.
257
- #
258
- def manifest
259
- files = []
260
- exclude = PROJ.exclude.dup
261
- comment = %r/^\s*#/
262
-
263
- # process the ignore file and add the items there to the exclude list
264
- if test(?f, PROJ.ignore_file)
265
- ary = []
266
- File.readlines(PROJ.ignore_file).each do |line|
267
- next if line =~ comment
268
- line.chomp!
269
- line.strip!
270
- next if line.nil? or line.empty?
271
-
272
- glob = line =~ %r/\*\./ ? File.join('**', line) : line
273
- Dir.glob(glob).each {|fn| ary << "^#{Regexp.escape(fn)}"}
274
- end
275
- exclude.concat ary
276
- end
277
-
278
- # generate a regular expression from the exclude list
279
- exclude = Regexp.new(exclude.join('|'))
280
-
281
- Find.find '.' do |path|
282
- path.sub! %r/^(\.\/|\/)/o, ''
283
- next unless test ?f, path
284
- next if path =~ exclude
285
- files << path
286
- end
287
- files.sort!
288
- end
289
-
290
- # We need a "valid" method that determines if a string is suitable for use
291
- # in the gem specification.
292
- #
293
- class Object
294
- def valid?
295
- return !(self.empty? or self == "\000") if self.respond_to?(:to_str)
296
- return false
297
- end
298
- end
299
-
300
- # EOF
data/tasks/spec.rake DELETED
@@ -1,59 +0,0 @@
1
-
2
- if HAVE_SPEC_RAKE_SPECTASK and not PROJ.spec.files.to_a.empty?
3
- require 'spec/rake/verify_rcov'
4
-
5
- namespace :spec do
6
-
7
- desc 'Run all specs with basic output'
8
- Spec::Rake::SpecTask.new(:run) do |t|
9
- # Allow user to specify specs to run at command line
10
- _, spec_files = ARGV
11
- spec_files ||= PROJ.spec.files
12
- t.verbose = true
13
-
14
- t.ruby_opts = PROJ.ruby_opts
15
- t.spec_opts = PROJ.spec.opts
16
- t.spec_files = spec_files
17
- t.libs += PROJ.libs
18
- end
19
-
20
- desc 'Run all specs with text output'
21
- Spec::Rake::SpecTask.new(:specdoc) do |t|
22
- t.ruby_opts = PROJ.ruby_opts
23
- t.spec_opts = PROJ.spec.opts + ['--format', 'specdoc']
24
- t.spec_files = PROJ.spec.files
25
- t.libs += PROJ.libs
26
- end
27
-
28
- if HAVE_RCOV
29
- desc 'Run all specs with RCov'
30
- Spec::Rake::SpecTask.new(:rcov) do |t|
31
- t.ruby_opts = PROJ.ruby_opts
32
- t.spec_opts = PROJ.spec.opts
33
- t.spec_files = PROJ.spec.files
34
- t.libs += PROJ.libs
35
- t.rcov = true
36
- t.rcov_dir = PROJ.rcov.dir
37
- t.rcov_opts = PROJ.rcov.opts + ['--exclude', 'spec']
38
- end
39
-
40
- RCov::VerifyTask.new(:verify) do |t|
41
- t.threshold = PROJ.rcov.threshold
42
- t.index_html = File.join(PROJ.rcov.dir, 'index.html')
43
- t.require_exact_threshold = PROJ.rcov.threshold_exact
44
- end
45
-
46
- task :verify => :rcov
47
- remove_desc_for_task %w(spec:clobber_rcov)
48
- end
49
-
50
- end # namespace :spec
51
-
52
- desc 'Alias to spec:run'
53
- task :spec => 'spec:run'
54
-
55
- task :clobber => 'spec:clobber_rcov' if HAVE_RCOV
56
-
57
- end # if HAVE_SPEC_RAKE_SPECTASK
58
-
59
- # EOF
data/tasks/svn.rake DELETED
@@ -1,47 +0,0 @@
1
-
2
- if HAVE_SVN
3
-
4
- unless PROJ.svn.root
5
- info = %x/svn info ./
6
- m = %r/^Repository Root:\s+(.*)$/.match(info)
7
- PROJ.svn.root = (m.nil? ? '' : m[1])
8
- end
9
- PROJ.svn.root = File.join(PROJ.svn.root, PROJ.svn.path) unless PROJ.svn.path.empty?
10
-
11
- namespace :svn do
12
-
13
- # A prerequisites task that all other tasks depend upon
14
- task :prereqs
15
-
16
- desc 'Show tags from the SVN repository'
17
- task :show_tags => 'svn:prereqs' do |t|
18
- tags = %x/svn list #{File.join(PROJ.svn.root, PROJ.svn.tags)}/
19
- tags.gsub!(%r/\/$/, '')
20
- tags = tags.split("\n").sort {|a,b| b <=> a}
21
- puts tags
22
- end
23
-
24
- desc 'Create a new tag in the SVN repository'
25
- task :create_tag => 'svn:prereqs' do |t|
26
- v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
27
- abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version
28
-
29
- svn = PROJ.svn
30
- trunk = File.join(svn.root, svn.trunk)
31
- tag = "%s-%s" % [PROJ.name, PROJ.version]
32
- tag = File.join(svn.root, svn.tags, tag)
33
- msg = "Creating tag for #{PROJ.name} version #{PROJ.version}"
34
-
35
- puts "Creating SVN tag '#{tag}'"
36
- unless system "svn cp -m '#{msg}' #{trunk} #{tag}"
37
- abort "Tag creation failed"
38
- end
39
- end
40
-
41
- end # namespace :svn
42
-
43
- task 'gem:release' => 'svn:create_tag'
44
-
45
- end # if HAVE_SVN
46
-
47
- # EOF
data/tasks/test.rake DELETED
@@ -1,42 +0,0 @@
1
-
2
- if test(?e, PROJ.test.file) or not PROJ.test.files.to_a.empty?
3
- require 'rake/testtask'
4
-
5
-
6
-
7
- namespace :test do
8
-
9
- Rake::TestTask.new(:run) do |t|
10
- t.libs = PROJ.libs
11
- t.test_files = if test(?f, PROJ.test.file) then [PROJ.test.file]
12
- else PROJ.test.files end
13
- t.ruby_opts += PROJ.ruby_opts
14
- t.ruby_opts += PROJ.test.opts
15
- end
16
-
17
- if HAVE_RCOV
18
- desc 'Run rcov on the unit tests'
19
- task :rcov => :clobber_rcov do
20
- opts = PROJ.rcov.opts.dup << '-o' << PROJ.rcov.dir
21
- opts = opts.join(' ')
22
- files = if test(?f, PROJ.test.file) then [PROJ.test.file]
23
- else PROJ.test.files end
24
- files = files.join(' ')
25
- sh "#{RCOV} #{files} #{opts}"
26
- end
27
-
28
- task :clobber_rcov do
29
- rm_r 'coverage' rescue nil
30
- end
31
- end
32
-
33
- end # namespace :test
34
-
35
- desc 'Alias to test:run'
36
- task :test => ['ant:retrieve', 'test:run']
37
-
38
- task :clobber => 'test:clobber_rcov' if HAVE_RCOV
39
-
40
- end
41
-
42
- # EOF
data/test/data/data1.txt DELETED
@@ -1,14 +0,0 @@
1
- SMITH 1.006 1.006 1
2
- JOHNSON 0.810 1.816 2
3
- WILLIAMS 0.699 2.515 3
4
- JONES 0.621 3.136 4
5
- BROWN 0.621 3.757 5
6
- DAVIS 0.480 4.237 6
7
- MILLER 0.424 4.660 7
8
- WILSON 0.339 5.000 8
9
- MOORE 0.312 5.312 9
10
- TAYLOR 0.311 5.623 10
11
- ANDERSON 0.311 5.934 11
12
- THOMAS 0.311 6.245 12
13
- JACKSON 0.310 6.554 13
14
- WHITE 0.279 6.834 14
data/test/data/data2.txt DELETED
@@ -1,14 +0,0 @@
1
- SMITH 1 Paris
2
- JOHNSON 2 New-York
3
- WILLIAMS 3 London
4
- JONES 4 San Francisco
5
- BROWN 5 Dublin
6
- DAVIS 6 Rome
7
- MILLER 7 Munich
8
- WILSON 8 Berlin
9
- MOORE 9 Amsterdam
10
- TAYLOR 10 Bruxelles
11
- ANDERSON 11 Paris
12
- THOMAS 12 Lyon
13
- JACKSON 13 Barcelona
14
- WHITE 14 Athene
data/test/mock_assemblies.rb DELETED
@@ -1,55 +0,0 @@
1
- require 'cascading'
2
-
3
- module MockAssemblies
4
- def mock_assembly(&block)
5
- assembly = nil
6
- flow 'test' do
7
- source 'test', tap('test/data/data1.txt')
8
- assembly = assembly 'test', &block
9
- sink 'test', tap('output/test_mock_assembly')
10
- end
11
- assembly
12
- end
13
-
14
- def mock_branched_assembly(&block)
15
- assembly = nil
16
- flow 'mock_branched_assembly' do
17
- source 'data1', tap('test/data/data1.txt')
18
-
19
- assembly 'data1' do
20
- branch 'test1' do
21
- pass
22
- end
23
- branch 'test2' do
24
- pass
25
- end
26
- end
27
-
28
- assembly = assembly 'test', &block
29
-
30
- sink 'test', tap('output/test_mock_branched_assembly')
31
- end
32
- assembly
33
- end
34
-
35
- def mock_two_input_assembly(&block)
36
- assembly = nil
37
- flow 'mock_two_input_assembly' do
38
- source 'test1', tap('test/data/data1.txt')
39
- source 'test2', tap('test/data/data2.txt')
40
-
41
- assembly 'test1' do
42
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
43
- end
44
-
45
- assembly 'test2' do
46
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'id', 'town'], :output => ['name', 'id', 'town']
47
- end
48
-
49
- assembly = assembly 'test', &block
50
-
51
- sink 'test', tap('output/test_mock_two_input_assembly')
52
- end
53
- assembly
54
- end
55
- end