cascading.jruby 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/HACKING.md +15 -0
- data/History.txt +0 -0
- data/LICENSE.txt +165 -0
- data/README.md +7 -0
- data/Rakefile +45 -0
- data/bin/make_job +81 -0
- data/lib/cascading/assembly.rb +726 -0
- data/lib/cascading/base.rb +63 -0
- data/lib/cascading/cascade.rb +63 -0
- data/lib/cascading/cascading.rb +134 -0
- data/lib/cascading/cascading_exception.rb +30 -0
- data/lib/cascading/expr_stub.rb +33 -0
- data/lib/cascading/ext/array.rb +15 -0
- data/lib/cascading/flow.rb +168 -0
- data/lib/cascading/operations.rb +204 -0
- data/lib/cascading/scope.rb +160 -0
- data/lib/cascading.rb +63 -0
- data/samples/branch.rb +31 -0
- data/samples/cascading.rb +41 -0
- data/samples/copy.rb +18 -0
- data/samples/data/data2.txt +88799 -0
- data/samples/data/data_join1.txt +3 -0
- data/samples/data/data_join2.txt +3 -0
- data/samples/data/data_join3.txt +3 -0
- data/samples/join.rb +32 -0
- data/samples/logwordcount.rb +22 -0
- data/samples/project.rb +24 -0
- data/samples/rename.rb +21 -0
- data/samples/scorenames.rb +20 -0
- data/samples/splitter.rb +20 -0
- data/samples/union.rb +35 -0
- data/spec/cascading_spec.rb +100 -0
- data/spec/expr_spec.rb +10 -0
- data/spec/primary_key_spec.rb +119 -0
- data/spec/resource/join_input.txt +3 -0
- data/spec/resource/test_input.txt +4 -0
- data/spec/scope_spec.rb +174 -0
- data/spec/spec.opts +6 -0
- data/spec/spec_helper.rb +5 -0
- data/spec/spec_util.rb +188 -0
- data/src/cascading/jruby/Main.java +38 -0
- data/src/cascading/jruby/runner.rb +6 -0
- data/tags +238 -0
- data/tasks/ann.rake +80 -0
- data/tasks/ant.rake +11 -0
- data/tasks/bones.rake +20 -0
- data/tasks/gem.rake +206 -0
- data/tasks/git.rake +40 -0
- data/tasks/notes.rake +27 -0
- data/tasks/post_load.rake +34 -0
- data/tasks/rdoc.rake +50 -0
- data/tasks/rubyforge.rake +55 -0
- data/tasks/samples.rake +13 -0
- data/tasks/setup.rb +300 -0
- data/tasks/spec.rake +59 -0
- data/tasks/svn.rake +47 -0
- data/tasks/test.rake +42 -0
- data/test/data/data1.txt +14 -0
- data/test/data/data2.txt +14 -0
- data/test/test_assembly.rb +321 -0
- data/test/test_cascading.rb +49 -0
- data/test/test_flow.rb +15 -0
- metadata +137 -0
data/tasks/gem.rake
ADDED
@@ -0,0 +1,206 @@
|
|
1
|
+
|
2
|
+
require 'find'
|
3
|
+
require 'rake/packagetask'
|
4
|
+
require 'rubygems/user_interaction'
|
5
|
+
require 'rubygems/builder'
|
6
|
+
|
7
|
+
module Bones
  # Rake package task that builds a RubyGem from a Gem::Specification.
  # When the project has no local tasks/setup.rb, the task files found in
  # the Bones gem directory are staged into the package as well.
  class GemPackageTask < Rake::PackageTask
    # Gem specification holding the metadata for this package. The name,
    # version and package_files are taken from it, so they do not need to
    # be supplied separately.
    attr_accessor :gem_spec

    # Task files collected from the Bones gem directory
    attr_reader :bones_files

    # Build the task library for +gem_spec+. When a block is given the task
    # is yielded for configuration and then defined immediately; without a
    # block the caller must invoke +define+ itself.
    def initialize(gem_spec)
      init(gem_spec)
      return unless block_given?

      yield self
      define
    end

    # Set up the task state without yielding or defining anything.
    def init(gem)
      super(gem.name, gem.version)
      @gem_spec = gem
      @package_files += gem_spec.files if gem_spec.files
      @bones_files = []

      # The Bones-supplied task files are only collected when the project
      # does not carry its own tasks/setup.rb.
      local_setup = File.join(Dir.pwd, %w[tasks setup.rb])
      return if File.exist?(local_setup)

      bones_files.concat(Dir.glob(::Bones.path(%w[lib bones tasks *])))
    end

    # Create the Rake tasks and actions for this GemPackageTask. (Called
    # automatically when a block is given to +new+.)
    def define
      super
      task :prereqs

      gem_path = "#{package_dir_path}/#{gem_file}"
      task :package => ['gem:prereqs', gem_path]

      # Build the gem inside the package directory, then move it up one level.
      file gem_path => [package_dir_path] + package_files + bones_files do
        when_writing("Creating GEM") do
          chdir(package_dir_path) do
            Gem::Builder.new(gem_spec).build
            verbose(true) { mv gem_file, "../#{gem_file}" }
          end
        end
      end

      # Stage the Bones task files under tasks/ in the package directory and
      # register them in the gem's file list.
      file package_dir_path => bones_files do
        begin
          mkdir_p package_dir
        rescue StandardError
          nil
        end

        staged = bones_files.map { |fn| File.join('tasks', File.basename(fn)) }
        gem_spec.files = (gem_spec.files + staged).sort

        bones_files.zip(staged).each do |fn, base_fn|
          target = File.join(package_dir_path, base_fn)
          parent = File.dirname(target)
          mkdir_p(parent) unless File.exist?(parent)

          if File.directory?(fn)
            mkdir_p(target)
            next
          end

          raise "file name conflict for '#{base_fn}' (conflicts with '#{fn}')" if File.exist?(target)
          safe_ln(fn, target)
        end
      end
    end

    # File name of the built gem; the platform is appended for any platform
    # other than Gem::Platform::RUBY.
    def gem_file
      platform = @gem_spec.platform
      return "#{package_name}.gem" if platform == Gem::Platform::RUBY

      "#{package_name}-#{platform}.gem"
    end
  end # class GemPackageTask
end # module Bones
|
92
|
+
|
93
|
+
namespace :gem do
|
94
|
+
|
95
|
+
# Assemble the gem specification from the project settings held in PROJ.
PROJ.gem._spec = Gem::Specification.new do |spec|
  # Identity and descriptive metadata
  spec.name = PROJ.name
  spec.version = PROJ.version
  spec.summary = PROJ.summary
  spec.authors = Array(PROJ.authors)
  spec.email = PROJ.email
  spec.homepage = Array(PROJ.url).first
  spec.rubyforge_project = PROJ.rubyforge.name

  spec.description = PROJ.description

  # Each dependency entry is splatted into the add_* call
  PROJ.gem.dependencies.each { |dep| spec.add_dependency(*dep) }
  PROJ.gem.development_dependencies.each { |dep| spec.add_development_dependency(*dep) }

  # Packaged files, executables (base names only) and native extensions
  spec.files = PROJ.gem.files
  spec.executables = PROJ.gem.executables.map { |path| File.basename(path) }
  spec.extensions = PROJ.gem.files.grep(%r/extconf\.rb$/)

  spec.bindir = 'bin'
  lib_dirs = Dir["{#{PROJ.libs.join(',')}}"]
  spec.require_paths = lib_dirs unless lib_dirs.empty?

  # Extra rdoc files: anything matching the include patterns that is not a
  # .rb file, not under ext, and not matched by the configured excludes.
  doc_include = Regexp.new(PROJ.rdoc.include.join('|'))
  doc_exclude = Regexp.new((PROJ.rdoc.exclude + %w[\.rb$ ^(\.\/|\/)?ext]).join('|'))
  extra_docs = PROJ.gem.files.select do |path|
    path !~ doc_exclude && path =~ doc_include
  end
  spec.rdoc_options = PROJ.rdoc.opts + ['--main', PROJ.rdoc.main]
  spec.extra_rdoc_files = extra_docs
  spec.has_rdoc = true

  # A single test file takes precedence over the list of test files
  if File.file?(PROJ.test.file)
    spec.test_file = PROJ.test.file
  else
    spec.test_files = PROJ.test.files.to_a
  end

  # User-supplied extras: a Proc receives the current attribute value,
  # anything else is assigned through the attribute writer.
  PROJ.gem.extras.each do |attr, setting|
    if setting.is_a?(Proc)
      setting.call(spec.send(attr))
    else
      spec.send("#{attr}=", setting)
    end
  end
end # Gem::Specification.new
|
151
|
+
|
152
|
+
# Define the packaging tasks, honouring the project's tar/zip settings.
Bones::GemPackageTask.new(PROJ.gem._spec) do |package_task|
  package_task.need_tar = PROJ.gem.need_tar
  package_task.need_zip = PROJ.gem.need_zip
end

desc 'Show information about the gem'
task :debug => ['gem:prereqs'] do
  puts(PROJ.gem._spec.to_ruby)
end

desc 'Write the gemspec '
task :spec => ['gem:prereqs'] do
  # Writes <project name>.gemspec in the current directory
  gemspec_path = "#{PROJ.name}.gemspec"
  File.open(gemspec_path, 'w') { |io| io.write(PROJ.gem._spec.to_ruby) }
end
|
168
|
+
|
169
|
+
desc 'Install the gem'
task :install => [:clobber, 'gem:package'] do
  built_gem = "pkg/#{PROJ.gem._spec.full_name}"
  sh "#{SUDO} #{GEM} install --local #{built_gem}"

  # use this version of the command for rubygems > 1.0.0
  #sh "#{SUDO} #{GEM} install --no-update-sources pkg/#{PROJ.gem._spec.full_name}"
end

desc 'Uninstall the gem'
task :uninstall do
  # Only shell out when this exact version is currently installed
  installed = Gem.source_index.find_name(PROJ.name)
  version_installed = installed && installed.any? { |spec| spec.version.to_s == PROJ.version }

  if version_installed
    sh "#{SUDO} #{GEM} uninstall --version '#{PROJ.version}' --ignore-dependencies --executables #{PROJ.name}"
  end
end

desc 'Reinstall the gem'
task :reinstall => %w[uninstall install].map { |name| name.to_sym }

desc 'Cleanup the gem'
task :cleanup do
  gem_name = PROJ.gem._spec.name
  sh "#{SUDO} #{GEM} cleanup #{gem_name}"
end
|
192
|
+
|
193
|
+
desc 'Remove gem build files'
task :clean do
  # Use the FileUtils helper that Rake exposes instead of shelling out to
  # `rm -rf`, so the task also works where no `rm` binary is available and
  # does not depend on a subshell.
  rm_rf 'pkg'
end
|
197
|
+
end # namespace :gem
|
198
|
+
|
199
|
+
|
200
|
+
# Top-level convenience task. Note that besides gem:package it also lists
# ant:build as a prerequisite.
desc 'Alias to gem:package'
task :gem => %w[ant:build gem:package]

# Hook the package clobbering into the global :clobber task and drop the
# description of gem:clobber_package.
task :clobber => ['gem:clobber_package']
remove_desc_for_task('gem:clobber_package')
|
205
|
+
|
206
|
+
# EOF
|
data/tasks/git.rake
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
|
2
|
+
# Git tasks; only defined when HAVE_GIT is truthy.
if HAVE_GIT

  namespace :git do

    # A prerequisites task that all other tasks depend upon
    task :prereqs

    desc 'Show tags from the Git repository'
    task :show_tags => 'git:prereqs' do
      puts %x/git tag/
    end

    desc 'Create a new tag in the Git repository'
    task :create_tag => 'git:prereqs' do
      # The VERSION environment variable must be given and must equal the
      # project version; otherwise the task aborts.
      v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
      abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version

      tag = "%s-%s" % [PROJ.name, PROJ.version]
      msg = "Creating tag for #{PROJ.name} version #{PROJ.version}"

      puts "Creating Git tag '#{tag}'"
      # Arguments are passed as a list rather than an interpolated shell
      # string, so quotes or shell metacharacters in the project name or
      # version cannot break (or alter) the command line.
      unless system('git', 'tag', '-a', '-m', msg, tag)
        abort "Tag creation failed"
      end

      # Push the tag only when a remote named "origin" is configured
      if %x/git remote/ =~ %r/^origin\s*$/
        unless system('git', 'push', 'origin', tag)
          abort "Could not push tag to remote Git repository"
        end
      end
    end

  end # namespace :git

  # Creating the tag becomes a prerequisite of gem:release
  task 'gem:release' => 'git:create_tag'

end # if HAVE_GIT
|
39
|
+
|
40
|
+
# EOF
|
data/tasks/notes.rake
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
# Annotation tasks; only defined when HAVE_BONES is truthy.
if HAVE_BONES

  # Removes every top-level task name after the first from the running
  # application's task list and returns them joined by spaces; returns nil
  # when only one top-level task was given.
  extra_words = lambda do |current_task|
    requested = current_task.application.top_level_tasks
    requested.length > 1 ? requested.slice!(1..-1).join(' ') : nil
  end

  desc "Enumerate all annotations"
  task :notes do |t|
    id = extra_words.call(t)
    all_tags = PROJ.notes.tags.join('|')
    Bones::AnnotationExtractor.enumerate(PROJ, all_tags, id, :tag => true)
  end

  namespace :notes do
    # One task per configured annotation tag, named after the downcased tag
    PROJ.notes.tags.each do |tag|
      desc "Enumerate all #{tag} annotations"
      task tag.downcase.to_sym do |t|
        Bones::AnnotationExtractor.enumerate(PROJ, tag, extra_words.call(t))
      end
    end
  end

end # if HAVE_BONES
|
26
|
+
|
27
|
+
# EOF
|
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
# This file does not define any rake tasks. It is used to load some project
|
3
|
+
# settings if they are not defined by the user.
|
4
|
+
|
5
|
+
# Exclude the announcement file, the ignore file and the rdoc / rcov output
# directories. The patterns are appended as one nested array, which the
# flattening pass below folds into the list.
exact_file_patterns = [PROJ.ann.file, PROJ.ignore_file].map { |name| "^#{Regexp.escape(name)}$" }
directory_patterns  = [PROJ.rdoc.dir, PROJ.rcov.dir].map { |dir| "^#{Regexp.escape(dir)}/" }
PROJ.exclude << (exact_file_patterns + directory_patterns)

# Recursively flatten every Array value stored in an OpenStruct's table,
# descending into nested OpenStructs. The dependency lists are skipped.
# The lambda receives itself as first argument so it can recurse.
unflattened_keys = [:dependencies, :development_dependencies]
flatten_arrays = lambda do |this, os|
  os.instance_variable_get(:@table).each do |key, val|
    next if unflattened_keys.include?(key)

    if val.is_a?(Array)
      val.flatten!
    elsif val.is_a?(OpenStruct)
      this.call(this, val)
    end
  end
end
flatten_arrays.call(flatten_arrays, PROJ)

# Fill in any settings the user left unset.
PROJ.changes ||= paragraphs_of(PROJ.history_file, 0..1).join("\n\n")

PROJ.description ||= paragraphs_of(PROJ.readme_file, 'description').join("\n\n")

# Summary defaults to the description text up to the first period
PROJ.summary ||= PROJ.description.split('.').first

PROJ.gem.files ||= manifest

# Executables default to the packaged files whose path starts with "bin"
PROJ.gem.executables ||= PROJ.gem.files.select { |path| path =~ %r/^bin/ }

PROJ.rdoc.main ||= PROJ.readme_file
|
33
|
+
|
34
|
+
# EOF
|
data/tasks/rdoc.rake
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
|
2
|
+
require 'rake/rdoctask'
|
3
|
+
|
4
|
+
namespace :doc do

  desc 'Generate RDoc documentation'
  Rake::RDocTask.new do |rd|
    rdoc = PROJ.rdoc
    rd.main = rdoc.main
    rd.rdoc_dir = rdoc.dir

    # Document every packaged file that matches the include patterns and is
    # not matched by an exclude pattern. An empty exclude list would compile
    # to an empty regexp, which matches (and so excludes) every file; treat
    # it as "exclude nothing" instead.
    incl = Regexp.new(rdoc.include.join('|'))
    excl = rdoc.exclude.empty? ? nil : Regexp.new(rdoc.exclude.join('|'))
    files = PROJ.gem.files.find_all do |fn|
      next false if excl && fn =~ excl
      fn =~ incl
    end
    rd.rdoc_files.push(*files)

    title = "#{PROJ.name}-#{PROJ.version} Documentation"

    rf_name = PROJ.rubyforge.name
    title = "#{rf_name}'s " + title if rf_name.valid? && rf_name != title

    # Hand the title to the task's own attribute rather than pushing a
    # single "-t <title>" option string, so a title containing spaces is
    # passed to rdoc as one value without a stray leading space.
    rd.title = title
    rd.options.concat(rdoc.opts)
  end

  desc 'Generate ri locally for testing'
  task :ri => :clobber_ri do
    sh "#{RDOC} --ri -o ri ."
  end

  task :clobber_ri do
    # Forced removal: a missing 'ri' directory is not an error
    rm_rf 'ri'
  end

end # namespace :doc

desc 'Alias to doc:rdoc'
task :doc => 'doc:rdoc'

desc 'Remove all build products'
task :clobber => %w(doc:clobber_rdoc doc:clobber_ri)

remove_desc_for_task %w(doc:clobber_rdoc)
|
49
|
+
|
50
|
+
# EOF
|
@@ -0,0 +1,55 @@
|
|
1
|
+
|
2
|
+
# RubyForge release tasks; only defined when a RubyForge project name is
# configured and HAVE_RUBYFORGE is truthy.
if PROJ.rubyforge.name.valid? && HAVE_RUBYFORGE

  require 'rubyforge'
  require 'rake/contrib/sshpublisher'

  namespace :gem do
    desc 'Package and upload to RubyForge'
    task :release => [:clobber, 'gem'] do
      # VERSION must be supplied and must equal the project version
      v = ENV['VERSION']
      abort 'Must supply VERSION=x.y.z' unless v
      abort "Versions don't match #{v} vs #{PROJ.version}" unless v == PROJ.version

      forge_project = PROJ.rubyforge.name
      pkg = "pkg/#{PROJ.gem._spec.full_name}"

      # With $DEBUG set, print a description of the release calls
      if $DEBUG
        puts "release_id = rf.add_release #{forge_project.inspect}, #{PROJ.name.inspect}, #{PROJ.version.inspect}, \"#{pkg}.tgz\""
        puts "rf.add_file #{forge_project.inspect}, #{PROJ.name.inspect}, release_id, \"#{pkg}.gem\""
      end

      rf = RubyForge.new
      # Configuration is best effort: errors raised by configure are ignored
      begin
        rf.configure
      rescue StandardError
        nil
      end
      puts 'Logging in'
      rf.login

      # Release notes and changes are only set when the project defines them
      settings = rf.userconfig
      settings['release_notes'] = PROJ.description if PROJ.description
      settings['release_changes'] = PROJ.changes if PROJ.changes
      settings['preformatted'] = true

      # Every built artifact whose path starts with the package name is uploaded
      artifacts = Dir.glob("#{pkg}*.*")

      puts "Releasing #{PROJ.name} v. #{PROJ.version}"
      rf.add_release(forge_project, PROJ.name, PROJ.version, *artifacts)
    end
  end # namespace :gem


  namespace :doc do
    desc "Publish RDoc to RubyForge"
    task :release => %w(doc:clobber_rdoc doc:rdoc) do
      # The user name is read from the RubyForge user configuration file
      config_path = File.expand_path('~/.rubyforge/user-config.yml')
      config = YAML.load(File.read(config_path))

      host = "#{config['username']}@rubyforge.org"
      remote_dir = "/var/www/gforge-projects/#{PROJ.rubyforge.name}/"
      subdir = PROJ.rdoc.remote_dir
      remote_dir << subdir if subdir

      publisher = Rake::SshDirPublisher.new(host, remote_dir, PROJ.rdoc.dir)
      publisher.upload
    end
  end # namespace :doc

end # if HAVE_RUBYFORGE
|
54
|
+
|
55
|
+
# EOF
|
data/tasks/samples.rake
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
namespace :samples do
  desc 'Run all sample applications'
  task :run do
    # Run each executable script under samples/ in turn, stopping with an
    # error at the first one that does not succeed.
    Dir.glob('samples/*.rb').each do |script|
      if File.executable?(script)
        raise "#{script} sample app failed" unless system(script)
      end
    end
  end
end

desc 'Alias to samples:run'
task :samples => ['samples:run']
|