cascading.jruby 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/HACKING.md +15 -0
- data/History.txt +0 -0
- data/LICENSE.txt +165 -0
- data/README.md +7 -0
- data/Rakefile +45 -0
- data/bin/make_job +81 -0
- data/lib/cascading/assembly.rb +726 -0
- data/lib/cascading/base.rb +63 -0
- data/lib/cascading/cascade.rb +63 -0
- data/lib/cascading/cascading.rb +134 -0
- data/lib/cascading/cascading_exception.rb +30 -0
- data/lib/cascading/expr_stub.rb +33 -0
- data/lib/cascading/ext/array.rb +15 -0
- data/lib/cascading/flow.rb +168 -0
- data/lib/cascading/operations.rb +204 -0
- data/lib/cascading/scope.rb +160 -0
- data/lib/cascading.rb +63 -0
- data/samples/branch.rb +31 -0
- data/samples/cascading.rb +41 -0
- data/samples/copy.rb +18 -0
- data/samples/data/data2.txt +88799 -0
- data/samples/data/data_join1.txt +3 -0
- data/samples/data/data_join2.txt +3 -0
- data/samples/data/data_join3.txt +3 -0
- data/samples/join.rb +32 -0
- data/samples/logwordcount.rb +22 -0
- data/samples/project.rb +24 -0
- data/samples/rename.rb +21 -0
- data/samples/scorenames.rb +20 -0
- data/samples/splitter.rb +20 -0
- data/samples/union.rb +35 -0
- data/spec/cascading_spec.rb +100 -0
- data/spec/expr_spec.rb +10 -0
- data/spec/primary_key_spec.rb +119 -0
- data/spec/resource/join_input.txt +3 -0
- data/spec/resource/test_input.txt +4 -0
- data/spec/scope_spec.rb +174 -0
- data/spec/spec.opts +6 -0
- data/spec/spec_helper.rb +5 -0
- data/spec/spec_util.rb +188 -0
- data/src/cascading/jruby/Main.java +38 -0
- data/src/cascading/jruby/runner.rb +6 -0
- data/tags +238 -0
- data/tasks/ann.rake +80 -0
- data/tasks/ant.rake +11 -0
- data/tasks/bones.rake +20 -0
- data/tasks/gem.rake +206 -0
- data/tasks/git.rake +40 -0
- data/tasks/notes.rake +27 -0
- data/tasks/post_load.rake +34 -0
- data/tasks/rdoc.rake +50 -0
- data/tasks/rubyforge.rake +55 -0
- data/tasks/samples.rake +13 -0
- data/tasks/setup.rb +300 -0
- data/tasks/spec.rake +59 -0
- data/tasks/svn.rake +47 -0
- data/tasks/test.rake +42 -0
- data/test/data/data1.txt +14 -0
- data/test/data/data2.txt +14 -0
- data/test/test_assembly.rb +321 -0
- data/test/test_cascading.rb +49 -0
- data/test/test_flow.rb +15 -0
- metadata +137 -0
data/HACKING.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Hacking
|
2
|
+
|
3
|
+
Some hacking info on `cascading.jruby`:
|
4
|
+
|
5
|
+
`cascading.jruby` can be packaged as a gem. To do so, you must generate the necessary packaging files:
|
6
|
+
|
7
|
+
ant build; jruby -S rake gem
|
8
|
+
|
9
|
+
will produce the gem in the pkg/ sub-directory. After that, just cd to this directory and:
|
10
|
+
|
11
|
+
jruby -S gem install cascading.jruby-xxx.gem
|
12
|
+
|
13
|
+
The `Cascading::Operations` module is mixed into the `Cascading::Assembly` class to provide some shortcuts for common operations.
|
14
|
+
|
15
|
+
The file `cascading/cascading.rb` defines global helper methods for Cascading, such as tap creation and fields creation.
|
data/History.txt
ADDED
File without changes
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
GNU LESSER GENERAL PUBLIC LICENSE
|
2
|
+
Version 3, 29 June 2007
|
3
|
+
|
4
|
+
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
5
|
+
Everyone is permitted to copy and distribute verbatim copies
|
6
|
+
of this license document, but changing it is not allowed.
|
7
|
+
|
8
|
+
|
9
|
+
This version of the GNU Lesser General Public License incorporates
|
10
|
+
the terms and conditions of version 3 of the GNU General Public
|
11
|
+
License, supplemented by the additional permissions listed below.
|
12
|
+
|
13
|
+
0. Additional Definitions.
|
14
|
+
|
15
|
+
As used herein, "this License" refers to version 3 of the GNU Lesser
|
16
|
+
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
17
|
+
General Public License.
|
18
|
+
|
19
|
+
"The Library" refers to a covered work governed by this License,
|
20
|
+
other than an Application or a Combined Work as defined below.
|
21
|
+
|
22
|
+
An "Application" is any work that makes use of an interface provided
|
23
|
+
by the Library, but which is not otherwise based on the Library.
|
24
|
+
Defining a subclass of a class defined by the Library is deemed a mode
|
25
|
+
of using an interface provided by the Library.
|
26
|
+
|
27
|
+
A "Combined Work" is a work produced by combining or linking an
|
28
|
+
Application with the Library. The particular version of the Library
|
29
|
+
with which the Combined Work was made is also called the "Linked
|
30
|
+
Version".
|
31
|
+
|
32
|
+
The "Minimal Corresponding Source" for a Combined Work means the
|
33
|
+
Corresponding Source for the Combined Work, excluding any source code
|
34
|
+
for portions of the Combined Work that, considered in isolation, are
|
35
|
+
based on the Application, and not on the Linked Version.
|
36
|
+
|
37
|
+
The "Corresponding Application Code" for a Combined Work means the
|
38
|
+
object code and/or source code for the Application, including any data
|
39
|
+
and utility programs needed for reproducing the Combined Work from the
|
40
|
+
Application, but excluding the System Libraries of the Combined Work.
|
41
|
+
|
42
|
+
1. Exception to Section 3 of the GNU GPL.
|
43
|
+
|
44
|
+
You may convey a covered work under sections 3 and 4 of this License
|
45
|
+
without being bound by section 3 of the GNU GPL.
|
46
|
+
|
47
|
+
2. Conveying Modified Versions.
|
48
|
+
|
49
|
+
If you modify a copy of the Library, and, in your modifications, a
|
50
|
+
facility refers to a function or data to be supplied by an Application
|
51
|
+
that uses the facility (other than as an argument passed when the
|
52
|
+
facility is invoked), then you may convey a copy of the modified
|
53
|
+
version:
|
54
|
+
|
55
|
+
a) under this License, provided that you make a good faith effort to
|
56
|
+
ensure that, in the event an Application does not supply the
|
57
|
+
function or data, the facility still operates, and performs
|
58
|
+
whatever part of its purpose remains meaningful, or
|
59
|
+
|
60
|
+
b) under the GNU GPL, with none of the additional permissions of
|
61
|
+
this License applicable to that copy.
|
62
|
+
|
63
|
+
3. Object Code Incorporating Material from Library Header Files.
|
64
|
+
|
65
|
+
The object code form of an Application may incorporate material from
|
66
|
+
a header file that is part of the Library. You may convey such object
|
67
|
+
code under terms of your choice, provided that, if the incorporated
|
68
|
+
material is not limited to numerical parameters, data structure
|
69
|
+
layouts and accessors, or small macros, inline functions and templates
|
70
|
+
(ten or fewer lines in length), you do both of the following:
|
71
|
+
|
72
|
+
a) Give prominent notice with each copy of the object code that the
|
73
|
+
Library is used in it and that the Library and its use are
|
74
|
+
covered by this License.
|
75
|
+
|
76
|
+
b) Accompany the object code with a copy of the GNU GPL and this license
|
77
|
+
document.
|
78
|
+
|
79
|
+
4. Combined Works.
|
80
|
+
|
81
|
+
You may convey a Combined Work under terms of your choice that,
|
82
|
+
taken together, effectively do not restrict modification of the
|
83
|
+
portions of the Library contained in the Combined Work and reverse
|
84
|
+
engineering for debugging such modifications, if you also do each of
|
85
|
+
the following:
|
86
|
+
|
87
|
+
a) Give prominent notice with each copy of the Combined Work that
|
88
|
+
the Library is used in it and that the Library and its use are
|
89
|
+
covered by this License.
|
90
|
+
|
91
|
+
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
92
|
+
document.
|
93
|
+
|
94
|
+
c) For a Combined Work that displays copyright notices during
|
95
|
+
execution, include the copyright notice for the Library among
|
96
|
+
these notices, as well as a reference directing the user to the
|
97
|
+
copies of the GNU GPL and this license document.
|
98
|
+
|
99
|
+
d) Do one of the following:
|
100
|
+
|
101
|
+
0) Convey the Minimal Corresponding Source under the terms of this
|
102
|
+
License, and the Corresponding Application Code in a form
|
103
|
+
suitable for, and under terms that permit, the user to
|
104
|
+
recombine or relink the Application with a modified version of
|
105
|
+
the Linked Version to produce a modified Combined Work, in the
|
106
|
+
manner specified by section 6 of the GNU GPL for conveying
|
107
|
+
Corresponding Source.
|
108
|
+
|
109
|
+
1) Use a suitable shared library mechanism for linking with the
|
110
|
+
Library. A suitable mechanism is one that (a) uses at run time
|
111
|
+
a copy of the Library already present on the user's computer
|
112
|
+
system, and (b) will operate properly with a modified version
|
113
|
+
of the Library that is interface-compatible with the Linked
|
114
|
+
Version.
|
115
|
+
|
116
|
+
e) Provide Installation Information, but only if you would otherwise
|
117
|
+
be required to provide such information under section 6 of the
|
118
|
+
GNU GPL, and only to the extent that such information is
|
119
|
+
necessary to install and execute a modified version of the
|
120
|
+
Combined Work produced by recombining or relinking the
|
121
|
+
Application with a modified version of the Linked Version. (If
|
122
|
+
you use option 4d0, the Installation Information must accompany
|
123
|
+
the Minimal Corresponding Source and Corresponding Application
|
124
|
+
Code. If you use option 4d1, you must provide the Installation
|
125
|
+
Information in the manner specified by section 6 of the GNU GPL
|
126
|
+
for conveying Corresponding Source.)
|
127
|
+
|
128
|
+
5. Combined Libraries.
|
129
|
+
|
130
|
+
You may place library facilities that are a work based on the
|
131
|
+
Library side by side in a single library together with other library
|
132
|
+
facilities that are not Applications and are not covered by this
|
133
|
+
License, and convey such a combined library under terms of your
|
134
|
+
choice, if you do both of the following:
|
135
|
+
|
136
|
+
a) Accompany the combined library with a copy of the same work based
|
137
|
+
on the Library, uncombined with any other library facilities,
|
138
|
+
conveyed under the terms of this License.
|
139
|
+
|
140
|
+
b) Give prominent notice with the combined library that part of it
|
141
|
+
is a work based on the Library, and explaining where to find the
|
142
|
+
accompanying uncombined form of the same work.
|
143
|
+
|
144
|
+
6. Revised Versions of the GNU Lesser General Public License.
|
145
|
+
|
146
|
+
The Free Software Foundation may publish revised and/or new versions
|
147
|
+
of the GNU Lesser General Public License from time to time. Such new
|
148
|
+
versions will be similar in spirit to the present version, but may
|
149
|
+
differ in detail to address new problems or concerns.
|
150
|
+
|
151
|
+
Each version is given a distinguishing version number. If the
|
152
|
+
Library as you received it specifies that a certain numbered version
|
153
|
+
of the GNU Lesser General Public License "or any later version"
|
154
|
+
applies to it, you have the option of following the terms and
|
155
|
+
conditions either of that published version or of any later version
|
156
|
+
published by the Free Software Foundation. If the Library as you
|
157
|
+
received it does not specify a version number of the GNU Lesser
|
158
|
+
General Public License, you may choose any version of the GNU Lesser
|
159
|
+
General Public License ever published by the Free Software Foundation.
|
160
|
+
|
161
|
+
If the Library as you received it specifies that a proxy can decide
|
162
|
+
whether future versions of the GNU Lesser General Public License shall
|
163
|
+
apply, that proxy's public statement of acceptance of any version is
|
164
|
+
permanent authorization for you to choose that version for the
|
165
|
+
Library.
|
data/README.md
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
# Cascading.JRuby
|
2
|
+
|
3
|
+
`cascading.jruby` is a small DSL above [Cascading](http://www.cascading.org/).
|
4
|
+
|
5
|
+
It requires Hadoop (>= 0.18.3) and Cascading (>= 1.0.1); their install locations must be set via the environment variables `HADOOP_HOME` and `CASCADING_HOME`.
|
6
|
+
|
7
|
+
Copyright 2009, Grégoire Marabout.
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
#! /usr/bin/env jruby
|
2
|
+
|
3
|
+
# Look in the tasks/setup.rb file for the various options that can be
|
4
|
+
# configured in this Rakefile. The .rake files in the tasks directory
|
5
|
+
# are where the options are used.
|
6
|
+
|
7
|
+
# Bootstrap the build tooling: use the "bones" gem when it can be required,
# otherwise fall back to loading tasks/setup.rb relative to the working
# directory. If neither can be loaded the build cannot proceed.
begin
  require 'bones'
  Bones.setup
rescue LoadError
  begin
    load 'tasks/setup.rb'
  rescue LoadError
    # Raising a bare String produces a RuntimeError.
    raise '### please install the "bones" gem ###'
  end
end

# NOTE(review): ensure_in_path is not defined in this file; presumably it is
# supplied by the setup loaded above and puts lib/ on the load path — confirm.
ensure_in_path 'lib'

# Provides Cascading::VERSION, which the project settings below read.
require 'cascading'
|
21
|
+
|
22
|
+
# Plain `rake` runs the test:run task.
task :default => 'test:run'

# Requires samples/<name>.rb, where <name> is ARGS[0].
# NOTE(review): ARGS is not defined anywhere in this file — confirm that the
# setup loaded at the top of the Rakefile provides it; otherwise invoking this
# task raises NameError.
task :run do
  # ensure_in_path "samples"
  sample = ARGS[0]
  puts "Running #{sample}"
  require "samples/#{sample}"
end

desc 'Remove gem and Java build files'
task :clean => %w[ant:clean gem:clean] do
  puts 'Build files removed'
end
|
34
|
+
|
35
|
+
# Project settings. PROJ is not defined in this file; per the header comment,
# the available options are described in tasks/setup.rb and used by the .rake
# files in the tasks directory.

# Identity
PROJ.name           = 'cascading.jruby'
PROJ.version        = Cascading::VERSION
PROJ.summary        = 'A JRuby DSL for Cascading'
PROJ.description    = 'cascading.jruby is a small DSL above Cascading, written in JRuby'

# Ownership and hosting
PROJ.authors        = ['Matt Walker', 'Grégoire Marabout']
PROJ.email          = 'mwalker@etsy.com'
PROJ.url            = 'http://github.com/etsy/cascading.jruby'
PROJ.rubyforge.name = 'cascading.jruby'

# Extra command-line option appended to the spec options.
PROJ.spec.opts << '--color'
|
44
|
+
|
45
|
+
# EOF
|
data/bin/make_job
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
|
3
|
+
require 'java'
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift(::File.expand_path(::File.join(::File.dirname(__FILE__), "..", "jruby")))
|
6
|
+
$LOAD_PATH.unshift(::File.expand_path(::File.join(::File.dirname(__FILE__), "..", "jobs")))
|
7
|
+
|
8
|
+
require 'rubygems'
|
9
|
+
require 'cascading'
|
10
|
+
require 'fileutils'
|
11
|
+
require 'optparse'
|
12
|
+
require 'ostruct'
|
13
|
+
|
14
|
+
include FileUtils
|
15
|
+
|
16
|
+
# Scratch directory (relative to the current working directory) in which the
# contents of the job jar are assembled.
TEMP_DIR = "_temp_jars"

# Sub-directory of TEMP_DIR that receives the bundled .jar libraries.
TEMP_DIR_LIB = ::File.join(TEMP_DIR, "lib")

# Location of cascading.jruby itself; its lib/ and classes/ sub-directories
# are copied into the jar further down.
CASCADING_JRUBY_HOME = Cascading::PATH

# Location of the Cascading distribution whose jars are bundled. Fail fast
# when it is missing: a nil value would only surface later as a TypeError in
# File.join (after files have already been staged), and an empty value would
# turn the jar glob into "/**/*.jar", i.e. a scan of the whole filesystem.
CASCADING_HOME = ENV["CASCADING_HOME"].to_s
abort("make_job: the CASCADING_HOME environment variable must be set") if CASCADING_HOME.empty?

# Not used by this script itself; may be nil when the variable is unset.
JRUBY_HOME = ENV["JRUBY_HOME"]
|
21
|
+
|
22
|
+
# Parses the make_job command line.
#
# argv - Array of command-line words. Recognised switches (and their values)
#        are removed from it in place, as OptionParser#parse! does.
#
# Returns an OpenStruct with:
#   input  - the first remaining positional argument, or nil if there is none
#   output - name of the jar to build ("job.jar" unless -o/--output is given)
#   libs   - Array of the paths given with -l/--lib, in command-line order
def parse_options(argv)
  parsed = OpenStruct.new
  parsed.input = nil
  parsed.output = "job.jar"
  parsed.libs = []

  OptionParser.new do |opts|
    opts.banner = "Usage: make_job [options]"

    # The FILE placeholder makes the switch take a mandatory argument. Without
    # it OptionParser treats --output as a boolean flag and yields `true`
    # instead of the file name.
    opts.on("-o", "--output FILE", "Set the name of the output jar file (job.jar by default)") do |file|
      parsed.output = file
    end

    opts.on("-l", "--lib LIBPATH", "Set the path where external libraries are stored") do |path|
      parsed.libs << path
    end
  end.parse!(argv)

  # Read the positional argument only after the switches have been stripped,
  # so that `make_job -o out.jar jobs` and `make_job jobs -o out.jar` agree.
  parsed.input = argv[0]
  parsed
end

options = parse_options(ARGV)

p options
|
40
|
+
|
41
|
+
# Create the scratch directories. TEMP_DIR_LIB lives inside TEMP_DIR, so one
# mkdir_p call creates both, and it is a no-op for directories that already
# exist. This replaces the File.exists? guards; File.exists? is deprecated and
# was removed in Ruby 3.2.
mkdir_p(TEMP_DIR_LIB)
|
44
|
+
|
45
|
+
# Recursively copies every path matching a glob pattern into a directory.
#
# from    - glob pattern selecting the paths to copy
# to      - destination passed to cp_r for each match
# message - optional progress line printed before copying (skipped when
#           nil or false)
#
# Returns the Array of matched paths.
def copy(from, to, message=nil)
  puts message if message
  Dir.glob(from).each { |path| cp_r(path, to) }
end
|
51
|
+
|
52
|
+
# Stage everything that goes into the jar: Ruby sources and classes are
# copied into TEMP_DIR, .jar dependencies into TEMP_DIR_LIB.

# Job files: the .rb files found under the input directory.
copy(::File.join(options.input, "**", "*.rb"), TEMP_DIR, "Copying job files to temp dir...")

# External libs: the .jar files found under each -l/--lib directory.
options.libs.each do |lib_dir|
  copy(::File.join(lib_dir, "**", "*.jar"), TEMP_DIR_LIB, "Copying external libs to temp dir...")
end

# cascading.jruby library files.
copy(::File.join(CASCADING_JRUBY_HOME, "lib", "**"), TEMP_DIR, "Copying cascading.jruby files to temp dir...")

# cascading.jruby runner classes (prints the same progress message as the
# previous step).
copy(::File.join(CASCADING_JRUBY_HOME, "classes", "**"), TEMP_DIR, "Copying cascading.jruby files to temp dir...")

# Cascading's own jars.
copy(::File.join(CASCADING_HOME, "**", "*.jar"), TEMP_DIR_LIB, "Copying Cascading jars to temp dir...")
|
72
|
+
|
73
|
+
# Jar the whole thing. The command is passed to system as separate arguments
# so no shell is involved and an output name containing spaces or shell
# metacharacters reaches `jar` intact. system returns false on a non-zero
# exit status and nil when the command could not be run at all.
puts "Building final jar file (#{options.output})..."
jar_built = system("jar", "cvf", options.output.to_s, "-C", "#{TEMP_DIR}/", ".")

# Clean up the scratch directory whether or not the jar was built.
puts "Cleaning temp dir..."
rm_rf(TEMP_DIR)

# Report failure with a non-zero exit status instead of claiming success.
abort("make_job: jar failed, #{options.output} was not built") unless jar_built

puts "Finished. Have Fun!"
|