cascading.jruby 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/HACKING.md +15 -0
  2. data/History.txt +0 -0
  3. data/LICENSE.txt +165 -0
  4. data/README.md +7 -0
  5. data/Rakefile +45 -0
  6. data/bin/make_job +81 -0
  7. data/lib/cascading/assembly.rb +726 -0
  8. data/lib/cascading/base.rb +63 -0
  9. data/lib/cascading/cascade.rb +63 -0
  10. data/lib/cascading/cascading.rb +134 -0
  11. data/lib/cascading/cascading_exception.rb +30 -0
  12. data/lib/cascading/expr_stub.rb +33 -0
  13. data/lib/cascading/ext/array.rb +15 -0
  14. data/lib/cascading/flow.rb +168 -0
  15. data/lib/cascading/operations.rb +204 -0
  16. data/lib/cascading/scope.rb +160 -0
  17. data/lib/cascading.rb +63 -0
  18. data/samples/branch.rb +31 -0
  19. data/samples/cascading.rb +41 -0
  20. data/samples/copy.rb +18 -0
  21. data/samples/data/data2.txt +88799 -0
  22. data/samples/data/data_join1.txt +3 -0
  23. data/samples/data/data_join2.txt +3 -0
  24. data/samples/data/data_join3.txt +3 -0
  25. data/samples/join.rb +32 -0
  26. data/samples/logwordcount.rb +22 -0
  27. data/samples/project.rb +24 -0
  28. data/samples/rename.rb +21 -0
  29. data/samples/scorenames.rb +20 -0
  30. data/samples/splitter.rb +20 -0
  31. data/samples/union.rb +35 -0
  32. data/spec/cascading_spec.rb +100 -0
  33. data/spec/expr_spec.rb +10 -0
  34. data/spec/primary_key_spec.rb +119 -0
  35. data/spec/resource/join_input.txt +3 -0
  36. data/spec/resource/test_input.txt +4 -0
  37. data/spec/scope_spec.rb +174 -0
  38. data/spec/spec.opts +6 -0
  39. data/spec/spec_helper.rb +5 -0
  40. data/spec/spec_util.rb +188 -0
  41. data/src/cascading/jruby/Main.java +38 -0
  42. data/src/cascading/jruby/runner.rb +6 -0
  43. data/tags +238 -0
  44. data/tasks/ann.rake +80 -0
  45. data/tasks/ant.rake +11 -0
  46. data/tasks/bones.rake +20 -0
  47. data/tasks/gem.rake +206 -0
  48. data/tasks/git.rake +40 -0
  49. data/tasks/notes.rake +27 -0
  50. data/tasks/post_load.rake +34 -0
  51. data/tasks/rdoc.rake +50 -0
  52. data/tasks/rubyforge.rake +55 -0
  53. data/tasks/samples.rake +13 -0
  54. data/tasks/setup.rb +300 -0
  55. data/tasks/spec.rake +59 -0
  56. data/tasks/svn.rake +47 -0
  57. data/tasks/test.rake +42 -0
  58. data/test/data/data1.txt +14 -0
  59. data/test/data/data2.txt +14 -0
  60. data/test/test_assembly.rb +321 -0
  61. data/test/test_cascading.rb +49 -0
  62. data/test/test_flow.rb +15 -0
  63. metadata +137 -0
data/HACKING.md ADDED
@@ -0,0 +1,15 @@
1
+ # Hacking
2
+
3
+ Some hacking info on `cascading.jruby`:
4
+
5
+ `cascading.jruby` can be packaged as a gem. To do so, you must generate the necessary packaging files:
6
+
7
+ ant build; jruby -S rake gem
8
+
9
+ This will produce the gem in the `pkg/` sub-directory. After that, just `cd` into that directory and run:
10
+
11
+ jruby -S gem install cascading.jruby-xxx.gem
12
+
13
+ The `Cascading::Operations` module is mixed into the `Cascading::Assembly` class to provide shortcuts for common operations.
14
+
15
+ The file `cascading/cascading.rb` defines global helper methods for Cascading, such as tap creation and fields creation.
data/History.txt ADDED
File without changes
data/LICENSE.txt ADDED
@@ -0,0 +1,165 @@
1
+ GNU LESSER GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+
9
+ This version of the GNU Lesser General Public License incorporates
10
+ the terms and conditions of version 3 of the GNU General Public
11
+ License, supplemented by the additional permissions listed below.
12
+
13
+ 0. Additional Definitions.
14
+
15
+ As used herein, "this License" refers to version 3 of the GNU Lesser
16
+ General Public License, and the "GNU GPL" refers to version 3 of the GNU
17
+ General Public License.
18
+
19
+ "The Library" refers to a covered work governed by this License,
20
+ other than an Application or a Combined Work as defined below.
21
+
22
+ An "Application" is any work that makes use of an interface provided
23
+ by the Library, but which is not otherwise based on the Library.
24
+ Defining a subclass of a class defined by the Library is deemed a mode
25
+ of using an interface provided by the Library.
26
+
27
+ A "Combined Work" is a work produced by combining or linking an
28
+ Application with the Library. The particular version of the Library
29
+ with which the Combined Work was made is also called the "Linked
30
+ Version".
31
+
32
+ The "Minimal Corresponding Source" for a Combined Work means the
33
+ Corresponding Source for the Combined Work, excluding any source code
34
+ for portions of the Combined Work that, considered in isolation, are
35
+ based on the Application, and not on the Linked Version.
36
+
37
+ The "Corresponding Application Code" for a Combined Work means the
38
+ object code and/or source code for the Application, including any data
39
+ and utility programs needed for reproducing the Combined Work from the
40
+ Application, but excluding the System Libraries of the Combined Work.
41
+
42
+ 1. Exception to Section 3 of the GNU GPL.
43
+
44
+ You may convey a covered work under sections 3 and 4 of this License
45
+ without being bound by section 3 of the GNU GPL.
46
+
47
+ 2. Conveying Modified Versions.
48
+
49
+ If you modify a copy of the Library, and, in your modifications, a
50
+ facility refers to a function or data to be supplied by an Application
51
+ that uses the facility (other than as an argument passed when the
52
+ facility is invoked), then you may convey a copy of the modified
53
+ version:
54
+
55
+ a) under this License, provided that you make a good faith effort to
56
+ ensure that, in the event an Application does not supply the
57
+ function or data, the facility still operates, and performs
58
+ whatever part of its purpose remains meaningful, or
59
+
60
+ b) under the GNU GPL, with none of the additional permissions of
61
+ this License applicable to that copy.
62
+
63
+ 3. Object Code Incorporating Material from Library Header Files.
64
+
65
+ The object code form of an Application may incorporate material from
66
+ a header file that is part of the Library. You may convey such object
67
+ code under terms of your choice, provided that, if the incorporated
68
+ material is not limited to numerical parameters, data structure
69
+ layouts and accessors, or small macros, inline functions and templates
70
+ (ten or fewer lines in length), you do both of the following:
71
+
72
+ a) Give prominent notice with each copy of the object code that the
73
+ Library is used in it and that the Library and its use are
74
+ covered by this License.
75
+
76
+ b) Accompany the object code with a copy of the GNU GPL and this license
77
+ document.
78
+
79
+ 4. Combined Works.
80
+
81
+ You may convey a Combined Work under terms of your choice that,
82
+ taken together, effectively do not restrict modification of the
83
+ portions of the Library contained in the Combined Work and reverse
84
+ engineering for debugging such modifications, if you also do each of
85
+ the following:
86
+
87
+ a) Give prominent notice with each copy of the Combined Work that
88
+ the Library is used in it and that the Library and its use are
89
+ covered by this License.
90
+
91
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
92
+ document.
93
+
94
+ c) For a Combined Work that displays copyright notices during
95
+ execution, include the copyright notice for the Library among
96
+ these notices, as well as a reference directing the user to the
97
+ copies of the GNU GPL and this license document.
98
+
99
+ d) Do one of the following:
100
+
101
+ 0) Convey the Minimal Corresponding Source under the terms of this
102
+ License, and the Corresponding Application Code in a form
103
+ suitable for, and under terms that permit, the user to
104
+ recombine or relink the Application with a modified version of
105
+ the Linked Version to produce a modified Combined Work, in the
106
+ manner specified by section 6 of the GNU GPL for conveying
107
+ Corresponding Source.
108
+
109
+ 1) Use a suitable shared library mechanism for linking with the
110
+ Library. A suitable mechanism is one that (a) uses at run time
111
+ a copy of the Library already present on the user's computer
112
+ system, and (b) will operate properly with a modified version
113
+ of the Library that is interface-compatible with the Linked
114
+ Version.
115
+
116
+ e) Provide Installation Information, but only if you would otherwise
117
+ be required to provide such information under section 6 of the
118
+ GNU GPL, and only to the extent that such information is
119
+ necessary to install and execute a modified version of the
120
+ Combined Work produced by recombining or relinking the
121
+ Application with a modified version of the Linked Version. (If
122
+ you use option 4d0, the Installation Information must accompany
123
+ the Minimal Corresponding Source and Corresponding Application
124
+ Code. If you use option 4d1, you must provide the Installation
125
+ Information in the manner specified by section 6 of the GNU GPL
126
+ for conveying Corresponding Source.)
127
+
128
+ 5. Combined Libraries.
129
+
130
+ You may place library facilities that are a work based on the
131
+ Library side by side in a single library together with other library
132
+ facilities that are not Applications and are not covered by this
133
+ License, and convey such a combined library under terms of your
134
+ choice, if you do both of the following:
135
+
136
+ a) Accompany the combined library with a copy of the same work based
137
+ on the Library, uncombined with any other library facilities,
138
+ conveyed under the terms of this License.
139
+
140
+ b) Give prominent notice with the combined library that part of it
141
+ is a work based on the Library, and explaining where to find the
142
+ accompanying uncombined form of the same work.
143
+
144
+ 6. Revised Versions of the GNU Lesser General Public License.
145
+
146
+ The Free Software Foundation may publish revised and/or new versions
147
+ of the GNU Lesser General Public License from time to time. Such new
148
+ versions will be similar in spirit to the present version, but may
149
+ differ in detail to address new problems or concerns.
150
+
151
+ Each version is given a distinguishing version number. If the
152
+ Library as you received it specifies that a certain numbered version
153
+ of the GNU Lesser General Public License "or any later version"
154
+ applies to it, you have the option of following the terms and
155
+ conditions either of that published version or of any later version
156
+ published by the Free Software Foundation. If the Library as you
157
+ received it does not specify a version number of the GNU Lesser
158
+ General Public License, you may choose any version of the GNU Lesser
159
+ General Public License ever published by the Free Software Foundation.
160
+
161
+ If the Library as you received it specifies that a proxy can decide
162
+ whether future versions of the GNU Lesser General Public License shall
163
+ apply, that proxy's public statement of acceptance of any version is
164
+ permanent authorization for you to choose that version for the
165
+ Library.
data/README.md ADDED
@@ -0,0 +1,7 @@
1
+ # Cascading.JRuby
2
+
3
+ `cascading.jruby` is a small DSL above [Cascading](http://www.cascading.org/).
4
+
5
+ It requires Hadoop (>= 0.18.3) and Cascading (>= 1.0.1); their installation directories must be set via the environment variables `HADOOP_HOME` and `CASCADING_HOME`.
6
+
7
+ Copyright 2009, Grégoire Marabout.
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
+ #! /usr/bin/env jruby
2
+
3
+ # Look in the tasks/setup.rb file for the various options that can be
4
+ # configured in this Rakefile. The .rake files in the tasks directory
5
+ # are where the options are used.
6
+
7
+ begin
8
+ require 'bones'
9
+ Bones.setup
10
+ rescue LoadError
11
+ begin
12
+ load 'tasks/setup.rb'
13
+ rescue LoadError
14
+ raise RuntimeError, '### please install the "bones" gem ###'
15
+ end
16
+ end
17
+
18
+ ensure_in_path 'lib'
19
+
20
+ require 'cascading'
21
+
22
+ task :default => 'test:run'
23
+
24
+ task :run do
25
+ # ensure_in_path "samples"
26
+ puts "Running #{ARGS[0]}"
27
+ require "samples/#{ARGS[0]}"
28
+ end
29
+
30
+ desc 'Remove gem and Java build files'
31
+ task :clean => ['ant:clean', 'gem:clean'] do
32
+ puts 'Build files removed'
33
+ end
34
+
35
+ PROJ.name = 'cascading.jruby'
36
+ PROJ.authors = ['Matt Walker', 'Grégoire Marabout']
37
+ PROJ.email = 'mwalker@etsy.com'
38
+ PROJ.url = 'http://github.com/etsy/cascading.jruby'
39
+ PROJ.version = Cascading::VERSION
40
+ PROJ.summary = 'A JRuby DSL for Cascading'
41
+ PROJ.description = 'cascading.jruby is a small DSL above Cascading, written in JRuby'
42
+ PROJ.rubyforge.name = 'cascading.jruby'
43
+ PROJ.spec.opts << '--color'
44
+
45
+ # EOF
data/bin/make_job ADDED
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env jruby
2
+
3
+ require 'java'
4
+
5
+ $LOAD_PATH.unshift(::File.expand_path(::File.join(::File.dirname(__FILE__), "..", "jruby")))
6
+ $LOAD_PATH.unshift(::File.expand_path(::File.join(::File.dirname(__FILE__), "..", "jobs")))
7
+
8
+ require 'rubygems'
9
+ require 'cascading'
10
+ require 'fileutils'
11
+ require 'optparse'
12
+ require 'ostruct'
13
+
14
+ include FileUtils
15
+
16
+ TEMP_DIR = "_temp_jars"
17
+ TEMP_DIR_LIB = ::File.join(TEMP_DIR, "lib")
18
+ CASCADING_JRUBY_HOME = Cascading::PATH
19
+ CASCADING_HOME = ENV["CASCADING_HOME"]
20
+ JRUBY_HOME = ENV["JRUBY_HOME"]
21
+
22
+ options = OpenStruct.new
23
+ options.input = ARGV[0]
24
+ options.output = "job.jar"
25
+ options.libs = []
26
+
27
+ OptionParser.new do |opts|
28
+ opts.banner = "Usage: make_job [options]"
29
+
30
+ opts.on("-o", "--output", "Set the name of the output jar file (job.jar by default)") do |v|
31
+ options.output = v
32
+ end
33
+
34
+ opts.on("-l", "--lib LIBPATH", "Set the path where external libraries are stored") do |path|
35
+ options.libs << path
36
+ end
37
+ end.parse!
38
+
39
+ p options
40
+
41
+ # Create temp dir
42
+ mkdir(TEMP_DIR) unless File.exists? TEMP_DIR
43
+ mkdir(TEMP_DIR_LIB) unless File.exists? TEMP_DIR_LIB
44
+
45
+ def copy(from, to, message=nil)
46
+ puts message if message
47
+ Dir.glob(from).each do |f|
48
+ cp_r(f, to)
49
+ end
50
+ end
51
+
52
+ # Copy job files into TEMP_DIR:
53
+ files = ::File.join(options.input, "**", "*.rb")
54
+ copy(files, TEMP_DIR, "Copying job files to temp dir...")
55
+
56
+ # Copy external libs into TEMP_DIR:
57
+ for lib in options.libs
58
+ files = ::File.join(lib, "**", "*.jar")
59
+ copy(files, TEMP_DIR_LIB, "Copying external libs to temp dir...")
60
+ end
61
+
62
+ files = ::File.join(CASCADING_JRUBY_HOME, "lib", "**")
63
+ copy(files, TEMP_DIR, "Copying cascading.jruby files to temp dir...")
64
+
65
+ # Copy cascading.jruby.runner classes:
66
+ files = ::File.join(CASCADING_JRUBY_HOME, "classes", "**")
67
+ copy(files, TEMP_DIR, "Copying cascading.jruby files to temp dir...")
68
+
69
+ # Copy Cascading jars into _temp_jars/lib
70
+ files = ::File.join(CASCADING_HOME, "**", "*.jar")
71
+ copy(files, TEMP_DIR_LIB, "Copying Cascading jars to temp dir...")
72
+
73
+ # Jar the whole thing:
74
+ puts "Building final jar file (#{options.output})..."
75
+ system("jar cvf #{options.output} -C #{TEMP_DIR}/ .")
76
+
77
+ # Clean up the temp dir
78
+ puts "Cleaning temp dir..."
79
+ rm_rf(TEMP_DIR)
80
+
81
+ puts "Finished. Have Fun!"