cascading.jruby 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73)
  1. data/History.txt +15 -0
  2. data/lib/cascading/assembly.rb +138 -17
  3. data/lib/cascading/base.rb +0 -4
  4. data/lib/cascading/cascade.rb +25 -16
  5. data/lib/cascading/cascading.rb +25 -5
  6. data/lib/cascading/ext/array.rb +1 -7
  7. data/lib/cascading/flow.rb +18 -19
  8. data/lib/cascading/mode.rb +5 -1
  9. data/lib/cascading/operations.rb +11 -4
  10. data/lib/cascading/tap.rb +4 -0
  11. data/lib/cascading.rb +1 -5
  12. data/test/test_assembly.rb +135 -29
  13. data/test/test_cascade.rb +80 -0
  14. data/test/test_flow.rb +20 -0
  15. data/test/test_operations.rb +3 -2
  16. metadata +6 -76
  17. data/.travis.yml +0 -6
  18. data/Gemfile +0 -6
  19. data/Gemfile.lock +0 -12
  20. data/HACKING.md +0 -23
  21. data/README.md +0 -9
  22. data/Rakefile +0 -46
  23. data/TODO +0 -13
  24. data/bin/make_job +0 -81
  25. data/ivy.xml +0 -25
  26. data/ivysettings.xml +0 -7
  27. data/samples/branch.rb +0 -30
  28. data/samples/copy.rb +0 -20
  29. data/samples/data/data2.txt +0 -88799
  30. data/samples/data/data_group_by.txt +0 -7
  31. data/samples/data/data_join1.txt +0 -3
  32. data/samples/data/data_join2.txt +0 -3
  33. data/samples/data/data_join3.txt +0 -3
  34. data/samples/data/genealogy/names/dist.all.last +0 -88799
  35. data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
  36. data/samples/group_by.rb +0 -61
  37. data/samples/join.rb +0 -31
  38. data/samples/logwordcount.rb +0 -22
  39. data/samples/project.rb +0 -23
  40. data/samples/rename.rb +0 -20
  41. data/samples/scorenames.rb +0 -20
  42. data/samples/splitter.rb +0 -19
  43. data/samples/sub_assembly.rb +0 -30
  44. data/samples/union.rb +0 -36
  45. data/spec/cascading_spec.rb +0 -105
  46. data/spec/expr_spec.rb +0 -230
  47. data/spec/jruby_version_spec.rb +0 -72
  48. data/spec/resource/join_input.txt +0 -3
  49. data/spec/resource/test_input.txt +0 -4
  50. data/spec/scope_spec.rb +0 -149
  51. data/spec/spec.opts +0 -6
  52. data/spec/spec_helper.rb +0 -5
  53. data/spec/spec_util.rb +0 -92
  54. data/src/cascading/jruby/Main.java +0 -38
  55. data/src/cascading/jruby/runner.rb +0 -6
  56. data/tags +0 -342
  57. data/tasks/ann.rake +0 -80
  58. data/tasks/ant.rake +0 -23
  59. data/tasks/bones.rake +0 -20
  60. data/tasks/gem.rake +0 -206
  61. data/tasks/git.rake +0 -40
  62. data/tasks/notes.rake +0 -27
  63. data/tasks/post_load.rake +0 -34
  64. data/tasks/rdoc.rake +0 -50
  65. data/tasks/rubyforge.rake +0 -55
  66. data/tasks/samples.rake +0 -19
  67. data/tasks/setup.rb +0 -300
  68. data/tasks/spec.rake +0 -59
  69. data/tasks/svn.rake +0 -47
  70. data/tasks/test.rake +0 -42
  71. data/test/data/data1.txt +0 -14
  72. data/test/data/data2.txt +0 -14
  73. data/test/mock_assemblies.rb +0 -55
data/bin/make_job DELETED
@@ -1,81 +0,0 @@
1
- #!/usr/bin/env jruby
2
-
3
- require 'java'
4
-
5
- $LOAD_PATH.unshift(::File.expand_path(::File.join(::File.dirname(__FILE__), "..", "jruby")))
6
- $LOAD_PATH.unshift(::File.expand_path(::File.join(::File.dirname(__FILE__), "..", "jobs")))
7
-
8
- require 'rubygems'
9
- require 'cascading'
10
- require 'fileutils'
11
- require 'optparse'
12
- require 'ostruct'
13
-
14
- include FileUtils
15
-
16
- TEMP_DIR = "_temp_jars"
17
- TEMP_DIR_LIB = ::File.join(TEMP_DIR, "lib")
18
- CASCADING_JRUBY_HOME = Cascading::PATH
19
- CASCADING_HOME = ENV["CASCADING_HOME"]
20
- JRUBY_HOME = ENV["JRUBY_HOME"]
21
-
22
- options = OpenStruct.new
23
- options.input = ARGV[0]
24
- options.output = "job.jar"
25
- options.libs = []
26
-
27
- OptionParser.new do |opts|
28
- opts.banner = "Usage: make_job [options]"
29
-
30
- opts.on("-o", "--output", "Set the name of the output jar file (job.jar by default)") do |v|
31
- options.output = v
32
- end
33
-
34
- opts.on("-l", "--lib LIBPATH", "Set the path where external libraries are stored") do |path|
35
- options.libs << path
36
- end
37
- end.parse!
38
-
39
- p options
40
-
41
- # Create temp dir
42
- mkdir(TEMP_DIR) unless File.exists? TEMP_DIR
43
- mkdir(TEMP_DIR_LIB) unless File.exists? TEMP_DIR_LIB
44
-
45
- def copy(from, to, message=nil)
46
- puts message if message
47
- Dir.glob(from).each do |f|
48
- cp_r(f, to)
49
- end
50
- end
51
-
52
- # Copy job files into TEMP_DIR:
53
- files = ::File.join(options.input, "**", "*.rb")
54
- copy(files, TEMP_DIR, "Copying job files to temp dir...")
55
-
56
- # Copy external libs into TEMP_DIR:
57
- for lib in options.libs
58
- files = ::File.join(lib, "**", "*.jar")
59
- copy(files, TEMP_DIR_LIB, "Copying external libs to temp dir...")
60
- end
61
-
62
- files = ::File.join(CASCADING_JRUBY_HOME, "lib", "**")
63
- copy(files, TEMP_DIR, "Copying cascading.jruby files to temp dir...")
64
-
65
- # Copy cascading.jruby.runner classes:
66
- files = ::File.join(CASCADING_JRUBY_HOME, "classes", "**")
67
- copy(files, TEMP_DIR, "Copying cascading.jruby files to temp dir...")
68
-
69
- # Copy cascading jars in _temp_jars/lib
70
- files = ::File.join(CASCADING_HOME, "**", "*.jar")
71
- copy(files, TEMP_DIR_LIB, "Copying Cascading jars to temp dir...")
72
-
73
- # Jar the whole thing:
74
- puts "Building final jar file (#{options.output})..."
75
- system("jar cvf #{options.output} -C #{TEMP_DIR}/ .")
76
-
77
- # Clean-up things
78
- puts "Cleaning temp dir..."
79
- rm_rf(TEMP_DIR)
80
-
81
- puts "Finished. Have Fun!"
data/ivy.xml DELETED
@@ -1,25 +0,0 @@
1
- <?xml version="1.0" encoding="ISO-8859-1"?>
2
- <ivy-module version="2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
- xsi:noNamespaceSchemaLocation="http://ant.apache.org/ivy/schemas/ivy.xsd">
4
- <info organisation="com.etsy" module="cascading.jruby" status="integration" />
5
-
6
- <configurations>
7
- <conf name="default" visibility="public" description="runtime dependencies and master artifact can be used with this conf" extends="runtime,master" />
8
- <conf name="master" visibility="public" description="contains only the artifact published by this module itself, with no transitive dependencies" />
9
- <conf name="compile" visibility="public" description="this is the default scope, used if none is specified. Compile dependencies are available in all classpaths." />
10
- <conf name="provided" visibility="public" description="this is much like compile, but indicates you expect the JDK or a container to provide it. It is only available on the compilation classpath, and is not transitive." />
11
- <conf name="runtime" visibility="public" description="this scope indicates that the dependency is not required for compilation, but is for execution. It is in the runtime and test classpaths, but not the compile classpath." extends="compile" />
12
- <conf name="test" visibility="private" description="this scope indicates that the dependency is not required for normal use of the application, and is only available for the test compilation and execution phases." extends="runtime" />
13
- <conf name="system" visibility="public" description="this scope is similar to provided except that you have to provide the JAR which contains it explicitly. The artifact is always available and is not looked up in a repository." />
14
- <conf name="sources" visibility="public" description="this configuration contains the source artifact of this module, if any." />
15
- <conf name="javadoc" visibility="public" description="this configuration contains the javadoc artifact of this module, if any." />
16
- <conf name="optional" visibility="public" description="contains all optional dependencies" />
17
- </configurations>
18
-
19
- <dependencies>
20
- <dependency org="cascading" name="cascading-core" rev="2.0.0" conf="default" />
21
- <dependency org="cascading" name="cascading-local" rev="2.0.0" conf="default" />
22
- <dependency org="cascading" name="cascading-hadoop" rev="2.0.0" conf="default" />
23
- <dependency org="org.jruby" name="jruby" rev="1.6.5" conf="default" />
24
- </dependencies>
25
- </ivy-module>
data/ivysettings.xml DELETED
@@ -1,7 +0,0 @@
1
- <ivysettings>
2
- <include url="http://conjars.org/repo/ivysettings.xml"/>
3
-
4
- <resolvers>
5
- <ibiblio name="public" m2compatible="true"/>
6
- </resolvers>
7
- </ivysettings>
data/samples/branch.rb DELETED
@@ -1,30 +0,0 @@
1
- #! /usr/bin/env jruby
2
-
3
- $: << File.join(File.dirname(__FILE__), '..', 'lib')
4
-
5
- require 'cascading'
6
-
7
- cascade 'branch', :mode => :local do
8
- flow 'branch' do
9
- source 'input', tap('samples/data/data2.txt')
10
-
11
- assembly 'input' do
12
- split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/
13
-
14
- branch 'branch1' do
15
- group_by 'score1' do
16
- count
17
- end
18
- end
19
-
20
- branch 'branch2' do
21
- group_by 'score2' do
22
- count
23
- end
24
- end
25
- end
26
-
27
- sink 'branch1', tap('output/branch1', :sink_mode => :replace)
28
- sink 'branch2', tap('output/branch2', :sink_mode => :replace)
29
- end
30
- end.complete
data/samples/copy.rb DELETED
@@ -1,20 +0,0 @@
1
- #! /usr/bin/env jruby
2
- $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
-
4
- require 'cascading'
5
-
6
- cascade 'copy', :mode => :local do
7
- flow 'copy' do
8
- # You don't have to curl and cache inputs: tap can fetch via HTTP
9
- #source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
10
- source 'input', tap('samples/data/genealogy/names/dist.all.last')
11
-
12
- assembly 'input' do
13
- rename 'line' => 'value'
14
- # We override validate_with because we know line will never be null
15
- reject 'value:string.indexOf("R") == -1', :validate_with => { :value => 'nothinghere' }
16
- end
17
-
18
- sink 'input', tap('output/copy', :sink_mode => :replace)
19
- end
20
- end.complete