cascading.jruby 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. data/History.txt +15 -0
  2. data/lib/cascading/assembly.rb +138 -17
  3. data/lib/cascading/base.rb +0 -4
  4. data/lib/cascading/cascade.rb +25 -16
  5. data/lib/cascading/cascading.rb +25 -5
  6. data/lib/cascading/ext/array.rb +1 -7
  7. data/lib/cascading/flow.rb +18 -19
  8. data/lib/cascading/mode.rb +5 -1
  9. data/lib/cascading/operations.rb +11 -4
  10. data/lib/cascading/tap.rb +4 -0
  11. data/lib/cascading.rb +1 -5
  12. data/test/test_assembly.rb +135 -29
  13. data/test/test_cascade.rb +80 -0
  14. data/test/test_flow.rb +20 -0
  15. data/test/test_operations.rb +3 -2
  16. metadata +6 -76
  17. data/.travis.yml +0 -6
  18. data/Gemfile +0 -6
  19. data/Gemfile.lock +0 -12
  20. data/HACKING.md +0 -23
  21. data/README.md +0 -9
  22. data/Rakefile +0 -46
  23. data/TODO +0 -13
  24. data/bin/make_job +0 -81
  25. data/ivy.xml +0 -25
  26. data/ivysettings.xml +0 -7
  27. data/samples/branch.rb +0 -30
  28. data/samples/copy.rb +0 -20
  29. data/samples/data/data2.txt +0 -88799
  30. data/samples/data/data_group_by.txt +0 -7
  31. data/samples/data/data_join1.txt +0 -3
  32. data/samples/data/data_join2.txt +0 -3
  33. data/samples/data/data_join3.txt +0 -3
  34. data/samples/data/genealogy/names/dist.all.last +0 -88799
  35. data/samples/data/gutenberg/the_outline_of_science_vol_1 +0 -12761
  36. data/samples/group_by.rb +0 -61
  37. data/samples/join.rb +0 -31
  38. data/samples/logwordcount.rb +0 -22
  39. data/samples/project.rb +0 -23
  40. data/samples/rename.rb +0 -20
  41. data/samples/scorenames.rb +0 -20
  42. data/samples/splitter.rb +0 -19
  43. data/samples/sub_assembly.rb +0 -30
  44. data/samples/union.rb +0 -36
  45. data/spec/cascading_spec.rb +0 -105
  46. data/spec/expr_spec.rb +0 -230
  47. data/spec/jruby_version_spec.rb +0 -72
  48. data/spec/resource/join_input.txt +0 -3
  49. data/spec/resource/test_input.txt +0 -4
  50. data/spec/scope_spec.rb +0 -149
  51. data/spec/spec.opts +0 -6
  52. data/spec/spec_helper.rb +0 -5
  53. data/spec/spec_util.rb +0 -92
  54. data/src/cascading/jruby/Main.java +0 -38
  55. data/src/cascading/jruby/runner.rb +0 -6
  56. data/tags +0 -342
  57. data/tasks/ann.rake +0 -80
  58. data/tasks/ant.rake +0 -23
  59. data/tasks/bones.rake +0 -20
  60. data/tasks/gem.rake +0 -206
  61. data/tasks/git.rake +0 -40
  62. data/tasks/notes.rake +0 -27
  63. data/tasks/post_load.rake +0 -34
  64. data/tasks/rdoc.rake +0 -50
  65. data/tasks/rubyforge.rake +0 -55
  66. data/tasks/samples.rake +0 -19
  67. data/tasks/setup.rb +0 -300
  68. data/tasks/spec.rake +0 -59
  69. data/tasks/svn.rake +0 -47
  70. data/tasks/test.rake +0 -42
  71. data/test/data/data1.txt +0 -14
  72. data/test/data/data2.txt +0 -14
  73. data/test/mock_assemblies.rb +0 -55
data/bin/make_job DELETED
@@ -1,81 +0,0 @@
1
- #!/usr/bin/env jruby
2
-
3
- require 'java'
4
-
5
- $LOAD_PATH.unshift(::File.expand_path(::File.join(::File.dirname(__FILE__), "..", "jruby")))
6
- $LOAD_PATH.unshift(::File.expand_path(::File.join(::File.dirname(__FILE__), "..", "jobs")))
7
-
8
- require 'rubygems'
9
- require 'cascading'
10
- require 'fileutils'
11
- require 'optparse'
12
- require 'ostruct'
13
-
14
- include FileUtils
15
-
16
- TEMP_DIR = "_temp_jars"
17
- TEMP_DIR_LIB = ::File.join(TEMP_DIR, "lib")
18
- CASCADING_JRUBY_HOME = Cascading::PATH
19
- CASCADING_HOME = ENV["CASCADING_HOME"]
20
- JRUBY_HOME = ENV["JRUBY_HOME"]
21
-
22
- options = OpenStruct.new
23
- options.input = ARGV[0]
24
- options.output = "job.jar"
25
- options.libs = []
26
-
27
- OptionParser.new do |opts|
28
- opts.banner = "Usage: make_job [options]"
29
-
30
- opts.on("-o", "--output", "Set the name of the output jar file (job.jar by default)") do |v|
31
- options.output = v
32
- end
33
-
34
- opts.on("-l", "--lib LIBPATH", "Set the path where external libraries are stored") do |path|
35
- options.libs << path
36
- end
37
- end.parse!
38
-
39
- p options
40
-
41
- # Create temp dir
42
- mkdir(TEMP_DIR) unless File.exists? TEMP_DIR
43
- mkdir(TEMP_DIR_LIB) unless File.exists? TEMP_DIR_LIB
44
-
45
- def copy(from, to, message=nil)
46
- puts message if message
47
- Dir.glob(from).each do |f|
48
- cp_r(f, to)
49
- end
50
- end
51
-
52
- # Copy job files into TEMP_DIR:
53
- files = ::File.join(options.input, "**", "*.rb")
54
- copy(files, TEMP_DIR, "Copying job files to temp dir...")
55
-
56
- # Copy external libs into TEMP_DIR:
57
- for lib in options.libs
58
- files = ::File.join(lib, "**", "*.jar")
59
- copy(files, TEMP_DIR_LIB, "Copying external libs to temp dir...")
60
- end
61
-
62
- files = ::File.join(CASCADING_JRUBY_HOME, "lib", "**")
63
- copy(files, TEMP_DIR, "Copying cascading.jruby files to temp dir...")
64
-
65
- # Copy cascading.jruby.runner classes:
66
- files = ::File.join(CASCADING_JRUBY_HOME, "classes", "**")
67
- copy(files, TEMP_DIR, "Copying cascading.jruby files to temp dir...")
68
-
69
- # Copy cascading jars in _temp_jars/lib
70
- files = ::File.join(CASCADING_HOME, "**", "*.jar")
71
- copy(files, TEMP_DIR_LIB, "Copying Cascading jars to temp dir...")
72
-
73
- # Jar the whole thing:
74
- puts "Building final jar file (#{options.output})..."
75
- system("jar cvf #{options.output} -C #{TEMP_DIR}/ .")
76
-
77
- # Clean-up things
78
- puts "Cleaning temp dir..."
79
- rm_rf(TEMP_DIR)
80
-
81
- puts "Finished. Have Fun!"
data/ivy.xml DELETED
@@ -1,25 +0,0 @@
1
- <?xml version="1.0" encoding="ISO-8859-1"?>
2
- <ivy-module version="2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
- xsi:noNamespaceSchemaLocation="http://ant.apache.org/ivy/schemas/ivy.xsd">
4
- <info organisation="com.etsy" module="cascading.jruby" status="integration" />
5
-
6
- <configurations>
7
- <conf name="default" visibility="public" description="runtime dependencies and master artifact can be used with this conf" extends="runtime,master" />
8
- <conf name="master" visibility="public" description="contains only the artifact published by this module itself, with no transitive dependencies" />
9
- <conf name="compile" visibility="public" description="this is the default scope, used if none is specified. Compile dependencies are available in all classpaths." />
10
- <conf name="provided" visibility="public" description="this is much like compile, but indicates you expect the JDK or a container to provide it. It is only available on the compilation classpath, and is not transitive." />
11
- <conf name="runtime" visibility="public" description="this scope indicates that the dependency is not required for compilation, but is for execution. It is in the runtime and test classpaths, but not the compile classpath." extends="compile" />
12
- <conf name="test" visibility="private" description="this scope indicates that the dependency is not required for normal use of the application, and is only available for the test compilation and execution phases." extends="runtime" />
13
- <conf name="system" visibility="public" description="this scope is similar to provided except that you have to provide the JAR which contains it explicitly. The artifact is always available and is not looked up in a repository." />
14
- <conf name="sources" visibility="public" description="this configuration contains the source artifact of this module, if any." />
15
- <conf name="javadoc" visibility="public" description="this configuration contains the javadoc artifact of this module, if any." />
16
- <conf name="optional" visibility="public" description="contains all optional dependencies" />
17
- </configurations>
18
-
19
- <dependencies>
20
- <dependency org="cascading" name="cascading-core" rev="2.0.0" conf="default" />
21
- <dependency org="cascading" name="cascading-local" rev="2.0.0" conf="default" />
22
- <dependency org="cascading" name="cascading-hadoop" rev="2.0.0" conf="default" />
23
- <dependency org="org.jruby" name="jruby" rev="1.6.5" conf="default" />
24
- </dependencies>
25
- </ivy-module>
data/ivysettings.xml DELETED
@@ -1,7 +0,0 @@
1
- <ivysettings>
2
- <include url="http://conjars.org/repo/ivysettings.xml"/>
3
-
4
- <resolvers>
5
- <ibiblio name="public" m2compatible="true"/>
6
- </resolvers>
7
- </ivysettings>
data/samples/branch.rb DELETED
@@ -1,30 +0,0 @@
1
- #! /usr/bin/env jruby
2
-
3
- $: << File.join(File.dirname(__FILE__), '..', 'lib')
4
-
5
- require 'cascading'
6
-
7
- cascade 'branch', :mode => :local do
8
- flow 'branch' do
9
- source 'input', tap('samples/data/data2.txt')
10
-
11
- assembly 'input' do
12
- split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/
13
-
14
- branch 'branch1' do
15
- group_by 'score1' do
16
- count
17
- end
18
- end
19
-
20
- branch 'branch2' do
21
- group_by 'score2' do
22
- count
23
- end
24
- end
25
- end
26
-
27
- sink 'branch1', tap('output/branch1', :sink_mode => :replace)
28
- sink 'branch2', tap('output/branch2', :sink_mode => :replace)
29
- end
30
- end.complete
data/samples/copy.rb DELETED
@@ -1,20 +0,0 @@
1
- #! /usr/bin/env jruby
2
- $: << File.join(File.dirname(__FILE__), '..', 'lib')
3
-
4
- require 'cascading'
5
-
6
- cascade 'copy', :mode => :local do
7
- flow 'copy' do
8
- # You don't have to curl and cache inputs: tap can fetch via HTTP
9
- #source 'input', tap('http://www.census.gov/genealogy/names/dist.all.last')
10
- source 'input', tap('samples/data/genealogy/names/dist.all.last')
11
-
12
- assembly 'input' do
13
- rename 'line' => 'value'
14
- # We override validate_with because we know line will never be null
15
- reject 'value:string.indexOf("R") == -1', :validate_with => { :value => 'nothinghere' }
16
- end
17
-
18
- sink 'input', tap('output/copy', :sink_mode => :replace)
19
- end
20
- end.complete