ruby-spark 1.0.0
- checksums.yaml +7 -0
- data/.gitignore +37 -0
- data/Gemfile +47 -0
- data/Guardfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +185 -0
- data/Rakefile +35 -0
- data/TODO.md +7 -0
- data/benchmark/aggregate.rb +33 -0
- data/benchmark/bisect.rb +88 -0
- data/benchmark/custom_marshal.rb +94 -0
- data/benchmark/digest.rb +150 -0
- data/benchmark/enumerator.rb +88 -0
- data/benchmark/performance/prepare.sh +18 -0
- data/benchmark/performance/python.py +156 -0
- data/benchmark/performance/r.r +69 -0
- data/benchmark/performance/ruby.rb +167 -0
- data/benchmark/performance/run-all.sh +160 -0
- data/benchmark/performance/scala.scala +181 -0
- data/benchmark/serializer.rb +82 -0
- data/benchmark/sort.rb +43 -0
- data/benchmark/sort2.rb +164 -0
- data/benchmark/take.rb +28 -0
- data/bin/ruby-spark +8 -0
- data/example/pi.rb +28 -0
- data/ext/ruby_c/extconf.rb +3 -0
- data/ext/ruby_c/murmur.c +158 -0
- data/ext/ruby_c/murmur.h +9 -0
- data/ext/ruby_c/ruby-spark.c +18 -0
- data/ext/ruby_java/Digest.java +36 -0
- data/ext/ruby_java/Murmur2.java +98 -0
- data/ext/ruby_java/RubySparkExtService.java +28 -0
- data/ext/ruby_java/extconf.rb +3 -0
- data/ext/spark/build.sbt +73 -0
- data/ext/spark/project/plugins.sbt +9 -0
- data/ext/spark/sbt/sbt +34 -0
- data/ext/spark/src/main/scala/Exec.scala +91 -0
- data/ext/spark/src/main/scala/MLLibAPI.scala +4 -0
- data/ext/spark/src/main/scala/Marshal.scala +52 -0
- data/ext/spark/src/main/scala/MarshalDump.scala +113 -0
- data/ext/spark/src/main/scala/MarshalLoad.scala +220 -0
- data/ext/spark/src/main/scala/RubyAccumulatorParam.scala +69 -0
- data/ext/spark/src/main/scala/RubyBroadcast.scala +13 -0
- data/ext/spark/src/main/scala/RubyConstant.scala +13 -0
- data/ext/spark/src/main/scala/RubyMLLibAPI.scala +55 -0
- data/ext/spark/src/main/scala/RubyMLLibUtilAPI.scala +21 -0
- data/ext/spark/src/main/scala/RubyPage.scala +34 -0
- data/ext/spark/src/main/scala/RubyRDD.scala +364 -0
- data/ext/spark/src/main/scala/RubySerializer.scala +14 -0
- data/ext/spark/src/main/scala/RubyTab.scala +11 -0
- data/ext/spark/src/main/scala/RubyUtils.scala +15 -0
- data/ext/spark/src/main/scala/RubyWorker.scala +257 -0
- data/ext/spark/src/test/scala/MarshalSpec.scala +84 -0
- data/lib/ruby-spark.rb +1 -0
- data/lib/spark.rb +198 -0
- data/lib/spark/accumulator.rb +260 -0
- data/lib/spark/broadcast.rb +98 -0
- data/lib/spark/build.rb +43 -0
- data/lib/spark/cli.rb +169 -0
- data/lib/spark/command.rb +86 -0
- data/lib/spark/command/base.rb +154 -0
- data/lib/spark/command/basic.rb +345 -0
- data/lib/spark/command/pair.rb +124 -0
- data/lib/spark/command/sort.rb +51 -0
- data/lib/spark/command/statistic.rb +144 -0
- data/lib/spark/command_builder.rb +141 -0
- data/lib/spark/command_validator.rb +34 -0
- data/lib/spark/config.rb +244 -0
- data/lib/spark/constant.rb +14 -0
- data/lib/spark/context.rb +304 -0
- data/lib/spark/error.rb +50 -0
- data/lib/spark/ext/hash.rb +41 -0
- data/lib/spark/ext/integer.rb +25 -0
- data/lib/spark/ext/io.rb +57 -0
- data/lib/spark/ext/ip_socket.rb +29 -0
- data/lib/spark/ext/module.rb +58 -0
- data/lib/spark/ext/object.rb +24 -0
- data/lib/spark/ext/string.rb +24 -0
- data/lib/spark/helper.rb +10 -0
- data/lib/spark/helper/logger.rb +40 -0
- data/lib/spark/helper/parser.rb +85 -0
- data/lib/spark/helper/serialize.rb +71 -0
- data/lib/spark/helper/statistic.rb +93 -0
- data/lib/spark/helper/system.rb +42 -0
- data/lib/spark/java_bridge.rb +19 -0
- data/lib/spark/java_bridge/base.rb +203 -0
- data/lib/spark/java_bridge/jruby.rb +23 -0
- data/lib/spark/java_bridge/rjb.rb +41 -0
- data/lib/spark/logger.rb +76 -0
- data/lib/spark/mllib.rb +100 -0
- data/lib/spark/mllib/classification/common.rb +31 -0
- data/lib/spark/mllib/classification/logistic_regression.rb +223 -0
- data/lib/spark/mllib/classification/naive_bayes.rb +97 -0
- data/lib/spark/mllib/classification/svm.rb +135 -0
- data/lib/spark/mllib/clustering/gaussian_mixture.rb +82 -0
- data/lib/spark/mllib/clustering/kmeans.rb +118 -0
- data/lib/spark/mllib/matrix.rb +120 -0
- data/lib/spark/mllib/regression/common.rb +73 -0
- data/lib/spark/mllib/regression/labeled_point.rb +41 -0
- data/lib/spark/mllib/regression/lasso.rb +100 -0
- data/lib/spark/mllib/regression/linear.rb +124 -0
- data/lib/spark/mllib/regression/ridge.rb +97 -0
- data/lib/spark/mllib/ruby_matrix/matrix_adapter.rb +53 -0
- data/lib/spark/mllib/ruby_matrix/vector_adapter.rb +57 -0
- data/lib/spark/mllib/stat/distribution.rb +12 -0
- data/lib/spark/mllib/vector.rb +185 -0
- data/lib/spark/rdd.rb +1328 -0
- data/lib/spark/sampler.rb +92 -0
- data/lib/spark/serializer.rb +24 -0
- data/lib/spark/serializer/base.rb +170 -0
- data/lib/spark/serializer/cartesian.rb +37 -0
- data/lib/spark/serializer/marshal.rb +19 -0
- data/lib/spark/serializer/message_pack.rb +25 -0
- data/lib/spark/serializer/oj.rb +25 -0
- data/lib/spark/serializer/pair.rb +27 -0
- data/lib/spark/serializer/utf8.rb +25 -0
- data/lib/spark/sort.rb +189 -0
- data/lib/spark/stat_counter.rb +125 -0
- data/lib/spark/storage_level.rb +39 -0
- data/lib/spark/version.rb +3 -0
- data/lib/spark/worker/master.rb +144 -0
- data/lib/spark/worker/spark_files.rb +15 -0
- data/lib/spark/worker/worker.rb +197 -0
- data/ruby-spark.gemspec +36 -0
- data/spec/generator.rb +37 -0
- data/spec/inputs/lorem_300.txt +316 -0
- data/spec/inputs/numbers/1.txt +50 -0
- data/spec/inputs/numbers/10.txt +50 -0
- data/spec/inputs/numbers/11.txt +50 -0
- data/spec/inputs/numbers/12.txt +50 -0
- data/spec/inputs/numbers/13.txt +50 -0
- data/spec/inputs/numbers/14.txt +50 -0
- data/spec/inputs/numbers/15.txt +50 -0
- data/spec/inputs/numbers/16.txt +50 -0
- data/spec/inputs/numbers/17.txt +50 -0
- data/spec/inputs/numbers/18.txt +50 -0
- data/spec/inputs/numbers/19.txt +50 -0
- data/spec/inputs/numbers/2.txt +50 -0
- data/spec/inputs/numbers/20.txt +50 -0
- data/spec/inputs/numbers/3.txt +50 -0
- data/spec/inputs/numbers/4.txt +50 -0
- data/spec/inputs/numbers/5.txt +50 -0
- data/spec/inputs/numbers/6.txt +50 -0
- data/spec/inputs/numbers/7.txt +50 -0
- data/spec/inputs/numbers/8.txt +50 -0
- data/spec/inputs/numbers/9.txt +50 -0
- data/spec/inputs/numbers_0_100.txt +101 -0
- data/spec/inputs/numbers_1_100.txt +100 -0
- data/spec/lib/collect_spec.rb +42 -0
- data/spec/lib/command_spec.rb +68 -0
- data/spec/lib/config_spec.rb +64 -0
- data/spec/lib/context_spec.rb +163 -0
- data/spec/lib/ext_spec.rb +72 -0
- data/spec/lib/external_apps_spec.rb +45 -0
- data/spec/lib/filter_spec.rb +80 -0
- data/spec/lib/flat_map_spec.rb +100 -0
- data/spec/lib/group_spec.rb +109 -0
- data/spec/lib/helper_spec.rb +19 -0
- data/spec/lib/key_spec.rb +41 -0
- data/spec/lib/manipulation_spec.rb +114 -0
- data/spec/lib/map_partitions_spec.rb +87 -0
- data/spec/lib/map_spec.rb +91 -0
- data/spec/lib/mllib/classification_spec.rb +54 -0
- data/spec/lib/mllib/clustering_spec.rb +35 -0
- data/spec/lib/mllib/matrix_spec.rb +32 -0
- data/spec/lib/mllib/regression_spec.rb +116 -0
- data/spec/lib/mllib/vector_spec.rb +77 -0
- data/spec/lib/reduce_by_key_spec.rb +118 -0
- data/spec/lib/reduce_spec.rb +131 -0
- data/spec/lib/sample_spec.rb +46 -0
- data/spec/lib/serializer_spec.rb +13 -0
- data/spec/lib/sort_spec.rb +58 -0
- data/spec/lib/statistic_spec.rb +168 -0
- data/spec/lib/whole_text_files_spec.rb +33 -0
- data/spec/spec_helper.rb +39 -0
- metadata +301 -0
data/ext/ruby_c/murmur.h
ADDED
data/ext/ruby_c/ruby-spark.c
ADDED
@@ -0,0 +1,18 @@
#include "ruby.h"
#include "murmur.h"


VALUE SparkModule;
VALUE SparkDigestModule;
VALUE SparkDigestMurmur2Class;


void Init_ruby_spark_ext()
{
  SparkModule = rb_define_module("Spark");
  SparkDigestModule = rb_define_module_under(SparkModule, "Digest");
  SparkDigestMurmur2Class = rb_define_class_under(SparkDigestModule, "Murmur2", rb_cObject);

  rb_define_singleton_method(SparkDigestModule, "portable_hash", method_portable_hash, -1);
  rb_define_singleton_method(SparkDigestMurmur2Class, "digest", method_murmur2_digest, -1);
}
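For orientation, here is a minimal, hypothetical Ruby call of the two singleton methods registered above. The method names come from the rb_define_singleton_method calls; the argument shapes are inferred from the Java implementations later in this diff, and the key values are made up.

# Sketch only; assumes the compiled extension is on the load path.
require 'ruby_spark_ext'

Spark::Digest.portable_hash('some key')     # 64-bit hash using the fixed PORTABLE_HASH_SEED
Spark::Digest::Murmur2.digest('some key')   # 64-bit hash with the default seed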
data/ext/ruby_java/Digest.java
ADDED
@@ -0,0 +1,36 @@
import org.jruby.Ruby;
import org.jruby.RubyModule;
import org.jruby.RubyObject;
import org.jruby.RubyClass;
import org.jruby.RubyString;
import org.jruby.RubyFixnum;
import org.jruby.anno.JRubyModule;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;

@JRubyModule(name="Spark::Digest")
public class Digest extends RubyObject{

  // Has to be the same as in the C extension
  final static long PORTABLE_HASH_SEED = 16154832;

  public Digest(final Ruby ruby, RubyClass rubyClass) {
    super(ruby, rubyClass);
  }

  @JRubyMethod(module=true)
  public static IRubyObject portable_hash(ThreadContext context, IRubyObject self, IRubyObject arg) {
    Ruby ruby = self.getRuntime();

    RubyString keyString = (RubyString)arg;

    long hash = Murmur2.hash64(keyString.getBytes(), (int)keyString.length().getLongValue(), PORTABLE_HASH_SEED);

    RubyFixnum result = new RubyFixnum(ruby, hash);

    return result;
  }

}
data/ext/ruby_java/Murmur2.java
ADDED
@@ -0,0 +1,98 @@
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.RubyObject;
import org.jruby.RubyString;
import org.jruby.RubyFixnum;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;

/** Murmur hash 2.0.
 *
 * The murmur hash is a relatively fast hash function from
 * http://murmurhash.googlepages.com/ for platforms with efficient
 * multiplication.
 *
 * http://d3s.mff.cuni.cz/~holub/sw/javamurmurhash/
 *
 */

@JRubyClass(name="Spark::Digest::Murmur2")
public class Murmur2 extends RubyObject {

  public Murmur2(final Ruby ruby, RubyClass rubyClass) {
    super(ruby, rubyClass);
  }

  @JRubyMethod(required=1, optional=1, module=true)
  public static IRubyObject digest(ThreadContext context, IRubyObject self, IRubyObject[] args) {
    Ruby ruby = context.getRuntime();

    RubyString keyString = (RubyString)args[0];
    long seed;

    if(args.length > 1){
      RubyFixnum rb_seed = (RubyFixnum)args[1];
      seed = rb_seed.getLongValue();
    }
    else{
      seed = 0;
    }

    long hash = hash64(keyString.getBytes(), (int)keyString.length().getLongValue(), seed);

    RubyFixnum result = new RubyFixnum(ruby, hash);
    return result;
  }


  /** Generates 64 bit hash from byte array of the given length and seed.
   *
   * @param data byte array to hash
   * @param length length of the array to hash
   * @param seed initial seed value
   * @return 64 bit hash of the given array
   */
  public static long hash64(final byte[] data, int length, long seed) {
    final long m = 0xc6a4a7935bd1e995L;
    final int r = 47;

    long h = (seed&0xffffffffl)^(length*m);

    int length8 = length/8;

    for (int i=0; i<length8; i++) {
      final int i8 = i*8;
      long k =  ((long)data[i8+0]&0xff)      +(((long)data[i8+1]&0xff)<<8)
              +(((long)data[i8+2]&0xff)<<16) +(((long)data[i8+3]&0xff)<<24)
              +(((long)data[i8+4]&0xff)<<32) +(((long)data[i8+5]&0xff)<<40)
              +(((long)data[i8+6]&0xff)<<48) +(((long)data[i8+7]&0xff)<<56);

      k *= m;
      k ^= k >>> r;
      k *= m;

      h ^= k;
      h *= m;
    }

    switch (length%8) {
    case 7: h ^= (long)(data[(length&~7)+6]&0xff) << 48;
    case 6: h ^= (long)(data[(length&~7)+5]&0xff) << 40;
    case 5: h ^= (long)(data[(length&~7)+4]&0xff) << 32;
    case 4: h ^= (long)(data[(length&~7)+3]&0xff) << 24;
    case 3: h ^= (long)(data[(length&~7)+2]&0xff) << 16;
    case 2: h ^= (long)(data[(length&~7)+1]&0xff) << 8;
    case 1: h ^= (long)(data[length&~7]&0xff);
            h *= m;
    };

    h ^= h >>> r;
    h *= m;
    h ^= h >>> r;

    return h;
  }

}
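Because digest is declared with required=1 and optional=1, the second argument is an optional seed, and the else branch above falls back to 0 when it is omitted. A hedged Ruby sketch of that behaviour (values are illustrative only):

key = 'some key'

Spark::Digest::Murmur2.digest(key)      # seed defaults to 0
Spark::Digest::Murmur2.digest(key, 0)   # same result as the call without a seed
Spark::Digest::Murmur2.digest(key, 42)  # explicit seed, different 64-bit hash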
data/ext/ruby_java/RubySparkExtService.java
ADDED
@@ -0,0 +1,28 @@
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.RubyModule;
import org.jruby.runtime.ObjectAllocator;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.runtime.load.BasicLibraryService;

public class RubySparkExtService implements BasicLibraryService
{
  public boolean basicLoad(final Ruby ruby) throws java.io.IOException {

    RubyModule sparkModule = ruby.defineModule("Spark");
    RubyModule sparkDigestModule = sparkModule.defineModuleUnder("Digest");
    RubyClass sparkDigestMurmur2Class = sparkDigestModule.defineClassUnder("Murmur2", ruby.getObject(), sparkDigestMurmur2Allocator);

    sparkDigestModule.defineAnnotatedMethods(Digest.class);
    sparkDigestMurmur2Class.defineAnnotatedMethods(Murmur2.class);

    return true;
  }

  public static ObjectAllocator sparkDigestMurmur2Allocator = new ObjectAllocator() {
    public IRubyObject allocate(Ruby ruby, RubyClass rubyClass) {
      return new Murmur2(ruby, rubyClass);
    }
  };

}
data/ext/spark/build.sbt
ADDED
@@ -0,0 +1,73 @@
import AssemblyKeys._

assemblySettings

// Default values
val defaultScalaVersion = "2.10.4"
val defaultSparkVersion = "1.3.0"
val defaultSparkCoreVersion = "2.10"
val defaultSparkHome = "target"
val defaultHadoopVersion = "1.0.4"

// Values
val _scalaVersion = scala.util.Properties.envOrElse("SCALA_VERSION", defaultScalaVersion)
val _sparkVersion = scala.util.Properties.envOrElse("SPARK_VERSION", defaultSparkVersion)
val _sparkCoreVersion = scala.util.Properties.envOrElse("SPARK_CORE_VERSION", defaultSparkCoreVersion)
val _sparkHome = scala.util.Properties.envOrElse("SPARK_HOME", defaultSparkHome)
val _hadoopVersion = scala.util.Properties.envOrElse("HADOOP_VERSION", defaultHadoopVersion)

// Project settings
name := "ruby-spark"

version := "1.0.0"

scalaVersion := _scalaVersion

javacOptions ++= Seq("-source", "1.7", "-target", "1.7")

// Jar target folder
artifactPath in Compile in packageBin := file(s"${_sparkHome}/ruby-spark.jar")
outputPath in packageDependency := file(s"${_sparkHome}/ruby-spark-deps.jar")

// Protocol buffer support
seq(sbtprotobuf.ProtobufPlugin.protobufSettings: _*)

// Additional libraries
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % _sparkVersion excludeAll(ExclusionRule(organization = "org.apache.hadoop")),
  "org.apache.spark" %% "spark-graphx" % _sparkVersion,
  "org.apache.spark" %% "spark-mllib" % _sparkVersion,
  "org.apache.hadoop" % "hadoop-client" % _hadoopVersion,
  "com.github.fommil.netlib" % "all" % "1.1.2",
  "org.scalatest" % "scalatest_2.10" % "2.2.1" % "test"
)

// Repositories
resolvers ++= Seq(
  "JBoss Repository" at "http://repository.jboss.org/nexus/content/repositories/releases/",
  "Spray Repository" at "http://repo.spray.cc/",
  "Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos/",
  "Akka Repository" at "http://repo.akka.io/releases/",
  "Twitter4J Repository" at "http://twitter4j.org/maven2/",
  "Apache HBase" at "https://repository.apache.org/content/repositories/releases",
  "Twitter Maven Repo" at "http://maven.twttr.com/",
  "scala-tools" at "https://oss.sonatype.org/content/groups/scala-tools",
  "Typesafe repository" at "http://repo.typesafe.com/typesafe/releases/",
  "Second Typesafe repo" at "http://repo.typesafe.com/typesafe/maven-releases/",
  "Mesosphere Public Repository" at "http://downloads.mesosphere.io/maven",
  Resolver.sonatypeRepo("public")
)

// Merge strategy
mergeStrategy in assembly <<= (mergeStrategy in assembly) { (old) =>
  {
    case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
    case m if m.startsWith("META-INF") => MergeStrategy.discard
    case PathList("javax", "servlet", xs @ _*) => MergeStrategy.first
    case PathList("org", "apache", xs @ _*) => MergeStrategy.first
    case PathList("org", "jboss", xs @ _*) => MergeStrategy.first
    case "about.html" => MergeStrategy.rename
    case "reference.conf" => MergeStrategy.concat
    case _ => MergeStrategy.first
  }
}
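build.sbt reads every version from the environment via scala.util.Properties.envOrElse, so the defaults above can be overridden without editing the file. A sketch of driving that from Ruby; the sbt/sbt launcher is the script added below, everything else here is illustrative rather than part of the gem's build API:

# Illustrative only: set the variables read by build.sbt, then run the assembly.
ENV['SPARK_VERSION']  = '1.3.0'
ENV['HADOOP_VERSION'] = '1.0.4'
ENV['SPARK_HOME']     = 'target'   # ruby-spark.jar will be written here

Dir.chdir('ext/spark') do
  system('sbt/sbt', 'assembly') or abort('sbt assembly failed')
end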
data/ext/spark/project/plugins.sbt
ADDED
@@ -0,0 +1,9 @@
resolvers += Resolver.url("artifactory", url("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases"))(Resolver.ivyStylePatterns)

resolvers += "Typesafe Repository" at "http://repo.typesafe.com/typesafe/releases/"

resolvers += "Spray Repository" at "http://repo.spray.cc/"

addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.10.2")

addSbtPlugin("com.github.gseitz" % "sbt-protobuf" % "0.3.3")
data/ext/spark/sbt/sbt
ADDED
@@ -0,0 +1,34 @@
#!/bin/bash

# This script launches sbt for this project. If present it uses the system
# version of sbt. If there is no system version of sbt it attempts to download
# sbt locally.
SBT_VERSION=0.13.7
URL1=http://typesafe.artifactoryonline.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
URL2=http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
JAR=sbt/sbt-launch-${SBT_VERSION}.jar

# Download sbt launch jar if it hasn't been downloaded yet
if [ ! -f ${JAR} ]; then
  # Download
  printf "Attempting to fetch sbt\n"
  JAR_DL=${JAR}.part
  if hash curl 2>/dev/null; then
    (curl --progress-bar ${URL1} > ${JAR_DL} || curl --progress-bar ${URL2} > ${JAR_DL}) && mv ${JAR_DL} ${JAR}
  elif hash wget 2>/dev/null; then
    (wget --progress=bar ${URL1} -O ${JAR_DL} || wget --progress=bar ${URL2} -O ${JAR_DL}) && mv ${JAR_DL} ${JAR}
  else
    printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n"
    exit -1
  fi
fi
if [ ! -f ${JAR} ]; then
  # We failed to download
  printf "Our attempt to download sbt locally to ${JAR} failed. Please install sbt manually from http://www.scala-sbt.org/\n"
  exit -1
fi
printf "Launching sbt from ${JAR}\n"
java \
  -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m \
  -jar ${JAR} \
  "$@"
data/ext/spark/src/main/scala/Exec.scala
ADDED
@@ -0,0 +1,91 @@
package org.apache.spark.api.ruby

import java.io.{File, FileOutputStream, InputStreamReader, BufferedReader}

import scala.collection.JavaConversions._

import org.apache.spark.{SparkEnv, Logging}
import org.apache.spark.util._


/* =================================================================================================
 * class FileCommand
 * =================================================================================================
 *
 * Save the command to a file and then execute it, because from Scala you cannot simply run
 * something like "bash --norc -i -c 'source .zshrc; ruby master.rb'"
 */

class FileCommand(command: String) extends Logging {

  var pb: ProcessBuilder = null
  var file: File = null

  // The command is already complete.
  def this(command: String, env: SparkEnv) = {
    this(command)
    create(env)
  }

  // Template must contain %s, which will be replaced by the command
  def this(template: String, command: String, env: SparkEnv, envVars: Map[String, String]) = {
    this(template.format(command), env)
    setEnvVars(envVars)
  }

  private def create(env: SparkEnv) {
    val dir = new File(env.sparkFilesDir)
    val ext = if(Utils.isWindows) ".cmd" else ".sh"
    val shell = if(Utils.isWindows) "cmd" else "bash"

    file = File.createTempFile("command", ext, dir)

    val out = new FileOutputStream(file)
    out.write(command.getBytes)
    out.close

    logInfo(s"New FileCommand at ${file.getAbsolutePath}")

    pb = new ProcessBuilder(shell, file.getAbsolutePath)
  }

  def setEnvVars(vars: Map[String, String]) {
    pb.environment().putAll(vars)
  }

  def run = {
    new ExecutedFileCommand(pb.start)
  }
}


/* =================================================================================================
 * class ExecutedFileCommand
 * =================================================================================================
 *
 * Represents a process executed from a file.
 */

class ExecutedFileCommand(process: Process) {

  var reader: BufferedReader = null

  def readLine = {
    openInput
    reader.readLine.toString.trim
  }

  def openInput {
    if(reader != null){
      return
    }

    val input = new InputStreamReader(process.getInputStream)
    reader = new BufferedReader(input)
  }

  // Delegation
  def destroy = process.destroy
  def getInputStream = process.getInputStream
  def getErrorStream = process.getErrorStream
}
data/ext/spark/src/main/scala/Marshal.scala
ADDED
@@ -0,0 +1,52 @@
package org.apache.spark.api.ruby.marshal

import java.io.{DataInputStream, DataOutputStream, ByteArrayInputStream, ByteArrayOutputStream}

import scala.collection.mutable.ArrayBuffer
import scala.collection.JavaConverters._


/* =================================================================================================
 * object Marshal
 * =================================================================================================
 */
object Marshal {
  def load(bytes: Array[Byte]) = {
    val is = new DataInputStream(new ByteArrayInputStream(bytes))

    val majorVersion = is.readUnsignedByte // 4
    val minorVersion = is.readUnsignedByte // 8

    (new MarshalLoad(is)).load
  }

  def dump(data: Any) = {
    val aos = new ByteArrayOutputStream
    val os = new DataOutputStream(aos)

    os.writeByte(4)
    os.writeByte(8)

    (new MarshalDump(os)).dump(data)
    aos.toByteArray
  }
}


/* =================================================================================================
 * class IterableMarshaller
 * =================================================================================================
 */
class IterableMarshaller(iter: Iterator[Any]) extends Iterator[Array[Byte]] {
  private val buffer = new ArrayBuffer[Any]

  override def hasNext: Boolean = iter.hasNext

  override def next(): Array[Byte] = {
    while (iter.hasNext) {
      buffer += iter.next()
    }

    Marshal.dump(buffer)
  }
}
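The writeByte(4)/writeByte(8) calls and the readUnsignedByte comments correspond to Ruby's Marshal format version 4.8, which keeps the Scala side byte-compatible with the data the Ruby workers produce. A quick Ruby illustration of those leading version bytes:

payload = Marshal.dump([1, 2, 3])
payload.bytes.first(2)   # => [4, 8]  (major and minor format version)

Marshal.load(payload)    # => [1, 2, 3]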