spark_toolkit 0.1.0-java → 0.1.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/docs/Spark.md +1 -0
- data/lib/spark_toolkit/hadoop/conf/configuration.rb +7 -0
- data/lib/spark_toolkit/spark/client.rb +12 -3
- data/lib/spark_toolkit/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 56d3aadfc0f28d592c7051ee0fe9427df5f73383
+  data.tar.gz: 0fa58599de96d34d0c1fcf613e288073867ce6b8
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 20f30636b201ede56541e852fce3828211e4ee3aad31f83155afd8b58168a143d6c8c8bac913101be49790aa4acc601c55f2457039c722da9d0f2e666db72b4b
+  data.tar.gz: 053cf2ee78cd8bf2707aaa0c255ce21cb0866bf56b43b84056a262b8d8b1b2f9f570f6869dbad6874f6997064388a48bc81e2d7932163b4dd3801077572901e4
data/README.md
CHANGED
data/docs/Spark.md
CHANGED
@@ -22,6 +22,7 @@ args = ["--class", "org.apache.spark.deploy.PythonRunner",
         "--arg", 2]
 spark_conf.yarn_deploy_mode(:cluster) # or :client
 spark_client.is_python_job(true)
+spark_client.avtive_kerberos # If you want to submit job to secure cluster
 # Submit your job to YARN and get its app_id for query
 yarn_app_id = spark_client.yarn_submit(args)
 # Or run as client, print all output into console
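The Spark.md change adds a single documented step: call avtive_kerberos before submitting to a Kerberos-secured cluster. Read as one consolidated sketch, the documented flow looks like the snippet below, assuming spark_conf and spark_client were constructed earlier in Spark.md (their setup is not part of this diff):

    # Sketch only: spark_conf / spark_client construction is assumed from
    # the surrounding Spark.md documentation, which this diff does not show.
    args = ["--class", "org.apache.spark.deploy.PythonRunner",
            "--arg", 2]
    spark_conf.yarn_deploy_mode(:cluster)      # or :client
    spark_client.is_python_job(true)           # mark the job as PySpark
    spark_client.avtive_kerberos               # needed on secure clusters
    yarn_app_id = spark_client.yarn_submit(args)  # app id for later queries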
data/lib/spark_toolkit/hadoop/conf/configuration.rb
CHANGED
@@ -3,6 +3,7 @@ module SparkToolkit
   Configuration = Java::OrgApacheHadoopConf::Configuration
   class Configuration
     java_import org.apache.hadoop.fs.Path
+    java_import org.apache.hadoop.security.UserGroupInformation

     alias_method :initialise, :initialize
     def initialize(opts={})
@@ -21,6 +22,12 @@ module SparkToolkit
       add_resource_java(Path.new(f))
     end

+    def krb_login(principle, keytab)
+      set('hadoop.security.authentication', 'kerberos')
+      UserGroupInformation.set_configuration(self)
+      UserGroupInformation.login_user_from_keytab(principle, keytab)
+    end
+
     def []=(k, v)
       set(k, v)
     end
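The new krb_login helper switches the wrapped Hadoop Configuration to Kerberos authentication and performs a keytab login through Hadoop's UserGroupInformation. A minimal usage sketch, with an invented principal and keytab path as placeholders:

    # Placeholder principal and keytab path; substitute your own values.
    conf = SparkToolkit::Configuration.new
    conf.krb_login('spark/node1.example.com@EXAMPLE.COM',
                   '/etc/security/keytabs/spark.keytab')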
data/lib/spark_toolkit/spark/client.rb
CHANGED
@@ -69,9 +69,18 @@ module SparkToolkit
       @sconf.set("spark.hadoop.hadoop.security.authentication", "kerberos")
       @sconf.set("spark.hadoop.hadoop.security.authorization", "true")

+      UserGroupInformation.get_login_user.check_tgt_and_relogin_from_keytab
       UserGroupInformation.set_configuration(SparkHadoopUtil.get.newConfiguration(@sconf))
-      credentials = UserGroupInformation.
-      SparkHadoopUtil.get.
+      credentials = UserGroupInformation.get_login_user.get_credentials
+      SparkHadoopUtil.get.add_current_user_credentials(credentials)
+    end
+
+    def executor_cores n
+      @sconf.set_property('spark.executor.cores', n.to_s)
+    end
+
+    def num_executors n
+      @sconf.set_property('spark.executor.instances', n.to_s)
     end

     private
@@ -83,7 +92,7 @@ module SparkToolkit
       @sconf.set('spark.submit.deployMode', 'cluster')
       end

-      java.lang.System.
+      java.lang.System.set_property("SPARK_YARN_MODE", "true")
     end
   end
 end
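Besides completing the credential hand-off (re-login from the keytab, then registering the login user's credentials via SparkHadoopUtil), this change adds two resource setters that map onto standard Spark properties. A sketch of calling them, assuming the same client object as in the Spark.md example; the values are illustrative:

    spark_client.executor_cores 4     # spark.executor.cores = "4"
    spark_client.num_executors 10     # spark.executor.instances = "10"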
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: spark_toolkit
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: java
 authors:
 - Yuli Mo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-02-
+date: 2017-02-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler