itamae-plugin-recipe-spark 0.1.2 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 605132bc36830fea99471ce83f20ab11d186b10f5d83e6762b9b27090ef2417c
4
- data.tar.gz: d65298d73321e36b7ff8a896f097eb48d7c33287f72598e1e677d87b2ed28d67
3
+ metadata.gz: 65e26a2e185e47252ef2e7bc92c7aab513cb19266ed2ad11d78764a2306ccb3d
4
+ data.tar.gz: fc71a105d5189e80525d25839d65bfacb6978e24a5878e809672522dcd10557e
5
5
  SHA512:
6
- metadata.gz: 20d8e643ccbbb54a3a37d323f15a2c07521afdeaa5fd4e94cf470f77a7afae9aeda927fbce8784c76284d5bdcc913db2135b48970fbee35d36afb20a04c3f703
7
- data.tar.gz: bd801b5977fcbaf3a6639a8587eccfd3388480727a69a94255701388b96914a15be4b7072b59b3998e7f2d7b92195c2f8a080e628b57e0c898e63aef65d87395
6
+ metadata.gz: 925e77e886ea6a4bf482a88e496ee5e408bfa1284d5b355178d1034a7df61e5ddee1a23b047660105768c194f01c3c0bf800f397ee3eceb2720996447c69ab2d
7
+ data.tar.gz: 2afc8f9b8f5fa3401f92513af200d38a4d3bc2839e3d973ad0663ab5f340f58514e3db44efb9a5735be5bc3399271f389b12584aa6b3a6cd9b3608b462527cb8
@@ -1,13 +1,37 @@
1
1
  version = ENV['SPARK_VERSION'] || Itamae::Plugin::Recipe::Spark::SPARK_VERSION
2
2
  hadoop_version = ENV['HADOOP_VERSION'] || Itamae::Plugin::Recipe::Hadoop::HADOOP_VERSION
3
+ hadoop_type = if Gem::Version.create(hadoop_version) >= Gem::Version.create('3.2')
4
+ '3.2'
5
+ elsif Gem::Version.create(hadoop_version) >= Gem::Version.create('2.7')
6
+ '2.7'
7
+ else
8
+ raise "Hadoop version #{hadoop_version} is not supported."
9
+ end
3
10
 
4
11
  execute "download spark-#{version}" do
5
12
  cwd '/tmp'
6
13
  command <<-EOF
7
- rm -f spark-#{version}-bin-hadoop2.7.tgz
8
- wget https://archive.apache.org/dist/spark/spark-#{version}/spark-#{version}-bin-hadoop2.7.tgz
14
+ rm -f spark-#{version}-bin-hadoop#{hadoop_type}.tgz
15
+ wget https://archive.apache.org/dist/spark/spark-#{version}/spark-#{version}-bin-hadoop#{hadoop_type}.tgz
9
16
  EOF
10
- not_if "test -e /opt/spark/spark-#{version}-bin-hadoop2.7/INSTALLED || echo #{::File.read(::File.join(::File.dirname(__FILE__), "spark-#{version}_sha256.txt")).strip} | sha256sum -c"
17
+ not_if "test -e /opt/spark/spark-#{version}-bin-hadoop#{hadoop_type}/INSTALLED || echo #{::File.read(::File.join(::File.dirname(__FILE__), "spark-#{version}_hadoop_#{hadoop_type}_sha256.txt")).strip} | sha256sum -c"
18
+ end
19
+
20
+ spark_redshift_version = Itamae::Plugin::Recipe::Spark::SPARK_REDSHIFT_VERSION
21
+ spark_avro_version = Itamae::Plugin::Recipe::Spark::SPARK_AVRO_VERSION
22
+ minimal_json_version = Itamae::Plugin::Recipe::Spark::MINIMAL_JSON_VERSION
23
+ redshift_jdbc_version = Itamae::Plugin::Recipe::Spark::REDSHIFT_JDBC_VERSION
24
+ jets3t_version = Itamae::Plugin::Recipe::Spark::JETS3T_VERSION
25
+ execute "download spark-redshift-#{spark_redshift_version} and dependencies" do
26
+ cwd '/tmp'
27
+ command <<-EOF
28
+ wget -q https://repo1.maven.org/maven2/io/github/spark-redshift-community/spark-redshift_#{spark_redshift_version.split('-').first}/#{spark_redshift_version.split('-').last}/spark-redshift_#{spark_redshift_version}.jar -O spark-redshift_#{spark_redshift_version}.jar
29
+ wget -q https://repo1.maven.org/maven2/org/apache/spark/spark-avro_#{spark_avro_version.split('-').first}/#{spark_avro_version.split('-').last}/spark-avro_#{spark_avro_version}.jar -O spark-avro_#{spark_avro_version}.jar
30
+ wget -q https://repo1.maven.org/maven2/com/eclipsesource/minimal-json/minimal-json/#{minimal_json_version}/minimal-json-#{minimal_json_version}.jar -O minimal-json-#{minimal_json_version}.jar
31
+ wget -q https://s3.amazonaws.com/redshift-downloads/drivers/jdbc/#{redshift_jdbc_version}/RedshiftJDBC42-#{redshift_jdbc_version}.jar -O RedshiftJDBC42-#{redshift_jdbc_version}.jar
32
+ wget -q https://repo1.maven.org/maven2/net/java/dev/jets3t/jets3t/#{jets3t_version}/jets3t-#{jets3t_version}.jar -O jets3t-#{jets3t_version}.jar
33
+ EOF
34
+ not_if "sha256sum -c #{File.join(File.dirname(__FILE__), "spark-redshift_#{spark_redshift_version}_sha256.txt")}"
11
35
  end
12
36
 
13
37
  directory '/opt/spark' do
@@ -20,37 +44,58 @@ end
20
44
  execute "install spark-#{version}" do
21
45
  cwd '/tmp'
22
46
  command <<-EOF
23
- rm -Rf spark-#{version}-bin-hadoop2.7/
24
- tar zxf spark-#{version}-bin-hadoop2.7.tgz
25
- sudo rm -Rf /opt/spark/spark-#{version}-bin-hadoop2.7
26
- sudo mv spark-#{version}-bin-hadoop2.7 /opt/spark/
27
- sudo touch /opt/spark/spark-#{version}-bin-hadoop2.7/INSTALLED
47
+ rm -Rf spark-#{version}-bin-hadoop#{hadoop_type}/
48
+ tar zxf spark-#{version}-bin-hadoop#{hadoop_type}.tgz
49
+ sudo rm -Rf /opt/spark/spark-#{version}-bin-hadoop#{hadoop_type}
50
+ sudo mv spark-#{version}-bin-hadoop#{hadoop_type} /opt/spark/
51
+ sudo touch /opt/spark/spark-#{version}-bin-hadoop#{hadoop_type}/INSTALLED
28
52
  EOF
29
- not_if "test -e /opt/spark/spark-#{version}-bin-hadoop2.7/INSTALLED"
53
+ not_if "test -e /opt/spark/spark-#{version}-bin-hadoop#{hadoop_type}/INSTALLED"
30
54
  end
31
55
 
32
56
  execute 'install hadoop aws jars' do
33
57
  cwd '/opt/hadoop/current'
34
58
  command <<-EOF
35
59
  cp -f share/hadoop/tools/lib/aws-java-sdk-*.jar \
36
- /opt/spark/spark-#{version}-bin-hadoop2.7/jars/
60
+ /opt/spark/spark-#{version}-bin-hadoop#{hadoop_type}/jars/
37
61
  cp -f share/hadoop/tools/lib/hadoop-aws-#{hadoop_version}.jar \
38
- /opt/spark/spark-#{version}-bin-hadoop2.7/jars/
62
+ /opt/spark/spark-#{version}-bin-hadoop#{hadoop_type}/jars/
63
+ EOF
64
+ not_if "test `ls -1 /opt/spark/spark-#{version}-bin-hadoop#{hadoop_type}/jars/ | egrep '(hadoop-)?aws-.*' | wc -l` = 4"
65
+ end
66
+
67
+ execute 'install spark-redshift jars' do
68
+ cwd "/opt/spark/spark-#{version}-bin-hadoop#{hadoop_type}"
69
+ command <<-EOF
70
+ ls -d $(find jars) | grep 'spark-redshift_[0-9.-]*.jar' | xargs rm -f
71
+ cp -f /tmp/spark-redshift_#{spark_redshift_version}.jar \
72
+ jars/
73
+ ls -d $(find jars) | grep 'spark-avro_[0-9.-]*.jar' | xargs rm -f
74
+ cp -f /tmp/spark-avro_#{spark_avro_version}.jar \
75
+ jars/
76
+ ls -d $(find jars) | grep 'minimal-json-[0-9.]*.jar' | xargs rm -f
77
+ cp -f /tmp/minimal-json-#{minimal_json_version}.jar \
78
+ jars/
79
+ ls -d $(find jars) | grep 'RedshiftJDBC42-[0-9.]*.jar' | xargs rm -f
80
+ cp -f /tmp/RedshiftJDBC42-#{redshift_jdbc_version}.jar \
81
+ jars/
82
+ ls -d $(find jars) | grep 'jets3t-[0-9.]*.jar' | xargs rm -f
83
+ cp -f /tmp/jets3t-#{jets3t_version}.jar \
84
+ jars/
39
85
  EOF
40
- not_if "test `ls -1 /opt/spark/spark-#{version}-bin-hadoop2.7/jars/ | egrep '(hadoop-)?aws-.*' | wc -l` = 4"
41
86
  end
42
87
 
43
- template "/opt/spark/spark-#{version}-bin-hadoop2.7/conf/spark-defaults.conf"
88
+ template "/opt/spark/spark-#{version}-bin-hadoop#{hadoop_type}/conf/spark-defaults.conf"
44
89
 
45
90
  if ENV['AWS_ACCESS_KEY_ID'] and ENV['AWS_SECRET_ACCESS_KEY']
46
- template "/opt/spark/spark-#{version}-bin-hadoop2.7/conf/hdfs-site.xml" do
91
+ template "/opt/spark/spark-#{version}-bin-hadoop#{hadoop_type}/conf/hdfs-site.xml" do
47
92
  variables aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
48
93
  aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY']
49
94
  end
50
95
  end
51
96
 
52
97
  link '/opt/spark/current' do
53
- to "/opt/spark/spark-#{version}-bin-hadoop2.7"
98
+ to "/opt/spark/spark-#{version}-bin-hadoop#{hadoop_type}"
54
99
  user 'root'
55
100
  force true
56
101
  end
@@ -0,0 +1 @@
1
+ 13098490936c9931beda3acc4c30cdc5ca707acd1415eebde1030b11903934fe spark-2.4.7-bin-hadoop2.7.tgz
@@ -0,0 +1 @@
1
+ 61543e748d225d859e3e0d488362f9dc4b6496a104ef5a4b297d341a47a49a12 spark-3.0.1-bin-hadoop2.7.tgz
@@ -0,0 +1 @@
1
+ e2d05efa1c657dd5180628a83ea36c97c00f972b4aee935b7affa2e1058b0279 spark-3.0.1-bin-hadoop3.2.tgz
@@ -0,0 +1 @@
1
+ 3a79e324d12f46de44d042641d9340ba03f8ccb3db6f2496a9ccb65431dbb593 spark-3.1.2-bin-hadoop2.7.tgz
@@ -0,0 +1 @@
1
+ 0d9cf9dbbb3b4215afebe7fa4748b012e406dd1f1ad2a61b993ac04adcb94eaa spark-3.1.2-bin-hadoop3.2.tgz
@@ -0,0 +1,5 @@
1
+ a4ded0f2806b50eef02885d2543b16d2f0654744e8f1f45dc913f74b22071b93 spark-redshift_2.12-5.0.3.jar
2
+ b1ffe6dbfb9a6492d9e5ceed645ee49bb5f3ab2e48a3836ee8d91a892995436b spark-avro_2.12-3.0.1.jar
3
+ e1d280900c78f18ae2e00c14e7410a77ba19cf084154b386532846aa6dc81721 minimal-json-0.9.4.jar
4
+ a27fd9e446021557c17ca3697d67d1c1857ce7e92a5c80c1038b767b835ad841 RedshiftJDBC42-1.2.37.1061.jar
5
+ a03f0d25e810ecff30137e0d3148d1bf9afdcf62357ebd1b1af338f7f6115bbb jets3t-0.9.4.jar
@@ -2,14 +2,19 @@ module Itamae
2
2
  module Plugin
3
3
  module Recipe
4
4
  module Spark
5
- VERSION = "0.1.2"
6
-
5
+ VERSION = "0.1.6"
6
+
7
7
  SPARK_VERSION = [
8
- SPARK_VERSION_MAJOR = '2',
9
- SPARK_VERSION_MINOR = '4',
10
- SPARK_VERSION_REVISION = '6'
8
+ SPARK_VERSION_MAJOR = '3',
9
+ SPARK_VERSION_MINOR = '1',
10
+ SPARK_VERSION_REVISION = '2'
11
11
  ].join('.')
12
12
 
13
+ SPARK_REDSHIFT_VERSION = '2.12-5.0.3'
14
+ SPARK_AVRO_VERSION = '2.12-3.0.1'
15
+ MINIMAL_JSON_VERSION = '0.9.4'
16
+ REDSHIFT_JDBC_VERSION = '1.2.37.1061'
17
+ JETS3T_VERSION = '0.9.4'
13
18
  end
14
19
  end
15
20
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: itamae-plugin-recipe-spark
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - ichylinux
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-24 00:00:00.000000000 Z
11
+ date: 2022-01-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: itamae
@@ -92,14 +92,20 @@ files:
92
92
  - itamae-plugin-recipe-spark.gemspec
93
93
  - lib/itamae/plugin/recipe/spark.rb
94
94
  - lib/itamae/plugin/recipe/spark/install.rb
95
- - lib/itamae/plugin/recipe/spark/spark-2.2.0_sha256.txt
96
- - lib/itamae/plugin/recipe/spark/spark-2.3.1_sha256.txt
97
- - lib/itamae/plugin/recipe/spark/spark-2.3.2_sha256.txt
98
- - lib/itamae/plugin/recipe/spark/spark-2.4.0_sha256.txt
99
- - lib/itamae/plugin/recipe/spark/spark-2.4.3_sha256.txt
100
- - lib/itamae/plugin/recipe/spark/spark-2.4.4_sha256.txt
101
- - lib/itamae/plugin/recipe/spark/spark-2.4.5_sha256.txt
102
- - lib/itamae/plugin/recipe/spark/spark-2.4.6_sha256.txt
95
+ - lib/itamae/plugin/recipe/spark/spark-2.2.0_hadoop_2.7_sha256.txt
96
+ - lib/itamae/plugin/recipe/spark/spark-2.3.1_hadoop_2.7_sha256.txt
97
+ - lib/itamae/plugin/recipe/spark/spark-2.3.2_hadoop_2.7_sha256.txt
98
+ - lib/itamae/plugin/recipe/spark/spark-2.4.0_hadoop_2.7_sha256.txt
99
+ - lib/itamae/plugin/recipe/spark/spark-2.4.3_hadoop_2.7_sha256.txt
100
+ - lib/itamae/plugin/recipe/spark/spark-2.4.4_hadoop_2.7_sha256.txt
101
+ - lib/itamae/plugin/recipe/spark/spark-2.4.5_hadoop_2.7_sha256.txt
102
+ - lib/itamae/plugin/recipe/spark/spark-2.4.6_hadoop_2.7_sha256.txt
103
+ - lib/itamae/plugin/recipe/spark/spark-2.4.7_hadoop_2.7_sha256.txt
104
+ - lib/itamae/plugin/recipe/spark/spark-3.0.1_hadoop_2.7_sha256.txt
105
+ - lib/itamae/plugin/recipe/spark/spark-3.0.1_hadoop_3.2_sha256.txt
106
+ - lib/itamae/plugin/recipe/spark/spark-3.1.2_hadoop_2.7_sha256.txt
107
+ - lib/itamae/plugin/recipe/spark/spark-3.1.2_hadoop_3.2_sha256.txt
108
+ - lib/itamae/plugin/recipe/spark/spark-redshift_2.12-5.0.3_sha256.txt
103
109
  - lib/itamae/plugin/recipe/spark/templates/hdfs-site.xml.erb
104
110
  - lib/itamae/plugin/recipe/spark/templates/spark-defaults.conf.erb
105
111
  - lib/itamae/plugin/recipe/spark/version.rb
@@ -122,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
122
128
  - !ruby/object:Gem::Version
123
129
  version: '0'
124
130
  requirements: []
125
- rubygems_version: 3.0.4
131
+ rubygems_version: 3.2.16
126
132
  signing_key:
127
133
  specification_version: 4
128
134
  summary: itamae recipe for apache spark installation