cassandra_model_spark 0.0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +17 -0
  3. data/bin/cmodel-spark-build +7 -0
  4. data/bin/cmodel-spark-env.rb +11 -0
  5. data/bin/cmodel-spark-master +22 -0
  6. data/bin/cmodel-spark-run-master +4 -0
  7. data/bin/cmodel-spark-run-master.sh +8 -0
  8. data/bin/cmodel-spark-run-slave +4 -0
  9. data/bin/cmodel-spark-run-slave.sh +8 -0
  10. data/bin/cmodel-spark-slaves +22 -0
  11. data/ext/scala_helper/bin/load-spark-env.sh +63 -0
  12. data/ext/scala_helper/bin/spark-class +87 -0
  13. data/ext/scala_helper/build.sbt +62 -0
  14. data/ext/scala_helper/cassandra_helper.scala +23 -0
  15. data/ext/scala_helper/data_type_helper.scala +27 -0
  16. data/ext/scala_helper/marshal_loader.scala +204 -0
  17. data/ext/scala_helper/marshal_row_mapping.scala +85 -0
  18. data/ext/scala_helper/project/plugins.sbt +6 -0
  19. data/ext/scala_helper/sbin/spark-config.sh +30 -0
  20. data/ext/scala_helper/sbin/spark-daemon.sh +223 -0
  21. data/ext/scala_helper/schema_builder.scala +35 -0
  22. data/ext/scala_helper/worker.scala +13 -0
  23. data/lib/cassandra_model_spark/build.rb +24 -0
  24. data/lib/cassandra_model_spark/column_cast.rb +44 -0
  25. data/lib/cassandra_model_spark/connection_cache.rb +9 -0
  26. data/lib/cassandra_model_spark/data_frame.rb +374 -0
  27. data/lib/cassandra_model_spark/java_bridge.rb +91 -0
  28. data/lib/cassandra_model_spark/java_classes.rb +36 -0
  29. data/lib/cassandra_model_spark/launcher.rb +150 -0
  30. data/lib/cassandra_model_spark/query_builder.rb +37 -0
  31. data/lib/cassandra_model_spark/raw_connection.rb +47 -0
  32. data/lib/cassandra_model_spark/record.rb +18 -0
  33. data/lib/cassandra_model_spark/spark.rb +33 -0
  34. data/lib/cassandra_model_spark.rb +42 -0
  35. metadata +127 -0
@@ -0,0 +1,47 @@
1
module CassandraModel
  # Spark additions to RawConnection: lazily builds a Spark context whose
  # settings are derived from the connection's +config+ hash.
  class RawConnection
    # Returns the memoized JavaSparkContext for this connection.
    #
    # On first use the context is created from +spark_conf+ and the helper
    # jar located under +Spark.classpath+ is added to the underlying context.
    def java_spark_context
      @spark_context ||= JavaSparkContext.new(spark_conf).tap do |context|
        context.sc.addJar("#{Spark.classpath}/cmodel_scala_helper.jar")
      end
    end

    # Returns the +sc+ object exposed by the Java Spark context.
    def spark_context
      java_spark_context.sc
    end

    private

    # Builds (once) the SparkConf for this connection.
    #
    # Defaults are set first (app name, a local master, and the first
    # configured host as the Cassandra connection host); every entry of the
    # flattened +:spark+ configuration is applied afterwards, so user-supplied
    # settings override the defaults.
    def spark_conf
      @spark_conf ||= SparkConf.new(true).tap do |conf|
        conf.set('spark.app.name', 'cassandra_model_spark')
        conf.set('spark.master', 'local[*]')
        conf.set('spark.cassandra.connection.host', config[:hosts].first)
        flat_spark_config.each { |key, value| conf.set(key, spark_conf_value(value)) }
      end
    end

    # Converts a configuration value into the String that SparkConf#set
    # expects. Settings such as `cores: { max: 4 }` or `enabled: true` are not
    # Strings, so they are stringified here rather than left to the Java
    # bridge. +nil+ is passed through unchanged so that a missing value is
    # still reported by Spark itself instead of silently becoming "".
    def spark_conf_value(value)
      value.nil? ? nil : value.to_s
    end

    # Flattens a nested configuration hash into a single-level hash whose
    # keys are dot-separated strings.
    #
    #   flat_spark_config(spark: { cores: { max: 4 } })
    #   # => { 'spark.cores.max' => 4 }
    #
    # @param config [Hash] nested configuration (defaults to +spark_config+)
    # @return [Hash{String => Object}] flattened settings; values are untouched
    def flat_spark_config(config = spark_config)
      config.each_with_object({}) do |(key, value), memo|
        if value.is_a?(Hash)
          memo.merge!(child_spark_conf(key, value))
        else
          memo[key.to_s] = value
        end
      end
    end

    # Flattens +value+ and prefixes each resulting key with "<key>.".
    #
    # @param key [#to_s] parent key
    # @param value [Hash] nested settings stored under +key+
    # @return [Hash{String => Object}]
    def child_spark_conf(key, value)
      flat_spark_config(value).each_with_object({}) do |(child_key, child_value), child_memo|
        child_memo["#{key}.#{child_key}"] = child_value
      end
    end

    # The +:spark+ portion of the connection config, still wrapped under its
    # +:spark+ key so that flattened keys start with "spark.".
    def spark_config
      config.slice(:spark)
    end
  end
end
@@ -0,0 +1,18 @@
1
module CassandraModel
  # Spark-related class-level helpers added to Record.
  class Record
    class << self
      # Returns the RDD for this record's table, creating it on first use.
      #
      # The RDD is obtained from SparkCassandraHelper.cassandra_table using the
      # table connection's Spark context, the connection's configured
      # keyspace, and +table_name+. The result is cached in +@spark_rdd+.
      def rdd
        return @spark_rdd if @spark_rdd

        context = table.connection.spark_context
        keyspace = table.connection.config[:keyspace]
        @spark_rdd = SparkCassandraHelper.cassandra_table(context, keyspace, table_name)
      end

      # No row mapping is defined at this level; always returns +nil+.
      def rdd_row_mapping
        nil
      end

      # Delegates to +count+ on the RDD returned by +rdd+.
      def count
        rdd.count
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
require 'fileutils'

module CassandraModel
  # Locations used by the Spark integration: the gem root, the Spark home
  # directory and the directory holding jars.
  module Spark
    class << self
      # Absolute path three levels above this file (memoized).
      def root
        @gem_root ||= File.expand_path('../../..', __FILE__)
      end

      # Spark home directory: the SPARK_HOME environment variable when it is
      # set to a non-blank value, otherwise +default_home+ (memoized).
      def home
        @home ||= (env_path('SPARK_HOME') || default_home)
      end

      # Directory searched for jars: the SPARK_CLASSPATH environment variable
      # when it is set to a non-blank value, otherwise +default_classpath+
      # (memoized).
      def classpath
        @classpath ||= (env_path('SPARK_CLASSPATH') || default_classpath)
      end

      private

      # Reads an environment variable, treating unset and blank values alike.
      #
      # An empty string is truthy in Ruby, so `ENV[name] || default` would
      # accept `SPARK_HOME=""` and resolve paths against the current working
      # directory instead of falling back to the default.
      #
      # @param name [String] environment variable name
      # @return [String, nil] the value, or nil when unset or blank
      def env_path(name)
        value = ENV[name]
        value unless value.nil? || value.strip.empty?
      end

      # `lib` directory under +home+; created if it does not exist.
      def default_classpath
        File.expand_path('./lib/', home).tap do |path|
          FileUtils.mkdir_p(path)
        end
      end

      # `~/.cassandra_model_spark`; created if it does not exist.
      def default_home
        File.expand_path('~/.cassandra_model_spark').tap do |path|
          FileUtils.mkdir_p(path)
        end
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ #--
2
+ # Copyright 2015 Thomas RM Rogers
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #++
16
+
17
+ require 'yaml'
18
+ require 'logger'
19
+
20
+ require 'concurrent'
21
+ require 'cassandra'
22
+ require 'active_support/all'
23
+ require 'active_support/core_ext/class/attribute_accessors'
24
+ require 'thomas_utils'
25
+ require 'batch_reactor'
26
+ require 'cassandra_model'
27
+ require 'rjb' unless RUBY_ENGINE == 'jruby' || CassandraModel.const_defined?('NO_BRIDGE')
28
+ require 'cassandra_model_spark/spark'
29
+
30
# Java bridge bootstrap; skipped entirely when CassandraModel::NO_BRIDGE is
# defined.
unless CassandraModel.const_defined?('NO_BRIDGE')
  require 'cassandra_model_spark/java_bridge'

  # Require every jar found directly under the Spark classpath directory.
  Dir.glob("#{CassandraModel::Spark.classpath}/*.jar").each do |jar_path|
    require jar_path
  end

  # NOTE(review): initialize_java_engine is not defined in this file;
  # presumably it comes from java_bridge — confirm.
  initialize_java_engine
  require 'cassandra_model_spark/java_classes'
end
36
+
37
+ require 'cassandra_model_spark/raw_connection'
38
+ require 'cassandra_model_spark/connection_cache'
39
+ require 'cassandra_model_spark/record'
40
+ require 'cassandra_model_spark/query_builder'
41
+ require 'cassandra_model_spark/data_frame'
42
+ require 'cassandra_model_spark/column_cast'
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cassandra_model_spark
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1.5
5
+ platform: ruby
6
+ authors:
7
+ - Thomas RM Rogers
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-12-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: cassandra_model
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.16
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.16
27
+ - !ruby/object:Gem::Dependency
28
+ name: thomas_utils
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 0.1.16
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 0.1.16
41
+ - !ruby/object:Gem::Dependency
42
+ name: rjb
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 1.5.4
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.5.4
55
+ description: |-
56
+ Spark integration for cassandra_model.
57
+ Get high-performance data analytics with the ease of cassandra_model.
58
+ Inspired by the ruby-spark gem.
59
+ email: thomasrogers03@gmail.com
60
+ executables:
61
+ - cmodel-spark-build
62
+ - cmodel-spark-env.rb
63
+ - cmodel-spark-master
64
+ - cmodel-spark-slaves
65
+ - cmodel-spark-run-master
66
+ - cmodel-spark-run-slave
67
+ extensions: []
68
+ extra_rdoc_files: []
69
+ files:
70
+ - README.md
71
+ - bin/cmodel-spark-build
72
+ - bin/cmodel-spark-env.rb
73
+ - bin/cmodel-spark-master
74
+ - bin/cmodel-spark-run-master
75
+ - bin/cmodel-spark-run-master.sh
76
+ - bin/cmodel-spark-run-slave
77
+ - bin/cmodel-spark-run-slave.sh
78
+ - bin/cmodel-spark-slaves
79
+ - ext/scala_helper/bin/load-spark-env.sh
80
+ - ext/scala_helper/bin/spark-class
81
+ - ext/scala_helper/build.sbt
82
+ - ext/scala_helper/cassandra_helper.scala
83
+ - ext/scala_helper/data_type_helper.scala
84
+ - ext/scala_helper/marshal_loader.scala
85
+ - ext/scala_helper/marshal_row_mapping.scala
86
+ - ext/scala_helper/project/plugins.sbt
87
+ - ext/scala_helper/sbin/spark-config.sh
88
+ - ext/scala_helper/sbin/spark-daemon.sh
89
+ - ext/scala_helper/schema_builder.scala
90
+ - ext/scala_helper/worker.scala
91
+ - lib/cassandra_model_spark.rb
92
+ - lib/cassandra_model_spark/build.rb
93
+ - lib/cassandra_model_spark/column_cast.rb
94
+ - lib/cassandra_model_spark/connection_cache.rb
95
+ - lib/cassandra_model_spark/data_frame.rb
96
+ - lib/cassandra_model_spark/java_bridge.rb
97
+ - lib/cassandra_model_spark/java_classes.rb
98
+ - lib/cassandra_model_spark/launcher.rb
99
+ - lib/cassandra_model_spark/query_builder.rb
100
+ - lib/cassandra_model_spark/raw_connection.rb
101
+ - lib/cassandra_model_spark/record.rb
102
+ - lib/cassandra_model_spark/spark.rb
103
+ homepage: https://www.github.com/thomasrogers03/cassandra_model_spark
104
+ licenses:
105
+ - Apache License 2.0
106
+ metadata: {}
107
+ post_install_message:
108
+ rdoc_options: []
109
+ require_paths:
110
+ - lib
111
+ required_ruby_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - '>='
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ requirements:
118
+ - - '>='
119
+ - !ruby/object:Gem::Version
120
+ version: '0'
121
+ requirements: []
122
+ rubyforge_project:
123
+ rubygems_version: 2.4.8
124
+ signing_key:
125
+ specification_version: 4
126
+ summary: Spark integration for cassandra_model
127
+ test_files: []