cassandra_model_spark 0.0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +7 -0
  2. data/README.md +17 -0
  3. data/bin/cmodel-spark-build +7 -0
  4. data/bin/cmodel-spark-env.rb +11 -0
  5. data/bin/cmodel-spark-master +22 -0
  6. data/bin/cmodel-spark-run-master +4 -0
  7. data/bin/cmodel-spark-run-master.sh +8 -0
  8. data/bin/cmodel-spark-run-slave +4 -0
  9. data/bin/cmodel-spark-run-slave.sh +8 -0
  10. data/bin/cmodel-spark-slaves +22 -0
  11. data/ext/scala_helper/bin/load-spark-env.sh +63 -0
  12. data/ext/scala_helper/bin/spark-class +87 -0
  13. data/ext/scala_helper/build.sbt +62 -0
  14. data/ext/scala_helper/cassandra_helper.scala +23 -0
  15. data/ext/scala_helper/data_type_helper.scala +27 -0
  16. data/ext/scala_helper/marshal_loader.scala +204 -0
  17. data/ext/scala_helper/marshal_row_mapping.scala +85 -0
  18. data/ext/scala_helper/project/plugins.sbt +6 -0
  19. data/ext/scala_helper/sbin/spark-config.sh +30 -0
  20. data/ext/scala_helper/sbin/spark-daemon.sh +223 -0
  21. data/ext/scala_helper/schema_builder.scala +35 -0
  22. data/ext/scala_helper/worker.scala +13 -0
  23. data/lib/cassandra_model_spark/build.rb +24 -0
  24. data/lib/cassandra_model_spark/column_cast.rb +44 -0
  25. data/lib/cassandra_model_spark/connection_cache.rb +9 -0
  26. data/lib/cassandra_model_spark/data_frame.rb +374 -0
  27. data/lib/cassandra_model_spark/java_bridge.rb +91 -0
  28. data/lib/cassandra_model_spark/java_classes.rb +36 -0
  29. data/lib/cassandra_model_spark/launcher.rb +150 -0
  30. data/lib/cassandra_model_spark/query_builder.rb +37 -0
  31. data/lib/cassandra_model_spark/raw_connection.rb +47 -0
  32. data/lib/cassandra_model_spark/record.rb +18 -0
  33. data/lib/cassandra_model_spark/spark.rb +33 -0
  34. data/lib/cassandra_model_spark.rb +42 -0
  35. metadata +127 -0
@@ -0,0 +1,47 @@
1
module CassandraModel
  # Spark additions to RawConnection: builds a Spark configuration from the
  # connection's config hash and exposes lazily created Spark contexts.
  #
  # NOTE(review): `config` is not defined in this file; it is assumed to be the
  # connection configuration hash provided by cassandra_model (with at least
  # a :hosts array and an optional :spark sub-hash) -- confirm against that gem.
  class RawConnection
    # Returns the memoized JavaSparkContext for this connection.
    #
    # On first use the context is created from #spark_conf and the helper jar
    # found at "#{Spark.classpath}/cmodel_scala_helper.jar" is registered on
    # the wrapped context.
    def java_spark_context
      @spark_context ||= JavaSparkContext.new(spark_conf).tap do |context|
        context.sc.addJar("#{Spark.classpath}/cmodel_scala_helper.jar")
      end
    end

    # Returns the `sc` object wrapped by #java_spark_context.
    def spark_context
      java_spark_context.sc
    end

    private

    # Returns the memoized SparkConf for this connection.
    #
    # Defaults (app name, a local master, and the first configured Cassandra
    # host) are set first; every entry from #flat_spark_config is applied
    # afterwards, so user configuration overrides the defaults.
    def spark_conf
      @spark_conf ||= SparkConf.new(true).tap do |conf|
        conf.set('spark.app.name', 'cassandra_model_spark')
        conf.set('spark.master', 'local[*]')
        conf.set('spark.cassandra.connection.host', config[:hosts].first)
        flat_spark_config.each do |key, value|
          # SparkConf#set only accepts String values, so numbers, booleans and
          # symbols coming from the config hash are stringified here. nil is
          # passed through untouched so that it is still rejected rather than
          # silently turned into an empty setting.
          conf.set(key, value.nil? ? nil : value.to_s)
        end
      end
    end

    # Flattens a nested configuration hash into a single-level hash whose keys
    # are the dot-joined paths of the nested keys.
    #
    #   flat_spark_config(spark: { executor: { memory: '2g' } })
    #   # => { 'spark.executor.memory' => '2g' }
    #
    # @param config [Hash] nested configuration; defaults to #spark_config
    # @return [Hash{String => Object}] flattened settings (values unchanged)
    def flat_spark_config(config = spark_config)
      config.each_with_object({}) do |(key, value), flat|
        if value.is_a?(Hash)
          flat.merge!(child_spark_conf(key, value))
        else
          flat[key.to_s] = value
        end
      end
    end

    # Flattens +value+ and prefixes each resulting key with "#{key}.".
    #
    # @param key [#to_s] the parent key
    # @param value [Hash] the nested hash stored under +key+
    # @return [Hash{String => Object}]
    def child_spark_conf(key, value)
      flat_spark_config(value).each_with_object({}) do |(child_key, child_value), prefixed|
        prefixed["#{key}.#{child_key}"] = child_value
      end
    end

    # Returns only the :spark entry of the connection config (still wrapped
    # under its :spark key, which is what yields the "spark." key prefix), or
    # an empty hash when no :spark entry is present.
    def spark_config
      config.slice(:spark)
    end
  end
end
@@ -0,0 +1,18 @@
1
module CassandraModel
  # Class-level Spark helpers added to Record.
  class Record
    # Returns the RDD for this record's table, built through
    # SparkCassandraHelper.cassandra_table from the table connection's Spark
    # context, its configured keyspace and the table name. The result is
    # memoized in a class-level instance variable.
    def self.rdd
      @spark_rdd ||= begin
        context = table.connection.spark_context
        keyspace = table.connection.config[:keyspace]
        SparkCassandraHelper.cassandra_table(context, keyspace, table_name)
      end
    end

    # Always nil in this class.
    def self.rdd_row_mapping
      nil
    end

    # Delegates to the `count` of .rdd.
    def self.count
      rdd.count
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'fileutils'
2
+
3
module CassandraModel
  # Filesystem locations used by the Spark integration.
  module Spark
    class << self
      # Absolute path three levels above this source file (memoized).
      def root
        @gem_root ||= File.expand_path('../../..', __FILE__)
      end

      # Spark home directory (memoized): the SPARK_HOME environment variable
      # when it is set to a non-blank value, otherwise ~/.cassandra_model_spark
      # (created on demand).
      def home
        @home ||= (env_path('SPARK_HOME') || default_home)
      end

      # Directory searched for jars (memoized): the SPARK_CLASSPATH environment
      # variable when it is set to a non-blank value, otherwise the "lib"
      # directory under #home (created on demand).
      def classpath
        @classpath ||= (env_path('SPARK_CLASSPATH') || default_classpath)
      end

      private

      # Reads an environment variable, treating a missing, empty or
      # whitespace-only value as unset. An empty string is truthy in Ruby, so
      # without this check `SPARK_HOME=` would be used as the path and resolve
      # relative to the current working directory.
      #
      # @param name [String] environment variable name
      # @return [String, nil]
      def env_path(name)
        value = ENV[name]
        value unless value.nil? || value.strip.empty?
      end

      # "lib" under #home; the directory is created if it does not exist.
      def default_classpath
        File.expand_path('./lib/', home).tap do |path|
          FileUtils.mkdir_p(path)
        end
      end

      # ~/.cassandra_model_spark; the directory is created if it does not exist.
      def default_home
        File.expand_path('~/.cassandra_model_spark').tap do |path|
          FileUtils.mkdir_p(path)
        end
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ #--
2
+ # Copyright 2015 Thomas RM Rogers
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #++
16
+
17
+ require 'yaml'
18
+ require 'logger'
19
+
20
+ require 'concurrent'
21
+ require 'cassandra'
22
+ require 'active_support/all'
23
+ require 'active_support/core_ext/class/attribute_accessors'
24
+ require 'thomas_utils'
25
+ require 'batch_reactor'
26
+ require 'cassandra_model'
27
# rjb is only loaded on non-JRuby engines, and only when the bridge has not
# been disabled by defining CassandraModel::NO_BRIDGE.
if RUBY_ENGINE != 'jruby' && !CassandraModel.const_defined?('NO_BRIDGE')
  require 'rjb'
end
require 'cassandra_model_spark/spark'

# Unless the bridge is disabled: load the bridge, require every jar found in
# the Spark classpath directory, start the Java engine, then load the Java
# class bindings. The order of these steps is kept exactly as is.
# NOTE(review): initialize_java_engine is presumably defined by
# cassandra_model_spark/java_bridge -- confirm.
unless CassandraModel.const_defined?('NO_BRIDGE')
  require 'cassandra_model_spark/java_bridge'
  Dir["#{CassandraModel::Spark.classpath}/*.jar"].each do |jar|
    require jar
  end
  initialize_java_engine
  require 'cassandra_model_spark/java_classes'
end
36
+
37
+ require 'cassandra_model_spark/raw_connection'
38
+ require 'cassandra_model_spark/connection_cache'
39
+ require 'cassandra_model_spark/record'
40
+ require 'cassandra_model_spark/query_builder'
41
+ require 'cassandra_model_spark/data_frame'
42
+ require 'cassandra_model_spark/column_cast'
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cassandra_model_spark
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1.5
5
+ platform: ruby
6
+ authors:
7
+ - Thomas RM Rogers
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-12-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: cassandra_model
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.16
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.16
27
+ - !ruby/object:Gem::Dependency
28
+ name: thomas_utils
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 0.1.16
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 0.1.16
41
+ - !ruby/object:Gem::Dependency
42
+ name: rjb
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 1.5.4
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.5.4
55
+ description: |-
56
+ Spark integration for cassandra_model.
57
+ Get high-performance data analytics with the ease of cassandra_model.
58
+ Inspired by the ruby-spark gem.
59
+ email: thomasrogers03@gmail.com
60
+ executables:
61
+ - cmodel-spark-build
62
+ - cmodel-spark-env.rb
63
+ - cmodel-spark-master
64
+ - cmodel-spark-slaves
65
+ - cmodel-spark-run-master
66
+ - cmodel-spark-run-slave
67
+ extensions: []
68
+ extra_rdoc_files: []
69
+ files:
70
+ - README.md
71
+ - bin/cmodel-spark-build
72
+ - bin/cmodel-spark-env.rb
73
+ - bin/cmodel-spark-master
74
+ - bin/cmodel-spark-run-master
75
+ - bin/cmodel-spark-run-master.sh
76
+ - bin/cmodel-spark-run-slave
77
+ - bin/cmodel-spark-run-slave.sh
78
+ - bin/cmodel-spark-slaves
79
+ - ext/scala_helper/bin/load-spark-env.sh
80
+ - ext/scala_helper/bin/spark-class
81
+ - ext/scala_helper/build.sbt
82
+ - ext/scala_helper/cassandra_helper.scala
83
+ - ext/scala_helper/data_type_helper.scala
84
+ - ext/scala_helper/marshal_loader.scala
85
+ - ext/scala_helper/marshal_row_mapping.scala
86
+ - ext/scala_helper/project/plugins.sbt
87
+ - ext/scala_helper/sbin/spark-config.sh
88
+ - ext/scala_helper/sbin/spark-daemon.sh
89
+ - ext/scala_helper/schema_builder.scala
90
+ - ext/scala_helper/worker.scala
91
+ - lib/cassandra_model_spark.rb
92
+ - lib/cassandra_model_spark/build.rb
93
+ - lib/cassandra_model_spark/column_cast.rb
94
+ - lib/cassandra_model_spark/connection_cache.rb
95
+ - lib/cassandra_model_spark/data_frame.rb
96
+ - lib/cassandra_model_spark/java_bridge.rb
97
+ - lib/cassandra_model_spark/java_classes.rb
98
+ - lib/cassandra_model_spark/launcher.rb
99
+ - lib/cassandra_model_spark/query_builder.rb
100
+ - lib/cassandra_model_spark/raw_connection.rb
101
+ - lib/cassandra_model_spark/record.rb
102
+ - lib/cassandra_model_spark/spark.rb
103
+ homepage: https://www.github.com/thomasrogers03/cassandra_model_spark
104
+ licenses:
105
+ - Apache License 2.0
106
+ metadata: {}
107
+ post_install_message:
108
+ rdoc_options: []
109
+ require_paths:
110
+ - lib
111
+ required_ruby_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - '>='
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ requirements:
118
+ - - '>='
119
+ - !ruby/object:Gem::Version
120
+ version: '0'
121
+ requirements: []
122
+ rubyforge_project:
123
+ rubygems_version: 2.4.8
124
+ signing_key:
125
+ specification_version: 4
126
+ summary: Spark integration for cassandra_model
127
+ test_files: []