cassandra_model_spark 0.0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +17 -0
- data/bin/cmodel-spark-build +7 -0
- data/bin/cmodel-spark-env.rb +11 -0
- data/bin/cmodel-spark-master +22 -0
- data/bin/cmodel-spark-run-master +4 -0
- data/bin/cmodel-spark-run-master.sh +8 -0
- data/bin/cmodel-spark-run-slave +4 -0
- data/bin/cmodel-spark-run-slave.sh +8 -0
- data/bin/cmodel-spark-slaves +22 -0
- data/ext/scala_helper/bin/load-spark-env.sh +63 -0
- data/ext/scala_helper/bin/spark-class +87 -0
- data/ext/scala_helper/build.sbt +62 -0
- data/ext/scala_helper/cassandra_helper.scala +23 -0
- data/ext/scala_helper/data_type_helper.scala +27 -0
- data/ext/scala_helper/marshal_loader.scala +204 -0
- data/ext/scala_helper/marshal_row_mapping.scala +85 -0
- data/ext/scala_helper/project/plugins.sbt +6 -0
- data/ext/scala_helper/sbin/spark-config.sh +30 -0
- data/ext/scala_helper/sbin/spark-daemon.sh +223 -0
- data/ext/scala_helper/schema_builder.scala +35 -0
- data/ext/scala_helper/worker.scala +13 -0
- data/lib/cassandra_model_spark/build.rb +24 -0
- data/lib/cassandra_model_spark/column_cast.rb +44 -0
- data/lib/cassandra_model_spark/connection_cache.rb +9 -0
- data/lib/cassandra_model_spark/data_frame.rb +374 -0
- data/lib/cassandra_model_spark/java_bridge.rb +91 -0
- data/lib/cassandra_model_spark/java_classes.rb +36 -0
- data/lib/cassandra_model_spark/launcher.rb +150 -0
- data/lib/cassandra_model_spark/query_builder.rb +37 -0
- data/lib/cassandra_model_spark/raw_connection.rb +47 -0
- data/lib/cassandra_model_spark/record.rb +18 -0
- data/lib/cassandra_model_spark/spark.rb +33 -0
- data/lib/cassandra_model_spark.rb +42 -0
- metadata +127 -0
@@ -0,0 +1,47 @@
|
|
1
|
+
module CassandraModel
  # Spark additions to RawConnection: lazily builds a Spark context whose
  # settings come from this connection's `config` hash.
  class RawConnection
    # Returns the memoized JavaSparkContext for this connection.
    #
    # On first use the context is created from #spark_conf and the
    # `cmodel_scala_helper.jar` found under `Spark.classpath` is added to it.
    def java_spark_context
      @spark_context ||= JavaSparkContext.new(spark_conf).tap do |context|
        context.sc.addJar("#{Spark.classpath}/cmodel_scala_helper.jar")
      end
    end

    # Returns the `sc` object exposed by the Java context.
    def spark_context
      java_spark_context.sc
    end

    private

    # Builds (once) the SparkConf used to create the context.
    #
    # Defaults are set first, so any matching key found under `config[:spark]`
    # overrides them.
    def spark_conf
      @spark_conf ||= SparkConf.new(true).tap do |conf|
        conf.set('spark.app.name', 'cassandra_model_spark')
        conf.set('spark.master', 'local[*]')
        conf.set('spark.cassandra.connection.host', config[:hosts].first)
        # Configuration values may be integers, booleans, symbols, ...;
        # they are handed over as strings so that typed values loaded from
        # configuration do not break the call into the Java API.
        flat_spark_config.each { |key, value| conf.set(key, value.to_s) }
      end
    end

    # Flattens a nested hash into a single-level hash with dotted string keys,
    # e.g. `{ spark: { executor: { cores: 2 } } }` becomes
    # `{ 'spark.executor.cores' => 2 }`. Leaf values are returned untouched.
    #
    # @param config [Hash] nested settings (defaults to #spark_config)
    # @return [Hash{String => Object}]
    def flat_spark_config(config = spark_config)
      config.each_with_object({}) do |(key, value), flattened|
        if value.is_a?(Hash)
          flattened.merge!(child_spark_conf(key, value))
        else
          flattened[key.to_s] = value
        end
      end
    end

    # Flattens `value` and prefixes each resulting key with "#{key}.".
    def child_spark_conf(key, value)
      flat_spark_config(value).each_with_object({}) do |(child_key, child_value), prefixed|
        prefixed["#{key}.#{child_key}"] = child_value
      end
    end

    # Only the `:spark` entry of the connection config, kept under its key so
    # that flattening yields names starting with "spark.".
    def spark_config
      config.slice(:spark)
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module CassandraModel
  # Spark-facing class methods added to Record.
  class Record
    class << self
      # Memoized result of SparkCassandraHelper.cassandra_table for this
      # record's table, built from the table connection's Spark context, the
      # configured keyspace and the table name.
      def rdd
        @spark_rdd ||= begin
          context = table.connection.spark_context
          keyspace = table.connection.config[:keyspace]
          SparkCassandraHelper.cassandra_table(context, keyspace, table_name)
        end
      end

      # No row mapping is defined at this level; always nil.
      def rdd_row_mapping
        nil
      end

      # Delegates to the RDD's own count.
      def count
        rdd.count
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
module CassandraModel
  # Filesystem locations used by the Spark integration. Every lookup is
  # computed once and cached on the module.
  module Spark
    # Path obtained by expanding '../../..' relative to this file.
    def self.root
      @gem_root ||= File.expand_path('../../..', __FILE__)
    end

    # SPARK_HOME from the environment when set, otherwise the default home
    # directory (created on demand).
    def self.home
      @home ||= ENV['SPARK_HOME'] || default_home
    end

    # SPARK_CLASSPATH from the environment when set, otherwise the `lib`
    # directory under #home (created on demand).
    def self.classpath
      @classpath ||= ENV['SPARK_CLASSPATH'] || default_classpath
    end

    # Ensures <home>/lib exists and returns its absolute path.
    def self.default_classpath
      lib_dir = File.expand_path('./lib/', home)
      FileUtils.mkdir_p(lib_dir)
      lib_dir
    end

    # Ensures ~/.cassandra_model_spark exists and returns its absolute path.
    def self.default_home
      home_dir = File.expand_path('~/.cassandra_model_spark')
      FileUtils.mkdir_p(home_dir)
      home_dir
    end

    private_class_method :default_classpath, :default_home
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright 2015 Thomas RM Rogers
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'yaml'
|
18
|
+
require 'logger'
|
19
|
+
|
20
|
+
require 'concurrent'
|
21
|
+
require 'cassandra'
|
22
|
+
require 'active_support/all'
|
23
|
+
require 'active_support/core_ext/class/attribute_accessors'
|
24
|
+
require 'thomas_utils'
|
25
|
+
require 'batch_reactor'
|
26
|
+
require 'cassandra_model'
|
27
|
+
require 'rjb' unless RUBY_ENGINE == 'jruby' || CassandraModel.const_defined?('NO_BRIDGE')
|
28
|
+
require 'cassandra_model_spark/spark'
|
29
|
+
|
30
|
+
# Java bridge bootstrap. Skipped entirely when a NO_BRIDGE constant is
# visible from CassandraModel. The statements below run in this order:
# bridge, jars, engine initialisation, Java class bindings.
unless CassandraModel.const_defined?('NO_BRIDGE')
|
31
|
+
require 'cassandra_model_spark/java_bridge'
|
32
|
+
# Require every *.jar located directly inside the Spark classpath directory.
Dir["#{CassandraModel::Spark.classpath}/*.jar"].each { |file| require file }
|
33
|
+
# NOTE(review): not defined in this file; presumably provided by
# cassandra_model_spark/java_bridge required above - confirm.
initialize_java_engine
|
34
|
+
require 'cassandra_model_spark/java_classes'
|
35
|
+
end
|
36
|
+
|
37
|
+
require 'cassandra_model_spark/raw_connection'
|
38
|
+
require 'cassandra_model_spark/connection_cache'
|
39
|
+
require 'cassandra_model_spark/record'
|
40
|
+
require 'cassandra_model_spark/query_builder'
|
41
|
+
require 'cassandra_model_spark/data_frame'
|
42
|
+
require 'cassandra_model_spark/column_cast'
|
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cassandra_model_spark
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1.5
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Thomas RM Rogers
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-12-29 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: cassandra_model
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.9.16
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.9.16
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: thomas_utils
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.1.16
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.1.16
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rjb
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.5.4
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.5.4
|
55
|
+
description: |-
|
56
|
+
Spark integration for cassandra_model.
|
57
|
+
Get high-performance data analytics with the ease of cassandra_model.
|
58
|
+
Inspired by the ruby-spark gem.
|
59
|
+
email: thomasrogers03@gmail.com
|
60
|
+
executables:
|
61
|
+
- cmodel-spark-build
|
62
|
+
- cmodel-spark-env.rb
|
63
|
+
- cmodel-spark-master
|
64
|
+
- cmodel-spark-slaves
|
65
|
+
- cmodel-spark-run-master
|
66
|
+
- cmodel-spark-run-slave
|
67
|
+
extensions: []
|
68
|
+
extra_rdoc_files: []
|
69
|
+
files:
|
70
|
+
- README.md
|
71
|
+
- bin/cmodel-spark-build
|
72
|
+
- bin/cmodel-spark-env.rb
|
73
|
+
- bin/cmodel-spark-master
|
74
|
+
- bin/cmodel-spark-run-master
|
75
|
+
- bin/cmodel-spark-run-master.sh
|
76
|
+
- bin/cmodel-spark-run-slave
|
77
|
+
- bin/cmodel-spark-run-slave.sh
|
78
|
+
- bin/cmodel-spark-slaves
|
79
|
+
- ext/scala_helper/bin/load-spark-env.sh
|
80
|
+
- ext/scala_helper/bin/spark-class
|
81
|
+
- ext/scala_helper/build.sbt
|
82
|
+
- ext/scala_helper/cassandra_helper.scala
|
83
|
+
- ext/scala_helper/data_type_helper.scala
|
84
|
+
- ext/scala_helper/marshal_loader.scala
|
85
|
+
- ext/scala_helper/marshal_row_mapping.scala
|
86
|
+
- ext/scala_helper/project/plugins.sbt
|
87
|
+
- ext/scala_helper/sbin/spark-config.sh
|
88
|
+
- ext/scala_helper/sbin/spark-daemon.sh
|
89
|
+
- ext/scala_helper/schema_builder.scala
|
90
|
+
- ext/scala_helper/worker.scala
|
91
|
+
- lib/cassandra_model_spark.rb
|
92
|
+
- lib/cassandra_model_spark/build.rb
|
93
|
+
- lib/cassandra_model_spark/column_cast.rb
|
94
|
+
- lib/cassandra_model_spark/connection_cache.rb
|
95
|
+
- lib/cassandra_model_spark/data_frame.rb
|
96
|
+
- lib/cassandra_model_spark/java_bridge.rb
|
97
|
+
- lib/cassandra_model_spark/java_classes.rb
|
98
|
+
- lib/cassandra_model_spark/launcher.rb
|
99
|
+
- lib/cassandra_model_spark/query_builder.rb
|
100
|
+
- lib/cassandra_model_spark/raw_connection.rb
|
101
|
+
- lib/cassandra_model_spark/record.rb
|
102
|
+
- lib/cassandra_model_spark/spark.rb
|
103
|
+
homepage: https://www.github.com/thomasrogers03/cassandra_model_spark
|
104
|
+
licenses:
|
105
|
+
- Apache License 2.0
|
106
|
+
metadata: {}
|
107
|
+
post_install_message:
|
108
|
+
rdoc_options: []
|
109
|
+
require_paths:
|
110
|
+
- lib
|
111
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - '>='
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: '0'
|
116
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
|
+
requirements:
|
118
|
+
- - '>='
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0'
|
121
|
+
requirements: []
|
122
|
+
rubyforge_project:
|
123
|
+
rubygems_version: 2.4.8
|
124
|
+
signing_key:
|
125
|
+
specification_version: 4
|
126
|
+
summary: Spark integration for cassandra_model
|
127
|
+
test_files: []
|