terastream 0.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +84 -0
- data/Rakefile +1 -0
- data/bin/setup +7 -0
- data/bin/terasql +50 -0
- data/lib/terastream.rb +35 -0
- data/lib/terastream/cli.rb +83 -0
- data/lib/terastream/config.rb +12 -0
- data/lib/terastream/connection.rb +53 -0
- data/lib/terastream/errors.rb +6 -0
- data/lib/terastream/jars/readme.txt +1279 -0
- data/lib/terastream/jars/tdgssconfig.jar +0 -0
- data/lib/terastream/jars/terajdbc4.jar +0 -0
- data/lib/terastream/middleware/formatters/csv_builder.rb +21 -0
- data/lib/terastream/middleware/formatters/json_builder.rb +17 -0
- data/lib/terastream/middleware/output/kafka.rb +42 -0
- data/lib/terastream/middleware/output/kinesis.rb +41 -0
- data/lib/terastream/middleware/output/redis.rb +25 -0
- data/lib/terastream/middleware/output/s3.rb +49 -0
- data/lib/terastream/query.rb +31 -0
- data/lib/terastream/query/base_formatter.rb +13 -0
- data/lib/terastream/query/records_builder.rb +31 -0
- data/lib/terastream/query/result_set.rb +52 -0
- data/lib/terastream/query/result_set_metadata.rb +37 -0
- data/lib/terastream/query/type_map.rb +13 -0
- data/lib/terastream/version.rb +3 -0
- data/terastream.gemspec +41 -0
- metadata +175 -0
Binary file
|
Binary file
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "csv"
|
2
|
+
|
3
|
+
module Terastream
|
4
|
+
# Formatter that turns the current result row into an Array of column
# strings, the shape a CSV writer accepts via <<.
class CSVBuilder < Terastream::Query::BaseFormatter
  # Destination used when the caller supplies none: whatever the csv
  # library's CSV() helper returns for the (optional) block.
  def default_output(&block)
    CSV(&block)
  end

  # Value pushed to the output before any rows: the column headers.
  def init
    @app.headers
  end

  # Reads one string per header from @app.record_set (column numbers start
  # at 1) and returns them as an Array. The block argument is accepted but
  # not used.
  def call(&block)
    @app.headers.each_with_index.map do |_header, position|
      @app.record_set.getString(position + 1)
    end
  end
end
|
21
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require "json"
|
2
|
+
|
3
|
+
module Terastream
|
4
|
+
# Formatter that serializes the current result row as a JSON object keyed by
# column header.
class JsonBuilder < Terastream::Query::BaseFormatter
  # Builds a header => string-value Hash from @app.record_set (column
  # numbers start at 1) and converts it to JSON.
  #
  # With a block, the JSON string is yielded and the block's result is
  # returned; without one, the JSON string itself is returned.
  def call(&block)
    columns = @app.headers.each_with_index.each_with_object({}) do |(header, position), row|
      row[header] = @app.record_set.getString(position + 1)
    end

    return yield(columns.to_json) if block_given?

    columns.to_json
  end
end
|
17
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'poseidon'
|
2
|
+
|
3
|
+
module Terastream
|
4
|
+
module Output
|
5
|
+
# Output adaptor that sends each record to a Kafka topic through a
# Poseidon producer.
#
# Recognised option keys (all Strings): "host", "port", "producer", "topic".
class Kafka
  def initialize(options = {})
    @options = options
    brokers = [connection]
    @connection = Poseidon::Producer.new(brokers, producer)
  end

  # Wraps the record in a single Poseidon message and sends it.
  def <<(record)
    @connection.send_messages [build_message(record)]
  end

  private

  # Looks up an option, falling back when the stored value is nil or false.
  def setting(key, fallback)
    @options[key] || fallback
  end

  def build_message(record)
    Poseidon::MessageToSend.new(topic, record)
  end

  # Broker address in "host:port" form.
  def connection
    [host, port].join(":")
  end

  def host
    setting("host", "localhost")
  end

  def port
    setting("port", "9092")
  end

  def producer
    setting("producer", "terastream-producer")
  end

  def topic
    setting("topic", "terastream")
  end
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require "aws-sdk"
|
2
|
+
|
3
|
+
module Terastream
|
4
|
+
module Output
|
5
|
+
# Output adaptor that writes each record to an AWS Kinesis stream.
#
# Recognised option keys (Symbols): :stream_name, :region.
class Kinesis
  def initialize(options = {})
    @options = options || {}
    @max_retries = 4
    @connection = Aws::Kinesis::Client.new(client_options)
  end

  # Sends one record to the stream and returns the client's response.
  def <<(record)
    put_record(record)
  end

  private

  # Attempts the put up to @max_retries times. Once the attempts are used up
  # the last error is re-raised, so a failed write is not silently dropped.
  def put_record(data)
    attempts_left = @max_retries
    begin
      @connection.put_record(
        stream_name: @options[:stream_name],
        data: data,
        partition_key: partition_key
      )
    rescue StandardError
      attempts_left -= 1
      retry if attempts_left > 0
      raise
    end
  end

  # Only passes a region when one is configured, so the SDK's own region
  # lookup still applies otherwise.
  def client_options
    configured = region
    configured ? { region: configured } : {}
  end

  # Region from the :region option, else the "region" environment variable.
  def region
    @options[:region] || ENV["region"]
  end

  # The partition key is sent as a String; the current timestamp is used so
  # successive records get different keys.
  def partition_key
    Time.now.to_f.to_s
  end
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'redis'
|
2
|
+
|
3
|
+
module Terastream
|
4
|
+
module Output
|
5
|
+
# Output adaptor that publishes each record on a Redis pub/sub channel.
#
# Recognised option keys (Strings): "host", "port", "channel".
class Redis
  # Channel used when no "channel" option is given.
  DEFAULT_CHANNEL = :messages

  def initialize(options = {})
    # Keep the options; previously they were dropped, so the client always
    # connected with its defaults and the option readers had nothing to read.
    @options = options || {}
    @connection = ::Redis.new(connection_options)
  end

  # Publishes the record and returns the client's result.
  def <<(record)
    @connection.publish(channel, record)
  end

  private

  # Connection settings for the Redis client. Only explicitly configured
  # values are included, so the client's own defaults apply to the rest.
  def connection_options
    %w[host port].each_with_object({}) do |key, settings|
      value = @options[key]
      settings[key.to_sym] = value unless value.nil?
    end
  end

  def channel
    @options["channel"] || DEFAULT_CHANNEL
  end
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require "aws-sdk"
|
2
|
+
require "securerandom"
|
3
|
+
|
4
|
+
module Terastream
|
5
|
+
module Output
|
6
|
+
# Output adaptor that buffers every record in memory and uploads the whole
# buffer as a single S3 object when complete! is called.
#
# Recognised option keys (Symbols): :key, :bucket, :region.
class S3
  attr_accessor :options

  def initialize(options = {})
    @options = options
    @temp = ""
    # Previously never assigned, which left the retry counter in complete! nil.
    @max_retries = 4
    @connection = Aws::S3::Client.new(region: region)
  end

  # Appends one serialized record (one line) to the buffer and returns the
  # buffer.
  def <<(record)
    @temp << serialize(record)
  end

  # Uploads the buffer, attempting the put up to @max_retries times. The
  # last error is re-raised once the attempts are used up.
  def complete!
    attempts_left = @max_retries
    begin
      @connection.put_object(
        key: options[:key],
        bucket: options[:bucket],
        body: @temp
      )
    rescue StandardError
      attempts_left -= 1
      retry if attempts_left > 0
      raise
    end
  end

  private

  # Chooses the line format from the record's type: Strings are written as
  # they are, Hashes as JSON, anything else as a CSV row.
  def serialize(record)
    case record
    when String then as_line(record)
    when Hash then as_json(record)
    else as_csv(record)
    end
  end

  # A String record is already formatted text; encoding it as JSON again
  # would wrap it in quotes and escape its contents.
  def as_line(text)
    text.end_with?("\n") ? text : "#{text}\n"
  end

  def as_json(record)
    "#{record.to_json}\n"
  end

  def as_csv(record)
    CSV.generate { |csv| csv << record }
  end

  def region
    options[:region] || ENV["region"] || "us-east-1"
  end
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Terastream
|
2
|
+
# Wraps a statement created from a connection and runs SQL through it,
# returning the rows as a ResultSet.
class Query
  attr_accessor :result_set, :statement

  # Convenience constructor; same as Query.new(connection).
  def self.build(connection)
    new(connection)
  end

  # Creates a statement on the connection and applies the timeout taken
  # from connection.config.
  def initialize(connection)
    @connection = connection
    @executed = false
    @result_set = []
    @statement = connection.create_statement
    @statement.setQueryTimeout(connection.config.timeout)
  end

  # True once execute has completed without raising.
  def executed?
    @executed
  end

  # Runs the SQL and wraps the rows in a ResultSet built with the
  # connection's formatter and output.
  #
  # Any StandardError raised along the way is re-raised as
  # Terastream::Errors::QueryError carrying the original message.
  def execute(sql)
    raw_rows = statement.execute_query(sql)
    wrapped = ResultSet.new(raw_rows, @connection.formatter, @connection.output)
    @executed = true
    wrapped
  rescue => error
    raise Terastream::Errors::QueryError, "Database exception: #{error.message}"
  end
end
|
31
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require "ostruct"
|
2
|
+
|
3
|
+
module Terastream
|
4
|
+
class Query
|
5
|
+
# Formatter that turns the current result row into a Record.
class RecordsBuilder < Terastream::Query::BaseFormatter
  # Reads one string per header from @app.record_set (column numbers start
  # at 1) and wraps the resulting header => value Hash in a Record.
  def call
    columns = @app.headers.each_with_index.each_with_object({}) do |(header, position), row|
      row[header] = @app.record_set.getString(position + 1)
    end
    Record.new(columns)
  end
end
|
14
|
+
|
15
|
+
# One result row whose columns can be read as methods (record.city),
# backed by an OpenStruct built from the row Hash.
class Record
  attr_reader :data

  def initialize(data = {})
    @data = OpenStruct.new(data)
  end

  # Forwards any message the underlying OpenStruct understands.
  #
  # The requested method name is forwarded; previously the literal symbol
  # :name was sent, so every column lookup returned the "name" column
  # (or nil) instead of the requested one.
  def method_missing(name, *args, &block)
    if data.respond_to?(name)
      data.public_send(name, *args, &block)
    else
      super
    end
  end

  # Keeps respond_to? consistent with what method_missing forwards.
  def respond_to_missing?(name, include_private = false)
    data.respond_to?(name, include_private) || super
  end
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require "forwardable"
|
2
|
+
|
3
|
+
module Terastream
|
4
|
+
class Query
|
5
|
+
# Drives a record set through a formatter and pushes every formatted row
# into an output. Column information is delegated to a ResultSetMetadata.
class ResultSet
  include Enumerable
  extend Forwardable

  attr_accessor :record_set, :formatter, :output
  # The headers and results readers are defined below (delegator and
  # memoized method); only the writers come from attr_writer, so no reader
  # is defined twice.
  attr_writer :headers, :results

  def_delegators :@metadata, :column_count, :columns, :headers, :types

  # record_set  - object responding to next and getMetaData.
  # formatter   - object responding to app and call (optionally init and
  #               default_output).
  # destination - object responding to <<; when nil, the formatter's
  #               default_output is used if it has one, else a new Array.
  def initialize(record_set, formatter = Terastream::JsonBuilder.new, destination = nil)
    @record_set = record_set
    @metadata = ResultSetMetadata.new(record_set)
    @formatter = formatter
    @output = destination || default_output
    @_results = nil
    @built = false
    @formatter.app(self)
  end

  # Runs build once and returns its value on every call. A separate flag is
  # used because build returns nil for non-Array outputs; memoizing with
  # ||= would run build again on each call in that case.
  def results
    unless @built
      @_results = build
      @built = true
    end
    @_results
  end

  # Pushes formatter.init (when the formatter has one) and then one
  # formatted value per row into the output, calls complete! on the output
  # if it responds to it, and returns the output only when it is exactly an
  # Array (nil otherwise).
  def build
    @output << formatter.init if formatter.respond_to?(:init)
    @output << formatter.call while @record_set.next
    @output.complete! if @output.respond_to?(:complete!)
    @output if @output.instance_of?(Array)
  end

  # Yields each collected result. Returns an Enumerator without a block.
  # When results is nil (non-Array output) nothing is yielded.
  def each(&block)
    return enum_for(:each) unless block_given?

    Array(results).each(&block)
  end

  private

  def default_output
    formatter.respond_to?(:default_output) ? formatter.default_output : []
  end
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Terastream
|
2
|
+
class Query
|
3
|
+
# Memoizing wrapper around the metadata object returned by
# record_set.getMetaData.
class ResultSetMetadata
  def initialize(record_set)
    @metadata = record_set.getMetaData()
  end

  # Number of columns reported by the metadata.
  def column_count
    @column_count ||= @metadata.getColumnCount()
  end

  # Array of { name:, type: } Hashes, one per column, in column order.
  def columns
    @columns ||= extract_metadata
  end

  # Column names in column order.
  def headers
    @headers ||= columns.map { |col| col[:name] }
  end

  # Column types in column order, as returned by getColumnType.
  def types
    @types ||= columns.map { |col| col[:type] }
  end

  private

  # Column numbers passed to the metadata start at 1.
  def extract_metadata
    (1..column_count).map do |index|
      {
        name: @metadata.getColumnName(index),
        type: @metadata.getColumnType(index)
      }
    end
  end
end
|
36
|
+
end
|
37
|
+
end
|
data/terastream.gemspec
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'terastream/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
  # Identity
  spec.name    = "terastream"
  spec.version = Terastream::VERSION
  spec.authors = ["Keene, Ramin"]
  spec.email   = ["Ramin.Keene@nordstrom.com"]

  # Description
  spec.summary     = " Stream query results out of teradata data warehouse"
  spec.description = " Utility gem to stream teradata results out of the teradata data warehouse, supporting adaptors for various formats and destinations"
  spec.homepage    = "http://github.com/Nordstrom/terastream"
  spec.license     = "MIT"

  # Restrict `gem push` to the host named in 'allowed_push_host'. This needs
  # spec.metadata, so refuse to continue when it is not available.
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  end
  spec.metadata['allowed_push_host'] = "https://rubygems.org"

  spec.platform = 'java'

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  spec.bindir = "bin"
  spec.executables << 'terasql'

  spec.require_paths = ["lib"]

  # Runtime dependencies
  [
    ["aws-sdk",  "~> 2"],
    ["json",     "~> 1.8.0"],
    ["poseidon", "~> 0.0.5"],
    ["redis",    "~> 3.2.1"]
  ].each { |gem_name, requirement| spec.add_dependency gem_name, requirement }

  # Development-only dependencies
  [
    ["bundler", "~> 1.9"],
    ["rake",    "~> 10.0"],
    ["rspec",   "~> 3.3.0"]
  ].each { |gem_name, requirement| spec.add_development_dependency gem_name, requirement }
end
|