terastream 0.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +84 -0
- data/Rakefile +1 -0
- data/bin/setup +7 -0
- data/bin/terasql +50 -0
- data/lib/terastream.rb +35 -0
- data/lib/terastream/cli.rb +83 -0
- data/lib/terastream/config.rb +12 -0
- data/lib/terastream/connection.rb +53 -0
- data/lib/terastream/errors.rb +6 -0
- data/lib/terastream/jars/readme.txt +1279 -0
- data/lib/terastream/jars/tdgssconfig.jar +0 -0
- data/lib/terastream/jars/terajdbc4.jar +0 -0
- data/lib/terastream/middleware/formatters/csv_builder.rb +21 -0
- data/lib/terastream/middleware/formatters/json_builder.rb +17 -0
- data/lib/terastream/middleware/output/kafka.rb +42 -0
- data/lib/terastream/middleware/output/kinesis.rb +41 -0
- data/lib/terastream/middleware/output/redis.rb +25 -0
- data/lib/terastream/middleware/output/s3.rb +49 -0
- data/lib/terastream/query.rb +31 -0
- data/lib/terastream/query/base_formatter.rb +13 -0
- data/lib/terastream/query/records_builder.rb +31 -0
- data/lib/terastream/query/result_set.rb +52 -0
- data/lib/terastream/query/result_set_metadata.rb +37 -0
- data/lib/terastream/query/type_map.rb +13 -0
- data/lib/terastream/version.rb +3 -0
- data/terastream.gemspec +41 -0
- metadata +175 -0
| Binary file | 
| Binary file | 
| @@ -0,0 +1,21 @@ | |
| 1 | 
            +
            require "csv"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Terastream
              # Formatter that renders the current row of a result set as an array
              # of column strings. `@app` is the object the formatter reads headers
              # and the record set from (presumably assigned by BaseFormatter --
              # confirm against that class).
              class CSVBuilder < Terastream::Query::BaseFormatter
                # Sink used when the caller supplied no destination.
                #
                # @return whatever Kernel#CSV returns for the given block
                def default_output(&block)
                  CSV(&block)
                end

                # @return the column headers exposed by `@app`
                def init
                  @app.headers
                end

                # Reads every column of the current row as a string. Column
                # positions passed to `getString` are 1-based.
                #
                # @return [Array] one value per header, in header order
                def call(&block)
                  @app.headers.each_with_index.map do |_header, position|
                    @app.record_set.getString(position + 1)
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,17 @@ | |
| 1 | 
            +
            require "json"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Terastream
              # Formatter that renders the current row of a result set as a JSON
              # object keyed by column header.
              class JsonBuilder < Terastream::Query::BaseFormatter
                # Serializes the current row.
                #
                # @yield [String] the JSON document, when a block is given
                # @return the block's result when a block is given, otherwise the
                #   JSON string itself
                def call(&block)
                  columns = @app.headers.each_with_index.each_with_object({}) do |(header, position), row|
                    # getString takes a 1-based column position.
                    row[header] = @app.record_set.getString(position + 1)
                  end
                  encoded = columns.to_json

                  return encoded unless block_given?
                  yield encoded
                end
              end
            end
         | 
| @@ -0,0 +1,42 @@ | |
| 1 | 
            +
            require 'poseidon'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Terastream
              module Output
                # Output sink that publishes each record to a Kafka topic through a
                # Poseidon producer.
                class Kafka
                  # @param options [Hash, nil] string-keyed settings: "host", "port",
                  #   "producer" (client id) and "topic". Missing keys fall back to
                  #   the defaults defined by the private readers below; nil is
                  #   treated as an empty hash.
                  def initialize(options = {})
                    @options = options || {}
                    @connection = Poseidon::Producer.new([connection], producer)
                  end

                  # Sends a single record to the configured topic.
                  #
                  # @param record the message payload
                  # @return whatever the producer's send_messages returns
                  def <<(record)
                    @connection.send_messages([build_message(record)])
                  end

                  private

                  # Wraps the payload in the message type the producer expects.
                  def build_message(record)
                    Poseidon::MessageToSend.new(topic, record)
                  end

                  # Broker address in "host:port" form.
                  def connection
                    "#{host}:#{port}"
                  end

                  def host
                    @options["host"] || "localhost"
                  end

                  def port
                    @options["port"] || "9092"
                  end

                  # Client id handed to the producer.
                  def producer
                    @options["producer"] || "terastream-producer"
                  end

                  def topic
                    @options["topic"] || "terastream"
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,41 @@ | |
| 1 | 
            +
            require "aws-sdk"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Terastream
              module Output
                # Output sink that writes each record to an Amazon Kinesis stream.
                class Kinesis
                  # @param options [Hash, nil] symbol-keyed settings; reads
                  #   :stream_name and :region. nil is treated as an empty hash.
                  def initialize(options = {})
                    @options = options || {}
                    # Only pass a region when one is configured, so the SDK's own
                    # region resolution still applies otherwise.
                    @connection = region ? Aws::Kinesis::Client.new(region: region) : Aws::Kinesis::Client.new
                    @max_retries = 4
                  end

                  # Writes one record to the stream.
                  #
                  # @param record the payload passed as the record's data
                  # @return the client's response, or nil when every attempt failed
                  def <<(record)
                    put_record(record)
                  end

                  private

                  # Attempts the put up to @max_retries times. Delivery stays
                  # best-effort (no exception escapes), but a final failure is
                  # reported on stderr instead of being dropped silently.
                  def put_record(data)
                    tries = @max_retries
                    begin
                      @connection.put_record(
                        stream_name: @options[:stream_name],
                        data: data,
                        partition_key: partition_key
                      )
                    rescue => e
                      tries -= 1
                      retry if tries > 0
                      warn "Terastream::Output::Kinesis: put_record failed after #{@max_retries} attempts: #{e.message}"
                      nil
                    end
                  end

                  # Region from the options, falling back to the "region"
                  # environment variable; nil when neither is set.
                  def region
                    @options[:region] || ENV["region"]
                  end

                  # Time-based partition key. Kinesis requires the key to be a
                  # String, so the timestamp is stringified.
                  def partition_key
                    Time.now.to_f.to_s
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,25 @@ | |
| 1 | 
            +
            require 'redis'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Terastream
              module Output
                # Output sink that publishes each record on a Redis pub/sub channel.
                class Redis
                  # Channel used when the options do not name one.
                  DEFAULT_CHANNEL = :messages

                  # @param options [Hash, nil] string-keyed settings: "host", "port"
                  #   and "channel". Any key left out is not sent to the client, so
                  #   the client's own defaults apply. nil is treated as an empty hash.
                  def initialize(options = {})
                    @options = options || {}
                    @connection = ::Redis.new(connection_options)
                  end

                  # Publishes one record.
                  #
                  # @param record the message to publish
                  # @return whatever the client's publish returns
                  def <<(record)
                    @connection.publish(channel, record)
                  end

                  private

                  # Settings forwarded to the client; contains only the keys the
                  # caller actually configured.
                  def connection_options
                    settings = {}
                    settings[:host] = host if host
                    settings[:port] = port if port
                    settings
                  end

                  def host
                    @options["host"]
                  end

                  def port
                    @options["port"]
                  end

                  def channel
                    @options["channel"] || DEFAULT_CHANNEL
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,49 @@ | |
| 1 | 
            +
            require "aws-sdk"
         | 
| 2 | 
            +
            require "securerandom"
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require "csv"
            require "json"

            module Terastream
              module Output
                # Output sink that buffers every record in memory and uploads the
                # buffer as a single S3 object when `complete!` is called.
                class S3
                  attr_accessor :options

                  # @param options [Hash, nil] symbol-keyed settings; reads :key,
                  #   :bucket and :region. nil is treated as an empty hash.
                  def initialize(options = {})
                    @options = options || {}
                    @temp = ""
                    @connection = Aws::S3::Client.new(region: region)
                    @max_retries = 4
                  end

                  # Appends one record to the in-memory buffer.
                  #
                  # @param record [String, Hash, Array] Strings are written as-is,
                  #   one per line; Hashes as one JSON document per line; anything
                  #   else as a CSV row
                  # @return [String] the buffer accumulated so far
                  def <<(record)
                    @temp << serialize(record)
                  end

                  # Uploads the buffer, attempting the put up to @max_retries times.
                  #
                  # @return the client's response
                  # @raise the last upload error once every attempt has failed
                  def complete!
                    tries = @max_retries
                    begin
                      @connection.put_object(
                        key: options[:key],
                        bucket: options[:bucket],
                        body: @temp
                      )
                    rescue
                      tries -= 1
                      retry if tries > 0
                      raise
                    end
                  end

                  private

                  # Picks the line format for a record. Strings are assumed to be
                  # already serialized (the JSON formatter emits strings), so they
                  # are not encoded a second time.
                  def serialize(record)
                    case record
                    when String then record.end_with?("\n") ? record : "#{record}\n"
                    when Hash then as_json(record)
                    else as_csv(record)
                    end
                  end

                  def as_json(record)
                    "#{record.to_json}\n"
                  end

                  def as_csv(record)
                    CSV.generate { |csv| csv << record }
                  end

                  # Region from the options, then the "region" environment
                  # variable, then "us-east-1".
                  def region
                    options[:region] || ENV["region"] || "us-east-1"
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,31 @@ | |
| 1 | 
            +
            module Terastream
              # Runs SQL through a statement created from a connection and wraps the
              # rows in a ResultSet.
              class Query
                # result_set holds the ResultSet produced by the most recent
                # successful execute ([] before the first one).
                attr_accessor :result_set, :statement

                class << self
                  # Convenience constructor; same as Query.new(connection).
                  def build(connection)
                    new(connection)
                  end
                end

                # @param connection an object responding to create_statement,
                #   config (with a timeout), formatter and output
                def initialize(connection)
                  @connection = connection
                  @statement = connection.create_statement
                  @statement.setQueryTimeout(connection.config.timeout)
                  @executed = false
                  @result_set = []
                end

                # @return [Boolean] whether execute has completed successfully
                def executed?
                  @executed
                end

                # Executes the SQL and wraps the rows with the connection's
                # formatter and output.
                #
                # @param sql [String] the statement to run
                # @return [ResultSet] the wrapped result, also stored in result_set
                # @raise [Terastream::Errors::QueryError] when anything in the
                #   execution path raises a StandardError
                def execute(sql)
                  @result_set = ResultSet.new(statement.execute_query(sql), @connection.formatter, @connection.output)
                  @executed = true
                  @result_set
                rescue => e
                  raise Terastream::Errors::QueryError, "Database exception: #{e.message}"
                end
              end
            end
         | 
| @@ -0,0 +1,31 @@ | |
| 1 | 
            +
            require "ostruct"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Terastream
         | 
| 4 | 
            +
              class Query
         | 
| 5 | 
            +
                # Formatter that wraps the current row of a result set in a Record
                # keyed by column header.
                class RecordsBuilder < Terastream::Query::BaseFormatter
                  # @return [Record] the current row, with one attribute per header
                  def call
                    fields = @app.headers.each_with_index.each_with_object({}) do |(header, position), row|
                      # getString takes a 1-based column position.
                      row[header] = @app.record_set.getString(position + 1)
                    end

                    Record.new(fields)
                  end
                end
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                # A single result row whose columns can be read as methods; the
                # values live in an OpenStruct exposed through `data`.
                class Record
                  attr_reader :data

                  # @param data [Hash] column name => value
                  def initialize(data = {})
                    @data = OpenStruct.new(data)
                  end

                  # Forwards unknown messages to the underlying OpenStruct when it
                  # responds to them; everything else raises NoMethodError as usual.
                  def method_missing(name, *args, &block)
                    if data.respond_to?(name)
                      # Dispatch the method that was actually requested (not the
                      # literal :name symbol).
                      data.public_send(name, *args, &block)
                    else
                      super
                    end
                  end

                  # Keeps respond_to? in agreement with method_missing.
                  def respond_to_missing?(name, include_private = false)
                    data.respond_to?(name) || super
                  end
                end
         | 
| 30 | 
            +
              end
         | 
| 31 | 
            +
            end
         | 
| @@ -0,0 +1,52 @@ | |
| 1 | 
            +
            require "forwardable"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Terastream
              class Query
                # Streams the rows of a record set through a formatter into an
                # output sink, and exposes the collected rows as an Enumerable when
                # the sink is a plain Array.
                class ResultSet
                  include Enumerable

                  extend Forwardable

                  attr_accessor :record_set, :formatter, :output
                  # The readers for these two are defined below (delegator and
                  # `results`), so only the writers are generated here.
                  attr_writer :headers, :results

                  def_delegators :@metadata, :column_count, :columns, :headers, :types

                  # @param record_set the row cursor; must respond to next and
                  #   getMetaData
                  # @param formatter object that receives this result set through
                  #   `app` and produces one value per row through `call`
                  # @param destination sink receiving values via <<; when nil, the
                  #   formatter's default_output is used if it has one, else an Array
                  def initialize(record_set, formatter = Terastream::JsonBuilder.new, destination = nil)
                    @record_set = record_set
                    @metadata = ResultSetMetadata.new(record_set)
                    @formatter = formatter
                    @output = destination || default_output
                    @_results = nil
                    @built = false
                    @formatter.app(self)
                  end

                  # Streams the rows on first use and remembers the outcome, so the
                  # sink is written (and completed) only once even when `build`
                  # returns nil.
                  #
                  # @return [Array, nil] the collected rows when the output is an
                  #   Array, otherwise nil
                  def results
                    unless @built
                      @_results = build
                      @built = true
                    end
                    @_results
                  end

                  # Performs one streaming pass: optional formatter preamble, one
                  # formatted value per row, then `complete!` on sinks that have it.
                  #
                  # @return [Array, nil] the output when it is exactly an Array
                  def build
                    @output << formatter.init if formatter.respond_to?(:init)

                    @output << formatter.call while @record_set.next

                    @output.complete! if @output.respond_to?(:complete!)
                    @output if @output.instance_of?(Array)
                  end

                  # Yields each collected row; yields nothing when the rows went to
                  # a non-Array sink. Returns an Enumerator when no block is given.
                  def each(&block)
                    return to_enum(:each) unless block_given?

                    (results || []).each(&block)
                  end

                  private

                  def default_output
                    formatter.respond_to?(:default_output) ? formatter.default_output : []
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,37 @@ | |
| 1 | 
            +
            module Terastream
              class Query
                # Read-only, memoized view over the metadata object returned by a
                # record set's getMetaData.
                class ResultSetMetadata
                  # @param record_set an object responding to getMetaData
                  def initialize(record_set)
                    @metadata = record_set.getMetaData
                  end

                  # @return the number of columns reported by the metadata
                  def column_count
                    @column_count ||= @metadata.getColumnCount
                  end

                  # @return [Array<Hash>] one { name:, type: } hash per column, in
                  #   column order
                  def columns
                    @columns ||= extract_metadata
                  end

                  # @return [Array] the column names, in column order
                  def headers
                    @headers ||= columns.map { |col| col[:name] }
                  end

                  # @return [Array] the column types as reported by getColumnType,
                  #   in column order
                  def types
                    @types ||= columns.map { |col| col[:type] }
                  end

                  private

                  # Column positions passed to the metadata object are 1-based.
                  def extract_metadata
                    (1..column_count).map do |index|
                      {
                        name: @metadata.getColumnName(index),
                        type: @metadata.getColumnType(index)
                      }
                    end
                  end
                end
              end
            end
         | 
    
        data/terastream.gemspec
    ADDED
    
    | @@ -0,0 +1,41 @@ | |
| 1 | 
            +
            # coding: utf-8
         | 
| 2 | 
            +
            lib = File.expand_path('../lib', __FILE__)
         | 
| 3 | 
            +
            $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
         | 
| 4 | 
            +
            require 'terastream/version'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Gem specification for terastream: identity, packaged files, the
            # `terasql` executable, and runtime/development dependencies.
            Gem::Specification.new do |spec|
              spec.name          = "terastream"
              spec.version       = Terastream::VERSION
              spec.authors       = ["Keene, Ramin"]
              spec.email         = ["Ramin.Keene@nordstrom.com"]

              spec.summary       = %q{ Stream query results out of teradata data warehouse}
              spec.description   = %q{ Utility gem to stream teradata results out of the teradata data warehouse, supporting adaptors for various formats and destinations}
              spec.homepage      = "http://github.com/Nordstrom/terastream"
              spec.license       = "MIT"

              # Restrict `gem push` to the host named in 'allowed_push_host' (set to
              # https://rubygems.org here). Delete this section to allow pushing
              # this gem to any host.
              if spec.respond_to?(:metadata)
                spec.metadata['allowed_push_host'] = "https://rubygems.org"
              else
                raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
              end

              # Built as a platform-specific gem for the 'java' platform.
              spec.platform = 'java'

              # Package every git-tracked file except those under test/, spec/ or
              # features/.
              spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
              spec.bindir        = "bin"
              spec.executables  << 'terasql'

              spec.require_paths = ["lib"]

              # Runtime dependencies.
              spec.add_dependency "aws-sdk", "~> 2"
              spec.add_dependency "json", "~> 1.8.0"
              spec.add_dependency "poseidon", "~> 0.0.5"
              spec.add_dependency "redis", "~> 3.2.1"

              # Development-only dependencies.
              spec.add_development_dependency "bundler", "~> 1.9"
              spec.add_development_dependency "rake", "~> 10.0"
              spec.add_development_dependency "rspec", "~> 3.3.0"
            end
         |