redtrack 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +173 -0
- data/Rakefile +2 -0
- data/lib/redtrack.rb +16 -0
- data/lib/redtrack_client.rb +286 -0
- data/lib/redtrack_datatypes.rb +175 -0
- data/lib/redtrack_kinesisclient.rb +238 -0
- data/lib/redtrack_loader.rb +650 -0
- data/lib/redtrack_local_file_stream.rb +126 -0
- data/redtrack.gemspec +17 -0
- metadata +99 -0
@@ -0,0 +1,126 @@
|
|
1
|
+
# The FileClient provides an application interface to a file-based broker for Redshift data
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014 RedHotLabs, Inc.
|
4
|
+
# Licensed under The MIT License
|
5
|
+
|
6
|
+
require 'fileutils'

module RedTrack
  # File-backed stand-in for a stream broker: every stream is a plain text file
  # under ./log (relative to the process working directory), one record per line.
  # Several parameters on the methods below are accepted but not used by this
  # file-based implementation.
  class FileClient

    # Create the client and make sure the log/ directory exists.
    #
    # @param [Hash] options Stored as-is; only #stream_name reads it
    #   (:redshift_cluster_name and :redshift_dbname)
    def initialize(options)
      # mkdir_p is a no-op when the directory is already there, so there is no
      # window between an existence check and the creation.
      FileUtils.mkdir_p("log")

      @options = options
    end

    # Get Location of the stream
    #
    # @param [String] stream_name The name of the stream
    # @return [String] Relative file path for the stream
    def stream_location(stream_name)
      # V1 of data streaming - use a local file
      "log/#{stream_name}"
    end

    # Whether or not the stream has data
    #
    # @param [String] stream_name The name of the stream
    # @return [Boolean] True when the stream's file exists
    def stream_has_data(stream_name)
      # V1 of data streaming - use a local file
      File.exist?(stream_location(stream_name))
    end

    # Append one record to a stream
    #
    # @param [String] stream_name The name of the stream
    # @param [String] data_string String of data to write
    # @param [String] partition_key Ignored
    # @return [Boolean] True - the write to the stream succeeded
    def stream_write(stream_name, data_string, partition_key = nil)
      # Block form closes the file even when the write raises.
      File.open(stream_location(stream_name), "a") do |stream|
        stream.puts(data_string + "\n")
      end
      true
    end

    # Fake shard description for file, use hostname for shard_id
    #
    # @param [String] stream_name Ignored
    # @return [Array<Hash>] Single-element array: [{ :shard_id => hostname }]
    def get_shard_descriptions(stream_name)
      [{
        :shard_id => `hostname`.tr("\n", "")
      }]
    end

    # Get the "shard iterator" for a stream. For the file client this is simply
    # the stream's file location; the shard description and sequence number are
    # not consulted.
    #
    # @param [String] stream_name The name of the stream to get a shard iterator for
    # @param [Hash] shard_description Ignored
    # @param [String] starting_sequence_number Ignored
    # @return [String] The shard iterator (the stream file path)
    def get_shard_iterator_from_sequence_number(stream_name, shard_description, starting_sequence_number = nil)
      stream_location(stream_name)
    end

    # Read from the stream file into the given output files.
    #
    # The stream file is first renamed to "<path>.<epoch seconds>" and then read
    # line by line; lines are distributed round-robin across +files+.
    #
    # @param [String] shard_iterator Path of the stream file (result of
    #   #get_shard_iterator_from_sequence_number)
    # @param [Array] files Array of writable IO-like objects (must respond to #puts)
    # @param [Hash] options Ignored
    # @return [Hash] :records (number of lines read), :starting_sequence_number and
    #   :ending_sequence_number (both the rename timestamp, or '' when the stream
    #   file does not exist)
    def stream_read_from_shard_iterator_into_files(shard_iterator, files, options = {})
      stream_file_name = shard_iterator

      unless File.exist?(stream_file_name)
        return {
          :records => 0,
          :starting_sequence_number => '',
          :ending_sequence_number => ''
        }
      end

      # NOTE(review): second-granularity timestamp; two reads of the same stream
      # within one second would target the same rotated file name.
      fake_sequence_number = Time.now.to_i
      rotated_file_name = "#{stream_file_name}.#{fake_sequence_number}"
      FileUtils.mv(stream_file_name, rotated_file_name)

      records = 0
      num_files = files.length

      # Block form guarantees the rotated file is closed once reading finishes.
      File.open(rotated_file_name, 'r') do |stream_file|
        stream_file.each_line do |line|
          # Each line already carries its newline; puts only appends one when it
          # is missing, so records are written without blank lines in between.
          files[records % num_files].puts line
          records += 1
        end
      end

      {
        :starting_sequence_number => fake_sequence_number,
        :ending_sequence_number => fake_sequence_number,
        :records => records
      }
    end

    # Name of the stream in the data broker for a given table:
    # "<redshift_cluster_name>.<redshift_dbname>.<redshift_table>"
    #
    # @param [String] redshift_table Name of the redshift table
    # @return [String] Name of the stream
    def stream_name(redshift_table)
      @options[:redshift_cluster_name] + '.' + @options[:redshift_dbname] + ".#{redshift_table}"
    end

  end
end
|
data/redtrack.gemspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# Gem specification for redtrack.
Gem::Specification.new do |spec|
  # Identity and release information
  spec.name     = 'redtrack'
  spec.version  = '0.0.1'
  spec.date     = '2014-11-06'
  spec.license  = 'MIT'
  spec.homepage = 'https://github.com/redhotlabs/redtrack'

  # Human-readable descriptions
  spec.summary     = 'Real-time event tracking in AWS.'
  spec.description = 'System for real time event tracking & loading infrastructure for AWS. Utilizes Kinesis as a data broker and Redshift as a data warehouse.'

  # Maintainer
  spec.authors = ['Luke Rajlich']
  spec.email   = 'lrajlich@gmail.com'

  # Packaged files are whatever git tracks when the gem is built, so the build
  # has to run from inside a git checkout.
  spec.files         = `git ls-files`.split("\n")
  spec.require_paths = %w[lib]

  # Runtime dependencies
  spec.add_dependency('aws-sdk', '>= 1.58.0')
  spec.add_dependency('pg', '>= 0.17.1')

  # Development-only dependencies
  spec.add_development_dependency('rspec', '>= 3.1.0')
end
|
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: redtrack
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Luke Rajlich
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: aws-sdk
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.58.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.58.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pg
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.17.1
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.17.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 3.1.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 3.1.0
|
55
|
+
description: System for real time event tracking & loading infrastructure for AWS.
|
56
|
+
Utilizes Kinesis as a data broker and Redshift as a data warehouse.
|
57
|
+
email: lrajlich@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- Gemfile
|
64
|
+
- LICENSE
|
65
|
+
- README.md
|
66
|
+
- Rakefile
|
67
|
+
- lib/redtrack.rb
|
68
|
+
- lib/redtrack_client.rb
|
69
|
+
- lib/redtrack_datatypes.rb
|
70
|
+
- lib/redtrack_kinesisclient.rb
|
71
|
+
- lib/redtrack_loader.rb
|
72
|
+
- lib/redtrack_local_file_stream.rb
|
73
|
+
- redtrack.gemspec
|
74
|
+
homepage: https://github.com/redhotlabs/redtrack
|
75
|
+
licenses:
|
76
|
+
- MIT
|
77
|
+
metadata: {}
|
78
|
+
post_install_message:
|
79
|
+
rdoc_options: []
|
80
|
+
require_paths:
|
81
|
+
- lib
|
82
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '0'
|
87
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
requirements: []
|
93
|
+
rubyforge_project:
|
94
|
+
rubygems_version: 2.2.2
|
95
|
+
signing_key:
|
96
|
+
specification_version: 4
|
97
|
+
summary: Real-time event tracking in AWS.
|
98
|
+
test_files: []
|
99
|
+
has_rdoc:
|