redtrack 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,126 @@
1
+ # The FileClient provides an application interface to a file-based broker for redshift data
2
+ #
3
+ # Copyright (c) 2014 RedHotLabs, Inc.
4
+ # Licensed under The MIT License
5
+
6
# FileUtils supplies mkdir_p and mv, both used below.
require 'fileutils'

module RedTrack
  # File-backed data broker client. Each stream is a plain text file under
  # the "log" directory (relative to the process working directory) holding
  # one record per line. The method names and the unused parameters
  # (partition_key, shard_description, ...) presumably mirror the Kinesis
  # client shipped with this gem so the two are interchangeable - verify
  # against the callers.
  class FileClient

    # Directory, relative to the working directory, that holds stream files.
    LOG_DIRECTORY = 'log'.freeze

    # Create the log directory if needed and remember the options.
    #
    # @param [Hash] options Only read by #stream_name, which expects
    #   :redshift_cluster_name and :redshift_dbname
    def initialize(options)
      # mkdir_p is a no-op when the directory already exists, so there is no
      # window between "check" and "create" for another process to race in.
      FileUtils.mkdir_p(LOG_DIRECTORY)

      @options = options
    end

    # Get Location of the stream
    #
    # @param [String] stream_name The name of the stream
    # @return [String] Relative path of the file backing the stream
    def stream_location(stream_name)
      # V1 of data streaming - use a local file
      "log/#{stream_name}"
    end

    # Whether or not the stream has data
    #
    # @param [String] stream_name The name of the stream
    # @return [Boolean] True when the stream file exists (a stream that has
    #   been read is renamed away, so existence means unread data)
    def stream_has_data(stream_name)
      File.exist?(stream_location(stream_name))
    end

    # Append one record to a stream
    #
    # @param [String] stream_name The name of the stream
    # @param [String] data_string String of data to write
    # @param [String] partition_key Ignored
    # @return [Boolean] True - the write to the stream succeeded
    def stream_write(stream_name, data_string, partition_key = nil)
      # Block form closes the handle even when the write raises.
      File.open(stream_location(stream_name), 'a') do |stream|
        stream.puts data_string + "\n"
      end
      true
    end

    # Fake shard description for file, use hostname for shard_id
    #
    # @param [String] stream_name Ignored
    # @return [Array<Hash>] Single-element array with a :shard_id key
    def get_shard_descriptions(stream_name)
      [{
        :shard_id => `hostname`.tr("\n", "")
      }]
    end

    # Get the "shard iterator" for a stream. For the file broker this is
    # simply the path of the stream file; checkpoints are not used.
    #
    # @param [String] stream_name The name of the stream to get a shard iterator for
    # @param [Hash] shard_description Ignored
    # @param [String] starting_sequence_number Ignored
    # @return [String] The shard iterator (path of the stream file)
    def get_shard_iterator_from_sequence_number(stream_name, shard_description, starting_sequence_number = nil)
      stream_location(stream_name)
    end

    # Read every record of a stream file and distribute the records
    # round-robin over the given output files.
    #
    # The stream file is first renamed to "<name>.<unix time>" so that
    # subsequent writes start a fresh file; the renamed file is left on disk.
    #
    # @param [String] shard_iterator Path of the stream file - result of
    #   #get_shard_iterator_from_sequence_number
    # @param [Array] files Writable IO-like objects (must respond to #puts)
    #   to read into
    # @param [Hash] options Ignored
    # @return [Hash] :records (number of records read) plus
    #   :starting_sequence_number / :ending_sequence_number, which are both
    #   the rename timestamp, or '' when the stream file does not exist
    def stream_read_from_shard_iterator_into_files(shard_iterator, files, options = {})
      stream_file_name = shard_iterator
      fake_sequence_number = Time.now.to_i

      unless File.exist?(stream_file_name)
        return {
          :records => 0,
          :starting_sequence_number => '',
          :ending_sequence_number => ''
        }
      end

      rotated_file_name = "#{stream_file_name}.#{fake_sequence_number}"
      FileUtils.mv(stream_file_name, rotated_file_name)

      records = 0
      num_files = files.length

      # Block form guarantees the rotated file is closed after reading.
      File.open(rotated_file_name, 'r') do |stream_file|
        stream_file.each_line do |line|
          # Each line still carries its trailing newline and puts does not
          # add a second one, so every record is written as exactly one line
          # (appending another "\n" here produced a blank line per record).
          files[records % num_files].puts line
          records += 1
        end
      end

      {
        :starting_sequence_number => fake_sequence_number,
        :ending_sequence_number => fake_sequence_number,
        :records => records
      }
    end

    # Name of the stream in the data broker for a redshift table
    #
    # @param [String] redshift_table Name of the redshift table
    # @return [String] "<cluster name>.<db name>.<table>"
    def stream_name(redshift_table)
      @options[:redshift_cluster_name] + '.' + @options[:redshift_dbname] + ".#{redshift_table}"
    end

  end
end
@@ -0,0 +1,17 @@
1
# Gem specification for redtrack.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = 'redtrack'
  spec.version  = '0.0.1'
  spec.date     = '2014-11-06'
  spec.license  = 'MIT'
  spec.homepage = 'https://github.com/redhotlabs/redtrack'

  # Human-readable metadata
  spec.summary     = 'Real-time event tracking in AWS.'
  spec.description = 'System for real time event tracking & loading infrastructure for AWS. Utilizes Kinesis as a data broker and Redshift as a data warehouse.'
  spec.authors     = ['Luke Rajlich']
  spec.email       = 'lrajlich@gmail.com'

  # Package every file git tracks; library code is loaded from lib/.
  tracked_files = `git ls-files`
  spec.files         = tracked_files.split("\n")
  spec.require_paths = %w[lib]

  # Runtime dependencies
  spec.add_dependency('aws-sdk', '>= 1.58.0')
  spec.add_dependency('pg', '>= 0.17.1')

  # Development-only dependencies
  spec.add_development_dependency('rspec', '>= 3.1.0')
end
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: redtrack
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Luke Rajlich
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: aws-sdk
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.58.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.58.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.17.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.17.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 3.1.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 3.1.0
55
+ description: System for real time event tracking & loading infrastructure for AWS.
56
+ Utilizes Kinesis as a data broker and Redshift as a data warehouse.
57
+ email: lrajlich@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - Gemfile
64
+ - LICENSE
65
+ - README.md
66
+ - Rakefile
67
+ - lib/redtrack.rb
68
+ - lib/redtrack_client.rb
69
+ - lib/redtrack_datatypes.rb
70
+ - lib/redtrack_kinesisclient.rb
71
+ - lib/redtrack_loader.rb
72
+ - lib/redtrack_local_file_stream.rb
73
+ - redtrack.gemspec
74
+ homepage: https://github.com/redhotlabs/redtrack
75
+ licenses:
76
+ - MIT
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 2.2.2
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: Real-time event tracking in AWS.
98
+ test_files: []
99
+ has_rdoc: