bucket_cake 5.2.1 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bucket_cake/base.rb +68 -23
- data/lib/bucket_cake/decoder.rb +10 -11
- data/lib/bucket_cake/facts.rb +1 -6
- data/lib/bucket_cake/realtime.rb +14 -0
- data/lib/bucket_cake/time_helper.rb +26 -0
- data/lib/bucket_cake/version.rb +1 -1
- data/lib/bucket_cake.rb +4 -6
- metadata +4 -17
- data/lib/bucket_cake/source.rb +0 -68
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2883992442100adbcd532c41921f5054242b8c6c
|
4
|
+
data.tar.gz: 5e4d397a504c4a976b0eb5bbbbb27d8c7c83ed99
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7868a377f26766dd2416aab145894eb963f588ea92bafa3aaf5364110449af913af0017f7ebfca1b635f5bc0293b5f2c19e7a0096fff4fa4dc983896034d793e
|
7
|
+
data.tar.gz: f09f55f39ee24be7ac16e0ca99ba90a3242790fee052c03b0961c6a25b4c91862f06d68d5c87c652df840e21a7f4d4bc26285607c6295e38ac3efe6704b76792
|
data/lib/bucket_cake/base.rb
CHANGED
@@ -1,46 +1,91 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
module BucketCake
|
3
3
|
class Base
|
4
|
-
|
5
|
-
|
4
|
+
def items
|
5
|
+
klass = self.class::PROTOCLASS.call
|
6
|
+
Decoder.new(files, klass).items.lazy
|
7
|
+
end
|
6
8
|
|
7
|
-
|
8
|
-
|
9
|
+
private
|
10
|
+
|
11
|
+
def files
|
12
|
+
Enumerator.new do |y|
|
13
|
+
keys.each do |key|
|
14
|
+
y << bucket.object(key).get.body
|
15
|
+
end
|
9
16
|
end
|
10
17
|
end
|
11
18
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
end
|
19
|
+
def folder
|
20
|
+
self.class::FOLDER
|
21
|
+
end
|
16
22
|
|
17
|
-
|
18
|
-
|
23
|
+
def bucket
|
24
|
+
@bucket ||= Aws::S3::Bucket.new(ENV.fetch('CAKE_DATA_BUCKET'))
|
25
|
+
end
|
26
|
+
|
27
|
+
class Range < self
|
28
|
+
include TimeHelper
|
29
|
+
|
30
|
+
attr_reader :start_time, :end_time
|
31
|
+
|
32
|
+
def initialize(start_time, end_time)
|
33
|
+
assert_time(start_time)
|
34
|
+
assert_time(end_time)
|
35
|
+
raise 'Invalid time: end must be after start' unless end_time > start_time
|
36
|
+
|
37
|
+
@start_time = start_time
|
38
|
+
@end_time = end_time
|
19
39
|
end
|
20
40
|
|
21
41
|
private
|
22
42
|
|
23
|
-
def
|
24
|
-
|
43
|
+
def keys
|
44
|
+
hour_keys(folder, start_time, end_time)
|
25
45
|
end
|
26
46
|
end
|
27
47
|
|
28
|
-
|
48
|
+
class Latest < self
|
49
|
+
LATEST_FILE = 'latest.gz'
|
29
50
|
|
30
|
-
|
31
|
-
raise 'BucketCake: cursor has invalid format!' unless cursor.nil? || cursor =~ CURSOR_REGEXP
|
32
|
-
@cursor = cursor
|
33
|
-
end
|
51
|
+
private
|
34
52
|
|
35
|
-
|
36
|
-
|
53
|
+
def keys
|
54
|
+
["#{folder}/#{LATEST_FILE}"]
|
55
|
+
end
|
37
56
|
end
|
38
57
|
|
39
|
-
|
58
|
+
class Realtime < self
|
59
|
+
include TimeHelper
|
60
|
+
|
61
|
+
DEFAULT_LOOKBACK = 24.hours
|
62
|
+
CURSOR_REGEX = %r{\A\w+/\d{4}/\d{2}/\d{2}/\d{4}\.gz\z}
|
63
|
+
|
64
|
+
attr_reader :cursor
|
65
|
+
|
66
|
+
def initialize(cursor)
|
67
|
+
raise 'Invalid cursor format!' unless cursor.nil? || cursor =~ CURSOR_REGEX
|
68
|
+
@cursor = cursor || default_cursor
|
69
|
+
end
|
40
70
|
|
41
|
-
|
42
|
-
|
43
|
-
|
71
|
+
def files
|
72
|
+
@keys = load_keys
|
73
|
+
@cursor = keys.last unless keys.empty?
|
74
|
+
super
|
75
|
+
end
|
76
|
+
|
77
|
+
private
|
78
|
+
|
79
|
+
attr_reader :keys
|
80
|
+
|
81
|
+
def default_cursor
|
82
|
+
minute_cursor(folder, Time.now - DEFAULT_LOOKBACK)
|
83
|
+
end
|
84
|
+
|
85
|
+
def load_keys
|
86
|
+
# return all objects after the cursor timestamp
|
87
|
+
bucket.objects(marker: cursor, prefix: "#{folder}/").map(&:key)
|
88
|
+
end
|
44
89
|
end
|
45
90
|
end
|
46
91
|
end
|
data/lib/bucket_cake/decoder.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
module BucketCake
|
3
3
|
class Decoder
|
4
|
-
def initialize(
|
5
|
-
@
|
4
|
+
def initialize(files, klass)
|
5
|
+
@files = files
|
6
6
|
@klass = klass
|
7
7
|
end
|
8
8
|
|
9
9
|
def items
|
10
10
|
Enumerator.new do |y|
|
11
|
-
|
12
|
-
|
11
|
+
files.each do |gzdata|
|
12
|
+
unpack(gzdata) do |element|
|
13
13
|
y << klass.decode(element)
|
14
14
|
end
|
15
15
|
end
|
@@ -18,15 +18,14 @@ module BucketCake
|
|
18
18
|
|
19
19
|
private
|
20
20
|
|
21
|
-
attr_reader :
|
21
|
+
attr_reader :files, :klass
|
22
22
|
|
23
|
-
def
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
def unpack(gzdata)
|
24
|
+
Zlib::GzipReader.new(gzdata).each_line.each do |line|
|
25
|
+
line.strip!
|
26
|
+
next if line.empty?
|
27
27
|
|
28
|
-
|
29
|
-
yield entry.get_input_stream.read
|
28
|
+
yield Base64.decode64(line)
|
30
29
|
end
|
31
30
|
end
|
32
31
|
end
|
data/lib/bucket_cake/facts.rb
CHANGED
@@ -11,14 +11,9 @@ module BucketCake
|
|
11
11
|
PROTOCLASS = -> { Cakeproto::Conversion }
|
12
12
|
end
|
13
13
|
|
14
|
-
class
|
14
|
+
class CapStatesLatest < Base::Latest
|
15
15
|
FOLDER = 'cap_states'
|
16
16
|
PROTOCLASS = -> { Cakeproto::CapState }
|
17
|
-
|
18
|
-
class Latest < Base::Latest
|
19
|
-
FOLDER = 'cap_states'
|
20
|
-
PROTOCLASS = -> { Cakeproto::CapState }
|
21
|
-
end
|
22
17
|
end
|
23
18
|
end
|
24
19
|
end
|
data/lib/bucket_cake/realtime.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module BucketCake
|
3
|
+
module Realtime
|
4
|
+
class Clicks < Base::Realtime
|
5
|
+
FOLDER = 'clicks_rt'
|
6
|
+
PROTOCLASS = -> { Cakeproto::Click }
|
7
|
+
end
|
8
|
+
|
9
|
+
class Conversions < Base::Realtime
|
10
|
+
FOLDER = 'conversions_rt'
|
11
|
+
PROTOCLASS = -> { Cakeproto::Conversion }
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/lib/bucket_cake/time_helper.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module BucketCake
|
3
|
+
module TimeHelper
|
4
|
+
def hour_cursor(prefix, time)
|
5
|
+
"#{prefix}/#{time.utc.strftime('%Y/%m/%d/%H')}.gz"
|
6
|
+
end
|
7
|
+
|
8
|
+
def minute_cursor(prefix, time)
|
9
|
+
"#{prefix}/#{time.utc.strftime('%Y/%m/%d/%H%M')}.gz"
|
10
|
+
end
|
11
|
+
|
12
|
+
def hour_keys(prefix, start_time, end_time)
|
13
|
+
Enumerator.new do |y|
|
14
|
+
while start_time < end_time
|
15
|
+
y << hour_cursor(prefix, start_time)
|
16
|
+
start_time += 1.hour
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def assert_time(time)
|
22
|
+
raise 'Invalid time: must be a Time object' unless time.is_a?(Time)
|
23
|
+
raise 'Invalid time: must be at start of hour' if time.beginning_of_hour != time
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/bucket_cake/version.rb
CHANGED
data/lib/bucket_cake.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require 'base64'
|
3
|
+
require 'zlib'
|
2
4
|
require 'aws-sdk'
|
3
|
-
require 'zip'
|
4
5
|
require 'google/protobuf'
|
5
6
|
require 'active_support/time'
|
6
7
|
|
7
8
|
require 'bucket_cake/version'
|
8
|
-
require 'bucket_cake/source'
|
9
|
+
require 'bucket_cake/time_helper'
|
9
10
|
require 'bucket_cake/decoder'
|
10
11
|
require 'bucket_cake/base'
|
11
12
|
|
@@ -25,7 +26,4 @@ require 'bucket_cake/proto_ext/maybe_int'
|
|
25
26
|
require 'bucket_cake/entities'
|
26
27
|
require 'bucket_cake/values'
|
27
28
|
require 'bucket_cake/facts'
|
28
|
-
|
29
|
-
module BucketCake
|
30
|
-
CURSOR_REGEXP = %r{\A\d{4}/\d{2}/\d{2}/\d{6}\z}
|
31
|
-
end
|
29
|
+
require 'bucket_cake/realtime'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bucket_cake
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.2.1
|
4
|
+
version: 6.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ad2games developers
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-12-
|
11
|
+
date: 2016-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rubyzip
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: google-protobuf
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -187,7 +173,8 @@ files:
|
|
187
173
|
- lib/bucket_cake/proto/offers_pb.rb
|
188
174
|
- lib/bucket_cake/proto_ext/maybe_int.rb
|
189
175
|
- lib/bucket_cake/proto_ext/time_with_zone.rb
|
190
|
-
- lib/bucket_cake/source.rb
|
176
|
+
- lib/bucket_cake/realtime.rb
|
177
|
+
- lib/bucket_cake/time_helper.rb
|
191
178
|
- lib/bucket_cake/values.rb
|
192
179
|
- lib/bucket_cake/version.rb
|
193
180
|
homepage: https://github.com/ad2games/bucket_cake
|
data/lib/bucket_cake/source.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
module BucketCake
|
3
|
-
class Source
|
4
|
-
def initialize(folder, cursor)
|
5
|
-
@folder = folder
|
6
|
-
@cursor = cursor
|
7
|
-
end
|
8
|
-
|
9
|
-
def zip_files
|
10
|
-
Enumerator.new do |y|
|
11
|
-
objects.each do |object|
|
12
|
-
next unless check_success(object.key)
|
13
|
-
next unless object.key =~ %r{/part\d{3}\.zip\z}
|
14
|
-
|
15
|
-
io = object.get.body
|
16
|
-
|
17
|
-
# we need to force binary encoding, otherwise binary zip data will be parsed as utf-8
|
18
|
-
io.set_encoding('BINARY')
|
19
|
-
|
20
|
-
y << io.read
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
def latest_cursor
|
26
|
-
@latest_cursor ||= bucket.object("#{folder}/latest").get.body.read
|
27
|
-
end
|
28
|
-
|
29
|
-
private
|
30
|
-
|
31
|
-
attr_reader :folder, :cursor
|
32
|
-
|
33
|
-
def check_success(key)
|
34
|
-
@success_files ||= Set.new
|
35
|
-
@success_files << key if key.end_with?('/SUCCESS')
|
36
|
-
|
37
|
-
# This relies on the fact that SUCCESS files always come before partNNN.zip files.
|
38
|
-
# But that is fine because S3 guarantees alphabetical order.
|
39
|
-
@success_files.include?(File.dirname(key) + '/SUCCESS')
|
40
|
-
end
|
41
|
-
|
42
|
-
def bucket
|
43
|
-
@bucket ||= Aws::S3::Bucket.new(ENV.fetch('CAKE_DATA_BUCKET'))
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
class Source::Latest < Source
|
48
|
-
private
|
49
|
-
|
50
|
-
def objects
|
51
|
-
bucket.objects(prefix: "#{folder}/#{latest_cursor}/")
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
class Source::Range < Source
|
56
|
-
private
|
57
|
-
|
58
|
-
def objects
|
59
|
-
# If we don't have a timestamp in cursor, return everything
|
60
|
-
return bucket.objects(prefix: "#{folder}/") if cursor.nil?
|
61
|
-
|
62
|
-
# This returns all objects after the current timestamp.
|
63
|
-
# By using the 'xxx' suffix, none of the objects of the given timestamp are returned.
|
64
|
-
# S3 ensures alphabetical order of the results, so this works fine for date ranges.
|
65
|
-
bucket.objects(marker: "#{folder}/#{cursor}/xxx", prefix: "#{folder}/")
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|