bucket_cake 5.2.1 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bucket_cake/base.rb +68 -23
- data/lib/bucket_cake/decoder.rb +10 -11
- data/lib/bucket_cake/facts.rb +1 -6
- data/lib/bucket_cake/realtime.rb +14 -0
- data/lib/bucket_cake/time_helper.rb +26 -0
- data/lib/bucket_cake/version.rb +1 -1
- data/lib/bucket_cake.rb +4 -6
- metadata +4 -17
- data/lib/bucket_cake/source.rb +0 -68
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2883992442100adbcd532c41921f5054242b8c6c
|
4
|
+
data.tar.gz: 5e4d397a504c4a976b0eb5bbbbb27d8c7c83ed99
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7868a377f26766dd2416aab145894eb963f588ea92bafa3aaf5364110449af913af0017f7ebfca1b635f5bc0293b5f2c19e7a0096fff4fa4dc983896034d793e
|
7
|
+
data.tar.gz: f09f55f39ee24be7ac16e0ca99ba90a3242790fee052c03b0961c6a25b4c91862f06d68d5c87c652df840e21a7f4d4bc26285607c6295e38ac3efe6704b76792
|
data/lib/bucket_cake/base.rb
CHANGED
@@ -1,46 +1,91 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
module BucketCake
|
3
3
|
class Base
|
4
|
-
|
5
|
-
|
4
|
+
def items
|
5
|
+
klass = self.class::PROTOCLASS.call
|
6
|
+
Decoder.new(files, klass).items.lazy
|
7
|
+
end
|
6
8
|
|
7
|
-
|
8
|
-
|
9
|
+
private
|
10
|
+
|
11
|
+
def files
|
12
|
+
Enumerator.new do |y|
|
13
|
+
keys.each do |key|
|
14
|
+
y << bucket.object(key).get.body
|
15
|
+
end
|
9
16
|
end
|
10
17
|
end
|
11
18
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
end
|
19
|
+
def folder
|
20
|
+
self.class::FOLDER
|
21
|
+
end
|
16
22
|
|
17
|
-
|
18
|
-
|
23
|
+
def bucket
|
24
|
+
@bucket ||= Aws::S3::Bucket.new(ENV.fetch('CAKE_DATA_BUCKET'))
|
25
|
+
end
|
26
|
+
|
27
|
+
class Range < self
|
28
|
+
include TimeHelper
|
29
|
+
|
30
|
+
attr_reader :start_time, :end_time
|
31
|
+
|
32
|
+
def initialize(start_time, end_time)
|
33
|
+
assert_time(start_time)
|
34
|
+
assert_time(end_time)
|
35
|
+
raise 'Invalid time: end must be after start' unless end_time > start_time
|
36
|
+
|
37
|
+
@start_time = start_time
|
38
|
+
@end_time = end_time
|
19
39
|
end
|
20
40
|
|
21
41
|
private
|
22
42
|
|
23
|
-
def
|
24
|
-
|
43
|
+
def keys
|
44
|
+
hour_keys(folder, start_time, end_time)
|
25
45
|
end
|
26
46
|
end
|
27
47
|
|
28
|
-
|
48
|
+
class Latest < self
|
49
|
+
LATEST_FILE = 'latest.gz'
|
29
50
|
|
30
|
-
|
31
|
-
raise 'BucketCake: cursor has invalid format!' unless cursor.nil? || cursor =~ CURSOR_REGEXP
|
32
|
-
@cursor = cursor
|
33
|
-
end
|
51
|
+
private
|
34
52
|
|
35
|
-
|
36
|
-
|
53
|
+
def keys
|
54
|
+
["#{folder}/#{LATEST_FILE}"]
|
55
|
+
end
|
37
56
|
end
|
38
57
|
|
39
|
-
|
58
|
+
class Realtime < self
|
59
|
+
include TimeHelper
|
60
|
+
|
61
|
+
DEFAULT_LOOKBACK = 24.hours
|
62
|
+
CURSOR_REGEX = %r{\A\w+/\d{4}/\d{2}/\d{2}/\d{4}\.gz\z}
|
63
|
+
|
64
|
+
attr_reader :cursor
|
65
|
+
|
66
|
+
def initialize(cursor)
|
67
|
+
raise 'Invalid cursor format!' unless cursor.nil? || cursor =~ CURSOR_REGEX
|
68
|
+
@cursor = cursor || default_cursor
|
69
|
+
end
|
40
70
|
|
41
|
-
|
42
|
-
|
43
|
-
|
71
|
+
def files
|
72
|
+
@keys = load_keys
|
73
|
+
@cursor = keys.last unless keys.empty?
|
74
|
+
super
|
75
|
+
end
|
76
|
+
|
77
|
+
private
|
78
|
+
|
79
|
+
attr_reader :keys
|
80
|
+
|
81
|
+
def default_cursor
|
82
|
+
minute_cursor(folder, Time.now - DEFAULT_LOOKBACK)
|
83
|
+
end
|
84
|
+
|
85
|
+
def load_keys
|
86
|
+
# return all objects after the cursor timestamp
|
87
|
+
bucket.objects(marker: cursor, prefix: "#{folder}/").map(&:key)
|
88
|
+
end
|
44
89
|
end
|
45
90
|
end
|
46
91
|
end
|
data/lib/bucket_cake/decoder.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
module BucketCake
|
3
3
|
class Decoder
|
4
|
-
def initialize(
|
5
|
-
@
|
4
|
+
def initialize(files, klass)
|
5
|
+
@files = files
|
6
6
|
@klass = klass
|
7
7
|
end
|
8
8
|
|
9
9
|
def items
|
10
10
|
Enumerator.new do |y|
|
11
|
-
|
12
|
-
|
11
|
+
files.each do |gzdata|
|
12
|
+
unpack(gzdata) do |element|
|
13
13
|
y << klass.decode(element)
|
14
14
|
end
|
15
15
|
end
|
@@ -18,15 +18,14 @@ module BucketCake
|
|
18
18
|
|
19
19
|
private
|
20
20
|
|
21
|
-
attr_reader :
|
21
|
+
attr_reader :files, :klass
|
22
22
|
|
23
|
-
def
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
def unpack(gzdata)
|
24
|
+
Zlib::GzipReader.new(gzdata).each_line.each do |line|
|
25
|
+
line.strip!
|
26
|
+
next if line.empty?
|
27
27
|
|
28
|
-
|
29
|
-
yield entry.get_input_stream.read
|
28
|
+
yield Base64.decode64(line)
|
30
29
|
end
|
31
30
|
end
|
32
31
|
end
|
data/lib/bucket_cake/facts.rb
CHANGED
@@ -11,14 +11,9 @@ module BucketCake
|
|
11
11
|
PROTOCLASS = -> { Cakeproto::Conversion }
|
12
12
|
end
|
13
13
|
|
14
|
-
class
|
14
|
+
class CapStatesLatest < Base::Latest
|
15
15
|
FOLDER = 'cap_states'
|
16
16
|
PROTOCLASS = -> { Cakeproto::CapState }
|
17
|
-
|
18
|
-
class Latest < Base::Latest
|
19
|
-
FOLDER = 'cap_states'
|
20
|
-
PROTOCLASS = -> { Cakeproto::CapState }
|
21
|
-
end
|
22
17
|
end
|
23
18
|
end
|
24
19
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module BucketCake
|
3
|
+
module Realtime
|
4
|
+
class Clicks < Base::Realtime
|
5
|
+
FOLDER = 'clicks_rt'
|
6
|
+
PROTOCLASS = -> { Cakeproto::Click }
|
7
|
+
end
|
8
|
+
|
9
|
+
class Conversions < Base::Realtime
|
10
|
+
FOLDER = 'conversions_rt'
|
11
|
+
PROTOCLASS = -> { Cakeproto::Conversion }
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module BucketCake
|
3
|
+
module TimeHelper
|
4
|
+
def hour_cursor(prefix, time)
|
5
|
+
"#{prefix}/#{time.utc.strftime('%Y/%m/%d/%H')}.gz"
|
6
|
+
end
|
7
|
+
|
8
|
+
def minute_cursor(prefix, time)
|
9
|
+
"#{prefix}/#{time.utc.strftime('%Y/%m/%d/%H%M')}.gz"
|
10
|
+
end
|
11
|
+
|
12
|
+
def hour_keys(prefix, start_time, end_time)
|
13
|
+
Enumerator.new do |y|
|
14
|
+
while start_time < end_time
|
15
|
+
y << hour_cursor(prefix, start_time)
|
16
|
+
start_time += 1.hour
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def assert_time(time)
|
22
|
+
raise 'Invalid time: must be a Time object' unless time.is_a?(Time)
|
23
|
+
raise 'Invalid time: must be at start of hour' if time.beginning_of_hour != time
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/bucket_cake/version.rb
CHANGED
data/lib/bucket_cake.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require 'base64'
|
3
|
+
require 'zlib'
|
2
4
|
require 'aws-sdk'
|
3
|
-
require 'zip'
|
4
5
|
require 'google/protobuf'
|
5
6
|
require 'active_support/time'
|
6
7
|
|
7
8
|
require 'bucket_cake/version'
|
8
|
-
require 'bucket_cake/
|
9
|
+
require 'bucket_cake/time_helper'
|
9
10
|
require 'bucket_cake/decoder'
|
10
11
|
require 'bucket_cake/base'
|
11
12
|
|
@@ -25,7 +26,4 @@ require 'bucket_cake/proto_ext/maybe_int'
|
|
25
26
|
require 'bucket_cake/entities'
|
26
27
|
require 'bucket_cake/values'
|
27
28
|
require 'bucket_cake/facts'
|
28
|
-
|
29
|
-
module BucketCake
|
30
|
-
CURSOR_REGEXP = %r{\A\d{4}/\d{2}/\d{2}/\d{6}\z}
|
31
|
-
end
|
29
|
+
require 'bucket_cake/realtime'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bucket_cake
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 6.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ad2games developers
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-12-
|
11
|
+
date: 2016-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rubyzip
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: google-protobuf
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -187,7 +173,8 @@ files:
|
|
187
173
|
- lib/bucket_cake/proto/offers_pb.rb
|
188
174
|
- lib/bucket_cake/proto_ext/maybe_int.rb
|
189
175
|
- lib/bucket_cake/proto_ext/time_with_zone.rb
|
190
|
-
- lib/bucket_cake/
|
176
|
+
- lib/bucket_cake/realtime.rb
|
177
|
+
- lib/bucket_cake/time_helper.rb
|
191
178
|
- lib/bucket_cake/values.rb
|
192
179
|
- lib/bucket_cake/version.rb
|
193
180
|
homepage: https://github.com/ad2games/bucket_cake
|
data/lib/bucket_cake/source.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
module BucketCake
|
3
|
-
class Source
|
4
|
-
def initialize(folder, cursor)
|
5
|
-
@folder = folder
|
6
|
-
@cursor = cursor
|
7
|
-
end
|
8
|
-
|
9
|
-
def zip_files
|
10
|
-
Enumerator.new do |y|
|
11
|
-
objects.each do |object|
|
12
|
-
next unless check_success(object.key)
|
13
|
-
next unless object.key =~ %r{/part\d{3}\.zip\z}
|
14
|
-
|
15
|
-
io = object.get.body
|
16
|
-
|
17
|
-
# we need to force binary encoding, otherwise binary zip data will be parsed as utf-8
|
18
|
-
io.set_encoding('BINARY')
|
19
|
-
|
20
|
-
y << io.read
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
def latest_cursor
|
26
|
-
@latest_cursor ||= bucket.object("#{folder}/latest").get.body.read
|
27
|
-
end
|
28
|
-
|
29
|
-
private
|
30
|
-
|
31
|
-
attr_reader :folder, :cursor
|
32
|
-
|
33
|
-
def check_success(key)
|
34
|
-
@success_files ||= Set.new
|
35
|
-
@success_files << key if key.end_with?('/SUCCESS')
|
36
|
-
|
37
|
-
# This relies on the fact that SUCCESS files always come before partNNN.zip files.
|
38
|
-
# But that is fine because S3 guarantees alphabetical order.
|
39
|
-
@success_files.include?(File.dirname(key) + '/SUCCESS')
|
40
|
-
end
|
41
|
-
|
42
|
-
def bucket
|
43
|
-
@bucket ||= Aws::S3::Bucket.new(ENV.fetch('CAKE_DATA_BUCKET'))
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
class Source::Latest < Source
|
48
|
-
private
|
49
|
-
|
50
|
-
def objects
|
51
|
-
bucket.objects(prefix: "#{folder}/#{latest_cursor}/")
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
class Source::Range < Source
|
56
|
-
private
|
57
|
-
|
58
|
-
def objects
|
59
|
-
# If we don't have a timestamp in cursor, return everything
|
60
|
-
return bucket.objects(prefix: "#{folder}/") if cursor.nil?
|
61
|
-
|
62
|
-
# This returns all objects after the current timestamp.
|
63
|
-
# By using the 'xxx' suffix, none of the objects of the given timestamp are returned.
|
64
|
-
# S3 ensures alphabetical order of the results, so this works fine for date ranges.
|
65
|
-
bucket.objects(marker: "#{folder}/#{cursor}/xxx", prefix: "#{folder}/")
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|