bucket_cake 5.2.1 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c70c6ebd3e74463c225cf381f435bd9ba555eb2a
4
- data.tar.gz: 40d780925bac1c782eea33810d0c31662f0fee40
3
+ metadata.gz: 2883992442100adbcd532c41921f5054242b8c6c
4
+ data.tar.gz: 5e4d397a504c4a976b0eb5bbbbb27d8c7c83ed99
5
5
  SHA512:
6
- metadata.gz: 1682f264a01266b295152ea1a66cf3f76266a5ae6edcf823ed08c4d0038fe7ac6300a33bb7a1f8419c80fe0ed42ea3945a9cee096ce37ca029c1c653d315efe9
7
- data.tar.gz: cd5a860472517fdc2f0d2b80ce8fc665c2505b2a36615d9efbceb215d969c504c53a35a609f8bfd5ec2b7df300486b990dd961e4036fd885478f6cbb9144b9ad
6
+ metadata.gz: 7868a377f26766dd2416aab145894eb963f588ea92bafa3aaf5364110449af913af0017f7ebfca1b635f5bc0293b5f2c19e7a0096fff4fa4dc983896034d793e
7
+ data.tar.gz: f09f55f39ee24be7ac16e0ca99ba90a3242790fee052c03b0961c6a25b4c91862f06d68d5c87c652df840e21a7f4d4bc26285607c6295e38ac3efe6704b76792
@@ -1,46 +1,91 @@
1
1
  # frozen_string_literal: true
2
2
  module BucketCake
3
3
  class Base
4
- class Range < self
5
- private
4
+ def items
5
+ klass = self.class::PROTOCLASS.call
6
+ Decoder.new(files, klass).items.lazy
7
+ end
6
8
 
7
- def source
8
- Source::Range.new(self.class::FOLDER, cursor)
9
+ private
10
+
11
+ def files
12
+ Enumerator.new do |y|
13
+ keys.each do |key|
14
+ y << bucket.object(key).get.body
15
+ end
9
16
  end
10
17
  end
11
18
 
12
- class Latest < self
13
- def initialize(cursor = nil)
14
- super(cursor)
15
- end
19
+ def folder
20
+ self.class::FOLDER
21
+ end
16
22
 
17
- def has_new_data?
18
- cursor.nil? || cursor != source.latest_cursor
23
+ def bucket
24
+ @bucket ||= Aws::S3::Bucket.new(ENV.fetch('CAKE_DATA_BUCKET'))
25
+ end
26
+
27
+ class Range < self
28
+ include TimeHelper
29
+
30
+ attr_reader :start_time, :end_time
31
+
32
+ def initialize(start_time, end_time)
33
+ assert_time(start_time)
34
+ assert_time(end_time)
35
+ raise 'Invalid time: end must be after start' unless end_time > start_time
36
+
37
+ @start_time = start_time
38
+ @end_time = end_time
19
39
  end
20
40
 
21
41
  private
22
42
 
23
- def source
24
- Source::Latest.new(self.class::FOLDER, cursor)
43
+ def keys
44
+ hour_keys(folder, start_time, end_time)
25
45
  end
26
46
  end
27
47
 
28
- attr_reader :cursor
48
+ class Latest < self
49
+ LATEST_FILE = 'latest.gz'
29
50
 
30
- def initialize(cursor)
31
- raise 'BucketCake: cursor has invalid format!' unless cursor.nil? || cursor =~ CURSOR_REGEXP
32
- @cursor = cursor
33
- end
51
+ private
34
52
 
35
- def items
36
- get source, self.class::PROTOCLASS.call
53
+ def keys
54
+ ["#{folder}/#{LATEST_FILE}"]
55
+ end
37
56
  end
38
57
 
39
- private
58
+ class Realtime < self
59
+ include TimeHelper
60
+
61
+ DEFAULT_LOOKBACK = 24.hours
62
+ CURSOR_REGEX = %r{\A\w+/\d{4}/\d{2}/\d{2}/\d{4}\.gz\z}
63
+
64
+ attr_reader :cursor
65
+
66
+ def initialize(cursor)
67
+ raise 'Invalid cursor format!' unless cursor.nil? || cursor =~ CURSOR_REGEX
68
+ @cursor = cursor || default_cursor
69
+ end
40
70
 
41
- def get(source, klass)
42
- @cursor = source.latest_cursor
43
- Decoder.new(source.zip_files, klass).items.lazy
71
+ def files
72
+ @keys = load_keys
73
+ @cursor = keys.last unless keys.empty?
74
+ super
75
+ end
76
+
77
+ private
78
+
79
+ attr_reader :keys
80
+
81
+ def default_cursor
82
+ minute_cursor(folder, Time.now - DEFAULT_LOOKBACK)
83
+ end
84
+
85
+ def load_keys
86
+ # return all objects after the cursor timestamp
87
+ bucket.objects(marker: cursor, prefix: "#{folder}/").map(&:key)
88
+ end
44
89
  end
45
90
  end
46
91
  end
@@ -1,15 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
  module BucketCake
3
3
  class Decoder
4
- def initialize(zip_files, klass)
5
- @zip_files = zip_files
4
+ def initialize(files, klass)
5
+ @files = files
6
6
  @klass = klass
7
7
  end
8
8
 
9
9
  def items
10
10
  Enumerator.new do |y|
11
- zip_files.each do |zip_data|
12
- unzip_elements(zip_data) do |element|
11
+ files.each do |gzdata|
12
+ unpack(gzdata) do |element|
13
13
  y << klass.decode(element)
14
14
  end
15
15
  end
@@ -18,15 +18,14 @@ module BucketCake
18
18
 
19
19
  private
20
20
 
21
- attr_reader :zip_files, :klass
21
+ attr_reader :files, :klass
22
22
 
23
- def unzip_elements(zip_data)
24
- zip_data_io = StringIO.new(zip_data)
25
- files = Zip::File.new(zip_data_io, true, true)
26
- files.read_from_stream(zip_data_io)
23
+ def unpack(gzdata)
24
+ Zlib::GzipReader.new(gzdata).each_line.each do |line|
25
+ line.strip!
26
+ next if line.empty?
27
27
 
28
- files.each do |entry|
29
- yield entry.get_input_stream.read
28
+ yield Base64.decode64(line)
30
29
  end
31
30
  end
32
31
  end
@@ -11,14 +11,9 @@ module BucketCake
11
11
  PROTOCLASS = -> { Cakeproto::Conversion }
12
12
  end
13
13
 
14
- class CapStates < Base::Range
14
+ class CapStatesLatest < Base::Latest
15
15
  FOLDER = 'cap_states'
16
16
  PROTOCLASS = -> { Cakeproto::CapState }
17
-
18
- class Latest < Base::Latest
19
- FOLDER = 'cap_states'
20
- PROTOCLASS = -> { Cakeproto::CapState }
21
- end
22
17
  end
23
18
  end
24
19
  end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+ module BucketCake
3
+ module Realtime
4
+ class Clicks < Base::Realtime
5
+ FOLDER = 'clicks_rt'
6
+ PROTOCLASS = -> { Cakeproto::Click }
7
+ end
8
+
9
+ class Conversions < Base::Realtime
10
+ FOLDER = 'conversions_rt'
11
+ PROTOCLASS = -> { Cakeproto::Conversion }
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+ module BucketCake
3
+ module TimeHelper
4
+ def hour_cursor(prefix, time)
5
+ "#{prefix}/#{time.utc.strftime('%Y/%m/%d/%H')}.gz"
6
+ end
7
+
8
+ def minute_cursor(prefix, time)
9
+ "#{prefix}/#{time.utc.strftime('%Y/%m/%d/%H%M')}.gz"
10
+ end
11
+
12
+ def hour_keys(prefix, start_time, end_time)
13
+ Enumerator.new do |y|
14
+ while start_time < end_time
15
+ y << hour_cursor(prefix, start_time)
16
+ start_time += 1.hour
17
+ end
18
+ end
19
+ end
20
+
21
+ def assert_time(time)
22
+ raise 'Invalid time: must be a Time object' unless time.is_a?(Time)
23
+ raise 'Invalid time: must be at start of hour' if time.beginning_of_hour != time
24
+ end
25
+ end
26
+ end
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module BucketCake
3
- VERSION = '5.2.1'
3
+ VERSION = '6.0.0'
4
4
  end
data/lib/bucket_cake.rb CHANGED
@@ -1,11 +1,12 @@
1
1
  # frozen_string_literal: true
2
+ require 'base64'
3
+ require 'zlib'
2
4
  require 'aws-sdk'
3
- require 'zip'
4
5
  require 'google/protobuf'
5
6
  require 'active_support/time'
6
7
 
7
8
  require 'bucket_cake/version'
8
- require 'bucket_cake/source'
9
+ require 'bucket_cake/time_helper'
9
10
  require 'bucket_cake/decoder'
10
11
  require 'bucket_cake/base'
11
12
 
@@ -25,7 +26,4 @@ require 'bucket_cake/proto_ext/maybe_int'
25
26
  require 'bucket_cake/entities'
26
27
  require 'bucket_cake/values'
27
28
  require 'bucket_cake/facts'
28
-
29
- module BucketCake
30
- CURSOR_REGEXP = %r{\A\d{4}/\d{2}/\d{2}/\d{6}\z}
31
- end
29
+ require 'bucket_cake/realtime'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bucket_cake
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.2.1
4
+ version: 6.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ad2games developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-09 00:00:00.000000000 Z
11
+ date: 2016-12-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: rubyzip
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: google-protobuf
43
29
  requirement: !ruby/object:Gem::Requirement
@@ -187,7 +173,8 @@ files:
187
173
  - lib/bucket_cake/proto/offers_pb.rb
188
174
  - lib/bucket_cake/proto_ext/maybe_int.rb
189
175
  - lib/bucket_cake/proto_ext/time_with_zone.rb
190
- - lib/bucket_cake/source.rb
176
+ - lib/bucket_cake/realtime.rb
177
+ - lib/bucket_cake/time_helper.rb
191
178
  - lib/bucket_cake/values.rb
192
179
  - lib/bucket_cake/version.rb
193
180
  homepage: https://github.com/ad2games/bucket_cake
@@ -1,68 +0,0 @@
1
- # frozen_string_literal: true
2
- module BucketCake
3
- class Source
4
- def initialize(folder, cursor)
5
- @folder = folder
6
- @cursor = cursor
7
- end
8
-
9
- def zip_files
10
- Enumerator.new do |y|
11
- objects.each do |object|
12
- next unless check_success(object.key)
13
- next unless object.key =~ %r{/part\d{3}\.zip\z}
14
-
15
- io = object.get.body
16
-
17
- # we need to force binary encoding, otherwise binary zip data will be parsed as utf-8
18
- io.set_encoding('BINARY')
19
-
20
- y << io.read
21
- end
22
- end
23
- end
24
-
25
- def latest_cursor
26
- @latest_cursor ||= bucket.object("#{folder}/latest").get.body.read
27
- end
28
-
29
- private
30
-
31
- attr_reader :folder, :cursor
32
-
33
- def check_success(key)
34
- @success_files ||= Set.new
35
- @success_files << key if key.end_with?('/SUCCESS')
36
-
37
- # This relies on the fact that SUCCESS files always come before partNNN.zip files.
38
- # But that is fine because S3 guarantees alphabetical order.
39
- @success_files.include?(File.dirname(key) + '/SUCCESS')
40
- end
41
-
42
- def bucket
43
- @bucket ||= Aws::S3::Bucket.new(ENV.fetch('CAKE_DATA_BUCKET'))
44
- end
45
- end
46
-
47
- class Source::Latest < Source
48
- private
49
-
50
- def objects
51
- bucket.objects(prefix: "#{folder}/#{latest_cursor}/")
52
- end
53
- end
54
-
55
- class Source::Range < Source
56
- private
57
-
58
- def objects
59
- # If we don't have a timestamp in cursor, return everything
60
- return bucket.objects(prefix: "#{folder}/") if cursor.nil?
61
-
62
- # This returns all objects after the current timestamp.
63
- # By using the 'xxx' suffix, none of the objects of the given timestamp are returned.
64
- # S3 ensures alphabetical order of the results, so this works fine for date ranges.
65
- bucket.objects(marker: "#{folder}/#{cursor}/xxx", prefix: "#{folder}/")
66
- end
67
- end
68
- end