km-db 0.2.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +2 -4
  5. data/Gemfile.lock +179 -20
  6. data/Procfile +2 -0
  7. data/Procfile.work +1 -0
  8. data/README.md +186 -0
  9. data/Rakefile +1 -0
  10. data/bin/kmdb-flush +13 -0
  11. data/bin/kmdb-import +13 -0
  12. data/bin/kmdb-partition +15 -0
  13. data/bin/kmdb-pool +8 -0
  14. data/bin/kmdb-realias +12 -0
  15. data/bin/kmdb-ui +6 -0
  16. data/bin/kmdb-work +17 -0
  17. data/config/amazon-rds-ca-cert.pem +260 -0
  18. data/config.ru +8 -0
  19. data/km-db.gemspec +17 -17
  20. data/lib/kmdb/{belongs_to_user.rb → concerns/belongs_to_user.rb} +3 -3
  21. data/lib/kmdb/concerns/has_properties.rb +35 -0
  22. data/lib/kmdb/jobs/find_files.rb +32 -0
  23. data/lib/kmdb/jobs/list_files.rb +37 -0
  24. data/lib/kmdb/jobs/locked.rb +10 -0
  25. data/lib/kmdb/jobs/parse_file.rb +109 -0
  26. data/lib/kmdb/jobs/record_batch.rb +65 -0
  27. data/lib/kmdb/jobs/redo_unaliasing.rb +31 -0
  28. data/lib/kmdb/jobs/unalias_user.rb +32 -0
  29. data/lib/kmdb/migrations/01_kmdb_initial.rb +78 -0
  30. data/lib/kmdb/migrations/02_kmdb_partitions.rb +28 -0
  31. data/lib/kmdb/migrations/03_kmdb_blacklist.rb +20 -0
  32. data/lib/kmdb/models/alias.rb +36 -0
  33. data/lib/kmdb/models/blacklisted_property.rb +20 -0
  34. data/lib/kmdb/models/custom_record.rb +53 -0
  35. data/lib/kmdb/models/dumpfile.rb +33 -0
  36. data/lib/kmdb/models/event.rb +56 -0
  37. data/lib/kmdb/models/event_batch.rb +72 -0
  38. data/lib/kmdb/models/global_uid.rb +42 -0
  39. data/lib/kmdb/models/ignored_user.rb +20 -0
  40. data/lib/kmdb/models/json_file.rb +56 -0
  41. data/lib/kmdb/models/key.rb +28 -0
  42. data/lib/kmdb/models/property.rb +44 -0
  43. data/lib/kmdb/models/s3_object.rb +54 -0
  44. data/lib/kmdb/models/user.rb +53 -0
  45. data/lib/kmdb/models/whitelisted_event.rb +20 -0
  46. data/lib/kmdb/parser.rb +4 -4
  47. data/lib/kmdb/redis.rb +17 -0
  48. data/lib/kmdb/resque.rb +38 -0
  49. data/lib/kmdb/s3_bucket.rb +33 -0
  50. data/lib/kmdb/services/partitioner.rb +65 -0
  51. data/lib/kmdb/version.rb +1 -1
  52. data/lib/kmdb.rb +31 -6
  53. metadata +236 -186
  54. data/README.markdown +0 -91
  55. data/bin/km_db_import +0 -36
  56. data/lib/kmdb/custom_record.rb +0 -54
  57. data/lib/kmdb/dumpfile.rb +0 -23
  58. data/lib/kmdb/event.rb +0 -39
  59. data/lib/kmdb/has_properties.rb +0 -33
  60. data/lib/kmdb/key.rb +0 -56
  61. data/lib/kmdb/migration.rb +0 -63
  62. data/lib/kmdb/parallel_parser.rb +0 -85
  63. data/lib/kmdb/property.rb +0 -33
  64. data/lib/kmdb/user.rb +0 -83
@@ -0,0 +1,53 @@
1
+ require 'kmdb/models/custom_record'
2
+ require 'kmdb/concerns/has_properties'
3
+ require 'kmdb/redis'
4
+
5
module KMDB
  # A user record, looked up by name, with a Redis-backed cache in front of
  # `find_or_create`.
  class User < ActiveRecord::Base
    include CustomRecord
    include HasProperties

    # Lifetime of a cached user entry, passed to Redis as the `ex:` option.
    CACHE_EXPIRY = 3_600

    has_many :events, class_name: 'KMDB::Event', inverse_of: :user
    # points to the aliased user. if set, no properties/events should belong to this user
    # NOTE(review): no association follows this comment in this class --
    # presumably a leftover from a removed alias association; confirm.

    validates_presence_of :name

    # it's bad practice, but this is (much) more efficiently done
    # by the SQL engine:
    # validates_uniqueness_of :name

    scope :named, lambda { |name| where(name: name) }

    # Returns the (latest) value of the property called `name`, or nil when
    # the user has no such property.
    def prop(name)
      properties.named(name).first.andand.value
    end

    # Returns the user called `name`, serving it from the Redis cache when
    # present; otherwise defers to the inherited `find_or_create` and caches
    # the result for CACHE_EXPIRY.
    def self.find_or_create(name:)
      cached = _redis.get(name)
      # NOTE(review): Marshal.load must only ever see data written by this
      # class; it is unsafe if the Redis instance can be written to by
      # untrusted parties.
      return Marshal.load(cached) if cached

      super.tap do |user|
        _redis.set(name, Marshal.dump(user), ex: CACHE_EXPIRY)
      end
    end

    # Namespaced Redis handle used for the user cache.
    #
    # Internal helper. It is deliberately left public: `private` does not
    # apply to `def self.` methods, and the instance-level `_redis` calls it
    # with an explicit receiver.
    def self._redis
      @_redis ||= Redis.namespaced('kmdb:users')
    end

    # Destroys the record and evicts it from the cache.
    # Returns whatever the inherited `destroy` returns, so callers can still
    # test the outcome (previously the result of the Redis DEL leaked out).
    def destroy
      super.tap { _redis.del(name) }
    end

    private

    # Instance-side shortcut to the class-level cache handle.
    def _redis
      self.class._redis
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'kmdb'
2
+ require 'active_record'
3
+ require 'set'
4
+
5
module KMDB
  # Table of whitelisted event names, queried through a class-level
  # membership test.
  class WhitelistedEvent < ActiveRecord::Base
    module ClassMethods
      # True when `name` is one of the stored whitelisted names.
      def include?(name)
        _data.member?(name)
      end

      private

      # Set of every `name` in the table, loaded on first use and then kept
      # for the lifetime of the class.
      def _data
        @_data ||= pluck(:name).to_set
      end
    end

    extend ClassMethods
  end
end
data/lib/kmdb/parser.rb CHANGED
@@ -1,7 +1,7 @@
1
- require 'yajl/json_gem'
2
1
  require 'pathname'
3
2
  require 'progressbar'
4
- require 'pstore'
3
+ require 'oj'
4
+ require 'kmdb/models/dumpfile'
5
5
 
6
6
  module KMDB
7
7
  class Parser
@@ -82,8 +82,8 @@ module KMDB
82
82
  end
83
83
 
84
84
  begin
85
- data = JSON.parse(text)
86
- rescue JSON::ParserError => e
85
+ data = Oj.load(text)
86
+ rescue Oj::ParseError => e
87
87
  log "Warning, JSON parse error in: #{text}"
88
88
  raise e if @abort_on_error
89
89
  return
data/lib/kmdb/redis.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'redis'
2
+ require 'redis-namespace'
3
+
4
module KMDB
  # Entry point for the Redis connection used by KMDB.
  module Redis
    module ModuleMethods
      # Used when KMDB_REDIS_URL is not set. The `url:` option needs a full
      # URL with a scheme; the bare host name used previously is not one.
      DEFAULT_URL = 'redis://localhost:6379'.freeze

      # Returns the memoized Redis client, built from KMDB_REDIS_URL
      # (falling back to DEFAULT_URL).
      #
      # Memoized in an instance variable of the extending module rather than
      # a class variable, so the state is not shared across includers.
      def connection
        @_connection ||= ::Redis.new(url: ENV.fetch('KMDB_REDIS_URL', DEFAULT_URL))
      end

      # Returns a new namespaced wrapper around the shared connection.
      #
      # ns - the namespace handed to Redis::Namespace.
      def namespaced(ns)
        ::Redis::Namespace.new(ns, redis: connection)
      end
    end

    extend ModuleMethods
  end
end
@@ -0,0 +1,38 @@
1
+ require 'resque'
2
+ require 'kmdb/redis'
3
+
4
module KMDB
  # Thin wrapper that makes sure Resque is configured before it is used.
  module Resque
    module ModuleMethods
      # Configures Resque if needed, then forwards all arguments to
      # ::Resque.enqueue and returns its result.
      def enqueue(*args)
        _configure
        ::Resque.enqueue(*args)
      end

      # Configures Resque if needed, then runs a worker on the :high,
      # :medium and :low queues. Returns the worker once `work` returns.
      def work
        _configure
        worker = ::Resque::Worker.new(:high, :medium, :low)
        worker.term_timeout = 8
        worker.term_child = true
        worker.log "starting worker"
        worker.work(5) # interval
        worker
      end

      # Public entry point for the one-time configuration.
      def configure
        _configure
      end

      private

      # Points Resque at the shared KMDB Redis connection, applies the
      # namespace from KMDB_REDIS_NS (default 'kmdb:resque') and sets the
      # logger to DEBUG. Runs at most once per process.
      def _configure
        unless @configured
          ::Resque.redis = Redis.connection
          ::Resque.redis.namespace = ENV.fetch('KMDB_REDIS_NS', 'kmdb:resque')
          ::Resque.logger.level = Logger::DEBUG
          @configured = true
        end
      end
    end

    extend ModuleMethods
  end
end
@@ -0,0 +1,33 @@
1
+ require 'kmdb'
2
+ require 'fog'
3
+ require 'singleton'
4
+
5
module KMDB
  # Singleton proxy for the S3 directory named by AWS_BUCKET. Any method the
  # proxy does not define itself is forwarded to that directory object.
  class S3Bucket
    include Singleton

    # Forwards every unknown message (arguments and block included) to the
    # underlying directory.
    def method_missing(method, *args, &block)
      _directory.send(method, *args, &block)
    end

    # Makes `respond_to?` report both the proxy's own methods and those of
    # the directory.
    #
    # This replaces an override of `respond_to?` that called the
    # non-existent `respond_to` (no question mark) and so raised
    # NoMethodError on every call.
    def respond_to_missing?(method, include_private = false)
      _directory.respond_to?(method, include_private) || super
    end

    private

    # The directory fetched for AWS_BUCKET, memoized after the first lookup.
    def _directory
      @_directory ||= _connection.directories.get(ENV.fetch('AWS_BUCKET'))
    end

    # Storage connection built from the AWS_ACCESS_KEY_ID and
    # AWS_SECRET_ACCESS_KEY environment variables; ENV.fetch raises
    # KeyError when either is missing.
    def _connection
      @_connection ||= Fog::Storage.new(
        provider: 'AWS',
        aws_access_key_id: ENV.fetch('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key: ENV.fetch('AWS_SECRET_ACCESS_KEY')
      )
    end
  end
end
32
+
33
+
@@ -0,0 +1,65 @@
1
+ require 'kmdb'
2
+
3
module KMDB
  module Services
    # Adds range partitions to a model's table, one step of
    # `days_per_partition` days at a time, until the newest partition limit
    # is past `max_date`.
    class Partitioner
      # model              - object responding to `table_name` and `connection`.
      # min_date           - first limit to start from when the table has no
      #                      dated partition yet (default: KMDB_MIN_DATE).
      # max_date           - partitions are added until the last limit is
      #                      past this date (default: KMDB_MAX_DATE).
      # days_per_partition - step between two limits
      #                      (default: KMDB_DAYS_PER_PARTITION).
      #
      # Raises ArgumentError when days_per_partition is not positive: the
      # limits would never advance and the run could only end in a SQL error.
      def initialize(model:, min_date: nil, max_date: nil, days_per_partition: nil)
        @model = model
        @min_date = min_date || Date.parse(ENV.fetch('KMDB_MIN_DATE'))
        @max_date = max_date || Date.parse(ENV.fetch('KMDB_MAX_DATE'))
        @days_per_partition = days_per_partition || Integer(ENV.fetch('KMDB_DAYS_PER_PARTITION'))
        return if @days_per_partition > 0

        raise ArgumentError, "days_per_partition must be positive (got #{@days_per_partition.inspect})"
      end

      # Adds partitions until the most recent limit exceeds max_date.
      # The last limit is re-read from the database on every pass.
      def run
        loop do
          last_limit = _get_last_limit || @min_date
          break if last_limit > @max_date

          _add_partition(last_limit + @days_per_partition)
        end
      end

      private

      # Highest non-MAXVALUE partition limit of the table as a Date, or nil
      # when there is none.
      def _get_last_limit
        limit = _conn.select_value(%Q{
          SELECT `partition_description` FROM information_schema.partitions
          WHERE `table_schema` = '#{_database_name}'
          AND `table_name` = '#{_table}'
          AND `partition_description` <> 'MAXVALUE'
          ORDER BY `partition_description` DESC LIMIT 1
        })
        limit ? Date.parse(limit) : nil
      end

      # Splits the catch-all `pLast` partition into a new partition holding
      # values below `date`, plus a fresh `pLast`.
      def _add_partition(date)
        part_limit = date.strftime("'%F'")
        part_name = date.strftime('p%Y%m%d')
        _log "adding partition up to #{part_limit} to #{_table}"
        _conn.execute %Q{
          ALTER TABLE #{_table} REORGANIZE PARTITION pLast INTO (
            PARTITION #{part_name} VALUES LESS THAN (#{part_limit}),
            PARTITION pLast VALUES LESS THAN MAXVALUE
          )
        }
      end

      # Name of the table being partitioned (memoized).
      def _table
        @_table ||= @model.table_name
      end

      # Name of the database the connection currently uses (memoized).
      def _database_name
        @_database_name ||= _conn.current_database
      end

      # The model's database connection (memoized).
      def _conn
        @_conn ||= @model.connection
      end

      # Writes a progress line to standard error.
      def _log(message)
        $stderr.write("#{message}\n")
      end
    end
  end
end
data/lib/kmdb/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module KMDB
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.2"
3
3
  end
4
4
 
data/lib/kmdb.rb CHANGED
@@ -1,10 +1,35 @@
1
+ require 'active_record'
2
+
1
3
  module KMDB
2
- MaxStringSize = 48
4
+ MIGRATIONS_DIR = Pathname(__FILE__).parent.join('kmdb/migrations').cleanpath.to_s
3
5
 
4
- %w(key user property event user_error dumpfile parser parallel_parser).each do |mod|
5
- require "kmdb/#{mod}"
6
- end
6
+ module ModuleMethods
7
+ def env
8
+ ENV['RACK_ENV'] || ENV['KMDB_ENV'] || 'development'
9
+ end
10
+
11
+ def connect
12
+ url = ENV.fetch('DATABASE_URL')
13
+ puts url
14
+ ActiveRecord::Base.establish_connection(url)
7
15
 
8
- # Connect to an alternate database when the module is loaded
9
- CustomRecord.connect_to_km_db!
16
+ if ENV.fetch('KMDB_AR_LOG', 'NO') == 'YES'
17
+ ActiveRecord::Base.logger = ActiveSupport::Logger.new(STDOUT)
18
+ end
19
+ self
20
+ end
21
+
22
+ def migrate
23
+ ActiveRecord::Migration.verbose = true
24
+ ActiveRecord::Migrator.migrate MIGRATIONS_DIR
25
+ self
26
+ end
27
+
28
+ def transaction(&block)
29
+ ActiveRecord::Base.transaction do
30
+ yield ActiveRecord::Base.connection
31
+ end
32
+ end
33
+ end
34
+ extend ModuleMethods
10
35
  end