km-db 0.2.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +2 -4
  5. data/Gemfile.lock +179 -20
  6. data/Procfile +2 -0
  7. data/Procfile.work +1 -0
  8. data/README.md +186 -0
  9. data/Rakefile +1 -0
  10. data/bin/kmdb-flush +13 -0
  11. data/bin/kmdb-import +13 -0
  12. data/bin/kmdb-partition +15 -0
  13. data/bin/kmdb-pool +8 -0
  14. data/bin/kmdb-realias +12 -0
  15. data/bin/kmdb-ui +6 -0
  16. data/bin/kmdb-work +17 -0
  17. data/config/amazon-rds-ca-cert.pem +260 -0
  18. data/config.ru +8 -0
  19. data/km-db.gemspec +17 -17
  20. data/lib/kmdb/{belongs_to_user.rb → concerns/belongs_to_user.rb} +3 -3
  21. data/lib/kmdb/concerns/has_properties.rb +35 -0
  22. data/lib/kmdb/jobs/find_files.rb +32 -0
  23. data/lib/kmdb/jobs/list_files.rb +37 -0
  24. data/lib/kmdb/jobs/locked.rb +10 -0
  25. data/lib/kmdb/jobs/parse_file.rb +109 -0
  26. data/lib/kmdb/jobs/record_batch.rb +65 -0
  27. data/lib/kmdb/jobs/redo_unaliasing.rb +31 -0
  28. data/lib/kmdb/jobs/unalias_user.rb +32 -0
  29. data/lib/kmdb/migrations/01_kmdb_initial.rb +78 -0
  30. data/lib/kmdb/migrations/02_kmdb_partitions.rb +28 -0
  31. data/lib/kmdb/migrations/03_kmdb_blacklist.rb +20 -0
  32. data/lib/kmdb/models/alias.rb +36 -0
  33. data/lib/kmdb/models/blacklisted_property.rb +20 -0
  34. data/lib/kmdb/models/custom_record.rb +53 -0
  35. data/lib/kmdb/models/dumpfile.rb +33 -0
  36. data/lib/kmdb/models/event.rb +56 -0
  37. data/lib/kmdb/models/event_batch.rb +72 -0
  38. data/lib/kmdb/models/global_uid.rb +42 -0
  39. data/lib/kmdb/models/ignored_user.rb +20 -0
  40. data/lib/kmdb/models/json_file.rb +56 -0
  41. data/lib/kmdb/models/key.rb +28 -0
  42. data/lib/kmdb/models/property.rb +44 -0
  43. data/lib/kmdb/models/s3_object.rb +54 -0
  44. data/lib/kmdb/models/user.rb +53 -0
  45. data/lib/kmdb/models/whitelisted_event.rb +20 -0
  46. data/lib/kmdb/parser.rb +4 -4
  47. data/lib/kmdb/redis.rb +17 -0
  48. data/lib/kmdb/resque.rb +38 -0
  49. data/lib/kmdb/s3_bucket.rb +33 -0
  50. data/lib/kmdb/services/partitioner.rb +65 -0
  51. data/lib/kmdb/version.rb +1 -1
  52. data/lib/kmdb.rb +31 -6
  53. metadata +236 -186
  54. data/README.markdown +0 -91
  55. data/bin/km_db_import +0 -36
  56. data/lib/kmdb/custom_record.rb +0 -54
  57. data/lib/kmdb/dumpfile.rb +0 -23
  58. data/lib/kmdb/event.rb +0 -39
  59. data/lib/kmdb/has_properties.rb +0 -33
  60. data/lib/kmdb/key.rb +0 -56
  61. data/lib/kmdb/migration.rb +0 -63
  62. data/lib/kmdb/parallel_parser.rb +0 -85
  63. data/lib/kmdb/property.rb +0 -33
  64. data/lib/kmdb/user.rb +0 -83
@@ -0,0 +1,53 @@
1
+ require 'kmdb/models/custom_record'
2
+ require 'kmdb/concerns/has_properties'
3
+ require 'kmdb/redis'
4
+
5
module KMDB
  # A user record identified by its +name+.
  #
  # Lookups made through +find_or_create+ are cached in Redis (namespace
  # 'kmdb:users') as marshalled records for CACHE_EXPIRY seconds; destroying
  # a user removes its cache entry.
  class User < ActiveRecord::Base
    include CustomRecord
    include HasProperties

    has_many :events, class_name: 'KMDB::Event', inverse_of: :user
    # points to the aliased user. if set, no properties/events should belong to this user
    # NOTE(review): no association follows the comment above in this file;
    # it looks like a leftover -- confirm and remove or restore the association.

    validates_presence_of :name

    # it's bad practice, but this is (much) more efficiently done
    # by the SQL engine:
    # validates_uniqueness_of :name

    scope :named, lambda { |name| where(name: name) }

    # Lifetime of a cached user entry, in seconds.
    CACHE_EXPIRY = 3_600

    # Returns the value of the first property called +name+ (the latest one,
    # according to the original author), or nil when there is none.
    def prop(name)
      properties.named(name).first.andand.value
    end

    # Returns the user called +name+, serving it from the Redis cache when
    # possible and otherwise delegating to the inherited +find_or_create+
    # and caching its result.
    def self.find_or_create(name:)
      cached = _redis.get(name)
      # NOTE(review): Marshal.load executes whatever the payload describes;
      # this is only safe as long as the Redis instance is trusted.
      return Marshal.load(cached) if cached

      super.tap do |user|
        _redis.set(name, Marshal.dump(user), ex: CACHE_EXPIRY)
      end
    end

    # Namespaced Redis handle backing the user cache. It has to remain
    # public because instances reach it through +self.class+.
    def self._redis
      @_redis ||= Redis.namespaced('kmdb:users')
    end

    # Destroys the record, then drops its cache entry. Returns whatever the
    # inherited +destroy+ returns (not the Redis deletion count).
    def destroy
      super.tap { _redis.del(name) }
    end

    private

    # Instance-level shortcut to the class-wide cache handle.
    def _redis
      self.class._redis
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'kmdb'
2
+ require 'active_record'
3
+ require 'set'
4
+
5
module KMDB
  # Table of event names, exposed through a class-level membership test.
  class WhitelistedEvent < ActiveRecord::Base
    module ClassMethods
      # True when +name+ is among the stored event names.
      def include?(name)
        _data.member?(name)
      end

      private

      # All stored names as a Set. Loaded on first use and then kept for
      # the lifetime of the class, so later table changes are not seen.
      def _data
        @_data ||= pluck(:name).to_set
      end
    end
    extend ClassMethods
  end
end
data/lib/kmdb/parser.rb CHANGED
@@ -1,7 +1,7 @@
1
- require 'yajl/json_gem'
2
1
  require 'pathname'
3
2
  require 'progressbar'
4
- require 'pstore'
3
+ require 'oj'
4
+ require 'kmdb/models/dumpfile'
5
5
 
6
6
  module KMDB
7
7
  class Parser
@@ -82,8 +82,8 @@ module KMDB
82
82
  end
83
83
 
84
84
  begin
85
- data = JSON.parse(text)
86
- rescue JSON::ParserError => e
85
+ data = Oj.load(text)
86
+ rescue Oj::ParseError => e
87
87
  log "Warning, JSON parse error in: #{text}"
88
88
  raise e if @abort_on_error
89
89
  return
data/lib/kmdb/redis.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'redis'
2
+ require 'redis-namespace'
3
+
4
module KMDB
  # Entry point to the Redis connection shared by the KMDB components.
  module Redis
    # URL used when KMDB_REDIS_URL is not set. It must be a complete
    # redis:// URL: a bare host name carries no scheme or host component.
    DEFAULT_URL = 'redis://localhost:6379'.freeze

    module ModuleMethods
      # Returns the memoized Redis client, created on first use from the
      # KMDB_REDIS_URL environment variable (falling back to DEFAULT_URL).
      def connection
        @_connection ||= ::Redis.new(url: ENV.fetch('KMDB_REDIS_URL', DEFAULT_URL))
      end

      # Returns a new Redis::Namespace wrapping the shared connection, so
      # that every key is scoped under +ns+.
      def namespaced(ns)
        ::Redis::Namespace.new(ns, redis: connection)
      end
    end
    extend ModuleMethods
  end
end
@@ -0,0 +1,38 @@
1
+ require 'resque'
2
+ require 'kmdb/redis'
3
+
4
module KMDB
  # Thin wrapper around ::Resque that makes sure it is pointed at the KMDB
  # Redis connection before any job is queued or worked.
  module Resque
    module ModuleMethods
      # Configures Resque if needed, then forwards all arguments to
      # ::Resque.enqueue and returns its result.
      def enqueue(*args)
        _configure
        ::Resque.enqueue(*args)
      end

      # Configures Resque if needed, then runs a worker on the :high,
      # :medium and :low queues. Returns the worker once +work+ returns.
      def work
        _configure
        worker = ::Resque::Worker.new(:high, :medium, :low)
        worker.term_timeout = 8
        worker.term_child = true
        worker.log "starting worker"
        worker.work(5) # interval
        worker
      end

      # Public hook to force the one-time configuration.
      def configure
        _configure
      end

      private

      # One-time setup: shares the KMDB Redis connection with Resque, applies
      # the namespace from KMDB_REDIS_NS (default 'kmdb:resque') and sets the
      # Resque logger to DEBUG. Later calls do nothing.
      def _configure
        unless @configured
          ::Resque.redis = Redis.connection
          ::Resque.redis.namespace = ENV.fetch('KMDB_REDIS_NS', 'kmdb:resque')
          ::Resque.logger.level = Logger::DEBUG
          @configured = true
        end
      end
    end
    extend ModuleMethods
  end
end
@@ -0,0 +1,33 @@
1
+ require 'kmdb'
2
+ require 'fog'
3
+ require 'singleton'
4
+
5
module KMDB
  # Process-wide proxy for the S3 bucket named by the AWS_BUCKET environment
  # variable. Any message the proxy does not define itself is forwarded to
  # the underlying Fog directory object.
  class S3Bucket
    include Singleton

    # Forwards unknown messages (with arguments and block) to the directory.
    def method_missing(method, *args, &block)
      _directory.send(method, *args, &block)
    end

    # Makes +respond_to?+ (and +method+) agree with +method_missing+: the
    # proxy responds to whatever the directory responds to.
    def respond_to_missing?(method, include_private = false)
      _directory.respond_to?(method, include_private) || super
    end

    private

    # The Fog directory for the configured bucket, looked up once.
    # Raises KeyError when AWS_BUCKET is not set.
    def _directory
      @_directory ||= _connection.directories.get(ENV.fetch('AWS_BUCKET'))
    end

    # The Fog storage connection, built once from AWS_ACCESS_KEY_ID and
    # AWS_SECRET_ACCESS_KEY. Raises KeyError when either is not set.
    def _connection
      @_connection ||= Fog::Storage.new(
        provider: 'AWS',
        aws_access_key_id: ENV.fetch('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key: ENV.fetch('AWS_SECRET_ACCESS_KEY')
      )
    end
  end
end
32
+
33
+
@@ -0,0 +1,65 @@
1
+ require 'kmdb'
2
+
3
module KMDB
  module Services
    # Adds date-range partitions to a model's table by repeatedly splitting
    # the catch-all `pLast` partition, until the newest boundary lies past
    # the requested maximum date.
    class Partitioner
      # model::              object exposing +table_name+ and +connection+
      # min_date::           first boundary base when the table has no dated
      #                      partition yet (default: KMDB_MIN_DATE)
      # max_date::           partitions are added until the newest boundary
      #                      exceeds this date (default: KMDB_MAX_DATE)
      # days_per_partition:: width of each new partition, in days
      #                      (default: KMDB_DAYS_PER_PARTITION)
      #
      # Raises ArgumentError when days_per_partition is not positive, since
      # the boundary would then never move forward.
      def initialize(model:, min_date: nil, max_date: nil, days_per_partition: nil)
        @model = model
        @min_date = min_date || Date.parse(ENV.fetch('KMDB_MIN_DATE'))
        @max_date = max_date || Date.parse(ENV.fetch('KMDB_MAX_DATE'))
        @days_per_partition = days_per_partition || Integer(ENV.fetch('KMDB_DAYS_PER_PARTITION'))
        unless @days_per_partition > 0
          raise ArgumentError, "days_per_partition must be positive, got #{@days_per_partition.inspect}"
        end
      end

      # Adds partitions one at a time. The newest boundary is re-read from
      # the database on every pass, so an interrupted run can be resumed.
      def run
        loop do
          last_limit = _get_last_limit || @min_date
          break if last_limit > @max_date
          _add_partition(last_limit + @days_per_partition)
        end
      end

      private

      # Highest dated partition boundary of the table as a Date, or nil when
      # only the MAXVALUE partition exists.
      def _get_last_limit
        limit = _conn.select_value(%Q{
          SELECT `partition_description` FROM information_schema.partitions
          WHERE `table_schema` = '#{_database_name}'
          AND `table_name` = '#{_table}'
          AND `partition_description` <> 'MAXVALUE'
          ORDER BY `partition_description` DESC LIMIT 1
        })
        limit ? Date.parse(limit) : nil
      end

      # Splits `pLast` into a partition holding values below +date+ (named
      # pYYYYMMDD after that date) and a fresh `pLast` catch-all.
      def _add_partition(date)
        part_limit = date.strftime("'%F'")
        part_name = date.strftime('p%Y%m%d')
        _log "adding partition up to #{part_limit} to #{_table}"
        _conn.execute %Q{
          ALTER TABLE #{_table} REORGANIZE PARTITION pLast INTO (
            PARTITION #{part_name} VALUES LESS THAN (#{part_limit}),
            PARTITION pLast VALUES LESS THAN MAXVALUE
          )
        }
      end

      def _table
        @_table ||= @model.table_name
      end

      def _database_name
        @_database_name ||= _conn.current_database
      end

      def _conn
        @_conn ||= @model.connection
      end

      # Progress messages go to standard error.
      def _log(message)
        $stderr.write("#{message}\n")
      end
    end
  end
end
data/lib/kmdb/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module KMDB
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.2"
3
3
  end
4
4
 
data/lib/kmdb.rb CHANGED
@@ -1,10 +1,35 @@
1
+ require 'active_record'
2
+
1
3
  module KMDB
2
- MaxStringSize = 48
4
+ MIGRATIONS_DIR = Pathname(__FILE__).parent.join('kmdb/migrations').cleanpath.to_s
3
5
 
4
- %w(key user property event user_error dumpfile parser parallel_parser).each do |mod|
5
- require "kmdb/#{mod}"
6
- end
6
+ module ModuleMethods
7
+ def env
8
+ ENV['RACK_ENV'] || ENV['KMDB_ENV'] || 'development'
9
+ end
10
+
11
+ def connect
12
+ url = ENV.fetch('DATABASE_URL')
13
+ puts url
14
+ ActiveRecord::Base.establish_connection(url)
7
15
 
8
- # Connect to an alternate database when the module is loaded
9
- CustomRecord.connect_to_km_db!
16
+ if ENV.fetch('KMDB_AR_LOG', 'NO') == 'YES'
17
+ ActiveRecord::Base.logger = ActiveSupport::Logger.new(STDOUT)
18
+ end
19
+ self
20
+ end
21
+
22
+ def migrate
23
+ ActiveRecord::Migration.verbose = true
24
+ ActiveRecord::Migrator.migrate MIGRATIONS_DIR
25
+ self
26
+ end
27
+
28
+ def transaction(&block)
29
+ ActiveRecord::Base.transaction do
30
+ yield ActiveRecord::Base.connection
31
+ end
32
+ end
33
+ end
34
+ extend ModuleMethods
10
35
  end