km-db 0.2.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +7 -0
- data/.ruby-version +1 -0
- data/Gemfile +2 -4
- data/Gemfile.lock +179 -20
- data/Procfile +2 -0
- data/Procfile.work +1 -0
- data/README.md +186 -0
- data/Rakefile +1 -0
- data/bin/kmdb-flush +13 -0
- data/bin/kmdb-import +13 -0
- data/bin/kmdb-partition +15 -0
- data/bin/kmdb-pool +8 -0
- data/bin/kmdb-realias +12 -0
- data/bin/kmdb-ui +6 -0
- data/bin/kmdb-work +17 -0
- data/config/amazon-rds-ca-cert.pem +260 -0
- data/config.ru +8 -0
- data/km-db.gemspec +17 -17
- data/lib/kmdb/{belongs_to_user.rb → concerns/belongs_to_user.rb} +3 -3
- data/lib/kmdb/concerns/has_properties.rb +35 -0
- data/lib/kmdb/jobs/find_files.rb +32 -0
- data/lib/kmdb/jobs/list_files.rb +37 -0
- data/lib/kmdb/jobs/locked.rb +10 -0
- data/lib/kmdb/jobs/parse_file.rb +109 -0
- data/lib/kmdb/jobs/record_batch.rb +65 -0
- data/lib/kmdb/jobs/redo_unaliasing.rb +31 -0
- data/lib/kmdb/jobs/unalias_user.rb +32 -0
- data/lib/kmdb/migrations/01_kmdb_initial.rb +78 -0
- data/lib/kmdb/migrations/02_kmdb_partitions.rb +28 -0
- data/lib/kmdb/migrations/03_kmdb_blacklist.rb +20 -0
- data/lib/kmdb/models/alias.rb +36 -0
- data/lib/kmdb/models/blacklisted_property.rb +20 -0
- data/lib/kmdb/models/custom_record.rb +53 -0
- data/lib/kmdb/models/dumpfile.rb +33 -0
- data/lib/kmdb/models/event.rb +56 -0
- data/lib/kmdb/models/event_batch.rb +72 -0
- data/lib/kmdb/models/global_uid.rb +42 -0
- data/lib/kmdb/models/ignored_user.rb +20 -0
- data/lib/kmdb/models/json_file.rb +56 -0
- data/lib/kmdb/models/key.rb +28 -0
- data/lib/kmdb/models/property.rb +44 -0
- data/lib/kmdb/models/s3_object.rb +54 -0
- data/lib/kmdb/models/user.rb +53 -0
- data/lib/kmdb/models/whitelisted_event.rb +20 -0
- data/lib/kmdb/parser.rb +4 -4
- data/lib/kmdb/redis.rb +17 -0
- data/lib/kmdb/resque.rb +38 -0
- data/lib/kmdb/s3_bucket.rb +33 -0
- data/lib/kmdb/services/partitioner.rb +65 -0
- data/lib/kmdb/version.rb +1 -1
- data/lib/kmdb.rb +31 -6
- metadata +236 -186
- data/README.markdown +0 -91
- data/bin/km_db_import +0 -36
- data/lib/kmdb/custom_record.rb +0 -54
- data/lib/kmdb/dumpfile.rb +0 -23
- data/lib/kmdb/event.rb +0 -39
- data/lib/kmdb/has_properties.rb +0 -33
- data/lib/kmdb/key.rb +0 -56
- data/lib/kmdb/migration.rb +0 -63
- data/lib/kmdb/parallel_parser.rb +0 -85
- data/lib/kmdb/property.rb +0 -33
- data/lib/kmdb/user.rb +0 -83
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'kmdb/models/user'
|
3
|
+
require 'kmdb/models/event'
|
4
|
+
require 'kmdb/models/property'
|
5
|
+
require 'kmdb/jobs/locked'
|
6
|
+
|
7
|
+
module KMDB
  module Jobs
    # Removes all references to a user alias.
    #
    # Every Property and Event row owned by the alias user is re-pointed at
    # the canonical user, then the alias user record is destroyed.
    class UnaliasUser < Locked
      # NOTE(review): presumably the job queue name read by the worker
      # framework; confirm against KMDB::Jobs::Locked / the Resque setup.
      @queue = :medium

      # Job entry point.
      #
      # @param name1 [String] name of the user that is kept
      # @param name2 [String] name of the alias user that is merged and removed
      def self.perform(name1, name2)
        new(name1, name2).work
      end

      # Looks up both users by name; either may be nil when no such user exists.
      def initialize(name1, name2)
        @user  = User.where(name: name1).first
        @alias = User.where(name: name2).first
      end

      # Performs the merge. Does nothing when either user is missing.
      def work
        return unless @user && @alias
        # Both names resolving to the same row means there is nothing to merge;
        # without this guard the user itself would be destroyed below.
        return if @user.id == @alias.id

        [Property, Event].each do |model|
          model.where(user_id: @alias.id).update_all(user_id: @user.id)
        end
        @alias.destroy
      end
    end
  end
end
|
32
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
Set up a custom database for KissMetrics tracking events.
|
4
|
+
|
5
|
+
=end
|
6
|
+
require 'active_record'
|
7
|
+
require 'kmdb'
|
8
|
+
|
9
|
+
# Initial schema: events, keys, properties, users, ignored users,
# whitelisted events, aliases and dumpfiles.
class KmdbInitial < ActiveRecord::Migration

  # Creates every table and index of the initial schema.
  def up
    execute "SET character_set_database = 'utf8'"

    create_table :events do |t|
      t.integer  :user_id
      t.integer  :n
      t.datetime :t
    end
    add_index :events, [:n],       using: :hash
    add_index :events, [:user_id], using: :hash
    add_index :events, [:user_id, :n, :t]


    create_table :keys do |t|
      t.string :string
    end
    add_index :keys, [:string], using: :hash, unique: true

    create_table :properties do |t|
      t.integer  :user_id
      t.integer  :event_id
      t.integer  :key
      t.datetime :t
      t.string   :value
    end
    add_index :properties, [:key],      using: :hash
    add_index :properties, [:user_id],  using: :hash
    add_index :properties, [:event_id], using: :hash
    add_index :properties, [:user_id, :key, :t]

    create_table :users do |t|
      t.string :name, limit: 48
    end
    add_index :users, [:name], using: :hash, unique: true

    create_table :ignored_users do |t|
      t.string :name, limit: 48
    end

    create_table :whitelisted_events do |t|
      t.string :name
    end

    create_table :aliases do |t|
      t.string   :name1, limit: 48
      t.string   :name2, limit: 48
      t.datetime :t
    end
    # add_index :aliases, [:name1, :name2], using: :hash, unique: true
    add_index :aliases, [:name1], using: :hash
    add_index :aliases, [:name2], using: :hash

    create_table :dumpfiles do |t|
      t.integer :revision
      t.integer :length
      t.integer :offset
      t.timestamps
    end
    add_index :dumpfiles, [:revision]
  end

  # Drops every table created by #up (in reverse creation order), so that a
  # rollback followed by a new migration run does not fail on leftover tables.
  def down
    drop_table :dumpfiles
    drop_table :aliases
    drop_table :whitelisted_events
    drop_table :ignored_users
    drop_table :users
    drop_table :properties
    drop_table :keys
    drop_table :events
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
Set up the events, properties and aliases tables for partitioning.
|
4
|
+
|
5
|
+
=end
|
6
|
+
require 'active_record'
|
7
|
+
require 'kmdb'
|
8
|
+
|
9
|
+
# Rebuilds the primary key of the events, properties and aliases tables and
# partitions each of them by range on the `t` column.
class KmdbPartitions < ActiveRecord::Migration

  # Runs the same six statements, in order, against each table.
  def up
    %w(events properties aliases).each do |table|
      [
        "ALTER TABLE #{table} CHANGE id id BIGINT NOT NULL",
        "ALTER TABLE #{table} DROP PRIMARY KEY",
        "CREATE UNIQUE INDEX index_events_partition ON #{table} (t, id)",
        "CREATE INDEX index_events_id ON #{table} (id)",
        "ALTER TABLE #{table} CHANGE id id BIGINT NOT NULL AUTO_INCREMENT",
        "ALTER TABLE #{table} PARTITION BY RANGE COLUMNS (t) (PARTITION pLast VALUES LESS THAN MAXVALUE)"
      ].each do |statement|
        execute statement
      end
    end
  end

  # Intentionally a no-op: the partitioning is not undone on rollback.
  def down
  end
end
|
28
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
Create the table holding the names of blacklisted properties.
|
4
|
+
|
5
|
+
=end
|
6
|
+
require 'active_record'
|
7
|
+
require 'kmdb'
|
8
|
+
|
9
|
+
# Adds the table listing property names that must not be imported.
class KmdbBlacklist < ActiveRecord::Migration

  # Creates the blacklisted_properties table (a single `name` column).
  def up
    create_table :blacklisted_properties do |t|
      t.string :name
    end
  end

  # Drops the table created by #up so the migration can be rolled back and
  # re-applied without failing on an already existing table.
  def down
    drop_table :blacklisted_properties
  end
end
|
20
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'kmdb/models/custom_record'
|
2
|
+
|
3
|
+
module KMDB
  # Aliasing events, ie. pairs of user names that are considered the same actual user.
  class Alias < ActiveRecord::Base
    self.primary_key = :id

    module ClassMethods
      # Finds or creates the alias row for a pair of user names.
      #
      # The pair is normalised with #_sorted first, so the same two names
      # always map to the same (name1, name2) row regardless of argument order.
      #
      # @param name1 [String] one of the two user names
      # @param name2 [String] the other user name
      # @param stamp [Numeric] epoch seconds, stored in `t` when a row is created
      # @return [KMDB::Alias] the existing or newly created record
      # @raise [ActiveRecord::RecordNotUnique] when creation still collides
      #   after 5 attempts
      def record(name1, name2, stamp)
        # Normalise once, into separate locals: retrying must not re-sort
        # (and thereby possibly swap) an already normalised pair.
        first, second = _sorted(name2, name1)
        retries = 5
        begin
          where(name1: first, name2: second).first ||
            create!(name1: first, name2: second, t: Time.at(stamp))
        rescue ActiveRecord::RecordNotUnique
          retry unless (retries -= 1).zero?
          raise
        end
      end

      private

      # Matches names made only of digits; \A and \z anchor the whole string
      # (^ and $ would also accept multi-line names with one numeric line).
      NUMERIC_NAME = /\A[0-9]+\z/

      # always the "lowest" name first, with preference to numeric names
      #
      # * one numeric name: the numeric name comes first
      # * two numeric names: ordered by integer value (string order breaks
      #   ties such as "7" vs "007"), so the result does not depend on the
      #   order of the arguments
      # * no numeric name: plain string order
      def _sorted(name1, name2)
        numeric1 = name1 =~ NUMERIC_NAME
        numeric2 = name2 =~ NUMERIC_NAME

        if numeric1 && numeric2
          in_order = ([name1.to_i, name1] <=> [name2.to_i, name2]) <= 0
          in_order ? [name1, name2] : [name2, name1]
        elsif numeric1
          [name1, name2]
        elsif numeric2
          [name2, name1]
        elsif name2 < name1
          [name2, name1]
        else
          [name1, name2]
        end
      end
    end
    extend ClassMethods
  end
end
|
36
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'active_record'
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
module KMDB
  # Property names listed in the blacklisted_properties table.
  class BlacklistedProperty < ActiveRecord::Base
    module ClassMethods
      # Tells whether +name+ is one of the stored blacklisted names.
      # NOTE(review): this replaces Module#include? on the model class.
      def include?(name)
        _data.member?(name)
      end

      private

      # Set of all stored names; loaded with a single query on first use and
      # kept for the lifetime of the class object.
      def _data
        @_data = Set.new(pluck(:name)) unless @_data
        @_data
      end
    end

    extend ClassMethods
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
Base class for KM data.
|
4
|
+
Connect to a secondary database to store events, users, & properties.
|
5
|
+
|
6
|
+
FIXME: the database connection is hard-coded for now.
|
7
|
+
|
8
|
+
=end
|
9
|
+
|
10
|
+
require 'active_record'
|
11
|
+
require 'erb'
|
12
|
+
require 'yaml'
|
13
|
+
|
14
|
+
|
15
|
+
module KMDB
  # Mixin for KM data models: including it extends the model class with the
  # helpers in ClassMethods.
  module CustomRecord
    def self.included(by)
      by.extend ClassMethods
    end

    module ClassMethods
      # Issues ALTER TABLE ... DISABLE KEYS for the model's table.
      def disable_index
        connection.execute %Q{
          ALTER TABLE `#{table_name}` DISABLE KEYS;
        }
      end

      # Issues ALTER TABLE ... ENABLE KEYS for the model's table.
      def enable_index
        connection.execute %Q{
          ALTER TABLE `#{table_name}` ENABLE KEYS;
        }
      end

      # Returns the first record matching +options+, creating it when missing.
      #
      # A failed creation (uniqueness collision or validation failure) is
      # logged to $stderr and the lookup/creation is attempted again, up to
      # 5 attempts in total; the last failure is re-raised.
      #
      # @param options [Hash] attributes used both to look up and to create
      # @return the existing or newly created record
      # @raise [ActiveRecord::RecordNotUnique, ActiveRecord::RecordInvalid]
      #   when every attempt failed
      def find_or_create(options)
        retries = 5
        begin
          where(options).first || create!(options)
        rescue ActiveRecord::RecordNotUnique, ActiveRecord::RecordInvalid
          $stderr.write("could not create #{self.name} with #{options.inspect}, retrying (#{retries})\n")
          retry unless (retries -= 1).zero?
          raise
        end
      end

      # Clears the `tid` marker on every row carrying +tid+.
      def commit(tid)
        where(tid: tid).update_all(tid: nil)
      end

      # Deletes every row that still carries a `tid` marker.
      def clear_uncommitted
        # TODO: this needs to be protected by a global lock
        where('tid IS NOT NULL').delete_all
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'kmdb/models/custom_record'
|
2
|
+
require 'kmdb/models/json_file'
|
3
|
+
|
4
|
+
module KMDB
  # Remembers which JSON files were imported, and up to which point.
  class Dumpfile < ActiveRecord::Base
    include CustomRecord

    validates_presence_of :offset, :revision

    class << self
      # Returns the record for +revision+, creating it when missing.
      def get(revision)
        find_or_create(revision: revision)
      end
    end

    # Persists +offset+ as the new import position; raises when saving fails.
    def set(offset)
      update_attributes!(offset: offset)
    end

    # Stored import position, 0 when none was stored yet.
    def offset
      stored = attributes['offset']
      stored ? stored : 0
    end

    # The JSON dump this record describes.
    def file
      JsonFile.new(revision)
    end

    # True once the import position reached the file length.
    # Returns nil while either value is unknown.
    def complete?
      unknown = offset.nil? || length.nil?
      return if unknown

      offset >= length
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'kmdb/models/custom_record'
|
2
|
+
require 'kmdb/concerns/belongs_to_user'
|
3
|
+
require 'kmdb/concerns/has_properties'
|
4
|
+
require 'kmdb/models/global_uid'
|
5
|
+
require 'kmdb/user_error'
|
6
|
+
|
7
|
+
module KMDB
  # A tracked event: a user, an event name (stored as a Key id in `n`) and a
  # timestamp `t`.
  class Event < ActiveRecord::Base
    self.primary_key = :id

    include CustomRecord
    include BelongsToUser
    include HasProperties

    scope :before,  ->(date) { where("`#{table_name}`.`t` < ?", date) }
    scope :after,   ->(date) { where("`#{table_name}`.`t` > ?", date) }

    scope :named,   ->(name) { where(n: KMDB::Key.get(name)) }

    scope :by_date, -> { order("`#{table_name}`.`t` ASC") }

    class << self
      # Builds the SQL value tuples for one raw event and yields them.
      #
      # Consumes the '_p' (user name), '_t' (epoch stamp) and '_n' (event
      # name) entries of +hash+; whatever remains is handed to
      # Property.sql_for.
      #
      # @param hash [Hash] raw event data; modified in place
      # @yield [event_sql, properties_sql] the "(id,t,n,user_id)" tuple and
      #   the value returned by Property.sql_for
      # @raise [UserError] when no user could be found or created
      def sql_for(hash)
        user_name = hash.delete('_p')
        user = User.find_or_create(name: user_name)
        raise UserError.new("User missing for '#{user_name}'") unless user.present?

        stamp = Time.at(hash.delete('_t'))
        key = Key.get(hash.delete('_n').scrub)

        event_id = GlobalUID.get(:event)
        tuple = ["(?,?,?,?)", event_id, stamp, key, user.id]
        event_sql = sanitize_sql_array(tuple)
        properties_sql = Property.sql_for(hash, stamp: stamp, user: user, event_id: event_id)

        yield event_sql, properties_sql
      end

      # Inserts all given "(id,t,n,user_id)" tuples with a single statement.
      # Does nothing for an empty list.
      def mass_create(values_strings)
        return if values_strings.empty?

        header = "INSERT INTO `#{table_name}` (`id`, `t`, `n`, `user_id`) VALUES\n"
        rows = values_strings.join(",\n")
        connection.execute(header + rows)
      end
    end

    # return value of property
    def prop(name)
      match = properties.named(name).first
      match ? match.value : nil
    end

    # The event name, resolved from the Key referenced by `n`.
    def name
      KMDB::Key.find(n).string
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'kmdb/redis'
|
3
|
+
require 'kmdb/models/global_uid'
|
4
|
+
require 'zlib'
|
5
|
+
require 'digest'
|
6
|
+
|
7
|
+
module KMDB
  # Models a list of events, in chronological order,
  # spanning entire seconds.
  # Abstracts (compressed) storage in Redis.
  # This effectively acts as a write cache.
  class EventBatch
    # Redis accessor shared by the class and its instances.
    module SharedMethods
      def redis
        @_redis ||= KMDB::Redis.namespaced('kmdb:batches')
      end
    end
    include SharedMethods
    extend SharedMethods

    # Loads the batch stored under +id+; nil when there is none.
    def self.find(id)
      payload = redis.get(id)
      payload.nil? ? nil : new(payload, id: id)
    end

    # Provide either an Array (when creating a batch)
    # or an encoded String (when loading); +id+ is only used with a String.
    def initialize(data, id: nil)
      if data.is_a?(Array)
        @events = data
      elsif data.is_a?(String)
        @encoded = data
        @id = id
      else
        raise ArgumentError
      end
    end

    # Stores the encoded batch under its id and returns self.
    def save!
      _check_redis_space!
      redis.set(id, _encoded)
      self
    end

    # Removes the stored batch.
    def delete
      redis.del(id)
    end

    # The events of this batch, decoded on first access when the batch was
    # built from an encoded String.
    def events
      return @events if @events

      @events = Marshal.load(Zlib.inflate(@encoded))
    end

    # The batch id; a new one is drawn from GlobalUID when none was given.
    def id
      return @id if @id

      @id = GlobalUID.get('batches').to_s
    end

    private

    # Marshalled then deflated (level 3) events.
    def _encoded
      return @encoded if @encoded

      @encoded = Zlib.deflate(Marshal.dump(@events), 3)
    end

    # raise exception if space in Redis is getting low
    def _check_redis_space!
      # FIXME: not implemented
      # raise RuntimeError.new('low Redis storage space')
      nil
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'kmdb/redis'
|
3
|
+
|
4
|
+
module KMDB
  # Efficiently generate cross-process globally unique IDs
  # per namespace, using Redis.
  # IDs start at 1 and increment monotonically; each client is handed IDs in
  # batches of 100.
  class GlobalUID
    # Number of IDs served locally for each Redis increment.
    BATCH_SIZE = 100

    # Returns the next ID for namespace +ns+, using one generator per
    # namespace kept on the class.
    def self.get(ns = 'value')
      generators = (@instances ||= {})
      generator = (generators[ns] ||= new(ns))
      generator.get
    end

    def initialize(ns)
      @ns = ns
      @major = nil
      @minor = nil
    end

    # Returns the next ID; reserves a new batch from Redis when none is
    # reserved yet or the current one is used up.
    def get
      _reserve_batch if @major.nil? || @minor > BATCH_SIZE

      position = @minor
      @minor += 1
      (@major - 1) * BATCH_SIZE + position
    end

    private

    # Increments the namespace counter in Redis; the result (modulo 2^48)
    # selects the batch, and the position within the batch restarts at 1.
    def _reserve_batch
      @major = _redis.incr(@ns) % (1 << 48)
      @minor = 1
    end

    def _redis
      @@_redis ||= Redis.namespaced('kmdb:globaluid:v2')
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'active_record'
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
module KMDB
  # User names listed in the ignored_users table.
  class IgnoredUser < ActiveRecord::Base
    module ClassMethods
      # Tells whether +name+ is one of the stored ignored user names.
      # NOTE(review): this replaces Module#include? on the model class.
      def include?(name)
        _data.member?(name)
      end

      private

      # Set of all stored names; loaded with a single query on first use and
      # kept for the lifetime of the class object.
      def _data
        @_data = Set.new(pluck(:name)) unless @_data
        @_data
      end
    end

    extend ClassMethods
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'kmdb/models/dumpfile'
|
3
|
+
require 'kmdb/models/s3_object'
|
4
|
+
|
5
|
+
module KMDB
  # Models one of KissMetrics's JSON dumps.
  # Supports caching them from S3 and finding the latest one.
  class JsonFile
    attr_reader :revision

    def initialize(revision)
      @revision = revision
    end

    # Yields an IO object for this file
    #
    # The file is downloaded to the local cache first when needed, its size
    # is recorded on the Dumpfile metadata, and the cached copy is removed
    # afterwards unless KMDB_KEEP_FILES contains "YES" (the default).
    def open(&block)
      local_copy = _cached
      local_copy.open('r') do |io|
        metadata.update_attributes!(length: io.size)
        yield io
      end
      _flush_cache if _should_flush?
    end

    # True when the dump is cached locally or present on S3.
    def exist?
      return true if _cached_path.exist?

      _s3object.exist?
    end

    # The Dumpfile record tracking this revision.
    def metadata
      @metadata ||= Dumpfile.get(revision)
    end

    private

    # Cached files are kept only while KMDB_KEEP_FILES contains "YES".
    def _should_flush?
      keep = ENV.fetch('KMDB_KEEP_FILES', 'YES')
      !(keep =~ /YES/)
    end

    def _flush_cache
      _cached_path.delete if _cached_path.exist?
    end

    # Path of the local copy, downloading it from S3 when it is missing.
    def _cached
      path = _cached_path
      _s3object.download(path) unless path.exist?
      path
    end

    def _cached_path
      @_cached_path ||= Pathname.new("tmp/#{revision}.json")
    end

    def _s3object
      @_s3object ||= S3Object.new("revisions/#{revision}.json")
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'kmdb/models/custom_record'
|
2
|
+
|
3
|
+
module KMDB
  # Map strings (event and property names) to unique integers (Key#id) for performance
  class Key < ActiveRecord::Base
    include CustomRecord

    # Longest string accepted by .get_uncached.
    MAX_SIZE = 255

    has_many :events,     foreign_key: :n,   class_name: 'KMDB::Event',    dependent: :delete_all
    has_many :properties, foreign_key: :key, class_name: 'KMDB::Property', dependent: :delete_all

    class << self
      # Returns the id for +string+, remembering it on the class so later
      # calls with the same string skip the lookup.
      def get(string)
        known = (@cache ||= {})
        known[string] ||= get_uncached(string)
      end

      # Looks up (or creates) the key for +string+ and returns its id.
      # Raises a RuntimeError when the string exceeds MAX_SIZE characters.
      #
      # NOTE(review): meant as an internal helper, but it has always been
      # publicly callable (a bare `private` does not apply to `def self.`
      # methods), so it is kept public here.
      def get_uncached(string)
        raise "String is too long" if string.size > MAX_SIZE

        find_or_create(string: string).id
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'kmdb/models/custom_record'
|
2
|
+
require 'kmdb/concerns/belongs_to_user'
|
3
|
+
require 'kmdb/models/blacklisted_property'
|
4
|
+
require 'kmdb/user_error'
|
5
|
+
|
6
|
+
module KMDB
  # A property value set for a user at a given time, optionally attached to
  # an event. The property name is stored as a Key id in `key`.
  class Property < ActiveRecord::Base
    self.primary_key = :id

    include CustomRecord
    include BelongsToUser

    belongs_to :event, class_name: 'KMDB::Event'

    scope :named, lambda { |name| where(key: KMDB::Key.get(name)) }

    # Builds the SQL value tuples for the properties found in +hash+.
    #
    # Consumes the '_p' (user name) and '_t' (epoch stamp) entries of +hash+;
    # every remaining entry is a property, except blacklisted names which are
    # skipped.
    #
    # @param hash [Hash] raw property data; '_p' and '_t' are removed from it
    # @param stamp [Time, Numeric, nil] used when +hash+ has no '_t' entry
    # @param user [KMDB::User, nil] used instead of looking up '_p' when given
    # @param event_id [Integer, nil] id of the owning event, if any
    # @return [String, nil] "(t,user_id,event_id,key,value)" tuples joined by
    #   ",\n"; nil when there is nothing to insert
    # @raise [UserError] when no user could be found or created
    def self.sql_for(hash, stamp: nil, user: nil, event_id: nil)
      user_name = hash.delete('_p')
      user ||= User.find_or_create(name: user_name)
      raise UserError.new("User missing for '#{user_name}'") unless user.present?

      # Explicit parentheses: the fallback applies to the argument of Time.at.
      stamp = Time.at(hash.delete('_t') || stamp)
      return if hash.empty?

      sql_values = []

      hash.each_pair do |prop_name, value|
        next if BlacklistedProperty.include?(prop_name)
        key = Key.get(prop_name)
        # Values are not guaranteed to be strings (numbers, nil, ...):
        # coerce before truncating to 255 characters and scrubbing, instead
        # of failing on the first non-String value.
        text = value.to_s[0...255].scrub
        sql_values << sanitize_sql_array(['(?,?,?,?,?)', stamp, user.id, event_id, key, text])
      end

      return if sql_values.empty?
      sql_values.join(",\n")
    end

    # Inserts all given value tuples with a single statement.
    # Does nothing for an empty list.
    def self.mass_create(values_strings)
      return if values_strings.empty?
      sql_insert = "INSERT INTO `#{table_name}` (`t`,`user_id`,`event_id`,`key`,`value`) VALUES\n"
      connection.execute(sql_insert + values_strings.join(",\n"))
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'kmdb/s3_bucket'
|
3
|
+
require 'tempfile'
|
4
|
+
|
5
|
+
module KMDB
  # A file stored in the S3 bucket, identified by its path in the bucket.
  class S3Object
    # @param path [String] path of the object inside the bucket
    def initialize(path)
      @path = path
    end

    # True when the bucket reports an object at this path.
    def exist?
      !!_file
    end

    # Downloads the object to +target+.
    #
    # The content is fetched with curl into a temporary file under tmp/,
    # which is then renamed to +target+, so a partial download never appears
    # at the target path.
    #
    # @param target [Pathname] destination; its parent directory is created
    # @return [true]
    # @raise [RuntimeError] when the object does not exist or curl fails
    def download(target)
      raise "S3 object #{@path} not found" unless exist?
      _log "downloading"
      system 'curl', '-o', _tempfile.path, '--silent', _file.url(_expiry)
      raise "Download failed for #{@path}" unless $?.success?
      target.parent.mkpath
      File.rename(_tempfile.path, target.to_s)
      true
    end

    private

    # Bucket entry for this path (nil when missing); a found entry is
    # remembered, a missing one is looked up again on the next call.
    def _file
      @_file ||= begin
        _log "checking for existence"
        S3Bucket.instance.files.head(@path)
      end
    end

    # Expiry passed to the download URL: 10 minutes from now, in epoch seconds.
    def _expiry
      Time.now.utc.to_i + 600
    end

    # Temporary file (under tmp/) receiving the download.
    def _tempfile
      @_tempfile ||= begin
        Pathname.new('tmp').mkpath
        Tempfile.new('kmdb', 'tmp')
      end
    end

    # The tmp/ directory, created when missing.
    def _tempdir
      Pathname.new('tmp/').tap(&:mkpath)
    end

    def _log(message)
      $stderr.write("s3 #{@path}: #{message}\n")
    end
  end
end
|
54
|
+
|