km-db 0.2.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +7 -0
- data/.ruby-version +1 -0
- data/Gemfile +2 -4
- data/Gemfile.lock +179 -20
- data/Procfile +2 -0
- data/Procfile.work +1 -0
- data/README.md +186 -0
- data/Rakefile +1 -0
- data/bin/kmdb-flush +13 -0
- data/bin/kmdb-import +13 -0
- data/bin/kmdb-partition +15 -0
- data/bin/kmdb-pool +8 -0
- data/bin/kmdb-realias +12 -0
- data/bin/kmdb-ui +6 -0
- data/bin/kmdb-work +17 -0
- data/config/amazon-rds-ca-cert.pem +260 -0
- data/config.ru +8 -0
- data/km-db.gemspec +17 -17
- data/lib/kmdb/{belongs_to_user.rb → concerns/belongs_to_user.rb} +3 -3
- data/lib/kmdb/concerns/has_properties.rb +35 -0
- data/lib/kmdb/jobs/find_files.rb +32 -0
- data/lib/kmdb/jobs/list_files.rb +37 -0
- data/lib/kmdb/jobs/locked.rb +10 -0
- data/lib/kmdb/jobs/parse_file.rb +109 -0
- data/lib/kmdb/jobs/record_batch.rb +65 -0
- data/lib/kmdb/jobs/redo_unaliasing.rb +31 -0
- data/lib/kmdb/jobs/unalias_user.rb +32 -0
- data/lib/kmdb/migrations/01_kmdb_initial.rb +78 -0
- data/lib/kmdb/migrations/02_kmdb_partitions.rb +28 -0
- data/lib/kmdb/migrations/03_kmdb_blacklist.rb +20 -0
- data/lib/kmdb/models/alias.rb +36 -0
- data/lib/kmdb/models/blacklisted_property.rb +20 -0
- data/lib/kmdb/models/custom_record.rb +53 -0
- data/lib/kmdb/models/dumpfile.rb +33 -0
- data/lib/kmdb/models/event.rb +56 -0
- data/lib/kmdb/models/event_batch.rb +72 -0
- data/lib/kmdb/models/global_uid.rb +42 -0
- data/lib/kmdb/models/ignored_user.rb +20 -0
- data/lib/kmdb/models/json_file.rb +56 -0
- data/lib/kmdb/models/key.rb +28 -0
- data/lib/kmdb/models/property.rb +44 -0
- data/lib/kmdb/models/s3_object.rb +54 -0
- data/lib/kmdb/models/user.rb +53 -0
- data/lib/kmdb/models/whitelisted_event.rb +20 -0
- data/lib/kmdb/parser.rb +4 -4
- data/lib/kmdb/redis.rb +17 -0
- data/lib/kmdb/resque.rb +38 -0
- data/lib/kmdb/s3_bucket.rb +33 -0
- data/lib/kmdb/services/partitioner.rb +65 -0
- data/lib/kmdb/version.rb +1 -1
- data/lib/kmdb.rb +31 -6
- metadata +236 -186
- data/README.markdown +0 -91
- data/bin/km_db_import +0 -36
- data/lib/kmdb/custom_record.rb +0 -54
- data/lib/kmdb/dumpfile.rb +0 -23
- data/lib/kmdb/event.rb +0 -39
- data/lib/kmdb/has_properties.rb +0 -33
- data/lib/kmdb/key.rb +0 -56
- data/lib/kmdb/migration.rb +0 -63
- data/lib/kmdb/parallel_parser.rb +0 -85
- data/lib/kmdb/property.rb +0 -33
- data/lib/kmdb/user.rb +0 -83
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'kmdb/models/user'
|
3
|
+
require 'kmdb/models/event'
|
4
|
+
require 'kmdb/models/property'
|
5
|
+
require 'kmdb/jobs/locked'
|
6
|
+
|
7
|
+
module KMDB
  module Jobs
    # Merges an alias user into another user: every Property and Event row
    # owned by the alias is re-pointed at the kept user, then the alias user
    # record is destroyed.
    class UnaliasUser < Locked
      @queue = :medium

      # Job entry point.
      #
      # name1 - name of the user that is kept.
      # name2 - name of the alias user whose rows are moved and which is destroyed.
      def self.perform(name1, name2)
        job = new(name1, name2)
        job.work
      end

      # Looks up both users by name; either may be nil when no such user exists.
      def initialize(name1, name2)
        @user  = User.where(name: name1).first
        @alias = User.where(name: name2).first
      end

      # Performs the merge. Does nothing unless both users were found.
      def work
        return if @user.nil? || @alias.nil?

        kept_id = @user.id
        [Property, Event].each do |model|
          rows_of_alias = model.where(user_id: @alias.id)
          rows_of_alias.update_all(user_id: kept_id)
        end

        @alias.destroy
      end
    end
  end
end
|
32
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
Setup a custom database for KissMetrics tracking events.
|
4
|
+
|
5
|
+
=end
|
6
|
+
require 'active_record'
|
7
|
+
require 'kmdb'
|
8
|
+
|
9
|
+
# Creates the base schema used to store KissMetrics tracking data:
# events, keys, properties, users, ignored_users, whitelisted_events,
# aliases and dumpfiles.
class KmdbInitial < ActiveRecord::Migration

  def up
    execute "SET character_set_database = 'utf8'"

    # One row per tracked event; `n` holds the integer id of the event name
    # (see the keys table) and `t` the event timestamp.
    create_table :events do |t|
      t.integer  :user_id
      t.integer  :n
      t.datetime :t
    end
    add_index :events, [:n],       using: :hash
    add_index :events, [:user_id], using: :hash
    add_index :events, [:user_id, :n, :t]

    # Maps strings (event and property names) to integer ids.
    create_table :keys do |t|
      t.string :string
    end
    add_index :keys, [:string], using: :hash, unique: true

    create_table :properties do |t|
      t.integer  :user_id
      t.integer  :event_id
      t.integer  :key
      t.datetime :t
      t.string   :value
    end
    add_index :properties, [:key],      using: :hash
    add_index :properties, [:user_id],  using: :hash
    add_index :properties, [:event_id], using: :hash
    add_index :properties, [:user_id, :key, :t]

    create_table :users do |t|
      t.string :name, limit: 48
    end
    add_index :users, [:name], using: :hash, unique: true

    create_table :ignored_users do |t|
      t.string :name, limit: 48
    end

    create_table :whitelisted_events do |t|
      t.string :name
    end

    create_table :aliases do |t|
      t.string   :name1, limit: 48
      t.string   :name2, limit: 48
      t.datetime :t
    end
    # add_index :aliases, [:name1, :name2], using: :hash, unique: true
    add_index :aliases, [:name1], using: :hash
    add_index :aliases, [:name2], using: :hash

    # Import bookkeeping: one row per dump revision.
    create_table :dumpfiles do |t|
      t.integer :revision
      t.integer :length
      t.integer :offset
      t.timestamps
    end
    add_index :dumpfiles, [:revision]
  end

  # Drops every table created by #up, in reverse creation order, so that the
  # migration can be rolled back and re-applied without "table already
  # exists" errors.
  def down
    drop_table :dumpfiles
    drop_table :aliases
    drop_table :whitelisted_events
    drop_table :ignored_users
    drop_table :users
    drop_table :properties
    drop_table :keys
    drop_table :events
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
Setup events and properties for partitioning
|
4
|
+
|
5
|
+
=end
|
6
|
+
require 'active_record'
|
7
|
+
require 'kmdb'
|
8
|
+
|
9
|
+
# Prepares the events, properties and aliases tables for range partitioning
# on their `t` column: the primary key is replaced by a unique (t, id) index
# plus a plain index on id, and a single catch-all partition is created.
class KmdbPartitions < ActiveRecord::Migration

  def up
    %w(events properties aliases).each do |table|
      # NOTE(review): the index names say "events" for every table, not only
      # for the events table; kept as-is to preserve the resulting schema.
      statements = [
        "ALTER TABLE #{table} CHANGE id id BIGINT NOT NULL",
        "ALTER TABLE #{table} DROP PRIMARY KEY",
        "CREATE UNIQUE INDEX index_events_partition ON #{table} (t, id)",
        "CREATE INDEX index_events_id ON #{table} (id)",
        "ALTER TABLE #{table} CHANGE id id BIGINT NOT NULL AUTO_INCREMENT",
        "ALTER TABLE #{table} PARTITION BY RANGE COLUMNS (t) (PARTITION pLast VALUES LESS THAN MAXVALUE)"
      ]
      statements.each { |statement| execute statement }
    end
  end

  # Intentionally left empty in the original: rolling back does not undo
  # the partitioning changes.
  def down
  end
end
|
28
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
Set up the table of blacklisted property names
|
4
|
+
|
5
|
+
=end
|
6
|
+
require 'active_record'
|
7
|
+
require 'kmdb'
|
8
|
+
|
9
|
+
# Creates the table holding the names of properties that must not be imported.
class KmdbBlacklist < ActiveRecord::Migration

  def up
    create_table :blacklisted_properties do |t|
      t.string :name
    end
  end

  # Removes the table created by #up so the migration can be rolled back
  # and re-applied.
  def down
    drop_table :blacklisted_properties
  end
end
|
20
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'kmdb/models/custom_record'
|
2
|
+
|
3
|
+
module KMDB
  # Aliasing events, i.e. pairs of user names that are considered the same actual user.
  class Alias < ActiveRecord::Base
    self.primary_key = :id

    module ClassMethods
      # Finds or creates the alias row for a pair of names.
      #
      # name1, name2 - the two user names.
      # stamp        - timestamp passed to Time.at, stored in `t` on creation.
      #
      # On ActiveRecord::RecordNotUnique the whole method is retried, up to
      # 5 attempts in total, after which the error is re-raised.
      # NOTE: the parameters are reassigned before the lookup, so a retry
      # re-sorts the already sorted pair.
      def record(name1, name2, stamp)
        retries ||= 5
        name1, name2 = _sorted(name2, name1)
        pair = { name1: name1, name2: name2 }
        where(pair).first || create!(pair.merge(t: Time.at(stamp)))
      rescue ActiveRecord::RecordNotUnique
        retry unless (retries -= 1).zero?
        raise
      end

      private

      # Orders a pair of names: a purely numeric name comes first, otherwise
      # the "lowest" name comes first.
      # NOTE(review): when both names are numeric the first argument always
      # wins, so the result depends on argument order.
      def _sorted(name1, name2)
        return [name1, name2] if name1 =~ /^[0-9]+$/
        return [name2, name1] if name2 =~ /^[0-9]+$/

        name2 < name1 ? [name2, name1] : [name1, name2]
      end
    end
    extend ClassMethods
  end
end
|
36
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'active_record'
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
module KMDB
  # Names of properties that should be skipped, loaded once from the
  # `name` column and kept in memory.
  class BlacklistedProperty < ActiveRecord::Base
    module ClassMethods
      # True when +name+ is one of the stored names.
      # NOTE: this shadows Ruby's built-in Module#include? on this class.
      def include?(name)
        _data.member?(name)
      end

      private

      # Set of all names, memoized on first use; rows added later are not
      # seen until the process restarts.
      def _data
        @_data ||= Set.new(pluck(:name))
      end
    end
    extend ClassMethods
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
Base class for KM data.
|
4
|
+
Connect to a secondary database to store events, users, & properties.
|
5
|
+
|
6
|
+
FIXME: the database connection is hard-coded for now.
|
7
|
+
|
8
|
+
=end
|
9
|
+
|
10
|
+
require 'active_record'
|
11
|
+
require 'erb'
|
12
|
+
require 'yaml'
|
13
|
+
|
14
|
+
|
15
|
+
module KMDB
  # Mixin for KMDB models. Including it extends the host class with
  # ClassMethods (index toggling, a retrying find-or-create, and helpers
  # for rows tagged with a `tid`).
  module CustomRecord
    def self.included(by)
      by.extend ClassMethods
    end

    module ClassMethods
      # Issues ALTER TABLE ... DISABLE KEYS for the model's table.
      def disable_index
        connection.execute %Q{
          ALTER TABLE `#{table_name}` DISABLE KEYS;
        }
      end

      # Issues ALTER TABLE ... ENABLE KEYS for the model's table.
      def enable_index
        connection.execute %Q{
          ALTER TABLE `#{table_name}` ENABLE KEYS;
        }
      end

      # Returns the first record matching +options+, creating it if absent.
      #
      # When creation fails with RecordNotUnique or RecordInvalid, the failure
      # is logged to stderr and the lookup/creation is retried; after 5 failed
      # attempts the last error is re-raised.
      def find_or_create(options)
        # `retry` restarts the method body; `||=` keeps the running count.
        retries ||= 5
        where(options).first || create!(options)
      rescue ActiveRecord::RecordNotUnique, ActiveRecord::RecordInvalid
        $stderr.write("could not create #{self.name} with #{options.inspect}, retrying (#{retries})\n")
        retry unless (retries -= 1).zero?
        raise
      end

      # Clears the `tid` marker on every row tagged with +tid+.
      def commit(tid)
        where(tid: tid).update_all(tid: nil)
      end

      # Deletes every row that still carries a `tid` marker.
      def clear_uncommitted
        # TODO: this needs to be protected by a global lock
        where('tid IS NOT NULL').delete_all
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'kmdb/models/custom_record'
|
2
|
+
require 'kmdb/models/json_file'
|
3
|
+
|
4
|
+
module KMDB
  # Remembers which JSON files were imported, and up to which point.
  class Dumpfile < ActiveRecord::Base
    include CustomRecord

    validates_presence_of :offset
    validates_presence_of :revision

    # Finds or creates the record for +revision+.
    def self.get(revision)
      find_or_create(revision: revision)
    end

    # Persists a new import offset; raises if the update fails.
    def set(offset)
      update_attributes!(offset: offset)
    end

    # Stored offset, defaulting to 0 when none has been saved yet.
    def offset
      attributes['offset'] || 0
    end

    # The JSON dump this record tracks.
    def file
      JsonFile.new(revision)
    end

    # True once the offset has reached the recorded length; nil when the
    # length is not known yet.
    def complete?
      position = offset
      total = length
      return if position.nil? || total.nil?

      position >= total
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'kmdb/models/custom_record'
|
2
|
+
require 'kmdb/concerns/belongs_to_user'
|
3
|
+
require 'kmdb/concerns/has_properties'
|
4
|
+
require 'kmdb/models/global_uid'
|
5
|
+
require 'kmdb/user_error'
|
6
|
+
|
7
|
+
module KMDB
  # A tracked event: belongs to a user, has a timestamp `t` and a name
  # stored as a Key id in `n`.
  class Event < ActiveRecord::Base
    self.primary_key = :id

    include CustomRecord
    include BelongsToUser
    include HasProperties

    scope :before,  ->(date) { where("`#{table_name}`.`t` < ?", date) }
    scope :after,   ->(date) { where("`#{table_name}`.`t` > ?", date) }
    scope :named,   ->(name) { where(n: KMDB::Key.get(name)) }
    scope :by_date, -> { order("`#{table_name}`.`t` ASC") }

    # Builds the SQL value tuples for one parsed event.
    #
    # hash - parsed event; the '_p' (user name), '_t' (timestamp) and '_n'
    #        (event name) entries are removed from it, the remaining entries
    #        are handed to Property.sql_for.
    #
    # Yields the event tuple string and the properties tuple string.
    # Raises UserError when no user could be found or created.
    def self.sql_for(hash)
      user_name = hash.delete('_p')
      user = User.find_or_create(name: user_name)
      raise UserError.new "User missing for '#{user_name}'" unless user.present?

      stamp = Time.at hash.delete('_t')
      event_name = hash.delete('_n').scrub
      key = Key.get event_name

      event_id = GlobalUID.get(:event)
      event_sql = sanitize_sql_array(["(?,?,?,?)", event_id, stamp, key, user.id])
      properties_sql = Property.sql_for(hash, stamp: stamp, user: user, event_id: event_id)

      yield event_sql, properties_sql
    end

    # Inserts many events with a single INSERT statement.
    #
    # values_strings - array of "(id, t, n, user_id)" tuple strings.
    def self.mass_create(values_strings)
      return if values_strings.empty?

      header = "INSERT INTO `#{table_name}` (`id`, `t`, `n`, `user_id`) VALUES\n"
      connection.execute(header + values_strings.join(",\n"))
    end

    # Value of the first property with the given name, or nil if there is none.
    def prop(name)
      property = properties.named(name).first
      property && property.value
    end

    # The event's name, looked up from its Key id.
    def name
      KMDB::Key.find(n).string
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'kmdb/redis'
|
3
|
+
require 'kmdb/models/global_uid'
|
4
|
+
require 'zlib'
|
5
|
+
require 'digest'
|
6
|
+
|
7
|
+
module KMDB
  # Models a list of events, in chronological order,
  # spanning entire seconds.
  # Abstracts (compressed) storage in Redis.
  # This effectively acts as a write cache.
  class EventBatch
    # Provide either an Array (when creating a batch)
    # or an encoded String (when loading).
    #
    # data - Array of events, or the compressed String read back from Redis.
    # id   - the batch's Redis key; only used together with an encoded String.
    #
    # Raises ArgumentError for any other kind of +data+.
    def initialize(data, id: nil)
      case data
      when Array
        @events = data
      when String
        @encoded = data
        @id = id
      else
        raise ArgumentError, "expected an Array of events or an encoded String, got #{data.class}"
      end
    end

    # Stores the encoded batch in Redis under its id. Returns self.
    def save!
      _check_redis_space!
      redis.set(id, _encoded)
      self
    end

    # Removes the batch from Redis.
    def delete
      redis.del(id)
    end

    # Loads the batch stored under +id+, or returns nil when there is none.
    def self.find(id)
      encoded = redis.get(id)
      return if encoded.nil?
      new(encoded, id: id)
    end

    # The batch's events, decoded lazily from the stored payload.
    def events
      # SECURITY NOTE: Marshal.load can instantiate arbitrary objects; this is
      # only safe as long as nothing untrusted can write to this Redis namespace.
      @events ||= Marshal.load(Zlib.inflate(@encoded))
    end

    # The batch's Redis key; allocated from GlobalUID on first use.
    def id
      @id ||= GlobalUID.get('batches').to_s
    end

    private

    # Marshalled events, deflated at compression level 3.
    def _encoded
      @encoded ||= Zlib.deflate(Marshal.dump(@events), 3)
    end

    # raise exception if space in Redis is getting low
    def _check_redis_space!
      # FIXME: not implemented
      # raise RuntimeError.new('low Redis storage space')
      nil
    end

    # Redis access shared by the class (for .find) and its instances.
    module SharedMethods
      def redis
        @_redis ||= KMDB::Redis.namespaced('kmdb:batches')
      end
    end
    include SharedMethods
    extend SharedMethods
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'kmdb/redis'
|
3
|
+
|
4
|
+
module KMDB
  # Efficiently generate cross-process globally unique IDs
  # per namespace, using Redis.
  # IDs start at 1 and increment monotonically; each client is handed IDs in
  # batches of 100.
  class GlobalUID
    # Number of IDs reserved by each Redis INCR.
    BATCH_SIZE = 100

    # Returns the next ID for namespace +ns+, using one generator per
    # namespace for the lifetime of the process.
    def self.get(ns = 'value')
      @instances ||= {}
      @instances[ns] ||= new(ns)
      @instances[ns].get
    end

    # Redis connection shared by all generators, created on first use.
    def self._redis
      @_redis ||= Redis.namespaced('kmdb:globaluid:v2')
    end

    def initialize(ns)
      @ns = ns
      @major = nil # batch number obtained from Redis
      @minor = nil # 1-based position inside the current batch
    end

    # Returns the next ID, reserving a new batch from Redis when the current
    # one is exhausted (or on first use).
    def get
      if @major.nil? || @minor > BATCH_SIZE
        @major = _redis.incr(@ns) % (1 << 48)
        @minor = 1
      end

      uid = (@major - 1) * BATCH_SIZE + @minor
      @minor += 1
      uid
    end

    private

    def _redis
      self.class._redis
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'active_record'
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
module KMDB
  # Names of users to ignore, loaded once from the `name` column and kept
  # in memory.
  class IgnoredUser < ActiveRecord::Base
    module ClassMethods
      # True when +name+ is one of the stored names.
      # NOTE: this shadows Ruby's built-in Module#include? on this class.
      def include?(name)
        _data.member?(name)
      end

      private

      # Set of all names, memoized on first use; rows added later are not
      # seen until the process restarts.
      def _data
        @_data ||= Set.new(pluck(:name))
      end
    end
    extend ClassMethods
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'kmdb/models/dumpfile'
|
3
|
+
require 'kmdb/models/s3_object'
|
4
|
+
|
5
|
+
module KMDB
  # Models one of KissMetrics's JSON dumps.
  # Supports caching them from S3 and finding the latest one.
  class JsonFile
    attr_reader :revision

    def initialize(revision)
      @revision = revision
    end

    # Yields an IO object for this file, downloading it into the local cache
    # first when needed. The file size is recorded on the Dumpfile metadata
    # before yielding. Afterwards the cached copy is removed unless the
    # KMDB_KEEP_FILES environment variable (default 'YES') contains "YES".
    def open(&block)
      local_copy = _cached
      local_copy.open('r') do |io|
        metadata.update_attributes!(length: io.size)
        yield io
      end
      _flush_cache if _should_flush?
    end

    # True when the file is cached locally or present on S3.
    def exist?
      return true if _cached_path.exist?

      _s3object.exist?
    end

    # The Dumpfile record tracking the import state of this revision.
    def metadata
      @metadata ||= Dumpfile.get(revision)
    end

    private

    def _should_flush?
      keep_setting = ENV.fetch('KMDB_KEEP_FILES', 'YES')
      (keep_setting =~ /YES/).nil?
    end

    def _flush_cache
      path = _cached_path
      path.delete if path.exist?
    end

    # Path of the local copy, downloaded from S3 if it is not there yet.
    def _cached
      unless _cached_path.exist?
        _s3object.download(_cached_path)
      end
      _cached_path
    end

    def _cached_path
      @_cached_path ||= Pathname.new("tmp/#{revision}.json")
    end

    def _s3object
      @_s3object ||= S3Object.new("revisions/#{revision}.json")
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'kmdb/models/custom_record'
|
2
|
+
|
3
|
+
module KMDB
  # Map strings (event and property names) to unique integers (Key#id) for performance
  class Key < ActiveRecord::Base
    include CustomRecord

    MAX_SIZE = 255

    has_many :events,     foreign_key: :n,   class_name: 'KMDB::Event',    dependent: :delete_all
    has_many :properties, foreign_key: :key, class_name: 'KMDB::Property', dependent: :delete_all

    # Returns the integer id for +string+, creating the key if needed.
    # Results are memoized in a per-process hash that is never evicted.
    def self.get(string)
      @cache ||= {}
      @cache[string] ||= get_uncached(string)
    end

    # NOTE(review): the original carried an orphaned comment here,
    # "Replace each duplicate key ID with its most-used variant",
    # with no accompanying implementation.

    # Looks the key up in (or adds it to) the database, bypassing the cache.
    # Raises RuntimeError when +string+ is longer than MAX_SIZE.
    # NOTE: the original placed this under a bare `private`, which does not
    # affect `def self.` methods, so the method is effectively public.
    def self.get_uncached(string)
      raise "String is too long" if string.size > MAX_SIZE

      find_or_create(string: string).id
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'kmdb/models/custom_record'
|
2
|
+
require 'kmdb/concerns/belongs_to_user'
|
3
|
+
require 'kmdb/models/blacklisted_property'
|
4
|
+
require 'kmdb/user_error'
|
5
|
+
|
6
|
+
module KMDB
  # A property value set for a user, optionally attached to an event.
  # `key` holds the Key id of the property name.
  class Property < ActiveRecord::Base
    self.primary_key = :id

    include CustomRecord
    include BelongsToUser

    belongs_to :event, class_name: 'KMDB::Event'

    scope :named, lambda { |name| where(key: KMDB::Key.get(name)) }

    # Builds the SQL value tuples for the properties in +hash+.
    #
    # hash     - property name => value; the '_p' (user name) and '_t'
    #            (timestamp) entries are removed from it before processing.
    # stamp    - fallback timestamp used when the hash has no '_t'.
    # user     - user owning the properties; looked up by '_p' when nil.
    # event_id - id of the owning event, if any.
    #
    # Returns a String of ",\n"-joined "(t, user_id, event_id, key, value)"
    # tuples, or nil when there is nothing to insert (empty hash, or every
    # property is blacklisted).
    # Raises UserError when no user could be found or created.
    def self.sql_for(hash, stamp: nil, user: nil, event_id: nil)
      user_name = hash.delete('_p')
      user ||= User.find_or_create(name: user_name)
      raise UserError.new "User missing for '#{user_name}'" unless user.present?

      # Explicit parentheses: the fallback applies to the argument of Time.at.
      stamp = Time.at(hash.delete('_t') || stamp)
      return if hash.empty?

      sql_values = hash.each_with_object([]) do |(prop_name, value), tuples|
        next if BlacklistedProperty.include?(prop_name)

        key = Key.get(prop_name)
        # Coerce to String first so non-String values (e.g. numbers, nil)
        # are truncated as text instead of failing; values are capped at 255
        # characters and invalid byte sequences are replaced.
        value = value.to_s[0...255].scrub
        tuples << sanitize_sql_array(['(?,?,?,?,?)', stamp, user.id, event_id, key, value])
      end

      return if sql_values.empty?
      sql_values.join(",\n")
    end

    # Inserts many properties with a single INSERT statement.
    #
    # values_strings - array of tuple strings as returned by .sql_for.
    def self.mass_create(values_strings)
      return if values_strings.empty?
      sql_insert = "INSERT INTO `#{table_name}` (`t`,`user_id`,`event_id`,`key`,`value`) VALUES\n"
      connection.execute(sql_insert + values_strings.join(",\n"))
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'kmdb'
|
2
|
+
require 'kmdb/s3_bucket'
|
3
|
+
require 'tempfile'
|
4
|
+
|
5
|
+
module KMDB
  # A single object in the KMDB S3 bucket, identified by its path.
  class S3Object
    def initialize(path)
      @path = path
    end

    # True when the bucket has an object at this path.
    def exist?
      !!_file
    end

    # Downloads the object to +target+ (a Pathname) using curl and a
    # time-limited URL. The data is first written to a temporary file and
    # then renamed into place.
    #
    # Returns true. Raises RuntimeError when the object does not exist or
    # the download fails.
    def download(target)
      raise "S3 object #{@path} not found" unless exist?
      _log "downloading"
      # --fail makes curl exit non-zero on HTTP errors instead of saving the
      # error response body as if it were the file.
      succeeded = system 'curl', '--fail', '--silent', '-o', _tempfile.path, _file.url(_expiry)
      raise "Download failed for #{@path}" unless succeeded
      target.parent.mkpath
      File.rename(_tempfile.path, target.to_s)
      true
    end

    private

    # Result of a HEAD lookup for the object; memoized once found.
    def _file
      @_file ||= begin
        _log "checking for existence"
        S3Bucket.instance.files.head(@path)
      end
    end

    # Expiry for the download URL: 600 seconds from now, as epoch seconds.
    def _expiry
      Time.now.utc.to_i + 600
    end

    # Temporary file (inside tmp/) that receives the download.
    def _tempfile
      @_tempfile ||= Tempfile.new('kmdb', _tempdir.to_s)
    end

    # The tmp directory, created if missing.
    def _tempdir
      Pathname.new('tmp').tap(&:mkpath)
    end

    def _log(message)
      $stderr.write("s3 #{@path}: #{message}\n")
    end
  end
end
|
54
|
+
|