km-db 0.2.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +7 -0
- data/.ruby-version +1 -0
- data/Gemfile +2 -4
- data/Gemfile.lock +179 -20
- data/Procfile +2 -0
- data/Procfile.work +1 -0
- data/README.md +186 -0
- data/Rakefile +1 -0
- data/bin/kmdb-flush +13 -0
- data/bin/kmdb-import +13 -0
- data/bin/kmdb-partition +15 -0
- data/bin/kmdb-pool +8 -0
- data/bin/kmdb-realias +12 -0
- data/bin/kmdb-ui +6 -0
- data/bin/kmdb-work +17 -0
- data/config/amazon-rds-ca-cert.pem +260 -0
- data/config.ru +8 -0
- data/km-db.gemspec +17 -17
- data/lib/kmdb/{belongs_to_user.rb → concerns/belongs_to_user.rb} +3 -3
- data/lib/kmdb/concerns/has_properties.rb +35 -0
- data/lib/kmdb/jobs/find_files.rb +32 -0
- data/lib/kmdb/jobs/list_files.rb +37 -0
- data/lib/kmdb/jobs/locked.rb +10 -0
- data/lib/kmdb/jobs/parse_file.rb +109 -0
- data/lib/kmdb/jobs/record_batch.rb +65 -0
- data/lib/kmdb/jobs/redo_unaliasing.rb +31 -0
- data/lib/kmdb/jobs/unalias_user.rb +32 -0
- data/lib/kmdb/migrations/01_kmdb_initial.rb +78 -0
- data/lib/kmdb/migrations/02_kmdb_partitions.rb +28 -0
- data/lib/kmdb/migrations/03_kmdb_blacklist.rb +20 -0
- data/lib/kmdb/models/alias.rb +36 -0
- data/lib/kmdb/models/blacklisted_property.rb +20 -0
- data/lib/kmdb/models/custom_record.rb +53 -0
- data/lib/kmdb/models/dumpfile.rb +33 -0
- data/lib/kmdb/models/event.rb +56 -0
- data/lib/kmdb/models/event_batch.rb +72 -0
- data/lib/kmdb/models/global_uid.rb +42 -0
- data/lib/kmdb/models/ignored_user.rb +20 -0
- data/lib/kmdb/models/json_file.rb +56 -0
- data/lib/kmdb/models/key.rb +28 -0
- data/lib/kmdb/models/property.rb +44 -0
- data/lib/kmdb/models/s3_object.rb +54 -0
- data/lib/kmdb/models/user.rb +53 -0
- data/lib/kmdb/models/whitelisted_event.rb +20 -0
- data/lib/kmdb/parser.rb +4 -4
- data/lib/kmdb/redis.rb +17 -0
- data/lib/kmdb/resque.rb +38 -0
- data/lib/kmdb/s3_bucket.rb +33 -0
- data/lib/kmdb/services/partitioner.rb +65 -0
- data/lib/kmdb/version.rb +1 -1
- data/lib/kmdb.rb +31 -6
- metadata +236 -186
- data/README.markdown +0 -91
- data/bin/km_db_import +0 -36
- data/lib/kmdb/custom_record.rb +0 -54
- data/lib/kmdb/dumpfile.rb +0 -23
- data/lib/kmdb/event.rb +0 -39
- data/lib/kmdb/has_properties.rb +0 -33
- data/lib/kmdb/key.rb +0 -56
- data/lib/kmdb/migration.rb +0 -63
- data/lib/kmdb/parallel_parser.rb +0 -85
- data/lib/kmdb/property.rb +0 -33
- data/lib/kmdb/user.rb +0 -83
data/lib/kmdb/event.rb
DELETED
@@ -1,39 +0,0 @@
|
|
1
|
-
require 'kmdb/custom_record'
|
2
|
-
require 'kmdb/belongs_to_user'
|
3
|
-
require 'kmdb/has_properties'
|
4
|
-
|
5
|
-
module KMDB
|
6
|
-
class Event < CustomRecord
|
7
|
-
include BelongsToUser
|
8
|
-
include HasProperties
|
9
|
-
|
10
|
-
set_table_name "events"
|
11
|
-
|
12
|
-
named_scope :before, lambda { |date| { :conditions => ["`#{table_name}`.`t` < ?", date] } }
|
13
|
-
named_scope :after, lambda { |date| { :conditions => ["`#{table_name}`.`t` > ?", date] } }
|
14
|
-
|
15
|
-
named_scope :named, lambda { |name| { :conditions => { :n => KMDB::Key.get(name) } } }
|
16
|
-
|
17
|
-
named_scope :by_date, lambda { { :order => "`#{table_name}`.`t` ASC" } }
|
18
|
-
|
19
|
-
# return value of property
|
20
|
-
def prop(name)
|
21
|
-
properties.named(name).first.andand.value
|
22
|
-
end
|
23
|
-
|
24
|
-
def name
|
25
|
-
KMDB::Key.find(n).value
|
26
|
-
end
|
27
|
-
|
28
|
-
def self.record(hash)
|
29
|
-
user_name = hash.delete('_p')
|
30
|
-
user ||= User.get(user_name)
|
31
|
-
raise UserError.new "User missing for '#{user_name}'" unless user.present?
|
32
|
-
|
33
|
-
stamp = Time.at hash.delete('_t')
|
34
|
-
key = Key.get hash.delete('_n')
|
35
|
-
event = create(:t => stamp, :n => key, :user => user)
|
36
|
-
Property.set(hash, stamp, user, event)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
data/lib/kmdb/has_properties.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
|
3
|
-
KMDB::HasProperties --
|
4
|
-
|
5
|
-
Trait shared by Event and User.
|
6
|
-
|
7
|
-
=end
|
8
|
-
|
9
|
-
module KMDB
|
10
|
-
module HasProperties
|
11
|
-
def self.included(mod)
|
12
|
-
mod.class_eval do
|
13
|
-
has_many :properties, :class_name => 'KMDB::Property'
|
14
|
-
|
15
|
-
named_scope :with_properties, lambda { |*props|
|
16
|
-
direction = props.delete(:exclude_missing) ? 'INNER' : 'LEFT'
|
17
|
-
prop_table = Property.table_name
|
18
|
-
selects = ["`#{table_name}`.*"]
|
19
|
-
joins = []
|
20
|
-
props.each_with_index { |prop,k|
|
21
|
-
temp_name = "#{prop_table}_#{k}"
|
22
|
-
selects << "`#{temp_name}`.`value` AS `#{prop.split.join('_')}`"
|
23
|
-
joins << sanitize_sql_array([%Q{
|
24
|
-
#{direction} JOIN `properties` AS `#{temp_name}`
|
25
|
-
ON `#{table_name}`.id = `#{temp_name}`.event_id
|
26
|
-
AND `#{temp_name}`.`key` = ?}, KMDB::Key.get(prop)])
|
27
|
-
}
|
28
|
-
{ :select => selects.join(', '), :joins => joins.join("\n") }
|
29
|
-
}
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
data/lib/kmdb/key.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
|
3
|
-
Map strings (event and property names) to unique integers (Key#id) for performance
|
4
|
-
|
5
|
-
=end
|
6
|
-
|
7
|
-
require 'kmdb/custom_record'
|
8
|
-
|
9
|
-
module KMDB
|
10
|
-
class Key < CustomRecord
|
11
|
-
set_table_name "keys"
|
12
|
-
|
13
|
-
has_many :events, :foreign_key => :n, :class_name => 'KMDB::Event', :dependent => :delete_all
|
14
|
-
has_many :properties, :foreign_key => :key, :class_name => 'KMDB::Property', :dependent => :delete_all
|
15
|
-
|
16
|
-
named_scope :has_duplicate, lambda {
|
17
|
-
{
|
18
|
-
:select => "id, string, COUNT(id) AS quantity",
|
19
|
-
:group => :string, :having => "quantity > 1"
|
20
|
-
}
|
21
|
-
}
|
22
|
-
|
23
|
-
def self.get(string)
|
24
|
-
@cache ||= {}
|
25
|
-
@cache[string] ||= get_uncached(string)
|
26
|
-
end
|
27
|
-
|
28
|
-
# Replace each duplicate key ID with its most-used variant
|
29
|
-
def self.fix_duplicates!
|
30
|
-
has_duplicate.map(&:string).each do |string|
|
31
|
-
all_keys = find(:all, :conditions => { :string => string })
|
32
|
-
|
33
|
-
# sort keys by usage
|
34
|
-
all_ids = all_keys.map { |key|
|
35
|
-
[key.id, Event.named(key.id).count + Property.named(key.id).count]
|
36
|
-
}.sort { |k1,k2|
|
37
|
-
k1.second <=> k2.second
|
38
|
-
}.map { |k|
|
39
|
-
k.first
|
40
|
-
}
|
41
|
-
id_to_keep = all_ids.pop
|
42
|
-
$stderr.write "Fixing key '#{string}' #{all_ids.inspect} -> #{id_to_keep.inspect}\n"
|
43
|
-
Event.update_all({ :n => id_to_keep }, ["`events`.`n` IN (?)", all_ids])
|
44
|
-
Property.update_all({ :key => id_to_keep }, ["`properties`.`key` IN (?)", all_ids])
|
45
|
-
Key.delete_all(["id IN (?)", all_ids])
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
private
|
50
|
-
|
51
|
-
def self.get_uncached(string)
|
52
|
-
string.size <= MaxStringSize or raise "String is too long"
|
53
|
-
find_or_create(:string => string).id
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
data/lib/kmdb/migration.rb
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
|
3
|
-
Setup a custom database for KissMetrics tracking events.
|
4
|
-
|
5
|
-
=end
|
6
|
-
|
7
|
-
require 'active_record'
|
8
|
-
|
9
|
-
module KMDB
|
10
|
-
class SetupEventsDatabase < ActiveRecord::Migration
|
11
|
-
def self.connection
|
12
|
-
CustomRecord.connection
|
13
|
-
end
|
14
|
-
|
15
|
-
def self.up
|
16
|
-
create_table :events do |t|
|
17
|
-
t.integer :user_id
|
18
|
-
t.integer :n
|
19
|
-
t.datetime :t
|
20
|
-
end
|
21
|
-
add_index :events, [:n]
|
22
|
-
add_index :events, [:user_id]
|
23
|
-
|
24
|
-
|
25
|
-
create_table :keys do |t|
|
26
|
-
t.string :string, :limit => MaxStringSize
|
27
|
-
end
|
28
|
-
add_index :keys, [:string]
|
29
|
-
|
30
|
-
create_table :properties do |t|
|
31
|
-
t.integer :user_id
|
32
|
-
t.integer :event_id
|
33
|
-
t.integer :key
|
34
|
-
t.string :value, :limit => 64
|
35
|
-
t.datetime :t
|
36
|
-
end
|
37
|
-
add_index :properties, [:key]
|
38
|
-
add_index :properties, [:user_id]
|
39
|
-
add_index :properties, [:event_id]
|
40
|
-
|
41
|
-
create_table :users do |t|
|
42
|
-
t.string :name, :limit => 48
|
43
|
-
t.integer :alias_id
|
44
|
-
end
|
45
|
-
add_index :users, [:name]
|
46
|
-
|
47
|
-
create_table :dumpfiles do |t|
|
48
|
-
t.string :path
|
49
|
-
t.string :job
|
50
|
-
t.integer :offset
|
51
|
-
end
|
52
|
-
add_index :dumpfiles, [:path]
|
53
|
-
|
54
|
-
end
|
55
|
-
|
56
|
-
def self.down
|
57
|
-
drop_table :events
|
58
|
-
drop_table :properties
|
59
|
-
drop_table :users
|
60
|
-
drop_table :aliases
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
data/lib/kmdb/parallel_parser.rb
DELETED
@@ -1,85 +0,0 @@
|
|
1
|
-
require 'kmdb/parser'
|
2
|
-
require 'parallel'
|
3
|
-
|
4
|
-
module KMDB
|
5
|
-
class ParallelParser < Parser
|
6
|
-
|
7
|
-
def initialize(options = {})
|
8
|
-
super(options)
|
9
|
-
@worker_count = options.delete(:workers) || Parallel.processor_count
|
10
|
-
end
|
11
|
-
|
12
|
-
def run(argv)
|
13
|
-
@pipe_rd, @pipe_wr = IO.pipe
|
14
|
-
|
15
|
-
inputs = list_files_in(argv)
|
16
|
-
total_bytes = total_size_of_files(inputs)
|
17
|
-
log "total bytes : #{total_bytes}"
|
18
|
-
total_bytes -= inputs.map { |p| Dumpfile.get(p, @resume_job) }.compact.map(&:offset).sum
|
19
|
-
log "left to process : #{total_bytes}"
|
20
|
-
|
21
|
-
# Start workers
|
22
|
-
log "Using #{@worker_count} workers."
|
23
|
-
Process.fork do
|
24
|
-
@pipe_rd.close
|
25
|
-
Parallel.each(inputs, :in_processes => @worker_count) do |input|
|
26
|
-
KMDB::Event.connection.reconnect!
|
27
|
-
log "Worker #{Process.pid} starting #{input}"
|
28
|
-
$0 = "worker: #{input}"
|
29
|
-
process_events_in_file(input)
|
30
|
-
log "Worker #{Process.pid} done"
|
31
|
-
true
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
# Start gatherer
|
36
|
-
$0 = "gatherer: #{$0}"
|
37
|
-
@pipe_wr.close
|
38
|
-
byte_counter = 0
|
39
|
-
log "Starting gatherer, total bytes: #{total_bytes}"
|
40
|
-
progress = ProgressBar.new("-" * 20, total_bytes)
|
41
|
-
while line = @pipe_rd.gets
|
42
|
-
if line =~ /^OK (\d+)$/
|
43
|
-
byte_counter += $1.to_i
|
44
|
-
progress.set byte_counter
|
45
|
-
elsif line =~ /^FILE (.*)$/
|
46
|
-
progress.title = $1
|
47
|
-
else
|
48
|
-
log "Unparsed line: '#{line}'"
|
49
|
-
end
|
50
|
-
end
|
51
|
-
progress.finish
|
52
|
-
log "Total bytes processed: #{byte_counter}"
|
53
|
-
Process.waitall
|
54
|
-
end
|
55
|
-
|
56
|
-
private
|
57
|
-
|
58
|
-
def process_events_in_file(pathname)
|
59
|
-
pathname.open do |input|
|
60
|
-
processed_bytes = 0
|
61
|
-
if @resume_job
|
62
|
-
dumpfile = Dumpfile.get(pathname, @resume_job)
|
63
|
-
log "Starting file #{pathname} from offset #{dumpfile.offset}"
|
64
|
-
input.seek(dumpfile.offset)
|
65
|
-
end
|
66
|
-
line_number = 0
|
67
|
-
@pipe_wr.write "FILE #{pathname.basename}\n"
|
68
|
-
while line = input.gets
|
69
|
-
line_number += 1
|
70
|
-
processed_bytes += line.size
|
71
|
-
|
72
|
-
process_event(line)
|
73
|
-
dumpfile.set(input.tell)
|
74
|
-
|
75
|
-
if processed_bytes > 100_000
|
76
|
-
@pipe_wr.write "OK #{processed_bytes}\n"
|
77
|
-
processed_bytes = 0
|
78
|
-
end
|
79
|
-
end
|
80
|
-
@pipe_wr.write "OK #{processed_bytes}\n"
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
end
|
85
|
-
end
|
data/lib/kmdb/property.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
require 'kmdb/belongs_to_user'
|
2
|
-
|
3
|
-
module KMDB
|
4
|
-
class Property < CustomRecord
|
5
|
-
include BelongsToUser
|
6
|
-
|
7
|
-
set_table_name "properties"
|
8
|
-
belongs_to :event, :class_name => 'KMDB::Event'
|
9
|
-
|
10
|
-
default_scope :order => 't DESC'
|
11
|
-
named_scope :named, lambda { |name| { :conditions => { :key => KMDB::Key.get(name) } } }
|
12
|
-
|
13
|
-
def self.set(hash, stamp=nil, user=nil, event=nil)
|
14
|
-
user_name = hash.delete('_p')
|
15
|
-
user ||= User.get(user_name)
|
16
|
-
raise UserError.new "User missing for '#{user_name}'" unless user.present?
|
17
|
-
|
18
|
-
event_id = event ? event.id : nil
|
19
|
-
stamp = Time.at hash.delete('_t') || stamp
|
20
|
-
|
21
|
-
return if hash.empty?
|
22
|
-
sql_insert = "INSERT INTO `#{table_name}` (`t`,`user_id`,`event_id`,`key`,`value`) VALUES "
|
23
|
-
sql_values = []
|
24
|
-
|
25
|
-
hash.each_pair do |prop_name,value|
|
26
|
-
key = Key.get(prop_name)
|
27
|
-
sql_values << sanitize_sql_array(["(?,?,?,?,?)", stamp,user.id,event_id,key,value])
|
28
|
-
end
|
29
|
-
|
30
|
-
connection.execute(sql_insert + sql_values.join(","))
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
data/lib/kmdb/user.rb
DELETED
@@ -1,83 +0,0 @@
|
|
1
|
-
require 'kmdb/has_properties'
|
2
|
-
|
3
|
-
module KMDB
|
4
|
-
class User < CustomRecord
|
5
|
-
include HasProperties
|
6
|
-
|
7
|
-
set_table_name "users"
|
8
|
-
|
9
|
-
has_many :events, :class_name => 'KMDB::Event'
|
10
|
-
belongs_to :alias, :class_name => 'KMDB::User'
|
11
|
-
# points to the aliased user. if set, no properties/events should belong to this user
|
12
|
-
|
13
|
-
validates_presence_of :name
|
14
|
-
validates_uniqueness_of :name
|
15
|
-
|
16
|
-
named_scope :named, lambda { |name| { :conditions => { :name => name } } }
|
17
|
-
|
18
|
-
named_scope :duplicates, lambda {{
|
19
|
-
:select => "id, COUNT(id) AS quantity", :group => :name, :having => "quantity > 1"
|
20
|
-
}}
|
21
|
-
|
22
|
-
# return (latest) value of property
|
23
|
-
def prop(name)
|
24
|
-
properties.named(name).first.andand.value
|
25
|
-
end
|
26
|
-
|
27
|
-
# mark this user as aliasing another
|
28
|
-
def aliases!(other)
|
29
|
-
[Property,Event].each do |model|
|
30
|
-
model.user_is(self).update_all({:user_id => other.id})
|
31
|
-
end
|
32
|
-
self.update_attributes!(:alias => other)
|
33
|
-
end
|
34
|
-
|
35
|
-
# return the user named `name` (creating it if necessary)
|
36
|
-
# if `name` is an alias, return the original user
|
37
|
-
def self.get(name)
|
38
|
-
user = named(name).first || create(:name => name)
|
39
|
-
user = user.alias while user.alias
|
40
|
-
return user
|
41
|
-
end
|
42
|
-
|
43
|
-
|
44
|
-
# mark the two names as pointing to the same user
|
45
|
-
def self.alias!(name1, name2)
|
46
|
-
u1 = get(name1)
|
47
|
-
u2 = get(name2)
|
48
|
-
$stderr.write "Warning: user '#{user.name}' has an alias\n" if u1.alias
|
49
|
-
$stderr.write "Warning: user '#{user.name}' has an alias\n" if u2.alias
|
50
|
-
|
51
|
-
# nothing to do if both names already point to the same user
|
52
|
-
return if u1 == u2
|
53
|
-
|
54
|
-
u2.aliases! u1
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
# duplication can occur during parallel imports because we're not running transactionally.
|
59
|
-
def self.fix_duplicates!
|
60
|
-
duplicates.map(&:name).each do |name|
|
61
|
-
named(name).all.tap do |all_users|
|
62
|
-
kept_user = all_users.pop
|
63
|
-
all_users.each do |user|
|
64
|
-
user.aliases! kept_user
|
65
|
-
user.destroy
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
|
72
|
-
# detect alias chains
|
73
|
-
def self.resolve_alias_chains!
|
74
|
-
find(:all, :joins => :alias, :conditions => 'aliases_users.alias_id IS NOT NULL').each do |user|
|
75
|
-
user = find(user.id)
|
76
|
-
origin = find(user.alias_id)
|
77
|
-
origin = origin.alias while origin.alias # go up the chain
|
78
|
-
$stderr.write "Aliasing #{user.name} -> #{origin.name}\n"
|
79
|
-
user.aliases!(origin)
|
80
|
-
end
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|