km-db 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +6 -0
 - data/Gemfile.lock +46 -0
 - data/README.markdown +91 -0
 - data/Rakefile +5 -0
 - data/bin/km_db_import +36 -0
 - data/km-db.gemspec +32 -0
 - data/lib/kmdb/belongs_to_user.rb +15 -0
 - data/lib/kmdb/custom_record.rb +54 -0
 - data/lib/kmdb/dumpfile.rb +23 -0
 - data/lib/kmdb/event.rb +39 -0
 - data/lib/kmdb/has_properties.rb +33 -0
 - data/lib/kmdb/key.rb +56 -0
 - data/lib/kmdb/migration.rb +63 -0
 - data/lib/kmdb/parallel_parser.rb +85 -0
 - data/lib/kmdb/parser.rb +143 -0
 - data/lib/kmdb/property.rb +33 -0
 - data/lib/kmdb/user.rb +83 -0
 - data/lib/kmdb/user_error.rb +2 -0
 - data/lib/kmdb/version.rb +4 -0
 - data/lib/kmdb.rb +10 -0
 - metadata +234 -0
 
    
        data/Gemfile
    ADDED
    
    
    
        data/Gemfile.lock
    ADDED
    
    | 
         @@ -0,0 +1,46 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            PATH
         
     | 
| 
      
 2 
     | 
    
         
            +
              remote: .
         
     | 
| 
      
 3 
     | 
    
         
            +
              specs:
         
     | 
| 
      
 4 
     | 
    
         
            +
                km-db (0.2.1)
         
     | 
| 
      
 5 
     | 
    
         
            +
                  activerecord (~> 2.3.12)
         
     | 
| 
      
 6 
     | 
    
         
            +
                  andand
         
     | 
| 
      
 7 
     | 
    
         
            +
                  parallel
         
     | 
| 
      
 8 
     | 
    
         
            +
                  progressbar
         
     | 
| 
      
 9 
     | 
    
         
            +
                  yajl-ruby
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            GEM
         
     | 
| 
      
 12 
     | 
    
         
            +
              remote: http://rubygems.org/
         
     | 
| 
      
 13 
     | 
    
         
            +
              specs:
         
     | 
| 
      
 14 
     | 
    
         
            +
                activerecord (2.3.18)
         
     | 
| 
      
 15 
     | 
    
         
            +
                  activesupport (= 2.3.18)
         
     | 
| 
      
 16 
     | 
    
         
            +
                activesupport (2.3.18)
         
     | 
| 
      
 17 
     | 
    
         
            +
                andand (1.3.3)
         
     | 
| 
      
 18 
     | 
    
         
            +
                diff-lcs (1.1.3)
         
     | 
| 
      
 19 
     | 
    
         
            +
                json (1.7.7)
         
     | 
| 
      
 20 
     | 
    
         
            +
                parallel (0.6.3)
         
     | 
| 
      
 21 
     | 
    
         
            +
                progressbar (0.20.0)
         
     | 
| 
      
 22 
     | 
    
         
            +
                rake (10.0.3)
         
     | 
| 
      
 23 
     | 
    
         
            +
                rspec (2.4.0)
         
     | 
| 
      
 24 
     | 
    
         
            +
                  rspec-core (~> 2.4.0)
         
     | 
| 
      
 25 
     | 
    
         
            +
                  rspec-expectations (~> 2.4.0)
         
     | 
| 
      
 26 
     | 
    
         
            +
                  rspec-mocks (~> 2.4.0)
         
     | 
| 
      
 27 
     | 
    
         
            +
                rspec-core (2.4.0)
         
     | 
| 
      
 28 
     | 
    
         
            +
                rspec-expectations (2.4.0)
         
     | 
| 
      
 29 
     | 
    
         
            +
                  diff-lcs (~> 1.1.2)
         
     | 
| 
      
 30 
     | 
    
         
            +
                rspec-mocks (2.4.0)
         
     | 
| 
      
 31 
     | 
    
         
            +
                sqlite3 (1.3.7)
         
     | 
| 
      
 32 
     | 
    
         
            +
                sqlite3-ruby (1.3.3)
         
     | 
| 
      
 33 
     | 
    
         
            +
                  sqlite3 (>= 1.3.3)
         
     | 
| 
      
 34 
     | 
    
         
            +
                yajl-ruby (1.1.0)
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
            PLATFORMS
         
     | 
| 
      
 37 
     | 
    
         
            +
              ruby
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
            DEPENDENCIES
         
     | 
| 
      
 40 
     | 
    
         
            +
              bundler (>= 1.0.0)
         
     | 
| 
      
 41 
     | 
    
         
            +
              json
         
     | 
| 
      
 42 
     | 
    
         
            +
              km-db!
         
     | 
| 
      
 43 
     | 
    
         
            +
              progressbar
         
     | 
| 
      
 44 
     | 
    
         
            +
              rake
         
     | 
| 
      
 45 
     | 
    
         
            +
              rspec (~> 2.4.0)
         
     | 
| 
      
 46 
     | 
    
         
            +
              sqlite3-ruby
         
     | 
    
        data/README.markdown
    ADDED
    
    | 
         @@ -0,0 +1,91 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            The `km-db` gem should be useful to KissMetrics (KM) users.
         
     | 
| 
      
 2 
     | 
    
         
            +
            Its aim is to efficiently process data obtained with KM's "Data Export" feature.
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            It is meant to:
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            * import KM event dumps into a SQL database (preferably MySQL / PostgreSQL)
         
     | 
| 
      
 7 
     | 
    
         
            +
            * quickly process KM event dumps
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            Once imported, you can run complex queries against your visit history, for instance run multivariate analysis.
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            Beware though, KM data can be huge, and processing it is taxing!
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            Installing
         
     | 
| 
      
 15 
     | 
    
         
            +
            ----------
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            Add this to your Gemfile if you're using Bundler:
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                gem 'km-db', :git => 'git://github.com/HouseTrip/km-db.git'
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            Importing data
         
     | 
| 
      
 23 
     | 
    
         
            +
            --------------
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            Running reports on raw logs can be less effective than running against a (relational) database.
         
     | 
| 
      
 26 
     | 
    
         
            +
            `km-db` provides a `km_db_import` executable. Run it with:
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
                $ bundle exec km_db_import <data-dump-directory>…
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            By default, your events will be imported into `test.db`, a SQLite database.
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            You can create `km_db.yml` or `config/km_db.yml` to have it import using another adapter, for instance:
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                ---- km_db.yml ----
         
     | 
| 
      
 35 
     | 
    
         
            +
                adapter:  mysql2
         
     | 
| 
      
 36 
     | 
    
         
            +
                database: km_events
         
     | 
| 
      
 37 
     | 
    
         
            +
                user:     root
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
            Remember to add `sqlite3-ruby` or `mysql2` to your Gemfile.
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
            Using imported data
         
     | 
| 
      
 43 
     | 
    
         
            +
            -------------------
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
            The `KMDB` module exposes four `ActiveRecord` classes:
         
     | 
| 
      
 46 
     | 
    
         
            +
            `Event`, `Property`, `User` are the main domain objects.
         
     | 
| 
      
 47 
     | 
    
         
            +
            `Key` is used to intern strings (event and property names) for performance.
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
            ### Finding events and properties
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
            All visits during Jan. 2012:
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
                KMDB::Event.before('2012-02-1').after('2012-01-01').named('visited site').by_date
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
            All of a user's visits:
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
                KMDB::User.last.events.named('visited site')
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
            A user's referers:
         
     | 
| 
      
 60 
     | 
    
         
            +
                
         
     | 
| 
      
 61 
     | 
    
         
            +
                KMDB::User.last.properties.named('referer').map(&:value)
         
     | 
| 
      
 62 
     | 
    
         
            +
             
     | 
| 
      
 63 
     | 
    
         
            +
            Load some properties with events (uses a left join by default):
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
                KMDB::User.last.events.with_properties('a prop', 'another prop').map(&:another_prop)
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
            Note that many more complex queries will require building SQL queries directly.
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
             
     | 
| 
      
 70 
     | 
    
         
            +
            Processing data
         
     | 
| 
      
 71 
     | 
    
         
            +
            ---------------
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
      
 73 
     | 
    
         
            +
            You don't have to import to filter your data.
         
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
      
 75 
     | 
    
         
            +
            The two classes you're looking for are `KMDB::Parser` and `KMDB::ParallelParser`.
         
     | 
| 
      
 76 
     | 
    
         
            +
            The latter runs your filter task on all available CPUs, using the `parallel` gem.
         
     | 
| 
      
 77 
     | 
    
         
            +
             
     | 
| 
      
 78 
     | 
    
         
            +
            The following example counts the number of *aliasing* events in all JSON files under `dumps/`:
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
                require 'rubygems'
         
     | 
| 
      
 81 
     | 
    
         
            +
                require 'kmdb'
         
     | 
| 
      
 82 
     | 
    
         
            +
             
     | 
| 
      
 83 
     | 
    
         
            +
                counter = 0
         
     | 
| 
      
 84 
     | 
    
         
            +
                parser = KMDB::Parser.new
         
     | 
| 
      
 85 
     | 
    
         
            +
                parser.add_filter do |text,event|
         
     | 
| 
      
 86 
     | 
    
         
            +
                    counter += 1 if event['_p2']
         
     | 
| 
      
 87 
     | 
    
         
            +
                end
         
     | 
| 
      
 88 
     | 
    
         
            +
                parser.run('dumps/')
         
     | 
| 
      
 89 
     | 
    
         
            +
                puts counter
         
     | 
| 
      
 90 
     | 
    
         
            +
             
     | 
| 
      
 91 
     | 
    
         
            +
            Note that this example will not work with `ParallelParser`, as each process will have its own copy of the `counter` variable.
         
     | 
    
        data/Rakefile
    ADDED
    
    
    
        data/bin/km_db_import
    ADDED
    
    | 
         @@ -0,0 +1,36 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
            =begin
         
     | 
| 
      
 3 
     | 
    
         
            +
                
         
     | 
| 
      
 4 
     | 
    
         
            +
              Import KM events from the raw dumps.
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            =end
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            require 'rubygems'
         
     | 
| 
      
 9 
     | 
    
         
            +
            require 'kmdb'
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            if KMDB::Event.connection.class.to_s =~ /(mysql|pgsql)/i
         
     | 
| 
      
 12 
     | 
    
         
            +
              parser_class = KMDB::ParallelParser
         
     | 
| 
      
 13 
     | 
    
         
            +
            else
         
     | 
| 
      
 14 
     | 
    
         
            +
              parser_class = KMDB::Parser
         
     | 
| 
      
 15 
     | 
    
         
            +
            end
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            parser = parser_class.new(:resume  => 'import', 
         
     | 
| 
      
 18 
     | 
    
         
            +
                                      :verbose => true)
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
            # import events by category
         
     | 
| 
      
 21 
     | 
    
         
            +
            parser.add_filter { |text, event|
         
     | 
| 
      
 22 
     | 
    
         
            +
              if event['_p2']
         
     | 
| 
      
 23 
     | 
    
         
            +
                KMDB::User.alias! event['_p'], event['_p2']
         
     | 
| 
      
 24 
     | 
    
         
            +
              elsif event['_n']
         
     | 
| 
      
 25 
     | 
    
         
            +
                KMDB::Event.record event
         
     | 
| 
      
 26 
     | 
    
         
            +
              else
         
     | 
| 
      
 27 
     | 
    
         
            +
                KMDB::Property.set event
         
     | 
| 
      
 28 
     | 
    
         
            +
              end
         
     | 
| 
      
 29 
     | 
    
         
            +
              
         
     | 
| 
      
 30 
     | 
    
         
            +
              event
         
     | 
| 
      
 31 
     | 
    
         
            +
            }
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
            parser.run(ARGV)                     # heavy lifting here
         
     | 
| 
      
 34 
     | 
    
         
            +
            KMDB::Event.connection.reconnect!  # reconnect to database (breaks because of processes forking off)
         
     | 
| 
      
 35 
     | 
    
         
            +
            KMDB::User.resolve_alias_chains!   # detect and filter alias chains
         
     | 
| 
      
 36 
     | 
    
         
            +
            KMDB::Key.fix_duplicates!          # remove key duplicates
         
     | 
    
        data/km-db.gemspec
    ADDED
    
    | 
         @@ -0,0 +1,32 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
            require File.expand_path("../lib/kmdb/version", __FILE__)
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            Gem::Specification.new do |s|
         
     | 
| 
      
 5 
     | 
    
         
            +
              s.name        = "km-db"
         
     | 
| 
      
 6 
     | 
    
         
            +
              s.version     = KMDB::VERSION
         
     | 
| 
      
 7 
     | 
    
         
            +
              s.platform    = Gem::Platform::RUBY
         
     | 
| 
      
 8 
     | 
    
         
            +
              s.authors     = ["HouseTrip"]
         
     | 
| 
      
 9 
     | 
    
         
            +
              s.email       = ["jtl@housetrip.com"]
         
     | 
| 
      
 10 
     | 
    
         
            +
              s.homepage    = "https://github.com/housetrip/km-db"
         
     | 
| 
      
 11 
     | 
    
         
            +
              s.summary     = "Process KISSmetrics data dumps"
         
     | 
| 
      
 12 
     | 
    
         
            +
              s.description = "Process KISSmetrics data dumps"
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
              s.required_rubygems_version = ">= 1.3.6"
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
              s.add_development_dependency "bundler", ">= 1.0.0"
         
     | 
| 
      
 17 
     | 
    
         
            +
              s.add_development_dependency "rspec", "~> 2.4.0"
         
     | 
| 
      
 18 
     | 
    
         
            +
              s.add_development_dependency "rake"
         
     | 
| 
      
 19 
     | 
    
         
            +
              s.add_development_dependency "json"
         
     | 
| 
      
 20 
     | 
    
         
            +
              s.add_development_dependency "sqlite3-ruby"
         
     | 
| 
      
 21 
     | 
    
         
            +
              
         
     | 
| 
      
 22 
     | 
    
         
            +
              s.add_dependency "yajl-ruby"
         
     | 
| 
      
 23 
     | 
    
         
            +
              s.add_dependency "progressbar"
         
     | 
| 
      
 24 
     | 
    
         
            +
              s.add_dependency "parallel"
         
     | 
| 
      
 25 
     | 
    
         
            +
              s.add_dependency "andand"
         
     | 
| 
      
 26 
     | 
    
         
            +
              s.add_dependency "activerecord", "~> 2.3.12"
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
              s.files        = `git ls-files`.split("\n")
         
     | 
| 
      
 29 
     | 
    
         
            +
              s.test_files   = `git ls-files -- {test,spec,features}/*`.split("\n")
         
     | 
| 
      
 30 
     | 
    
         
            +
              s.executables  = `git ls-files`.split("\n").map{|f| f =~ /^bin\/(.*)/ ? $1 : nil}.compact
         
     | 
| 
      
 31 
     | 
    
         
            +
              s.require_path = 'lib'
         
     | 
| 
      
 32 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,15 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module KMDB
         
     | 
| 
      
 2 
     | 
    
         
            +
              module BelongsToUser
         
     | 
| 
      
 3 
     | 
    
         
            +
                def self.included(mod)
         
     | 
| 
      
 4 
     | 
    
         
            +
                  mod.class_eval do
         
     | 
| 
      
 5 
     | 
    
         
            +
                    belongs_to :user,  :class_name => 'KMDB::User'
         
     | 
| 
      
 6 
     | 
    
         
            +
                    validates_presence_of :user
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
                    named_scope :user_is, lambda { |user| 
         
     | 
| 
      
 9 
     | 
    
         
            +
                      user.kind_of?(User) or raise TypeError.new("Not a kind of User")
         
     | 
| 
      
 10 
     | 
    
         
            +
                      { :conditions => { :user_id => user.id } }
         
     | 
| 
      
 11 
     | 
    
         
            +
                    }
         
     | 
| 
      
 12 
     | 
    
         
            +
                  end
         
     | 
| 
      
 13 
     | 
    
         
            +
                end
         
     | 
| 
      
 14 
     | 
    
         
            +
              end
         
     | 
| 
      
 15 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,54 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            =begin
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
              Base class for KM data.
         
     | 
| 
      
 4 
     | 
    
         
            +
              Connect to a secondary database to store events, users, & properties.
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
              FIXME: the database connection is hard-coded for now.
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            =end
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            require 'active_record'
         
     | 
| 
      
 11 
     | 
    
         
            +
            require 'erb'
         
     | 
| 
      
 12 
     | 
    
         
            +
            require 'yaml'
         
     | 
| 
      
 13 
     | 
    
         
            +
            require 'kmdb/migration'
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            module KMDB
         
     | 
| 
      
 17 
     | 
    
         
            +
              class CustomRecord < ActiveRecord::Base
         
     | 
| 
      
 18 
     | 
    
         
            +
                DefaultConfig = {
         
     | 
| 
      
 19 
     | 
    
         
            +
                  'adapter'  => 'sqlite3',
         
     | 
| 
      
 20 
     | 
    
         
            +
                  'database' => "test.db"
         
     | 
| 
      
 21 
     | 
    
         
            +
                }
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
                def self.disable_index
         
     | 
| 
      
 24 
     | 
    
         
            +
                  connection.execute %Q{
         
     | 
| 
      
 25 
     | 
    
         
            +
                    ALTER TABLE `#{table_name}` DISABLE KEYS;
         
     | 
| 
      
 26 
     | 
    
         
            +
                  }
         
     | 
| 
      
 27 
     | 
    
         
            +
                end
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
                def self.enable_index
         
     | 
| 
      
 30 
     | 
    
         
            +
                  connection.execute %Q{
         
     | 
| 
      
 31 
     | 
    
         
            +
                    ALTER TABLE `#{table_name}` ENABLE KEYS;
         
     | 
| 
      
 32 
     | 
    
         
            +
                  }
         
     | 
| 
      
 33 
     | 
    
         
            +
                end
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
                def self.find_or_create(options)
         
     | 
| 
      
 36 
     | 
    
         
            +
                  find(:first, :conditions => options) || create(options)
         
     | 
| 
      
 37 
     | 
    
         
            +
                end
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
                def self.connect_to_km_db!
         
     | 
| 
      
 40 
     | 
    
         
            +
                  config = DefaultConfig.dup
         
     | 
| 
      
 41 
     | 
    
         
            +
                  ['km_db.yml', 'config/km_db.yml'].each do |config_path|
         
     | 
| 
      
 42 
     | 
    
         
            +
                    next unless File.exist?(config_path)
         
     | 
| 
      
 43 
     | 
    
         
            +
                    config.merge! YAML.load(ERB.new(File.open(config_path).read).result)
         
     | 
| 
      
 44 
     | 
    
         
            +
                    break
         
     | 
| 
      
 45 
     | 
    
         
            +
                  end
         
     | 
| 
      
 46 
     | 
    
         
            +
                  establish_connection(config)
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
                  unless connection.table_exists?('events')
         
     | 
| 
      
 49 
     | 
    
         
            +
                    SetupEventsDatabase.up
         
     | 
| 
      
 50 
     | 
    
         
            +
                    self.reset_column_information
         
     | 
| 
      
 51 
     | 
    
         
            +
                  end
         
     | 
| 
      
 52 
     | 
    
         
            +
                end
         
     | 
| 
      
 53 
     | 
    
         
            +
              end
         
     | 
| 
      
 54 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,23 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'kmdb/custom_record'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module KMDB
              # Row in the "dumpfiles" table: remembers, per (path, job) pair, the
              # offset up to which a dump file has already been read.
              class Dumpfile < CustomRecord
                set_table_name "dumpfiles"

                validates_presence_of :offset
                validates_presence_of :path

                # Look up (or create, via CustomRecord.find_or_create) the record for
                # the given Pathname and job label.
                # A missing job is stored under the literal string 'nil'.
                def self.get(pathname, job = nil)
                  job_label  = job || 'nil'
                  clean_path = pathname.cleanpath.to_s
                  find_or_create(:path => clean_path, :job => job_label)
                end

                # Stored offset, defaulting to 0 when none has been saved yet.
                def offset
                  stored = attributes['offset']
                  return 0 unless stored
                  stored
                end

                # Persist a new offset; raises if the record fails validation.
                def set(offset)
                  update_attributes!(:offset => offset)
                end
              end
            end
         
     | 
    
        data/lib/kmdb/event.rb
    ADDED
    
    | 
         @@ -0,0 +1,39 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'kmdb/custom_record'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'kmdb/belongs_to_user'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'kmdb/has_properties'
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module KMDB
              # Row in the "events" table: one tracked event, with a timestamp (`t`),
              # the id of its name in the keys table (`n`) and an owning user.
              class Event < CustomRecord
                include BelongsToUser
                include HasProperties

                set_table_name "events"

                # Events whose timestamp is strictly before / after +date+.
                named_scope :before, lambda { |date| { :conditions => ["`#{table_name}`.`t` < ?", date] } }
                named_scope :after,  lambda { |date| { :conditions => ["`#{table_name}`.`t` > ?", date] } }

                # Events with the given name; the name string is mapped to its key id.
                named_scope :named, lambda { |name| { :conditions => { :n => KMDB::Key.get(name) } } }

                # Order events by ascending timestamp.
                named_scope :by_date, lambda { { :order => "`#{table_name}`.`t` ASC" } }

                # Return the value of the first property called +name+ on this event,
                # or nil when there is none.
                def prop(name)
                  properties.named(name).first.andand.value
                end

                # Return the event name as a string.
                # Keys store their text in the `string` column (there is no `value`
                # attribute on Key), so read that column.
                def name
                  KMDB::Key.find(n).string
                end

                # Record an event from a parsed hash.
                #
                # Consumes the '_p' (user name), '_t' (epoch timestamp) and '_n'
                # (event name) entries of +hash+ -- the hash is mutated -- and hands
                # whatever remains to Property.set together with the new event.
                #
                # Raises UserError when no user can be obtained for '_p'.
                # Returns whatever Property.set returns.
                def self.record(hash)
                  user_name = hash.delete('_p')
                  user = User.get(user_name)
                  raise UserError.new "User missing for '#{user_name}'" unless user.present?

                  stamp = Time.at hash.delete('_t')
                  key = Key.get hash.delete('_n')
                  event = create(:t => stamp, :n => key, :user => user)
                  Property.set(hash, stamp, user, event)
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,33 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            =begin
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
              KMDB::HasProperties --
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
              Trait shared by Event and User.
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            =end
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            module KMDB
              module HasProperties
                # When included, gives the host model a `properties` association and a
                # `with_properties` named scope.
                def self.included(mod)
                  mod.class_eval do
                    has_many   :properties, :class_name => 'KMDB::Property'

                    # with_properties('prop a', 'prop b', [:exclude_missing])
                    #
                    # Adds one joined column per requested property name, aliased to the
                    # name with whitespace runs replaced by underscores. By default the
                    # joins are LEFT joins; passing :exclude_missing makes them INNER
                    # joins.
                    #
                    # NOTE(review): the join condition uses `event_id` for every host
                    # model, although this trait is described as shared with User --
                    # confirm this is the intended column there.
                    named_scope :with_properties, lambda { |*props|
                      direction = props.delete(:exclude_missing) ? 'INNER' : 'LEFT'
                      prop_table = Property.table_name
                      selects = ["`#{table_name}`.*"]
                      joins = []
                      props.each_with_index { |prop,k|
                        # One aliased copy of the properties table per requested property.
                        temp_name = "#{prop_table}_#{k}"
                        selects << "`#{temp_name}`.`value` AS `#{prop.split.join('_')}`"
                        # Use the model's actual table name rather than a hard-coded one,
                        # consistent with the alias built above.
                        joins << sanitize_sql_array([%Q{
                          #{direction} JOIN `#{prop_table}` AS `#{temp_name}`
                          ON `#{table_name}`.id = `#{temp_name}`.event_id 
                          AND `#{temp_name}`.`key` = ?}, KMDB::Key.get(prop)])
                      }
                      { :select => selects.join(', '), :joins => joins.join("\n") }
                    }
                  end
                end
              end
            end
         
     | 
    
        data/lib/kmdb/key.rb
    ADDED
    
    | 
         @@ -0,0 +1,56 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            =begin
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
              Map strings (event and property names) to unique integers (Key#id) for performance
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            =end
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            require 'kmdb/custom_record'
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            module KMDB
              # Row in the "keys" table: maps a string (event or property name) to an
              # integer id.
              class Key < CustomRecord
                set_table_name "keys"

                has_many :events,     :foreign_key => :n,   :class_name => 'KMDB::Event',    :dependent => :delete_all
                has_many :properties, :foreign_key => :key, :class_name => 'KMDB::Property', :dependent => :delete_all

                # Keys whose string appears more than once in the table.
                named_scope :has_duplicate, lambda {
                  {
                    :select => "id, string, COUNT(id) AS quantity",
                    :group => :string, :having => "quantity > 1"
                  }
                }

                # Return the id for +string+, creating the key if needed.
                # Results are memoized per process in a class-level hash.
                def self.get(string)
                  @cache ||= {}
                  @cache[string] ||= get_uncached(string)
                end

                # Replace each duplicate key ID with its most-used variant
                def self.fix_duplicates!
                  has_duplicate.map(&:string).each do |string|
                    all_keys = find(:all, :conditions => { :string => string })

                    # Sort key ids by usage, least used first. Usage is counted
                    # directly on the foreign-key columns: the `named` scope on Event
                    # maps its argument through Key.get, which expects a name string,
                    # not a key id.
                    all_ids = all_keys.map { |key|
                      usage = Event.count(:conditions => { :n => key.id }) +
                              Property.count(:conditions => { :key => key.id })
                      [key.id, usage]
                    }.sort_by { |pair| pair.last }.map { |pair| pair.first }

                    id_to_keep = all_ids.pop
                    $stderr.write "Fixing key '#{string}' #{all_ids.inspect} -> #{id_to_keep.inspect}\n"
                    Event.update_all({ :n => id_to_keep }, ["`events`.`n` IN (?)", all_ids])
                    Property.update_all({ :key => id_to_keep }, ["`properties`.`key` IN (?)", all_ids])
                    Key.delete_all(["id IN (?)", all_ids])

                    # The memoized id for this string may be one that was just deleted.
                    @cache.delete(string) if @cache
                  end
                end

              private

                # Look up or create the key for +string+ and return its id.
                # Raises RuntimeError when the string is longer than MaxStringSize.
                # (`private` above does not apply to methods defined with `def self.`,
                # so this remains callable from outside the class.)
                def self.get_uncached(string)
                  string.size <= MaxStringSize or raise "String is too long"
                  find_or_create(:string => string).id
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,63 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            =begin
         
     | 
| 
      
 2 
     | 
    
         
            +
              
         
     | 
| 
      
 3 
     | 
    
         
            +
              Set up a custom database for KissMetrics tracking events.
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            =end
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            require 'active_record'
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            module KMDB
              # Migration creating (and removing) the events, keys, properties, users
              # and dumpfiles tables.
              class SetupEventsDatabase < ActiveRecord::Migration
                # Run against the connection used by CustomRecord.
                def self.connection
                  CustomRecord.connection
                end

                def self.up
                  create_table :events do |t|
                    t.integer  :user_id
                    t.integer  :n
                    t.datetime :t
                  end
                  add_index :events, [:n]
                  add_index :events, [:user_id]


                  create_table :keys do |t|
                    t.string :string, :limit => MaxStringSize
                  end
                  add_index :keys, [:string]

                  create_table :properties do |t|
                    t.integer  :user_id
                    t.integer  :event_id
                    t.integer  :key
                    t.string   :value,   :limit => 64
                    t.datetime :t
                  end
                  add_index :properties, [:key]
                  add_index :properties, [:user_id]
                  add_index :properties, [:event_id]

                  create_table :users do |t|
                    t.string  :name, :limit => 48
                    t.integer :alias_id
                  end
                  add_index :users, [:name]

                  create_table :dumpfiles do |t|
                    t.string  :path
                    t.string  :job
                    t.integer :offset
                  end
                  add_index :dumpfiles, [:path]

                end

                # Drop exactly the tables created by +up+. There is no `aliases`
                # table (aliases are the `alias_id` column on users), so it is not
                # dropped here.
                def self.down
                  drop_table :events
                  drop_table :keys
                  drop_table :properties
                  drop_table :users
                  drop_table :dumpfiles
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,85 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'kmdb/parser'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'parallel'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            module KMDB
              # Parser variant that processes input files in several worker processes
              # and reports progress to the parent process through a pipe.
              class ParallelParser < Parser

                # Accepts the same options as Parser, plus :workers (number of worker
                # processes; defaults to Parallel.processor_count).
                def initialize(options = {})
                  super(options)
                  @worker_count = options.delete(:workers) || Parallel.processor_count
                end

                # Process every file found in +argv+.
                #
                # A forked child runs the workers; this process becomes the "gatherer",
                # reading "OK <bytes>" and "FILE <name>" lines from the pipe to drive
                # the progress bar, then waits for all children.
                def run(argv)
                  @pipe_rd, @pipe_wr = IO.pipe

                  inputs = list_files_in(argv)
                  total_bytes = total_size_of_files(inputs)
                  log "total bytes : #{total_bytes}"
                  # Bytes already recorded as read in earlier runs do not count.
                  total_bytes -= inputs.map { |p| Dumpfile.get(p, @resume_job) }.compact.map(&:offset).sum
                  log "left to process : #{total_bytes}"

                  # Start workers
                  log "Using #{@worker_count} workers."
                  Process.fork do
                    @pipe_rd.close
                    Parallel.each(inputs, :in_processes => @worker_count) do |input|
                      # Re-establish the database connection inside each worker process.
                      KMDB::Event.connection.reconnect!
                      log "Worker #{Process.pid} starting #{input}"
                      $0 = "worker: #{input}"
                      process_events_in_file(input)
                      log "Worker #{Process.pid} done"
                      true
                    end
                  end

                  # Start gatherer
                  $0 = "gatherer: #{$0}"
                  # Close our copy of the write end so the read loop ends once the
                  # children have closed theirs.
                  @pipe_wr.close
                  byte_counter = 0
                  log "Starting gatherer, total bytes: #{total_bytes}"
                  progress = ProgressBar.new("-" * 20, total_bytes)
                  while line = @pipe_rd.gets
                    if line =~ /^OK (\d+)$/
                      byte_counter += $1.to_i
                      progress.set byte_counter
                    elsif line =~ /^FILE (.*)$/
                      progress.title = $1
                    else
                      log "Unparsed line: '#{line}'"
                    end
                  end
                  progress.finish
                  log "Total bytes processed: #{byte_counter}"
                  Process.waitall
                end

              private

                # Read +pathname+ line by line, passing each line to process_event and
                # reporting the number of bytes handled on the pipe roughly every
                # 100,000 bytes.
                #
                # When a resume job is set, reading starts at the stored offset and the
                # offset is saved after every line; otherwise no offset is tracked.
                def process_events_in_file(pathname)
                  pathname.open do |input|
                    processed_bytes = 0
                    dumpfile = nil
                    if @resume_job
                      dumpfile = Dumpfile.get(pathname, @resume_job)
                      log "Starting file #{pathname} from offset #{dumpfile.offset}"
                      input.seek(dumpfile.offset)
                    end
                    @pipe_wr.write "FILE #{pathname.basename}\n"
                    while line = input.gets
                      # The progress total is measured in bytes, so count bytes rather
                      # than characters.
                      processed_bytes += line.bytesize

                      process_event(line)
                      # Only resumable runs have a dumpfile record to update.
                      dumpfile.set(input.tell) if dumpfile

                      if processed_bytes > 100_000
                        @pipe_wr.write "OK #{processed_bytes}\n"
                        processed_bytes = 0
                      end
                    end
                    @pipe_wr.write "OK #{processed_bytes}\n"
                  end
                end

              end
            end
         
     | 
    
        data/lib/kmdb/parser.rb
    ADDED
    
    | 
         @@ -0,0 +1,143 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'yajl/json_gem'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'pathname'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'progressbar'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'pstore'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            module KMDB
              # Reads newline-delimited JSON dump files and hands each parsed event to
              # the registered filters.
              #
              # Typical use: build a parser, optionally restrict the lines it looks at
              # with #exclude / #only, register one or more filters with #add_filter,
              # then call #run with a list of files and/or directories.
              class Parser
                # Progress bar whose title can be reassigned while it is running
                # (used to display the name of the file currently being read).
                class ProgressBar < ::ProgressBar
                  attr_writer :title
                end

                attr :resume_job
                attr :verbose
                attr :abort_on_error

                # options (the keys below are removed from the hash that is passed in):
                #   :verbose        - when truthy, diagnostics are written to $stderr
                #   :resume         - job identifier handed to Dumpfile.get; when set,
                #                     per-file offsets are read and recorded
                #   :abort_on_error - when truthy, parse errors are re-raised instead of
                #                     the offending line being skipped
                def initialize(options = {})
                  @processed_bytes = nil
                  @total_bytes = nil
                  @exclude_regexps = []
                  @include_regexps = []
                  @filters = []
                  @verbose        = options.delete(:verbose)
                  @resume_job     = options.delete(:resume)
                  @abort_on_error = options.delete(:abort_on_error)

                  if @resume_job && @verbose && Dumpfile.count > 0
                    log "Using restart information"
                  end
                end

                # Skip every raw line matching +regexp+. Returns self for chaining.
                def exclude(regexp)
                  @exclude_regexps << regexp
                  self
                end

                # Only keep raw lines matching +regexp+ (all registered regexps must
                # match). Returns self for chaining.
                def only(regexp)
                  @include_regexps << regexp
                  self
                end

                # Register a filter block. It is called with (raw_text, parsed_data);
                # its return value becomes the data passed to the next filter, and a
                # nil/false return stops the chain for that event.
                # Returns self for chaining.
                def add_filter(&block)
                  @filters << block
                  self
                end

                # Process every file named in +argv+ (directories are searched for
                # *.json files), in sorted order, updating the progress bar as bytes
                # are consumed.
                #
                # Raises RuntimeError if one of the paths does not exist.
                def run(argv)
                  inputs = list_files_in(argv)
                  total_bytes = total_size_of_files(inputs)
                  log "total bytes : #{total_bytes}"

                  # Subtract what Dumpfile reports as already consumed.
                  # inject is used rather than Array#sum so this does not rely on
                  # ActiveSupport core extensions (same idiom as total_size_of_files).
                  already_done = inputs.map { |path| Dumpfile.get(path, @resume_job) }.
                                        compact.map(&:offset).inject(0) { |a, b| a + b }
                  total_bytes -= already_done
                  log "left to process : #{total_bytes}"

                  @processed_bytes = 0
                  @progress = ProgressBar.new("-" * 20, total_bytes)
                  @progress.long_running if @progress.respond_to?(:long_running)

                  inputs.sort.each do |input|
                    process_events_in_file(input)
                  end

                  @progress.finish
                end

              private

                # Write +message+ to $stderr, but only in verbose mode.
                def log(message)
                  $stderr.write(message + "\n") if @verbose
                end

                # Handle one raw line: apply the exclude/include regexps, undo the
                # octal escaping, parse the JSON and run the filter chain.
                # Unparseable lines are logged and skipped unless @abort_on_error is set.
                def process_event(text)
                  return if @exclude_regexps.any? { |re| text =~ re }
                  return unless @include_regexps.all? { |re| text =~ re }

                  # filter strange utf-8 encoding/escaping found in KM dumps
                  # (backslash-escaped octal byte pairs such as \303\251)
                  if text =~ /\\30[3-5]\\[0-9]{3}/
                    begin
                      # SECURITY(review): this evaluates the raw dump line as a Ruby
                      # double-quoted string, so a line containing "#{...}" executes
                      # arbitrary code. Only run this on dumps from a trusted source;
                      # decoding the octal escapes explicitly would be safer.
                      text = eval("%Q(#{text})")
                    rescue SyntaxError => e
                      log "Syntax error in: #{text}"
                      raise e if @abort_on_error
                      # otherwise fall through and try to parse the unmodified text
                    end
                  end

                  begin
                    data = JSON.parse(text)
                  rescue JSON::ParserError => e
                    log "Warning, JSON parse error in: #{text}"
                    raise e if @abort_on_error
                    return
                  end

                  if data.nil?
                    log "Warning, JSON parse failed in: #{text}"
                    return
                  end

                  @filters.each do |filter|
                    data = filter.call(text, data) or break
                  end
                end

                # Read +pathname+ line by line and pass each line to #process_event.
                # When resuming, reading starts at the offset stored in the Dumpfile
                # record and the current position is stored after every line.
                def process_events_in_file(pathname)
                  pathname.open do |input|
                    @progress.title = pathname.basename.to_s
                    if @resume_job
                      dumpfile = Dumpfile.get(pathname, @resume_job)
                      log "Starting file #{pathname} from offset #{dumpfile.offset}"
                      input.seek(dumpfile.offset)
                    end
                    line_number = 0
                    while line = input.gets
                      # The progress total is measured in bytes (File::Stat#size), so
                      # count bytes here too; String#size counts characters on
                      # Ruby >= 1.9 and would under-report multi-byte lines.
                      @processed_bytes += line.bytesize
                      # refresh the bar only every 100 lines
                      @progress.set @processed_bytes if line_number % 100 == 0
                      line_number += 1

                      process_event(line)
                      dumpfile.set(input.tell) if @resume_job
                    end
                  end
                end

                # Sum of the on-disk sizes (in bytes) of the given Pathnames.
                def total_size_of_files(inputs)
                  inputs.map { |c| c.stat.size }.inject(0) { |a,b| a+b }
                end

                # Recursively collect every path under +directory+ whose name ends in
                # ".json", sorted.
                def list_files_in_directory(directory)
                  input_fns = []
                  directory.find do |input_pn|
                    input_pn.to_s =~ /\.json$/ or next
                    input_fns << input_pn
                  end
                  input_fns.sort
                end

                # Turn command-line arguments into a flat list of Pathnames, expanding
                # directories. Raises RuntimeError for a path that does not exist.
                def list_files_in(argv)
                  argv.map { |arg| Pathname.new(arg) }.map { |pn|
                    pn.exist? and pn or raise "No such file or directory '#{pn}'"
                  }.map { |pn|
                    pn.directory? ? list_files_in_directory(pn) : pn
                  }.flatten
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,33 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'kmdb/belongs_to_user'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module KMDB
              # One row of the "properties" table: a key/value pair recorded for a
              # user at time `t`, optionally attached to an event.
              class Property < CustomRecord
                include BelongsToUser

                set_table_name "properties"
                belongs_to :event, :class_name => 'KMDB::Event'

                # most recent first
                default_scope :order => 't DESC'
                named_scope :named, lambda { |name| { :conditions => { :key => KMDB::Key.get(name) } } }

                # Bulk-insert the properties contained in +hash+ with a single INSERT.
                #
                # hash  - property name => value. The special keys '_p' (user name) and
                #         '_t' (timestamp) are removed from the hash and not stored as
                #         properties. NOTE: the caller's hash is modified.
                # stamp - used when the hash carries no '_t'
                # user  - used instead of looking up '_p' through User.get
                # event - when given, its id is stored in `event_id`
                #
                # Raises UserError when no user could be determined.
                # Returns nil without touching the database when no property is left
                # after removing the special keys.
                def self.set(hash, stamp=nil, user=nil, event=nil)
                  user_name = hash.delete('_p')
                  user ||= User.get(user_name)
                  if user.blank?
                    raise UserError.new("User missing for '#{user_name}'")
                  end

                  event_id = (event.id if event)
                  stamp = Time.at(hash.delete('_t') || stamp)

                  return if hash.empty?

                  # one sanitized "(t,user_id,event_id,key,value)" tuple per property
                  tuples = hash.map do |prop_name, value|
                    key = Key.get(prop_name)
                    sanitize_sql_array(["(?,?,?,?,?)", stamp, user.id, event_id, key, value])
                  end

                  statement = "INSERT INTO `#{table_name}` (`t`,`user_id`,`event_id`,`key`,`value`) VALUES "
                  connection.execute(statement + tuples.join(","))
                end
              end
            end
         
     | 
    
        data/lib/kmdb/user.rb
    ADDED
    
    | 
         @@ -0,0 +1,83 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'kmdb/has_properties'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module KMDB
              # One row of the "users" table, identified by a unique name. A user may
              # be an alias of another user (see the `alias` association).
              class User < CustomRecord
                include HasProperties

                set_table_name "users"

                has_many :events,     :class_name => 'KMDB::Event'
                # points to the aliased user. if set, no properties/events should belong to this user
                belongs_to :alias,    :class_name => 'KMDB::User'

                validates_presence_of   :name
                validates_uniqueness_of :name

                named_scope :named, lambda { |name| { :conditions => { :name => name } } }

                # One row per name that occurs more than once. `name` is part of the
                # select list because fix_duplicates! reads it from the returned rows;
                # without it ActiveRecord raises MissingAttributeError.
                named_scope :duplicates, lambda {{
                  :select => "id, name, COUNT(id) AS quantity", :group => :name, :having => "quantity > 1"
                }}

                # return (latest) value of property
                # (`properties` presumably comes from HasProperties -- not visible here)
                def prop(name)
                  properties.named(name).first.andand.value
                end

                # mark this user as aliasing another: every property and event owned
                # by this user is reassigned to +other+, then `alias` is set.
                # Raises if saving this record fails (update_attributes!).
                def aliases!(other)
                  [Property,Event].each do |model|
                    model.user_is(self).update_all({:user_id => other.id})
                  end
                  self.update_attributes!(:alias => other)
                end

                # return the user named `name` (creating it if necessary)
                # if `name` is an alias, return the original user
                def self.get(name)
                  user = named(name).first || create(:name => name)
                  user = user.alias while user.alias
                  return user
                end


                # mark the two names as pointing to the same user
                # (the user resolved from name2 becomes an alias of the one resolved
                # from name1)
                def self.alias!(name1, name2)
                  u1 = get(name1)
                  u2 = get(name2)
                  # get() already follows alias links, so these warnings are not
                  # expected to fire. The messages used to interpolate an undefined
                  # local `user`, which would have raised NameError.
                  $stderr.write "Warning: user '#{u1.name}' has an alias\n" if u1.alias
                  $stderr.write "Warning: user '#{u2.name}' has an alias\n" if u2.alias

                  # nothing to do if both names already point to the same user
                  return if u1 == u2

                  u2.aliases! u1
                end


                # duplication can occur during parallel imports because we're not running transactionally.
                # For every duplicated name, keep the last matching row, move the
                # properties and events of the others onto it and destroy them.
                def self.fix_duplicates!
                  duplicates.map(&:name).each do |name|
                    all_users = named(name).all
                    kept_user = all_users.pop
                    all_users.each do |user|
                      # Reassign records directly instead of calling aliases!: that
                      # method saves the user with validations, and the uniqueness
                      # validation on `name` cannot pass while the duplicate exists.
                      # Setting `alias` is pointless anyway as the row is destroyed.
                      [Property,Event].each do |model|
                        model.user_is(user).update_all({:user_id => kept_user.id})
                      end
                      user.destroy
                    end
                  end
                end


                # detect alias chains: users whose alias is itself an alias are
                # re-pointed at the end of the chain.
                def self.resolve_alias_chains!
                  find(:all, :joins => :alias, :conditions => 'aliases_users.alias_id IS NOT NULL').each do |user|
                    # reload by id before modifying the record
                    user = find(user.id)
                    origin = find(user.alias_id)
                    origin = origin.alias while origin.alias # go up the chain
                    $stderr.write "Aliasing #{user.name} -> #{origin.name}\n"
                    user.aliases!(origin)
                  end
                end
              end
            end
         
     | 
    
        data/lib/kmdb/version.rb
    ADDED
    
    
    
        data/lib/kmdb.rb
    ADDED
    
    
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,234 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification 
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: km-db
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version 
         
     | 
| 
      
 4 
     | 
    
         
            +
              hash: 21
         
     | 
| 
      
 5 
     | 
    
         
            +
              prerelease: 
         
     | 
| 
      
 6 
     | 
    
         
            +
              segments: 
         
     | 
| 
      
 7 
     | 
    
         
            +
              - 0
         
     | 
| 
      
 8 
     | 
    
         
            +
              - 2
         
     | 
| 
      
 9 
     | 
    
         
            +
              - 1
         
     | 
| 
      
 10 
     | 
    
         
            +
              version: 0.2.1
         
     | 
| 
      
 11 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 12 
     | 
    
         
            +
            authors: 
         
     | 
| 
      
 13 
     | 
    
         
            +
            - HouseTrip
         
     | 
| 
      
 14 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 15 
     | 
    
         
            +
            bindir: bin
         
     | 
| 
      
 16 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            date: 2013-03-23 00:00:00 +00:00
         
     | 
| 
      
 19 
     | 
    
         
            +
            default_executable: 
         
     | 
| 
      
 20 
     | 
    
         
            +
            dependencies: 
         
     | 
| 
      
 21 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 22 
     | 
    
         
            +
              requirement: &id001 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 23 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 24 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 25 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 26 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 27 
     | 
    
         
            +
                    hash: 23
         
     | 
| 
      
 28 
     | 
    
         
            +
                    segments: 
         
     | 
| 
      
 29 
     | 
    
         
            +
                    - 1
         
     | 
| 
      
 30 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 31 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 32 
     | 
    
         
            +
                    version: 1.0.0
         
     | 
| 
      
 33 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 34 
     | 
    
         
            +
              name: bundler
         
     | 
| 
      
 35 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 36 
     | 
    
         
            +
              version_requirements: *id001
         
     | 
| 
      
 37 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 38 
     | 
    
         
            +
              requirement: &id002 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 39 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 40 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 41 
     | 
    
         
            +
                - - ~>
         
     | 
| 
      
 42 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 43 
     | 
    
         
            +
                    hash: 31
         
     | 
| 
      
 44 
     | 
    
         
            +
                    segments: 
         
     | 
| 
      
 45 
     | 
    
         
            +
                    - 2
         
     | 
| 
      
 46 
     | 
    
         
            +
                    - 4
         
     | 
| 
      
 47 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 48 
     | 
    
         
            +
                    version: 2.4.0
         
     | 
| 
      
 49 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 50 
     | 
    
         
            +
              name: rspec
         
     | 
| 
      
 51 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 52 
     | 
    
         
            +
              version_requirements: *id002
         
     | 
| 
      
 53 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 54 
     | 
    
         
            +
              requirement: &id003 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 55 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 56 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 57 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 58 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 59 
     | 
    
         
            +
                    hash: 3
         
     | 
| 
      
 60 
     | 
    
         
            +
                    segments: 
         
     | 
| 
      
 61 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 62 
     | 
    
         
            +
                    version: "0"
         
     | 
| 
      
 63 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 64 
     | 
    
         
            +
              name: rake
         
     | 
| 
      
 65 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 66 
     | 
    
         
            +
              version_requirements: *id003
         
     | 
| 
      
 67 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 68 
     | 
    
         
            +
              requirement: &id004 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 69 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 70 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 71 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 72 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 73 
     | 
    
         
            +
                    hash: 3
         
     | 
| 
      
 74 
     | 
    
         
            +
                    segments: 
         
     | 
| 
      
 75 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 76 
     | 
    
         
            +
                    version: "0"
         
     | 
| 
      
 77 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 78 
     | 
    
         
            +
              name: json
         
     | 
| 
      
 79 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 80 
     | 
    
         
            +
              version_requirements: *id004
         
     | 
| 
      
 81 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 82 
     | 
    
         
            +
              requirement: &id005 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 83 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 84 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 85 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 86 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 87 
     | 
    
         
            +
                    hash: 3
         
     | 
| 
      
 88 
     | 
    
         
            +
                    segments: 
         
     | 
| 
      
 89 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 90 
     | 
    
         
            +
                    version: "0"
         
     | 
| 
      
 91 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 92 
     | 
    
         
            +
              name: sqlite3-ruby
         
     | 
| 
      
 93 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 94 
     | 
    
         
            +
              version_requirements: *id005
         
     | 
| 
      
 95 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 96 
     | 
    
         
            +
              requirement: &id006 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 97 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 98 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 99 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 100 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 101 
     | 
    
         
            +
                    hash: 3
         
     | 
| 
      
 102 
     | 
    
         
            +
                    segments: 
         
     | 
| 
      
 103 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 104 
     | 
    
         
            +
                    version: "0"
         
     | 
| 
      
 105 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 106 
     | 
    
         
            +
              name: yajl-ruby
         
     | 
| 
      
 107 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 108 
     | 
    
         
            +
              version_requirements: *id006
         
     | 
| 
      
 109 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 110 
     | 
    
         
            +
              requirement: &id007 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 111 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 112 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 113 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 114 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 115 
     | 
    
         
            +
                    hash: 3
         
     | 
| 
      
 116 
     | 
    
         
            +
                    segments: 
         
     | 
| 
      
 117 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 118 
     | 
    
         
            +
                    version: "0"
         
     | 
| 
      
 119 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 120 
     | 
    
         
            +
              name: progressbar
         
     | 
| 
      
 121 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 122 
     | 
    
         
            +
              version_requirements: *id007
         
     | 
| 
      
 123 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 124 
     | 
    
         
            +
              requirement: &id008 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 125 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 126 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 127 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 128 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 129 
     | 
    
         
            +
                    hash: 3
         
     | 
| 
      
 130 
     | 
    
         
            +
                    segments: 
         
     | 
| 
      
 131 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 132 
     | 
    
         
            +
                    version: "0"
         
     | 
| 
      
 133 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 134 
     | 
    
         
            +
              name: parallel
         
     | 
| 
      
 135 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 136 
     | 
    
         
            +
              version_requirements: *id008
         
     | 
| 
      
 137 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 138 
     | 
    
         
            +
              requirement: &id009 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 139 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 140 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 141 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 142 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 143 
     | 
    
         
            +
                    hash: 3
         
     | 
| 
      
 144 
     | 
    
         
            +
                    segments: 
         
     | 
| 
      
 145 
     | 
    
         
            +
                    - 0
         
     | 
| 
      
 146 
     | 
    
         
            +
                    version: "0"
         
     | 
| 
      
 147 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 148 
     | 
    
         
            +
              name: andand
         
     | 
| 
      
 149 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 150 
     | 
    
         
            +
              version_requirements: *id009
         
     | 
| 
      
 151 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency 
         
     | 
| 
      
 152 
     | 
    
         
            +
              requirement: &id010 !ruby/object:Gem::Requirement 
         
     | 
| 
      
 153 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 154 
     | 
    
         
            +
                requirements: 
         
     | 
| 
      
 155 
     | 
    
         
            +
                - - ~>
         
     | 
| 
      
 156 
     | 
    
         
            +
                  - !ruby/object:Gem::Version 
         
     | 
| 
      
 157 
     | 
    
         
            +
                    hash: 27
         
     | 
| 
      
 158 
     | 
    
         
            +
                    segments: 
         
     | 
| 
      
 159 
     | 
    
         
            +
                    - 2
         
     | 
| 
      
 160 
     | 
    
         
            +
                    - 3
         
     | 
| 
      
 161 
     | 
    
         
            +
                    - 12
         
     | 
| 
      
 162 
     | 
    
         
            +
                    version: 2.3.12
         
     | 
| 
      
 163 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 164 
     | 
    
         
            +
              name: activerecord
         
     | 
| 
      
 165 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 166 
     | 
    
         
            +
              version_requirements: *id010
         
     | 
| 
      
 167 
     | 
    
         
            +
            description: Process KISSmetrics data dumps
         
     | 
| 
      
 168 
     | 
    
         
            +
            email: 
         
     | 
| 
      
 169 
     | 
    
         
            +
            - jtl@housetrip.com
         
     | 
| 
      
 170 
     | 
    
         
            +
            executables: 
         
     | 
| 
      
 171 
     | 
    
         
            +
            - km_db_import
         
     | 
| 
      
 172 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 173 
     | 
    
         
            +
             
     | 
| 
      
 174 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 175 
     | 
    
         
            +
             
     | 
| 
      
 176 
     | 
    
         
            +
            files: 
         
     | 
| 
      
 177 
     | 
    
         
            +
            - Gemfile
         
     | 
| 
      
 178 
     | 
    
         
            +
            - Gemfile.lock
         
     | 
| 
      
 179 
     | 
    
         
            +
            - README.markdown
         
     | 
| 
      
 180 
     | 
    
         
            +
            - Rakefile
         
     | 
| 
      
 181 
     | 
    
         
            +
            - bin/km_db_import
         
     | 
| 
      
 182 
     | 
    
         
            +
            - km-db.gemspec
         
     | 
| 
      
 183 
     | 
    
         
            +
            - lib/kmdb.rb
         
     | 
| 
      
 184 
     | 
    
         
            +
            - lib/kmdb/belongs_to_user.rb
         
     | 
| 
      
 185 
     | 
    
         
            +
            - lib/kmdb/custom_record.rb
         
     | 
| 
      
 186 
     | 
    
         
            +
            - lib/kmdb/dumpfile.rb
         
     | 
| 
      
 187 
     | 
    
         
            +
            - lib/kmdb/event.rb
         
     | 
| 
      
 188 
     | 
    
         
            +
            - lib/kmdb/has_properties.rb
         
     | 
| 
      
 189 
     | 
    
         
            +
            - lib/kmdb/key.rb
         
     | 
| 
      
 190 
     | 
    
         
            +
            - lib/kmdb/migration.rb
         
     | 
| 
      
 191 
     | 
    
         
            +
            - lib/kmdb/parallel_parser.rb
         
     | 
| 
      
 192 
     | 
    
         
            +
            - lib/kmdb/parser.rb
         
     | 
| 
      
 193 
     | 
    
         
            +
            - lib/kmdb/property.rb
         
     | 
| 
      
 194 
     | 
    
         
            +
            - lib/kmdb/user.rb
         
     | 
| 
      
 195 
     | 
    
         
            +
            - lib/kmdb/user_error.rb
         
     | 
| 
      
 196 
     | 
    
         
            +
            - lib/kmdb/version.rb
         
     | 
| 
      
 197 
     | 
    
         
            +
            has_rdoc: true
         
     | 
| 
      
 198 
     | 
    
         
            +
            homepage: https://github.com/housetrip/km-db
         
     | 
| 
      
 199 
     | 
    
         
            +
            licenses: []
         
     | 
| 
      
 200 
     | 
    
         
            +
             
     | 
| 
      
 201 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 202 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 203 
     | 
    
         
            +
             
     | 
| 
      
 204 
     | 
    
         
            +
            require_paths: 
         
     | 
| 
      
 205 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 206 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement 
         
     | 
| 
      
 207 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 208 
     | 
    
         
            +
              requirements: 
         
     | 
| 
      
 209 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 210 
     | 
    
         
            +
                - !ruby/object:Gem::Version 
         
     | 
| 
      
 211 
     | 
    
         
            +
                  hash: 3
         
     | 
| 
      
 212 
     | 
    
         
            +
                  segments: 
         
     | 
| 
      
 213 
     | 
    
         
            +
                  - 0
         
     | 
| 
      
 214 
     | 
    
         
            +
                  version: "0"
         
     | 
| 
      
 215 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement 
         
     | 
| 
      
 216 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 217 
     | 
    
         
            +
              requirements: 
         
     | 
| 
      
 218 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 219 
     | 
    
         
            +
                - !ruby/object:Gem::Version 
         
     | 
| 
      
 220 
     | 
    
         
            +
                  hash: 23
         
     | 
| 
      
 221 
     | 
    
         
            +
                  segments: 
         
     | 
| 
      
 222 
     | 
    
         
            +
                  - 1
         
     | 
| 
      
 223 
     | 
    
         
            +
                  - 3
         
     | 
| 
      
 224 
     | 
    
         
            +
                  - 6
         
     | 
| 
      
 225 
     | 
    
         
            +
                  version: 1.3.6
         
     | 
| 
      
 226 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 227 
     | 
    
         
            +
             
     | 
| 
      
 228 
     | 
    
         
            +
            rubyforge_project: 
         
     | 
| 
      
 229 
     | 
    
         
            +
            rubygems_version: 1.3.9.5
         
     | 
| 
      
 230 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 231 
     | 
    
         
            +
            specification_version: 3
         
     | 
| 
      
 232 
     | 
    
         
            +
            summary: Process KISSmetrics data dumps
         
     | 
| 
      
 233 
     | 
    
         
            +
            test_files: []
         
     | 
| 
      
 234 
     | 
    
         
            +
             
     |