statlysis 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1,51 @@
1
+ # rcov generated
2
+ coverage
3
+ coverage.data
4
+
5
+ # rdoc generated
6
+ rdoc
7
+
8
+ # yard generated
9
+ doc
10
+ .yardoc
11
+
12
+ # bundler
13
+ .bundle
14
+
15
+ # jeweler generated
16
+ pkg
17
+
18
+ # Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
19
+ #
20
+ # * Create a file at ~/.gitignore
21
+ # * Include files you want ignored
22
+ # * Run: git config --global core.excludesfile ~/.gitignore
23
+ #
24
+ # After doing this, these files will be ignored in all your git projects,
25
+ # saving you from having to 'pollute' every project you touch with them
26
+ #
27
+ # Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # of the line.)
28
+ #
29
+ # For MacOS:
30
+ #
31
+ .DS_Store
32
+
33
+ # For TextMate
34
+ *.tmproj
35
+ tmtags
36
+
37
+ # For emacs:
38
+ *~
39
+ \#*
40
+ .\#*
41
+
42
+ # For vim:
43
+ *.swp
44
+
45
+ # For redcar:
46
+ .redcar
47
+
48
+ # For rubinius:
49
+ *.rbc
50
+
51
+ coverage
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "http://rubygems.org"
2
+ gemspec
@@ -0,0 +1,110 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ statlysis (0.0.1)
5
+ activerecord
6
+ activerecord_idnamecache
7
+ activesupport
8
+ mongoid
9
+ mysql2
10
+ only_one_rake
11
+ rails
12
+ rake
13
+ sequel
14
+
15
+ GEM
16
+ remote: http://rubygems.org/
17
+ specs:
18
+ actionmailer (3.2.13)
19
+ actionpack (= 3.2.13)
20
+ mail (~> 2.5.3)
21
+ actionpack (3.2.13)
22
+ activemodel (= 3.2.13)
23
+ activesupport (= 3.2.13)
24
+ builder (~> 3.0.0)
25
+ erubis (~> 2.7.0)
26
+ journey (~> 1.0.4)
27
+ rack (~> 1.4.5)
28
+ rack-cache (~> 1.2)
29
+ rack-test (~> 0.6.1)
30
+ sprockets (~> 2.2.1)
31
+ activemodel (3.2.13)
32
+ activesupport (= 3.2.13)
33
+ builder (~> 3.0.0)
34
+ activerecord (3.2.13)
35
+ activemodel (= 3.2.13)
36
+ activesupport (= 3.2.13)
37
+ arel (~> 3.0.2)
38
+ tzinfo (~> 0.3.29)
39
+ activerecord_idnamecache (0.1)
40
+ activeresource (3.2.13)
41
+ activemodel (= 3.2.13)
42
+ activesupport (= 3.2.13)
43
+ activesupport (3.2.13)
44
+ i18n (= 0.6.1)
45
+ multi_json (~> 1.0)
46
+ arel (3.0.2)
47
+ builder (3.0.4)
48
+ erubis (2.7.0)
49
+ hike (1.2.3)
50
+ i18n (0.6.1)
51
+ journey (1.0.4)
52
+ json (1.8.0)
53
+ mail (2.5.4)
54
+ mime-types (~> 1.16)
55
+ treetop (~> 1.4.8)
56
+ mime-types (1.23)
57
+ mongoid (3.1.4)
58
+ activemodel (~> 3.2)
59
+ moped (~> 1.4)
60
+ origin (~> 1.0)
61
+ tzinfo (~> 0.3.22)
62
+ moped (1.5.0)
63
+ multi_json (1.7.7)
64
+ mysql2 (0.3.11)
65
+ only_one_rake (0.0.4)
66
+ origin (1.1.0)
67
+ polyglot (0.3.3)
68
+ rack (1.4.5)
69
+ rack-cache (1.2)
70
+ rack (>= 0.4)
71
+ rack-ssl (1.3.3)
72
+ rack
73
+ rack-test (0.6.2)
74
+ rack (>= 1.0)
75
+ rails (3.2.13)
76
+ actionmailer (= 3.2.13)
77
+ actionpack (= 3.2.13)
78
+ activerecord (= 3.2.13)
79
+ activeresource (= 3.2.13)
80
+ activesupport (= 3.2.13)
81
+ bundler (~> 1.0)
82
+ railties (= 3.2.13)
83
+ railties (3.2.13)
84
+ actionpack (= 3.2.13)
85
+ activesupport (= 3.2.13)
86
+ rack-ssl (~> 1.3.2)
87
+ rake (>= 0.8.7)
88
+ rdoc (~> 3.4)
89
+ thor (>= 0.14.6, < 2.0)
90
+ rake (10.1.0)
91
+ rdoc (3.12.2)
92
+ json (~> 1.4)
93
+ sequel (4.0.0)
94
+ sprockets (2.2.2)
95
+ hike (~> 1.2)
96
+ multi_json (~> 1.0)
97
+ rack (~> 1.0)
98
+ tilt (~> 1.1, != 1.3.0)
99
+ thor (0.18.1)
100
+ tilt (1.4.1)
101
+ treetop (1.4.14)
102
+ polyglot
103
+ polyglot (>= 0.3.1)
104
+ tzinfo (0.3.37)
105
+
106
+ PLATFORMS
107
+ ruby
108
+
109
+ DEPENDENCIES
110
+ statlysis!
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 David Chen at eoe.cn
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,43 @@
1
+ statlysis
2
+ ===============================================
3
+ Statistical analysis in a Ruby DSL.
4
+
5
+ Usage
6
+ -----------------------------------------------
7
+ ```ruby
8
+ module Statlysis
9
+ set_database :statlysis
10
+ update_time_columns :t
11
+ set_tablename_default_pre :st
12
+
13
+ # 初始化键值model
14
+ Statlysis::Top.new('', :test => true).pattern_table_and_model 'st_single_kvs'
15
+ Statlysis::Top.new('', :test => true).pattern_table_and_model 'st_single_kv_histories'
16
+
17
+ # 日常count
18
+ EoeLog.class # preload EoeLogTest
19
+ @log_model = IS_DEVELOP ? EoeLogTest : EoeLog
20
+ hourly @log_model, :t
21
+ daily @log_model, :t
22
+ daily @log_model.where(:ui => 0), :t
23
+ daily @log_model.where(:ui => {"$ne" => 0}), :t
24
+
25
+ # 统计各个模块
26
+ daily @log_model.where(:do => {"$in" => [DOMAINS_HASH[:blog], DOMAINS_HASH[:my]]}), :t
27
+ [:www, :code, :skill, :book, :edu, :news, :wiki, :salon, :android].each do |site|
28
+ daily @log_model.where(:do => DOMAINS_HASH[site]), :t
29
+ end
30
+ ```
31
+
32
+ TODO
33
+ -----------------------------------------------
34
+ 1. Admin interface
35
+ 2. statistical query api in Ruby and HTTP
36
+ 3. Interacting with Javascript charting library, e.g. Highcharts, D3.
37
+ 4. Add namespace to DSL, like rake
38
+ 5. More tests
39
+
40
+
41
+ Copyright
42
+ -----------------------------------------------
43
+ MIT. David Chen at eoe.cn.
@@ -0,0 +1,11 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'rake'
4
+ require 'rake/testtask'
5
+
6
# Define the test task (the default task below depends on :test), with the
# test/ directory appended to its load path.
Rake::TestTask.new { |test_task| test_task.libs << 'test' }

desc "Run tests"
task :default => :test
@@ -0,0 +1,134 @@
1
+ # encoding: UTF-8
2
+ # Sequel的操作均需通过Symbol
3
+ #
4
+ # 删除匹配的统计表
5
+ # Statlysis.sequel.tables.select {|i| i.to_s.match(//i) }.each {|i| Statlysis.sequel.drop_table i }
6
+
7
+ require "active_support/all"
8
+ require 'active_support/core_ext/module/attribute_accessors.rb'
9
+ require 'active_record'
10
+ require 'rails'
11
+ %w[yaml sequel only_one_rake mongoid].map(&method(:require))
12
+
13
+ module Statlysis
14
+ Units = %w[hour day week month year]
15
+ DefaultTableOpts = {:charset => "utf8", :collate => "utf8_general_ci", :engine => "MyISAM"}
16
+
17
# Binds +cron+ to its statistics table and to a generated Sequel model.
#
# cron      - object responding to stat_table_name, stat_table, stat_table=
#             and stat_model=.
# tablename - table to bind. When nil, cron.stat_table_name is used, and if
#             that is also nil/false, cron.stat_table.first_source_table.
#
# Returns the model class (also stored in cron.stat_model).
def self.setup_stat_table_and_model(cron, tablename = nil)
  tablename = tablename.nil? ? cron.stat_table_name : tablename
  tablename ||= cron.stat_table.first_source_table
  cron.stat_table = Statlysis.sequel[tablename.to_sym]

  model_name = tablename.to_s.singularize.camelize
  # NOTE(review): the model class is created by eval'ing source text built from
  # the table name, so the name has to form a valid constant and a valid symbol
  # literal. The generated `[]` returns the JSON-decoded `result` column of the
  # row matching `pattern` (creating the row if needed), or [] on any error.
  eval("class ::#{model_name} < Sequel::Model;
    self.set_dataset :#{tablename}
    def self.[] item_id
      JSON.parse(find_or_create(:pattern => item_id).result) rescue []
    end
  end; ")
  cron.stat_model = model_name.constantize
end
31
+
32
+ end
33
+
34
+ require 'statlysis/common'
35
+ require 'statlysis/timeseries'
36
+ require 'statlysis/clock'
37
+ require 'statlysis/rake'
38
+ require 'statlysis/cron'
39
+ require 'statlysis/similar'
40
+
41
+ module Statlysis
42
# Module-level settings used by the DSL methods of this module.
mattr_accessor :sequel, :default_time_columns, :database_opts, :tablename_default_pre

# One registry array per cron kind: <unit>_crons for every entry of Units,
# followed by realtime_crons, similar_crons and hotest_crons.
Units.each do |unit|
  mattr_accessor :"#{unit}_crons"
  send("#{unit}_crons=", [])
end
%w[realtime similar hotest].each do |kind|
  registry = :"#{kind}_crons"
  mattr_accessor registry
  send("#{registry}=", [])
end
# TODO _crons uniq, no readd

# Make the methods defined in this module callable on the module itself.
extend self
50
+
51
# Adds +columns+ to the list of default time columns. According to the original
# note, these columns are stripped when a statistics table name is assembled
# automatically.
#
# The list starts as [:created_at, :updated_at] when it has not been set yet.
#
# Returns the de-duplicated list, which is also stored in default_time_columns.
def update_time_columns(*columns)
  self.default_time_columns ||= [:created_at, :updated_at]
  self.default_time_columns = default_time_columns.concat(columns).uniq
end
57
+
58
# Connects Statlysis to its statistics database, creating the database first
# when it does not exist.
#
# sym_or_hash - a Symbol naming an entry of the Rails application's
#               config/database.yml, or a Hash of connection options. The
#               database name is read from the string key 'database'.
#
# Raises RuntimeError when the argument is neither a Symbol nor a Hash.
def set_database(sym_or_hash)
  self.database_opts =
    case sym_or_hash
    when Symbol
      YAML.load_file(Rails.root.join("config/database.yml"))[sym_or_hash.to_s]
    when Hash
      # The previous `elsif Hash` tested the constant itself (always truthy),
      # so every non-Symbol argument was accepted and the error below could
      # never be raised.
      sym_or_hash
    else
      raise "Statlysis#set_database only support symbol or hash params"
    end

  # Connect without the database name so that CREATE DATABASE can run before
  # the database is selected.
  self.sequel = Sequel.connect(database_opts.except('database'))
  sequel.execute("CREATE DATABASE IF NOT EXISTS #{database_opts['database']} DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;")
  sequel.use(database_opts['database'])
  # Statlysis.sequel.tables.map {|t| eval "class ::#{t.to_s.camelize} < ActiveRecord::Base; self.establish_connection Statlysis.database_opts; self.table_name = :#{t}; end; #{t.to_s.camelize}" }
end
71
+
72
# Stores the default table-name prefix, coerced to a String.
#
# Returns the stored prefix.
def set_tablename_default_pre(str)
  prefix = str.to_s
  self.tablename_default_pre = prefix
end
75
+
76
# Registers +source+ through #timely with a :day time unit.
#
# time_column - name of the time column handed to #timely (default :created_at).
def daily(source, time_column = :created_at)
  timely(source, :time_unit => :day, :time_column => time_column)
end
77
# Registers +source+ through #timely with an :hour time unit.
#
# time_column - name of the time column handed to #timely (default :created_at).
def hourly(source, time_column = :created_at)
  timely(source, :time_unit => :hour, :time_column => time_column)
end
78
+
79
# Guard used by the DSL methods: fails unless a database connection has been
# stored in +sequel+.
#
# Raises RuntimeError when +sequel+ is nil; returns nil otherwise.
def check_set_database
  return unless sequel.nil?

  raise "Please setup database first"
end
80
+
81
# Builds a Timely cron for +source+ and appends it to the registry that
# matches its time unit (e.g. day_crons for :day).
#
# source - object passed straight through to Timely.new.
# opts   - options Hash; missing keys are filled in place with
#          :time_column => :created_at and :time_unit => :day.
#
# Returns the registry array the cron was appended to.
# Raises RuntimeError (via check_set_database) when no database is configured,
# and NoMethodError when no "<time_unit>_crons" registry exists.
def timely(source, opts)
  check_set_database
  opts.reverse_merge! :time_column => :created_at, :time_unit => :day
  cron = Timely.new(source, opts)
  # Look the registry up by method name instead of evaluating a source string
  # built from the option value: nothing supplied by the caller is executed as
  # code, and the lookup no longer requires self to be a Module.
  send("#{opts[:time_unit]}_crons").push(cron)
end
87
+
88
# Registers a LastestVisits cron in realtime_crons.
# Per the original note, the real requirement is to compute the latest items
# grouped by special patterns, like user_id, url prefix, ...
#
# source - object passed straight through to LastestVisits.new.
# opts   - options Hash; :time_column defaults to :created_at (filled in place).
#
# Returns the realtime_crons array.
def lastest_visits(source, opts)
  check_set_database
  opts.reverse_merge! :time_column => :created_at
  visits = LastestVisits.new(source, opts)
  realtime_crons.push(visits)
end
94
+
95
# Registers a HotestItems cron in hotest_crons.
#
# key                       - passed straight through to HotestItems.new.
# id_to_score_and_time_hash - Hash used as the data when no block is given.
# block                     - optional; receives a fresh empty Hash to fill.
#
# The callable handed to HotestItems is a proc that returns another proc;
# calling the inner proc yields the Hash (block-filled or the argument).
# TODO (from the original author): why does a single level of proc end up
# being executed directly?
#
# Returns the hotest_crons array.
def hotest_items(key, id_to_score_and_time_hash = {})
  builder = proc do
    if block_given?
      proc { {}.tap { |scores| yield scores } }
    else
      proc { id_to_score_and_time_hash }
    end
  end

  hotest_crons.push(HotestItems.new(key, builder))
end
109
+
110
# Registers a Similar cron in similar_crons.
# TODO support mongoid
#
# model_name      - passed straight through to Similar.new.
# id_to_text_hash - Hash used as the data when no block is given.
# block           - optional; receives a fresh Hash whose missing keys
#                   default to (and store) an empty String.
#
# Returns the similar_crons array.
def similar_items(model_name, id_to_text_hash = {})
  text_source =
    if block_given?
      proc do
        texts = Hash.new { |store, id| store[id] = "" }
        yield texts
        texts
      end
    else
      proc { id_to_text_hash }
    end

  similar_crons.push(Similar.new(model_name, text_source))
end
124
+
125
+ end
126
+
127
+
128
module Statlysis
  # When Rails is loaded, hook into it so that the gem's rake tasks
  # (statlysis/rake.rb, next to this file) are loaded with the application's.
  if defined? Rails
    class Railtie < Rails::Railtie
      rake_tasks { load File.expand_path('../statlysis/rake.rb', __FILE__) }
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # encoding: UTF-8
2
+
3
+ module Statlysis
4
+ DateTime1970 = DateTime.parse("19700101").in_time_zone
5
+
6
# Persists one timestamp per named feature in the "<prefix>_clocks" table
# (the prefix comes from Statlysis.tablename_default_pre).
class Clock
  include Common
  attr_accessor :clock

  # Ensures the clocks table and its model exist, then loads or creates the
  # row for +feature+.
  #
  # feature      - value stored in the unique :feature column.
  # default_time - timestamp written when the row has no time yet.
  #
  # Raises RuntimeError when default_time is nil or false.
  def initialize(feature, default_time)
    raise "Please assign default_time params" unless default_time

    self.stat_table_name = [Statlysis.tablename_default_pre, 'clocks'].compact.join("_")
    database = Statlysis.sequel
    unless database.table_exists?(stat_table_name)
      database.create_table(stat_table_name, DefaultTableOpts.merge(:engine => "InnoDB")) do
        primary_key :id
        String :feature
        DateTime :t
        index :feature, :unique => true
      end
    end

    Statlysis.setup_stat_table_and_model(self)
    self.clock = stat_model.find_or_create(:feature => feature)
    clock.update(:t => default_time) if current.nil?
  end

  # Moves the stored time to +time+. DateTime1970 is replaced by the current
  # time first.
  #
  # Returns false, without writing, when +time+ is earlier than the stored
  # time; otherwise returns the result of the record update.
  def update(time)
    time = DateTime.now if time == DateTime1970
    return false if time && time < current

    clock.update(:t => time)
  end

  # The time currently stored for this feature.
  def current
    clock.t
  end
end
35
+
36
+ end
@@ -0,0 +1,27 @@
1
+ # encoding: UTF-8
2
+
3
+ module Statlysis
4
# Helpers shared by objects that own a statistics table and model.
module Common
  attr_accessor :stat_table_name, :stat_model, :stat_table

  # Ensures the pattern/result table named by the plural of +tn+ exists, then
  # generates its model.
  #
  # Returns the model, which is also stored in stat_model.
  def pattern_table_and_model(tn)
    table = tn.pluralize
    database = Statlysis.sequel

    # ensure statlysis table
    unless database.table_exists?(table)
      database.create_table(table, DefaultTableOpts.merge(:engine => "InnoDB")) do
        primary_key :id
        String :pattern
        index :pattern
      end
      database.add_column(table, :result, String, :text => true)
    end

    # generate a statlysis model
    cron.stat_model = Statlysis.setup_stat_table_and_model(cron, table)
  end

  # The object itself, under the name the helpers use for it.
  def cron
    self
  end

  # Writes to $stdout only when the DEBUG environment variable is set.
  # TODO remove puts, conflict user, user logger
  def puts(*strs)
    return unless ENV['DEBUG']

    $stdout.puts(*strs)
  end
end
27
+ end