statlysis 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1,51 @@
1
+ # rcov generated
2
+ coverage
3
+ coverage.data
4
+
5
+ # rdoc generated
6
+ rdoc
7
+
8
+ # yard generated
9
+ doc
10
+ .yardoc
11
+
12
+ # bundler
13
+ .bundle
14
+
15
+ # jeweler generated
16
+ pkg
17
+
18
+ # Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
19
+ #
20
+ # * Create a file at ~/.gitignore
21
+ # * Include files you want ignored
22
+ # * Run: git config --global core.excludesfile ~/.gitignore
23
+ #
24
+ # After doing this, these files will be ignored in all your git projects,
25
+ # saving you from having to 'pollute' every project you touch with them
26
+ #
27
+ # Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # of the line.)
28
+ #
29
+ # For MacOS:
30
+ #
31
+ .DS_Store
32
+
33
+ # For TextMate
34
+ *.tmproj
35
+ tmtags
36
+
37
+ # For emacs:
38
+ *~
39
+ \#*
40
+ .\#*
41
+
42
+ # For vim:
43
+ *.swp
44
+
45
+ # For redcar:
46
+ .redcar
47
+
48
+ # For rubinius:
49
+ *.rbc
50
+
51
+ coverage
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Fetch gems over TLS; a plain-http source lets gem downloads be tampered with in transit.
source "https://rubygems.org"

# All dependencies are taken from the gemspec.
gemspec
@@ -0,0 +1,110 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ statlysis (0.0.1)
5
+ activerecord
6
+ activerecord_idnamecache
7
+ activesupport
8
+ mongoid
9
+ mysql2
10
+ only_one_rake
11
+ rails
12
+ rake
13
+ sequel
14
+
15
+ GEM
16
+ remote: http://rubygems.org/
17
+ specs:
18
+ actionmailer (3.2.13)
19
+ actionpack (= 3.2.13)
20
+ mail (~> 2.5.3)
21
+ actionpack (3.2.13)
22
+ activemodel (= 3.2.13)
23
+ activesupport (= 3.2.13)
24
+ builder (~> 3.0.0)
25
+ erubis (~> 2.7.0)
26
+ journey (~> 1.0.4)
27
+ rack (~> 1.4.5)
28
+ rack-cache (~> 1.2)
29
+ rack-test (~> 0.6.1)
30
+ sprockets (~> 2.2.1)
31
+ activemodel (3.2.13)
32
+ activesupport (= 3.2.13)
33
+ builder (~> 3.0.0)
34
+ activerecord (3.2.13)
35
+ activemodel (= 3.2.13)
36
+ activesupport (= 3.2.13)
37
+ arel (~> 3.0.2)
38
+ tzinfo (~> 0.3.29)
39
+ activerecord_idnamecache (0.1)
40
+ activeresource (3.2.13)
41
+ activemodel (= 3.2.13)
42
+ activesupport (= 3.2.13)
43
+ activesupport (3.2.13)
44
+ i18n (= 0.6.1)
45
+ multi_json (~> 1.0)
46
+ arel (3.0.2)
47
+ builder (3.0.4)
48
+ erubis (2.7.0)
49
+ hike (1.2.3)
50
+ i18n (0.6.1)
51
+ journey (1.0.4)
52
+ json (1.8.0)
53
+ mail (2.5.4)
54
+ mime-types (~> 1.16)
55
+ treetop (~> 1.4.8)
56
+ mime-types (1.23)
57
+ mongoid (3.1.4)
58
+ activemodel (~> 3.2)
59
+ moped (~> 1.4)
60
+ origin (~> 1.0)
61
+ tzinfo (~> 0.3.22)
62
+ moped (1.5.0)
63
+ multi_json (1.7.7)
64
+ mysql2 (0.3.11)
65
+ only_one_rake (0.0.4)
66
+ origin (1.1.0)
67
+ polyglot (0.3.3)
68
+ rack (1.4.5)
69
+ rack-cache (1.2)
70
+ rack (>= 0.4)
71
+ rack-ssl (1.3.3)
72
+ rack
73
+ rack-test (0.6.2)
74
+ rack (>= 1.0)
75
+ rails (3.2.13)
76
+ actionmailer (= 3.2.13)
77
+ actionpack (= 3.2.13)
78
+ activerecord (= 3.2.13)
79
+ activeresource (= 3.2.13)
80
+ activesupport (= 3.2.13)
81
+ bundler (~> 1.0)
82
+ railties (= 3.2.13)
83
+ railties (3.2.13)
84
+ actionpack (= 3.2.13)
85
+ activesupport (= 3.2.13)
86
+ rack-ssl (~> 1.3.2)
87
+ rake (>= 0.8.7)
88
+ rdoc (~> 3.4)
89
+ thor (>= 0.14.6, < 2.0)
90
+ rake (10.1.0)
91
+ rdoc (3.12.2)
92
+ json (~> 1.4)
93
+ sequel (4.0.0)
94
+ sprockets (2.2.2)
95
+ hike (~> 1.2)
96
+ multi_json (~> 1.0)
97
+ rack (~> 1.0)
98
+ tilt (~> 1.1, != 1.3.0)
99
+ thor (0.18.1)
100
+ tilt (1.4.1)
101
+ treetop (1.4.14)
102
+ polyglot
103
+ polyglot (>= 0.3.1)
104
+ tzinfo (0.3.37)
105
+
106
+ PLATFORMS
107
+ ruby
108
+
109
+ DEPENDENCIES
110
+ statlysis!
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 David Chen at eoe.cn
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,43 @@
1
+ statlysis
2
+ ===============================================
3
+ Statistical analysis in a Ruby DSL.
4
+
5
+ Usage
6
+ -----------------------------------------------
7
+ ```ruby
8
+ module Statlysis
9
+ set_database :statlysis
10
+ update_time_columns :t
11
+ set_tablename_default_pre :st
12
+
13
+ # 初始化键值model
14
+ Statlysis::Top.new('', :test => true).pattern_table_and_model 'st_single_kvs'
15
+ Statlysis::Top.new('', :test => true).pattern_table_and_model 'st_single_kv_histories'
16
+
17
+ # 日常count
18
+ EoeLog.class # preload EoeLogTest
19
+ @log_model = IS_DEVELOP ? EoeLogTest : EoeLog
20
+ hourly @log_model, :t
21
+ daily @log_model, :t
22
+ daily @log_model.where(:ui => 0), :t
23
+ daily @log_model.where(:ui => {"$ne" => 0}), :t
24
+
25
+ # 统计各个模块
26
+ daily @log_model.where(:do => {"$in" => [DOMAINS_HASH[:blog], DOMAINS_HASH[:my]]}), :t
27
+ [:www, :code, :skill, :book, :edu, :news, :wiki, :salon, :android].each do |site|
28
+ daily @log_model.where(:do => DOMAINS_HASH[site]), :t
29
+ end
30
+ ```
31
+
32
+ TODO
33
+ -----------------------------------------------
34
+ 1. Admin interface
35
+ 2. Statistical query API in Ruby and over HTTP
36
+ 3. Integration with a JavaScript charting library, e.g. Highcharts or D3.
37
+ 4. Add namespace to DSL, like rake
38
+ 5. More tests
39
+
40
+
41
+ Copyright
42
+ -----------------------------------------------
43
+ MIT. David Chen at eoe.cn.
@@ -0,0 +1,11 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'rake'
4
+ require 'rake/testtask'
5
+
6
# Define the test task with the 'test' directory added to its load path.
Rake::TestTask.new { |test_task| test_task.libs << 'test' }

# Running `rake` with no arguments runs the test task.
desc "Run tests"
task :default => [:test]
@@ -0,0 +1,134 @@
1
+ # encoding: UTF-8
2
+ # All Sequel operations must go through Symbols
3
+ #
4
+ # Drop the statistics tables whose names match:
5
+ # Statlysis.sequel.tables.select {|i| i.to_s.match(//i) }.each {|i| Statlysis.sequel.drop_table i }
6
+
7
+ require "active_support/all"
8
+ require 'active_support/core_ext/module/attribute_accessors.rb'
9
+ require 'active_record'
10
+ require 'rails'
11
+ %w[yaml sequel only_one_rake mongoid].map(&method(:require))
12
+
13
module Statlysis
  # Names of the supported time units.
  Units = ["hour", "day", "week", "month", "year"]

  # Base options handed to Sequel's create_table for statistic tables.
  DefaultTableOpts = {
    :charset => "utf8",
    :collate => "utf8_general_ci",
    :engine  => "MyISAM"
  }

  # Bind +cron+ to its statistics table and to a Sequel::Model for that table.
  #
  # cron      - object exposing stat_table_name, stat_table and stat_model
  #             accessors.
  # tablename - table to use; when nil, falls back to cron.stat_table_name and
  #             then to cron.stat_table.first_source_table.
  #
  # Defines (or reopens) a top-level model class named after the singularized,
  # camelized table name. The model's +[]+ looks up (or creates) the row for a
  # pattern and returns its JSON-decoded +result+, or [] when that fails.
  #
  # Returns the model class, which is also stored in cron.stat_model.
  def self.setup_stat_table_and_model cron, tablename = nil
    tablename = cron.stat_table_name if tablename.nil?
    tablename = cron.stat_table.first_source_table unless tablename
    cron.stat_table = Statlysis.sequel[tablename.to_sym]

    model_name = tablename.to_s.singularize.camelize
    # The class name is only known at runtime, so the definition is built as source text.
    eval("class ::#{model_name} < Sequel::Model;
      self.set_dataset :#{tablename}
      def self.[] item_id
        JSON.parse(find_or_create(:pattern => item_id).result) rescue []
      end
    end; ")
    cron.stat_model = model_name.constantize
  end

end
33
+
34
+ require 'statlysis/common'
35
+ require 'statlysis/timeseries'
36
+ require 'statlysis/clock'
37
+ require 'statlysis/rake'
38
+ require 'statlysis/cron'
39
+ require 'statlysis/similar'
40
+
41
+ module Statlysis
42
# Module-level configuration shared by the whole DSL.
mattr_accessor :sequel, :default_time_columns, :database_opts, :tablename_default_pre

# One "<name>_crons" registry per time unit, plus the realtime/similar/hotest
# registries; each starts out as an empty array. Accessors are declared
# directly instead of through string evaluation.
(Units + %w[realtime similar hotest]).each do |prefix|
  registry = "#{prefix}_crons".to_sym
  mattr_accessor registry
  self.send "#{registry}=", []
end
# TODO _crons uniq, no readd

# Make the DSL methods below callable on the module itself.
extend self
50
+
51
# Register extra time columns. These columns are stripped out when the
# statistics table name is assembled automatically.
#
# columns - column names to add to default_time_columns (which starts as
#           [:created_at, :updated_at] when unset).
#
# Returns the de-duplicated list of time columns.
def update_time_columns *columns
  known = (self.default_time_columns ||= [:created_at, :updated_at])
  known.concat(columns)
  self.default_time_columns = known.uniq
end
57
+
58
# Configure the statistics database and open the Sequel connection.
#
# sym_or_hash - a Symbol naming an entry of Rails' config/database.yml, or a
#               Hash of connection options that is used as given.
#
# The connection is opened without the 'database' option, the database is
# created if it does not exist yet, and the connection then switches to it.
#
# Raises ArgumentError when sym_or_hash is neither a Symbol nor a Hash.
def set_database sym_or_hash
  self.database_opts =
    case sym_or_hash
    when Symbol
      YAML.load_file(Rails.root.join("config/database.yml"))[sym_or_hash.to_s]
    when Hash
      sym_or_hash
    else
      # Reject anything else up front instead of failing later inside Sequel.
      raise ArgumentError, "Statlysis#set_database only support symbol or hash params"
    end
  self.sequel = Sequel.connect self.database_opts.except('database')
  self.sequel.execute("CREATE DATABASE IF NOT EXISTS #{self.database_opts['database']} DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;")
  self.sequel.use self.database_opts['database']
  # Statlysis.sequel.tables.map {|t| eval "class ::#{t.to_s.camelize} < ActiveRecord::Base; self.establish_connection Statlysis.database_opts; self.table_name = :#{t}; end; #{t.to_s.camelize}" }
end
71
+
72
# Set the default prefix for statistics table names.
#
# str - the prefix; stored as its String form.
#
# Returns the stored String.
def set_tablename_default_pre str
  prefix = str.to_s
  self.tablename_default_pre = prefix
end
75
+
76
# Register +source+ for per-day statistics.
#
# source      - the data source handed on to timely.
# time_column - the time column to use (default :created_at).
#
# Delegates to timely with :time_unit => :day and returns its result.
def daily source, time_column = :created_at
  timely(source, :time_unit => :day, :time_column => time_column)
end
77
# Register +source+ for per-hour statistics.
#
# source      - the data source handed on to timely.
# time_column - the time column to use (default :created_at).
#
# Delegates to timely with :time_unit => :hour and returns its result.
def hourly source, time_column = :created_at
  timely(source, :time_unit => :hour, :time_column => time_column)
end
78
+
79
# Guard used by the DSL methods: fails unless a connection has been stored
# in +sequel+.
#
# Raises RuntimeError when +sequel+ is nil; returns nil otherwise.
def check_set_database
  return unless sequel.nil?
  raise "Please setup database first"
end
80
+
81
# Register a time-bucketed statistics job.
#
# source - the data source handed to Timely.new.
# opts   - options Hash, updated in place with the defaults
#          :time_column => :created_at and :time_unit => :day.
#
# The job is appended to the "<time_unit>_crons" registry, which is looked up
# by name rather than by evaluating a source string.
#
# Raises RuntimeError (via check_set_database) when no database is configured.
# Returns the registry the job was pushed onto.
def timely source, opts
  check_set_database
  opts.reverse_merge! :time_column => :created_at, :time_unit => :day
  job = Timely.new(source, opts)
  send("#{opts[:time_unit]}_crons").push job
end
87
+
88
# The real requirement is to compute the latest items grouped by special
# patterns, such as user_id or a URL prefix.
#
# source - the data source handed to LastestVisits.new.
# opts   - options Hash, updated in place with the default
#          :time_column => :created_at.
#
# Raises RuntimeError (via check_set_database) when no database is configured.
# Returns realtime_crons, with the new job appended.
def lastest_visits source, opts
  check_set_database
  opts.reverse_merge! :time_column => :created_at
  job = LastestVisits.new(source, opts)
  realtime_crons.push job
end
94
+
95
# Register a "hottest items" job under +key+.
#
# key                       - handed to HotestItems.new as-is.
# id_to_score_and_time_hash - data used when no block is given.
#
# When a block is given, each evaluation builds a fresh Hash, yields it to
# the block to be filled in, and uses that Hash instead.
#
# The data is handed to HotestItems as a proc that returns another proc, so
# calling it twice yields the Hash.
# TODO: why does a single level of proc get executed immediately?
#
# Returns hotest_crons, with the new job appended.
def hotest_items key, id_to_score_and_time_hash = {}
  loader_factory = proc do
    if block_given?
      proc { Hash.new.tap { |scores| yield scores } }
    else
      proc { id_to_score_and_time_hash }
    end
  end

  hotest_crons.push HotestItems.new(key, loader_factory)
end
109
+
110
# TODO support mongoid
#
# Register a "similar items" job for +model_name+.
#
# model_name      - handed to Similar.new as-is.
# id_to_text_hash - data used when no block is given.
#
# When a block is given, each evaluation builds a fresh Hash whose missing
# keys default to a new empty String, yields it to the block to be filled in,
# and uses that Hash instead.
#
# Returns similar_crons, with the new job appended.
def similar_items model_name, id_to_text_hash = {}
  loader =
    if block_given?
      proc do
        texts = Hash.new { |hash, key| hash[key] = "" }
        yield texts
        texts
      end
    else
      proc { id_to_text_hash }
    end

  similar_crons.push Similar.new(model_name, loader)
end
124
+
125
+ end
126
+
127
+
128
module Statlysis
  # Only defined when Rails is loaded: loads statlysis/rake.rb (resolved
  # relative to this file) when Rails loads its rake tasks.
  if defined?(Rails)
    class Railtie < Rails::Railtie
      rake_tasks { load File.expand_path('../statlysis/rake.rb', __FILE__) }
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # encoding: UTF-8
2
+
3
module Statlysis
  # Sentinel timestamp; Clock#update treats it as "now".
  # NOTE(review): in_time_zone is presumably ActiveSupport's extension - confirm.
  DateTime1970 = DateTime.parse("19700101").in_time_zone

  # Stores one timestamp per feature in the "<prefix>_clocks" table.
  class Clock
    attr_accessor :clock
    include Common

    # feature      - name identifying the clock row (unique per table).
    # default_time - initial timestamp, written when the row has none yet.
    #
    # Creates the clocks table when it does not exist, binds the model for it,
    # and loads (or creates) the row for +feature+.
    #
    # Raises RuntimeError when default_time is nil or false.
    def initialize feature, default_time
      raise "Please assign default_time params" unless default_time

      table = [Statlysis.tablename_default_pre, 'clocks'].compact.join("_")
      cron.stat_table_name = table

      db = Statlysis.sequel
      if !db.table_exists?(table)
        db.create_table(table, DefaultTableOpts.merge(:engine => "InnoDB")) do
          primary_key :id
          String :feature
          DateTime :t
          index :feature, :unique => true
        end
      end

      Statlysis.setup_stat_table_and_model cron
      cron.clock = cron.stat_model.find_or_create(:feature => feature)
      cron.clock.update(:t => default_time) if current.nil?
      cron
    end

    # Move the clock to +time+ (DateTime1970 means "now").
    #
    # Returns false without writing when +time+ is earlier than the stored
    # timestamp; otherwise returns the result of updating the row.
    def update time
      time = DateTime.now if time == DateTime1970
      return false if time && time < current
      clock.update :t => time
    end

    # The timestamp currently stored for this feature.
    def current
      clock.t
    end
  end

end
@@ -0,0 +1,27 @@
1
+ # encoding: UTF-8
2
+
3
module Statlysis
  # Helpers mixed into the statistic job classes.
  module Common
    attr_accessor :stat_table_name, :stat_model, :stat_table

    # Ensure the pattern/result table +tn+ exists and bind a model to it.
    #
    # tn - table name; it is pluralized before use.
    #
    # A missing table is created with an indexed +pattern+ column, and a text
    # +result+ column is then added to it.
    #
    # Returns the model class, which is also stored in stat_model.
    def pattern_table_and_model tn
      tn = tn.pluralize
      db = Statlysis.sequel

      if !db.table_exists?(tn)
        db.create_table(tn, DefaultTableOpts.merge(:engine => "InnoDB")) do
          primary_key :id
          String :pattern
          index :pattern
        end
        db.add_column tn, :result, String, :text => true
      end

      # generate a statlysis model
      cron.stat_model = Statlysis.setup_stat_table_and_model(cron, tn)
    end

    # The job object itself.
    def cron
      self
    end

    # TODO remove puts, conflict user, user logger
    # Writes to $stdout only when the DEBUG environment variable is set.
    def puts(*strs)
      return unless ENV['DEBUG']
      $stdout.puts(*strs)
    end

  end
end