data_miner 2.1.1 → 2.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,14 @@
1
+ 2.1.2 / 2012-05-22
2
+
3
+ * Breaking changes
4
+
5
+ * DataMiner::Run#column_statistics_for(column_name, period) -> {initial,final}_column_statistics(column_name)
6
+ * Remove std dev from statistics kept
7
+
8
+ * Enhancements
9
+
10
+ * Added zero count and blank count
11
+
1
12
  2.1.1 / 2012-05-16
2
13
 
3
14
  * Enhancements
@@ -2,22 +2,10 @@ class DataMiner
2
2
  class Run < ::ActiveRecord::Base
3
3
  # If +DataMiner.per_column_statistics?+, this model keeps per-column stats like max, min, average, standard deviation, etc.
4
4
  #
5
- # Each +DataMiner::Run+ will have two of these for every column; a "before" and an "after".
5
+ # Each +DataMiner::Run+ will have two of these for every column; an "initial" and a "final".
6
6
  class ColumnStatistic < ::ActiveRecord::Base
7
7
  class << self
8
- # @private
9
- def before(run)
10
- period run, 'before'
11
- end
12
-
13
- # @private
14
- def after(run)
15
- period run, 'after'
16
- end
17
-
18
- private
19
-
20
- def period(run, period)
8
+ def take(run)
21
9
  unless table_exists?
22
10
  auto_upgrade!
23
11
  end
@@ -27,9 +15,8 @@ class DataMiner
27
15
  column_statistic = new
28
16
  column_statistic.run = run
29
17
  column_statistic.model_name = run.model_name
30
- column_statistic.period = period
31
18
  column_statistic.column_name = column_name
32
- column_statistic.perform_calculations
19
+ column_statistic.take_statistics
33
20
  column_statistic.save!
34
21
  end
35
22
  nil
@@ -49,36 +36,40 @@ class DataMiner
49
36
 
50
37
  col :run_id, :type => :integer
51
38
  col :model_name
52
- col :period
53
39
  col :column_name
54
40
  col :null_count, :type => :integer
41
+ col :zero_count, :type => :integer
42
+ col :blank_count, :type => :integer
55
43
  col :max
56
44
  col :min
57
45
  col :average, :type => :float
58
- col :standard_deviation, :type => :float
59
46
  col :sum, :type => :float
47
+ col :created_at, :type => :datetime
60
48
  add_index :run_id
61
49
  add_index :model_name
62
50
 
63
51
  # @private
64
- def perform_calculations
52
+ def take_statistics
65
53
  model = run.model_name.constantize
66
54
 
67
55
  self.null_count = model.where("#{model.connection.quote_column_name(column_name)} IS NULL").count
68
- self.max = calculate(:MAX).inspect
69
- self.min = calculate(:MIN).inspect
56
+
57
+ self.max = calc(:MAX).inspect
58
+ self.min = calc(:MIN).inspect
70
59
 
71
60
  column = model.columns_hash[column_name]
72
61
  if NUMERIC.include?(column.type)
73
- self.average = calculate :AVG
74
- self.standard_deviation = calculate :STDDEV
75
- self.sum = calculate :SUM
62
+ self.zero_count = model.where(column_name => 0).count
63
+ self.average = calc :AVG
64
+ self.sum = calc :SUM
65
+ elsif column.type == :string
66
+ self.blank_count = model.where("LENGTH(TRIM(#{model.connection.quote_column_name(column_name)})) = 0").count
76
67
  end
77
68
  end
78
69
 
79
70
  private
80
71
 
81
- def calculate(operation)
72
+ def calc(operation)
82
73
  model = run.model_name.constantize
83
74
  model.connection.select_value "SELECT #{operation}(#{model.connection.quote_column_name(column_name)}) FROM #{model.quoted_table_name}"
84
75
  end
@@ -86,7 +86,7 @@ class DataMiner
86
86
  end
87
87
  save!
88
88
  if DataMiner.per_column_statistics?
89
- ColumnStatistic.before self
89
+ ColumnStatistic.take self
90
90
  end
91
91
  begin
92
92
  catch :data_miner_succeed do
@@ -102,38 +102,31 @@ class DataMiner
102
102
  ensure
103
103
  self.row_count_after = model.count
104
104
  if DataMiner.per_column_statistics?
105
- ColumnStatistic.after self
105
+ ColumnStatistic.take self
106
106
  end
107
- self.stopped_at = ::Time.now
107
+ self.stopped_at = ::Time.now.utc
108
108
  save!
109
109
  DataMiner.logger.info %{[data_miner] #{model_name} #{aasm_current_state.to_s.upcase} (#{(stopped_at-created_at).round(2)}s)}
110
110
  end
111
111
  self
112
112
  end
113
113
 
114
- # Get the column statistics for a particular column before or after this run.
114
+ # Get the column statistics for a particular column before this run started.
115
115
  #
116
116
  # @param [String] column_name The column you want to know about.
117
- # @param ["before","after"] period Whether you want to know about before or after the run.
118
117
  #
119
118
  # @return [ColumnStatistic]
120
- def column_statistics_for(column_name, period)
121
- column_name = column_name.to_s
122
- period = period.to_s
123
- model = model_name.constantize
124
- if existing = column_statistics.where(:column_name => column_name, :period => period).first
125
- existing
126
- elsif model.table_exists?
127
- unless model.column_names.include?(column_name)
128
- raise ::ArgumentError, %{[data_miner] Nonexistent column #{column_name.inspect} on #{model_name}}
129
- end
130
- blank = ColumnStatistic.new
131
- blank.run = self
132
- blank.model_name = model_name
133
- blank.period = period
134
- blank.column_name = column_name
135
- blank
136
- end
119
+ def initial_column_statistics(column_name)
120
+ column_statistics.where(:column_name => column_name.to_s).first
121
+ end
122
+
123
+ # Get the column statistics for a particular column after this run finished.
124
+ #
125
+ # @param [String] column_name The column you want to know about.
126
+ #
127
+ # @return [ColumnStatistic]
128
+ def final_column_statistics(column_name)
129
+ column_statistics.where(:column_name => column_name.to_s).last
137
130
  end
138
131
 
139
132
  # @private
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '2.1.1'
2
+ VERSION = '2.1.2'
3
3
  end
@@ -8,6 +8,7 @@ describe DataMiner::Run::ColumnStatistic do
8
8
  Pet.delete_all
9
9
  DataMiner::Run.delete_all
10
10
  DataMiner::Run::ColumnStatistic.delete_all
11
+ Pet.run_data_miner!
11
12
  end
12
13
 
13
14
  after do
@@ -15,29 +16,35 @@ describe DataMiner::Run::ColumnStatistic do
15
16
  end
16
17
 
17
18
  it "keeps null count" do
18
- Pet.run_data_miner!
19
-
20
- Pet.data_miner_runs.first.column_statistics_for(:breed_id, :before).null_count.must_equal 0
21
- Pet.data_miner_runs.first.column_statistics_for(:breed_id, :after).null_count.must_equal 1
19
+ Pet.data_miner_runs.first.initial_column_statistics(:breed_id).null_count.must_equal 0
20
+ Pet.data_miner_runs.first.final_column_statistics(:breed_id).null_count.must_equal 1
22
21
 
23
- Pet.data_miner_runs.first.column_statistics_for(:command_phrase, :before).null_count.must_equal 0
24
- Pet.data_miner_runs.first.column_statistics_for(:command_phrase, :after).null_count.must_equal 0
22
+ Pet.data_miner_runs.first.initial_column_statistics(:command_phrase).null_count.must_equal 0
23
+ Pet.data_miner_runs.first.final_column_statistics(:command_phrase).null_count.must_equal 0
25
24
  end
26
25
 
27
26
  it "keeps max and min (as strings)" do
28
- Pet.run_data_miner!
29
- Pet.data_miner_runs.first.column_statistics_for(:age, :before).max.must_equal 'nil'
30
- Pet.data_miner_runs.first.column_statistics_for(:age, :after).max.must_equal '17'
27
+ Pet.data_miner_runs.first.initial_column_statistics(:age).max.must_equal 'nil'
28
+ Pet.data_miner_runs.first.final_column_statistics(:age).max.must_equal '17'
31
29
  end
32
30
 
33
- it "keeps average and stddev" do
34
- Pet.run_data_miner!
31
+ it "keeps average and sum" do
32
+ Pet.data_miner_runs.first.initial_column_statistics(:age).average.must_be_nil
33
+ Pet.data_miner_runs.first.final_column_statistics(:age).average.must_equal 7.0
35
34
 
36
- Pet.data_miner_runs.first.column_statistics_for(:age, :before).average.must_be_nil
37
- Pet.data_miner_runs.first.column_statistics_for(:age, :after).average.must_equal 7.0
35
+ Pet.data_miner_runs.first.initial_column_statistics(:age).sum.must_be_nil
36
+ Pet.data_miner_runs.first.final_column_statistics(:age).sum.must_equal 28.0
37
+ end
38
+
39
+ it "keeps blank (empty string) count" do
40
+ Pet.data_miner_runs.first.initial_column_statistics(:command_phrase).blank_count.must_equal 0
41
+ Pet.data_miner_runs.first.final_column_statistics(:command_phrase).blank_count.must_equal 3
42
+ end
38
43
 
39
- Pet.data_miner_runs.first.column_statistics_for(:age, :before).standard_deviation.must_be_nil
40
- Pet.data_miner_runs.first.column_statistics_for(:age, :after).standard_deviation.must_equal 5.8737
44
+ it "keeps zero count" do
45
+ Pet.data_miner_runs.first.initial_column_statistics(:age).zero_count.must_equal 0
46
+ Pet.data_miner_runs.first.final_column_statistics(:age).zero_count.must_equal 0
41
47
  end
48
+
42
49
  end
43
50
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 2.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-05-16 00:00:00.000000000 Z
14
+ date: 2012-05-22 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: remote_table