data_miner 2.5.2 → 3.0.0.alpha

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,78 +0,0 @@
1
- class DataMiner
2
- class Run < ::ActiveRecord::Base
3
- # When +DataMiner.per_column_statistics?+ is enabled, this model keeps per-column stats: null, zero, and blank counts, plus max, min, average, and sum.
4
- #
5
- # Each +DataMiner::Run+ will have two of these for every column: an "initial" and a "final".
6
- class ColumnStatistic < ::ActiveRecord::Base
7
- class << self
8
- def take(run)
9
- unless table_exists?
10
- auto_upgrade!
11
- end
12
- model = run.model_name.constantize
13
- return unless model.table_exists?
14
- model.column_names.each do |column_name|
15
- column_statistic = new
16
- column_statistic.run = run
17
- column_statistic.model_name = run.model_name
18
- column_statistic.column_name = column_name
19
- column_statistic.take_statistics
20
- column_statistic.save!
21
- end
22
- nil
23
- end
24
-
25
- end
26
-
27
- NUMERIC = [
28
- :integer,
29
- :float,
30
- :decimal,
31
- ]
32
-
33
- self.table_name = 'data_miner_run_column_statistics'
34
-
35
- belongs_to :run, :class_name => 'DataMiner::Run'
36
-
37
- col :run_id, :type => :integer
38
- col :model_name
39
- col :column_name
40
- col :null_count, :type => :integer
41
- col :zero_count, :type => :integer
42
- col :blank_count, :type => :integer
43
- col :max
44
- col :min
45
- col :average, :type => :float
46
- col :sum, :type => :float
47
- col :created_at, :type => :datetime
48
- add_index :run_id
49
- add_index :model_name
50
-
51
- # @private
52
- def take_statistics
53
- model = run.model_name.constantize
54
-
55
- self.null_count = model.where("#{model.connection.quote_column_name(column_name)} IS NULL").count
56
-
57
- self.max = calc(:MAX).inspect
58
- self.min = calc(:MIN).inspect
59
-
60
- column = model.columns_hash[column_name]
61
- if NUMERIC.include?(column.type)
62
- self.zero_count = model.where(column_name => 0).count
63
- self.average = calc :AVG
64
- self.sum = calc :SUM
65
- elsif column.type == :string
66
- self.blank_count = model.where("LENGTH(TRIM(#{model.connection.quote_column_name(column_name)})) = 0").count
67
- end
68
- end
69
-
70
- private
71
-
72
- def calc(operation)
73
- model = run.model_name.constantize
74
- model.connection.select_value "SELECT #{operation}(#{model.connection.quote_column_name(column_name)}) FROM #{model.quoted_table_name}"
75
- end
76
- end
77
- end
78
- end
@@ -1,12 +0,0 @@
1
- class DataMiner
2
- class UnitConverter
3
- class << self
4
- def load(type)
5
- if type
6
- require "data_miner/unit_converter/#{type}"
7
- const_get(type.to_s.camelcase).new
8
- end
9
- end
10
- end
11
- end
12
- end
@@ -1,11 +0,0 @@
1
- require 'alchemist'
2
-
3
- class DataMiner
4
- class UnitConverter
5
- class Alchemist < UnitConverter
6
- def convert(value, from, to)
7
- value.to_f.send(from).to.send(to).to_f
8
- end
9
- end
10
- end
11
- end
@@ -1,11 +0,0 @@
1
- require 'conversions'
2
-
3
- class DataMiner
4
- class UnitConverter
5
- class Conversions < UnitConverter
6
- def convert(value, from, to)
7
- value.to_f.convert from, to
8
- end
9
- end
10
- end
11
- end
@@ -1,35 +0,0 @@
1
- require 'helper'
2
- init_database
3
- require 'earth'
4
-
5
- require 'earth/residence'
6
- require 'earth/electricity'
7
- require 'earth/hospitality'
8
-
9
- class PetBlue < ActiveRecord::Base
10
- data_miner do
11
- import 'fake', :url => 'fake' do
12
- key :id
13
- end
14
- end
15
- end
16
- PetBlue.auto_upgrade!
17
-
18
- describe DataMiner::Step::Import do
19
- describe '#table_has_autoincrementing_primary_key?' do
20
- it "recognizes auto-increment primary keys" do
21
- PetBlue.data_miner_script.steps.first.send(:table_has_autoincrementing_primary_key?).must_equal true
22
- end
23
- it "recognizes that not all integer primary keys are auto-increment" do
24
- [
25
- ElectricUtility,
26
- ResidentialEnergyConsumptionSurveyResponse,
27
- CommercialBuildingEnergyConsumptionSurveyResponse,
28
- ].each do |model|
29
- model.data_miner_script.steps.select { |s| s.is_a?(DataMiner::Step::Import) }.each do |import_step|
30
- import_step.send(:table_has_autoincrementing_primary_key?).must_equal false
31
- end
32
- end
33
- end
34
- end
35
- end
@@ -1,20 +0,0 @@
1
- require 'helper'
2
-
3
- describe 'DataMiner::UnitConverter::Alchemist' do
4
- before do
5
- @original_converter = DataMiner.unit_converter
6
- DataMiner.unit_converter = :alchemist
7
- end
8
-
9
- after do
10
- DataMiner.unit_converter = @original_converter
11
- end
12
-
13
- describe '#convert' do
14
- it 'converts a value from one unit to another' do
15
- value = DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
16
- assert value.is_a?(Float)
17
- value.must_be_close_to 7.71617918
18
- end
19
- end
20
- end
@@ -1,20 +0,0 @@
1
- require 'helper'
2
-
3
- describe 'DataMiner::UnitConverter::Conversions' do
4
- before do
5
- @original_converter = DataMiner.unit_converter
6
- DataMiner.unit_converter = :conversions
7
- end
8
-
9
- after do
10
- DataMiner.unit_converter = @original_converter
11
- end
12
-
13
- describe '#convert' do
14
- it 'converts a value from one unit to another' do
15
- value = DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
16
- assert value.is_a?(Float)
17
- value.must_be_close_to 7.71617918
18
- end
19
- end
20
- end
@@ -1,16 +0,0 @@
1
- require 'helper'
2
-
3
- require 'conversions'
4
- Conversions.register :years, :years, 1
5
-
6
- describe 'DataMiner with Conversions' do
7
- before do
8
- init_database(:conversions)
9
- init_models
10
- Pet.run_data_miner!
11
- end
12
-
13
- it 'converts convertible units' do
14
- Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to(:kilograms)
15
- end
16
- end
@@ -1,51 +0,0 @@
1
- require 'helper'
2
-
3
- class MyPet < ActiveRecord::Base
4
- PETS = File.expand_path('../pets.csv', __FILE__)
5
- COLOR_DICTIONARY_ENGLISH = File.expand_path('../pet_color_dictionary.en.csv', __FILE__)
6
-
7
- self.primary_key = "name"
8
- col :name
9
- col :color_id
10
- col :age, :type => :integer
11
- col :age_units
12
- col :weight, :type => :float
13
- col :weight_units
14
- col :height, :type => :integer
15
- col :height_units
16
- col :favorite_food
17
- col :command_phrase
18
-
19
- data_miner do
20
- process :auto_upgrade!
21
- import("A list of pets", :url => "file://#{PETS}") do
22
- key :name
23
- store :age
24
- store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
25
- store :weight
26
- store :favorite_food, :nullify_blank_strings => true
27
- store :command_phrase
28
- store :height, :units => :centimetres
29
- end
30
- end
31
- end
32
-
33
- describe 'DataMiner with Conversions' do
34
- it 'happens when DataMiner.unit_converter is nil' do
35
- DataMiner.unit_converter.must_be_nil
36
- end
37
-
38
- it 'converts convertible units' do
39
- init_database(nil)
40
- MyPet.run_data_miner!
41
- MyPet.find('Pierre').weight.must_equal 4.4
42
- end
43
-
44
- it 'raises an error if conversions are attempted' do
45
- init_database(nil)
46
- lambda do
47
- init_models
48
- Pet.run_data_miner!
49
- end.must_raise DataMiner::Attribute::NoConverterSet
50
- end
51
- end
@@ -1,52 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- require 'helper'
3
- init_database
4
- init_models
5
-
6
- describe DataMiner::Run::ColumnStatistic do
7
- describe "when advanced statistics are enabled" do
8
- before do
9
- DataMiner.per_column_statistics = true
10
- Pet.delete_all
11
- DataMiner::Run.delete_all
12
- DataMiner::Run::ColumnStatistic.delete_all
13
- Pet.run_data_miner!
14
- end
15
-
16
- after do
17
- DataMiner.per_column_statistics = false
18
- end
19
-
20
- it "keeps null count" do
21
- Pet.data_miner_runs.first.initial_column_statistics(:breed_id).null_count.must_equal 0
22
- Pet.data_miner_runs.first.final_column_statistics(:breed_id).null_count.must_equal 1
23
-
24
- Pet.data_miner_runs.first.initial_column_statistics(:command_phrase).null_count.must_equal 0
25
- Pet.data_miner_runs.first.final_column_statistics(:command_phrase).null_count.must_equal 0
26
- end
27
-
28
- it "keeps max and min (as strings)" do
29
- Pet.data_miner_runs.first.initial_column_statistics(:age).max.must_equal 'nil'
30
- Pet.data_miner_runs.first.final_column_statistics(:age).max.must_include '17'
31
- end
32
-
33
- it "keeps average and sum" do
34
- Pet.data_miner_runs.first.initial_column_statistics(:age).average.must_be_nil
35
- Pet.data_miner_runs.first.final_column_statistics(:age).average.must_equal 7.0
36
-
37
- Pet.data_miner_runs.first.initial_column_statistics(:age).sum.must_be_nil
38
- Pet.data_miner_runs.first.final_column_statistics(:age).sum.must_equal 28.0
39
- end
40
-
41
- it "keeps blank (empty string) count" do
42
- Pet.data_miner_runs.first.initial_column_statistics(:command_phrase).blank_count.must_equal 0
43
- Pet.data_miner_runs.first.final_column_statistics(:command_phrase).blank_count.must_equal 3
44
- end
45
-
46
- it "keeps zero count" do
47
- Pet.data_miner_runs.first.initial_column_statistics(:age).zero_count.must_equal 0
48
- Pet.data_miner_runs.first.final_column_statistics(:age).zero_count.must_equal 0
49
- end
50
-
51
- end
52
- end
@@ -1,26 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- require 'helper'
3
- init_database
4
- require 'earth'
5
-
6
- # use earth, which has a plethora of real-world data_miner blocks
7
- Earth.init :locality, :pet, :load_data_miner => true, :apply_schemas => true
8
-
9
- describe DataMiner do
10
- describe "being used by the Earth library's import steps" do
11
- describe "for pets" do
12
- it "can pull breed and species" do
13
- Breed.run_data_miner!
14
- Breed.find('Golden Retriever').species.must_equal Species.find('dog')
15
- end
16
- end
17
- describe "for localities" do
18
- it "can handle non-latin characters" do
19
- Country.run_data_miner!
20
- Country.find('DE').name.must_equal 'Germany'
21
- Country.find('AX').name.must_equal 'Åland Islands'
22
- Country.find('CI').name.must_equal "Côte d'Ivoire"
23
- end
24
- end
25
- end
26
- end
data/test/test_safety.rb DELETED
@@ -1,84 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- require 'helper'
3
- init_database
4
- init_models
5
- require 'earth'
6
-
7
- require 'lock_method'
8
- DataMiner::Run.lock_method :start
9
-
10
- # use earth, which has a plethora of real-world data_miner blocks
11
- Earth.init :locality, :pet, :load_data_miner => true, :apply_schemas => true
12
-
13
- describe DataMiner do
14
- describe "when being run in a multi-threaded environment" do
15
- before do
16
- @old_thread_abort_on_exception = Thread.abort_on_exception
17
- Thread.abort_on_exception = false
18
- end
19
-
20
- after do
21
- Thread.abort_on_exception = @old_thread_abort_on_exception
22
- end
23
-
24
- it "tries not to duplicate data" do
25
- Breed.delete_all
26
- Breed.run_data_miner!
27
- reference_count = Breed.count
28
- Breed.delete_all
29
- threads = (0..2).map do |i|
30
- Thread.new do
31
- # $stderr.write "Thread #{i} starting\n"
32
- Breed.run_data_miner!
33
- # $stderr.write "Thread #{i} done\n"
34
- end
35
- end
36
- exceptions = []
37
- threads.each do |t|
38
- begin
39
- t.join
40
- rescue
41
- exceptions << $!
42
- end
43
- end
44
- exceptions.length.must_equal 2
45
- exceptions.each do |exception|
46
- exception.must_be_kind_of LockMethod::Locked
47
- end
48
- Breed.count.must_equal reference_count
49
- end
50
-
51
- it "allows you to clear locks if necessary" do
52
- threads = (0..2).map do |i|
53
- Thread.new do
54
- # $stderr.write "Thread #{i} starting\n"
55
- case i
56
- when 0
57
- Breed.run_data_miner!
58
- when 1
59
- sleep 0.3
60
- DataMiner::Run.clear_locks
61
- Breed.run_data_miner!
62
- when 2
63
- # This thread is expected to hit a lock!
64
- sleep 0.6
65
- Breed.run_data_miner!
66
- end
67
- # $stderr.write "Thread #{i} done\n"
68
- end
69
- end
70
- exceptions = []
71
- threads.each do |t|
72
- begin
73
- t.join
74
- rescue
75
- exceptions << $!
76
- end
77
- end
78
- exceptions.length.must_equal 1
79
- exceptions.each do |exception|
80
- exception.must_be_kind_of LockMethod::Locked
81
- end
82
- end
83
- end
84
- end
@@ -1,16 +0,0 @@
1
- require 'helper'
2
-
3
- describe 'DataMiner unit conversion' do
4
- it "blows up if you don't specify a converter" do
5
- output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_without_unit_converter.rb', __FILE__)}`
6
- refute $?.success?, output
7
- end
8
- it 'can convert with alchemist' do
9
- output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_alchemist.rb', __FILE__)}`
10
- assert $?.success?, output
11
- end
12
- it 'can convert with conversions' do
13
- output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_conversions.rb', __FILE__)}`
14
- assert $?.success?, output
15
- end
16
- end