data_miner 2.5.2 → 3.0.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,78 +0,0 @@
1
- class DataMiner
2
- class Run < ::ActiveRecord::Base
3
- # If +DataMiner.per_column_statistics?+, this model keeps per-column stats like max, min, average, standard deviation, etc.
4
- #
5
- # Each +DataMiner::Run+ will have two of these for every column; an "initial" and a "final"
6
- class ColumnStatistic < ::ActiveRecord::Base
7
- class << self
8
- def take(run)
9
- unless table_exists?
10
- auto_upgrade!
11
- end
12
- model = run.model_name.constantize
13
- return unless model.table_exists?
14
- model.column_names.each do |column_name|
15
- column_statistic = new
16
- column_statistic.run = run
17
- column_statistic.model_name = run.model_name
18
- column_statistic.column_name = column_name
19
- column_statistic.take_statistics
20
- column_statistic.save!
21
- end
22
- nil
23
- end
24
-
25
- end
26
-
27
- NUMERIC = [
28
- :integer,
29
- :float,
30
- :decimal,
31
- ]
32
-
33
- self.table_name = 'data_miner_run_column_statistics'
34
-
35
- belongs_to :run, :class_name => 'DataMiner::Run'
36
-
37
- col :run_id, :type => :integer
38
- col :model_name
39
- col :column_name
40
- col :null_count, :type => :integer
41
- col :zero_count, :type => :integer
42
- col :blank_count, :type => :integer
43
- col :max
44
- col :min
45
- col :average, :type => :float
46
- col :sum, :type => :float
47
- col :created_at, :type => :datetime
48
- add_index :run_id
49
- add_index :model_name
50
-
51
- # @private
52
- def take_statistics
53
- model = run.model_name.constantize
54
-
55
- self.null_count = model.where("#{model.connection.quote_column_name(column_name)} IS NULL").count
56
-
57
- self.max = calc(:MAX).inspect
58
- self.min = calc(:MIN).inspect
59
-
60
- column = model.columns_hash[column_name]
61
- if NUMERIC.include?(column.type)
62
- self.zero_count = model.where(column_name => 0).count
63
- self.average = calc :AVG
64
- self.sum = calc :SUM
65
- elsif column.type == :string
66
- self.blank_count = model.where("LENGTH(TRIM(#{model.connection.quote_column_name(column_name)})) = 0").count
67
- end
68
- end
69
-
70
- private
71
-
72
- def calc(operation)
73
- model = run.model_name.constantize
74
- model.connection.select_value "SELECT #{operation}(#{model.connection.quote_column_name(column_name)}) FROM #{model.quoted_table_name}"
75
- end
76
- end
77
- end
78
- end
@@ -1,12 +0,0 @@
1
- class DataMiner
2
- class UnitConverter
3
- class << self
4
- def load(type)
5
- if type
6
- require "data_miner/unit_converter/#{type}"
7
- const_get(type.to_s.camelcase).new
8
- end
9
- end
10
- end
11
- end
12
- end
@@ -1,11 +0,0 @@
1
- require 'alchemist'
2
-
3
- class DataMiner
4
- class UnitConverter
5
- class Alchemist < UnitConverter
6
- def convert(value, from, to)
7
- value.to_f.send(from).to.send(to).to_f
8
- end
9
- end
10
- end
11
- end
@@ -1,11 +0,0 @@
1
- require 'conversions'
2
-
3
- class DataMiner
4
- class UnitConverter
5
- class Conversions < UnitConverter
6
- def convert(value, from, to)
7
- value.to_f.convert from, to
8
- end
9
- end
10
- end
11
- end
@@ -1,35 +0,0 @@
1
- require 'helper'
2
- init_database
3
- require 'earth'
4
-
5
- require 'earth/residence'
6
- require 'earth/electricity'
7
- require 'earth/hospitality'
8
-
9
- class PetBlue < ActiveRecord::Base
10
- data_miner do
11
- import 'fake', :url => 'fake' do
12
- key :id
13
- end
14
- end
15
- end
16
- PetBlue.auto_upgrade!
17
-
18
- describe DataMiner::Step::Import do
19
- describe '#table_has_autoincrementing_primary_key?' do
20
- it "recognizes auto-increment primary keys" do
21
- PetBlue.data_miner_script.steps.first.send(:table_has_autoincrementing_primary_key?).must_equal true
22
- end
23
- it "recognizes that not all integer primary keys are auto-increment" do
24
- [
25
- ElectricUtility,
26
- ResidentialEnergyConsumptionSurveyResponse,
27
- CommercialBuildingEnergyConsumptionSurveyResponse,
28
- ].each do |model|
29
- model.data_miner_script.steps.select { |s| s.is_a?(DataMiner::Step::Import) }.each do |import_step|
30
- import_step.send(:table_has_autoincrementing_primary_key?).must_equal false
31
- end
32
- end
33
- end
34
- end
35
- end
@@ -1,20 +0,0 @@
1
- require 'helper'
2
-
3
- describe 'DataMiner::UnitConverter::Alchemist' do
4
- before do
5
- @original_converter = DataMiner.unit_converter
6
- DataMiner.unit_converter = :alchemist
7
- end
8
-
9
- after do
10
- DataMiner.unit_converter = @original_converter
11
- end
12
-
13
- describe '#convert' do
14
- it 'converts a value from one unit to another' do
15
- value = DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
16
- assert value.is_a?(Float)
17
- value.must_be_close_to 7.71617918
18
- end
19
- end
20
- end
@@ -1,20 +0,0 @@
1
- require 'helper'
2
-
3
- describe 'DataMiner::UnitConverter::Conversions' do
4
- before do
5
- @original_converter = DataMiner.unit_converter
6
- DataMiner.unit_converter = :conversions
7
- end
8
-
9
- after do
10
- DataMiner.unit_converter = @original_converter
11
- end
12
-
13
- describe '#convert' do
14
- it 'converts a value from one unit to another' do
15
- value = DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
16
- assert value.is_a?(Float)
17
- value.must_be_close_to 7.71617918
18
- end
19
- end
20
- end
@@ -1,16 +0,0 @@
1
- require 'helper'
2
-
3
- require 'conversions'
4
- Conversions.register :years, :years, 1
5
-
6
- describe 'DataMiner with Conversions' do
7
- before do
8
- init_database(:conversions)
9
- init_models
10
- Pet.run_data_miner!
11
- end
12
-
13
- it 'converts convertible units' do
14
- Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to(:kilograms)
15
- end
16
- end
@@ -1,51 +0,0 @@
1
- require 'helper'
2
-
3
- class MyPet < ActiveRecord::Base
4
- PETS = File.expand_path('../pets.csv', __FILE__)
5
- COLOR_DICTIONARY_ENGLISH = File.expand_path('../pet_color_dictionary.en.csv', __FILE__)
6
-
7
- self.primary_key = "name"
8
- col :name
9
- col :color_id
10
- col :age, :type => :integer
11
- col :age_units
12
- col :weight, :type => :float
13
- col :weight_units
14
- col :height, :type => :integer
15
- col :height_units
16
- col :favorite_food
17
- col :command_phrase
18
-
19
- data_miner do
20
- process :auto_upgrade!
21
- import("A list of pets", :url => "file://#{PETS}") do
22
- key :name
23
- store :age
24
- store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
25
- store :weight
26
- store :favorite_food, :nullify_blank_strings => true
27
- store :command_phrase
28
- store :height, :units => :centimetres
29
- end
30
- end
31
- end
32
-
33
- describe 'DataMiner with Conversions' do
34
- it 'happens when DataMiner.unit_converter is nil' do
35
- DataMiner.unit_converter.must_be_nil
36
- end
37
-
38
- it 'converts convertible units' do
39
- init_database(nil)
40
- MyPet.run_data_miner!
41
- MyPet.find('Pierre').weight.must_equal 4.4
42
- end
43
-
44
- it 'raises an error if conversions are attempted' do
45
- init_database(nil)
46
- lambda do
47
- init_models
48
- Pet.run_data_miner!
49
- end.must_raise DataMiner::Attribute::NoConverterSet
50
- end
51
- end
@@ -1,52 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- require 'helper'
3
- init_database
4
- init_models
5
-
6
- describe DataMiner::Run::ColumnStatistic do
7
- describe "when advanced statistics are enabled" do
8
- before do
9
- DataMiner.per_column_statistics = true
10
- Pet.delete_all
11
- DataMiner::Run.delete_all
12
- DataMiner::Run::ColumnStatistic.delete_all
13
- Pet.run_data_miner!
14
- end
15
-
16
- after do
17
- DataMiner.per_column_statistics = false
18
- end
19
-
20
- it "keeps null count" do
21
- Pet.data_miner_runs.first.initial_column_statistics(:breed_id).null_count.must_equal 0
22
- Pet.data_miner_runs.first.final_column_statistics(:breed_id).null_count.must_equal 1
23
-
24
- Pet.data_miner_runs.first.initial_column_statistics(:command_phrase).null_count.must_equal 0
25
- Pet.data_miner_runs.first.final_column_statistics(:command_phrase).null_count.must_equal 0
26
- end
27
-
28
- it "keeps max and min (as strings)" do
29
- Pet.data_miner_runs.first.initial_column_statistics(:age).max.must_equal 'nil'
30
- Pet.data_miner_runs.first.final_column_statistics(:age).max.must_include '17'
31
- end
32
-
33
- it "keeps average and sum" do
34
- Pet.data_miner_runs.first.initial_column_statistics(:age).average.must_be_nil
35
- Pet.data_miner_runs.first.final_column_statistics(:age).average.must_equal 7.0
36
-
37
- Pet.data_miner_runs.first.initial_column_statistics(:age).sum.must_be_nil
38
- Pet.data_miner_runs.first.final_column_statistics(:age).sum.must_equal 28.0
39
- end
40
-
41
- it "keeps blank (empty string) count" do
42
- Pet.data_miner_runs.first.initial_column_statistics(:command_phrase).blank_count.must_equal 0
43
- Pet.data_miner_runs.first.final_column_statistics(:command_phrase).blank_count.must_equal 3
44
- end
45
-
46
- it "keeps zero count" do
47
- Pet.data_miner_runs.first.initial_column_statistics(:age).zero_count.must_equal 0
48
- Pet.data_miner_runs.first.final_column_statistics(:age).zero_count.must_equal 0
49
- end
50
-
51
- end
52
- end
@@ -1,26 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- require 'helper'
3
- init_database
4
- require 'earth'
5
-
6
- # use earth, which has a plethora of real-world data_miner blocks
7
- Earth.init :locality, :pet, :load_data_miner => true, :apply_schemas => true
8
-
9
- describe DataMiner do
10
- describe "being used by the Earth library's import steps" do
11
- describe "for pets" do
12
- it "can pull breed and species" do
13
- Breed.run_data_miner!
14
- Breed.find('Golden Retriever').species.must_equal Species.find('dog')
15
- end
16
- end
17
- describe "for localities" do
18
- it "can handle non-latin characters" do
19
- Country.run_data_miner!
20
- Country.find('DE').name.must_equal 'Germany'
21
- Country.find('AX').name.must_equal 'Åland Islands'
22
- Country.find('CI').name.must_equal "Côte d'Ivoire"
23
- end
24
- end
25
- end
26
- end
data/test/test_safety.rb DELETED
@@ -1,84 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- require 'helper'
3
- init_database
4
- init_models
5
- require 'earth'
6
-
7
- require 'lock_method'
8
- DataMiner::Run.lock_method :start
9
-
10
- # use earth, which has a plethora of real-world data_miner blocks
11
- Earth.init :locality, :pet, :load_data_miner => true, :apply_schemas => true
12
-
13
- describe DataMiner do
14
- describe "when being run in a multi-threaded environment" do
15
- before do
16
- @old_thread_abort_on_exception = Thread.abort_on_exception
17
- Thread.abort_on_exception = false
18
- end
19
-
20
- after do
21
- Thread.abort_on_exception = @old_thread_abort_on_exception
22
- end
23
-
24
- it "tries not to duplicate data" do
25
- Breed.delete_all
26
- Breed.run_data_miner!
27
- reference_count = Breed.count
28
- Breed.delete_all
29
- threads = (0..2).map do |i|
30
- Thread.new do
31
- # $stderr.write "Thread #{i} starting\n"
32
- Breed.run_data_miner!
33
- # $stderr.write "Thread #{i} done\n"
34
- end
35
- end
36
- exceptions = []
37
- threads.each do |t|
38
- begin
39
- t.join
40
- rescue
41
- exceptions << $!
42
- end
43
- end
44
- exceptions.length.must_equal 2
45
- exceptions.each do |exception|
46
- exception.must_be_kind_of LockMethod::Locked
47
- end
48
- Breed.count.must_equal reference_count
49
- end
50
-
51
- it "allows you to clear locks if necessary" do
52
- threads = (0..2).map do |i|
53
- Thread.new do
54
- # $stderr.write "Thread #{i} starting\n"
55
- case i
56
- when 0
57
- Breed.run_data_miner!
58
- when 1
59
- sleep 0.3
60
- DataMiner::Run.clear_locks
61
- Breed.run_data_miner!
62
- when 2
63
- # i will hit a lock!
64
- sleep 0.6
65
- Breed.run_data_miner!
66
- end
67
- # $stderr.write "Thread #{i} done\n"
68
- end
69
- end
70
- exceptions = []
71
- threads.each do |t|
72
- begin
73
- t.join
74
- rescue
75
- exceptions << $!
76
- end
77
- end
78
- exceptions.length.must_equal 1
79
- exceptions.each do |exception|
80
- exception.must_be_kind_of LockMethod::Locked
81
- end
82
- end
83
- end
84
- end
@@ -1,16 +0,0 @@
1
- require 'helper'
2
-
3
- describe 'DataMiner unit conversion' do
4
- it "blows up if you don't specify a converter" do
5
- output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_without_unit_converter.rb', __FILE__)}`
6
- refute $?.success?, output
7
- end
8
- it 'can convert with alchemist' do
9
- output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_alchemist.rb', __FILE__)}`
10
- assert $?.success?, output
11
- end
12
- it 'can convert with conversions' do
13
- output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_conversions.rb', __FILE__)}`
14
- assert $?.success?, output
15
- end
16
- end