data_miner 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/data_miner.gemspec +1 -1
- data/lib/data_miner/configuration.rb +8 -16
- data/lib/data_miner/run.rb +1 -1
- data/lib/data_miner/target.rb +20 -1
- data/lib/data_miner.rb +4 -5
- data/test/data_miner_test.rb +1 -14
- metadata +1 -1
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.0
|
1
|
+
0.3.1
|
data/data_miner.gemspec
CHANGED
data/lib/data_miner/configuration.rb
CHANGED
@@ -25,11 +25,7 @@ module DataMiner
|
|
25
25
|
self.runnable_counter += 1
|
26
26
|
runnables << DataMiner::Import.new(self, runnable_counter, options, &block)
|
27
27
|
end
|
28
|
-
|
29
|
-
def before_invoke
|
30
|
-
self.class.create_tables
|
31
|
-
end
|
32
|
-
|
28
|
+
|
33
29
|
def after_invoke
|
34
30
|
if unique_indices.empty?
|
35
31
|
raise(MissingHashColumn, "No unique_index defined for #{klass.name}, so you need a row_hash:string column.") unless klass.column_names.include?('row_hash')
|
@@ -40,18 +36,20 @@ module DataMiner
|
|
40
36
|
|
41
37
|
# Mine data for this class.
|
42
38
|
def run
|
43
|
-
target = DataMiner::Target.find_or_create_by_name klass.name
|
39
|
+
target = DataMiner::Target.find(klass.name)
|
44
40
|
run = target.runs.create! :started_at => Time.now
|
41
|
+
finished = false
|
45
42
|
begin
|
46
43
|
runnables.each(&:run)
|
44
|
+
finished = true
|
47
45
|
ensure
|
48
|
-
run.update_attributes! :ended_at => Time.now
|
46
|
+
run.update_attributes! :ended_at => Time.now, :finished => finished
|
49
47
|
end
|
50
48
|
nil
|
51
49
|
end
|
52
50
|
|
53
51
|
cattr_accessor :classes
|
54
|
-
self.classes = []
|
52
|
+
self.classes = Set.new
|
55
53
|
class << self
|
56
54
|
# Mine data. Defaults to all classes touched by DataMiner.
|
57
55
|
#
|
@@ -64,14 +62,7 @@ module DataMiner
|
|
64
62
|
end
|
65
63
|
end
|
66
64
|
end
|
67
|
-
|
68
|
-
# Queue up all the ActiveRecord classes that DataMiner should touch.
|
69
|
-
#
|
70
|
-
# Generally done in <tt>config/initializers/data_miner_config.rb</tt>.
|
71
|
-
def enqueue(&block)
|
72
|
-
yield self.classes
|
73
|
-
end
|
74
|
-
|
65
|
+
|
75
66
|
def create_tables
|
76
67
|
c = ActiveRecord::Base.connection
|
77
68
|
unless c.table_exists?('data_miner_targets')
|
@@ -85,6 +76,7 @@ module DataMiner
|
|
85
76
|
unless c.table_exists?('data_miner_runs')
|
86
77
|
c.create_table 'data_miner_runs', :options => 'ENGINE=InnoDB default charset=utf8' do |t|
|
87
78
|
t.string 'data_miner_target_id'
|
79
|
+
t.boolean 'finished'
|
88
80
|
t.datetime 'started_at'
|
89
81
|
t.datetime 'ended_at'
|
90
82
|
t.datetime 'created_at'
|
data/lib/data_miner/run.rb
CHANGED
data/lib/data_miner/target.rb
CHANGED
@@ -2,6 +2,25 @@ module DataMiner
|
|
2
2
|
class Target < ActiveRecord::Base
|
3
3
|
set_table_name 'data_miner_targets'
|
4
4
|
set_primary_key :name
|
5
|
-
has_many :runs, :foreign_key => 'data_miner_target_id'
|
5
|
+
has_many :runs, :class_name => '::DataMiner::Run', :foreign_key => 'data_miner_target_id'
|
6
|
+
|
7
|
+
def klass
|
8
|
+
name.constantize
|
9
|
+
end
|
10
|
+
|
11
|
+
def run
|
12
|
+
klass.data_miner_config.run
|
13
|
+
end
|
14
|
+
|
15
|
+
def included_in_list_of_targets
|
16
|
+
msg = "must have a data_miner block"
|
17
|
+
unless DataMiner.classes.include?(name.constantize)
|
18
|
+
errors.add :name, msg
|
19
|
+
end
|
20
|
+
rescue NameError
|
21
|
+
errors.add :name, msg
|
22
|
+
end
|
23
|
+
|
24
|
+
validate :included_in_list_of_targets
|
6
25
|
end
|
7
26
|
end
|
data/lib/data_miner.rb
CHANGED
@@ -38,10 +38,6 @@ module DataMiner
|
|
38
38
|
DataMiner::Configuration.run options
|
39
39
|
end
|
40
40
|
|
41
|
-
def self.enqueue(&block)
|
42
|
-
DataMiner::Configuration.enqueue &block
|
43
|
-
end
|
44
|
-
|
45
41
|
def self.classes
|
46
42
|
DataMiner::Configuration.classes
|
47
43
|
end
|
@@ -53,11 +49,14 @@ end
|
|
53
49
|
|
54
50
|
ActiveRecord::Base.class_eval do
|
55
51
|
def self.data_miner(&block)
|
52
|
+
DataMiner.classes.add self
|
53
|
+
DataMiner.create_tables
|
54
|
+
DataMiner::Target.find_or_create_by_name name
|
55
|
+
|
56
56
|
# this is class_eval'ed here so that each ActiveRecord descendant has its own copy, or none at all
|
57
57
|
class_eval { cattr_accessor :data_miner_config }
|
58
58
|
self.data_miner_config = DataMiner::Configuration.new self
|
59
59
|
|
60
|
-
data_miner_config.before_invoke
|
61
60
|
Blockenspiel.invoke block, data_miner_config
|
62
61
|
data_miner_config.after_invoke
|
63
62
|
end
|
data/test/data_miner_test.rb
CHANGED
@@ -590,19 +590,6 @@ class CensusRegion < ActiveRecord::Base
|
|
590
590
|
end
|
591
591
|
end
|
592
592
|
|
593
|
-
DataMiner.enqueue do |queue|
|
594
|
-
queue << Country
|
595
|
-
queue << Airport
|
596
|
-
queue << CensusRegion
|
597
|
-
queue << AutomobileFuelType # OK
|
598
|
-
queue << AutomobileModel # OK
|
599
|
-
queue << AutomobileMake # OK
|
600
|
-
queue << AutomobileModelYear # OK
|
601
|
-
queue << AutomobileVariant # OK
|
602
|
-
queue << AutomobileMakeFleetYear # OK; third-party data not yet hosted on third-party site
|
603
|
-
queue << AutomobileMakeYear # OK
|
604
|
-
end
|
605
|
-
|
606
593
|
class DataMinerTest < Test::Unit::TestCase
|
607
594
|
should "be idempotent" do
|
608
595
|
Country.data_miner_config.run
|
@@ -648,7 +635,7 @@ class DataMinerTest < Test::Unit::TestCase
|
|
648
635
|
end
|
649
636
|
|
650
637
|
# should "mine multiple classes in the correct order" do
|
651
|
-
# DataMiner.run
|
638
|
+
# DataMiner.run
|
652
639
|
# uy = Country.find_by_iso_3166('UY')
|
653
640
|
# assert_equal 'Uruguay', uy.name
|
654
641
|
# end
|