data_miner 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/data_miner.gemspec +1 -1
- data/lib/data_miner/configuration.rb +8 -16
- data/lib/data_miner/run.rb +1 -1
- data/lib/data_miner/target.rb +20 -1
- data/lib/data_miner.rb +4 -5
- data/test/data_miner_test.rb +1 -14
- metadata +1 -1
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.0
|
1
|
+
0.3.1
|
data/data_miner.gemspec
CHANGED
data/lib/data_miner/configuration.rb
CHANGED
@@ -25,11 +25,7 @@ module DataMiner
|
|
25
25
|
self.runnable_counter += 1
|
26
26
|
runnables << DataMiner::Import.new(self, runnable_counter, options, &block)
|
27
27
|
end
|
28
|
-
|
29
|
-
def before_invoke
|
30
|
-
self.class.create_tables
|
31
|
-
end
|
32
|
-
|
28
|
+
|
33
29
|
def after_invoke
|
34
30
|
if unique_indices.empty?
|
35
31
|
raise(MissingHashColumn, "No unique_index defined for #{klass.name}, so you need a row_hash:string column.") unless klass.column_names.include?('row_hash')
|
@@ -40,18 +36,20 @@ module DataMiner
|
|
40
36
|
|
41
37
|
# Mine data for this class.
|
42
38
|
def run
|
43
|
-
target = DataMiner::Target.
|
39
|
+
target = DataMiner::Target.find(klass.name)
|
44
40
|
run = target.runs.create! :started_at => Time.now
|
41
|
+
finished = false
|
45
42
|
begin
|
46
43
|
runnables.each(&:run)
|
44
|
+
finished = true
|
47
45
|
ensure
|
48
|
-
run.update_attributes! :ended_at => Time.now
|
46
|
+
run.update_attributes! :ended_at => Time.now, :finished => finished
|
49
47
|
end
|
50
48
|
nil
|
51
49
|
end
|
52
50
|
|
53
51
|
cattr_accessor :classes
|
54
|
-
self.classes =
|
52
|
+
self.classes = Set.new
|
55
53
|
class << self
|
56
54
|
# Mine data. Defaults to all classes touched by DataMiner.
|
57
55
|
#
|
@@ -64,14 +62,7 @@ module DataMiner
|
|
64
62
|
end
|
65
63
|
end
|
66
64
|
end
|
67
|
-
|
68
|
-
# Queue up all the ActiveRecord classes that DataMiner should touch.
|
69
|
-
#
|
70
|
-
# Generally done in <tt>config/initializers/data_miner_config.rb</tt>.
|
71
|
-
def enqueue(&block)
|
72
|
-
yield self.classes
|
73
|
-
end
|
74
|
-
|
65
|
+
|
75
66
|
def create_tables
|
76
67
|
c = ActiveRecord::Base.connection
|
77
68
|
unless c.table_exists?('data_miner_targets')
|
@@ -85,6 +76,7 @@ module DataMiner
|
|
85
76
|
unless c.table_exists?('data_miner_runs')
|
86
77
|
c.create_table 'data_miner_runs', :options => 'ENGINE=InnoDB default charset=utf8' do |t|
|
87
78
|
t.string 'data_miner_target_id'
|
79
|
+
t.boolean 'finished'
|
88
80
|
t.datetime 'started_at'
|
89
81
|
t.datetime 'ended_at'
|
90
82
|
t.datetime 'created_at'
|
data/lib/data_miner/run.rb
CHANGED
data/lib/data_miner/target.rb
CHANGED
@@ -2,6 +2,25 @@ module DataMiner
|
|
2
2
|
class Target < ActiveRecord::Base
|
3
3
|
set_table_name 'data_miner_targets'
|
4
4
|
set_primary_key :name
|
5
|
-
has_many :runs, :foreign_key => 'data_miner_target_id'
|
5
|
+
has_many :runs, :class_name => '::DataMiner::Run', :foreign_key => 'data_miner_target_id'
|
6
|
+
|
7
|
+
def klass
|
8
|
+
name.constantize
|
9
|
+
end
|
10
|
+
|
11
|
+
def run
|
12
|
+
klass.data_miner_config.run
|
13
|
+
end
|
14
|
+
|
15
|
+
def included_in_list_of_targets
|
16
|
+
msg = "must have a data_miner block"
|
17
|
+
unless DataMiner.classes.include?(name.constantize)
|
18
|
+
errors.add :name, msg
|
19
|
+
end
|
20
|
+
rescue NameError
|
21
|
+
errors.add :name, msg
|
22
|
+
end
|
23
|
+
|
24
|
+
validate :included_in_list_of_targets
|
6
25
|
end
|
7
26
|
end
|
data/lib/data_miner.rb
CHANGED
@@ -38,10 +38,6 @@ module DataMiner
|
|
38
38
|
DataMiner::Configuration.run options
|
39
39
|
end
|
40
40
|
|
41
|
-
def self.enqueue(&block)
|
42
|
-
DataMiner::Configuration.enqueue &block
|
43
|
-
end
|
44
|
-
|
45
41
|
def self.classes
|
46
42
|
DataMiner::Configuration.classes
|
47
43
|
end
|
@@ -53,11 +49,14 @@ end
|
|
53
49
|
|
54
50
|
ActiveRecord::Base.class_eval do
|
55
51
|
def self.data_miner(&block)
|
52
|
+
DataMiner.classes.add self
|
53
|
+
DataMiner.create_tables
|
54
|
+
DataMiner::Target.find_or_create_by_name name
|
55
|
+
|
56
56
|
# this is class_eval'ed here so that each ActiveRecord descendant has its own copy, or none at all
|
57
57
|
class_eval { cattr_accessor :data_miner_config }
|
58
58
|
self.data_miner_config = DataMiner::Configuration.new self
|
59
59
|
|
60
|
-
data_miner_config.before_invoke
|
61
60
|
Blockenspiel.invoke block, data_miner_config
|
62
61
|
data_miner_config.after_invoke
|
63
62
|
end
|
data/test/data_miner_test.rb
CHANGED
@@ -590,19 +590,6 @@ class CensusRegion < ActiveRecord::Base
|
|
590
590
|
end
|
591
591
|
end
|
592
592
|
|
593
|
-
DataMiner.enqueue do |queue|
|
594
|
-
queue << Country
|
595
|
-
queue << Airport
|
596
|
-
queue << CensusRegion
|
597
|
-
queue << AutomobileFuelType # OK
|
598
|
-
queue << AutomobileModel # OK
|
599
|
-
queue << AutomobileMake # OK
|
600
|
-
queue << AutomobileModelYear # OK
|
601
|
-
queue << AutomobileVariant # OK
|
602
|
-
queue << AutomobileMakeFleetYear # OK; third-party data not yet hosted on third-party site
|
603
|
-
queue << AutomobileMakeYear # OK
|
604
|
-
end
|
605
|
-
|
606
593
|
class DataMinerTest < Test::Unit::TestCase
|
607
594
|
should "be idempotent" do
|
608
595
|
Country.data_miner_config.run
|
@@ -648,7 +635,7 @@ class DataMinerTest < Test::Unit::TestCase
|
|
648
635
|
end
|
649
636
|
|
650
637
|
# should "mine multiple classes in the correct order" do
|
651
|
-
# DataMiner.run
|
638
|
+
# DataMiner.run
|
652
639
|
# uy = Country.find_by_iso_3166('UY')
|
653
640
|
# assert_equal 'Uruguay', uy.name
|
654
641
|
# end
|