data_miner-ruby19 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Brighter Planet
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,271 @@
1
+ =data_miner
2
+
3
+ Programmatically import useful data into your ActiveRecord models.
4
+
5
+ (see http://wiki.github.com/seamusabshere/data_miner for more examples)
6
+
7
+ ==Quick start
8
+
9
+ You define <tt>data_miner</tt> blocks in your ActiveRecord models. For example, in <tt>app/models/country.rb</tt>:
10
+
11
+ class Country < ActiveRecord::Base
12
+ set_primary_key :iso_3166_code
13
+
14
+ data_miner do
15
+ schema do
16
+ string 'iso_3166_code'
17
+ string 'name'
18
+ end
19
+
20
+ import 'the official ISO country list',
21
+ :url => 'http://www.iso.org/iso/list-en1-semic-3.txt',
22
+ :skip => 2,
23
+ :headers => false,
24
+ :delimiter => ';',
25
+ :encoding => 'ISO-8859-1' do
26
+ key 'iso_3166_code', :field_number => 1
27
+ store 'name', :field_number => 0
28
+ end
29
+ end
30
+ end
31
+
32
+ Now you can run:
33
+
34
+ irb(main):001:0> Country.run_data_miner!
35
+ => nil
36
+
37
+ ==Advanced usage
38
+
39
+ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA's list of aircraft with the US Department of Transportation's list of aircraft:
40
+
41
+ class Aircraft < ActiveRecord::Base
42
+ # Tell ActiveRecord that we want to use a string primary key.
43
+ # This makes it easier to repeatedly truncate and re-import this
44
+ # table without breaking associations.
45
+ set_primary_key :icao_code
46
+
47
+ # A dictionary between BTS aircraft type codes and ICAO aircraft
48
+ # codes that uses string similarity instead of exact matching.
49
+ # This is preferable to typing everything out.
50
+ def self.bts_name_dictionary
51
+ # Sorry for documenting the LooseTightDictionary gem here, but it's useful
52
+ @_bts_dictionary ||= LooseTightDictionary.new(
53
+ # The first argument is the source... the possible matches. Most Enumerables will do.
54
+ RemoteTable.new(:url => 'http://www.transtats.bts.gov/Download_Lookup.asp?Lookup=L_AIRCRAFT_TYPE', :select => lambda { |record| record['Code'].to_i.between?(1, 998) }),
55
+ # Tightenings optionally pull out what is important on both sides of a potential match
56
+ :tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
57
+ # Identities optionally require a particular capture from both sides of a match to be equal
58
+ :identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
59
+ # Blockings restrict comparisons to a subset where everything matches the blocking
60
+ :blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
61
+ # This means that lookups that don't match a blocking won't be compared to possible matches that **do** match a blocking.
62
+ # This is useful because we say /boeing/ and only boeings are matched against other boeings.
63
+ :blocking_only => true,
64
+ # Tell the dictionary how to read things from the source.
65
+ :right_reader => lambda { |record| record['Description'] }
66
+ )
67
+ end
68
+
69
+ # A dictionary between what appear to be ICAO aircraft names and
70
+ # objects of this class itself.
71
+ # Warning: self-referential (it calls Aircraft.all) so it should be run after the first DataMiner step.
72
+ def self.icao_name_dictionary
73
+ @_icao_dictionary ||= LooseTightDictionary.new Aircraft.all,
74
+ :tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
75
+ :identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
76
+ :blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
77
+ :right_reader => lambda { |record| record.manufacturer_name.to_s + ' ' + record.name.to_s }
78
+ end
79
+
80
+ # This responds to the "Matcher" interface as defined by DataMiner.
81
+ # In other words, it takes Matcher#match(*args) and returns something.
82
+ class BtsMatcher
83
+ attr_reader :wants
84
+ def initialize(wants)
85
+ @wants = wants
86
+ end
87
+ def match(raw_faa_icao_record)
88
+ @_match ||= Hash.new
89
+ return @_match[raw_faa_icao_record] if @_match.has_key?(raw_faa_icao_record)
90
+ faa_icao_record = [ raw_faa_icao_record['Manufacturer'] + ' ' + raw_faa_icao_record['Model'] ]
91
+ bts_record = Aircraft.bts_name_dictionary.left_to_right faa_icao_record
92
+ retval = case wants
93
+ when :bts_aircraft_type_code
94
+ bts_record['Code']
95
+ when :bts_name
96
+ bts_record['Description']
97
+ end if bts_record
98
+ @_match[raw_faa_icao_record] = retval
99
+ end
100
+ end
101
+
102
+ # Another class that implements the "Matcher" interface as expected by DataMiner.
103
+ class FuelUseMatcher
104
+ def match(raw_fuel_use_record)
105
+ @_match ||= Hash.new
106
+ return @_match[raw_fuel_use_record] if @_match.has_key?(raw_fuel_use_record)
107
+ # First try assuming we have an ICAO code
108
+ aircraft_record = if raw_fuel_use_record['ICAO'] =~ /\A[0-9A-Z]+\z/
109
+ Aircraft.find_by_icao_code raw_fuel_use_record['ICAO']
110
+ end
111
+ # No luck? then try a fuzzy match
112
+ aircraft_record ||= if raw_fuel_use_record['Aircraft Name'].present?
113
+ Aircraft.icao_name_dictionary.left_to_right [ raw_fuel_use_record['Aircraft Name'] ]
114
+ end
115
+ if aircraft_record
116
+ @_match[raw_fuel_use_record] = aircraft_record.icao_code
117
+ else
118
+ # While we're developing the dictionary, we want it to blow up until we have 100% matchability
119
+ raise "Didn't find a match for #{raw_fuel_use_record['Aircraft Name']} (#{raw_fuel_use_record['ICAO']}), which we found in the fuel use spreadsheet"
120
+ end
121
+ end
122
+ end
123
+
124
+ # This responds to the "Responder" interface as expected by Errata.
125
+ # Basically it lets you say "Is a DC plane" in the errata file and
126
+ # have it map to a Ruby method.
127
+ class Guru
128
+ def is_a_dc_plane?(row)
129
+ row['Designator'] =~ /^DC\d/i
130
+ end
131
+ def is_a_g159?(row)
132
+ row['Designator'] =~ /^G159$/
133
+ end
134
+ def is_a_galx?(row)
135
+ row['Designator'] =~ /^GALX$/
136
+ end
137
+ def method_missing(method_id, *args, &block)
138
+ if method_id.to_s =~ /\Ais_n?o?t?_?attributed_to_([^\?]+)/
139
+ manufacturer_name = $1
140
+ manufacturer_regexp = Regexp.new(manufacturer_name.gsub('_', ' ?'), Regexp::IGNORECASE)
141
+ matches = manufacturer_regexp.match(args.first['Manufacturer']) # row['Manufacturer'] =~ /mcdonnell douglas/i
142
+ method_id.to_s.include?('not_attributed') ? matches.nil? : !matches.nil?
143
+ else
144
+ super
145
+ end
146
+ end
147
+ end
148
+
149
+ data_miner do
150
+ # In our app, we defined DataMiner::Run.allowed? to return false if a run
151
+ # has taken place in the last hour (among other things).
152
+ # By raising DataMiner::Skip, we skip this run but call it a success.
153
+ process "Don't re-import too often" do
154
+ raise DataMiner::Skip unless DataMiner::Run.allowed? Aircraft
155
+ end
156
+
157
+ # Define the database schema in-line.
158
+ # It will destructively and automatically add/remove columns.
159
+ # This is "OK" because you can always just re-run the import script to get the data back.
160
+ # PS. if we were using DataMapper, we wouldn't need this.
161
+ schema :options => 'ENGINE=InnoDB default charset=utf8' do
162
+ string 'icao_code'
163
+ string 'manufacturer_name'
164
+ string 'name'
165
+ string 'bts_name'
166
+ string 'bts_aircraft_type_code'
167
+ string 'brighter_planet_aircraft_class_code'
168
+ string 'fuel_use_aircraft_name'
169
+ float 'm3'
170
+ string 'm3_units'
171
+ float 'm2'
172
+ string 'm2_units'
173
+ float 'm1'
174
+ string 'm1_units'
175
+ float 'endpoint_fuel'
176
+ string 'endpoint_fuel_units'
177
+ float 'seats'
178
+ float 'distance'
179
+ string 'distance_units'
180
+ float 'load_factor'
181
+ float 'freight_share'
182
+ float 'payload'
183
+ float 'weighting'
184
+ index 'bts_aircraft_type_code'
185
+ end
186
+
187
+ # The FAA publishes a document to help people identify aircraft by different names.
188
+ ('A'..'Z').each do |letter|
189
+ import( "ICAO aircraft codes starting with the letter #{letter} used by the FAA",
190
+ # The master URL of the source file (one for every letter)
191
+ :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
192
+ # The RFC-style errata... note that it will use the Guru class we defined above. See the Errata gem for more details.
193
+ :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :responder => Aircraft::Guru.new),
194
+ # If it's not UTF-8, you should say what it is so that we can iconv it!
195
+ :encoding => 'windows-1252',
196
+ # Nokogiri is being used to grab each row starting from the second
197
+ :row_xpath => '//table/tr[2]/td/table/tr',
198
+ # ditto... XPath for Nokogiri
199
+ :column_xpath => 'td' ) do
200
+ # The code that they use is in fact the ICAO code!
201
+ key 'icao_code', :field_name => 'Designator'
202
+ # We get this for free
203
+ store 'manufacturer_name', :field_name => 'Manufacturer'
204
+ # ditto
205
+ store 'name', :field_name => 'Model'
206
+ # Use the loose-tight dictionary.
207
+ # It gets the entire input row to play with before deciding on an output.
208
+ store 'bts_aircraft_type_code', :matcher => Aircraft::BtsMatcher.new(:bts_aircraft_type_code)
209
+ store 'bts_name', :matcher => Aircraft::BtsMatcher.new(:bts_name)
210
+ end
211
+ end
212
+
213
+ # Pull in some data that might only be important to Brighter Planet
214
+ import "Brighter Planet's aircraft class codes",
215
+ :url => 'http://static.brighterplanet.com/science/data/transport/air/bts_aircraft_type/bts_aircraft_types-brighter_planet_aircraft_classes.csv' do
216
+ key 'bts_aircraft_type_code', :field_name => 'bts_aircraft_type'
217
+ store 'brighter_planet_aircraft_class_code'
218
+ end
219
+
220
+ # Pull in fuel use equation (y = m3*x^3 + m2*x^2 + m1*x + endpoint_fuel).
221
+ # This data comes from the EEA.
222
+ import "pre-calculated fuel use equation coefficients",
223
+ :url => 'http://static.brighterplanet.com/science/data/transport/air/fuel_use/aircraft_fuel_use_formulae.ods',
224
+ :select => lambda { |row| row['ICAO'].present? or row['Aircraft Name'].present? } do
225
+ # We want to key on ICAO code, but since it's sometimes missing, use the loose-tight dictionary we defined above.
226
+ key 'icao_code', :matcher => Aircraft::FuelUseMatcher.new
227
+ # Keep the name for sanity checking. Yes, we have 3 different "name" fields... they should all refer to the same aircraft.
228
+ store 'fuel_use_aircraft_name', :field_name => 'Aircraft Name'
229
+ store 'm3'
230
+ store 'm2'
231
+ store 'm1'
232
+ store 'endpoint_fuel', :field_name => 'b'
233
+ end
234
+
235
+ # Use arel and the weighted_average gem to do some crazy averaging.
236
+ # This assumes that you're dealing with the BTS T-100 flight segment data.
237
+ # See http://data.brighterplanet.com/flight_segments for a pre-sanitized version.
238
+ process "Derive some average flight characteristics from flight segments" do
239
+ FlightSegment.run_data_miner!
240
+ aircraft = Aircraft.arel_table
241
+ segments = FlightSegment.arel_table
242
+
243
+ conditional_relation = aircraft[:bts_aircraft_type_code].eq(segments[:bts_aircraft_type_code])
244
+ update_all "seats = (#{FlightSegment.weighted_average_relation(:seats, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
245
+ update_all "distance = (#{FlightSegment.weighted_average_relation(:distance, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
246
+ update_all "load_factor = (#{FlightSegment.weighted_average_relation(:load_factor, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
247
+ update_all "freight_share = (#{FlightSegment.weighted_average_relation(:freight_share, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
248
+ update_all "payload = (#{FlightSegment.weighted_average_relation(:payload, :weighted_by => :passengers, :disaggregate_by => :departures_performed).where(conditional_relation).to_sql})"
249
+
250
+ update_all "weighting = (#{segments.project(segments[:passengers].sum).where(aircraft[:bts_aircraft_type_code].eq(segments[:bts_aircraft_type_code])).to_sql})"
251
+ end
252
+
253
+ # And finally re-run the import of resources that depend on this resource.
254
+ # Don't worry about calling Aircraft.run_data_miner! at the top of AircraftManufacturer's data_miner block;
255
+ # that's the right way to do dependencies. It won't get called twice in the same run.
256
+ [ AircraftManufacturer ].each do |synthetic_resource|
257
+ process "Synthesize #{synthetic_resource}" do
258
+ synthetic_resource.run_data_miner!
259
+ end
260
+ end
261
+ end
262
+ end
263
+
264
+ ==Authors
265
+
266
+ * Seamus Abshere <seamus@abshere.net>
267
+ * Andy Rossmeissl <andy@rossmeissl.net>
268
+
269
+ ==Copyright
270
+
271
+ Copyright (c) 2010 Brighter Planet. See LICENSE for details.
@@ -0,0 +1,136 @@
1
+ require 'active_support'
2
+ require 'active_support/version'
3
+ %w{
4
+ active_support/core_ext/array/conversions
5
+ active_support/core_ext/string/access
6
+ active_support/core_ext/string/multibyte
7
+ }.each do |active_support_3_requirement|
8
+ require active_support_3_requirement
9
+ end if ActiveSupport::VERSION::MAJOR == 3
10
+
11
+ require 'active_record'
12
+ require 'blockenspiel'
13
+ require 'conversions'
14
+ require 'errata'
15
+ require 'remote_table'
16
+ require 'escape'
17
+ require 'andand'
18
+ require 'log4r'
19
+ require 'fileutils'
20
+ require 'tmpdir'
21
+ require 'zlib'
22
+
23
+ require 'data_miner/attribute'
24
+ require 'data_miner/base'
25
+ require 'data_miner/dictionary'
26
+ require 'data_miner/import'
27
+ require 'data_miner/tap'
28
+ require 'data_miner/process'
29
+ require 'data_miner/run'
30
+ require 'data_miner/schema'
31
+
32
# Top-level namespace of the data_miner gem: error classes, a shared logger,
# logging helpers and the entry point that runs the registered resources.
module DataMiner
  # NOTE(review): raised elsewhere in the gem; usage is not visible in this file.
  class MissingHashColumn < StandardError; end
  # NOTE(review): raised elsewhere in the gem; usage is not visible in this file.
  class Finish < StandardError; end
  # Raised from inside a data_miner step to skip the current run.
  class Skip < StandardError; end

  # Logger shared by the whole gem; set lazily by start_logging.
  mattr_accessor :logger

  # Sets up DataMiner.logger once; does nothing if a logger is already set.
  #
  # Under Rails the Rails logger is reused. Otherwise a Log4r logger is built
  # that writes DEBUG/INFO to data_miner.log and WARN/ERROR/FATAL to stderr,
  # and the same logger is installed as ActiveRecord::Base.logger.
  def self.start_logging
    return if logger

    if defined? Rails
      self.logger = Rails.logger
    else
      # Mix Log4r into this module so its constants (FileOutputter, DEBUG, ...)
      # resolve below without a namespace prefix.
      class_eval { include Log4r }
      info_outputter = FileOutputter.new 'f1', :filename => 'data_miner.log'
      error_outputter = Outputter.stderr
      info_outputter.only_at DEBUG, INFO
      error_outputter.only_at WARN, ERROR, FATAL

      self.logger = Logger.new 'data_miner'
      logger.add info_outputter, error_outputter
      ActiveRecord::Base.logger = logger
    end
  end

  # Reports a problem: logs it as an error when RAILS_ENV is 'production' or
  # DONT_RAISE is 'true', and raises it (as a RuntimeError) otherwise.
  def self.log_or_raise(message)
    message = "[data_miner gem] #{message}"
    if ENV['RAILS_ENV'] == 'production' || ENV['DONT_RAISE'] == 'true'
      logger.error message
    else
      raise message
    end
  end

  # Logs +message+ at info level with the gem prefix.
  def self.log_info(message)
    logger.info "[data_miner gem] #{message}"
  end

  # Logs +message+ at debug level with the gem prefix.
  def self.log_debug(message)
    logger.debug "[data_miner gem] #{message}"
  end

  # Delegates to DataMiner::Base.run with the call stack preserved between
  # the individual runs, then clears that call stack.
  def self.run(options = {})
    DataMiner::Base.run options.merge(:preserve_call_stack_between_runs => true)
    DataMiner::Base.call_stack.clear
  end

  # Names of the resources registered with DataMiner::Base.
  def self.resource_names
    DataMiner::Base.resource_names
  end

  # TODO this should probably live somewhere else
  #
  # Runs +cmd+ in a subshell after collapsing each line break (and the spaces
  # around it) into a single space, so multi-line commands can be passed in.
  #
  # Returns the command's standard output.
  # Raises RuntimeError, quoting the command and its output, when the command
  # exits unsuccessfully.
  def self.backtick_with_reporting(cmd)
    cmd = cmd.gsub(/[ ]*\n[ ]*/m, ' ')
    output = `#{cmd}`
    unless $?.success?
      raise %{
        From the data_miner gem...

        Command failed:
        #{cmd}

        Output:
        #{output}
      }
    end
    # Previously the captured output was dropped and nil was returned.
    output
  end

end
101
+
102
# Adds the data_miner / x_data_miner class-level DSL to every ActiveRecord model.
ActiveRecord::Base.class_eval do
  # Drop-in replacement for data_miner that disables the block: it only makes
  # sure logging is set up and logs (at debug level) that the block was skipped.
  # The block itself is never invoked.
  def self.x_data_miner(&block)
    DataMiner.start_logging

    DataMiner.log_debug("Skipping data_miner block in #{self.name} because called as x_data_miner")
  end

  # Declares how this model's data is mined.
  #
  # Registers the model's name with DataMiner, gives the model its own
  # data_miner_base plus a few helper class methods, evaluates +block+ against
  # that base through Blockenspiel, and finally calls after_invoke on the base.
  def self.data_miner(&block)
    DataMiner.start_logging

    unless table_exists?
      DataMiner.log_debug("Database table `#{table_name}` doesn't exist. It might be created in the data_miner block, but if it's not, DataMiner probably won't work properly until you run a migration or otherwise fix the schema.")
    end

    # Register this resource only once, however often the block is declared.
    unless DataMiner.resource_names.include?(self.name)
      DataMiner.resource_names.push(self.name)
    end

    # this is class_eval'ed here so that each ActiveRecord descendant has its own copy, or none at all
    class_eval do
      cattr_accessor :data_miner_base

      # DataMiner::Run records whose resource_name is this model's name.
      def self.data_miner_runs
        DataMiner::Run.scoped(:conditions => { :resource_name => name })
      end

      # Runs this model's data_miner steps, forwarding +options+ to the base.
      def self.run_data_miner!(options = {})
        data_miner_base.run(options)
      end

      # Runs only the first DataMiner::Schema step, if one was declared.
      def self.execute_schema
        schema_step = data_miner_base.steps.find do |candidate|
          candidate.instance_of?(DataMiner::Schema)
        end
        schema_step.run(nil) if schema_step
      end
    end

    self.data_miner_base = DataMiner::Base.new(self)

    Blockenspiel.invoke(block, data_miner_base)

    data_miner_base.after_invoke
  end
end
@@ -0,0 +1,233 @@
1
module DataMiner
  # Describes how a single attribute of a resource gets its value from one row
  # of source data: where to read it (field name, field number, static value,
  # matcher, dictionary, synthesize proc) and how to clean it up afterwards
  # (chars, split, upcase, unit conversion, sprintf).
  class Attribute
    attr_accessor :step
    attr_accessor :name
    attr_accessor :options

    delegate :resource, :to => :step

    # Every option key accepted by #initialize.
    VALID_OPTIONS = [
      :from_units,
      :to_units,
      :static,
      :dictionary,
      :matcher,
      :field_name,
      :delimiter,
      :split,
      :units,
      :sprintf,
      :nullify,
      :overwrite,
      :upcase,
      :units_field_name,
      :units_field_number,
      :field_number,
      :chars,
      :synthesize
    ]

    # step    - the step this attribute belongs to (must respond to #resource).
    # name    - the attribute name on the resource.
    # options - a Hash restricted to VALID_OPTIONS; its keys are symbolized in
    #           place. Unknown keys are reported through DataMiner.log_or_raise.
    def initialize(step, name, options = {})
      options.symbolize_keys!

      @step = step
      @name = name

      invalid_option_keys = options.keys.select { |k| not VALID_OPTIONS.include? k }
      DataMiner.log_or_raise "Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence} (#{inspect})" if invalid_option_keys.any?
      @options = options
    end

    def inspect
      "Attribute(#{resource}##{name})"
    end

    # Looks +str+ up in the configured dictionary.
    def value_in_dictionary(str)
      dictionary.lookup str
    end

    # Reads this attribute's raw value out of +row+ and normalizes it.
    #
    # The raw value is the :static option if given, otherwise the field(s)
    # selected by :field_number (a Range is joined with the delimiter),
    # otherwise the field called field_name.
    #
    # Returns nil when the raw value is nil, the raw value itself when it is an
    # ActiveRecord::Base, and otherwise the cleaned-up value. The string found
    # in +row+ (or in the :static option) is never modified.
    def value_in_source(row)
      if wants_static?
        value = static
      elsif field_number
        if field_number.is_a?(Range)
          value = field_number.map { |n| row[n] }.join(delimiter)
        else
          value = row[field_number]
        end
      else
        value = row[field_name]
      end
      return nil if value.nil?
      return value if value.is_a?(ActiveRecord::Base) # escape valve for parsers that look up associations directly
      # String#to_s returns the receiver itself, so everything below must be
      # non-destructive or it would edit the caller's row in place.
      value = value.to_s
      # An out-of-range :chars yields nil; treat that as an empty string, the
      # same way do_split does.
      value = value[chars].to_s if wants_chars?
      value = do_split(value) if wants_split?
      # taken from old errata... maybe we want to do this here
      value = value.gsub(/[ ]+/, ' ')
      # text.gsub!('- ', '-')
      # turn an unescaped ~ into a space
      value = value.gsub(/([^\\])~/, '\1 ')
      value = value.strip
      value = value.upcase if wants_upcase?
      value = do_convert row, value if wants_conversion?
      value = do_sprintf value if wants_sprintf?
      value
    end

    # Hands the whole +row+ to the configured matcher and returns its answer.
    def match_row(row)
      matcher.match row
    end

    # Computes the final value for this attribute from +row+.
    #
    # A matcher, when configured, takes precedence over everything else.
    # Otherwise the source value is optionally passed through the dictionary,
    # and a :synthesize proc, when given, replaces the result entirely.
    def value_from_row(row)
      return match_row row if wants_matcher?
      value = value_in_source row
      return value if value.is_a? ActiveRecord::Base # carry through trapdoor
      value = value_in_dictionary value if wants_dictionary?
      value = synthesize.call(row) if wants_synthesize?
      value
    end

    # Assigns this attribute (and its <name>_units companion, when units are
    # wanted) on +record+ from +row+.
    #
    # this will overwrite nils, even if wants_overwriting? is false
    # returns true if an attr was changed, otherwise false; returns nil when
    # the record reads back nil although a non-nil value was assigned
    def set_record_from_row(record, row)
      return false if !wants_overwriting? && !record.send(name).nil?
      what_it_was = record.send name
      what_it_should_be = value_from_row row

      record.send "#{name}=", what_it_should_be
      record.send "#{name}_units=", (to_units || unit_from_source(row)).to_s if wants_units?

      what_it_is = record.send name
      if what_it_is.nil? && !what_it_should_be.nil?
        DataMiner.log_debug "ActiveRecord didn't like trying to set #{resource}.#{name} = #{what_it_should_be} (it came out as nil)"
        nil
      elsif what_it_is == what_it_was
        false
      else
        true
      end
    end

    # Reads the unit from the row's units field and returns it as an
    # underscored Symbol.
    def unit_from_source(row)
      row[units_field_name || units_field_number].to_s.strip.underscore.to_sym
    end

    # Converts +value+ (as a Float) from the source units to the target units,
    # each taken from the options or, failing that, from the row's units field.
    def do_convert(row, value)
      DataMiner.log_or_raise "If you use :from_units, you need to set :to_units (#{inspect})" unless wants_units?
      value.to_f.convert((from_units || unit_from_source(row)), (to_units || unit_from_source(row)))
    end

    # Formats +value+ with the :sprintf option, first coercing it to a Float
    # for an %f-style format or to an Integer for a %d-style format.
    def do_sprintf(value)
      if /\%[0-9\.]*f/.match sprintf
        value = value.to_f
      elsif /\%[0-9\.]*d/.match sprintf
        value = value.to_i
      end
      sprintf % value
    end

    # Splits +value+ on the :pattern of the :split option and returns the
    # :keep'th piece as a String ('' when that piece does not exist).
    def do_split(value)
      pattern = split_options[:pattern] || /\s+/ # default is split on whitespace
      keep = split_options[:keep] || 0 # default is keep first element
      value.to_s.split(pattern)[keep].to_s
    end

    # The database column type of this attribute on the resource.
    def column_type
      resource.columns_hash[name.to_s].type
    end

    # Our wants and needs :)
    def wants_split?
      split_options.present?
    end
    def wants_sprintf?
      sprintf.present?
    end
    def wants_upcase?
      upcase.present?
    end
    # true as soon as the :static key exists, even if its value is nil
    def wants_static?
      options.has_key? :static
    end
    # on unless explicitly set to false
    def wants_nullification?
      nullify != false
    end
    def wants_chars?
      chars.present?
    end
    def wants_synthesize?
      synthesize.is_a?(Proc)
    end
    # on unless explicitly set to false
    def wants_overwriting?
      overwrite != false
    end
    def wants_conversion?
      from_units.present? || units_field_name.present? || units_field_number.present?
    end
    def wants_units?
      to_units.present? || units_field_name.present? || units_field_number.present?
    end
    def wants_dictionary?
      options[:dictionary].present?
    end
    def wants_matcher?
      options[:matcher].present?
    end

    # Options that always have values
    def field_name
      (options[:field_name] || name).to_s
    end
    def delimiter
      (options[:delimiter] || ', ')
    end

    # Options that can't be referred to by their names
    def split_options
      options[:split]
    end

    def from_units
      options[:from_units]
    end
    # :units is accepted as a synonym for :to_units
    def to_units
      options[:to_units] || options[:units]
    end
    def sprintf
      options[:sprintf]
    end
    def nullify
      options[:nullify]
    end
    def overwrite
      options[:overwrite]
    end
    def upcase
      options[:upcase]
    end
    def units_field_name
      options[:units_field_name]
    end
    def units_field_number
      options[:units_field_number]
    end
    def field_number
      options[:field_number]
    end
    def chars
      options[:chars]
    end
    def synthesize
      options[:synthesize]
    end
    def static
      options[:static]
    end
    # The :dictionary option, wrapped in a Dictionary unless it already is one (memoized).
    def dictionary
      @_dictionary ||= (options[:dictionary].is_a?(Dictionary) ? options[:dictionary] : Dictionary.new(options[:dictionary]))
    end
    # The :matcher option; a String is treated as a class name and instantiated (memoized).
    def matcher
      @_matcher ||= (options[:matcher].is_a?(String) ? options[:matcher].constantize.new : options[:matcher])
    end
  end
end