data_miner-ruby19 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Brighter Planet
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,271 @@
1
+ =data_miner
2
+
3
+ Programmatically import useful data into your ActiveRecord models.
4
+
5
+ (see http://wiki.github.com/seamusabshere/data_miner for more examples)
6
+
7
+ ==Quick start
8
+
9
+ You define <tt>data_miner</tt> blocks in your ActiveRecord models. For example, in <tt>app/models/country.rb</tt>:
10
+
11
+ class Country < ActiveRecord::Base
12
+ set_primary_key :iso_3166_code
13
+
14
+ data_miner do
15
+ schema do
16
+ string 'iso_3166_code'
17
+ string 'name'
18
+ end
19
+
20
+ import 'the official ISO country list',
21
+ :url => 'http://www.iso.org/iso/list-en1-semic-3.txt',
22
+ :skip => 2,
23
+ :headers => false,
24
+ :delimiter => ';',
25
+ :encoding => 'ISO-8859-1' do
26
+ key 'iso_3166_code', :field_number => 1
27
+ store 'name', :field_number => 0
28
+ end
29
+ end
30
+ end
31
+
32
+ Now you can run:
33
+
34
+ irb(main):001:0> Country.run_data_miner!
35
+ => nil
36
+
37
+ ==Advanced usage
38
+
39
+ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA's list of aircraft with the US Department of Transportation's list of aircraft:
40
+
41
+ class Aircraft < ActiveRecord::Base
42
+ # Tell ActiveRecord that we want to use a string primary key.
43
+ # This makes it easier to repeatedly truncate and re-import this
44
+ # table without breaking associations.
45
+ set_primary_key :icao_code
46
+
47
+ # A dictionary between BTS aircraft type codes and ICAO aircraft
48
+ # codes that uses string similarity instead of exact matching.
49
+ # This is preferable to typing everything out.
50
+ def self.bts_name_dictionary
51
+ # Sorry for documenting the LooseTightDictionary gem here, but it's useful
52
+ @_bts_dictionary ||= LooseTightDictionary.new(
53
+ # The first argument is the source... the possible matches. Most Enumerables will do.
54
+ RemoteTable.new(:url => 'http://www.transtats.bts.gov/Download_Lookup.asp?Lookup=L_AIRCRAFT_TYPE', :select => lambda { |record| record['Code'].to_i.between?(1, 998) }),
55
+ # Tightenings optionally pull out what is important on both sides of a potential match
56
+ :tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
57
+ # Identities optionally require a particular capture from both sides of a match to be equal
58
+ :identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
59
+ # Blockings restrict comparisons to a subset where everything matches the blocking
60
+ :blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
61
+ # This means that lookups that don't match a blocking won't be compared to possible matches that **do** match a blocking.
62
+ # This is useful because we say /boeing/ and only boeings are matched against other boeings.
63
+ :blocking_only => true,
64
+ # Tell the dictionary how to read things from the source.
65
+ :right_reader => lambda { |record| record['Description'] }
66
+ )
67
+ end
68
+
69
+ # A dictionary between what appear to be ICAO aircraft names and
70
+ # objects of this class itself.
71
+ # Warning: self-referential (it calls Aircraft.all) so it should be run after the first DataMiner step.
72
+ def self.icao_name_dictionary
73
+ @_icao_dictionary ||= LooseTightDictionary.new Aircraft.all,
74
+ :tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
75
+ :identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
76
+ :blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
77
+ :right_reader => lambda { |record| record.manufacturer_name.to_s + ' ' + record.name.to_s }
78
+ end
79
+
80
+ # This responds to the "Matcher" interface as defined by DataMiner.
81
+ # In other words, it takes Matcher#match(*args) and returns something.
82
+ class BtsMatcher
83
+ attr_reader :wants
84
+ def initialize(wants)
85
+ @wants = wants
86
+ end
87
+ def match(raw_faa_icao_record)
88
+ @_match ||= Hash.new
89
+ return @_match[raw_faa_icao_record] if @_match.has_key?(raw_faa_icao_record)
90
+ faa_icao_record = [ raw_faa_icao_record['Manufacturer'] + ' ' + raw_faa_icao_record['Model'] ]
91
+ bts_record = Aircraft.bts_name_dictionary.left_to_right faa_icao_record
92
+ retval = case wants
93
+ when :bts_aircraft_type_code
94
+ bts_record['Code']
95
+ when :bts_name
96
+ bts_record['Description']
97
+ end if bts_record
98
+ @_match[raw_faa_icao_record] = retval
99
+ end
100
+ end
101
+
102
+ # Another class that implements the "Matcher" interface as expected by DataMiner.
103
+ class FuelUseMatcher
104
+ def match(raw_fuel_use_record)
105
+ @_match ||= Hash.new
106
+ return @_match[raw_fuel_use_record] if @_match.has_key?(raw_fuel_use_record)
107
+ # First try assuming we have an ICAO code
108
+ aircraft_record = if raw_fuel_use_record['ICAO'] =~ /\A[0-9A-Z]+\z/
109
+ Aircraft.find_by_icao_code raw_fuel_use_record['ICAO']
110
+ end
111
+ # No luck? then try a fuzzy match
112
+ aircraft_record ||= if raw_fuel_use_record['Aircraft Name'].present?
113
+ Aircraft.icao_name_dictionary.left_to_right [ raw_fuel_use_record['Aircraft Name'] ]
114
+ end
115
+ if aircraft_record
116
+ @_match[raw_fuel_use_record] = aircraft_record.icao_code
117
+ else
118
+ # While we're developing the dictionary, we want it to blow up until we have 100% matchability
119
+ raise "Didn't find a match for #{raw_fuel_use_record['Aircraft Name']} (#{raw_fuel_use_record['ICAO']}), which we found in the fuel use spreadsheet"
120
+ end
121
+ end
122
+ end
123
+
124
+ # This responds to the "Responder" interface as expected by Errata.
125
+ # Basically it lets you say "Is a DC plane" in the errata file and
126
+ # have it map to a Ruby method.
127
+ class Guru
128
+ def is_a_dc_plane?(row)
129
+ row['Designator'] =~ /^DC\d/i
130
+ end
131
+ def is_a_g159?(row)
132
+ row['Designator'] =~ /^G159$/
133
+ end
134
+ def is_a_galx?(row)
135
+ row['Designator'] =~ /^GALX$/
136
+ end
137
+ def method_missing(method_id, *args, &block)
138
+ if method_id.to_s =~ /\Ais_n?o?t?_?attributed_to_([^\?]+)/
139
+ manufacturer_name = $1
140
+ manufacturer_regexp = Regexp.new(manufacturer_name.gsub('_', ' ?'), Regexp::IGNORECASE)
141
+ matches = manufacturer_regexp.match(args.first['Manufacturer']) # row['Manufacturer'] =~ /mcdonnell douglas/i
142
+ method_id.to_s.include?('not_attributed') ? matches.nil? : !matches.nil?
143
+ else
144
+ super
145
+ end
146
+ end
147
+ end
148
+
149
+ data_miner do
150
+ # In our app, we defined DataMiner::Run.allowed? to return false if a run
151
+ # has taken place in the last hour (among other things).
152
+ # By raising DataMiner::Skip, we skip this run but call it a success.
153
+ process "Don't re-import too often" do
154
+ raise DataMiner::Skip unless DataMiner::Run.allowed? Aircraft
155
+ end
156
+
157
+ # Define the database schema in-line.
158
+ # It will destructively and automatically add/remove columns.
159
+ # This is "OK" because you can always just re-run the import script to get the data back.
160
+ # PS. if we were using DataMapper, we wouldn't need this.
161
+ schema :options => 'ENGINE=InnoDB default charset=utf8' do
162
+ string 'icao_code'
163
+ string 'manufacturer_name'
164
+ string 'name'
165
+ string 'bts_name'
166
+ string 'bts_aircraft_type_code'
167
+ string 'brighter_planet_aircraft_class_code'
168
+ string 'fuel_use_aircraft_name'
169
+ float 'm3'
170
+ string 'm3_units'
171
+ float 'm2'
172
+ string 'm2_units'
173
+ float 'm1'
174
+ string 'm1_units'
175
+ float 'endpoint_fuel'
176
+ string 'endpoint_fuel_units'
177
+ float 'seats'
178
+ float 'distance'
179
+ string 'distance_units'
180
+ float 'load_factor'
181
+ float 'freight_share'
182
+ float 'payload'
183
+ float 'weighting'
184
+ index 'bts_aircraft_type_code'
185
+ end
186
+
187
+ # The FAA publishes a document to help people identify aircraft by different names.
188
+ ('A'..'Z').each do |letter|
189
+ import( "ICAO aircraft codes starting with the letter #{letter} used by the FAA",
190
+ # The master URL of the source file (one for every letter)
191
+ :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
192
+ # The RFC-style errata... note that it will use the Guru class we defined above. See the Errata gem for more details.
193
+ :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :responder => Aircraft::Guru.new),
194
+ # If it's not UTF-8, you should say what it is so that we can iconv it!
195
+ :encoding => 'windows-1252',
196
+ # Nokogiri is being used to grab each row starting from the second
197
+ :row_xpath => '//table/tr[2]/td/table/tr',
198
+ # ditto... XPath for Nokogiri
199
+ :column_xpath => 'td' ) do
200
+ # The code that they use is in fact the ICAO code!
201
+ key 'icao_code', :field_name => 'Designator'
202
+ # We get this for free
203
+ store 'manufacturer_name', :field_name => 'Manufacturer'
204
+ # ditto
205
+ store 'name', :field_name => 'Model'
206
+ # Use the loose-tight dictionary.
207
+ # It gets the entire input row to play with before deciding on an output.
208
+ store 'bts_aircraft_type_code', :matcher => Aircraft::BtsMatcher.new(:bts_aircraft_type_code)
209
+ store 'bts_name', :matcher => Aircraft::BtsMatcher.new(:bts_name)
210
+ end
211
+ end
212
+
213
+ # Pull in some data that might only be important to Brighter Planet
214
+ import "Brighter Planet's aircraft class codes",
215
+ :url => 'http://static.brighterplanet.com/science/data/transport/air/bts_aircraft_type/bts_aircraft_types-brighter_planet_aircraft_classes.csv' do
216
+ key 'bts_aircraft_type_code', :field_name => 'bts_aircraft_type'
217
+ store 'brighter_planet_aircraft_class_code'
218
+ end
219
+
220
+ # Pull in fuel use equation (y = m3*x^3 + m2*x^2 + m1*x + endpoint_fuel).
221
+ # This data comes from the EEA.
222
+ import "pre-calculated fuel use equation coefficients",
223
+ :url => 'http://static.brighterplanet.com/science/data/transport/air/fuel_use/aircraft_fuel_use_formulae.ods',
224
+ :select => lambda { |row| row['ICAO'].present? or row['Aircraft Name'].present? } do
225
+ # We want to key on ICAO code, but since it's sometimes missing, use the loose-tight dictionary we defined above.
226
+ key 'icao_code', :matcher => Aircraft::FuelUseMatcher.new
227
+ # Keep the name for sanity checking. Yes, we have 3 different "name" fields... they should all refer to the same aircraft.
228
+ store 'fuel_use_aircraft_name', :field_name => 'Aircraft Name'
229
+ store 'm3'
230
+ store 'm2'
231
+ store 'm1'
232
+ store 'endpoint_fuel', :field_name => 'b'
233
+ end
234
+
235
+ # Use arel and the weighted_average gem to do some crazy averaging.
236
+ # This assumes that you're dealing with the BTS T-100 flight segment data.
237
+ # See http://data.brighterplanet.com/flight_segments for a pre-sanitized version.
238
+ process "Derive some average flight characteristics from flight segments" do
239
+ FlightSegment.run_data_miner!
240
+ aircraft = Aircraft.arel_table
241
+ segments = FlightSegment.arel_table
242
+
243
+ conditional_relation = aircraft[:bts_aircraft_type_code].eq(segments[:bts_aircraft_type_code])
244
+ update_all "seats = (#{FlightSegment.weighted_average_relation(:seats, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
245
+ update_all "distance = (#{FlightSegment.weighted_average_relation(:distance, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
246
+ update_all "load_factor = (#{FlightSegment.weighted_average_relation(:load_factor, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
247
+ update_all "freight_share = (#{FlightSegment.weighted_average_relation(:freight_share, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
248
+ update_all "payload = (#{FlightSegment.weighted_average_relation(:payload, :weighted_by => :passengers, :disaggregate_by => :departures_performed).where(conditional_relation).to_sql})"
249
+
250
+ update_all "weighting = (#{segments.project(segments[:passengers].sum).where(aircraft[:bts_aircraft_type_code].eq(segments[:bts_aircraft_type_code])).to_sql})"
251
+ end
252
+
253
+ # And finally re-run the import of resources that depend on this resource.
254
+ # Don't worry about calling Aircraft.run_data_miner! at the top of AircraftManufacturer's data_miner block;
255
+ # that's the right way to do dependencies. It won't get called twice in the same run.
256
+ [ AircraftManufacturer ].each do |synthetic_resource|
257
+ process "Synthesize #{synthetic_resource}" do
258
+ synthetic_resource.run_data_miner!
259
+ end
260
+ end
261
+ end
262
+ end
263
+
264
+ ==Authors
265
+
266
+ * Seamus Abshere <seamus@abshere.net>
267
+ * Andy Rossmeissl <andy@rossmeissl.net>
268
+
269
+ ==Copyright
270
+
271
+ Copyright (c) 2010 Brighter Planet. See LICENSE for details.
@@ -0,0 +1,136 @@
1
+ require 'active_support'
2
+ require 'active_support/version'
3
+ %w{
4
+ active_support/core_ext/array/conversions
5
+ active_support/core_ext/string/access
6
+ active_support/core_ext/string/multibyte
7
+ }.each do |active_support_3_requirement|
8
+ require active_support_3_requirement
9
+ end if ActiveSupport::VERSION::MAJOR == 3
10
+
11
+ require 'active_record'
12
+ require 'blockenspiel'
13
+ require 'conversions'
14
+ require 'errata'
15
+ require 'remote_table'
16
+ require 'escape'
17
+ require 'andand'
18
+ require 'log4r'
19
+ require 'fileutils'
20
+ require 'tmpdir'
21
+ require 'zlib'
22
+
23
+ require 'data_miner/attribute'
24
+ require 'data_miner/base'
25
+ require 'data_miner/dictionary'
26
+ require 'data_miner/import'
27
+ require 'data_miner/tap'
28
+ require 'data_miner/process'
29
+ require 'data_miner/run'
30
+ require 'data_miner/schema'
31
+
32
# Top-level namespace of the data_miner gem: the exception classes used by
# steps, the shared logger, and a handful of module-level helpers.
module DataMiner
  class MissingHashColumn < StandardError; end
  class Finish < StandardError; end
  class Skip < StandardError; end

  # Shared logger; stays nil until DataMiner.start_logging has been called.
  mattr_accessor :logger

  # Sets up DataMiner.logger once; later calls return immediately.
  #
  # When the Rails constant is defined, Rails.logger is reused. Otherwise a
  # Log4r logger is built that writes DEBUG/INFO to 'data_miner.log' and
  # WARN/ERROR/FATAL to stderr, and it is also installed as
  # ActiveRecord::Base.logger.
  def self.start_logging
    return if logger

    if defined? Rails
      self.logger = Rails.logger
    else
      # Mixing Log4r in makes its constants (FileOutputter, DEBUG, ...)
      # resolvable from inside this module.
      class_eval { include Log4r }
      info_outputter = FileOutputter.new 'f1', :filename => 'data_miner.log'
      error_outputter = Outputter.stderr
      info_outputter.only_at DEBUG, INFO
      error_outputter.only_at WARN, ERROR, FATAL

      self.logger = Logger.new 'data_miner'
      logger.add info_outputter, error_outputter
      ActiveRecord::Base.logger = logger
    end
  end

  # Reports a problem: logs it as an error when RAILS_ENV is 'production' or
  # DONT_RAISE is 'true', otherwise raises it as a RuntimeError.
  def self.log_or_raise(message)
    message = "[data_miner gem] #{message}"
    if ENV['RAILS_ENV'] == 'production' || ENV['DONT_RAISE'] == 'true'
      logger.error message
    else
      raise message
    end
  end

  # Logs +message+ at info level with the gem prefix.
  def self.log_info(message)
    logger.info "[data_miner gem] #{message}"
  end

  # Logs +message+ at debug level with the gem prefix.
  def self.log_debug(message)
    logger.debug "[data_miner gem] #{message}"
  end

  # Calls DataMiner::Base.run with +options+ plus
  # :preserve_call_stack_between_runs => true, then clears the call stack.
  def self.run(options = {})
    DataMiner::Base.run options.merge(:preserve_call_stack_between_runs => true)
    DataMiner::Base.call_stack.clear
  end

  # Returns DataMiner::Base.resource_names.
  def self.resource_names
    DataMiner::Base.resource_names
  end

  # TODO this should probably live somewhere else
  #
  # Runs +cmd+ in a subshell after collapsing each newline (and the spaces
  # around it) into a single space, so multi-line command strings can be
  # passed in.
  #
  # Returns the command's standard output (previously the output was captured
  # but nil was returned on success).
  # Raises RuntimeError, quoting the command and its output, when the command
  # exits unsuccessfully.
  def self.backtick_with_reporting(cmd)
    cmd = cmd.gsub(/[ ]*\n[ ]*/m, ' ')
    output = `#{cmd}`
    unless $?.success?
      raise %{
From the data_miner gem...

Command failed:
#{cmd}

Output:
#{output}
}
    end
    output
  end

end
101
+
102
# Installs the data_miner DSL entry points on every ActiveRecord model.
ActiveRecord::Base.class_eval do
  # Stand-in for data_miner that never invokes the block; it only logs (at
  # debug level) that the block was skipped.
  def self.x_data_miner(&block)
    DataMiner.start_logging

    DataMiner.log_debug "Skipping data_miner block in #{self.name} because called as x_data_miner"
  end

  # Registers this model as a data_miner resource and evaluates +block+
  # against a fresh DataMiner::Base built for the model.
  def self.data_miner(&block)
    DataMiner.start_logging

    if !table_exists?
      DataMiner.log_debug "Database table `#{table_name}` doesn't exist. It might be created in the data_miner block, but if it's not, DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
    end

    if !DataMiner.resource_names.include?(self.name)
      DataMiner.resource_names.push(self.name)
    end

    # this is class_eval'ed here so that each ActiveRecord descendant has its own copy, or none at all
    class_eval do
      cattr_accessor :data_miner_base

      # Runs recorded for this resource, looked up by resource name.
      def self.data_miner_runs
        DataMiner::Run.scoped(:conditions => { :resource_name => name })
      end

      # Hands +options+ to this model's DataMiner::Base#run.
      def self.run_data_miner!(options = {})
        data_miner_base.run(options)
      end

      # Runs the first step that is exactly a DataMiner::Schema, if any.
      def self.execute_schema
        schema_step = data_miner_base.steps.find do |candidate|
          candidate.instance_of?(DataMiner::Schema)
        end
        schema_step && schema_step.run(nil)
      end
    end

    base = DataMiner::Base.new(self)
    self.data_miner_base = base

    Blockenspiel.invoke(block, base)

    base.after_invoke
  end
end
@@ -0,0 +1,233 @@
1
module DataMiner
  # Describes how a single attribute of a resource is filled in from one row
  # of source data: where the raw value comes from (a named or numbered
  # field, a static value, a matcher, a dictionary or a synthesizing proc)
  # and how it is cleaned up (chars, split, upcase, unit conversion, sprintf).
  class Attribute
    attr_accessor :step
    attr_accessor :name
    attr_accessor :options

    VALID_OPTIONS = [
      :from_units,
      :to_units,
      :static,
      :dictionary,
      :matcher,
      :field_name,
      :delimiter,
      :split,
      :units,
      :sprintf,
      :nullify,
      :overwrite,
      :upcase,
      :units_field_name,
      :units_field_number,
      :field_number,
      :chars,
      :synthesize
    ]

    # step    - the step this attribute belongs to; must respond to #resource.
    # name    - the attribute name on the resource.
    # options - Hash restricted to VALID_OPTIONS (string keys are accepted).
    #
    # Unknown option keys are reported through DataMiner.log_or_raise.
    def initialize(step, name, options = {})
      # Work on a symbolized copy so the caller's hash is left untouched.
      options = options.symbolize_keys

      @step = step
      @name = name

      invalid_option_keys = options.keys.reject { |k| VALID_OPTIONS.include? k }
      DataMiner.log_or_raise "Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence} (#{inspect})" if invalid_option_keys.any?
      @options = options
    end

    # The resource this attribute belongs to, as reported by the step.
    # Written out explicitly so loading this class does not depend on
    # ActiveSupport's Module#delegate already being available.
    def resource
      step.resource
    end

    def inspect
      "Attribute(#{resource}##{name})"
    end

    # Looks +str+ up in the configured dictionary.
    def value_in_dictionary(str)
      dictionary.lookup str
    end

    # Reads this attribute's raw value out of +row+ and normalizes it.
    #
    # Returns nil when the source value is nil, the value itself when it is
    # an ActiveRecord::Base instance, and otherwise a new String. The string
    # found in +row+ (or given as :static) is never modified in place.
    def value_in_source(row)
      value = if wants_static?
        static
      elsif field_number
        if field_number.is_a?(Range)
          field_number.map { |n| row[n] }.join(delimiter)
        else
          row[field_number]
        end
      else
        row[field_name]
      end
      return nil if value.nil?
      return value if value.is_a?(ActiveRecord::Base) # escape valve for parsers that look up associations directly
      value = value.to_s
      # String#[] returns nil when :chars lies beyond the end of the string;
      # fall back to '' (as do_split does) instead of failing on nil below.
      value = value[chars].to_s if wants_chars?
      value = do_split(value) if wants_split?
      # String#to_s returns the receiver itself, so only non-mutating calls
      # are used from here on; otherwise the caller's row would be changed.
      # taken from old errata... maybe we want to do this here
      value = value.gsub(/[ ]+/, ' ')
      # text.gsub!('- ', '-')
      value = value.gsub(/([^\\])~/, '\1 ')
      value = value.strip
      value = value.upcase if wants_upcase?
      value = do_convert row, value if wants_conversion?
      value = do_sprintf value if wants_sprintf?
      value
    end

    # Hands the whole +row+ to the configured matcher and returns its answer.
    def match_row(row)
      matcher.match row
    end

    # Full pipeline for one row: a matcher, when configured, decides alone;
    # otherwise the source value is read and then optionally passed through
    # the dictionary and/or replaced by the :synthesize proc's result.
    def value_from_row(row)
      return match_row row if wants_matcher?
      value = value_in_source row
      return value if value.is_a? ActiveRecord::Base # carry through trapdoor
      value = value_in_dictionary value if wants_dictionary?
      value = synthesize.call(row) if wants_synthesize?
      value
    end

    # this will overwrite nils, even if wants_overwriting? is false
    # returns true if an attr was changed, otherwise false
    # (nil is returned when the record reads back nil for a non-nil value)
    def set_record_from_row(record, row)
      return false if !wants_overwriting? && !record.send(name).nil?
      what_it_was = record.send name
      what_it_should_be = value_from_row row

      record.send "#{name}=", what_it_should_be
      record.send "#{name}_units=", (to_units || unit_from_source(row)).to_s if wants_units?

      what_it_is = record.send name
      if what_it_is.nil? && !what_it_should_be.nil?
        DataMiner.log_debug "ActiveRecord didn't like trying to set #{resource}.#{name} = #{what_it_should_be} (it came out as nil)"
        nil
      elsif what_it_is == what_it_was
        false
      else
        true
      end
    end

    # Reads the unit from the row's units field and turns it into a symbol.
    def unit_from_source(row)
      row[units_field_name || units_field_number].to_s.strip.underscore.to_sym
    end

    # Converts +value+ (as a float) between the source and target units;
    # each side falls back to the unit found in the row.
    def do_convert(row, value)
      DataMiner.log_or_raise "If you use :from_units, you need to set :to_units (#{inspect})" unless wants_units?
      value.to_f.convert((from_units || unit_from_source(row)), (to_units || unit_from_source(row)))
    end

    # Applies the :sprintf format, first coercing +value+ to a float or an
    # integer when the format contains a %f or %d directive.
    def do_sprintf(value)
      template = sprintf
      if /\%[0-9\.]*f/.match(template)
        value = value.to_f
      elsif /\%[0-9\.]*d/.match(template)
        value = value.to_i
      end
      template % value
    end

    # Splits +value+ on :pattern and returns the piece at index :keep.
    def do_split(value)
      pattern = split_options[:pattern] || /\s+/ # default is split on whitespace
      keep = split_options[:keep] || 0 # default is keep first element
      value.to_s.split(pattern)[keep].to_s
    end

    # The type of the resource column named after this attribute.
    def column_type
      resource.columns_hash[name.to_s].type
    end

    # Our wants and needs :)
    def wants_split?
      split_options.present?
    end
    def wants_sprintf?
      sprintf.present?
    end
    def wants_upcase?
      upcase.present?
    end
    def wants_static?
      options.has_key? :static
    end
    def wants_nullification?
      nullify != false
    end
    def wants_chars?
      chars.present?
    end
    def wants_synthesize?
      synthesize.is_a?(Proc)
    end
    def wants_overwriting?
      overwrite != false
    end
    def wants_conversion?
      from_units.present? || units_field_name.present? || units_field_number.present?
    end
    def wants_units?
      to_units.present? || units_field_name.present? || units_field_number.present?
    end
    def wants_dictionary?
      options[:dictionary].present?
    end
    def wants_matcher?
      options[:matcher].present?
    end

    # Options that always have values
    def field_name
      (options[:field_name] || name).to_s
    end
    def delimiter
      (options[:delimiter] || ', ')
    end

    # Options that can't be referred to by their names
    def split_options
      options[:split]
    end

    def from_units
      options[:from_units]
    end
    def to_units
      options[:to_units] || options[:units]
    end
    def sprintf
      options[:sprintf]
    end
    def nullify
      options[:nullify]
    end
    def overwrite
      options[:overwrite]
    end
    def upcase
      options[:upcase]
    end
    def units_field_name
      options[:units_field_name]
    end
    def units_field_number
      options[:units_field_number]
    end
    def field_number
      options[:field_number]
    end
    def chars
      options[:chars]
    end
    def synthesize
      options[:synthesize]
    end
    def static
      options[:static]
    end
    # Memoized: the :dictionary option itself when it already is a
    # Dictionary, otherwise a Dictionary built from it.
    def dictionary
      @_dictionary ||= (options[:dictionary].is_a?(Dictionary) ? options[:dictionary] : Dictionary.new(options[:dictionary]))
    end
    # Memoized: a String :matcher names a class to constantize and
    # instantiate; anything else is used as the matcher object directly.
    def matcher
      @_matcher ||= (options[:matcher].is_a?(String) ? options[:matcher].constantize.new : options[:matcher])
    end
  end
end