importu 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
module Importu
  # Base class for every error raised by Importu.
  class ImportuException < StandardError
    # Returns the class name without its namespace, e.g. "InvalidRecord" for
    # Importu::InvalidRecord. Anonymous subclasses have no class name, so nil
    # is returned for them instead of raising NoMethodError.
    def name
      self.class.name.to_s[/[^:]+$/]
    end
  end

  # Raised when the input as a whole cannot be processed.
  class InvalidInput < ImportuException; end

  # Raised when a single record cannot be imported.
  class InvalidRecord < ImportuException
    # Optional list of individual validation errors supplied by the raiser.
    attr_reader :validation_errors

    # message           - optional error text, handed to StandardError.
    # validation_errors - optional collection of individual error messages.
    def initialize(message = nil, validation_errors = nil)
      @validation_errors = validation_errors
      super(message)
    end
  end

  class FieldParseError < InvalidRecord; end
  class DuplicateRecord < InvalidRecord; end

  # Raised when a field is absent from the source data.
  class MissingField < InvalidRecord
    # The field definition this error was created with; it is indexed with
    # :label and :name when building the message.
    attr_reader :definition

    def initialize(definition)
      @definition = definition
      # Run the parent initializers so @validation_errors is defined (nil)
      # and the exception is fully set up.
      super()
    end

    # Describes the missing field, preferring its :label over its :name.
    def message
      field = definition[:label] || definition[:name]
      "missing field \"#{field}\" from source data"
    end

    # Keep #to_s (used by string interpolation and #inspect) in agreement
    # with #message; without this it would fall back to the class name.
    def to_s
      message
    end
  end
end
@@ -0,0 +1,119 @@
1
+ require 'active_record/errors'
2
+
3
# Base importer: iterates over source records, finds or builds the matching
# model object for each one, assigns the record's fields to it and saves it,
# keeping counters of the outcome. Format-specific subclasses supply #records.
class Importu::Importer
  attr_reader :options, :infile, :validation_errors
  attr_reader :total, :invalid, :created, :updated, :unchanged

  include Importu::Dsl
  include Importu::Converters

  # infile  - an object responding to #readline (used as-is), or anything
  #           File.open accepts (opened read-only in binary mode).
  # options - free-form options hash, stored and exposed through #options.
  def initialize(infile, options = {})
    @options = options
    @total = @invalid = @created = @updated = @unchanged = 0
    @validation_errors = Hash.new(0) # counter for each validation error

    @infile = infile.respond_to?(:readline) ? infile : File.open(infile, 'rb')
  end

  # Enumerator over the records to import; empty here.
  def records
    [].to_enum # implement in a subclass
  end

  # Lazily created temp file (under Rails.root/tmp) that subclasses write
  # rejected records to.
  def outfile
    # Tempfile.new takes at most two positional arguments on Ruby 2+; a third
    # positional 'wb+' raises ArgumentError there (and was ignored on 1.9),
    # so binary mode is requested explicitly instead.
    @outfile ||= Tempfile.new('import', Rails.root.join('tmp').to_s).tap {|f| f.binmode }
  end

  # Imports every record returned by #records.
  #
  # finder_scope - starting scope used to look up existing objects; when nil,
  #                model_class.scoped is used.
  # block        - optional block forwarded to each record's #assign_to.
  def import!(finder_scope = nil, &block)
    finder_scope ||= model_class.scoped
    records.each {|r| import_record(r, finder_scope, &block) }
  end

  # Human-readable summary of the counters, followed by a per-error tally
  # when any validation errors were recorded.
  def result_msg
    msg = <<-END.strip_heredoc
      Total:     #{@total}
      Created:   #{@created}
      Updated:   #{@updated}
      Invalid:   #{@invalid}
      Unchanged: #{@unchanged}
    END

    if @validation_errors.any?
      msg << "\nValidation Errors:\n"
      msg << @validation_errors.map {|e,c| "  - #{e}: #{c}" }.join("\n")
    end

    msg
  end


  protected

  # Model class resolved from `model` (presumably a class-name string
  # supplied through Importu::Dsl; it is only ever constantized here).
  def model_class
    @model_class ||= model.constantize
  end

  # Imports a single record and updates the counters.
  #
  # Raises Importu::InvalidRecord (re-raised after being counted) when the
  # record is a duplicate, its action is not in `allowed_actions`, or
  # assigning/saving it raises that error.
  def import_record(record, finder_scope, &block)
    object = find(finder_scope, record) || model_class.new
    action = object.new_record? ? :create : :update
    check_duplicate(object) if action == :update

    # a non-empty difference means this action is not permitted
    case ([action] - allowed_actions).first
    when :create then raise Importu::InvalidRecord, "#{model} not found"
    when :update then raise Importu::InvalidRecord, "existing #{model} found"
    end

    record.assign_to(object, action, &block)

    case record.save!
    when :created   then @created   += 1
    when :updated   then @updated   += 1
    when :unchanged then @unchanged += 1
    end

  rescue Importu::InvalidRecord => e
    if (errors = e.validation_errors)
      # convention: assume data-specific error messages put data inside parens, e.g. 'Dupe record found (sysnum 5489x)'
      errors.each {|error| @validation_errors[error.gsub(/ *\([^)]+\)/,'')] += 1 }
    else
      @validation_errors["#{e.name}: #{e.message}"] += 1
    end

    @invalid += 1
    raise

  ensure
    @total += 1
  end

  # Looks up an existing object for the record using each entry of
  # self.class.finder_fields in turn; returns the first match, or nil.
  def find(scope, record)
    # FIXME: find does not report if it finds more than one record matching
    # the :find_by conditions passed in. it just uses the first match for
    # now. what should be the correct behaviour?

    field_groups = self.class.finder_fields or return
    field_groups.each do |field_group|
      if field_group.respond_to?(:call) # proc
        object = scope.instance_exec(record, &field_group).first
      else
        conditions = Hash[field_group.map {|f| [f, record[f]]}]
        object = scope.where(conditions).first
      end

      return object if object
    end
    nil
  end

  # Raises Importu::DuplicateRecord the second time an object with the same
  # id is seen by this importer; objects without an id are not tracked.
  def check_duplicate(record)
    return unless (id = record.respond_to?(:id) && record.id)
    if ((@encountered ||= Hash.new(0))[id] += 1) > 1
      raise Importu::DuplicateRecord, 'matches a previously imported record'
    end
  end

end
@@ -0,0 +1,52 @@
1
+ require 'csv'
2
+
3
# Importer reading records from CSV input; rejected rows are written to
# #outfile as CSV with an extra '_errors' column.
class Importu::Importer::Csv < Importu::Importer
  # options[:csv_options], when given, is merged over the default CSV
  # settings below and used for both reading and writing.
  def initialize(infile, options = {})
    super

    defaults = {
      :headers        => true,
      :return_headers => true,
      :write_headers  => true,
      :skip_blanks    => true,
    }
    overrides = options[:csv_options] || {}
    @csv_options = defaults.merge(overrides)

    @reader = ::CSV.new(@infile, @csv_options)
    # first row read is kept as the header; the input position right after
    # it is remembered so #records can seek back to it
    @header = @reader.readline
    @data_pos = @infile.pos
  end

  # Repositions the input at the remembered data offset and returns an
  # Enumerator yielding one record object per row from the reader.
  def records
    @infile.pos = @data_pos
    Enumerator.new do |yielder|
      @reader.each {|row| yielder.yield(record_class.new(self, row.to_hash, row)) }
    end
  end

  # Same as the parent implementation, except that a missing field aborts
  # the import with Importu::InvalidInput and any other invalid record is
  # written to the error output instead of propagating.
  def import_record(record, finder_scope, &block)
    super
  rescue Importu::MissingField => e
    # if one record missing field, all are, major error
    raise Importu::InvalidInput, "missing required field: #{e.message}"
  rescue Importu::InvalidRecord => e
    write_error(record.raw_data, e.message)
  end


  private

  # Appends the row plus its error message to the error file; the writer is
  # created, and the extended header emitted, on first use.
  def write_error(data, msg)
    if @writer.nil?
      @writer = ::CSV.new(outfile, @csv_options)
      @header['_errors'] = '_errors'
      @writer << @header
    end

    data['_errors'] = msg
    @writer << data
  end
end
@@ -0,0 +1,45 @@
1
+ require 'multi_json'
2
+
3
# Importer reading records from a JSON document; rejected records are
# collected and written to #outfile as pretty-printed JSON.
class Importu::Importer::Json < Importu::Importer
  # Parses the whole input up front.
  #
  # Raises Importu::InvalidInput when the input is not valid JSON.
  def initialize(infile, options = {})
    super

    begin
      # Use @infile (set by the parent initializer) rather than the raw
      # argument: the argument may be a path String, which the parent has
      # already opened, and a String responds to neither #rewind nor #read.
      @infile.rewind
      @reader = MultiJson.load(@infile.read)
    rescue MultiJson::DecodeError => e
      raise Importu::InvalidInput, e.message
    end
  end

  # Runs the import, then dumps the rejected records (if any) to #outfile.
  def import!(finder_scope = nil, &block)
    result = super
    # Serialize through MultiJson, the library this file requires, instead
    # of relying on the JSON constant happening to be loaded.
    outfile.write(MultiJson.dump(@error_records || [], :pretty => true)) if @invalid > 0
    result
  end

  # Enumerator yielding one record object per element of the parsed JSON.
  # The block parameter is accepted for interface compatibility only.
  def records(&block)
    Enumerator.new do |yielder|
      @reader.each do |data|
        yielder.yield record_class.new(self, data, data)
      end
    end
  end

  # Same as the parent implementation, except that invalid records are
  # collected for the error output instead of propagating.
  def import_record(record, finder_scope, &block)
    super
  rescue Importu::InvalidRecord => e
    write_error(record.raw_data, e.message)
  end


  private

  # Remembers a copy of the raw record with an added '_errors' entry.
  def write_error(data, msg)
    @error_records ||= []
    @error_records << data.merge('_errors' => msg)
  end

end
@@ -0,0 +1,55 @@
1
+ require 'nokogiri'
2
+
3
# Importer reading records from an XML document. Imported record nodes are
# removed from the document and rejected ones get an '_errors' child, so the
# document written to #outfile contains only the failures.
class Importu::Importer::Xml < Importu::Importer
  config_dsl :records_xpath

  # Parses the input and raises Importu::InvalidInput when the parser
  # reported errors.
  #
  # NOTE: options[:xml_options] was previously merged into a local that was
  # never used; it is not consulted when parsing.
  def initialize(infile, options = {})
    super

    if reader.errors.any?
      raise Importu::InvalidInput, reader.errors.join("\n")
    end
  end

  # Memoized Nokogiri document built from the input.
  def reader
    @reader ||= Nokogiri::XML(infile)
  end

  # Strips any pre-existing '_errors' elements, runs the import, then writes
  # the remaining document to #outfile when at least one record was invalid.
  def import!(finder_scope = nil, &block)
    reader.xpath('//_errors').remove
    result = super
    outfile.write(reader) if @invalid > 0
    result
  end

  # Enumerator yielding one record object per node matched by records_xpath;
  # the record data maps each child element's name to its text content.
  def records
    Enumerator.new do |yielder|
      reader.xpath(records_xpath).each do |xml|
        data = Hash[xml.elements.map {|e| [e.name, e.content]}]
        yielder.yield record_class.new(self, data, xml)
      end
    end
  end

  # Same as the parent implementation; on success the record's node is
  # removed from the document, on Importu::InvalidRecord the error text is
  # attached to the node instead of propagating.
  def import_record(record, finder_scope, &block)
    super
    record.raw_data.remove
  rescue Importu::InvalidRecord => e
    add_xml_record_error(record.raw_data, e.message)
  end


  private

  # Sets the content of the node's '_errors' child (created when absent) to
  # the error text followed by a comma.
  def add_xml_record_error(xml, text)
    unless (node = xml.xpath('./_errors').first)
      node = Nokogiri::XML::Node.new '_errors', reader
      xml.add_child(node)
    end
    node.content = text + ','
  end

end
@@ -0,0 +1,124 @@
1
+ require 'active_support/core_ext/module/delegation'
2
+
3
# One source record. Converts the raw data into a hash of field values using
# the importer's field definitions and converters, and knows how to copy
# those values onto a model object and save it.
class Importu::Record
  attr_reader :importer, :data, :raw_data

  include Enumerable

  delegate :keys, :values, :each, :[], :key?, :to => :record_hash
  delegate :preprocessor, :postprocessor, :to => :importer
  delegate :definitions, :converters, :to => :importer

  # importer - object supplying definitions, converters and processors.
  # data     - the record's data as handed over by the importer.
  # raw_data - the importer's original representation of the record.
  def initialize(importer, data, raw_data)
    @importer, @data, @raw_data = importer, data, raw_data
  end

  # Memoized hash of field name => converted value.
  def record_hash
    @record_hash ||= generate_record_hash
  end

  def to_hash
    record_hash
  end

  # Runs a converter for the named field and returns its value, falling back
  # to options[:default] when the converter returns nil.
  #
  # name    - field name; its definition (if any) is merged under options.
  # type    - converter name looked up in `converters`, or a Hash of options
  #           whose :to entry names the converter, or nil to use
  #           options[:converter].
  # options - extra options handed to the converter.
  #
  # Raises RuntimeError when no converter is found, Importu::MissingField
  # when the field is missing and options[:required] is set, and
  # Importu::FieldParseError when the converter raises ArgumentError.
  def convert(name, type, options = {})
    type, options = type[:to], type if type.kind_of?(Hash)
    converter = type ? converters[type] : options[:converter]
    raise "converter not found: #{type}" unless converter

    # TODO: defining options in field definition is deprecated
    definition = definitions[name] || {}
    options = definition.merge(options)

    begin
      value = instance_exec(name, options, &converter)
      value.nil? ? options[:default] : value

    rescue Importu::MissingField
      raise if options[:required]
      options[:default]

    rescue ArgumentError => e
      # conversion of field value most likely failed
      raise Importu::FieldParseError, "#{name}: #{e.message}"
    end
  end

  # Converted value of a defined field, using the converter stored in its
  # definition. Raises RuntimeError when the field is not defined.
  def field_value(name, options = {})
    definition = definitions[name]
    raise "importer field not defined: #{name}" unless definition

    convert(name, nil, definition.merge(options))
  end

  # Copies this record's field values onto object and returns it.
  #
  # object - target responding to "<field>=" for every assigned field.
  # action - key (:create or :update in the importer) that a definition
  #          must enable for its field to be copied.
  # block  - optional block instance_exec'd with (object, record_hash)
  #          after the preprocessor and before assignment.
  #
  # Raises RuntimeError when the object lacks a writer for a field.
  def assign_to(object, action, &block)
    @object, @action = object, action

    instance_eval(&preprocessor) if preprocessor
    instance_exec(object, record_hash, &block) if block

    # filter out any fields we're not allowed to copy for this action
    allowed_fields  = definitions.select {|n,d| d[action] }.keys
    concrete_fields = definitions.reject {|n,d| d[:abstract] }.keys
    field_names = record_hash.keys & allowed_fields & concrete_fields

    unsupported = field_names.reject {|n| object.respond_to?("#{n}=") }
    if unsupported.any?
      raise "model does not support assigning fields: #{unsupported.to_sentence}"
    end

    # every remaining field is known to have a writer at this point
    field_names.each {|name| object.send("#{name}=", record_hash[name]) }

    instance_eval(&postprocessor) if postprocessor

    object
  end

  # Saves the object last passed to #assign_to.
  #
  # Returns :unchanged when the object reports no changes, otherwise
  # :created or :updated according to the action given to #assign_to.
  # Raises Importu::InvalidRecord, carrying the object's full error
  # messages in #validation_errors, when the save fails validation.
  def save!
    return :unchanged unless @object.changed?

    begin
      @object.save!
      case @action
      when :create then :created
      when :update then :updated
      end

    rescue ActiveRecord::RecordInvalid
      error_msgs = @object.errors.map do |name,message|
        # prefer the field's label; keep the attribute name when it has none
        name = definitions[name][:label] || name if definitions[name]
        # attribute keys may be symbols, so compare by string value
        name.to_s == 'base' ? message : "#{name} #{message}"
      end.join(', ')

      # Build the exception explicitly: the third argument of
      # `raise Class, message, x` is the backtrace, not a constructor
      # argument, so the validation errors would otherwise be lost.
      raise Importu::InvalidRecord.new(error_msgs, @object.errors.full_messages)
    end
  end


  private

  attr_reader :object, :action # needed for exposing to instance_eval'd blocks

  alias_method :record, :record_hash

  # Builds the field name (as a symbol) => converted value hash from every
  # field definition.
  def generate_record_hash
    definitions.each_with_object({}) do |(name,_definition),hash|
      hash[name.to_sym] = field_value(name)
    end
  end

  # Lets converter names be called as methods: `integer(:age, opts)` is
  # treated as convert(:age, :integer, opts).
  def method_missing(meth, *args, &block)
    if converters[meth]
      convert(args[0], meth, args[1]||{}) # convert(name, type, options)
    else
      super
    end
  end

  # Keeps respond_to? in agreement with method_missing.
  def respond_to_missing?(meth, include_private = false)
    converters[meth] ? true : super
  end

end
@@ -0,0 +1,3 @@
1
module Importu
  # Release identifier of the gem; frozen so the constant's value cannot be
  # mutated in place at runtime.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,12 @@
1
# Factory building plain Importu::Importer instances.
FactoryGirl.define do
  factory :importer, :class => Importu::Importer do
    # Values declared here are not assigned as attributes; they are only
    # handed to the constructor below.
    ignore do
      infile  { StringIO.new }
      options { {} }
    end

    # The importer takes its input through the constructor.
    initialize_with { Importu::Importer.new(infile, options) }
  end
end