importu 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/README.md +163 -0
- data/importu.gemspec +29 -0
- data/lib/importu.rb +12 -0
- data/lib/importu/converters.rb +82 -0
- data/lib/importu/core_ext.rb +3 -0
- data/lib/importu/core_ext/array/deep_freeze.rb +7 -0
- data/lib/importu/core_ext/deep_freeze.rb +3 -0
- data/lib/importu/core_ext/hash/deep_freeze.rb +7 -0
- data/lib/importu/core_ext/object/deep_freeze.rb +6 -0
- data/lib/importu/dsl.rb +127 -0
- data/lib/importu/exceptions.rb +34 -0
- data/lib/importu/importer.rb +119 -0
- data/lib/importu/importer/csv.rb +52 -0
- data/lib/importu/importer/json.rb +45 -0
- data/lib/importu/importer/xml.rb +55 -0
- data/lib/importu/record.rb +124 -0
- data/lib/importu/version.rb +3 -0
- data/spec/factories/importer.rb +12 -0
- data/spec/factories/importer_record.rb +13 -0
- data/spec/factories/json_importer.rb +14 -0
- data/spec/factories/xml_importer.rb +12 -0
- data/spec/lib/importu/converters_spec.rb +276 -0
- data/spec/lib/importu/dsl_spec.rb +26 -0
- data/spec/lib/importu/exceptions_spec.rb +96 -0
- data/spec/lib/importu/importer/json_spec.rb +37 -0
- data/spec/lib/importu/importer/xml_spec.rb +14 -0
- data/spec/lib/importu/record_spec.rb +123 -0
- data/spec/spec_helper.rb +14 -0
- data/spec/support/matchers/delegate_matcher.rb +42 -0
- metadata +218 -0
@@ -0,0 +1,34 @@
|
|
1
|
+
# Exception hierarchy raised by Importu.
module Importu
  # Root of all Importu errors.
  class ImportuException < StandardError
    # Returns the exception's class name with any namespace prefix removed
    # (everything after the last ':').
    def name
      self.class.name[/[^:]+$/]
    end
  end

  # Problem with the input source as a whole (as opposed to a single record).
  class InvalidInput < ImportuException; end

  # Problem with a single record. May carry a collection of validation
  # error messages alongside the regular exception message.
  class InvalidRecord < ImportuException
    attr_reader :validation_errors

    # message           - the exception message (optional)
    # validation_errors - optional collection of validation error messages
    def initialize(message = nil, validation_errors = nil)
      @validation_errors = validation_errors
      super(message)
    end
  end

  class FieldParseError < InvalidRecord; end
  class DuplicateRecord < InvalidRecord; end

  # A field described by +definition+ was not present in the source data.
  class MissingField < InvalidRecord
    attr_reader :definition

    # definition - the field definition; #message reads its :label and
    #              :name entries
    def initialize(definition)
      @definition = definition
      super() # no stored message; #message is built from the definition
    end

    # Message naming the missing field, preferring its label over its name.
    def message
      field = definition[:label] || definition[:name]
      "missing field \"#{field}\" from source data"
    end

    # Keep #to_s (used by #inspect and string interpolation) in agreement
    # with #message instead of falling back to the bare class name.
    def to_s
      message
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'active_record/errors'
|
2
|
+
|
3
|
+
# Base importer. Walks the records of an input source, creating or updating
# a model object for each one and tallying the outcome. Concrete importers
# override #records to supply the records.
class Importu::Importer
  attr_reader :options, :infile, :validation_errors
  attr_reader :total, :invalid, :created, :updated, :unchanged

  include Importu::Dsl
  include Importu::Converters

  # infile  - an object responding to #readline, or a path that is opened
  #           for binary reading
  # options - importer options, stored as given
  def initialize(infile, options = {})
    @options = options
    @total = @invalid = @created = @updated = @unchanged = 0
    @validation_errors = Hash.new(0) # counter for each validation error

    @infile = infile.respond_to?(:readline) ? infile : File.open(infile, 'rb')
  end

  # Enumerator over the records to import. Empty here.
  def records
    [].to_enum # implement in a subclass
  end

  # Temp file (created on first use under Rails.root/tmp) that subclasses
  # write rejected records to.
  def outfile
    @outfile ||= begin
      # Tempfile.new takes no positional open-mode argument, so binary mode
      # is switched on explicitly after creation.
      file = Tempfile.new('import', Rails.root.join('tmp'))
      file.binmode
      file
    end
  end

  # Imports every record from #records.
  #
  # if a scope is passed in, that scope becomes the starting scope used by
  # the finder, otherwise the model's default scope is used.
  #
  # An optional block is forwarded to each record's #assign_to.
  def import!(finder_scope = nil, &block)
    finder_scope ||= model_class.scoped
    records.each {|r| import_record(r, finder_scope, &block) }
  end

  # Human-readable summary of the counters, followed by a tally of the
  # validation errors seen (if any).
  def result_msg
    msg = <<-END.strip_heredoc
      Total: #{@total}
      Created: #{@created}
      Updated: #{@updated}
      Invalid: #{@invalid}
      Unchanged: #{@unchanged}
    END

    if @validation_errors.any?
      msg << "\nValidation Errors:\n"
      msg << @validation_errors.map {|e,c| " - #{e}: #{c}" }.join("\n")
    end

    msg
  end


  protected

  # Class resolved from +model+ (not defined in this class; presumably a
  # DSL setting from Importu::Dsl -- confirm).
  def model_class
    @model_class ||= model.constantize
  end

  # Imports a single record and updates the counters.
  #
  # Importu::InvalidRecord errors are tallied in @validation_errors, counted
  # as invalid and then re-raised so subclasses can report the record.
  # @total is incremented whether or not the record succeeds.
  def import_record(record, finder_scope, &block)
    object = find(finder_scope, record) || model_class.new
    action = object.new_record? ? :create : :update
    check_duplicate(object) if action == :update

    # reject the action unless it is listed in allowed_actions
    case ([action] - allowed_actions).first
    when :create then raise Importu::InvalidRecord, "#{model} not found"
    when :update then raise Importu::InvalidRecord, "existing #{model} found"
    end

    record.assign_to(object, action, &block)

    case record.save!
    when :created   then @created += 1
    when :updated   then @updated += 1
    when :unchanged then @unchanged += 1
    end

  rescue Importu::InvalidRecord => e
    errors = e.validation_errors
    if errors
      # convention: assume data-specific error messages put data inside parens, e.g. 'Dupe record found (sysnum 5489x)'
      errors.each {|error| @validation_errors[error.gsub(/ *\([^)]+\)/,'')] += 1 }
    else
      @validation_errors["#{e.name}: #{e.message}"] += 1
    end

    @invalid += 1
    raise

  ensure
    @total += 1
  end

  # Looks up an existing object for +record+ by trying each finder field
  # group in turn. A group is either a callable (run in the context of the
  # scope with the record as argument) or a list of field names matched
  # against the record's values. Returns the first match, or nil.
  def find(scope, record)
    # FIXME: find does not report if it finds more than one record matching
    # the :find_by conditions passed in. it just uses the first match for
    # now. what should be the correct behaviour?

    field_groups = self.class.finder_fields or return
    field_groups.each do |field_group|
      if field_group.respond_to?(:call) # proc
        object = scope.instance_exec(record, &field_group).first
      else
        conditions = Hash[field_group.map {|f| [f, record[f]]}]
        object = scope.where(conditions).first
      end

      return object if object
    end
    nil
  end

  # Raises Importu::DuplicateRecord when an object with the same id has
  # already been seen by this importer. Objects without an id are ignored.
  def check_duplicate(record)
    return unless id = record.respond_to?(:id) && record.id
    if ((@encountered||=Hash.new(0))[id] += 1) > 1
      raise Importu::DuplicateRecord, 'matches a previously imported record'
    end
  end

end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
# Importer for CSV input. Records that fail to import are copied to the
# outfile with an extra '_errors' column holding the failure message.
class Importu::Importer::Csv < Importu::Importer
  # Accepts the same arguments as the base importer; options[:csv_options]
  # (if given) is merged over the default CSV options below.
  def initialize(infile, options = {})
    super

    defaults = {
      :headers        => true,
      :return_headers => true,
      :write_headers  => true,
      :skip_blanks    => true,
    }
    @csv_options = defaults.merge(options[:csv_options]||{})

    @reader   = ::CSV.new(@infile, @csv_options)
    @header   = @reader.readline # first row read from the input
    @data_pos = @infile.pos      # input position recorded after that read
  end

  # Enumerator yielding one record per CSV row, starting again from the
  # position recorded in #initialize.
  def records
    @infile.pos = @data_pos
    Enumerator.new do |yielder|
      @reader.each {|row| yielder << record_class.new(self, row.to_hash, row) }
    end
  end

  # Imports one record. A missing field aborts with Importu::InvalidInput;
  # any other invalid record is written to the error output instead.
  def import_record(record, finder_scope, &block)
    super
  rescue Importu::MissingField => e
    # if one record missing field, all are, major error
    raise Importu::InvalidInput, "missing required field: #{e.message}"
  rescue Importu::InvalidRecord => e
    write_error(record.raw_data, e.message)
  end


  private

  # Appends +data+ to the error output with +msg+ in its '_errors' column,
  # setting up the writer on first use.
  def write_error(data, msg)
    open_error_writer unless @writer

    data['_errors'] = msg
    @writer << data
  end

  # Creates the CSV writer over the outfile and emits the header row,
  # extended with the '_errors' column.
  def open_error_writer
    @writer = ::CSV.new(outfile, @csv_options)
    @header['_errors'] = '_errors'
    @writer << @header
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'multi_json'
|
2
|
+
|
3
|
+
# Importer for JSON input. Records that fail to import are collected and
# written to the outfile as JSON, each with an added '_errors' entry.
class Importu::Importer::Json < Importu::Importer
  # Accepts the same arguments as the base importer and parses the whole
  # input up front.
  #
  # Raises Importu::InvalidInput if the input cannot be decoded.
  def initialize(infile, options = {})
    super

    begin
      # Read through @infile rather than the +infile+ argument: the argument
      # may be a path string, whereas @infile is always the opened stream.
      @infile.rewind
      @reader = MultiJson.load(@infile.read)
    rescue MultiJson::DecodeError => e
      raise Importu::InvalidInput, e.message
    end
  end

  # Runs the import, then writes any rejected records to the outfile.
  # Returns whatever the base #import! returns.
  def import!(finder_scope = nil, &block)
    result = super
    # Serialize with MultiJson (the library this file requires) instead of
    # relying on the JSON constant having been loaded elsewhere.
    outfile.write(MultiJson.dump(@error_records, :pretty => true)) if @invalid > 0
    result
  end

  # Enumerator yielding one record per element of the parsed input. The
  # element is used both as the record's data and as its raw data.
  # The block parameter is accepted but not used.
  def records(&block)
    Enumerator.new do |yielder|
      @reader.each do |data|
        yielder.yield record_class.new(self, data, data)
      end
    end
  end

  # Imports one record; invalid records are remembered for the error output
  # instead of propagating.
  def import_record(record, finder_scope, &block)
    super
  rescue Importu::InvalidRecord => e
    write_error(record.raw_data, e.message)
  end


  private

  # Remembers a copy of +data+ with +msg+ stored under '_errors'.
  def write_error(data, msg)
    @error_records ||= []
    @error_records << data.merge('_errors' => msg)
  end

end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
# Importer for XML input. Successfully imported record nodes are removed
# from the parsed document; failed ones get an '_errors' child element, and
# the remaining document is written to the outfile.
class Importu::Importer::Xml < Importu::Importer
  config_dsl :records_xpath

  # Accepts the same arguments as the base importer.
  #
  # Raises Importu::InvalidInput if the parser reported any errors.
  def initialize(infile, options = {})
    super

    if reader.errors.any?
      raise Importu::InvalidInput, reader.errors.join("\n")
    end
  end

  # Parsed document for the input, built on first use.
  def reader
    @reader ||= Nokogiri::XML(infile)
  end

  # Strips any existing '_errors' elements from the document, runs the
  # import, then writes the document to the outfile if any record was
  # invalid. Returns whatever the base #import! returns.
  def import!(finder_scope = nil, &block)
    reader.xpath('//_errors').remove
    result = super
    outfile.write(reader) if @invalid > 0
    result
  end

  # Enumerator yielding one record per node matched by records_xpath. The
  # record's data maps each child element's name to its text content; the
  # node itself is kept as the raw data.
  def records
    Enumerator.new do |yielder|
      reader.xpath(records_xpath).each do |xml|
        data = Hash[xml.elements.map {|e| [e.name, e.content]}]
        yielder.yield record_class.new(self, data, xml)
      end
    end
  end

  # Imports one record. On success its node is removed from the document;
  # on an invalid record the error is attached to the node instead.
  def import_record(record, finder_scope, &block)
    super
    record.raw_data.remove
  rescue Importu::InvalidRecord => e
    add_xml_record_error(record.raw_data, e.message)
  end


  private

  # Stores +text+ (plus a trailing comma) in the node's '_errors' child,
  # creating that child if it does not exist yet.
  def add_xml_record_error(xml, text)
    unless node = xml.xpath('./_errors').first
      node = Nokogiri::XML::Node.new '_errors', reader
      xml.add_child(node)
    end
    # NOTE(review): this overwrites any existing content; the trailing comma
    # suggests messages may have been meant to accumulate -- confirm.
    node.content = text + ','
  end

end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
require 'active_support/core_ext/module/delegation'
|
2
|
+
|
3
|
+
# A single record being imported. Wraps the source data for one record,
# converts it into a hash of field values using the importer's definitions
# and converters, and copies those values onto a model object.
class Importu::Record
  attr_reader :importer, :data, :raw_data

  include Enumerable

  delegate :keys, :values, :each, :[], :key?, :to => :record_hash
  delegate :preprocessor, :postprocessor, :to => :importer
  delegate :definitions, :converters, :to => :importer

  # importer - the importer this record belongs to
  # data     - the record's source data
  # raw_data - the record as it appeared in the input
  def initialize(importer, data, raw_data)
    @importer, @data, @raw_data = importer, data, raw_data
  end

  # Hash of converted field values, computed once on first access.
  def record_hash
    @record_hash ||= generate_record_hash
  end

  def to_hash
    record_hash
  end

  # Converts the field +name+ using the converter registered under +type+,
  # or options[:converter] when +type+ is nil. +type+ may also be a Hash,
  # in which case its :to entry names the converter and the hash itself is
  # used as the options.
  #
  # Returns the converted value, or options[:default] when the value is nil
  # or the field is missing and not required.
  # Raises Importu::MissingField for a missing required field and
  # Importu::FieldParseError when the converter raises ArgumentError.
  def convert(name, type, options = {})
    type, options = type[:to], type if type.kind_of?(Hash)
    converter = type ? converters[type] : options[:converter]
    raise "converter not found: #{type}" unless converter

    # TODO: defining options in field definition is deprecated
    definition = definitions[name] || {}
    options = definition.merge(options)

    begin
      value = instance_exec(name, options, &converter)
      value.nil? ? options[:default] : value

    rescue Importu::MissingField => e
      raise e if options[:required]
      options[:default]

    rescue ArgumentError => e
      # conversion of field value most likely failed
      raise Importu::FieldParseError, "#{name}: #{e.message}"
    end
  end

  # Converted value of the defined field +name+, using the field's own
  # definition (overridden by +options+) to pick the converter.
  def field_value(name, options = {})
    definition = definitions[name]
    raise "importer field not defined: #{name}" unless definition

    convert(name, nil, definition.merge(options))
  end

  # Copies this record's field values onto +object+ for the given +action+.
  # The preprocessor and the optional block run before assignment, the
  # postprocessor after. Only fields whose definition enables the action
  # and is not marked :abstract are assigned.
  #
  # Returns +object+. Raises if the object lacks a writer for any field
  # that would be assigned.
  def assign_to(object, action, &block)
    @object, @action = object, action

    instance_eval(&preprocessor) if preprocessor
    instance_exec(object, record_hash, &block) if block

    # filter out any fields we're not allowed to copy for this action
    allowed_fields = definitions.select {|n,d| d[action] }.keys
    concrete_fields = definitions.reject {|n,d| d[:abstract] }.keys
    field_names = record_hash.keys & allowed_fields & concrete_fields

    unsupported = field_names.reject {|n| object.respond_to?("#{n}=") }
    if unsupported.any?
      raise "model does not support assigning fields: #{unsupported.to_sentence}"
    end

    # every remaining field has a writer (checked just above)
    field_names.each {|name| object.send("#{name}=", record_hash[name]) }

    instance_eval(&postprocessor) if postprocessor

    object
  end

  # Saves the object last passed to #assign_to.
  #
  # Returns :unchanged if the object has no changes, otherwise :created or
  # :updated according to the action. Raises Importu::InvalidRecord, with
  # the object's full error messages as validation_errors, when the object
  # fails validation.
  def save!
    return :unchanged unless @object.changed?

    begin
      @object.save!
      case @action
      when :create then :created
      when :update then :updated
      end

    rescue ActiveRecord::RecordInvalid
      error_msgs = @object.errors.map do |name,message|
        name = definitions[name][:label] if definitions[name]
        # to_s so the comparison also matches a Symbol key
        name.to_s == 'base' ? message : "#{name} #{message}"
      end.join(', ')

      # Build the exception explicitly: a third argument to `raise` would be
      # taken as the backtrace, not as the validation errors.
      raise Importu::InvalidRecord.new(error_msgs, @object.errors.full_messages)
    end
  end


  private

  attr_reader :object, :action # needed for exposing to instance_eval'd blocks

  alias_method :record, :record_hash

  # Builds the hash of converted values, one entry (keyed by symbol) per
  # field definition.
  def generate_record_hash
    definitions.each_with_object({}) do |(name, _definition), hash|
      hash[name.to_sym] = field_value(name)
    end
  end

  # Lets converters be called by name, e.g. some_converter(field, options).
  def method_missing(meth, *args, &block)
    if converters[meth]
      convert(args[0], meth, args[1]||{}) # convert(name, type, options)
    else
      super
    end
  end

  # Keeps respond_to? consistent with method_missing above.
  def respond_to_missing?(meth, include_private = false)
    (importer && converters[meth]) ? true : super
  end

end
|