importu 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +15 -0
- data/.github/workflows/ci.yml +48 -0
- data/.gitignore +4 -0
- data/.rspec +1 -0
- data/.rubocop.yml +311 -0
- data/.simplecov +14 -0
- data/.yardstick.yml +36 -0
- data/Appraisals +22 -0
- data/CHANGELOG.md +51 -0
- data/CONTRIBUTING.md +86 -0
- data/Gemfile +5 -1
- data/LICENSE +21 -0
- data/README.md +435 -52
- data/Rakefile +71 -0
- data/UPGRADING.md +188 -0
- data/gemfiles/rails_7_2.gemfile +11 -0
- data/gemfiles/rails_7_2.gemfile.lock +268 -0
- data/gemfiles/rails_8_0.gemfile +11 -0
- data/gemfiles/rails_8_0.gemfile.lock +271 -0
- data/gemfiles/rails_8_1.gemfile +11 -0
- data/gemfiles/rails_8_1.gemfile.lock +269 -0
- data/gemfiles/standalone.gemfile +8 -0
- data/gemfiles/standalone.gemfile.lock +197 -0
- data/importu.gemspec +41 -22
- data/lib/importu/backends/active_record.rb +171 -0
- data/lib/importu/backends/middleware/duplicate_manager_proxy.rb +41 -0
- data/lib/importu/backends/middleware/enforce_allowed_actions.rb +52 -0
- data/lib/importu/backends/middleware.rb +11 -0
- data/lib/importu/backends.rb +103 -0
- data/lib/importu/config_dsl.rb +381 -0
- data/lib/importu/converter_context.rb +94 -0
- data/lib/importu/converters.rb +119 -64
- data/lib/importu/definition.rb +23 -0
- data/lib/importu/duplicate_manager.rb +88 -0
- data/lib/importu/exceptions.rb +135 -4
- data/lib/importu/importer.rb +183 -96
- data/lib/importu/record.rb +138 -102
- data/lib/importu/sources/csv.rb +122 -0
- data/lib/importu/sources/json.rb +106 -0
- data/lib/importu/sources/ruby.rb +46 -0
- data/lib/importu/sources/xml.rb +133 -0
- data/lib/importu/sources.rb +13 -0
- data/lib/importu/summary.rb +277 -0
- data/lib/importu/version.rb +3 -1
- data/lib/importu.rb +45 -9
- data/spec/fixtures/books-duplicates/README.md +7 -0
- data/spec/fixtures/books-duplicates/infile.csv +7 -0
- data/spec/fixtures/books-duplicates/model.json +23 -0
- data/spec/fixtures/books-duplicates/summary.json +10 -0
- data/spec/fixtures/books-valid/README.md +13 -0
- data/spec/fixtures/books-valid/infile.csv +4 -0
- data/spec/fixtures/books-valid/infile.json +23 -0
- data/spec/fixtures/books-valid/infile.xml +21 -0
- data/spec/fixtures/books-valid/model.json +23 -0
- data/spec/fixtures/books-valid/record.json +26 -0
- data/spec/fixtures/books-valid/summary.json +8 -0
- data/spec/fixtures/source-empty-file/infile.csv +0 -0
- data/spec/fixtures/source-empty-file/infile.json +0 -0
- data/spec/fixtures/source-empty-file/infile.xml +0 -0
- data/spec/fixtures/source-empty-records/infile.csv +3 -0
- data/spec/fixtures/source-empty-records/infile.json +1 -0
- data/spec/fixtures/source-empty-records/infile.xml +6 -0
- data/spec/fixtures/source-malformed/infile.csv +1 -0
- data/spec/fixtures/source-malformed/infile.json +1 -0
- data/spec/fixtures/source-malformed/infile.xml +3 -0
- data/spec/fixtures/source-no-records/infile.csv +1 -0
- data/spec/fixtures/source-no-records/infile.json +1 -0
- data/spec/fixtures/source-no-records/infile.xml +3 -0
- data/spec/lib/importu/backends/active_record_spec.rb +150 -0
- data/spec/lib/importu/backends/middleware/duplicate_manager_proxy_spec.rb +70 -0
- data/spec/lib/importu/backends/middleware/enforce_allowed_actions_spec.rb +70 -0
- data/spec/lib/importu/backends_spec.rb +170 -0
- data/spec/lib/importu/converters_spec.rb +184 -141
- data/spec/lib/importu/definition_spec.rb +248 -0
- data/spec/lib/importu/duplicate_manager_spec.rb +92 -0
- data/spec/lib/importu/exceptions_spec.rb +69 -16
- data/spec/lib/importu/import_context_spec.rb +199 -0
- data/spec/lib/importu/importer_spec.rb +95 -0
- data/spec/lib/importu/integration_spec.rb +221 -0
- data/spec/lib/importu/record_spec.rb +130 -80
- data/spec/lib/importu/sources/csv_spec.rb +29 -0
- data/spec/lib/importu/sources/importer_source_examples.rb +175 -0
- data/spec/lib/importu/sources/json_spec.rb +29 -0
- data/spec/lib/importu/sources/ruby_spec.rb +102 -0
- data/spec/lib/importu/sources/xml_spec.rb +70 -0
- data/spec/lib/importu/summary_spec.rb +186 -0
- data/spec/spec_helper.rb +91 -7
- data/spec/support/active_record.rb +20 -0
- data/spec/support/book_importer.rb +31 -0
- data/spec/support/dummy_backend.rb +50 -0
- data/spec/support/fixtures_helper.rb +43 -0
- data/spec/support/matchers/delegate_matcher.rb +14 -8
- metadata +173 -100
- data/lib/importu/core_ext/array/deep_freeze.rb +0 -7
- data/lib/importu/core_ext/deep_freeze.rb +0 -3
- data/lib/importu/core_ext/hash/deep_freeze.rb +0 -7
- data/lib/importu/core_ext/object/deep_freeze.rb +0 -6
- data/lib/importu/core_ext.rb +0 -3
- data/lib/importu/dsl.rb +0 -127
- data/lib/importu/importer/csv.rb +0 -52
- data/lib/importu/importer/json.rb +0 -45
- data/lib/importu/importer/xml.rb +0 -55
- data/spec/factories/importer.rb +0 -12
- data/spec/factories/importer_record.rb +0 -13
- data/spec/factories/json_importer.rb +0 -14
- data/spec/factories/xml_importer.rb +0 -12
- data/spec/lib/importu/dsl_spec.rb +0 -26
- data/spec/lib/importu/importer/json_spec.rb +0 -37
- data/spec/lib/importu/importer/xml_spec.rb +0 -14
data/lib/importu/record.rb
CHANGED
|
@@ -1,123 +1,159 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "forwardable"
|
|
4
|
+
|
|
5
|
+
require "importu/converter_context"
|
|
6
|
+
require "importu/exceptions"
|
|
7
|
+
|
|
8
|
+
# Represents a single record from the import source.
|
|
9
|
+
#
|
|
10
|
+
# Records lazily convert field values on access and behave like hashes,
|
|
11
|
+
# supporting standard hash methods like [], fetch, keys, values, and each.
|
|
12
|
+
#
|
|
13
|
+
# @example Iterating over records
|
|
14
|
+
# importer.records.each do |record|
|
|
15
|
+
# puts "#{record[:title]} by #{record[:author]}"
|
|
16
|
+
# end
|
|
17
|
+
#
|
|
18
|
+
# @example Accessing field values
|
|
19
|
+
# record[:title] # => "The Ruby Programming Language"
|
|
20
|
+
# record.fetch(:author) # => "David Flanagan"
|
|
21
|
+
# record.keys # => [:title, :author, :isbn]
|
|
22
|
+
#
|
|
23
|
+
# @example Converting to a plain hash
|
|
24
|
+
# record.to_hash # => { title: "...", author: "...", isbn: "..." }
|
|
25
|
+
#
|
|
26
|
+
# @example Accessing raw source data
|
|
27
|
+
# record.data # => { "Title" => "...", "Author" => "..." }
|
|
28
|
+
#
|
|
29
|
+
# @example Checking for conversion errors
|
|
30
|
+
# if record.valid?
|
|
31
|
+
# process(record.to_hash)
|
|
32
|
+
# else
|
|
33
|
+
# record.errors.each { |e| puts e.to_s }
|
|
34
|
+
# end
|
|
35
|
+
#
|
|
36
|
+
# @see Importu::Importer#records
|
|
37
|
+
# @api public
|
|
3
38
|
class Importu::Record
  extend Forwardable

  # Source row exactly as it was handed to the record, before conversion.
  #
  # @return [Hash] the unconverted source data
  # @api public
  attr_reader :data

  # Wraps one row of source data.
  #
  # @param data [Hash] the unconverted source data for this row
  # @param context [Class] converter context class; instantiated with +data+
  # @param fields [Hash] field definitions, keyed by field name
  # @api private
  def initialize(data, context, fields:, **)
    @data = data
    @field_definitions = fields
    @context = context.new(data)
    @errors = []
  end

  # Lists the fields that may be assigned for the given action.
  #
  # A field qualifies only when its definition has the action flag set to
  # exactly +true+ and its :abstract flag set to exactly +false+.
  #
  # @param action [Symbol] :create or :update
  # @return [Array<Symbol>] names of the assignable fields
  # @api semipublic
  def assignable_fields_for(action)
    assignable = @field_definitions.select do |_name, definition|
      definition[action] == true && definition[:abstract] == false
    end

    assignable.map(&:first)
  end

  # Conversion errors collected while converting this record's fields.
  #
  # Conversion is triggered on first use, so the list is complete when
  # this method returns.
  #
  # @return [Array<Importu::FieldParseError>] conversion errors
  # @api public
  def errors
    ensure_record_hash
    @errors
  end

  # Returns the converted values as a hash keyed by field name.
  #
  # @return [Hash{Symbol => Object}] the converted field values
  # @raise [Importu::InvalidRecord] if any field failed to convert
  # @api public
  def to_hash
    conversion_errors = errors # also forces the conversion pass

    if conversion_errors.any?
      raise Importu::InvalidRecord.new("field parse errors", conversion_errors)
    end

    @record_hash
  end

  # Reports whether every field converted without error.
  #
  # @return [Boolean] true if no errors, false otherwise
  # @api public
  def valid?
    errors.none?
  end

  # @!method [](key)
  #   Looks up a converted field value by name.
  #   @param key [Symbol] the field name
  #   @return [Object] the converted value
  #
  # @!method fetch(key, default = nil)
  #   Looks up a converted field value, falling back to a default.
  #   @param key [Symbol] the field name
  #   @param default [Object] value to return if key not found
  #   @return [Object] the converted value or default
  #
  # @!method keys
  #   Lists all field names.
  #   @return [Array<Symbol>] the field names
  #
  # @!method values
  #   Lists all converted field values.
  #   @return [Array<Object>] the field values
  #
  # Every public Hash method this class does not already respond to is
  # forwarded to #to_hash, so hash-style access raises
  # Importu::InvalidRecord when the record has conversion errors.
  hash_only_methods = Hash.public_instance_methods - public_instance_methods
  delegate hash_only_methods => :to_hash

  # Converts every defined field once and memoizes the result. A field whose
  # conversion raises Importu::FieldParseError is left out of the hash and
  # the error is appended to @errors instead.
  def ensure_record_hash
    return @record_hash if @record_hash

    converted = {}
    @field_definitions.each do |name, _definition|
      begin
        converted[name] = @context.field_value(name)
      rescue Importu::FieldParseError => parse_error
        @errors << parse_error
      end
    end

    @record_hash = converted
  end
  private :ensure_record_hash

  # Enumerator that turns source rows into Record instances.
  #
  # @api semipublic
  class Iterator < Enumerator
    # Builds an iterator over the given source rows.
    #
    # A single converter context class is configured up front and shared by
    # every record the iterator yields.
    #
    # @param rows [Enumerator] the source rows to iterate
    # @param converters [Hash] converter definitions
    # @param fields [Hash] field definitions
    # @api private
    def initialize(rows, converters:, fields:, **)
      context = Importu::ConverterContext.with_config(converters: converters, fields: fields)

      super() do |yielder|
        rows.each { |row| yielder << Importu::Record.new(row, context, fields: fields) }
      end
    end
  end
end
|
|
123
159
|
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
require "csv"
|
|
3
|
+
require "tempfile"
|
|
4
|
+
|
|
5
|
+
require "importu/exceptions"
|
|
6
|
+
require "importu/sources"
|
|
7
|
+
|
|
8
|
+
# Parses CSV files as import source data.
|
|
9
|
+
#
|
|
10
|
+
# Each row becomes a hash with header names as keys. The CSV must have a
|
|
11
|
+
# header row.
|
|
12
|
+
#
|
|
13
|
+
# @example Basic usage
|
|
14
|
+
# source = Importu::Sources::CSV.new("data.csv")
|
|
15
|
+
# source.rows.each { |row| puts row["name"] }
|
|
16
|
+
#
|
|
17
|
+
# @example From a string
|
|
18
|
+
# csv_data = "name,email\nAlice,alice@example.com"
|
|
19
|
+
# source = Importu::Sources::CSV.new(StringIO.new(csv_data))
|
|
20
|
+
#
|
|
21
|
+
# @example With semicolon delimiter
|
|
22
|
+
# source = Importu::Sources::CSV.new("data.csv", csv_options: { col_sep: ";" })
|
|
23
|
+
#
|
|
24
|
+
# @example With tab delimiter
|
|
25
|
+
# source = Importu::Sources::CSV.new("data.tsv", csv_options: { col_sep: "\t" })
|
|
26
|
+
#
|
|
27
|
+
# @example Common csv_options
|
|
28
|
+
# csv_options: {
|
|
29
|
+
# col_sep: ";", # Column separator (default: ",")
|
|
30
|
+
# quote_char: "'", # Quote character (default: '"')
|
|
31
|
+
# encoding: "UTF-8", # File encoding
|
|
32
|
+
# }
|
|
33
|
+
#
|
|
34
|
+
# @see https://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html Ruby CSV documentation
|
|
35
|
+
# @api public
|
|
36
|
+
class Importu::Sources::CSV
  # Creates a new CSV source.
  #
  # When +infile+ does not respond to #readline it is treated as a path and
  # opened in binary mode; the source then owns the handle and #close will
  # close it. The header row is read immediately so that an empty or
  # malformed file is reported at construction time.
  #
  # @param infile [String, IO] file path or IO object to read from
  # @param csv_options [Hash] options passed to Ruby's CSV parser; merged
  #   over the defaults below
  # @raise [Importu::InvalidInput] if the CSV is malformed or empty
  def initialize(infile, csv_options: {}, **)
    @owns_handle = !infile.respond_to?(:readline)
    @infile = @owns_handle ? File.open(infile, "rb") : infile

    @csv_options = {
      headers: true,
      return_headers: true, # header row is returned as a row; #rows filters it out
      write_headers: true,
      skip_blanks: true,
    }.merge(csv_options)

    begin
      @reader = ::CSV.new(@infile, **@csv_options)
      @header = @reader.readline
    rescue ::CSV::MalformedCSVError => e
      raise Importu::InvalidInput, e.message
    end

    raise Importu::InvalidInput, "Empty document" if @header.nil?
  rescue StandardError
    # Do not leak a handle we opened ourselves when construction fails.
    close
    raise
  end

  # Closes the underlying file handle if opened by this source.
  #
  # Safe to call multiple times. Only closes handles that were opened
  # by this source (not IO objects passed in).
  #
  # @return [void]
  def close
    return unless @owns_handle && @infile && !@infile.closed?
    @infile.close
  end

  # Returns an enumerator that yields each data row as a hash.
  #
  # The input is rewound and a fresh parser is created every time the
  # enumerator is iterated, so the same enumerator can be walked more than
  # once. Previously the parser was built once, outside the enumerator, and
  # a second pass yielded nothing because the parser was already exhausted.
  #
  # @return [Enumerator<Hash>] rows with header names as keys
  def rows
    Enumerator.new do |yielder|
      @infile.rewind
      ::CSV.new(@infile, **@csv_options).each do |row|
        yielder.yield(row.to_hash) unless row.header_row?
      end
    end
  end

  # Generates a CSV file with error information appended.
  #
  # Creates a copy of the original data with an "_errors" column containing
  # the errors recorded for each row, joined with ", ". Rows are matched to
  # errors by their zero-based position in #rows.
  #
  # @param summary [Importu::Summary] the import summary containing errors
  # @param only_errors [Boolean] if true, only include rows that had errors
  # @return [Tempfile, nil] rewound temp file with error data, or nil if the
  #   summary has no itemized errors
  def write_errors(summary, only_errors: false)
    return unless summary.itemized_errors.any?

    # Array union: "_errors" is appended only if the input lacks that column.
    header = @header.fields | ["_errors"]
    itemized_errors = summary.itemized_errors

    Tempfile.new("import").tap do |file|
      writer = ::CSV.new(file, **@csv_options)
      writer << header

      rows.each.with_index do |row, index|
        errors = itemized_errors[index].join(", ") if itemized_errors.key?(index)
        next if only_errors && !errors

        # Overwrites any "_errors" value carried over from the input row.
        writer << row.merge("_errors" => errors).values_at(*header)
      end

      file.rewind
    end
  end

end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
require "json"
|
|
3
|
+
require "tempfile"
|
|
4
|
+
|
|
5
|
+
require "importu/exceptions"
|
|
6
|
+
require "importu/sources"
|
|
7
|
+
|
|
8
|
+
# Parses JSON files as import source data.
|
|
9
|
+
#
|
|
10
|
+
# The JSON must have an array as the root element. Each array element becomes
|
|
11
|
+
# a row. The entire file is loaded into memory.
|
|
12
|
+
#
|
|
13
|
+
# @example Basic usage
|
|
14
|
+
# source = Importu::Sources::JSON.new("data.json")
|
|
15
|
+
# source.rows.each { |row| puts row["name"] }
|
|
16
|
+
#
|
|
17
|
+
# @example Expected JSON format
|
|
18
|
+
# # data.json
|
|
19
|
+
# [
|
|
20
|
+
# { "name": "Alice", "email": "alice@example.com" },
|
|
21
|
+
# { "name": "Bob", "email": "bob@example.com" }
|
|
22
|
+
# ]
|
|
23
|
+
#
|
|
24
|
+
# @example From a string
|
|
25
|
+
# json_data = '[{"name": "Alice"}, {"name": "Bob"}]'
|
|
26
|
+
# source = Importu::Sources::JSON.new(StringIO.new(json_data))
|
|
27
|
+
#
|
|
28
|
+
# @note The entire JSON file is loaded into memory. For very large files,
|
|
29
|
+
# consider using CSV or a streaming JSON parser.
|
|
30
|
+
#
|
|
31
|
+
# @api public
|
|
32
|
+
class Importu::Sources::JSON
  # Builds a JSON source by reading and parsing the whole input up front.
  #
  # When +infile+ does not respond to #readline it is treated as a path and
  # opened in binary mode; a handle opened that way is closed again before
  # this method returns.
  #
  # @param infile [String, IO] file path or IO object to read from
  # @raise [Importu::InvalidInput] if the JSON is malformed or empty
  def initialize(infile, **)
    opened_here = !infile.respond_to?(:readline)
    @infile = opened_here ? File.open(infile, "rb") : infile

    begin
      @infile.rewind
      @reader = ::JSON.parse(@infile.read)
    rescue ::JSON::ParserError => parse_error
      raise Importu::InvalidInput, parse_error.message
    ensure
      # Everything is in memory now, so a handle we opened can go right away.
      @infile.close if opened_here && @infile && !@infile.closed?
    end

    raise Importu::InvalidInput, "Empty document" if @reader.nil?
  end

  # Releases resources held by this source.
  #
  # Nothing to do here: a handle opened by this source was already closed in
  # #initialize. The method exists so all sources share the same interface.
  #
  # @return [void]
  def close
    # Intentionally empty; see #initialize.
  end

  # Returns an enumerator over the parsed document's elements.
  #
  # @return [Enumerator<Hash>] rows from the JSON array
  def rows
    Enumerator.new do |yielder|
      @reader.each do |entry|
        yielder << entry
      end
    end
  end

  # Writes a JSON copy of the data annotated with per-row errors.
  #
  # Rows with errors get an "_errors" key holding the messages joined with
  # ", ". Rows without errors are dropped when +only_errors+ is set;
  # otherwise they are kept, minus any "_errors" key they carried in.
  # Rows are matched to errors by their zero-based position in #rows.
  #
  # @param summary [Importu::Summary] the import summary containing errors
  # @param only_errors [Boolean] if true, only include rows that had errors
  # @return [Tempfile, nil] rewound temp file with error data, or nil if the
  #   summary has no itemized errors
  def write_errors(summary, only_errors: false)
    return unless summary.itemized_errors.any?

    errors_by_index = summary.itemized_errors
    annotated = []

    rows.each_with_index do |row, index|
      if errors_by_index.key?(index)
        annotated << row.merge("_errors" => errors_by_index[index].join(", "))
        next
      end

      # Caller asked for failing rows only, and this one has no new errors.
      next if only_errors

      if row.key?("_errors")
        # Strip a stale "_errors" value without mutating the parsed row.
        cleaned = row.dup
        cleaned.delete("_errors")
        annotated << cleaned
      else
        annotated << row
      end
    end

    outfile = Tempfile.new("import")
    outfile.write(::JSON.pretty_generate(annotated))
    outfile.rewind
    outfile
  end

end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
require "importu/sources"
|
|
3
|
+
|
|
4
|
+
# Uses Ruby objects as import source data.
|
|
5
|
+
#
|
|
6
|
+
# Accepts an array of hashes or any enumerable that yields objects responding
|
|
7
|
+
# to #to_hash. Hash keys should be strings to match other source formats.
|
|
8
|
+
#
|
|
9
|
+
# @example Basic usage
|
|
10
|
+
# data = [{ "name" => "Alice" }, { "name" => "Bob" }]
|
|
11
|
+
# source = Importu::Sources::Ruby.new(data)
|
|
12
|
+
# source.rows.each { |row| puts row["name"] }
|
|
13
|
+
#
|
|
14
|
+
# @api public
|
|
15
|
+
class Importu::Sources::Ruby
  # Builds a source around an in-memory collection.
  #
  # @param data [Array<Hash>, Enumerable] objects that respond to #to_hash
  def initialize(data, **)
    @data = data
  end

  # Returns an enumerator that yields #to_hash of each element.
  #
  # @return [Enumerator<Hash>] rows from the data collection
  def rows
    Enumerator.new do |yielder|
      @data.each do |item|
        yielder << item.to_hash
      end
    end
  end

  # Error files are not supported for in-memory data.
  #
  # @param summary [Importu::Summary] the import summary (unused)
  # @param only_errors [Boolean] (unused)
  # @return [nil] always returns nil
  def write_errors(summary, only_errors: false)
    nil
  end

  # Does nothing; this source holds no file handle.
  #
  # Present so all sources share the same interface.
  #
  # @return [void]
  def close
    nil
  end

end
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
require "nokogiri"
|
|
3
|
+
require "tempfile"
|
|
4
|
+
|
|
5
|
+
require "importu/exceptions"
|
|
6
|
+
require "importu/sources"
|
|
7
|
+
|
|
8
|
+
# Parses XML files as import source data.
|
|
9
|
+
#
|
|
10
|
+
# Requires an XPath expression to identify which elements represent records.
|
|
11
|
+
# Each matching element becomes a row, with child elements and attributes
|
|
12
|
+
# as fields.
|
|
13
|
+
#
|
|
14
|
+
# ## Field Extraction
|
|
15
|
+
# For each matching element:
|
|
16
|
+
# - XML attributes become fields (e.g., `<book id="123">` → `{ "id" => "123" }`)
|
|
17
|
+
# - Child element text becomes fields (e.g., `<title>Ruby</title>` → `{ "title" => "Ruby" }`)
|
|
18
|
+
#
|
|
19
|
+
# @example Basic usage
|
|
20
|
+
# source = Importu::Sources::XML.new("data.xml", records_xpath: "//book")
|
|
21
|
+
# source.rows.each { |row| puts row["title"] }
|
|
22
|
+
#
|
|
23
|
+
# @example Expected XML format
|
|
24
|
+
# # data.xml
|
|
25
|
+
# <library>
|
|
26
|
+
# <book id="1">
|
|
27
|
+
# <title>The Ruby Way</title>
|
|
28
|
+
# <author>Hal Fulton</author>
|
|
29
|
+
# </book>
|
|
30
|
+
# <book id="2">
|
|
31
|
+
# <title>Programming Ruby</title>
|
|
32
|
+
# <author>Dave Thomas</author>
|
|
33
|
+
# </book>
|
|
34
|
+
# </library>
|
|
35
|
+
#
|
|
36
|
+
# @example Resulting rows
|
|
37
|
+
# # With records_xpath: "//book"
|
|
38
|
+
# { "id" => "1", "title" => "The Ruby Way", "author" => "Hal Fulton" }
|
|
39
|
+
# { "id" => "2", "title" => "Programming Ruby", "author" => "Dave Thomas" }
|
|
40
|
+
#
|
|
41
|
+
# @example Configure in importer
|
|
42
|
+
# class BookImporter < Importu::Importer
|
|
43
|
+
# source :xml, records_xpath: "//book"
|
|
44
|
+
# end
|
|
45
|
+
#
|
|
46
|
+
# @note Requires the nokogiri gem.
|
|
47
|
+
# @api public
|
|
48
|
+
class Importu::Sources::XML
  # Builds an XML source and parses the document immediately.
  #
  # When +infile+ does not respond to #readline it is treated as a path and
  # opened in binary mode; the source then owns the handle and #close will
  # close it. If anything fails during construction, a handle opened here is
  # closed before the error is re-raised.
  #
  # @param infile [String, IO] file path or IO object to read from
  # @param records_xpath [String] XPath expression to select record elements
  # @raise [Importu::InvalidInput] if the XML is malformed or empty
  def initialize(infile, records_xpath:, **)
    @owns_handle = !infile.respond_to?(:readline)
    @infile = @owns_handle ? File.open(infile, "rb") : infile
    @records_xpath = records_xpath

    document = reader
    # A missing root is reported first, even if parse errors were also recorded.
    raise Importu::InvalidInput, "Empty document" if document.root.nil?
    raise Importu::InvalidInput, document.errors.join("\n") if document.errors.any?
  rescue StandardError
    close
    raise
  end

  # Closes the underlying file handle if this source opened it.
  #
  # Calling it repeatedly is harmless. IO objects supplied by the caller are
  # never closed here.
  #
  # @return [void]
  def close
    return unless @owns_handle && @infile && !@infile.closed?
    @infile.close
  end

  # Returns an enumerator that yields each matching element as a hash.
  #
  # Attributes are added first, then child elements, each keyed by node name
  # with its text content as the value. A child element sharing a name with
  # an attribute overwrites the attribute's value.
  #
  # @return [Enumerator<Hash>] rows from matching XML elements
  def rows
    Enumerator.new do |yielder|
      reader.xpath(@records_xpath).each do |record|
        fields = {}
        record.attribute_nodes.each { |attribute| fields[attribute.node_name] = attribute.content }
        record.elements.each { |child| fields[child.name] = child.content }
        yielder << fields
      end
    end
  end

  # Writes an XML copy of the data annotated with per-record errors.
  #
  # The input is rewound and parsed again, and existing "_errors" elements
  # are removed. Each record with errors then gets an "_errors" child holding
  # the messages joined with ", ". Records without errors are removed when
  # +only_errors+ is set. Records are matched to errors by their zero-based
  # position among the +records_xpath+ matches.
  #
  # @param summary [Importu::Summary] the import summary containing errors
  # @param only_errors [Boolean] if true, only include records that had errors
  # @return [Tempfile, nil] rewound temp file with error data, or nil if the
  #   summary has no itemized errors
  def write_errors(summary, only_errors: false)
    return unless summary.itemized_errors.any?

    @infile.rewind
    document = Nokogiri::XML(@infile, &:nonet)
    document.xpath("//_errors").remove

    errors_by_index = summary.itemized_errors
    document.xpath(@records_xpath).each_with_index do |record, index|
      if errors_by_index.key?(index)
        error_node = Nokogiri::XML::Node.new("_errors", document)
        error_node.content = errors_by_index[index].join(", ")
        record.add_child(error_node)
      elsif only_errors
        record.remove
      end
    end

    outfile = Tempfile.new("import")
    outfile.write(document)
    outfile.rewind
    outfile
  end

  # Parsed document, built on first use with the nonet parse option and
  # memoized afterwards.
  def reader
    @reader ||= Nokogiri::XML(@infile, &:nonet)
  end
  private :reader

end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Namespace for data source parsers.
|
|
4
|
+
#
|
|
5
|
+
# Sources parse input data (CSV, JSON, XML, Ruby objects) and provide
|
|
6
|
+
# an enumerator of row hashes for the importer to process.
|
|
7
|
+
#
|
|
8
|
+
# @see Importu::Sources::CSV
|
|
9
|
+
# @see Importu::Sources::JSON
|
|
10
|
+
# @see Importu::Sources::XML
|
|
11
|
+
# @see Importu::Sources::Ruby
|
|
12
|
+
module Importu::Sources
  # Intentionally empty: this file only establishes the namespace. Each
  # concrete source (CSV, JSON, XML, Ruby) requires "importu/sources" and
  # then defines itself as Importu::Sources::<Name>.
end
|