importu 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. checksums.yaml +7 -0
  2. data/.editorconfig +15 -0
  3. data/.github/workflows/ci.yml +48 -0
  4. data/.gitignore +4 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +311 -0
  7. data/.simplecov +14 -0
  8. data/.yardstick.yml +36 -0
  9. data/Appraisals +22 -0
  10. data/CHANGELOG.md +51 -0
  11. data/CONTRIBUTING.md +86 -0
  12. data/Gemfile +5 -1
  13. data/LICENSE +21 -0
  14. data/README.md +435 -52
  15. data/Rakefile +71 -0
  16. data/UPGRADING.md +188 -0
  17. data/gemfiles/rails_7_2.gemfile +11 -0
  18. data/gemfiles/rails_7_2.gemfile.lock +268 -0
  19. data/gemfiles/rails_8_0.gemfile +11 -0
  20. data/gemfiles/rails_8_0.gemfile.lock +271 -0
  21. data/gemfiles/rails_8_1.gemfile +11 -0
  22. data/gemfiles/rails_8_1.gemfile.lock +269 -0
  23. data/gemfiles/standalone.gemfile +8 -0
  24. data/gemfiles/standalone.gemfile.lock +197 -0
  25. data/importu.gemspec +41 -22
  26. data/lib/importu/backends/active_record.rb +171 -0
  27. data/lib/importu/backends/middleware/duplicate_manager_proxy.rb +41 -0
  28. data/lib/importu/backends/middleware/enforce_allowed_actions.rb +52 -0
  29. data/lib/importu/backends/middleware.rb +11 -0
  30. data/lib/importu/backends.rb +103 -0
  31. data/lib/importu/config_dsl.rb +381 -0
  32. data/lib/importu/converter_context.rb +94 -0
  33. data/lib/importu/converters.rb +119 -64
  34. data/lib/importu/definition.rb +23 -0
  35. data/lib/importu/duplicate_manager.rb +88 -0
  36. data/lib/importu/exceptions.rb +135 -4
  37. data/lib/importu/importer.rb +183 -96
  38. data/lib/importu/record.rb +138 -102
  39. data/lib/importu/sources/csv.rb +122 -0
  40. data/lib/importu/sources/json.rb +106 -0
  41. data/lib/importu/sources/ruby.rb +46 -0
  42. data/lib/importu/sources/xml.rb +133 -0
  43. data/lib/importu/sources.rb +13 -0
  44. data/lib/importu/summary.rb +277 -0
  45. data/lib/importu/version.rb +3 -1
  46. data/lib/importu.rb +45 -9
  47. data/spec/fixtures/books-duplicates/README.md +7 -0
  48. data/spec/fixtures/books-duplicates/infile.csv +7 -0
  49. data/spec/fixtures/books-duplicates/model.json +23 -0
  50. data/spec/fixtures/books-duplicates/summary.json +10 -0
  51. data/spec/fixtures/books-valid/README.md +13 -0
  52. data/spec/fixtures/books-valid/infile.csv +4 -0
  53. data/spec/fixtures/books-valid/infile.json +23 -0
  54. data/spec/fixtures/books-valid/infile.xml +21 -0
  55. data/spec/fixtures/books-valid/model.json +23 -0
  56. data/spec/fixtures/books-valid/record.json +26 -0
  57. data/spec/fixtures/books-valid/summary.json +8 -0
  58. data/spec/fixtures/source-empty-file/infile.csv +0 -0
  59. data/spec/fixtures/source-empty-file/infile.json +0 -0
  60. data/spec/fixtures/source-empty-file/infile.xml +0 -0
  61. data/spec/fixtures/source-empty-records/infile.csv +3 -0
  62. data/spec/fixtures/source-empty-records/infile.json +1 -0
  63. data/spec/fixtures/source-empty-records/infile.xml +6 -0
  64. data/spec/fixtures/source-malformed/infile.csv +1 -0
  65. data/spec/fixtures/source-malformed/infile.json +1 -0
  66. data/spec/fixtures/source-malformed/infile.xml +3 -0
  67. data/spec/fixtures/source-no-records/infile.csv +1 -0
  68. data/spec/fixtures/source-no-records/infile.json +1 -0
  69. data/spec/fixtures/source-no-records/infile.xml +3 -0
  70. data/spec/lib/importu/backends/active_record_spec.rb +150 -0
  71. data/spec/lib/importu/backends/middleware/duplicate_manager_proxy_spec.rb +70 -0
  72. data/spec/lib/importu/backends/middleware/enforce_allowed_actions_spec.rb +70 -0
  73. data/spec/lib/importu/backends_spec.rb +170 -0
  74. data/spec/lib/importu/converters_spec.rb +184 -141
  75. data/spec/lib/importu/definition_spec.rb +248 -0
  76. data/spec/lib/importu/duplicate_manager_spec.rb +92 -0
  77. data/spec/lib/importu/exceptions_spec.rb +69 -16
  78. data/spec/lib/importu/import_context_spec.rb +199 -0
  79. data/spec/lib/importu/importer_spec.rb +95 -0
  80. data/spec/lib/importu/integration_spec.rb +221 -0
  81. data/spec/lib/importu/record_spec.rb +130 -80
  82. data/spec/lib/importu/sources/csv_spec.rb +29 -0
  83. data/spec/lib/importu/sources/importer_source_examples.rb +175 -0
  84. data/spec/lib/importu/sources/json_spec.rb +29 -0
  85. data/spec/lib/importu/sources/ruby_spec.rb +102 -0
  86. data/spec/lib/importu/sources/xml_spec.rb +70 -0
  87. data/spec/lib/importu/summary_spec.rb +186 -0
  88. data/spec/spec_helper.rb +91 -7
  89. data/spec/support/active_record.rb +20 -0
  90. data/spec/support/book_importer.rb +31 -0
  91. data/spec/support/dummy_backend.rb +50 -0
  92. data/spec/support/fixtures_helper.rb +43 -0
  93. data/spec/support/matchers/delegate_matcher.rb +14 -8
  94. metadata +173 -100
  95. data/lib/importu/core_ext/array/deep_freeze.rb +0 -7
  96. data/lib/importu/core_ext/deep_freeze.rb +0 -3
  97. data/lib/importu/core_ext/hash/deep_freeze.rb +0 -7
  98. data/lib/importu/core_ext/object/deep_freeze.rb +0 -6
  99. data/lib/importu/core_ext.rb +0 -3
  100. data/lib/importu/dsl.rb +0 -127
  101. data/lib/importu/importer/csv.rb +0 -52
  102. data/lib/importu/importer/json.rb +0 -45
  103. data/lib/importu/importer/xml.rb +0 -55
  104. data/spec/factories/importer.rb +0 -12
  105. data/spec/factories/importer_record.rb +0 -13
  106. data/spec/factories/json_importer.rb +0 -14
  107. data/spec/factories/xml_importer.rb +0 -12
  108. data/spec/lib/importu/dsl_spec.rb +0 -26
  109. data/spec/lib/importu/importer/json_spec.rb +0 -37
  110. data/spec/lib/importu/importer/xml_spec.rb +0 -14
@@ -1,123 +1,159 @@
1
- require 'active_support/core_ext/module/delegation'
2
-
1
+ # frozen_string_literal: true
2
+
3
+ require "forwardable"
4
+
5
+ require "importu/converter_context"
6
+ require "importu/exceptions"
7
+
8
+ # Represents a single record from the import source.
9
+ #
10
+ # Records lazily convert field values on access and behave like hashes,
11
+ # supporting standard hash methods like [], fetch, keys, values, and each.
12
+ #
13
+ # @example Iterating over records
14
+ # importer.records.each do |record|
15
+ # puts "#{record[:title]} by #{record[:author]}"
16
+ # end
17
+ #
18
+ # @example Accessing field values
19
+ # record[:title] # => "The Ruby Programming Language"
20
+ # record.fetch(:author) # => "David Flanagan"
21
+ # record.keys # => [:title, :author, :isbn]
22
+ #
23
+ # @example Converting to a plain hash
24
+ # record.to_hash # => { title: "...", author: "...", isbn: "..." }
25
+ #
26
+ # @example Accessing raw source data
27
+ # record.data # => { "Title" => "...", "Author" => "..." }
28
+ #
29
+ # @example Checking for conversion errors
30
+ # if record.valid?
31
+ # process(record.to_hash)
32
+ # else
33
+ # record.errors.each { |e| puts e.to_s }
34
+ # end
35
+ #
36
+ # @see Importu::Importer#records
37
+ # @api public
3
38
  class Importu::Record
4
- attr_reader :importer, :data, :raw_data
5
-
6
- include Enumerable
7
-
8
- delegate :keys, :values, :each, :[], :key?, :to => :record_hash
9
- delegate :preprocessor, :postprocessor, :to => :importer
10
- delegate :definitions, :converters, :to => :importer
11
-
12
- def initialize(importer, data, raw_data)
13
- @importer, @data, @raw_data = importer, data, raw_data
14
- end
15
39
 
16
- def record_hash
17
- @record_hash ||= generate_record_hash
40
+ extend Forwardable
41
+
42
+ # The raw data from the source before conversion.
43
+ #
44
+ # @return [Hash] the raw source data
45
+ # @api public
46
+ attr_reader :data
47
+
48
+ # Creates a new record from source data.
49
+ #
50
+ # @param data [Hash] the raw source data
51
+ # @param context [Class] the converter context class
52
+ # @param fields [Hash] field definitions
53
+ # @api private
54
+ def initialize(data, context, fields:, **)
55
+ @data = data
56
+ @field_definitions = fields
57
+ @context = context.new(data)
58
+
59
+ @errors = []
18
60
  end
19
61
 
20
- def to_hash
21
- record_hash
22
- end
23
-
24
- def convert(name, type, options = {})
25
- type, options = type[:to], type if type.kind_of?(Hash)
26
- converter = type ? converters[type] : options[:converter] \
27
- or raise "converter not found: #{type}"
28
-
29
- # TODO: defining options in field definition is deprecated
30
- definition = definitions[name] || {}
31
- options = definition.merge(options)
32
-
33
- begin
34
- value = instance_exec(name, options, &converter)
35
- value.nil? ? options[:default] : value
36
-
37
- rescue Importu::MissingField => e
38
- raise e if options[:required]
39
- options[:default]
40
-
41
- rescue ArgumentError => e
42
- # conversion of field value most likely failed
43
- raise Importu::FieldParseError, "#{name}: #{e.message}"
62
+ # Returns field names that can be assigned for the given action.
63
+ #
64
+ # @param action [Symbol] :create or :update
65
+ # @return [Array<Symbol>] assignable field names
66
+ # @api semipublic
67
+ def assignable_fields_for(action)
68
+ @field_definitions.each_with_object([]) do |(name, definition), acc|
69
+ if definition[action] == true && definition[:abstract] == false
70
+ acc << name
71
+ end
44
72
  end
45
73
  end
46
74
 
47
- def field_value(name, options = {})
48
- definition = definitions[name] \
49
- or raise "importer field not defined: #{name}"
50
-
51
- convert(name, nil, definition.merge(options))
75
+ # Returns any conversion errors encountered when processing fields.
76
+ #
77
+ # @return [Array<Importu::FieldParseError>] conversion errors
78
+ # @api public
79
+ def errors
80
+ ensure_record_hash
81
+ @errors
52
82
  end
53
83
 
54
- def assign_to(object, action, &block)
55
- @object, @action = object, action
56
-
57
- instance_eval(&preprocessor) if preprocessor
58
- instance_exec(object, record_hash, &block) if block
59
-
60
- # filter out any fields we're not allowed to copy for this action
61
- allowed_fields = definitions.select {|n,d| d[action] }.keys
62
- concrete_fields = definitions.reject {|n,d| d[:abstract] }.keys
63
- field_names = record_hash.keys & allowed_fields & concrete_fields
64
-
65
- unsupported = field_names.reject {|n| object.respond_to?("#{n}=") }
66
- if unsupported.any?
67
- raise "model does not support assigning fields: #{unsupported.to_sentence}"
68
- end
84
+ # Converts the record to a hash of field names to converted values.
85
+ #
86
+ # @return [Hash{Symbol => Object}] the converted field values
87
+ # @raise [Importu::InvalidRecord] if any field conversion errors occurred
88
+ # @api public
89
+ def to_hash
90
+ ensure_record_hash
69
91
 
70
- (record_hash.keys & allowed_fields & concrete_fields).each do |name|
71
- if object.respond_to?("#{name}=")
72
- object.send("#{name}=", record_hash[name])
73
- else
74
- end
92
+ if errors.any?
93
+ raise Importu::InvalidRecord.new("field parse errors", errors)
94
+ else
95
+ @record_hash
75
96
  end
76
-
77
- instance_eval(&postprocessor) if postprocessor
78
-
79
- object
80
97
  end
81
98
 
82
- def save!
83
- return :unchanged unless @object.changed?
84
-
85
- begin
86
- @object.save!
87
- case @action
88
- when :create then :created
89
- when :update then :updated
90
- end
91
-
92
- rescue ActiveRecord::RecordInvalid => e
93
- error_msgs = @object.errors.map do |name,message|
94
- name = definitions[name][:label] if definitions[name]
95
- name == 'base' ? message : "#{name} #{message}"
96
- end.join(', ')
97
-
98
- raise Importu::InvalidRecord, error_msgs, @object.errors.full_messages
99
- end
99
+ # Returns whether the record has any conversion errors.
100
+ #
101
+ # @return [Boolean] true if no errors, false otherwise
102
+ # @api public
103
+ def valid?
104
+ ensure_record_hash
105
+ errors.none?
100
106
  end
101
107
 
102
-
103
- private
104
-
105
- attr_reader :object, :action # needed for exposing to instance_eval'd blocks
106
-
107
- alias_method :record, :record_hash
108
-
109
- def generate_record_hash
110
- definitions.inject({}) do |hash,(name,definition)|
111
- hash[name.to_sym] = field_value(name)
112
- hash
108
+ # @!method [](key)
109
+ # Access a field value by name.
110
+ # @param key [Symbol] the field name
111
+ # @return [Object] the converted value
112
+ #
113
+ # @!method fetch(key, default = nil)
114
+ # Access a field value with a default.
115
+ # @param key [Symbol] the field name
116
+ # @param default [Object] value to return if key not found
117
+ # @return [Object] the converted value or default
118
+ #
119
+ # @!method keys
120
+ # Returns all field names.
121
+ # @return [Array<Symbol>] the field names
122
+ #
123
+ # @!method values
124
+ # Returns all converted field values.
125
+ # @return [Array<Object>] the field values
126
+ delegate (Hash.public_instance_methods - public_instance_methods) => :to_hash
127
+
128
+ private def ensure_record_hash
129
+ @record_hash ||= @field_definitions.each_with_object({}) do |(name, _), hash|
130
+ hash[name] = @context.field_value(name)
131
+ rescue Importu::FieldParseError => e
132
+ @errors << e
113
133
  end
114
134
  end
115
135
 
116
- def method_missing(meth, *args, &block)
117
- if converters[meth]
118
- convert(args[0], meth, args[1]||{}) # convert(name, type, options)
119
- else
120
- super
136
+ # Iterates over source rows, yielding Record instances.
137
+ #
138
+ # @api semipublic
139
+ class Iterator < Enumerator
140
+ # Creates a new iterator over source rows.
141
+ #
142
+ # @param rows [Enumerator] the source rows to iterate
143
+ # @param converters [Hash] converter definitions
144
+ # @param fields [Hash] field definitions
145
+ # @api private
146
+ def initialize(rows, converters:, fields:, **)
147
+ context = Importu::ConverterContext.with_config(
148
+ converters: converters,
149
+ fields: fields,
150
+ )
151
+
152
+ super() do |yielder|
153
+ rows.each do |row|
154
+ yielder.yield Importu::Record.new(row, context, fields: fields)
155
+ end
156
+ end
121
157
  end
122
158
  end
123
159
 
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+ require "csv"
3
+ require "tempfile"
4
+
5
+ require "importu/exceptions"
6
+ require "importu/sources"
7
+
8
+ # Parses CSV files as import source data.
9
+ #
10
+ # Each row becomes a hash with header names as keys. The CSV must have a
11
+ # header row.
12
+ #
13
+ # @example Basic usage
14
+ # source = Importu::Sources::CSV.new("data.csv")
15
+ # source.rows.each { |row| puts row["name"] }
16
+ #
17
+ # @example From a string
18
+ # csv_data = "name,email\nAlice,alice@example.com"
19
+ # source = Importu::Sources::CSV.new(StringIO.new(csv_data))
20
+ #
21
+ # @example With semicolon delimiter
22
+ # source = Importu::Sources::CSV.new("data.csv", csv_options: { col_sep: ";" })
23
+ #
24
+ # @example With tab delimiter
25
+ # source = Importu::Sources::CSV.new("data.tsv", csv_options: { col_sep: "\t" })
26
+ #
27
+ # @example Common csv_options
28
+ # csv_options: {
29
+ # col_sep: ";", # Column separator (default: ",")
30
+ # quote_char: "'", # Quote character (default: '"')
31
+ # encoding: "UTF-8", # File encoding
32
+ # }
33
+ #
34
+ # @see https://ruby-doc.org/stdlib/libdoc/csv/rdoc/CSV.html Ruby CSV documentation
35
+ # @api public
36
+ class Importu::Sources::CSV
37
+ # Creates a new CSV source.
38
+ #
39
+ # @param infile [String, IO] file path or IO object to read from
40
+ # @param csv_options [Hash] options passed to Ruby's CSV parser
41
+ # @raise [Importu::InvalidInput] if the CSV is malformed or empty
42
+ def initialize(infile, csv_options: {}, **)
43
+ @owns_handle = !infile.respond_to?(:readline)
44
+ @infile = @owns_handle ? File.open(infile, "rb") : infile
45
+
46
+ @csv_options = {
47
+ headers: true,
48
+ return_headers: true,
49
+ write_headers: true,
50
+ skip_blanks: true,
51
+ }.merge(csv_options)
52
+
53
+ begin
54
+ @reader = ::CSV.new(@infile, **@csv_options)
55
+ @header = @reader.readline
56
+ rescue CSV::MalformedCSVError => e
57
+ raise Importu::InvalidInput, e.message
58
+ end
59
+
60
+ if @header.nil?
61
+ raise Importu::InvalidInput, "Empty document"
62
+ end
63
+ rescue StandardError
64
+ close
65
+ raise
66
+ end
67
+
68
+ # Closes the underlying file handle if opened by this source.
69
+ #
70
+ # Safe to call multiple times. Only closes handles that were opened
71
+ # by this source (not IO objects passed in).
72
+ #
73
+ # @return [void]
74
+ def close
75
+ return unless @owns_handle && @infile && !@infile.closed?
76
+ @infile.close
77
+ end
78
+
79
+ # Returns an enumerator that yields each row as a hash.
80
+ #
81
+ # @return [Enumerator<Hash>] rows with header names as keys
82
+ def rows
83
+ @infile.rewind
84
+ reader = ::CSV.new(@infile, **@csv_options)
85
+ Enumerator.new do |yielder|
86
+ reader.each {|row| yielder.yield(row.to_hash) unless row.header_row? }
87
+ end
88
+ end
89
+
90
+ # Generates a CSV file with error information appended.
91
+ #
92
+ # Creates a copy of the original data with an "_errors" column containing
93
+ # any validation errors for each row. Useful for returning to data providers.
94
+ #
95
+ # @param summary [Importu::Summary] the import summary containing errors
96
+ # @param only_errors [Boolean] if true, only include rows that had errors
97
+ # @return [Tempfile, nil] temp file with error data, or nil if no errors
98
+ def write_errors(summary, only_errors: false)
99
+ return unless summary.itemized_errors.any?
100
+
101
+ header = @header.fields | ["_errors"]
102
+ itemized_errors = summary.itemized_errors
103
+
104
+ Tempfile.new("import").tap do |file|
105
+ writer = CSV.new(file, **@csv_options)
106
+ writer << header
107
+
108
+ rows.each.with_index do |row, index|
109
+ errors = itemized_errors.key?(index) \
110
+ ? itemized_errors[index].join(", ")
111
+ : nil
112
+
113
+ if errors || !only_errors
114
+ writer << row.merge("_errors" => errors).values_at(*header)
115
+ end
116
+ end
117
+
118
+ file.rewind
119
+ end
120
+ end
121
+
122
+ end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+ require "json"
3
+ require "tempfile"
4
+
5
+ require "importu/exceptions"
6
+ require "importu/sources"
7
+
8
+ # Parses JSON files as import source data.
9
+ #
10
+ # The JSON must have an array as the root element. Each array element becomes
11
+ # a row. The entire file is loaded into memory.
12
+ #
13
+ # @example Basic usage
14
+ # source = Importu::Sources::JSON.new("data.json")
15
+ # source.rows.each { |row| puts row["name"] }
16
+ #
17
+ # @example Expected JSON format
18
+ # # data.json
19
+ # [
20
+ # { "name": "Alice", "email": "alice@example.com" },
21
+ # { "name": "Bob", "email": "bob@example.com" }
22
+ # ]
23
+ #
24
+ # @example From a string
25
+ # json_data = '[{"name": "Alice"}, {"name": "Bob"}]'
26
+ # source = Importu::Sources::JSON.new(StringIO.new(json_data))
27
+ #
28
+ # @note The entire JSON file is loaded into memory. For very large files,
29
+ # consider using CSV or a streaming JSON parser.
30
+ #
31
+ # @api public
32
+ class Importu::Sources::JSON
33
+ # Creates a new JSON source.
34
+ #
35
+ # @param infile [String, IO] file path or IO object to read from
36
+ # @raise [Importu::InvalidInput] if the JSON is malformed or empty
37
+ def initialize(infile, **)
38
+ owns_handle = !infile.respond_to?(:readline)
39
+ @infile = owns_handle ? File.open(infile, "rb") : infile
40
+
41
+ begin
42
+ @infile.rewind
43
+ @reader = ::JSON.parse(@infile.read)
44
+ rescue ::JSON::ParserError => e
45
+ raise Importu::InvalidInput, e.message
46
+ ensure
47
+ # JSON loads entire content into memory, so we can close immediately
48
+ @infile.close if owns_handle && @infile && !@infile.closed?
49
+ end
50
+
51
+ if @reader.nil?
52
+ raise Importu::InvalidInput, "Empty document"
53
+ end
54
+ end
55
+
56
+ # Closes any resources held by this source.
57
+ #
58
+ # For JSON sources, the file is already closed after initialization
59
+ # since the entire content is loaded into memory. This method is
60
+ # provided for API consistency with other sources.
61
+ #
62
+ # @return [void]
63
+ def close
64
+ # JSON source closes file immediately after reading into memory
65
+ end
66
+
67
+ # Returns an enumerator that yields each element as a hash.
68
+ #
69
+ # @return [Enumerator<Hash>] rows from the JSON array
70
+ def rows
71
+ Enumerator.new do |yielder|
72
+ @reader.each {|row| yielder.yield(row) }
73
+ end
74
+ end
75
+
76
+ # Generates a JSON file with error information appended.
77
+ #
78
+ # Creates a copy of the original data with an "_errors" key containing
79
+ # any validation errors for each row.
80
+ #
81
+ # @param summary [Importu::Summary] the import summary containing errors
82
+ # @param only_errors [Boolean] if true, only include rows that had errors
83
+ # @return [Tempfile, nil] temp file with error data, or nil if no errors
84
+ def write_errors(summary, only_errors: false)
85
+ return unless summary.itemized_errors.any?
86
+
87
+ itemized_errors = summary.itemized_errors
88
+ updated_rows = rows.each.with_index.with_object([]) do |(row, index), acc|
89
+ if itemized_errors.key?(index)
90
+ acc << row.merge("_errors" => itemized_errors[index].join(", "))
91
+ elsif only_errors
92
+ # Requested to only include rows with new errors, row has none
93
+ elsif row.key?("_errors")
94
+ acc << row.dup.tap {|r| r.delete("_errors") }
95
+ else
96
+ acc << row
97
+ end
98
+ end
99
+
100
+ Tempfile.new("import").tap do |file|
101
+ file.write(JSON.pretty_generate(updated_rows))
102
+ file.rewind
103
+ end
104
+ end
105
+
106
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+ require "importu/sources"
3
+
4
+ # Uses Ruby objects as import source data.
5
+ #
6
+ # Accepts an array of hashes or any enumerable that yields objects responding
7
+ # to #to_hash. Hash keys should be strings to match other source formats.
8
+ #
9
+ # @example Basic usage
10
+ # data = [{ "name" => "Alice" }, { "name" => "Bob" }]
11
+ # source = Importu::Sources::Ruby.new(data)
12
+ # source.rows.each { |row| puts row["name"] }
13
+ #
14
+ # @api public
15
+ class Importu::Sources::Ruby
16
+ # Creates a new Ruby source.
17
+ #
18
+ # @param data [Array<Hash>, Enumerable] objects that respond to #to_hash
19
+ def initialize(data, **)
20
+ @data = data
21
+ end
22
+
23
+ # Returns an enumerator that yields each element as a hash.
24
+ #
25
+ # @return [Enumerator<Hash>] rows from the data array
26
+ def rows
27
+ Enumerator.new do |yielder|
28
+ @data.each {|row| yielder.yield(row.to_hash) }
29
+ end
30
+ end
31
+
32
+ # Not implemented for Ruby source.
33
+ #
34
+ # @param summary [Importu::Summary] the import summary (unused)
35
+ # @param only_errors [Boolean] (unused)
36
+ # @return [nil] always returns nil
37
+ def write_errors(summary, only_errors: false); end
38
+
39
+ # No-op for Ruby source (no file handles to close).
40
+ #
41
+ # Provided for API consistency with file-based sources.
42
+ #
43
+ # @return [void]
44
+ def close; end
45
+
46
+ end
@@ -0,0 +1,133 @@
1
+ # frozen_string_literal: true
2
+ require "nokogiri"
3
+ require "tempfile"
4
+
5
+ require "importu/exceptions"
6
+ require "importu/sources"
7
+
8
+ # Parses XML files as import source data.
9
+ #
10
+ # Requires an XPath expression to identify which elements represent records.
11
+ # Each matching element becomes a row, with child elements and attributes
12
+ # as fields.
13
+ #
14
+ # ## Field Extraction
15
+ # For each matching element:
16
+ # - XML attributes become fields (e.g., `<book id="123">` → `{ "id" => "123" }`)
17
+ # - Child element text becomes fields (e.g., `<title>Ruby</title>` → `{ "title" => "Ruby" }`)
18
+ #
19
+ # @example Basic usage
20
+ # source = Importu::Sources::XML.new("data.xml", records_xpath: "//book")
21
+ # source.rows.each { |row| puts row["title"] }
22
+ #
23
+ # @example Expected XML format
24
+ # # data.xml
25
+ # <library>
26
+ # <book id="1">
27
+ # <title>The Ruby Way</title>
28
+ # <author>Hal Fulton</author>
29
+ # </book>
30
+ # <book id="2">
31
+ # <title>Programming Ruby</title>
32
+ # <author>Dave Thomas</author>
33
+ # </book>
34
+ # </library>
35
+ #
36
+ # @example Resulting rows
37
+ # # With records_xpath: "//book"
38
+ # { "id" => "1", "title" => "The Ruby Way", "author" => "Hal Fulton" }
39
+ # { "id" => "2", "title" => "Programming Ruby", "author" => "Dave Thomas" }
40
+ #
41
+ # @example Configure in importer
42
+ # class BookImporter < Importu::Importer
43
+ # source :xml, records_xpath: "//book"
44
+ # end
45
+ #
46
+ # @note Requires the nokogiri gem.
47
+ # @api public
48
+ class Importu::Sources::XML
49
+ # Creates a new XML source.
50
+ #
51
+ # @param infile [String, IO] file path or IO object to read from
52
+ # @param records_xpath [String] XPath expression to select record elements
53
+ # @raise [Importu::InvalidInput] if the XML is malformed or empty
54
+ def initialize(infile, records_xpath:, **)
55
+ @owns_handle = !infile.respond_to?(:readline)
56
+ @infile = @owns_handle ? File.open(infile, "rb") : infile
57
+ @records_xpath = records_xpath
58
+
59
+ if reader.root.nil?
60
+ raise Importu::InvalidInput, "Empty document"
61
+ elsif reader.errors.any?
62
+ raise Importu::InvalidInput, reader.errors.join("\n")
63
+ end
64
+ rescue StandardError
65
+ close
66
+ raise
67
+ end
68
+
69
+ # Closes the underlying file handle if opened by this source.
70
+ #
71
+ # Safe to call multiple times. Only closes handles that were opened
72
+ # by this source (not IO objects passed in).
73
+ #
74
+ # @return [void]
75
+ def close
76
+ return unless @owns_handle && @infile && !@infile.closed?
77
+ @infile.close
78
+ end
79
+
80
+ # Returns an enumerator that yields each matching element as a hash.
81
+ #
82
+ # Element attributes and child element text content become hash keys.
83
+ #
84
+ # @return [Enumerator<Hash>] rows from matching XML elements
85
+ def rows
86
+ Enumerator.new do |yielder|
87
+ reader.xpath(@records_xpath).each do |xml|
88
+ data = [
89
+ *xml.attribute_nodes.map {|a| [a.node_name, a.content] },
90
+ *xml.elements.map {|e| [e.name, e.content]},
91
+ ].to_h
92
+ yielder.yield(data)
93
+ end
94
+ end
95
+ end
96
+
97
+ # Generates an XML file with error information appended.
98
+ #
99
+ # Creates a copy of the original data with an "_errors" child element
100
+ # containing any validation errors for each record.
101
+ #
102
+ # @param summary [Importu::Summary] the import summary containing errors
103
+ # @param only_errors [Boolean] if true, only include records that had errors
104
+ # @return [Tempfile, nil] temp file with error data, or nil if no errors
105
+ def write_errors(summary, only_errors: false)
106
+ return unless summary.itemized_errors.any?
107
+
108
+ @infile.rewind
109
+ writer = Nokogiri::XML(@infile, &:nonet)
110
+ writer.xpath("//_errors").remove
111
+
112
+ itemized_errors = summary.itemized_errors
113
+ writer.xpath(@records_xpath).each_with_index do |xml, index|
114
+ if itemized_errors.key?(index)
115
+ node = Nokogiri::XML::Node.new "_errors", writer
116
+ node.content = itemized_errors[index].join(", ")
117
+ xml.add_child(node)
118
+ elsif only_errors
119
+ xml.remove
120
+ end
121
+ end
122
+
123
+ Tempfile.new("import").tap do |file|
124
+ file.write(writer)
125
+ file.rewind
126
+ end
127
+ end
128
+
129
+ private def reader
130
+ @reader ||= Nokogiri::XML(@infile, &:nonet)
131
+ end
132
+
133
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Namespace for data source parsers.
4
+ #
5
+ # Sources parse input data (CSV, JSON, XML, Ruby objects) and provide
6
+ # an enumerator of row hashes for the importer to process.
7
+ #
8
+ # @see Importu::Sources::CSV
9
+ # @see Importu::Sources::JSON
10
+ # @see Importu::Sources::XML
11
+ # @see Importu::Sources::Ruby
12
+ module Importu::Sources
13
+ end