smart_csv_import 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +7 -0
  2. data/LICENSE.adoc +134 -0
  3. data/README.md +534 -0
  4. data/app/jobs/smart_csv_import/import_job.rb +22 -0
  5. data/app/models/smart_csv_import/import.rb +36 -0
  6. data/app/models/smart_csv_import/import_row_error.rb +17 -0
  7. data/lib/generators/smart_csv_import/import/import_generator.rb +49 -0
  8. data/lib/generators/smart_csv_import/import/templates/import_form.rb.tt +32 -0
  9. data/lib/generators/smart_csv_import/import/templates/import_form_spec.rb.tt +38 -0
  10. data/lib/generators/smart_csv_import/install/install_generator.rb +34 -0
  11. data/lib/generators/smart_csv_import/install/templates/create_smart_csv_import_import_row_errors.rb.tt +18 -0
  12. data/lib/generators/smart_csv_import/install/templates/create_smart_csv_import_imports.rb.tt +23 -0
  13. data/lib/generators/smart_csv_import/install/templates/initializer.rb.tt +51 -0
  14. data/lib/generators/smart_csv_import/scaffold/scaffold_generator.rb +56 -0
  15. data/lib/generators/smart_csv_import/scaffold/templates/controller.rb.tt +33 -0
  16. data/lib/generators/smart_csv_import/scaffold/templates/new.html.erb.tt +12 -0
  17. data/lib/generators/smart_csv_import/scaffold/templates/show.html.erb.tt +59 -0
  18. data/lib/smart_csv_import/configuration.rb +77 -0
  19. data/lib/smart_csv_import/cosine_similarity.rb +15 -0
  20. data/lib/smart_csv_import/engine.rb +12 -0
  21. data/lib/smart_csv_import/failed_row_exporter.rb +78 -0
  22. data/lib/smart_csv_import/file_storage.rb +34 -0
  23. data/lib/smart_csv_import/header_normalizer.rb +76 -0
  24. data/lib/smart_csv_import/logging.rb +37 -0
  25. data/lib/smart_csv_import/match_result.rb +36 -0
  26. data/lib/smart_csv_import/matchable.rb +76 -0
  27. data/lib/smart_csv_import/matcher.rb +198 -0
  28. data/lib/smart_csv_import/normalizers/boolean_converter.rb +26 -0
  29. data/lib/smart_csv_import/normalizers/date_converter.rb +28 -0
  30. data/lib/smart_csv_import/notifications.rb +16 -0
  31. data/lib/smart_csv_import/processor/csv_preflight_analyzer.rb +74 -0
  32. data/lib/smart_csv_import/processor/import_result_builder.rb +97 -0
  33. data/lib/smart_csv_import/processor/mapping_review_policy.rb +90 -0
  34. data/lib/smart_csv_import/processor/nil_cell_counter.rb +19 -0
  35. data/lib/smart_csv_import/processor/null_progress_callback.rb +11 -0
  36. data/lib/smart_csv_import/processor/row_processor.rb +70 -0
  37. data/lib/smart_csv_import/processor.rb +294 -0
  38. data/lib/smart_csv_import/result.rb +101 -0
  39. data/lib/smart_csv_import/stability_report.rb +104 -0
  40. data/lib/smart_csv_import/strategies/llm.rb +106 -0
  41. data/lib/smart_csv_import/strategies/lookup.rb +41 -0
  42. data/lib/smart_csv_import/strategies/vector.rb +155 -0
  43. data/lib/smart_csv_import/strategy.rb +9 -0
  44. data/lib/smart_csv_import/strategy_failure.rb +13 -0
  45. data/lib/smart_csv_import/version.rb +5 -0
  46. data/lib/smart_csv_import.rb +79 -0
  47. data/smart_csv_import.gemspec +35 -0
  48. metadata +216 -0
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartCsvImport
  class Processor
    # Stateless builders that turn raw processing output (collected bad rows,
    # validation errors and counters) into ParseError/RowWarning structs,
    # attribute hashes for row-error records, and Result objects.
    module ImportResultBuilder
      module_function

      # Converts collected bad-row hashes into ParseError structs.
      #
      # @param bad_rows [Array<Hash>] hashes read via :csv_line_number (falling
      #   back to :file_line_number), :raw_logical_line and :error_message
      # @return [Array<ParseError>]
      def build_parse_errors(bad_rows)
        bad_rows.map do |bad_row|
          ParseError.new(
            line_number: bad_row[:csv_line_number] || bad_row[:file_line_number],
            raw_line: bad_row[:raw_logical_line].to_s.chomp,
            error_message: bad_row[:error_message].to_s
          )
        end
      end

      # Builds one attribute hash per validation error and per parse error,
      # validation entries first. All hashes share the same created_at.
      #
      # @param errors [Array<#row, #column, #messages>] validation errors
      # @param parse_errors [Array<#line_number, #raw_line, #error_message>]
      # @param import_id [Object] identifier stored on every hash
      # @return [Array<Hash>]
      def build_row_error_attributes(errors:, parse_errors:, import_id:)
        now = Time.current
        validation_attrs = errors.map do |err|
          {
            import_id: import_id,
            row_number: err.row,
            error_type: 'validation',
            column_name: err.column.to_s,
            messages: err.messages,
            raw_line: nil,
            error_message: nil,
            created_at: now
          }
        end
        parse_attrs = parse_errors.map do |pe|
          {
            import_id: import_id,
            row_number: pe.line_number,
            error_type: 'parse',
            column_name: nil,
            messages: [],
            raw_line: pe.raw_line,
            error_message: pe.error_message,
            created_at: now
          }
        end
        validation_attrs + parse_attrs
      end

      # @param blank_count [Integer] number of blank rows that were skipped
      # @return [Array<RowWarning>] empty when nothing was skipped, otherwise a
      #   single summary warning of type :blank_rows
      def build_blank_row_warnings(blank_count)
        return [] if blank_count.zero?

        noun = blank_count == 1 ? 'row' : 'rows'
        verb = blank_count == 1 ? 'was' : 'were'
        [RowWarning.new(row: 0, message: "#{blank_count} blank #{noun} #{verb} skipped", type: :blank_rows)]
      end

      # Builds the warning explaining that the import was aborted because too
      # many rows were malformed.
      #
      # @param parse_error_count [Integer]
      # @param total [Integer] rows seen; a non-positive total is reported as 0%
      # @param bad_row_limit [Numeric] limit as a fraction (0.1 is shown as 10.0%)
      # @return [RowWarning]
      def build_abort_warning(parse_error_count:, total:, bad_row_limit:)
        ratio = total.positive? ? parse_error_count.to_f / total : 0
        pct = (ratio * 100).round(1)
        limit_pct = (bad_row_limit * 100).round(1)
        RowWarning.new(
          row: 0,
          message: "Import aborted: #{pct}% of rows were malformed (limit: #{limit_pct}%)"
        )
      end

      # Picks the Result variant for a run that reached the end of the file.
      # A dry run takes precedence; otherwise any failed row or parse error
      # yields a partial failure, and a clean run yields a completed result.
      # The total is imported + failed + parse errors.
      #
      # @return [Result]
      def final_result(imported:, failed:, errors:, parse_errors:, warnings:, header_mappings:, import_id:, dry_run:)
        total = imported + failed + parse_errors.size
        all_failed = failed + parse_errors.size

        if dry_run
          Result.dry_run(imported: imported, failed: failed, total: total, errors: errors,
                         header_mappings: header_mappings, warnings: warnings, parse_errors: parse_errors)
        elsif all_failed.positive?
          Result.partial_failure(imported: imported, failed: failed, total: total, errors: errors,
                                 header_mappings: header_mappings, import_id: import_id,
                                 warnings: warnings, parse_errors: parse_errors)
        else
          Result.completed(imported: imported, failed: failed, total: total, errors: errors,
                           header_mappings: header_mappings, import_id: import_id,
                           warnings: warnings, parse_errors: parse_errors)
        end
      end

      # Builds the partial-failure Result for an import aborted by the bad-row
      # limit; the abort warning is appended after the given warnings.
      #
      # @param errors [Array] validation errors gathered before the abort.
      #   Optional and defaults to an empty list, which matches the previous
      #   behaviour; pass the collected errors to expose them on the result.
      # @return [Result]
      def abort_result(imported:, failed:, parse_errors:, warnings:, header_mappings:, import_id:, bad_row_limit:,
                       errors: [])
        total = imported + failed + parse_errors.size
        abort_warning = build_abort_warning(
          parse_error_count: parse_errors.size, total: total, bad_row_limit: bad_row_limit
        )

        Result.partial_failure(
          imported: imported, failed: failed, total: total, errors: errors,
          header_mappings: header_mappings, import_id: import_id,
          warnings: [*warnings, abort_warning], parse_errors: parse_errors
        )
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartCsvImport
  class Processor
    # Stateless policy helpers around header-to-field mappings: serialising
    # them, deriving the field map used for row processing, deciding whether a
    # manual review is required, and producing warnings for unmatched columns.
    #
    # `mappings` is a Hash of CSV header => match result. Match results are
    # expected to respond to #unmatched?, and usually to #target_field,
    # #matched? and #confidence.
    module MappingReviewPolicy
      module_function

      # @param mappings [Hash]
      # @return [Hash] header => target field converted with #to_s, or nil when
      #   the result does not respond to #target_field
      def serialize_mappings(mappings)
        mappings.each_with_object({}) do |(header, result), acc|
          acc[header] = result.respond_to?(:target_field) ? result.target_field.to_s : nil
        end
      end

      # @param mappings [Hash]
      # @return [Hash] header => target field, for matched results only
      def build_field_map(mappings)
        mappings.each_with_object({}) do |(header, result), acc|
          next unless result.respond_to?(:target_field) && result.matched?

          acc[header] = result.target_field
        end
      end

      # @param mappings [Hash]
      # @return [Array] headers whose result reports #unmatched?
      def unmatched_headers(mappings)
        mappings.select { |_, result| result.unmatched? }.keys
      end

      # Describes every mapping for a review screen.
      #
      # @param mappings [Hash]
      # @param confidence_threshold [Numeric]
      # @return [Hash] header => { field:, confidence:, status: } where status is
      #   :unmatched, :low_confidence (below the threshold) or :matched
      def build_proposed_mappings(mappings, confidence_threshold:)
        mappings.each_with_object({}) do |(header, result), proposed|
          proposed[header] = if result.unmatched?
                               { field: nil, confidence: 0.0, status: :unmatched }
                             elsif result.confidence < confidence_threshold
                               { field: result.target_field, confidence: result.confidence, status: :low_confidence }
                             else
                               { field: result.target_field, confidence: result.confidence, status: :matched }
                             end
        end
      end

      # Decides whether the import must stop for a manual mapping review.
      #
      # * :always - always review
      # * :auto   - review when any header is unmatched or below the threshold
      # * :skip   - review only when a required field is missing or below the
      #   threshold
      #
      # Any other mode returns false. Previously the missing `else` branch made
      # this predicate return nil, which is equally falsy, so callers that only
      # test truthiness behave as before.
      #
      # @param review_mode [Symbol]
      # @param required_fields [Enumerable] compared to #target_field with ==
      # @return [Boolean]
      def review_gate_triggered?(mappings, review_mode:, confidence_threshold:, required_fields:)
        case review_mode
        when :always
          true
        when :auto
          auto_gate?(mappings, confidence_threshold)
        when :skip
          required_field_below_threshold?(mappings, confidence_threshold, required_fields)
        else
          false
        end
      end

      # Collects the warnings derived from the mappings: the optional duplicate
      # warning first, then one warning per unmatched column, then a summary.
      #
      # @param duplicate_warning [Object, nil] included first when present
      # @return [Array]
      def collect_warnings(mappings, duplicate_warning: nil)
        base = duplicate_warning ? [duplicate_warning] : []
        unmatched = unmatched_headers(mappings)
        return base if unmatched.empty?

        per_column = unmatched.map do |header|
          UnmatchedColumnWarning.new(
            column_name: header,
            message: "Column '#{header}' was not imported — no matching field found"
          )
        end
        [*base, *per_column, summary_warning(unmatched)]
      end

      # @return [Boolean] true when any result is unmatched, or responds to
      #   #confidence with a value below the threshold
      def auto_gate?(mappings, confidence_threshold)
        return true if mappings.values.any?(&:unmatched?)

        mappings.values.any? do |r|
          r.respond_to?(:confidence) && r.confidence < confidence_threshold
        end
      end

      # @return [Boolean] true when some required field has no mapping at all,
      #   or its first mapping is below the threshold
      def required_field_below_threshold?(mappings, confidence_threshold, required_fields)
        required_fields.any? do |field_name|
          # NOTE(review): strict == comparison; assumes required_fields and
          # #target_field use the same type (Symbol vs String) - confirm.
          mapping = mappings.values.find do |result|
            result.respond_to?(:target_field) && result.target_field == field_name
          end
          mapping.nil? || mapping.confidence < confidence_threshold
        end
      end

      # @param unmatched [Array] unmatched header names (non-empty)
      # @return [RowWarning] single summary listing all unmatched columns
      def summary_warning(unmatched)
        noun = unmatched.size == 1 ? 'column' : 'columns'
        verb = unmatched.size == 1 ? 'was' : 'were'
        RowWarning.new(
          row: 0,
          message: "#{unmatched.size} #{noun} from your file #{verb} not imported: #{unmatched.join(', ')}"
        )
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartCsvImport
  class Processor
    # Counts cells whose stripped text is one of the configured "nil" markers.
    module NilCellCounter
      module_function

      # @param rows [Enumerable<Enumerable>] rows of cells
      # @param nil_values [#include?, nil] marker values; a blank collection
      #   short-circuits to 0 (relies on ActiveSupport's #blank?)
      # @return [Integer] number of matching cells across all rows
      def count_cells(rows:, nil_values:)
        return 0 if nil_values.blank?

        rows.sum { |row| count_row(row, nil_values) }
      end

      # @param row [Enumerable] cells of a single row
      # @param nil_values [#include?] marker values
      # @return [Integer] number of cells in the row matching a marker
      def count_row(row, nil_values)
        # Only cells that can be stripped are candidates: nil and other
        # non-string values are skipped instead of raising NoMethodError.
        row.count { |cell| cell.respond_to?(:strip) && nil_values.include?(cell.strip) }
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartCsvImport
  class Processor
    # Null-object progress callback: accepts a progress payload, does nothing
    # with it, and returns nil.
    class NullProgressCallback
      # @param _payload [Object] ignored
      # @return [nil]
      def call(_payload); end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartCsvImport
  class Processor
    # Stateless helpers that submit the rows of one CSV chunk to a form class
    # and tally the outcome.
    module RowProcessor
      module_function

      # Outcome of one chunk: counters, collected RowErrors, and the database
      # error that stopped the chunk early (nil when none occurred).
      ChunkResult = Struct.new(:imported, :failed, :blank, :errors, :aborted_error, keyword_init: true)

      # Processes every non-blank row of the chunk in order. Processing stops
      # at the first row whose submission is aborted; rows after it are not
      # submitted or counted.
      #
      # @param chunk [Array<Hash>] rows keyed by CSV header
      # @param field_map [Hash] CSV header => form attribute name
      # @param form_class [Class] instantiated once per row with the attributes
      # @param dry_run [Boolean] validate only (#valid?) instead of #save
      # @return [ChunkResult]
      def process_chunk(chunk, field_map:, form_class:, dry_run:)
        blank_rows, data_rows = chunk.partition { |row| blank_row?(row) }
        imported = 0
        failed = 0
        errors = []
        aborted_error = nil

        data_rows.each do |csv_row|
          status, detail = submit_row(csv_row, field_map: field_map, form_class: form_class, dry_run: dry_run)
          case status
          when :imported
            imported += 1
          when :failed
            failed += 1
            # Append in place; rebuilding the array per failed row was quadratic.
            errors.concat(detail)
          when :aborted
            aborted_error = detail
            break
          end
        end

        ChunkResult.new(
          imported: imported, failed: failed, blank: blank_rows.size,
          errors: errors, aborted_error: aborted_error
        )
      end

      # Builds the form for one row and validates or saves it.
      #
      # @return [Array] [:imported], [:failed, Array<RowError>] or
      #   [:aborted, ActiveRecord::StatementInvalid]
      def submit_row(csv_row, field_map:, form_class:, dry_run:)
        form = form_class.new(build_attributes(csv_row, field_map))
        success = dry_run ? form.valid? : form.save
        return [:imported] if success

        [:failed, collect_row_errors(form, csv_row[:csv_line_number] || 0)]
      rescue ActiveRecord::StatementInvalid => e
        [:aborted, e]
      end

      # @param row [Hash]
      # @return [Boolean] true when every value other than the
      #   :csv_line_number entry is nil or an empty string
      def blank_row?(row)
        # Single pass over the row; avoids copying the hash for every row.
        row.all? { |key, value| key == :csv_line_number || value.nil? || value.to_s.empty? }
      end

      # @param csv_row [Hash] row keyed by CSV header
      # @param field_map [Hash] CSV header => form attribute name
      # @return [Hash] form attribute name => cell value
      def build_attributes(csv_row, field_map)
        field_map.each_with_object({}) do |(header, field_name), attrs|
          next if header == :csv_line_number

          attrs[field_name] = csv_row[header]
        end
      end

      # @param form [#errors] form whose errors respond to #group_by_attribute
      # @param row_number [Integer] line number stored on each RowError
      # @return [Array<RowError>] one entry per attribute with its messages
      def collect_row_errors(form, row_number)
        form.errors.group_by_attribute.each_with_object([]) do |(attr, row_errors), errs|
          errs << RowError.new(
            row: row_number,
            column: attr.to_sym,
            messages: row_errors.map(&:message)
          )
        end
      end
    end
  end
end
@@ -0,0 +1,294 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+ require "smarter_csv"
5
+
6
module SmartCsvImport
  # Runs one CSV import end to end: records the Import, resolves header
  # mappings (or uses confirmed ones), optionally stops for a mapping review,
  # feeds the rows chunk by chunk to the form class, persists counters and row
  # errors, and returns a Result.
  class Processor
    include Logging

    # Chunk size used when the configuration does not define chunk_size.
    FULL_LOAD_CHUNK_SIZE = 10_000_000

    # @param file_path [String] path of the CSV file to import
    # @param form_class [Class] must include SmartCsvImport::Matchable, respond
    #   to .csv_fields and define #save
    # @param mode [Symbol] :async enqueues an ImportJob; anything else runs inline
    # @param batch_size [Integer] stored in @batch_size.
    #   NOTE(review): no method defined in this class reads @batch_size;
    #   chunking uses configuration.chunk_size - confirm the intent.
    # @param dry_run [Boolean] validate rows instead of saving them
    # @param import [Import, nil] existing import record to reuse; when given,
    #   no record is created and the duplicate check is skipped
    # @param confirmed_mappings [Hash, nil] header => field map that bypasses
    #   the matcher and the review gate
    # @param on_progress [#call, nil] invoked after every processed chunk
    # @raise [ConfigurationError] for an invalid form_class or on_progress
    def initialize(file_path:, form_class:, mode: :sync, batch_size: SmartCsvImport.configuration.batch_size, dry_run: false, import: nil, confirmed_mappings: nil, on_progress: nil)
      @file_path = file_path
      @form_class = form_class
      @mode = mode
      @batch_size = batch_size
      @dry_run = dry_run
      @existing_import = import
      @confirmed_mappings = confirmed_mappings
      @on_progress = on_progress
      @duplicate_warning = nil

      validate_form_class!
      validate_on_progress!

      @on_progress ||= NullProgressCallback.new
    end

    # Runs the import, or enqueues it when the mode is :async.
    #
    # @return [Result]
    def call
      import = @existing_import || create_import
      check_duplicate(import) unless @existing_import

      return enqueue_async(import) if @mode == :async

      process_csv(import)
    end

    private

    def validate_form_class!
      unless @form_class.respond_to?(:csv_fields) && @form_class.ancestors.include?(Matchable)
        raise ConfigurationError, "form_class must include SmartCsvImport::Matchable"
      end

      unless @form_class.method_defined?(:save)
        raise ConfigurationError, "form_class must define a #save method"
      end
    end

    # Stores the file, hashes it and creates the Import in "processing" state.
    def create_import
      stored_path = FileStorage.store(source_path: @file_path, import_type: @form_class.name)
      file_hash = FileStorage.compute_hash(file_path: @file_path)

      Import.create!(
        import_type: @form_class.name,
        original_filename: File.basename(@file_path),
        file_path: stored_path,
        file_hash: file_hash,
        status: "processing"
      )
    end

    # Remembers a warning when another import with the same file hash and
    # import type exists.
    def check_duplicate(import)
      duplicate = Import.find_duplicate(file_hash: import.file_hash, import_type: import.import_type)
      return unless duplicate && duplicate.id != import.id

      @duplicate_warning = RowWarning.new(
        row: 0,
        message: "File appears to be a duplicate of import ##{duplicate.id}"
      )
    end

    def enqueue_async(import)
      ImportJob.perform_later(import.id, @form_class.name)
      Result.queued(import_id: import.id)
    end

    # Resolves the header mappings and either returns a review-required result
    # or processes the rows. Any StandardError marks the import as failed and
    # is re-raised.
    def process_csv(import)
      preflight_warnings = [
        *CsvPreflightAnalyzer.duplicate_header_warnings(file_path: @file_path),
        *nil_cell_warnings
      ]

      if @confirmed_mappings
        field_map = @confirmed_mappings
        import.update!(header_mappings: field_map.transform_values(&:to_s))
        # The matcher path reports the duplicate-file warning through
        # collect_warnings; this path used to drop it, so add it explicitly.
        duplicate_warnings = [@duplicate_warning].compact
        return process_rows(import, field_map, [*preflight_warnings, *duplicate_warnings])
      end

      mappings = run_matcher
      serialized_mappings = MappingReviewPolicy.serialize_mappings(mappings)
      import.update!(header_mappings: serialized_mappings)

      warnings = MappingReviewPolicy.collect_warnings(mappings, duplicate_warning: @duplicate_warning)
      threshold = SmartCsvImport.configuration.confidence_threshold

      if MappingReviewPolicy.review_gate_triggered?(
        mappings,
        review_mode: SmartCsvImport.configuration.review_mode,
        confidence_threshold: threshold,
        required_fields: @form_class.required_csv_fields
      )
        import.update!(status: "mapping_review")
        return Result.review_required(
          header_mappings: serialized_mappings,
          import_id: import.id,
          proposed_mappings: MappingReviewPolicy.build_proposed_mappings(mappings, confidence_threshold: threshold),
          unmatched_columns: MappingReviewPolicy.unmatched_headers(mappings),
          warnings: [*preflight_warnings, *warnings]
        )
      end

      field_map = MappingReviewPolicy.build_field_map(mappings)
      process_rows(import, field_map, [*preflight_warnings, *warnings])
    rescue StandardError => e
      log_error("Processing failed: #{e.message}")
      mark_import_failed(import)
      raise e
    end

    # Flags the import as failed without letting a secondary persistence error
    # replace the exception that is already being propagated.
    def mark_import_failed(import)
      import.update!(status: "failed")
    rescue StandardError => e
      log_error("Could not mark import as failed: #{e.message}")
    end

    # @return [Array<RowWarning>] a single :nil_cleaned warning when the
    #   preflight analysis found cells matching the configured nil markers
    def nil_cell_warnings
      nil_match_count = CsvPreflightAnalyzer.count_nil_matches(
        file_path: @file_path, nil_values: SmartCsvImport.configuration.nil_values_matching
      )
      return [] unless nil_match_count.positive?

      noun = nil_match_count == 1 ? "cell" : "cells"
      [RowWarning.new(row: 0, message: "#{nil_match_count} #{noun} contained Excel error markers and were treated as empty", type: :nil_cleaned)]
    end

    def run_matcher
      Matcher.new(
        file_path: @file_path,
        form_class: @form_class,
        confidence_threshold: SmartCsvImport.configuration.confidence_threshold
      ).call
    end

    # Reads the file chunk by chunk, submits every chunk to RowProcessor and
    # keeps the Import counters up to date. Stops early when the share of
    # malformed rows exceeds the configured bad_row_limit (checked before every
    # chunk after the first, and once more at the end) or when a chunk reports
    # a database error.
    def process_rows(import, field_map, warnings)
      imported_count = 0
      failed_count = 0
      errors = []
      blank_count = 0
      aborted = false

      reader = SmarterCSV::Reader.new(@file_path, smarter_csv_options.merge(chunk_size: effective_chunk_size))
      reader.each_chunk do |chunk, chunk_index|
        chunk_number = chunk_index + 1

        if chunk_number > 1 && bad_row_limit_exceeded?(collected_bad_rows(reader).size, imported_count + failed_count)
          aborted = true
          break
        end

        chunk_result = RowProcessor.process_chunk(
          chunk, field_map: field_map, form_class: @form_class, dry_run: @dry_run
        )
        imported_count += chunk_result.imported
        failed_count += chunk_result.failed
        blank_count += chunk_result.blank
        # Append in place instead of rebuilding the whole array per chunk.
        errors.concat(chunk_result.errors)

        if chunk_result.aborted_error
          log_error("Database error during chunk #{chunk_number}: #{chunk_result.aborted_error.message}")
          return build_database_abort_result(
            import, imported_count, failed_count, errors,
            ImportResultBuilder.build_parse_errors(collected_bad_rows(reader)),
            [*warnings, *ImportResultBuilder.build_blank_row_warnings(blank_count)]
          )
        end

        import.update!(
          imported_count: imported_count,
          failed_count: failed_count + collected_bad_rows(reader).size
        )

        fire_progress_callback(imported_count, failed_count, chunk_number)
      end

      parse_errors = ImportResultBuilder.build_parse_errors(collected_bad_rows(reader))
      all_warnings = [*warnings, *ImportResultBuilder.build_blank_row_warnings(blank_count)]
      aborted ||= parse_errors.any? && bad_row_limit_exceeded?(parse_errors.size, imported_count + failed_count)

      return build_abort_result(import, imported_count, failed_count, errors, parse_errors, all_warnings) if aborted

      build_final_result(import, imported_count, failed_count, errors, parse_errors, all_warnings)
    end

    # @return [Array<Hash>] bad rows collected by the reader so far
    def collected_bad_rows(reader)
      reader.errors.fetch(:bad_rows, [])
    end

    # @param bad_row_count [Integer] malformed rows seen so far
    # @param processed_count [Integer] imported plus failed rows so far
    # @return [Boolean] true when the malformed share of all rows seen is
    #   strictly above the configured bad_row_limit
    def bad_row_limit_exceeded?(bad_row_count, processed_count)
      total = processed_count + bad_row_count
      total.positive? && bad_row_count.to_f / total > SmartCsvImport.configuration.bad_row_limit
    end

    def smarter_csv_options
      {
        strings_as_keys: true,
        strip_whitespace: true,
        keep_original_headers: true,
        field_size_limit: SmartCsvImport.configuration.field_size_limit,
        with_line_numbers: true,
        on_bad_row: :collect,
        collect_raw_lines: true,
        convert_values_to_numeric: false,
        remove_empty_values: false,
        remove_empty_hashes: false,
        invalid_byte_sequence: "",
        force_utf8: true,
        duplicate_header_suffix: "_",
        nil_values_matching: SmartCsvImport.configuration.nil_values_regexp,
        bad_row_limit: nil
      }
    end

    def effective_chunk_size
      SmartCsvImport.configuration.chunk_size || FULL_LOAD_CHUNK_SIZE
    end

    # Persists and returns the partial-failure outcome of a chunk that was
    # stopped by a database error.
    def build_database_abort_result(import, imported_count, failed_count, errors, parse_errors, warnings)
      total = imported_count + failed_count + parse_errors.size
      persist_final_import(
        import,
        status: "partial_failure",
        imported_count: imported_count,
        failed_count: failed_count + parse_errors.size,
        total_rows: total,
        errors: errors, parse_errors: parse_errors
      )
      Result.partial_failure(
        imported: imported_count,
        failed: failed_count,
        total: total,
        errors: errors,
        header_mappings: import.header_mappings,
        import_id: import.id,
        warnings: warnings,
        parse_errors: parse_errors
      )
    end

    # Persists the import as "failed" and returns the abort result.
    # NOTE(review): the validation errors are persisted here but are not
    # forwarded to ImportResultBuilder.abort_result, so the returned result
    # carries an empty errors list - confirm whether that is intended.
    def build_abort_result(import, imported_count, failed_count, errors, parse_errors, warnings)
      persist_final_import(
        import, status: "failed",
        imported_count: imported_count, failed_count: failed_count + parse_errors.size,
        total_rows: imported_count + failed_count + parse_errors.size,
        errors: errors, parse_errors: parse_errors
      )
      ImportResultBuilder.abort_result(
        imported: imported_count, failed: failed_count, parse_errors: parse_errors,
        warnings: warnings, header_mappings: import.header_mappings, import_id: import.id,
        bad_row_limit: SmartCsvImport.configuration.bad_row_limit
      )
    end

    # Persists the final status ("partial_failure" when anything failed,
    # otherwise "completed") and returns the matching Result.
    def build_final_result(import, imported_count, failed_count, errors, parse_errors, warnings)
      all_failed = failed_count + parse_errors.size
      total = imported_count + failed_count + parse_errors.size
      persist_final_import(
        import, status: all_failed.positive? ? "partial_failure" : "completed",
        imported_count: imported_count, failed_count: all_failed,
        total_rows: total, errors: errors, parse_errors: parse_errors
      )
      ImportResultBuilder.final_result(
        imported: imported_count, failed: failed_count, errors: errors,
        parse_errors: parse_errors, warnings: warnings,
        header_mappings: import.header_mappings, import_id: import.id, dry_run: @dry_run
      )
    end

    # Updates the import counters and bulk-inserts the row errors (1000 per
    # statement) inside one transaction.
    def persist_final_import(import, status:, imported_count:, failed_count:, total_rows:, errors:, parse_errors:)
      ImportRowError.transaction do
        import.update!(
          status: status, imported_count: imported_count, failed_count: failed_count,
          total_rows: total_rows
        )
        attrs = ImportResultBuilder.build_row_error_attributes(
          errors: errors, parse_errors: parse_errors, import_id: import.id
        )
        # rubocop:disable Rails/SkipsModelValidations
        # Bulk insert is intentional — attributes built from pre-validated RowError/ParseError structs.
        attrs.each_slice(1000) { |batch| ImportRowError.insert_all(batch) } if attrs.any?
        # rubocop:enable Rails/SkipsModelValidations
      end
    end

    # The reported failed count excludes parse errors.
    def fire_progress_callback(imported_count, failed_count, chunk_number)
      @on_progress.call({
        imported: imported_count,
        failed: failed_count,
        chunk_number: chunk_number
      })
    end

    def validate_on_progress!
      return if @on_progress.nil?

      unless @on_progress.respond_to?(:call)
        raise ConfigurationError, "on_progress must respond to :call"
      end
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartCsvImport
  # Lightweight value structs shared by the import pipeline.
  RowError = Struct.new(:row, :column, :messages, keyword_init: true)
  RowWarning = Struct.new(:row, :message, :type, keyword_init: true)
  ParseError = Struct.new(:line_number, :raw_line, :error_message, keyword_init: true)
  UnmatchedColumnWarning = Struct.new(:column_name, :message, keyword_init: true)

  # Outcome of an import run. Instances are created only through the named
  # factories below (.new is private), one per status.
  class Result
    attr_reader :status, :imported, :failed, :total, :errors,
                :header_mappings, :import_id, :warnings, :parse_errors,
                :proposed_mappings, :unmatched_columns

    private_class_method :new

    class << self
      # Every row was processed and nothing failed.
      def completed(imported:, failed:, total:, errors:, header_mappings:, import_id: nil, warnings: [], parse_errors: [])
        new(status: :completed, imported: imported, failed: failed, total: total,
            errors: errors, header_mappings: header_mappings, import_id: import_id,
            warnings: warnings, parse_errors: parse_errors)
      end

      # Some rows failed, or the run was cut short.
      def partial_failure(imported:, failed:, total:, errors:, header_mappings:, import_id: nil, warnings: [], parse_errors: [])
        new(status: :partial_failure, imported: imported, failed: failed, total: total,
            errors: errors, header_mappings: header_mappings, import_id: import_id,
            warnings: warnings, parse_errors: parse_errors)
      end

      # The header mappings need a manual review before rows are processed.
      def review_required(header_mappings:, import_id: nil, proposed_mappings: {}, unmatched_columns: [], warnings: [])
        new(status: :review_required, header_mappings: header_mappings, import_id: import_id,
            proposed_mappings: proposed_mappings, unmatched_columns: unmatched_columns,
            warnings: warnings)
      end

      # The import was handed to a background job. Counter keywords are
      # rejected; any other extra keyword is ignored.
      def queued(import_id:, **rest)
        rejected = rest.keys.select { |key| %i[imported failed total].include?(key) }
        unless rejected.empty?
          raise ArgumentError, "queued result does not accept: #{rejected.join(", ")}"
        end

        new(status: :queued, import_id: import_id)
      end

      # Validation-only run; carries no import id.
      def dry_run(imported:, failed:, total:, errors:, header_mappings:, warnings: [], parse_errors: [])
        new(status: :dry_run, imported: imported, failed: failed, total: total,
            errors: errors, header_mappings: header_mappings,
            warnings: warnings, parse_errors: parse_errors)
      end
    end

    # Status predicates: completed?, partial_failure?, review_required?,
    # queued? and dry_run?.
    %i[completed partial_failure review_required queued dry_run].each do |expected|
      define_method(:"#{expected}?") { status == expected }
    end

    # @return [Boolean] true for a completed result without validation errors
    def success?
      completed? && errors.empty?
    end

    # Ruby keeps #initialize private automatically, so no explicit `private`
    # section is needed.
    def initialize(status:, imported: nil, failed: nil, total: nil, errors: [], header_mappings: {}, import_id: nil, warnings: [], parse_errors: [], proposed_mappings: {}, unmatched_columns: [])
      @status = status
      @import_id = import_id
      @imported = imported
      @failed = failed
      @total = total
      @errors = errors
      @parse_errors = parse_errors
      @warnings = warnings
      @header_mappings = header_mappings
      @proposed_mappings = proposed_mappings
      @unmatched_columns = unmatched_columns
    end
  end
end