ragdoll-rails 0.1.8 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +18 -21
  3. data/app/assets/javascripts/ragdoll/application.js +129 -0
  4. data/app/assets/javascripts/ragdoll/bulk_upload_status.js +454 -0
  5. data/app/assets/stylesheets/ragdoll/application.css +84 -0
  6. data/app/assets/stylesheets/ragdoll/bulk_upload_status.css +379 -0
  7. data/app/channels/application_cable/channel.rb +6 -0
  8. data/app/channels/application_cable/connection.rb +6 -0
  9. data/app/channels/ragdoll/bulk_upload_status_channel.rb +27 -0
  10. data/app/channels/ragdoll/file_processing_channel.rb +26 -0
  11. data/app/components/ragdoll/alert_component.html.erb +4 -0
  12. data/app/components/ragdoll/alert_component.rb +32 -0
  13. data/app/components/ragdoll/application_component.rb +6 -0
  14. data/app/components/ragdoll/card_component.html.erb +15 -0
  15. data/app/components/ragdoll/card_component.rb +21 -0
  16. data/app/components/ragdoll/document_list_component.html.erb +41 -0
  17. data/app/components/ragdoll/document_list_component.rb +13 -0
  18. data/app/components/ragdoll/document_table_component.html.erb +76 -0
  19. data/app/components/ragdoll/document_table_component.rb +13 -0
  20. data/app/components/ragdoll/empty_state_component.html.erb +12 -0
  21. data/app/components/ragdoll/empty_state_component.rb +17 -0
  22. data/app/components/ragdoll/flash_messages_component.html.erb +3 -0
  23. data/app/components/ragdoll/flash_messages_component.rb +37 -0
  24. data/app/components/ragdoll/navbar_component.html.erb +24 -0
  25. data/app/components/ragdoll/navbar_component.rb +31 -0
  26. data/app/components/ragdoll/page_header_component.html.erb +13 -0
  27. data/app/components/ragdoll/page_header_component.rb +15 -0
  28. data/app/components/ragdoll/stats_card_component.html.erb +11 -0
  29. data/app/components/ragdoll/stats_card_component.rb +17 -0
  30. data/app/components/ragdoll/status_badge_component.html.erb +3 -0
  31. data/app/components/ragdoll/status_badge_component.rb +30 -0
  32. data/app/controllers/ragdoll/api/v1/analytics_controller.rb +72 -0
  33. data/app/controllers/ragdoll/api/v1/base_controller.rb +29 -0
  34. data/app/controllers/ragdoll/api/v1/documents_controller.rb +148 -0
  35. data/app/controllers/ragdoll/api/v1/search_controller.rb +87 -0
  36. data/app/controllers/ragdoll/api/v1/system_controller.rb +97 -0
  37. data/app/controllers/ragdoll/application_controller.rb +17 -0
  38. data/app/controllers/ragdoll/configuration_controller.rb +82 -0
  39. data/app/controllers/ragdoll/dashboard_controller.rb +98 -0
  40. data/app/controllers/ragdoll/documents_controller.rb +460 -0
  41. data/app/controllers/ragdoll/documents_controller_backup.rb +68 -0
  42. data/app/controllers/ragdoll/jobs_controller.rb +116 -0
  43. data/app/controllers/ragdoll/search_controller.rb +368 -0
  44. data/app/jobs/application_job.rb +9 -0
  45. data/app/jobs/ragdoll/bulk_document_processing_job.rb +280 -0
  46. data/app/jobs/ragdoll/process_file_job.rb +166 -0
  47. data/app/services/ragdoll/worker_health_service.rb +111 -0
  48. data/app/views/layouts/ragdoll/application.html.erb +162 -0
  49. data/app/views/ragdoll/dashboard/analytics.html.erb +333 -0
  50. data/app/views/ragdoll/dashboard/index.html.erb +208 -0
  51. data/app/views/ragdoll/documents/edit.html.erb +91 -0
  52. data/app/views/ragdoll/documents/index.html.erb +302 -0
  53. data/app/views/ragdoll/documents/new.html.erb +1518 -0
  54. data/app/views/ragdoll/documents/show.html.erb +188 -0
  55. data/app/views/ragdoll/documents/upload_results.html.erb +248 -0
  56. data/app/views/ragdoll/jobs/index.html.erb +669 -0
  57. data/app/views/ragdoll/jobs/show.html.erb +129 -0
  58. data/app/views/ragdoll/search/index.html.erb +324 -0
  59. data/config/cable.yml +12 -0
  60. data/config/routes.rb +57 -2
  61. data/lib/generators/ragdoll/init/templates/INSTALL +3 -2
  62. data/lib/generators/ragdoll/init_generator.rb +68 -0
  63. data/lib/ragdoll/rails/engine.rb +48 -0
  64. data/lib/ragdoll/rails/version.rb +1 -1
  65. metadata +231 -6
  66. data/lib/generators/ragdoll/init/init_generator.rb +0 -26
@@ -0,0 +1,368 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ragdoll
4
+ class SearchController < ApplicationController
5
+ skip_before_action :verify_authenticity_token, only: [:search]
6
+
7
# Renders the search form, optionally pre-filled from a previously stored search.
#
# When params[:search_id] is present the matching ::Ragdoll::Search record is
# loaded and its stored query, filters and search-type toggles are restored.
# Otherwise (or when restoring fails) the form state is taken from
# params[:query], params[:document_type], params[:status], params[:limit],
# params[:threshold], params[:use_similarity_search] and
# params[:use_fulltext_search].
#
# Sets @popular_queries, @query, @filters, @use_similarity_search,
# @use_fulltext_search, @reconstructed_search (only on a successful restore)
# and @search_performed (always false here).
def index
  # Ten most frequently stored queries, most frequent first, for the sidebar.
  @popular_queries = ::Ragdoll::Search.group(:query).count
                                      .sort_by { |_query, count| -count }
                                      .first(10)
                                      .to_h

  if params[:search_id].present?
    begin
      previous_search = ::Ragdoll::Search.find(params[:search_id])

      # The stored values are handled both as a Hash and as a JSON string.
      parse_stored = lambda do |value|
        if value.is_a?(Hash)
          value
        elsif value.present?
          JSON.parse(value)
        else
          {}
        end
      end

      search_options = parse_stored.call(previous_search.search_options)
      search_filters = parse_stored.call(previous_search.search_filters)
      form_params = search_options['form_params'] || {}

      restored_filters = {
        document_type: form_params['document_type'] || search_filters['document_type'],
        status: form_params['status'] || search_filters['status'],
        limit: form_params['limit'] || search_filters['limit'] || 10,
        threshold: form_params['threshold'] || search_filters['threshold'] || 0.001
      }
      restored_similarity = form_params['use_similarity_search'] || search_options['use_similarity'] || 'true'
      restored_fulltext = form_params['use_fulltext_search'] || search_options['use_fulltext'] || 'true'

      # Publish the restored state only after everything parsed cleanly. If any
      # step above raises, @reconstructed_search stays unset and the defaults
      # below are applied, which is the fallback the rescue branches promise.
      @reconstructed_search = previous_search
      @query = previous_search.query
      @filters = restored_filters
      @use_similarity_search = restored_similarity
      @use_fulltext_search = restored_fulltext

      ::Rails.logger.debug "🔍 Reconstructed search from ID #{params[:search_id]}: #{@query}"
    rescue ActiveRecord::RecordNotFound
      ::Rails.logger.warn "🔍 Search ID #{params[:search_id]} not found"
      # Fall back to default behavior
    rescue => e
      ::Rails.logger.error "🔍 Error reconstructing search: #{e.message}"
      # Fall back to default behavior
    end
  end

  # Default values if not reconstructing a search
  unless @reconstructed_search
    # Blank form fields arrive as "" and must count as "not given":
    # "".to_i is 0 and "".to_f is 0.0, neither of which is a useful default.
    requested_limit = params[:limit].to_i
    @filters = {
      document_type: params[:document_type],
      status: params[:status],
      limit: requested_limit.positive? ? requested_limit : 10,
      threshold: params[:threshold].presence&.to_f || (::Rails.env.development? ? 0.001 : 0.7)
    }
    @query = params[:query]
    @use_similarity_search = params[:use_similarity_search] || 'true'
    @use_fulltext_search = params[:use_fulltext_search] || 'true'
  end

  @search_performed = false
end
64
+
65
# Executes a search for params[:query] and renders the outcome.
#
# Both search types are enabled unless params[:use_similarity_search] /
# params[:use_fulltext_search] is the string 'false'. Similarity hits come from
# ::Ragdoll.search, full-text hits from ::Ragdoll::Document.search_content;
# they are merged into @detailed_results (documents already found by the
# similarity pass are not added again) and sorted by descending similarity.
#
# When nothing was found, or the threshold is above 0.1, the search is repeated
# with threshold 0.0 / limit 100 to fill @below_threshold_results and
# @below_threshold_stats, which give the user feedback about near misses.
#
# A ::Ragdoll::Search record is stored for analytics; a failure to store it is
# logged and ignored. Errors in the similarity pass are logged and do not stop
# the full-text pass. Any other error is exposed via @error.
#
# Responds with the :index template for HTML, or a JSON object with :results,
# :error and (after a similarity search) :similarity_statistics.
def search
  ::Rails.logger.debug "🔍 Search called with params: #{params.inspect}"
  ::Rails.logger.debug "🔍 Use similarity search: #{params[:use_similarity_search]}"
  ::Rails.logger.debug "🔍 Use fulltext search: #{params[:use_fulltext_search]}"

  @query = params[:query]
  # Blank form fields arrive as "" and must count as "not given":
  # "".to_i is 0 and "".to_f is 0.0, which would silently disable the defaults.
  requested_limit = params[:limit].to_i
  @filters = {
    document_type: params[:document_type],
    status: params[:status],
    limit: requested_limit.positive? ? requested_limit : 10,
    # Much lower default threshold in development
    threshold: params[:threshold].presence&.to_f || (::Rails.env.development? ? 0.001 : 0.7)
  }
  ::Rails.logger.debug "🔍 Query: #{@query.inspect}, Filters: #{@filters.inspect}"

  # Ten most frequently stored queries, most frequent first, for the sidebar.
  @popular_queries = ::Ragdoll::Search.group(:query).count
                                      .sort_by { |_query, count| -count }
                                      .first(10)
                                      .to_h

  if @query.present?
    begin
      use_similarity = params[:use_similarity_search] != 'false'
      use_fulltext = params[:use_fulltext_search] != 'false'

      @detailed_results = []
      @below_threshold_results = []
      @similarity_search_attempted = false
      @similarity_threshold_used = @filters[:threshold]

      # Options shared by both back ends; optional filters only when given.
      base_options = { limit: @filters[:limit], threshold: @filters[:threshold] }
      base_options[:document_type] = @filters[:document_type] if @filters[:document_type].present?
      base_options[:status] = @filters[:status] if @filters[:status].present?

      if use_similarity
        begin
          # track_search: false because this action stores its own Search record below.
          search_params = base_options.merge(query: @query, track_search: false)
          search_response = ::Ragdoll.search(**search_params)

          # The search returns a hash with :results and :statistics
          @results = search_response.is_a?(Hash) ? search_response[:results] || [] : []
          @similarity_stats = search_response.is_a?(Hash) ? search_response[:statistics] || {} : {}

          @results.each do |result|
            next unless result[:embedding_id] && result[:document_id]

            begin
              embedding = ::Ragdoll::Embedding.find(result[:embedding_id])
              document = ::Ragdoll::Document.find(result[:document_id])
            rescue ActiveRecord::RecordNotFound
              # A hit whose record no longer exists is skipped so that it
              # cannot discard the remaining hits and the statistics.
              ::Rails.logger.warn "🔍 Skipping search hit with missing record " \
                                  "(embedding #{result[:embedding_id]}, document #{result[:document_id]})"
              next
            end

            @detailed_results << {
              embedding: embedding,
              document: document,
              similarity: result[:similarity],
              content: result[:content],
              usage_count: embedding.usage_count,
              last_used: embedding.returned_at,
              search_type: 'similarity'
            }
          end

          @similarity_search_attempted = true

          similarity_results_count = @detailed_results.count { |r| r[:search_type] == 'similarity' }
          ::Rails.logger.debug "🔍 Similarity results found: #{similarity_results_count}"

          # Gather statistics if we have no results OR if the threshold is relatively high (> 0.1)
          should_gather_stats = similarity_results_count.zero? || @filters[:threshold] > 0.1
          ::Rails.logger.debug "🔍 Should gather stats: #{should_gather_stats} (results: #{similarity_results_count}, threshold: #{@filters[:threshold]})"

          if should_gather_stats
            ::Rails.logger.debug "🔍 Gathering below-threshold statistics..."
            begin
              # Search again with minimal threshold to get all potential matches.
              # search_params already carries track_search: false, so this
              # helper query is not tracked either.
              stats_response = ::Ragdoll.search(**search_params.merge(threshold: 0.0, limit: 100))

              ::Rails.logger.debug "🔍 Stats response: #{stats_response.inspect}"

              if stats_response.is_a?(Hash) && stats_response[:results]
                all_similarities = []
                stats_response[:results].each do |result|
                  similarity = result[:similarity]
                  next unless similarity

                  all_similarities << similarity
                  next unless similarity.positive? && similarity < @filters[:threshold]

                  @below_threshold_results << {
                    document_id: result[:document_id],
                    similarity: similarity,
                    content: result[:content]
                  }
                end

                ::Rails.logger.debug "🔍 All similarities collected: #{all_similarities.inspect}"
                ::Rails.logger.debug "🔍 Threshold: #{@filters[:threshold]}"

                @below_threshold_stats = summarize_similarities(all_similarities, @filters[:threshold])
                if @below_threshold_stats
                  ::Rails.logger.debug "🔍 Below threshold stats: #{@below_threshold_stats.inspect}"
                else
                  ::Rails.logger.debug "🔍 No similarities found in stats response"
                end
              else
                ::Rails.logger.debug "🔍 Stats response was not in expected format or had no results"
              end
            rescue => stats_error
              ::Rails.logger.error "Stats gathering error: #{stats_error.message}"
            end
          end
        rescue => e
          ::Rails.logger.error "Similarity search error: #{e.message}"
          # Continue with fulltext search even if similarity search fails
        end
      end

      if use_fulltext
        fulltext_results = ::Ragdoll::Document.search_content(@query, **base_options)

        fulltext_results.each do |document|
          # Avoid duplicates if document was already found in similarity search
          next if @detailed_results.any? { |r| r[:document].id == document.id }

          fulltext_similarity = document.respond_to?(:fulltext_similarity) ? document.fulltext_similarity.to_f : 0.0

          @detailed_results << {
            document: document,
            content: document.metadata&.dig('summary') || document.title || "No summary available",
            search_type: 'fulltext',
            similarity: fulltext_similarity
          }
        end

        # Same feedback rule as for the similarity pass, but only when that
        # pass has not already produced statistics.
        fulltext_results_count = @detailed_results.count { |r| r[:search_type] == 'fulltext' }
        should_gather_fulltext_stats = fulltext_results_count.zero? || @filters[:threshold] > 0.1

        if should_gather_fulltext_stats && !@below_threshold_stats
          ::Rails.logger.debug "🔍 Gathering fulltext below-threshold statistics..."
          begin
            # Search again with lower threshold to get all potential matches
            stats_params = base_options.merge(threshold: 0.0, limit: 100)
            all_fulltext_results = ::Ragdoll::Document.search_content(@query, **stats_params)

            all_fulltext_similarities = []
            all_fulltext_results.each do |document|
              similarity = document.respond_to?(:fulltext_similarity) ? document.fulltext_similarity.to_f : 0.0
              next unless similarity.positive?

              all_fulltext_similarities << similarity
              next unless similarity < @filters[:threshold]

              @below_threshold_results << {
                document_id: document.id,
                similarity: similarity,
                content: document.metadata&.dig('summary') || document.title || "No summary available"
              }
            end

            ::Rails.logger.debug "🔍 Fulltext similarities collected: #{all_fulltext_similarities.inspect}"
            ::Rails.logger.debug "🔍 Threshold: #{@filters[:threshold]}"

            @below_threshold_stats = summarize_similarities(all_fulltext_similarities, @filters[:threshold])
            if @below_threshold_stats
              ::Rails.logger.debug "🔍 Fulltext below threshold stats: #{@below_threshold_stats.inspect}"
            else
              ::Rails.logger.debug "🔍 No fulltext similarities found in stats response"
            end
          rescue => stats_error
            ::Rails.logger.error "Fulltext stats gathering error: #{stats_error.message}"
          end
        end
      end

      # Highest similarity first; entries without a score sort as 0.
      @detailed_results.sort_by! { |r| r[:similarity] ? -r[:similarity] : 0 }

      search_type =
        if use_similarity && use_fulltext then 'hybrid'
        elsif use_similarity then 'similarity'
        elsif use_fulltext then 'fulltext'
        else 'unknown'
        end

      similarity_results = @detailed_results.select { |r| r[:search_type] == 'similarity' }
      similarities = similarity_results.map { |r| r[:similarity] }.compact

      # Save search for analytics without query embedding (which is optional)
      begin
        ::Ragdoll::Search.create!(
          query: @query,
          search_type: search_type,
          results_count: @detailed_results.count,
          max_similarity_score: similarities.max,
          min_similarity_score: similarities.min,
          avg_similarity_score: similarities.empty? ? nil : (similarities.sum / similarities.size.to_f),
          search_filters: @filters.to_json,
          search_options: {
            threshold_used: @filters[:threshold],
            similarity_results: similarity_results.count,
            fulltext_results: @detailed_results.count { |r| r[:search_type] == 'fulltext' },
            use_similarity: use_similarity,
            use_fulltext: use_fulltext,
            # Store original form parameters for reconstruction
            form_params: {
              use_similarity_search: params[:use_similarity_search],
              use_fulltext_search: params[:use_fulltext_search],
              limit: @filters[:limit],
              threshold: @filters[:threshold],
              document_type: @filters[:document_type],
              status: @filters[:status]
            }
          }.to_json
        )
        ::Rails.logger.debug "🔍 Search saved successfully"
      rescue => e
        ::Rails.logger.error "🔍 Failed to save search: #{e.message}"
        # Continue without failing the search
      end

      ::Rails.logger.debug "🔍 Search completed successfully. Results count: #{@detailed_results.count}"
      ::Rails.logger.debug "🔍 Similarity search attempted: #{@similarity_search_attempted}"
      ::Rails.logger.debug "🔍 Below threshold stats: #{@below_threshold_stats.inspect}"
      ::Rails.logger.debug "🔍 Threshold used: #{@similarity_threshold_used}"
      @search_performed = true
    rescue => e
      ::Rails.logger.error "🔍 Search error: #{e.message}"
      ::Rails.logger.error e.backtrace.join("\n")
      @error = e.message
      @search_performed = false
    end
  else
    @search_performed = false
  end

  respond_to do |format|
    format.html { render :index }
    format.json do
      # Always return an array for :results, even when no search was run.
      json_response = { results: @detailed_results || [], error: @error }
      if @similarity_search_attempted && @similarity_stats
        json_response[:similarity_statistics] = {
          threshold_used: @similarity_threshold_used,
          stats: @similarity_stats
        }
      end
      render json: json_response
    end
  end
end

# Builds the below-threshold feedback hash for a list of similarity scores.
#
# Returns nil for an empty list. :lowest and :suggested_threshold are nil when
# no score is positive (previously that case raised on nil.round and the whole
# statistics hash was lost).
private def summarize_similarities(similarities, threshold)
  return nil if similarities.empty?

  lowest_positive = similarities.select(&:positive?).min
  {
    count: similarities.count { |s| s.positive? && s < threshold },
    highest: similarities.max,
    lowest: lowest_positive,
    average: similarities.sum / similarities.size.to_f,
    suggested_threshold: lowest_positive&.round(3)
  }
end
367
+ end
368
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ class ApplicationJob < ActiveJob::Base
4
+ # Uncomment to automatically retry jobs that encountered a database deadlock.
5
+ # retry_on ActiveRecord::Deadlocked
6
+
7
+ # Uncomment to drop jobs whose underlying records are no longer available.
8
+ # discard_on ActiveJob::DeserializationError
9
+ end
@@ -0,0 +1,280 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ragdoll
4
+ class BulkDocumentProcessingJob < ApplicationJob
5
+ queue_as :default
6
+
7
+ private
8
+
9
# Sends +data+ to the bulk-upload status stream of the given session.
# A failing broadcast is logged and otherwise ignored.
def broadcast_status_update(session_id, data)
  begin
    stream_name = "bulk_upload_status_#{session_id}"
    ActionCable.server.broadcast(stream_name, data)
  rescue StandardError => broadcast_error
    logger.error "Failed to broadcast status update: #{broadcast_error.message}"
  end
end
14
+
15
# Forwards an operation entry to RagdollLogging when that constant is defined.
# Does nothing otherwise; a failure inside the logger is only noted at debug level.
def safe_log_operation(operation, details = {})
  RagdollLogging.log_operation(operation, details) if defined?(RagdollLogging)
rescue StandardError => logging_failure
  logger.debug "Failed to log operation #{operation}: #{logging_failure.message}"
end
21
+
22
# Forwards an error entry to RagdollLogging when that constant is defined.
# Does nothing otherwise; a failure inside the logger is only noted at debug level.
def safe_log_error(operation, error, details = {})
  if defined?(RagdollLogging)
    RagdollLogging.log_error(operation, error, details)
  end
rescue StandardError => logging_failure
  logger.debug "Failed to log error #{operation}: #{logging_failure.message}"
end
28
+
29
# Forwards a timing entry to RagdollLogging when that constant is defined.
# Does nothing otherwise; a failure inside the logger is only noted at debug level.
def safe_log_performance(operation, duration, details = {})
  logging_available = defined?(RagdollLogging)
  return unless logging_available

  RagdollLogging.log_performance(operation, duration, details)
rescue StandardError => logging_failure
  logger.debug "Failed to log performance #{operation}: #{logging_failure.message}"
end
35
+
36
# Adds every uploaded file of a bulk-upload session to Ragdoll and reports
# progress through broadcast_status_update.
#
# @param session_id [Object] identifies the upload session; used in the
#   broadcast stream name and in log entries
# @param file_paths_data [Array<Hash>, nil] one hash per file, read via
#   :temp_path (file on disk) and :original_filename (name shown to the user)
# @param force_duplicate [Boolean] passed to ::Ragdoll.add_document as +force:+
#
# Files are handled in batches of 10. Each file ends up either counted in
# +processed+ or listed in +failed_files+ of the final 'upload_complete'
# broadcast; this includes files whose temp file is missing. The temp file is
# removed after the attempt whether it succeeded, failed or raised.
# An error outside the per-file handling is logged and broadcast as
# 'upload_error'; it is not re-raised.
def perform(session_id, file_paths_data, force_duplicate = false)
  start_time = Time.current

  # Initialize variables early to avoid nil errors in rescue block
  total_files = file_paths_data&.size || 0
  processed_count = 0
  failed_files = []

  safe_log_operation("bulk_processing_start", {
    session_id: session_id,
    file_count: total_files,
    force_duplicate: force_duplicate,
    job_id: job_id
  })

  logger.info "🚀 Starting bulk document processing job for session #{session_id}"
  logger.info "📁 Processing #{total_files} files"

  # Early return if no files to process
  if file_paths_data.nil? || file_paths_data.empty?
    logger.warn "⚠️ No files provided for processing in session #{session_id}"
    broadcast_status_update(session_id, {
      type: 'upload_error',
      error: 'No files provided for processing',
      status: 'failed'
    })
    return
  end

  broadcast_status_update(session_id, {
    type: 'upload_start',
    total_files: total_files,
    status: 'processing',
    started_at: Time.current.iso8601
  })

  batch_size = 10 # Process 10 files at a time for async jobs
  total_batches = (total_files.to_f / batch_size).ceil

  file_paths_data.each_slice(batch_size).with_index do |file_batch, batch_index|
    logger.info "📦 Processing batch #{batch_index + 1} of #{total_batches}"

    file_batch.each do |file_data|
      file_start_time = Time.current
      temp_path = nil

      begin
        temp_path = file_data[:temp_path]
        original_filename = file_data[:original_filename]
        file_exists = File.exist?(temp_path)

        safe_log_operation("file_processing_start", {
          session_id: session_id,
          filename: original_filename,
          temp_path: temp_path,
          file_exists: file_exists,
          file_size: file_exists ? File.size(temp_path) : 0
        })

        unless file_exists
          # A vanished temp file is a failed file: record and report it so the
          # final totals add up and the client is told what happened.
          error_msg = "Temporary file not found: #{temp_path}"
          failed_files << original_filename
          safe_log_error("file_processing", StandardError.new(error_msg), {
            session_id: session_id,
            filename: original_filename,
            temp_path: temp_path
          })
          logger.error "❌ Failed to process: #{original_filename} - #{error_msg}"
          broadcast_status_update(session_id, {
            type: 'file_error',
            filename: original_filename,
            processed: processed_count,
            total: total_files,
            percentage: ((processed_count.to_f / total_files) * 100).round(1),
            status: 'failed',
            error: error_msg,
            processing_time: (Time.current - file_start_time).round(3)
          })
          next
        end

        logger.info "🔄 Processing file: #{original_filename}"

        broadcast_status_update(session_id, {
          type: 'file_start',
          filename: original_filename,
          processed: processed_count,
          total: total_files,
          percentage: ((processed_count.to_f / total_files) * 100).round(1),
          status: 'processing',
          batch_index: batch_index + 1,
          total_batches: total_batches
        })

        # Process the document
        ragdoll_start_time = Time.current
        result = ::Ragdoll.add_document(path: temp_path, force: force_duplicate)
        ragdoll_duration = Time.current - ragdoll_start_time

        safe_log_performance("ragdoll_add_document", ragdoll_duration, {
          session_id: session_id,
          filename: original_filename,
          result_success: result && result[:success],
          force_duplicate: force_duplicate
        })

        if result && result[:success]
          processed_count += 1
          file_duration = Time.current - file_start_time

          safe_log_operation("file_processing_success", {
            session_id: session_id,
            filename: original_filename,
            document_id: result[:document_id],
            processing_duration: file_duration.round(3),
            processed_count: processed_count,
            total_files: total_files
          })

          logger.info "✅ Successfully processed: #{original_filename}"

          broadcast_status_update(session_id, {
            type: 'file_complete',
            filename: original_filename,
            processed: processed_count,
            total: total_files,
            percentage: ((processed_count.to_f / total_files) * 100).round(1),
            status: 'completed',
            document_id: result[:document_id],
            processing_time: file_duration.round(3)
          })
        else
          failed_files << original_filename
          error_message = result ? result[:error] : 'Unknown error'
          file_duration = Time.current - file_start_time

          safe_log_error("file_processing", StandardError.new(error_message), {
            session_id: session_id,
            filename: original_filename,
            processing_duration: file_duration.round(3),
            ragdoll_result: result,
            temp_path: temp_path,
            file_size: File.size(temp_path)
          })

          logger.error "❌ Failed to process: #{original_filename} - #{error_message}"

          broadcast_status_update(session_id, {
            type: 'file_error',
            filename: original_filename,
            processed: processed_count,
            total: total_files,
            percentage: ((processed_count.to_f / total_files) * 100).round(1),
            status: 'failed',
            error: error_message,
            processing_time: file_duration.round(3)
          })
        end
      rescue => e
        failed_files << (file_data[:original_filename] || 'unknown file')
        file_duration = Time.current - file_start_time

        safe_log_error("file_processing_exception", e, {
          session_id: session_id,
          filename: file_data[:original_filename],
          temp_path: file_data[:temp_path],
          processing_duration: file_duration.round(3),
          file_data: file_data,
          processed_count: processed_count,
          total_files: total_files
        })

        logger.error "💥 Exception processing file #{file_data[:original_filename]}: #{e.message}"
        logger.error e.backtrace.join("\n")

        # Use the same rescued helper and stream as every other status update.
        broadcast_status_update(session_id, {
          type: 'file_error',
          filename: file_data[:original_filename],
          processed: processed_count,
          total: total_files,
          percentage: ((processed_count.to_f / total_files) * 100).round(1),
          status: 'failed',
          error: e.message
        })
      ensure
        # Runs on success, failure, exception and the early `next` above.
        remove_temp_file(temp_path)
      end
    end

    # Force garbage collection after each batch
    GC.start

    # Small delay between batches to prevent overwhelming the system
    sleep(0.1)
  end

  total_duration = Time.current - start_time
  broadcast_status_update(session_id, {
    type: 'upload_complete',
    processed: processed_count,
    total: total_files,
    failed: failed_files.size,
    failed_files: failed_files,
    percentage: 100.0,
    status: 'completed',
    total_duration: total_duration.round(3),
    completed_at: Time.current.iso8601
  })

  safe_log_operation("bulk_processing_complete", {
    session_id: session_id,
    total_files: total_files,
    processed_count: processed_count,
    failed_count: failed_files.size,
    failed_files: failed_files,
    total_duration: total_duration.round(3),
    avg_file_duration: total_files > 0 ? (total_duration / total_files).round(3) : 0
  })

  logger.info "🎉 Bulk processing completed for session #{session_id}"
  logger.info "📊 Results: #{processed_count}/#{total_files} successful, #{failed_files.size} failed"
rescue => e
  total_duration = Time.current - start_time

  safe_log_error("bulk_processing_job_failure", e, {
    session_id: session_id,
    total_files: total_files,
    processed_count: processed_count,
    failed_count: failed_files.size,
    total_duration: total_duration.round(3),
    job_id: job_id
  })

  logger.error "💀 Bulk processing job failed for session #{session_id}: #{e.message}"
  logger.error e.backtrace.join("\n")

  broadcast_status_update(session_id, {
    type: 'upload_error',
    error: e.message,
    status: 'failed',
    processed: processed_count,
    total: total_files,
    failed_at: Time.current.iso8601,
    total_duration: total_duration.round(3)
  })
end

# Deletes the temp file at +path+ if it exists. A failed delete is logged and
# ignored so it can neither mark an already processed file as failed nor
# abort the remaining files.
def remove_temp_file(path)
  File.delete(path) if path && File.exist?(path)
rescue => e
  logger.warn "Failed to remove temp file #{path}: #{e.message}"
end
279
+ end
280
+ end