ragdoll-rails 0.1.8 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -21
- data/app/assets/javascripts/ragdoll/application.js +129 -0
- data/app/assets/javascripts/ragdoll/bulk_upload_status.js +454 -0
- data/app/assets/stylesheets/ragdoll/application.css +84 -0
- data/app/assets/stylesheets/ragdoll/bulk_upload_status.css +379 -0
- data/app/channels/application_cable/channel.rb +6 -0
- data/app/channels/application_cable/connection.rb +6 -0
- data/app/channels/ragdoll/bulk_upload_status_channel.rb +27 -0
- data/app/channels/ragdoll/file_processing_channel.rb +26 -0
- data/app/components/ragdoll/alert_component.html.erb +4 -0
- data/app/components/ragdoll/alert_component.rb +32 -0
- data/app/components/ragdoll/application_component.rb +6 -0
- data/app/components/ragdoll/card_component.html.erb +15 -0
- data/app/components/ragdoll/card_component.rb +21 -0
- data/app/components/ragdoll/document_list_component.html.erb +41 -0
- data/app/components/ragdoll/document_list_component.rb +13 -0
- data/app/components/ragdoll/document_table_component.html.erb +76 -0
- data/app/components/ragdoll/document_table_component.rb +13 -0
- data/app/components/ragdoll/empty_state_component.html.erb +12 -0
- data/app/components/ragdoll/empty_state_component.rb +17 -0
- data/app/components/ragdoll/flash_messages_component.html.erb +3 -0
- data/app/components/ragdoll/flash_messages_component.rb +37 -0
- data/app/components/ragdoll/navbar_component.html.erb +24 -0
- data/app/components/ragdoll/navbar_component.rb +31 -0
- data/app/components/ragdoll/page_header_component.html.erb +13 -0
- data/app/components/ragdoll/page_header_component.rb +15 -0
- data/app/components/ragdoll/stats_card_component.html.erb +11 -0
- data/app/components/ragdoll/stats_card_component.rb +17 -0
- data/app/components/ragdoll/status_badge_component.html.erb +3 -0
- data/app/components/ragdoll/status_badge_component.rb +30 -0
- data/app/controllers/ragdoll/api/v1/analytics_controller.rb +72 -0
- data/app/controllers/ragdoll/api/v1/base_controller.rb +29 -0
- data/app/controllers/ragdoll/api/v1/documents_controller.rb +148 -0
- data/app/controllers/ragdoll/api/v1/search_controller.rb +87 -0
- data/app/controllers/ragdoll/api/v1/system_controller.rb +97 -0
- data/app/controllers/ragdoll/application_controller.rb +17 -0
- data/app/controllers/ragdoll/configuration_controller.rb +82 -0
- data/app/controllers/ragdoll/dashboard_controller.rb +98 -0
- data/app/controllers/ragdoll/documents_controller.rb +460 -0
- data/app/controllers/ragdoll/documents_controller_backup.rb +68 -0
- data/app/controllers/ragdoll/jobs_controller.rb +116 -0
- data/app/controllers/ragdoll/search_controller.rb +368 -0
- data/app/jobs/application_job.rb +9 -0
- data/app/jobs/ragdoll/bulk_document_processing_job.rb +280 -0
- data/app/jobs/ragdoll/process_file_job.rb +166 -0
- data/app/services/ragdoll/worker_health_service.rb +111 -0
- data/app/views/layouts/ragdoll/application.html.erb +162 -0
- data/app/views/ragdoll/dashboard/analytics.html.erb +333 -0
- data/app/views/ragdoll/dashboard/index.html.erb +208 -0
- data/app/views/ragdoll/documents/edit.html.erb +91 -0
- data/app/views/ragdoll/documents/index.html.erb +302 -0
- data/app/views/ragdoll/documents/new.html.erb +1518 -0
- data/app/views/ragdoll/documents/show.html.erb +188 -0
- data/app/views/ragdoll/documents/upload_results.html.erb +248 -0
- data/app/views/ragdoll/jobs/index.html.erb +669 -0
- data/app/views/ragdoll/jobs/show.html.erb +129 -0
- data/app/views/ragdoll/search/index.html.erb +324 -0
- data/config/cable.yml +12 -0
- data/config/routes.rb +57 -2
- data/lib/generators/ragdoll/init/templates/INSTALL +3 -2
- data/lib/generators/ragdoll/init_generator.rb +68 -0
- data/lib/ragdoll/rails/engine.rb +48 -0
- data/lib/ragdoll/rails/version.rb +1 -1
- metadata +231 -6
- data/lib/generators/ragdoll/init/init_generator.rb +0 -26
@@ -0,0 +1,368 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ragdoll
  # Web UI controller for searching ingested documents.
  #
  # +index+ renders the search form, optionally pre-filled from a previously
  # stored search. +search+ runs a similarity and/or full-text query, collects
  # statistics about matches that fell below the requested threshold, records
  # the search for analytics and renders HTML or JSON.
  class SearchController < ApplicationController
    # NOTE(review): CSRF verification is skipped for +search+ — confirm this
    # is intentional before exposing the engine publicly.
    skip_before_action :verify_authenticity_token, only: [:search]

    # Result limit used when the request does not supply one.
    DEFAULT_LIMIT = 10
    # Number of entries shown in the "popular queries" sidebar.
    POPULAR_QUERY_LIMIT = 10
    # Result limit for the threshold-free probe used to build statistics.
    STATS_PROBE_LIMIT = 100
    # Statistics are always gathered when the requested threshold exceeds this.
    STATS_THRESHOLD_CUTOFF = 0.1
    # Threshold used when a stored search carries no threshold of its own.
    RECONSTRUCTED_DEFAULT_THRESHOLD = 0.001

    # Renders the search form. When +params[:search_id]+ points at a stored
    # search, the form is pre-filled from it; otherwise (or when the stored
    # search cannot be loaded) request parameters and defaults are used.
    def index
      @popular_queries = popular_queries
      reconstruct_previous_search if params[:search_id].present?

      unless @reconstructed_search
        @filters = filters_from_params
        @query = params[:query]
        @use_similarity_search = params[:use_similarity_search] || 'true'
        @use_fulltext_search = params[:use_fulltext_search] || 'true'
      end

      @search_performed = false
    end

    # Executes the search described by the request parameters and renders the
    # index template (HTML) or a JSON payload with the results.
    def search
      ::Rails.logger.debug "🔍 Search called with params: #{params.inspect}"
      ::Rails.logger.debug "🔍 Use similarity search: #{params[:use_similarity_search]}"
      ::Rails.logger.debug "🔍 Use fulltext search: #{params[:use_fulltext_search]}"

      @query = params[:query]
      @filters = filters_from_params
      ::Rails.logger.debug "🔍 Query: #{@query.inspect}, Filters: #{@filters.inspect}"

      @popular_queries = popular_queries
      @search_performed = false
      perform_search if @query.present?

      respond_to do |format|
        format.html { render :index }
        format.json { render json: search_json_response }
      end
    end

    private

    # Returns the most frequent stored queries as an ordered { query => count } hash.
    def popular_queries
      ::Ragdoll::Search.group(:query).count
                       .sort_by { |_query, count| -count }
                       .first(POPULAR_QUERY_LIMIT)
                       .to_h
    end

    # Similarity threshold applied when the request does not supply one.
    def default_threshold
      ::Rails.env.development? ? 0.001 : 0.7
    end

    # Builds the filter hash from request parameters. Blank +limit+ and
    # +threshold+ values (e.g. an empty form field) fall back to the defaults
    # instead of being coerced to 0.
    def filters_from_params
      {
        document_type: params[:document_type],
        status: params[:status],
        limit: params[:limit].presence&.to_i || DEFAULT_LIMIT,
        threshold: params[:threshold].presence&.to_f || default_threshold
      }
    end

    # Document type / status filters that were actually supplied.
    def active_filters
      @filters.slice(:document_type, :status).select { |_key, value| value.present? }
    end

    # Accepts either a Hash or a JSON string (or blank) and returns a Hash.
    def parse_stored_json(value)
      return value if value.is_a?(Hash)

      value.present? ? JSON.parse(value) : {}
    end

    # Pre-fills the form state from the stored search named by
    # +params[:search_id]+. +@reconstructed_search+ is assigned only after
    # everything was restored, so any failure leaves it nil and +index+ falls
    # back to its default behaviour.
    def reconstruct_previous_search
      previous_search = ::Ragdoll::Search.find(params[:search_id])
      search_options = parse_stored_json(previous_search.search_options)
      search_filters = parse_stored_json(previous_search.search_filters)
      form_params = search_options['form_params'] || {}

      @query = previous_search.query
      @filters = {
        document_type: form_params['document_type'] || search_filters['document_type'],
        status: form_params['status'] || search_filters['status'],
        limit: form_params['limit'] || search_filters['limit'] || DEFAULT_LIMIT,
        threshold: form_params['threshold'] || search_filters['threshold'] || RECONSTRUCTED_DEFAULT_THRESHOLD
      }
      @use_similarity_search = form_params['use_similarity_search'] || search_options['use_similarity'] || 'true'
      @use_fulltext_search = form_params['use_fulltext_search'] || search_options['use_fulltext'] || 'true'
      @reconstructed_search = previous_search

      ::Rails.logger.debug "🔍 Reconstructed search from ID #{params[:search_id]}: #{@query}"
    rescue ActiveRecord::RecordNotFound
      ::Rails.logger.warn "🔍 Search ID #{params[:search_id]} not found"
    rescue StandardError => e
      ::Rails.logger.error "🔍 Error reconstructing search: #{e.message}"
    end

    # Runs the enabled search types, sorts the merged results and records the
    # search. Any unexpected error is exposed to the view through +@error+.
    def perform_search
      # Both search types default to enabled unless explicitly set to 'false'.
      use_similarity = params[:use_similarity_search] != 'false'
      use_fulltext = params[:use_fulltext_search] != 'false'

      @detailed_results = []
      @below_threshold_results = []
      @similarity_search_attempted = false
      @similarity_threshold_used = @filters[:threshold]

      run_similarity_search if use_similarity
      run_fulltext_search if use_fulltext

      # Highest similarity first; entries without a score keep a neutral key.
      @detailed_results.sort_by! { |r| r[:similarity] ? -r[:similarity] : 0 }

      record_search(use_similarity, use_fulltext)

      ::Rails.logger.debug "🔍 Search completed successfully. Results count: #{@detailed_results.count}"
      ::Rails.logger.debug "🔍 Similarity search attempted: #{@similarity_search_attempted}"
      ::Rails.logger.debug "🔍 Below threshold stats: #{@below_threshold_stats.inspect}"
      ::Rails.logger.debug "🔍 Threshold used: #{@similarity_threshold_used}"
      @search_performed = true
    rescue StandardError => e
      ::Rails.logger.error "🔍 Search error: #{e.message}"
      ::Rails.logger.error e.backtrace.join("\n")
      @error = e.message
      @search_performed = false
    end

    # Runs the embedding similarity search and appends its hits to
    # +@detailed_results+. Failures are logged and swallowed so the full-text
    # search can still run.
    def run_similarity_search
      search_params = {
        query: @query,
        limit: @filters[:limit],
        threshold: @filters[:threshold],
        # The controller records the search itself (see #record_search).
        track_search: false
      }.merge(active_filters)

      search_response = ::Ragdoll.search(**search_params)

      # The search is expected to return a hash with :results and :statistics.
      @results = search_response.is_a?(Hash) ? search_response[:results] || [] : []
      @similarity_stats = search_response.is_a?(Hash) ? search_response[:statistics] || {} : {}

      @results.each { |result| append_similarity_result(result) }
      @similarity_search_attempted = true

      similarity_results_count = @detailed_results.count { |r| r[:search_type] == 'similarity' }
      ::Rails.logger.debug "🔍 Similarity results found: #{similarity_results_count}"

      # Gather statistics when nothing matched or the threshold is relatively
      # high, so the view can suggest a more useful threshold.
      should_gather_stats = similarity_results_count.zero? || @filters[:threshold] > STATS_THRESHOLD_CUTOFF
      ::Rails.logger.debug "🔍 Should gather stats: #{should_gather_stats} (results: #{similarity_results_count}, threshold: #{@filters[:threshold]})"
      gather_similarity_stats(search_params) if should_gather_stats
    rescue StandardError => e
      ::Rails.logger.error "Similarity search error: #{e.message}"
    end

    # Adds one similarity hit to +@detailed_results+. Hits whose embedding or
    # document record no longer exists are skipped instead of aborting the
    # whole search.
    def append_similarity_result(result)
      return unless result[:embedding_id] && result[:document_id]

      embedding = ::Ragdoll::Embedding.find_by(id: result[:embedding_id])
      document = ::Ragdoll::Document.find_by(id: result[:document_id])
      unless embedding && document
        ::Rails.logger.warn "🔍 Skipping stale search result (embedding #{result[:embedding_id]}, document #{result[:document_id]})"
        return
      end

      @detailed_results << {
        embedding: embedding,
        document: document,
        similarity: result[:similarity],
        content: result[:content],
        usage_count: embedding.usage_count,
        last_used: embedding.returned_at,
        search_type: 'similarity'
      }
    end

    # Re-runs the similarity search with no threshold to describe the matches
    # that the requested threshold excluded. The probe is not tracked.
    def gather_similarity_stats(search_params)
      ::Rails.logger.debug "🔍 Gathering below-threshold statistics..."
      stats_response = ::Ragdoll.search(**search_params.merge(threshold: 0.0, limit: STATS_PROBE_LIMIT))

      unless stats_response.is_a?(Hash) && stats_response[:results]
        ::Rails.logger.debug "🔍 Stats response was not in expected format or had no results"
        return
      end

      scored = stats_response[:results].select { |result| result[:similarity] }
      scored.each do |result|
        next unless below_threshold?(result[:similarity])

        @below_threshold_results << {
          document_id: result[:document_id],
          similarity: result[:similarity],
          content: result[:content]
        }
      end

      all_similarities = scored.map { |result| result[:similarity] }
      ::Rails.logger.debug "🔍 All similarities collected: #{all_similarities.inspect}"
      ::Rails.logger.debug "🔍 Threshold: #{@filters[:threshold]}"

      @below_threshold_stats = threshold_stats(all_similarities)
      if @below_threshold_stats
        ::Rails.logger.debug "🔍 Below threshold stats: #{@below_threshold_stats.inspect}"
      else
        ::Rails.logger.debug "🔍 No similarities found in stats response"
      end
    rescue StandardError => e
      ::Rails.logger.error "Stats gathering error: #{e.message}"
    end

    # Runs the full-text search and appends documents that are not already
    # present in +@detailed_results+.
    def run_fulltext_search
      fulltext_params = {
        limit: @filters[:limit],
        threshold: @filters[:threshold]
      }.merge(active_filters)

      fulltext_results = ::Ragdoll::Document.search_content(@query, **fulltext_params)

      fulltext_results.each do |document|
        # Avoid duplicates if the document was already found.
        next if @detailed_results.any? { |r| r[:document].id == document.id }

        @detailed_results << {
          document: document,
          content: document_summary(document),
          search_type: 'fulltext',
          similarity: fulltext_score(document)
        }
      end

      fulltext_results_count = @detailed_results.count { |r| r[:search_type] == 'fulltext' }
      should_gather_stats = fulltext_results_count.zero? || @filters[:threshold] > STATS_THRESHOLD_CUTOFF

      # Statistics from the similarity pass, when present, take precedence.
      gather_fulltext_stats(fulltext_params) if should_gather_stats && !@below_threshold_stats
    end

    # Re-runs the full-text search with no threshold to describe the matches
    # that the requested threshold excluded.
    def gather_fulltext_stats(fulltext_params)
      ::Rails.logger.debug "🔍 Gathering fulltext below-threshold statistics..."
      stats_params = fulltext_params.merge(threshold: 0.0, limit: STATS_PROBE_LIMIT)
      all_fulltext_results = ::Ragdoll::Document.search_content(@query, **stats_params)

      all_fulltext_similarities = []
      all_fulltext_results.each do |document|
        similarity = fulltext_score(document)
        next unless similarity > 0

        all_fulltext_similarities << similarity
        next unless similarity < @filters[:threshold]

        @below_threshold_results << {
          document_id: document.id,
          similarity: similarity,
          content: document_summary(document)
        }
      end

      ::Rails.logger.debug "🔍 Fulltext similarities collected: #{all_fulltext_similarities.inspect}"
      ::Rails.logger.debug "🔍 Threshold: #{@filters[:threshold]}"

      @below_threshold_stats = threshold_stats(all_fulltext_similarities)
      if @below_threshold_stats
        ::Rails.logger.debug "🔍 Fulltext below threshold stats: #{@below_threshold_stats.inspect}"
      else
        ::Rails.logger.debug "🔍 No fulltext similarities found in stats response"
      end
    rescue StandardError => e
      ::Rails.logger.error "Fulltext stats gathering error: #{e.message}"
    end

    # True when +similarity+ is positive but below the requested threshold.
    def below_threshold?(similarity)
      similarity < @filters[:threshold] && similarity > 0
    end

    # Summarises a list of similarity scores for display. Returns nil for an
    # empty list. +lowest+ and +suggested_threshold+ are nil when no score is
    # positive.
    def threshold_stats(similarities)
      return if similarities.empty?

      lowest_positive = similarities.select { |s| s > 0 }.min
      {
        count: similarities.count { |s| below_threshold?(s) },
        highest: similarities.max,
        lowest: lowest_positive,
        average: similarities.sum / similarities.size.to_f,
        suggested_threshold: lowest_positive&.round(3)
      }
    end

    # Score reported by the full-text search for +document+, or 0.0 when the
    # record does not expose one.
    def fulltext_score(document)
      document.respond_to?(:fulltext_similarity) ? document.fulltext_similarity.to_f : 0.0
    end

    # Short text shown for a full-text hit.
    def document_summary(document)
      document.metadata&.dig('summary') || document.title || "No summary available"
    end

    # Persists the search for analytics, including the original form
    # parameters so +index+ can reconstruct it later. Failures are logged and
    # never fail the search itself.
    def record_search(use_similarity, use_fulltext)
      search_type =
        if use_similarity && use_fulltext then 'hybrid'
        elsif use_similarity then 'similarity'
        elsif use_fulltext then 'fulltext'
        else 'unknown'
        end

      similarity_results = @detailed_results.select { |r| r[:search_type] == 'similarity' }
      similarities = similarity_results.map { |r| r[:similarity] }.compact

      ::Ragdoll::Search.create!(
        query: @query,
        search_type: search_type,
        results_count: @detailed_results.count,
        max_similarity_score: similarities.max,
        min_similarity_score: similarities.min,
        avg_similarity_score: similarities.any? ? (similarities.sum / similarities.size.to_f) : nil,
        search_filters: @filters.to_json,
        search_options: {
          threshold_used: @filters[:threshold],
          similarity_results: similarity_results.count,
          fulltext_results: @detailed_results.count { |r| r[:search_type] == 'fulltext' },
          use_similarity: use_similarity,
          use_fulltext: use_fulltext,
          # Original form parameters, kept for reconstruction.
          form_params: {
            use_similarity_search: params[:use_similarity_search],
            use_fulltext_search: params[:use_fulltext_search],
            limit: @filters[:limit],
            threshold: @filters[:threshold],
            document_type: @filters[:document_type],
            status: @filters[:status]
          }
        }.to_json
      )
      ::Rails.logger.debug "🔍 Search saved successfully"
    rescue StandardError => e
      ::Rails.logger.error "🔍 Failed to save search: #{e.message}"
    end

    # Payload rendered for JSON requests.
    def search_json_response
      json_response = { results: @detailed_results, error: @error }
      if @similarity_search_attempted && @similarity_stats
        json_response[:similarity_statistics] = {
          threshold_used: @similarity_threshold_used,
          stats: @similarity_stats
        }
      end
      json_response
    end
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Shared base class for background jobs; concrete jobs inherit from it.
# Both hooks below are left disabled.
class ApplicationJob < ActiveJob::Base
  # Uncomment to retry a job automatically after a database deadlock:
  # retry_on ActiveRecord::Deadlocked

  # Uncomment to drop jobs whose underlying records are no longer available:
  # discard_on ActiveJob::DeserializationError
end
|
@@ -0,0 +1,280 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ragdoll
  # Background job that ingests a set of uploaded files through
  # +Ragdoll.add_document+, broadcasting progress for the upload session on
  # the "bulk_upload_status_<session_id>" stream.
  class BulkDocumentProcessingJob < ApplicationJob
    queue_as :default

    # Files handled between the GC / sleep pauses.
    BATCH_SIZE = 10

    # Processes every entry of +file_paths_data+ and reports progress.
    #
    # @param session_id [String] upload session used to build the stream name
    # @param file_paths_data [Array<Hash>, nil] entries read via
    #   +:temp_path+ and +:original_filename+
    # @param force_duplicate [Boolean] forwarded as +force:+ to +Ragdoll.add_document+
    #
    # Errors are logged and broadcast as 'upload_error'; they are not re-raised.
    def perform(session_id, file_paths_data, force_duplicate = false)
      start_time = Time.current

      # Initialised early so the rescue block can always report them.
      total_files = file_paths_data&.size || 0
      processed_count = 0
      failed_files = []

      safe_log_operation("bulk_processing_start", {
        session_id: session_id,
        file_count: total_files,
        force_duplicate: force_duplicate,
        job_id: job_id
      })

      logger.info "🚀 Starting bulk document processing job for session #{session_id}"
      logger.info "📁 Processing #{total_files} files"

      if total_files.zero?
        logger.warn "⚠️ No files provided for processing in session #{session_id}"
        broadcast_status_update(session_id, {
          type: 'upload_error',
          error: 'No files provided for processing',
          status: 'failed'
        })
        return
      end

      broadcast_status_update(session_id, {
        type: 'upload_start',
        total_files: total_files,
        status: 'processing',
        started_at: Time.current.iso8601
      })

      total_batches = (total_files.to_f / BATCH_SIZE).ceil

      file_paths_data.each_slice(BATCH_SIZE).with_index(1) do |file_batch, batch_number|
        logger.info "📦 Processing batch #{batch_number} of #{total_batches}"

        file_batch.each do |file_data|
          progress = {
            processed: processed_count,
            total: total_files,
            batch_index: batch_number,
            total_batches: total_batches
          }

          if process_file(session_id, file_data, force_duplicate, progress)
            processed_count += 1
          else
            failed_files << (file_data[:original_filename] || 'unknown file')
          end
        end

        # Release memory and yield briefly between batches.
        GC.start
        sleep(0.1)
      end

      total_duration = Time.current - start_time
      broadcast_status_update(session_id, {
        type: 'upload_complete',
        processed: processed_count,
        total: total_files,
        failed: failed_files.size,
        failed_files: failed_files,
        percentage: 100.0,
        status: 'completed',
        total_duration: total_duration.round(3),
        completed_at: Time.current.iso8601
      })

      safe_log_operation("bulk_processing_complete", {
        session_id: session_id,
        total_files: total_files,
        processed_count: processed_count,
        failed_count: failed_files.size,
        failed_files: failed_files,
        total_duration: total_duration.round(3),
        avg_file_duration: total_files > 0 ? (total_duration / total_files).round(3) : 0
      })

      logger.info "🎉 Bulk processing completed for session #{session_id}"
      logger.info "📊 Results: #{processed_count}/#{total_files} successful, #{failed_files.size} failed"
    rescue StandardError => e
      total_duration = Time.current - start_time

      safe_log_error("bulk_processing_job_failure", e, {
        session_id: session_id,
        total_files: total_files,
        processed_count: processed_count,
        failed_count: failed_files.size,
        total_duration: total_duration.round(3),
        job_id: job_id
      })

      logger.error "💀 Bulk processing job failed for session #{session_id}: #{e.message}"
      logger.error e.backtrace.join("\n")

      broadcast_status_update(session_id, {
        type: 'upload_error',
        error: e.message,
        status: 'failed',
        processed: processed_count,
        total: total_files,
        failed_at: Time.current.iso8601,
        total_duration: total_duration.round(3)
      })
    end

    private

    # Ingests a single file and broadcasts its outcome.
    #
    # +progress+ carries :processed (successes so far), :total, :batch_index
    # and :total_batches. Returns true on success and false on any failure
    # (missing temp file, unsuccessful result, or exception). The temp file is
    # removed in every case.
    def process_file(session_id, file_data, force_duplicate, progress)
      file_start_time = Time.current
      temp_path = file_data[:temp_path]
      original_filename = file_data[:original_filename]
      processed = progress[:processed]
      total = progress[:total]
      file_exists = !temp_path.nil? && File.exist?(temp_path)

      safe_log_operation("file_processing_start", {
        session_id: session_id,
        filename: original_filename,
        temp_path: temp_path,
        file_exists: file_exists,
        file_size: safe_file_size(temp_path)
      })

      unless file_exists
        error_msg = "Temporary file not found: #{temp_path}"
        safe_log_error("file_processing", StandardError.new(error_msg), {
          session_id: session_id,
          filename: original_filename,
          temp_path: temp_path
        })
        logger.error "❌ Failed to process: #{original_filename} - #{error_msg}"
        # Report the skipped file so the client's totals add up.
        broadcast_status_update(session_id, progress_fields(processed, total).merge(
          type: 'file_error',
          filename: original_filename,
          status: 'failed',
          error: error_msg
        ))
        return false
      end

      logger.info "🔄 Processing file: #{original_filename}"

      broadcast_status_update(session_id, progress_fields(processed, total).merge(
        type: 'file_start',
        filename: original_filename,
        status: 'processing',
        batch_index: progress[:batch_index],
        total_batches: progress[:total_batches]
      ))

      ragdoll_start_time = Time.current
      result = ::Ragdoll.add_document(path: temp_path, force: force_duplicate)
      ragdoll_duration = Time.current - ragdoll_start_time
      succeeded = result && result[:success]

      safe_log_performance("ragdoll_add_document", ragdoll_duration, {
        session_id: session_id,
        filename: original_filename,
        result_success: succeeded,
        force_duplicate: force_duplicate
      })

      file_duration = Time.current - file_start_time

      if succeeded
        safe_log_operation("file_processing_success", {
          session_id: session_id,
          filename: original_filename,
          document_id: result[:document_id],
          processing_duration: file_duration.round(3),
          processed_count: processed + 1,
          total_files: total
        })

        logger.info "✅ Successfully processed: #{original_filename}"

        broadcast_status_update(session_id, progress_fields(processed + 1, total).merge(
          type: 'file_complete',
          filename: original_filename,
          status: 'completed',
          document_id: result[:document_id],
          processing_time: file_duration.round(3)
        ))
        true
      else
        error_message = result ? result[:error] : 'Unknown error'

        safe_log_error("file_processing", StandardError.new(error_message), {
          session_id: session_id,
          filename: original_filename,
          processing_duration: file_duration.round(3),
          ragdoll_result: result,
          temp_path: temp_path,
          file_size: safe_file_size(temp_path)
        })

        logger.error "❌ Failed to process: #{original_filename} - #{error_message}"

        broadcast_status_update(session_id, progress_fields(processed, total).merge(
          type: 'file_error',
          filename: original_filename,
          status: 'failed',
          error: error_message,
          processing_time: file_duration.round(3)
        ))
        false
      end
    rescue StandardError => e
      file_duration = Time.current - file_start_time

      safe_log_error("file_processing_exception", e, {
        session_id: session_id,
        filename: original_filename,
        temp_path: temp_path,
        processing_duration: file_duration.round(3),
        file_data: file_data,
        processed_count: processed,
        total_files: total
      })

      logger.error "💥 Exception processing file #{original_filename}: #{e.message}"
      logger.error e.backtrace.join("\n")

      # Same stream as every other status message of this job.
      broadcast_status_update(session_id, progress_fields(processed, total).merge(
        type: 'file_error',
        filename: original_filename,
        status: 'failed',
        error: e.message
      ))
      false
    ensure
      cleanup_temp_file(temp_path)
    end

    # Counter fields shared by every per-file broadcast.
    def progress_fields(processed, total)
      {
        processed: processed,
        total: total,
        percentage: ((processed.to_f / total) * 100).round(1)
      }
    end

    # Size of +path+ in bytes, or 0 when the path is nil or the file is missing.
    def safe_file_size(path)
      path && File.exist?(path) ? File.size(path) : 0
    end

    # Removes the temp file if it is still present; never raises.
    def cleanup_temp_file(path)
      File.delete(path) if path && File.exist?(path)
    rescue StandardError => e
      logger.warn "Failed to delete temp file #{path}: #{e.message}"
    end

    # Broadcasts +data+ on the session's status stream; failures are logged only.
    def broadcast_status_update(session_id, data)
      ActionCable.server.broadcast("bulk_upload_status_#{session_id}", data)
    rescue StandardError => e
      logger.error "Failed to broadcast status update: #{e.message}"
    end

    # Forwards to RagdollLogging.log_operation when that constant is defined.
    def safe_log_operation(operation, details = {})
      return unless defined?(RagdollLogging)

      RagdollLogging.log_operation(operation, details)
    rescue StandardError => e
      logger.debug "Failed to log operation #{operation}: #{e.message}"
    end

    # Forwards to RagdollLogging.log_error when that constant is defined.
    def safe_log_error(operation, error, details = {})
      return unless defined?(RagdollLogging)

      RagdollLogging.log_error(operation, error, details)
    rescue StandardError => e
      logger.debug "Failed to log error #{operation}: #{e.message}"
    end

    # Forwards to RagdollLogging.log_performance when that constant is defined.
    def safe_log_performance(operation, duration, details = {})
      return unless defined?(RagdollLogging)

      RagdollLogging.log_performance(operation, duration, details)
    rescue StandardError => e
      logger.debug "Failed to log performance #{operation}: #{e.message}"
    end
  end
end
|