ragdoll-rails 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/ragdoll/application.js +129 -0
  3. data/app/assets/javascripts/ragdoll/bulk_upload_status.js +454 -0
  4. data/app/assets/stylesheets/ragdoll/application.css +84 -0
  5. data/app/assets/stylesheets/ragdoll/bulk_upload_status.css +379 -0
  6. data/app/channels/application_cable/channel.rb +6 -0
  7. data/app/channels/application_cable/connection.rb +6 -0
  8. data/app/channels/ragdoll/bulk_upload_status_channel.rb +27 -0
  9. data/app/channels/ragdoll/file_processing_channel.rb +26 -0
  10. data/app/components/ragdoll/alert_component.html.erb +4 -0
  11. data/app/components/ragdoll/alert_component.rb +32 -0
  12. data/app/components/ragdoll/application_component.rb +6 -0
  13. data/app/components/ragdoll/card_component.html.erb +15 -0
  14. data/app/components/ragdoll/card_component.rb +21 -0
  15. data/app/components/ragdoll/document_list_component.html.erb +41 -0
  16. data/app/components/ragdoll/document_list_component.rb +13 -0
  17. data/app/components/ragdoll/document_table_component.html.erb +76 -0
  18. data/app/components/ragdoll/document_table_component.rb +13 -0
  19. data/app/components/ragdoll/empty_state_component.html.erb +12 -0
  20. data/app/components/ragdoll/empty_state_component.rb +17 -0
  21. data/app/components/ragdoll/flash_messages_component.html.erb +3 -0
  22. data/app/components/ragdoll/flash_messages_component.rb +37 -0
  23. data/app/components/ragdoll/navbar_component.html.erb +24 -0
  24. data/app/components/ragdoll/navbar_component.rb +31 -0
  25. data/app/components/ragdoll/page_header_component.html.erb +13 -0
  26. data/app/components/ragdoll/page_header_component.rb +15 -0
  27. data/app/components/ragdoll/stats_card_component.html.erb +11 -0
  28. data/app/components/ragdoll/stats_card_component.rb +17 -0
  29. data/app/components/ragdoll/status_badge_component.html.erb +3 -0
  30. data/app/components/ragdoll/status_badge_component.rb +30 -0
  31. data/app/controllers/ragdoll/api/v1/analytics_controller.rb +72 -0
  32. data/app/controllers/ragdoll/api/v1/base_controller.rb +29 -0
  33. data/app/controllers/ragdoll/api/v1/documents_controller.rb +148 -0
  34. data/app/controllers/ragdoll/api/v1/search_controller.rb +87 -0
  35. data/app/controllers/ragdoll/api/v1/system_controller.rb +97 -0
  36. data/app/controllers/ragdoll/application_controller.rb +17 -0
  37. data/app/controllers/ragdoll/configuration_controller.rb +82 -0
  38. data/app/controllers/ragdoll/dashboard_controller.rb +98 -0
  39. data/app/controllers/ragdoll/documents_controller.rb +460 -0
  40. data/app/controllers/ragdoll/documents_controller_backup.rb +68 -0
  41. data/app/controllers/ragdoll/jobs_controller.rb +116 -0
  42. data/app/controllers/ragdoll/search_controller.rb +368 -0
  43. data/app/jobs/application_job.rb +9 -0
  44. data/app/jobs/ragdoll/bulk_document_processing_job.rb +280 -0
  45. data/app/jobs/ragdoll/process_file_job.rb +166 -0
  46. data/app/services/ragdoll/worker_health_service.rb +111 -0
  47. data/app/views/layouts/ragdoll/application.html.erb +162 -0
  48. data/app/views/ragdoll/dashboard/analytics.html.erb +333 -0
  49. data/app/views/ragdoll/dashboard/index.html.erb +208 -0
  50. data/app/views/ragdoll/documents/edit.html.erb +91 -0
  51. data/app/views/ragdoll/documents/index.html.erb +302 -0
  52. data/app/views/ragdoll/documents/new.html.erb +1518 -0
  53. data/app/views/ragdoll/documents/show.html.erb +188 -0
  54. data/app/views/ragdoll/documents/upload_results.html.erb +248 -0
  55. data/app/views/ragdoll/jobs/index.html.erb +669 -0
  56. data/app/views/ragdoll/jobs/show.html.erb +129 -0
  57. data/app/views/ragdoll/search/index.html.erb +324 -0
  58. data/config/cable.yml +12 -0
  59. data/config/routes.rb +56 -1
  60. data/lib/ragdoll/rails/engine.rb +32 -1
  61. data/lib/ragdoll/rails/version.rb +1 -1
  62. metadata +86 -1
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ragdoll
4
+ class ConfigurationController < ApplicationController
5
# Prepares the configuration screen.
#
# Sets:
#   @configuration       - the object returned by ::Ragdoll.configuration
#   @available_models    - hard-coded model names keyed by provider symbol
#   @available_providers - the same provider names as strings, in the same order
#   @current_stats       - document/embedding counts, the first embedding's
#                          dimensions and the rounded average content length
def index
  @configuration = ::Ragdoll.configuration

  @available_models = {
    openai: %w[text-embedding-3-small text-embedding-3-large text-embedding-ada-002],
    anthropic: %w[claude-3-haiku-20240307 claude-3-sonnet-20240229 claude-3-opus-20240229],
    google: %w[gemini-pro gemini-1.5-flash gemini-1.5-pro],
    azure: %w[text-embedding-3-small text-embedding-3-large],
    ollama: %w[llama2 mistral codellama],
    huggingface: %w[sentence-transformers/all-MiniLM-L6-v2 sentence-transformers/all-mpnet-base-v2]
  }
  @available_providers = @available_models.keys.map(&:to_s)

  embedding_model = ::Ragdoll::Embedding
  document_total = ::Ragdoll::Document.count
  embedding_total = embedding_model.count
  sample_embedding = embedding_model.first
  mean_content_length = embedding_model.average('LENGTH(content)')

  @current_stats = {
    total_documents: document_total,
    total_embeddings: embedding_total,
    # Falls back to 0 when there is no embedding yet or it reports no dimensions.
    embedding_dimensions: (sample_embedding && sample_embedding.embedding_dimensions) || 0,
    average_chunk_size: mean_content_length ? mean_content_length.round : 0
  }
end
24
+
25
# Applies the submitted settings to the in-memory Ragdoll configuration and
# then smoke-tests it by asking a fresh ::Ragdoll::Client for its stats.
#
# Only the whitelisted keys below are read from params[:configuration].
# Values arrive as strings and are coerced per key:
#   * chunk_size, chunk_overlap, max_search_results -> Integer (to_i)
#   * search_similarity_threshold                   -> Float (to_f)
#   * the four enable_*/usage_* flags               -> true for '1' or 'true', else false
#   * everything else                               -> assigned as-is, skipped when blank
#
# Outcome is reported through flash (:notice, :warning or :alert) and the
# request is always redirected back to the configuration page.
# params.require is called outside the rescue, so a missing :configuration
# key is not converted into a flash message here.
def update
  config_params = params.require(:configuration).permit(
    :llm_provider,
    :embedding_provider,
    :embedding_model,
    :chunk_size,
    :chunk_overlap,
    :max_search_results,
    :search_similarity_threshold,
    :enable_search_analytics,
    :enable_document_summarization,
    :enable_usage_tracking,
    :usage_ranking_enabled,
    :openai_api_key,
    :anthropic_api_key,
    :google_api_key,
    :azure_api_key,
    :ollama_url,
    :huggingface_api_key
  )

  begin
    config = ::Ragdoll.configuration

    config_params.each do |key, value|
      setter = "#{key}="

      # public_send (not send) so a request can never reach a non-public
      # method on the configuration object.
      case key
      when 'chunk_size', 'chunk_overlap', 'max_search_results'
        config.public_send(setter, value.to_i)
      when 'search_similarity_threshold'
        config.public_send(setter, value.to_f)
      when 'enable_search_analytics', 'enable_document_summarization', 'enable_usage_tracking', 'usage_ranking_enabled'
        config.public_send(setter, %w[1 true].include?(value))
      else
        # Blank strings (e.g. an untouched API key field) must not wipe the
        # value that is already configured.
        config.public_send(setter, value) if value.present?
      end
    end

    flash[:notice] = 'Configuration updated successfully.'

    # Test the configuration; a failure here downgrades to a warning because
    # the settings have already been applied above.
    begin
      ::Ragdoll::Client.new.stats
      flash[:notice] += " Configuration test successful."
    rescue => e
      flash[:warning] = "Configuration saved but test failed: #{e.message}"
    end
  rescue => e
    flash[:alert] = "Error updating configuration: #{e.message}"
  end

  redirect_to ragdoll.configuration_path
end
81
+ end
82
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ragdoll
4
+ class DashboardController < ApplicationController
5
# Dashboard landing page.
#
# Sets:
#   @stats                  - document counts (total and per status), embedding
#                             and search totals, plus the five newest searches
#   @document_types         - document count grouped by document_type
#   @recent_documents       - the ten newest documents
#   @top_searched_documents - summed embedding usage_count per document title,
#                             five highest first
def index
  documents = ::Ragdoll::Document
  embeddings = ::Ragdoll::Embedding
  searches = ::Ragdoll::Search
  count_with_status = ->(status) { documents.where(status: status).count }

  @stats = {
    total_documents: documents.count,
    processed_documents: count_with_status.call('processed'),
    failed_documents: count_with_status.call('failed'),
    pending_documents: count_with_status.call('pending'),
    total_embeddings: embeddings.count,
    total_searches: searches.count,
    recent_searches: searches.order(created_at: :desc).limit(5)
  }

  @document_types = documents.group(:document_type).count
  @recent_documents = documents.order(created_at: :desc).limit(10)

  # Usage analytics - embeddings reach their document through the content
  # row they are attached to (embeddable_id -> ragdoll_contents.id).
  usage_by_title = embeddings
    .joins("JOIN ragdoll_contents ON ragdoll_contents.id = ragdoll_embeddings.embeddable_id")
    .joins("JOIN ragdoll_documents ON ragdoll_documents.id = ragdoll_contents.document_id")
    .group('ragdoll_documents.title')

  @top_searched_documents = usage_by_title
    .order(Arel.sql('SUM(ragdoll_embeddings.usage_count) DESC'))
    .limit(5)
    .sum(:usage_count)
end
28
+
29
# Analytics page: aggregates search activity and corpus statistics.
#
# Sets:
#   @search_analytics        - totals, per-period counts, rounded averages and
#                              a count per search_type
#   @top_queries             - the ten most frequent query strings with counts
#   @search_trends           - searches per day ('%m/%d' keys) for the last 7 days
#   @top_documents           - summed embedding usage_count per document title,
#                              ten highest first
#   @similarity_distribution - number of searches per avg_similarity_score bucket
#   @system_stats            - document counts per status and embedding totals
def analytics
  today = Date.current
  week_start = today.beginning_of_week
  month_start = today.beginning_of_month

  # Inclusive time range from the start of +first_day+ to the end of +last_day+.
  day_span = ->(first_day, last_day) { first_day.beginning_of_day..last_day.end_of_day }
  # Average of +column+ rounded to +digits+, or 0 when there is nothing to average.
  rounded_mean = lambda do |scope, column, digits|
    mean = scope.average(column)
    mean ? mean.round(digits) : 0
  end

  # Calculate search statistics
  all_searches = ::Ragdoll::Search.all
  searches_today = all_searches.where(created_at: day_span.call(today, today))
  searches_this_week = all_searches.where(created_at: day_span.call(week_start, today))
  searches_this_month = all_searches.where(created_at: day_span.call(month_start, today))
  scored_searches = all_searches.where.not(avg_similarity_score: nil)

  @search_analytics = {
    total_searches: all_searches.count,
    unique_queries: all_searches.distinct.count(:query),
    searches_today: searches_today.count,
    searches_this_week: searches_this_week.count,
    searches_this_month: searches_this_month.count,
    average_results: rounded_mean.call(all_searches, :results_count, 1),
    average_similarity: rounded_mean.call(scored_searches, :avg_similarity_score, 3),
    avg_execution_time: rounded_mean.call(all_searches, :execution_time_ms, 1),
    search_types: all_searches.group(:search_type).count
  }

  # Top queries (most frequent); ranking happens in Ruby on the grouped counts.
  query_counts = all_searches.group(:query).count
  @top_queries = query_counts.sort_by { |_query, count| -count }.first(10).to_h

  # Search trends by day for the last 7 days (today included)
  @search_trends = (6.days.ago.to_date..today).each_with_object({}) do |date, trend|
    trend[date.strftime('%m/%d')] = all_searches.where(created_at: day_span.call(date, date)).count
  end

  # Most searched documents (using embedding usage as proxy)
  usage_by_title = ::Ragdoll::Embedding
    .joins("JOIN ragdoll_contents ON ragdoll_contents.id = ragdoll_embeddings.embeddable_id")
    .joins("JOIN ragdoll_documents ON ragdoll_documents.id = ragdoll_contents.document_id")
    .group('ragdoll_documents.title')
  @top_documents = usage_by_title
    .order(Arel.sql('SUM(ragdoll_embeddings.usage_count) DESC'))
    .limit(10)
    .sum(:usage_count)

  # Similarity score distribution; buckets are half-open [low, high).
  similarity_scores = scored_searches.pluck(:avg_similarity_score)
  in_bucket = ->(low, high) { similarity_scores.count { |score| score >= low && score < high } }
  @similarity_distribution = {
    "0.9-1.0" => similarity_scores.count { |score| score >= 0.9 },
    "0.8-0.9" => in_bucket.call(0.8, 0.9),
    "0.7-0.8" => in_bucket.call(0.7, 0.8),
    "0.6-0.7" => in_bucket.call(0.6, 0.7),
    "0.5-0.6" => in_bucket.call(0.5, 0.6),
    "< 0.5" => similarity_scores.count { |score| score < 0.5 }
  }

  # System statistics
  documents = ::Ragdoll::Document
  @system_stats = {
    total_documents: documents.count,
    processed_documents: documents.where(status: 'processed').count,
    failed_documents: documents.where(status: 'failed').count,
    pending_documents: documents.where(status: 'pending').count,
    total_embeddings: ::Ragdoll::Embedding.count,
    total_embedding_usage: ::Ragdoll::Embedding.sum(:usage_count)
  }
end
97
+ end
98
+ end
@@ -0,0 +1,460 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ragdoll
4
+ class DocumentsController < ApplicationController
5
+ before_action :set_document, only: [:show, :edit, :update, :destroy, :preview, :reprocess, :download]
6
+ skip_before_action :verify_authenticity_token, only: [:upload_async, :bulk_upload, :create]
7
+
8
# Lists documents, newest first, optionally narrowed by request parameters.
#
# Recognised params (each ignored when blank):
#   :status        - exact match on the status column
#   :document_type - exact match on the document_type column
#   :search        - case-insensitive substring match on title
#
# Also sets @document_types and @statuses to the distinct non-nil values
# found in the table, for the filter controls.
def index
  documents = ::Ragdoll::Document.all
  documents = documents.where(status: params[:status]) if params[:status].present?
  documents = documents.where(document_type: params[:document_type]) if params[:document_type].present?

  if params[:search].present?
    # Escape %, _ and \ so the user's text is matched literally instead of
    # being interpreted as LIKE wildcards.
    term = ::Ragdoll::Document.sanitize_sql_like(params[:search])
    documents = documents.where('title ILIKE ?', "%#{term}%")
  end

  @documents = documents.order(created_at: :desc)

  @document_types = ::Ragdoll::Document.distinct.pluck(:document_type).compact
  @statuses = ::Ragdoll::Document.distinct.pluck(:status).compact
end
18
+
19
# Document detail page. @document is loaded by the set_document callback.
# Exposes the ten newest searches (for the sidebar) and the document's
# embeddings as returned by all_embeddings.
def show
  recent = ::Ragdoll::Search.order(created_at: :desc)
  @recent_searches = recent.limit(10)
  @embeddings = @document.all_embeddings
end
24
+
25
# Upload form: exposes a blank, unsaved document as @document.
def new
  blank_document = ::Ragdoll::Document.new
  @document = blank_document
end
28
+
29
# Creates documents from uploaded files or from pasted text.
#
# Reads params[:ragdoll_document]:
#   :files           - one upload or an array of uploads; every file is
#                      ingested and a per-file report is rendered via
#                      :upload_results (collected in @results)
#   :text_content    - used when no files are given; written to a temp file
#                      and ingested, then redirects to the document
#   :force_duplicate - '1' passes force: true to ::Ragdoll.add_document
#
# When neither files nor text are present the form is re-rendered with an
# error on @document.
def create
  doc_params = params[:ragdoll_document]
  force_duplicate = !doc_params.nil? && doc_params[:force_duplicate] == '1'

  if doc_params && doc_params[:files].present?
    uploaded_files = doc_params[:files]
    # Ensure uploaded_files is always an array
    uploaded_files = [uploaded_files] unless uploaded_files.is_a?(Array)

    @results = []
    uploaded_files.each do |file|
      # Skip if file is not a valid upload object
      next unless file.respond_to?(:original_filename)

      @results << ingest_uploaded_file(file, force_duplicate)
    end

    render :upload_results
  elsif doc_params && doc_params[:text_content].present?
    create_document_from_text(doc_params[:text_content], force_duplicate)
  else
    render_new_document_with_error("Please provide either files or text content")
  end
end

# Stages one upload under tmp/uploads, hands it to Ragdoll and returns the
# result hash shown on the upload_results page. Never raises: any error is
# reported as a failed result.
private def ingest_uploaded_file(file, force_duplicate)
  temp_path = nil
  # basename drops any directory part of the supplied name so the staged file
  # cannot land outside tmp/uploads.
  temp_path = ::Rails.root.join('tmp', 'uploads', File.basename(file.original_filename.to_s))
  FileUtils.mkdir_p(File.dirname(temp_path))
  File.binwrite(temp_path, file.read)

  result = ::Ragdoll.add_document(path: temp_path.to_s, force: force_duplicate)

  if result[:success] && result[:document_id]
    document = ::Ragdoll::Document.find(result[:document_id])
    duplicate_detected = result[:duplicate] || (result[:message] && result[:message].include?('already exists'))
    {
      file: file.original_filename,
      success: true,
      document: document,
      message: result[:message],
      duplicate: duplicate_detected,
      forced: force_duplicate
    }
  else
    { file: file.original_filename, success: false, error: result[:error] || "Unknown error" }
  end
rescue => e
  { file: file.original_filename, success: false, error: e.message }
ensure
  # Runs on success and failure alike, so a failed ingest no longer leaves
  # the staged copy behind. rm_f ignores a missing file.
  FileUtils.rm_f(temp_path) if temp_path
end

# Ingests pasted text by writing it to a randomly named temp file (Ragdoll
# expects a path) and then redirects to the resulting document, or re-renders
# the form with the error.
private def create_document_from_text(text, force_duplicate)
  temp_path = ::Rails.root.join('tmp', 'uploads', "#{SecureRandom.hex(8)}.txt")
  FileUtils.mkdir_p(File.dirname(temp_path))
  File.write(temp_path, text)

  result = ::Ragdoll.add_document(path: temp_path.to_s, force: force_duplicate)

  unless result[:success] && result[:document_id]
    return render_new_document_with_error(result[:error] || "Unknown error occurred")
  end

  document = ::Ragdoll::Document.find(result[:document_id])
  duplicate_detected = result[:duplicate] || (result[:message] && result[:message].include?('already exists'))

  notice =
    if !duplicate_detected
      'Document was successfully created.'
    elsif force_duplicate
      'Document was successfully created (duplicate forced).'
    else
      'Document already exists - returned existing document.'
    end

  redirect_to ragdoll.document_path(document), notice: notice
rescue => e
  render_new_document_with_error(e.message)
ensure
  FileUtils.rm_f(temp_path) if temp_path
end

# Re-renders the upload form with +message+ attached to a blank document.
private def render_new_document_with_error(message)
  @document = ::Ragdoll::Document.new
  @document.errors.add(:base, message)
  render :new
end
117
+
118
# Edit form. Intentionally empty: @document comes from the set_document
# callback and the default template is rendered.
def edit; end
120
+
121
# Saves the permitted attributes onto @document. Redirects to the document
# page on success; re-renders the edit form when the update is rejected.
def update
  saved = @document.update(document_params)
  return render(:edit) unless saved

  redirect_to ragdoll.document_path(@document), notice: 'Document was successfully updated.'
end
128
+
129
# Destroys @document and returns to the document list with a notice.
def destroy
  @document.destroy

  list_url = ragdoll.documents_url
  redirect_to list_url, notice: 'Document was successfully deleted.'
end
133
+
134
# Preview of @document: a layout-less HTML fragment, or JSON carrying the
# document's content and metadata.
def preview
  respond_to do |format|
    format.html do
      render layout: false
    end

    format.json do
      payload = { content: @document.content, metadata: @document.metadata }
      render json: payload
    end
  end
end
140
+
141
# Re-runs embedding generation for @document: drops its current embeddings,
# marks it 'pending' and enqueues GenerateEmbeddingsJob. Any error is shown
# as an alert on the document page instead of propagating.
def reprocess
  # Delete existing embeddings
  @document.all_embeddings.destroy_all

  # Reprocess document
  @document.update(status: 'pending')

  # Process embeddings in background
  ::Ragdoll::GenerateEmbeddingsJob.perform_later(@document.id)

  redirect_to ragdoll.document_path(@document), notice: 'Document reprocessing initiated.'
rescue => e
  redirect_to ragdoll.document_path(@document), alert: "Error reprocessing document: #{e.message}"
end
157
+
158
# Streams the file stored at @document.location, named after the document's
# title. Redirects back with an alert when no location is recorded or the
# file is no longer on disk.
def download
  file_available = @document.location.present? && File.exist?(@document.location)
  return redirect_to(ragdoll.document_path(@document), alert: 'File not found.') unless file_available

  send_file @document.location, filename: @document.title
end
165
+
166
# Stages a batch of uploads on disk and queues one BulkDocumentProcessingJob
# for them.
#
# Reads:
#   params[:directory_files] - one upload or an array of uploads (required)
#   params[:temp_session_id] - optional client-chosen id; falls back to the
#                              session id, then the request id
#   params[:force_duplicate] - '1' is forwarded to the job as true
#
# Responds with JSON for XHR/JSON requests, otherwise sets a flash message and
# redirects to the document list. Status codes on failure: 400 when no files
# were sent, 422 when none could be staged, 500 on an unexpected error.
def bulk_upload
  logger.info "🔍 BULK UPLOAD METHOD CALLED"
  logger.info "📊 Request params: #{params.inspect}"
  logger.info "📊 Session ID: #{session.id}"

  unless params[:directory_files].present?
    logger.warn "⚠️ No files provided in bulk upload"
    return bulk_upload_failure("Please select files to upload.", :bad_request)
  end

  # Use temp_session_id from frontend, session ID, or request ID as fallback
  session_id = if params[:temp_session_id].present?
                 params[:temp_session_id]
               elsif session.id.present?
                 session.id.to_s
               else
                 request.request_id
               end
  logger.info "📊 Using session_id: #{session_id}"

  uploaded_files = params[:directory_files]
  force_duplicate = params[:force_duplicate] == '1'

  # Ensure uploaded_files is always an array
  uploaded_files = [uploaded_files] unless uploaded_files.is_a?(Array)
  logger.info "📁 Processing #{uploaded_files.size} files"

  file_paths_data = stage_bulk_upload_files(uploaded_files, session_id)

  if file_paths_data.empty?
    logger.warn "⚠️ No valid files found for processing"
    return bulk_upload_failure("No valid files found for processing.", :unprocessable_entity)
  end

  # Queue background job for processing; it receives the session id exactly
  # as chosen above.
  ::Ragdoll::BulkDocumentProcessingJob.perform_later(session_id, file_paths_data, force_duplicate)
  logger.info "🚀 Queued bulk processing job for #{file_paths_data.size} files"

  if request.xhr? || request.format.json?
    render json: {
      success: true,
      session_id: session_id,
      file_count: file_paths_data.size,
      message: "Upload started! Processing #{file_paths_data.size} files in the background."
    }
  else
    flash[:notice] = "Upload started! Processing #{file_paths_data.size} files in the background. You can monitor progress below."
    redirect_to ragdoll.documents_path
  end
rescue => e
  logger.error "💥 Fatal error in bulk_upload: #{e.message}"
  logger.error e.backtrace.join("\n")
  bulk_upload_failure("Upload failed: #{e.message}", :internal_server_error)
end

# Writes each upload to tmp/uploads and returns an array of
# { temp_path:, original_filename: } hashes for the files that were staged.
# Entries that are not upload objects are skipped; a file that cannot be
# written is logged and left out.
private def stage_bulk_upload_files(uploaded_files, session_id)
  uploads_dir = ::Rails.root.join('tmp', 'uploads')
  # Both the session id (may come straight from params) and the upload's
  # filename end up in the path, so strip anything that could act as a path
  # separator or "..": the staged file must stay inside uploads_dir.
  path_token = session_id.to_s.gsub(/[^A-Za-z0-9_-]/, '_')
  staged = []

  uploaded_files.each_with_index do |file, index|
    next unless file.respond_to?(:original_filename)

    begin
      # Generate unique temp filename
      file_id = "#{path_token}_#{index}_#{Time.current.to_i}"
      temp_path = uploads_dir.join("#{file_id}_#{File.basename(file.original_filename.to_s)}")
      FileUtils.mkdir_p(File.dirname(temp_path))
      File.binwrite(temp_path, file.read)

      staged << {
        temp_path: temp_path.to_s,
        original_filename: file.original_filename
      }

      logger.info "📄 Queued file: #{file.original_filename}"
    rescue => e
      logger.error "❌ Failed to prepare file #{file.original_filename}: #{e.message}"
    end
  end

  staged
end

# Reports +message+ as a failure: JSON with +status+ for XHR/JSON requests,
# otherwise a flash alert and a redirect to the document list.
private def bulk_upload_failure(message, status)
  if request.xhr? || request.format.json?
    render json: { success: false, error: message }, status: status
  else
    flash[:alert] = message
    redirect_to ragdoll.documents_path
  end
end
272
+
273
# Destroys every document whose id is listed in params[:document_ids] and
# reports how many matched. Always redirects to the document list; an empty
# selection produces an alert instead.
def bulk_delete
  selected_ids = params[:document_ids]

  unless selected_ids.present?
    flash[:alert] = "No documents selected for deletion."
    return redirect_to(ragdoll.documents_path)
  end

  selection = ::Ragdoll::Document.where(id: selected_ids)
  # Counted before destroy_all, so this is the number of matching rows.
  count = selection.count
  selection.destroy_all
  flash[:notice] = "Successfully deleted #{count} documents."

  redirect_to ragdoll.documents_path
end
285
+
286
# For every document listed in params[:document_ids]: removes its embeddings,
# marks it 'pending' and enqueues GenerateEmbeddingsJob. Always redirects to
# the document list; an empty selection produces an alert instead.
def bulk_reprocess
  selected_ids = params[:document_ids]

  unless selected_ids.present?
    flash[:alert] = "No documents selected for reprocessing."
    return redirect_to(ragdoll.documents_path)
  end

  selection = ::Ragdoll::Document.where(id: selected_ids)
  selection.each do |doc|
    doc.all_embeddings.destroy_all
    doc.update(status: 'pending')
    ::Ragdoll::GenerateEmbeddingsJob.perform_later(doc.id)
  end
  flash[:notice] = "Reprocessing initiated for #{selection.count} documents."

  redirect_to ragdoll.documents_path
end
301
+
302
# JSON endpoint that stages uploaded files and hands each one to
# ProcessFileJob, or ingests it inline when the job cannot be queued.
#
# Reads:
#   params[:ragdoll_document][:files]           - one upload or an array (required)
#   params[:ragdoll_document][:force_duplicate] - '1' forces duplicates in the
#                                                 inline fallback
#   params[:temp_session_id]                    - optional client-chosen id; falls
#                                                 back to the session id, then
#                                                 the request id
#
# Renders { success:, session_id:, results:, message: } where each result is
# 'queued', 'completed_sync' or 'failed'. Responds 400 when no files are
# given and 500 on an unexpected error.
def upload_async
  logger.info "upload_async called with params: #{params.inspect}"
  logger.info "Session ID: #{session.id}"
  logger.info "Request ID: #{request.request_id}"
  logger.info "Temp Session ID: #{params[:temp_session_id]}"

  upload_params = params[:ragdoll_document]
  files_param = upload_params && upload_params[:files]

  unless files_param.present?
    logger.error "No files provided in upload_async"
    render json: { success: false, error: "No files provided" }, status: :bad_request
    return
  end

  # Priority: temp_session_id from frontend, then session ID, then request ID as fallback
  session_id, id_source =
    if params[:temp_session_id].present?
      [params[:temp_session_id], 'temp_session_id']
    elsif session.id.present?
      [session.id.to_s, 'session']
    else
      [request.request_id, 'request']
    end
  logger.info "Using session_id: #{session_id} (source: #{id_source})"

  uploaded_files = files_param
  logger.info "Files received: #{uploaded_files.inspect}"

  # Ensure uploaded_files is always an array
  uploaded_files = [uploaded_files] unless uploaded_files.is_a?(Array)

  # Log file analysis for debugging count discrepancies
  usable = ->(f) { f && f.respond_to?(:original_filename) && f.original_filename.present? }
  total_files = uploaded_files.count
  valid_count = uploaded_files.count(&usable)
  filtered_count = total_files - valid_count

  logger.info "📊 File upload analysis:"
  logger.info " Total files in upload array: #{total_files}"
  logger.info " Valid files with original_filename: #{valid_count}"
  logger.info " Files filtered out: #{filtered_count}"

  if filtered_count > 0
    uploaded_files.each_with_index do |file, index|
      next if usable.call(file)

      logger.warn " Filtered file #{index + 1}: #{file.class} - #{file.inspect}"
    end
  end

  uploads_dir = ::Rails.root.join('tmp', 'uploads')
  # The session id may come straight from params and is interpolated into the
  # temp path, so reduce it to a token that cannot contain path separators.
  path_token = session_id.to_s.gsub(/[^A-Za-z0-9_-]/, '_')

  processed_count = 0
  results = []

  # Process files in batches to avoid "too many open files" error
  batch_size = 50 # Process 50 files at a time
  uploaded_files.each_slice(batch_size).with_index do |file_batch, batch_index|
    file_batch.each_with_index do |file, batch_file_index|
      index = batch_index * batch_size + batch_file_index
      next unless file.respond_to?(:original_filename)

      logger.info "Processing file #{index + 1}: #{file.original_filename}"

      temp_path = nil
      queued = false

      begin
        # Generate unique file ID (passed to the job untouched)
        stamp = Time.current.to_i
        file_id = "#{session_id}_#{index}_#{stamp}"

        # Save uploaded file temporarily; basename keeps the staged copy
        # inside uploads_dir whatever the supplied filename contains.
        temp_path = uploads_dir.join("#{path_token}_#{index}_#{stamp}_#{File.basename(file.original_filename.to_s)}")
        FileUtils.mkdir_p(File.dirname(temp_path))
        File.binwrite(temp_path, file.read)

        logger.info "File saved to: #{temp_path}"

        # Try to queue background job first, fallback to direct processing
        begin
          raise "ProcessFileJob not available" unless defined?(::Ragdoll::ProcessFileJob)

          ::Ragdoll::ProcessFileJob.perform_later(file_id, session_id, file.original_filename, temp_path.to_s)
          queued = true
          logger.info "Job queued for file: #{file_id}"
          results << { file: file.original_filename, status: 'queued' }
        rescue => job_error
          logger.warn "Background job failed, processing directly: #{job_error.message}"

          # Process directly if job system is not available
          force_duplicate = params[:ragdoll_document][:force_duplicate] == '1'
          result = ::Ragdoll.add_document(path: temp_path.to_s, force: force_duplicate)

          if result[:success] && result[:document_id]
            document = ::Ragdoll::Document.find(result[:document_id])
            results << {
              file: file.original_filename,
              status: 'completed_sync',
              document_id: document.id
            }
            logger.info "File processed synchronously: #{file.original_filename}"
          else
            results << {
              file: file.original_filename,
              status: 'failed',
              error: result[:error] || 'Unknown error'
            }
          end
        end

        processed_count += 1
      rescue => file_error
        logger.error "Error processing file #{file.original_filename}: #{file_error.message}"
        results << {
          file: file.original_filename,
          status: 'failed',
          error: file_error.message
        }
      ensure
        # A queued job still needs its file; in every other outcome (inline
        # success, inline failure, exception) the staged copy is removed here
        # so a failed ingest does not leave it behind.
        FileUtils.rm_f(temp_path) if temp_path && !queued
      end
    end

    # Force garbage collection after each batch to free file descriptors
    GC.start
  end

  logger.info "Returning success response for #{processed_count} files"
  render json: {
    success: true,
    session_id: session_id,
    results: results,
    message: "#{processed_count} file(s) processed"
  }
rescue => e
  logger.error "Error in upload_async: #{e.message}"
  logger.error e.backtrace.join("\n")
  render json: { success: false, error: e.message }, status: :internal_server_error
end
433
+
434
# Processing overview. @processing_stats holds the number of documents in
# each of the four statuses and @recent_activity the twenty most recently
# updated documents. The JSON format returns only the counts.
def status
  @processing_stats = %w[pending processing processed failed].each_with_object({}) do |state, counts|
    counts[state.to_sym] = ::Ragdoll::Document.where(status: state).count
  end

  @recent_activity = ::Ragdoll::Document.order(updated_at: :desc).limit(20)

  respond_to do |format|
    format.html
    format.json do
      render json: @processing_stats
    end
  end
end
449
+
450
+ private
451
+
452
# before_action callback: loads the document addressed by params[:id] into
# @document for the member actions.
def set_document
  requested_id = params[:id]
  @document = ::Ragdoll::Document.find(requested_id)
end
455
+
456
# Strong parameters for a document: the scalar attributes listed below plus
# an array of files, all nested under :ragdoll_document.
def document_params
  scalar_fields = [:title, :content, :metadata, :status, :text_content, :force_duplicate]
  params.require(:ragdoll_document).permit(*scalar_fields, files: [])
end
459
+ end
460
+ end