ragdoll-rails 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/javascripts/ragdoll/application.js +129 -0
- data/app/assets/javascripts/ragdoll/bulk_upload_status.js +454 -0
- data/app/assets/stylesheets/ragdoll/application.css +84 -0
- data/app/assets/stylesheets/ragdoll/bulk_upload_status.css +379 -0
- data/app/channels/application_cable/channel.rb +6 -0
- data/app/channels/application_cable/connection.rb +6 -0
- data/app/channels/ragdoll/bulk_upload_status_channel.rb +27 -0
- data/app/channels/ragdoll/file_processing_channel.rb +26 -0
- data/app/components/ragdoll/alert_component.html.erb +4 -0
- data/app/components/ragdoll/alert_component.rb +32 -0
- data/app/components/ragdoll/application_component.rb +6 -0
- data/app/components/ragdoll/card_component.html.erb +15 -0
- data/app/components/ragdoll/card_component.rb +21 -0
- data/app/components/ragdoll/document_list_component.html.erb +41 -0
- data/app/components/ragdoll/document_list_component.rb +13 -0
- data/app/components/ragdoll/document_table_component.html.erb +76 -0
- data/app/components/ragdoll/document_table_component.rb +13 -0
- data/app/components/ragdoll/empty_state_component.html.erb +12 -0
- data/app/components/ragdoll/empty_state_component.rb +17 -0
- data/app/components/ragdoll/flash_messages_component.html.erb +3 -0
- data/app/components/ragdoll/flash_messages_component.rb +37 -0
- data/app/components/ragdoll/navbar_component.html.erb +24 -0
- data/app/components/ragdoll/navbar_component.rb +31 -0
- data/app/components/ragdoll/page_header_component.html.erb +13 -0
- data/app/components/ragdoll/page_header_component.rb +15 -0
- data/app/components/ragdoll/stats_card_component.html.erb +11 -0
- data/app/components/ragdoll/stats_card_component.rb +17 -0
- data/app/components/ragdoll/status_badge_component.html.erb +3 -0
- data/app/components/ragdoll/status_badge_component.rb +30 -0
- data/app/controllers/ragdoll/api/v1/analytics_controller.rb +72 -0
- data/app/controllers/ragdoll/api/v1/base_controller.rb +29 -0
- data/app/controllers/ragdoll/api/v1/documents_controller.rb +148 -0
- data/app/controllers/ragdoll/api/v1/search_controller.rb +87 -0
- data/app/controllers/ragdoll/api/v1/system_controller.rb +97 -0
- data/app/controllers/ragdoll/application_controller.rb +17 -0
- data/app/controllers/ragdoll/configuration_controller.rb +82 -0
- data/app/controllers/ragdoll/dashboard_controller.rb +98 -0
- data/app/controllers/ragdoll/documents_controller.rb +460 -0
- data/app/controllers/ragdoll/documents_controller_backup.rb +68 -0
- data/app/controllers/ragdoll/jobs_controller.rb +116 -0
- data/app/controllers/ragdoll/search_controller.rb +368 -0
- data/app/jobs/application_job.rb +9 -0
- data/app/jobs/ragdoll/bulk_document_processing_job.rb +280 -0
- data/app/jobs/ragdoll/process_file_job.rb +166 -0
- data/app/services/ragdoll/worker_health_service.rb +111 -0
- data/app/views/layouts/ragdoll/application.html.erb +162 -0
- data/app/views/ragdoll/dashboard/analytics.html.erb +333 -0
- data/app/views/ragdoll/dashboard/index.html.erb +208 -0
- data/app/views/ragdoll/documents/edit.html.erb +91 -0
- data/app/views/ragdoll/documents/index.html.erb +302 -0
- data/app/views/ragdoll/documents/new.html.erb +1518 -0
- data/app/views/ragdoll/documents/show.html.erb +188 -0
- data/app/views/ragdoll/documents/upload_results.html.erb +248 -0
- data/app/views/ragdoll/jobs/index.html.erb +669 -0
- data/app/views/ragdoll/jobs/show.html.erb +129 -0
- data/app/views/ragdoll/search/index.html.erb +324 -0
- data/config/cable.yml +12 -0
- data/config/routes.rb +56 -1
- data/lib/ragdoll/rails/engine.rb +32 -1
- data/lib/ragdoll/rails/version.rb +1 -1
- metadata +86 -1
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ragdoll
|
4
|
+
class ConfigurationController < ApplicationController
|
5
|
+
# Prepares the configuration page: the live Ragdoll configuration object, the
# provider and model names offered as choices, and a few aggregate figures
# read through the Document and Embedding models.
def index
  @configuration = ::Ragdoll.configuration
  @available_providers = %w[openai anthropic google azure ollama huggingface]

  # Model names offered per provider, keyed by provider symbol
  @available_models = {
    openai: ['text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'],
    anthropic: ['claude-3-haiku-20240307', 'claude-3-sonnet-20240229', 'claude-3-opus-20240229'],
    google: ['gemini-pro', 'gemini-1.5-flash', 'gemini-1.5-pro'],
    azure: ['text-embedding-3-small', 'text-embedding-3-large'],
    ollama: ['llama2', 'mistral', 'codellama'],
    huggingface: ['sentence-transformers/all-MiniLM-L6-v2', 'sentence-transformers/all-mpnet-base-v2']
  }

  document_total = ::Ragdoll::Document.count
  embedding_total = ::Ragdoll::Embedding.count
  sample_embedding = ::Ragdoll::Embedding.first
  mean_content_length = ::Ragdoll::Embedding.average('LENGTH(content)')

  @current_stats = {
    total_documents: document_total,
    total_embeddings: embedding_total,
    # Both figures fall back to 0 when there is no embedding row to read from
    embedding_dimensions: sample_embedding&.embedding_dimensions || 0,
    average_chunk_size: mean_content_length&.round || 0
  }
end
|
24
|
+
|
25
|
+
# Copies the permitted form fields onto the global Ragdoll configuration
# object, then smoke-tests the result by asking a fresh client for its stats.
#
# Form values arrive as strings: the size/limit fields are converted with
# to_i, the similarity threshold with to_f, and the feature toggles are true
# only for '1' or 'true'. Every other field is assigned verbatim and skipped
# when blank, so an empty input never overwrites an existing value.
#
# Always redirects back to the configuration page. The outcome is reported
# through flash: :notice on success, :warning when the values were applied
# but the client check raised, :alert when applying the values raised.
def update
  config_params = params.require(:configuration).permit(
    :llm_provider,
    :embedding_provider,
    :embedding_model,
    :chunk_size,
    :chunk_overlap,
    :max_search_results,
    :search_similarity_threshold,
    :enable_search_analytics,
    :enable_document_summarization,
    :enable_usage_tracking,
    :usage_ranking_enabled,
    :openai_api_key,
    :anthropic_api_key,
    :google_api_key,
    :azure_api_key,
    :ollama_url,
    :huggingface_api_key
  )

  begin
    config = ::Ragdoll.configuration

    config_params.each do |key, value|
      # public_send rather than send: a method name built from request data
      # must never be able to reach a private writer.
      case key
      when 'chunk_size', 'chunk_overlap', 'max_search_results'
        config.public_send("#{key}=", value.to_i)
      when 'search_similarity_threshold'
        config.public_send("#{key}=", value.to_f)
      when 'enable_search_analytics', 'enable_document_summarization', 'enable_usage_tracking', 'usage_ranking_enabled'
        config.public_send("#{key}=", value == '1' || value == 'true')
      else
        config.public_send("#{key}=", value) if value.present?
      end
    end

    flash[:notice] = 'Configuration updated successfully.'

    begin
      # The return value is irrelevant; the call only has to finish without raising.
      ::Ragdoll::Client.new.stats
      flash[:notice] += " Configuration test successful."
    rescue => e
      flash[:warning] = "Configuration saved but test failed: #{e.message}"
    end
  rescue => e
    flash[:alert] = "Error updating configuration: #{e.message}"
  end

  redirect_to ragdoll.configuration_path
end
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ragdoll
|
4
|
+
class DashboardController < ApplicationController
|
5
|
+
# Dashboard landing page: headline counts, the newest documents and searches,
# and the five document titles with the highest summed embedding usage.
def index
  documents = ::Ragdoll::Document
  embeddings = ::Ragdoll::Embedding
  searches = ::Ragdoll::Search
  newest_first = { created_at: :desc }

  @stats = {
    total_documents: documents.count,
    processed_documents: documents.where(status: 'processed').count,
    failed_documents: documents.where(status: 'failed').count,
    pending_documents: documents.where(status: 'pending').count,
    total_embeddings: embeddings.count,
    total_searches: searches.count,
    recent_searches: searches.order(newest_first).limit(5)
  }

  @document_types = documents.group(:document_type).count
  @recent_documents = documents.order(newest_first).limit(10)

  # Embeddings reach their document through the contents table, so two joins
  # are needed before usage can be summed per document title.
  usage_by_title = embeddings
    .joins("JOIN ragdoll_contents ON ragdoll_contents.id = ragdoll_embeddings.embeddable_id")
    .joins("JOIN ragdoll_documents ON ragdoll_documents.id = ragdoll_contents.document_id")
    .group('ragdoll_documents.title')

  @top_searched_documents = usage_by_title
    .order(Arel.sql('SUM(ragdoll_embeddings.usage_count) DESC'))
    .limit(5)
    .sum(:usage_count)
end
|
28
|
+
|
29
|
+
# Analytics page: search volume and quality figures, the ten most frequent
# queries, a seven-day daily trend, the ten document titles with the highest
# summed embedding usage, a histogram of average similarity scores, and
# overall document/embedding counts.
def analytics
  today = Date.current
  searches = ::Ragdoll::Search.all
  scored_searches = searches.where.not(avg_similarity_score: nil)

  # Searches created between the start of `from` and the end of today
  created_since = ->(from) { searches.where(created_at: from.beginning_of_day..today.end_of_day) }

  @search_analytics = {
    total_searches: searches.count,
    unique_queries: searches.distinct.count(:query),
    searches_today: created_since.call(today).count,
    searches_this_week: created_since.call(today.beginning_of_week).count,
    searches_this_month: created_since.call(today.beginning_of_month).count,
    average_results: searches.average(:results_count)&.round(1) || 0,
    average_similarity: scored_searches.average(:avg_similarity_score)&.round(3) || 0,
    avg_execution_time: searches.average(:execution_time_ms)&.round(1) || 0,
    search_types: searches.group(:search_type).count
  }

  # Ten most frequent queries, highest count first
  hits_per_query = searches.group(:query).count
  @top_queries = hits_per_query.sort_by { |_query, hits| -hits }.first(10).to_h

  # One entry per day for the last seven days (today included), keyed "MM/DD"
  @search_trends = (6.days.ago.to_date..today).each_with_object({}) do |date, trend|
    trend[date.strftime('%m/%d')] = searches.where(created_at: date.beginning_of_day..date.end_of_day).count
  end

  # Embedding usage summed per document title stands in for "most searched"
  @top_documents = ::Ragdoll::Embedding
    .joins("JOIN ragdoll_contents ON ragdoll_contents.id = ragdoll_embeddings.embeddable_id")
    .joins("JOIN ragdoll_documents ON ragdoll_documents.id = ragdoll_contents.document_id")
    .group('ragdoll_documents.title')
    .order(Arel.sql('SUM(ragdoll_embeddings.usage_count) DESC'))
    .limit(10)
    .sum(:usage_count)

  # Histogram buckets; the top bucket has no upper bound, the bottom no lower bound
  similarity_scores = scored_searches.pluck(:avg_similarity_score)
  buckets = {
    "0.9-1.0" => ->(s) { s >= 0.9 },
    "0.8-0.9" => ->(s) { s >= 0.8 && s < 0.9 },
    "0.7-0.8" => ->(s) { s >= 0.7 && s < 0.8 },
    "0.6-0.7" => ->(s) { s >= 0.6 && s < 0.7 },
    "0.5-0.6" => ->(s) { s >= 0.5 && s < 0.6 },
    "< 0.5" => ->(s) { s < 0.5 }
  }
  @similarity_distribution = buckets.transform_values { |in_bucket| similarity_scores.count(&in_bucket) }

  documents = ::Ragdoll::Document
  @system_stats = {
    total_documents: documents.count,
    processed_documents: documents.where(status: 'processed').count,
    failed_documents: documents.where(status: 'failed').count,
    pending_documents: documents.where(status: 'pending').count,
    total_embeddings: ::Ragdoll::Embedding.count,
    total_embedding_usage: ::Ragdoll::Embedding.sum(:usage_count)
  }
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,460 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ragdoll
|
4
|
+
class DocumentsController < ApplicationController
|
5
|
+
# Loads @document (via set_document) before every action that works on a single record.
before_action :set_document, only: [:show, :edit, :update, :destroy, :preview, :reprocess, :download]
|
6
|
+
# NOTE(review): CSRF token verification is switched off for the three upload
# endpoints below, all of which write files and create records. Confirm this
# is intentional and that the host application protects them some other way.
skip_before_action :verify_authenticity_token, only: [:upload_async, :bulk_upload, :create]
|
7
|
+
|
8
|
+
# Lists documents, newest first, optionally narrowed by the `status`,
# `document_type` and `search` query parameters (`search` is a
# case-insensitive substring match on the title). Also collects the distinct
# document types and statuses present in the table for the filter controls.
def index
  @documents = ::Ragdoll::Document.all
  @documents = @documents.where(status: params[:status]) if params[:status].present?
  @documents = @documents.where(document_type: params[:document_type]) if params[:document_type].present?

  if params[:search].present?
    # Escape %, _ and \ so the user's text is matched literally instead of
    # being interpreted as LIKE wildcards.
    literal_term = ActiveRecord::Base.sanitize_sql_like(params[:search].to_s)
    @documents = @documents.where('title ILIKE ?', "%#{literal_term}%")
  end

  @documents = @documents.order(created_at: :desc)

  @document_types = ::Ragdoll::Document.distinct.pluck(:document_type).compact
  @statuses = ::Ragdoll::Document.distinct.pluck(:status).compact
end
|
18
|
+
|
19
|
+
# Detail page for @document: its embeddings plus the ten most recent
# searches, which the view shows in a sidebar.
def show
  @embeddings = @document.all_embeddings

  latest_searches = ::Ragdoll::Search.order(created_at: :desc)
  @recent_searches = latest_searches.limit(10)
end
|
24
|
+
|
25
|
+
# Upload form; supplies an unsaved Document for the form to bind to.
def new
  blank_document = ::Ragdoll::Document.new
  @document = blank_document
end
|
28
|
+
|
29
|
+
# Creates documents from the upload form. Two input modes, checked in order:
#
# * ragdoll_document[files] - each upload is written to tmp/uploads, handed
#   to Ragdoll.add_document, and the per-file outcome is collected in
#   @results; renders :upload_results.
# * ragdoll_document[text_content] - the text is written to a randomly named
#   .txt file (add_document takes a path), then the user is redirected to the
#   resulting document, or :new is re-rendered with the error.
#
# ragdoll_document[force_duplicate] == '1' is passed to add_document as
# `force:`. With neither input present, :new is re-rendered with an error.
#
# Temp files are removed in `ensure`, so they are cleaned up even when
# add_document or the follow-up lookup raises.
def create
  form = params[:ragdoll_document]

  if form && form[:files].present?
    force_duplicate = form[:force_duplicate] == '1'
    @results = []

    # A single upload arrives as one object rather than an array
    uploaded_files = form[:files]
    uploaded_files = [uploaded_files] unless uploaded_files.is_a?(Array)

    uploaded_files.each do |file|
      # Skip entries that are not upload objects
      next unless file.respond_to?(:original_filename)

      temp_path = nil
      begin
        # basename: a client-supplied name must never leave tmp/uploads.
        # NOTE(review): two concurrent uploads with the same filename share
        # this path; left as is because the stored name may matter downstream.
        temp_path = ::Rails.root.join('tmp', 'uploads', File.basename(file.original_filename))
        FileUtils.mkdir_p(File.dirname(temp_path))
        File.binwrite(temp_path, file.read)

        result = ::Ragdoll.add_document(path: temp_path.to_s, force: force_duplicate)

        if result[:success] && result[:document_id]
          document = ::Ragdoll::Document.find(result[:document_id])
          duplicate_detected = result[:duplicate] || (result[:message] && result[:message].include?('already exists'))
          @results << {
            file: file.original_filename,
            success: true,
            document: document,
            message: result[:message],
            duplicate: duplicate_detected,
            forced: force_duplicate
          }
        else
          @results << { file: file.original_filename, success: false, error: result[:error] || "Unknown error" }
        end
      rescue => e
        @results << { file: file.original_filename, success: false, error: e.message }
      ensure
        # rm_f ignores a missing file, so this is safe on every path
        FileUtils.rm_f(temp_path) if temp_path
      end
    end

    render :upload_results
  elsif form && form[:text_content].present?
    temp_path = nil
    begin
      force_duplicate = form[:force_duplicate] == '1'

      temp_path = ::Rails.root.join('tmp', 'uploads', "#{SecureRandom.hex(8)}.txt")
      FileUtils.mkdir_p(File.dirname(temp_path))
      File.write(temp_path, form[:text_content])

      result = ::Ragdoll.add_document(path: temp_path.to_s, force: force_duplicate)

      if result[:success] && result[:document_id]
        document = ::Ragdoll::Document.find(result[:document_id])
        duplicate_detected = result[:duplicate] || (result[:message] && result[:message].include?('already exists'))

        notice =
          if duplicate_detected && !force_duplicate
            'Document already exists - returned existing document.'
          elsif duplicate_detected
            'Document was successfully created (duplicate forced).'
          else
            'Document was successfully created.'
          end
        redirect_to ragdoll.document_path(document), notice: notice
      else
        @document = ::Ragdoll::Document.new
        @document.errors.add(:base, result[:error] || "Unknown error occurred")
        render :new
      end
    rescue => e
      @document = ::Ragdoll::Document.new
      @document.errors.add(:base, e.message)
      render :new
    ensure
      FileUtils.rm_f(temp_path) if temp_path
    end
  else
    @document = ::Ragdoll::Document.new
    @document.errors.add(:base, "Please provide either files or text content")
    render :new
  end
end
|
117
|
+
|
118
|
+
# Edit form. Nothing to do here: @document is loaded by the set_document
# before_action and the default template is rendered.
def edit; end
|
120
|
+
|
121
|
+
# Applies the permitted form attributes to @document. Redirects to the
# document page on success; re-renders the edit form when the update fails.
def update
  saved = @document.update(document_params)
  return render(:edit) unless saved

  redirect_to ragdoll.document_path(@document), notice: 'Document was successfully updated.'
end
|
128
|
+
|
129
|
+
# Destroys @document and returns to the document list with a notice.
def destroy
  @document.destroy

  list_url = ragdoll.documents_url
  redirect_to list_url, notice: 'Document was successfully deleted.'
end
|
133
|
+
|
134
|
+
# Preview of @document: the HTML template without the layout, or a JSON
# object carrying the document's content and metadata.
def preview
  respond_to do |format|
    format.html { render layout: false }
    format.json do
      payload = { content: @document.content, metadata: @document.metadata }
      render json: payload
    end
  end
end
|
140
|
+
|
141
|
+
# Rebuilds the embeddings of @document: destroys the existing ones, marks the
# document 'pending' and enqueues GenerateEmbeddingsJob for it. Any error is
# reported as a flash alert on the document page.
def reprocess
  document_page = ragdoll.document_path(@document)

  @document.all_embeddings.destroy_all
  @document.update(status: 'pending')
  ::Ragdoll::GenerateEmbeddingsJob.perform_later(@document.id)

  redirect_to document_page, notice: 'Document reprocessing initiated.'
rescue => e
  redirect_to ragdoll.document_path(@document), alert: "Error reprocessing document: #{e.message}"
end
|
157
|
+
|
158
|
+
# Sends the file at @document.location, named after the document's title.
# Redirects to the document page with an alert when the location is blank or
# the file is not on disk.
def download
  stored_path = @document.location

  unless stored_path.present? && File.exist?(stored_path)
    return redirect_to(ragdoll.document_path(@document), alert: 'File not found.')
  end

  send_file stored_path, filename: @document.title
end
|
165
|
+
|
166
|
+
# Accepts a batch of uploads in params[:directory_files], writes each one to
# tmp/uploads and enqueues a single BulkDocumentProcessingJob for the batch.
#
# The job is keyed by a session identifier: params[:temp_session_id] when the
# client sent one, otherwise the Rails session id, otherwise the request id.
# params[:force_duplicate] == '1' is forwarded to the job.
#
# Responds with JSON for XHR/JSON requests and with flash + redirect to the
# document list otherwise. Statuses for JSON: 200 when the job was queued,
# 422 when no entry was a usable upload, 400 when no files were sent, 500 on
# an unexpected error.
#
# NOTE(review): the request params and session id are written to the log at
# info level; confirm that is acceptable for production logs.
def bulk_upload
  logger.info "🔍 BULK UPLOAD METHOD CALLED"
  logger.info "📊 Request params: #{params.inspect}"
  logger.info "📊 Session ID: #{session.id}"

  if params[:directory_files].present?
    session_id = if params[:temp_session_id].present?
                   params[:temp_session_id]
                 elsif session.id.present?
                   session.id.to_s
                 else
                   request.request_id
                 end

    logger.info "📊 Using session_id: #{session_id}"

    uploaded_files = params[:directory_files]
    force_duplicate = params[:force_duplicate] == '1'

    # A single upload arrives as one object rather than an array
    uploaded_files = [uploaded_files] unless uploaded_files.is_a?(Array)

    logger.info "📁 Processing #{uploaded_files.size} files"

    file_paths_data = []
    uploaded_files.each_with_index do |file, index|
      next unless file.respond_to?(:original_filename)

      begin
        file_id = "#{session_id}_#{index}_#{Time.current.to_i}"
        # session_id can come straight from the client (temp_session_id), so
        # the composed name is reduced to its basename: any "../" segments are
        # dropped and the file always lands inside tmp/uploads.
        temp_name = File.basename("#{file_id}_#{file.original_filename}")
        temp_path = ::Rails.root.join('tmp', 'uploads', temp_name)
        FileUtils.mkdir_p(File.dirname(temp_path))
        File.binwrite(temp_path, file.read)

        file_paths_data << {
          temp_path: temp_path.to_s,
          original_filename: file.original_filename
        }

        logger.info "📄 Queued file: #{file.original_filename}"
      rescue => e
        # One unreadable file must not abort the rest of the batch
        logger.error "❌ Failed to prepare file #{file.original_filename}: #{e.message}"
      end
    end

    if file_paths_data.any?
      ::Ragdoll::BulkDocumentProcessingJob.perform_later(session_id, file_paths_data, force_duplicate)

      logger.info "🚀 Queued bulk processing job for #{file_paths_data.size} files"

      if request.xhr? || request.format.json?
        render json: {
          success: true,
          session_id: session_id,
          file_count: file_paths_data.size,
          message: "Upload started! Processing #{file_paths_data.size} files in the background."
        }
      else
        flash[:notice] = "Upload started! Processing #{file_paths_data.size} files in the background. You can monitor progress below."
        redirect_to ragdoll.documents_path
      end
    else
      logger.warn "⚠️ No valid files found for processing"

      if request.xhr? || request.format.json?
        render json: {
          success: false,
          error: "No valid files found for processing."
        }, status: :unprocessable_entity
      else
        flash[:alert] = "No valid files found for processing."
        redirect_to ragdoll.documents_path
      end
    end
  else
    logger.warn "⚠️ No files provided in bulk upload"

    if request.xhr? || request.format.json?
      render json: {
        success: false,
        error: "Please select files to upload."
      }, status: :bad_request
    else
      flash[:alert] = "Please select files to upload."
      redirect_to ragdoll.documents_path
    end
  end
rescue => e
  logger.error "💥 Fatal error in bulk_upload: #{e.message}"
  logger.error e.backtrace.join("\n")

  if request.xhr? || request.format.json?
    render json: {
      success: false,
      error: "Upload failed: #{e.message}"
    }, status: :internal_server_error
  else
    flash[:alert] = "Upload failed: #{e.message}"
    redirect_to ragdoll.documents_path
  end
end
|
272
|
+
|
273
|
+
# Destroys every document whose id is listed in params[:document_ids] and
# reports how many matched; sets an alert when no ids were submitted. Always
# redirects back to the document list.
def bulk_delete
  selected_ids = params[:document_ids]

  if selected_ids.present?
    doomed = ::Ragdoll::Document.where(id: selected_ids)
    # Counted before destroying, while the rows still exist
    count = doomed.count
    doomed.destroy_all
    flash[:notice] = "Successfully deleted #{count} documents."
  else
    flash[:alert] = "No documents selected for deletion."
  end

  redirect_to ragdoll.documents_path
end
|
285
|
+
|
286
|
+
# For every document listed in params[:document_ids]: destroys its
# embeddings, marks it 'pending' and enqueues GenerateEmbeddingsJob. Sets an
# alert when no ids were submitted. Always redirects to the document list.
def bulk_reprocess
  selected_ids = params[:document_ids]

  if selected_ids.present?
    selection = ::Ragdoll::Document.where(id: selected_ids)

    selection.each do |doc|
      doc.all_embeddings.destroy_all
      doc.update(status: 'pending')
      ::Ragdoll::GenerateEmbeddingsJob.perform_later(doc.id)
    end

    flash[:notice] = "Reprocessing initiated for #{selection.count} documents."
  else
    flash[:alert] = "No documents selected for reprocessing."
  end

  redirect_to ragdoll.documents_path
end
|
301
|
+
|
302
|
+
# JSON endpoint for asynchronous uploads of ragdoll_document[files].
#
# Each upload is written to tmp/uploads and a ProcessFileJob is enqueued for
# it. If enqueueing raises (or the job class is not defined) the file is
# processed inline with Ragdoll.add_document instead, and its temp file is
# removed afterwards. A failure on one file is recorded in the results and
# does not stop the others.
#
# The session identifier is params[:temp_session_id] when the client sent
# one, otherwise the Rails session id, otherwise the request id.
#
# Response: { success:, session_id:, results:, message: } with one results
# entry per handled file (status 'queued', 'completed_sync' or 'failed');
# 400 when no files were sent, 500 on an unexpected error.
#
# NOTE(review): the request params and session id are written to the log at
# info level; confirm that is acceptable for production logs.
def upload_async
  logger.info "upload_async called with params: #{params.inspect}"
  logger.info "Session ID: #{session.id}"
  logger.info "Request ID: #{request.request_id}"
  logger.info "Temp Session ID: #{params[:temp_session_id]}"

  if params[:ragdoll_document] && params[:ragdoll_document][:files].present?
    session_id, id_source =
      if params[:temp_session_id].present?
        [params[:temp_session_id], 'temp_session_id']
      elsif session.id.present?
        [session.id.to_s, 'session']
      else
        [request.request_id, 'request']
      end
    logger.info "Using session_id: #{session_id} (source: #{id_source})"

    uploaded_files = params[:ragdoll_document][:files]
    logger.info "Files received: #{uploaded_files.inspect}"

    # A single upload arrives as one object rather than an array
    uploaded_files = [uploaded_files] unless uploaded_files.is_a?(Array)

    # Diagnostics only: report entries that will be skipped below
    usable = ->(f) { f && f.respond_to?(:original_filename) && f.original_filename.present? }
    total_files = uploaded_files.count
    valid_files = uploaded_files.select(&usable)
    filtered_count = total_files - valid_files.count

    logger.info "📊 File upload analysis:"
    logger.info "  Total files in upload array: #{total_files}"
    logger.info "  Valid files with original_filename: #{valid_files.count}"
    logger.info "  Files filtered out: #{filtered_count}"

    if filtered_count > 0
      uploaded_files.each_with_index do |file, index|
        next if usable.call(file)

        logger.warn "  Filtered file #{index + 1}: #{file.class} - #{file.inspect}"
      end
    end

    # Only consulted by the inline fallback; the same for every file
    force_duplicate = params[:ragdoll_document][:force_duplicate] == '1'
    processed_count = 0
    results = []

    # Work through the uploads 50 at a time to avoid "too many open files"
    batch_size = 50
    uploaded_files.each_slice(batch_size).with_index do |file_batch, batch_index|
      file_batch.each_with_index do |file, batch_file_index|
        index = batch_index * batch_size + batch_file_index
        next unless file.respond_to?(:original_filename)

        logger.info "Processing file #{index + 1}: #{file.original_filename}"

        begin
          file_id = "#{session_id}_#{index}_#{Time.current.to_i}"

          # session_id can come straight from the client (temp_session_id), so
          # the composed name is reduced to its basename: any "../" segments
          # are dropped and the file always lands inside tmp/uploads.
          temp_name = File.basename("#{file_id}_#{file.original_filename}")
          temp_path = ::Rails.root.join('tmp', 'uploads', temp_name)
          FileUtils.mkdir_p(File.dirname(temp_path))
          File.binwrite(temp_path, file.read)

          logger.info "File saved to: #{temp_path}"

          begin
            if defined?(::Ragdoll::ProcessFileJob)
              ::Ragdoll::ProcessFileJob.perform_later(file_id, session_id, file.original_filename, temp_path.to_s)
              logger.info "Job queued for file: #{file_id}"
              results << { file: file.original_filename, status: 'queued' }
            else
              raise "ProcessFileJob not available"
            end
          rescue => job_error
            logger.warn "Background job failed, processing directly: #{job_error.message}"

            begin
              result = ::Ragdoll.add_document(path: temp_path.to_s, force: force_duplicate)

              if result[:success] && result[:document_id]
                document = ::Ragdoll::Document.find(result[:document_id])
                results << {
                  file: file.original_filename,
                  status: 'completed_sync',
                  document_id: document.id
                }
                logger.info "File processed synchronously: #{file.original_filename}"
              else
                results << {
                  file: file.original_filename,
                  status: 'failed',
                  error: result[:error] || 'Unknown error'
                }
              end
            ensure
              # Inline path only (a queued job still needs its file); runs
              # even when add_document raises so the temp file is not leaked.
              FileUtils.rm_f(temp_path)
            end
          end

          processed_count += 1
        rescue => file_error
          logger.error "Error processing file #{file.original_filename}: #{file_error.message}"
          results << {
            file: file.original_filename,
            status: 'failed',
            error: file_error.message
          }
        end
      end

      # Force garbage collection after each batch to free file descriptors
      GC.start
    end

    logger.info "Returning success response for #{processed_count} files"
    render json: {
      success: true,
      session_id: session_id,
      results: results,
      message: "#{processed_count} file(s) processed"
    }
  else
    logger.error "No files provided in upload_async"
    render json: { success: false, error: "No files provided" }, status: :bad_request
  end
rescue => e
  logger.error "Error in upload_async: #{e.message}"
  logger.error e.backtrace.join("\n")
  render json: { success: false, error: e.message }, status: :internal_server_error
end
|
433
|
+
|
434
|
+
# Processing overview: number of documents in each of the four statuses plus
# the twenty most recently updated documents. The JSON format returns only
# the per-status counts.
def status
  tracked_statuses = %w[pending processing processed failed]

  @processing_stats = tracked_statuses.each_with_object({}) do |state, counts|
    counts[state.to_sym] = ::Ragdoll::Document.where(status: state).count
  end

  @recent_activity = ::Ragdoll::Document.order(updated_at: :desc).limit(20)

  respond_to do |format|
    format.html
    format.json { render json: @processing_stats }
  end
end
|
449
|
+
|
450
|
+
private
|
451
|
+
|
452
|
+
# before_action helper: looks up the document named by params[:id] and stores
# it in @document for the member actions.
def set_document
  requested_id = params[:id]
  @document = ::Ragdoll::Document.find(requested_id)
end
|
455
|
+
|
456
|
+
# Strong parameters for the document form: the scalar fields listed below
# plus `files` as an array, all nested under :ragdoll_document.
def document_params
  scalar_fields = %i[title content metadata status text_content force_duplicate]
  params.require(:ragdoll_document).permit(*scalar_fields, files: [])
end
|
459
|
+
end
|
460
|
+
end
|