ragdoll-rails 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/app/controllers/ragdoll/search_controller.rb +6 -0
- data/app/views/ragdoll/documents/index.html.erb +4 -1
- data/app/views/ragdoll/search/index.html.erb +7 -4
- data/lib/generators/ragdoll/init/templates/ragdoll_config.rb +17 -4
- data/lib/ragdoll/rails/configuration.rb +2 -1
- data/lib/ragdoll/rails/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 490d43fbee26a4ebe582b4ce30e6d09050bb59b2e3edfb79ab7d12c964061e6f
|
4
|
+
data.tar.gz: d6f36e0b81eb7c9c1f0eb60657e3c372fa83dd3119555e776fcaf6469ac8c622
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ddb0ea79bdc41ffddb7aed78acdb27434f0e9325608ac7e4c854421731a0dea3bf16d100a998a6258f32ca5a8afaf837112873051c087f963b1a2ea34b28a114
|
7
|
+
data.tar.gz: db740b880b21b5e93ed7fd357c42382dc5bdb71d636e63ae3291d355fe030caf953fccd220c8af6841e78779fd2cc893e711510dc55157d62a3aa3e54c269893
|
data/README.md
CHANGED
@@ -31,9 +31,10 @@ See Also:
|
|
31
31
|
|
32
32
|
## ✨ Features
|
33
33
|
|
34
|
-
-
|
34
|
+
- 🔄 **Unified Text-Based RAG** - All media types converted to searchable text before vectorization
|
35
|
+
- 🔍 **Cross-Modal Search** - Find images by descriptions, audio by transcripts, documents by content
|
35
36
|
- 🤖 **Multi-Provider Support** - OpenAI, Anthropic, Google, Azure, Ollama, HuggingFace via ruby_llm
|
36
|
-
- 📄 **Multi-format Support** - PDF, DOCX, text, HTML, JSON, XML, CSV
|
37
|
+
- 📄 **Multi-format Support** - PDF, DOCX, text, HTML, JSON, XML, CSV, images (AI descriptions), audio (transcripts)
|
37
38
|
- 🧠 **Context Enhancement** - Automatically enhance AI prompts with relevant context
|
38
39
|
- ⚡ **Background Processing** - Asynchronous document processing with Sidekiq
|
39
40
|
- 🎛️ **Simple API** - Clean, intuitive interface for Rails integration
|
data/app/controllers/ragdoll/search_controller.rb
CHANGED
@@ -99,8 +99,11 @@ module Ragdoll
|
|
99
99
|
}
|
100
100
|
|
101
101
|
# Add document type filter if specified
|
102
|
+
# NOTE: Document type filtering is deprecated in unified text-based architecture
|
103
|
+
# All media types are now converted to text for unified cross-modal search
|
102
104
|
if @filters[:document_type].present?
|
103
105
|
search_params[:document_type] = @filters[:document_type]
|
106
|
+
::Rails.logger.warn "⚠️ Document type filtering is deprecated in unified text-based RAG architecture"
|
104
107
|
end
|
105
108
|
|
106
109
|
# Add status filter if specified
|
@@ -207,8 +210,11 @@ module Ragdoll
|
|
207
210
|
}
|
208
211
|
|
209
212
|
# Add document type filter if specified
|
213
|
+
# NOTE: Document type filtering is deprecated in unified text-based architecture
|
214
|
+
# All media types are now converted to text for unified cross-modal search
|
210
215
|
if @filters[:document_type].present?
|
211
216
|
fulltext_params[:document_type] = @filters[:document_type]
|
217
|
+
::Rails.logger.warn "⚠️ Document type filtering is deprecated in unified text-based RAG architecture"
|
212
218
|
end
|
213
219
|
|
214
220
|
# Add status filter if specified
|
data/app/views/ragdoll/documents/index.html.erb
CHANGED
@@ -21,7 +21,10 @@
|
|
21
21
|
<%= form.text_field :search, placeholder: "Search documents...", value: params[:search], class: "form-control" %>
|
22
22
|
</div>
|
23
23
|
<div class="col-md-3">
|
24
|
-
<%= form.select :document_type, options_for_select([["All Types", ""]] + @document_types.map { |type| [type.titleize, type] }, params[:document_type]), {}, { class: "form-select" } %>
|
24
|
+
<%= form.select :document_type, options_for_select([["All Types", ""]] + @document_types.map { |type| [type.titleize, type] }, params[:document_type]), {}, { class: "form-select", title: "Document Type filtering is deprecated in unified text-based architecture" } %>
|
25
|
+
<small class="form-text text-warning">
|
26
|
+
<i class="fas fa-exclamation-triangle"></i> Note: All media types are now converted to text for unified search
|
27
|
+
</small>
|
25
28
|
</div>
|
26
29
|
<div class="col-md-3">
|
27
30
|
<%= form.select :status, options_for_select([["All Statuses", ""]] + @statuses.map { |status| [status.titleize, status] }, params[:status]), {}, { class: "form-select" } %>
|
data/app/views/ragdoll/search/index.html.erb
CHANGED
@@ -38,10 +38,13 @@
|
|
38
38
|
<div class="col-md-6">
|
39
39
|
<div class="mb-3">
|
40
40
|
<label class="form-label">Document Type</label>
|
41
|
-
<%= form.select :document_type,
|
42
|
-
options_for_select([["All Types", ""]] + ::Ragdoll::Document.distinct.pluck(:document_type).compact.map { |type| [type.titleize, type] }, @filters[:document_type]),
|
43
|
-
{},
|
44
|
-
{ class: "form-select" } %>
|
41
|
+
<%= form.select :document_type,
|
42
|
+
options_for_select([["All Types", ""]] + ::Ragdoll::Document.distinct.pluck(:document_type).compact.map { |type| [type.titleize, type] }, @filters[:document_type]),
|
43
|
+
{},
|
44
|
+
{ class: "form-select", title: "Document Type filtering is deprecated in unified text-based architecture" } %>
|
45
|
+
<small class="form-text text-warning">
|
46
|
+
<i class="fas fa-exclamation-triangle"></i> Note: All media types are now converted to text for unified cross-modal search
|
47
|
+
</small>
|
45
48
|
</div>
|
46
49
|
</div>
|
47
50
|
|
data/lib/generators/ragdoll/init/templates/ragdoll_config.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# Ragdoll
|
3
|
+
# Ragdoll Unified Text-Based RAG Configuration
|
4
4
|
# This initializer configures the Ragdoll Rails engine for your application.
|
5
|
+
# All media types (images, audio, documents) are converted to searchable text.
|
5
6
|
|
6
7
|
Ragdoll.configure do |config|
|
7
8
|
# LLM Provider Configuration
|
@@ -39,9 +40,13 @@ Ragdoll.configure do |config|
|
|
39
40
|
}
|
40
41
|
}
|
41
42
|
|
42
|
-
# Embedding Model Configuration
|
43
|
+
# Unified Embedding Model Configuration
|
44
|
+
# Single model for all content types (converted to text)
|
43
45
|
# Examples: 'text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'
|
44
|
-
config.embedding_model = 'text-embedding-3-
|
46
|
+
config.embedding_model = 'text-embedding-3-large'
|
47
|
+
|
48
|
+
# Enable unified text-based architecture
|
49
|
+
config.use_unified_content = true
|
45
50
|
|
46
51
|
# Default model for chat/completion
|
47
52
|
config.default_model = 'gpt-4o-mini'
|
@@ -50,6 +55,13 @@ Ragdoll.configure do |config|
|
|
50
55
|
config.chunk_size = 1000
|
51
56
|
config.chunk_overlap = 200
|
52
57
|
|
58
|
+
# Text Conversion Settings (for unified architecture)
|
59
|
+
config.text_conversion = {
|
60
|
+
image_detail_level: :comprehensive, # :minimal, :standard, :comprehensive, :analytical
|
61
|
+
audio_transcription_provider: :openai, # :openai, :azure, :google, :whisper_local
|
62
|
+
enable_fallback_descriptions: true
|
63
|
+
}
|
64
|
+
|
53
65
|
# Search Configuration
|
54
66
|
config.search_similarity_threshold = 0.7
|
55
67
|
config.max_search_results = 10
|
@@ -92,5 +104,6 @@ Ragdoll::Rails.configure do |config|
|
|
92
104
|
config.max_file_size = 10.megabytes
|
93
105
|
|
94
106
|
# Allowed file types for document upload
|
95
|
-
|
107
|
+
# All types are converted to text: images -> descriptions, audio -> transcripts
|
108
|
+
config.allowed_file_types = %w[pdf docx txt md html htm json xml csv jpg jpeg png gif mp3 wav m4a]
|
96
109
|
end
|
data/lib/ragdoll/rails/configuration.rb
CHANGED
@@ -12,7 +12,8 @@ module Ragdoll
|
|
12
12
|
@job_adapter = :sidekiq
|
13
13
|
@queue_name = :ragdoll
|
14
14
|
@max_file_size = 10 * 1024 * 1024 # 10MB
|
15
|
-
|
15
|
+
# Unified text-based architecture supports all media types converted to text
|
16
|
+
@allowed_file_types = %w[pdf docx txt md html htm json xml csv jpg jpeg png gif mp3 wav m4a]
|
16
17
|
end
|
17
18
|
|
18
19
|
def configure_core
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ragdoll-rails
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.11
|
4
|
+
version: 0.1.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dewayne VanHoozer
|
@@ -206,7 +206,8 @@ dependencies:
|
|
206
206
|
- !ruby/object:Gem::Version
|
207
207
|
version: '0'
|
208
208
|
description: Rails engine providing ActiveRecord integration, background jobs, and
|
209
|
-
UI components for
|
209
|
+
UI components for Ragdoll's unified text-based RAG system. Converts all media types
|
210
|
+
to searchable text for powerful cross-modal search capabilities.
|
210
211
|
email:
|
211
212
|
- dvanhoozer@gmail.com
|
212
213
|
executables: []
|
@@ -305,7 +306,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
305
306
|
- !ruby/object:Gem::Version
|
306
307
|
version: '0'
|
307
308
|
requirements: []
|
308
|
-
rubygems_version: 3.7.
|
309
|
+
rubygems_version: 3.7.2
|
309
310
|
specification_version: 4
|
310
|
-
summary: Rails engine for Ragdoll RAG system
|
311
|
+
summary: Rails engine for Ragdoll unified text-based RAG system
|
311
312
|
test_files: []
|