ragdoll-rails 0.1.11 → 0.1.12

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1672f9baacb258a7c8842d8f3632011b5e4f064b0fbab5b58ac803cbac12d196
4
- data.tar.gz: 4ae58b4aa072fd99e1a48593812b6dcdde99b5877b9e9cdcce04e3614ff09d76
3
+ metadata.gz: 490d43fbee26a4ebe582b4ce30e6d09050bb59b2e3edfb79ab7d12c964061e6f
4
+ data.tar.gz: d6f36e0b81eb7c9c1f0eb60657e3c372fa83dd3119555e776fcaf6469ac8c622
5
5
  SHA512:
6
- metadata.gz: 9233cdc22a07420f550c992c6ba7b0ec53eb36d72f6b2aafa7ca8771802fdffe9a067364842ad40e318321260d7478853663ec39978c2926fc796152996b447d
7
- data.tar.gz: fcff07ebf80eba9f1e6c4cbbd48e995e2b29593b775185e15b179589cf21de935ef524c88fb823f985bdf04cf00a3dbfa806b5042d85c8bcae0b3a7d2fd93ce7
6
+ metadata.gz: ddb0ea79bdc41ffddb7aed78acdb27434f0e9325608ac7e4c854421731a0dea3bf16d100a998a6258f32ca5a8afaf837112873051c087f963b1a2ea34b28a114
7
+ data.tar.gz: db740b880b21b5e93ed7fd357c42382dc5bdb71d636e63ae3291d355fe030caf953fccd220c8af6841e78779fd2cc893e711510dc55157d62a3aa3e54c269893
data/README.md CHANGED
@@ -31,9 +31,10 @@ See Also:
31
31
 
32
32
  ## ✨ Features
33
33
 
34
- - 🔍 **Semantic Search** - Vector similarity search with flexible embedding models and pgvector
34
+ - 🔄 **Unified Text-Based RAG** - All media types converted to searchable text before vectorization
35
+ - 🔍 **Cross-Modal Search** - Find images by descriptions, audio by transcripts, documents by content
35
36
  - 🤖 **Multi-Provider Support** - OpenAI, Anthropic, Google, Azure, Ollama, HuggingFace via ruby_llm
36
- - 📄 **Multi-format Support** - PDF, DOCX, text, HTML, JSON, XML, CSV document parsing
37
+ - 📄 **Multi-format Support** - PDF, DOCX, text, HTML, JSON, XML, CSV, images (AI descriptions), audio (transcripts)
37
38
  - 🧠 **Context Enhancement** - Automatically enhance AI prompts with relevant context
38
39
  - ⚡ **Background Processing** - Asynchronous document processing with Sidekiq
39
40
  - 🎛️ **Simple API** - Clean, intuitive interface for Rails integration
@@ -99,8 +99,11 @@ module Ragdoll
99
99
  }
100
100
 
101
101
  # Add document type filter if specified
102
+ # NOTE: Document type filtering is deprecated in unified text-based architecture
103
+ # All media types are now converted to text for unified cross-modal search
102
104
  if @filters[:document_type].present?
103
105
  search_params[:document_type] = @filters[:document_type]
106
+ ::Rails.logger.warn "⚠️ Document type filtering is deprecated in unified text-based RAG architecture"
104
107
  end
105
108
 
106
109
  # Add status filter if specified
@@ -207,8 +210,11 @@ module Ragdoll
207
210
  }
208
211
 
209
212
  # Add document type filter if specified
213
+ # NOTE: Document type filtering is deprecated in unified text-based architecture
214
+ # All media types are now converted to text for unified cross-modal search
210
215
  if @filters[:document_type].present?
211
216
  fulltext_params[:document_type] = @filters[:document_type]
217
+ ::Rails.logger.warn "⚠️ Document type filtering is deprecated in unified text-based RAG architecture"
212
218
  end
213
219
 
214
220
  # Add status filter if specified
@@ -21,7 +21,10 @@
21
21
  <%= form.text_field :search, placeholder: "Search documents...", value: params[:search], class: "form-control" %>
22
22
  </div>
23
23
  <div class="col-md-3">
24
- <%= form.select :document_type, options_for_select([["All Types", ""]] + @document_types.map { |type| [type.titleize, type] }, params[:document_type]), {}, { class: "form-select" } %>
24
+ <%= form.select :document_type, options_for_select([["All Types", ""]] + @document_types.map { |type| [type.titleize, type] }, params[:document_type]), {}, { class: "form-select", title: "Document Type filtering is deprecated in unified text-based architecture" } %>
25
+ <small class="form-text text-warning">
26
+ <i class="fas fa-exclamation-triangle"></i> Note: All media types are now converted to text for unified search
27
+ </small>
25
28
  </div>
26
29
  <div class="col-md-3">
27
30
  <%= form.select :status, options_for_select([["All Statuses", ""]] + @statuses.map { |status| [status.titleize, status] }, params[:status]), {}, { class: "form-select" } %>
@@ -38,10 +38,13 @@
38
38
  <div class="col-md-6">
39
39
  <div class="mb-3">
40
40
  <label class="form-label">Document Type</label>
41
- <%= form.select :document_type,
42
- options_for_select([["All Types", ""]] + ::Ragdoll::Document.distinct.pluck(:document_type).compact.map { |type| [type.titleize, type] }, @filters[:document_type]),
43
- {},
44
- { class: "form-select" } %>
41
+ <%= form.select :document_type,
42
+ options_for_select([["All Types", ""]] + ::Ragdoll::Document.distinct.pluck(:document_type).compact.map { |type| [type.titleize, type] }, @filters[:document_type]),
43
+ {},
44
+ { class: "form-select", title: "Document Type filtering is deprecated in unified text-based architecture" } %>
45
+ <small class="form-text text-warning">
46
+ <i class="fas fa-exclamation-triangle"></i> Note: All media types are now converted to text for unified cross-modal search
47
+ </small>
45
48
  </div>
46
49
  </div>
47
50
 
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Ragdoll RAG (Retrieval-Augmented Generation) Configuration
3
+ # Ragdoll Unified Text-Based RAG Configuration
4
4
  # This initializer configures the Ragdoll Rails engine for your application.
5
+ # All media types (images, audio, documents) are converted to searchable text.
5
6
 
6
7
  Ragdoll.configure do |config|
7
8
  # LLM Provider Configuration
@@ -39,9 +40,13 @@ Ragdoll.configure do |config|
39
40
  }
40
41
  }
41
42
 
42
- # Embedding Model Configuration
43
+ # Unified Embedding Model Configuration
44
+ # Single model for all content types (converted to text)
43
45
  # Examples: 'text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'
44
- config.embedding_model = 'text-embedding-3-small'
46
+ config.embedding_model = 'text-embedding-3-large'
47
+
48
+ # Enable unified text-based architecture
49
+ config.use_unified_content = true
45
50
 
46
51
  # Default model for chat/completion
47
52
  config.default_model = 'gpt-4o-mini'
@@ -50,6 +55,13 @@ Ragdoll.configure do |config|
50
55
  config.chunk_size = 1000
51
56
  config.chunk_overlap = 200
52
57
 
58
+ # Text Conversion Settings (for unified architecture)
59
+ config.text_conversion = {
60
+ image_detail_level: :comprehensive, # :minimal, :standard, :comprehensive, :analytical
61
+ audio_transcription_provider: :openai, # :openai, :azure, :google, :whisper_local
62
+ enable_fallback_descriptions: true
63
+ }
64
+
53
65
  # Search Configuration
54
66
  config.search_similarity_threshold = 0.7
55
67
  config.max_search_results = 10
@@ -92,5 +104,6 @@ Ragdoll::Rails.configure do |config|
92
104
  config.max_file_size = 10.megabytes
93
105
 
94
106
  # Allowed file types for document upload
95
- config.allowed_file_types = %w[pdf docx txt md html htm json xml csv]
107
+ # All types are converted to text: images -> descriptions, audio -> transcripts
108
+ config.allowed_file_types = %w[pdf docx txt md html htm json xml csv jpg jpeg png gif mp3 wav m4a]
96
109
  end
@@ -12,7 +12,8 @@ module Ragdoll
12
12
  @job_adapter = :sidekiq
13
13
  @queue_name = :ragdoll
14
14
  @max_file_size = 10 * 1024 * 1024 # 10MB
15
- @allowed_file_types = %w[pdf docx txt md html htm json xml csv]
15
+ # Unified text-based architecture supports all media types converted to text
16
+ @allowed_file_types = %w[pdf docx txt md html htm json xml csv jpg jpeg png gif mp3 wav m4a]
16
17
  end
17
18
 
18
19
  def configure_core
@@ -4,6 +4,6 @@
4
4
 
5
5
  module Ragdoll
6
6
  module Rails
7
- VERSION = "0.1.11"
7
+ VERSION = "0.1.12"
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ragdoll-rails
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dewayne VanHoozer
@@ -206,7 +206,8 @@ dependencies:
206
206
  - !ruby/object:Gem::Version
207
207
  version: '0'
208
208
  description: Rails engine providing ActiveRecord integration, background jobs, and
209
- UI components for the Ragdoll RAG (Retrieval-Augmented Generation) system
209
+ UI components for Ragdoll's unified text-based RAG system. Converts all media types
210
+ to searchable text for powerful cross-modal search capabilities.
210
211
  email:
211
212
  - dvanhoozer@gmail.com
212
213
  executables: []
@@ -305,7 +306,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
305
306
  - !ruby/object:Gem::Version
306
307
  version: '0'
307
308
  requirements: []
308
- rubygems_version: 3.7.1
309
+ rubygems_version: 3.7.2
309
310
  specification_version: 4
310
- summary: Rails engine for Ragdoll RAG system
311
+ summary: Rails engine for Ragdoll unified text-based RAG system
311
312
  test_files: []