ragdoll-cli 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 843beb22244ce386dadbe959585717710940767c066307b2a92d9cfeafa6c2c9
4
- data.tar.gz: 3a47b7c95155ab97644063746998f3e2f9888f0ad7e757af1a35240baed67537
3
+ metadata.gz: a4794fa5ad365db7598b0cb87530391c6d8ca24c1ea431149e614e4997a4d5a6
4
+ data.tar.gz: 1a3fb4eb495a8ed99028b57925c6ddead351c4bc031e02b6b2c075c024c869aa
5
5
  SHA512:
6
- metadata.gz: d15dd454cec4f80ce738ffa100c6a0c00e8642e7881443aca0247436dcb9e321761467b7554439324b3ba5c9b1b232f76d87ce18a4a90547b88e899ea2e196b4
7
- data.tar.gz: 14d92726298d5d0419075bc8d9f87bc0be0d0a34d2128c20da4fb768d68770cf0ded31735c588347cce462c94c705dc8252734317db7c09fafe25d73b19f29c6
6
+ metadata.gz: bd2009f1c320bc2cad1666b9f154f4e9c4899e9e703e89bded35cd3bc2a679d8f9a5726ebd3000546bab6c4a3fbe1bd79f2a98486ac85234fbdffde9a59479b6
7
+ data.tar.gz: dfa5ec3f7be52723c6850714ec7b3ebec1e9fd3cfd929fb970ab5bc1df2b77233924b2286f8ef3788a864e2d70ae817b7acc6ed361ac9278288d31d45087b40d
data/README.md CHANGED
@@ -95,18 +95,47 @@ ragdoll import "files/*" --type pdf
95
95
  ### Search
96
96
 
97
97
  ```bash
98
- # Basic search
98
+ # Basic semantic search (default)
99
99
  ragdoll search "machine learning concepts"
100
100
 
101
+ # Full-text search for exact keywords
102
+ ragdoll search "neural networks" --search-type fulltext
103
+
104
+ # Hybrid search combining semantic and full-text
105
+ ragdoll search "AI algorithms" --search-type hybrid
106
+
107
+ # Customize hybrid search weights
108
+ ragdoll search "deep learning" --search-type hybrid --semantic-weight 0.6 --text-weight 0.4
109
+
101
110
  # Limit number of results
102
111
  ragdoll search "AI algorithms" --limit 5
103
112
 
113
+ # Set similarity threshold
114
+ ragdoll search "machine learning" --threshold 0.8
115
+
104
116
  # Different output formats
105
117
  ragdoll search "deep learning" --format json
106
118
  ragdoll search "AI" --format plain
107
119
  ragdoll search "ML" --format table # default
108
120
  ```
109
121
 
122
+ #### Search Types
123
+
124
+ - **Semantic Search** (default): Uses AI embeddings to find conceptually similar content
125
+ - **Full-text Search**: Uses PostgreSQL text search for exact keyword matching
126
+ - **Hybrid Search**: Combines both semantic and full-text search with configurable weights
127
+
128
+ ```bash
129
+ # Semantic search - best for concepts and meaning
130
+ ragdoll search "How do neural networks learn?" --search-type semantic
131
+
132
+ # Full-text search - best for exact terms
133
+ ragdoll search "backpropagation algorithm" --search-type fulltext
134
+
135
+ # Hybrid search - best comprehensive results
136
+ ragdoll search "transformer architecture" --search-type hybrid --semantic-weight 0.7 --text-weight 0.3
137
+ ```
138
+
110
139
  ### Document Management
111
140
 
112
141
  ```bash
@@ -232,11 +261,20 @@ ragdoll import "knowledge-base/*" --recursive
232
261
  ### Search and get enhanced prompts
233
262
 
234
263
  ```bash
235
- # Basic search
264
+ # Semantic search for concepts
236
265
  ragdoll search "How to configure SSL certificates?"
237
266
 
238
- # Get detailed results
239
- ragdoll search "database optimization" --format plain --limit 3
267
+ # Full-text search for specific terms
268
+ ragdoll search "SSL certificate configuration" --search-type fulltext
269
+
270
+ # Hybrid search for comprehensive results
271
+ ragdoll search "database optimization techniques" --search-type hybrid
272
+
273
+ # Get detailed results with custom formatting
274
+ ragdoll search "performance tuning" --format plain --limit 3
275
+
276
+ # Search with custom similarity threshold
277
+ ragdoll search "security best practices" --threshold 0.75 --search-type semantic
240
278
  ```
241
279
 
242
280
  ### Manage your knowledge base
data/Rakefile CHANGED
@@ -1,18 +1,37 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'simplecov'
4
- SimpleCov.start
5
-
6
3
  # Suppress bundler/rubygems warnings
7
4
  $VERBOSE = nil
8
5
 
9
6
  require "bundler/gem_tasks"
10
7
  require "rake/testtask"
11
8
 
12
- Rake::TestTask.new(:test) do |t|
13
- t.libs << "test"
14
- t.libs << "lib"
15
- t.test_files = FileList["test/**/*_test.rb"]
9
require "open3"

# Decide whether a captured test run should be treated as a failure.
#
# @param output [String] combined stdout/stderr of the test process
# @param exit_status [Integer, nil] exit status of the test process
#   (nil when the process was terminated by a signal)
# @return [Boolean] true when the run must be reported as failed
def test_run_failed?(output, exit_status)
  # No exit status at all means the child never exited normally.
  return true if exit_status.nil?

  # Summary lines look like "N runs, N assertions, N failures, N errors, N skips".
  summaries = output.scan(/(\d+) failures, (\d+) errors/)

  # Without any summary the suite never ran to completion (load error,
  # syntax error, crash); trust the exit status in that case.
  return !exit_status.zero? if summaries.empty?

  # Exit status 1 alone is tolerated because it might come from SimpleCov;
  # anything above 1 is a real failure.
  summaries.any? { |counts| counts.any? { |count| count.to_i.positive? } } || exit_status > 1
end

# Custom test task that ensures proper exit codes
desc "Run tests"
task :test do
  test_files = FileList["test/**/*_test.rb"]

  # `ruby a_test.rb b_test.rb` would execute only the first file and hand the
  # rest over as ARGV, so every file is required explicitly. ARGV is cleared
  # first so the test framework does not see the file names as arguments.
  loader = "files = ARGV.dup; ARGV.clear; files.each { |f| require File.expand_path(f) }"

  # Argument-array form: no shell involved, so paths with spaces are safe.
  output, status = Open3.capture2e(
    "bundle", "exec", "ruby", "-Ilib", "-Itest", "-e", loader, *test_files
  )

  # Print the output
  puts output

  if test_run_failed?(output, status.exitstatus)
    puts "Tests failed!"
    exit 1
  end

  # No explicit `exit 0` here: it would stop rake before any task chained
  # after :test gets a chance to run.
  puts "All tests passed successfully!" unless output.include?("0 failures, 0 errors")
end
17
36
 
18
37
  # Load annotate tasks
@@ -0,0 +1,317 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thor'
4
+ require 'json'
5
+
6
+ module Ragdoll
7
+ module CLI
8
+ class Keywords < Thor
9
+ desc 'search KEYWORD [KEYWORD2...]', 'Search documents by keywords only'
10
+ method_option :all, type: :boolean, default: false, aliases: '-a',
11
+ desc: 'Require ALL keywords to match (AND logic, default: OR logic)'
12
+ method_option :limit, type: :numeric, default: 20, aliases: '-l',
13
+ desc: 'Maximum number of results to return'
14
+ method_option :format, type: :string, default: 'table', aliases: '-f',
15
+ desc: 'Output format (table, json, plain)'
16
# Search documents by one or more keywords and print the matches.
#
# Prints usage help and exits with status 1 when no keyword is given.
# With the :all option the client's AND search is used, otherwise the OR
# search. Any StandardError raised while searching or rendering is reported
# and turned into exit status 1.
#
# @param keywords [Array<String>] keywords to look for
def search(*keywords)
  if keywords.empty?
    puts [
      'Error: No keywords provided',
      'Usage: ragdoll keywords search KEYWORD [KEYWORD2...]',
      'Examples:',
      ' ragdoll keywords search ruby programming',
      ' ragdoll keywords search --all ruby programming # Must contain ALL keywords',
      ' ragdoll keywords search ruby --limit=50'
    ]
    exit 1
  end

  client = StandaloneClient.new
  joined = keywords.join(', ')

  puts "Searching documents by keywords: #{joined}"
  puts "Mode: #{options[:all] ? 'ALL keywords (AND)' : 'ANY keywords (OR)'}"
  puts

  begin
    # Pick the client entry point that matches the requested matching mode.
    finder = options[:all] ? :search_by_keywords_all : :search_by_keywords
    found = normalize_results(client.public_send(finder, keywords, limit: options[:limit]))

    if found.empty?
      puts [
        "No documents found with keywords: #{joined}",
        '',
        "💡 Suggestions:",
        " • Try different keywords",
        " • Use fewer keywords",
        " • Switch between --all and default (OR) modes",
        " • Check available keywords with: ragdoll keywords list"
      ]
    else
      display_results(found, options[:format], keywords)
    end
  rescue StandardError => e
    puts "Error searching by keywords: #{e.message}"
    exit 1
  end
end
58
+
59
+ desc 'list', 'List all available keywords in the system'
60
+ method_option :limit, type: :numeric, default: 100, aliases: '-l',
61
+ desc: 'Maximum number of keywords to show'
62
+ method_option :format, type: :string, default: 'table', aliases: '-f',
63
+ desc: 'Output format (table, json, plain)'
64
+ method_option :min_count, type: :numeric, default: 1, aliases: '-m',
65
+ desc: 'Show only keywords used by at least N documents'
66
# Print the keywords known to the client together with their document counts.
#
# Honors the :limit, :min_count and :format options ('json', 'plain', or the
# default table). Any StandardError is reported and turned into exit status 1.
def list
  client = StandaloneClient.new

  begin
    frequencies = client.keyword_frequencies(
      limit: options[:limit],
      min_count: options[:min_count]
    )

    if frequencies.empty?
      puts "No keywords found in the system."
      puts "Add documents with keywords or update existing documents."
      return
    end

    output_format = options[:format]
    if output_format == 'json'
      puts JSON.pretty_generate(frequencies)
    elsif output_format == 'plain'
      frequencies.each { |keyword, count| puts "#{keyword}: #{count}" }
    else
      # Table format: keyword column is 30 characters wide, longer keywords are cut.
      puts "Keywords in system (minimum #{options[:min_count]} documents):"
      puts
      puts "#{'Keyword'.ljust(30)}Document Count"
      puts '-' * 45

      frequencies.each do |keyword, count|
        puts "#{keyword[0..29].ljust(30)}#{count}"
      end

      puts
      puts "Total keywords: #{frequencies.length}"
    end
  rescue StandardError => e
    puts "Error listing keywords: #{e.message}"
    exit 1
  end
end
108
+
109
+ desc 'add DOCUMENT_ID KEYWORD [KEYWORD2...]', 'Add keywords to a document'
110
# Add keywords to a document through the standalone client.
#
# Prints usage help and exits with status 1 when no keyword is given, when the
# client reports failure, or when a StandardError is raised.
#
# @param document_id [String] identifier of the document to update
# @param keywords [Array<String>] keywords to add
def add(document_id, *keywords)
  if keywords.empty?
    puts [
      'Error: No keywords provided',
      'Usage: ragdoll keywords add DOCUMENT_ID KEYWORD [KEYWORD2...]',
      'Example: ragdoll keywords add 123 ruby programming web'
    ]
    exit 1
  end

  client = StandaloneClient.new

  begin
    outcome = client.add_keywords_to_document(document_id, keywords)

    unless outcome[:success]
      puts "✗ Failed to add keywords: #{outcome[:message] || 'Unknown error'}"
      exit 1
    end

    puts "✓ Added keywords to document #{document_id}: #{keywords.join(', ')}"
    current = outcome[:keywords]
    puts "Document now has keywords: #{current.join(', ')}" if current
  rescue StandardError => e
    puts "Error adding keywords: #{e.message}"
    exit 1
  end
end
135
+
136
+ desc 'remove DOCUMENT_ID KEYWORD [KEYWORD2...]', 'Remove keywords from a document'
137
# Remove keywords from a document through the standalone client.
#
# Prints usage help and exits with status 1 when no keyword is given, when the
# client reports failure, or when a StandardError is raised.
#
# @param document_id [String] identifier of the document to update
# @param keywords [Array<String>] keywords to remove
def remove(document_id, *keywords)
  if keywords.empty?
    puts [
      'Error: No keywords provided',
      'Usage: ragdoll keywords remove DOCUMENT_ID KEYWORD [KEYWORD2...]',
      'Example: ragdoll keywords remove 123 old-keyword deprecated'
    ]
    exit 1
  end

  client = StandaloneClient.new

  begin
    outcome = client.remove_keywords_from_document(document_id, keywords)

    unless outcome[:success]
      puts "✗ Failed to remove keywords: #{outcome[:message] || 'Unknown error'}"
      exit 1
    end

    puts "✓ Removed keywords from document #{document_id}: #{keywords.join(', ')}"
    remaining = outcome[:keywords]
    puts "Document now has keywords: #{remaining.join(', ')}" if remaining
  rescue StandardError => e
    puts "Error removing keywords: #{e.message}"
    exit 1
  end
end
162
+
163
+ desc 'set DOCUMENT_ID KEYWORD [KEYWORD2...]', 'Set keywords for a document (replaces existing)'
164
# Replace a document's keywords through the standalone client.
#
# Prints usage help and exits with status 1 when no keyword is given, when the
# client reports failure, or when a StandardError is raised.
#
# @param document_id [String] identifier of the document to update
# @param keywords [Array<String>] the complete new keyword list
def set(document_id, *keywords)
  if keywords.empty?
    puts [
      'Error: No keywords provided',
      'Usage: ragdoll keywords set DOCUMENT_ID KEYWORD [KEYWORD2...]',
      'Example: ragdoll keywords set 123 ruby programming web'
    ]
    exit 1
  end

  client = StandaloneClient.new

  begin
    outcome = client.set_document_keywords(document_id, keywords)

    unless outcome[:success]
      puts "✗ Failed to set keywords: #{outcome[:message] || 'Unknown error'}"
      exit 1
    end

    puts "✓ Set keywords for document #{document_id}: #{keywords.join(', ')}"
  rescue StandardError => e
    puts "Error setting keywords: #{e.message}"
    exit 1
  end
end
188
+
189
+ desc 'show DOCUMENT_ID', 'Show keywords for a specific document'
190
# Print the title and keywords of a single document.
#
# The document returned by the client is read with symbol keys first and
# string keys as a fallback. Any StandardError is reported and turned into
# exit status 1.
#
# @param document_id [String] identifier of the document to display
def show(document_id)
  client = StandaloneClient.new

  begin
    document = client.get_document(document_id)
    # Accept both symbol-keyed and string-keyed document hashes.
    read = ->(key) { document[key] || document[key.to_s] }

    keywords = read.call(:keywords) || []

    puts "Keywords for document #{document_id}:"
    puts " Title: #{read.call(:title) || 'Untitled'}"

    unless keywords.empty?
      puts " Keywords: #{keywords.join(', ')}"
      next_step = nil
    else
      puts " Keywords: (none)"
      puts
      next_step = "💡 Add keywords with: ragdoll keywords add #{document_id} KEYWORD1 KEYWORD2..."
    end
    puts next_step if next_step
  rescue StandardError => e
    puts "Error getting document keywords: #{e.message}"
    exit 1
  end
end
213
+
214
+ desc 'find KEYWORD', 'Find documents containing a specific keyword'
215
+ method_option :limit, type: :numeric, default: 20, aliases: '-l',
216
+ desc: 'Maximum number of results to return'
217
+ method_option :format, type: :string, default: 'table', aliases: '-f',
218
+ desc: 'Output format (table, json, plain)'
219
# Find documents containing one specific keyword.
#
# Delegates to #search with the single keyword, so output and error handling
# are exactly those of #search.
#
# @param keyword [String] the keyword to look for
def find(keyword)
  single_keyword = keyword
  search(single_keyword)
end
222
+
223
+ desc 'stats', 'Show keyword usage statistics'
224
# Print keyword usage statistics obtained from the standalone client.
#
# When the statistics hash carries an :error entry, the error is reported and
# the command exits with status 1 instead of printing all-zero numbers. Any
# StandardError is likewise reported and turned into exit status 1.
def stats
  client = StandaloneClient.new

  begin
    # Named `summary` so the local does not shadow this method's own name.
    summary = client.keyword_statistics

    # Treat an :error entry as a failure rather than as "no keywords".
    if summary[:error]
      puts "Error getting keyword statistics: #{summary[:error]}"
      exit 1
    end

    puts "Keyword Statistics:"
    puts " Total unique keywords: #{summary[:total_keywords] || 0}"
    puts " Total documents with keywords: #{summary[:documents_with_keywords] || 0}"
    puts " Average keywords per document: #{summary[:avg_keywords_per_document]&.round(2) || 0}"
    puts " Most common keywords:"

    if summary[:top_keywords]&.any?
      summary[:top_keywords].each_with_index do |(keyword, count), index|
        puts " #{index + 1}. #{keyword} (#{count} documents)"
      end
    else
      puts " (none)"
    end

    puts " Least used keywords: #{summary[:singleton_keywords] || 0}"
  rescue StandardError => e
    puts "Error getting keyword statistics: #{e.message}"
    exit 1
  end
end
250
+
251
+ private
252
+
253
# Coerce a keyword-search response into an Array of result rows.
#
# Accepted inputs:
# * nil                      -> []
# * Hash response            -> its :results / 'results' entry
# * any other Enumerable     -> its elements (Array, lazily loaded collections)
# * anything else            -> []
#
# Each row is kept as-is when it is a Hash, converted with #to_hash when it
# offers that, or built from #attributes with symbolized keys (done with plain
# Ruby so no ActiveSupport extension is required). Other rows pass through.
#
# @param results [Object] raw value returned by the client
# @return [Array] normalized rows
def normalize_results(results)
  rows =
    case results
    when nil then []
    when Hash then Array(results[:results] || results['results'])
    when Enumerable then results.to_a
    else []
    end

  rows.map do |row|
    if row.is_a?(Hash)
      row
    elsif row.respond_to?(:to_hash)
      row.to_hash
    elsif row.respond_to?(:attributes)
      row.attributes.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
    else
      row
    end
  end
end
276
+
277
# Print keyword-search results in the requested format.
#
# Rows are read with symbol keys first and string keys as a fallback. Ids and
# titles are converted with #to_s before slicing, because slicing an Integer
# id with a range is a bit reference rather than a substring, and a document's
# keywords are coerced with Array() so nil or a single value cannot raise.
#
# @param results [Array<Hash>] normalized result rows
# @param format [String] 'json', 'plain', or anything else for the table view
# @param keywords [Array<String>] the keywords that were searched for
def display_results(results, format, keywords)
  field = ->(row, key) { row[key] || row[key.to_s] }

  case format
  when 'json'
    puts JSON.pretty_generate(results)
  when 'plain'
    results.each_with_index do |result, index|
      title = field.call(result, :title) || 'Untitled'
      doc_keywords = Array(field.call(result, :keywords))
      matching_keywords = doc_keywords & keywords

      puts "#{index + 1}. #{title}"
      puts " ID: #{field.call(result, :id)}"
      puts " Keywords: #{doc_keywords.join(', ')}"
      puts " Matching: #{matching_keywords.join(', ')}" if matching_keywords.any?
      puts
    end
  else
    # Table format
    puts "Found #{results.length} documents:"
    puts
    puts "#{'ID'.ljust(12)}#{'Title'.ljust(30)}#{'Keywords'.ljust(40)}Matches"
    puts '-' * 90

    results.each do |result|
      id = field.call(result, :id).to_s[0..11].ljust(12)
      title = (field.call(result, :title) || 'Untitled').to_s[0..29].ljust(30)
      doc_keywords = Array(field.call(result, :keywords))
      keywords_str = doc_keywords.join(', ')[0..39].ljust(40)
      matches = (doc_keywords & keywords).length

      puts "#{id}#{title}#{keywords_str}#{matches}"
    end

    puts
    puts "Use --format=json for complete results or --format=plain for detailed view"
  end
end
315
+ end
316
+ end
317
+ end
@@ -9,7 +9,25 @@ module Ragdoll
9
9
  client = StandaloneClient.new
10
10
 
11
11
  puts "Searching for: #{query}"
12
- puts "Options: #{options.to_h}" unless options.to_h.empty?
12
+ puts "Search type: #{options[:search_type] || 'semantic'}"
13
+
14
+ # Show hybrid search weights if applicable
15
+ if options[:search_type] == 'hybrid'
16
+ semantic_w = options[:semantic_weight] || 0.7
17
+ text_w = options[:text_weight] || 0.3
18
+ puts "Weights: semantic=#{semantic_w}, text=#{text_w}"
19
+ end
20
+
21
+ # Show keyword search mode if keywords are provided
22
+ if options[:keywords]
23
+ keywords_array = options[:keywords].split(',').map(&:strip)
24
+ keywords_mode = options[:keywords_all] ? "ALL keywords (AND)" : "ANY keywords (OR)"
25
+ puts "Keywords: #{keywords_array.join(', ')} [#{keywords_mode}]"
26
+ end
27
+
28
+ # Show other options, excluding display-related ones
29
+ relevant_options = options.to_h.except(:keywords, :keywords_all, :search_type, :semantic_weight, :text_weight, :format)
30
+ puts "Options: #{relevant_options}" unless relevant_options.empty?
13
31
  puts
14
32
 
15
33
  search_options = {}
@@ -17,7 +35,11 @@ module Ragdoll
17
35
  search_options[:threshold] = options[:threshold] if options[:threshold]
18
36
  search_options[:content_type] = options[:content_type] if options[:content_type]
19
37
  search_options[:classification] = options[:classification] if options[:classification]
20
- search_options[:keywords] = options[:keywords].split(',').map(&:strip) if options[:keywords]
38
+ if options[:keywords]
39
+ keywords_array = options[:keywords].split(',').map(&:strip)
40
+ search_options[:keywords] = keywords_array
41
+ search_options[:keywords_all] = options[:keywords_all] if options[:keywords_all]
42
+ end
21
43
  search_options[:tags] = options[:tags].split(',').map(&:strip) if options[:tags]
22
44
 
23
45
  # Add search tracking options
@@ -28,11 +50,14 @@ module Ragdoll
28
50
  # Select search method based on search_type
29
51
  search_response = case options[:search_type]
30
52
  when 'hybrid'
31
- client.hybrid_search(query: query, **search_options)
53
+ # Add weight parameters if provided
54
+ search_options[:semantic_weight] = options[:semantic_weight] if options[:semantic_weight]
55
+ search_options[:text_weight] = options[:text_weight] if options[:text_weight]
56
+ client.hybrid_search(query, **search_options)
32
57
  when 'fulltext'
33
- # Note: fulltext search would need to be implemented in client
34
- client.search(query: query, **search_options)
58
+ client.fulltext_search(query, **search_options)
35
59
  else
60
+ # Default to semantic search
36
61
  client.search(query: query, **search_options)
37
62
  end
38
63
 
@@ -76,6 +101,8 @@ module Ragdoll
76
101
  if highest < 0.3
77
102
  puts " • Your query might not match the document content well"
78
103
  puts " • Try different or more specific search terms"
104
+ puts " • Try keyword-based search: ragdoll keywords search KEYWORD"
105
+ puts " • List available keywords: ragdoll keywords list"
79
106
  end
80
107
  elsif above_threshold > 0
81
108
  puts "💡 Note: Found #{above_threshold} similar content above threshold #{threshold}"
@@ -85,6 +112,7 @@ module Ragdoll
85
112
  else
86
113
  puts "(Total documents in system: #{total})" if total > 0
87
114
  puts "Try adjusting your search terms or check if documents have been processed."
115
+ puts "Alternative: Use keyword-based search: ragdoll keywords search KEYWORD"
88
116
  end
89
117
 
90
118
  return
@@ -99,28 +127,65 @@ module Ragdoll
99
127
  content = safe_string_value(result, [:content, :text], '')
100
128
  puts "#{index + 1}. #{title}"
101
129
  puts " ID: #{result[:document_id] || result[:id]}"
102
- puts " Similarity: #{result[:similarity]&.round(3) || 'N/A'}"
130
+
131
+ # Show appropriate score based on search type
132
+ if options[:search_type] == 'hybrid'
133
+ puts " Combined Score: #{result[:combined_score]&.round(3) || 'N/A'}"
134
+ if result[:search_types]
135
+ puts " Match Types: #{result[:search_types].join(', ')}"
136
+ end
137
+ elsif options[:search_type] == 'fulltext'
138
+ puts " Text Match: #{result[:fulltext_similarity]&.round(3) || 'N/A'}"
139
+ else
140
+ puts " Similarity: #{result[:similarity]&.round(3) || 'N/A'}"
141
+ end
142
+
103
143
  puts " Content: #{content[0..200]}..."
104
144
  puts
105
145
  end
106
146
  else
107
147
  # Table format (default)
108
- puts "Found #{results.length} results:"
148
+ puts "Found #{results.length} results (#{search_response[:search_type] || 'semantic'} search):"
109
149
  puts
110
- puts 'Rank'.ljust(5) + 'Title'.ljust(30) + 'Similarity'.ljust(12) + 'Content Preview'
150
+
151
+ # Adjust column header based on search type
152
+ score_header = case options[:search_type]
153
+ when 'hybrid'
154
+ 'Score'.ljust(12)
155
+ when 'fulltext'
156
+ 'Text Match'.ljust(12)
157
+ else
158
+ 'Similarity'.ljust(12)
159
+ end
160
+
161
+ puts 'Rank'.ljust(5) + 'Title'.ljust(30) + score_header + 'Content Preview'
111
162
  puts '-' * 80
112
163
 
113
164
  results.each_with_index do |result, index|
114
165
  rank = (index + 1).to_s.ljust(5)
115
166
  title = safe_string_value(result, [:title, :document_title], 'Untitled')[0..29].ljust(30)
116
- similarity = (result[:similarity]&.round(3) || 'N/A').to_s.ljust(12)
167
+
168
+ # Get appropriate score based on search type
169
+ score = case options[:search_type]
170
+ when 'hybrid'
171
+ result[:combined_score] || result[:weighted_score]
172
+ when 'fulltext'
173
+ result[:fulltext_similarity]
174
+ else
175
+ result[:similarity]
176
+ end
177
+
178
+ score_str = (score&.round(3) || 'N/A').to_s.ljust(12)
117
179
  content = safe_string_value(result, [:content, :text], '')[0..50]
118
180
  content += '...' if content.length == 50
119
181
 
120
- puts "#{rank}#{title}#{similarity}#{content}"
182
+ puts "#{rank}#{title}#{score_str}#{content}"
121
183
  end
122
184
 
123
185
  puts
186
+ if options[:search_type] == 'hybrid' && (options[:semantic_weight] || options[:text_weight])
187
+ puts "Weights: semantic=#{options[:semantic_weight] || 0.7}, text=#{options[:text_weight] || 0.3}"
188
+ end
124
189
  puts 'Use --format=json for complete results or --format=plain for detailed view'
125
190
  end
126
191
  end
@@ -106,19 +106,47 @@ module Ragdoll
106
106
  end
107
107
  end
108
108
 
109
- def hybrid_search(query = nil, **options)
110
- # TODO: This will delegate to Ragdoll core when hybrid search is implemented
111
- if defined?(Ragdoll) && Ragdoll.respond_to?(:hybrid_search)
112
- if query
113
- Ragdoll.hybrid_search(query: query, **options)
114
- else
115
- Ragdoll.hybrid_search(**options)
116
- end
117
- else
118
- # Fallback to regular search for now
119
- result = search(query, **options)
120
- result.is_a?(Hash) ? result.merge(search_type: 'hybrid') : { search_type: 'hybrid', results: [] }
109
# Run a hybrid search by delegating to Ragdoll.hybrid_search.
#
# @param query [String] the search text, forwarded as the :query keyword
# @param options [Hash] further keyword options, forwarded unchanged
# @return [Object] whatever Ragdoll.hybrid_search returns
def hybrid_search(query, **options)
  request = { query: query }.merge(options)
  Ragdoll.hybrid_search(**request)
end
113
+
114
# Run a full-text search through Ragdoll::Document.search_content and wrap
# the matches in the response hash used by the other search methods.
#
# All options are forwarded unchanged to search_content. The :threshold
# option is only echoed back as :threshold_used (0.0 when absent); no
# filtering happens in this method.
#
# @param query [String] the text to search for
# @param options [Hash] options forwarded to search_content
# @return [Hash] :query, :search_type ('fulltext'), :results, :total_results,
#   :threshold_used, plus :error when the search raised a StandardError
def fulltext_search(query, **options)
  threshold = options[:threshold] || 0.0

  documents = Ragdoll::Document.search_content(query, **options)

  # Format results to match expected structure
  results = documents.map do |doc|
    {
      document_id: doc.id.to_s,
      document_title: doc.title,
      document_location: doc.location,
      # Preview only; to_s keeps a document without content from failing the
      # whole search.
      content: doc.content.to_s[0..500],
      fulltext_similarity: doc.respond_to?(:fulltext_similarity) ? doc.fulltext_similarity : nil,
      document_type: doc.document_type,
      status: doc.status
    }
  end

  {
    query: query,
    search_type: 'fulltext',
    results: results,
    total_results: results.length,
    threshold_used: threshold
  }
rescue StandardError => e
  # Same shape as the success response so callers can read it uniformly.
  {
    query: query,
    search_type: 'fulltext',
    results: [],
    total_results: 0,
    threshold_used: options[:threshold] || 0.0,
    error: "Full-text search failed: #{e.message}"
  }
end
123
151
 
124
152
  def healthy?
@@ -130,6 +158,142 @@ module Ragdoll
130
158
  Ragdoll.configuration
131
159
  end
132
160
 
161
+ # Keywords-specific search methods
162
# Keywords-specific search (any keyword may match).
#
# Uses Ragdoll::Document.search_by_keywords when that class method exists and
# converts every match with #to_hash; otherwise falls back to the regular
# #search with a :keywords filter.
#
# @param keywords [Array<String>] keywords to search for
# @param options [Hash] options forwarded to the underlying search
def search_by_keywords(keywords, **options)
  native = defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:search_by_keywords)

  # Fallback to regular search with keywords filter
  return search(keywords: keywords, **options) unless native

  matches = Ragdoll::Document.search_by_keywords(keywords, **options)
  matches.map { |match| match.to_hash }
end
170
+
171
# Keywords-specific search where every keyword must match.
#
# Uses Ragdoll::Document.search_by_keywords_all when that class method exists
# and converts every match with #to_hash. Otherwise falls back to the regular
# #search with a :keywords filter and `keywords_all: true`, so the fallback
# does not silently degrade to "any keyword" matching.
# NOTE(review): :keywords_all is the option name the search command forwards
# to #search — confirm the core search honors it.
#
# @param keywords [Array<String>] keywords that must all be present
# @param options [Hash] options forwarded to the underlying search
def search_by_keywords_all(keywords, **options)
  if defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:search_by_keywords_all)
    Ragdoll::Document.search_by_keywords_all(keywords, **options).map(&:to_hash)
  else
    # Fallback to regular search with keywords filter (AND semantics requested)
    search(keywords: keywords, keywords_all: true, **options)
  end
end
179
+
180
# Keyword => document-count pairs, filtered and truncated.
#
# Returns an empty Hash when Ragdoll::Document is not defined or does not
# offer .keyword_frequencies. Otherwise keeps the pairs whose count is at
# least min_count and returns the first `limit` of them, in the order the
# model supplied them.
#
# @param limit [Integer] maximum number of pairs to return
# @param min_count [Integer] minimum count a keyword needs to be kept
# @return [Hash]
def keyword_frequencies(limit: 100, min_count: 1)
  supported = defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:keyword_frequencies)
  return {} unless supported

  Ragdoll::Document
    .keyword_frequencies
    .select { |_keyword, count| count >= min_count }
    .first(limit)
    .to_h
end
190
+
191
# Add keywords to a stored document and save it.
#
# @param document_id [Object] id passed to Ragdoll::Document.find
# @param keywords [String, Array<String>] keyword(s) to add
# @return [Hash] { success: true, keywords: [...] } on success, or
#   { success: false, message: "..." } when any StandardError is raised
def add_keywords_to_document(document_id, keywords)
  record = Ragdoll::Document.find(document_id)
  Array(keywords).each do |word|
    record.add_keyword(word)
  end
  record.save!

  { success: true, keywords: record.keywords_array }
rescue StandardError => e
  { success: false, message: e.message }
end
207
+
208
# Remove keywords from a stored document and save it.
#
# @param document_id [Object] id passed to Ragdoll::Document.find
# @param keywords [String, Array<String>] keyword(s) to remove
# @return [Hash] { success: true, keywords: [...] } on success, or
#   { success: false, message: "..." } when any StandardError is raised
def remove_keywords_from_document(document_id, keywords)
  record = Ragdoll::Document.find(document_id)
  Array(keywords).each do |word|
    record.remove_keyword(word)
  end
  record.save!

  { success: true, keywords: record.keywords_array }
rescue StandardError => e
  { success: false, message: e.message }
end
224
+
225
# Replace the keywords of a stored document and save it.
#
# @param document_id [Object] id passed to Ragdoll::Document.find
# @param keywords [String, Array<String>] the complete new keyword list
# @return [Hash] { success: true, keywords: [...] } on success, or
#   { success: false, message: "..." } when any StandardError is raised
def set_document_keywords(document_id, keywords)
  record = Ragdoll::Document.find(document_id)
  replacement = Array(keywords)
  record.keywords = replacement
  record.save!

  { success: true, keywords: record.keywords_array }
rescue StandardError => e
  { success: false, message: e.message }
end
241
+
242
# Aggregate keyword usage across all documents that have keywords.
#
# Returns zeroed statistics when Ragdoll::Document is not defined, and zeroed
# statistics plus an :error message when a StandardError is raised.
#
# @return [Hash] :total_keywords (distinct keywords), :documents_with_keywords,
#   :avg_keywords_per_document, :top_keywords (up to 10 [keyword, count]
#   pairs, highest count first), :singleton_keywords (keywords used by
#   exactly one document), and :error on failure
def keyword_statistics
  blank = {
    total_keywords: 0,
    documents_with_keywords: 0,
    avg_keywords_per_document: 0,
    top_keywords: [],
    singleton_keywords: 0
  }
  return blank unless defined?(Ragdoll::Document)

  # Documents whose keywords column is not the empty list.
  tagged = -> { Ragdoll::Document.where.not(keywords: []) }

  document_total = tagged.call.count
  usage = Hash.new(0)
  keyword_total = 0

  tagged.call.find_each do |doc|
    words = doc.keywords_array
    keyword_total += words.length
    words.each { |word| usage[word] += 1 }
  end

  average = document_total > 0 ? (keyword_total.to_f / document_total) : 0

  {
    total_keywords: usage.keys.length,
    documents_with_keywords: document_total,
    avg_keywords_per_document: average,
    # Top 10 most common keywords
    top_keywords: usage.sort_by { |_word, uses| -uses }.first(10),
    # Keywords used by only 1 document
    singleton_keywords: usage.count { |_word, uses| uses == 1 }
  }
rescue StandardError => e
  blank.merge(error: e.message)
end
133
297
 
134
298
  end
135
299
  end
@@ -3,6 +3,6 @@
3
3
 
4
4
  module Ragdoll
5
5
  module CLI
6
- VERSION = "0.1.9"
6
+ VERSION = "0.1.10"
7
7
  end
8
8
  end
data/lib/ragdoll/cli.rb CHANGED
@@ -15,6 +15,7 @@ require_relative 'cli/commands/config'
15
15
  require_relative 'cli/commands/delete'
16
16
  require_relative 'cli/commands/update'
17
17
  require_relative 'cli/commands/analytics'
18
+ require_relative 'cli/commands/keywords'
18
19
 
19
20
  module Ragdoll
20
21
  module CLI
@@ -45,7 +46,9 @@ module Ragdoll
45
46
  method_option :classification, type: :string, aliases: '-C',
46
47
  desc: 'Filter by classification'
47
48
  method_option :keywords, type: :string, aliases: '-k',
48
- desc: 'Filter by keywords (comma-separated)'
49
+ desc: 'Filter by keywords (comma-separated). Use ragdoll keywords for keyword-only search'
50
+ method_option :keywords_all, type: :boolean, default: false, aliases: '-K',
51
+ desc: 'Require ALL keywords to match (default: any keyword matches)'
49
52
  method_option :tags, type: :string, aliases: '-T',
50
53
  desc: 'Filter by tags (comma-separated)'
51
54
  method_option :format, type: :string, default: 'table', aliases: '-f',
@@ -58,6 +61,10 @@ module Ragdoll
58
61
  desc: 'Enable search tracking (default: true)'
59
62
  method_option :search_type, type: :string, default: 'semantic', aliases: '-S',
60
63
  desc: 'Search type: semantic, hybrid, fulltext (default: semantic)'
64
+ method_option :semantic_weight, type: :numeric, aliases: '-w',
65
+ desc: 'Weight for semantic search in hybrid mode (0.0-1.0, default: 0.7)'
66
+ method_option :text_weight, type: :numeric, aliases: '-W',
67
+ desc: 'Weight for text search in hybrid mode (0.0-1.0, default: 0.3)'
61
68
  def search(query)
62
69
  Search.new.call(query, options)
63
70
  end
@@ -68,6 +75,9 @@ module Ragdoll
68
75
  desc 'analytics SUBCOMMAND', 'Search analytics and reporting'
69
76
  subcommand 'analytics', Analytics
70
77
 
78
+ desc 'keywords SUBCOMMAND', 'Manage and search by document keywords'
79
+ subcommand 'keywords', Keywords
80
+
71
81
  desc 'stats', 'Show document and embedding statistics'
72
82
  def stats
73
83
  client = StandaloneClient.new
@@ -159,12 +169,22 @@ module Ragdoll
159
169
  puts " Status: #{document[:status]}"
160
170
  puts " Embeddings Count: #{document[:embeddings_count]}"
161
171
  puts " Content Length: #{document[:content_length]} characters"
172
+
173
+ # Show keywords prominently
174
+ keywords = document[:keywords] || document['keywords'] || []
175
+ if keywords.any?
176
+ puts " Keywords: #{keywords.join(', ')}"
177
+ else
178
+ puts " Keywords: (none)"
179
+ end
180
+
162
181
  puts " Created: #{document[:created_at]}"
163
182
  puts " Updated: #{document[:updated_at]}"
164
183
 
165
- if document[:metadata]
184
+ if document[:metadata] && document[:metadata].any?
166
185
  puts "\nMetadata:"
167
186
  document[:metadata].each do |key, value|
187
+ next if key == 'keywords' # Already displayed above
168
188
  puts " #{key}: #{value}"
169
189
  end
170
190
  end
@@ -193,9 +213,25 @@ module Ragdoll
193
213
  desc: 'Maximum number of documents to list'
194
214
  method_option :format, type: :string, default: 'table', aliases: '-f',
195
215
  desc: 'Output format (table, json, plain)'
216
+ method_option :keywords, type: :string, aliases: '-k',
217
+ desc: 'Filter by keywords (comma-separated)'
218
+ method_option :keywords_all, type: :boolean, default: false, aliases: '-K',
219
+ desc: 'Require ALL keywords to match (default: any keyword matches)'
196
220
  def list
197
221
  client = StandaloneClient.new
198
- documents = client.list_documents(limit: options[:limit])
222
+
223
+ # Handle keyword filtering if provided
224
+ if options[:keywords]
225
+ keywords_array = options[:keywords].split(',').map(&:strip)
226
+ search_method = options[:keywords_all] ? :search_by_keywords_all : :search_by_keywords
227
+ documents = client.public_send(search_method, keywords_array, limit: options[:limit])
228
+
229
+ puts "Listing documents with keywords: #{keywords_array.join(', ')}"
230
+ puts "Mode: #{options[:keywords_all] ? 'ALL keywords (AND)' : 'ANY keywords (OR)'}"
231
+ puts
232
+ else
233
+ documents = client.list_documents(limit: options[:limit])
234
+ end
199
235
 
200
236
  # Get accurate embeddings count for all documents
201
237
  documents.each do |doc|
@@ -215,16 +251,30 @@ module Ragdoll
215
251
  puts "#{doc[:id]}: #{doc[:title] || 'Untitled'}"
216
252
  end
217
253
  else
218
- # Table format
219
- puts 'ID'.ljust(10) + 'Title'.ljust(40) + 'Status'.ljust(12) + 'Embeddings'
220
- puts '-' * 80
221
- documents.each do |doc|
222
- id = (doc[:id] || doc['id'] || '')[0..9].ljust(10)
223
- title = (doc[:title] || doc['title'] || 'Untitled')[0..39].ljust(40)
224
- status = (doc[:status] || doc['status'] || 'unknown')[0..11].ljust(12)
225
- embeddings = (doc[:embeddings_count] || doc['embeddings_count'] || 0).to_s
226
-
227
- puts "#{id}#{title}#{status}#{embeddings}"
254
+ # Table format - show keywords if keyword filtering is being used
255
+ if options[:keywords]
256
+ puts 'ID'.ljust(10) + 'Title'.ljust(30) + 'Keywords'.ljust(35) + 'Status'.ljust(10) + 'Emb'
257
+ puts '-' * 90
258
+ documents.each do |doc|
259
+ id = (doc[:id] || doc['id'] || '')[0..9].ljust(10)
260
+ title = (doc[:title] || doc['title'] || 'Untitled')[0..29].ljust(30)
261
+ keywords = (doc[:keywords] || doc['keywords'] || []).join(', ')[0..34].ljust(35)
262
+ status = (doc[:status] || doc['status'] || 'unknown')[0..9].ljust(10)
263
+ embeddings = (doc[:embeddings_count] || doc['embeddings_count'] || 0).to_s
264
+
265
+ puts "#{id}#{title}#{keywords}#{status}#{embeddings}"
266
+ end
267
+ else
268
+ puts 'ID'.ljust(10) + 'Title'.ljust(40) + 'Status'.ljust(12) + 'Embeddings'
269
+ puts '-' * 80
270
+ documents.each do |doc|
271
+ id = (doc[:id] || doc['id'] || '')[0..9].ljust(10)
272
+ title = (doc[:title] || doc['title'] || 'Untitled')[0..39].ljust(40)
273
+ status = (doc[:status] || doc['status'] || 'unknown')[0..11].ljust(12)
274
+ embeddings = (doc[:embeddings_count] || doc['embeddings_count'] || 0).to_s
275
+
276
+ puts "#{id}#{title}#{status}#{embeddings}"
277
+ end
228
278
  end
229
279
  end
230
280
  end
@@ -489,16 +539,12 @@ module Ragdoll
489
539
  end
490
540
 
491
541
  def display_no_results_feedback(query, search_response, command_type)
492
- # Extract the actual results array from the response
493
- results = search_response[:results] || search_response['results'] || []
494
-
495
542
  puts "No results found for '#{query}'"
496
543
  puts
497
544
 
498
545
  # Get statistics for better feedback
499
546
  statistics = search_response[:statistics] || search_response['statistics']
500
547
  execution_time = search_response[:execution_time_ms] || search_response['execution_time_ms']
501
- total = search_response[:total_results] || search_response['total_results'] || 0
502
548
 
503
549
  if statistics
504
550
  threshold = statistics[:threshold_used] || statistics['threshold_used']
@@ -528,6 +574,8 @@ module Ragdoll
528
574
  if highest < 0.3
529
575
  puts " • Your query might not match the document content well"
530
576
  puts " • Try different or more specific search terms"
577
+ puts " • Try keyword-based search: ragdoll keywords search KEYWORD"
578
+ puts " • List available keywords: ragdoll keywords list"
531
579
  end
532
580
  elsif above_threshold > 0
533
581
  puts "💡 Note: Found #{above_threshold} similar content above threshold #{threshold}"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ragdoll-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dewayne VanHoozer
@@ -15,14 +15,14 @@ dependencies:
15
15
  requirements:
16
16
  - - ">="
17
17
  - !ruby/object:Gem::Version
18
- version: '0'
18
+ version: 0.1.10
19
19
  type: :runtime
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - ">="
24
24
  - !ruby/object:Gem::Version
25
- version: '0'
25
+ version: 0.1.10
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: ruby-progressbar
28
28
  requirement: !ruby/object:Gem::Requirement
@@ -177,7 +177,8 @@ dependencies:
177
177
  - - ">="
178
178
  - !ruby/object:Gem::Version
179
179
  version: '0'
180
- description: Under development. Contributors welcome.
180
+ description: Command-line interface for Ragdoll RAG system with semantic, full-text,
181
+ and hybrid search capabilities. Under development. Contributors welcome.
181
182
  email:
182
183
  - dvanhoozer@gmail.com
183
184
  executables:
@@ -193,6 +194,7 @@ files:
193
194
  - lib/ragdoll/cli/commands/config.rb
194
195
  - lib/ragdoll/cli/commands/delete.rb
195
196
  - lib/ragdoll/cli/commands/health.rb
197
+ - lib/ragdoll/cli/commands/keywords.rb
196
198
  - lib/ragdoll/cli/commands/list.rb
197
199
  - lib/ragdoll/cli/commands/search.rb
198
200
  - lib/ragdoll/cli/commands/stats.rb