ragdoll-cli 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,26 +9,112 @@ module Ragdoll
9
9
  client = StandaloneClient.new
10
10
 
11
11
  puts "Searching for: #{query}"
12
- puts "Options: #{options.to_h}" unless options.to_h.empty?
12
+ puts "Search type: #{options[:search_type] || 'semantic'}"
13
+
14
+ # Show hybrid search weights if applicable
15
+ if options[:search_type] == 'hybrid'
16
+ semantic_w = options[:semantic_weight] || 0.7
17
+ text_w = options[:text_weight] || 0.3
18
+ puts "Weights: semantic=#{semantic_w}, text=#{text_w}"
19
+ end
20
+
21
+ # Show keyword search mode if keywords are provided
22
+ if options[:keywords]
23
+ keywords_array = options[:keywords].split(',').map(&:strip)
24
+ keywords_mode = options[:keywords_all] ? "ALL keywords (AND)" : "ANY keywords (OR)"
25
+ puts "Keywords: #{keywords_array.join(', ')} [#{keywords_mode}]"
26
+ end
27
+
28
+ # Show other options, excluding display-related ones
29
+ relevant_options = options.to_h.except(:keywords, :keywords_all, :search_type, :semantic_weight, :text_weight, :format)
30
+ puts "Options: #{relevant_options}" unless relevant_options.empty?
13
31
  puts
14
32
 
15
33
  search_options = {}
16
34
  search_options[:limit] = options[:limit] if options[:limit]
35
+ search_options[:threshold] = options[:threshold] if options[:threshold]
17
36
  search_options[:content_type] = options[:content_type] if options[:content_type]
18
37
  search_options[:classification] = options[:classification] if options[:classification]
19
- search_options[:keywords] = options[:keywords].split(',').map(&:strip) if options[:keywords]
38
+ if options[:keywords]
39
+ keywords_array = options[:keywords].split(',').map(&:strip)
40
+ search_options[:keywords] = keywords_array
41
+ search_options[:keywords_all] = options[:keywords_all] if options[:keywords_all]
42
+ end
20
43
  search_options[:tags] = options[:tags].split(',').map(&:strip) if options[:tags]
44
+
45
+ # Add search tracking options
46
+ search_options[:session_id] = options[:session_id] if options[:session_id]
47
+ search_options[:user_id] = options[:user_id] if options[:user_id]
48
+ search_options[:track_search] = options[:track_search] if options.respond_to?(:key?) ? options.key?(:track_search) : options.track_search
21
49
 
22
- search_response = client.search(query: query, **search_options)
50
+ # Select search method based on search_type
51
+ search_response = case options[:search_type]
52
+ when 'hybrid'
53
+ # Add weight parameters if provided
54
+ search_options[:semantic_weight] = options[:semantic_weight] if options[:semantic_weight]
55
+ search_options[:text_weight] = options[:text_weight] if options[:text_weight]
56
+ client.hybrid_search(query, **search_options)
57
+ when 'fulltext'
58
+ client.fulltext_search(query, **search_options)
59
+ else
60
+ # Default to semantic search
61
+ client.search(query: query, **search_options)
62
+ end
23
63
 
24
64
  # Extract the actual results array from the response
25
65
  results = search_response[:results] || search_response['results'] || []
26
66
 
27
67
  if results.empty?
68
+ # Get statistics for better feedback
69
+ statistics = search_response[:statistics] || search_response['statistics']
70
+ execution_time = search_response[:execution_time_ms] || search_response['execution_time_ms']
28
71
  total = search_response[:total_results] || search_response['total_results'] || 0
72
+
29
73
  puts "No results found for '#{query}'"
30
- puts "(Total documents in system: #{total})" if total > 0
31
- puts "Try adjusting your search terms or check if documents have been processed."
74
+ puts
75
+
76
+ if statistics
77
+ threshold = statistics[:threshold_used] || statistics['threshold_used']
78
+ highest = statistics[:highest_similarity] || statistics['highest_similarity']
79
+ lowest = statistics[:lowest_similarity] || statistics['lowest_similarity']
80
+ average = statistics[:average_similarity] || statistics['average_similarity']
81
+ above_threshold = statistics[:similarities_above_threshold] || statistics['similarities_above_threshold']
82
+ total_checked = statistics[:total_embeddings_checked] || statistics['total_embeddings_checked']
83
+
84
+ puts "Search Analysis:"
85
+ puts " • Similarity threshold: #{threshold&.round(3) || 'N/A'}"
86
+ puts " • Embeddings analyzed: #{total_checked || 0}"
87
+ if highest && lowest && average
88
+ puts " • Similarity range: #{lowest.round(3)} - #{highest.round(3)} (avg: #{average.round(3)})"
89
+ end
90
+ puts " • Results above threshold: #{above_threshold || 0}"
91
+ puts " • Search time: #{execution_time || 0}ms"
92
+ puts
93
+
94
+ # Provide actionable suggestions
95
+ if highest && threshold
96
+ if highest < threshold
97
+ suggested_threshold = (highest * 0.9).round(3)
98
+ puts "💡 Suggestions:"
99
+ puts " • Lower the similarity threshold (highest found: #{highest.round(3)})"
100
+ puts " • Try: ragdoll search '#{query}' --threshold=#{suggested_threshold}"
101
+ if highest < 0.3
102
+ puts " • Your query might not match the document content well"
103
+ puts " • Try different or more specific search terms"
104
+ puts " • Try keyword-based search: ragdoll keywords search KEYWORD"
105
+ puts " • List available keywords: ragdoll keywords list"
106
+ end
107
+ elsif above_threshold > 0
108
+ puts "💡 Note: Found #{above_threshold} similar content above threshold #{threshold}"
109
+ puts " This suggests an issue with result processing."
110
+ end
111
+ end
112
+ else
113
+ puts "(Total documents in system: #{total})" if total > 0
114
+ puts "Try adjusting your search terms or check if documents have been processed."
115
+ puts "Alternative: Use keyword-based search: ragdoll keywords search KEYWORD"
116
+ end
117
+
32
118
  return
33
119
  end
34
120
 
@@ -41,28 +127,65 @@ module Ragdoll
41
127
  content = safe_string_value(result, [:content, :text], '')
42
128
  puts "#{index + 1}. #{title}"
43
129
  puts " ID: #{result[:document_id] || result[:id]}"
44
- puts " Similarity: #{result[:similarity]&.round(3) || 'N/A'}"
130
+
131
+ # Show appropriate score based on search type
132
+ if options[:search_type] == 'hybrid'
133
+ puts " Combined Score: #{result[:combined_score]&.round(3) || 'N/A'}"
134
+ if result[:search_types]
135
+ puts " Match Types: #{result[:search_types].join(', ')}"
136
+ end
137
+ elsif options[:search_type] == 'fulltext'
138
+ puts " Text Match: #{result[:fulltext_similarity]&.round(3) || 'N/A'}"
139
+ else
140
+ puts " Similarity: #{result[:similarity]&.round(3) || 'N/A'}"
141
+ end
142
+
45
143
  puts " Content: #{content[0..200]}..."
46
144
  puts
47
145
  end
48
146
  else
49
147
  # Table format (default)
50
- puts "Found #{results.length} results:"
148
+ puts "Found #{results.length} results (#{search_response[:search_type] || 'semantic'} search):"
51
149
  puts
52
- puts 'Rank'.ljust(5) + 'Title'.ljust(30) + 'Similarity'.ljust(12) + 'Content Preview'
150
+
151
+ # Adjust column header based on search type
152
+ score_header = case options[:search_type]
153
+ when 'hybrid'
154
+ 'Score'.ljust(12)
155
+ when 'fulltext'
156
+ 'Text Match'.ljust(12)
157
+ else
158
+ 'Similarity'.ljust(12)
159
+ end
160
+
161
+ puts 'Rank'.ljust(5) + 'Title'.ljust(30) + score_header + 'Content Preview'
53
162
  puts '-' * 80
54
163
 
55
164
  results.each_with_index do |result, index|
56
165
  rank = (index + 1).to_s.ljust(5)
57
166
  title = safe_string_value(result, [:title, :document_title], 'Untitled')[0..29].ljust(30)
58
- similarity = (result[:similarity]&.round(3) || 'N/A').to_s.ljust(12)
167
+
168
+ # Get appropriate score based on search type
169
+ score = case options[:search_type]
170
+ when 'hybrid'
171
+ result[:combined_score] || result[:weighted_score]
172
+ when 'fulltext'
173
+ result[:fulltext_similarity]
174
+ else
175
+ result[:similarity]
176
+ end
177
+
178
+ score_str = (score&.round(3) || 'N/A').to_s.ljust(12)
59
179
  content = safe_string_value(result, [:content, :text], '')[0..50]
60
180
  content += '...' if content.length == 50
61
181
 
62
- puts "#{rank}#{title}#{similarity}#{content}"
182
+ puts "#{rank}#{title}#{score_str}#{content}"
63
183
  end
64
184
 
65
185
  puts
186
+ if options[:search_type] == 'hybrid' && (options[:semantic_weight] || options[:text_weight])
187
+ puts "Weights: semantic=#{options[:semantic_weight] || 0.7}, text=#{options[:text_weight] || 0.3}"
188
+ end
66
189
  puts 'Use --format=json for complete results or --format=plain for detailed view'
67
190
  end
68
191
  end
@@ -18,7 +18,7 @@ module Ragdoll
18
18
  return
19
19
  end
20
20
 
21
- result = client.update_document(id, update_options)
21
+ result = client.update_document(id, **update_options)
22
22
 
23
23
  if result[:success]
24
24
  puts "Document ID #{id} updated successfully."
@@ -4,6 +4,7 @@ module Ragdoll
4
4
  module CLI
5
5
  class StandaloneClient
6
6
  include DebugMe
7
+
7
8
  def add_document(path, **options)
8
9
  Ragdoll.add_document(path: path, **options)
9
10
  end
@@ -34,8 +35,12 @@ module Ragdoll
34
35
  end
35
36
 
36
37
 
37
- def search(query, **options)
38
- Ragdoll.search(query: query, **options)
38
# Runs a search through Ragdoll.search.
#
# The query may be given positionally or as part of the keyword options
# (e.g. `search(query: "text", limit: 5)`).
#
# @param query [Object, nil] optional positional query
# @param options [Hash] keywords forwarded to Ragdoll.search
# @return [Object] whatever Ragdoll.search returns
def search(query = nil, **options)
  # No positional query: forward the keywords untouched.
  return Ragdoll.search(**options) unless query

  Ragdoll.search(query: query, **options)
end
40
45
 
41
46
 
@@ -53,6 +58,96 @@ module Ragdoll
53
58
  Ragdoll.stats
54
59
  end
55
60
 
61
# Returns search analytics for the given number of days.
#
# Delegates to Ragdoll.search_analytics when Ragdoll responds to it;
# otherwise returns an all-zero placeholder hash.
#
# @param days [Integer] forwarded as the `days:` keyword (default 30)
# @return [Hash, Object] the delegate's result or the placeholder
def search_analytics(days: 30)
  # TODO: This will delegate to Ragdoll core when analytics are implemented
  core_available = defined?(Ragdoll) && Ragdoll.respond_to?(:search_analytics)
  return Ragdoll.search_analytics(days: days) if core_available

  # Placeholder response for now
  {
    total_searches: 0,
    unique_queries: 0,
    avg_results_per_search: 0.0,
    avg_execution_time: 0.0,
    search_types: {},
    searches_with_results: 0,
    avg_click_through_rate: 0.0
  }
end
78
+
79
# Returns recent search history.
#
# Delegates to Ragdoll.search_history when Ragdoll responds to it;
# otherwise returns an empty array as a placeholder.
#
# @param limit [Integer] forwarded as the `limit:` keyword (default 20)
# @param options [Hash] extra keywords forwarded unchanged
# @return [Array, Object] the delegate's result or []
def search_history(limit: 20, **options)
  # TODO: This will delegate to Ragdoll core when analytics are implemented
  core_available = defined?(Ragdoll) && Ragdoll.respond_to?(:search_history)
  core_available ? Ragdoll.search_history(limit: limit, **options) : []
end
88
+
89
# Returns trending queries.
#
# Delegates to Ragdoll.trending_queries when Ragdoll responds to it;
# otherwise returns an empty array as a placeholder.
#
# @param limit [Integer] forwarded as the `limit:` keyword (default 10)
# @param days [Integer] forwarded as the `days:` keyword (default 7)
# @return [Array, Object] the delegate's result or []
def trending_queries(limit: 10, days: 7)
  # TODO: This will delegate to Ragdoll core when analytics are implemented
  core_available = defined?(Ragdoll) && Ragdoll.respond_to?(:trending_queries)
  core_available ? Ragdoll.trending_queries(limit: limit, days: days) : []
end
98
+
99
# Cleans up stored searches.
#
# Delegates to Ragdoll.cleanup_searches when Ragdoll responds to it;
# otherwise returns a placeholder hash with zero counts.
#
# @param options [Hash] keywords forwarded unchanged to the delegate
# @return [Hash, Object] the delegate's result or the placeholder
def cleanup_searches(**options)
  # TODO: This will delegate to Ragdoll core when analytics are implemented
  core_available = defined?(Ragdoll) && Ragdoll.respond_to?(:cleanup_searches)
  return Ragdoll.cleanup_searches(**options) if core_available

  # Placeholder response for now
  { orphaned_count: 0, unused_count: 0 }
end
108
+
109
# Runs a hybrid search by forwarding to Ragdoll.hybrid_search.
#
# @param query [Object] value passed on as the `query:` keyword
# @param options [Hash] extra keywords forwarded unchanged
# @return [Object] whatever Ragdoll.hybrid_search returns
def hybrid_search(query, **options)
  forwarded = { query: query }.merge(options)
  Ragdoll.hybrid_search(**forwarded)
end
113
+
114
# Performs a full-text search via Ragdoll::Document.search_content and
# reshapes the matches into the response hash used by the CLI.
#
# All options are forwarded to search_content unchanged. The threshold is
# only echoed back in the response (`threshold_used`); this method does not
# filter by it itself.
#
# @param query [Object] search text passed to search_content
# @param options [Hash] keywords forwarded to search_content
# @return [Hash] `{ query:, search_type: 'fulltext', results:, total_results:,
#   threshold_used: }` on success; on any StandardError the same shape with
#   empty results and an `error:` message instead of `threshold_used:`
def fulltext_search(query, **options)
  threshold = options[:threshold] || 0.0

  # Get full-text search results
  documents = Ragdoll::Document.search_content(query, **options)

  # Format results to match expected structure
  results = documents.map do |doc|
    {
      document_id: doc.id.to_s,
      document_title: doc.title,
      document_location: doc.location,
      # Preview only. `to_s` keeps a document with nil content from raising
      # and turning the whole search into an error response.
      content: doc.content.to_s[0..500],
      fulltext_similarity: doc.respond_to?(:fulltext_similarity) ? doc.fulltext_similarity : nil,
      document_type: doc.document_type,
      status: doc.status
    }
  end

  {
    query: query,
    search_type: 'fulltext',
    results: results,
    total_results: results.length,
    threshold_used: threshold
  }
rescue StandardError => e
  {
    query: query,
    search_type: 'fulltext',
    results: [],
    total_results: 0,
    error: "Full-text search failed: #{e.message}"
  }
end
56
151
 
57
152
  def healthy?
58
153
  Ragdoll.healthy?
@@ -63,6 +158,142 @@ module Ragdoll
63
158
  Ragdoll.configuration
64
159
  end
65
160
 
161
+ # Keywords-specific search methods
162
# Searches documents by keywords.
#
# Uses Ragdoll::Document.search_by_keywords when it is available and maps
# each match through `to_hash`; otherwise falls back to the regular
# `search` with a `keywords:` filter.
#
# @param keywords [Object] keywords passed to the underlying search
# @param options [Hash] extra keywords forwarded unchanged
# @return [Array, Object] mapped matches, or the fallback search result
def search_by_keywords(keywords, **options)
  unless defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:search_by_keywords)
    # Fallback to regular search with keywords filter
    return search(keywords: keywords, **options)
  end

  matches = Ragdoll::Document.search_by_keywords(keywords, **options)
  matches.map(&:to_hash)
end
170
+
171
# Searches documents that match ALL of the given keywords.
#
# Uses Ragdoll::Document.search_by_keywords_all when it is available and
# maps each match through `to_hash`. Otherwise falls back to the regular
# `search`, passing `keywords_all: true` so the fallback keeps the "ALL"
# request instead of issuing the same call as the "ANY" variant.
#
# @param keywords [Object] keywords passed to the underlying search
# @param options [Hash] extra keywords forwarded unchanged; an explicit
#   `:keywords_all` here overrides the default of true
# @return [Array, Object] mapped matches, or the fallback search result
def search_by_keywords_all(keywords, **options)
  if defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:search_by_keywords_all)
    Ragdoll::Document.search_by_keywords_all(keywords, **options).map(&:to_hash)
  else
    # Fallback to regular search with keywords filter, requiring every keyword
    search(keywords: keywords, keywords_all: true, **options)
  end
end
179
+
180
# Returns keyword usage counts from Ragdoll::Document.keyword_frequencies.
#
# Entries with a count below `min_count` are dropped, then the first
# `limit` remaining entries are kept in the order the source returned them.
# Returns an empty hash when Ragdoll::Document does not provide
# keyword_frequencies.
#
# @param limit [Integer] maximum number of entries to return (default 100)
# @param min_count [Integer] minimum count an entry must have (default 1)
# @return [Hash] keyword => count
def keyword_frequencies(limit: 100, min_count: 1)
  return {} unless defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:keyword_frequencies)

  Ragdoll::Document.keyword_frequencies
                   .select { |_keyword, count| count >= min_count }
                   .first(limit)
                   .to_h
end
190
+
191
# Adds one or more keywords to a document and saves it.
#
# @param document_id [Object] id passed to Ragdoll::Document.find
# @param keywords [Object] a single keyword or a list; wrapped with Array()
# @return [Hash] `{ success: true, keywords: ... }` with the document's
#   keywords_array after saving, or `{ success: false, message: ... }` when
#   any StandardError is raised along the way
def add_keywords_to_document(document_id, keywords)
  record = Ragdoll::Document.find(document_id)
  Array(keywords).each { |word| record.add_keyword(word) }
  record.save!

  { success: true, keywords: record.keywords_array }
rescue StandardError => error
  { success: false, message: error.message }
end
207
+
208
# Removes one or more keywords from a document and saves it.
#
# @param document_id [Object] id passed to Ragdoll::Document.find
# @param keywords [Object] a single keyword or a list; wrapped with Array()
# @return [Hash] `{ success: true, keywords: ... }` with the document's
#   keywords_array after saving, or `{ success: false, message: ... }` when
#   any StandardError is raised along the way
def remove_keywords_from_document(document_id, keywords)
  record = Ragdoll::Document.find(document_id)
  Array(keywords).each { |word| record.remove_keyword(word) }
  record.save!

  { success: true, keywords: record.keywords_array }
rescue StandardError => error
  { success: false, message: error.message }
end
224
+
225
# Replaces a document's keywords and saves it.
#
# @param document_id [Object] id passed to Ragdoll::Document.find
# @param keywords [Object] a single keyword or a list; wrapped with Array()
#   before being assigned to `keywords=`
# @return [Hash] `{ success: true, keywords: ... }` with the document's
#   keywords_array after saving, or `{ success: false, message: ... }` when
#   any StandardError is raised along the way
def set_document_keywords(document_id, keywords)
  record = Ragdoll::Document.find(document_id)
  record.keywords = Array(keywords)
  record.save!

  { success: true, keywords: record.keywords_array }
rescue StandardError => error
  { success: false, message: error.message }
end
241
+
242
+ def keyword_statistics
243
+ begin
244
+ total_keywords = 0
245
+ documents_with_keywords = 0
246
+ total_keyword_count = 0
247
+ keyword_frequencies = {}
248
+
249
+ if defined?(Ragdoll::Document)
250
+ documents_with_keywords = Ragdoll::Document.where.not(keywords: []).count
251
+
252
+ Ragdoll::Document.where.not(keywords: []).find_each do |doc|
253
+ doc_keywords = doc.keywords_array
254
+ total_keyword_count += doc_keywords.length
255
+
256
+ doc_keywords.each do |keyword|
257
+ keyword_frequencies[keyword] = (keyword_frequencies[keyword] || 0) + 1
258
+ end
259
+ end
260
+
261
+ total_keywords = keyword_frequencies.keys.length
262
+ avg_keywords_per_document = documents_with_keywords > 0 ? (total_keyword_count.to_f / documents_with_keywords) : 0
263
+
264
+ # Top 10 most common keywords
265
+ top_keywords = keyword_frequencies.sort_by { |_k, v| -v }.first(10)
266
+
267
+ # Count singleton keywords (used by only 1 document)
268
+ singleton_keywords = keyword_frequencies.count { |_k, v| v == 1 }
269
+
270
+ {
271
+ total_keywords: total_keywords,
272
+ documents_with_keywords: documents_with_keywords,
273
+ avg_keywords_per_document: avg_keywords_per_document,
274
+ top_keywords: top_keywords,
275
+ singleton_keywords: singleton_keywords
276
+ }
277
+ else
278
+ {
279
+ total_keywords: 0,
280
+ documents_with_keywords: 0,
281
+ avg_keywords_per_document: 0,
282
+ top_keywords: [],
283
+ singleton_keywords: 0
284
+ }
285
+ end
286
+ rescue StandardError => e
287
+ {
288
+ total_keywords: 0,
289
+ documents_with_keywords: 0,
290
+ avg_keywords_per_document: 0,
291
+ top_keywords: [],
292
+ singleton_keywords: 0,
293
+ error: e.message
294
+ }
295
+ end
296
+ end
66
297
 
67
298
  end
68
299
  end
@@ -3,6 +3,6 @@
3
3
 
4
4
  module Ragdoll
5
5
  module CLI
6
- VERSION = "0.1.8"
6
+ VERSION = "0.1.10"
7
7
  end
8
8
  end