ragdoll-cli 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +42 -4
- data/Rakefile +26 -7
- data/lib/ragdoll/cli/commands/analytics.rb +222 -0
- data/lib/ragdoll/cli/commands/keywords.rb +317 -0
- data/lib/ragdoll/cli/commands/search.rb +133 -10
- data/lib/ragdoll/cli/commands/update.rb +1 -1
- data/lib/ragdoll/cli/standalone_client.rb +233 -2
- data/lib/ragdoll/cli/version.rb +1 -1
- data/lib/ragdoll/cli.rb +235 -23
- metadata +174 -3
|
@@ -9,26 +9,112 @@ module Ragdoll
|
|
|
9
9
|
client = StandaloneClient.new
|
|
10
10
|
|
|
11
11
|
puts "Searching for: #{query}"
|
|
12
|
-
puts "
|
|
12
|
+
puts "Search type: #{options[:search_type] || 'semantic'}"
|
|
13
|
+
|
|
14
|
+
# Show hybrid search weights if applicable
|
|
15
|
+
if options[:search_type] == 'hybrid'
|
|
16
|
+
semantic_w = options[:semantic_weight] || 0.7
|
|
17
|
+
text_w = options[:text_weight] || 0.3
|
|
18
|
+
puts "Weights: semantic=#{semantic_w}, text=#{text_w}"
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# Show keyword search mode if keywords are provided
|
|
22
|
+
if options[:keywords]
|
|
23
|
+
keywords_array = options[:keywords].split(',').map(&:strip)
|
|
24
|
+
keywords_mode = options[:keywords_all] ? "ALL keywords (AND)" : "ANY keywords (OR)"
|
|
25
|
+
puts "Keywords: #{keywords_array.join(', ')} [#{keywords_mode}]"
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Show other options, excluding display-related ones
|
|
29
|
+
relevant_options = options.to_h.except(:keywords, :keywords_all, :search_type, :semantic_weight, :text_weight, :format)
|
|
30
|
+
puts "Options: #{relevant_options}" unless relevant_options.empty?
|
|
13
31
|
puts
|
|
14
32
|
|
|
15
33
|
search_options = {}
|
|
16
34
|
search_options[:limit] = options[:limit] if options[:limit]
|
|
35
|
+
search_options[:threshold] = options[:threshold] if options[:threshold]
|
|
17
36
|
search_options[:content_type] = options[:content_type] if options[:content_type]
|
|
18
37
|
search_options[:classification] = options[:classification] if options[:classification]
|
|
19
|
-
|
|
38
|
+
if options[:keywords]
|
|
39
|
+
keywords_array = options[:keywords].split(',').map(&:strip)
|
|
40
|
+
search_options[:keywords] = keywords_array
|
|
41
|
+
search_options[:keywords_all] = options[:keywords_all] if options[:keywords_all]
|
|
42
|
+
end
|
|
20
43
|
search_options[:tags] = options[:tags].split(',').map(&:strip) if options[:tags]
|
|
44
|
+
|
|
45
|
+
# Add search tracking options
|
|
46
|
+
search_options[:session_id] = options[:session_id] if options[:session_id]
|
|
47
|
+
search_options[:user_id] = options[:user_id] if options[:user_id]
|
|
48
|
+
search_options[:track_search] = options[:track_search] if options.respond_to?(:key?) ? options.key?(:track_search) : options.track_search
|
|
21
49
|
|
|
22
|
-
|
|
50
|
+
# Select search method based on search_type
|
|
51
|
+
search_response = case options[:search_type]
|
|
52
|
+
when 'hybrid'
|
|
53
|
+
# Add weight parameters if provided
|
|
54
|
+
search_options[:semantic_weight] = options[:semantic_weight] if options[:semantic_weight]
|
|
55
|
+
search_options[:text_weight] = options[:text_weight] if options[:text_weight]
|
|
56
|
+
client.hybrid_search(query, **search_options)
|
|
57
|
+
when 'fulltext'
|
|
58
|
+
client.fulltext_search(query, **search_options)
|
|
59
|
+
else
|
|
60
|
+
# Default to semantic search
|
|
61
|
+
client.search(query: query, **search_options)
|
|
62
|
+
end
|
|
23
63
|
|
|
24
64
|
# Extract the actual results array from the response
|
|
25
65
|
results = search_response[:results] || search_response['results'] || []
|
|
26
66
|
|
|
27
67
|
if results.empty?
|
|
68
|
+
# Get statistics for better feedback
|
|
69
|
+
statistics = search_response[:statistics] || search_response['statistics']
|
|
70
|
+
execution_time = search_response[:execution_time_ms] || search_response['execution_time_ms']
|
|
28
71
|
total = search_response[:total_results] || search_response['total_results'] || 0
|
|
72
|
+
|
|
29
73
|
puts "No results found for '#{query}'"
|
|
30
|
-
puts
|
|
31
|
-
|
|
74
|
+
puts
|
|
75
|
+
|
|
76
|
+
if statistics
|
|
77
|
+
threshold = statistics[:threshold_used] || statistics['threshold_used']
|
|
78
|
+
highest = statistics[:highest_similarity] || statistics['highest_similarity']
|
|
79
|
+
lowest = statistics[:lowest_similarity] || statistics['lowest_similarity']
|
|
80
|
+
average = statistics[:average_similarity] || statistics['average_similarity']
|
|
81
|
+
above_threshold = statistics[:similarities_above_threshold] || statistics['similarities_above_threshold']
|
|
82
|
+
total_checked = statistics[:total_embeddings_checked] || statistics['total_embeddings_checked']
|
|
83
|
+
|
|
84
|
+
puts "Search Analysis:"
|
|
85
|
+
puts " • Similarity threshold: #{threshold&.round(3) || 'N/A'}"
|
|
86
|
+
puts " • Embeddings analyzed: #{total_checked || 0}"
|
|
87
|
+
if highest && lowest && average
|
|
88
|
+
puts " • Similarity range: #{lowest.round(3)} - #{highest.round(3)} (avg: #{average.round(3)})"
|
|
89
|
+
end
|
|
90
|
+
puts " • Results above threshold: #{above_threshold || 0}"
|
|
91
|
+
puts " • Search time: #{execution_time || 0}ms"
|
|
92
|
+
puts
|
|
93
|
+
|
|
94
|
+
# Provide actionable suggestions
|
|
95
|
+
if highest && threshold
|
|
96
|
+
if highest < threshold
|
|
97
|
+
suggested_threshold = (highest * 0.9).round(3)
|
|
98
|
+
puts "💡 Suggestions:"
|
|
99
|
+
puts " • Lower the similarity threshold (highest found: #{highest.round(3)})"
|
|
100
|
+
puts " • Try: ragdoll search '#{query}' --threshold=#{suggested_threshold}"
|
|
101
|
+
if highest < 0.3
|
|
102
|
+
puts " • Your query might not match the document content well"
|
|
103
|
+
puts " • Try different or more specific search terms"
|
|
104
|
+
puts " • Try keyword-based search: ragdoll keywords search KEYWORD"
|
|
105
|
+
puts " • List available keywords: ragdoll keywords list"
|
|
106
|
+
end
|
|
107
|
+
elsif above_threshold > 0
|
|
108
|
+
puts "💡 Note: Found #{above_threshold} similar content above threshold #{threshold}"
|
|
109
|
+
puts " This suggests an issue with result processing."
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
else
|
|
113
|
+
puts "(Total documents in system: #{total})" if total > 0
|
|
114
|
+
puts "Try adjusting your search terms or check if documents have been processed."
|
|
115
|
+
puts "Alternative: Use keyword-based search: ragdoll keywords search KEYWORD"
|
|
116
|
+
end
|
|
117
|
+
|
|
32
118
|
return
|
|
33
119
|
end
|
|
34
120
|
|
|
@@ -41,28 +127,65 @@ module Ragdoll
|
|
|
41
127
|
content = safe_string_value(result, [:content, :text], '')
|
|
42
128
|
puts "#{index + 1}. #{title}"
|
|
43
129
|
puts " ID: #{result[:document_id] || result[:id]}"
|
|
44
|
-
|
|
130
|
+
|
|
131
|
+
# Show appropriate score based on search type
|
|
132
|
+
if options[:search_type] == 'hybrid'
|
|
133
|
+
puts " Combined Score: #{result[:combined_score]&.round(3) || 'N/A'}"
|
|
134
|
+
if result[:search_types]
|
|
135
|
+
puts " Match Types: #{result[:search_types].join(', ')}"
|
|
136
|
+
end
|
|
137
|
+
elsif options[:search_type] == 'fulltext'
|
|
138
|
+
puts " Text Match: #{result[:fulltext_similarity]&.round(3) || 'N/A'}"
|
|
139
|
+
else
|
|
140
|
+
puts " Similarity: #{result[:similarity]&.round(3) || 'N/A'}"
|
|
141
|
+
end
|
|
142
|
+
|
|
45
143
|
puts " Content: #{content[0..200]}..."
|
|
46
144
|
puts
|
|
47
145
|
end
|
|
48
146
|
else
|
|
49
147
|
# Table format (default)
|
|
50
|
-
puts "Found #{results.length} results:"
|
|
148
|
+
puts "Found #{results.length} results (#{search_response[:search_type] || 'semantic'} search):"
|
|
51
149
|
puts
|
|
52
|
-
|
|
150
|
+
|
|
151
|
+
# Adjust column header based on search type
|
|
152
|
+
score_header = case options[:search_type]
|
|
153
|
+
when 'hybrid'
|
|
154
|
+
'Score'.ljust(12)
|
|
155
|
+
when 'fulltext'
|
|
156
|
+
'Text Match'.ljust(12)
|
|
157
|
+
else
|
|
158
|
+
'Similarity'.ljust(12)
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
puts 'Rank'.ljust(5) + 'Title'.ljust(30) + score_header + 'Content Preview'
|
|
53
162
|
puts '-' * 80
|
|
54
163
|
|
|
55
164
|
results.each_with_index do |result, index|
|
|
56
165
|
rank = (index + 1).to_s.ljust(5)
|
|
57
166
|
title = safe_string_value(result, [:title, :document_title], 'Untitled')[0..29].ljust(30)
|
|
58
|
-
|
|
167
|
+
|
|
168
|
+
# Get appropriate score based on search type
|
|
169
|
+
score = case options[:search_type]
|
|
170
|
+
when 'hybrid'
|
|
171
|
+
result[:combined_score] || result[:weighted_score]
|
|
172
|
+
when 'fulltext'
|
|
173
|
+
result[:fulltext_similarity]
|
|
174
|
+
else
|
|
175
|
+
result[:similarity]
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
score_str = (score&.round(3) || 'N/A').to_s.ljust(12)
|
|
59
179
|
content = safe_string_value(result, [:content, :text], '')[0..50]
|
|
60
180
|
content += '...' if content.length == 50
|
|
61
181
|
|
|
62
|
-
puts "#{rank}#{title}#{
|
|
182
|
+
puts "#{rank}#{title}#{score_str}#{content}"
|
|
63
183
|
end
|
|
64
184
|
|
|
65
185
|
puts
|
|
186
|
+
if options[:search_type] == 'hybrid' && (options[:semantic_weight] || options[:text_weight])
|
|
187
|
+
puts "Weights: semantic=#{options[:semantic_weight] || 0.7}, text=#{options[:text_weight] || 0.3}"
|
|
188
|
+
end
|
|
66
189
|
puts 'Use --format=json for complete results or --format=plain for detailed view'
|
|
67
190
|
end
|
|
68
191
|
end
|
|
@@ -4,6 +4,7 @@ module Ragdoll
|
|
|
4
4
|
module CLI
|
|
5
5
|
class StandaloneClient
|
|
6
6
|
include DebugMe
|
|
7
|
+
|
|
7
8
|
def add_document(path, **options)
|
|
8
9
|
Ragdoll.add_document(path: path, **options)
|
|
9
10
|
end
|
|
@@ -34,8 +35,12 @@ module Ragdoll
|
|
|
34
35
|
end
|
|
35
36
|
|
|
36
37
|
|
|
37
|
-
def search(query, **options)
|
|
38
|
-
|
|
38
|
+
def search(query = nil, **options)
|
|
39
|
+
if query
|
|
40
|
+
Ragdoll.search(query: query, **options)
|
|
41
|
+
else
|
|
42
|
+
Ragdoll.search(**options)
|
|
43
|
+
end
|
|
39
44
|
end
|
|
40
45
|
|
|
41
46
|
|
|
@@ -53,6 +58,96 @@ module Ragdoll
|
|
|
53
58
|
Ragdoll.stats
|
|
54
59
|
end
|
|
55
60
|
|
|
61
|
+
def search_analytics(days: 30)
|
|
62
|
+
# TODO: This will delegate to Ragdoll core when analytics are implemented
|
|
63
|
+
if defined?(Ragdoll) && Ragdoll.respond_to?(:search_analytics)
|
|
64
|
+
Ragdoll.search_analytics(days: days)
|
|
65
|
+
else
|
|
66
|
+
# Placeholder response for now
|
|
67
|
+
{
|
|
68
|
+
total_searches: 0,
|
|
69
|
+
unique_queries: 0,
|
|
70
|
+
avg_results_per_search: 0.0,
|
|
71
|
+
avg_execution_time: 0.0,
|
|
72
|
+
search_types: {},
|
|
73
|
+
searches_with_results: 0,
|
|
74
|
+
avg_click_through_rate: 0.0
|
|
75
|
+
}
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def search_history(limit: 20, **options)
|
|
80
|
+
# TODO: This will delegate to Ragdoll core when analytics are implemented
|
|
81
|
+
if defined?(Ragdoll) && Ragdoll.respond_to?(:search_history)
|
|
82
|
+
Ragdoll.search_history(limit: limit, **options)
|
|
83
|
+
else
|
|
84
|
+
# Placeholder response for now
|
|
85
|
+
[]
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def trending_queries(limit: 10, days: 7)
|
|
90
|
+
# TODO: This will delegate to Ragdoll core when analytics are implemented
|
|
91
|
+
if defined?(Ragdoll) && Ragdoll.respond_to?(:trending_queries)
|
|
92
|
+
Ragdoll.trending_queries(limit: limit, days: days)
|
|
93
|
+
else
|
|
94
|
+
# Placeholder response for now
|
|
95
|
+
[]
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def cleanup_searches(**options)
|
|
100
|
+
# TODO: This will delegate to Ragdoll core when analytics are implemented
|
|
101
|
+
if defined?(Ragdoll) && Ragdoll.respond_to?(:cleanup_searches)
|
|
102
|
+
Ragdoll.cleanup_searches(**options)
|
|
103
|
+
else
|
|
104
|
+
# Placeholder response for now
|
|
105
|
+
{ orphaned_count: 0, unused_count: 0 }
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def hybrid_search(query, **options)
|
|
110
|
+
# Properly delegate to Ragdoll core's hybrid_search
|
|
111
|
+
Ragdoll.hybrid_search(query: query, **options)
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def fulltext_search(query, **options)
|
|
115
|
+
# Perform full-text search using Document.search_content
|
|
116
|
+
limit = options[:limit] || 20
|
|
117
|
+
threshold = options[:threshold] || 0.0
|
|
118
|
+
|
|
119
|
+
# Get full-text search results
|
|
120
|
+
documents = Ragdoll::Document.search_content(query, **options)
|
|
121
|
+
|
|
122
|
+
# Format results to match expected structure
|
|
123
|
+
results = documents.map do |doc|
|
|
124
|
+
{
|
|
125
|
+
document_id: doc.id.to_s,
|
|
126
|
+
document_title: doc.title,
|
|
127
|
+
document_location: doc.location,
|
|
128
|
+
content: doc.content[0..500], # Preview
|
|
129
|
+
fulltext_similarity: doc.respond_to?(:fulltext_similarity) ? doc.fulltext_similarity : nil,
|
|
130
|
+
document_type: doc.document_type,
|
|
131
|
+
status: doc.status
|
|
132
|
+
}
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
{
|
|
136
|
+
query: query,
|
|
137
|
+
search_type: 'fulltext',
|
|
138
|
+
results: results,
|
|
139
|
+
total_results: results.length,
|
|
140
|
+
threshold_used: threshold
|
|
141
|
+
}
|
|
142
|
+
rescue StandardError => e
|
|
143
|
+
{
|
|
144
|
+
query: query,
|
|
145
|
+
search_type: 'fulltext',
|
|
146
|
+
results: [],
|
|
147
|
+
total_results: 0,
|
|
148
|
+
error: "Full-text search failed: #{e.message}"
|
|
149
|
+
}
|
|
150
|
+
end
|
|
56
151
|
|
|
57
152
|
def healthy?
|
|
58
153
|
Ragdoll.healthy?
|
|
@@ -63,6 +158,142 @@ module Ragdoll
|
|
|
63
158
|
Ragdoll.configuration
|
|
64
159
|
end
|
|
65
160
|
|
|
161
|
+
# Keywords-specific search methods
|
|
162
|
+
def search_by_keywords(keywords, **options)
|
|
163
|
+
if defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:search_by_keywords)
|
|
164
|
+
Ragdoll::Document.search_by_keywords(keywords, **options).map(&:to_hash)
|
|
165
|
+
else
|
|
166
|
+
# Fallback to regular search with keywords filter
|
|
167
|
+
search(keywords: keywords, **options)
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
def search_by_keywords_all(keywords, **options)
|
|
172
|
+
if defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:search_by_keywords_all)
|
|
173
|
+
Ragdoll::Document.search_by_keywords_all(keywords, **options).map(&:to_hash)
|
|
174
|
+
else
|
|
175
|
+
# Fallback to regular search with keywords filter
|
|
176
|
+
search(keywords: keywords, **options)
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
def keyword_frequencies(limit: 100, min_count: 1)
|
|
181
|
+
if defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:keyword_frequencies)
|
|
182
|
+
frequencies = Ragdoll::Document.keyword_frequencies
|
|
183
|
+
# Filter by min_count and limit
|
|
184
|
+
filtered = frequencies.select { |_keyword, count| count >= min_count }
|
|
185
|
+
filtered.first(limit).to_h
|
|
186
|
+
else
|
|
187
|
+
{}
|
|
188
|
+
end
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
def add_keywords_to_document(document_id, keywords)
|
|
192
|
+
begin
|
|
193
|
+
document = Ragdoll::Document.find(document_id)
|
|
194
|
+
Array(keywords).each { |keyword| document.add_keyword(keyword) }
|
|
195
|
+
document.save!
|
|
196
|
+
{
|
|
197
|
+
success: true,
|
|
198
|
+
keywords: document.keywords_array
|
|
199
|
+
}
|
|
200
|
+
rescue StandardError => e
|
|
201
|
+
{
|
|
202
|
+
success: false,
|
|
203
|
+
message: e.message
|
|
204
|
+
}
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
def remove_keywords_from_document(document_id, keywords)
|
|
209
|
+
begin
|
|
210
|
+
document = Ragdoll::Document.find(document_id)
|
|
211
|
+
Array(keywords).each { |keyword| document.remove_keyword(keyword) }
|
|
212
|
+
document.save!
|
|
213
|
+
{
|
|
214
|
+
success: true,
|
|
215
|
+
keywords: document.keywords_array
|
|
216
|
+
}
|
|
217
|
+
rescue StandardError => e
|
|
218
|
+
{
|
|
219
|
+
success: false,
|
|
220
|
+
message: e.message
|
|
221
|
+
}
|
|
222
|
+
end
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
def set_document_keywords(document_id, keywords)
|
|
226
|
+
begin
|
|
227
|
+
document = Ragdoll::Document.find(document_id)
|
|
228
|
+
document.keywords = Array(keywords)
|
|
229
|
+
document.save!
|
|
230
|
+
{
|
|
231
|
+
success: true,
|
|
232
|
+
keywords: document.keywords_array
|
|
233
|
+
}
|
|
234
|
+
rescue StandardError => e
|
|
235
|
+
{
|
|
236
|
+
success: false,
|
|
237
|
+
message: e.message
|
|
238
|
+
}
|
|
239
|
+
end
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
def keyword_statistics
|
|
243
|
+
begin
|
|
244
|
+
total_keywords = 0
|
|
245
|
+
documents_with_keywords = 0
|
|
246
|
+
total_keyword_count = 0
|
|
247
|
+
keyword_frequencies = {}
|
|
248
|
+
|
|
249
|
+
if defined?(Ragdoll::Document)
|
|
250
|
+
documents_with_keywords = Ragdoll::Document.where.not(keywords: []).count
|
|
251
|
+
|
|
252
|
+
Ragdoll::Document.where.not(keywords: []).find_each do |doc|
|
|
253
|
+
doc_keywords = doc.keywords_array
|
|
254
|
+
total_keyword_count += doc_keywords.length
|
|
255
|
+
|
|
256
|
+
doc_keywords.each do |keyword|
|
|
257
|
+
keyword_frequencies[keyword] = (keyword_frequencies[keyword] || 0) + 1
|
|
258
|
+
end
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
total_keywords = keyword_frequencies.keys.length
|
|
262
|
+
avg_keywords_per_document = documents_with_keywords > 0 ? (total_keyword_count.to_f / documents_with_keywords) : 0
|
|
263
|
+
|
|
264
|
+
# Top 10 most common keywords
|
|
265
|
+
top_keywords = keyword_frequencies.sort_by { |_k, v| -v }.first(10)
|
|
266
|
+
|
|
267
|
+
# Count singleton keywords (used by only 1 document)
|
|
268
|
+
singleton_keywords = keyword_frequencies.count { |_k, v| v == 1 }
|
|
269
|
+
|
|
270
|
+
{
|
|
271
|
+
total_keywords: total_keywords,
|
|
272
|
+
documents_with_keywords: documents_with_keywords,
|
|
273
|
+
avg_keywords_per_document: avg_keywords_per_document,
|
|
274
|
+
top_keywords: top_keywords,
|
|
275
|
+
singleton_keywords: singleton_keywords
|
|
276
|
+
}
|
|
277
|
+
else
|
|
278
|
+
{
|
|
279
|
+
total_keywords: 0,
|
|
280
|
+
documents_with_keywords: 0,
|
|
281
|
+
avg_keywords_per_document: 0,
|
|
282
|
+
top_keywords: [],
|
|
283
|
+
singleton_keywords: 0
|
|
284
|
+
}
|
|
285
|
+
end
|
|
286
|
+
rescue StandardError => e
|
|
287
|
+
{
|
|
288
|
+
total_keywords: 0,
|
|
289
|
+
documents_with_keywords: 0,
|
|
290
|
+
avg_keywords_per_document: 0,
|
|
291
|
+
top_keywords: [],
|
|
292
|
+
singleton_keywords: 0,
|
|
293
|
+
error: e.message
|
|
294
|
+
}
|
|
295
|
+
end
|
|
296
|
+
end
|
|
66
297
|
|
|
67
298
|
end
|
|
68
299
|
end
|