ragdoll-cli 0.1.9 → 0.1.11
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +90 -17
- data/Rakefile +26 -7
- data/lib/ragdoll/cli/commands/keywords.rb +317 -0
- data/lib/ragdoll/cli/commands/search.rb +75 -10
- data/lib/ragdoll/cli/standalone_client.rb +183 -13
- data/lib/ragdoll/cli/version.rb +1 -1
- data/lib/ragdoll/cli.rb +114 -27
- metadata +6 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 89c5200b031e287b125fb93d7025ea666ad3456bdc209fbc68a0dc70b673a410
|
|
4
|
+
data.tar.gz: 5bfa7dabb52f7b990b5935736ec1710a3b0f4cace386903bb9491bcf4f88720a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f3eb42ababba85311e9d6bb3db1b5cf4f63f00e46a4a07ea8e9ec21841a9b4cdea44d36619b168a9298d08393d250917f66c14fa82c72b3866937833b3a44bae
|
|
7
|
+
data.tar.gz: 2a5085c3c38a9147f2bf26fb8a4ad2814bccdb05dfa3d5d76f5cb7ec21319d82d969758eece5b066f01b50100efa85ae9a9abfa20badca70d9144b5bd52c7906
|
data/README.md
CHANGED
|
@@ -42,9 +42,9 @@ This will install the `ragdoll` command-line tool.
|
|
|
42
42
|
export OPENAI_API_KEY=your_api_key_here
|
|
43
43
|
```
|
|
44
44
|
|
|
45
|
-
3. **
|
|
45
|
+
3. **Add documents:**
|
|
46
46
|
```bash
|
|
47
|
-
ragdoll
|
|
47
|
+
ragdoll add docs/*.pdf --recursive
|
|
48
48
|
```
|
|
49
49
|
|
|
50
50
|
4. **Search for content:**
|
|
@@ -77,42 +77,104 @@ ragdoll config path
|
|
|
77
77
|
ragdoll config database
|
|
78
78
|
```
|
|
79
79
|
|
|
80
|
-
### Document
|
|
80
|
+
### Document Management
|
|
81
81
|
|
|
82
82
|
```bash
|
|
83
|
-
#
|
|
84
|
-
ragdoll
|
|
83
|
+
# Add a single document
|
|
84
|
+
ragdoll add document.pdf
|
|
85
|
+
|
|
86
|
+
# Add multiple documents and directories
|
|
87
|
+
ragdoll add file1.pdf file2.txt ../docs
|
|
88
|
+
|
|
89
|
+
# Add files matching a pattern
|
|
90
|
+
ragdoll add "documents/*.pdf"
|
|
85
91
|
|
|
86
|
-
#
|
|
87
|
-
ragdoll
|
|
92
|
+
# Add recursively from directory (default: true)
|
|
93
|
+
ragdoll add "docs/" --recursive
|
|
88
94
|
|
|
89
95
|
# Filter by document type
|
|
90
|
-
ragdoll
|
|
96
|
+
ragdoll add "files/*" --type pdf
|
|
97
|
+
|
|
98
|
+
# Available types: pdf, docx, txt, md, html
|
|
99
|
+
|
|
100
|
+
# Skip confirmation prompts
|
|
101
|
+
ragdoll add docs/ --skip-confirmation
|
|
102
|
+
|
|
103
|
+
# Force addition of duplicate documents
|
|
104
|
+
ragdoll add document.pdf --force-duplicate
|
|
91
105
|
|
|
92
106
|
# Available types: pdf, docx, txt, md, html
|
|
93
107
|
```
|
|
94
108
|
|
|
109
|
+
#### Duplicate Detection
|
|
110
|
+
|
|
111
|
+
Ragdoll automatically detects and prevents duplicate documents from being processed:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
# Normal behavior - duplicates are detected and skipped
|
|
115
|
+
ragdoll add document.pdf
|
|
116
|
+
ragdoll add document.pdf # Skipped (duplicate detected)
|
|
117
|
+
|
|
118
|
+
# Force addition of duplicates when needed
|
|
119
|
+
ragdoll add document.pdf --force-duplicate # Creates new document despite duplicate
|
|
120
|
+
|
|
121
|
+
# Batch processing safely handles mixed new/duplicate files
|
|
122
|
+
ragdoll add docs/*.pdf # Only processes new files, skips duplicates
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
**Duplicate Detection Features:**
|
|
126
|
+
- **File-based detection**: Compares file location, modification time, and SHA256 hash
|
|
127
|
+
- **Content-based detection**: Compares extracted text content and metadata
|
|
128
|
+
- **Smart similarity**: Detects duplicates even with minor differences (5% tolerance)
|
|
129
|
+
- **Performance optimized**: Uses database indexes for fast duplicate lookups
|
|
130
|
+
|
|
95
131
|
### Search
|
|
96
132
|
|
|
97
133
|
```bash
|
|
98
|
-
# Basic search
|
|
134
|
+
# Basic semantic search (default)
|
|
99
135
|
ragdoll search "machine learning concepts"
|
|
100
136
|
|
|
137
|
+
# Full-text search for exact keywords
|
|
138
|
+
ragdoll search "neural networks" --search-type fulltext
|
|
139
|
+
|
|
140
|
+
# Hybrid search combining semantic and full-text
|
|
141
|
+
ragdoll search "AI algorithms" --search-type hybrid
|
|
142
|
+
|
|
143
|
+
# Customize hybrid search weights
|
|
144
|
+
ragdoll search "deep learning" --search-type hybrid --semantic-weight 0.6 --text-weight 0.4
|
|
145
|
+
|
|
101
146
|
# Limit number of results
|
|
102
147
|
ragdoll search "AI algorithms" --limit 5
|
|
103
148
|
|
|
149
|
+
# Set similarity threshold
|
|
150
|
+
ragdoll search "machine learning" --threshold 0.8
|
|
151
|
+
|
|
104
152
|
# Different output formats
|
|
105
153
|
ragdoll search "deep learning" --format json
|
|
106
154
|
ragdoll search "AI" --format plain
|
|
107
155
|
ragdoll search "ML" --format table # default
|
|
108
156
|
```
|
|
109
157
|
|
|
110
|
-
|
|
158
|
+
#### Search Types
|
|
159
|
+
|
|
160
|
+
- **Semantic Search** (default): Uses AI embeddings to find conceptually similar content
|
|
161
|
+
- **Full-text Search**: Uses PostgreSQL text search for exact keyword matching
|
|
162
|
+
- **Hybrid Search**: Combines both semantic and full-text search with configurable weights
|
|
111
163
|
|
|
112
164
|
```bash
|
|
113
|
-
#
|
|
114
|
-
ragdoll
|
|
165
|
+
# Semantic search - best for concepts and meaning
|
|
166
|
+
ragdoll search "How do neural networks learn?" --search-type semantic
|
|
167
|
+
|
|
168
|
+
# Full-text search - best for exact terms
|
|
169
|
+
ragdoll search "backpropagation algorithm" --search-type fulltext
|
|
115
170
|
|
|
171
|
+
# Hybrid search - best comprehensive results
|
|
172
|
+
ragdoll search "transformer architecture" --search-type hybrid --semantic-weight 0.7 --text-weight 0.3
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Document Operations
|
|
176
|
+
|
|
177
|
+
```bash
|
|
116
178
|
# List all documents
|
|
117
179
|
ragdoll list
|
|
118
180
|
|
|
@@ -126,6 +188,10 @@ ragdoll list --format plain
|
|
|
126
188
|
# Check document status
|
|
127
189
|
ragdoll status <id>
|
|
128
190
|
|
|
191
|
+
# Show detailed document information
|
|
192
|
+
ragdoll show <id>
|
|
193
|
+
ragdoll show <id> --format json
|
|
194
|
+
|
|
129
195
|
# Update document metadata
|
|
130
196
|
ragdoll update <id> --title "New Title"
|
|
131
197
|
|
|
@@ -135,8 +201,6 @@ ragdoll delete <id> --force # Bypass confirmation
|
|
|
135
201
|
|
|
136
202
|
# Show system statistics
|
|
137
203
|
ragdoll stats
|
|
138
|
-
ragdoll stats --format json
|
|
139
|
-
ragdoll stats --format plain
|
|
140
204
|
```
|
|
141
205
|
|
|
142
206
|
### Retrieval Utilities
|
|
@@ -232,11 +296,20 @@ ragdoll import "knowledge-base/*" --recursive
|
|
|
232
296
|
### Search and get enhanced prompts
|
|
233
297
|
|
|
234
298
|
```bash
|
|
235
|
-
#
|
|
299
|
+
# Semantic search for concepts
|
|
236
300
|
ragdoll search "How to configure SSL certificates?"
|
|
237
301
|
|
|
238
|
-
#
|
|
239
|
-
ragdoll search "
|
|
302
|
+
# Full-text search for specific terms
|
|
303
|
+
ragdoll search "SSL certificate configuration" --search-type fulltext
|
|
304
|
+
|
|
305
|
+
# Hybrid search for comprehensive results
|
|
306
|
+
ragdoll search "database optimization techniques" --search-type hybrid
|
|
307
|
+
|
|
308
|
+
# Get detailed results with custom formatting
|
|
309
|
+
ragdoll search "performance tuning" --format plain --limit 3
|
|
310
|
+
|
|
311
|
+
# Search with custom similarity threshold
|
|
312
|
+
ragdoll search "security best practices" --threshold 0.75 --search-type semantic
|
|
240
313
|
```
|
|
241
314
|
|
|
242
315
|
### Manage your knowledge base
|
data/Rakefile
CHANGED
|
@@ -1,18 +1,37 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require 'simplecov'
|
|
4
|
-
SimpleCov.start
|
|
5
|
-
|
|
6
3
|
# Suppress bundler/rubygems warnings
|
|
7
4
|
$VERBOSE = nil
|
|
8
5
|
|
|
9
6
|
require "bundler/gem_tasks"
|
|
10
7
|
require "rake/testtask"
|
|
11
8
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
9
|
+
# Custom test task that ensures proper exit codes
|
|
10
|
+
desc "Run tests"
|
|
11
|
+
task :test do
|
|
12
|
+
# Use the original TestTask internally but capture output
|
|
13
|
+
test_files = FileList["test/**/*_test.rb"]
|
|
14
|
+
|
|
15
|
+
# Run tests and capture both stdout and stderr
|
|
16
|
+
output = `bundle exec ruby -I lib:test #{test_files.join(' ')} 2>&1`
|
|
17
|
+
exit_status = $?.exitstatus
|
|
18
|
+
|
|
19
|
+
# Print the output
|
|
20
|
+
puts output
|
|
21
|
+
|
|
22
|
+
# Check if tests actually failed by looking for failure indicators
|
|
23
|
+
test_failed = output.match(/(\d+) failures.*[^0] failures/) ||
|
|
24
|
+
output.match(/(\d+) errors.*[^0] errors/) ||
|
|
25
|
+
output.include?("FAIL") ||
|
|
26
|
+
exit_status > 1 # Exit status 1 might be SimpleCov, >1 is real failure
|
|
27
|
+
|
|
28
|
+
if test_failed
|
|
29
|
+
puts "Tests failed!"
|
|
30
|
+
exit 1
|
|
31
|
+
else
|
|
32
|
+
puts "All tests passed successfully!" unless output.include?("0 failures, 0 errors")
|
|
33
|
+
exit 0
|
|
34
|
+
end
|
|
16
35
|
end
|
|
17
36
|
|
|
18
37
|
# Load annotate tasks
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'thor'
|
|
4
|
+
require 'json'
|
|
5
|
+
|
|
6
|
+
module Ragdoll
|
|
7
|
+
module CLI
|
|
8
|
+
class Keywords < Thor
|
|
9
|
+
desc 'search KEYWORD [KEYWORD2...]', 'Search documents by keywords only'
|
|
10
|
+
method_option :all, type: :boolean, default: false, aliases: '-a',
|
|
11
|
+
desc: 'Require ALL keywords to match (AND logic, default: OR logic)'
|
|
12
|
+
method_option :limit, type: :numeric, default: 20, aliases: '-l',
|
|
13
|
+
desc: 'Maximum number of results to return'
|
|
14
|
+
method_option :format, type: :string, default: 'table', aliases: '-f',
|
|
15
|
+
desc: 'Output format (table, json, plain)'
|
|
16
|
+
def search(*keywords)
|
|
17
|
+
if keywords.empty?
|
|
18
|
+
puts 'Error: No keywords provided'
|
|
19
|
+
puts 'Usage: ragdoll keywords search KEYWORD [KEYWORD2...]'
|
|
20
|
+
puts 'Examples:'
|
|
21
|
+
puts ' ragdoll keywords search ruby programming'
|
|
22
|
+
puts ' ragdoll keywords search --all ruby programming # Must contain ALL keywords'
|
|
23
|
+
puts ' ragdoll keywords search ruby --limit=50'
|
|
24
|
+
exit 1
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
client = StandaloneClient.new
|
|
28
|
+
|
|
29
|
+
puts "Searching documents by keywords: #{keywords.join(', ')}"
|
|
30
|
+
puts "Mode: #{options[:all] ? 'ALL keywords (AND)' : 'ANY keywords (OR)'}"
|
|
31
|
+
puts
|
|
32
|
+
|
|
33
|
+
begin
|
|
34
|
+
# Use the new keywords search methods
|
|
35
|
+
search_method = options[:all] ? :search_by_keywords_all : :search_by_keywords
|
|
36
|
+
results = client.public_send(search_method, keywords, limit: options[:limit])
|
|
37
|
+
|
|
38
|
+
# Convert results to standard format if needed
|
|
39
|
+
results = normalize_results(results)
|
|
40
|
+
|
|
41
|
+
if results.empty?
|
|
42
|
+
puts "No documents found with keywords: #{keywords.join(', ')}"
|
|
43
|
+
puts
|
|
44
|
+
puts "💡 Suggestions:"
|
|
45
|
+
puts " • Try different keywords"
|
|
46
|
+
puts " • Use fewer keywords"
|
|
47
|
+
puts " • Switch between --all and default (OR) modes"
|
|
48
|
+
puts " • Check available keywords with: ragdoll keywords list"
|
|
49
|
+
return
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
display_results(results, options[:format], keywords)
|
|
53
|
+
rescue StandardError => e
|
|
54
|
+
puts "Error searching by keywords: #{e.message}"
|
|
55
|
+
exit 1
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
desc 'list', 'List all available keywords in the system'
|
|
60
|
+
method_option :limit, type: :numeric, default: 100, aliases: '-l',
|
|
61
|
+
desc: 'Maximum number of keywords to show'
|
|
62
|
+
method_option :format, type: :string, default: 'table', aliases: '-f',
|
|
63
|
+
desc: 'Output format (table, json, plain)'
|
|
64
|
+
method_option :min_count, type: :numeric, default: 1, aliases: '-m',
|
|
65
|
+
desc: 'Show only keywords used by at least N documents'
|
|
66
|
+
def list
|
|
67
|
+
client = StandaloneClient.new
|
|
68
|
+
|
|
69
|
+
begin
|
|
70
|
+
keyword_frequencies = client.keyword_frequencies(
|
|
71
|
+
limit: options[:limit],
|
|
72
|
+
min_count: options[:min_count]
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
if keyword_frequencies.empty?
|
|
76
|
+
puts "No keywords found in the system."
|
|
77
|
+
puts "Add documents with keywords or update existing documents."
|
|
78
|
+
return
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
case options[:format]
|
|
82
|
+
when 'json'
|
|
83
|
+
puts JSON.pretty_generate(keyword_frequencies)
|
|
84
|
+
when 'plain'
|
|
85
|
+
keyword_frequencies.each do |keyword, count|
|
|
86
|
+
puts "#{keyword}: #{count}"
|
|
87
|
+
end
|
|
88
|
+
else
|
|
89
|
+
# Table format
|
|
90
|
+
puts "Keywords in system (minimum #{options[:min_count]} documents):"
|
|
91
|
+
puts
|
|
92
|
+
puts 'Keyword'.ljust(30) + 'Document Count'
|
|
93
|
+
puts '-' * 45
|
|
94
|
+
|
|
95
|
+
keyword_frequencies.each do |keyword, count|
|
|
96
|
+
keyword_display = keyword[0..29].ljust(30)
|
|
97
|
+
puts "#{keyword_display}#{count}"
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
puts
|
|
101
|
+
puts "Total keywords: #{keyword_frequencies.length}"
|
|
102
|
+
end
|
|
103
|
+
rescue StandardError => e
|
|
104
|
+
puts "Error listing keywords: #{e.message}"
|
|
105
|
+
exit 1
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
desc 'add DOCUMENT_ID KEYWORD [KEYWORD2...]', 'Add keywords to a document'
|
|
110
|
+
def add(document_id, *keywords)
|
|
111
|
+
if keywords.empty?
|
|
112
|
+
puts 'Error: No keywords provided'
|
|
113
|
+
puts 'Usage: ragdoll keywords add DOCUMENT_ID KEYWORD [KEYWORD2...]'
|
|
114
|
+
puts 'Example: ragdoll keywords add 123 ruby programming web'
|
|
115
|
+
exit 1
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
client = StandaloneClient.new
|
|
119
|
+
|
|
120
|
+
begin
|
|
121
|
+
result = client.add_keywords_to_document(document_id, keywords)
|
|
122
|
+
|
|
123
|
+
if result[:success]
|
|
124
|
+
puts "✓ Added keywords to document #{document_id}: #{keywords.join(', ')}"
|
|
125
|
+
puts "Document now has keywords: #{result[:keywords].join(', ')}" if result[:keywords]
|
|
126
|
+
else
|
|
127
|
+
puts "✗ Failed to add keywords: #{result[:message] || 'Unknown error'}"
|
|
128
|
+
exit 1
|
|
129
|
+
end
|
|
130
|
+
rescue StandardError => e
|
|
131
|
+
puts "Error adding keywords: #{e.message}"
|
|
132
|
+
exit 1
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
desc 'remove DOCUMENT_ID KEYWORD [KEYWORD2...]', 'Remove keywords from a document'
|
|
137
|
+
def remove(document_id, *keywords)
|
|
138
|
+
if keywords.empty?
|
|
139
|
+
puts 'Error: No keywords provided'
|
|
140
|
+
puts 'Usage: ragdoll keywords remove DOCUMENT_ID KEYWORD [KEYWORD2...]'
|
|
141
|
+
puts 'Example: ragdoll keywords remove 123 old-keyword deprecated'
|
|
142
|
+
exit 1
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
client = StandaloneClient.new
|
|
146
|
+
|
|
147
|
+
begin
|
|
148
|
+
result = client.remove_keywords_from_document(document_id, keywords)
|
|
149
|
+
|
|
150
|
+
if result[:success]
|
|
151
|
+
puts "✓ Removed keywords from document #{document_id}: #{keywords.join(', ')}"
|
|
152
|
+
puts "Document now has keywords: #{result[:keywords].join(', ')}" if result[:keywords]
|
|
153
|
+
else
|
|
154
|
+
puts "✗ Failed to remove keywords: #{result[:message] || 'Unknown error'}"
|
|
155
|
+
exit 1
|
|
156
|
+
end
|
|
157
|
+
rescue StandardError => e
|
|
158
|
+
puts "Error removing keywords: #{e.message}"
|
|
159
|
+
exit 1
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
desc 'set DOCUMENT_ID KEYWORD [KEYWORD2...]', 'Set keywords for a document (replaces existing)'
|
|
164
|
+
def set(document_id, *keywords)
|
|
165
|
+
if keywords.empty?
|
|
166
|
+
puts 'Error: No keywords provided'
|
|
167
|
+
puts 'Usage: ragdoll keywords set DOCUMENT_ID KEYWORD [KEYWORD2...]'
|
|
168
|
+
puts 'Example: ragdoll keywords set 123 ruby programming web'
|
|
169
|
+
exit 1
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
client = StandaloneClient.new
|
|
173
|
+
|
|
174
|
+
begin
|
|
175
|
+
result = client.set_document_keywords(document_id, keywords)
|
|
176
|
+
|
|
177
|
+
if result[:success]
|
|
178
|
+
puts "✓ Set keywords for document #{document_id}: #{keywords.join(', ')}"
|
|
179
|
+
else
|
|
180
|
+
puts "✗ Failed to set keywords: #{result[:message] || 'Unknown error'}"
|
|
181
|
+
exit 1
|
|
182
|
+
end
|
|
183
|
+
rescue StandardError => e
|
|
184
|
+
puts "Error setting keywords: #{e.message}"
|
|
185
|
+
exit 1
|
|
186
|
+
end
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
desc 'show DOCUMENT_ID', 'Show keywords for a specific document'
|
|
190
|
+
def show(document_id)
|
|
191
|
+
client = StandaloneClient.new
|
|
192
|
+
|
|
193
|
+
begin
|
|
194
|
+
document = client.get_document(document_id)
|
|
195
|
+
|
|
196
|
+
keywords = document[:keywords] || document['keywords'] || []
|
|
197
|
+
|
|
198
|
+
puts "Keywords for document #{document_id}:"
|
|
199
|
+
puts " Title: #{document[:title] || document['title'] || 'Untitled'}"
|
|
200
|
+
|
|
201
|
+
if keywords.empty?
|
|
202
|
+
puts " Keywords: (none)"
|
|
203
|
+
puts
|
|
204
|
+
puts "💡 Add keywords with: ragdoll keywords add #{document_id} KEYWORD1 KEYWORD2..."
|
|
205
|
+
else
|
|
206
|
+
puts " Keywords: #{keywords.join(', ')}"
|
|
207
|
+
end
|
|
208
|
+
rescue StandardError => e
|
|
209
|
+
puts "Error getting document keywords: #{e.message}"
|
|
210
|
+
exit 1
|
|
211
|
+
end
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
desc 'find KEYWORD', 'Find documents containing a specific keyword'
|
|
215
|
+
method_option :limit, type: :numeric, default: 20, aliases: '-l',
|
|
216
|
+
desc: 'Maximum number of results to return'
|
|
217
|
+
method_option :format, type: :string, default: 'table', aliases: '-f',
|
|
218
|
+
desc: 'Output format (table, json, plain)'
|
|
219
|
+
def find(keyword)
|
|
220
|
+
search(keyword)
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
desc 'stats', 'Show keyword usage statistics'
|
|
224
|
+
def stats
|
|
225
|
+
client = StandaloneClient.new
|
|
226
|
+
|
|
227
|
+
begin
|
|
228
|
+
stats = client.keyword_statistics
|
|
229
|
+
|
|
230
|
+
puts "Keyword Statistics:"
|
|
231
|
+
puts " Total unique keywords: #{stats[:total_keywords] || 0}"
|
|
232
|
+
puts " Total documents with keywords: #{stats[:documents_with_keywords] || 0}"
|
|
233
|
+
puts " Average keywords per document: #{stats[:avg_keywords_per_document]&.round(2) || 0}"
|
|
234
|
+
puts " Most common keywords:"
|
|
235
|
+
|
|
236
|
+
if stats[:top_keywords]&.any?
|
|
237
|
+
stats[:top_keywords].each_with_index do |(keyword, count), index|
|
|
238
|
+
puts " #{index + 1}. #{keyword} (#{count} documents)"
|
|
239
|
+
end
|
|
240
|
+
else
|
|
241
|
+
puts " (none)"
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
puts " Least used keywords: #{stats[:singleton_keywords] || 0}"
|
|
245
|
+
rescue StandardError => e
|
|
246
|
+
puts "Error getting keyword statistics: #{e.message}"
|
|
247
|
+
exit 1
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
private
|
|
252
|
+
|
|
253
|
+
def normalize_results(results)
|
|
254
|
+
# Ensure results are in the expected format
|
|
255
|
+
case results
|
|
256
|
+
when Array
|
|
257
|
+
results.map do |result|
|
|
258
|
+
case result
|
|
259
|
+
when Hash
|
|
260
|
+
result
|
|
261
|
+
else
|
|
262
|
+
# Convert ActiveRecord objects to hash if needed
|
|
263
|
+
if result.respond_to?(:to_hash)
|
|
264
|
+
result.to_hash
|
|
265
|
+
elsif result.respond_to?(:attributes)
|
|
266
|
+
result.attributes.symbolize_keys
|
|
267
|
+
else
|
|
268
|
+
result
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
end
|
|
272
|
+
else
|
|
273
|
+
[]
|
|
274
|
+
end
|
|
275
|
+
end
|
|
276
|
+
|
|
277
|
+
def display_results(results, format, keywords)
|
|
278
|
+
case format
|
|
279
|
+
when 'json'
|
|
280
|
+
puts JSON.pretty_generate(results)
|
|
281
|
+
when 'plain'
|
|
282
|
+
results.each_with_index do |result, index|
|
|
283
|
+
title = result[:title] || result['title'] || 'Untitled'
|
|
284
|
+
doc_keywords = result[:keywords] || result['keywords'] || []
|
|
285
|
+
matching_keywords = doc_keywords & keywords
|
|
286
|
+
|
|
287
|
+
puts "#{index + 1}. #{title}"
|
|
288
|
+
puts " ID: #{result[:id] || result['id']}"
|
|
289
|
+
puts " Keywords: #{doc_keywords.join(', ')}"
|
|
290
|
+
puts " Matching: #{matching_keywords.join(', ')}" if matching_keywords.any?
|
|
291
|
+
puts
|
|
292
|
+
end
|
|
293
|
+
else
|
|
294
|
+
# Table format
|
|
295
|
+
puts "Found #{results.length} documents:"
|
|
296
|
+
puts
|
|
297
|
+
puts 'ID'.ljust(12) + 'Title'.ljust(30) + 'Keywords'.ljust(40) + 'Matches'
|
|
298
|
+
puts '-' * 90
|
|
299
|
+
|
|
300
|
+
results.each do |result|
|
|
301
|
+
id = (result[:id] || result['id'] || '')[0..11].ljust(12)
|
|
302
|
+
title = (result[:title] || result['title'] || 'Untitled')[0..29].ljust(30)
|
|
303
|
+
doc_keywords = result[:keywords] || result['keywords'] || []
|
|
304
|
+
keywords_str = doc_keywords.join(', ')[0..39].ljust(40)
|
|
305
|
+
matching_keywords = doc_keywords & keywords
|
|
306
|
+
matches = matching_keywords.length
|
|
307
|
+
|
|
308
|
+
puts "#{id}#{title}#{keywords_str}#{matches}"
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
puts
|
|
312
|
+
puts "Use --format=json for complete results or --format=plain for detailed view"
|
|
313
|
+
end
|
|
314
|
+
end
|
|
315
|
+
end
|
|
316
|
+
end
|
|
317
|
+
end
|
|
@@ -9,7 +9,25 @@ module Ragdoll
|
|
|
9
9
|
client = StandaloneClient.new
|
|
10
10
|
|
|
11
11
|
puts "Searching for: #{query}"
|
|
12
|
-
puts "
|
|
12
|
+
puts "Search type: #{options[:search_type] || 'semantic'}"
|
|
13
|
+
|
|
14
|
+
# Show hybrid search weights if applicable
|
|
15
|
+
if options[:search_type] == 'hybrid'
|
|
16
|
+
semantic_w = options[:semantic_weight] || 0.7
|
|
17
|
+
text_w = options[:text_weight] || 0.3
|
|
18
|
+
puts "Weights: semantic=#{semantic_w}, text=#{text_w}"
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# Show keyword search mode if keywords are provided
|
|
22
|
+
if options[:keywords]
|
|
23
|
+
keywords_array = options[:keywords].split(',').map(&:strip)
|
|
24
|
+
keywords_mode = options[:keywords_all] ? "ALL keywords (AND)" : "ANY keywords (OR)"
|
|
25
|
+
puts "Keywords: #{keywords_array.join(', ')} [#{keywords_mode}]"
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Show other options, excluding display-related ones
|
|
29
|
+
relevant_options = options.to_h.except(:keywords, :keywords_all, :search_type, :semantic_weight, :text_weight, :format)
|
|
30
|
+
puts "Options: #{relevant_options}" unless relevant_options.empty?
|
|
13
31
|
puts
|
|
14
32
|
|
|
15
33
|
search_options = {}
|
|
@@ -17,7 +35,11 @@ module Ragdoll
|
|
|
17
35
|
search_options[:threshold] = options[:threshold] if options[:threshold]
|
|
18
36
|
search_options[:content_type] = options[:content_type] if options[:content_type]
|
|
19
37
|
search_options[:classification] = options[:classification] if options[:classification]
|
|
20
|
-
|
|
38
|
+
if options[:keywords]
|
|
39
|
+
keywords_array = options[:keywords].split(',').map(&:strip)
|
|
40
|
+
search_options[:keywords] = keywords_array
|
|
41
|
+
search_options[:keywords_all] = options[:keywords_all] if options[:keywords_all]
|
|
42
|
+
end
|
|
21
43
|
search_options[:tags] = options[:tags].split(',').map(&:strip) if options[:tags]
|
|
22
44
|
|
|
23
45
|
# Add search tracking options
|
|
@@ -28,11 +50,14 @@ module Ragdoll
|
|
|
28
50
|
# Select search method based on search_type
|
|
29
51
|
search_response = case options[:search_type]
|
|
30
52
|
when 'hybrid'
|
|
31
|
-
|
|
53
|
+
# Add weight parameters if provided
|
|
54
|
+
search_options[:semantic_weight] = options[:semantic_weight] if options[:semantic_weight]
|
|
55
|
+
search_options[:text_weight] = options[:text_weight] if options[:text_weight]
|
|
56
|
+
client.hybrid_search(query, **search_options)
|
|
32
57
|
when 'fulltext'
|
|
33
|
-
|
|
34
|
-
client.search(query: query, **search_options)
|
|
58
|
+
client.fulltext_search(query, **search_options)
|
|
35
59
|
else
|
|
60
|
+
# Default to semantic search
|
|
36
61
|
client.search(query: query, **search_options)
|
|
37
62
|
end
|
|
38
63
|
|
|
@@ -76,6 +101,8 @@ module Ragdoll
|
|
|
76
101
|
if highest < 0.3
|
|
77
102
|
puts " • Your query might not match the document content well"
|
|
78
103
|
puts " • Try different or more specific search terms"
|
|
104
|
+
puts " • Try keyword-based search: ragdoll keywords search KEYWORD"
|
|
105
|
+
puts " • List available keywords: ragdoll keywords list"
|
|
79
106
|
end
|
|
80
107
|
elsif above_threshold > 0
|
|
81
108
|
puts "💡 Note: Found #{above_threshold} similar content above threshold #{threshold}"
|
|
@@ -85,6 +112,7 @@ module Ragdoll
|
|
|
85
112
|
else
|
|
86
113
|
puts "(Total documents in system: #{total})" if total > 0
|
|
87
114
|
puts "Try adjusting your search terms or check if documents have been processed."
|
|
115
|
+
puts "Alternative: Use keyword-based search: ragdoll keywords search KEYWORD"
|
|
88
116
|
end
|
|
89
117
|
|
|
90
118
|
return
|
|
@@ -99,28 +127,65 @@ module Ragdoll
|
|
|
99
127
|
content = safe_string_value(result, [:content, :text], '')
|
|
100
128
|
puts "#{index + 1}. #{title}"
|
|
101
129
|
puts " ID: #{result[:document_id] || result[:id]}"
|
|
102
|
-
|
|
130
|
+
|
|
131
|
+
# Show appropriate score based on search type
|
|
132
|
+
if options[:search_type] == 'hybrid'
|
|
133
|
+
puts " Combined Score: #{result[:combined_score]&.round(3) || 'N/A'}"
|
|
134
|
+
if result[:search_types]
|
|
135
|
+
puts " Match Types: #{result[:search_types].join(', ')}"
|
|
136
|
+
end
|
|
137
|
+
elsif options[:search_type] == 'fulltext'
|
|
138
|
+
puts " Text Match: #{result[:fulltext_similarity]&.round(3) || 'N/A'}"
|
|
139
|
+
else
|
|
140
|
+
puts " Similarity: #{result[:similarity]&.round(3) || 'N/A'}"
|
|
141
|
+
end
|
|
142
|
+
|
|
103
143
|
puts " Content: #{content[0..200]}..."
|
|
104
144
|
puts
|
|
105
145
|
end
|
|
106
146
|
else
|
|
107
147
|
# Table format (default)
|
|
108
|
-
puts "Found #{results.length} results:"
|
|
148
|
+
puts "Found #{results.length} results (#{search_response[:search_type] || 'semantic'} search):"
|
|
109
149
|
puts
|
|
110
|
-
|
|
150
|
+
|
|
151
|
+
# Adjust column header based on search type
|
|
152
|
+
score_header = case options[:search_type]
|
|
153
|
+
when 'hybrid'
|
|
154
|
+
'Score'.ljust(12)
|
|
155
|
+
when 'fulltext'
|
|
156
|
+
'Text Match'.ljust(12)
|
|
157
|
+
else
|
|
158
|
+
'Similarity'.ljust(12)
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
puts 'Rank'.ljust(5) + 'Title'.ljust(30) + score_header + 'Content Preview'
|
|
111
162
|
puts '-' * 80
|
|
112
163
|
|
|
113
164
|
results.each_with_index do |result, index|
|
|
114
165
|
rank = (index + 1).to_s.ljust(5)
|
|
115
166
|
title = safe_string_value(result, [:title, :document_title], 'Untitled')[0..29].ljust(30)
|
|
116
|
-
|
|
167
|
+
|
|
168
|
+
# Get appropriate score based on search type
|
|
169
|
+
score = case options[:search_type]
|
|
170
|
+
when 'hybrid'
|
|
171
|
+
result[:combined_score] || result[:weighted_score]
|
|
172
|
+
when 'fulltext'
|
|
173
|
+
result[:fulltext_similarity]
|
|
174
|
+
else
|
|
175
|
+
result[:similarity]
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
score_str = (score&.round(3) || 'N/A').to_s.ljust(12)
|
|
117
179
|
content = safe_string_value(result, [:content, :text], '')[0..50]
|
|
118
180
|
content += '...' if content.length == 50
|
|
119
181
|
|
|
120
|
-
puts "#{rank}#{title}#{
|
|
182
|
+
puts "#{rank}#{title}#{score_str}#{content}"
|
|
121
183
|
end
|
|
122
184
|
|
|
123
185
|
puts
|
|
186
|
+
if options[:search_type] == 'hybrid' && (options[:semantic_weight] || options[:text_weight])
|
|
187
|
+
puts "Weights: semantic=#{options[:semantic_weight] || 0.7}, text=#{options[:text_weight] || 0.3}"
|
|
188
|
+
end
|
|
124
189
|
puts 'Use --format=json for complete results or --format=plain for detailed view'
|
|
125
190
|
end
|
|
126
191
|
end
|
|
@@ -6,7 +6,13 @@ module Ragdoll
|
|
|
6
6
|
include DebugMe
|
|
7
7
|
|
|
8
8
|
def add_document(path, **options)
|
|
9
|
-
|
|
9
|
+
# Map force_duplicate option to force parameter for core library
|
|
10
|
+
core_options = options.dup
|
|
11
|
+
if core_options.key?(:force_duplicate)
|
|
12
|
+
core_options[:force] = core_options.delete(:force_duplicate)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
Ragdoll.add_document(path: path, **core_options)
|
|
10
16
|
end
|
|
11
17
|
|
|
12
18
|
|
|
@@ -106,19 +112,47 @@ module Ragdoll
|
|
|
106
112
|
end
|
|
107
113
|
end
|
|
108
114
|
|
|
109
|
-
def hybrid_search(query
|
|
110
|
-
#
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
115
|
+
def hybrid_search(query, **options)
|
|
116
|
+
# Properly delegate to Ragdoll core's hybrid_search
|
|
117
|
+
Ragdoll.hybrid_search(query: query, **options)
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def fulltext_search(query, **options)
|
|
121
|
+
# Perform full-text search using Document.search_content
|
|
122
|
+
limit = options[:limit] || 20
|
|
123
|
+
threshold = options[:threshold] || 0.0
|
|
124
|
+
|
|
125
|
+
# Get full-text search results
|
|
126
|
+
documents = Ragdoll::Document.search_content(query, **options)
|
|
127
|
+
|
|
128
|
+
# Format results to match expected structure
|
|
129
|
+
results = documents.map do |doc|
|
|
130
|
+
{
|
|
131
|
+
document_id: doc.id.to_s,
|
|
132
|
+
document_title: doc.title,
|
|
133
|
+
document_location: doc.location,
|
|
134
|
+
content: doc.content[0..500], # Preview
|
|
135
|
+
fulltext_similarity: doc.respond_to?(:fulltext_similarity) ? doc.fulltext_similarity : nil,
|
|
136
|
+
document_type: doc.document_type,
|
|
137
|
+
status: doc.status
|
|
138
|
+
}
|
|
121
139
|
end
|
|
140
|
+
|
|
141
|
+
{
|
|
142
|
+
query: query,
|
|
143
|
+
search_type: 'fulltext',
|
|
144
|
+
results: results,
|
|
145
|
+
total_results: results.length,
|
|
146
|
+
threshold_used: threshold
|
|
147
|
+
}
|
|
148
|
+
rescue StandardError => e
|
|
149
|
+
{
|
|
150
|
+
query: query,
|
|
151
|
+
search_type: 'fulltext',
|
|
152
|
+
results: [],
|
|
153
|
+
total_results: 0,
|
|
154
|
+
error: "Full-text search failed: #{e.message}"
|
|
155
|
+
}
|
|
122
156
|
end
|
|
123
157
|
|
|
124
158
|
def healthy?
|
|
@@ -130,6 +164,142 @@ module Ragdoll
|
|
|
130
164
|
Ragdoll.configuration
|
|
131
165
|
end
|
|
132
166
|
|
|
167
|
+
# Keywords-specific search methods
|
|
168
|
+
def search_by_keywords(keywords, **options)
|
|
169
|
+
if defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:search_by_keywords)
|
|
170
|
+
Ragdoll::Document.search_by_keywords(keywords, **options).map(&:to_hash)
|
|
171
|
+
else
|
|
172
|
+
# Fallback to regular search with keywords filter
|
|
173
|
+
search(keywords: keywords, **options)
|
|
174
|
+
end
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
def search_by_keywords_all(keywords, **options)
|
|
178
|
+
if defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:search_by_keywords_all)
|
|
179
|
+
Ragdoll::Document.search_by_keywords_all(keywords, **options).map(&:to_hash)
|
|
180
|
+
else
|
|
181
|
+
# Fallback to regular search with keywords filter
|
|
182
|
+
search(keywords: keywords, **options)
|
|
183
|
+
end
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
def keyword_frequencies(limit: 100, min_count: 1)
|
|
187
|
+
if defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:keyword_frequencies)
|
|
188
|
+
frequencies = Ragdoll::Document.keyword_frequencies
|
|
189
|
+
# Filter by min_count and limit
|
|
190
|
+
filtered = frequencies.select { |_keyword, count| count >= min_count }
|
|
191
|
+
filtered.first(limit).to_h
|
|
192
|
+
else
|
|
193
|
+
{}
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
def add_keywords_to_document(document_id, keywords)
|
|
198
|
+
begin
|
|
199
|
+
document = Ragdoll::Document.find(document_id)
|
|
200
|
+
Array(keywords).each { |keyword| document.add_keyword(keyword) }
|
|
201
|
+
document.save!
|
|
202
|
+
{
|
|
203
|
+
success: true,
|
|
204
|
+
keywords: document.keywords_array
|
|
205
|
+
}
|
|
206
|
+
rescue StandardError => e
|
|
207
|
+
{
|
|
208
|
+
success: false,
|
|
209
|
+
message: e.message
|
|
210
|
+
}
|
|
211
|
+
end
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
def remove_keywords_from_document(document_id, keywords)
|
|
215
|
+
begin
|
|
216
|
+
document = Ragdoll::Document.find(document_id)
|
|
217
|
+
Array(keywords).each { |keyword| document.remove_keyword(keyword) }
|
|
218
|
+
document.save!
|
|
219
|
+
{
|
|
220
|
+
success: true,
|
|
221
|
+
keywords: document.keywords_array
|
|
222
|
+
}
|
|
223
|
+
rescue StandardError => e
|
|
224
|
+
{
|
|
225
|
+
success: false,
|
|
226
|
+
message: e.message
|
|
227
|
+
}
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
def set_document_keywords(document_id, keywords)
|
|
232
|
+
begin
|
|
233
|
+
document = Ragdoll::Document.find(document_id)
|
|
234
|
+
document.keywords = Array(keywords)
|
|
235
|
+
document.save!
|
|
236
|
+
{
|
|
237
|
+
success: true,
|
|
238
|
+
keywords: document.keywords_array
|
|
239
|
+
}
|
|
240
|
+
rescue StandardError => e
|
|
241
|
+
{
|
|
242
|
+
success: false,
|
|
243
|
+
message: e.message
|
|
244
|
+
}
|
|
245
|
+
end
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
def keyword_statistics
|
|
249
|
+
begin
|
|
250
|
+
total_keywords = 0
|
|
251
|
+
documents_with_keywords = 0
|
|
252
|
+
total_keyword_count = 0
|
|
253
|
+
keyword_frequencies = {}
|
|
254
|
+
|
|
255
|
+
if defined?(Ragdoll::Document)
|
|
256
|
+
documents_with_keywords = Ragdoll::Document.where.not(keywords: []).count
|
|
257
|
+
|
|
258
|
+
Ragdoll::Document.where.not(keywords: []).find_each do |doc|
|
|
259
|
+
doc_keywords = doc.keywords_array
|
|
260
|
+
total_keyword_count += doc_keywords.length
|
|
261
|
+
|
|
262
|
+
doc_keywords.each do |keyword|
|
|
263
|
+
keyword_frequencies[keyword] = (keyword_frequencies[keyword] || 0) + 1
|
|
264
|
+
end
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
total_keywords = keyword_frequencies.keys.length
|
|
268
|
+
avg_keywords_per_document = documents_with_keywords > 0 ? (total_keyword_count.to_f / documents_with_keywords) : 0
|
|
269
|
+
|
|
270
|
+
# Top 10 most common keywords
|
|
271
|
+
top_keywords = keyword_frequencies.sort_by { |_k, v| -v }.first(10)
|
|
272
|
+
|
|
273
|
+
# Count singleton keywords (used by only 1 document)
|
|
274
|
+
singleton_keywords = keyword_frequencies.count { |_k, v| v == 1 }
|
|
275
|
+
|
|
276
|
+
{
|
|
277
|
+
total_keywords: total_keywords,
|
|
278
|
+
documents_with_keywords: documents_with_keywords,
|
|
279
|
+
avg_keywords_per_document: avg_keywords_per_document,
|
|
280
|
+
top_keywords: top_keywords,
|
|
281
|
+
singleton_keywords: singleton_keywords
|
|
282
|
+
}
|
|
283
|
+
else
|
|
284
|
+
{
|
|
285
|
+
total_keywords: 0,
|
|
286
|
+
documents_with_keywords: 0,
|
|
287
|
+
avg_keywords_per_document: 0,
|
|
288
|
+
top_keywords: [],
|
|
289
|
+
singleton_keywords: 0
|
|
290
|
+
}
|
|
291
|
+
end
|
|
292
|
+
rescue StandardError => e
|
|
293
|
+
{
|
|
294
|
+
total_keywords: 0,
|
|
295
|
+
documents_with_keywords: 0,
|
|
296
|
+
avg_keywords_per_document: 0,
|
|
297
|
+
top_keywords: [],
|
|
298
|
+
singleton_keywords: 0,
|
|
299
|
+
error: e.message
|
|
300
|
+
}
|
|
301
|
+
end
|
|
302
|
+
end
|
|
133
303
|
|
|
134
304
|
end
|
|
135
305
|
end
|
data/lib/ragdoll/cli/version.rb
CHANGED
data/lib/ragdoll/cli.rb
CHANGED
|
@@ -15,6 +15,7 @@ require_relative 'cli/commands/config'
|
|
|
15
15
|
require_relative 'cli/commands/delete'
|
|
16
16
|
require_relative 'cli/commands/update'
|
|
17
17
|
require_relative 'cli/commands/analytics'
|
|
18
|
+
require_relative 'cli/commands/keywords'
|
|
18
19
|
|
|
19
20
|
module Ragdoll
|
|
20
21
|
module CLI
|
|
@@ -45,7 +46,9 @@ module Ragdoll
|
|
|
45
46
|
method_option :classification, type: :string, aliases: '-C',
|
|
46
47
|
desc: 'Filter by classification'
|
|
47
48
|
method_option :keywords, type: :string, aliases: '-k',
|
|
48
|
-
desc: 'Filter by keywords (comma-separated)'
|
|
49
|
+
desc: 'Filter by keywords (comma-separated). Use ragdoll keywords for keyword-only search'
|
|
50
|
+
method_option :keywords_all, type: :boolean, default: false, aliases: '-K',
|
|
51
|
+
desc: 'Require ALL keywords to match (default: any keyword matches)'
|
|
49
52
|
method_option :tags, type: :string, aliases: '-T',
|
|
50
53
|
desc: 'Filter by tags (comma-separated)'
|
|
51
54
|
method_option :format, type: :string, default: 'table', aliases: '-f',
|
|
@@ -58,6 +61,10 @@ module Ragdoll
|
|
|
58
61
|
desc: 'Enable search tracking (default: true)'
|
|
59
62
|
method_option :search_type, type: :string, default: 'semantic', aliases: '-S',
|
|
60
63
|
desc: 'Search type: semantic, hybrid, fulltext (default: semantic)'
|
|
64
|
+
method_option :semantic_weight, type: :numeric, aliases: '-w',
|
|
65
|
+
desc: 'Weight for semantic search in hybrid mode (0.0-1.0, default: 0.7)'
|
|
66
|
+
method_option :text_weight, type: :numeric, aliases: '-W',
|
|
67
|
+
desc: 'Weight for text search in hybrid mode (0.0-1.0, default: 0.3)'
|
|
61
68
|
def search(query)
|
|
62
69
|
Search.new.call(query, options)
|
|
63
70
|
end
|
|
@@ -68,6 +75,9 @@ module Ragdoll
|
|
|
68
75
|
desc 'analytics SUBCOMMAND', 'Search analytics and reporting'
|
|
69
76
|
subcommand 'analytics', Analytics
|
|
70
77
|
|
|
78
|
+
desc 'keywords SUBCOMMAND', 'Manage and search by document keywords'
|
|
79
|
+
subcommand 'keywords', Keywords
|
|
80
|
+
|
|
71
81
|
desc 'stats', 'Show document and embedding statistics'
|
|
72
82
|
def stats
|
|
73
83
|
client = StandaloneClient.new
|
|
@@ -159,12 +169,22 @@ module Ragdoll
|
|
|
159
169
|
puts " Status: #{document[:status]}"
|
|
160
170
|
puts " Embeddings Count: #{document[:embeddings_count]}"
|
|
161
171
|
puts " Content Length: #{document[:content_length]} characters"
|
|
172
|
+
|
|
173
|
+
# Show keywords prominently
|
|
174
|
+
keywords = document[:keywords] || document['keywords'] || []
|
|
175
|
+
if keywords.any?
|
|
176
|
+
puts " Keywords: #{keywords.join(', ')}"
|
|
177
|
+
else
|
|
178
|
+
puts " Keywords: (none)"
|
|
179
|
+
end
|
|
180
|
+
|
|
162
181
|
puts " Created: #{document[:created_at]}"
|
|
163
182
|
puts " Updated: #{document[:updated_at]}"
|
|
164
183
|
|
|
165
|
-
if document[:metadata]
|
|
184
|
+
if document[:metadata] && document[:metadata].any?
|
|
166
185
|
puts "\nMetadata:"
|
|
167
186
|
document[:metadata].each do |key, value|
|
|
187
|
+
next if key == 'keywords' # Already displayed above
|
|
168
188
|
puts " #{key}: #{value}"
|
|
169
189
|
end
|
|
170
190
|
end
|
|
@@ -193,9 +213,25 @@ module Ragdoll
|
|
|
193
213
|
desc: 'Maximum number of documents to list'
|
|
194
214
|
method_option :format, type: :string, default: 'table', aliases: '-f',
|
|
195
215
|
desc: 'Output format (table, json, plain)'
|
|
216
|
+
method_option :keywords, type: :string, aliases: '-k',
|
|
217
|
+
desc: 'Filter by keywords (comma-separated)'
|
|
218
|
+
method_option :keywords_all, type: :boolean, default: false, aliases: '-K',
|
|
219
|
+
desc: 'Require ALL keywords to match (default: any keyword matches)'
|
|
196
220
|
def list
|
|
197
221
|
client = StandaloneClient.new
|
|
198
|
-
|
|
222
|
+
|
|
223
|
+
# Handle keyword filtering if provided
|
|
224
|
+
if options[:keywords]
|
|
225
|
+
keywords_array = options[:keywords].split(',').map(&:strip)
|
|
226
|
+
search_method = options[:keywords_all] ? :search_by_keywords_all : :search_by_keywords
|
|
227
|
+
documents = client.public_send(search_method, keywords_array, limit: options[:limit])
|
|
228
|
+
|
|
229
|
+
puts "Listing documents with keywords: #{keywords_array.join(', ')}"
|
|
230
|
+
puts "Mode: #{options[:keywords_all] ? 'ALL keywords (AND)' : 'ANY keywords (OR)'}"
|
|
231
|
+
puts
|
|
232
|
+
else
|
|
233
|
+
documents = client.list_documents(limit: options[:limit])
|
|
234
|
+
end
|
|
199
235
|
|
|
200
236
|
# Get accurate embeddings count for all documents
|
|
201
237
|
documents.each do |doc|
|
|
@@ -215,16 +251,30 @@ module Ragdoll
|
|
|
215
251
|
puts "#{doc[:id]}: #{doc[:title] || 'Untitled'}"
|
|
216
252
|
end
|
|
217
253
|
else
|
|
218
|
-
# Table format
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
254
|
+
# Table format - show keywords if keyword filtering is being used
|
|
255
|
+
if options[:keywords]
|
|
256
|
+
puts 'ID'.ljust(10) + 'Title'.ljust(30) + 'Keywords'.ljust(35) + 'Status'.ljust(10) + 'Emb'
|
|
257
|
+
puts '-' * 90
|
|
258
|
+
documents.each do |doc|
|
|
259
|
+
id = (doc[:id] || doc['id'] || '')[0..9].ljust(10)
|
|
260
|
+
title = (doc[:title] || doc['title'] || 'Untitled')[0..29].ljust(30)
|
|
261
|
+
keywords = (doc[:keywords] || doc['keywords'] || []).join(', ')[0..34].ljust(35)
|
|
262
|
+
status = (doc[:status] || doc['status'] || 'unknown')[0..9].ljust(10)
|
|
263
|
+
embeddings = (doc[:embeddings_count] || doc['embeddings_count'] || 0).to_s
|
|
264
|
+
|
|
265
|
+
puts "#{id}#{title}#{keywords}#{status}#{embeddings}"
|
|
266
|
+
end
|
|
267
|
+
else
|
|
268
|
+
puts 'ID'.ljust(10) + 'Title'.ljust(40) + 'Status'.ljust(12) + 'Embeddings'
|
|
269
|
+
puts '-' * 80
|
|
270
|
+
documents.each do |doc|
|
|
271
|
+
id = (doc[:id] || doc['id'] || '')[0..9].ljust(10)
|
|
272
|
+
title = (doc[:title] || doc['title'] || 'Untitled')[0..39].ljust(40)
|
|
273
|
+
status = (doc[:status] || doc['status'] || 'unknown')[0..11].ljust(12)
|
|
274
|
+
embeddings = (doc[:embeddings_count] || doc['embeddings_count'] || 0).to_s
|
|
275
|
+
|
|
276
|
+
puts "#{id}#{title}#{status}#{embeddings}"
|
|
277
|
+
end
|
|
228
278
|
end
|
|
229
279
|
end
|
|
230
280
|
end
|
|
@@ -235,17 +285,22 @@ module Ragdoll
|
|
|
235
285
|
desc: 'Recursively process subdirectories (default: true)'
|
|
236
286
|
method_option :type, type: :string, aliases: '-t',
|
|
237
287
|
desc: 'Filter by document type (pdf, docx, txt, md, html)'
|
|
238
|
-
method_option :
|
|
288
|
+
method_option :skip_confirmation, type: :boolean, default: false, aliases: '-y',
|
|
239
289
|
desc: 'Skip confirmation prompts'
|
|
290
|
+
method_option :force_duplicate, type: :boolean, default: false, aliases: '-f',
|
|
291
|
+
desc: 'Force addition of duplicate documents (bypasses duplicate detection)'
|
|
240
292
|
def add(*paths)
|
|
241
293
|
if paths.empty?
|
|
242
294
|
puts 'Error: No paths provided'
|
|
243
|
-
puts 'Usage: ragdoll add PATH [PATH2] [PATH3]...'
|
|
295
|
+
puts 'Usage: ragdoll add PATH [PATH2] [PATH3]... [OPTIONS]'
|
|
244
296
|
puts 'Examples:'
|
|
245
297
|
puts ' ragdoll add file.pdf'
|
|
246
298
|
puts ' ragdoll add ../docs'
|
|
247
299
|
puts ' ragdoll add ../docs/**/*.md'
|
|
248
300
|
puts ' ragdoll add file1.txt file2.pdf ../docs'
|
|
301
|
+
puts ' ragdoll add file.pdf --force-duplicate # Force add even if duplicate'
|
|
302
|
+
puts ' ragdoll add ../docs --type=pdf # Only process PDF files'
|
|
303
|
+
puts ' ragdoll add ../docs --skip-confirmation # Skip prompts'
|
|
249
304
|
exit 1
|
|
250
305
|
end
|
|
251
306
|
|
|
@@ -288,12 +343,16 @@ module Ragdoll
|
|
|
288
343
|
|
|
289
344
|
progressbar.finish
|
|
290
345
|
|
|
291
|
-
# Summary
|
|
346
|
+
# Summary with duplicate detection information
|
|
292
347
|
success_count = all_results.count { |r| r && r[:status] == 'success' }
|
|
293
348
|
error_count = all_results.count { |r| r && r[:status] == 'error' }
|
|
349
|
+
duplicate_count = all_results.count { |r| r && r[:status] == 'success' && r[:duplicate] }
|
|
350
|
+
new_count = success_count - duplicate_count
|
|
294
351
|
|
|
295
352
|
puts "\nCompleted:"
|
|
296
|
-
puts " Successfully
|
|
353
|
+
puts " Successfully processed: #{success_count} files"
|
|
354
|
+
puts " New documents: #{new_count}"
|
|
355
|
+
puts " Duplicates #{options[:force_duplicate] ? 'forced' : 'detected'}: #{duplicate_count}" if duplicate_count > 0
|
|
297
356
|
puts " Errors: #{error_count} files"
|
|
298
357
|
|
|
299
358
|
if error_count > 0
|
|
@@ -305,10 +364,33 @@ module Ragdoll
|
|
|
305
364
|
|
|
306
365
|
return unless success_count > 0
|
|
307
366
|
|
|
308
|
-
|
|
309
|
-
all_results.select { |r| r && r[:status] == 'success'
|
|
310
|
-
|
|
311
|
-
puts "
|
|
367
|
+
# Show new documents
|
|
368
|
+
new_documents = all_results.select { |r| r && r[:status] == 'success' && !r[:duplicate] }
|
|
369
|
+
if new_documents.any?
|
|
370
|
+
puts "\nNew documents added:"
|
|
371
|
+
new_documents.each do |result|
|
|
372
|
+
puts " #{result[:file]} (ID: #{result[:document_id]})"
|
|
373
|
+
puts " #{result[:message]}" if result[:message]
|
|
374
|
+
end
|
|
375
|
+
end
|
|
376
|
+
|
|
377
|
+
# Show duplicate information
|
|
378
|
+
duplicate_documents = all_results.select { |r| r && r[:status] == 'success' && r[:duplicate] }
|
|
379
|
+
if duplicate_documents.any?
|
|
380
|
+
if options[:force_duplicate]
|
|
381
|
+
puts "\nDuplicates forced to be added:"
|
|
382
|
+
duplicate_documents.each do |result|
|
|
383
|
+
puts " #{result[:file]} (ID: #{result[:document_id]})"
|
|
384
|
+
puts " #{result[:message]}" if result[:message]
|
|
385
|
+
end
|
|
386
|
+
else
|
|
387
|
+
puts "\nDuplicates detected (skipped):"
|
|
388
|
+
duplicate_documents.each do |result|
|
|
389
|
+
puts " #{result[:file]} (existing ID: #{result[:document_id]})"
|
|
390
|
+
puts " #{result[:message]}" if result[:message]
|
|
391
|
+
end
|
|
392
|
+
puts "\nTip: Use --force-duplicate (-f) to force adding duplicates"
|
|
393
|
+
end
|
|
312
394
|
end
|
|
313
395
|
|
|
314
396
|
puts "\nNote: Documents are being processed in the background."
|
|
@@ -362,12 +444,19 @@ module Ragdoll
|
|
|
362
444
|
|
|
363
445
|
def process_single_file(client, path, options)
|
|
364
446
|
begin
|
|
365
|
-
|
|
447
|
+
# Pass force_duplicate parameter for duplicate detection
|
|
448
|
+
result = client.add_document(path, force_duplicate: options[:force_duplicate])
|
|
449
|
+
|
|
450
|
+
# Determine if this was a duplicate detection
|
|
451
|
+
duplicate_detected = result[:duplicate] || (result[:message] && result[:message].include?('already exists'))
|
|
452
|
+
|
|
366
453
|
{
|
|
367
454
|
file: path,
|
|
368
455
|
document_id: result[:document_id],
|
|
369
456
|
status: result[:success] ? 'success' : 'error',
|
|
370
|
-
message: result[:message]
|
|
457
|
+
message: result[:message],
|
|
458
|
+
duplicate: duplicate_detected,
|
|
459
|
+
forced: options[:force_duplicate]
|
|
371
460
|
}
|
|
372
461
|
rescue StandardError => e
|
|
373
462
|
{
|
|
@@ -489,16 +578,12 @@ module Ragdoll
|
|
|
489
578
|
end
|
|
490
579
|
|
|
491
580
|
def display_no_results_feedback(query, search_response, command_type)
|
|
492
|
-
# Extract the actual results array from the response
|
|
493
|
-
results = search_response[:results] || search_response['results'] || []
|
|
494
|
-
|
|
495
581
|
puts "No results found for '#{query}'"
|
|
496
582
|
puts
|
|
497
583
|
|
|
498
584
|
# Get statistics for better feedback
|
|
499
585
|
statistics = search_response[:statistics] || search_response['statistics']
|
|
500
586
|
execution_time = search_response[:execution_time_ms] || search_response['execution_time_ms']
|
|
501
|
-
total = search_response[:total_results] || search_response['total_results'] || 0
|
|
502
587
|
|
|
503
588
|
if statistics
|
|
504
589
|
threshold = statistics[:threshold_used] || statistics['threshold_used']
|
|
@@ -528,6 +613,8 @@ module Ragdoll
|
|
|
528
613
|
if highest < 0.3
|
|
529
614
|
puts " • Your query might not match the document content well"
|
|
530
615
|
puts " • Try different or more specific search terms"
|
|
616
|
+
puts " • Try keyword-based search: ragdoll keywords search KEYWORD"
|
|
617
|
+
puts " • List available keywords: ragdoll keywords list"
|
|
531
618
|
end
|
|
532
619
|
elsif above_threshold > 0
|
|
533
620
|
puts "💡 Note: Found #{above_threshold} similar content above threshold #{threshold}"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ragdoll-cli
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.11
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dewayne VanHoozer
|
|
@@ -15,14 +15,14 @@ dependencies:
|
|
|
15
15
|
requirements:
|
|
16
16
|
- - ">="
|
|
17
17
|
- !ruby/object:Gem::Version
|
|
18
|
-
version:
|
|
18
|
+
version: 0.1.10
|
|
19
19
|
type: :runtime
|
|
20
20
|
prerelease: false
|
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
22
22
|
requirements:
|
|
23
23
|
- - ">="
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
|
-
version:
|
|
25
|
+
version: 0.1.10
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
27
27
|
name: ruby-progressbar
|
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -177,7 +177,8 @@ dependencies:
|
|
|
177
177
|
- - ">="
|
|
178
178
|
- !ruby/object:Gem::Version
|
|
179
179
|
version: '0'
|
|
180
|
-
description:
|
|
180
|
+
description: Command-line interface for Ragdoll RAG system with semantic, full-text,
|
|
181
|
+
and hybrid search capabilities. Under development. Contributors welcome.
|
|
181
182
|
email:
|
|
182
183
|
- dvanhoozer@gmail.com
|
|
183
184
|
executables:
|
|
@@ -193,6 +194,7 @@ files:
|
|
|
193
194
|
- lib/ragdoll/cli/commands/config.rb
|
|
194
195
|
- lib/ragdoll/cli/commands/delete.rb
|
|
195
196
|
- lib/ragdoll/cli/commands/health.rb
|
|
197
|
+
- lib/ragdoll/cli/commands/keywords.rb
|
|
196
198
|
- lib/ragdoll/cli/commands/list.rb
|
|
197
199
|
- lib/ragdoll/cli/commands/search.rb
|
|
198
200
|
- lib/ragdoll/cli/commands/stats.rb
|