ragdoll-cli 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,380 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'debug_me'
4
+ include DebugMe
5
+
6
+ require 'thor'
7
+ require 'json'
8
+ require 'ruby-progressbar'
9
+
10
+ require_relative 'cli/version'
11
+ require_relative 'cli/configuration_loader'
12
+ require_relative 'cli/standalone_client'
13
+ require_relative 'cli/commands/search'
14
+ require_relative 'cli/commands/config'
15
+ require_relative 'cli/commands/delete'
16
+ require_relative 'cli/commands/update'
17
+
18
+ module Ragdoll
19
+ module CLI
20
+ class Main < Thor
21
# Builds the CLI instance and then loads the Ragdoll configuration.
#
# The three parameters are forwarded unchanged to the superclass (Thor)
# initializer; configuration loading happens only after that call returns.
def initialize(args = [], local_options = {}, config = {})
  super(args, local_options, config)
  load_configuration
end
25
+
26
+ desc 'version', 'Show version information'
27
# Prints the entries returned by Ragdoll.version, one per line.
def version
  version_lines = Ragdoll.version
  puts version_lines.join("\n")
end
30
+
31
+ desc 'init', 'Initialize Ragdoll configuration'
32
# Delegates to the `init` action of a fresh Config command object.
def init
  config_command = Config.new
  config_command.init
end
35
+
36
+
37
+ desc 'search QUERY', 'Search for documents matching the query'
38
+ method_option :limit, type: :numeric, default: 10, aliases: '-l',
39
+ desc: 'Maximum number of results to return'
40
+ method_option :content_type, type: :string, aliases: '-c',
41
+ desc: 'Filter by content type (text, image, audio)'
42
+ method_option :classification, type: :string, aliases: '-C',
43
+ desc: 'Filter by classification'
44
+ method_option :keywords, type: :string, aliases: '-k',
45
+ desc: 'Filter by keywords (comma-separated)'
46
+ method_option :tags, type: :string, aliases: '-T',
47
+ desc: 'Filter by tags (comma-separated)'
48
+ method_option :format, type: :string, default: 'table', aliases: '-f',
49
+ desc: 'Output format (table, json, plain)'
50
# Runs a search by handing the query and the parsed CLI options to a
# fresh Search command object.
#
# query - the search text given on the command line.
def search(query)
  search_command = Search.new
  search_command.call(query, options)
end
53
+
54
+ desc 'config SUBCOMMAND', 'Manage configuration'
55
+ subcommand 'config', Config
56
+
57
+ desc 'stats', 'Show document and embedding statistics'
58
# Prints the statistics reported by StandaloneClient#stats.
#
# Always prints the totals and the storage type (falling back to
# 'unknown'). The :by_status, :by_type and :content_types sections are
# printed only when the corresponding entry is present in the result.
def stats
  summary = StandaloneClient.new.stats

  puts 'System Statistics:'
  puts " Total documents: #{summary[:total_documents]}"
  puts " Total embeddings: #{summary[:total_embeddings]}"
  puts " Storage type: #{summary[:storage_type] || 'unknown'}"

  # The two indented breakdowns share one layout: a heading, then one
  # "label: count" line per entry.
  {
    by_status: ' Documents by status:',
    by_type: ' Documents by type:'
  }.each do |section_key, heading|
    breakdown = summary[section_key]
    next unless breakdown

    puts heading
    breakdown.each { |label, total| puts " #{label}: #{total}" }
  end

  content_types = summary[:content_types]
  return unless content_types

  puts "\nContent Types:"
  content_types.each { |label, total| puts " #{label}: #{total}" }
end
88
+
89
+ desc 'status DOCUMENT_ID', 'Show document processing status'
90
# Prints the processing status of one document.
#
# document_id - identifier passed through to StandaloneClient#document_status.
#
# Any StandardError raised while fetching or printing the status is
# reported as a single "Error getting document status" line instead of
# propagating. Errors from constructing the client are not rescued.
def status(document_id)
  client = StandaloneClient.new

  begin
    info = client.document_status(document_id)
    puts "Document Status for ID: #{document_id}"
    puts " Status: #{info[:status]}"
    puts " Embeddings Count: #{info[:embeddings_count]}"
    ready_label = info[:embeddings_ready] ? 'Yes' : 'No'
    puts " Embeddings Ready: #{ready_label}"
    puts " Message: #{info[:message]}"
  rescue StandardError => error
    puts "Error getting document status: #{error.message}"
  end
end
104
+
105
+ desc 'show DOCUMENT_ID', 'Show detailed document information'
106
+ method_option :format, type: :string, default: 'table', aliases: '-f',
107
+ desc: 'Output format (table, json)'
108
# Prints the details of one document, either as pretty-printed JSON
# (when the :format option is 'json') or as a labelled listing.
#
# document_id - identifier passed through to StandaloneClient#get_document.
#
# Any StandardError raised while fetching or printing is reported as a
# single "Error getting document" line. Errors from constructing the
# client are not rescued.
def show(document_id)
  client = StandaloneClient.new

  begin
    record = client.get_document(document_id)

    if options[:format] == 'json'
      puts JSON.pretty_generate(record)
    else
      puts "Document Details for ID: #{document_id}"
      puts " Title: #{record[:title]}"
      puts " Status: #{record[:status]}"
      puts " Embeddings Count: #{record[:embeddings_count]}"
      puts " Content Length: #{record[:content_length]} characters"
      puts " Created: #{record[:created_at]}"
      puts " Updated: #{record[:updated_at]}"

      metadata = record[:metadata]
      if metadata
        puts "\nMetadata:"
        metadata.each { |key, value| puts " #{key}: #{value}" }
      end
    end
  rescue StandardError => error
    puts "Error getting document: #{error.message}"
  end
end
137
+
138
+ desc 'health', 'Check system health'
139
# Reports whether StandaloneClient#healthy? is truthy.
#
# On success three confirmation lines are printed. On failure one
# failure line is printed and the process exits with status 1.
def health
  unless StandaloneClient.new.healthy?
    puts '✗ System health check failed'
    exit 1
  end

  puts '✓ System is healthy'
  puts '✓ Database connection: OK'
  puts '✓ Configuration: OK'
end
151
+
152
+ desc 'list', 'List all documents'
153
+ method_option :limit, type: :numeric, default: 20, aliases: '-l',
154
+ desc: 'Maximum number of documents to list'
155
+ method_option :format, type: :string, default: 'table', aliases: '-f',
156
+ desc: 'Output format (table, json, plain)'
157
# Lists documents returned by StandaloneClient#list_documents in the
# format selected by the :format option ('json', 'plain', anything else
# renders the table).
#
# Before rendering, each document's embeddings count is refreshed from
# StandaloneClient#document_status; if that lookup fails the count already
# on the document is kept (deliberate best-effort).
#
# Documents may use symbol or string keys; every field is read with both.
# Field values are converted with #to_s before slicing/padding so that
# non-String values (e.g. Integer ids, Symbol statuses) render instead of
# raising NoMethodError.
def list
  client = StandaloneClient.new
  documents = client.list_documents(limit: options[:limit])

  # Reads a field from a document that may be keyed by symbols or strings.
  read_field = ->(doc, name) { doc[name] || doc[name.to_s] }

  documents.each do |doc|
    status_info = client.document_status(read_field.call(doc, :id))
    # Write back under the key style the document already uses; adding a
    # symbol key next to an existing string key would emit the field twice
    # in JSON output.
    uses_string_key = doc.respond_to?(:key?) && doc.key?('embeddings_count')
    count_key = uses_string_key ? 'embeddings_count' : :embeddings_count
    doc[count_key] = status_info[:embeddings_count]
  rescue StandardError
    # Keep original count if status fails
  end

  case options[:format]
  when 'json'
    puts JSON.pretty_generate(documents)
  when 'plain'
    documents.each do |doc|
      puts "#{read_field.call(doc, :id)}: #{read_field.call(doc, :title) || 'Untitled'}"
    end
  else
    # Table format
    puts 'ID'.ljust(10) + 'Title'.ljust(40) + 'Status'.ljust(12) + 'Embeddings'
    puts '-' * 80
    documents.each do |doc|
      id = read_field.call(doc, :id).to_s[0..9].ljust(10)
      title = (read_field.call(doc, :title) || 'Untitled').to_s[0..39].ljust(40)
      status = (read_field.call(doc, :status) || 'unknown').to_s[0..11].ljust(12)
      embeddings = (read_field.call(doc, :embeddings_count) || 0).to_s

      puts "#{id}#{title}#{status}#{embeddings}"
    end
  end
end
192
+
193
+ # -- Core API Parity Commands --
194
+ desc 'add PATHS...', 'Add documents, directories, or glob patterns'
195
+ method_option :recursive, type: :boolean, default: true, aliases: '-r',
196
+ desc: 'Recursively process subdirectories (default: true)'
197
+ method_option :type, type: :string, aliases: '-t',
198
+ desc: 'Filter by document type (pdf, docx, txt, md, html)'
199
+ method_option :force, type: :boolean, default: false, aliases: '-f',
200
+ desc: 'Skip confirmation prompts'
201
# Adds every file reachable from the given paths and prints a summary.
#
# paths - one or more file paths, directory paths, or glob patterns
#         (a path containing '*' or '?' is treated as a glob).
#
# With no paths, usage help is printed and the process exits with status 1.
# Paths that are neither glob, directory nor regular file produce a warning
# and are skipped. Files are processed one at a time through
# process_single_file while a progress bar is advanced.
#
# The same file reached through several arguments (for example a directory
# and a file inside it) is processed only once; files are compared by
# absolute path and the first spelling encountered is kept.
def add(*paths)
  if paths.empty?
    puts 'Error: No paths provided'
    puts 'Usage: ragdoll add PATH [PATH2] [PATH3]...'
    puts 'Examples:'
    puts ' ragdoll add file.pdf'
    puts ' ragdoll add ../docs'
    puts ' ragdoll add ../docs/**/*.md'
    puts ' ragdoll add file1.txt file2.pdf ../docs'
    exit 1
  end

  client = StandaloneClient.new

  # First pass: collect all files to process
  candidates = paths.flat_map do |path|
    if path.include?('*') || path.include?('?')
      collect_files_from_glob(path, options)
    elsif File.directory?(path)
      collect_files_from_directory(path, options)
    elsif File.file?(path)
      [path]
    else
      puts "Warning: Path not found or not accessible: #{path}"
      []
    end
  end
  all_files = candidates.uniq { |file| File.expand_path(file) }

  if all_files.empty?
    puts "No files found to process."
    return
  end

  # Initialize progress bar
  progressbar = ProgressBar.create(
    title: "Adding documents",
    total: all_files.length,
    format: "%t: |%B| %p%% (%c/%C) %e %f"
  )

  # Second pass: process each file with progress
  all_results = all_files.map do |file_path|
    progressbar.log "Processing: #{File.basename(file_path)}"
    result = process_single_file(client, file_path, options)
    progressbar.increment
    result
  end

  progressbar.finish

  # Summary: filter once and reuse the lists for both counts and details.
  succeeded = all_results.select { |r| r && r[:status] == 'success' }
  failed = all_results.select { |r| r && r[:status] == 'error' }

  puts "\nCompleted:"
  puts " Successfully added: #{succeeded.length} files"
  puts " Errors: #{failed.length} files"

  unless failed.empty?
    puts "\nErrors:"
    failed.each do |result|
      puts " #{result[:file]}: #{result[:error] || result[:message]}"
    end
  end

  return if succeeded.empty?

  puts "\nSuccessfully added files:"
  succeeded.each do |result|
    puts " #{result[:file]} (ID: #{result[:document_id]})"
    puts " #{result[:message]}" if result[:message]
  end

  puts "\nNote: Documents are being processed in the background."
  puts "Use 'ragdoll status <id>' to check processing status."
end
278
+
279
+ private
280
+
281
# Expands a glob pattern into the list of files to process.
#
# pattern - glob pattern handed to Dir.glob.
# options - option hash; :recursive controls whether matched directories
#           are descended into, and the hash is forwarded to the filters.
#
# Matched regular files are kept when should_process_file? accepts them.
# Matched directories contribute the result of
# collect_files_from_directory, but only when options[:recursive] is set.
# Returns an Array of paths.
def collect_files_from_glob(pattern, options)
  Dir.glob(pattern).flat_map do |entry|
    if File.file?(entry)
      should_process_file?(entry, options) ? [entry] : []
    elsif File.directory?(entry) && options[:recursive]
      collect_files_from_directory(entry, options)
    else
      []
    end
  end
end
292
+
293
# Lists the files under a directory that should be processed.
#
# dir_path - directory to scan.
# options  - option hash; with :recursive set the whole tree below
#            dir_path is scanned, otherwise only its direct entries.
#
# Only regular files accepted by should_process_file? are returned.
# Returns an Array of paths.
def collect_files_from_directory(dir_path, options)
  glob_segments = options[:recursive] ? ['**', '*'] : ['*']

  Dir.glob(File.join(dir_path, *glob_segments)).select do |entry|
    File.file?(entry) && should_process_file?(entry, options)
  end
end
307
+
308
# File extensions accepted for each value of the --type filter.
TYPE_EXTENSIONS = {
  'pdf' => ['.pdf'].freeze,
  'docx' => ['.docx'].freeze,
  'txt' => ['.txt'].freeze,
  'md' => ['.md', '.markdown'].freeze,
  'html' => ['.html', '.htm'].freeze
}.freeze

# Decides whether a file passes the optional document-type filter.
#
# path    - path of the candidate file; only its extension is inspected.
# options - option hash; options[:type] names the wanted document type.
#
# Returns true when no type filter is set. Otherwise returns whether the
# file's extension belongs to the requested type. Both the extension and
# the requested type are compared case-insensitively, so `--type PDF`
# behaves like `--type pdf`. An unrecognised type matches no file.
def should_process_file?(path, options)
  requested_type = options[:type]
  return true unless requested_type

  allowed_extensions = TYPE_EXTENSIONS.fetch(requested_type.to_s.strip.downcase, [])
  allowed_extensions.include?(File.extname(path).downcase)
end
323
+
324
# Adds one file through the client and normalises the outcome.
#
# client  - object responding to add_document(path).
# path    - path of the file to add.
# options - accepted for signature compatibility; not used here.
#
# Returns a Hash with :file and :status ('success' or 'error'). On a
# completed call it also carries :document_id and :message from the
# client's result; when the call raises a StandardError it carries
# :error with the exception message instead.
def process_single_file(client, path, options)
  outcome = client.add_document(path)

  {
    file: path,
    document_id: outcome[:document_id],
    status: outcome[:success] ? 'success' : 'error',
    message: outcome[:message]
  }
rescue StandardError => error
  { file: path, error: error.message, status: 'error' }
end
341
+
342
+ public
343
+
344
+ desc 'update DOCUMENT_ID', 'Update document metadata'
345
+ method_option :title, type: :string, aliases: '-t', desc: 'New title for document'
346
# Updates a document by handing its id and the parsed CLI options to a
# fresh Update command object.
#
# document_id - identifier of the document to update.
def update(document_id)
  update_command = Update.new
  update_command.call(document_id, options)
end
349
+
350
+ desc 'delete DOCUMENT_ID', 'Delete a document'
351
+ method_option :force, type: :boolean, aliases: '-f', desc: 'Force deletion without confirmation'
352
# Deletes a document by handing its id and the parsed CLI options to a
# fresh Delete command object.
#
# document_id - identifier of the document to delete.
def delete(document_id)
  delete_command = Delete.new
  delete_command.call(document_id, options)
end
355
+
356
+ desc 'context QUERY', 'Get context for RAG applications'
357
+ method_option :limit, type: :numeric, default: 5, aliases: '-l', desc: 'Maximum number of context chunks'
358
# Prints, as pretty-printed JSON, whatever StandaloneClient#get_context
# returns for the query.
#
# query - text to retrieve context for; the :limit option is passed along
#         as the `limit:` keyword.
def context(query)
  retrieved = StandaloneClient.new.get_context(query, limit: options[:limit])
  puts JSON.pretty_generate(retrieved)
end
363
+
364
+ desc 'enhance PROMPT', 'Enhance a prompt with context'
365
+ method_option :context_limit, type: :numeric, default: 5, aliases: '-l', desc: 'Number of context chunks to include'
366
# Prints the value returned by StandaloneClient#enhance_prompt for the
# given prompt.
#
# prompt - text to enhance; the :context_limit option is passed along as
#          the `context_limit:` keyword.
def enhance(prompt)
  enriched_prompt = StandaloneClient.new.enhance_prompt(
    prompt,
    context_limit: options[:context_limit]
  )
  puts enriched_prompt
end
371
+
372
+
373
+ private
374
+
375
# Loads configuration through a fresh ConfigurationLoader and returns
# whatever its #load call returns.
def load_configuration
  loader = ConfigurationLoader.new
  loader.load
end
378
+ end
379
+ end
380
+ end
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "annotate"
4
+ require "annotate/annotate_models"
5
+
6
+ # Define environment task for gem context
7
# Prepares the gem environment for tasks that depend on :environment:
# requires ragdoll-core, then tries to load the model files and set up
# the database with a fixed local PostgreSQL configuration.
#
# A StandardError during model loading or database setup is reported and
# swallowed so dependent tasks can still run.
task :environment do
  # Load the gem's environment
  require_relative "../ragdoll-core"

  begin
    # Load all models to ensure they're available
    model_files = Dir[File.join(__dir__, "../ragdoll/core/models/*.rb")]
    model_files.each { |model_file| require model_file }

    # Default connection settings; the password comes from the environment
    # (RAGDOLL_DATABASE_PASSWORD first, then DATABASE_PASSWORD).
    connection_settings = {
      adapter: "postgresql",
      database: "ragdoll_development",
      username: "ragdoll",
      password: ENV["RAGDOLL_DATABASE_PASSWORD"] || ENV["DATABASE_PASSWORD"],
      host: "localhost",
      port: 5432,
      auto_migrate: false # Don't auto-migrate during annotation
    }
    Ragdoll::Core::Database.setup(connection_settings)

    puts "✅ Connected to database: ragdoll_development"
  rescue StandardError => error
    puts "❌ Database connection failed: #{error.message}"
    puts " Annotations will be based on model definitions only"
  end
end
33
+
34
# annotate:models — runs AnnotateModels.do_annotations with a fixed set
# of options targeting lib/ragdoll/core/models.
namespace :annotate do
  task :models do
    model_annotation_options = {
      "models" => "true",
      "position_in_class" => "before",
      "show_foreign_keys" => "true",
      "show_indexes" => "true",
      "model_dir" => "lib/ragdoll/core/models",
      "exclude_tests" => "true",
      "exclude_fixtures" => "true",
      "exclude_factories" => "true",
      "exclude_serializers" => "true",
      "exclude_scaffolds" => "true",
      "exclude_controllers" => "true",
      "exclude_helpers" => "true"
    }

    AnnotateModels.do_annotations(model_annotation_options)
  end
end
52
+
53
# Registers the default annotate options for this gem by passing them to
# Annotate.set_defaults.
task :set_annotation_options do
  # You can override any of these by setting an environment variable of the
  # same name.
  annotation_defaults = {
    "active_admin" => "false",
    "additional_file_patterns" => [],
    "routes" => "false",
    "models" => "true",
    "position_in_routes" => "before",
    "position_in_class" => "before",
    "position_in_test" => "before",
    "position_in_fixture" => "before",
    "position_in_factory" => "before",
    "position_in_serializer" => "before",
    "show_foreign_keys" => "true",
    "show_complete_foreign_keys" => "false",
    "show_indexes" => "true",
    "simple_indexes" => "false",
    "model_dir" => "lib/ragdoll/core/models",
    "root_dir" => "",
    "include_version" => "false",
    "require" => "",
    "exclude_tests" => "false",
    "exclude_fixtures" => "false",
    "exclude_factories" => "false",
    "exclude_serializers" => "false",
    "exclude_scaffolds" => "false",
    "exclude_controllers" => "true",
    "exclude_helpers" => "true",
    "exclude_sti_subclasses" => "false",
    "ignore_model_sub_dir" => "false",
    "ignore_columns" => nil,
    "ignore_routes" => nil,
    "ignore_unknown_options" => "false",
    "hide_limit_column_types" => "integer,bigint,boolean",
    "hide_default_column_types" => "json,jsonb,hstore",
    "skip_on_db_migrate" => "false",
    "format_bare" => "true",
    "format_rdoc" => "false",
    "format_yard" => "false",
    "format_markdown" => "false",
    "sort" => "false",
    "force" => "false",
    "frozen" => "false",
    "classified_sort" => "true",
    "trace" => "false",
    "wrapper_open" => nil,
    "wrapper_close" => nil,
    "with_comment" => "true"
  }

  Annotate.set_defaults(annotation_defaults)
end
104
+
105
+ # Load only essential model annotation tasks
106
+ desc "Add schema information (as comments) to model files"
107
# Annotates the ragdoll-core models by invoking the `annotate` executable
# through Bundler (runs after the :environment task).
#
# The command is started with an explicit environment hash and argument
# list, so no shell is involved in building or parsing the command line.
# Kernel#system returns true on a zero exit status, false on a non-zero
# one, and nil when the command could not be executed at all; each case
# gets its own message so a missing executable is not reported as a mere
# warning. The task itself never fails, as before.
task annotate_models: :environment do
  puts "Running annotate for ragdoll-core models..."

  # Use the CLI approach since the programmatic API doesn't respect model_dir properly
  annotate_env = { "MODEL_DIR" => "lib/ragdoll/core/models" }
  annotate_command = %w[
    bundle exec annotate
    --models --position-in-class=before --show-foreign-keys --show-indexes
  ]

  case system(annotate_env, *annotate_command)
  when true
    puts "✅ Model annotations updated successfully!"
  when false
    puts "⚠️ Annotate completed with warnings (database connection issues)"
  else
    puts "❌ Annotate could not be started (is `bundle` on PATH and the annotate gem installed?)"
  end
end
119
+
120
+ desc "Remove schema information from model files"
121
# Strips schema annotations from the models under lib/ragdoll/core/models
# by calling AnnotateModels.remove_annotations.
task :remove_annotation do
  removal_options = {
    "models" => "true",
    "model_dir" => "lib/ragdoll/core/models"
  }

  AnnotateModels.remove_annotations(removal_options)
end