aia 0.9.15 → 0.9.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.version +1 -1
- data/CHANGELOG.md +62 -0
- data/README.md +77 -0
- data/docs/faq.md +83 -1
- data/docs/guides/local-models.md +304 -0
- data/docs/guides/models.md +157 -0
- data/lib/aia/chat_processor_service.rb +20 -5
- data/lib/aia/directives/models.rb +135 -5
- data/lib/aia/ruby_llm_adapter.rb +174 -19
- data/lib/aia/session.rb +27 -16
- data/lib/extensions/ruby_llm/provider_fix.rb +34 -0
- data/mkdocs.yml +1 -0
- metadata +31 -1
data/docs/guides/models.md
CHANGED
@@ -373,6 +373,163 @@ premium_models:
 - **Llama 2**: Open-source general purpose
 - **Mixtral**: High-performance open model
 
+## Local Model Providers
+
+### Ollama
+
+[Ollama](https://ollama.ai) enables running open-source AI models locally.
+
+#### Setup
+
+```bash
+# Install Ollama
+brew install ollama # macOS
+# or download from https://ollama.ai
+
+# Pull models
+ollama pull llama3.2
+ollama pull mistral
+ollama pull qwen2.5-coder
+
+# List available models
+ollama list
+```
+
+#### Usage with AIA
+
+```bash
+# Use Ollama model (prefix with 'ollama/')
+aia --model ollama/llama3.2 my_prompt
+
+# Chat mode
+aia --chat --model ollama/mistral
+
+# List Ollama models from AIA
+aia --model ollama/llama3.2 --chat
+> //models
+
+# Combine with cloud models for comparison
+aia --model ollama/llama3.2,gpt-4o-mini,claude-3-sonnet my_prompt
+```
+
+#### Configuration
+
+```yaml
+# ~/.aia/config.yml
+model: ollama/llama3.2
+
+# Optional: Custom Ollama endpoint
+# Set via environment variable
+export OLLAMA_API_BASE=http://custom-host:11434
+```
+
+#### Popular Ollama Models
+
+- **llama3.2**: Latest Llama model, good general purpose
+- **llama3.2:70b**: Larger version, better quality
+- **mistral**: Fast and efficient
+- **mixtral**: High-performance mixture of experts
+- **qwen2.5-coder**: Specialized for code
+- **codellama**: Code-focused model
+
+### LM Studio
+
+[LM Studio](https://lmstudio.ai) provides a GUI for running local models with OpenAI-compatible API.
+
+#### Setup
+
+1. Download LM Studio from https://lmstudio.ai
+2. Install and launch the application
+3. Browse and download models within LM Studio
+4. Start the local server:
+   - Click "Local Server" tab
+   - Click "Start Server"
+   - Default endpoint: http://localhost:1234/v1
+
+#### Usage with AIA
+
+```bash
+# Use LM Studio model (prefix with 'lms/')
+aia --model lms/qwen/qwen3-coder-30b my_prompt
+
+# Chat mode
+aia --chat --model lms/llama-3.2-3b-instruct
+
+# List LM Studio models from AIA
+aia --model lms/any-loaded-model --chat
+> //models
+
+# Model validation
+# AIA validates model names against LM Studio's loaded models
+# If you specify an invalid model, you'll see:
+# ❌ 'model-name' is not a valid LM Studio model.
+#
+# Available LM Studio models:
+# - lms/qwen/qwen3-coder-30b
+# - lms/llama-3.2-3b-instruct
+```
+
+#### Configuration
+
+```yaml
+# ~/.aia/config.yml
+model: lms/qwen/qwen3-coder-30b
+
+# Optional: Custom LM Studio endpoint
+# Set via environment variable
+export LMS_API_BASE=http://localhost:1234/v1
+```
+
+#### Tips for LM Studio
+
+- Use the model name exactly as shown in LM Studio
+- Prefix all model names with `lms/`
+- Ensure the local server is running before use
+- LM Studio supports one model at a time (unlike Ollama)
+
+### Comparison: Ollama vs LM Studio
+
+| Feature | Ollama | LM Studio |
+|---------|--------|-----------|
+| **Interface** | Command-line | GUI + CLI |
+| **Model Management** | Via CLI (`ollama pull`) | GUI download |
+| **API Compatibility** | Custom + OpenAI-like | OpenAI-compatible |
+| **Multiple Models** | Yes (switch quickly) | One at a time |
+| **Platform** | macOS, Linux, Windows | macOS, Windows |
+| **Model Format** | GGUF, custom | GGUF |
+| **Best For** | CLI users, automation | GUI users, experimentation |
+
+### Local + Cloud Model Workflows
+
+#### Privacy-First Workflow
+```bash
+# Use local model for sensitive data
+aia --model ollama/llama3.2 --out_file draft.md process_private_data.txt
+
+# Use cloud model for final polish (on sanitized data)
+aia --model gpt-4 --include draft.md refine_output
+```
+
+#### Cost-Optimization Workflow
+```bash
+# Bulk processing with local model (free)
+for file in *.txt; do
+  aia --model ollama/mistral --out_file "${file%.txt}_summary.md" summarize "$file"
+done
+
+# Final review with premium cloud model
+aia --model gpt-4 --include *_summary.md final_report
+```
+
+#### Consensus with Mixed Models
+```bash
+# Get consensus from local and cloud models
+aia --model ollama/llama3.2,ollama/mistral,gpt-4o-mini --consensus decision_prompt
+
+# Or individual responses to compare
+aia --model ollama/llama3.2,lms/qwen-coder,claude-3-sonnet --no-consensus code_review.py
+```
+
 ## Troubleshooting Models
 
 ### Common Issues
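The guide above assumes the local servers are already running. A quick standalone check (not part of the gem) that they are reachable on the same endpoints the new directive code later in this diff queries; a sketch assuming default ports and that `OLLAMA_API_BASE`/`LMS_API_BASE`, if set, do not already end in `/v1` (the gem strips that suffix, this sketch does not):

```ruby
#!/usr/bin/env ruby
# Reachability check for the local servers behind AIA's 'ollama/' and 'lms/' prefixes.
# Endpoints mirror data/lib/aia/directives/models.rb:
#   Ollama    -> GET /api/tags   (default http://localhost:11434)
#   LM Studio -> GET /v1/models  (default http://localhost:1234)
require 'net/http'
require 'json'

checks = {
  'Ollama'    => URI(ENV.fetch('OLLAMA_API_BASE', 'http://localhost:11434') + '/api/tags'),
  'LM Studio' => URI(ENV.fetch('LMS_API_BASE',    'http://localhost:1234')  + '/v1/models')
}

checks.each do |name, uri|
  response = Net::HTTP.get_response(uri)
  if response.is_a?(Net::HTTPSuccess)
    payload = JSON.parse(response.body)
    # Ollama reports under 'models', LM Studio under 'data'
    count = (payload['models'] || payload['data'] || []).size
    puts "#{name}: reachable at #{uri} (#{count} model(s) reported)"
  else
    puts "#{name}: HTTP #{response.code} from #{uri}"
  end
rescue StandardError => e
  puts "#{name}: not reachable (#{e.class}: #{e.message})"
end
```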
data/lib/aia/chat_processor_service.rb
CHANGED
@@ -28,25 +28,37 @@ module AIA
         result = send_to_client(prompt)
       end
 
+      # Debug output to understand what we're receiving
+      puts "[DEBUG ChatProcessor] Result class: #{result.class}" if AIA.config.debug
+      puts "[DEBUG ChatProcessor] Result inspect: #{result.inspect[0..500]}..." if AIA.config.debug
+
       # Preserve token information if available for metrics
       if result.is_a?(String)
+        puts "[DEBUG ChatProcessor] Processing as String" if AIA.config.debug
        { content: result, metrics: nil }
       elsif result.respond_to?(:multi_model?) && result.multi_model?
+        puts "[DEBUG ChatProcessor] Processing as multi-model response" if AIA.config.debug
         # Handle multi-model response with metrics
         {
           content: result.content,
           metrics: nil, # Individual model metrics handled separately
           multi_metrics: result.metrics_list
         }
-
+      elsif result.respond_to?(:content)
+        puts "[DEBUG ChatProcessor] Processing as standard response with content method" if AIA.config.debug
+        # Standard response object with content method
         {
           content: result.content,
           metrics: {
-            input_tokens: result.input_tokens,
-            output_tokens: result.output_tokens,
-            model_id: result.model_id
+            input_tokens: result.respond_to?(:input_tokens) ? result.input_tokens : nil,
+            output_tokens: result.respond_to?(:output_tokens) ? result.output_tokens : nil,
+            model_id: result.respond_to?(:model_id) ? result.model_id : nil
           }
         }
+      else
+        puts "[DEBUG ChatProcessor] Processing as fallback (unexpected type)" if AIA.config.debug
+        # Fallback for unexpected response types
+        { content: result.to_s, metrics: nil }
       end
     end
 
@@ -56,7 +68,10 @@ module AIA
     def send_to_client(conversation)
       maybe_change_model
 
-      AIA.client.chat(conversation)
+      puts "[DEBUG ChatProcessor] Sending conversation to client: #{conversation.inspect[0..500]}..." if AIA.config.debug
+      result = AIA.client.chat(conversation)
+      puts "[DEBUG ChatProcessor] Client returned: #{result.class} - #{result.inspect[0..500]}..." if AIA.config.debug
+      result
     end
 
 
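The change above normalizes whatever the client returns into a `{ content:, metrics: }` hash and guards every token accessor with `respond_to?`, so providers that report no token counts (typical of local models) no longer raise. A rough standalone illustration of that duck-typing with stubbed objects; the multi-model branch is omitted and these are not the gem's real response classes:

```ruby
require 'ostruct'

# Simplified copy of the normalization branch added above, runnable outside the gem.
def normalize(result)
  if result.is_a?(String)
    { content: result, metrics: nil }
  elsif result.respond_to?(:content)
    {
      content: result.content,
      metrics: {
        input_tokens:  result.respond_to?(:input_tokens)  ? result.input_tokens  : nil,
        output_tokens: result.respond_to?(:output_tokens) ? result.output_tokens : nil,
        model_id:      result.respond_to?(:model_id)      ? result.model_id      : nil
      }
    }
  else
    { content: result.to_s, metrics: nil }  # fallback for unexpected types
  end
end

full  = OpenStruct.new(content: 'hi', input_tokens: 12, output_tokens: 3, model_id: 'gpt-4o-mini')
plain = OpenStruct.new(content: 'hi')  # e.g. a local provider that reports no token usage

p normalize('raw string')    # content is the string itself, metrics nil
p normalize(full)[:metrics]  # all three metric fields populated
p normalize(plain)[:metrics] # all three metric fields nil instead of raising NoMethodError
```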
data/lib/aia/directives/models.rb
CHANGED
@@ -30,9 +30,141 @@ module AIA
     end
 
     def self.available_models(args = nil, context_manager = nil)
+      # Check if we're using a local provider
+      current_models = AIA.config.model
+      current_models = [current_models] if current_models.is_a?(String)
+
+      using_local_provider = current_models.any? { |m| m.start_with?('ollama/', 'lms/') }
+
+      if using_local_provider
+        show_local_models(current_models, args)
+      else
+        show_rubyllm_models(args)
+      end
+
+      ""
+    end
+
+    def self.show_local_models(current_models, args)
+      require 'net/http'
+      require 'json'
+
+      puts "\nLocal LLM Models:"
+      puts
+
+      current_models.each do |model_spec|
+        if model_spec.start_with?('ollama/')
+          # Ollama uses its native API, not /v1
+          api_base = ENV.fetch('OLLAMA_API_BASE', 'http://localhost:11434')
+          # Remove /v1 suffix if present
+          api_base = api_base.gsub(%r{/v1/?$}, '')
+          show_ollama_models(api_base, args)
+        elsif model_spec.start_with?('lms/')
+          api_base = ENV.fetch('LMS_API_BASE', 'http://localhost:1234')
+          show_lms_models(api_base, args)
+        end
+      end
+    end
+
+    def self.show_ollama_models(api_base, args)
+      begin
+        uri = URI("#{api_base}/api/tags")
+        response = Net::HTTP.get_response(uri)
+
+        unless response.is_a?(Net::HTTPSuccess)
+          puts "❌ Cannot connect to Ollama at #{api_base}"
+          return
+        end
+
+        data = JSON.parse(response.body)
+        models = data['models'] || []
+
+        if models.empty?
+          puts "No Ollama models found"
+          return
+        end
+
+        puts "Ollama Models (#{api_base}):"
+        puts "-" * 60
+
+        counter = 0
+        models.each do |model|
+          name = model['name']
+          size = model['size'] ? format_bytes(model['size']) : 'unknown'
+          modified = model['modified_at'] ? Time.parse(model['modified_at']).strftime('%Y-%m-%d') : 'unknown'
+
+          entry = "- ollama/#{name} (size: #{size}, modified: #{modified})"
+
+          # Apply query filter if provided
+          if args.nil? || args.empty? || args.any? { |q| entry.downcase.include?(q.downcase) }
+            puts entry
+            counter += 1
+          end
+        end
+
+        puts
+        puts "#{counter} Ollama model(s) available"
+        puts
+      rescue StandardError => e
+        puts "❌ Error fetching Ollama models: #{e.message}"
+      end
+    end
+
+    def self.show_lms_models(api_base, args)
+      begin
+        uri = URI("#{api_base.gsub(%r{/v1/?$}, '')}/v1/models")
+        response = Net::HTTP.get_response(uri)
+
+        unless response.is_a?(Net::HTTPSuccess)
+          puts "❌ Cannot connect to LM Studio at #{api_base}"
+          return
+        end
+
+        data = JSON.parse(response.body)
+        models = data['data'] || []
+
+        if models.empty?
+          puts "No LM Studio models found"
+          return
+        end
+
+        puts "LM Studio Models (#{api_base}):"
+        puts "-" * 60
+
+        counter = 0
+        models.each do |model|
+          name = model['id']
+          entry = "- lms/#{name}"
+
+          # Apply query filter if provided
+          if args.nil? || args.empty? || args.any? { |q| entry.downcase.include?(q.downcase) }
+            puts entry
+            counter += 1
+          end
+        end
+
+        puts
+        puts "#{counter} LM Studio model(s) available"
+        puts
+      rescue StandardError => e
+        puts "❌ Error fetching LM Studio models: #{e.message}"
+      end
+    end
+
+    def self.format_bytes(bytes)
+      units = ['B', 'KB', 'MB', 'GB', 'TB']
+      return "0 B" if bytes.zero?
+
+      exp = (Math.log(bytes) / Math.log(1024)).to_i
+      exp = [exp, units.length - 1].min
+
+      "%.1f %s" % [bytes.to_f / (1024 ** exp), units[exp]]
+    end
+
+    def self.show_rubyllm_models(args)
       query = args
 
-      if 1 == query.size
+      if query && 1 == query.size
         query = query.first.split(',')
       end
 
@@ -42,8 +174,8 @@ module AIA
       puts header + ':'
       puts
 
-      q1 = query.select { |q| q.include?('_to_') }
-      q2 = query.reject { |q| q.include?('_to_') }
+      q1 = query ? query.select { |q| q.include?('_to_') } : []
+      q2 = query ? query.reject { |q| q.include?('_to_') } : []
 
       counter = 0
 
@@ -75,8 +207,6 @@ module AIA
       puts if counter > 0
       puts "#{counter} LLMs matching your query"
      puts
-
-      ""
     end
 
     def self.help(args = nil, context_manager = nil)
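The new `format_bytes` helper drives the size column in the Ollama listing. A quick worked check of its unit arithmetic, copied out of the hunk above and run on illustrative byte counts (not values from a real `/api/tags` response):

```ruby
# exp = floor(log(bytes) / log(1024)) selects the unit; the value shown is bytes / 1024**exp.
def format_bytes(bytes)
  units = ['B', 'KB', 'MB', 'GB', 'TB']
  return "0 B" if bytes.zero?

  exp = (Math.log(bytes) / Math.log(1024)).to_i
  exp = [exp, units.length - 1].min

  "%.1f %s" % [bytes.to_f / (1024 ** exp), units[exp]]
end

puts format_bytes(0)             # => "0 B"
puts format_bytes(987)           # => "987.0 B"  (exp 0)
puts format_bytes(734_003_200)   # => "700.0 MB" (exp 2: 700 * 1024**2 bytes)
puts format_bytes(4_661_224_676) # => "4.3 GB"   (exp 3: a few-GB quantized model file)
```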