prescient 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env.example +37 -0
- data/.rubocop.yml +326 -0
- data/Dockerfile.example +41 -0
- data/README.md +859 -13
- data/Rakefile +25 -3
- data/VECTOR_SEARCH_GUIDE.md +450 -0
- data/db/init/01_enable_pgvector.sql +30 -0
- data/db/init/02_create_schema.sql +108 -0
- data/db/init/03_create_indexes.sql +96 -0
- data/db/init/04_insert_sample_data.sql +121 -0
- data/db/migrate/001_create_prescient_tables.rb +158 -0
- data/docker-compose.yml +153 -0
- data/examples/basic_usage.rb +123 -0
- data/examples/custom_contexts.rb +355 -0
- data/examples/custom_prompts.rb +212 -0
- data/examples/vector_search.rb +330 -0
- data/lib/prescient/base.rb +270 -0
- data/lib/prescient/client.rb +107 -0
- data/lib/prescient/provider/anthropic.rb +146 -0
- data/lib/prescient/provider/huggingface.rb +202 -0
- data/lib/prescient/provider/ollama.rb +172 -0
- data/lib/prescient/provider/openai.rb +181 -0
- data/lib/prescient/version.rb +1 -1
- data/lib/prescient.rb +84 -2
- data/prescient.gemspec +51 -0
- data/scripts/setup-ollama-models.sh +77 -0
- metadata +215 -12
- data/.vscode/settings.json +0 -1
# frozen_string_literal: true

require 'httparty'

# Anthropic (Claude) chat provider.
#
# Text generation only: Anthropic exposes no embedding endpoint, so
# `generate_embedding` always raises. All calls go through the Messages
# API (`/v1/messages`) with the versioned header Anthropic requires.
class Prescient::Provider::Anthropic < Prescient::Base
  include HTTParty

  base_uri 'https://api.anthropic.com'

  # Pinned Messages API revision sent on every request.
  API_VERSION = '2023-06-01'

  def initialize(**options)
    super
    self.class.default_timeout(@options[:timeout] || 60)
  end

  # Anthropic has no embedding API; callers must use another provider.
  #
  # @raise [Prescient::Error] always
  def generate_embedding(_text, **_options)
    raise Prescient::Error,
          'Anthropic provider does not support embeddings. Use OpenAI or HuggingFace for embeddings.'
  end

  # Generate a completion for +prompt+, optionally grounded on +context_items+.
  #
  # @param prompt [String] user question/instruction
  # @param context_items [Array] context passed to +build_prompt+
  # @option options [Integer] :max_tokens response budget (default 2000)
  # @option options [Float] :temperature sampling temperature (default 0.7)
  # @return [Hash] :response, :model, :provider, :processing_time, :metadata
  # @raise [Prescient::InvalidResponseError] when no text comes back
  def generate_response(prompt, context_items = [], **options)
    handle_errors do
      response = post_messages(
        model: @options[:model],
        max_tokens: options[:max_tokens] || 2000,
        temperature: options[:temperature] || 0.7,
        messages: [
          {
            role: 'user',
            content: build_prompt(prompt, context_items),
          },
        ]
      )

      validate_response!(response, 'text generation')

      content = response.parsed_response.dig('content', 0, 'text')
      raise Prescient::InvalidResponseError, 'No response generated' unless content

      {
        response: content.strip,
        model: @options[:model],
        provider: 'anthropic',
        processing_time: nil, # Anthropic does not report server-side timing
        metadata: {
          usage: response.parsed_response['usage'],
        },
      }
    end
  end

  # Probe the API with a minimal one-shot message and report status.
  # Never raises on connection failure; returns a status hash instead.
  def health_check
    handle_errors do
      response = post_messages(
        model: @options[:model],
        max_tokens: 10,
        messages: [
          {
            role: 'user',
            content: 'Test',
          },
        ]
      )

      if response.success?
        {
          status: 'healthy',
          provider: 'anthropic',
          model: @options[:model],
          ready: true,
        }
      else
        {
          status: 'unhealthy',
          provider: 'anthropic',
          error: "HTTP #{response.code}",
          message: response.message,
        }
      end
    end
  rescue Prescient::ConnectionError => e
    {
      status: 'unavailable',
      provider: 'anthropic',
      error: e.class.name,
      message: e.message,
    }
  end

  # Static catalogue — Anthropic doesn't provide a models list API.
  def list_models
    [
      { name: 'claude-3-haiku-20240307', type: 'text' },
      { name: 'claude-3-sonnet-20240229', type: 'text' },
      { name: 'claude-3-opus-20240229', type: 'text' },
    ]
  end

  protected

  # Ensure the provider was configured with an API key and model name.
  def validate_configuration!
    missing_options = [:api_key, :model].select { |opt| @options[opt].nil? }
    return if missing_options.empty?

    raise Prescient::Error, "Missing required options: #{missing_options.join(', ')}"
  end

  private

  # POST +body+ to the Messages endpoint with auth + version headers.
  def post_messages(body)
    self.class.post('/v1/messages',
                    headers: {
                      'Content-Type' => 'application/json',
                      'x-api-key' => @options[:api_key],
                      'anthropic-version' => API_VERSION,
                    },
                    body: body.to_json)
  end

  # Map non-2xx responses onto the gem's error hierarchy.
  def validate_response!(response, operation)
    return if response.success?

    case response.code
    when 400
      raise Prescient::Error, "Bad request for #{operation}: #{response.body}"
    when 401
      raise Prescient::AuthenticationError, "Authentication failed for #{operation}"
    when 403
      raise Prescient::AuthenticationError, "Forbidden access for #{operation}"
    when 429
      raise Prescient::RateLimitError, "Rate limit exceeded for #{operation}"
    when 500..599
      raise Prescient::Error, "Anthropic server error during #{operation}: #{response.body}"
    else
      raise Prescient::Error,
            "Anthropic request failed for #{operation}: HTTP #{response.code} - #{response.message}"
    end
  end
end
# frozen_string_literal: true

require 'httparty'

# HuggingFace Inference API provider: embeddings via the
# feature-extraction pipeline and text generation via hosted models.
class Prescient::Provider::HuggingFace < Prescient::Base
  include HTTParty

  base_uri 'https://api-inference.huggingface.co'

  # Known embedding vector sizes, keyed by sentence-transformers model.
  EMBEDDING_DIMENSIONS = {
    'sentence-transformers/all-MiniLM-L6-v2' => 384,
    'sentence-transformers/all-mpnet-base-v2' => 768,
    'sentence-transformers/all-roberta-large-v1' => 1024,
  }.freeze

  def initialize(**options)
    super
    self.class.default_timeout(@options[:timeout] || 60)
  end

  # Embed +text+ through the feature-extraction pipeline.
  #
  # @param text [String] raw input; cleaned via +clean_text+ first
  # @return normalized embedding (see +normalize_embedding+)
  # @raise [Prescient::InvalidResponseError] when no array comes back
  def generate_embedding(text, **_options)
    handle_errors do
      clean_text_input = clean_text(text)

      response = self.class.post("/pipeline/feature-extraction/#{@options[:embedding_model]}",
                                 headers: hf_headers,
                                 body: {
                                   inputs: clean_text_input,
                                   options: {
                                     wait_for_model: true, # block while HF cold-starts the model
                                   },
                                 }.to_json)

      validate_response!(response, 'embedding generation')

      # HuggingFace returns embeddings as nested arrays, get the first one
      embedding_data = response.parsed_response
      embedding_data = embedding_data.first if embedding_data.is_a?(Array) && embedding_data.first.is_a?(Array)

      raise Prescient::InvalidResponseError, 'No embedding returned' unless embedding_data.is_a?(Array)

      # Fall back to 384 (MiniLM) for models not in the lookup table.
      expected_dimensions = EMBEDDING_DIMENSIONS[@options[:embedding_model]] || 384
      normalize_embedding(embedding_data, expected_dimensions)
    end
  end

  # Generate text for +prompt+ (plus optional +context_items+).
  #
  # @option options [Integer] :max_tokens new-token budget (default 2000)
  # @option options [Float] :temperature sampling temperature (default 0.7)
  # @option options [Float] :top_p nucleus sampling cutoff (default 0.9)
  # @return [Hash] :response, :model, :provider, :processing_time, :metadata
  # @raise [Prescient::InvalidResponseError] when no text comes back
  def generate_response(prompt, context_items = [], **options)
    handle_errors do
      formatted_prompt = build_prompt(prompt, context_items)

      response = self.class.post("/models/#{@options[:chat_model]}",
                                 headers: hf_headers,
                                 body: {
                                   inputs: formatted_prompt,
                                   parameters: {
                                     max_new_tokens: options[:max_tokens] || 2000,
                                     temperature: options[:temperature] || 0.7,
                                     top_p: options[:top_p] || 0.9,
                                     return_full_text: false, # only the completion, not the prompt
                                   },
                                   options: {
                                     wait_for_model: true,
                                   },
                                 }.to_json)

      validate_response!(response, 'text generation')

      # HuggingFace returns different formats depending on the model:
      # either [{"generated_text" => ...}] or {"generated_text"/"text" => ...}.
      generated_text = nil
      parsed_response = response.parsed_response

      if parsed_response.is_a?(Array) && parsed_response.first.is_a?(Hash)
        generated_text = parsed_response.first['generated_text']
      elsif parsed_response.is_a?(Hash)
        generated_text = parsed_response['generated_text'] || parsed_response['text']
      end

      raise Prescient::InvalidResponseError, 'No response generated' unless generated_text

      {
        response: generated_text.strip,
        model: @options[:chat_model],
        provider: 'huggingface',
        processing_time: nil, # HF does not report server-side timing
        metadata: {},
      }
    end
  end

  # Probe both configured models and report combined availability.
  # 'healthy' when both respond, 'partial' when only one does.
  # Never raises on connection failure; returns a status hash instead.
  def health_check
    handle_errors do
      # Test embedding model.
      # FIX: these probes previously omitted the Content-Type header even
      # though the body is JSON — now consistent with the other requests.
      embedding_response = self.class.post("/pipeline/feature-extraction/#{@options[:embedding_model]}",
                                           headers: hf_headers,
                                           body: { inputs: 'test' }.to_json)

      # Test chat model
      chat_response = self.class.post("/models/#{@options[:chat_model]}",
                                      headers: hf_headers,
                                      body: {
                                        inputs: 'test',
                                        parameters: { max_new_tokens: 5 },
                                      }.to_json)

      embedding_healthy = embedding_response.success?
      chat_healthy = chat_response.success?

      {
        status: embedding_healthy && chat_healthy ? 'healthy' : 'partial',
        provider: 'huggingface',
        embedding_model: {
          name: @options[:embedding_model],
          available: embedding_healthy,
        },
        chat_model: {
          name: @options[:chat_model],
          available: chat_healthy,
        },
        ready: embedding_healthy && chat_healthy,
      }
    end
  rescue Prescient::ConnectionError => e
    {
      status: 'unavailable',
      provider: 'huggingface',
      error: e.class.name,
      message: e.message,
    }
  end

  # HuggingFace doesn't provide a simple API to list all models,
  # so return the two configured ones.
  def list_models
    [
      {
        name: @options[:embedding_model],
        type: 'embedding',
        dimensions: EMBEDDING_DIMENSIONS[@options[:embedding_model]],
      },
      {
        name: @options[:chat_model],
        type: 'text-generation',
      },
    ]
  end

  protected

  # Ensure the provider was configured with key + both model names.
  def validate_configuration!
    required_options = [:api_key, :embedding_model, :chat_model]
    missing_options = required_options.select { |opt| @options[opt].nil? }

    return unless missing_options.any?

    raise Prescient::Error, "Missing required options: #{missing_options.join(', ')}"
  end

  private

  # Standard JSON + bearer-auth headers for every Inference API call.
  def hf_headers
    {
      'Content-Type' => 'application/json',
      'Authorization' => "Bearer #{@options[:api_key]}",
    }
  end

  # Map non-2xx responses onto the gem's error hierarchy.
  # 503 gets special handling because HF returns it while a model is
  # still being loaded onto an inference worker.
  def validate_response!(response, operation)
    return if response.success?

    case response.code
    when 400
      raise Prescient::Error, "Bad request for #{operation}: #{response.body}"
    when 401
      raise Prescient::AuthenticationError, "Authentication failed for #{operation}"
    when 403
      raise Prescient::AuthenticationError, "Forbidden access for #{operation}"
    when 429
      raise Prescient::RateLimitError, "Rate limit exceeded for #{operation}"
    when 503
      # HuggingFace model loading: body may not even be parseable JSON.
      error_body = begin
        response.parsed_response
      rescue StandardError
        response.body
      end
      if error_body.is_a?(Hash) && error_body['error']&.include?('loading')
        raise Prescient::Error, 'Model is loading, please try again later'
      end

      raise Prescient::Error, "HuggingFace service unavailable for #{operation}"
    when 500..599
      raise Prescient::Error, "HuggingFace server error during #{operation}: #{response.body}"
    else
      raise Prescient::Error,
            "HuggingFace request failed for #{operation}: HTTP #{response.code} - #{response.message}"
    end
  end
end
# frozen_string_literal: true

require 'httparty'

# Ollama provider: embeddings and chat against a self-hosted Ollama
# server (base URI comes from the :url option at construction time).
class Prescient::Provider::Ollama < Prescient::Base
  include HTTParty

  EMBEDDING_DIMENSIONS = 768 # nomic-embed-text dimensions

  def initialize(**options)
    super
    self.class.base_uri(@options[:url])
    self.class.default_timeout(@options[:timeout] || 60)
  end

  # Embed +text+ via POST /api/embeddings.
  #
  # @param text [String] raw input; cleaned via +clean_text+ first
  # @return normalized embedding (see +normalize_embedding+)
  # @raise [Prescient::InvalidResponseError] when no embedding comes back
  def generate_embedding(text, **_options)
    handle_errors do
      embedding = fetch_and_parse('post', '/api/embeddings',
                                  root_key: 'embedding',
                                  headers: { 'Content-Type' => 'application/json' },
                                  body: {
                                    model: @options[:embedding_model],
                                    prompt: clean_text(text),
                                  }.to_json)

      raise Prescient::InvalidResponseError, 'No embedding returned' unless embedding

      normalize_embedding(embedding, EMBEDDING_DIMENSIONS)
    end
  end

  # Generate text for +prompt+ (plus optional +context_items+) via
  # POST /api/generate (non-streaming).
  #
  # @option options [Integer] :max_tokens prediction budget (default 2000)
  # @option options [Float] :temperature sampling temperature (default 0.7)
  # @option options [Float] :top_p nucleus sampling cutoff (default 0.9)
  # @return [Hash] :response, :model, :provider, :processing_time, :metadata
  # @raise [Prescient::InvalidResponseError] when no text comes back
  def generate_response(prompt, context_items = [], **options)
    handle_errors do
      request_options = prepare_generate_response(prompt, context_items, **options)
      # FIX: :root_key is consumed by fetch_and_parse, not HTTParty.
      # We post directly here (we need the full parsed response for the
      # metadata below), so strip it rather than forwarding it as a bogus
      # request option.
      request_options.delete(:root_key)

      response = self.class.post('/api/generate', **request_options)
      validate_response!(response, 'POST /api/generate')

      generated_text = response.parsed_response['response']
      raise Prescient::InvalidResponseError, 'No response generated' unless generated_text

      # Ollama reports durations in nanoseconds; convert to seconds.
      total_duration = response.parsed_response['total_duration']

      {
        response: generated_text.strip,
        model: @options[:chat_model],
        provider: 'ollama',
        processing_time: total_duration && total_duration / 1_000_000_000.0,
        metadata: {
          eval_count: response.parsed_response['eval_count'],
          eval_duration: response.parsed_response['eval_duration'],
          prompt_eval_count: response.parsed_response['prompt_eval_count'],
        },
      }
    end
  end

  # Check whether both configured models are present on the server.
  # Never raises on failure; returns a status hash instead.
  def health_check
    handle_errors do
      models = available_models
      embedding_available = models.any? { |m| m[:embedding] }
      chat_available = models.any? { |m| m[:chat] }

      {
        status: 'healthy',
        provider: 'ollama',
        url: @options[:url],
        models_available: models.map { |m| m[:name] },
        embedding_model: {
          name: @options[:embedding_model],
          available: embedding_available,
        },
        chat_model: {
          name: @options[:chat_model],
          available: chat_available,
        },
        ready: embedding_available && chat_available,
      }
    end
  rescue Prescient::Error => e
    {
      status: 'unavailable',
      provider: 'ollama',
      error: e.class.name,
      message: e.message,
      url: @options[:url],
    }
  end

  # List models installed on the server (GET /api/tags), flagging which
  # match the configured embedding/chat models. Memoized per instance.
  def available_models
    return @_available_models if defined?(@_available_models)

    handle_errors do
      @_available_models = (fetch_and_parse('get', '/api/tags', root_key: 'models') || []).map { |model|
        { embedding: model['name'] == @options[:embedding_model],
          chat: model['name'] == @options[:chat_model],
          name: model['name'], size: model['size'], modified_at: model['modified_at'], digest: model['digest'] }
      }
    end
  end

  # Download +model_name+ onto the server (POST /api/pull).
  # Uses an extended timeout because pulls can take minutes.
  def pull_model(model_name)
    handle_errors do
      fetch_and_parse('post', '/api/pull',
                      headers: { 'Content-Type' => 'application/json' },
                      body: { name: model_name }.to_json,
                      timeout: 300) # 5 minutes for model download
      {
        success: true,
        model: model_name,
        message: "Model #{model_name} pulled successfully",
      }
    end
  end

  protected

  # Ensure the provider was configured with a URL + both model names.
  def validate_configuration!
    required_options = [:url, :embedding_model, :chat_model]
    missing_options = required_options.select { |opt| @options[opt].nil? }

    return unless missing_options.any?

    raise Prescient::Error, "Missing required options: #{missing_options.join(', ')}"
  end

  private

  # Build the HTTParty options hash for /api/generate. The :root_key
  # entry is a fetch_and_parse directive, not an HTTP option — callers
  # posting directly must remove it first.
  def prepare_generate_response(prompt, context_items = [], **options)
    formatted_prompt = build_prompt(prompt, context_items)
    { root_key: 'response',
      headers: { 'Content-Type' => 'application/json' },
      body: {
        model: @options[:chat_model],
        prompt: formatted_prompt,
        stream: false, # single JSON document instead of a chunk stream
        options: {
          num_predict: options[:max_tokens] || 2000,
          temperature: options[:temperature] || 0.7,
          top_p: options[:top_p] || 0.9,
        },
      }.to_json }
  end

  # Perform an HTTP request, validate it, and optionally dig out the
  # value under +root_key+ from the parsed JSON response.
  def fetch_and_parse(htt_verb, endpoint, **options)
    options = options.dup
    root_key = options.delete(:root_key)

    response = self.class.send(htt_verb, endpoint, **options)
    validate_response!(response, "#{htt_verb.upcase} #{endpoint}")
    return unless root_key

    response.parsed_response[root_key]
  end

  # Map non-2xx responses onto the gem's error hierarchy.
  def validate_response!(response, operation)
    return if response.success?

    case response.code
    when 404
      raise Prescient::ModelNotAvailableError, "Model not available for #{operation}"
    when 429
      raise Prescient::RateLimitError, "Rate limit exceeded for #{operation}"
    when 401, 403
      raise Prescient::AuthenticationError, "Authentication failed for #{operation}"
    when 500..599
      raise Prescient::Error, "Ollama server error during #{operation}: #{response.body}"
    else
      raise Prescient::Error,
            "Ollama request failed for #{operation}: HTTP #{response.code} - #{response.message}"
    end
  end
end
# frozen_string_literal: true

require 'httparty'

# OpenAI provider: embeddings (/v1/embeddings) and chat completions
# (/v1/chat/completions) with bearer-token authentication.
class Prescient::Provider::OpenAI < Prescient::Base
  include HTTParty

  base_uri 'https://api.openai.com'

  # Known embedding vector sizes, keyed by model name.
  EMBEDDING_DIMENSIONS = {
    'text-embedding-3-small' => 1536,
    'text-embedding-3-large' => 3072,
    'text-embedding-ada-002' => 1536,
  }.freeze

  def initialize(**options)
    super
    self.class.default_timeout(@options[:timeout] || 60)
  end

  # Embed +text+ with the configured embedding model.
  #
  # @param text [String] raw input; cleaned via +clean_text+ first
  # @return normalized embedding (see +normalize_embedding+)
  # @raise [Prescient::InvalidResponseError] when no embedding comes back
  def generate_embedding(text, **_options)
    handle_errors do
      clean_text_input = clean_text(text)

      response = self.class.post('/v1/embeddings',
                                 headers: json_headers,
                                 body: {
                                   model: @options[:embedding_model],
                                   input: clean_text_input,
                                   encoding_format: 'float',
                                 }.to_json)

      validate_response!(response, 'embedding generation')

      vector = response.parsed_response.dig('data', 0, 'embedding')
      raise Prescient::InvalidResponseError, 'No embedding returned' unless vector

      # Fall back to 1536 for models not in the lookup table.
      expected_dimensions = EMBEDDING_DIMENSIONS[@options[:embedding_model]] || 1536
      normalize_embedding(vector, expected_dimensions)
    end
  end

  # Generate a chat completion for +prompt+ (plus optional context).
  #
  # @option options [Integer] :max_tokens response budget (default 2000)
  # @option options [Float] :temperature sampling temperature (default 0.7)
  # @option options [Float] :top_p nucleus sampling cutoff (default 0.9)
  # @return [Hash] :response, :model, :provider, :processing_time, :metadata
  # @raise [Prescient::InvalidResponseError] when no text comes back
  def generate_response(prompt, context_items = [], **options)
    handle_errors do
      formatted_prompt = build_prompt(prompt, context_items)

      response = self.class.post('/v1/chat/completions',
                                 headers: json_headers,
                                 body: {
                                   model: @options[:chat_model],
                                   messages: [
                                     {
                                       role: 'user',
                                       content: formatted_prompt,
                                     },
                                   ],
                                   max_tokens: options[:max_tokens] || 2000,
                                   temperature: options[:temperature] || 0.7,
                                   top_p: options[:top_p] || 0.9,
                                 }.to_json)

      validate_response!(response, 'text generation')

      parsed = response.parsed_response
      content = parsed.dig('choices', 0, 'message', 'content')
      raise Prescient::InvalidResponseError, 'No response generated' unless content

      {
        response: content.strip,
        model: @options[:chat_model],
        provider: 'openai',
        processing_time: nil, # OpenAI does not report server-side timing
        metadata: {
          usage: parsed['usage'],
          finish_reason: parsed.dig('choices', 0, 'finish_reason'),
        },
      }
    end
  end

  # List models via /v1/models and confirm both configured models exist.
  # Never raises on failure; returns a status hash instead.
  def health_check
    handle_errors do
      response = self.class.get('/v1/models', headers: auth_headers)

      unless response.success?
        next {
          status: 'unhealthy',
          provider: 'openai',
          error: "HTTP #{response.code}",
          message: response.message,
        }
      end

      models = response.parsed_response['data'] || []
      embedding_available = models.any? { |m| m['id'] == @options[:embedding_model] }
      chat_available = models.any? { |m| m['id'] == @options[:chat_model] }

      {
        status: 'healthy',
        provider: 'openai',
        models_available: models.map { |m| m['id'] },
        embedding_model: {
          name: @options[:embedding_model],
          available: embedding_available,
        },
        chat_model: {
          name: @options[:chat_model],
          available: chat_available,
        },
        ready: embedding_available && chat_available,
      }
    end
  rescue Prescient::Error => e
    {
      status: 'unavailable',
      provider: 'openai',
      error: e.class.name,
      message: e.message,
    }
  end

  # Fetch all models visible to this API key.
  #
  # @return [Array<Hash>] :name, :created, :owned_by per model
  def list_models
    handle_errors do
      response = self.class.get('/v1/models', headers: auth_headers)
      validate_response!(response, 'model listing')

      (response.parsed_response['data'] || []).map do |model|
        {
          name: model['id'],
          created: model['created'],
          owned_by: model['owned_by'],
        }
      end
    end
  end

  protected

  # Ensure the provider was configured with key + both model names.
  def validate_configuration!
    missing_options = [:api_key, :embedding_model, :chat_model].select { |opt| @options[opt].nil? }
    return if missing_options.empty?

    raise Prescient::Error, "Missing required options: #{missing_options.join(', ')}"
  end

  private

  # Headers for JSON POST endpoints.
  def json_headers
    {
      'Content-Type' => 'application/json',
      'Authorization' => "Bearer #{@options[:api_key]}",
    }
  end

  # Headers for GET endpoints (no request body).
  def auth_headers
    { 'Authorization' => "Bearer #{@options[:api_key]}" }
  end

  # Map non-2xx responses onto the gem's error hierarchy.
  def validate_response!(response, operation)
    return if response.success?

    case response.code
    when 400
      raise Prescient::Error, "Bad request for #{operation}: #{response.body}"
    when 401
      raise Prescient::AuthenticationError, "Authentication failed for #{operation}"
    when 403
      raise Prescient::AuthenticationError, "Forbidden access for #{operation}"
    when 429
      raise Prescient::RateLimitError, "Rate limit exceeded for #{operation}"
    when 500..599
      raise Prescient::Error, "OpenAI server error during #{operation}: #{response.body}"
    else
      raise Prescient::Error,
            "OpenAI request failed for #{operation}: HTTP #{response.code} - #{response.message}"
    end
  end
end