glim_ai 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +25 -0
- data/Gemfile.lock +49 -0
- data/LICENSE.txt +21 -0
- data/README.md +125 -0
- data/Rakefile +31 -0
- data/examples/autocode/autocode.rb +166 -0
- data/examples/autocode/solargraph_test.rb +59 -0
- data/examples/autocode/templates/changed_files_now_evaluate_output.erb +29 -0
- data/examples/autocode/templates/task.erb +16 -0
- data/examples/calc/calc.rb +50 -0
- data/examples/code_competition/code_competition.rb +78 -0
- data/examples/code_competition/output/python_claude-2.rb +33 -0
- data/examples/code_competition/output/python_claude-instant-1.rb +18 -0
- data/examples/code_competition/output/python_gpt-3.5-turbo-16k.rb +69 -0
- data/examples/code_competition/output/python_gpt-3.5-turbo.rb +43 -0
- data/examples/code_competition/output/python_gpt-4.rb +34 -0
- data/examples/code_competition/output/ruby_claude-2.rb +22 -0
- data/examples/code_competition/output/ruby_claude-instant-1.rb +20 -0
- data/examples/code_competition/output/ruby_gpt-3.5-turbo-16k.rb +27 -0
- data/examples/code_competition/output/ruby_gpt-3.5-turbo.rb +30 -0
- data/examples/code_competition/output/ruby_gpt-4.rb +31 -0
- data/examples/code_competition/output/ruby_human.rb +41 -0
- data/examples/code_competition/templates/analyze_code.erb +33 -0
- data/examples/code_competition/templates/write_code.erb +26 -0
- data/examples/glim_demo/ask_all.rb +35 -0
- data/examples/glim_demo/templates/rate_all.erb +24 -0
- data/examples/improve_prompt/improve_prompt.rb +62 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_explicit_steps.erb +15 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_explicit_steps_user_message.erb +15 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_initial.erb +8 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_nothing.erb +19 -0
- data/examples/improve_prompt/templates/try_code_first.erb +13 -0
- data/examples/improve_prompt/templates/try_code_first_system.erb +22 -0
- data/examples/old/econ/discounting.rb +27 -0
- data/examples/old/econ/templates/discounting.erb +10 -0
- data/examples/old/generate_glim_code/generate_glim_code.rb +34 -0
- data/examples/old/generate_glim_code/templates/generate_glim_code.erb +17 -0
- data/examples/old/generate_glim_code/templates/improve_code.erb +27 -0
- data/examples/old/glim_dev_tools/ask_code_question.rb +38 -0
- data/examples/old/glim_dev_tools/templates/ask_code_question.erb +12 -0
- data/examples/old/glim_dev_tools/templates/write_globals_test.erb +28 -0
- data/examples/old/glim_dev_tools/write_globals_test.rb +20 -0
- data/examples/old/linguistics/nine.rb +0 -0
- data/examples/old/rewrite_code/input/hello.py +1 -0
- data/examples/old/rewrite_code/input/subdir/hello.py +1 -0
- data/examples/old/rewrite_code/input/world.py +1 -0
- data/examples/old/rewrite_code/rewrite_code.rb +18 -0
- data/examples/old/rewrite_code/templates/rewrite_code.erb +32 -0
- data/examples/window_check/data.rb +1260 -0
- data/examples/window_check/fruits.rb +118 -0
- data/examples/window_check/tools.rb +56 -0
- data/examples/window_check/window_check.rb +214 -0
- data/glim_generated_tests/make_special_code_with_fixed_length_test.rb +44 -0
- data/glim_generated_tests/old-20230831120513-make_special_code_with_fixed_length_test.rb +1 -0
- data/glim_generated_tests/old-20230831121222-make_special_code_with_fixed_length_test.rb +55 -0
- data/glim_generated_tests/old-20230831124501-make_special_code_with_fixed_length_test.rb +33 -0
- data/glim_generated_tests/test/make_special_code_with_fixed_length_test.rb +58 -0
- data/lib/anthropic_request_details.rb +37 -0
- data/lib/anthropic_response.rb +101 -0
- data/lib/chat_request_details.rb +140 -0
- data/lib/chat_response.rb +303 -0
- data/lib/glim_ai/version.rb +5 -0
- data/lib/glim_ai.rb +8 -0
- data/lib/glim_ai_callable.rb +151 -0
- data/lib/glim_context.rb +62 -0
- data/lib/glim_helpers.rb +54 -0
- data/lib/glim_request.rb +266 -0
- data/lib/glim_response.rb +155 -0
- data/lib/globals.rb +255 -0
- data/lib/html_templates/chat_request.erb +86 -0
- data/sample.env +9 -0
- metadata +131 -0
data/lib/anthropic_response.rb
@@ -0,0 +1,101 @@
+require 'anthropic'
+#require_relative 'globals'
+#require 'tiktoken_ruby' # TODO only for token counting while anthropic doesn't support it
+
+class AnthropicResponse < GlimResponse
+
+  def self._llm_info(llm_name)
+    if llm_name.start_with?("claude-instant-1")
+      prompt_price = 1.63
+      completion_price = 5.51
+      context_length = 100_000
+    elsif llm_name.start_with?("claude-2")
+      prompt_price = 11.02
+      completion_price = 32.68
+      context_length = 100_000
+    else
+      raise "Unknown model #{llm_name}"
+    end
+    cost_per_prompt_token = prompt_price / 1_000_000.0
+    cost_per_completion_token = completion_price / 1_000_000.0
+    return {cost_per_prompt_token:,cost_per_completion_token:,context_length:}
+  end
+
+  def self._count_tokens(model, s)
+    # TODO -- not yet supported by ruby-anthropic
+    # client = Anthropic::Client.new
+    # puts "***** #{client.count_tokens(req.prompt)}"
+
+    enc = Tiktoken.encoding_for_model("gpt-3.5-turbo") # this is obviously wrong, should use anthropic
+    return enc.encode(s).length
+  end
+
+  # Anthropic does not report token counts so we use the number from the request
+  def prompt_tokens
+    req.prompt_token_count
+  end
+
+  # Anthropic does not report token counts so we count ourselves
+  # NOTE: This means there may be some inaccuracies.
+  def completion_tokens
+    wait_for_response
+    req.count_tokens(completion)
+  end
+
+  def responding_llm_name
+    "TODO FOR ANTHROPIC"
+  end
+
+  def self.api_limiter
+    @_api_limiter ||= APILimiter.new(max_concurrent_requests: 1)
+  end
+
+  attr_reader :total_token_count, :prompt_token_count, :completion_token_count
+
+  def process_response_from_api
+    @completion = raw_response[:completion] || err("Anthropic API error: No completion in #{raw_response}")
+    # puts "JSON: #{JSON.pretty_generate(raw_response)}"
+    usage = raw_response[:usage]
+    if usage
+      @total_token_count = usage[:total_tokens] || err("No total_tokens in #{usage}")
+      @prompt_token_count = usage[:prompt_tokens] || err("No prompt_tokens in #{usage}")
+      @completion_token_count = usage[:completion_tokens] || err("No completion_tokens in #{usage}")
+    else
+      #raise err("No usage in #{raw_response}")
+    end
+
+  end
+
+  def async_send_request_to_api
+    # anthropic API modifies the prompt
+    # but no need to params = Marshal.load(Marshal.dump(req.request_hash))
+    # since it's only the prompt, we can just do this:
+    params = req.request_hash.dup
+    params[:prompt] = req.request_hash[:prompt].dup
+
+    raise "request_hash should not have messages for Anthropic" if params[:messages]
+    @thread = Thread.new do
+      AnthropicResponse.api_limiter.with_limit do
+        client = Anthropic::Client.new
+        # this is necessary because the Anthropic API modifies the prompt
+        _raw_response = client.complete(parameters: params).with_indifferent_access
+        if _raw_response[:error]
+          if _raw_response[:error][:type] == "rate_limit_error"
+            limit = AnthropicResponse.api_limiter.max_concurrent_requests
+            raise RateLimitExceededError, "Rate limit (#{limit}) exceeded. Edit config or negotiate with Anthropic to avoid this."
+          else
+            "Anthropic API error: #{_raw_response[:error]}"
+          end
+        end
+        _raw_response
+      end
+    end
+  end
+end
+
+##### NOT SUPPORTED in the code yet
+# top_k
+# integer
+# Only sample from the top K options for each subsequent token.
+# Used to remove "long tail" low probability responses. Learn more technical details here.
+
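
Note on the pricing table above: `_llm_info` stores prices in USD per million tokens and converts them to per-token costs, so the cost of a call is prompt_tokens times the per-prompt-token cost plus completion_tokens times the per-completion-token cost. A minimal standalone sketch of that arithmetic (the `estimate_cost` helper and the token counts are illustrative, not part of the gem):

    # Illustrative sketch only -- mirrors the per-token conversion in _llm_info.
    def estimate_cost(prompt_tokens, completion_tokens, prompt_price_per_m, completion_price_per_m)
      prompt_tokens * (prompt_price_per_m / 1_000_000.0) +
        completion_tokens * (completion_price_per_m / 1_000_000.0)
    end

    # e.g. claude-instant-1 with 1,200 prompt tokens and 300 completion tokens:
    # 1_200 * 1.63e-6 + 300 * 5.51e-6, roughly $0.0036
    puts estimate_cost(1_200, 300, 1.63, 5.51)
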
data/lib/chat_request_details.rb
@@ -0,0 +1,140 @@
+require 'json-schema'
+require 'net/http'
+
+require 'openai'
+require 'tiktoken_ruby'
+
+require_relative 'globals'
+
+
+
+# GlimRequest delegates to this
+class ChatRequestDetails # only for requests that involve a message array, like OpenAI
+
+  def initialize(req)
+    @req = req
+  end
+  attr_accessor :req
+
+  def response_class
+    ChatResponse
+  end
+
+  def llm_class_changed
+    update_request_hash
+  end
+
+  def update_request_hash
+    req.request_hash[:temperature] = req.temperature / 2.0 if req.temperature # for some reason, OpenAI goes from 0 to 2, not 0 to 1
+    req.request_hash[:max_tokens] = req.max_tokens
+    req.request_hash[:model] = req.llm_name
+
+    #this is risky because it can overwrite things....
+
+    if @message_history
+      messages = req.message_history.dup
+    else
+      messages = [{"role":"system","content":"You are a helpful assistant."}]
+    end
+    # this could make sense, for example, if message_history
+    # has a function call in it, along with the response to the function call that glim inserted
+    # TODO: we might want to handle that case differently.
+    if req.prompt
+      messages.append({ role: 'user', content: req.prompt })
+    end
+
+    req.request_hash[:messages] = messages
+
+  end
+
+  def set_message_history(messages)
+    @message_history = messages
+    update_request_hash
+  end
+  attr_reader :message_history
+
+  def replace_initial_system_message(system_message)
+    @message_history ||= []
+    @message_history[0] ||= {
+      "role": "system",
+      "content": system_message
+    }
+  end
+
+
+  # for convenience, if you want a list, you can specify the schema for the items
+  def set_output_schema(output_schema, *flags)
+    output_schema.must_be_a Hash
+    meta_schema = JSON::Validator.validator_for_name('draft4').metaschema
+    begin
+      JSON::Validator.validate!(meta_schema, output_schema)
+      # putt :extract_data, 'The schema is valid.'
+      @output_schema = output_schema
+    rescue JSON::Schema::ValidationError => e
+      putt :extract_data, "The schema is not valid. Reason: #{e.message}"
+      putt :extract_data, "Schema: #{output_schema}"
+      raise
+    end
+
+    if flags.include?(:list)
+      @output_schema = {
+        type: 'object',
+        properties: {
+          list: {
+            type: 'array',
+            items: output_schema
+          }
+        }
+      }
+    end
+
+    extract_data_function_name = 'extract_data'
+    req.request_hash[:functions] = [{
+      name: extract_data_function_name,
+      description: "Extracts data from the user's message",
+      parameters: @output_schema
+    }]
+    # Specifying a particular function via {"name":\ "my_function"} forces the model to call that function.
+    req.request_hash[:function_call] = { "name": extract_data_function_name }
+  end
+  attr_reader :output_schema
+
+  def expected_output_is_list?
+    @output_schema && @output_schema[:type] == 'object' && @output_schema[:properties] && @output_schema[:properties][:list]
+  end
+
+  # function that in this request we offer to
+  # the LLM API to call
+  def set_functions_object(functions_object)
+    @functions_object = functions_object
+    # update_request_hash
+    req.request_hash[:functions] = functions_object.class.ai_method_signatures_clean
+    # [{
+    # name: extract_data_function_name,
+    # description: "Extracts data from the user's message",
+    # parameters: @output_schema
+    # }]
+    # # Specifying a particular function via {"name":\ "my_function"} forces the model to call that function.
+    # request_data[:function_call] = { "name": extract_data_function_name }
+  end
+
+  def force_function_call(function_name)
+    req.request_hash[:function_call] = { "name": function_name }
+  end
+
+  attr_reader :functions_object
+
+  def to_s
+    s = "ChatRequestDetails: "
+    if output_schema
+      schema = JSON.pretty_generate(output_schema)
+      s += "\nSchema:\n#{schema}\n"
+    end
+    if functions_object
+      s += "\nFunctions object: #{functions_object}"
+    end
+    return s
+  end
+
+end
+
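
For reference, when `set_output_schema` is called with the `:list` flag it wraps the item schema in an object with a single `list` array, advertises that wrapper as an `extract_data` function, and forces the model to call it. A minimal sketch of the wrapper that results (the `city_schema` example is illustrative, not part of the gem):

    # Illustrative sketch only: an item schema and the wrapper built for :list.
    city_schema = {
      type: 'object',
      properties: {
        name:       { type: 'string' },
        population: { type: 'integer' }
      }
    }

    wrapped = {
      type: 'object',
      properties: {
        list: { type: 'array', items: city_schema }
      }
    }
    # request_hash[:functions] then holds one "extract_data" function whose
    # parameters are `wrapped`, and request_hash[:function_call] forces that call,
    # so the reply comes back as structured JSON rather than free text.
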
data/lib/chat_response.rb
@@ -0,0 +1,303 @@
+require 'json-schema'
+require 'net/http'
+
+require 'openai'
+require 'tiktoken_ruby'
+
+require_relative 'glim_ai_callable'
+
+require_relative 'globals'
+
+OpenAI.configure do |config|
+  config.access_token = ENV.fetch('OPENAI_API_KEY')
+  config.request_timeout = 480 # Optional
+end
+
+
+class ChatResponse < GlimResponse
+
+  def self._llm_info(model)
+    if model.start_with?("gpt-3.5-turbo-16k")
+      prompt_price = 3.0
+      completion_price = prompt_price / 3.0 * 4.0
+      context_length = 16384
+    elsif model.start_with?("gpt-3.5")
+      prompt_price = 1.5
+      completion_price = prompt_price / 3.0 * 4.0
+      context_length = 4096
+    elsif model.start_with?("gpt-4-32k")
+      prompt_price = 60.0
+      completion_price = 2 * prompt_price
+      context_length = 32768
+    elsif model.start_with?("gpt-4")
+      prompt_price = 30.0
+      completion_price = 2 * prompt_price
+      context_length = 8192
+    elsif model.start_with?("meta-llama")
+      prompt_price = 1.0
+      completion_price = prompt_price
+      context_length = 4096
+    elsif model.start_with?("codellama")
+      prompt_price = 1.0
+      completion_price = prompt_price
+      context_length = 4096
+    else
+      raise "Unknown model #{model}"
+    end
+    cost_per_prompt_token = prompt_price / 1_000_000.0
+    cost_per_completion_token = completion_price / 1_000_000.0
+    return {cost_per_prompt_token:,cost_per_completion_token:,context_length:}
+  end
+
+  def self._count_tokens(llm_name, s)
+    enc = Tiktoken.encoding_for_model(llm_name)
+    if !enc
+      # putt :warning, "Tiktoken doesn't know model #{llm_name}"
+      enc = Tiktoken.encoding_for_model("gpt-3.5-turbo")
+    end
+    return enc.encode(s).length
+  end
+
+  def self.api_limiter
+    @_api_limiter ||= APILimiter.new(max_concurrent_requests: 2)
+  end
+
+  # TODO -- some day we'll want to support tokens
+  # token_string = params.to_s
+  # enc = Tiktoken.encoding_for_model("gpt-4")
+  # @token_estimate = enc.encode(token_string).length
+
+
+  def placeholder_anyscale_api_call(params)
+    # for some reason the ruby gem for OpenAI won't work with anyscale
+    # so we're doing it manually for now
+    key = ENV.fetch('ANYSCALE_API_KEY')
+    # for some reason, this doesn't work, just returns "details: Not found"
+    # client = OpenAI::Client.new(uri_base: "https://api.endpoints.anyscale.com/v1", access_token: key)
+    # @raw_response = deep_symbolize_keys(cached_response || client.chat(parameters: params))
+    api_base = "https://api.endpoints.anyscale.com/v1"
+    uri = URI("#{api_base}/chat/completions")
+    r = Net::HTTP::Post.new(uri, 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{key}")
+    r.body = params.to_json
+    http = Net::HTTP.new(uri.host, uri.port)
+    http.read_timeout = 480 # seconds
+    http.use_ssl = true if uri.scheme == 'https'
+    response = http.request(r)
+    return JSON.parse(response.body).with_indifferent_access
+  end
+
+  def async_send_request_to_api
+    h = req.request_hash
+    log_request_hash
+
+    raise "model not set; remember to set request.llm_name = (the model you want to use)" unless h[:model]
+
+    @thread = Thread.new do
+      ChatResponse.api_limiter.with_limit do
+        if GlimRequest.openai_llms.include?(req.llm_name)
+          client = OpenAI::Client.new
+          _raw_response = client.chat(parameters: req.request_hash).with_indifferent_access
+        else
+          _raw_response = placeholder_anyscale_api_call(req.request_hash).with_indifferent_access
+        end
+        if _raw_response[:error]
+          if _raw_response[:error][:type] == "rate_limit_error"
+            limit = self.api_limiter.max_concurrent_requests
+            raise RateLimitExceededError, "Rate limit (#{limit}) exceeded. Edit config or negotiate with Anthropic to avoid this."
+          else
+            puts "-------Error. Prompt:"
+            puts req.prompt.class
+            raise "OpenAI API error: #{_raw_response[:error]}"
+          end
+        end
+        _raw_response
+      end
+    end # thread; its value is the raw_response
+  end
+
+
+  def process_response_from_api
+    raise "no raw response!" unless @raw_response
+
+    # TODO
+    # request has message_history
+    # response has messages_sent (still need reader)
+    # and then what does the following look like?
+
+    @message = (raw_response.dig(:choices, 0, :message) || err("No message returned!"))
+    @messages = messages_sent.dup + [@message]
+    @completion = @message[:content] # turns out there is always a completion, even with function calls
+    log_completion
+    if function_call_message?
+      log_function_call_message
+    end
+
+  end
+
+  def function_call_message?
+    wait_for_response
+    @message[:function_call] != nil
+  end
+
+  def _function_call_from_message
+    wait_for_response
+    @message[:function_call] || err("No function call!")
+  end
+
+  def _function_call_arguments_from_message
+    begin
+      s = _function_call_from_message[:arguments]
+      JSON.parse(s).with_indifferent_access
+    rescue => e
+      puts "JSON parse error:"
+      puts s
+      log_json_error(s)
+      err(e)
+    end
+  end
+
+  def function_name_from_message
+    _function_call_from_message[:name]
+  end
+
+  def log_json_error(s)
+    req.save_log_file("json_error.json", s)
+  end
+
+  def log_function_call_message
+    s = "LLM requested results of function call to #{req.functions_object}##{function_name_from_message}\n"
+    s += JSON.pretty_generate(_function_call_arguments_from_message)
+    req.save_log_file("function_call.txt", s)
+  end
+
+  # returns a new GlimRequest that is preloaded with the data for
+  # sending the results of a function call back to the LLM API
+  def create_request_with_function_result
+    wait_for_response
+
+    eval_functions_object = req.functions_object || err("No functions_object")
+    raise "functions_object must be ai_callable, is #{eval_functions_object}" unless eval_functions_object.is_a?(AICallable)
+
+    eval_function_name = _function_call_from_message[:name].to_sym
+    raise "no_method_error #{eval_function_name}" unless eval_functions_object.respond_to?(eval_function_name)
+
+    # TODO -- validate that the schema is right?
+    eval_function_arguments = _function_call_arguments_from_message
+
+    putt :functions, "#{eval_functions_object}.#{eval_function_name}(#{eval_function_arguments})"
+    eval_function_result = eval_functions_object._perform_ai_call(eval_function_name, eval_function_arguments)
+
+    return create_request_for_chat(message_to_append: {
+      role: "function",
+      name: eval_function_name,
+      content: eval_function_result.to_json
+    })
+  end
+
+  def create_request_for_chat(message_to_append: nil)
+    wait_for_response
+    h = req.generic_params_hash.merge({
+      llm_name: req.llm_name,
+      context: req.context
+    })
+    new_request = GlimRequest.new(**h)
+    new_request.set_message_history(messages.dup)
+
+    if message_to_append
+      messages.append(message_to_append)
+    end
+
+    new_request.request_hash[:messages] = messages
+    new_request
+
+  end
+
+  # the message generated by GPT
+  def message
+    wait_for_response
+    @message
+  end
+
+  # all messages: prior ones, the prompt, GPT's response, and the function call, if it happened
+  def messages
+    wait_for_response
+    @messages
+  end
+
+  # the extracted data generated by GPT
+  def extracted_data
+    return @extracted_data if @extracted_data
+    raise "no output schema specified, can't get extracted_data" unless req.output_schema
+    wait_for_response
+    args = _function_call_arguments_from_message
+
+    JSON::Validator.validate!(req.output_schema, args)
+    if req.expected_output_is_list? # TODO -- this feels a bit awkward
+      @extracted_data = args[:list] || raise(err("Expected list"))
+    else
+      @extracted_data = args
+    end
+    @extracted_data
+  end
+
+  def messages_sent
+    req.request_hash[:messages]
+  end
+
+  def usage
+    raw_response[:usage]
+  end
+
+  def prompt_tokens
+    usage[:prompt_tokens]
+  end
+
+  def completion_tokens
+    usage[:completion_tokens]
+  end
+
+  def responding_llm_name
+    raw_response[:model]
+  end
+
+end
+
+
+
+######## OpenAI only - TODO
+# maybe put this in a hash called ai.open_ai ?
+
+# n
+# integer or null
+# Optional
+# Defaults to 1
+# How many chat completion choices to generate for each input message.
+
+#-------
+
+# presence_penalty
+# number or null
+# Optional
+# Defaults to 0
+# Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
+# See more information about frequency and presence penalties.
+
+# frequency_penalty
+# number or null
+# Optional
+# Defaults to 0
+# Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
+# See more information about frequency and presence penalties.
+
+# logit_bias
+# map
+# Optional
+# Defaults to null
+# Modify the likelihood of specified tokens appearing in the completion.
+# Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.


# "none" means the model does not call a function, and responds to the end-user.
# "auto" means the model can pick between an end-user or calling a function.
# Specifying a particular function via {"name":\ "my_function"} forces the model to call that function.
# "none" is the default when no functions are present. "auto" is the default if functions are present.
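
Taken together, `function_call_message?`, `create_request_with_function_result`, and `create_request_for_chat` support a call-and-respond loop: when the model asks for a function, the gem runs it on `req.functions_object` and builds a follow-up request whose message history ends with a role: "function" message. A rough sketch of how a caller might drive that loop, assuming `response` is an existing ChatResponse; `send_and_wait` is a placeholder for however the application sends a GlimRequest and obtains its response, not a gem method:

    # Illustrative sketch only -- not part of the gem.
    while response.function_call_message?
      # Runs the requested method on req.functions_object and returns a new
      # GlimRequest ending with a role: "function" result message.
      follow_up = response.create_request_with_function_result
      response  = send_and_wait(follow_up)  # placeholder for obtaining the next ChatResponse
    end
    puts response.message[:content]  # the final assistant reply
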