glim_ai 0.2.0
- checksums.yaml +7 -0
- data/Gemfile +25 -0
- data/Gemfile.lock +49 -0
- data/LICENSE.txt +21 -0
- data/README.md +125 -0
- data/Rakefile +31 -0
- data/examples/autocode/autocode.rb +166 -0
- data/examples/autocode/solargraph_test.rb +59 -0
- data/examples/autocode/templates/changed_files_now_evaluate_output.erb +29 -0
- data/examples/autocode/templates/task.erb +16 -0
- data/examples/calc/calc.rb +50 -0
- data/examples/code_competition/code_competition.rb +78 -0
- data/examples/code_competition/output/python_claude-2.rb +33 -0
- data/examples/code_competition/output/python_claude-instant-1.rb +18 -0
- data/examples/code_competition/output/python_gpt-3.5-turbo-16k.rb +69 -0
- data/examples/code_competition/output/python_gpt-3.5-turbo.rb +43 -0
- data/examples/code_competition/output/python_gpt-4.rb +34 -0
- data/examples/code_competition/output/ruby_claude-2.rb +22 -0
- data/examples/code_competition/output/ruby_claude-instant-1.rb +20 -0
- data/examples/code_competition/output/ruby_gpt-3.5-turbo-16k.rb +27 -0
- data/examples/code_competition/output/ruby_gpt-3.5-turbo.rb +30 -0
- data/examples/code_competition/output/ruby_gpt-4.rb +31 -0
- data/examples/code_competition/output/ruby_human.rb +41 -0
- data/examples/code_competition/templates/analyze_code.erb +33 -0
- data/examples/code_competition/templates/write_code.erb +26 -0
- data/examples/glim_demo/ask_all.rb +35 -0
- data/examples/glim_demo/templates/rate_all.erb +24 -0
- data/examples/improve_prompt/improve_prompt.rb +62 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_explicit_steps.erb +15 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_explicit_steps_user_message.erb +15 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_initial.erb +8 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_nothing.erb +19 -0
- data/examples/improve_prompt/templates/try_code_first.erb +13 -0
- data/examples/improve_prompt/templates/try_code_first_system.erb +22 -0
- data/examples/old/econ/discounting.rb +27 -0
- data/examples/old/econ/templates/discounting.erb +10 -0
- data/examples/old/generate_glim_code/generate_glim_code.rb +34 -0
- data/examples/old/generate_glim_code/templates/generate_glim_code.erb +17 -0
- data/examples/old/generate_glim_code/templates/improve_code.erb +27 -0
- data/examples/old/glim_dev_tools/ask_code_question.rb +38 -0
- data/examples/old/glim_dev_tools/templates/ask_code_question.erb +12 -0
- data/examples/old/glim_dev_tools/templates/write_globals_test.erb +28 -0
- data/examples/old/glim_dev_tools/write_globals_test.rb +20 -0
- data/examples/old/linguistics/nine.rb +0 -0
- data/examples/old/rewrite_code/input/hello.py +1 -0
- data/examples/old/rewrite_code/input/subdir/hello.py +1 -0
- data/examples/old/rewrite_code/input/world.py +1 -0
- data/examples/old/rewrite_code/rewrite_code.rb +18 -0
- data/examples/old/rewrite_code/templates/rewrite_code.erb +32 -0
- data/examples/window_check/data.rb +1260 -0
- data/examples/window_check/fruits.rb +118 -0
- data/examples/window_check/tools.rb +56 -0
- data/examples/window_check/window_check.rb +214 -0
- data/glim_generated_tests/make_special_code_with_fixed_length_test.rb +44 -0
- data/glim_generated_tests/old-20230831120513-make_special_code_with_fixed_length_test.rb +1 -0
- data/glim_generated_tests/old-20230831121222-make_special_code_with_fixed_length_test.rb +55 -0
- data/glim_generated_tests/old-20230831124501-make_special_code_with_fixed_length_test.rb +33 -0
- data/glim_generated_tests/test/make_special_code_with_fixed_length_test.rb +58 -0
- data/lib/anthropic_request_details.rb +37 -0
- data/lib/anthropic_response.rb +101 -0
- data/lib/chat_request_details.rb +140 -0
- data/lib/chat_response.rb +303 -0
- data/lib/glim_ai/version.rb +5 -0
- data/lib/glim_ai.rb +8 -0
- data/lib/glim_ai_callable.rb +151 -0
- data/lib/glim_context.rb +62 -0
- data/lib/glim_helpers.rb +54 -0
- data/lib/glim_request.rb +266 -0
- data/lib/glim_response.rb +155 -0
- data/lib/globals.rb +255 -0
- data/lib/html_templates/chat_request.erb +86 -0
- data/sample.env +9 -0
- metadata +131 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 439f2dac7e8ac38e125560a5475f94f427b6649c10517bf0ce0ddc9643ffd0f2
+  data.tar.gz: 5eecfa7efb241e85f67368d90b061d227ad9b33c44312984d6dcf385f818aba0
+SHA512:
+  metadata.gz: c4fc1d7361ff5f465298d5e9a32f1447a900ac165b80c40de6a23781735b7d0a92aec42a2f86a254e7da71ce49e489ae5696e61905037292e81b7f73c4230cb1
+  data.tar.gz: a4e2fa686290840c2f8183397bbdc7aad1e1ca4a8466d2fa0336202428a364cb52ed1311b609ee72ff34f0fb523fc08b246dffb6bf8ae23803e1e09077c67d61
data/Gemfile
ADDED
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+source 'https://rubygems.org'
+
+# Specify your gem's dependencies in llm.gemspec
+gemspec
+
+gem 'must_be'
+
+gem 'rake', '~> 13.0'
+
+gem 'minitest', '~> 5.0'
+
+gem 'anthropic'
+gem 'ruby-openai'
+
+gem 'json-schema'
+
+# this was a bad idea; purpose was to use this for OpenAI functions, but those use kwargs which
+# are not supported very well by Sorbet
+# gem 'sorbet', :group => :development
+# gem 'sorbet-runtime'
+# gem 'tapioca', require: false, :group => :development
+
+gem "tiktoken_ruby", "~> 0.0.5"
data/Gemfile.lock
ADDED
@@ -0,0 +1,49 @@
+PATH
+  remote: .
+  specs:
+    llm (0.1.0)
+
+GEM
+  remote: https://rubygems.org/
+  specs:
+    addressable (2.8.5)
+      public_suffix (>= 2.0.2, < 6.0)
+    anthropic (0.1.0)
+      faraday (>= 1)
+      faraday-multipart (>= 1)
+    dotenv (2.8.1)
+    faraday (2.7.10)
+      faraday-net_http (>= 2.0, < 3.1)
+      ruby2_keywords (>= 0.0.4)
+    faraday-multipart (1.0.4)
+      multipart-post (~> 2)
+    faraday-net_http (3.0.2)
+    json-schema (4.0.0)
+      addressable (>= 2.8)
+    minitest (5.19.0)
+    multipart-post (2.3.0)
+    must_be (1.1.0)
+    public_suffix (5.0.3)
+    rake (13.0.6)
+    ruby-openai (4.2.0)
+      faraday (>= 1)
+      faraday-multipart (>= 1)
+    ruby2_keywords (0.0.5)
+    tiktoken_ruby (0.0.5-arm64-darwin)
+
+PLATFORMS
+  arm64-darwin-22
+
+DEPENDENCIES
+  anthropic
+  dotenv
+  json-schema
+  llm!
+  minitest (~> 5.0)
+  must_be
+  rake (~> 13.0)
+  ruby-openai
+  tiktoken_ruby (~> 0.0.5)
+
+BUNDLED WITH
+   2.4.6
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2023 Ulrich Gall
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,125 @@
+
+## Design goals
+
+It's becoming clear that LangChain's abstractions aren't the right thing. LangChain introduced abstractions that allow converting its way of interacting with LLMs to whichever underlying model is used. That's too much complexity overhead. We take a more pragmatic approach: make it easy to work with current APIs. The problem right now isn't that we need future-proof code, it's that we need good code quickly.
+
+Specifically, goals include:
+- Make it easy to iterate on prompts separately from iterating on the code.
+- Allow changing of the model as easily as possible. The model can be specified by default in code and overridden by the prompt, or the other way around.
+- Make it as easy as possible to send requests asynchronously, without having to think about concurrency more than necessary.
+- Handle rate limits in a smart way.
+
+## Status
+
+Still lots to do; just asking for some initial feedback on:
+
+- spec template idea in general
+- naming!
+- ruby conventions i'm violating
+
+## Design Details
+
+A GlimRequest represents a request to an LLM to perform a task. GlimRequest itself contains
+functionality and parameters that are common to all supported LLMs:
+- parameters like temperature, top_p, etc
+- the name of the llm to be used
+- code for handling erb templates
+- token counting and cost estimate code
+
+To support functionality that is specific to some LLM APIs, there is, for each supported LLM API,
+a GlimRequestDetails class.
+
+Each GlimRequest can have a reference to a GlimRequestDetails object, to which it delegates any
+methods. The GlimRequest, potentially with support from a GlimRequestDetails object, has to meet
+one key responsibility: after it is created, it must at all times be able to provide a request_hash,
+which is a Hash that contains all of the data that needs to be sent to the LLM's API in order to
+submit the request.
+
+Thus, the GlimRequest and GlimRequestDetails must, whenever the user makes a modification to either,
+update the internal request_hash to stay consistent.
+
+There is one tricky situation that is a bit annoying, but we decided to be pragmatic about it
+and tolerate some awkwardness: if you change the llm for a GlimRequest to an llm that requires a different
+GlimRequestDetails class, then the GlimRequestDetails will be replaced and any data in it is lost.
+
+For example, when changing from "gpt-3.5-turbo" (ChatRequestDetails) to "claude-instant-1" (AnthropicRequestDetails),
+the output_schema or function_object will of course be deleted. This is facilitated by the GlimRequest
+creating a new AnthropicRequestDetails instance; as it is created, it is responsible for making sure that
+the request_hash is accurate. In the other direction, changing from Claude to GPT, similarly, a new
+ChatRequestDetails instance would be created.
+
+Above we have described that (and how) a GlimRequest can always provide a request_hash object. The point of
+this is that:
+- this hash is used for generating the cache key. If the hashes are identical, we don't need
+to contact the LLM API again, which saves time and money.
+- the corresponding GlimResponse class can call GlimRequest#request_hash to obtain the necessary data,
+and then it is responsible for sending the request off to an LLM, as well as interpreting the response
+and making it accessible in a convenient way to the user.
+
+There is one additional feature that is related: for each GlimRequest, there is a log directory, in which
+at any time there are several files that represent the content of the GlimRequest:
+- generic_request_params: temperature, llm_name, etc
+- prompt
+- template_text (if a template was used)
+- request_hash
+
+And for ChatRequestDetails, also:
+- messages: the array of messages, up to and including the message that will be sent
+- output_schema.json
+
+Once a response has been created, it would also contain:
+- raw_response.json: the exact response as received when making the LLM API call
+- completion.txt: just the completion that was generated by the LLM for this request
+
+## Running the code
+
+There are probably some annoying issues with paths and whatnot.
+Maybe best to just read the code without running it.
+Check out:
+test/*
+examples/*
+
+
+## License
+
+The gem will be available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
+
+## TODO
+
+# Cleanup before alpha
+
+- write a better README
+- make a proper gem
+
+# Features to add / change
+
+(1) post processing, including a way to *throw* errors and perhaps have an errors/ subdir in the llm_logs
+-- maybe not needed because extract_data? or maybe support it only for json?
+
+(2) AICallable
+- more data types: array, boolean?
+- allow changing the ai_name for the function, not just the args
+
+(3)
+- request#response -- rename to make clear that it's async
+- ask for feedback on this one?
+
+(4) support "continue" prompting, especially for claude; 2k tokens is not much
+
+(5) make include_files and extract_files so that they can be model-specific... for example, to use Anthropic's XML tag training
+- might need to get rid of the ruby anthropic API since it forces the \n\nHuman: pattern
+
+
+(6) web view on the input and outputs for the llm?
+
+(7) iterate on prompt to measure effectiveness. have LLM develop variations of prompt.
+try it out on the multi-file-generation.
+
+(8) Token healing?
+https://github.com/guidance-ai/guidance
+
+
+---
+git pull for code_gen?
+---
+
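The caching idea described above (identical request_hash values mean the LLM API is never contacted twice) can be illustrated with a minimal sketch. This is not code from the package; the digest-based key and the example request_hash contents are assumptions based on the README's design notes.

```ruby
require 'digest'
require 'json'

# Hypothetical illustration of the cache-key idea: serialize the request_hash
# deterministically, then digest it. Two requests with identical parameters
# yield the same key, so a cached response can be reused instead of calling
# the LLM API again.
def cache_key_for(request_hash)
  canonical = JSON.generate(request_hash.sort.to_h) # sort top-level keys for determinism
  Digest::SHA256.hexdigest(canonical)
end

a = { model: "gpt-4", temperature: 0.0, messages: [{ role: "user", content: "Hi" }] }
b = { temperature: 0.0, model: "gpt-4", messages: [{ role: "user", content: "Hi" }] }
puts cache_key_for(a) == cache_key_for(b) # => true: same request, same cache key
```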
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+require 'bundler/gem_tasks'
+require 'rake/testtask'
+require 'fileutils'
+
+def clear(var, what)
+  path = ENV[var] || what
+  FileUtils.mv(path, File.join("/tmp", "glim_#{what}_#{Time.now.to_i}"))
+  FileUtils.mkdir(path)
+end
+
+Rake::TestTask.new(:test) do |t|
+  t.libs << 'test'
+  t.libs << 'lib'
+  t.test_files = FileList['test/**/test_*.rb']
+end
+
+task default: :test
+
+task :clear_cache do
+  clear('GLIM_CACHE_DIRECTORY', 'glim_cache')
+end
+
+task :clear_logs do
+  clear('GLIM_LOG_DIRECTORY', 'glim_logs')
+end
+
+task :clear_generated_code do
+  clear('GLIM_GENERATED_CODE_DIRECTORY', 'glim_generated_code')
+end
data/examples/autocode/autocode.rb
ADDED
@@ -0,0 +1,166 @@
+require_relative '../../lib/globals'
+require_relative '../../lib/glim_ai_callable'
+
+require 'tempfile'
+require 'open3'
+
+class CodeBase
+  include AICallable
+  def done?
+    return @done != nil
+  end
+
+  def initialize(run_command:, project_root:)
+    @run_command = run_command
+    @project_root = project_root
+    @done = nil
+  end
+
+  attr_reader :run_command
+  attr_accessor :project_root
+
+  def done_confidence
+    return @done
+  end
+
+  ai_callable_as :show_file do
+    describe "Returns the contents of the file with the given path name"
+    string :path_name, "The path name of the file to show", required: true
+  end
+  def show_file(path_name:)
+    puts "Reading file and providing to LLM: project_root = #{project_root} / #{path_name}"
+    begin
+      full_path = File.join(project_root, path_name)
+      return File.read(full_path)
+    rescue Errno::ENOENT
+      return "File not found"
+    rescue
+      raise
+    end
+  end
+
+  # ai_callable_as :run_code do
+  #   describe "Run the code to try it out. Returns [stderr, stdout] with the output produced by running the code."
+  #   number :confidence_level, "Your estimate of the likelihood (0..1) that your code changes will work.", required: true
+  # end
+
+  # plan was to allow the LLM to trigger this, but it turns out that's more complicated
+  # because it doesn't reliably call a function AND provide a completion
+  def run_code(confidence_level: 1)
+    Open3.popen3(@run_command) do |stdin, stdout, stderr, thread|
+      output, errors = "", ""
+      out_thread = Thread.new { output = stdout.read }
+      err_thread = Thread.new { errors = stderr.read }
+      out_thread.join
+      err_thread.join
+      puts "\n\n\n>>> $#{@run_command}"
+      return errors, output
+    end
+  end
+
+  ai_callable_as :code_ran_as_expected do
+    describe "Call this to indicate that the code ran as expected."
+    number :confidence_level, "Your estimate of the likelihood (0..1) that the code ran as expected and accomplished the given task.", required: true
+  end
+  def code_ran_as_expected(confidence_level:)
+    puts "\n\n\nWe think we are done. Confidence level: #{confidence_level}"
+    @done = confidence_level
+  end
+end
+
+def perform_task(llm_name:, project_root:, task:, run_command:)
+
+  cb = CodeBase.new(run_command:, project_root:)
+  glim = GlimContext.new(log_name: "autocode-#{Time.now.strftime('%Y-%m-%d-%H-%M-%S')}")
+  req = glim.request_from_template("task", task:)
+  req.llm_name = llm_name
+
+  max_iter = 4
+
+  for iter in 0..max_iter
+    req.set_functions_object(cb)
+    response = req.response
+    completion = response.completion
+    if completion
+      puts "Got Completion."
+      if response.function_call_message?
+        puts "WEIRD: Also got a function call message; LLM wants us to call #{response.function_name_from_message}"
+        puts "IGNORED IT."
+      end
+      text, files_saved = extract_and_save_files(completion, cb.project_root)
+      puts "Extracted #{files_saved.length} files from completion."
+      if files_saved.length > 0
+        stderr, stdout = cb.run_code
+        # construct new request with the output of the code
+        req = response.create_request_for_chat
+        req.process_template("changed_files_now_evaluate_output", stdout:, stderr:, files_saved:)
+        # function object will be set in next iter
+        next
+      else
+        puts "No files saved, no point in running the code."
+      end
+    end
+    # either there was a completion and we handled it, or there was no completion.
+    # regardless, we need to now see if there is a function to call.
+    if response.function_call_message?
+      if completion
+        puts "Weird, we have a completion and also a function call that is not run_code."
+        puts "Completion:"
+        puts completion
+        puts "\n\nFunction call requested: "
+        puts response.function_name_from_message
+        puts "IGNORING COMPLETION. CALLING FUNCTION."
+      end
+      new_req = response.create_request_with_function_result
+      # that invoked the function requested, so now we can check:
+      break if cb.done?
+      req = new_req
+    else
+      if completion
+        puts "TODO -- ONLY got a completion."
+        puts completion
+        break
+      end
+    end
+  end
+end
+
+# This did not work well, mostly because the arg sent by GPT did not have strings escaped properly
+
+# ai_callable_as :apply_patch do
+#   describe "Modify the codebase by applying a patch"
+#   string :patch_string, "A string containing the patch to be applied, in diff -u format", ai_name: "string_with_patch_in_diff_u_format", required: true
+#   string :file_path_name, "The path name of the file to apply the patch to", required: true
+# end
+# def apply_patch(file_path_name:, patch_string:)
+#   puts "PATCH to #{file_path_name}: \n>>\n#{patch_string}\n<<\n"
+#   patch_file = Tempfile.new('patch')
+#   patch_file.write(patch_string)
+#   patch_file.rewind
+#   Open3.popen3("patch #{file_path_name} #{patch_file.path}") do |stdin, stdout, stderr, thread|
+#     err = stderr.read
+#     unless err.empty?
+#       patch_file.close
+#       patch_file.unlink
+#       raise "Failed to apply patch: #{err}"
+#     end
+#   end
+#   patch_file.close
+#   patch_file.unlink
+# end
+
+
+# task = "Expand 'test/test_levenshtein.rb' to improve test coverage. Include edge cases with legit input, as well as incorrect input."
+# #llm_name = "gpt-3.5-turbo"
+# llm_name = "gpt-4"
+# project_root = "../autocode/ruby-llm"
+# run_command = "cd #{project_root}; ruby test/test_levenshtein.rb"
+
+
+task = "I just wrote examples/improve_prompt/improve_prompt.rb, but it has some syntax errors. Please try to get it to work."
+#llm_name = "gpt-3.5-turbo"
+llm_name = "gpt-4"
+project_root = "../autocode/ruby-llm"
+run_command = "cd #{project_root}; ruby examples/improve_prompt/improve_prompt.rb"
+
+perform_task(llm_name:, project_root:, task:, run_command:)
data/examples/autocode/solargraph_test.rb
ADDED
@@ -0,0 +1,59 @@
+# require 'solargraph'
+# require_relative '../../lib/globals'
+
+
+
+
+# NOT WORKING, UGH.
+
+
+
+
+# putt :info, "I belong in line 5, which means it's line 4 for solargraph"
+
+
+# require 'solargraph'
+
+
+# def find_definition(filename, lineno, identifier, project_dir)
+#   # Initialize a Solargraph::ApiMap object
+#   api_map = Solargraph::ApiMap.new
+
+#   # Map all Ruby files in the project directory
+#   source_files = Dir.glob("#{project_dir}/**/*.rb")
+#   puts "Mapping #{source_files.length} Ruby source files from project directory #{project_dir}"
+
+#   source_files.each do |file|
+#     source = Solargraph::Source.load(file)
+#     puts "Mapping source file: #{file}"
+#     api_map.map source
+#   end
+
+#   # Use `get_path_pins` to find the pin (definition) for the identifier
+#   pins = api_map.get_path_pins(identifier)
+#   puts "Number of pins found for identifier '#{identifier}': #{pins.length}"
+
+#   if pins.empty?
+#     return "Definition not found."
+#   else
+#     pin = pins.first
+#     puts "First pin details: #{pin.inspect}"
+#     return pin.location.to_s
+#   end
+# end
+
+
+# # Replace these variables with the appropriate values
+# project_path = "."
+# identifier = "GlimRequest"
+# context_file = "examples/autocode/solargraph_test.rb"
+# context_file = "lib/globals.rb"
+
+
+# context_line = 4 # Line numbers are zero-based in Solargraph
+# context_line = 4 # Line numbers are zero-based in Solargraph
+
+
+# source_code = find_definition(context_file, context_line, identifier, ".")
+# puts source_code
+
data/examples/autocode/templates/changed_files_now_evaluate_output.erb
ADDED
@@ -0,0 +1,29 @@
+<% req.replace_initial_system_message <<~SYSTEM
+
+You are an experienced ruby developer. You will be asked to work on a large project;
+however, you don't have full access to the source code. Therefore, you have to explicitly ask for
+any files you need. When asking for files, remember that you can use the "require" and "require_relative"
+statements to determine which files might be helpful. To request the contents of a file, use the functions
+available to you.
+When you want to make changes to the code, simply provide it in your response, as follows:
+
+#{prompt_output_files}
+
+SYSTEM
+%>
+
+You have just made some changes to the code. Specifically, the following files have changed:
+<%= files_saved.join ',' %>.
+
+Running the code again, we got the following output:
+
+stderr:
+<%= stderr %>
+---
+stdout:
+<%= stdout %>
+---
+
+If it looks like all your changes worked and the output is as expected, call the function "code_ran_as_expected".
+Otherwise, you can provide updated code to address the issues, or call the function show_file to see more of the source
+code.
data/examples/autocode/templates/task.erb
ADDED
@@ -0,0 +1,16 @@
+<% req.replace_initial_system_message <<~SYSTEM
+
+You are an experienced ruby developer. You will be asked to work on a large project;
+however, you don't have full access to the source code. Therefore, you have to explicitly ask for
+any files you need. When asking for files, remember that you can use the "require" and "require_relative"
+statements to determine which files might be helpful. To request the contents of a file, use the functions
+available to you.
+
+When you want to make changes to the code, simply provide the whole file (not just your additions!) in your response, as follows:
+
+#{prompt_output_files}
+
+SYSTEM
+%>
+
+<%= task %>
data/examples/calc/calc.rb
ADDED
@@ -0,0 +1,50 @@
+
+require_relative '../../lib/globals'
+require_relative '../../lib/glim_ai_callable'
+
+class CalculatorService
+  include AICallable
+  ai_callable_as :evaluate_expression do
+    describe "Evaluates the given ruby expression and returns the result."
+    string :exp, "The expression, as a string, in correct ruby syntax", ai_name: :expression_to_evaluate, required: true
+  end
+  def evaluate_expression(exp:)
+    # Add validation logic here
+    return eval(exp).to_s
+  end
+
+end
+
+# puts CalculatorService.ai_method_signatures
+
+glim = GlimContext.new
+
+llm_name = "gpt-3.5-turbo"
+
+calc = CalculatorService.new
+raise "Calculator Service must be AICallable" unless calc.is_a?(AICallable)
+puts "Let's test the calculator locally: "
+exp = " 1+2"
+puts exp + "=" + calc.send(:evaluate_expression, exp:).to_s
+
+puts("And now let's get GPT to use it:")
+req = glim.request(llm_name:)
+req.set_functions_object(calc)
+
+req.prompt = "What is the resistance of a 100m long copper cable that has a 6mm^2 cross section?"
+puts "First question to GPT: #{req.prompt}"
+response = req.response
+puts " response.completion:"
+puts response.completion
+
+# this will also invoke the function
+new_req = response.create_request_with_function_result
+
+# and now we send the request with the result of the function
+# evaluation, so that the LLM can use that
+new_response = new_req.response
+
+puts("new_response.messages:")
+puts(JSON.pretty_generate(new_response.messages))
+puts "new_response.completion:"
+puts new_response.completion
data/examples/code_competition/code_competition.rb
ADDED
@@ -0,0 +1,78 @@
+require_relative '../../lib/globals'
+
+llm_names = ["claude-instant-1", "claude-2", "gpt-3.5-turbo-16k", "gpt-4"]
+
+glim = GlimContext.new
+
+code = {} # lang, llm_name -> code
+for lang in ["ruby", "python"] #, "javascript"]
+  for llm_name in llm_names
+    req = glim.request_from_template("write_code", language: lang)
+    req.llm_name = llm_name
+    code[[lang, llm_name]] = req.response
+    template_text = req.template_text
+  end
+end
+
+for lang, llm_name in code.keys
+  code[[lang, llm_name]] = code[[lang, llm_name]].completion
+  File.write("examples/output/#{lang}_#{llm_name}.rb", code[[lang, llm_name]])
+end
+
+code[["ruby", "human"]] = File.read("examples/output/ruby_human.rb")
+ratings = {}
+
+for llm_name in llm_names
+  puts "analyze_code: #{llm_name}"
+  request = glim.request_from_template("analyze_code", code:, text: template_text)
+  request.llm_name = llm_name
+  ratings[llm_name] = request.response
+end
+
+# TODO - would need to support arrays as args, first.
+
+# class CodeRatings
+#   include AICallable
+
+#   def initialize(developer_name)
+#     @developer_name = developer_name
+#   end
+
+#   ai_callable_as :add_ratings do
+#     describe "Report ratings extracted from the text"
+#     string :lang, "The language of the code", required: true
+#     number :elegance, "The elegance of the code", required: true
+#     number :parallelism, "The parallelism of the code", required: true
+#     number :correctness, "The correctness of the code", required: true
+#     number :instructions_conformity, "The instructions conformity of the code", required: true
+#   end
+#   def add_ratings(**args)
+
+
+response_by_llm_name = {}
+for llm_name in llm_names
+  req = glim.request(llm_name: "gpt-3.5-turbo")
+  req.set_output_schema({
+    type: "object",
+    properties: {
+      lang: { type: "string" },
+      llm_name: { type: "string" },
+      elegance: { type: "number" },
+      parallelism: { type: "number" },
+      correctness: { type: "number" },
+      instructions_conformity: { type: "number" }
+    }
+  }, :list)
+  text = ratings[llm_name].completion
+  req.prompt = "Extract all of the json data from the following text:\n\n#{text}"
+  response_by_llm_name[llm_name] = req.response
+end
+
+ratings_by_llm_name = {}
+for llm_name in llm_names
+  ratings_by_llm_name[llm_name] = response_by_llm_name[llm_name].extracted_data
+end
+
+# puts JSON.pretty_generate(ratings_by_llm_name)
+
+# TODO -- analyze results