glim_ai 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +25 -0
- data/Gemfile.lock +49 -0
- data/LICENSE.txt +21 -0
- data/README.md +125 -0
- data/Rakefile +31 -0
- data/examples/autocode/autocode.rb +166 -0
- data/examples/autocode/solargraph_test.rb +59 -0
- data/examples/autocode/templates/changed_files_now_evaluate_output.erb +29 -0
- data/examples/autocode/templates/task.erb +16 -0
- data/examples/calc/calc.rb +50 -0
- data/examples/code_competition/code_competition.rb +78 -0
- data/examples/code_competition/output/python_claude-2.rb +33 -0
- data/examples/code_competition/output/python_claude-instant-1.rb +18 -0
- data/examples/code_competition/output/python_gpt-3.5-turbo-16k.rb +69 -0
- data/examples/code_competition/output/python_gpt-3.5-turbo.rb +43 -0
- data/examples/code_competition/output/python_gpt-4.rb +34 -0
- data/examples/code_competition/output/ruby_claude-2.rb +22 -0
- data/examples/code_competition/output/ruby_claude-instant-1.rb +20 -0
- data/examples/code_competition/output/ruby_gpt-3.5-turbo-16k.rb +27 -0
- data/examples/code_competition/output/ruby_gpt-3.5-turbo.rb +30 -0
- data/examples/code_competition/output/ruby_gpt-4.rb +31 -0
- data/examples/code_competition/output/ruby_human.rb +41 -0
- data/examples/code_competition/templates/analyze_code.erb +33 -0
- data/examples/code_competition/templates/write_code.erb +26 -0
- data/examples/glim_demo/ask_all.rb +35 -0
- data/examples/glim_demo/templates/rate_all.erb +24 -0
- data/examples/improve_prompt/improve_prompt.rb +62 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_explicit_steps.erb +15 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_explicit_steps_user_message.erb +15 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_initial.erb +8 -0
- data/examples/improve_prompt/templates/stashed/prompt_attempt_nothing.erb +19 -0
- data/examples/improve_prompt/templates/try_code_first.erb +13 -0
- data/examples/improve_prompt/templates/try_code_first_system.erb +22 -0
- data/examples/old/econ/discounting.rb +27 -0
- data/examples/old/econ/templates/discounting.erb +10 -0
- data/examples/old/generate_glim_code/generate_glim_code.rb +34 -0
- data/examples/old/generate_glim_code/templates/generate_glim_code.erb +17 -0
- data/examples/old/generate_glim_code/templates/improve_code.erb +27 -0
- data/examples/old/glim_dev_tools/ask_code_question.rb +38 -0
- data/examples/old/glim_dev_tools/templates/ask_code_question.erb +12 -0
- data/examples/old/glim_dev_tools/templates/write_globals_test.erb +28 -0
- data/examples/old/glim_dev_tools/write_globals_test.rb +20 -0
- data/examples/old/linguistics/nine.rb +0 -0
- data/examples/old/rewrite_code/input/hello.py +1 -0
- data/examples/old/rewrite_code/input/subdir/hello.py +1 -0
- data/examples/old/rewrite_code/input/world.py +1 -0
- data/examples/old/rewrite_code/rewrite_code.rb +18 -0
- data/examples/old/rewrite_code/templates/rewrite_code.erb +32 -0
- data/examples/window_check/data.rb +1260 -0
- data/examples/window_check/fruits.rb +118 -0
- data/examples/window_check/tools.rb +56 -0
- data/examples/window_check/window_check.rb +214 -0
- data/glim_generated_tests/make_special_code_with_fixed_length_test.rb +44 -0
- data/glim_generated_tests/old-20230831120513-make_special_code_with_fixed_length_test.rb +1 -0
- data/glim_generated_tests/old-20230831121222-make_special_code_with_fixed_length_test.rb +55 -0
- data/glim_generated_tests/old-20230831124501-make_special_code_with_fixed_length_test.rb +33 -0
- data/glim_generated_tests/test/make_special_code_with_fixed_length_test.rb +58 -0
- data/lib/anthropic_request_details.rb +37 -0
- data/lib/anthropic_response.rb +101 -0
- data/lib/chat_request_details.rb +140 -0
- data/lib/chat_response.rb +303 -0
- data/lib/glim_ai/version.rb +5 -0
- data/lib/glim_ai.rb +8 -0
- data/lib/glim_ai_callable.rb +151 -0
- data/lib/glim_context.rb +62 -0
- data/lib/glim_helpers.rb +54 -0
- data/lib/glim_request.rb +266 -0
- data/lib/glim_response.rb +155 -0
- data/lib/globals.rb +255 -0
- data/lib/html_templates/chat_request.erb +86 -0
- data/sample.env +9 -0
- metadata +131 -0
data/examples/code_competition/output/python_claude-2.rb
@@ -0,0 +1,33 @@
+### library (python)
+# no lib needed
+
+### problem specific code
+import asyncio
+
+async def Q(msg, fut):
+    await asyncio.sleep(0.1)  # pretend this is an API call
+    fut.set_result(msg)
+    return fut
+
+async def f(m1, m2, m3):
+    return [await m1, await m2, await m3]
+
+async def g(m1, m2):
+    return m1[0] + m2[1]
+
+prompt1 = asyncio.Future()
+prompt1.set_result('code')
+
+m1 = Q('analyzing code 1', asyncio.Future())
+m2 = Q('analyzing code 2', asyncio.Future())
+m3 = Q('analyzing code 3', asyncio.Future())
+
+prompt2 = asyncio.gather(f(m1, m2, m3))
+
+m1 = Q('summary 1', asyncio.Future())
+m2 = Q('summary 2', asyncio.Future())
+
+prompt3 = g(m1, m2)
+
+result = asyncio.gather(prompt3)
+print(asyncio.run(result))
data/examples/code_competition/output/python_claude-instant-1.rb
@@ -0,0 +1,18 @@
+'''
+### library (python)
+# no lib needed
+
+### problem specific code
+import asyncio
+
+async def prompt2_analyze_code():
+    tasks = [Q(m1,"prompt1_write_code"), Q(m2,"prompt1_write_code"), Q(m3,"prompt1_write_code")]
+    await asyncio.gather(*tasks)
+
+async def prompt3_summarize():
+    tasks = [Q(m1, await prompt2_analyze_code()), Q(m2, await prompt2_analyze_code())]
+    await asyncio.gather(*tasks)
+
+asyncio.run(prompt3_summarize())
+result = Q(m1, await prompt3_summarize())
+'''
data/examples/code_competition/output/python_gpt-3.5-turbo-16k.rb
@@ -0,0 +1,69 @@
+### library ( python )
+
+```python
+import threading
+
+class APICall:
+    def __init__(self, func, args):
+        self.func = func
+        self.args = args
+        self.result = None
+        self.finished = False
+        self.lock = threading.Lock()
+
+    def finish(self, result):
+        with self.lock:
+            self.result = result
+            self.finished = True
+            self.lock.notify_all()
+
+class Q:
+    def __init__(self, module, prompt):
+        self.module = module
+        self.prompt = prompt
+
+    def __call__(self, *args):
+        api_call = APICall(self.module.Q, (self.prompt,) + args)
+        threading.Thread(target=self.module.Q, args=((self.prompt,) + args, api_call.finish)).start()
+        return api_call
+
+### problem specific code
+
+def f(*args):
+    print("API call f with args:", args)
+    # Perform the API call using external library or code
+    return "Some result from f"
+
+def g(*args):
+    print("API call g with args:", args)
+    # Perform the API call using external library or code
+    return "Some result from g"
+
+def prompt1_write_code(arg):
+    print("Prompt 1:", arg)
+    # Perform some operation using external library or code
+
+def prompt2_analyze_code(arg):
+    print("Prompt 2:", arg)
+    # Perform some operation using external library or code
+
+def prompt3_summarize(arg):
+    print("Prompt 3:", arg)
+    # Perform some operation using external library or code
+
+m1 = None  # Placeholder for module 1, replace with actual module
+m2 = None  # Placeholder for module 2, replace with actual module
+m3 = None  # Placeholder for module 3, replace with actual module
+
+def main():
+    prompt2_analyze_code = f(Q(m1, prompt1_write_code), Q(m2, prompt1_write_code), Q(m3, prompt1_write_code))
+    prompt3_summarize = g(Q(m1, prompt2_analyze_code), Q(m2, prompt2_analyze_code))
+    result = Q(m1, prompt3_summarize)
+
+    print("Result:", result.result)  # Wait for the result, if needed
+
+if __name__ == '__main__':
+    main()
+```
+
+Note: Replace the `print` statements and function bodies with actual implementation according to your requirements.
data/examples/code_competition/output/python_gpt-3.5-turbo.rb
@@ -0,0 +1,43 @@
+### library ( python )
+```python
+import threading
+
+class Q:
+    def __init__(self, caller, func):
+        self.caller = caller
+        self.func = func
+        self.result = None
+        self.thread = None
+        self.lock = threading.Lock()
+
+    def __call__(self, *args, **kwargs):
+        if self.result is None:
+            with self.lock:
+                if self.result is None:
+                    self.thread = threading.Thread(target=self.execute, args=args, kwargs=kwargs)
+                    self.thread.start()
+                    self.thread.join()
+        return self.result
+
+    def execute(self, *args, **kwargs):
+        self.result = self.func(*args, **kwargs)
+
+def f(*args, **kwargs):
+    return sum(args)
+
+def g(*args, **kwargs):
+    return sum(args)
+```
+
+### problem specific code
+```python
+m1 = "John"
+m2 = "Jane"
+m3 = "David"
+
+prompt1_write_code = "Write code"
+
+prompt2_analyze_code = f(Q(m1, prompt1_write_code), Q(m2, prompt1_write_code), Q(m3, prompt1_write_code))
+prompt3_summarize = g(Q(m1, prompt2_analyze_code), Q(m2, prompt2_analyze_code))
+result = Q(m1, prompt3_summarize)
+```
data/examples/code_competition/output/python_gpt-4.rb
@@ -0,0 +1,34 @@
+'''
+### library ( python )
+
+import asyncio
+import concurrent.futures
+
+class AsyncAPI:
+    def __init__(self):
+        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
+
+    def Queue(self, function, *args):
+        loop = asyncio.get_event_loop()
+        return loop.run_in_executor(self.executor, function, *args)
+
+asyncApi = AsyncAPI()
+
+### problem specific code
+
+def Q(model, task):
+    ## write the actual code for api call
+    pass
+
+def f(*results):
+    ## write the actual code for analyze code
+    pass
+
+def g(*results):
+    ## write the actual code for summarize
+    pass
+
+prompt2_analyze_code = f( asyncApi.Queue(Q,m1,prompt1_write_code), asyncApi.Queue(Q,m2,prompt1_write_code), asyncApi.Queue(Q,m3,prompt1_write_code) )
+prompt3_summarize = g( asyncApi.Queue(Q,m1,prompt2_analyze_code), asyncApi.Queue(Q,m2,prompt2_analyze_code) )
+result = asyncApi.Queue(Q, m1, prompt3_summarize)
+'''
data/examples/code_competition/output/ruby_claude-2.rb
@@ -0,0 +1,22 @@
+### library (ruby)
+# no lib needed
+
+### problem specific code
+
+m1 = Q.async(:prompt1_write_code)
+m2 = Q.async(:prompt1_write_code)
+m3 = Q.async(:prompt1_write_code)
+
+m1.wait
+m2.wait
+m3.wait
+
+prompt2_analyze_code = f.async(m1, m2, m3)
+
+m1.wait
+m2.wait
+m3.wait
+
+prompt3_summarize = g.async(m1, m2)
+
+result = prompt3_summarize.wait
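The claude-2 answer above presupposes a `Q.async` / `#wait` API that it never defines. Purely as an illustration (this code is not part of the package, and every name in it is hypothetical), such a shim could be built on concurrent-ruby's `Concurrent::Future`, the same primitive the gpt-4 answer below uses:

```ruby
# Hypothetical shim, not in the gem: gives the assumed Q.async / #wait calls
# a concrete meaning using concurrent-ruby.
require 'concurrent'

class AsyncHandle
  def initialize(&block)
    @future = Concurrent::Future.execute(&block)  # runs the block on a pool thread
  end

  def wait
    @future.value  # blocks until the future resolves, then returns its value
  end
end

module Q
  # perform_api_call is an assumed stand-in for the real API request
  def self.async(prompt)
    AsyncHandle.new { perform_api_call(prompt) }
  end
end
```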
data/examples/code_competition/output/ruby_claude-instant-1.rb
@@ -0,0 +1,20 @@
+### library ( ruby )
+# no lib needed
+
+### problem specific code
+require 'concurrent'
+
+executor = Concurrent::CachedThreadPool.new
+
+prompt2_analyze_code = -> do
+  f(executor.post { Q(m1, prompt1_write_code) },
+    executor.post { Q(m2, prompt1_write_code) },
+    executor.post { Q(m3, prompt1_write_code) })
+end
+
+prompt3_summarize = -> do
+  g(executor.post { prompt2_analyze_code.call },
+    executor.post { prompt2_analyze_code.call })
+end
+
+result = executor.post { prompt3_summarize.call }.await
data/examples/code_competition/output/ruby_gpt-3.5-turbo-16k.rb
@@ -0,0 +1,27 @@
+### library (ruby)
+
+No library is needed for this problem.
+
+### problem specific code
+
+```ruby
+def Q(module_name, statement)
+  # Code to make API call and return the result
+  # (Assuming implementation for making API call is already done)
+end
+
+def f(*statements)
+  # Code to process multiple statements asynchronously
+end
+
+def g(*statements)
+  # Code to process multiple statements asynchronously
+end
+
+# Example usage
+prompt2_analyze_code = f(Q(:m1, Q(:prompt1_write_code)), Q(:m2, Q(:prompt1_write_code)), Q(:m3, Q(:prompt1_write_code)))
+prompt3_summarize = g(Q(:m1, prompt2_analyze_code), Q(:m2, prompt2_analyze_code))
+result = Q(:m1, prompt3_summarize)
+```
+
+Note: The implementation of making API calls and processing statements asynchronously is not provided as it is specific to the API being used and the requirements of processing the statements.
data/examples/code_competition/output/ruby_gpt-3.5-turbo.rb
@@ -0,0 +1,30 @@
+### library (ruby)
+```ruby
+module Q
+  def self.call(api, prompt)
+    # logic for making an API call
+  end
+end
+```
+
+### problem specific code
+```ruby
+m1 = "m1"
+m2 = "m2"
+m3 = "m3"
+
+prompt1_write_code = "prompt1_write_code"
+
+prompt2_analyze_code = Q.call(m1, prompt1_write_code)
+Q.call(m2, prompt1_write_code)
+Q.call(m3, prompt1_write_code)
+
+prompt3_summarize = Q.call(m1, prompt2_analyze_code)
+Q.call(m2, prompt2_analyze_code)
+
+result = Q.call(m1, prompt3_summarize)
+```
+
+In the above code, I have created a module `Q` which defines a `call` method to make the API call. The `call` method takes two parameters - the API name and the prompt. This allows us to easily make API calls without waiting for the answer, unless the answer is needed to proceed.
+
+The `problem specific code` section shows how the `Q` module can be used to solve the problem mentioned in the example. The API calls are made in the desired sequence, and the result is obtained by making the necessary API calls in the desired order.
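As defined above, `Q.call` runs synchronously, so the claim that calls happen "without waiting" does not hold: each call blocks before the next one starts. For illustration only (this code is not in the package, and `make_api_call` is a hypothetical stand-in), a non-blocking variant would return a future, as the other Ruby answers do with concurrent-ruby:

```ruby
# Illustrative sketch: a Q.call that actually returns without waiting.
require 'concurrent'

module Q
  def self.call(api, prompt)
    # make_api_call is a hypothetical stand-in for the real request
    Concurrent::Future.execute { make_api_call(api, prompt) }
  end
end

pending = Q.call("m1", "prompt1_write_code")  # returns immediately
answer  = pending.value                       # blocks only when the result is needed
```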
data/examples/code_competition/output/ruby_gpt-4.rb
@@ -0,0 +1,31 @@
+```ruby
+### library ( ruby )
+
+require 'concurrent'
+
+class Q
+  def initialize(m, action)
+    @future = Concurrent::Future.execute do
+      m.send(action)
+    end
+  end
+
+  def result
+    @future.value
+  end
+end
+
+### problem specific code
+
+def f(*actions)
+  actions.map(&:result)
+end
+
+def g(*actions)
+  actions.map(&:result)
+end
+
+prompt2_analyze_code = f( Q.new(m1, :prompt1_write_code), Q.new(m2, :prompt1_write_code), Q.new(m3, :prompt1_write_code))
+prompt3_summarize = g( Q.new(m1, :prompt2_analyze_code), Q.new(m2, :prompt2_analyze_code))
+result = Q.new(m1, :prompt3_summarize).result
+```
data/examples/code_competition/output/ruby_human.rb
@@ -0,0 +1,41 @@
+
+# library code - can be reused for other applications
+class Future
+  def initialize(&block)
+    @thread = Thread.new(&block)
+  end
+
+  def value
+    @thread.value
+  end
+end
+
+####################
+# code specific to problem
+
+def Q(m, p)
+  Future.new do
+    # Define the asynchronous operation here
+  end
+end
+
+p1 = # define p1
+m1 = # define m1
+m2 = # define m2
+
+# Create futures for Q(m1, p1) and Q(m2, p1), will be evaluated in parallel
+q1_p1 = Q(m1, p1)
+q2_p1 = Q(m2, p1)
+
+# Retrieve values and calculate p2; this blocks until both values are there
+p2 = f(q1_p1.value, q2_p1.value)
+
+# Create futures for Q(m1, p2) and Q(m2, p2), will be evaluated in parallel
+q1_p2 = Q(m1, p2)
+q2_p2 = Q(m2, p2)
+
+# Retrieve values and calculate p3, blocks until both values are there
+p3 = g(q1_p2.value, q2_p2.value)
+
+# Calculate the final result
+result = Q(m1, p3).value # blocks until value is there
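The human answer's `Future` works because Ruby's `Thread#value` joins the thread and returns the block's result; that is the entire synchronization story. A short usage sketch (the API-call names are hypothetical stand-ins, not from the package):

```ruby
# Two futures start immediately and run in parallel; .value blocks until done.
f1 = Future.new { slow_api_call_one }   # slow_api_call_one is a stand-in
f2 = Future.new { slow_api_call_two }   # runs concurrently with f1
combined = [f1.value, f2.value]         # total wait is roughly the slower of the two
```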
data/examples/code_competition/templates/analyze_code.erb
@@ -0,0 +1,33 @@
+I gave the following programming task to some LLMs and a human, in different programming languages:
+'''
+<%= text %>
+'''
+Below are the answers:
+<% for l, m in code.keys %>
+---l
+Language: <%= l %>
+Author: <%= m %>
+'''
+<%= code[[l,m]] %>
+'''
+<% end %>
+---
+Write a brief critique of each answer. Carefully check if the provided code will work and if it maximizes parallelism.
+Rate the elegance of the problem-specific code; disregard the library code for rating elegance.
+Then, analyze which language works better for this, and which LLMs did best.
+Finally: Can you draw any conclusions about connections between the LLMs and the languages?
+For example, does it seem like a particular LLM prioritizes patterns that are more common in one language than another?
+At the end of your answer, after a "---" as a separator, write a json array with the various ratings for each one, on a scale of 0..1, like this:
+---
+[
+<% for l, m in code.keys %>
+{
+lang: <%=l%>,
+model: <%=m%>,
+elegance: 0.42, # reason for elegance score
+parallelism: 0.42, # reason for parallelism score
+correctness: 0.42, # reason for correctness score
+instructions_conformity: 0.42,}, # reason this score; it should measure how well the answer conforms to the instructions
+<% end %>
+]
+(Replace 0.42 with your rating for each answer)
data/examples/code_competition/templates/write_code.erb
@@ -0,0 +1,26 @@
+
+I am working on a <%= language %> library for developing software that frequently requires use of external APIs.
+It can take a while for the external API calls to finish, so we want to make it easy for the developer
+to send API calls without waiting for the answer, unless the answer is needed to proceed.
+
+For example:
+'''
+prompt2_analyze_code = f( Q(m1,prompt1_write_code), Q(m2,prompt1_write_code), Q(m3,prompt1_write_code))
+prompt3_summarize = g( Q(m1,prompt2_analyze_code), Q(m2,prompt2_analyze_code))
+result = Q(m1,prompt3_summarize)
+'''
+The API calls are in function Q. Q,f,g do not have side effects we need to worry about.
+
+Write a <%= language %> library for this, and then show how it can be used for the example above.
+If no library is needed, or one already exists, then no need to write one - just write "# no lib needed"
+
+Structure your code like this:
+'''
+### library ( <%= language %> )
+(library code if needed, or # no lib needed)
+
+### problem specific code
+(elegant code that solves the problem above)
+'''
+Your goal is to make the (non-library) code as short, readable, and elegant as possible.
+Do not explain your code -- only respond with the code itself.
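The driver that renders this template is `data/examples/code_competition/code_competition.rb`, whose content is not shown in this section. Based on the template calls that do appear in this diff (`request_from_template` in `ask_all.rb` below and in `improve_prompt.rb`), a minimal invocation would presumably look like the following sketch; the exact arguments are an assumption:

```ruby
# Hedged sketch; the real driver lives in code_competition.rb (not shown here).
require_relative '../../lib/globals'

glim = GlimContext.new
# `language:` fills the <%= language %> slots in write_code.erb
req = glim.request_from_template("write_code", language: "ruby")
req.llm_name = "gpt-4"        # same setter used in improve_prompt.rb
puts req.response.completion  # same accessors used in ask_all.rb
```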
data/examples/glim_demo/ask_all.rb
@@ -0,0 +1,35 @@
+require_relative '../../lib/globals'
+glim = GlimContext.new(log_name: "ask_all")
+# these first two lines above are the only ones that you need to add to your code
+
+# in this example, we will want to compare the answers of these different models
+models = ["claude-instant-1", "gpt-3.5-turbo"]
+
+# we will ask this question to each model
+question = "If, in some cataclysm, all of scientific knowledge were to be destroyed, and only one sentence passed on to the next generation of creatures, what statement would contain the most information in the fewest words?"
+
+responses = {}
+for model in models
+  # construct a request that will be sent to the LLM
+  request = glim.request(llm_name: model)
+  request.prompt = question
+  # LLMResponse.compute will send the request to the model specified in the request
+  responses[model] = request.response
+end
+
+# now we can rate and summarize the answers
+
+# construct a request using an erb template. The template is in the specs directory
+# and is called "rate_all.erb". We will pass the question and the hash with all of the
+# answers to the template.
+request = glim.request_from_template("rate_all", question:, answers: responses)
+
+# the request now contains a prompt that is based on the template and the
+# arguments that we passed to the template (question and answers)
+puts request.inspect
+
+# send the request and print the completion it generated
+response = request.response
+puts response.completion
data/examples/glim_demo/templates/rate_all.erb
@@ -0,0 +1,24 @@
+<%
+# This is the template for rendering the prompt for rating all answers to a question.
+req.llm_name = 'gpt-3.5-turbo'
+%>
+<%= prompt_output_files %>
+
+---
+
+Below are different answers to the following question:
+"<%= question # this will insert the question passed in to the erb template
+%>"
+
+<%
+# iterate over the answers we want to rate so that they will all be in the prompt
+answers.each_pair do |model, answer|
+%>
+<%= model %>'s response: <%= answer.completion %>
+<% end %>
+
+<%
+# now we will instruct the LLM to rate each of the answers:
+%>
+Write a brief critique of each answer.
+Then, generate a string in json format providing, for each respondent, a rating for that respondent's response on a scale of 0 to 1.
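Note that this template both reads locals passed by the caller (`question`, `answers`) and mutates the request itself (`req.llm_name = ...`), so glim's renderer must expose `req` alongside the template arguments. The gem's actual implementation lives in `lib/glim_request.rb` / `lib/glim_context.rb` (not reproduced in this section), so treat the following stdlib-ERB sketch purely as an assumption about the mechanics:

```ruby
# Assumed mechanics, for illustration: expose `req` plus caller-supplied
# locals to the ERB template through a Binding.
require 'erb'

def render_glim_template(source, req, locals = {})
  b = binding
  b.local_variable_set(:req, req)  # lets the template call req.llm_name = ...
  locals.each { |name, value| b.local_variable_set(name, value) }
  ERB.new(source, trim_mode: '-').result(b)
end
```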
data/examples/improve_prompt/improve_prompt.rb
@@ -0,0 +1,62 @@
+require_relative '../../lib/globals'
+
+glim = GlimContext.new
+
+testcases = [
+  # [ "gen_two_files", "Generate two files named f1 and f2, which each contain the world `hello` and nothing else" ],
+  # [ "gen_word_list", "Generate a file named `word_list` which contains the first 5 words from NATO phonetic alphabet, each in its own line." ],
+  # [ "write_code", "Write a program in ruby called 'count_lines.rb' which reads a file and prints the number of lines in it. Also generate a file for testing your code."],
+  [ "gen_word_list_subdir", "Generate a file named `word_list` which contains the first 5 words from NATO phonetic alphabet, each in its own line. It should go into a subdirectory called 'fun_words'" ]
+]
+
+llm_names = ["gpt-3.5-turbo", "claude-instant-1"]
+
+responses = {}
+
+Dir.glob(File.join(__dir__, "templates/try_*.erb")) do |try_path|
+  try = File.basename(try_path,'.erb')
+  responses[try] = {}
+  for test_name, test_prompt in testcases
+    responses[try][test_name] = {}
+    for llm_name in llm_names
+      #puts "LLM = #{llm_name}, testing #{try} with prompt #{test_prompt}"
+      req = glim.request_from_template(try, test_prompt: test_prompt)
+      req.llm_name = llm_name
+      req.temperature = 0.0
+      responses[try][test_name][llm_name] = req.response
+    end
+  end
+end
+
+extracted_info = {}
+for try in responses.keys
+  extracted_info[try] = {}
+  for test_name in responses[try].keys
+    extracted_info[try][test_name] = {}
+    baseline_extracted_info = nil
+    baseline_completion = nil
+    for llm_name in llm_names # we want them in this order because first one is the gold standard
+      completion = responses[try][test_name][llm_name].completion
+      extracted_by_llm = extract_and_save_files(completion)
+      if !baseline_extracted_info
+        baseline_extracted_info = extracted_by_llm
+        baseline_completion = completion
+        next
+      end
+      info = ""
+      if baseline_extracted_info[0] != extracted_by_llm[0]
+        info += "\n\nExtracted info_text differs:\n#{extracted_info[0]}."
+        info += "\nBaseline was:\n#{baseline_extracted_info[0]}."
+      end
+      # if baseline_extracted_info[1] != extracted_by_llm[1]
+      #   info += "\n\nExtracted files differ: #{JSON.pretty_generate(extracted_by_llm[1])}."
+      #   info += "\nBaseline was: #{JSON.pretty_generate(baseline_extracted_info[1])}."
+      # end
+      if !info.empty?
+        puts "\n\n#{try} on test case #{test_name} with #{llm_name}:"
+        puts info
+        puts "\nCompletion was: \n#{completion}"
+      end
+    end
+  end
+end
data/examples/improve_prompt/templates/stashed/prompt_attempt_explicit_steps.erb
@@ -0,0 +1,15 @@
+<% req.replace_initial_system_message <<~SYSTEM
+
+ALWAYS, when asked to generate one or more files, include such files in your response as follows:
+1. Write the following file separator line:
+```
+2. Write a line containing # followed by the relative pathname of the file, for example:
+# File: relative_path_to_file/filename.suffix
+3. Write out the content of the file, line by line
+4. Write the following file separator line again:
+```
+
+SYSTEM
+%>
+
+<%= test_prompt %>
data/examples/improve_prompt/templates/stashed/prompt_attempt_explicit_steps_user_message.erb
@@ -0,0 +1,15 @@
+
+
+SYSTEM MESSAGE: ALWAYS, when asked to generate one or more files, include such files in your response as follows:
+1. Write the following file separator line:
+```
+2. Write a line containing # followed by the relative pathname of the file, for example:
+# File: relative_path_to_file/filename.suffix
+3. Write out the content of the file, line by line
+4. Write the following file separator line again:
+```
+
+---
+
+
+<%= test_prompt %>
data/examples/improve_prompt/templates/stashed/prompt_attempt_nothing.erb
@@ -0,0 +1,19 @@
+
+
+SYSTEM MESSAGE: ALWAYS, when asked to generate one or more files, include such files in your response as follows:
+1. Write the following file separator line:
+```
+2. Write a line containing # followed by the relative pathname of the file, for example:
+# File: relative_path_to_file/filename.suffix
+3. Write out the content of the file, line by line
+4. Write the following file separator line again:
+```
+
+---
+
+
+<%= test_prompt %>
+
+
+
+<%= test_prompt %>
data/examples/improve_prompt/templates/try_code_first.erb
@@ -0,0 +1,13 @@
+
+SYSTEM MESSAGE: ALWAYS, when asked to generate source code or other text files, use the following format:
+<file pathname="path_to_file/hello.rb">
+puts "Hello from Line 1"
+puts "hello from Line 2"
+</file>
+So, the example above shows how you would include a file called "hello.rb" that belongs in the subdirectory "path_to_file" of the current directory.
+The file would contain two "puts" statements.
+Use this for all text files you generate, not just source code.
+
+---
+
+<%= test_prompt %>
data/examples/improve_prompt/templates/try_code_first_system.erb
@@ -0,0 +1,22 @@
+<% req.replace_initial_system_message <<~SYSTEM
+
+When asked to generate source code or other text files, use the following format:
+<file pathname="path_to_file/hello.rb">
+puts "Hello from Line 1"
+puts "hello from Line 2"
+</file>
+So, the example above shows how you would include a file called "hello.rb" that belongs in the subdirectory "path_to_file" of the current directory.
+The file would contain two "puts" statements.
+Use this for all text files you generate, not just source code.
+
+SYSTEM
+%>
+
+
+<%= test_prompt %>
+
+
+
+---
+
+<%= test_prompt %>