ruby-openai 7.2.0 → 7.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Gemfile.lock +1 -1
- data/README.md +110 -0
- data/lib/openai/http.rb +2 -1
- data/lib/openai/run_steps.rb +2 -2
- data/lib/openai/runs.rb +3 -2
- data/lib/openai/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 278f25c283d841bfa33614bd69b4340b9275712b83e9121a1aa2a6a439767714
|
4
|
+
data.tar.gz: 702c11ba4b0411a47e9d6f9fdb178d1eb40a7baede5909f0665b41edc00797b0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c4a1bdb8fd3f466808f740112c3223a04da5ef73fd355b5f2136ecf28f5be2968ec4ecced5db25833878ba7e9de1f63e063529580f86d269c7bdd82f7e77df9
|
7
|
+
data.tar.gz: '014855034340e14ac2e78c845ae791f619dedf636c9839ce5edc2ea27d7eb54e973dbe4a41998b41d1e89c2c56ce04cd2c062c90bdc87b858b7467005e78100c'
|
data/CHANGELOG.md
CHANGED
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
|
+
## [7.3.0] - 2024-10-11
|
9
|
+
|
10
|
+
### Added
|
11
|
+
|
12
|
+
- Add ability to (with the right incantations) retrieve the chunks used by an Assistant file search - thanks to [@agamble](https://github.com/agamble) for the addition!
|
13
|
+
|
8
14
|
## [7.2.0] - 2024-10-10
|
9
15
|
|
10
16
|
### Added
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1111,6 +1111,116 @@ end
|
|
1111
1111
|
|
1112
1112
|
Note that you have 10 minutes to submit your tool output before the run expires.
|
1113
1113
|
|
1114
|
+
#### Exploring chunks used in File Search
|
1115
|
+
|
1116
|
+
Take a deep breath. You might need a drink for this one.
|
1117
|
+
|
1118
|
+
It's possible to have OpenAI share which chunks it used in its internal RAG pipeline to produce its file search results.
|
1119
|
+
|
1120
|
+
An example spec can be found [here](https://github.com/alexrudall/ruby-openai/blob/main/spec/openai/client/assistant_file_search_spec.rb) that does this, just so you know it's possible.
|
1121
|
+
|
1122
|
+
Here's how to get the chunks used in a file search. In this example I'm using [this file](https://css4.pub/2015/textbook/somatosensory.pdf):
|
1123
|
+
|
1124
|
+
```
|
1125
|
+
require "openai"
|
1126
|
+
|
1127
|
+
# Make a client
|
1128
|
+
client = OpenAI::Client.new(
|
1129
|
+
access_token: "access_token_goes_here",
|
1130
|
+
log_errors: true # Don't do this in production.
|
1131
|
+
)
|
1132
|
+
|
1133
|
+
# Upload your file(s)
|
1134
|
+
file_id = client.files.upload(
|
1135
|
+
parameters: {
|
1136
|
+
file: "path/to/somatosensory.pdf",
|
1137
|
+
purpose: "assistants"
|
1138
|
+
}
|
1139
|
+
)["id"]
|
1140
|
+
|
1141
|
+
# Create a vector store to store the vectorised file(s)
|
1142
|
+
vector_store_id = client.vector_stores.create(parameters: {})["id"]
|
1143
|
+
|
1144
|
+
# Vectorise the file(s)
|
1145
|
+
vector_store_file_id = client.vector_store_files.create(
|
1146
|
+
vector_store_id: vector_store_id,
|
1147
|
+
parameters: { file_id: file_id }
|
1148
|
+
)["id"]
|
1149
|
+
|
1150
|
+
# Check that the file is vectorised (wait for status to be "completed")
|
1151
|
+
client.vector_store_files.retrieve(vector_store_id: vector_store_id, id: vector_store_file_id)["status"]
|
1152
|
+
|
1153
|
+
# Create an assistant, referencing the vector store
|
1154
|
+
assistant_id = client.assistants.create(
|
1155
|
+
parameters: {
|
1156
|
+
model: "gpt-4o",
|
1157
|
+
name: "Answer finder",
|
1158
|
+
instructions: "You are a file search tool. Find the answer in the given files, please.",
|
1159
|
+
tools: [
|
1160
|
+
{ type: "file_search" }
|
1161
|
+
],
|
1162
|
+
tool_resources: {
|
1163
|
+
file_search: {
|
1164
|
+
vector_store_ids: [vector_store_id]
|
1165
|
+
}
|
1166
|
+
}
|
1167
|
+
}
|
1168
|
+
)["id"]
|
1169
|
+
|
1170
|
+
# Create a thread with your question
|
1171
|
+
thread_id = client.threads.create(parameters: {
|
1172
|
+
messages: [
|
1173
|
+
{ role: "user",
|
1174
|
+
content: "Find the description of a nociceptor." }
|
1175
|
+
]
|
1176
|
+
})["id"]
|
1177
|
+
|
1178
|
+
# Run the thread to generate the response. Include the "GIVE ME THE CHUNKS" incantation.
|
1179
|
+
run_id = client.runs.create(
|
1180
|
+
thread_id: thread_id,
|
1181
|
+
parameters: {
|
1182
|
+
assistant_id: assistant_id
|
1183
|
+
},
|
1184
|
+
query_parameters: { include: ["step_details.tool_calls[*].file_search.results[*].content"] } # incantation
|
1185
|
+
)["id"]
|
1186
|
+
|
1187
|
+
# Get the steps that happened in the run
|
1188
|
+
steps = client.run_steps.list(
|
1189
|
+
thread_id: thread_id,
|
1190
|
+
run_id: run_id,
|
1191
|
+
parameters: { order: "asc" }
|
1192
|
+
)
|
1193
|
+
|
1194
|
+
# Get the first step ID (or whichever one you want to look at)
|
1195
|
+
step_id = steps["data"].first["id"]
|
1196
|
+
|
1197
|
+
# Retrieve all the steps. Include the "GIVE ME THE CHUNKS" incantation again.
|
1198
|
+
steps = steps["data"].map do |step|
|
1199
|
+
client.run_steps.retrieve(
|
1200
|
+
thread_id: thread_id,
|
1201
|
+
run_id: run_id,
|
1202
|
+
id: step["id"],
|
1203
|
+
parameters: { include: ["step_details.tool_calls[*].file_search.results[*].content"] } # incantation
|
1204
|
+
)
|
1205
|
+
end
|
1206
|
+
|
1207
|
+
# Now we've got the chunk info, buried deep. Loop through the steps and find chunks if included:
|
1208
|
+
chunks = steps.flat_map do |step|
|
1209
|
+
included_results = step.dig("step_details", "tool_calls", 0, "file_search", "results")
|
1210
|
+
|
1211
|
+
next if included_results.nil? || included_results.empty?
|
1212
|
+
|
1213
|
+
included_results.flat_map do |result|
|
1214
|
+
result["content"].map do |content|
|
1215
|
+
content["text"]
|
1216
|
+
end
|
1217
|
+
end
|
1218
|
+
end.compact
|
1219
|
+
|
1220
|
+
# The first chunk will be the closest match to the prompt. Finally, if you want to view the completed message(s):
|
1221
|
+
client.messages.list(thread_id: thread_id)
|
1222
|
+
```
|
1223
|
+
|
1114
1224
|
### Image Generation
|
1115
1225
|
|
1116
1226
|
Generate images using DALL·E 2 or DALL·E 3!
|
data/lib/openai/http.rb
CHANGED
@@ -18,9 +18,10 @@ module OpenAI
|
|
18
18
|
end&.body)
|
19
19
|
end
|
20
20
|
|
21
|
-
def json_post(path:, parameters:)
|
21
|
+
def json_post(path:, parameters:, query_parameters: {})
|
22
22
|
conn.post(uri(path: path)) do |req|
|
23
23
|
configure_json_post_request(req, parameters)
|
24
|
+
req.params = query_parameters
|
24
25
|
end&.body
|
25
26
|
end
|
26
27
|
|
data/lib/openai/run_steps.rb
CHANGED
@@ -8,8 +8,8 @@ module OpenAI
|
|
8
8
|
@client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps", parameters: parameters)
|
9
9
|
end
|
10
10
|
|
11
|
-
def retrieve(thread_id:, run_id:, id:)
|
12
|
-
@client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps/#{id}")
|
11
|
+
def retrieve(thread_id:, run_id:, id:, parameters: {})
|
12
|
+
@client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps/#{id}", parameters: parameters)
|
13
13
|
end
|
14
14
|
end
|
15
15
|
end
|
data/lib/openai/runs.rb
CHANGED
@@ -12,8 +12,9 @@ module OpenAI
|
|
12
12
|
@client.get(path: "/threads/#{thread_id}/runs/#{id}")
|
13
13
|
end
|
14
14
|
|
15
|
-
def create(thread_id:, parameters: {})
|
16
|
-
@client.json_post(path: "/threads/#{thread_id}/runs", parameters: parameters)
|
15
|
+
def create(thread_id:, parameters: {}, query_parameters: {})
|
16
|
+
@client.json_post(path: "/threads/#{thread_id}/runs", parameters: parameters,
|
17
|
+
query_parameters: query_parameters)
|
17
18
|
end
|
18
19
|
|
19
20
|
def modify(id:, thread_id:, parameters: {})
|
data/lib/openai/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-openai
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 7.2.0
|
4
|
+
version: 7.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alex
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-10 00:00:00.000000000 Z
|
11
|
+
date: 2024-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: event_stream_parser
|