ruby-openai 7.2.0 → 7.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7108bf76aa6f30cd7c38b41967b0162d6a4014698c1c8364b8116e5c665c044f
4
- data.tar.gz: 736587458668b4608e49fa8ec50e460da14eeda0dfb8a739c68806e3bd5d0a2c
3
+ metadata.gz: 278f25c283d841bfa33614bd69b4340b9275712b83e9121a1aa2a6a439767714
4
+ data.tar.gz: 702c11ba4b0411a47e9d6f9fdb178d1eb40a7baede5909f0665b41edc00797b0
5
5
  SHA512:
6
- metadata.gz: 76cac4818b5941d5732becebc91675c1ebeaba4f27c1710888afbb1893f3f8ff4d18e24a3c90f4d4332eb89730b0f6195507cf99519e764876a614ca3927a0cb
7
- data.tar.gz: 4757d8e11b494a75d0ea839ae073610adfa77317ba0e13ddabdd21bf10fd34127a5747138644dba4d9073aa509341bfeaed69a3d00150d1ae90a42ddeadd53e4
6
+ metadata.gz: 7c4a1bdb8fd3f466808f740112c3223a04da5ef73fd355b5f2136ecf28f5be2968ec4ecced5db25833878ba7e9de1f63e063529580f86d269c7bdd82f7e77df9
7
+ data.tar.gz: '014855034340e14ac2e78c845ae791f619dedf636c9839ce5edc2ea27d7eb54e973dbe4a41998b41d1e89c2c56ce04cd2c062c90bdc87b858b7467005e78100c'
data/CHANGELOG.md CHANGED
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [7.3.0] - 2024-10-11
9
+
10
+ ### Added
11
+
12
+ - Add ability to (with the right incantations) retrieve the chunks used by an Assistant file search - thanks to [@agamble](https://github.com/agamble) for the addition!
13
+
8
14
  ## [7.2.0] - 2024-10-10
9
15
 
10
16
  ### Added
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ruby-openai (7.2.0)
4
+ ruby-openai (7.3.0)
5
5
  event_stream_parser (>= 0.3.0, < 2.0.0)
6
6
  faraday (>= 1)
7
7
  faraday-multipart (>= 1)
data/README.md CHANGED
@@ -1111,6 +1111,116 @@ end
1111
1111
 
1112
1112
  Note that you have 10 minutes to submit your tool output before the run expires.
1113
1113
 
1114
+ #### Exploring chunks used in File Search
1115
+
1116
+ Take a deep breath. You might need a drink for this one.
1117
+
1118
+ It's possible to get OpenAI to share which chunks it used in its internal RAG pipeline to produce its file search response.
1119
+
1120
+ An example spec can be found [here](https://github.com/alexrudall/ruby-openai/blob/main/spec/openai/client/assistant_file_search_spec.rb) that does this, just so you know it's possible.
1121
+
1122
+ Here's how to get the chunks used in a file search. In this example I'm using [this file](https://css4.pub/2015/textbook/somatosensory.pdf):
1123
+
1124
+ ```ruby
1125
+ require "openai"
1126
+
1127
+ # Make a client
1128
+ client = OpenAI::Client.new(
1129
+ access_token: "access_token_goes_here",
1130
+ log_errors: true # Don't do this in production.
1131
+ )
1132
+
1133
+ # Upload your file(s)
1134
+ file_id = client.files.upload(
1135
+ parameters: {
1136
+ file: "path/to/somatosensory.pdf",
1137
+ purpose: "assistants"
1138
+ }
1139
+ )["id"]
1140
+
1141
+ # Create a vector store to store the vectorised file(s)
1142
+ vector_store_id = client.vector_stores.create(parameters: {})["id"]
1143
+
1144
+ # Vectorise the file(s)
1145
+ vector_store_file_id = client.vector_store_files.create(
1146
+ vector_store_id: vector_store_id,
1147
+ parameters: { file_id: file_id }
1148
+ )["id"]
1149
+
1150
+ # Check that the file is vectorised (wait for status to be "completed")
1151
+ client.vector_store_files.retrieve(vector_store_id: vector_store_id, id: vector_store_file_id)["status"]
1152
+
1153
+ # Create an assistant, referencing the vector store
1154
+ assistant_id = client.assistants.create(
1155
+ parameters: {
1156
+ model: "gpt-4o",
1157
+ name: "Answer finder",
1158
+ instructions: "You are a file search tool. Find the answer in the given files, please.",
1159
+ tools: [
1160
+ { type: "file_search" }
1161
+ ],
1162
+ tool_resources: {
1163
+ file_search: {
1164
+ vector_store_ids: [vector_store_id]
1165
+ }
1166
+ }
1167
+ }
1168
+ )["id"]
1169
+
1170
+ # Create a thread with your question
1171
+ thread_id = client.threads.create(parameters: {
1172
+ messages: [
1173
+ { role: "user",
1174
+ content: "Find the description of a nociceptor." }
1175
+ ]
1176
+ })["id"]
1177
+
1178
+ # Run the thread to generate the response. Include the "GIVE ME THE CHUNKS" incantation.
1179
+ run_id = client.runs.create(
1180
+ thread_id: thread_id,
1181
+ parameters: {
1182
+ assistant_id: assistant_id
1183
+ },
1184
+ query_parameters: { include: ["step_details.tool_calls[*].file_search.results[*].content"] } # incantation
1185
+ )["id"]
1186
+
1187
+ # Get the steps that happened in the run
1188
+ steps = client.run_steps.list(
1189
+ thread_id: thread_id,
1190
+ run_id: run_id,
1191
+ parameters: { order: "asc" }
1192
+ )
1193
+
1194
+ # Get the last step ID (or whichever one you want to look at)
1195
+ step_id = steps["data"].first["id"]
1196
+
1197
+ # Retrieve all the steps. Include the "GIVE ME THE CHUNKS" incantation again.
1198
+ steps = steps["data"].map do |step|
1199
+ client.run_steps.retrieve(
1200
+ thread_id: thread_id,
1201
+ run_id: run_id,
1202
+ id: step["id"],
1203
+ parameters: { include: ["step_details.tool_calls[*].file_search.results[*].content"] } # incantation
1204
+ )
1205
+ end
1206
+
1207
+ # Now we've got the chunk info, buried deep. Loop through the steps and find chunks if included:
1208
+ chunks = steps.flat_map do |step|
1209
+ included_results = step.dig("step_details", "tool_calls", 0, "file_search", "results")
1210
+
1211
+ next if included_results.nil? || included_results.empty?
1212
+
1213
+ included_results.flat_map do |result|
1214
+ result["content"].map do |content|
1215
+ content["text"]
1216
+ end
1217
+ end
1218
+ end.compact
1219
+
1220
+ # The first chunk will be the closest match to the prompt. Finally, if you want to view the completed message(s):
1221
+ client.messages.list(thread_id: thread_id)
1222
+ ```
1223
+
1114
1224
  ### Image Generation
1115
1225
 
1116
1226
  Generate images using DALL·E 2 or DALL·E 3!
data/lib/openai/http.rb CHANGED
@@ -18,9 +18,10 @@ module OpenAI
18
18
  end&.body)
19
19
  end
20
20
 
21
- def json_post(path:, parameters:)
21
+ def json_post(path:, parameters:, query_parameters: {})
22
22
  conn.post(uri(path: path)) do |req|
23
23
  configure_json_post_request(req, parameters)
24
+ req.params = query_parameters
24
25
  end&.body
25
26
  end
26
27
 
data/lib/openai/run_steps.rb CHANGED
@@ -8,8 +8,8 @@ module OpenAI
8
8
  @client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps", parameters: parameters)
9
9
  end
10
10
 
11
- def retrieve(thread_id:, run_id:, id:)
12
- @client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps/#{id}")
11
+ def retrieve(thread_id:, run_id:, id:, parameters: {})
12
+ @client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps/#{id}", parameters: parameters)
13
13
  end
14
14
  end
15
15
  end
data/lib/openai/runs.rb CHANGED
@@ -12,8 +12,9 @@ module OpenAI
12
12
  @client.get(path: "/threads/#{thread_id}/runs/#{id}")
13
13
  end
14
14
 
15
- def create(thread_id:, parameters: {})
16
- @client.json_post(path: "/threads/#{thread_id}/runs", parameters: parameters)
15
+ def create(thread_id:, parameters: {}, query_parameters: {})
16
+ @client.json_post(path: "/threads/#{thread_id}/runs", parameters: parameters,
17
+ query_parameters: query_parameters)
17
18
  end
18
19
 
19
20
  def modify(id:, thread_id:, parameters: {})
data/lib/openai/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OpenAI
2
- VERSION = "7.2.0".freeze
2
+ VERSION = "7.3.0".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-openai
3
3
  version: !ruby/object:Gem::Version
4
- version: 7.2.0
4
+ version: 7.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-10-10 00:00:00.000000000 Z
11
+ date: 2024-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: event_stream_parser