ruby-openai 7.2.0 → 7.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 7108bf76aa6f30cd7c38b41967b0162d6a4014698c1c8364b8116e5c665c044f
-  data.tar.gz: 736587458668b4608e49fa8ec50e460da14eeda0dfb8a739c68806e3bd5d0a2c
+  metadata.gz: 278f25c283d841bfa33614bd69b4340b9275712b83e9121a1aa2a6a439767714
+  data.tar.gz: 702c11ba4b0411a47e9d6f9fdb178d1eb40a7baede5909f0665b41edc00797b0
 SHA512:
-  metadata.gz: 76cac4818b5941d5732becebc91675c1ebeaba4f27c1710888afbb1893f3f8ff4d18e24a3c90f4d4332eb89730b0f6195507cf99519e764876a614ca3927a0cb
-  data.tar.gz: 4757d8e11b494a75d0ea839ae073610adfa77317ba0e13ddabdd21bf10fd34127a5747138644dba4d9073aa509341bfeaed69a3d00150d1ae90a42ddeadd53e4
+  metadata.gz: 7c4a1bdb8fd3f466808f740112c3223a04da5ef73fd355b5f2136ecf28f5be2968ec4ecced5db25833878ba7e9de1f63e063529580f86d269c7bdd82f7e77df9
+  data.tar.gz: '014855034340e14ac2e78c845ae791f619dedf636c9839ce5edc2ea27d7eb54e973dbe4a41998b41d1e89c2c56ce04cd2c062c90bdc87b858b7467005e78100c'
data/CHANGELOG.md CHANGED
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [7.3.0] - 2024-10-11
+
+### Added
+
+- Add ability to (with the right incantations) retrieve the chunks used by an Assistant file search - thanks to [@agamble](https://github.com/agamble) for the addition!
+
 ## [7.2.0] - 2024-10-10
 
 ### Added
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    ruby-openai (7.2.0)
+    ruby-openai (7.3.0)
       event_stream_parser (>= 0.3.0, < 2.0.0)
       faraday (>= 1)
       faraday-multipart (>= 1)
data/README.md CHANGED
@@ -1111,6 +1111,116 @@ end
 
 Note that you have 10 minutes to submit your tool output before the run expires.
 
+#### Exploring chunks used in File Search
+
+Take a deep breath. You might need a drink for this one.
+
+It's possible for OpenAI to share what chunks it used in its internal RAG Pipeline to create its filesearch example.
+
+An example spec can be found [here](https://github.com/alexrudall/ruby-openai/blob/main/spec/openai/client/assistant_file_search_spec.rb) that does this, just so you know it's possible.
+
+Here's how to get the chunks used in a file search. In this example I'm using [this file](https://css4.pub/2015/textbook/somatosensory.pdf):
+
+```
+require "openai"
+
+# Make a client
+client = OpenAI::Client.new(
+  access_token: "access_token_goes_here",
+  log_errors: true # Don't do this in production.
+)
+
+# Upload your file(s)
+file_id = client.files.upload(
+  parameters: {
+    file: "path/to/somatosensory.pdf",
+    purpose: "assistants"
+  }
+)["id"]
+
+# Create a vector store to store the vectorised file(s)
+vector_store_id = client.vector_stores.create(parameters: {})["id"]
+
+# Vectorise the file(s)
+vector_store_file_id = client.vector_store_files.create(
+  vector_store_id: vector_store_id,
+  parameters: { file_id: file_id }
+)["id"]
+
+# Check that the file is vectorised (wait for status to be "completed")
+client.vector_store_files.retrieve(vector_store_id: vector_store_id, id: vector_store_file_id)["status"]
+
+# Create an assistant, referencing the vector store
+assistant_id = client.assistants.create(
+  parameters: {
+    model: "gpt-4o",
+    name: "Answer finder",
+    instructions: "You are a file search tool. Find the answer in the given files, please.",
+    tools: [
+      { type: "file_search" }
+    ],
+    tool_resources: {
+      file_search: {
+        vector_store_ids: [vector_store_id]
+      }
+    }
+  }
+)["id"]
+
+# Create a thread with your question
+thread_id = client.threads.create(parameters: {
+  messages: [
+    { role: "user",
+      content: "Find the description of a nociceptor." }
+  ]
+})["id"]
+
+# Run the thread to generate the response. Include the "GIVE ME THE CHUNKS" incantation.
+run_id = client.runs.create(
+  thread_id: thread_id,
+  parameters: {
+    assistant_id: assistant_id
+  },
+  query_parameters: { include: ["step_details.tool_calls[*].file_search.results[*].content"] } # incantation
+)["id"]
+
+# Get the steps that happened in the run
+steps = client.run_steps.list(
+  thread_id: thread_id,
+  run_id: run_id,
+  parameters: { order: "asc" }
+)
+
+# Get the last step ID (or whichever one you want to look at)
+step_id = steps["data"].first["id"]
+
+# Retrieve all the steps. Include the "GIVE ME THE CHUNKS" incantation again.
+steps = steps["data"].map do |step|
+  client.run_steps.retrieve(
+    thread_id: thread_id,
+    run_id: run_id,
+    id: step["id"],
+    parameters: { include: ["step_details.tool_calls[*].file_search.results[*].content"] } # incantation
+  )
+end
+
+# Now we've got the chunk info, buried deep. Loop through the steps and find chunks if included:
+chunks = steps.flat_map do |step|
+  included_results = step.dig("step_details", "tool_calls", 0, "file_search", "results")
+
+  next if included_results.nil? || included_results.empty?
+
+  included_results.flat_map do |result|
+    result["content"].map do |content|
+      content["text"]
+    end
+  end
+end.compact
+
+# The first chunk will be the closest match to the prompt. Finally, if you want to view the completed message(s):
+client.messages.list(thread_id: thread_id)
+```
+
 ### Image Generation
 
 Generate images using DALL·E 2 or DALL·E 3!
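The README example above digs several levels deep to reach the chunk text. As a rough orientation only, the sketch below shows the response shape implied by that `dig` path; it is an assumption reconstructed from the example's own keys (`step_details`, `tool_calls`, `file_search`, `results`, `content`, `text`), and real API responses contain additional fields not shown here.

```ruby
# Approximate shape of one retrieved run step, inferred from the README example's dig path.
# Only the keys the example touches are shown; real responses carry more fields.
step = {
  "step_details" => {
    "tool_calls" => [
      {
        "file_search" => {
          "results" => [
            { "content" => [{ "text" => "chunk text returned by the file search" }] }
          ]
        }
      }
    ]
  }
}

# The README's extraction logic then reduces to:
chunk_texts = step.dig("step_details", "tool_calls", 0, "file_search", "results")
                  .flat_map { |result| result["content"].map { |content| content["text"] } }
```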
data/lib/openai/http.rb CHANGED
@@ -18,9 +18,10 @@ module OpenAI
       end&.body)
     end
 
-    def json_post(path:, parameters:)
+    def json_post(path:, parameters:, query_parameters: {})
       conn.post(uri(path: path)) do |req|
         configure_json_post_request(req, parameters)
+        req.params = query_parameters
       end&.body
     end
 
data/lib/openai/run_steps.rb CHANGED
@@ -8,8 +8,8 @@ module OpenAI
       @client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps", parameters: parameters)
     end
 
-    def retrieve(thread_id:, run_id:, id:)
-      @client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps/#{id}")
+    def retrieve(thread_id:, run_id:, id:, parameters: {})
+      @client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps/#{id}", parameters: parameters)
     end
   end
 end
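This is the change that lets a caller pass the `include` "incantation" when retrieving a single run step. A minimal sketch of the new keyword in use, mirroring the README example in this release (a configured client is assumed, and the IDs are placeholders):

```ruby
# Placeholder IDs; in practice these come from earlier API calls.
step = client.run_steps.retrieve(
  thread_id: "thread_abc123",
  run_id: "run_abc123",
  id: "step_abc123",
  # Forwarded to the GET request so the file-search result content is included.
  parameters: { include: ["step_details.tool_calls[*].file_search.results[*].content"] }
)
```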
data/lib/openai/runs.rb CHANGED
@@ -12,8 +12,9 @@ module OpenAI
       @client.get(path: "/threads/#{thread_id}/runs/#{id}")
     end
 
-    def create(thread_id:, parameters: {})
-      @client.json_post(path: "/threads/#{thread_id}/runs", parameters: parameters)
+    def create(thread_id:, parameters: {}, query_parameters: {})
+      @client.json_post(path: "/threads/#{thread_id}/runs", parameters: parameters,
+                        query_parameters: query_parameters)
     end
 
     def modify(id:, thread_id:, parameters: {})
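This is the public entry point for the other half of the feature: `runs.create` now accepts `query_parameters`, which `json_post` (see the http.rb change above) sets on the request via `req.params` rather than in the JSON body. A minimal sketch based on the README example, with placeholder IDs and an assumed configured client:

```ruby
# Placeholder IDs; a configured client is assumed.
run_id = client.runs.create(
  thread_id: "thread_abc123",
  parameters: { assistant_id: "asst_abc123" },
  # Sent as URL query parameters, not as part of the JSON body.
  query_parameters: { include: ["step_details.tool_calls[*].file_search.results[*].content"] }
)["id"]
```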
data/lib/openai/version.rb CHANGED
@@ -1,3 +1,3 @@
 module OpenAI
-  VERSION = "7.2.0".freeze
+  VERSION = "7.3.0".freeze
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-openai
 version: !ruby/object:Gem::Version
-  version: 7.2.0
+  version: 7.3.0
 platform: ruby
 authors:
 - Alex
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-10-10 00:00:00.000000000 Z
+date: 2024-10-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: event_stream_parser