ruby-openai 7.2.0 → 7.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7108bf76aa6f30cd7c38b41967b0162d6a4014698c1c8364b8116e5c665c044f
4
- data.tar.gz: 736587458668b4608e49fa8ec50e460da14eeda0dfb8a739c68806e3bd5d0a2c
3
+ metadata.gz: 278f25c283d841bfa33614bd69b4340b9275712b83e9121a1aa2a6a439767714
4
+ data.tar.gz: 702c11ba4b0411a47e9d6f9fdb178d1eb40a7baede5909f0665b41edc00797b0
5
5
  SHA512:
6
- metadata.gz: 76cac4818b5941d5732becebc91675c1ebeaba4f27c1710888afbb1893f3f8ff4d18e24a3c90f4d4332eb89730b0f6195507cf99519e764876a614ca3927a0cb
7
- data.tar.gz: 4757d8e11b494a75d0ea839ae073610adfa77317ba0e13ddabdd21bf10fd34127a5747138644dba4d9073aa509341bfeaed69a3d00150d1ae90a42ddeadd53e4
6
+ metadata.gz: 7c4a1bdb8fd3f466808f740112c3223a04da5ef73fd355b5f2136ecf28f5be2968ec4ecced5db25833878ba7e9de1f63e063529580f86d269c7bdd82f7e77df9
7
+ data.tar.gz: '014855034340e14ac2e78c845ae791f619dedf636c9839ce5edc2ea27d7eb54e973dbe4a41998b41d1e89c2c56ce04cd2c062c90bdc87b858b7467005e78100c'
data/CHANGELOG.md CHANGED
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [7.3.0] - 2024-10-11
9
+
10
+ ### Added
11
+
12
+ - Add ability to (with the right incantations) retrieve the chunks used by an Assistant file search - thanks to [@agamble](https://github.com/agamble) for the addition!
13
+
8
14
  ## [7.2.0] - 2024-10-10
9
15
 
10
16
  ### Added
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ruby-openai (7.2.0)
4
+ ruby-openai (7.3.0)
5
5
  event_stream_parser (>= 0.3.0, < 2.0.0)
6
6
  faraday (>= 1)
7
7
  faraday-multipart (>= 1)
data/README.md CHANGED
@@ -1111,6 +1111,116 @@ end
1111
1111
 
1112
1112
  Note that you have 10 minutes to submit your tool output before the run expires.
1113
1113
 
1114
+ #### Exploring chunks used in File Search
1115
+
1116
+ Take a deep breath. You might need a drink for this one.
1117
+
1118
+ It's possible to get OpenAI to share which chunks it used in its internal RAG pipeline to produce its file search response.
1119
+
1120
+ An example spec can be found [here](https://github.com/alexrudall/ruby-openai/blob/main/spec/openai/client/assistant_file_search_spec.rb) that does this, just so you know it's possible.
1121
+
1122
+ Here's how to get the chunks used in a file search. In this example I'm using [this file](https://css4.pub/2015/textbook/somatosensory.pdf):
1123
+
1124
+ ```ruby
1125
+ require "openai"
1126
+
1127
+ # Make a client
1128
+ client = OpenAI::Client.new(
1129
+ access_token: "access_token_goes_here",
1130
+ log_errors: true # Don't do this in production.
1131
+ )
1132
+
1133
+ # Upload your file(s)
1134
+ file_id = client.files.upload(
1135
+ parameters: {
1136
+ file: "path/to/somatosensory.pdf",
1137
+ purpose: "assistants"
1138
+ }
1139
+ )["id"]
1140
+
1141
+ # Create a vector store to store the vectorised file(s)
1142
+ vector_store_id = client.vector_stores.create(parameters: {})["id"]
1143
+
1144
+ # Vectorise the file(s)
1145
+ vector_store_file_id = client.vector_store_files.create(
1146
+ vector_store_id: vector_store_id,
1147
+ parameters: { file_id: file_id }
1148
+ )["id"]
1149
+
1150
+ # Check that the file is vectorised (wait for status to be "completed")
1151
+ client.vector_store_files.retrieve(vector_store_id: vector_store_id, id: vector_store_file_id)["status"]
1152
+
1153
+ # Create an assistant, referencing the vector store
1154
+ assistant_id = client.assistants.create(
1155
+ parameters: {
1156
+ model: "gpt-4o",
1157
+ name: "Answer finder",
1158
+ instructions: "You are a file search tool. Find the answer in the given files, please.",
1159
+ tools: [
1160
+ { type: "file_search" }
1161
+ ],
1162
+ tool_resources: {
1163
+ file_search: {
1164
+ vector_store_ids: [vector_store_id]
1165
+ }
1166
+ }
1167
+ }
1168
+ )["id"]
1169
+
1170
+ # Create a thread with your question
1171
+ thread_id = client.threads.create(parameters: {
1172
+ messages: [
1173
+ { role: "user",
1174
+ content: "Find the description of a nociceptor." }
1175
+ ]
1176
+ })["id"]
1177
+
1178
+ # Run the thread to generate the response. Include the "GIVE ME THE CHUNKS" incantation.
1179
+ run_id = client.runs.create(
1180
+ thread_id: thread_id,
1181
+ parameters: {
1182
+ assistant_id: assistant_id
1183
+ },
1184
+ query_parameters: { include: ["step_details.tool_calls[*].file_search.results[*].content"] } # incantation
1185
+ )["id"]
1186
+
1187
+ # Get the steps that happened in the run
1188
+ steps = client.run_steps.list(
1189
+ thread_id: thread_id,
1190
+ run_id: run_id,
1191
+ parameters: { order: "asc" }
1192
+ )
1193
+
1194
+ # Get the last step ID (or whichever one you want to look at)
1195
+ step_id = steps["data"].first["id"]
1196
+
1197
+ # Retrieve all the steps. Include the "GIVE ME THE CHUNKS" incantation again.
1198
+ steps = steps["data"].map do |step|
1199
+ client.run_steps.retrieve(
1200
+ thread_id: thread_id,
1201
+ run_id: run_id,
1202
+ id: step["id"],
1203
+ parameters: { include: ["step_details.tool_calls[*].file_search.results[*].content"] } # incantation
1204
+ )
1205
+ end
1206
+
1207
+ # Now we've got the chunk info, buried deep. Loop through the steps and find chunks if included:
1208
+ chunks = steps.flat_map do |step|
1209
+ included_results = step.dig("step_details", "tool_calls", 0, "file_search", "results")
1210
+
1211
+ next if included_results.nil? || included_results.empty?
1212
+
1213
+ included_results.flat_map do |result|
1214
+ result["content"].map do |content|
1215
+ content["text"]
1216
+ end
1217
+ end
1218
+ end.compact
1219
+
1220
+ # The first chunk will be the closest match to the prompt. Finally, if you want to view the completed message(s):
1221
+ client.messages.list(thread_id: thread_id)
1222
+ ```
1223
+
1114
1224
  ### Image Generation
1115
1225
 
1116
1226
  Generate images using DALL·E 2 or DALL·E 3!
data/lib/openai/http.rb CHANGED
@@ -18,9 +18,10 @@ module OpenAI
18
18
  end&.body)
19
19
  end
20
20
 
21
- def json_post(path:, parameters:)
21
+ def json_post(path:, parameters:, query_parameters: {})
22
22
  conn.post(uri(path: path)) do |req|
23
23
  configure_json_post_request(req, parameters)
24
+ req.params = query_parameters
24
25
  end&.body
25
26
  end
26
27
 
data/lib/openai/run_steps.rb CHANGED
@@ -8,8 +8,8 @@ module OpenAI
8
8
  @client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps", parameters: parameters)
9
9
  end
10
10
 
11
- def retrieve(thread_id:, run_id:, id:)
12
- @client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps/#{id}")
11
+ def retrieve(thread_id:, run_id:, id:, parameters: {})
12
+ @client.get(path: "/threads/#{thread_id}/runs/#{run_id}/steps/#{id}", parameters: parameters)
13
13
  end
14
14
  end
15
15
  end
data/lib/openai/runs.rb CHANGED
@@ -12,8 +12,9 @@ module OpenAI
12
12
  @client.get(path: "/threads/#{thread_id}/runs/#{id}")
13
13
  end
14
14
 
15
- def create(thread_id:, parameters: {})
16
- @client.json_post(path: "/threads/#{thread_id}/runs", parameters: parameters)
15
+ def create(thread_id:, parameters: {}, query_parameters: {})
16
+ @client.json_post(path: "/threads/#{thread_id}/runs", parameters: parameters,
17
+ query_parameters: query_parameters)
17
18
  end
18
19
 
19
20
  def modify(id:, thread_id:, parameters: {})
data/lib/openai/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OpenAI
2
- VERSION = "7.2.0".freeze
2
+ VERSION = "7.3.0".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-openai
3
3
  version: !ruby/object:Gem::Version
4
- version: 7.2.0
4
+ version: 7.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-10-10 00:00:00.000000000 Z
11
+ date: 2024-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: event_stream_parser