RubyGems - gpt-function - Versions diffs - 0.3.0 → 0.5.0 - Mend

gpt-function 0.3.0 → 0.5.0

Files changed (11) hide show

checksums.yaml +4 -4
data/Gemfile.lock +1 -1
data/README.md +73 -6
data/lib/gpt-function.rb +4 -1
data/lib/gpt_function/batch.rb +67 -20
data/lib/gpt_function/file.rb +0 -1
data/lib/gpt_function/simple_queue.rb +18 -0
data/lib/gpt_function/storage.rb +20 -0
data/lib/gpt_function/version.rb +1 -1
metadata +4 -3
data/lib/gpt/function.rb +0 -10

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c103f09f18b5ef5a39f26b5ede8a3948dd4b2305fb754908c6b090c7f78d1e79
-  data.tar.gz: 686b1a27e1f955b45827abedcd665662c07386eb0008d6ad012a81a0dff6aedd
+  metadata.gz: 4382f4c9861d57510b154ab5179854eee8fad548dd849f3b283d870f736e599e
+  data.tar.gz: 498b2499d3d6ecdb03847a6b5c15f9d2f99c231d59655f3cf8a602898d525102
 SHA512:
-  metadata.gz: 2237aa77cb338408e6d237c07036d9d7998f356939e292e481305a760386c9282b34b683d6c3ba41b51b6fbb911802077804a563372beaab4c0bfcf48d357ee0
-  data.tar.gz: 0573a13abda0ae4a58c5b5cdfa0bdbf4062a68aba8b04802773c0efc8192728ce0ffa0bf30b8feddd70b2fb910b3b5248f6f55b3d40da94352e261febb8c9771
+  metadata.gz: 6ba7a96725e99edb9d87f9467011357fbd8e8e40dc62f902420b480334af7b432bf071e1b63df7ce5e0645611bcb6355caff08d275e9905a21a638ec6033cc06
+  data.tar.gz: 680facff33d5f9737fb4dbc6cfc10a08e35f221245cb1e90c82a3e280c4ed32a0f1129d52561e471ca210f984a4c973b671aec01445d7776fdb5815e1d25339b

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    gpt-function (0.3.0)
+    gpt-function (0.4.0)
 GEM
   remote: https://rubygems.org/

data/README.md CHANGED Viewed

@@ -27,27 +27,94 @@ gem 'gpt-function'
 require 'gpt-function'
 # 你需要設定你的 api key 和 model name
-Gpt::Function.configure(api_key: '...', model: 'gpt-3.5-turbo-1106')
+GptFunction.configure(api_key: '...', model: 'gpt-4o-mini', batch_storage: MyBatchStorage.new)
 # 使用內建的翻譯方法
-p Gpt::Functions.翻譯成中文("banana") # "香蕉"
+p GptFunctions.翻譯成中文.call("banana") # "香蕉"
 # 使用內建的擷取關鍵字方法
-p Gpt::Functions.擷取關鍵字("臺北市政府推動綠色交通計劃，鼓勵民眾使用公共運輸和自行車")  # ["臺北市政府", "綠色交通計劃", "民眾", "公共運輸", "自行車"]
+p GptFunctions.擷取關鍵字.call("臺北市政府推動綠色交通計劃，鼓勵民眾使用公共運輸和自行車")  # ["臺北市政府", "綠色交通計劃", "民眾", "公共運輸", "自行車"]
 # 你也可以自己定義方法
-def 擷取關鍵字(input)
+def 擷取關鍵字
   # 創建一個簡單的 GPT 函數，你需要描述這個函數的功能，以及提供一些範例
-  Gpt::Function.new("Extract all keywords",
+  GptFunction.new("Extract all keywords",
   [
     [
       "臺灣最新5G網路覆蓋率達95%，推動智慧城市發展，領先亞洲多國",
       ["臺灣", "5G網路", "覆蓋率", "智慧城市", "亞洲"]
     ]
-  ]).call(input)
+  ])
 end
 ```
+Batch Storage 是一個用來存放待處理 Batch 的佇列，只要物件實作了 `enqueue` 與 `dequeue` 方法即可；你可以自己定義一個類似的類別，並且在 `GptFunction.configure` 中以 `batch_storage:` 設定。
+```ruby
+class MyBatchStorage
+  def initialize
+    @queue = []
+  end
+  def enqueue(value)
+    @queue << value
+    true
+  end
+  def dequeue
+    @queue.shift
+  end
+end
+GptFunction.configure(api_key: '...', model: 'gpt-4o-mini', batch_storage: MyBatchStorage.new)
+```
+你可以用 Batch.create 建立一個新的 Batch, 在 create 成功時，會自動將 Batch 存入 BatchStorage 中。
+```ruby
+request1 = GptFunctions.翻譯成中文.to_request_body("apple")
+request2 = GptFunctions.翻譯成中文.to_request_body("tesla")
+batch = GptFunction::Batch.create([request1, request2])
+```
+你可以用 Batch.process 來處理 Batch，如果 Batch 的 status 在 "failed", "completed", "expired", "cancelled" 當中，Batch 會被從 queue 中移除，如果是其他狀態，Batch 會自動重新加入 queue 中，你只需要定期持續呼叫 process 就可以。
+```ruby
+GptFunction::Batch.process do |batch|
+  puts "batch id: #{batch.id}, status: #{batch.status}, progress: #{batch.request_counts_completed}/#{batch.request_counts_total}"
+  batch.pairs.each do |input, output|
+    puts "input: #{input}, output: #{output}"
+  end
+end
+```
+可以用 count 參數來限制每次處理的數量，預設值為 1。
+```ruby
+GptFunction::Batch.process(count: 2) do |batch|
+  ...
+end
+```
+Batch Storage 整合 Active Record 的範例：
+```ruby
+class Model < ApplicationRecord
+  class << self
+    def enqueue(hash)
+      create!(hash)
+      true
+    end
+    def dequeue
+      first&.destroy
+    end
+  end
+end
+GptFunction.configure(api_key: '...', model: 'gpt-4o-mini', batch_storage: Model)
+```
 ## License
 The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).

data/lib/gpt-function.rb CHANGED Viewed

@@ -5,6 +5,8 @@ require "json"
 require_relative "gpt_function/version"
 require_relative "gpt_function/file"
+require_relative "gpt_function/storage"
+require_relative "gpt_function/simple_queue"
 require_relative "gpt_function/batch"
 require_relative "gpt_functions"
@@ -17,9 +19,10 @@ class GptFunction
   class << self
     attr_accessor :api_key, :model
-    def configure(api_key:, model:)
+    def configure(api_key:, model:, batch_storage: GptFunction::SimpleQueue.new)
       @api_key = api_key
       @model = model
+      GptFunction::Storage.batch_storage = batch_storage
     end
   end

data/lib/gpt_function/batch.rb CHANGED Viewed

@@ -1,9 +1,7 @@
-# lib/gpt_function/batch.rb
 # frozen_string_literal: true
 require "net/http"
 require "json"
-require "byebug"
 class GptFunction
   class Batch
@@ -30,8 +28,7 @@ class GptFunction
     attr_reader :request_counts_completed
     attr_reader :request_counts_failed
-    attr_reader :metadata_customer_id
-    attr_reader :metadata_batch_description
+    attr_reader :metadata
     def initialize(hash)
       @id = hash["id"]
@@ -57,8 +54,7 @@ class GptFunction
       @request_counts_completed = hash.dig("request_counts", "completed")
       @request_counts_failed = hash.dig("request_counts", "failed")
-      @metadata_customer_id = hash.dig("metadata", "customer_id")
-      @metadata_batch_description = hash.dig("metadata", "batch_description")
+      @metadata = hash.dig("metadata")
     end
     def to_hash
@@ -84,8 +80,7 @@ class GptFunction
         request_counts_total: request_counts_total,
         request_counts_completed: request_counts_completed,
         request_counts_failed: request_counts_failed,
-        metadata_customer_id: metadata_customer_id,
-        metadata_batch_description: metadata_batch_description,
+        metadata: metadata
       }
     end
@@ -98,19 +93,21 @@ class GptFunction
     end
     def input_file
+      return nil if input_file_id.nil?
       @input_file ||= File.from_id(input_file_id)
     end
     def output_file
+      return nil if output_file_id.nil?
       @output_file ||= File.from_id(output_file_id)
     end
     def input_jsonl
-      @input_jsonl ||= input_file.jsonl
+      @input_jsonl ||= input_file&.jsonl || []
     end
     def output_jsonl
-      @output_jsonl ||= output_file.jsonl
+      @output_jsonl ||= output_file&.jsonl || []
     end
     def inputs
@@ -135,14 +132,16 @@ class GptFunction
     def pairs
       hash = {}
-      inputs.each do |input|
-        hash[input["custom_id"]] = {
-          "input" => input["content"],
-        }
-      end
       outputs.each do |output|
-        hash[output["custom_id"]]["output"] = output["content"]
+        hash[output["custom_id"]] = [nil ,output["content"]]
+      end
+      inputs.each do |input|
+        next if hash[input["custom_id"]].nil?
+        hash[input["custom_id"]][0] = input["content"]
       end
       hash.values
     end
@@ -150,6 +149,24 @@ class GptFunction
       Batch.cancel(id)
     end
+    def enqueue
+      return false if GptFunction::Storage.batch_storage.nil?
+      GptFunction::Storage.batch_storage.enqueue(self.to_hash)
+    end
+    # validating  - the input file is being validated before the batch can begin
+    # failed      - the input file has failed the validation process
+    # in_progress - the input file was successfully validated and the batch is currently being run
+    # finalizing  - the batch has completed and the results are being prepared
+    # completed   - the batch has been completed and the results are ready
+    # expired     - the batch was not able to be completed within the 24-hour time window
+    # cancelling  - the batch is being cancelled (may take up to 10 minutes)
+    # cancelled   - the batch was cancelled
+    def is_processed
+      ["failed", "completed", "expired", "cancelled"].include? status # true only for the terminal statuses in the table above
+    end
     class << self
       def list(limit: 20, after: nil)
         # 創建批次請求
@@ -169,7 +186,7 @@ class GptFunction
         end
       end
-      def create(requests)
+      def create(requests, metadata: nil)
         requests = requests.each_with_index.map do |request, index|
           {
             custom_id: "request-#{index + 1}",
@@ -186,11 +203,13 @@ class GptFunction
         uri = URI('https://api.openai.com/v1/batches')
         request = Net::HTTP::Post.new(uri, 'Content-Type' => 'application/json')
         request['Authorization'] = "Bearer #{GptFunction.api_key}"
-        request.body = {
+        body = {
           input_file_id: file.id,
           endpoint: '/v1/chat/completions',
           completion_window: '24h'
-        }.to_json
+        }
+        body[:metadata] = metadata unless metadata.nil?
+        request.body = body.to_json
         response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
           http.request(request)
@@ -199,7 +218,9 @@ class GptFunction
         raise "Batch creation failed: #{response.body}" unless response.is_a?(Net::HTTPSuccess)
         hash = JSON.parse(response.body)
-        Batch.new(hash)
+        batch = Batch.new(hash)
+        batch.enqueue
+        batch
       rescue => e
         file&.delete
         raise e
@@ -245,6 +266,32 @@ class GptFunction
         response.body
       end
+      def dequeue
+        hash = GptFunction::Storage.batch_storage&.dequeue
+        id = hash&.dig("id") || hash&.dig(:id)
+        from_id(id) if id
+      end
+      # Processes batches taken from the configured batch storage.
+      # count: maximum number of batches to dequeue and process in this call
+      # block: invoked with each dequeued batch
+      # Returns: false as soon as dequeue yields no batch, true once `count` batches were processed
+      def process(count: 1, &block)
+        # Take up to `count` batches out of Storage, one at a time
+        count.times do
+          batch = dequeue
+          # Nothing left to dequeue: stop early and report false
+          return false if batch.nil?
+          yield batch
+          # Put the batch back into Storage unless it has reached a terminal status
+          batch.enqueue unless batch.is_processed
+        end
+        true
+      end
     end
   end
 end

data/lib/gpt_function/file.rb CHANGED Viewed

@@ -1,4 +1,3 @@
-# lib/gpt_function/batch.rb
 # frozen_string_literal: true
 require "net/http"

data/lib/gpt_function/simple_queue.rb ADDED Viewed

@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+class GptFunction
+  class SimpleQueue
+    def initialize
+      @queue = []
+    end
+    def enqueue(value)
+      @queue << value
+      true
+    end
+    def dequeue
+      @queue.shift
+    end
+  end
+end

data/lib/gpt_function/storage.rb ADDED Viewed

@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+class GptFunction
+  module Storage
+    class << self
+      def batch_storage=(value)
+        # Duck-type check: the storage object must respond to #enqueue
+        raise "Invalid batch storage: should respond to #enqueue" unless value.respond_to?(:enqueue)
+        # Duck-type check: the storage object must respond to #dequeue
+        raise "Invalid batch storage: should respond to #dequeue" unless value.respond_to?(:dequeue)
+        @batch_storage = value
+      end
+      def batch_storage
+        @batch_storage
+      end
+    end
+  end
+end

data/lib/gpt_function/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 class GptFunction
-  VERSION = "0.3.0"
+  VERSION = "0.5.0"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gpt-function
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.5.0
 platform: ruby
 authors:
 - etrex kuo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-08-02 00:00:00.000000000 Z
+date: 2024-08-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dotenv
@@ -41,9 +41,10 @@ files:
 - Rakefile
 - gpt-function.gemspec
 - lib/gpt-function.rb
-- lib/gpt/function.rb
 - lib/gpt_function/batch.rb
 - lib/gpt_function/file.rb
+- lib/gpt_function/simple_queue.rb
+- lib/gpt_function/storage.rb
 - lib/gpt_function/version.rb
 - lib/gpt_functions.rb
 - workflows/main.yml

data/lib/gpt/function.rb DELETED Viewed

@@ -1,10 +0,0 @@
-# frozen_string_literal: true
-require "net/http"
-require "json"
-require_relative "function/batch"
-module Gpt
-  # 這是一個簡單的 GPT 函數類別
-end