omniai-openai 1.0.2 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 667d8318e96d3e6611a3ca20ced5dcefefc0896a4c84c48427ce441756cb9e60
4
- data.tar.gz: 13161c5726cc33fcc7934a4164f751244b3c577fb52a1cdf30301dc10f733c84
3
+ metadata.gz: 8fa0f1acc820456282d48d42e9bc1154f8aa9b83fe9fa86817e1d3446a8cd31d
4
+ data.tar.gz: cd4c84c579985a17c96edadcb91324b545035bf170fb20c3172855e13e83586c
5
5
  SHA512:
6
- metadata.gz: d44485bdcebe5ee67a846c4517ea62fdb4fd98dfbab464f64fd10316561a396369ce5579367c8f073d79edfee0a790baa46c2fc31dfd2a4522221f0d16679a23
7
- data.tar.gz: 2f1054f58e35f37012bc6b95f7605b0eeee01010718c2446a14b623b37cde0028d08ed56330d10bf1145fe5dfa69c3c138f54f5ec7d2bf65e55eb4727a21c664
6
+ metadata.gz: c4583c5da031c749e56b6ec2bbcf59daaebe8f873c5fe9a9cbdfdfb67acf8d8eabc798a754fe5e2784fd73495bb91181deb3e323d00e0e84d962ff86624192a5
7
+ data.tar.gz: fcf7612d1f7361697820a3e14cf5558fe83fb096b1154a5c3a67c3c6c64823d230e1c1d029252ee91fa0e9cd26bbba398fc18612189171db891d47a830536266
data/Gemfile CHANGED
@@ -11,4 +11,5 @@ gem 'rspec_junit_formatter'
11
11
  gem 'rubocop'
12
12
  gem 'rubocop-rake'
13
13
  gem 'rubocop-rspec'
14
+ gem 'simplecov'
14
15
  gem 'webmock'
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # OmniAI::OpenAI
2
2
 
3
+ [![CircleCI](https://circleci.com/gh/ksylvest/omniai-openai.svg?style=svg)](https://circleci.com/gh/ksylvest/omniai-openai)
4
+
3
5
  An OpenAI implementation of the [OmniAI](https://github.com/ksylvest/omniai) APIs.
4
6
 
5
7
  ## Installation
@@ -116,3 +118,116 @@ JSON.parse(completion.choice.message.content) # { "name": "Ringo" }
116
118
  [OpenAI API Reference `response_format`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format)
117
119
 
118
120
  > When using JSON mode, you must also instruct the model to produce JSON yourself via a system or user message.
121
+
122
+ ### Transcribe
123
+
124
+ A transcription is generated by passing in a path to a file:
125
+
126
+ ```ruby
127
+ transcription = client.transcribe(file.path)
128
+ transcription.text # '...'
129
+ ```
130
+
131
+ #### Prompt
132
+
133
+ `prompt` is optional and can provide additional context for transcribing:
134
+
135
+ ```ruby
136
+ transcription = client.transcribe(file.path, prompt: '')
137
+ transcription.text # '...'
138
+ ```
139
+
140
+ [OpenAI API Reference `prompt`](https://platform.openai.com/docs/api-reference/audio/createTranscription#audio-createtranscription-prompt)
141
+
142
+ #### Format
143
+
144
+ `format` is optional and supports `json`, `text`, `srt` or `vtt`:
145
+
146
+ ```ruby
147
+ transcription = client.transcribe(file.path, format: OmniAI::Transcribe::Format::TEXT)
148
+ transcription.text # '...'
149
+ ```
150
+
151
+ [OpenAI API Reference `response_format`](https://platform.openai.com/docs/api-reference/audio/createTranscription#audio-createtranscription-response_format)
152
+
153
+ #### Language
154
+
155
+ `language` is optional and may improve accuracy and latency:
156
+
157
+ ```ruby
158
+ transcription = client.transcribe(file.path, language: OmniAI::Transcribe::Language::SPANISH)
159
+ transcription.text
160
+ ```
161
+
162
+ [OpenAI API Reference `language`](https://platform.openai.com/docs/api-reference/audio/createTranscription#audio-createtranscription-language)
163
+
164
+ #### Temperature
165
+
166
+ `temperature` is optional and must be between 0.0 (more deterministic) and 1.0 (less deterministic):
167
+
168
+ ```ruby
169
+ transcription = client.transcribe(file.path, temperature: 0.2)
170
+ transcription.text
171
+ ```
172
+
173
+ [OpenAI API Reference `temperature`](https://platform.openai.com/docs/api-reference/audio/createTranscription#audio-createtranscription-temperature)
174
+
175
+ ### Speak
176
+
177
+ Speech can be generated by passing text with a block:
178
+
179
+ ```ruby
180
+ File.open('example.ogg', 'wb') do |file|
181
+ client.speak('How can a clam cram in a clean cream can?') do |chunk|
182
+ file << chunk
183
+ end
184
+ end
185
+ ```
186
+
187
+ If a block is not provided then a tempfile is returned:
188
+
189
+ ```ruby
190
+ tempfile = client.speak('Can you can a can as a canner can can a can?')
191
+ tempfile.close
192
+ tempfile.unlink
193
+ ```
194
+
195
+ #### Voice
196
+
197
+ `voice` is optional and must be one of the supported voices:
198
+
199
+ ```ruby
200
+ client.speak('She sells seashells by the seashore.', voice: OmniAI::OpenAI::Speak::Voice::SHIMMER)
201
+ ```
202
+
203
+ [OpenAI API Reference `voice`](https://platform.openai.com/docs/api-reference/audio/createSpeech#audio-createspeech-voice)
204
+
205
+ #### Model
206
+
207
+ `model` is optional and must be either `tts-1` or `tts-1-hd` (default):
208
+
209
+ ```ruby
210
+ client.speak('I saw a kitten eating chicken in the kitchen.', model: OmniAI::OpenAI::Speak::Model::TTS_1)
211
+ ```
212
+
213
+ [OpenAI API Reference `model`](https://platform.openai.com/docs/api-reference/audio/createSpeech#audio-createspeech-model)
214
+
215
+ #### Speed
216
+
217
+ `speed` is optional and must be between 0.25 and 4.0:
218
+
219
+ ```ruby
220
+ client.speak('How much wood would a woodchuck chuck if a woodchuck could chuck wood?', speed: 4.0)
221
+ ```
222
+
223
+ [OpenAI API Reference `speed`](https://platform.openai.com/docs/api-reference/audio/createSpeech#audio-createspeech-speed)
224
+
225
+ #### Format
226
+
227
+ `format` is optional and supports `MP3` (default), `OPUS`, `AAC`, `FLAC`, `WAV` or `PCM`:
228
+
229
+ ```ruby
230
+ client.speak('A pessimistic pest exists amidst us.', format: OmniAI::OpenAI::Speak::Format::FLAC)
231
+ ```
232
+
233
+ [OpenAI API Reference `format`](https://platform.openai.com/docs/api-reference/audio/createSpeech#audio-createspeech-response_format)
@@ -64,6 +64,41 @@ module OmniAI
64
64
  def chat(messages, model: Chat::Model::GPT_4O, temperature: nil, format: nil, stream: nil)
65
65
  Chat.process!(messages, model:, temperature:, format:, stream:, client: self)
66
66
  end
67
+
68
+ # @raise [OmniAI::Error]
69
+ #
70
+ # @param path [String]
71
+ # @param model [String]
72
+ # @param language [String, nil] optional
73
+ # @param prompt [String, nil] optional
74
+ # @param temperature [Float, nil] optional
75
+ # @param format [Symbol] :text, :srt, :vtt, or :json (default)
76
+ #
77
+ # @return [OmniAI::Transcribe::Transcription]
78
+ def transcribe(path, model: Transcribe::Model::WHISPER, language: nil, prompt: nil, temperature: nil, format: nil)
79
+ Transcribe.process!(path, model:, language:, prompt:, temperature:, format:, client: self)
80
+ end
81
+
82
+ # @raise [OmniAI::Error]
83
+ #
84
+ # @param input [String] required
85
+ # @param model [String] optional
86
+ # @param voice [String] optional
87
+ # @param speed [Float] optional
88
+ # @param format [String] optional (default "mp3"):
89
+ # - "aac"
90
+ # - "mp3"
91
+ # - "flac"
92
+ # - "opus"
93
+ # - "pcm"
94
+ # - "wav"
95
+ #
96
+ # @yield [output] optional
97
+ #
98
+ # @return [Tempfile]
99
+ def speak(input, model: Speak::Model::TTS_1_HD, voice: Speak::Voice::ALLOY, speed: nil, format: nil, &)
100
+ Speak.process!(input, model:, voice:, speed:, format:, client: self, &)
101
+ end
67
102
  end
68
103
  end
69
104
  end
@@ -4,7 +4,7 @@ module OmniAI
4
4
  module OpenAI
5
5
  # Configuration for managing the OpenAI `api_key` / `organization` / `project` / `logger`.
6
6
  class Config < OmniAI::Config
7
- attr_accessor :organization, :project, :chat_options
7
+ attr_accessor :organization, :project, :chat_options, :transcribe_options, :speak_options
8
8
 
9
9
  def initialize
10
10
  super
@@ -13,6 +13,8 @@ module OmniAI
13
13
  @project = ENV.fetch('OPENAI_PROJECT', nil)
14
14
  @host = ENV.fetch('OPENAI_HOST', 'https://api.openai.com')
15
15
  @chat_options = {}
16
+ @transcribe_options = {}
17
+ @speak_options = {}
16
18
  end
17
19
  end
18
20
  end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module OpenAI
5
+ # An OpenAI speak implementation.
6
+ class Speak < OmniAI::Speak
7
+ module Model
8
+ TTS_1 = 'tts-1'
9
+ TTS_1_HD = 'tts-1-hd'
10
+ end
11
+
12
+ module Voice
13
+ ALLOY = 'alloy' # https://platform.openai.com/docs/guides/text-to-speech/alloy
14
+ ECHO = 'echo' # https://platform.openai.com/docs/guides/text-to-speech/echo
15
+ FABLE = 'fable' # https://platform.openai.com/docs/guides/text-to-speech/fable
16
+ NOVA = 'nova' # https://platform.openai.com/docs/guides/text-to-speech/nova
17
+ ONYX = 'onyx' # https://platform.openai.com/docs/guides/text-to-speech/onyx
18
+ SHIMMER = 'shimmer' # https://platform.openai.com/docs/guides/text-to-speech/shimmer
19
+ end
20
+
21
+ protected
22
+
23
+ # @return [Hash]
24
+ def payload
25
+ OmniAI::OpenAI
26
+ .config.speak_options
27
+ .merge(super)
28
+ .merge({ response_format: @format }.compact)
29
+ end
30
+
31
+ # @return [String]
32
+ def path
33
+ "/#{OmniAI::OpenAI::Client::VERSION}/audio/speech"
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module OpenAI
5
+ # An OpenAI transcribe implementation.
6
+ class Transcribe < OmniAI::Transcribe
7
+ module Model
8
+ WHISPER_1 = 'whisper-1'
9
+ WHISPER = WHISPER_1
10
+ end
11
+
12
+ protected
13
+
14
+ # @return [Hash]
15
+ def payload
16
+ OmniAI::OpenAI
17
+ .config.transcribe_options
18
+ .merge(super)
19
+ .merge({ response_format: @format || Format::JSON })
20
+ end
21
+
22
+ # @return [String]
23
+ def path
24
+ "/#{OmniAI::OpenAI::Client::VERSION}/audio/transcriptions"
25
+ end
26
+ end
27
+ end
28
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module OmniAI
4
4
  module OpenAI
5
- VERSION = '1.0.2'
5
+ VERSION = '1.1.1'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omniai-openai
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Sylvestre
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-15 00:00:00.000000000 Z
11
+ date: 2024-06-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: event_stream_parser
@@ -65,6 +65,8 @@ files:
65
65
  - lib/omniai/openai/chat.rb
66
66
  - lib/omniai/openai/client.rb
67
67
  - lib/omniai/openai/config.rb
68
+ - lib/omniai/openai/speak.rb
69
+ - lib/omniai/openai/transcribe.rb
68
70
  - lib/omniai/openai/version.rb
69
71
  homepage: https://github.com/ksylvest/omniai-openai
70
72
  licenses: []