omniai-openai 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/README.md +115 -0
- data/lib/omniai/openai/client.rb +35 -0
- data/lib/omniai/openai/config.rb +3 -1
- data/lib/omniai/openai/speak.rb +37 -0
- data/lib/omniai/openai/transcribe.rb +28 -0
- data/lib/omniai/openai/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8fa0f1acc820456282d48d42e9bc1154f8aa9b83fe9fa86817e1d3446a8cd31d
|
4
|
+
data.tar.gz: cd4c84c579985a17c96edadcb91324b545035bf170fb20c3172855e13e83586c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4583c5da031c749e56b6ec2bbcf59daaebe8f873c5fe9a9cbdfdfb67acf8d8eabc798a754fe5e2784fd73495bb91181deb3e323d00e0e84d962ff86624192a5
|
7
|
+
data.tar.gz: fcf7612d1f7361697820a3e14cf5558fe83fb096b1154a5c3a67c3c6c64823d230e1c1d029252ee91fa0e9cd26bbba398fc18612189171db891d47a830536266
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# OmniAI::OpenAI
|
2
2
|
|
3
|
+
[CircleCI](https://circleci.com/gh/ksylvest/omniai-openai)
|
4
|
+
|
3
5
|
An OpenAI implementation of the [OmniAI](https://github.com/ksylvest/omniai) APIs.
|
4
6
|
|
5
7
|
## Installation
|
@@ -116,3 +118,116 @@ JSON.parse(completion.choice.message.content) # { "name": "Ringo" }
|
|
116
118
|
[OpenAI API Reference `response_format`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format)
|
117
119
|
|
118
120
|
> When using JSON mode, you must also instruct the model to produce JSON yourself via a system or user message.
|
121
|
+
|
122
|
+
### Transcribe
|
123
|
+
|
124
|
+
A transcription is generated by passing in a path to a file:
|
125
|
+
|
126
|
+
```ruby
|
127
|
+
transcription = client.transcribe(file.path)
|
128
|
+
transcription.text # '...'
|
129
|
+
```
|
130
|
+
|
131
|
+
#### Prompt
|
132
|
+
|
133
|
+
`prompt` is optional and can provide additional context for transcribing:
|
134
|
+
|
135
|
+
```ruby
|
136
|
+
transcription = client.transcribe(file.path, prompt: '')
|
137
|
+
transcription.text # '...'
|
138
|
+
```
|
139
|
+
|
140
|
+
[OpenAI API Reference `prompt`](https://platform.openai.com/docs/api-reference/audio/createTranscription#audio-createtranscription-prompt)
|
141
|
+
|
142
|
+
#### Format
|
143
|
+
|
144
|
+
`format` is optional and supports `json`, `text`, `srt` or `vtt`:
|
145
|
+
|
146
|
+
```ruby
|
147
|
+
transcription = client.transcribe(file.path, format: OmniAI::Transcribe::Format::TEXT)
|
148
|
+
transcription.text # '...'
|
149
|
+
```
|
150
|
+
|
151
|
+
[OpenAI API Reference `response_format`](https://platform.openai.com/docs/api-reference/audio/createTranscription#audio-createtranscription-response_format)
|
152
|
+
|
153
|
+
#### Language
|
154
|
+
|
155
|
+
`language` is optional and may improve accuracy and latency:
|
156
|
+
|
157
|
+
```ruby
|
158
|
+
transcription = client.transcribe(file.path, language: OmniAI::Transcribe::Language::SPANISH)
|
159
|
+
transcription.text
|
160
|
+
```
|
161
|
+
|
162
|
+
[OpenAI API Reference `language`](https://platform.openai.com/docs/api-reference/audio/createTranscription#audio-createtranscription-language)
|
163
|
+
|
164
|
+
#### Temperature
|
165
|
+
|
166
|
+
`temperature` is optional and must be between 0.0 (more deterministic) and 1.0 (less deterministic):
|
167
|
+
|
168
|
+
```ruby
|
169
|
+
transcription = client.transcribe(file.path, temperature: 0.2)
|
170
|
+
transcription.text
|
171
|
+
```
|
172
|
+
|
173
|
+
[OpenAI API Reference `temperature`](https://platform.openai.com/docs/api-reference/audio/createTranscription#audio-createtranscription-temperature)
|
174
|
+
|
175
|
+
### Speak
|
176
|
+
|
177
|
+
Speech can be generated by passing text with a block:
|
178
|
+
|
179
|
+
```ruby
|
180
|
+
File.open('example.mp3', 'wb') do |file|
|
181
|
+
client.speak('How can a clam cram in a clean cream can?') do |chunk|
|
182
|
+
file << chunk
|
183
|
+
end
|
184
|
+
end
|
185
|
+
```
|
186
|
+
|
187
|
+
If a block is not provided then a tempfile is returned:
|
188
|
+
|
189
|
+
```ruby
|
190
|
+
tempfile = client.speak('Can you can a can as a canner can can a can?')
|
191
|
+
tempfile.close
|
192
|
+
tempfile.unlink
|
193
|
+
```
|
194
|
+
|
195
|
+
#### Voice
|
196
|
+
|
197
|
+
`voice` is optional and must be one of the supported voices:
|
198
|
+
|
199
|
+
```ruby
|
200
|
+
client.speak('She sells seashells by the seashore.', voice: OmniAI::OpenAI::Speak::Voice::SHIMMER)
|
201
|
+
```
|
202
|
+
|
203
|
+
[OpenAI API Reference `voice`](https://platform.openai.com/docs/api-reference/audio/createSpeech#audio-createspeech-voice)
|
204
|
+
|
205
|
+
#### Model
|
206
|
+
|
207
|
+
`model` is optional and must be either `tts-1` or `tts-1-hd` (default):
|
208
|
+
|
209
|
+
```ruby
|
210
|
+
client.speak('I saw a kitten eating chicken in the kitchen.', model: OmniAI::OpenAI::Speak::Model::TTS_1)
|
211
|
+
```
|
212
|
+
|
213
|
+
[OpenAI API Reference `model`](https://platform.openai.com/docs/api-reference/audio/createSpeech#audio-createspeech-model)
|
214
|
+
|
215
|
+
#### Speed
|
216
|
+
|
217
|
+
`speed` is optional and must be between 0.25 and 4.0:
|
218
|
+
|
219
|
+
```ruby
|
220
|
+
client.speak('How much wood would a woodchuck chuck if a woodchuck could chuck wood?', speed: 4.0)
|
221
|
+
```
|
222
|
+
|
223
|
+
[OpenAI API Reference `speed`](https://platform.openai.com/docs/api-reference/audio/createSpeech#audio-createspeech-speed)
|
224
|
+
|
225
|
+
#### Format
|
226
|
+
|
227
|
+
`format` is optional and supports `MP3` (default), `OPUS`, `AAC`, `FLAC`, `WAV` or `PCM`:
|
228
|
+
|
229
|
+
```ruby
|
230
|
+
client.speak('A pessimistic pest exists amidst us.', format: OmniAI::OpenAI::Speak::Format::FLAC)
|
231
|
+
```
|
232
|
+
|
233
|
+
[OpenAI API Reference `format`](https://platform.openai.com/docs/api-reference/audio/createSpeech#audio-createspeech-response_format)
|
data/lib/omniai/openai/client.rb
CHANGED
@@ -64,6 +64,41 @@ module OmniAI
|
|
64
64
|
def chat(messages, model: Chat::Model::GPT_4O, temperature: nil, format: nil, stream: nil)
|
65
65
|
Chat.process!(messages, model:, temperature:, format:, stream:, client: self)
|
66
66
|
end
|
67
|
+
|
68
|
+
# @raise [OmniAI::Error]
|
69
|
+
#
|
70
|
+
# @param path [String]
|
71
|
+
# @param model [String]
|
72
|
+
# @param language [String, nil] optional
|
73
|
+
# @param prompt [String, nil] optional
|
74
|
+
# @param temperature [Float, nil] optional
|
75
|
+
# @param format [Symbol] :text, :srt, :vtt, or :json (default)
|
76
|
+
#
|
77
|
+
# @return [OmniAI::Transcribe::Transcription]
|
78
|
+
def transcribe(path, model: Transcribe::Model::WHISPER, language: nil, prompt: nil, temperature: nil, format: nil)
|
79
|
+
Transcribe.process!(path, model:, language:, prompt:, temperature:, format:, client: self)
|
80
|
+
end
|
81
|
+
|
82
|
+
# @raise [OmniAI::Error]
|
83
|
+
#
|
84
|
+
# @param input [String] required
|
85
|
+
# @param model [String] optional
|
86
|
+
# @param voice [String] optional
|
87
|
+
# @param speed [Float] optional
|
88
|
+
# @param format [String] optional (default "mp3"):
|
89
|
+
# - "aac"
|
90
|
+
# - "mp3"
|
91
|
+
# - "flac"
|
92
|
+
# - "opus"
|
93
|
+
# - "pcm"
|
94
|
+
# - "wav"
|
95
|
+
#
|
96
|
+
# @yield [output] optional
|
97
|
+
#
|
98
|
+
# @return [Tempfile]
|
99
|
+
def speak(input, model: Speak::Model::TTS_1_HD, voice: Speak::Voice::ALLOY, speed: nil, format: nil, &)
|
100
|
+
Speak.process!(input, model:, voice:, speed:, format:, client: self, &)
|
101
|
+
end
|
67
102
|
end
|
68
103
|
end
|
69
104
|
end
|
data/lib/omniai/openai/config.rb
CHANGED
@@ -4,7 +4,7 @@ module OmniAI
|
|
4
4
|
module OpenAI
|
5
5
|
# Configuration for managing the OpenAI `api_key` / `organization` / `project` / `logger`.
|
6
6
|
class Config < OmniAI::Config
|
7
|
-
attr_accessor :organization, :project, :chat_options
|
7
|
+
attr_accessor :organization, :project, :chat_options, :transcribe_options, :speak_options
|
8
8
|
|
9
9
|
def initialize
|
10
10
|
super
|
@@ -13,6 +13,8 @@ module OmniAI
|
|
13
13
|
@project = ENV.fetch('OPENAI_PROJECT', nil)
|
14
14
|
@host = ENV.fetch('OPENAI_HOST', 'https://api.openai.com')
|
15
15
|
@chat_options = {}
|
16
|
+
@transcribe_options = {}
|
17
|
+
@speak_options = {}
|
16
18
|
end
|
17
19
|
end
|
18
20
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OmniAI
|
4
|
+
module OpenAI
|
5
|
+
# An OpenAI speak implementation.
|
6
|
+
class Speak < OmniAI::Speak
|
7
|
+
module Model
|
8
|
+
TTS_1 = 'tts-1'
|
9
|
+
TTS_1_HD = 'tts-1-hd'
|
10
|
+
end
|
11
|
+
|
12
|
+
module Voice
|
13
|
+
ALLOY = 'alloy' # https://platform.openai.com/docs/guides/text-to-speech/alloy
|
14
|
+
ECHO = 'echo' # https://platform.openai.com/docs/guides/text-to-speech/echo
|
15
|
+
FABLE = 'fable' # https://platform.openai.com/docs/guides/text-to-speech/fable
|
16
|
+
NOVA = 'nova' # https://platform.openai.com/docs/guides/text-to-speech/nova
|
17
|
+
ONYX = 'onyx' # https://platform.openai.com/docs/guides/text-to-speech/onyx
|
18
|
+
SHIMMER = 'shimmer' # https://platform.openai.com/docs/guides/text-to-speech/shimmer
|
19
|
+
end
|
20
|
+
|
21
|
+
protected
|
22
|
+
|
23
|
+
# @return [Hash]
|
24
|
+
def payload
|
25
|
+
OmniAI::OpenAI
|
26
|
+
.config.speak_options
|
27
|
+
.merge(super)
|
28
|
+
.merge({ response_format: @format }.compact)
|
29
|
+
end
|
30
|
+
|
31
|
+
# @return [String]
|
32
|
+
def path
|
33
|
+
"/#{OmniAI::OpenAI::Client::VERSION}/audio/speech"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OmniAI
|
4
|
+
module OpenAI
|
5
|
+
# An OpenAI transcribe implementation.
|
6
|
+
class Transcribe < OmniAI::Transcribe
|
7
|
+
module Model
|
8
|
+
WHISPER_1 = 'whisper-1'
|
9
|
+
WHISPER = WHISPER_1
|
10
|
+
end
|
11
|
+
|
12
|
+
protected
|
13
|
+
|
14
|
+
# @return [Hash]
|
15
|
+
def payload
|
16
|
+
OmniAI::OpenAI
|
17
|
+
.config.transcribe_options
|
18
|
+
.merge(super)
|
19
|
+
.merge({ response_format: @format || Format::JSON })
|
20
|
+
end
|
21
|
+
|
22
|
+
# @return [String]
|
23
|
+
def path
|
24
|
+
"/#{OmniAI::OpenAI::Client::VERSION}/audio/transcriptions"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: omniai-openai
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin Sylvestre
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-06-
|
11
|
+
date: 2024-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: event_stream_parser
|
@@ -65,6 +65,8 @@ files:
|
|
65
65
|
- lib/omniai/openai/chat.rb
|
66
66
|
- lib/omniai/openai/client.rb
|
67
67
|
- lib/omniai/openai/config.rb
|
68
|
+
- lib/omniai/openai/speak.rb
|
69
|
+
- lib/omniai/openai/transcribe.rb
|
68
70
|
- lib/omniai/openai/version.rb
|
69
71
|
homepage: https://github.com/ksylvest/omniai-openai
|
70
72
|
licenses: []
|