omniai-google 3.8.0 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -3
- data/lib/omniai/google/transcribe.rb +3 -1
- data/lib/omniai/google/transcribe_helpers.rb +30 -2
- data/lib/omniai/google/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5683bc29149bb31082f782ed460483773e2a88537290909d04adc3fb5285aad4
|
|
4
|
+
data.tar.gz: dd2cdde8860b212f25a4c761e1e48eccb31aeb0cbaa58dc963d48386f2352c6c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1880ce5da42484fa6f28ba61d14f4ee1d11cb4f3bf2d2b7d772b663af9222743775618dfc1f02de7ac1f50ac952ca742374b039722930ae28c6b388d67c0eff8
|
|
7
|
+
data.tar.gz: 6e8d16e2b1686ae61810ece5e5fb6c07fa69a4b0dcd25063261edc54e8765e228e3e40bd9fdde8b56fffc7b5c467f1a4e9c83708bc48b2bcd47ecfb144b52f31
|
data/README.md
CHANGED
|
@@ -266,20 +266,32 @@ client.transcribe("phone_call.mp3", model: OmniAI::Google::Transcribe::Model::TE
|
|
|
266
266
|
# For medical conversations
|
|
267
267
|
client.transcribe("medical_interview.mp3", model: OmniAI::Google::Transcribe::Model::MEDICAL_CONVERSATION)
|
|
268
268
|
|
|
269
|
+
# Latest-generation multilingual model (recommended)
|
|
270
|
+
client.transcribe("audio.mp3", model: OmniAI::Google::Transcribe::Model::CHIRP_3)
|
|
271
|
+
|
|
269
272
|
# Other available models
|
|
270
273
|
client.transcribe("audio.mp3", model: OmniAI::Google::Transcribe::Model::CHIRP_2) # Enhanced model
|
|
271
274
|
client.transcribe("audio.mp3", model: OmniAI::Google::Transcribe::Model::CHIRP) # Universal model
|
|
272
275
|
```
|
|
273
276
|
|
|
274
277
|
**Available Model Constants:**
|
|
278
|
+
- `OmniAI::Google::Transcribe::Model::CHIRP_3` - Latest-generation multilingual ASR model (recommended)
|
|
279
|
+
- `OmniAI::Google::Transcribe::Model::CHIRP_2` - Enhanced universal model
|
|
280
|
+
- `OmniAI::Google::Transcribe::Model::CHIRP` - Universal model
|
|
275
281
|
- `OmniAI::Google::Transcribe::Model::LATEST_SHORT` - Optimized for audio < 60 seconds
|
|
276
282
|
- `OmniAI::Google::Transcribe::Model::LATEST_LONG` - Optimized for long-form audio
|
|
283
|
+
- `OmniAI::Google::Transcribe::Model::TELEPHONY` - For phone/telephony audio
|
|
277
284
|
- `OmniAI::Google::Transcribe::Model::TELEPHONY_SHORT` - For short phone calls
|
|
278
|
-
- `OmniAI::Google::Transcribe::Model::TELEPHONY_LONG` - For long phone calls
|
|
285
|
+
- `OmniAI::Google::Transcribe::Model::TELEPHONY_LONG` - For long phone calls
|
|
279
286
|
- `OmniAI::Google::Transcribe::Model::MEDICAL_CONVERSATION` - For medical conversations
|
|
280
287
|
- `OmniAI::Google::Transcribe::Model::MEDICAL_DICTATION` - For medical dictation
|
|
281
|
-
|
|
282
|
-
|
|
288
|
+
|
|
289
|
+
> **Region note:** `CHIRP_3` is only served from the `us` and `eu` multi-region endpoints (not `global`,
|
|
290
|
+
> and not zonal regions like `us-east4`). The provider maps the configured `location_id` to its
|
|
291
|
+
> multi-region parent — any `us*` region resolves to `us`, any `eu`/`europe*` region resolves to `eu` —
|
|
292
|
+
> and falls back to `us` for anything else, including when nothing is configured. This means a Vertex AI client configured with a zonal
|
|
293
|
+
> `location_id` (e.g. `us-east4`) for Gemini will still route `CHIRP_3` correctly. `CHIRP_2` is always
|
|
294
|
+
> routed to `us-central1`.
|
|
283
295
|
|
|
284
296
|
#### Supported Formats
|
|
285
297
|
|
|
@@ -12,10 +12,12 @@ module OmniAI
|
|
|
12
12
|
include TranscribeHelpers
|
|
13
13
|
|
|
14
14
|
module Model
|
|
15
|
+
CHIRP_3 = "chirp_3"
|
|
15
16
|
CHIRP_2 = "chirp_2"
|
|
16
17
|
CHIRP = "chirp"
|
|
17
18
|
LATEST_LONG = "latest_long"
|
|
18
19
|
LATEST_SHORT = "latest_short"
|
|
20
|
+
TELEPHONY = "telephony"
|
|
19
21
|
TELEPHONY_LONG = "telephony_long"
|
|
20
22
|
TELEPHONY_SHORT = "telephony_short"
|
|
21
23
|
MEDICAL_CONVERSATION = "medical_conversation"
|
|
@@ -111,7 +113,7 @@ module OmniAI
|
|
|
111
113
|
# Speech-to-Text API uses different endpoints for regional vs global
|
|
112
114
|
endpoint = speech_endpoint
|
|
113
115
|
speech_connection = HTTP.persistent(endpoint)
|
|
114
|
-
.timeout(
|
|
116
|
+
.timeout(**http_timeout_options)
|
|
115
117
|
.accept(:json)
|
|
116
118
|
|
|
117
119
|
# Add authentication if using credentials
|
|
@@ -17,16 +17,44 @@ module OmniAI
|
|
|
17
17
|
case @model
|
|
18
18
|
when "chirp_2"
|
|
19
19
|
"us-central1"
|
|
20
|
+
when "chirp_3"
|
|
21
|
+
chirp_3_location_id
|
|
20
22
|
else
|
|
21
23
|
@client.instance_variable_get(:@location_id) || "global"
|
|
22
24
|
end
|
|
23
25
|
end
|
|
24
26
|
|
|
27
|
+
# Chirp 3 is only served from the `us` and `eu` multi-region endpoints (not `global`, and
|
|
28
|
+
# not zonal regions like `us-east4`). A Vertex client typically configures a zonal
|
|
29
|
+
# `location_id` for Gemini, so map any configured region to its multi-region parent and
|
|
30
|
+
# default to `us` for everything else (unset or any non-EU region).
|
|
31
|
+
#
|
|
32
|
+
# @return [String] "us" or "eu"
|
|
33
|
+
def chirp_3_location_id
|
|
34
|
+
case @client.instance_variable_get(:@location_id)
|
|
35
|
+
when /\A(eu|europe)/i then "eu"
|
|
36
|
+
else "us"
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
25
40
|
# @return [String]
|
|
26
41
|
def speech_endpoint
|
|
27
42
|
location_id == "global" ? "https://speech.googleapis.com" : "https://#{location_id}-speech.googleapis.com"
|
|
28
43
|
end
|
|
29
44
|
|
|
45
|
+
# Normalizes the client timeout into keyword args for HTTP.rb's `.timeout`. The speech
|
|
46
|
+
# endpoints build their own connections, so (unlike the base client, which passes the
|
|
47
|
+
# value straight through) they must accept both a scalar and a per-operation Hash. A Hash
|
|
48
|
+
# is passed through untouched; a scalar (or nil) is wrapped per-operation as before.
|
|
49
|
+
#
|
|
50
|
+
# @return [Hash]
|
|
51
|
+
def http_timeout_options
|
|
52
|
+
timeout = @client.timeout
|
|
53
|
+
return timeout if timeout.is_a?(Hash)
|
|
54
|
+
|
|
55
|
+
{ connect: timeout, write: timeout, read: timeout }
|
|
56
|
+
end
|
|
57
|
+
|
|
30
58
|
# @return [Array<String>, nil]
|
|
31
59
|
def language_codes
|
|
32
60
|
case @language
|
|
@@ -184,7 +212,7 @@ module OmniAI
|
|
|
184
212
|
def poll_operation!(operation_name)
|
|
185
213
|
endpoint = speech_endpoint
|
|
186
214
|
connection = HTTP.persistent(endpoint)
|
|
187
|
-
.timeout(
|
|
215
|
+
.timeout(**http_timeout_options)
|
|
188
216
|
.accept(:json)
|
|
189
217
|
|
|
190
218
|
# Add authentication if using credentials
|
|
@@ -222,7 +250,7 @@ module OmniAI
|
|
|
222
250
|
def request_batch!
|
|
223
251
|
endpoint = speech_endpoint
|
|
224
252
|
connection = HTTP.persistent(endpoint)
|
|
225
|
-
.timeout(
|
|
253
|
+
.timeout(**http_timeout_options)
|
|
226
254
|
.accept(:json)
|
|
227
255
|
|
|
228
256
|
# Add authentication if using credentials
|