cloudflare-ai 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f16a1fd5c112fb69440d4d89b1f27d167e854c537ca952b91e1f3f673c0efb5
4
- data.tar.gz: 59f999e994f5bd21071ee2f125b5e138ba353feb857912b1df472f35acc05006
3
+ metadata.gz: dfe55c49c1310aa3940e9bc8963535680a93ccf90a0b98061b26a75c85fb42d3
4
+ data.tar.gz: 79335c8d797ca3585f8fb7a770fd064ac3aa7e76622fd8b794fbbb88abf24b43
5
5
  SHA512:
6
- metadata.gz: 1d258b49887ca664f616aad4dbc74afc4bb8d49d0452933c2ee907306af56cf81da0bf084c260ddbe4db9d96f522812cc5abfb771c3d94757f12342334b16fd6
7
- data.tar.gz: 133fcfd21cedbdb324604ec0e9bd4aacb0750b9f486df7bc217998e4dd117f4e6d482572ba1feee8b629e3362857cb8b8f28d2f1e3c6f1d89acddd18b73752e9
6
+ metadata.gz: ee82ccc27724c218043e4702a1dd39ba0878c6ff35caa35592e9dae3b26ea7bd1ccb0b73b182ad97864d637414edcfb1a587ec4b203fc4fe30767f15d56f4d74
7
+ data.tar.gz: 4cab183f89a4fd13cca87e8b0e264c93eddae4bf12c62a7ccfb5ae9e453b4cabc6916a3fdbf573ca55291ee8c537df190ebae06dbb2e2377af208e10cca8fe0c
data/README.md CHANGED
@@ -14,18 +14,14 @@ generation to make legal services more accessible. [Email me](mailto:cloudflare-
14
14
 
15
15
  If you're looking for legal help, it's best to book a slot via https://www.krishnan.ca.
16
16
 
17
- # Todo
18
- It's still early days, and here are my immediate priorities:
19
- * [x] Support for streamed responses
20
- * [x] CI pipeline
21
- * [ ] Support for more AI model categories
22
- * [x] [Text Generation](https://developers.cloudflare.com/workers-ai/models/text-generation/)
23
- * [x] [Text Embeddings](https://developers.cloudflare.com/workers-ai/models/text-embeddings/)
24
- * [x] [Text Classification](https://developers.cloudflare.com/workers-ai/models/text-classification/)
25
- * [x] [Translation](https://developers.cloudflare.com/workers-ai/models/translation/)
26
- * [x] [Image Classification](https://developers.cloudflare.com/workers-ai/models/image-classification/)
27
- * [x] [Text-to-Image](https://developers.cloudflare.com/workers-ai/models/text-to-image/)
28
- * [ ] [Automatic Speech Recognition](https://developers.cloudflare.com/workers-ai/models/speech-recognition/)
17
+ # Supported features
18
+ * [x] [Text Generation](https://developers.cloudflare.com/workers-ai/models/text-generation/)
19
+ * [x] [Text Embeddings](https://developers.cloudflare.com/workers-ai/models/text-embeddings/)
20
+ * [x] [Text Classification](https://developers.cloudflare.com/workers-ai/models/text-classification/)
21
+ * [x] [Translation](https://developers.cloudflare.com/workers-ai/models/translation/)
22
+ * [x] [Image Classification](https://developers.cloudflare.com/workers-ai/models/image-classification/)
23
+ * [x] [Text-to-Image](https://developers.cloudflare.com/workers-ai/models/text-to-image/)
24
+ * [x] [Automatic Speech Recognition](https://developers.cloudflare.com/workers-ai/models/speech-recognition/)
29
25
 
30
26
  # Table of Contents
31
27
 
@@ -180,9 +176,23 @@ All invocations of the `draw` method returns a `Cloudflare::AI::Results::TextToI
180
176
  result = client.translate(text: "Hello Jello", source_lang: "en", target_lang: "fr")
181
177
  p result.translated_text # => Bonjour Jello
182
178
  ```
179
+ #### Result object
180
+ All invocations of the `translate` method return a `Cloudflare::AI::Results::Translate`.
181
+
183
182
 
183
+ ### Automatic speech recognition
184
+ You can pass either a URL (source_url:) or a file (audio:) to the `transcribe` method.
185
+ ```ruby
186
+ result = client.transcribe(source_url: "http://example.org/path/to/audio.wav")
187
+ p result.text # => "Hello Jello."
188
+ p result.word_count # => 2
189
+ p result.to_json # => {"result":{"text":"Hello Jello.","word_count":2,"words":[{"word":"Hello","start":0,"end":1.340000033378601},{"word":"Jello.","start":1.340000033378601,"end":1.340000033378601}]},"success":true,"errors":[],"messages":[]}
190
+
191
+ result = client.transcribe(audio: File.open("/path/to/audio.wav"))
192
+ # ...
193
+ ```
184
194
  #### Result object
185
- All invocations of the `translate` methods return a `Cloudflare::AI::Results::Translate`.
195
+ All invocations of the `transcribe` method return a `Cloudflare::AI::Results::AutomaticSpeechRecognition`.
186
196
 
187
197
  # Logging
188
198
 
@@ -2,7 +2,7 @@ require "event_stream_parser"
2
2
  require "faraday"
3
3
 
4
4
  class Cloudflare::AI::Client
5
- include Cloudflare::AI::Clients::ImageHelpers
5
+ include Cloudflare::AI::Clients::MediaHelpers
6
6
  include Cloudflare::AI::Clients::TextGenerationHelpers
7
7
 
8
8
  attr_reader :url, :account_id, :api_token
@@ -59,6 +59,17 @@ class Cloudflare::AI::Client
59
59
  Cloudflare::AI::Results::TextEmbedding.new(connection.post(url, payload).body)
60
60
  end
61
61
 
62
+ def transcribe(source_url: nil, audio: nil, model_name: Cloudflare::AI::Models.automatic_speech_recognition.first)
63
+ raise ArgumentError, "Must provide either audio_url or audio" if [source_url, audio].compact.size != 1
64
+
65
+ audio = download_audio(source_url) if source_url
66
+
67
+ url = service_url_for(account_id: account_id, model_name: model_name)
68
+ response = post_request_with_binary_file(url, audio)
69
+
70
+ Cloudflare::AI::Results::AutomaticSpeechRecognition.new(response.body)
71
+ end
72
+
62
73
  def translate(text:, target_lang:, source_lang: "en", model_name: Cloudflare::AI::Models.translation.first)
63
74
  url = service_url_for(account_id: account_id, model_name: model_name)
64
75
  payload = {text: text, target_lang: target_lang, source_lang: source_lang}.to_json
@@ -3,9 +3,18 @@ require "faraday/multipart"
3
3
  module Cloudflare
4
4
  module AI
5
5
  module Clients
6
- module ImageHelpers
6
+ module MediaHelpers
7
7
  private
8
8
 
9
+ def download_audio(source_url)
10
+ download_result = Faraday.new(source_url).get
11
+ binary_file = Tempfile.new(["cloudflare-ai-automatic-speech-recognition", ".wav"])
12
+ binary_file.binmode
13
+ binary_file.write(download_result.body)
14
+ binary_file.rewind
15
+ binary_file
16
+ end
17
+
9
18
  def post_request_with_binary_file(url, file)
10
19
  connection.post do |req|
11
20
  req.url url
@@ -4,7 +4,7 @@ class Cloudflare::AI::Models
4
4
  %w[@cf/meta/llama-2-7b-chat-fp16 @cf/meta/llama-2-7b-chat-int8 @cf/mistral/mistral-7b-instruct-v0.1 @hf/thebloke/codellama-7b-instruct-awq]
5
5
  end
6
6
 
7
- def speech_recognition
7
+ def automatic_speech_recognition
8
8
  %w[@cf/openai/whisper]
9
9
  end
10
10
 
@@ -31,7 +31,7 @@ class Cloudflare::AI::Models
31
31
  def all
32
32
  {
33
33
  text_generation: text_generation,
34
- speech_recognition: speech_recognition,
34
+ automatic_speech_recognition: automatic_speech_recognition,
35
35
  translation: translation,
36
36
  text_classification: text_classification,
37
37
  image_classification: image_classification,
@@ -0,0 +1,13 @@
1
+ class Cloudflare::AI::Results::AutomaticSpeechRecognition < Cloudflare::AI::Result
2
+ def text
3
+ result&.dig(:text) # nil if no shape
4
+ end
5
+
6
+ def word_count
7
+ result&.dig(:word_count) # nil if no shape
8
+ end
9
+
10
+ def words
11
+ result&.dig(:words) # nil if no shape
12
+ end
13
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Cloudflare
4
4
  module AI
5
- VERSION = "0.7.0"
5
+ VERSION = "0.8.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloudflare-ai
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ajay Krishnan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-25 00:00:00.000000000 Z
11
+ date: 2024-01-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -107,12 +107,13 @@ files:
107
107
  - README.md
108
108
  - lib/cloudflare/ai.rb
109
109
  - lib/cloudflare/ai/client.rb
110
- - lib/cloudflare/ai/clients/image_helpers.rb
110
+ - lib/cloudflare/ai/clients/media_helpers.rb
111
111
  - lib/cloudflare/ai/clients/text_generation_helpers.rb
112
112
  - lib/cloudflare/ai/contextual_logger.rb
113
113
  - lib/cloudflare/ai/message.rb
114
114
  - lib/cloudflare/ai/models.rb
115
115
  - lib/cloudflare/ai/result.rb
116
+ - lib/cloudflare/ai/results/automatic_speech_recognition.rb
116
117
  - lib/cloudflare/ai/results/image_classification.rb
117
118
  - lib/cloudflare/ai/results/text_classification.rb
118
119
  - lib/cloudflare/ai/results/text_embedding.rb