cloudflare-ai 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f16a1fd5c112fb69440d4d89b1f27d167e854c537ca952b91e1f3f673c0efb5
4
- data.tar.gz: 59f999e994f5bd21071ee2f125b5e138ba353feb857912b1df472f35acc05006
3
+ metadata.gz: dfe55c49c1310aa3940e9bc8963535680a93ccf90a0b98061b26a75c85fb42d3
4
+ data.tar.gz: 79335c8d797ca3585f8fb7a770fd064ac3aa7e76622fd8b794fbbb88abf24b43
5
5
  SHA512:
6
- metadata.gz: 1d258b49887ca664f616aad4dbc74afc4bb8d49d0452933c2ee907306af56cf81da0bf084c260ddbe4db9d96f522812cc5abfb771c3d94757f12342334b16fd6
7
- data.tar.gz: 133fcfd21cedbdb324604ec0e9bd4aacb0750b9f486df7bc217998e4dd117f4e6d482572ba1feee8b629e3362857cb8b8f28d2f1e3c6f1d89acddd18b73752e9
6
+ metadata.gz: ee82ccc27724c218043e4702a1dd39ba0878c6ff35caa35592e9dae3b26ea7bd1ccb0b73b182ad97864d637414edcfb1a587ec4b203fc4fe30767f15d56f4d74
7
+ data.tar.gz: 4cab183f89a4fd13cca87e8b0e264c93eddae4bf12c62a7ccfb5ae9e453b4cabc6916a3fdbf573ca55291ee8c537df190ebae06dbb2e2377af208e10cca8fe0c
data/README.md CHANGED
@@ -14,18 +14,14 @@ generation to make legal services more accessible. [Email me](mailto:cloudflare-
14
14
 
15
15
  If you're looking for legal help, it's best to book a slot via https://www.krishnan.ca.
16
16
 
17
- # Todo
18
- It's still early days, and here are my immediate priorities:
19
- * [x] Support for streamed responses
20
- * [x] CI pipeline
21
- * [ ] Support for more AI model categories
22
- * [x] [Text Generation](https://developers.cloudflare.com/workers-ai/models/text-generation/)
23
- * [x] [Text Embeddings](https://developers.cloudflare.com/workers-ai/models/text-embeddings/)
24
- * [x] [Text Classification](https://developers.cloudflare.com/workers-ai/models/text-classification/)
25
- * [x] [Translation](https://developers.cloudflare.com/workers-ai/models/translation/)
26
- * [x] [Image Classification](https://developers.cloudflare.com/workers-ai/models/image-classification/)
27
- * [x] [Text-to-Image](https://developers.cloudflare.com/workers-ai/models/text-to-image/)
28
- * [ ] [Automatic Speech Recognition](https://developers.cloudflare.com/workers-ai/models/speech-recognition/)
17
+ # Supported features
18
+ * [x] [Text Generation](https://developers.cloudflare.com/workers-ai/models/text-generation/)
19
+ * [x] [Text Embeddings](https://developers.cloudflare.com/workers-ai/models/text-embeddings/)
20
+ * [x] [Text Classification](https://developers.cloudflare.com/workers-ai/models/text-classification/)
21
+ * [x] [Translation](https://developers.cloudflare.com/workers-ai/models/translation/)
22
+ * [x] [Image Classification](https://developers.cloudflare.com/workers-ai/models/image-classification/)
23
+ * [x] [Text-to-Image](https://developers.cloudflare.com/workers-ai/models/text-to-image/)
24
+ * [x] [Automatic Speech Recognition](https://developers.cloudflare.com/workers-ai/models/speech-recognition/)
29
25
 
30
26
  # Table of Contents
31
27
 
@@ -180,9 +176,23 @@ All invocations of the `draw` method returns a `Cloudflare::AI::Results::TextToI
180
176
  result = client.translate(text: "Hello Jello", source_lang: "en", target_lang: "fr")
181
177
  p result.translated_text # => Hola Jello
182
178
  ```
179
+ #### Result object
180
+ All invocations of the `translate` method return a `Cloudflare::AI::Results::Translate`.
181
+
183
182
 
183
+ ### Automatic speech recognition
184
+ You can pass either a URL (source_url:) or a file (audio:) to the `transcribe` method.
185
+ ```ruby
186
+ result = client.transcribe(source_url: "http://example.org/path/to/audio.wav")
187
+ p result.text # => "Hello Jello."
188
+ p result.word_count # => 2
189
+ p result.to_json # => {"result":{"text":"Hello Jello.","word_count":2,"words":[{"word":"Hello","start":0,"end":1.340000033378601},{"word":"Jello.","start":1.340000033378601,"end":1.340000033378601}]},"success":true,"errors":[],"messages":[]}
190
+
191
+ result = client.transcribe(audio: File.open("/path/to/audio.wav"))
192
+ # ...
193
+ ```
184
194
  #### Result object
185
- All invocations of the `translate` methods return a `Cloudflare::AI::Results::Translate`.
195
+ All invocations of the `transcribe` method return a `Cloudflare::AI::Results::AutomaticSpeechRecognition`.
186
196
 
187
197
  # Logging
188
198
 
@@ -2,7 +2,7 @@ require "event_stream_parser"
2
2
  require "faraday"
3
3
 
4
4
  class Cloudflare::AI::Client
5
- include Cloudflare::AI::Clients::ImageHelpers
5
+ include Cloudflare::AI::Clients::MediaHelpers
6
6
  include Cloudflare::AI::Clients::TextGenerationHelpers
7
7
 
8
8
  attr_reader :url, :account_id, :api_token
@@ -59,6 +59,17 @@ class Cloudflare::AI::Client
59
59
  Cloudflare::AI::Results::TextEmbedding.new(connection.post(url, payload).body)
60
60
  end
61
61
 
62
# Transcribes audio with an automatic speech recognition model.
#
# Exactly one of +source_url+ or +audio+ must be supplied. When +source_url+
# is given, the audio is first fetched with +download_audio+ and the
# resulting temporary file is removed once the request has finished.
#
# @param source_url [String, nil] location the audio is downloaded from
# @param audio [Object, nil] audio handed unchanged to
#   +post_request_with_binary_file+ (presumably an open File/IO, confirm
#   against that helper)
# @param model_name [String] model to run; defaults to the first entry of
#   +Cloudflare::AI::Models.automatic_speech_recognition+
# @return [Cloudflare::AI::Results::AutomaticSpeechRecognition] wrapper built
#   from the response body
# @raise [ArgumentError] if neither or both of +source_url+ and +audio+ are given
def transcribe(source_url: nil, audio: nil, model_name: Cloudflare::AI::Models.automatic_speech_recognition.first)
  # The keyword is `source_url`; the message must name the real parameter.
  raise ArgumentError, "Must provide exactly one of source_url or audio" if [source_url, audio].compact.size != 1

  downloaded = download_audio(source_url) if source_url

  url = service_url_for(account_id: account_id, model_name: model_name)
  response = post_request_with_binary_file(url, downloaded || audio)

  Cloudflare::AI::Results::AutomaticSpeechRecognition.new(response.body)
ensure
  # Only clean up the file this method created; caller-supplied audio is
  # left open because the caller owns it.
  downloaded.close! if downloaded.respond_to?(:close!)
end
72
+
62
73
  def translate(text:, target_lang:, source_lang: "en", model_name: Cloudflare::AI::Models.translation.first)
63
74
  url = service_url_for(account_id: account_id, model_name: model_name)
64
75
  payload = {text: text, target_lang: target_lang, source_lang: source_lang}.to_json
@@ -3,9 +3,18 @@ require "faraday/multipart"
3
3
  module Cloudflare
4
4
  module AI
5
5
  module Clients
6
- module ImageHelpers
6
+ module MediaHelpers
7
7
  private
8
8
 
9
# Downloads the audio at +source_url+ into a binary-mode temporary file.
#
# The returned file is rewound to the start so it can be read immediately.
# The caller is responsible for closing/unlinking it.
#
# @param source_url [String] URL to fetch with a GET request
# @return [Tempfile] temp file (".wav" suffix) holding the response body
# @raise [Faraday::Error] if the server answers with a non-success status
def download_audio(source_url)
  response = Faraday.new(source_url).get
  # Without this check an error page (404/500 body) would be written to the
  # temp file and later uploaded as if it were audio.
  unless response.success?
    raise Faraday::Error, "Failed to download audio from #{source_url} (HTTP #{response.status})"
  end

  tempfile = Tempfile.new(["cloudflare-ai-automatic-speech-recognition", ".wav"])
  begin
    tempfile.binmode
    tempfile.write(response.body)
    tempfile.rewind
  rescue StandardError
    # Do not leave a stray temp file behind when the write fails.
    tempfile.close!
    raise
  end
  tempfile
end
17
+
9
18
  def post_request_with_binary_file(url, file)
10
19
  connection.post do |req|
11
20
  req.url url
@@ -4,7 +4,7 @@ class Cloudflare::AI::Models
4
4
  %w[@cf/meta/llama-2-7b-chat-fp16 @cf/meta/llama-2-7b-chat-int8 @cf/mistral/mistral-7b-instruct-v0.1 @hf/thebloke/codellama-7b-instruct-awq]
5
5
  end
6
6
 
7
- def speech_recognition
7
+ def automatic_speech_recognition
8
8
  %w[@cf/openai/whisper]
9
9
  end
10
10
 
@@ -31,7 +31,7 @@ class Cloudflare::AI::Models
31
31
  def all
32
32
  {
33
33
  text_generation: text_generation,
34
- speech_recognition: speech_recognition,
34
+ automatic_speech_recognition: automatic_speech_recognition,
35
35
  translation: translation,
36
36
  text_classification: text_classification,
37
37
  image_classification: image_classification,
@@ -0,0 +1,13 @@
1
# Result wrapper for automatic speech recognition responses.
#
# Every reader looks up a single key in +result+ (presumably provided by
# Cloudflare::AI::Result, confirm there) and evaluates to nil when +result+
# itself is nil.
class Cloudflare::AI::Results::AutomaticSpeechRecognition < Cloudflare::AI::Result
  # @!method text
  #   @return [Object, nil] value stored under +:text+
  # @!method word_count
  #   @return [Object, nil] value stored under +:word_count+
  # @!method words
  #   @return [Object, nil] value stored under +:words+
  %i[text word_count words].each do |field|
    define_method(field) { result&.dig(field) } # nil if no shape
  end
end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Cloudflare
4
4
  module AI
5
- VERSION = "0.7.0"
5
+ VERSION = "0.8.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloudflare-ai
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ajay Krishnan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-25 00:00:00.000000000 Z
11
+ date: 2024-01-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -107,12 +107,13 @@ files:
107
107
  - README.md
108
108
  - lib/cloudflare/ai.rb
109
109
  - lib/cloudflare/ai/client.rb
110
- - lib/cloudflare/ai/clients/image_helpers.rb
110
+ - lib/cloudflare/ai/clients/media_helpers.rb
111
111
  - lib/cloudflare/ai/clients/text_generation_helpers.rb
112
112
  - lib/cloudflare/ai/contextual_logger.rb
113
113
  - lib/cloudflare/ai/message.rb
114
114
  - lib/cloudflare/ai/models.rb
115
115
  - lib/cloudflare/ai/result.rb
116
+ - lib/cloudflare/ai/results/automatic_speech_recognition.rb
116
117
  - lib/cloudflare/ai/results/image_classification.rb
117
118
  - lib/cloudflare/ai/results/text_classification.rb
118
119
  - lib/cloudflare/ai/results/text_embedding.rb