ai.muna.muna 0.0.43 → 0.0.45
This diff shows the changes between package versions that have been publicly released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/Editor/MunaMenu.cs +11 -1
- package/Plugins/Android/Muna.aar +0 -0
- package/Plugins/Linux/arm64/libFunction.so +0 -0
- package/Plugins/Linux/x86_64/libFunction.so +0 -0
- package/Plugins/Windows/arm64/Function.dll +0 -0
- package/Plugins/Windows/x86_64/Function.dll +0 -0
- package/Plugins/iOS/Function.xcframework/Info.plist +11 -11
- package/Plugins/iOS/Function.xcframework/ios-arm64/Function.framework/Function +0 -0
- package/Plugins/iOS/Function.xcframework/ios-arm64/Function.framework/Headers/FXNVersion.h +1 -1
- package/Plugins/iOS/Function.xcframework/ios-arm64/Function.framework/Info.plist +0 -0
- package/Plugins/iOS/Function.xcframework/ios-arm64/Function.framework/_CodeSignature/CodeResources +3 -3
- package/Plugins/iOS/Function.xcframework/ios-arm64_x86_64-simulator/Function.framework/Function +0 -0
- package/Plugins/iOS/Function.xcframework/ios-arm64_x86_64-simulator/Function.framework/Headers/FXNVersion.h +1 -1
- package/Plugins/iOS/Function.xcframework/ios-arm64_x86_64-simulator/Function.framework/Info.plist +0 -0
- package/Plugins/iOS/Function.xcframework/ios-arm64_x86_64-simulator/Function.framework/_CodeSignature/CodeResources +3 -3
- package/Plugins/iOS/Function.xcframework/xros-arm64/Function.framework/Function +0 -0
- package/Plugins/iOS/Function.xcframework/xros-arm64/Function.framework/Headers/FXNVersion.h +1 -1
- package/Plugins/iOS/Function.xcframework/xros-arm64/Function.framework/Info.plist +0 -0
- package/Plugins/iOS/Function.xcframework/xros-arm64/Function.framework/_CodeSignature/CodeResources +3 -3
- package/Plugins/iOS/Function.xcframework/xros-arm64_x86_64-simulator/Function.framework/Function +0 -0
- package/Plugins/iOS/Function.xcframework/xros-arm64_x86_64-simulator/Function.framework/Headers/FXNVersion.h +1 -1
- package/Plugins/iOS/Function.xcframework/xros-arm64_x86_64-simulator/Function.framework/Info.plist +0 -0
- package/Plugins/iOS/Function.xcframework/xros-arm64_x86_64-simulator/Function.framework/_CodeSignature/CodeResources +3 -3
- package/Plugins/macOS/Function.dylib +0 -0
- package/README.md +1 -1
- package/Runtime/API/DotNetClient.cs +0 -3
- package/Runtime/Beta/BetaClient.cs +14 -1
- package/Runtime/Beta/OpenAI/AudioService.cs +38 -0
- package/Runtime/Beta/OpenAI/AudioService.cs.meta +11 -0
- package/Runtime/Beta/OpenAI/ChatService.cs +38 -0
- package/Runtime/Beta/OpenAI/ChatService.cs.meta +11 -0
- package/Runtime/Beta/OpenAI/CompletionService.cs +117 -0
- package/Runtime/Beta/OpenAI/CompletionService.cs.meta +11 -0
- package/Runtime/Beta/OpenAI/EmbeddingService.cs +252 -0
- package/Runtime/Beta/OpenAI/EmbeddingService.cs.meta +11 -0
- package/Runtime/Beta/OpenAI/OpenAIClient.cs +50 -0
- package/Runtime/Beta/OpenAI/OpenAIClient.cs.meta +11 -0
- package/Runtime/Beta/OpenAI/SpeechService.cs +250 -0
- package/Runtime/Beta/OpenAI/SpeechService.cs.meta +11 -0
- package/Runtime/Beta/OpenAI/Types.cs +365 -0
- package/Runtime/Beta/OpenAI/Types.cs.meta +11 -0
- package/Runtime/Beta/OpenAI.meta +8 -0
- package/Runtime/Beta/Remote/RemotePredictionService.cs +45 -66
- package/Runtime/Beta/{Value.cs → Remote/Value.cs} +3 -4
- package/Runtime/C/Configuration.cs +1 -1
- package/Runtime/C/Function.cs +1 -1
- package/Runtime/C/Prediction.cs +1 -1
- package/Runtime/C/PredictionStream.cs +1 -1
- package/Runtime/C/Predictor.cs +1 -1
- package/Runtime/C/Value.cs +3 -2
- package/Runtime/C/ValueMap.cs +1 -1
- package/Runtime/Muna.cs +2 -2
- package/Runtime/Types/Parameter.cs +75 -0
- package/Runtime/Types/Parameter.cs.meta +11 -0
- package/Runtime/Types/Predictor.cs +0 -53
- package/Unity/API/PredictionCacheClient.cs +1 -1
- package/Unity/MunaUnity.cs +0 -1
- package/package.json +1 -1
- package/Runtime/Beta/{Value.cs.meta → Remote/Value.cs.meta} +0 -0
package/Runtime/Beta/OpenAI/EmbeddingService.cs
@@ -0,0 +1,252 @@
+/*
+* Muna
+* Copyright © 2025 NatML Inc. All rights reserved.
+*/
+
+#nullable enable
+
+namespace Muna.Beta.OpenAI {
+
+    using System;
+    using System.Collections.Generic;
+    using System.Linq;
+    using System.Runtime.Serialization;
+    using System.Threading.Tasks;
+    using Newtonsoft.Json;
+    using Newtonsoft.Json.Converters;
+    using Newtonsoft.Json.Linq;
+    using Services;
+    using PredictorService = global::Muna.Services.PredictorService;
+    using EdgePredictionService = global::Muna.Services.PredictionService;
+
+    /// <summary>
+    /// Create embeddings.
+    /// </summary>
+    public sealed class EmbeddingService {
+
+        #region --Client API--
+        /// <summary>
+        /// Embedding encoding format.
+        /// </summary>
+        [JsonConverter(typeof(StringEnumConverter))]
+        public enum EncodingFormat {
+            /// <summary>
+            /// Float array.
+            /// </summary>
+            [EnumMember(Value = @"float")]
+            Float = 1,
+            /// <summary>
+            /// Base64 string.
+            /// </summary>
+            [EnumMember(Value = @"base64")]
+            Base64 = 2
+        }
+
+        /// <summary>
+        /// Create an embedding vector representing the input text.
+        /// </summary>
+        /// <param name="input">Input text to embed. The input must not exceed the max input tokens for the model.</param>
+        /// <param name="model">Embedding model predictor tag.</param>
+        /// <param name="dimensions">The number of dimensions the resulting output embeddings should have. Only supported by Matryoshka embedding models.</param>
+        /// <param name="encodingFormat">The format to return the embeddings in.</param>
+        /// <param name="acceleration">Prediction acceleration.</param>
+        /// <returns>Embeddings.</returns>
+        public Task<CreateEmbeddingResponse> Create(
+            string model,
+            string input,
+            int? dimensions = null,
+            EncodingFormat encodingFormat = EncodingFormat.Float,
+            object? acceleration = null
+        ) => Create(
+            model,
+            new[] { input },
+            dimensions: dimensions,
+            encodingFormat: encodingFormat,
+            acceleration: acceleration
+        );
+
+        /// <summary>
+        /// Create an embedding vector representing the input text.
+        /// </summary>
+        /// <param name="input">Input text to embed. The input must not exceed the max input tokens for the model.</param>
+        /// <param name="model">Embedding model predictor tag.</param>
+        /// <param name="dimensions">The number of dimensions the resulting output embeddings should have. Only supported by Matryoshka embedding models.</param>
+        /// <param name="encodingFormat">The format to return the embeddings in.</param>
+        /// <param name="acceleration">Prediction acceleration.</param>
+        /// <returns>Embeddings.</returns>
+        public async Task<CreateEmbeddingResponse> Create(
+            string model,
+            string[] input,
+            int? dimensions = null,
+            EncodingFormat encodingFormat = EncodingFormat.Float,
+            object? acceleration = null
+        ) {
+            // Ensure we have a delegate
+            if (!cache.ContainsKey(model)) {
+                var @delegate = await CreateEmbeddingDelegate(model);
+                cache.Add(model, @delegate);
+            }
+            // Make prediction
+            var handler = cache[model];
+            var result = await handler(
+                model,
+                input,
+                dimensions,
+                encodingFormat,
+                acceleration: acceleration ?? Acceleration.Auto
+            );
+            // Return
+            return result;
+        }
+        #endregion
+
+
+        #region --Operations--
+        private readonly PredictorService predictors;
+        private readonly EdgePredictionService predictions;
+        private readonly RemotePredictionService remotePredictions;
+        private readonly Dictionary<string, EmbeddingDelegate> cache;
+        private delegate Task<CreateEmbeddingResponse> EmbeddingDelegate(
+            string model,
+            string[] input,
+            int? dimensions,
+            EncodingFormat encodingFormat,
+            object acceleration
+        );
+
+        internal EmbeddingService(
+            PredictorService predictors,
+            EdgePredictionService predictions,
+            RemotePredictionService remotePredictions
+        ) {
+            this.predictors = predictors;
+            this.predictions = predictions;
+            this.remotePredictions = remotePredictions;
+            this.cache = new();
+        }
+
+        private async Task<EmbeddingDelegate> CreateEmbeddingDelegate(string tag) {
+            // Retrieve predictor
+            var predictor = await predictors.Retrieve(tag);
+            if (predictor == null)
+                throw new ArgumentException($"{tag} cannot be used for OpenAI embedding API because the predictor could not be found.");
+            // Get required inputs
+            var signature = predictor.signature!;
+            var requiredInputParams = signature.inputs.Where(parameter => parameter.optional == false).ToArray();
+            if (requiredInputParams.Length != 1)
+                throw new InvalidOperationException($"{tag} cannot be used with OpenAI embedding API because it does not have exactly one required input parameter.");
+            // Check the text input parameter
+            var inputParam = requiredInputParams[0];
+            if (inputParam.type != Dtype.List)
+                throw new InvalidOperationException($"{tag} cannot be used with OpenAI embedding API because it does not have the required text embedding input parameter.");
+            // Get the Matryoshka dim parameter (optional)
+            var matryoshkaParam = signature.inputs.FirstOrDefault(parameter =>
+                new[] {
+                    Dtype.Int8, Dtype.Int16, Dtype.Int32, Dtype.Int64,
+                    Dtype.Uint8, Dtype.Uint16, Dtype.Uint32, Dtype.Uint64
+                }.Contains(parameter.type) &&
+                parameter.denotation == "embedding.dims"
+            );
+            // Get the embedding output parameter
+            var (embeddingParamIdx, embeddingParam) = signature.outputs
+                .Select((parameter, idx) => (idx, parameter))
+                .Where(pair =>
+                    pair.parameter.type == Dtype.Float32 &&
+                    pair.parameter.denotation == "embedding"
+                )
+                .FirstOrDefault();
+            if (embeddingParam == null)
+                throw new InvalidOperationException($"{tag} cannot be used with OpenAI embedding API because it has no outputs with an `embedding` denotation.");
+            // Get the index of the usage output (optional)
+            var (usageParamIdx, usageParam) = signature.outputs
+                .Select((parameter, idx) => (idx, parameter))
+                .Where(pair =>
+                    pair.parameter.type == Dtype.Dict &&
+                    pair.parameter.denotation == "openai.embedding.usage"
+                )
+                .FirstOrDefault();
+            // Create delegate
+            EmbeddingDelegate result = async (
+                string model,
+                string[] input,
+                int? dimensions,
+                EncodingFormat encodingFormat,
+                object acceleration
+            ) => {
+                // Build prediction input map
+                var inputMap = new Dictionary<string, object?> {
+                    [inputParam.name] = input
+                };
+                if (dimensions != null && matryoshkaParam != null)
+                    inputMap[matryoshkaParam.name] = dimensions.Value;
+                // Create prediction
+                var prediction = await CreatePrediction(
+                    model,
+                    inputs: inputMap,
+                    acceleration: acceleration
+                );
+                // Check for error
+                if (prediction.error != null)
+                    throw new InvalidOperationException(prediction.error);
+                // Check returned embedding
+                var rawEmbeddingMatrix = prediction.results![embeddingParamIdx]!;
+                if (!(rawEmbeddingMatrix is Tensor<float> embeddingMatrix))
+                    throw new InvalidOperationException($"{tag} cannot be used with OpenAI embedding API because it returned an object of type {rawEmbeddingMatrix.GetType()} instead of an embedding matrix.");
+                if (embeddingMatrix.shape.Length != 2) {
+                    var shapeStr = "(" + string.Join(",", embeddingMatrix.shape) + ")";
+                    throw new InvalidOperationException($"{tag} cannot be used with OpenAI embedding API because it returned an embedding matrix with invalid shape: {shapeStr}");
+                }
+                // Create embedding response
+                var embeddings = Enumerable
+                    .Range(0, embeddingMatrix.shape[0])
+                    .Select(idx => ParseEmbedding(embeddingMatrix, idx, encodingFormat))
+                    .ToArray();
+                var usage = usageParam != null ?
+                    (prediction.results![usageParamIdx]! as JObject)!.ToObject<CreateEmbeddingResponse.UsageInfo>() :
+                    default;
+                var response = new CreateEmbeddingResponse {
+                    Object = "list",
+                    Model = model,
+                    Data = embeddings,
+                    Usage = usage
+                };
+                // Return
+                return response;
+            };
+            // Return
+            return result;
+        }
+
+        private Task<Prediction> CreatePrediction(
+            string tag,
+            Dictionary<string, object?> inputs,
+            object acceleration
+        ) => acceleration switch {
+            Acceleration acc => predictions.Create(tag, inputs, acc),
+            RemoteAcceleration acc => remotePredictions.Create(tag, inputs, acc),
+            _ => throw new InvalidOperationException($"Cannot create {tag} prediction because acceleration is invalid: {acceleration}")
+        };
+
+        private unsafe Embedding ParseEmbedding(
+            Tensor<float> matrix,
+            int index,
+            EncodingFormat format
+        ) {
+            fixed (float* data = matrix) {
+                var baseAddress = data + index * matrix.shape[1];
+                var floatSpan = new ReadOnlySpan<float>(baseAddress, matrix.shape[1]);
+                var byteSpan = new ReadOnlySpan<byte>(baseAddress, matrix.shape[1] * sizeof(float));
+                var embeddingVector = format == EncodingFormat.Float ? floatSpan.ToArray() : null;
+                var base64Rep = format == EncodingFormat.Base64 ? Convert.ToBase64String(byteSpan) : null;
+                var embedding = new Embedding {
+                    Object = @"embedding",
+                    Floats = embeddingVector,
+                    Index = index,
+                    Base64 = base64Rep
+                };
+                return embedding;
+            }
+        }
+        #endregion
+    }
+}
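The embedding surface above mirrors OpenAI's embeddings API on top of Muna predictors. A minimal usage sketch follows; the `muna` client variable, the `Beta.OpenAI` accessor (presumably what the BetaClient.cs change above adds), and the predictor tag are all illustrative assumptions, not taken from this diff:

    // Hypothetical call site; only EmbeddingService's own API is from the diff.
    var response = await muna.Beta.OpenAI.Embeddings.Create(
        model: "@example/text-embedder",   // hypothetical predictor tag
        input: new[] { "hello world", "goodbye world" },
        dimensions: 256,                   // forwarded only if the predictor has an `embedding.dims` input
        encodingFormat: EmbeddingService.EncodingFormat.Float
    );
    var vector = response.Data[0].Floats;  // one Embedding per input; Base64 is null for the Float format

The single-string Create overload simply wraps its input in a one-element array and forwards to the array overload, so both paths return a full CreateEmbeddingResponse.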
package/Runtime/Beta/OpenAI/OpenAIClient.cs
@@ -0,0 +1,50 @@
+/*
+* Muna
+* Copyright © 2025 NatML Inc. All rights reserved.
+*/
+
+#nullable enable
+
+namespace Muna.Beta.OpenAI {
+
+    using Services;
+    using PredictorService = global::Muna.Services.PredictorService;
+    using EdgePredictionService = global::Muna.Services.PredictionService;
+
+    /// <summary>
+    /// Mock OpenAI client.
+    /// </summary>
+    public sealed class OpenAIClient {
+
+        #region --Client API--
+        /// <summary>
+        /// Create chat conversations.
+        /// </summary>
+        public readonly ChatService Chat;
+
+        /// <summary>
+        /// Create embedding vectors.
+        /// </summary>
+        public readonly EmbeddingService Embeddings;
+
+        /// <summary>
+        /// Create speech and transcriptions.
+        /// </summary>
+        public readonly AudioService Audio;
+        #endregion
+
+
+        #region --Operations--
+
+        internal OpenAIClient(
+            PredictorService predictors,
+            EdgePredictionService predictions,
+            RemotePredictionService remotePredictions
+        ) {
+            Chat = new ChatService(predictors, predictions, remotePredictions);
+            Embeddings = new EmbeddingService(predictors, predictions, remotePredictions);
+            Audio = new AudioService(predictors, predictions, remotePredictions);
+        }
+        #endregion
+    }
+}
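OpenAIClient is a thin facade: it holds nothing beyond the three services, each wired to the same predictor, edge-prediction, and remote-prediction services, and its constructor is internal, so it is only reachable through BetaClient. One design choice worth noting is the untyped `object? acceleration` parameter on the service methods: each service's CreatePrediction switch routes an `Acceleration` value to the on-device PredictionService and a `RemoteAcceleration` value to RemotePredictionService, and throws for anything else, making the parameter effectively a closed two-type union. A hedged call-site sketch (the `openai` variable and the RemoteAcceleration value are assumptions):

    // Edge prediction, the default when acceleration is null:
    var local = await openai.Embeddings.Create("@example/embedder", new[] { "hi" });
    // Remote prediction, selected by passing any RemoteAcceleration value instead:
    var hosted = await openai.Embeddings.Create(
        "@example/embedder", new[] { "hi" },
        acceleration: someRemoteAcceleration
    );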
package/Runtime/Beta/OpenAI/SpeechService.cs
@@ -0,0 +1,250 @@
+/*
+* Muna
+* Copyright © 2025 NatML Inc. All rights reserved.
+*/
+
+#nullable enable
+
+namespace Muna.Beta.OpenAI {
+
+    using System;
+    using System.Collections.Generic;
+    using System.Linq;
+    using System.Runtime.Serialization;
+    using System.Threading.Tasks;
+    using Newtonsoft.Json;
+    using Newtonsoft.Json.Converters;
+    using Services;
+    using PredictorService = global::Muna.Services.PredictorService;
+    using EdgePredictionService = global::Muna.Services.PredictionService;
+
+    /// <summary>
+    /// Create speech.
+    /// </summary>
+    public sealed class SpeechService {
+
+        #region --Client API--
+        /// <summary>
+        /// Audio output format.
+        /// </summary>
+        [JsonConverter(typeof(StringEnumConverter))]
+        public enum ResponseFormat {
+            /// <summary>
+            /// MP3 audio.
+            /// </summary>
+            [EnumMember(Value = @"mp3")]
+            MP3 = 1,
+            /// <summary>
+            /// Opus.
+            /// </summary>
+            [EnumMember(Value = @"opus")]
+            Opus = 2,
+            /// <summary>
+            /// AAC.
+            /// </summary>
+            [EnumMember(Value = @"aac")]
+            AAC = 3,
+            /// <summary>
+            /// FLAC lossless audio.
+            /// </summary>
+            [EnumMember(Value = @"flac")]
+            FLAC = 4,
+            /// <summary>
+            /// Waveform audio.
+            /// </summary>
+            [EnumMember(Value = @"wav")]
+            WAV = 5,
+            /// <summary>
+            /// Linear PCM audio.
+            /// </summary>
+            [EnumMember(Value = @"pcm")]
+            PCM = 6,
+        }
+
+        /// <summary>
+        /// The format to stream the audio in.
+        /// </summary>
+        [JsonConverter(typeof(StringEnumConverter))]
+        public enum StreamFormat {
+            /// <summary>
+            /// Raw audio.
+            /// </summary>
+            Audio = 1,
+            /// <summary>
+            /// Server-sent events.
+            /// </summary>
+            SSE = 2,
+        }
+
+        /// <summary>
+        /// Generate audio from the input text.
+        /// </summary>
+        /// <param name="model">Speech generation predictor tag.</param>
+        /// <param name="input">The text to generate audio for.</param>
+        /// <param name="voice">The voice to use when generating the audio.</param>
+        /// <param name="speed">The speed of the generated audio. Defaults to 1.0.</param>
+        /// <param name="responseFormat">The format to return audio in. Currently only `ResponseFormat.PCM` is supported.</param>
+        /// <param name="streamFormat">The format to stream the audio in. Currently only `StreamFormat.Audio` is supported.</param>
+        /// <param name="acceleration">Prediction acceleration. Must be `Acceleration` or `RemoteAcceleration` instance.</param>
+        /// <returns>Generated audio.</returns>
+        public async Task<BinaryData> Create(
+            string model,
+            string input,
+            string voice,
+            float speed = 1f,
+            ResponseFormat responseFormat = ResponseFormat.MP3,
+            StreamFormat streamFormat = StreamFormat.Audio,
+            object? acceleration = null
+        ) {
+            // Ensure we have a delegate
+            if (!cache.ContainsKey(model)) {
+                var @delegate = await CreateSpeechDelegate(model);
+                cache.Add(model, @delegate);
+            }
+            // Make prediction
+            var handler = cache[model];
+            var result = await handler(
+                model,
+                input,
+                voice,
+                speed,
+                responseFormat,
+                streamFormat,
+                acceleration: acceleration ?? Acceleration.Auto
+            );
+            // Return
+            return result;
+        }
+        #endregion
+
+
+        #region --Operations--
+        private readonly PredictorService predictors;
+        private readonly EdgePredictionService predictions;
+        private readonly RemotePredictionService remotePredictions;
+        private readonly Dictionary<string, SpeechDelegate> cache;
+        private delegate Task<BinaryData> SpeechDelegate(
+            string model,
+            string input,
+            string voice,
+            float speed,
+            ResponseFormat responseFormat,
+            StreamFormat streamFormat,
+            object acceleration
+        );
+
+        internal SpeechService(
+            PredictorService predictors,
+            EdgePredictionService predictions,
+            RemotePredictionService remotePredictions
+        ) {
+            this.predictors = predictors;
+            this.predictions = predictions;
+            this.remotePredictions = remotePredictions;
+            this.cache = new();
+        }
+
+        private async Task<SpeechDelegate> CreateSpeechDelegate(string tag) {
+            // Retrieve predictor
+            var predictor = await predictors.Retrieve(tag);
+            if (predictor == null)
+                throw new ArgumentException($"{tag} cannot be used for OpenAI speech API because the predictor could not be found.");
+            // Get required inputs
+            var signature = predictor.signature!;
+            var requiredInputParams = signature.inputs.Where(parameter => parameter.optional == false).ToArray();
+            if (requiredInputParams.Length != 2)
+                throw new InvalidOperationException($"{tag} cannot be used with OpenAI speech API because it does not have exactly two required input parameters.");
+            // Get the text input parameter
+            var inputParam = requiredInputParams.FirstOrDefault(parameter => parameter.type == Dtype.String);
+            if (inputParam == null)
+                throw new InvalidOperationException($"{tag} cannot be used with OpenAI speech API because it does not have the required speech input parameter.");
+            // Get the voice input parameter
+            var voiceParam = requiredInputParams.FirstOrDefault(parameter =>
+                parameter.type == Dtype.String &&
+                parameter.denotation == "audio.voice"
+            );
+            if (voiceParam == null)
+                throw new InvalidOperationException($"{tag} cannot be used with OpenAI speech API because it does not have the required speech voice parameter.");
+            // Get the speed input parameter (optional)
+            var speedParam = signature.inputs.FirstOrDefault(parameter =>
+                new[] { Dtype.Float32, Dtype.Float64 }.Contains((Dtype)parameter.type!) &&
+                parameter.denotation == "audio.speed"
+            );
+            // Get the audio output parameter
+            var (audioParamIdx, audioParam) = signature.outputs
+                .Select((parameter, idx) => (idx, parameter))
+                .Where(pair =>
+                    pair.parameter.type == Dtype.Float32 &&
+                    pair.parameter.denotation == "audio"
+                )
+                .FirstOrDefault();
+            if (audioParam == null)
+                throw new InvalidOperationException($"{tag} cannot be used with OpenAI speech API because it has no outputs with an `audio` denotation.");
+            // Create delegate
+            SpeechDelegate result = async (
+                string model,
+                string input,
+                string voice,
+                float speed,
+                ResponseFormat responseFormat,
+                StreamFormat streamFormat,
+                object acceleration
+            ) => {
+                // Build prediction input map
+                var inputMap = new Dictionary<string, object?> {
+                    [inputParam.name] = input,
+                    [voiceParam.name] = voice
+                };
+                if (speedParam != null)
+                    inputMap[speedParam.name] = speed;
+                // Create prediction
+                var prediction = await CreatePrediction(
+                    model,
+                    inputs: inputMap,
+                    acceleration: acceleration
+                );
+                // Check for error
+                if (prediction.error != null)
+                    throw new InvalidOperationException(prediction.error);
+                // Check returned audio
+                var result = prediction.results![audioParamIdx]!;
+                if (!(result is Tensor<float> tensor))
+                    throw new InvalidOperationException($"{tag} cannot be used with OpenAI speech API because it returned an object of type {result.GetType()} instead of an audio tensor.");
+                if (tensor.shape.Length != 1 && tensor.shape.Length != 2) {
+                    var shapeStr = "(" + string.Join(",", tensor.shape) + ")";
+                    throw new InvalidOperationException($"{tag} cannot be used with OpenAI speech API because it returned an audio tensor with an invalid shape: {shapeStr}");
+                }
+                // Create response
+                var channels = tensor.shape.Length == 2 ? tensor.shape[0] : 1; // Assume planar
+                var mediaType = $"audio/pcm;rate={audioParam.sampleRate};channels={channels}";
+                var response = ToBinaryData(tensor, mediaType);
+                // Return
+                return response;
+            };
+            // Return
+            return result;
+        }
+
+        private Task<Prediction> CreatePrediction(
+            string tag,
+            Dictionary<string, object?> inputs,
+            object acceleration
+        ) => acceleration switch {
+            Acceleration acc => predictions.Create(tag, inputs, acc),
+            RemoteAcceleration acc => remotePredictions.Create(tag, inputs, acc),
+            _ => throw new InvalidOperationException($"Cannot create {tag} prediction because acceleration is invalid: {acceleration}")
+        };
+
+        private static unsafe BinaryData ToBinaryData<T>(
+            Tensor<T> tensor,
+            string? mediaType = null
+        ) where T : unmanaged {
+            var elementCount = tensor.shape.Aggregate(1, (a, b) => a * b);
+            var data = new byte[elementCount * sizeof(T)];
+            fixed (void* src = tensor, dst = data)
+                Buffer.MemoryCopy(src, dst, data.Length, data.Length);
+            return new BinaryData(data, mediaType);
+        }
+        #endregion
+    }
+}
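Note that despite the ResponseFormat enum, the delegate above never consults `responseFormat`: the audio tensor is memcpy'd out as raw float32 PCM with a media type of the form audio/pcm;rate=...;channels=..., which matches the doc comment that only `ResponseFormat.PCM` is currently supported. In a Unity scene, that output can be wrapped in an AudioClip. A sketch, assuming mono audio at a known 24 kHz rate (in practice, parse the rate and channel count from the media type) and assuming the returned BinaryData exposes its bytes via ToArray():

    // speech is the BinaryData returned by SpeechService.Create (assumed in scope)
    byte[] bytes = speech.ToArray();
    var samples = new float[bytes.Length / sizeof(float)];
    System.Buffer.BlockCopy(bytes, 0, samples, 0, bytes.Length);
    // Mono at 24 kHz, both assumed; 2D output is planar per the comment above,
    // so multi-channel audio would need interleaving before SetData.
    var clip = UnityEngine.AudioClip.Create("speech", samples.Length, 1, 24000, false);
    clip.SetData(samples, 0);
    audioSource.clip = clip;   // an AudioSource in the scene
    audioSource.Play();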