@fugood/llama.node 1.1.11 → 1.2.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. package/CMakeLists.txt +5 -8
  2. package/lib/binding.ts +18 -1
  3. package/lib/index.js +2 -2
  4. package/lib/index.ts +2 -2
  5. package/package.json +20 -16
  6. package/src/DecodeAudioTokenWorker.cpp +23 -26
  7. package/src/DecodeAudioTokenWorker.h +6 -8
  8. package/src/DetokenizeWorker.cpp +5 -8
  9. package/src/DetokenizeWorker.h +6 -5
  10. package/src/DisposeWorker.cpp +23 -3
  11. package/src/DisposeWorker.h +4 -2
  12. package/src/EmbeddingWorker.cpp +9 -35
  13. package/src/EmbeddingWorker.h +3 -2
  14. package/src/LlamaCompletionWorker.cpp +217 -315
  15. package/src/LlamaCompletionWorker.h +6 -12
  16. package/src/LlamaContext.cpp +166 -396
  17. package/src/LlamaContext.h +8 -13
  18. package/src/LoadSessionWorker.cpp +22 -19
  19. package/src/LoadSessionWorker.h +3 -2
  20. package/src/RerankWorker.h +3 -2
  21. package/src/SaveSessionWorker.cpp +22 -19
  22. package/src/SaveSessionWorker.h +3 -2
  23. package/src/TokenizeWorker.cpp +38 -35
  24. package/src/TokenizeWorker.h +12 -3
  25. package/src/common.hpp +0 -458
  26. package/src/llama.cpp/common/arg.cpp +50 -30
  27. package/src/llama.cpp/common/chat.cpp +111 -1
  28. package/src/llama.cpp/common/chat.h +3 -0
  29. package/src/llama.cpp/common/common.h +1 -1
  30. package/src/llama.cpp/common/log.cpp +53 -2
  31. package/src/llama.cpp/common/log.h +10 -4
  32. package/src/llama.cpp/common/sampling.cpp +23 -2
  33. package/src/llama.cpp/common/sampling.h +3 -1
  34. package/src/llama.cpp/common/speculative.cpp +1 -1
  35. package/src/llama.cpp/ggml/CMakeLists.txt +3 -2
  36. package/src/llama.cpp/ggml/include/ggml-backend.h +3 -0
  37. package/src/llama.cpp/ggml/include/ggml-cpu.h +0 -1
  38. package/src/llama.cpp/ggml/include/ggml.h +50 -1
  39. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +14 -13
  40. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +210 -96
  41. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +0 -6
  42. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +11 -37
  43. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +3 -4
  44. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +4 -9
  45. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +218 -4
  46. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
  47. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +41 -37
  48. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +150 -28
  49. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +320 -73
  50. package/src/llama.cpp/include/llama.h +5 -6
  51. package/src/llama.cpp/src/llama-adapter.cpp +33 -0
  52. package/src/llama.cpp/src/llama-adapter.h +3 -0
  53. package/src/llama.cpp/src/llama-arch.cpp +27 -4
  54. package/src/llama.cpp/src/llama-arch.h +2 -0
  55. package/src/llama.cpp/src/llama-context.cpp +62 -56
  56. package/src/llama.cpp/src/llama-context.h +1 -1
  57. package/src/llama.cpp/src/llama-graph.cpp +54 -9
  58. package/src/llama.cpp/src/llama-graph.h +8 -0
  59. package/src/llama.cpp/src/llama-hparams.cpp +37 -0
  60. package/src/llama.cpp/src/llama-hparams.h +9 -3
  61. package/src/llama.cpp/src/llama-kv-cache.cpp +1 -23
  62. package/src/llama.cpp/src/llama-kv-cache.h +1 -0
  63. package/src/llama.cpp/src/llama-model.cpp +159 -1
  64. package/src/llama.cpp/src/llama-model.h +0 -1
  65. package/src/llama.cpp/src/llama-sampling.cpp +226 -126
  66. package/src/anyascii.c +0 -22223
  67. package/src/anyascii.h +0 -42
  68. package/src/tts_utils.cpp +0 -371
  69. package/src/tts_utils.h +0 -103
package/src/anyascii.h DELETED
@@ -1,42 +0,0 @@
1
- /*
2
- ISC License
3
-
4
- Copyright (c) 2020-2023, Hunter WB <hunterwb.com>
5
-
6
- Permission to use, copy, modify, and/or distribute this software for any
7
- purpose with or without fee is hereby granted, provided that the above
8
- copyright notice and this permission notice appear in all copies.
9
-
10
- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
- */
18
-
19
- #ifndef ANYASCII_H
20
- #define ANYASCII_H
21
-
22
- #ifdef __cplusplus
23
- extern "C" {
24
- #endif
25
-
26
- #include <stddef.h>
27
- #include <stdint.h>
28
-
29
- /**
30
- * Gets the ASCII transliteration of a Unicode code point
31
- *
32
- * @param utf32 A Unicode code point
33
- * @param ascii A pointer for the result to be written to; not null-terminated
34
- * @return The number of chars in *ascii
35
- */
36
- size_t anyascii(uint_least32_t utf32, const char **ascii);
37
-
38
- #ifdef __cplusplus
39
- }
40
- #endif
41
-
42
- #endif
package/src/tts_utils.cpp DELETED
@@ -1,371 +0,0 @@
1
- #include "tts_utils.h"
2
- #include "anyascii.h"
3
- #include <codecvt>
4
-
5
- using json = nlohmann::json;
6
-
7
- static std::string anyascii_string(const std::string &input) {
8
- std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
9
- auto wstr = converter.from_bytes(input);
10
- std::string output;
11
- for (char32_t c : wstr) {
12
- const char *r;
13
- size_t rlen = anyascii(c, &r);
14
- output.append(r, rlen);
15
- }
16
- return output;
17
- }
18
-
19
- std::string audio_text_from_speaker(json speaker,
20
- const tts_type type = OUTETTS_V0_2) {
21
- std::string audio_text = "<|text_start|>";
22
-
23
- if (type == OUTETTS_V0_2 || type == OUTETTS_V0_3) {
24
- std::string separator =
25
- (type == OUTETTS_V0_3) ? "<|space|>" : "<|text_sep|>";
26
- for (const auto &word : speaker["words"]) {
27
- audio_text += word["word"].get<std::string>() + separator;
28
- }
29
- }
30
-
31
- return audio_text;
32
- }
33
-
34
- std::string audio_data_from_speaker(json speaker,
35
- const tts_type type = OUTETTS_V0_2) {
36
- std::string audio_data = "<|audio_start|>\n";
37
-
38
- if (type == OUTETTS_V0_2 || type == OUTETTS_V0_3) {
39
- std::string code_start = (type == OUTETTS_V0_3) ? "" : "<|code_start|>";
40
- std::string code_end =
41
- (type == OUTETTS_V0_3) ? "<|space|>" : "<|code_end|>";
42
- for (const auto &word : speaker["words"]) {
43
- std::string word_text = word["word"].get<std::string>();
44
- double duration = word["duration"].get<double>();
45
- std::vector<int> codes = word["codes"].get<std::vector<int>>();
46
-
47
- // Create the audio output entry
48
- std::ostringstream word_entry;
49
- word_entry << word_text << "<|t_" << std::fixed << std::setprecision(2)
50
- << duration << "|>" + code_start;
51
- for (const auto &Code : codes) {
52
- word_entry << "<|" << Code << "|>";
53
- }
54
- word_entry << code_end << "\n";
55
- audio_data += word_entry.str();
56
- }
57
- }
58
-
59
- return audio_data;
60
- }
61
-
62
// English names of 0..19, keyed by value.
static const std::map<int, std::string> ones = {
    {0, "zero"},     {1, "one"},        {2, "two"},       {3, "three"},
    {4, "four"},     {5, "five"},       {6, "six"},       {7, "seven"},
    {8, "eight"},    {9, "nine"},       {10, "ten"},      {11, "eleven"},
    {12, "twelve"},  {13, "thirteen"},  {14, "fourteen"}, {15, "fifteen"},
    {16, "sixteen"}, {17, "seventeen"}, {18, "eighteen"}, {19, "nineteen"}};

// English names of the multiples of ten 20..90, keyed by the tens digit.
static const std::map<int, std::string> tens = {
    {2, "twenty"}, {3, "thirty"},  {4, "forty"},  {5, "fifty"},
    {6, "sixty"},  {7, "seventy"}, {8, "eighty"}, {9, "ninety"}};

// Converts a number in 0..999 to words, e.g. 115 -> "one hundred fifteen".
// Returns an empty string for 0 and for negative input. The result carries no
// leading or trailing space. Values of 2000 and above throw std::out_of_range
// from the table lookup.
std::string convert_less_than_thousand(int num) {
  std::string result;

  if (num >= 100) {
    result += ones.at(num / 100) + " hundred";
    num %= 100;
    if (num > 0) {
      // Only separate when something follows, so "100" is not "one hundred ".
      result += ' ';
    }
  }

  if (num >= 20) {
    result += tens.at(num / 10);
    if (num % 10 > 0) {
      result += "-" + ones.at(num % 10);
    }
  } else if (num > 0) {
    result += ones.at(num);
  }

  return result;
}

// Spells out a decimal number given as text ("1234.5" -> "one thousand two
// hundred thirty-four point five").
//
// The integer part is parsed with std::stoll, so everything that fits in a
// long long is supported (up to the quintillions). Digits after the first '.'
// are read out one by one. Word groups are joined by single spaces and the
// result has no trailing space.
//
// Returns " " when the text cannot be converted: not a number, out of range,
// or a non-digit character after the decimal point.
std::string number_to_words(const std::string &number_str) {
  struct Scale {
    long long value;
    const char *name;
  };
  // Largest first, so each group is peeled off the front of the number.
  static const Scale scales[] = {
      {1000000000000000000LL, "quintillion"},
      {1000000000000000LL, "quadrillion"},
      {1000000000000LL, "trillion"},
      {1000000000LL, "billion"},
      {1000000LL, "million"},
      {1000LL, "thousand"},
  };

  try {
    const size_t decimal_pos = number_str.find('.');
    long long remaining = std::stoll(number_str.substr(0, decimal_pos));

    std::string result;
    const auto append_words = [&result](const std::string &words) {
      if (!result.empty()) {
        result += ' ';
      }
      result += words;
    };

    if (remaining == 0) {
      result = "zero";
    } else {
      for (const Scale &scale : scales) {
        if (remaining >= scale.value) {
          // The quotient is below 1000 because larger scales were removed.
          const int group = static_cast<int>(remaining / scale.value);
          append_words(convert_less_than_thousand(group) + " " + scale.name);
          remaining %= scale.value;
        }
      }
      if (remaining > 0) {
        append_words(convert_less_than_thousand(static_cast<int>(remaining)));
      }
    }

    // Handle decimal part
    if (decimal_pos != std::string::npos) {
      result += " point";
      const std::string decimal_part = number_str.substr(decimal_pos + 1);
      for (const char digit : decimal_part) {
        // `ones` also holds 10..19, so reject anything that is not a digit
        // instead of letting ':' read as "ten".
        if (digit < '0' || digit > '9') {
          return " ";
        }
        result += " " + ones.at(digit - '0');
      }
    }

    return result;
  } catch (const std::exception &) {
    // Skip if fails
    return " ";
  }
}
143
-
144
- std::string replace_numbers_with_words(const std::string &input_text) {
145
- std::regex number_pattern(R"(\d+(\.\d+)?)");
146
- std::string result;
147
- auto it = std::sregex_iterator(input_text.begin(), input_text.end(),
148
- number_pattern);
149
- auto end = std::sregex_iterator();
150
-
151
- size_t last_pos = 0;
152
- for (std::sregex_iterator i = it; i != end; ++i) {
153
- const std::smatch &match = *i;
154
- result.append(input_text, last_pos, match.position() - last_pos);
155
- result.append(number_to_words(match.str()));
156
- last_pos = match.position() + match.length();
157
- }
158
- result.append(input_text, last_pos);
159
-
160
- return result;
161
- }
162
-
163
- std::string process_text(const std::string &text,
164
- const tts_type tts_type = OUTETTS_V0_2) {
165
- std::string processed_text = replace_numbers_with_words(text);
166
-
167
- if (tts_type == OUTETTS_V0_2 || tts_type == OUTETTS_V0_3) {
168
- processed_text = anyascii_string(processed_text);
169
-
170
- std::regex dashes(R"([—–-])");
171
- processed_text = std::regex_replace(processed_text, dashes, " ");
172
- }
173
-
174
- std::transform(processed_text.begin(), processed_text.end(),
175
- processed_text.begin(), ::tolower);
176
-
177
- std::regex special_chars(R"([-_/,\.\\])");
178
- processed_text = std::regex_replace(processed_text, special_chars, " ");
179
-
180
- std::regex non_alpha(R"([^a-z\s])");
181
- processed_text = std::regex_replace(processed_text, non_alpha, "");
182
-
183
- std::regex multiple_spaces(R"(\s+)");
184
- processed_text = std::regex_replace(processed_text, multiple_spaces, " ");
185
-
186
- processed_text =
187
- std::regex_replace(processed_text, std::regex(R"(^\s+|\s+$)"), "");
188
-
189
- /*
190
- Replace spaces with the separator token same as in line 365
191
-
192
- for (auto & c : prompt_user) {
193
- if (c == ' ') {
194
- prompt_clean += "<|text_sep|>";
195
- */
196
- std::string separator =
197
- (tts_type == OUTETTS_V0_3) ? "<|space|>" : "<|text_sep|>";
198
- processed_text =
199
- std::regex_replace(processed_text, std::regex(R"(\s)"), separator);
200
-
201
- return processed_text;
202
- }
203
-
204
// M_PI is not guaranteed by the C/C++ standards; provide it wherever the
// math header did not, and never redefine it where it already exists.
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

// Writes a Hann window of `length` samples to `output`.
//
// periodic == true  divides the phase by `length`,
// periodic == false divides the phase by `length - 1`.
// A symmetric window of length 1 is defined as the single value 1; the plain
// formula would divide by zero there and produce NaN. `output` must have
// room for `length` floats; nothing is written when length <= 0.
void fill_hann_window(int length, bool periodic, float *output) {
  const int denominator = periodic ? length : length - 1;
  if (denominator == 0) {
    // Either length == 0 (nothing to write) or a symmetric 1-sample window.
    if (length == 1) {
      output[0] = 1.0f;
    }
    return;
  }
  for (int i = 0; i < length; i++) {
    output[i] = 0.5 * (1.0 - cosf((2.0 * M_PI * i) / denominator));
  }
}
217
-
218
// Writes cos(2*pi*k/N) to *real and sin(2*pi*k/N) to *imag.
//
// The factor is periodic in k with period N, so k is reduced modulo N and the
// angle is formed in double precision. Computing the angle directly in float
// loses several digits once k is many multiples of N.
void twiddle(float *real, float *imag, int k, int N) {
  // N == 0 keeps the unreduced k; the division below then yields inf/NaN.
  const int reduced = (N != 0) ? k % N : k;
  const double angle = 2.0 * M_PI * reduced / N;
  *real = static_cast<float>(cos(angle));
  *imag = static_cast<float>(sin(angle));
}
223
-
224
// Direct (non-FFT) inverse transform of a one-sided spectrum.
//
// inp_cplx holds N = n/2 + 1 interleaved (real, imag) pairs, so 2*N floats
// are read. For every k in [0, n) the function writes
//   out_real[k] = (1/N) * sum_m ( re[m]*cos(2*pi*k*m/n) - im[m]*sin(2*pi*k*m/n) )
// Only the real part of the sum is produced. All results are computed before
// anything is written to out_real. Does nothing when n <= 0.
void irfft(int n, const float *inp_cplx, float *out_real) {
  if (n <= 0) {
    return;
  }
  const int N = n / 2 + 1;

  // cos/sin of 2*pi*j/n for j in [0, n). The twiddle factor for (k, m) only
  // depends on (k*m) mod n, so n table entries replace n*N trig calls.
  const double two_pi = 6.283185307179586476925286766559;
  std::vector<float> cos_table(n);
  std::vector<float> sin_table(n);
  for (int j = 0; j < n; ++j) {
    const double angle = two_pi * j / n;
    cos_table[j] = static_cast<float>(cos(angle));
    sin_table[j] = static_cast<float>(sin(angle));
  }

  std::vector<float> real_output(n);
  for (int k = 0; k < n; ++k) {
    float sum = 0.0f;
    for (int m = 0; m < N; ++m) {
      // Widened so the product cannot overflow int for large n.
      const long long phase = (static_cast<long long>(k) * m) % n;
      sum += inp_cplx[2 * m] * cos_table[phase] -
             inp_cplx[2 * m + 1] * sin_table[phase];
    }
    real_output[k] = sum;
  }

  for (int i = 0; i < n; ++i) {
    out_real[i] = real_output[i] / N;
  }
}
257
-
258
// Overlap-adds consecutive frames of `data` into `output`.
//
// `data` is read as back-to-back frames of n_win samples. Frame c is added at
// positions [c*n_hop - n_pad, c*n_hop - n_pad + n_win) of a buffer of n_out
// samples; samples falling outside the buffer are dropped. Afterwards the
// buffer is cut to n_out - 2*n_pad samples (never below zero).
//
// `output` is overwritten: previous contents never leak into the result.
// A non-positive n_out yields an empty output.
void fold(const std::vector<float> &data, int64_t n_out, int64_t n_win,
          int64_t n_hop, int64_t n_pad, std::vector<float> &output) {
  if (n_out <= 0) {
    output.clear();
    return;
  }

  // assign(), not resize(): resize() would keep stale values in a reused
  // vector and the += below would accumulate on top of them.
  output.assign(static_cast<size_t>(n_out), 0.0f);

  const int64_t n_data = static_cast<int64_t>(data.size());
  int64_t col_idx = 0;
  // Stop as soon as the input is consumed; later frames have nothing to add.
  for (int64_t w_col = 0; w_col < n_out && col_idx < n_data; ++w_col) {
    const int64_t start = w_col * n_hop - n_pad;
    for (int64_t offset = 0; offset < n_win && col_idx < n_data;
         ++offset, ++col_idx) {
      const int64_t w_im = start + offset;
      if (w_im >= 0 && w_im < n_out) {
        output[w_im] += data[col_idx];
      }
    }
  }

  const int64_t trimmed = n_out - 2 * n_pad;
  output.resize(trimmed > 0 ? static_cast<size_t>(trimmed) : 0);
}
282
-
283
- std::vector<float> embd_to_audio(const float *embd, const int n_codes,
284
- const int n_embd, const int n_thread) {
285
- const int n_fft = 1280;
286
- const int n_hop = 320;
287
- const int n_win = 1280;
288
- const int n_pad = (n_win - n_hop) / 2;
289
- const int n_out = (n_codes - 1) * n_hop + n_win;
290
-
291
- std::vector<float> hann(n_fft);
292
-
293
- fill_hann_window(hann.size(), true, hann.data());
294
-
295
- int n_spec = n_embd * n_codes;
296
-
297
- std::vector<float> E(n_spec);
298
- std::vector<float> S(n_spec);
299
- std::vector<float> ST(n_spec);
300
-
301
- for (int l = 0; l < n_codes; ++l) {
302
- for (int k = 0; k < n_embd; ++k) {
303
- E[k * n_codes + l] = embd[l * n_embd + k];
304
- }
305
- }
306
-
307
- for (int k = 0; k < n_embd / 2; ++k) {
308
- for (int l = 0; l < n_codes; ++l) {
309
- float mag = E[(k)*n_codes + l];
310
- float phi = E[(k + n_embd / 2) * n_codes + l];
311
-
312
- mag = exp(mag);
313
-
314
- if (mag > 1e2) {
315
- mag = 1e2;
316
- }
317
- S[2 * (k * n_codes + l) + 0] = mag * cosf(phi);
318
- S[2 * (k * n_codes + l) + 1] = mag * sinf(phi);
319
- }
320
- }
321
-
322
- for (int l = 0; l < n_codes; ++l) {
323
- for (int k = 0; k < n_embd / 2; ++k) {
324
- ST[l * n_embd + 2 * k + 0] = S[2 * (k * n_codes + l) + 0];
325
- ST[l * n_embd + 2 * k + 1] = S[2 * (k * n_codes + l) + 1];
326
- }
327
- }
328
-
329
- std::vector<float> res(n_codes * n_fft);
330
- std::vector<float> hann2(n_codes * n_fft);
331
-
332
- std::vector<std::thread> workers(n_thread);
333
- for (int i = 0; i < n_thread; ++i) {
334
- workers[i] = std::thread([&, i]() {
335
- for (int l = i; l < n_codes; l += n_thread) {
336
- irfft(n_fft, ST.data() + l * n_embd, res.data() + l * n_fft);
337
- for (int j = 0; j < n_fft; ++j) {
338
- res[l * n_fft + j] *= hann[j];
339
- hann2[l * n_fft + j] = hann[j] * hann[j];
340
- }
341
- }
342
- });
343
- }
344
- for (int i = 0; i < n_thread; ++i) {
345
- workers[i].join();
346
- }
347
-
348
- std::vector<float> audio;
349
- std::vector<float> env;
350
-
351
- fold(res, n_out, n_win, n_hop, n_pad, audio);
352
- fold(hann2, n_out, n_win, n_hop, n_pad, env); // TODO: can be done once
353
-
354
- for (size_t i = 0; i < audio.size(); ++i) {
355
- audio[i] /= env[i];
356
- }
357
-
358
- return audio;
359
- }
360
-
361
- const char *get_tts_grammar(const tts_type type) {
362
- switch (type) {
363
- case OUTETTS_V0_1:
364
- return OUTETTS_V1_GRAMMAR;
365
- case OUTETTS_V0_2:
366
- case OUTETTS_V0_3:
367
- return OUTETTS_V2_GRAMMAR;
368
- default:
369
- return nullptr;
370
- }
371
- }
package/src/tts_utils.h DELETED
@@ -1,103 +0,0 @@
1
- #pragma once
2
-
3
- #include <regex>
4
- #include <sstream>
5
- #include <string>
6
- #include <thread>
7
- #include <vector>
8
-
9
- #include <nlohmann/json.hpp>
10
-
11
// Selects which TTS prompt variant the helpers in this header produce
// (separator tokens, code delimiters and grammar differ per value).
enum tts_type {
  UNKNOWN = -1,
  OUTETTS_V0_1 = 1,
  OUTETTS_V0_2 = 2,
  OUTETTS_V0_3 = 3,
};
12
-
13
- static std::string anyascii_string(const std::string &input);
14
-
15
- std::string audio_text_from_speaker(nlohmann::json speaker,
16
- const tts_type type);
17
- std::string audio_data_from_speaker(nlohmann::json speaker,
18
- const tts_type type);
19
- std::string process_text(const std::string &text, const tts_type tts_type);
20
- std::vector<float> embd_to_audio(const float *embd, const int n_codes,
21
- const int n_embd, const int n_thread);
22
-
23
- const char *get_tts_grammar(const tts_type type);
24
-
25
- // the default speaker profile is from:
26
- // https://github.com/edwko/OuteTTS/blob/main/outetts/version/v1/default_speakers/en_male_1.json
27
- static const char *DEFAULT_AUDIO_TEXT =
28
- "<|text_start|>the<|text_sep|>overall<|text_sep|>package<|text_sep|>from<|"
29
- "text_sep|>just<|text_sep|>two<|text_sep|>people<|text_sep|>is<|text_sep|>"
30
- "pretty<|text_sep|>remarkable<|text_sep|>sure<|text_sep|>i<|text_sep|>have<"
31
- "|text_sep|>some<|text_sep|>critiques<|text_sep|>about<|text_sep|>some<|"
32
- "text_sep|>of<|text_sep|>the<|text_sep|>gameplay<|text_sep|>aspects<|text_"
33
- "sep|>but<|text_sep|>its<|text_sep|>still<|text_sep|>really<|text_sep|>"
34
- "enjoyable<|text_sep|>and<|text_sep|>it<|text_sep|>looks<|text_sep|>lovely<"
35
- "|text_sep|>";
36
- static const char *DEFAULT_AUDIO_DATA = R"(<|audio_start|>
37
- the<|t_0.08|><|code_start|><|257|><|740|><|636|><|913|><|788|><|1703|><|code_end|>
38
- overall<|t_0.36|><|code_start|><|127|><|201|><|191|><|774|><|700|><|532|><|1056|><|557|><|798|><|298|><|1741|><|747|><|1662|><|1617|><|1702|><|1527|><|368|><|1588|><|1049|><|1008|><|1625|><|747|><|1576|><|728|><|1019|><|1696|><|1765|><|code_end|>
39
- package<|t_0.56|><|code_start|><|935|><|584|><|1319|><|627|><|1016|><|1491|><|1344|><|1117|><|1526|><|1040|><|239|><|1435|><|951|><|498|><|723|><|1180|><|535|><|789|><|1649|><|1637|><|78|><|465|><|1668|><|901|><|595|><|1675|><|117|><|1009|><|1667|><|320|><|840|><|79|><|507|><|1762|><|1508|><|1228|><|1768|><|802|><|1450|><|1457|><|232|><|639|><|code_end|>
40
- from<|t_0.19|><|code_start|><|604|><|782|><|1682|><|872|><|1532|><|1600|><|1036|><|1761|><|647|><|1554|><|1371|><|653|><|1595|><|950|><|code_end|>
41
- just<|t_0.25|><|code_start|><|1782|><|1670|><|317|><|786|><|1748|><|631|><|599|><|1155|><|1364|><|1524|><|36|><|1591|><|889|><|1535|><|541|><|440|><|1532|><|50|><|870|><|code_end|>
42
- two<|t_0.24|><|code_start|><|1681|><|1510|><|673|><|799|><|805|><|1342|><|330|><|519|><|62|><|640|><|1138|><|565|><|1552|><|1497|><|1552|><|572|><|1715|><|1732|><|code_end|>
43
- people<|t_0.39|><|code_start|><|593|><|274|><|136|><|740|><|691|><|633|><|1484|><|1061|><|1138|><|1485|><|344|><|428|><|397|><|1562|><|645|><|917|><|1035|><|1449|><|1669|><|487|><|442|><|1484|><|1329|><|1832|><|1704|><|600|><|761|><|653|><|269|><|code_end|>
44
- is<|t_0.16|><|code_start|><|566|><|583|><|1755|><|646|><|1337|><|709|><|802|><|1008|><|485|><|1583|><|652|><|10|><|code_end|>
45
- pretty<|t_0.32|><|code_start|><|1818|><|1747|><|692|><|733|><|1010|><|534|><|406|><|1697|><|1053|><|1521|><|1355|><|1274|><|816|><|1398|><|211|><|1218|><|817|><|1472|><|1703|><|686|><|13|><|822|><|445|><|1068|><|code_end|>
46
- remarkable<|t_0.68|><|code_start|><|230|><|1048|><|1705|><|355|><|706|><|1149|><|1535|><|1787|><|1356|><|1396|><|835|><|1583|><|486|><|1249|><|286|><|937|><|1076|><|1150|><|614|><|42|><|1058|><|705|><|681|><|798|><|934|><|490|><|514|><|1399|><|572|><|1446|><|1703|><|1346|><|1040|><|1426|><|1304|><|664|><|171|><|1530|><|625|><|64|><|1708|><|1830|><|1030|><|443|><|1509|><|1063|><|1605|><|1785|><|721|><|1440|><|923|><|code_end|>
47
- sure<|t_0.36|><|code_start|><|792|><|1780|><|923|><|1640|><|265|><|261|><|1525|><|567|><|1491|><|1250|><|1730|><|362|><|919|><|1766|><|543|><|1|><|333|><|113|><|970|><|252|><|1606|><|133|><|302|><|1810|><|1046|><|1190|><|1675|><|code_end|>
48
- i<|t_0.08|><|code_start|><|123|><|439|><|1074|><|705|><|1799|><|637|><|code_end|>
49
- have<|t_0.16|><|code_start|><|1509|><|599|><|518|><|1170|><|552|><|1029|><|1267|><|864|><|419|><|143|><|1061|><|0|><|code_end|>
50
- some<|t_0.16|><|code_start|><|619|><|400|><|1270|><|62|><|1370|><|1832|><|917|><|1661|><|167|><|269|><|1366|><|1508|><|code_end|>
51
- critiques<|t_0.60|><|code_start|><|559|><|584|><|1163|><|1129|><|1313|><|1728|><|721|><|1146|><|1093|><|577|><|928|><|27|><|630|><|1080|><|1346|><|1337|><|320|><|1382|><|1175|><|1682|><|1556|><|990|><|1683|><|860|><|1721|><|110|><|786|><|376|><|1085|><|756|><|1523|><|234|><|1334|><|1506|><|1578|><|659|><|612|><|1108|><|1466|><|1647|><|308|><|1470|><|746|><|556|><|1061|><|code_end|>
52
- about<|t_0.29|><|code_start|><|26|><|1649|><|545|><|1367|><|1263|><|1728|><|450|><|859|><|1434|><|497|><|1220|><|1285|><|179|><|755|><|1154|><|779|><|179|><|1229|><|1213|><|922|><|1774|><|1408|><|code_end|>
53
- some<|t_0.23|><|code_start|><|986|><|28|><|1649|><|778|><|858|><|1519|><|1|><|18|><|26|><|1042|><|1174|><|1309|><|1499|><|1712|><|1692|><|1516|><|1574|><|code_end|>
54
- of<|t_0.07|><|code_start|><|197|><|716|><|1039|><|1662|><|64|><|code_end|>
55
- the<|t_0.08|><|code_start|><|1811|><|1568|><|569|><|886|><|1025|><|1374|><|code_end|>
56
- gameplay<|t_0.48|><|code_start|><|1269|><|1092|><|933|><|1362|><|1762|><|1700|><|1675|><|215|><|781|><|1086|><|461|><|838|><|1022|><|759|><|649|><|1416|><|1004|><|551|><|909|><|787|><|343|><|830|><|1391|><|1040|><|1622|><|1779|><|1360|><|1231|><|1187|><|1317|><|76|><|997|><|989|><|978|><|737|><|189|><|code_end|>
57
- aspects<|t_0.56|><|code_start|><|1423|><|797|><|1316|><|1222|><|147|><|719|><|1347|><|386|><|1390|><|1558|><|154|><|440|><|634|><|592|><|1097|><|1718|><|712|><|763|><|1118|><|1721|><|1311|><|868|><|580|><|362|><|1435|><|868|><|247|><|221|><|886|><|1145|><|1274|><|1284|><|457|><|1043|><|1459|><|1818|><|62|><|599|><|1035|><|62|><|1649|><|778|><|code_end|>
58
- but<|t_0.20|><|code_start|><|780|><|1825|><|1681|><|1007|><|861|><|710|><|702|><|939|><|1669|><|1491|><|613|><|1739|><|823|><|1469|><|648|><|code_end|>
59
- its<|t_0.09|><|code_start|><|92|><|688|><|1623|><|962|><|1670|><|527|><|599|><|code_end|>
60
- still<|t_0.27|><|code_start|><|636|><|10|><|1217|><|344|><|713|><|957|><|823|><|154|><|1649|><|1286|><|508|><|214|><|1760|><|1250|><|456|><|1352|><|1368|><|921|><|615|><|5|><|code_end|>
61
- really<|t_0.36|><|code_start|><|55|><|420|><|1008|><|1659|><|27|><|644|><|1266|><|617|><|761|><|1712|><|109|><|1465|><|1587|><|503|><|1541|><|619|><|197|><|1019|><|817|><|269|><|377|><|362|><|1381|><|507|><|1488|><|4|><|1695|><|code_end|>
62
- enjoyable<|t_0.49|><|code_start|><|678|><|501|><|864|><|319|><|288|><|1472|><|1341|><|686|><|562|><|1463|><|619|><|1563|><|471|><|911|><|730|><|1811|><|1006|><|520|><|861|><|1274|><|125|><|1431|><|638|><|621|><|153|><|876|><|1770|><|437|><|987|><|1653|><|1109|><|898|><|1285|><|80|><|593|><|1709|><|843|><|code_end|>
63
- and<|t_0.15|><|code_start|><|1285|><|987|><|303|><|1037|><|730|><|1164|><|502|><|120|><|1737|><|1655|><|1318|><|code_end|>
64
- it<|t_0.09|><|code_start|><|848|><|1366|><|395|><|1601|><|1513|><|593|><|1302|><|code_end|>
65
- looks<|t_0.27|><|code_start|><|1281|><|1266|><|1755|><|572|><|248|><|1751|><|1257|><|695|><|1380|><|457|><|659|><|585|><|1315|><|1105|><|1776|><|736|><|24|><|736|><|654|><|1027|><|code_end|>
66
- lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|1481|><|1721|><|1123|><|438|><|1246|><|1251|><|795|><|659|><|1381|><|1658|><|217|><|1772|><|562|><|952|><|107|><|1129|><|1112|><|467|><|550|><|1079|><|840|><|1615|><|1469|><|1380|><|168|><|917|><|836|><|1827|><|437|><|583|><|67|><|595|><|1087|><|1646|><|1493|><|1677|><|code_end|>)";
67
-
68
- static const char *OUTETTS_V1_GRAMMAR = R"(
69
- root ::= NL? wordAudioBlock+ audioEnd NL eos?
70
- wordAudioBlock ::= WORD codeBlock NL
71
- codeBlock ::= TIME CODE*
72
- eos ::= "<|im_end|>"
73
- codeStart ::= "<|code_start|>"
74
- codeEnd ::= "<|code_end|>"
75
- audioEnd ::= "<|audio_end|>"
76
- WORD ::= [A-Za-z]+
77
- NL ::= "\n"
78
- TIME ::= "<|t_" DECIMAL "|>"
79
- CODE ::= "<|" DIGITS "|>"
80
- DIGITS ::= [0-9]+
81
- DECIMAL ::= [0-9]+ "." [0-9]+
82
- )";
83
-
84
- static const char *OUTETTS_V2_GRAMMAR = R"(
85
- root ::= NL? content+ audioEnd NL eos?
86
- content ::= wordAudioBlock | emotionBlock
87
- wordAudioBlock ::= WORD punch* codeBlock space NL
88
- codeBlock ::= TIME CODE*
89
- emotionBlock ::= emotionStart TEXT emotionEnd space NL
90
- TEXT ::= [A-Za-z0-9 .,?!]+
91
- eos ::= "<|im_end|>"
92
- emotionStart ::= "<|emotion_start|>"
93
- emotionEnd ::= "<|emotion_end|>"
94
- audioEnd ::= "<|audio_end|>"
95
- space ::= "<|space|>"
96
- WORD ::= [A-Za-z]+
97
- NL ::= [\n]
98
- TIME ::= "<|t_" DECIMAL "|>"
99
- CODE ::= "<|" DIGITS "|>"
100
- DIGITS ::= [0-9]+
101
- DECIMAL ::= [0-9]+ "." [0-9]+
102
- punch ::= "<|" [a-z_]+ "|>"
103
- )";