appostle-installer 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/appostle-installer.js +5 -1
- package/dist/appostle-installer.js.map +2 -2
- package/dist/appostle.js +24 -456
- package/dist/appostle.js.map +4 -4
- package/dist/worker.js +58 -525
- package/dist/worker.js.map +4 -4
- package/package.json +1 -1
package/dist/appostle.js
CHANGED
|
@@ -9,7 +9,7 @@ import { createRequire as createRequire5 } from "node:module";
|
|
|
9
9
|
import { Command as Command2 } from "commander";
|
|
10
10
|
|
|
11
11
|
// ../cli/src/utils/client.ts
|
|
12
|
-
import { existsSync as
|
|
12
|
+
import { existsSync as existsSync10, readFileSync as readFileSync6 } from "node:fs";
|
|
13
13
|
|
|
14
14
|
// ../server/src/server/bootstrap.ts
|
|
15
15
|
import express from "express";
|
|
@@ -7086,18 +7086,6 @@ function pcm16lePeakAbs(pcm16le) {
|
|
|
7086
7086
|
}
|
|
7087
7087
|
return peak;
|
|
7088
7088
|
}
|
|
7089
|
-
function pcm16leToFloat32(pcm16le, gain = 1) {
|
|
7090
|
-
if (pcm16le.length % 2 !== 0) {
|
|
7091
|
-
throw new Error(`PCM16 chunk byteLength must be even, got ${pcm16le.length}`);
|
|
7092
|
-
}
|
|
7093
|
-
const int16 = new Int16Array(pcm16le.buffer, pcm16le.byteOffset, pcm16le.byteLength / 2);
|
|
7094
|
-
const out = new Float32Array(int16.length);
|
|
7095
|
-
for (let i = 0; i < int16.length; i += 1) {
|
|
7096
|
-
const v = int16[i] / 32768 * gain;
|
|
7097
|
-
out[i] = v > 1 ? 1 : v < -1 ? -1 : v;
|
|
7098
|
-
}
|
|
7099
|
-
return out;
|
|
7100
|
-
}
|
|
7101
7089
|
|
|
7102
7090
|
// ../server/src/server/agent/pcm16-resampler.ts
|
|
7103
7091
|
var Pcm16MonoResampler = class {
|
|
@@ -14802,12 +14790,12 @@ function extractPlanNameFromFrontmatter(content) {
|
|
|
14802
14790
|
return null;
|
|
14803
14791
|
}
|
|
14804
14792
|
function resolvePlanFilename(options) {
|
|
14805
|
-
const { originalPath, newSlug, existsSync:
|
|
14793
|
+
const { originalPath, newSlug, existsSync: existsSync14 } = options;
|
|
14806
14794
|
const dir = path5.dirname(originalPath);
|
|
14807
14795
|
const base = `${newSlug}.md`;
|
|
14808
14796
|
let candidate = path5.join(dir, base);
|
|
14809
14797
|
let counter = 2;
|
|
14810
|
-
while (candidate !== originalPath &&
|
|
14798
|
+
while (candidate !== originalPath && existsSync14(candidate)) {
|
|
14811
14799
|
candidate = path5.join(dir, `${newSlug}-${counter}.md`);
|
|
14812
14800
|
counter += 1;
|
|
14813
14801
|
}
|
|
@@ -39416,52 +39404,11 @@ var SHERPA_ONNX_MODEL_CATALOG = {
|
|
|
39416
39404
|
description: "Kokoro TTS (higher quality; larger).",
|
|
39417
39405
|
aliases: ["kokoro"],
|
|
39418
39406
|
defaultFor: "tts"
|
|
39419
|
-
},
|
|
39420
|
-
"pocket-tts-onnx-int8": {
|
|
39421
|
-
kind: "tts",
|
|
39422
|
-
extractedDir: "pocket-tts-onnx-int8",
|
|
39423
|
-
downloadFiles: [
|
|
39424
|
-
{
|
|
39425
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/onnx/mimi_encoder.onnx",
|
|
39426
|
-
relPath: "onnx/mimi_encoder.onnx"
|
|
39427
|
-
},
|
|
39428
|
-
{
|
|
39429
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/onnx/text_conditioner.onnx",
|
|
39430
|
-
relPath: "onnx/text_conditioner.onnx"
|
|
39431
|
-
},
|
|
39432
|
-
{
|
|
39433
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/onnx/flow_lm_main_int8.onnx",
|
|
39434
|
-
relPath: "onnx/flow_lm_main_int8.onnx"
|
|
39435
|
-
},
|
|
39436
|
-
{
|
|
39437
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/onnx/flow_lm_flow_int8.onnx",
|
|
39438
|
-
relPath: "onnx/flow_lm_flow_int8.onnx"
|
|
39439
|
-
},
|
|
39440
|
-
{
|
|
39441
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/onnx/mimi_decoder_int8.onnx",
|
|
39442
|
-
relPath: "onnx/mimi_decoder_int8.onnx"
|
|
39443
|
-
},
|
|
39444
|
-
{
|
|
39445
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/tokenizer.model",
|
|
39446
|
-
relPath: "tokenizer.model"
|
|
39447
|
-
},
|
|
39448
|
-
{
|
|
39449
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/reference_sample.wav",
|
|
39450
|
-
relPath: "reference_sample.wav"
|
|
39451
|
-
}
|
|
39452
|
-
],
|
|
39453
|
-
requiredFiles: [
|
|
39454
|
-
"onnx/mimi_encoder.onnx",
|
|
39455
|
-
"onnx/text_conditioner.onnx",
|
|
39456
|
-
"onnx/flow_lm_main_int8.onnx",
|
|
39457
|
-
"onnx/flow_lm_flow_int8.onnx",
|
|
39458
|
-
"onnx/mimi_decoder_int8.onnx",
|
|
39459
|
-
"tokenizer.model",
|
|
39460
|
-
"reference_sample.wav"
|
|
39461
|
-
],
|
|
39462
|
-
description: "Pocket TTS ONNX (INT8) with streaming decode support (via onnxruntime).",
|
|
39463
|
-
aliases: ["pocket", "pocket-tts"]
|
|
39464
39407
|
}
|
|
39408
|
+
// pocket-tts-onnx-int8 was previously listed here. Removed on
|
|
39409
|
+
// 2026-05-02: source was a single individual HuggingFace account
|
|
39410
|
+
// (KevinAHM/pocket-tts-onnx) with no provenance/integrity guarantees.
|
|
39411
|
+
// Re-add only if we host a verified mirror with pinned hashes.
|
|
39465
39412
|
};
|
|
39466
39413
|
var ALL_MODEL_IDS = Object.keys(SHERPA_ONNX_MODEL_CATALOG);
|
|
39467
39414
|
var LOCAL_STT_MODEL_IDS = ALL_MODEL_IDS.filter(
|
|
@@ -39512,385 +39459,6 @@ var LocalTtsModelIdSchema = createAliasedModelIdSchema({
|
|
|
39512
39459
|
aliases: TTS_MODEL_ALIASES
|
|
39513
39460
|
});
|
|
39514
39461
|
|
|
39515
|
-
// ../server/src/server/speech/providers/local/pocket/pocket-tts-onnx.ts
|
|
39516
|
-
import { existsSync as existsSync10 } from "node:fs";
|
|
39517
|
-
import { readFile as readFile3 } from "node:fs/promises";
|
|
39518
|
-
function assertFileExists(filePath, label) {
|
|
39519
|
-
if (!existsSync10(filePath)) {
|
|
39520
|
-
throw new Error(`Missing ${label}: ${filePath}`);
|
|
39521
|
-
}
|
|
39522
|
-
}
|
|
39523
|
-
function product(dims) {
|
|
39524
|
-
let out = 1;
|
|
39525
|
-
for (const d of dims) out *= d;
|
|
39526
|
-
return out;
|
|
39527
|
-
}
|
|
39528
|
-
function normalizeDims(dims) {
|
|
39529
|
-
return dims.map((d) => {
|
|
39530
|
-
if (typeof d === "number" && Number.isFinite(d)) {
|
|
39531
|
-
if (d === 0) return 0;
|
|
39532
|
-
if (d > 0) return d;
|
|
39533
|
-
return 1;
|
|
39534
|
-
}
|
|
39535
|
-
return 1;
|
|
39536
|
-
});
|
|
39537
|
-
}
|
|
39538
|
-
function getSessionInputMeta(session, inputName) {
|
|
39539
|
-
const metaAny = session.inputMetadata;
|
|
39540
|
-
if (Array.isArray(metaAny)) {
|
|
39541
|
-
const entry = metaAny.find(
|
|
39542
|
-
(m) => m && typeof m === "object" && m.name === inputName
|
|
39543
|
-
);
|
|
39544
|
-
if (!entry) return void 0;
|
|
39545
|
-
return { type: entry.type, dims: entry.shape };
|
|
39546
|
-
}
|
|
39547
|
-
if (metaAny && typeof metaAny === "object" && inputName in metaAny) {
|
|
39548
|
-
const entry = metaAny[inputName];
|
|
39549
|
-
return { type: entry?.type, dims: entry?.dimensions ?? entry?.shape };
|
|
39550
|
-
}
|
|
39551
|
-
return void 0;
|
|
39552
|
-
}
|
|
39553
|
-
function toBigInt64(values) {
|
|
39554
|
-
const out = new BigInt64Array(values.length);
|
|
39555
|
-
for (let i = 0; i < values.length; i += 1) {
|
|
39556
|
-
out[i] = BigInt(values[i]);
|
|
39557
|
-
}
|
|
39558
|
-
return out;
|
|
39559
|
-
}
|
|
39560
|
-
function randn() {
|
|
39561
|
-
let u = 0;
|
|
39562
|
-
let v = 0;
|
|
39563
|
-
while (u === 0) u = Math.random();
|
|
39564
|
-
while (v === 0) v = Math.random();
|
|
39565
|
-
return Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * v);
|
|
39566
|
-
}
|
|
39567
|
-
function normalizeTextForPocket(text) {
|
|
39568
|
-
const trimmed = text.trim();
|
|
39569
|
-
if (!trimmed) {
|
|
39570
|
-
throw new Error("Cannot synthesize empty text");
|
|
39571
|
-
}
|
|
39572
|
-
let out = trimmed;
|
|
39573
|
-
if (out.length > 0 && /[A-Za-z0-9]$/.test(out)) {
|
|
39574
|
-
out = `${out}.`;
|
|
39575
|
-
}
|
|
39576
|
-
if (out.length > 0 && /[a-z]/.test(out[0])) {
|
|
39577
|
-
out = out[0].toUpperCase() + out.slice(1);
|
|
39578
|
-
}
|
|
39579
|
-
return out;
|
|
39580
|
-
}
|
|
39581
|
-
async function loadOrt() {
|
|
39582
|
-
return await import("onnxruntime-node");
|
|
39583
|
-
}
|
|
39584
|
-
async function loadSentencePiece(tokenizerModelPath) {
|
|
39585
|
-
const mod = await import("@sctg/sentencepiece-js");
|
|
39586
|
-
const Processor = mod.SentencePieceProcessor ?? mod.default?.SentencePieceProcessor ?? mod.default;
|
|
39587
|
-
if (!Processor) {
|
|
39588
|
-
throw new Error("Failed to load SentencePiece processor from @sctg/sentencepiece-js");
|
|
39589
|
-
}
|
|
39590
|
-
const sp = new Processor();
|
|
39591
|
-
if (typeof sp.load === "function") {
|
|
39592
|
-
await sp.load(tokenizerModelPath);
|
|
39593
|
-
} else if (typeof sp.Load === "function") {
|
|
39594
|
-
sp.Load(tokenizerModelPath);
|
|
39595
|
-
} else {
|
|
39596
|
-
throw new Error("SentencePiece processor does not expose load()/Load()");
|
|
39597
|
-
}
|
|
39598
|
-
return sp;
|
|
39599
|
-
}
|
|
39600
|
-
function getOrtProviders(ort, device) {
|
|
39601
|
-
if (device === "cpu") return ["cpu"];
|
|
39602
|
-
if (device === "cuda") return ["cuda", "cpu"];
|
|
39603
|
-
void ort;
|
|
39604
|
-
return ["cpu"];
|
|
39605
|
-
}
|
|
39606
|
-
function createZeroTensorForInput(ort, session, inputName) {
|
|
39607
|
-
const meta = getSessionInputMeta(session, inputName);
|
|
39608
|
-
const dims = normalizeDims(meta?.dims ?? []);
|
|
39609
|
-
if (dims.length === 0) {
|
|
39610
|
-
throw new Error(`Missing input metadata shape for ${inputName}`);
|
|
39611
|
-
}
|
|
39612
|
-
const type = (meta?.type ?? "float32").toLowerCase();
|
|
39613
|
-
const size = product(dims);
|
|
39614
|
-
if (type.includes("int64")) {
|
|
39615
|
-
return new ort.Tensor("int64", new BigInt64Array(size), dims);
|
|
39616
|
-
}
|
|
39617
|
-
if (type.includes("bool")) {
|
|
39618
|
-
return new ort.Tensor("bool", new Uint8Array(size), dims);
|
|
39619
|
-
}
|
|
39620
|
-
return new ort.Tensor("float32", new Float32Array(size), dims);
|
|
39621
|
-
}
|
|
39622
|
-
function initState(session, ort) {
|
|
39623
|
-
const out = {};
|
|
39624
|
-
for (const name of session.inputNames) {
|
|
39625
|
-
if (name.startsWith("state_")) {
|
|
39626
|
-
out[name] = createZeroTensorForInput(ort, session, name);
|
|
39627
|
-
}
|
|
39628
|
-
}
|
|
39629
|
-
return out;
|
|
39630
|
-
}
|
|
39631
|
-
function updateStateFromOutputs(state, outputs) {
|
|
39632
|
-
for (const [name, tensor] of Object.entries(outputs)) {
|
|
39633
|
-
if (!name.startsWith("out_state_")) continue;
|
|
39634
|
-
const idx = Number.parseInt(name.replace("out_state_", ""), 10);
|
|
39635
|
-
if (Number.isFinite(idx)) {
|
|
39636
|
-
state[`state_${idx}`] = tensor;
|
|
39637
|
-
}
|
|
39638
|
-
}
|
|
39639
|
-
}
|
|
39640
|
-
function tensorDataFloat32(t) {
|
|
39641
|
-
const data = t.data;
|
|
39642
|
-
if (data instanceof Float32Array) return data;
|
|
39643
|
-
if (Array.isArray(data)) return Float32Array.from(data);
|
|
39644
|
-
throw new Error("Unexpected tensor data type (expected Float32Array)");
|
|
39645
|
-
}
|
|
39646
|
-
var PocketTtsOnnxEngine = class _PocketTtsOnnxEngine {
|
|
39647
|
-
static {
|
|
39648
|
-
this.SAMPLE_RATE = 24e3;
|
|
39649
|
-
}
|
|
39650
|
-
static {
|
|
39651
|
-
this.SAMPLES_PER_FRAME = 1920;
|
|
39652
|
-
}
|
|
39653
|
-
constructor(args) {
|
|
39654
|
-
this.ort = args.ort;
|
|
39655
|
-
this.temperature = args.temperature;
|
|
39656
|
-
this.lsdSteps = args.lsdSteps;
|
|
39657
|
-
this.maxFrames = args.maxFrames;
|
|
39658
|
-
this.framesAfterEos = args.framesAfterEos;
|
|
39659
|
-
this.firstChunkFrames = args.firstChunkFrames;
|
|
39660
|
-
this.maxChunkFrames = args.maxChunkFrames;
|
|
39661
|
-
this.tokenizer = args.tokenizer;
|
|
39662
|
-
this.textConditioner = args.textConditioner;
|
|
39663
|
-
this.flowLmMain = args.flowLmMain;
|
|
39664
|
-
this.flowLmFlow = args.flowLmFlow;
|
|
39665
|
-
this.mimiDecoder = args.mimiDecoder;
|
|
39666
|
-
this.stBuffers = args.stBuffers;
|
|
39667
|
-
this.voiceEmbeddings = args.voiceEmbeddings;
|
|
39668
|
-
}
|
|
39669
|
-
static async create(config, logger) {
|
|
39670
|
-
const log2 = logger.child({
|
|
39671
|
-
module: "speech",
|
|
39672
|
-
provider: "pocket-tts",
|
|
39673
|
-
component: "onnx-engine"
|
|
39674
|
-
});
|
|
39675
|
-
const modelDir = config.modelDir;
|
|
39676
|
-
const onnxDir = `${modelDir}/onnx`;
|
|
39677
|
-
const precision = config.precision ?? "int8";
|
|
39678
|
-
const device = config.device ?? "auto";
|
|
39679
|
-
const temperature = config.temperature ?? 0.7;
|
|
39680
|
-
const lsdSteps = config.lsdSteps ?? 10;
|
|
39681
|
-
const maxFrames = config.maxFrames ?? 500;
|
|
39682
|
-
const framesAfterEos = config.framesAfterEos ?? 3;
|
|
39683
|
-
const firstChunkFrames = config.firstChunkFrames ?? 2;
|
|
39684
|
-
const maxChunkFrames = config.maxChunkFrames ?? 15;
|
|
39685
|
-
const tokenizerPath = `${modelDir}/tokenizer.model`;
|
|
39686
|
-
const referenceAudioFile = config.referenceAudioFile ?? `${modelDir}/reference_sample.wav`;
|
|
39687
|
-
const flowMainFile = precision === "int8" ? "flow_lm_main_int8.onnx" : "flow_lm_main.onnx";
|
|
39688
|
-
const flowFlowFile = precision === "int8" ? "flow_lm_flow_int8.onnx" : "flow_lm_flow.onnx";
|
|
39689
|
-
const decoderFile = precision === "int8" ? "mimi_decoder_int8.onnx" : "mimi_decoder.onnx";
|
|
39690
|
-
assertFileExists(`${onnxDir}/mimi_encoder.onnx`, "PocketTTS mimi_encoder");
|
|
39691
|
-
assertFileExists(`${onnxDir}/text_conditioner.onnx`, "PocketTTS text_conditioner");
|
|
39692
|
-
assertFileExists(`${onnxDir}/${flowMainFile}`, "PocketTTS flow_lm_main");
|
|
39693
|
-
assertFileExists(`${onnxDir}/${flowFlowFile}`, "PocketTTS flow_lm_flow");
|
|
39694
|
-
assertFileExists(`${onnxDir}/${decoderFile}`, "PocketTTS mimi_decoder");
|
|
39695
|
-
assertFileExists(tokenizerPath, "PocketTTS tokenizer.model");
|
|
39696
|
-
assertFileExists(referenceAudioFile, "PocketTTS reference_sample.wav");
|
|
39697
|
-
const ort = await loadOrt();
|
|
39698
|
-
const providers = getOrtProviders(ort, device);
|
|
39699
|
-
const [tokenizer, mimiEncoder, textConditioner, flowLmMain, flowLmFlow, mimiDecoder] = await Promise.all([
|
|
39700
|
-
loadSentencePiece(tokenizerPath),
|
|
39701
|
-
ort.InferenceSession.create(`${onnxDir}/mimi_encoder.onnx`, {
|
|
39702
|
-
executionProviders: providers
|
|
39703
|
-
}),
|
|
39704
|
-
ort.InferenceSession.create(`${onnxDir}/text_conditioner.onnx`, {
|
|
39705
|
-
executionProviders: providers
|
|
39706
|
-
}),
|
|
39707
|
-
ort.InferenceSession.create(`${onnxDir}/${flowMainFile}`, {
|
|
39708
|
-
executionProviders: providers
|
|
39709
|
-
}),
|
|
39710
|
-
ort.InferenceSession.create(`${onnxDir}/${flowFlowFile}`, {
|
|
39711
|
-
executionProviders: providers
|
|
39712
|
-
}),
|
|
39713
|
-
ort.InferenceSession.create(`${onnxDir}/${decoderFile}`, { executionProviders: providers })
|
|
39714
|
-
]);
|
|
39715
|
-
const stBuffers = [];
|
|
39716
|
-
for (let j = 0; j < lsdSteps; j += 1) {
|
|
39717
|
-
const s = j / lsdSteps;
|
|
39718
|
-
const t = s + 1 / lsdSteps;
|
|
39719
|
-
stBuffers.push({
|
|
39720
|
-
s: new ort.Tensor("float32", new Float32Array([s]), [1, 1]),
|
|
39721
|
-
t: new ort.Tensor("float32", new Float32Array([t]), [1, 1])
|
|
39722
|
-
});
|
|
39723
|
-
}
|
|
39724
|
-
const refWav = await readFile3(referenceAudioFile);
|
|
39725
|
-
const parsed = parsePcm16MonoWav(refWav);
|
|
39726
|
-
let pcm16 = parsed.pcm16;
|
|
39727
|
-
if (parsed.sampleRate !== _PocketTtsOnnxEngine.SAMPLE_RATE) {
|
|
39728
|
-
const resampler = new Pcm16MonoResampler({
|
|
39729
|
-
inputRate: parsed.sampleRate,
|
|
39730
|
-
outputRate: _PocketTtsOnnxEngine.SAMPLE_RATE
|
|
39731
|
-
});
|
|
39732
|
-
pcm16 = resampler.processChunk(pcm16);
|
|
39733
|
-
}
|
|
39734
|
-
const floatAudio = pcm16leToFloat32(pcm16);
|
|
39735
|
-
const audioTensor = new ort.Tensor("float32", floatAudio, [1, 1, floatAudio.length]);
|
|
39736
|
-
const encoded = await mimiEncoder.run({ audio: audioTensor });
|
|
39737
|
-
const firstOutName = mimiEncoder.outputNames?.[0];
|
|
39738
|
-
const voiceEmb = firstOutName ? encoded[firstOutName] : Object.values(encoded)[0];
|
|
39739
|
-
if (!voiceEmb) {
|
|
39740
|
-
throw new Error("PocketTTS mimi_encoder: missing output");
|
|
39741
|
-
}
|
|
39742
|
-
log2.info({ precision, device, providers, lsdSteps, temperature }, "PocketTTS ONNX initialized");
|
|
39743
|
-
return new _PocketTtsOnnxEngine({
|
|
39744
|
-
ort,
|
|
39745
|
-
temperature,
|
|
39746
|
-
lsdSteps,
|
|
39747
|
-
maxFrames,
|
|
39748
|
-
framesAfterEos,
|
|
39749
|
-
firstChunkFrames,
|
|
39750
|
-
maxChunkFrames,
|
|
39751
|
-
tokenizer,
|
|
39752
|
-
textConditioner,
|
|
39753
|
-
flowLmMain,
|
|
39754
|
-
flowLmFlow,
|
|
39755
|
-
mimiDecoder,
|
|
39756
|
-
stBuffers,
|
|
39757
|
-
voiceEmbeddings: voiceEmb
|
|
39758
|
-
});
|
|
39759
|
-
}
|
|
39760
|
-
tokenize(text) {
|
|
39761
|
-
const normalized = normalizeTextForPocket(text);
|
|
39762
|
-
const ids = this.tokenizer.encodeIds(normalized);
|
|
39763
|
-
const data = toBigInt64(ids ?? []);
|
|
39764
|
-
return new this.ort.Tensor("int64", data, [1, data.length]);
|
|
39765
|
-
}
|
|
39766
|
-
async runTextConditioner(tokenIds) {
|
|
39767
|
-
const out = await this.textConditioner.run({ token_ids: tokenIds });
|
|
39768
|
-
const firstOutName = this.textConditioner.outputNames?.[0];
|
|
39769
|
-
const t = firstOutName ? out[firstOutName] : Object.values(out)[0];
|
|
39770
|
-
if (!t) throw new Error("PocketTTS text_conditioner: missing output");
|
|
39771
|
-
return t;
|
|
39772
|
-
}
|
|
39773
|
-
async *runFlowLm(textEmbeddings) {
|
|
39774
|
-
const ort = this.ort;
|
|
39775
|
-
const state = initState(this.flowLmMain, ort);
|
|
39776
|
-
const emptySeq = new ort.Tensor("float32", new Float32Array(0), [1, 0, 32]);
|
|
39777
|
-
const emptyText = new ort.Tensor("float32", new Float32Array(0), [1, 0, 1024]);
|
|
39778
|
-
const resVoice = await this.flowLmMain.run({
|
|
39779
|
-
sequence: emptySeq,
|
|
39780
|
-
text_embeddings: this.voiceEmbeddings,
|
|
39781
|
-
...state
|
|
39782
|
-
});
|
|
39783
|
-
updateStateFromOutputs(state, resVoice);
|
|
39784
|
-
const resText = await this.flowLmMain.run({
|
|
39785
|
-
sequence: emptySeq,
|
|
39786
|
-
text_embeddings: textEmbeddings,
|
|
39787
|
-
...state
|
|
39788
|
-
});
|
|
39789
|
-
updateStateFromOutputs(state, resText);
|
|
39790
|
-
const curr = new Float32Array(32);
|
|
39791
|
-
curr.fill(Number.NaN);
|
|
39792
|
-
let currTensor = new ort.Tensor("float32", curr, [1, 1, 32]);
|
|
39793
|
-
const dt = 1 / this.lsdSteps;
|
|
39794
|
-
let eosStep = null;
|
|
39795
|
-
for (let step = 0; step < this.maxFrames; step += 1) {
|
|
39796
|
-
const resStep = await this.flowLmMain.run({
|
|
39797
|
-
sequence: currTensor,
|
|
39798
|
-
text_embeddings: emptyText,
|
|
39799
|
-
...state
|
|
39800
|
-
});
|
|
39801
|
-
const outputNames = this.flowLmMain.outputNames;
|
|
39802
|
-
const conditioningName = outputNames?.[0] ?? Object.keys(resStep)[0];
|
|
39803
|
-
const eosName = outputNames?.[1] ?? Object.keys(resStep)[1];
|
|
39804
|
-
const conditioning = resStep[conditioningName];
|
|
39805
|
-
const eos = resStep[eosName];
|
|
39806
|
-
if (!conditioning || !eos) {
|
|
39807
|
-
throw new Error("PocketTTS flow_lm_main: missing conditioning/EOS outputs");
|
|
39808
|
-
}
|
|
39809
|
-
updateStateFromOutputs(state, resStep);
|
|
39810
|
-
const eosData = tensorDataFloat32(eos);
|
|
39811
|
-
if (eosData[0] > -4 && eosStep === null) {
|
|
39812
|
-
eosStep = step;
|
|
39813
|
-
}
|
|
39814
|
-
if (eosStep !== null && step >= eosStep + this.framesAfterEos) {
|
|
39815
|
-
break;
|
|
39816
|
-
}
|
|
39817
|
-
const std = this.temperature > 0 ? Math.sqrt(this.temperature) : 0;
|
|
39818
|
-
const x = new Float32Array(32);
|
|
39819
|
-
if (std > 0) {
|
|
39820
|
-
for (let i = 0; i < x.length; i += 1) {
|
|
39821
|
-
x[i] = randn() * std;
|
|
39822
|
-
}
|
|
39823
|
-
}
|
|
39824
|
-
for (const st of this.stBuffers) {
|
|
39825
|
-
const xTensor = new ort.Tensor("float32", x, [1, 32]);
|
|
39826
|
-
const flowOut = await this.flowLmFlow.run({
|
|
39827
|
-
c: conditioning,
|
|
39828
|
-
s: st.s,
|
|
39829
|
-
t: st.t,
|
|
39830
|
-
x: xTensor
|
|
39831
|
-
});
|
|
39832
|
-
const first = this.flowLmFlow.outputNames?.[0];
|
|
39833
|
-
const flowTensor = first ? flowOut[first] : Object.values(flowOut)[0];
|
|
39834
|
-
if (!flowTensor) throw new Error("PocketTTS flow_lm_flow: missing output");
|
|
39835
|
-
const delta = tensorDataFloat32(flowTensor);
|
|
39836
|
-
for (let i = 0; i < x.length; i += 1) {
|
|
39837
|
-
x[i] = x[i] + delta[i] * dt;
|
|
39838
|
-
}
|
|
39839
|
-
}
|
|
39840
|
-
yield x;
|
|
39841
|
-
currTensor = new ort.Tensor("float32", x, [1, 1, 32]);
|
|
39842
|
-
}
|
|
39843
|
-
}
|
|
39844
|
-
async decodeLatentsChunk(frames, state) {
|
|
39845
|
-
const ort = this.ort;
|
|
39846
|
-
const frameCount = frames.length;
|
|
39847
|
-
const flattened = new Float32Array(frameCount * 32);
|
|
39848
|
-
for (let i = 0; i < frameCount; i += 1) {
|
|
39849
|
-
flattened.set(frames[i], i * 32);
|
|
39850
|
-
}
|
|
39851
|
-
const latent = new ort.Tensor("float32", flattened, [1, frameCount, 32]);
|
|
39852
|
-
const out = await this.mimiDecoder.run({ latent, ...state });
|
|
39853
|
-
updateStateFromOutputs(state, out);
|
|
39854
|
-
const firstOutName = this.mimiDecoder.outputNames?.[0];
|
|
39855
|
-
const audioTensor = firstOutName ? out[firstOutName] : Object.values(out)[0];
|
|
39856
|
-
if (!audioTensor) {
|
|
39857
|
-
throw new Error("PocketTTS mimi_decoder: missing audio output");
|
|
39858
|
-
}
|
|
39859
|
-
return tensorDataFloat32(audioTensor);
|
|
39860
|
-
}
|
|
39861
|
-
async *streamAudio(text) {
|
|
39862
|
-
const tokenIds = this.tokenize(text);
|
|
39863
|
-
const textEmb = await this.runTextConditioner(tokenIds);
|
|
39864
|
-
const decoderState = initState(this.mimiDecoder, this.ort);
|
|
39865
|
-
const generated = [];
|
|
39866
|
-
let decodedFrames = 0;
|
|
39867
|
-
for await (const latent of this.runFlowLm(textEmb)) {
|
|
39868
|
-
generated.push(latent);
|
|
39869
|
-
const pending = generated.length - decodedFrames;
|
|
39870
|
-
let chunkSize = 0;
|
|
39871
|
-
if (decodedFrames === 0) {
|
|
39872
|
-
if (pending >= this.firstChunkFrames) {
|
|
39873
|
-
chunkSize = this.firstChunkFrames;
|
|
39874
|
-
}
|
|
39875
|
-
} else if (pending >= this.maxChunkFrames) {
|
|
39876
|
-
chunkSize = this.maxChunkFrames;
|
|
39877
|
-
}
|
|
39878
|
-
if (chunkSize > 0) {
|
|
39879
|
-
const audio = await this.decodeLatentsChunk(
|
|
39880
|
-
generated.slice(decodedFrames, decodedFrames + chunkSize),
|
|
39881
|
-
decoderState
|
|
39882
|
-
);
|
|
39883
|
-
decodedFrames += chunkSize;
|
|
39884
|
-
yield audio;
|
|
39885
|
-
}
|
|
39886
|
-
}
|
|
39887
|
-
if (decodedFrames < generated.length) {
|
|
39888
|
-
const audio = await this.decodeLatentsChunk(generated.slice(decodedFrames), decoderState);
|
|
39889
|
-
yield audio;
|
|
39890
|
-
}
|
|
39891
|
-
}
|
|
39892
|
-
};
|
|
39893
|
-
|
|
39894
39462
|
// ../server/src/server/speech/providers/local/sherpa/sherpa-parakeet-stt.ts
|
|
39895
39463
|
import { v4 as uuidv48 } from "uuid";
|
|
39896
39464
|
|
|
@@ -44100,7 +43668,7 @@ import path16 from "node:path";
|
|
|
44100
43668
|
import WebSocket3 from "ws";
|
|
44101
43669
|
|
|
44102
43670
|
// ../cli/src/utils/client-id.ts
|
|
44103
|
-
import { mkdir as mkdir4, readFile as
|
|
43671
|
+
import { mkdir as mkdir4, readFile as readFile3, writeFile as writeFile4 } from "node:fs/promises";
|
|
44104
43672
|
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
44105
43673
|
import { dirname as dirname4, join as join9 } from "node:path";
|
|
44106
43674
|
import { homedir as homedir5 } from "node:os";
|
|
@@ -44121,7 +43689,7 @@ async function getOrCreateCliClientId() {
|
|
|
44121
43689
|
return cachedClientId;
|
|
44122
43690
|
}
|
|
44123
43691
|
try {
|
|
44124
|
-
const existing = normalizeClientId2(await
|
|
43692
|
+
const existing = normalizeClientId2(await readFile3(CLIENT_SESSION_KEY_FILE, "utf8"));
|
|
44125
43693
|
if (existing) {
|
|
44126
43694
|
cachedClientId = existing;
|
|
44127
43695
|
return existing;
|
|
@@ -44173,7 +43741,7 @@ function isTcpDaemonHost(host) {
|
|
|
44173
43741
|
}
|
|
44174
43742
|
function readPidSocketTarget(appostleHome) {
|
|
44175
43743
|
const pidPath = path16.join(appostleHome, PID_FILENAME);
|
|
44176
|
-
if (!
|
|
43744
|
+
if (!existsSync10(pidPath)) {
|
|
44177
43745
|
return null;
|
|
44178
43746
|
}
|
|
44179
43747
|
try {
|
|
@@ -45487,7 +45055,7 @@ async function runStopCommand(id, options, _command) {
|
|
|
45487
45055
|
}
|
|
45488
45056
|
|
|
45489
45057
|
// ../cli/src/commands/agent/send.ts
|
|
45490
|
-
import { readFile as
|
|
45058
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
45491
45059
|
import { extname as extname3, resolve as resolve11 } from "node:path";
|
|
45492
45060
|
var agentSendSchema = {
|
|
45493
45061
|
idField: "agentId",
|
|
@@ -45504,7 +45072,7 @@ async function readImageFiles(imagePaths) {
|
|
|
45504
45072
|
const images = [];
|
|
45505
45073
|
for (const path22 of imagePaths) {
|
|
45506
45074
|
try {
|
|
45507
|
-
const buffer = await
|
|
45075
|
+
const buffer = await readFile4(path22);
|
|
45508
45076
|
const ext = extname3(path22).toLowerCase();
|
|
45509
45077
|
let mimeType = "image/jpeg";
|
|
45510
45078
|
switch (ext) {
|
|
@@ -45568,7 +45136,7 @@ async function resolvePromptInput(options) {
|
|
|
45568
45136
|
throw error;
|
|
45569
45137
|
}
|
|
45570
45138
|
try {
|
|
45571
|
-
return await
|
|
45139
|
+
return await readFile4(resolve11(promptFilePath), "utf8");
|
|
45572
45140
|
} catch (err) {
|
|
45573
45141
|
const message = err instanceof Error ? err.message : String(err);
|
|
45574
45142
|
const error = {
|
|
@@ -46677,7 +46245,7 @@ import chalk3 from "chalk";
|
|
|
46677
46245
|
|
|
46678
46246
|
// ../cli/src/commands/daemon/local-daemon.ts
|
|
46679
46247
|
import { spawn as spawn7, spawnSync } from "node:child_process";
|
|
46680
|
-
import { existsSync as
|
|
46248
|
+
import { existsSync as existsSync11, readFileSync as readFileSync8 } from "node:fs";
|
|
46681
46249
|
import { createRequire as createRequire3 } from "node:module";
|
|
46682
46250
|
import path17 from "node:path";
|
|
46683
46251
|
import { fileURLToPath } from "node:url";
|
|
@@ -46731,7 +46299,7 @@ function resolveDaemonRunnerEntry() {
|
|
|
46731
46299
|
try {
|
|
46732
46300
|
const here = fileURLToPath(import.meta.url);
|
|
46733
46301
|
const sibling = path17.join(path17.dirname(here), "supervisor-entrypoint.js");
|
|
46734
|
-
if (
|
|
46302
|
+
if (existsSync11(sibling)) {
|
|
46735
46303
|
return sibling;
|
|
46736
46304
|
}
|
|
46737
46305
|
} catch {
|
|
@@ -46747,12 +46315,12 @@ function resolveDaemonRunnerEntry() {
|
|
|
46747
46315
|
let currentDir = path17.dirname(serverExportPath);
|
|
46748
46316
|
while (true) {
|
|
46749
46317
|
const packageJsonPath = path17.join(currentDir, "package.json");
|
|
46750
|
-
if (
|
|
46318
|
+
if (existsSync11(packageJsonPath)) {
|
|
46751
46319
|
try {
|
|
46752
46320
|
const packageJson = JSON.parse(readFileSync8(packageJsonPath, "utf-8"));
|
|
46753
46321
|
if (packageJson.name === "@appostle/server") {
|
|
46754
46322
|
const distRunner = path17.join(currentDir, "dist", "scripts", "supervisor-entrypoint.js");
|
|
46755
|
-
if (
|
|
46323
|
+
if (existsSync11(distRunner)) {
|
|
46756
46324
|
return distRunner;
|
|
46757
46325
|
}
|
|
46758
46326
|
return path17.join(currentDir, "scripts", "supervisor-entrypoint.ts");
|
|
@@ -46912,7 +46480,7 @@ function resolveLocalDaemonState(options = {}) {
|
|
|
46912
46480
|
const config = loadConfig(home, { env });
|
|
46913
46481
|
const pidPath = pidFilePath(home);
|
|
46914
46482
|
const logPath = path17.join(home, DAEMON_LOG_FILENAME);
|
|
46915
|
-
const pidInfo =
|
|
46483
|
+
const pidInfo = existsSync11(pidPath) ? readPidFile(pidPath) : null;
|
|
46916
46484
|
const running = pidInfo ? isProcessRunning(pidInfo.pid) : false;
|
|
46917
46485
|
const listen = pidInfo?.listen ?? config.listen;
|
|
46918
46486
|
return {
|
|
@@ -50272,7 +49840,7 @@ function createCli() {
|
|
|
50272
49840
|
}
|
|
50273
49841
|
|
|
50274
49842
|
// ../cli/src/classify.ts
|
|
50275
|
-
import { existsSync as
|
|
49843
|
+
import { existsSync as existsSync12, statSync as statSync3 } from "node:fs";
|
|
50276
49844
|
import { homedir as homedir7 } from "node:os";
|
|
50277
49845
|
import path20 from "node:path";
|
|
50278
49846
|
function expandUserPath2(inputPath) {
|
|
@@ -50286,7 +49854,7 @@ function expandUserPath2(inputPath) {
|
|
|
50286
49854
|
}
|
|
50287
49855
|
function isExistingDirectory(input) {
|
|
50288
49856
|
const resolvedPath = path20.resolve(input.cwd, expandUserPath2(input.pathArg));
|
|
50289
|
-
if (!
|
|
49857
|
+
if (!existsSync12(resolvedPath)) {
|
|
50290
49858
|
return false;
|
|
50291
49859
|
}
|
|
50292
49860
|
return statSync3(resolvedPath).isDirectory();
|
|
@@ -50312,7 +49880,7 @@ function classifyInvocation(input) {
|
|
|
50312
49880
|
}
|
|
50313
49881
|
|
|
50314
49882
|
// ../cli/src/commands/open.ts
|
|
50315
|
-
import { existsSync as
|
|
49883
|
+
import { existsSync as existsSync13 } from "node:fs";
|
|
50316
49884
|
import { spawn as spawn8 } from "node:child_process";
|
|
50317
49885
|
import { homedir as homedir8 } from "node:os";
|
|
50318
49886
|
import path21 from "node:path";
|
|
@@ -50323,7 +49891,7 @@ function findDesktopApp() {
|
|
|
50323
49891
|
path21.join(homedir8(), "Applications", "Appostle.app")
|
|
50324
49892
|
];
|
|
50325
49893
|
for (const candidate of candidates) {
|
|
50326
|
-
if (
|
|
49894
|
+
if (existsSync13(candidate)) {
|
|
50327
49895
|
return candidate;
|
|
50328
49896
|
}
|
|
50329
49897
|
}
|
|
@@ -50336,7 +49904,7 @@ function findDesktopApp() {
|
|
|
50336
49904
|
path21.join(homedir8(), "Applications", "Appostle.AppImage")
|
|
50337
49905
|
];
|
|
50338
49906
|
for (const candidate of candidates) {
|
|
50339
|
-
if (
|
|
49907
|
+
if (existsSync13(candidate)) {
|
|
50340
49908
|
return candidate;
|
|
50341
49909
|
}
|
|
50342
49910
|
}
|
|
@@ -50348,7 +49916,7 @@ function findDesktopApp() {
|
|
|
50348
49916
|
return null;
|
|
50349
49917
|
}
|
|
50350
49918
|
const candidate = path21.join(localAppData, "Programs", "Appostle", "Appostle.exe");
|
|
50351
|
-
return
|
|
49919
|
+
return existsSync13(candidate) ? candidate : null;
|
|
50352
49920
|
}
|
|
50353
49921
|
return null;
|
|
50354
49922
|
}
|