appostle-installer 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/appostle-installer.js +5 -1
- package/dist/appostle-installer.js.map +2 -2
- package/dist/appostle.js +40 -456
- package/dist/appostle.js.map +4 -4
- package/dist/supervisor-entrypoint.js +485 -0
- package/dist/supervisor-entrypoint.js.map +7 -0
- package/dist/worker.js +55338 -0
- package/dist/worker.js.map +7 -0
- package/package.json +1 -1
package/dist/appostle.js
CHANGED
|
@@ -9,7 +9,7 @@ import { createRequire as createRequire5 } from "node:module";
|
|
|
9
9
|
import { Command as Command2 } from "commander";
|
|
10
10
|
|
|
11
11
|
// ../cli/src/utils/client.ts
|
|
12
|
-
import { existsSync as
|
|
12
|
+
import { existsSync as existsSync10, readFileSync as readFileSync6 } from "node:fs";
|
|
13
13
|
|
|
14
14
|
// ../server/src/server/bootstrap.ts
|
|
15
15
|
import express from "express";
|
|
@@ -7086,18 +7086,6 @@ function pcm16lePeakAbs(pcm16le) {
|
|
|
7086
7086
|
}
|
|
7087
7087
|
return peak;
|
|
7088
7088
|
}
|
|
7089
|
-
function pcm16leToFloat32(pcm16le, gain = 1) {
|
|
7090
|
-
if (pcm16le.length % 2 !== 0) {
|
|
7091
|
-
throw new Error(`PCM16 chunk byteLength must be even, got ${pcm16le.length}`);
|
|
7092
|
-
}
|
|
7093
|
-
const int16 = new Int16Array(pcm16le.buffer, pcm16le.byteOffset, pcm16le.byteLength / 2);
|
|
7094
|
-
const out = new Float32Array(int16.length);
|
|
7095
|
-
for (let i = 0; i < int16.length; i += 1) {
|
|
7096
|
-
const v = int16[i] / 32768 * gain;
|
|
7097
|
-
out[i] = v > 1 ? 1 : v < -1 ? -1 : v;
|
|
7098
|
-
}
|
|
7099
|
-
return out;
|
|
7100
|
-
}
|
|
7101
7089
|
|
|
7102
7090
|
// ../server/src/server/agent/pcm16-resampler.ts
|
|
7103
7091
|
var Pcm16MonoResampler = class {
|
|
@@ -14802,12 +14790,12 @@ function extractPlanNameFromFrontmatter(content) {
|
|
|
14802
14790
|
return null;
|
|
14803
14791
|
}
|
|
14804
14792
|
function resolvePlanFilename(options) {
|
|
14805
|
-
const { originalPath, newSlug, existsSync:
|
|
14793
|
+
const { originalPath, newSlug, existsSync: existsSync14 } = options;
|
|
14806
14794
|
const dir = path5.dirname(originalPath);
|
|
14807
14795
|
const base = `${newSlug}.md`;
|
|
14808
14796
|
let candidate = path5.join(dir, base);
|
|
14809
14797
|
let counter = 2;
|
|
14810
|
-
while (candidate !== originalPath &&
|
|
14798
|
+
while (candidate !== originalPath && existsSync14(candidate)) {
|
|
14811
14799
|
candidate = path5.join(dir, `${newSlug}-${counter}.md`);
|
|
14812
14800
|
counter += 1;
|
|
14813
14801
|
}
|
|
@@ -39416,52 +39404,11 @@ var SHERPA_ONNX_MODEL_CATALOG = {
|
|
|
39416
39404
|
description: "Kokoro TTS (higher quality; larger).",
|
|
39417
39405
|
aliases: ["kokoro"],
|
|
39418
39406
|
defaultFor: "tts"
|
|
39419
|
-
},
|
|
39420
|
-
"pocket-tts-onnx-int8": {
|
|
39421
|
-
kind: "tts",
|
|
39422
|
-
extractedDir: "pocket-tts-onnx-int8",
|
|
39423
|
-
downloadFiles: [
|
|
39424
|
-
{
|
|
39425
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/onnx/mimi_encoder.onnx",
|
|
39426
|
-
relPath: "onnx/mimi_encoder.onnx"
|
|
39427
|
-
},
|
|
39428
|
-
{
|
|
39429
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/onnx/text_conditioner.onnx",
|
|
39430
|
-
relPath: "onnx/text_conditioner.onnx"
|
|
39431
|
-
},
|
|
39432
|
-
{
|
|
39433
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/onnx/flow_lm_main_int8.onnx",
|
|
39434
|
-
relPath: "onnx/flow_lm_main_int8.onnx"
|
|
39435
|
-
},
|
|
39436
|
-
{
|
|
39437
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/onnx/flow_lm_flow_int8.onnx",
|
|
39438
|
-
relPath: "onnx/flow_lm_flow_int8.onnx"
|
|
39439
|
-
},
|
|
39440
|
-
{
|
|
39441
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/onnx/mimi_decoder_int8.onnx",
|
|
39442
|
-
relPath: "onnx/mimi_decoder_int8.onnx"
|
|
39443
|
-
},
|
|
39444
|
-
{
|
|
39445
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/tokenizer.model",
|
|
39446
|
-
relPath: "tokenizer.model"
|
|
39447
|
-
},
|
|
39448
|
-
{
|
|
39449
|
-
url: "https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/reference_sample.wav",
|
|
39450
|
-
relPath: "reference_sample.wav"
|
|
39451
|
-
}
|
|
39452
|
-
],
|
|
39453
|
-
requiredFiles: [
|
|
39454
|
-
"onnx/mimi_encoder.onnx",
|
|
39455
|
-
"onnx/text_conditioner.onnx",
|
|
39456
|
-
"onnx/flow_lm_main_int8.onnx",
|
|
39457
|
-
"onnx/flow_lm_flow_int8.onnx",
|
|
39458
|
-
"onnx/mimi_decoder_int8.onnx",
|
|
39459
|
-
"tokenizer.model",
|
|
39460
|
-
"reference_sample.wav"
|
|
39461
|
-
],
|
|
39462
|
-
description: "Pocket TTS ONNX (INT8) with streaming decode support (via onnxruntime).",
|
|
39463
|
-
aliases: ["pocket", "pocket-tts"]
|
|
39464
39407
|
}
|
|
39408
|
+
// pocket-tts-onnx-int8 was previously listed here. Removed on
|
|
39409
|
+
// 2026-05-02: source was a single individual HuggingFace account
|
|
39410
|
+
// (KevinAHM/pocket-tts-onnx) with no provenance/integrity guarantees.
|
|
39411
|
+
// Re-add only if we host a verified mirror with pinned hashes.
|
|
39465
39412
|
};
|
|
39466
39413
|
var ALL_MODEL_IDS = Object.keys(SHERPA_ONNX_MODEL_CATALOG);
|
|
39467
39414
|
var LOCAL_STT_MODEL_IDS = ALL_MODEL_IDS.filter(
|
|
@@ -39512,385 +39459,6 @@ var LocalTtsModelIdSchema = createAliasedModelIdSchema({
|
|
|
39512
39459
|
aliases: TTS_MODEL_ALIASES
|
|
39513
39460
|
});
|
|
39514
39461
|
|
|
39515
|
-
// ../server/src/server/speech/providers/local/pocket/pocket-tts-onnx.ts
|
|
39516
|
-
import { existsSync as existsSync10 } from "node:fs";
|
|
39517
|
-
import { readFile as readFile3 } from "node:fs/promises";
|
|
39518
|
-
function assertFileExists(filePath, label) {
|
|
39519
|
-
if (!existsSync10(filePath)) {
|
|
39520
|
-
throw new Error(`Missing ${label}: ${filePath}`);
|
|
39521
|
-
}
|
|
39522
|
-
}
|
|
39523
|
-
function product(dims) {
|
|
39524
|
-
let out = 1;
|
|
39525
|
-
for (const d of dims) out *= d;
|
|
39526
|
-
return out;
|
|
39527
|
-
}
|
|
39528
|
-
function normalizeDims(dims) {
|
|
39529
|
-
return dims.map((d) => {
|
|
39530
|
-
if (typeof d === "number" && Number.isFinite(d)) {
|
|
39531
|
-
if (d === 0) return 0;
|
|
39532
|
-
if (d > 0) return d;
|
|
39533
|
-
return 1;
|
|
39534
|
-
}
|
|
39535
|
-
return 1;
|
|
39536
|
-
});
|
|
39537
|
-
}
|
|
39538
|
-
function getSessionInputMeta(session, inputName) {
|
|
39539
|
-
const metaAny = session.inputMetadata;
|
|
39540
|
-
if (Array.isArray(metaAny)) {
|
|
39541
|
-
const entry = metaAny.find(
|
|
39542
|
-
(m) => m && typeof m === "object" && m.name === inputName
|
|
39543
|
-
);
|
|
39544
|
-
if (!entry) return void 0;
|
|
39545
|
-
return { type: entry.type, dims: entry.shape };
|
|
39546
|
-
}
|
|
39547
|
-
if (metaAny && typeof metaAny === "object" && inputName in metaAny) {
|
|
39548
|
-
const entry = metaAny[inputName];
|
|
39549
|
-
return { type: entry?.type, dims: entry?.dimensions ?? entry?.shape };
|
|
39550
|
-
}
|
|
39551
|
-
return void 0;
|
|
39552
|
-
}
|
|
39553
|
-
function toBigInt64(values) {
|
|
39554
|
-
const out = new BigInt64Array(values.length);
|
|
39555
|
-
for (let i = 0; i < values.length; i += 1) {
|
|
39556
|
-
out[i] = BigInt(values[i]);
|
|
39557
|
-
}
|
|
39558
|
-
return out;
|
|
39559
|
-
}
|
|
39560
|
-
function randn() {
|
|
39561
|
-
let u = 0;
|
|
39562
|
-
let v = 0;
|
|
39563
|
-
while (u === 0) u = Math.random();
|
|
39564
|
-
while (v === 0) v = Math.random();
|
|
39565
|
-
return Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * v);
|
|
39566
|
-
}
|
|
39567
|
-
function normalizeTextForPocket(text) {
|
|
39568
|
-
const trimmed = text.trim();
|
|
39569
|
-
if (!trimmed) {
|
|
39570
|
-
throw new Error("Cannot synthesize empty text");
|
|
39571
|
-
}
|
|
39572
|
-
let out = trimmed;
|
|
39573
|
-
if (out.length > 0 && /[A-Za-z0-9]$/.test(out)) {
|
|
39574
|
-
out = `${out}.`;
|
|
39575
|
-
}
|
|
39576
|
-
if (out.length > 0 && /[a-z]/.test(out[0])) {
|
|
39577
|
-
out = out[0].toUpperCase() + out.slice(1);
|
|
39578
|
-
}
|
|
39579
|
-
return out;
|
|
39580
|
-
}
|
|
39581
|
-
async function loadOrt() {
|
|
39582
|
-
return await import("onnxruntime-node");
|
|
39583
|
-
}
|
|
39584
|
-
async function loadSentencePiece(tokenizerModelPath) {
|
|
39585
|
-
const mod = await import("@sctg/sentencepiece-js");
|
|
39586
|
-
const Processor = mod.SentencePieceProcessor ?? mod.default?.SentencePieceProcessor ?? mod.default;
|
|
39587
|
-
if (!Processor) {
|
|
39588
|
-
throw new Error("Failed to load SentencePiece processor from @sctg/sentencepiece-js");
|
|
39589
|
-
}
|
|
39590
|
-
const sp = new Processor();
|
|
39591
|
-
if (typeof sp.load === "function") {
|
|
39592
|
-
await sp.load(tokenizerModelPath);
|
|
39593
|
-
} else if (typeof sp.Load === "function") {
|
|
39594
|
-
sp.Load(tokenizerModelPath);
|
|
39595
|
-
} else {
|
|
39596
|
-
throw new Error("SentencePiece processor does not expose load()/Load()");
|
|
39597
|
-
}
|
|
39598
|
-
return sp;
|
|
39599
|
-
}
|
|
39600
|
-
function getOrtProviders(ort, device) {
|
|
39601
|
-
if (device === "cpu") return ["cpu"];
|
|
39602
|
-
if (device === "cuda") return ["cuda", "cpu"];
|
|
39603
|
-
void ort;
|
|
39604
|
-
return ["cpu"];
|
|
39605
|
-
}
|
|
39606
|
-
function createZeroTensorForInput(ort, session, inputName) {
|
|
39607
|
-
const meta = getSessionInputMeta(session, inputName);
|
|
39608
|
-
const dims = normalizeDims(meta?.dims ?? []);
|
|
39609
|
-
if (dims.length === 0) {
|
|
39610
|
-
throw new Error(`Missing input metadata shape for ${inputName}`);
|
|
39611
|
-
}
|
|
39612
|
-
const type = (meta?.type ?? "float32").toLowerCase();
|
|
39613
|
-
const size = product(dims);
|
|
39614
|
-
if (type.includes("int64")) {
|
|
39615
|
-
return new ort.Tensor("int64", new BigInt64Array(size), dims);
|
|
39616
|
-
}
|
|
39617
|
-
if (type.includes("bool")) {
|
|
39618
|
-
return new ort.Tensor("bool", new Uint8Array(size), dims);
|
|
39619
|
-
}
|
|
39620
|
-
return new ort.Tensor("float32", new Float32Array(size), dims);
|
|
39621
|
-
}
|
|
39622
|
-
function initState(session, ort) {
|
|
39623
|
-
const out = {};
|
|
39624
|
-
for (const name of session.inputNames) {
|
|
39625
|
-
if (name.startsWith("state_")) {
|
|
39626
|
-
out[name] = createZeroTensorForInput(ort, session, name);
|
|
39627
|
-
}
|
|
39628
|
-
}
|
|
39629
|
-
return out;
|
|
39630
|
-
}
|
|
39631
|
-
function updateStateFromOutputs(state, outputs) {
|
|
39632
|
-
for (const [name, tensor] of Object.entries(outputs)) {
|
|
39633
|
-
if (!name.startsWith("out_state_")) continue;
|
|
39634
|
-
const idx = Number.parseInt(name.replace("out_state_", ""), 10);
|
|
39635
|
-
if (Number.isFinite(idx)) {
|
|
39636
|
-
state[`state_${idx}`] = tensor;
|
|
39637
|
-
}
|
|
39638
|
-
}
|
|
39639
|
-
}
|
|
39640
|
-
function tensorDataFloat32(t) {
|
|
39641
|
-
const data = t.data;
|
|
39642
|
-
if (data instanceof Float32Array) return data;
|
|
39643
|
-
if (Array.isArray(data)) return Float32Array.from(data);
|
|
39644
|
-
throw new Error("Unexpected tensor data type (expected Float32Array)");
|
|
39645
|
-
}
|
|
39646
|
-
var PocketTtsOnnxEngine = class _PocketTtsOnnxEngine {
|
|
39647
|
-
static {
|
|
39648
|
-
this.SAMPLE_RATE = 24e3;
|
|
39649
|
-
}
|
|
39650
|
-
static {
|
|
39651
|
-
this.SAMPLES_PER_FRAME = 1920;
|
|
39652
|
-
}
|
|
39653
|
-
constructor(args) {
|
|
39654
|
-
this.ort = args.ort;
|
|
39655
|
-
this.temperature = args.temperature;
|
|
39656
|
-
this.lsdSteps = args.lsdSteps;
|
|
39657
|
-
this.maxFrames = args.maxFrames;
|
|
39658
|
-
this.framesAfterEos = args.framesAfterEos;
|
|
39659
|
-
this.firstChunkFrames = args.firstChunkFrames;
|
|
39660
|
-
this.maxChunkFrames = args.maxChunkFrames;
|
|
39661
|
-
this.tokenizer = args.tokenizer;
|
|
39662
|
-
this.textConditioner = args.textConditioner;
|
|
39663
|
-
this.flowLmMain = args.flowLmMain;
|
|
39664
|
-
this.flowLmFlow = args.flowLmFlow;
|
|
39665
|
-
this.mimiDecoder = args.mimiDecoder;
|
|
39666
|
-
this.stBuffers = args.stBuffers;
|
|
39667
|
-
this.voiceEmbeddings = args.voiceEmbeddings;
|
|
39668
|
-
}
|
|
39669
|
-
static async create(config, logger) {
|
|
39670
|
-
const log2 = logger.child({
|
|
39671
|
-
module: "speech",
|
|
39672
|
-
provider: "pocket-tts",
|
|
39673
|
-
component: "onnx-engine"
|
|
39674
|
-
});
|
|
39675
|
-
const modelDir = config.modelDir;
|
|
39676
|
-
const onnxDir = `${modelDir}/onnx`;
|
|
39677
|
-
const precision = config.precision ?? "int8";
|
|
39678
|
-
const device = config.device ?? "auto";
|
|
39679
|
-
const temperature = config.temperature ?? 0.7;
|
|
39680
|
-
const lsdSteps = config.lsdSteps ?? 10;
|
|
39681
|
-
const maxFrames = config.maxFrames ?? 500;
|
|
39682
|
-
const framesAfterEos = config.framesAfterEos ?? 3;
|
|
39683
|
-
const firstChunkFrames = config.firstChunkFrames ?? 2;
|
|
39684
|
-
const maxChunkFrames = config.maxChunkFrames ?? 15;
|
|
39685
|
-
const tokenizerPath = `${modelDir}/tokenizer.model`;
|
|
39686
|
-
const referenceAudioFile = config.referenceAudioFile ?? `${modelDir}/reference_sample.wav`;
|
|
39687
|
-
const flowMainFile = precision === "int8" ? "flow_lm_main_int8.onnx" : "flow_lm_main.onnx";
|
|
39688
|
-
const flowFlowFile = precision === "int8" ? "flow_lm_flow_int8.onnx" : "flow_lm_flow.onnx";
|
|
39689
|
-
const decoderFile = precision === "int8" ? "mimi_decoder_int8.onnx" : "mimi_decoder.onnx";
|
|
39690
|
-
assertFileExists(`${onnxDir}/mimi_encoder.onnx`, "PocketTTS mimi_encoder");
|
|
39691
|
-
assertFileExists(`${onnxDir}/text_conditioner.onnx`, "PocketTTS text_conditioner");
|
|
39692
|
-
assertFileExists(`${onnxDir}/${flowMainFile}`, "PocketTTS flow_lm_main");
|
|
39693
|
-
assertFileExists(`${onnxDir}/${flowFlowFile}`, "PocketTTS flow_lm_flow");
|
|
39694
|
-
assertFileExists(`${onnxDir}/${decoderFile}`, "PocketTTS mimi_decoder");
|
|
39695
|
-
assertFileExists(tokenizerPath, "PocketTTS tokenizer.model");
|
|
39696
|
-
assertFileExists(referenceAudioFile, "PocketTTS reference_sample.wav");
|
|
39697
|
-
const ort = await loadOrt();
|
|
39698
|
-
const providers = getOrtProviders(ort, device);
|
|
39699
|
-
const [tokenizer, mimiEncoder, textConditioner, flowLmMain, flowLmFlow, mimiDecoder] = await Promise.all([
|
|
39700
|
-
loadSentencePiece(tokenizerPath),
|
|
39701
|
-
ort.InferenceSession.create(`${onnxDir}/mimi_encoder.onnx`, {
|
|
39702
|
-
executionProviders: providers
|
|
39703
|
-
}),
|
|
39704
|
-
ort.InferenceSession.create(`${onnxDir}/text_conditioner.onnx`, {
|
|
39705
|
-
executionProviders: providers
|
|
39706
|
-
}),
|
|
39707
|
-
ort.InferenceSession.create(`${onnxDir}/${flowMainFile}`, {
|
|
39708
|
-
executionProviders: providers
|
|
39709
|
-
}),
|
|
39710
|
-
ort.InferenceSession.create(`${onnxDir}/${flowFlowFile}`, {
|
|
39711
|
-
executionProviders: providers
|
|
39712
|
-
}),
|
|
39713
|
-
ort.InferenceSession.create(`${onnxDir}/${decoderFile}`, { executionProviders: providers })
|
|
39714
|
-
]);
|
|
39715
|
-
const stBuffers = [];
|
|
39716
|
-
for (let j = 0; j < lsdSteps; j += 1) {
|
|
39717
|
-
const s = j / lsdSteps;
|
|
39718
|
-
const t = s + 1 / lsdSteps;
|
|
39719
|
-
stBuffers.push({
|
|
39720
|
-
s: new ort.Tensor("float32", new Float32Array([s]), [1, 1]),
|
|
39721
|
-
t: new ort.Tensor("float32", new Float32Array([t]), [1, 1])
|
|
39722
|
-
});
|
|
39723
|
-
}
|
|
39724
|
-
const refWav = await readFile3(referenceAudioFile);
|
|
39725
|
-
const parsed = parsePcm16MonoWav(refWav);
|
|
39726
|
-
let pcm16 = parsed.pcm16;
|
|
39727
|
-
if (parsed.sampleRate !== _PocketTtsOnnxEngine.SAMPLE_RATE) {
|
|
39728
|
-
const resampler = new Pcm16MonoResampler({
|
|
39729
|
-
inputRate: parsed.sampleRate,
|
|
39730
|
-
outputRate: _PocketTtsOnnxEngine.SAMPLE_RATE
|
|
39731
|
-
});
|
|
39732
|
-
pcm16 = resampler.processChunk(pcm16);
|
|
39733
|
-
}
|
|
39734
|
-
const floatAudio = pcm16leToFloat32(pcm16);
|
|
39735
|
-
const audioTensor = new ort.Tensor("float32", floatAudio, [1, 1, floatAudio.length]);
|
|
39736
|
-
const encoded = await mimiEncoder.run({ audio: audioTensor });
|
|
39737
|
-
const firstOutName = mimiEncoder.outputNames?.[0];
|
|
39738
|
-
const voiceEmb = firstOutName ? encoded[firstOutName] : Object.values(encoded)[0];
|
|
39739
|
-
if (!voiceEmb) {
|
|
39740
|
-
throw new Error("PocketTTS mimi_encoder: missing output");
|
|
39741
|
-
}
|
|
39742
|
-
log2.info({ precision, device, providers, lsdSteps, temperature }, "PocketTTS ONNX initialized");
|
|
39743
|
-
return new _PocketTtsOnnxEngine({
|
|
39744
|
-
ort,
|
|
39745
|
-
temperature,
|
|
39746
|
-
lsdSteps,
|
|
39747
|
-
maxFrames,
|
|
39748
|
-
framesAfterEos,
|
|
39749
|
-
firstChunkFrames,
|
|
39750
|
-
maxChunkFrames,
|
|
39751
|
-
tokenizer,
|
|
39752
|
-
textConditioner,
|
|
39753
|
-
flowLmMain,
|
|
39754
|
-
flowLmFlow,
|
|
39755
|
-
mimiDecoder,
|
|
39756
|
-
stBuffers,
|
|
39757
|
-
voiceEmbeddings: voiceEmb
|
|
39758
|
-
});
|
|
39759
|
-
}
|
|
39760
|
-
tokenize(text) {
|
|
39761
|
-
const normalized = normalizeTextForPocket(text);
|
|
39762
|
-
const ids = this.tokenizer.encodeIds(normalized);
|
|
39763
|
-
const data = toBigInt64(ids ?? []);
|
|
39764
|
-
return new this.ort.Tensor("int64", data, [1, data.length]);
|
|
39765
|
-
}
|
|
39766
|
-
async runTextConditioner(tokenIds) {
|
|
39767
|
-
const out = await this.textConditioner.run({ token_ids: tokenIds });
|
|
39768
|
-
const firstOutName = this.textConditioner.outputNames?.[0];
|
|
39769
|
-
const t = firstOutName ? out[firstOutName] : Object.values(out)[0];
|
|
39770
|
-
if (!t) throw new Error("PocketTTS text_conditioner: missing output");
|
|
39771
|
-
return t;
|
|
39772
|
-
}
|
|
39773
|
-
async *runFlowLm(textEmbeddings) {
|
|
39774
|
-
const ort = this.ort;
|
|
39775
|
-
const state = initState(this.flowLmMain, ort);
|
|
39776
|
-
const emptySeq = new ort.Tensor("float32", new Float32Array(0), [1, 0, 32]);
|
|
39777
|
-
const emptyText = new ort.Tensor("float32", new Float32Array(0), [1, 0, 1024]);
|
|
39778
|
-
const resVoice = await this.flowLmMain.run({
|
|
39779
|
-
sequence: emptySeq,
|
|
39780
|
-
text_embeddings: this.voiceEmbeddings,
|
|
39781
|
-
...state
|
|
39782
|
-
});
|
|
39783
|
-
updateStateFromOutputs(state, resVoice);
|
|
39784
|
-
const resText = await this.flowLmMain.run({
|
|
39785
|
-
sequence: emptySeq,
|
|
39786
|
-
text_embeddings: textEmbeddings,
|
|
39787
|
-
...state
|
|
39788
|
-
});
|
|
39789
|
-
updateStateFromOutputs(state, resText);
|
|
39790
|
-
const curr = new Float32Array(32);
|
|
39791
|
-
curr.fill(Number.NaN);
|
|
39792
|
-
let currTensor = new ort.Tensor("float32", curr, [1, 1, 32]);
|
|
39793
|
-
const dt = 1 / this.lsdSteps;
|
|
39794
|
-
let eosStep = null;
|
|
39795
|
-
for (let step = 0; step < this.maxFrames; step += 1) {
|
|
39796
|
-
const resStep = await this.flowLmMain.run({
|
|
39797
|
-
sequence: currTensor,
|
|
39798
|
-
text_embeddings: emptyText,
|
|
39799
|
-
...state
|
|
39800
|
-
});
|
|
39801
|
-
const outputNames = this.flowLmMain.outputNames;
|
|
39802
|
-
const conditioningName = outputNames?.[0] ?? Object.keys(resStep)[0];
|
|
39803
|
-
const eosName = outputNames?.[1] ?? Object.keys(resStep)[1];
|
|
39804
|
-
const conditioning = resStep[conditioningName];
|
|
39805
|
-
const eos = resStep[eosName];
|
|
39806
|
-
if (!conditioning || !eos) {
|
|
39807
|
-
throw new Error("PocketTTS flow_lm_main: missing conditioning/EOS outputs");
|
|
39808
|
-
}
|
|
39809
|
-
updateStateFromOutputs(state, resStep);
|
|
39810
|
-
const eosData = tensorDataFloat32(eos);
|
|
39811
|
-
if (eosData[0] > -4 && eosStep === null) {
|
|
39812
|
-
eosStep = step;
|
|
39813
|
-
}
|
|
39814
|
-
if (eosStep !== null && step >= eosStep + this.framesAfterEos) {
|
|
39815
|
-
break;
|
|
39816
|
-
}
|
|
39817
|
-
const std = this.temperature > 0 ? Math.sqrt(this.temperature) : 0;
|
|
39818
|
-
const x = new Float32Array(32);
|
|
39819
|
-
if (std > 0) {
|
|
39820
|
-
for (let i = 0; i < x.length; i += 1) {
|
|
39821
|
-
x[i] = randn() * std;
|
|
39822
|
-
}
|
|
39823
|
-
}
|
|
39824
|
-
for (const st of this.stBuffers) {
|
|
39825
|
-
const xTensor = new ort.Tensor("float32", x, [1, 32]);
|
|
39826
|
-
const flowOut = await this.flowLmFlow.run({
|
|
39827
|
-
c: conditioning,
|
|
39828
|
-
s: st.s,
|
|
39829
|
-
t: st.t,
|
|
39830
|
-
x: xTensor
|
|
39831
|
-
});
|
|
39832
|
-
const first = this.flowLmFlow.outputNames?.[0];
|
|
39833
|
-
const flowTensor = first ? flowOut[first] : Object.values(flowOut)[0];
|
|
39834
|
-
if (!flowTensor) throw new Error("PocketTTS flow_lm_flow: missing output");
|
|
39835
|
-
const delta = tensorDataFloat32(flowTensor);
|
|
39836
|
-
for (let i = 0; i < x.length; i += 1) {
|
|
39837
|
-
x[i] = x[i] + delta[i] * dt;
|
|
39838
|
-
}
|
|
39839
|
-
}
|
|
39840
|
-
yield x;
|
|
39841
|
-
currTensor = new ort.Tensor("float32", x, [1, 1, 32]);
|
|
39842
|
-
}
|
|
39843
|
-
}
|
|
39844
|
-
async decodeLatentsChunk(frames, state) {
|
|
39845
|
-
const ort = this.ort;
|
|
39846
|
-
const frameCount = frames.length;
|
|
39847
|
-
const flattened = new Float32Array(frameCount * 32);
|
|
39848
|
-
for (let i = 0; i < frameCount; i += 1) {
|
|
39849
|
-
flattened.set(frames[i], i * 32);
|
|
39850
|
-
}
|
|
39851
|
-
const latent = new ort.Tensor("float32", flattened, [1, frameCount, 32]);
|
|
39852
|
-
const out = await this.mimiDecoder.run({ latent, ...state });
|
|
39853
|
-
updateStateFromOutputs(state, out);
|
|
39854
|
-
const firstOutName = this.mimiDecoder.outputNames?.[0];
|
|
39855
|
-
const audioTensor = firstOutName ? out[firstOutName] : Object.values(out)[0];
|
|
39856
|
-
if (!audioTensor) {
|
|
39857
|
-
throw new Error("PocketTTS mimi_decoder: missing audio output");
|
|
39858
|
-
}
|
|
39859
|
-
return tensorDataFloat32(audioTensor);
|
|
39860
|
-
}
|
|
39861
|
-
async *streamAudio(text) {
|
|
39862
|
-
const tokenIds = this.tokenize(text);
|
|
39863
|
-
const textEmb = await this.runTextConditioner(tokenIds);
|
|
39864
|
-
const decoderState = initState(this.mimiDecoder, this.ort);
|
|
39865
|
-
const generated = [];
|
|
39866
|
-
let decodedFrames = 0;
|
|
39867
|
-
for await (const latent of this.runFlowLm(textEmb)) {
|
|
39868
|
-
generated.push(latent);
|
|
39869
|
-
const pending = generated.length - decodedFrames;
|
|
39870
|
-
let chunkSize = 0;
|
|
39871
|
-
if (decodedFrames === 0) {
|
|
39872
|
-
if (pending >= this.firstChunkFrames) {
|
|
39873
|
-
chunkSize = this.firstChunkFrames;
|
|
39874
|
-
}
|
|
39875
|
-
} else if (pending >= this.maxChunkFrames) {
|
|
39876
|
-
chunkSize = this.maxChunkFrames;
|
|
39877
|
-
}
|
|
39878
|
-
if (chunkSize > 0) {
|
|
39879
|
-
const audio = await this.decodeLatentsChunk(
|
|
39880
|
-
generated.slice(decodedFrames, decodedFrames + chunkSize),
|
|
39881
|
-
decoderState
|
|
39882
|
-
);
|
|
39883
|
-
decodedFrames += chunkSize;
|
|
39884
|
-
yield audio;
|
|
39885
|
-
}
|
|
39886
|
-
}
|
|
39887
|
-
if (decodedFrames < generated.length) {
|
|
39888
|
-
const audio = await this.decodeLatentsChunk(generated.slice(decodedFrames), decoderState);
|
|
39889
|
-
yield audio;
|
|
39890
|
-
}
|
|
39891
|
-
}
|
|
39892
|
-
};
|
|
39893
|
-
|
|
39894
39462
|
// ../server/src/server/speech/providers/local/sherpa/sherpa-parakeet-stt.ts
|
|
39895
39463
|
import { v4 as uuidv48 } from "uuid";
|
|
39896
39464
|
|
|
@@ -44100,7 +43668,7 @@ import path16 from "node:path";
|
|
|
44100
43668
|
import WebSocket3 from "ws";
|
|
44101
43669
|
|
|
44102
43670
|
// ../cli/src/utils/client-id.ts
|
|
44103
|
-
import { mkdir as mkdir4, readFile as
|
|
43671
|
+
import { mkdir as mkdir4, readFile as readFile3, writeFile as writeFile4 } from "node:fs/promises";
|
|
44104
43672
|
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
44105
43673
|
import { dirname as dirname4, join as join9 } from "node:path";
|
|
44106
43674
|
import { homedir as homedir5 } from "node:os";
|
|
@@ -44121,7 +43689,7 @@ async function getOrCreateCliClientId() {
|
|
|
44121
43689
|
return cachedClientId;
|
|
44122
43690
|
}
|
|
44123
43691
|
try {
|
|
44124
|
-
const existing = normalizeClientId2(await
|
|
43692
|
+
const existing = normalizeClientId2(await readFile3(CLIENT_SESSION_KEY_FILE, "utf8"));
|
|
44125
43693
|
if (existing) {
|
|
44126
43694
|
cachedClientId = existing;
|
|
44127
43695
|
return existing;
|
|
@@ -44173,7 +43741,7 @@ function isTcpDaemonHost(host) {
|
|
|
44173
43741
|
}
|
|
44174
43742
|
function readPidSocketTarget(appostleHome) {
|
|
44175
43743
|
const pidPath = path16.join(appostleHome, PID_FILENAME);
|
|
44176
|
-
if (!
|
|
43744
|
+
if (!existsSync10(pidPath)) {
|
|
44177
43745
|
return null;
|
|
44178
43746
|
}
|
|
44179
43747
|
try {
|
|
@@ -45487,7 +45055,7 @@ async function runStopCommand(id, options, _command) {
|
|
|
45487
45055
|
}
|
|
45488
45056
|
|
|
45489
45057
|
// ../cli/src/commands/agent/send.ts
|
|
45490
|
-
import { readFile as
|
|
45058
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
45491
45059
|
import { extname as extname3, resolve as resolve11 } from "node:path";
|
|
45492
45060
|
var agentSendSchema = {
|
|
45493
45061
|
idField: "agentId",
|
|
@@ -45504,7 +45072,7 @@ async function readImageFiles(imagePaths) {
|
|
|
45504
45072
|
const images = [];
|
|
45505
45073
|
for (const path22 of imagePaths) {
|
|
45506
45074
|
try {
|
|
45507
|
-
const buffer = await
|
|
45075
|
+
const buffer = await readFile4(path22);
|
|
45508
45076
|
const ext = extname3(path22).toLowerCase();
|
|
45509
45077
|
let mimeType = "image/jpeg";
|
|
45510
45078
|
switch (ext) {
|
|
@@ -45568,7 +45136,7 @@ async function resolvePromptInput(options) {
|
|
|
45568
45136
|
throw error;
|
|
45569
45137
|
}
|
|
45570
45138
|
try {
|
|
45571
|
-
return await
|
|
45139
|
+
return await readFile4(resolve11(promptFilePath), "utf8");
|
|
45572
45140
|
} catch (err) {
|
|
45573
45141
|
const message = err instanceof Error ? err.message : String(err);
|
|
45574
45142
|
const error = {
|
|
@@ -46677,9 +46245,10 @@ import chalk3 from "chalk";
|
|
|
46677
46245
|
|
|
46678
46246
|
// ../cli/src/commands/daemon/local-daemon.ts
|
|
46679
46247
|
import { spawn as spawn7, spawnSync } from "node:child_process";
|
|
46680
|
-
import { existsSync as
|
|
46248
|
+
import { existsSync as existsSync11, readFileSync as readFileSync8 } from "node:fs";
|
|
46681
46249
|
import { createRequire as createRequire3 } from "node:module";
|
|
46682
46250
|
import path17 from "node:path";
|
|
46251
|
+
import { fileURLToPath } from "node:url";
|
|
46683
46252
|
var DETACHED_STARTUP_GRACE_MS = 1200;
|
|
46684
46253
|
var PID_POLL_INTERVAL_MS = 100;
|
|
46685
46254
|
var KILL_TIMEOUT_MS = 3e3;
|
|
@@ -46727,16 +46296,31 @@ function buildChildEnv(options) {
|
|
|
46727
46296
|
return childEnv;
|
|
46728
46297
|
}
|
|
46729
46298
|
function resolveDaemonRunnerEntry() {
|
|
46730
|
-
|
|
46299
|
+
try {
|
|
46300
|
+
const here = fileURLToPath(import.meta.url);
|
|
46301
|
+
const sibling = path17.join(path17.dirname(here), "supervisor-entrypoint.js");
|
|
46302
|
+
if (existsSync11(sibling)) {
|
|
46303
|
+
return sibling;
|
|
46304
|
+
}
|
|
46305
|
+
} catch {
|
|
46306
|
+
}
|
|
46307
|
+
let serverExportPath;
|
|
46308
|
+
try {
|
|
46309
|
+
serverExportPath = require4.resolve("@appostle/server");
|
|
46310
|
+
} catch {
|
|
46311
|
+
throw new Error(
|
|
46312
|
+
"Unable to resolve @appostle/server package root for daemon runner (and no sibling supervisor-entrypoint.js was bundled)"
|
|
46313
|
+
);
|
|
46314
|
+
}
|
|
46731
46315
|
let currentDir = path17.dirname(serverExportPath);
|
|
46732
46316
|
while (true) {
|
|
46733
46317
|
const packageJsonPath = path17.join(currentDir, "package.json");
|
|
46734
|
-
if (
|
|
46318
|
+
if (existsSync11(packageJsonPath)) {
|
|
46735
46319
|
try {
|
|
46736
46320
|
const packageJson = JSON.parse(readFileSync8(packageJsonPath, "utf-8"));
|
|
46737
46321
|
if (packageJson.name === "@appostle/server") {
|
|
46738
46322
|
const distRunner = path17.join(currentDir, "dist", "scripts", "supervisor-entrypoint.js");
|
|
46739
|
-
if (
|
|
46323
|
+
if (existsSync11(distRunner)) {
|
|
46740
46324
|
return distRunner;
|
|
46741
46325
|
}
|
|
46742
46326
|
return path17.join(currentDir, "scripts", "supervisor-entrypoint.ts");
|
|
@@ -46896,7 +46480,7 @@ function resolveLocalDaemonState(options = {}) {
|
|
|
46896
46480
|
const config = loadConfig(home, { env });
|
|
46897
46481
|
const pidPath = pidFilePath(home);
|
|
46898
46482
|
const logPath = path17.join(home, DAEMON_LOG_FILENAME);
|
|
46899
|
-
const pidInfo =
|
|
46483
|
+
const pidInfo = existsSync11(pidPath) ? readPidFile(pidPath) : null;
|
|
46900
46484
|
const running = pidInfo ? isProcessRunning(pidInfo.pid) : false;
|
|
46901
46485
|
const listen = pidInfo?.listen ?? config.listen;
|
|
46902
46486
|
return {
|
|
@@ -50256,7 +49840,7 @@ function createCli() {
|
|
|
50256
49840
|
}
|
|
50257
49841
|
|
|
50258
49842
|
// ../cli/src/classify.ts
|
|
50259
|
-
import { existsSync as
|
|
49843
|
+
import { existsSync as existsSync12, statSync as statSync3 } from "node:fs";
|
|
50260
49844
|
import { homedir as homedir7 } from "node:os";
|
|
50261
49845
|
import path20 from "node:path";
|
|
50262
49846
|
function expandUserPath2(inputPath) {
|
|
@@ -50270,7 +49854,7 @@ function expandUserPath2(inputPath) {
|
|
|
50270
49854
|
}
|
|
50271
49855
|
function isExistingDirectory(input) {
|
|
50272
49856
|
const resolvedPath = path20.resolve(input.cwd, expandUserPath2(input.pathArg));
|
|
50273
|
-
if (!
|
|
49857
|
+
if (!existsSync12(resolvedPath)) {
|
|
50274
49858
|
return false;
|
|
50275
49859
|
}
|
|
50276
49860
|
return statSync3(resolvedPath).isDirectory();
|
|
@@ -50296,7 +49880,7 @@ function classifyInvocation(input) {
|
|
|
50296
49880
|
}
|
|
50297
49881
|
|
|
50298
49882
|
// ../cli/src/commands/open.ts
|
|
50299
|
-
import { existsSync as
|
|
49883
|
+
import { existsSync as existsSync13 } from "node:fs";
|
|
50300
49884
|
import { spawn as spawn8 } from "node:child_process";
|
|
50301
49885
|
import { homedir as homedir8 } from "node:os";
|
|
50302
49886
|
import path21 from "node:path";
|
|
@@ -50307,7 +49891,7 @@ function findDesktopApp() {
|
|
|
50307
49891
|
path21.join(homedir8(), "Applications", "Appostle.app")
|
|
50308
49892
|
];
|
|
50309
49893
|
for (const candidate of candidates) {
|
|
50310
|
-
if (
|
|
49894
|
+
if (existsSync13(candidate)) {
|
|
50311
49895
|
return candidate;
|
|
50312
49896
|
}
|
|
50313
49897
|
}
|
|
@@ -50320,7 +49904,7 @@ function findDesktopApp() {
|
|
|
50320
49904
|
path21.join(homedir8(), "Applications", "Appostle.AppImage")
|
|
50321
49905
|
];
|
|
50322
49906
|
for (const candidate of candidates) {
|
|
50323
|
-
if (
|
|
49907
|
+
if (existsSync13(candidate)) {
|
|
50324
49908
|
return candidate;
|
|
50325
49909
|
}
|
|
50326
49910
|
}
|
|
@@ -50332,7 +49916,7 @@ function findDesktopApp() {
|
|
|
50332
49916
|
return null;
|
|
50333
49917
|
}
|
|
50334
49918
|
const candidate = path21.join(localAppData, "Programs", "Appostle", "Appostle.exe");
|
|
50335
|
-
return
|
|
49919
|
+
return existsSync13(candidate) ? candidate : null;
|
|
50336
49920
|
}
|
|
50337
49921
|
return null;
|
|
50338
49922
|
}
|