@ztimson/ai-utils 0.7.3 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/asr.d.ts CHANGED
@@ -1 +1 @@
1
- export declare function canDiarization(): Promise<boolean>;
1
+ export declare function canDiarization(): Promise<string | null>;
package/dist/asr.js CHANGED
@@ -1,10 +1,10 @@
1
- "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const S=require("@xenova/transformers"),u=require("worker_threads"),g=require("node:child_process"),d=require("node:fs"),y=require("node:path"),k=require("node:os"),h=require("wavefile");let f;async function w(){return new Promise(o=>{const r=g.spawn("python",["-c","import pyannote.audio"]);r.on("close",n=>o(n===0)),r.on("error",()=>o(!1))})}async function P(o,r,n){const s=`
1
+ "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const k=require("@xenova/transformers"),l=require("worker_threads"),y=require("node:child_process"),m=require("node:fs"),h=require("node:path"),S=require("node:os"),g=require("wavefile");let d;async function w(){const s=r=>new Promise(t=>{const n=y.spawn(r,["-c","import pyannote.audio"]);n.on("close",o=>t(o===0)),n.on("error",()=>t(!1))});return await s("python3")?"python3":await s("python")?"python":null}async function P(s,r,t,n){const o=`
2
2
  import sys
3
3
  import json
4
4
  import os
5
5
  from pyannote.audio import Pipeline
6
6
 
7
- os.environ['TORCH_HOME'] = r"${r}"
7
+ os.environ['TORCH_HOME'] = r"${t}"
8
8
  pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", token="${n}")
9
9
  output = pipeline(sys.argv[1])
10
10
 
@@ -13,6 +13,6 @@ for turn, speaker in output.speaker_diarization:
13
13
  segments.append({"start": turn.start, "end": turn.end, "speaker": speaker})
14
14
 
15
15
  print(json.dumps(segments))
16
- `;return new Promise((i,t)=>{let a="";const e=g.spawn("python",["-c",s,o]);e.stdout.on("data",p=>a+=p.toString()),e.stderr.on("data",p=>console.error(p.toString())),e.on("close",p=>{if(p===0)try{i(JSON.parse(a))}catch{t(new Error("Failed to parse diarization output"))}else t(new Error(`Python process exited with code ${p}`))}),e.on("error",t)})}function v(o,r){const n=new Map;let s=0;r.forEach(e=>{n.has(e.speaker)||n.set(e.speaker,++s)});const i=[];let t=-1,a="";return o.forEach(e=>{const p=e.timestamp[0],l=r.find(m=>p>=m.start&&p<=m.end),c=l?n.get(l.speaker):1;c!==t?(a&&i.push(`[Speaker ${t}]: ${a.trim()}`),t=c,a=e.text):a+=e.text}),a&&i.push(`[Speaker ${t}]: ${a.trim()}`),i.join(`
17
- `)}function _(o){let r,n;try{r=new h.WaveFile(d.readFileSync(o))}catch{n=y.join(d.mkdtempSync(y.join(k.tmpdir(),"audio-")),"converted.wav"),g.execSync(`ffmpeg -i "${o}" -ar 16000 -ac 1 -f wav "${n}"`,{stdio:"ignore"}),r=new h.WaveFile(d.readFileSync(n))}finally{r.toBitDepth("32f"),r.toSampleRate(16e3);const s=r.getSamples();if(Array.isArray(s)){const i=s[0],t=s[1],a=new Float32Array(i.length);for(let e=0;e<i.length;e++)a[e]=(i[e]+t[e])/2;return[n||o,a]}return[n||o,s]}}u.parentPort?.on("message",async({file:o,speaker:r,model:n,modelDir:s,token:i})=>{try{f||(f=await S.pipeline("automatic-speech-recognition",`Xenova/${n}`,{cache_dir:s,quantized:!0}));const[t,a]=_(o),e=r&&await w(),[p,l]=await Promise.all([f(a,{return_timestamps:r?"word":!1}),!r||!i||!e?Promise.resolve():P(t,s,i)]);o!=t&&d.rmSync(t,{recursive:!0,force:!0});const c=p.text?.trim()||null;if(!r)return u.parentPort?.postMessage({text:c});if(!i)return u.parentPort?.postMessage({text:c,error:"HuggingFace token required"});if(!e)return u.parentPort?.postMessage({text:c,error:"Speaker diarization unavailable"});const m=v(p.chunks||[],l||[]);u.parentPort?.postMessage({text:m})}catch(t){u.parentPort?.postMessage({error:t.stack||t.message})}});exports.canDiarization=w;
16
+ `;return new Promise((a,i)=>{let e="";const c=y.spawn(s,["-c",o,r]);c.stdout.on("data",p=>e+=p.toString()),c.stderr.on("data",p=>console.error(p.toString())),c.on("close",p=>{if(p===0)try{a(JSON.parse(e))}catch{i(new Error("Failed to parse diarization output"))}else i(new Error(`Python process exited with code ${p}`))}),c.on("error",i)})}function v(s,r){const t=new Map;let n=0;r.forEach(e=>{t.has(e.speaker)||t.set(e.speaker,++n)});const o=[];let a=-1,i="";return s.forEach(e=>{const c=e.timestamp[0],p=r.find(f=>c>=f.start&&c<=f.end),u=p?t.get(p.speaker):1;u!==a?(i&&o.push(`[Speaker ${a}]: ${i.trim()}`),a=u,i=e.text):i+=e.text}),i&&o.push(`[Speaker ${a}]: ${i.trim()}`),o.join(`
17
+ `)}function _(s){let r,t;try{r=new g.WaveFile(m.readFileSync(s))}catch{t=h.join(m.mkdtempSync(h.join(S.tmpdir(),"audio-")),"converted.wav"),y.execSync(`ffmpeg -i "${s}" -ar 16000 -ac 1 -f wav "${t}"`,{stdio:"ignore"}),r=new g.WaveFile(m.readFileSync(t))}finally{r.toBitDepth("32f"),r.toSampleRate(16e3);const n=r.getSamples();if(Array.isArray(n)){const o=n[0],a=n[1],i=new Float32Array(o.length);for(let e=0;e<o.length;e++)i[e]=(o[e]+a[e])/2;return[t||s,i]}return[t||s,n]}}l.parentPort?.on("message",async({file:s,speaker:r,model:t,modelDir:n,token:o})=>{try{d||(d=await k.pipeline("automatic-speech-recognition",`Xenova/${t}`,{cache_dir:n,quantized:!0}));const[a,i]=_(s),e=await w(),[c,p]=await Promise.all([d(i,{return_timestamps:r?"word":!1}),!r||!o||!e?Promise.resolve():P(e,a,n,o)]);s!=a&&m.rmSync(a,{recursive:!0,force:!0});const u=c.text?.trim()||null;if(!r)return l.parentPort?.postMessage({text:u});if(!o)return l.parentPort?.postMessage({text:u,error:"HuggingFace token required"});if(!e)return l.parentPort?.postMessage({text:u,error:"Speaker diarization unavailable"});const f=v(c.chunks||[],p||[]);l.parentPort?.postMessage({text:f})}catch(a){l.parentPort?.postMessage({error:a.stack||a.message})}});exports.canDiarization=w;
18
18
  //# sourceMappingURL=asr.js.map
package/dist/asr.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"asr.js","sources":["../src/asr.ts"],"sourcesContent":["import { pipeline } from '@xenova/transformers';\nimport { parentPort } from 'worker_threads';\nimport { spawn } from 'node:child_process';\nimport { execSync } from 'node:child_process';\nimport { mkdtempSync, rmSync, readFileSync } from 'node:fs';\nimport { join } from 'node:path';\nimport { tmpdir } from 'node:os';\nimport wavefile from 'wavefile';\n\nlet whisperPipeline: any;\n\nexport async function canDiarization(): Promise<boolean> {\n\treturn new Promise((resolve) => {\n\t\tconst proc = spawn('python', ['-c', 'import pyannote.audio']);\n\t\tproc.on('close', (code: number) => resolve(code === 0));\n\t\tproc.on('error', () => resolve(false));\n\t});\n}\n\nasync function runDiarization(audioPath: string, dir: string, token: string): Promise<any[]> {\n\tconst script = `\nimport sys\nimport json\nimport os\nfrom pyannote.audio import Pipeline\n\nos.environ['TORCH_HOME'] = r\"${dir}\"\npipeline = Pipeline.from_pretrained(\"pyannote/speaker-diarization-3.1\", token=\"${token}\")\noutput = pipeline(sys.argv[1])\n\nsegments = []\nfor turn, speaker in output.speaker_diarization:\n segments.append({\"start\": turn.start, \"end\": turn.end, \"speaker\": speaker})\n\nprint(json.dumps(segments))\n`;\n\n\treturn new Promise((resolve, reject) => {\n\t\tlet output = '';\n\t\tconst proc = spawn('python', ['-c', script, audioPath]);\n\t\tproc.stdout.on('data', (data: Buffer) => output += data.toString());\n\t\tproc.stderr.on('data', (data: Buffer) => console.error(data.toString()));\n\t\tproc.on('close', (code: number) => {\n\t\t\tif(code === 0) {\n\t\t\t\ttry {\n\t\t\t\t\tresolve(JSON.parse(output));\n\t\t\t\t} catch (err) {\n\t\t\t\t\treject(new Error('Failed to parse diarization output'));\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\treject(new Error(`Python process exited with code ${code}`));\n\t\t\t}\n\t\t});\n\t\tproc.on('error', reject);\n\t});\n}\n\nfunction combineSpeakerTranscript(chunks: any[], speakers: any[]): string {\n\tconst speakerMap = new Map();\n\tlet speakerCount = 0;\n\tspeakers.forEach((seg: any) => {\n\t\tif(!speakerMap.has(seg.speaker)) speakerMap.set(seg.speaker, ++speakerCount);\n\t});\n\n\tconst lines: string[] = [];\n\tlet currentSpeaker = -1;\n\tlet currentText = '';\n\tchunks.forEach((chunk: any) => {\n\t\tconst time = chunk.timestamp[0];\n\t\tconst speaker = speakers.find((s: any) => time >= s.start && time <= s.end);\n\t\tconst speakerNum = speaker ? speakerMap.get(speaker.speaker) : 1;\n\t\tif (speakerNum !== currentSpeaker) {\n\t\t\tif(currentText) lines.push(`[Speaker ${currentSpeaker}]: ${currentText.trim()}`);\n\t\t\tcurrentSpeaker = speakerNum;\n\t\t\tcurrentText = chunk.text;\n\t\t} else {\n\t\t\tcurrentText += chunk.text;\n\t\t}\n\t});\n\tif(currentText) lines.push(`[Speaker ${currentSpeaker}]: ${currentText.trim()}`);\n\treturn lines.join('\\n');\n}\n\nfunction prepareAudioBuffer(file: string): [string, Float32Array] {\n\tlet wav: any, tmp;\n\ttry {\n\t\twav = new wavefile.WaveFile(readFileSync(file));\n\t} catch(err) {\n\t\ttmp = join(mkdtempSync(join(tmpdir(), 'audio-')), 'converted.wav');\n\t\texecSync(`ffmpeg -i \"${file}\" -ar 16000 -ac 1 -f wav \"${tmp}\"`, { stdio: 'ignore' });\n\t\twav = new wavefile.WaveFile(readFileSync(tmp));\n\t} finally {\n\t\twav.toBitDepth('32f');\n\t\twav.toSampleRate(16000);\n\t\tconst samples = wav.getSamples();\n\t\tif(Array.isArray(samples)) {\n\t\t\tconst left = samples[0];\n\t\t\tconst right = samples[1];\n\t\t\tconst buffer = new Float32Array(left.length);\n\t\t\tfor (let i = 0; i < left.length; i++) buffer[i] = (left[i] + right[i]) / 2;\n\t\t\treturn [tmp || file, buffer];\n\t\t}\n\t\treturn [tmp || file, samples];\n\t}\n}\n\nparentPort?.on('message', async ({ file, speaker, model, modelDir, token }) => {\n\ttry {\n\t\tif(!whisperPipeline) whisperPipeline = await pipeline('automatic-speech-recognition', `Xenova/${model}`, {cache_dir: modelDir, quantized: true});\n\n\t\t// Prepare audio file\n\t\tconst [f, buffer] = prepareAudioBuffer(file);\n\n\t\t// Fetch transcript and speakers\n\t\tconst hasDiarization = speaker && await canDiarization();\n\t\tconst [transcript, speakers] = await Promise.all([\n\t\t\twhisperPipeline(buffer, {return_timestamps: speaker ? 'word' : false}),\n\t\t\t(!speaker || !token || !hasDiarization) ? Promise.resolve(): runDiarization(f, modelDir, token),\n\t\t]);\n\t\tif(file != f) rmSync(f, { recursive: true, force: true });\n\n\t\t// Return any results / errors if no more processing required\n\t\tconst text = transcript.text?.trim() || null;\n\t\tif(!speaker) return parentPort?.postMessage({ text });\n\t\tif(!token) return parentPort?.postMessage({ text, error: 'HuggingFace token required' });\n\t\tif(!hasDiarization) return parentPort?.postMessage({ text, error: 'Speaker diarization unavailable' });\n\n\t\t// Combine transcript and speakers\n\t\tconst combined = combineSpeakerTranscript(transcript.chunks || [], speakers || []);\n\t\tparentPort?.postMessage({ text: combined });\n\t} catch (err: any) {\n\t\tparentPort?.postMessage({ error: err.stack || err.message });\n\t}\n});\n"],"names":["whisperPipeline","canDiarization","resolve","proc","spawn","code","runDiarization","audioPath","dir","token","script","reject","output","data","combineSpeakerTranscript","chunks","speakers","speakerMap","speakerCount","seg","lines","currentSpeaker","currentText","chunk","time","speaker","s","speakerNum","prepareAudioBuffer","file","wav","tmp","wavefile","readFileSync","join","mkdtempSync","tmpdir","execSync","samples","left","right","buffer","i","parentPort","model","modelDir","pipeline","f","hasDiarization","transcript","rmSync","text","combined","err"],"mappings":"2QASA,IAAIA,EAEJ,eAAsBC,GAAmC,CACxD,OAAO,IAAI,QAASC,GAAY,CAC/B,MAAMC,EAAOC,EAAAA,MAAM,SAAU,CAAC,KAAM,uBAAuB,CAAC,EAC5DD,EAAK,GAAG,QAAUE,GAAiBH,EAAQG,IAAS,CAAC,CAAC,EACtDF,EAAK,GAAG,QAAS,IAAMD,EAAQ,EAAK,CAAC,CACtC,CAAC,CACF,CAEA,eAAeI,EAAeC,EAAmBC,EAAaC,EAA+B,CAC5F,MAAMC,EAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,+BAMeF,CAAG;AAAA,iFAC+CC,CAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUrF,OAAO,IAAI,QAAQ,CAACP,EAASS,IAAW,CACvC,IAAIC,EAAS,GACb,MAAMT,EAAOC,EAAAA,MAAM,SAAU,CAAC,KAAMM,EAAQH,CAAS,CAAC,EACtDJ,EAAK,OAAO,GAAG,OAASU,GAAiBD,GAAUC,EAAK,UAAU,EAClEV,EAAK,OAAO,GAAG,OAASU,GAAiB,QAAQ,MAAMA,EAAK,SAAA,CAAU,CAAC,EACvEV,EAAK,GAAG,QAAUE,GAAiB,CAClC,GAAGA,IAAS,EACX,GAAI,CACHH,EAAQ,KAAK,MAAMU,CAAM,CAAC,CAC3B,MAAc,CACbD,EAAO,IAAI,MAAM,oCAAoC,CAAC,CACvD,MAEAA,EAAO,IAAI,MAAM,mCAAmCN,CAAI,EAAE,CAAC,CAE7D,CAAC,EACDF,EAAK,GAAG,QAASQ,CAAM,CACxB,CAAC,CACF,CAEA,SAASG,EAAyBC,EAAeC,EAAyB,CACzE,MAAMC,MAAiB,IACvB,IAAIC,EAAe,EACnBF,EAAS,QAASG,GAAa,CAC1BF,EAAW,IAAIE,EAAI,OAAO,GAAGF,EAAW,IAAIE,EAAI,QAAS,EAAED,CAAY,CAC5E,CAAC,EAED,MAAME,EAAkB,CAAA,EACxB,IAAIC,EAAiB,GACjBC,EAAc,GAClB,OAAAP,EAAO,QAASQ,GAAe,CAC9B,MAAMC,EAAOD,EAAM,UAAU,CAAC,EACxBE,EAAUT,EAAS,KAAMU,GAAWF,GAAQE,EAAE,OAASF,GAAQE,EAAE,GAAG,EACpEC,EAAaF,EAAUR,EAAW,IAAIQ,EAAQ,OAAO,EAAI,EAC3DE,IAAeN,GACfC,KAAmB,KAAK,YAAYD,CAAc,MAAMC,EAAY,KAAA,CAAM,EAAE,EAC/ED,EAAiBM,EACjBL,EAAcC,EAAM,MAEpBD,GAAeC,EAAM,IAEvB,CAAC,EACED,KAAmB,KAAK,YAAYD,CAAc,MAAMC,EAAY,KAAA,CAAM,EAAE,EACxEF,EAAM,KAAK;AAAA,CAAI,CACvB,CAEA,SAASQ,EAAmBC,EAAsC,CACjE,IAAIC,EAAUC,EACd,GAAI,CACHD,EAAM,IAAIE,EAAS,SAASC,EAAAA,aAAaJ,CAAI,CAAC,CAC/C,MAAa,CACZE,EAAMG,EAAAA,KAAKC,EAAAA,YAAYD,EAAAA,KAAKE,EAAAA,OAAA,EAAU,QAAQ,CAAC,EAAG,eAAe,EACjEC,WAAS,cAAcR,CAAI,6BAA6BE,CAAG,IAAK,CAAE,MAAO,SAAU,EACnFD,EAAM,IAAIE,EAAS,SAASC,EAAAA,aAAaF,CAAG,CAAC,CAC9C,QAAA,CACCD,EAAI,WAAW,KAAK,EACpBA,EAAI,aAAa,IAAK,EACtB,MAAMQ,EAAUR,EAAI,WAAA,EACpB,GAAG,MAAM,QAAQQ,CAAO,EAAG,CAC1B,MAAMC,EAAOD,EAAQ,CAAC,EAChBE,EAAQF,EAAQ,CAAC,EACjBG,EAAS,IAAI,aAAaF,EAAK,MAAM,EAC3C,QAASG,EAAI,EAAGA,EAAIH,EAAK,OAAQG,IAAKD,EAAOC,CAAC,GAAKH,EAAKG,CAAC,EAAIF,EAAME,CAAC,GAAK,EACzE,MAAO,CAACX,GAAOF,EAAMY,CAAM,CAC5B,CACA,MAAO,CAACV,GAAOF,EAAMS,CAAO,CAC7B,CACD,CAEAK,EAAAA,YAAY,GAAG,UAAW,MAAO,CAAE,KAAAd,EAAM,QAAAJ,EAAS,MAAAmB,EAAO,SAAAC,EAAU,MAAApC,KAAY,CAC9E,GAAI,CACCT,IAAiBA,EAAkB,MAAM8C,EAAAA,SAAS,+BAAgC,UAAUF,CAAK,GAAI,CAAC,UAAWC,EAAU,UAAW,GAAK,GAG/I,KAAM,CAACE,EAAGN,CAAM,EAAIb,EAAmBC,CAAI,EAGrCmB,EAAiBvB,GAAW,MAAMxB,EAAA,EAClC,CAACgD,EAAYjC,CAAQ,EAAI,MAAM,QAAQ,IAAI,CAChDhB,EAAgByC,EAAQ,CAAC,kBAAmBhB,EAAU,OAAS,GAAM,EACpE,CAACA,GAAW,CAAChB,GAAS,CAACuC,EAAkB,QAAQ,QAAA,EAAW1C,EAAeyC,EAAGF,EAAUpC,CAAK,CAAA,CAC9F,EACEoB,GAAQkB,GAAGG,EAAAA,OAAOH,EAAG,CAAE,UAAW,GAAM,MAAO,GAAM,EAGxD,MAAMI,EAAOF,EAAW,MAAM,KAAA,GAAU,KACxC,GAAG,CAACxB,EAAS,OAAOkB,EAAAA,YAAY,YAAY,CAAE,KAAAQ,EAAM,EACpD,GAAG,CAAC1C,EAAO,OAAOkC,cAAY,YAAY,CAAE,KAAAQ,EAAM,MAAO,6BAA8B,EACvF,GAAG,CAACH,EAAgB,OAAOL,cAAY,YAAY,CAAE,KAAAQ,EAAM,MAAO,kCAAmC,EAGrG,MAAMC,EAAWtC,EAAyBmC,EAAW,QAAU,CAAA,EAAIjC,GAAY,EAAE,EACjF2B,EAAAA,YAAY,YAAY,CAAE,KAAMS,CAAA,CAAU,CAC3C,OAASC,EAAU,CAClBV,EAAAA,YAAY,YAAY,CAAE,MAAOU,EAAI,OAASA,EAAI,QAAS,CAC5D,CACD,CAAC"}
1
+ {"version":3,"file":"asr.js","sources":["../src/asr.ts"],"sourcesContent":["import { pipeline } from '@xenova/transformers';\nimport { parentPort } from 'worker_threads';\nimport { spawn } from 'node:child_process';\nimport { execSync } from 'node:child_process';\nimport { mkdtempSync, rmSync, readFileSync } from 'node:fs';\nimport { join } from 'node:path';\nimport { tmpdir } from 'node:os';\nimport wavefile from 'wavefile';\n\nlet whisperPipeline: any;\n\nexport async function canDiarization(): Promise<string | null> {\n\tconst checkPython = (cmd: string) => {\n\t\treturn new Promise<boolean>((resolve) => {\n\t\t\tconst proc = spawn(cmd, ['-c', 'import pyannote.audio']);\n\t\t\tproc.on('close', (code: number) => resolve(code === 0));\n\t\t\tproc.on('error', () => resolve(false));\n\t\t});\n\t};\n\tif(await checkPython('python3')) return 'python3';\n\tif(await checkPython('python')) return 'python';\n\treturn null;\n}\n\nasync function runDiarization(binary: string, audioPath: string, dir: string, token: string): Promise<any[]> {\n\tconst script = `\nimport sys\nimport json\nimport os\nfrom pyannote.audio import Pipeline\n\nos.environ['TORCH_HOME'] = r\"${dir}\"\npipeline = Pipeline.from_pretrained(\"pyannote/speaker-diarization-3.1\", token=\"${token}\")\noutput = pipeline(sys.argv[1])\n\nsegments = []\nfor turn, speaker in output.speaker_diarization:\n segments.append({\"start\": turn.start, \"end\": turn.end, \"speaker\": speaker})\n\nprint(json.dumps(segments))\n`;\n\n\treturn new Promise((resolve, reject) => {\n\t\tlet output = '';\n\t\tconst proc = spawn(binary, ['-c', script, audioPath]);\n\t\tproc.stdout.on('data', (data: Buffer) => output += data.toString());\n\t\tproc.stderr.on('data', (data: Buffer) => console.error(data.toString()));\n\t\tproc.on('close', (code: number) => {\n\t\t\tif(code === 0) {\n\t\t\t\ttry {\n\t\t\t\t\tresolve(JSON.parse(output));\n\t\t\t\t} catch (err) {\n\t\t\t\t\treject(new Error('Failed to parse diarization output'));\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\treject(new Error(`Python process exited with code ${code}`));\n\t\t\t}\n\t\t});\n\t\tproc.on('error', reject);\n\t});\n}\n\nfunction combineSpeakerTranscript(chunks: any[], speakers: any[]): string {\n\tconst speakerMap = new Map();\n\tlet speakerCount = 0;\n\tspeakers.forEach((seg: any) => {\n\t\tif(!speakerMap.has(seg.speaker)) speakerMap.set(seg.speaker, ++speakerCount);\n\t});\n\n\tconst lines: string[] = [];\n\tlet currentSpeaker = -1;\n\tlet currentText = '';\n\tchunks.forEach((chunk: any) => {\n\t\tconst time = chunk.timestamp[0];\n\t\tconst speaker = speakers.find((s: any) => time >= s.start && time <= s.end);\n\t\tconst speakerNum = speaker ? speakerMap.get(speaker.speaker) : 1;\n\t\tif (speakerNum !== currentSpeaker) {\n\t\t\tif(currentText) lines.push(`[Speaker ${currentSpeaker}]: ${currentText.trim()}`);\n\t\t\tcurrentSpeaker = speakerNum;\n\t\t\tcurrentText = chunk.text;\n\t\t} else {\n\t\t\tcurrentText += chunk.text;\n\t\t}\n\t});\n\tif(currentText) lines.push(`[Speaker ${currentSpeaker}]: ${currentText.trim()}`);\n\treturn lines.join('\\n');\n}\n\nfunction prepareAudioBuffer(file: string): [string, Float32Array] {\n\tlet wav: any, tmp;\n\ttry {\n\t\twav = new wavefile.WaveFile(readFileSync(file));\n\t} catch(err) {\n\t\ttmp = join(mkdtempSync(join(tmpdir(), 'audio-')), 'converted.wav');\n\t\texecSync(`ffmpeg -i \"${file}\" -ar 16000 -ac 1 -f wav \"${tmp}\"`, { stdio: 'ignore' });\n\t\twav = new wavefile.WaveFile(readFileSync(tmp));\n\t} finally {\n\t\twav.toBitDepth('32f');\n\t\twav.toSampleRate(16000);\n\t\tconst samples = wav.getSamples();\n\t\tif(Array.isArray(samples)) {\n\t\t\tconst left = samples[0];\n\t\t\tconst right = samples[1];\n\t\t\tconst buffer = new Float32Array(left.length);\n\t\t\tfor (let i = 0; i < left.length; i++) buffer[i] = (left[i] + right[i]) / 2;\n\t\t\treturn [tmp || file, buffer];\n\t\t}\n\t\treturn [tmp || file, samples];\n\t}\n}\n\nparentPort?.on('message', async ({ file, speaker, model, modelDir, token }) => {\n\ttry {\n\t\tif(!whisperPipeline) whisperPipeline = await pipeline('automatic-speech-recognition', `Xenova/${model}`, {cache_dir: modelDir, quantized: true});\n\n\t\t// Prepare audio file\n\t\tconst [f, buffer] = prepareAudioBuffer(file);\n\n\t\t// Fetch transcript and speakers\n\t\tconst hasDiarization = await canDiarization();\n\t\tconst [transcript, speakers] = await Promise.all([\n\t\t\twhisperPipeline(buffer, {return_timestamps: speaker ? 'word' : false}),\n\t\t\t(!speaker || !token || !hasDiarization) ? Promise.resolve(): runDiarization(hasDiarization, f, modelDir, token),\n\t\t]);\n\t\tif(file != f) rmSync(f, { recursive: true, force: true });\n\n\t\t// Return any results / errors if no more processing required\n\t\tconst text = transcript.text?.trim() || null;\n\t\tif(!speaker) return parentPort?.postMessage({ text });\n\t\tif(!token) return parentPort?.postMessage({ text, error: 'HuggingFace token required' });\n\t\tif(!hasDiarization) return parentPort?.postMessage({ text, error: 'Speaker diarization unavailable' });\n\n\t\t// Combine transcript and speakers\n\t\tconst combined = combineSpeakerTranscript(transcript.chunks || [], speakers || []);\n\t\tparentPort?.postMessage({ text: combined });\n\t} catch (err: any) {\n\t\tparentPort?.postMessage({ error: err.stack || err.message });\n\t}\n});\n"],"names":["whisperPipeline","canDiarization","checkPython","cmd","resolve","proc","spawn","code","runDiarization","binary","audioPath","dir","token","script","reject","output","data","combineSpeakerTranscript","chunks","speakers","speakerMap","speakerCount","seg","lines","currentSpeaker","currentText","chunk","time","speaker","s","speakerNum","prepareAudioBuffer","file","wav","tmp","wavefile","readFileSync","join","mkdtempSync","tmpdir","execSync","samples","left","right","buffer","i","parentPort","model","modelDir","pipeline","f","hasDiarization","transcript","rmSync","text","combined","err"],"mappings":"2QASA,IAAIA,EAEJ,eAAsBC,GAAyC,CAC9D,MAAMC,EAAeC,GACb,IAAI,QAAkBC,GAAY,CACxC,MAAMC,EAAOC,EAAAA,MAAMH,EAAK,CAAC,KAAM,uBAAuB,CAAC,EACvDE,EAAK,GAAG,QAAUE,GAAiBH,EAAQG,IAAS,CAAC,CAAC,EACtDF,EAAK,GAAG,QAAS,IAAMD,EAAQ,EAAK,CAAC,CACtC,CAAC,EAEF,OAAG,MAAMF,EAAY,SAAS,EAAU,UACrC,MAAMA,EAAY,QAAQ,EAAU,SAChC,IACR,CAEA,eAAeM,EAAeC,EAAgBC,EAAmBC,EAAaC,EAA+B,CAC5G,MAAMC,EAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,+BAMeF,CAAG;AAAA,iFAC+CC,CAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUrF,OAAO,IAAI,QAAQ,CAACR,EAASU,IAAW,CACvC,IAAIC,EAAS,GACb,MAAMV,EAAOC,EAAAA,MAAMG,EAAQ,CAAC,KAAMI,EAAQH,CAAS,CAAC,EACpDL,EAAK,OAAO,GAAG,OAASW,GAAiBD,GAAUC,EAAK,UAAU,EAClEX,EAAK,OAAO,GAAG,OAASW,GAAiB,QAAQ,MAAMA,EAAK,SAAA,CAAU,CAAC,EACvEX,EAAK,GAAG,QAAUE,GAAiB,CAClC,GAAGA,IAAS,EACX,GAAI,CACHH,EAAQ,KAAK,MAAMW,CAAM,CAAC,CAC3B,MAAc,CACbD,EAAO,IAAI,MAAM,oCAAoC,CAAC,CACvD,MAEAA,EAAO,IAAI,MAAM,mCAAmCP,CAAI,EAAE,CAAC,CAE7D,CAAC,EACDF,EAAK,GAAG,QAASS,CAAM,CACxB,CAAC,CACF,CAEA,SAASG,EAAyBC,EAAeC,EAAyB,CACzE,MAAMC,MAAiB,IACvB,IAAIC,EAAe,EACnBF,EAAS,QAASG,GAAa,CAC1BF,EAAW,IAAIE,EAAI,OAAO,GAAGF,EAAW,IAAIE,EAAI,QAAS,EAAED,CAAY,CAC5E,CAAC,EAED,MAAME,EAAkB,CAAA,EACxB,IAAIC,EAAiB,GACjBC,EAAc,GAClB,OAAAP,EAAO,QAASQ,GAAe,CAC9B,MAAMC,EAAOD,EAAM,UAAU,CAAC,EACxBE,EAAUT,EAAS,KAAMU,GAAWF,GAAQE,EAAE,OAASF,GAAQE,EAAE,GAAG,EACpEC,EAAaF,EAAUR,EAAW,IAAIQ,EAAQ,OAAO,EAAI,EAC3DE,IAAeN,GACfC,KAAmB,KAAK,YAAYD,CAAc,MAAMC,EAAY,KAAA,CAAM,EAAE,EAC/ED,EAAiBM,EACjBL,EAAcC,EAAM,MAEpBD,GAAeC,EAAM,IAEvB,CAAC,EACED,KAAmB,KAAK,YAAYD,CAAc,MAAMC,EAAY,KAAA,CAAM,EAAE,EACxEF,EAAM,KAAK;AAAA,CAAI,CACvB,CAEA,SAASQ,EAAmBC,EAAsC,CACjE,IAAIC,EAAUC,EACd,GAAI,CACHD,EAAM,IAAIE,EAAS,SAASC,EAAAA,aAAaJ,CAAI,CAAC,CAC/C,MAAa,CACZE,EAAMG,EAAAA,KAAKC,EAAAA,YAAYD,EAAAA,KAAKE,EAAAA,OAAA,EAAU,QAAQ,CAAC,EAAG,eAAe,EACjEC,WAAS,cAAcR,CAAI,6BAA6BE,CAAG,IAAK,CAAE,MAAO,SAAU,EACnFD,EAAM,IAAIE,EAAS,SAASC,EAAAA,aAAaF,CAAG,CAAC,CAC9C,QAAA,CACCD,EAAI,WAAW,KAAK,EACpBA,EAAI,aAAa,IAAK,EACtB,MAAMQ,EAAUR,EAAI,WAAA,EACpB,GAAG,MAAM,QAAQQ,CAAO,EAAG,CAC1B,MAAMC,EAAOD,EAAQ,CAAC,EAChBE,EAAQF,EAAQ,CAAC,EACjBG,EAAS,IAAI,aAAaF,EAAK,MAAM,EAC3C,QAASG,EAAI,EAAGA,EAAIH,EAAK,OAAQG,IAAKD,EAAOC,CAAC,GAAKH,EAAKG,CAAC,EAAIF,EAAME,CAAC,GAAK,EACzE,MAAO,CAACX,GAAOF,EAAMY,CAAM,CAC5B,CACA,MAAO,CAACV,GAAOF,EAAMS,CAAO,CAC7B,CACD,CAEAK,EAAAA,YAAY,GAAG,UAAW,MAAO,CAAE,KAAAd,EAAM,QAAAJ,EAAS,MAAAmB,EAAO,SAAAC,EAAU,MAAApC,KAAY,CAC9E,GAAI,CACCZ,IAAiBA,EAAkB,MAAMiD,EAAAA,SAAS,+BAAgC,UAAUF,CAAK,GAAI,CAAC,UAAWC,EAAU,UAAW,GAAK,GAG/I,KAAM,CAACE,EAAGN,CAAM,EAAIb,EAAmBC,CAAI,EAGrCmB,EAAiB,MAAMlD,EAAA,EACvB,CAACmD,EAAYjC,CAAQ,EAAI,MAAM,QAAQ,IAAI,CAChDnB,EAAgB4C,EAAQ,CAAC,kBAAmBhB,EAAU,OAAS,GAAM,EACpE,CAACA,GAAW,CAAChB,GAAS,CAACuC,EAAkB,QAAQ,QAAA,EAAW3C,EAAe2C,EAAgBD,EAAGF,EAAUpC,CAAK,CAAA,CAC9G,EACEoB,GAAQkB,GAAGG,EAAAA,OAAOH,EAAG,CAAE,UAAW,GAAM,MAAO,GAAM,EAGxD,MAAMI,EAAOF,EAAW,MAAM,KAAA,GAAU,KACxC,GAAG,CAACxB,EAAS,OAAOkB,EAAAA,YAAY,YAAY,CAAE,KAAAQ,EAAM,EACpD,GAAG,CAAC1C,EAAO,OAAOkC,cAAY,YAAY,CAAE,KAAAQ,EAAM,MAAO,6BAA8B,EACvF,GAAG,CAACH,EAAgB,OAAOL,cAAY,YAAY,CAAE,KAAAQ,EAAM,MAAO,kCAAmC,EAGrG,MAAMC,EAAWtC,EAAyBmC,EAAW,QAAU,CAAA,EAAIjC,GAAY,EAAE,EACjF2B,EAAAA,YAAY,YAAY,CAAE,KAAMS,CAAA,CAAU,CAC3C,OAASC,EAAU,CAClBV,EAAAA,YAAY,YAAY,CAAE,MAAOU,EAAI,OAASA,EAAI,QAAS,CAC5D,CACD,CAAC"}
package/dist/asr.mjs CHANGED
@@ -2,24 +2,25 @@ import { pipeline as w } from "@xenova/transformers";
2
2
  import { parentPort as m } from "worker_threads";
3
3
  import { execSync as k, spawn as h } from "node:child_process";
4
4
  import { rmSync as S, readFileSync as d, mkdtempSync as v } from "node:fs";
5
- import { join as g } from "node:path";
6
- import { tmpdir as $ } from "node:os";
7
- import y from "wavefile";
5
+ import { join as y } from "node:path";
6
+ import { tmpdir as P } from "node:os";
7
+ import g from "wavefile";
8
8
  let l;
9
- async function x() {
10
- return new Promise((o) => {
11
- const r = h("python", ["-c", "import pyannote.audio"]);
12
- r.on("close", (n) => o(n === 0)), r.on("error", () => o(!1));
9
+ async function $() {
10
+ const s = (r) => new Promise((t) => {
11
+ const n = h(r, ["-c", "import pyannote.audio"]);
12
+ n.on("close", (o) => t(o === 0)), n.on("error", () => t(!1));
13
13
  });
14
+ return await s("python3") ? "python3" : await s("python") ? "python" : null;
14
15
  }
15
- async function P(o, r, n) {
16
- const s = `
16
+ async function x(s, r, t, n) {
17
+ const o = `
17
18
  import sys
18
19
  import json
19
20
  import os
20
21
  from pyannote.audio import Pipeline
21
22
 
22
- os.environ['TORCH_HOME'] = r"${r}"
23
+ os.environ['TORCH_HOME'] = r"${t}"
23
24
  pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", token="${n}")
24
25
  output = pipeline(sys.argv[1])
25
26
 
@@ -29,71 +30,71 @@ for turn, speaker in output.speaker_diarization:
29
30
 
30
31
  print(json.dumps(segments))
31
32
  `;
32
- return new Promise((a, t) => {
33
- let i = "";
34
- const e = h("python", ["-c", s, o]);
35
- e.stdout.on("data", (p) => i += p.toString()), e.stderr.on("data", (p) => console.error(p.toString())), e.on("close", (p) => {
33
+ return new Promise((i, a) => {
34
+ let e = "";
35
+ const c = h(s, ["-c", o, r]);
36
+ c.stdout.on("data", (p) => e += p.toString()), c.stderr.on("data", (p) => console.error(p.toString())), c.on("close", (p) => {
36
37
  if (p === 0)
37
38
  try {
38
- a(JSON.parse(i));
39
+ i(JSON.parse(e));
39
40
  } catch {
40
- t(new Error("Failed to parse diarization output"));
41
+ a(new Error("Failed to parse diarization output"));
41
42
  }
42
43
  else
43
- t(new Error(`Python process exited with code ${p}`));
44
- }), e.on("error", t);
44
+ a(new Error(`Python process exited with code ${p}`));
45
+ }), c.on("error", a);
45
46
  });
46
47
  }
47
- function z(o, r) {
48
- const n = /* @__PURE__ */ new Map();
49
- let s = 0;
48
+ function z(s, r) {
49
+ const t = /* @__PURE__ */ new Map();
50
+ let n = 0;
50
51
  r.forEach((e) => {
51
- n.has(e.speaker) || n.set(e.speaker, ++s);
52
+ t.has(e.speaker) || t.set(e.speaker, ++n);
52
53
  });
53
- const a = [];
54
- let t = -1, i = "";
55
- return o.forEach((e) => {
56
- const p = e.timestamp[0], u = r.find((f) => p >= f.start && p <= f.end), c = u ? n.get(u.speaker) : 1;
57
- c !== t ? (i && a.push(`[Speaker ${t}]: ${i.trim()}`), t = c, i = e.text) : i += e.text;
58
- }), i && a.push(`[Speaker ${t}]: ${i.trim()}`), a.join(`
54
+ const o = [];
55
+ let i = -1, a = "";
56
+ return s.forEach((e) => {
57
+ const c = e.timestamp[0], p = r.find((f) => c >= f.start && c <= f.end), u = p ? t.get(p.speaker) : 1;
58
+ u !== i ? (a && o.push(`[Speaker ${i}]: ${a.trim()}`), i = u, a = e.text) : a += e.text;
59
+ }), a && o.push(`[Speaker ${i}]: ${a.trim()}`), o.join(`
59
60
  `);
60
61
  }
61
- function M(o) {
62
- let r, n;
62
+ function M(s) {
63
+ let r, t;
63
64
  try {
64
- r = new y.WaveFile(d(o));
65
+ r = new g.WaveFile(d(s));
65
66
  } catch {
66
- n = g(v(g($(), "audio-")), "converted.wav"), k(`ffmpeg -i "${o}" -ar 16000 -ac 1 -f wav "${n}"`, { stdio: "ignore" }), r = new y.WaveFile(d(n));
67
+ t = y(v(y(P(), "audio-")), "converted.wav"), k(`ffmpeg -i "${s}" -ar 16000 -ac 1 -f wav "${t}"`, { stdio: "ignore" }), r = new g.WaveFile(d(t));
67
68
  } finally {
68
69
  r.toBitDepth("32f"), r.toSampleRate(16e3);
69
- const s = r.getSamples();
70
- if (Array.isArray(s)) {
71
- const a = s[0], t = s[1], i = new Float32Array(a.length);
72
- for (let e = 0; e < a.length; e++) i[e] = (a[e] + t[e]) / 2;
73
- return [n || o, i];
70
+ const n = r.getSamples();
71
+ if (Array.isArray(n)) {
72
+ const o = n[0], i = n[1], a = new Float32Array(o.length);
73
+ for (let e = 0; e < o.length; e++) a[e] = (o[e] + i[e]) / 2;
74
+ return [t || s, a];
74
75
  }
75
- return [n || o, s];
76
+ return [t || s, n];
76
77
  }
77
78
  }
78
- m?.on("message", async ({ file: o, speaker: r, model: n, modelDir: s, token: a }) => {
79
+ m?.on("message", async ({ file: s, speaker: r, model: t, modelDir: n, token: o }) => {
79
80
  try {
80
- l || (l = await w("automatic-speech-recognition", `Xenova/${n}`, { cache_dir: s, quantized: !0 }));
81
- const [t, i] = M(o), e = r && await x(), [p, u] = await Promise.all([
82
- l(i, { return_timestamps: r ? "word" : !1 }),
83
- !r || !a || !e ? Promise.resolve() : P(t, s, a)
81
+ l || (l = await w("automatic-speech-recognition", `Xenova/${t}`, { cache_dir: n, quantized: !0 }));
82
+ const [i, a] = M(s), e = await $(), [c, p] = await Promise.all([
83
+ l(a, { return_timestamps: r ? "word" : !1 }),
84
+ !r || !o || !e ? Promise.resolve() : x(e, i, n, o)
84
85
  ]);
85
- o != t && S(t, { recursive: !0, force: !0 });
86
- const c = p.text?.trim() || null;
87
- if (!r) return m?.postMessage({ text: c });
88
- if (!a) return m?.postMessage({ text: c, error: "HuggingFace token required" });
89
- if (!e) return m?.postMessage({ text: c, error: "Speaker diarization unavailable" });
90
- const f = z(p.chunks || [], u || []);
86
+ s != i && S(i, { recursive: !0, force: !0 });
87
+ const u = c.text?.trim() || null;
88
+ if (!r) return m?.postMessage({ text: u });
89
+ if (!o) return m?.postMessage({ text: u, error: "HuggingFace token required" });
90
+ if (!e) return m?.postMessage({ text: u, error: "Speaker diarization unavailable" });
91
+ const f = z(c.chunks || [], p || []);
91
92
  m?.postMessage({ text: f });
92
- } catch (t) {
93
- m?.postMessage({ error: t.stack || t.message });
93
+ } catch (i) {
94
+ m?.postMessage({ error: i.stack || i.message });
94
95
  }
95
96
  });
96
97
  export {
97
- x as canDiarization
98
+ $ as canDiarization
98
99
  };
99
100
  //# sourceMappingURL=asr.mjs.map
package/dist/asr.mjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"asr.mjs","sources":["../src/asr.ts"],"sourcesContent":["import { pipeline } from '@xenova/transformers';\nimport { parentPort } from 'worker_threads';\nimport { spawn } from 'node:child_process';\nimport { execSync } from 'node:child_process';\nimport { mkdtempSync, rmSync, readFileSync } from 'node:fs';\nimport { join } from 'node:path';\nimport { tmpdir } from 'node:os';\nimport wavefile from 'wavefile';\n\nlet whisperPipeline: any;\n\nexport async function canDiarization(): Promise<boolean> {\n\treturn new Promise((resolve) => {\n\t\tconst proc = spawn('python', ['-c', 'import pyannote.audio']);\n\t\tproc.on('close', (code: number) => resolve(code === 0));\n\t\tproc.on('error', () => resolve(false));\n\t});\n}\n\nasync function runDiarization(audioPath: string, dir: string, token: string): Promise<any[]> {\n\tconst script = `\nimport sys\nimport json\nimport os\nfrom pyannote.audio import Pipeline\n\nos.environ['TORCH_HOME'] = r\"${dir}\"\npipeline = Pipeline.from_pretrained(\"pyannote/speaker-diarization-3.1\", token=\"${token}\")\noutput = pipeline(sys.argv[1])\n\nsegments = []\nfor turn, speaker in output.speaker_diarization:\n segments.append({\"start\": turn.start, \"end\": turn.end, \"speaker\": speaker})\n\nprint(json.dumps(segments))\n`;\n\n\treturn new Promise((resolve, reject) => {\n\t\tlet output = '';\n\t\tconst proc = spawn('python', ['-c', script, audioPath]);\n\t\tproc.stdout.on('data', (data: Buffer) => output += data.toString());\n\t\tproc.stderr.on('data', (data: Buffer) => console.error(data.toString()));\n\t\tproc.on('close', (code: number) => {\n\t\t\tif(code === 0) {\n\t\t\t\ttry {\n\t\t\t\t\tresolve(JSON.parse(output));\n\t\t\t\t} catch (err) {\n\t\t\t\t\treject(new Error('Failed to parse diarization output'));\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\treject(new Error(`Python process exited with code ${code}`));\n\t\t\t}\n\t\t});\n\t\tproc.on('error', reject);\n\t});\n}\n\nfunction combineSpeakerTranscript(chunks: any[], speakers: any[]): string {\n\tconst speakerMap = new Map();\n\tlet speakerCount = 0;\n\tspeakers.forEach((seg: any) => {\n\t\tif(!speakerMap.has(seg.speaker)) speakerMap.set(seg.speaker, ++speakerCount);\n\t});\n\n\tconst lines: string[] = [];\n\tlet currentSpeaker = -1;\n\tlet currentText = '';\n\tchunks.forEach((chunk: any) => {\n\t\tconst time = chunk.timestamp[0];\n\t\tconst speaker = speakers.find((s: any) => time >= s.start && time <= s.end);\n\t\tconst speakerNum = speaker ? speakerMap.get(speaker.speaker) : 1;\n\t\tif (speakerNum !== currentSpeaker) {\n\t\t\tif(currentText) lines.push(`[Speaker ${currentSpeaker}]: ${currentText.trim()}`);\n\t\t\tcurrentSpeaker = speakerNum;\n\t\t\tcurrentText = chunk.text;\n\t\t} else {\n\t\t\tcurrentText += chunk.text;\n\t\t}\n\t});\n\tif(currentText) lines.push(`[Speaker ${currentSpeaker}]: ${currentText.trim()}`);\n\treturn lines.join('\\n');\n}\n\nfunction prepareAudioBuffer(file: string): [string, Float32Array] {\n\tlet wav: any, tmp;\n\ttry {\n\t\twav = new wavefile.WaveFile(readFileSync(file));\n\t} catch(err) {\n\t\ttmp = join(mkdtempSync(join(tmpdir(), 'audio-')), 'converted.wav');\n\t\texecSync(`ffmpeg -i \"${file}\" -ar 16000 -ac 1 -f wav \"${tmp}\"`, { stdio: 'ignore' });\n\t\twav = new wavefile.WaveFile(readFileSync(tmp));\n\t} finally {\n\t\twav.toBitDepth('32f');\n\t\twav.toSampleRate(16000);\n\t\tconst samples = wav.getSamples();\n\t\tif(Array.isArray(samples)) {\n\t\t\tconst left = samples[0];\n\t\t\tconst right = samples[1];\n\t\t\tconst buffer = new Float32Array(left.length);\n\t\t\tfor (let i = 0; i < left.length; i++) buffer[i] = (left[i] + right[i]) / 2;\n\t\t\treturn [tmp || file, buffer];\n\t\t}\n\t\treturn [tmp || file, samples];\n\t}\n}\n\nparentPort?.on('message', async ({ file, speaker, model, modelDir, token }) => {\n\ttry {\n\t\tif(!whisperPipeline) whisperPipeline = await pipeline('automatic-speech-recognition', `Xenova/${model}`, {cache_dir: modelDir, quantized: true});\n\n\t\t// Prepare audio file\n\t\tconst [f, buffer] = prepareAudioBuffer(file);\n\n\t\t// Fetch transcript and speakers\n\t\tconst hasDiarization = speaker && await canDiarization();\n\t\tconst [transcript, speakers] = await Promise.all([\n\t\t\twhisperPipeline(buffer, {return_timestamps: speaker ? 'word' : false}),\n\t\t\t(!speaker || !token || !hasDiarization) ? Promise.resolve(): runDiarization(f, modelDir, token),\n\t\t]);\n\t\tif(file != f) rmSync(f, { recursive: true, force: true });\n\n\t\t// Return any results / errors if no more processing required\n\t\tconst text = transcript.text?.trim() || null;\n\t\tif(!speaker) return parentPort?.postMessage({ text });\n\t\tif(!token) return parentPort?.postMessage({ text, error: 'HuggingFace token required' });\n\t\tif(!hasDiarization) return parentPort?.postMessage({ text, error: 'Speaker diarization unavailable' });\n\n\t\t// Combine transcript and speakers\n\t\tconst combined = combineSpeakerTranscript(transcript.chunks || [], speakers || []);\n\t\tparentPort?.postMessage({ text: combined });\n\t} catch (err: any) {\n\t\tparentPort?.postMessage({ error: err.stack || err.message });\n\t}\n});\n"],"names":["whisperPipeline","canDiarization","resolve","proc","spawn","code","runDiarization","audioPath","dir","token","script","reject","output","data","combineSpeakerTranscript","chunks","speakers","speakerMap","speakerCount","seg","lines","currentSpeaker","currentText","chunk","time","speaker","s","speakerNum","prepareAudioBuffer","file","wav","tmp","wavefile","readFileSync","join","mkdtempSync","tmpdir","execSync","samples","left","right","buffer","i","parentPort","model","modelDir","pipeline","f","hasDiarization","transcript","rmSync","text","combined","err"],"mappings":";;;;;;;AASA,IAAIA;AAEJ,eAAsBC,IAAmC;AACxD,SAAO,IAAI,QAAQ,CAACC,MAAY;AAC/B,UAAMC,IAAOC,EAAM,UAAU,CAAC,MAAM,uBAAuB,CAAC;AAC5D,IAAAD,EAAK,GAAG,SAAS,CAACE,MAAiBH,EAAQG,MAAS,CAAC,CAAC,GACtDF,EAAK,GAAG,SAAS,MAAMD,EAAQ,EAAK,CAAC;AAAA,EACtC,CAAC;AACF;AAEA,eAAeI,EAAeC,GAAmBC,GAAaC,GAA+B;AAC5F,QAAMC,IAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,+BAMeF,CAAG;AAAA,iFAC+CC,CAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAUrF,SAAO,IAAI,QAAQ,CAACP,GAASS,MAAW;AACvC,QAAIC,IAAS;AACb,UAAMT,IAAOC,EAAM,UAAU,CAAC,MAAMM,GAAQH,CAAS,CAAC;AACtD,IAAAJ,EAAK,OAAO,GAAG,QAAQ,CAACU,MAAiBD,KAAUC,EAAK,UAAU,GAClEV,EAAK,OAAO,GAAG,QAAQ,CAACU,MAAiB,QAAQ,MAAMA,EAAK,SAAA,CAAU,CAAC,GACvEV,EAAK,GAAG,SAAS,CAACE,MAAiB;AAClC,UAAGA,MAAS;AACX,YAAI;AACH,UAAAH,EAAQ,KAAK,MAAMU,CAAM,CAAC;AAAA,QAC3B,QAAc;AACb,UAAAD,EAAO,IAAI,MAAM,oCAAoC,CAAC;AAAA,QACvD;AAAA;AAEA,QAAAA,EAAO,IAAI,MAAM,mCAAmCN,CAAI,EAAE,CAAC;AAAA,IAE7D,CAAC,GACDF,EAAK,GAAG,SAASQ,CAAM;AAAA,EACxB,CAAC;AACF;AAEA,SAASG,EAAyBC,GAAeC,GAAyB;AACzE,QAAMC,wBAAiB,IAAA;AACvB,MAAIC,IAAe;AACnB,EAAAF,EAAS,QAAQ,CAACG,MAAa;AAC9B,IAAIF,EAAW,IAAIE,EAAI,OAAO,KAAGF,EAAW,IAAIE,EAAI,SAAS,EAAED,CAAY;AAAA,EAC5E,CAAC;AAED,QAAME,IAAkB,CAAA;AACxB,MAAIC,IAAiB,IACjBC,IAAc;AAClB,SAAAP,EAAO,QAAQ,CAACQ,MAAe;AAC9B,UAAMC,IAAOD,EAAM,UAAU,CAAC,GACxBE,IAAUT,EAAS,KAAK,CAACU,MAAWF,KAAQE,EAAE,SAASF,KAAQE,EAAE,GAAG,GACpEC,IAAaF,IAAUR,EAAW,IAAIQ,EAAQ,OAAO,IAAI;AAC/D,IAAIE,MAAeN,KACfC,OAAmB,KAAK,YAAYD,CAAc,MAAMC,EAAY,KAAA,CAAM,EAAE,GAC/ED,IAAiBM,GACjBL,IAAcC,EAAM,QAEpBD,KAAeC,EAAM;AAAA,EAEvB,CAAC,GACED,OAAmB,KAAK,YAAYD,CAAc,MAAMC,EAAY,KAAA,CAAM,EAAE,GACxEF,EAAM,KAAK;AAAA,CAAI;AACvB;AAEA,SAASQ,EAAmBC,GAAsC;AACjE,MAAIC,GAAUC;AACd,MAAI;AACH,IAAAD,IAAM,IAAIE,EAAS,SAASC,EAAaJ,CAAI,CAAC;AAAA,EAC/C,QAAa;AACZ,IAAAE,IAAMG,EAAKC,EAAYD,EAAKE,EAAA,GAAU,QAAQ,CAAC,GAAG,eAAe,GACjEC,EAAS,cAAcR,CAAI,6BAA6BE,CAAG,KAAK,EAAE,OAAO,UAAU,GACnFD,IAAM,IAAIE,EAAS,SAASC,EAAaF,CAAG,CAAC;AAAA,EAC9C,UAAA;AACC,IAAAD,EAAI,WAAW,KAAK,GACpBA,EAAI,aAAa,IAAK;AACtB,UAAMQ,IAAUR,EAAI,WAAA;AACpB,QAAG,MAAM,QAAQQ,CAAO,GAAG;AAC1B,YAAMC,IAAOD,EAAQ,CAAC,GAChBE,IAAQF,EAAQ,CAAC,GACjBG,IAAS,IAAI,aAAaF,EAAK,MAAM;AAC3C,eAASG,IAAI,GAAGA,IAAIH,EAAK,QAAQG,IAAK,CAAAD,EAAOC,CAAC,KAAKH,EAAKG,CAAC,IAAIF,EAAME,CAAC,KAAK;AACzE,aAAO,CAACX,KAAOF,GAAMY,CAAM;AAAA,IAC5B;AACA,WAAO,CAACV,KAAOF,GAAMS,CAAO;AAAA,EAC7B;AACD;AAEAK,GAAY,GAAG,WAAW,OAAO,EAAE,MAAAd,GAAM,SAAAJ,GAAS,OAAAmB,GAAO,UAAAC,GAAU,OAAApC,QAAY;AAC9E,MAAI;AACH,IAAIT,MAAiBA,IAAkB,MAAM8C,EAAS,gCAAgC,UAAUF,CAAK,IAAI,EAAC,WAAWC,GAAU,WAAW,IAAK;AAG/I,UAAM,CAACE,GAAGN,CAAM,IAAIb,EAAmBC,CAAI,GAGrCmB,IAAiBvB,KAAW,MAAMxB,EAAA,GAClC,CAACgD,GAAYjC,CAAQ,IAAI,MAAM,QAAQ,IAAI;AAAA,MAChDhB,EAAgByC,GAAQ,EAAC,mBAAmBhB,IAAU,SAAS,IAAM;AAAA,MACpE,CAACA,KAAW,CAAChB,KAAS,CAACuC,IAAkB,QAAQ,QAAA,IAAW1C,EAAeyC,GAAGF,GAAUpC,CAAK;AAAA,IAAA,CAC9F;AACD,IAAGoB,KAAQkB,KAAGG,EAAOH,GAAG,EAAE,WAAW,IAAM,OAAO,IAAM;AAGxD,UAAMI,IAAOF,EAAW,MAAM,KAAA,KAAU;AACxC,QAAG,CAACxB,EAAS,QAAOkB,GAAY,YAAY,EAAE,MAAAQ,GAAM;AACpD,QAAG,CAAC1C,EAAO,QAAOkC,GAAY,YAAY,EAAE,MAAAQ,GAAM,OAAO,8BAA8B;AACvF,QAAG,CAACH,EAAgB,QAAOL,GAAY,YAAY,EAAE,MAAAQ,GAAM,OAAO,mCAAmC;AAGrG,UAAMC,IAAWtC,EAAyBmC,EAAW,UAAU,CAAA,GAAIjC,KAAY,EAAE;AACjF,IAAA2B,GAAY,YAAY,EAAE,MAAMS,EAAA,CAAU;AAAA,EAC3C,SAASC,GAAU;AAClB,IAAAV,GAAY,YAAY,EAAE,OAAOU,EAAI,SAASA,EAAI,SAAS;AAAA,EAC5D;AACD,CAAC;"}
1
+ {"version":3,"file":"asr.mjs","sources":["../src/asr.ts"],"sourcesContent":["import { pipeline } from '@xenova/transformers';\nimport { parentPort } from 'worker_threads';\nimport { spawn } from 'node:child_process';\nimport { execSync } from 'node:child_process';\nimport { mkdtempSync, rmSync, readFileSync } from 'node:fs';\nimport { join } from 'node:path';\nimport { tmpdir } from 'node:os';\nimport wavefile from 'wavefile';\n\nlet whisperPipeline: any;\n\nexport async function canDiarization(): Promise<string | null> {\n\tconst checkPython = (cmd: string) => {\n\t\treturn new Promise<boolean>((resolve) => {\n\t\t\tconst proc = spawn(cmd, ['-c', 'import pyannote.audio']);\n\t\t\tproc.on('close', (code: number) => resolve(code === 0));\n\t\t\tproc.on('error', () => resolve(false));\n\t\t});\n\t};\n\tif(await checkPython('python3')) return 'python3';\n\tif(await checkPython('python')) return 'python';\n\treturn null;\n}\n\nasync function runDiarization(binary: string, audioPath: string, dir: string, token: string): Promise<any[]> {\n\tconst script = `\nimport sys\nimport json\nimport os\nfrom pyannote.audio import Pipeline\n\nos.environ['TORCH_HOME'] = r\"${dir}\"\npipeline = Pipeline.from_pretrained(\"pyannote/speaker-diarization-3.1\", token=\"${token}\")\noutput = pipeline(sys.argv[1])\n\nsegments = []\nfor turn, speaker in output.speaker_diarization:\n segments.append({\"start\": turn.start, \"end\": turn.end, \"speaker\": speaker})\n\nprint(json.dumps(segments))\n`;\n\n\treturn new Promise((resolve, reject) => {\n\t\tlet output = '';\n\t\tconst proc = spawn(binary, ['-c', script, audioPath]);\n\t\tproc.stdout.on('data', (data: Buffer) => output += data.toString());\n\t\tproc.stderr.on('data', (data: Buffer) => console.error(data.toString()));\n\t\tproc.on('close', (code: number) => {\n\t\t\tif(code === 0) {\n\t\t\t\ttry {\n\t\t\t\t\tresolve(JSON.parse(output));\n\t\t\t\t} catch (err) {\n\t\t\t\t\treject(new Error('Failed to parse diarization output'));\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\treject(new Error(`Python process exited with code ${code}`));\n\t\t\t}\n\t\t});\n\t\tproc.on('error', reject);\n\t});\n}\n\nfunction combineSpeakerTranscript(chunks: any[], speakers: any[]): string {\n\tconst speakerMap = new Map();\n\tlet speakerCount = 0;\n\tspeakers.forEach((seg: any) => {\n\t\tif(!speakerMap.has(seg.speaker)) speakerMap.set(seg.speaker, ++speakerCount);\n\t});\n\n\tconst lines: string[] = [];\n\tlet currentSpeaker = -1;\n\tlet currentText = '';\n\tchunks.forEach((chunk: any) => {\n\t\tconst time = chunk.timestamp[0];\n\t\tconst speaker = speakers.find((s: any) => time >= s.start && time <= s.end);\n\t\tconst speakerNum = speaker ? speakerMap.get(speaker.speaker) : 1;\n\t\tif (speakerNum !== currentSpeaker) {\n\t\t\tif(currentText) lines.push(`[Speaker ${currentSpeaker}]: ${currentText.trim()}`);\n\t\t\tcurrentSpeaker = speakerNum;\n\t\t\tcurrentText = chunk.text;\n\t\t} else {\n\t\t\tcurrentText += chunk.text;\n\t\t}\n\t});\n\tif(currentText) lines.push(`[Speaker ${currentSpeaker}]: ${currentText.trim()}`);\n\treturn lines.join('\\n');\n}\n\nfunction prepareAudioBuffer(file: string): [string, Float32Array] {\n\tlet wav: any, tmp;\n\ttry {\n\t\twav = new wavefile.WaveFile(readFileSync(file));\n\t} catch(err) {\n\t\ttmp = join(mkdtempSync(join(tmpdir(), 'audio-')), 'converted.wav');\n\t\texecSync(`ffmpeg -i \"${file}\" -ar 16000 -ac 1 -f wav \"${tmp}\"`, { stdio: 'ignore' });\n\t\twav = new wavefile.WaveFile(readFileSync(tmp));\n\t} finally {\n\t\twav.toBitDepth('32f');\n\t\twav.toSampleRate(16000);\n\t\tconst samples = wav.getSamples();\n\t\tif(Array.isArray(samples)) {\n\t\t\tconst left = samples[0];\n\t\t\tconst right = samples[1];\n\t\t\tconst buffer = new Float32Array(left.length);\n\t\t\tfor (let i = 0; i < left.length; i++) buffer[i] = (left[i] + right[i]) / 2;\n\t\t\treturn [tmp || file, buffer];\n\t\t}\n\t\treturn [tmp || file, samples];\n\t}\n}\n\nparentPort?.on('message', async ({ file, speaker, model, modelDir, token }) => {\n\ttry {\n\t\tif(!whisperPipeline) whisperPipeline = await pipeline('automatic-speech-recognition', `Xenova/${model}`, {cache_dir: modelDir, quantized: true});\n\n\t\t// Prepare audio file\n\t\tconst [f, buffer] = prepareAudioBuffer(file);\n\n\t\t// Fetch transcript and speakers\n\t\tconst hasDiarization = await canDiarization();\n\t\tconst [transcript, speakers] = await Promise.all([\n\t\t\twhisperPipeline(buffer, {return_timestamps: speaker ? 'word' : false}),\n\t\t\t(!speaker || !token || !hasDiarization) ? Promise.resolve(): runDiarization(hasDiarization, f, modelDir, token),\n\t\t]);\n\t\tif(file != f) rmSync(f, { recursive: true, force: true });\n\n\t\t// Return any results / errors if no more processing required\n\t\tconst text = transcript.text?.trim() || null;\n\t\tif(!speaker) return parentPort?.postMessage({ text });\n\t\tif(!token) return parentPort?.postMessage({ text, error: 'HuggingFace token required' });\n\t\tif(!hasDiarization) return parentPort?.postMessage({ text, error: 'Speaker diarization unavailable' });\n\n\t\t// Combine transcript and speakers\n\t\tconst combined = combineSpeakerTranscript(transcript.chunks || [], speakers || []);\n\t\tparentPort?.postMessage({ text: combined });\n\t} catch (err: any) {\n\t\tparentPort?.postMessage({ error: err.stack || err.message });\n\t}\n});\n"],"names":["whisperPipeline","canDiarization","checkPython","cmd","resolve","proc","spawn","code","runDiarization","binary","audioPath","dir","token","script","reject","output","data","combineSpeakerTranscript","chunks","speakers","speakerMap","speakerCount","seg","lines","currentSpeaker","currentText","chunk","time","speaker","s","speakerNum","prepareAudioBuffer","file","wav","tmp","wavefile","readFileSync","join","mkdtempSync","tmpdir","execSync","samples","left","right","buffer","i","parentPort","model","modelDir","pipeline","f","hasDiarization","transcript","rmSync","text","combined","err"],"mappings":";;;;;;;AASA,IAAIA;AAEJ,eAAsBC,IAAyC;AAC9D,QAAMC,IAAc,CAACC,MACb,IAAI,QAAiB,CAACC,MAAY;AACxC,UAAMC,IAAOC,EAAMH,GAAK,CAAC,MAAM,uBAAuB,CAAC;AACvD,IAAAE,EAAK,GAAG,SAAS,CAACE,MAAiBH,EAAQG,MAAS,CAAC,CAAC,GACtDF,EAAK,GAAG,SAAS,MAAMD,EAAQ,EAAK,CAAC;AAAA,EACtC,CAAC;AAEF,SAAG,MAAMF,EAAY,SAAS,IAAU,YACrC,MAAMA,EAAY,QAAQ,IAAU,WAChC;AACR;AAEA,eAAeM,EAAeC,GAAgBC,GAAmBC,GAAaC,GAA+B;AAC5G,QAAMC,IAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,+BAMeF,CAAG;AAAA,iFAC+CC,CAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAUrF,SAAO,IAAI,QAAQ,CAACR,GAASU,MAAW;AACvC,QAAIC,IAAS;AACb,UAAMV,IAAOC,EAAMG,GAAQ,CAAC,MAAMI,GAAQH,CAAS,CAAC;AACpD,IAAAL,EAAK,OAAO,GAAG,QAAQ,CAACW,MAAiBD,KAAUC,EAAK,UAAU,GAClEX,EAAK,OAAO,GAAG,QAAQ,CAACW,MAAiB,QAAQ,MAAMA,EAAK,SAAA,CAAU,CAAC,GACvEX,EAAK,GAAG,SAAS,CAACE,MAAiB;AAClC,UAAGA,MAAS;AACX,YAAI;AACH,UAAAH,EAAQ,KAAK,MAAMW,CAAM,CAAC;AAAA,QAC3B,QAAc;AACb,UAAAD,EAAO,IAAI,MAAM,oCAAoC,CAAC;AAAA,QACvD;AAAA;AAEA,QAAAA,EAAO,IAAI,MAAM,mCAAmCP,CAAI,EAAE,CAAC;AAAA,IAE7D,CAAC,GACDF,EAAK,GAAG,SAASS,CAAM;AAAA,EACxB,CAAC;AACF;AAEA,SAASG,EAAyBC,GAAeC,GAAyB;AACzE,QAAMC,wBAAiB,IAAA;AACvB,MAAIC,IAAe;AACnB,EAAAF,EAAS,QAAQ,CAACG,MAAa;AAC9B,IAAIF,EAAW,IAAIE,EAAI,OAAO,KAAGF,EAAW,IAAIE,EAAI,SAAS,EAAED,CAAY;AAAA,EAC5E,CAAC;AAED,QAAME,IAAkB,CAAA;AACxB,MAAIC,IAAiB,IACjBC,IAAc;AAClB,SAAAP,EAAO,QAAQ,CAACQ,MAAe;AAC9B,UAAMC,IAAOD,EAAM,UAAU,CAAC,GACxBE,IAAUT,EAAS,KAAK,CAACU,MAAWF,KAAQE,EAAE,SAASF,KAAQE,EAAE,GAAG,GACpEC,IAAaF,IAAUR,EAAW,IAAIQ,EAAQ,OAAO,IAAI;AAC/D,IAAIE,MAAeN,KACfC,OAAmB,KAAK,YAAYD,CAAc,MAAMC,EAAY,KAAA,CAAM,EAAE,GAC/ED,IAAiBM,GACjBL,IAAcC,EAAM,QAEpBD,KAAeC,EAAM;AAAA,EAEvB,CAAC,GACED,OAAmB,KAAK,YAAYD,CAAc,MAAMC,EAAY,KAAA,CAAM,EAAE,GACxEF,EAAM,KAAK;AAAA,CAAI;AACvB;AAEA,SAASQ,EAAmBC,GAAsC;AACjE,MAAIC,GAAUC;AACd,MAAI;AACH,IAAAD,IAAM,IAAIE,EAAS,SAASC,EAAaJ,CAAI,CAAC;AAAA,EAC/C,QAAa;AACZ,IAAAE,IAAMG,EAAKC,EAAYD,EAAKE,EAAA,GAAU,QAAQ,CAAC,GAAG,eAAe,GACjEC,EAAS,cAAcR,CAAI,6BAA6BE,CAAG,KAAK,EAAE,OAAO,UAAU,GACnFD,IAAM,IAAIE,EAAS,SAASC,EAAaF,CAAG,CAAC;AAAA,EAC9C,UAAA;AACC,IAAAD,EAAI,WAAW,KAAK,GACpBA,EAAI,aAAa,IAAK;AACtB,UAAMQ,IAAUR,EAAI,WAAA;AACpB,QAAG,MAAM,QAAQQ,CAAO,GAAG;AAC1B,YAAMC,IAAOD,EAAQ,CAAC,GAChBE,IAAQF,EAAQ,CAAC,GACjBG,IAAS,IAAI,aAAaF,EAAK,MAAM;AAC3C,eAASG,IAAI,GAAGA,IAAIH,EAAK,QAAQG,IAAK,CAAAD,EAAOC,CAAC,KAAKH,EAAKG,CAAC,IAAIF,EAAME,CAAC,KAAK;AACzE,aAAO,CAACX,KAAOF,GAAMY,CAAM;AAAA,IAC5B;AACA,WAAO,CAACV,KAAOF,GAAMS,CAAO;AAAA,EAC7B;AACD;AAEAK,GAAY,GAAG,WAAW,OAAO,EAAE,MAAAd,GAAM,SAAAJ,GAAS,OAAAmB,GAAO,UAAAC,GAAU,OAAApC,QAAY;AAC9E,MAAI;AACH,IAAIZ,MAAiBA,IAAkB,MAAMiD,EAAS,gCAAgC,UAAUF,CAAK,IAAI,EAAC,WAAWC,GAAU,WAAW,IAAK;AAG/I,UAAM,CAACE,GAAGN,CAAM,IAAIb,EAAmBC,CAAI,GAGrCmB,IAAiB,MAAMlD,EAAA,GACvB,CAACmD,GAAYjC,CAAQ,IAAI,MAAM,QAAQ,IAAI;AAAA,MAChDnB,EAAgB4C,GAAQ,EAAC,mBAAmBhB,IAAU,SAAS,IAAM;AAAA,MACpE,CAACA,KAAW,CAAChB,KAAS,CAACuC,IAAkB,QAAQ,QAAA,IAAW3C,EAAe2C,GAAgBD,GAAGF,GAAUpC,CAAK;AAAA,IAAA,CAC9G;AACD,IAAGoB,KAAQkB,KAAGG,EAAOH,GAAG,EAAE,WAAW,IAAM,OAAO,IAAM;AAGxD,UAAMI,IAAOF,EAAW,MAAM,KAAA,KAAU;AACxC,QAAG,CAACxB,EAAS,QAAOkB,GAAY,YAAY,EAAE,MAAAQ,GAAM;AACpD,QAAG,CAAC1C,EAAO,QAAOkC,GAAY,YAAY,EAAE,MAAAQ,GAAM,OAAO,8BAA8B;AACvF,QAAG,CAACH,EAAgB,QAAOL,GAAY,YAAY,EAAE,MAAAQ,GAAM,OAAO,mCAAmC;AAGrG,UAAMC,IAAWtC,EAAyBmC,EAAW,UAAU,CAAA,GAAIjC,KAAY,EAAE;AACjF,IAAA2B,GAAY,YAAY,EAAE,MAAMS,EAAA,CAAU;AAAA,EAC3C,SAASC,GAAU;AAClB,IAAAV,GAAY,YAAY,EAAE,OAAOU,EAAI,SAASA,EAAI,SAAS;AAAA,EAC5D;AACD,CAAC;"}
package/dist/audio.d.ts CHANGED
@@ -1,5 +1,4 @@
1
1
  import { AbortablePromise, Ai } from './ai.ts';
2
- import { canDiarization } from './asr.ts';
3
2
  export declare class Audio {
4
3
  private ai;
5
4
  constructor(ai: Ai);
@@ -7,5 +6,5 @@ export declare class Audio {
7
6
  model?: string;
8
7
  speaker?: boolean | 'id';
9
8
  }): AbortablePromise<string | null>;
10
- canDiarization: typeof canDiarization;
9
+ canDiarization: () => Promise<boolean>;
11
10
  }
package/dist/index.js CHANGED
@@ -1,31 +1,31 @@
1
- "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const R=require("node:os"),f=require("@ztimson/utils"),$=require("@anthropic-ai/sdk"),v=require("openai"),x=require("worker_threads"),S=require("url"),w=require("path"),j=require("./asr.js"),L=require("tesseract.js");require("./embedder.js");const N=require("cheerio"),T=require("@ztimson/node-utils");var y=typeof document<"u"?document.currentScript:null;function q(h){const r=Object.create(null,{[Symbol.toStringTag]:{value:"Module"}});if(h){for(const e in h)if(e!=="default"){const t=Object.getOwnPropertyDescriptor(h,e);Object.defineProperty(r,e,t.get?t:{enumerable:!0,get:()=>h[e]})}}return r.default=h,Object.freeze(r)}const C=q(R),D=q(N);class k{}class P extends k{constructor(r,e,t){super(),this.ai=r,this.apiToken=e,this.model=t,this.client=new $.Anthropic({apiKey:e})}client;toStandard(r){const e=Date.now(),t=[];for(let c of r)if(typeof c.content=="string")t.push({timestamp:e,...c});else{const s=c.content?.filter(n=>n.type=="text").map(n=>n.text).join(`
1
+ "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const R=require("node:os"),f=require("@ztimson/utils"),$=require("@anthropic-ai/sdk"),v=require("openai"),x=require("worker_threads"),S=require("url"),w=require("path"),j=require("./asr.js"),L=require("tesseract.js");require("./embedder.js");const N=require("cheerio"),T=require("@ztimson/node-utils");var g=typeof document<"u"?document.currentScript:null;function q(h){const r=Object.create(null,{[Symbol.toStringTag]:{value:"Module"}});if(h){for(const e in h)if(e!=="default"){const t=Object.getOwnPropertyDescriptor(h,e);Object.defineProperty(r,e,t.get?t:{enumerable:!0,get:()=>h[e]})}}return r.default=h,Object.freeze(r)}const C=q(R),D=q(N);class k{}class P extends k{constructor(r,e,t){super(),this.ai=r,this.apiToken=e,this.model=t,this.client=new $.Anthropic({apiKey:e})}client;toStandard(r){const e=Date.now(),t=[];for(let c of r)if(typeof c.content=="string")t.push({timestamp:e,...c});else{const n=c.content?.filter(s=>s.type=="text").map(s=>s.text).join(`
2
2
 
3
- `);s&&t.push({timestamp:e,role:c.role,content:s}),c.content.forEach(n=>{if(n.type=="tool_use")t.push({timestamp:e,role:"tool",id:n.id,name:n.name,args:n.input,content:void 0});else if(n.type=="tool_result"){const l=t.findLast(o=>o.id==n.tool_use_id);l&&(l[n.is_error?"error":"content"]=n.content)}})}return t}fromStandard(r){for(let e=0;e<r.length;e++)if(r[e].role=="tool"){const t=r[e];r.splice(e,1,{role:"assistant",content:[{type:"tool_use",id:t.id,name:t.name,input:t.args}]},{role:"user",content:[{type:"tool_result",tool_use_id:t.id,is_error:!!t.error,content:t.error||t.content}]}),e++}return r.map(({timestamp:e,...t})=>t)}ask(r,e={}){const t=new AbortController;return Object.assign(new Promise(async c=>{let s=this.fromStandard([...e.history||[],{role:"user",content:r,timestamp:Date.now()}]);const n=e.tools||this.ai.options.llm?.tools||[],l={model:e.model||this.model,max_tokens:e.max_tokens||this.ai.options.llm?.max_tokens||4096,system:e.system||this.ai.options.llm?.system||"",temperature:e.temperature||this.ai.options.llm?.temperature||.7,tools:n.map(m=>({name:m.name,description:m.description,input_schema:{type:"object",properties:m.args?f.objectMap(m.args,(i,d)=>({...d,required:void 0})):{},required:m.args?Object.entries(m.args).filter(i=>i[1].required).map(i=>i[0]):[]},fn:void 0})),messages:s,stream:!!e.stream};let o,a=!0;do{if(o=await this.client.messages.create(l).catch(i=>{throw i.message+=`
3
+ `);n&&t.push({timestamp:e,role:c.role,content:n}),c.content.forEach(s=>{if(s.type=="tool_use")t.push({timestamp:e,role:"tool",id:s.id,name:s.name,args:s.input,content:void 0});else if(s.type=="tool_result"){const l=t.findLast(o=>o.id==s.tool_use_id);l&&(l[s.is_error?"error":"content"]=s.content)}})}return t}fromStandard(r){for(let e=0;e<r.length;e++)if(r[e].role=="tool"){const t=r[e];r.splice(e,1,{role:"assistant",content:[{type:"tool_use",id:t.id,name:t.name,input:t.args}]},{role:"user",content:[{type:"tool_result",tool_use_id:t.id,is_error:!!t.error,content:t.error||t.content}]}),e++}return r.map(({timestamp:e,...t})=>t)}ask(r,e={}){const t=new AbortController;return Object.assign(new Promise(async c=>{let n=this.fromStandard([...e.history||[],{role:"user",content:r,timestamp:Date.now()}]);const s=e.tools||this.ai.options.llm?.tools||[],l={model:e.model||this.model,max_tokens:e.max_tokens||this.ai.options.llm?.max_tokens||4096,system:e.system||this.ai.options.llm?.system||"",temperature:e.temperature||this.ai.options.llm?.temperature||.7,tools:s.map(m=>({name:m.name,description:m.description,input_schema:{type:"object",properties:m.args?f.objectMap(m.args,(i,d)=>({...d,required:void 0})):{},required:m.args?Object.entries(m.args).filter(i=>i[1].required).map(i=>i[0]):[]},fn:void 0})),messages:n,stream:!!e.stream};let o,a=!0;do{if(o=await this.client.messages.create(l).catch(i=>{throw i.message+=`
4
4
 
5
5
  Messages:
6
- ${JSON.stringify(s,null,2)}`,i}),e.stream){a?a=!1:e.stream({text:`
6
+ ${JSON.stringify(n,null,2)}`,i}),e.stream){a?a=!1:e.stream({text:`
7
7
 
8
- `}),o.content=[];for await(const i of o){if(t.signal.aborted)break;if(i.type==="content_block_start")i.content_block.type==="text"?o.content.push({type:"text",text:""}):i.content_block.type==="tool_use"&&o.content.push({type:"tool_use",id:i.content_block.id,name:i.content_block.name,input:""});else if(i.type==="content_block_delta")if(i.delta.type==="text_delta"){const d=i.delta.text;o.content.at(-1).text+=d,e.stream({text:d})}else i.delta.type==="input_json_delta"&&(o.content.at(-1).input+=i.delta.partial_json);else if(i.type==="content_block_stop"){const d=o.content.at(-1);d.input!=null&&(d.input=d.input?f.JSONAttemptParse(d.input,{}):{})}else if(i.type==="message_stop")break}}const m=o.content.filter(i=>i.type==="tool_use");if(m.length&&!t.signal.aborted){s.push({role:"assistant",content:o.content});const i=await Promise.all(m.map(async d=>{const p=n.find(f.findByProp("name",d.name));if(e.stream&&e.stream({tool:d.name}),!p)return{tool_use_id:d.id,is_error:!0,content:"Tool not found"};try{const u=await p.fn(d.input,e?.stream,this.ai);return{type:"tool_result",tool_use_id:d.id,content:f.JSONSanitize(u)}}catch(u){return{type:"tool_result",tool_use_id:d.id,is_error:!0,content:u?.message||u?.toString()||"Unknown"}}}));s.push({role:"user",content:i}),l.messages=s}}while(!t.signal.aborted&&o.content.some(m=>m.type==="tool_use"));s.push({role:"assistant",content:o.content.filter(m=>m.type=="text").map(m=>m.text).join(`
8
+ `}),o.content=[];for await(const i of o){if(t.signal.aborted)break;if(i.type==="content_block_start")i.content_block.type==="text"?o.content.push({type:"text",text:""}):i.content_block.type==="tool_use"&&o.content.push({type:"tool_use",id:i.content_block.id,name:i.content_block.name,input:""});else if(i.type==="content_block_delta")if(i.delta.type==="text_delta"){const d=i.delta.text;o.content.at(-1).text+=d,e.stream({text:d})}else i.delta.type==="input_json_delta"&&(o.content.at(-1).input+=i.delta.partial_json);else if(i.type==="content_block_stop"){const d=o.content.at(-1);d.input!=null&&(d.input=d.input?f.JSONAttemptParse(d.input,{}):{})}else if(i.type==="message_stop")break}}const m=o.content.filter(i=>i.type==="tool_use");if(m.length&&!t.signal.aborted){n.push({role:"assistant",content:o.content});const i=await Promise.all(m.map(async d=>{const p=s.find(f.findByProp("name",d.name));if(e.stream&&e.stream({tool:d.name}),!p)return{tool_use_id:d.id,is_error:!0,content:"Tool not found"};try{const u=await p.fn(d.input,e?.stream,this.ai);return{type:"tool_result",tool_use_id:d.id,content:f.JSONSanitize(u)}}catch(u){return{type:"tool_result",tool_use_id:d.id,is_error:!0,content:u?.message||u?.toString()||"Unknown"}}}));n.push({role:"user",content:i}),l.messages=n}}while(!t.signal.aborted&&o.content.some(m=>m.type==="tool_use"));n.push({role:"assistant",content:o.content.filter(m=>m.type=="text").map(m=>m.text).join(`
9
9
 
10
- `)}),s=this.toStandard(s),e.stream&&e.stream({done:!0}),e.history&&e.history.splice(0,e.history.length,...s),c(s.at(-1)?.content)}),{abort:()=>t.abort()})}}class _ extends k{constructor(r,e,t,c){super(),this.ai=r,this.host=e,this.token=t,this.model=c,this.client=new v.OpenAI(f.clean({baseURL:e,apiKey:t}))}client;toStandard(r){for(let e=0;e<r.length;e++){const t=r[e];if(t.role==="assistant"&&t.tool_calls){const c=t.tool_calls.map(s=>({role:"tool",id:s.id,name:s.function.name,args:f.JSONAttemptParse(s.function.arguments,{}),timestamp:t.timestamp}));r.splice(e,1,...c),e+=c.length-1}else if(t.role==="tool"&&t.content){const c=r.find(s=>t.tool_call_id==s.id);c&&(t.content.includes('"error":')?c.error=t.content:c.content=t.content),r.splice(e,1),e--}r[e]?.timestamp||(r[e].timestamp=Date.now())}return r}fromStandard(r){return r.reduce((e,t)=>{if(t.role==="tool")e.push({role:"assistant",content:null,tool_calls:[{id:t.id,type:"function",function:{name:t.name,arguments:JSON.stringify(t.args)}}],refusal:null,annotations:[]},{role:"tool",tool_call_id:t.id,content:t.error||t.content});else{const{timestamp:c,...s}=t;e.push(s)}return e},[])}ask(r,e={}){const t=new AbortController;return Object.assign(new Promise(async(c,s)=>{e.system&&e.history?.[0]?.role!="system"&&e.history?.splice(0,0,{role:"system",content:e.system,timestamp:Date.now()});let n=this.fromStandard([...e.history||[],{role:"user",content:r,timestamp:Date.now()}]);const l=e.tools||this.ai.options.llm?.tools||[],o={model:e.model||this.model,messages:n,stream:!!e.stream,max_tokens:e.max_tokens||this.ai.options.llm?.max_tokens||4096,temperature:e.temperature||this.ai.options.llm?.temperature||.7,tools:l.map(i=>({type:"function",function:{name:i.name,description:i.description,parameters:{type:"object",properties:i.args?f.objectMap(i.args,(d,p)=>({...p,required:void 0})):{},required:i.args?Object.entries(i.args).filter(d=>d[1].required).map(d=>d[0]):[]}}}))};let a,m=!0;do{if(a=await this.client.chat.completions.create(o).catch(d=>{throw d.message+=`
10
+ `)}),n=this.toStandard(n),e.stream&&e.stream({done:!0}),e.history&&e.history.splice(0,e.history.length,...n),c(n.at(-1)?.content)}),{abort:()=>t.abort()})}}class _ extends k{constructor(r,e,t,c){super(),this.ai=r,this.host=e,this.token=t,this.model=c,this.client=new v.OpenAI(f.clean({baseURL:e,apiKey:t}))}client;toStandard(r){for(let e=0;e<r.length;e++){const t=r[e];if(t.role==="assistant"&&t.tool_calls){const c=t.tool_calls.map(n=>({role:"tool",id:n.id,name:n.function.name,args:f.JSONAttemptParse(n.function.arguments,{}),timestamp:t.timestamp}));r.splice(e,1,...c),e+=c.length-1}else if(t.role==="tool"&&t.content){const c=r.find(n=>t.tool_call_id==n.id);c&&(t.content.includes('"error":')?c.error=t.content:c.content=t.content),r.splice(e,1),e--}r[e]?.timestamp||(r[e].timestamp=Date.now())}return r}fromStandard(r){return r.reduce((e,t)=>{if(t.role==="tool")e.push({role:"assistant",content:null,tool_calls:[{id:t.id,type:"function",function:{name:t.name,arguments:JSON.stringify(t.args)}}],refusal:null,annotations:[]},{role:"tool",tool_call_id:t.id,content:t.error||t.content});else{const{timestamp:c,...n}=t;e.push(n)}return e},[])}ask(r,e={}){const t=new AbortController;return Object.assign(new Promise(async(c,n)=>{e.system&&e.history?.[0]?.role!="system"&&e.history?.splice(0,0,{role:"system",content:e.system,timestamp:Date.now()});let s=this.fromStandard([...e.history||[],{role:"user",content:r,timestamp:Date.now()}]);const l=e.tools||this.ai.options.llm?.tools||[],o={model:e.model||this.model,messages:s,stream:!!e.stream,max_tokens:e.max_tokens||this.ai.options.llm?.max_tokens||4096,temperature:e.temperature||this.ai.options.llm?.temperature||.7,tools:l.map(i=>({type:"function",function:{name:i.name,description:i.description,parameters:{type:"object",properties:i.args?f.objectMap(i.args,(d,p)=>({...p,required:void 0})):{},required:i.args?Object.entries(i.args).filter(d=>d[1].required).map(d=>d[0]):[]}}}))};let a,m=!0;do{if(a=await this.client.chat.completions.create(o).catch(d=>{throw d.message+=`
11
11
 
12
12
  Messages:
13
- ${JSON.stringify(n,null,2)}`,d}),e.stream){m?m=!1:e.stream({text:`
13
+ ${JSON.stringify(s,null,2)}`,d}),e.stream){m?m=!1:e.stream({text:`
14
14
 
15
- `}),a.choices=[{message:{content:"",tool_calls:[]}}];for await(const d of a){if(t.signal.aborted)break;d.choices[0].delta.content&&(a.choices[0].message.content+=d.choices[0].delta.content,e.stream({text:d.choices[0].delta.content})),d.choices[0].delta.tool_calls&&(a.choices[0].message.tool_calls=d.choices[0].delta.tool_calls)}}const i=a.choices[0].message.tool_calls||[];if(i.length&&!t.signal.aborted){n.push(a.choices[0].message);const d=await Promise.all(i.map(async p=>{const u=l?.find(f.findByProp("name",p.function.name));if(e.stream&&e.stream({tool:p.function.name}),!u)return{role:"tool",tool_call_id:p.id,content:'{"error": "Tool not found"}'};try{const g=f.JSONAttemptParse(p.function.arguments,{}),b=await u.fn(g,e.stream,this.ai);return{role:"tool",tool_call_id:p.id,content:f.JSONSanitize(b)}}catch(g){return{role:"tool",tool_call_id:p.id,content:f.JSONSanitize({error:g?.message||g?.toString()||"Unknown"})}}}));n.push(...d),o.messages=n}}while(!t.signal.aborted&&a.choices?.[0]?.message?.tool_calls?.length);n.push({role:"assistant",content:a.choices[0].message.content||""}),n=this.toStandard(n),e.stream&&e.stream({done:!0}),e.history&&e.history.splice(0,e.history.length,...n),c(n.at(-1)?.content)}),{abort:()=>t.abort()})}}class J{constructor(r){this.ai=r,r.options.llm?.models&&Object.entries(r.options.llm.models).forEach(([e,t])=>{this.defaultModel||(this.defaultModel=e),t.proto=="anthropic"?this.models[e]=new P(this.ai,t.token,e):t.proto=="ollama"?this.models[e]=new _(this.ai,t.host,"not-needed",e):t.proto=="openai"&&(this.models[e]=new _(this.ai,t.host||null,t.token,e))})}defaultModel;models={};ask(r,e={}){const t=e.model||this.defaultModel;if(!this.models[t])throw new Error(`Model does not exist: ${t}`);let c=()=>{};return Object.assign(new Promise(async s=>{if(e.history||(e.history=[]),e.memory){e.system=(e.system||"")+`
15
+ `}),a.choices=[{message:{content:"",tool_calls:[]}}];for await(const d of a){if(t.signal.aborted)break;d.choices[0].delta.content&&(a.choices[0].message.content+=d.choices[0].delta.content,e.stream({text:d.choices[0].delta.content})),d.choices[0].delta.tool_calls&&(a.choices[0].message.tool_calls=d.choices[0].delta.tool_calls)}}const i=a.choices[0].message.tool_calls||[];if(i.length&&!t.signal.aborted){s.push(a.choices[0].message);const d=await Promise.all(i.map(async p=>{const u=l?.find(f.findByProp("name",p.function.name));if(e.stream&&e.stream({tool:p.function.name}),!u)return{role:"tool",tool_call_id:p.id,content:'{"error": "Tool not found"}'};try{const y=f.JSONAttemptParse(p.function.arguments,{}),b=await u.fn(y,e.stream,this.ai);return{role:"tool",tool_call_id:p.id,content:f.JSONSanitize(b)}}catch(y){return{role:"tool",tool_call_id:p.id,content:f.JSONSanitize({error:y?.message||y?.toString()||"Unknown"})}}}));s.push(...d),o.messages=s}}while(!t.signal.aborted&&a.choices?.[0]?.message?.tool_calls?.length);s.push({role:"assistant",content:a.choices[0].message.content||""}),s=this.toStandard(s),e.stream&&e.stream({done:!0}),e.history&&e.history.splice(0,e.history.length,...s),c(s.at(-1)?.content)}),{abort:()=>t.abort()})}}class J{constructor(r){this.ai=r,r.options.llm?.models&&Object.entries(r.options.llm.models).forEach(([e,t])=>{this.defaultModel||(this.defaultModel=e),t.proto=="anthropic"?this.models[e]=new P(this.ai,t.token,e):t.proto=="ollama"?this.models[e]=new _(this.ai,t.host,"not-needed",e):t.proto=="openai"&&(this.models[e]=new _(this.ai,t.host||null,t.token,e))})}defaultModel;models={};ask(r,e={}){const t=e.model||this.defaultModel;if(!this.models[t])throw new Error(`Model does not exist: ${t}`);let c=()=>{};return Object.assign(new Promise(async n=>{if(e.history||(e.history=[]),e.memory){e.system=(e.system||"")+`
16
16
  You have passive persistent memory never make any mention of your memory capabilities and what you can/cannot remember
17
- `;const l=async(a,m,i=50)=>{const[d,p]=await Promise.all([m?this.embedding(m):Promise.resolve(null),a?this.embedding(a):Promise.resolve(null)]);return(e.memory||[]).map(u=>({...u,score:d?this.cosineSimilarity(u.embeddings[0],d[0].embedding):1})).filter(u=>u.score>=.8).map(u=>({...u,score:p?this.cosineSimilarity(u.embeddings[1],p[0].embedding):u.score})).filter(u=>u.score>=.2).toSorted((u,g)=>u.score-g.score).slice(0,i)},o=await l(r);o.length&&e.history.push({role:"assistant",content:`Things I remembered:
17
+ `;const l=async(a,m,i=50)=>{const[d,p]=await Promise.all([m?this.embedding(m):Promise.resolve(null),a?this.embedding(a):Promise.resolve(null)]);return(e.memory||[]).map(u=>({...u,score:d?this.cosineSimilarity(u.embeddings[0],d[0].embedding):1})).filter(u=>u.score>=.8).map(u=>({...u,score:p?this.cosineSimilarity(u.embeddings[1],p[0].embedding):u.score})).filter(u=>u.score>=.2).toSorted((u,y)=>u.score-y.score).slice(0,i)},o=await l(r);o.length&&e.history.push({role:"assistant",content:`Things I remembered:
18
18
  `+o.map(a=>`${a.owner}: ${a.fact}`).join(`
19
- `)}),e.tools=[...e.tools||[],{name:"read_memory",description:"Check your long-term memory for more information",args:{subject:{type:"string",description:"Find information by a subject topic, can be used with or without query argument"},query:{type:"string",description:"Search memory based on a query, can be used with or without subject argument"},limit:{type:"number",description:"Result limit, default 5"}},fn:a=>{if(!a.subject&&!a.query)throw new Error("Either a subject or query argument is required");return l(a.query,a.subject,a.limit||5)}}]}const n=await this.models[t].ask(r,e);if(e.memory){const l=e.history?.findIndex(o=>o.role=="assistant"&&o.content.startsWith("Things I remembered:"));l!=null&&l>=0&&e.history?.splice(l,1)}if(e.compress||e.memory){let l=null;if(e.compress)l=await this.ai.language.compressHistory(e.history,e.compress.max,e.compress.min,e),e.history.splice(0,e.history.length,...l.history);else{const o=e.history?.findLastIndex(a=>a.role=="user")??-1;l=await this.ai.language.compressHistory(o!=-1?e.history.slice(o):e.history,0,0,e)}if(e.memory){const o=e.memory.filter(a=>!l.memory.some(m=>this.cosineSimilarity(a.embeddings[1],m.embeddings[1])>.8)).concat(l.memory);e.memory.splice(0,e.memory.length,...o)}}return s(n)}),{abort:c})}async compressHistory(r,e,t,c){if(this.estimateTokens(r)<e)return{history:r,memory:[]};let s=0,n=0;for(let u of r.toReversed())if(n+=this.estimateTokens(u.content),n<t)s++;else break;if(r.length<=s)return{history:r,memory:[]};const l=r[0].role=="system"?r[0]:null,o=s==0?[]:r.slice(-s),a=(s==0?r:r.slice(0,-s)).filter(u=>u.role==="assistant"||u.role==="user"),m=await this.json(a.map(u=>`${u.role}: ${u.content}`).join(`
19
+ `)}),e.tools=[...e.tools||[],{name:"read_memory",description:"Check your long-term memory for more information",args:{subject:{type:"string",description:"Find information by a subject topic, can be used with or without query argument"},query:{type:"string",description:"Search memory based on a query, can be used with or without subject argument"},limit:{type:"number",description:"Result limit, default 5"}},fn:a=>{if(!a.subject&&!a.query)throw new Error("Either a subject or query argument is required");return l(a.query,a.subject,a.limit||5)}}]}const s=await this.models[t].ask(r,e);if(e.memory){const l=e.history?.findIndex(o=>o.role=="assistant"&&o.content.startsWith("Things I remembered:"));l!=null&&l>=0&&e.history?.splice(l,1)}if(e.compress||e.memory){let l=null;if(e.compress)l=await this.ai.language.compressHistory(e.history,e.compress.max,e.compress.min,e),e.history.splice(0,e.history.length,...l.history);else{const o=e.history?.findLastIndex(a=>a.role=="user")??-1;l=await this.ai.language.compressHistory(o!=-1?e.history.slice(o):e.history,0,0,e)}if(e.memory){const o=e.memory.filter(a=>!l.memory.some(m=>this.cosineSimilarity(a.embeddings[1],m.embeddings[1])>.8)).concat(l.memory);e.memory.splice(0,e.memory.length,...o)}}return n(s)}),{abort:c})}async compressHistory(r,e,t,c){if(this.estimateTokens(r)<e)return{history:r,memory:[]};let n=0,s=0;for(let u of r.toReversed())if(s+=this.estimateTokens(u.content),s<t)n++;else break;if(r.length<=n)return{history:r,memory:[]};const l=r[0].role=="system"?r[0]:null,o=n==0?[]:r.slice(-n),a=(n==0?r:r.slice(0,-n)).filter(u=>u.role==="assistant"||u.role==="user"),m=await this.json(a.map(u=>`${u.role}: ${u.content}`).join(`
20
20
 
21
- `),"{summary: string, facts: [[subject, fact]]}",{system:"Create the smallest summary possible, no more than 500 tokens. Create a list of NEW facts (split by subject [pro]noun and fact) about what you learned from this conversation that you didn't already know or get from a tool call or system prompt. Focus only on new information about people, topics, or facts. Avoid generating facts about the AI.",model:c?.model,temperature:c?.temperature||.3}),i=new Date,d=await Promise.all((m?.facts||[])?.map(async([u,g])=>{const b=await Promise.all([this.embedding(u),this.embedding(`${u}: ${g}`)]);return{owner:u,fact:g,embeddings:[b[0][0].embedding,b[1][0].embedding],timestamp:i}})),p=[{role:"assistant",content:`Conversation Summary: ${m?.summary}`,timestamp:Date.now()},...o];return l&&p.splice(0,0,l),{history:p,memory:d}}cosineSimilarity(r,e){if(r.length!==e.length)throw new Error("Vectors must be same length");let t=0,c=0,s=0;for(let l=0;l<r.length;l++)t+=r[l]*e[l],c+=r[l]*r[l],s+=e[l]*e[l];const n=Math.sqrt(c)*Math.sqrt(s);return n===0?0:t/n}chunk(r,e=500,t=50){const c=(o,a="")=>o?Object.entries(o).flatMap(([m,i])=>{const d=a?`${a}${isNaN(+m)?`.${m}`:`[${m}]`}`:m;return typeof i=="object"&&!Array.isArray(i)?c(i,d):`${d}: ${Array.isArray(i)?i.join(", "):i}`}):[],n=(typeof r=="object"?c(r):r.split(`
21
+ `),"{summary: string, facts: [[subject, fact]]}",{system:"Create the smallest summary possible, no more than 500 tokens. Create a list of NEW facts (split by subject [pro]noun and fact) about what you learned from this conversation that you didn't already know or get from a tool call or system prompt. Focus only on new information about people, topics, or facts. Avoid generating facts about the AI.",model:c?.model,temperature:c?.temperature||.3}),i=new Date,d=await Promise.all((m?.facts||[])?.map(async([u,y])=>{const b=await Promise.all([this.embedding(u),this.embedding(`${u}: ${y}`)]);return{owner:u,fact:y,embeddings:[b[0][0].embedding,b[1][0].embedding],timestamp:i}})),p=[{role:"assistant",content:`Conversation Summary: ${m?.summary}`,timestamp:Date.now()},...o];return l&&p.splice(0,0,l),{history:p,memory:d}}cosineSimilarity(r,e){if(r.length!==e.length)throw new Error("Vectors must be same length");let t=0,c=0,n=0;for(let l=0;l<r.length;l++)t+=r[l]*e[l],c+=r[l]*r[l],n+=e[l]*e[l];const s=Math.sqrt(c)*Math.sqrt(n);return s===0?0:t/s}chunk(r,e=500,t=50){const c=(o,a="")=>o?Object.entries(o).flatMap(([m,i])=>{const d=a?`${a}${isNaN(+m)?`.${m}`:`[${m}]`}`:m;return typeof i=="object"&&!Array.isArray(i)?c(i,d):`${d}: ${Array.isArray(i)?i.join(", "):i}`}):[],s=(typeof r=="object"?c(r):r.split(`
22
22
  `)).flatMap(o=>[...o.split(/\s+/).filter(Boolean),`
23
- `]),l=[];for(let o=0;o<n.length;){let a="",m=o;for(;m<n.length;){const d=a+(a?" ":"")+n[m];if(this.estimateTokens(d.replace(/\s*\n\s*/g,`
23
+ `]),l=[];for(let o=0;o<s.length;){let a="",m=o;for(;m<s.length;){const d=a+(a?" ":"")+s[m];if(this.estimateTokens(d.replace(/\s*\n\s*/g,`
24
24
  `))>e&&a)break;a=d,m++}const i=a.replace(/\s*\n\s*/g,`
25
- `).trim();i&&l.push(i),o=Math.max(m-t,m===o?o+1:m)}return l}async embedding(r,e={}){let{maxTokens:t=500,overlapTokens:c=50}=e;const s=o=>new Promise((a,m)=>{const i=new x.Worker(w.join(w.dirname(S.fileURLToPath(typeof document>"u"?require("url").pathToFileURL(__filename).href:y&&y.tagName.toUpperCase()==="SCRIPT"&&y.src||new URL("index.js",document.baseURI).href)),"embedder.js")),d=({embedding:u})=>{i.terminate(),a(u)},p=u=>{i.terminate(),m(u)};i.on("message",d),i.on("error",p),i.on("exit",u=>{u!==0&&m(new Error(`Worker exited with code ${u}`))}),i.postMessage({text:o,model:this.ai.options?.embedder||"bge-small-en-v1.5",modelDir:this.ai.options.path})}),n=this.chunk(r,t,c),l=[];for(let o=0;o<n.length;o++){const a=n[o],m=await s(a);l.push({index:o,embedding:m,text:a,tokens:this.estimateTokens(a)})}return l}estimateTokens(r){const e=JSON.stringify(r);return Math.ceil(e.length/4*1.2)}fuzzyMatch(r,...e){if(e.length<2)throw new Error("Requires at least 2 strings to compare");const t=(n,l=10)=>n.toLowerCase().split("").map((o,a)=>o.charCodeAt(0)*(a+1)%l/l).slice(0,l),c=t(r),s=e.map(n=>t(n)).map(n=>this.cosineSimilarity(c,n));return{avg:s.reduce((n,l)=>n+l,0)/s.length,max:Math.max(...s),similarities:s}}async json(r,e,t){let c=await this.ask(r,{...t,system:(t?.system?`${t.system}
25
+ `).trim();i&&l.push(i),o=Math.max(m-t,m===o?o+1:m)}return l}async embedding(r,e={}){let{maxTokens:t=500,overlapTokens:c=50}=e;const n=o=>new Promise((a,m)=>{const i=new x.Worker(w.join(w.dirname(S.fileURLToPath(typeof document>"u"?require("url").pathToFileURL(__filename).href:g&&g.tagName.toUpperCase()==="SCRIPT"&&g.src||new URL("index.js",document.baseURI).href)),"embedder.js")),d=({embedding:u})=>{i.terminate(),a(u)},p=u=>{i.terminate(),m(u)};i.on("message",d),i.on("error",p),i.on("exit",u=>{u!==0&&m(new Error(`Worker exited with code ${u}`))}),i.postMessage({text:o,model:this.ai.options?.embedder||"bge-small-en-v1.5",modelDir:this.ai.options.path})}),s=this.chunk(r,t,c),l=[];for(let o=0;o<s.length;o++){const a=s[o],m=await n(a);l.push({index:o,embedding:m,text:a,tokens:this.estimateTokens(a)})}return l}estimateTokens(r){const e=JSON.stringify(r);return Math.ceil(e.length/4*1.2)}fuzzyMatch(r,...e){if(e.length<2)throw new Error("Requires at least 2 strings to compare");const t=(s,l=10)=>s.toLowerCase().split("").map((o,a)=>o.charCodeAt(0)*(a+1)%l/l).slice(0,l),c=t(r),n=e.map(s=>t(s)).map(s=>this.cosineSimilarity(c,s));return{avg:n.reduce((s,l)=>s+l,0)/n.length,max:Math.max(...n),similarities:n}}async json(r,e,t){let c=await this.ask(r,{...t,system:(t?.system?`${t.system}
26
26
  `:"")+`Only respond using a JSON code block matching this schema:
27
27
  \`\`\`json
28
28
  ${e}
29
- \`\`\``});if(!c)return{};const s=/```(?:.+)?\s*([\s\S]*?)```/.exec(c),n=s?s[1].trim():c;return f.JSONAttemptParse(n,{})}summarize(r,e,t){return this.ask(r,{system:`Generate a brief summary <= ${e} tokens. Output nothing else`,temperature:.3,...t})}}class O{constructor(r){this.ai=r}asr(r,e={}){const{model:t=this.ai.options.asr||"whisper-base",speaker:c=!1}=e;let s=!1;const n=()=>{s=!0};let l=new Promise((o,a)=>{const m=new x.Worker(w.join(w.dirname(S.fileURLToPath(typeof document>"u"?require("url").pathToFileURL(__filename).href:y&&y.tagName.toUpperCase()==="SCRIPT"&&y.src||new URL("index.js",document.baseURI).href)),"asr.js")),i=({text:p,warning:u,error:g})=>{m.terminate(),!s&&(g?a(new Error(g)):(u&&console.warn(u),o(p)))},d=p=>{m.terminate(),s||a(p)};m.on("message",i),m.on("error",d),m.on("exit",p=>{p!==0&&!s&&a(new Error(`Worker exited with code ${p}`))}),m.postMessage({file:r,model:t,speaker:c,modelDir:this.ai.options.path,token:this.ai.options.hfToken})});if(e.speaker=="id"){if(!this.ai.language.defaultModel)throw new Error("Configure an LLM for advanced ASR speaker detection");l=l.then(async o=>{if(!o)return o;let a=this.ai.language.chunk(o,500,0);a.length>4&&(a=[...a.slice(0,3),a.at(-1)]);const m=await this.ai.language.json(a.join(`
30
- `),'{1: "Detected Name"}',{system:"Use this following transcript to identify speakers. Only identify speakers you are sure about",temperature:.1});return Object.entries(m).forEach(([i,d])=>{o=o.replaceAll(`[Speaker ${i}]`,`[${d}]`)}),o})}return Object.assign(l,{abort:n})}canDiarization=j.canDiarization}class M{constructor(r){this.ai=r}ocr(r){let e;const t=new Promise(async c=>{e=await L.createWorker(this.ai.options.ocr||"eng",2,{cachePath:this.ai.options.path});const{data:s}=await e.recognize(r);await e.terminate(),c(s.text.trim()||null)});return Object.assign(t,{abort:()=>e?.terminate()})}}class W{constructor(r){this.options=r,r.path||(r.path=C.tmpdir()),process.env.TRANSFORMERS_CACHE=r.path,this.audio=new O(this),this.language=new J(this),this.vision=new M(this)}audio;language;vision}const A={name:"cli",description:"Use the command line interface, returns any output",args:{command:{type:"string",description:"Command to run",required:!0}},fn:h=>T.$`${h.command}`},z={name:"get_datetime",description:"Get current UTC date / time",args:{},fn:async()=>new Date().toUTCString()},I={name:"exec",description:"Run code/scripts",args:{language:{type:"string",description:"Execution language",enum:["cli","node","python"],required:!0},code:{type:"string",description:"Code to execute",required:!0}},fn:async(h,r,e)=>{try{switch(h.type){case"bash":return await A.fn({command:h.code},r,e);case"node":return await U.fn({code:h.code},r,e);case"python":return await E.fn({code:h.code},r,e)}}catch(t){return{error:t?.message||t.toString()}}}},F={name:"fetch",description:"Make HTTP request to URL",args:{url:{type:"string",description:"URL to fetch",required:!0},method:{type:"string",description:"HTTP method to use",enum:["GET","POST","PUT","DELETE"],default:"GET"},headers:{type:"object",description:"HTTP headers to send",default:{}},body:{type:"object",description:"HTTP body to send"}},fn:h=>new f.Http({url:h.url,headers:h.headers}).request({method:h.method||"GET",body:h.body})},U={name:"exec_javascript",description:"Execute commonjs javascript",args:{code:{type:"string",description:"CommonJS javascript",required:!0}},fn:async h=>{const r=f.consoleInterceptor(null),e=await f.fn({console:r},h.code,!0).catch(t=>r.output.error.push(t));return{...r.output,return:e,stdout:void 0,stderr:void 0}}},E={name:"exec_javascript",description:"Execute commonjs javascript",args:{code:{type:"string",description:"CommonJS javascript",required:!0}},fn:async h=>({result:T.$Sync`python -c "${h.code}"`})},H={name:"read_webpage",description:"Extract clean, structured content from a webpage. Use after web_search to read specific URLs",args:{url:{type:"string",description:"URL to extract content from",required:!0},focus:{type:"string",description:'Optional: What aspect to focus on (e.g., "pricing", "features", "contact info")'}},fn:async h=>{const r=await fetch(h.url,{headers:{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}}).then(n=>n.text()).catch(n=>{throw new Error(`Failed to fetch: ${n.message}`)}),e=D.load(r);e('script, style, nav, footer, header, aside, iframe, noscript, [role="navigation"], [role="banner"], .ad, .ads, .cookie, .popup').remove();const t={title:e('meta[property="og:title"]').attr("content")||e("title").text()||"",description:e('meta[name="description"]').attr("content")||e('meta[property="og:description"]').attr("content")||""};let c="";const s=["article","main",'[role="main"]',".content",".post",".entry","body"];for(const n of s){const l=e(n).first();if(l.length&&l.text().trim().length>200){c=l.text();break}}return c||(c=e("body").text()),c=c.replace(/\s+/g," ").trim().slice(0,8e3),{url:h.url,title:t.title.trim(),description:t.description.trim(),content:c,focus:h.focus}}},B={name:"web_search",description:"Use duckduckgo (anonymous) to find find relevant online resources. Returns a list of URLs that works great with the `read_webpage` tool",args:{query:{type:"string",description:"Search string",required:!0},length:{type:"string",description:"Number of results to return",default:5}},fn:async h=>{const r=await fetch(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(h.query)}`,{headers:{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)","Accept-Language":"en-US,en;q=0.9"}}).then(s=>s.text());let e,t=/<a .*?href="(.+?)".+?<\/a>/g;const c=new f.ASet;for(;(e=t.exec(r))!==null;){let s=/uddg=(.+)&amp?/.exec(decodeURIComponent(e[1]))?.[1];if(s&&(s=decodeURIComponent(s)),s&&c.add(s),c.size>=(h.length||5))break}return c}};exports.canDiarization=j.canDiarization;exports.Ai=W;exports.Anthropic=P;exports.Audio=O;exports.CliTool=A;exports.DateTimeTool=z;exports.ExecTool=I;exports.FetchTool=F;exports.JSTool=U;exports.LLMProvider=k;exports.OpenAi=_;exports.PythonTool=E;exports.ReadWebpageTool=H;exports.Vision=M;exports.WebSearchTool=B;
29
+ \`\`\``});if(!c)return{};const n=/```(?:.+)?\s*([\s\S]*?)```/.exec(c),s=n?n[1].trim():c;return f.JSONAttemptParse(s,{})}summarize(r,e,t){return this.ask(r,{system:`Generate a brief summary <= ${e} tokens. Output nothing else`,temperature:.3,...t})}}class O{constructor(r){this.ai=r}asr(r,e={}){const{model:t=this.ai.options.asr||"whisper-base",speaker:c=!1}=e;let n=!1;const s=()=>{n=!0};let l=new Promise((o,a)=>{const m=new x.Worker(w.join(w.dirname(S.fileURLToPath(typeof document>"u"?require("url").pathToFileURL(__filename).href:g&&g.tagName.toUpperCase()==="SCRIPT"&&g.src||new URL("index.js",document.baseURI).href)),"asr.js")),i=({text:p,warning:u,error:y})=>{m.terminate(),!n&&(y?a(new Error(y)):(u&&console.warn(u),o(p)))},d=p=>{m.terminate(),n||a(p)};m.on("message",i),m.on("error",d),m.on("exit",p=>{p!==0&&!n&&a(new Error(`Worker exited with code ${p}`))}),m.postMessage({file:r,model:t,speaker:c,modelDir:this.ai.options.path,token:this.ai.options.hfToken})});if(e.speaker=="id"){if(!this.ai.language.defaultModel)throw new Error("Configure an LLM for advanced ASR speaker detection");l=l.then(async o=>{if(!o)return o;let a=this.ai.language.chunk(o,500,0);a.length>4&&(a=[...a.slice(0,3),a.at(-1)]);const m=await this.ai.language.json(a.join(`
30
+ `),'{1: "Detected Name", 2: "Second Name"}',{system:"Use the following transcript to identify speakers. Only identify speakers you are positive about, dont mention speakers you are unsure about in your response",temperature:.1});return Object.entries(m).forEach(([i,d])=>{o=o.replaceAll(`[Speaker ${i}]`,`[${d}]`)}),o})}return Object.assign(l,{abort:s})}canDiarization=()=>j.canDiarization().then(r=>!!r)}class M{constructor(r){this.ai=r}ocr(r){let e;const t=new Promise(async c=>{e=await L.createWorker(this.ai.options.ocr||"eng",2,{cachePath:this.ai.options.path});const{data:n}=await e.recognize(r);await e.terminate(),c(n.text.trim()||null)});return Object.assign(t,{abort:()=>e?.terminate()})}}class W{constructor(r){this.options=r,r.path||(r.path=C.tmpdir()),process.env.TRANSFORMERS_CACHE=r.path,this.audio=new O(this),this.language=new J(this),this.vision=new M(this)}audio;language;vision}const A={name:"cli",description:"Use the command line interface, returns any output",args:{command:{type:"string",description:"Command to run",required:!0}},fn:h=>T.$`${h.command}`},z={name:"get_datetime",description:"Get current UTC date / time",args:{},fn:async()=>new Date().toUTCString()},I={name:"exec",description:"Run code/scripts",args:{language:{type:"string",description:"Execution language",enum:["cli","node","python"],required:!0},code:{type:"string",description:"Code to execute",required:!0}},fn:async(h,r,e)=>{try{switch(h.type){case"bash":return await A.fn({command:h.code},r,e);case"node":return await U.fn({code:h.code},r,e);case"python":return await E.fn({code:h.code},r,e)}}catch(t){return{error:t?.message||t.toString()}}}},F={name:"fetch",description:"Make HTTP request to URL",args:{url:{type:"string",description:"URL to fetch",required:!0},method:{type:"string",description:"HTTP method to use",enum:["GET","POST","PUT","DELETE"],default:"GET"},headers:{type:"object",description:"HTTP headers to send",default:{}},body:{type:"object",description:"HTTP body to send"}},fn:h=>new f.Http({url:h.url,headers:h.headers}).request({method:h.method||"GET",body:h.body})},U={name:"exec_javascript",description:"Execute commonjs javascript",args:{code:{type:"string",description:"CommonJS javascript",required:!0}},fn:async h=>{const r=f.consoleInterceptor(null),e=await f.fn({console:r},h.code,!0).catch(t=>r.output.error.push(t));return{...r.output,return:e,stdout:void 0,stderr:void 0}}},E={name:"exec_javascript",description:"Execute commonjs javascript",args:{code:{type:"string",description:"CommonJS javascript",required:!0}},fn:async h=>({result:T.$Sync`python -c "${h.code}"`})},H={name:"read_webpage",description:"Extract clean, structured content from a webpage. Use after web_search to read specific URLs",args:{url:{type:"string",description:"URL to extract content from",required:!0},focus:{type:"string",description:'Optional: What aspect to focus on (e.g., "pricing", "features", "contact info")'}},fn:async h=>{const r=await fetch(h.url,{headers:{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}}).then(s=>s.text()).catch(s=>{throw new Error(`Failed to fetch: ${s.message}`)}),e=D.load(r);e('script, style, nav, footer, header, aside, iframe, noscript, [role="navigation"], [role="banner"], .ad, .ads, .cookie, .popup').remove();const t={title:e('meta[property="og:title"]').attr("content")||e("title").text()||"",description:e('meta[name="description"]').attr("content")||e('meta[property="og:description"]').attr("content")||""};let c="";const n=["article","main",'[role="main"]',".content",".post",".entry","body"];for(const s of n){const l=e(s).first();if(l.length&&l.text().trim().length>200){c=l.text();break}}return c||(c=e("body").text()),c=c.replace(/\s+/g," ").trim().slice(0,8e3),{url:h.url,title:t.title.trim(),description:t.description.trim(),content:c,focus:h.focus}}},B={name:"web_search",description:"Use duckduckgo (anonymous) to find find relevant online resources. Returns a list of URLs that works great with the `read_webpage` tool",args:{query:{type:"string",description:"Search string",required:!0},length:{type:"string",description:"Number of results to return",default:5}},fn:async h=>{const r=await fetch(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(h.query)}`,{headers:{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)","Accept-Language":"en-US,en;q=0.9"}}).then(n=>n.text());let e,t=/<a .*?href="(.+?)".+?<\/a>/g;const c=new f.ASet;for(;(e=t.exec(r))!==null;){let n=/uddg=(.+)&amp?/.exec(decodeURIComponent(e[1]))?.[1];if(n&&(n=decodeURIComponent(n)),n&&c.add(n),c.size>=(h.length||5))break}return c}};exports.canDiarization=j.canDiarization;exports.Ai=W;exports.Anthropic=P;exports.Audio=O;exports.CliTool=A;exports.DateTimeTool=z;exports.ExecTool=I;exports.FetchTool=F;exports.JSTool=U;exports.LLMProvider=k;exports.OpenAi=_;exports.PythonTool=E;exports.ReadWebpageTool=H;exports.Vision=M;exports.WebSearchTool=B;
31
31
  //# sourceMappingURL=index.js.map