@exulu/backend 1.60.0 → 1.61.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/backend.cjs +60 -0
- package/dist/{catalog-EOKGOHTY.js → catalog-BWE6SLE2.js} +1 -1
- package/dist/chunk-IDHS2BZO.js +210 -0
- package/dist/{chunk-YS27XOXI.js → chunk-ILAHW4UT.js} +5 -1
- package/dist/{chunk-23YNGK3V.js → chunk-MPV7HBV6.js} +63 -2
- package/dist/cli/start-whisper.cjs +240 -0
- package/dist/cli/start-whisper.d.cts +1 -0
- package/dist/cli/start-whisper.d.ts +1 -0
- package/dist/cli/start-whisper.js +204 -0
- package/dist/{convert-exulu-tools-to-ai-sdk-tools-PLLM2CJL.js → convert-exulu-tools-to-ai-sdk-tools-CULC37U6.js} +1 -1
- package/dist/index.cjs +1827 -346
- package/dist/index.d.cts +2 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.js +1447 -249
- package/ee/python/requirements.txt +18 -0
- package/ee/python/setup.sh +44 -0
- package/ee/python/transcription/__init__.py +0 -0
- package/ee/python/transcription/pipeline.py +232 -0
- package/ee/python/transcription/server.py +151 -0
- package/ee/python/transcription/tests/__init__.py +0 -0
- package/ee/python/transcription/tests/test_server.py +111 -0
- package/ee/python/transcription/worker.py +135 -0
- package/package.json +5 -2
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Single-consumer asyncio worker queue.
|
|
3
|
+
Pulls one job at a time from the queue, downloads audio, runs the pipeline.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import os
|
|
8
|
+
import tempfile
|
|
9
|
+
import time
|
|
10
|
+
import uuid
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
import requests
|
|
15
|
+
|
|
16
|
+
from pipeline import (
|
|
17
|
+
CancelledError,
|
|
18
|
+
TranscriptionOptions,
|
|
19
|
+
TranscriptionPipeline,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class Job:
    """State record for one transcription request tracked by the worker.

    A job is created in the ``queued`` state and is mutated in place as it
    moves through its lifecycle. Result fields stay ``None`` until the
    corresponding information becomes available.
    """

    # Identifier handed back to the submitter and used as the lookup key.
    job_id: str
    # Location the audio is downloaded from before transcription.
    audio_url: str
    # Options forwarded unchanged to the transcription pipeline.
    options: TranscriptionOptions
    # Lifecycle state, one of: queued|running|completed|failed|cancelled
    status: str = "queued"
    # Transcription output; populated only once the job has completed.
    segments: Optional[list] = None
    language: Optional[str] = None
    # May be filled in before completion, as soon as the audio is loaded.
    duration_seconds: Optional[float] = None
    # Human-readable failure description; set only when status is "failed".
    error: Optional[str] = None
    # Wall-clock timestamps from time.time() (seconds since the epoch).
    started_at: Optional[float] = None
    finished_at: Optional[float] = None
    # Cooperative cancellation flag polled while the job is running.
    cancel_requested: bool = False
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _download_sync(url: str) -> str:
    """Download a presigned URL to a temp file. Returns the local path.

    The response body is streamed to disk in 1 MiB chunks so the whole audio
    file is never held in memory. On success the caller owns the returned
    file and is responsible for deleting it.

    Args:
        url: HTTP(S) URL of the audio to fetch.

    Returns:
        Path of the temp file containing the downloaded bytes.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
        OSError: if the temp file cannot be written.

    In every failure case the temp file is removed before the exception
    propagates, so a failed download leaves nothing behind on disk.
    """
    fd, path = tempfile.mkstemp(prefix="exulu-whisper-", suffix=".audio")
    # mkstemp returns an open descriptor; only the path is needed here.
    os.close(fd)
    try:
        with requests.get(url, stream=True, timeout=300) as resp:
            resp.raise_for_status()
            with open(path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
    except BaseException:
        # The caller never learns the path when this function raises, so it
        # cannot clean up; remove the (possibly partial) file here.
        try:
            os.unlink(path)
        except OSError:
            pass
        raise
    return path
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class TranscriptionWorker:
    """Single-consumer job queue that runs transcriptions one at a time.

    Submitted jobs are stored in ``self.jobs`` and their ids are pushed onto
    an ``asyncio.Queue``. One background task, created by :meth:`start`, pops
    ids in FIFO order and processes each job to completion before taking the
    next one. The blocking download and transcription run in a worker thread
    (``asyncio.to_thread``) so the event loop is not blocked meanwhile.

    Job records remain in ``self.jobs`` after they finish; nothing in this
    class removes them.
    """

    def __init__(self, pipeline: TranscriptionPipeline):
        """Create an idle worker around ``pipeline``; call :meth:`start` to run it."""
        self.pipeline = pipeline
        # The queue carries job ids only; the Job objects live in self.jobs.
        self.queue: asyncio.Queue[str] = asyncio.Queue()
        self.jobs: dict[str, Job] = {}
        self._task: Optional[asyncio.Task] = None

    def submit(self, audio_url: str, options: TranscriptionOptions) -> str:
        """Register a new queued job and return its generated id (a UUID4 string)."""
        job_id = str(uuid.uuid4())
        self.jobs[job_id] = Job(job_id=job_id, audio_url=audio_url, options=options)
        self.queue.put_nowait(job_id)
        return job_id

    def get(self, job_id: str) -> Optional[Job]:
        """Return the job with ``job_id``, or ``None`` if it is unknown."""
        return self.jobs.get(job_id)

    def list_jobs(self) -> list[Job]:
        """Return a new list of all known jobs, in insertion order."""
        return list(self.jobs.values())

    def cancel(self, job_id: str) -> bool:
        """Request cancellation of a job.

        A queued job is marked ``cancelled`` immediately (its id stays in the
        queue and is skipped when dequeued). A running job only gets its
        ``cancel_requested`` flag set; the pipeline is expected to poll it.

        Returns:
            ``True`` if the job was queued or running, ``False`` if the job
            is unknown or already in a terminal state.
        """
        job = self.jobs.get(job_id)
        if not job:
            return False
        if job.status == "queued":
            job.status = "cancelled"
            job.finished_at = time.time()
            return True
        if job.status == "running":
            job.cancel_requested = True
            return True
        return False

    def start(self) -> None:
        """Start the background consumer task if it is not already running.

        Calling this while the consumer is alive is a no-op: a second
        consumer on the same queue would process jobs concurrently and break
        the one-job-at-a-time guarantee. If the previous task has finished
        (for example because it was cancelled), a fresh one is created.
        """
        if self._task is not None and not self._task.done():
            return
        loop = asyncio.get_event_loop()
        self._task = loop.create_task(self._run_loop())

    async def _run_loop(self) -> None:
        """Consume job ids forever, processing one job at a time."""
        while True:
            job_id = await self.queue.get()
            job = self.jobs.get(job_id)
            if not job:
                continue
            if job.status == "cancelled":
                # Cancelled while queued — skip.
                continue
            await self._process(job)

    async def _process(self, job: Job) -> None:
        """Download and transcribe one job, recording the outcome on ``job``.

        The job ends as ``completed``, ``cancelled`` (the pipeline raised its
        own ``CancelledError``) or ``failed`` (any other ``Exception``, stored
        in ``job.error`` as ``"TypeName: message"``). ``finished_at`` is set
        in every case.
        """
        job.status = "running"
        job.started_at = time.time()

        def on_audio_loaded(duration: float) -> None:
            # Surfacing duration before transcription completes lets the
            # backend show the user the length of the audio while the slow
            # part is still running.
            job.duration_seconds = duration

        def work():
            # Runs in a worker thread: everything in here may block.
            audio_path = _download_sync(job.audio_url)
            try:
                return self.pipeline.transcribe(
                    audio_path,
                    job.options,
                    is_cancelled=lambda: job.cancel_requested,
                    on_audio_loaded=on_audio_loaded,
                )
            finally:
                # Best-effort removal of the downloaded audio.
                try:
                    os.unlink(audio_path)
                except OSError:
                    pass

        try:
            result = await asyncio.to_thread(work)
            job.segments = result.segments
            job.language = result.language
            job.duration_seconds = result.duration_seconds
            job.status = "completed"
        except CancelledError:
            # This is the pipeline's CancelledError, not asyncio's.
            job.status = "cancelled"
        except Exception as e:
            job.status = "failed"
            job.error = f"{type(e).__name__}: {e}"
        finally:
            job.finished_at = time.time()
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exulu/backend",
|
|
3
3
|
"author": "Qventu Bv.",
|
|
4
|
-
"version": "1.60.0",
|
|
4
|
+
"version": "1.61.1",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"private": false,
|
|
7
7
|
"publishConfig": {
|
|
@@ -10,7 +10,9 @@
|
|
|
10
10
|
"module": "./dist/index.mjs",
|
|
11
11
|
"types": "./dist/index.d.ts",
|
|
12
12
|
"bin": {
|
|
13
|
-
"
|
|
13
|
+
"backend": "./bin/backend.cjs",
|
|
14
|
+
"setup-python": "./bin/setup-python.cjs",
|
|
15
|
+
"exulu-start-whisper": "./dist/cli/start-whisper.cjs"
|
|
14
16
|
},
|
|
15
17
|
"homepage": "https://exulu.com",
|
|
16
18
|
"engines": {
|
|
@@ -43,6 +45,7 @@
|
|
|
43
45
|
"cpd": "jscpd src --reporters console",
|
|
44
46
|
"cpd:report": "jscpd src --reporters console,html",
|
|
45
47
|
"cpd:view": "serve cpd-report/html -l 3001",
|
|
48
|
+
"start:whisper": "node ./dist/cli/start-whisper.cjs",
|
|
46
49
|
"python:setup": "cd ee/python && ./setup.sh",
|
|
47
50
|
"python:install": "source ee/python/.venv/bin/activate && pip install -r ee/python/requirements.txt",
|
|
48
51
|
"python:validate": "source ee/python/.venv/bin/activate && python -c 'import docling; import transformers; print(\"✓ Python environment is working correctly\")'",
|