@exulu/backend 1.60.0 → 1.61.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,135 @@
1
+ """
2
+ Single-consumer asyncio worker queue.
3
+ Pulls one job at a time from the queue, downloads audio, runs the pipeline.
4
+ """
5
+
6
+ import asyncio
7
+ import os
8
+ import tempfile
9
+ import time
10
+ import uuid
11
+ from dataclasses import dataclass, field
12
+ from typing import Optional
13
+
14
+ import requests
15
+
16
+ from pipeline import (
17
+ CancelledError,
18
+ TranscriptionOptions,
19
+ TranscriptionPipeline,
20
+ )
21
+
22
+
23
@dataclass
class Job:
    """State record for one transcription request tracked by the worker.

    Only ``job_id``, ``audio_url`` and ``options`` are supplied at creation;
    every other field starts at its default and is filled in as the job
    moves through the queue.
    """

    # --- identity / input -------------------------------------------------
    job_id: str
    audio_url: str
    options: TranscriptionOptions

    # --- lifecycle --------------------------------------------------------
    # One of: queued | running | completed | failed | cancelled
    status: str = "queued"

    # --- results (populated when the pipeline returns) ---------------------
    segments: Optional[list] = None
    language: Optional[str] = None
    duration_seconds: Optional[float] = None

    # --- failure details ----------------------------------------------------
    error: Optional[str] = None

    # --- timing (time.time() epoch seconds) ---------------------------------
    started_at: Optional[float] = None
    finished_at: Optional[float] = None

    # --- cooperative cancellation flag for a job that is already running ---
    cancel_requested: bool = False
36
+
37
+
38
def _download_sync(url: str) -> str:
    """Download a presigned URL to a temp file and return the local path.

    The response body is streamed to disk in 1 MiB chunks so the whole
    audio file is never held in memory. On success the caller owns the
    returned file and is responsible for deleting it.

    If anything goes wrong (connection error, non-2xx status, write
    failure, interruption), the temp file is removed before the exception
    propagates, so failed downloads do not accumulate in the temp directory.

    Args:
        url: Location of the audio to fetch.

    Returns:
        Path of the temp file containing the downloaded bytes.

    Raises:
        requests.RequestException: The request failed or returned an
            error status (via ``raise_for_status``).
        OSError: The temp file could not be created or written.
    """
    fd, path = tempfile.mkstemp(prefix="exulu-whisper-", suffix=".audio")
    # mkstemp hands back an open descriptor; we only need the reserved path.
    os.close(fd)
    try:
        with requests.get(url, stream=True, timeout=300) as resp:
            resp.raise_for_status()
            with open(path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
    except BaseException:
        # mkstemp already created the file on disk; without this cleanup a
        # failed download would leave an empty or partial file behind.
        try:
            os.unlink(path)
        except OSError:
            pass
        raise
    return path
49
+
50
+
51
class TranscriptionWorker:
    """Single-consumer job queue that runs transcriptions one at a time.

    Jobs are registered in ``jobs`` (keyed by job id) and their ids are
    pushed onto an ``asyncio.Queue``. One background task pops ids in FIFO
    order and runs each job to completion before taking the next. The
    blocking download and pipeline call run in a worker thread via
    ``asyncio.to_thread`` so the event loop stays responsive.

    Note: finished jobs are never removed from ``jobs``; the mapping grows
    for the lifetime of the worker.
    """

    def __init__(self, pipeline: TranscriptionPipeline):
        """Create an idle worker; call ``start()`` to begin consuming.

        Args:
            pipeline: Object whose ``transcribe`` method performs the work.
        """
        self.pipeline = pipeline
        self.queue: asyncio.Queue[str] = asyncio.Queue()
        self.jobs: dict[str, Job] = {}
        self._task: Optional[asyncio.Task] = None

    def submit(self, audio_url: str, options: TranscriptionOptions) -> str:
        """Register a new job in the ``queued`` state and enqueue it.

        Args:
            audio_url: URL the audio will be downloaded from.
            options: Options forwarded to the pipeline.

        Returns:
            The generated job id (a UUID4 string).
        """
        job_id = str(uuid.uuid4())
        self.jobs[job_id] = Job(job_id=job_id, audio_url=audio_url, options=options)
        self.queue.put_nowait(job_id)
        return job_id

    def get(self, job_id: str) -> Optional[Job]:
        """Return the job with this id, or ``None`` if it is unknown."""
        return self.jobs.get(job_id)

    def list_jobs(self) -> list[Job]:
        """Return a snapshot list of every job the worker knows about."""
        return list(self.jobs.values())

    def cancel(self, job_id: str) -> bool:
        """Request cancellation of a job.

        A queued job is marked ``cancelled`` immediately (its id stays in
        the queue and is skipped when dequeued). A running job only gets
        its ``cancel_requested`` flag set; the pipeline is expected to poll
        it through the ``is_cancelled`` callback.

        Returns:
            ``True`` if the job was queued or running, ``False`` if it is
            unknown or already finished.
        """
        job = self.jobs.get(job_id)
        if not job:
            return False
        if job.status == "queued":
            job.status = "cancelled"
            job.finished_at = time.time()
            return True
        if job.status == "running":
            job.cancel_requested = True
            return True
        return False

    def start(self) -> None:
        """Start the background consumer task.

        Calling this while a consumer is already alive is a no-op, so the
        single-consumer guarantee holds and the reference to the running
        task is not lost. If the previous task has finished (for example
        it was cancelled), a new one is created.
        """
        if self._task is not None and not self._task.done():
            return
        loop = asyncio.get_event_loop()
        self._task = loop.create_task(self._run_loop())

    async def _run_loop(self) -> None:
        """Consume job ids forever, processing one job at a time."""
        while True:
            job_id = await self.queue.get()
            try:
                job = self.jobs.get(job_id)
                # Skip ids with no record and jobs cancelled while queued.
                if job is None or job.status == "cancelled":
                    continue
                await self._process(job)
            finally:
                # Balance every get() so queue.join() can complete.
                self.queue.task_done()

    async def _process(self, job: Job) -> None:
        """Download the audio, run the pipeline, and record the outcome.

        The job ends in one of ``completed``, ``cancelled`` or ``failed``;
        ``finished_at`` is always stamped. Pipeline errors are captured on
        the job rather than raised. Cancellation of the surrounding asyncio
        task is the one thing that propagates.
        """
        job.status = "running"
        job.started_at = time.time()

        def on_audio_loaded(duration: float) -> None:
            # Surfacing duration before transcription completes lets the
            # backend show the user the length of the audio while the slow
            # part is still running.
            job.duration_seconds = duration

        def work():
            # Runs in a worker thread: blocking download, then transcription.
            audio_path = _download_sync(job.audio_url)
            try:
                return self.pipeline.transcribe(
                    audio_path,
                    job.options,
                    is_cancelled=lambda: job.cancel_requested,
                    on_audio_loaded=on_audio_loaded,
                )
            finally:
                # Best-effort cleanup of the downloaded temp file.
                try:
                    os.unlink(audio_path)
                except OSError:
                    pass

        try:
            result = await asyncio.to_thread(work)
            job.segments = result.segments
            job.language = result.language
            job.duration_seconds = result.duration_seconds
            job.status = "completed"
        except CancelledError:
            # The pipeline's own cancellation signal (imported from pipeline).
            job.status = "cancelled"
        except asyncio.CancelledError:
            # The consumer task itself is being cancelled (e.g. shutdown).
            # The thread cannot be interrupted, so raise the cooperative
            # flag for it and avoid leaving the job stuck in "running".
            job.cancel_requested = True
            job.status = "cancelled"
            raise
        except Exception as e:
            job.status = "failed"
            job.error = f"{type(e).__name__}: {e}"
        finally:
            job.finished_at = time.time()
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@exulu/backend",
3
3
  "author": "Qventu Bv.",
4
- "version": "1.60.0",
4
+ "version": "1.61.1",
5
5
  "main": "./dist/index.js",
6
6
  "private": false,
7
7
  "publishConfig": {
@@ -10,7 +10,9 @@
10
10
  "module": "./dist/index.mjs",
11
11
  "types": "./dist/index.d.ts",
12
12
  "bin": {
13
- "setup-python": "./bin/setup-python.cjs"
13
+ "backend": "./bin/backend.cjs",
14
+ "setup-python": "./bin/setup-python.cjs",
15
+ "exulu-start-whisper": "./dist/cli/start-whisper.cjs"
14
16
  },
15
17
  "homepage": "https://exulu.com",
16
18
  "engines": {
@@ -43,6 +45,7 @@
43
45
  "cpd": "jscpd src --reporters console",
44
46
  "cpd:report": "jscpd src --reporters console,html",
45
47
  "cpd:view": "serve cpd-report/html -l 3001",
48
+ "start:whisper": "node ./dist/cli/start-whisper.cjs",
46
49
  "python:setup": "cd ee/python && ./setup.sh",
47
50
  "python:install": "source ee/python/.venv/bin/activate && pip install -r ee/python/requirements.txt",
48
51
  "python:validate": "source ee/python/.venv/bin/activate && python -c 'import docling; import transformers; print(\"✓ Python environment is working correctly\")'",