auralwise_cli 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +255 -0
- package/bin/auralwise.js +6 -6
- package/lib/commands/delete.js +2 -2
- package/lib/commands/events.js +5 -5
- package/lib/commands/result.js +3 -3
- package/lib/commands/task.js +2 -2
- package/lib/commands/tasks.js +6 -6
- package/lib/commands/transcribe.js +27 -27
- package/lib/i18n.js +134 -6
- package/lib/utils.js +26 -0
- package/package.json +1 -1
package/README.md
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
# AuralWise CLI
|
|
2
|
+
|
|
3
|
+
Command-line interface for [AuralWise](https://auralwise.cn) Speech Intelligence API.
|
|
4
|
+
|
|
5
|
+
One API call returns transcription, speaker diarization, speaker embeddings, word-level timestamps, and 521-class audio event detection — all at once.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Speech Transcription** — 99 languages, with a dedicated Chinese engine (`optimize_zh`) for faster processing and higher accuracy
|
|
10
|
+
- **Speaker Diarization** — Automatic speaker count detection, per-segment speaker labels
|
|
11
|
+
- **Speaker Embeddings** — 192-dim voice print vectors for cross-recording speaker matching
|
|
12
|
+
- **Timestamps** — Word-level (~10ms) or segment-level (~100ms) precision
|
|
13
|
+
- **Audio Event Detection** — 521 AudioSet sound event classes (applause, cough, music, keyboard, etc.)
|
|
14
|
+
- **VAD** — Voice Activity Detection segments
|
|
15
|
+
- **Batch Mode** — Half-price processing using off-peak GPU capacity, delivered within 24h
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
npm install -g auralwise_cli
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Requires Node.js >= 18.
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# Set your API key (get one at https://auralwise.cn)
|
|
29
|
+
export AURALWISE_API_KEY=asr_xxxxxxxxxxxxxxxxxxxx
|
|
30
|
+
|
|
31
|
+
# Transcribe from URL — waits for completion and prints results
|
|
32
|
+
auralwise transcribe https://example.com/meeting.mp3
|
|
33
|
+
|
|
34
|
+
# Transcribe a local file (auto base64 upload)
|
|
35
|
+
auralwise transcribe ./recording.wav
|
|
36
|
+
|
|
37
|
+
# Chinese optimization mode (faster, cheaper for Chinese audio)
|
|
38
|
+
auralwise transcribe ./meeting.mp3 --optimize-zh --language zh
|
|
39
|
+
|
|
40
|
+
# Submit without waiting
|
|
41
|
+
auralwise transcribe https://example.com/audio.mp3 --no-wait
|
|
42
|
+
|
|
43
|
+
# Get JSON output
|
|
44
|
+
auralwise transcribe ./audio.mp3 --json --output result.json
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Commands
|
|
48
|
+
|
|
49
|
+
### `auralwise transcribe <source>`
|
|
50
|
+
|
|
51
|
+
Submit an audio file for processing. `<source>` can be an HTTP(S) URL or a local file path.
|
|
52
|
+
|
|
53
|
+
**Input modes:**
|
|
54
|
+
- **URL mode** — Pass an `https://...` URL; the GPU node downloads directly
|
|
55
|
+
- **File mode** — Pass a local file path; the CLI reads the file and uploads it as base64
|
|
56
|
+
|
|
57
|
+
**Common options:**
|
|
58
|
+
|
|
59
|
+
| Option | Description |
|
|
60
|
+
|--------|-------------|
|
|
61
|
+
| `--language <lang>` | ASR language code (`zh`, `en`, `ja`, ...); auto-detected if omitted |
|
|
62
|
+
| `--optimize-zh` | Use dedicated Chinese engine (faster, cheaper, segment-level timestamps) |
|
|
63
|
+
| `--no-asr` | Disable transcription |
|
|
64
|
+
| `--no-diarize` | Disable speaker diarization |
|
|
65
|
+
| `--no-events` | Disable audio event detection |
|
|
66
|
+
| `--hotwords <words>` | Boost recognition of specific words (comma-separated) |
|
|
67
|
+
| `--num-speakers <n>` | Set fixed number of speakers |
|
|
68
|
+
| `--max-speakers <n>` | Max speakers for auto-detection (default: 10) |
|
|
69
|
+
| `--batch` | Use batch mode (half-price, 24h delivery) |
|
|
70
|
+
| `--no-wait` | Return immediately after task creation |
|
|
71
|
+
| `--json` | Output result as JSON |
|
|
72
|
+
| `--output <file>` | Save result to file |
|
|
73
|
+
| `--callback-url <url>` | Webhook URL for completion notification |
|
|
74
|
+
|
|
75
|
+
**Advanced ASR options:**
|
|
76
|
+
|
|
77
|
+
| Option | Description |
|
|
78
|
+
|--------|-------------|
|
|
79
|
+
| `--beam-size <n>` | Beam search width (default: 5) |
|
|
80
|
+
| `--temperature <n>` | Decoding temperature (default: 0.0) |
|
|
81
|
+
| `--initial-prompt <text>` | Guide transcription style |
|
|
82
|
+
| `--vad-threshold <n>` | VAD sensitivity 0-1 (default: 0.35) |
|
|
83
|
+
| `--events-threshold <n>` | Audio event confidence threshold (default: 0.3) |
|
|
84
|
+
| `--events-classes <list>` | Only detect specific event classes |
|
|
85
|
+
|
|
86
|
+
### `auralwise tasks`
|
|
87
|
+
|
|
88
|
+
List your tasks with optional filtering.
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
auralwise tasks # List all tasks
|
|
92
|
+
auralwise tasks --status done # Only completed tasks
|
|
93
|
+
auralwise tasks --page 2 --page-size 50 # Pagination
|
|
94
|
+
auralwise tasks --json # JSON output
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### `auralwise task <id>`
|
|
98
|
+
|
|
99
|
+
Get details of a specific task.
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
auralwise task 550e8400-e29b-41d4-a716-446655440000
|
|
103
|
+
auralwise task 550e8400-e29b-41d4-a716-446655440000 --json
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### `auralwise result <id>`
|
|
107
|
+
|
|
108
|
+
Retrieve the full result of a completed task.
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
auralwise result <task-id> # Pretty-printed output
|
|
112
|
+
auralwise result <task-id> --json # JSON output
|
|
113
|
+
auralwise result <task-id> --output result.json # Save to file
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### `auralwise delete <id>`
|
|
117
|
+
|
|
118
|
+
Delete a task and its associated files.
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
auralwise delete <task-id> # With confirmation prompt
|
|
122
|
+
auralwise delete <task-id> --force # Skip confirmation
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### `auralwise events`
|
|
126
|
+
|
|
127
|
+
Browse the 521 AudioSet sound event classes.
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
auralwise events # List all 521 classes
|
|
131
|
+
auralwise events --search Cough # Search by name
|
|
132
|
+
auralwise events --category Music # Filter by category
|
|
133
|
+
auralwise events --json # JSON output
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Configuration
|
|
137
|
+
|
|
138
|
+
### API Key
|
|
139
|
+
|
|
140
|
+
Set your API key via `--api-key` flag or environment variable:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
# Environment variable (recommended)
|
|
144
|
+
export AURALWISE_API_KEY=asr_xxxxxxxxxxxxxxxxxxxx
|
|
145
|
+
|
|
146
|
+
# Or pass directly
|
|
147
|
+
auralwise --api-key asr_xxxx transcribe ./audio.mp3
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Base URL
|
|
151
|
+
|
|
152
|
+
Override the API endpoint (default: `https://auralwise.cn/api/v1`):
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
auralwise --base-url https://your-private-instance.com/api/v1 transcribe ./audio.mp3
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Language
|
|
159
|
+
|
|
160
|
+
The CLI supports English and Chinese interfaces:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
auralwise --locale zh --help # Chinese interface
|
|
164
|
+
auralwise --locale en transcribe --help # English interface (default)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Examples
|
|
168
|
+
|
|
169
|
+
### Meeting transcription with speaker diarization
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
auralwise transcribe ./meeting.mp3 \
|
|
173
|
+
--optimize-zh \
|
|
174
|
+
--language zh \
|
|
175
|
+
--max-speakers 5 \
|
|
176
|
+
--output meeting_result.json
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### Batch processing (half-price)
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
# Submit in batch mode — processed during off-peak hours, 50% discount
|
|
183
|
+
auralwise transcribe https://storage.example.com/archive.mp3 \
|
|
184
|
+
--batch \
|
|
185
|
+
--no-wait \
|
|
186
|
+
--callback-url https://your-server.com/webhook
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Audio event detection only
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
auralwise transcribe ./audio.mp3 \
|
|
193
|
+
--no-asr \
|
|
194
|
+
--no-diarize \
|
|
195
|
+
--events-classes "Cough,Music,Applause" \
|
|
196
|
+
--json
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Transcription only (no diarization, no events)
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
auralwise transcribe ./podcast.mp3 \
|
|
203
|
+
--no-diarize \
|
|
204
|
+
--no-events \
|
|
205
|
+
--hotwords "AuralWise,PGPU" \
|
|
206
|
+
--output transcript.json
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
## Output Format
|
|
210
|
+
|
|
211
|
+
### Pretty-printed (default)
|
|
212
|
+
|
|
213
|
+
```
|
|
214
|
+
Audio Duration: 5.3min
|
|
215
|
+
Language: zh (99%)
|
|
216
|
+
Speakers: 2
|
|
217
|
+
|
|
218
|
+
Transcription
|
|
219
|
+
|
|
220
|
+
[0:00.5 - 0:02.3] SPEAKER_0: This is the first sentence
|
|
221
|
+
[0:02.5 - 0:04.1] SPEAKER_1: And this is the reply
|
|
222
|
+
|
|
223
|
+
Audio Events
|
|
224
|
+
|
|
225
|
+
[0:45.0 - 0:45.9] Cough (87%)
|
|
226
|
+
[1:20.0 - 1:25.0] Music (92%)
|
|
227
|
+
|
|
228
|
+
Speaker Embeddings
|
|
229
|
+
|
|
230
|
+
SPEAKER_0: 25 segments, 192-dim vector
|
|
231
|
+
SPEAKER_1: 18 segments, 192-dim vector
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
### JSON (`--json`)
|
|
235
|
+
|
|
236
|
+
Returns the full API response. See [API documentation](https://auralwise.cn/api-docs) for the complete schema.
|
|
237
|
+
|
|
238
|
+
## Pricing
|
|
239
|
+
|
|
240
|
+
| Capability | Standard | Batch (50% off) |
|
|
241
|
+
|-----------|----------|-----------------|
|
|
242
|
+
| Chinese transcription | ¥0.27/hr | ¥0.14/hr |
|
|
243
|
+
| General transcription (with word timestamps) | ¥1.20/hr | ¥0.60/hr |
|
|
244
|
+
| Speaker diarization (labels + embeddings) | +¥0.40/hr | +¥0.20/hr |
|
|
245
|
+
| Audio event detection (521 classes) | +¥0.10/hr | +¥0.05/hr |
|
|
246
|
+
|
|
247
|
+
**Example: 100 hours of Chinese meetings (full features) = ¥39 in batch mode.**
|
|
248
|
+
|
|
249
|
+
## API Documentation
|
|
250
|
+
|
|
251
|
+
Full API reference: https://auralwise.cn/api-docs
|
|
252
|
+
|
|
253
|
+
## License
|
|
254
|
+
|
|
255
|
+
MIT
|
package/bin/auralwise.js
CHANGED
|
@@ -9,21 +9,21 @@ import { registerResult } from '../lib/commands/result.js';
|
|
|
9
9
|
import { registerDelete } from '../lib/commands/delete.js';
|
|
10
10
|
import { registerEvents } from '../lib/commands/events.js';
|
|
11
11
|
|
|
12
|
-
const program = new Command();
|
|
13
|
-
|
|
14
12
|
// Parse --locale early before command registration so descriptions use correct language
|
|
15
13
|
const localeIdx = process.argv.indexOf('--locale');
|
|
16
14
|
if (localeIdx !== -1 && process.argv[localeIdx + 1]) {
|
|
17
15
|
setLocale(process.argv[localeIdx + 1]);
|
|
18
16
|
}
|
|
19
17
|
|
|
18
|
+
const program = new Command();
|
|
19
|
+
|
|
20
20
|
program
|
|
21
21
|
.name('auralwise')
|
|
22
22
|
.description(t('descMain'))
|
|
23
|
-
.version('1.0.
|
|
24
|
-
.option('--api-key <key>',
|
|
25
|
-
.option('--base-url <url>', '
|
|
26
|
-
.option('--locale <locale>',
|
|
23
|
+
.version('1.0.2')
|
|
24
|
+
.option('--api-key <key>', t('optApiKey'))
|
|
25
|
+
.option('--base-url <url>', t('optBaseUrl'), 'https://auralwise.cn/api/v1')
|
|
26
|
+
.option('--locale <locale>', t('optLocale'));
|
|
27
27
|
|
|
28
28
|
registerTranscribe(program);
|
|
29
29
|
registerTasks(program);
|
package/lib/commands/delete.js
CHANGED
|
@@ -7,7 +7,7 @@ export function registerDelete(program) {
|
|
|
7
7
|
.command('delete')
|
|
8
8
|
.description(t('descDelete'))
|
|
9
9
|
.argument('<id>', t('argTaskId'))
|
|
10
|
-
.option('--force', '
|
|
10
|
+
.option('--force', t('optForce'))
|
|
11
11
|
.action(async (id, opts) => {
|
|
12
12
|
try {
|
|
13
13
|
if (!opts.force) {
|
|
@@ -27,7 +27,7 @@ export function registerDelete(program) {
|
|
|
27
27
|
await client.deleteTask(id);
|
|
28
28
|
console.log(chalk.green(t('taskDeleted', { id: id.substring(0, 8) })));
|
|
29
29
|
} catch (err) {
|
|
30
|
-
console.error(chalk.red(
|
|
30
|
+
console.error(chalk.red(`${t('error')}: ${err.message}`));
|
|
31
31
|
process.exit(1);
|
|
32
32
|
}
|
|
33
33
|
});
|
package/lib/commands/events.js
CHANGED
|
@@ -6,9 +6,9 @@ export function registerEvents(program) {
|
|
|
6
6
|
program
|
|
7
7
|
.command('events')
|
|
8
8
|
.description(t('descEvents'))
|
|
9
|
-
.option('--category <cat>', '
|
|
10
|
-
.option('--search <keyword>', '
|
|
11
|
-
.option('--json', '
|
|
9
|
+
.option('--category <cat>', t('optCategory'))
|
|
10
|
+
.option('--search <keyword>', t('optSearch'))
|
|
11
|
+
.option('--json', t('optJson'))
|
|
12
12
|
.action(async (opts) => {
|
|
13
13
|
try {
|
|
14
14
|
const client = createClient(program);
|
|
@@ -43,7 +43,7 @@ export function registerEvents(program) {
|
|
|
43
43
|
const isZh = getLocale() === 'zh';
|
|
44
44
|
console.log(chalk.bold(`${t('totalClasses')}: ${classes.length}`));
|
|
45
45
|
console.log();
|
|
46
|
-
console.log(chalk.bold('
|
|
46
|
+
console.log(chalk.bold(`${t('tableHeaderIndex').padStart(5)} ${t('tableHeaderName').padEnd(40)} ${t('category')}`));
|
|
47
47
|
console.log('─'.repeat(80));
|
|
48
48
|
for (const c of classes) {
|
|
49
49
|
const name = isZh ? (c.zh_name || c.display_name) : c.display_name;
|
|
@@ -51,7 +51,7 @@ export function registerEvents(program) {
|
|
|
51
51
|
console.log(`${String(c.index).padStart(5)} ${name.padEnd(40)} ${cat}`);
|
|
52
52
|
}
|
|
53
53
|
} catch (err) {
|
|
54
|
-
console.error(chalk.red(
|
|
54
|
+
console.error(chalk.red(`${t('error')}: ${err.message}`));
|
|
55
55
|
process.exit(1);
|
|
56
56
|
}
|
|
57
57
|
});
|
package/lib/commands/result.js
CHANGED
|
@@ -8,8 +8,8 @@ export function registerResult(program) {
|
|
|
8
8
|
.command('result')
|
|
9
9
|
.description(t('descResult'))
|
|
10
10
|
.argument('<id>', t('argTaskId'))
|
|
11
|
-
.option('--json', '
|
|
12
|
-
.option('--output <file>', '
|
|
11
|
+
.option('--json', t('optJson'))
|
|
12
|
+
.option('--output <file>', t('optOutput'))
|
|
13
13
|
.action(async (id, opts) => {
|
|
14
14
|
try {
|
|
15
15
|
const client = createClient(program);
|
|
@@ -24,7 +24,7 @@ export function registerResult(program) {
|
|
|
24
24
|
printResult(result);
|
|
25
25
|
}
|
|
26
26
|
} catch (err) {
|
|
27
|
-
console.error(chalk.red(
|
|
27
|
+
console.error(chalk.red(`${t('error')}: ${err.message}`));
|
|
28
28
|
process.exit(1);
|
|
29
29
|
}
|
|
30
30
|
});
|
package/lib/commands/task.js
CHANGED
|
@@ -7,7 +7,7 @@ export function registerTask(program) {
|
|
|
7
7
|
.command('task')
|
|
8
8
|
.description(t('descTask'))
|
|
9
9
|
.argument('<id>', t('argTaskId'))
|
|
10
|
-
.option('--json', '
|
|
10
|
+
.option('--json', t('optJson'))
|
|
11
11
|
.action(async (id, opts) => {
|
|
12
12
|
try {
|
|
13
13
|
const client = createClient(program);
|
|
@@ -19,7 +19,7 @@ export function registerTask(program) {
|
|
|
19
19
|
printTaskDetail(task);
|
|
20
20
|
}
|
|
21
21
|
} catch (err) {
|
|
22
|
-
console.error(chalk.red(
|
|
22
|
+
console.error(chalk.red(`${t('error')}: ${err.message}`));
|
|
23
23
|
process.exit(1);
|
|
24
24
|
}
|
|
25
25
|
});
|
package/lib/commands/tasks.js
CHANGED
|
@@ -6,10 +6,10 @@ export function registerTasks(program) {
|
|
|
6
6
|
program
|
|
7
7
|
.command('tasks')
|
|
8
8
|
.description(t('descTasks'))
|
|
9
|
-
.option('--status <status>',
|
|
10
|
-
.option('--page <n>', '
|
|
11
|
-
.option('--page-size <n>', '
|
|
12
|
-
.option('--json', '
|
|
9
|
+
.option('--status <status>', t('optStatus'))
|
|
10
|
+
.option('--page <n>', t('optPage'), parseInt, 1)
|
|
11
|
+
.option('--page-size <n>', t('optPageSize'), parseInt, 20)
|
|
12
|
+
.option('--json', t('optJson'))
|
|
13
13
|
.action(async (opts) => {
|
|
14
14
|
try {
|
|
15
15
|
const client = createClient(program);
|
|
@@ -30,7 +30,7 @@ export function registerTasks(program) {
|
|
|
30
30
|
return;
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
-
console.log(chalk.bold('
|
|
33
|
+
console.log(chalk.bold(`${t('tableHeaderId').padEnd(10)}${t('tableHeaderStatus').padEnd(20)}${t('tableHeaderFilename').padEnd(42)}${t('tableHeaderCreated')}`));
|
|
34
34
|
console.log('─'.repeat(100));
|
|
35
35
|
for (const task of tasks) {
|
|
36
36
|
console.log(formatTaskRow(task));
|
|
@@ -38,7 +38,7 @@ export function registerTasks(program) {
|
|
|
38
38
|
console.log('─'.repeat(100));
|
|
39
39
|
console.log(`${t('total')}: ${data.total || tasks.length} ${t('page')}: ${data.page || 1}/${Math.ceil((data.total || tasks.length) / (data.page_size || 20))}`);
|
|
40
40
|
} catch (err) {
|
|
41
|
-
console.error(chalk.red(
|
|
41
|
+
console.error(chalk.red(`${t('error')}: ${err.message}`));
|
|
42
42
|
process.exit(1);
|
|
43
43
|
}
|
|
44
44
|
});
|
package/lib/commands/transcribe.js
CHANGED
|
@@ -10,32 +10,32 @@ export function registerTranscribe(program) {
|
|
|
10
10
|
.command('transcribe')
|
|
11
11
|
.description(t('descTranscribe'))
|
|
12
12
|
.argument('<source>', t('argSource'))
|
|
13
|
-
.option('--base64', '
|
|
14
|
-
.option('--language <lang>',
|
|
15
|
-
.option('--no-asr', '
|
|
16
|
-
.option('--no-diarize', '
|
|
17
|
-
.option('--no-events', '
|
|
18
|
-
.option('--optimize-zh', '
|
|
19
|
-
.option('--beam-size <n>', '
|
|
20
|
-
.option('--temperature <n>', '
|
|
21
|
-
.option('--hotwords <words>',
|
|
22
|
-
.option('--initial-prompt <text>', '
|
|
23
|
-
.option('--vad-threshold <n>', '
|
|
24
|
-
.option('--vad-min-speech <ms>', '
|
|
25
|
-
.option('--vad-min-silence <ms>', '
|
|
26
|
-
.option('--no-speech-threshold <n>', '
|
|
27
|
-
.option('--num-speakers <n>', '
|
|
28
|
-
.option('--min-speakers <n>', '
|
|
29
|
-
.option('--max-speakers <n>', '
|
|
30
|
-
.option('--events-threshold <n>', '
|
|
31
|
-
.option('--events-classes <classes>',
|
|
32
|
-
.option('--batch',
|
|
33
|
-
.option('--callback-url <url>', '
|
|
34
|
-
.option('--callback-secret <secret>', '
|
|
35
|
-
.option('--no-wait', '
|
|
36
|
-
.option('--poll-interval <seconds>', '
|
|
37
|
-
.option('--json', '
|
|
38
|
-
.option('--output <file>', '
|
|
13
|
+
.option('--base64', t('optBase64'))
|
|
14
|
+
.option('--language <lang>', t('optLanguage'))
|
|
15
|
+
.option('--no-asr', t('optNoAsr'))
|
|
16
|
+
.option('--no-diarize', t('optNoDiarize'))
|
|
17
|
+
.option('--no-events', t('optNoEvents'))
|
|
18
|
+
.option('--optimize-zh', t('optOptimizeZh'))
|
|
19
|
+
.option('--beam-size <n>', t('optBeamSize'), parseInt)
|
|
20
|
+
.option('--temperature <n>', t('optTemperature'), parseFloat)
|
|
21
|
+
.option('--hotwords <words>', t('optHotwords'))
|
|
22
|
+
.option('--initial-prompt <text>', t('optInitialPrompt'))
|
|
23
|
+
.option('--vad-threshold <n>', t('optVadThreshold'), parseFloat)
|
|
24
|
+
.option('--vad-min-speech <ms>', t('optVadMinSpeech'), parseInt)
|
|
25
|
+
.option('--vad-min-silence <ms>', t('optVadMinSilence'), parseInt)
|
|
26
|
+
.option('--no-speech-threshold <n>', t('optNoSpeechThreshold'), parseFloat)
|
|
27
|
+
.option('--num-speakers <n>', t('optNumSpeakers'), parseInt)
|
|
28
|
+
.option('--min-speakers <n>', t('optMinSpeakers'), parseInt)
|
|
29
|
+
.option('--max-speakers <n>', t('optMaxSpeakers'), parseInt)
|
|
30
|
+
.option('--events-threshold <n>', t('optEventsThreshold'), parseFloat)
|
|
31
|
+
.option('--events-classes <classes>', t('optEventsClasses'))
|
|
32
|
+
.option('--batch', t('optBatch'))
|
|
33
|
+
.option('--callback-url <url>', t('optCallbackUrl'))
|
|
34
|
+
.option('--callback-secret <secret>', t('optCallbackSecret'))
|
|
35
|
+
.option('--no-wait', t('optNoWait'))
|
|
36
|
+
.option('--poll-interval <seconds>', t('optPollInterval'), parseFloat, 5)
|
|
37
|
+
.option('--json', t('optJson'))
|
|
38
|
+
.option('--output <file>', t('optOutput'))
|
|
39
39
|
.action(async (source, opts) => {
|
|
40
40
|
try {
|
|
41
41
|
const client = createClient(program);
|
|
@@ -117,7 +117,7 @@ export function registerTranscribe(program) {
|
|
|
117
117
|
process.exit(1);
|
|
118
118
|
}
|
|
119
119
|
} catch (err) {
|
|
120
|
-
console.error(chalk.red(
|
|
120
|
+
console.error(chalk.red(`${t('error')}: ${err.message}`));
|
|
121
121
|
process.exit(1);
|
|
122
122
|
}
|
|
123
123
|
});
|
package/lib/i18n.js
CHANGED
|
@@ -2,13 +2,63 @@ const messages = {
|
|
|
2
2
|
en: {
|
|
3
3
|
// General
|
|
4
4
|
apiKeyRequired: 'Error: API key required. Use --api-key or set AURALWISE_API_KEY environment variable.',
|
|
5
|
-
//
|
|
5
|
+
// Global options
|
|
6
|
+
optApiKey: 'API key (or set AURALWISE_API_KEY env)',
|
|
7
|
+
optBaseUrl: 'API base URL',
|
|
8
|
+
optLocale: 'UI language: en or zh (default: en)',
|
|
9
|
+
|
|
10
|
+
// Transcribe command
|
|
6
11
|
submittingTask: 'Submitting task...',
|
|
7
12
|
taskCreated: 'Task created',
|
|
8
13
|
waitingTask: 'Waiting for task to complete...',
|
|
9
14
|
taskCompleted: 'Task completed!',
|
|
10
15
|
taskFailed: 'Task {status}: {error}',
|
|
11
16
|
resultSaved: 'Result saved to {file}',
|
|
17
|
+
optBase64: 'Force base64 mode for local file upload',
|
|
18
|
+
optLanguage: 'ASR language (zh/en/ja/... or null for auto)',
|
|
19
|
+
optNoAsr: 'Disable ASR',
|
|
20
|
+
optNoDiarize: 'Disable speaker diarization',
|
|
21
|
+
optNoEvents: 'Disable audio event detection',
|
|
22
|
+
optOptimizeZh: 'Enable Chinese optimization mode',
|
|
23
|
+
optBeamSize: 'Beam search width',
|
|
24
|
+
optTemperature: 'Decoding temperature',
|
|
25
|
+
optHotwords: 'Hotwords (comma-separated)',
|
|
26
|
+
optInitialPrompt: 'Initial prompt text',
|
|
27
|
+
optVadThreshold: 'VAD threshold (0-1)',
|
|
28
|
+
optVadMinSpeech: 'Min speech duration in ms',
|
|
29
|
+
optVadMinSilence: 'Min silence duration in ms',
|
|
30
|
+
optNoSpeechThreshold: 'No-speech probability threshold',
|
|
31
|
+
optNumSpeakers: 'Fixed number of speakers',
|
|
32
|
+
optMinSpeakers: 'Minimum speakers for auto-detection',
|
|
33
|
+
optMaxSpeakers: 'Maximum speakers for auto-detection',
|
|
34
|
+
optEventsThreshold: 'Audio event confidence threshold',
|
|
35
|
+
optEventsClasses: 'Specific event classes (comma-separated)',
|
|
36
|
+
optBatch: 'Enable batch mode (lower priority)',
|
|
37
|
+
optCallbackUrl: 'Webhook callback URL',
|
|
38
|
+
optCallbackSecret: 'Webhook HMAC secret',
|
|
39
|
+
optNoWait: 'Submit and return immediately without polling',
|
|
40
|
+
optPollInterval: 'Polling interval in seconds',
|
|
41
|
+
optJson: 'Output as JSON',
|
|
42
|
+
optOutput: 'Save result to file',
|
|
43
|
+
|
|
44
|
+
// Tasks command
|
|
45
|
+
optStatus: 'Filter by status (pending/processing/done/failed/abandoned)',
|
|
46
|
+
optPage: 'Page number',
|
|
47
|
+
optPageSize: 'Items per page',
|
|
48
|
+
tableHeaderId: 'ID',
|
|
49
|
+
tableHeaderStatus: 'Status',
|
|
50
|
+
tableHeaderFilename: 'Filename',
|
|
51
|
+
tableHeaderCreated: 'Created',
|
|
52
|
+
|
|
53
|
+
// Delete command
|
|
54
|
+
optForce: 'Skip confirmation',
|
|
55
|
+
|
|
56
|
+
// Events command
|
|
57
|
+
optCategory: 'Filter by category',
|
|
58
|
+
optSearch: 'Search event name',
|
|
59
|
+
tableHeaderIndex: 'Index',
|
|
60
|
+
tableHeaderName: 'Name',
|
|
61
|
+
|
|
12
62
|
// Result display
|
|
13
63
|
audioDuration: 'Audio Duration',
|
|
14
64
|
language: 'Language',
|
|
@@ -18,6 +68,7 @@ const messages = {
|
|
|
18
68
|
speakerEmbeddings: 'Speaker Embeddings',
|
|
19
69
|
segments: 'segments',
|
|
20
70
|
dimVector: '-dim vector',
|
|
71
|
+
|
|
21
72
|
// Task detail
|
|
22
73
|
taskId: 'Task ID',
|
|
23
74
|
status: 'Status',
|
|
@@ -31,37 +82,99 @@ const messages = {
|
|
|
31
82
|
progress: 'Progress',
|
|
32
83
|
error: 'Error',
|
|
33
84
|
options: 'Options',
|
|
85
|
+
|
|
34
86
|
// Tasks list
|
|
35
87
|
noTasks: 'No tasks found.',
|
|
36
88
|
total: 'Total',
|
|
37
89
|
page: 'Page',
|
|
90
|
+
|
|
38
91
|
// Delete
|
|
39
92
|
taskDeleted: 'Task {id} deleted.',
|
|
40
93
|
confirmDelete: 'Are you sure you want to delete task {id}?',
|
|
41
94
|
deleteCancelled: 'Delete cancelled.',
|
|
95
|
+
|
|
42
96
|
// Events
|
|
43
97
|
totalClasses: 'Total event classes',
|
|
44
98
|
category: 'Category',
|
|
45
99
|
noEventsFound: 'No matching events found.',
|
|
46
|
-
|
|
47
|
-
|
|
100
|
+
|
|
101
|
+
// Result display - additional
|
|
102
|
+
vadSegments: 'VAD Segments',
|
|
103
|
+
diarizeSegments: 'Diarize Segments',
|
|
104
|
+
langProb: 'Language Probability',
|
|
105
|
+
|
|
106
|
+
// Command descriptions
|
|
107
|
+
descMain: 'AuralWise Speech Intelligence API CLI\n\n Transcription, speaker diarization, speaker embeddings,\n word-level timestamps, and 521-class audio event detection\n — all in one API call.',
|
|
48
108
|
descTranscribe: 'Submit an audio transcription task (URL or local file)',
|
|
49
109
|
descTasks: 'List tasks',
|
|
50
110
|
descTask: 'Get task details',
|
|
51
111
|
descResult: 'Get task result',
|
|
52
112
|
descDelete: 'Delete a task',
|
|
53
|
-
descEvents: 'List
|
|
113
|
+
descEvents: 'List all 521 AudioSet sound event classes',
|
|
54
114
|
argSource: 'Audio URL or local file path',
|
|
55
115
|
argTaskId: 'Task ID',
|
|
56
116
|
},
|
|
57
117
|
zh: {
|
|
118
|
+
// General
|
|
58
119
|
apiKeyRequired: '错误:需要 API Key。请使用 --api-key 参数或设置 AURALWISE_API_KEY 环境变量。',
|
|
120
|
+
// Global options
|
|
121
|
+
optApiKey: 'API 密钥(或设置 AURALWISE_API_KEY 环境变量)',
|
|
122
|
+
optBaseUrl: 'API 基础 URL',
|
|
123
|
+
optLocale: '界面语言:en 或 zh(默认 en)',
|
|
124
|
+
|
|
125
|
+
// Transcribe command
|
|
59
126
|
submittingTask: '提交任务中...',
|
|
60
127
|
taskCreated: '任务已创建',
|
|
61
128
|
waitingTask: '等待任务完成...',
|
|
62
129
|
taskCompleted: '任务完成!',
|
|
63
130
|
taskFailed: '任务{status}:{error}',
|
|
64
131
|
resultSaved: '结果已保存到 {file}',
|
|
132
|
+
optBase64: '强制使用 Base64 模式上传本地文件',
|
|
133
|
+
optLanguage: 'ASR 识别语言(zh/en/ja/... 或留空自动检测)',
|
|
134
|
+
optNoAsr: '禁用语音转写',
|
|
135
|
+
optNoDiarize: '禁用说话人分离',
|
|
136
|
+
optNoEvents: '禁用声音事件检测',
|
|
137
|
+
optOptimizeZh: '启用中文精简模式',
|
|
138
|
+
optBeamSize: 'Beam Search 宽度',
|
|
139
|
+
optTemperature: '解码温度',
|
|
140
|
+
optHotwords: '热词(逗号分隔)',
|
|
141
|
+
optInitialPrompt: '初始提示文本',
|
|
142
|
+
optVadThreshold: 'VAD 语音检测阈值(0-1)',
|
|
143
|
+
optVadMinSpeech: '最短语音段时长(毫秒)',
|
|
144
|
+
optVadMinSilence: '最短静音间隔(毫秒)',
|
|
145
|
+
optNoSpeechThreshold: '无语音概率阈值',
|
|
146
|
+
optNumSpeakers: '固定说话人数',
|
|
147
|
+
optMinSpeakers: '自动检测最少说话人数',
|
|
148
|
+
optMaxSpeakers: '自动检测最多说话人数',
|
|
149
|
+
optEventsThreshold: '声音事件置信度阈值',
|
|
150
|
+
optEventsClasses: '指定事件类别(逗号分隔)',
|
|
151
|
+
optBatch: '启用批量模式(低优先级)',
|
|
152
|
+
optCallbackUrl: 'Webhook 回调 URL',
|
|
153
|
+
optCallbackSecret: 'Webhook HMAC 签名密钥',
|
|
154
|
+
optNoWait: '提交后立即返回,不等待完成',
|
|
155
|
+
optPollInterval: '轮询间隔(秒)',
|
|
156
|
+
optJson: '以 JSON 格式输出',
|
|
157
|
+
optOutput: '将结果保存到文件',
|
|
158
|
+
|
|
159
|
+
// Tasks command
|
|
160
|
+
optStatus: '按状态过滤(pending/processing/done/failed/abandoned)',
|
|
161
|
+
optPage: '页码',
|
|
162
|
+
optPageSize: '每页数量',
|
|
163
|
+
tableHeaderId: 'ID',
|
|
164
|
+
tableHeaderStatus: '状态',
|
|
165
|
+
tableHeaderFilename: '文件名',
|
|
166
|
+
tableHeaderCreated: '创建时间',
|
|
167
|
+
|
|
168
|
+
// Delete command
|
|
169
|
+
optForce: '跳过确认',
|
|
170
|
+
|
|
171
|
+
// Events command
|
|
172
|
+
optCategory: '按类别过滤',
|
|
173
|
+
optSearch: '搜索事件名称',
|
|
174
|
+
tableHeaderIndex: '序号',
|
|
175
|
+
tableHeaderName: '名称',
|
|
176
|
+
|
|
177
|
+
// Result display
|
|
65
178
|
audioDuration: '音频时长',
|
|
66
179
|
language: '语言',
|
|
67
180
|
speakers: '说话人数',
|
|
@@ -70,6 +183,8 @@ const messages = {
|
|
|
70
183
|
speakerEmbeddings: '声纹向量',
|
|
71
184
|
segments: '段',
|
|
72
185
|
dimVector: '维向量',
|
|
186
|
+
|
|
187
|
+
// Task detail
|
|
73
188
|
taskId: '任务 ID',
|
|
74
189
|
status: '状态',
|
|
75
190
|
source: '来源',
|
|
@@ -82,22 +197,35 @@ const messages = {
|
|
|
82
197
|
progress: '进度',
|
|
83
198
|
error: '错误',
|
|
84
199
|
options: '选项',
|
|
200
|
+
|
|
201
|
+
// Tasks list
|
|
85
202
|
noTasks: '没有找到任务。',
|
|
86
203
|
total: '总计',
|
|
87
204
|
page: '页',
|
|
205
|
+
|
|
206
|
+
// Delete
|
|
88
207
|
taskDeleted: '任务 {id} 已删除。',
|
|
89
208
|
confirmDelete: '确定要删除任务 {id} 吗?',
|
|
90
209
|
deleteCancelled: '已取消删除。',
|
|
210
|
+
|
|
211
|
+
// Events
|
|
91
212
|
totalClasses: '事件类别总数',
|
|
92
213
|
category: '类别',
|
|
93
214
|
noEventsFound: '未找到匹配的事件。',
|
|
94
|
-
|
|
215
|
+
|
|
216
|
+
// Result display - additional
|
|
217
|
+
vadSegments: 'VAD 语音段',
|
|
218
|
+
diarizeSegments: '说话人分离段',
|
|
219
|
+
langProb: '语言置信度',
|
|
220
|
+
|
|
221
|
+
// Command descriptions
|
|
222
|
+
descMain: 'AuralWise 语音智能 API 命令行工具\n\n 转写、说话人分离、声纹向量、词级时间戳、521 类声音事件检测\n —— 一次调用,全部返回。',
|
|
95
223
|
descTranscribe: '提交音频转写任务(URL 或本地文件)',
|
|
96
224
|
descTasks: '列出任务',
|
|
97
225
|
descTask: '查看任务详情',
|
|
98
226
|
descResult: '获取任务结果',
|
|
99
227
|
descDelete: '删除任务',
|
|
100
|
-
descEvents: '
|
|
228
|
+
descEvents: '列出全部 521 类 AudioSet 声音事件',
|
|
101
229
|
argSource: '音频 URL 或本地文件路径',
|
|
102
230
|
argTaskId: '任务 ID',
|
|
103
231
|
},
|
package/lib/utils.js
CHANGED
|
@@ -85,6 +85,32 @@ export function printResult(result) {
|
|
|
85
85
|
console.log(` ${chalk.cyan(spk.speaker_id)}: ${spk.segment_count} ${t('segments')}, ${spk.embedding.length}${t('dimVector')}`);
|
|
86
86
|
}
|
|
87
87
|
}
|
|
88
|
+
|
|
89
|
+
if (result.vad_segments && result.vad_segments.length > 0) {
|
|
90
|
+
console.log();
|
|
91
|
+
console.log(chalk.bold.underline(`${t('vadSegments')} (${result.vad_segments.length})`));
|
|
92
|
+
console.log();
|
|
93
|
+
for (const seg of result.vad_segments.slice(0, 20)) {
|
|
94
|
+
const dur = (seg.end - seg.start).toFixed(1);
|
|
95
|
+
console.log(` [${formatTime(seg.start)} - ${formatTime(seg.end)}] ${chalk.dim(`${dur}s`)}`);
|
|
96
|
+
}
|
|
97
|
+
if (result.vad_segments.length > 20) {
|
|
98
|
+
console.log(chalk.dim(` ... +${result.vad_segments.length - 20} more`));
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
if (result.diarize_segments && result.diarize_segments.length > 0) {
|
|
103
|
+
console.log();
|
|
104
|
+
console.log(chalk.bold.underline(`${t('diarizeSegments')} (${result.diarize_segments.length})`));
|
|
105
|
+
console.log();
|
|
106
|
+
for (const seg of result.diarize_segments.slice(0, 20)) {
|
|
107
|
+
const dur = (seg.end - seg.start).toFixed(1);
|
|
108
|
+
console.log(` [${formatTime(seg.start)} - ${formatTime(seg.end)}] ${chalk.cyan(seg.speaker)} ${chalk.dim(`${dur}s`)}`);
|
|
109
|
+
}
|
|
110
|
+
if (result.diarize_segments.length > 20) {
|
|
111
|
+
console.log(chalk.dim(` ... +${result.diarize_segments.length - 20} more`));
|
|
112
|
+
}
|
|
113
|
+
}
|
|
88
114
|
}
|
|
89
115
|
|
|
90
116
|
export function printTaskDetail(task) {
|