react-native-sherpa-onnx 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/android/src/main/assets/model_licenses/alignment-models-license-status.csv +5 -0
- package/android/src/main/cpp/CMakeLists.txt +3 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-alignment-wrapper.cpp +66 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-alignment-wrapper.h +17 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-alignment.cpp +108 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +30 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-alignment.cpp +66 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-alignment.h +30 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +21 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxAlignmentHelper.kt +555 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +76 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTextSegmenter.kt +330 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +114 -10
- package/ios/Resources/model_licenses/alignment-models-license-status.csv +5 -0
- package/ios/SherpaOnnx+Alignment.mm +704 -0
- package/ios/SherpaOnnx+STT.mm +6 -0
- package/ios/SherpaOnnx+TTS.mm +624 -50
- package/ios/model_detect/sherpa-onnx-model-detect-alignment.mm +108 -0
- package/ios/model_detect/sherpa-onnx-model-detect.h +31 -0
- package/ios/model_detect/sherpa-onnx-validate-alignment.h +30 -0
- package/ios/model_detect/sherpa-onnx-validate-alignment.mm +66 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.h +3 -1
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +6 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/alignment/index.js +27 -0
- package/lib/module/alignment/index.js.map +1 -0
- package/lib/module/alignment/types.js +2 -0
- package/lib/module/alignment/types.js.map +1 -0
- package/lib/module/alignment/vocab.js +40 -0
- package/lib/module/alignment/vocab.js.map +1 -0
- package/lib/module/download/paths.js +9 -1
- package/lib/module/download/paths.js.map +1 -1
- package/lib/module/download/registry.js +17 -1
- package/lib/module/download/registry.js.map +1 -1
- package/lib/module/download/types.js +1 -0
- package/lib/module/download/types.js.map +1 -1
- package/lib/module/index.js +6 -4
- package/lib/module/index.js.map +1 -1
- package/lib/module/licenses.js +8 -2
- package/lib/module/licenses.js.map +1 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +68 -2
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/subtitles.js +400 -0
- package/lib/module/tts/subtitles.js.map +1 -0
- package/lib/module/tts/tempAudio.js +17 -0
- package/lib/module/tts/tempAudio.js.map +1 -0
- package/lib/module/tts/types.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +34 -3
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/alignment/index.d.ts +8 -0
- package/lib/typescript/src/alignment/index.d.ts.map +1 -0
- package/lib/typescript/src/alignment/types.d.ts +23 -0
- package/lib/typescript/src/alignment/types.d.ts.map +1 -0
- package/lib/typescript/src/alignment/vocab.d.ts +5 -0
- package/lib/typescript/src/alignment/vocab.d.ts.map +1 -0
- package/lib/typescript/src/download/paths.d.ts +5 -2
- package/lib/typescript/src/download/paths.d.ts.map +1 -1
- package/lib/typescript/src/download/registry.d.ts.map +1 -1
- package/lib/typescript/src/download/types.d.ts +2 -1
- package/lib/typescript/src/download/types.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +1 -0
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/licenses.d.ts.map +1 -1
- package/lib/typescript/src/stt/types.d.ts +5 -2
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +2 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/subtitles.d.ts +24 -0
- package/lib/typescript/src/tts/subtitles.d.ts.map +1 -0
- package/lib/typescript/src/tts/tempAudio.d.ts +3 -0
- package/lib/typescript/src/tts/tempAudio.d.ts.map +1 -0
- package/lib/typescript/src/tts/types.d.ts +68 -2
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/package.json +6 -1
- package/scripts/alignment-models/README.md +90 -0
- package/scripts/alignment-models/build_and_upload.js +724 -0
- package/scripts/alignment-models/sources.csv +5 -0
- package/scripts/alignment-models/sync_alignment_license_status.js +123 -0
- package/src/NativeSherpaOnnx.ts +35 -3
- package/src/alignment/index.ts +41 -0
- package/src/alignment/types.ts +22 -0
- package/src/alignment/vocab.ts +38 -0
- package/src/download/paths.ts +18 -5
- package/src/download/registry.ts +23 -3
- package/src/download/types.ts +1 -0
- package/src/index.tsx +6 -4
- package/src/licenses.ts +12 -1
- package/src/stt/types.ts +5 -2
- package/src/tts/index.ts +110 -3
- package/src/tts/subtitles.ts +611 -0
- package/src/tts/tempAudio.ts +31 -0
- package/src/tts/types.ts +79 -2
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
|
@@ -0,0 +1,724 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
'use strict';
|
|
4
|
+
|
|
5
|
+
const { Buffer } = require('node:buffer');
|
|
6
|
+
const fs = require('node:fs');
|
|
7
|
+
const fsp = require('node:fs/promises');
|
|
8
|
+
const path = require('node:path');
|
|
9
|
+
const { spawnSync } = require('node:child_process');
|
|
10
|
+
const { createHash } = require('node:crypto');
|
|
11
|
+
const { fileURLToPath } = require('node:url');
|
|
12
|
+
|
|
13
|
+
// Default values for the CLI options; each one can be overridden by its flag.
const DEFAULT_CSV = 'scripts/alignment-models/sources.csv';
const DEFAULT_BUILD_DIR = 'build/alignment-models';
const DEFAULT_DIST_DIR = 'dist/alignment-models';
const DEFAULT_REPO = 'XDcobra/react-native-sherpa-onnx';
const DEFAULT_TAG = 'alignment-models';

// Column names the source CSV must declare, in exactly this order.
const EXPECTED_HEADER = ['id', 'onnx_url', 'license', 'license_type', 'commercial_use'];

// Name of the release asset holding one "<archive>\t<sha256>" line per archive.
const CHECKSUM_ASSET_NAME = 'checksum.txt';

// Model ids are used as directory and archive names, so only this character set is accepted.
const VALID_ID_RE = /^[A-Za-z0-9._-]+$/;
|
|
27
|
+
|
|
28
|
+
/**
 * Writes the command-line usage text to stdout with a single console.log call.
 */
function printHelp() {
  const usageLines = [
    'Usage: node scripts/alignment-models/build_and_upload.js [options]',
    '',
    'Options:',
    '--csv <path> Path to semicolon-separated CSV source list',
    '--build-dir <path> Workspace directory for unpacked model folders',
    '--dist-dir <path> Output directory for generated .tar.bz2 files',
    '--repo <owner/name> GitHub repository in owner/name format',
    '--tag <tag> Release tag to inspect and upload assets to',
    '--dry-run Build archives only, skip release lookup and upload',
    '-h, --help Show this help message',
    '',
    `After uploads, writes ${CHECKSUM_ASSET_NAME} (SHA-256 per .tar.bz2, tab-separated) and uploads with --clobber.`,
    '',
    'Environment (downloads):',
    'GITHUB_TOKEN / GH_TOKEN Bearer token for github.com, raw.githubusercontent.com,',
    'objects.githubusercontent.com, codeload.github.com (rate limits / private).',
    'HUGGINGFACE_TOKEN Bearer token for huggingface.co (CI; avoids anonymous LFS/rate limits).',
    // The trailing empty entry keeps the final newline of the usage text.
    '',
  ];

  console.log(usageLines.join('\n'));
}
|
|
48
|
+
|
|
49
|
+
/**
 * Turns the CLI argument list into an options object.
 *
 * Options that are not given keep their DEFAULT_* value. `-h` / `--help`
 * prints the usage text and exits the process with status 0.
 *
 * @param {string[]} argv Arguments without the node executable and script path.
 * @returns {{csv: string, buildDir: string, distDir: string, repo: string, tag: string, dryRun: boolean}}
 * @throws {Error} On an unknown argument or a value-taking flag without a value.
 */
function parseArgs(argv) {
  // Maps every value-taking flag to the option property it fills in.
  const valueFlags = {
    '--csv': 'csv',
    '--build-dir': 'buildDir',
    '--dist-dir': 'distDir',
    '--repo': 'repo',
    '--tag': 'tag',
  };

  const parsed = {
    csv: DEFAULT_CSV,
    buildDir: DEFAULT_BUILD_DIR,
    distDir: DEFAULT_DIST_DIR,
    repo: DEFAULT_REPO,
    tag: DEFAULT_TAG,
    dryRun: false,
  };

  let cursor = 0;
  while (cursor < argv.length) {
    const flag = argv[cursor];
    cursor += 1;

    if (flag === '-h' || flag === '--help') {
      printHelp();
      process.exit(0);
    }

    if (flag === '--dry-run') {
      parsed.dryRun = true;
      continue;
    }

    if (!Object.hasOwn(valueFlags, flag)) {
      throw new Error(`Unknown argument: ${flag}`);
    }

    // A following token that looks like another long flag is not accepted as the value.
    const value = argv[cursor];
    if (!value || value.startsWith('--')) {
      throw new Error(`Missing value for ${flag}`);
    }

    parsed[valueFlags[flag]] = value;
    cursor += 1;
  }

  return parsed;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Splits semicolon-separated CSV text into rows of string cells.
 *
 * A leading byte-order mark is removed. Double quotes delimit a quoted
 * section in which `;` and newlines are literal and `""` stands for one
 * quote character. Outside quotes every `\r` is dropped and `\n` ends the
 * row. Rows that consist of a single empty cell are not returned.
 *
 * @param {string} content Raw CSV text.
 * @returns {string[][]} Parsed rows in input order.
 * @throws {Error} When a quoted section is still open at the end of input.
 */
function parseSemicolonCsv(content) {
  const text = content.replace(/^\uFEFF/, '');
  const records = [];
  let cells = [];
  let current = '';
  let quoted = false;

  const finishCell = () => {
    cells.push(current);
    current = '';
  };

  const finishRecord = () => {
    finishCell();
    const isBlankRecord = cells.length === 1 && cells[0] === '';
    if (!isBlankRecord) {
      records.push(cells);
    }
    cells = [];
  };

  let pos = 0;
  while (pos < text.length) {
    const ch = text[pos];
    pos += 1;

    if (quoted) {
      if (ch !== '"') {
        current += ch;
      } else if (text[pos] === '"') {
        // Doubled quote inside a quoted section is an escaped quote.
        current += '"';
        pos += 1;
      } else {
        quoted = false;
      }
      continue;
    }

    switch (ch) {
      case '"':
        quoted = true;
        break;
      case ';':
        finishCell();
        break;
      case '\n':
        finishRecord();
        break;
      case '\r':
        break;
      default:
        current += ch;
    }
  }

  if (quoted) {
    throw new Error('CSV parsing failed: unterminated quoted field');
  }

  // Flush a final row that was not terminated by a newline.
  if (current.length > 0 || cells.length > 0) {
    finishRecord();
  }

  return records;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Returns the cell text without surrounding whitespace; any falsy value
 * (missing cell, empty string) yields an empty string.
 *
 * @param {string | undefined | null} value Raw cell value.
 * @returns {string}
 */
function normalizeCell(value) {
  const text = value ? value : '';
  return text.trim();
}
|
|
176
|
+
|
|
177
|
+
/**
 * Reads the semicolon-separated model source list and validates every row.
 *
 * The header must equal EXPECTED_HEADER. Rows with fewer columns are treated
 * as having empty trailing cells. All cells are trimmed and `commercial_use`
 * is lower-cased.
 *
 * @param {string} csvPath Path of the CSV file.
 * @returns {Promise<Array<{modelId: string, onnxUrl: string, licenseUrl: string, licenseType: string, commercialUse: string}>>}
 *   One entry per data row, in file order.
 * @throws {Error} When the file cannot be read (the original error is attached
 *   as `cause`), the header is wrong, a row is invalid, an id is duplicated,
 *   or there are no data rows.
 */
async function readSources(csvPath) {
  let content;
  try {
    content = await fsp.readFile(csvPath, 'utf8');
  } catch (error) {
    // Only a missing file is reported as "not found"; other failures
    // (permissions, path is a directory, ...) keep their own description.
    const isMissing = Boolean(error) && error.code === 'ENOENT';
    const detail = error instanceof Error ? error.message : String(error);
    const message = isMissing
      ? `CSV file not found: ${csvPath}`
      : `Could not read CSV file ${csvPath}: ${detail}`;
    throw new Error(message, { cause: error });
  }

  const rows = parseSemicolonCsv(content);
  if (rows.length === 0) {
    throw new Error(`CSV file has no rows: ${csvPath}`);
  }

  const header = rows[0].map(normalizeCell);
  if (
    header.length !== EXPECTED_HEADER.length ||
    header.join(';') !== EXPECTED_HEADER.join(';')
  ) {
    throw new Error(
      `Invalid CSV header. Expected ${EXPECTED_HEADER.join(
        ';'
      )}, got ${header.join(';')}`
    );
  }

  const sources = [];
  const seen = new Set();

  for (let i = 1; i < rows.length; i += 1) {
    // Position among the parsed rows (header = 1). The parser drops blank
    // rows, so this can differ from the physical line in the file.
    const lineNumber = i + 1;
    const row = rows[i];

    if (row.length > EXPECTED_HEADER.length) {
      throw new Error(
        `Line ${lineNumber}: too many columns (expected ${EXPECTED_HEADER.length})`
      );
    }

    // Missing trailing cells normalize to '' without mutating the parsed row.
    const [modelId, onnxUrl, licenseUrl, licenseType, rawCommercialUse] =
      EXPECTED_HEADER.map((_, column) => normalizeCell(row[column]));
    const commercialUse = rawCommercialUse.toLowerCase();

    if (!modelId) {
      throw new Error(`Line ${lineNumber}: id is required`);
    }
    if (!VALID_ID_RE.test(modelId)) {
      throw new Error(
        `Line ${lineNumber}: invalid id '${modelId}'. Allowed characters: A-Z a-z 0-9 . _ -`
      );
    }
    // The id becomes a directory under the build dir that is wiped before
    // each build, and is passed to tar as an argument. '.' and '..' would
    // resolve to the build dir or its parent; a leading '-' would be read
    // by tar as an option.
    if (modelId === '.' || modelId === '..' || modelId.startsWith('-')) {
      throw new Error(
        `Line ${lineNumber}: invalid id '${modelId}': must not be '.' or '..' and must not start with '-'`
      );
    }
    if (!onnxUrl) {
      throw new Error(`Line ${lineNumber}: onnx_url is required`);
    }
    if (!licenseType) {
      throw new Error(`Line ${lineNumber}: license_type is required`);
    }
    if (commercialUse !== 'yes' && commercialUse !== 'no') {
      throw new Error(
        `Line ${lineNumber}: commercial_use must be "yes" or "no" (got "${row[4] ?? ''}")`
      );
    }
    if (seen.has(modelId)) {
      throw new Error(`Duplicate id value in CSV: ${modelId}`);
    }

    seen.add(modelId);
    sources.push({
      modelId,
      onnxUrl,
      licenseUrl,
      licenseType,
      commercialUse,
    });
  }

  if (sources.length === 0) {
    throw new Error(`CSV contains no data rows: ${csvPath}`);
  }

  return sources;
}
|
|
264
|
+
|
|
265
|
+
/**
 * Leaves `targetDir` existing and empty: removes it recursively (a missing
 * directory is not an error) and then creates it again, including parents.
 *
 * @param {string} targetDir Directory to reset.
 * @returns {Promise<void>}
 */
async function ensureCleanDir(targetDir) {
  const removeOptions = { recursive: true, force: true };
  const createOptions = { recursive: true };

  await fsp.rm(targetDir, removeOptions);
  await fsp.mkdir(targetDir, createOptions);
}
|
|
269
|
+
|
|
270
|
+
/**
|
|
271
|
+
* @param {string} url
|
|
272
|
+
* @returns {Record<string, string>}
|
|
273
|
+
*/
|
|
274
|
+
function headersForDownloadUrl(url) {
|
|
275
|
+
const headers = {
|
|
276
|
+
'User-Agent': 'wav2vec2-model-publisher/1.0',
|
|
277
|
+
'Accept': '*/*',
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
let hostname = '';
|
|
281
|
+
try {
|
|
282
|
+
hostname = new URL(url).hostname.toLowerCase().replace(/^www\./, '');
|
|
283
|
+
} catch {
|
|
284
|
+
return headers;
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
const githubHosts = new Set([
|
|
288
|
+
'github.com',
|
|
289
|
+
'raw.githubusercontent.com',
|
|
290
|
+
'objects.githubusercontent.com',
|
|
291
|
+
'release-assets.githubusercontent.com',
|
|
292
|
+
'codeload.github.com',
|
|
293
|
+
'gist.githubusercontent.com',
|
|
294
|
+
]);
|
|
295
|
+
|
|
296
|
+
if (githubHosts.has(hostname) || hostname.endsWith('.github.com')) {
|
|
297
|
+
const token = process.env.GITHUB_TOKEN || process.env.GH_TOKEN || '';
|
|
298
|
+
if (token) {
|
|
299
|
+
headers.Authorization = `Bearer ${token}`;
|
|
300
|
+
}
|
|
301
|
+
return headers;
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
if (
|
|
305
|
+
hostname === 'huggingface.co' ||
|
|
306
|
+
hostname === 'hf.co' ||
|
|
307
|
+
hostname.endsWith('.huggingface.co')
|
|
308
|
+
) {
|
|
309
|
+
const hfToken = process.env.HUGGINGFACE_TOKEN || '';
|
|
310
|
+
if (hfToken) {
|
|
311
|
+
headers.Authorization = `Bearer ${hfToken}`;
|
|
312
|
+
}
|
|
313
|
+
return headers;
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
return headers;
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
/**
 * Fetches `url` into the file `destination`, creating parent directories.
 *
 * `file://` URLs are copied from the local filesystem. Every other URL is
 * requested with fetch() using the headers from headersForDownloadUrl().
 * The response body is streamed to disk chunk by chunk, so a large model
 * file is never held in memory as a whole.
 *
 * @param {string} url Source URL (`file://` or anything fetch() accepts).
 * @param {string} destination Path of the file to write.
 * @returns {Promise<void>}
 * @throws {Error} When fetch() is unavailable, the response status is not OK,
 *   or reading/writing fails. A partially written destination file is removed
 *   before a transfer error is rethrown.
 */
async function downloadFile(url, destination) {
  await fsp.mkdir(path.dirname(destination), { recursive: true });

  if (url.startsWith('file://')) {
    await fsp.copyFile(fileURLToPath(url), destination);
    return;
  }

  if (typeof fetch !== 'function') {
    throw new Error('Node runtime does not provide fetch(); use Node 18+');
  }

  const response = await fetch(url, {
    headers: headersForDownloadUrl(url),
  });

  if (!response.ok) {
    const body = (await response.text()).slice(0, 500);
    throw new Error(
      `HTTP ${response.status} while downloading ${url}: ${body}`
    );
  }

  try {
    if (response.body) {
      // writeFile consumes an async-iterable stream one chunk at a time.
      await fsp.writeFile(destination, response.body);
    } else {
      // No readable stream exposed: fall back to buffering the payload.
      await fsp.writeFile(destination, Buffer.from(await response.arrayBuffer()));
    }
  } catch (error) {
    // Do not leave a truncated file that could later be archived as a model.
    await fsp.rm(destination, { force: true });
    throw error;
  }
}
|
|
346
|
+
|
|
347
|
+
/**
 * Runs a command synchronously and throws unless it exits with status 0.
 *
 * The child inherits this process's stdio unless `options` overrides it.
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments passed to the executable.
 * @param {object} [options] Extra spawnSync options, merged over the defaults.
 * @returns {void}
 * @throws {Error} The spawn error (for example when the executable is missing),
 *   or an Error naming the exit code or the terminating signal.
 */
function runCommand(command, args, options = {}) {
  const result = spawnSync(command, args, {
    stdio: 'inherit',
    ...options,
  });

  if (result.error) {
    throw result.error;
  }

  // A null status means the child was ended by a signal, not by an exit code.
  if (result.status === null) {
    throw new Error(`${command} was terminated by signal ${result.signal}`);
  }

  if (result.status !== 0) {
    throw new Error(`${command} exited with code ${result.status}`);
  }
}
|
|
361
|
+
|
|
362
|
+
/**
 * Packs the folder `<buildDir>/<modelId>` into a bzip2-compressed tarball at
 * `archivePath` by running `tar`. The output directory is created if needed
 * and an existing archive at that path is deleted first.
 *
 * @param {string} modelId Folder name inside `buildDir` to archive.
 * @param {string} buildDir Directory tar changes into before archiving.
 * @param {string} archivePath Path of the .tar.bz2 file to create.
 * @returns {void}
 */
function createArchive(modelId, buildDir, archivePath) {
  const outputDir = path.dirname(archivePath);
  fs.mkdirSync(outputDir, { recursive: true });

  const staleArchivePresent = fs.existsSync(archivePath);
  if (staleArchivePresent) {
    fs.rmSync(archivePath, { force: true });
  }

  const tarArgs = ['-cjf', archivePath, '-C', buildDir, modelId];
  runCommand('tar', tarArgs);
}
|
|
370
|
+
|
|
371
|
+
/**
 * Returns the GitHub token from the environment: GITHUB_TOKEN when it is
 * non-empty, otherwise GH_TOKEN, otherwise an empty string.
 *
 * @returns {string}
 */
function resolveToken() {
  const { GITHUB_TOKEN: primary, GH_TOKEN: fallback } = process.env;
  if (primary) {
    return primary;
  }
  return fallback || '';
}
|
|
374
|
+
|
|
375
|
+
/**
 * Looks up a release by tag through the GitHub REST API and returns its assets.
 *
 * Only entries that have a string `name` and a string `browser_download_url`
 * are kept; a payload without an `assets` array yields empty results.
 *
 * @param {string} repo Repository in owner/name form (inserted into the URL as given).
 * @param {string} tag Release tag; URL-encoded before use.
 * @param {string} token Bearer token; no Authorization header is sent when empty.
 * @returns {Promise<{assetNames: Set<string>, assets: Array<{name: string, browser_download_url: string}>}>}
 * @throws {Error} When fetch() is unavailable or the API response is not OK.
 */
async function getReleaseData(repo, tag, token) {
  if (typeof fetch !== 'function') {
    throw new Error('Node runtime does not provide fetch(); use Node 18+');
  }

  const encodedTag = encodeURIComponent(tag);
  const endpoint = `https://api.github.com/repos/${repo}/releases/tags/${encodedTag}`;

  const headers = {
    'Accept': 'application/vnd.github+json',
    'User-Agent': 'wav2vec2-model-publisher/1.0',
  };
  if (token) {
    headers.Authorization = `Bearer ${token}`;
  }

  const response = await fetch(endpoint, { headers });
  if (!response.ok) {
    const body = (await response.text()).slice(0, 1000);
    throw new Error(
      `Could not query release tag '${tag}' in ${repo}: HTTP ${response.status} ${body}`
    );
  }

  const payload = await response.json();
  const listed = Array.isArray(payload.assets) ? payload.assets : [];

  const assets = [];
  const assetNames = new Set();
  for (const entry of listed) {
    const usable =
      entry &&
      typeof entry.name === 'string' &&
      typeof entry.browser_download_url === 'string';
    if (!usable) {
      continue;
    }
    assets.push({
      name: entry.name,
      browser_download_url: entry.browser_download_url,
    });
    assetNames.add(entry.name);
  }

  return { assetNames, assets };
}
|
|
419
|
+
|
|
420
|
+
/**
 * Parses checksum text made of "<name><TAB><sha256 hex>" lines.
 *
 * Lines are trimmed first. Lines without a tab, with an empty name, or whose
 * digest is not exactly 64 hex characters are skipped. Digests are stored in
 * lower case; a later line for the same name replaces the earlier one.
 *
 * @param {string} text Checksum file content.
 * @returns {Map<string, string>} Archive name to lower-case SHA-256 digest.
 */
function parseChecksumMap(text) {
  const checksums = new Map();

  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();
    const separator = line.indexOf('\t');
    // Also covers blank lines, which contain no tab.
    if (separator === -1) {
      continue;
    }

    const name = line.slice(0, separator).trim();
    const digest = line.slice(separator + 1).trim().toLowerCase();
    const isSha256 = /^[a-f0-9]{64}$/.test(digest);

    if (name !== '' && isSha256) {
      checksums.set(name, digest);
    }
  }

  return checksums;
}
|
|
442
|
+
|
|
443
|
+
/**
 * Renders the checksum file: one "<name><TAB><digest>" line per archive, in
 * the given order, each ending with a newline. No names gives an empty string.
 *
 * @param {Map<string, string>} map Archive name to SHA-256 hex digest.
 * @param {string[]} orderedArchiveNames Names to emit, in output order.
 * @returns {string}
 * @throws {Error} When a listed name has no digest in `map`.
 */
function formatChecksumMap(map, orderedArchiveNames) {
  let output = '';

  for (const archiveName of orderedArchiveNames) {
    const digest = map.get(archiveName);
    if (!digest) {
      throw new Error(`Missing SHA-256 for ${archiveName}`);
    }
    output += `${archiveName}\t${digest}\n`;
  }

  return output;
}
|
|
453
|
+
|
|
454
|
+
/**
 * Computes the SHA-256 digest of a file.
 *
 * The file is hashed from a read stream chunk by chunk, so a large archive
 * is not loaded into memory as a whole.
 *
 * @param {string} filePath Path of the file to hash.
 * @returns {Promise<string>} Lower-case hex digest.
 * @throws {Error} When the file cannot be opened or read.
 */
async function sha256File(filePath) {
  const hash = createHash('sha256');
  for await (const chunk of fs.createReadStream(filePath)) {
    hash.update(chunk);
  }
  return hash.digest('hex');
}
|
|
458
|
+
|
|
459
|
+
/**
 * Downloads `url` and returns the SHA-256 digest of the response body.
 *
 * Headers come from headersForDownloadUrl(); a non-empty `token` replaces any
 * Authorization header with `Bearer <token>`. The body is hashed chunk by
 * chunk through a stream reader when the response exposes one, otherwise the
 * buffered body is hashed in one step.
 *
 * @param {string} url URL to download.
 * @param {string} token Bearer token; ignored when empty.
 * @returns {Promise<string>} Lower-case hex digest.
 * @throws {Error} When fetch() is unavailable or the response status is not OK.
 */
async function sha256FromUrl(url, token) {
  if (typeof fetch !== 'function') {
    throw new Error('Node runtime does not provide fetch(); use Node 18+');
  }

  const requestHeaders = Object.assign({}, headersForDownloadUrl(url), {
    'User-Agent': 'wav2vec2-model-publisher/1.0',
  });
  if (token) {
    requestHeaders.Authorization = `Bearer ${token}`;
  }

  const response = await fetch(url, { headers: requestHeaders });
  if (!response.ok) {
    const body = (await response.text()).slice(0, 300);
    throw new Error(`HTTP ${response.status} while hashing ${url}: ${body}`);
  }

  const digest = createHash('sha256');
  const stream = response.body;

  if (stream) {
    const reader = stream.getReader();
    let step = await reader.read();
    while (!step.done) {
      if (step.value) {
        digest.update(step.value);
      }
      step = await reader.read();
    }
  } else {
    digest.update(Buffer.from(await response.arrayBuffer()));
  }

  return digest.digest('hex');
}
|
|
497
|
+
|
|
498
|
+
/**
 * Downloads the checksum asset of a release, if there is one, and parses it.
 *
 * This is best effort: when the release has no checksum asset, or downloading
 * it fails with a non-OK status, an empty text and an empty map are returned
 * so the caller can rebuild the checksums. A failed download is reported with
 * a warning instead of being dropped silently.
 *
 * @param {Array<{name: string, browser_download_url: string}>} releaseAssets Assets of the release.
 * @param {string} token Bearer token; when non-empty it replaces any Authorization header.
 * @returns {Promise<{text: string, map: Map<string, string>}>} Raw checksum text and parsed name-to-digest map.
 */
async function fetchExistingChecksumTextAndMap(releaseAssets, token) {
  const checksumAsset = releaseAssets.find(
    (a) => a.name === CHECKSUM_ASSET_NAME
  );
  if (!checksumAsset) {
    return { text: '', map: new Map() };
  }

  const headers = {
    ...headersForDownloadUrl(checksumAsset.browser_download_url),
    'User-Agent': 'wav2vec2-model-publisher/1.0',
  };
  if (token) {
    headers.Authorization = `Bearer ${token}`;
  }

  const response = await fetch(checksumAsset.browser_download_url, {
    headers,
  });
  if (!response.ok) {
    // Keep going without prior checksums, but make the failure visible.
    console.warn(
      `[checksum] could not download existing ${CHECKSUM_ASSET_NAME}: HTTP ${response.status}; continuing without prior checksums`
    );
    return { text: '', map: new Map() };
  }

  const text = await response.text();
  return { text, map: parseChecksumMap(text) };
}
|
|
524
|
+
|
|
525
|
+
/**
 * Produces the archive-name to SHA-256 map for the given sources.
 *
 * Starts from a copy of `priorMap`. For each source archive:
 * - if it was built in this run and its file exists in `distDir`, the local
 *   file is hashed and replaces any prior entry;
 * - otherwise an existing entry is kept as is;
 * - otherwise the matching release asset is downloaded and hashed.
 *
 * @param {Array<{modelId: string}>} allSources Sources whose archives need a checksum.
 * @param {string} distDir Directory holding locally built archives.
 * @param {Set<string>} builtNames Archive file names built in this run.
 * @param {Array<{name: string, browser_download_url: string}>} releaseAssets Assets on the release.
 * @param {Map<string, string>} priorMap Checksums known before this run (not modified).
 * @param {string} token Token passed on for remote hashing.
 * @returns {Promise<Map<string, string>>}
 * @throws {Error} When an archive has no checksum, was not built locally, and is not on the release.
 */
async function buildChecksumMap(
  allSources,
  distDir,
  builtNames,
  releaseAssets,
  priorMap,
  token
) {
  const checksums = new Map(priorMap);

  for (const { modelId } of allSources) {
    const archiveName = `${modelId}.tar.bz2`;
    const archivePath = path.join(distDir, archiveName);

    const builtLocally =
      builtNames.has(archiveName) && fs.existsSync(archivePath);
    if (builtLocally) {
      checksums.set(archiveName, await sha256File(archivePath));
      continue;
    }

    if (checksums.has(archiveName)) {
      continue;
    }

    const remoteAsset = releaseAssets.find(
      (candidate) => candidate.name === archiveName
    );
    if (!remoteAsset) {
      throw new Error(
        `[checksum] ${archiveName} is missing from the release; build or upload it first`
      );
    }

    console.log(`[checksum] hashing remote ${archiveName}`);
    checksums.set(
      archiveName,
      await sha256FromUrl(remoteAsset.browser_download_url, token)
    );
  }

  return checksums;
}
|
|
555
|
+
|
|
556
|
+
/**
 * Uploads the checksum file to a release with `gh release upload --clobber`,
 * so an existing asset of the same name is replaced.
 *
 * The child environment is a copy of process.env; GH_TOKEN is set to `token`
 * only when a token is given and GH_TOKEN is not already non-empty.
 *
 * @param {string} repo Repository in owner/name form.
 * @param {string} tag Release tag to upload to.
 * @param {string} checksumPath Path of the checksum file.
 * @param {string} token GitHub token offered to the gh CLI.
 * @returns {void}
 */
function uploadChecksumFile(repo, tag, checksumPath, token) {
  const injectToken = Boolean(token) && !process.env.GH_TOKEN;
  const env = injectToken
    ? { ...process.env, GH_TOKEN: token }
    : { ...process.env };

  const ghArgs = [
    'release',
    'upload',
    tag,
    checksumPath,
    '--clobber',
    '--repo',
    repo,
  ];
  runCommand('gh', ghArgs, { env });
}
|
|
568
|
+
|
|
569
|
+
/**
 * Uploads one archive to a release with `gh release upload` (no --clobber).
 *
 * The child environment is a copy of process.env; GH_TOKEN is set to `token`
 * only when a token is given and GH_TOKEN is not already non-empty.
 *
 * @param {string} repo Repository in owner/name form.
 * @param {string} tag Release tag to upload to.
 * @param {string} archivePath Path of the archive file.
 * @param {string} token GitHub token offered to the gh CLI.
 * @returns {void}
 */
function uploadArchive(repo, tag, archivePath, token) {
  const injectToken = Boolean(token) && !process.env.GH_TOKEN;
  const env = injectToken
    ? { ...process.env, GH_TOKEN: token }
    : { ...process.env };

  const ghArgs = ['release', 'upload', tag, archivePath, '--repo', repo];
  runCommand('gh', ghArgs, { env });
}
|
|
579
|
+
|
|
580
|
+
/**
 * Downloads each source's files and packs them into `<modelId>.tar.bz2`.
 *
 * Sources are processed one after another. For each one a fresh folder
 * `<buildDir>/<modelId>` receives `model.onnx` and, when the source has a
 * license URL, `LICENSE`; the folder is then archived into `distDir`.
 *
 * @param {Array<{modelId: string, onnxUrl: string, licenseUrl: string}>} sources Models to build.
 * @param {string} buildDir Workspace directory for the unpacked model folders.
 * @param {string} distDir Output directory for the archives.
 * @returns {Promise<string[]>} Paths of the created archives, in source order.
 */
async function buildArchives(sources, buildDir, distDir) {
  const createOptions = { recursive: true };
  await fsp.mkdir(buildDir, createOptions);
  await fsp.mkdir(distDir, createOptions);

  const builtArchives = [];

  for (const { modelId, onnxUrl, licenseUrl } of sources) {
    const workDir = path.join(buildDir, modelId);
    await ensureCleanDir(workDir);

    console.log(`[download] ${modelId}: model.onnx`);
    await downloadFile(onnxUrl, path.join(workDir, 'model.onnx'));

    if (licenseUrl) {
      console.log(`[download] ${modelId}: LICENSE`);
      await downloadFile(licenseUrl, path.join(workDir, 'LICENSE'));
    }

    const archivePath = path.join(distDir, `${modelId}.tar.bz2`);
    createArchive(modelId, buildDir, archivePath);
    console.log(`[archive] ${archivePath}`);
    builtArchives.push(archivePath);
  }

  return builtArchives;
}
|
|
608
|
+
|
|
609
|
+
/**
 * CLI entry point.
 *
 * With --dry-run every source is built locally and nothing is looked up or
 * uploaded. Otherwise a GitHub token is required: the release is queried,
 * archives that are not yet release assets are built and uploaded, and the
 * checksum file is regenerated and uploaded when its content differs from
 * the one already on the release.
 *
 * @returns {Promise<void>}
 * @throws {Error} When no token is available outside dry-run mode, or when any step fails.
 */
async function main() {
  const options = parseArgs(process.argv.slice(2));
  const { repo, tag, buildDir, distDir } = options;
  const allSources = await readSources(options.csv);
  const archiveNameOf = (source) => `${source.modelId}.tar.bz2`;

  if (options.dryRun) {
    const built = await buildArchives(allSources, buildDir, distDir);
    console.log('[dry-run] Upload skipped.');
    built.forEach((archive) => {
      console.log(`[dry-run] built ${archive}`);
    });
    return;
  }

  const token = resolveToken();
  if (!token) {
    throw new Error(
      'Missing GITHUB_TOKEN/GH_TOKEN environment variable for release lookup and upload'
    );
  }

  const { assetNames: existingAssets, assets: releaseAssets } =
    await getReleaseData(repo, tag, token);
  console.log(
    `[release] Found ${existingAssets.size} assets on ${repo}@${tag}`
  );

  const prior = await fetchExistingChecksumTextAndMap(releaseAssets, token);
  const priorChecksumText = prior.text;
  const priorChecksumMap = prior.map;

  // Only archives that are not yet release assets get built and uploaded.
  const sourcesToBuild = [];
  let skipped = 0;
  for (const source of allSources) {
    const assetName = archiveNameOf(source);
    if (!existingAssets.has(assetName)) {
      sourcesToBuild.push(source);
      continue;
    }
    console.log(`[skip] ${assetName} already exists in release`);
    skipped += 1;
  }

  const builtNames = new Set();
  let uploaded = 0;

  if (sourcesToBuild.length === 0) {
    console.log('[build] No new archives (all assets already on release)');
  } else {
    const archives = await buildArchives(sourcesToBuild, buildDir, distDir);
    for (const archive of archives) {
      const assetName = path.basename(archive);
      console.log(`[upload] ${assetName}`);
      uploadArchive(repo, tag, archive, token);
      uploaded += 1;
      builtNames.add(assetName);
    }
  }

  // After uploads the asset list is fetched again so it includes the new archives.
  const assetsForChecksum =
    uploaded > 0
      ? (await getReleaseData(repo, tag, token)).assets
      : releaseAssets;

  const archiveNames = allSources
    .map((source) => archiveNameOf(source))
    .sort((a, b) => a.localeCompare(b));

  const checksumMap = await buildChecksumMap(
    allSources,
    distDir,
    builtNames,
    assetsForChecksum,
    priorChecksumMap,
    token
  );

  const checksumContent = formatChecksumMap(checksumMap, archiveNames);
  const checksumPath = path.join(distDir, CHECKSUM_ASSET_NAME);
  await fsp.mkdir(path.dirname(checksumPath), { recursive: true });
  await fsp.writeFile(checksumPath, checksumContent, 'utf8');

  // Compare with line endings and trailing whitespace normalized.
  const normalize = (text) => text.replace(/\r\n/g, '\n').trimEnd();
  const priorNorm = normalize(priorChecksumText);
  const nextNorm = normalize(checksumContent);

  if (nextNorm === priorNorm) {
    console.log(`[checksum] ${CHECKSUM_ASSET_NAME} unchanged`);
  } else {
    console.log(`[upload] ${CHECKSUM_ASSET_NAME}`);
    uploadChecksumFile(repo, tag, checksumPath, token);
  }

  console.log(
    `[done] uploaded=${uploaded} skipped=${skipped} total=${allSources.length}`
  );
}
|
|
712
|
+
|
|
713
|
+
// Run the CLI only when this file is the program entry point, not when it is
// loaded with require().
if (require.main === module) {
  // Print a single-line error and exit with status 1.
  const reportAndExit = (error) => {
    const message = error instanceof Error ? error.message : String(error);
    console.error(`[error] ${message}`);
    process.exit(1);
  };

  main().catch(reportAndExit);
}

// Names available to code that loads this file as a module.
module.exports = { readSources, EXPECTED_HEADER };
|