@nxtedition/lib 23.3.27 → 23.3.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app.js +4 -0
- package/package.json +5 -3
- package/transcript.js +215 -0
- package/util/template/nextpressions.js +1 -0
package/app.js
CHANGED
|
@@ -137,6 +137,10 @@ export function makeApp(appConfig, onTerminate) {
|
|
|
137
137
|
} (module:${serviceModule}; instance:${serviceInstanceId}) worker:${serviceWorkerId} Node/${process.version}`) ??
|
|
138
138
|
null)
|
|
139
139
|
|
|
140
|
+
if (isMainThread && serviceName) {
|
|
141
|
+
process.title = serviceName
|
|
142
|
+
}
|
|
143
|
+
|
|
140
144
|
const dailyOffpeakTime = config.dailyOffpeakTime ?? getUTCRangeForLocalTime('00:00-04:00')
|
|
141
145
|
|
|
142
146
|
if (dailyOffpeakTime) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nxtedition/lib",
|
|
3
|
-
"version": "23.3.27",
|
|
3
|
+
"version": "23.3.29",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": "Robert Nagy <robert.nagy@boffins.se>",
|
|
6
6
|
"type": "module",
|
|
@@ -40,6 +40,7 @@
|
|
|
40
40
|
"scheduler.js",
|
|
41
41
|
"stream.js",
|
|
42
42
|
"timeline.js",
|
|
43
|
+
"transcript.js",
|
|
43
44
|
"docker-secrets.js",
|
|
44
45
|
"wordwrap.js"
|
|
45
46
|
],
|
|
@@ -65,9 +66,10 @@
|
|
|
65
66
|
"@elastic/elasticsearch": "^8.17.1",
|
|
66
67
|
"@elastic/transport": "^8.9.3",
|
|
67
68
|
"@nxtedition/nxt-undici": "^6.2.2",
|
|
69
|
+
"@smithy/node-http-handler": "^4.0.4",
|
|
68
70
|
"@swc/wasm-web": "^1.11.8",
|
|
69
|
-
"content-type": "^1.0.5",
|
|
70
71
|
"date-fns": "^4.1.0",
|
|
72
|
+
"diff": "5.2.0",
|
|
71
73
|
"fast-querystring": "^1.1.1",
|
|
72
74
|
"hasha": "^6.0.0",
|
|
73
75
|
"http-errors": "^2.0.0",
|
|
@@ -87,13 +89,13 @@
|
|
|
87
89
|
"request-target": "^1.0.2",
|
|
88
90
|
"smpte-timecode": "^1.3.6",
|
|
89
91
|
"split-string": "^6.0.0",
|
|
90
|
-
"undici": "^7.4.0",
|
|
91
92
|
"url-join": "^5.0.0",
|
|
92
93
|
"xuid": "^4.1.5",
|
|
93
94
|
"yocto-queue": "^1.2.0"
|
|
94
95
|
},
|
|
95
96
|
"devDependencies": {
|
|
96
97
|
"@nxtedition/deepstream.io-client-js": ">=28.1.9",
|
|
98
|
+
"@types/diff": "^5.0.9",
|
|
97
99
|
"@types/lodash": "^4.17.16",
|
|
98
100
|
"@types/node": "^22.13.10",
|
|
99
101
|
"eslint": "^9.22.0",
|
package/transcript.js
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
import * as Diff from 'diff'
|
|
2
|
+
import round from 'lodash/round.js'
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* @typedef {{
|
|
6
|
+
* start: number,
|
|
7
|
+
* end: number,
|
|
8
|
+
* text: string,
|
|
9
|
+
* p?: number
|
|
10
|
+
* }} Word
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Aligns a new string of words to a reference list of timed words.
 *
 * Takes a list of reference words with associated timing information, and a new
 * string that is similar (e.g., lightly edited). The new text is split on single
 * spaces and diffed against the reference words by `text` only. Each resulting
 * patch is then turned into timed output words:
 *
 * - kept words reuse the reference word objects as-is,
 * - replaced words get their timing from the words they replace,
 * - inserted words get zero duration at the start of the next reference word,
 *   or at the end of the last output word when no reference word is left,
 * - removed words are dropped.
 *
 * NOTE: Added words that have neither a remaining reference word nor a
 * previously emitted word to anchor to (e.g. an empty reference list) are
 * skipped, since there is no timing to derive for them.
 *
 * @param {string} textToAlign - A new string containing a similar sentence or phrase to align.
 * @param {Word[]} referenceWords - List of words with timing information (e.g., from a transcript).
 * @return {Word[]} An array of words from the new string, each annotated with estimated timing data.
 * @throws {Error} If the words about to be replaced are not valid, timed transcript words.
 */
export function alignWords(textToAlign, referenceWords) {
  const wordsToAlign = textToAlign
    .split(' ')
    .map((w) => w.trim())
    .filter((w) => w.length > 0)
    .map((w) => ({ text: w }))

  // Use the Diff library to create a diff with minimal changes, based on text content only (ignore timing):
  const patches = Diff.diffArrays(referenceWords, wordsToAlign, {
    comparator: (a, b) => a.text === b.text,
  })

  /** @type {Word[]} */
  const targetWords = []

  // Index of the first reference word that no patch has consumed yet.
  // A cursor avoids repeatedly shifting/splicing the front of an array.
  let referenceIndex = 0

  for (let patchIndex = 0; patchIndex < patches.length; patchIndex++) {
    const currentPatch = patches[patchIndex]
    if (!currentPatch) {
      continue
    }
    const nextPatch = patches[patchIndex + 1]
    const count = currentPatch.count ?? 0

    if (currentPatch.removed && nextPatch?.added) {
      // REPLACE
      patchIndex++ // NOTE: Skip next patch, as we're handling it in this iteration.
      referenceIndex += count
      if (!validateTranscriptWords(currentPatch.value)) {
        // NOTE: This check shouldn't be necessary, but acts as a type guard.
        throw new Error('Expected words to be replaced to be valid, timed, transcript words')
      }
      for (const word of assignTimingToReplacement(nextPatch.value, currentPatch.value)) {
        targetWords.push(word)
      }
    } else if (currentPatch.added && currentPatch.value) {
      // ADD
      const wordAtChangeIndex = referenceWords[referenceIndex]
      if (!wordAtChangeIndex) {
        // HACK: Edge case: Only added words at the end. In this case we just add them with zero duration.
        // Ideally we should try to split the timing of the previous words somehow.
        const lastWord = targetWords.at(-1)
        if (!lastWord) {
          continue
        }
        for (const word of currentPatch.value) {
          targetWords.push({
            start: lastWord.end,
            end: lastWord.end,
            text: word.text,
            p: 1,
          })
        }
        continue
      }
      for (const word of assignTimingToInsertion(wordAtChangeIndex, currentPatch.value)) {
        targetWords.push(word)
      }
    } else if (currentPatch.removed) {
      // REMOVE
      referenceIndex += count
    } else if (!currentPatch.removed && !currentPatch.added) {
      // KEEP
      for (const word of referenceWords.slice(referenceIndex, referenceIndex + count)) {
        targetWords.push(word)
      }
      referenceIndex += count
    }
  }

  return targetWords
}
|
|
95
|
+
|
|
96
|
+
/**
 * Assigns timing to words that replace a run of timed reference words.
 *
 * When both runs have the same number of words, each new word takes over the
 * start/end of the word at the same position. Otherwise the new words are
 * joined with spaces and interpolated across the span from the first removed
 * word's start to the last removed word's end. All returned words get `p: 1`.
 *
 * @param {Array<{ text: string }>} added - The replacement words.
 * @param {Word[]} removedWord - The timed words being replaced.
 * @return {Word[]}
 * @throws {Error} If the word counts differ and `removedWord` is empty.
 */
function assignTimingToReplacement(added, removedWord) {
  if (added.length !== removedWord.length) {
    // Word count changed, so per-word timing no longer lines up:
    // distribute the new words over the whole removed span instead.
    const spanFirst = removedWord.at(0)
    const spanLast = removedWord.at(-1)

    if (!spanFirst || !spanLast) {
      throw new Error('Expected first and last removed word')
    }

    const interpolatedWords = interpolate({
      start: spanFirst.start,
      end: spanLast.end,
      text: added.map(({ text }) => text).join(' '),
    })

    return interpolatedWords.map((timedWord) => ({ ...timedWord, p: 1 }))
  }

  // One-to-one replacement: the original timing is assumed to still hold,
  // even though the word lengths may differ.
  return added.map(({ text }, index) => {
    const { start, end } = removedWord[index]
    return { text, start, end, p: 1 }
  })
}
|
|
130
|
+
|
|
131
|
+
/**
 * Assigns timing to words inserted in front of an existing reference word.
 *
 * Every inserted word is given zero duration, with both `start` and `end` set
 * to the start of `wordAtChangeIndex`, and `p: 1`.
 *
 * @param {Word} wordAtChangeIndex - The reference word located at the insertion point.
 * @param {Array<{ text: string }>} added - The inserted words.
 * @return {Word[]}
 */
function assignTimingToInsertion(wordAtChangeIndex, added) {
  // NOTE:
  // Currently, added (non-replaced) words always have zero duration.
  // Could possibly detect if there is a gap available in the original timing,
  // and use that somehow...
  const toZeroDurationWord = (insertedWord) => {
    const anchorTime = wordAtChangeIndex.start
    return { ...insertedWord, start: anchorTime, end: anchorTime, p: 1 }
  }

  return added.map((insertedWord) => toZeroDurationWord(insertedWord))
}
|
|
148
|
+
|
|
149
|
+
/**
 * Type guard that checks whether `input` is an array of timed transcript words.
 *
 * Each element must be a non-null object with numeric `start` and `end` and a
 * string `text`. `p` is optional on `Word`, so it is only rejected when it is
 * present with a non-number value.
 *
 * @param {unknown} input
 * @return {input is Word[]}
 */
function validateTranscriptWords(input) {
  return (
    Array.isArray(input) &&
    input.every(
      (word) =>
        typeof word === 'object' &&
        word !== null &&
        typeof word.start === 'number' &&
        typeof word.end === 'number' &&
        typeof word.text === 'string' &&
        // Requiring `p` here would reject valid words that simply omit it.
        (word.p === undefined || typeof word.p === 'number'),
    )
  )
}
|
|
171
|
+
|
|
172
|
+
const zeroTimeCharacters = ['\\s', '\\.', ',', ':', '!', '?', '\\(', '\\)'].join('')

// Compiled once instead of once per word. The pattern contains no letters,
// so no case-insensitive flag is needed.
const zeroTimeCharactersPattern = new RegExp(`[${zeroTimeCharacters}]`, 'g')

/**
 * Splits `args.text` on single spaces and distributes the time span
 * `[args.start, args.end]` over the resulting words.
 *
 * Each word's share is proportional to its character count, ignoring
 * whitespace and the punctuation listed in `zeroTimeCharacters`. Word
 * boundaries are rounded to two decimals. The last word always ends exactly at
 * `args.end`. When `args.start <= args.end`, no boundary is allowed to exceed
 * `args.end`, so rounding can never produce a word whose start lies after its
 * end.
 *
 * @param {object} args
 * @param {string} args.text
 * @param {number} args.start
 * @param {number} args.end
 * @return {Array<{ start: number; end: number; text: string }>}
 */
export function interpolate(args) {
  const splitWords = args.text.split(' ').map((word) => ({
    word,
    timeUnits: word.replace(zeroTimeCharactersPattern, '').length,
  }))

  let totalTimeUnits = splitWords.reduce((acc, word) => acc + word.timeUnits, 0)

  if (totalTimeUnits === 0) {
    // Can happen if the "sentence" for some reason only contains punctuation.
    // In that case it doesn't really matter which value we give totalTimeUnits,
    // as long as it's not 0 (since it's the denominator below).
    totalTimeUnits = 1
  }

  const secondsPerCharacter = (args.end - args.start) / totalTimeUnits
  const isForwardSpan = args.start <= args.end

  /**
   * Rounded timestamp after `timeUnits` characters, never past `args.end`.
   *
   * @param {number} timeUnits
   * @return {number}
   */
  const timeAt = (timeUnits) => {
    const time = round(args.start + secondsPerCharacter * timeUnits, 2)
    // Rounding up may overshoot an `args.end` that has more than two decimals.
    return isForwardSpan ? Math.min(time, args.end) : time
  }

  let accumulatedCharacterCount = 0

  return splitWords.map((splitWord, splitWordIndex) => {
    const start = timeAt(accumulatedCharacterCount)
    accumulatedCharacterCount += splitWord.timeUnits
    const isLastWord = splitWordIndex === splitWords.length - 1

    return {
      start,
      end: isLastWord ? args.end : timeAt(accumulatedCharacterCount),
      text: splitWord.word,
    }
  })
}
|