@nxtedition/lib 23.3.28 → 23.3.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/app.js +4 -0
  2. package/package.json +18 -17
  3. package/time.js +16 -4
  4. package/transcript.js +215 -0
package/app.js CHANGED
@@ -137,6 +137,10 @@ export function makeApp(appConfig, onTerminate) {
137
137
  } (module:${serviceModule}; instance:${serviceInstanceId}) worker:${serviceWorkerId} Node/${process.version}`) ??
138
138
  null)
139
139
 
140
+ if (isMainThread && serviceName) {
141
+ process.title = serviceName
142
+ }
143
+
140
144
  const dailyOffpeakTime = config.dailyOffpeakTime ?? getUTCRangeForLocalTime('00:00-04:00')
141
145
 
142
146
  if (dailyOffpeakTime) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nxtedition/lib",
3
- "version": "23.3.28",
3
+ "version": "23.3.30",
4
4
  "license": "MIT",
5
5
  "author": "Robert Nagy <robert.nagy@boffins.se>",
6
6
  "type": "module",
@@ -40,6 +40,7 @@
40
40
  "scheduler.js",
41
41
  "stream.js",
42
42
  "timeline.js",
43
+ "transcript.js",
43
44
  "docker-secrets.js",
44
45
  "wordwrap.js"
45
46
  ],
@@ -61,23 +62,24 @@
61
62
  "singleQuote": true
62
63
  },
63
64
  "dependencies": {
64
- "@aws-sdk/client-s3": "^3.758.0",
65
+ "@aws-sdk/client-s3": "^3.796.0",
65
66
  "@elastic/elasticsearch": "^8.17.1",
66
67
  "@elastic/transport": "^8.9.3",
67
- "@nxtedition/nxt-undici": "^6.2.2",
68
- "@swc/wasm-web": "^1.11.8",
69
- "content-type": "^1.0.5",
68
+ "@nxtedition/nxt-undici": "^6.3.4",
69
+ "@smithy/node-http-handler": "^4.0.4",
70
+ "@swc/wasm-web": "^1.11.22",
70
71
  "date-fns": "^4.1.0",
72
+ "diff": "5.2.0",
71
73
  "fast-querystring": "^1.1.1",
72
74
  "hasha": "^6.0.0",
73
75
  "http-errors": "^2.0.0",
74
76
  "json5": "^2.2.3",
75
77
  "koa-compose": "^4.1.0",
76
78
  "lodash": "^4.17.21",
77
- "lru-cache": "^11.0.2",
78
- "mime": "^4.0.6",
79
+ "lru-cache": "^11.1.0",
80
+ "mime": "^4.0.7",
79
81
  "mitata": "^1.0.34",
80
- "moment-timezone": "^0.5.46",
82
+ "moment-timezone": "^0.5.48",
81
83
  "nconf": "^0.12.1",
82
84
  "nested-error-stacks": "^2.1.1",
83
85
  "object-hash": "^3.0.0",
@@ -87,30 +89,29 @@
87
89
  "request-target": "^1.0.2",
88
90
  "smpte-timecode": "^1.3.6",
89
91
  "split-string": "^6.0.0",
90
- "undici": "^7.4.0",
91
92
  "url-join": "^5.0.0",
92
93
  "xuid": "^4.1.5",
93
- "yocto-queue": "^1.2.0"
94
+ "yocto-queue": "^1.2.1"
94
95
  },
95
96
  "devDependencies": {
96
- "@nxtedition/deepstream.io-client-js": ">=28.1.9",
97
+ "@nxtedition/deepstream.io-client-js": ">=28.1.15",
97
98
  "@types/lodash": "^4.17.16",
98
- "@types/node": "^22.13.10",
99
- "eslint": "^9.22.0",
100
- "eslint-config-prettier": "^10.1.1",
99
+ "@types/node": "^22.15.1",
100
+ "eslint": "^9.25.1",
101
+ "eslint-config-prettier": "^10.1.2",
101
102
  "eslint-config-standard": "^17.0.0",
102
103
  "eslint-plugin-import": "^2.31.0",
103
- "eslint-plugin-n": "^17.16.2",
104
+ "eslint-plugin-n": "^17.17.0",
104
105
  "eslint-plugin-node": "^11.1.0",
105
106
  "eslint-plugin-promise": "^7.2.1",
106
107
  "husky": "^9.1.7",
107
- "lint-staged": "^15.3.0",
108
+ "lint-staged": "^15.5.1",
108
109
  "pinst": "^3.0.0",
109
110
  "prettier": "^3.5.3",
110
111
  "rxjs": "^7.8.2",
111
112
  "send": "^1.1.0",
112
113
  "tap": "^21.1.0",
113
- "typescript-eslint": "^8.26.0"
114
+ "typescript-eslint": "^8.31.0"
114
115
  },
115
116
  "peerDependencies": {
116
117
  "@elastic/elasticsearch": "^8.6.0",
package/time.js CHANGED
@@ -1,6 +1,16 @@
1
- export function isTimeBetween(date, startTime, endTime) {
2
- const currentHours = date.getHours()
3
- const currentMinutes = date.getMinutes()
1
+ export function isTimeBetween(date, startTime, endTime, isUTC) {
2
+ let currentHours = date.getHours()
3
+ let currentMinutes = date.getMinutes()
4
+
5
+ if (isUTC) {
6
+ currentHours = date.getUTCHours()
7
+ currentMinutes = date.getUTCMinutes()
8
+ } else {
9
+ // Convert local time to UTC equivalent
10
+ const utcDate = new Date(date.getTime() + date.getTimezoneOffset() * 60000)
11
+ currentHours = utcDate.getUTCHours()
12
+ currentMinutes = utcDate.getUTCMinutes()
13
+ }
4
14
 
5
15
  // Validate and parse start and end times
6
16
  if (!startTime) startTime = '00:00' // Default start at midnight
@@ -49,7 +59,9 @@ export function getUTCRangeForLocalTime(range) {
49
59
  // Validate input format (hh:mm-hh:mm)
50
60
  const timeFormat = /^\d{2}:\d{2}-\d{2}:\d{2}$/
51
61
  if (!timeFormat.test(range)) {
52
- throw new Error("Invalid format. Use 'hh:mm-hh:mm' (e.g., '01:00-05:00').")
62
+ throw Object.assign(new Error("Invalid format. Use 'hh:mm-hh:mm' (e.g., '01:00-05:00')."), {
63
+ data: range,
64
+ })
53
65
  }
54
66
 
55
67
  const [startTime, endTime] = range.split('-')
package/transcript.js ADDED
@@ -0,0 +1,215 @@
1
+ import * as Diff from 'diff'
2
+ import round from 'lodash/round.js'
3
+
4
+ /**
5
+ * @typedef {{
6
+ * start: number,
7
+ * end: number,
8
+ * text: string,
9
+ * p?: number
10
+ * }} Word
11
+ */
12
+
13
/**
 * Aligns a new string of words to a reference list of timed words.
 *
 * The text is tokenized on whitespace and diffed against the reference words
 * by `text` only (timing is ignored). The resulting patches are then walked in
 * order:
 *
 * - removed followed by added (REPLACE): the new words take over the time span
 *   of the reference words they replace.
 * - added (ADD): the new words get zero duration, placed at the start of the
 *   next unconsumed reference word, or at the end of the last output word when
 *   no reference word follows. If there is neither, the added words are
 *   dropped because there is no timing to anchor them to.
 * - removed (REMOVE): the reference words are skipped.
 * - unchanged (KEEP): the reference word objects are passed through as-is
 *   (the same object instances, not copies).
 *
 * @param {string} textToAlign - A new string containing a similar sentence or phrase to align.
 * @param {Word[]} referenceWords - List of words with timing information (e.g., from a transcript).
 * @return {Word[]} An array of words from the new string, each annotated with estimated timing data.
 * @throws {Error} If the reference words being replaced fail `validateTranscriptWords`.
 */
export function alignWords(textToAlign, referenceWords) {
  // Split on any run of whitespace, so words separated only by a newline or a
  // tab become separate tokens instead of one token with embedded whitespace.
  const wordsToAlign = textToAlign
    .split(/\s+/)
    .filter((w) => w.length > 0)
    .map((w) => ({ text: w }))

  // Use the Diff library to create a diff with minimal changes, based on text content only (ignore timing):
  const patches = Diff.diffArrays(referenceWords, wordsToAlign, {
    comparator: (a, b) => a.text === b.text,
  })

  /** @type {Word[]} */
  const targetWords = []

  // Index of the first reference word not yet consumed by a patch. Using
  // cursors instead of shift()/splice(0, n) avoids re-indexing the arrays on
  // every patch.
  let referenceIndex = 0

  for (let patchIndex = 0; patchIndex < patches.length; patchIndex++) {
    const currentPatch = patches[patchIndex]
    const nextPatch = patches[patchIndex + 1]

    if (currentPatch.removed && nextPatch?.added) {
      // REPLACE
      patchIndex++ // NOTE: Skip the next patch, as we're handling it in this iteration.
      referenceIndex += currentPatch.count
      if (!validateTranscriptWords(currentPatch.value)) {
        // NOTE: This check shouldn't be necessary, but acts as a type guard.
        throw new Error('Expected words to be replaced to be valid, timed, transcript words')
      }
      targetWords.push(...assignTimingToReplacement(nextPatch.value, currentPatch.value))
    } else if (currentPatch.added && currentPatch.value) {
      // ADD
      const wordAtChangeIndex = referenceWords[referenceIndex]
      if (wordAtChangeIndex) {
        targetWords.push(...assignTimingToInsertion(wordAtChangeIndex, currentPatch.value))
        continue
      }

      // HACK: Edge case: Only added words at the end. In this case we just add them with zero duration.
      // Ideally we should try to split the timing of the previous words somehow.
      const lastWord = targetWords.at(-1)
      if (!lastWord) {
        // Nothing to anchor the timing to: the added words are dropped.
        continue
      }
      for (const word of currentPatch.value) {
        targetWords.push({
          start: lastWord.end,
          end: lastWord.end,
          text: word.text,
          p: 1,
        })
      }
    } else if (currentPatch.removed) {
      // REMOVE
      referenceIndex += currentPatch.count
    } else if (!currentPatch.added) {
      // KEEP: take the words from the reference list (not from the patch
      // value), so the timing information is carried over.
      targetWords.push(
        ...referenceWords.slice(referenceIndex, referenceIndex + currentPatch.count),
      )
      referenceIndex += currentPatch.count
    }
  }

  return targetWords
}
95
+
96
/**
 * Gives timing to words that replace a run of removed reference words.
 *
 * When the counts match, each new word inherits the start/end of the removed
 * word at the same position. Otherwise the span from the first removed word's
 * start to the last removed word's end is distributed over the new words via
 * `interpolate`. Every returned word gets `p: 1`.
 *
 * @param {Array<{ text: string }>} added - The replacement words.
 * @param {Word[]} removedWord - The timed reference words being replaced.
 * @return {Word[]}
 * @throws {Error} If the counts differ and `removedWord` is empty.
 */
function assignTimingToReplacement(added, removedWord) {
  if (added.length !== removedWord.length) {
    // Different number of words, so the timing has to be interpolated
    // across the whole span that the removed words covered.
    const spanHead = removedWord.at(0)
    const spanTail = removedWord.at(-1)

    if (!spanHead || !spanTail) {
      throw new Error('Expected first and last removed word')
    }

    const replacementText = added.map((word) => word.text).join(' ')
    const interpolatedWords = interpolate({
      start: spanHead.start,
      end: spanTail.end,
      text: replacementText,
    })

    return interpolatedWords.map((interpolatedWord) => ({ ...interpolatedWord, p: 1 }))
  }

  // Same number of words: assume the original timing still applies one-to-one,
  // even if the word lengths may have changed.
  const retimedWords = []
  for (let i = 0; i < added.length; i++) {
    const { start, end } = removedWord[i]
    retimedWords.push({ text: added[i].text, start, end, p: 1 })
  }
  return retimedWords
}
130
+
131
/**
 * Gives timing to words inserted in front of an existing reference word.
 *
 * Each inserted word keeps its own properties, but `start` and `end` are both
 * set to the start of `wordAtChangeIndex` (zero duration) and `p` is set to 1.
 *
 * @param {Word} wordAtChangeIndex - The reference word the insertion happens before.
 * @param {Array<{ text: string }>} added - The inserted words.
 * @return {Word[]}
 */
function assignTimingToInsertion(wordAtChangeIndex, added) {
  const insertedWords = []

  for (const insertedWord of added) {
    // NOTE:
    // Currently, added (non-replaced) words always have zero duration.
    // Could possibly detect if there is a gap available in the original timing,
    // and use that somehow...
    const anchorTime = wordAtChangeIndex.start
    insertedWords.push({ ...insertedWord, start: anchorTime, end: anchorTime, p: 1 })
  }

  return insertedWords
}
148
+
149
/**
 * Type guard: checks that `input` is an array of timed transcript words.
 *
 * A valid word is a non-null object with a numeric `start`, a numeric `end`
 * and a string `text`. `p` is optional (matching the `Word` type), but when it
 * is present it must be a number.
 *
 * @param {unknown} input
 * @return {input is Word[]}
 */
function validateTranscriptWords(input) {
  if (!Array.isArray(input)) {
    return false
  }

  return input.every(
    (word) =>
      typeof word === 'object' &&
      word !== null &&
      typeof word.start === 'number' &&
      typeof word.end === 'number' &&
      typeof word.text === 'string' &&
      // `p` is declared optional on Word, so a missing value must not
      // invalidate an otherwise well-formed word.
      (word.p === undefined || typeof word.p === 'number'),
  )
}
171
+
172
const zeroTimeCharacters = ['\\s', '\\.', ',', ':', '!', '?', '\\(', '\\)'].join('')

// Compiled once at module load instead of once per word.
const zeroTimeCharactersPattern = new RegExp(`[${zeroTimeCharacters}]`, 'g')

/**
 * Splits `args.text` into whitespace-separated words and distributes the time
 * span `[args.start, args.end]` over them, proportionally to each word's
 * length. Whitespace and the punctuation listed in `zeroTimeCharacters` do not
 * count towards a word's length.
 *
 * Intermediate boundaries are rounded to two decimals; the last word always
 * ends exactly at `args.end`. Empty tokens (from leading, trailing or repeated
 * whitespace) are ignored, so text without any words yields an empty array.
 *
 * @param {object} args
 * @param {string} args.text
 * @param {number} args.start
 * @param {number} args.end
 * @return {Array<{ start: number; end: number; text: string }>}
 */
export function interpolate(args) {
  const splitWords = args.text
    .split(/\s+/)
    .filter((word) => word.length > 0)
    .map((word) => ({
      word,
      timeUnits: word.replace(zeroTimeCharactersPattern, '').length,
    }))

  let totalTimeUnits = splitWords.reduce((acc, word) => acc + word.timeUnits, 0)

  if (totalTimeUnits === 0) {
    // Can happen if the "sentence" for some reason only contains punctuation.
    // In that case it doesn't really matter which value we give totalTimeUnits,
    // as long as it's not 0 (since it's the denominator below).
    totalTimeUnits = 1
  }

  const secondsPerCharacter = (args.end - args.start) / totalTimeUnits
  const lastIndex = splitWords.length - 1
  let accumulatedCharacterCount = 0

  return splitWords.map((splitWord, splitWordIndex) => {
    const start = round(args.start + secondsPerCharacter * accumulatedCharacterCount, 2)
    accumulatedCharacterCount += splitWord.timeUnits
    // Pin the final boundary to args.end so rounding can never leave a gap or
    // an overshoot at the end of the span.
    const end =
      splitWordIndex === lastIndex
        ? args.end
        : round(args.start + secondsPerCharacter * accumulatedCharacterCount, 2)

    return {
      start,
      end,
      text: splitWord.word,
    }
  })
}