@okf/ootils 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/node.d.mts CHANGED
@@ -36,6 +36,48 @@ interface RichTextValue {
36
36
  [key: string]: any;
37
37
  }
38
38
  type TagNameInput = string | number | RichTextValue | null | undefined;
39
+ /**
40
+ * Generate a normalized tag ID from a tag name
41
+ *
42
+ * This function performs aggressive normalization to ensure that similar-looking
43
+ * tag names produce the same ID, preventing duplicates and collision errors.
44
+ *
45
+ * @param tagName - The tag name (string, number, or rich text object)
46
+ * @returns A normalized tag ID string
47
+ *
48
+ * @example
49
+ * ```typescript
50
+ * genTagId('Post-COVID') // → 'post_covid'
51
+ * genTagId('Post—COVID') // → 'post_covid' (em-dash)
52
+ * genTagId('Post–COVID') // → 'post_covid' (en-dash)
53
+ * genTagId('café') // → 'cafe'
54
+ * genTagId('CAFÉ') // → 'cafe'
55
+ * genTagId('Café') // → 'cafe'
56
+ * genTagId('C++') // → 'cplusplus'
57
+ * genTagId('New  York') // → 'new_york' (double space)
58
+ * genTagId('José Müller') // → 'jose_muller'
59
+ * ```
60
+ *
61
+ * Normalization rules (applied in order):
62
+ * 1. Convert rich text to plain text
63
+ * 2. Normalize Unicode to NFD (Canonical Decomposition)
64
+ * 3. Remove all diacritical marks (accents, umlauts, etc.)
65
+ * 4. Trim whitespace from both ends
66
+ * 5. Convert to lowercase
67
+ * 6. Replace each run of non-alphanumeric characters (except '+') with a single underscore
68
+ * 7. Replace '+' with 'plus'
69
+ * 8. Remove leading/trailing underscores
70
+ *
71
+ * Character variants that normalize to the same ID:
72
+ * - Dashes: hyphen (-), en-dash (–), em-dash (—), minus (−)
73
+ * - Whitespace: space, multiple spaces, tab, newline, non-breaking space
74
+ * - Quotes: straight ("), curly left (“), curly right (”), backtick (`)
75
+ * - Apostrophes: straight ('), curly (’)
76
+ * - Accents: café/cafe, José/Jose, Müller/Muller, naïve/naive
77
+ * - Case: TestTag/testtag/TESTTAG
78
+ *
79
+ * @see TAG_NORMALIZATION_GUIDE.md in okf-be for comprehensive documentation
80
+ */
39
81
  declare const genTagId: (tagName: TagNameInput) => string;
40
82
 
41
83
  /**
@@ -284,13 +326,6 @@ declare namespace BASE_BULLMQ_CONFIG {
284
326
  export namespace workerConfig_1 {
285
327
  let concurrency_1: number;
286
328
  export { concurrency_1 as concurrency };
287
- export namespace limiter_1 {
288
- let max_1: number;
289
- export { max_1 as max };
290
- let duration_1: number;
291
- export { duration_1 as duration };
292
- }
293
- export { limiter_1 as limiter };
294
329
  }
295
330
  export { workerConfig_1 as workerConfig };
296
331
  }
@@ -327,13 +362,6 @@ declare namespace BASE_BULLMQ_CONFIG {
327
362
  export namespace workerConfig_2 {
328
363
  let concurrency_2: number;
329
364
  export { concurrency_2 as concurrency };
330
- export namespace limiter_2 {
331
- let max_2: number;
332
- export { max_2 as max };
333
- let duration_2: number;
334
- export { duration_2 as duration };
335
- }
336
- export { limiter_2 as limiter };
337
365
  }
338
366
  export { workerConfig_2 as workerConfig };
339
367
  }
@@ -370,13 +398,6 @@ declare namespace BASE_BULLMQ_CONFIG {
370
398
  export namespace workerConfig_3 {
371
399
  let concurrency_3: number;
372
400
  export { concurrency_3 as concurrency };
373
- export namespace limiter_3 {
374
- let max_3: number;
375
- export { max_3 as max };
376
- let duration_3: number;
377
- export { duration_3 as duration };
378
- }
379
- export { limiter_3 as limiter };
380
401
  }
381
402
  export { workerConfig_3 as workerConfig };
382
403
  }
@@ -413,13 +434,6 @@ declare namespace BASE_BULLMQ_CONFIG {
413
434
  export namespace workerConfig_4 {
414
435
  let concurrency_4: number;
415
436
  export { concurrency_4 as concurrency };
416
- export namespace limiter_4 {
417
- let max_4: number;
418
- export { max_4 as max };
419
- let duration_4: number;
420
- export { duration_4 as duration };
421
- }
422
- export { limiter_4 as limiter };
423
437
  }
424
438
  export { workerConfig_4 as workerConfig };
425
439
  }
@@ -456,13 +470,6 @@ declare namespace BASE_BULLMQ_CONFIG {
456
470
  export namespace workerConfig_5 {
457
471
  let concurrency_5: number;
458
472
  export { concurrency_5 as concurrency };
459
- export namespace limiter_5 {
460
- let max_5: number;
461
- export { max_5 as max };
462
- let duration_5: number;
463
- export { duration_5 as duration };
464
- }
465
- export { limiter_5 as limiter };
466
473
  }
467
474
  export { workerConfig_5 as workerConfig };
468
475
  }
@@ -499,13 +506,6 @@ declare namespace BASE_BULLMQ_CONFIG {
499
506
  export namespace workerConfig_6 {
500
507
  let concurrency_6: number;
501
508
  export { concurrency_6 as concurrency };
502
- export namespace limiter_6 {
503
- let max_6: number;
504
- export { max_6 as max };
505
- let duration_6: number;
506
- export { duration_6 as duration };
507
- }
508
- export { limiter_6 as limiter };
509
509
  }
510
510
  export { workerConfig_6 as workerConfig };
511
511
  }
@@ -542,13 +542,6 @@ declare namespace BASE_BULLMQ_CONFIG {
542
542
  export namespace workerConfig_7 {
543
543
  let concurrency_7: number;
544
544
  export { concurrency_7 as concurrency };
545
- export namespace limiter_7 {
546
- let max_7: number;
547
- export { max_7 as max };
548
- let duration_7: number;
549
- export { duration_7 as duration };
550
- }
551
- export { limiter_7 as limiter };
552
545
  }
553
546
  export { workerConfig_7 as workerConfig };
554
547
  }
@@ -585,13 +578,6 @@ declare namespace BASE_BULLMQ_CONFIG {
585
578
  export namespace workerConfig_8 {
586
579
  let concurrency_8: number;
587
580
  export { concurrency_8 as concurrency };
588
- export namespace limiter_8 {
589
- let max_8: number;
590
- export { max_8 as max };
591
- let duration_8: number;
592
- export { duration_8 as duration };
593
- }
594
- export { limiter_8 as limiter };
595
581
  }
596
582
  export { workerConfig_8 as workerConfig };
597
583
  }
package/dist/node.d.ts CHANGED
@@ -36,6 +36,48 @@ interface RichTextValue {
36
36
  [key: string]: any;
37
37
  }
38
38
  type TagNameInput = string | number | RichTextValue | null | undefined;
39
+ /**
40
+ * Generate a normalized tag ID from a tag name
41
+ *
42
+ * This function performs aggressive normalization to ensure that similar-looking
43
+ * tag names produce the same ID, preventing duplicates and collision errors.
44
+ *
45
+ * @param tagName - The tag name (string, number, or rich text object)
46
+ * @returns A normalized tag ID string
47
+ *
48
+ * @example
49
+ * ```typescript
50
+ * genTagId('Post-COVID') // → 'post_covid'
51
+ * genTagId('Post—COVID') // → 'post_covid' (em-dash)
52
+ * genTagId('Post–COVID') // → 'post_covid' (en-dash)
53
+ * genTagId('café') // → 'cafe'
54
+ * genTagId('CAFÉ') // → 'cafe'
55
+ * genTagId('Café') // → 'cafe'
56
+ * genTagId('C++') // → 'cplusplus'
57
+ * genTagId('New  York') // → 'new_york' (double space)
58
+ * genTagId('José Müller') // → 'jose_muller'
59
+ * ```
60
+ *
61
+ * Normalization rules (applied in order):
62
+ * 1. Convert rich text to plain text
63
+ * 2. Normalize Unicode to NFD (Canonical Decomposition)
64
+ * 3. Remove all diacritical marks (accents, umlauts, etc.)
65
+ * 4. Trim whitespace from both ends
66
+ * 5. Convert to lowercase
67
+ * 6. Replace each run of non-alphanumeric characters (except '+') with a single underscore
68
+ * 7. Replace '+' with 'plus'
69
+ * 8. Remove leading/trailing underscores
70
+ *
71
+ * Character variants that normalize to the same ID:
72
+ * - Dashes: hyphen (-), en-dash (–), em-dash (—), minus (−)
73
+ * - Whitespace: space, multiple spaces, tab, newline, non-breaking space
74
+ * - Quotes: straight ("), curly left (“), curly right (”), backtick (`)
75
+ * - Apostrophes: straight ('), curly (’)
76
+ * - Accents: café/cafe, José/Jose, Müller/Muller, naïve/naive
77
+ * - Case: TestTag/testtag/TESTTAG
78
+ *
79
+ * @see TAG_NORMALIZATION_GUIDE.md in okf-be for comprehensive documentation
80
+ */
39
81
  declare const genTagId: (tagName: TagNameInput) => string;
40
82
 
41
83
  /**
@@ -284,13 +326,6 @@ declare namespace BASE_BULLMQ_CONFIG {
284
326
  export namespace workerConfig_1 {
285
327
  let concurrency_1: number;
286
328
  export { concurrency_1 as concurrency };
287
- export namespace limiter_1 {
288
- let max_1: number;
289
- export { max_1 as max };
290
- let duration_1: number;
291
- export { duration_1 as duration };
292
- }
293
- export { limiter_1 as limiter };
294
329
  }
295
330
  export { workerConfig_1 as workerConfig };
296
331
  }
@@ -327,13 +362,6 @@ declare namespace BASE_BULLMQ_CONFIG {
327
362
  export namespace workerConfig_2 {
328
363
  let concurrency_2: number;
329
364
  export { concurrency_2 as concurrency };
330
- export namespace limiter_2 {
331
- let max_2: number;
332
- export { max_2 as max };
333
- let duration_2: number;
334
- export { duration_2 as duration };
335
- }
336
- export { limiter_2 as limiter };
337
365
  }
338
366
  export { workerConfig_2 as workerConfig };
339
367
  }
@@ -370,13 +398,6 @@ declare namespace BASE_BULLMQ_CONFIG {
370
398
  export namespace workerConfig_3 {
371
399
  let concurrency_3: number;
372
400
  export { concurrency_3 as concurrency };
373
- export namespace limiter_3 {
374
- let max_3: number;
375
- export { max_3 as max };
376
- let duration_3: number;
377
- export { duration_3 as duration };
378
- }
379
- export { limiter_3 as limiter };
380
401
  }
381
402
  export { workerConfig_3 as workerConfig };
382
403
  }
@@ -413,13 +434,6 @@ declare namespace BASE_BULLMQ_CONFIG {
413
434
  export namespace workerConfig_4 {
414
435
  let concurrency_4: number;
415
436
  export { concurrency_4 as concurrency };
416
- export namespace limiter_4 {
417
- let max_4: number;
418
- export { max_4 as max };
419
- let duration_4: number;
420
- export { duration_4 as duration };
421
- }
422
- export { limiter_4 as limiter };
423
437
  }
424
438
  export { workerConfig_4 as workerConfig };
425
439
  }
@@ -456,13 +470,6 @@ declare namespace BASE_BULLMQ_CONFIG {
456
470
  export namespace workerConfig_5 {
457
471
  let concurrency_5: number;
458
472
  export { concurrency_5 as concurrency };
459
- export namespace limiter_5 {
460
- let max_5: number;
461
- export { max_5 as max };
462
- let duration_5: number;
463
- export { duration_5 as duration };
464
- }
465
- export { limiter_5 as limiter };
466
473
  }
467
474
  export { workerConfig_5 as workerConfig };
468
475
  }
@@ -499,13 +506,6 @@ declare namespace BASE_BULLMQ_CONFIG {
499
506
  export namespace workerConfig_6 {
500
507
  let concurrency_6: number;
501
508
  export { concurrency_6 as concurrency };
502
- export namespace limiter_6 {
503
- let max_6: number;
504
- export { max_6 as max };
505
- let duration_6: number;
506
- export { duration_6 as duration };
507
- }
508
- export { limiter_6 as limiter };
509
509
  }
510
510
  export { workerConfig_6 as workerConfig };
511
511
  }
@@ -542,13 +542,6 @@ declare namespace BASE_BULLMQ_CONFIG {
542
542
  export namespace workerConfig_7 {
543
543
  let concurrency_7: number;
544
544
  export { concurrency_7 as concurrency };
545
- export namespace limiter_7 {
546
- let max_7: number;
547
- export { max_7 as max };
548
- let duration_7: number;
549
- export { duration_7 as duration };
550
- }
551
- export { limiter_7 as limiter };
552
545
  }
553
546
  export { workerConfig_7 as workerConfig };
554
547
  }
@@ -585,13 +578,6 @@ declare namespace BASE_BULLMQ_CONFIG {
585
578
  export namespace workerConfig_8 {
586
579
  let concurrency_8: number;
587
580
  export { concurrency_8 as concurrency };
588
- export namespace limiter_8 {
589
- let max_8: number;
590
- export { max_8 as max };
591
- let duration_8: number;
592
- export { duration_8 as duration };
593
- }
594
- export { limiter_8 as limiter };
595
581
  }
596
582
  export { workerConfig_8 as workerConfig };
597
583
  }
package/dist/node.js CHANGED
@@ -96,14 +96,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
96
96
  }
97
97
  },
98
98
  workerConfig: {
99
- concurrency: 10,
100
- // Process 10 jobs at once for chunk processing
101
- limiter: {
102
- max: 200,
103
- // Max 5 jobs per...
104
- duration: 6e4
105
- // ...60 seconds (higher throughput for chunking)
106
- }
99
+ concurrency: 50
107
100
  }
108
101
  },
109
102
  CREATE_ANNOS_QUEUE: {
@@ -126,14 +119,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
126
119
  }
127
120
  },
128
121
  workerConfig: {
129
- concurrency: 10,
130
- // Process 10 jobs at once for chunk processing
131
- limiter: {
132
- max: 100,
133
- // Max 50 jobs per...
134
- duration: 6e4
135
- // ...60 seconds (higher throughput for chunking)
136
- }
122
+ concurrency: 50
137
123
  }
138
124
  },
139
125
  CONTENT_ENHANCE_AND_EMBED_QUEUE: {
@@ -156,13 +142,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
156
142
  }
157
143
  },
158
144
  workerConfig: {
159
- concurrency: 1,
160
- limiter: {
161
- max: 200,
162
- // Max 50 jobs per...
163
- duration: 6e4
164
- // ...60 seconds (higher throughput for chunking)
165
- }
145
+ concurrency: 1
166
146
  }
167
147
  },
168
148
  DIRECT_DATA_IMPORT_QUEUE: {
@@ -185,14 +165,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
185
165
  }
186
166
  },
187
167
  workerConfig: {
188
- concurrency: 1,
189
- // Cannot mess with this else duplicate options in tpl, maybe even duplicate tags
190
- limiter: {
191
- max: 20,
192
- // Max 5 jobs per...
193
- duration: 6e4
194
- // ...60 seconds (higher throughput for chunking)
195
- }
168
+ concurrency: 1
196
169
  }
197
170
  },
198
171
  AI_CATEGORIZE_QUEUE: {
@@ -215,13 +188,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
215
188
  }
216
189
  },
217
190
  workerConfig: {
218
- concurrency: 1,
219
- limiter: {
220
- max: 100,
221
- // (lets always keep this same as content enhance & embed since it comes immediately after)
222
- duration: 6e4
223
- // ...60 seconds (higher throughput for chunking)
224
- }
191
+ concurrency: 1
225
192
  }
226
193
  },
227
194
  ANNOS_ELASTIC_SYNC_QUEUE: {
@@ -244,13 +211,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
244
211
  }
245
212
  },
246
213
  workerConfig: {
247
- concurrency: 5,
248
- limiter: {
249
- max: 100,
250
- // (lets always keep this same as content enhance & embed since it comes immediately after)
251
- duration: 6e4
252
- // ...60 seconds (higher throughput for chunking)
253
- }
214
+ concurrency: 20
254
215
  }
255
216
  },
256
217
  CHUNKS_ELASTIC_SYNC_QUEUE: {
@@ -273,13 +234,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
273
234
  }
274
235
  },
275
236
  workerConfig: {
276
- concurrency: 5,
277
- limiter: {
278
- max: 200,
279
- // (lets always keep this same as content enhance & embed since it comes immediately after)
280
- duration: 6e4
281
- // ...60 seconds (higher throughput for chunking)
282
- }
237
+ concurrency: 20
283
238
  }
284
239
  },
285
240
  CONTENT_ELASTIC_SYNC_QUEUE: {
@@ -302,13 +257,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
302
257
  }
303
258
  },
304
259
  workerConfig: {
305
- concurrency: 5,
306
- limiter: {
307
- max: 200,
308
- // (lets always keep this same as content enhance & embed since it comes immediately after)
309
- duration: 6e4
310
- // ...60 seconds (higher throughput for chunking)
311
- }
260
+ concurrency: 20
312
261
  }
313
262
  },
314
263
  REINDEX_QUEUE: {
@@ -1613,6 +1562,8 @@ var convertFromRichText = (value) => {
1613
1562
  };
1614
1563
  var genTagId = (tagName) => {
1615
1564
  let toReturn = convertFromRichText(tagName);
1565
+ toReturn = toReturn.normalize("NFD");
1566
+ toReturn = toReturn.replace(/\p{Mn}/gu, "");
1616
1567
  const regex = /[^\p{L}\p{N}\+]+/gui;
1617
1568
  toReturn = toReturn.trim().toLowerCase().replace(regex, "_");
1618
1569
  toReturn = toReturn.replace(/\+/g, "plus");
package/dist/node.mjs CHANGED
@@ -101,14 +101,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
101
101
  }
102
102
  },
103
103
  workerConfig: {
104
- concurrency: 10,
105
- // Process 10 jobs at once for chunk processing
106
- limiter: {
107
- max: 200,
108
- // Max 5 jobs per...
109
- duration: 6e4
110
- // ...60 seconds (higher throughput for chunking)
111
- }
104
+ concurrency: 50
112
105
  }
113
106
  },
114
107
  CREATE_ANNOS_QUEUE: {
@@ -131,14 +124,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
131
124
  }
132
125
  },
133
126
  workerConfig: {
134
- concurrency: 10,
135
- // Process 10 jobs at once for chunk processing
136
- limiter: {
137
- max: 100,
138
- // Max 50 jobs per...
139
- duration: 6e4
140
- // ...60 seconds (higher throughput for chunking)
141
- }
127
+ concurrency: 50
142
128
  }
143
129
  },
144
130
  CONTENT_ENHANCE_AND_EMBED_QUEUE: {
@@ -161,13 +147,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
161
147
  }
162
148
  },
163
149
  workerConfig: {
164
- concurrency: 1,
165
- limiter: {
166
- max: 200,
167
- // Max 50 jobs per...
168
- duration: 6e4
169
- // ...60 seconds (higher throughput for chunking)
170
- }
150
+ concurrency: 1
171
151
  }
172
152
  },
173
153
  DIRECT_DATA_IMPORT_QUEUE: {
@@ -190,14 +170,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
190
170
  }
191
171
  },
192
172
  workerConfig: {
193
- concurrency: 1,
194
- // Cannot mess with this else duplicate options in tpl, maybe even duplicate tags
195
- limiter: {
196
- max: 20,
197
- // Max 5 jobs per...
198
- duration: 6e4
199
- // ...60 seconds (higher throughput for chunking)
200
- }
173
+ concurrency: 1
201
174
  }
202
175
  },
203
176
  AI_CATEGORIZE_QUEUE: {
@@ -220,13 +193,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
220
193
  }
221
194
  },
222
195
  workerConfig: {
223
- concurrency: 1,
224
- limiter: {
225
- max: 100,
226
- // (lets always keep this same as content enhance & embed since it comes immediately after)
227
- duration: 6e4
228
- // ...60 seconds (higher throughput for chunking)
229
- }
196
+ concurrency: 1
230
197
  }
231
198
  },
232
199
  ANNOS_ELASTIC_SYNC_QUEUE: {
@@ -249,13 +216,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
249
216
  }
250
217
  },
251
218
  workerConfig: {
252
- concurrency: 5,
253
- limiter: {
254
- max: 100,
255
- // (lets always keep this same as content enhance & embed since it comes immediately after)
256
- duration: 6e4
257
- // ...60 seconds (higher throughput for chunking)
258
- }
219
+ concurrency: 20
259
220
  }
260
221
  },
261
222
  CHUNKS_ELASTIC_SYNC_QUEUE: {
@@ -278,13 +239,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
278
239
  }
279
240
  },
280
241
  workerConfig: {
281
- concurrency: 5,
282
- limiter: {
283
- max: 200,
284
- // (lets always keep this same as content enhance & embed since it comes immediately after)
285
- duration: 6e4
286
- // ...60 seconds (higher throughput for chunking)
287
- }
242
+ concurrency: 20
288
243
  }
289
244
  },
290
245
  CONTENT_ELASTIC_SYNC_QUEUE: {
@@ -307,13 +262,7 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
307
262
  }
308
263
  },
309
264
  workerConfig: {
310
- concurrency: 5,
311
- limiter: {
312
- max: 200,
313
- // (lets always keep this same as content enhance & embed since it comes immediately after)
314
- duration: 6e4
315
- // ...60 seconds (higher throughput for chunking)
316
- }
265
+ concurrency: 20
317
266
  }
318
267
  },
319
268
  REINDEX_QUEUE: {
@@ -1583,6 +1532,8 @@ var convertFromRichText = (value) => {
1583
1532
  };
1584
1533
  var genTagId = (tagName) => {
1585
1534
  let toReturn = convertFromRichText(tagName);
1535
+ toReturn = toReturn.normalize("NFD");
1536
+ toReturn = toReturn.replace(/\p{Mn}/gu, "");
1586
1537
  const regex = /[^\p{L}\p{N}\+]+/gui;
1587
1538
  toReturn = toReturn.trim().toLowerCase().replace(regex, "_");
1588
1539
  toReturn = toReturn.replace(/\+/g, "plus");