@okf/ootils 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,48 @@ interface RichTextValue {
29
29
  [key: string]: any;
30
30
  }
31
31
  type TagNameInput = string | number | RichTextValue | null | undefined;
32
+ /**
33
+ * Generate a normalized tag ID from a tag name
34
+ *
35
+ * This function performs aggressive normalization to ensure that similar-looking
36
+ * tag names produce the same ID, preventing duplicates and collision errors.
37
+ *
38
+ * @param tagName - The tag name (string, number, or rich text object)
39
+ * @returns A normalized tag ID string
40
+ *
41
+ * @example
42
+ * ```typescript
43
+ * genTagId('Post-COVID') // → 'post_covid'
44
+ * genTagId('Post—COVID') // → 'post_covid' (em-dash)
45
+ * genTagId('Post–COVID') // → 'post_covid' (en-dash)
46
+ * genTagId('café') // → 'cafe'
47
+ * genTagId('CAFÉ') // → 'cafe'
48
+ * genTagId('Café') // → 'cafe'
49
+ * genTagId('C++') // → 'cplusplus'
50
+ * genTagId('New  York') // → 'new_york' (double space)
51
+ * genTagId('José Müller') // → 'jose_muller'
52
+ * ```
53
+ *
54
+ * Normalization rules (applied in order):
55
+ * 1. Convert rich text to plain text
56
+ * 2. Normalize Unicode to NFD (Canonical Decomposition)
57
+ * 3. Remove all diacritical marks (accents, umlauts, etc.)
58
+ * 4. Trim whitespace from both ends
59
+ * 5. Convert to lowercase
60
+ * 6. Replace each run of characters other than letters, digits, and '+' with a single underscore
61
+ * 7. Replace '+' with 'plus'
62
+ * 8. Remove leading/trailing underscores
63
+ *
64
+ * Character variants that normalize to the same ID:
65
+ * - Dashes: hyphen (-), en-dash (–), em-dash (—), minus (−)
66
+ * - Whitespace: space, multiple spaces, tab, newline, non-breaking space
67
+ * - Quotes: straight ("), curly left (“), curly right (”), backtick (`)
68
+ * - Apostrophes: straight ('), curly (’)
69
+ * - Accents: café/cafe, José/Jose, Müller/Muller, naïve/naive
70
+ * - Case: TestTag/testtag/TESTTAG
71
+ *
72
+ * @see TAG_NORMALIZATION_GUIDE.md in okf-be for comprehensive documentation
73
+ */
32
74
  declare const genTagId: (tagName: TagNameInput) => string;
33
75
 
34
76
  /**
@@ -277,13 +319,6 @@ declare namespace BASE_BULLMQ_CONFIG {
277
319
  export namespace workerConfig_1 {
278
320
  let concurrency_1: number;
279
321
  export { concurrency_1 as concurrency };
280
- export namespace limiter_1 {
281
- let max_1: number;
282
- export { max_1 as max };
283
- let duration_1: number;
284
- export { duration_1 as duration };
285
- }
286
- export { limiter_1 as limiter };
287
322
  }
288
323
  export { workerConfig_1 as workerConfig };
289
324
  }
@@ -320,13 +355,6 @@ declare namespace BASE_BULLMQ_CONFIG {
320
355
  export namespace workerConfig_2 {
321
356
  let concurrency_2: number;
322
357
  export { concurrency_2 as concurrency };
323
- export namespace limiter_2 {
324
- let max_2: number;
325
- export { max_2 as max };
326
- let duration_2: number;
327
- export { duration_2 as duration };
328
- }
329
- export { limiter_2 as limiter };
330
358
  }
331
359
  export { workerConfig_2 as workerConfig };
332
360
  }
@@ -363,13 +391,6 @@ declare namespace BASE_BULLMQ_CONFIG {
363
391
  export namespace workerConfig_3 {
364
392
  let concurrency_3: number;
365
393
  export { concurrency_3 as concurrency };
366
- export namespace limiter_3 {
367
- let max_3: number;
368
- export { max_3 as max };
369
- let duration_3: number;
370
- export { duration_3 as duration };
371
- }
372
- export { limiter_3 as limiter };
373
394
  }
374
395
  export { workerConfig_3 as workerConfig };
375
396
  }
@@ -406,13 +427,6 @@ declare namespace BASE_BULLMQ_CONFIG {
406
427
  export namespace workerConfig_4 {
407
428
  let concurrency_4: number;
408
429
  export { concurrency_4 as concurrency };
409
- export namespace limiter_4 {
410
- let max_4: number;
411
- export { max_4 as max };
412
- let duration_4: number;
413
- export { duration_4 as duration };
414
- }
415
- export { limiter_4 as limiter };
416
430
  }
417
431
  export { workerConfig_4 as workerConfig };
418
432
  }
@@ -449,13 +463,6 @@ declare namespace BASE_BULLMQ_CONFIG {
449
463
  export namespace workerConfig_5 {
450
464
  let concurrency_5: number;
451
465
  export { concurrency_5 as concurrency };
452
- export namespace limiter_5 {
453
- let max_5: number;
454
- export { max_5 as max };
455
- let duration_5: number;
456
- export { duration_5 as duration };
457
- }
458
- export { limiter_5 as limiter };
459
466
  }
460
467
  export { workerConfig_5 as workerConfig };
461
468
  }
@@ -492,13 +499,6 @@ declare namespace BASE_BULLMQ_CONFIG {
492
499
  export namespace workerConfig_6 {
493
500
  let concurrency_6: number;
494
501
  export { concurrency_6 as concurrency };
495
- export namespace limiter_6 {
496
- let max_6: number;
497
- export { max_6 as max };
498
- let duration_6: number;
499
- export { duration_6 as duration };
500
- }
501
- export { limiter_6 as limiter };
502
502
  }
503
503
  export { workerConfig_6 as workerConfig };
504
504
  }
@@ -535,13 +535,6 @@ declare namespace BASE_BULLMQ_CONFIG {
535
535
  export namespace workerConfig_7 {
536
536
  let concurrency_7: number;
537
537
  export { concurrency_7 as concurrency };
538
- export namespace limiter_7 {
539
- let max_7: number;
540
- export { max_7 as max };
541
- let duration_7: number;
542
- export { duration_7 as duration };
543
- }
544
- export { limiter_7 as limiter };
545
538
  }
546
539
  export { workerConfig_7 as workerConfig };
547
540
  }
@@ -578,13 +571,6 @@ declare namespace BASE_BULLMQ_CONFIG {
578
571
  export namespace workerConfig_8 {
579
572
  let concurrency_8: number;
580
573
  export { concurrency_8 as concurrency };
581
- export namespace limiter_8 {
582
- let max_8: number;
583
- export { max_8 as max };
584
- let duration_8: number;
585
- export { duration_8 as duration };
586
- }
587
- export { limiter_8 as limiter };
588
574
  }
589
575
  export { workerConfig_8 as workerConfig };
590
576
  }
@@ -29,6 +29,48 @@ interface RichTextValue {
29
29
  [key: string]: any;
30
30
  }
31
31
  type TagNameInput = string | number | RichTextValue | null | undefined;
32
+ /**
33
+ * Generate a normalized tag ID from a tag name
34
+ *
35
+ * This function performs aggressive normalization to ensure that similar-looking
36
+ * tag names produce the same ID, preventing duplicates and collision errors.
37
+ *
38
+ * @param tagName - The tag name (string, number, or rich text object)
39
+ * @returns A normalized tag ID string
40
+ *
41
+ * @example
42
+ * ```typescript
43
+ * genTagId('Post-COVID') // → 'post_covid'
44
+ * genTagId('Post—COVID') // → 'post_covid' (em-dash)
45
+ * genTagId('Post–COVID') // → 'post_covid' (en-dash)
46
+ * genTagId('café') // → 'cafe'
47
+ * genTagId('CAFÉ') // → 'cafe'
48
+ * genTagId('Café') // → 'cafe'
49
+ * genTagId('C++') // → 'cplusplus'
50
+ * genTagId('New  York') // → 'new_york' (double space)
51
+ * genTagId('José Müller') // → 'jose_muller'
52
+ * ```
53
+ *
54
+ * Normalization rules (applied in order):
55
+ * 1. Convert rich text to plain text
56
+ * 2. Normalize Unicode to NFD (Canonical Decomposition)
57
+ * 3. Remove all diacritical marks (accents, umlauts, etc.)
58
+ * 4. Trim whitespace from both ends
59
+ * 5. Convert to lowercase
60
+ * 6. Replace each run of characters other than letters, digits, and '+' with a single underscore
61
+ * 7. Replace '+' with 'plus'
62
+ * 8. Remove leading/trailing underscores
63
+ *
64
+ * Character variants that normalize to the same ID:
65
+ * - Dashes: hyphen (-), en-dash (–), em-dash (—), minus (−)
66
+ * - Whitespace: space, multiple spaces, tab, newline, non-breaking space
67
+ * - Quotes: straight ("), curly left (“), curly right (”), backtick (`)
68
+ * - Apostrophes: straight ('), curly (’)
69
+ * - Accents: café/cafe, José/Jose, Müller/Muller, naïve/naive
70
+ * - Case: TestTag/testtag/TESTTAG
71
+ *
72
+ * @see TAG_NORMALIZATION_GUIDE.md in okf-be for comprehensive documentation
73
+ */
32
74
  declare const genTagId: (tagName: TagNameInput) => string;
33
75
 
34
76
  /**
@@ -277,13 +319,6 @@ declare namespace BASE_BULLMQ_CONFIG {
277
319
  export namespace workerConfig_1 {
278
320
  let concurrency_1: number;
279
321
  export { concurrency_1 as concurrency };
280
- export namespace limiter_1 {
281
- let max_1: number;
282
- export { max_1 as max };
283
- let duration_1: number;
284
- export { duration_1 as duration };
285
- }
286
- export { limiter_1 as limiter };
287
322
  }
288
323
  export { workerConfig_1 as workerConfig };
289
324
  }
@@ -320,13 +355,6 @@ declare namespace BASE_BULLMQ_CONFIG {
320
355
  export namespace workerConfig_2 {
321
356
  let concurrency_2: number;
322
357
  export { concurrency_2 as concurrency };
323
- export namespace limiter_2 {
324
- let max_2: number;
325
- export { max_2 as max };
326
- let duration_2: number;
327
- export { duration_2 as duration };
328
- }
329
- export { limiter_2 as limiter };
330
358
  }
331
359
  export { workerConfig_2 as workerConfig };
332
360
  }
@@ -363,13 +391,6 @@ declare namespace BASE_BULLMQ_CONFIG {
363
391
  export namespace workerConfig_3 {
364
392
  let concurrency_3: number;
365
393
  export { concurrency_3 as concurrency };
366
- export namespace limiter_3 {
367
- let max_3: number;
368
- export { max_3 as max };
369
- let duration_3: number;
370
- export { duration_3 as duration };
371
- }
372
- export { limiter_3 as limiter };
373
394
  }
374
395
  export { workerConfig_3 as workerConfig };
375
396
  }
@@ -406,13 +427,6 @@ declare namespace BASE_BULLMQ_CONFIG {
406
427
  export namespace workerConfig_4 {
407
428
  let concurrency_4: number;
408
429
  export { concurrency_4 as concurrency };
409
- export namespace limiter_4 {
410
- let max_4: number;
411
- export { max_4 as max };
412
- let duration_4: number;
413
- export { duration_4 as duration };
414
- }
415
- export { limiter_4 as limiter };
416
430
  }
417
431
  export { workerConfig_4 as workerConfig };
418
432
  }
@@ -449,13 +463,6 @@ declare namespace BASE_BULLMQ_CONFIG {
449
463
  export namespace workerConfig_5 {
450
464
  let concurrency_5: number;
451
465
  export { concurrency_5 as concurrency };
452
- export namespace limiter_5 {
453
- let max_5: number;
454
- export { max_5 as max };
455
- let duration_5: number;
456
- export { duration_5 as duration };
457
- }
458
- export { limiter_5 as limiter };
459
466
  }
460
467
  export { workerConfig_5 as workerConfig };
461
468
  }
@@ -492,13 +499,6 @@ declare namespace BASE_BULLMQ_CONFIG {
492
499
  export namespace workerConfig_6 {
493
500
  let concurrency_6: number;
494
501
  export { concurrency_6 as concurrency };
495
- export namespace limiter_6 {
496
- let max_6: number;
497
- export { max_6 as max };
498
- let duration_6: number;
499
- export { duration_6 as duration };
500
- }
501
- export { limiter_6 as limiter };
502
502
  }
503
503
  export { workerConfig_6 as workerConfig };
504
504
  }
@@ -535,13 +535,6 @@ declare namespace BASE_BULLMQ_CONFIG {
535
535
  export namespace workerConfig_7 {
536
536
  let concurrency_7: number;
537
537
  export { concurrency_7 as concurrency };
538
- export namespace limiter_7 {
539
- let max_7: number;
540
- export { max_7 as max };
541
- let duration_7: number;
542
- export { duration_7 as duration };
543
- }
544
- export { limiter_7 as limiter };
545
538
  }
546
539
  export { workerConfig_7 as workerConfig };
547
540
  }
@@ -578,13 +571,6 @@ declare namespace BASE_BULLMQ_CONFIG {
578
571
  export namespace workerConfig_8 {
579
572
  let concurrency_8: number;
580
573
  export { concurrency_8 as concurrency };
581
- export namespace limiter_8 {
582
- let max_8: number;
583
- export { max_8 as max };
584
- let duration_8: number;
585
- export { duration_8 as duration };
586
- }
587
- export { limiter_8 as limiter };
588
574
  }
589
575
  export { workerConfig_8 as workerConfig };
590
576
  }
package/dist/universal.js CHANGED
@@ -184,6 +184,8 @@ var convertFromRichText = (value) => {
184
184
  };
185
185
  var genTagId = (tagName) => {
186
186
  let toReturn = convertFromRichText(tagName);
187
+ toReturn = toReturn.normalize("NFD");
188
+ toReturn = toReturn.replace(/\p{Mn}/gu, "");
187
189
  const regex = /[^\p{L}\p{N}\+]+/gui;
188
190
  toReturn = toReturn.trim().toLowerCase().replace(regex, "_");
189
191
  toReturn = toReturn.replace(/\+/g, "plus");
@@ -475,14 +477,7 @@ var BASE_BULLMQ_CONFIG = {
475
477
  }
476
478
  },
477
479
  workerConfig: {
478
- concurrency: 10,
479
- // Process 10 jobs at once for chunk processing
480
- limiter: {
481
- max: 200,
482
- // Max 5 jobs per...
483
- duration: 6e4
484
- // ...60 seconds (higher throughput for chunking)
485
- }
480
+ concurrency: 50
486
481
  }
487
482
  },
488
483
  CREATE_ANNOS_QUEUE: {
@@ -505,14 +500,7 @@ var BASE_BULLMQ_CONFIG = {
505
500
  }
506
501
  },
507
502
  workerConfig: {
508
- concurrency: 10,
509
- // Process 10 jobs at once for chunk processing
510
- limiter: {
511
- max: 100,
512
- // Max 50 jobs per...
513
- duration: 6e4
514
- // ...60 seconds (higher throughput for chunking)
515
- }
503
+ concurrency: 50
516
504
  }
517
505
  },
518
506
  CONTENT_ENHANCE_AND_EMBED_QUEUE: {
@@ -535,13 +523,7 @@ var BASE_BULLMQ_CONFIG = {
535
523
  }
536
524
  },
537
525
  workerConfig: {
538
- concurrency: 1,
539
- limiter: {
540
- max: 200,
541
- // Max 50 jobs per...
542
- duration: 6e4
543
- // ...60 seconds (higher throughput for chunking)
544
- }
526
+ concurrency: 1
545
527
  }
546
528
  },
547
529
  DIRECT_DATA_IMPORT_QUEUE: {
@@ -564,14 +546,7 @@ var BASE_BULLMQ_CONFIG = {
564
546
  }
565
547
  },
566
548
  workerConfig: {
567
- concurrency: 1,
568
- // Cannot mess with this else duplicate options in tpl, maybe even duplicate tags
569
- limiter: {
570
- max: 20,
571
- // Max 5 jobs per...
572
- duration: 6e4
573
- // ...60 seconds (higher throughput for chunking)
574
- }
549
+ concurrency: 1
575
550
  }
576
551
  },
577
552
  AI_CATEGORIZE_QUEUE: {
@@ -594,13 +569,7 @@ var BASE_BULLMQ_CONFIG = {
594
569
  }
595
570
  },
596
571
  workerConfig: {
597
- concurrency: 1,
598
- limiter: {
599
- max: 100,
600
- // (lets always keep this same as content enhance & embed since it comes immediately after)
601
- duration: 6e4
602
- // ...60 seconds (higher throughput for chunking)
603
- }
572
+ concurrency: 1
604
573
  }
605
574
  },
606
575
  ANNOS_ELASTIC_SYNC_QUEUE: {
@@ -623,13 +592,7 @@ var BASE_BULLMQ_CONFIG = {
623
592
  }
624
593
  },
625
594
  workerConfig: {
626
- concurrency: 5,
627
- limiter: {
628
- max: 100,
629
- // (lets always keep this same as content enhance & embed since it comes immediately after)
630
- duration: 6e4
631
- // ...60 seconds (higher throughput for chunking)
632
- }
595
+ concurrency: 20
633
596
  }
634
597
  },
635
598
  CHUNKS_ELASTIC_SYNC_QUEUE: {
@@ -652,13 +615,7 @@ var BASE_BULLMQ_CONFIG = {
652
615
  }
653
616
  },
654
617
  workerConfig: {
655
- concurrency: 5,
656
- limiter: {
657
- max: 200,
658
- // (lets always keep this same as content enhance & embed since it comes immediately after)
659
- duration: 6e4
660
- // ...60 seconds (higher throughput for chunking)
661
- }
618
+ concurrency: 20
662
619
  }
663
620
  },
664
621
  CONTENT_ELASTIC_SYNC_QUEUE: {
@@ -681,13 +638,7 @@ var BASE_BULLMQ_CONFIG = {
681
638
  }
682
639
  },
683
640
  workerConfig: {
684
- concurrency: 5,
685
- limiter: {
686
- max: 200,
687
- // (lets always keep this same as content enhance & embed since it comes immediately after)
688
- duration: 6e4
689
- // ...60 seconds (higher throughput for chunking)
690
- }
641
+ concurrency: 20
691
642
  }
692
643
  },
693
644
  REINDEX_QUEUE: {
@@ -148,6 +148,8 @@ var convertFromRichText = (value) => {
148
148
  };
149
149
  var genTagId = (tagName) => {
150
150
  let toReturn = convertFromRichText(tagName);
151
+ toReturn = toReturn.normalize("NFD");
152
+ toReturn = toReturn.replace(/\p{Mn}/gu, "");
151
153
  const regex = /[^\p{L}\p{N}\+]+/gui;
152
154
  toReturn = toReturn.trim().toLowerCase().replace(regex, "_");
153
155
  toReturn = toReturn.replace(/\+/g, "plus");
@@ -439,14 +441,7 @@ var BASE_BULLMQ_CONFIG = {
439
441
  }
440
442
  },
441
443
  workerConfig: {
442
- concurrency: 10,
443
- // Process 10 jobs at once for chunk processing
444
- limiter: {
445
- max: 200,
446
- // Max 5 jobs per...
447
- duration: 6e4
448
- // ...60 seconds (higher throughput for chunking)
449
- }
444
+ concurrency: 50
450
445
  }
451
446
  },
452
447
  CREATE_ANNOS_QUEUE: {
@@ -469,14 +464,7 @@ var BASE_BULLMQ_CONFIG = {
469
464
  }
470
465
  },
471
466
  workerConfig: {
472
- concurrency: 10,
473
- // Process 10 jobs at once for chunk processing
474
- limiter: {
475
- max: 100,
476
- // Max 50 jobs per...
477
- duration: 6e4
478
- // ...60 seconds (higher throughput for chunking)
479
- }
467
+ concurrency: 50
480
468
  }
481
469
  },
482
470
  CONTENT_ENHANCE_AND_EMBED_QUEUE: {
@@ -499,13 +487,7 @@ var BASE_BULLMQ_CONFIG = {
499
487
  }
500
488
  },
501
489
  workerConfig: {
502
- concurrency: 1,
503
- limiter: {
504
- max: 200,
505
- // Max 50 jobs per...
506
- duration: 6e4
507
- // ...60 seconds (higher throughput for chunking)
508
- }
490
+ concurrency: 1
509
491
  }
510
492
  },
511
493
  DIRECT_DATA_IMPORT_QUEUE: {
@@ -528,14 +510,7 @@ var BASE_BULLMQ_CONFIG = {
528
510
  }
529
511
  },
530
512
  workerConfig: {
531
- concurrency: 1,
532
- // Cannot mess with this else duplicate options in tpl, maybe even duplicate tags
533
- limiter: {
534
- max: 20,
535
- // Max 5 jobs per...
536
- duration: 6e4
537
- // ...60 seconds (higher throughput for chunking)
538
- }
513
+ concurrency: 1
539
514
  }
540
515
  },
541
516
  AI_CATEGORIZE_QUEUE: {
@@ -558,13 +533,7 @@ var BASE_BULLMQ_CONFIG = {
558
533
  }
559
534
  },
560
535
  workerConfig: {
561
- concurrency: 1,
562
- limiter: {
563
- max: 100,
564
- // (lets always keep this same as content enhance & embed since it comes immediately after)
565
- duration: 6e4
566
- // ...60 seconds (higher throughput for chunking)
567
- }
536
+ concurrency: 1
568
537
  }
569
538
  },
570
539
  ANNOS_ELASTIC_SYNC_QUEUE: {
@@ -587,13 +556,7 @@ var BASE_BULLMQ_CONFIG = {
587
556
  }
588
557
  },
589
558
  workerConfig: {
590
- concurrency: 5,
591
- limiter: {
592
- max: 100,
593
- // (lets always keep this same as content enhance & embed since it comes immediately after)
594
- duration: 6e4
595
- // ...60 seconds (higher throughput for chunking)
596
- }
559
+ concurrency: 20
597
560
  }
598
561
  },
599
562
  CHUNKS_ELASTIC_SYNC_QUEUE: {
@@ -616,13 +579,7 @@ var BASE_BULLMQ_CONFIG = {
616
579
  }
617
580
  },
618
581
  workerConfig: {
619
- concurrency: 5,
620
- limiter: {
621
- max: 200,
622
- // (lets always keep this same as content enhance & embed since it comes immediately after)
623
- duration: 6e4
624
- // ...60 seconds (higher throughput for chunking)
625
- }
582
+ concurrency: 20
626
583
  }
627
584
  },
628
585
  CONTENT_ELASTIC_SYNC_QUEUE: {
@@ -645,13 +602,7 @@ var BASE_BULLMQ_CONFIG = {
645
602
  }
646
603
  },
647
604
  workerConfig: {
648
- concurrency: 5,
649
- limiter: {
650
- max: 200,
651
- // (lets always keep this same as content enhance & embed since it comes immediately after)
652
- duration: 6e4
653
- // ...60 seconds (higher throughput for chunking)
654
- }
605
+ concurrency: 20
655
606
  }
656
607
  },
657
608
  REINDEX_QUEUE: {
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "publishConfig": {
4
4
  "access": "public"
5
5
  },
6
- "version": "1.9.0",
6
+ "version": "1.10.0",
7
7
  "description": "Utility functions for both browser and Node.js",
8
8
  "main": "dist/index.js",
9
9
  "module": "dist/index.mjs",