@okf/ootils 1.31.1 → 1.31.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/universal.js CHANGED
@@ -796,6 +796,34 @@ var BASE_BULLMQ_CONFIG = {
796
796
  maxStalledCount: 3
797
797
  }
798
798
  },
799
+ SARVAM_TRANSCRIPTION_QUEUE: {
800
+ id: "sarvam-transcription-queue",
801
+ queueConfig: {
802
+ defaultJobOptions: {
803
+ attempts: 10,
804
+ // Sarvam jobs need many poll cycles, not retries per se
805
+ backoff: {
806
+ type: "exponential",
807
+ delay: 15e3
808
+ // First retry at 15s, then 30s, 60s, etc.
809
+ },
810
+ removeOnComplete: 50,
811
+ removeOnFail: 200
812
+ },
813
+ streams: {
814
+ events: {
815
+ maxLen: 10
816
+ }
817
+ }
818
+ },
819
+ workerConfig: {
820
+ concurrency: 5,
821
+ // Multiple transcription polls can run in parallel safely
822
+ lockDuration: 12e4,
823
+ // 2 min — fetching results from Sarvam can be slow
824
+ maxStalledCount: 3
825
+ }
826
+ },
799
827
  REINDEX_QUEUE: {
800
828
  id: "reindex-queue",
801
829
  queueConfig: {
@@ -1426,8 +1454,203 @@ var compareAndGroupBlocks = (blocksPerTpl) => {
1426
1454
  return Array.from(templateGroupToFilters.values());
1427
1455
  };
1428
1456
 
1457
+ // src/blockRegistry/schemaPresets.ts
1458
/**
 * Reusable Mongo schema-type fragments for block descriptors.
 * Each preset is an object of the form `{ type: <constructor> }`.
 */
var MONGO_SCHEMA_PRESETS = Object.fromEntries(
  [
    ["object", Object],
    ["string", String]
  ].map(([presetName, ctor]) => [presetName, { type: ctor }])
);
1462
/**
 * Reusable Elasticsearch mapping fragments for block descriptors.
 * `largeText` maps a single `allText` sub-field as `text` using the
 * "LargeTextAnalyzer" analyzer.
 */
var ELASTIC_MAPPING_PRESETS = (() => {
  const allTextField = {
    type: "text",
    analyzer: "LargeTextAnalyzer"
  };
  const largeText = { properties: { allText: allTextField } };
  return { largeText };
})();
1472
/**
 * Reusable chunking configurations referenced by block descriptors.
 *  - `lexicalSemantic`: "semanticChunking" strategy with a window of 3 and a
 *    minimum similarity score of 0.7 (intended for Lexical-shaped text on `allText`).
 *  - `simpleText`: "simpleChunking" strategy (intended for plain text input).
 */
var CHUNKING_PRESETS = (() => {
  const lexicalSemantic = {
    strategy: "semanticChunking",
    windowSize: 3,
    minSimilarityScore: 0.7
  };
  const simpleText = { strategy: "simpleChunking" };
  return { lexicalSemantic, simpleText };
})();
1484
+
1485
+ // src/blockRegistry/blocks/LexicalTextEditor.ts
1486
/**
 * Block descriptor for the "LexicalTextEditor" component.
 *
 * Pure data: identity, schema presets, capability flags, field paths and the
 * string names of the handlers/components associated with this block type.
 * Key order matches the original literal so enumeration order is unchanged.
 */
var LexicalTextEditor = (() => {
  // All three field paths point at the same `allText` key.
  const ALL_TEXT = "allText";
  // The same function name is used for both the populated check and form validation.
  const HAS_VALUE_FN = "lexicalTextEditorHasValue";
  // Every capability listed here is switched on (`true`), in this order.
  const enabledCapabilities = [
    "hasPlainText",
    "annotation",
    "aiAnnotation",
    "aiEnrichment",
    "searchable",
    "directDataImport",
    "csvExport",
    "translatable",
    "documentSummarizer",
    "stripFromMainOnAnnoChunkSync",
    "excludeFromListingProjection"
  ];

  return {
    compName: "LexicalTextEditor",
    // Identity
    category: "text",
    qualQuant: "qual",
    // Schema
    mongoSchemaType: MONGO_SCHEMA_PRESETS.object,
    esMapping: ELASTIC_MAPPING_PRESETS.largeText,
    // Capabilities
    capabilities: Object.fromEntries(enabledCapabilities.map((flag) => [flag, true])),
    // Field paths
    fieldPaths: {
      plainTextString: ALL_TEXT,
      searchField: ALL_TEXT,
      displayValue: ALL_TEXT
    },
    // Validation
    validation: {
      populatedCheckFn: HAS_VALUE_FN,
      formValidationFn: HAS_VALUE_FN
    },
    // Translation
    translation: { handlerType: "LexicalBlockHandler" },
    // Table rendering
    tableCell: {
      cellComp: "RichTextAsPlainTextLex",
      sortPathSuffix: "editorState.root.children.0.children.0.text"
    },
    // CSV export
    csvExport: { transformFn: "KPRichLexicalEditor" },
    // Slack
    slackFormat: { handlerFn: "lexicalRichText" },
    // Batch import
    batchImport: { valueInjectorFn: "toLexicalValue" },
    // Content block option — TCI template builder & direct import UI
    contentBlockOption: {
      display: "Rich Text Field",
      icon: "TextAa",
      directImportGroupsIdx: [2, 2]
    },
    // Chunking config — used by okf-sub CreateChunksHandler
    chunkingConfig: CHUNKING_PRESETS.lexicalSemantic
  };
})();
1549
+
1550
+ // src/blockRegistry/registry.ts
1551
/**
 * In-memory registry of block descriptors, keyed by each descriptor's `compName`.
 *
 * A new instance starts with `LexicalTextEditor` already registered. Descriptors are
 * stored as given; every lookup that reads `capabilities` tolerates descriptors that
 * omit it (such a descriptor simply has no capabilities).
 */
var BlockRegistry = class {
  constructor() {
    this.blocks = /* @__PURE__ */ new Map();
    this.register(LexicalTextEditor);
  }

  /**
   * Register a block descriptor under its `compName`.
   * Registering a second descriptor with the same `compName` replaces the first.
   * @param {object} descriptor - Block descriptor; must expose `compName`.
   */
  register(descriptor) {
    this.blocks.set(descriptor.compName, descriptor);
  }

  /**
   * Get the full descriptor for a block type.
   * @param {string} compType - The block's `compName`.
   * @returns {object|undefined} The descriptor, or undefined if not registered.
   */
  getBlock(compType) {
    return this.blocks.get(compType);
  }

  /**
   * Check if a block type is registered in the registry.
   * @param {string} compType - The block's `compName`.
   * @returns {boolean}
   */
  isRegistered(compType) {
    return this.blocks.has(compType);
  }

  /**
   * Get all registered block descriptors that have a given capability.
   * With the default `value` of `true`, any truthy capability value matches;
   * with any other `value`, the capability must be strictly equal to it
   * (e.g. for enum-style capabilities).
   * @param {string} capability - Key to look up in `descriptor.capabilities`.
   * @param {*} [value=true] - Value to match.
   * @returns {object[]} Matching descriptors, in registration order.
   */
  getBlocksByCapability(capability, value = true) {
    return Array.from(this.blocks.values()).filter((b) => {
      // Descriptors without a `capabilities` object are treated as having none.
      const cap = b.capabilities?.[capability];
      if (value === true) return !!cap;
      return cap === value;
    });
  }

  /**
   * Get `compName` strings for all registered blocks with a given capability.
   * Replaces scattered hardcoded arrays like:
   *   const TEXT_FIELD_COMPONENTS = ["TextInput", "LexicalTextEditor", ...]
   * becomes:
   *   const TEXT_FIELD_COMPONENTS = blockRegistry.getComps('aiTextExtraction')
   * @param {string} capability - Key to look up in `descriptor.capabilities`.
   * @param {*} [value=true] - Value to match (see `getBlocksByCapability`).
   * @returns {string[]}
   */
  getComps(capability, value = true) {
    return this.getBlocksByCapability(capability, value).map((b) => b.compName);
  }

  /**
   * Get all registered blocks in a given category.
   * @param {string} category - Compared strictly against `descriptor.category`.
   * @returns {object[]}
   */
  getBlocksByCategory(category) {
    return Array.from(this.blocks.values()).filter((b) => b.category === category);
  }

  /**
   * Get `compName` strings for all blocks whose `qualQuant` is "qual".
   * @returns {string[]}
   */
  getQualBlocks() {
    return Array.from(this.blocks.values()).filter((b) => b.qualQuant === "qual").map((b) => b.compName);
  }

  /**
   * Get `compName` strings for all blocks whose `qualQuant` is "quant".
   * @returns {string[]}
   */
  getQuantBlocks() {
    return Array.from(this.blocks.values()).filter((b) => b.qualQuant === "quant").map((b) => b.compName);
  }

  /**
   * Check if a specific block has a specific capability (any truthy value).
   * @param {string} compType - The block's `compName`.
   * @param {string} capability - Key to look up in `descriptor.capabilities`.
   * @returns {boolean} False when the block is not registered or lacks the capability.
   */
  hasCapability(compType, capability) {
    const block = this.blocks.get(compType);
    if (!block) return false;
    return !!block.capabilities?.[capability];
  }

  /**
   * Get all registered block descriptors.
   * @returns {object[]} A new array, in registration order.
   */
  getAll() {
    return Array.from(this.blocks.values());
  }

  /**
   * Get `compName` strings for all registered blocks that have a chunking config.
   * Used by chunking pipelines and prompt-string injection (e.g. searchChunks tool
   * description) to know which fields actually have chunks to search.
   * @returns {string[]}
   */
  getCompsWithChunking() {
    return Array.from(this.blocks.values()).filter((b) => !!b.chunkingConfig).map((b) => b.compName);
  }

  /**
   * Filter a list of block instances down to those where annotation is enabled.
   *
   * For a block whose `comp` is registered, the registry's `annotation` capability
   * decides and the per-instance toggle is ignored. For unregistered blocks
   * (e.g. deprecated KPRichInput/RichTextEditor) it falls back to the legacy
   * per-instance `props.annotation.enable` toggle, which must be exactly `true`.
   *
   * @param {object[]} allBlocks - Block instances (each read via `comp` and `props`).
   * @returns {object[]} The annotation-enabled subset, in input order.
   */
  getAnnotationEnabledBlocks(allBlocks) {
    return allBlocks.filter((block) => {
      const blockDef = this.blocks.get(block?.comp);
      if (blockDef) return !!blockDef.capabilities?.annotation;
      return block?.props?.annotation?.enable === true;
    });
  }

  /**
   * Resolve the tagTypesConfig for a block instance.
   *
   * Resolution order:
   *  1. `hardCodedTagTypesConfigForSM` — the self-managed default. Any truthy value
   *     wins over per-instance values (note: this includes an empty array).
   *     Pass null/undefined for non-SM tenants.
   *  2. `block.props.annotation.tagTypesConfig` — legacy per-instance persisted value.
   *  3. Empty array.
   *
   * @param {object} [block] - Block instance; a missing block resolves to step 3.
   * @param {Array} [hardCodedTagTypesConfigForSM] - Self-managed tag types config.
   * @returns {Array}
   */
  getTagTypesConfig(block, hardCodedTagTypesConfigForSM) {
    return hardCodedTagTypesConfigForSM || block?.props?.annotation?.tagTypesConfig || [];
  }
};
1650
// Module-level BlockRegistry instance used by extractAndOrganizeBlocks below;
// its constructor registers the LexicalTextEditor descriptor.
+ var blockRegistry = new BlockRegistry();
1651
+
1429
1652
  // src/utils/autoGenFilterConfigsFromTpl/utils/extractAndOrganizeBlocks.ts
1430
- var extractAndOrganizeBlocks = (selectedTpls, allTpls) => {
1653
+ var extractAndOrganizeBlocks = (selectedTpls, allTpls, { smTagTypesConfig } = {}) => {
1431
1654
  const extractedBlocks = {};
1432
1655
  const templateBlocksCache = /* @__PURE__ */ new Map();
1433
1656
  const getCachedBlocks = (tpl) => {
@@ -1438,7 +1661,7 @@ var extractAndOrganizeBlocks = (selectedTpls, allTpls) => {
1438
1661
  };
1439
1662
  extractedBlocks.annoTagBlocks = selectedTpls.map((tpl) => {
1440
1663
  const allBlocks = getCachedBlocks(tpl);
1441
- const allTagTypes = allBlocks.filter((block) => block.props?.annotation?.enable).flatMap((block) => block.props.annotation.tagTypesConfig?.map((d) => d.tagType) || []);
1664
+ const allTagTypes = blockRegistry.getAnnotationEnabledBlocks(allBlocks).flatMap((block) => blockRegistry.getTagTypesConfig(block, smTagTypesConfig).map((d) => d.tagType));
1442
1665
  const uniqueTagTypes = [...new Set(allTagTypes)];
1443
1666
  return {
1444
1667
  contentType: tpl.kp_content_type,
@@ -1452,13 +1675,13 @@ var extractAndOrganizeBlocks = (selectedTpls, allTpls) => {
1452
1675
  const allBlocks = getCachedBlocks(tpl);
1453
1676
  return {
1454
1677
  contentType: tpl.kp_content_type,
1455
- blocks: allBlocks.filter((block) => block.props?.annotation?.enable)
1678
+ blocks: blockRegistry.getAnnotationEnabledBlocks(allBlocks)
1456
1679
  };
1457
1680
  });
1458
1681
  extractedBlocks.annoRollupBlocks = selectedTpls.map((tpl) => {
1459
1682
  const allBlocks = getCachedBlocks(tpl);
1460
1683
  const uniqueTagTypes = Array.from(new Set(
1461
- allBlocks.filter((block) => block.props?.annotation?.enable).flatMap((block) => block.props.annotation.tagTypesConfig || []).map((conf) => conf.tagType)
1684
+ blockRegistry.getAnnotationEnabledBlocks(allBlocks).flatMap((block) => blockRegistry.getTagTypesConfig(block, smTagTypesConfig)).map((conf) => conf.tagType)
1462
1685
  ));
1463
1686
  return {
1464
1687
  contentType: tpl.kp_content_type,
@@ -2105,9 +2328,10 @@ var autoGenFilterConfigsFromTpl = ({
2105
2328
  allTpls,
2106
2329
  filterScopes,
2107
2330
  isSelfManagedTenant = false,
2108
- annotationTagsCount
2331
+ annotationTagsCount,
2332
+ smTagTypesConfig
2109
2333
  }) => {
2110
- const extractedBlocks = extractAndOrganizeBlocks(selectedTpls, allTpls);
2334
+ const extractedBlocks = extractAndOrganizeBlocks(selectedTpls, allTpls, { smTagTypesConfig });
2111
2335
  const allAnnoEnabledBlocks = filterScopes.includes("anno") ? extractedBlocks.annoEnabledBlocks.flatMap((item) => item.blocks).reduce((acc, block) => {
2112
2336
  if (!acc.find((b) => b.valuePath === block.valuePath)) {
2113
2337
  acc.push(block);
@@ -2228,186 +2452,6 @@ var genCleanCamelCaseId = (id) => {
2228
2452
  if (/^\d/.test(result)) result = "a" + result;
2229
2453
  return result.slice(0, MAX_LENGTH);
2230
2454
  };
2231
-
2232
- // src/blockRegistry/schemaPresets.ts
2233
- var MONGO_SCHEMA_PRESETS = {
2234
- object: { type: Object },
2235
- string: { type: String }
2236
- };
2237
- var ELASTIC_MAPPING_PRESETS = {
2238
- largeText: {
2239
- properties: {
2240
- allText: {
2241
- type: "text",
2242
- analyzer: "LargeTextAnalyzer"
2243
- }
2244
- }
2245
- }
2246
- };
2247
- var CHUNKING_PRESETS = {
2248
- // Lexical-shaped text — uses semantic chunking on allText
2249
- lexicalSemantic: {
2250
- strategy: "semanticChunking",
2251
- windowSize: 3,
2252
- minSimilarityScore: 0.7
2253
- },
2254
- // Plain text input — single chunk per field
2255
- simpleText: {
2256
- strategy: "simpleChunking"
2257
- }
2258
- };
2259
-
2260
- // src/blockRegistry/blocks/LexicalTextEditor.ts
2261
- var LexicalTextEditor = {
2262
- compName: "LexicalTextEditor",
2263
- // Identity
2264
- category: "text",
2265
- qualQuant: "qual",
2266
- // Schema
2267
- mongoSchemaType: MONGO_SCHEMA_PRESETS.object,
2268
- esMapping: ELASTIC_MAPPING_PRESETS.largeText,
2269
- // Capabilities
2270
- capabilities: {
2271
- hasPlainText: true,
2272
- annotation: true,
2273
- aiAnnotation: true,
2274
- aiEnrichment: true,
2275
- searchable: true,
2276
- directDataImport: true,
2277
- csvExport: true,
2278
- translatable: true,
2279
- documentSummarizer: true,
2280
- stripFromMainOnAnnoChunkSync: true,
2281
- excludeFromListingProjection: true
2282
- },
2283
- // Field paths
2284
- fieldPaths: {
2285
- plainTextString: "allText",
2286
- searchField: "allText",
2287
- displayValue: "allText"
2288
- },
2289
- // Validation
2290
- validation: {
2291
- populatedCheckFn: "lexicalTextEditorHasValue",
2292
- formValidationFn: "lexicalTextEditorHasValue"
2293
- },
2294
- // Translation
2295
- translation: {
2296
- handlerType: "LexicalBlockHandler"
2297
- },
2298
- // Table rendering
2299
- tableCell: {
2300
- cellComp: "RichTextAsPlainTextLex",
2301
- sortPathSuffix: "editorState.root.children.0.children.0.text"
2302
- },
2303
- // CSV export
2304
- csvExport: {
2305
- transformFn: "KPRichLexicalEditor"
2306
- },
2307
- // Slack
2308
- slackFormat: {
2309
- handlerFn: "lexicalRichText"
2310
- },
2311
- // Batch import
2312
- batchImport: {
2313
- valueInjectorFn: "toLexicalValue"
2314
- },
2315
- // Content block option — TCI template builder & direct import UI
2316
- contentBlockOption: {
2317
- display: "Rich Text Field",
2318
- icon: "TextAa",
2319
- directImportGroupsIdx: [2, 2]
2320
- },
2321
- // Chunking config — used by okf-sub CreateChunksHandler
2322
- chunkingConfig: CHUNKING_PRESETS.lexicalSemantic
2323
- };
2324
-
2325
- // src/blockRegistry/registry.ts
2326
- var BlockRegistry = class {
2327
- constructor() {
2328
- this.blocks = /* @__PURE__ */ new Map();
2329
- this.register(LexicalTextEditor);
2330
- }
2331
- /** Register a block descriptor. */
2332
- register(descriptor) {
2333
- this.blocks.set(descriptor.compName, descriptor);
2334
- }
2335
- /** Get the full descriptor for a block type. Returns undefined if not registered. */
2336
- getBlock(compType) {
2337
- return this.blocks.get(compType);
2338
- }
2339
- /** Check if a block type is registered in the registry. */
2340
- isRegistered(compType) {
2341
- return this.blocks.has(compType);
2342
- }
2343
- /**
2344
- * Get all registered block descriptors that have a given capability set to a truthy value.
2345
- * Optionally pass a specific value to match (e.g. for enum-style capabilities).
2346
- */
2347
- getBlocksByCapability(capability, value = true) {
2348
- return Array.from(this.blocks.values()).filter((b) => {
2349
- const cap = b.capabilities[capability];
2350
- if (value === true) return !!cap;
2351
- return cap === value;
2352
- });
2353
- }
2354
- /**
2355
- * Get compType strings for all registered blocks with a given capability.
2356
- * Replaces scattered hardcoded arrays like:
2357
- * const TEXT_FIELD_COMPONENTS = ["TextInput", "LexicalTextEditor", ...]
2358
- * becomes:
2359
- * const TEXT_FIELD_COMPONENTS = blockRegistry.getComps('aiTextExtraction')
2360
- */
2361
- getComps(capability, value = true) {
2362
- return this.getBlocksByCapability(capability, value).map((b) => b.compName);
2363
- }
2364
- /** Get all registered blocks in a given category. */
2365
- getBlocksByCategory(category) {
2366
- return Array.from(this.blocks.values()).filter((b) => b.category === category);
2367
- }
2368
- /** Get compType strings for all qual blocks. */
2369
- getQualBlocks() {
2370
- return Array.from(this.blocks.values()).filter((b) => b.qualQuant === "qual").map((b) => b.compName);
2371
- }
2372
- /** Get compType strings for all quant blocks. */
2373
- getQuantBlocks() {
2374
- return Array.from(this.blocks.values()).filter((b) => b.qualQuant === "quant").map((b) => b.compName);
2375
- }
2376
- /** Check if a specific block has a specific capability. */
2377
- hasCapability(compType, capability) {
2378
- const block = this.blocks.get(compType);
2379
- if (!block) return false;
2380
- return !!block.capabilities[capability];
2381
- }
2382
- /** Get all registered block descriptors. */
2383
- getAll() {
2384
- return Array.from(this.blocks.values());
2385
- }
2386
- /**
2387
- * Get compName strings for all registered blocks that have a chunking config.
2388
- * Used by chunking pipelines and prompt-string injection (e.g. searchChunks tool
2389
- * description) to know which fields actually have chunks to search.
2390
- */
2391
- getCompsWithChunking() {
2392
- return Array.from(this.blocks.values()).filter((b) => !!b.chunkingConfig).map((b) => b.compName);
2393
- }
2394
- /**
2395
- * Filter a list of block instances down to those where annotation is enabled.
2396
- * A block is annotation-enabled if its registry capability `annotation` is true.
2397
- * For backwards compat with un-migrated blocks (e.g. deprecated KPRichInput/RichTextEditor),
2398
- * falls back to the legacy per-instance `props.annotation.enable` toggle.
2399
- *
2400
- * Today: every registered annotation-capable block (e.g. LexicalTextEditor) is auto-enabled.
2401
- */
2402
- getAnnotationEnabledBlocks(allBlocks) {
2403
- return allBlocks.filter((block) => {
2404
- const blockDef = this.blocks.get(block.comp);
2405
- if (blockDef) return !!blockDef.capabilities.annotation;
2406
- return block.props?.annotation?.enable === true;
2407
- });
2408
- }
2409
- };
2410
- var blockRegistry = new BlockRegistry();
2411
2455
  // Annotate the CommonJS export names for ESM import in node:
2412
2456
  0 && (module.exports = {
2413
2457
  BASE_BULLMQ_CONFIG,