@okf/ootils 1.31.1 → 1.31.3

This diff shows the changes between two publicly available versions of the package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/node.js CHANGED
@@ -370,6 +370,34 @@ var init_GLOBAL_BULLMQ_CONFIG = __esm({
370
370
  maxStalledCount: 3
371
371
  }
372
372
  },
373
+ SARVAM_TRANSCRIPTION_QUEUE: {
374
+ id: "sarvam-transcription-queue",
375
+ queueConfig: {
376
+ defaultJobOptions: {
377
+ attempts: 10,
378
+ // Sarvam jobs need many poll cycles, not retries per se
379
+ backoff: {
380
+ type: "exponential",
381
+ delay: 15e3
382
+ // First retry at 15s, then 30s, 60s, etc.
383
+ },
384
+ removeOnComplete: 50,
385
+ removeOnFail: 200
386
+ },
387
+ streams: {
388
+ events: {
389
+ maxLen: 10
390
+ }
391
+ }
392
+ },
393
+ workerConfig: {
394
+ concurrency: 5,
395
+ // Multiple transcription polls can run in parallel safely
396
+ lockDuration: 12e4,
397
+ // 2 min — fetching results from Sarvam can be slow
398
+ maxStalledCount: 3
399
+ }
400
+ },
373
401
  REINDEX_QUEUE: {
374
402
  id: "reindex-queue",
375
403
  queueConfig: {
@@ -3024,8 +3052,203 @@ var compareAndGroupBlocks = (blocksPerTpl) => {
3024
3052
  return Array.from(templateGroupToFilters.values());
3025
3053
  };
3026
3054
 
3055
+ // src/blockRegistry/schemaPresets.ts
3056
+ var MONGO_SCHEMA_PRESETS = {
3057
+ object: { type: Object },
3058
+ string: { type: String }
3059
+ };
3060
+ var ELASTIC_MAPPING_PRESETS = {
3061
+ largeText: {
3062
+ properties: {
3063
+ allText: {
3064
+ type: "text",
3065
+ analyzer: "LargeTextAnalyzer"
3066
+ }
3067
+ }
3068
+ }
3069
+ };
3070
+ var CHUNKING_PRESETS = {
3071
+ // Lexical-shaped text — uses semantic chunking on allText
3072
+ lexicalSemantic: {
3073
+ strategy: "semanticChunking",
3074
+ windowSize: 3,
3075
+ minSimilarityScore: 0.7
3076
+ },
3077
+ // Plain text input — single chunk per field
3078
+ simpleText: {
3079
+ strategy: "simpleChunking"
3080
+ }
3081
+ };
3082
+
3083
+ // src/blockRegistry/blocks/LexicalTextEditor.ts
3084
+ var LexicalTextEditor = {
3085
+ compName: "LexicalTextEditor",
3086
+ // Identity
3087
+ category: "text",
3088
+ qualQuant: "qual",
3089
+ // Schema
3090
+ mongoSchemaType: MONGO_SCHEMA_PRESETS.object,
3091
+ esMapping: ELASTIC_MAPPING_PRESETS.largeText,
3092
+ // Capabilities
3093
+ capabilities: {
3094
+ hasPlainText: true,
3095
+ annotation: true,
3096
+ aiAnnotation: true,
3097
+ aiEnrichment: true,
3098
+ searchable: true,
3099
+ directDataImport: true,
3100
+ csvExport: true,
3101
+ translatable: true,
3102
+ documentSummarizer: true,
3103
+ stripFromMainOnAnnoChunkSync: true,
3104
+ excludeFromListingProjection: true
3105
+ },
3106
+ // Field paths
3107
+ fieldPaths: {
3108
+ plainTextString: "allText",
3109
+ searchField: "allText",
3110
+ displayValue: "allText"
3111
+ },
3112
+ // Validation
3113
+ validation: {
3114
+ populatedCheckFn: "lexicalTextEditorHasValue",
3115
+ formValidationFn: "lexicalTextEditorHasValue"
3116
+ },
3117
+ // Translation
3118
+ translation: {
3119
+ handlerType: "LexicalBlockHandler"
3120
+ },
3121
+ // Table rendering
3122
+ tableCell: {
3123
+ cellComp: "RichTextAsPlainTextLex",
3124
+ sortPathSuffix: "editorState.root.children.0.children.0.text"
3125
+ },
3126
+ // CSV export
3127
+ csvExport: {
3128
+ transformFn: "KPRichLexicalEditor"
3129
+ },
3130
+ // Slack
3131
+ slackFormat: {
3132
+ handlerFn: "lexicalRichText"
3133
+ },
3134
+ // Batch import
3135
+ batchImport: {
3136
+ valueInjectorFn: "toLexicalValue"
3137
+ },
3138
+ // Content block option — TCI template builder & direct import UI
3139
+ contentBlockOption: {
3140
+ display: "Rich Text Field",
3141
+ icon: "TextAa",
3142
+ directImportGroupsIdx: [2, 2]
3143
+ },
3144
+ // Chunking config — used by okf-sub CreateChunksHandler
3145
+ chunkingConfig: CHUNKING_PRESETS.lexicalSemantic
3146
+ };
3147
+
3148
+ // src/blockRegistry/registry.ts
3149
+ var BlockRegistry = class {
3150
+ constructor() {
3151
+ this.blocks = /* @__PURE__ */ new Map();
3152
+ this.register(LexicalTextEditor);
3153
+ }
3154
+ /** Register a block descriptor. */
3155
+ register(descriptor) {
3156
+ this.blocks.set(descriptor.compName, descriptor);
3157
+ }
3158
+ /** Get the full descriptor for a block type. Returns undefined if not registered. */
3159
+ getBlock(compType) {
3160
+ return this.blocks.get(compType);
3161
+ }
3162
+ /** Check if a block type is registered in the registry. */
3163
+ isRegistered(compType) {
3164
+ return this.blocks.has(compType);
3165
+ }
3166
+ /**
3167
+ * Get all registered block descriptors that have a given capability set to a truthy value.
3168
+ * Optionally pass a specific value to match (e.g. for enum-style capabilities).
3169
+ */
3170
+ getBlocksByCapability(capability, value = true) {
3171
+ return Array.from(this.blocks.values()).filter((b) => {
3172
+ const cap = b.capabilities[capability];
3173
+ if (value === true) return !!cap;
3174
+ return cap === value;
3175
+ });
3176
+ }
3177
+ /**
3178
+ * Get compType strings for all registered blocks with a given capability.
3179
+ * Replaces scattered hardcoded arrays like:
3180
+ * const TEXT_FIELD_COMPONENTS = ["TextInput", "LexicalTextEditor", ...]
3181
+ * becomes:
3182
+ * const TEXT_FIELD_COMPONENTS = blockRegistry.getComps('aiTextExtraction')
3183
+ */
3184
+ getComps(capability, value = true) {
3185
+ return this.getBlocksByCapability(capability, value).map((b) => b.compName);
3186
+ }
3187
+ /** Get all registered blocks in a given category. */
3188
+ getBlocksByCategory(category) {
3189
+ return Array.from(this.blocks.values()).filter((b) => b.category === category);
3190
+ }
3191
+ /** Get compType strings for all qual blocks. */
3192
+ getQualBlocks() {
3193
+ return Array.from(this.blocks.values()).filter((b) => b.qualQuant === "qual").map((b) => b.compName);
3194
+ }
3195
+ /** Get compType strings for all quant blocks. */
3196
+ getQuantBlocks() {
3197
+ return Array.from(this.blocks.values()).filter((b) => b.qualQuant === "quant").map((b) => b.compName);
3198
+ }
3199
+ /** Check if a specific block has a specific capability. */
3200
+ hasCapability(compType, capability) {
3201
+ const block = this.blocks.get(compType);
3202
+ if (!block) return false;
3203
+ return !!block.capabilities[capability];
3204
+ }
3205
+ /** Get all registered block descriptors. */
3206
+ getAll() {
3207
+ return Array.from(this.blocks.values());
3208
+ }
3209
+ /**
3210
+ * Get compName strings for all registered blocks that have a chunking config.
3211
+ * Used by chunking pipelines and prompt-string injection (e.g. searchChunks tool
3212
+ * description) to know which fields actually have chunks to search.
3213
+ */
3214
+ getCompsWithChunking() {
3215
+ return Array.from(this.blocks.values()).filter((b) => !!b.chunkingConfig).map((b) => b.compName);
3216
+ }
3217
+ /**
3218
+ * Filter a list of block instances down to those where annotation is enabled.
3219
+ * A block is annotation-enabled if its registry capability `annotation` is true.
3220
+ * For backwards compat with un-migrated blocks (e.g. deprecated KPRichInput/RichTextEditor),
3221
+ * falls back to the legacy per-instance `props.annotation.enable` toggle.
3222
+ *
3223
+ * Today: every registered annotation-capable block (e.g. LexicalTextEditor) is auto-enabled.
3224
+ */
3225
+ getAnnotationEnabledBlocks(allBlocks) {
3226
+ return allBlocks.filter((block) => {
3227
+ const blockDef = this.blocks.get(block.comp);
3228
+ if (blockDef) return !!blockDef.capabilities.annotation;
3229
+ return block.props?.annotation?.enable === true;
3230
+ });
3231
+ }
3232
+ /**
3233
+ * Resolve the tagTypesConfig for a block instance.
3234
+ *
3235
+ * Resolution order:
3236
+ * 1. `hardCodedTagTypesConfigForSM` — the intended self-managed default, which takes
3237
+ * priority over per-instance values (justifies not persisting per-block on self-managed).
3238
+ * Sourced from `GET_SELF_MANAGED_BASE_CONFIGS().annotation_tagTypesConfig` on BE,
3239
+ * or `platformConfigs.SELF_MANAGED_BASE_CONFIGS.annotation_tagTypesConfig` on FE.
3240
+ * Pass null/undefined for non-SM tenants.
3241
+ * 2. `block.props.annotation.tagTypesConfig` — legacy per-instance persisted value.
3242
+ * 3. Empty array.
3243
+ */
3244
+ getTagTypesConfig(block, hardCodedTagTypesConfigForSM) {
3245
+ return hardCodedTagTypesConfigForSM || block.props?.annotation?.tagTypesConfig || [];
3246
+ }
3247
+ };
3248
+ var blockRegistry = new BlockRegistry();
3249
+
3027
3250
  // src/utils/autoGenFilterConfigsFromTpl/utils/extractAndOrganizeBlocks.ts
3028
- var extractAndOrganizeBlocks = (selectedTpls, allTpls) => {
3251
+ var extractAndOrganizeBlocks = (selectedTpls, allTpls, { smTagTypesConfig } = {}) => {
3029
3252
  const extractedBlocks = {};
3030
3253
  const templateBlocksCache = /* @__PURE__ */ new Map();
3031
3254
  const getCachedBlocks = (tpl) => {
@@ -3036,7 +3259,7 @@ var extractAndOrganizeBlocks = (selectedTpls, allTpls) => {
3036
3259
  };
3037
3260
  extractedBlocks.annoTagBlocks = selectedTpls.map((tpl) => {
3038
3261
  const allBlocks = getCachedBlocks(tpl);
3039
- const allTagTypes = allBlocks.filter((block) => block.props?.annotation?.enable).flatMap((block) => block.props.annotation.tagTypesConfig?.map((d) => d.tagType) || []);
3262
+ const allTagTypes = blockRegistry.getAnnotationEnabledBlocks(allBlocks).flatMap((block) => blockRegistry.getTagTypesConfig(block, smTagTypesConfig).map((d) => d.tagType));
3040
3263
  const uniqueTagTypes = [...new Set(allTagTypes)];
3041
3264
  return {
3042
3265
  contentType: tpl.kp_content_type,
@@ -3050,13 +3273,13 @@ var extractAndOrganizeBlocks = (selectedTpls, allTpls) => {
3050
3273
  const allBlocks = getCachedBlocks(tpl);
3051
3274
  return {
3052
3275
  contentType: tpl.kp_content_type,
3053
- blocks: allBlocks.filter((block) => block.props?.annotation?.enable)
3276
+ blocks: blockRegistry.getAnnotationEnabledBlocks(allBlocks)
3054
3277
  };
3055
3278
  });
3056
3279
  extractedBlocks.annoRollupBlocks = selectedTpls.map((tpl) => {
3057
3280
  const allBlocks = getCachedBlocks(tpl);
3058
3281
  const uniqueTagTypes = Array.from(new Set(
3059
- allBlocks.filter((block) => block.props?.annotation?.enable).flatMap((block) => block.props.annotation.tagTypesConfig || []).map((conf) => conf.tagType)
3282
+ blockRegistry.getAnnotationEnabledBlocks(allBlocks).flatMap((block) => blockRegistry.getTagTypesConfig(block, smTagTypesConfig)).map((conf) => conf.tagType)
3060
3283
  ));
3061
3284
  return {
3062
3285
  contentType: tpl.kp_content_type,
@@ -3703,9 +3926,10 @@ var autoGenFilterConfigsFromTpl = ({
3703
3926
  allTpls,
3704
3927
  filterScopes,
3705
3928
  isSelfManagedTenant = false,
3706
- annotationTagsCount
3929
+ annotationTagsCount,
3930
+ smTagTypesConfig
3707
3931
  }) => {
3708
- const extractedBlocks = extractAndOrganizeBlocks(selectedTpls, allTpls);
3932
+ const extractedBlocks = extractAndOrganizeBlocks(selectedTpls, allTpls, { smTagTypesConfig });
3709
3933
  const allAnnoEnabledBlocks = filterScopes.includes("anno") ? extractedBlocks.annoEnabledBlocks.flatMap((item) => item.blocks).reduce((acc, block) => {
3710
3934
  if (!acc.find((b) => b.valuePath === block.valuePath)) {
3711
3935
  acc.push(block);
@@ -3827,186 +4051,6 @@ var genCleanCamelCaseId = (id) => {
3827
4051
  return result.slice(0, MAX_LENGTH);
3828
4052
  };
3829
4053
 
3830
- // src/blockRegistry/schemaPresets.ts
3831
- var MONGO_SCHEMA_PRESETS = {
3832
- object: { type: Object },
3833
- string: { type: String }
3834
- };
3835
- var ELASTIC_MAPPING_PRESETS = {
3836
- largeText: {
3837
- properties: {
3838
- allText: {
3839
- type: "text",
3840
- analyzer: "LargeTextAnalyzer"
3841
- }
3842
- }
3843
- }
3844
- };
3845
- var CHUNKING_PRESETS = {
3846
- // Lexical-shaped text — uses semantic chunking on allText
3847
- lexicalSemantic: {
3848
- strategy: "semanticChunking",
3849
- windowSize: 3,
3850
- minSimilarityScore: 0.7
3851
- },
3852
- // Plain text input — single chunk per field
3853
- simpleText: {
3854
- strategy: "simpleChunking"
3855
- }
3856
- };
3857
-
3858
- // src/blockRegistry/blocks/LexicalTextEditor.ts
3859
- var LexicalTextEditor = {
3860
- compName: "LexicalTextEditor",
3861
- // Identity
3862
- category: "text",
3863
- qualQuant: "qual",
3864
- // Schema
3865
- mongoSchemaType: MONGO_SCHEMA_PRESETS.object,
3866
- esMapping: ELASTIC_MAPPING_PRESETS.largeText,
3867
- // Capabilities
3868
- capabilities: {
3869
- hasPlainText: true,
3870
- annotation: true,
3871
- aiAnnotation: true,
3872
- aiEnrichment: true,
3873
- searchable: true,
3874
- directDataImport: true,
3875
- csvExport: true,
3876
- translatable: true,
3877
- documentSummarizer: true,
3878
- stripFromMainOnAnnoChunkSync: true,
3879
- excludeFromListingProjection: true
3880
- },
3881
- // Field paths
3882
- fieldPaths: {
3883
- plainTextString: "allText",
3884
- searchField: "allText",
3885
- displayValue: "allText"
3886
- },
3887
- // Validation
3888
- validation: {
3889
- populatedCheckFn: "lexicalTextEditorHasValue",
3890
- formValidationFn: "lexicalTextEditorHasValue"
3891
- },
3892
- // Translation
3893
- translation: {
3894
- handlerType: "LexicalBlockHandler"
3895
- },
3896
- // Table rendering
3897
- tableCell: {
3898
- cellComp: "RichTextAsPlainTextLex",
3899
- sortPathSuffix: "editorState.root.children.0.children.0.text"
3900
- },
3901
- // CSV export
3902
- csvExport: {
3903
- transformFn: "KPRichLexicalEditor"
3904
- },
3905
- // Slack
3906
- slackFormat: {
3907
- handlerFn: "lexicalRichText"
3908
- },
3909
- // Batch import
3910
- batchImport: {
3911
- valueInjectorFn: "toLexicalValue"
3912
- },
3913
- // Content block option — TCI template builder & direct import UI
3914
- contentBlockOption: {
3915
- display: "Rich Text Field",
3916
- icon: "TextAa",
3917
- directImportGroupsIdx: [2, 2]
3918
- },
3919
- // Chunking config — used by okf-sub CreateChunksHandler
3920
- chunkingConfig: CHUNKING_PRESETS.lexicalSemantic
3921
- };
3922
-
3923
- // src/blockRegistry/registry.ts
3924
- var BlockRegistry = class {
3925
- constructor() {
3926
- this.blocks = /* @__PURE__ */ new Map();
3927
- this.register(LexicalTextEditor);
3928
- }
3929
- /** Register a block descriptor. */
3930
- register(descriptor) {
3931
- this.blocks.set(descriptor.compName, descriptor);
3932
- }
3933
- /** Get the full descriptor for a block type. Returns undefined if not registered. */
3934
- getBlock(compType) {
3935
- return this.blocks.get(compType);
3936
- }
3937
- /** Check if a block type is registered in the registry. */
3938
- isRegistered(compType) {
3939
- return this.blocks.has(compType);
3940
- }
3941
- /**
3942
- * Get all registered block descriptors that have a given capability set to a truthy value.
3943
- * Optionally pass a specific value to match (e.g. for enum-style capabilities).
3944
- */
3945
- getBlocksByCapability(capability, value = true) {
3946
- return Array.from(this.blocks.values()).filter((b) => {
3947
- const cap = b.capabilities[capability];
3948
- if (value === true) return !!cap;
3949
- return cap === value;
3950
- });
3951
- }
3952
- /**
3953
- * Get compType strings for all registered blocks with a given capability.
3954
- * Replaces scattered hardcoded arrays like:
3955
- * const TEXT_FIELD_COMPONENTS = ["TextInput", "LexicalTextEditor", ...]
3956
- * becomes:
3957
- * const TEXT_FIELD_COMPONENTS = blockRegistry.getComps('aiTextExtraction')
3958
- */
3959
- getComps(capability, value = true) {
3960
- return this.getBlocksByCapability(capability, value).map((b) => b.compName);
3961
- }
3962
- /** Get all registered blocks in a given category. */
3963
- getBlocksByCategory(category) {
3964
- return Array.from(this.blocks.values()).filter((b) => b.category === category);
3965
- }
3966
- /** Get compType strings for all qual blocks. */
3967
- getQualBlocks() {
3968
- return Array.from(this.blocks.values()).filter((b) => b.qualQuant === "qual").map((b) => b.compName);
3969
- }
3970
- /** Get compType strings for all quant blocks. */
3971
- getQuantBlocks() {
3972
- return Array.from(this.blocks.values()).filter((b) => b.qualQuant === "quant").map((b) => b.compName);
3973
- }
3974
- /** Check if a specific block has a specific capability. */
3975
- hasCapability(compType, capability) {
3976
- const block = this.blocks.get(compType);
3977
- if (!block) return false;
3978
- return !!block.capabilities[capability];
3979
- }
3980
- /** Get all registered block descriptors. */
3981
- getAll() {
3982
- return Array.from(this.blocks.values());
3983
- }
3984
- /**
3985
- * Get compName strings for all registered blocks that have a chunking config.
3986
- * Used by chunking pipelines and prompt-string injection (e.g. searchChunks tool
3987
- * description) to know which fields actually have chunks to search.
3988
- */
3989
- getCompsWithChunking() {
3990
- return Array.from(this.blocks.values()).filter((b) => !!b.chunkingConfig).map((b) => b.compName);
3991
- }
3992
- /**
3993
- * Filter a list of block instances down to those where annotation is enabled.
3994
- * A block is annotation-enabled if its registry capability `annotation` is true.
3995
- * For backwards compat with un-migrated blocks (e.g. deprecated KPRichInput/RichTextEditor),
3996
- * falls back to the legacy per-instance `props.annotation.enable` toggle.
3997
- *
3998
- * Today: every registered annotation-capable block (e.g. LexicalTextEditor) is auto-enabled.
3999
- */
4000
- getAnnotationEnabledBlocks(allBlocks) {
4001
- return allBlocks.filter((block) => {
4002
- const blockDef = this.blocks.get(block.comp);
4003
- if (blockDef) return !!blockDef.capabilities.annotation;
4004
- return block.props?.annotation?.enable === true;
4005
- });
4006
- }
4007
- };
4008
- var blockRegistry = new BlockRegistry();
4009
-
4010
4054
  // src/node.ts
4011
4055
  var import_MongoConnector3 = __toESM(require_MongoConnector());
4012
4056
  var import_ElasticSearchConnector = __toESM(require_ElasticSearchConnector());