npm - modality-kit - Version diff - 0.6.1 → 0.6.3 - Mend

modality-kit 0.6.1 → 0.6.3

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (6)

package/dist/index.js +707 -0
package/dist/types/index.d.ts +1 -0
package/dist/types/util_tests/__tests__/console-mock.test.d.ts +1 -0
package/dist/types/util_tests/console-mock.d.ts +49 -0
package/dist/types/util_text_compression.d.ts +14 -20
package/package.json +2 -2

package/dist/index.js CHANGED Viewed

@@ -4283,6 +4283,712 @@ async function loadVersion(packageJsonPath) {
     return "0.0.0";
   }
 }
+// src/util_text_compression.ts
+var DEFAULT_CONFIG = {
+  maxTokens: 4000,
+  compressionLevel: "moderate",
+  preserveCodeBlocks: true,
+  autoDetectLanguage: true,
+  enableLogging: false,
+  maxSentencesForAnalysis: 500,
+  fastModeMaxSentences: 200
+};
+class CompressionError extends ErrorCode {
+  code;
+  details;
+  constructor(message, code, details, originalError) {
+    super(message, originalError);
+    this.code = code;
+    this.details = details;
+  }
+}
+class LanguageDetectionError extends ErrorCode {
+  code = "LANGUAGE_DETECTION_ERROR";
+  fallbackLanguage;
+  constructor(message, fallbackLanguage, originalError) {
+    super(message, originalError);
+    this.fallbackLanguage = fallbackLanguage;
+  }
+}
+class UniversalLanguageDetector {
+  logger;
+  cache = new Map;
+  constructor(logger2) {
+    this.logger = logger2;
+  }
+  async detectLanguage(text) {
+    try {
+      if (!text || typeof text !== "string") {
+        throw new LanguageDetectionError("Invalid input text", "und");
+      }
+      const cacheKey = text.length > 200 ? text.substring(0, 200) : text;
+      if (this.cache.has(cacheKey)) {
+        return this.cache.get(cacheKey);
+      }
+      const result = await this.performDetection(text);
+      if (this.cache.size > 50) {
+        this.cache.clear();
+      }
+      this.cache.set(cacheKey, result);
+      this.logger.info(`Language detected: ${result.code} (confidence: ${result.confidence})`);
+      return result;
+    } catch (error) {
+      this.logger.error("Language detection failed", error);
+      const fallback = {
+        code: "und",
+        locale: "und",
+        confidence: 0.1
+      };
+      return fallback;
+    }
+  }
+  async performDetection(text) {
+    const unicodeHints = this.analyzeUnicodeRanges(text);
+    const testLocales = this.prioritizeLocalesBasedOnUnicode(unicodeHints);
+    let bestMatch = {
+      code: "und",
+      locale: "und",
+      confidence: 0.3
+    };
+    for (const testLocale of testLocales) {
+      try {
+        const intlScore = await this.testLocaleWithIntlAPIs(text, testLocale);
+        const unicodeBoost = this.getUnicodeBoost(testLocale, unicodeHints);
+        const combinedScore = Math.min(1, intlScore + unicodeBoost);
+        if (combinedScore > bestMatch.confidence) {
+          const locale = new Intl.Locale(testLocale);
+          bestMatch = {
+            code: locale.language,
+            locale: testLocale,
+            confidence: combinedScore,
+            script: locale.script,
+            region: locale.region
+          };
+        }
+      } catch (error) {
+        continue;
+      }
+    }
+    return bestMatch;
+  }
+  analyzeUnicodeRanges(text) {
+    const sample = text.slice(0, 500);
+    const codePoints = Array.from(sample).map((char) => char.codePointAt(0));
+    const ranges = new Map;
+    let totalRelevantChars = 0;
+    for (const codePoint of codePoints) {
+      let rangeFound = false;
+      if (codePoint >= 19968 && codePoint <= 40959) {
+        ranges.set("cjk", (ranges.get("cjk") || 0) + 1);
+        totalRelevantChars++;
+        rangeFound = true;
+      }
+      if (codePoint >= 12352 && codePoint <= 12447) {
+        ranges.set("hiragana", (ranges.get("hiragana") || 0) + 1);
+        totalRelevantChars++;
+        rangeFound = true;
+      }
+      if (codePoint >= 12448 && codePoint <= 12543) {
+        ranges.set("katakana", (ranges.get("katakana") || 0) + 1);
+        totalRelevantChars++;
+        rangeFound = true;
+      }
+      if (codePoint >= 44032 && codePoint <= 55215) {
+        ranges.set("hangul", (ranges.get("hangul") || 0) + 1);
+        totalRelevantChars++;
+        rangeFound = true;
+      }
+      if (codePoint >= 1024 && codePoint <= 1279) {
+        ranges.set("cyrillic", (ranges.get("cyrillic") || 0) + 1);
+        totalRelevantChars++;
+        rangeFound = true;
+      }
+      if (codePoint >= 1536 && codePoint <= 1791) {
+        ranges.set("arabic", (ranges.get("arabic") || 0) + 1);
+        totalRelevantChars++;
+        rangeFound = true;
+      }
+      if (!rangeFound && codePoint >= 32 && codePoint <= 126) {
+        ranges.set("latin", (ranges.get("latin") || 0) + 1);
+        totalRelevantChars++;
+      }
+    }
+    const percentages = new Map;
+    for (const [range, count] of ranges) {
+      percentages.set(range, totalRelevantChars > 0 ? count / totalRelevantChars : 0);
+    }
+    return percentages;
+  }
+  prioritizeLocalesBasedOnUnicode(unicodeHints) {
+    const availableLocales = this.getAvailableTestLocales(unicodeHints);
+    return availableLocales.sort((a, b) => {
+      const scoreA = this.getUnicodeRelevanceScore(a, unicodeHints);
+      const scoreB = this.getUnicodeRelevanceScore(b, unicodeHints);
+      return scoreB - scoreA;
+    });
+  }
+  getAvailableTestLocales(unicodeHints) {
+    const locales = new Set;
+    if (typeof Intl !== "undefined" && Intl.DateTimeFormat) {
+      try {
+        const systemLocale = Intl.DateTimeFormat().resolvedOptions().locale;
+        locales.add(systemLocale);
+        const locale = new Intl.Locale(systemLocale);
+        if (locale.language) {
+          locales.add(locale.language);
+        }
+      } catch (error) {}
+    }
+    const ranges = Array.from(unicodeHints.keys());
+    if (ranges.includes("cjk")) {
+      locales.add("zh-Hant");
+      locales.add("zh-Hans");
+    }
+    if (ranges.includes("hiragana") || ranges.includes("katakana")) {
+      locales.add("ja-JP");
+      locales.add("ja");
+    }
+    if (ranges.includes("hangul")) {
+      locales.add("ko-KR");
+      locales.add("ko");
+    }
+    if (ranges.includes("cyrillic")) {
+      locales.add("ru-RU");
+      locales.add("ru");
+    }
+    if (ranges.includes("arabic")) {
+      locales.add("ar-SA");
+      locales.add("ar");
+    }
+    if (ranges.includes("latin")) {
+      locales.add("en-US");
+      locales.add("en");
+      locales.add("fr-FR");
+      locales.add("de-DE");
+      locales.add("es-ES");
+    }
+    locales.add("en-US");
+    locales.add("en");
+    return Array.from(locales);
+  }
+  getUnicodeRelevanceScore(locale, unicodeHints) {
+    const hiragana = unicodeHints.get("hiragana") || 0;
+    const katakana = unicodeHints.get("katakana") || 0;
+    const hangul = unicodeHints.get("hangul") || 0;
+    const cjk = unicodeHints.get("cjk") || 0;
+    const cyrillic = unicodeHints.get("cyrillic") || 0;
+    const arabic = unicodeHints.get("arabic") || 0;
+    const latin = unicodeHints.get("latin") || 0;
+    switch (locale) {
+      case "ja-JP":
+        return hiragana * 10 + katakana * 10 + cjk * 2;
+      case "ko-KR":
+        return hangul * 10 + cjk * 1;
+      case "zh-Hant":
+      case "zh-Hans":
+        return cjk * 5 - (hiragana + katakana + hangul) * 2;
+      case "ru-RU":
+        return cyrillic * 8;
+      case "ar-SA":
+        return arabic * 8;
+      case "en-US":
+      case "fr-FR":
+      case "de-DE":
+      case "es-ES":
+        return latin * 3;
+      default:
+        return 0;
+    }
+  }
+  getUnicodeBoost(locale, unicodeHints) {
+    const relevanceScore = this.getUnicodeRelevanceScore(locale, unicodeHints);
+    return Math.min(0.4, relevanceScore * 0.1);
+  }
+  async testLocaleWithIntlAPIs(text, locale) {
+    let score = 0;
+    const sample = text.slice(0, 500);
+    try {
+      if (typeof Intl !== "undefined" && Intl.Segmenter) {
+        const segmenter = new Intl.Segmenter(locale, { granularity: "word" });
+        const segments = Array.from(segmenter.segment(sample));
+        const segmentQuality = segments.length > 0 ? Math.min(1, segments.length / (sample.length / 10)) : 0;
+        score += segmentQuality * 0.4;
+      }
+      if (typeof Intl !== "undefined" && Intl.Collator) {
+        const collator = new Intl.Collator(locale, { sensitivity: "base" });
+        const testChars = Array.from(sample).slice(0, 10);
+        if (testChars.length > 1) {
+          const sorted = testChars.sort(collator.compare);
+          score += sorted.length > 0 ? 0.3 : 0;
+        }
+      }
+      if (typeof Intl !== "undefined" && Intl.DisplayNames) {
+        try {
+          const displayNames = new Intl.DisplayNames([locale], {
+            type: "language"
+          });
+          const langCode = new Intl.Locale(locale).language;
+          const displayName = displayNames.of(langCode);
+          score += displayName ? 0.2 : 0;
+        } catch (displayError) {}
+      }
+      if (typeof Intl !== "undefined" && Intl.RelativeTimeFormat) {
+        try {
+          const rtf = new Intl.RelativeTimeFormat(locale);
+          score += rtf ? 0.1 : 0;
+        } catch (rtfError) {}
+      }
+    } catch (error) {
+      this.logger.warn(`Testing locale ${locale} failed:`, error);
+      return 0;
+    }
+    return Math.min(1, score);
+  }
+}
+class IntelligentImportanceAnalyzer {
+  wordFrequencyCache = new Map;
+  logger;
+  config;
+  constructor(logger2, config) {
+    this.logger = logger2;
+    this.config = config;
+  }
+  async analyzeImportance(text, detectedLanguage) {
+    if (text.length > 50000) {
+      return await this.fastAnalyzeImportance(text, detectedLanguage);
+    }
+    const sentences = this.segmentSentences(text, detectedLanguage);
+    const maxSentences = Math.min(sentences.length, this.config.maxSentencesForAnalysis);
+    const processedSentences = sentences.slice(0, maxSentences);
+    const wordFrequencies = await this.calculateWordFrequencies(text);
+    const avgSentenceLength = processedSentences.reduce((sum, s) => sum + s.length, 0) / processedSentences.length;
+    const results = await Promise.all(processedSentences.map(async (sentence, index) => {
+      return new Promise((resolve) => {
+        const reasons = [];
+        let score = 1;
+        score *= this.analyzePosition(index, processedSentences.length, reasons);
+        score *= this.analyzeLengthDeviation(sentence, avgSentenceLength, reasons);
+        score *= this.analyzeWordRarity(sentence, wordFrequencies, reasons);
+        resolve({
+          text: sentence.trim(),
+          score: Math.round(score * 100) / 100,
+          reasons
+        });
+      });
+    }));
+    return results;
+  }
+  async fastAnalyzeImportance(text, detectedLanguage) {
+    const sentences = this.segmentSentences(text, detectedLanguage).slice(0, this.config.fastModeMaxSentences);
+    const maxSentences = sentences.length;
+    return sentences.slice(0, maxSentences).map((sentence, index) => {
+      const reasons = [];
+      let score = 1;
+      if (index < 3) {
+        score = 2;
+        reasons.push("first-sentences");
+      } else if (index >= maxSentences - 3) {
+        score = 1.8;
+        reasons.push("last-sentences");
+      } else if (sentence.length > 200) {
+        score = 1.3;
+        reasons.push("long-sentence");
+      }
+      return {
+        text: sentence.trim(),
+        score,
+        reasons
+      };
+    });
+  }
+  segmentSentences(text, locale) {
+    try {
+      if (typeof Intl !== "undefined" && Intl.Segmenter) {
+        const segmenter = new Intl.Segmenter(locale || "en", {
+          granularity: "sentence"
+        });
+        const segments = Array.from(segmenter.segment(text));
+        return segments.map((segment) => segment.segment.trim()).filter((s) => s.length > 10);
+      }
+    } catch (error) {}
+    return text.split(/[.!?]+/).map((s) => s.trim()).filter((s) => s.length > 10);
+  }
+  analyzePosition(index, totalSentences, reasons) {
+    if (totalSentences === 1)
+      return 2;
+    if (index === 0) {
+      reasons.push("first-sentence");
+      return 2;
+    }
+    if (index === totalSentences - 1) {
+      reasons.push("last-sentence");
+      return 2;
+    }
+    if (index < 3 || index >= totalSentences - 3) {
+      reasons.push("near-boundary");
+      return 1.5;
+    }
+    return 1;
+  }
+  analyzeLengthDeviation(sentence, avgLength, reasons) {
+    if (avgLength === 0)
+      return 1;
+    const length = sentence.length;
+    const deviation = Math.abs(length - avgLength) / avgLength;
+    if (deviation > 0.8) {
+      if (length > avgLength) {
+        reasons.push("unusually-long");
+        return 1.3;
+      } else if (length > 20) {
+        reasons.push("unusually-short");
+        return 1.2;
+      }
+    }
+    return 1;
+  }
+  analyzeWordRarity(sentence, wordFreqs, reasons) {
+    const words = sentence.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((w) => w.length > 2);
+    if (words.length === 0)
+      return 1;
+    const totalWords = Array.from(wordFreqs.values()).reduce((sum, freq) => sum + freq, 0);
+    let rareWordCount = 0;
+    let veryCommonCount = 0;
+    words.forEach((word) => {
+      const freq = wordFreqs.get(word) || 0;
+      const relativeFreq = freq / totalWords;
+      if (relativeFreq < 0.005) {
+        rareWordCount++;
+      } else if (relativeFreq > 0.05) {
+        veryCommonCount++;
+      }
+    });
+    const rareWordRatio = rareWordCount / words.length;
+    const commonWordRatio = veryCommonCount / words.length;
+    if (rareWordRatio > 0.25) {
+      reasons.push("has-rare-words");
+      return 1.3;
+    }
+    if (commonWordRatio > 0.8) {
+      reasons.push("mostly-common-words");
+      return 0.7;
+    }
+    return 1;
+  }
+  async calculateWordFrequencies(text) {
+    return new Promise((resolve) => {
+      try {
+        const cacheKey = text.length > 1000 ? text.substring(0, 1000) : text;
+        if (this.wordFrequencyCache.has(cacheKey)) {
+          resolve(this.wordFrequencyCache.get(cacheKey));
+          return;
+        }
+        const processText = text.length > 1e4 ? text.substring(0, 1e4) : text;
+        const processAsync = async () => {
+          const words = processText.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((w) => w.length > 2);
+          const freq = new Map;
+          const chunkSize = 1000;
+          for (let i = 0;i < words.length; i += chunkSize) {
+            const chunk = words.slice(i, i + chunkSize);
+            chunk.forEach((word) => {
+              freq.set(word, (freq.get(word) || 0) + 1);
+            });
+            if (i % 5000 === 0 && i > 0) {
+              await new Promise((resolve2) => setTimeout(resolve2, 0));
+            }
+          }
+          return freq;
+        };
+        processAsync().then((freq) => {
+          if (this.wordFrequencyCache.size > 10) {
+            this.wordFrequencyCache.clear();
+          }
+          this.wordFrequencyCache.set(cacheKey, freq);
+          resolve(freq);
+        });
+      } catch (error) {
+        this.logger.error("Word frequency calculation failed:", error);
+        resolve(new Map);
+      }
+    });
+  }
+}
+class TextCompressionUtility {
+  languageDetector;
+  importanceAnalyzer;
+  logger;
+  config;
+  constructor(config = {}) {
+    this.config = { ...DEFAULT_CONFIG, ...config };
+    this.logger = ModalityLogger.getInstance("TextCompression", this.config.enableLogging ? "info" : "error");
+    this.languageDetector = new UniversalLanguageDetector(this.logger);
+    this.importanceAnalyzer = new IntelligentImportanceAnalyzer(this.logger, this.config);
+  }
+  async compress(text, options = {}) {
+    if (!text || typeof text !== "string") {
+      return {
+        compressedText: "",
+        originalLength: 0,
+        compressedLength: 0,
+        compressionRatio: 1,
+        tokensEstimate: 0,
+        detectedLanguage: "und"
+      };
+    }
+    const maxTokens = options.maxTokens ?? this.config.maxTokens;
+    const preserveCodeBlocks = options.preserveCodeBlocks ?? this.config.preserveCodeBlocks;
+    const compressionLevel = options.compressionLevel ?? this.config.compressionLevel;
+    const autoDetectLanguage = options.autoDetectLanguage ?? this.config.autoDetectLanguage;
+    const {
+      prioritizeFirst = true,
+      prioritizeLast = true,
+      bufferPercentage = 10,
+      maxSentences = 500,
+      fastMode = false
+    } = options;
+    if (maxTokens <= 0) {
+      throw new CompressionError("maxTokens must be greater than 0", "INVALID_MAX_TOKENS");
+    }
+    const trimmedText = text.trim();
+    const originalLength = trimmedText.length;
+    if (originalLength < 10) {
+      return {
+        compressedText: trimmedText,
+        originalLength,
+        compressedLength: originalLength,
+        compressionRatio: 1,
+        tokensEstimate: Math.max(1, Math.ceil(originalLength / 4)),
+        detectedLanguage: "und"
+      };
+    }
+    let detectedLanguage;
+    if (autoDetectLanguage) {
+      try {
+        const detection = await this.languageDetector.detectLanguage(trimmedText);
+        detectedLanguage = detection.code;
+      } catch (error) {
+        this.logger.warn("Language detection failed, using fallback:", error);
+        detectedLanguage = "und";
+      }
+    }
+    const initialTokens = this.estimateTokens(trimmedText);
+    if (initialTokens <= maxTokens) {
+      return {
+        compressedText: trimmedText,
+        originalLength,
+        compressedLength: trimmedText.length,
+        compressionRatio: 1,
+        tokensEstimate: initialTokens,
+        detectedLanguage
+      };
+    }
+    const { text: textWithoutCode, codeMap } = preserveCodeBlocks ? this.extractCodeElements(trimmedText) : { text: trimmedText, codeMap: new Map };
+    if (textWithoutCode.trim().length < 10) {
+      const finalText = this.trimToTokenLimit(trimmedText, maxTokens);
+      return {
+        compressedText: finalText,
+        originalLength,
+        compressedLength: finalText.length,
+        compressionRatio: originalLength / finalText.length,
+        tokensEstimate: this.estimateTokens(finalText),
+        detectedLanguage
+      };
+    }
+    let importanceScores;
+    try {
+      importanceScores = fastMode || textWithoutCode.length > 50000 ? await this.importanceAnalyzer.fastAnalyzeImportance(textWithoutCode, detectedLanguage) : await this.importanceAnalyzer.analyzeImportance(textWithoutCode, detectedLanguage);
+      if (prioritizeFirst || prioritizeLast) {
+        importanceScores = this.applyUserPriorities(importanceScores, prioritizeFirst, prioritizeLast);
+      }
+      if (maxSentences && importanceScores.length > maxSentences) {
+        importanceScores = importanceScores.sort((a, b) => b.score - a.score).slice(0, maxSentences);
+      }
+    } catch (error) {
+      console.warn("Importance analysis failed, using fallback:", error);
+      const sentences = this.segmentSentences(textWithoutCode);
+      importanceScores = sentences.map((sentence, index) => ({
+        text: sentence.trim(),
+        score: index < 3 ? 2 : 1,
+        reasons: index < 3 ? ["first-sentences-fallback"] : ["fallback"]
+      }));
+    }
+    if (importanceScores.length === 0) {
+      const finalText = this.trimToTokenLimit(trimmedText, maxTokens);
+      return {
+        compressedText: finalText,
+        originalLength,
+        compressedLength: finalText.length,
+        compressionRatio: originalLength / finalText.length,
+        tokensEstimate: this.estimateTokens(finalText),
+        detectedLanguage
+      };
+    }
+    let compressed;
+    try {
+      compressed = this.applyCompression(importanceScores, maxTokens, compressionLevel, codeMap, bufferPercentage);
+    } catch (error) {
+      console.warn("Compression failed, using fallback:", error);
+      compressed = this.trimToTokenLimit(trimmedText, maxTokens);
+    }
+    if (compressed.trim().length === 0) {
+      compressed = this.trimToTokenLimit(trimmedText, Math.min(maxTokens, 100));
+    }
+    const finalTokens = this.estimateTokens(compressed);
+    return {
+      compressedText: compressed,
+      originalLength,
+      compressedLength: compressed.length,
+      compressionRatio: originalLength / compressed.length,
+      tokensEstimate: finalTokens,
+      detectedLanguage,
+      importanceScores
+    };
+  }
+  extractCodeElements(text) {
+    const codeMap = new Map;
+    let counter = 0;
+    let result = text;
+    result = result.replace(/```[\s\S]*?```/g, (match) => {
+      const placeholder = `__CODE_BLOCK_${counter++}__`;
+      codeMap.set(placeholder, match);
+      return placeholder;
+    });
+    result = result.replace(/`[^`\n]+`/g, (match) => {
+      const placeholder = `__INLINE_CODE_${counter++}__`;
+      codeMap.set(placeholder, match);
+      return placeholder;
+    });
+    result = result.replace(/\b[a-zA-Z_][a-zA-Z0-9_]*\(\)/g, (match) => {
+      const placeholder = `__FUNC_CALL_${counter++}__`;
+      codeMap.set(placeholder, match);
+      return placeholder;
+    });
+    return { text: result, codeMap };
+  }
+  applyUserPriorities(importanceScores, prioritizeFirst, prioritizeLast) {
+    return importanceScores.map((item, index) => {
+      const newReasons = [...item.reasons];
+      let newScore = item.score;
+      if (prioritizeFirst && index < 3) {
+        newScore *= 1.5;
+        newReasons.push("user-prioritize-first");
+      }
+      if (prioritizeLast && index >= importanceScores.length - 3) {
+        newScore *= 1.4;
+        newReasons.push("user-prioritize-last");
+      }
+      return {
+        ...item,
+        score: newScore,
+        reasons: newReasons
+      };
+    });
+  }
+  applyCompression(importanceScores, maxTokens, level, codeMap, bufferPercentage = 10) {
+    const sorted = [...importanceScores].sort((a, b) => b.score - a.score);
+    const threshold = this.getCompressionThreshold(sorted, level);
+    const important = sorted.filter((item) => item.score >= threshold);
+    let compressed = "";
+    let currentTokens = 0;
+    const bufferMultiplier = (100 - bufferPercentage) / 100;
+    const effectiveMaxTokens = Math.floor(maxTokens * bufferMultiplier);
+    for (const item of important) {
+      const sentenceTokens = this.estimateTokens(item.text);
+      if (currentTokens + sentenceTokens <= effectiveMaxTokens) {
+        compressed += item.text + ". ";
+        currentTokens += sentenceTokens;
+      }
+    }
+    let result = compressed.trim();
+    codeMap.forEach((code, placeholder) => {
+      result = result.replace(new RegExp(placeholder, "g"), code);
+    });
+    if (this.estimateTokens(result) > maxTokens) {
+      result = this.trimToTokenLimit(result, maxTokens);
+    }
+    return result;
+  }
+  getCompressionThreshold(sortedScores, level) {
+    if (sortedScores.length === 0)
+      return 1;
+    const scores = sortedScores.map((s) => s.score);
+    const median = scores[Math.floor(scores.length / 2)];
+    const max = Math.max(...scores);
+    switch (level) {
+      case "light":
+        return Math.max(median * 0.8, 1);
+      case "moderate":
+        return Math.max(median * 1.1, 1.2);
+      case "aggressive":
+        return Math.max(max * 0.7, median * 1.3);
+      default:
+        return Math.max(median * 1.1, 1.2);
+    }
+  }
+  segmentSentences(text, locale) {
+    try {
+      if (typeof Intl !== "undefined" && Intl.Segmenter) {
+        const segmenter = new Intl.Segmenter(locale || "en", {
+          granularity: "sentence"
+        });
+        const segments = Array.from(segmenter.segment(text));
+        return segments.map((segment) => segment.segment.trim()).filter((s) => s.length > 10);
+      }
+    } catch (error) {}
+    return text.split(/[.!?]+/).map((s) => s.trim()).filter((s) => s.length > 10);
+  }
+  trimToTokenLimit(text, maxTokens) {
+    const sentences = this.segmentSentences(text);
+    let result = "";
+    let tokens = 0;
+    for (const sentence of sentences) {
+      const sentenceTokens = this.estimateTokens(sentence + ".");
+      if (tokens + sentenceTokens <= maxTokens) {
+        result += sentence + ". ";
+        tokens += sentenceTokens;
+      } else {
+        break;
+      }
+    }
+    return result.trim();
+  }
+  estimateTokens(text) {
+    try {
+      if (typeof Intl !== "undefined" && Intl.Segmenter) {
+        const sample = text.slice(0, 200);
+        let cjkCount = 0;
+        let totalChars = 0;
+        for (const char of sample) {
+          const codePoint = char.codePointAt(0);
+          totalChars++;
+          if (codePoint >= 19968 && codePoint <= 40959 || codePoint >= 12352 && codePoint <= 12447 || codePoint >= 12448 && codePoint <= 12543 || codePoint >= 44032 && codePoint <= 55215) {
+            cjkCount++;
+          }
+        }
+        const cjkRatio = totalChars > 0 ? cjkCount / totalChars : 0;
+        if (cjkRatio > 0.3) {
+          return Math.ceil(text.length * 1.5);
+        } else {
+          return Math.ceil(text.length / 4);
+        }
+      }
+    } catch (error) {}
+    return Math.ceil(text.length / 4);
+  }
+}
+async function compressWithLanguageDetection(text, maxTokens = DEFAULT_CONFIG.maxTokens) {
+  const compressor = new TextCompressionUtility;
+  return await compressor.compress(text, {
+    maxTokens,
+    autoDetectLanguage: true,
+    preserveCodeBlocks: true,
+    compressionLevel: "moderate"
+  });
+}
 export {
   withErrorHandling,
   setupAITools,
@@ -4291,6 +4997,7 @@ export {
   formatSuccessResponse,
   formatErrorResponse,
   emptySchema,
+  compressWithLanguageDetection as compressText,
   exports_schemas_symbol as SymbolType,
   ErrorCode
 };

package/dist/types/index.d.ts CHANGED Viewed

@@ -7,3 +7,4 @@ export * as SymbolType from "./schemas/schemas_symbol";
 export type { EmptyType } from "./schemas/schemas_empty";
 export { emptySchema } from "./schemas/schemas_empty";
 export { loadVersion } from "./util_version";
+export { compressWithLanguageDetection as compressText } from "./util_text_compression";

package/dist/types/util_tests/__tests__/console-mock.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/types/util_tests/console-mock.d.ts ADDED Viewed

@@ -0,0 +1,49 @@
+/**
+ * Console Mock Utility
+ *
+ * Provides utilities for mocking console output during testing to keep test output clean.
+ * Can be used across multiple test files for consistent console mocking.
+ */
+export declare class ConsoleMock {
+    private originalMethods;
+    private isMocked;
+    /**
+     * Mock all console methods to prevent output during tests
+     */
+    mock(): void;
+    /**
+     * Restore original console methods
+     */
+    restore(): void;
+    /**
+     * Check if console is currently mocked
+     */
+    get isActive(): boolean;
+    /**
+     * Temporarily restore console methods for debugging purposes
+     * Returns a function to re-mock console methods
+     */
+    temporaryRestore(): () => void;
+}
+export declare const consoleMock: ConsoleMock;
+/**
+ * Convenience functions for common usage patterns
+ */
+/**
+ * Setup console mocking for a test suite (use in beforeAll)
+ */
+export declare function setupConsoleMock(): void;
+/**
+ * Cleanup console mocking for a test suite (use in afterAll)
+ */
+export declare function cleanupConsoleMock(): void;
+/**
+ * Higher-order function to run a function with console temporarily restored
+ * Useful for debugging specific tests
+ */
+export declare function withConsole<T>(fn: () => T): T;
+/**
+ * Higher-order function to run an async function with console temporarily restored
+ * Useful for debugging specific async tests
+ */
+export declare function withConsoleAsync<T>(fn: () => Promise<T>): Promise<T>;

package/dist/types/util_text_compression.d.ts CHANGED Viewed

@@ -1,6 +1,8 @@
+import { ModalityLogger } from './util_logger.js';
+import { ErrorCode } from './util_error.js';
 export interface CompressionConfig {
     maxTokens: number;
-    compressionLevel: 'light' | 'moderate' | 'aggressive';
+    compressionLevel: "light" | "moderate" | "aggressive";
     preserveCodeBlocks: boolean;
     autoDetectLanguage: boolean;
     enableLogging: boolean;
@@ -10,7 +12,7 @@ export interface CompressionConfig {
 export declare const DEFAULT_CONFIG: CompressionConfig;
 export interface CompressionOptions {
     maxTokens?: number;
-    compressionLevel?: 'light' | 'moderate' | 'aggressive';
+    compressionLevel?: "light" | "moderate" | "aggressive";
     preserveCodeBlocks?: boolean;
     autoDetectLanguage?: boolean;
     locale?: string;
@@ -23,7 +25,7 @@ export interface CompressionOptions {
     enableLogging?: boolean;
     sentenceSplitPattern?: RegExp;
     importanceWeights?: ImportanceWeights;
-    tokenizationMethod?: 'simple' | 'advanced';
+    tokenizationMethod?: "simple" | "advanced";
 }
 export interface ImportanceWeights {
     position: number;
@@ -54,26 +56,20 @@ export interface LanguageDetectionResult {
     script?: string;
     region?: string;
 }
-export declare class CompressionError extends Error {
-    code: string;
-    details?: any | undefined;
-    constructor(message: string, code: string, details?: any | undefined);
+export declare class CompressionError extends ErrorCode {
+    readonly code: string;
+    details?: any;
+    constructor(message: string, code: string, details?: any, originalError?: unknown);
 }
-export declare class LanguageDetectionError extends Error {
+export declare class LanguageDetectionError extends ErrorCode {
+    readonly code: string;
     fallbackLanguage: string;
-    constructor(message: string, fallbackLanguage: string);
-}
-export declare class CompressionLogger {
-    private enabled;
-    constructor(enabled?: boolean);
-    info(message: string, data?: any): void;
-    warn(message: string, data?: any): void;
-    error(message: string, error?: Error): void;
+    constructor(message: string, fallbackLanguage: string, originalError?: unknown);
 }
 export declare class UniversalLanguageDetector {
     private logger;
     private cache;
-    constructor(logger: CompressionLogger);
+    constructor(logger: ModalityLogger);
     detectLanguage(text: string): Promise<LanguageDetectionResult>;
     private performDetection;
     private analyzeUnicodeRanges;
@@ -82,14 +78,12 @@ export declare class UniversalLanguageDetector {
     private getUnicodeRelevanceScore;
     private getUnicodeBoost;
     private testLocaleWithIntlAPIs;
-    private enhanceWithTextAnalysis;
-    private detectFromTextHeuristics;
 }
 export declare class IntelligentImportanceAnalyzer {
     private wordFrequencyCache;
     private logger;
     private config;
-    constructor(logger: CompressionLogger, config: CompressionConfig);
+    constructor(logger: ModalityLogger, config: CompressionConfig);
     analyzeImportance(text: string, detectedLanguage?: string): Promise<Array<{
         text: string;
         score: number;

package/package.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "version": "0.6.1",
+  "version": "0.6.3",
   "name": "modality-kit",
   "repository": {
     "type": "git",
@@ -32,7 +32,7 @@
     "build:src": "bun build src/index.ts --outdir dist",
     "build": "bun run build:clean && bun run build:src && bun run build:types",
     "test": "bun test",
-    "prepublishOnly": "npm run test"
+    "prepublishOnly": "npm run build && npm run test"
   },
   "types": "./dist/types/index.d.ts",
   "files": [