toonify-mcp 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.es.md +270 -0
  2. package/README.id.md +270 -0
  3. package/README.ja.md +270 -0
  4. package/README.md +21 -10
  5. package/README.vi.md +270 -0
  6. package/README.zh-CN.md +270 -0
  7. package/README.zh-TW.md +27 -16
  8. package/dist/metrics/metrics-collector.d.ts +2 -0
  9. package/dist/metrics/metrics-collector.d.ts.map +1 -1
  10. package/dist/metrics/metrics-collector.js +43 -8
  11. package/dist/metrics/metrics-collector.js.map +1 -1
  12. package/dist/optimizer/caching/cache-optimizer.d.ts +53 -0
  13. package/dist/optimizer/caching/cache-optimizer.d.ts.map +1 -0
  14. package/dist/optimizer/caching/cache-optimizer.js +176 -0
  15. package/dist/optimizer/caching/cache-optimizer.js.map +1 -0
  16. package/dist/optimizer/caching/cache-strategies.d.ts +19 -0
  17. package/dist/optimizer/caching/cache-strategies.d.ts.map +1 -0
  18. package/dist/optimizer/caching/cache-strategies.js +62 -0
  19. package/dist/optimizer/caching/cache-strategies.js.map +1 -0
  20. package/dist/optimizer/caching/cache-types.d.ts +36 -0
  21. package/dist/optimizer/caching/cache-types.d.ts.map +1 -0
  22. package/dist/optimizer/caching/cache-types.js +5 -0
  23. package/dist/optimizer/caching/cache-types.js.map +1 -0
  24. package/dist/optimizer/caching/index.d.ts +7 -0
  25. package/dist/optimizer/caching/index.d.ts.map +1 -0
  26. package/dist/optimizer/caching/index.js +7 -0
  27. package/dist/optimizer/caching/index.js.map +1 -0
  28. package/dist/optimizer/multilingual/index.d.ts +7 -0
  29. package/dist/optimizer/multilingual/index.d.ts.map +1 -0
  30. package/dist/optimizer/multilingual/index.js +7 -0
  31. package/dist/optimizer/multilingual/index.js.map +1 -0
  32. package/dist/optimizer/multilingual/language-detector.d.ts +43 -0
  33. package/dist/optimizer/multilingual/language-detector.d.ts.map +1 -0
  34. package/dist/optimizer/multilingual/language-detector.js +161 -0
  35. package/dist/optimizer/multilingual/language-detector.js.map +1 -0
  36. package/dist/optimizer/multilingual/language-profiles.d.ts +34 -0
  37. package/dist/optimizer/multilingual/language-profiles.d.ts.map +1 -0
  38. package/dist/optimizer/multilingual/language-profiles.js +196 -0
  39. package/dist/optimizer/multilingual/language-profiles.js.map +1 -0
  40. package/dist/optimizer/multilingual/tokenizer-adapter.d.ts +47 -0
  41. package/dist/optimizer/multilingual/tokenizer-adapter.d.ts.map +1 -0
  42. package/dist/optimizer/multilingual/tokenizer-adapter.js +96 -0
  43. package/dist/optimizer/multilingual/tokenizer-adapter.js.map +1 -0
  44. package/dist/optimizer/token-optimizer.d.ts +11 -1
  45. package/dist/optimizer/token-optimizer.d.ts.map +1 -1
  46. package/dist/optimizer/token-optimizer.js +49 -8
  47. package/dist/optimizer/token-optimizer.js.map +1 -1
  48. package/dist/optimizer/types.d.ts +15 -0
  49. package/dist/optimizer/types.d.ts.map +1 -1
  50. package/package.json +2 -2
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cache-optimizer.js","sourceRoot":"","sources":["../../../src/optimizer/caching/cache-optimizer.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAEpD,MAAM,OAAO,cAAc;IACjB,MAAM,CAAc;IACpB,OAAO,CAAe;IAE9B,YAAY,SAA+B,EAAE;QAC3C,IAAI,CAAC,MAAM,GAAG;YACZ,OAAO,EAAE,IAAI;YACb,QAAQ,EAAE,MAAM;YAChB,GAAG,EAAE,OAAO;YACZ,kBAAkB,EAAE,IAAI;YACxB,kBAAkB,EAAE,IAAI;YACxB,GAAG,MAAM;SACV,CAAC;QAEF,IAAI,CAAC,OAAO,GAAG;YACb,SAAS,EAAE,CAAC;YACZ,WAAW,EAAE,CAAC;YACd,YAAY,EAAE,CAAC;YACf,qBAAqB,EAAE,CAAC;YACxB,oBAAoB,EAAE,CAAC;YACvB,sBAAsB,EAAE,CAAC;SAC1B,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,eAAe,CACb,WAAmB,EACnB,QAAgB,EAChB,MAA+B,EAC/B,cAAsB,EACtB,eAAuB;QAEvB,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,kBAAkB,EAAE,CAAC;YAC5D,8BAA8B;YAC9B,OAAO;gBACL,YAAY,EAAE,EAAE;gBAChB,cAAc,EAAE,IAAI,CAAC,gBAAgB,CAAC,WAAW,EAAE,QAAQ,EAAE,MAAM,CAAC;gBACpE,eAAe,EAAE,KAAK;aACvB,CAAC;QACJ,CAAC;QAED,+CAA+C;QAC/C,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QACnD,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,WAAW,EAAE,eAAe,CAAC,EAAE,CAAC;YACxD,OAAO;gBACL,YAAY,EAAE,EAAE;gBAChB,cAAc,EAAE,IAAI,CAAC,gBAAgB,CAAC,WAAW,EAAE,QAAQ,EAAE,MAAM,CAAC;gBACpE,eAAe,EAAE,KAAK;aACvB,CAAC;QACJ,CAAC;QAED,iCAAiC;QACjC,MAAM,YAAY,GAAG,IAAI,CAAC,kBAAkB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAE/D,0CAA0C;QAC1C,MAAM,cAAc,GAAG,aAAa,WAAW,EAAE,CAAC;QAElD,sBAAsB;QACtB,MAAM,kBAAkB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,iBAAiB;QAErE,MAAM,QAAQ,GAAkB;YAC9B,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC;gBACzC,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;gBAC1D,IAAI,CAAC,MAAM,CAAC,QAAQ;YACtB,kBAAkB;YAClB,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,GAAG;SACrB,CAAC;QAEF,iBAAiB;QACjB,IAAI,CAAC,OAAO,CAAC,oBAAoB,IAAI,kBAAkB,CAAC;QAExD,OAAO;YACL,YAAY;YACZ,cAAc;YACd,eAAe,EAAE,IAAI;YACrB,aAAa,EAAE,QAAQ;SACxB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,kBAAkB,CAAC,QAAgB,EAAE,MAAc;QACzD,OAAO,sCAAsC,QAAQ;;;;;;;;;;;;;;;;iBAgBxC,MAAM,CAAC,WAAW,EAAE;;;;;;;;;IASjC,CAAC;IACH,CAAC;I
AED;;OAEG;IACK,gBAAgB,CAAC,WAAmB,EAAE,QAAgB,EAAE,MAAc;QAC5E,OAAO,SAAS,MAAM,CAAC,WAAW,EAAE,MAAM,WAAW,EAAE,CAAC;IAC1D,CAAC;IAED;;OAEG;IACH,kBAAkB,CAAC,MAAqB;QACtC,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;YAC5B,OAAO,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,cAAc,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,OAAO;YACL;gBACE,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,MAAM,CAAC,YAAY;gBACzB,aAAa,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE;aACrC;YACD;gBACE,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,MAAM,CAAC,cAAc;aAC5B;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,MAAqB;QACnC,OAAO;YACL,MAAM,EAAE,MAAM,CAAC,YAAY,IAAI,4CAA4C;YAC3E,IAAI,EAAE,MAAM,CAAC,cAAc;SAC5B,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,UAAU;QACR,OAAO,EAAE,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;IAC7B,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,WAAmB;QAChC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,OAAO,CAAC,qBAAqB,IAAI,WAAW,CAAC;QAClD,IAAI,CAAC,aAAa,EAAE,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,eAAe;QACb,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;QAC3B,IAAI,CAAC,aAAa,EAAE,CAAC;IACvB,CAAC;IAED;;OAEG;IACK,aAAa;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC;QAChE,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7E,CAAC;IAED;;OAEG;IACH,YAAY;QACV,IAAI,CAAC,OAAO,GAAG;YACb,SAAS,EAAE,CAAC;YACZ,WAAW,EAAE,CAAC;YACd,YAAY,EAAE,CAAC;YACf,qBAAqB,EAAE,CAAC;YACxB,oBAAoB,EAAE,CAAC;YACvB,sBAAsB,EAAE,CAAC;SAC1B,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Caching strategies for different providers
3
+ */
4
+ import type { CacheStrategy } from './cache-types.js';
5
+ /**
6
+ * Anthropic Prompt Caching Strategy
7
+ * https://docs.anthropic.com/claude/docs/prompt-caching
+ *
+ * Type declaration for the strategy object implemented in cache-strategies.js:
+ * its shouldCache returns true for content of at least 1024 tokens, and its
+ * formatCacheStructure returns two text blocks, with
+ * cache_control: { type: 'ephemeral' } set on the static-prefix block only.
8
+ */
9
+ export declare const anthropicStrategy: CacheStrategy;
10
+ /**
11
+ * OpenAI Prompt Caching Strategy (Placeholder)
12
+ * Note: OpenAI's caching is automatic and not configurable via API
+ *
+ * Type declaration for the strategy object implemented in cache-strategies.js:
+ * its shouldCache returns true for content of at least 500 tokens, and its
+ * formatCacheStructure returns a plain { system, user } object without any
+ * cache-control markers.
13
+ */
14
+ export declare const openaiStrategy: CacheStrategy;
15
+ /**
16
+ * Get strategy by provider name
+ *
+ * @param provider 'anthropic' selects anthropicStrategy and 'openai' selects
+ *   openaiStrategy; 'auto' selects anthropicStrategy when the
+ *   ANTHROPIC_API_KEY environment variable is set to a non-empty value and
+ *   openaiStrategy otherwise.
+ * @returns The selected CacheStrategy object.
17
+ */
18
+ export declare function getStrategy(provider: 'anthropic' | 'openai' | 'auto'): CacheStrategy;
19
+ //# sourceMappingURL=cache-strategies.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cache-strategies.d.ts","sourceRoot":"","sources":["../../../src/optimizer/caching/cache-strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAiB,MAAM,kBAAkB,CAAC;AAErE;;;GAGG;AACH,eAAO,MAAM,iBAAiB,EAAE,aAsB/B,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,cAAc,EAAE,aAiB5B,CAAC;AAEF;;GAEG;AACH,wBAAgB,WAAW,CAAC,QAAQ,EAAE,WAAW,GAAG,QAAQ,GAAG,MAAM,GAAG,aAAa,CAUpF"}
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Caching strategies for different providers
3
+ */
4
/**
 * Prompt-caching strategy for Anthropic models.
 * Reference: https://docs.anthropic.com/claude/docs/prompt-caching
 *
 * The static prefix is emitted as a text block flagged for caching and the
 * dynamic content follows as an ordinary text block.
 */
export const anthropicStrategy = {
    name: 'anthropic',

    // The decision is made on the token count alone; `content` is accepted only
    // because the strategy interface passes it. 1024 tokens is the smallest
    // prompt this strategy is willing to mark as cacheable.
    shouldCache: (content, tokens) => tokens >= 1024,

    // Build the two-element content array: cacheable prefix first, then the
    // per-request remainder without any cache marker.
    formatCacheStructure: (cached) => {
        const cacheablePrefix = {
            type: 'text',
            text: cached.staticPrefix,
            cache_control: { type: 'ephemeral' },
        };
        const perRequestPart = {
            type: 'text',
            text: cached.dynamicContent,
        };
        return [cacheablePrefix, perRequestPart];
    },
};
29
/**
 * Prompt-caching strategy for OpenAI models (placeholder).
 *
 * OpenAI applies prompt caching automatically and exposes no API switch for
 * it, so this strategy only arranges the prompt so the reusable part comes
 * first.
 */
export const openaiStrategy = {
    name: 'openai',

    // Nothing has to be requested from the provider; the threshold only
    // decides whether splitting the prompt is worthwhile. `content` is unused.
    shouldCache: (content, tokens) => tokens >= 500,

    // No cache_control marker exists on this side: hand back the static part
    // as `system` and the dynamic part as `user`.
    formatCacheStructure: ({ staticPrefix, dynamicContent }) => ({
        system: staticPrefix,
        user: dynamicContent,
    }),
};
49
/**
 * Resolve a provider name to its caching strategy.
 *
 * @param {'anthropic' | 'openai' | 'auto'} provider - Explicit provider, or
 *   'auto' to choose from the environment.
 * @returns {object} anthropicStrategy or openaiStrategy.
 */
export function getStrategy(provider) {
    // 'auto' looks at the environment: a non-empty ANTHROPIC_API_KEY means
    // Anthropic. Any explicit value other than 'anthropic' resolves to OpenAI.
    const useAnthropic = provider === 'auto'
        ? Boolean(process.env.ANTHROPIC_API_KEY)
        : provider === 'anthropic';
    return useAnthropic ? anthropicStrategy : openaiStrategy;
}
62
+ //# sourceMappingURL=cache-strategies.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cache-strategies.js","sourceRoot":"","sources":["../../../src/optimizer/caching/cache-strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH;;;GAGG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAkB;IAC9C,IAAI,EAAE,WAAW;IAEjB,WAAW,CAAC,OAAe,EAAE,MAAc;QACzC,+DAA+D;QAC/D,gDAAgD;QAChD,OAAO,MAAM,IAAI,IAAI,CAAC;IACxB,CAAC;IAED,oBAAoB,CAAC,MAAqB;QACxC,OAAO;YACL;gBACE,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,MAAM,CAAC,YAAY;gBACzB,aAAa,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE;aACrC;YACD;gBACE,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,MAAM,CAAC,cAAc;aAC5B;SACF,CAAC;IACJ,CAAC;CACF,CAAC;AAEF;;;GAGG;AACH,MAAM,CAAC,MAAM,cAAc,GAAkB;IAC3C,IAAI,EAAE,QAAQ;IAEd,WAAW,CAAC,OAAe,EAAE,MAAc;QACzC,gCAAgC;QAChC,8CAA8C;QAC9C,OAAO,MAAM,IAAI,GAAG,CAAC;IACvB,CAAC;IAED,oBAAoB,CAAC,MAAqB;QACxC,6CAA6C;QAC7C,iCAAiC;QACjC,OAAO;YACL,MAAM,EAAE,MAAM,CAAC,YAAY;YAC3B,IAAI,EAAE,MAAM,CAAC,cAAc;SAC5B,CAAC;IACJ,CAAC;CACF,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,QAAyC;IACnE,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QACxB,mCAAmC;QACnC,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;YAClC,OAAO,iBAAiB,CAAC;QAC3B,CAAC;QACD,OAAO,cAAc,CAAC;IACxB,CAAC;IAED,OAAO,QAAQ,KAAK,WAAW,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,cAAc,CAAC;AACvE,CAAC"}
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Type definitions for prompt caching integration
3
+ */
4
+ export interface CacheConfig { // Options for the prompt-caching integration; the code that reads them is not shown alongside this declaration
5
+ enabled: boolean; // presumably the master on/off switch — TODO confirm against cache-optimizer
6
+ provider: 'anthropic' | 'openai' | 'auto'; // same union that getStrategy() accepts
7
+ ttl?: '5min' | '1hour'; // optional; only these two literal values are allowed
8
+ cacheStaticPrompts: boolean; // NOTE(review): exact effect not visible here — confirm in cache-optimizer
9
+ minCacheableTokens?: number; // NOTE(review): the shipped strategies hard-code 1024 / 500 in shouldCache; confirm where this option is read
10
+ }
11
+ export interface CachedContent { // A prompt split into a reusable prefix and a per-request remainder
12
+ staticPrefix: string; // emitted first by both shipped strategies (Anthropic: the text block carrying cache_control; OpenAI: `system`)
13
+ dynamicContent: string; // emitted second (Anthropic: plain text block; OpenAI: `user`)
14
+ cacheBreakpoint: boolean; // NOTE(review): not read by the strategies in cache-strategies.js — confirm its consumer
15
+ cacheMetadata?: CacheMetadata; // optional descriptive metadata
16
+ }
17
+ export interface CacheMetadata { // Descriptive data that may accompany a CachedContent
18
+ provider: 'anthropic' | 'openai'; // concrete provider only — 'auto' is not part of this union
19
+ estimatedCacheSize: number; // NOTE(review): unit (tokens vs. characters) is not visible here — confirm
20
+ cacheKey?: string;
21
+ ttl?: string; // plain string, unlike the '5min' | '1hour' literals on CacheConfig.ttl
22
+ }
23
+ export interface CacheMetrics { // Numeric counters describing cache usage; how they are updated is not shown alongside this declaration
24
+ cacheHits: number;
25
+ cacheMisses: number;
26
+ cacheHitRate: number; // presumably hits / (hits + misses) — TODO confirm formula and range
27
+ estimatedCacheSavings: number; // NOTE(review): unit not visible here — confirm
28
+ totalCacheableTokens: number;
29
+ averageCacheReuseCount: number;
30
+ }
31
+ export interface CacheStrategy { // Provider-specific caching policy; anthropicStrategy and openaiStrategy implement it
32
+ name: string; // provider identifier ('anthropic' / 'openai' in the shipped strategies)
33
+ shouldCache: (content: string, tokens: number) => boolean; // the shipped strategies decide on `tokens` alone and ignore `content`
34
+ formatCacheStructure: (content: CachedContent) => any; // return shape differs per provider (array of text blocks vs. { system, user }), hence `any`
35
+ }
36
+ //# sourceMappingURL=cache-types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cache-types.d.ts","sourceRoot":"","sources":["../../../src/optimizer/caching/cache-types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,WAAW,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC1C,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,kBAAkB,EAAE,OAAO,CAAC;IAC5B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,WAAW,aAAa;IAC5B,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,OAAO,CAAC;IACzB,aAAa,CAAC,EAAE,aAAa,CAAC;CAC/B;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,WAAW,GAAG,QAAQ,CAAC;IACjC,kBAAkB,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,oBAAoB,EAAE,MAAM,CAAC;IAC7B,sBAAsB,EAAE,MAAM,CAAC;CAChC;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC;IAC1D,oBAAoB,EAAE,CAAC,OAAO,EAAE,aAAa,KAAK,GAAG,CAAC;CACvD"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Type definitions for prompt caching integration
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=cache-types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cache-types.js","sourceRoot":"","sources":["../../../src/optimizer/caching/cache-types.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Caching module exports
3
+ */
4
+ export * from './cache-types.js';
5
+ export * from './cache-strategies.js';
6
+ export { CacheOptimizer } from './cache-optimizer.js';
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/optimizer/caching/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,kBAAkB,CAAC;AACjC,cAAc,uBAAuB,CAAC;AACtC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Caching module exports
3
+ */
4
+ export * from './cache-types.js';
5
+ export * from './cache-strategies.js';
6
+ export { CacheOptimizer } from './cache-optimizer.js';
7
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/optimizer/caching/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,kBAAkB,CAAC;AACjC,cAAc,uBAAuB,CAAC;AACtC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Multilingual tokenizer module exports
3
+ */
4
+ export * from './language-profiles.js';
5
+ export * from './language-detector.js';
6
+ export * from './tokenizer-adapter.js';
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/optimizer/multilingual/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Multilingual tokenizer module exports
3
+ */
4
+ export * from './language-profiles.js';
5
+ export * from './language-detector.js';
6
+ export * from './tokenizer-adapter.js';
7
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/optimizer/multilingual/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC"}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Language detection for token estimation
3
+ */
4
+ import { type LanguageProfile } from './language-profiles.js';
5
+ export interface LanguageDetectionResult { // Outcome of LanguageDetector.detect()
6
+ language: LanguageProfile; // best-scoring profile, or the first entry of LANGUAGE_PROFILES (English) on fallback
7
+ confidence: number; // 0 for empty input, 0.5 on the low-score fallback, otherwise the winning profile's score
8
+ detectedPatterns: number; // how many of the winning profile's detectionPatterns matched; 0 on either fallback
9
+ }
10
+ export declare class LanguageDetector { // Regex-based language detector used to scale token estimates
11
+ private sampleSize; // number of leading characters of the input that are inspected
12
+ constructor(sampleSize?: number); // the implementation defaults sampleSize to 500
13
+ /**
14
+ * Detect language from text content
+ *
+ * Scores every entry of LANGUAGE_PROFILES against the first `sampleSize`
+ * characters and returns the best one. Empty or whitespace-only text yields
+ * the first profile (English) with confidence 0; a best score below 0.1
+ * yields the first profile with confidence 0.5.
15
+ */
16
+ detect(text: string): LanguageDetectionResult;
17
+ /**
18
+ * Detect if text is mixed-language
+ *
+ * Returns every profile that has at least one detection pattern matching
+ * the sampled text, in LANGUAGE_PROFILES order. When both Chinese ('zh') and
+ * Japanese ('ja') are found but the sample has no Hiragana or Katakana,
+ * Japanese is removed from the result.
19
+ */
20
+ detectMixed(text: string): LanguageProfile[];
21
+ /**
22
+ * Estimate token multiplier for mixed-language content
+ *
+ * Returns 1.0 for an empty list, the profile's own multiplier for a single
+ * language, and otherwise 0.4 * average + 0.6 * maximum of the multipliers.
23
+ */
24
+ estimateMultiplierForMixed(languages: LanguageProfile[]): number;
25
+ /**
26
+ * Estimate tokens with language awareness
+ *
+ * Returns baseTokens unchanged when detect() reports a confidence below
+ * 0.3; otherwise baseTokens times the detected language's tokenMultiplier,
+ * rounded up.
27
+ */
28
+ estimateTokens(text: string, baseTokens: number): number;
29
+ /**
30
+ * Estimate tokens for mixed-language content
+ *
+ * Returns baseTokens times estimateMultiplierForMixed(detectMixed(text)),
+ * rounded up.
31
+ */
32
+ estimateTokensMixed(text: string, baseTokens: number): number;
33
+ /**
34
+ * Get detailed language breakdown
+ *
+ * Combines detect() and detectMixed(); the text counts as mixed when more
+ * than one profile was detected.
35
+ */
36
+ analyze(text: string): {
37
+ primary: LanguageDetectionResult; // result of detect(text)
38
+ all: LanguageProfile[]; // result of detectMixed(text)
39
+ estimatedMultiplier: number; // mixed multiplier when isMixed, else the primary language's tokenMultiplier
40
+ isMixed: boolean; // true when `all` holds more than one profile
41
+ };
42
+ }
43
+ //# sourceMappingURL=language-detector.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"language-detector.d.ts","sourceRoot":"","sources":["../../../src/optimizer/multilingual/language-detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAqB,KAAK,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAEjF,MAAM,WAAW,uBAAuB;IACtC,QAAQ,EAAE,eAAe,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,UAAU,CAAS;gBAEf,UAAU,GAAE,MAAY;IAIpC;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,uBAAuB;IA+E7C;;OAEG;IACH,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,eAAe,EAAE;IAgC5C;;OAEG;IACH,0BAA0B,CAAC,SAAS,EAAE,eAAe,EAAE,GAAG,MAAM;IAahE;;OAEG;IACH,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM;IAYxD;;OAEG;IACH,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM;IAM7D;;OAEG;IACH,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG;QACrB,OAAO,EAAE,uBAAuB,CAAC;QACjC,GAAG,EAAE,eAAe,EAAE,CAAC;QACvB,mBAAmB,EAAE,MAAM,CAAC;QAC5B,OAAO,EAAE,OAAO,CAAC;KAClB;CAeF"}
@@ -0,0 +1,161 @@
1
+ /**
2
+ * Language detection for token estimation
3
+ */
4
+ import { LANGUAGE_PROFILES } from './language-profiles.js';
5
/**
 * Count every occurrence of a detection pattern in the sample.
 *
 * The pattern is re-compiled from its source with its own flags plus `g`.
 * Passing the RegExp object itself together with a flags string would replace
 * its flags and silently drop `i`/`u`, making case-insensitive patterns
 * case-sensitive.
 *
 * @param {RegExp} pattern - Detection pattern from a language profile.
 * @param {string} sample - Text to scan.
 * @returns {number} Number of non-overlapping matches (0 when none).
 */
function countPatternMatches(pattern, sample) {
    const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
    const matches = sample.match(new RegExp(pattern.source, flags));
    return matches === null ? 0 : matches.length;
}

/**
 * Regex-based language detector used to scale token estimates.
 *
 * Only the first `sampleSize` characters of the input are inspected. Profiles
 * are read from LANGUAGE_PROFILES, whose first entry (English) is the fallback.
 */
export class LanguageDetector {
    sampleSize;

    /**
     * @param {number} [sampleSize=500] - Number of leading characters to inspect.
     */
    constructor(sampleSize = 500) {
        this.sampleSize = sampleSize;
    }

    /**
     * Detect the dominant language of a text.
     *
     * @param {string} text - Text to classify.
     * @returns {{language: object, confidence: number, detectedPatterns: number}}
     *   The best-scoring profile. Empty or whitespace-only input yields the
     *   first profile with confidence 0; a best score below 0.1 yields the
     *   first profile with confidence 0.5.
     */
    detect(text) {
        if (!text || text.trim().length === 0) {
            return {
                language: LANGUAGE_PROFILES[0], // Default to English
                confidence: 0,
                detectedPatterns: 0,
            };
        }

        // Only the head of the text is scanned, to bound the regex work.
        const sample = text.slice(0, this.sampleSize);
        const sampleLength = Math.max(sample.length, 1);

        const scores = LANGUAGE_PROFILES.map((profile) => {
            let matchedPatterns = 0;
            let totalMatches = 0;
            for (const pattern of profile.detectionPatterns) {
                const hits = countPatternMatches(pattern, sample);
                if (hits > 0) {
                    matchedPatterns++;
                    totalMatches += hits;
                }
            }

            // Share of the profile's patterns that matched. A profile without
            // patterns scores 0 instead of producing NaN from 0 / 0.
            const patternCount = profile.detectionPatterns.length;
            const patternScore = patternCount > 0 ? matchedPatterns / patternCount : 0;

            // Match density, capped at 1: roughly one match per 5 characters
            // counts as full density. Per-character scripts reach the cap fast.
            const densityScore = Math.min(totalMatches / (sampleLength / 5), 1.0);

            // Very dense matches are a strong signal even when only one
            // pattern matched, so density gets the larger weight then.
            const patternWeight = densityScore > 0.9 ? 0.4 : 0.7;
            const densityWeight = 1.0 - patternWeight;

            let boost = 1.0;
            if (patternScore === 1.0) {
                boost = 1.1; // All patterns matched
            }
            if (densityScore > 0.95) {
                boost = Math.max(boost, 1.05); // Very high density
            }

            const rawConfidence = (patternScore * patternWeight + densityScore * densityWeight) * boost;
            const confidence = Math.min(rawConfidence, 1.0) * profile.confidence;
            return { profile, confidence, matchedPatterns };
        });

        // On ties the later profile wins, as the comparison is strict.
        const best = scores.reduce((a, b) => (a.confidence > b.confidence ? a : b));

        if (best.confidence < 0.1) {
            return {
                language: LANGUAGE_PROFILES[0], // English
                confidence: 0.5, // Fixed fallback value
                detectedPatterns: 0,
            };
        }

        return {
            language: best.profile,
            confidence: best.confidence,
            detectedPatterns: best.matchedPatterns,
        };
    }

    /**
     * List every language that has at least one pattern matching the sample.
     *
     * @param {string} text - Text to inspect; null, undefined or '' yield [].
     * @returns {object[]} Matching profiles in LANGUAGE_PROFILES order.
     */
    detectMixed(text) {
        // Mirror detect(): missing input is "nothing detected", not a crash.
        if (!text) {
            return [];
        }

        const sample = text.slice(0, this.sampleSize);

        // String#search ignores the `g` flag and lastIndex, so a profile that
        // ships a global regex cannot make results depend on earlier calls.
        const detected = LANGUAGE_PROFILES.filter((profile) =>
            profile.detectionPatterns.some((pattern) => sample.search(pattern) !== -1));

        // Kanji overlaps with Chinese: when both are detected, keep Japanese
        // only if the sample contains Hiragana or Katakana.
        const hasChinese = detected.some((p) => p.code === 'zh');
        const hasJapanese = detected.some((p) => p.code === 'ja');
        if (hasChinese && hasJapanese) {
            const hasHiragana = /[\u3040-\u309f]/.test(sample);
            const hasKatakana = /[\u30a0-\u30ff]/.test(sample);
            if (!hasHiragana && !hasKatakana) {
                return detected.filter((p) => p.code !== 'ja');
            }
        }
        return detected;
    }

    /**
     * Combine the token multipliers of several languages.
     *
     * @param {object[]} languages - Profiles, e.g. from detectMixed().
     * @returns {number} 1.0 for none, the profile's multiplier for one,
     *   otherwise 0.4 * average + 0.6 * maximum (deliberately leaning high).
     */
    estimateMultiplierForMixed(languages) {
        if (languages.length === 0) {
            return 1.0;
        }
        if (languages.length === 1) {
            return languages[0].tokenMultiplier;
        }
        const multipliers = languages.map((l) => l.tokenMultiplier);
        const max = Math.max(...multipliers);
        const avg = multipliers.reduce((a, b) => a + b, 0) / multipliers.length;
        return avg * 0.4 + max * 0.6;
    }

    /**
     * Scale a base token count by the detected language's multiplier.
     *
     * @param {string} text - Text the estimate belongs to.
     * @param {number} baseTokens - Language-agnostic token estimate.
     * @returns {number} baseTokens unchanged when confidence is below 0.3,
     *   otherwise the scaled estimate rounded up.
     */
    estimateTokens(text, baseTokens) {
        const detection = this.detect(text);
        if (detection.confidence < 0.3) {
            return baseTokens;
        }
        return Math.ceil(baseTokens * detection.language.tokenMultiplier);
    }

    /**
     * Scale a base token count using every language found in the text.
     *
     * @param {string} text - Text the estimate belongs to.
     * @param {number} baseTokens - Language-agnostic token estimate.
     * @returns {number} The scaled estimate rounded up.
     */
    estimateTokensMixed(text, baseTokens) {
        const languages = this.detectMixed(text);
        const multiplier = this.estimateMultiplierForMixed(languages);
        return Math.ceil(baseTokens * multiplier);
    }

    /**
     * Produce the primary detection plus the mixed-language breakdown.
     *
     * @param {string} text - Text to analyze.
     * @returns {{primary: object, all: object[], estimatedMultiplier: number, isMixed: boolean}}
     *   `isMixed` is true when more than one profile was detected;
     *   `estimatedMultiplier` is the mixed multiplier in that case and the
     *   primary language's multiplier otherwise.
     */
    analyze(text) {
        const primary = this.detect(text);
        const all = this.detectMixed(text);
        const isMixed = all.length > 1;
        const estimatedMultiplier = isMixed
            ? this.estimateMultiplierForMixed(all)
            : primary.language.tokenMultiplier;
        return {
            primary,
            all,
            estimatedMultiplier,
            isMixed,
        };
    }
}
161
+ //# sourceMappingURL=language-detector.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"language-detector.js","sourceRoot":"","sources":["../../../src/optimizer/multilingual/language-detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,iBAAiB,EAAwB,MAAM,wBAAwB,CAAC;AAQjF,MAAM,OAAO,gBAAgB;IACnB,UAAU,CAAS;IAE3B,YAAY,aAAqB,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,IAAY;QACjB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtC,OAAO;gBACL,QAAQ,EAAE,iBAAiB,CAAC,CAAC,CAAC,EAAE,qBAAqB;gBACrD,UAAU,EAAE,CAAC;gBACb,gBAAgB,EAAE,CAAC;aACpB,CAAC;QACJ,CAAC;QAED,kEAAkE;QAClE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;QAE9C,8BAA8B;QAC9B,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE;YAC7C,IAAI,eAAe,GAAG,CAAC,CAAC;YACxB,IAAI,YAAY,GAAG,CAAC,CAAC;YAErB,KAAK,MAAM,OAAO,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC;gBAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;gBACvD,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAClC,eAAe,EAAE,CAAC;oBAClB,YAAY,IAAI,OAAO,CAAC,MAAM,CAAC;gBACjC,CAAC;YACH,CAAC;YAED,iCAAiC;YACjC,gCAAgC;YAChC,+DAA+D;YAC/D,mCAAmC;YACnC,MAAM,YAAY,GAAG,eAAe,GAAG,OAAO,CAAC,iBAAiB,CAAC,MAAM,CAAC;YAExE,4DAA4D;YAC5D,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;YAChD,8EAA8E;YAC9E,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,YAAY,GAAG,CAAC,YAAY,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,8BAA8B;YACrG,MAAM,YAAY,GAAG,YAAY,CAAC;YAElC,6DAA6D;YAC7D,0EAA0E;YAC1E,MAAM,aAAa,GAAG,YAAY,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;YACrD,MAAM,aAAa,GAAG,GAAG,GAAG,aAAa,CAAC;YAE1C,iDAAiD;YACjD,IAAI,KAAK,GAAG,GAAG,CAAC;YAChB,IAAI,YAAY,KAAK,GAAG;gBAAE,KAAK,GAAG,GAAG,CAAC,CAAC,uBAAuB;YAC9D,IAAI,YAAY,GAAG,IAAI;gBAAE,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,oBAAoB;YAE5E,6BAA6B;YAC7B,MAAM,aAAa,GAAG,CAAC,YAAY,GAAG,aAAa,GAAG,YAAY,GAAG,aAAa,CAAC,GAAG,KAAK,CAAC;YAC5F,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,aAAa,EAAE,GAAG,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC;YAErE,OAAO;gBACL,OAAO;gBACP,UAAU;gBACV,eAAe;aAChB,CAAC;QAC
J,CAAC,CAAC,CAAC;QAEH,oCAAoC;QACpC,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAClC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CACpC,CAAC;QAEF,+CAA+C;QAC/C,IAAI,IAAI,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;YAC1B,OAAO;gBACL,QAAQ,EAAE,iBAAiB,CAAC,CAAC,CAAC,EAAE,UAAU;gBAC1C,UAAU,EAAE,GAAG,EAAE,0BAA0B;gBAC3C,gBAAgB,EAAE,CAAC;aACpB,CAAC;QACJ,CAAC;QAED,OAAO;YACL,QAAQ,EAAE,IAAI,CAAC,OAAO;YACtB,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,gBAAgB,EAAE,IAAI,CAAC,eAAe;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,IAAY;QACtB,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAsB,EAAE,CAAC;QAEvC,KAAK,MAAM,OAAO,IAAI,iBAAiB,EAAE,CAAC;YACxC,KAAK,MAAM,OAAO,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC;gBAChD,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;oBACzB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;oBACvB,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;QAED,2DAA2D;QAC3D,oFAAoF;QACpF,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QACvD,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QAExD,IAAI,UAAU,IAAI,WAAW,EAAE,CAAC;YAC9B,qDAAqD;YACrD,MAAM,WAAW,GAAG,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACnD,MAAM,WAAW,GAAG,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEnD,IAAI,CAAC,WAAW,IAAI,CAAC,WAAW,EAAE,CAAC;gBACjC,mDAAmD;gBACnD,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;YAC/C,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,0BAA0B,CAAC,SAA4B;QACrD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,GAAG,CAAC;QACvC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC;QAEhE,6DAA6D;QAC7D,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;QAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,WAAW,CAAC,CAAC;QACrC,MAAM,GAAG,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC;QAErE,uCAAuC;QACvC,OAAO,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;IAC/B,CAAC;IAED;;O
AEG;IACH,cAAc,CAAC,IAAY,EAAE,UAAkB;QAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAEpC,0CAA0C;QAC1C,IAAI,SAAS,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;YAC/B,OAAO,UAAU,CAAC;QACpB,CAAC;QAED,4BAA4B;QAC5B,OAAO,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACpE,CAAC;IAED;;OAEG;IACH,mBAAmB,CAAC,IAAY,EAAE,UAAkB;QAClD,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,UAAU,GAAG,IAAI,CAAC,0BAA0B,CAAC,SAAS,CAAC,CAAC;QAC9D,OAAO,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,OAAO,CAAC,IAAY;QAMlB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAClC,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;QAC/B,MAAM,mBAAmB,GAAG,OAAO;YACjC,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,GAAG,CAAC;YACtC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC;QAErC,OAAO;YACL,OAAO;YACP,GAAG;YACH,mBAAmB;YACnB,OAAO;SACR,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Language profiles with token multipliers
3
+ * Based on research: different languages require different numbers of tokens
4
+ */
5
+ export interface LanguageProfile { // One supported language plus the data used to detect it and to scale token estimates
6
+ code: string; // short language code such as 'en', 'zh', 'ja'; the key used by getLanguageProfile()
7
+ name: string; // English name of the language
8
+ nativeName: string; // name written in the language itself
9
+ tokenMultiplier: number; // factor applied to a base token count (the English profile uses 1.0)
10
+ detectionPatterns: RegExp[]; // the detector counts matches of these against the sampled text
11
+ confidence: number; // multiplied into the detector's computed score for this profile (shipped values range from 0.8 to 1.0)
12
+ }
13
+ /**
14
+ * Token multipliers based on research and testing
15
+ *
16
+ * Sources:
17
+ * - Anthropic: https://docs.anthropic.com/claude/docs/models-overview
18
+ * - OpenAI tokenizer analysis
19
+ * - Community research on multilingual token efficiency
+ *
+ * The English profile is the first element. LanguageDetector returns index 0
+ * as its fallback, so the position of that entry matters.
20
+ */
21
+ export declare const LANGUAGE_PROFILES: LanguageProfile[];
22
+ /**
23
+ * Get language profile by code
+ *
+ * @param code Language code to look up (exact, case-sensitive comparison).
+ * @returns The first profile in LANGUAGE_PROFILES with that code, or null.
24
+ */
25
+ export declare function getLanguageProfile(code: string): LanguageProfile | null;
26
+ /**
27
+ * Get all supported language codes
+ *
+ * @returns The `code` of every profile, in LANGUAGE_PROFILES order.
28
+ */
29
+ export declare function getSupportedLanguages(): string[];
30
+ /**
31
+ * Get token multiplier for a language code
+ *
+ * @param code Language code to look up.
+ * @returns The profile's tokenMultiplier, or 1.0 when the code is unknown.
32
+ */
33
+ export declare function getTokenMultiplier(code: string): number;
34
+ //# sourceMappingURL=language-profiles.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"language-profiles.d.ts","sourceRoot":"","sources":["../../../src/optimizer/multilingual/language-profiles.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;;GAOG;AACH,eAAO,MAAM,iBAAiB,EAAE,eAAe,EAmK9C,CAAC;AAEF;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,eAAe,GAAG,IAAI,CAEvE;AAED;;GAEG;AACH,wBAAgB,qBAAqB,IAAI,MAAM,EAAE,CAEhD;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGvD"}
@@ -0,0 +1,196 @@
1
+ /**
2
+ * Language profiles with token multipliers
3
+ * Based on research: different languages require different numbers of tokens
4
+ */
5
/**
 * Characters treated as "part of a word" when matching stop words that
 * contain non-ASCII letters: ASCII word characters, accented Latin letters,
 * combining diacritics, Cyrillic, and the Latin Extended Additional block
 * used by Vietnamese. Written as a character-class body with \uXXXX escapes
 * so the resulting patterns work without the `u` flag.
 */
const WORD_CHARS = 'a-zA-Z0-9_\\u00c0-\\u024f\\u0300-\\u036f\\u0400-\\u04ff\\u1e00-\\u1eff';

/**
 * Build a case-insensitive whole-word pattern for a stop-word alternation.
 *
 * `\b` only recognises ASCII word characters, so `\b(à|и)\b` can never match
 * a word that starts or ends with a non-ASCII letter, and it does match
 * `và` inside `vàng`. Explicit lookarounds over WORD_CHARS avoid both.
 *
 * @param {string} alternatives - Words joined by `|`.
 * @returns {RegExp} Pattern with one capture group and the `i` flag.
 */
function wholeWord(alternatives) {
    return new RegExp(`(?<![${WORD_CHARS}])(${alternatives})(?![${WORD_CHARS}])`, 'i');
}

/**
 * Language profiles with their token multipliers and detection patterns.
 *
 * Multiplier sources named by the package authors:
 * - Anthropic: https://docs.anthropic.com/claude/docs/models-overview
 * - OpenAI tokenizer analysis
 * - Community research on multilingual token efficiency
 *
 * English must stay the first entry: the detector uses index 0 as fallback.
 * Profiles whose stop words are pure ASCII keep `\b`; those with non-ASCII
 * word edges (fr, ru, pt, vi) use wholeWord().
 */
export const LANGUAGE_PROFILES = [
    {
        code: 'en',
        name: 'English',
        nativeName: 'English',
        tokenMultiplier: 1.0,
        detectionPatterns: [
            /[a-zA-Z]{2,}/, // At least 2 consecutive English letters
            /\b(the|is|are|was|were|have|has|had|do|does|did|a|an|and|or|but|in|on|at|to|for|of|with|be|this|that|from|as|by|they|we|you|he|she|it|not|can|will|would|could|should|may|might|must|hello|world)\b/i
        ],
        confidence: 1.0
    },
    {
        code: 'es',
        name: 'Spanish',
        nativeName: 'Español',
        tokenMultiplier: 1.7,
        detectionPatterns: [
            /[áéíóúüñ¿¡]/i,
            /\b(el|la|los|las|un|una|de|en|que|y|es|por)\b/i
        ],
        confidence: 0.9
    },
    {
        code: 'fr',
        name: 'French',
        nativeName: 'Français',
        tokenMultiplier: 1.8,
        detectionPatterns: [
            /[àâäéèêëïîôùûüÿç]/i,
            wholeWord('le|la|les|un|une|de|et|est|à|en|que')
        ],
        confidence: 0.9
    },
    {
        code: 'de',
        name: 'German',
        nativeName: 'Deutsch',
        tokenMultiplier: 1.6,
        detectionPatterns: [
            /[äöüß]/i,
            /\b(der|die|das|den|dem|des|ein|eine|und|ist|in)\b/i
        ],
        confidence: 0.9
    },
    {
        code: 'zh',
        name: 'Chinese',
        nativeName: '中文',
        tokenMultiplier: 2.0,
        detectionPatterns: [
            /[\u4e00-\u9fff]/,
            /[\u3400-\u4dbf]/ // CJK Extension A
        ],
        confidence: 0.95
    },
    {
        code: 'ja',
        name: 'Japanese',
        nativeName: '日本語',
        tokenMultiplier: 2.5,
        detectionPatterns: [
            /[\u3040-\u309f]/, // Hiragana
            /[\u30a0-\u30ff]/, // Katakana
            /[\u4e00-\u9fff]/ // Kanji (overlaps with Chinese)
        ],
        confidence: 0.9
    },
    {
        code: 'ko',
        name: 'Korean',
        nativeName: '한국어',
        tokenMultiplier: 2.3,
        detectionPatterns: [
            /[\uac00-\ud7af]/, // Hangul Syllables
            /[\u1100-\u11ff]/ // Hangul Jamo
        ],
        confidence: 0.9
    },
    {
        code: 'ar',
        name: 'Arabic',
        nativeName: 'العربية',
        tokenMultiplier: 3.0,
        detectionPatterns: [
            /[\u0600-\u06ff]/, // Arabic
            /[\u0750-\u077f]/ // Arabic Supplement
        ],
        confidence: 0.85
    },
    {
        code: 'ta',
        name: 'Tamil',
        nativeName: 'தமிழ்',
        tokenMultiplier: 4.5,
        detectionPatterns: [
            /[\u0b80-\u0bff]/ // Tamil
        ],
        confidence: 0.8
    },
    {
        code: 'hi',
        name: 'Hindi',
        nativeName: 'हिन्दी',
        tokenMultiplier: 3.5,
        detectionPatterns: [
            /[\u0900-\u097f]/ // Devanagari
        ],
        confidence: 0.85
    },
    {
        code: 'ru',
        name: 'Russian',
        nativeName: 'Русский',
        tokenMultiplier: 1.9,
        detectionPatterns: [
            /[\u0400-\u04ff]/, // Cyrillic
            wholeWord('и|в|не|на|я|что|он|с|как|а')
        ],
        confidence: 0.9
    },
    {
        code: 'pt',
        name: 'Portuguese',
        nativeName: 'Português',
        tokenMultiplier: 1.7,
        detectionPatterns: [
            /[àáâãäèéêëìíîïòóôõöùúûü]/i,
            wholeWord('o|a|os|as|de|em|que|e|é|do|da')
        ],
        confidence: 0.9
    },
    {
        code: 'th',
        name: 'Thai',
        nativeName: 'ไทย',
        tokenMultiplier: 4.0,
        detectionPatterns: [
            /[\u0e00-\u0e7f]/ // Thai
        ],
        confidence: 0.8
    },
    {
        code: 'vi',
        name: 'Vietnamese',
        nativeName: 'Tiếng Việt',
        tokenMultiplier: 1.5,
        detectionPatterns: [
            /[àáạảãâầấậẩẫăằắặẳẵèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹđ]/i,
            wholeWord('và|của|có|là|này|được|trong|cho|người|từ|để|với|một|những|các|không|khi|trên')
        ],
        confidence: 0.9
    },
    {
        code: 'id',
        name: 'Indonesian',
        nativeName: 'Bahasa Indonesia',
        tokenMultiplier: 1.4,
        detectionPatterns: [
            /\b(yang|dan|di|ke|dari|ini|itu|untuk|dengan|pada|adalah|tidak|ada|atau|akan|juga|oleh|dalam)\b/i
        ],
        confidence: 0.85
    }
];
177
/**
 * Look up a language profile by its code.
 *
 * @param {string} code - Language code, compared with strict equality.
 * @returns {object|null} The first matching profile, or null when none matches.
 */
export function getLanguageProfile(code) {
    for (const candidate of LANGUAGE_PROFILES) {
        if (candidate.code === code) {
            return candidate;
        }
    }
    return null;
}
183
/**
 * List the codes of every supported language.
 *
 * @returns {string[]} A new array of codes in LANGUAGE_PROFILES order.
 */
export function getSupportedLanguages() {
    return Array.from(LANGUAGE_PROFILES, ({ code }) => code);
}
189
/**
 * Return the token multiplier registered for a language code.
 *
 * @param {string} code - Language code to look up.
 * @returns {number} The profile's tokenMultiplier, or 1.0 (the English
 *   value) when no profile has that code.
 */
export function getTokenMultiplier(code) {
    const match = LANGUAGE_PROFILES.find((candidate) => candidate.code === code);
    if (!match) {
        return 1.0; // Unknown code: fall back to the English multiplier
    }
    return match.tokenMultiplier;
}
196
+ //# sourceMappingURL=language-profiles.js.map