wogiflow 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. package/.workflow/agents/reviewer.md +81 -0
  2. package/.workflow/agents/security.md +94 -0
  3. package/.workflow/agents/story-writer.md +58 -0
  4. package/.workflow/bridges/base-bridge.js +395 -0
  5. package/.workflow/bridges/claude-bridge.js +434 -0
  6. package/.workflow/bridges/index.js +130 -0
  7. package/.workflow/lib/assumption-detector.js +481 -0
  8. package/.workflow/lib/config-substitution.js +371 -0
  9. package/.workflow/lib/failure-categories.js +478 -0
  10. package/.workflow/state/app-map.md.template +15 -0
  11. package/.workflow/state/architecture.md.template +24 -0
  12. package/.workflow/state/component-index.json.template +5 -0
  13. package/.workflow/state/decisions.md.template +15 -0
  14. package/.workflow/state/feedback-patterns.md.template +9 -0
  15. package/.workflow/state/knowledge-sync.json.template +6 -0
  16. package/.workflow/state/progress.md.template +14 -0
  17. package/.workflow/state/ready.json.template +7 -0
  18. package/.workflow/state/request-log.md.template +14 -0
  19. package/.workflow/state/session-state.json.template +11 -0
  20. package/.workflow/state/stack.md.template +33 -0
  21. package/.workflow/state/testing.md.template +36 -0
  22. package/.workflow/templates/claude-md.hbs +257 -0
  23. package/.workflow/templates/correction-report.md +67 -0
  24. package/.workflow/templates/gemini-md.hbs +52 -0
  25. package/README.md +1802 -0
  26. package/bin/flow +205 -0
  27. package/lib/index.js +33 -0
  28. package/lib/installer.js +467 -0
  29. package/lib/release-channel.js +269 -0
  30. package/lib/skill-registry.js +526 -0
  31. package/lib/upgrader.js +401 -0
  32. package/lib/utils.js +305 -0
  33. package/package.json +64 -0
  34. package/scripts/flow +985 -0
  35. package/scripts/flow-adaptive-learning.js +1259 -0
  36. package/scripts/flow-aggregate.js +488 -0
  37. package/scripts/flow-archive +133 -0
  38. package/scripts/flow-auto-context.js +1015 -0
  39. package/scripts/flow-auto-learn.js +615 -0
  40. package/scripts/flow-bridge.js +223 -0
  41. package/scripts/flow-browser-suggest.js +316 -0
  42. package/scripts/flow-bug.js +247 -0
  43. package/scripts/flow-cascade.js +711 -0
  44. package/scripts/flow-changelog +85 -0
  45. package/scripts/flow-checkpoint.js +483 -0
  46. package/scripts/flow-cli.js +403 -0
  47. package/scripts/flow-code-intelligence.js +760 -0
  48. package/scripts/flow-complexity.js +502 -0
  49. package/scripts/flow-config-set.js +152 -0
  50. package/scripts/flow-constants.js +157 -0
  51. package/scripts/flow-context +152 -0
  52. package/scripts/flow-context-init.js +482 -0
  53. package/scripts/flow-context-monitor.js +384 -0
  54. package/scripts/flow-context-scoring.js +886 -0
  55. package/scripts/flow-correct.js +458 -0
  56. package/scripts/flow-damage-control.js +985 -0
  57. package/scripts/flow-deps +101 -0
  58. package/scripts/flow-diff.js +700 -0
  59. package/scripts/flow-done +151 -0
  60. package/scripts/flow-done.js +489 -0
  61. package/scripts/flow-durable-session.js +1541 -0
  62. package/scripts/flow-entropy-monitor.js +345 -0
  63. package/scripts/flow-export-profile +349 -0
  64. package/scripts/flow-export-scanner.js +1046 -0
  65. package/scripts/flow-figma-confirm.js +400 -0
  66. package/scripts/flow-figma-extract.js +496 -0
  67. package/scripts/flow-figma-generate.js +683 -0
  68. package/scripts/flow-figma-index.js +909 -0
  69. package/scripts/flow-figma-match.js +617 -0
  70. package/scripts/flow-figma-mcp-server.js +518 -0
  71. package/scripts/flow-figma-pipeline.js +414 -0
  72. package/scripts/flow-file-ops.js +301 -0
  73. package/scripts/flow-gate-confidence.js +825 -0
  74. package/scripts/flow-guided-edit.js +659 -0
  75. package/scripts/flow-health +185 -0
  76. package/scripts/flow-health.js +413 -0
  77. package/scripts/flow-hooks.js +556 -0
  78. package/scripts/flow-http-client.js +249 -0
  79. package/scripts/flow-hybrid-detect.js +167 -0
  80. package/scripts/flow-hybrid-interactive.js +591 -0
  81. package/scripts/flow-hybrid-test.js +152 -0
  82. package/scripts/flow-import-profile +439 -0
  83. package/scripts/flow-init +253 -0
  84. package/scripts/flow-instruction-richness.js +827 -0
  85. package/scripts/flow-jira-integration.js +579 -0
  86. package/scripts/flow-knowledge-router.js +522 -0
  87. package/scripts/flow-knowledge-sync.js +589 -0
  88. package/scripts/flow-linear-integration.js +631 -0
  89. package/scripts/flow-links.js +774 -0
  90. package/scripts/flow-log-manager.js +559 -0
  91. package/scripts/flow-loop-enforcer.js +1246 -0
  92. package/scripts/flow-loop-retry-learning.js +630 -0
  93. package/scripts/flow-lsp.js +923 -0
  94. package/scripts/flow-map-index +348 -0
  95. package/scripts/flow-map-sync +201 -0
  96. package/scripts/flow-memory-blocks.js +668 -0
  97. package/scripts/flow-memory-compactor.js +350 -0
  98. package/scripts/flow-memory-db.js +1110 -0
  99. package/scripts/flow-memory-sync.js +484 -0
  100. package/scripts/flow-metrics.js +353 -0
  101. package/scripts/flow-migrate-ids.js +370 -0
  102. package/scripts/flow-model-adapter.js +802 -0
  103. package/scripts/flow-model-router.js +884 -0
  104. package/scripts/flow-models.js +1231 -0
  105. package/scripts/flow-morning.js +517 -0
  106. package/scripts/flow-multi-approach.js +660 -0
  107. package/scripts/flow-new-feature +86 -0
  108. package/scripts/flow-onboard +1042 -0
  109. package/scripts/flow-orchestrate-llm.js +459 -0
  110. package/scripts/flow-orchestrate.js +3592 -0
  111. package/scripts/flow-output.js +123 -0
  112. package/scripts/flow-parallel-detector.js +399 -0
  113. package/scripts/flow-parallel-dispatch.js +987 -0
  114. package/scripts/flow-parallel.js +428 -0
  115. package/scripts/flow-pattern-enforcer.js +600 -0
  116. package/scripts/flow-prd-manager.js +282 -0
  117. package/scripts/flow-progress.js +323 -0
  118. package/scripts/flow-project-analyzer.js +975 -0
  119. package/scripts/flow-prompt-composer.js +487 -0
  120. package/scripts/flow-providers.js +1381 -0
  121. package/scripts/flow-queue.js +308 -0
  122. package/scripts/flow-ready +82 -0
  123. package/scripts/flow-ready.js +189 -0
  124. package/scripts/flow-regression.js +396 -0
  125. package/scripts/flow-response-parser.js +450 -0
  126. package/scripts/flow-resume.js +284 -0
  127. package/scripts/flow-rules-sync.js +439 -0
  128. package/scripts/flow-run-trace.js +718 -0
  129. package/scripts/flow-safety.js +587 -0
  130. package/scripts/flow-search +104 -0
  131. package/scripts/flow-security.js +481 -0
  132. package/scripts/flow-session-end +106 -0
  133. package/scripts/flow-session-end.js +437 -0
  134. package/scripts/flow-session-state.js +671 -0
  135. package/scripts/flow-setup-hooks +216 -0
  136. package/scripts/flow-setup-hooks.js +377 -0
  137. package/scripts/flow-skill-create.js +329 -0
  138. package/scripts/flow-skill-creator.js +572 -0
  139. package/scripts/flow-skill-generator.js +1046 -0
  140. package/scripts/flow-skill-learn.js +880 -0
  141. package/scripts/flow-skill-matcher.js +578 -0
  142. package/scripts/flow-spec-generator.js +820 -0
  143. package/scripts/flow-stack-wizard.js +895 -0
  144. package/scripts/flow-standup +162 -0
  145. package/scripts/flow-start +74 -0
  146. package/scripts/flow-start.js +235 -0
  147. package/scripts/flow-status +110 -0
  148. package/scripts/flow-status.js +301 -0
  149. package/scripts/flow-step-browser.js +83 -0
  150. package/scripts/flow-step-changelog.js +217 -0
  151. package/scripts/flow-step-comments.js +306 -0
  152. package/scripts/flow-step-complexity.js +234 -0
  153. package/scripts/flow-step-coverage.js +218 -0
  154. package/scripts/flow-step-knowledge.js +193 -0
  155. package/scripts/flow-step-pr-tests.js +364 -0
  156. package/scripts/flow-step-regression.js +89 -0
  157. package/scripts/flow-step-review.js +516 -0
  158. package/scripts/flow-step-security.js +162 -0
  159. package/scripts/flow-step-silent-failures.js +290 -0
  160. package/scripts/flow-step-simplifier.js +346 -0
  161. package/scripts/flow-story +105 -0
  162. package/scripts/flow-story.js +500 -0
  163. package/scripts/flow-suspend.js +252 -0
  164. package/scripts/flow-sync-daemon.js +654 -0
  165. package/scripts/flow-task-analyzer.js +606 -0
  166. package/scripts/flow-team-dashboard.js +748 -0
  167. package/scripts/flow-team-sync.js +752 -0
  168. package/scripts/flow-team.js +977 -0
  169. package/scripts/flow-tech-options.js +528 -0
  170. package/scripts/flow-templates.js +812 -0
  171. package/scripts/flow-tiered-learning.js +728 -0
  172. package/scripts/flow-trace +204 -0
  173. package/scripts/flow-transcript-chunking.js +1106 -0
  174. package/scripts/flow-transcript-digest.js +7918 -0
  175. package/scripts/flow-transcript-language.js +465 -0
  176. package/scripts/flow-transcript-parsing.js +1085 -0
  177. package/scripts/flow-transcript-stories.js +2194 -0
  178. package/scripts/flow-update-map +224 -0
  179. package/scripts/flow-utils.js +2242 -0
  180. package/scripts/flow-verification.js +644 -0
  181. package/scripts/flow-verify.js +1177 -0
  182. package/scripts/flow-voice-input.js +638 -0
  183. package/scripts/flow-watch +168 -0
  184. package/scripts/flow-workflow-steps.js +521 -0
  185. package/scripts/flow-workflow.js +1029 -0
  186. package/scripts/flow-worktree.js +489 -0
  187. package/scripts/hooks/adapters/base-adapter.js +102 -0
  188. package/scripts/hooks/adapters/claude-code.js +359 -0
  189. package/scripts/hooks/adapters/index.js +79 -0
  190. package/scripts/hooks/core/component-check.js +341 -0
  191. package/scripts/hooks/core/index.js +35 -0
  192. package/scripts/hooks/core/loop-check.js +241 -0
  193. package/scripts/hooks/core/session-context.js +294 -0
  194. package/scripts/hooks/core/task-gate.js +177 -0
  195. package/scripts/hooks/core/validation.js +230 -0
  196. package/scripts/hooks/entry/claude-code/post-tool-use.js +65 -0
  197. package/scripts/hooks/entry/claude-code/pre-tool-use.js +89 -0
  198. package/scripts/hooks/entry/claude-code/session-end.js +87 -0
  199. package/scripts/hooks/entry/claude-code/session-start.js +46 -0
  200. package/scripts/hooks/entry/claude-code/stop.js +43 -0
  201. package/scripts/postinstall.js +139 -0
  202. package/templates/browser-test-flow.json +56 -0
  203. package/templates/bug-report.md +43 -0
  204. package/templates/component-detail.md +42 -0
  205. package/templates/component.stories.tsx +49 -0
  206. package/templates/context/constraints.md +83 -0
  207. package/templates/context/conventions.md +177 -0
  208. package/templates/context/stack.md +60 -0
  209. package/templates/correction-report.md +90 -0
  210. package/templates/feature-proposal.md +35 -0
  211. package/templates/hybrid/_base.md +254 -0
  212. package/templates/hybrid/_patterns.md +45 -0
  213. package/templates/hybrid/create-component.md +127 -0
  214. package/templates/hybrid/create-file.md +56 -0
  215. package/templates/hybrid/create-hook.md +145 -0
  216. package/templates/hybrid/create-service.md +70 -0
  217. package/templates/hybrid/fix-bug.md +33 -0
  218. package/templates/hybrid/modify-file.md +55 -0
  219. package/templates/story.md +68 -0
  220. package/templates/task.json +56 -0
  221. package/templates/trace.md +69 -0
@@ -0,0 +1,465 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Wogi Flow - Language Detection Module
5
+ *
6
+ * Detects languages in transcript content using:
7
+ * - Script/character set detection
8
+ * - Common word analysis
9
+ * - N-gram/trigram profiles
10
+ *
11
+ * Supports multiple languages including RTL scripts.
12
+ * Extracted from flow-transcript-digest.js for modularity.
13
+ */
14
+
15
+ // ==========================================================================
16
+ // E5-S1: Language Detection Functions
17
+ // ==========================================================================
18
+
19
/**
 * Character-class patterns used to count how many characters of each
 * writing system appear in a text.
 *
 * Every pattern carries the `g` flag because detectScript() relies on
 * `String.prototype.match` returning *all* matching characters.
 *
 * The Latin pattern uses a Unicode script property instead of a hand-written
 * list of accented letters, so that Polish, Turkish, Vietnamese, Portuguese
 * (all declared as `latin` in LANGUAGE_INFO) and any other Latin-script
 * letters are counted as Latin rather than being ignored.
 */
const SCRIPT_PATTERNS = {
  latin: /\p{Script=Latin}/gu,
  cyrillic: /[\u0400-\u04FF]/g,
  hebrew: /[\u0590-\u05FF]/g,
  arabic: /[\u0600-\u06FF\u0750-\u077F]/g,
  cjk: /[\u4E00-\u9FFF\u3400-\u4DBF]/g,
  hiragana: /[\u3040-\u309F]/g,
  katakana: /[\u30A0-\u30FF]/g,
  hangul: /[\uAC00-\uD7AF\u1100-\u11FF]/g,
  greek: /[\u0370-\u03FF]/g,
  thai: /[\u0E00-\u0E7F]/g,
  devanagari: /[\u0900-\u097F]/g
};
35
+
36
/**
 * Metadata for every language code this module knows about.
 *
 * Each entry maps an ISO 639-1 style code to its display name, the key of
 * the script it is written in, and whether it is written right-to-left.
 * Rows are [code, name, script, rtl]; `rtl` defaults to false when omitted.
 */
const LANGUAGE_INFO = Object.fromEntries(
  [
    ['en', 'English', 'latin'],
    ['es', 'Spanish', 'latin'],
    ['fr', 'French', 'latin'],
    ['de', 'German', 'latin'],
    ['pt', 'Portuguese', 'latin'],
    ['it', 'Italian', 'latin'],
    ['nl', 'Dutch', 'latin'],
    ['ru', 'Russian', 'cyrillic'],
    ['he', 'Hebrew', 'hebrew', true],
    ['ar', 'Arabic', 'arabic', true],
    ['zh', 'Chinese', 'cjk'],
    ['ja', 'Japanese', 'cjk'],
    ['ko', 'Korean', 'hangul'],
    ['el', 'Greek', 'greek'],
    ['th', 'Thai', 'thai'],
    ['hi', 'Hindi', 'devanagari'],
    ['pl', 'Polish', 'latin'],
    ['tr', 'Turkish', 'latin'],
    ['sv', 'Swedish', 'latin'],
    ['no', 'Norwegian', 'latin'],
    ['da', 'Danish', 'latin'],
    ['fi', 'Finnish', 'latin'],
    ['vi', 'Vietnamese', 'latin']
  ].map(([code, name, script, rtl = false]) => [code, { name, script, rtl }])
);
64
+
65
/**
 * Frequent function words per language (30 entries each), used by
 * analyzeCommonWords() as a membership list.
 *
 * Notes:
 * - Entries within a list must be unique: a repeated entry would be counted
 *   twice when tallying unique matches. The Hebrew list previously contained
 *   'את' twice; the second occurrence has been replaced with 'או'.
 * - extractWords() discards one-character tokens, so single-letter entries
 *   (e.g. 'o', 'a', 'i', 'e', 'и', 'в', 'я', 'с') never match in practice.
 *   They are kept so the lists stay meaningful if that filter changes.
 */
const COMMON_WORDS = {
  en: ['the', 'is', 'are', 'was', 'were', 'have', 'has', 'had', 'do', 'does',
    'did', 'will', 'would', 'could', 'should', 'can', 'may', 'might',
    'this', 'that', 'these', 'those', 'with', 'from', 'about', 'into',
    'through', 'during', 'before', 'after'],

  es: ['el', 'la', 'los', 'las', 'un', 'una', 'de', 'del', 'que', 'en',
    'es', 'son', 'por', 'para', 'con', 'sin', 'sobre', 'como', 'pero',
    'muy', 'ya', 'aunque', 'porque', 'cuando', 'donde', 'quien',
    'cual', 'todo', 'nada', 'algo'],

  fr: ['le', 'la', 'les', 'un', 'une', 'de', 'du', 'des', 'et', 'en',
    'est', 'sont', 'avoir', 'pour', 'que', 'qui', 'dans', 'sur',
    'avec', 'plus', 'pas', 'ce', 'cette', 'ces', 'nous', 'vous',
    'ils', 'elle', 'elles', 'mais'],

  de: ['der', 'die', 'das', 'den', 'dem', 'ein', 'eine', 'und', 'ist', 'sind',
    'war', 'waren', 'hat', 'haben', 'wird', 'werden', 'kann',
    'mit', 'von', 'zu', 'bei', 'nach', 'auch', 'nur', 'noch',
    'aber', 'oder', 'wenn', 'wie', 'nicht'],

  pt: ['o', 'a', 'os', 'as', 'um', 'uma', 'de', 'do', 'da', 'que', 'em',
    'no', 'na', 'para', 'por', 'com', 'mais', 'como', 'esse',
    'essa', 'este', 'esta', 'seu', 'sua', 'ele', 'ela', 'eles',
    'elas', 'mas', 'ou'],

  it: ['il', 'la', 'i', 'le', 'lo', 'gli', 'un', 'una', 'di', 'che', 'e',
    'in', 'per', 'con', 'non', 'da', 'su', 'come', 'ma', 'anche',
    'questo', 'quella', 'questi', 'quelle', 'essere', 'avere',
    'fare', 'dire', 'potere', 'volere'],

  nl: ['de', 'het', 'een', 'van', 'en', 'in', 'is', 'zijn', 'op', 'te',
    'dat', 'die', 'voor', 'met', 'niet', 'aan', 'er', 'om', 'ook', 'als',
    'maar', 'bij', 'nog', 'wel', 'dan', 'naar', 'kan', 'zou', 'worden', 'heeft'],

  he: ['של', 'את', 'על', 'הוא', 'היא', 'הם', 'הן', 'לא', 'זה', 'כי', 'אם',
    'גם', 'יש', 'אין', 'עם', 'אל', 'מה', 'כל', 'היה', 'להיות', 'אני',
    'אתה', 'או', 'אנחנו', 'הזה', 'הזאת', 'עוד', 'רק', 'כמו', 'אבל'],

  ru: ['и', 'в', 'не', 'на', 'я', 'что', 'он', 'с', 'как', 'это',
    'она', 'они', 'но', 'по', 'из', 'за', 'все', 'так', 'его', 'же',
    'от', 'для', 'или', 'было', 'бы', 'мне', 'вы', 'мы', 'был', 'быть']
};
111
+
112
/**
 * Characteristic three-letter sequences per language (20 each).
 *
 * Only languages listed here receive an n-gram score; the lists are written
 * as space-separated strings and split into arrays at load time.
 */
const TRIGRAM_PROFILES = {
  en: 'the and ing ion tio ent ati for her ter hat tha ere ate his con res ver all ons'.split(' '),
  es: 'que ent ade los del est con nte par las cia era ien com res sta tra pro una por'.split(' '),
  fr: 'ent que les ion tio men ait ons ant our des eur par est eme com ous ter con dan'.split(' '),
  de: 'der und den ein che die sch ung ich ter ent gen das ber ine eit mit ren nen ver'.split(' ')
};
125
+
126
/**
 * Count the characters of each known writing system in a text.
 *
 * @param {string} text - Text to inspect.
 * @returns {{counts: Object<string, number>, percentages: Object<string, number>, total: number}}
 *   `counts` holds the number of matching characters per script (scripts with
 *   no matches are omitted), `percentages` the share of each script relative
 *   to `total`, and `total` the sum of all counted characters.
 */
function detectScript(text) {
  const counts = {};
  let total = 0;

  Object.keys(SCRIPT_PATTERNS).forEach((script) => {
    const found = text.match(SCRIPT_PATTERNS[script]);
    const hits = found === null ? 0 : found.length;
    if (hits === 0) {
      return;
    }
    counts[script] = hits;
    total += hits;
  });

  // A script only has an entry in `counts` when it matched at least once,
  // so `total` is necessarily positive for every key visited here.
  const percentages = {};
  Object.keys(counts).forEach((script) => {
    percentages[script] = counts[script] / total;
  });

  return { counts, percentages, total };
}
149
+
150
/**
 * Strip transcript artefacts that would distort language detection.
 *
 * Removes, in order: clock-style timestamps, "First Last:" speaker labels at
 * the start of a line, `<v Name>` voice tags and their closing `</v>`, and
 * http(s) URLs; then collapses whitespace and trims the result.
 *
 * Speaker labels are matched with optional leading spaces/tabs because the
 * timestamp removal that runs first typically leaves a space in front of the
 * label (e.g. "00:01 John Smith: hi" becomes " John Smith: hi").
 *
 * @param {string} text - Raw transcript text.
 * @returns {string} The cleaned, single-spaced text.
 */
function cleanForDetection(text) {
  return text
    // Timestamps such as 1:23, 01:23:45 or 00:01:02.500
    .replace(/\d{1,2}:\d{2}(:\d{2})?(\.\d+)?/g, '')
    // "First Last:" speaker labels at the beginning of a line
    .replace(/^[ \t]*[A-Z][a-z]+\s[A-Z][a-z]+:/gm, '')
    // Opening voice tags, e.g. <v Jane Doe>
    .replace(/<v\s+[^>]+>/g, '')
    // Closing voice tags; replaced by a space so adjacent words stay apart
    .replace(/<\/v>/g, ' ')
    // URLs
    .replace(/https?:\/\/[^\s]+/g, '')
    // Collapse runs of whitespace
    .replace(/\s+/g, ' ')
    .trim();
}
166
+
167
/**
 * Split text into lower-cased word tokens of at least two UTF-16 code units.
 *
 * A word is a letter followed by any run of letters and combining marks.
 * Including marks (`\p{M}`) matters for scripts that write vowels or points
 * as combining characters (Devanagari, Thai, pointed Hebrew/Arabic): matching
 * letters alone would break such words apart into single letters, which the
 * length filter would then discard.
 *
 * @param {string} text - Text to tokenise.
 * @returns {string[]} Word tokens in order of appearance.
 */
function extractWords(text) {
  const words = text.toLowerCase().match(/\p{L}[\p{L}\p{M}]*/gu) || [];
  return words.filter((word) => word.length > 1);
}
175
+
176
/**
 * Score each language by how many of the text's words are in its
 * common-word list.
 *
 * Each list is converted to a Set once per call, which keeps the per-word
 * lookup constant-time and guarantees that a word repeated in a list is
 * counted only once in `uniqueMatches`.
 *
 * @param {string} text - Text to analyse (tokenised with extractWords()).
 * @returns {Object<string, {uniqueMatches: number, totalOccurrences: number, score: number}>}
 *   Per language: the number of distinct list words present in the text, the
 *   number of text words that are list words, and that number divided by the
 *   total word count (0 when the text has no words).
 */
function analyzeCommonWords(text) {
  const words = extractWords(text);
  const seenWords = new Set(words);
  const scores = {};

  for (const [lang, commonList] of Object.entries(COMMON_WORDS)) {
    const vocabulary = new Set(commonList);

    let uniqueMatches = 0;
    for (const entry of vocabulary) {
      if (seenWords.has(entry)) {
        uniqueMatches++;
      }
    }

    let totalOccurrences = 0;
    for (const word of words) {
      if (vocabulary.has(word)) {
        totalOccurrences++;
      }
    }

    scores[lang] = {
      uniqueMatches,
      totalOccurrences,
      score: words.length > 0 ? totalOccurrences / words.length : 0
    };
  }

  return scores;
}
207
+
208
/**
 * Produce every overlapping three-letter sequence of a text.
 *
 * The text is lower-cased and everything except the ASCII letters a-z is
 * removed first, so sequences run across former word boundaries.
 *
 * @param {string} text - Text to slice.
 * @returns {string[]} Trigrams in order of appearance (empty when fewer than
 *   three letters remain).
 */
function extractTrigrams(text) {
  const letters = text.toLowerCase().replace(/[^a-z]/g, '');
  const windowCount = Math.max(letters.length - 2, 0);
  return Array.from({ length: windowCount }, (_, start) => letters.slice(start, start + 3));
}
219
+
220
/**
 * Score each profiled language by the share of its characteristic trigrams
 * that occur at least once in the text.
 *
 * @param {string} text - Text to analyse (sliced with extractTrigrams()).
 * @returns {Object<string, {matches: number, score: number}>} Per language:
 *   the number of profile trigrams found and that number divided by the
 *   profile size (0 for an empty profile).
 */
function analyzeNgrams(text) {
  const present = new Set(extractTrigrams(text));

  return Object.fromEntries(
    Object.entries(TRIGRAM_PROFILES).map(([lang, profile]) => {
      const matches = profile.filter((trigram) => present.has(trigram)).length;
      const score = profile.length > 0 ? matches / profile.length : 0;
      return [lang, { matches, score }];
    })
  );
}
243
+
244
/**
 * Merge script, common-word and trigram signals into one score per language.
 *
 * Scoring:
 * - A non-Latin script that makes up more than 30% of the counted characters
 *   adds its share to the language associated with it.
 * - Han characters are attributed to Japanese when kana is present
 *   (hiragana or katakana above 10%), otherwise to Chinese. Kana counts as
 *   Japanese evidence itself, so text written mostly or entirely in kana is
 *   recognised even when Han characters are below the 30% threshold.
 * - Word scores are added with weight 0.5, trigram scores with weight 0.3.
 * - Finally all scores are divided by the highest one, so the best language
 *   always ends up at 1 whenever any signal is present.
 *
 * @param {{percentages: Object<string, number>}} scriptResult - Output of detectScript().
 * @param {Object<string, {score: number}>} wordResult - Output of analyzeCommonWords().
 * @param {Object<string, {score: number}>} ngramResult - Output of analyzeNgrams().
 * @returns {Object<string, number>} Normalised score per language code.
 */
function combineLanguageScores(scriptResult, wordResult, ngramResult) {
  const scores = {};
  const share = scriptResult.percentages;
  const add = (lang, amount) => {
    scores[lang] = (scores[lang] || 0) + amount;
  };
  const addDominantScripts = (pairs) => {
    for (const [script, lang] of pairs) {
      // Missing scripts are undefined, and `undefined > 0.3` is false.
      if (share[script] > 0.3) {
        add(lang, share[script]);
      }
    }
  };

  addDominantScripts([['hebrew', 'he'], ['arabic', 'ar'], ['cyrillic', 'ru']]);

  // Han + kana: decide between Japanese and Chinese.
  const han = share.cjk || 0;
  const kana = (share.hiragana || 0) + (share.katakana || 0);
  const hasKana = share.hiragana > 0.1 || share.katakana > 0.1;
  if (hasKana && han + kana > 0.3) {
    add('ja', han + kana);
  } else if (han > 0.3) {
    add('zh', han);
  }

  addDominantScripts([['hangul', 'ko'], ['greek', 'el'], ['thai', 'th'], ['devanagari', 'hi']]);

  // Word-based scoring (weighted 0.5)
  for (const [lang, data] of Object.entries(wordResult)) {
    add(lang, data.score * 0.5);
  }

  // N-gram scoring (weighted 0.3)
  for (const [lang, data] of Object.entries(ngramResult)) {
    add(lang, data.score * 0.3);
  }

  // Normalise against the best score; the floor avoids dividing by zero.
  const maxScore = Math.max(...Object.values(scores), 0.001);
  for (const lang of Object.keys(scores)) {
    scores[lang] = scores[lang] / maxScore;
  }

  return scores;
}
299
+
300
/**
 * Detect the primary language of a text.
 *
 * The text is cleaned, analysed by script, common words and trigrams, and the
 * combined scores are ranked. Because combineLanguageScores() normalises
 * against the best score, `confidence` of the primary language is relative
 * (the top language is 1 whenever any signal exists) and the secondary
 * language's confidence is its score relative to the primary.
 *
 * @param {string} text - Text to analyse. Non-string input is reported as
 *   insufficient text instead of throwing.
 * @param {Object} [options]
 * @param {number} [options.minLength=20] - Minimum cleaned length required to
 *   attempt detection. An explicit 0 disables the check.
 * @returns {Object} On success: `language`, `languageName`, `confidence`,
 *   `secondary` (or null), `scripts`, `wordMatches` and `allScores`.
 *   Otherwise `language: 'unknown'` with `confidence: 0` and a `reason` of
 *   `'insufficient_text'` or `'no_language_signal'`.
 */
function detectLanguage(text, options = {}) {
  const minLength = options?.minLength ?? 20;
  const unknown = (reason) => ({
    language: 'unknown',
    languageName: 'Unknown',
    confidence: 0,
    reason
  });

  if (typeof text !== 'string') {
    return unknown('insufficient_text');
  }

  // Clean text
  const cleaned = cleanForDetection(text);
  if (cleaned.length < minLength) {
    return unknown('insufficient_text');
  }

  // Analyze
  const scriptResult = detectScript(cleaned);
  const wordResult = analyzeCommonWords(cleaned);
  const ngramResult = analyzeNgrams(cleaned);

  // Combine scores
  const scores = combineLanguageScores(scriptResult, wordResult, ngramResult);

  // Rank languages that received any score, best first
  const sorted = Object.entries(scores)
    .filter(([, score]) => score > 0)
    .sort((a, b) => b[1] - a[1]);

  if (sorted.length === 0) {
    return unknown('no_language_signal');
  }

  const primary = sorted[0];
  // Report a runner-up only when it reaches 30% of the primary's score.
  const secondary = sorted.length > 1 && sorted[1][1] > 0.3 ? sorted[1] : null;

  return {
    language: primary[0],
    languageName: LANGUAGE_INFO[primary[0]]?.name || primary[0],
    confidence: Math.min(primary[1], 1),
    secondary: secondary ? {
      language: secondary[0],
      languageName: LANGUAGE_INFO[secondary[0]]?.name || secondary[0],
      confidence: Math.min(secondary[1], 1)
    } : null,
    scripts: scriptResult.counts,
    wordMatches: Object.fromEntries(
      Object.entries(wordResult)
        .filter(([, data]) => data.totalOccurrences > 0)
        .map(([lang, data]) => [lang, data.totalOccurrences])
    ),
    allScores: scores
  };
}
359
+
360
/**
 * Detect the languages of mixed-language text by classifying it segment by
 * segment.
 *
 * The text is split on whitespace into consecutive segments of
 * `segmentSize / 5` words (rounded down, at least one word); segments of ten
 * words or fewer are skipped. Each segment is classified with
 * detectLanguage(), and the share of segments per language forms the
 * distribution.
 *
 * When no segment is long enough, the whole text is classified once and the
 * result is returned with the same multi-language fields as the normal path,
 * so callers can read `distribution`, `segments` etc. unconditionally.
 *
 * @param {string} text - Text to analyse.
 * @param {Object} [options]
 * @param {number} [options.segmentSize=300] - Segment size; divided by 5 to
 *   obtain a word count. Non-positive or non-numeric values fall back to 300
 *   (a negative step would otherwise never terminate).
 * @returns {Object} `language`, `languageName`, `confidence` (share of
 *   segments assigned to the primary language), `isMultilingual`,
 *   `distribution`, `segmentCount` and up to ten `segments` previews. On the
 *   fallback path the fields of detectLanguage() are included as well.
 */
function detectMultipleLanguages(text, options = {}) {
  const requestedSize = Number(options.segmentSize);
  const segmentSize = Number.isFinite(requestedSize) && requestedSize > 0 ? requestedSize : 300;
  const wordsPerSegment = Math.max(1, Math.floor(segmentSize / 5));

  // Split into segments, ignoring empty tokens from leading/trailing whitespace
  const words = text.split(/\s+/).filter(Boolean);
  const segments = [];
  for (let start = 0; start < words.length; start += wordsPerSegment) {
    const segmentWords = words.slice(start, start + wordsPerSegment);
    if (segmentWords.length > 10) {
      segments.push(segmentWords.join(' '));
    }
  }

  if (segments.length === 0) {
    const single = detectLanguage(text, options);
    const isKnown = single.language !== 'unknown';
    return {
      ...single,
      isMultilingual: false,
      distribution: isKnown ? { [single.language]: 1 } : {},
      segmentCount: 0,
      segments: []
    };
  }

  // Analyze each segment
  const languageCounts = {};
  const segmentResults = [];

  for (const segment of segments) {
    const result = detectLanguage(segment, { minLength: 10 });
    if (result.language !== 'unknown' && result.confidence > 0.3) {
      languageCounts[result.language] = (languageCounts[result.language] || 0) + 1;
      segmentResults.push({
        preview: segment.substring(0, 50) + (segment.length > 50 ? '...' : ''),
        language: result.language,
        confidence: result.confidence
      });
    }
  }

  // Calculate distribution over the segments that were classified
  const total = Object.values(languageCounts).reduce((sum, count) => sum + count, 0);
  const distribution = {};
  for (const [lang, count] of Object.entries(languageCounts)) {
    distribution[lang] = total > 0 ? count / total : 0;
  }

  const sortedLangs = Object.entries(distribution).sort((a, b) => b[1] - a[1]);
  const primaryLang = sortedLangs[0]?.[0] || 'unknown';

  return {
    language: primaryLang,
    languageName: LANGUAGE_INFO[primaryLang]?.name || primaryLang,
    confidence: distribution[primaryLang] || 0,
    isMultilingual: Object.keys(distribution).length > 1,
    distribution: distribution,
    segmentCount: segments.length,
    segments: segmentResults.slice(0, 10) // Limit to first 10
  };
}
416
+
417
/**
 * Look up metadata for a language code.
 *
 * Lookups use own-property checks so that names inherited from
 * Object.prototype (e.g. 'constructor', 'toString') are reported as
 * unsupported instead of being mistaken for language entries.
 *
 * @param {string} code - Language code, e.g. 'en'.
 * @returns {Object} For a known code: `code`, the LANGUAGE_INFO fields,
 *   `hasCommonWords`, `hasTrigrams` and `supported: true`. For an unknown
 *   code: a placeholder with `name: 'Unknown'` and `supported: false`.
 */
function getLanguageInfo(code) {
  const isOwnKey = (table) => Object.prototype.hasOwnProperty.call(table, code);

  if (!isOwnKey(LANGUAGE_INFO)) {
    return { code, name: 'Unknown', script: 'unknown', rtl: false, supported: false };
  }

  return {
    code,
    ...LANGUAGE_INFO[code],
    hasCommonWords: isOwnKey(COMMON_WORDS),
    hasTrigrams: isOwnKey(TRIGRAM_PROFILES),
    supported: true
  };
}
433
+
434
/**
 * List every language in LANGUAGE_INFO together with its support tier.
 *
 * Tier 1: has both a common-word list and a trigram profile.
 * Tier 2: has a common-word list only.
 * Tier 3: has no common-word list.
 *
 * @returns {Array<Object>} One entry per language with `code`, the
 *   LANGUAGE_INFO fields and `tier`.
 */
function listSupportedLanguages() {
  const languages = [];

  for (const code of Object.keys(LANGUAGE_INFO)) {
    let tier = 3;
    if (COMMON_WORDS[code]) {
      tier = TRIGRAM_PROFILES[code] ? 1 : 2;
    }
    languages.push({ code, ...LANGUAGE_INFO[code], tier });
  }

  return languages;
}
444
+
445
+
446
+
447
+ module.exports = {
448
+ // Constants
449
+ SCRIPT_PATTERNS,
450
+ LANGUAGE_INFO,
451
+ COMMON_WORDS,
452
+ TRIGRAM_PROFILES,
453
+ // Functions
454
+ detectScript,
455
+ cleanForDetection,
456
+ extractWords,
457
+ analyzeCommonWords,
458
+ extractTrigrams,
459
+ analyzeNgrams,
460
+ combineLanguageScores,
461
+ detectLanguage,
462
+ detectMultipleLanguages,
463
+ getLanguageInfo,
464
+ listSupportedLanguages
465
+ };