@lokascript/domain-learn 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/dist/generators/gloss-generator.d.ts +18 -0
  2. package/dist/generators/learn-renderer.d.ts +13 -0
  3. package/dist/generators/sentence-generator.d.ts +34 -0
  4. package/dist/index.cjs +6116 -0
  5. package/dist/index.cjs.map +1 -0
  6. package/dist/index.d.cts +441 -0
  7. package/dist/index.d.ts +55 -0
  8. package/dist/index.js +6056 -0
  9. package/dist/index.js.map +1 -0
  10. package/dist/profiles/ar.d.ts +2 -0
  11. package/dist/profiles/de.d.ts +2 -0
  12. package/dist/profiles/en.d.ts +2 -0
  13. package/dist/profiles/es.d.ts +2 -0
  14. package/dist/profiles/fr.d.ts +2 -0
  15. package/dist/profiles/index.d.ts +20 -0
  16. package/dist/profiles/ja.d.ts +2 -0
  17. package/dist/profiles/ko.d.ts +2 -0
  18. package/dist/profiles/pt.d.ts +2 -0
  19. package/dist/profiles/tr.d.ts +2 -0
  20. package/dist/profiles/zh.d.ts +2 -0
  21. package/dist/schemas/index.d.ts +31 -0
  22. package/dist/tokenizers/index.d.ts +23 -0
  23. package/dist/types.d.ts +266 -0
  24. package/package.json +63 -0
  25. package/src/__tests__/schemas.test.ts +145 -0
  26. package/src/__tests__/sentence-generation.test.ts +189 -0
  27. package/src/generators/gloss-generator.ts +145 -0
  28. package/src/generators/learn-renderer.ts +291 -0
  29. package/src/generators/sentence-generator.ts +501 -0
  30. package/src/index.ts +237 -0
  31. package/src/profiles/ar.ts +526 -0
  32. package/src/profiles/de.ts +481 -0
  33. package/src/profiles/en.ts +181 -0
  34. package/src/profiles/es.ts +829 -0
  35. package/src/profiles/fr.ts +466 -0
  36. package/src/profiles/index.ts +34 -0
  37. package/src/profiles/ja.ts +301 -0
  38. package/src/profiles/ko.ts +286 -0
  39. package/src/profiles/pt.ts +484 -0
  40. package/src/profiles/tr.ts +511 -0
  41. package/src/profiles/zh.ts +256 -0
  42. package/src/schemas/index.ts +576 -0
  43. package/src/tokenizers/index.ts +409 -0
  44. package/src/types.ts +321 -0
@@ -0,0 +1,409 @@
1
+ /**
2
+ * Learn Domain Tokenizers
3
+ *
4
+ * Language-specific tokenizers for learning domain input (10 languages).
5
+ * Created via the framework's createSimpleTokenizer factory.
6
+ *
7
+ * These tokenizers handle:
8
+ * - Command verb keyword classification
9
+ * - Role marker particle/preposition classification
10
+ * - CSS selector recognition (#id, .class)
11
+ * - Non-Latin script handling (Japanese, Arabic, Korean, Chinese)
12
+ */
13
+
14
+ import { createSimpleTokenizer } from '@lokascript/framework';
15
+ import type { LanguageTokenizer, ValueExtractor, ExtractionResult } from '@lokascript/framework';
16
+
17
+ // ─── Shared Extractors ──────────────────────────────────────────
18
+
19
/**
 * Identifier extractor for Latin-script languages whose words carry diacritics
 * (French é/à, Turkish ç/ü/ş, German ü/ö/ä).
 *
 * An identifier starts with a Unicode letter and continues with letters,
 * combining marks, numbers, `_` or `-`. Positions and lengths are measured in
 * UTF-16 code units, the same units used by string indexing and `slice`.
 */
class LatinExtendedIdentifierExtractor implements ValueExtractor {
  readonly name = 'latin-extended-identifier';

  /** Matches a single code unit that may begin an identifier. */
  private static readonly IDENTIFIER_START = /\p{L}/u;

  /**
   * Matches a single code unit that may continue an identifier.
   * `\p{M}` keeps decomposed (NFD) diacritics such as "e" + U+0301 inside the
   * word instead of ending the identifier at the combining mark.
   */
  private static readonly IDENTIFIER_PART = /[\p{L}\p{M}\p{N}_-]/u;

  /**
   * Reports whether an identifier can start at `position`.
   *
   * @param input - Full text being tokenized.
   * @param position - Code-unit offset to inspect.
   * @returns `true` when the code unit at `position` is a Unicode letter;
   *   `false` otherwise, including when `position` lies outside `input`.
   */
  canExtract(input: string, position: number): boolean {
    // charAt yields '' for out-of-range offsets. Indexing would yield undefined,
    // which RegExp.test coerces to the string "undefined" and wrongly matches.
    return LatinExtendedIdentifierExtractor.IDENTIFIER_START.test(input.charAt(position));
  }

  /**
   * Consumes the longest run of identifier characters beginning at `position`.
   *
   * @param input - Full text being tokenized.
   * @param position - Code-unit offset where extraction starts.
   * @returns The matched text and its length in code units, or `null` when
   *   nothing matches or `position` lies outside `input`.
   */
  extract(input: string, position: number): ExtractionResult | null {
    if (position < 0 || position >= input.length) return null;

    let end = position;
    while (
      end < input.length &&
      LatinExtendedIdentifierExtractor.IDENTIFIER_PART.test(input.charAt(end))
    ) {
      end++;
    }

    if (end === position) return null;
    return { value: input.slice(position, end), length: end - position };
  }
}
36
+
37
+ // ─── English ────────────────────────────────────────────────────
38
+
39
/** English command verbs, in the order they are registered as keywords. */
const ENGLISH_COMMAND_VERBS: readonly string[] = [
  'add',
  'remove',
  'toggle',
  'put',
  'set',
  'show',
  'hide',
  'get',
  'wait',
  'fetch',
  'send',
  'go',
  'increment',
  'decrement',
  'take',
];

/** English role markers (prepositions), registered after the verbs. */
const ENGLISH_ROLE_MARKERS: readonly string[] = ['to', 'from', 'into', 'on', 'by', 'for'];

/**
 * Tokenizer for English learn-domain input.
 * Registers the command verbs followed by the role markers as keywords.
 */
export const EnglishLearnTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'en',
  caseInsensitive: true,
  includeOperators: true,
  keywords: [...ENGLISH_COMMAND_VERBS, ...ENGLISH_ROLE_MARKERS],
});
69
+
70
+ // ─── Japanese ───────────────────────────────────────────────────
71
+
72
/**
 * Japanese command verbs paired with the normalized (English) command name.
 * Entries are verb stems / verbal nouns; each native form is registered as a
 * keyword and mapped to its normalized name through `keywordExtras`.
 *
 * NOTE(review): there is no entry normalizing to 'fetch', unlike the English
 * keyword list — confirm whether that is intentional.
 */
const JAPANESE_VERB_GLOSSES = [
  ['追加', 'add'],
  ['削除', 'remove'],
  ['切り替え', 'toggle'],
  ['置', 'put'],
  ['設定', 'set'],
  ['表示', 'show'],
  ['隠', 'hide'],
  ['取得', 'get'],
  ['待', 'wait'],
  ['送', 'send'],
  ['行', 'go'],
  ['増加', 'increment'],
  ['減少', 'decrement'],
  ['取', 'take'],
] as const;

/** Japanese particles registered as keywords after the verbs. */
const JAPANESE_PARTICLES = ['を', 'に', 'から', 'で', 'は'] as const;

/**
 * Tokenizer for Japanese learn-domain input.
 * Keywords are the native verbs followed by the particles.
 */
export const JapaneseLearnTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'ja',
  keywords: [...JAPANESE_VERB_GLOSSES.map(([native]) => native), ...JAPANESE_PARTICLES],
  keywordExtras: JAPANESE_VERB_GLOSSES.map(([native, normalized]) => ({ native, normalized })),
  includeOperators: true,
  caseInsensitive: false,
});
116
+
117
+ // ─── Spanish ────────────────────────────────────────────────────
118
+
119
/** Spanish command verbs, in the order they are registered as keywords. */
const SPANISH_COMMAND_VERBS: readonly string[] = [
  'agregar',
  'eliminar',
  'alternar',
  'poner',
  'establecer',
  'mostrar',
  'ocultar',
  'obtener',
  'esperar',
  'buscar',
  'enviar',
  'ir',
  'incrementar',
  'decrementar',
  'tomar',
];

/** Spanish role markers (prepositions), registered after the verbs. */
const SPANISH_ROLE_MARKERS: readonly string[] = ['a', 'de', 'en', 'por', 'para'];

/**
 * Tokenizer for Spanish learn-domain input.
 * Adds the Latin-extended identifier extractor so accented words are
 * extracted as single identifiers.
 */
export const SpanishLearnTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'es',
  caseInsensitive: true,
  includeOperators: true,
  customExtractors: [new LatinExtendedIdentifierExtractor()],
  keywords: [...SPANISH_COMMAND_VERBS, ...SPANISH_ROLE_MARKERS],
});
147
+
148
+ // ─── Arabic ─────────────────────────────────────────────────────
149
+
150
/**
 * Arabic command verbs paired with the normalized (English) command name.
 * Each native form is registered as a keyword and mapped to its normalized
 * name through `keywordExtras`.
 */
const ARABIC_VERB_GLOSSES = [
  ['أضف', 'add'],
  ['أزل', 'remove'],
  ['بدّل', 'toggle'],
  ['ضع', 'put'],
  ['عيّن', 'set'],
  ['أظهر', 'show'],
  ['أخفِ', 'hide'],
  ['احصل', 'get'],
  ['انتظر', 'wait'],
  ['اجلب', 'fetch'],
  ['أرسل', 'send'],
  ['اذهب', 'go'],
  ['زد', 'increment'],
  ['أنقص', 'decrement'],
  ['خذ', 'take'],
] as const;

/** Arabic role markers (prepositions), registered after the verbs. */
const ARABIC_ROLE_MARKERS = ['إلى', 'من', 'في', 'على', 'ب'] as const;

/**
 * Tokenizer for Arabic learn-domain input.
 * Keywords are the native verbs followed by the role markers.
 */
export const ArabicLearnTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'ar',
  keywords: [...ARABIC_VERB_GLOSSES.map(([native]) => native), ...ARABIC_ROLE_MARKERS],
  keywordExtras: ARABIC_VERB_GLOSSES.map(([native, normalized]) => ({ native, normalized })),
  includeOperators: true,
  caseInsensitive: false,
});
194
+
195
+ // ─── Chinese ────────────────────────────────────────────────────
196
+
197
/**
 * Chinese command verbs paired with the normalized (English) command name.
 * This table is the single source for both `keywords` and `keywordExtras`, so
 * the two lists cannot drift apart and no verb is registered twice.
 *
 * NOTE(review): '获取' was previously listed twice in `keywords` and only
 * normalizes to 'get'; no entry normalizes to 'fetch' — confirm whether a
 * distinct keyword is wanted for it.
 */
const CHINESE_VERB_GLOSSES = [
  ['添加', 'add'],
  ['移除', 'remove'],
  ['切换', 'toggle'],
  ['放置', 'put'],
  ['设置', 'set'],
  ['显示', 'show'],
  ['隐藏', 'hide'],
  ['获取', 'get'],
  ['等待', 'wait'],
  ['发送', 'send'],
  ['前往', 'go'],
  ['增加', 'increment'],
  ['减少', 'decrement'],
  ['取走', 'take'],
] as const;

/** Chinese role markers, registered after the verbs. */
const CHINESE_ROLE_MARKERS = ['到', '从', '在', '把', '用'] as const;

/**
 * Tokenizer for Chinese learn-domain input.
 * Keywords are the native verbs (each listed once) followed by the role markers.
 */
export const ChineseLearnTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'zh',
  keywords: [...CHINESE_VERB_GLOSSES.map(([native]) => native), ...CHINESE_ROLE_MARKERS],
  keywordExtras: CHINESE_VERB_GLOSSES.map(([native, normalized]) => ({ native, normalized })),
  includeOperators: true,
  caseInsensitive: false,
});
240
+
241
+ // ─── Korean ─────────────────────────────────────────────────────
242
+
243
/**
 * Korean command verbs paired with the normalized (English) command name.
 * This table is the single source for both `keywords` and `keywordExtras`, so
 * the two lists cannot drift apart and no verb is registered twice.
 *
 * NOTE(review): '가져오기' was previously listed twice in `keywords` and only
 * normalizes to 'get'; no entry normalizes to 'fetch' — confirm whether a
 * distinct keyword is wanted for it.
 */
const KOREAN_VERB_GLOSSES = [
  ['추가', 'add'],
  ['제거', 'remove'],
  ['전환', 'toggle'],
  ['넣기', 'put'],
  ['설정', 'set'],
  ['표시', 'show'],
  ['숨기기', 'hide'],
  ['가져오기', 'get'],
  ['대기', 'wait'],
  ['보내기', 'send'],
  ['이동', 'go'],
  ['증가', 'increment'],
  ['감소', 'decrement'],
  ['가져가기', 'take'],
] as const;

/** Korean particles registered as keywords after the verbs. */
const KOREAN_PARTICLES = ['를', '에', '에서', '에게', '로'] as const;

/**
 * Tokenizer for Korean learn-domain input.
 * Keywords are the native verbs (each listed once) followed by the particles.
 */
export const KoreanLearnTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'ko',
  keywords: [...KOREAN_VERB_GLOSSES.map(([native]) => native), ...KOREAN_PARTICLES],
  keywordExtras: KOREAN_VERB_GLOSSES.map(([native, normalized]) => ({ native, normalized })),
  includeOperators: true,
  caseInsensitive: false,
});
286
+
287
+ // ─── French ─────────────────────────────────────────────────────
288
+
289
/** French command verbs, in the order they are registered as keywords. */
const FRENCH_COMMAND_VERBS: readonly string[] = [
  'ajouter',
  'supprimer',
  'basculer',
  'mettre',
  'définir',
  'afficher',
  'masquer',
  'obtenir',
  'attendre',
  'récupérer',
  'envoyer',
  'aller',
  'incrémenter',
  'décrémenter',
  'prendre',
];

/** French role markers (prepositions), registered after the verbs. */
const FRENCH_ROLE_MARKERS: readonly string[] = ['à', 'de', 'dans', 'sur', 'par'];

/**
 * Tokenizer for French learn-domain input.
 * Adds the Latin-extended identifier extractor so accented words such as
 * "définir" are extracted as single identifiers.
 */
export const FrenchLearnTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'fr',
  caseInsensitive: true,
  includeOperators: true,
  customExtractors: [new LatinExtendedIdentifierExtractor()],
  keywords: [...FRENCH_COMMAND_VERBS, ...FRENCH_ROLE_MARKERS],
});
317
+
318
+ // ─── Turkish ────────────────────────────────────────────────────
319
+
320
/** Turkish command verbs, in the order they are registered as keywords. */
const TURKISH_COMMAND_VERBS: readonly string[] = [
  'ekle',
  'kaldır',
  'değiştir',
  'koy',
  'ayarla',
  'göster',
  'gizle',
  'al',
  'bekle',
  'getir',
  'gönder',
  'git',
  'artır',
  'azalt',
  'çıkar',
];

/** Turkish role markers, registered after the verbs. */
const TURKISH_ROLE_MARKERS: readonly string[] = ['a', 'dan', 'da', 'e', 'ile'];

/**
 * Tokenizer for Turkish learn-domain input.
 * Adds the Latin-extended identifier extractor so words with ç/ü/ş/ı/ğ are
 * extracted as single identifiers.
 *
 * NOTE(review): Turkish case folding is locale-sensitive (I/ı and İ/i) —
 * confirm how the framework folds case when `caseInsensitive` is true.
 */
export const TurkishLearnTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'tr',
  caseInsensitive: true,
  includeOperators: true,
  customExtractors: [new LatinExtendedIdentifierExtractor()],
  keywords: [...TURKISH_COMMAND_VERBS, ...TURKISH_ROLE_MARKERS],
});
348
+
349
+ // ─── German ─────────────────────────────────────────────────────
350
+
351
/**
 * German command verbs, in the order they are registered as keywords.
 * 'abrufen' was previously listed twice; it is registered once here.
 *
 * NOTE(review): the second 'abrufen' sat where the English list has 'fetch' —
 * confirm whether a distinct German keyword is wanted for it.
 */
const GERMAN_COMMAND_VERBS: readonly string[] = [
  'hinzufügen',
  'entfernen',
  'umschalten',
  'setzen',
  'einstellen',
  'anzeigen',
  'verbergen',
  'abrufen',
  'warten',
  'senden',
  'gehen',
  'erhöhen',
  'verringern',
  'nehmen',
];

/** German role markers (prepositions), registered after the verbs. */
const GERMAN_ROLE_MARKERS: readonly string[] = ['zu', 'von', 'in', 'auf', 'an'];

/**
 * Tokenizer for German learn-domain input.
 * Adds the Latin-extended identifier extractor so words with ü/ö/ä are
 * extracted as single identifiers.
 */
export const GermanLearnTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'de',
  keywords: [...GERMAN_COMMAND_VERBS, ...GERMAN_ROLE_MARKERS],
  customExtractors: [new LatinExtendedIdentifierExtractor()],
  includeOperators: true,
  caseInsensitive: true,
});
379
+
380
+ // ─── Portuguese ─────────────────────────────────────────────────
381
+
382
/** Portuguese command verbs, in the order they are registered as keywords. */
const PORTUGUESE_COMMAND_VERBS: readonly string[] = [
  'adicionar',
  'remover',
  'alternar',
  'colocar',
  'definir',
  'mostrar',
  'esconder',
  'obter',
  'esperar',
  'buscar',
  'enviar',
  'ir',
  'incrementar',
  'decrementar',
  'pegar',
];

/** Portuguese role markers (prepositions), registered after the verbs. */
const PORTUGUESE_ROLE_MARKERS: readonly string[] = ['a', 'de', 'em', 'para', 'por'];

/**
 * Tokenizer for Portuguese learn-domain input.
 * Adds the Latin-extended identifier extractor so accented words are
 * extracted as single identifiers.
 */
export const PortugueseLearnTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'pt',
  caseInsensitive: true,
  includeOperators: true,
  customExtractors: [new LatinExtendedIdentifierExtractor()],
  keywords: [...PORTUGUESE_COMMAND_VERBS, ...PORTUGUESE_ROLE_MARKERS],
});