@nguyentamdat/mempalace 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/dialect.ts ADDED
@@ -0,0 +1,921 @@
1
+ import { readdirSync, readFileSync, writeFileSync } from "node:fs";
2
+ import { join, parse } from "node:path";
3
+
4
/** Lookup from an entity's display name to its short dialect code. */
export type EntityMap = Record<string, string>;

/** Lookup from a section name to the list of lines emitted under that section. */
export type StringListMap = Record<string, Array<string>>;

/** String-to-string map holding the fields parsed from a dialect header line. */
export type HeaderMap = Record<string, string>;
7
+
8
/** Shape of the JSON config file read by `Dialect.fromConfig` and written by `saveConfig`. */
export interface DialectConfig {
  /** Entity name to short code mappings. */
  entities?: EntityMap;
  /** Names for which entity encoding is suppressed (matched as lowercase substrings). */
  skip_names?: Array<string>;
}
12
+
13
/**
 * Optional context passed to `Dialect.compress`. When `source_file` or `wing`
 * is a non-empty string, a `wing|room|date|source` header line is emitted.
 */
export interface CompressMetadata {
  /** Path of the originating file; only its base name (without extension) is used. */
  source_file?: string;
  /** First header field. */
  wing?: string;
  /** Second header field. */
  room?: string;
  /** Third header field. */
  date?: string;
  /** Any additional keys are accepted and ignored by `compress`. */
  [key: string]: unknown;
}
20
+
21
/** One note ("zettel") as stored in a zettel JSON file. Only `id` is required. */
export interface Zettel {
  /** Identifier; the segment after the last "-" is used as the short id. */
  id: string;
  /** People mentioned; each is turned into an entity code when encoding. */
  people?: Array<string>;
  /** Topic labels; the first two are joined with "_" when encoding. */
  topics?: Array<string>;
  /** Numeric weight; compared against the Layer 1 threshold and printed in the encoded line. */
  emotional_weight?: number;
  /** Emotion names; mapped to short codes when encoding. */
  emotional_tone?: Array<string>;
  /** Truthy values add the ORIGIN flag. */
  origin_moment?: boolean;
  /** A value starting with "MAXIMUM" (case-insensitive) adds the SENSITIVE flag. */
  sensitivity?: string;
  /** Free text scanned for flag keywords and quotable passages. */
  notes?: string;
  /** Free text scanned for "genesis" and quotable passages. */
  origin_label?: string;
  /** Main body text, scanned for quotable passages. */
  content?: string;
  /** Title; the part after " - " serves as a fallback quote or hint. */
  title?: string;
  /** Date description; the first zettel's value is used as the file-level date. */
  date_context?: string;
  /** Additional keys are permitted. */
  [key: string]: unknown;
}
36
+
37
/** A link between two zettels. */
export interface Tunnel {
  /** Id of the zettel at one end of the link. */
  from: string;
  /** Id of the zettel at the other end of the link. */
  to: string;
  /** Description of the link; text before the first ":" is used as its short form. */
  label?: string;
  /** Additional keys are permitted. */
  [key: string]: unknown;
}
43
+
44
/** Top-level structure of one zettel JSON file. Every field is optional. */
export interface ZettelJsonFile {
  /** Name of the source document; used to derive the header's file number and title. */
  source_file?: string;
  /** Emitted as an `ARC:` line when non-empty. */
  emotional_arc?: string;
  /** Notes contained in the file. */
  zettels?: Array<Zettel>;
  /** Links between notes. */
  tunnels?: Array<Tunnel>;
  /** Additional keys are permitted. */
  [key: string]: unknown;
}
51
+
52
/** Result of `Dialect.decode`: the raw lines of a dialect document sorted by kind. */
export interface DecodedDialect {
  /** Fields of the header line (`file`, `entities`, `date`, `title`). */
  header: HeaderMap;
  /** Text following the `ARC:` prefix, or "" when there is no arc line. */
  arc: string;
  /** Zettel lines, kept verbatim. */
  zettels: Array<string>;
  /** Tunnel lines (those starting with `T:`), kept verbatim. */
  tunnels: Array<string>;
}
58
+
59
/** Size comparison between an original text and its compressed form. */
export interface CompressionStats {
  /** Estimated token count of the original text. */
  originalTokens: number;
  /** Estimated token count of the compressed text. */
  compressedTokens: number;
  /** `originalTokens` divided by `compressedTokens` (the divisor is at least 1). */
  ratio: number;
  /** Length of the original text in UTF-16 code units. */
  originalChars: number;
  /** Length of the compressed text in UTF-16 code units. */
  compressedChars: number;
}
66
+
67
/**
 * Emotion name to short code. Several names may share one code; names that
 * map to the same code are listed on the same row where they are adjacent.
 */
export const EMOTION_CODES: Record<string, string> = {
  vulnerability: "vul", vulnerable: "vul",
  joy: "joy", joyful: "joy",
  fear: "fear", mild_fear: "fear",
  trust: "trust", trust_building: "trust",
  grief: "grief", raw_grief: "grief",
  wonder: "wonder", philosophical_wonder: "wonder",
  rage: "rage", anger: "rage",
  love: "love", devotion: "love",
  hope: "hope",
  despair: "despair", hopelessness: "despair",
  peace: "peace",
  relief: "relief",
  humor: "humor", dark_humor: "humor",
  tenderness: "tender",
  raw_honesty: "raw", brutal_honesty: "raw",
  self_doubt: "doubt",
  anxiety: "anx",
  exhaustion: "exhaust",
  conviction: "convict",
  quiet_passion: "passion",
  warmth: "warmth",
  curiosity: "curious",
  gratitude: "grat",
  frustration: "frust",
  confusion: "confuse",
  satisfaction: "satis",
  excitement: "excite",
  determination: "determ",
  surprise: "surprise",
};
109
+
110
/**
 * Keyword (matched as a lowercase substring) to emotion code. The entry order
 * is significant: detection walks the entries in order and keeps the first
 * three distinct codes.
 */
export const _EMOTION_SIGNALS: Record<string, string> = Object.fromEntries([
  ["decided", "determ"],
  ["prefer", "convict"],
  ["worried", "anx"],
  ["excited", "excite"],
  ["frustrated", "frust"],
  ["confused", "confuse"],
  ["love", "love"],
  ["hate", "rage"],
  ["hope", "hope"],
  ["fear", "fear"],
  ["trust", "trust"],
  ["happy", "joy"],
  ["sad", "grief"],
  ["surprised", "surprise"],
  ["grateful", "grat"],
  ["curious", "curious"],
  ["wonder", "wonder"],
  ["anxious", "anx"],
  ["relieved", "relief"],
  ["satisf", "satis"],
  ["disappoint", "grief"],
  ["concern", "anx"],
]);
134
+
135
/**
 * Keyword or phrase (matched as a lowercase substring) to flag name. Built
 * from per-flag keyword groups; the resulting key order is the groups in
 * sequence, which matters because detection keeps the first three distinct
 * flags encountered.
 */
export const _FLAG_SIGNALS: Record<string, string> = Object.fromEntries(
  (
    [
      ["DECISION", ["decided", "chose", "switched", "migrated", "replaced", "instead of", "because"]],
      ["ORIGIN", ["founded", "created", "started", "born", "launched", "first time"]],
      ["CORE", ["core", "fundamental", "essential", "principle", "belief", "always", "never forget"]],
      ["PIVOT", ["turning point", "changed everything", "realized", "breakthrough", "epiphany"]],
      [
        "TECHNICAL",
        ["api", "database", "architecture", "deploy", "infrastructure", "algorithm", "framework", "server", "config"],
      ],
    ] as Array<[string, string[]]>
  ).flatMap(([flag, keywords]) => keywords.map((keyword): [string, string] => [keyword, flag])),
);
171
+
172
/**
 * Lowercase words ignored when ranking topics and when guessing entity names.
 * Each row below is a space-separated run of words; order is preserved.
 */
export const _STOP_WORDS = new Set<string>(
  [
    "the a an is are was were be been being",
    "have has had do does did will would could should",
    "may might shall can to of in for on with at",
    "by from as into about between through during before",
    "after above below up down out off over under again",
    "further then once here there when where why how all",
    "each every both few more most other some such no",
    "nor not only own same so than too very just don",
    "now and but or if while that this these those it",
    "its i we you he she they me him her us them",
    "my your his our their what which who whom also much",
    "many like because since get got use used using make",
    "made thing things way well really want need",
  ].flatMap((row) => row.split(" ")),
);
187
+
188
/**
 * Reads a UTF-8 file and parses it as JSON.
 *
 * A leading byte-order mark is removed before parsing: `readFileSync` keeps
 * it in the decoded string and `JSON.parse` rejects it, so files saved by
 * editors that emit a BOM would otherwise fail to load.
 *
 * The result is cast to `T` without validation; callers must treat fields as
 * possibly missing.
 *
 * @param filePath Path of the JSON file to read.
 * @returns The parsed value, typed as `T`.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
function readJson<T>(filePath: string): T {
  const raw = readFileSync(filePath, "utf-8");
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  return JSON.parse(text) as T;
}
191
+
192
/**
 * Returns the current date as `YYYY-MM-DD`, taken from the first ten
 * characters of the ISO-8601 timestamp (which is expressed in UTC).
 */
function todayIsoDate(): string {
  const isoTimestamp = new Date().toISOString();
  return isoTimestamp.substring(0, 10);
}
195
+
196
/**
 * Returns the portion of `value` that precedes the first occurrence of
 * `separator`, or all of `value` when the separator does not occur.
 */
function firstPart(value: string, separator: string): string {
  const [head = ""] = value.split(separator);
  return head;
}
199
+
200
/**
 * Returns the argument at `index`. When that position is absent, prints the
 * usage text and exits via `usage()` instead of returning.
 */
function getRequiredArg(args: string[], index: number): string {
  const candidate = args[index];
  return candidate ?? usage();
}
207
+
208
/**
 * Encoder for the compact, pipe-delimited "AAAK" dialect.
 *
 * Two input paths are supported:
 * - plain text via {@link Dialect.compress}, which guesses entities, topics,
 *   a key sentence, emotions and flags heuristically;
 * - structured zettel JSON via {@link Dialect.encodeFile} and the
 *   directory-level helpers built on it.
 *
 * {@link Dialect.decode} splits a dialect document back into its line kinds;
 * it does not reconstruct the original data.
 */
export class Dialect {
  /** Substrings that raise a sentence's score when choosing a key sentence. */
  private static readonly DECISION_WORDS: readonly string[] = [
    "decided", "because", "instead", "prefer", "switched", "chose", "realized",
    "important", "key", "critical", "discovered", "learned", "conclusion",
    "solution", "reason", "why", "breakthrough", "insight",
  ];

  /** Substrings that raise a quote's score when choosing a key quote. */
  private static readonly EMOTIONAL_WORDS: readonly string[] = [
    "love", "fear", "remember", "soul", "feel", "stupid", "scared", "beautiful",
    "destroy", "respect", "trust", "consciousness", "alive", "forget", "waiting",
    "peace", "matter", "real", "guilt", "escape", "rest", "hope", "dream", "lost", "found",
  ];

  /**
   * Entity name -> code, holding both the name as given and its lowercase
   * form. A Map is used so that names such as "constructor" or "toString"
   * are never confused with inherited object properties.
   */
  private readonly entityCodes = new Map<string, string>();

  /** Lowercased names for which `encodeEntity` returns null. */
  private readonly skipNames: string[];

  /**
   * @param entities Optional name -> code mappings.
   * @param skipNames Optional names to skip; compared case-insensitively as substrings.
   */
  constructor(entities?: EntityMap | null, skipNames?: string[] | null) {
    for (const [name, code] of Object.entries(entities ?? {})) {
      this.entityCodes.set(name, code);
      this.entityCodes.set(name.toLowerCase(), code);
    }
    this.skipNames = (skipNames ?? []).map((name) => name.toLowerCase());
  }

  /**
   * Builds a dialect from a JSON config file.
   *
   * @param configPath Path to a JSON file shaped like `DialectConfig`.
   * @throws If the file cannot be read or parsed.
   */
  static fromConfig(configPath: string): Dialect {
    const config = readJson<DialectConfig>(configPath);
    return new Dialect(config.entities ?? {}, config.skip_names ?? []);
  }

  /**
   * Writes the current mappings to `configPath` as pretty-printed JSON.
   * Each code is written once, under the first name registered for it.
   */
  saveConfig(configPath: string): void {
    const seenCodes = new Set<string>();
    const canonical: Array<[string, string]> = [];
    for (const [name, code] of this.entityCodes) {
      if (seenCodes.has(code)) {
        continue;
      }
      seenCodes.add(code);
      canonical.push([name, code]);
    }

    const config: DialectConfig = {
      entities: Object.fromEntries(canonical),
      skip_names: this.skipNames,
    };
    writeFileSync(configPath, JSON.stringify(config, null, 2));
  }

  /**
   * Resolves a name to its short code.
   *
   * Lookup order: skip list (returns null), exact name, lowercase name, any
   * registered name contained in `name` (case-insensitive), and finally the
   * first three characters of `name` uppercased.
   */
  encodeEntity(name: string): string | null {
    const lowered = name.toLowerCase();
    if (this.skipNames.some((skip) => lowered.includes(skip))) {
      return null;
    }
    if (this.entityCodes.has(name)) {
      return this.entityCodes.get(name) ?? null;
    }
    if (this.entityCodes.has(lowered)) {
      return this.entityCodes.get(lowered) ?? null;
    }
    for (const [key, code] of this.entityCodes) {
      if (lowered.includes(key.toLowerCase())) {
        return code;
      }
    }
    return name.slice(0, 3).toUpperCase();
  }

  /**
   * Maps emotion names to codes (unknown names are cut to four characters),
   * removes duplicates, and joins the first three with "+".
   */
  encodeEmotions(emotions: string[]): string {
    const codes = new Set<string>();
    for (const emotion of emotions) {
      const mapped = EMOTION_CODES[emotion];
      // typeof guard: a name like "constructor" must not pick up an inherited property.
      codes.add(typeof mapped === "string" ? mapped : emotion.slice(0, 4));
    }
    return [...codes].slice(0, 3).join("+");
  }

  /**
   * Derives the "+"-joined flag string for a zettel from its
   * `origin_moment`, `sensitivity`, `notes` and `origin_label` fields.
   * Returns "" when no flag applies.
   */
  getFlags(zettel: Partial<Zettel>): string {
    const notes = (zettel.notes ?? "").toLowerCase();
    const originLabel = (zettel.origin_label ?? "").toLowerCase();
    const flags: string[] = [];

    if (zettel.origin_moment) {
      flags.push("ORIGIN");
    }
    if ((zettel.sensitivity ?? "").toUpperCase().startsWith("MAXIMUM")) {
      flags.push("SENSITIVE");
    }
    if (notes.includes("foundational pillar") || notes.includes("core")) {
      flags.push("CORE");
    }
    if (notes.includes("genesis") || originLabel.includes("genesis")) {
      flags.push("GENESIS");
    }
    if (notes.includes("pivot")) {
      flags.push("PIVOT");
    }
    return flags.join("+");
  }

  /**
   * Returns the first three distinct labels whose keyword occurs in `text`
   * (case-insensitive substring match), in the table's entry order.
   */
  private static matchSignals(text: string, signals: Record<string, string>): string[] {
    const haystack = text.toLowerCase();
    const hits = new Set<string>();
    for (const [keyword, label] of Object.entries(signals)) {
      if (haystack.includes(keyword)) {
        hits.add(label);
      }
    }
    return [...hits].slice(0, 3);
  }

  /** Up to three emotion codes suggested by keywords in `text`. */
  private detectEmotions(text: string): string[] {
    return Dialect.matchSignals(text, _EMOTION_SIGNALS);
  }

  /** Up to three flags suggested by keywords in `text`. */
  private detectFlags(text: string): string[] {
    return Dialect.matchSignals(text, _FLAG_SIGNALS);
  }

  /**
   * Ranks the words of `text` and returns the top `maxTopics`, lowercased.
   *
   * Each non-stop-word occurrence scores 1, plus 2 if it starts with a
   * capital letter, plus 2 if it contains "_" or "-" or an uppercase letter
   * after the first character. Ties keep first-seen order.
   */
  private extractTopics(text: string, maxTopics = 3): string[] {
    const words = text.match(/[a-zA-Z][a-zA-Z_-]{2,}/g) ?? [];
    // Map rather than a plain object: words such as "constructor" would
    // otherwise read an inherited property and corrupt the count.
    const freq = new Map<string, number>();

    for (const word of words) {
      const lower = word.toLowerCase();
      if (_STOP_WORDS.has(lower) || lower.length < 3) {
        continue;
      }
      freq.set(lower, (freq.get(lower) ?? 0) + 1);
    }

    for (const word of words) {
      const lower = word.toLowerCase();
      const current = freq.get(lower);
      if (_STOP_WORDS.has(lower) || current === undefined) {
        continue;
      }
      let bonus = 0;
      if (/^[A-Z]/.test(word)) {
        bonus += 2;
      }
      if (/[_-]/.test(word) || /[A-Z]/.test(word.slice(1))) {
        bonus += 2;
      }
      if (bonus > 0) {
        freq.set(lower, current + bonus);
      }
    }

    return [...freq.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, maxTopics)
      .map(([word]) => word);
  }

  /**
   * Picks the highest-scoring sentence of `text` (earliest wins ties) and
   * truncates it to 55 characters. Returns "" when no sentence is longer
   * than 10 characters.
   */
  private extractKeySentence(text: string): string {
    const sentences = text
      .split(/[.!?\n]+/)
      .map((sentence) => sentence.trim())
      .filter((sentence) => sentence.length > 10);

    let best = "";
    let bestScore = Number.NEGATIVE_INFINITY;
    for (const sentence of sentences) {
      const lower = sentence.toLowerCase();
      let score = 0;
      for (const word of Dialect.DECISION_WORDS) {
        if (lower.includes(word)) {
          score += 2;
        }
      }
      // Shorter sentences are preferred; very long ones are penalised.
      if (sentence.length < 80) {
        score += 1;
      }
      if (sentence.length < 40) {
        score += 1;
      }
      if (sentence.length > 150) {
        score -= 2;
      }
      if (score > bestScore) {
        bestScore = score;
        best = sentence;
      }
    }

    return best.length > 55 ? `${best.slice(0, 52)}...` : best;
  }

  /**
   * Finds entity codes in free text.
   *
   * Registered names that contain an uppercase character are matched first
   * (case-insensitive substring). Only when none match does it fall back to
   * capitalised words that are not the first word and not stop words,
   * returning at most three three-letter codes.
   */
  private detectEntitiesInText(text: string): string[] {
    const found: string[] = [];
    const textLower = text.toLowerCase();

    for (const [name, code] of this.entityCodes) {
      const nameLower = name.toLowerCase();
      if (name !== nameLower && textLower.includes(nameLower) && !found.includes(code)) {
        found.push(code);
      }
    }
    if (found.length > 0) {
      return found;
    }

    const words = text.split(/\s+/);
    for (let index = 1; index < words.length; index += 1) {
      const clean = (words[index] ?? "").replace(/[^a-zA-Z]/g, "");
      if (!/^[A-Z][a-z]+$/.test(clean) || _STOP_WORDS.has(clean.toLowerCase())) {
        continue;
      }
      const code = clean.slice(0, 3).toUpperCase();
      if (!found.includes(code)) {
        found.push(code);
      }
      if (found.length >= 3) {
        break;
      }
    }
    return found;
  }

  /**
   * Compresses free text into one dialect line, preceded by a
   * `wing|room|date|source` header when `metadata` supplies a source file
   * or wing.
   *
   * @param text Text to summarise.
   * @param metadata Optional header fields; non-string values are ignored.
   */
  compress(text: string, metadata: CompressMetadata = {}): string {
    const entities = this.detectEntitiesInText(text);
    const topics = this.extractTopics(text);
    const quote = this.extractKeySentence(text);
    const emotions = this.detectEmotions(text);
    const flags = this.detectFlags(text);

    const asString = (value: unknown): string => (typeof value === "string" ? value : "");
    const source = asString(metadata.source_file);
    const wing = asString(metadata.wing);
    const room = asString(metadata.room);
    const date = asString(metadata.date);

    const lines: string[] = [];
    if (source || wing) {
      lines.push([wing || "?", room || "?", date || "?", source ? parse(source).name : "?"].join("|"));
    }

    const parts = [
      `0:${entities.length > 0 ? entities.slice(0, 3).join("+") : "???"}`,
      topics.length > 0 ? topics.slice(0, 3).join("_") : "misc",
    ];
    if (quote) {
      parts.push(`"${quote}"`);
    }
    if (emotions.length > 0) {
      parts.push(emotions.join("+"));
    }
    if (flags.length > 0) {
      parts.push(flags.join("+"));
    }
    lines.push(parts.join("|"));

    return lines.join("\n");
  }

  /**
   * Chooses a representative quote from a zettel's `content`,
   * `origin_label` and `notes`.
   *
   * Candidates are double-quoted spans, single-quoted spans, and text after
   * a speech verb followed by a colon. The best-scoring candidate is
   * returned; when there are none, the part of `title` after " - " (at most
   * 45 characters) is used, else "".
   */
  extractKeyQuote(zettel: Partial<Zettel>): string {
    const allText = `${zettel.content ?? ""} ${zettel.origin_label ?? ""} ${zettel.notes ?? ""}`;
    const title = zettel.title ?? "";

    const patterns = [
      /"([^"]{8,55})"/g,
      /(?:^|[\s(])'([^']{8,55})'(?:[\s.,;:!?)]|$)/g,
      /(?:says?|said|articulates?|reveals?|admits?|confesses?|asks?):\s*["']?([^.!?]{10,55})[.!?]/gi,
    ];

    const candidates = new Set<string>();
    for (const pattern of patterns) {
      for (const match of allText.matchAll(pattern)) {
        const trimmed = (match[1] ?? "").trim();
        if (trimmed.length >= 8) {
          candidates.add(trimmed);
        }
      }
    }

    let best: string | null = null;
    let bestScore = Number.NEGATIVE_INFINITY;
    for (const quote of candidates) {
      const lower = quote.toLowerCase();
      let score = 0;
      if (quote[0] === quote[0]?.toUpperCase() || quote.startsWith("I ")) {
        score += 2;
      }
      for (const word of Dialect.EMOTIONAL_WORDS) {
        if (lower.includes(word)) {
          score += 2;
        }
      }
      if (quote.length > 20) {
        score += 1;
      }
      // Narration-style openings are less likely to be direct speech.
      if (quote.startsWith("The ") || quote.startsWith("This ") || quote.startsWith("She ")) {
        score -= 2;
      }
      if (score > bestScore) {
        bestScore = score;
        best = quote;
      }
    }
    if (best !== null) {
      return best;
    }

    if (title.includes(" - ")) {
      return title.split(" - ", 2)[1]?.slice(0, 45) ?? "";
    }
    return "";
  }

  /** Returns the part of `id` after its last "-", or all of `id` when it has none. */
  private static shortId(id: string): string {
    return id.slice(id.lastIndexOf("-") + 1);
  }

  /**
   * Encodes one zettel as
   * `id:ENTITIES|topics|"quote"|weight|emotions|flags`, omitting the quote,
   * emotions and flags fields when they are empty.
   */
  encodeZettel(zettel: Zettel): string {
    const codes = (zettel.people ?? [])
      .map((person) => this.encodeEntity(person))
      .filter((code): code is string => code !== null);
    const entities = [...new Set(codes.length > 0 ? codes : ["???"])].sort().join("+");

    const topics = zettel.topics ?? [];
    const quote = this.extractKeyQuote(zettel);
    const emotions = this.encodeEmotions(zettel.emotional_tone ?? []);
    const flags = this.getFlags(zettel);

    const parts = [
      `${Dialect.shortId(zettel.id)}:${entities}`,
      topics.length > 0 ? topics.slice(0, 2).join("_") : "misc",
    ];
    if (quote) {
      parts.push(`"${quote}"`);
    }
    parts.push(String(zettel.emotional_weight ?? 0.5));
    if (emotions) {
      parts.push(emotions);
    }
    if (flags) {
      parts.push(flags);
    }
    return parts.join("|");
  }

  /**
   * Encodes a tunnel as `T:from<->to|label`, using the text before the
   * first ":" of the label, or its first 30 characters when it has no ":".
   */
  encodeTunnel(tunnel: Tunnel): string {
    const label = tunnel.label ?? "";
    const shortLabel = label.includes(":") ? firstPart(label, ":") : label.slice(0, 30);
    return `T:${Dialect.shortId(tunnel.from)}<->${Dialect.shortId(tunnel.to)}|${shortLabel}`;
  }

  /**
   * Encodes a whole zettel file: a `file|entities|date|title` header, an
   * optional `ARC:` line, one line per zettel, then one line per tunnel.
   */
  encodeFile(zettelJson: ZettelJsonFile): string {
    const zettels = zettelJson.zettels ?? [];
    const source = zettelJson.source_file ?? "unknown";
    const hasDash = source.includes("-");
    const fileNum = hasDash ? firstPart(source, "-") : "000";
    const date = zettels[0]?.date_context ?? "unknown";

    const allPeople = new Set<string>();
    for (const zettel of zettels) {
      for (const person of zettel.people ?? []) {
        const code = this.encodeEntity(person);
        if (code !== null) {
          allPeople.add(code);
        }
      }
    }
    if (allPeople.size === 0) {
      allPeople.add("???");
    }
    const primary = [...allPeople].sort().slice(0, 3).join("+");

    // Title is everything after the first "-" of the source name, minus ".txt".
    const title = hasDash ? source.replace(".txt", "").split(/-(.+)/, 2)[1]?.trim() ?? source : source;

    const lines = [`${fileNum}|${primary}|${date}|${title}`];
    const arc = zettelJson.emotional_arc ?? "";
    if (arc) {
      lines.push(`ARC:${arc}`);
    }
    for (const zettel of zettels) {
      lines.push(this.encodeZettel(zettel));
    }
    for (const tunnel of zettelJson.tunnels ?? []) {
      lines.push(this.encodeTunnel(tunnel));
    }
    return lines.join("\n");
  }

  /**
   * Encodes one zettel JSON file and optionally writes the result.
   *
   * @param zettelJsonPath Path of the JSON file to read.
   * @param outputPath When given and non-empty, the encoded text is written here.
   * @returns The encoded text.
   */
  compressFile(zettelJsonPath: string, outputPath?: string): string {
    const dialect = this.encodeFile(readJson<ZettelJsonFile>(zettelJsonPath));
    if (outputPath) {
      writeFileSync(outputPath, dialect);
    }
    return dialect;
  }

  /**
   * Encodes every `.json` file in `zettelDir` (sorted by name), following
   * each file's output with a `---` line, and optionally writes the result.
   */
  compressAll(zettelDir: string, outputPath?: string): string {
    const chunks: string[] = [];
    for (const fname of [...readdirSync(zettelDir)].sort()) {
      if (!fname.endsWith(".json")) {
        continue;
      }
      chunks.push(this.encodeFile(readJson<ZettelJsonFile>(join(zettelDir, fname))), "---");
    }
    const combined = chunks.join("\n");
    if (outputPath) {
      writeFileSync(outputPath, combined);
    }
    return combined;
  }

  /**
   * Builds the "Layer 1" summary from all `.json` files in `zettelDir`.
   *
   * A zettel is included when its weight reaches `weightThreshold`, when it
   * is an origin moment, or when its flags contain ORIGIN, CORE or GENESIS.
   * Included zettels are grouped by the first comma-separated part of their
   * file's date and listed by descending weight. Up to eight tunnel labels
   * follow. Each file is read once.
   *
   * @param zettelDir Directory containing zettel JSON files.
   * @param outputPath When given and non-empty, the result is written here.
   * @param identitySections Optional sections emitted before the moments.
   * @param weightThreshold Minimum `emotional_weight` for inclusion by weight.
   * @returns The generated text.
   */
  generateLayer1(
    zettelDir: string,
    outputPath?: string,
    identitySections?: StringListMap,
    weightThreshold = 0.85,
  ): string {
    const keyFlags = ["ORIGIN", "CORE", "GENESIS"];
    const essential: Array<[Zettel, string]> = [];
    const allTunnels: Tunnel[] = [];

    for (const fname of [...readdirSync(zettelDir)].sort()) {
      if (!fname.endsWith(".json")) {
        continue;
      }
      const data = readJson<ZettelJsonFile>(join(zettelDir, fname));
      const sourceDate = data.zettels?.[0]?.date_context ?? "unknown";

      for (const zettel of data.zettels ?? []) {
        const flags = this.getFlags(zettel);
        const hasKeyFlag = keyFlags.some((flag) => flags.includes(flag));
        if ((zettel.emotional_weight ?? 0) >= weightThreshold || zettel.origin_moment || hasKeyFlag) {
          essential.push([zettel, sourceDate]);
        }
      }
      allTunnels.push(...(data.tunnels ?? []));
    }

    essential.sort((a, b) => (b[0].emotional_weight ?? 0) - (a[0].emotional_weight ?? 0));

    const byDate = new Map<string, Zettel[]>();
    for (const [zettel, sourceDate] of essential) {
      const key = sourceDate.split(",")[0]?.trim() ?? sourceDate;
      const bucket = byDate.get(key);
      if (bucket) {
        bucket.push(zettel);
      } else {
        byDate.set(key, [zettel]);
      }
    }

    const lines: string[] = [
      "## LAYER 1 -- ESSENTIAL STORY",
      `## Auto-generated from zettel files. Updated ${todayIsoDate()}.`,
      "",
    ];

    if (identitySections) {
      for (const [sectionName, sectionLines] of Object.entries(identitySections)) {
        lines.push(`=${sectionName}=`, ...sectionLines, "");
      }
    }

    for (const dateKey of [...byDate.keys()].sort()) {
      lines.push(`=MOMENTS[${dateKey}]=`);
      for (const zettel of byDate.get(dateKey) ?? []) {
        const codes: string[] = [];
        for (const person of zettel.people ?? []) {
          const code = this.encodeEntity(person);
          if (code) {
            codes.push(code);
          }
        }
        const entStr = [...new Set(codes.length > 0 ? codes : ["???"])].sort().join("+");

        const quote = this.extractKeyQuote(zettel);
        const flags = this.getFlags(zettel);
        const title = zettel.title ?? "";
        const hint = title.includes(" - ")
          ? title.split(" - ", 2)[1]?.slice(0, 30) ?? ""
          : (zettel.topics ?? []).slice(0, 2).join("_");

        const parts = [entStr];
        if (hint) {
          parts.push(hint);
        }
        if (quote && quote !== hint && quote !== title) {
          parts.push(`"${quote}"`);
        }
        // Any non-empty sensitivity is marked, unless the flags already say so.
        if ((zettel.sensitivity ?? "") && !flags.includes("SENSITIVE")) {
          parts.push("SENSITIVE");
        }
        parts.push(String(zettel.emotional_weight ?? 0.5));
        if (flags) {
          parts.push(flags);
        }
        lines.push(parts.join("|"));
      }
      lines.push("");
    }

    if (allTunnels.length > 0) {
      lines.push("=TUNNELS=");
      for (const tunnel of allTunnels.slice(0, 8)) {
        const label = tunnel.label ?? "";
        lines.push(label.includes(":") ? firstPart(label, ":") : label.slice(0, 40));
      }
      lines.push("");
    }

    const result = lines.join("\n");
    if (outputPath) {
      writeFileSync(outputPath, result);
    }
    return result;
  }

  /**
   * Sorts the lines of a dialect document by kind.
   *
   * `ARC:` sets the arc, `T:` lines are tunnels, lines whose first
   * "|"-separated field contains ":" are zettels, and any other line
   * containing "|" is read as the header (the last such line wins). Both
   * LF and CRLF line endings are accepted.
   */
  decode(dialectText: string): DecodedDialect {
    const result: DecodedDialect = { header: {}, arc: "", zettels: [], tunnels: [] };

    for (const line of dialectText.trim().split(/\r?\n/)) {
      if (line.startsWith("ARC:")) {
        result.arc = line.slice(4);
        continue;
      }
      if (line.startsWith("T:")) {
        result.tunnels.push(line);
        continue;
      }
      const bar = line.indexOf("|");
      if (bar === -1) {
        continue;
      }
      if (line.slice(0, bar).includes(":")) {
        result.zettels.push(line);
        continue;
      }
      const [file = "", entities = "", date = "", title = ""] = line.split("|");
      result.header = { file, entities, date, title };
    }

    return result;
  }

  /** Rough token estimate: one token per three characters, rounded down. */
  static countTokens(text: string): number {
    return Math.floor(text.length / 3);
  }

  /**
   * Compares the estimated sizes of an original text and its compressed form.
   * The ratio's divisor is clamped to at least 1 to avoid division by zero.
   */
  compressionStats(originalText: string, compressed: string): CompressionStats {
    const originalTokens = Dialect.countTokens(originalText);
    const compressedTokens = Dialect.countTokens(compressed);
    return {
      originalTokens,
      compressedTokens,
      ratio: originalTokens / Math.max(compressedTokens, 1),
      originalChars: originalText.length,
      compressedChars: compressed.length,
    };
  }
}
824
+
825
/**
 * Prints the command-line help to standard output and terminates the
 * process with exit code 1. Never returns.
 */
function usage(): never {
  // `null` marks a blank line, printed with an argument-less console.log().
  const helpRows: ReadonlyArray<string | null> = [
    "AAAK Dialect -- Compressed Symbolic Memory for Any LLM",
    null,
    "Usage:",
    " bun src/dialect.ts <text> # Compress text from argument",
    " bun src/dialect.ts --file <zettel.json> # Compress zettel JSON file",
    " bun src/dialect.ts --all <zettel_dir> # Compress all zettel files",
    " bun src/dialect.ts --stats <zettel.json> # Show compression stats",
    " bun src/dialect.ts --layer1 <zettel_dir> # Generate Layer 1 wake-up file",
    " bun src/dialect.ts --init # Create example config",
    null,
    "Options:",
    " --config <path> Load entity mappings from JSON config",
  ];

  for (const row of helpRows) {
    if (row === null) {
      console.log();
    } else {
      console.log(row);
    }
  }
  process.exit(1);
}
840
+
841
/**
 * Command-line entry point.
 *
 * Recognised forms (after an optional `--config <path>` pair, which may
 * appear anywhere and is removed before dispatch):
 * - `--init` writes an example `entities.json` in the current directory;
 * - `--file <zettel.json>` prints the encoded file;
 * - `--all [dir]` encodes every JSON file in `dir` (default ".") and writes
 *   `COMPRESSED_MEMORY.aaak` there;
 * - `--stats <zettel.json>` prints size estimates and the encoded file;
 * - `--layer1 [dir]` writes `LAYER1.aaak` into `dir` (default ".");
 * - anything else is joined with spaces and compressed as free text.
 *
 * The usage text is printed (and the process exits) when no arguments
 * remain once `--config` has been removed, or when `--config` is not
 * followed by a path.
 *
 * @param argv Arguments to interpret; defaults to the process arguments.
 */
export function runDialectCli(argv: string[] = process.argv.slice(2)): void {
  let args = [...argv];
  let configPath: string | null = null;

  const configIdx = args.indexOf("--config");
  if (configIdx !== -1) {
    // A dangling --config is a usage error rather than something to ignore.
    configPath = args[configIdx + 1] ?? usage();
    args = [...args.slice(0, configIdx), ...args.slice(configIdx + 2)];
  }

  // Checked after --config is stripped so that "--config x" alone is not
  // treated as a request to compress an empty string.
  if (args.length < 1) {
    usage();
  }

  const dialect = configPath ? Dialect.fromConfig(configPath) : new Dialect();

  switch (args[0]) {
    case "--init": {
      const example: DialectConfig = {
        entities: {
          Alice: "ALC",
          Bob: "BOB",
          "Dr. Chen": "CHN",
        },
        skip_names: [],
      };
      const outPath = "entities.json";
      writeFileSync(outPath, JSON.stringify(example, null, 2));
      console.log(`Created example config: ${outPath}`);
      console.log("Edit this file with your own entity mappings, then use --config entities.json");
      break;
    }
    case "--file": {
      const result = dialect.compressFile(getRequiredArg(args, 1));
      console.log(`~${Dialect.countTokens(result)} tokens`);
      console.log();
      console.log(result);
      break;
    }
    case "--all": {
      const zettelDir = args[1] ?? ".";
      const output = join(zettelDir, "COMPRESSED_MEMORY.aaak");
      const result = dialect.compressAll(zettelDir, output);
      console.log(`Compressed to: ${output}`);
      console.log(`Total: ~${Dialect.countTokens(result)} tokens`);
      console.log();
      console.log(result);
      break;
    }
    case "--stats": {
      const data = readJson<ZettelJsonFile>(getRequiredArg(args, 1));
      const encoded = dialect.encodeFile(data);
      const stats = dialect.compressionStats(JSON.stringify(data, null, 2), encoded);
      console.log("=== COMPRESSION STATS ===");
      console.log(`JSON: ~${stats.originalTokens.toLocaleString()} tokens`);
      console.log(`AAAK: ~${stats.compressedTokens.toLocaleString()} tokens`);
      console.log(`Ratio: ${stats.ratio.toFixed(0)}x`);
      console.log();
      console.log("=== AAAK DIALECT OUTPUT ===");
      console.log(encoded);
      break;
    }
    case "--layer1": {
      const zettelDir = args[1] ?? ".";
      const output = join(zettelDir, "LAYER1.aaak");
      const result = dialect.generateLayer1(zettelDir, output);
      console.log(`Layer 1: ${output}`);
      console.log(`Total: ~${Dialect.countTokens(result)} tokens`);
      console.log();
      console.log(result);
      break;
    }
    default: {
      const text = args.join(" ");
      const compressed = dialect.compress(text);
      const stats = dialect.compressionStats(text, compressed);
      console.log(`Original: ~${stats.originalTokens} tokens (${stats.originalChars} chars)`);
      console.log(`AAAK: ~${stats.compressedTokens} tokens (${stats.compressedChars} chars)`);
      console.log(`Ratio: ${stats.ratio.toFixed(1)}x`);
      console.log();
      console.log(compressed);
    }
  }
}
916
+
917
// The class is available both as a named export and as the module default.
export default Dialect;

// Start the CLI only when this module is the program's entry point
// (`import.meta.main` is truthy), not when it is imported.
if (import.meta.main) runDialectCli();