baby-daemon 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,159 @@
1
+ /**
2
+ * idempotency.js
3
+ * ──────────────
4
+ * WHAT THIS FILE DOES:
5
+ * Prevents us from processing the same file version twice.
6
+ * Every time a file changes, we generate a unique "fingerprint" for it.
7
+ * If we've seen that fingerprint before → skip. If new → process.
8
+ *
9
+ * CONCEPT: Idempotency
10
+ * "Idempotent" means: doing something twice = doing it once.
11
+ * Example: pressing an elevator button twice doesn't call it twice.
12
+ * We want the same guarantee: even if the watcher fires 5 times for
13
+ * the same file save, we process it exactly once.
14
+ *
15
+ * CONCEPT: SHA-256 Hash
16
+ * A hash function takes ANY input (text, file path, anything)
17
+ * and produces a fixed-length "fingerprint" string.
18
+ * - Same input → ALWAYS same output
19
+ * - Different input → different output in practice (even a 1-char difference changes the hash; collisions are theoretically possible but infeasible to find)
20
+ * - You CANNOT reverse it (you can't get the input back from the hash)
21
+ * SHA-256 produces a 64-character hex string, e.g: "a3f9b2c1..."
22
+ * Node.js has this built-in via the 'crypto' module.
23
+ */
24
+
25
+ import crypto from 'crypto'; // Built-in Node.js module — no install needed
26
+ import fs from 'fs'; // File System module — also built-in
27
+ import path from 'path'; // Path utilities — also built-in
28
+ import { fileURLToPath } from 'url';
29
+
30
+ // ─────────────────────────────────────────────────────────
31
+ // WHERE WE STORE PROCESSED KEYS
32
+ // ─────────────────────────────────────────────────────────
33
+
34
/**
 * ES modules do not provide the CommonJS __dirname, so it is rebuilt here:
 * import.meta.url is this module's file URL, fileURLToPath converts it to an
 * OS path, and path.dirname keeps only the folder that contains this module.
 */
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// The key store sits one directory above this module's folder
// ('..' steps up a level) under the name processed_keys.json.
const KEYS_FILE = path.resolve(__dirname, '..', 'processed_keys.json');
48
+
49
+ // ─────────────────────────────────────────────────────────
50
+ // LOAD KEYS FROM DISK
51
+ // ─────────────────────────────────────────────────────────
52
+
53
/**
 * loadKeys()
 * Reads processed_keys.json from disk and returns its contents as a Set.
 *
 * CONCEPT: Set vs Array
 * Array: allows duplicates, and "does X exist?" scans every item.
 * Set:   holds each value once, and Set.has() is a fast lookup.
 * For idempotency we only ask "have I seen this key?" → Set fits.
 *
 * Failure handling:
 * - Missing file (ENOENT, i.e. first run) → empty Set, silently.
 * - Any other read error, invalid JSON, or JSON that is not an array →
 *   empty Set as well, but with a console warning, because the next
 *   saveKeys() call will overwrite whatever was on disk.
 *
 * @returns {Set<string>} The previously stored keys (possibly empty)
 */
function loadKeys() {
  let raw;
  try {
    raw = fs.readFileSync(KEYS_FILE, 'utf-8');
  } catch (error) {
    // Only "file does not exist yet" is an expected, quiet case.
    if (error.code !== 'ENOENT') {
      console.warn(`Could not read ${KEYS_FILE}; starting with no processed keys:`, error.message);
    }
    return new Set();
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    console.warn(`Ignoring corrupt ${KEYS_FILE}; starting with no processed keys:`, error.message);
    return new Set();
  }

  // saveKeys() always writes an array. Anything else (e.g. a JSON string,
  // which new Set() would split into characters) is treated as corrupt.
  if (!Array.isArray(parsed)) {
    console.warn(`Ignoring ${KEYS_FILE}: expected a JSON array of keys.`);
    return new Set();
  }

  // Keys are hex digest strings; drop any stray non-string entries.
  return new Set(parsed.filter((key) => typeof key === 'string'));
}
78
+
79
+ // ─────────────────────────────────────────────────────────
80
+ // SAVE KEYS TO DISK
81
+ // ─────────────────────────────────────────────────────────
82
+
83
/**
 * saveKeys(keys)
 * Persists the given Set of keys to processed_keys.json so they survive
 * a restart of the program.
 *
 * The Set is converted to an array (JSON has no Set type) and serialized
 * with 2-space indentation to keep the file human-readable.
 *
 * The data is written to a sibling ".tmp" file first and then renamed over
 * the real file, so an interrupted write cannot leave a half-written key
 * store behind.
 *
 * @param {Set<string>} keys - Keys to persist
 * @throws {Error} Re-throws any file system error after removing the temp file
 */
function saveKeys(keys) {
  const serialized = JSON.stringify(Array.from(keys), null, 2);
  const tempFile = `${KEYS_FILE}.tmp`;

  try {
    fs.writeFileSync(tempFile, serialized, 'utf-8');
    fs.renameSync(tempFile, KEYS_FILE);
  } catch (error) {
    // Best-effort cleanup; the original failure is the one worth reporting.
    try {
      fs.rmSync(tempFile, { force: true });
    } catch {
      // Nothing more can be done about a leftover temp file here.
    }
    throw error;
  }
}
100
+
101
+ // ─────────────────────────────────────────────────────────
102
+ // GENERATE THE IDEMPOTENCY KEY
103
+ // ─────────────────────────────────────────────────────────
104
+
105
/**
 * getIdempotencyKey(filePath, mtimeMs)
 * Builds the fingerprint for one specific version of a file.
 *
 * The path and modification time are joined as "<filePath>|<mtimeMs>" and
 * hashed with SHA-256. Using both parts means a new save of the same file
 * (new mtime) and a different file saved at the same instant (different
 * path) each get their own key.
 *
 * @param {string} filePath - Path of the file, e.g. "C:/logs/chat_42.md"
 * @param {number} mtimeMs  - Last modified time in milliseconds (from fs.stat)
 * @returns {string}        - The SHA-256 digest as a 64-char hex string
 */
export function getIdempotencyKey(filePath, mtimeMs) {
  const hasher = crypto.createHash('sha256');
  hasher.update(filePath + '|' + mtimeMs);
  // Hex output (0-9, a-f) is easy to store in JSON and to compare.
  return hasher.digest('hex');
}
125
+
126
+ // ─────────────────────────────────────────────────────────
127
+ // THE MAIN CHECK: Have we already processed this file version?
128
+ // ─────────────────────────────────────────────────────────
129
+
130
/**
 * isAlreadyProcessed(filePath, mtimeMs)
 * Tells whether this exact file version (path + mtime) is already recorded
 * in the key store.
 *
 * @param {string} filePath - Path of the file
 * @param {number} mtimeMs  - Last modified time in milliseconds
 * @returns {boolean} true when the version was processed before, false when
 *                    it is new and should be processed
 */
export function isAlreadyProcessed(filePath, mtimeMs) {
  // The store is re-read from disk on every call, then probed with Set.has().
  const knownKeys = loadKeys();
  return knownKeys.has(getIdempotencyKey(filePath, mtimeMs));
}
140
+
141
+ // ─────────────────────────────────────────────────────────
142
+ // MARK A FILE VERSION AS PROCESSED
143
+ // ─────────────────────────────────────────────────────────
144
+
145
/**
 * markAsProcessed(filePath, mtimeMs)
 * Records this file version in the key store and writes the store to disk.
 *
 * Call it only AFTER processing succeeded: a version that failed halfway is
 * deliberately left unrecorded so that the next run retries it.
 *
 * @param {string} filePath - Path of the file
 * @param {number} mtimeMs  - Last modified time in milliseconds
 */
export function markAsProcessed(filePath, mtimeMs) {
  const store = loadKeys();
  store.add(getIdempotencyKey(filePath, mtimeMs));
  saveKeys(store);
}
@@ -0,0 +1,95 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import { fileURLToPath } from 'url';
4
+
5
// Folder containing this module, derived from its file URL (ES modules have
// no built-in __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// memory.jsonl lives one directory above this module's folder.
const MEMORY_FILE = path.resolve(__dirname, '..', 'memory.jsonl');
8
+
9
/**
 * saveMemoriesForFile(fileName, memories)
 * Rewrites memory.jsonl so that it holds every stored memory from OTHER
 * files plus the given memories for `fileName` (older memories from the same
 * file are replaced).
 *
 * Details:
 * - A missing memory.jsonl is treated as an empty store.
 * - Stored lines that are not valid JSON cannot be carried over; a warning
 *   reports how many were dropped.
 * - Each incoming memory gets source.chat_file forced to `fileName`, so a
 *   memory generated under a different name cannot be orphaned.
 * - The result is written to "<memory file>.tmp" and renamed into place, so
 *   a failed write does not corrupt the existing store; the temp file is
 *   removed if anything goes wrong.
 *
 * @param {string} fileName - Name of the source chat file
 * @param {Array} memories - List of enriched memory objects
 * @returns {number} The number of memories saved
 * @throws {TypeError} If `memories` is not an array
 * @throws {Error} Any file system error (logged, then re-thrown)
 */
export function saveMemoriesForFile(fileName, memories) {
  const tempFile = `${MEMORY_FILE}.tmp`;

  try {
    if (!Array.isArray(memories)) {
      throw new TypeError('memories must be an array');
    }

    // Read directly and tolerate ENOENT instead of checking existence first,
    // which would leave a window where the file can disappear.
    let content = '';
    try {
      content = fs.readFileSync(MEMORY_FILE, 'utf-8');
    } catch (readError) {
      if (readError.code !== 'ENOENT') {
        throw readError;
      }
    }

    const keptMemories = [];
    let droppedLines = 0;
    for (const line of content.split('\n')) {
      if (!line.trim()) {
        continue;
      }
      let stored;
      try {
        stored = JSON.parse(line);
      } catch {
        droppedLines += 1;
        continue;
      }
      // Falsy JSON values (null, 0, "", false) are not memories.
      if (!stored) {
        continue;
      }
      // Keep only memories that came from other files.
      if (stored.source?.chat_file !== fileName) {
        keptMemories.push(stored);
      }
    }

    if (droppedLines > 0) {
      console.warn(`Dropping ${droppedLines} unparseable line(s) while rewriting ${MEMORY_FILE}`);
    }

    const normalizedMemories = memories.map((mem) => ({
      ...mem,
      source: { ...mem.source, chat_file: fileName },
    }));

    const updatedMemories = [...keptMemories, ...normalizedMemories];

    // JSONL: one JSON document per line, newline-terminated. An empty store
    // is written as an empty file rather than a lone blank line.
    const linesToWrite = updatedMemories.length === 0
      ? ''
      : `${updatedMemories.map((mem) => JSON.stringify(mem)).join('\n')}\n`;

    fs.writeFileSync(tempFile, linesToWrite, 'utf-8');
    fs.renameSync(tempFile, MEMORY_FILE);

    return memories.length;
  } catch (error) {
    // Best-effort removal of a leftover temp file; never mask the real error.
    try {
      fs.rmSync(tempFile, { force: true });
    } catch {
      // Ignored on purpose.
    }
    console.error(`Error saving memories for file ${fileName}:`, error.message);
    throw error;
  }
}
67
+
68
/**
 * readAllMemories()
 * Loads every memory stored in the JSONL file.
 *
 * Returns an empty array when the file does not exist. Blank lines, lines
 * that are not valid JSON, and lines that parse to a falsy value are skipped.
 * Any other failure is logged and re-thrown.
 *
 * @returns {Array} All memories in the store
 */
export function readAllMemories() {
  try {
    if (!fs.existsSync(MEMORY_FILE)) {
      return [];
    }

    const entries = [];
    for (const line of fs.readFileSync(MEMORY_FILE, 'utf-8').split('\n')) {
      if (!line.trim()) {
        continue;
      }

      let entry;
      try {
        entry = JSON.parse(line);
      } catch {
        entry = null;
      }

      if (entry) {
        entries.push(entry);
      }
    }
    return entries;
  } catch (error) {
    console.error('Error reading all memories:', error.message);
    throw error;
  }
}
@@ -0,0 +1,151 @@
1
+ import { ai } from './config.js';
2
+ import crypto from 'crypto';
3
+
4
+ // ─────────────────────────────────────────────────────────
5
+ // SYSTEM INSTRUCTION & JSON SCHEMA — systemInstruction is the system prompt handed to generateContent in summarizeChatLog; its 'type' list matches the enum in responseSchema, so keep the two in sync.
6
+ // ─────────────────────────────────────────────────────────
7
+
8
+ const systemInstruction = `You are a context compression engine for an AI coding assistant memory system.
9
+ Your job is to analyze the raw conversation log (or file contents) provided by the user and extract key technical developments into structured JSON memories.
10
+
11
+ For each memory:
12
+ - 'type': Categorize it as:
13
+ * 'decision' (only if both developer and assistant explicitly agreed and committed to something)
14
+ * 'proposed_idea' (ideas discussed but not yet implemented or decided)
15
+ * 'rejected_idea' (ideas considered and discarded)
16
+ * 'open_question' (questions remaining unresolved)
17
+ * 'bug' (bugs discovered during the session)
18
+ * 'resolved_bug' (bugs fixed during the session)
19
+ * 'architecture_note' (architectural or design details noted)
20
+ * 'file_change' (files created, modified, or deleted)
21
+ - 'content': A concise declarative statement summarizing the memory. Maintain precision: do not turn a 'maybe/proposal' into a 'completed migration'. Be clear about certainty.
22
+ - 'original_text': The exact quote or sentence from the log that provides evidence for this memory.
23
+ - 'confidence': A score between 0.0 (very tentative suggestion) and 1.0 (firm/confirmed fact).
24
+ - 'related_files': List any files mentioned in the context of this memory.
25
+ - 'tags': Simple, descriptive lowercase tags for keyword indexing (e.g. 'auth', 'redis', 'security').
26
+
27
+ Ignore small talk, greetings, minor formatting adjustments, or repetitive debugging output. Keep only information that is vital for another AI assistant or human developer continuing the project later.`;
28
+
29
// Structured-output schema passed to generateContent: the reply must be an
// object with a `memories` array whose items carry all six fields below.
const responseSchema = (() => {
  // Fresh objects per call so no two schema nodes share a reference.
  const stringField = () => ({ type: 'STRING' });
  const stringList = () => ({ type: 'ARRAY', items: stringField() });

  const memoryProperties = {
    type: {
      type: 'STRING',
      enum: [
        'decision',
        'proposed_idea',
        'rejected_idea',
        'open_question',
        'bug',
        'resolved_bug',
        'architecture_note',
        'file_change',
      ],
    },
    content: stringField(),
    original_text: stringField(),
    confidence: { type: 'NUMBER' },
    related_files: stringList(),
    tags: stringList(),
  };

  return {
    type: 'OBJECT',
    properties: {
      memories: {
        type: 'ARRAY',
        items: {
          type: 'OBJECT',
          properties: memoryProperties,
          // Every declared field is mandatory, in declaration order.
          required: Object.keys(memoryProperties),
        },
      },
    },
    required: ['memories'],
  };
})();
75
+
76
+ // ─────────────────────────────────────────────────────────
77
+ // HELPER: GENERATE HASH
78
+ // ─────────────────────────────────────────────────────────
79
+
80
/**
 * generateMemoryHash(memory)
 * Fingerprints a memory by its core fields so the same extracted point can be
 * recognised as a duplicate.
 *
 * @param {object} memory - Object with `type`, `content` and `original_text`
 * @returns {string} SHA-256 hex digest of "<type>|<content>|<original_text>"
 */
function generateMemoryHash(memory) {
  const { type, content, original_text: originalText } = memory;
  const hasher = crypto.createHash('sha256');
  hasher.update(`${type}|${content}|${originalText}`);
  return hasher.digest('hex');
}
88
+
89
+ // ─────────────────────────────────────────────────────────
90
+ // MAIN EXPORT: summarizeChatLog
91
+ // ─────────────────────────────────────────────────────────
92
+
93
/**
 * summarizeChatLog(fileContent, fileName)
 *
 * Sends the log to the Gemini API (model 'gemini-2.5-flash', JSON output
 * constrained by responseSchema) and turns the returned memories into
 * enriched records.
 *
 * Each enriched memory adds: a generated `id`, an ISO `timestamp`,
 * `status: 'active'`, `source.chat_file` set to `fileName`, and a `hash`
 * from generateMemoryHash(). Missing `related_files` / `tags` default to [].
 *
 * The model output is validated before use so that a malformed reply fails
 * with a descriptive message instead of an opaque TypeError.
 *
 * @param {string} fileContent - The text of the chat log/file
 * @param {string} fileName - The name of the file (for source reference)
 * @returns {Promise<Array>} - Array of enriched memory objects
 * @throws {Error} If GEMINI_API_KEY is unset, the API call fails, the reply is
 *                 empty or not valid JSON, or the reply has an unexpected shape.
 *                 Failures after the key check are logged before re-throwing.
 */
export async function summarizeChatLog(fileContent, fileName) {
  if (!process.env.GEMINI_API_KEY) {
    throw new Error('GEMINI_API_KEY is not set in environment or .env file.');
  }

  const prompt = `Here is the content of the file "${fileName}":\n\n${fileContent}`;

  try {
    const response = await ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents: prompt,
      config: {
        systemInstruction,
        responseMimeType: 'application/json',
        responseSchema,
      },
    });

    if (!response.text) {
      throw new Error('Received empty response text from Gemini API.');
    }

    const parsed = JSON.parse(response.text);
    if (parsed === null || typeof parsed !== 'object') {
      throw new Error('Gemini response is not a JSON object.');
    }

    const rawMemories = parsed.memories || [];
    if (!Array.isArray(rawMemories)) {
      throw new Error('Gemini response field "memories" is not an array.');
    }

    // Map to enriched structure with IDs, timestamps, and hashes
    return rawMemories.map((mem, index) => {
      if (mem === null || typeof mem !== 'object') {
        throw new Error(`Gemini memory at index ${index} is not an object.`);
      }

      const enriched = {
        id: `mem-${Date.now()}-${crypto.randomBytes(4).toString('hex')}`,
        timestamp: new Date().toISOString(),
        type: mem.type,
        content: mem.content,
        original_text: mem.original_text,
        confidence: mem.confidence,
        status: 'active',
        related_files: mem.related_files || [],
        tags: mem.tags || [],
        source: {
          chat_file: fileName,
        },
      };
      enriched.hash = generateMemoryHash(enriched);
      return enriched;
    });
  } catch (error) {
    console.error(`\n ✗ Error during Gemini summarization for ${fileName}:`, error.message);
    throw error;
  }
}
+ }