npm - baby-daemon - Versions diffs - 1.0.0 - Mend

baby-daemon 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/src/idempotency.js ADDED Viewed

@@ -0,0 +1,159 @@
+/**
+ * idempotency.js
+ * ──────────────
+ * WHAT THIS FILE DOES:
+ *   Prevents us from processing the same file version twice.
+ *   Every time a file changes, we generate a unique "fingerprint" for it.
+ *   If we've seen that fingerprint before → skip. If new → process.
+ *
+ * CONCEPT: Idempotency
+ *   "Idempotent" means: doing something twice = doing it once.
+ *   Example: pressing an elevator button twice doesn't call it twice.
+ *   We want the same guarantee: even if the watcher fires 5 times for
+ *   the same file save, we process it exactly once.
+ *
+ * CONCEPT: SHA-256 Hash
+ *   A hash function takes ANY input (text, file path, anything)
+ *   and produces a fixed-length "fingerprint" string.
+ *   - Same input → ALWAYS same output
+ *   - Different input → different output in practice, even for a 1-character change (collisions are theoretically possible but astronomically unlikely)
+ *   - You CANNOT reverse it (you can't get the input back from the hash)
+ *   SHA-256 produces a 64-character hex string, e.g: "a3f9b2c1..."
+ *   Node.js has this built-in via the 'crypto' module.
+ */
+import crypto from 'crypto';  // Built-in Node.js module — no install needed
+import fs from 'fs';           // File System module — also built-in
+import path from 'path';       // Path utilities — also built-in
+import { fileURLToPath } from 'url';
+// ─────────────────────────────────────────────────────────
+// WHERE WE STORE PROCESSED KEYS
+// ─────────────────────────────────────────────────────────
+/**
+ * CONCEPT: __dirname equivalent in ES Modules
+ *   In CommonJS modules (Node.js's original module system), __dirname is provided
+ *   automatically and holds "this file's folder".
+ *   ES Modules (which we use, see "type": "module" in package.json) do not define it,
+ *   so we derive it from import.meta.url instead.
+ *   fileURLToPath(import.meta.url) converts the file URL to a standard OS path.
+ *   path.dirname gets the folder containing this file (src/).
+ */
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const KEYS_FILE = path.join(__dirname, '..', 'processed_keys.json');
+// path.join builds a file path correctly for any OS
+// '..' means "go up one folder" (from src/ to proj101/)
+// Result: proj101/processed_keys.json
+// ─────────────────────────────────────────────────────────
+// LOAD KEYS FROM DISK
+// ─────────────────────────────────────────────────────────
+/**
+ * loadKeys()
+ * Reads the processed_keys.json file from disk and returns it as a JS Set.
+ *
+ * CONCEPT: Set vs Array
+ *   Array: ordered list, allows duplicates, checking "does X exist?" = slow (scans every item)
+ *   Set:   unordered collection, NO duplicates, checking "does X exist?" = instant (O(1))
+ *   For idempotency we only care about "have I seen this key?" → Set is perfect.
+ *
+ * CONCEPT: try/catch
+ *   If the file doesn't exist yet (first run), fs.readFileSync throws an error.
+ *   We catch it and return an empty Set instead of crashing.
+ */
+function loadKeys() {
+  try {
+    const raw = fs.readFileSync(KEYS_FILE, 'utf-8');
+    // JSON.parse converts the JSON string into a JS object/array
+    const arr = JSON.parse(raw);
+    // We stored it as an array (JSON doesn't support Set), so we convert back
+    return new Set(arr);
+  } catch {
+    // File doesn't exist yet → start with empty Set
+    return new Set();
+  }
+}
+// ─────────────────────────────────────────────────────────
+// SAVE KEYS TO DISK
+// ─────────────────────────────────────────────────────────
+/**
+ * saveKeys(keys)
+ * Writes the current Set of keys to processed_keys.json on disk.
+ *
+ * CONCEPT: Why save to disk at all?
+ *   If we only kept keys in memory (a variable), they'd be lost when the
+ *   program restarts. Saving to a JSON file makes them persistent across runs.
+ *
+ * CONCEPT: JSON.stringify with formatting
+ *   JSON.stringify(value, null, 2) converts a JS value to a JSON string.
+ *   The '2' means "indent with 2 spaces" → human-readable file.
+ */
+function saveKeys(keys) {
+  // Convert Set to Array because JSON.stringify can't handle Set directly
+  const arr = Array.from(keys);
+  fs.writeFileSync(KEYS_FILE, JSON.stringify(arr, null, 2), 'utf-8');
+}
+// ─────────────────────────────────────────────────────────
+// GENERATE THE IDEMPOTENCY KEY
+// ─────────────────────────────────────────────────────────
+/**
+ * getIdempotencyKey(filePath, mtimeMs)
+ *
+ * @param {string} filePath - Absolute path of the file, e.g. "C:/logs/chat_42.md"
+ * @param {number} mtimeMs  - Last modified time in milliseconds (from fs.stat)
+ * @returns {string}        - A 64-char SHA-256 hex string
+ *
+ * CONCEPT: Why combine path + mtime?
+ *   Path alone: same file modified twice would have same key → would skip the 2nd change
+ *   Mtime alone: two different files modified at same ms could collide (unlikely but possible)
+ *   Together: unique fingerprint for "this exact file at this exact version"
+ *
+ * CONCEPT: Digest formats
+ *   .digest('hex') = output as hexadecimal string (0-9, a-f) → easy to store, read, compare
+ *   Other options: 'base64', 'binary' — hex is the most human-readable
+ */
+export function getIdempotencyKey(filePath, mtimeMs) {
+  const raw = filePath + '|' + mtimeMs;
+  return crypto.createHash('sha256').update(raw).digest('hex');
+}
+// ─────────────────────────────────────────────────────────
+// THE MAIN CHECK: Have we already processed this file version?
+// ─────────────────────────────────────────────────────────
+/**
+ * isAlreadyProcessed(filePath, mtimeMs)
+ * Returns true if this exact file version was processed before.
+ * Returns false if it's new (and you should process it).
+ */
+export function isAlreadyProcessed(filePath, mtimeMs) {
+  const keys = loadKeys();
+  const key = getIdempotencyKey(filePath, mtimeMs);
+  return keys.has(key); // Set.has() is O(1) — instant lookup
+}
+// ─────────────────────────────────────────────────────────
+// MARK A FILE VERSION AS PROCESSED
+// ─────────────────────────────────────────────────────────
+/**
+ * markAsProcessed(filePath, mtimeMs)
+ * Call this AFTER successfully processing a file.
+ * Adds the key to the store and persists it to disk.
+ *
+ * IMPORTANT: We only call this AFTER success.
+ * If processing fails halfway, we don't record the key,
+ * so the next run will retry it. That's intentional.
+ */
+export function markAsProcessed(filePath, mtimeMs) {
+  const keys = loadKeys();
+  const key = getIdempotencyKey(filePath, mtimeMs);
+  keys.add(key);
+  saveKeys(keys);
+}

package/src/memoryStore.js ADDED Viewed

@@ -0,0 +1,95 @@
+import fs from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+// Get path to memory.jsonl in the project root folder
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const MEMORY_FILE = path.join(__dirname, '..', 'memory.jsonl');
+/**
+ * saveMemoriesForFile(fileName, memories)
+ * Updates memory.jsonl by removing any old memories belonging to the given
+ * fileName and appending the newly extracted memories. Uses an atomic write.
+ *
+ * @param {string} fileName - Name of the source chat file
+ * @param {Array} memories - List of enriched memory objects
+ * @returns {number} The number of memories saved
+ */
+export function saveMemoriesForFile(fileName, memories) {
+  try {
+    let allMemories = [];
+    // Read existing memories if file exists
+    if (fs.existsSync(MEMORY_FILE)) {
+      const content = fs.readFileSync(MEMORY_FILE, 'utf-8');
+      allMemories = content
+        .split('\n')
+        .filter(line => line.trim())
+        .map(line => {
+          try {
+            return JSON.parse(line);
+          } catch {
+            return null;
+          }
+        })
+        .filter(Boolean); // Filter out any null/parsed failures
+    }
+    // Filter out any existing memories that came from this specific file
+    const filteredMemories = allMemories.filter(
+      (mem) => mem.source?.chat_file !== fileName
+    );
+    // Normalize: ensure every incoming memory's source.chat_file matches fileName
+    // This prevents mismatches if memories were generated under a different name
+    const normalizedMemories = memories.map(mem => ({
+      ...mem,
+      source: { ...mem.source, chat_file: fileName }
+    }));
+    // Merge in the new memories
+    const updatedMemories = [...filteredMemories, ...normalizedMemories];
+    // Convert objects to JSON lines (JSONL format)
+    const linesToWrite = updatedMemories.map((mem) => JSON.stringify(mem)).join('\n') + '\n';
+    // Atomic write: write to temp file then rename
+    const tempFile = MEMORY_FILE + '.tmp';
+    fs.writeFileSync(tempFile, linesToWrite, 'utf-8');
+    fs.renameSync(tempFile, MEMORY_FILE);
+    return memories.length;
+  } catch (error) {
+    console.error(`Error saving memories for file ${fileName}:`, error.message);
+    throw error;
+  }
+}
+/**
+ * readAllMemories()
+ * Reads all memories from the JSONL file.
+ *
+ * @returns {Array} All memories in the store
+ */
+export function readAllMemories() {
+  try {
+    if (!fs.existsSync(MEMORY_FILE)) {
+      return [];
+    }
+    const content = fs.readFileSync(MEMORY_FILE, 'utf-8');
+    return content
+      .split('\n')
+      .filter(line => line.trim())
+      .map(line => {
+        try {
+          return JSON.parse(line);
+        } catch {
+          return null;
+        }
+      })
+      .filter(Boolean);
+  } catch (error) {
+    console.error('Error reading all memories:', error.message);
+    throw error;
+  }
+}

package/src/summarizer.js ADDED Viewed

@@ -0,0 +1,151 @@
+import { ai } from './config.js';
+import crypto from 'crypto';
+// ─────────────────────────────────────────────────────────
+// SYSTEM INSTRUCTION & JSON SCHEMA
+// ─────────────────────────────────────────────────────────
+/**
+ * System prompt passed as `config.systemInstruction` to the generateContent call
+ * in summarizeChatLog. It describes the fields of each memory and lists the same
+ * 'type' values that responseSchema enumerates.
+ */
+const systemInstruction = `You are a context compression engine for an AI coding assistant memory system.
+Your job is to analyze the raw conversation log (or file contents) provided by the user and extract key technical developments into structured JSON memories.
+For each memory:
+- 'type': Categorize it as:
+  * 'decision' (only if both developer and assistant explicitly agreed and committed to something)
+  * 'proposed_idea' (ideas discussed but not yet implemented or decided)
+  * 'rejected_idea' (ideas considered and discarded)
+  * 'open_question' (questions remaining unresolved)
+  * 'bug' (bugs discovered during the session)
+  * 'resolved_bug' (bugs fixed during the session)
+  * 'architecture_note' (architectural or design details noted)
+  * 'file_change' (files created, modified, or deleted)
+- 'content': A concise declarative statement summarizing the memory. Maintain precision: do not turn a 'maybe/proposal' into a 'completed migration'. Be clear about certainty.
+- 'original_text': The exact quote or sentence from the log that provides evidence for this memory.
+- 'confidence': A score between 0.0 (very tentative suggestion) and 1.0 (firm/confirmed fact).
+- 'related_files': List any files mentioned in the context of this memory.
+- 'tags': Simple, descriptive lowercase tags for keyword indexing (e.g. 'auth', 'redis', 'security').
+Ignore small talk, greetings, minor formatting adjustments, or repetitive debugging output. Keep only information that is vital for another AI assistant or human developer continuing the project later.`;
+const responseSchema = {
+  type: 'OBJECT',
+  properties: {
+    memories: {
+      type: 'ARRAY',
+      items: {
+        type: 'OBJECT',
+        properties: {
+          type: {
+            type: 'STRING',
+            enum: [
+              'decision',
+              'proposed_idea',
+              'rejected_idea',
+              'open_question',
+              'bug',
+              'resolved_bug',
+              'architecture_note',
+              'file_change'
+            ]
+          },
+          content: { type: 'STRING' },
+          original_text: { type: 'STRING' },
+          confidence: { type: 'NUMBER' },
+          related_files: {
+            type: 'ARRAY',
+            items: { type: 'STRING' }
+          },
+          tags: {
+            type: 'ARRAY',
+            items: { type: 'STRING' }
+          }
+        },
+        required: [
+          'type',
+          'content',
+          'original_text',
+          'confidence',
+          'related_files',
+          'tags'
+        ]
+      }
+    }
+  },
+  required: ['memories']
+};
+// ─────────────────────────────────────────────────────────
+// HELPER: GENERATE HASH
+// ─────────────────────────────────────────────────────────
+/**
+ * generateMemoryHash(memory)
+ * Computes a SHA-256 hash based on core fields to prevent duplicate extraction of the same point.
+ */
+function generateMemoryHash(memory) {
+  const raw = `${memory.type}|${memory.content}|${memory.original_text}`;
+  return crypto.createHash('sha256').update(raw).digest('hex');
+}
+// ─────────────────────────────────────────────────────────
+// MAIN EXPORT: summarizeChatLog
+// ─────────────────────────────────────────────────────────
+/**
+ * summarizeChatLog(fileContent, fileName)
+ *
+ * Calls the Gemini API to extract structured memories from the log.
+ *
+ * @param {string} fileContent - The text of the chat log/file
+ * @param {string} fileName    - The name of the file (for source reference)
+ * @returns {Promise<Array>}   - Array of enriched memory objects
+ */
+export async function summarizeChatLog(fileContent, fileName) {
+  if (!process.env.GEMINI_API_KEY) {
+    throw new Error('GEMINI_API_KEY is not set in environment or .env file.');
+  }
+  const prompt = `Here is the content of the file "${fileName}":\n\n${fileContent}`;
+  try {
+    const response = await ai.models.generateContent({
+      model: 'gemini-2.5-flash',
+      contents: prompt,
+      config: {
+        systemInstruction,
+        responseMimeType: 'application/json',
+        responseSchema,
+      }
+    });
+    if (!response.text) {
+      throw new Error('Received empty response text from Gemini API.');
+    }
+    const parsed = JSON.parse(response.text);
+    const rawMemories = parsed.memories || [];
+    // Map to enriched structure with IDs, timestamps, and hashes
+    return rawMemories.map((mem) => {
+      const enriched = {
+        id: `mem-${Date.now()}-${crypto.randomBytes(4).toString('hex')}`,
+        timestamp: new Date().toISOString(),
+        type: mem.type,
+        content: mem.content,
+        original_text: mem.original_text,
+        confidence: mem.confidence,
+        status: 'active',
+        related_files: mem.related_files || [],
+        tags: mem.tags || [],
+        source: {
+          chat_file: fileName,
+        },
+      };
+      enriched.hash = generateMemoryHash(enriched);
+      return enriched;
+    });
+  } catch (error) {
+    console.error(`\n  ✗ Error during Gemini summarization for ${fileName}:`, error.message);
+    throw error;
+  }
+}