npm - rhachet-roles-bhrain - Versions diffs - 0.1.1 → 0.3.0 - Mend

rhachet-roles-bhrain 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (159) hide show

package/dist/domain.operations/review/invokeClaudeCode.js ADDED Viewed

@@ -0,0 +1,92 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.invokeClaudeCode = void 0;
+const child_process_1 = require("child_process");
+const helpful_errors_1 = require("helpful-errors");
+/**
+ * .what = invokes claude-code cli with the prompt
+ * .why = executes the review via the specified brain (claude-code)
+ */
+const invokeClaudeCode = async (input) => {
+    // invoke claude-code cli via stdin to avoid E2BIG on large prompts
+    const output = await new Promise((resolve, reject) => {
+        const child = (0, child_process_1.spawn)('claude', ['-p', '-', '--output-format', 'json'], {
+            cwd: input.cwd,
+        });
+        let stdout = '';
+        let stderr = '';
+        child.stdout.on('data', (data) => {
+            stdout += data.toString();
+        });
+        child.stderr.on('data', (data) => {
+            stderr += data.toString();
+        });
+        child.on('error', reject);
+        child.on('close', (code) => {
+            if (code !== 0) {
+                // check for prompt too long error
+                if (stdout.includes('Prompt is too long')) {
+                    reject(new helpful_errors_1.BadRequestError('prompt is too long for claude context window; reduce --rules or --paths scope', {
+                        status: code,
+                        hint: 'try narrowing your glob patterns or excluding large files',
+                    }));
+                    return;
+                }
+                reject(new helpful_errors_1.UnexpectedCodePathError('claude-code exited with non-zero', {
+                    status: code,
+                    stdout: stdout.slice(0, 2000),
+                    stderr: stderr.slice(0, 2000),
+                }));
+                return;
+            }
+            resolve(stdout);
+        });
+        // write prompt to stdin and close
+        child.stdin.write(input.prompt);
+        child.stdin.end();
+    });
+    // parse the json response
+    const response = (() => {
+        try {
+            return JSON.parse(output);
+        }
+        catch {
+            throw new helpful_errors_1.UnexpectedCodePathError('failed to parse claude-code response', {
+                output,
+            });
+        }
+    })();
+    // extract review content from response
+    const review = (() => {
+        // claude-code json output has a 'result' field with the text content
+        if (response.result && typeof response.result === 'string') {
+            return response.result;
+        }
+        // fallback: look for text in message content
+        if (response.content && Array.isArray(response.content)) {
+            const textContent = response.content.find((c) => c.type === 'text');
+            if (textContent?.text) {
+                return textContent.text;
+            }
+        }
+        throw new helpful_errors_1.UnexpectedCodePathError('failed to extract review from response', {
+            response,
+        });
+    })();
+    // extract usage from response
+    const usage = (() => {
+        if (!response.usage)
+            throw new helpful_errors_1.UnexpectedCodePathError('response.usage not found', {
+                response,
+            });
+        return {
+            inputTokens: response.usage.input_tokens ?? 0,
+            inputTokensCacheCreation: response.usage.cache_creation_input_tokens ?? 0,
+            inputTokensCacheRead: response.usage.cache_read_input_tokens ?? 0,
+            outputTokens: response.usage.output_tokens ?? 0,
+        };
+    })();
+    return { response, review, usage };
+};
+exports.invokeClaudeCode = invokeClaudeCode;
+//# sourceMappingURL=invokeClaudeCode.js.map

package/dist/domain.operations/review/invokeClaudeCode.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"invokeClaudeCode.js","sourceRoot":"","sources":["../../../src/domain.operations/review/invokeClaudeCode.ts"],"names":[],"mappings":";;;AAAA,iDAAsC;AACtC,mDAA0E;AAa1E;;;GAGG;AACI,MAAM,gBAAgB,GAAG,KAAK,EAAE,KAGtC,EAAqE,EAAE;IACtE,mEAAmE;IACnE,MAAM,MAAM,GAAG,MAAM,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC3D,MAAM,KAAK,GAAG,IAAA,qBAAK,EAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,iBAAiB,EAAE,MAAM,CAAC,EAAE;YACpE,GAAG,EAAE,KAAK,CAAC,GAAG;SACf,CAAC,CAAC;QAEH,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC5B,CAAC,CAAC,CAAC;QACH,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC5B,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1B,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YACzB,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;gBACf,kCAAkC;gBAClC,IAAI,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;oBAC1C,MAAM,CACJ,IAAI,gCAAe,CACjB,+EAA+E,EAC/E;wBACE,MAAM,EAAE,IAAI;wBACZ,IAAI,EAAE,2DAA2D;qBAClE,CACF,CACF,CAAC;oBACF,OAAO;gBACT,CAAC;gBACD,MAAM,CACJ,IAAI,wCAAuB,CAAC,kCAAkC,EAAE;oBAC9D,MAAM,EAAE,IAAI;oBACZ,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;oBAC7B,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;iBAC9B,CAAC,CACH,CAAC;gBACF,OAAO;YACT,CAAC;YACD,OAAO,CAAC,MAAM,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;QAEH,kCAAkC;QAClC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAChC,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;IACpB,CAAC,CAAC,CAAC;IAEH,0BAA0B;IAC1B,MAAM,QAAQ,GAAG,CAAC,GAAG,EAAE;QACrB,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,wCAAuB,CAC/B,sCAAsC,EACtC;gBACE,MAAM;aACP,CACF,CAAC;QACJ,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;IAEL,uCAAuC;IACvC,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE;QACnB,qEAAqE;QACrE,IAAI,QAAQ,CAAC,MAAM,IAAI,OAAO,QAAQ,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YAC3D,OAAO,QAAQ,CAAC,MAAM,CAAC;QACzB,CAAC;QAED,6CAA6C;QAC7C,IAAI,QAAQ,CAAC,OAAO,IAAI,KAA
K,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YACxD,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CACvC,CAAC,CAAmB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAC3C,CAAC;YACF,IAAI,WAAW,EAAE,IAAI,EAAE,CAAC;gBACtB,OAAO,WAAW,CAAC,IAAI,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,MAAM,IAAI,wCAAuB,CAC/B,wCAAwC,EACxC;YACE,QAAQ;SACT,CACF,CAAC;IACJ,CAAC,CAAC,EAAE,CAAC;IAEL,8BAA8B;IAC9B,MAAM,KAAK,GAAgB,CAAC,GAAG,EAAE;QAC/B,IAAI,CAAC,QAAQ,CAAC,KAAK;YACjB,MAAM,IAAI,wCAAuB,CAAC,0BAA0B,EAAE;gBAC5D,QAAQ;aACT,CAAC,CAAC;QACL,OAAO;YACL,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC;YAC7C,wBAAwB,EAAE,QAAQ,CAAC,KAAK,CAAC,2BAA2B,IAAI,CAAC;YACzE,oBAAoB,EAAE,QAAQ,CAAC,KAAK,CAAC,uBAAuB,IAAI,CAAC;YACjE,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC,aAAa,IAAI,CAAC;SAChD,CAAC;IACJ,CAAC,CAAC,EAAE,CAAC;IAEL,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC,CAAC;AA3GW,QAAA,gBAAgB,oBA2G3B"}

package/dist/domain.operations/review/writeInputArtifacts.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * .what = writes input artifacts to log directory for auditability
+ * .why = enables debugging, replay, and audit of review invocations
+ */
+export declare const writeInputArtifacts: (input: {
+    logDir: string;
+    args: {
+        rules: string | string[];
+        diffs?: string;
+        paths?: string | string[];
+        output: string;
+        mode: 'soft' | 'hard';
+    };
+    scope: {
+        ruleFiles: string[];
+        targetFiles: string[];
+    };
+    metrics: {
+        tokenEstimate: number;
+        contextWindowPercent: number;
+        costEstimate: number;
+    };
+    prompt: string;
+}) => Promise<{
+    argsPath: string;
+    promptPath: string;
+}>;

package/dist/domain.operations/review/writeInputArtifacts.js ADDED Viewed

@@ -0,0 +1,50 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.writeInputArtifacts = void 0;
+const fs = __importStar(require("fs/promises"));
+const path = __importStar(require("path"));
+/**
+ * .what = writes input artifacts to log directory for auditability
+ * .why = enables debugging, replay, and audit of review invocations
+ */
+const writeInputArtifacts = async (input) => {
+    // ensure log directory exists
+    await fs.mkdir(input.logDir, { recursive: true });
+    // write input.args.json
+    const argsPath = path.join(input.logDir, 'input.args.json');
+    const argsContent = JSON.stringify({
+        args: input.args,
+        scope: input.scope,
+        metrics: input.metrics,
+    }, null, 2);
+    await fs.writeFile(argsPath, argsContent, 'utf-8');
+    // write input.prompt.md
+    const promptPath = path.join(input.logDir, 'input.prompt.md');
+    await fs.writeFile(promptPath, input.prompt, 'utf-8');
+    return { argsPath, promptPath };
+};
+exports.writeInputArtifacts = writeInputArtifacts;
+//# sourceMappingURL=writeInputArtifacts.js.map

package/dist/domain.operations/review/writeInputArtifacts.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"writeInputArtifacts.js","sourceRoot":"","sources":["../../../src/domain.operations/review/writeInputArtifacts.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,2CAA6B;AAE7B;;;GAGG;AACI,MAAM,mBAAmB,GAAG,KAAK,EAAE,KAmBzC,EAAqD,EAAE;IACtD,8BAA8B;IAC9B,MAAM,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAElD,wBAAwB;IACxB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC;IAC5D,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAChC;QACE,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,OAAO,EAAE,KAAK,CAAC,OAAO;KACvB,EACD,IAAI,EACJ,CAAC,CACF,CAAC;IACF,MAAM,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IAEnD,wBAAwB;IACxB,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC;IAC9D,MAAM,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,KAAK,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAEtD,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;AAClC,CAAC,CAAC;AAzCW,QAAA,mBAAmB,uBAyC9B"}

package/dist/domain.operations/review/writeOutputArtifacts.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+/**
+ * .what = writes output artifacts to log directory for auditability
+ * .why = enables debugging, replay, and audit of review results
+ */
+export declare const writeOutputArtifacts: (input: {
+    logDir: string;
+    response: object;
+    review: string;
+}) => Promise<{
+    responsePath: string;
+    reviewPath: string;
+}>;

package/dist/domain.operations/review/writeOutputArtifacts.js ADDED Viewed

@@ -0,0 +1,46 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.writeOutputArtifacts = void 0;
+const fs = __importStar(require("fs/promises"));
+const path = __importStar(require("path"));
+/**
+ * .what = writes output artifacts to log directory for auditability
+ * .why = enables debugging, replay, and audit of review results
+ */
+const writeOutputArtifacts = async (input) => {
+    // ensure log directory exists (should already exist from writeInputArtifacts)
+    await fs.mkdir(input.logDir, { recursive: true });
+    // write output.response.json
+    const responsePath = path.join(input.logDir, 'output.response.json');
+    const responseContent = JSON.stringify(input.response, null, 2);
+    await fs.writeFile(responsePath, responseContent, 'utf-8');
+    // write output.review.md
+    const reviewPath = path.join(input.logDir, 'output.review.md');
+    await fs.writeFile(reviewPath, input.review, 'utf-8');
+    return { responsePath, reviewPath };
+};
+exports.writeOutputArtifacts = writeOutputArtifacts;
+//# sourceMappingURL=writeOutputArtifacts.js.map

package/dist/domain.operations/review/writeOutputArtifacts.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"writeOutputArtifacts.js","sourceRoot":"","sources":["../../../src/domain.operations/review/writeOutputArtifacts.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,2CAA6B;AAE7B;;;GAGG;AACI,MAAM,oBAAoB,GAAG,KAAK,EAAE,KAI1C,EAAyD,EAAE;IAC1D,8EAA8E;IAC9E,MAAM,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAElD,6BAA6B;IAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,sBAAsB,CAAC,CAAC;IACrE,MAAM,eAAe,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IAChE,MAAM,EAAE,CAAC,SAAS,CAAC,YAAY,EAAE,eAAe,EAAE,OAAO,CAAC,CAAC;IAE3D,yBAAyB;IACzB,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,kBAAkB,CAAC,CAAC;IAC/D,MAAM,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,KAAK,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAEtD,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,CAAC;AACtC,CAAC,CAAC;AAlBW,QAAA,oBAAoB,wBAkB/B"}

package/dist/roles/architect/briefs/brains.replic/arc000.sources.[catalog].md ADDED Viewed

@@ -0,0 +1,178 @@
+# research sources catalog
+## .what
+a comprehensive catalog of sources documenting replic brain architectures, reasoning patterns, and performance benchmarks.
+## .why
+these sources form the foundation for understanding how replic brains (LLMs behind REPLs) are constructed, compared, and evaluated.
+---
+## foundational papers
+### reasoning patterns
+| # | title | authors | url | date | relevance |
+|---|-------|---------|-----|------|-----------|
+| 1 | ReAct: Synergizing Reasoning and Acting in Language Models | Yao et al. | [arXiv:2210.03629](https://arxiv.org/abs/2210.03629) | 2022-10, ICLR 2023 | foundational pattern for interleaved reasoning + action in agentic loops |
+| 2 | Chain-of-Thought Prompting Elicits Reasoning in Large Language Models | Wei et al. | [arXiv:2201.11903](https://arxiv.org/abs/2201.11903) | 2022-01, NeurIPS 2022 | foundational technique enabling step-by-step reasoning |
+| 3 | Tree of Thoughts: Deliberate Problem Solving with Large Language Models | Yao et al. | [arXiv:2305.10601](https://arxiv.org/abs/2305.10601) | 2023-05, NeurIPS 2023 | exploration over reasoning paths, enables backtracking |
+| 4 | Reflexion: Language Agents with Verbal Reinforcement Learning | Shinn et al. | [arXiv:2303.11366](https://arxiv.org/abs/2303.11366) | 2023-03 | self-reflection and iterative refinement for agents |
+| 5 | Self-Consistency Improves Chain of Thought Reasoning | Wang et al. | [arXiv:2203.11171](https://arxiv.org/abs/2203.11171) | 2022-03 | multiple reasoning paths with majority voting |
+| 6 | Language Agent Tree Search (LATS) | Zhou et al. | [arXiv:2310.04406](https://arxiv.org/abs/2310.04406) | 2023-10, ICML 2024 | Monte Carlo tree search for language agents |
+### prompting techniques
+| # | title | authors | url | date | relevance |
+|---|-------|---------|-----|------|-----------|
+| 7 | Plan-and-Solve Prompting | Wang et al. | [arXiv:2305.04091](https://arxiv.org/abs/2305.04091) | 2023-05 | zero-shot decomposition into planning + execution |
+| 8 | Least-to-Most Prompting | Zhou et al. | [arXiv:2205.10625](https://arxiv.org/abs/2205.10625) | 2022-05 | decompose complex problems into simpler subproblems |
+| 9 | Self-Ask: Measuring and Narrowing the Compositionality Gap | Press et al. | [arXiv:2210.03350](https://arxiv.org/abs/2210.03350) | 2022-10, ICLR 2023 | model asks follow-up questions before answering |
+| 10 | Auto-CoT: Automatic Chain of Thought Prompting | Zhang et al. | [arXiv:2210.03493](https://arxiv.org/abs/2210.03493) | 2022-10 | automatic demonstration construction with diversity |
+| 11 | Show Your Work: Scratchpads for Intermediate Computation | Nye et al. | [arXiv:2112.00114](https://arxiv.org/abs/2112.00114) | 2021-12 | intermediate computation steps improve multi-step tasks |
+### tool use and function calling
+| # | title | authors | url | date | relevance |
+|---|-------|---------|-----|------|-----------|
+| 12 | Toolformer: Language Models Can Teach Themselves to Use Tools | Schick et al. | [arXiv:2302.04761](https://arxiv.org/abs/2302.04761) | 2023-02 | self-supervised tool use learning |
+| 13 | PAL: Program-Aided Language Models | Gao et al. | [arXiv:2211.10435](https://arxiv.org/abs/2211.10435) | 2022-11 | offload computation to program interpreter |
+### embodied and agentic systems
+| # | title | authors | url | date | relevance |
+|---|-------|---------|-----|------|-----------|
+| 14 | Inner Monologue: Embodied Reasoning through Planning with Language Models | Huang et al. | [arXiv:2207.05608](https://arxiv.org/abs/2207.05608) | 2022-07, CoRL | closed-loop feedback for robot planning |
+| 15 | Generative Agents: Interactive Simulacra of Human Behavior | Park et al. | [arXiv:2304.03442](https://arxiv.org/abs/2304.03442) | 2023-04 | memory architecture for believable agent behavior |
+| 16 | MemGPT: Towards LLMs as Operating Systems | Packer et al. | [arXiv:2310.08560](https://arxiv.org/abs/2310.08560) | 2023-10 | virtual context management, memory hierarchy |
+### surveys and meta-analyses
+| # | title | authors | url | date | relevance |
+|---|-------|---------|-----|------|-----------|
+| 17 | CoALA: Cognitive Architectures for Language Agents | Sumers et al. | [arXiv:2309.02427](https://arxiv.org/abs/2309.02427) | 2023-09 | systematic framework for language agent architectures |
+| 18 | AgentBench: Evaluating LLMs as Agents | Liu et al. | [arXiv:2308.03688](https://arxiv.org/abs/2308.03688) | 2023-08 | comprehensive benchmark for evaluating LLM agents |
+| 19 | Understanding the Planning of LLM Agents: A Survey | Huang et al. | [arXiv:2402.02716](https://arxiv.org/abs/2402.02716) | 2024-02 | taxonomy of planning approaches |
+| 20 | ADaPT: As-Needed Decomposition and Planning | Prasad et al. | [arXiv:2311.05772](https://arxiv.org/abs/2311.05772) | 2023-11, NAACL 2024 | adaptive decomposition based on task complexity |
+| 21 | Reasoning with Language Model Prompting: A Survey | Qiao et al. | [github.com/zjunlp](https://github.com/zjunlp/Prompt4ReasoningPapers) | 2023, ACL 2023 | comprehensive survey of prompting for reasoning |
+| 22 | LLM-Based Agents for Tool Learning: A Survey | - | [Springer](https://link.springer.com/article/10.1007/s41019-025-00296-9) | 2024 | survey of tool learning approaches |
+| 23 | A Survey of Task Planning with Large Language Models | - | [Intelligent Computing](https://spj.science.org/doi/10.34133/icomputing.0124) | 2024 | task planning and decomposition survey |
+---
+## claude-code architecture
+| # | title | source | url | date | relevance |
+|---|-------|--------|-----|------|-----------|
+| 24 | Building Effective Agents | Anthropic blog | [anthropic.com/research](https://www.anthropic.com/research/building-effective-agents) | 2024-12 | official guidance on agent architecture patterns |
+| 25 | Building Agents with the Claude Agent SDK | Anthropic engineering | [anthropic.com/engineering](https://www.anthropic.com/engineering/building-agents-with-the-claude-agent-sdk) | 2024 | SDK architecture and design principles |
+| 26 | Advanced Tool Use on Claude | Anthropic engineering | [anthropic.com/engineering](https://www.anthropic.com/engineering/advanced-tool-use) | 2024 | tool search, programmatic calling, examples |
+| 27 | Claude's Extended Thinking | Anthropic | [anthropic.com/news](https://www.anthropic.com/news/visible-extended-thinking) | 2025 | hybrid reasoning with thinking budgets |
+| 28 | The "think" Tool: Enabling Claude to Stop and Think | Anthropic engineering | [anthropic.com/engineering](https://www.anthropic.com/engineering/claude-think-tool) | 2024 | structured thinking during tool use |
+| 29 | Model Context Protocol (MCP) | Anthropic | [modelcontextprotocol.io](https://modelcontextprotocol.io/) | 2024 | standardized tool integration protocol |
+| 30 | Claude Agent SDK Repository | GitHub | [github.com/anthropics/claude-agent-sdk-python](https://github.com/anthropics/claude-agent-sdk-python) | 2024 | reference implementation |
+| 31 | Claude Code System Prompt Analysis | Zenn (community) | [zenn.dev](https://zenn.dev/) | 2024 | detailed analysis of system prompt structure |
+---
+## alternative architectures
+### openai codex
+| # | title | source | url | date | relevance |
+|---|-------|--------|-----|------|-----------|
+| 32 | Codex Cloud Architecture | OpenAI | [developers.openai.com/codex/cloud](https://developers.openai.com/codex/cloud/) | 2025 | cloud sandbox execution environment |
+| 33 | Codex CLI Features | OpenAI | [developers.openai.com/codex/cli/features](https://developers.openai.com/codex/cli/features/) | 2025 | CLI capabilities and workflows |
+| 34 | Codex Security Guide | OpenAI | [developers.openai.com/codex/security](https://developers.openai.com/codex/security/) | 2025 | sandboxing and network isolation |
+| 35 | Codex SDK | OpenAI | [developers.openai.com/codex/sdk](https://developers.openai.com/codex/sdk/) | 2025 | programmatic integration |
+### other coding assistants
+| # | title | source | url | date | relevance |
+|---|-------|--------|-----|------|-----------|
+| 36 | Devin AI Architecture | various | (multiple sources) | 2024 | autonomous software engineer |
+| 37 | Aider: AI Pair Programming | aider.chat | [aider.chat](https://aider.chat/) | 2024 | git-integrated coding assistant |
+| 38 | Cursor AI | cursor.com | [cursor.com](https://cursor.com/) | 2024 | AI-augmented IDE |
+| 39 | GitHub Copilot | GitHub | [github.blog](https://github.blog/) | 2024 | code completion and chat |
+---
+## context management
+| # | title | source | url | date | relevance |
+|---|-------|--------|-----|------|-----------|
+| 40 | MemGPT: Virtual Context Management | Packer et al. | [arXiv:2310.08560](https://arxiv.org/abs/2310.08560) | 2023-10 | hierarchical memory for extended context |
+| 41 | Extended Thinking Documentation | Anthropic | [support.claude.com](https://support.claude.com/en/articles/10574485-using-extended-thinking) | 2025 | thinking budgets and serial test-time compute |
+| 42 | Context Window Management Strategies | various | (multiple sources) | 2024 | summarization and compaction techniques |
+---
+## benchmarks and performance
+### code generation benchmarks
+| # | title | source | url | date | relevance |
+|---|-------|--------|-----|------|-----------|
+| 43 | SWE-bench: Software Engineering Benchmark | Princeton NLP | [swebench.com](https://www.swebench.com/) | 2024 | real-world github issue resolution |
+| 44 | HumanEval Benchmark | OpenAI | [paperswithcode.com](https://paperswithcode.com/dataset/humaneval) | 2021 | function synthesis from docstrings |
+| 45 | HumanEval Pro and MBPP Pro | - | [arXiv:2412.21199](https://arxiv.org/abs/2412.21199) | 2024-12, ACL 2025 | self-invoking code generation |
+| 46 | MBPP Benchmark | Google | [paperswithcode.com](https://paperswithcode.com/sota/code-generation-on-mbpp) | 2021 | mostly basic python problems |
+| 47 | EvalPlus Leaderboard | - | [evalplus.github.io](https://evalplus.github.io/leaderboard.html) | 2024 | augmented test suites for HumanEval/MBPP |
+### agent benchmarks
+| # | title | source | url | date | relevance |
+|---|-------|--------|-----|------|-----------|
+| 48 | AgentBench | Liu et al. | [arXiv:2308.03688](https://arxiv.org/abs/2308.03688) | 2023-08 | multi-environment agent evaluation |
+| 49 | LiveBench | - | [livebench.ai](https://livebench.ai/) | 2024 | continuously updated LLM evaluation |
+### tool comparisons
+| # | title | source | url | date | relevance |
+|---|-------|--------|-----|------|-----------|
+| 50 | Coding Agents Comparison | Artificial Analysis | [artificialanalysis.ai](https://artificialanalysis.ai/insights/coding-agents-comparison) | 2024 | Claude Code, Cursor, Copilot comparison |
+| 51 | AI Coding Agents Benchmark 2025 | Render | [render.com/blog](https://render.com/blog/ai-coding-agents-benchmark) | 2025 | practical benchmark results |
+| 52 | Best AI Code Apply Tools 2025 | Morph | [morphllm.com/comparisons](https://www.morphllm.com/comparisons) | 2025 | enterprise benchmarks |
+| 53 | Claude Code vs Cursor Deep Comparison | Qodo | [qodo.ai/blog](https://www.qodo.ai/blog/claude-code-vs-cursor/) | 2025 | detailed feature comparison |
+---
+## reasoning strategy comparisons
+| # | title | source | url | date | relevance |
+|---|-------|--------|-----|------|-----------|
+| 54 | ReAct vs CoT Performance | Google Research | [research.google/blog](https://research.google/blog/react-synergizing-reasoning-and-acting-in-language-models/) | 2022 | comparative analysis on HotPotQA, FEVER, ALFWorld |
+| 55 | Comprehensive Guide to ReAct Prompting | Mercity | [mercity.ai/blog-post](https://www.mercity.ai/blog-post/react-prompting-and-react-based-agentic-systems) | 2024 | practical guide with performance notes |
+---
+## key performance findings
+### reasoning strategies (from sources)
+| strategy | benchmark | performance | source |
+|----------|-----------|-------------|--------|
+| ReAct | HotPotQA | competitive with CoT, better grounding | [1] |
+| ReAct | ALFWorld | +34% over imitation learning | [1] |
+| CoT | HotPotQA | higher success, but 56% hallucination in failures | [1] |
+| Self-Consistency | various | significant improvement over single-path CoT | [5] |
+| Tree of Thoughts | Game of 24 | 74% (vs 4% CoT) | [3] |
+| LATS | HotPotQA | state-of-the-art with MCTS | [6] |
+| Reflexion | ALFWorld | +22% improvement via self-reflection | [4] |
+### code generation (from sources)
+| model/tool | HumanEval | MBPP | SWE-bench | source |
+|------------|-----------|------|-----------|--------|
+| o1-mini | 96.2% | - | - | [45] |
+| o1-mini (Pro) | 76.2% | - | - | [45] |
+| GPT-4o + planning | - | 84.8% | - | [46] |
+| Claude Code | - | - | ~49% (verified) | [43] |
+---
+## access date
+all sources accessed: 2025-12-23

package/dist/roles/architect/briefs/brains.replic/arc101.concept.llm.[article].md ADDED Viewed

@@ -0,0 +1,25 @@
+# llm (large language model)
+## .what
+a neural network trained on vast text corpora that predicts the next token in a sequence, enabling it to generate coherent text, follow instructions, and perform reasoning.
+## .why
+the llm is the core intelligence of a replic brain. it provides the reasoning and generation capabilities that power all downstream behaviors — from understanding natural language to generating code to deciding which tools to invoke.
+## dependsOn
+- (none — foundational primitive)
+## key characteristics
+- **autoregressive generation**: produces output one token at a time, conditioning on all previous tokens
+- **context window**: has a fixed maximum number of tokens it can process at once
+- **emergent capabilities**: reasoning, instruction-following, and tool use emerge at scale
+- **stochastic**: outputs are probabilistic, controlled via temperature parameter
+## sources
+- [Chain-of-Thought Prompting](https://arxiv.org/abs/2201.11903) — demonstrates reasoning emergence in LLMs
+- [CoALA: Cognitive Architectures for Language Agents](https://arxiv.org/abs/2309.02427) — positions LLM as central to agent architecture

package/dist/roles/architect/briefs/brains.replic/arc102.concept.repl.[article].md ADDED Viewed

@@ -0,0 +1,33 @@
+# repl (read-eval-print-loop)
+## .what
+an interactive programming pattern where the system reads user input, evaluates it, prints the result, and loops back to read more input.
+## .why
+the repl pattern enables iterative, conversational interaction. when an llm operates behind a repl, it can receive feedback, execute actions, observe results, and refine its approach — the foundation of agentic behavior.
+## dependsOn
+- (none — foundational primitive)
+## key characteristics
+- **read**: accept input from user or environment
+- **eval**: process and execute the input
+- **print**: display results or output
+- **loop**: repeat the cycle indefinitely
+## in replic brains
+the repl structure manifests as:
+1. **read**: receive user message or tool result
+2. **eval**: llm generates response/action
+3. **print**: emit response or execute tool
+4. **loop**: continue until task complete
+## sources
+- unix shell, lisp repl — historical precedent
+- [Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) — describes agentic loop as repl-like

package/dist/roles/architect/briefs/brains.replic/arc103.concept.replic-brain.[article].md ADDED Viewed

@@ -0,0 +1,35 @@
+# replic-brain
+## .what
+an llm operating behind a repl interface, forming an interactive system capable of iterative reasoning, tool use, and task completion through conversation.
+## .why
+the term "replic brain" names the specific architectural pattern where an llm's capabilities are harnessed through a read-eval-print-loop. this pattern enables the llm to act as an agent — receiving feedback, taking actions, and iterating toward goals.
+## dependsOn
+- `llm` — provides reasoning and generation
+- `repl` — provides interactive loop structure
+## key characteristics
+- **conversational**: maintains dialogue state across turns
+- **tool-capable**: can invoke external tools and process results
+- **iterative**: refines approach based on feedback
+- **goal-directed**: works toward completing user tasks
+## examples
+| name | organization | context |
+|------|--------------|---------|
+| claude code | anthropic | cli-based coding assistant |
+| codex | openai | cloud sandbox coding agent |
+| cursor | cursor inc | ide-integrated coding assistant |
+| aider | open source | git-integrated pair programmer |
+## sources
+- [Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) — describes the agentic loop pattern
+- [CoALA](https://arxiv.org/abs/2309.02427) — systematic framework for language agent architectures

package/dist/roles/architect/briefs/brains.replic/arc104.concept.context-window.[article].md ADDED Viewed

@@ -0,0 +1,40 @@
+# context-window
+## .what
+the fixed-size buffer of tokens that an llm can process in a single forward pass, representing its working memory during generation.
+## .why
+the context window is the fundamental constraint that shapes all replic brain architectures. it determines how much conversation history, code, and tool results can be processed simultaneously. strategies like context compaction, subagents, and memory hierarchies exist specifically to work within or around this limit.
+## dependsOn
+- `llm` — context window is a property of the llm
+## key characteristics
+- **fixed size**: measured in tokens (e.g., 200k tokens for claude)
+- **attention-based**: all tokens attend to all other tokens (O(n²) complexity)
+- **includes everything**: system prompt, conversation, tool calls, tool results
+- **ephemeral**: cleared between sessions (no persistent memory)
+## size examples (2024-2025)
+| model | context window |
+|-------|----------------|
+| gpt-4o | 128k tokens |
+| claude 3.5/4 | 200k tokens |
+| gemini 1.5 | 1m+ tokens |
+## implications for architecture
+- longer context = more code/conversation in single pass
+- summarization needed when context fills
+- subagents can isolate context usage
+- caching optimizes repeated prompts
+## sources
+- [MemGPT](https://arxiv.org/abs/2310.08560) — virtual context management
+- [Claude Documentation](https://docs.anthropic.com) — 200k token context

package/dist/roles/architect/briefs/brains.replic/arc105.concept.system-prompt.[article].md ADDED Viewed

@@ -0,0 +1,44 @@
+# system-prompt
+## .what
+the initial instructions provided to an llm at the start of a conversation that define its behavior, capabilities, constraints, and persona.
+## .why
+the system prompt is the primary mechanism for shaping a replic brain's behavior. it establishes what tools are available, how to use them, what style to adopt, and what constraints to follow. changes to the system prompt fundamentally alter the agent's capabilities.
+## dependsOn
+- `llm` — interprets and follows the system prompt
+- `context-window` — system prompt consumes context tokens
+## key characteristics
+- **persistent**: remains in context for entire conversation
+- **privileged**: typically cannot be overridden by user messages
+- **declarative**: describes desired behavior, not code
+- **composable**: can include tool definitions, examples, constraints
+## typical contents (replic brains)
+```
+- persona/role definition
+- available tools and their schemas
+- behavioral constraints
+- output format requirements
+- examples of desired behavior
+- safety guardrails
+```
+## examples
+| system | system prompt size | notable elements |
+|--------|-------------------|------------------|
+| claude code | ~15k tokens | tool definitions, coding guidelines, git workflow |
+| codex | varies | sandbox rules, security constraints |
+## sources
+- [Claude Code System Prompt Analysis](https://zenn.dev/) — detailed breakdown
+- [Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) — system prompt design