gyoshu 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/AGENTS.md +1039 -0
  2. package/README.ja.md +390 -0
  3. package/README.ko.md +385 -0
  4. package/README.md +459 -0
  5. package/README.zh.md +383 -0
  6. package/bin/gyoshu.js +295 -0
  7. package/install.sh +241 -0
  8. package/package.json +65 -0
  9. package/src/agent/baksa.md +494 -0
  10. package/src/agent/executor.md +1851 -0
  11. package/src/agent/gyoshu.md +2351 -0
  12. package/src/agent/jogyo-feedback.md +137 -0
  13. package/src/agent/jogyo-insight.md +359 -0
  14. package/src/agent/jogyo-paper-writer.md +370 -0
  15. package/src/agent/jogyo.md +1445 -0
  16. package/src/agent/plan-reviewer.md +1862 -0
  17. package/src/agent/plan.md +97 -0
  18. package/src/agent/task-orchestrator.md +1121 -0
  19. package/src/bridge/gyoshu_bridge.py +782 -0
  20. package/src/command/analyze-knowledge.md +840 -0
  21. package/src/command/analyze-plans.md +513 -0
  22. package/src/command/execute.md +893 -0
  23. package/src/command/generate-policy.md +924 -0
  24. package/src/command/generate-suggestions.md +1111 -0
  25. package/src/command/gyoshu-auto.md +258 -0
  26. package/src/command/gyoshu.md +1352 -0
  27. package/src/command/learn.md +1181 -0
  28. package/src/command/planner.md +630 -0
  29. package/src/lib/artifact-security.ts +159 -0
  30. package/src/lib/atomic-write.ts +107 -0
  31. package/src/lib/cell-identity.ts +176 -0
  32. package/src/lib/checkpoint-schema.ts +455 -0
  33. package/src/lib/environment-capture.ts +181 -0
  34. package/src/lib/filesystem-check.ts +84 -0
  35. package/src/lib/literature-client.ts +1048 -0
  36. package/src/lib/marker-parser.ts +474 -0
  37. package/src/lib/notebook-frontmatter.ts +835 -0
  38. package/src/lib/paths.ts +799 -0
  39. package/src/lib/pdf-export.ts +340 -0
  40. package/src/lib/quality-gates.ts +369 -0
  41. package/src/lib/readme-index.ts +462 -0
  42. package/src/lib/report-markdown.ts +870 -0
  43. package/src/lib/session-lock.ts +411 -0
  44. package/src/plugin/gyoshu-hooks.ts +140 -0
  45. package/src/skill/data-analysis/SKILL.md +369 -0
  46. package/src/skill/experiment-design/SKILL.md +374 -0
  47. package/src/skill/ml-rigor/SKILL.md +672 -0
  48. package/src/skill/scientific-method/SKILL.md +331 -0
  49. package/src/tool/checkpoint-manager.ts +1387 -0
  50. package/src/tool/gyoshu-completion.ts +493 -0
  51. package/src/tool/gyoshu-snapshot.ts +745 -0
  52. package/src/tool/literature-search.ts +389 -0
  53. package/src/tool/migration-tool.ts +1404 -0
  54. package/src/tool/notebook-search.ts +794 -0
  55. package/src/tool/notebook-writer.ts +391 -0
  56. package/src/tool/python-repl.ts +1038 -0
  57. package/src/tool/research-manager.ts +1494 -0
  58. package/src/tool/retrospective-store.ts +347 -0
  59. package/src/tool/session-manager.ts +565 -0
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Artifact Security Module
3
+ *
4
+ * Provides TOCTOU-resistant file operations with symlink protection.
5
+ *
6
+ * Security features:
7
+ * - O_NOFOLLOW: Prevents following symlinks on POSIX systems
8
+ * - Path validation: Rejects absolute paths, path traversal (../), symlinks
9
+ * - TOCTOU mitigation: Validates paths at open time, not just pre-check
10
+ * - Safe mkdir: Checks each path component with lstat before creation
11
+ */
12
+
13
+ import * as fs from 'fs/promises';
14
+ import { constants } from 'fs';
15
+ import * as path from 'path';
16
+
17
/**
 * Validates that an artifact path is safe and resolves it inside the artifact root.
 *
 * The check is purely lexical (only `path` helpers are used); it does not
 * consult the filesystem, so symlinks are not detected here.
 *
 * @param artifactPath - Relative path to the artifact
 * @param artifactRoot - Root directory for artifacts
 * @returns Resolved absolute path if valid
 * @throws Error if path is absolute, contains traversal, or does not resolve
 *   to a location strictly below the root (the root itself is rejected)
 */
export function validateArtifactPath(artifactPath: string, artifactRoot: string): string {
  // Reject absolute paths outright; callers must pass root-relative paths.
  if (path.isAbsolute(artifactPath)) {
    throw new Error(`Absolute paths not allowed: ${artifactPath}`);
  }

  const normalized = path.normalize(artifactPath);

  // Compare whole segments so names that merely contain ".." (e.g. "a..b") pass.
  if (normalized.split(path.sep).includes('..')) {
    throw new Error(`Path traversal detected: ${artifactPath}`);
  }

  const rootResolved = path.resolve(artifactRoot);
  const resolved = path.resolve(rootResolved, normalized);

  // A filesystem root ("/" or "C:\") already ends with a separator; appending
  // another one would produce a prefix that no resolved path can match.
  const rootPrefix = rootResolved.endsWith(path.sep) ? rootResolved : rootResolved + path.sep;

  // Must be strictly under the artifact root.
  if (resolved === rootResolved || !resolved.startsWith(rootPrefix)) {
    throw new Error(`Path escaped artifact root: ${artifactPath}`);
  }

  return resolved;
}
49
+
50
/**
 * Creates a directory path safely without following symlinks.
 *
 * Walks from `root` down to `dirPath` one component at a time. Each existing
 * component is inspected with `lstat` (which does not follow symlinks) and
 * must be a real directory; missing components are created one by one.
 *
 * @param dirPath - Target directory path to create
 * @param root - Root directory that must contain the path
 * @throws Error if `dirPath` is not inside `root`, or if a symlink or
 *   non-directory is found along the path; filesystem errors other than
 *   ENOENT from `lstat` are rethrown unchanged
 */
export async function mkdirSafe(dirPath: string, root: string): Promise<void> {
  const relative = path.relative(root, dirPath);
  const parts = relative.split(path.sep).filter(Boolean);

  // Enforce the documented contract: without this check a dirPath outside
  // root yields ".." components and the loop would walk (and create
  // directories) above the root.
  if (path.isAbsolute(relative) || parts.includes('..')) {
    throw new Error(`Path escaped root: ${dirPath}`);
  }

  let current = root;

  for (const part of parts) {
    current = path.join(current, part);

    try {
      const stat = await fs.lstat(current); // lstat doesn't follow symlinks
      if (stat.isSymbolicLink()) {
        throw new Error(`Symlink in path: ${current}`);
      }
      if (!stat.isDirectory()) {
        throw new Error(`Not a directory: ${current}`);
      }
    } catch (e: unknown) {
      // Only a missing component is recoverable; the validation errors thrown
      // above carry no `code` and therefore fall through to the rethrow.
      const error = e as NodeJS.ErrnoException;
      if (error.code === 'ENOENT') {
        await fs.mkdir(current);
      } else {
        throw e;
      }
    }
  }
}
86
+
87
/**
 * Opens a file without following symlinks (POSIX O_NOFOLLOW).
 *
 * Any `flags` string containing 'w' opens write-only with
 * O_CREAT | O_EXCL | O_NOFOLLOW, i.e. the file must not already exist.
 * Every other value opens read-only with O_NOFOLLOW.
 *
 * After opening, the handle is stat'ed and rejected unless it refers to a
 * regular file. The handle is closed before any error is propagated.
 *
 * @param filePath - Path to the file to open
 * @param flags - Open mode: 'r' for read, 'w' or 'wx' for exclusive write
 * @returns FileHandle if successful; the caller is responsible for closing it
 * @throws Error if the opened handle is not a regular file; errors from
 *   `open` and `stat` are propagated unchanged
 */
export async function openNoFollow(filePath: string, flags: string): Promise<fs.FileHandle> {
  // POSIX: O_NOFOLLOW prevents following symlinks
  const numericFlags = flags.includes('w')
    ? constants.O_WRONLY | constants.O_CREAT | constants.O_EXCL | constants.O_NOFOLLOW
    : constants.O_RDONLY | constants.O_NOFOLLOW;

  const handle = await fs.open(filePath, numericFlags);

  try {
    // Verify it's a regular file, using the handle rather than the path so the
    // check applies to the object that was actually opened.
    const stat = await handle.stat();
    if (!stat.isFile()) {
      throw new Error(`Not a regular file: ${filePath}`);
    }
  } catch (e: unknown) {
    // Never leak the descriptor. A secondary close failure is deliberately
    // ignored so the original error is the one the caller sees.
    await handle.close().catch(() => {});
    throw e;
  }

  return handle;
}
115
+
116
/**
 * Safely writes an artifact with TOCTOU mitigation.
 *
 * Steps:
 * 1. Validates the path doesn't escape artifact root (lexical check)
 * 2. Creates parent directories safely (no symlink following)
 * 3. Opens file exclusively with O_NOFOLLOW (fails if the file already exists)
 * 4. Re-validates via realpath after open: the real location of the new file
 *    must lie below the real location of the artifact root
 * 5. Writes all of the data and syncs to disk
 *
 * @param artifactRoot - Root directory for artifacts
 * @param relativePath - Relative path within artifact root
 * @param data - Data to write
 * @returns Absolute path where file was written
 * @throws Error if any security check fails; filesystem errors are propagated
 */
export async function safeWriteArtifact(
  artifactRoot: string,
  relativePath: string,
  data: Buffer
): Promise<string> {
  const targetPath = validateArtifactPath(relativePath, artifactRoot);

  // Create directories safely
  await mkdirSafe(path.dirname(targetPath), artifactRoot);

  // Open without following symlinks, create exclusively
  const fd = await openNoFollow(targetPath, 'wx');

  try {
    // Resolve BOTH sides. Comparing the target's real path against an
    // unresolved root reports a false attack whenever the root itself is
    // reached through a symlink.
    const [realRoot, realPath] = await Promise.all([
      fs.realpath(artifactRoot),
      fs.realpath(targetPath),
    ]);

    // Segment-aware containment check (also correct when realRoot is "/").
    const rel = path.relative(realRoot, realPath);
    const escaped =
      rel === '' || rel === '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel);
    if (escaped) {
      throw new Error('TOCTOU attack detected: file escaped artifact root');
    }

    // writeFile keeps writing until the whole buffer is flushed, whereas a
    // single write() call may report fewer bytes written than requested.
    await fd.writeFile(data);
    await fd.sync();
  } finally {
    await fd.close();
  }

  return targetPath;
}
@@ -0,0 +1,107 @@
1
+ /**
2
+ * Atomic, durable file writes. Guarantees: same-dir temp (same filesystem),
3
+ * fsync before rename, directory fsync (best-effort), Windows PowerShell fallback.
4
+ */
5
+
6
+ import * as fs from 'fs/promises';
7
+ import * as path from 'path';
8
+ import * as crypto from 'crypto';
9
+ import { exec } from 'child_process';
10
+ import { promisify } from 'util';
11
+
12
// Promise-returning form of child_process.exec; atomicReplaceWindows awaits it
// to run its PowerShell command.
const execAsync = promisify(exec);
13
+
14
/**
 * Writes `data` to `targetPath` via a temp file in the same directory followed
 * by a rename, so readers never observe a partially written target.
 *
 * Sequence: validate JSON (for `.json` targets) -> ensure directory exists ->
 * write + fsync temp file -> rename over target -> best-effort directory fsync.
 * If anything fails before the rename completes, the temp file is removed.
 *
 * @param targetPath - Destination file; parent directories are created as needed
 * @param data - Text content to write
 * @throws SyntaxError if `targetPath` ends with `.json` and `data` is not valid
 *   JSON (raised before any filesystem change); filesystem errors are propagated
 */
export async function durableAtomicWrite(targetPath: string, data: string): Promise<void> {
  // Validate up front so invalid content causes no filesystem side effects
  // (no directory creation, no temp file churn).
  if (targetPath.endsWith('.json')) {
    JSON.parse(data);
  }

  const dir = path.dirname(targetPath);
  const base = path.basename(targetPath);
  // Same directory as the target so the rename stays on one filesystem.
  const tempPath = path.join(dir, `.${base}.tmp.${crypto.randomUUID()}`);

  let success = false;

  try {
    await fs.mkdir(dir, { recursive: true });

    const fd = await fs.open(tempPath, 'w');
    try {
      // writeFile writes the complete string; a single write() call may
      // report fewer bytes written than requested.
      await fd.writeFile(data, 'utf8');
      await fd.sync();
    } finally {
      await fd.close();
    }

    if (process.platform === 'win32') {
      await atomicReplaceWindows(tempPath, targetPath);
    } else {
      await fs.rename(tempPath, targetPath);
    }

    success = true;

    // Directory fsync - best-effort, some platforms don't support it
    try {
      const dirFd = await fs.open(dir, 'r');
      try {
        await dirFd.sync();
      } finally {
        await dirFd.close();
      }
    } catch (e) {
      console.warn(`[atomic-write] Directory fsync failed for ${dir}: ${(e as Error).message}`);
    }
  } finally {
    if (!success) {
      // Cleanup is best-effort; the temp file may never have been created.
      await fs.unlink(tempPath).catch(() => {});
    }
  }
}
61
+
62
/**
 * Replaces `targetPath` with `tempPath`.
 *
 * On non-Windows platforms this is a plain `fs.rename`. On Windows it first
 * tries PowerShell `Move-Item -Force` and, if that fails, falls back to
 * `fs.rename`.
 *
 * @param tempPath - Existing file to move
 * @param targetPath - Destination path to replace
 * @throws Error combining both failure messages when the PowerShell move and
 *   the rename fallback both fail on Windows; on other platforms the
 *   `fs.rename` error is propagated unchanged
 */
export async function atomicReplaceWindows(tempPath: string, targetPath: string): Promise<void> {
  if (process.platform !== 'win32') {
    await fs.rename(tempPath, targetPath);
    return;
  }

  // Inside a PowerShell single-quoted string only the quote itself needs
  // escaping, which is done by doubling it.
  const quote = (p: string): string => `'${p.replace(/'/g, "''")}'`;

  try {
    // -LiteralPath, not -Path: -Path interprets wildcard characters such as
    // '[' and ']', so a file like "report[1].json" would not be matched.
    await execAsync(
      `Move-Item -Force -LiteralPath ${quote(tempPath)} -Destination ${quote(targetPath)}`,
      { shell: 'powershell.exe' }
    );
  } catch (psError) {
    try {
      await fs.rename(tempPath, targetPath);
    } catch (renameError) {
      throw new Error(
        `Windows atomic replace failed. ` +
          `PowerShell: ${(psError as Error).message}. ` +
          `Rename fallback: ${(renameError as Error).message}`
      );
    }
  }
}
88
+
89
/**
 * Reports whether `fs.access` succeeds for the given path.
 *
 * @param filePath - Path to probe
 * @returns `true` when `fs.access` resolves, `false` when it rejects for any reason
 */
export async function fileExists(filePath: string): Promise<boolean> {
  return fs.access(filePath).then(
    () => true,
    () => false
  );
}
97
+
98
/**
 * Reads a file as UTF-8 text, optionally parsing it as JSON.
 *
 * The result is cast to `T` without runtime validation.
 *
 * @param filePath - File to read
 * @param parseJson - When `true`, the text is passed through `JSON.parse`
 * @returns The raw text, or the parsed JSON value when `parseJson` is set
 * @throws Propagates read errors, and `JSON.parse` errors when `parseJson` is set
 */
export async function readFile<T = string>(
  filePath: string,
  parseJson: boolean = false
): Promise<T> {
  const text = await fs.readFile(filePath, 'utf-8');
  return (parseJson ? JSON.parse(text) : text) as T;
}
@@ -0,0 +1,176 @@
1
+ /**
2
+ * Cell Identity Management for Jupyter Notebooks
3
+ *
4
+ * Provides deterministic cell ID generation and migration for nbformat 4.5 compatibility.
5
+ * Legacy notebooks (pre-4.5) lack cell IDs; this module backfills them deterministically
6
+ * to ensure reproducible cell identification across sessions.
7
+ *
8
+ * @module cell-identity
9
+ */
10
+
11
+ import * as crypto from 'crypto';
12
+
13
/**
 * One cell of a Jupyter notebook.
 * The cell kind is restricted to code, markdown, or raw.
 */
export interface NotebookCell {
  /** Kind of cell */
  cell_type: 'code' | 'markdown' | 'raw';

  /** Cell text - either a single string or an array of string fragments */
  source: string | string[];

  /** Cell identifier (optional in nbformat < 4.5, required in 4.5+) */
  id?: string;

  /** Free-form cell metadata */
  metadata?: Record<string, unknown>;

  /** Execution counter for code cells (null if not executed) */
  execution_count?: number | null;

  /** Outputs attached to code cells */
  outputs?: unknown[];
}
36
+
37
/**
 * Top-level structure of a Jupyter notebook document.
 */
export interface Notebook {
  /** Major format version (typically 4) */
  nbformat: number;

  /** Minor format version (4.5 required for cell IDs) */
  nbformat_minor: number;

  /** Notebook-level metadata */
  metadata: Record<string, unknown>;

  /** Cells of the notebook, in document order */
  cells: NotebookCell[];
}
53
+
54
/**
 * Outcome of a notebook cell ID migration.
 */
export interface MigrationResult {
  /** How many cells received a newly generated ID */
  migrated: number;
}
61
+
62
/**
 * Computes a canonical content hash for a notebook cell.
 *
 * Normalisation applied to the source before hashing:
 * 1. Array sources are concatenated with no separator
 * 2. Trailing whitespace (including trailing newlines) is removed
 * 3. CRLF pairs are rewritten to LF; a lone CR is left as-is
 * 4. The UTF-8 bytes of the result are hashed with SHA-256
 *
 * Identical content therefore hashes identically whether it was stored as a
 * string or an array, and whether it used CRLF or LF line endings.
 *
 * @param cell - The notebook cell to hash
 * @returns A prefixed hash string in format "sha256:{hex}"
 *
 * @example
 * ```typescript
 * const hash = canonicalCellHash({
 *   cell_type: 'code',
 *   source: 'print("hello")\n'
 * });
 * // Returns: "sha256:abc123..."
 * ```
 */
export function canonicalCellHash(cell: NotebookCell): string {
  let text = cell.source;
  if (Array.isArray(text)) {
    text = text.join('');
  }

  const canonical = text.trimEnd().replace(/\r\n/g, '\n');

  const hasher = crypto.createHash('sha256');
  hasher.update(canonical, 'utf8');
  return 'sha256:' + hasher.digest('hex');
}
92
+
93
/**
 * Returns the cell's ID, deterministically generating and storing one when the
 * cell has none (a missing or empty `id` both count as "none").
 *
 * A generated ID is derived from three inputs joined with ':':
 * - the notebook path (distinguishes notebooks)
 * - the cell index (distinguishes positions within a notebook)
 * - the canonical content hash of the cell (changes when the content changes)
 *
 * The SHA-256 of that string is truncated to its first 8 hex characters, so
 * the same path + index + content always yields the same ID.
 *
 * Generated IDs use format: "gyoshu-{8-char-hex}"
 *
 * @param cell - The notebook cell (modified in place if ID is added)
 * @param index - Zero-based index of the cell in the notebook
 * @param notebookPath - Path to the notebook file (used for uniqueness)
 * @returns The cell's ID (existing or newly generated)
 *
 * @example
 * ```typescript
 * const cell = { cell_type: 'code', source: 'x = 1' };
 * const id = ensureCellId(cell, 0, '/path/to/notebook.ipynb');
 * // cell.id is now set, and id contains the same value
 * ```
 */
export function ensureCellId(cell: NotebookCell, index: number, notebookPath: string): string {
  if (!cell.id) {
    const seed = [notebookPath, index, canonicalCellHash(cell)].join(':');
    const digest = crypto.createHash('sha256').update(seed).digest('hex');
    cell.id = `gyoshu-${digest.slice(0, 8)}`;
  }

  return cell.id;
}
133
+
134
/**
 * Migrates all cells in a notebook to have IDs, raising the format version if needed.
 *
 * This function:
 * 1. Iterates through all cells in the notebook
 * 2. Assigns deterministic IDs to cells that lack them
 * 3. If any cell was migrated, raises the notebook format to at least 4.5.
 *    A notebook that already declares 4.5 or later keeps its version, so the
 *    migration never downgrades it.
 *
 * The notebook object is modified in place.
 *
 * @param notebook - The notebook to migrate (modified in place)
 * @param notebookPath - Path to the notebook file (used for ID generation)
 * @returns Object containing the count of migrated cells
 *
 * @example
 * ```typescript
 * const notebook = JSON.parse(fs.readFileSync('notebook.ipynb', 'utf8'));
 * const result = migrateNotebookCellIds(notebook, 'notebook.ipynb');
 * console.log(`Migrated ${result.migrated} cells`);
 *
 * if (result.migrated > 0) {
 *   fs.writeFileSync('notebook.ipynb', JSON.stringify(notebook, null, 2));
 * }
 * ```
 */
export function migrateNotebookCellIds(notebook: Notebook, notebookPath: string): MigrationResult {
  let migrated = 0;

  for (let i = 0; i < notebook.cells.length; i++) {
    const cell = notebook.cells[i];
    if (!cell.id) {
      ensureCellId(cell, i, notebookPath);
      migrated++;
    }
  }

  if (migrated > 0) {
    // The negated comparisons also catch a missing or non-numeric version
    // field, which is then set to 4.5 exactly as before.
    if (!(notebook.nbformat >= 4)) {
      notebook.nbformat = 4;
      notebook.nbformat_minor = 5;
    } else if (notebook.nbformat === 4 && !(notebook.nbformat_minor >= 5)) {
      notebook.nbformat_minor = 5;
    }
  }

  return { migrated };
}