@yeyuan98/opencode-bioresearcher-plugin 1.5.1 → 1.5.2-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/dist/agents/bioresearcher/prompt.d.ts +1 -1
  2. package/dist/agents/bioresearcher/prompt.js +235 -27
  3. package/dist/agents/bioresearcherDR/prompt.d.ts +1 -1
  4. package/dist/agents/bioresearcherDR/prompt.js +8 -8
  5. package/dist/agents/bioresearcherDR_worker/prompt.d.ts +3 -2
  6. package/dist/agents/bioresearcherDR_worker/prompt.js +37 -12
  7. package/dist/shared/tool-restrictions.d.ts +2 -2
  8. package/dist/shared/tool-restrictions.js +4 -3
  9. package/dist/skills/bioresearcher-core/SKILL.md +58 -1
  10. package/dist/skills/bioresearcher-core/patterns/bioresearcher/analysis-methods.md +551 -0
  11. package/dist/skills/bioresearcher-core/patterns/bioresearcher/best-practices.md +647 -0
  12. package/dist/skills/bioresearcher-core/patterns/bioresearcher/python-standards.md +944 -0
  13. package/dist/skills/bioresearcher-core/patterns/bioresearcher/report-template.md +613 -0
  14. package/dist/skills/bioresearcher-core/patterns/bioresearcher/tool-selection.md +481 -0
  15. package/dist/skills/bioresearcher-core/patterns/citations.md +234 -0
  16. package/dist/skills/bioresearcher-core/patterns/rate-limiting.md +167 -0
  17. package/dist/skills/bioresearcher-tests/README.md +90 -90
  18. package/dist/skills/bioresearcher-tests/SKILL.md +255 -255
  19. package/dist/skills/bioresearcher-tests/pyproject.toml +6 -6
  20. package/dist/skills/bioresearcher-tests/test_cases/json_tests.md +137 -137
  21. package/dist/skills/bioresearcher-tests/test_cases/misc_tests.md +141 -141
  22. package/dist/skills/bioresearcher-tests/test_cases/parser_tests.md +80 -80
  23. package/dist/skills/bioresearcher-tests/test_cases/skill_tests.md +59 -59
  24. package/dist/skills/bioresearcher-tests/test_cases/table_tests.md +194 -194
  25. package/dist/skills/bioresearcher-tests/test_runner.py +607 -607
  26. package/dist/skills/long-table-summary/SKILL.md +224 -224
  27. package/dist/tools/sandbox/bash-parser.d.ts +17 -0
  28. package/dist/tools/sandbox/bash-parser.js +166 -0
  29. package/dist/tools/sandbox/escape-scenarios.test.d.ts +7 -0
  30. package/dist/tools/sandbox/escape-scenarios.test.js +182 -0
  31. package/dist/tools/sandbox/expander.d.ts +30 -0
  32. package/dist/tools/sandbox/expander.js +57 -0
  33. package/dist/tools/sandbox/final-verification.test.d.ts +6 -0
  34. package/dist/tools/sandbox/final-verification.test.js +70 -0
  35. package/dist/tools/sandbox/hooks.d.ts +25 -0
  36. package/dist/tools/sandbox/hooks.js +217 -0
  37. package/dist/tools/sandbox/index.d.ts +19 -0
  38. package/dist/tools/sandbox/index.js +24 -0
  39. package/dist/tools/sandbox/manager.d.ts +60 -0
  40. package/dist/tools/sandbox/manager.js +113 -0
  41. package/dist/tools/sandbox/sandbox.integration.test.d.ts +7 -0
  42. package/dist/tools/sandbox/sandbox.integration.test.js +106 -0
  43. package/dist/tools/sandbox/sandbox.test.d.ts +6 -0
  44. package/dist/tools/sandbox/sandbox.test.js +160 -0
  45. package/dist/tools/sandbox/tool.d.ts +66 -0
  46. package/dist/tools/sandbox/tool.js +163 -0
  47. package/dist/tools/sandbox/types.d.ts +38 -0
  48. package/dist/tools/sandbox/types.js +6 -0
  49. package/dist/tools/sandbox/validator.d.ts +33 -0
  50. package/dist/tools/sandbox/validator.js +150 -0
  51. package/dist/tools/skill/registry.js +0 -1
  52. package/dist/tools/table/utils.js +4 -4
  53. package/package.json +1 -1
@@ -1,19 +1,19 @@
1
1
  ---
2
2
  name: long-table-summary
3
3
  description: Batch-process large tables using parallel subagents for summarization
4
- allowedTools:
5
- - Bash
6
- - Read
7
- - Write
8
- - Question
9
- - Task
10
- - tableListSheets
11
- - tableGetSheetPreview
12
- - tableGetHeaders
13
- - tableGetRange
14
- - jsonValidate
15
- - jsonInfer
16
- ---
4
+ allowedTools:
5
+ - Bash
6
+ - Read
7
+ - Write
8
+ - Question
9
+ - Task
10
+ - tableListSheets
11
+ - tableGetSheetPreview
12
+ - tableGetHeaders
13
+ - tableGetRange
14
+ - jsonValidate
15
+ - jsonInfer
16
+ ---
17
17
 
18
18
  # Long Table Summary
19
19
 
@@ -112,70 +112,70 @@ If user selects "Yes, I want to modify":
112
112
  - Update the JSON accordingly
113
113
  - Repeat the approval question
114
114
 
115
- Continue until user explicitly confirms that the instruction JSON is correct.
116
-
117
- ### Step 6.5: Generate Output JSON Schema
118
-
119
- Generate a JSON Schema that defines the exact output structure. All fields are required.
120
-
121
- **Default value for unavailable data:** Use `"NA"` (string) for any field where data cannot be extracted.
122
-
123
- **Construct example output:**
124
-
125
- 1. Start with the base structure:
126
- ```json
127
- {
128
- "batch_number": 1,
129
- "row_count": 30,
130
- "summaries": [
131
- {
132
- "row_number": 2
133
- }
134
- ]
135
- }
136
- ```
137
-
138
- 2. Add each user-specified field with an example value (use `"NA"` if the field might be empty):
139
-
140
- For example, if user provided:
141
- ```json
142
- {
143
- "species": "Species classification: Tier1/Tier2/NA",
144
- "topic": "Main topic: Oncology/Immunology/Other"
145
- }
146
- ```
147
-
148
- Construct the example output:
149
- ```json
150
- {
151
- "batch_number": 1,
152
- "row_count": 30,
153
- "summaries": [
154
- {
155
- "row_number": 2,
156
- "species": "Tier1",
157
- "topic": "Oncology"
158
- }
159
- ]
160
- }
161
- ```
162
-
163
- **Generate schema with strict mode:**
164
-
165
- ```typescript
166
- jsonInfer data='<example_output_json>' strict=true
167
- ```
168
-
169
- **Save the returned schema to:**
170
-
171
- ```bash
172
- Write file: .long-table-summary/{topic}/schema.json
173
- Content: <schema_from_jsonInfer>
174
- ```
175
-
176
- This schema file will be used by all subagents to validate their outputs before writing.
177
-
178
- ### Step 7: Autogenerate Topic Name
115
+ Continue until user explicitly confirms that the instruction JSON is correct.
116
+
117
+ ### Step 6.5: Generate Output JSON Schema
118
+
119
+ Generate a JSON Schema that defines the exact output structure. All fields are required.
120
+
121
+ **Default value for unavailable data:** Use `"NA"` (string) for any field where data cannot be extracted.
122
+
123
+ **Construct example output:**
124
+
125
+ 1. Start with the base structure:
126
+ ```json
127
+ {
128
+ "batch_number": 1,
129
+ "row_count": 30,
130
+ "summaries": [
131
+ {
132
+ "row_number": 2
133
+ }
134
+ ]
135
+ }
136
+ ```
137
+
138
+ 2. Add each user-specified field with an example value (use `"NA"` if the field might be empty):
139
+
140
+ For example, if user provided:
141
+ ```json
142
+ {
143
+ "species": "Species classification: Tier1/Tier2/NA",
144
+ "topic": "Main topic: Oncology/Immunology/Other"
145
+ }
146
+ ```
147
+
148
+ Construct the example output:
149
+ ```json
150
+ {
151
+ "batch_number": 1,
152
+ "row_count": 30,
153
+ "summaries": [
154
+ {
155
+ "row_number": 2,
156
+ "species": "Tier1",
157
+ "topic": "Oncology"
158
+ }
159
+ ]
160
+ }
161
+ ```
162
+
163
+ **Generate schema with strict mode:**
164
+
165
+ ```typescript
166
+ jsonInfer data='<example_output_json>' strict=true
167
+ ```
168
+
169
+ **Save the returned schema to:**
170
+
171
+ ```bash
172
+ Write file: .long-table-summary/{topic}/schema.json
173
+ Content: <schema_from_jsonInfer>
174
+ ```
175
+
176
+ This schema file will be used by all subagents to validate their outputs before writing.
177
+
178
+ ### Step 7: Autogenerate Topic Name
179
179
 
180
180
  Generate the topic name by combining:
181
181
  - Base filename (without extension)
@@ -216,77 +216,77 @@ Example for 90 rows with 30 per batch:
216
216
 
217
217
  **Note:** Row 1 is the header, data starts at row 2.
218
218
 
219
- ### Step 10: Create Subagent Prompt Template
220
-
221
- Create a template with `{placeholder}` format (single braces):
222
-
223
- ```markdown
224
- # Batch Data Summarization Task
225
-
226
- ## Input File
227
- - Path: `{file_path}`
228
- - Sheet: `{sheet_name}`
229
-
230
- ## Row Range
231
- - Batch: {batch_number}
232
- - Rows: {row_start} to {row_end}
233
-
234
- ## Summarization Instructions
235
-
236
- For each row, extract these fields:
237
-
238
- {instructions_json}
239
-
240
- **Default for unavailable data:** If a field cannot be extracted, use `"NA"` as the value.
241
-
242
- ## Output Structure
243
-
244
- ```json
245
- {
246
- "batch_number": {batch_number},
247
- "row_count": <number_of_rows_in_this_batch>,
248
- "summaries": [
249
- {
250
- "row_number": <row_number>,
251
- "<field_1>": "<value_or_NA>",
252
- "<field_2>": "<value_or_NA>"
253
- }
254
- ]
255
- }
256
- ```
257
-
258
- ## Output Schema
259
-
260
- Your output must conform to this schema: `{schema_path}`
261
-
262
- All fields are required. Use `"NA"` for unavailable values.
263
-
264
- ## Mandatory Workflow
265
-
266
- **Step 1:** Read rows using `tableGetRange`:
267
- ```typescript
268
- tableGetRange file_path="{file_path}" sheet_name="{sheet_name}" range="A{row_start}:Z{row_end}"
269
- ```
270
-
271
- **Step 2:** Build JSON in memory with all required fields
272
-
273
- **Step 3:** Validate BEFORE writing:
274
- ```typescript
275
- jsonValidate data='<your_complete_json>' schema="{schema_path}"
276
- ```
277
-
278
- **Step 4:** Check result:
279
- - If `valid: true` → Go to Step 5
280
- - If `valid: false` → Fix errors listed in `errors` array, return to Step 3
281
-
282
- **Step 5:** Write validated JSON to `{output_file}`
283
-
284
- Output file should contain ONLY the JSON object (no markdown, no extra text).
285
-
286
- ## Output Path
287
- `{output_file}`
288
- ```
289
- ```
219
+ ### Step 10: Create Subagent Prompt Template
220
+
221
+ Create a template with `{placeholder}` format (single braces):
222
+
223
+ ```markdown
224
+ # Batch Data Summarization Task
225
+
226
+ ## Input File
227
+ - Path: `{file_path}`
228
+ - Sheet: `{sheet_name}`
229
+
230
+ ## Row Range
231
+ - Batch: {batch_number}
232
+ - Rows: {row_start} to {row_end}
233
+
234
+ ## Summarization Instructions
235
+
236
+ For each row, extract these fields:
237
+
238
+ {instructions_json}
239
+
240
+ **Default for unavailable data:** If a field cannot be extracted, use `"NA"` as the value.
241
+
242
+ ## Output Structure
243
+
244
+ ```json
245
+ {
246
+ "batch_number": {batch_number},
247
+ "row_count": <number_of_rows_in_this_batch>,
248
+ "summaries": [
249
+ {
250
+ "row_number": <row_number>,
251
+ "<field_1>": "<value_or_NA>",
252
+ "<field_2>": "<value_or_NA>"
253
+ }
254
+ ]
255
+ }
256
+ ```
257
+
258
+ ## Output Schema
259
+
260
+ Your output must conform to this schema: `{schema_path}`
261
+
262
+ All fields are required. Use `"NA"` for unavailable values.
263
+
264
+ ## Mandatory Workflow
265
+
266
+ **Step 1:** Read rows using `tableGetRange`:
267
+ ```typescript
268
+ tableGetRange file_path="{file_path}" sheet_name="{sheet_name}" range="A{row_start}:Z{row_end}"
269
+ ```
270
+
271
+ **Step 2:** Build JSON in memory with all required fields
272
+
273
+ **Step 3:** Validate BEFORE writing:
274
+ ```typescript
275
+ jsonValidate data='<your_complete_json>' schema="{schema_path}"
276
+ ```
277
+
278
+ **Step 4:** Check result:
279
+ - If `valid: true` → Go to Step 5
280
+ - If `valid: false` → Fix errors listed in `errors` array, return to Step 3
281
+
282
+ **Step 5:** Write validated JSON to `{output_file}`
283
+
284
+ Output file should contain ONLY the JSON object (no markdown, no extra text).
285
+
286
+ ## Output Path
287
+ `{output_file}`
288
+ ```
289
+ ```
290
290
 
291
291
  ### Step 11: Create Directory Structure
292
292
 
@@ -305,33 +305,33 @@ Use `generate_prompts.py`:
305
305
 
306
306
  **Before Step 13 and Step 17:** Extract the full path to the skill directory from the `<skill_files>` section in the skill tool output. Use this path as `<skill_path>` in the commands below.
307
307
 
308
- **Unix-like shells:**
309
- ```bash
310
- uv run python <skill_path>/generate_prompts.py \
311
- --template .long-table-summary/{topic}/subagent_template.md \
312
- --output-dir .long-table-summary/{topic}/prompts \
313
- --num-batches {num_batches} \
314
- --sheet-name "{sheet_name}" \
315
- --file-path "{input_file}" \
316
- --start-row 2 \
317
- --batch-size {batch_size} \
318
- --instructions '{instructions_json}' \
319
- --schema-path ".long-table-summary/{topic}/schema.json"
320
- ```
321
-
322
- **For Windows cmd.exe:**
323
- ```bash
324
- uv.exe run python <skill_path>\generate_prompts.py ^
325
- --template .long-table-summary\{topic}\subagent_template.md ^
326
- --output-dir .long-table-summary\{topic}\prompts ^
327
- --num-batches {num_batches} ^
328
- --sheet-name "{sheet_name}" ^
329
- --file-path "{input_file}" ^
330
- --start-row 2 ^
331
- --batch-size {batch_size} ^
332
- --instructions "{instructions_json}" ^
333
- --schema-path ".long-table-summary\{topic}\schema.json"
334
- ```
308
+ **Unix-like shells:**
309
+ ```bash
310
+ uv run python <skill_path>/generate_prompts.py \
311
+ --template .long-table-summary/{topic}/subagent_template.md \
312
+ --output-dir .long-table-summary/{topic}/prompts \
313
+ --num-batches {num_batches} \
314
+ --sheet-name "{sheet_name}" \
315
+ --file-path "{input_file}" \
316
+ --start-row 2 \
317
+ --batch-size {batch_size} \
318
+ --instructions '{instructions_json}' \
319
+ --schema-path ".long-table-summary/{topic}/schema.json"
320
+ ```
321
+
322
+ **For Windows cmd.exe:**
323
+ ```bash
324
+ uv.exe run python <skill_path>\generate_prompts.py ^
325
+ --template .long-table-summary\{topic}\subagent_template.md ^
326
+ --output-dir .long-table-summary\{topic}\prompts ^
327
+ --num-batches {num_batches} ^
328
+ --sheet-name "{sheet_name}" ^
329
+ --file-path "{input_file}" ^
330
+ --start-row 2 ^
331
+ --batch-size {batch_size} ^
332
+ --instructions "{instructions_json}" ^
333
+ --schema-path ".long-table-summary\{topic}\schema.json"
334
+ ```
335
335
 
336
336
  **Note:** The `{instructions_json}` is the user-confirmed JSON from Step 6.
337
337
 
@@ -374,31 +374,31 @@ For example:
374
374
 
375
375
  Do NOT inspect individual subagent outputs midway.
376
376
 
377
- ### Step 16: Check for Missing Outputs
378
-
379
- After all batches are done, check for missing outputs:
380
-
381
- ```bash
382
- ls .long-table-summary/{topic}/outputs/
383
- ```
384
-
385
- Missing files indicate subagent failure. If any are missing:
386
-
387
- 1. Ask user using the `question` tool:
388
- - "{number} batches failed. Retry failed batches or proceed with available outputs?"
389
-
390
- 2. **Options:**
391
- - "Retry failed batches"
392
- - "Proceed with available outputs"
393
-
394
- 3. **If user selects "Retry":**
395
- - Re-launch subagent with same prompt file for each failed batch
396
-
397
- 4. **If user selects "Proceed":**
398
- - Continue to Step 17 with available outputs
399
-
400
- Note: Since subagents validate their outputs before writing, existing files should contain valid JSON.
401
-
377
+ ### Step 16: Check for Missing Outputs
378
+
379
+ After all batches are done, check for missing outputs:
380
+
381
+ ```bash
382
+ ls .long-table-summary/{topic}/outputs/
383
+ ```
384
+
385
+ Missing files indicate subagent failure. If any are missing:
386
+
387
+ 1. Ask user using the `question` tool:
388
+ - "{number} batches failed. Retry failed batches or proceed with available outputs?"
389
+
390
+ 2. **Options:**
391
+ - "Retry failed batches"
392
+ - "Proceed with available outputs"
393
+
394
+ 3. **If user selects "Retry":**
395
+ - Re-launch subagent with same prompt file for each failed batch
396
+
397
+ 4. **If user selects "Proceed":**
398
+ - Continue to Step 17 with available outputs
399
+
400
+ Note: Since subagents validate their outputs before writing, existing files should contain valid JSON.
401
+
402
402
 
403
403
  ### Step 17: Combine All JSON Outputs
404
404
 
@@ -444,30 +444,30 @@ Provide user with:
444
444
 
445
445
  ## Python Scripts
446
446
 
447
- ### Script 1: `generate_prompts.py`
448
-
449
- **Arguments:**
450
- - `--template`: Path to subagent_template.md
451
- - `--output-dir`: Directory for generated prompts
452
- - `--num-batches`: Total number of batches
453
- - `--sheet-name`: Sheet name
454
- - `--file-path`: Full path to the input table file
455
- - `--start-row`: Starting data row (default: 2)
456
- - `--batch-size`: Rows per batch (default: 30)
457
- - `--instructions`: User-confirmed JSON with summarization fields
458
- - `--schema-path`: Path to output JSON Schema file (required)
459
- - `--dry-run`: Validate without creating files (optional)
460
- - `--verbose`: Enable verbose output for debugging (optional)
461
-
462
- **Placeholders to replace:**
463
- - `{file_path}` → Absolute input file path
464
- - `{sheet_name}` → Sheet name
465
- - `{batch_number}` → Batch number (001, 002, etc.)
466
- - `{row_start}` → Start row
467
- - `{row_end}` → End row
468
- - `{output_file}` → Output file path
469
- - `{instructions_json}` → User's JSON instruction (properly escaped for markdown code block)
470
- - `{schema_path}` → Path to output JSON Schema file
447
+ ### Script 1: `generate_prompts.py`
448
+
449
+ **Arguments:**
450
+ - `--template`: Path to subagent_template.md
451
+ - `--output-dir`: Directory for generated prompts
452
+ - `--num-batches`: Total number of batches
453
+ - `--sheet-name`: Sheet name
454
+ - `--file-path`: Full path to the input table file
455
+ - `--start-row`: Starting data row (default: 2)
456
+ - `--batch-size`: Rows per batch (default: 30)
457
+ - `--instructions`: User-confirmed JSON with summarization fields
458
+ - `--schema-path`: Path to output JSON Schema file (required)
459
+ - `--dry-run`: Validate without creating files (optional)
460
+ - `--verbose`: Enable verbose output for debugging (optional)
461
+
462
+ **Placeholders to replace:**
463
+ - `{file_path}` → Absolute input file path
464
+ - `{sheet_name}` → Sheet name
465
+ - `{batch_number}` → Batch number (001, 002, etc.)
466
+ - `{row_start}` → Start row
467
+ - `{row_end}` → End row
468
+ - `{output_file}` → Output file path
469
+ - `{instructions_json}` → User's JSON instruction (properly escaped for markdown code block)
470
+ - `{schema_path}` → Path to output JSON Schema file
471
471
 
472
472
  ### Script 2: `combine_outputs.py`
473
473
 
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Bash Command Path Parser
3
+ *
4
+ * Extracts potential file paths from bash commands for sandbox validation.
5
+ * Uses heuristics to identify path-like strings.
6
+ */
7
+ import type { BashPathExtractionResult } from './types';
8
+ /**
9
+ * Extract potential file paths from a bash command string.
10
+ */
11
+ export declare function extractPathsFromBashCommand(command: string): BashPathExtractionResult;
12
+ /**
13
+ * Extract ALL arguments from file operation commands.
14
+ * Includes variables ($VAR), command substitutions ($(cmd)), etc.
15
+ * These will be expanded and validated before command execution.
16
+ */
17
+ export declare function extractAllPathArgs(command: string): string[];
@@ -0,0 +1,166 @@
1
+ /**
2
+ * Bash Command Path Parser
3
+ *
4
+ * Extracts potential file paths from bash commands for sandbox validation.
5
+ * Uses heuristics to identify path-like strings.
6
+ */
7
+ import { isAbsolutePath, looksLikePath } from './validator';
8
/**
 * Regular expressions for extracting paths from bash commands.
 *
 * Every pattern carries the `g` flag, so `exec()` advances `lastIndex` on the
 * shared regex object; users of these patterns must reset `lastIndex` to 0
 * before scanning a new command.
 */
const PATH_PATTERNS = {
    // Double-quoted strings (group 1: text between the quotes)
    doubleQuoted: /"([^"]+)"/g,
    // Single-quoted strings (group 1: text between the quotes)
    singleQuoted: /'([^']+)'/g,
    // Common file operation commands (group 1: leading short flags, group 2: first argument after them)
    fileCommands: /\b(?:cat|head|tail|less|more|rm|cp|mv|touch|mkdir|rmdir|ls|find|chmod|chown|ln|tar|unzip|zip|gzip|gunzip)\s+((?:-[a-zA-Z]+\s+)*)([^\s;|&><(){}]+)/gi,
    // Input/output redirection (group 1: redirection target).
    // Longest operators are listed first and `<`/`>` are excluded from the
    // target: otherwise `>> file` matched the first `>` as the operator and the
    // second `>` as the "target", so the real file name was never captured.
    redirection: /(?:&>>|>>|&>|2>|<|>)\s*([^\s;|&<>]+)/g,
    // Python/Node script execution (group 1: leading short flags, group 2: script path).
    // NOTE(review): the script path must contain `.py`, so `node file.js` is not
    // matched by this pattern - confirm whether that is intended.
    scriptExecution: /\b(?:python|python3|node|uv\s+run\s+python)\s+((?:-[a-zA-Z]+\s+)*)([^\s;|&]+\.py(?:[^\s;|&]*)?)/gi,
    // Source/dot command (group 1: sourced file)
    sourceCommand: /(?:^|\s)(?:source|\.)\s+([^\s;|&]+)/g,
    // Here-document markers (group 1: marker word; these are skipped, not treated as paths)
    heredoc: /<<\s*['"]?(\w+)['"]?/g,
};
27
/**
 * Scan `command` with one global pattern and record every capture that looks
 * like a path.
 *
 * @param {string} command - Bash command text to scan.
 * @param {RegExp} pattern - Global regex; its `lastIndex` is reset before use.
 * @param {number} groupIndex - Capture group that holds the path candidate.
 * @param {Set<string>} paths - Receives every candidate accepted by `looksLikePath`.
 * @param {Set<string>} absolutePaths - Receives the accepted candidates that are absolute.
 * @param {Set<string>} [excluded] - Candidates to ignore (e.g. heredoc markers).
 */
function collectPathCandidates(command, pattern, groupIndex, paths, absolutePaths, excluded) {
    // The patterns are shared module-level objects with the `g` flag, so always
    // start from the beginning of the string.
    pattern.lastIndex = 0;
    for (const match of command.matchAll(pattern)) {
        const candidate = match[groupIndex];
        if (!candidate) {
            continue;
        }
        if (excluded !== undefined && excluded.has(candidate)) {
            continue;
        }
        if (!looksLikePath(candidate)) {
            continue;
        }
        paths.add(candidate);
        if (isAbsolutePath(candidate)) {
            absolutePaths.add(candidate);
        }
    }
}
/**
 * Extract potential file paths from a bash command string.
 *
 * Candidates are gathered, in this order, from double-quoted strings,
 * single-quoted strings, arguments of common file commands, redirection
 * targets, script executions and `source`/`.` commands. Quoted strings that
 * equal a here-document marker are ignored. Only candidates accepted by
 * `looksLikePath` are kept, and duplicates are reported once.
 *
 * @param {string} command - The bash command to inspect.
 * @returns {{paths: string[], hasAbsolutePath: boolean, absolutePaths: string[]}}
 *   All path candidates, whether any of them is absolute, and the absolute ones.
 */
export function extractPathsFromBashCommand(command) {
    const paths = new Set();
    const absolutePaths = new Set();
    // Find heredoc markers first so quoted markers are not mistaken for paths.
    // lastIndex is reset here like for every other shared pattern.
    const heredocMarkers = new Set();
    PATH_PATTERNS.heredoc.lastIndex = 0;
    for (const match of command.matchAll(PATH_PATTERNS.heredoc)) {
        heredocMarkers.add(match[1]);
    }
    // Quoted strings: group 1 is the quoted content; heredoc markers are excluded
    collectPathCandidates(command, PATH_PATTERNS.doubleQuoted, 1, paths, absolutePaths, heredocMarkers);
    collectPathCandidates(command, PATH_PATTERNS.singleQuoted, 1, paths, absolutePaths, heredocMarkers);
    // File operation commands: group 2 is the path after any flags
    collectPathCandidates(command, PATH_PATTERNS.fileCommands, 2, paths, absolutePaths);
    // Redirections: group 1 is the target
    collectPathCandidates(command, PATH_PATTERNS.redirection, 1, paths, absolutePaths);
    // Script execution: group 2 is the script path after any flags
    collectPathCandidates(command, PATH_PATTERNS.scriptExecution, 2, paths, absolutePaths);
    // Source commands: group 1 is the sourced file
    collectPathCandidates(command, PATH_PATTERNS.sourceCommand, 1, paths, absolutePaths);
    return {
        paths: Array.from(paths),
        hasAbsolutePath: absolutePaths.size > 0,
        absolutePaths: Array.from(absolutePaths)
    };
}
114
/**
 * Extract ALL arguments from file operation commands.
 * Includes variables ($VAR), command substitutions ($(cmd)), etc.
 * These will be expanded and validated before command execution.
 *
 * The scan is deliberately over-inclusive: tokens are produced by splitting on
 * whitespace and the shell operators `; | & > <`, quotes are not interpreted,
 * and scanning after a command only stops at the next known command name.
 *
 * Values attached to options with `=` (e.g. `--target-directory=/etc`) and the
 * `if=`/`of=` operands of `dd` are returned as well, so a path cannot avoid
 * validation by being glued to an option or operand name.
 *
 * @param {string} command - The bash command to inspect.
 * @returns {string[]} Unique argument tokens in first-seen order.
 */
export function extractAllPathArgs(command) {
    const args = new Set();
    // File commands that take path arguments
    const fileCommands = [
        'cat', 'head', 'tail', 'less', 'more', 'rm', 'cp', 'mv',
        'touch', 'mkdir', 'rmdir', 'ls', 'find', 'chmod', 'chown',
        'ln', 'tar', 'unzip', 'zip', 'gzip', 'gunzip', 'dd',
        'sh', 'bash', 'python', 'python3', 'node'
    ];
    const fileCommandSet = new Set(fileCommands);
    // Pattern to find command names (case-insensitive, whole words)
    const cmdPattern = new RegExp(`\\b(${fileCommands.join('|')})\\b`, 'gi');
    // For each command occurrence, collect the arguments that follow it
    for (const cmdMatch of command.matchAll(cmdPattern)) {
        const cmd = cmdMatch[1].toLowerCase();
        // Get the substring after the command
        const afterCmd = command.substring(cmdMatch.index + cmd.length);
        // Split by shell operators and whitespace to get individual arguments
        // This is a simplified approach - doesn't handle quotes perfectly but catches most cases
        const parts = afterCmd.split(/[\s;|&><]+/).filter((p) => p.length > 0);
        for (const part of parts) {
            if (part.startsWith('-')) {
                // Flags themselves are not paths, but `--option=value` may carry
                // one in its value. Short flags with a directly attached value
                // (`-d/etc`) cannot be told apart from flag clusters and are
                // still skipped.
                const eq = part.indexOf('=');
                if (eq !== -1 && eq < part.length - 1) {
                    args.add(part.slice(eq + 1));
                }
                continue;
            }
            // Stop if we hit another command
            if (fileCommandSet.has(part.toLowerCase())) {
                break;
            }
            // This looks like a path argument
            args.add(part);
            // dd names its files as if=<path> / of=<path>; expose the bare path too
            if (cmd === 'dd') {
                const operand = /^(?:if|of)=(.+)$/.exec(part);
                if (operand) {
                    args.add(operand[1]);
                }
            }
        }
    }
    // Extract redirection targets: > file, < file, >> file, 2> file
    // The target may contain { } ( ) so ${VAR} and $(cmd) stay intact for expansion
    const redirPattern = /(?:<|>|>>|2>|&>|&>>)\s*([^\s;|&><]+)/g;
    for (const match of command.matchAll(redirPattern)) {
        if (match[1]) {
            args.add(match[1]);
        }
    }
    return Array.from(args);
}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Real-World Escape Scenario Tests
3
+ *
4
+ * Tests the actual escape vectors discovered during security analysis
5
+ * to verify they are properly blocked by the sandbox.
6
+ */
7
+ export {};