@yeyuan98/opencode-bioresearcher-plugin 1.5.1 → 1.5.2-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/bioresearcher/prompt.d.ts +1 -1
- package/dist/agents/bioresearcher/prompt.js +235 -27
- package/dist/agents/bioresearcherDR/prompt.d.ts +1 -1
- package/dist/agents/bioresearcherDR/prompt.js +8 -8
- package/dist/agents/bioresearcherDR_worker/prompt.d.ts +3 -2
- package/dist/agents/bioresearcherDR_worker/prompt.js +37 -12
- package/dist/shared/tool-restrictions.d.ts +2 -2
- package/dist/shared/tool-restrictions.js +4 -3
- package/dist/skills/bioresearcher-core/SKILL.md +58 -1
- package/dist/skills/bioresearcher-core/patterns/bioresearcher/analysis-methods.md +551 -0
- package/dist/skills/bioresearcher-core/patterns/bioresearcher/best-practices.md +647 -0
- package/dist/skills/bioresearcher-core/patterns/bioresearcher/python-standards.md +944 -0
- package/dist/skills/bioresearcher-core/patterns/bioresearcher/report-template.md +613 -0
- package/dist/skills/bioresearcher-core/patterns/bioresearcher/tool-selection.md +481 -0
- package/dist/skills/bioresearcher-core/patterns/citations.md +234 -0
- package/dist/skills/bioresearcher-core/patterns/rate-limiting.md +167 -0
- package/dist/skills/bioresearcher-tests/README.md +90 -90
- package/dist/skills/bioresearcher-tests/SKILL.md +255 -255
- package/dist/skills/bioresearcher-tests/pyproject.toml +6 -6
- package/dist/skills/bioresearcher-tests/test_cases/json_tests.md +137 -137
- package/dist/skills/bioresearcher-tests/test_cases/misc_tests.md +141 -141
- package/dist/skills/bioresearcher-tests/test_cases/parser_tests.md +80 -80
- package/dist/skills/bioresearcher-tests/test_cases/skill_tests.md +59 -59
- package/dist/skills/bioresearcher-tests/test_cases/table_tests.md +194 -194
- package/dist/skills/bioresearcher-tests/test_runner.py +607 -607
- package/dist/skills/long-table-summary/SKILL.md +224 -224
- package/dist/tools/sandbox/bash-parser.d.ts +17 -0
- package/dist/tools/sandbox/bash-parser.js +166 -0
- package/dist/tools/sandbox/escape-scenarios.test.d.ts +7 -0
- package/dist/tools/sandbox/escape-scenarios.test.js +182 -0
- package/dist/tools/sandbox/expander.d.ts +30 -0
- package/dist/tools/sandbox/expander.js +57 -0
- package/dist/tools/sandbox/final-verification.test.d.ts +6 -0
- package/dist/tools/sandbox/final-verification.test.js +70 -0
- package/dist/tools/sandbox/hooks.d.ts +25 -0
- package/dist/tools/sandbox/hooks.js +217 -0
- package/dist/tools/sandbox/index.d.ts +19 -0
- package/dist/tools/sandbox/index.js +24 -0
- package/dist/tools/sandbox/manager.d.ts +60 -0
- package/dist/tools/sandbox/manager.js +113 -0
- package/dist/tools/sandbox/sandbox.integration.test.d.ts +7 -0
- package/dist/tools/sandbox/sandbox.integration.test.js +106 -0
- package/dist/tools/sandbox/sandbox.test.d.ts +6 -0
- package/dist/tools/sandbox/sandbox.test.js +160 -0
- package/dist/tools/sandbox/tool.d.ts +66 -0
- package/dist/tools/sandbox/tool.js +163 -0
- package/dist/tools/sandbox/types.d.ts +38 -0
- package/dist/tools/sandbox/types.js +6 -0
- package/dist/tools/sandbox/validator.d.ts +33 -0
- package/dist/tools/sandbox/validator.js +150 -0
- package/dist/tools/skill/registry.js +0 -1
- package/dist/tools/table/utils.js +4 -4
- package/package.json +1 -1
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: long-table-summary
|
|
3
3
|
description: Batch-process large tables using parallel subagents for summarization
|
|
4
|
-
allowedTools:
|
|
5
|
-
- Bash
|
|
6
|
-
- Read
|
|
7
|
-
- Write
|
|
8
|
-
- Question
|
|
9
|
-
- Task
|
|
10
|
-
- tableListSheets
|
|
11
|
-
- tableGetSheetPreview
|
|
12
|
-
- tableGetHeaders
|
|
13
|
-
- tableGetRange
|
|
14
|
-
- jsonValidate
|
|
15
|
-
- jsonInfer
|
|
16
|
-
---
|
|
4
|
+
allowedTools:
|
|
5
|
+
- Bash
|
|
6
|
+
- Read
|
|
7
|
+
- Write
|
|
8
|
+
- Question
|
|
9
|
+
- Task
|
|
10
|
+
- tableListSheets
|
|
11
|
+
- tableGetSheetPreview
|
|
12
|
+
- tableGetHeaders
|
|
13
|
+
- tableGetRange
|
|
14
|
+
- jsonValidate
|
|
15
|
+
- jsonInfer
|
|
16
|
+
---
|
|
17
17
|
|
|
18
18
|
# Long Table Summary
|
|
19
19
|
|
|
@@ -112,70 +112,70 @@ If user selects "Yes, I want to modify":
|
|
|
112
112
|
- Update the JSON accordingly
|
|
113
113
|
- Repeat the approval question
|
|
114
114
|
|
|
115
|
-
Continue until user explicitly confirms that the instruction JSON is correct.
|
|
116
|
-
|
|
117
|
-
### Step 6.5: Generate Output JSON Schema
|
|
118
|
-
|
|
119
|
-
Generate a JSON Schema that defines the exact output structure. All fields are required.
|
|
120
|
-
|
|
121
|
-
**Default value for unavailable data:** Use `"NA"` (string) for any field where data cannot be extracted.
|
|
122
|
-
|
|
123
|
-
**Construct example output:**
|
|
124
|
-
|
|
125
|
-
1. Start with the base structure:
|
|
126
|
-
```json
|
|
127
|
-
{
|
|
128
|
-
"batch_number": 1,
|
|
129
|
-
"row_count": 30,
|
|
130
|
-
"summaries": [
|
|
131
|
-
{
|
|
132
|
-
"row_number": 2
|
|
133
|
-
}
|
|
134
|
-
]
|
|
135
|
-
}
|
|
136
|
-
```
|
|
137
|
-
|
|
138
|
-
2. Add each user-specified field with an example value (use `"NA"` if the field might be empty):
|
|
139
|
-
|
|
140
|
-
For example, if user provided:
|
|
141
|
-
```json
|
|
142
|
-
{
|
|
143
|
-
"species": "Species classification: Tier1/Tier2/NA",
|
|
144
|
-
"topic": "Main topic: Oncology/Immunology/Other"
|
|
145
|
-
}
|
|
146
|
-
```
|
|
147
|
-
|
|
148
|
-
Construct the example output:
|
|
149
|
-
```json
|
|
150
|
-
{
|
|
151
|
-
"batch_number": 1,
|
|
152
|
-
"row_count": 30,
|
|
153
|
-
"summaries": [
|
|
154
|
-
{
|
|
155
|
-
"row_number": 2,
|
|
156
|
-
"species": "Tier1",
|
|
157
|
-
"topic": "Oncology"
|
|
158
|
-
}
|
|
159
|
-
]
|
|
160
|
-
}
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
**Generate schema with strict mode:**
|
|
164
|
-
|
|
165
|
-
```typescript
|
|
166
|
-
jsonInfer data='<example_output_json>' strict=true
|
|
167
|
-
```
|
|
168
|
-
|
|
169
|
-
**Save the returned schema to:**
|
|
170
|
-
|
|
171
|
-
```bash
|
|
172
|
-
Write file: .long-table-summary/{topic}/schema.json
|
|
173
|
-
Content: <schema_from_jsonInfer>
|
|
174
|
-
```
|
|
175
|
-
|
|
176
|
-
This schema file will be used by all subagents to validate their outputs before writing.
|
|
177
|
-
|
|
178
|
-
### Step 7: Autogenerate Topic Name
|
|
115
|
+
Continue until user explicitly confirms that the instruction JSON is correct.
|
|
116
|
+
|
|
117
|
+
### Step 6.5: Generate Output JSON Schema
|
|
118
|
+
|
|
119
|
+
Generate a JSON Schema that defines the exact output structure. All fields are required.
|
|
120
|
+
|
|
121
|
+
**Default value for unavailable data:** Use `"NA"` (string) for any field where data cannot be extracted.
|
|
122
|
+
|
|
123
|
+
**Construct example output:**
|
|
124
|
+
|
|
125
|
+
1. Start with the base structure:
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"batch_number": 1,
|
|
129
|
+
"row_count": 30,
|
|
130
|
+
"summaries": [
|
|
131
|
+
{
|
|
132
|
+
"row_number": 2
|
|
133
|
+
}
|
|
134
|
+
]
|
|
135
|
+
}
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
2. Add each user-specified field with an example value (use `"NA"` if the field might be empty):
|
|
139
|
+
|
|
140
|
+
For example, if user provided:
|
|
141
|
+
```json
|
|
142
|
+
{
|
|
143
|
+
"species": "Species classification: Tier1/Tier2/NA",
|
|
144
|
+
"topic": "Main topic: Oncology/Immunology/Other"
|
|
145
|
+
}
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Construct the example output:
|
|
149
|
+
```json
|
|
150
|
+
{
|
|
151
|
+
"batch_number": 1,
|
|
152
|
+
"row_count": 30,
|
|
153
|
+
"summaries": [
|
|
154
|
+
{
|
|
155
|
+
"row_number": 2,
|
|
156
|
+
"species": "Tier1",
|
|
157
|
+
"topic": "Oncology"
|
|
158
|
+
}
|
|
159
|
+
]
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
**Generate schema with strict mode:**
|
|
164
|
+
|
|
165
|
+
```typescript
|
|
166
|
+
jsonInfer data='<example_output_json>' strict=true
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**Save the returned schema to:**
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
Write file: .long-table-summary/{topic}/schema.json
|
|
173
|
+
Content: <schema_from_jsonInfer>
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
This schema file will be used by all subagents to validate their outputs before writing.
|
|
177
|
+
|
|
178
|
+
### Step 7: Autogenerate Topic Name
|
|
179
179
|
|
|
180
180
|
Generate the topic name by combining:
|
|
181
181
|
- Base filename (without extension)
|
|
@@ -216,77 +216,77 @@ Example for 90 rows with 30 per batch:
|
|
|
216
216
|
|
|
217
217
|
**Note:** Row 1 is the header, data starts at row 2.
|
|
218
218
|
|
|
219
|
-
### Step 10: Create Subagent Prompt Template
|
|
220
|
-
|
|
221
|
-
Create a template with `{placeholder}` format (single braces):
|
|
222
|
-
|
|
223
|
-
```markdown
|
|
224
|
-
# Batch Data Summarization Task
|
|
225
|
-
|
|
226
|
-
## Input File
|
|
227
|
-
- Path: `{file_path}`
|
|
228
|
-
- Sheet: `{sheet_name}`
|
|
229
|
-
|
|
230
|
-
## Row Range
|
|
231
|
-
- Batch: {batch_number}
|
|
232
|
-
- Rows: {row_start} to {row_end}
|
|
233
|
-
|
|
234
|
-
## Summarization Instructions
|
|
235
|
-
|
|
236
|
-
For each row, extract these fields:
|
|
237
|
-
|
|
238
|
-
{instructions_json}
|
|
239
|
-
|
|
240
|
-
**Default for unavailable data:** If a field cannot be extracted, use `"NA"` as the value.
|
|
241
|
-
|
|
242
|
-
## Output Structure
|
|
243
|
-
|
|
244
|
-
```json
|
|
245
|
-
{
|
|
246
|
-
"batch_number": {batch_number},
|
|
247
|
-
"row_count": <number_of_rows_in_this_batch>,
|
|
248
|
-
"summaries": [
|
|
249
|
-
{
|
|
250
|
-
"row_number": <row_number>,
|
|
251
|
-
"<field_1>": "<value_or_NA>",
|
|
252
|
-
"<field_2>": "<value_or_NA>"
|
|
253
|
-
}
|
|
254
|
-
]
|
|
255
|
-
}
|
|
256
|
-
```
|
|
257
|
-
|
|
258
|
-
## Output Schema
|
|
259
|
-
|
|
260
|
-
Your output must conform to this schema: `{schema_path}`
|
|
261
|
-
|
|
262
|
-
All fields are required. Use `"NA"` for unavailable values.
|
|
263
|
-
|
|
264
|
-
## Mandatory Workflow
|
|
265
|
-
|
|
266
|
-
**Step 1:** Read rows using `tableGetRange`:
|
|
267
|
-
```typescript
|
|
268
|
-
tableGetRange file_path="{file_path}" sheet_name="{sheet_name}" range="A{row_start}:Z{row_end}"
|
|
269
|
-
```
|
|
270
|
-
|
|
271
|
-
**Step 2:** Build JSON in memory with all required fields
|
|
272
|
-
|
|
273
|
-
**Step 3:** Validate BEFORE writing:
|
|
274
|
-
```typescript
|
|
275
|
-
jsonValidate data='<your_complete_json>' schema="{schema_path}"
|
|
276
|
-
```
|
|
277
|
-
|
|
278
|
-
**Step 4:** Check result:
|
|
279
|
-
- If `valid: true` → Go to Step 5
|
|
280
|
-
- If `valid: false` → Fix errors listed in `errors` array, return to Step 3
|
|
281
|
-
|
|
282
|
-
**Step 5:** Write validated JSON to `{output_file}`
|
|
283
|
-
|
|
284
|
-
Output file should contain ONLY the JSON object (no markdown, no extra text).
|
|
285
|
-
|
|
286
|
-
## Output Path
|
|
287
|
-
`{output_file}`
|
|
288
|
-
```
|
|
289
|
-
```
|
|
219
|
+
### Step 10: Create Subagent Prompt Template
|
|
220
|
+
|
|
221
|
+
Create a template with `{placeholder}` format (single braces):
|
|
222
|
+
|
|
223
|
+
```markdown
|
|
224
|
+
# Batch Data Summarization Task
|
|
225
|
+
|
|
226
|
+
## Input File
|
|
227
|
+
- Path: `{file_path}`
|
|
228
|
+
- Sheet: `{sheet_name}`
|
|
229
|
+
|
|
230
|
+
## Row Range
|
|
231
|
+
- Batch: {batch_number}
|
|
232
|
+
- Rows: {row_start} to {row_end}
|
|
233
|
+
|
|
234
|
+
## Summarization Instructions
|
|
235
|
+
|
|
236
|
+
For each row, extract these fields:
|
|
237
|
+
|
|
238
|
+
{instructions_json}
|
|
239
|
+
|
|
240
|
+
**Default for unavailable data:** If a field cannot be extracted, use `"NA"` as the value.
|
|
241
|
+
|
|
242
|
+
## Output Structure
|
|
243
|
+
|
|
244
|
+
```json
|
|
245
|
+
{
|
|
246
|
+
"batch_number": {batch_number},
|
|
247
|
+
"row_count": <number_of_rows_in_this_batch>,
|
|
248
|
+
"summaries": [
|
|
249
|
+
{
|
|
250
|
+
"row_number": <row_number>,
|
|
251
|
+
"<field_1>": "<value_or_NA>",
|
|
252
|
+
"<field_2>": "<value_or_NA>"
|
|
253
|
+
}
|
|
254
|
+
]
|
|
255
|
+
}
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## Output Schema
|
|
259
|
+
|
|
260
|
+
Your output must conform to this schema: `{schema_path}`
|
|
261
|
+
|
|
262
|
+
All fields are required. Use `"NA"` for unavailable values.
|
|
263
|
+
|
|
264
|
+
## Mandatory Workflow
|
|
265
|
+
|
|
266
|
+
**Step 1:** Read rows using `tableGetRange`:
|
|
267
|
+
```typescript
|
|
268
|
+
tableGetRange file_path="{file_path}" sheet_name="{sheet_name}" range="A{row_start}:Z{row_end}"
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
**Step 2:** Build JSON in memory with all required fields
|
|
272
|
+
|
|
273
|
+
**Step 3:** Validate BEFORE writing:
|
|
274
|
+
```typescript
|
|
275
|
+
jsonValidate data='<your_complete_json>' schema="{schema_path}"
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
**Step 4:** Check result:
|
|
279
|
+
- If `valid: true` → Go to Step 5
|
|
280
|
+
- If `valid: false` → Fix errors listed in `errors` array, return to Step 3
|
|
281
|
+
|
|
282
|
+
**Step 5:** Write validated JSON to `{output_file}`
|
|
283
|
+
|
|
284
|
+
Output file should contain ONLY the JSON object (no markdown, no extra text).
|
|
285
|
+
|
|
286
|
+
## Output Path
|
|
287
|
+
`{output_file}`
|
|
288
|
+
```
|
|
289
|
+
```
|
|
290
290
|
|
|
291
291
|
### Step 11: Create Directory Structure
|
|
292
292
|
|
|
@@ -305,33 +305,33 @@ Use `generate_prompts.py`:
|
|
|
305
305
|
|
|
306
306
|
**Before Step 13 and Step 17:** Extract the full path to the skill directory from the `<skill_files>` section in the skill tool output. Use this path as `<skill_path>` in the commands below.
|
|
307
307
|
|
|
308
|
-
**Unix-like shells:**
|
|
309
|
-
```bash
|
|
310
|
-
uv run python <skill_path>/generate_prompts.py \
|
|
311
|
-
--template .long-table-summary/{topic}/subagent_template.md \
|
|
312
|
-
--output-dir .long-table-summary/{topic}/prompts \
|
|
313
|
-
--num-batches {num_batches} \
|
|
314
|
-
--sheet-name "{sheet_name}" \
|
|
315
|
-
--file-path "{input_file}" \
|
|
316
|
-
--start-row 2 \
|
|
317
|
-
--batch-size {batch_size} \
|
|
318
|
-
--instructions '{instructions_json}' \
|
|
319
|
-
--schema-path ".long-table-summary/{topic}/schema.json"
|
|
320
|
-
```
|
|
321
|
-
|
|
322
|
-
**For Windows cmd.exe:**
|
|
323
|
-
```bash
|
|
324
|
-
uv.exe run python <skill_path>\generate_prompts.py ^
|
|
325
|
-
--template .long-table-summary\{topic}\subagent_template.md ^
|
|
326
|
-
--output-dir .long-table-summary\{topic}\prompts ^
|
|
327
|
-
--num-batches {num_batches} ^
|
|
328
|
-
--sheet-name "{sheet_name}" ^
|
|
329
|
-
--file-path "{input_file}" ^
|
|
330
|
-
--start-row 2 ^
|
|
331
|
-
--batch-size {batch_size} ^
|
|
332
|
-
--instructions "{instructions_json}" ^
|
|
333
|
-
--schema-path ".long-table-summary\{topic}\schema.json"
|
|
334
|
-
```
|
|
308
|
+
**Unix-like shells:**
|
|
309
|
+
```bash
|
|
310
|
+
uv run python <skill_path>/generate_prompts.py \
|
|
311
|
+
--template .long-table-summary/{topic}/subagent_template.md \
|
|
312
|
+
--output-dir .long-table-summary/{topic}/prompts \
|
|
313
|
+
--num-batches {num_batches} \
|
|
314
|
+
--sheet-name "{sheet_name}" \
|
|
315
|
+
--file-path "{input_file}" \
|
|
316
|
+
--start-row 2 \
|
|
317
|
+
--batch-size {batch_size} \
|
|
318
|
+
--instructions '{instructions_json}' \
|
|
319
|
+
--schema-path ".long-table-summary/{topic}/schema.json"
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
**For Windows cmd.exe:**
|
|
323
|
+
```bat
|
|
324
|
+
uv.exe run python <skill_path>\generate_prompts.py ^
|
|
325
|
+
--template .long-table-summary\{topic}\subagent_template.md ^
|
|
326
|
+
--output-dir .long-table-summary\{topic}\prompts ^
|
|
327
|
+
--num-batches {num_batches} ^
|
|
328
|
+
--sheet-name "{sheet_name}" ^
|
|
329
|
+
--file-path "{input_file}" ^
|
|
330
|
+
--start-row 2 ^
|
|
331
|
+
--batch-size {batch_size} ^
|
|
332
|
+
--instructions "{instructions_json}" ^
|
|
333
|
+
--schema-path ".long-table-summary\{topic}\schema.json"
|
|
334
|
+
```
|
|
335
335
|
|
|
336
336
|
**Note:** The `{instructions_json}` is the user-confirmed JSON from Step 6.
|
|
337
337
|
|
|
@@ -374,31 +374,31 @@ For example:
|
|
|
374
374
|
|
|
375
375
|
Do NOT inspect individual subagent outputs midway.
|
|
376
376
|
|
|
377
|
-
### Step 16: Check for Missing Outputs
|
|
378
|
-
|
|
379
|
-
After all batches are done, check for missing outputs:
|
|
380
|
-
|
|
381
|
-
```bash
|
|
382
|
-
ls .long-table-summary/{topic}/outputs/
|
|
383
|
-
```
|
|
384
|
-
|
|
385
|
-
Missing files indicate subagent failure. If any are missing:
|
|
386
|
-
|
|
387
|
-
1. Ask user using the `question` tool:
|
|
388
|
-
- "{number} batches failed. Retry failed batches or proceed with available outputs?"
|
|
389
|
-
|
|
390
|
-
2. **Options:**
|
|
391
|
-
- "Retry failed batches"
|
|
392
|
-
- "Proceed with available outputs"
|
|
393
|
-
|
|
394
|
-
3. **If user selects "Retry":**
|
|
395
|
-
- Re-launch subagent with same prompt file for each failed batch
|
|
396
|
-
|
|
397
|
-
4. **If user selects "Proceed":**
|
|
398
|
-
- Continue to Step 17 with available outputs
|
|
399
|
-
|
|
400
|
-
Note: Since subagents validate their outputs before writing, existing files should contain valid JSON.
|
|
401
|
-
|
|
377
|
+
### Step 16: Check for Missing Outputs
|
|
378
|
+
|
|
379
|
+
After all batches are done, check for missing outputs:
|
|
380
|
+
|
|
381
|
+
```bash
|
|
382
|
+
ls .long-table-summary/{topic}/outputs/
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
Missing files indicate subagent failure. If any are missing:
|
|
386
|
+
|
|
387
|
+
1. Ask user using the `question` tool:
|
|
388
|
+
- "{number} batches failed. Retry failed batches or proceed with available outputs?"
|
|
389
|
+
|
|
390
|
+
2. **Options:**
|
|
391
|
+
- "Retry failed batches"
|
|
392
|
+
- "Proceed with available outputs"
|
|
393
|
+
|
|
394
|
+
3. **If user selects "Retry":**
|
|
395
|
+
- Re-launch subagent with same prompt file for each failed batch
|
|
396
|
+
|
|
397
|
+
4. **If user selects "Proceed":**
|
|
398
|
+
- Continue to Step 17 with available outputs
|
|
399
|
+
|
|
400
|
+
Note: Since subagents validate their outputs before writing, existing files should contain valid JSON.
|
|
401
|
+
|
|
402
402
|
|
|
403
403
|
### Step 17: Combine All JSON Outputs
|
|
404
404
|
|
|
@@ -444,30 +444,30 @@ Provide user with:
|
|
|
444
444
|
|
|
445
445
|
## Python Scripts
|
|
446
446
|
|
|
447
|
-
### Script 1: `generate_prompts.py`
|
|
448
|
-
|
|
449
|
-
**Arguments:**
|
|
450
|
-
- `--template`: Path to subagent_template.md
|
|
451
|
-
- `--output-dir`: Directory for generated prompts
|
|
452
|
-
- `--num-batches`: Total number of batches
|
|
453
|
-
- `--sheet-name`: Sheet name
|
|
454
|
-
- `--file-path`: Full path to the input table file
|
|
455
|
-
- `--start-row`: Starting data row (default: 2)
|
|
456
|
-
- `--batch-size`: Rows per batch (default: 30)
|
|
457
|
-
- `--instructions`: User-confirmed JSON with summarization fields
|
|
458
|
-
- `--schema-path`: Path to output JSON Schema file (required)
|
|
459
|
-
- `--dry-run`: Validate without creating files (optional)
|
|
460
|
-
- `--verbose`: Enable verbose output for debugging (optional)
|
|
461
|
-
|
|
462
|
-
**Placeholders to replace:**
|
|
463
|
-
- `{file_path}` → Absolute input file path
|
|
464
|
-
- `{sheet_name}` → Sheet name
|
|
465
|
-
- `{batch_number}` → Batch number (001, 002, etc.)
|
|
466
|
-
- `{row_start}` → Start row
|
|
467
|
-
- `{row_end}` → End row
|
|
468
|
-
- `{output_file}` → Output file path
|
|
469
|
-
- `{instructions_json}` → User's JSON instruction (properly escaped for markdown code block)
|
|
470
|
-
- `{schema_path}` → Path to output JSON Schema file
|
|
447
|
+
### Script 1: `generate_prompts.py`
|
|
448
|
+
|
|
449
|
+
**Arguments:**
|
|
450
|
+
- `--template`: Path to subagent_template.md
|
|
451
|
+
- `--output-dir`: Directory for generated prompts
|
|
452
|
+
- `--num-batches`: Total number of batches
|
|
453
|
+
- `--sheet-name`: Sheet name
|
|
454
|
+
- `--file-path`: Full path to the input table file
|
|
455
|
+
- `--start-row`: Starting data row (default: 2)
|
|
456
|
+
- `--batch-size`: Rows per batch (default: 30)
|
|
457
|
+
- `--instructions`: User-confirmed JSON with summarization fields
|
|
458
|
+
- `--schema-path`: Path to output JSON Schema file (required)
|
|
459
|
+
- `--dry-run`: Validate without creating files (optional)
|
|
460
|
+
- `--verbose`: Enable verbose output for debugging (optional)
|
|
461
|
+
|
|
462
|
+
**Placeholders to replace:**
|
|
463
|
+
- `{file_path}` → Absolute input file path
|
|
464
|
+
- `{sheet_name}` → Sheet name
|
|
465
|
+
- `{batch_number}` → Batch number (001, 002, etc.)
|
|
466
|
+
- `{row_start}` → Start row
|
|
467
|
+
- `{row_end}` → End row
|
|
468
|
+
- `{output_file}` → Output file path
|
|
469
|
+
- `{instructions_json}` → User's JSON instruction (properly escaped for markdown code block)
|
|
470
|
+
- `{schema_path}` → Path to output JSON Schema file
|
|
471
471
|
|
|
472
472
|
### Script 2: `combine_outputs.py`
|
|
473
473
|
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bash Command Path Parser
|
|
3
|
+
*
|
|
4
|
+
* Extracts potential file paths from bash commands for sandbox validation.
|
|
5
|
+
* Uses heuristics to identify path-like strings.
|
|
6
|
+
*/
|
|
7
|
+
import type { BashPathExtractionResult } from './types';
|
|
8
|
+
/**
|
|
9
|
+
* Extract potential file paths from a bash command string.
|
|
10
|
+
*/
|
|
11
|
+
export declare function extractPathsFromBashCommand(command: string): BashPathExtractionResult;
|
|
12
|
+
/**
|
|
13
|
+
* Extract ALL arguments from file operation commands.
|
|
14
|
+
* Includes variables ($VAR), command substitutions ($(cmd)), etc.
|
|
15
|
+
* These will be expanded and validated before command execution.
|
|
16
|
+
*/
|
|
17
|
+
export declare function extractAllPathArgs(command: string): string[];
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bash Command Path Parser
|
|
3
|
+
*
|
|
4
|
+
* Extracts potential file paths from bash commands for sandbox validation.
|
|
5
|
+
* Uses heuristics to identify path-like strings.
|
|
6
|
+
*/
|
|
7
|
+
import { isAbsolutePath, looksLikePath } from './validator';
|
|
8
|
+
/**
 * Regular expressions for extracting paths from bash commands.
 *
 * Every pattern carries the `g` flag, so `exec()` advances `lastIndex`
 * statefully. Reset `lastIndex` to 0 before starting a new scan.
 */
const PATH_PATTERNS = {
    // Double-quoted strings (capture 1: text between the quotes)
    doubleQuoted: /"([^"]+)"/g,
    // Single-quoted strings (capture 1: text between the quotes)
    singleQuoted: /'([^']+)'/g,
    // Common file operation commands with their path arguments
    // (capture 1: leading short flags, capture 2: first non-flag argument)
    fileCommands: /\b(?:cat|head|tail|less|more|rm|cp|mv|touch|mkdir|rmdir|ls|find|chmod|chown|ln|tar|unzip|zip|gzip|gunzip)\s+((?:-[a-zA-Z]+\s+)*)([^\s;|&><(){}]+)/gi,
    // Input/output redirection (capture 1: redirection target).
    // Operators are listed longest-first and `<`/`>` are excluded from the
    // target; otherwise `>>file` matches as `>` followed by the target
    // `>file`, leaking the operator into the captured path.
    redirection: /(?:&>>|&>|2>>|2>|>>|>|<)\s*([^\s;|&><]+)/g,
    // Python/Node script execution
    // (capture 1: leading short flags, capture 2: script path ending in .py)
    scriptExecution: /\b(?:python|python3|node|uv\s+run\s+python)\s+((?:-[a-zA-Z]+\s+)*)([^\s;|&]+\.py(?:[^\s;|&]*)?)/gi,
    // Source/dot command (capture 1: sourced file)
    sourceCommand: /(?:^|\s)(?:source|\.)\s+([^\s;|&]+)/g,
    // Here-document markers (skip these) (capture 1: marker word)
    heredoc: /<<\s*['"]?(\w+)['"]?/g,
};
|
|
27
|
+
/**
 * Scan `command` with one global pattern and record every path-like capture.
 *
 * The pattern's `lastIndex` is reset first because the regexes are shared
 * module-level objects with the `g` flag and keep their position between calls.
 *
 * @param {string} command - Bash command text to scan.
 * @param {RegExp} pattern - Global regex to run over the command.
 * @param {number} groupIndex - Capture group that holds the candidate path.
 * @param {Set<string>} excluded - Candidates to ignore (e.g. heredoc markers).
 * @param {Set<string>} paths - Receives every candidate accepted by `looksLikePath`.
 * @param {Set<string>} absolutePaths - Receives the accepted candidates that
 *   `isAbsolutePath` also accepts.
 */
function collectPathMatches(command, pattern, groupIndex, excluded, paths, absolutePaths) {
    pattern.lastIndex = 0;
    let match;
    while ((match = pattern.exec(command)) !== null) {
        const candidate = match[groupIndex];
        if (!candidate || excluded.has(candidate) || !looksLikePath(candidate)) {
            continue;
        }
        paths.add(candidate);
        if (isAbsolutePath(candidate)) {
            absolutePaths.add(candidate);
        }
    }
}
/**
 * Extract potential file paths from a bash command string.
 *
 * Runs each heuristic in `PATH_PATTERNS` (quoted strings, file commands,
 * redirections, script execution, source commands) and keeps the captures
 * that `looksLikePath` accepts. Quoted strings equal to a heredoc marker are
 * ignored.
 *
 * @param {string} command - Bash command text to inspect.
 * @returns {{paths: string[], hasAbsolutePath: boolean, absolutePaths: string[]}}
 *   Unique candidates in discovery order, whether any of them is absolute,
 *   and the absolute subset.
 */
export function extractPathsFromBashCommand(command) {
    const paths = new Set();
    const absolutePaths = new Set();
    // Track heredoc markers so quoted markers are not mistaken for paths
    const heredocMarkers = new Set();
    PATH_PATTERNS.heredoc.lastIndex = 0;
    let heredocMatch;
    while ((heredocMatch = PATH_PATTERNS.heredoc.exec(command)) !== null) {
        heredocMarkers.add(heredocMatch[1]);
    }
    // Only the quoted-string scans exclude heredoc markers
    const nothingExcluded = new Set();
    // [pattern, capture group holding the path, exclusions], in scan order
    const scans = [
        [PATH_PATTERNS.doubleQuoted, 1, heredocMarkers],
        [PATH_PATTERNS.singleQuoted, 1, heredocMarkers],
        // group 2 is the path after any flags
        [PATH_PATTERNS.fileCommands, 2, nothingExcluded],
        [PATH_PATTERNS.redirection, 1, nothingExcluded],
        // group 2 is the script path after any flags
        [PATH_PATTERNS.scriptExecution, 2, nothingExcluded],
        [PATH_PATTERNS.sourceCommand, 1, nothingExcluded],
    ];
    for (const [pattern, groupIndex, excluded] of scans) {
        collectPathMatches(command, pattern, groupIndex, excluded, paths, absolutePaths);
    }
    return {
        paths: Array.from(paths),
        hasAbsolutePath: absolutePaths.size > 0,
        absolutePaths: Array.from(absolutePaths)
    };
}
|
|
114
|
+
/**
 * Extract ALL arguments from file operation commands.
 * Includes variables ($VAR), command substitutions ($(cmd)), etc.
 * These will be expanded and validated before command execution.
 *
 * Tokenization is a plain split on whitespace and the shell operators
 * `; | & > <`; quoting is not interpreted, so a quoted argument that contains
 * spaces comes back as several tokens with the quote characters attached.
 *
 * Flags (tokens starting with `-`) are not returned, but the value of an
 * `--option=value` style flag is, so a path passed that way is still collected.
 *
 * @param {string} command - Bash command text to inspect.
 * @returns {string[]} Unique argument tokens and redirection targets, in
 *   discovery order.
 */
export function extractAllPathArgs(command) {
    const args = new Set();
    // File commands that take path arguments
    const fileCommands = [
        'cat', 'head', 'tail', 'less', 'more', 'rm', 'cp', 'mv',
        'touch', 'mkdir', 'rmdir', 'ls', 'find', 'chmod', 'chown',
        'ln', 'tar', 'unzip', 'zip', 'gzip', 'gunzip', 'dd',
        'sh', 'bash', 'python', 'python3', 'node'
    ];
    const fileCommandSet = new Set(fileCommands);
    // Pattern to find command names (fresh regex per call, so no shared lastIndex)
    const cmdPattern = new RegExp(`\\b(${fileCommands.join('|')})\\b`, 'gi');
    let cmdMatch;
    while ((cmdMatch = cmdPattern.exec(command)) !== null) {
        // Everything after this command occurrence
        const afterCmd = command.substring(cmdMatch.index + cmdMatch[1].length);
        // Split by shell operators and whitespace to get individual arguments
        // This is a simplified approach - doesn't handle quotes perfectly but catches most cases
        const parts = afterCmd.split(/[\s;|&><]+/).filter(p => p.length > 0);
        for (const part of parts) {
            if (part.startsWith('-')) {
                // Skip the flag itself, but keep the value of `--opt=value`:
                // it may be a path (e.g. --target-directory=/etc).
                const eq = part.indexOf('=');
                if (eq !== -1 && eq < part.length - 1) {
                    args.add(part.slice(eq + 1));
                }
                continue;
            }
            // Stop if we hit another command; its own occurrence is scanned separately
            if (fileCommandSet.has(part.toLowerCase())) {
                break;
            }
            // This looks like a path argument
            args.add(part);
        }
    }
    // Extract redirection targets: > file, < file, >> file, 2> file
    // Braces and parentheses are allowed in the target, so ${VAR} and $(cmd) are kept whole.
    // Operators are listed longest-first so `>>` is never read as `>` plus a target.
    const redirPattern = /(?:&>>|&>|2>>|2>|>>|>|<)\s*([^\s;|&><]+)/g;
    let match;
    while ((match = redirPattern.exec(command)) !== null) {
        if (match[1]) {
            args.add(match[1]);
        }
    }
    return Array.from(args);
}
|