@yeyuan98/opencode-bioresearcher-plugin 1.5.0-alpha.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -36
- package/dist/index.js +8 -6
- package/dist/skills/bioresearcher-tests/README.md +90 -0
- package/dist/skills/bioresearcher-tests/SKILL.md +255 -0
- package/dist/skills/bioresearcher-tests/pyproject.toml +6 -0
- package/dist/skills/bioresearcher-tests/resources/json_samples/in_markdown.md.gz +0 -0
- package/dist/skills/bioresearcher-tests/resources/json_samples/nested_object.json.gz +0 -0
- package/dist/skills/bioresearcher-tests/resources/json_samples/schema_draft7.json.gz +0 -0
- package/dist/skills/bioresearcher-tests/resources/json_samples/simple_array.json.gz +0 -0
- package/dist/skills/bioresearcher-tests/resources/json_samples/simple_object.json.gz +0 -0
- package/dist/skills/bioresearcher-tests/resources/obo_sample.obo.gz +0 -0
- package/dist/skills/bioresearcher-tests/resources/pubmed_sample.xml.gz +0 -0
- package/dist/skills/bioresearcher-tests/resources/table_sample.xlsx.gz +0 -0
- package/dist/skills/bioresearcher-tests/test_cases/json_tests.md +137 -0
- package/dist/skills/bioresearcher-tests/test_cases/misc_tests.md +141 -0
- package/dist/skills/bioresearcher-tests/test_cases/parser_tests.md +80 -0
- package/dist/skills/bioresearcher-tests/test_cases/skill_tests.md +59 -0
- package/dist/skills/bioresearcher-tests/test_cases/table_tests.md +194 -0
- package/dist/skills/bioresearcher-tests/test_runner.py +607 -0
- package/dist/skills/env-jsonc-setup/SKILL.md +206 -206
- package/dist/skills/long-table-summary/SKILL.md +224 -153
- package/dist/skills/long-table-summary/combine_outputs.py +55 -9
- package/dist/skills/long-table-summary/generate_prompts.py +9 -0
- package/dist/skills/pubmed-weekly/pubmed_weekly.py +130 -29
- package/dist/{db-tools → tools/db}/backends/mysql/translator.js +23 -23
- package/dist/{db-tools → tools/db}/tools.js +34 -34
- package/dist/{misc-tools → tools/misc}/json-validate.js +4 -5
- package/dist/tools/parser/obo/index.d.ts +2 -0
- package/dist/tools/parser/obo/index.js +2 -0
- package/dist/tools/parser/obo/obo.d.ts +17 -0
- package/dist/tools/parser/obo/obo.js +216 -0
- package/dist/tools/parser/obo/types.d.ts +166 -0
- package/dist/tools/parser/obo/utils.d.ts +21 -0
- package/dist/tools/parser/obo/utils.js +411 -0
- package/dist/tools/parser/pubmed/types.js +1 -0
- package/dist/{skill-tools → tools/skill}/registry.js +1 -1
- package/package.json +1 -1
- package/dist/db-tools/executor.d.ts +0 -13
- package/dist/db-tools/executor.js +0 -54
- package/dist/db-tools/pool.d.ts +0 -8
- package/dist/db-tools/pool.js +0 -49
- package/dist/db-tools/tools/index.d.ts +0 -27
- package/dist/db-tools/tools/index.js +0 -191
- package/dist/db-tools/types.d.ts +0 -94
- package/dist/db-tools/types.js +0 -40
- package/dist/misc-tools/json-tools.d.ts +0 -33
- package/dist/misc-tools/json-tools.js +0 -187
- package/dist/skill/frontmatter.d.ts +0 -2
- package/dist/skill/frontmatter.js +0 -65
- package/dist/skill/index.d.ts +0 -3
- package/dist/skill/index.js +0 -2
- package/dist/skill/registry.d.ts +0 -11
- package/dist/skill/registry.js +0 -64
- package/dist/skill/tool.d.ts +0 -9
- package/dist/skill/tool.js +0 -115
- package/dist/skill/types.d.ts +0 -22
- package/dist/skill/types.js +0 -7
- /package/dist/{db-tools → tools/db}/backends/index.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/backends/index.js +0 -0
- /package/dist/{db-tools → tools/db}/backends/mongodb/backend.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/backends/mongodb/backend.js +0 -0
- /package/dist/{db-tools → tools/db}/backends/mongodb/connection.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/backends/mongodb/connection.js +0 -0
- /package/dist/{db-tools → tools/db}/backends/mongodb/index.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/backends/mongodb/index.js +0 -0
- /package/dist/{db-tools → tools/db}/backends/mongodb/translator.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/backends/mongodb/translator.js +0 -0
- /package/dist/{db-tools → tools/db}/backends/mysql/backend.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/backends/mysql/backend.js +0 -0
- /package/dist/{db-tools → tools/db}/backends/mysql/connection.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/backends/mysql/connection.js +0 -0
- /package/dist/{db-tools → tools/db}/backends/mysql/index.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/backends/mysql/index.js +0 -0
- /package/dist/{db-tools → tools/db}/backends/mysql/translator.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/core/base.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/core/base.js +0 -0
- /package/dist/{db-tools → tools/db}/core/config-loader.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/core/config-loader.js +0 -0
- /package/dist/{db-tools → tools/db}/core/index.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/core/index.js +0 -0
- /package/dist/{db-tools → tools/db}/core/jsonc-parser.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/core/jsonc-parser.js +0 -0
- /package/dist/{db-tools → tools/db}/core/validator.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/core/validator.js +0 -0
- /package/dist/{db-tools → tools/db}/index.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/index.js +0 -0
- /package/dist/{db-tools → tools/db}/interface/backend.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/interface/backend.js +0 -0
- /package/dist/{db-tools → tools/db}/interface/connection.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/interface/connection.js +0 -0
- /package/dist/{db-tools → tools/db}/interface/index.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/interface/index.js +0 -0
- /package/dist/{db-tools → tools/db}/interface/query.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/interface/query.js +0 -0
- /package/dist/{db-tools → tools/db}/interface/schema.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/interface/schema.js +0 -0
- /package/dist/{db-tools → tools/db}/tools.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/utils.d.ts +0 -0
- /package/dist/{db-tools → tools/db}/utils.js +0 -0
- /package/dist/{misc-tools → tools/misc}/calculator.d.ts +0 -0
- /package/dist/{misc-tools → tools/misc}/calculator.js +0 -0
- /package/dist/{misc-tools → tools/misc}/index.d.ts +0 -0
- /package/dist/{misc-tools → tools/misc}/index.js +0 -0
- /package/dist/{misc-tools → tools/misc}/json-extract.d.ts +0 -0
- /package/dist/{misc-tools → tools/misc}/json-extract.js +0 -0
- /package/dist/{misc-tools → tools/misc}/json-infer.d.ts +0 -0
- /package/dist/{misc-tools → tools/misc}/json-infer.js +0 -0
- /package/dist/{misc-tools → tools/misc}/json-validate.d.ts +0 -0
- /package/dist/{misc-tools → tools/misc}/timer.d.ts +0 -0
- /package/dist/{misc-tools → tools/misc}/timer.js +0 -0
- /package/dist/{parser-tools/pubmed → tools/parser/obo}/types.js +0 -0
- /package/dist/{parser-tools → tools/parser}/pubmed/index.d.ts +0 -0
- /package/dist/{parser-tools → tools/parser}/pubmed/index.js +0 -0
- /package/dist/{parser-tools → tools/parser}/pubmed/pubmed.d.ts +0 -0
- /package/dist/{parser-tools → tools/parser}/pubmed/pubmed.js +0 -0
- /package/dist/{parser-tools → tools/parser}/pubmed/types.d.ts +0 -0
- /package/dist/{parser-tools → tools/parser}/pubmed/utils.d.ts +0 -0
- /package/dist/{parser-tools → tools/parser}/pubmed/utils.js +0 -0
- /package/dist/{skill-tools → tools/skill}/frontmatter.d.ts +0 -0
- /package/dist/{skill-tools → tools/skill}/frontmatter.js +0 -0
- /package/dist/{skill-tools → tools/skill}/index.d.ts +0 -0
- /package/dist/{skill-tools → tools/skill}/index.js +0 -0
- /package/dist/{skill-tools → tools/skill}/registry.d.ts +0 -0
- /package/dist/{skill-tools → tools/skill}/tool.d.ts +0 -0
- /package/dist/{skill-tools → tools/skill}/tool.js +0 -0
- /package/dist/{skill-tools → tools/skill}/types.d.ts +0 -0
- /package/dist/{skill-tools → tools/skill}/types.js +0 -0
- /package/dist/{table-tools → tools/table}/index.d.ts +0 -0
- /package/dist/{table-tools → tools/table}/index.js +0 -0
- /package/dist/{table-tools → tools/table}/tools.d.ts +0 -0
- /package/dist/{table-tools → tools/table}/tools.js +0 -0
- /package/dist/{table-tools → tools/table}/utils.d.ts +0 -0
- /package/dist/{table-tools → tools/table}/utils.js +0 -0
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: long-table-summary
|
|
3
3
|
description: Batch-process large tables using parallel subagents for summarization
|
|
4
|
-
allowedTools:
|
|
5
|
-
- Bash
|
|
6
|
-
- Read
|
|
7
|
-
- Write
|
|
8
|
-
- Question
|
|
9
|
-
- Task
|
|
10
|
-
- tableListSheets
|
|
11
|
-
- tableGetSheetPreview
|
|
12
|
-
- tableGetHeaders
|
|
13
|
-
- tableGetRange
|
|
14
|
-
|
|
4
|
+
allowedTools:
|
|
5
|
+
- Bash
|
|
6
|
+
- Read
|
|
7
|
+
- Write
|
|
8
|
+
- Question
|
|
9
|
+
- Task
|
|
10
|
+
- tableListSheets
|
|
11
|
+
- tableGetSheetPreview
|
|
12
|
+
- tableGetHeaders
|
|
13
|
+
- tableGetRange
|
|
14
|
+
- jsonValidate
|
|
15
|
+
- jsonInfer
|
|
16
|
+
---
|
|
15
17
|
|
|
16
18
|
# Long Table Summary
|
|
17
19
|
|
|
@@ -110,9 +112,70 @@ If user selects "Yes, I want to modify":
|
|
|
110
112
|
- Update the JSON accordingly
|
|
111
113
|
- Repeat the approval question
|
|
112
114
|
|
|
113
|
-
Continue until user explicitly confirms that the instruction JSON is correct.
|
|
114
|
-
|
|
115
|
-
### Step
|
|
115
|
+
Continue until user explicitly confirms that the instruction JSON is correct.
|
|
116
|
+
|
|
117
|
+
### Step 6.5: Generate Output JSON Schema
|
|
118
|
+
|
|
119
|
+
Generate a JSON Schema that defines the exact output structure. All fields are required.
|
|
120
|
+
|
|
121
|
+
**Default value for unavailable data:** Use `"NA"` (string) for any field where data cannot be extracted.
|
|
122
|
+
|
|
123
|
+
**Construct example output:**
|
|
124
|
+
|
|
125
|
+
1. Start with the base structure:
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"batch_number": 1,
|
|
129
|
+
"row_count": 30,
|
|
130
|
+
"summaries": [
|
|
131
|
+
{
|
|
132
|
+
"row_number": 2
|
|
133
|
+
}
|
|
134
|
+
]
|
|
135
|
+
}
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
2. Add each user-specified field with an example value (use `"NA"` if the field might be empty):
|
|
139
|
+
|
|
140
|
+
For example, if user provided:
|
|
141
|
+
```json
|
|
142
|
+
{
|
|
143
|
+
"species": "Species classification: Tier1/Tier2/NA",
|
|
144
|
+
"topic": "Main topic: Oncology/Immunology/Other"
|
|
145
|
+
}
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Construct the example output:
|
|
149
|
+
```json
|
|
150
|
+
{
|
|
151
|
+
"batch_number": 1,
|
|
152
|
+
"row_count": 30,
|
|
153
|
+
"summaries": [
|
|
154
|
+
{
|
|
155
|
+
"row_number": 2,
|
|
156
|
+
"species": "Tier1",
|
|
157
|
+
"topic": "Oncology"
|
|
158
|
+
}
|
|
159
|
+
]
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
**Generate schema with strict mode:**
|
|
164
|
+
|
|
165
|
+
```typescript
|
|
166
|
+
jsonInfer data='<example_output_json>' strict=true
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**Save the returned schema to:**
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
Write file: .long-table-summary/{topic}/schema.json
|
|
173
|
+
Content: <schema_from_jsonInfer>
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
This schema file will be used by all subagents to validate their outputs before writing.
|
|
177
|
+
|
|
178
|
+
### Step 7: Autogenerate Topic Name
|
|
116
179
|
|
|
117
180
|
Generate the topic name by combining:
|
|
118
181
|
- Base filename (without extension)
|
|
@@ -153,61 +216,77 @@ Example for 90 rows with 30 per batch:
|
|
|
153
216
|
|
|
154
217
|
**Note:** Row 1 is the header, data starts at row 2.
|
|
155
218
|
|
|
156
|
-
### Step 10: Create Subagent Prompt Template
|
|
157
|
-
|
|
158
|
-
Create a template with `{placeholder}` format (single braces):
|
|
159
|
-
|
|
160
|
-
```markdown
|
|
161
|
-
# Batch Data Summarization Task
|
|
162
|
-
|
|
163
|
-
## Input File
|
|
164
|
-
-
|
|
165
|
-
- Sheet
|
|
166
|
-
|
|
167
|
-
## Row Range
|
|
168
|
-
- Batch
|
|
169
|
-
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
{
|
|
184
|
-
"
|
|
185
|
-
"
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
"
|
|
189
|
-
<
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
219
|
+
### Step 10: Create Subagent Prompt Template
|
|
220
|
+
|
|
221
|
+
Create a template with `{placeholder}` format (single braces):
|
|
222
|
+
|
|
223
|
+
```markdown
|
|
224
|
+
# Batch Data Summarization Task
|
|
225
|
+
|
|
226
|
+
## Input File
|
|
227
|
+
- Path: `{file_path}`
|
|
228
|
+
- Sheet: `{sheet_name}`
|
|
229
|
+
|
|
230
|
+
## Row Range
|
|
231
|
+
- Batch: {batch_number}
|
|
232
|
+
- Rows: {row_start} to {row_end}
|
|
233
|
+
|
|
234
|
+
## Summarization Instructions
|
|
235
|
+
|
|
236
|
+
For each row, extract these fields:
|
|
237
|
+
|
|
238
|
+
{instructions_json}
|
|
239
|
+
|
|
240
|
+
**Default for unavailable data:** If a field cannot be extracted, use `"NA"` as the value.
|
|
241
|
+
|
|
242
|
+
## Output Structure
|
|
243
|
+
|
|
244
|
+
```json
|
|
245
|
+
{
|
|
246
|
+
"batch_number": {batch_number},
|
|
247
|
+
"row_count": <number_of_rows_in_this_batch>,
|
|
248
|
+
"summaries": [
|
|
249
|
+
{
|
|
250
|
+
"row_number": <row_number>,
|
|
251
|
+
"<field_1>": "<value_or_NA>",
|
|
252
|
+
"<field_2>": "<value_or_NA>"
|
|
253
|
+
}
|
|
254
|
+
]
|
|
255
|
+
}
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## Output Schema
|
|
259
|
+
|
|
260
|
+
Your output must conform to this schema: `{schema_path}`
|
|
261
|
+
|
|
262
|
+
All fields are required. Use `"NA"` for unavailable values.
|
|
263
|
+
|
|
264
|
+
## Mandatory Workflow
|
|
265
|
+
|
|
266
|
+
**Step 1:** Read rows using `tableGetRange`:
|
|
267
|
+
```typescript
|
|
268
|
+
tableGetRange file_path="{file_path}" sheet_name="{sheet_name}" range="A{row_start}:Z{row_end}"
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
**Step 2:** Build JSON in memory with all required fields
|
|
272
|
+
|
|
273
|
+
**Step 3:** Validate BEFORE writing:
|
|
274
|
+
```typescript
|
|
275
|
+
jsonValidate data='<your_complete_json>' schema="{schema_path}"
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
**Step 4:** Check result:
|
|
279
|
+
- If `valid: true` → Go to Step 5
|
|
280
|
+
- If `valid: false` → Fix errors listed in `errors` array, return to Step 3
|
|
281
|
+
|
|
282
|
+
**Step 5:** Write validated JSON to `{output_file}`
|
|
283
|
+
|
|
284
|
+
Output file should contain ONLY the JSON object (no markdown, no extra text).
|
|
285
|
+
|
|
286
|
+
## Output Path
|
|
287
|
+
`{output_file}`
|
|
288
|
+
```
|
|
289
|
+
```
|
|
211
290
|
|
|
212
291
|
### Step 11: Create Directory Structure
|
|
213
292
|
|
|
@@ -226,31 +305,33 @@ Use `generate_prompts.py`:
|
|
|
226
305
|
|
|
227
306
|
**Before Step 13 and Step 17:** Extract the full path to the skill directory from the `<skill_files>` section in the skill tool output. Use this path as `<skill_path>` in the commands below.
|
|
228
307
|
|
|
229
|
-
**Unix-like shells:**
|
|
230
|
-
```bash
|
|
231
|
-
uv run python <skill_path>/generate_prompts.py \
|
|
232
|
-
--template .long-table-summary/{topic}/subagent_template.md \
|
|
233
|
-
--output-dir .long-table-summary/{topic}/prompts \
|
|
234
|
-
--num-batches {num_batches} \
|
|
235
|
-
--sheet-name "{sheet_name}" \
|
|
236
|
-
--file-path "{input_file}" \
|
|
237
|
-
--start-row 2 \
|
|
238
|
-
--batch-size {batch_size} \
|
|
239
|
-
--instructions '{instructions_json}'
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
--
|
|
247
|
-
--
|
|
248
|
-
--
|
|
249
|
-
--
|
|
250
|
-
--
|
|
251
|
-
--
|
|
252
|
-
--
|
|
253
|
-
|
|
308
|
+
**Unix-like shells:**
|
|
309
|
+
```bash
|
|
310
|
+
uv run python <skill_path>/generate_prompts.py \
|
|
311
|
+
--template .long-table-summary/{topic}/subagent_template.md \
|
|
312
|
+
--output-dir .long-table-summary/{topic}/prompts \
|
|
313
|
+
--num-batches {num_batches} \
|
|
314
|
+
--sheet-name "{sheet_name}" \
|
|
315
|
+
--file-path "{input_file}" \
|
|
316
|
+
--start-row 2 \
|
|
317
|
+
--batch-size {batch_size} \
|
|
318
|
+
--instructions '{instructions_json}' \
|
|
319
|
+
--schema-path ".long-table-summary/{topic}/schema.json"
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
**For Windows cmd.exe:**
|
|
323
|
+
```bat
|
|
324
|
+
uv.exe run python <skill_path>\generate_prompts.py ^
|
|
325
|
+
--template .long-table-summary\{topic}\subagent_template.md ^
|
|
326
|
+
--output-dir .long-table-summary\{topic}\prompts ^
|
|
327
|
+
--num-batches {num_batches} ^
|
|
328
|
+
--sheet-name "{sheet_name}" ^
|
|
329
|
+
--file-path "{input_file}" ^
|
|
330
|
+
--start-row 2 ^
|
|
331
|
+
--batch-size {batch_size} ^
|
|
332
|
+
--instructions "{instructions_json}" ^
|
|
333
|
+
--schema-path ".long-table-summary\{topic}\schema.json"
|
|
334
|
+
```
|
|
254
335
|
|
|
255
336
|
**Note:** The `{instructions_json}` is the user-confirmed JSON from Step 6.
|
|
256
337
|
|
|
@@ -293,43 +374,31 @@ For example:
|
|
|
293
374
|
|
|
294
375
|
Do NOT inspect individual subagent outputs midway.
|
|
295
376
|
|
|
296
|
-
### Step 16:
|
|
297
|
-
|
|
298
|
-
After all batches are done, check for missing outputs:
|
|
299
|
-
|
|
300
|
-
```bash
|
|
301
|
-
ls .long-table-summary/{topic}/outputs/
|
|
302
|
-
```
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
3. **If user selects "Continue":**
|
|
323
|
-
- For each failed batch:
|
|
324
|
-
a. Wait 2 seconds
|
|
325
|
-
b. Retry with the same `subagent_type="general"`
|
|
326
|
-
c. Up to 3 retry attempts
|
|
327
|
-
- After retries, check again for remaining failures
|
|
328
|
-
- If batches are still failing, repeat the question with the updated failed list
|
|
329
|
-
|
|
330
|
-
4. **If user selects "Stop":**
|
|
331
|
-
- Do not retry any more batches
|
|
332
|
-
- Proceed to Step 17 with whatever outputs exist
|
|
377
|
+
### Step 16: Check for Missing Outputs
|
|
378
|
+
|
|
379
|
+
After all batches are done, check for missing outputs:
|
|
380
|
+
|
|
381
|
+
```bash
|
|
382
|
+
ls .long-table-summary/{topic}/outputs/
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
Missing files indicate subagent failure. If any are missing:
|
|
386
|
+
|
|
387
|
+
1. Ask user using the `question` tool:
|
|
388
|
+
- "{number} batches failed. Retry failed batches or proceed with available outputs?"
|
|
389
|
+
|
|
390
|
+
2. **Options:**
|
|
391
|
+
- "Retry failed batches"
|
|
392
|
+
- "Proceed with available outputs"
|
|
393
|
+
|
|
394
|
+
3. **If user selects "Retry failed batches":**
|
|
395
|
+
- Re-launch subagent with same prompt file for each failed batch
|
|
396
|
+
|
|
397
|
+
4. **If user selects "Proceed with available outputs":**
|
|
398
|
+
- Continue to Step 17 with available outputs
|
|
399
|
+
|
|
400
|
+
Note: Since subagents validate their outputs before writing, existing files should contain valid JSON.
|
|
401
|
+
|
|
333
402
|
|
|
334
403
|
### Step 17: Combine All JSON Outputs
|
|
335
404
|
|
|
@@ -375,28 +444,30 @@ Provide user with:
|
|
|
375
444
|
|
|
376
445
|
## Python Scripts
|
|
377
446
|
|
|
378
|
-
### Script 1: `generate_prompts.py`
|
|
379
|
-
|
|
380
|
-
**Arguments:**
|
|
381
|
-
- `--template`: Path to subagent_template.md
|
|
382
|
-
- `--output-dir`: Directory for generated prompts
|
|
383
|
-
- `--num-batches`: Total number of batches
|
|
384
|
-
- `--sheet-name`: Sheet name
|
|
385
|
-
- `--file-path`: Full path to the input table file
|
|
386
|
-
- `--start-row`: Starting data row (default: 2)
|
|
387
|
-
- `--batch-size`: Rows per batch (default: 30)
|
|
388
|
-
- `--instructions`: User-confirmed JSON with summarization fields
|
|
389
|
-
- `--
|
|
390
|
-
- `--
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
- `{
|
|
395
|
-
- `{
|
|
396
|
-
- `{
|
|
397
|
-
- `{
|
|
398
|
-
- `{
|
|
399
|
-
- `{
|
|
447
|
+
### Script 1: `generate_prompts.py`
|
|
448
|
+
|
|
449
|
+
**Arguments:**
|
|
450
|
+
- `--template`: Path to subagent_template.md
|
|
451
|
+
- `--output-dir`: Directory for generated prompts
|
|
452
|
+
- `--num-batches`: Total number of batches
|
|
453
|
+
- `--sheet-name`: Sheet name
|
|
454
|
+
- `--file-path`: Full path to the input table file
|
|
455
|
+
- `--start-row`: Starting data row (default: 2)
|
|
456
|
+
- `--batch-size`: Rows per batch (default: 30)
|
|
457
|
+
- `--instructions`: User-confirmed JSON with summarization fields
|
|
458
|
+
- `--schema-path`: Path to output JSON Schema file (required)
|
|
459
|
+
- `--dry-run`: Validate without creating files (optional)
|
|
460
|
+
- `--verbose`: Enable verbose output for debugging (optional)
|
|
461
|
+
|
|
462
|
+
**Placeholders to replace:**
|
|
463
|
+
- `{file_path}` → Absolute input file path
|
|
464
|
+
- `{sheet_name}` → Sheet name
|
|
465
|
+
- `{batch_number}` → Batch number (001, 002, etc.)
|
|
466
|
+
- `{row_start}` → Start row
|
|
467
|
+
- `{row_end}` → End row
|
|
468
|
+
- `{output_file}` → Output file path
|
|
469
|
+
- `{instructions_json}` → User's JSON instruction (properly escaped for markdown code block)
|
|
470
|
+
- `{schema_path}` → Path to output JSON Schema file
|
|
400
471
|
|
|
401
472
|
### Script 2: `combine_outputs.py`
|
|
402
473
|
|
|
@@ -37,20 +37,17 @@ def read_json_outputs(input_dir: str, verbose: bool = False) -> Dict[str, Any]:
|
|
|
37
37
|
with open(batch_file, "r", encoding="utf-8") as f:
|
|
38
38
|
content = f.read().strip()
|
|
39
39
|
|
|
40
|
-
#
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
if json_start == -1 or json_end == 0:
|
|
40
|
+
# Extract JSON using brace matching (string-aware, handles nested structures)
|
|
41
|
+
extracted = extract_json_from_content(content)
|
|
42
|
+
if extracted is None:
|
|
45
43
|
if verbose:
|
|
46
|
-
print(f"Warning: No JSON found in {batch_file.name}")
|
|
44
|
+
print(f"Warning: No valid JSON found in {batch_file.name}")
|
|
47
45
|
continue
|
|
48
46
|
|
|
49
|
-
|
|
50
|
-
data = json.loads(json_str)
|
|
51
|
-
all_summaries.append(data)
|
|
47
|
+
all_summaries.append(extracted)
|
|
52
48
|
|
|
53
49
|
if verbose:
|
|
50
|
+
data = extracted
|
|
54
51
|
print(
|
|
55
52
|
f"Parsed: {batch_file.name} - {len(data.get('summaries', []))} summaries"
|
|
56
53
|
)
|
|
@@ -67,6 +64,55 @@ def read_json_outputs(input_dir: str, verbose: bool = False) -> Dict[str, Any]:
|
|
|
67
64
|
return {"success": True, "summaries": all_summaries}
|
|
68
65
|
|
|
69
66
|
|
|
67
|
+
def extract_json_from_content(content: str) -> dict | None:
|
|
68
|
+
"""Extract JSON from content using string-aware brace matching.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
content: File content string
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
Parsed JSON dict or None if not found
|
|
75
|
+
"""
|
|
76
|
+
# Try direct parse first (file contains only JSON)
|
|
77
|
+
try:
|
|
78
|
+
return json.loads(content)
|
|
79
|
+
except json.JSONDecodeError:
|
|
80
|
+
pass
|
|
81
|
+
|
|
82
|
+
# Find JSON object boundaries with brace matching
|
|
83
|
+
start = content.find("{")
|
|
84
|
+
if start == -1:
|
|
85
|
+
return None
|
|
86
|
+
|
|
87
|
+
depth = 0
|
|
88
|
+
in_string = False
|
|
89
|
+
escape = False
|
|
90
|
+
|
|
91
|
+
for i in range(start, len(content)):
|
|
92
|
+
char = content[i]
|
|
93
|
+
if escape:
|
|
94
|
+
escape = False
|
|
95
|
+
continue
|
|
96
|
+
if char == "\\":
|
|
97
|
+
escape = True
|
|
98
|
+
continue
|
|
99
|
+
if char == '"':
|
|
100
|
+
in_string = not in_string
|
|
101
|
+
continue
|
|
102
|
+
if not in_string:
|
|
103
|
+
if char == "{":
|
|
104
|
+
depth += 1
|
|
105
|
+
elif char == "}":
|
|
106
|
+
depth -= 1
|
|
107
|
+
if depth == 0:
|
|
108
|
+
try:
|
|
109
|
+
return json.loads(content[start : i + 1])
|
|
110
|
+
except json.JSONDecodeError:
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
return None
|
|
114
|
+
|
|
115
|
+
|
|
70
116
|
def merge_summaries(
|
|
71
117
|
summaries: List[Dict[str, Any]],
|
|
72
118
|
deduplicate: bool = False,
|
|
@@ -17,6 +17,7 @@ def generate_prompts(
|
|
|
17
17
|
batch_size,
|
|
18
18
|
file_path,
|
|
19
19
|
instructions,
|
|
20
|
+
schema_path,
|
|
20
21
|
dry_run=False,
|
|
21
22
|
verbose=False,
|
|
22
23
|
):
|
|
@@ -31,6 +32,7 @@ def generate_prompts(
|
|
|
31
32
|
batch_size: Rows per batch
|
|
32
33
|
file_path: Full path to input table file
|
|
33
34
|
instructions: User-provided summarization instructions (JSON string)
|
|
35
|
+
schema_path: Path to output JSON Schema file
|
|
34
36
|
dry_run: Validate without creating files
|
|
35
37
|
verbose: Enable verbose output
|
|
36
38
|
"""
|
|
@@ -83,6 +85,7 @@ def generate_prompts(
|
|
|
83
85
|
content = content.replace("{row_end}", str(row_end))
|
|
84
86
|
content = content.replace("{output_file}", output_file)
|
|
85
87
|
content = content.replace("{instructions_json}", instructions_escaped)
|
|
88
|
+
content = content.replace("{schema_path}", schema_path)
|
|
86
89
|
|
|
87
90
|
# Dry run mode - skip actual file writes
|
|
88
91
|
if dry_run:
|
|
@@ -147,6 +150,11 @@ def main():
|
|
|
147
150
|
required=True,
|
|
148
151
|
help="User-provided summarization instructions (JSON string)",
|
|
149
152
|
)
|
|
153
|
+
parser.add_argument(
|
|
154
|
+
"--schema-path",
|
|
155
|
+
required=True,
|
|
156
|
+
help="Path to output JSON Schema file (relative or absolute)",
|
|
157
|
+
)
|
|
150
158
|
parser.add_argument(
|
|
151
159
|
"--dry-run",
|
|
152
160
|
action="store_true",
|
|
@@ -200,6 +208,7 @@ def main():
|
|
|
200
208
|
batch_size=args.batch_size,
|
|
201
209
|
file_path=args.file_path,
|
|
202
210
|
instructions=args.instructions,
|
|
211
|
+
schema_path=args.schema_path,
|
|
203
212
|
dry_run=args.dry_run,
|
|
204
213
|
verbose=args.verbose,
|
|
205
214
|
)
|