@yeyuan98/opencode-bioresearcher-plugin 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/dist/index.js +4 -1
- package/dist/misc-tools/index.d.ts +3 -0
- package/dist/misc-tools/index.js +3 -0
- package/dist/misc-tools/json-extract.d.ts +13 -0
- package/dist/misc-tools/json-extract.js +394 -0
- package/dist/misc-tools/json-infer.d.ts +13 -0
- package/dist/misc-tools/json-infer.js +199 -0
- package/dist/misc-tools/json-tools.d.ts +33 -0
- package/dist/misc-tools/json-tools.js +187 -0
- package/dist/misc-tools/json-validate.d.ts +13 -0
- package/dist/misc-tools/json-validate.js +228 -0
- package/dist/skills/bioresearcher-core/README.md +210 -0
- package/dist/skills/bioresearcher-core/SKILL.md +128 -0
- package/dist/skills/bioresearcher-core/examples/contexts.json +29 -0
- package/dist/skills/bioresearcher-core/examples/data-exchange-example.md +303 -0
- package/dist/skills/bioresearcher-core/examples/template.md +49 -0
- package/dist/skills/bioresearcher-core/patterns/calculator.md +215 -0
- package/dist/skills/bioresearcher-core/patterns/data-exchange.md +406 -0
- package/dist/skills/bioresearcher-core/patterns/json-tools.md +263 -0
- package/dist/skills/bioresearcher-core/patterns/progress.md +127 -0
- package/dist/skills/bioresearcher-core/patterns/retry.md +110 -0
- package/dist/skills/bioresearcher-core/patterns/shell-commands.md +79 -0
- package/dist/skills/bioresearcher-core/patterns/subagent-waves.md +186 -0
- package/dist/skills/bioresearcher-core/patterns/table-tools.md +260 -0
- package/dist/skills/bioresearcher-core/patterns/user-confirmation.md +187 -0
- package/dist/skills/bioresearcher-core/python/template.md +273 -0
- package/dist/skills/bioresearcher-core/python/template.py +323 -0
- package/dist/skills/long-table-summary/SKILL.md +437 -0
- package/dist/skills/long-table-summary/combine_outputs.py +336 -0
- package/dist/skills/long-table-summary/generate_prompts.py +211 -0
- package/dist/skills/long-table-summary/pyproject.toml +8 -0
- package/dist/skills/pubmed-weekly/SKILL.md +329 -329
- package/dist/skills/pubmed-weekly/pubmed_weekly.py +411 -411
- package/dist/skills/pubmed-weekly/pyproject.toml +8 -8
- package/package.json +7 -2
package/README.md
CHANGED
|
@@ -77,6 +77,18 @@ Download pubmed article data from https://ftp.ncbi.nlm.nih.gov/pubmed/updatefile
|
|
|
77
77
|
|
|
78
78
|
Reference: [PubMed Download Data](https://pubmed.ncbi.nlm.nih.gov/download/).
|
|
79
79
|
|
|
80
|
+
### JSON Tools
|
|
81
|
+
|
|
82
|
+
Extract, validate, and infer JSON schemas from data.
|
|
83
|
+
|
|
84
|
+
**Robust JSON handling for LLM workflows.**
|
|
85
|
+
|
|
86
|
+
```text
|
|
87
|
+
Extract JSON from output.md using jsonExtract tool.
|
|
88
|
+
Validate data.json against schema.json using jsonValidate tool.
|
|
89
|
+
Infer schema from sample.json using jsonInfer tool.
|
|
90
|
+
```
|
|
91
|
+
|
|
80
92
|
## Skills
|
|
81
93
|
|
|
82
94
|
Skills are reusable prompt templates discovered from multiple paths:
|
|
@@ -97,6 +109,8 @@ See [skill-tools/README.md](skill-tools/README.md) for full documentation.
|
|
|
97
109
|
- `demo-skill`: showcase skill tool mechanisms.
|
|
98
110
|
- `python-setup-uv`: setup python runtime in your working directory with uv.
|
|
99
111
|
- `pubmed-weekly`: automated download of PubMed daily update files from the past week.
|
|
112
|
+
- `long-table-summary`: batch-process large tables using parallel subagents for summarization.
|
|
113
|
+
- `bioresearcher-core`: core patterns and utilities (retry, JSON tools, subagent waves) for skill development.
|
|
100
114
|
|
|
101
115
|
Prompt the following and follow along:
|
|
102
116
|
|
package/dist/index.js
CHANGED
|
@@ -2,7 +2,7 @@ import { createBioResearcherAgent } from "./agents/bioresearcher/index";
|
|
|
2
2
|
import { createBioResearcherDRAgent } from "./agents/bioresearcherDR/index";
|
|
3
3
|
import { createBioResearcherDRWorkerAgent } from "./agents/bioresearcherDR_worker/index";
|
|
4
4
|
import { tableTools } from "./table-tools/index";
|
|
5
|
-
import { blockingTimer, calculator } from "./misc-tools/index";
|
|
5
|
+
import { blockingTimer, calculator, jsonExtract, jsonValidate, jsonInfer } from "./misc-tools/index";
|
|
6
6
|
import { parse_pubmed_articleSet } from "./parser-tools/pubmed";
|
|
7
7
|
import { SkillTool } from "./skill-tools";
|
|
8
8
|
export const BioResearcherPlugin = async () => {
|
|
@@ -18,6 +18,9 @@ export const BioResearcherPlugin = async () => {
|
|
|
18
18
|
...tableTools,
|
|
19
19
|
blockingTimer,
|
|
20
20
|
calculator,
|
|
21
|
+
jsonExtract,
|
|
22
|
+
jsonValidate,
|
|
23
|
+
jsonInfer,
|
|
21
24
|
parse_pubmed_articleSet
|
|
22
25
|
}
|
|
23
26
|
};
|
package/dist/misc-tools/index.js
CHANGED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { ToolContext } from '@opencode-ai/plugin/tool';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
/**
 * Type declaration for the `jsonExtract` tool object.
 *
 * The tool reads a file and extracts JSON from it; the result of `execute`
 * is always delivered as a string.
 */
export declare const jsonExtract: {
    /** Human-readable description of the tool. */
    description: string;
    /** Zod schemas describing the arguments the tool accepts. */
    args: {
        /** Schema for the path of the file to read. */
        file_path: z.ZodString;
        /** Schema for the "return every match" flag; carries a default value. */
        return_all: z.ZodDefault<z.ZodBoolean>;
    };
    /**
     * Runs the tool.
     *
     * @param args - Validated arguments (`file_path` and `return_all`).
     * @param context - Tool context supplied by the plugin host.
     * @returns A promise resolving to the tool's string result.
     */
    execute(args: {
        file_path: string;
        return_all: boolean;
    }, context: ToolContext): Promise<string>;
};
|
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
import { tool } from '@opencode-ai/plugin/tool';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import * as fs from 'fs';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
const MAX_FILE_SIZE = 200 * 1024 * 1024;
|
|
6
|
+
const BINARY_EXTENSIONS = [
|
|
7
|
+
'.bin', '.exe', '.dll', '.so', '.dylib',
|
|
8
|
+
'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico',
|
|
9
|
+
'.pdf', '.zip', '.tar', '.gz', '.rar',
|
|
10
|
+
'.mp3', '.mp4', '.avi', '.mov', '.wav'
|
|
11
|
+
];
|
|
12
|
+
/**
 * Locates the `}` that balances the `{` found at `startIdx`.
 *
 * Double-quoted string contents are ignored while counting, and a backslash
 * causes the character after it to be skipped.
 *
 * @param {string} content - Text to scan.
 * @param {number} startIdx - Index at which scanning starts (normally a `{`).
 * @returns {number} Index of the balancing `}`, or -1 when none is found.
 */
function findMatchingBrace(content, startIdx) {
    let nesting = 0;
    let insideString = false;
    let skipNext = false;
    let cursor = startIdx;
    while (cursor < content.length) {
        const ch = content[cursor];
        if (skipNext) {
            // The previous character was a backslash: this one is escaped.
            skipNext = false;
        }
        else if (ch === '\\') {
            skipNext = true;
        }
        else if (ch === '"') {
            insideString = !insideString;
        }
        else if (!insideString) {
            if (ch === '{') {
                nesting += 1;
            }
            else if (ch === '}') {
                nesting -= 1;
                if (nesting === 0) {
                    return cursor;
                }
            }
        }
        cursor += 1;
    }
    return -1;
}
|
|
42
|
+
/**
 * Locates the `]` that balances the `[` found at `startIdx`.
 *
 * Double-quoted string contents are ignored while counting, and a backslash
 * causes the character after it to be skipped.
 *
 * @param {string} content - Text to scan.
 * @param {number} startIdx - Index at which scanning starts (normally a `[`).
 * @returns {number} Index of the balancing `]`, or -1 when none is found.
 */
function findMatchingBracket(content, startIdx) {
    let nesting = 0;
    let insideString = false;
    let skipNext = false;
    let cursor = startIdx;
    while (cursor < content.length) {
        const ch = content[cursor];
        if (skipNext) {
            // The previous character was a backslash: this one is escaped.
            skipNext = false;
        }
        else if (ch === '\\') {
            skipNext = true;
        }
        else if (ch === '"') {
            insideString = !insideString;
        }
        else if (!insideString) {
            if (ch === '[') {
                nesting += 1;
            }
            else if (ch === ']') {
                nesting -= 1;
                if (nesting === 0) {
                    return cursor;
                }
            }
        }
        cursor += 1;
    }
    return -1;
}
|
|
72
|
+
/**
 * Reports whether `filePath` resolves to `directory` itself or to a location
 * underneath it.
 *
 * Both arguments are resolved to absolute paths first, so `..` segments are
 * collapsed before the comparison. Containment is decided from
 * `path.relative`, which (unlike a string-prefix test) also works when
 * `directory` is the filesystem root and follows the platform's own path
 * comparison rules. Symbolic links are not resolved.
 *
 * @param {string} filePath - Path to test (absolute or relative to the cwd).
 * @param {string} directory - Directory that must contain `filePath`.
 * @returns {boolean} True when the resolved path is inside `directory`.
 */
function isPathWithinDirectory(filePath, directory) {
    const target = path.resolve(filePath);
    const base = path.resolve(directory);
    const relative = path.relative(base, target);
    if (relative === '') {
        // Same location as the directory itself.
        return true;
    }
    if (path.isAbsolute(relative)) {
        // No relative route exists (e.g. a different drive on Windows).
        return false;
    }
    // Escapes the directory only if the first segment is exactly "..";
    // a file merely named "..something" is still inside.
    return relative !== '..' && !relative.startsWith('..' + path.sep);
}
|
|
78
|
+
/**
 * Extracts the first JSON value from a text document.
 *
 * Strategies are tried in this order, returning on the first success:
 *  1. the whole (trimmed) document is a JSON object or array;
 *  2. the first ```json fenced block;
 *  3. the first fenced block of any kind;
 *  4. the first balanced `{...}` / `[...]` span, trying whichever opener
 *     appears earlier in the text first.
 *
 * Steps 1 and the position-ordered scan in step 4 ensure that a document
 * such as `[{"a":1},{"b":2}]` is returned as the array rather than as its
 * first element.
 *
 * @param {string} content - Raw file contents.
 * @returns {{success: true, data: any, method: string, dataType: string}
 *          | {success: false, error: string}}
 *          On failure `error` is `'EMPTY_FILE'` or `'NO_JSON_FOUND'`.
 */
function extractJsonFromContent(content) {
    const trimmed = content.trim();
    if (trimmed.length === 0) {
        return { success: false, error: 'EMPTY_FILE' };
    }
    // JSON.parse wrapper: a wrapper object on success (so `null` is
    // distinguishable from failure), undefined when the text is not JSON.
    const tryParse = (text) => {
        try {
            return { value: JSON.parse(text) };
        }
        catch {
            return undefined;
        }
    };
    // 1. Whole document. Bare primitives are deliberately not accepted here.
    const whole = tryParse(trimmed);
    if (whole && whole.value !== null && typeof whole.value === 'object') {
        const kind = Array.isArray(whole.value) ? 'array' : 'object';
        return { success: true, data: whole.value, method: kind, dataType: kind };
    }
    // 2 + 3. Fenced code blocks: the ```json form takes priority.
    const fences = [
        { pattern: /```json\s*([\s\S]*?)\s*```/i, method: 'json_code_block' },
        { pattern: /```\s*([\s\S]*?)\s*```/i, method: 'code_block' }
    ];
    for (const { pattern, method } of fences) {
        const fenced = trimmed.match(pattern);
        if (!fenced) {
            continue;
        }
        const parsed = tryParse(fenced[1].trim());
        if (parsed) {
            return {
                success: true,
                data: parsed.value,
                method,
                dataType: Array.isArray(parsed.value) ? 'array' : 'object'
            };
        }
    }
    // 4. Raw scan, earliest opener first so an enclosing array wins over
    //    the objects nested inside it.
    const candidates = [
        { start: trimmed.indexOf('{'), kind: 'object', findEnd: (text, idx) => findMatchingBrace(text, idx) },
        { start: trimmed.indexOf('['), kind: 'array', findEnd: (text, idx) => findMatchingBracket(text, idx) }
    ]
        .filter((candidate) => candidate.start !== -1)
        .sort((a, b) => a.start - b.start);
    for (const candidate of candidates) {
        const end = candidate.findEnd(trimmed, candidate.start);
        if (end === -1 || end <= candidate.start) {
            continue;
        }
        const parsed = tryParse(trimmed.slice(candidate.start, end + 1));
        if (parsed) {
            return {
                success: true,
                data: parsed.value,
                method: candidate.kind,
                dataType: candidate.kind
            };
        }
    }
    return { success: false, error: 'NO_JSON_FOUND' };
}
|
|
147
|
+
/**
 * Extracts every JSON value found in a text document.
 *
 * Strategies, in priority order (the first one that yields anything wins):
 *  1. every ```json fenced block that parses;
 *  2. every fenced block of any kind that parses;
 *  3. every balanced `{...}` / `[...]` span in the raw text that parses.
 *
 * During the raw scan the cursor always moves forward: past the whole span
 * when it parsed, otherwise just past the opener so that valid JSON nested
 * inside (or following) an unparseable span is still found. Previously a
 * balanced-but-invalid span such as `{not json}` left the cursor unchanged
 * and the loop never terminated.
 *
 * @param {string} content - Raw file contents.
 * @returns {{success: true, data: any[], method: string, dataType: string, count: number}
 *          | {success: false, error: string}}
 *          On failure `error` is `'EMPTY_FILE'` or `'NO_JSON_FOUND'`.
 */
function extractAllJsonFromContent(content) {
    const trimmed = content.trim();
    if (trimmed.length === 0) {
        return { success: false, error: 'EMPTY_FILE' };
    }
    // JSON.parse wrapper: a wrapper object on success, undefined on failure.
    const tryParse = (text) => {
        try {
            return { value: JSON.parse(text) };
        }
        catch {
            return undefined;
        }
    };
    // 1 + 2. Fenced code blocks.
    const fencePasses = [
        { pattern: /```json\s*([\s\S]*?)\s*```/gi, method: 'json_code_block' },
        { pattern: /```\s*([\s\S]*?)\s*```/gi, method: 'code_block' }
    ];
    for (const { pattern, method } of fencePasses) {
        const found = [];
        let match;
        while ((match = pattern.exec(trimmed)) !== null) {
            const parsed = tryParse(match[1].trim());
            if (parsed) {
                found.push(parsed.value);
            }
        }
        if (found.length > 0) {
            return {
                success: true,
                data: found,
                method,
                dataType: 'mixed',
                count: found.length
            };
        }
    }
    // 3. Raw scan over balanced spans.
    const values = [];
    let objectCount = 0;
    let arrayCount = 0;
    let pos = 0;
    while (pos < trimmed.length) {
        const objStart = trimmed.indexOf('{', pos);
        const arrStart = trimmed.indexOf('[', pos);
        if (objStart === -1 && arrStart === -1) {
            break;
        }
        const isObject = objStart !== -1 && (arrStart === -1 || objStart < arrStart);
        const start = isObject ? objStart : arrStart;
        const end = isObject
            ? findMatchingBrace(trimmed, start)
            : findMatchingBracket(trimmed, start);
        const parsed = end > start ? tryParse(trimmed.slice(start, end + 1)) : undefined;
        if (parsed) {
            values.push(parsed.value);
            if (isObject) {
                objectCount += 1;
            }
            else {
                arrayCount += 1;
            }
            pos = end + 1;
        }
        else {
            // Unbalanced or unparseable: step past the opener and keep going.
            pos = start + 1;
        }
    }
    if (values.length === 0) {
        return { success: false, error: 'NO_JSON_FOUND' };
    }
    const sawBoth = objectCount > 0 && arrayCount > 0;
    return {
        success: true,
        data: values,
        method: objectCount > 0 ? 'object' : 'array',
        dataType: sawBoth ? 'mixed' : objectCount > 0 ? 'object' : 'array',
        count: values.length
    };
}
|
|
249
|
+
/**
 * `jsonExtract` tool: reads a file inside the project directory and returns
 * the JSON found in it.
 *
 * The result is always a pretty-printed JSON string. On success it has the
 * shape `{ success: true, data, metadata }`; on failure
 * `{ success: false, error: { code, message, details, hints } }`.
 *
 * Checks run in this order: path containment, existence, directory,
 * size limit (`MAX_FILE_SIZE`), binary extension, then extraction.
 */
export const jsonExtract = tool({
    description: "Extract and parse JSON from a file. Handles markdown code blocks (```json) and raw JSON with string-aware extraction. Returns first JSON found with metadata.",
    args: {
        file_path: z.string().describe("Path to file containing JSON (absolute or relative to project directory)"),
        return_all: z.boolean().default(false).describe("If true, returns all JSON objects found as array; if false (default), returns first only")
    },
    execute: async (args, context) => {
        // Every failure path shares this envelope.
        const failure = (code, message, details, hints) => JSON.stringify({
            success: false,
            error: { code, message, details, hints }
        }, null, 2);
        try {
            const requested = args.file_path;
            // Relative paths are anchored at the project directory.
            const resolvedPath = path.isAbsolute(requested)
                ? requested
                : path.join(context.directory, requested);
            if (!isPathWithinDirectory(resolvedPath, context.directory)) {
                return failure('PATH_TRAVERSAL', 'Path must be within project directory', `Attempted path: ${requested}`, ['Use a relative path within the project directory', 'Ensure the path does not contain ..']);
            }
            if (!fs.existsSync(resolvedPath)) {
                return failure('FILE_NOT_FOUND', `File not found: ${requested}`, `Resolved path: ${resolvedPath}`, ['Check the file path for typos', 'Use a relative path from the project root']);
            }
            const stats = fs.statSync(resolvedPath);
            if (stats.isDirectory()) {
                return failure('FILE_NOT_FOUND', 'Path is a directory, not a file', `Path: ${requested}`, ['Provide a file path, not a directory']);
            }
            if (stats.size > MAX_FILE_SIZE) {
                const sizeMB = (stats.size / 1024 / 1024).toFixed(2);
                return failure('FILE_TOO_LARGE', `File exceeds maximum size of 200MB`, `File size: ${sizeMB}MB`, ['Use a smaller file', 'Consider splitting the file', 'Increase the MAX_FILE_SIZE constant']);
            }
            const ext = path.extname(resolvedPath).toLowerCase();
            if (BINARY_EXTENSIONS.includes(ext)) {
                return failure('BINARY_FILE', `Binary file format not supported: ${ext}`, `File extension: ${ext}`, ['Use a text-based file (JSON, Markdown, TXT)', 'Provide file contents as plain text']);
            }
            const content = fs.readFileSync(resolvedPath, 'utf-8');
            const result = args.return_all
                ? extractAllJsonFromContent(content)
                : extractJsonFromContent(content);
            if (!result.success) {
                return failure(result.error, getErrorMessage(result.error), `File: ${requested}`, getErrorHints(result.error));
            }
            const metadata = {
                method: result.method,
                dataType: result.dataType,
                fileSize: stats.size
            };
            // `count` is only reported in return-all mode.
            if (args.return_all && result.count !== undefined) {
                metadata.count = result.count;
            }
            return JSON.stringify({
                success: true,
                data: result.data,
                metadata
            }, null, 2);
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            const isEncodingProblem = message.includes('encoding') || message.includes('EILSEQ');
            if (isEncodingProblem) {
                return failure('ENCODING_ERROR', 'File encoding error - could not read as UTF-8', message, ['Ensure file is UTF-8 encoded', 'Try converting file encoding']);
            }
            return failure('NO_JSON_FOUND', 'Unexpected error while extracting JSON', message, ['Check file contents', 'Ensure file is not corrupted']);
        }
    }
});
|
|
367
|
+
/**
 * Maps an extraction error code to its user-facing message.
 *
 * @param {string} code - Error code such as `'EMPTY_FILE'`.
 * @returns {string} The matching message, or `'Extraction failed'` when the
 *          code has no entry.
 */
function getErrorMessage(code) {
    const knownMessage = {
        EMPTY_FILE: 'File is empty',
        NO_JSON_FOUND: 'No valid JSON found in file',
        FILE_NOT_FOUND: 'File not found',
        FILE_TOO_LARGE: 'File exceeds maximum size',
        BINARY_FILE: 'Binary file format not supported',
        PATH_TRAVERSAL: 'Path must be within project directory',
        ENCODING_ERROR: 'File encoding error - could not read as UTF-8'
    }[code];
    return knownMessage ? knownMessage : 'Extraction failed';
}
|
|
379
|
+
/**
 * Maps an extraction error code to a list of remediation hints.
 *
 * @param {string} code - Error code such as `'NO_JSON_FOUND'`.
 * @returns {string[]} Hints for the code, or `['Check file contents']` when
 *          the code has no entry.
 */
function getErrorHints(code) {
    const knownHints = {
        EMPTY_FILE: ['Provide a file with content'],
        NO_JSON_FOUND: [
            'Ensure file contains valid JSON',
            'JSON can be in markdown code blocks (```json) or raw',
            'Both objects {...} and arrays [...] are supported'
        ],
        FILE_NOT_FOUND: ['Check the file path for typos', 'Use a relative path from the project root'],
        FILE_TOO_LARGE: ['Use a smaller file', 'Consider splitting the file'],
        BINARY_FILE: ['Use a text-based file (JSON, Markdown, TXT)'],
        PATH_TRAVERSAL: ['Use a relative path within the project directory', 'Ensure the path does not contain ..'],
        ENCODING_ERROR: ['Ensure file is UTF-8 encoded', 'Try converting file encoding']
    }[code];
    return knownHints ? knownHints : ['Check file contents'];
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { ToolContext } from '@opencode-ai/plugin/tool';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
/**
 * Type declaration for the `jsonInfer` tool object.
 *
 * The tool takes example JSON as a string and infers a schema from it; the
 * result of `execute` is always delivered as a string.
 */
export declare const jsonInfer: {
    /** Human-readable description of the tool. */
    description: string;
    /** Zod schemas describing the arguments the tool accepts. */
    args: {
        /** Schema for the example JSON, passed as a string. */
        data: z.ZodString;
        /** Schema for the strict-mode flag; carries a default value. */
        strict: z.ZodDefault<z.ZodBoolean>;
    };
    /**
     * Runs the tool.
     *
     * @param args - Validated arguments (`data` and `strict`).
     * @param context - Tool context supplied by the plugin host.
     * @returns A promise resolving to the tool's string result.
     */
    execute(args: {
        data: string;
        strict: boolean;
    }, context: ToolContext): Promise<string>;
};
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
import { tool } from '@opencode-ai/plugin/tool';
|
|
2
|
+
import { z, toJSONSchema } from 'zod';
|
|
3
|
+
/**
 * Tests whether a string looks like an ISO 8601 date-time.
 *
 * Accepted shape: `YYYY-MM-DDTHH:MM`, optionally followed by `:SS`, then an
 * optional fractional part (`.123`), then an optional zone (`Z` or
 * `+HH:MM` / `-HH:MM`). Only the shape is checked, not calendar validity.
 * Date-only strings do not match.
 *
 * The seconds component was previously not allowed at all, so common
 * timestamps such as `2024-01-15T10:30:00Z` were never recognised.
 *
 * @param {string} value - Candidate string.
 * @returns {boolean} True when `value` has the date-time shape above.
 */
function isISODateString(value) {
    return /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}(:\d{2})?(\.\d+)?(Z|[+-]\d{2}:\d{2})?$/.test(value);
}
|
|
6
|
+
/**
 * Infers a Zod schema for an array from its elements.
 *
 * Every return path yields `schemas` with a single entry: the schema for
 * the array as a whole.
 *  - empty array: array of `any`, with an `EMPTY_ARRAY` warning;
 *  - more than two distinct element types: array of `any`, with a
 *    `MIXED_TYPE_ARRAY` warning;
 *  - exactly two element types: array of a union, with a
 *    `MIXED_TYPE_ARRAY` warning;
 *  - one element type: array of that type. For all-number arrays that
 *    contain at least one non-integer, the element type is plain `number`
 *    so that the sample itself satisfies the schema.
 *
 * Distinct element schemas are keyed by the JSON serialisation of the
 * schema's internal definition (NOTE(review): relies on Zod internals
 * `_zod.def` - confirm against the installed Zod version).
 *
 * @param {any[]} data - The array to analyse.
 * @param {boolean} strict - Forwarded to `inferZodSchema` for each element.
 * @returns {{schemas: any[], types: Set<string>, warnings: object[]}}
 */
function collectArrayElementTypes(data, strict) {
    const warnings = [];
    if (data.length === 0) {
        warnings.push({
            code: 'EMPTY_ARRAY',
            message: 'Empty array - cannot infer element type',
            details: 'Using "any" as element type since no data available',
            hints: ['Provide at least one element in the array for type inference', 'Use z.any() if empty arrays are expected']
        });
        // Still an array schema: only the element type is unknown.
        return { schemas: [z.array(z.any())], types: new Set(['any']), warnings };
    }
    const typeMap = new Map();
    for (const element of data) {
        const elementSchema = inferZodSchema(element, strict);
        const typeKey = JSON.stringify(elementSchema._zod.def);
        if (!typeMap.has(typeKey)) {
            typeMap.set(typeKey, elementSchema);
        }
    }
    const schemas = Array.from(typeMap.values());
    const types = new Set(schemas.map(s => s._zod.def.type));
    // Limit union to 2 types per plan specification
    if (types.size > 2) {
        const unionTypes = Array.from(types).join(', ');
        warnings.push({
            code: 'MIXED_TYPE_ARRAY',
            message: 'Too many types in array for union',
            details: `Found ${types.size} types (${unionTypes}), using "any" as element type`,
            hints: ['Limit array elements to 1 or 2 types for better type inference', 'Consider using explicit schemas for complex mixed-type arrays']
        });
        return { schemas: [z.array(z.any())], types: new Set(['any']), warnings };
    }
    if (types.size === 2) {
        const unionTypes = Array.from(types).join(', ');
        warnings.push({
            code: 'MIXED_TYPE_ARRAY',
            message: `Array contains 2 different types: ${unionTypes}`,
            details: `Types found: ${unionTypes}. Using union type.`,
            hints: ['For stronger type safety, consider keeping array elements homogeneous', 'Union types are limited to 2 variants for inference']
        });
        const unionSchema = z.union(schemas);
        return { schemas: [z.array(unionSchema)], types, warnings };
    }
    // Single type. A mix of integers and non-integers must not be narrowed
    // to the integer schema just because an integer happened to come first.
    const allNumbers = data.every((element) => typeof element === 'number');
    if (allNumbers && data.some((element) => !Number.isInteger(element))) {
        return { schemas: [z.array(z.number())], types, warnings };
    }
    return { schemas: [z.array(schemas[0])], types, warnings };
}
|
|
51
|
+
/**
 * Recursively infers a Zod schema from a parsed JSON value.
 *
 * `null` maps to `z.null()`, arrays are delegated to
 * `collectArrayElementTypes`, objects become `z.object` with one entry per
 * key, strings become `z.string()` (with `.datetime()` when
 * `isISODateString` matches), numbers become `z.number()` (with `.int()`
 * for integers), booleans become `z.boolean()`, anything else `z.any()`.
 *
 * @param {any} data - Parsed JSON value.
 * @param {boolean} strict - When falsy, every object field is marked
 *        optional.
 * @returns {any} The inferred Zod schema.
 */
function inferZodSchema(data, strict) {
    if (data === null) {
        return z.null();
    }
    if (Array.isArray(data)) {
        return collectArrayElementTypes(data, strict).schemas[0];
    }
    switch (typeof data) {
        case 'object': {
            const shape = {};
            for (const [key, value] of Object.entries(data)) {
                const fieldSchema = inferZodSchema(value, strict);
                shape[key] = strict ? fieldSchema : fieldSchema.optional();
            }
            return z.object(shape);
        }
        case 'string':
            return isISODateString(data) ? z.string().datetime() : z.string();
        case 'number':
            return Number.isInteger(data) ? z.number().int() : z.number();
        case 'boolean':
            return z.boolean();
        default:
            return z.any();
    }
}
|
|
81
|
+
/**
 * Infers a Zod schema for the top-level value and collects warnings about
 * places where the inference is lossy.
 *
 * Warnings produced here:
 *  - whatever `collectArrayElementTypes` reports, when `data` is an array;
 *  - `PARTIAL_OBJECT_SCHEMA` when a non-strict object has more than 10 keys;
 *  - `UNKNOWN_TYPE` when the value is none of null / array / object /
 *    string / number / boolean.
 *
 * A top-level `null` yields `z.null()` with no warning, matching how
 * `inferZodSchema` treats nested nulls (it was previously reported as an
 * unknown type and typed as `any`).
 *
 * @param {any} data - Parsed JSON value.
 * @param {boolean} strict - When falsy, every object field is marked
 *        optional.
 * @returns {{schema: any, warnings: object[]}}
 */
function inferZodSchemaWithWarnings(data, strict) {
    if (data === null) {
        return { schema: z.null(), warnings: [] };
    }
    if (Array.isArray(data)) {
        const result = collectArrayElementTypes(data, strict);
        return {
            schema: result.schemas[0],
            warnings: result.warnings
        };
    }
    if (typeof data === 'object') {
        const warnings = [];
        const keyCount = Object.keys(data).length;
        if (keyCount > 10 && !strict) {
            warnings.push({
                code: 'PARTIAL_OBJECT_SCHEMA',
                message: `Object has many properties (${keyCount})`,
                details: `Only fields from first object instance were analyzed for type inference`,
                hints: ['For complex objects with varying keys, consider using explicit schemas or splitting into smaller, more homogeneous objects']
            });
        }
        const fields = Object.entries(data).map(([key, value]) => {
            const fieldSchema = inferZodSchema(value, strict);
            return [key, strict ? fieldSchema : fieldSchema.optional()];
        });
        return { schema: z.object(Object.fromEntries(fields)), warnings };
    }
    if (typeof data === 'string') {
        return {
            schema: isISODateString(data) ? z.string().datetime() : z.string(),
            warnings: []
        };
    }
    if (typeof data === 'number') {
        return {
            schema: Number.isInteger(data) ? z.number().int() : z.number(),
            warnings: []
        };
    }
    if (typeof data === 'boolean') {
        return {
            schema: z.boolean(),
            warnings: []
        };
    }
    return {
        schema: z.any(),
        warnings: [{
                code: 'UNKNOWN_TYPE',
                message: 'Unknown value type',
                details: `Value type: ${typeof data}`,
                hints: ['Using "any" type for unknown values - consider explicitly defining expected types']
            }]
    };
}
|
|
141
|
+
/**
 * `jsonInfer` tool: parses an example JSON string, infers a Zod schema from
 * it and returns that schema converted with `toJSONSchema`.
 *
 * The result is always a pretty-printed JSON string. On success it has the
 * shape `{ success: true, data, metadata, warnings? }` (`warnings` is
 * present only when non-empty); on failure
 * `{ success: false, error: { code, message, details, hints } }` with code
 * `INVALID_JSON_DATA` (input did not parse) or `INFERENCE_FAILED`.
 */
export const jsonInfer = tool({
    description: "Infer a JSON Schema from example JSON data. Reports warnings for mixed-type arrays and partial object inference. Generates valid JSON Schema Draft-2020-12.",
    args: {
        data: z.string().describe("Example JSON data string"),
        strict: z.boolean().default(false).describe("If true, all fields are required; if false (default), fields are optional")
    },
    execute: async (args, context) => {
        // Shared envelope for both failure modes.
        const failure = (code, message, details, hints) => JSON.stringify({
            success: false,
            error: { code, message, details, hints }
        }, null, 2);
        let parsedData;
        try {
            parsedData = JSON.parse(args.data);
        }
        catch (parseError) {
            return failure('INVALID_JSON_DATA', 'Data is not valid JSON', parseError instanceof Error ? parseError.message : 'Parse error', [
                'Ensure data is valid JSON syntax',
                'Example: {"name": "value"} or [1, 2, 3]'
            ]);
        }
        try {
            const inferred = inferZodSchemaWithWarnings(parsedData, args.strict);
            const jsonSchema = toJSONSchema(inferred.schema);
            let inferredType;
            if (Array.isArray(parsedData)) {
                inferredType = 'array';
            }
            else if (typeof parsedData === 'object') {
                inferredType = 'object';
            }
            else {
                inferredType = typeof parsedData;
            }
            return JSON.stringify({
                success: true,
                data: jsonSchema,
                metadata: {
                    inferredType,
                    strictMode: args.strict
                },
                // Omitted entirely when there is nothing to report.
                ...(inferred.warnings.length > 0 ? { warnings: inferred.warnings } : {})
            }, null, 2);
        }
        catch (inferError) {
            return failure('INFERENCE_FAILED', 'Failed to infer schema from data', inferError instanceof Error ? inferError.message : 'Inference error', [
                'Try with simpler data',
                'Ensure data structure is not circular',
                'Check for complex nested structures'
            ]);
        }
    }
});
|