@yeyuan98/opencode-bioresearcher-plugin 1.3.1-alpha.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +14 -0
  2. package/dist/index.js +4 -1
  3. package/dist/misc-tools/index.d.ts +3 -0
  4. package/dist/misc-tools/index.js +3 -0
  5. package/dist/misc-tools/json-extract.d.ts +13 -0
  6. package/dist/misc-tools/json-extract.js +394 -0
  7. package/dist/misc-tools/json-infer.d.ts +13 -0
  8. package/dist/misc-tools/json-infer.js +199 -0
  9. package/dist/misc-tools/json-tools.d.ts +33 -0
  10. package/dist/misc-tools/json-tools.js +187 -0
  11. package/dist/misc-tools/json-validate.d.ts +13 -0
  12. package/dist/misc-tools/json-validate.js +228 -0
  13. package/dist/skills/bioresearcher-core/README.md +210 -0
  14. package/dist/skills/bioresearcher-core/SKILL.md +128 -0
  15. package/dist/skills/bioresearcher-core/examples/contexts.json +29 -0
  16. package/dist/skills/bioresearcher-core/examples/data-exchange-example.md +303 -0
  17. package/dist/skills/bioresearcher-core/examples/template.md +49 -0
  18. package/dist/skills/bioresearcher-core/patterns/calculator.md +215 -0
  19. package/dist/skills/bioresearcher-core/patterns/data-exchange.md +406 -0
  20. package/dist/skills/bioresearcher-core/patterns/json-tools.md +263 -0
  21. package/dist/skills/bioresearcher-core/patterns/progress.md +127 -0
  22. package/dist/skills/bioresearcher-core/patterns/retry.md +110 -0
  23. package/dist/skills/bioresearcher-core/patterns/shell-commands.md +79 -0
  24. package/dist/skills/bioresearcher-core/patterns/subagent-waves.md +186 -0
  25. package/dist/skills/bioresearcher-core/patterns/table-tools.md +260 -0
  26. package/dist/skills/bioresearcher-core/patterns/user-confirmation.md +187 -0
  27. package/dist/skills/bioresearcher-core/python/template.md +273 -0
  28. package/dist/skills/bioresearcher-core/python/template.py +323 -0
  29. package/dist/skills/long-table-summary/SKILL.md +437 -0
  30. package/dist/skills/long-table-summary/combine_outputs.py +336 -0
  31. package/dist/skills/long-table-summary/generate_prompts.py +211 -0
  32. package/dist/skills/long-table-summary/pyproject.toml +8 -0
  33. package/dist/skills/pubmed-weekly/SKILL.md +329 -329
  34. package/dist/skills/pubmed-weekly/pubmed_weekly.py +411 -411
  35. package/dist/skills/pubmed-weekly/pyproject.toml +8 -8
  36. package/package.json +7 -2
package/README.md CHANGED
@@ -77,6 +77,18 @@ Download pubmed article data from https://ftp.ncbi.nlm.nih.gov/pubmed/updatefile
77
77
 
78
78
  Reference: [PubMed Download Data](https://pubmed.ncbi.nlm.nih.gov/download/).
79
79
 
80
+ ### JSON Tools
81
+
82
+ Extract, validate, and infer JSON schemas from data.
83
+
84
+ **Robust JSON handling for LLM workflows.**
85
+
86
+ ```text
87
+ Extract JSON from output.md using jsonExtract tool.
88
+ Validate data.json against schema.json using jsonValidate tool.
89
+ Infer schema from sample.json using jsonInfer tool.
90
+ ```
91
+
80
92
  ## Skills
81
93
 
82
94
  Skills are reusable prompt templates discovered from multiple paths:
@@ -97,6 +109,8 @@ See [skill-tools/README.md](skill-tools/README.md) for full documentation.
97
109
  - `demo-skill`: showcase skill tool mechanisms.
98
110
  - `python-setup-uv`: setup python runtime in your working directory with uv.
99
111
  - `pubmed-weekly`: automated download of pubmed daily update files over the past one week.
112
+ - `long-table-summary`: batch-process large tables using parallel subagents for summarization.
113
+ - `bioresearcher-core`: core patterns and utilities (retry, JSON tools, subagent waves) for skill development.
100
114
 
101
115
  Prompt the following and follow along:
102
116
 
package/dist/index.js CHANGED
@@ -2,7 +2,7 @@ import { createBioResearcherAgent } from "./agents/bioresearcher/index";
2
2
  import { createBioResearcherDRAgent } from "./agents/bioresearcherDR/index";
3
3
  import { createBioResearcherDRWorkerAgent } from "./agents/bioresearcherDR_worker/index";
4
4
  import { tableTools } from "./table-tools/index";
5
- import { blockingTimer, calculator } from "./misc-tools/index";
5
+ import { blockingTimer, calculator, jsonExtract, jsonValidate, jsonInfer } from "./misc-tools/index";
6
6
  import { parse_pubmed_articleSet } from "./parser-tools/pubmed";
7
7
  import { SkillTool } from "./skill-tools";
8
8
  export const BioResearcherPlugin = async () => {
@@ -18,6 +18,9 @@ export const BioResearcherPlugin = async () => {
18
18
  ...tableTools,
19
19
  blockingTimer,
20
20
  calculator,
21
+ jsonExtract,
22
+ jsonValidate,
23
+ jsonInfer,
21
24
  parse_pubmed_articleSet
22
25
  }
23
26
  };
@@ -1,2 +1,5 @@
1
1
// Barrel exports for the misc tools registered by the plugin.
export { blockingTimer } from './timer';
export { calculator } from './calculator';
export { jsonExtract } from './json-extract';
export { jsonValidate } from './json-validate';
export { jsonInfer } from './json-infer';
@@ -1,2 +1,5 @@
1
1
// Barrel exports for the misc tools registered by the plugin.
export { blockingTimer } from './timer';
export { calculator } from './calculator';
export { jsonExtract } from './json-extract';
export { jsonValidate } from './json-validate';
export { jsonInfer } from './json-infer';
@@ -0,0 +1,13 @@
1
// Type declarations for the jsonExtract tool (implementation in json-extract.js).
import { ToolContext } from '@opencode-ai/plugin/tool';
import { z } from 'zod';
export declare const jsonExtract: {
    description: string;
    args: {
        // Path to the file to read (absolute, or relative to the project directory).
        file_path: z.ZodString;
        // When true, return every JSON value found; defaults to false (first only).
        return_all: z.ZodDefault<z.ZodBoolean>;
    };
    // Returns a JSON string with a success/error envelope.
    execute(args: {
        file_path: string;
        return_all: boolean;
    }, context: ToolContext): Promise<string>;
};
@@ -0,0 +1,394 @@
1
+ import { tool } from '@opencode-ai/plugin/tool';
2
+ import { z } from 'zod';
3
+ import * as fs from 'fs';
4
+ import * as path from 'path';
5
// Maximum file size accepted by jsonExtract (200 MB); larger files are rejected
// with a FILE_TOO_LARGE error before any read is attempted.
const MAX_FILE_SIZE = 200 * 1024 * 1024;
// Extensions treated as binary; matching files are rejected up front rather
// than read as UTF-8 text (extension-based check only, no content sniffing).
const BINARY_EXTENSIONS = [
    '.bin', '.exe', '.dll', '.so', '.dylib',
    '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico',
    '.pdf', '.zip', '.tar', '.gz', '.rar',
    '.mp3', '.mp4', '.avi', '.mov', '.wav'
];
12
/**
 * Return the index of the '}' that balances the '{' at (or after) startIdx,
 * ignoring braces that occur inside double-quoted strings. Returns -1 when no
 * balancing brace exists before the end of content.
 */
function findMatchingBrace(content, startIdx) {
    let depth = 0;
    let insideString = false;
    let escaped = false;
    let idx = startIdx;
    while (idx < content.length) {
        const ch = content[idx];
        if (escaped) {
            escaped = false;
        }
        else if (ch === '\\') {
            // NOTE: mirrors the original scanner — a backslash arms escape
            // handling even when not inside a string literal.
            escaped = true;
        }
        else if (ch === '"') {
            insideString = !insideString;
        }
        else if (!insideString) {
            if (ch === '{') {
                depth += 1;
            }
            else if (ch === '}') {
                depth -= 1;
                if (depth === 0) {
                    return idx;
                }
            }
        }
        idx += 1;
    }
    return -1;
}
42
/**
 * Return the index of the ']' that balances the '[' at (or after) startIdx,
 * ignoring brackets that occur inside double-quoted strings. Returns -1 when
 * no balancing bracket exists before the end of content.
 */
function findMatchingBracket(content, startIdx) {
    let depth = 0;
    let insideString = false;
    let escaped = false;
    let idx = startIdx;
    while (idx < content.length) {
        const ch = content[idx];
        if (escaped) {
            escaped = false;
        }
        else if (ch === '\\') {
            // NOTE: mirrors the original scanner — a backslash arms escape
            // handling even when not inside a string literal.
            escaped = true;
        }
        else if (ch === '"') {
            insideString = !insideString;
        }
        else if (!insideString) {
            if (ch === '[') {
                depth += 1;
            }
            else if (ch === ']') {
                depth -= 1;
                if (depth === 0) {
                    return idx;
                }
            }
        }
        idx += 1;
    }
    return -1;
}
72
/**
 * True when filePath resolves to `directory` itself or to a location inside
 * it. Both paths are resolved to absolute, normalized form first, so '..'
 * segments cannot escape the directory.
 */
function isPathWithinDirectory(filePath, directory) {
    const resolvedTarget = path.normalize(path.resolve(filePath));
    const resolvedRoot = path.normalize(path.resolve(directory));
    if (resolvedTarget === resolvedRoot) {
        return true;
    }
    // Append the separator so '/a/bc' is not considered inside '/a/b'.
    return resolvedTarget.startsWith(resolvedRoot + path.sep);
}
78
/**
 * Extract the first JSON value found in `content`.
 *
 * Four strategies are tried in order:
 *   1. a fenced ```json code block,
 *   2. any fenced ``` code block,
 *   3. the first balanced {...} object,
 *   4. the first balanced [...] array.
 *
 * Returns { success: true, data, method, dataType } on success, or
 * { success: false, error } with EMPTY_FILE / NO_JSON_FOUND.
 */
function extractJsonFromContent(content) {
    const text = content.trim();
    if (text.length === 0) {
        return { success: false, error: 'EMPTY_FILE' };
    }
    // Parse a candidate string into a success envelope; null signals failure.
    const tryParse = (candidate, method, dataType) => {
        try {
            const value = JSON.parse(candidate.trim());
            return {
                success: true,
                data: value,
                method,
                dataType: dataType ?? (Array.isArray(value) ? 'array' : 'object')
            };
        }
        catch {
            return null;
        }
    };
    const fencedJson = text.match(/```json\s*([\s\S]*?)\s*```/i);
    if (fencedJson) {
        const result = tryParse(fencedJson[1], 'json_code_block');
        if (result)
            return result;
    }
    const fencedAny = text.match(/```\s*([\s\S]*?)\s*```/i);
    if (fencedAny) {
        const result = tryParse(fencedAny[1], 'code_block');
        if (result)
            return result;
    }
    const objStart = text.indexOf('{');
    if (objStart !== -1) {
        const objEnd = findMatchingBrace(text, objStart);
        if (objEnd > objStart) {
            const result = tryParse(text.slice(objStart, objEnd + 1), 'object', 'object');
            if (result)
                return result;
        }
    }
    const arrStart = text.indexOf('[');
    if (arrStart !== -1) {
        const arrEnd = findMatchingBracket(text, arrStart);
        if (arrEnd > arrStart) {
            const result = tryParse(text.slice(arrStart, arrEnd + 1), 'array', 'array');
            if (result)
                return result;
        }
    }
    return { success: false, error: 'NO_JSON_FOUND' };
}
147
/**
 * Extract every JSON value found in `content`.
 *
 * Strategy order mirrors extractJsonFromContent: all fenced ```json blocks,
 * then all generic ``` blocks, then a raw scan for balanced {...} / [...]
 * spans. Returns { success, data: any[], method, dataType, count } or an
 * EMPTY_FILE / NO_JSON_FOUND error object.
 *
 * Bug fix: the raw scan previously advanced `pos` only when JSON.parse
 * succeeded, so a balanced-but-unparsable span (e.g. "{invalid}") left `pos`
 * unchanged and the while loop spun forever. `pos` now advances past the
 * span regardless of parse outcome.
 */
function extractAllJsonFromContent(content) {
    const trimmed = content.trim();
    if (trimmed.length === 0) {
        return { success: false, error: 'EMPTY_FILE' };
    }
    const extractedObjects = [];
    let method = undefined;
    // Pass 1: every ```json fenced block.
    const jsonBlockRegex = /```json\s*([\s\S]*?)\s*```/gi;
    let match;
    while ((match = jsonBlockRegex.exec(trimmed)) !== null) {
        try {
            extractedObjects.push(JSON.parse(match[1].trim()));
            method = 'json_code_block';
        }
        catch { }
    }
    if (extractedObjects.length > 0) {
        return {
            success: true,
            data: extractedObjects,
            method: method,
            dataType: 'mixed',
            count: extractedObjects.length
        };
    }
    // Pass 2: every generic ``` fenced block.
    const codeBlockRegex = /```\s*([\s\S]*?)\s*```/gi;
    while ((match = codeBlockRegex.exec(trimmed)) !== null) {
        try {
            extractedObjects.push(JSON.parse(match[1].trim()));
            method = 'code_block';
        }
        catch { }
    }
    if (extractedObjects.length > 0) {
        return {
            success: true,
            data: extractedObjects,
            method: method,
            dataType: 'mixed',
            count: extractedObjects.length
        };
    }
    // Pass 3: raw scan for balanced object/array spans.
    const objects = [];
    const arrays = [];
    let pos = 0;
    while (pos < trimmed.length) {
        const objStart = trimmed.indexOf('{', pos);
        const arrStart = trimmed.indexOf('[', pos);
        if (objStart === -1 && arrStart === -1)
            break;
        let nextStart = -1;
        let nextEnd = -1;
        let type = null;
        // Whichever delimiter appears first wins; objects tie-break over arrays.
        if (objStart !== -1 && (arrStart === -1 || objStart < arrStart)) {
            nextStart = objStart;
            const objEnd = findMatchingBrace(trimmed, objStart);
            if (objEnd !== -1 && objEnd > objStart) {
                nextEnd = objEnd;
                type = 'object';
            }
        }
        else if (arrStart !== -1) {
            nextStart = arrStart;
            const arrEnd = findMatchingBracket(trimmed, arrStart);
            if (arrEnd !== -1 && arrEnd > arrStart) {
                nextEnd = arrEnd;
                type = 'array';
            }
        }
        if (nextStart === -1 || nextEnd === -1) {
            // No balanced span from here on; stop scanning.
            break;
        }
        const jsonStr = trimmed.slice(nextStart, nextEnd + 1);
        try {
            const parsed = JSON.parse(jsonStr);
            extractedObjects.push(parsed);
            if (type === 'object')
                objects.push(jsonStr);
            else
                arrays.push(jsonStr);
        }
        catch {
            // Unparsable span: ignore it, but still advance past it (bug fix).
        }
        pos = nextEnd + 1;
    }
    if (extractedObjects.length === 0) {
        return { success: false, error: 'NO_JSON_FOUND' };
    }
    // Same value as the original nested ternary: 'object' whenever any object
    // span was captured, otherwise 'array'.
    const finalMethod = method || (objects.length > 0 ? 'object' : 'array');
    return {
        success: true,
        data: extractedObjects,
        method: finalMethod,
        dataType: objects.length > 0 && arrays.length > 0 ? 'mixed' : objects.length > 0 ? 'object' : 'array',
        count: extractedObjects.length
    };
}
249
// Tool: read a text file and extract the first (or all) JSON value(s) from it.
// Validation runs in a fixed order: path containment -> existence -> not a
// directory -> size limit -> binary extension -> read + extract. Every return
// value is a pretty-printed JSON string with a stable success/error envelope.
export const jsonExtract = tool({
    description: "Extract and parse JSON from a file. Handles markdown code blocks (```json) and raw JSON with string-aware extraction. Returns first JSON found with metadata.",
    args: {
        file_path: z.string().describe("Path to file containing JSON (absolute or relative to project directory)"),
        return_all: z.boolean().default(false).describe("If true, returns all JSON objects found as array; if false (default), returns first only")
    },
    execute: async (args, context) => {
        try {
            // Relative paths are resolved against the project directory.
            const resolvedPath = path.isAbsolute(args.file_path)
                ? args.file_path
                : path.join(context.directory, args.file_path);
            // Refuse paths that escape the project directory (e.g. via '..').
            if (!isPathWithinDirectory(resolvedPath, context.directory)) {
                return JSON.stringify({
                    success: false,
                    error: {
                        code: 'PATH_TRAVERSAL',
                        message: 'Path must be within project directory',
                        details: `Attempted path: ${args.file_path}`,
                        hints: ['Use a relative path within the project directory', 'Ensure the path does not contain ..']
                    }
                }, null, 2);
            }
            if (!fs.existsSync(resolvedPath)) {
                return JSON.stringify({
                    success: false,
                    error: {
                        code: 'FILE_NOT_FOUND',
                        message: `File not found: ${args.file_path}`,
                        details: `Resolved path: ${resolvedPath}`,
                        hints: ['Check the file path for typos', 'Use a relative path from the project root']
                    }
                }, null, 2);
            }
            const stats = fs.statSync(resolvedPath);
            // Directories reuse the FILE_NOT_FOUND code with a specific message.
            if (stats.isDirectory()) {
                return JSON.stringify({
                    success: false,
                    error: {
                        code: 'FILE_NOT_FOUND',
                        message: 'Path is a directory, not a file',
                        details: `Path: ${args.file_path}`,
                        hints: ['Provide a file path, not a directory']
                    }
                }, null, 2);
            }
            // Size check before reading to avoid loading huge files into memory.
            if (stats.size > MAX_FILE_SIZE) {
                const sizeMB = (stats.size / 1024 / 1024).toFixed(2);
                return JSON.stringify({
                    success: false,
                    error: {
                        code: 'FILE_TOO_LARGE',
                        message: `File exceeds maximum size of 200MB`,
                        details: `File size: ${sizeMB}MB`,
                        hints: ['Use a smaller file', 'Consider splitting the file', 'Increase the MAX_FILE_SIZE constant']
                    }
                }, null, 2);
            }
            // Extension-based binary detection only; content is not sniffed.
            const ext = path.extname(resolvedPath).toLowerCase();
            if (BINARY_EXTENSIONS.includes(ext)) {
                return JSON.stringify({
                    success: false,
                    error: {
                        code: 'BINARY_FILE',
                        message: `Binary file format not supported: ${ext}`,
                        details: `File extension: ${ext}`,
                        hints: ['Use a text-based file (JSON, Markdown, TXT)', 'Provide file contents as plain text']
                    }
                }, null, 2);
            }
            const content = fs.readFileSync(resolvedPath, 'utf-8');
            // return_all selects the multi-value extractor; default is first-only.
            const result = args.return_all
                ? extractAllJsonFromContent(content)
                : extractJsonFromContent(content);
            if (result.success) {
                return JSON.stringify({
                    success: true,
                    data: result.data,
                    metadata: {
                        method: result.method,
                        dataType: result.dataType,
                        fileSize: stats.size,
                        // count is only meaningful in return_all mode.
                        ...(args.return_all && result.count !== undefined ? { count: result.count } : {})
                    }
                }, null, 2);
            }
            else {
                return JSON.stringify({
                    success: false,
                    error: {
                        code: result.error,
                        message: getErrorMessage(result.error),
                        details: `File: ${args.file_path}`,
                        hints: getErrorHints(result.error)
                    }
                }, null, 2);
            }
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            // NOTE(review): unexpected errors that are not encoding-related are
            // reported under the NO_JSON_FOUND code, which can mask I/O failures;
            // consider a dedicated code if this proves confusing.
            const code = message.includes('encoding') || message.includes('EILSEQ')
                ? 'ENCODING_ERROR'
                : 'NO_JSON_FOUND';
            return JSON.stringify({
                success: false,
                error: {
                    code,
                    message: code === 'ENCODING_ERROR'
                        ? 'File encoding error - could not read as UTF-8'
                        : 'Unexpected error while extracting JSON',
                    details: message,
                    hints: code === 'ENCODING_ERROR'
                        ? ['Ensure file is UTF-8 encoded', 'Try converting file encoding']
                        : ['Check file contents', 'Ensure file is not corrupted']
                }
            }, null, 2);
        }
    }
});
367
/**
 * Human-readable message for a jsonExtract error code; generic fallback for
 * unknown codes.
 */
function getErrorMessage(code) {
    switch (code) {
        case 'EMPTY_FILE':
            return 'File is empty';
        case 'NO_JSON_FOUND':
            return 'No valid JSON found in file';
        case 'FILE_NOT_FOUND':
            return 'File not found';
        case 'FILE_TOO_LARGE':
            return 'File exceeds maximum size';
        case 'BINARY_FILE':
            return 'Binary file format not supported';
        case 'PATH_TRAVERSAL':
            return 'Path must be within project directory';
        case 'ENCODING_ERROR':
            return 'File encoding error - could not read as UTF-8';
        default:
            return 'Extraction failed';
    }
}
379
/**
 * Actionable hint list for a jsonExtract error code; generic fallback for
 * unknown codes.
 */
function getErrorHints(code) {
    switch (code) {
        case 'EMPTY_FILE':
            return ['Provide a file with content'];
        case 'NO_JSON_FOUND':
            return [
                'Ensure file contains valid JSON',
                'JSON can be in markdown code blocks (```json) or raw',
                'Both objects {...} and arrays [...] are supported'
            ];
        case 'FILE_NOT_FOUND':
            return ['Check the file path for typos', 'Use a relative path from the project root'];
        case 'FILE_TOO_LARGE':
            return ['Use a smaller file', 'Consider splitting the file'];
        case 'BINARY_FILE':
            return ['Use a text-based file (JSON, Markdown, TXT)'];
        case 'PATH_TRAVERSAL':
            return ['Use a relative path within the project directory', 'Ensure the path does not contain ..'];
        case 'ENCODING_ERROR':
            return ['Ensure file is UTF-8 encoded', 'Try converting file encoding'];
        default:
            return ['Check file contents'];
    }
}
@@ -0,0 +1,13 @@
1
// Type declarations for the jsonInfer tool (implementation in json-infer.js).
import { ToolContext } from '@opencode-ai/plugin/tool';
import { z } from 'zod';
export declare const jsonInfer: {
    description: string;
    args: {
        // Example JSON data as a string; parsed before inference.
        data: z.ZodString;
        // When true, inferred object fields are required; defaults to false.
        strict: z.ZodDefault<z.ZodBoolean>;
    };
    // Returns a JSON string with a success/error envelope.
    execute(args: {
        data: string;
        strict: boolean;
    }, context: ToolContext): Promise<string>;
};
@@ -0,0 +1,199 @@
1
+ import { tool } from '@opencode-ai/plugin/tool';
2
+ import { z, toJSONSchema } from 'zod';
3
/**
 * Heuristic check for an ISO-8601 date-time string, e.g.
 * "2024-01-02T03:04:05Z". Seconds, fractional seconds, and the timezone
 * designator are all optional.
 *
 * Fix: the previous pattern only accepted HH:MM and rejected any timestamp
 * containing seconds ("2024-01-02T03:04:05Z"), so the most common ISO form
 * was inferred as a plain string (and zod's .datetime() validator itself
 * expects seconds). The pattern now additionally accepts an optional :SS
 * component; everything that matched before still matches.
 */
function isISODateString(value) {
    return /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}(:\d{2})?(\.\d+)?(Z|[+-]\d{2}:\d{2})?$/.test(value);
}
6
/**
 * Build a zod schema for the elements of `data` (an array), deduplicating
 * element schemas by their serialized structural definition.
 *
 * Returns { schemas, types, warnings } where schemas[0] is the z.array(...)
 * schema callers consume directly. Unions are limited to 2 element types;
 * more than 2 distinct types falls back to z.array(z.any()) with a warning.
 *
 * Fix: the empty-array branch previously returned a bare z.any() while every
 * other branch returns a z.array(...) wrapper, so [] was inferred as "any"
 * instead of "array of any" — contradicting the branch's own warning text.
 * It now returns z.array(z.any()) for consistency.
 */
function collectArrayElementTypes(data, strict) {
    const warnings = [];
    if (data.length === 0) {
        warnings.push({
            code: 'EMPTY_ARRAY',
            message: 'Empty array - cannot infer element type',
            details: 'Using "any" as element type since no data available',
            hints: ['Provide at least one element in the array for type inference', 'Use z.any() if empty arrays are expected']
        });
        // Consistent with the non-empty branches: always return an array schema.
        return { schemas: [z.array(z.any())], types: new Set(['any']), warnings };
    }
    // Deduplicate element schemas by their structural definition.
    const typeMap = new Map();
    for (const element of data) {
        const elementSchema = inferZodSchema(element, strict);
        const typeKey = JSON.stringify(elementSchema._zod.def);
        if (!typeMap.has(typeKey)) {
            typeMap.set(typeKey, elementSchema);
        }
    }
    const schemas = Array.from(typeMap.values());
    const types = new Set(schemas.map(s => s._zod.def.type));
    // Limit union to 2 types per plan specification.
    if (types.size > 2) {
        const unionTypes = Array.from(types).join(', ');
        warnings.push({
            code: 'MIXED_TYPE_ARRAY',
            message: 'Too many types in array for union',
            details: `Found ${types.size} types (${unionTypes}), using "any" as element type`,
            hints: ['Limit array elements to 1 or 2 types for better type inference', 'Consider using explicit schemas for complex mixed-type arrays']
        });
        return { schemas: [z.array(z.any())], types: new Set(['any']), warnings };
    }
    if (types.size === 2) {
        const unionTypes = Array.from(types).join(', ');
        warnings.push({
            code: 'MIXED_TYPE_ARRAY',
            message: `Array contains 2 different types: ${unionTypes}`,
            details: `Types found: ${unionTypes}. Using union type.`,
            hints: ['For stronger type safety, consider keeping array elements homogeneous', 'Union types are limited to 2 variants for inference']
        });
        return { schemas: [z.array(z.union(schemas))], types, warnings };
    }
    return { schemas: [z.array(schemas[0])], types, warnings };
}
51
/**
 * Recursively infer a zod schema for a single JSON value. In non-strict mode
 * object fields are marked optional. Warnings produced while analyzing
 * nested arrays are not propagated from this helper (see
 * inferZodSchemaWithWarnings for the warning-collecting entry point).
 */
function inferZodSchema(data, strict) {
    if (data === null) {
        return z.null();
    }
    if (Array.isArray(data)) {
        // Warnings from nested arrays are intentionally dropped here.
        return collectArrayElementTypes(data, strict).schemas[0];
    }
    if (typeof data === 'object') {
        const shape = {};
        for (const [key, value] of Object.entries(data)) {
            const fieldSchema = inferZodSchema(value, strict);
            shape[key] = strict ? fieldSchema : fieldSchema.optional();
        }
        return z.object(shape);
    }
    switch (typeof data) {
        case 'string':
            return isISODateString(data) ? z.string().datetime() : z.string();
        case 'number':
            return Number.isInteger(data) ? z.number().int() : z.number();
        case 'boolean':
            return z.boolean();
        default:
            return z.any();
    }
}
81
/**
 * Top-level schema inference entry point: infer a zod schema for `data` and
 * collect warnings (empty/mixed-type arrays, wide objects, unknown types).
 *
 * Fix: a top-level `null` previously fell through every typeof branch and was
 * reported as UNKNOWN_TYPE with schema z.any(), while the recursive helper
 * inferZodSchema maps null to z.null(). Null is now handled explicitly and
 * consistently.
 */
function inferZodSchemaWithWarnings(data, strict) {
    if (data === null) {
        // Consistent with inferZodSchema: null infers z.null(), no warning.
        return { schema: z.null(), warnings: [] };
    }
    if (Array.isArray(data)) {
        const result = collectArrayElementTypes(data, strict);
        return { schema: result.schemas[0], warnings: result.warnings };
    }
    if (typeof data === 'object') {
        const warnings = [];
        const keyCount = Object.keys(data).length;
        // Warn on wide objects in non-strict mode: inference only sees this
        // one instance, so the schema may be partial.
        if (keyCount > 10 && !strict) {
            warnings.push({
                code: 'PARTIAL_OBJECT_SCHEMA',
                message: `Object has many properties (${keyCount})`,
                details: `Only fields from first object instance were analyzed for type inference`,
                hints: ['For complex objects with varying keys, consider using explicit schemas or splitting into smaller, more homogeneous objects']
            });
        }
        const schema = z.object(Object.fromEntries(Object.entries(data).map(([key, value]) => [key, strict ? inferZodSchema(value, true) : inferZodSchema(value, false).optional()])));
        return { schema, warnings };
    }
    if (typeof data === 'string') {
        return {
            schema: isISODateString(data) ? z.string().datetime() : z.string(),
            warnings: []
        };
    }
    if (typeof data === 'number') {
        return {
            schema: Number.isInteger(data) ? z.number().int() : z.number(),
            warnings: []
        };
    }
    if (typeof data === 'boolean') {
        return { schema: z.boolean(), warnings: [] };
    }
    // Anything else (undefined, function, symbol, bigint) is not valid JSON.
    return {
        schema: z.any(),
        warnings: [{
                code: 'UNKNOWN_TYPE',
                message: 'Unknown value type',
                details: `Value type: ${typeof data}`,
                hints: ['Using "any" type for unknown values - consider explicitly defining expected types']
            }]
    };
}
141
// Tool: infer a JSON Schema (Draft 2020-12, via zod's toJSONSchema) from an
// example JSON string. Parse errors and inference failures return structured
// error envelopes; non-fatal issues are surfaced under a warnings key.
export const jsonInfer = tool({
    description: "Infer a JSON Schema from example JSON data. Reports warnings for mixed-type arrays and partial object inference. Generates valid JSON Schema Draft-2020-12.",
    args: {
        data: z.string().describe("Example JSON data string"),
        strict: z.boolean().default(false).describe("If true, all fields are required; if false (default), fields are optional")
    },
    execute: async (args, context) => {
        let parsedData;
        try {
            parsedData = JSON.parse(args.data);
        }
        catch (e) {
            return JSON.stringify({
                success: false,
                error: {
                    code: 'INVALID_JSON_DATA',
                    message: 'Data is not valid JSON',
                    details: e instanceof Error ? e.message : 'Parse error',
                    hints: [
                        'Ensure data is valid JSON syntax',
                        'Example: {"name": "value"} or [1, 2, 3]'
                    ]
                }
            }, null, 2);
        }
        try {
            const { schema, warnings } = inferZodSchemaWithWarnings(parsedData, args.strict);
            // Convert the inferred zod schema to a standard JSON Schema document.
            const jsonSchema = toJSONSchema(schema);
            const response = {
                success: true,
                data: jsonSchema,
                metadata: {
                    // NOTE: typeof null === 'object', so a top-level null reports
                    // inferredType 'object'.
                    inferredType: Array.isArray(parsedData) ? 'array' :
                        typeof parsedData === 'object' ? 'object' : typeof parsedData,
                    strictMode: args.strict
                }
            };
            // Warnings key is omitted entirely when there are none.
            if (warnings.length > 0) {
                response.warnings = warnings;
            }
            return JSON.stringify(response, null, 2);
        }
        catch (e) {
            return JSON.stringify({
                success: false,
                error: {
                    code: 'INFERENCE_FAILED',
                    message: 'Failed to infer schema from data',
                    details: e instanceof Error ? e.message : 'Inference error',
                    hints: [
                        'Try with simpler data',
                        'Ensure data structure is not circular',
                        'Check for complex nested structures'
                    ]
                }
            }, null, 2);
        }
    }
});