@manfred-kunze-dev/backbone-mcp-server 2.8.0-dev.1 → 2.8.0-dev.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -32,7 +32,7 @@ function formatExportDocument(doc) {
32
32
  }
33
33
  export function register(server, client) {
34
34
  // ── convert_document ────────────────────────────────────────────────────
35
- server.tool("backbone_convert_document", "Convert documents to Markdown, text, HTML, or JSON. Accepts URLs, base64 data, or local file paths. Local files are automatically read and base64-encoded. Use async mode for large documents.", {
35
+ server.tool("backbone_convert_document", "Convert documents (PDF, DOCX, XLSX, images, etc.) to Markdown, text, HTML, or JSON. Accepts URLs, base64 data, or local file paths. Local files are automatically read and base64-encoded. Use this to convert binary files before passing them to backbone_create_extraction. Pipeline options: 'fast' (default, fast text extraction), 'ocr' (OCR with layout analysis), or 'vlm' (vision language model for image-heavy/complex layouts). Use async mode for large documents.", {
36
36
  sources: z
37
37
  .array(z.object({
38
38
  type: z.enum(["url", "base64", "file"]).describe("Source type: 'url' for HTTP URLs, 'base64' for base64-encoded data, 'file' for local file paths"),
@@ -42,20 +42,30 @@ export function register(server, client) {
42
42
  path: z.string().optional().describe("Local file path (when type='file')"),
43
43
  }))
44
44
  .describe("List of document sources to convert"),
45
- outputFormats: z
46
- .array(z.string())
47
- .optional()
48
- .describe("Output formats, e.g. ['markdown', 'text', 'html', 'json']"),
49
45
  pipeline: z
50
46
  .string()
51
47
  .optional()
52
- .describe("Processing pipeline: 'standard' (default) or 'vlm' (vision language model for image-heavy documents)"),
48
+ .describe("Processing pipeline: 'fast' (default, fast text extraction), 'ocr' (OCR with layout analysis), or 'vlm' (vision language model for image-heavy documents)"),
49
+ options: z
50
+ .object({
51
+ outputFormats: z.array(z.string()).optional().describe("Output formats, e.g. ['md', 'text', 'html', 'json']. Only relevant for ocr/vlm pipelines."),
52
+ ocrEngine: z.string().optional().describe("OCR engine (e.g. 'easyocr', 'tesseract')"),
53
+ ocrLanguages: z.array(z.string()).optional().describe("OCR languages (e.g. ['en', 'de'])"),
54
+ pdfBackend: z.string().optional().describe("PDF backend (e.g. 'dlparser', 'pypdfium2')"),
55
+ tableStructure: z.boolean().optional().describe("Enable table structure detection"),
56
+ maxPages: z.number().optional().describe("Maximum number of pages to process"),
57
+ imageExportMode: z.string().optional().describe("Image export mode (e.g. 'placeholder', 'embedded')"),
58
+ imagesScale: z.number().optional().describe("Scale factor for exported images"),
59
+ pictureClassification: z.boolean().optional().describe("Enable picture classification"),
60
+ })
61
+ .optional()
62
+ .describe("Options for ocr/vlm pipelines. Ignored when pipeline is 'fast'."),
53
63
  async: z
54
64
  .boolean()
55
65
  .optional()
56
66
  .default(false)
57
67
  .describe("If true, submit as async task and return task ID for polling"),
58
- }, async ({ sources, outputFormats, pipeline, async: isAsync }) => {
68
+ }, async ({ sources, pipeline, options: pipelineOptions, async: isAsync }) => {
59
69
  try {
60
70
  const apiSources = [];
61
71
  for (const src of sources) {
@@ -79,12 +89,36 @@ export function register(server, client) {
79
89
  apiSources.push({ kind: "base64", content: b64, filename: name, mimeType: getMimeType(name) });
80
90
  }
81
91
  }
92
+ // Build nested options matching new API shape
93
+ const opts = {};
94
+ if (pipeline)
95
+ opts.pipeline = pipeline;
96
+ if (pipelineOptions) {
97
+ const nested = {};
98
+ if (pipelineOptions.outputFormats)
99
+ nested.outputFormats = pipelineOptions.outputFormats.map(mapOutputFormat);
100
+ if (pipelineOptions.ocrEngine)
101
+ nested.ocrEngine = pipelineOptions.ocrEngine;
102
+ if (pipelineOptions.ocrLanguages)
103
+ nested.ocrLanguages = pipelineOptions.ocrLanguages;
104
+ if (pipelineOptions.pdfBackend)
105
+ nested.pdfBackend = pipelineOptions.pdfBackend;
106
+ if (pipelineOptions.tableStructure !== undefined)
107
+ nested.tableStructure = pipelineOptions.tableStructure;
108
+ if (pipelineOptions.maxPages !== undefined)
109
+ nested.maxPages = pipelineOptions.maxPages;
110
+ if (pipelineOptions.imageExportMode)
111
+ nested.imageExportMode = pipelineOptions.imageExportMode;
112
+ if (pipelineOptions.imagesScale !== undefined)
113
+ nested.imagesScale = pipelineOptions.imagesScale;
114
+ if (pipelineOptions.pictureClassification !== undefined)
115
+ nested.pictureClassification = pipelineOptions.pictureClassification;
116
+ if (Object.keys(nested).length > 0)
117
+ opts.options = nested;
118
+ }
82
119
  const body = {
83
120
  sources: apiSources,
84
- options: {
85
- ...(outputFormats ? { toFormats: outputFormats.map(mapOutputFormat) } : {}),
86
- ...(pipeline ? { pipeline } : {}),
87
- },
121
+ options: Object.keys(opts).length > 0 ? opts : undefined,
88
122
  };
89
123
  if (isAsync) {
90
124
  const { data } = await client.POST("/v1/convert/source/async", { body });
@@ -11,7 +11,7 @@ const IMAGE_MIME_TYPES = {
11
11
  };
12
12
  export function register(server, client) {
13
13
  // ── create_extraction ───────────────────────────────────────────────────
14
- server.tool("backbone_create_extraction", "Extract structured data from text and/or images using a schema and AI model. Supports text-only, vision (images), or combined extraction. Use a vision-capable model (e.g. openai/gpt-4o) when providing images.", {
14
+ server.tool("backbone_create_extraction", "Extract structured data from text and/or images using a schema and AI model. Supports text-only, vision (images), or combined extraction. Use a vision-capable model (e.g. openai/gpt-4o) when providing images. IMPORTANT: inputText accepts only plain text/markdown strings. For binary files (PDF, DOCX, XLSX, etc.), first convert them using backbone_convert_document to get markdown, then pass the mdContent here as inputText. Use async=true for large documents to avoid gateway timeouts.", {
15
15
  projectId: z.string().describe("The project ID"),
16
16
  schemaId: z.string().describe("The schema ID to extract with"),
17
17
  schemaVersionId: z
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@manfred-kunze-dev/backbone-mcp-server",
3
- "version": "2.8.0-dev.1",
3
+ "version": "2.8.0-dev.3",
4
4
  "description": "MCP server for the Backbone AI platform",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",