@exactpdf/mcp 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -55,20 +55,28 @@ For a local checkout of this monorepo:
55
55
  | `exactpdf_images_to_pdf` | POST /api/v1/images-to-pdf | 1 |
56
56
  | `exactpdf_extract_text` | POST /api/v1/extract-text | 1 |
57
57
  | `exactpdf_pdf_structured_markdown` | POST /api/v1/pdf-structured-markdown | 1 |
58
- | `exactpdf_pdf_to_audiobook` | POST /api/v1/pdf-to-audiobook | 10 |
59
- | `exactpdf_translate_and_speak` | POST /api/v1/translate-and-speak | 20 |
60
- | `exactpdf_presentation_narration` | POST /api/v1/presentation-narration | 10 |
58
+ | `exactpdf_estimate_speech_cost` | local estimate | 0 |
59
+ | `exactpdf_voice_preview` | POST /api/v1/voice-preview | 0 |
60
+ | `exactpdf_pdf_to_speech` | POST /api/v1/pdf-to-speech | 1/min |
61
+ | `exactpdf_pdf_to_audiobook` | POST /api/v1/pdf-to-audiobook | 1/min |
62
+ | `exactpdf_generate_audiobook` | POST /api/v1/generate-audiobook | 1/min |
63
+ | `exactpdf_translate_and_speak` | POST /api/v1/translate-and-speak | 3/min |
64
+ | `exactpdf_presentation_narration` | POST /api/v1/presentation-narration | 1/min |
61
65
  | `exactpdf_job_status` | GET /api/v1/jobs/:id | 0 |
66
+ | `exactpdf_get_speech_job` | GET /api/v1/speech-jobs/:id | 0 |
67
+ | `exactpdf_download_audio` | GET /api/v1/speech-jobs/:id/download | 0 |
62
68
 
63
69
  Async audiobook flow:
64
70
 
65
71
  ```text
66
- 1. exactpdf_pdf_to_audiobook(path="/abs/book.pdf", voice_style="audiobook")
67
- 2. exactpdf_job_status(job_id="...", download=false)
68
- 3. exactpdf_job_status(job_id="...", download=true) after status is succeeded
72
+ 1. exactpdf_estimate_speech_cost(path="/abs/book.pdf", mode="audiobook")
73
+ 2. exactpdf_voice_preview(text="Read the first paragraph in an audiobook tone", voice_style="audiobook")
74
+ 3. exactpdf_generate_audiobook(path="/abs/book.pdf", voice_style="audiobook")
75
+ 4. exactpdf_get_speech_job(job_id="...", download=false)
76
+ 5. exactpdf_download_audio(job_id="...") after status is succeeded
69
77
  ```
70
78
 
71
- `exactpdf_pdf_to_audiobook` accepts optional `callback_url` and `webhook_secret`. ExactPDF signs webhook bodies with `x-exactpdf-signature: sha256=<hmac>` over `timestamp.body`.
79
+ `exactpdf_pdf_to_speech`, `exactpdf_pdf_to_audiobook`, and `exactpdf_generate_audiobook` accept optional `callback_url` and `webhook_secret`. ExactPDF signs webhook bodies with `x-exactpdf-signature: sha256=<hmac>` over `timestamp.body`.
72
80
 
73
81
  Multilingual speech flow:
74
82
 
package/dist/run.js CHANGED
@@ -5,7 +5,7 @@
5
5
  *
6
6
  * @see https://exactpdf.com/docs/api
7
7
  */
8
- import { mkdir, readFile, writeFile } from 'node:fs/promises';
8
+ import { mkdir, readFile, stat, writeFile } from 'node:fs/promises';
9
9
  import { tmpdir } from 'node:os';
10
10
  import { basename, join } from 'node:path';
11
11
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
@@ -24,8 +24,8 @@ function requireKey() {
24
24
  }
25
25
  return k;
26
26
  }
// MCP server instance for the ExactPDF tools.
// The `instructions` summary quotes speech pricing per generated minute so it
// agrees with the README tool table and with the note emitted by
// exactpdf_estimate_speech_cost (1 credit/minute, 3 credits/minute for
// translate-and-speak); the previous text still advertised flat 10/20 credit prices.
const server = new McpServer({ name: 'exactpdf', version: '0.2.7' }, {
    instructions: 'ExactPDF API tools: exactpdf_account + exactpdf_pdf_info (free); merge, split, rotate, compress, images→PDF, extract-text, pdf-structured-markdown (1 credit each on success); estimate-speech-cost + voice-preview (free), async pdf-to-speech/pdf-to-audiobook/generate-audiobook (1 credit per generated minute), translate-and-speak (3 credits per generated minute), presentation narration (1 credit per generated minute), plus speech job polling/download. Set EXACTPDF_API_KEY.',
});
30
30
  async function saveBinaryFromResponse(res, prefix, fallbackExt) {
31
31
  const buf = Buffer.from(await res.arrayBuffer());
@@ -652,6 +652,241 @@ server.registerTool('exactpdf_presentation_narration', {
652
652
  ],
653
653
  };
654
654
  });
/**
 * Input schema shared by the async PDF speech tools that upload a PDF
 * (exactpdf_pdf_to_speech and exactpdf_generate_audiobook).
 * Only `path` is required; every other field is optional.
 */
const speechJobSchema = (() => {
    // Local shorthands for the two most common optional field shapes.
    const optionalText = (hint) => z.string().optional().describe(hint);
    const optionalFlag = (hint) => z.boolean().optional().describe(hint);
    const outputFormat = z.enum(['mp3', 'wav', 'zip']).optional();
    const voiceStyle = z
        .enum(['professional', 'audiobook', 'educational', 'presenter', 'conversational'])
        .optional();
    const speed = z.number().min(0.5).max(2).optional();
    return z.object({
        path: z.string().describe('Absolute path to a PDF file'),
        output_format: outputFormat.describe('Audio export format. Default: mp3.'),
        voice_style: voiceStyle.describe('Narration style.'),
        voice_id: optionalText('Optional provider voice id, if your account supports it.'),
        language: optionalText('Language hint such as en, hi, es, fr.'),
        speed: speed.describe('Speech speed multiplier, 0.5-2.0.'),
        page_range: optionalText('Optional page range, e.g. "1-10" or "2,4-7".'),
        normalize_pauses: optionalFlag('Normalize pauses between extracted sections.'),
        preserve_chapters: optionalFlag('Keep detected headings as audiobook chapters.'),
        pronunciation_fixes: optionalText('JSON array of pronunciation replacements.'),
        callback_url: z.string().url().optional().describe('Optional https webhook URL for job completion.'),
        webhook_secret: z.string().min(12).max(256).optional().describe('Optional webhook HMAC signing secret.'),
    });
})();
// Optional multipart fields forwarded to the speech endpoints, listed in the
// order they are appended to the form.
const SPEECH_FORM_FIELDS = [
    'output_format',
    'voice_style',
    'voice_id',
    'language',
    'speed',
    'page_range',
    'normalize_pauses',
    'preserve_chapters',
    'pronunciation_fixes',
    'callback_url',
    'webhook_secret',
];
/**
 * Upload a PDF to an async speech endpoint and report the API response.
 *
 * @param {string} endpoint - API path appended to BASE, e.g. '/api/v1/pdf-to-speech'.
 * @param {object} args - Tool arguments shaped like speechJobSchema; `path` is required.
 * @param {string} label - Short name used as the prefix of the result text.
 * @returns {Promise<object>} MCP tool result. `isError: true` when the file
 *   cannot be read or the API answers with a non-2xx status.
 */
async function submitSpeechJob(endpoint, args, label) {
    const key = requireKey();
    let pdfBytes;
    try {
        pdfBytes = await readFile(args.path);
    }
    catch (err) {
        // Report an unreadable path the same way HTTP failures are reported,
        // instead of rejecting the tool handler.
        const reason = err instanceof Error ? err.message : String(err);
        return {
            content: [{ type: 'text', text: `${label} failed: cannot read ${args.path}\n${reason}` }],
            isError: true,
        };
    }
    const form = new FormData();
    form.append('file', new Blob([pdfBytes], { type: 'application/pdf' }), basename(args.path));
    for (const field of SPEECH_FORM_FIELDS) {
        const value = args[field];
        // Unset options and empty strings are omitted; `false` and numbers are sent.
        if (value === undefined || value === null || value === '')
            continue;
        form.append(field, String(value));
    }
    const res = await fetch(`${BASE}${endpoint}`, {
        method: 'POST',
        headers: {
            Authorization: `Bearer ${key}`,
            Accept: 'application/json',
        },
        body: form,
    });
    const raw = await res.text();
    if (!res.ok) {
        return {
            content: [{ type: 'text', text: `${label} failed HTTP ${res.status}\n${raw.slice(0, 8000)}` }],
            isError: true,
        };
    }
    return {
        content: [{ type: 'text', text: `${label} job submitted HTTP ${res.status}\n${raw.slice(0, 120_000)}` }],
    };
}
// Reduce a job id to filename-safe characters before it becomes part of a
// local file-name prefix. Ids made only of letters, digits, '.', '_' and '-'
// pass through unchanged; anything else (including path separators) becomes '_'.
function safeJobIdForFilename(jobId) {
    return String(jobId).replace(/[^A-Za-z0-9._-]/g, '_');
}
/**
 * Fetch a speech job's status, or download its audio.
 *
 * @param {string} jobId - ExactPDF speech job id (URL-encoded into the request path).
 * @param {boolean} download - Status mode only: when true and the job reports
 *   `succeeded` with a `result_url`, the result is fetched and saved locally.
 * @param {boolean} [downloadEndpoint=false] - When true, call
 *   `/speech-jobs/:id/download` with manual redirect handling and save the
 *   file the redirect points to; `download` is not consulted in this mode.
 * @returns {Promise<object>} MCP tool result; `isError: true` on any failure.
 */
async function pollSpeechJob(jobId, download, downloadEndpoint = false) {
    const key = requireKey();
    const jobPath = `/api/v1/speech-jobs/${encodeURIComponent(jobId)}`;
    const requestUrl = `${BASE}${downloadEndpoint ? `${jobPath}/download` : jobPath}`;
    const res = await fetch(requestUrl, {
        redirect: downloadEndpoint ? 'manual' : 'follow',
        headers: {
            Authorization: `Bearer ${key}`,
            Accept: downloadEndpoint ? '*/*' : 'application/json',
        },
    });
    // The id comes from the tool caller and ends up in the prefix handed to
    // saveBinaryFromResponse (presumably part of the output file name — verify),
    // so strip anything that is not filename-safe.
    const fileTag = safeJobIdForFilename(jobId);
    if (downloadEndpoint) {
        const location = res.headers.get('location');
        if (res.status >= 300 && res.status < 400 && location) {
            // Location may be relative; resolve it against the URL that issued the redirect.
            const signedUrl = new URL(location, res.url || requestUrl);
            // The redirect target is fetched without the Authorization header.
            const fileRes = await fetch(signedUrl);
            if (!fileRes.ok) {
                return {
                    content: [{ type: 'text', text: `download-audio signed URL failed HTTP ${fileRes.status}` }],
                    isError: true,
                };
            }
            const ext = extensionFromContentType(fileRes.headers.get('content-type'), 'mp3');
            return {
                content: [{ type: 'text', text: `Saved audio: ${await saveBinaryFromResponse(fileRes, `exactpdf-audio-${fileTag}`, ext)}` }],
            };
        }
        // Anything other than a redirect with a Location header is reported as a failure.
        const raw = await res.text();
        return {
            content: [{ type: 'text', text: `download-audio failed HTTP ${res.status}\n${raw.slice(0, 8000)}` }],
            isError: true,
        };
    }
    const raw = await res.text();
    if (!res.ok) {
        return {
            content: [{ type: 'text', text: `get-speech-job failed HTTP ${res.status}\n${raw.slice(0, 8000)}` }],
            isError: true,
        };
    }
    let saved = '';
    if (download) {
        let parsed;
        try {
            parsed = JSON.parse(raw);
        }
        catch {
            // A 2xx body that is not JSON cannot be inspected for result_url;
            // report it instead of letting the SyntaxError escape the handler.
            return {
                content: [{ type: 'text', text: `get-speech-job returned a non-JSON body; cannot download result\nHTTP ${res.status}\n${raw.slice(0, 8000)}` }],
                isError: true,
            };
        }
        const job = parsed?.job;
        const resultUrl = job?.result_url;
        if (job?.status === 'succeeded' && resultUrl) {
            const fileRes = await fetch(resultUrl);
            if (!fileRes.ok) {
                return {
                    content: [{ type: 'text', text: `Job succeeded, but result download failed HTTP ${fileRes.status}\n${raw.slice(0, 8000)}` }],
                    isError: true,
                };
            }
            const ext = extensionFromContentType(fileRes.headers.get('content-type'), 'mp3');
            saved = `\nSaved result: ${await saveBinaryFromResponse(fileRes, `exactpdf-speech-job-${fileTag}`, ext)}`;
        }
    }
    return {
        content: [{ type: 'text', text: `HTTP ${res.status}\n${raw.slice(0, 120_000)}${saved}` }],
    };
}
// Local-only tool: estimates narration length and credit cost without calling the API.
// Inputs are used in this order of preference: an explicit `minutes` value,
// then a `characters` count, then a size-based guess from the file at `path`.
server.registerTool('exactpdf_estimate_speech_cost', {
    description: 'Estimate PDF speech duration and current ExactPDF API credit cost before submitting a paid job. No API credits consumed.',
    inputSchema: z.object({
        path: z.string().optional().describe('Absolute path to a PDF file. Used for a rough size-based duration estimate.'),
        characters: z.number().int().positive().optional().describe('Known narration character count, if already extracted.'),
        minutes: z.number().positive().optional().describe('Known generated minutes, if already estimated.'),
        mode: z.enum(['speech', 'audiobook', 'presentation', 'translate']).optional().describe('Default: audiobook.'),
        target_language: z.string().optional().describe('Set for translation + speech estimates.'),
    }),
}, async ({ path, characters, minutes, mode, target_language }) => {
    // With nothing to measure, the fallback math would report a made-up
    // 1-minute / 1-credit estimate; ask for an input instead.
    if (!path && characters === undefined && minutes === undefined) {
        return {
            content: [{ type: 'text', text: 'estimate-speech-cost needs at least one of: path, characters, minutes.' }],
            isError: true,
        };
    }
    let estimatedChars = characters ?? 0;
    let fileBytes = null;
    if (!estimatedChars && path) {
        let info;
        try {
            info = await stat(path);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return {
                content: [{ type: 'text', text: `estimate-speech-cost failed: cannot stat ${path}\n${reason}` }],
                isError: true,
            };
        }
        fileBytes = info.size;
        // Heuristic: one narration character per 8 bytes of file, never fewer than 1,000.
        estimatedChars = Math.max(1_000, Math.round(info.size / 8));
    }
    // Heuristic: 900 characters per generated minute, at least one minute.
    const estimatedMinutes = minutes ?? Math.max(1, Math.ceil(estimatedChars / 900));
    // Supplying a target language implies translation pricing regardless of `mode`.
    const selectedMode = target_language ? 'translate' : (mode ?? 'audiobook');
    const creditsPerMinute = selectedMode === 'translate' ? 3 : 1;
    // `minutes` may be fractional; round the cost up so it is always a whole
    // number of credits (assumes credits are billed in whole units — confirm
    // against the API's billing rules).
    const credits = Math.max(1, Math.ceil(estimatedMinutes * creditsPerMinute));
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({
                    ok: true,
                    estimate: {
                        mode: selectedMode,
                        file_bytes: fileBytes,
                        characters: estimatedChars || null,
                        generated_minutes: estimatedMinutes,
                        credits_per_minute: creditsPerMinute,
                        current_credit_cost: credits,
                        note: 'ExactPDF charges async speech jobs by estimated generated minutes: 1 credit/minute for standard speech, 3 credits/minute for translate-and-speak. Path-based estimates are rough until the API extracts PDF text.',
                    },
                }, null, 2),
            },
        ],
    };
});
// Tool: request a short voice sample from /api/v1/voice-preview and save the
// returned audio locally. Only the options the caller actually set are sent.
server.registerTool('exactpdf_voice_preview', {
    description: 'Generate a short server-side voice preview before submitting a paid PDF speech job. Saves MP3/WAV locally. Does not consume credits.',
    inputSchema: z.object({
        text: z.string().optional().describe('Preview text, capped server-side at 600 characters.'),
        output_format: z.enum(['mp3', 'wav']).optional().describe('Audio preview format. Default: mp3.'),
        voice_style: z
            .enum(['professional', 'audiobook', 'educational', 'presenter', 'conversational'])
            .optional()
            .describe('Narration style. Default: professional.'),
        voice_id: z.string().optional().describe('Optional Google Cloud voice name, e.g. en-US-Standard-H.'),
        language: z.string().optional().describe('Language hint such as en, hi, es, fr. Default: en.'),
        speed: z.number().min(0.5).max(2).optional().describe('Speech speed multiplier, 0.5-2.0.'),
    }),
}, async ({ text, output_format, voice_style, voice_id, language, speed }) => {
    const key = requireKey();
    // Assemble the JSON payload key by key so unset or empty options are left out.
    const payload = {};
    if (text) {
        payload.text = text;
    }
    if (output_format) {
        payload.output_format = output_format;
    }
    if (voice_style) {
        payload.voice_style = voice_style;
    }
    if (voice_id) {
        payload.voice_id = voice_id;
    }
    if (language) {
        payload.language = language;
    }
    if (typeof speed === 'number') {
        payload.speed = speed;
    }
    const requestInit = {
        method: 'POST',
        headers: {
            Authorization: `Bearer ${key}`,
            Accept: '*/*',
            'Content-Type': 'application/json',
        },
        body: JSON.stringify(payload),
    };
    const res = await fetch(`${BASE}/api/v1/voice-preview`, requestInit);
    if (!res.ok) {
        const errorBody = await res.text();
        return {
            content: [{ type: 'text', text: `voice-preview failed HTTP ${res.status}\n${errorBody.slice(0, 8000)}` }],
            isError: true,
        };
    }
    // Prefer the extension implied by the response content type; fall back to the requested format.
    const fallbackExt = output_format ?? 'mp3';
    const ext = extensionFromContentType(res.headers.get('content-type'), fallbackExt);
    const outPath = await saveBinaryFromResponse(res, 'exactpdf-voice-preview', ext);
    const creditsUsed = res.headers.get('x-credits-used') ?? '0';
    const provider = res.headers.get('x-exactpdf-provider') ?? 'unknown';
    const summary = [
        `Voice preview saved: ${outPath}`,
        `Credits used: ${creditsUsed}`,
        `Provider: ${provider}`,
    ].join('\n');
    return {
        content: [{ type: 'text', text: summary }],
    };
});
// Tool: submit a PDF to /api/v1/pdf-to-speech as an async job.
// The description quotes per-minute pricing to match the README tool table and
// the exactpdf_estimate_speech_cost note; it previously said "10 credits".
server.registerTool('exactpdf_pdf_to_speech', {
    description: 'Create an async PDF→speech job via /api/v1/pdf-to-speech (1 credit per generated minute). Alias of the production audiobook pipeline.',
    inputSchema: speechJobSchema,
}, async (args) => submitSpeechJob('/api/v1/pdf-to-speech', args, 'pdf-to-speech'));
// Tool: submit a PDF to /api/v1/generate-audiobook as an async job.
// The description quotes per-minute pricing to match the README tool table and
// the exactpdf_estimate_speech_cost note; it previously said "10 credits".
server.registerTool('exactpdf_generate_audiobook', {
    description: 'Create an async audiobook job via /api/v1/generate-audiobook (1 credit per generated minute). Alias of the production audiobook pipeline.',
    inputSchema: speechJobSchema,
}, async (args) => submitSpeechJob('/api/v1/generate-audiobook', args, 'generate-audiobook'));
// Tool: poll a speech job's status; optionally save its result once it has succeeded.
server.registerTool('exactpdf_get_speech_job', {
    description: 'Poll /api/v1/speech-jobs/:id for async PDF speech, audiobook, translation, or presentation narration jobs.',
    inputSchema: z.object({
        job_id: z.string().describe('ExactPDF speech job id.'),
        download: z.boolean().optional().describe('When true, save result_url to EXACTPDF_API_OUTPUT_DIR if job succeeded.'),
    }),
}, async (input) => {
    // An omitted `download` flag is treated as false.
    const wantsDownload = Boolean(input.download);
    return pollSpeechJob(input.job_id, wantsDownload);
});
// Tool: fetch a finished job's audio via the dedicated download endpoint.
server.registerTool('exactpdf_download_audio', {
    description: 'Download a succeeded speech job through /api/v1/speech-jobs/:id/download and save it locally.',
    inputSchema: z.object({
        job_id: z.string().describe('ExactPDF speech job id.'),
    }),
}, async (input) => {
    // Third argument selects the /download endpoint inside pollSpeechJob.
    const useDownloadEndpoint = true;
    return pollSpeechJob(input.job_id, true, useDownloadEndpoint);
});
655
890
  async function main() {
656
891
  const transport = new StdioServerTransport();
657
892
  await server.connect(transport);
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@exactpdf/mcp",
3
- "version": "0.2.5",
4
- "description": "MCP server for ExactPDF — PDF tools, async PDF to audiobook jobs, polling, and API credits.",
3
+ "version": "0.2.7",
4
+ "description": "MCP server for ExactPDF — PDF tools, voice previews, async PDF speech/audiobook jobs, polling, and API credits.",
5
5
  "mcpName": "com.exactpdf/mcp",
6
6
  "type": "module",
7
7
  "main": "./dist/run.js",
package/server.json CHANGED
@@ -2,14 +2,14 @@
2
2
  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
3
3
  "name": "com.exactpdf/mcp",
4
4
  "title": "ExactPDF",
5
- "description": "Agent-facing PDF API: merge, split, rotate, compress, images, metadata, text, Markdown, async audiobook, multilingual speech, and presentation narration jobs.",
6
- "version": "0.2.5",
5
+ "description": "Agent-facing PDF API: merge, split, rotate, compress, images, metadata, text, Markdown, voice previews, async speech/audiobook, multilingual speech, and presentation narration jobs.",
6
+ "version": "0.2.7",
7
7
  "websiteUrl": "https://exactpdf.com/docs/api",
8
8
  "packages": [
9
9
  {
10
10
  "registryType": "npm",
11
11
  "identifier": "@exactpdf/mcp",
12
- "version": "0.2.5",
12
+ "version": "0.2.7",
13
13
  "transport": {
14
14
  "type": "stdio"
15
15
  }