cjeu-mcp 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cellar.d.ts CHANGED
@@ -19,3 +19,14 @@ export declare function lookupManifestations(celex: string): Promise<{
19
19
  pdfUrl: string;
20
20
  htmlUrl: string;
21
21
  }>;
22
+ /**
23
+ * Download a document as PDF bytes.
24
+ * If only HTML is available, converts it to PDF via Puppeteer.
25
+ * Returns { data, format } or null if unavailable.
26
+ * format is always "pdf" unless Puppeteer conversion itself crashes,
27
+ * in which case "html" is returned as a last resort.
+ *
+ * @param pdfUrl URL of the PDF manifestation; an empty string skips the PDF attempt.
+ * @param htmlUrl URL of the HTML manifestation; an empty string skips the HTML attempt.
+ * @returns The document bytes together with their format, or null when
+ * neither URL yields usable content.
28
+ */
29
+ export declare function downloadDocument(pdfUrl: string, htmlUrl: string): Promise<{
30
+ data: Buffer;
31
+ format: "pdf" | "html";
32
+ } | null>;
package/dist/cellar.js CHANGED
@@ -1,5 +1,6 @@
1
1
  /** Cellar REST API download + content negotiation — ported from app.py */
2
2
  import * as cheerio from "cheerio";
3
+ import puppeteer from "puppeteer";
3
4
  const FETCH_TIMEOUT = 30_000;
4
5
  function log(msg) {
5
6
  process.stderr.write(`[cjeu-mcp] ${msg}\n`);
@@ -113,6 +114,82 @@ LIMIT 1`;
113
114
  return { pdfUrl: "", htmlUrl: "" };
114
115
  }
115
116
  }
117
/**
 * Render an HTML string to a PDF using a headless browser driven by Puppeteer.
 *
 * A new browser is launched for every call and is always closed again,
 * whether rendering succeeds or throws.
 *
 * @param htmlContent HTML markup to load into a blank page.
 * @returns A Buffer containing the rendered A4 PDF.
 */
async function htmlToPdf(htmlContent) {
  const launchOptions = {
    headless: true,
    args: ["--no-sandbox"],
  };
  const pdfOptions = {
    format: "A4",
    margin: { top: "20mm", bottom: "20mm", left: "15mm", right: "15mm" },
  };

  const chromium = await puppeteer.launch(launchOptions);
  try {
    const tab = await chromium.newPage();
    // Let the page's network activity settle before printing it.
    await tab.setContent(htmlContent, { waitUntil: "networkidle0" });
    const rendered = await tab.pdf(pdfOptions);
    return Buffer.from(rendered);
  } finally {
    await chromium.close();
  }
}
138
/**
 * Download a document as PDF bytes.
 *
 * The PDF manifestation is tried first and accepted only when the body
 * starts with the "%PDF-" signature. Otherwise the HTML manifestation is
 * fetched and converted to PDF via Puppeteer. If the conversion itself
 * throws, the raw HTML is returned as a last resort.
 *
 * Every rejected attempt (non-OK HTTP status, wrong payload, empty body,
 * challenge page, thrown error) is logged so that a null result can be
 * diagnosed from the server log.
 *
 * @param pdfUrl URL of the PDF manifestation; a falsy value skips the PDF attempt.
 * @param htmlUrl URL of the HTML manifestation; a falsy value skips the HTML attempt.
 * @returns `{ data, format }` where format is "pdf" or "html", or null if
 *          neither URL yields usable content.
 */
export async function downloadDocument(pdfUrl, htmlUrl) {
  const userAgent = "cjeu-mcp/1.0";

  // 1. Try PDF manifestation
  if (pdfUrl) {
    try {
      const resp = await fetchWithTimeout(pdfUrl, {
        headers: { "User-Agent": userAgent, Accept: "*/*" },
      });
      if (!resp.ok) {
        log(`PDF download for ${pdfUrl} returned HTTP ${resp.status}`);
      } else {
        const buf = Buffer.from(await resp.arrayBuffer());
        // Only trust the payload if it carries the PDF file signature;
        // the server may answer 200 with something else entirely.
        if (buf.length > 0 && buf.subarray(0, 5).toString() === "%PDF-") {
          return { data: buf, format: "pdf" };
        }
        log(`PDF download for ${pdfUrl} did not return a PDF (${buf.length} bytes)`);
      }
    } catch (e) {
      log(`PDF download failed for ${pdfUrl}: ${e}`);
    }
  }

  // 2. Try HTML manifestation → convert to PDF
  if (htmlUrl) {
    try {
      const resp = await fetchWithTimeout(htmlUrl, {
        headers: {
          "User-Agent": userAgent,
          Accept: "application/xhtml+xml, text/html",
        },
      });
      if (!resp.ok) {
        log(`HTML download for ${htmlUrl} returned HTTP ${resp.status}`);
      } else {
        const html = await resp.text();
        if (!html.trim()) {
          log(`HTML download for ${htmlUrl} returned an empty body`);
        } else if (html.includes("awsWafCookie")) {
          // NOTE(review): presumably a bot-challenge interstitial rather
          // than the document itself — confirm against a live response.
          log(`HTML download for ${htmlUrl} rejected: body contains "awsWafCookie"`);
        } else {
          try {
            const pdfBuf = await htmlToPdf(html);
            log(`Converted HTML to PDF (${pdfBuf.length} bytes)`);
            return { data: pdfBuf, format: "pdf" };
          } catch (e) {
            log(`HTML→PDF conversion failed, saving as HTML: ${e}`);
            return { data: Buffer.from(html, "utf-8"), format: "html" };
          }
        }
      }
    } catch (e) {
      log(`HTML download failed for ${htmlUrl}: ${e}`);
    }
  }

  return null;
}
116
193
  /**
117
194
  * Extract clean legal text from an HTML document using cheerio.
118
195
  * Strips navigation, headers, footers, metadata — keeps only the legal content.
package/dist/index.js CHANGED
@@ -3,8 +3,12 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
4
  import { z } from "zod";
5
5
  import { parseCaseNumber } from "./celex.js";
6
- import { queryCellar, queryFirstInstance, searchByKeyword, } from "./sparql.js";
7
- import { fetchDocumentText, getPdfUrls, lookupManifestations, } from "./cellar.js";
6
+ import { queryCellar, queryCellarByCelex, queryFirstInstance, searchByKeyword, } from "./sparql.js";
7
+ import { fetchDocumentText, getPdfUrls, lookupManifestations, downloadDocument, } from "./cellar.js";
8
+ import * as fs from "node:fs";
9
+ import * as path from "node:path";
10
+ import * as os from "node:os";
11
+ import archiver from "archiver";
8
12
  function log(msg) {
9
13
  process.stderr.write(`[cjeu-mcp] ${msg}\n`);
10
14
  }
@@ -204,6 +208,135 @@ server.tool("search_by_keyword", "Search CJEU case law by keyword in title/subje
204
208
  }
205
209
  });
206
210
  // ---------------------------------------------------------------------------
211
+ // Tool 4: download_documents
212
+ // ---------------------------------------------------------------------------
213
/**
 * File-name fragments (unaccented French document names) keyed by the
 * English document type label.
 *
 * The table is a null-prototype object so that a lookup with an arbitrary
 * label (e.g. "constructor" or "toString") yields undefined instead of a
 * member inherited from Object.prototype, which would defeat a `??`
 * fallback at the call site.
 */
const TYPE_TO_FRENCH = Object.assign(Object.create(null), {
  Judgment: "Arret",
  Order: "Ordonnance",
  "Opinion of Advocate General": "ConclusionDeAvocatGeneral",
  "View of Advocate General": "ConclusionDeAvocatGeneral",
  "First Instance Judgment": "ArretDePremiereInstance",
});
220
/**
 * Expand a leading "~" to the current user's home directory.
 *
 * Recognised forms are a bare "~" and "~/..."; on platforms whose path
 * separator is a backslash, "~\..." is expanded as well. Anything else
 * (absolute paths, relative paths, "~user/...") is returned unchanged.
 *
 * @param p Path as supplied by the caller.
 * @returns The path with the home directory substituted, or `p` itself.
 */
function expandHome(p) {
  const homePrefixed =
    p === "~" ||
    p.startsWith("~/") ||
    // A backslash is an ordinary file-name character on POSIX, so only
    // treat "~\" as a home reference where "\" really is the separator.
    (path.sep === "\\" && p.startsWith("~\\"));
  return homePrefixed ? path.join(os.homedir(), p.slice(1)) : p;
}
226
/**
 * Turn a case number into a string that is safe to use as a single file
 * or directory name.
 *
 * Path separators ("/" and "\") and the other characters that Windows
 * forbids in file names (: * ? " < > |) are each replaced by "-".
 * For ordinary case numbers only the "/" is affected, e.g.
 * "C-131/12" becomes "C-131-12".
 *
 * @param caseNumber Case number to sanitise.
 * @returns The sanitised name.
 */
function sanitizeForPath(caseNumber) {
  return caseNumber.replace(/[\\/:*?"<>|]/g, "-");
}
229
/**
 * Map a document type label to the name fragment used in output file names.
 * Labels without an entry in TYPE_TO_FRENCH fall back to the label with all
 * whitespace removed.
 */
function frenchDocTypeName(typeLabel) {
  // Own-property check so that labels such as "constructor" never resolve
  // to members inherited from Object.prototype.
  return Object.prototype.hasOwnProperty.call(TYPE_TO_FRENCH, typeLabel)
    ? TYPE_TO_FRENCH[typeLabel]
    : typeLabel.replace(/\s+/g, "");
}

/**
 * Pack the contents of `sourceDir` into a ZIP file at `zipPath`, stored
 * under the folder `rootName` inside the archive.
 * Resolves once the output file has been closed; rejects on archiver
 * errors and on output-stream errors.
 */
function zipDirectory(sourceDir, rootName, zipPath) {
  return new Promise((resolve, reject) => {
    const output = fs.createWriteStream(zipPath);
    const archive = archiver("zip", { zlib: { level: 9 } });
    output.on("close", resolve);
    // Without this listener a failing write stream (e.g. unwritable
    // target) would never settle the promise and the tool call would hang.
    output.on("error", reject);
    archive.on("error", reject);
    archive.pipe(output);
    archive.directory(sourceDir, rootName);
    // Route a rejected finalize() into this promise instead of leaving an
    // unhandled rejection behind.
    Promise.resolve(archive.finalize()).catch(reject);
  });
}

/**
 * Tool: download_documents
 *
 * Looks up every document of a CJEU case (plus first-instance judgments
 * for C- cases), downloads each one as PDF (HTML as a fallback), writes
 * them to `<outputDir>/<case>/` and packs that folder into
 * `<outputDir>/<case>.zip`.
 *
 * Returns a JSON summary (case number, folder, ZIP path, file list) as
 * text, or an error result when the case number cannot be parsed or no
 * document could be downloaded.
 */
server.tool("download_documents", "Download all documents for a CJEU case as PDFs (or HTML fallback) and create a ZIP archive", {
  caseNumber: z.string().describe("CJEU case number, e.g. C-131/12"),
  outputDir: z
    .string()
    .default("~/Downloads/CJEU")
    .describe("Folder to save documents to"),
}, async ({ caseNumber, outputDir }) => {
  const parsed = parseCaseNumber(caseNumber);
  if (!parsed) {
    return {
      content: [
        {
          type: "text",
          text: `Could not parse case number: "${caseNumber}". Use format like C-131/12 or T-29/10.`,
        },
      ],
      isError: true,
    };
  }
  const { prefix, numberPadded, fullYear, formatted } = parsed;
  const yearStr = String(fullYear);
  const caseDirName = sanitizeForPath(formatted);
  log(`download_documents: ${formatted} -> ${outputDir}`);

  // Find documents
  const docs = await queryCellar(yearStr, numberPadded);

  // Also fetch first-instance judgments for C- cases. This is best
  // effort: a failure here must not prevent downloading the main documents.
  if (prefix === "C") {
    try {
      const fiCelexNumbers = await queryFirstInstance(yearStr, numberPadded);
      for (const fiCelex of fiCelexNumbers) {
        const fiDocs = await queryCellarByCelex(fiCelex);
        for (const d of fiDocs) {
          if (d.typeLabel === "Judgment") {
            // Relabel so the file name distinguishes it from the
            // judgment of the case itself.
            d.typeLabel = "First Instance Judgment";
            docs.push(d);
          }
        }
      }
    } catch (e) {
      log(`First instance query failed: ${e}`);
    }
  }

  if (docs.length === 0) {
    return {
      content: [
        { type: "text", text: `No documents found for case ${formatted}.` },
      ],
    };
  }

  // Create output directory
  const resolvedDir = expandHome(outputDir);
  const caseDir = path.join(resolvedDir, caseDirName);
  fs.mkdirSync(caseDir, { recursive: true });

  // Pre-count types to know which need numbering. Maps are used because
  // the keys derive from external labels and must not collide with
  // Object.prototype members.
  const typeCountsTotal = new Map();
  for (const doc of docs) {
    if (!doc.pdfAvailable && !doc.htmlAvailable) continue;
    const frenchName = frenchDocTypeName(doc.typeLabel);
    typeCountsTotal.set(frenchName, (typeCountsTotal.get(frenchName) ?? 0) + 1);
  }

  const typeCurrent = new Map();
  const files = [];
  for (const doc of docs) {
    if (!doc.pdfAvailable && !doc.htmlAvailable) {
      log(`Skipping ${doc.celex}: no PDF or HTML available`);
      continue;
    }
    const downloaded = await downloadDocument(doc.pdfUrl, doc.htmlUrl);
    if (!downloaded) {
      log(`Failed to download ${doc.celex}`);
      continue;
    }
    const frenchName = frenchDocTypeName(doc.typeLabel);
    const ordinal = (typeCurrent.get(frenchName) ?? 0) + 1;
    typeCurrent.set(frenchName, ordinal);

    const ext = downloaded.format === "pdf" ? "pdf" : "html";
    // Only number the files of a type when the case has several of them.
    const needsNumber = (typeCountsTotal.get(frenchName) ?? 0) > 1;
    const suffix = needsNumber ? `_${ordinal}` : "";
    const fileName = `${caseDirName}_${frenchName}${suffix}.${ext}`;
    fs.writeFileSync(path.join(caseDir, fileName), downloaded.data);
    files.push({
      name: fileName,
      type: doc.typeLabel,
      date: doc.date,
      format: downloaded.format,
    });
  }

  // Nothing was written: report that instead of producing an empty archive
  // and a success result.
  if (files.length === 0) {
    return {
      content: [
        {
          type: "text",
          text: `Found ${docs.length} document(s) for case ${formatted}, but none could be downloaded.`,
        },
      ],
      isError: true,
    };
  }

  // Create ZIP next to (not inside) the case folder so the archive does
  // not try to include itself.
  const zipPath = path.join(resolvedDir, `${caseDirName}.zip`);
  await zipDirectory(caseDir, caseDirName, zipPath);

  const summary = {
    caseNumber: formatted,
    folder: caseDir + path.sep,
    zipFile: zipPath,
    files,
  };
  return {
    content: [{ type: "text", text: JSON.stringify(summary, null, 2) }],
  };
});
339
+ // ---------------------------------------------------------------------------
207
340
  // Start server
208
341
  // ---------------------------------------------------------------------------
209
342
  async function main() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cjeu-mcp",
3
- "version": "1.0.0",
3
+ "version": "1.1.1",
4
4
  "description": "MCP server for accessing EU Court of Justice case law — search cases, read judgments, AG opinions, and orders",
5
5
  "type": "module",
6
6
  "bin": {
@@ -12,19 +12,31 @@
12
12
  "start": "node dist/index.js",
13
13
  "prepublishOnly": "npm run build"
14
14
  },
15
- "keywords": ["mcp", "cjeu", "eu-law", "court-of-justice", "legal", "case-law"],
15
+ "keywords": [
16
+ "mcp",
17
+ "cjeu",
18
+ "eu-law",
19
+ "court-of-justice",
20
+ "legal",
21
+ "case-law"
22
+ ],
16
23
  "license": "MIT",
17
- "files": ["dist"],
24
+ "files": [
25
+ "dist"
26
+ ],
18
27
  "engines": {
19
28
  "node": ">=18.0.0"
20
29
  },
21
30
  "dependencies": {
22
31
  "@modelcontextprotocol/sdk": "^1.12.1",
32
+ "archiver": "^7.0.1",
23
33
  "cheerio": "^1.0.0",
34
+ "puppeteer": "^24.40.0",
24
35
  "zod": "^3.24.0"
25
36
  },
26
37
  "devDependencies": {
27
- "typescript": "^5.7.0",
28
- "@types/node": "^22.0.0"
38
+ "@types/archiver": "^7.0.0",
39
+ "@types/node": "^22.0.0",
40
+ "typescript": "^5.7.0"
29
41
  }
30
42
  }