cjeu-mcp 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cellar.d.ts CHANGED
@@ -19,3 +19,11 @@ export declare function lookupManifestations(celex: string): Promise<{
19
19
  pdfUrl: string;
20
20
  htmlUrl: string;
21
21
  }>;
22
/**
 * Download a document as bytes (PDF preferred, HTML fallback).
 *
 * @param pdfUrl - URL of the PDF manifestation; tried first when non-empty.
 * @param htmlUrl - URL of the HTML manifestation; tried when non-empty and
 *   the PDF attempt did not produce a usable document.
 * @returns `{ data, format }` for the first manifestation that downloads
 *   successfully, or `null` if unavailable. Download failures are logged by
 *   the implementation rather than thrown.
 */
export declare function downloadDocument(pdfUrl: string, htmlUrl: string): Promise<{
    data: Buffer;
    format: "pdf" | "html";
} | null>;
package/dist/cellar.js CHANGED
@@ -113,6 +113,50 @@ LIMIT 1`;
113
113
  return { pdfUrl: "", htmlUrl: "" };
114
114
  }
115
115
  }
116
/**
 * Download a document as bytes (PDF preferred, HTML fallback).
 *
 * The PDF manifestation is tried first and accepted only when the response is
 * OK, non-empty and starts with the `%PDF-` magic bytes. Otherwise the HTML
 * manifestation is tried and accepted when the response is OK, not blank and
 * does not contain the `awsWafCookie` marker.
 *
 * Every rejected or failed attempt is logged; nothing is thrown to the caller.
 *
 * @param pdfUrl URL of the PDF manifestation; skipped when empty.
 * @param htmlUrl URL of the HTML manifestation; skipped when empty.
 * @returns `{ data, format }` with the bytes exactly as served, or `null` if
 *   neither manifestation produced a usable document.
 */
export async function downloadDocument(pdfUrl, htmlUrl) {
    // 1. Try PDF manifestation
    if (pdfUrl) {
        try {
            const resp = await fetchWithTimeout(pdfUrl, {
                headers: { "User-Agent": "cjeu-mcp/1.0", Accept: "*/*" },
            });
            if (resp.ok) {
                const buf = Buffer.from(await resp.arrayBuffer());
                if (buf.length > 0 && buf.subarray(0, 5).toString() === "%PDF-") {
                    return { data: buf, format: "pdf" };
                }
                // An OK response that is not a PDF (e.g. an HTML interstitial).
                log(`PDF download for ${pdfUrl} did not return a PDF (${buf.length} bytes)`);
            }
            else {
                log(`PDF download for ${pdfUrl} returned HTTP ${resp.status}`);
            }
        }
        catch (e) {
            log(`PDF download failed for ${pdfUrl}: ${e}`);
        }
    }
    // 2. Try HTML manifestation
    if (htmlUrl) {
        try {
            const resp = await fetchWithTimeout(htmlUrl, {
                headers: {
                    "User-Agent": "cjeu-mcp/1.0",
                    Accept: "application/xhtml+xml, text/html",
                },
            });
            if (resp.ok) {
                // Keep the raw bytes: resp.text() always decodes as UTF-8, so
                // re-encoding its result would drop a BOM and corrupt pages
                // served in another charset. The decoded copy is used only
                // for the validity checks below.
                const buf = Buffer.from(await resp.arrayBuffer());
                const text = buf.toString("utf-8");
                if (text.trim() && !text.includes("awsWafCookie")) {
                    return { data: buf, format: "html" };
                }
                log(`HTML download for ${htmlUrl} returned an empty or blocked page`);
            }
            else {
                log(`HTML download for ${htmlUrl} returned HTTP ${resp.status}`);
            }
        }
        catch (e) {
            log(`HTML download failed for ${htmlUrl}: ${e}`);
        }
    }
    return null;
}
116
160
  /**
117
161
  * Extract clean legal text from an HTML document using cheerio.
118
162
  * Strips navigation, headers, footers, metadata — keeps only the legal content.
package/dist/index.js CHANGED
@@ -3,8 +3,12 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
4
  import { z } from "zod";
5
5
  import { parseCaseNumber } from "./celex.js";
6
- import { queryCellar, queryFirstInstance, searchByKeyword, } from "./sparql.js";
7
- import { fetchDocumentText, getPdfUrls, lookupManifestations, } from "./cellar.js";
6
+ import { queryCellar, queryCellarByCelex, queryFirstInstance, searchByKeyword, } from "./sparql.js";
7
+ import { fetchDocumentText, getPdfUrls, lookupManifestations, downloadDocument, } from "./cellar.js";
8
+ import * as fs from "node:fs";
9
+ import * as path from "node:path";
10
+ import * as os from "node:os";
11
+ import archiver from "archiver";
8
12
  function log(msg) {
9
13
  process.stderr.write(`[cjeu-mcp] ${msg}\n`);
10
14
  }
@@ -204,6 +208,135 @@ server.tool("search_by_keyword", "Search CJEU case law by keyword in title/subje
204
208
  }
205
209
  });
206
210
  // ---------------------------------------------------------------------------
211
+ // Tool 4: download_documents
212
+ // ---------------------------------------------------------------------------
213
/**
 * Document type label -> name fragment used when building saved file names.
 * Labels missing from this table are handled by the caller.
 */
const TYPE_TO_FRENCH = {
    "Judgment": "Arret",
    "Order": "Ordonnance",
    "Opinion of Advocate General": "ConclusionDeAvocatGeneral",
    "View of Advocate General": "ConclusionDeAvocatGeneral",
    "First Instance Judgment": "ArretDePremiereInstance",
};
220
/**
 * Expand a leading `~` to the current user's home directory.
 *
 * Recognises a bare `~`, the `~/` prefix, and `~` followed by the platform
 * separator, so Windows-style `~\Downloads` is expanded on Windows. Anything
 * else, including `~user/...`, is returned unchanged.
 *
 * @param p Path that may start with `~`.
 * @returns The path with the home directory substituted, or `p` as given.
 */
function expandHome(p) {
    const isHomeRelative = p === "~" || p.startsWith("~/") || p.startsWith("~" + path.sep);
    if (isHomeRelative) {
        // Drop only the "~"; path.join absorbs the separator that follows it.
        return path.join(os.homedir(), p.slice(1));
    }
    return p;
}
226
/**
 * Turn a case number into a string that is safe as a single file or
 * directory name.
 *
 * `/` becomes `-` (so `C-131/12` yields `C-131-12`). The backslash and the
 * other characters reserved in Windows file names (`: * ? " < > |`) are
 * replaced the same way, so the result can never introduce an extra path
 * segment or an invalid name.
 *
 * @param caseNumber Case number such as `C-131/12`.
 * @returns The case number with every unsafe character replaced by `-`.
 */
function sanitizeForPath(caseNumber) {
    return caseNumber.replace(/[\\/:*?"<>|]/g, "-");
}
229
/**
 * Map a document type label to the fragment used in saved file names:
 * the TYPE_TO_FRENCH entry when one exists, otherwise the label with all
 * whitespace removed.
 */
function documentTypeFileName(typeLabel) {
    return TYPE_TO_FRENCH[typeLabel] ?? typeLabel.replace(/\s+/g, "");
}
/**
 * Zip the contents of `sourceDir` into `zipPath`, nested under `rootName`
 * inside the archive.
 *
 * Resolves once the output file has been closed. Rejects on any archiver or
 * write-stream error, so a failure can neither hang the caller nor surface as
 * an unhandled `error` event.
 */
function zipDirectory(sourceDir, rootName, zipPath) {
    return new Promise((resolve, reject) => {
        const output = fs.createWriteStream(zipPath);
        const archive = archiver("zip", { zlib: { level: 9 } });
        output.on("close", resolve);
        // e.g. the destination is not writable
        output.on("error", reject);
        archive.on("error", (err) => {
            // Stop writing a half-built archive before reporting the failure.
            output.destroy();
            reject(err);
        });
        archive.on("warning", (err) => {
            log(`ZIP warning for ${zipPath}: ${err}`);
        });
        archive.pipe(output);
        archive.directory(sourceDir, rootName);
        // finalize() returns a promise; route its rejection to the caller.
        archive.finalize().catch(reject);
    });
}
/**
 * Tool: download_documents
 *
 * Looks up every document of a case (plus first-instance judgments for C-
 * cases), saves each one under `<outputDir>/<case>/` as PDF or HTML, then
 * zips that folder to `<outputDir>/<case>.zip`.
 *
 * Returns a JSON summary (case number, folder, zip path, saved files). An
 * unparsable case number, or a case whose documents all fail to download,
 * yields an `isError` result and no ZIP.
 */
server.tool("download_documents", "Download all documents for a CJEU case as PDFs (or HTML fallback) and create a ZIP archive", {
    caseNumber: z.string().describe("CJEU case number, e.g. C-131/12"),
    outputDir: z
        .string()
        .default("~/Downloads/CJEU")
        .describe("Folder to save documents to"),
}, async ({ caseNumber, outputDir }) => {
    const parsed = parseCaseNumber(caseNumber);
    if (!parsed) {
        return {
            content: [
                {
                    type: "text",
                    text: `Could not parse case number: "${caseNumber}". Use format like C-131/12 or T-29/10.`,
                },
            ],
            isError: true,
        };
    }
    const { prefix, numberPadded, fullYear, formatted } = parsed;
    const yearStr = String(fullYear);
    const caseDirName = sanitizeForPath(formatted);
    log(`download_documents: ${formatted} -> ${outputDir}`);
    // Find documents
    const docs = await queryCellar(yearStr, numberPadded);
    // Also fetch first-instance judgments for C- cases
    if (prefix === "C") {
        try {
            const fiCelexNumbers = await queryFirstInstance(yearStr, numberPadded);
            for (const fiCelex of fiCelexNumbers) {
                const fiDocs = await queryCellarByCelex(fiCelex);
                for (const d of fiDocs) {
                    if (d.typeLabel === "Judgment") {
                        // Relabel so it maps to its own file name.
                        d.typeLabel = "First Instance Judgment";
                        docs.push(d);
                    }
                }
            }
        }
        catch (e) {
            // Best effort: the main case documents are still downloaded.
            log(`First instance query failed: ${e}`);
        }
    }
    if (docs.length === 0) {
        return {
            content: [
                { type: "text", text: `No documents found for case ${formatted}.` },
            ],
        };
    }
    // Create output directory
    const resolvedDir = expandHome(outputDir);
    const caseDir = path.join(resolvedDir, caseDirName);
    fs.mkdirSync(caseDir, { recursive: true });
    // Pre-count types to know which need numbering
    const typeCountsTotal = {};
    for (const doc of docs) {
        if (!doc.pdfAvailable && !doc.htmlAvailable)
            continue;
        const typeName = documentTypeFileName(doc.typeLabel);
        typeCountsTotal[typeName] = (typeCountsTotal[typeName] ?? 0) + 1;
    }
    const typeCurrent = {};
    const files = [];
    for (const doc of docs) {
        if (!doc.pdfAvailable && !doc.htmlAvailable) {
            log(`Skipping ${doc.celex}: no PDF or HTML available`);
            continue;
        }
        const download = await downloadDocument(doc.pdfUrl, doc.htmlUrl);
        if (!download) {
            log(`Failed to download ${doc.celex}`);
            continue;
        }
        const typeName = documentTypeFileName(doc.typeLabel);
        typeCurrent[typeName] = (typeCurrent[typeName] ?? 0) + 1;
        const ext = download.format === "pdf" ? "pdf" : "html";
        // Number the file only when the case has several documents of this type.
        const needsNumber = (typeCountsTotal[typeName] ?? 0) > 1;
        const suffix = needsNumber ? `_${typeCurrent[typeName]}` : "";
        const fileName = `${caseDirName}_${typeName}${suffix}.${ext}`;
        fs.writeFileSync(path.join(caseDir, fileName), download.data);
        files.push({
            name: fileName,
            type: doc.typeLabel,
            date: doc.date,
            format: download.format,
        });
    }
    // Nothing was saved: report the failure rather than an empty ZIP.
    if (files.length === 0) {
        return {
            content: [
                {
                    type: "text",
                    text: `Found ${docs.length} document(s) for case ${formatted}, but none could be downloaded.`,
                },
            ],
            isError: true,
        };
    }
    // Create ZIP
    const zipPath = path.join(resolvedDir, `${caseDirName}.zip`);
    await zipDirectory(caseDir, caseDirName, zipPath);
    const summary = {
        caseNumber: formatted,
        folder: caseDir + "/",
        zipFile: zipPath,
        files,
    };
    return {
        content: [{ type: "text", text: JSON.stringify(summary, null, 2) }],
    };
});
339
+ // ---------------------------------------------------------------------------
207
340
  // Start server
208
341
  // ---------------------------------------------------------------------------
209
342
  async function main() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cjeu-mcp",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "MCP server for accessing EU Court of Justice case law — search cases, read judgments, AG opinions, and orders",
5
5
  "type": "module",
6
6
  "bin": {
@@ -12,19 +12,30 @@
12
12
  "start": "node dist/index.js",
13
13
  "prepublishOnly": "npm run build"
14
14
  },
15
- "keywords": ["mcp", "cjeu", "eu-law", "court-of-justice", "legal", "case-law"],
15
+ "keywords": [
16
+ "mcp",
17
+ "cjeu",
18
+ "eu-law",
19
+ "court-of-justice",
20
+ "legal",
21
+ "case-law"
22
+ ],
16
23
  "license": "MIT",
17
- "files": ["dist"],
24
+ "files": [
25
+ "dist"
26
+ ],
18
27
  "engines": {
19
28
  "node": ">=18.0.0"
20
29
  },
21
30
  "dependencies": {
22
31
  "@modelcontextprotocol/sdk": "^1.12.1",
32
+ "archiver": "^7.0.1",
23
33
  "cheerio": "^1.0.0",
24
34
  "zod": "^3.24.0"
25
35
  },
26
36
  "devDependencies": {
27
- "typescript": "^5.7.0",
28
- "@types/node": "^22.0.0"
37
+ "@types/archiver": "^7.0.0",
38
+ "@types/node": "^22.0.0",
39
+ "typescript": "^5.7.0"
29
40
  }
30
41
  }