@iflow-mcp/georgejeffers-uk-case-law-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,495 @@
1
+ // src/tna-client.ts
2
+ // ============================================================================
3
+ // THE NATIONAL ARCHIVES API CLIENT
4
+ // ============================================================================
5
+ //
6
+ // Handles all communication with the TNA Find Case Law API.
7
+ //
8
+ // Rate limits: 1,000 requests per 5-minute rolling window
9
+ // No authentication required.
10
+ // ============================================================================
11
+
12
+ import { XMLParser } from 'fast-xml-parser';
13
+ import type { CaseMetadata, CaseParagraph, SearchResult, DocumentUrls } from './types.js';
14
+
15
+ const TNA_BASE_URL = 'https://caselaw.nationalarchives.gov.uk';
16
+
17
+ // Generate URLs for a document
18
/**
 * Build the web, PDF and XML URLs for a document.
 *
 * @param documentUri - Document path such as `ewca/civ/2007/588`; one leading
 *   slash, if present, is dropped before the path is appended to the base URL.
 * @returns The page URL plus its `/data.pdf` and `/data.xml` variants.
 */
function generateDocumentUrls(documentUri: string): DocumentUrls {
  const path = documentUri.startsWith('/') ? documentUri.slice(1) : documentUri;
  const pageUrl = `${TNA_BASE_URL}/${path}`;

  return {
    web: pageUrl,
    pdf: `${pageUrl}/data.pdf`,
    xml: `${pageUrl}/data.xml`,
  };
}
26
+
27
+ // Rate limiting: simple fixed-window request counter
28
/** Requests allowed per window; kept below the API's documented 1,000 limit. */
export const RATE_LIMIT_MAX_REQUESTS = 900;

/** Length of one rate-limit window in milliseconds (5 minutes). */
export const RATE_LIMIT_WINDOW_MS = 5 * 60 * 1000;

// Mutable limiter state: requests counted in the current window, and when that window began.
let requestCount = 0;
let windowStart = Date.now();
33
+
34
/**
 * Claim one request slot in the current rate-limit window, sleeping until a
 * slot is available when the window is already full.
 *
 * This is a fixed-window counter: `requestCount` is reset whenever
 * `RATE_LIMIT_WINDOW_MS` has elapsed since `windowStart`.
 *
 * @returns A promise that resolves once the caller's request has been counted.
 */
async function checkRateLimit(): Promise<void> {
  // Re-evaluate the window after every sleep. Several callers can be asleep at
  // once; if each of them blindly zeroed the counter on waking (as a single
  // wait-then-reset would), all of them would be released together and the
  // counter would no longer reflect the requests actually sent.
  for (;;) {
    const now = Date.now();

    if (now - windowStart >= RATE_LIMIT_WINDOW_MS) {
      // The window has elapsed: start a fresh one.
      requestCount = 0;
      windowStart = now;
    }

    if (requestCount < RATE_LIMIT_MAX_REQUESTS) {
      break;
    }

    const waitTime = Math.max(0, RATE_LIMIT_WINDOW_MS - (now - windowStart));
    console.error(`Rate limit reached, waiting ${waitTime}ms`);
    await new Promise<void>(resolve => setTimeout(resolve, waitTime));
  }

  requestCount++;
}
52
+
53
+ // ============================================================================
54
+ // COURT CODE MAPPING
55
+ // ============================================================================
56
+
57
+ // Map user-friendly court names to TNA API codes
58
/** User-friendly court group names, each mapped to the list of TNA court codes it covers. */
export const COURT_CODE_MAP: Record<string, string[]> = {
  supreme_court: ['uksc'],
  court_of_appeal: [
    'ewca/civ',
    'ewca/crim',
  ],
  high_court: [
    'ewhc/ch',
    'ewhc/qb',
    'ewhc/kb',
    'ewhc/admin',
    'ewhc/comm',
    'ewhc/patents',
    'ewhc/ipec',
    'ewhc/tcc',
    'ewhc/fam',
  ],
  tribunals: [
    'eat',
    'ukut/iac',
    'ukut/lc',
    'ukut/aac',
  ],
};
67
+
68
+ // Map legal areas to relevant courts
69
/** Legal practice areas, each mapped to the court codes relevant to that area. */
export const LEGAL_AREA_COURTS: Record<string, string[]> = {
  intellectual_property: [
    'ewhc/patents',
    'ewhc/ipec',
    'ewhc/ch',
  ],
  commercial: [
    'ewhc/comm',
    'ewhc/ch',
  ],
  employment: ['eat'],
  immigration: ['ukut/iac'],
  family: ['ewhc/fam'],
};
76
+
77
+ // ============================================================================
78
+ // SEARCH FUNCTION
79
+ // ============================================================================
80
+
81
/** Options accepted by `searchTna`. */
export interface TnaSearchParams {
  /** Search text, sent as the `query` URL parameter. */
  query: string;
  /** Court codes to restrict to. `searchTna` does not currently send or apply this. */
  courts?: string[];
  /** Earliest year to keep; applied to results after they are fetched. */
  yearFrom?: number;
  /** Latest year to keep; applied to results after they are fetched. */
  yearTo?: number;
  /** Results per page; `searchTna` defaults this to 50 and caps it at 50. */
  limit?: number;
  /** Page number, sent as the `page` URL parameter when truthy. */
  page?: number;
}
89
+
90
/**
 * Search the TNA Find Case Law Atom feed and return the matching judgments.
 *
 * The request asks for newest-first ordering and at most 50 results per page.
 * `params.courts` is not sent (see the note in the body); `yearFrom`/`yearTo`
 * are applied locally to the parsed results. Results without a date are
 * always kept.
 *
 * @param params - Query text plus optional paging and year bounds.
 * @returns The parsed results that fall inside the requested year range.
 * @throws Error when the API responds with a non-2xx status (a dedicated
 *   message is used for HTTP 429).
 */
export async function searchTna(params: TnaSearchParams): Promise<SearchResult[]> {
  await checkRateLimit();

  const url = new URL(`${TNA_BASE_URL}/atom.xml`);
  url.searchParams.set('query', params.query);
  url.searchParams.set('order', '-date'); // Newest first
  url.searchParams.set('per_page', String(Math.min(params.limit || 50, 50)));

  if (params.page) {
    url.searchParams.set('page', String(params.page));
  }

  // Note: Court filtering via API is currently disabled due to API format issues
  // The TNA API returns 400 for court parameter. Filter results locally instead.
  // TODO: Investigate correct court filter format when TNA API docs are updated

  // Note: TNA API doesn't have native year filtering in search
  // We filter results after fetching

  const response = await fetch(url.toString(), {
    headers: {
      'Accept': 'application/atom+xml',
      'User-Agent': 'UKCaseLawMCP/1.0',
    },
  });

  if (!response.ok) {
    if (response.status === 429) {
      throw new Error('TNA API rate limit exceeded. Please wait a few minutes.');
    }
    throw new Error(`TNA API error: ${response.status} ${response.statusText}`);
  }

  const xml = await response.text();
  const results = parseAtomFeed(xml);

  // Apply year filtering
  return results.filter(r => {
    if (!r.date) return true;
    const year = yearOfResultDate(r.date);
    if (params.yearFrom && year < params.yearFrom) return false;
    if (params.yearTo && year > params.yearTo) return false;
    return true;
  });
}

/**
 * Read the calendar year from a result date.
 *
 * `YYYY-MM-DD` strings are read textually. Going through
 * `new Date(date).getFullYear()` would parse the date as UTC midnight and then
 * report it in local time, which shifts 1 January into the previous year in
 * time zones behind UTC. Other formats fall back to `Date` parsing, read in
 * UTC; an unparseable date yields `NaN`, which passes both range checks.
 */
function yearOfResultDate(date: string): number {
  const iso = /^(\d{4})-/.exec(date);
  if (iso?.[1]) {
    return Number(iso[1]);
  }
  return new Date(date).getUTCFullYear();
}
135
+
136
+ // ============================================================================
137
+ // ATOM FEED PARSER
138
+ // ============================================================================
139
+
140
/** A parsed Atom `<link>` element; attributes carry the parser's `@_` prefix. */
interface AtomLink {
  /** Target URL of the link. */
  '@_href': string;
  /** Media type of the link target, e.g. `application/akn+xml`. */
  '@_type': string;
}
144
+
145
/** Text content as parsed: a plain string, or an object holding the text under `#text`. */
type AtomText = string | { '#text': string };

/** A parsed `tna:identifier` element. */
interface TnaIdentifier {
  '@_value': string;
  '@_type': string;
  '@_slug'?: string;
}

/** One parsed `<entry>` of the search Atom feed. */
interface AtomEntry {
  id: string;
  title: AtomText;
  published?: string;
  updated?: string;
  summary?: AtomText;
  author?: { name: string };
  /** A single link comes through as an object, several as an array. */
  link?: AtomLink | AtomLink[];
  /** A single identifier comes through as an object, several as an array. */
  'tna:identifier'?: TnaIdentifier[] | TnaIdentifier;
  'tna:uri'?: string;
}
156
+
157
/** Shape of the parsed Atom document as consumed by `parseAtomFeed`. */
interface AtomFeedResult {
  feed: {
    /** Absent when there are no results; an object for one result, an array for several. */
    entry?: AtomEntry | AtomEntry[];
  };
}
162
+
163
/**
 * Parse a search Atom feed into `SearchResult` records.
 *
 * For each entry the document URI is taken from, in order of preference: the
 * `slug` of the `ukncn` identifier, the path of the `application/akn+xml`
 * link, the `tna:uri` element, and finally the entry id.
 *
 * @param xml - Raw Atom XML returned by the search endpoint.
 * @returns One result per feed entry; an empty array when the feed has none.
 */
function parseAtomFeed(xml: string): SearchResult[] {
  const parsed = new XMLParser({
    ignoreAttributes: false,
    attributeNamePrefix: '@_',
  }).parse(xml) as AtomFeedResult;

  // No <feed> or no <entry> means no results.
  const rawEntries = parsed.feed ? parsed.feed.entry : undefined;
  if (!rawEntries) {
    return [];
  }

  // A single result is parsed as an object rather than a one-element array.
  const entries = Array.isArray(rawEntries) ? rawEntries : [rawEntries];

  return entries.map((entry) => {
    // Neutral citation and slug both live on the identifier of type "ukncn".
    const rawIdentifiers = entry['tna:identifier'];
    const identifiers = !rawIdentifiers
      ? []
      : Array.isArray(rawIdentifiers)
        ? rawIdentifiers
        : [rawIdentifiers];
    const ncn = identifiers.find((identifier) => identifier['@_type'] === 'ukncn');

    const neutralCitation: string | null = ncn ? ncn['@_value'] : null;
    let documentSlug: string | null = ncn ? ncn['@_slug'] || null : null;

    // Without a slug, recover it from the XML link,
    // e.g. https://caselaw.../ewca/civ/2025/1633/data.xml
    if (!documentSlug && entry.link) {
      const candidates = Array.isArray(entry.link) ? entry.link : [entry.link];
      const aknLink = candidates.find((candidate) => candidate['@_type'] === 'application/akn+xml');
      const href = aknLink?.['@_href'];
      if (href) {
        const slugMatch = href.match(/nationalarchives\.gov\.uk\/(.+)\/data\.xml$/);
        if (slugMatch?.[1]) {
          documentSlug = slugMatch[1];
        }
      }
    }

    // Summary may be a string, an object carrying #text, or missing/empty.
    const summary = entry.summary;
    let snippet = '';
    if (typeof summary === 'string') {
      snippet = summary;
    } else if (summary && summary['#text']) {
      snippet = summary['#text'];
    }

    // Title may likewise be a string or an object carrying #text.
    const rawTitle = entry.title;
    let title: string;
    if (typeof rawTitle === 'string') {
      title = rawTitle;
    } else if (rawTitle && typeof rawTitle === 'object' && rawTitle['#text']) {
      title = rawTitle['#text'];
    } else {
      title = 'Untitled';
    }

    const docUri = documentSlug || entry['tna:uri'] || extractUriFromId(entry.id);
    const date = entry.published ? entry.published.substring(0, 10) : null;

    return {
      documentUri: docUri,
      neutralCitation,
      title,
      court: entry.author?.name || 'Unknown Court',
      date,
      snippet,
      source: 'tna' as const,
      score: 1.0, // TNA doesn't provide relevance scores
      urls: generateDocumentUrls(docUri),
    };
  });
}
245
+
246
/**
 * Reduce an entry id to its document path.
 *
 * @param id - Entry id, e.g. `https://caselaw.nationalarchives.gov.uk/ewca/civ/2007/588`.
 * @returns Everything after `nationalarchives.gov.uk/`, or `id` unchanged when
 *   it does not contain that host.
 */
function extractUriFromId(id: string): string {
  const hostAndPath = id.match(/nationalarchives\.gov\.uk\/(.+)$/);
  if (hostAndPath === null) {
    return id;
  }
  return hostAndPath[1] ?? id;
}
251
+
252
+ // ============================================================================
253
+ // GET CASE CONTENT
254
+ // ============================================================================
255
+
256
/**
 * Download a judgment's `data.xml` and parse it.
 *
 * @param uri - Document path such as `uksc/2024/1`; one leading slash is ignored.
 * @returns Parsed metadata, paragraphs and judges, or `null` when the API
 *   answers 404.
 * @throws Error for any other non-2xx response.
 */
export async function getTnaCaseContent(uri: string): Promise<{
  metadata: CaseMetadata;
  paragraphs: CaseParagraph[];
  judges: string[];
} | null> {
  await checkRateLimit();

  // Drop a leading slash so the path joins cleanly onto the base URL.
  const normalizedUri = uri.startsWith('/') ? uri.slice(1) : uri;
  const requestHeaders = {
    'Accept': 'application/xml',
    'User-Agent': 'UKCaseLawMCP/1.0',
  };

  const response = await fetch(`${TNA_BASE_URL}/${normalizedUri}/data.xml`, {
    headers: requestHeaders,
  });

  if (response.ok) {
    const xml = await response.text();
    return parseLegalDocML(xml, normalizedUri);
  }

  // A missing document is an expected outcome, not an error.
  if (response.status === 404) {
    return null;
  }

  throw new Error(`TNA API error: ${response.status} ${response.statusText}`);
}
285
+
286
+ // ============================================================================
287
+ // LEGALDOCML PARSER
288
+ // ============================================================================
289
+ //
290
+ // TNA uses Akoma Ntoso (LegalDocML) XML format.
291
+ // Key namespaces:
292
+ // - akn: http://docs.oasis-open.org/legaldocml/ns/akn/3.0
293
+ // - uk: https://caselaw.nationalarchives.gov.uk/akn
294
+ // ============================================================================
295
+
296
/**
 * Parse an Akoma Ntoso (LegalDocML) judgment into metadata, paragraphs and judges.
 *
 * Every lookup falls back to an empty object or a placeholder value, so a
 * document missing any of the expected sections still yields a result.
 *
 * @param xml - Raw LegalDocML document.
 * @param uri - Document path; copied into the metadata and used to build URLs.
 * @returns The extracted metadata, the paragraphs found under `judgmentBody`,
 *   and the judges listed directly under `header`.
 */
function parseLegalDocML(xml: string, uri: string): {
  metadata: CaseMetadata;
  paragraphs: CaseParagraph[];
  judges: string[];
} {
  const doc = new XMLParser({
    ignoreAttributes: false,
    attributeNamePrefix: '@_',
    removeNSPrefix: true, // Remove namespace prefixes for easier access
  }).parse(xml);

  // Walk down to the sections we read from, tolerating missing levels.
  const root = doc.akomaNtoso || doc['akn:akomaNtoso'] || {};
  const judgment = root.judgment || {};
  const meta = judgment.meta || {};
  const frbrWork = (meta.identification || {}).FRBRWork || {};
  const proprietary = meta.proprietary || {};
  const header = judgment.header || {};

  // Title and date are attributes on FRBRWork children.
  const title: string = (frbrWork.FRBRname && frbrWork.FRBRname['@_value']) || 'Untitled';
  const date: string | null = (frbrWork.FRBRdate && frbrWork.FRBRdate['@_date']) || null;

  // Citation and court details come from the proprietary block.
  const neutralCitation: string | null = proprietary.cite || null;
  const court: string = proprietary.court || 'Unknown';
  const courtName: string = proprietary.courtName || 'Unknown Court';

  // Judges may be plain strings or objects carrying their name under #text.
  const judges: string[] = [];
  if (header.judge) {
    const judgeNodes: any[] = Array.isArray(header.judge) ? header.judge : [header.judge];
    judgeNodes.forEach((judgeNode) => {
      if (typeof judgeNode === 'string') {
        judges.push(judgeNode);
      } else if (judgeNode['#text']) {
        judges.push(judgeNode['#text']);
      }
    });
  }

  const paragraphs: CaseParagraph[] = [];
  extractParagraphs(judgment.judgmentBody || {}, paragraphs);

  const metadata = {
    documentUri: uri,
    neutralCitation,
    title,
    court,
    courtName,
    date,
    source: 'tna' as const,
    urls: generateDocumentUrls(uri),
  };

  return { metadata, paragraphs, judges };
}
381
+
382
/**
 * Walk a parsed LegalDocML subtree and append every paragraph found to `paragraphs`.
 *
 * At each object node, the `paragraph` children (or, when there are none, the
 * `p` children) are turned into `CaseParagraph` records; all other child keys
 * are searched recursively. The collected key is not searched again: its text
 * has already been flattened into the record, and descending into it would
 * emit the `<p>` elements inside each paragraph's `content` a second time.
 *
 * @param node - Parsed XML node; non-objects are ignored.
 * @param paragraphs - Output array, appended to in place.
 * @param depth - Current recursion depth (passed along, not otherwise used).
 */
function extractParagraphs(node: any, paragraphs: CaseParagraph[], depth = 0): void {
  if (!node || typeof node !== 'object') return;

  // Key whose elements are collected at this level, if any.
  let collectedKey: string | null = null;

  if (node.paragraph || node.p) {
    collectedKey = node.paragraph ? 'paragraph' : 'p';
    const paraElements = node[collectedKey];
    const paras = Array.isArray(paraElements) ? paraElements : [paraElements];

    for (const para of paras) {
      if (para === null || para === undefined) continue;

      // Paragraph number: taken from <num> (number, string, or object with
      // #text); otherwise the next position in the output.
      let paraNum = paragraphs.length + 1;
      if (para.num !== undefined && para.num !== null) {
        if (typeof para.num === 'number') {
          paraNum = para.num;
        } else if (typeof para.num === 'string') {
          const match = para.num.match(/\d+/);
          if (match?.[0]) {
            paraNum = parseInt(match[0], 10);
          }
        } else if (typeof para.num === 'object' && para.num['#text']) {
          const numText = String(para.num['#text']);
          const match = numText.match(/\d+/);
          if (match?.[0]) {
            paraNum = parseInt(match[0], 10);
          }
        }
      }

      // Paragraph text
      let text = '';
      if (para.content) {
        text = extractText(para.content);
      } else if (para['#text']) {
        // The parser may hand back numeric text as a number; coerce it so the
        // trim below cannot throw.
        text = String(para['#text']);
      } else {
        text = extractText(para);
      }

      if (text.trim()) {
        paragraphs.push({
          number: paraNum,
          text: text.trim(),
        });
      }
    }
  }

  // Recurse into the remaining child nodes
  for (const key of Object.keys(node)) {
    if (key.startsWith('@_') || key === '#text' || key === collectedKey) continue;
    extractParagraphs(node[key], paragraphs, depth + 1);
  }
}
435
+
436
/**
 * Flatten a parsed XML node into plain text.
 *
 * Strings are returned unchanged. Numbers and booleans are stringified: the
 * XML parser converts purely numeric (or `true`/`false`) text into those
 * types by default, and dropping them would silently lose content such as
 * paragraph references or years. For objects and arrays, the node's own
 * `#text` comes first, followed by the text of every child in key order;
 * attribute keys (`@_…`) are skipped and whitespace is collapsed.
 *
 * @param node - Parsed XML value of any shape.
 * @returns The flattened text, or `''` for `null`/`undefined` and other
 *   non-text values.
 */
function extractText(node: any): string {
  if (typeof node === 'string') return node;
  if (typeof node === 'number' || typeof node === 'boolean') return String(node);
  if (typeof node !== 'object' || node === null) return '';

  const pieces: string[] = [];

  // Explicit null check rather than truthiness, so a text value of 0 is kept.
  const ownText = node['#text'];
  if (ownText !== undefined && ownText !== null) {
    pieces.push(String(ownText));
  }

  for (const key of Object.keys(node)) {
    if (key.startsWith('@_') || key === '#text') continue;
    const child = node[key];
    if (Array.isArray(child)) {
      for (const item of child) {
        pieces.push(extractText(item));
      }
    } else {
      pieces.push(extractText(child));
    }
  }

  return pieces.join(' ').replace(/\s+/g, ' ').trim();
}
460
+
461
+ // ============================================================================
462
+ // CITATION PARSING
463
+ // ============================================================================
464
+
465
+ // Convert neutral citation to TNA URI
466
/**
 * Convert a neutral citation into a TNA document URI.
 *
 * Pattern: `[YEAR] COURT [DIVISION] NUMBER [(SUBDIVISION)]`, for example:
 *   [2024] UKSC 1              -> uksc/2024/1
 *   [2007] EWCA Civ 588        -> ewca/civ/2007/588
 *   [2023] EWHC 123 (Patents)  -> ewhc/patents/2023/123
 *
 * The citation may be embedded in longer text. Only a parenthesised word that
 * directly follows the case number counts as the subdivision, so parentheses
 * elsewhere in the text (e.g. "R (Miller) v ...") are not mistaken for one.
 *
 * @param citation - Text containing a neutral citation.
 * @returns The URI path, or `null` when no citation is found.
 */
export function citationToUri(citation: string): string | null {
  const match = citation.match(
    /\[(\d{4})\]\s+(\w+)\s+(?:(\w+)\s+)?(\d+)(?:\s*\((\w+)\))?/,
  );
  if (!match) return null;

  const year = match[1];
  const court1 = match[2];
  const court2 = match[3];
  const number = match[4];
  const subdivision = match[5];

  if (!year || !court1 || !number) return null;

  const courtParts = [court1.toLowerCase()];
  if (court2) {
    courtParts.push(court2.toLowerCase());
  }

  // Subdivision in parentheses after the number, like (Patents)
  if (subdivision) {
    courtParts.push(subdivision.toLowerCase());
  }

  return `${courtParts.join('/')}/${year}/${number}`;
}
package/src/types.ts ADDED
@@ -0,0 +1,59 @@
1
+ // src/types.ts
2
+ // ============================================================================
3
+ // SHARED TYPE DEFINITIONS
4
+ // ============================================================================
5
+
6
/** The set of URLs under which one document is available. */
export interface DocumentUrls {
  /** View on TNA website */
  web: string;
  /** PDF download/embed */
  pdf: string;
  /** Machine-readable LegalDocML */
  xml: string;
}
11
+
12
/** Descriptive metadata for a single case document. */
export interface CaseMetadata {
  /** Optional identifier. */
  id?: string;
  /** Document path, e.g. `uksc/2024/1`. */
  documentUri: string;
  /** Neutral citation, or `null` when none is known. */
  neutralCitation: string | null;
  title: string;
  /** Court identifier. */
  court: string;
  /** Court display name. */
  courtName: string;
  /** Date of the document, or `null` when none is known. */
  date: string | null;
  /** Which service the record came from. */
  source: 'tna' | 'bailii';
  urls: DocumentUrls;
}
23
+
24
/** One paragraph of a judgment. */
export interface CaseParagraph {
  /** Paragraph number. */
  number: number;
  /** Paragraph text. */
  text: string;
}
28
+
29
/** A case document's metadata together with its paragraphs. */
export interface CaseContent {
  metadata: CaseMetadata;
  paragraphs: CaseParagraph[];
  judges?: string[];
  parties?: {
    claimants: string[];
    defendants: string[];
  };
  // NOTE(review): presumably set when `paragraphs` is only part of the document — confirm against the producer.
  truncated: boolean;
  // NOTE(review): presumably the number of paragraphs left out when truncated — confirm against the producer.
  remainingParagraphs: number;
}
40
+
41
/** One hit returned by a case-law search. */
export interface SearchResult {
  /** Document path, e.g. `ewca/civ/2007/588`. */
  documentUri: string;
  /** Neutral citation, or `null` when none is known. */
  neutralCitation: string | null;
  title: string;
  court: string;
  /** Date of the document, or `null` when none is known. */
  date: string | null;
  /** Short excerpt shown with the hit. */
  snippet: string;
  /** Which service the hit came from. */
  source: 'tna' | 'bailii';
  /** Relevance score. */
  score: number;
  urls: DocumentUrls;
}
52
+
53
/** A case linked to another case by citation. */
export interface CitationResult {
  citation: string;
  title: string;
  court: string;
  /** Date of the case, or `null` when none is known. */
  date: string | null;
  /** Direction of the citation link. */
  relationship: 'citing' | 'cited';
}
package/tsconfig.json ADDED
@@ -0,0 +1 @@
1
+ {"compilerOptions": {"lib": ["ESNext"], "target": "ESNext", "module": "NodeNext", "moduleResolution": "NodeNext", "allowJs": true, "strict": true, "skipLibCheck": true, "noFallthroughCasesInSwitch": true, "noUncheckedIndexedAccess": true, "noImplicitOverride": true, "noUnusedLocals": false, "noUnusedParameters": false, "noPropertyAccessFromIndexSignature": false, "outDir": "./dist", "rootDir": "./src", "declaration": true, "declarationMap": true, "sourceMap": true}, "include": ["src/**/*"], "exclude": ["node_modules", "dist"]}