@ansvar/eu-regulations-mcp 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -22
- package/data/regulations.db +0 -0
- package/dist/database/sqlite-adapter.d.ts +2 -2
- package/dist/database/sqlite-adapter.d.ts.map +1 -1
- package/dist/database/sqlite-adapter.js.map +1 -1
- package/dist/http-server.js +27 -5
- package/dist/http-server.js.map +1 -1
- package/dist/index.js +27 -4
- package/dist/index.js.map +1 -1
- package/dist/tools/about.d.ts +40 -0
- package/dist/tools/about.d.ts.map +1 -0
- package/dist/tools/about.js +61 -0
- package/dist/tools/about.js.map +1 -0
- package/dist/tools/list.d.ts +7 -0
- package/dist/tools/list.d.ts.map +1 -1
- package/dist/tools/list.js +73 -8
- package/dist/tools/list.js.map +1 -1
- package/dist/tools/registry.d.ts +11 -1
- package/dist/tools/registry.d.ts.map +1 -1
- package/dist/tools/registry.js +56 -4
- package/dist/tools/registry.js.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +17 -5
- package/dist/worker.js.map +1 -1
- package/package.json +6 -5
- package/scripts/add-cross-references.sql +0 -200
- package/scripts/analyze-survey-responses.ts +0 -285
- package/scripts/build-db.ts +0 -421
- package/scripts/bulk-reingest-all.ts +0 -331
- package/scripts/check-updates.ts +0 -294
- package/scripts/extract-eprivacy-recitals.ts +0 -98
- package/scripts/ingest-eurlex-browser.ts +0 -113
- package/scripts/ingest-eurlex.ts +0 -349
- package/scripts/ingest-unece.ts +0 -382
- package/scripts/migrate-postgres.ts +0 -445
- package/scripts/migrate-to-postgres.ts +0 -353
- package/scripts/reingest-all-with-recitals.sh +0 -81
- package/scripts/sync-versions.ts +0 -206
- package/scripts/test-cross-refs.js +0 -26
- package/scripts/test-postgres-adapter.ts +0 -146
- package/scripts/update-dora-rts-metadata.ts +0 -112
- package/src/database/postgres-adapter.ts +0 -84
- package/src/database/sqlite-adapter.ts +0 -44
- package/src/database/types.ts +0 -10
- package/src/http-server.ts +0 -149
- package/src/index.ts +0 -61
- package/src/middleware/rate-limit.ts +0 -104
- package/src/tools/applicability.ts +0 -167
- package/src/tools/article.ts +0 -81
- package/src/tools/compare.ts +0 -217
- package/src/tools/definitions.ts +0 -49
- package/src/tools/evidence.ts +0 -84
- package/src/tools/list.ts +0 -124
- package/src/tools/map.ts +0 -86
- package/src/tools/recital.ts +0 -60
- package/src/tools/registry.ts +0 -311
- package/src/tools/search.ts +0 -297
- package/src/worker.ts +0 -708
package/src/tools/search.ts
DELETED
|
@@ -1,297 +0,0 @@
|
|
|
1
|
-
import type { DatabaseAdapter } from '../database/types.js';
|
|
2
|
-
|
|
3
|
-
/**
 * Arguments accepted by {@link searchRegulations}.
 */
export interface SearchInput {
  /** Free-text search string; an empty or whitespace-only value yields no results. */
  query: string;

  /** When non-empty, only rows whose `regulation` column is in this list are searched. */
  regulations?: Array<string>;

  /**
   * Maximum number of results to return. Defaults to 10; non-finite or
   * negative values fall back to 10 and the value is capped at 1000.
   */
  limit?: number;
}
|
|
8
|
-
|
|
9
|
-
/**
 * One hit produced by the full-text search, for either an article or a recital.
 */
export interface SearchResult {
  /** Value of the `regulation` column of the matching row. */
  regulation: string;

  /** Article number; for recital hits this holds the recital number rendered as text. */
  article: string;

  /** Article title; for recital hits this is `'Recital ' + <recital number>`. */
  title: string;

  /** Text excerpt around the match, with matched terms wrapped in `>>>` and `<<<`. */
  snippet: string;

  /** Absolute value of the engine's ranking score (bm25 on SQLite, ts_rank on PostgreSQL). */
  relevance: number;

  /** Which table the hit came from. */
  type?: 'article' | 'recital';
}
|
|
17
|
-
|
|
18
|
-
/**
 * Build a safe FTS5 `MATCH` expression for SQLite from free-text user input.
 *
 * Steps:
 *  1. Characters that act as FTS5 operators or punctuation, plus single and
 *     double quotes, are deleted; hyphens become word breaks.
 *  2. Terms of one or two characters, common English stopwords, and terms that
 *     contain no letter or digit are discarded.
 *  3. Every surviving term is emitted as a double-quoted FTS5 string. Quoting
 *     matters: an unquoted term containing a character outside the FTS5
 *     bareword set (for example the `?` in "GDPR?") or an upper-case `NOT`
 *     is parsed as query syntax and makes the whole MATCH fail. Because all
 *     double quotes were removed in step 1, no escaping inside the term is needed.
 *
 * @param query - Raw user search text.
 * @returns `''` when no usable term remains; for one to three terms the quoted
 *   terms separated by spaces (implicit AND in FTS5); for four or more terms
 *   the quoted terms as prefix matches (`"term"*`) joined with `OR`.
 */
function escapeFts5Query(query: string): string {
  const stopwords = new Set(['a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by']);
  // A term made only of leftover punctuation (e.g. "???") would become an empty phrase.
  const hasLetterOrDigit = /[\p{L}\p{N}]/u;

  const terms = query
    .replace(/[*+^():.§/|;=~!@#$%&\\{}[\],<>]/g, '')
    .replace(/['"]/g, '')
    .replace(/-/g, ' ')
    .split(/\s+/)
    .filter(
      term =>
        term.length > 2 &&
        !stopwords.has(term.toLowerCase()) &&
        hasLetterOrDigit.test(term)
    );

  if (terms.length === 0) {
    return '';
  }

  if (terms.length <= 3) {
    // Whitespace between quoted strings is an implicit AND in FTS5.
    return terms.map(term => `"${term}"`).join(' ');
  }

  // A `*` directly after a quoted string marks its final token as a prefix token.
  return terms.map(term => `"${term}"*`).join(' OR ');
}
|
|
41
|
-
|
|
42
|
-
/**
 * Build a PostgreSQL full-text query string, in `to_tsquery` syntax, from
 * free-text user input.
 *
 * Steps:
 *  1. Characters that are tsquery operators or punctuation (`& | ! ( ) : * < > \`
 *     among others), plus single and double quotes, are deleted; hyphens become
 *     word breaks. Without this, input such as "Article 5(1)" or "a|b" would
 *     leak operators into the expression and make it malformed. The strip list
 *     is the same one the SQLite query builder in this file uses.
 *  2. Terms of one or two characters, common English stopwords, and terms that
 *     contain no letter or digit are discarded.
 *
 * The `&`, `|` and `:*` operators in the result only take effect when the
 * string is evaluated with `to_tsquery`; `plainto_tsquery` ignores them.
 *
 * @param query - Raw user search text.
 * @returns `''` when no usable term remains; for one to three terms the terms
 *   joined with ` & ` (AND); for four or more terms each term as a prefix
 *   match (`term:*`) joined with ` | ` (OR).
 */
function buildPostgresQuery(query: string): string {
  const stopwords = new Set(['a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by']);
  // A term made only of leftover punctuation (e.g. "???") carries nothing searchable.
  const hasLetterOrDigit = /[\p{L}\p{N}]/u;

  const terms = query
    .replace(/[*+^():.§/|;=~!@#$%&\\{}[\],<>]/g, '')
    .replace(/['"]/g, '')
    .replace(/-/g, ' ')
    .split(/\s+/)
    .filter(
      term =>
        term.length > 2 &&
        !stopwords.has(term.toLowerCase()) &&
        hasLetterOrDigit.test(term)
    );

  if (terms.length === 0) {
    return '';
  }

  if (terms.length <= 3) {
    return terms.join(' & ');
  }

  return terms.map(term => `${term}:*`).join(' | ');
}
|
|
64
|
-
|
|
65
|
-
/**
 * Run the full-text search against the SQLite FTS5 tables.
 *
 * Queries `articles_fts` and `recitals_fts` with the same MATCH expression and
 * optional regulation filter, then merges both result sets into one ranking.
 *
 * @param db - Adapter used to execute the two statements.
 * @param query - Raw user search text; converted with `escapeFts5Query`.
 * @param regulations - When non-empty, restricts hits to these regulation values.
 * @param limit - Row limit applied to each statement and to the merged list.
 * @returns Merged hits, highest relevance first; `[]` when the query has no
 *   usable terms or SQLite reports an FTS5 syntax error / SQLITE_ERROR.
 */
async function searchSqlite(
  db: DatabaseAdapter,
  query: string,
  regulations: string[] | undefined,
  limit: number
): Promise<SearchResult[]> {
  type Row = {
    regulation: string;
    article: string;
    title: string;
    snippet: string;
    relevance: number;
    type: 'article' | 'recital';
  };

  const matchExpr = escapeFts5Query(query);
  if (!matchExpr) {
    return [];
  }

  // Bind order is: MATCH expression, regulation values (if any), LIMIT.
  const scoped = regulations && regulations.length > 0 ? regulations : [];
  const regulationFilter =
    scoped.length > 0 ? ` AND regulation IN (${scoped.map(() => '?').join(', ')})` : '';
  const bindings: (string | number)[] = [matchExpr, ...scoped, limit];

  const articlesQuery = `
    SELECT
      articles_fts.regulation,
      articles_fts.article_number as article,
      articles_fts.title,
      snippet(articles_fts, 3, '>>>', '<<<', '...', 32) as snippet,
      bm25(articles_fts) as relevance,
      'article' as type
    FROM articles_fts
    WHERE articles_fts MATCH ?
    ${regulationFilter}
    ORDER BY bm25(articles_fts)
    LIMIT ?
  `;

  const recitalsQuery = `
    SELECT
      recitals_fts.regulation,
      CAST(recitals_fts.recital_number AS TEXT) as article,
      'Recital ' || recitals_fts.recital_number as title,
      snippet(recitals_fts, 2, '>>>', '<<<', '...', 32) as snippet,
      bm25(recitals_fts) as relevance,
      'recital' as type
    FROM recitals_fts
    WHERE recitals_fts MATCH ?
    ${regulationFilter}
    ORDER BY bm25(recitals_fts)
    LIMIT ?
  `;

  let articleRows: Row[];
  let recitalRows: Row[];
  try {
    // Each call gets its own copy of the bindings.
    const articleResult = await db.query(articlesQuery, [...bindings]);
    const recitalResult = await db.query(recitalsQuery, [...bindings]);
    articleRows = articleResult.rows as Row[];
    recitalRows = recitalResult.rows as Row[];
  } catch (err) {
    // A malformed MATCH expression is treated as "no results" rather than a failure.
    const isQuerySyntaxProblem =
      err instanceof Error &&
      (err.message.includes('fts5: syntax error') || err.message.includes('SQLITE_ERROR'));
    if (isQuerySyntaxProblem) {
      return [];
    }
    throw err;
  }

  // The raw bm25 score is replaced by its absolute value so that larger means better.
  const merged = [...articleRows, ...recitalRows].map(row => ({
    ...row,
    relevance: Math.abs(row.relevance),
  }));

  merged.sort((left, right) => {
    const gap = right.relevance - left.relevance;
    if (Math.abs(gap) > 0.01) {
      return gap;
    }
    // Scores within 0.01 count as a tie; articles are listed before recitals.
    if (left.type === right.type) {
      return 0;
    }
    return left.type === 'article' ? -1 : 1;
  });

  return merged.slice(0, limit);
}
|
|
165
|
-
|
|
166
|
-
/**
 * Run the full-text search against the PostgreSQL `articles` and `recitals` tables.
 *
 * The string produced by `buildPostgresQuery` uses tsquery operators (`&`, `|`
 * and the `:*` prefix label), so it is evaluated with `to_tsquery`.
 * `plainto_tsquery` ignores those operators and ANDs every word, which would
 * make queries of four or more words match only rows containing all of them
 * exactly, instead of any of them by prefix.
 *
 * @param db - Adapter used to execute the two statements.
 * @param query - Raw user search text.
 * @param regulations - When non-empty, restricts hits to these regulation values.
 * @param limit - Row limit applied to each statement and to the merged list.
 * @returns Merged hits, highest relevance first; `[]` when the query has no
 *   usable terms or PostgreSQL rejects the tsquery expression.
 */
async function searchPostgres(
  db: DatabaseAdapter,
  query: string,
  regulations: string[] | undefined,
  limit: number
): Promise<SearchResult[]> {
  type Row = {
    regulation: string;
    article: string;
    title: string;
    snippet: string;
    relevance: number;
    type: 'article' | 'recital';
  };

  // Remove tsquery operator characters from the user text before the terms are
  // combined, so input such as "Article 5(1)" cannot break the expression
  // handed to to_tsquery.
  const postgresQuery = buildPostgresQuery(query.replace(/[&|!():*<>\\]/g, ''));
  if (!postgresQuery) {
    return [];
  }

  // $1 is the tsquery string, $2.. are the regulation values, the last one is LIMIT.
  const params: (string | number)[] = [postgresQuery];

  let articleFilter = '';
  let recitalFilter = '';
  if (regulations && regulations.length > 0) {
    const placeholders = regulations.map((_, i) => `$${i + 2}`).join(', ');
    articleFilter = ` AND a.regulation IN (${placeholders})`;
    recitalFilter = ` AND r.regulation IN (${placeholders})`;
    params.push(...regulations);
  }
  const limitPlaceholder = `$${params.length + 1}`;

  const articlesQuery = `
    SELECT
      a.regulation,
      a.article_number as article,
      a.title,
      ts_headline('english', a.text, to_tsquery('english', $1),
        'StartSel=>>>, StopSel=<<<, MaxWords=32, MinWords=16') as snippet,
      ts_rank(to_tsvector('english', COALESCE(a.title, '') || ' ' || a.text),
        to_tsquery('english', $1)) as relevance,
      'article' as type
    FROM articles a
    WHERE to_tsvector('english', COALESCE(a.title, '') || ' ' || a.text) @@ to_tsquery('english', $1)
    ${articleFilter}
    ORDER BY relevance DESC
    LIMIT ${limitPlaceholder}
  `;

  const recitalsQuery = `
    SELECT
      r.regulation,
      r.recital_number::TEXT as article,
      'Recital ' || r.recital_number as title,
      ts_headline('english', r.text, to_tsquery('english', $1),
        'StartSel=>>>, StopSel=<<<, MaxWords=32, MinWords=16') as snippet,
      ts_rank(to_tsvector('english', r.text), to_tsquery('english', $1)) as relevance,
      'recital' as type
    FROM recitals r
    WHERE to_tsvector('english', r.text) @@ to_tsquery('english', $1)
    ${recitalFilter}
    ORDER BY relevance DESC
    LIMIT ${limitPlaceholder}
  `;

  const articlesParams = [...params, limit];
  const recitalsParams = [...params, limit];

  let articleRows: Row[];
  let recitalRows: Row[];
  try {
    const articleResult = await db.query(articlesQuery, articlesParams);
    const recitalResult = await db.query(recitalsQuery, recitalsParams);
    articleRows = articleResult.rows as Row[];
    recitalRows = recitalResult.rows as Row[];
  } catch (error) {
    // A tsquery expression PostgreSQL cannot parse is treated as "no results".
    if (error instanceof Error && error.message.includes('tsquery')) {
      return [];
    }
    throw error;
  }

  const combined = [...articleRows, ...recitalRows]
    .map(row => ({
      ...row,
      relevance: Math.abs(row.relevance),
    }))
    .sort((a, b) => {
      if (Math.abs(a.relevance - b.relevance) > 0.01) {
        return b.relevance - a.relevance;
      }
      // Scores within 0.01 count as a tie; articles are listed before recitals.
      if (a.type === 'article' && b.type === 'recital') return -1;
      if (a.type === 'recital' && b.type === 'article') return 1;
      return 0;
    })
    .slice(0, limit);

  return combined;
}
|
|
269
|
-
|
|
270
|
-
/**
 * Public entry point for full-text search over regulations.
 *
 * Normalises the requested limit, rejects blank queries, and hands the search
 * to the SQLite or PostgreSQL implementation depending on `db.type`.
 *
 * @param db - Database adapter; `db.type === 'sqlite'` selects the SQLite path,
 *   any other value selects the PostgreSQL path.
 * @param input - Search text, optional regulation filter and optional limit.
 * @returns Ranked hits; `[]` for a blank query or when the backend error
 *   message mentions `tsquery` or `MATCH`.
 * @throws Re-throws any other error raised by the backend search.
 */
export async function searchRegulations(
  db: DatabaseAdapter,
  input: SearchInput
): Promise<SearchResult[]> {
  const { query, regulations } = input;

  // Missing, non-finite or negative limits fall back to 10; the result is
  // truncated to an integer and capped at 1000.
  const requested = input.limit === undefined ? 10 : input.limit;
  const accepted = Number.isFinite(requested) && !(requested < 0) ? requested : 10;
  const limit = Math.min(Math.floor(accepted), 1000);

  const isBlank = !query || query.trim().length === 0;
  if (isBlank) {
    return [];
  }

  const backendSearch = db.type === 'sqlite' ? searchSqlite : searchPostgres;

  try {
    return await backendSearch(db, query, regulations, limit);
  } catch (err) {
    // Query-syntax failures reported by either engine become an empty result.
    const isQuerySyntaxProblem =
      err instanceof Error && (err.message.includes('tsquery') || err.message.includes('MATCH'));
    if (isQuerySyntaxProblem) {
      return [];
    }
    throw err;
  }
}
|