iflow-mcp_ayunis-legal-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/METADATA +606 -0
- iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/RECORD +8 -0
- iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/WHEEL +5 -0
- iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
- iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/top_level.txt +1 -0
- legal_mcp/server/__init__.py +5 -0
- legal_mcp/server/main.py +226 -0
|
@@ -0,0 +1,606 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: iflow-mcp_ayunis-legal-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: German Legal Texts Search System - MCP Server
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: fastmcp>=2.12.0
|
|
9
|
+
Requires-Dist: pydantic>=2.12.0
|
|
10
|
+
Dynamic: license-file
|
|
11
|
+
Dynamic: requires-python
|
|
12
|
+
|
|
13
|
+
# Legal MCP - German Legal Texts Search System
|
|
14
|
+
|
|
15
|
+
[](https://opensource.org/licenses/MIT)
|
|
16
|
+
[](https://www.python.org/downloads/)
|
|
17
|
+
[](https://fastapi.tiangolo.com/)
|
|
18
|
+
[](https://www.docker.com/)
|
|
19
|
+
|
|
20
|
+
A comprehensive system for searching and analyzing German legal texts using vector embeddings and semantic search, consisting of:
|
|
21
|
+
|
|
22
|
+
- **Store API**: FastAPI backend with PostgreSQL, pgvector, and Ollama embeddings
|
|
23
|
+
- **MCP Server**: FastMCP server providing tools for AI assistants to query legal texts
|
|
24
|
+
- **CLI Tool**: Command-line interface for importing and querying legal texts
|
|
25
|
+
- **Web Scraper**: Automatic extraction of legal texts from gesetze-im-internet.de
|
|
26
|
+
- **XML Parser**: Comprehensive parser for German legal XML format (gii-norm.dtd)
|
|
27
|
+
|
|
28
|
+
## Table of Contents
|
|
29
|
+
|
|
30
|
+
- [Features](#features)
|
|
31
|
+
- [Architecture](#architecture)
|
|
32
|
+
- [Quick Start](#quick-start)
|
|
33
|
+
- [CLI Tool](#cli-tool)
|
|
34
|
+
- [Environment Configuration](#environment-configuration)
|
|
35
|
+
- [API Documentation](#api-documentation)
|
|
36
|
+
- [Legal Text Features](#legal-text-features)
|
|
37
|
+
- [XML Parser](#xml-parser)
|
|
38
|
+
- [Development](#development)
|
|
39
|
+
- [Docker Commands](#docker-commands)
|
|
40
|
+
- [Troubleshooting](#troubleshooting)
|
|
41
|
+
- [Project Structure](#project-structure)
|
|
42
|
+
- [Technology Stack](#technology-stack)
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
### Store API Features
|
|
47
|
+
|
|
48
|
+
- ποΈ **PostgreSQL + pgvector** - Vector database for semantic search
|
|
49
|
+
- 🤖 **Ollama Integration** - Generate embeddings for legal texts
|
|
50
|
+
- π **Web Scraping** - Automatic extraction from gesetze-im-internet.de
|
|
51
|
+
- π **XML Parsing** - Comprehensive parser for German legal XML format
|
|
52
|
+
- π **Semantic Search** - Vector-based similarity search for legal texts
|
|
53
|
+
- π **Metadata Tracking** - Full document metadata and versioning
|
|
54
|
+
- π **RESTful API** - FastAPI with automatic documentation
|
|
55
|
+
- 🐳 **Docker Support** - Easy deployment with containerization
|
|
56
|
+
|
|
57
|
+
### MCP Server Features
|
|
58
|
+
|
|
59
|
+
- 🔧 **FastMCP** - Modern MCP server implementation
|
|
60
|
+
- 🤖 **AI Assistant Integration** - Provides tools for querying legal texts
|
|
61
|
+
- π **HTTP API Client** - Connects to Store API for data access
|
|
62
|
+
|
|
63
|
+
### CLI Tool Features
|
|
64
|
+
|
|
65
|
+
- π **List Commands** - View imported codes and available catalog
|
|
66
|
+
- 📥 **Import Commands** - Import legal codes with progress indication
|
|
67
|
+
- π **Query Commands** - Retrieve texts by code, section, and sub-section
|
|
68
|
+
- π **Search Commands** - Semantic search with similarity scoring
|
|
69
|
+
- π **Multiple Output Formats** - Table view or JSON output
|
|
70
|
+
- ⚙️ **Configurable** - Custom API URL support via flag or environment variable
|
|
71
|
+
|
|
72
|
+
## Architecture
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
76
|
+
β β
|
|
77
|
+
β Docker Network: legal-mcp-network β
|
|
78
|
+
β β
|
|
79
|
+
β ββββββββββββββββββ β
|
|
80
|
+
β β MCP Server β :8001 β
|
|
81
|
+
β β (FastMCP) β β
|
|
82
|
+
β βββββββββ¬βββββββββ β
|
|
83
|
+
β β β
|
|
84
|
+
β β LEGAL_API_BASE_URL β
|
|
85
|
+
β β http://store-api:8000 β
|
|
86
|
+
β β β
|
|
87
|
+
β βΌ β
|
|
88
|
+
β ββββββββββββββββββ β
|
|
89
|
+
β β Store API β :8000 β
|
|
90
|
+
β β (FastAPI) β β
|
|
91
|
+
β βββββββββ¬βββββββββ β
|
|
92
|
+
β β β
|
|
93
|
+
β β DATABASE_URL β
|
|
94
|
+
β β postgresql://postgres:5432 β
|
|
95
|
+
β β OLLAMA_BASE_URL β
|
|
96
|
+
β β β
|
|
97
|
+
β βΌ β
|
|
98
|
+
β ββββββββββββββββββ β
|
|
99
|
+
β β PostgreSQL β :5432 β
|
|
100
|
+
β β + pgvector β β
|
|
101
|
+
β ββββββββββββββββββ β
|
|
102
|
+
β β
|
|
103
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
104
|
+
β
|
|
105
|
+
β External Ollama Service
|
|
106
|
+
β (for embeddings)
|
|
107
|
+
βΌ
|
|
108
|
+
ββββββββββββββββββ
|
|
109
|
+
β Ollama API β
|
|
110
|
+
β (Remote/Local)β
|
|
111
|
+
ββββββββββββββββββ
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Quick Start
|
|
115
|
+
|
|
116
|
+
### Prerequisites
|
|
117
|
+
|
|
118
|
+
- Docker and Docker Compose
|
|
119
|
+
- Ollama (local or remote endpoint for embeddings)
|
|
120
|
+
- Git
|
|
121
|
+
|
|
122
|
+
> ⚠️ **Important: Ollama Embedding Model**
|
|
123
|
+
>
|
|
124
|
+
> By default, this project uses the embedding model: `ryanshillington/Qwen3-Embedding-4B:latest`
|
|
125
|
+
>
|
|
126
|
+
> You must pull this model (or your configured alternative) before importing legal texts:
|
|
127
|
+
> ```bash
|
|
128
|
+
> ollama pull ryanshillington/Qwen3-Embedding-4B:latest
|
|
129
|
+
> ```
|
|
130
|
+
>
|
|
131
|
+
> You can use a different model by setting the `OLLAMA_EMBEDDING_MODEL` environment variable, but **the model must produce 2560-dimensional vectors**. Using a model with different dimensions will cause errors, as the database schema is fixed at 2560 dimensions. Changing to a model with different dimensions would require database schema modifications and re-importing all legal texts.
|
|
132
|
+
|
|
133
|
+
### 1. Clone and Setup
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
# Clone the repository
|
|
137
|
+
git clone <repository-url>
|
|
138
|
+
cd legal-mcp
|
|
139
|
+
|
|
140
|
+
# Copy environment file
|
|
141
|
+
cp .env.example .env
|
|
142
|
+
|
|
143
|
+
# Edit .env with your configuration
|
|
144
|
+
# Update OLLAMA_BASE_URL and OLLAMA_AUTH_TOKEN if needed
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### 2. Start All Services
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
# Build and start all services
|
|
151
|
+
docker-compose up -d
|
|
152
|
+
|
|
153
|
+
# Check service status
|
|
154
|
+
docker-compose ps
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
This will start:
|
|
158
|
+
|
|
159
|
+
- **PostgreSQL** (port 5432) - Database with pgvector extension
|
|
160
|
+
- **Store API** (port 8000) - FastAPI backend for legal texts
|
|
161
|
+
- **MCP Server** (port 8001) - FastMCP server for AI assistants
|
|
162
|
+
|
|
163
|
+
### 3. Run Database Migrations
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
# Run Alembic migrations to set up the database
|
|
167
|
+
docker-compose exec store-api alembic upgrade head
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### 4. Import Legal Texts
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
# Import a test legal code (e.g., rag_1)
|
|
174
|
+
curl -X POST http://localhost:8000/legal-texts/gesetze-im-internet/rag_1
|
|
175
|
+
|
|
176
|
+
# Import German Civil Code (BGB)
|
|
177
|
+
curl -X POST http://localhost:8000/legal-texts/gesetze-im-internet/bgb
|
|
178
|
+
|
|
179
|
+
# Import other legal codes
|
|
180
|
+
curl -X POST http://localhost:8000/legal-texts/gesetze-im-internet/stgb # Criminal Code
|
|
181
|
+
curl -X POST http://localhost:8000/legal-texts/gesetze-im-internet/gg # Constitution
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### 5. Test the API
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
# Check API health
|
|
188
|
+
curl http://localhost:8000/health
|
|
189
|
+
|
|
190
|
+
# Query legal texts by section
|
|
191
|
+
curl "http://localhost:8000/legal-texts/gesetze-im-internet/rag_1?section=%C2%A7%201"
|
|
192
|
+
|
|
193
|
+
# Semantic search (requires embeddings)
|
|
194
|
+
curl "http://localhost:8000/legal-texts/gesetze-im-internet/rag_1/search?q=Versicherung&limit=5"
|
|
195
|
+
|
|
196
|
+
# Access interactive API documentation
|
|
197
|
+
open http://localhost:8000/docs
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## CLI Tool
|
|
201
|
+
|
|
202
|
+
The CLI provides a convenient command-line interface for managing legal texts without writing code.
|
|
203
|
+
|
|
204
|
+
### Installation
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
# Install in development mode (from project root)
|
|
208
|
+
pip install -e .
|
|
209
|
+
|
|
210
|
+
# Verify installation
|
|
211
|
+
legal-mcp --help
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Prerequisites
|
|
215
|
+
|
|
216
|
+
The CLI requires the Store API to be running:
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
# Start all services
|
|
220
|
+
docker-compose up -d
|
|
221
|
+
|
|
222
|
+
# Verify Store API is running
|
|
223
|
+
curl http://localhost:8000/health
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### Available Commands
|
|
227
|
+
|
|
228
|
+
#### List Commands
|
|
229
|
+
|
|
230
|
+
**List Imported Codes**
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
# Show all imported legal codes in table format
|
|
234
|
+
legal-mcp list codes
|
|
235
|
+
|
|
236
|
+
# Output as JSON
|
|
237
|
+
legal-mcp list codes --json
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
**List Available Catalog**
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
# Show all available legal codes that can be imported
|
|
244
|
+
legal-mcp list catalog
|
|
245
|
+
|
|
246
|
+
# Output as JSON
|
|
247
|
+
legal-mcp list catalog --json
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
#### Import Command
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
# Import a single legal code
|
|
254
|
+
legal-mcp import --code bgb
|
|
255
|
+
|
|
256
|
+
# Import multiple legal codes
|
|
257
|
+
legal-mcp import --code bgb --code stgb --code gg
|
|
258
|
+
|
|
259
|
+
# Import with JSON output
|
|
260
|
+
legal-mcp import --code bgb --json
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
The import command displays a spinner while processing and shows progress for each code.
|
|
264
|
+
|
|
265
|
+
#### Query Command
|
|
266
|
+
|
|
267
|
+
```bash
|
|
268
|
+
# Query all texts for a legal code
|
|
269
|
+
legal-mcp query bgb
|
|
270
|
+
|
|
271
|
+
# Query specific section
|
|
272
|
+
legal-mcp query bgb --section "§ 1"
|
|
273
|
+
|
|
274
|
+
# Query specific sub-section
|
|
275
|
+
legal-mcp query bgb --section "§ 1" --sub-section "1"
|
|
276
|
+
|
|
277
|
+
# Output as JSON
|
|
278
|
+
legal-mcp query bgb --section "§ 1" --json
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
#### Search Command
|
|
282
|
+
|
|
283
|
+
```bash
|
|
284
|
+
# Semantic search in a legal code
|
|
285
|
+
legal-mcp search bgb "Kaufvertrag"
|
|
286
|
+
|
|
287
|
+
# Limit number of results
|
|
288
|
+
legal-mcp search bgb "Kaufvertrag" --limit 5
|
|
289
|
+
|
|
290
|
+
# Set similarity cutoff threshold (0-2, lower = stricter)
|
|
291
|
+
legal-mcp search bgb "Kaufvertrag" --cutoff 0.5
|
|
292
|
+
|
|
293
|
+
# Output as JSON
|
|
294
|
+
legal-mcp search bgb "Kaufvertrag" --json
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
### Configuration
|
|
298
|
+
|
|
299
|
+
**Default API URL**: `http://localhost:8000`
|
|
300
|
+
|
|
301
|
+
**Override with environment variable:**
|
|
302
|
+
|
|
303
|
+
```bash
|
|
304
|
+
export LEGAL_API_BASE_URL=http://custom-host:8000
|
|
305
|
+
legal-mcp list codes
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
**Override with command flag:**
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
legal-mcp list codes --api-url http://custom-host:8000
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
### Output Formats
|
|
315
|
+
|
|
316
|
+
**Table Format (default):**
|
|
317
|
+
|
|
318
|
+
- Clean, formatted tables with Rich library
|
|
319
|
+
- Text truncation for readability
|
|
320
|
+
- Color-coded output
|
|
321
|
+
|
|
322
|
+
**JSON Format:**
|
|
323
|
+
|
|
324
|
+
- Complete data with full text content
|
|
325
|
+
- Machine-readable for scripting
|
|
326
|
+
- Use `--json` flag with any command
|
|
327
|
+
|
|
328
|
+
### Example Workflow
|
|
329
|
+
|
|
330
|
+
```bash
|
|
331
|
+
# 1. Check available legal codes
|
|
332
|
+
legal-mcp list catalog
|
|
333
|
+
|
|
334
|
+
# 2. Import desired codes
|
|
335
|
+
legal-mcp import --code bgb --code stgb
|
|
336
|
+
|
|
337
|
+
# 3. Verify imports
|
|
338
|
+
legal-mcp list codes
|
|
339
|
+
|
|
340
|
+
# 4. Query specific sections
|
|
341
|
+
legal-mcp query bgb --section "§ 433"
|
|
342
|
+
|
|
343
|
+
# 5. Perform semantic search
|
|
344
|
+
legal-mcp search bgb "Kaufvertrag" --limit 10
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
## Environment Configuration
|
|
348
|
+
|
|
349
|
+
The application uses a `.env` file for configuration. See `.env.example` for a template.
|
|
350
|
+
|
|
351
|
+
### Required Environment Variables
|
|
352
|
+
|
|
353
|
+
```bash
|
|
354
|
+
# Ollama Configuration
|
|
355
|
+
OLLAMA_BASE_URL=https://your-ollama-endpoint.com
|
|
356
|
+
OLLAMA_AUTH_TOKEN=your-auth-token-here
|
|
357
|
+
OLLAMA_EMBEDDING_MODEL=ryanshillington/Qwen3-Embedding-4B:latest # Optional, this is the default
|
|
358
|
+
|
|
359
|
+
# PostgreSQL Configuration
|
|
360
|
+
POSTGRES_HOST=postgres # Use 'postgres' in Docker, 'localhost' for local dev
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
> **Note:** The `OLLAMA_EMBEDDING_MODEL` variable allows you to use a different embedding model. However, **any alternative model must produce 2560-dimensional vectors** to be compatible with the database schema. The default model (`ryanshillington/Qwen3-Embedding-4B:latest`) is recommended.
|
|
364
|
+
|
|
365
|
+
### Additional Configuration (set in docker-compose.yml)
|
|
366
|
+
|
|
367
|
+
```bash
|
|
368
|
+
# Database URL (automatically constructed)
|
|
369
|
+
DATABASE_URL=postgresql+asyncpg://legal_mcp:legal_mcp_password@postgres:5432/legal_mcp_db
|
|
370
|
+
|
|
371
|
+
# MCP Server Configuration
|
|
372
|
+
LEGAL_API_BASE_URL=http://store-api:8000
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
## API Documentation
|
|
376
|
+
|
|
377
|
+
Once running, access the interactive API documentation:
|
|
378
|
+
|
|
379
|
+
- **Swagger UI**: http://localhost:8000/docs
|
|
380
|
+
- **ReDoc**: http://localhost:8000/redoc
|
|
381
|
+
|
|
382
|
+
### Main Endpoints
|
|
383
|
+
|
|
384
|
+
#### Legal Texts
|
|
385
|
+
|
|
386
|
+
- `POST /legal-texts/gesetze-im-internet/{book}` - Import legal text with embeddings
|
|
387
|
+
- `GET /legal-texts/gesetze-im-internet/{code}` - Query legal texts by code/section
|
|
388
|
+
- `GET /legal-texts/gesetze-im-internet/{code}/search` - Semantic search with embeddings
|
|
389
|
+
|
|
390
|
+
#### System
|
|
391
|
+
|
|
392
|
+
- `GET /health` - Health check endpoint
|
|
393
|
+
- `GET /` - API information
|
|
394
|
+
|
|
395
|
+
## MCP Server
|
|
396
|
+
|
|
397
|
+
The MCP Server provides tools for AI assistants to interact with the legal text database through the Model Context Protocol.
|
|
398
|
+
|
|
399
|
+
### Available Tools
|
|
400
|
+
|
|
401
|
+
The MCP Server exposes the following tools:
|
|
402
|
+
|
|
403
|
+
- **`search_legal_texts`** - Perform semantic search on legal texts
|
|
404
|
+
- Parameters: `query`, `code`, `limit` (1-20), `cutoff` (0-2)
|
|
405
|
+
- Returns: List of matching legal text sections with similarity scores
|
|
406
|
+
|
|
407
|
+
- **`get_legal_section`** - Retrieve specific legal text sections
|
|
408
|
+
- Parameters: `code`, `section`, `sub_section` (optional)
|
|
409
|
+
- Returns: List of legal text sections matching the criteria
|
|
410
|
+
|
|
411
|
+
- **`import_legal_code`** - Import a complete legal code from Gesetze im Internet
|
|
412
|
+
- Parameters: `code`
|
|
413
|
+
- Returns: Success message with import statistics
|
|
414
|
+
|
|
415
|
+
- **`get_available_codes`** - Get all available legal codes in the database
|
|
416
|
+
- Returns: List of legal code identifiers
|
|
417
|
+
|
|
418
|
+
### Using the MCP Server
|
|
419
|
+
|
|
420
|
+
The MCP Server runs on port 8001 and can be accessed by MCP-compatible clients:
|
|
421
|
+
|
|
422
|
+
```bash
|
|
423
|
+
# Check MCP server is running
|
|
424
|
+
curl http://localhost:8001/health
|
|
425
|
+
|
|
426
|
+
# The MCP server automatically connects to the Store API
|
|
427
|
+
# using LEGAL_API_BASE_URL environment variable
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
For AI assistants, configure the MCP client to connect to `http://localhost:8001` (or the appropriate host/port for your deployment).
|
|
431
|
+
|
|
432
|
+
## Legal Text Features
|
|
433
|
+
|
|
434
|
+
### Importing Legal Texts
|
|
435
|
+
|
|
436
|
+
The system automatically:
|
|
437
|
+
|
|
438
|
+
1. Scrapes legal text XML from gesetze-im-internet.de
|
|
439
|
+
2. Parses the XML into structured legal text sections
|
|
440
|
+
3. Generates embeddings for each text section using Ollama
|
|
441
|
+
4. Stores the texts with their embeddings in PostgreSQL with pgvector
|
|
442
|
+
|
|
443
|
+
### Querying Legal Texts
|
|
444
|
+
|
|
445
|
+
Query by section identifier:
|
|
446
|
+
|
|
447
|
+
```bash
|
|
448
|
+
curl "http://localhost:8000/legal-texts/gesetze-im-internet/bgb?section=%C2%A7%201"
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
### Semantic Search
|
|
452
|
+
|
|
453
|
+
Search using natural language with vector similarity:
|
|
454
|
+
|
|
455
|
+
```bash
|
|
456
|
+
curl "http://localhost:8000/legal-texts/gesetze-im-internet/bgb/search?q=Kaufvertrag&limit=5&cutoff=0.7"
|
|
457
|
+
```
|
|
458
|
+
|
|
459
|
+
Parameters:
|
|
460
|
+
|
|
461
|
+
- `q` - Search query (required)
|
|
462
|
+
- `limit` - Maximum results (1-100, default: 10)
|
|
463
|
+
- `cutoff` - Similarity threshold (0-2, default: 0.5)
|
|
464
|
+
- Lower values = stricter matching
|
|
465
|
+
- 0.3-0.5: Very strict
|
|
466
|
+
- 0.6-0.7: Good balance
|
|
467
|
+
- 0.8-1.0: More permissive
|
|
468
|
+
|
|
469
|
+
## XML Parser
|
|
470
|
+
|
|
471
|
+
The system includes a comprehensive parser for the gii-norm.dtd format used by gesetze-im-internet.de.
|
|
472
|
+
|
|
473
|
+
### Parser Features
|
|
474
|
+
|
|
475
|
+
- **Complete DTD Coverage** - All major elements from gii-norm.dtd
|
|
476
|
+
- **Structured Data** - Type-safe dataclasses for all structures
|
|
477
|
+
- **Text Extraction** - Handles complex nested text with formatting
|
|
478
|
+
- **Table Support** - Captures table structures
|
|
479
|
+
- **Footnote Handling** - Extracts footnotes with references
|
|
480
|
+
- **Metadata Parsing** - Complete metadata extraction
|
|
481
|
+
|
|
482
|
+
### Using the Parser
|
|
483
|
+
|
|
484
|
+
```python
|
|
485
|
+
from app.scrapers import GesetzteImInternetScraper
|
|
486
|
+
|
|
487
|
+
# The scraper automatically uses the XML parser
|
|
488
|
+
scraper = GesetzteImInternetScraper()
|
|
489
|
+
legal_texts = scraper.scrape('bgb')
|
|
490
|
+
|
|
491
|
+
for text in legal_texts:
|
|
492
|
+
print(f"Section: {text.section}")
|
|
493
|
+
print(f"Text: {text.text}")
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
### Parsed Metadata
|
|
497
|
+
|
|
498
|
+
The parser extracts:
|
|
499
|
+
|
|
500
|
+
- Legal abbreviations (jurabk, amtabk)
|
|
501
|
+
- Dates (ausfertigung-datum)
|
|
502
|
+
- Citations (fundstelle)
|
|
503
|
+
- Titles (kurzue, langue, titel)
|
|
504
|
+
- Structural classification (gliederungseinheit)
|
|
505
|
+
- Section designations (enbez)
|
|
506
|
+
- Version information (standangabe)
|
|
507
|
+
|
|
508
|
+
## Development
|
|
509
|
+
|
|
510
|
+
### Local Development (without Docker)
|
|
511
|
+
|
|
512
|
+
1. **Install dependencies:**
|
|
513
|
+
|
|
514
|
+
```bash
|
|
515
|
+
python -m venv .venv
|
|
516
|
+
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
|
517
|
+
pip install -r requirements.txt
|
|
518
|
+
|
|
519
|
+
# Install CLI tool in development mode
|
|
520
|
+
pip install -e .
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
2. **Set up local database:**
|
|
524
|
+
|
|
525
|
+
```bash
|
|
526
|
+
# Start only PostgreSQL
|
|
527
|
+
docker-compose up postgres -d
|
|
528
|
+
|
|
529
|
+
# Update .env to use localhost
|
|
530
|
+
# POSTGRES_HOST=localhost
|
|
531
|
+
```
|
|
532
|
+
|
|
533
|
+
3. **Run migrations:**
|
|
534
|
+
|
|
535
|
+
```bash
|
|
536
|
+
cd store
|
|
537
|
+
alembic upgrade head
|
|
538
|
+
```
|
|
539
|
+
|
|
540
|
+
4. **Start Store API:**
|
|
541
|
+
|
|
542
|
+
```bash
|
|
543
|
+
cd store
|
|
544
|
+
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
|
|
545
|
+
```
|
|
546
|
+
|
|
547
|
+
5. **Start MCP Server:**
|
|
548
|
+
|
|
549
|
+
```bash
|
|
550
|
+
cd mcp
|
|
551
|
+
export LEGAL_API_BASE_URL=http://localhost:8000
|
|
552
|
+
python -m server.main
|
|
553
|
+
```
|
|
554
|
+
|
|
555
|
+
### Running Tests
|
|
556
|
+
|
|
557
|
+
```bash
|
|
558
|
+
# Run all tests
|
|
559
|
+
pytest
|
|
560
|
+
|
|
561
|
+
# Run with coverage
|
|
562
|
+
pytest --cov=app tests/
|
|
563
|
+
|
|
564
|
+
# Run specific test file
|
|
565
|
+
pytest tests/test_main.py -v
|
|
566
|
+
|
|
567
|
+
# Run CLI tests specifically
|
|
568
|
+
pytest tests/cli/ -v
|
|
569
|
+
```
|
|
570
|
+
|
|
571
|
+
## Contributing
|
|
572
|
+
|
|
573
|
+
We welcome contributions from the community! Please see our [Contributing Guidelines](CONTRIBUTING.md) for details on:
|
|
574
|
+
|
|
575
|
+
- How to report bugs
|
|
576
|
+
- How to suggest features
|
|
577
|
+
- How to submit pull requests
|
|
578
|
+
- Development setup instructions
|
|
579
|
+
- Code style guidelines
|
|
580
|
+
|
|
581
|
+
## Code of Conduct
|
|
582
|
+
|
|
583
|
+
This project adheres to the Contributor Covenant [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior through the project's reporting mechanisms.
|
|
584
|
+
|
|
585
|
+
## Security
|
|
586
|
+
|
|
587
|
+
Security is important to us. If you discover a security vulnerability, please follow our [Security Policy](SECURITY.md) for responsible disclosure. Do not open public issues for security vulnerabilities.
|
|
588
|
+
|
|
589
|
+
## License
|
|
590
|
+
|
|
591
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
592
|
+
|
|
593
|
+
## Acknowledgments
|
|
594
|
+
|
|
595
|
+
- Legal texts sourced from [Gesetze im Internet](https://www.gesetze-im-internet.de/)
|
|
596
|
+
- Built with [FastAPI](https://fastapi.tiangolo.com/), [FastMCP](https://github.com/jlowin/fastmcp), and [Ollama](https://ollama.ai/)
|
|
597
|
+
- Vector similarity search powered by [pgvector](https://github.com/pgvector/pgvector)
|
|
598
|
+
|
|
599
|
+
## Support
|
|
600
|
+
|
|
601
|
+
- **Issues**: Open an issue on GitHub for bugs or feature requests
|
|
602
|
+
- **Discussions**: Use GitHub Discussions for questions and community chat
|
|
603
|
+
|
|
604
|
+
---
|
|
605
|
+
|
|
606
|
+
Made with ❤️ for the gov tech community
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/licenses/LICENSE,sha256=nYmjRf5ph8_xc4KESfeKX68TJnUJbhrNRJMy8NjxUpw,1070
|
|
2
|
+
legal_mcp/server/__init__.py,sha256=KZZ7Iwv2vPdo5EapB2RmLp-hR2do4LAgcEPYFxq1HAw,120
|
|
3
|
+
legal_mcp/server/main.py,sha256=HRdohwL15e3quEOFcCr1CCHPYUt5CSHK6sXA0NY0uWw,8153
|
|
4
|
+
iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/METADATA,sha256=bicwRe8inxBB14UKjZqtGbBMVTmCCey_Jpprsi2q9Cw,17895
|
|
5
|
+
iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
6
|
+
iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/entry_points.txt,sha256=mBabvmJrf7Z4wwbLqOotz2p5Vl0PNcPL7u1zBYvDUaA,50
|
|
7
|
+
iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/top_level.txt,sha256=MSYuSpfYm0Jd5SISSUE1vUCdMQsxhP6IAsE8RlIS7n8,10
|
|
8
|
+
iflow_mcp_ayunis_legal_mcp-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Daniel Benner
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
legal_mcp
|
legal_mcp/server/main.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ABOUTME: Legal MCP Server providing tools for querying German legal texts
|
|
3
|
+
ABOUTME: Standalone version with mock data for local testing
|
|
4
|
+
|
|
5
|
+
A FastMCP server providing tools for querying German legal texts.
|
|
6
|
+
This is a standalone version that uses mock data for demonstration purposes.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from fastmcp import FastMCP
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
from typing import List, Optional
|
|
12
|
+
import logging
|
|
13
|
+
|
|
14
|
+
# Route INFO-and-above records through the root logger's default handler and
# give this module its own named logger for the tool error paths.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Server instance that the @mcp.tool() decorators in this module register with.
mcp = FastMCP(name="Legal MCP Server", include_fastmcp_meta=True)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class LegalTextResult(BaseModel):
    """One legal text passage as returned by the query and search tools."""

    # Body of the passage (the tools fill this from the entry's "content").
    text: str
    # Legal code identifier, echoed back exactly as the caller supplied it.
    code: str
    # Section designation of the passage, e.g. "§ 1" or "Art 1".
    section: str
    # Sub-section designation; the mock data uses "" when there is none.
    sub_section: str
    # Distance-style score set by the search tool (lower is a better match);
    # left as None for plain section lookups.
    similarity_score: Optional[float] = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Mock data for demonstration purposes.
#
# Maps a lower-case legal code identifier to a list of entries. Each entry has:
#   "text"        - heading line (code name, section and title)
#   "section"     - section designation used for exact lookups
#   "sub_section" - sub-section designation ("" when not applicable)
#   "content"     - the passage itself; this is what the tools search and return
#
# The German text is stored as real Unicode (§, ä, ö, ü, ß) so that lookups such
# as section="§ 1" and keyword searches containing umlauts can match.
MOCK_LEGAL_TEXTS = {
    "bgb": [
        {
            "text": "Bürgerliches Gesetzbuch (BGB) - § 1: Beginn der Rechtsfähigkeit",
            "section": "§ 1",
            "sub_section": "",
            "content": "Die Rechtsfähigkeit des Menschen beginnt mit der Vollendung der Geburt.",
        },
        {
            "text": "Bürgerliches Gesetzbuch (BGB) - § 2: Volljährigkeit",
            "section": "§ 2",
            "sub_section": "",
            "content": "Volljährig ist, wer das 18. Lebensjahr vollendet hat.",
        },
        {
            "text": "Bürgerliches Gesetzbuch (BGB) - § 433: Vertragstypische Pflichten beim Kaufvertrag",
            "section": "§ 433",
            "sub_section": "",
            "content": (
                "(1) Durch den Kaufvertrag wird der Verkäufer einer Sache verpflichtet, "
                "dem Käufer die Sache zu übergeben und das Eigentum an der Sache zu "
                "verschaffen. (2) Der Käufer ist verpflichtet, dem Verkäufer den "
                "vereinbarten Kaufpreis zu zahlen und die gekaufte Sache abzunehmen."
            ),
        },
    ],
    "stgb": [
        {
            "text": "Strafgesetzbuch (StGB) - § 1: Keine Strafe ohne Gesetz",
            "section": "§ 1",
            "sub_section": "",
            "content": (
                "Eine Tat kann nur bestraft werden, wenn die Strafbarkeit gesetzlich "
                "bestimmt war, bevor die Tat begangen wurde."
            ),
        },
        {
            "text": "Strafgesetzbuch (StGB) - § 2: Zeitliche Geltung",
            "section": "§ 2",
            "sub_section": "",
            "content": (
                "Die Strafe und ihre Nebenfolgen bestimmen sich nach dem Gesetz, "
                "das zur Zeit der Tat gilt."
            ),
        },
    ],
    "gg": [
        {
            "text": "Grundgesetz (GG) - Art. 1: Menschenwürde",
            "section": "Art 1",
            "sub_section": "",
            "content": (
                "(1) Die Würde des Menschen ist unantastbar. Sie zu achten und zu "
                "schützen ist Verpflichtung aller staatlichen Gewalt. (2) Das Deutsche "
                "Volk bekennt sich darum zu unverletzlichen und unveräußerlichen "
                "Menschenrechten als Grundlage jeder menschlichen Gemeinschaft, des "
                "Friedens und der Gerechtigkeit in der Welt."
            ),
        },
    ],
}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@mcp.tool()
async def search_legal_texts(
    query: str = Field(description="The search query text"),
    code: str = Field(description="Legal code identifier (e.g., 'bgb', 'stgb')"),
    limit: int = Field(default=5, description="Maximum number of results", ge=1, le=20),
    cutoff: float = Field(
        default=0.7,
        description="Similarity threshold (0-2, lower is more similar)",
        ge=0.0,
        le=2.0,
    ),
) -> List[LegalTextResult]:
    """
    Search the mock German legal texts for passages related to a query.

    This standalone version does not use embeddings: the query is split on
    whitespace and each word is looked up as a case-insensitive substring of
    the passage content. The score is ``1 - matched_words / total_words``, so
    0.0 means every query word was found and lower scores are better matches.

    Args:
        query: Natural language search query
        code: Legal code to search (bgb=Civil Code, stgb=Criminal Code);
            matched case-insensitively
        limit: Maximum number of results to return (1-20)
        cutoff: Maximum similarity distance threshold (0-2); passages whose
            score is above this value are dropped

    Returns:
        Matching legal text sections ordered from best to worst score. Empty
        when the code is unknown, the query has no words, or nothing matches.

    Raises:
        RuntimeError: If the search fails unexpectedly; the original
            exception is attached as the cause.
    """
    try:
        entries = MOCK_LEGAL_TEXTS.get(code.lower())
        if not entries:
            return []

        # Tokenise once; the query does not change between entries.
        words = query.lower().split()
        if not words:
            return []

        results = []
        for text_data in entries:
            content = text_data["content"].lower()
            match_count = sum(1 for word in words if word in content)
            if match_count == 0:
                continue

            similarity_score = max(0.0, 1.0 - (match_count / len(words)))
            # Honour the caller's threshold: it was previously accepted but ignored.
            if similarity_score > cutoff:
                continue

            results.append(
                LegalTextResult(
                    text=text_data["content"],
                    code=code,
                    section=text_data["section"],
                    sub_section=text_data["sub_section"],
                    similarity_score=similarity_score,
                )
            )

        # Best (lowest-distance) matches first, then apply the result cap.
        results.sort(key=lambda result: result.similarity_score or 0)
        return results[:limit]
    except Exception as e:
        logger.exception("Error searching legal texts: %s", e)
        raise RuntimeError(f"Error searching legal texts: {str(e)}") from e
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@mcp.tool()
async def get_legal_section(
    code: str = Field(description="Legal code identifier (e.g., 'bgb', 'stgb')"),
    section: str = Field(description="Section identifier (e.g., '§ 1', 'Art 1')"),
    sub_section: Optional[str] = Field(
        default=None,
        description="Optional sub-section identifier (e.g., '1', '2a')",
    ),
) -> List[LegalTextResult]:
    """
    Retrieve specific legal text sections by code and section number.

    Section identifiers are compared case-insensitively with all whitespace
    removed, so '§ 1', '§1' and '§\u00a01' (non-breaking space) all refer to
    the same section. The sub-section, when given, must match exactly.

    Args:
        code: Legal code identifier (bgb, stgb, etc.); matched case-insensitively
        section: Section identifier (e.g., '§ 1')
        sub_section: Optional sub-section identifier; ignored when None or empty

    Returns:
        List of legal text sections matching the criteria (empty when the
        code is unknown or nothing matches)

    Raises:
        RuntimeError: If the lookup fails unexpectedly; the original
            exception is attached as the cause.
    """
    try:
        entries = MOCK_LEGAL_TEXTS.get(code.lower())
        if not entries:
            return []

        # str.split() with no argument splits on any Unicode whitespace, which
        # also covers the non-breaking space commonly typed after '§'.
        wanted_section = "".join(section.lower().split())

        results = []
        for text_data in entries:
            if "".join(text_data["section"].lower().split()) != wanted_section:
                continue
            if sub_section and text_data["sub_section"] != sub_section:
                continue

            results.append(
                LegalTextResult(
                    text=text_data["content"],
                    code=code,
                    section=text_data["section"],
                    sub_section=text_data["sub_section"],
                )
            )

        return results
    except Exception as e:
        logger.exception("Error getting legal section: %s", e)
        raise RuntimeError(f"Error getting legal section: {str(e)}") from e
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
@mcp.tool()
async def get_available_codes() -> List[str]:
    """
    List the identifiers of every legal code this server can answer for.

    The identifiers are the keys of the in-memory mock data set, in the
    order in which they are defined.

    Returns:
        List of available legal code identifiers (e.g., ['bgb', 'stgb', 'gg'])

    Raises:
        RuntimeError: If building the list fails for any reason.
    """
    try:
        available = [identifier for identifier in MOCK_LEGAL_TEXTS]
    except Exception as e:
        logger.error(f"Error getting available codes: {e}")
        raise RuntimeError(f"Error getting available codes: {str(e)}")
    return available
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def main():
    """Start the Legal MCP server and block until it stops.

    ``mcp.run()`` is called without arguments, so FastMCP picks its default
    transport (the original note describes this as stdio, for local testing).
    """
    mcp.run()


# Start the server when this module is executed directly (or via the FastMCP CLI).
if __name__ == "__main__":
    main()
|