irish-census-mcp 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- irish_census_mcp-0.2.0/.github/workflows/publish.yml +90 -0
- irish_census_mcp-0.2.0/.gitignore +37 -0
- irish_census_mcp-0.2.0/1901_1911_CENSUS.md +281 -0
- irish_census_mcp-0.2.0/1926_CENSUS.md +324 -0
- irish_census_mcp-0.2.0/LICENSE +21 -0
- irish_census_mcp-0.2.0/MCP_ARCHITECTURE.md +458 -0
- irish_census_mcp-0.2.0/PKG-INFO +553 -0
- irish_census_mcp-0.2.0/PRE_FAMINE_CENSUS.md +281 -0
- irish_census_mcp-0.2.0/README.md +540 -0
- irish_census_mcp-0.2.0/fastmcp.json +6 -0
- irish_census_mcp-0.2.0/pyproject.toml +36 -0
- irish_census_mcp-0.2.0/src/irish_census_mcp/__init__.py +9 -0
- irish_census_mcp-0.2.0/src/irish_census_mcp/__main__.py +6 -0
- irish_census_mcp-0.2.0/src/irish_census_mcp/api.py +236 -0
- irish_census_mcp-0.2.0/src/irish_census_mcp/gateway.py +532 -0
- irish_census_mcp-0.2.0/src/irish_census_mcp/matching.py +133 -0
- irish_census_mcp-0.2.0/src/irish_census_mcp/normalize.py +148 -0
- irish_census_mcp-0.2.0/src/irish_census_mcp/places.py +211 -0
- irish_census_mcp-0.2.0/src/irish_census_mcp/server.py +198 -0
- irish_census_mcp-0.2.0/tests/test_live_smoke.py +139 -0
- irish_census_mcp-0.2.0/tests/test_walkthrough.py +91 -0
- irish_census_mcp-0.2.0/uv.lock +1521 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
build:
|
|
14
|
+
name: Build distribution
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
steps:
|
|
17
|
+
- name: Checkout
|
|
18
|
+
uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Install uv
|
|
21
|
+
uses: astral-sh/setup-uv@v6
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.11"
|
|
24
|
+
|
|
25
|
+
- name: Verify package imports
|
|
26
|
+
run: |
|
|
27
|
+
uv sync --frozen
|
|
28
|
+
uv run python -c "
|
|
29
|
+
import asyncio
|
|
30
|
+
from irish_census_mcp.server import mcp
|
|
31
|
+
async def check():
|
|
32
|
+
tools = await mcp.list_tools()
|
|
33
|
+
names = {t.name for t in tools}
|
|
34
|
+
expected = {'resolve_place', 'search_people', 'get_household',
|
|
35
|
+
'get_person', 'find_relatives', 'get_scan_url'}
|
|
36
|
+
assert names == expected, f'tool mismatch: got {names}, expected {expected}'
|
|
37
|
+
print(f'OK: {len(tools)} tools registered on {mcp.name!r}')
|
|
38
|
+
asyncio.run(check())
|
|
39
|
+
"
|
|
40
|
+
|
|
41
|
+
- name: Verify version matches the pushed tag
|
|
42
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
43
|
+
run: |
|
|
44
|
+
TAG_VERSION="${GITHUB_REF_NAME#v}"
|
|
45
|
+
PKG_VERSION=$(uv run python -c "from importlib.metadata import version; print(version('irish-census-mcp'))")
|
|
46
|
+
echo "tag=$TAG_VERSION pkg=$PKG_VERSION"
|
|
47
|
+
if [ "$TAG_VERSION" != "$PKG_VERSION" ]; then
|
|
48
|
+
echo "::error::Tag $TAG_VERSION does not match package version $PKG_VERSION. Bump pyproject.toml before tagging."
|
|
49
|
+
exit 1
|
|
50
|
+
fi
|
|
51
|
+
|
|
52
|
+
- name: Build sdist and wheel
|
|
53
|
+
run: uv build
|
|
54
|
+
|
|
55
|
+
- name: Inspect built artifacts
|
|
56
|
+
run: |
|
|
57
|
+
ls -lh dist/
|
|
58
|
+
uv run python -m zipfile -l dist/*.whl
|
|
59
|
+
|
|
60
|
+
- name: Upload distribution artifacts
|
|
61
|
+
uses: actions/upload-artifact@v4
|
|
62
|
+
with:
|
|
63
|
+
name: dist
|
|
64
|
+
path: dist/
|
|
65
|
+
if-no-files-found: error
|
|
66
|
+
|
|
67
|
+
publish:
|
|
68
|
+
name: Publish to PyPI
|
|
69
|
+
needs: build
|
|
70
|
+
runs-on: ubuntu-latest
|
|
71
|
+
# Only publish on actual tag pushes — not workflow_dispatch dry-runs.
|
|
72
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
73
|
+
environment:
|
|
74
|
+
name: pypi
|
|
75
|
+
url: https://pypi.org/p/irish-census-mcp
|
|
76
|
+
permissions:
|
|
77
|
+
id-token: write # required for Trusted Publishing (OIDC)
|
|
78
|
+
steps:
|
|
79
|
+
- name: Download distribution artifacts
|
|
80
|
+
uses: actions/download-artifact@v4
|
|
81
|
+
with:
|
|
82
|
+
name: dist
|
|
83
|
+
path: dist/
|
|
84
|
+
|
|
85
|
+
- name: Publish to PyPI
|
|
86
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
87
|
+
with:
|
|
88
|
+
# Trusted Publishing is configured at https://pypi.org/manage/account/publishing/
|
|
89
|
+
# No API token needed — the action authenticates via OIDC.
|
|
90
|
+
verbose: true
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
.eggs/
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
.coverage
|
|
10
|
+
.coverage.*
|
|
11
|
+
htmlcov/
|
|
12
|
+
.tox/
|
|
13
|
+
.nox/
|
|
14
|
+
.mypy_cache/
|
|
15
|
+
.ruff_cache/
|
|
16
|
+
.pytest_cache/
|
|
17
|
+
|
|
18
|
+
# Virtual environments
|
|
19
|
+
.venv/
|
|
20
|
+
venv/
|
|
21
|
+
env/
|
|
22
|
+
|
|
23
|
+
# uv
|
|
24
|
+
.uv/
|
|
25
|
+
|
|
26
|
+
# Editor / IDE
|
|
27
|
+
.vscode/
|
|
28
|
+
.idea/
|
|
29
|
+
*.swp
|
|
30
|
+
*.swo
|
|
31
|
+
.DS_Store
|
|
32
|
+
|
|
33
|
+
# Local Claude Code state (per-user, not part of the project)
|
|
34
|
+
.claude/
|
|
35
|
+
|
|
36
|
+
# Downloaded scans during testing
|
|
37
|
+
*.pdf
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
# Irish 1901 & 1911 Census — API Reference
|
|
2
|
+
|
|
3
|
+
Unofficial reverse-engineered notes for the public JSON API behind the
|
|
4
|
+
National Archives of Ireland's 1901/1911 census search at
|
|
5
|
+
[`https://www.census.nationalarchives.ie/`](https://www.census.nationalarchives.ie/).
|
|
6
|
+
The same API is also called from the newer 2026 unified site.
|
|
7
|
+
|
|
8
|
+
These are the two surviving full Irish censuses from before independence —
|
|
9
|
+
all earlier 19th-century enumerators' returns (1861, 1871, 1881, 1891)
|
|
10
|
+
were destroyed in the Four Courts fire of 1922; the 1901 and 1911 books
|
|
11
|
+
survived because they were stored elsewhere. This corpus covers all 32
|
|
12
|
+
counties (i.e. the whole island, not just the later Free State).
|
|
13
|
+
|
|
14
|
+
> **No authentication required.** CORS is wide-open
|
|
15
|
+
> (`Access-Control-Allow-Origin: *`). Throttle yourself and identify with
|
|
16
|
+
> a sensible `User-Agent`.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Host
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
https://api-census.nationalarchives.ie
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Backend is `nginx/1.26.2` with a `census` handler. Responses carry
|
|
27
|
+
`x-cached: HIT|EXPIRED` headers, so the upstream is well-cached.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Endpoints
|
|
32
|
+
|
|
33
|
+
| Endpoint | Purpose |
|
|
34
|
+
| --- | --- |
|
|
35
|
+
| `GET /census/query` | Person search |
|
|
36
|
+
| `GET /census/facets` | Facet counts |
|
|
37
|
+
| `GET /census/image/{nai_id}.pdf` | Scan PDF (307-redirects to signed Linode URL) |
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## `GET /census/query`
|
|
42
|
+
|
|
43
|
+
### Query parameters
|
|
44
|
+
|
|
45
|
+
Filters use Django-style suffixes; no suffix means exact match. Unknown
|
|
46
|
+
parameters are silently ignored (no validation error).
|
|
47
|
+
|
|
48
|
+
| Parameter | Type | Example | Notes |
|
|
49
|
+
| --- | --- | --- | --- |
|
|
50
|
+
| `surname` / `surname__icontains` / `surname__iexact` | str | `Murphy` | |
|
|
51
|
+
| `firstname` / `firstname__icontains` / `firstname__iexact` | str | `Denis` | **One word**, not `first_name` |
|
|
52
|
+
| `census_year` | `1901` \| `1911` | `1911` | Filter to one census |
|
|
53
|
+
| `county` | str | `Cork` | All 32 counties; spelling per 1901-era usage (`Londonderry`, `Queens` for Laois, `Kings` for Offaly) |
|
|
54
|
+
| `ded` / `ded__icontains` | str | `Skibbereen Rural` | District Electoral Division |
|
|
55
|
+
| `townland` / `townland__icontains` | str | `Coolnagarrane` | |
|
|
56
|
+
| `house_number` | str | `7` | |
|
|
57
|
+
| `sex` | `M` / `F` | `F` | |
|
|
58
|
+
| `age` / `age__gte` / `age__lte` | int | `80` / `85` | Integer column — range filters work directly |
|
|
59
|
+
| `religion` | str | `Roman Catholic` | Raw transcription (`R Catholic`, `R.C.`, etc. all appear) |
|
|
60
|
+
| `religion_updated` | str | `Roman Catholic` | **Normalized — prefer this for filtering** |
|
|
61
|
+
| `occupation` / `occupation__icontains` | str | `Farmer` | |
|
|
62
|
+
| `occupation_updated` | str | `Farmer` | Normalized |
|
|
63
|
+
| `language` | str | `Irish & English` | Raw |
|
|
64
|
+
| `language_updated` | str | | Normalized |
|
|
65
|
+
| `relation_to_head` | str | `Head of Family` | Raw |
|
|
66
|
+
| `relation_to_head_updated` | str | | Normalized |
|
|
67
|
+
| `marriage_status` | str | `Married`, `Single`, `Widower`, `Not Married` | |
|
|
68
|
+
| `marriage_years` | int | `15` | Integer where parseable |
|
|
69
|
+
| `children_born` / `children_living` | int | | Numeric in 1911 (1901 forms didn't ask) |
|
|
70
|
+
| `birthplace` / `birthplace__icontains` | str | `Co Dublin` | Raw transcription — very inconsistent |
|
|
71
|
+
| `education` | str | `Read and write` | |
|
|
72
|
+
| `deafdumb` | str | | Disability column from the form |
|
|
73
|
+
| `image_group` | str | `388680` | All people in a household share this — use it to reconstruct a family |
|
|
74
|
+
| `id` | int | `1434682` | Per-person primary key |
|
|
75
|
+
| `limit` | int | `30` | Default page size |
|
|
76
|
+
| `offset` | int | `0` | Pagination cursor |
|
|
77
|
+
|
|
78
|
+
### Response
|
|
79
|
+
|
|
80
|
+
```json
|
|
81
|
+
{
|
|
82
|
+
"results": [
|
|
83
|
+
{
|
|
84
|
+
"id": 1434682,
|
|
85
|
+
"census_year": 1911,
|
|
86
|
+
"county": "Cork",
|
|
87
|
+
"ded": "Cork No. 4 Urban (part of)",
|
|
88
|
+
"townland": "Rathmore Buildings",
|
|
89
|
+
"house_number": "72",
|
|
90
|
+
"firstname": "Denis",
|
|
91
|
+
"surname": "Murphy",
|
|
92
|
+
"age": 34,
|
|
93
|
+
"sex": "M",
|
|
94
|
+
"relation_to_head": "Son",
|
|
95
|
+
"relation_to_head_updated": "Son",
|
|
96
|
+
"religion": "R Catholic",
|
|
97
|
+
"religion_updated": "Roman Catholic",
|
|
98
|
+
"education": "Read and write",
|
|
99
|
+
"occupation": "Labourer",
|
|
100
|
+
"occupation_updated": "Labourer",
|
|
101
|
+
"marriage_status": "Single",
|
|
102
|
+
"marriage_years": null,
|
|
103
|
+
"children_born": null,
|
|
104
|
+
"children_living": null,
|
|
105
|
+
"birthplace": "Cork City",
|
|
106
|
+
"language": "English",
|
|
107
|
+
"language_updated": "English Only",
|
|
108
|
+
"deafdumb": null,
|
|
109
|
+
"image_group": "388680",
|
|
110
|
+
"images": [
|
|
111
|
+
{ "form": "Form A", "side": "1", "id": "nai001861365", "url": "/census/image/nai001861365.pdf" },
|
|
112
|
+
{ "form": "Form A", "side": "2", "id": "nai001861366", "url": "/census/image/nai001861366.pdf" },
|
|
113
|
+
{ "form": "Form N", "side": "1", "id": "nai001861204", "url": "/census/image/nai001861204.pdf" },
|
|
114
|
+
{ "form": "Form B1", "side": "1", "id": "nai001861206", "url": "/census/image/nai001861206.pdf" }
|
|
115
|
+
]
|
|
116
|
+
}
|
|
117
|
+
],
|
|
118
|
+
"meta": {
|
|
119
|
+
"count": 8830075,
|
|
120
|
+
"next": "?limit=1&offset=1",
|
|
121
|
+
"prev": null
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**Big convenience compared to the 1926 API:** each row already embeds an
|
|
127
|
+
`images` array — no follow-up `related_images` call needed.
|
|
128
|
+
|
|
129
|
+
### Forms returned
|
|
130
|
+
|
|
131
|
+
| Form | Meaning |
|
|
132
|
+
| --- | --- |
|
|
133
|
+
| **Form A** | Individual return — the household head's record of every person resident on census night |
|
|
134
|
+
| **Form B1** | House and Building Return — building fabric (walls, roof, windows, rooms) |
|
|
135
|
+
| **Form B2** | Out-Offices and Farm-Steadings Return — stables, cow houses, piggeries, barns |
|
|
136
|
+
| **Form N** | Enumerator's Abstract — summary page the enumerator compiled |
|
|
137
|
+
|
|
138
|
+
Each form may have 2 `side`s (front and back of the original page).
|
|
139
|
+
Institutional returns (workhouses, asylums, barracks, ships) use
|
|
140
|
+
different form letters (D, E, F, G, H) and may or may not appear here.
|
|
141
|
+
|
|
142
|
+
### Row counts
|
|
143
|
+
|
|
144
|
+
```
|
|
145
|
+
1901: 4,434,938 rows
|
|
146
|
+
1911: 4,395,137 rows
|
|
147
|
+
Total: 8,830,075
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## `GET /census/facets`
|
|
153
|
+
|
|
154
|
+
Same filter parameters as `query`. Returns top-N value counts to drive
|
|
155
|
+
sidebar drill-down.
|
|
156
|
+
|
|
157
|
+
Returned facet fields (top 20 values each, top 14 for `age`):
|
|
158
|
+
|
|
159
|
+
```
|
|
160
|
+
county, ded, townland, surname, age, birthplace,
|
|
161
|
+
religion_updated, occupation_updated, language_updated
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Response shape:
|
|
165
|
+
|
|
166
|
+
```json
|
|
167
|
+
[
|
|
168
|
+
{
|
|
169
|
+
"field": "occupation_updated",
|
|
170
|
+
"counts": [
|
|
171
|
+
{"value": "Scholar", "ct": 1693048},
|
|
172
|
+
{"value": "Farmer", "ct": 775485},
|
|
173
|
+
...
|
|
174
|
+
]
|
|
175
|
+
},
|
|
176
|
+
...
|
|
177
|
+
]
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
(`Scholar` topping the occupation list reflects compulsory schooling — every
|
|
181
|
+
school-age child was recorded as "Scholar".)
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## `GET /census/image/{nai_id}.pdf`
|
|
186
|
+
|
|
187
|
+
Returns the scan as a PDF.
|
|
188
|
+
|
|
189
|
+
```
|
|
190
|
+
GET /census/image/nai003096222.pdf
|
|
191
|
+
→ 307 Temporary Redirect
|
|
192
|
+
Location: https://nl-ams-1.linodeobjects.com/nai-census/1901-11/000/003/096/222.pdf
|
|
193
|
+
?AWSAccessKeyId=…&Signature=…&Expires=1778444264
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
- The redirect target is a **signed Linode Object Storage URL** that
|
|
197
|
+
**expires in 30 minutes** (`Expires` query param + `Cache-Control: max-age=1800`).
|
|
198
|
+
Don't cache the redirected URL — cache the API URL and let the server
|
|
199
|
+
re-sign on each request.
|
|
200
|
+
- Hosted in Linode's Amsterdam region (`nl-ams-1`).
|
|
201
|
+
- The `nai_id` is exactly the string in `images[].id` (e.g. `nai001861365`).
|
|
202
|
+
Always append `.pdf` — bare IDs return 404.
|
|
203
|
+
- A typical Form A side is ~600 KB, single page; institutional forms can
|
|
204
|
+
be much larger.
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## Typical lookup flow
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
BASE=https://api-census.nationalarchives.ie/census
|
|
212
|
+
|
|
213
|
+
# 1. Search
|
|
214
|
+
curl -s "$BASE/query?surname__icontains=Murphy&county=Cork&census_year=1911&limit=10"
|
|
215
|
+
|
|
216
|
+
# 2. Pick a row — note its image_group to fetch household-mates
|
|
217
|
+
curl -s "$BASE/query?image_group=388680"
|
|
218
|
+
|
|
219
|
+
# 3. Download a scan (the URL from images[].url is relative, prepend host)
|
|
220
|
+
curl -sL -o form_a_side_1.pdf \
|
|
221
|
+
"$BASE/image/nai001861365.pdf"
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
The 1926 API requires three calls to do this (query → related_images →
|
|
225
|
+
image_c26). Here, step 1 already gives you the image URLs.
|
|
226
|
+
|
|
227
|
+
---
|
|
228
|
+
|
|
229
|
+
## Data quality notes
|
|
230
|
+
|
|
231
|
+
- The **raw** columns (`religion`, `occupation`, `relation_to_head`,
|
|
232
|
+
`language`, `birthplace`) preserve enumerator handwriting variation —
|
|
233
|
+
expect every plausible abbreviation of "Roman Catholic": `R Catholic`,
|
|
234
|
+
`R.C.`, `Rom Cath`, `Roman Catholic`, …
|
|
235
|
+
- The **`*_updated`** columns are post-hoc normalizations. Filter on
|
|
236
|
+
those; display the raw column if you want fidelity to the original.
|
|
237
|
+
- `birthplace` is the noisiest field — free text covering counties,
|
|
238
|
+
countries, ships, regiments, and one-off place names. Use
|
|
239
|
+
`__icontains` rather than exact match.
|
|
240
|
+
- `children_born` / `children_living` only appear on 1911 forms — the
|
|
241
|
+
1901 schema didn't ask the question. Don't treat them as numerics
|
|
242
|
+
without null-checking.
|
|
243
|
+
- `marriage_years` is integer-typed in this API (unlike the 1926 schema
|
|
244
|
+
where it's a raw string).
|
|
245
|
+
- 1911 `age` for women may show systematic **suffrage-protest blanks** —
|
|
246
|
+
some respondents refused to fill it in. Don't read too much into
|
|
247
|
+
null-age clusters.
|
|
248
|
+
|
|
249
|
+
---
|
|
250
|
+
|
|
251
|
+
## County names — gotchas
|
|
252
|
+
|
|
253
|
+
The 1901/1911 census predates the renaming of two counties (Laois and Offaly) and uses the official `Londonderry` spelling for a third:
|
|
254
|
+
|
|
255
|
+
| Modern name | 1901/1911 name |
|
|
256
|
+
| --- | --- |
|
|
257
|
+
| Laois | `Queens` (sometimes `Queen's`) |
|
|
258
|
+
| Offaly | `Kings` (sometimes `King's`) |
|
|
259
|
+
| Derry / Londonderry | `Londonderry` |
|
|
260
|
+
|
|
261
|
+
If you're cross-referencing with the 1926 corpus (Free State only,
|
|
262
|
+
modern names), you'll need to map between these.
|
|
263
|
+
|
|
264
|
+
---
|
|
265
|
+
|
|
266
|
+
## Operational details
|
|
267
|
+
|
|
268
|
+
- **Server:** `nginx/1.26.2` with `x-handler: census`
|
|
269
|
+
- **CORS:** `Access-Control-Allow-Origin: *` (no origin pinning, unlike the 1926 API)
|
|
270
|
+
- **HTTPS:** mandatory
|
|
271
|
+
- **Unknown query params:** silently ignored, no 422
|
|
272
|
+
- **Pagination:** `meta.next` / `meta.prev` are relative query strings; append to the endpoint path
|
|
273
|
+
- **Validation errors:** `{"detail": "..."}` JSON body
|
|
274
|
+
- **Image redirects:** 307 with an 1800-second (30-minute) TTL — single-flight per ID is plenty
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## See also
|
|
279
|
+
|
|
280
|
+
- [`1926_CENSUS.md`](./1926_CENSUS.md) — the post-independence 1926 census (different host, different schema, three-step image flow)
|
|
281
|
+
- [`PRE_FAMINE_CENSUS.md`](./PRE_FAMINE_CENSUS.md) — surviving fragments from 1821, 1831, 1841, 1851 (same host as this API, `query_c19` / `facets_c19` endpoints)
|