apra-mcp 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apra_mcp-0.1.2/.github/workflows/codeql.yml +37 -0
- apra_mcp-0.1.2/.github/workflows/publish.yml +58 -0
- apra_mcp-0.1.2/.github/workflows/refresh-urls.yml +56 -0
- apra_mcp-0.1.2/.github/workflows/test.yml +47 -0
- apra_mcp-0.1.2/.gitignore +17 -0
- apra_mcp-0.1.2/CHANGELOG.md +172 -0
- apra_mcp-0.1.2/CODE_OF_CONDUCT.md +37 -0
- apra_mcp-0.1.2/CONTRIBUTING.md +110 -0
- apra_mcp-0.1.2/LICENSE +21 -0
- apra_mcp-0.1.2/PKG-INFO +169 -0
- apra_mcp-0.1.2/README.md +135 -0
- apra_mcp-0.1.2/SECURITY.md +44 -0
- apra_mcp-0.1.2/examples/claude_desktop_config.json +8 -0
- apra_mcp-0.1.2/examples/demo_prompts.md +77 -0
- apra_mcp-0.1.2/glama.json +4 -0
- apra_mcp-0.1.2/llms.txt +99 -0
- apra_mcp-0.1.2/pyproject.toml +62 -0
- apra_mcp-0.1.2/scripts/refresh_seed.py +83 -0
- apra_mcp-0.1.2/src/apra_mcp/__init__.py +8 -0
- apra_mcp-0.1.2/src/apra_mcp/cache.py +176 -0
- apra_mcp-0.1.2/src/apra_mcp/catalog.py +44 -0
- apra_mcp-0.1.2/src/apra_mcp/client.py +197 -0
- apra_mcp-0.1.2/src/apra_mcp/curated.py +379 -0
- apra_mcp-0.1.2/src/apra_mcp/data/curated/ADI_KEY_STATS.yaml +149 -0
- apra_mcp-0.1.2/src/apra_mcp/data/curated/ADI_RISK_WEIGHTED_ASSETS.yaml +137 -0
- apra_mcp-0.1.2/src/apra_mcp/data/curated/INSURANCE_GENERAL.yaml +145 -0
- apra_mcp-0.1.2/src/apra_mcp/data/curated/INSURANCE_GENERAL_HISTORICAL.yaml +128 -0
- apra_mcp-0.1.2/src/apra_mcp/data/curated/LIFE_INSURANCE.yaml +112 -0
- apra_mcp-0.1.2/src/apra_mcp/data/curated/LIFE_INSURANCE_HISTORICAL.yaml +102 -0
- apra_mcp-0.1.2/src/apra_mcp/data/curated/SUPER_FUND_LEVEL.yaml +184 -0
- apra_mcp-0.1.2/src/apra_mcp/data/seed_urls.json +15 -0
- apra_mcp-0.1.2/src/apra_mcp/discovery.py +307 -0
- apra_mcp-0.1.2/src/apra_mcp/models.py +120 -0
- apra_mcp-0.1.2/src/apra_mcp/parsing.py +115 -0
- apra_mcp-0.1.2/src/apra_mcp/py.typed +0 -0
- apra_mcp-0.1.2/src/apra_mcp/server.py +737 -0
- apra_mcp-0.1.2/src/apra_mcp/shaping.py +482 -0
- apra_mcp-0.1.2/tests/__init__.py +0 -0
- apra_mcp-0.1.2/tests/conftest.py +64 -0
- apra_mcp-0.1.2/tests/fixtures/adi_key_stats_sample.xlsx +0 -0
- apra_mcp-0.1.2/tests/fixtures/adi_rwa_sample.xlsx +0 -0
- apra_mcp-0.1.2/tests/fixtures/insurance_general_historical_sample.xlsx +0 -0
- apra_mcp-0.1.2/tests/fixtures/insurance_general_sample.xlsx +0 -0
- apra_mcp-0.1.2/tests/fixtures/life_insurance_historical_sample.xlsx +0 -0
- apra_mcp-0.1.2/tests/fixtures/life_insurance_sample.xlsx +0 -0
- apra_mcp-0.1.2/tests/fixtures/super_fund_level_sample.xlsx +0 -0
- apra_mcp-0.1.2/tests/test_cache.py +198 -0
- apra_mcp-0.1.2/tests/test_client.py +181 -0
- apra_mcp-0.1.2/tests/test_curated.py +208 -0
- apra_mcp-0.1.2/tests/test_customer_flows.py +171 -0
- apra_mcp-0.1.2/tests/test_did_you_mean.py +55 -0
- apra_mcp-0.1.2/tests/test_discovery.py +253 -0
- apra_mcp-0.1.2/tests/test_edge_data.py +210 -0
- apra_mcp-0.1.2/tests/test_edge_inputs.py +154 -0
- apra_mcp-0.1.2/tests/test_integration.py +207 -0
- apra_mcp-0.1.2/tests/test_latest_long_format.py +106 -0
- apra_mcp-0.1.2/tests/test_parsing.py +130 -0
- apra_mcp-0.1.2/tests/test_period_normalisation.py +147 -0
- apra_mcp-0.1.2/tests/test_resilience.py +151 -0
- apra_mcp-0.1.2/tests/test_server_validation.py +225 -0
- apra_mcp-0.1.2/tests/test_shaping.py +282 -0
- apra_mcp-0.1.2/tests/test_top_n.py +146 -0
- apra_mcp-0.1.2/uv.lock +1748 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: codeql
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
schedule:
|
|
9
|
+
- cron: "57 6 * * 1" # Mondays 06:57 UTC, offset from sister MCPs
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
analyze:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
permissions:
|
|
15
|
+
actions: read
|
|
16
|
+
contents: read
|
|
17
|
+
security-events: write
|
|
18
|
+
strategy:
|
|
19
|
+
fail-fast: false
|
|
20
|
+
matrix:
|
|
21
|
+
language: [python]
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Initialise CodeQL
|
|
26
|
+
uses: github/codeql-action/init@v3
|
|
27
|
+
with:
|
|
28
|
+
languages: ${{ matrix.language }}
|
|
29
|
+
queries: security-extended
|
|
30
|
+
|
|
31
|
+
- name: Autobuild
|
|
32
|
+
uses: github/codeql-action/autobuild@v3
|
|
33
|
+
|
|
34
|
+
- name: Analyze
|
|
35
|
+
uses: github/codeql-action/analyze@v3
|
|
36
|
+
with:
|
|
37
|
+
category: "/language:${{ matrix.language }}"
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
name: publish
|
|
2
|
+
|
|
3
|
+
# Publishes the wheel + sdist to PyPI on every published Release.
|
|
4
|
+
# Uses PyPI Trusted Publishing (OIDC) — no token in repo secrets.
|
|
5
|
+
#
|
|
6
|
+
# Setup (one-time, done once on PyPI):
|
|
7
|
+
# pypi.org → Your projects → apra-mcp → Publishing
|
|
8
|
+
# (or for the first release: pypi.org → "Pending publishers" → Add)
|
|
9
|
+
# Owner=Bigred97, Repository=apra-mcp, Workflow=publish.yml, Environment=pypi
|
|
10
|
+
#
|
|
11
|
+
# Trigger: create a GitHub Release. The release's tag (e.g. v0.1.3) drives
|
|
12
|
+
# the version that gets uploaded. Make sure pyproject.toml's `version`
|
|
13
|
+
# matches before tagging.
|
|
14
|
+
|
|
15
|
+
on:
|
|
16
|
+
release:
|
|
17
|
+
types: [published]
|
|
18
|
+
workflow_dispatch:
|
|
19
|
+
|
|
20
|
+
jobs:
|
|
21
|
+
build:
|
|
22
|
+
name: Build distributions
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v4
|
|
26
|
+
- name: Install uv
|
|
27
|
+
uses: astral-sh/setup-uv@v3
|
|
28
|
+
with:
|
|
29
|
+
enable-cache: true
|
|
30
|
+
- name: Set up Python
|
|
31
|
+
run: uv python install 3.12
|
|
32
|
+
- name: Build wheel + sdist
|
|
33
|
+
run: uv build
|
|
34
|
+
- name: Verify wheel installs cleanly
|
|
35
|
+
run: |
|
|
36
|
+
uv run --isolated --with ./dist/*.whl python -c \
|
|
37
|
+
"import apra_mcp.server as s; n = len(s.list_curated()); assert n >= 7, f'expected >=7 curated, got {n}'; print(f'OK ({n} curated datasets)')"
|
|
38
|
+
- uses: actions/upload-artifact@v4
|
|
39
|
+
with:
|
|
40
|
+
name: dist
|
|
41
|
+
path: dist/
|
|
42
|
+
|
|
43
|
+
publish:
|
|
44
|
+
name: Publish to PyPI
|
|
45
|
+
needs: build
|
|
46
|
+
runs-on: ubuntu-latest
|
|
47
|
+
environment:
|
|
48
|
+
name: pypi
|
|
49
|
+
url: https://pypi.org/p/apra-mcp
|
|
50
|
+
permissions:
|
|
51
|
+
id-token: write # required for trusted publishing
|
|
52
|
+
steps:
|
|
53
|
+
- uses: actions/download-artifact@v4
|
|
54
|
+
with:
|
|
55
|
+
name: dist
|
|
56
|
+
path: dist/
|
|
57
|
+
- name: Publish to PyPI via OIDC
|
|
58
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
name: refresh-seed-urls
|
|
2
|
+
|
|
3
|
+
# Daily scrape of every APRA landing page in apra-mcp's curated set.
|
|
4
|
+
# - When the scraped URLs differ from data/seed_urls.json, opens a PR.
|
|
5
|
+
# - When any URL returns non-2xx, opens a GitHub issue.
|
|
6
|
+
# Effect: even users on a months-old pip install stay current for weeks
|
|
7
|
+
# because new wheels carry the refreshed seed.
|
|
8
|
+
|
|
9
|
+
on:
|
|
10
|
+
schedule:
|
|
11
|
+
- cron: "30 13 * * *" # 13:30 UTC daily (post-APRA-publication window)
|
|
12
|
+
workflow_dispatch:
|
|
13
|
+
|
|
14
|
+
permissions:
|
|
15
|
+
contents: write
|
|
16
|
+
pull-requests: write
|
|
17
|
+
issues: write
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
refresh:
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Install uv
|
|
26
|
+
uses: astral-sh/setup-uv@v3
|
|
27
|
+
with:
|
|
28
|
+
enable-cache: true
|
|
29
|
+
|
|
30
|
+
- name: Set up Python
|
|
31
|
+
run: uv python install 3.12
|
|
32
|
+
|
|
33
|
+
- name: Sync dependencies
|
|
34
|
+
run: uv sync --extra dev && uv pip install -e .
|
|
35
|
+
|
|
36
|
+
- name: Run scraper for every curated dataset
|
|
37
|
+
id: scrape
|
|
38
|
+
run: |
|
|
39
|
+
uv run python scripts/refresh_seed.py > seed_diff.txt 2>&1 || echo "scrape_failed=true" >> $GITHUB_OUTPUT
|
|
40
|
+
cat seed_diff.txt
|
|
41
|
+
|
|
42
|
+
- name: Open PR if URLs changed
|
|
43
|
+
if: steps.scrape.outputs.scrape_failed != 'true'
|
|
44
|
+
uses: peter-evans/create-pull-request@v6
|
|
45
|
+
with:
|
|
46
|
+
commit-message: "chore(seed): refresh URLs from APRA landing pages"
|
|
47
|
+
title: "chore(seed): refresh URLs from APRA landing pages"
|
|
48
|
+
body: |
|
|
49
|
+
Automated PR from `.github/workflows/refresh-urls.yml`.
|
|
50
|
+
|
|
51
|
+
```
|
|
52
|
+
${{ steps.scrape.outputs.diff }}
|
|
53
|
+
```
|
|
54
|
+
branch: refresh-seed-urls
|
|
55
|
+
delete-branch: true
|
|
56
|
+
add-paths: src/apra_mcp/data/seed_urls.json
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
fail-fast: false
|
|
14
|
+
matrix:
|
|
15
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
- name: Install uv
|
|
19
|
+
uses: astral-sh/setup-uv@v3
|
|
20
|
+
with:
|
|
21
|
+
enable-cache: true
|
|
22
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
23
|
+
run: uv python install ${{ matrix.python-version }}
|
|
24
|
+
- name: Sync dependencies
|
|
25
|
+
run: uv sync --extra dev
|
|
26
|
+
- name: Install package
|
|
27
|
+
run: uv pip install -e .
|
|
28
|
+
- name: Run unit tests
|
|
29
|
+
run: uv run pytest -q
|
|
30
|
+
|
|
31
|
+
build:
|
|
32
|
+
runs-on: ubuntu-latest
|
|
33
|
+
needs: test
|
|
34
|
+
steps:
|
|
35
|
+
- uses: actions/checkout@v4
|
|
36
|
+
- name: Install uv
|
|
37
|
+
uses: astral-sh/setup-uv@v3
|
|
38
|
+
- name: Build wheel + sdist
|
|
39
|
+
run: uv build
|
|
40
|
+
- name: Verify wheel installs cleanly
|
|
41
|
+
run: |
|
|
42
|
+
uv run --isolated --with ./dist/*.whl python -c \
|
|
43
|
+
"import apra_mcp.server as s; n = len(s.list_curated()); assert n >= 7, f'expected >=7 curated, got {n}'; print(f'OK ({n} curated datasets)')"
|
|
44
|
+
- uses: actions/upload-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: dist
|
|
47
|
+
path: dist/
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.2] — 2026-05-13
|
|
9
|
+
|
|
10
|
+
### Bug fixes (real customer impact)
|
|
11
|
+
|
|
12
|
+
- **`latest()` was returning a single record for long-format datasets**
|
|
13
|
+
(`INSURANCE_GENERAL`, `INSURANCE_GENERAL_HISTORICAL`, `LIFE_INSURANCE`,
|
|
14
|
+
`LIFE_INSURANCE_HISTORICAL`). Root cause: `last_n=1` was implemented as
|
|
15
|
+
"keep 1 per measure", but long-format datasets carry a single `value`
|
|
16
|
+
measure with the semantic metric in the `data_item` dimension —
|
|
17
|
+
collapsing to 1 record per measure threw away the whole table. Fix:
|
|
18
|
+
detect long-format mode (one measure + period_column declared) and
|
|
19
|
+
switch to "keep all records at the most recent period(s)". `latest()`
|
|
20
|
+
on insurance datasets now returns hundreds of records (the latest
|
|
21
|
+
quarter's worth) instead of one.
|
|
22
|
+
- **Period filters with quarter shorthand (`2025-Q4`) and bare years
|
|
23
|
+
(`2024`) silently returned zero rows.** The source `period_column`
|
|
24
|
+
stores ISO dates (`2025-12-31`) and string-comparing them against
|
|
25
|
+
`"2025-Q4"` excluded everything (`Q` > `1` in ASCII). Fix: a new
|
|
26
|
+
`_expand_period_input` normaliser converts user-supplied periods to
|
|
27
|
+
ISO `YYYY-MM-DD` bounds before comparison. Supports `YYYY`, `YYYY-MM`,
|
|
28
|
+
`YYYY-Qx` (and lowercase `q`), and ISO dates.
|
|
29
|
+
|
|
30
|
+
### UX
|
|
31
|
+
|
|
32
|
+
- **"Did you mean?" suggestions on unknown filter values.** Closest
|
|
33
|
+
RapidFuzz match (WRatio ≥ 70) is offered in the error message:
|
|
34
|
+
`Unknown value 'major' for filter 'sector'. Did you mean 'major_banks'?`
|
|
35
|
+
Permissive dimensions (fund_name, data_item) still pass unknowns
|
|
36
|
+
through unchanged.
|
|
37
|
+
|
|
38
|
+
### Documentation honesty fix
|
|
39
|
+
|
|
40
|
+
- Corrected `period_coverage` metadata on three snapshot datasets
|
|
41
|
+
(`ADI_KEY_STATS`, `ADI_RISK_WEIGHTED_ASSETS`, `SUPER_FUND_LEVEL`).
|
|
42
|
+
The APRA "centralised publication" XLSX is a SNAPSHOT of the latest
|
|
43
|
+
reporting quarter, not the multi-year history the filename suggests.
|
|
44
|
+
The YAML descriptions and `period_coverage` strings now say so
|
|
45
|
+
explicitly. The insurance long-format datasets remain true time
|
|
46
|
+
series (Sep 2023 → Dec 2025 for current; back to 2002/2008 for
|
|
47
|
+
historical).
|
|
48
|
+
|
|
49
|
+
### Tests
|
|
50
|
+
|
|
51
|
+
- 263 unit tests (up from 229 — 34 new covering the fixes)
|
|
52
|
+
- 16 live integration tests
|
|
53
|
+
- Zero-flake across 10 sequential runs
|
|
54
|
+
|
|
55
|
+
## [0.1.1] — 2026-05-12
|
|
56
|
+
|
|
57
|
+
### Attribution correction
|
|
58
|
+
|
|
59
|
+
- **Attribution string switched from CC-BY 4.0 International to CC-BY 3.0
|
|
60
|
+
Australia** to align with APRA's actual licence terms. Both the
|
|
61
|
+
`attribution` field on every `DataResponse` and the README/llms.txt/docs
|
|
62
|
+
now read "Creative Commons Attribution 3.0 Australia" with the
|
|
63
|
+
https://creativecommons.org/licenses/by/3.0/au/ URL. No code-shape
|
|
64
|
+
changes — only the licence text + URL.
|
|
65
|
+
- This brings apra-mcp in line with the sister packages (abs-mcp, ato-mcp,
|
|
66
|
+
rba-mcp), which all carry CC-BY 3.0 AU attribution.
|
|
67
|
+
- Tests updated; 229 unit + 16 live remain green.
|
|
68
|
+
|
|
69
|
+
### Dataset scope — what shipped and what's deferred
|
|
70
|
+
|
|
71
|
+
The v0.1.0 spec listed six curated datasets including `ADI_PROPERTY_EXPOSURES`
|
|
72
|
+
and `SUPER_AGGREGATE`. After inspecting the actual APRA XLSX layouts, the
|
|
73
|
+
final v0.1.x cut substitutes:
|
|
74
|
+
|
|
75
|
+
- **Shipped** (7 datasets, all long-format / wide layout — cleaner to parse,
|
|
76
|
+
easier to filter):
|
|
77
|
+
- `ADI_KEY_STATS` — per-bank capital + key ratios (Table 1 from the ADI
|
|
78
|
+
centralised publication; entity-level, the more valuable cut)
|
|
79
|
+
- `ADI_RISK_WEIGHTED_ASSETS` — per-bank RWA breakdown (Table 2 from the
|
|
80
|
+
same file; a free bonus that emerged from the inspection pass)
|
|
81
|
+
- `SUPER_FUND_LEVEL` — fund-by-fund detail
|
|
82
|
+
- `INSURANCE_GENERAL` + `INSURANCE_GENERAL_HISTORICAL`
|
|
83
|
+
- `LIFE_INSURANCE` + `LIFE_INSURANCE_HISTORICAL`
|
|
84
|
+
|
|
85
|
+
- **Deferred to v0.2** (both are transposed multi-tab industry-aggregate
|
|
86
|
+
files that need a transposed-layout parser before they can ship cleanly):
|
|
87
|
+
- `ADI_PROPERTY_EXPOSURES` — industry-aggregate commercial property
|
|
88
|
+
exposures + residential mortgage approvals from the ADI property file
|
|
89
|
+
- `SUPER_AGGREGATE` — quarterly superannuation performance industry totals
|
|
90
|
+
(the multi-tab presentational file, distinct from `SUPER_FUND_LEVEL`)
|
|
91
|
+
|
|
92
|
+
Net coverage is broader than the original spec (entity-level RWA is a clear
|
|
93
|
+
value-add for any agent that asks "which banks carry the most credit risk").
|
|
94
|
+
|
|
95
|
+
## [0.1.0] — 2026-05-12
|
|
96
|
+
|
|
97
|
+
### Initial release
|
|
98
|
+
|
|
99
|
+
apra-mcp v0.1.0 ships seven curated APRA datasets across banking,
|
|
100
|
+
superannuation, and insurance, exposed through a six-tool MCP surface that
|
|
101
|
+
mirrors abs-mcp / rba-mcp / ato-mcp.
|
|
102
|
+
|
|
103
|
+
### Tools (6)
|
|
104
|
+
|
|
105
|
+
- `search_datasets(query, limit=10)` — fuzzy search the curated catalog
|
|
106
|
+
- `describe_dataset(dataset_id)` — list dimensions, measures, framework info
|
|
107
|
+
- `get_data(dataset_id, filters, measures, start_period, end_period, format)`
|
|
108
|
+
- `latest(dataset_id, filters, measures)` — shortcut to last observation per measure
|
|
109
|
+
- `top_n(dataset_id, measure, n, filters, direction)` — server-side ranking
|
|
110
|
+
- `list_curated()` — enumerate curated IDs
|
|
111
|
+
|
|
112
|
+
### Curated datasets (7)
|
|
113
|
+
|
|
114
|
+
- **`ADI_KEY_STATS`** — per-bank CET1 / Tier 1 / total capital + RWA + ratios,
|
|
115
|
+
every quarter since March 2013. Plain-English `institution: cba` aliases
|
|
116
|
+
for the Big 4 + Macquarie + 70 other ADIs, sector enum, mutual flag.
|
|
117
|
+
- **`ADI_RISK_WEIGHTED_ASSETS`** — per-bank RWA broken down by credit /
|
|
118
|
+
operational / market risk, plus IRRBB and traded-market-risk sub-components.
|
|
119
|
+
- **`SUPER_FUND_LEVEL`** — fund-by-fund member counts, benefits, median age,
|
|
120
|
+
active/inactive splits. Plain-English aliases for AustralianSuper, Aware,
|
|
121
|
+
HOSTPLUS, REST, UniSuper, HESTA, Cbus etc.
|
|
122
|
+
- **`INSURANCE_GENERAL`** — long-format quarterly general insurance database
|
|
123
|
+
(post-AASB17). 14 dimensions × 1 value column × ~24k rows.
|
|
124
|
+
- **`INSURANCE_GENERAL_HISTORICAL`** — pre-AASB17 GI archive (Dec 2002 → Jun 2023).
|
|
125
|
+
- **`LIFE_INSURANCE`** — long-format quarterly life insurance database
|
|
126
|
+
(post-AASB17). 9 dimensions × 1 value column × ~10.6k rows.
|
|
127
|
+
- **`LIFE_INSURANCE_HISTORICAL`** — pre-AASB17 LI archive (Jun 2008 → Jun 2023).
|
|
128
|
+
|
|
129
|
+
### Reliability engineering
|
|
130
|
+
|
|
131
|
+
- **3-tier URL discovery** — apra.gov.au publishes XLSX at date-versioned
|
|
132
|
+
paths that change every quarter. The discovery layer scrapes the canonical
|
|
133
|
+
landing page (with ETag-based conditional GET — 304s cost zero bytes), and
|
|
134
|
+
falls back to a CI-refreshed seed manifest, and finally to the YAML default.
|
|
135
|
+
- **Schema-fingerprint warning surface** — `_apply_aliases` raises an
|
|
136
|
+
actionable `ValueError` if any expected column disappears from the source
|
|
137
|
+
XLSX, with the first 6 columns it actually saw embedded in the message.
|
|
138
|
+
- **Cache self-heal** — corrupt `~/.apra-mcp/cache.db` is detected on init
|
|
139
|
+
and silently rebuilt.
|
|
140
|
+
- **In-flight request dedup** — 50 parallel callers asking for the same XLSX
|
|
141
|
+
fan in to exactly one HTTP request.
|
|
142
|
+
- **Host pinning** — `fetch_resource` refuses any URL outside `apra.gov.au`,
|
|
143
|
+
defense-in-depth against scraper or seed-manifest corruption.
|
|
144
|
+
|
|
145
|
+
### Trust contract
|
|
146
|
+
|
|
147
|
+
Every response includes:
|
|
148
|
+
|
|
149
|
+
- `source = "Australian Prudential Regulation Authority"`
|
|
150
|
+
- `source_url` — canonical APRA landing page
|
|
151
|
+
- `download_url` — the actual XLSX URL used (post-discovery)
|
|
152
|
+
- `attribution` — CC-BY 3.0 Australia string + license link
|
|
153
|
+
- `retrieved_at` — ISO UTC timestamp
|
|
154
|
+
- `server_version` — apra-mcp wheel version
|
|
155
|
+
- `stale` + `stale_reason` — true when the live scrape failed and we served
|
|
156
|
+
from the bundled seed
|
|
157
|
+
- `framework` — basis (post-AASB17 / pre-AASB17), break date, cross-reference
|
|
158
|
+
to the paired historical dataset (insurance datasets only)
|
|
159
|
+
|
|
160
|
+
### Permissive filters + wildcards
|
|
161
|
+
|
|
162
|
+
Dimensions flagged `permissive: true` accept any string value and support
|
|
163
|
+
substring matching: `{"institution": "macquarie*"}` substring-matches every
|
|
164
|
+
Macquarie entity. Useful for entity-name and data-item dimensions where
|
|
165
|
+
exhaustively enumerating ~100 long names in the YAML isn't realistic.
|
|
166
|
+
|
|
167
|
+
### Quality bar
|
|
168
|
+
|
|
169
|
+
- 229 unit tests, 16 live integration tests against apra.gov.au
|
|
170
|
+
- Zero-flake: full unit suite passes 10/10 sequential runs
|
|
171
|
+
- Schema fingerprint guards catch column renames
|
|
172
|
+
- Defensive validation guards on every MCP tool with "Try X" hints
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our
|
|
6
|
+
community a harassment-free experience for everyone.
|
|
7
|
+
|
|
8
|
+
## Our Standards
|
|
9
|
+
|
|
10
|
+
Examples of behavior that contributes to a positive environment:
|
|
11
|
+
|
|
12
|
+
- Demonstrating empathy and kindness toward other people
|
|
13
|
+
- Being respectful of differing opinions, viewpoints, and experiences
|
|
14
|
+
- Giving and gracefully accepting constructive feedback
|
|
15
|
+
- Accepting responsibility for our mistakes
|
|
16
|
+
- Focusing on what is best for the overall community
|
|
17
|
+
|
|
18
|
+
Examples of unacceptable behavior:
|
|
19
|
+
|
|
20
|
+
- Trolling, insulting comments, and personal or political attacks
|
|
21
|
+
- Public or private harassment
|
|
22
|
+
- Publishing others' private information without explicit permission
|
|
23
|
+
- Other conduct which could reasonably be considered inappropriate in a
|
|
24
|
+
professional setting
|
|
25
|
+
|
|
26
|
+
## Enforcement
|
|
27
|
+
|
|
28
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
|
29
|
+
reported to the project maintainer at hvass97@gmail.com. All complaints will
|
|
30
|
+
be reviewed and investigated promptly and fairly.
|
|
31
|
+
|
|
32
|
+
## Attribution
|
|
33
|
+
|
|
34
|
+
This Code of Conduct is adapted from the
|
|
35
|
+
[Contributor Covenant](https://www.contributor-covenant.org), version 2.1,
|
|
36
|
+
available at
|
|
37
|
+
https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
Pull requests welcome. The goal: keep apra-mcp's surface uniform with its
|
|
4
|
+
sister packages (abs-mcp, rba-mcp, ato-mcp) so an agent that uses all four
|
|
5
|
+
gets a consistent shape.
|
|
6
|
+
|
|
7
|
+
## Setup
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
git clone https://github.com/Bigred97/apra-mcp.git
|
|
11
|
+
cd apra-mcp
|
|
12
|
+
uv venv
|
|
13
|
+
uv pip install -e ".[dev]"
|
|
14
|
+
pytest # 263 unit tests, ~12s
|
|
15
|
+
pytest -m live # 16 live integration tests against apra.gov.au, ~20s
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Adding a curated dataset
|
|
19
|
+
|
|
20
|
+
Drop one YAML file into `src/apra_mcp/data/curated/`. The schema:
|
|
21
|
+
|
|
22
|
+
```yaml
|
|
23
|
+
id: NEW_DATASET # SCREAMING_SNAKE_CASE
|
|
24
|
+
name: Human-readable title
|
|
25
|
+
description: |
|
|
26
|
+
Paragraph describing the dataset, including period coverage and any
|
|
27
|
+
caveats. Surfaces in describe_dataset() and search results.
|
|
28
|
+
period_coverage: "September 2023 → latest quarter"
|
|
29
|
+
update_frequency: quarterly
|
|
30
|
+
source_url: https://www.apra.gov.au/... # the landing page
|
|
31
|
+
download_url: https://www.apra.gov.au/... # initial XLSX URL (fallback)
|
|
32
|
+
format: xlsx
|
|
33
|
+
sheet: Database
|
|
34
|
+
header_row: 1
|
|
35
|
+
layout: wide
|
|
36
|
+
cache_kind: data
|
|
37
|
+
period_column: Reporting Period # the source-column name
|
|
38
|
+
search_keywords:
|
|
39
|
+
- keyword
|
|
40
|
+
- other keyword
|
|
41
|
+
discovery: # required for live URL resolution
|
|
42
|
+
landing_url: https://www.apra.gov.au/...
|
|
43
|
+
filename_pattern: '(?i)pattern\s+to\s+match'
|
|
44
|
+
prefer_database: true # optional
|
|
45
|
+
exclude_patterns: # optional
|
|
46
|
+
- '(?i)historical'
|
|
47
|
+
- '(?i)specifications'
|
|
48
|
+
framework: # optional, insurance-only
|
|
49
|
+
current_basis: post-AASB17
|
|
50
|
+
break_date: "2023-09-30"
|
|
51
|
+
break_reason: ...
|
|
52
|
+
historical_dataset: PAIRED_HISTORICAL_KEY
|
|
53
|
+
columns:
|
|
54
|
+
alias:
|
|
55
|
+
source_column: "Exact source header"
|
|
56
|
+
description: User-facing column documentation.
|
|
57
|
+
role: dimension # dimension | measure | id
|
|
58
|
+
dtype: string # int | float | string | date
|
|
59
|
+
permissive: true # optional, allows wildcard match
|
|
60
|
+
dimension_values: # optional, alias maps
|
|
61
|
+
alias:
|
|
62
|
+
user_alias: "Canonical Value In Source"
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Then:
|
|
66
|
+
1. Add a fixture (head-only 80–200 row XLSX) to `tests/fixtures/`
|
|
67
|
+
2. Add the dataset's URL to `src/apra_mcp/data/seed_urls.json`
|
|
68
|
+
3. Add tests in the existing test files (test_curated.py confirms loading;
|
|
69
|
+
test_customer_flows.py runs an end-to-end flow)
|
|
70
|
+
4. Run `pytest` 10 times for zero-flake confirmation
|
|
71
|
+
|
|
72
|
+
## Discovery filename_pattern
|
|
73
|
+
|
|
74
|
+
The discovery layer scrapes the landing page HTML and regex-matches the
|
|
75
|
+
decoded filename of every `<a href="...xlsx">`. Test with:
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from apra_mcp.discovery import resolve_via_scrape, DiscoverySpec
|
|
79
|
+
from apra_mcp.client import APRAClient
|
|
80
|
+
spec = DiscoverySpec(landing_url="https://...", filename_pattern=r"...")
|
|
81
|
+
async with APRAClient() as c:
|
|
82
|
+
    url = await resolve_via_scrape(c, spec)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
When multiple files match, the one with the latest-dated filename wins.
|
|
86
|
+
Use `exclude_patterns` to skip historical or specifications variants.
|
|
87
|
+
|
|
88
|
+
## Style
|
|
89
|
+
|
|
90
|
+
- Mirror the patterns in existing files. Consistency across the four MCPs
|
|
91
|
+
matters more than micro-optimisations.
|
|
92
|
+
- Every MCP tool parameter must use `Annotated[Type, Field(description=...,
|
|
93
|
+
examples=[...])]`. This is what gives the package its Glama
|
|
94
|
+
tool-definition-quality score.
|
|
95
|
+
- No new dependencies beyond `fastmcp`, `httpx`, `pydantic`, `rapidfuzz`,
|
|
96
|
+
`pandas`, `openpyxl`, `aiosqlite`, `PyYAML`.
|
|
97
|
+
- No defensive code for impossible scenarios — trust internal types.
|
|
98
|
+
- Default to no comments. Add one only when the *why* is non-obvious.
|
|
99
|
+
|
|
100
|
+
## Reporting bugs
|
|
101
|
+
|
|
102
|
+
Open an issue: https://github.com/Bigred97/apra-mcp/issues
|
|
103
|
+
|
|
104
|
+
Especially helpful:
|
|
105
|
+
|
|
106
|
+
- "APRA changed the shape of dataset X" — paste the error message from
|
|
107
|
+
`_apply_aliases`; the schema fingerprint guard prints the first 6
|
|
108
|
+
columns it actually saw.
|
|
109
|
+
- "Live scrape returned the wrong file" — paste the resolved URL and the
|
|
110
|
+
filename_pattern it used.
|
apra_mcp-0.1.2/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Harry Vass
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|