apra-mcp 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. apra_mcp-0.1.2/.github/workflows/codeql.yml +37 -0
  2. apra_mcp-0.1.2/.github/workflows/publish.yml +58 -0
  3. apra_mcp-0.1.2/.github/workflows/refresh-urls.yml +56 -0
  4. apra_mcp-0.1.2/.github/workflows/test.yml +47 -0
  5. apra_mcp-0.1.2/.gitignore +17 -0
  6. apra_mcp-0.1.2/CHANGELOG.md +172 -0
  7. apra_mcp-0.1.2/CODE_OF_CONDUCT.md +37 -0
  8. apra_mcp-0.1.2/CONTRIBUTING.md +110 -0
  9. apra_mcp-0.1.2/LICENSE +21 -0
  10. apra_mcp-0.1.2/PKG-INFO +169 -0
  11. apra_mcp-0.1.2/README.md +135 -0
  12. apra_mcp-0.1.2/SECURITY.md +44 -0
  13. apra_mcp-0.1.2/examples/claude_desktop_config.json +8 -0
  14. apra_mcp-0.1.2/examples/demo_prompts.md +77 -0
  15. apra_mcp-0.1.2/glama.json +4 -0
  16. apra_mcp-0.1.2/llms.txt +99 -0
  17. apra_mcp-0.1.2/pyproject.toml +62 -0
  18. apra_mcp-0.1.2/scripts/refresh_seed.py +83 -0
  19. apra_mcp-0.1.2/src/apra_mcp/__init__.py +8 -0
  20. apra_mcp-0.1.2/src/apra_mcp/cache.py +176 -0
  21. apra_mcp-0.1.2/src/apra_mcp/catalog.py +44 -0
  22. apra_mcp-0.1.2/src/apra_mcp/client.py +197 -0
  23. apra_mcp-0.1.2/src/apra_mcp/curated.py +379 -0
  24. apra_mcp-0.1.2/src/apra_mcp/data/curated/ADI_KEY_STATS.yaml +149 -0
  25. apra_mcp-0.1.2/src/apra_mcp/data/curated/ADI_RISK_WEIGHTED_ASSETS.yaml +137 -0
  26. apra_mcp-0.1.2/src/apra_mcp/data/curated/INSURANCE_GENERAL.yaml +145 -0
  27. apra_mcp-0.1.2/src/apra_mcp/data/curated/INSURANCE_GENERAL_HISTORICAL.yaml +128 -0
  28. apra_mcp-0.1.2/src/apra_mcp/data/curated/LIFE_INSURANCE.yaml +112 -0
  29. apra_mcp-0.1.2/src/apra_mcp/data/curated/LIFE_INSURANCE_HISTORICAL.yaml +102 -0
  30. apra_mcp-0.1.2/src/apra_mcp/data/curated/SUPER_FUND_LEVEL.yaml +184 -0
  31. apra_mcp-0.1.2/src/apra_mcp/data/seed_urls.json +15 -0
  32. apra_mcp-0.1.2/src/apra_mcp/discovery.py +307 -0
  33. apra_mcp-0.1.2/src/apra_mcp/models.py +120 -0
  34. apra_mcp-0.1.2/src/apra_mcp/parsing.py +115 -0
  35. apra_mcp-0.1.2/src/apra_mcp/py.typed +0 -0
  36. apra_mcp-0.1.2/src/apra_mcp/server.py +737 -0
  37. apra_mcp-0.1.2/src/apra_mcp/shaping.py +482 -0
  38. apra_mcp-0.1.2/tests/__init__.py +0 -0
  39. apra_mcp-0.1.2/tests/conftest.py +64 -0
  40. apra_mcp-0.1.2/tests/fixtures/adi_key_stats_sample.xlsx +0 -0
  41. apra_mcp-0.1.2/tests/fixtures/adi_rwa_sample.xlsx +0 -0
  42. apra_mcp-0.1.2/tests/fixtures/insurance_general_historical_sample.xlsx +0 -0
  43. apra_mcp-0.1.2/tests/fixtures/insurance_general_sample.xlsx +0 -0
  44. apra_mcp-0.1.2/tests/fixtures/life_insurance_historical_sample.xlsx +0 -0
  45. apra_mcp-0.1.2/tests/fixtures/life_insurance_sample.xlsx +0 -0
  46. apra_mcp-0.1.2/tests/fixtures/super_fund_level_sample.xlsx +0 -0
  47. apra_mcp-0.1.2/tests/test_cache.py +198 -0
  48. apra_mcp-0.1.2/tests/test_client.py +181 -0
  49. apra_mcp-0.1.2/tests/test_curated.py +208 -0
  50. apra_mcp-0.1.2/tests/test_customer_flows.py +171 -0
  51. apra_mcp-0.1.2/tests/test_did_you_mean.py +55 -0
  52. apra_mcp-0.1.2/tests/test_discovery.py +253 -0
  53. apra_mcp-0.1.2/tests/test_edge_data.py +210 -0
  54. apra_mcp-0.1.2/tests/test_edge_inputs.py +154 -0
  55. apra_mcp-0.1.2/tests/test_integration.py +207 -0
  56. apra_mcp-0.1.2/tests/test_latest_long_format.py +106 -0
  57. apra_mcp-0.1.2/tests/test_parsing.py +130 -0
  58. apra_mcp-0.1.2/tests/test_period_normalisation.py +147 -0
  59. apra_mcp-0.1.2/tests/test_resilience.py +151 -0
  60. apra_mcp-0.1.2/tests/test_server_validation.py +225 -0
  61. apra_mcp-0.1.2/tests/test_shaping.py +282 -0
  62. apra_mcp-0.1.2/tests/test_top_n.py +146 -0
  63. apra_mcp-0.1.2/uv.lock +1748 -0
@@ -0,0 +1,37 @@
1
+ name: codeql
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+ schedule:
9
+ - cron: "57 6 * * 1" # Mondays 06:57 UTC, offset from sister MCPs
10
+
11
+ jobs:
12
+ analyze:
13
+ runs-on: ubuntu-latest
14
+ permissions:
15
+ actions: read
16
+ contents: read
17
+ security-events: write
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ language: [python]
22
+ steps:
23
+ - uses: actions/checkout@v4
24
+
25
+ - name: Initialise CodeQL
26
+ uses: github/codeql-action/init@v3
27
+ with:
28
+ languages: ${{ matrix.language }}
29
+ queries: security-extended
30
+
31
+ - name: Autobuild
32
+ uses: github/codeql-action/autobuild@v3
33
+
34
+ - name: Analyze
35
+ uses: github/codeql-action/analyze@v3
36
+ with:
37
+ category: "/language:${{ matrix.language }}"
@@ -0,0 +1,58 @@
1
+ name: publish
2
+
3
+ # Publishes the wheel + sdist to PyPI on every published Release.
4
+ # Uses PyPI Trusted Publishing (OIDC) — no token in repo secrets.
5
+ #
6
+ # Setup (one-time, done once on PyPI):
7
+ # pypi.org → Your projects → apra-mcp → Publishing
8
+ # (or for the first release: pypi.org → "Pending publishers" → Add)
9
+ # Owner=Bigred97, Repository=apra-mcp, Workflow=publish.yml, Environment=pypi
10
+ #
11
+ # Trigger: create a GitHub Release. The release's tag (e.g. v0.1.3) drives
12
+ # the version that gets uploaded. Make sure pyproject.toml's `version`
13
+ # matches before tagging.
14
+
15
+ on:
16
+ release:
17
+ types: [published]
18
+ workflow_dispatch:
19
+
20
+ jobs:
21
+ build:
22
+ name: Build distributions
23
+ runs-on: ubuntu-latest
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ - name: Install uv
27
+ uses: astral-sh/setup-uv@v3
28
+ with:
29
+ enable-cache: true
30
+ - name: Set up Python
31
+ run: uv python install 3.12
32
+ - name: Build wheel + sdist
33
+ run: uv build
34
+ - name: Verify wheel installs cleanly
35
+ run: |
36
+ uv run --isolated --with ./dist/*.whl python -c \
37
+ "import apra_mcp.server as s; n = len(s.list_curated()); assert n >= 7, f'expected >=7 curated, got {n}'; print(f'OK ({n} curated datasets)')"
38
+ - uses: actions/upload-artifact@v4
39
+ with:
40
+ name: dist
41
+ path: dist/
42
+
43
+ publish:
44
+ name: Publish to PyPI
45
+ needs: build
46
+ runs-on: ubuntu-latest
47
+ environment:
48
+ name: pypi
49
+ url: https://pypi.org/p/apra-mcp
50
+ permissions:
51
+ id-token: write # required for trusted publishing
52
+ steps:
53
+ - uses: actions/download-artifact@v4
54
+ with:
55
+ name: dist
56
+ path: dist/
57
+ - name: Publish to PyPI via OIDC
58
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,56 @@
1
+ name: refresh-seed-urls
2
+
3
+ # Daily scrape of every APRA landing page in apra-mcp's curated set.
4
+ # - When the scraped URLs differ from data/seed_urls.json, opens a PR.
5
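+ # - When any URL returns non-2xx, the scrape step is marked failed and the PR step is skipped (NOTE: no step in this workflow opens a GitHub issue yet — TODO confirm intent).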
6
+ # Effect: each new wheel carries a freshly refreshed seed, so even an install
7
+ # that is several weeks old still has working fallback URLs.
8
+
9
+ on:
10
+ schedule:
11
+ - cron: "30 13 * * *" # 13:30 UTC daily (post-APRA-publication window)
12
+ workflow_dispatch:
13
+
14
+ permissions:
15
+ contents: write
16
+ pull-requests: write
17
+ issues: write
18
+
19
+ jobs:
20
+ refresh:
21
+ runs-on: ubuntu-latest
22
+ steps:
23
+ - uses: actions/checkout@v4
24
+
25
+ - name: Install uv
26
+ uses: astral-sh/setup-uv@v3
27
+ with:
28
+ enable-cache: true
29
+
30
+ - name: Set up Python
31
+ run: uv python install 3.12
32
+
33
+ - name: Sync dependencies
34
+ run: uv sync --extra dev && uv pip install -e .
35
+
36
+ - name: Run scraper for every curated dataset
37
+ id: scrape
38
+ run: |
39
+ uv run python scripts/refresh_seed.py > seed_diff.txt 2>&1 || echo "scrape_failed=true" >> $GITHUB_OUTPUT
40
+ cat seed_diff.txt
41
+
42
+ - name: Open PR if URLs changed
43
+ if: steps.scrape.outputs.scrape_failed != 'true'
44
+ uses: peter-evans/create-pull-request@v6
45
+ with:
46
+ commit-message: "chore(seed): refresh URLs from APRA landing pages"
47
+ title: "chore(seed): refresh URLs from APRA landing pages"
48
+ body: |
49
+ Automated PR from `.github/workflows/refresh-urls.yml`.
50
+
51
+ ```
52
+ ${{ steps.scrape.outputs.diff }}
53
+ ```
54
+ branch: refresh-seed-urls
55
+ delete-branch: true
56
+ add-paths: src/apra_mcp/data/seed_urls.json
@@ -0,0 +1,47 @@
1
+ name: tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ python-version: ["3.11", "3.12", "3.13"]
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v3
20
+ with:
21
+ enable-cache: true
22
+ - name: Set up Python ${{ matrix.python-version }}
23
+ run: uv python install ${{ matrix.python-version }}
24
+ - name: Sync dependencies
25
+ run: uv sync --extra dev
26
+ - name: Install package
27
+ run: uv pip install -e .
28
+ - name: Run unit tests
29
+ run: uv run pytest -q
30
+
31
+ build:
32
+ runs-on: ubuntu-latest
33
+ needs: test
34
+ steps:
35
+ - uses: actions/checkout@v4
36
+ - name: Install uv
37
+ uses: astral-sh/setup-uv@v3
38
+ - name: Build wheel + sdist
39
+ run: uv build
40
+ - name: Verify wheel installs cleanly
41
+ run: |
42
+ uv run --isolated --with ./dist/*.whl python -c \
43
+ "import apra_mcp.server as s; n = len(s.list_curated()); assert n >= 7, f'expected >=7 curated, got {n}'; print(f'OK ({n} curated datasets)')"
44
+ - uses: actions/upload-artifact@v4
45
+ with:
46
+ name: dist
47
+ path: dist/
@@ -0,0 +1,17 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .pytest_cache/
5
+ .venv/
6
+ venv/
7
+ dist/
8
+ build/
9
+ .coverage
10
+ .coverage.*
11
+ htmlcov/
12
+ .DS_Store
13
+ *.swp
14
+ *.swo
15
+ .idea/
16
+ .vscode/
17
+ .apra-mcp-cache/
@@ -0,0 +1,172 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.2] — 2026-05-13
9
+
10
+ ### Bug fixes (real customer impact)
11
+
12
+ - **`latest()` was returning a single record for long-format datasets**
13
+ (`INSURANCE_GENERAL`, `INSURANCE_GENERAL_HISTORICAL`, `LIFE_INSURANCE`,
14
+ `LIFE_INSURANCE_HISTORICAL`). Root cause: `last_n=1` was implemented as
15
+ "keep 1 per measure", but long-format datasets carry a single `value`
16
+ measure with the semantic metric in the `data_item` dimension —
17
+ collapsing to 1 record per measure threw away the whole table. Fix:
18
+ detect long-format mode (one measure + period_column declared) and
19
+ switch to "keep all records at the most recent period(s)". `latest()`
20
+ on insurance datasets now returns hundreds of records (the latest
21
+ quarter's worth) instead of one.
22
+ - **Period filters with quarter shorthand (`2025-Q4`) and bare years
23
+ (`2024`) silently returned zero rows.** The source `period_column`
24
+ stores ISO dates (`2025-12-31`) and string-comparing them against
25
+ `"2025-Q4"` excluded everything (`Q` > `1` in ASCII). Fix: a new
26
+ `_expand_period_input` normaliser converts user-supplied periods to
27
+ ISO `YYYY-MM-DD` bounds before comparison. Supports `YYYY`, `YYYY-MM`,
28
+ `YYYY-Qx` (and lowercase `q`), and ISO dates.
29
+
30
+ ### UX
31
+
32
+ - **"Did you mean?" suggestions on unknown filter values.** Closest
33
+ RapidFuzz match (WRatio ≥ 70) is offered in the error message:
34
+ `Unknown value 'major' for filter 'sector'. Did you mean 'major_banks'?`
35
+ Permissive dimensions (fund_name, data_item) still pass unknowns
36
+ through unchanged.
37
+
38
+ ### Documentation honesty fix
39
+
40
+ - Corrected `period_coverage` metadata on three snapshot datasets
41
+ (`ADI_KEY_STATS`, `ADI_RISK_WEIGHTED_ASSETS`, `SUPER_FUND_LEVEL`).
42
+ The APRA "centralised publication" XLSX is a SNAPSHOT of the latest
43
+ reporting quarter, not the multi-year history the filename suggests.
44
+ The YAML descriptions and `period_coverage` strings now say so
45
+ explicitly. The insurance long-format datasets remain true time
46
+ series (Sep 2023 → Dec 2025 for current; back to 2002/2008 for
47
+ historical).
48
+
49
+ ### Tests
50
+
51
+ - 263 unit tests (up from 229 — 34 new covering the fixes)
52
+ - 16 live integration tests
53
+ - Zero-flake across 10 sequential runs
54
+
55
+ ## [0.1.1] — 2026-05-12
56
+
57
+ ### Attribution correction
58
+
59
+ - **Attribution string switched from CC-BY 4.0 International to CC-BY 3.0
60
+ Australia** to align with APRA's actual licence terms. Both the
61
+ `attribution` field on every `DataResponse` and the README/llms.txt/docs
62
+ now read "Creative Commons Attribution 3.0 Australia" with the
63
+ https://creativecommons.org/licenses/by/3.0/au/ URL. No code-shape
64
+ changes — only the licence text + URL.
65
+ - This brings apra-mcp in line with the sister packages (abs-mcp, ato-mcp,
66
+ rba-mcp), which all carry CC-BY 3.0 AU attribution.
67
+ - Tests updated; 229 unit + 16 live remain green.
68
+
69
+ ### Dataset scope — what shipped and what's deferred
70
+
71
+ The v0.1.0 spec listed six curated datasets including `ADI_PROPERTY_EXPOSURES`
72
+ and `SUPER_AGGREGATE`. After inspecting the actual APRA XLSX layouts, the
73
+ final v0.1.x cut substitutes:
74
+
75
+ - **Shipped** (7 datasets, all long-format / wide layout — cleaner to parse,
76
+ easier to filter):
77
+ - `ADI_KEY_STATS` — per-bank capital + key ratios (Table 1 from the ADI
78
+ centralised publication; entity-level, the more valuable cut)
79
+ - `ADI_RISK_WEIGHTED_ASSETS` — per-bank RWA breakdown (Table 2 from the
80
+ same file; a free bonus that emerged from the inspection pass)
81
+ - `SUPER_FUND_LEVEL` — fund-by-fund detail
82
+ - `INSURANCE_GENERAL` + `INSURANCE_GENERAL_HISTORICAL`
83
+ - `LIFE_INSURANCE` + `LIFE_INSURANCE_HISTORICAL`
84
+
85
+ - **Deferred to v0.2** (both are transposed multi-tab industry-aggregate
86
+ files that need a transposed-layout parser before they can ship cleanly):
87
+ - `ADI_PROPERTY_EXPOSURES` — industry-aggregate commercial property
88
+ exposures + residential mortgage approvals from the ADI property file
89
+ - `SUPER_AGGREGATE` — quarterly superannuation performance industry totals
90
+ (the multi-tab presentational file, distinct from `SUPER_FUND_LEVEL`)
91
+
92
+ Net coverage is broader than the original spec (entity-level RWA is a clear
93
+ value-add for any agent that asks "which banks carry the most credit risk").
94
+
95
+ ## [0.1.0] — 2026-05-12
96
+
97
+ ### Initial release
98
+
99
+ apra-mcp v0.1.0 ships seven curated APRA datasets across banking,
100
+ superannuation, and insurance, exposed through a six-tool MCP surface that
101
+ mirrors abs-mcp / rba-mcp / ato-mcp.
102
+
103
+ ### Tools (6)
104
+
105
+ - `search_datasets(query, limit=10)` — fuzzy search the curated catalog
106
+ - `describe_dataset(dataset_id)` — list dimensions, measures, framework info
107
+ - `get_data(dataset_id, filters, measures, start_period, end_period, format)`
108
+ - `latest(dataset_id, filters, measures)` — shortcut to last observation per measure
109
+ - `top_n(dataset_id, measure, n, filters, direction)` — server-side ranking
110
+ - `list_curated()` — enumerate curated IDs
111
+
112
+ ### Curated datasets (7)
113
+
114
+ - **`ADI_KEY_STATS`** — per-bank CET1 / Tier 1 / total capital + RWA + ratios,
115
+ every quarter since March 2013. Plain-English `institution: cba` aliases
116
+ for the Big 4 + Macquarie + 70 other ADIs, sector enum, mutual flag.
117
+ - **`ADI_RISK_WEIGHTED_ASSETS`** — per-bank RWA broken down by credit /
118
+ operational / market risk, plus IRRBB and traded-market-risk sub-components.
119
+ - **`SUPER_FUND_LEVEL`** — fund-by-fund member counts, benefits, median age,
120
+ active/inactive splits. Plain-English aliases for AustralianSuper, Aware,
121
+ HOSTPLUS, REST, UniSuper, HESTA, Cbus etc.
122
+ - **`INSURANCE_GENERAL`** — long-format quarterly general insurance database
123
+ (post-AASB17). 14 dimensions × 1 value column × ~24k rows.
124
+ - **`INSURANCE_GENERAL_HISTORICAL`** — pre-AASB17 GI archive (Dec 2002 → Jun 2023).
125
+ - **`LIFE_INSURANCE`** — long-format quarterly life insurance database
126
+ (post-AASB17). 9 dimensions × 1 value column × ~10.6k rows.
127
+ - **`LIFE_INSURANCE_HISTORICAL`** — pre-AASB17 LI archive (Jun 2008 → Jun 2023).
128
+
129
+ ### Reliability engineering
130
+
131
+ - **3-tier URL discovery** — apra.gov.au publishes XLSX at date-versioned
132
+ paths that change every quarter. The discovery layer scrapes the canonical
133
+ landing page (with ETag-based conditional GET — 304s cost zero bytes), and
134
+ falls back to a CI-refreshed seed manifest, and finally to the YAML default.
135
+ - **Schema-fingerprint warning surface** — `_apply_aliases` raises an
136
+ actionable `ValueError` if any expected column disappears from the source
137
+ XLSX, with the first 6 columns it actually saw embedded in the message.
138
+ - **Cache self-heal** — corrupt `~/.apra-mcp/cache.db` is detected on init
139
+ and silently rebuilt.
140
+ - **In-flight request dedup** — 50 parallel callers asking for the same XLSX
141
+ fan in to exactly one HTTP request.
142
+ - **Host pinning** — `fetch_resource` refuses any URL outside `apra.gov.au`,
143
+ defense-in-depth against scraper or seed-manifest corruption.
144
+
145
+ ### Trust contract
146
+
147
+ Every response includes:
148
+
149
+ - `source = "Australian Prudential Regulation Authority"`
150
+ - `source_url` — canonical APRA landing page
151
+ - `download_url` — the actual XLSX URL used (post-discovery)
152
+ - `attribution` — CC-BY 3.0 Australia string + license link
153
+ - `retrieved_at` — ISO UTC timestamp
154
+ - `server_version` — apra-mcp wheel version
155
+ - `stale` + `stale_reason` — true when the live scrape failed and we served
156
+ from the bundled seed
157
+ - `framework` — basis (post-AASB17 / pre-AASB17), break date, cross-reference
158
+ to the paired historical dataset (insurance datasets only)
159
+
160
+ ### Permissive filters + wildcards
161
+
162
+ Dimensions flagged `permissive: true` accept any string value and support
163
+ substring matching: `{"institution": "macquarie*"}` substring-matches every
164
+ Macquarie entity. Useful for entity-name and data-item dimensions where
165
+ exhaustively enumerating ~100 long names in the YAML isn't realistic.
166
+
167
+ ### Quality bar
168
+
169
+ - 229 unit tests, 16 live integration tests against apra.gov.au
170
+ - Zero-flake: full unit suite passes 10/10 sequential runs
171
+ - Schema fingerprint guards catch column renames
172
+ - Defensive validation guards on every MCP tool with "Try X" hints
@@ -0,0 +1,37 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone.
7
+
8
+ ## Our Standards
9
+
10
+ Examples of behavior that contributes to a positive environment:
11
+
12
+ - Demonstrating empathy and kindness toward other people
13
+ - Being respectful of differing opinions, viewpoints, and experiences
14
+ - Giving and gracefully accepting constructive feedback
15
+ - Accepting responsibility for our mistakes
16
+ - Focusing on what is best for the overall community
17
+
18
+ Examples of unacceptable behavior:
19
+
20
+ - Trolling, insulting comments, and personal or political attacks
21
+ - Public or private harassment
22
+ - Publishing others' private information without explicit permission
23
+ - Other conduct which could reasonably be considered inappropriate in a
24
+ professional setting
25
+
26
+ ## Enforcement
27
+
28
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
29
+ reported to the project maintainer at hvass97@gmail.com. All complaints will
30
+ be reviewed and investigated promptly and fairly.
31
+
32
+ ## Attribution
33
+
34
+ This Code of Conduct is adapted from the
35
+ [Contributor Covenant](https://www.contributor-covenant.org), version 2.1,
36
+ available at
37
+ https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.
@@ -0,0 +1,110 @@
1
+ # Contributing
2
+
3
+ Pull requests welcome. The goal: keep apra-mcp's surface uniform with its
4
+ sister packages (abs-mcp, rba-mcp, ato-mcp) so an agent that uses all four
5
+ gets a consistent shape.
6
+
7
+ ## Setup
8
+
9
+ ```bash
10
+ git clone https://github.com/Bigred97/apra-mcp.git
11
+ cd apra-mcp
12
+ uv venv
13
+ uv pip install -e ".[dev]"
14
+ pytest # 263 unit tests, ~12s
15
+ pytest -m live # 16 live integration tests against apra.gov.au, ~20s
16
+ ```
17
+
18
+ ## Adding a curated dataset
19
+
20
+ Drop one YAML file into `src/apra_mcp/data/curated/`. The schema:
21
+
22
+ ```yaml
23
+ id: NEW_DATASET # SCREAMING_SNAKE_CASE
24
+ name: Human-readable title
25
+ description: |
26
+ Paragraph describing the dataset, including period coverage and any
27
+ caveats. Surfaces in describe_dataset() and search results.
28
+ period_coverage: "September 2023 → latest quarter"
29
+ update_frequency: quarterly
30
+ source_url: https://www.apra.gov.au/... # the landing page
31
+ download_url: https://www.apra.gov.au/... # initial XLSX URL (fallback)
32
+ format: xlsx
33
+ sheet: Database
34
+ header_row: 1
35
+ layout: wide
36
+ cache_kind: data
37
+ period_column: Reporting Period # the source-column name
38
+ search_keywords:
39
+ - keyword
40
+ - other keyword
41
+ discovery: # required for live URL resolution
42
+ landing_url: https://www.apra.gov.au/...
43
+ filename_pattern: '(?i)pattern\s+to\s+match'
44
+ prefer_database: true # optional
45
+ exclude_patterns: # optional
46
+ - '(?i)historical'
47
+ - '(?i)specifications'
48
+ framework: # optional, insurance-only
49
+ current_basis: post-AASB17
50
+ break_date: "2023-09-30"
51
+ break_reason: ...
52
+ historical_dataset: PAIRED_HISTORICAL_KEY
53
+ columns:
54
+ alias:
55
+ source_column: "Exact source header"
56
+ description: User-facing column documentation.
57
+ role: dimension # dimension | measure | id
58
+ dtype: string # int | float | string | date
59
+ permissive: true # optional, allows wildcard match
60
+ dimension_values: # optional, alias maps
61
+ alias:
62
+ user_alias: "Canonical Value In Source"
63
+ ```
64
+
65
+ Then:
66
+ 1. Add a fixture (head-only 80–200 row XLSX) to `tests/fixtures/`
67
+ 2. Add the dataset's URL to `src/apra_mcp/data/seed_urls.json`
68
+ 3. Add tests in the existing test files (test_curated.py confirms loading;
69
+ test_customer_flows.py runs an end-to-end flow)
70
+ 4. Run `pytest` 10 times for zero-flake confirmation
71
+
72
+ ## Discovery filename_pattern
73
+
74
+ The discovery layer scrapes the landing page HTML and regex-matches the
75
+ decoded filename of every `<a href="...xlsx">`. Test with:
76
+
77
+ ```python
78
+ from apra_mcp.discovery import resolve_via_scrape, DiscoverySpec
79
+ from apra_mcp.client import APRAClient
80
+ spec = DiscoverySpec(landing_url="https://...", filename_pattern=r"...")
81
+ async with APRAClient() as c:
82
+ url = await resolve_via_scrape(c, spec)
83
+ ```
84
+
85
+ When multiple files match, the one with the latest-dated filename wins.
86
+ Use `exclude_patterns` to skip historical or specifications variants.
87
+
88
+ ## Style
89
+
90
+ - Mirror the patterns in existing files. Consistency across the four MCPs
91
+ matters more than micro-optimisations.
92
+ - Every MCP tool parameter must use `Annotated[Type, Field(description=...,
93
+ examples=[...])]`. This is what gives the package its Glama
94
+ tool-definition-quality score.
95
+ - No new dependencies beyond `fastmcp`, `httpx`, `pydantic`, `rapidfuzz`,
96
+ `pandas`, `openpyxl`, `aiosqlite`, `PyYAML`.
97
+ - No defensive code for impossible scenarios — trust internal types.
98
+ - Default to no comments. Add one only when the *why* is non-obvious.
99
+
100
+ ## Reporting bugs
101
+
102
+ Open an issue: https://github.com/Bigred97/apra-mcp/issues
103
+
104
+ Especially helpful:
105
+
106
+ - "APRA changed the shape of dataset X" — paste the error message from
107
+ `_apply_aliases`; the schema fingerprint guard prints the first 6
108
+ columns it actually saw.
109
+ - "Live scrape returned the wrong file" — paste the resolved URL and the
110
+ filename_pattern it used.
apra_mcp-0.1.2/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Harry Vass
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.