astraea-framework 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. astraea_framework-0.1.0.dist-info/METADATA +219 -0
  2. astraea_framework-0.1.0.dist-info/RECORD +49 -0
  3. astraea_framework-0.1.0.dist-info/WHEEL +5 -0
  4. astraea_framework-0.1.0.dist-info/licenses/LICENSE +21 -0
  5. astraea_framework-0.1.0.dist-info/top_level.txt +4 -0
  6. core/__init__.py +1 -0
  7. core/api.py +1005 -0
  8. core/browser.py +105 -0
  9. core/embedder.py +94 -0
  10. core/feedback.py +74 -0
  11. core/generator.py +72 -0
  12. core/jurisdiction.py +224 -0
  13. core/legislation.py +132 -0
  14. core/mcp.py +207 -0
  15. core/pipeline.py +108 -0
  16. core/queue.py +176 -0
  17. core/reranker.py +87 -0
  18. core/retriever.py +206 -0
  19. core/routing.py +76 -0
  20. core/sanitize.py +94 -0
  21. core/security.py +40 -0
  22. core/service.py +181 -0
  23. examples/minimal_jurisdiction/__init__.py +3 -0
  24. examples/minimal_jurisdiction/jurisdiction.py +35 -0
  25. ingest/__init__.py +0 -0
  26. ingest/base.py +88 -0
  27. ingest/run_nsw_tenancy.py +115 -0
  28. jurisdictions/__init__.py +1 -0
  29. jurisdictions/nsw_tenancy/__init__.py +3 -0
  30. jurisdictions/nsw_tenancy/jurisdiction.py +108 -0
  31. jurisdictions/nsw_tenancy/prompt.py +20 -0
  32. jurisdictions/nsw_tenancy/routes.py +97 -0
  33. jurisdictions/nsw_tenancy/scraper.py +182 -0
  34. jurisdictions/nz_employment/__init__.py +3 -0
  35. jurisdictions/nz_employment/jurisdiction.py +119 -0
  36. jurisdictions/nz_employment/prompt.py +24 -0
  37. jurisdictions/nz_employment/routes.py +103 -0
  38. jurisdictions/nz_legal/__init__.py +3 -0
  39. jurisdictions/nz_legal/app.py +4 -0
  40. jurisdictions/nz_legal/contrasting.py +225 -0
  41. jurisdictions/nz_legal/jurisdiction.py +105 -0
  42. jurisdictions/nz_legal/routes.py +332 -0
  43. jurisdictions/nz_legal/scroll.py +277 -0
  44. jurisdictions/nz_tenancy/__init__.py +3 -0
  45. jurisdictions/nz_tenancy/jurisdiction.py +161 -0
  46. jurisdictions/nz_tenancy/mcp_server.py +30 -0
  47. jurisdictions/nz_tenancy/prompt.py +96 -0
  48. jurisdictions/nz_tenancy/routes.py +218 -0
  49. jurisdictions/nz_tenancy/scraper.py +8 -0
@@ -0,0 +1,219 @@
1
+ Metadata-Version: 2.4
2
+ Name: astraea-framework
3
+ Version: 0.1.0
4
+ Summary: Open-justice RAG framework - jurisdiction-specific legal Q&A over public court decisions
5
+ Author-email: jsaputra@riseup.net
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/jwongso/astraea
8
+ Project-URL: Issues, https://github.com/jwongso/astraea/issues
9
+ Keywords: legal,rag,llm,tenancy,jurisdiction
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: fastapi>=0.111
19
+ Requires-Dist: uvicorn[standard]>=0.29
20
+ Requires-Dist: qdrant-client>=1.9
21
+ Requires-Dist: sentence-transformers>=3.0
22
+ Requires-Dist: pydantic>=2.7
23
+ Requires-Dist: httpx>=0.27
24
+ Requires-Dist: redis>=5.0
25
+ Requires-Dist: cachetools>=5.3
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=8; extra == "dev"
28
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
29
+ Requires-Dist: ruff>=0.4; extra == "dev"
30
+ Provides-Extra: ingest
31
+ Requires-Dist: beautifulsoup4>=4.12; extra == "ingest"
32
+ Requires-Dist: pypdf>=4.0; extra == "ingest"
33
+ Requires-Dist: pytesseract>=0.3; extra == "ingest"
34
+ Requires-Dist: pillow>=10.0; extra == "ingest"
35
+ Dynamic: license-file
36
+
37
+ # Astraea
38
+
39
+ Open-justice RAG framework for building jurisdiction-specific legal Q&A tools over public court decisions.
40
+
41
+ Named after Astraea, the Greek goddess of justice who carried the scales.
42
+
43
+ ---
44
+
45
+ ## What it is
46
+
47
+ A small runtime framework that provides the infrastructure for legal RAG tools - SSE streaming,
48
+ concurrent request queue, statute routing, live legislation anchors, citation verification,
49
+ security hardening, and smoke tests - so that a new jurisdiction only needs to provide one Python module.
50
+
51
+ ```python
52
+ from jurisdictions.nz_tenancy import jurisdiction
53
+ from core.api import create_app
54
+
55
+ app = create_app(jurisdiction)
56
+ ```
57
+
58
+ ---
59
+
60
+ ## Design principles
61
+
62
+ - **One process = one jurisdiction.** No multi-tenancy, no plugin registry. Simple deployment.
63
+ - **Four required things.** A jurisdiction must provide: a name, a corpus config, a system prompt, and a route table. Everything else has a working default.
64
+ - **Security and queue are non-overridable.** Input sanitization, request body limits, security headers, and queue concurrency are enforced by core regardless of jurisdiction config.
65
+ - **Scraper is offline.** Ingestion runs separately from the API. Core only needs a populated Qdrant collection conforming to `schemas/qdrant_payload.schema.json`.
66
+ - **Tests are data-driven.** Jurisdictions provide smoke test fixtures; core runs the test suite against them automatically.
67
+
68
+ ---
69
+
70
+ ## Supported jurisdictions
71
+
72
+ | Jurisdiction | Status | Corpus |
73
+ |---|---|---|
74
+ | NZ Tenancy (`nz_tenancy`) | Live - tenancy.localrun.ai | 31,000+ Tenancy Tribunal decisions, RTA 1986 + Healthy Homes Standards 2019 |
75
+ | NZ Legal (`nz_legal`) | Live - nz-legal-rag.localrun.ai | All NZ courts, 3M+ chunks (NZHC, NZCA, NZSC, NZERA, NZEmpC, NZTT) |
76
+ | NZ Employment (`nz_employment`) | Ready | 300+ Employment Relations Authority (ERA) + Employment Court decisions through May 2026, live Employment Relations Act 2000 |
77
+ | NSW Tenancy (`nsw_tenancy`) | PoC (framework demo) | Proves interface generalises - not actively developed |
78
+
79
+ ---
80
+
81
+ ## Adding a new jurisdiction
82
+
83
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full fork-to-running walkthrough.
84
+
85
+ Quick version:
86
+
87
+ 1. Copy `examples/minimal_jurisdiction/` to `jurisdictions/your_name/`
88
+ 2. Implement the 4 required properties in `jurisdiction.py`
89
+ 3. Run the contract tests: `pytest tests/core/test_jurisdiction_contract.py --jurisdiction your_name`
90
+ 4. Ingest your corpus into Qdrant (see `ingest/` and `schemas/qdrant_payload.schema.json`)
91
+ 5. Add smoke fixtures and run: `pytest tests/jurisdictions/test_smoke.py --jurisdiction your_name -m retrieval`
92
+
93
+ ---
94
+
95
+ ## Jurisdiction extension points
96
+
97
+ Beyond the 4 required properties, jurisdictions can opt into additional behaviour:
98
+
99
+ ### Extra routes (`register_routes`)
100
+
101
+ Add jurisdiction-specific endpoints (e.g. structured data trackers) on top of the core API:
102
+
103
+ ```python
104
+ def register_routes(self, app: FastAPI) -> None:
105
+ from jurisdictions.nz_legal.routes import register
106
+ register(app)
107
+ ```
108
+
109
+ `register_routes` is called at the end of `create_app()`. Route handlers access the pipeline and store via `request.app.state`.
110
+
111
+ `nz_legal` uses this to expose `/search`, `/notable`, `/sentencing-tracker`, `/pg-tracker`, and `/contrasting-cases`.
112
+
113
+ ### Federated per-Act legislation retrieval (`leg_sources`)
114
+
115
+ By default, legislation retrieval does one vector search across the entire legislation collection.
116
+ As a corpus grows (more Acts), smaller Acts get crowded out by larger ones on embedding similarity alone.
117
+
118
+ Override `leg_sources` to run one search per registered Act in parallel, each with its own `top_k` quota.
119
+ The re-ranker phase (Phase 2) can then select the best sections across all sources without manual routes:
120
+
121
+ ```python
122
+ from core.jurisdiction import LegislationSource
123
+
124
+ @property
125
+ def leg_sources(self) -> list[LegislationSource]:
126
+ return [
127
+ LegislationSource("RTA", "Residential Tenancies Act 1986", default_top_k=6, boost_top_k=10),
128
+ LegislationSource("HHS2019", "Residential Tenancies (Healthy Homes Standards) Regulations 2019", default_top_k=4, boost_top_k=8),
129
+ ]
130
+ ```
131
+
132
+ When a matched route targets a specific Act (e.g. `healthy_homes` route targets `HHS2019`), that
133
+ Act's search uses `boost_top_k` instead of `default_top_k`, giving it more candidates before ranking.
134
+
135
+ Routes remain as hard floor guarantees - forced sections are always included in the candidate pool
136
+ regardless of federated search results. This means a cross-encoder re-ranker (Phase 2) can
137
+ reorder freely without risking that a critical section is dropped.
138
+
139
+ A `CrossEncoderReranker` (Phase 1: log-only) is available in `core/reranker.py`. It scores
140
+ candidates after federated search and logs the scores for observability without affecting ranking.
141
+ Promote it to production ranking after benchmarking shows it matches route-based quality.
142
+
143
+ ### Case retrieval augmentation (`case_synthetic_query` on `StatuteRoute`)
144
+
145
+ When a matched route defines `case_synthetic_query`, a supplementary case retrieval pass
146
+ runs with that query and unique results are merged into context (up to 8 total chunks).
147
+
148
+ This fixes cases where the query rewriter drops legally significant framing that is obvious
149
+ from the original question but lost in rewriting:
150
+
151
+ ```python
152
+ StatuteRoute(
153
+ intent="sham_flatmate_agreement",
154
+ include_any=("flatmate agreement", "meant to be tenants", ...),
155
+ forced_sections=("NZLEG/RTA/s5",),
156
+ synthetic_query="...",
157
+ case_synthetic_query=(
158
+ "flatmate agreement landlord not living property sham tenancy RTA applies "
159
+ "boarder licensee residential tenancy act tenant rights eviction notice"
160
+ ),
161
+ )
162
+ ```
163
+
164
+ ### Smoke fixture source count (`min_sources` on `SmokeFixture`)
165
+
166
+ Assert that supplementary retrieval ran and returned at least `min_sources` case sources:
167
+
168
+ ```python
169
+ SmokeFixture(
170
+ question="My landlord put us on a flatmate agreement...",
171
+ expected_sections=[],
172
+ min_sources=6,
173
+ description="sham_flatmate_agreement route - case_synthetic_query augmentation",
174
+ )
175
+ ```
176
+
177
+ ---
178
+
179
+ ## Qdrant payload schema
180
+
181
+ All jurisdictions must produce chunks conforming to `schemas/qdrant_payload.schema.json`.
182
+
183
+ Required fields: `document_id`, `court`, `court_name`, `title`, `date`, `url`, `text`, `source_type`.
184
+
185
+ ---
186
+
187
+ ## Stack
188
+
189
+ | Component | Technology |
190
+ |---|---|
191
+ | Vector database | Qdrant |
192
+ | Embeddings | nomic-embed-text-v1.5 / Qwen3-Embedding-0.6B via sentence-transformers |
193
+ | LLM inference | llama.cpp (OpenAI-compatible) |
194
+ | API | FastAPI + SSE streaming |
195
+ | Cache | Redis (web verify results) |
196
+ | Queue | Semaphore-based, per-IP fairness |
197
+
198
+ ---
199
+
200
+ ## Milestones
201
+
202
+ - [x] Milestone 0 - core interface design, runtime modules, `nz_tenancy` jurisdiction
203
+ - [x] Milestone 1 - `nsw_tenancy` skeleton + `nz_legal` + `nz_employment` prove interface generalises
204
+ - [x] Milestone 2 - smoke test runner wired to pytest (Tier 1/2/3), Docker Compose
205
+ - [x] Milestone 3 - CONTRIBUTING.md, packaging, NSW NCAT scraper + corpus (225+ decisions)
206
+ - [x] Milestone 4 - `nz_legal` migration: tracker endpoints, contrasting cases, `register_routes` hook
207
+ - [x] Milestone 5 - federated per-Act legislation retrieval, Healthy Homes Standards 2019 corpus, cross-encoder reranker (Phase 1 log-only), Qdrant payload indexes for fast filtered search
208
+
209
+ ---
210
+
211
+ ## Related project
212
+
213
+ The NZ tenancy tool running on this framework: https://tenancy.localrun.ai
214
+
215
+ Source: https://github.com/jwongso/nz-legal-rag
216
+
217
+ ---
218
+
219
+ MIT License. Not legal advice.
@@ -0,0 +1,49 @@
1
+ astraea_framework-0.1.0.dist-info/licenses/LICENSE,sha256=buYdy5JwGudTRS33aMwE2HvaDg707r0ZEwxznPTeyB8,1076
2
+ core/__init__.py,sha256=c9lekMTB52ytmIGeBRtbyLuMFh5mE5TdZ3ZRzu7BxHM,45
3
+ core/api.py,sha256=YYlSaFkc48aieYE0Q0puCxUKK9ORczEsXcsM5yxV_lA,40360
4
+ core/browser.py,sha256=_2HTBJs3ebgK9XiW0z4b7iRoq7XzpwqymNweSMM_II0,3765
5
+ core/embedder.py,sha256=8eNT0uPmo_58IFrUCqCfsrfABQyKnIkroO8eXF8QPyM,3246
6
+ core/feedback.py,sha256=LNSbO4c1XdZQvPOlNSFDIuHn2ooHb_k6n1n8x0CYm8Q,2830
7
+ core/generator.py,sha256=labRa4c0_aX7PvwoABwcb7jGdLms_vsOlYwYxFS-u_Q,2705
8
+ core/jurisdiction.py,sha256=nZDT511F9XQXTj1HYmFAVAKoNHGF2W3tRN2_8sk-t0c,8600
9
+ core/legislation.py,sha256=I65anLF3FDdU_iZkda0Riq8gHEihKd0IVclQCecrOwk,4330
10
+ core/mcp.py,sha256=Gp8D0FsncgRV9VgB0hJIGdfhKIXWmH8Wt2YAzEuyPN0,7654
11
+ core/pipeline.py,sha256=5LX-KfJ0MvgeHajB0Qqoe53ZqvfxdqUAlczC3xEdS40,3315
12
+ core/queue.py,sha256=PfGpiuBfDpqFfwrr7grDgj-TahxZqkOa6iaeHWYZcDY,5574
13
+ core/reranker.py,sha256=ZISSqRJ7CU0CjUhyRMHAWZtLxHKVG8vEJGOV3Rx_o2U,2979
14
+ core/retriever.py,sha256=jNC3QOweBrfUxbD6YRopB3Q77rlD-Pcdw4C9v5Mo5bc,6483
15
+ core/routing.py,sha256=7Q_Tbcx6aK0QnnC0n8fZZryzgnIgvW-0ENxWnlKG5hk,2825
16
+ core/sanitize.py,sha256=ECD5t8x01ODaBz7x1GlHQ5AI1t6tWCu8UxR_2U4JOzc,3194
17
+ core/security.py,sha256=CyGWGaVweG3ZfLT6OOxS3P9a-8auSAC0nb1ts4oNBb0,1493
18
+ core/service.py,sha256=5FlkZvK_SoXauomwPC3XiqwKPwqA4Q5jbexJ8aHJ_1A,6351
19
+ examples/minimal_jurisdiction/__init__.py,sha256=wTjJOApiNu8cyG5gFH1OTm7yNKZ6iuuu7qMITQukO4Y,96
20
+ examples/minimal_jurisdiction/jurisdiction.py,sha256=qDoc6tRWPAKCqgFdNpTjL-oiVQ7uznDSoLVFzGy0WL0,925
21
+ ingest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ ingest/base.py,sha256=unlwS8rDDNL2NXg7dAUM6TFN7MqBafEuD-D3b1X1vBs,2615
23
+ ingest/run_nsw_tenancy.py,sha256=VoDiXzZOWU6PtSAejgf18dC2M7ZkWkdX3J2IcGztCuo,3787
24
+ jurisdictions/__init__.py,sha256=c9lekMTB52ytmIGeBRtbyLuMFh5mE5TdZ3ZRzu7BxHM,45
25
+ jurisdictions/nsw_tenancy/__init__.py,sha256=0QgKqa5OifEl056__N0N8Rgn69DKHpHrJ3oSRL9gdI4,115
26
+ jurisdictions/nsw_tenancy/jurisdiction.py,sha256=rvlS70fTetsg5-YNXjA172Z2j4boJ5c3a8VRSwB8fos,3679
27
+ jurisdictions/nsw_tenancy/prompt.py,sha256=1eFK5IN86oS53QmbxXBC5aFfSqEX9rpTzkBROzESmok,1433
28
+ jurisdictions/nsw_tenancy/routes.py,sha256=BQY0DSuhjavcfdUELyj7NB4H8XjPnFekHaez0kiUAFM,4428
29
+ jurisdictions/nsw_tenancy/scraper.py,sha256=E8UBAXIZqU2yhduhl0BeO2SWN2cxSgfymR0bSlJ_lYA,6433
30
+ jurisdictions/nz_employment/__init__.py,sha256=0gwVh9bg8HtxNhroABHnA752jDU8rKhh8SjCWkGLzLk,121
31
+ jurisdictions/nz_employment/jurisdiction.py,sha256=wlNyr0Hk09nZxZ9zwy20j3Lrx02FgQY6Wax3FL6byRg,4201
32
+ jurisdictions/nz_employment/prompt.py,sha256=GJlhwRKZAjKlgybqBmhVCDbgCq2vlTsnaXIIHZbamas,1804
33
+ jurisdictions/nz_employment/routes.py,sha256=UQOZPtJrqqldT_G6CNC8DtdVVV-iHWb1pf21HW9i4sQ,4800
34
+ jurisdictions/nz_legal/__init__.py,sha256=FrljWpT41T82ozAkGsz-k39Opn8j5d6f-7oFTI9FP8w,106
35
+ jurisdictions/nz_legal/app.py,sha256=HjRbjlTawB9izFz7DZ0nCgI6QMfxyxoT94ZXSpPHIiE,141
36
+ jurisdictions/nz_legal/contrasting.py,sha256=DXUh3OiEqHhZ6nOBqXp_G0aiJk2nN-botd2xO1JyQGA,7563
37
+ jurisdictions/nz_legal/jurisdiction.py,sha256=78O2R1bIt8Ra6LzP08Ypbk1hpS5OOnEf0LC5TgezHb8,3942
38
+ jurisdictions/nz_legal/routes.py,sha256=Ae-10G_lhliqrh_MTzFSSNFBPmjhMUa-l_h0jIGn6O8,10787
39
+ jurisdictions/nz_legal/scroll.py,sha256=4I3RW22oS9RerIzH2wzNtnIaY-qQxIS3BVvRrIxp3xk,10845
40
+ jurisdictions/nz_tenancy/__init__.py,sha256=x8Ta9elz_-A119SMp_-YWHewKDjWUbW1Z5kUqdiFa9M,112
41
+ jurisdictions/nz_tenancy/jurisdiction.py,sha256=AEzUJv5HcxMknFQc0Wzv6YWfTFRP-8VgDFe1aWI7lkk,6760
42
+ jurisdictions/nz_tenancy/mcp_server.py,sha256=USfRLsvTnMk93-lBrhqOrGa32PZHTrYai1xOVOnwvW0,935
43
+ jurisdictions/nz_tenancy/prompt.py,sha256=38PDP6Fqh4dTWQGNLuNR5dn9bYyQx-aru4lqOz1H540,7510
44
+ jurisdictions/nz_tenancy/routes.py,sha256=uZb0N5hAx4WKN_IQXT5r97DpT4rAa-29_XMnxqth0rE,9832
45
+ jurisdictions/nz_tenancy/scraper.py,sha256=F17afOMJFtPxfNR8FPw9W7b4BZ5cHJxsJvXUonqv89Y,247
46
+ astraea_framework-0.1.0.dist-info/METADATA,sha256=cs7Z6Rq3yqIEaTeGM3l666q90Upf60GJwGWTlDozB-0,8757
47
+ astraea_framework-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
48
+ astraea_framework-0.1.0.dist-info/top_level.txt,sha256=AZGdbLZ14EMCesgcPBSe9qPAm93SiRYk2WZa93CY1wQ,35
49
+ astraea_framework-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 jsaputra@riseup.net
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,4 @@
1
+ core
2
+ examples
3
+ ingest
4
+ jurisdictions
core/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # TODO: Milestone 0 - port from nz-legal-rag