commoner-probe 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commoner_probe-0.4.0/CHANGELOG.md +118 -0
- commoner_probe-0.4.0/LICENSE +21 -0
- commoner_probe-0.4.0/MANIFEST.in +13 -0
- commoner_probe-0.4.0/PKG-INFO +531 -0
- commoner_probe-0.4.0/README.md +493 -0
- commoner_probe-0.4.0/commoner_probe/__init__.py +62 -0
- commoner_probe-0.4.0/commoner_probe/__main__.py +5 -0
- commoner_probe-0.4.0/commoner_probe/answers.py +598 -0
- commoner_probe-0.4.0/commoner_probe/atr_linkage.py +275 -0
- commoner_probe-0.4.0/commoner_probe/base.py +169 -0
- commoner_probe-0.4.0/commoner_probe/cli.py +466 -0
- commoner_probe-0.4.0/commoner_probe/committees.py +603 -0
- commoner_probe-0.4.0/commoner_probe/corpus.py +312 -0
- commoner_probe-0.4.0/commoner_probe/csr/__init__.py +6 -0
- commoner_probe-0.4.0/commoner_probe/csr/mca.py +178 -0
- commoner_probe-0.4.0/commoner_probe/dmft/__init__.py +3 -0
- commoner_probe-0.4.0/commoner_probe/dmft/mines.py +238 -0
- commoner_probe-0.4.0/commoner_probe/entities.py +440 -0
- commoner_probe-0.4.0/commoner_probe/evidence.py +250 -0
- commoner_probe-0.4.0/commoner_probe/example_topics/__init__.py +23 -0
- commoner_probe-0.4.0/commoner_probe/example_topics/affirmative_action.json +82 -0
- commoner_probe-0.4.0/commoner_probe/example_topics/home_affairs_starred.json +31 -0
- commoner_probe-0.4.0/commoner_probe/example_topics/libraries.json +66 -0
- commoner_probe-0.4.0/commoner_probe/example_topics/mines_dmft_pmkkky.json +26 -0
- commoner_probe-0.4.0/commoner_probe/example_topics/narcotics_substance.json +44 -0
- commoner_probe-0.4.0/commoner_probe/http_client.py +206 -0
- commoner_probe-0.4.0/commoner_probe/members.py +127 -0
- commoner_probe-0.4.0/commoner_probe/neva.py +663 -0
- commoner_probe-0.4.0/commoner_probe/records.py +350 -0
- commoner_probe-0.4.0/commoner_probe/resolver.py +169 -0
- commoner_probe-0.4.0/commoner_probe/runlog.py +189 -0
- commoner_probe-0.4.0/commoner_probe/sansad.py +469 -0
- commoner_probe-0.4.0/commoner_probe/schemas/__init__.py +60 -0
- commoner_probe-0.4.0/commoner_probe/schemas/answers_atr_response.schema.json +22 -0
- commoner_probe-0.4.0/commoner_probe/schemas/answers_dfg_recommendation.schema.json +21 -0
- commoner_probe-0.4.0/commoner_probe/schemas/answers_qa_response.schema.json +27 -0
- commoner_probe-0.4.0/commoner_probe/schemas/atr_linkage.schema.json +19 -0
- commoner_probe-0.4.0/commoner_probe/schemas/committee_members.schema.json +43 -0
- commoner_probe-0.4.0/commoner_probe/schemas/entities_bureaucratic_posting.schema.json +19 -0
- commoner_probe-0.4.0/commoner_probe/schemas/entities_committee_membership.schema.json +18 -0
- commoner_probe-0.4.0/commoner_probe/schemas/entities_ministerial_appointment.schema.json +17 -0
- commoner_probe-0.4.0/commoner_probe/schemas/entities_mp_membership.schema.json +20 -0
- commoner_probe-0.4.0/commoner_probe/schemas/entities_person.schema.json +16 -0
- commoner_probe-0.4.0/commoner_probe/schemas/manifest_committee_report.schema.json +43 -0
- commoner_probe-0.4.0/commoner_probe/schemas/manifest_mca_csr.schema.json +72 -0
- commoner_probe-0.4.0/commoner_probe/schemas/manifest_mines_dmft.schema.json +94 -0
- commoner_probe-0.4.0/commoner_probe/schemas/manifest_qa.schema.json +58 -0
- commoner_probe-0.4.0/commoner_probe/schemas/runs.schema.json +39 -0
- commoner_probe-0.4.0/commoner_probe/schemas/state_assembly_member.schema.json +64 -0
- commoner_probe-0.4.0/commoner_probe/schemas/state_assembly_paper_laid.schema.json +24 -0
- commoner_probe-0.4.0/commoner_probe/schemas/state_assembly_question.schema.json +84 -0
- commoner_probe-0.4.0/commoner_probe/schemas/state_assembly_question_unlisted.schema.json +84 -0
- commoner_probe-0.4.0/commoner_probe/stats.py +235 -0
- commoner_probe-0.4.0/commoner_probe/textparse.py +79 -0
- commoner_probe-0.4.0/commoner_probe/topics.py +44 -0
- commoner_probe-0.4.0/commoner_probe/url_safety.py +82 -0
- commoner_probe-0.4.0/commoner_probe/validate.py +213 -0
- commoner_probe-0.4.0/commoner_probe.egg-info/PKG-INFO +531 -0
- commoner_probe-0.4.0/commoner_probe.egg-info/SOURCES.txt +93 -0
- commoner_probe-0.4.0/commoner_probe.egg-info/dependency_links.txt +1 -0
- commoner_probe-0.4.0/commoner_probe.egg-info/entry_points.txt +2 -0
- commoner_probe-0.4.0/commoner_probe.egg-info/requires.txt +18 -0
- commoner_probe-0.4.0/commoner_probe.egg-info/top_level.txt +1 -0
- commoner_probe-0.4.0/docs/ENDPOINTS.md +132 -0
- commoner_probe-0.4.0/docs/INTEGRATION_SMOKE.md +97 -0
- commoner_probe-0.4.0/docs/RATIONALE.md +154 -0
- commoner_probe-0.4.0/docs/SCHEMAS.md +668 -0
- commoner_probe-0.4.0/pyproject.toml +78 -0
- commoner_probe-0.4.0/setup.cfg +4 -0
- commoner_probe-0.4.0/tests/test_adapters.py +121 -0
- commoner_probe-0.4.0/tests/test_answers.py +348 -0
- commoner_probe-0.4.0/tests/test_atr_linkage.py +225 -0
- commoner_probe-0.4.0/tests/test_check_leaks.py +204 -0
- commoner_probe-0.4.0/tests/test_committees.py +348 -0
- commoner_probe-0.4.0/tests/test_corpus_loader.py +264 -0
- commoner_probe-0.4.0/tests/test_csr_mca.py +165 -0
- commoner_probe-0.4.0/tests/test_dmft_mines.py +193 -0
- commoner_probe-0.4.0/tests/test_docs_sync.py +71 -0
- commoner_probe-0.4.0/tests/test_entities.py +216 -0
- commoner_probe-0.4.0/tests/test_evidence_dmft.py +153 -0
- commoner_probe-0.4.0/tests/test_init_topic_cli.py +61 -0
- commoner_probe-0.4.0/tests/test_qa_structured_parse.py +212 -0
- commoner_probe-0.4.0/tests/test_report_type.py +221 -0
- commoner_probe-0.4.0/tests/test_resolve_askers.py +116 -0
- commoner_probe-0.4.0/tests/test_resolver.py +150 -0
- commoner_probe-0.4.0/tests/test_runlog.py +246 -0
- commoner_probe-0.4.0/tests/test_schemas.py +580 -0
- commoner_probe-0.4.0/tests/test_security_hardening.py +110 -0
- commoner_probe-0.4.0/tests/test_smoke_fixture.py +129 -0
- commoner_probe-0.4.0/tests/test_state_assembly.py +155 -0
- commoner_probe-0.4.0/tests/test_textparse.py +53 -0
- commoner_probe-0.4.0/tests/test_topics.py +62 -0
- commoner_probe-0.4.0/tests/test_url_encoding.py +52 -0
- commoner_probe-0.4.0/tests/test_url_safety.py +85 -0
- commoner_probe-0.4.0/tests/test_validate_cli.py +111 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.4.0 (2026-06-22)
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- **`commoner-probe mca-csr`** — download MCA CDM CSR company-spend CSV exports by financial year.
|
|
8
|
+
- **`manifest_mca_csr` schema** and `ManifestMcaCsrRecord` / `Corpus.manifest_mca_csr()` for typed access to MCA CSR manifest records.
|
|
9
|
+
- **`commoner-probe mines-dmft`** — acquire Ministry of Mines / Odisha DMFT public disclosure files with source provenance.
|
|
10
|
+
- **`commoner-probe evidence dmft`** — build side-by-side DMFT evidence bundles from executive disclosure and Sansad oversight records.
|
|
11
|
+
- **`docs/ENDPOINTS.md`** — public source-family endpoint reference.
|
|
12
|
+
- **`narcotics_substance` built-in topic** for NDPS, trafficking, and substance-abuse oversight records.
|
|
13
|
+
|
|
14
|
+
### Changed
|
|
15
|
+
|
|
16
|
+
- **Relicensed**: AGPL-3.0-or-later → MIT, so `commoner-probe` can be the permissive shared acquisition floor that downstream repos (including the non-AGPL `sansad-semantic-crawler`) depend on without copyleft friction.
|
|
17
|
+
- `commoner_probe.csr.mca` now uses the verified MCA CDM live contract: `GET /csr-data` for the CSRF-bearing form and `POST /cdm/export.php` for CSV export.
|
|
18
|
+
- Public packaging now includes only release-facing docs; local coordination files (`notes/`, `memory/`, `.ai/`, `.beads/`, `.codex/`, `WORKING.md`, `TODO.md`) are ignored and removed from the tracked public tree.
|
|
19
|
+
- `scripts/check_leaks.py` now blocks private coordination paths if they are accidentally staged.
|
|
20
|
+
|
|
21
|
+
## 0.3.0 (2026-06-06)
|
|
22
|
+
|
|
23
|
+
### Breaking changes
|
|
24
|
+
|
|
25
|
+
- **Package renamed**: `sansad-crawler` → `commoner-probe`. Update your `pip install` and imports.
|
|
26
|
+
- Python: `from sansad_crawler import ...` → `from commoner_probe import ...`
|
|
27
|
+
- CLI: `sansad-crawl` → `commoner-probe`
|
|
28
|
+
- Subcommands renamed: `crawl` → `sansad`, `crawl-committees` → `committees`, `extract-atr-linkage` → `atr-linkage`
|
|
29
|
+
- **New subcommand added**: `state-assembly` (NeVA state assembly portals)
|
|
30
|
+
- **Schema field renamed**: `crawled_at` → `probed_at` in all output records
|
|
31
|
+
- **Relicensed**: MIT → AGPL-3.0-or-later
|
|
32
|
+
|
|
33
|
+
### Added
|
|
34
|
+
|
|
35
|
+
- **`commoner-probe state-assembly`** — probe NeVA state assembly portals (`{portal}.neva.gov.in`). Writes `questions.jsonl`, `questions_unlisted.jsonl`, `members.jsonl`, `papers_laid.jsonl`. Tested on Gujarat assembly 15.
|
|
36
|
+
- **HTTP hardening** (`commoner_probe/http_client.py`): SSRF guard, robots.txt checking, per-domain rate limiting (1 req/s), exponential backoff (3 retries), optional `requests_cache` (6h TTL). Install via `pip install commoner-probe[cache]`.
|
|
37
|
+
- **Committee composition** (`CommitteeProbe.probe_composition()`): writes `committee_members.jsonl`.
|
|
38
|
+
- **`filter_fn` hook on `TopicProfile`**: callable injected by analytics layer at runtime.
|
|
39
|
+
- **`classifier_config` in `TopicProfile`**: propagated to `_runs.jsonl` for corpus auditability.
|
|
40
|
+
- **JSON schemas for new outputs**: `committee_members`, `state_assembly_question`, `state_assembly_question_unlisted`, `state_assembly_member`, `state_assembly_paper_laid`.
|
|
41
|
+
- **`commoner-probe init-topic`**: write a bundled example topic profile to disk (built-ins: `libraries`, `home_affairs_starred`, `affirmative_action`).
|
|
42
|
+
- **Single-sourced version**: `__version__` reads from `importlib.metadata` with pyproject fallback.
|
|
43
|
+
- **GitHub Actions**: CI (matrix 3.10–3.12, ruff, pytest) and OIDC PyPI release workflow.
|
|
44
|
+
- **`MANIFEST.in`**, **`CONTRIBUTING.md`**, **`CODE_OF_CONDUCT.md`** (Contributor Covenant v2.1).
|
|
45
|
+
|
|
46
|
+
### Changed
|
|
47
|
+
|
|
48
|
+
- Base class `BaseCrawler` → `BaseProbe`; `crawl_ls`/`crawl_rs` → `probe_ls`/`probe_rs`; `crawl_composition` → `probe_composition`.
|
|
49
|
+
- User-Agent: `commoner-probe/0.3.0`.
|
|
50
|
+
- HTTP cache env var: `COMMONER_CACHE_DIR` (was `SANSAD_CACHE_DIR`; old name still honoured with deprecation warning).
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## 0.2.0 (2026-05-21)
|
|
55
|
+
|
|
56
|
+
### Added
|
|
57
|
+
|
|
58
|
+
- **`docs/SCHEMAS.md`** — complete field-level reference for every output
|
|
59
|
+
stream: all four manifest record shapes (LS Q/A, RS Q/A, LS committee,
|
|
60
|
+
RS committee), `_runs.jsonl`, three `answers.jsonl` kinds,
|
|
61
|
+
`atr_linkage.jsonl`, and five `entities/*.jsonl` files. Includes
|
|
62
|
+
controlled vocabularies and join-key documentation.
|
|
63
|
+
|
|
64
|
+
- **JSON Schemas** — twelve Draft-2020-12 schemas shipped as package data
|
|
65
|
+
under `sansad_crawler/schemas/`. Exposed via
|
|
66
|
+
`sansad_crawler.schemas.load(name)` and `schemas.list_all()`.
|
|
67
|
+
|
|
68
|
+
- **`sansad_crawler/records.py`** — typed dataclasses for every record kind
|
|
69
|
+
(`ManifestQaRecord`, `ManifestCommitteeReportRecord`, `AnswerQaResponse`,
|
|
70
|
+
`AnswerAtrResponse`, `AnswerDfgRecommendation`, `AtrLinkageRecord`,
|
|
71
|
+
`RunRecord`). Each has `from_dict()` that tolerates unknown keys and
|
|
72
|
+
missing optional fields.
|
|
73
|
+
|
|
74
|
+
- **`sansad_crawler/corpus.py`** — `Corpus` class with streaming iterators
|
|
75
|
+
(`manifest_qa`, `manifest_committee_reports`, `answers_qa`, `answers_atr`,
|
|
76
|
+
`answers_dfg`, `atr_linkages`, `runs`, `entities`), join helpers
|
|
77
|
+
(`join_qa`, `join_atr_chain`), and an opt-in `to_dataframe(stream)` that
|
|
78
|
+
requires `pip install sansad-crawler[pandas]`.
|
|
79
|
+
|
|
80
|
+
- **`sansad-crawl stats`** — new CLI subcommand that prints corpus health:
|
|
81
|
+
record counts by house/year/ministry/committee/report_type, answers
|
|
82
|
+
extraction coverage, entity resolution rate, and date ranges. Use
|
|
83
|
+
`--json` for machine-readable output.
|
|
84
|
+
|
|
85
|
+
- **`sansad-crawl validate`** — new CLI subcommand that validates every
|
|
86
|
+
JSONL file in a corpus against its JSON Schema. Requires
|
|
87
|
+
`pip install sansad-crawler[dev]`. Prints line numbers and JSON pointers
|
|
88
|
+
on failure; exits 1 on any error.
|
|
89
|
+
|
|
90
|
+
- **`[dev]` optional-dependency group** — `jsonschema>=4.20` and
|
|
91
|
+
`pytest>=7`. Install with `pip install sansad-crawler[dev]`.
|
|
92
|
+
|
|
93
|
+
- **`[pandas]` optional-dependency group** — `pandas>=2.0`. Install with
|
|
94
|
+
`pip install sansad-crawler[pandas]`.
|
|
95
|
+
|
|
96
|
+
- **`examples/usage.py`** — demonstration script for the `Corpus` API.
|
|
97
|
+
|
|
98
|
+
### Changed (non-breaking)
|
|
99
|
+
|
|
100
|
+
- `sansad_crawler.__init__` now re-exports `Corpus`, `QaPair`, `AtrChain`,
|
|
101
|
+
all record dataclasses, and the `schemas` module.
|
|
102
|
+
- `run_id` and `crawled_at` in manifest schemas changed from `required` to
|
|
103
|
+
optional (always present in freshly crawled corpora; may be absent in
|
|
104
|
+
synthetic or backfilled data).
|
|
105
|
+
|
|
106
|
+
### Unchanged
|
|
107
|
+
|
|
108
|
+
Crawler behaviour, extractor logic, and manifest field set are unchanged.
|
|
109
|
+
All corpora produced by v0.1.0 load and validate cleanly under v0.2.0.
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## 0.1.0 (2026-05-21)
|
|
114
|
+
|
|
115
|
+
Initial release. Lok Sabha + Rajya Sabha Q/A crawler, standing-committee
|
|
116
|
+
report crawler, regex-based Q/A and ATR extractors, ATR linkage extractor,
|
|
117
|
+
entity resolution, four CLI subcommands (`crawl`, `crawl-committees`,
|
|
118
|
+
`extract-answers`, `extract-atr-linkage`).
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Commoner LLP
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
include LICENSE
|
|
2
|
+
include README.md
|
|
3
|
+
include CHANGELOG.md
|
|
4
|
+
include docs/SCHEMAS.md
|
|
5
|
+
include docs/INTEGRATION_SMOKE.md
|
|
6
|
+
include docs/RATIONALE.md
|
|
7
|
+
include docs/ENDPOINTS.md
|
|
8
|
+
prune .beads
|
|
9
|
+
prune .ai
|
|
10
|
+
prune .claude
|
|
11
|
+
prune .codex
|
|
12
|
+
prune notes
|
|
13
|
+
prune memory
|
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: commoner-probe
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Sousveillance infrastructure for state mandatory-disclosure portals — parliamentary questions, committee reports, budget data, and state assembly records.
|
|
5
|
+
Author: Sreeram Ramasubramanian
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/CommonerLLP/commoner-probe
|
|
8
|
+
Project-URL: Source, https://github.com/CommonerLLP/commoner-probe
|
|
9
|
+
Project-URL: Issues, https://github.com/CommonerLLP/commoner-probe/issues
|
|
10
|
+
Project-URL: Documentation, https://github.com/CommonerLLP/commoner-probe/blob/master/README.md
|
|
11
|
+
Project-URL: Changelog, https://github.com/CommonerLLP/commoner-probe/blob/master/CHANGELOG.md
|
|
12
|
+
Keywords: parliament-of-india,lok-sabha,rajya-sabha,parliamentary-questions,standing-committees,state-assembly,neva,public-records,civic-tech,sousveillance,counter-forensics,right-to-information,open-government,data-justice
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Topic :: Sociology
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Operating System :: OS Independent
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Provides-Extra: pdf
|
|
25
|
+
Requires-Dist: pdfminer.six>=20231228; extra == "pdf"
|
|
26
|
+
Provides-Extra: http
|
|
27
|
+
Requires-Dist: requests>=2.31.0; extra == "http"
|
|
28
|
+
Provides-Extra: pandas
|
|
29
|
+
Requires-Dist: pandas>=2.0; extra == "pandas"
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: jsonschema>=4.20; extra == "dev"
|
|
32
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
33
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
34
|
+
Provides-Extra: all
|
|
35
|
+
Requires-Dist: pdfminer.six>=20231228; extra == "all"
|
|
36
|
+
Requires-Dist: requests>=2.31.0; extra == "all"
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
# commoner-probe
|
|
40
|
+
|
|
41
|
+
Sousveillance infrastructure for the state's mandatory disclosure systems.
|
|
42
|
+
|
|
43
|
+
A commoner probes the state's own paperwork — parliamentary questions, committee
|
|
44
|
+
reports, state assembly records — and turns it into evidence. `commoner-probe`
|
|
45
|
+
automates the acquisition so you can focus on the analysis.
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install "commoner-probe[all]"
|
|
49
|
+
python -c "import commoner_probe as probe"   # alias used throughout CommonerLLP toolchain
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Why this exists
|
|
55
|
+
|
|
56
|
+
Parliamentary questions, committee reports, state assembly records, CSR
|
|
57
|
+
exports, and public mining-district disclosures are mandatory or official
|
|
58
|
+
public disclosures. The data exists. The problem
|
|
59
|
+
is that it lives across undocumented portals with inconsistent APIs, no bulk
|
|
60
|
+
export, and PDFs that require extraction to read programmatically.
|
|
61
|
+
|
|
62
|
+
`commoner-probe` handles the entire acquisition pipeline:
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
public disclosure portals → manifest.jsonl → files/PDFs → extracted records → your analysis
|
|
66
|
+
(metadata) (raw source) (structured text)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Classification, topic modelling, and dossier generation are intentionally out
|
|
70
|
+
of scope. This library does one thing: acquire public disclosure data into
|
|
71
|
+
provenance-rich, schema-validated JSONL and source files.
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Install
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
pip install "commoner-probe[all]" # requests + PDF extraction
|
|
79
|
+
pip install "commoner-probe[all,dev]" # + schema validation and tests
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Five-minute quickstart
|
|
85
|
+
|
|
86
|
+
### Step 1 — Write a topic profile
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"name": "climate",
|
|
91
|
+
"description": "Climate change and environmental policy",
|
|
92
|
+
"search_groups": {
|
|
93
|
+
"climate": ["climate change", "global warming", "net zero"],
|
|
94
|
+
"air_quality": ["air pollution", "AQI", "particulate matter"]
|
|
95
|
+
},
|
|
96
|
+
"lok_sabha_ministries": ["ENVIRONMENT", "POWER", "PETROLEUM"],
|
|
97
|
+
"rajya_sabha_ministry_likes": ["ENVIRONMENT", "POWER", "PETROLEUM"]
|
|
98
|
+
}
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Step 2 — Probe parliamentary questions
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
commoner-probe sansad \
|
|
105
|
+
--topic topic.json \
|
|
106
|
+
--out data/climate \
|
|
107
|
+
--house both \
|
|
108
|
+
--from-date 2019-01-01
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Writes `data/climate/manifest.jsonl` — one record per question from both houses.
|
|
112
|
+
|
|
113
|
+
### Step 3 — Probe committee reports
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
commoner-probe committees \
|
|
117
|
+
--topic topic.json \
|
|
118
|
+
--out data/climate-committees \
|
|
119
|
+
--house both
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
One record per standing committee report (LS and RS DRSCs).
|
|
123
|
+
|
|
124
|
+
### Step 4 — Extract text from PDFs
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
commoner-probe extract-answers --out data/climate
|
|
128
|
+
commoner-probe extract-answers --out data/climate-committees
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Parses downloaded PDFs into `answers.jsonl`: Q/A pairs, committee
|
|
132
|
+
recommendations, and government responses.
|
|
133
|
+
|
|
134
|
+
### Step 5 — Load in Python
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
import commoner_probe as probe
|
|
138
|
+
|
|
139
|
+
c = probe.Corpus("data/climate")
|
|
140
|
+
|
|
141
|
+
for r in c.manifest_qa():
|
|
142
|
+
print(r.date, r.house, r.ministry, r.title)
|
|
143
|
+
|
|
144
|
+
for pair in c.join_qa():
|
|
145
|
+
if pair.answers:
|
|
146
|
+
print(pair.manifest.title)
|
|
147
|
+
print(pair.answers[0].question_text[:200])
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## What you can study
|
|
153
|
+
|
|
154
|
+
### Parliamentary questions (Lok Sabha + Rajya Sabha)
|
|
155
|
+
|
|
156
|
+
Each record carries who asked (MP name, party, state), which ministry answered,
|
|
157
|
+
question number, type (starred / unstarred), date, session, and the full PDF.
|
|
158
|
+
After `extract-answers`, the extracted question and answer text is available too.
|
|
159
|
+
|
|
160
|
+
**Typical research questions**: ministry responsiveness rates, which MPs ask
|
|
161
|
+
the most questions by topic, how the same policy question evolves across
|
|
162
|
+
sessions, party-level questioning patterns.
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
import commoner_probe as probe
|
|
166
|
+
from collections import Counter
|
|
167
|
+
|
|
168
|
+
c = probe.Corpus("data/climate")
|
|
169
|
+
ministry_counts = Counter(r.ministry for r in c.manifest_qa())
|
|
170
|
+
for ministry, n in ministry_counts.most_common(10):
|
|
171
|
+
print(f"{ministry}: {n}")
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Standing committee reports (LS + RS DRSCs)
|
|
175
|
+
|
|
176
|
+
Committee reports come in four shapes:
|
|
177
|
+
|
|
178
|
+
| `report_type` | What it is |
|
|
179
|
+
|---|---|
|
|
180
|
+
| `demands_for_grants` | Annual budget scrutiny — the committee dissects ministry spending |
|
|
181
|
+
| `bill` | The committee's examination of a pending bill before it passes |
|
|
182
|
+
| `subject` | Own-initiative policy investigation — deepest substantive record |
|
|
183
|
+
| `action_taken` | The government's formal response to the committee's recommendations |
|
|
184
|
+
|
|
185
|
+
Action Taken Reports (ATRs) are the government's formal written responses to
|
|
186
|
+
committee recommendations. The `atr-linkage` command connects each ATR back
|
|
187
|
+
to the original report, enabling lifecycle analysis:
|
|
188
|
+
*recommendation → government rejection/acceptance → follow-up*.
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
import commoner_probe as probe
|
|
192
|
+
|
|
193
|
+
c = probe.Corpus("data/climate-committees")
|
|
194
|
+
|
|
195
|
+
for chain in c.join_atr_chain():
|
|
196
|
+
print(f"Report: {chain.original and chain.original.title}")
|
|
197
|
+
print(f" Recommendations: {len(chain.original_observations)}")
|
|
198
|
+
print(f" Government responses: {len(chain.atr_answers)}")
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### State assembly records (NeVA portals)
|
|
202
|
+
|
|
203
|
+
From 2020, sub-national governments have been adopting NIC's NeVA (National
|
|
204
|
+
e-Vidhan Application) infrastructure under a centrally sponsored scheme run
|
|
205
|
+
by the Ministry of Parliamentary Affairs. Most state assemblies are onboarding,
|
|
206
|
+
though coverage varies. The `state-assembly` command probes any NeVA portal:
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
commoner-probe state-assembly \
|
|
210
|
+
--portal gujarat \
|
|
211
|
+
--state GJ \
|
|
212
|
+
--out data/gujarat-assembly \
|
|
213
|
+
--assemblies 15
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
### MCA CSR company-spend exports
|
|
217
|
+
|
|
218
|
+
The Ministry of Corporate Affairs CDM CSR data page exposes downloadable CSV
|
|
219
|
+
exports by financial year. These records compare reporting/spending companies
|
|
220
|
+
and project-sector amounts. They do not identify CSR consultants or implementing
|
|
221
|
+
agencies unless MCA publishes that in the source export.
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
commoner-probe mca-csr \
|
|
225
|
+
--out data/mca-csr \
|
|
226
|
+
--years 2022-23,2021-22
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
import commoner_probe as probe
|
|
231
|
+
|
|
232
|
+
c = probe.Corpus("data/mca-csr")
|
|
233
|
+
for r in c.manifest_mca_csr():
|
|
234
|
+
print(r.financial_year, r.status, r.filename)
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
### Mines DMFT / PMKKKY disclosures
|
|
238
|
+
|
|
239
|
+
`mines-dmft` acquires raw Ministry of Mines and Odisha DMFT public disclosure
|
|
240
|
+
files. Ministry CSVs are current cumulative snapshots timestamped by the
|
|
241
|
+
source; treat them as snapshots, not fiscal-year series.
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
commoner-probe mines-dmft \
|
|
245
|
+
--out data/mines-dmft \
|
|
246
|
+
--sources mines-gov-in,odisha
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
Pair the executive disclosure snapshots with Sansad oversight records without
|
|
250
|
+
flattening the source families:
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
commoner-probe evidence dmft \
|
|
254
|
+
--mines-dmft-dir data/mines-dmft \
|
|
255
|
+
--sansad-dir data/sansad/mines-dmft-pmkkky \
|
|
256
|
+
--out data/evidence/dmft.json
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## All commands
|
|
262
|
+
|
|
263
|
+
### `commoner-probe sansad` — parliamentary questions
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
commoner-probe sansad \
|
|
267
|
+
--topic topic.json \
|
|
268
|
+
--out data/climate \
|
|
269
|
+
--house both \
|
|
270
|
+
--from-date 2019-01-01 \
|
|
271
|
+
--to-date 2026-01-01
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
| Flag | Default | What it does |
|
|
275
|
+
|---|---|---|
|
|
276
|
+
| `--topic` | required | Path to topic profile JSON |
|
|
277
|
+
| `--out` | required | Output corpus directory |
|
|
278
|
+
| `--house` | `both` | `ls`, `rs`, or `both` |
|
|
279
|
+
| `--from-date` | — | Earliest question date (YYYY-MM-DD) |
|
|
280
|
+
| `--to-date` | — | Latest question date |
|
|
281
|
+
| `--qtype` | `both` | `starred`, `unstarred`, or `both` |
|
|
282
|
+
| `--sessions` | `1-267` | Rajya Sabha session range |
|
|
283
|
+
| `--no-download` | off | Skip PDF downloads; metadata only |
|
|
284
|
+
| `--with-entities` | off | Resolve asker names to stable entity IDs |
|
|
285
|
+
| `--max-records N` | — | Stop after N new records per house (smoke-test) |
|
|
286
|
+
| `--max-buckets N` | — | Only run the first N search/ministry combos |
|
|
287
|
+
| `--reset` | off | Wipe existing manifest and start fresh |
|
|
288
|
+
|
|
289
|
+
### `commoner-probe committees` — standing committee reports
|
|
290
|
+
|
|
291
|
+
```bash
|
|
292
|
+
commoner-probe committees \
|
|
293
|
+
--topic topic.json \
|
|
294
|
+
--out data/committees \
|
|
295
|
+
--house both \
|
|
296
|
+
--committees finance,education
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
| Flag | Default | What it does |
|
|
300
|
+
|---|---|---|
|
|
301
|
+
| `--committees` | all | Comma-separated committee slugs |
|
|
302
|
+
| `--lok-sabha-no` | `18` | LS number for LS reports |
|
|
303
|
+
| `--from-date` / `--to-date` | — | Date range filter |
|
|
304
|
+
| `--no-download` | off | Skip PDF downloads |
|
|
305
|
+
|
|
306
|
+
**Available LS committees** (16 DRSCs):
|
|
307
|
+
`agriculture`, `chemicals`, `coal`, `communications`, `consumer_affairs`,
|
|
308
|
+
`defence`, `energy`, `external_affairs`, `finance`, `housing`, `labour`,
|
|
309
|
+
`petroleum`, `railways`, `rural_development`, `social_justice`, `water_resources`
|
|
310
|
+
|
|
311
|
+
**Available RS committees** (8 DRSCs):
|
|
312
|
+
`commerce`, `education`, `health`, `home_affairs`, `industry`, `personnel`,
|
|
313
|
+
`science`, `transport`
|
|
314
|
+
|
|
315
|
+
### `commoner-probe extract-answers` — PDF text extraction
|
|
316
|
+
|
|
317
|
+
```bash
|
|
318
|
+
commoner-probe extract-answers --out data/climate
|
|
319
|
+
commoner-probe extract-answers --out data/climate --refresh
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
Reads `manifest.jsonl` and downloaded PDFs; writes `answers.jsonl` with:
|
|
323
|
+
|
|
324
|
+
- `qa_response` — (question_text, answer_text) pairs from Q/A PDFs
|
|
325
|
+
- `atr_response` — (recommendation_no, recommendation_text, response_text) triples from ATR PDFs
|
|
326
|
+
- `dfg_recommendation` — numbered observation paragraphs from DFG/Bill/Subject PDFs
|
|
327
|
+
|
|
328
|
+
Requires `pip install "commoner-probe[pdf]"`.
|
|
329
|
+
|
|
330
|
+
### `commoner-probe atr-linkage` — ATR → original report
|
|
331
|
+
|
|
332
|
+
```bash
|
|
333
|
+
commoner-probe atr-linkage --out data/committees
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
Writes `atr_linkage.jsonl` — each ATR linked back to the report it responds to.
|
|
337
|
+
Safe to re-run (idempotent overwrite).
|
|
338
|
+
|
|
339
|
+
### `commoner-probe state-assembly` — state legislature records
|
|
340
|
+
|
|
341
|
+
```bash
|
|
342
|
+
commoner-probe state-assembly \
|
|
343
|
+
--portal gujarat \
|
|
344
|
+
--state GJ \
|
|
345
|
+
--out data/gujarat \
|
|
346
|
+
--assemblies 15
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
### `commoner-probe mca-csr` — MCA CSR company-spend exports
|
|
350
|
+
|
|
351
|
+
```bash
|
|
352
|
+
commoner-probe mca-csr \
|
|
353
|
+
--out data/mca-csr \
|
|
354
|
+
--years 2022-23
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
Downloads CSV exports from the MCA CDM CSR data page and writes one
|
|
358
|
+
`manifest.jsonl` record per financial year. Use `--dry-run` to print manifest
|
|
359
|
+
records without opening a network session.
|
|
360
|
+
|
|
361
|
+
### `commoner-probe mines-dmft` — Ministry of Mines / DMFT files
|
|
362
|
+
|
|
363
|
+
```bash
|
|
364
|
+
commoner-probe mines-dmft \
|
|
365
|
+
--out data/mines-dmft \
|
|
366
|
+
--sources mines-gov-in,odisha
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
Downloads raw Ministry of Mines static CSV snapshots and Odisha DMFT public
|
|
370
|
+
JSON/report surfaces. Use `--dry-run` to print manifest records without opening
|
|
371
|
+
network sessions.
|
|
372
|
+
|
|
373
|
+
### `commoner-probe evidence dmft` — cross-source evidence bundle
|
|
374
|
+
|
|
375
|
+
```bash
|
|
376
|
+
commoner-probe evidence dmft \
|
|
377
|
+
--mines-dmft-dir data/mines-dmft \
|
|
378
|
+
--sansad-dir data/sansad/mines-dmft-pmkkky \
|
|
379
|
+
--out data/evidence/dmft.json
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
Builds a JSON bundle with separate `executive_disclosure` and
|
|
383
|
+
`parliamentary_oversight` sections. It does not merge dissimilar source families
|
|
384
|
+
into one table.
|
|
385
|
+
|
|
386
|
+
### `commoner-probe stats` — corpus health
|
|
387
|
+
|
|
388
|
+
```bash
|
|
389
|
+
commoner-probe stats --out data/climate
|
|
390
|
+
commoner-probe stats --out data/climate --json
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
### `commoner-probe validate` — schema validation
|
|
394
|
+
|
|
395
|
+
```bash
|
|
396
|
+
commoner-probe validate --out data/climate
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
Validates every JSONL file against its JSON Schema. Exits 1 on errors.
|
|
400
|
+
Requires the `[dev]` extra: `pip install "commoner-probe[dev]"`.
|
|
401
|
+
|
|
402
|
+
---
|
|
403
|
+
|
|
404
|
+
## Topic profile
|
|
405
|
+
|
|
406
|
+
Controls what the probe acquires:
|
|
407
|
+
|
|
408
|
+
```json
|
|
409
|
+
{
|
|
410
|
+
"name": "libraries",
|
|
411
|
+
"description": "Public library infrastructure and policy",
|
|
412
|
+
"search_groups": {
|
|
413
|
+
"public_libraries": ["public library", "rural library"],
|
|
414
|
+
"policy": ["National Mission on Libraries", "RRRLF"]
|
|
415
|
+
},
|
|
416
|
+
"lok_sabha_ministries": ["CULTURE", "EDUCATION"],
|
|
417
|
+
"rajya_sabha_ministry_likes": ["CULTURE", "EDUCATION"]
|
|
418
|
+
}
|
|
419
|
+
```
|
|
420
|
+
|
|
421
|
+
- `search_groups` — keyword groups for LS full-text search. Each query runs
|
|
422
|
+
independently; results are union-deduped on `key`.
|
|
423
|
+
- `lok_sabha_ministries` — exact ministry filter for LS (case-sensitive).
|
|
424
|
+
- `rajya_sabha_ministry_likes` — ministry LIKE filter for RS (prefix match).
|
|
425
|
+
|
|
426
|
+
See `examples/topics/` for working examples.
|
|
427
|
+
|
|
428
|
+
---
|
|
429
|
+
|
|
430
|
+
## Output files
|
|
431
|
+
|
|
432
|
+
| File | Contents |
|
|
433
|
+
|------|----------|
|
|
434
|
+
| `manifest.jsonl` | One record per question or committee report |
|
|
435
|
+
| `_runs.jsonl` | Audit log: scope, topic hash, errors, per-bucket counts |
|
|
436
|
+
| `answers.jsonl` | Extracted Q/A and recommendation/response pairs |
|
|
437
|
+
| `atr_linkage.jsonl` | ATR → original report linkages |
|
|
438
|
+
| source CSV/JSON/HTML files | Raw source files for source-specific probes such as MCA CSR and DMFT |
|
|
439
|
+
| `pdfs/ls/` | Downloaded LS PDFs |
|
|
440
|
+
| `pdfs/rs/` | Downloaded RS PDFs |
|
|
441
|
+
| `probe.log` | Human-readable probe progress log |
|
|
442
|
+
|
|
443
|
+
For complete field-level documentation see [`docs/SCHEMAS.md`](docs/SCHEMAS.md).
|
|
444
|
+
|
|
445
|
+
---
|
|
446
|
+
|
|
447
|
+
## Entity resolution (`--with-entities`)
|
|
448
|
+
|
|
449
|
+
Pass `--with-entities` to `commoner-probe sansad` to resolve asker names to
|
|
450
|
+
stable `entity_id` values. On first run the entity store is populated from
|
|
451
|
+
the sansad.in MP roster; subsequent runs reuse the local cache.
|
|
452
|
+
|
|
453
|
+
Resolved entity IDs join across corpora and sessions — useful for studying
|
|
454
|
+
the same MP's questioning behaviour over time or across houses.
|
|
455
|
+
|
|
456
|
+
---
|
|
457
|
+
|
|
458
|
+
## Python API
|
|
459
|
+
|
|
460
|
+
```python
|
|
461
|
+
import commoner_probe as probe
|
|
462
|
+
|
|
463
|
+
c = probe.Corpus("data/climate")
|
|
464
|
+
|
|
465
|
+
# Typed iterators
|
|
466
|
+
for r in c.manifest_qa(): # ManifestQaRecord
|
|
467
|
+
...
|
|
468
|
+
for r in c.manifest_committee_reports(): # ManifestCommitteeReportRecord
|
|
469
|
+
...
|
|
470
|
+
for r in c.answers_qa(): # AnswerQaResponse
|
|
471
|
+
...
|
|
472
|
+
for r in c.answers_atr(): # AnswerAtrResponse
|
|
473
|
+
...
|
|
474
|
+
for r in c.answers_dfg(): # AnswerDfgRecommendation
|
|
475
|
+
...
|
|
476
|
+
for r in c.atr_linkages(): # AtrLinkageRecord
|
|
477
|
+
...
|
|
478
|
+
for r in c.manifest_mca_csr(): # ManifestMcaCsrRecord
|
|
479
|
+
...
|
|
480
|
+
for r in c.manifest_mines_dmft(): # ManifestMinesDmftRecord
|
|
481
|
+
...
|
|
482
|
+
for r in c.runs(): # RunRecord
|
|
483
|
+
...
|
|
484
|
+
|
|
485
|
+
# Join helpers
|
|
486
|
+
for pair in c.join_qa(): # manifest + extracted answers
|
|
487
|
+
...
|
|
488
|
+
for chain in c.join_atr_chain(): # ATR + original report + observations
|
|
489
|
+
...
|
|
490
|
+
|
|
491
|
+
# pandas (pip install "commoner-probe[pandas]")
|
|
492
|
+
df = c.to_dataframe("manifest_committee_reports")
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
See [`examples/usage.py`](examples/usage.py) for a runnable walkthrough.
|
|
496
|
+
See [`docs/ENDPOINTS.md`](docs/ENDPOINTS.md) for source-family endpoint notes.
|
|
497
|
+
|
|
498
|
+
---
|
|
499
|
+
|
|
500
|
+
## License
|
|
501
|
+
|
|
502
|
+
MIT License — see [`LICENSE`](LICENSE).
|
|
503
|
+
|
|
504
|
+
`commoner-probe` is sousveillance infrastructure, built for the commons. It is
|
|
505
|
+
released under the permissive MIT license so it can serve as a shared
|
|
506
|
+
acquisition floor that any downstream project — including the other repos in the
|
|
507
|
+
CommonerLLP federation, whatever their own licenses — can build on without
|
|
508
|
+
copyleft friction.
|
|
509
|
+
|
|
510
|
+
---
|
|
511
|
+
|
|
512
|
+
## Upcoming
|
|
513
|
+
|
|
514
|
+
### Floor debates
|
|
515
|
+
|
|
516
|
+
sansad.in exposes full debate proceedings via `api_ls/debate/text-of-debate`
|
|
517
|
+
(structured JSON, 17th Lok Sabha onwards). Each record covers a single day:
|
|
518
|
+
type of business, member who spoke, and verbatim text. This is the richest longitudinal
|
|
519
|
+
record of what MPs say on the floor.
|
|
520
|
+
|
|
521
|
+
### Bills and legislation
|
|
522
|
+
|
|
523
|
+
`sansad.in/ls/legislation/bills` lists every bill since independence with
|
|
524
|
+
introduction date, debate dates, and status at each stage. This enables tracking
|
|
525
|
+
legislative velocity, committee scrutiny rates, and private member bill outcomes.
|
|
526
|
+
|
|
527
|
+
### MP profiles and career timelines
|
|
528
|
+
|
|
529
|
+
Structured biographical data for each member: constituency, state, party, terms
|
|
530
|
+
served, educational background, and declared profession. This pairs with the Q/A corpus
|
|
531
|
+
for studies of how MP background predicts parliamentary participation.
|