bharatdata 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bharatdata-0.0.1/PKG-INFO +11 -0
- bharatdata-0.0.1/README.md +162 -0
- bharatdata-0.0.1/pyproject.toml +22 -0
- bharatdata-0.0.1/setup.cfg +4 -0
- bharatdata-0.0.1/src/bharatdata/__init__.py +3 -0
- bharatdata-0.0.1/src/bharatdata/client.py +108 -0
- bharatdata-0.0.1/src/bharatdata.egg-info/PKG-INFO +11 -0
- bharatdata-0.0.1/src/bharatdata.egg-info/SOURCES.txt +9 -0
- bharatdata-0.0.1/src/bharatdata.egg-info/dependency_links.txt +1 -0
- bharatdata-0.0.1/src/bharatdata.egg-info/requires.txt +6 -0
- bharatdata-0.0.1/src/bharatdata.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bharatdata
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Clean, queryable Indian public data SDK
|
|
5
|
+
Author: BharatData Team
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Requires-Dist: requests>=2.28.0
|
|
8
|
+
Requires-Dist: pandas>=1.5.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
11
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<img src="../../docs/assets/logo_full.png" alt="BharatData" height="72" />
|
|
3
|
+
<h1>BharatData Python SDK</h1>
|
|
4
|
+
<em>Official Python client for the BharatData API. Built for data scientists and researchers.</em>
|
|
5
|
+
<br/><br/>
|
|
6
|
+
|
|
7
|
+
[PyPI](https://pypi.org/project/bharatdata/)
|
|
8
|
+
[Python](https://www.python.org/downloads/)
|
|
9
|
+
[License](../../LICENSE)
|
|
10
|
+
|
|
11
|
+
</div>
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install bharatdata
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
**Requirements:** Python 3.8+ | `pandas` | `requests`
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
import bharatdata as bd
|
|
29
|
+
|
|
30
|
+
# Query into a Pandas DataFrame — no config needed
|
|
31
|
+
df = bd.query(
|
|
32
|
+
"ncrb-crime",
|
|
33
|
+
level="district",
|
|
34
|
+
filters={"entity": "Maharashtra", "year": "2023"},
|
|
35
|
+
limit=100
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
print(f"Shape: {df.shape}")
|
|
39
|
+
print(df[['district', 'total_cases']].head(10))
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Core Methods
|
|
45
|
+
|
|
46
|
+
### `bd.list_datasets()` → `pd.DataFrame`
|
|
47
|
+
All registered datasets as a DataFrame.
|
|
48
|
+
|
|
49
|
+
### `bd.query(dataset_id, level, filters, limit, sort, order)` → `pd.DataFrame`
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
# Multi-year trend query
|
|
53
|
+
df = bd.query(
|
|
54
|
+
"ncrb-crime",
|
|
55
|
+
level="state",
|
|
56
|
+
filters={"entity": "Delhi", "year": "2019,2020,2021,2022,2023"},
|
|
57
|
+
sort="year", order="asc"
|
|
58
|
+
)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**Parameters:**
|
|
62
|
+
|
|
63
|
+
| Parameter | Type | Default | Description |
|
|
64
|
+
| :--- | :--- | :--- | :--- |
|
|
65
|
+
| `dataset_id` | `str` | Required | Dataset ID |
|
|
66
|
+
| `level` | `str` | `"district"` | `"district"`, `"state"`, `"national"` |
|
|
67
|
+
| `filters` | `dict` | `{}` | `entity`, `year`, `category`, etc. |
|
|
68
|
+
| `limit` | `int` | `100` | Max rows (max: 500) |
|
|
69
|
+
| `sort` | `str` | `None` | Column to sort by |
|
|
70
|
+
| `order` | `str` | `"desc"` | `"asc"` or `"desc"` |
|
|
71
|
+
|
|
72
|
+
### `bd.query_ai(prompt)` → `dict`
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
result = bd.query_ai("Which states have the highest murder rates in 2023?")
|
|
76
|
+
print(result['narrative']) # AI analysis
|
|
77
|
+
print(result['data']) # pd.DataFrame
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Helpers
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
bd.list_states() # -> list[str]
|
|
84
|
+
bd.list_years("ncrb-crime") # -> list[int]
|
|
85
|
+
bd.list_fields("ncrb-crime") # -> list[str]
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Research Workflows
|
|
91
|
+
|
|
92
|
+
### Year-over-Year Trend
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
df = bd.query("ncrb-crime", level="state",
|
|
96
|
+
filters={"entity": "Maharashtra", "year": "2019,2020,2021,2022,2023"},
|
|
97
|
+
sort="year", order="asc", limit=500)
|
|
98
|
+
|
|
99
|
+
annual = df.groupby('year')['total_cases'].sum()
|
|
100
|
+
annual.plot(kind='line', marker='o', color='#1A237E')
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### All-India State Comparison
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
df = bd.query("ncrb-crime", level="state", filters={"year": "2023"}, limit=500)
|
|
107
|
+
top_states = df.groupby('state')['total_cases'].sum().nlargest(10)
|
|
108
|
+
print(top_states.to_markdown())
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Panel Data for Regression
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
import pandas as pd
|
|
115
|
+
panel = pd.concat([
|
|
116
|
+
bd.query("ncrb-crime", level="state", filters={"year": str(y)}, limit=500)
|
|
117
|
+
for y in range(2015, 2024)
|
|
118
|
+
], ignore_index=True)
|
|
119
|
+
print(panel.shape)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Merge with External Data
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
crime = bd.query("ncrb-crime", level="state", filters={"year": "2023"}, limit=500)
|
|
126
|
+
state_totals = crime.groupby('state')['total_cases'].sum().reset_index()
|
|
127
|
+
|
|
128
|
+
population = pd.read_csv("state_population.csv")
|
|
129
|
+
merged = state_totals.merge(population, on='state')
|
|
130
|
+
merged['rate_per_lakh'] = (merged['total_cases'] / merged['population']) * 100_000
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## PDF Export
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
# Pandoc — professional academic PDF
|
|
139
|
+
pandoc README.md -o report.pdf --pdf-engine=xelatex --toc
|
|
140
|
+
|
|
141
|
+
# Jupyter notebook export
|
|
142
|
+
jupyter nbconvert --to pdf research_notebook.ipynb
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## Academic Citation
|
|
148
|
+
|
|
149
|
+
**APA 7:**
|
|
150
|
+
```
|
|
151
|
+
National Crime Records Bureau. (2023). Crime in India: 2023 [Data set].
|
|
152
|
+
Ministry of Home Affairs, Government of India. Normalized and accessed
|
|
153
|
+
via BharatData Python SDK (https://pypi.org/project/bharatdata/).
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
See full [ATTRIBUTION.md](../../docs/legal/ATTRIBUTION.md) for other formats.
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
<div align="center">
|
|
161
|
+
<sub>Generated by the BharatData Team | Not affiliated with any government body</sub>
|
|
162
|
+
</div>
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "bharatdata"
|
|
7
|
+
version = "0.0.1"
|
|
8
|
+
description = "Clean, queryable Indian public data SDK"
|
|
9
|
+
authors = [
|
|
10
|
+
{ name = "BharatData Team" }
|
|
11
|
+
]
|
|
12
|
+
dependencies = [
|
|
13
|
+
"requests>=2.28.0",
|
|
14
|
+
"pandas>=1.5.0"
|
|
15
|
+
]
|
|
16
|
+
requires-python = ">=3.8"
|
|
17
|
+
|
|
18
|
+
[project.optional-dependencies]
|
|
19
|
+
dev = [
|
|
20
|
+
"pytest>=7.0.0",
|
|
21
|
+
"ruff>=0.1.0"
|
|
22
|
+
]
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from typing import List, Dict, Any, Optional, Union
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BharatDataError(Exception):
    """Raised when the BharatData API answers with a non-200 HTTP status.

    Subclasses ``Exception`` so existing ``except Exception`` handlers keep
    working while new callers can catch API failures specifically.
    """


class BharatData:
    """Small HTTP client for the BharatData REST API.

    Every endpoint is fetched with a plain GET request against ``base_url``.
    Helpers are provided to turn responses into pandas DataFrames and to
    build a citation string from a record or DataFrame.
    """

    def __init__(self, base_url: str = "https://api.bharatdata.org",
                 timeout: Optional[float] = 30.0):
        """Create a client.

        Args:
            base_url: Root URL of the API; a trailing slash is stripped.
            timeout: Seconds passed to ``requests.get`` as its ``timeout``;
                ``None`` disables the timeout (the previous behaviour).
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _request(self, path: str, params: Optional[Dict[str, Any]] = None,
                 return_full: bool = False) -> Any:
        """GET ``base_url + path`` and return the decoded JSON payload.

        Args:
            path: URL path appended verbatim to ``base_url``.
            params: Optional query-string parameters.
            return_full: When true, return the whole JSON body; otherwise
                return only its ``"data"`` member.

        Raises:
            BharatDataError: If the response status is not 200. The message
                carries the body's ``"error"`` field when the body is a JSON
                object, else the raw response text.
        """
        url = f"{self.base_url}{path}"
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, params=params, timeout=self.timeout)

        if response.status_code != 200:
            try:
                error_msg = response.json().get("error", "Unknown error")
            except (ValueError, AttributeError):
                # Body is not JSON, or is JSON but not an object.
                error_msg = response.text
            raise BharatDataError(
                f"API Error: {error_msg} (Status: {response.status_code})"
            )

        res_json = response.json()
        if return_full:
            return res_json
        return res_json.get("data")

    def list_datasets(self) -> List[Dict[str, Any]]:
        """List all available datasets in the BharatData Registry."""
        return self._request("/v1/registry")

    def get_dataset_metadata(self, dataset_id: str) -> Dict[str, Any]:
        """Get full metadata for a specific dataset."""
        return self._request(f"/v1/registry/{dataset_id}")

    def query(self, dataset_id: str, level: str, **params) -> Dict[str, Any]:
        """
        Universal Query: Fetch data from the registered datasets.

        Returns the full JSON response body (not just its ``"data"``
        member); pass it to ``to_dataframe`` to obtain a DataFrame.

        Args:
            dataset_id: The ID of the dataset (e.g., 'ncrb-crime')
            level: The granularity level (e.g., 'summary', 'state', 'district')
            **params: Query parameters (e.g., entity='Delhi', year=2023)
        """
        return self._request(
            f"/v1/data/{dataset_id}/{level}",
            params=params,
            return_full=True,
        )

    def get_crime_summary(
        self, state: str, year: int, category: str
    ) -> List[Dict[str, Any]]:
        """Backward compatibility for existing crime reports.

        Queries the 'ncrb-crime' dataset at 'summary' level and returns the
        response's ``"data"`` member, or an empty list when it is missing
        or null.
        """
        params = {"entity": state, "year": year, "category": category}
        res = self.query("ncrb-crime", "summary", **params)
        # `or []` keeps the declared list return type when "data" is null.
        return res.get("data") or []

    def to_dataframe(
        self, response: Union[List[Dict[str, Any]], Dict[str, Any]]
    ) -> pd.DataFrame:
        """
        Converts API response to a pandas DataFrame.
        Handles both the raw data list and the full response envelope.

        When a full envelope containing a ``"metadata"`` key is given, the
        metadata is stored in ``df.attrs["metadata"]`` even if the result
        set is empty, so ``cite`` still works on empty results.
        """
        is_envelope = isinstance(response, dict)
        data = response.get("data", []) if is_envelope else response

        df = pd.DataFrame(data) if data else pd.DataFrame()

        # If it was a full response, attach metadata as an attribute
        if is_envelope and "metadata" in response:
            df.attrs["metadata"] = response["metadata"]

        return df

    def get_states(self) -> List[str]:
        """Return the ``"data"`` member of the ``/v1/meta/states`` response."""
        return self._request("/v1/meta/states")

    def get_categories(self) -> List[str]:
        """Return the ``"data"`` member of the ``/v1/meta/categories`` response."""
        return self._request("/v1/meta/categories")

    def get_years(self) -> List[int]:
        """Return the ``"data"`` member of the ``/v1/meta/years`` response."""
        return self._request("/v1/meta/years")

    def cite(self, record_or_df: Union[Dict[str, Any], pd.DataFrame]) -> str:
        """Generate a standard citation for a data record or DataFrame.

        For a DataFrame the citation is built from ``attrs["metadata"]``
        (keys ``attribution``, ``dataset``, ``timestamp``); for a record
        dict it is built from ``source_file`` and ``collection_date``.
        Missing values fall back to fixed placeholder text.
        """
        if isinstance(record_or_df, pd.DataFrame):
            # Metadata may be absent or explicitly None; treat both as empty.
            meta = record_or_df.attrs.get("metadata") or {}
            source = meta.get(
                "attribution", "BharatData / Government of India"
            )
            dataset = meta.get("dataset", "Unknown Dataset")
            return (f"Source: {source} (via BharatData: {dataset}). "
                    f"Accessed: {meta.get('timestamp', 'Recent')}")

        source = record_or_df.get("source_file", "Official Report")
        date = record_or_df.get("collection_date", "Unspecified")
        return (
            f"Source: BharatData / Government Source ({source}). "
            f"Accessed: {date}"
        )
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bharatdata
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Clean, queryable Indian public data SDK
|
|
5
|
+
Author: BharatData Team
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Requires-Dist: requests>=2.28.0
|
|
8
|
+
Requires-Dist: pandas>=1.5.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
11
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/bharatdata/__init__.py
|
|
4
|
+
src/bharatdata/client.py
|
|
5
|
+
src/bharatdata.egg-info/PKG-INFO
|
|
6
|
+
src/bharatdata.egg-info/SOURCES.txt
|
|
7
|
+
src/bharatdata.egg-info/dependency_links.txt
|
|
8
|
+
src/bharatdata.egg-info/requires.txt
|
|
9
|
+
src/bharatdata.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
bharatdata
|