bharatdata 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: bharatdata
3
+ Version: 0.0.1
4
+ Summary: Clean, queryable Indian public data SDK
5
+ Author: BharatData Team
6
+ Requires-Python: >=3.8
7
+ Requires-Dist: requests>=2.28.0
8
+ Requires-Dist: pandas>=1.5.0
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
11
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
@@ -0,0 +1,162 @@
1
+ <div align="center">
2
+ <img src="../../docs/assets/logo_full.png" alt="BharatData" height="72" />
3
+ <h1>BharatData Python SDK</h1>
4
+ <em>Official Python client for the BharatData API. Built for data scientists and researchers.</em>
5
+ <br/><br/>
6
+
7
+ [![PyPI version](https://img.shields.io/pypi/v/bharatdata)](https://pypi.org/project/bharatdata/)
8
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
9
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](../../LICENSE)
10
+
11
+ </div>
12
+
13
+ ---
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ pip install bharatdata
19
+ ```
20
+
21
+ **Requirements:** Python 3.8+ | `pandas` | `requests`
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ```python
28
+ import bharatdata as bd
29
+
30
+ # Query into a Pandas DataFrame — no config needed
31
+ df = bd.query(
32
+ "ncrb-crime",
33
+ level="district",
34
+ filters={"entity": "Maharashtra", "year": "2023"},
35
+ limit=100
36
+ )
37
+
38
+ print(f"Shape: {df.shape}")
39
+ print(df[['district', 'total_cases']].head(10))
40
+ ```
41
+
42
+ ---
43
+
44
+ ## Core Methods
45
+
46
+ ### `bd.list_datasets()` → `pd.DataFrame`
47
+ All registered datasets as a DataFrame.
48
+
49
+ ### `bd.query(dataset_id, level, filters, limit, sort, order)` → `pd.DataFrame`
50
+
51
+ ```python
52
+ # Multi-year trend query
53
+ df = bd.query(
54
+ "ncrb-crime",
55
+ level="state",
56
+ filters={"entity": "Delhi", "year": "2019,2020,2021,2022,2023"},
57
+ sort="year", order="asc"
58
+ )
59
+ ```
60
+
61
+ **Parameters:**
62
+
63
+ | Parameter | Type | Default | Description |
64
+ | :--- | :--- | :--- | :--- |
65
+ | `dataset_id` | `str` | Required | Dataset ID |
66
+ | `level` | `str` | `"district"` | `"district"`, `"state"`, `"national"` |
67
+ | `filters` | `dict` | `{}` | `entity`, `year`, `category`, etc. |
68
+ | `limit` | `int` | `100` | Max rows (max: 500) |
69
+ | `sort` | `str` | `None` | Column to sort by |
70
+ | `order` | `str` | `"desc"` | `"asc"` or `"desc"` |
71
+
72
+ ### `bd.query_ai(prompt)` → `dict`
73
+
74
+ ```python
75
+ result = bd.query_ai("Which states have the highest murder rates in 2023?")
76
+ print(result['narrative']) # AI analysis
77
+ print(result['data']) # pd.DataFrame
78
+ ```
79
+
80
+ ### Helpers
81
+
82
+ ```python
83
+ bd.list_states() # -> list[str]
84
+ bd.list_years("ncrb-crime") # -> list[int]
85
+ bd.list_fields("ncrb-crime") # -> list[str]
86
+ ```
87
+
88
+ ---
89
+
90
+ ## Research Workflows
91
+
92
+ ### Year-over-Year Trend
93
+
94
+ ```python
95
+ df = bd.query("ncrb-crime", level="state",
96
+ filters={"entity": "Maharashtra", "year": "2019,2020,2021,2022,2023"},
97
+ sort="year", order="asc", limit=500)
98
+
99
+ annual = df.groupby('year')['total_cases'].sum()
100
+ annual.plot(kind='line', marker='o', color='#1A237E')
101
+ ```
102
+
103
+ ### All-India State Comparison
104
+
105
+ ```python
106
+ df = bd.query("ncrb-crime", level="state", filters={"year": "2023"}, limit=500)
107
+ top_states = df.groupby('state')['total_cases'].sum().nlargest(10)
108
+ print(top_states.to_markdown())
109
+ ```
110
+
111
+ ### Panel Data for Regression
112
+
113
+ ```python
114
+ import pandas as pd
115
+ panel = pd.concat([
116
+ bd.query("ncrb-crime", level="state", filters={"year": str(y)}, limit=500)
117
+ for y in range(2015, 2024)
118
+ ], ignore_index=True)
119
+ print(panel.shape)
120
+ ```
121
+
122
+ ### Merge with External Data
123
+
124
+ ```python
125
+ crime = bd.query("ncrb-crime", level="state", filters={"year": "2023"}, limit=500)
126
+ state_totals = crime.groupby('state')['total_cases'].sum().reset_index()
127
+
128
+ population = pd.read_csv("state_population.csv")
129
+ merged = state_totals.merge(population, on='state')
130
+ merged['rate_per_lakh'] = (merged['total_cases'] / merged['population']) * 100_000
131
+ ```
132
+
133
+ ---
134
+
135
+ ## PDF Export
136
+
137
+ ```bash
138
+ # Pandoc — professional academic PDF
139
+ pandoc README.md -o report.pdf --pdf-engine=xelatex --toc
140
+
141
+ # Jupyter notebook export
142
+ jupyter nbconvert --to pdf research_notebook.ipynb
143
+ ```
144
+
145
+ ---
146
+
147
+ ## Academic Citation
148
+
149
+ **APA 7:**
150
+ ```
151
+ National Crime Records Bureau. (2023). Crime in India: 2023 [Data set].
152
+ Ministry of Home Affairs, Government of India. Normalized and accessed
153
+ via BharatData Python SDK (https://pypi.org/project/bharatdata/).
154
+ ```
155
+
156
+ See full [ATTRIBUTION.md](../../docs/legal/ATTRIBUTION.md) for other formats.
157
+
158
+ ---
159
+
160
+ <div align="center">
161
+ <sub>Generated by the BharatData Team &nbsp;|&nbsp; Not affiliated with any government body</sub>
162
+ </div>
@@ -0,0 +1,22 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "bharatdata"
7
+ version = "0.0.1"
8
+ description = "Clean, queryable Indian public data SDK"
9
+ authors = [
10
+ { name = "BharatData Team" }
11
+ ]
12
+ dependencies = [
13
+ "requests>=2.28.0",
14
+ "pandas>=1.5.0"
15
+ ]
16
+ requires-python = ">=3.8"
17
+
18
+ [project.optional-dependencies]
19
+ dev = [
20
+ "pytest>=7.0.0",
21
+ "ruff>=0.1.0"
22
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ from .client import BharatData
2
+
3
+ __all__ = ["BharatData"]
@@ -0,0 +1,108 @@
1
+ import requests
2
+ import pandas as pd
3
+ from typing import List, Dict, Any, Optional, Union
4
+
5
+
6
class BharatData:
    """Thin HTTP client for the BharatData REST API.

    Every endpoint method issues a ``GET`` request against ``base_url`` and
    returns the decoded JSON (or its ``"data"`` field). Use
    :meth:`to_dataframe` to turn a response into a pandas DataFrame and
    :meth:`cite` to build a citation string for it.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` would block indefinitely on a stalled connection.
    DEFAULT_TIMEOUT = 30.0

    def __init__(
        self,
        base_url: str = "https://api.bharatdata.org",
        timeout: Optional[float] = DEFAULT_TIMEOUT,
    ):
        """Create a client.

        Args:
            base_url: Root URL of the API; trailing slashes are stripped.
            timeout: Per-request timeout in seconds passed to ``requests``.
                ``None`` disables the timeout (the previous behavior).
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _request(self, path: str, params: Optional[Dict[str, Any]] = None,
                 return_full: bool = False) -> Any:
        """Send a GET request to ``path`` and return the decoded JSON.

        Args:
            path: URL path appended verbatim to ``base_url``.
            params: Optional query-string parameters.
            return_full: When true, return the whole JSON body; otherwise
                return only its ``"data"`` field.

        Returns:
            The full JSON body, or the value of its ``"data"`` key. A body
            that is not a JSON object is always returned as-is.

        Raises:
            Exception: If the response status code is not 200.
        """
        url = f"{self.base_url}{path}"
        response = requests.get(url, params=params, timeout=self.timeout)

        if response.status_code != 200:
            try:
                error_msg = response.json().get("error", "Unknown error")
            except (ValueError, AttributeError):
                # Body is not JSON, or is JSON but not an object: fall back
                # to the raw text so the caller still sees what came back.
                error_msg = response.text
            raise Exception(
                f"API Error: {error_msg} (Status: {response.status_code})"
            )

        res_json = response.json()
        # Only a JSON object can be unwrapped; lists/scalars have no "data".
        if return_full or not isinstance(res_json, dict):
            return res_json
        return res_json.get("data")

    def list_datasets(self) -> List[Dict[str, Any]]:
        """List all available datasets in the BharatData Registry."""
        return self._request("/v1/registry")

    def get_dataset_metadata(self, dataset_id: str) -> Dict[str, Any]:
        """Get full metadata for a specific dataset."""
        return self._request(f"/v1/registry/{dataset_id}")

    def query(self, dataset_id: str, level: str, **params) -> Dict[str, Any]:
        """
        Universal Query: Fetch data from the registered datasets.

        Args:
            dataset_id: The ID of the dataset (e.g., 'ncrb-crime')
            level: The granularity level (e.g., 'summary', 'state', 'district')
            **params: Query parameters (e.g., entity='Delhi', year=2023)

        Returns:
            The full JSON response body (not just its ``"data"`` field).
        """
        return self._request(
            f"/v1/data/{dataset_id}/{level}",
            params=params,
            return_full=True,
        )

    def get_crime_summary(
        self, state: str, year: int, category: str
    ) -> List[Dict[str, Any]]:
        """Backward compatibility for existing crime reports.

        Queries the ``ncrb-crime`` dataset at the ``summary`` level and
        returns the ``"data"`` field of the response, or an empty list when
        that field is missing or null.
        """
        params = {"entity": state, "year": year, "category": category}
        res = self.query("ncrb-crime", "summary", **params)
        return res.get("data") or []

    def to_dataframe(
        self, response: Union[List[Dict[str, Any]], Dict[str, Any]]
    ) -> pd.DataFrame:
        """
        Converts API response to a pandas DataFrame.
        Handles both the raw data list and the full response envelope.

        When ``response`` is a dict containing a ``"metadata"`` key, that
        value is stored in ``df.attrs["metadata"]`` -- also for empty
        results, so that :meth:`cite` keeps working on them.
        """
        is_envelope = isinstance(response, dict)
        data = response.get("data") if is_envelope else response

        df = pd.DataFrame(data) if data else pd.DataFrame()

        # If it was a full response, attach metadata as an attribute
        if is_envelope and "metadata" in response:
            df.attrs["metadata"] = response["metadata"]

        return df

    def get_states(self) -> List[str]:
        """Return the ``"data"`` field of the ``/v1/meta/states`` response."""
        return self._request("/v1/meta/states")

    def get_categories(self) -> List[str]:
        """Return the ``"data"`` field of the ``/v1/meta/categories`` response."""
        return self._request("/v1/meta/categories")

    def get_years(self) -> List[int]:
        """Return the ``"data"`` field of the ``/v1/meta/years`` response."""
        return self._request("/v1/meta/years")

    def cite(self, record_or_df: Union[Dict[str, Any], pd.DataFrame]) -> str:
        """Generate a standard citation for a data record or DataFrame.

        For a DataFrame the citation is built from ``attrs["metadata"]``
        (keys ``attribution``, ``dataset``, ``timestamp``); for a record
        dict it is built from ``source_file`` and ``collection_date``.
        Missing values fall back to generic placeholders.
        """
        if isinstance(record_or_df, pd.DataFrame):
            # ``or {}`` also covers metadata explicitly stored as None.
            meta = record_or_df.attrs.get("metadata") or {}
            source = meta.get(
                "attribution", "BharatData / Government of India"
            )
            dataset = meta.get("dataset", "Unknown Dataset")
            return (f"Source: {source} (via BharatData: {dataset}). "
                    f"Accessed: {meta.get('timestamp', 'Recent')}")

        source = record_or_df.get("source_file", "Official Report")
        date = record_or_df.get("collection_date", "Unspecified")
        return (
            f"Source: BharatData / Government Source ({source}). "
            f"Accessed: {date}"
        )
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: bharatdata
3
+ Version: 0.0.1
4
+ Summary: Clean, queryable Indian public data SDK
5
+ Author: BharatData Team
6
+ Requires-Python: >=3.8
7
+ Requires-Dist: requests>=2.28.0
8
+ Requires-Dist: pandas>=1.5.0
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
11
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
@@ -0,0 +1,9 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/bharatdata/__init__.py
4
+ src/bharatdata/client.py
5
+ src/bharatdata.egg-info/PKG-INFO
6
+ src/bharatdata.egg-info/SOURCES.txt
7
+ src/bharatdata.egg-info/dependency_links.txt
8
+ src/bharatdata.egg-info/requires.txt
9
+ src/bharatdata.egg-info/top_level.txt
@@ -0,0 +1,6 @@
1
+ requests>=2.28.0
2
+ pandas>=1.5.0
3
+
4
+ [dev]
5
+ pytest>=7.0.0
6
+ ruff>=0.1.0
@@ -0,0 +1 @@
1
+ bharatdata