pixbr 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, master]
6
+ pull_request:
7
+ branches: [main, master]
8
+
9
+ jobs:
10
+ test:
11
+ name: test (py${{ matrix.python-version }})
12
+ runs-on: ubuntu-latest
13
+ strategy:
14
+ fail-fast: false
15
+ matrix:
16
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
17
+
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - name: Set up Python ${{ matrix.python-version }}
22
+ uses: actions/setup-python@v5
23
+ with:
24
+ python-version: ${{ matrix.python-version }}
25
+ cache: pip
26
+
27
+ - name: Install package with dev dependencies
28
+ run: |
29
+ python -m pip install --upgrade pip
30
+ python -m pip install -e ".[dev]"
31
+
32
+ - name: Run tests
33
+ run: python -m pytest -q
@@ -0,0 +1,49 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+ push:
7
+ tags: ["v*"]
8
+
9
+ jobs:
10
+ build:
11
+ name: Build distribution
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.12"
19
+
20
+ - name: Build sdist and wheel
21
+ run: |
22
+ python -m pip install --upgrade pip build
23
+ python -m build
24
+
25
+ - name: Upload build artifacts
26
+ uses: actions/upload-artifact@v4
27
+ with:
28
+ name: dist
29
+ path: dist/
30
+
31
+ publish:
32
+ name: Publish to PyPI
33
+ needs: build
34
+ runs-on: ubuntu-latest
35
+ environment:
36
+ name: pypi
37
+ url: https://pypi.org/p/pixbr
38
+ permissions:
39
+ # Required for PyPI Trusted Publishing (OIDC). No API token needed.
40
+ id-token: write
41
+ steps:
42
+ - name: Download build artifacts
43
+ uses: actions/download-artifact@v4
44
+ with:
45
+ name: dist
46
+ path: dist/
47
+
48
+ - name: Publish to PyPI
49
+ uses: pypa/gh-action-pypi-publish@release/v1
pixbr-0.1.0/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ *.egg-info/
5
+ build/
6
+ dist/
7
+ .pytest_cache/
8
+ .DS_Store
pixbr-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: pixbr
3
+ Version: 0.1.0
4
+ Summary: Access Brazilian Central Bank PIX Open Data API (pandas-friendly)
5
+ Project-URL: Homepage, https://github.com/StrategicProjects/pixbr
6
+ Project-URL: Issues, https://github.com/StrategicProjects/pixbr/issues
7
+ Author-email: Andre Leite <leite@castlab.org>, Marcos Wasilew <marcos.wasilew@gmail.com>, Hugo Vasconcelos <hugo.vasconcelos@ufpe.br>, Diogo Bezerra <diogo.bezerra@ufpe.br>
8
+ License: MIT
9
+ Keywords: banco central,bcb,brazil,odata,open data,pix
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Topic :: Scientific/Engineering
15
+ Requires-Python: >=3.9
16
+ Requires-Dist: httpx>=0.27
17
+ Requires-Dist: pandas>=2.0
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest>=8.0; extra == 'dev'
20
+ Requires-Dist: respx>=0.21; extra == 'dev'
21
+ Description-Content-Type: text/markdown
22
+
23
+ # pixbr
24
+
25
+ Python client for the **Brazilian Central Bank (BCB) PIX Open Data API**
26
+ ([Olinda / OData service](https://olinda.bcb.gov.br/olinda/servico/Pix_DadosAbertos/versao/v1/aplicacao)).
27
+ It hides the BCB's unusual OData URL syntax and returns
28
+ [pandas](https://pandas.pydata.org/) DataFrames.
29
+
30
+ This is the Python counterpart of the R package
31
+ [`pixr`](https://github.com/StrategicProjects/pixr).
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ pip install pixbr # once published
37
+ # or, from source:
38
+ pip install -e ".[dev]"
39
+ ```
40
+
41
+ ## Quick start
42
+
43
+ Reusable client (recommended for multiple requests):
44
+
45
+ ```python
46
+ from pixbr import PixClient
47
+
48
+ client = PixClient()
49
+
50
+ # Stock of PIX keys by participant (date in YYYY-MM-DD)
51
+ keys = client.keys("2025-12-01", filter="TipoChave eq 'CPF'", top=100)
52
+
53
+ # Transaction statistics (database in YYYYMM)
54
+ stats = client.transaction_stats("202509", filter="NATUREZA eq 'P2P'")
55
+
56
+ # Transactions by municipality
57
+ muni = client.transactions_by_municipality("202512", filter="Sigla_Regiao eq 'NE'")
58
+
59
+ # Fraud statistics (MED)
60
+ fraud = client.fraud_stats("202509", top=100)
61
+ ```
62
+
63
+ Module-level convenience functions mirror the `pixr` names:
64
+
65
+ ```python
66
+ from pixbr import get_pix_transaction_stats, get_pix_summary, format_brl
67
+
68
+ df = get_pix_transaction_stats("202509")
69
+ summary = get_pix_summary("202509", group_by="PAG_REGIAO")
70
+ format_brl(1234567.89) # 'R$ 1.234.567,89'
71
+ ```
72
+
73
+ ## Endpoints
74
+
75
+ | Endpoint | Parameter | `PixClient` method | Convenience function |
76
+ |---|---|---|---|
77
+ | `ChavesPix` | `Data` (YYYY-MM-DD) | `.keys()` | `get_pix_keys()` |
78
+ | `TransacoesPixPorMunicipio` | `DataBase` (YYYYMM) | `.transactions_by_municipality()` | `get_pix_transactions_by_municipality()` |
79
+ | `EstatisticasTransacoesPix` | `Database` (YYYYMM) | `.transaction_stats()` | `get_pix_transaction_stats()` |
80
+ | `EstatisticasFraudesPix` | `Database` (YYYYMM) | `.fraud_stats()` | `get_pix_fraud_stats()` |
81
+
82
+ Use `pix_endpoints()` and `pix_columns("keys"|"municipality"|"stats"|"fraud")`
83
+ to inspect available endpoints and columns.
84
+
85
+ ## OData query parameters
86
+
87
+ All endpoint methods accept the common OData parameters:
88
+
89
+ - `filter` — OData filter expression, e.g. `"NATUREZA eq 'P2P' and PAG_REGIAO eq 'SUDESTE'"`
90
+ - `columns` — list of columns to select (unknown columns are dropped with a warning)
91
+ - `orderby` — `"Column"` (asc) or `"Column desc"`
92
+ - `top` — maximum number of records
93
+
94
+ > **Note:** `skip` is **not supported** by the BCB PIX API; passing it emits a
95
+ > warning and is ignored. Use `top` to limit results.
96
+
97
+ ## Notes
98
+
99
+ - `PixClient(timeout=..., max_retries=..., verbose=...)` configures the HTTP
100
+ session. The default timeout is 120s — the BCB API can be slow for large
101
+ queries.
102
+ - `client.build_url(...)` / `pix_url(...)` return the request URL without
103
+ sending it (handy for debugging).
104
+ - `client.ping()` / `pix_ping()` test connectivity to all four endpoints.
105
+
106
+ ## License
107
+
108
+ MIT
pixbr-0.1.0/README.md ADDED
@@ -0,0 +1,86 @@
1
+ # pixbr
2
+
3
+ Python client for the **Brazilian Central Bank (BCB) PIX Open Data API**
4
+ ([Olinda / OData service](https://olinda.bcb.gov.br/olinda/servico/Pix_DadosAbertos/versao/v1/aplicacao)).
5
+ It hides the BCB's unusual OData URL syntax and returns
6
+ [pandas](https://pandas.pydata.org/) DataFrames.
7
+
8
+ This is the Python counterpart of the R package
9
+ [`pixr`](https://github.com/StrategicProjects/pixr).
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ pip install pixbr # once published
15
+ # or, from source:
16
+ pip install -e ".[dev]"
17
+ ```
18
+
19
+ ## Quick start
20
+
21
+ Reusable client (recommended for multiple requests):
22
+
23
+ ```python
24
+ from pixbr import PixClient
25
+
26
+ client = PixClient()
27
+
28
+ # Stock of PIX keys by participant (date in YYYY-MM-DD)
29
+ keys = client.keys("2025-12-01", filter="TipoChave eq 'CPF'", top=100)
30
+
31
+ # Transaction statistics (database in YYYYMM)
32
+ stats = client.transaction_stats("202509", filter="NATUREZA eq 'P2P'")
33
+
34
+ # Transactions by municipality
35
+ muni = client.transactions_by_municipality("202512", filter="Sigla_Regiao eq 'NE'")
36
+
37
+ # Fraud statistics (MED)
38
+ fraud = client.fraud_stats("202509", top=100)
39
+ ```
40
+
41
+ Module-level convenience functions mirror the `pixr` names:
42
+
43
+ ```python
44
+ from pixbr import get_pix_transaction_stats, get_pix_summary, format_brl
45
+
46
+ df = get_pix_transaction_stats("202509")
47
+ summary = get_pix_summary("202509", group_by="PAG_REGIAO")
48
+ format_brl(1234567.89) # 'R$ 1.234.567,89'
49
+ ```
50
+
51
+ ## Endpoints
52
+
53
+ | Endpoint | Parameter | `PixClient` method | Convenience function |
54
+ |---|---|---|---|
55
+ | `ChavesPix` | `Data` (YYYY-MM-DD) | `.keys()` | `get_pix_keys()` |
56
+ | `TransacoesPixPorMunicipio` | `DataBase` (YYYYMM) | `.transactions_by_municipality()` | `get_pix_transactions_by_municipality()` |
57
+ | `EstatisticasTransacoesPix` | `Database` (YYYYMM) | `.transaction_stats()` | `get_pix_transaction_stats()` |
58
+ | `EstatisticasFraudesPix` | `Database` (YYYYMM) | `.fraud_stats()` | `get_pix_fraud_stats()` |
59
+
60
+ Use `pix_endpoints()` and `pix_columns("keys"|"municipality"|"stats"|"fraud")`
61
+ to inspect available endpoints and columns.
62
+
63
+ ## OData query parameters
64
+
65
+ All endpoint methods accept the common OData parameters:
66
+
67
+ - `filter` — OData filter expression, e.g. `"NATUREZA eq 'P2P' and PAG_REGIAO eq 'SUDESTE'"`
68
+ - `columns` — list of columns to select (unknown columns are dropped with a warning)
69
+ - `orderby` — `"Column"` (asc) or `"Column desc"`
70
+ - `top` — maximum number of records
71
+
72
+ > **Note:** `skip` is **not supported** by the BCB PIX API; passing it emits a
73
+ > warning and is ignored. Use `top` to limit results.
74
+
75
+ ## Notes
76
+
77
+ - `PixClient(timeout=..., max_retries=..., verbose=...)` configures the HTTP
78
+ session. The default timeout is 120s — the BCB API can be slow for large
79
+ queries.
80
+ - `client.build_url(...)` / `pix_url(...)` return the request URL without
81
+ sending it (handy for debugging).
82
+ - `client.ping()` / `pix_ping()` test connectivity to all four endpoints.
83
+
84
+ ## License
85
+
86
+ MIT
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "pixbr"
7
+ version = "0.1.0"
8
+ description = "Access Brazilian Central Bank PIX Open Data API (pandas-friendly)"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "Andre Leite", email = "leite@castlab.org" },
14
+ { name = "Marcos Wasilew", email = "marcos.wasilew@gmail.com" },
15
+ { name = "Hugo Vasconcelos", email = "hugo.vasconcelos@ufpe.br" },
16
+ { name = "Diogo Bezerra", email = "diogo.bezerra@ufpe.br" },
17
+ ]
18
+ keywords = ["pix", "bcb", "banco central", "open data", "odata", "brazil"]
19
+ classifiers = [
20
+ "Development Status :: 4 - Beta",
21
+ "Intended Audience :: Science/Research",
22
+ "License :: OSI Approved :: MIT License",
23
+ "Programming Language :: Python :: 3",
24
+ "Topic :: Scientific/Engineering",
25
+ ]
26
+ dependencies = [
27
+ "httpx>=0.27",
28
+ "pandas>=2.0",
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ dev = [
33
+ "pytest>=8.0",
34
+ "respx>=0.21",
35
+ ]
36
+
37
+ [project.urls]
38
+ Homepage = "https://github.com/StrategicProjects/pixbr"
39
+ Issues = "https://github.com/StrategicProjects/pixbr/issues"
40
+
41
+ [tool.hatch.build.targets.wheel]
42
+ packages = ["src/pixbr"]
43
+
44
+ [tool.pytest.ini_options]
45
+ testpaths = ["tests"]
@@ -0,0 +1,69 @@
1
+ """pixbr — access the Brazilian Central Bank PIX Open Data API from Python.
2
+
3
+ Two ways to use it:
4
+
5
+ 1. A reusable client (recommended for multiple requests)::
6
+
7
+ from pixbr import PixClient
8
+ client = PixClient()
9
+ df = client.transaction_stats("202509", filter="NATUREZA eq 'P2P'")
10
+
11
+ 2. Module-level convenience functions mirroring the R package ``pixr``::
12
+
13
+ from pixbr import get_pix_transaction_stats
14
+ df = get_pix_transaction_stats("202509")
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from .client import ENDPOINTS, PixApiError, PixClient
20
+ from .utils import (
21
+ format_brl,
22
+ pix_columns,
23
+ pix_endpoints,
24
+ year_month_to_date,
25
+ )
26
+ from .api import (
27
+ get_pix_fraud_stats,
28
+ get_pix_fraud_stats_multi,
29
+ get_pix_keys,
30
+ get_pix_keys_by_type,
31
+ get_pix_keys_summary,
32
+ get_pix_summary,
33
+ get_pix_transaction_stats,
34
+ get_pix_transaction_stats_multi,
35
+ get_pix_transactions_by_municipality,
36
+ get_pix_transactions_by_region,
37
+ get_pix_transactions_by_state,
38
+ pix_ping,
39
+ pix_query,
40
+ pix_url,
41
+ )
42
+
43
+ __version__ = "0.1.0"
44
+
45
+ __all__ = [
46
+ "PixClient",
47
+ "PixApiError",
48
+ "ENDPOINTS",
49
+ # utils
50
+ "format_brl",
51
+ "pix_columns",
52
+ "pix_endpoints",
53
+ "year_month_to_date",
54
+ # convenience functions
55
+ "get_pix_keys",
56
+ "get_pix_keys_summary",
57
+ "get_pix_keys_by_type",
58
+ "get_pix_transaction_stats",
59
+ "get_pix_transaction_stats_multi",
60
+ "get_pix_summary",
61
+ "get_pix_transactions_by_municipality",
62
+ "get_pix_transactions_by_state",
63
+ "get_pix_transactions_by_region",
64
+ "get_pix_fraud_stats",
65
+ "get_pix_fraud_stats_multi",
66
+ "pix_ping",
67
+ "pix_query",
68
+ "pix_url",
69
+ ]
@@ -0,0 +1,168 @@
1
+ """URL building and parameter helpers for the BCB PIX Open Data (OData) API.
2
+
3
+ The BCB Olinda API uses a non-standard OData syntax where endpoint parameters
4
+ are passed as function arguments in the URL path *and* repeated as named query
5
+ parameters, e.g.::
6
+
7
+ ChavesPix(Data=@Data)?$format=json&@Data='2025-12-01'&$top=10
8
+
9
+ These helpers reproduce the exact URL construction used by the R package
10
+ ``pixr`` (which deliberately avoids standard percent-encoding, encoding only
11
+ spaces as ``%20``). They are pure functions so they can be unit-tested without
12
+ hitting the network.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import re
18
+ import warnings
19
+ from datetime import date, datetime
20
+ from typing import Mapping, Optional, Sequence, Union
21
+
22
# Root of the BCB Olinda PIX OData service; build_url appends the endpoint
# name (and its function-style parameter list) to this prefix.
BASE_URL = "https://olinda.bcb.gov.br/olinda/servico/Pix_DadosAbertos/versao/v1/odata"
# NOTE(review): presumably the client's default request timeout in seconds
# (the README documents a 120s default) -- confirm against the client module.
DEFAULT_TIMEOUT = 120

# Value types accepted for endpoint parameters passed to build_url.
ParamValue = Union[str, int, float]
26
+
27
+
28
def build_url(
    endpoint: str,
    params: Optional[Mapping[str, ParamValue]] = None,
    *,
    fmt: str = "json",
    filter: Optional[str] = None,
    select: Optional[Sequence[str]] = None,
    orderby: Optional[str] = None,
    top: Optional[int] = None,
    skip: Optional[int] = None,
    base_url: str = BASE_URL,
) -> str:
    """Assemble the request URL for one BCB PIX OData endpoint.

    The construction follows ``pixr::pix_request``: endpoint parameters are
    declared in the path as ``Name=@Name`` and then supplied in the query
    string as ``@Name=value``.

    Args:
        endpoint: Endpoint name appended to ``base_url``.
        params: Endpoint parameters. String values are wrapped in single
            quotes in the query string; other values are inserted as-is.
        fmt: Value of the ``$format`` query option.
        filter: ``$filter`` expression. Whitespace following a comma or an
            opening parenthesis is removed before it is added.
        select: Column names, joined with commas into ``$select``.
        orderby: Value of ``$orderby``, used verbatim.
        top: Value of ``$top``; coerced with ``int()``.
        skip: Never sent. Any non-``None`` value only triggers a warning.
        base_url: Service root the endpoint is appended to.

    Returns:
        The full URL. Within the query string only spaces are encoded
        (as ``%20``); no other percent-encoding is applied.
    """
    path = f"{base_url}/{endpoint}"
    pieces = [f"$format={fmt}"]

    if params:
        # Function-style declaration in the path: Endpoint(A=@A,B=@B)
        declared = ",".join(f"{key}=@{key}" for key in params)
        path = f"{path}({declared})"
        # ...and the actual values as @A='text' / @B=123 in the query.
        pieces.extend(
            f"@{key}='{val}'" if isinstance(val, str) else f"@{key}={val}"
            for key, val in params.items()
        )

    if filter:
        # Collapse whitespace after commas and opening parens, matching pixr.
        compact = re.sub(r"\(\s+", "(", re.sub(r",\s+", ",", filter))
        pieces.append(f"$filter={compact}")

    if select:
        pieces.append("$select=" + ",".join(select))

    if orderby:
        pieces.append(f"$orderby={orderby}")

    if top is not None:
        pieces.append(f"$top={int(top)}")

    if skip is not None:
        warnings.warn(
            "Parameter 'skip' is not supported by the BCB PIX API; "
            "pagination with skip is not available. Use 'top' to limit results.",
            stacklevel=2,
        )

    # The API is picky about encoding: encode only spaces, like pixr.
    query = "&".join(pieces).replace(" ", "%20")
    return f"{path}?{query}"
87
+
88
+
89
def parse_year_month(year_month: Union[str, int, date, None]) -> Optional[str]:
    """Normalize a year-month to ``YYYYMM`` string form.

    Args:
        year_month: ``None``, a ``date``/``datetime`` (formatted with
            ``%Y%m``), or anything whose ``str()`` is six digits, e.g.
            ``"202312"`` or ``202312``.

    Returns:
        The ``YYYYMM`` string, or ``None`` when ``year_month`` is ``None``.

    Raises:
        ValueError: If the value is not exactly six digits, or if the month
            part is outside ``01``-``12``.
    """
    if year_month is None:
        return None
    if isinstance(year_month, (date, datetime)):
        return year_month.strftime("%Y%m")
    s = str(year_month)
    # Six digits alone is not enough: "202313" or "202300" has the right shape
    # but names no month, so reject it here instead of sending it to the API.
    if not re.fullmatch(r"\d{6}", s) or not 1 <= int(s[4:]) <= 12:
        raise ValueError(
            f"Invalid year_month format: {s!r}. "
            "Expected YYYYMM (e.g. '202312' for December 2023)."
        )
    return s
102
+
103
+
104
def parse_date_param(value: Union[str, date, None]) -> Optional[str]:
    """Normalize a date to ``YYYY-MM-DD`` string form (for the ChavesPix endpoint).

    Args:
        value: ``None``, a ``date``/``datetime`` (formatted with
            ``%Y-%m-%d``), or a string already in ``YYYY-MM-DD`` form.

    Returns:
        The ``YYYY-MM-DD`` string, or ``None`` when ``value`` is ``None``.

    Raises:
        ValueError: If the string is not shaped like ``YYYY-MM-DD`` or does
            not name a real calendar date (e.g. ``"2025-02-30"``).
    """
    if value is None:
        return None
    if isinstance(value, (date, datetime)):
        return value.strftime("%Y-%m-%d")
    s = str(value)
    if not re.fullmatch(r"\d{4}-\d{2}-\d{2}", s):
        raise ValueError(
            f"Invalid date format: {s!r}. Expected YYYY-MM-DD (e.g. '2025-12-01')."
        )
    # The pattern only checks the shape; let the stdlib reject impossible
    # dates such as month 13 or February 30.
    try:
        date.fromisoformat(s)
    except ValueError as exc:
        raise ValueError(
            f"Invalid date format: {s!r}. Expected YYYY-MM-DD (e.g. '2025-12-01')."
        ) from exc
    return s
116
+
117
+
118
def validate_columns(
    columns: Optional[Sequence[str]], valid_columns: Sequence[str]
) -> Optional[list[str]]:
    """Filter ``columns`` down to the names found in ``valid_columns``.

    Args:
        columns: Requested column names, or ``None`` for "no selection".
        valid_columns: Names that are allowed.

    Returns:
        The allowed names in their requested order, or ``None`` when
        ``columns`` is ``None`` or nothing survives the filter. A warning
        listing the rejected names is emitted when any are dropped.
    """
    if columns is None:
        return None

    allowed = set(valid_columns)
    accepted: list[str] = []
    rejected: list[str] = []
    for name in columns:
        bucket = accepted if name in allowed else rejected
        bucket.append(name)

    if rejected:
        warnings.warn(
            f"Unknown column(s) ignored: {rejected}. Valid columns: {list(valid_columns)}",
            stacklevel=2,
        )

    return accepted if accepted else None
133
+
134
+
135
def format_orderby(
    orderby: Optional[str], valid_columns: Optional[Sequence[str]] = None
) -> Optional[str]:
    """Turn an orderby spec into the OData ``"Column asc|desc"`` form.

    Three spellings are understood:

    * ``"-Column"``: descending, returned as ``"Column desc"``.
    * A string containing a space (already ``"Column asc"``/``"Column desc"``):
      returned unchanged; the text before the first space is the column.
    * ``"Column"``: ascending, returned as ``"Column asc"``.

    Args:
        orderby: The spec; ``None`` or an empty string yields ``None``.
        valid_columns: When given, the column must be one of these names.

    Returns:
        The formatted spec, or ``None`` when ``orderby`` is empty or its
        column is not in ``valid_columns`` (a warning is emitted then).
    """
    if not orderby:
        return None

    # Work out which column is referenced and what would be sent for it.
    if orderby.startswith("-"):
        column = orderby[1:]
        formatted = f"{column} desc"
    elif " " in orderby:
        column = orderby.split(" ", 1)[0]
        formatted = orderby
    else:
        column = orderby
        formatted = f"{column} asc"

    if valid_columns is not None and column not in valid_columns:
        warnings.warn(
            f"Invalid orderby column {column!r}; orderby will be ignored.",
            stacklevel=2,
        )
        return None

    return formatted
@@ -0,0 +1,119 @@
1
+ """Convenience aggregations over the raw endpoint data (pandas-based).
2
+
3
+ These mirror the dplyr-based summaries in ``pixr`` (get_pix_summary,
4
+ get_pix_keys_summary, get_pix_transactions_by_state, etc.).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import List, Sequence, Union
10
+
11
+ import pandas as pd
12
+
13
+ from .client import PixClient
14
+
15
+
16
def keys_summary(client: PixClient, date: str, n_top: int = 20) -> pd.DataFrame:
    """Summarize key counts per institution and keep the ``n_top`` largest.

    Rows returned by ``client.keys(date=date)`` are grouped by
    ``Nome``/``ISPB``. Each group gets ``total_keys`` (sum of ``qtdChaves``),
    ``n_key_types`` (distinct ``TipoChave``), and ``pf_keys``/``pj_keys``
    (sum of ``qtdChaves`` for rows whose ``NaturezaUsuario`` is ``"PF"`` or
    ``"PJ"``; 0 when the institution has no such rows).

    Returns:
        The summary sorted by ``total_keys`` descending, truncated to
        ``n_top`` rows. If the client returns an empty frame it is returned
        as-is.
    """
    data = client.keys(date=date)
    if data.empty:
        return data

    by_institution = ["Nome", "ISPB"]
    agg = (
        data.groupby(by_institution)
        .agg(
            total_keys=("qtdChaves", "sum"),
            n_key_types=("TipoChave", "nunique"),
        )
        .reset_index()
    )

    # Look up each institution's PF / PJ subtotal by its (Nome, ISPB) pair;
    # institutions missing from a subtotal get 0.
    institution_index = agg.set_index(by_institution).index
    for column, nature in (("pf_keys", "PF"), ("pj_keys", "PJ")):
        subset = data[data["NaturezaUsuario"] == nature]
        subtotal = subset.groupby(by_institution)["qtdChaves"].sum()
        agg[column] = institution_index.map(subtotal).fillna(0).to_numpy()

    ranked = agg.sort_values("total_keys", ascending=False)
    return ranked.head(n_top).reset_index(drop=True)
34
+
35
+
36
def keys_by_type(client: PixClient, date: str) -> pd.DataFrame:
    """Summarize key counts per key type and user nature.

    Rows returned by ``client.keys(date=date)`` are grouped by
    ``TipoChave``/``NaturezaUsuario``; each group gets ``total_keys`` (sum of
    ``qtdChaves``) and ``n_institutions`` (distinct ``ISPB``).

    Returns:
        The summary sorted by ``total_keys`` descending. If the client
        returns an empty frame it is returned as-is.
    """
    data = client.keys(date=date)
    if data.empty:
        return data

    totals = data.groupby(["TipoChave", "NaturezaUsuario"]).agg(
        total_keys=("qtdChaves", "sum"),
        n_institutions=("ISPB", "nunique"),
    )
    totals = totals.reset_index()
    ranked = totals.sort_values("total_keys", ascending=False)
    return ranked.reset_index(drop=True)
48
+
49
+
50
def transaction_summary(
    client: PixClient, database: str, group_by: Union[str, Sequence[str]] = "NATUREZA"
) -> pd.DataFrame:
    """Summarize transaction statistics per group.

    Rows returned by ``client.transaction_stats(database=database)`` are
    grouped by ``group_by`` (a single column name or several). Each group
    gets ``total_value`` (sum of ``VALOR``), ``total_count`` (sum of
    ``QUANTIDADE``), ``n_records`` (row count) and ``avg_value``
    (``total_value / total_count``).

    Returns:
        The summary sorted by ``total_value`` descending. If the client
        returns an empty frame it is returned as-is.
    """
    data = client.transaction_stats(database=database)
    if data.empty:
        return data

    # A bare string is a single grouping column, not a sequence of characters.
    group_cols = list(group_by) if not isinstance(group_by, str) else [group_by]

    summary = (
        data.groupby(group_cols)
        .agg(
            total_value=("VALOR", "sum"),
            total_count=("QUANTIDADE", "sum"),
            n_records=("VALOR", "size"),
        )
        .reset_index()
    )
    summary["avg_value"] = summary["total_value"] / summary["total_count"]

    ranked = summary.sort_values("total_value", ascending=False)
    return ranked.reset_index(drop=True)
66
+
67
+
68
# Municipality-endpoint columns that the geographic aggregations below sum,
# when present in the data.
# NOTE(review): VL_/QT_ presumably denote value and quantity -- confirm
# against the API column documentation.
_MUNI_SUM_COLS = [
    "VL_PagadorPF", "QT_PagadorPF", "VL_PagadorPJ", "QT_PagadorPJ",
    "VL_RecebedorPF", "QT_RecebedorPF", "VL_RecebedorPJ", "QT_RecebedorPJ",
]
72
+
73
+
74
def _muni_lower(col: str) -> str:
    """Lower-case ``col`` and split the role from its PF/PJ suffix.

    For example ``"VL_PagadorPF"`` becomes ``"vl_pagador_pf"``.
    """
    name = col.lower()
    for role in ("pagador", "recebedor"):
        for kind in ("pf", "pj"):
            name = name.replace(role + kind, f"{role}_{kind}")
    return name
77
+
78
+
79
def transactions_by_state(client: PixClient, database: str) -> pd.DataFrame:
    """Roll municipality-level data up to one row per state.

    Delegates to ``_aggregate_geo``, grouping by the month, state and region
    columns and adding an ``n_municipalities`` column computed with the
    ``"size"`` aggregation.
    """
    state_keys = ["AnoMes", "Estado_Ibge", "Estado", "Sigla_Regiao", "Regiao"]
    return _aggregate_geo(
        client,
        database,
        keys=state_keys,
        count_name="n_municipalities",
        count_kind="size",
    )
87
+
88
+
89
def transactions_by_region(client: PixClient, database: str) -> pd.DataFrame:
    """Aggregate municipality data to the region level.

    Rows returned by ``client.transactions_by_municipality(database=database)``
    are grouped by ``AnoMes``/``Sigla_Regiao``/``Regiao``. Each group gets
    ``n_states`` (distinct ``Estado_Ibge``), ``n_municipalities`` (row count;
    presumably one row per municipality -- confirm against the endpoint) and
    the sum of every ``_MUNI_SUM_COLS`` column present in the data, renamed
    via ``_muni_lower``.

    Returns:
        The aggregated frame sorted descending by ``vl_pagador_pf`` when that
        column exists, otherwise by the last column. If the client returns an
        empty frame it is returned as-is.
    """
    data = client.transactions_by_municipality(database=database)
    if data.empty:
        return data

    # Build the named-aggregation spec once, only for columns the response
    # actually contains.
    sums = {_muni_lower(c): (c, "sum") for c in _MUNI_SUM_COLS if c in data.columns}
    out = (
        data.groupby(["AnoMes", "Sigla_Regiao", "Regiao"])
        .agg(
            n_states=("Estado_Ibge", "nunique"),
            n_municipalities=("Estado_Ibge", "size"),
            **sums,
        )
        .reset_index()
    )
    sort_col = "vl_pagador_pf" if "vl_pagador_pf" in out.columns else out.columns[-1]
    return out.sort_values(sort_col, ascending=False).reset_index(drop=True)
102
+
103
+
104
def _aggregate_geo(
    client: PixClient,
    database: str,
    keys: List[str],
    count_name: str,
    count_kind: str,
) -> pd.DataFrame:
    """Group municipality data by ``keys`` and sum the ``_MUNI_SUM_COLS`` columns.

    Args:
        client: Source of the data, via ``transactions_by_municipality``.
        database: Passed through to the client as ``database``.
        keys: Columns to group by.
        count_name: Name of the extra count column in the result.
        count_kind: Aggregation applied to ``AnoMes`` to produce that column
            (e.g. ``"size"``).

    Returns:
        The aggregated frame sorted descending by ``vl_pagador_pf`` when that
        column exists, otherwise by the last column. If the client returns an
        empty frame it is returned as-is.
    """
    data = client.transactions_by_municipality(database=database)
    if data.empty:
        return data

    count_spec = {count_name: ("AnoMes", count_kind)}
    # Sum only the columns the response actually contains.
    sum_specs = {
        _muni_lower(source): (source, "sum")
        for source in _MUNI_SUM_COLS
        if source in data.columns
    }
    out = data.groupby(keys).agg(**count_spec, **sum_specs).reset_index()

    if "vl_pagador_pf" in out.columns:
        sort_col = "vl_pagador_pf"
    else:
        sort_col = out.columns[-1]
    return out.sort_values(sort_col, ascending=False).reset_index(drop=True)