comexpy 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
comexpy/__init__.py ADDED
@@ -0,0 +1,102 @@
1
+ """comexpy — access the Brazilian Foreign Trade Statistics API (ComexStat).
2
+
3
+ A pandas-friendly Python port of the R package ``comexr``. Query Brazilian
4
+ export and import data — general trade statistics (1997-present), city-level
5
+ data, and historical records (1989-1996) — plus auxiliary tables for product
6
+ codes (NCM, NBM, HS), countries, economic blocs and classifications (CGCE,
7
+ SITC, ISIC). The public function names mirror the R API so knowledge
8
+ transfers directly between the two.
9
+
10
+ Every query/table function returns a :class:`pandas.DataFrame`; detail
11
+ functions return a ``dict``.
12
+
13
+ Quick start
14
+ -----------
15
+ >>> import comexpy
16
+ >>> comexpy.comex_export("2024-01", "2024-12", details="country") # doctest: +SKIP
17
+ >>> comexpy.comex_countries(search="china") # doctest: +SKIP
18
+ >>> comexpy.comex_details("general") # doctest: +SKIP
19
+ """
20
+ from __future__ import annotations
21
+
22
+ from ._client import BASE_URL, ComexError, get_options, set_options
23
+ from ._format import DETAILS_MAP
24
+ from ._msg import set_verbose
25
+ from .historical import comex_historical
26
+ from .query import comex_export, comex_import, comex_query
27
+ from .query_city import comex_query_city
28
+ from .tables import (
29
+ comex_available_years,
30
+ comex_blocs,
31
+ comex_cities,
32
+ comex_city_detail,
33
+ comex_countries,
34
+ comex_country_detail,
35
+ comex_customs_unit_detail,
36
+ comex_customs_units,
37
+ comex_details,
38
+ comex_filter_values,
39
+ comex_filters,
40
+ comex_last_update,
41
+ comex_metrics,
42
+ comex_state_detail,
43
+ comex_states,
44
+ comex_transport_mode_detail,
45
+ comex_transport_modes,
46
+ )
47
+ from .tables_classifications import comex_cgce, comex_isic, comex_sitc
48
+ from .tables_products import (
49
+ comex_hs,
50
+ comex_nbm,
51
+ comex_nbm_detail,
52
+ comex_ncm,
53
+ comex_ncm_detail,
54
+ )
55
+
56
# Version string of this package; it is also re-exported through ``__all__``.
__version__ = "0.1.0"

# Names exported by ``from comexpy import *``. Every entry is either imported
# at the top of this module or defined here (``__version__``). The entries
# are grouped by the kind of functionality they provide.
__all__ = [
    # Query functions (POST)
    "comex_query",
    "comex_export",
    "comex_import",
    "comex_query_city",
    "comex_historical",
    # API metadata (GET)
    "comex_last_update",
    "comex_available_years",
    "comex_filters",
    "comex_filter_values",
    "comex_details",
    "comex_metrics",
    # Auxiliary tables - geography
    "comex_countries",
    "comex_country_detail",
    "comex_blocs",
    "comex_states",
    "comex_state_detail",
    "comex_cities",
    "comex_city_detail",
    "comex_transport_modes",
    "comex_transport_mode_detail",
    "comex_customs_units",
    "comex_customs_unit_detail",
    # Auxiliary tables - products
    "comex_ncm",
    "comex_ncm_detail",
    "comex_nbm",
    "comex_nbm_detail",
    "comex_hs",
    # Auxiliary tables - classifications
    "comex_cgce",
    "comex_sitc",
    "comex_isic",
    # Configuration / helpers
    "set_options",
    "get_options",
    "set_verbose",
    "ComexError",
    "DETAILS_MAP",
    "BASE_URL",
    "__version__",
]
comexpy/_client.py ADDED
@@ -0,0 +1,202 @@
1
+ """HTTP layer for the ComexStat API.
2
+
3
+ Mirrors ``comex_get`` / ``comex_post`` from the R package's ``utils.R``:
4
+
5
+ * GET and POST helpers against :data:`BASE_URL`.
6
+ * User-configurable retry/timeout behaviour (the ComexStat API rate-limits
7
+ aggressively with HTTP 429 and a 10-second recommended back-off).
8
+ * Automatic retry on SSL certificate-verification failures — the ComexStat
9
+ servers use ICP-Brasil certificates that some systems do not trust.
10
+
11
+ On failure a :class:`ComexError` is raised with a friendly message.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import time
16
+ from typing import Any, Mapping, Optional
17
+
18
+ import requests
19
+
20
+ from . import _msg
21
+
22
# Root of the ComexStat API; ``comex_get`` / ``comex_post`` prepend it to the
# endpoint path they are given.
BASE_URL = "https://api-comexstat.mdic.gov.br"

# Value sent in the ``User-Agent`` header of every request.
_USER_AGENT = "comexpy (Python package)"

# Defaults mirror the R package options (comexr.*). The API recommends a
# 10-second wait after a 429, so retry_time defaults to 10.
#
# This dict is mutable module state: ``set_options`` overwrites entries,
# ``get_options`` returns a copy, and ``_perform`` switches ``ssl_verify`` to
# False after an SSL verification failure.
_CONFIG = {
    "timeout_get": 60,
    "timeout_post": 120,
    "max_tries": 3,
    "retry_time": 10,
    "ssl_verify": True,
}
35
+
36
+
37
class ComexError(RuntimeError):
    """Raised when a ComexStat API request fails.

    Within this module it is raised by ``_perform`` when every attempt ends
    in a transport-level exception, and by ``_check`` when the response has
    an HTTP status of 400 or above or its body cannot be parsed as JSON.
    """
39
+
40
+
41
def set_options(
    *,
    timeout_get: Optional[int] = None,
    timeout_post: Optional[int] = None,
    max_tries: Optional[int] = None,
    retry_time: Optional[int] = None,
    ssl_verify: Optional[bool] = None,
) -> None:
    """Update the module-wide HTTP settings (the R package's options).

    Only the arguments that are explicitly passed (i.e. not ``None``) are
    stored; every other setting keeps its current value. The ComexStat API
    often answers with rate-limit errors (HTTP 429) or times out, and these
    knobs let callers adapt without hammering the servers.

    Parameters
    ----------
    timeout_get : int, optional
        Seconds to wait for a response on GET requests (default 60).
    timeout_post : int, optional
        Seconds to wait for a response on POST requests (default 120).
    max_tries : int, optional
        Maximum number of attempts for a failing request (default 3).
        Raising ``retry_time`` is usually the better lever.
    retry_time : int, optional
        Seconds to sleep between attempts after a transient failure
        (default 10, the back-off recommended by the API).
    ssl_verify : bool, optional
        Whether to verify SSL certificates. Pass ``False`` to skip
        verification when the ICP-Brasil certificate chain is not trusted.
    """
    # (config key, supplied value, coercion) in the order they are applied.
    requested = (
        ("timeout_get", timeout_get, int),
        ("timeout_post", timeout_post, int),
        ("max_tries", max_tries, int),
        ("retry_time", retry_time, int),
        ("ssl_verify", ssl_verify, bool),
    )
    for key, value, coerce in requested:
        if value is None:
            continue
        _CONFIG[key] = coerce(value)
81
+
82
+
83
def get_options() -> dict:
    """Return the current HTTP settings as a new dict.

    The result is a shallow copy, so mutating it does not affect the
    settings used for subsequent requests.
    """
    snapshot = {}
    snapshot.update(_CONFIG)
    return snapshot
86
+
87
+
88
def _is_ssl_error(exc: Exception) -> bool:
    """Return True when *exc* (or its ``__cause__``) looks SSL-related.

    The check is a case-insensitive substring search for ``"ssl"``,
    ``"certificate"`` or ``"peer"`` in the text of the exception and of its
    explicit cause.
    """
    cause = getattr(exc, "__cause__", "")
    haystack = " ".join((str(exc), str(cause))).lower()
    for marker in ("ssl", "certificate", "peer"):
        if marker in haystack:
            return True
    return False
91
+
92
+
93
def _perform(method: str, url: str, **kwargs: Any) -> requests.Response:
    """Perform a request with retries and SSL auto-fallback.

    Parameters
    ----------
    method : str
        HTTP method passed to :func:`requests.request`.
    url : str
        Absolute URL to call.
    **kwargs
        Forwarded unchanged to :func:`requests.request` (``verify`` is
        supplied by this function from the ``ssl_verify`` setting).

    Returns
    -------
    requests.Response
        The first response whose status is not retryable, or the response of
        the final attempt whatever its status. Status handling is left to
        the caller.

    Raises
    ------
    ComexError
        If the last attempt ended in a ``requests`` exception. The original
        exception is attached as ``__cause__``.
    """
    max_tries = max(1, int(_CONFIG["max_tries"]))
    retry_time = int(_CONFIG["retry_time"])
    verify = bool(_CONFIG["ssl_verify"])
    # Rate-limit / transient server errors worth another attempt.
    retryable_statuses = (429, 500, 502, 503, 504)

    last_exc: Optional[Exception] = None
    attempt = 1
    while attempt <= max_tries:
        try:
            resp = requests.request(method, url, verify=verify, **kwargs)
        except requests.exceptions.SSLError as exc:
            last_exc = exc
            if verify:
                # NOTE(security): verification stays disabled for the rest of
                # the session once this fallback triggers; the warning below
                # is the only signal the user gets.
                _msg.warn(
                    "SSL certificate verification failed. Retrying without "
                    "SSL verification. To suppress this, call "
                    "comexpy.set_options(ssl_verify=False)."
                )
                _CONFIG["ssl_verify"] = False
                verify = False
                # The fallback does not count against max_tries and needs no
                # back-off, so it is still attempted when max_tries == 1.
                # It can happen at most once because ``verify`` is now False.
                continue
        except requests.RequestException as exc:
            last_exc = exc
        else:
            if resp.status_code not in retryable_statuses or attempt >= max_tries:
                return resp

        if attempt < max_tries:
            time.sleep(retry_time)
        attempt += 1

    raise ComexError(
        f"Failed to perform HTTP request to the ComexStat API.\n"
        f" x {last_exc}\n i URL: {url}"
    ) from last_exc
133
+
134
+
135
def _check(resp: requests.Response, endpoint: str) -> Any:
    """Validate an API response and return its parsed JSON body.

    Parameters
    ----------
    resp : requests.Response
        Response to inspect.
    endpoint : str
        Endpoint path, used only in error messages.

    Returns
    -------
    Any
        The decoded JSON body of a response whose status is below 400.

    Raises
    ------
    ComexError
        If the status is 400 or above, or if the body of a successful
        response is not valid JSON.
    """
    if resp.status_code >= 400:
        try:
            body = resp.json()
        except ValueError:
            body = None

        msg = None
        # An error body may be any JSON value (list, string, ...) or not JSON
        # at all; only look for a message when it is actually an object, so
        # the caller always gets a ComexError rather than an AttributeError.
        if isinstance(body, dict):
            msg = body.get("message")
            if not msg:
                error = body.get("error")
                if isinstance(error, dict):
                    msg = error.get("message")
                elif isinstance(error, str):
                    msg = error
        msg = msg or f"HTTP {resp.status_code}"
        raise ComexError(
            f"API request failed (HTTP {resp.status_code})\n"
            f" i Endpoint: {endpoint}\n i Message: {msg}"
        )
    try:
        return resp.json()
    except ValueError as exc:  # pragma: no cover
        raise ComexError(
            f"Could not parse API response as JSON.\n i Endpoint: {endpoint}"
        ) from exc
157
+
158
+
159
def comex_get(
    endpoint: str,
    query: Optional[Mapping[str, Any]] = None,
    verbose: bool = True,
) -> Any:
    """Issue a GET request against the ComexStat API and return parsed JSON.

    Parameters
    ----------
    endpoint : str
        Path appended to ``BASE_URL``.
    query : mapping, optional
        Query-string parameters; entries whose value is ``None`` are dropped.
    verbose : bool
        When true, announce the request through ``_msg.step``.

    Raises
    ------
    ComexError
        Propagated from ``_perform`` / ``_check`` on failure.
    """
    if verbose:
        _msg.step(f"GET {endpoint}")

    params = {}
    for key, value in (query or {}).items():
        if value is not None:
            params[key] = value

    request_headers = {"Accept": "application/json", "User-Agent": _USER_AGENT}
    response = _perform(
        "GET",
        BASE_URL + endpoint,
        params=params or None,
        headers=request_headers,
        timeout=_CONFIG["timeout_get"],
    )
    return _check(response, endpoint)
177
+
178
+
179
def comex_post(
    endpoint: str,
    body: Any,
    query: Optional[Mapping[str, Any]] = None,
    verbose: bool = True,
) -> Any:
    """Issue a POST request against the ComexStat API and return parsed JSON.

    Parameters
    ----------
    endpoint : str
        Path appended to ``BASE_URL``.
    body : Any
        Payload sent as the JSON request body.
    query : mapping, optional
        Query-string parameters; entries whose value is ``None`` are dropped.
    verbose : bool
        When true, announce the request through ``_msg.step``.

    Raises
    ------
    ComexError
        Propagated from ``_perform`` / ``_check`` on failure.
    """
    if verbose:
        _msg.step(f"POST {endpoint}")

    params = {}
    for key, value in (query or {}).items():
        if value is not None:
            params[key] = value

    request_headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "User-Agent": _USER_AGENT,
    }
    response = _perform(
        "POST",
        BASE_URL + endpoint,
        params=params or None,
        json=body,
        headers=request_headers,
        timeout=_CONFIG["timeout_post"],
    )
    return _check(response, endpoint)
comexpy/_format.py ADDED
@@ -0,0 +1,275 @@
1
+ """Response conversion, validation and name mappings.
2
+
3
+ Ported from the R package's ``utils.R``:
4
+
5
+ * :func:`response_to_df` / :func:`extract_single` handle the ComexStat
6
+ response shapes empirically observed across endpoints.
7
+ * :func:`validate_period` / :func:`convert_flow` validate user arguments.
8
+ * :data:`DETAILS_MAP` translates user-friendly aliases (``hs4``,
9
+ ``transport_mode``, ``cgce_n1``, …) to the API's internal names
10
+ (``heading``, ``via``, ``BECLevel1``, …).
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import re
15
+ from typing import Any, Mapping, Optional
16
+
17
+ import pandas as pd
18
+
19
+ # -------------------------------------------------------------------------
20
+ # Response conversion
21
+ # -------------------------------------------------------------------------
22
+
23
+
24
def _flatten_value(val: Any) -> Any:
    """Turn a container cell into a comma-joined string.

    Lists and tuples are joined element by element, dicts are joined over
    their values (keys are discarded). ``None`` and any other value are
    returned unchanged.
    """
    if isinstance(val, dict):
        parts = val.values()
    elif isinstance(val, (list, tuple)):
        parts = val
    else:
        # None and scalars pass straight through.
        return val
    return ", ".join(map(str, parts))
33
+
34
+
35
def response_to_df(response: Any, path: str = "data") -> pd.DataFrame:
    """Build a :class:`pandas.DataFrame` from a ComexStat API response.

    The following response layouts are recognised:

    * ``{"data": {"list": [...rows...], "count": N}}`` — most endpoints,
      including POST ``/general`` and ``/cities``.
    * ``{"data": [...rows...]}`` — ``/tables/uf``, ``/tables/cities``,
      ``/tables/ways``, ``/tables/urf``, POST ``/historical-data/``.
    * ``{"data": [[...rows...]]}`` — ``/general/filters/{filter}``.

    Parameters
    ----------
    response : Any
        Parsed JSON response. When it is a mapping containing *path*, the
        value stored there is converted; otherwise *response* itself is.
    path : str
        Key under which the payload lives (default ``"data"``).

    Returns
    -------
    pandas.DataFrame
        One row per record; a single row for a bare mapping; an empty frame
        when there is nothing to convert. Nested cell values are collapsed
        with ``_flatten_value``.
    """
    sequence_types = (list, tuple)

    if isinstance(response, Mapping) and path in response:
        payload = response[path]
    else:
        payload = response

    # Layout 1: the rows sit under a "list" key.
    if isinstance(payload, Mapping) and "list" in payload:
        payload = payload["list"]

    if payload is None:
        return pd.DataFrame()

    # Layout 3: peel off single-element lists that merely wrap the rows.
    while True:
        if not isinstance(payload, sequence_types):
            break
        if len(payload) != 1 or not isinstance(payload[0], sequence_types):
            break
        payload = payload[0]

    if isinstance(payload, sequence_types):
        if not payload:
            return pd.DataFrame()
        records = []
        for item in payload:
            if isinstance(item, Mapping):
                records.append(
                    {key: _flatten_value(cell) for key, cell in item.items()}
                )
            else:
                # Non-mapping rows land in a single "value" column.
                records.append({"value": _flatten_value(item)})
        return pd.DataFrame(records)

    if isinstance(payload, Mapping):
        # A lone object becomes a one-row frame.
        single = {key: _flatten_value(cell) for key, cell in payload.items()}
        return pd.DataFrame([single])

    return pd.DataFrame()
84
+
85
+
86
def extract_single(response: Any) -> Any:
    """Pull one record out of a detail-endpoint response.

    Supported layouts are ``{"data": {...}}``, ``{"data": [{...}]}``,
    ``{"data": {"list": [{...}]}}`` and ``{"data": null}``.

    Returns
    -------
    Any
        The first element when the payload is (or wraps, under ``"list"``) a
        sequence, the payload itself otherwise, and ``None`` when
        *response* is not a mapping or there is no record.
    """
    if not isinstance(response, Mapping):
        return None

    payload = response.get("data")
    if payload is None:
        return None

    if isinstance(payload, Mapping) and "list" in payload:
        items = payload["list"]
        return items[0] if items else None

    if isinstance(payload, (list, tuple)):
        return payload[0] if len(payload) > 0 else None

    return payload
110
+
111
+
112
+ # -------------------------------------------------------------------------
113
+ # Validation
114
+ # -------------------------------------------------------------------------
115
+
116
# Four-digit year, a dash and a real calendar month (01-12). ASCII-only so
# that digits from other scripts are rejected; used with ``fullmatch`` so no
# trailing newline slips through.
_PERIOD_RE = re.compile(r"\d{4}-(?:0[1-9]|1[0-2])", re.ASCII)


def validate_period(start_period: str, end_period: str) -> None:
    """Validate ``YYYY-MM`` period strings and their ordering.

    Parameters
    ----------
    start_period, end_period : str
        Periods in ``YYYY-MM`` form; the month must be between 01 and 12.

    Raises
    ------
    ValueError
        If either period is malformed or names a month outside 01-12, or if
        the start period is later than the end period.
    """
    start = str(start_period)
    end = str(end_period)
    if not _PERIOD_RE.fullmatch(start):
        raise ValueError(
            f"Invalid start period: {start_period}. "
            "Use format 'YYYY-MM' (e.g. '2023-01')."
        )
    if not _PERIOD_RE.fullmatch(end):
        raise ValueError(
            f"Invalid end period: {end_period}. "
            "Use format 'YYYY-MM' (e.g. '2023-12')."
        )
    # Zero-padded YYYY-MM strings sort chronologically, so a plain string
    # comparison of the validated values is enough.
    if start > end:
        raise ValueError("Start period must be before or equal to end period.")
133
+
134
+
135
def convert_flow(flow: str) -> str:
    """Map a trade-flow alias onto ``"export"`` or ``"import"``.

    Matching is case-insensitive. Accepted spellings are ``exp``,
    ``export``, ``exports``, ``imp``, ``import`` and ``imports``.

    Raises
    ------
    ValueError
        If *flow* is not one of the accepted spellings.
    """
    aliases = {
        "exp": "export",
        "export": "export",
        "exports": "export",
        "imp": "import",
        "import": "import",
        "imports": "import",
    }
    canonical = aliases.get(str(flow).lower())
    if canonical is None:
        raise ValueError(f"Invalid flow: {flow}. Use 'export' or 'import'.")
    return canonical
143
+
144
+
145
+ # -------------------------------------------------------------------------
146
+ # Name mappings (user-friendly -> API names)
147
+ # -------------------------------------------------------------------------
148
+
149
#: Alias -> API name lookup used by ``get_api_name`` for details and filters.
#:
#: Verified against the live endpoints (/general/filters, /general/details,
#: /cities/filters, /historical-data/filters). The map also maps every API
#: name to itself, so callers who already know the API name may pass it
#: verbatim. Lookups are case-sensitive; several aliases may point at the
#: same API name (e.g. ``hs4`` and ``sh4`` both give ``heading``).
DETAILS_MAP = {
    # Geographic
    "country": "country",
    "bloc": "economicBlock",
    "economic_block": "economicBlock",
    "economicBlock": "economicBlock",
    "state": "state",
    "city": "city",
    "transport_mode": "via",
    "via": "via",
    "customs_unit": "urf",
    "urf": "urf",
    # Products - NCM and Harmonized System (HS2/HS4/HS6)
    "ncm": "ncm",
    "hs6": "subHeading",
    "sh6": "subHeading",
    "subheading": "subHeading",
    "subHeading": "subHeading",
    "hs4": "heading",
    "sh4": "heading",
    "heading": "heading",
    "hs2": "chapter",
    "sh2": "chapter",
    "chapter": "chapter",
    "section": "section",
    # CGCE (a.k.a. BEC - Broad Economic Categories)
    "cgce_n1": "BECLevel1",
    "cgce_n2": "BECLevel2",
    "cgce_n3": "BECLevel3",
    "BECLevel1": "BECLevel1",
    "BECLevel2": "BECLevel2",
    "BECLevel3": "BECLevel3",
    # SITC / CUCI
    "sitc_section": "SITCSection",
    "sitc_division": "SITCDivision",
    "sitc_chapter": "SITCDivision",
    "sitc_group": "SITCGroup",
    "sitc_position": "SITCGroup",
    "sitc_subgroup": "SITCSubGroup",
    "sitc_subposition": "SITCSubGroup",
    "sitc_basic_heading": "SITCBasicHeading",
    "sitc_item": "SITCBasicHeading",
    "SITCSection": "SITCSection",
    "SITCDivision": "SITCDivision",
    "SITCGroup": "SITCGroup",
    "SITCSubGroup": "SITCSubGroup",
    "SITCBasicHeading": "SITCBasicHeading",
    # ISIC
    "isic_section": "ISICSection",
    "isic_division": "ISICDivision",
    "isic_group": "ISICGroup",
    "isic_class": "ISICClass",
    "ISICSection": "ISICSection",
    "ISICDivision": "ISICDivision",
    "ISICGroup": "ISICGroup",
    "ISICClass": "ISICClass",
    # NBM (historical)
    "nbm": "nbm",
}

# Distinct API-side names, i.e. the set of values of DETAILS_MAP.
_API_NAMES = set(DETAILS_MAP.values())
214
+
215
+
216
def get_api_name(name: str) -> str:
    """Resolve a detail/filter alias to the name the API expects.

    Known aliases are looked up in ``DETAILS_MAP``. A name that is already an
    API name is returned unchanged. Anything else is also returned
    unchanged, after a warning that it will be sent as-is.
    """
    try:
        return DETAILS_MAP[name]
    except KeyError:
        pass

    if name not in _API_NAMES:
        from . import _msg

        _msg.warn(f"Unknown detail/filter: {name}. Will be sent as-is.")
    return name
226
+
227
+
228
def _as_list(x: Any) -> list:
    """Coerce *x* to a new list.

    ``None`` gives an empty list, a list/tuple/set is copied into a list,
    and any other value (strings included) is wrapped in a one-element list.
    """
    if x is None:
        return []
    return list(x) if isinstance(x, (list, tuple, set)) else [x]
234
+
235
+
236
def build_details(details: Any) -> list:
    """Translate user detail aliases into the API ``details`` list.

    *details* may be ``None``, a single alias or a list/tuple/set of
    aliases; each one is resolved with ``get_api_name``.
    """
    return list(map(get_api_name, _as_list(details)))
239
+
240
+
241
def build_filters(filters: Optional[Mapping[str, Any]]) -> list:
    """Translate a name -> values mapping into the API ``filters`` list.

    Each entry becomes ``{"filter": <API name>, "values": [<values>]}``,
    with the name resolved by ``get_api_name`` and the values coerced to a
    list by ``_as_list``. ``None`` or an empty mapping gives an empty list.
    """
    payload = []
    for name, values in (filters or {}).items():
        payload.append(
            {"filter": get_api_name(name), "values": _as_list(values)}
        )
    return payload
249
+
250
+
251
def build_metrics(
    metric_fob: bool = True,
    metric_kg: bool = True,
    metric_statistic: bool = False,
    metric_freight: bool = False,
    metric_insurance: bool = False,
    metric_cif: bool = False,
) -> list:
    """Translate the metric flags into the API ``metrics`` list.

    Returns
    -------
    list
        API metric names for every flag that is truthy, always in the order
        FOB, KG, Statistic, Freight, Insurance, CIF.

    Raises
    ------
    ValueError
        If no flag is set.
    """
    flagged = (
        (metric_fob, "metricFOB"),
        (metric_kg, "metricKG"),
        (metric_statistic, "metricStatistic"),
        (metric_freight, "metricFreight"),
        (metric_insurance, "metricInsurance"),
        (metric_cif, "metricCIF"),
    )
    selected = [api_name for enabled, api_name in flagged if enabled]
    if not selected:
        raise ValueError("At least one metric must be selected.")
    return selected
comexpy/_msg.py ADDED
@@ -0,0 +1,47 @@
1
+ """Lightweight console messages, mirroring the R package's cli alerts.
2
+
3
+ Messages go to stderr and can be silenced with :func:`set_verbose`.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import sys
8
+
9
# Module-wide switch for informational output; changed via set_verbose().
_VERBOSE = True


def set_verbose(verbose: bool) -> None:
    """Turn informational messages (info/step/success) on or off.

    Warnings are printed regardless of this setting, and errors are raised
    as exceptions rather than printed.

    Parameters
    ----------
    verbose : bool
        ``False`` silences progress and success messages.
    """
    global _VERBOSE
    _VERBOSE = bool(verbose)


def is_verbose() -> bool:
    """Return whether informational messages are currently enabled."""
    return _VERBOSE


def _emit(symbol: str, message: str) -> None:
    """Write ``"<symbol> <message>"`` to stderr."""
    print(f"{symbol} {message}", file=sys.stderr)


def info(message: str) -> None:
    """Print an informational message unless output is silenced."""
    if is_verbose():
        _emit("ℹ", message)


def step(message: str) -> None:
    """Print a progress-step message unless output is silenced."""
    if is_verbose():
        _emit("→", message)


def success(message: str) -> None:
    """Print a success message unless output is silenced."""
    if is_verbose():
        _emit("✔", message)


def warn(message: str) -> None:
    """Print a warning; warnings ignore the verbosity setting."""
    _emit("!", message)