mcp-bcrp 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/PKG-INFO +3 -3
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/README.md +2 -2
- mcp_bcrp-0.1.3/examples/Guia_Usuario_BCRP.ipynb +191 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/mcp_bcrp/__init__.py +1 -1
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/mcp_bcrp/_version.py +3 -3
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/mcp_bcrp/client.py +21 -0
- mcp_bcrp-0.1.3/mcp_bcrp/search_engine.py +229 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/mcp_bcrp/server.py +20 -26
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/PKG-INFO +3 -3
- mcp_bcrp-0.1.2/examples/Guia_Usuario_BCRP.ipynb +0 -207
- mcp_bcrp-0.1.2/mcp_bcrp/search_engine.py +0 -237
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/.github/workflows/ci.yml +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/.github/workflows/publish.yml +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/.gitignore +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/CONTRIBUTING.md +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/LICENSE +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/examples/basic_usage.py +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/mcp_bcrp/__main__.py +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/SOURCES.txt +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/dependency_links.txt +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/entry_points.txt +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/requires.txt +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/top_level.txt +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/pyproject.toml +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/run.py +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/setup.cfg +0 -0
- {mcp_bcrp-0.1.2 → mcp_bcrp-0.1.3}/tests/test_basic.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mcp-bcrp
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: MCP Server for Banco Central de Reserva del Perú (BCRP) Statistical API
|
|
5
5
|
Author-email: Maykol Medrano <mmedrano2@uc.cl>
|
|
6
6
|
License: MIT
|
|
@@ -121,8 +121,8 @@ pip install -e .
|
|
|
121
121
|
### With Optional Dependencies
|
|
122
122
|
|
|
123
123
|
```bash
|
|
124
|
-
pip install mcp-bcrp[charts] # Include matplotlib for chart generation
|
|
125
|
-
pip install mcp-bcrp[dev] # Include development dependencies
|
|
124
|
+
pip install "mcp-bcrp[charts]" # Include matplotlib for chart generation
|
|
125
|
+
pip install "mcp-bcrp[dev]" # Include development dependencies
|
|
126
126
|
```
|
|
127
127
|
|
|
128
128
|
---
|
|
@@ -85,8 +85,8 @@ pip install -e .
|
|
|
85
85
|
### With Optional Dependencies
|
|
86
86
|
|
|
87
87
|
```bash
|
|
88
|
-
pip install mcp-bcrp[charts] # Include matplotlib for chart generation
|
|
89
|
-
pip install mcp-bcrp[dev] # Include development dependencies
|
|
88
|
+
pip install "mcp-bcrp[charts]" # Include matplotlib for chart generation
|
|
89
|
+
pip install "mcp-bcrp[dev]" # Include development dependencies
|
|
90
90
|
```
|
|
91
91
|
|
|
92
92
|
---
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "markdown",
|
|
5
|
+
"metadata": {},
|
|
6
|
+
"source": [
|
|
7
|
+
"# 📊 Guía de Usuario: mcp-bcrp (v0.1.3)\n",
|
|
8
|
+
"\n",
|
|
9
|
+
"Bienvenido a la guía oficial de **mcp-bcrp**, la librería y servidor MCP para acceder a las estadísticas del **Banco Central de Reserva del Perú (BCRP)**.\n",
|
|
10
|
+
"\n",
|
|
11
|
+
"[](https://github.com/MaykolMedrano/mcp_bcrp)\n",
|
|
12
|
+
"[](https://pypi.org/project/mcp-bcrp/)\n",
|
|
13
|
+
"\n",
|
|
14
|
+
"Esta versión incluye un **Buscador Interactivo** y **Tablas Inteligentes** que resuelven nombres automáticamente."
|
|
15
|
+
]
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"cell_type": "markdown",
|
|
19
|
+
"metadata": {},
|
|
20
|
+
"source": [
|
|
21
|
+
"## 1. Instalación\n",
|
|
22
|
+
"\n",
|
|
23
|
+
"Instalamos la librería usando el comando mágico `%pip` (el más recomendado en notebooks)."
|
|
24
|
+
]
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
"cell_type": "code",
|
|
28
|
+
"execution_count": null,
|
|
29
|
+
"metadata": {},
|
|
30
|
+
"outputs": [],
|
|
31
|
+
"source": [
|
|
32
|
+
"%pip install \"mcp-bcrp[charts]\" -U\n",
|
|
33
|
+
"\n",
|
|
34
|
+
"# NOTA: Si es la primera vez que instalas, reinicia el entorno (Runtime -> Restart session)."
|
|
35
|
+
]
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"cell_type": "markdown",
|
|
39
|
+
"metadata": {},
|
|
40
|
+
"source": [
|
|
41
|
+
"## 2. Configuración y Metadatos\n",
|
|
42
|
+
"\n",
|
|
43
|
+
"Cargamos los metadatos para habilitar la búsqueda inteligente."
|
|
44
|
+
]
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"cell_type": "code",
|
|
48
|
+
"execution_count": null,
|
|
49
|
+
"metadata": {},
|
|
50
|
+
"outputs": [],
|
|
51
|
+
"source": [
|
|
52
|
+
"from mcp_bcrp.client import AsyncBCRPClient, BCRPMetadata\n",
|
|
53
|
+
"import pandas as pd\n",
|
|
54
|
+
"\n",
|
|
55
|
+
"metadata = BCRPMetadata()\n",
|
|
56
|
+
"await metadata.load()"
|
|
57
|
+
]
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"cell_type": "markdown",
|
|
61
|
+
"metadata": {},
|
|
62
|
+
"source": [
|
|
63
|
+
"## 3. Buscador Inteligente e Interactivo\n",
|
|
64
|
+
"\n",
|
|
65
|
+
"A diferencia de otros buscadores, `mcp-bcrp` entiende sinónimos comunes (ej: \"tc\" -> \"tipo de cambio\") y si hay dudas, te ofrece opciones."
|
|
66
|
+
]
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
"cell_type": "code",
|
|
70
|
+
"execution_count": null,
|
|
71
|
+
"metadata": {},
|
|
72
|
+
"outputs": [],
|
|
73
|
+
"source": [
|
|
74
|
+
"# Ejemplo 1: Búsqueda con sinónimos y lenguaje natural\n",
|
|
75
|
+
"query = \"precio internacional cobre\"\n",
|
|
76
|
+
"resultado = metadata.solve(query)\n",
|
|
77
|
+
"\n",
|
|
78
|
+
"if \"candidates\" in resultado:\n",
|
|
79
|
+
" print(\"🔍 Encontré varias opciones, ¿cuál necesitas?\")\n",
|
|
80
|
+
" display(pd.DataFrame(resultado[\"candidates\"]))\n",
|
|
81
|
+
"else:\n",
|
|
82
|
+
" print(f\"📌 Única coincidencia encontrada: {resultado.get('name')}\")\n",
|
|
83
|
+
" print(f\"Código: {resultado.get('codigo_serie')}\")"
|
|
84
|
+
]
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
"cell_type": "code",
|
|
88
|
+
"execution_count": null,
|
|
89
|
+
"metadata": {},
|
|
90
|
+
"outputs": [],
|
|
91
|
+
"source": [
|
|
92
|
+
"# Ejemplo 2: Búsqueda de Tipo de Cambio (TC)\n",
|
|
93
|
+
"query_tc = \"tc venta\"\n",
|
|
94
|
+
"resultado_tc = metadata.solve(query_tc)\n",
|
|
95
|
+
"\n",
|
|
96
|
+
"if \"candidates\" in resultado_tc:\n",
|
|
97
|
+
" print(\"🔍 Opciones para Tipo de Cambio:\")\n",
|
|
98
|
+
" display(pd.DataFrame(resultado_tc[\"candidates\"]))\n",
|
|
99
|
+
"else: \n",
|
|
100
|
+
" print(f\"✅ Resultado: {resultado_tc.get('name')}\")"
|
|
101
|
+
]
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"cell_type": "markdown",
|
|
105
|
+
"metadata": {},
|
|
106
|
+
"source": [
|
|
107
|
+
"## 4. Tablas Inteligentes (Multi-indicador)\n",
|
|
108
|
+
"\n",
|
|
109
|
+
"Puedes pedir varios códigos y la librería resolverá los nombres por ti automáticamente."
|
|
110
|
+
]
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
"cell_type": "code",
|
|
114
|
+
"execution_count": null,
|
|
115
|
+
"metadata": {},
|
|
116
|
+
"outputs": [],
|
|
117
|
+
"source": [
|
|
118
|
+
"from mcp_bcrp.server import get_table\n",
|
|
119
|
+
"import json\n",
|
|
120
|
+
"\n",
|
|
121
|
+
"# Pedimos Inflación y Tipo de Cambio interbancario de una vez\n",
|
|
122
|
+
"series = [\"PN01271PM\", \"PD04637PD\"]\n",
|
|
123
|
+
"\n",
|
|
124
|
+
"tabla_raw = await get_table(series_codes=series, period=\"2024-01/2024-12\")\n",
|
|
125
|
+
"df_tabla = pd.DataFrame(json.loads(tabla_raw))\n",
|
|
126
|
+
"\n",
|
|
127
|
+
"print(\"Tabla con nombres automáticos:\")\n",
|
|
128
|
+
"display(df_tabla.head())"
|
|
129
|
+
]
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"cell_type": "markdown",
|
|
133
|
+
"metadata": {},
|
|
134
|
+
"source": [
|
|
135
|
+
"## 5. Visualización Avanzada\n",
|
|
136
|
+
"\n",
|
|
137
|
+
"Generación de gráficos con leyendas automáticas."
|
|
138
|
+
]
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"cell_type": "code",
|
|
142
|
+
"execution_count": null,
|
|
143
|
+
"metadata": {},
|
|
144
|
+
"outputs": [],
|
|
145
|
+
"source": [
|
|
146
|
+
"from mcp_bcrp.server import plot_chart\n",
|
|
147
|
+
"from IPython.display import Image\n",
|
|
148
|
+
"\n",
|
|
149
|
+
"res = await plot_chart(\n",
|
|
150
|
+
" series_codes=[\"PD38048AM\", \"PD38049AM\"],\n",
|
|
151
|
+
" period=\"2022-01/2024-12\",\n",
|
|
152
|
+
" title=\"Expectativas de Crecimiento PBI\"\n",
|
|
153
|
+
")\n",
|
|
154
|
+
"\n",
|
|
155
|
+
"info = json.loads(res)\n",
|
|
156
|
+
"Image(info['chart_path'])"
|
|
157
|
+
]
|
|
158
|
+
},
|
|
159
|
+
{
|
|
160
|
+
"cell_type": "markdown",
|
|
161
|
+
"metadata": {},
|
|
162
|
+
"source": [
|
|
163
|
+
"---\n",
|
|
164
|
+
"### Recursos\n",
|
|
165
|
+
"- **Issue Tracker**: [GitHub Issues](https://github.com/MaykolMedrano/mcp_bcrp/issues)\n",
|
|
166
|
+
"- **Documentación SDK**: [PyPI mcp-bcrp](https://pypi.org/project/mcp-bcrp/)"
|
|
167
|
+
]
|
|
168
|
+
}
|
|
169
|
+
],
|
|
170
|
+
"metadata": {
|
|
171
|
+
"kernelspec": {
|
|
172
|
+
"display_name": "Python 3",
|
|
173
|
+
"language": "python",
|
|
174
|
+
"name": "python3"
|
|
175
|
+
},
|
|
176
|
+
"language_info": {
|
|
177
|
+
"codemirror_mode": {
|
|
178
|
+
"name": "ipython",
|
|
179
|
+
"version": 3
|
|
180
|
+
},
|
|
181
|
+
"file_extension": ".py",
|
|
182
|
+
"mimetype": "text/x-python",
|
|
183
|
+
"name": "python",
|
|
184
|
+
"nbconvert_exporter": "python",
|
|
185
|
+
"pygments_lexer": "ipython3",
|
|
186
|
+
"version": "3.11.1"
|
|
187
|
+
}
|
|
188
|
+
},
|
|
189
|
+
"nbformat": 4,
|
|
190
|
+
"nbformat_minor": 5
|
|
191
|
+
}
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1,
|
|
31
|
+
__version__ = version = '0.1.3'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 3)
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'g1c39c6e85'
|
|
@@ -154,6 +154,27 @@ class BCRPMetadata:
|
|
|
154
154
|
mask &= kw_mask
|
|
155
155
|
return self.df[mask].head(limit)
|
|
156
156
|
|
|
157
|
+
def get_series_names(self, codes: List[str]) -> List[str]:
|
|
158
|
+
"""
|
|
159
|
+
Retrieve original names for a list of series codes.
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
codes: List of BCRP series codes.
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
List of names corresponding to the codes.
|
|
166
|
+
Uses the code itself if name is not found.
|
|
167
|
+
"""
|
|
168
|
+
if self.df.empty:
|
|
169
|
+
return codes
|
|
170
|
+
|
|
171
|
+
# Standardize columns to search
|
|
172
|
+
code_col = "Código de serie" if "Código de serie" in self.df.columns else "Codigo de serie"
|
|
173
|
+
name_col = "Nombre de serie"
|
|
174
|
+
|
|
175
|
+
mapping = dict(zip(self.df[code_col], self.df[name_col]))
|
|
176
|
+
return [mapping.get(code, code) for code in codes]
|
|
177
|
+
|
|
157
178
|
class AsyncBCRPClient:
|
|
158
179
|
"""
|
|
159
180
|
Async client for BCRP (Banco Central de Reserva del Perú) Statistical API.
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Deterministic Search Engine for BCRP Series.
|
|
3
|
+
|
|
4
|
+
Pipeline:
|
|
5
|
+
1. Canonical Normalization (lowercase, remove accents, synonyms)
|
|
6
|
+
2. Attribute Extraction (currency, horizon, component, side)
|
|
7
|
+
3. Hard Filters
|
|
8
|
+
4. Fuzzy Scoring with RapidFuzz (Token Set Ratio)
|
|
9
|
+
5. Interactive Candidate Resolution
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import logging
|
|
14
|
+
import unicodedata
|
|
15
|
+
import re
|
|
16
|
+
from typing import Dict, Any, List
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from rapidfuzz import fuzz
|
|
20
|
+
except ImportError:
|
|
21
|
+
fuzz = None
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger("mcp_bcrp")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SearchEngine:
|
|
27
|
+
"""
|
|
28
|
+
Interactive Search Engine for BCRP Series.
|
|
29
|
+
|
|
30
|
+
Implements a pipeline for robust series resolution:
|
|
31
|
+
1. Canonical Normalization with Synonym Support
|
|
32
|
+
2. Attribute Filtering (Currency, Side)
|
|
33
|
+
3. Fuzzy Set Scoring
|
|
34
|
+
4. Multi-candidate Result Generation
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
STOPWORDS = {'de', 'del', 'el', 'la', 'los', 'las', 'y', 'en', 'al', 'con', 'por', 'precio', 'valor', 'indicador'}
|
|
38
|
+
|
|
39
|
+
# Synonym map for common abbreviations
|
|
40
|
+
SYNONYMS = {
|
|
41
|
+
"tc": "tipo cambio",
|
|
42
|
+
"t.c.": "tipo cambio",
|
|
43
|
+
"pbi": "producto bruto interno",
|
|
44
|
+
"internacional": "lme londres Chicago nymex",
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
def __init__(self, metadata_df: pd.DataFrame):
|
|
48
|
+
"""
|
|
49
|
+
Initialize search engine with BCRP metadata.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
metadata_df: DataFrame with BCRP series metadata.
|
|
53
|
+
"""
|
|
54
|
+
self.df = metadata_df
|
|
55
|
+
self._preprocess_metadata()
|
|
56
|
+
|
|
57
|
+
def _normalize(self, text: str) -> str:
|
|
58
|
+
"""
|
|
59
|
+
Canonical normalization of text.
|
|
60
|
+
|
|
61
|
+
Applies: lowercase, accent removal, punctuation removal,
|
|
62
|
+
synonym expansion, stopword filtering.
|
|
63
|
+
"""
|
|
64
|
+
if not isinstance(text, str):
|
|
65
|
+
return ""
|
|
66
|
+
|
|
67
|
+
text = text.lower()
|
|
68
|
+
# Remove accents
|
|
69
|
+
text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8')
|
|
70
|
+
# Replace punctuation
|
|
71
|
+
text = re.sub(r'[^\w\s]', ' ', text)
|
|
72
|
+
|
|
73
|
+
# Apply synonyms (simple replacement)
|
|
74
|
+
for syn, target in self.SYNONYMS.items():
|
|
75
|
+
if syn in text.split():
|
|
76
|
+
text = text.replace(syn, target)
|
|
77
|
+
|
|
78
|
+
tokens = text.split()
|
|
79
|
+
clean_tokens = [t for t in tokens if t not in self.STOPWORDS]
|
|
80
|
+
|
|
81
|
+
return " ".join(clean_tokens)
|
|
82
|
+
|
|
83
|
+
def _extract_attributes(self, text_norm: str) -> Dict[str, Any]:
|
|
84
|
+
"""Extract structured attributes to help disambiguate."""
|
|
85
|
+
attrs = {
|
|
86
|
+
"currency": None,
|
|
87
|
+
"side": None, # compra / venta
|
|
88
|
+
"horizon": None,
|
|
89
|
+
"component": None
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
tokens = set(text_norm.split())
|
|
93
|
+
|
|
94
|
+
# Currency
|
|
95
|
+
if any(t in tokens for t in ['us', 'usd', 'dolares']):
|
|
96
|
+
attrs['currency'] = 'usd'
|
|
97
|
+
elif any(t in tokens for t in ['s', 'pen', 'soles']):
|
|
98
|
+
attrs['currency'] = 'pen'
|
|
99
|
+
|
|
100
|
+
# Side (Critical for FX)
|
|
101
|
+
if "compra" in tokens:
|
|
102
|
+
attrs['side'] = 'compra'
|
|
103
|
+
elif "venta" in tokens:
|
|
104
|
+
attrs['side'] = 'venta'
|
|
105
|
+
|
|
106
|
+
# Horizon
|
|
107
|
+
if "corto" in tokens:
|
|
108
|
+
attrs['horizon'] = 'corto'
|
|
109
|
+
elif "largo" in tokens:
|
|
110
|
+
attrs['horizon'] = 'largo'
|
|
111
|
+
|
|
112
|
+
return attrs
|
|
113
|
+
|
|
114
|
+
def _preprocess_metadata(self):
|
|
115
|
+
"""Pre-calculate normalized search corpus."""
|
|
116
|
+
if self.df.empty:
|
|
117
|
+
self.search_corpus = []
|
|
118
|
+
return
|
|
119
|
+
|
|
120
|
+
processed = []
|
|
121
|
+
for idx, row in self.df.iterrows():
|
|
122
|
+
raw_name = str(row.get('Nombre de serie', ''))
|
|
123
|
+
name_norm = self._normalize(raw_name)
|
|
124
|
+
attrs = self._extract_attributes(name_norm)
|
|
125
|
+
|
|
126
|
+
# Use original code column names if possible
|
|
127
|
+
code = row.get("Código de serie") or row.get("Codigo de serie")
|
|
128
|
+
|
|
129
|
+
item = {
|
|
130
|
+
"idx": idx,
|
|
131
|
+
"codigo_serie": code,
|
|
132
|
+
"name_original": raw_name,
|
|
133
|
+
"name_norm": name_norm,
|
|
134
|
+
"tokens": set(name_norm.split()),
|
|
135
|
+
"currency": attrs['currency'],
|
|
136
|
+
"side": attrs['side'],
|
|
137
|
+
"horizon": attrs['horizon']
|
|
138
|
+
}
|
|
139
|
+
processed.append(item)
|
|
140
|
+
|
|
141
|
+
self.search_corpus = processed
|
|
142
|
+
|
|
143
|
+
def solve(self, query: str) -> Dict[str, Any]:
|
|
144
|
+
"""
|
|
145
|
+
Resolve query with interactive candidate logic.
|
|
146
|
+
"""
|
|
147
|
+
if not self.search_corpus:
|
|
148
|
+
return {"error": "no_match", "reason": "empty_corpus"}
|
|
149
|
+
|
|
150
|
+
q_norm = self._normalize(query)
|
|
151
|
+
q_attrs = self._extract_attributes(q_norm)
|
|
152
|
+
q_tokens = set(q_norm.split())
|
|
153
|
+
|
|
154
|
+
if not q_tokens:
|
|
155
|
+
return {"error": "no_match", "reason": "empty_query"}
|
|
156
|
+
|
|
157
|
+
# Scoring
|
|
158
|
+
scored = []
|
|
159
|
+
for c in self.search_corpus:
|
|
160
|
+
if not fuzz:
|
|
161
|
+
# Basic token overlap fallback
|
|
162
|
+
intersection = len(q_tokens & c['tokens'])
|
|
163
|
+
score = (intersection / len(q_tokens)) * 100 if q_tokens else 0
|
|
164
|
+
else:
|
|
165
|
+
# Token Set Ratio is perfect for finding "query" inside "long technical title"
|
|
166
|
+
score = fuzz.token_set_ratio(q_norm, c['name_norm'])
|
|
167
|
+
|
|
168
|
+
# Boost if specific side (compra/venta) matches
|
|
169
|
+
if q_attrs['side'] and c['side'] == q_attrs['side']:
|
|
170
|
+
score += 5
|
|
171
|
+
elif q_attrs['side'] and c['side'] and c['side'] != q_attrs['side']:
|
|
172
|
+
score -= 10
|
|
173
|
+
|
|
174
|
+
if score >= 65:
|
|
175
|
+
scored.append({
|
|
176
|
+
"codigo_serie": c['codigo_serie'],
|
|
177
|
+
"name": c['name_original'],
|
|
178
|
+
"score": score
|
|
179
|
+
})
|
|
180
|
+
|
|
181
|
+
scored.sort(key=lambda x: x['score'], reverse=True)
|
|
182
|
+
|
|
183
|
+
if not scored:
|
|
184
|
+
return {"error": "no_match", "reason": "low_confidence"}
|
|
185
|
+
|
|
186
|
+
# Logic for result type
|
|
187
|
+
top_score = scored[0]['score']
|
|
188
|
+
|
|
189
|
+
# 1. Check for ties or very close matches at the top
|
|
190
|
+
# If multiple series have top_score, or are very close (within 2 pts), return candidates.
|
|
191
|
+
high_tier = [s for s in scored if s['score'] >= (top_score - 2)]
|
|
192
|
+
|
|
193
|
+
if len(high_tier) > 1 and top_score < 100:
|
|
194
|
+
# Ambiguity if multiple high matches, unless one is perfect 100 and there are no other 100s
|
|
195
|
+
pass # fall through to candidates logic
|
|
196
|
+
elif len(high_tier) == 1 and top_score >= 85:
|
|
197
|
+
# Single clear winner with good score
|
|
198
|
+
return {
|
|
199
|
+
"codigo_serie": high_tier[0]['codigo_serie'],
|
|
200
|
+
"confidence": round(high_tier[0]['score'] / 100.0, 2),
|
|
201
|
+
"name": high_tier[0]['name']
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
# If top_score is 100, but there are multiple 100s, it's ambiguous
|
|
205
|
+
top_tier_100 = [s for s in scored if s['score'] == 100]
|
|
206
|
+
if len(top_tier_100) == 1:
|
|
207
|
+
return {
|
|
208
|
+
"codigo_serie": top_tier_100[0]['codigo_serie'],
|
|
209
|
+
"confidence": 1.0,
|
|
210
|
+
"name": top_tier_100[0]['name']
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
# 2. Interactive Candidates
|
|
214
|
+
# Return top 5 matches if confidence is mixed or tied
|
|
215
|
+
candidates = []
|
|
216
|
+
seen_codes = set()
|
|
217
|
+
for s in scored[:5]:
|
|
218
|
+
if s['codigo_serie'] not in seen_codes:
|
|
219
|
+
candidates.append({
|
|
220
|
+
"codigo": s['codigo_serie'],
|
|
221
|
+
"nombre": s['name']
|
|
222
|
+
})
|
|
223
|
+
seen_codes.add(s['codigo_serie'])
|
|
224
|
+
|
|
225
|
+
return {
|
|
226
|
+
"error": "ambiguedad",
|
|
227
|
+
"reason": "multiple_candidates",
|
|
228
|
+
"candidates": candidates
|
|
229
|
+
}
|
|
@@ -153,29 +153,17 @@ async def get_table(
|
|
|
153
153
|
if df.empty:
|
|
154
154
|
return "No data found."
|
|
155
155
|
|
|
156
|
-
# 2.
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
# Or is it Index? usebcrp 'variation=1' implies monthly variation.
|
|
161
|
-
|
|
162
|
-
# Ensure time is datetime
|
|
163
|
-
# BCRP returns 'Mmm.YY' or similar sometimes. helper parsing might be needed.
|
|
164
|
-
# But for now, let's just return the raw data properly formatted,
|
|
165
|
-
# Maybe adding a simple pct_change if it's numeric.
|
|
166
|
-
|
|
167
|
-
# For simplicity and reliability in this refactor, we will return the raw values
|
|
168
|
-
# but structured neatly. Re-implementing full 'table' logic from usebcrp might be overkill
|
|
169
|
-
# if the user just wants the data.
|
|
170
|
-
|
|
171
|
-
# However, to be helpful, let's try to set names if provided
|
|
172
|
-
if names:
|
|
173
|
-
# Map codes to names
|
|
174
|
-
# columns are 'time' + codes.
|
|
175
|
-
mapping = {code: name for code, name in zip(series_codes, names)}
|
|
176
|
-
df.rename(columns=mapping, inplace=True)
|
|
156
|
+
# 2. Resolve Names if not provided
|
|
157
|
+
if not names:
|
|
158
|
+
await metadata_client.load()
|
|
159
|
+
names = metadata_client.get_series_names(series_codes)
|
|
177
160
|
|
|
178
|
-
|
|
161
|
+
# 3. Rename columns
|
|
162
|
+
mapping = {code: name for code, name in zip(series_codes, names)}
|
|
163
|
+
df.rename(columns=mapping, inplace=True)
|
|
164
|
+
|
|
165
|
+
return df.to_json(orient='records', date_format='iso', indent=2)
|
|
166
|
+
|
|
179
167
|
|
|
180
168
|
except Exception as e:
|
|
181
169
|
return f"Table generation failed: {str(e)}"
|
|
@@ -234,12 +222,18 @@ async def plot_chart(
|
|
|
234
222
|
df['time'] = df['time'].apply(parse_spanish_date)
|
|
235
223
|
df = df.set_index('time')
|
|
236
224
|
|
|
237
|
-
# 4.
|
|
225
|
+
# 4. Resolve Names if not provided
|
|
226
|
+
if not names:
|
|
227
|
+
await metadata_client.load()
|
|
228
|
+
names = metadata_client.get_series_names(series_codes)
|
|
229
|
+
|
|
230
|
+
# 5. Plot each series
|
|
238
231
|
colors = ['#1a5fb4', '#e01b24', '#33d17a', '#ff7800', '#9141ac']
|
|
239
232
|
for idx, code in enumerate(series_codes):
|
|
240
|
-
if code in df.columns
|
|
241
|
-
|
|
242
|
-
|
|
233
|
+
col_name = code if code in df.columns else (names[idx] if names and names[idx] in df.columns else None)
|
|
234
|
+
if col_name:
|
|
235
|
+
series = df[col_name].dropna()
|
|
236
|
+
label = names[idx] if names and idx < len(names) else col_name
|
|
243
237
|
color = colors[idx % len(colors)]
|
|
244
238
|
ax.plot(series.index, series.values, linewidth=2.5,
|
|
245
239
|
label=label, color=color)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mcp-bcrp
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: MCP Server for Banco Central de Reserva del Perú (BCRP) Statistical API
|
|
5
5
|
Author-email: Maykol Medrano <mmedrano2@uc.cl>
|
|
6
6
|
License: MIT
|
|
@@ -121,8 +121,8 @@ pip install -e .
|
|
|
121
121
|
### With Optional Dependencies
|
|
122
122
|
|
|
123
123
|
```bash
|
|
124
|
-
pip install mcp-bcrp[charts] # Include matplotlib for chart generation
|
|
125
|
-
pip install mcp-bcrp[dev] # Include development dependencies
|
|
124
|
+
pip install "mcp-bcrp[charts]" # Include matplotlib for chart generation
|
|
125
|
+
pip install "mcp-bcrp[dev]" # Include development dependencies
|
|
126
126
|
```
|
|
127
127
|
|
|
128
128
|
---
|
|
@@ -1,207 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"cells": [
|
|
3
|
-
{
|
|
4
|
-
"cell_type": "markdown",
|
|
5
|
-
"metadata": {},
|
|
6
|
-
"source": [
|
|
7
|
-
"# 📊 Guía de Usuario: mcp-bcrp\n",
|
|
8
|
-
"\n",
|
|
9
|
-
"Bienvenido a la guía oficial de **mcp-bcrp**, la librería y servidor MCP para acceder a las estadísticas del **Banco Central de Reserva del Perú (BCRP)** de manera profesional y eficiente.\n",
|
|
10
|
-
"\n",
|
|
11
|
-
"[](https://github.com/MaykolMedrano/mcp_bcrp)\n",
|
|
12
|
-
"[](https://pypi.org/project/mcp-bcrp/)\n",
|
|
13
|
-
"\n",
|
|
14
|
-
"Esta guía te llevará desde la instalación básica hasta la generación de gráficos avanzados y análisis de datos."
|
|
15
|
-
]
|
|
16
|
-
},
|
|
17
|
-
{
|
|
18
|
-
"cell_type": "markdown",
|
|
19
|
-
"metadata": {},
|
|
20
|
-
"source": [
|
|
21
|
-
"## 1. Instalación\n",
|
|
22
|
-
"\n",
|
|
23
|
-
"Primero, instalamos la librería desde PyPI. Recomendamos incluir `[charts]` para poder generar visualizaciones."
|
|
24
|
-
]
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
"cell_type": "code",
|
|
28
|
-
"execution_count": null,
|
|
29
|
-
"metadata": {},
|
|
30
|
-
"outputs": [],
|
|
31
|
-
"source": [
|
|
32
|
-
"!pip install mcp-bcrp[charts] -U"
|
|
33
|
-
]
|
|
34
|
-
},
|
|
35
|
-
{
|
|
36
|
-
"cell_type": "markdown",
|
|
37
|
-
"metadata": {},
|
|
38
|
-
"source": [
|
|
39
|
-
"## 2. Configuración y Metadatos\n",
|
|
40
|
-
"\n",
|
|
41
|
-
"La librería utiliza un motor de búsqueda determinista que requiere un catálogo de metadatos (~17MB). La primera vez que lo uses, se descargará automáticamente y se guardará en tu cache local."
|
|
42
|
-
]
|
|
43
|
-
},
|
|
44
|
-
{
|
|
45
|
-
"cell_type": "code",
|
|
46
|
-
"execution_count": null,
|
|
47
|
-
"metadata": {},
|
|
48
|
-
"outputs": [],
|
|
49
|
-
"source": [
|
|
50
|
-
"import asyncio\n",
|
|
51
|
-
"from mcp_bcrp.client import AsyncBCRPClient, BCRPMetadata\n",
|
|
52
|
-
"import pandas as pd\n",
|
|
53
|
-
"\n",
|
|
54
|
-
"# Inicializamos el cliente de metadatos\n",
|
|
55
|
-
"metadata = BCRPMetadata()\n",
|
|
56
|
-
"\n",
|
|
57
|
-
"# Cargamos los metadatos (descarga automática si no existe)\n",
|
|
58
|
-
"await metadata.load()\n",
|
|
59
|
-
"\n",
|
|
60
|
-
"print(f\"Metadatos cargados: {len(metadata.df)} series disponibles.\")"
|
|
61
|
-
]
|
|
62
|
-
},
|
|
63
|
-
{
|
|
64
|
-
"cell_type": "markdown",
|
|
65
|
-
"metadata": {},
|
|
66
|
-
"source": [
|
|
67
|
-
"## 3. Búsqueda de Indicadores\n",
|
|
68
|
-
"\n",
|
|
69
|
-
"El motor de búsqueda es **determinista**. Puede encontrar series específicas basándose en atributos como moneda, horizonte y tipo de componente."
|
|
70
|
-
]
|
|
71
|
-
},
|
|
72
|
-
{
|
|
73
|
-
"cell_type": "code",
|
|
74
|
-
"execution_count": null,
|
|
75
|
-
"metadata": {},
|
|
76
|
-
"outputs": [],
|
|
77
|
-
"source": [
|
|
78
|
-
"# Ejemplo: Buscar la tasa de interés de política monetaria\n",
|
|
79
|
-
"query = \"tasa interes politica monetaria\"\n",
|
|
80
|
-
"resultado = metadata.solve(query)\n",
|
|
81
|
-
"\n",
|
|
82
|
-
"print(\"Resultado de búsqueda:\")\n",
|
|
83
|
-
"print(f\"Código: {resultado['codigo_serie']}\")\n",
|
|
84
|
-
"print(f\"Nombre: {resultado['name_original']}\")\n",
|
|
85
|
-
"print(f\"Confianza: {resultado['confidence']}\")"
|
|
86
|
-
]
|
|
87
|
-
},
|
|
88
|
-
{
|
|
89
|
-
"cell_type": "markdown",
|
|
90
|
-
"metadata": {},
|
|
91
|
-
"source": [
|
|
92
|
-
"## 4. Obtención de Datos (Series Temporales)\n",
|
|
93
|
-
"\n",
|
|
94
|
-
"Usamos `AsyncBCRPClient` para descargar los datos. El cliente detecta automáticamente la frecuencia (diaria, mensual, trimestral, anual)."
|
|
95
|
-
]
|
|
96
|
-
},
|
|
97
|
-
{
|
|
98
|
-
"cell_type": "code",
|
|
99
|
-
"execution_count": null,
|
|
100
|
-
"metadata": {},
|
|
101
|
-
"outputs": [],
|
|
102
|
-
"source": [
|
|
103
|
-
"client = AsyncBCRPClient()\n",
|
|
104
|
-
"\n",
|
|
105
|
-
"# Descargamos datos de Inflación y Tipo de Cambio\n",
|
|
106
|
-
"series_a_pedir = [\"PN01271PM\", \"PD04637PD\"] # IPC Variación % y T.C. Venta\n",
|
|
107
|
-
"\n",
|
|
108
|
-
"df = await client.get_series(\n",
|
|
109
|
-
" codes=series_a_pedir,\n",
|
|
110
|
-
" start_date=\"2023-01-01\",\n",
|
|
111
|
-
" end_date=\"2024-12-31\"\n",
|
|
112
|
-
")\n",
|
|
113
|
-
"\n",
|
|
114
|
-
"print(\"Primeras filas de los datos descargados:\")\n",
|
|
115
|
-
"print(df.head())"
|
|
116
|
-
]
|
|
117
|
-
},
|
|
118
|
-
{
|
|
119
|
-
"cell_type": "markdown",
|
|
120
|
-
"metadata": {},
|
|
121
|
-
"source": [
|
|
122
|
-
"## 5. Visualización Profesional\n",
|
|
123
|
-
"\n",
|
|
124
|
-
"La librería incluye herramientas para generar gráficos listos para reportes, manejando automáticamente el formato de fechas del BCRP."
|
|
125
|
-
]
|
|
126
|
-
},
|
|
127
|
-
{
|
|
128
|
-
"cell_type": "code",
|
|
129
|
-
"execution_count": null,
|
|
130
|
-
"metadata": {},
|
|
131
|
-
"outputs": [],
|
|
132
|
-
"source": [
|
|
133
|
-
"from mcp_bcrp.server import _plot_chart # Usamos la lógica interna para el ejemplo\n",
|
|
134
|
-
"\n",
|
|
135
|
-
"# Generamos un gráfico de Expectativas de PBI\n",
|
|
136
|
-
"img_path = \"expectativas_pbi.png\"\n",
|
|
137
|
-
"\n",
|
|
138
|
-
"await _plot_chart(\n",
|
|
139
|
-
" series_codes=[\"PD38048AM\"],\n",
|
|
140
|
-
" period=\"2020-01/2024-12\",\n",
|
|
141
|
-
" title=\"Expectativas del PBI a 12 meses\",\n",
|
|
142
|
-
" names=[\"Expectativa PBI (%)\"],\n",
|
|
143
|
-
" output_path=img_path\n",
|
|
144
|
-
")\n",
|
|
145
|
-
"\n",
|
|
146
|
-
"from IPython.display import Image\n",
|
|
147
|
-
"Image(img_path)"
|
|
148
|
-
]
|
|
149
|
-
},
|
|
150
|
-
{
|
|
151
|
-
"cell_type": "markdown",
|
|
152
|
-
"metadata": {},
|
|
153
|
-
"source": [
|
|
154
|
-
"## 6. Análisis Estadístico Rápido\n",
|
|
155
|
-
"\n",
|
|
156
|
-
"Al obtener un `pandas.DataFrame`, podemos realizar análisis técnicos de inmediato."
|
|
157
|
-
]
|
|
158
|
-
},
|
|
159
|
-
{
|
|
160
|
-
"cell_type": "code",
|
|
161
|
-
"execution_count": null,
|
|
162
|
-
"metadata": {},
|
|
163
|
-
"outputs": [],
|
|
164
|
-
"source": [
|
|
165
|
-
"import matplotlib.pyplot as plt\n",
|
|
166
|
-
"\n",
|
|
167
|
-
"# Correlación móvil entre Inflación y Expectativas (si tuviéramos ambos)\n",
|
|
168
|
-
"print(\"Resumen estadístico de las series descargadas:\")\n",
|
|
169
|
-
"print(df.describe())"
|
|
170
|
-
]
|
|
171
|
-
},
|
|
172
|
-
{
|
|
173
|
-
"cell_type": "markdown",
|
|
174
|
-
"metadata": {},
|
|
175
|
-
"source": [
|
|
176
|
-
"---\n",
|
|
177
|
-
"### ¿Necesitas más ayuda?\n",
|
|
178
|
-
"\n",
|
|
179
|
-
"- **Documentación completa**: [README.md](https://github.com/MaykolMedrano/mcp_bcrp/blob/main/README.md)\n",
|
|
180
|
-
"- **Reportar problemas**: [GitHub Issues](https://github.com/MaykolMedrano/mcp_bcrp/issues)\n",
|
|
181
|
-
"\n",
|
|
182
|
-
"Desarrollado con ❤️ para la comunidad de analistas económicos de Perú."
|
|
183
|
-
]
|
|
184
|
-
}
|
|
185
|
-
],
|
|
186
|
-
"metadata": {
|
|
187
|
-
"kernelspec": {
|
|
188
|
-
"display_name": "Python 3",
|
|
189
|
-
"language": "python",
|
|
190
|
-
"name": "python3"
|
|
191
|
-
},
|
|
192
|
-
"language_info": {
|
|
193
|
-
"codemirror_mode": {
|
|
194
|
-
"name": "ipython",
|
|
195
|
-
"version": 3
|
|
196
|
-
},
|
|
197
|
-
"file_extension": ".py",
|
|
198
|
-
"mimetype": "text/x-python",
|
|
199
|
-
"name": "python",
|
|
200
|
-
"nbconvert_exporter": "python",
|
|
201
|
-
"pygments_lexer": "ipython3",
|
|
202
|
-
"version": "3.11.1"
|
|
203
|
-
}
|
|
204
|
-
},
|
|
205
|
-
"nbformat": 4,
|
|
206
|
-
"nbformat_minor": 5
|
|
207
|
-
}
|
|
@@ -1,237 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Deterministic Search Engine for BCRP Series.
|
|
3
|
-
|
|
4
|
-
Pipeline:
|
|
5
|
-
1. Canonical Normalization (lowercase, remove accents, stopwords)
|
|
6
|
-
2. Attribute Extraction (currency, horizon, component)
|
|
7
|
-
3. Hard Filters
|
|
8
|
-
4. Fuzzy Scoring with RapidFuzz
|
|
9
|
-
5. Ambiguity Detection
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
import pandas as pd
|
|
13
|
-
import logging
|
|
14
|
-
import unicodedata
|
|
15
|
-
import re
|
|
16
|
-
from typing import Dict, Any
|
|
17
|
-
|
|
18
|
-
try:
|
|
19
|
-
from rapidfuzz import fuzz
|
|
20
|
-
except ImportError:
|
|
21
|
-
fuzz = None
|
|
22
|
-
|
|
23
|
-
logger = logging.getLogger("mcp_bcrp")
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class SearchEngine:
    """
    Deterministic search engine for BCRP series.

    Implements a pipeline for univocal series resolution:
      1. Canonical normalization (lowercase, accents, punctuation, stopwords)
      2. Attribute extraction (currency, horizon, component, scale)
      3. Hard filters (currency, horizon, component)
      4. Fuzzy scoring (RapidFuzz ``token_sort_ratio`` when available)
      5. Ambiguity detection among top-tier candidates
    """

    # Spanish function words dropped during normalization.
    STOPWORDS = {'de', 'del', 'el', 'la', 'los', 'las', 'y', 'en', 'al', 'con', 'por'}

    # Scoring tunables (previously hard-coded inline in solve()). Override at
    # class or instance level to adjust strictness; the defaults preserve the
    # original behavior exactly.
    MIN_SCORE = 80       # minimum penalized score for a candidate to survive
    TOKEN_PENALTY = 5    # penalty per query token absent from the series name
    TIE_MARGIN = 5       # window below the best score treated as "top tier"

    def __init__(self, metadata_df: pd.DataFrame) -> None:
        """
        Initialize search engine with BCRP metadata.

        Args:
            metadata_df: DataFrame with BCRP series metadata. Expected to carry
                'Nombre de serie' plus 'Código de serie' (or its unaccented
                variant 'Codigo de serie').
        """
        self.df = metadata_df
        self._preprocess_metadata()

    def _normalize(self, text: str) -> str:
        """
        Canonically normalize text.

        Applies lowercasing, accent removal, punctuation removal, stopword
        filtering, and whitespace collapsing.

        Args:
            text: Raw input text.

        Returns:
            Normalized string of clean tokens (empty for non-string input).
        """
        if not isinstance(text, str):
            return ""

        lowered = text.lower()
        # NFKD splits accented characters into base + combining mark; the
        # ASCII round-trip then drops the marks (e.g. "código" -> "codigo").
        ascii_text = unicodedata.normalize('NFKD', lowered).encode('ascii', 'ignore').decode('utf-8')
        no_punct = re.sub(r'[^\w\s]', ' ', ascii_text)
        return " ".join(t for t in no_punct.split() if t not in self.STOPWORDS)

    def _extract_attributes(self, text_norm: str) -> Dict[str, Any]:
        """
        Extract structured attributes from normalized text.

        Args:
            text_norm: Normalized text string.

        Returns:
            Dict with 'currency', 'horizon', 'component' and 'scale'; each
            value is a lowercase tag or None when not detected.
        """
        tokens = set(text_norm.split())

        # Currency: the bare token "s" is what normalization leaves behind
        # from the sol symbol "S/." once punctuation is stripped.
        if tokens & {'us', 'usd', 'dolares'}:
            currency = 'usd'
        elif tokens & {'s', 'pen', 'soles'}:
            currency = 'pen'
        else:
            currency = None

        # Horizon / component / scale use substring (not token) matching,
        # mirroring how they appear embedded in BCRP series names.
        if "corto" in text_norm:
            horizon = 'corto'
        elif "largo" in text_norm:
            horizon = 'largo'
        else:
            horizon = None

        if "activos" in text_norm:
            component = 'activos'
        elif "pasivos" in text_norm:
            component = 'pasivos'
        else:
            component = None

        if "millones" in text_norm:
            scale = 'millones'
        elif "miles" in text_norm:
            scale = 'miles'
        else:
            scale = None

        return {
            "currency": currency,
            "horizon": horizon,
            "component": component,
            "scale": scale
        }

    def _preprocess_metadata(self) -> None:
        """Pre-compute normalized names, token sets and attributes per series."""
        if self.df.empty:
            self.search_corpus = []
            return

        corpus = []
        for idx, row in self.df.iterrows():
            raw_name = str(row.get('Nombre de serie', ''))
            name_norm = self._normalize(raw_name)
            attrs = self._extract_attributes(name_norm)

            corpus.append({
                "idx": idx,
                # Metadata files appear both with and without the accent.
                "codigo_serie": row.get("Código de serie") or row.get("Codigo de serie"),
                "name_original": raw_name,
                "name_norm": name_norm,
                "tokens": set(name_norm.split()),
                "currency": attrs['currency'],
                "horizon": attrs['horizon'],
                "component": attrs['component'],
                "scale": attrs['scale']
            })

        self.search_corpus = corpus

    def solve(self, query: str) -> Dict[str, Any]:
        """
        Resolve a free-text query to a single series deterministically.

        Args:
            query: Search query (e.g., "tipo de cambio USD").

        Returns:
            Dict with 'codigo_serie', 'confidence' and 'name' on success, or
            'error' plus 'reason' (and possibly 'candidates') on failure or
            ambiguity.
        """
        if not self.search_corpus:
            return {"error": "no_match", "reason": "empty_corpus"}

        q_norm = self._normalize(query)
        q_attrs = self._extract_attributes(q_norm)
        q_tokens = set(q_norm.split())

        if not q_tokens:
            return {"error": "no_match", "reason": "empty_query"}

        # Hard filters: any attribute stated in the query must match exactly.
        # NOTE: 'scale' is extracted but deliberately not used as a hard
        # filter (the pipeline documents only currency/horizon/component).
        candidates = self.search_corpus
        for attr in ('currency', 'horizon', 'component'):
            if q_attrs[attr]:
                candidates = [c for c in candidates if c[attr] == q_attrs[attr]]

        if not candidates:
            return {"error": "no_match", "reason": "filters_eliminated_all"}

        # Fuzzy scoring, penalizing query tokens absent from the series name.
        # fuzz is None when rapidfuzz is not installed; scores then stay 0 and
        # nothing clears MIN_SCORE.
        scored_candidates = []
        for c in candidates:
            base = fuzz.token_sort_ratio(q_norm, c['name_norm']) if fuzz else 0
            missing = q_tokens - c['tokens']
            final = base - self.TOKEN_PENALTY * len(missing)

            if final >= self.MIN_SCORE:
                scored_candidates.append({
                    "series": c,
                    "score": final,
                    "original_score": base,
                    "missing_query_tokens": missing
                })

        # Python's sort is stable, so ties keep corpus order -> deterministic.
        scored_candidates.sort(key=lambda x: x['score'], reverse=True)

        if not scored_candidates:
            return {"error": "no_match", "reason": "low_score"}

        top = scored_candidates[0]

        # Single survivor: return it directly.
        if len(scored_candidates) == 1:
            return {
                "codigo_serie": top['series']['codigo_serie'],
                "confidence": round(top['score'] / 100.0, 2),
                "name": top['series']['name_original']
            }

        # Ambiguity: near-top candidates that disagree on currency/component.
        top_tier = [
            x for x in scored_candidates
            if x['score'] >= (top['score'] - self.TIE_MARGIN)
        ]
        currencies = {x['series']['currency'] for x in top_tier}
        components = {x['series']['component'] for x in top_tier}
        # NOTE(review): horizon is not part of this ambiguity check even
        # though it is a hard filter — confirm the asymmetry is intentional.
        if len(currencies) > 1 or len(components) > 1:
            return {
                "error": "ambiguedad",
                "candidates": [x['series']['codigo_serie'] for x in top_tier[:5]],
                "reason": "mixed_attributes_in_top_results"
            }

        # Deterministic winner.
        return {
            "codigo_serie": top['series']['codigo_serie'],
            "confidence": round(top['score'] / 100.0, 2),
            "name": top['series']['name_original']
        }
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|