mcp-bcrp 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/.github/workflows/ci.yml +1 -1
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/PKG-INFO +11 -12
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/README.md +8 -10
- mcp_bcrp-0.1.3/examples/Guia_Usuario_BCRP.ipynb +191 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/mcp_bcrp/__init__.py +1 -1
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/mcp_bcrp/_version.py +3 -3
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/mcp_bcrp/client.py +21 -0
- mcp_bcrp-0.1.3/mcp_bcrp/search_engine.py +229 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/mcp_bcrp/server.py +20 -26
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/PKG-INFO +11 -12
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/SOURCES.txt +1 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/pyproject.toml +2 -1
- mcp_bcrp-0.1.1/mcp_bcrp/search_engine.py +0 -237
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/.github/workflows/publish.yml +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/.gitignore +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/CONTRIBUTING.md +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/LICENSE +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/examples/basic_usage.py +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/mcp_bcrp/__main__.py +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/dependency_links.txt +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/entry_points.txt +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/requires.txt +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/mcp_bcrp.egg-info/top_level.txt +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/run.py +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/setup.cfg +0 -0
- {mcp_bcrp-0.1.1 → mcp_bcrp-0.1.3}/tests/test_basic.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mcp-bcrp
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: MCP Server for Banco Central de Reserva del Perú (BCRP) Statistical API
|
|
5
5
|
Author-email: Maykol Medrano <mmedrano2@uc.cl>
|
|
6
6
|
License: MIT
|
|
@@ -14,13 +14,14 @@ Classifier: Intended Audience :: Financial and Insurance Industry
|
|
|
14
14
|
Classifier: Intended Audience :: Science/Research
|
|
15
15
|
Classifier: License :: OSI Approved :: MIT License
|
|
16
16
|
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
18
|
Classifier: Programming Language :: Python :: 3.10
|
|
18
19
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
20
|
Classifier: Programming Language :: Python :: 3.12
|
|
20
21
|
Classifier: Programming Language :: Python :: 3.13
|
|
21
22
|
Classifier: Topic :: Office/Business :: Financial
|
|
22
23
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
23
|
-
Requires-Python: >=3.
|
|
24
|
+
Requires-Python: >=3.9
|
|
24
25
|
Description-Content-Type: text/markdown
|
|
25
26
|
License-File: LICENSE
|
|
26
27
|
Requires-Dist: fastmcp>=0.1.0
|
|
@@ -36,16 +37,14 @@ Dynamic: license-file
|
|
|
36
37
|
# mcp-bcrp
|
|
37
38
|
|
|
38
39
|
[](https://www.python.org/downloads/)
|
|
39
|
-
[](https://github.com/psf/black)
|
|
40
|
+
[](https://github.com/MaykolMedrano/mcp_bcrp)
|
|
41
|
+
[](https://pypi.org/project/mcp-bcrp/)
|
|
42
|
+
[](https://opensource.org/licenses/MIT)
|
|
43
43
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
**MCP Server for Banco Central de Reserva del Peru (BCRP) Statistical API**
|
|
44
|
+
[-green?style=for-the-badge&logo=jupyter)](https://github.com/MaykolMedrano/mcp_bcrp/blob/main/examples/Guia_Usuario_BCRP.ipynb)
|
|
45
|
+
[](https://colab.research.google.com/github/MaykolMedrano/mcp_bcrp/blob/main/examples/Guia_Usuario_BCRP.ipynb)
|
|
47
46
|
|
|
48
|
-
|
|
47
|
+
MCP Server and Python library for the **Banco Central de Reserva del Perú (BCRP)** Statistical API. Access over 5,000 macroeconomic indicators directly from your AI agent or Python environment.
|
|
49
48
|
|
|
50
49
|
---
|
|
51
50
|
|
|
@@ -122,8 +121,8 @@ pip install -e .
|
|
|
122
121
|
### With Optional Dependencies
|
|
123
122
|
|
|
124
123
|
```bash
|
|
125
|
-
pip install mcp-bcrp[charts] # Include matplotlib for chart generation
|
|
126
|
-
pip install mcp-bcrp[dev] # Include development dependencies
|
|
124
|
+
pip install "mcp-bcrp[charts]" # Include matplotlib for chart generation
|
|
125
|
+
pip install "mcp-bcrp[dev]" # Include development dependencies
|
|
127
126
|
```
|
|
128
127
|
|
|
129
128
|
---
|
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
# mcp-bcrp
|
|
2
2
|
|
|
3
3
|
[](https://www.python.org/downloads/)
|
|
4
|
-
[](https://github.com/psf/black)
|
|
4
|
+
[](https://github.com/MaykolMedrano/mcp_bcrp)
|
|
5
|
+
[](https://pypi.org/project/mcp-bcrp/)
|
|
6
|
+
[](https://opensource.org/licenses/MIT)
|
|
8
7
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
**MCP Server for Banco Central de Reserva del Peru (BCRP) Statistical API**
|
|
8
|
+
[-green?style=for-the-badge&logo=jupyter)](https://github.com/MaykolMedrano/mcp_bcrp/blob/main/examples/Guia_Usuario_BCRP.ipynb)
|
|
9
|
+
[](https://colab.research.google.com/github/MaykolMedrano/mcp_bcrp/blob/main/examples/Guia_Usuario_BCRP.ipynb)
|
|
12
10
|
|
|
13
|
-
|
|
11
|
+
MCP Server and Python library for the **Banco Central de Reserva del Perú (BCRP)** Statistical API. Access over 5,000 macroeconomic indicators directly from your AI agent or Python environment.
|
|
14
12
|
|
|
15
13
|
---
|
|
16
14
|
|
|
@@ -87,8 +85,8 @@ pip install -e .
|
|
|
87
85
|
### With Optional Dependencies
|
|
88
86
|
|
|
89
87
|
```bash
|
|
90
|
-
pip install mcp-bcrp[charts] # Include matplotlib for chart generation
|
|
91
|
-
pip install mcp-bcrp[dev] # Include development dependencies
|
|
88
|
+
pip install "mcp-bcrp[charts]" # Include matplotlib for chart generation
|
|
89
|
+
pip install "mcp-bcrp[dev]" # Include development dependencies
|
|
92
90
|
```
|
|
93
91
|
|
|
94
92
|
---
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "markdown",
|
|
5
|
+
"metadata": {},
|
|
6
|
+
"source": [
|
|
7
|
+
"# 📊 Guía de Usuario: mcp-bcrp (v0.1.3)\n",
|
|
8
|
+
"\n",
|
|
9
|
+
"Bienvenido a la guía oficial de **mcp-bcrp**, la librería y servidor MCP para acceder a las estadísticas del **Banco Central de Reserva del Perú (BCRP)**.\n",
|
|
10
|
+
"\n",
|
|
11
|
+
"[](https://github.com/MaykolMedrano/mcp_bcrp)\n",
|
|
12
|
+
"[](https://pypi.org/project/mcp-bcrp/)\n",
|
|
13
|
+
"\n",
|
|
14
|
+
"Esta versión incluye un **Buscador Interactivo** y **Tablas Inteligentes** que resuelven nombres automáticamente."
|
|
15
|
+
]
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"cell_type": "markdown",
|
|
19
|
+
"metadata": {},
|
|
20
|
+
"source": [
|
|
21
|
+
"## 1. Instalación\n",
|
|
22
|
+
"\n",
|
|
23
|
+
"Instalamos la librería usando el comando mágico `%pip` (el más recomendado en notebooks)."
|
|
24
|
+
]
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
"cell_type": "code",
|
|
28
|
+
"execution_count": null,
|
|
29
|
+
"metadata": {},
|
|
30
|
+
"outputs": [],
|
|
31
|
+
"source": [
|
|
32
|
+
"%pip install \"mcp-bcrp[charts]\" -U\n",
|
|
33
|
+
"\n",
|
|
34
|
+
"# NOTA: Si es la primera vez que instalas, reinicia el entorno (Runtime -> Restart session)."
|
|
35
|
+
]
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"cell_type": "markdown",
|
|
39
|
+
"metadata": {},
|
|
40
|
+
"source": [
|
|
41
|
+
"## 2. Configuración y Metadatos\n",
|
|
42
|
+
"\n",
|
|
43
|
+
"Cargamos los metadatos para habilitar la búsqueda inteligente."
|
|
44
|
+
]
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"cell_type": "code",
|
|
48
|
+
"execution_count": null,
|
|
49
|
+
"metadata": {},
|
|
50
|
+
"outputs": [],
|
|
51
|
+
"source": [
|
|
52
|
+
"from mcp_bcrp.client import AsyncBCRPClient, BCRPMetadata\n",
|
|
53
|
+
"import pandas as pd\n",
|
|
54
|
+
"\n",
|
|
55
|
+
"metadata = BCRPMetadata()\n",
|
|
56
|
+
"await metadata.load()"
|
|
57
|
+
]
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"cell_type": "markdown",
|
|
61
|
+
"metadata": {},
|
|
62
|
+
"source": [
|
|
63
|
+
"## 3. Buscador Inteligente e Interactivo\n",
|
|
64
|
+
"\n",
|
|
65
|
+
"A diferencia de otros buscadores, `mcp-bcrp` entiende sinónimos comunes (ej: \"tc\" -> \"tipo de cambio\") y si hay dudas, te ofrece opciones."
|
|
66
|
+
]
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
"cell_type": "code",
|
|
70
|
+
"execution_count": null,
|
|
71
|
+
"metadata": {},
|
|
72
|
+
"outputs": [],
|
|
73
|
+
"source": [
|
|
74
|
+
"# Ejemplo 1: Búsqueda con sinónimos y lenguaje natural\n",
|
|
75
|
+
"query = \"precio internacional cobre\"\n",
|
|
76
|
+
"resultado = metadata.solve(query)\n",
|
|
77
|
+
"\n",
|
|
78
|
+
"if \"candidates\" in resultado:\n",
|
|
79
|
+
" print(\"🔍 Encontré varias opciones, ¿cuál necesitas?\")\n",
|
|
80
|
+
" display(pd.DataFrame(resultado[\"candidates\"]))\n",
|
|
81
|
+
"else:\n",
|
|
82
|
+
" print(f\"📌 Única coincidencia encontrada: {resultado.get('name')}\")\n",
|
|
83
|
+
" print(f\"Código: {resultado.get('codigo_serie')}\")"
|
|
84
|
+
]
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
"cell_type": "code",
|
|
88
|
+
"execution_count": null,
|
|
89
|
+
"metadata": {},
|
|
90
|
+
"outputs": [],
|
|
91
|
+
"source": [
|
|
92
|
+
"# Ejemplo 2: Búsqueda de Tipo de Cambio (TC)\n",
|
|
93
|
+
"query_tc = \"tc venta\"\n",
|
|
94
|
+
"resultado_tc = metadata.solve(query_tc)\n",
|
|
95
|
+
"\n",
|
|
96
|
+
"if \"candidates\" in resultado_tc:\n",
|
|
97
|
+
" print(\"🔍 Opciones para Tipo de Cambio:\")\n",
|
|
98
|
+
" display(pd.DataFrame(resultado_tc[\"candidates\"]))\n",
|
|
99
|
+
"else: \n",
|
|
100
|
+
" print(f\"✅ Resultado: {resultado_tc.get('name')}\")"
|
|
101
|
+
]
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"cell_type": "markdown",
|
|
105
|
+
"metadata": {},
|
|
106
|
+
"source": [
|
|
107
|
+
"## 4. Tablas Inteligentes (Multi-indicador)\n",
|
|
108
|
+
"\n",
|
|
109
|
+
"Puedes pedir varios códigos y la librería resolverá los nombres por ti automáticamente."
|
|
110
|
+
]
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
"cell_type": "code",
|
|
114
|
+
"execution_count": null,
|
|
115
|
+
"metadata": {},
|
|
116
|
+
"outputs": [],
|
|
117
|
+
"source": [
|
|
118
|
+
"from mcp_bcrp.server import get_table\n",
|
|
119
|
+
"import json\n",
|
|
120
|
+
"\n",
|
|
121
|
+
"# Pedimos Inflación y Tipo de Cambio interbancario de una vez\n",
|
|
122
|
+
"series = [\"PN01271PM\", \"PD04637PD\"]\n",
|
|
123
|
+
"\n",
|
|
124
|
+
"tabla_raw = await get_table(series_codes=series, period=\"2024-01/2024-12\")\n",
|
|
125
|
+
"df_tabla = pd.DataFrame(json.loads(tabla_raw))\n",
|
|
126
|
+
"\n",
|
|
127
|
+
"print(\"Tabla con nombres automáticos:\")\n",
|
|
128
|
+
"display(df_tabla.head())"
|
|
129
|
+
]
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"cell_type": "markdown",
|
|
133
|
+
"metadata": {},
|
|
134
|
+
"source": [
|
|
135
|
+
"## 5. Visualización Avanzada\n",
|
|
136
|
+
"\n",
|
|
137
|
+
"Generación de gráficos con leyendas automáticas."
|
|
138
|
+
]
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"cell_type": "code",
|
|
142
|
+
"execution_count": null,
|
|
143
|
+
"metadata": {},
|
|
144
|
+
"outputs": [],
|
|
145
|
+
"source": [
|
|
146
|
+
"from mcp_bcrp.server import plot_chart\n",
|
|
147
|
+
"from IPython.display import Image\n",
|
|
148
|
+
"\n",
|
|
149
|
+
"res = await plot_chart(\n",
|
|
150
|
+
" series_codes=[\"PD38048AM\", \"PD38049AM\"],\n",
|
|
151
|
+
" period=\"2022-01/2024-12\",\n",
|
|
152
|
+
" title=\"Expectativas de Crecimiento PBI\"\n",
|
|
153
|
+
")\n",
|
|
154
|
+
"\n",
|
|
155
|
+
"info = json.loads(res)\n",
|
|
156
|
+
"Image(info['chart_path'])"
|
|
157
|
+
]
|
|
158
|
+
},
|
|
159
|
+
{
|
|
160
|
+
"cell_type": "markdown",
|
|
161
|
+
"metadata": {},
|
|
162
|
+
"source": [
|
|
163
|
+
"---\n",
|
|
164
|
+
"### Recursos\n",
|
|
165
|
+
"- **Issue Tracker**: [GitHub Issues](https://github.com/MaykolMedrano/mcp_bcrp/issues)\n",
|
|
166
|
+
"- **Documentación SDK**: [PyPI mcp-bcrp](https://pypi.org/project/mcp-bcrp/)"
|
|
167
|
+
]
|
|
168
|
+
}
|
|
169
|
+
],
|
|
170
|
+
"metadata": {
|
|
171
|
+
"kernelspec": {
|
|
172
|
+
"display_name": "Python 3",
|
|
173
|
+
"language": "python",
|
|
174
|
+
"name": "python3"
|
|
175
|
+
},
|
|
176
|
+
"language_info": {
|
|
177
|
+
"codemirror_mode": {
|
|
178
|
+
"name": "ipython",
|
|
179
|
+
"version": 3
|
|
180
|
+
},
|
|
181
|
+
"file_extension": ".py",
|
|
182
|
+
"mimetype": "text/x-python",
|
|
183
|
+
"name": "python",
|
|
184
|
+
"nbconvert_exporter": "python",
|
|
185
|
+
"pygments_lexer": "ipython3",
|
|
186
|
+
"version": "3.11.1"
|
|
187
|
+
}
|
|
188
|
+
},
|
|
189
|
+
"nbformat": 4,
|
|
190
|
+
"nbformat_minor": 5
|
|
191
|
+
}
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.1'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1, 1)
|
|
31
|
+
__version__ = version = '0.1.3'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 3)
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'g1c39c6e85'
|
|
@@ -154,6 +154,27 @@ class BCRPMetadata:
|
|
|
154
154
|
mask &= kw_mask
|
|
155
155
|
return self.df[mask].head(limit)
|
|
156
156
|
|
|
157
|
+
def get_series_names(self, codes: List[str]) -> List[str]:
|
|
158
|
+
"""
|
|
159
|
+
Retrieve original names for a list of series codes.
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
codes: List of BCRP series codes.
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
List of names corresponding to the codes.
|
|
166
|
+
Uses the code itself if name is not found.
|
|
167
|
+
"""
|
|
168
|
+
if self.df.empty:
|
|
169
|
+
return codes
|
|
170
|
+
|
|
171
|
+
# Standardize columns to search
|
|
172
|
+
code_col = "Código de serie" if "Código de serie" in self.df.columns else "Codigo de serie"
|
|
173
|
+
name_col = "Nombre de serie"
|
|
174
|
+
|
|
175
|
+
mapping = dict(zip(self.df[code_col], self.df[name_col]))
|
|
176
|
+
return [mapping.get(code, code) for code in codes]
|
|
177
|
+
|
|
157
178
|
class AsyncBCRPClient:
|
|
158
179
|
"""
|
|
159
180
|
Async client for BCRP (Banco Central de Reserva del Perú) Statistical API.
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Deterministic Search Engine for BCRP Series.
|
|
3
|
+
|
|
4
|
+
Pipeline:
|
|
5
|
+
1. Canonical Normalization (lowercase, remove accents, synonyms)
|
|
6
|
+
2. Attribute Extraction (currency, horizon, component, side)
|
|
7
|
+
3. Hard Filters
|
|
8
|
+
4. Fuzzy Scoring with RapidFuzz (Token Set Ratio)
|
|
9
|
+
5. Interactive Candidate Resolution
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import logging
|
|
14
|
+
import unicodedata
|
|
15
|
+
import re
|
|
16
|
+
from typing import Dict, Any, List
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from rapidfuzz import fuzz
|
|
20
|
+
except ImportError:
|
|
21
|
+
fuzz = None
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger("mcp_bcrp")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SearchEngine:
|
|
27
|
+
"""
|
|
28
|
+
Interactive Search Engine for BCRP Series.
|
|
29
|
+
|
|
30
|
+
Implements a pipeline for robust series resolution:
|
|
31
|
+
1. Canonical Normalization with Synonym Support
|
|
32
|
+
2. Attribute Filtering (Currency, Side)
|
|
33
|
+
3. Fuzzy Set Scoring
|
|
34
|
+
4. Multi-candidate Result Generation
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
STOPWORDS = {'de', 'del', 'el', 'la', 'los', 'las', 'y', 'en', 'al', 'con', 'por', 'precio', 'valor', 'indicador'}
|
|
38
|
+
|
|
39
|
+
# Synonym map for common abbreviations
|
|
40
|
+
SYNONYMS = {
|
|
41
|
+
"tc": "tipo cambio",
|
|
42
|
+
"t.c.": "tipo cambio",
|
|
43
|
+
"pbi": "producto bruto interno",
|
|
44
|
+
"internacional": "lme londres Chicago nymex",
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
def __init__(self, metadata_df: pd.DataFrame):
|
|
48
|
+
"""
|
|
49
|
+
Initialize search engine with BCRP metadata.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
metadata_df: DataFrame with BCRP series metadata.
|
|
53
|
+
"""
|
|
54
|
+
self.df = metadata_df
|
|
55
|
+
self._preprocess_metadata()
|
|
56
|
+
|
|
57
|
+
def _normalize(self, text: str) -> str:
|
|
58
|
+
"""
|
|
59
|
+
Canonical normalization of text.
|
|
60
|
+
|
|
61
|
+
Applies: lowercase, accent removal, punctuation removal,
|
|
62
|
+
synonym expansion, stopword filtering.
|
|
63
|
+
"""
|
|
64
|
+
if not isinstance(text, str):
|
|
65
|
+
return ""
|
|
66
|
+
|
|
67
|
+
text = text.lower()
|
|
68
|
+
# Remove accents
|
|
69
|
+
text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8')
|
|
70
|
+
# Replace punctuation
|
|
71
|
+
text = re.sub(r'[^\w\s]', ' ', text)
|
|
72
|
+
|
|
73
|
+
# Apply synonyms (simple replacement)
|
|
74
|
+
for syn, target in self.SYNONYMS.items():
|
|
75
|
+
if syn in text.split():
|
|
76
|
+
text = text.replace(syn, target)
|
|
77
|
+
|
|
78
|
+
tokens = text.split()
|
|
79
|
+
clean_tokens = [t for t in tokens if t not in self.STOPWORDS]
|
|
80
|
+
|
|
81
|
+
return " ".join(clean_tokens)
|
|
82
|
+
|
|
83
|
+
def _extract_attributes(self, text_norm: str) -> Dict[str, Any]:
|
|
84
|
+
"""Extract structured attributes to help disambiguate."""
|
|
85
|
+
attrs = {
|
|
86
|
+
"currency": None,
|
|
87
|
+
"side": None, # compra / venta
|
|
88
|
+
"horizon": None,
|
|
89
|
+
"component": None
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
tokens = set(text_norm.split())
|
|
93
|
+
|
|
94
|
+
# Currency
|
|
95
|
+
if any(t in tokens for t in ['us', 'usd', 'dolares']):
|
|
96
|
+
attrs['currency'] = 'usd'
|
|
97
|
+
elif any(t in tokens for t in ['s', 'pen', 'soles']):
|
|
98
|
+
attrs['currency'] = 'pen'
|
|
99
|
+
|
|
100
|
+
# Side (Critical for FX)
|
|
101
|
+
if "compra" in tokens:
|
|
102
|
+
attrs['side'] = 'compra'
|
|
103
|
+
elif "venta" in tokens:
|
|
104
|
+
attrs['side'] = 'venta'
|
|
105
|
+
|
|
106
|
+
# Horizon
|
|
107
|
+
if "corto" in tokens:
|
|
108
|
+
attrs['horizon'] = 'corto'
|
|
109
|
+
elif "largo" in tokens:
|
|
110
|
+
attrs['horizon'] = 'largo'
|
|
111
|
+
|
|
112
|
+
return attrs
|
|
113
|
+
|
|
114
|
+
def _preprocess_metadata(self):
|
|
115
|
+
"""Pre-calculate normalized search corpus."""
|
|
116
|
+
if self.df.empty:
|
|
117
|
+
self.search_corpus = []
|
|
118
|
+
return
|
|
119
|
+
|
|
120
|
+
processed = []
|
|
121
|
+
for idx, row in self.df.iterrows():
|
|
122
|
+
raw_name = str(row.get('Nombre de serie', ''))
|
|
123
|
+
name_norm = self._normalize(raw_name)
|
|
124
|
+
attrs = self._extract_attributes(name_norm)
|
|
125
|
+
|
|
126
|
+
# Use original code column names if possible
|
|
127
|
+
code = row.get("Código de serie") or row.get("Codigo de serie")
|
|
128
|
+
|
|
129
|
+
item = {
|
|
130
|
+
"idx": idx,
|
|
131
|
+
"codigo_serie": code,
|
|
132
|
+
"name_original": raw_name,
|
|
133
|
+
"name_norm": name_norm,
|
|
134
|
+
"tokens": set(name_norm.split()),
|
|
135
|
+
"currency": attrs['currency'],
|
|
136
|
+
"side": attrs['side'],
|
|
137
|
+
"horizon": attrs['horizon']
|
|
138
|
+
}
|
|
139
|
+
processed.append(item)
|
|
140
|
+
|
|
141
|
+
self.search_corpus = processed
|
|
142
|
+
|
|
143
|
+
def solve(self, query: str) -> Dict[str, Any]:
|
|
144
|
+
"""
|
|
145
|
+
Resolve query with interactive candidate logic.
|
|
146
|
+
"""
|
|
147
|
+
if not self.search_corpus:
|
|
148
|
+
return {"error": "no_match", "reason": "empty_corpus"}
|
|
149
|
+
|
|
150
|
+
q_norm = self._normalize(query)
|
|
151
|
+
q_attrs = self._extract_attributes(q_norm)
|
|
152
|
+
q_tokens = set(q_norm.split())
|
|
153
|
+
|
|
154
|
+
if not q_tokens:
|
|
155
|
+
return {"error": "no_match", "reason": "empty_query"}
|
|
156
|
+
|
|
157
|
+
# Scoring
|
|
158
|
+
scored = []
|
|
159
|
+
for c in self.search_corpus:
|
|
160
|
+
if not fuzz:
|
|
161
|
+
# Basic token overlap fallback
|
|
162
|
+
intersection = len(q_tokens & c['tokens'])
|
|
163
|
+
score = (intersection / len(q_tokens)) * 100 if q_tokens else 0
|
|
164
|
+
else:
|
|
165
|
+
# Token Set Ratio is perfect for finding "query" inside "long technical title"
|
|
166
|
+
score = fuzz.token_set_ratio(q_norm, c['name_norm'])
|
|
167
|
+
|
|
168
|
+
# Boost if specific side (compra/venta) matches
|
|
169
|
+
if q_attrs['side'] and c['side'] == q_attrs['side']:
|
|
170
|
+
score += 5
|
|
171
|
+
elif q_attrs['side'] and c['side'] and c['side'] != q_attrs['side']:
|
|
172
|
+
score -= 10
|
|
173
|
+
|
|
174
|
+
if score >= 65:
|
|
175
|
+
scored.append({
|
|
176
|
+
"codigo_serie": c['codigo_serie'],
|
|
177
|
+
"name": c['name_original'],
|
|
178
|
+
"score": score
|
|
179
|
+
})
|
|
180
|
+
|
|
181
|
+
scored.sort(key=lambda x: x['score'], reverse=True)
|
|
182
|
+
|
|
183
|
+
if not scored:
|
|
184
|
+
return {"error": "no_match", "reason": "low_confidence"}
|
|
185
|
+
|
|
186
|
+
# Logic for result type
|
|
187
|
+
top_score = scored[0]['score']
|
|
188
|
+
|
|
189
|
+
# 1. Check for ties or very close matches at the top
|
|
190
|
+
# If multiple series have top_score, or are very close (within 2 pts), return candidates.
|
|
191
|
+
high_tier = [s for s in scored if s['score'] >= (top_score - 2)]
|
|
192
|
+
|
|
193
|
+
if len(high_tier) > 1 and top_score < 100:
|
|
194
|
+
# Ambiguity if multiple high matches, unless one is perfect 100 and there are no other 100s
|
|
195
|
+
pass # fall through to candidates logic
|
|
196
|
+
elif len(high_tier) == 1 and top_score >= 85:
|
|
197
|
+
# Single clear winner with good score
|
|
198
|
+
return {
|
|
199
|
+
"codigo_serie": high_tier[0]['codigo_serie'],
|
|
200
|
+
"confidence": round(high_tier[0]['score'] / 100.0, 2),
|
|
201
|
+
"name": high_tier[0]['name']
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
# If top_score is 100, but there are multiple 100s, it's ambiguous
|
|
205
|
+
top_tier_100 = [s for s in scored if s['score'] == 100]
|
|
206
|
+
if len(top_tier_100) == 1:
|
|
207
|
+
return {
|
|
208
|
+
"codigo_serie": top_tier_100[0]['codigo_serie'],
|
|
209
|
+
"confidence": 1.0,
|
|
210
|
+
"name": top_tier_100[0]['name']
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
# 2. Interactive Candidates
|
|
214
|
+
# Return top 5 matches if confidence is mixed or tied
|
|
215
|
+
candidates = []
|
|
216
|
+
seen_codes = set()
|
|
217
|
+
for s in scored[:5]:
|
|
218
|
+
if s['codigo_serie'] not in seen_codes:
|
|
219
|
+
candidates.append({
|
|
220
|
+
"codigo": s['codigo_serie'],
|
|
221
|
+
"nombre": s['name']
|
|
222
|
+
})
|
|
223
|
+
seen_codes.add(s['codigo_serie'])
|
|
224
|
+
|
|
225
|
+
return {
|
|
226
|
+
"error": "ambiguedad",
|
|
227
|
+
"reason": "multiple_candidates",
|
|
228
|
+
"candidates": candidates
|
|
229
|
+
}
|
|
@@ -153,29 +153,17 @@ async def get_table(
|
|
|
153
153
|
if df.empty:
|
|
154
154
|
return "No data found."
|
|
155
155
|
|
|
156
|
-
# 2.
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
# Or is it Index? usebcrp 'variation=1' implies monthly variation.
|
|
161
|
-
|
|
162
|
-
# Ensure time is datetime
|
|
163
|
-
# BCRP returns 'Mmm.YY' or similar sometimes. helper parsing might be needed.
|
|
164
|
-
# But for now, let's just return the raw data properly formatted,
|
|
165
|
-
# Maybe adding a simple pct_change if it's numeric.
|
|
166
|
-
|
|
167
|
-
# For simplicity and reliability in this refactor, we will return the raw values
|
|
168
|
-
# but structured neatly. Re-implementing full 'table' logic from usebcrp might be overkill
|
|
169
|
-
# if the user just wants the data.
|
|
170
|
-
|
|
171
|
-
# However, to be helpful, let's try to set names if provided
|
|
172
|
-
if names:
|
|
173
|
-
# Map codes to names
|
|
174
|
-
# columns are 'time' + codes.
|
|
175
|
-
mapping = {code: name for code, name in zip(series_codes, names)}
|
|
176
|
-
df.rename(columns=mapping, inplace=True)
|
|
156
|
+
# 2. Resolve Names if not provided
|
|
157
|
+
if not names:
|
|
158
|
+
await metadata_client.load()
|
|
159
|
+
names = metadata_client.get_series_names(series_codes)
|
|
177
160
|
|
|
178
|
-
|
|
161
|
+
# 3. Rename columns
|
|
162
|
+
mapping = {code: name for code, name in zip(series_codes, names)}
|
|
163
|
+
df.rename(columns=mapping, inplace=True)
|
|
164
|
+
|
|
165
|
+
return df.to_json(orient='records', date_format='iso', indent=2)
|
|
166
|
+
|
|
179
167
|
|
|
180
168
|
except Exception as e:
|
|
181
169
|
return f"Table generation failed: {str(e)}"
|
|
@@ -234,12 +222,18 @@ async def plot_chart(
|
|
|
234
222
|
df['time'] = df['time'].apply(parse_spanish_date)
|
|
235
223
|
df = df.set_index('time')
|
|
236
224
|
|
|
237
|
-
# 4.
|
|
225
|
+
# 4. Resolve Names if not provided
|
|
226
|
+
if not names:
|
|
227
|
+
await metadata_client.load()
|
|
228
|
+
names = metadata_client.get_series_names(series_codes)
|
|
229
|
+
|
|
230
|
+
# 5. Plot each series
|
|
238
231
|
colors = ['#1a5fb4', '#e01b24', '#33d17a', '#ff7800', '#9141ac']
|
|
239
232
|
for idx, code in enumerate(series_codes):
|
|
240
|
-
if code in df.columns
|
|
241
|
-
|
|
242
|
-
|
|
233
|
+
col_name = code if code in df.columns else (names[idx] if names and names[idx] in df.columns else None)
|
|
234
|
+
if col_name:
|
|
235
|
+
series = df[col_name].dropna()
|
|
236
|
+
label = names[idx] if names and idx < len(names) else col_name
|
|
243
237
|
color = colors[idx % len(colors)]
|
|
244
238
|
ax.plot(series.index, series.values, linewidth=2.5,
|
|
245
239
|
label=label, color=color)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mcp-bcrp
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: MCP Server for Banco Central de Reserva del Perú (BCRP) Statistical API
|
|
5
5
|
Author-email: Maykol Medrano <mmedrano2@uc.cl>
|
|
6
6
|
License: MIT
|
|
@@ -14,13 +14,14 @@ Classifier: Intended Audience :: Financial and Insurance Industry
|
|
|
14
14
|
Classifier: Intended Audience :: Science/Research
|
|
15
15
|
Classifier: License :: OSI Approved :: MIT License
|
|
16
16
|
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
18
|
Classifier: Programming Language :: Python :: 3.10
|
|
18
19
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
20
|
Classifier: Programming Language :: Python :: 3.12
|
|
20
21
|
Classifier: Programming Language :: Python :: 3.13
|
|
21
22
|
Classifier: Topic :: Office/Business :: Financial
|
|
22
23
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
23
|
-
Requires-Python: >=3.
|
|
24
|
+
Requires-Python: >=3.9
|
|
24
25
|
Description-Content-Type: text/markdown
|
|
25
26
|
License-File: LICENSE
|
|
26
27
|
Requires-Dist: fastmcp>=0.1.0
|
|
@@ -36,16 +37,14 @@ Dynamic: license-file
|
|
|
36
37
|
# mcp-bcrp
|
|
37
38
|
|
|
38
39
|
[](https://www.python.org/downloads/)
|
|
39
|
-
[](https://github.com/psf/black)
|
|
40
|
+
[](https://github.com/MaykolMedrano/mcp_bcrp)
|
|
41
|
+
[](https://pypi.org/project/mcp-bcrp/)
|
|
42
|
+
[](https://opensource.org/licenses/MIT)
|
|
43
43
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
**MCP Server for Banco Central de Reserva del Peru (BCRP) Statistical API**
|
|
44
|
+
[-green?style=for-the-badge&logo=jupyter)](https://github.com/MaykolMedrano/mcp_bcrp/blob/main/examples/Guia_Usuario_BCRP.ipynb)
|
|
45
|
+
[](https://colab.research.google.com/github/MaykolMedrano/mcp_bcrp/blob/main/examples/Guia_Usuario_BCRP.ipynb)
|
|
47
46
|
|
|
48
|
-
|
|
47
|
+
MCP Server and Python library for the **Banco Central de Reserva del Perú (BCRP)** Statistical API. Access over 5,000 macroeconomic indicators directly from your AI agent or Python environment.
|
|
49
48
|
|
|
50
49
|
---
|
|
51
50
|
|
|
@@ -122,8 +121,8 @@ pip install -e .
|
|
|
122
121
|
### With Optional Dependencies
|
|
123
122
|
|
|
124
123
|
```bash
|
|
125
|
-
pip install mcp-bcrp[charts] # Include matplotlib for chart generation
|
|
126
|
-
pip install mcp-bcrp[dev] # Include development dependencies
|
|
124
|
+
pip install "mcp-bcrp[charts]" # Include matplotlib for chart generation
|
|
125
|
+
pip install "mcp-bcrp[dev]" # Include development dependencies
|
|
127
126
|
```
|
|
128
127
|
|
|
129
128
|
---
|
|
@@ -7,7 +7,7 @@ name = "mcp-bcrp"
|
|
|
7
7
|
dynamic = ["version"]
|
|
8
8
|
description = "MCP Server for Banco Central de Reserva del Perú (BCRP) Statistical API"
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
11
|
license = {text = "MIT"}
|
|
12
12
|
authors = [
|
|
13
13
|
{name = "Maykol Medrano", email = "mmedrano2@uc.cl"}
|
|
@@ -29,6 +29,7 @@ classifiers = [
|
|
|
29
29
|
"Intended Audience :: Science/Research",
|
|
30
30
|
"License :: OSI Approved :: MIT License",
|
|
31
31
|
"Programming Language :: Python :: 3",
|
|
32
|
+
"Programming Language :: Python :: 3.9",
|
|
32
33
|
"Programming Language :: Python :: 3.10",
|
|
33
34
|
"Programming Language :: Python :: 3.11",
|
|
34
35
|
"Programming Language :: Python :: 3.12",
|
|
@@ -1,237 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Deterministic Search Engine for BCRP Series.
|
|
3
|
-
|
|
4
|
-
Pipeline:
|
|
5
|
-
1. Canonical Normalization (lowercase, remove accents, stopwords)
|
|
6
|
-
2. Attribute Extraction (currency, horizon, component)
|
|
7
|
-
3. Hard Filters
|
|
8
|
-
4. Fuzzy Scoring with RapidFuzz
|
|
9
|
-
5. Ambiguity Detection
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
import pandas as pd
|
|
13
|
-
import logging
|
|
14
|
-
import unicodedata
|
|
15
|
-
import re
|
|
16
|
-
from typing import Dict, Any
|
|
17
|
-
|
|
18
|
-
try:
|
|
19
|
-
from rapidfuzz import fuzz
|
|
20
|
-
except ImportError:
|
|
21
|
-
fuzz = None
|
|
22
|
-
|
|
23
|
-
logger = logging.getLogger("mcp_bcrp")
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class SearchEngine:
|
|
27
|
-
"""
|
|
28
|
-
Deterministic Search Engine for BCRP Series.
|
|
29
|
-
|
|
30
|
-
Implements a pipeline for univocal series resolution:
|
|
31
|
-
1. Canonical Normalization
|
|
32
|
-
2. Hard Filters (Currency, Horizon, Component)
|
|
33
|
-
3. Fuzzy Scoring
|
|
34
|
-
4. Ambiguity Detection
|
|
35
|
-
"""
|
|
36
|
-
|
|
37
|
-
STOPWORDS = {'de', 'del', 'el', 'la', 'los', 'las', 'y', 'en', 'al', 'con', 'por'}
|
|
38
|
-
|
|
39
|
-
def __init__(self, metadata_df: pd.DataFrame):
|
|
40
|
-
"""
|
|
41
|
-
Initialize search engine with BCRP metadata.
|
|
42
|
-
|
|
43
|
-
Args:
|
|
44
|
-
metadata_df: DataFrame with BCRP series metadata.
|
|
45
|
-
"""
|
|
46
|
-
self.df = metadata_df
|
|
47
|
-
self._preprocess_metadata()
|
|
48
|
-
|
|
49
|
-
def _normalize(self, text: str) -> str:
|
|
50
|
-
"""
|
|
51
|
-
Canonical normalization of text.
|
|
52
|
-
|
|
53
|
-
Applies: lowercase, accent removal, punctuation removal,
|
|
54
|
-
stopword filtering, and space collapsing.
|
|
55
|
-
|
|
56
|
-
Args:
|
|
57
|
-
text: Raw input text.
|
|
58
|
-
|
|
59
|
-
Returns:
|
|
60
|
-
Normalized string with clean tokens.
|
|
61
|
-
"""
|
|
62
|
-
if not isinstance(text, str):
|
|
63
|
-
return ""
|
|
64
|
-
|
|
65
|
-
text = text.lower()
|
|
66
|
-
text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8')
|
|
67
|
-
text = re.sub(r'[^\w\s]', ' ', text)
|
|
68
|
-
tokens = text.split()
|
|
69
|
-
clean_tokens = [t for t in tokens if t not in self.STOPWORDS]
|
|
70
|
-
|
|
71
|
-
return " ".join(clean_tokens)
|
|
72
|
-
|
|
73
|
-
def _extract_attributes(self, text_norm: str) -> Dict[str, Any]:
|
|
74
|
-
"""
|
|
75
|
-
Extract structured attributes from normalized text.
|
|
76
|
-
|
|
77
|
-
Args:
|
|
78
|
-
text_norm: Normalized text string.
|
|
79
|
-
|
|
80
|
-
Returns:
|
|
81
|
-
Dictionary with currency, horizon, component, and scale.
|
|
82
|
-
"""
|
|
83
|
-
attrs = {
|
|
84
|
-
"currency": None,
|
|
85
|
-
"horizon": None,
|
|
86
|
-
"component": None,
|
|
87
|
-
"scale": None
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
tokens = set(text_norm.split())
|
|
91
|
-
|
|
92
|
-
# Currency detection
|
|
93
|
-
if any(t in tokens for t in ['us', 'usd', 'dolares']):
|
|
94
|
-
attrs['currency'] = 'usd'
|
|
95
|
-
elif any(t in tokens for t in ['s', 'pen', 'soles']):
|
|
96
|
-
attrs['currency'] = 'pen'
|
|
97
|
-
|
|
98
|
-
# Horizon detection
|
|
99
|
-
if "corto" in text_norm:
|
|
100
|
-
attrs['horizon'] = 'corto'
|
|
101
|
-
elif "largo" in text_norm:
|
|
102
|
-
attrs['horizon'] = 'largo'
|
|
103
|
-
|
|
104
|
-
# Component detection
|
|
105
|
-
if "activos" in text_norm:
|
|
106
|
-
attrs['component'] = 'activos'
|
|
107
|
-
elif "pasivos" in text_norm:
|
|
108
|
-
attrs['component'] = 'pasivos'
|
|
109
|
-
|
|
110
|
-
# Scale detection
|
|
111
|
-
if "millones" in text_norm:
|
|
112
|
-
attrs['scale'] = 'millones'
|
|
113
|
-
elif "miles" in text_norm:
|
|
114
|
-
attrs['scale'] = 'miles'
|
|
115
|
-
|
|
116
|
-
return attrs
|
|
117
|
-
|
|
118
|
-
def _preprocess_metadata(self):
|
|
119
|
-
"""Pre-calculate normalized tokens and attributes for all series."""
|
|
120
|
-
if self.df.empty:
|
|
121
|
-
self.search_corpus = []
|
|
122
|
-
return
|
|
123
|
-
|
|
124
|
-
processed = []
|
|
125
|
-
for idx, row in self.df.iterrows():
|
|
126
|
-
raw_name = str(row.get('Nombre de serie', ''))
|
|
127
|
-
name_norm = self._normalize(raw_name)
|
|
128
|
-
attrs = self._extract_attributes(name_norm)
|
|
129
|
-
|
|
130
|
-
item = {
|
|
131
|
-
"idx": idx,
|
|
132
|
-
"codigo_serie": row.get("Código de serie") or row.get("Codigo de serie"),
|
|
133
|
-
"name_original": raw_name,
|
|
134
|
-
"name_norm": name_norm,
|
|
135
|
-
"tokens": set(name_norm.split()),
|
|
136
|
-
"currency": attrs['currency'],
|
|
137
|
-
"horizon": attrs['horizon'],
|
|
138
|
-
"component": attrs['component'],
|
|
139
|
-
"scale": attrs['scale']
|
|
140
|
-
}
|
|
141
|
-
processed.append(item)
|
|
142
|
-
|
|
143
|
-
self.search_corpus = processed
|
|
144
|
-
|
|
145
|
-
def solve(self, query: str) -> Dict[str, Any]:
    """
    Pick the single best-matching series for a free-text query.

    The query is normalized, its currency/horizon/component attributes are
    applied as exact-match filters, and the surviving series are scored with
    ``fuzz.token_sort_ratio`` minus 5 points per query token absent from the
    series name. Only scores of at least 80 are kept.

    Args:
        query: Search query (e.g., "tipo de cambio USD")

    Returns:
        On success, a dict with 'codigo_serie', 'confidence' (score / 100,
        rounded to two decimals) and 'name'. Otherwise a dict with 'error'
        and 'reason'; the ambiguity error also lists up to five candidate
        codes under 'candidates'.
    """
    def failure(reason):
        return {"error": "no_match", "reason": reason}

    if not self.search_corpus:
        return failure("empty_corpus")

    normalized_query = self._normalize(query)
    wanted = self._extract_attributes(normalized_query)
    query_tokens = set(normalized_query.split())
    if not query_tokens:
        return failure("empty_query")

    # Hard filters: an attribute detected in the query must match exactly.
    pool = self.search_corpus
    for field in ("currency", "horizon", "component"):
        required = wanted[field]
        if required:
            pool = [entry for entry in pool if entry[field] == required]
    if not pool:
        return failure("filters_eliminated_all")

    # Rank the survivors as (adjusted score, corpus entry) pairs.
    ranked = []
    for entry in pool:
        # With a falsy `fuzz` every similarity is 0, so nothing reaches 80.
        similarity = fuzz.token_sort_ratio(normalized_query, entry['name_norm']) if fuzz else 0
        unmatched = query_tokens - entry['tokens']
        adjusted = similarity - 5 * len(unmatched)
        if adjusted >= 80:
            ranked.append((adjusted, entry))

    # The sort is stable, so equal scores keep their corpus order.
    ranked.sort(key=lambda pair: pair[0], reverse=True)
    if not ranked:
        return failure("low_score")

    best_score, best_entry = ranked[0]
    winner = {
        "codigo_serie": best_entry['codigo_serie'],
        "confidence": round(best_score / 100.0, 2),
        "name": best_entry['name_original'],
    }

    if len(ranked) > 1:
        # Entries within 5 points of the best must agree on currency and
        # component; otherwise the query is reported as ambiguous.
        near_best = [entry for score, entry in ranked if score >= (best_score - 5)]
        currencies = {entry['currency'] for entry in near_best}
        components = {entry['component'] for entry in near_best}
        if len(currencies) > 1 or len(components) > 1:
            return {
                "error": "ambiguedad",
                "candidates": [entry['codigo_serie'] for entry in near_best[:5]],
                "reason": "mixed_attributes_in_top_results",
            }

    return winner
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|