dataforge-studio 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. dataforge_studio-1.0.1/PKG-INFO +119 -0
  2. dataforge_studio-1.0.1/README.md +97 -0
  3. dataforge_studio-1.0.1/dataforge_studio.egg-info/PKG-INFO +119 -0
  4. dataforge_studio-1.0.1/dataforge_studio.egg-info/SOURCES.txt +33 -0
  5. dataforge_studio-1.0.1/dataforge_studio.egg-info/dependency_links.txt +1 -0
  6. dataforge_studio-1.0.1/dataforge_studio.egg-info/entry_points.txt +2 -0
  7. dataforge_studio-1.0.1/dataforge_studio.egg-info/requires.txt +7 -0
  8. dataforge_studio-1.0.1/dataforge_studio.egg-info/top_level.txt +1 -0
  9. dataforge_studio-1.0.1/dataici/__init__.py +3 -0
  10. dataforge_studio-1.0.1/dataici/blocks/__init__.py +0 -0
  11. dataforge_studio-1.0.1/dataici/blocks/aggregate.py +50 -0
  12. dataforge_studio-1.0.1/dataici/blocks/append_column.py +18 -0
  13. dataforge_studio-1.0.1/dataici/blocks/concatenate.py +70 -0
  14. dataforge_studio-1.0.1/dataici/blocks/drop_columns.py +19 -0
  15. dataforge_studio-1.0.1/dataici/blocks/filter_rows.py +120 -0
  16. dataforge_studio-1.0.1/dataici/blocks/handle_missings.py +160 -0
  17. dataforge_studio-1.0.1/dataici/blocks/load_csv.py +68 -0
  18. dataforge_studio-1.0.1/dataici/blocks/read_excel.py +47 -0
  19. dataforge_studio-1.0.1/dataici/blocks/rename_columns.py +25 -0
  20. dataforge_studio-1.0.1/dataici/blocks/reorder_columns.py +19 -0
  21. dataforge_studio-1.0.1/dataici/blocks/replace_values.py +154 -0
  22. dataforge_studio-1.0.1/dataici/blocks/resample.py +68 -0
  23. dataforge_studio-1.0.1/dataici/blocks/sample_rows.py +49 -0
  24. dataforge_studio-1.0.1/dataici/blocks/select_columns.py +19 -0
  25. dataforge_studio-1.0.1/dataici/blocks/set_dtypes.py +46 -0
  26. dataforge_studio-1.0.1/dataici/blocks/set_index.py +24 -0
  27. dataforge_studio-1.0.1/dataici/blocks/write_csv.py +49 -0
  28. dataforge_studio-1.0.1/dataici/charts.py +202 -0
  29. dataforge_studio-1.0.1/dataici/cli.py +35 -0
  30. dataforge_studio-1.0.1/dataici/main.py +349 -0
  31. dataforge_studio-1.0.1/dataici/static/assets/index-CYGnphoW.js +74 -0
  32. dataforge_studio-1.0.1/dataici/static/assets/index-DLK3-mBP.css +1 -0
  33. dataforge_studio-1.0.1/dataici/static/index.html +13 -0
  34. dataforge_studio-1.0.1/pyproject.toml +47 -0
  35. dataforge_studio-1.0.1/setup.cfg +4 -0
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataforge-studio
3
+ Version: 1.0.1
4
+ Summary: Studio visual de preprocesamiento de datos — Universidad Alberto Hurtado
5
+ Author-email: Álvaro Riquelme <alvaroriquelme.14@gmail.com>
6
+ License-Expression: LicenseRef-Proprietary
7
+ Project-URL: Homepage, https://dataforgeUAH.github.io/dataici
8
+ Keywords: data,preprocessing,pandas,visual,pipeline,uah
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
12
+ Classifier: Intended Audience :: Education
13
+ Requires-Python: >=3.10
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: fastapi>=0.110.0
16
+ Requires-Dist: uvicorn[standard]>=0.29.0
17
+ Requires-Dist: pandas>=2.0.0
18
+ Requires-Dist: openpyxl>=3.1.0
19
+ Requires-Dist: python-multipart>=0.0.9
20
+ Requires-Dist: matplotlib>=3.7.0
21
+ Requires-Dist: numpy>=1.24.0
22
+
23
+ # DataICI — v0.2
24
+
25
+ Herramienta visual de preprocesamiento de datos para estudiantes de Ingeniería Civil Industrial.
26
+
27
+ ## Requisitos previos
28
+ - Python 3.10+ → https://python.org
29
+ - Node.js 18+ → https://nodejs.org
30
+
31
+ ---
32
+
33
+ ## Instalación y ejecución
34
+
35
+ ### 1. Backend (FastAPI + pandas)
36
+
37
+ Abre una terminal en la carpeta `dataici/`:
38
+
39
+ ```bash
40
+ # Windows
41
+ cd backend
42
+ pip install -r requirements.txt
43
+ uvicorn main:app --reload
44
+
45
+ # Mac
46
+ cd backend
47
+ pip3 install -r requirements.txt
48
+ uvicorn main:app --reload
49
+ ```
50
+
51
+ Backend corriendo en: http://localhost:8000
52
+
53
+ ---
54
+
55
+ ### 2. Frontend (React)
56
+
57
+ Abre **otra terminal**:
58
+
59
+ ```bash
60
+ cd frontend
61
+ npm install
62
+ npm run dev
63
+ ```
64
+
65
+ App disponible en: http://localhost:5173
66
+
67
+ ---
68
+
69
+ ## Estructura del proyecto
70
+
71
+ ```
72
+ dataici/
73
+ ├── backend/
74
+ │ ├── main.py ← API FastAPI
75
+ │ ├── requirements.txt
76
+ │ └── blocks/ ← un archivo por bloque
77
+ │ ├── load_csv.py
78
+ │ ├── drop_nulls.py
79
+ │ ├── filter_rows.py
80
+ │ ├── groupby.py
81
+ │ └── export_csv.py
82
+
83
+ └── frontend/
84
+ ├── package.json
85
+ ├── vite.config.js
86
+ └── src/
87
+ ├── App.jsx ← app principal
88
+ ├── nodes/
89
+ │ └── BlockNode.jsx ← nodo del canvas
90
+ └── panels/
91
+ ├── Sidebar.jsx ← bloques disponibles
92
+ ├── ParamsPanel.jsx ← parámetros del bloque
93
+ └── PreviewPanel.jsx ← resultados
94
+ ```
95
+
96
+ ---
97
+
98
+ ## Cómo agregar un nuevo bloque
99
+
100
+ Solo crear `backend/blocks/nuevo_bloque.py`. El frontend lo detecta automáticamente.
101
+
102
+ ```python
103
+ METADATA = {
104
+ "type": "mi_bloque",
105
+ "label": "Mi bloque",
106
+ "category": "Limpieza", # Entrada / Salida | Limpieza | Análisis
107
+ "params": [
108
+ {"key": "columna", "label": "Columna", "type": "text", "default": ""},
109
+ {"key": "metodo", "label": "Método", "type": "select", "options": ["a", "b"], "default": "a"},
110
+ {"key": "activo", "label": "Activar", "type": "toggle", "default": False},
111
+ ]
112
+ }
113
+
114
+ def run(df, params):
115
+ col = params.get("columna")
116
+ df = df.drop(columns=[col])
117
+ code = [f'df = df.drop(columns=["{col}"])']
118
+ return df, code
119
+ ```
@@ -0,0 +1,97 @@
1
+ # DataICI — v0.2
2
+
3
+ Herramienta visual de preprocesamiento de datos para estudiantes de Ingeniería Civil Industrial.
4
+
5
+ ## Requisitos previos
6
+ - Python 3.10+ → https://python.org
7
+ - Node.js 18+ → https://nodejs.org
8
+
9
+ ---
10
+
11
+ ## Instalación y ejecución
12
+
13
+ ### 1. Backend (FastAPI + pandas)
14
+
15
+ Abre una terminal en la carpeta `dataici/`:
16
+
17
+ ```bash
18
+ # Windows
19
+ cd backend
20
+ pip install -r requirements.txt
21
+ uvicorn main:app --reload
22
+
23
+ # Mac
24
+ cd backend
25
+ pip3 install -r requirements.txt
26
+ uvicorn main:app --reload
27
+ ```
28
+
29
+ Backend corriendo en: http://localhost:8000
30
+
31
+ ---
32
+
33
+ ### 2. Frontend (React)
34
+
35
+ Abre **otra terminal**:
36
+
37
+ ```bash
38
+ cd frontend
39
+ npm install
40
+ npm run dev
41
+ ```
42
+
43
+ App disponible en: http://localhost:5173
44
+
45
+ ---
46
+
47
+ ## Estructura del proyecto
48
+
49
+ ```
50
+ dataici/
51
+ ├── backend/
52
+ │ ├── main.py ← API FastAPI
53
+ │ ├── requirements.txt
54
+ │ └── blocks/ ← un archivo por bloque
55
+ │ ├── load_csv.py
56
+ │ ├── drop_nulls.py
57
+ │ ├── filter_rows.py
58
+ │ ├── groupby.py
59
+ │ └── export_csv.py
60
+
61
+ └── frontend/
62
+ ├── package.json
63
+ ├── vite.config.js
64
+ └── src/
65
+ ├── App.jsx ← app principal
66
+ ├── nodes/
67
+ │ └── BlockNode.jsx ← nodo del canvas
68
+ └── panels/
69
+ ├── Sidebar.jsx ← bloques disponibles
70
+ ├── ParamsPanel.jsx ← parámetros del bloque
71
+ └── PreviewPanel.jsx ← resultados
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Cómo agregar un nuevo bloque
77
+
78
+ Solo crear `backend/blocks/nuevo_bloque.py`. El frontend lo detecta automáticamente.
79
+
80
+ ```python
81
+ METADATA = {
82
+ "type": "mi_bloque",
83
+ "label": "Mi bloque",
84
+ "category": "Limpieza", # Entrada / Salida | Limpieza | Análisis
85
+ "params": [
86
+ {"key": "columna", "label": "Columna", "type": "text", "default": ""},
87
+ {"key": "metodo", "label": "Método", "type": "select", "options": ["a", "b"], "default": "a"},
88
+ {"key": "activo", "label": "Activar", "type": "toggle", "default": False},
89
+ ]
90
+ }
91
+
92
+ def run(df, params):
93
+ col = params.get("columna")
94
+ df = df.drop(columns=[col])
95
+ code = [f'df = df.drop(columns=["{col}"])']
96
+ return df, code
97
+ ```
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataforge-studio
3
+ Version: 1.0.1
4
+ Summary: Studio visual de preprocesamiento de datos — Universidad Alberto Hurtado
5
+ Author-email: Álvaro Riquelme <alvaroriquelme.14@gmail.com>
6
+ License-Expression: LicenseRef-Proprietary
7
+ Project-URL: Homepage, https://dataforgeUAH.github.io/dataici
8
+ Keywords: data,preprocessing,pandas,visual,pipeline,uah
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
12
+ Classifier: Intended Audience :: Education
13
+ Requires-Python: >=3.10
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: fastapi>=0.110.0
16
+ Requires-Dist: uvicorn[standard]>=0.29.0
17
+ Requires-Dist: pandas>=2.0.0
18
+ Requires-Dist: openpyxl>=3.1.0
19
+ Requires-Dist: python-multipart>=0.0.9
20
+ Requires-Dist: matplotlib>=3.7.0
21
+ Requires-Dist: numpy>=1.24.0
22
+
23
+ # DataICI — v0.2
24
+
25
+ Herramienta visual de preprocesamiento de datos para estudiantes de Ingeniería Civil Industrial.
26
+
27
+ ## Requisitos previos
28
+ - Python 3.10+ → https://python.org
29
+ - Node.js 18+ → https://nodejs.org
30
+
31
+ ---
32
+
33
+ ## Instalación y ejecución
34
+
35
+ ### 1. Backend (FastAPI + pandas)
36
+
37
+ Abre una terminal en la carpeta `dataici/`:
38
+
39
+ ```bash
40
+ # Windows
41
+ cd backend
42
+ pip install -r requirements.txt
43
+ uvicorn main:app --reload
44
+
45
+ # Mac
46
+ cd backend
47
+ pip3 install -r requirements.txt
48
+ uvicorn main:app --reload
49
+ ```
50
+
51
+ Backend corriendo en: http://localhost:8000
52
+
53
+ ---
54
+
55
+ ### 2. Frontend (React)
56
+
57
+ Abre **otra terminal**:
58
+
59
+ ```bash
60
+ cd frontend
61
+ npm install
62
+ npm run dev
63
+ ```
64
+
65
+ App disponible en: http://localhost:5173
66
+
67
+ ---
68
+
69
+ ## Estructura del proyecto
70
+
71
+ ```
72
+ dataici/
73
+ ├── backend/
74
+ │ ├── main.py ← API FastAPI
75
+ │ ├── requirements.txt
76
+ │ └── blocks/ ← un archivo por bloque
77
+ │ ├── load_csv.py
78
+ │ ├── drop_nulls.py
79
+ │ ├── filter_rows.py
80
+ │ ├── groupby.py
81
+ │ └── export_csv.py
82
+
83
+ └── frontend/
84
+ ├── package.json
85
+ ├── vite.config.js
86
+ └── src/
87
+ ├── App.jsx ← app principal
88
+ ├── nodes/
89
+ │ └── BlockNode.jsx ← nodo del canvas
90
+ └── panels/
91
+ ├── Sidebar.jsx ← bloques disponibles
92
+ ├── ParamsPanel.jsx ← parámetros del bloque
93
+ └── PreviewPanel.jsx ← resultados
94
+ ```
95
+
96
+ ---
97
+
98
+ ## Cómo agregar un nuevo bloque
99
+
100
+ Solo crear `backend/blocks/nuevo_bloque.py`. El frontend lo detecta automáticamente.
101
+
102
+ ```python
103
+ METADATA = {
104
+ "type": "mi_bloque",
105
+ "label": "Mi bloque",
106
+ "category": "Limpieza", # Entrada / Salida | Limpieza | Análisis
107
+ "params": [
108
+ {"key": "columna", "label": "Columna", "type": "text", "default": ""},
109
+ {"key": "metodo", "label": "Método", "type": "select", "options": ["a", "b"], "default": "a"},
110
+ {"key": "activo", "label": "Activar", "type": "toggle", "default": False},
111
+ ]
112
+ }
113
+
114
+ def run(df, params):
115
+ col = params.get("columna")
116
+ df = df.drop(columns=[col])
117
+ code = [f'df = df.drop(columns=["{col}"])']
118
+ return df, code
119
+ ```
@@ -0,0 +1,33 @@
1
+ README.md
2
+ pyproject.toml
3
+ dataforge_studio.egg-info/PKG-INFO
4
+ dataforge_studio.egg-info/SOURCES.txt
5
+ dataforge_studio.egg-info/dependency_links.txt
6
+ dataforge_studio.egg-info/entry_points.txt
7
+ dataforge_studio.egg-info/requires.txt
8
+ dataforge_studio.egg-info/top_level.txt
9
+ dataici/__init__.py
10
+ dataici/charts.py
11
+ dataici/cli.py
12
+ dataici/main.py
13
+ dataici/blocks/__init__.py
14
+ dataici/blocks/aggregate.py
15
+ dataici/blocks/append_column.py
16
+ dataici/blocks/concatenate.py
17
+ dataici/blocks/drop_columns.py
18
+ dataici/blocks/filter_rows.py
19
+ dataici/blocks/handle_missings.py
20
+ dataici/blocks/load_csv.py
21
+ dataici/blocks/read_excel.py
22
+ dataici/blocks/rename_columns.py
23
+ dataici/blocks/reorder_columns.py
24
+ dataici/blocks/replace_values.py
25
+ dataici/blocks/resample.py
26
+ dataici/blocks/sample_rows.py
27
+ dataici/blocks/select_columns.py
28
+ dataici/blocks/set_dtypes.py
29
+ dataici/blocks/set_index.py
30
+ dataici/blocks/write_csv.py
31
+ dataici/static/index.html
32
+ dataici/static/assets/index-CYGnphoW.js
33
+ dataici/static/assets/index-DLK3-mBP.css
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ dataici = dataici.cli:main
@@ -0,0 +1,7 @@
1
+ fastapi>=0.110.0
2
+ uvicorn[standard]>=0.29.0
3
+ pandas>=2.0.0
4
+ openpyxl>=3.1.0
5
+ python-multipart>=0.0.9
6
+ matplotlib>=3.7.0
7
+ numpy>=1.24.0
@@ -0,0 +1,3 @@
1
+ # DataICI — Studio de Preprocesamiento de Datos
2
+ # Universidad Alberto Hurtado
3
# Keep in sync with the "Version" field of the package metadata (1.0.1).
__version__ = "1.0.1"
File without changes
@@ -0,0 +1,50 @@
1
+ import json
2
+
3
METADATA = {
    "type": "aggregate",
    "label": "Aggregate",
    "category": "Resampling",
    "params": [
        {"key": "custom", "label": "Custom Functions", "type": "text", "default": "false"},
        {"key": "func", "label": "General function", "type": "text", "default": "mean"},
        {"key": "col_funcs", "label": "Per-column funcs", "type": "text", "default": "{}"},
    ]
}

# pandas resampler does not have a .unique() — map to nunique
_ALIASES = {"unique": "nunique"}


def _is_resampler(obj):
    """Return True when *obj* is a pandas ``DatetimeIndexResampler``.

    If the pandas internal module cannot be imported, fall back to a
    duck-typing check (has ``_selected_obj`` but no ``to_dict``).
    """
    try:
        from pandas.core.resample import DatetimeIndexResampler
    except ImportError:
        return hasattr(obj, "_selected_obj") and not hasattr(obj, "to_dict")
    return isinstance(obj, DatetimeIndexResampler)


def _resolve_alias(fn):
    """Translate function names through ``_ALIASES``; lists are mapped item by item."""
    if isinstance(fn, str):
        name = fn.strip()
        return _ALIASES.get(name, name)
    if isinstance(fn, (list, tuple)):
        return [_resolve_alias(item) for item in fn]
    return fn


def _parse_col_funcs(raw):
    """Return the per-column function mapping as a dict (empty when unusable).

    Accepts either an already-parsed dict or a JSON string. Invalid JSON and
    JSON that is not an object yield ``{}``, which makes ``run`` fall back to
    the general function (best-effort behaviour of the original block).
    """
    if isinstance(raw, dict):
        return raw
    if not raw:
        return {}
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):
        return {}
    return parsed if isinstance(parsed, dict) else {}


def run(obj, params):
    """Aggregate *obj* with one general function or with per-column functions.

    Args:
        obj: Object exposing ``agg`` and the aggregation methods (the code
            distinguishes a pandas resampler from anything else only to
            choose the variable name used in the generated code).
        params: Block parameters:
            ``custom``    -- "true" to use the per-column mapping.
            ``func``      -- name of the general aggregation (default "mean").
            ``col_funcs`` -- dict or JSON object ``{column: function}``.

    Returns:
        tuple: ``(result, code)`` where ``code`` is a list with the
        equivalent pandas statement.

    Raises:
        ValueError: If ``func`` does not name a public callable of *obj*.
    """
    custom = str(params.get("custom", "false")).strip().lower() == "true"
    # A missing, None or whitespace-only name falls back to the default.
    func = str(params.get("func") or "").strip() or "mean"
    col_funcs = _parse_col_funcs(params.get("col_funcs", "{}"))

    prefix = "resampler" if _is_resampler(obj) else "df"

    if custom and col_funcs:
        # Map aliases per-column
        mapped = {col: _resolve_alias(fn) for col, fn in col_funcs.items()}
        df = obj.agg(mapped)
        code = [f"df = {prefix}.agg({json.dumps(mapped)})"]
    else:
        actual = _ALIASES.get(func, func)
        # Only call public, callable attributes: the name comes from user input.
        method = None if actual.startswith("_") else getattr(obj, actual, None)
        if not callable(method):
            raise ValueError(f"Función de agregación desconocida: '{func}'")
        df = method()
        code = [f"df = {prefix}.{actual}()"]

    return df, code
@@ -0,0 +1,18 @@
1
+ import numpy as np
2
+ import pandas as pd
3
METADATA = {
    "type": "append_column",
    "label": "Append a Column",
    "category": "Columns",
    "params": [
        {"key": "colname", "label": "colname", "type": "text", "default": "new_col"},
    ]
}


def run(df, params):
    """Append a placeholder column filled with the string ``'None'``.

    Args:
        df: Input DataFrame. It is not modified; a copy is returned.
        params: Block parameters; ``colname`` is the name of the new column
            (default "new_col"). Surrounding whitespace is stripped.

    Returns:
        tuple: ``(new_df, code)`` where ``code`` is a list with the
        equivalent pandas statement.

    Raises:
        ValueError: If the column name is missing or blank.
    """
    raw_name = params.get("colname", "new_col")
    colname = "" if raw_name is None else str(raw_name).strip()
    if not colname:
        raise ValueError("Especifica el nombre de la nueva columna.")
    # Work on a copy so the DataFrame handed in by the caller is left untouched.
    df = df.copy()
    df[colname] = 'None'
    # The generated statement mirrors what was executed: the fill value is
    # the string 'None', not the None object.
    code = [f'df["{colname}"] = "None"']
    return df, code
@@ -0,0 +1,70 @@
1
+ import pandas as pd
2
+
3
METADATA = {
    "type": "concatenate",
    "label": "Concatenate",
    "category": "DataFrame",
    "multi_input": True,
    "params": [
        {"key": "input_count", "label": "Número de entradas", "type": "text", "default": "2"},
        {"key": "axis", "label": "Axis", "type": "text", "default": "index"},
        {"key": "join", "label": "Join", "type": "text", "default": "outer"},
        {"key": "change_col_names", "label": "Change column names", "type": "text", "default": "None"},
        {"key": "ignore_index", "label": "Ignore index", "type": "text", "default": "false"},
        {"key": "sort", "label": "Sort", "type": "text", "default": "false"},
    ]
}

# Spellings of the "axis" parameter that mean row-wise concatenation.
_INDEX_AXIS = {"index", "rows", "0"}


def _bool(val):
    """Coerce a bool or a "true"/"false" string to bool."""
    if isinstance(val, bool):
        return val
    return str(val).strip().lower() == "true"


def _axis(val):
    """Return 0 for row-wise spellings ("index", "rows", 0, "0"), else 1.

    Matching ignores case and surrounding whitespace; any other value keeps
    the original fallback of column-wise concatenation.
    """
    return 0 if str(val).strip().lower() in _INDEX_AXIS else 1


def run(dfs, params):
    """Concatenate the DataFrames received from upstream nodes.

    Args:
        dfs: Iterable of DataFrames (at least two).
        params: Block parameters:
            ``axis``             -- "index" (rows) or anything else (columns).
            ``join``             -- passed to ``pd.concat`` (default "outer").
            ``change_col_names`` -- "prefix" / "suffix" to tag columns with
                                    ``df<N>`` when concatenating columns.
            ``ignore_index``     -- only applied when concatenating rows.
            ``sort``             -- passed to ``pd.concat``.

    Returns:
        tuple: ``(result_df, code_lines)`` where ``code_lines`` is the
        equivalent pandas code.

    Raises:
        ValueError: If fewer than two DataFrames are supplied.
    """
    dfs = list(dfs) if dfs is not None else []
    axis = _axis(params.get("axis", "index"))
    join = str(params.get("join") or "outer").strip().lower()
    change_cols = str(params.get("change_col_names", "None")).strip().lower()
    ignore_index = _bool(params.get("ignore_index", False))
    sort = _bool(params.get("sort", False))

    if len(dfs) < 2:
        raise ValueError("Concatenate necesita al menos 2 DataFrames.")

    code_lines = []

    # Rename columns (and emit the matching code) only for column-wise
    # concatenation with an explicit prefix/suffix request.
    if axis == 1 and change_cols in ("prefix", "suffix"):
        renamed = []
        for pos, frame in enumerate(dfs, start=1):
            if change_cols == "prefix":
                frame = frame.rename(columns=lambda c, pos=pos: f"df{pos}_{c}")
                code_lines.append(
                    f"df_{pos} = df_{pos}.rename(columns=lambda c: f'df{pos}_{{c}}')"
                )
            else:
                frame = frame.rename(columns=lambda c, pos=pos: f"{c}_df{pos}")
                code_lines.append(
                    f"df_{pos} = df_{pos}.rename(columns=lambda c: f'{{c}}_df{pos}')"
                )
            renamed.append(frame)
        dfs = renamed

    kwargs = {"axis": axis, "join": join, "sort": sort}
    if axis == 0:
        kwargs["ignore_index"] = ignore_index

    df_result = pd.concat(dfs, **kwargs)

    frames_repr = ", ".join(f"df_{pos}" for pos in range(1, len(dfs) + 1))
    kw_str = ", ".join(f"{k}={v!r}" for k, v in kwargs.items())
    code_lines.append(f"df = pd.concat([{frames_repr}], {kw_str})")

    return df_result, code_lines
@@ -0,0 +1,19 @@
1
METADATA = {
    "type": "drop_columns",
    "label": "Drop Columns",
    "category": "Columns",
    "params": [
        {"key": "columns", "label": "columns", "type": "text", "default": ""},
    ]
}


def run(df, params):
    """Drop the requested columns from *df*.

    Args:
        df: Input DataFrame (not modified; ``drop`` returns a new frame).
        params: Block parameters; ``columns`` is either a comma-separated
            string or a list/tuple of column names. Blank entries are
            ignored and repeated names are collapsed, keeping first-seen
            order.

    Returns:
        tuple: ``(new_df, code)`` where ``code`` is a list with the
        equivalent pandas statement.

    Raises:
        ValueError: If no column is given or any column is not in *df*.
    """
    raw = params.get("columns", "")
    if raw is None:
        raw = ""
    names = raw.split(",") if isinstance(raw, str) else list(raw)
    cleaned = (n.strip() if isinstance(n, str) else n for n in names)
    # dict.fromkeys removes duplicates while preserving order.
    cols = list(dict.fromkeys(n for n in cleaned if n not in ("", None)))
    if not cols:
        raise ValueError("Selecciona al menos una columna para eliminar.")
    missing = [c for c in cols if c not in df.columns]
    if missing:
        raise ValueError(f"Columnas no encontradas: {missing}")
    df = df.drop(columns=cols)
    code = [f"df = df.drop(columns={cols})"]
    return df, code
@@ -0,0 +1,120 @@
1
+ import json
2
+ import pandas as pd
3
+
4
METADATA = {
    "type": "filter_rows",
    "label": "Filter Rows",
    "category": "DataFrame",
    "params": [
        {"key": "conditions", "label": "conditions", "type": "text", "default": "[]"},
    ]
}

# Comparison operator -> pandas Series method name.
_OP_METHOD = {"==": "eq", "!=": "ne", "<": "lt", "<=": "le", ">": "gt", ">=": "ge"}
# Logical connector -> operator symbol used in the generated code.
_OP_SYM = {"and": "&", "or": "|", "xor": "^"}
# String spellings that mean "false" for the "not" flag.
_FALSE_STRINGS = {"", "false", "0", "no"}


def _flag(val):
    """Interpret the ``not`` flag; strings such as "false" count as False."""
    if isinstance(val, str):
        return val.strip().lower() not in _FALSE_STRINGS
    return bool(val)


def _build_condition(df, cond):
    """Return ``(mask, code_expr)`` for a single condition dict (before negation)."""
    col = cond.get("column", "")
    op = cond.get("operator", "==")
    typ = cond.get("type", "number")
    val = str(cond.get("value", "0")).strip()

    if not col or col not in df.columns:
        raise ValueError(f"Columna '{col}' no encontrada en el DataFrame.")

    s = df[col]
    # repr() keeps the generated code valid when names/values contain quotes.
    target = f"df[{col!r}]"

    if op == "isna":
        return s.isna(), f"{target}.isna()"
    if op == "notna":
        return s.notna(), f"{target}.notna()"

    if op == "isin":
        items = [v.strip() for v in val.split(",") if v.strip()]
        if typ == "number":
            try:
                parsed = [float(v) for v in items]
            except ValueError as exc:
                raise ValueError(f"isin numérico: valores inválidos → {items}") from exc
        else:
            parsed = [v.strip("'\"") for v in items]
        return s.isin(parsed), f"{target}.isin({parsed!r})"

    # Unknown operators fall back to equality.
    method = _OP_METHOD.get(op, "eq")
    compare = getattr(s, method)

    if typ == "number":
        try:
            parsed = float(val)
        except ValueError as exc:
            raise ValueError(f"Valor numérico inválido: '{val}'") from exc
        return compare(parsed), f"{target}.{method}({parsed})"

    if typ == "string":
        parsed = val.strip("'\"")
        return compare(parsed), f"{target}.{method}({parsed!r})"

    if typ == "datetime":
        try:
            parsed = pd.Timestamp(val)
        except Exception as exc:
            raise ValueError(f"Fecha inválida: '{val}'") from exc
        return compare(parsed), f"{target}.{method}(pd.Timestamp({val!r}))"

    if typ == "column":
        if val not in df.columns:
            raise ValueError(f"Columna de comparación '{val}' no existe.")
        return compare(df[val]), f"{target}.{method}(df[{val!r}])"

    raise ValueError(f"Tipo desconocido: '{typ}'")


def run(df, params):
    """Keep the rows of *df* that satisfy a chain of conditions.

    Args:
        df: Input DataFrame.
        params: Block parameters; ``conditions`` is a list (or its JSON
            string) of dicts with the keys ``column``, ``operator``
            (==, !=, <, <=, >, >=, isna, notna, isin), ``type`` (number,
            string, datetime, column), ``value``, ``not`` and ``logical``
            (and / or / xor; ignored for the first condition). Conditions
            are combined left to right.

    Returns:
        tuple: ``(filtered_df, code)`` where ``code`` is the list of lines
        of the equivalent pandas code.

    Raises:
        ValueError: If the conditions cannot be read, are empty, reference
            an unknown column, or carry an invalid value or type.
    """
    raw = params.get("conditions", "[]")
    try:
        conditions = json.loads(raw) if isinstance(raw, str) else raw
    except (TypeError, ValueError) as exc:
        raise ValueError("Error al leer las condiciones.") from exc

    if not conditions:
        raise ValueError("Agrega al menos una condición Where.")
    if not isinstance(conditions, list) or not all(isinstance(c, dict) for c in conditions):
        raise ValueError("Error al leer las condiciones.")

    result_mask = None
    cond_lines = []

    for cond in conditions:
        mask, expr = _build_condition(df, cond)
        if _flag(cond.get("not", False)):
            mask = ~mask
            expr = f"~({expr})"

        if result_mask is None:
            result_mask = mask
            cond_lines.append(f" ({expr})")
            continue

        # Unrecognised connectors are treated as "and" so the mask always
        # matches the "&" that the generated code shows for them.
        logical = str(cond.get("logical", "and")).strip().lower()
        if logical not in _OP_SYM:
            logical = "and"

        if logical == "and":
            result_mask = result_mask & mask
        elif logical == "or":
            result_mask = result_mask | mask
        else:
            result_mask = result_mask ^ mask
        cond_lines.append(f" {_OP_SYM[logical]} ({expr})")

    df = df[result_mask]

    code = ["cond = (", *cond_lines, ")", "df = df[cond]"]
    return df, code