dataforge-studio 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataforge_studio-1.0.1/PKG-INFO +119 -0
- dataforge_studio-1.0.1/README.md +97 -0
- dataforge_studio-1.0.1/dataforge_studio.egg-info/PKG-INFO +119 -0
- dataforge_studio-1.0.1/dataforge_studio.egg-info/SOURCES.txt +33 -0
- dataforge_studio-1.0.1/dataforge_studio.egg-info/dependency_links.txt +1 -0
- dataforge_studio-1.0.1/dataforge_studio.egg-info/entry_points.txt +2 -0
- dataforge_studio-1.0.1/dataforge_studio.egg-info/requires.txt +7 -0
- dataforge_studio-1.0.1/dataforge_studio.egg-info/top_level.txt +1 -0
- dataforge_studio-1.0.1/dataici/__init__.py +3 -0
- dataforge_studio-1.0.1/dataici/blocks/__init__.py +0 -0
- dataforge_studio-1.0.1/dataici/blocks/aggregate.py +50 -0
- dataforge_studio-1.0.1/dataici/blocks/append_column.py +18 -0
- dataforge_studio-1.0.1/dataici/blocks/concatenate.py +70 -0
- dataforge_studio-1.0.1/dataici/blocks/drop_columns.py +19 -0
- dataforge_studio-1.0.1/dataici/blocks/filter_rows.py +120 -0
- dataforge_studio-1.0.1/dataici/blocks/handle_missings.py +160 -0
- dataforge_studio-1.0.1/dataici/blocks/load_csv.py +68 -0
- dataforge_studio-1.0.1/dataici/blocks/read_excel.py +47 -0
- dataforge_studio-1.0.1/dataici/blocks/rename_columns.py +25 -0
- dataforge_studio-1.0.1/dataici/blocks/reorder_columns.py +19 -0
- dataforge_studio-1.0.1/dataici/blocks/replace_values.py +154 -0
- dataforge_studio-1.0.1/dataici/blocks/resample.py +68 -0
- dataforge_studio-1.0.1/dataici/blocks/sample_rows.py +49 -0
- dataforge_studio-1.0.1/dataici/blocks/select_columns.py +19 -0
- dataforge_studio-1.0.1/dataici/blocks/set_dtypes.py +46 -0
- dataforge_studio-1.0.1/dataici/blocks/set_index.py +24 -0
- dataforge_studio-1.0.1/dataici/blocks/write_csv.py +49 -0
- dataforge_studio-1.0.1/dataici/charts.py +202 -0
- dataforge_studio-1.0.1/dataici/cli.py +35 -0
- dataforge_studio-1.0.1/dataici/main.py +349 -0
- dataforge_studio-1.0.1/dataici/static/assets/index-CYGnphoW.js +74 -0
- dataforge_studio-1.0.1/dataici/static/assets/index-DLK3-mBP.css +1 -0
- dataforge_studio-1.0.1/dataici/static/index.html +13 -0
- dataforge_studio-1.0.1/pyproject.toml +47 -0
- dataforge_studio-1.0.1/setup.cfg +4 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dataforge-studio
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Studio visual de preprocesamiento de datos — Universidad Alberto Hurtado
|
|
5
|
+
Author-email: Álvaro Riquelme <alvaroriquelme.14@gmail.com>
|
|
6
|
+
License-Expression: LicenseRef-Proprietary
|
|
7
|
+
Project-URL: Homepage, https://dataforgeUAH.github.io/dataici
|
|
8
|
+
Keywords: data,preprocessing,pandas,visual,pipeline,uah
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
12
|
+
Classifier: Intended Audience :: Education
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: fastapi>=0.110.0
|
|
16
|
+
Requires-Dist: uvicorn[standard]>=0.29.0
|
|
17
|
+
Requires-Dist: pandas>=2.0.0
|
|
18
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
19
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
20
|
+
Requires-Dist: matplotlib>=3.7.0
|
|
21
|
+
Requires-Dist: numpy>=1.24.0
|
|
22
|
+
|
|
23
|
+
# DataICI — v0.2
|
|
24
|
+
|
|
25
|
+
Herramienta visual de preprocesamiento de datos para estudiantes de Ingeniería Civil Industrial.
|
|
26
|
+
|
|
27
|
+
## Requisitos previos
|
|
28
|
+
- Python 3.10+ → https://python.org
|
|
29
|
+
- Node.js 18+ → https://nodejs.org
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Instalación y ejecución
|
|
34
|
+
|
|
35
|
+
### 1. Backend (FastAPI + pandas)
|
|
36
|
+
|
|
37
|
+
Abre una terminal en la carpeta `dataici/`:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
# Windows
|
|
41
|
+
cd backend
|
|
42
|
+
pip install -r requirements.txt
|
|
43
|
+
uvicorn main:app --reload
|
|
44
|
+
|
|
45
|
+
# Mac
|
|
46
|
+
cd backend
|
|
47
|
+
pip3 install -r requirements.txt
|
|
48
|
+
uvicorn main:app --reload
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Backend corriendo en: http://localhost:8000
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
### 2. Frontend (React)
|
|
56
|
+
|
|
57
|
+
Abre **otra terminal**:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
cd frontend
|
|
61
|
+
npm install
|
|
62
|
+
npm run dev
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
App disponible en: http://localhost:5173
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Estructura del proyecto
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
dataici/
|
|
73
|
+
├── backend/
|
|
74
|
+
│ ├── main.py ← API FastAPI
|
|
75
|
+
│ ├── requirements.txt
|
|
76
|
+
│ └── blocks/ ← un archivo por bloque
|
|
77
|
+
│ ├── load_csv.py
|
|
78
|
+
│ ├── drop_nulls.py
|
|
79
|
+
│ ├── filter_rows.py
|
|
80
|
+
│ ├── groupby.py
|
|
81
|
+
│ └── export_csv.py
|
|
82
|
+
│
|
|
83
|
+
└── frontend/
|
|
84
|
+
├── package.json
|
|
85
|
+
├── vite.config.js
|
|
86
|
+
└── src/
|
|
87
|
+
├── App.jsx ← app principal
|
|
88
|
+
├── nodes/
|
|
89
|
+
│ └── BlockNode.jsx ← nodo del canvas
|
|
90
|
+
└── panels/
|
|
91
|
+
├── Sidebar.jsx ← bloques disponibles
|
|
92
|
+
├── ParamsPanel.jsx ← parámetros del bloque
|
|
93
|
+
└── PreviewPanel.jsx ← resultados
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Cómo agregar un nuevo bloque
|
|
99
|
+
|
|
100
|
+
Solo crear `backend/blocks/nuevo_bloque.py`. El frontend lo detecta automáticamente.
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
METADATA = {
|
|
104
|
+
"type": "mi_bloque",
|
|
105
|
+
"label": "Mi bloque",
|
|
106
|
+
"category": "Limpieza", # Entrada / Salida | Limpieza | Análisis
|
|
107
|
+
"params": [
|
|
108
|
+
{"key": "columna", "label": "Columna", "type": "text", "default": ""},
|
|
109
|
+
{"key": "metodo", "label": "Método", "type": "select", "options": ["a", "b"], "default": "a"},
|
|
110
|
+
{"key": "activo", "label": "Activar", "type": "toggle", "default": False},
|
|
111
|
+
]
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
def run(df, params):
|
|
115
|
+
col = params.get("columna")
|
|
116
|
+
df = df.drop(columns=[col])
|
|
117
|
+
code = [f'df = df.drop(columns=["{col}"])']
|
|
118
|
+
return df, code
|
|
119
|
+
```
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# DataICI — v0.2
|
|
2
|
+
|
|
3
|
+
Herramienta visual de preprocesamiento de datos para estudiantes de Ingeniería Civil Industrial.
|
|
4
|
+
|
|
5
|
+
## Requisitos previos
|
|
6
|
+
- Python 3.10+ → https://python.org
|
|
7
|
+
- Node.js 18+ → https://nodejs.org
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Instalación y ejecución
|
|
12
|
+
|
|
13
|
+
### 1. Backend (FastAPI + pandas)
|
|
14
|
+
|
|
15
|
+
Abre una terminal en la carpeta `dataici/`:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Windows
|
|
19
|
+
cd backend
|
|
20
|
+
pip install -r requirements.txt
|
|
21
|
+
uvicorn main:app --reload
|
|
22
|
+
|
|
23
|
+
# Mac
|
|
24
|
+
cd backend
|
|
25
|
+
pip3 install -r requirements.txt
|
|
26
|
+
uvicorn main:app --reload
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Backend corriendo en: http://localhost:8000
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
### 2. Frontend (React)
|
|
34
|
+
|
|
35
|
+
Abre **otra terminal**:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
cd frontend
|
|
39
|
+
npm install
|
|
40
|
+
npm run dev
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
App disponible en: http://localhost:5173
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Estructura del proyecto
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
dataici/
|
|
51
|
+
├── backend/
|
|
52
|
+
│ ├── main.py ← API FastAPI
|
|
53
|
+
│ ├── requirements.txt
|
|
54
|
+
│ └── blocks/ ← un archivo por bloque
|
|
55
|
+
│ ├── load_csv.py
|
|
56
|
+
│ ├── drop_nulls.py
|
|
57
|
+
│ ├── filter_rows.py
|
|
58
|
+
│ ├── groupby.py
|
|
59
|
+
│ └── export_csv.py
|
|
60
|
+
│
|
|
61
|
+
└── frontend/
|
|
62
|
+
├── package.json
|
|
63
|
+
├── vite.config.js
|
|
64
|
+
└── src/
|
|
65
|
+
├── App.jsx ← app principal
|
|
66
|
+
├── nodes/
|
|
67
|
+
│ └── BlockNode.jsx ← nodo del canvas
|
|
68
|
+
└── panels/
|
|
69
|
+
├── Sidebar.jsx ← bloques disponibles
|
|
70
|
+
├── ParamsPanel.jsx ← parámetros del bloque
|
|
71
|
+
└── PreviewPanel.jsx ← resultados
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Cómo agregar un nuevo bloque
|
|
77
|
+
|
|
78
|
+
Solo crear `backend/blocks/nuevo_bloque.py`. El frontend lo detecta automáticamente.
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
METADATA = {
|
|
82
|
+
"type": "mi_bloque",
|
|
83
|
+
"label": "Mi bloque",
|
|
84
|
+
"category": "Limpieza", # Entrada / Salida | Limpieza | Análisis
|
|
85
|
+
"params": [
|
|
86
|
+
{"key": "columna", "label": "Columna", "type": "text", "default": ""},
|
|
87
|
+
{"key": "metodo", "label": "Método", "type": "select", "options": ["a", "b"], "default": "a"},
|
|
88
|
+
{"key": "activo", "label": "Activar", "type": "toggle", "default": False},
|
|
89
|
+
]
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
def run(df, params):
|
|
93
|
+
col = params.get("columna")
|
|
94
|
+
df = df.drop(columns=[col])
|
|
95
|
+
code = [f'df = df.drop(columns=["{col}"])']
|
|
96
|
+
return df, code
|
|
97
|
+
```
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dataforge-studio
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Studio visual de preprocesamiento de datos — Universidad Alberto Hurtado
|
|
5
|
+
Author-email: Álvaro Riquelme <alvaroriquelme.14@gmail.com>
|
|
6
|
+
License-Expression: LicenseRef-Proprietary
|
|
7
|
+
Project-URL: Homepage, https://dataforgeUAH.github.io/dataici
|
|
8
|
+
Keywords: data,preprocessing,pandas,visual,pipeline,uah
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
12
|
+
Classifier: Intended Audience :: Education
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: fastapi>=0.110.0
|
|
16
|
+
Requires-Dist: uvicorn[standard]>=0.29.0
|
|
17
|
+
Requires-Dist: pandas>=2.0.0
|
|
18
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
19
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
20
|
+
Requires-Dist: matplotlib>=3.7.0
|
|
21
|
+
Requires-Dist: numpy>=1.24.0
|
|
22
|
+
|
|
23
|
+
# DataICI — v0.2
|
|
24
|
+
|
|
25
|
+
Herramienta visual de preprocesamiento de datos para estudiantes de Ingeniería Civil Industrial.
|
|
26
|
+
|
|
27
|
+
## Requisitos previos
|
|
28
|
+
- Python 3.10+ → https://python.org
|
|
29
|
+
- Node.js 18+ → https://nodejs.org
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Instalación y ejecución
|
|
34
|
+
|
|
35
|
+
### 1. Backend (FastAPI + pandas)
|
|
36
|
+
|
|
37
|
+
Abre una terminal en la carpeta `dataici/`:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
# Windows
|
|
41
|
+
cd backend
|
|
42
|
+
pip install -r requirements.txt
|
|
43
|
+
uvicorn main:app --reload
|
|
44
|
+
|
|
45
|
+
# Mac
|
|
46
|
+
cd backend
|
|
47
|
+
pip3 install -r requirements.txt
|
|
48
|
+
uvicorn main:app --reload
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Backend corriendo en: http://localhost:8000
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
### 2. Frontend (React)
|
|
56
|
+
|
|
57
|
+
Abre **otra terminal**:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
cd frontend
|
|
61
|
+
npm install
|
|
62
|
+
npm run dev
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
App disponible en: http://localhost:5173
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Estructura del proyecto
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
dataici/
|
|
73
|
+
├── backend/
|
|
74
|
+
│ ├── main.py ← API FastAPI
|
|
75
|
+
│ ├── requirements.txt
|
|
76
|
+
│ └── blocks/ ← un archivo por bloque
|
|
77
|
+
│ ├── load_csv.py
|
|
78
|
+
│ ├── drop_nulls.py
|
|
79
|
+
│ ├── filter_rows.py
|
|
80
|
+
│ ├── groupby.py
|
|
81
|
+
│ └── export_csv.py
|
|
82
|
+
│
|
|
83
|
+
└── frontend/
|
|
84
|
+
├── package.json
|
|
85
|
+
├── vite.config.js
|
|
86
|
+
└── src/
|
|
87
|
+
├── App.jsx ← app principal
|
|
88
|
+
├── nodes/
|
|
89
|
+
│ └── BlockNode.jsx ← nodo del canvas
|
|
90
|
+
└── panels/
|
|
91
|
+
├── Sidebar.jsx ← bloques disponibles
|
|
92
|
+
├── ParamsPanel.jsx ← parámetros del bloque
|
|
93
|
+
└── PreviewPanel.jsx ← resultados
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Cómo agregar un nuevo bloque
|
|
99
|
+
|
|
100
|
+
Solo crear `backend/blocks/nuevo_bloque.py`. El frontend lo detecta automáticamente.
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
METADATA = {
|
|
104
|
+
"type": "mi_bloque",
|
|
105
|
+
"label": "Mi bloque",
|
|
106
|
+
"category": "Limpieza", # Entrada / Salida | Limpieza | Análisis
|
|
107
|
+
"params": [
|
|
108
|
+
{"key": "columna", "label": "Columna", "type": "text", "default": ""},
|
|
109
|
+
{"key": "metodo", "label": "Método", "type": "select", "options": ["a", "b"], "default": "a"},
|
|
110
|
+
{"key": "activo", "label": "Activar", "type": "toggle", "default": False},
|
|
111
|
+
]
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
def run(df, params):
|
|
115
|
+
col = params.get("columna")
|
|
116
|
+
df = df.drop(columns=[col])
|
|
117
|
+
code = [f'df = df.drop(columns=["{col}"])']
|
|
118
|
+
return df, code
|
|
119
|
+
```
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
dataforge_studio.egg-info/PKG-INFO
|
|
4
|
+
dataforge_studio.egg-info/SOURCES.txt
|
|
5
|
+
dataforge_studio.egg-info/dependency_links.txt
|
|
6
|
+
dataforge_studio.egg-info/entry_points.txt
|
|
7
|
+
dataforge_studio.egg-info/requires.txt
|
|
8
|
+
dataforge_studio.egg-info/top_level.txt
|
|
9
|
+
dataici/__init__.py
|
|
10
|
+
dataici/charts.py
|
|
11
|
+
dataici/cli.py
|
|
12
|
+
dataici/main.py
|
|
13
|
+
dataici/blocks/__init__.py
|
|
14
|
+
dataici/blocks/aggregate.py
|
|
15
|
+
dataici/blocks/append_column.py
|
|
16
|
+
dataici/blocks/concatenate.py
|
|
17
|
+
dataici/blocks/drop_columns.py
|
|
18
|
+
dataici/blocks/filter_rows.py
|
|
19
|
+
dataici/blocks/handle_missings.py
|
|
20
|
+
dataici/blocks/load_csv.py
|
|
21
|
+
dataici/blocks/read_excel.py
|
|
22
|
+
dataici/blocks/rename_columns.py
|
|
23
|
+
dataici/blocks/reorder_columns.py
|
|
24
|
+
dataici/blocks/replace_values.py
|
|
25
|
+
dataici/blocks/resample.py
|
|
26
|
+
dataici/blocks/sample_rows.py
|
|
27
|
+
dataici/blocks/select_columns.py
|
|
28
|
+
dataici/blocks/set_dtypes.py
|
|
29
|
+
dataici/blocks/set_index.py
|
|
30
|
+
dataici/blocks/write_csv.py
|
|
31
|
+
dataici/static/index.html
|
|
32
|
+
dataici/static/assets/index-CYGnphoW.js
|
|
33
|
+
dataici/static/assets/index-DLK3-mBP.css
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
dataici
|
|
File without changes
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
# Block descriptor for the "Aggregate" node. Every parameter is a free-text
# field, so the entries are generated from (key, label, default) triples.
METADATA = {
    "type": "aggregate",
    "label": "Aggregate",
    "category": "Resampling",
    "params": [
        {"key": key, "label": label, "type": "text", "default": default}
        for key, label, default in (
            ("custom", "Custom Functions", "false"),
            ("func", "General function", "mean"),
            ("col_funcs", "Per-column funcs", "{}"),
        )
    ],
}

# A pandas resampler has no .unique() method, so that name is redirected
# to nunique before it is looked up.
_ALIASES = dict(unique="nunique")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _is_resampler(obj):
|
|
19
|
+
try:
|
|
20
|
+
from pandas.core.resample import DatetimeIndexResampler
|
|
21
|
+
return isinstance(obj, DatetimeIndexResampler)
|
|
22
|
+
except ImportError:
|
|
23
|
+
pass
|
|
24
|
+
return hasattr(obj, "_selected_obj") and not hasattr(obj, "to_dict")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def run(obj, params):
    """Aggregate a DataFrame or resampler and return the result with its code.

    Args:
        obj: The upstream object; ``_is_resampler`` decides whether the
            generated code refers to it as ``resampler`` or ``df``.
        params: Block parameters.
            ``custom`` - the string "true" (case-insensitive) enables the
            per-column mode.
            ``func`` - name of the method to call on ``obj`` in the general
            mode; defaults to "mean".
            ``col_funcs`` - column -> function mapping, either as a JSON
            object string or as an already-parsed dict.

    Returns:
        A ``(result, code)`` tuple where ``code`` is a one-element list
        holding the equivalent pandas statement.
    """
    custom = str(params.get("custom", "false")).strip().lower() == "true"
    func = (params.get("func", "mean") or "mean").strip()

    # Accept an already-parsed dict as well as a JSON string. Previously a
    # dict made json.loads raise TypeError, which was swallowed, and the
    # per-column functions were silently ignored.
    col_funcs_raw = params.get("col_funcs", "{}")
    if isinstance(col_funcs_raw, dict):
        col_funcs = col_funcs_raw
    else:
        try:
            col_funcs = json.loads(col_funcs_raw) if col_funcs_raw else {}
        except (TypeError, ValueError):
            # Best effort: unreadable input falls back to the general function.
            col_funcs = {}
        if not isinstance(col_funcs, dict):
            # Valid JSON that is not an object cannot map columns to functions.
            col_funcs = {}

    prefix = "resampler" if _is_resampler(obj) else "df"

    if custom and col_funcs:
        # Apply the alias table to each per-column function name.
        mapped = {col: _ALIASES.get(fn, fn) for col, fn in col_funcs.items()}
        df = obj.agg(mapped)
        code = [f"df = {prefix}.agg({json.dumps(mapped)})"]
    else:
        actual = _ALIASES.get(func, func)
        # NOTE(review): `func` comes from block parameters and is dispatched
        # through getattr, so any zero-argument attribute of `obj` can be
        # invoked - consider restricting it to a known set of aggregations.
        df = getattr(obj, actual)()
        code = [f"df = {prefix}.{actual}()"]

    return df, code
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
# Block descriptor for the "Append a Column" node.
METADATA = {
    "type": "append_column",
    "label": "Append a Column",
    "category": "Columns",
    "params": [
        # Name of the column to create.
        {
            "key": "colname",
            "label": "colname",
            "type": "text",
            "default": "new_col",
        },
    ],
}
|
|
11
|
+
|
|
12
|
+
def run(df, params):
    """Return a copy of *df* with a new, empty column appended.

    Args:
        df: Input DataFrame; it is not modified.
        params: Block parameters. ``colname`` is the name of the new column
            (default "new_col"); surrounding whitespace is stripped.

    Returns:
        A ``(df, code)`` tuple where ``code`` is a one-element list holding
        the equivalent pandas statement.

    Raises:
        ValueError: If the column name is missing or blank.
    """
    raw_name = params.get("colname", "new_col")
    # An explicit None counts as "no name given" instead of crashing on .strip().
    colname = "" if raw_name is None else str(raw_name).strip()
    if not colname:
        raise ValueError("Especifica el nombre de la nueva columna.")

    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()
    # Assign a real missing value so the data matches the generated code;
    # the previous implementation stored the literal string 'None'.
    df[colname] = None
    code = [f'df["{colname}"] = None']
    return df, code
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
# Block descriptor for the "Concatenate" node. Every parameter is a free-text
# field, so the entries are generated from (key, label, default) triples.
METADATA = {
    "type": "concatenate",
    "label": "Concatenate",
    "category": "DataFrame",
    "multi_input": True,
    "params": [
        {"key": key, "label": label, "type": "text", "default": default}
        for key, label, default in (
            ("input_count", "Número de entradas", "2"),
            ("axis", "Axis", "index"),
            ("join", "Join", "outer"),
            ("change_col_names", "Change column names", "None"),
            ("ignore_index", "Ignore index", "false"),
            ("sort", "Sort", "false"),
        )
    ],
}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _bool(val):
|
|
20
|
+
if isinstance(val, bool):
|
|
21
|
+
return val
|
|
22
|
+
return str(val).strip().lower() == "true"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def run(dfs, params):
    """Concatenate the upstream DataFrames with ``pd.concat``.

    Args:
        dfs: List of DataFrames received from upstream nodes.
        params: Block parameters.
            ``axis`` - "index", "rows", 0 or "0" (case-insensitive,
            whitespace ignored) selects row-wise concatenation; any other
            value selects column-wise concatenation.
            ``join`` - forwarded to ``pd.concat`` (default "outer").
            ``change_col_names`` - "prefix" or "suffix" tags each frame's
            columns with ``df<N>`` before a column-wise concat; any other
            value leaves the names alone.
            ``ignore_index`` - boolean-like; only forwarded when
            concatenating along the rows.
            ``sort`` - boolean-like; forwarded to ``pd.concat``.

    Returns:
        A ``(df, code)`` tuple where ``code`` lists the equivalent pandas
        statements, referring to the inputs as ``df_1``, ``df_2``, ...

    Raises:
        ValueError: If fewer than two DataFrames are supplied.
    """
    # Accept every pandas spelling of the row axis. Previously only the exact
    # string "index" selected axis 0, so 0, "0", "rows" or " Index " silently
    # produced a column-wise concat.
    axis_raw = params.get("axis", "index")
    axis = 0 if str(axis_raw).strip().lower() in ("index", "rows", "0") else 1

    join = params.get("join", "outer")
    change_cols = params.get("change_col_names", "None")  # None | prefix | suffix
    ignore_index = _bool(params.get("ignore_index", False))
    sort = _bool(params.get("sort", False))

    if len(dfs) < 2:
        raise ValueError("Concatenate necesita al menos 2 DataFrames.")

    code_lines = []

    # Tag column names with the frame number; this only applies to a
    # column-wise concat.
    if axis == 1 and change_cols in ("prefix", "suffix"):
        renamed = []
        for n, frame in enumerate(dfs, start=1):
            if change_cols == "prefix":
                frame = frame.rename(columns=lambda c, n=n: f"df{n}_{c}")
                code_lines.append(
                    f"df_{n} = df_{n}.rename(columns=lambda c: f'df{n}_{{c}}')"
                )
            else:
                frame = frame.rename(columns=lambda c, n=n: f"{c}_df{n}")
                code_lines.append(
                    f"df_{n} = df_{n}.rename(columns=lambda c: f'{{c}}_df{n}')"
                )
            renamed.append(frame)
        dfs = renamed

    # ignore_index is only passed on for a row-wise concat.
    kwargs = {"axis": axis, "join": join, "sort": sort}
    if axis == 0:
        kwargs["ignore_index"] = ignore_index

    df_result = pd.concat(dfs, **kwargs)

    frames_repr = ", ".join(f"df_{n}" for n in range(1, len(dfs) + 1))
    kw_str = ", ".join(f"{k}={v!r}" for k, v in kwargs.items())
    code_lines.append(f"df = pd.concat([{frames_repr}], {kw_str})")

    return df_result, code_lines
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Block descriptor for the "Drop Columns" node.
METADATA = {
    "type": "drop_columns",
    "label": "Drop Columns",
    "category": "Columns",
    "params": [
        # Comma-separated list of the columns to remove.
        {
            "key": "columns",
            "label": "columns",
            "type": "text",
            "default": "",
        },
    ],
}
|
|
9
|
+
|
|
10
|
+
def run(df, params):
    """Drop the requested columns from *df*.

    Args:
        df: Input DataFrame.
        params: Block parameters. ``columns`` is a comma-separated string of
            column names; blanks around and between names are ignored.

    Returns:
        A ``(df, code)`` tuple where ``code`` is a one-element list holding
        the equivalent pandas statement.

    Raises:
        ValueError: If no column is given or any requested column is absent.
    """
    requested = []
    for piece in params.get("columns", "").split(","):
        name = piece.strip()
        if name:
            requested.append(name)

    if not requested:
        raise ValueError("Selecciona al menos una columna para eliminar.")

    # Report every unknown name at once rather than only the first one.
    absent = [name for name in requested if name not in df.columns]
    if absent:
        raise ValueError(f"Columnas no encontradas: {absent}")

    trimmed = df.drop(columns=requested)
    return trimmed, [f"df = df.drop(columns={requested})"]
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
# Block descriptor for the "Filter Rows" node.
METADATA = {
    "type": "filter_rows",
    "label": "Filter Rows",
    "category": "DataFrame",
    "params": [
        # JSON-encoded list of condition objects.
        {
            "key": "conditions",
            "label": "conditions",
            "type": "text",
            "default": "[]",
        },
    ],
}
|
|
12
|
+
|
|
13
|
+
def _condition_mask(df, cond):
    """Build the boolean mask and the matching code expression for one condition.

    Returns a ``(mask, expr)`` tuple. Raises ``ValueError`` for an unknown
    column, an unparsable value or an unknown value type.
    """
    op_method = {"==": "eq", "!=": "ne", "<": "lt", "<=": "le", ">": "gt", ">=": "ge"}

    col = cond.get("column", "")
    op = cond.get("operator", "==")
    typ = cond.get("type", "number")
    val = str(cond.get("value", "0")).strip()

    if not col or col not in df.columns:
        raise ValueError(f"Columna '{col}' no encontrada en el DataFrame.")

    series = df[col]
    # repr() keeps the generated code valid even when a name contains quotes.
    target = f"df[{col!r}]"

    if op == "isna":
        mask, expr = series.isna(), f"{target}.isna()"
    elif op == "notna":
        mask, expr = series.notna(), f"{target}.notna()"
    elif op == "isin":
        items = [v.strip() for v in val.split(",") if v.strip()]
        if typ == "number":
            try:
                parsed = [float(v) for v in items]
            except ValueError as err:
                raise ValueError(
                    f"isin numérico: valores inválidos → {items}"
                ) from err
        else:
            # Any non-numeric type is treated as a list of strings.
            parsed = [v.strip("'\"") for v in items]
        mask, expr = series.isin(parsed), f"{target}.isin({parsed!r})"
    else:
        # An unrecognised operator falls back to equality.
        method = op_method.get(op, "eq")

        if typ == "number":
            try:
                operand = float(val)
            except ValueError as err:
                raise ValueError(f"Valor numérico inválido: '{val}'") from err
            operand_code = repr(operand)
        elif typ == "string":
            operand = val.strip("'\"")
            operand_code = repr(operand)
        elif typ == "datetime":
            try:
                operand = pd.Timestamp(val)
            except Exception as err:
                raise ValueError(f"Fecha inválida: '{val}'") from err
            operand_code = f"pd.Timestamp({val!r})"
        elif typ == "column":
            if val not in df.columns:
                raise ValueError(f"Columna de comparación '{val}' no existe.")
            operand = df[val]
            operand_code = f"df[{val!r}]"
        else:
            raise ValueError(f"Tipo desconocido: '{typ}'")

        mask = getattr(series, method)(operand)
        expr = f"{target}.{method}({operand_code})"

    if cond.get("not", False):
        mask = ~mask
        expr = f"~({expr})"

    return mask, expr


def run(df, params):
    """Keep the rows of *df* that satisfy a chain of conditions.

    Args:
        df: Input DataFrame.
        params: Block parameters. ``conditions`` is a list of condition
            dicts, or a JSON string encoding one. Each condition may carry
            ``column``, ``operator`` (==, !=, <, <=, >, >=, isna, notna,
            isin), ``type`` (number, string, datetime, column), ``value``,
            ``not`` and ``logical`` (and, or, xor). ``logical`` says how the
            condition joins the ones before it and is ignored on the first.

    Returns:
        A ``(df, code)`` tuple where ``code`` lists the equivalent pandas
        statements.

    Raises:
        ValueError: If the conditions cannot be read, are empty, or any
            single condition is invalid.
    """
    raw = params.get("conditions", "[]")
    try:
        conditions = json.loads(raw) if isinstance(raw, str) else raw
    except (ValueError, RecursionError) as err:
        raise ValueError("Error al leer las condiciones.") from err

    if not conditions:
        raise ValueError("Agrega al menos una condición Where.")

    # Each connective pairs the symbol written to the generated code with the
    # operation applied to the masks, so the two cannot drift apart.
    combine = {
        "and": ("&", lambda a, b: a & b),
        "or": ("|", lambda a, b: a | b),
        "xor": ("^", lambda a, b: a ^ b),
    }

    result_mask = None
    cond_lines = []
    for cond in conditions:
        mask, expr = _condition_mask(df, cond)
        if result_mask is None:
            result_mask = mask
            cond_lines.append(f" ({expr})")
            continue
        # An unknown connective is treated as "and" for BOTH the mask and the
        # code. Previously the code showed "&" while the mask was dropped.
        sym, apply = combine.get(cond.get("logical", "and"), combine["and"])
        result_mask = apply(result_mask, mask)
        cond_lines.append(f" {sym} ({expr})")

    df = df[result_mask]

    code = ["cond = (", *cond_lines, ")", "df = df[cond]"]
    return df, code
|