MEDfl 0.1.0__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
- Medfl/LearningManager/__init__.py +13 -0
- Medfl/LearningManager/client.py +150 -0
- Medfl/LearningManager/dynamicModal.py +287 -0
- Medfl/LearningManager/federated_dataset.py +57 -0
- Medfl/LearningManager/flpipeline.py +189 -0
- Medfl/LearningManager/model.py +223 -0
- Medfl/LearningManager/params_optimiser.py +442 -0
- Medfl/LearningManager/plot.py +229 -0
- Medfl/LearningManager/server.py +179 -0
- Medfl/LearningManager/strategy.py +82 -0
- Medfl/LearningManager/utils.py +233 -0
- Medfl/NetManager/__init__.py +9 -0
- Medfl/NetManager/dataset.py +91 -0
- Medfl/NetManager/flsetup.py +304 -0
- Medfl/NetManager/net_helper.py +243 -0
- Medfl/NetManager/net_manager_queries.py +137 -0
- Medfl/NetManager/network.py +160 -0
- Medfl/NetManager/node.py +181 -0
- Medfl/__init__.py +2 -0
- {Medfl-0.1.0.dist-info → Medfl-0.1.4.dist-info}/METADATA +19 -18
- Medfl-0.1.4.dist-info/RECORD +29 -0
- {Medfl-0.1.0.dist-info → Medfl-0.1.4.dist-info}/WHEEL +1 -1
- {Medfl-0.1.0.dist-info → Medfl-0.1.4.dist-info}/top_level.txt +1 -0
- Medfl-0.1.0.dist-info/RECORD +0 -10
- {Medfl-0.1.0.data → Medfl-0.1.4.data}/scripts/setup_mysql.sh +0 -0
Medfl/NetManager/net_helper.py
@@ -0,0 +1,243 @@

from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

from sqlalchemy import text

import torch
import pandas as pd
from torch.utils.data import TensorDataset
import numpy as np

from scripts.base import my_eng


def is_str(data_df, row, x):
    """
    Check if a column in a DataFrame is of type 'object' and convert the value accordingly.

    Args:
        data_df (pandas.DataFrame): DataFrame containing the data.
        row (pandas.Series): Data row.
        x (str): Column name.

    Returns:
        str or float: Processed value based on the column type.
    """
    if data_df[x].dtype == "object":
        x = f"'{row[x]}'"
    else:
        x = row[x]
    return x


def process_eicu(data_df):
    """
    Process eICU data by filling missing numeric values with the column mean and missing non-numeric values with 'Unknown'.

    Args:
        data_df (pandas.DataFrame): Input data.

    Returns:
        pandas.DataFrame: Processed data.
    """
    # Identify numeric and non-numeric columns
    numeric_columns = data_df.select_dtypes(include=[np.number]).columns
    non_numeric_columns = data_df.select_dtypes(exclude=[np.number]).columns

    # Fill NaN in numeric columns with the column mean
    data_df[numeric_columns] = data_df[numeric_columns].fillna(
        data_df[numeric_columns].mean()
    )

    # Fill NaN in non-numeric columns with 'Unknown'
    data_df[non_numeric_columns] = data_df[non_numeric_columns].fillna("Unknown")

    try:
        data_df = data_df.reset_index(drop=True)
    except Exception:
        pass

    return data_df


# Remove undesired columns after reading from the DB
def process_data_after_reading(data, output, fill_strategy="mean", fit_encode=[], to_drop=[]):
    """
    Process data after reading from the database: encode selected columns, drop
    unwanted ones, impute missing values, and build a PyTorch TensorDataset.

    Args:
        data (pandas.DataFrame): Input data.
        output (str): Output column name.
        fill_strategy (str, optional): Imputation strategy for missing values. Default is "mean".
        fit_encode (list, optional): List of columns to be label-encoded. Default is an empty list.
        to_drop (list, optional): List of columns to be dropped from the DataFrame. Default is an empty list.

    Returns:
        torch.utils.data.TensorDataset: Processed data as a PyTorch TensorDataset.
    """

    # Check that there is a dataset assigned to the node
    if len(data) == 0:
        raise ValueError("Node doesn't have a dataset")

    encoder = LabelEncoder()
    # Encode the requested columns
    for s in fit_encode:
        try:
            data[s] = encoder.fit_transform(data[s])
        except Exception as e:
            raise ValueError(f"Failed to encode column '{s}'") from e

    # The output column of the data
    y = data[output]

    X = data

    # Remove undesired columns when reading the dataframe from the DB
    for column in to_drop:
        try:
            X = X.drop([column], axis=1)
        except Exception as e:
            raise e

    # Get the dataset features
    features = [col for col in X.columns if col != output]

    # Impute missing values using the chosen strategy
    try:
        imputer = SimpleImputer(strategy=fill_strategy)
        X[features] = imputer.fit_transform(X[features])
    except Exception as e:
        print(f"Imputation failed: {e}")

    X = torch.tensor(X.values, dtype=torch.float32)
    y = torch.tensor(y.values, dtype=torch.float32)
    data = TensorDataset(X, y)

    return data


def get_nodeid_from_name(name):
    """
    Get the NodeId from the Nodes table based on the NodeName.

    Args:
        name (str): Node name.

    Returns:
        int or None: NodeId or None if not found.
    """
    try:
        NodeId = int(
            pd.read_sql(
                text(f"SELECT NodeId FROM Nodes WHERE NodeName = '{name}'"), my_eng
            ).iloc[0, 0]
        )
    except Exception:
        NodeId = None
    return NodeId


def get_netid_from_name(name):
    """
    Get the NetId from the Networks table based on the NetName.

    Args:
        name (str): Network name.

    Returns:
        int or None: NetId or None if not found.
    """
    try:
        NetId = int(
            pd.read_sql(
                text(f"SELECT NetId FROM Networks WHERE NetName = '{name}'"),
                my_eng,
            ).iloc[0, 0]
        )
    except Exception:
        NetId = None
    return NetId


def get_flsetupid_from_name(name):
    """
    Get the FLsetupId from the FLsetup table based on the FL setup name.

    Args:
        name (str): FL setup name.

    Returns:
        int or None: FLsetupId or None if not found.
    """
    try:
        id = int(
            pd.read_sql(
                text(f"SELECT FLsetupId FROM FLsetup WHERE name = '{name}'"),
                my_eng,
            ).iloc[0, 0]
        )
    except Exception:
        id = None
    return id


def get_flpipeline_from_name(name):
    """
    Get the FLpipeline id from the FLpipeline table based on the FL pipeline name.

    Args:
        name (str): FL pipeline name.

    Returns:
        int or None: FLpipeline id or None if not found.
    """
    try:
        id = int(
            pd.read_sql(
                text(f"SELECT id FROM FLpipeline WHERE name = '{name}'"),
                my_eng,
            ).iloc[0, 0]
        )
    except Exception:
        id = None
    return id


def get_feddataset_id_from_name(name):
    """
    Get the FedId from the FedDatasets table based on the federated dataset name.

    Args:
        name (str): Federated dataset name.

    Returns:
        int or None: FedId or None if not found.
    """
    try:
        id = int(
            pd.read_sql(
                text(f"SELECT FedId FROM FedDatasets WHERE name = '{name}'"),
                my_eng,
            ).iloc[0, 0]
        )
    except Exception:
        id = None
    return id


def master_table_exists():
    """
    Check if the MasterDataset table exists in the database.

    Returns:
        bool: True if the table exists, False otherwise.
    """

    return pd.read_sql(
        text(
            "SELECT EXISTS ( SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_NAME = 'MasterDataset' )"
        ),
        my_eng,
    ).values[0][0]
Medfl/NetManager/net_manager_queries.py
@@ -0,0 +1,137 @@

from .net_helper import is_str

INSERT_DATASET = """
    INSERT INTO DataSets(DataSetName, NodeId, {columns})
    VALUES (:name, :NodeId, {values})
"""
DELETE_DATASET = """
    DELETE FROM DataSets WHERE DataSetName = :name
"""

SELECT_ALL_DATASET_NAMES = """
    SELECT DISTINCT DataSetName, NodeId FROM DataSets
"""

SELECT_DATASET_BY_NAME = """
    SELECT * FROM DataSets WHERE DataSetName = :name
"""

# Node queries

INSERT_NODE_QUERY = (
    "INSERT INTO Nodes(NodeName, NetId, train) VALUES ('{}', {}, {})"
)
DELETE_NODE_QUERY = "DELETE FROM Nodes WHERE NodeName = '{}'"
SELECT_MASTER_COLUMNS_QUERY = "SELECT * FROM MasterDataset LIMIT 1"
SELECT_DATASET_BY_COLUMN_QUERY = "SELECT * FROM MasterDataset WHERE {} = '{}'"
SELECT_DATASET_BY_NODE_ID_QUERY = "SELECT * FROM DataSets WHERE NodeId = {}"

SELECT_ALL_DATASETS_QUERY = "SELECT DISTINCT DataSetName, NodeName FROM DataSets, Nodes WHERE Nodes.NodeName = '{}' AND Nodes.NodeId = DataSets.NodeId"
SELECT_ALL_NODES_QUERY = "SELECT * FROM Nodes"


# SQL query to insert a new network
INSERT_NETWORK_QUERY = "INSERT INTO Networks(NetName) VALUES ('{name}')"

# SQL query to delete a network
DELETE_NETWORK_QUERY = "DELETE FROM Networks WHERE NetName = '{name}'"

# SQL query to retrieve a network
GET_NETWORK_QUERY = "SELECT * FROM Networks WHERE NetName = '{name}'"


# SQL query to update a network
UPDATE_NETWORK_QUERY = (
    "UPDATE Networks SET FLsetupId = {FLsetupId} WHERE NetId = {id}"
)

# SQL query to retrieve all nodes for a network
LIST_ALL_NODES_QUERY = "SELECT Nodes.NodeName, Networks.NetName FROM Nodes, Networks WHERE Networks.NetName = '{name}' AND Networks.NetId = Nodes.NetId"

# SQL query to create the MasterDataset table
CREATE_MASTER_DATASET_TABLE_QUERY = """
    CREATE TABLE IF NOT EXISTS MasterDataset (
        PatientId INT NOT NULL AUTO_INCREMENT,
        {},
        PRIMARY KEY (PatientId)
    );
"""

# SQL query to create the DataSets table
CREATE_DATASETS_TABLE_QUERY = """
    CREATE TABLE IF NOT EXISTS Datasets (
        DataSetId INT NOT NULL AUTO_INCREMENT,
        DataSetName VARCHAR(255),
        NodeId INT,
        {},
        PRIMARY KEY (DataSetId)
    );
"""

# SQL query to insert dataset values
INSERT_DATASET_VALUES_QUERY = "INSERT INTO MasterDataset({columns}, NodeId) VALUES ('{name}', {nodeId}, {values})"


# FL setup queries

CREATE_FLSETUP_QUERY = """
    INSERT INTO FLsetup (name, description, creation_date, NetId, column_name)
    VALUES (:name, :description, :creation_date, :net_id, :column_name)
"""

DELETE_FLSETUP_QUERY = """
    DELETE FROM FLsetup
    WHERE name = :name
"""

UPDATE_FLSETUP_QUERY = (
    "UPDATE FLsetup SET column_name = '{column_name}' WHERE name = '{FLsetupName}'"
)


READ_SETUP_QUERY = """
    SELECT * FROM FLsetup
    WHERE FLsetupId = :flsetup_id
"""

READ_ALL_SETUPS_QUERY = """
    SELECT * FROM FLsetup
"""

READ_NETWORK_BY_ID_QUERY = """
    SELECT * FROM Networks
    WHERE NetId = :net_id
"""

READ_DISTINCT_NODES_QUERY = """
    SELECT DISTINCT {} FROM MasterDataset
"""


# FederatedDataset queries
INSERT_FLDATASET_QUERY = (
    "INSERT INTO FedDatasets(name, FLsetupId) VALUES (:name, :FLsetupId)"
)
DELETE_FLDATASET_BY_SETUP_AND_PIPELINE_QUERY = "DELETE FROM FedDatasets WHERE FLsetupId = :FLsetupId AND FLpipeId = :FLpipeId"


UPDATE_FLDATASET_QUERY = (
    "UPDATE FedDatasets SET FLpipeId = :FLpipeId WHERE FedId = :FedId"
)
SELECT_FLDATASET_BY_NAME_QUERY = "SELECT * FROM FedDatasets WHERE name = :name"

CREATE_FLPIPELINE_QUERY = """
    INSERT INTO FLpipeline (name, description, creation_date, results)
    VALUES ('{name}', '{description}', '{creation_date}', '{result}')
"""
DELETE_FLPIPELINE_QUERY = "DELETE FROM FLpipeline WHERE name = '{name}'"

SELECT_FLPIPELINE_QUERY = "SELECT * FROM FLpipeline WHERE name = '{name}'"

CREATE_TEST_RESULTS_QUERY = """
    INSERT INTO testResults (pipelineid, nodename, confusionmatrix, accuracy, sensivity, ppv, npv, f1score, fpr, tpr)
    VALUES ('{pipelineId}', '{nodeName}', '{confusion_matrix}', '{accuracy}', '{sensivity}', '{ppv}', '{npv}', '{f1score}', '{fpr}', '{tpr}')
"""
Medfl/NetManager/network.py
@@ -0,0 +1,160 @@

# src/Medfl/NetManager/network.py

from Medfl.LearningManager.utils import *

from .net_helper import *
from .net_manager_queries import (CREATE_MASTER_DATASET_TABLE_QUERY,
                                  CREATE_DATASETS_TABLE_QUERY,
                                  DELETE_NETWORK_QUERY,
                                  INSERT_NETWORK_QUERY, LIST_ALL_NODES_QUERY,
                                  UPDATE_NETWORK_QUERY, GET_NETWORK_QUERY)
from .node import Node
import pandas as pd
from Medfl.LearningManager.utils import params


class Network:
    """
    A class representing a network.

    Attributes:
        name (str): The name of the network.
        mtable_exists (int): An integer flag indicating whether the MasterDataset table exists (1) or not (0).
    """

    def __init__(self, name: str = ""):
        """
        Initialize a Network instance.

        Parameters:
            name (str): The name of the network.
        """
        self.name = name
        self.mtable_exists = int(master_table_exists())
        self.validate()

    def validate(self):
        """Validate the network name."""

        if not isinstance(self.name, str):
            raise TypeError("name argument must be a string")

    def create_network(self):
        """Create a new network in the database."""
        my_eng.execute(text(INSERT_NETWORK_QUERY.format(name=self.name)))
        self.id = get_netid_from_name(self.name)

    def use_network(self, network_name: str):
        """Use an existing network from the database.

        Parameters:
            network_name (str): The name of the network to use.

        Returns:
            Network or None: An instance of the Network class if the network exists, else None.
        """
        network = pd.read_sql(
            text(GET_NETWORK_QUERY.format(name=network_name)),
            my_eng,
        )

        if network.NetId[0]:
            self.name = network.NetName[0]
            self.id = network.NetId[0]
            self.mtable_exists = int(master_table_exists())
            self.validate()
            return self
        else:
            return None

    def delete_network(self):
        """Delete the network from the database."""
        my_eng.execute(text(DELETE_NETWORK_QUERY.format(name=self.name)))

    def update_network(self, FLsetupId: int):
        """Update the network's FLsetupId in the database.

        Parameters:
            FLsetupId (int): The FLsetupId to update.
        """
        my_eng.execute(
            text(UPDATE_NETWORK_QUERY.format(FLsetupId=FLsetupId, id=self.id))
        )

    def add_node(self, node: Node):
        """Add a node to the network.

        Parameters:
            node (Node): The node to add.
        """
        node.create_node(self.id)

    def list_allnodes(self):
        """List all nodes in the network.

        Returns:
            DataFrame: A DataFrame containing information about all nodes in the network.
        """
        return pd.read_sql(
            text(LIST_ALL_NODES_QUERY.format(name=self.name)), my_eng
        )

    def create_master_dataset(self, path_to_csv: str = params['path_to_master_csv']):
        """
        Create the MasterDataset table and insert the dataset values.

        :param path_to_csv: Path to the CSV file containing the dataset.
        """
        print(path_to_csv)
        # Read the CSV file into a pandas DataFrame
        data_df = pd.read_csv(path_to_csv)

        # Create and populate the MasterDataset table only if it does not
        # exist yet
        if self.mtable_exists != 1:
            columns = data_df.columns.tolist()
            columns_str = ",\n".join(
                [
                    f"{col} {column_map[str(data_df[col].dtype)]}"
                    for col in columns
                ]
            )
            my_eng.execute(
                text(CREATE_MASTER_DATASET_TABLE_QUERY.format(columns_str))
            )
            my_eng.execute(text(CREATE_DATASETS_TABLE_QUERY.format(columns_str)))

            # Handle missing values before inserting
            data_df = process_eicu(data_df)

            # Insert the dataset values into the MasterDataset table
            for index, row in data_df.iterrows():
                query_1 = "INSERT INTO MasterDataset(" + "".join(
                    f"{x}," for x in columns
                )
                query_2 = "VALUES (" + "".join(
                    f"{is_str(data_df, row, x)}," for x in columns
                )
                query = query_1[:-1] + ")" + query_2[:-1] + ")"
                my_eng.execute(text(query))

            # Set the mtable_exists flag to True
            self.mtable_exists = 1

    @staticmethod
    def list_allnetworks():
        """List all networks in the database.

        Returns:
            DataFrame: A DataFrame containing information about all networks in the database.
        """
        return pd.read_sql(text("SELECT * FROM Networks"), my_eng)