MEDfl 0.2.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- MEDfl/LearningManager/__init__.py +13 -13
- MEDfl/LearningManager/client.py +150 -181
- MEDfl/LearningManager/dynamicModal.py +287 -287
- MEDfl/LearningManager/federated_dataset.py +60 -60
- MEDfl/LearningManager/flpipeline.py +192 -192
- MEDfl/LearningManager/model.py +223 -223
- MEDfl/LearningManager/params.yaml +14 -14
- MEDfl/LearningManager/params_optimiser.py +442 -442
- MEDfl/LearningManager/plot.py +229 -229
- MEDfl/LearningManager/server.py +181 -189
- MEDfl/LearningManager/strategy.py +82 -138
- MEDfl/LearningManager/utils.py +331 -331
- MEDfl/NetManager/__init__.py +10 -10
- MEDfl/NetManager/database_connector.py +43 -43
- MEDfl/NetManager/dataset.py +92 -92
- MEDfl/NetManager/flsetup.py +320 -320
- MEDfl/NetManager/net_helper.py +254 -254
- MEDfl/NetManager/net_manager_queries.py +142 -142
- MEDfl/NetManager/network.py +194 -194
- MEDfl/NetManager/node.py +184 -184
- MEDfl/__init__.py +2 -2
- MEDfl/scripts/__init__.py +1 -1
- MEDfl/scripts/base.py +29 -29
- MEDfl/scripts/create_db.py +126 -126
- Medfl/LearningManager/__init__.py +13 -0
- Medfl/LearningManager/client.py +150 -0
- Medfl/LearningManager/dynamicModal.py +287 -0
- Medfl/LearningManager/federated_dataset.py +60 -0
- Medfl/LearningManager/flpipeline.py +192 -0
- Medfl/LearningManager/model.py +223 -0
- Medfl/LearningManager/params.yaml +14 -0
- Medfl/LearningManager/params_optimiser.py +442 -0
- Medfl/LearningManager/plot.py +229 -0
- Medfl/LearningManager/server.py +181 -0
- Medfl/LearningManager/strategy.py +82 -0
- Medfl/LearningManager/utils.py +331 -0
- Medfl/NetManager/__init__.py +10 -0
- Medfl/NetManager/database_connector.py +43 -0
- Medfl/NetManager/dataset.py +92 -0
- Medfl/NetManager/flsetup.py +320 -0
- Medfl/NetManager/net_helper.py +254 -0
- Medfl/NetManager/net_manager_queries.py +142 -0
- Medfl/NetManager/network.py +194 -0
- Medfl/NetManager/node.py +184 -0
- Medfl/__init__.py +3 -0
- Medfl/scripts/__init__.py +2 -0
- Medfl/scripts/base.py +30 -0
- Medfl/scripts/create_db.py +126 -0
- alembic/env.py +61 -61
- {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info}/METADATA +120 -108
- medfl-2.0.0.dist-info/RECORD +55 -0
- {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info}/WHEEL +1 -1
- {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info/licenses}/LICENSE +674 -674
- MEDfl-0.2.1.dist-info/RECORD +0 -31
- {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info}/top_level.txt +0 -0
MEDfl/NetManager/node.py
CHANGED
@@ -1,184 +1,184 @@
|
|
1
|
-
import pandas as pd
|
2
|
-
|
3
|
-
from .net_helper import *
|
4
|
-
from .net_manager_queries import *
|
5
|
-
from MEDfl.LearningManager.utils import params
|
6
|
-
from MEDfl.NetManager.database_connector import DatabaseManager
|
7
|
-
|
8
|
-
from sqlalchemy import text, exc
|
9
|
-
|
10
|
-
|
11
|
-
class Node:
    """
    A class representing a node in the network.

    Attributes:
        name (str): The name of the node.
        train (int): An integer flag representing whether the node is used for training (1) or testing (0).
        test_fraction (float): The fraction of data used for testing. Forced to 1.0 when train == 0.
        engine: The database connection on which the node's queries are executed.
    """

    def __init__(
        self, name: str, train: int, test_fraction: float = 0.2, engine=None
    ):
        """
        Initialize a Node instance.

        Parameters:
            name (str): The name of the node.
            train (int): An integer flag representing whether the node is used for training (1) or testing (0).
            test_fraction (float, optional): The fraction of data used for testing when train=1. Default is 0.2.
            engine (optional): An existing database connection to reuse. When None,
                a new connection is opened through DatabaseManager.
        """
        self.name = name
        self.train = train
        self.test_fraction = 1.0 if self.train == 0 else test_fraction

        # Honour a caller-supplied connection. The argument used to be
        # accepted but ignored, so a new connection was always opened.
        self.engine = engine if engine is not None else self._open_connection()

    @staticmethod
    def _open_connection():
        """Open and return a database connection through DatabaseManager."""
        db_manager = DatabaseManager()
        db_manager.connect()
        return db_manager.get_connection()

    def validate(self):
        """Validate name, train, test_fraction.

        Raises:
            TypeError: If name is not a str, train is not an int, or
                test_fraction is not a float.
        """
        if not isinstance(self.name, str):
            raise TypeError("name argument must be a string")

        if not isinstance(self.train, int):
            raise TypeError("train argument must be an int")

        if not isinstance(self.test_fraction, float):
            raise TypeError("test_fraction argument must be a float")

    def create_node(self, NetId: int):
        """Create a node in the database.

        Parameters:
            NetId (int): The ID of the network to which the node belongs.

        Returns:
            None
        """
        # NOTE(review): values are spliced into the query template with
        # str.format; the template is defined elsewhere, so it is left as is.
        self.engine.execute(
            text(INSERT_NODE_QUERY.format(self.name, NetId, self.train))
        )

    def delete_node(self):
        """Delete the node from the database."""
        self.engine.execute(text(DELETE_NODE_QUERY.format(self.name)))

    def check_dataset_compatibility(self, data_df):
        """Check if the dataset is compatible with the master dataset.

        Prints a message and returns when the master table does not exist.

        Parameters:
            data_df (DataFrame): The dataset to check.

        Returns:
            None
        """
        if master_table_exists() != 1:
            print("MasterDataset doesn't exist")
            return

        columns = data_df.columns.tolist()

        # get master_dataset columns
        result_proxy = self.engine.execute(SELECT_MASTER_COLUMNS_QUERY)
        master_table_columns = result_proxy.keys()

        # NOTE(review): this asserts a *list*, which is truthy whenever there
        # is at least one column pair, so mismatched names are never rejected.
        # Kept unchanged on purpose: turning it into a real comparison could
        # reject uploads that succeed today. Confirm the master table schema
        # before tightening this check.
        assert [x == y for x, y in zip(master_table_columns, columns)]

    def update_node(self):
        """Update the node information (not implemented)."""
        pass

    def get_dataset(self, column_name: str = None):
        """Get the dataset for the node based on the given column name.

        Parameters:
            column_name (str, optional): The column name to filter the dataset. Default is None.

        Returns:
            DataFrame: The dataset associated with the node.
        """
        # Looked up before branching, as in the original; only the
        # node-id query below uses the value.
        NodeId = get_nodeid_from_name(self.name)
        if column_name is not None:
            query = text(SELECT_DATASET_BY_COLUMN_QUERY.format(column_name, self.name))
        else:
            query = text(SELECT_DATASET_BY_NODE_ID_QUERY.format(NodeId))

        result_proxy = self.engine.execute(query)
        return pd.DataFrame(result_proxy.fetchall(), columns=result_proxy.keys())

    def upload_dataset(self, dataset_name: str, path_to_csv: str = None):
        """Upload the dataset to the database for the node.

        The CSV is read, passed to check_dataset_compatibility and
        process_eicu, then inserted into the Datasets table in batches.
        Only SQLAlchemy errors are caught (and printed); any other
        exception propagates to the caller.

        Parameters:
            dataset_name (str): The name of the dataset.
            path_to_csv (str, optional): Path to the CSV file containing the dataset.
                Default is params['path_to_test_csv'], resolved when the method is called.

        Returns:
            None
        """
        if path_to_csv is None:
            # Resolved at call time so a missing key cannot break the import
            # of this module.
            path_to_csv = params['path_to_test_csv']

        try:
            data_df = pd.read_csv(path_to_csv)
            nodeId = get_nodeid_from_name(self.name)
            columns = data_df.columns.tolist()
            self.check_dataset_compatibility(data_df)

            data_df = process_eicu(data_df)

            # The statement is identical for every batch, so build it once.
            # NOTE(review): column names come from the CSV header and are
            # interpolated into the SQL; the values themselves are bound.
            insert_query = text(
                f"INSERT INTO Datasets (DataSetName, NodeId, {', '.join(columns)}) VALUES (:dataset_name, :nodeId, {', '.join([':' + col for col in columns])})"
            )

            # Insert data in batches
            batch_size = 1000  # Adjust as needed
            for start_idx in range(0, len(data_df), batch_size):
                batch_data = data_df.iloc[start_idx:start_idx + batch_size]
                # Named "rows" so the module-level "params" is not shadowed.
                rows = [
                    {"dataset_name": dataset_name, "nodeId": nodeId, **row}
                    for row in batch_data.to_dict(orient='records')
                ]
                self.engine.execute(insert_query, rows)
        except exc.SQLAlchemyError as e:
            print(f"Error uploading dataset: {e}")

    def assign_dataset(self, dataset_name: str):
        """Assign an existing dataset to the node.

        Parameters:
            dataset_name (str): The name of the dataset to assign.

        Returns:
            None
        """
        nodeId = get_nodeid_from_name(self.name)
        # Bound parameters instead of string interpolation, so a quote in
        # the dataset name cannot alter the statement.
        query = text(
            "UPDATE DataSets SET nodeId = :node_id WHERE DataSetName = :dataset_name"
        )
        self.engine.execute(query, {"node_id": nodeId, "dataset_name": dataset_name})

    def unassign_dataset(self, dataset_name: str):
        """Unassign an existing dataset from its node by setting nodeId to -1.

        Parameters:
            dataset_name (str): The name of the dataset to unassign.

        Returns:
            None
        """
        query = text(
            "UPDATE DataSets SET nodeId = -1 WHERE DataSetName = :dataset_name"
        )
        self.engine.execute(query, {"dataset_name": dataset_name})

    def list_alldatasets(self):
        """List all datasets associated with the node.

        Returns:
            DataFrame: A DataFrame containing information about all datasets associated with the node.
        """
        # Uses the instance connection like every other method; the previous
        # "my_eng" is not imported by name anywhere in this module.
        return pd.read_sql(
            text(SELECT_ALL_DATASETS_QUERY.format(self.name)), self.engine
        )

    @staticmethod
    def list_allnodes():
        """List all nodes in the database.

        Returns:
            DataFrame: A DataFrame containing information about all nodes in the database.
        """
        query = text(SELECT_ALL_NODES_QUERY)
        # No instance is available here, so open a connection the same way
        # __init__ does instead of relying on the unimported "my_eng".
        return pd.read_sql(query, Node._open_connection())
|
1
|
+
import pandas as pd
|
2
|
+
|
3
|
+
from .net_helper import *
|
4
|
+
from .net_manager_queries import *
|
5
|
+
from MEDfl.LearningManager.utils import params
|
6
|
+
from MEDfl.NetManager.database_connector import DatabaseManager
|
7
|
+
|
8
|
+
from sqlalchemy import text, exc
|
9
|
+
|
10
|
+
|
11
|
+
class Node:
    """
    A class representing a node in the network.

    Attributes:
        name (str): The name of the node.
        train (int): An integer flag representing whether the node is used for training (1) or testing (0).
        test_fraction (float): The fraction of data used for testing. Forced to 1.0 when train == 0.
        engine: The database connection on which the node's queries are executed.
    """

    def __init__(
        self, name: str, train: int, test_fraction: float = 0.2, engine=None
    ):
        """
        Initialize a Node instance.

        Parameters:
            name (str): The name of the node.
            train (int): An integer flag representing whether the node is used for training (1) or testing (0).
            test_fraction (float, optional): The fraction of data used for testing when train=1. Default is 0.2.
            engine (optional): An existing database connection to reuse. When None,
                a new connection is opened through DatabaseManager.
        """
        self.name = name
        self.train = train
        self.test_fraction = 1.0 if self.train == 0 else test_fraction

        # Honour a caller-supplied connection. The argument used to be
        # accepted but ignored, so a new connection was always opened.
        self.engine = engine if engine is not None else self._open_connection()

    @staticmethod
    def _open_connection():
        """Open and return a database connection through DatabaseManager."""
        db_manager = DatabaseManager()
        db_manager.connect()
        return db_manager.get_connection()

    def validate(self):
        """Validate name, train, test_fraction.

        Raises:
            TypeError: If name is not a str, train is not an int, or
                test_fraction is not a float.
        """
        if not isinstance(self.name, str):
            raise TypeError("name argument must be a string")

        if not isinstance(self.train, int):
            raise TypeError("train argument must be an int")

        if not isinstance(self.test_fraction, float):
            raise TypeError("test_fraction argument must be a float")

    def create_node(self, NetId: int):
        """Create a node in the database.

        Parameters:
            NetId (int): The ID of the network to which the node belongs.

        Returns:
            None
        """
        # NOTE(review): values are spliced into the query template with
        # str.format; the template is defined elsewhere, so it is left as is.
        self.engine.execute(
            text(INSERT_NODE_QUERY.format(self.name, NetId, self.train))
        )

    def delete_node(self):
        """Delete the node from the database."""
        self.engine.execute(text(DELETE_NODE_QUERY.format(self.name)))

    def check_dataset_compatibility(self, data_df):
        """Check if the dataset is compatible with the master dataset.

        Prints a message and returns when the master table does not exist.

        Parameters:
            data_df (DataFrame): The dataset to check.

        Returns:
            None
        """
        if master_table_exists() != 1:
            print("MasterDataset doesn't exist")
            return

        columns = data_df.columns.tolist()

        # get master_dataset columns
        result_proxy = self.engine.execute(SELECT_MASTER_COLUMNS_QUERY)
        master_table_columns = result_proxy.keys()

        # NOTE(review): this asserts a *list*, which is truthy whenever there
        # is at least one column pair, so mismatched names are never rejected.
        # Kept unchanged on purpose: turning it into a real comparison could
        # reject uploads that succeed today. Confirm the master table schema
        # before tightening this check.
        assert [x == y for x, y in zip(master_table_columns, columns)]

    def update_node(self):
        """Update the node information (not implemented)."""
        pass

    def get_dataset(self, column_name: str = None):
        """Get the dataset for the node based on the given column name.

        Parameters:
            column_name (str, optional): The column name to filter the dataset. Default is None.

        Returns:
            DataFrame: The dataset associated with the node.
        """
        # Looked up before branching, as in the original; only the
        # node-id query below uses the value.
        NodeId = get_nodeid_from_name(self.name)
        if column_name is not None:
            query = text(SELECT_DATASET_BY_COLUMN_QUERY.format(column_name, self.name))
        else:
            query = text(SELECT_DATASET_BY_NODE_ID_QUERY.format(NodeId))

        result_proxy = self.engine.execute(query)
        return pd.DataFrame(result_proxy.fetchall(), columns=result_proxy.keys())

    def upload_dataset(self, dataset_name: str, path_to_csv: str = None):
        """Upload the dataset to the database for the node.

        The CSV is read, passed to check_dataset_compatibility and
        process_eicu, then inserted into the Datasets table in batches.
        Only SQLAlchemy errors are caught (and printed); any other
        exception propagates to the caller.

        Parameters:
            dataset_name (str): The name of the dataset.
            path_to_csv (str, optional): Path to the CSV file containing the dataset.
                Default is params['path_to_test_csv'], resolved when the method is called.

        Returns:
            None
        """
        if path_to_csv is None:
            # Resolved at call time so a missing key cannot break the import
            # of this module.
            path_to_csv = params['path_to_test_csv']

        try:
            data_df = pd.read_csv(path_to_csv)
            nodeId = get_nodeid_from_name(self.name)
            columns = data_df.columns.tolist()
            self.check_dataset_compatibility(data_df)

            data_df = process_eicu(data_df)

            # The statement is identical for every batch, so build it once.
            # NOTE(review): column names come from the CSV header and are
            # interpolated into the SQL; the values themselves are bound.
            insert_query = text(
                f"INSERT INTO Datasets (DataSetName, NodeId, {', '.join(columns)}) VALUES (:dataset_name, :nodeId, {', '.join([':' + col for col in columns])})"
            )

            # Insert data in batches
            batch_size = 1000  # Adjust as needed
            for start_idx in range(0, len(data_df), batch_size):
                batch_data = data_df.iloc[start_idx:start_idx + batch_size]
                # Named "rows" so the module-level "params" is not shadowed.
                rows = [
                    {"dataset_name": dataset_name, "nodeId": nodeId, **row}
                    for row in batch_data.to_dict(orient='records')
                ]
                self.engine.execute(insert_query, rows)
        except exc.SQLAlchemyError as e:
            print(f"Error uploading dataset: {e}")

    def assign_dataset(self, dataset_name: str):
        """Assign an existing dataset to the node.

        Parameters:
            dataset_name (str): The name of the dataset to assign.

        Returns:
            None
        """
        nodeId = get_nodeid_from_name(self.name)
        # Bound parameters instead of string interpolation, so a quote in
        # the dataset name cannot alter the statement.
        query = text(
            "UPDATE DataSets SET nodeId = :node_id WHERE DataSetName = :dataset_name"
        )
        self.engine.execute(query, {"node_id": nodeId, "dataset_name": dataset_name})

    def unassign_dataset(self, dataset_name: str):
        """Unassign an existing dataset from its node by setting nodeId to -1.

        Parameters:
            dataset_name (str): The name of the dataset to unassign.

        Returns:
            None
        """
        query = text(
            "UPDATE DataSets SET nodeId = -1 WHERE DataSetName = :dataset_name"
        )
        self.engine.execute(query, {"dataset_name": dataset_name})

    def list_alldatasets(self):
        """List all datasets associated with the node.

        Returns:
            DataFrame: A DataFrame containing information about all datasets associated with the node.
        """
        # Uses the instance connection like every other method; the previous
        # "my_eng" is not imported by name anywhere in this module.
        return pd.read_sql(
            text(SELECT_ALL_DATASETS_QUERY.format(self.name)), self.engine
        )

    @staticmethod
    def list_allnodes():
        """List all nodes in the database.

        Returns:
            DataFrame: A DataFrame containing information about all nodes in the database.
        """
        query = text(SELECT_ALL_NODES_QUERY)
        # No instance is available here, so open a connection the same way
        # __init__ does instead of relying on the unimported "my_eng".
        return pd.read_sql(query, Node._open_connection())
|
MEDfl/__init__.py
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
from .LearningManager import *
|
2
|
-
from .NetManager import *
|
1
|
+
from .LearningManager import *
|
2
|
+
from .NetManager import *
|
3
3
|
from .scripts import *
|
MEDfl/scripts/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
# from .base import *
|
1
|
+
# from .base import *
|
2
2
|
# from .create_db import *
|
MEDfl/scripts/base.py
CHANGED
@@ -1,30 +1,30 @@
|
|
1
|
-
import mysql.connector
|
2
|
-
from sqlalchemy import create_engine, text
|
3
|
-
from configparser import ConfigParser
|
4
|
-
import yaml
|
5
|
-
import pkg_resources
|
6
|
-
import os
|
7
|
-
|
8
|
-
# Percent-encoding helper for the credentials below. Imported under a private
# alias so that "from .base import *" does not re-export it.
from urllib.parse import quote as _quote

# Get the directory of the current script
current_directory = os.path.dirname(os.path.abspath(__file__))

# Load configuration from the config file
config_file_path = os.path.join(current_directory, 'db_config.ini')

config = ConfigParser()
# ConfigParser.read() silently skips a file it cannot open; in that case the
# section lookup below raises KeyError.
config.read(config_file_path)
mysql_config = config['mysql']

# Percent-encode user and password: characters such as "@", ":" or "/" in a
# credential would otherwise be parsed as URL delimiters and corrupt the
# connection string.
_db_user = _quote(mysql_config['user'], safe='')
_db_password = _quote(mysql_config['password'], safe='')

connection_string = (
    f"mysql+mysqlconnector://{_db_user}:{_db_password}@"
    f"{mysql_config['host']}:{mysql_config['port']}/{mysql_config['database']}"
)

# NOTE(review): the "autocommit" execution option is deprecated in
# SQLAlchemy 1.4 and removed in 2.0 (isolation_level="AUTOCOMMIT" replaces
# it); left unchanged until the supported SQLAlchemy version is confirmed.
eng = create_engine(
    connection_string,
    execution_options={"autocommit": True},
)
|
29
|
-
|
1
|
+
import mysql.connector
|
2
|
+
from sqlalchemy import create_engine, text
|
3
|
+
from configparser import ConfigParser
|
4
|
+
import yaml
|
5
|
+
import pkg_resources
|
6
|
+
import os
|
7
|
+
|
8
|
+
# Percent-encoding helper for the credentials below. Imported under a private
# alias so that "from .base import *" does not re-export it.
from urllib.parse import quote as _quote

# Get the directory of the current script
current_directory = os.path.dirname(os.path.abspath(__file__))

# Load configuration from the config file
config_file_path = os.path.join(current_directory, 'db_config.ini')

config = ConfigParser()
# ConfigParser.read() silently skips a file it cannot open; in that case the
# section lookup below raises KeyError.
config.read(config_file_path)
mysql_config = config['mysql']

# Percent-encode user and password: characters such as "@", ":" or "/" in a
# credential would otherwise be parsed as URL delimiters and corrupt the
# connection string.
_db_user = _quote(mysql_config['user'], safe='')
_db_password = _quote(mysql_config['password'], safe='')

connection_string = (
    f"mysql+mysqlconnector://{_db_user}:{_db_password}@"
    f"{mysql_config['host']}:{mysql_config['port']}/{mysql_config['database']}"
)

# NOTE(review): the "autocommit" execution option is deprecated in
# SQLAlchemy 1.4 and removed in 2.0 (isolation_level="AUTOCOMMIT" replaces
# it); left unchanged until the supported SQLAlchemy version is confirmed.
eng = create_engine(
    connection_string,
    execution_options={"autocommit": True},
)

# Module-level connection opened at import time.
my_eng = eng.connect()
|