MEDfl 0.2.1-py3-none-any.whl → 2.0.0-py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (55)
  1. MEDfl/LearningManager/__init__.py +13 -13
  2. MEDfl/LearningManager/client.py +150 -181
  3. MEDfl/LearningManager/dynamicModal.py +287 -287
  4. MEDfl/LearningManager/federated_dataset.py +60 -60
  5. MEDfl/LearningManager/flpipeline.py +192 -192
  6. MEDfl/LearningManager/model.py +223 -223
  7. MEDfl/LearningManager/params.yaml +14 -14
  8. MEDfl/LearningManager/params_optimiser.py +442 -442
  9. MEDfl/LearningManager/plot.py +229 -229
  10. MEDfl/LearningManager/server.py +181 -189
  11. MEDfl/LearningManager/strategy.py +82 -138
  12. MEDfl/LearningManager/utils.py +331 -331
  13. MEDfl/NetManager/__init__.py +10 -10
  14. MEDfl/NetManager/database_connector.py +43 -43
  15. MEDfl/NetManager/dataset.py +92 -92
  16. MEDfl/NetManager/flsetup.py +320 -320
  17. MEDfl/NetManager/net_helper.py +254 -254
  18. MEDfl/NetManager/net_manager_queries.py +142 -142
  19. MEDfl/NetManager/network.py +194 -194
  20. MEDfl/NetManager/node.py +184 -184
  21. MEDfl/__init__.py +2 -2
  22. MEDfl/scripts/__init__.py +1 -1
  23. MEDfl/scripts/base.py +29 -29
  24. MEDfl/scripts/create_db.py +126 -126
  25. Medfl/LearningManager/__init__.py +13 -0
  26. Medfl/LearningManager/client.py +150 -0
  27. Medfl/LearningManager/dynamicModal.py +287 -0
  28. Medfl/LearningManager/federated_dataset.py +60 -0
  29. Medfl/LearningManager/flpipeline.py +192 -0
  30. Medfl/LearningManager/model.py +223 -0
  31. Medfl/LearningManager/params.yaml +14 -0
  32. Medfl/LearningManager/params_optimiser.py +442 -0
  33. Medfl/LearningManager/plot.py +229 -0
  34. Medfl/LearningManager/server.py +181 -0
  35. Medfl/LearningManager/strategy.py +82 -0
  36. Medfl/LearningManager/utils.py +331 -0
  37. Medfl/NetManager/__init__.py +10 -0
  38. Medfl/NetManager/database_connector.py +43 -0
  39. Medfl/NetManager/dataset.py +92 -0
  40. Medfl/NetManager/flsetup.py +320 -0
  41. Medfl/NetManager/net_helper.py +254 -0
  42. Medfl/NetManager/net_manager_queries.py +142 -0
  43. Medfl/NetManager/network.py +194 -0
  44. Medfl/NetManager/node.py +184 -0
  45. Medfl/__init__.py +3 -0
  46. Medfl/scripts/__init__.py +2 -0
  47. Medfl/scripts/base.py +30 -0
  48. Medfl/scripts/create_db.py +126 -0
  49. alembic/env.py +61 -61
  50. {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info}/METADATA +120 -108
  51. medfl-2.0.0.dist-info/RECORD +55 -0
  52. {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info}/WHEEL +1 -1
  53. {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info/licenses}/LICENSE +674 -674
  54. MEDfl-0.2.1.dist-info/RECORD +0 -31
  55. {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info}/top_level.txt +0 -0
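Two patterns stand out in the list above. Every file in the original MEDfl/ tree is rewritten with identical added and removed line counts (+N -N), which usually indicates a whitespace or line-ending normalization rather than a logic change; the per-file diffs below bear this out. In addition, the 2.0.0 wheel ships a second Medfl/ package tree (items 25-48) whose files mirror the MEDfl/ tree and differ only in directory-name casing. A comparison like this one can be reproduced locally; the sketch below uses only the Python standard library, and the wheel filenames are hypothetical local paths.

import difflib
import zipfile

OLD_WHEEL = "MEDfl-0.2.1-py3-none-any.whl"   # hypothetical local paths
NEW_WHEEL = "medfl-2.0.0-py3-none-any.whl"

def read_members(path):
    """Map each archive member name to its decoded text content."""
    with zipfile.ZipFile(path) as zf:
        return {name: zf.read(name).decode("utf-8", errors="replace")
                for name in zf.namelist()}

old, new = read_members(OLD_WHEEL), read_members(NEW_WHEEL)
for name in sorted(set(old) | set(new)):
    a = old.get(name, "").splitlines(keepends=True)
    b = new.get(name, "").splitlines(keepends=True)
    diff = list(difflib.unified_diff(a, b, fromfile=name, tofile=name))
    if diff:
        print("".join(diff), end="")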
MEDfl/NetManager/node.py CHANGED
@@ -1,184 +1,184 @@
(whitespace-only change: all 184 lines are removed and re-added with identical content, consistent with a line-ending normalization; the file is shown once below, with the diff viewer's line-number gutter stripped)

import pandas as pd

from .net_helper import *
from .net_manager_queries import *
from MEDfl.LearningManager.utils import params
from MEDfl.NetManager.database_connector import DatabaseManager

from sqlalchemy import text, exc


class Node:
    """
    A class representing a node in the network.

    Attributes:
        name (str): The name of the node.
        train (int): An integer flag representing whether the node is used for training (1) or testing (0).
        test_fraction (float, optional): The fraction of data used for testing when train=1. Default is 0.2.
    """

    def __init__(
        self, name: str, train: int, test_fraction: float = 0.2, engine=None
    ):
        """
        Initialize a Node instance.

        Parameters:
            name (str): The name of the node.
            train (int): An integer flag representing whether the node is used for training (1) or testing (0).
            test_fraction (float, optional): The fraction of data used for testing when train=1. Default is 0.2.
        """
        self.name = name
        self.train = train
        self.test_fraction = 1.0 if self.train == 0 else test_fraction


        db_manager = DatabaseManager() ;
        db_manager.connect() ;
        self.engine = db_manager.get_connection()

    def validate(self):
        """Validate name, train, test_fraction"""
        if not isinstance(self.name, str):
            raise TypeError("name argument must be a string")

        if not isinstance(self.train, int):
            raise TypeError("train argument must be an int")

        if not isinstance(self.test_fraction, float):
            raise TypeError("test_fraction argument must be a float")

    def create_node(self, NetId: int):
        """Create a node in the database.
        Parameters:
            NetId (int): The ID of the network to which the node belongs.

        Returns:
            None
        """
        self.engine.execute(
            text(INSERT_NODE_QUERY.format(self.name, NetId, self.train))
        )

    def delete_node(self):
        """Delete the node from the database."""
        self.engine.execute(text(DELETE_NODE_QUERY.format(self.name)))

    def check_dataset_compatibility(self, data_df):
        """Check if the dataset is compatible with the master dataset.
        Parameters:
            data_df (DataFrame): The dataset to check.

        Returns:
            None
        """
        if master_table_exists() != 1:
            print("MasterDataset doesn't exist")
        else:
            columns = data_df.columns.tolist()

            # get master_dataset columns
            result_proxy = self.engine.execute(SELECT_MASTER_COLUMNS_QUERY)
            master_table_columns = result_proxy.keys()


            assert [x == y for x, y in zip(master_table_columns, columns)]

    def update_node(self):
        """Update the node information (not implemented)."""
        pass

    def get_dataset(self, column_name: str = None):
        """Get the dataset for the node based on the given column name.
        Parameters:
            column_name (str, optional): The column name to filter the dataset. Default is None.

        Returns:
            DataFrame: The dataset associated with the node.
        """
        NodeId = get_nodeid_from_name(self.name)
        if column_name is not None:
            query = text(SELECT_DATASET_BY_COLUMN_QUERY.format(column_name, self.name))
        else:
            query = text(SELECT_DATASET_BY_NODE_ID_QUERY.format(NodeId))

        result_proxy = self.engine.execute(query)
        node_dataset = pd.DataFrame(result_proxy.fetchall(), columns=result_proxy.keys())

        return node_dataset

    def upload_dataset(self, dataset_name: str, path_to_csv: str = params['path_to_test_csv']):
        """Upload the dataset to the database for the node.

        Parameters:
            dataset_name (str): The name of the dataset.
            path_to_csv (str, optional): Path to the CSV file containing the dataset. Default is the path in params.

        Returns:
            None
        """
        try:
            data_df = pd.read_csv(path_to_csv)
            nodeId = get_nodeid_from_name(self.name)
            columns = data_df.columns.tolist()
            self.check_dataset_compatibility(data_df)

            data_df = process_eicu(data_df)

            # Insert data in batches
            batch_size = 1000  # Adjust as needed
            for start_idx in range(0, len(data_df), batch_size):
                batch_data = data_df.iloc[start_idx:start_idx + batch_size]
                insert_query = f"INSERT INTO Datasets (DataSetName, NodeId, {', '.join(columns)}) VALUES (:dataset_name, :nodeId, {', '.join([':' + col for col in columns])})"
                data_to_insert = batch_data.to_dict(orient='records')
                params = [{"dataset_name": dataset_name, "nodeId": nodeId, **row} for row in data_to_insert]
                self.engine.execute(text(insert_query), params)
        except exc.SQLAlchemyError as e:
            print(f"Error uploading dataset: {e}")

    def assign_dataset(self, dataset_name:str):
        """Assigning existing dataSet to node
        Parameters:
            dataset_name (str): The name of the dataset to assign.

        Returns:
            None
        """

        nodeId = get_nodeid_from_name(self.name)
        query = f"UPDATE DataSets SET nodeId = {nodeId} WHERE DataSetName = '{dataset_name}'"
        self.engine.execute(text(query))

    def unassign_dataset(self, dataset_name:str):
        """unssigning existing dataSet to node
        Parameters:
            dataset_name (str): The name of the dataset to assign.

        Returns:
            None
        """

        query = f"UPDATE DataSets SET nodeId = {-1} WHERE DataSetName = '{dataset_name}'"
        self.engine.execute(text(query))

    def list_alldatasets(self):
        """List all datasets associated with the node.
        Returns:
            DataFrame: A DataFrame containing information about all datasets associated with the node.

        """
        return pd.read_sql(
            text(SELECT_ALL_DATASETS_QUERY.format(self.name)), my_eng
        )

    @staticmethod
    def list_allnodes():
        """List all nodes in the database.
        Returns:
            DataFrame: A DataFrame containing information about all nodes in the database.

        """
        query = text(SELECT_ALL_NODES_QUERY)
        res = pd.read_sql(query, my_eng)
        return res
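Editorial note on node.py (about the published source, not the diff itself): list_alldatasets and list_allnodes reference my_eng, which is not defined in this module and is presumably supplied by one of the wildcard imports; upload_dataset rebinds the name params locally, shadowing the params imported from MEDfl.LearningManager.utils; and several methods build SQL by interpolating values with .format() or f-strings, which is open to SQL injection. The sketch below shows the bound-parameter style SQLAlchemy supports; it is illustrative only, with an in-memory SQLite URL and a two-column DataSets table as placeholders, not MEDfl's actual schema.

import pandas as pd
from sqlalchemy import create_engine, text

engine = create_engine("sqlite:///:memory:")  # placeholder URL, not MEDfl's configuration

with engine.begin() as conn:  # begin() commits on success and rolls back on error
    conn.execute(text("CREATE TABLE DataSets (DataSetName TEXT, NodeId INTEGER)"))
    # Values travel as bound parameters instead of being pasted into the SQL string.
    conn.execute(
        text("UPDATE DataSets SET NodeId = :node_id WHERE DataSetName = :name"),
        {"node_id": 7, "name": "eicu_subset"},
    )
    datasets = pd.read_sql(
        text("SELECT * FROM DataSets WHERE NodeId = :node_id"),
        conn,
        params={"node_id": 7},
    )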
MEDfl/__init__.py CHANGED
@@ -1,3 +1,3 @@
(whitespace-only change: lines 1-2 are removed and re-added with identical content)

from .LearningManager import *
from .NetManager import *
from .scripts import *
MEDfl/scripts/__init__.py CHANGED
@@ -1,2 +1,2 @@
(whitespace-only change: line 1 is removed and re-added with identical content)

# from .base import *
# from .create_db import *
MEDfl/scripts/base.py CHANGED
@@ -1,30 +1,30 @@
(whitespace-only change: lines 1-29 are removed and re-added with identical content; the file is shown once below)

import mysql.connector
from sqlalchemy import create_engine, text
from configparser import ConfigParser
import yaml
import pkg_resources
import os

# Get the directory of the current script
current_directory = os.path.dirname(os.path.abspath(__file__))

# Load configuration from the config file
config_file_path = os.path.join(current_directory, 'db_config.ini')

config = ConfigParser()
config.read(config_file_path)
mysql_config = config['mysql']



connection_string = (
    f"mysql+mysqlconnector://{mysql_config['user']}:{mysql_config['password']}@"
    f"{mysql_config['host']}:{mysql_config['port']}/{mysql_config['database']}"
)

eng = create_engine(
    connection_string,
    execution_options={"autocommit": True},
)

my_eng = eng.connect()
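Editorial note on base.py: the module reads credentials from a db_config.ini placed next to the script, then creates the engine and opens a connection (my_eng = eng.connect()) as side effects of import, so merely importing the package touches the database. A minimal sketch of deferring that work until first use follows; the config layout is taken from the code above, while the function name and the pool_pre_ping choice are assumptions, not MEDfl's API.

import os
from configparser import ConfigParser
from functools import lru_cache

from sqlalchemy import create_engine

@lru_cache(maxsize=1)
def get_engine():
    """Build the engine on first call instead of at import time (hypothetical helper)."""
    config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "db_config.ini")
    config = ConfigParser()
    config.read(config_path)
    c = config["mysql"]
    url = (
        f"mysql+mysqlconnector://{c['user']}:{c['password']}@"
        f"{c['host']}:{c['port']}/{c['database']}"
    )
    # pool_pre_ping revalidates pooled connections before handing them out
    return create_engine(url, pool_pre_ping=True)

Callers would then open short-lived connections, for example with get_engine().connect() as conn, rather than sharing one module-level connection for the life of the process.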