FedModelKit 0.6.1__tar.gz → 0.6.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/PKG-INFO +2 -2
  2. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/pyproject.toml +2 -2
  3. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/cli.py +3 -3
  4. fedmodelkit-0.6.6/src/FedModelKit/templates/client_app_template.py +106 -0
  5. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/ds_template.ipynb +3 -3
  6. fedmodelkit-0.6.6/src/FedModelKit/templates/server_app_template.py +144 -0
  7. fedmodelkit-0.6.6/src/FedModelKit/templates/task_template.py +373 -0
  8. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates.py +6 -6
  9. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/uv.lock +1 -1
  10. fedmodelkit-0.6.1/src/FedModelKit/templates/client_app_template.py +0 -49
  11. fedmodelkit-0.6.1/src/FedModelKit/templates/server_app_template.py +0 -38
  12. fedmodelkit-0.6.1/src/FedModelKit/templates/task_template.py +0 -243
  13. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/.gitignore +0 -0
  14. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/.python-version +0 -0
  15. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/LICENSE +0 -0
  16. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/README.md +0 -0
  17. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/examples/simulation-scikit-model/.gitignore +0 -0
  18. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/examples/simulation-scikit-model/AML_preprocessed_dataset.xlsx +0 -0
  19. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/examples/simulation-scikit-model/README.md +0 -0
  20. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/examples/simulation-scikit-model/simulation_example.ipynb +0 -0
  21. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/images/aggregator.png +0 -0
  22. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/images/federated_learning_model.png +0 -0
  23. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/images/local_learner.png +0 -0
  24. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/README.md +0 -0
  25. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/__init__.py +0 -0
  26. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/aggregator.py +0 -0
  27. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/default_create_functions.py +0 -0
  28. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/interface.py +0 -0
  29. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/local_learner.py +0 -0
  30. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/py.typed +0 -0
  31. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/src/utils.py +0 -0
  32. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/__init__template.py +0 -0
  33. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/extern_pyproject_template.toml +0 -0
  34. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/doSendModels.png +0 -0
  35. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/doWaitsForJobs.png +0 -0
  36. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/dsAggregateModels.png +0 -0
  37. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/dsDoneSubmittingJobs.png +0 -0
  38. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/dsSendsJobs.png +0 -0
  39. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/overview.png +0 -0
  40. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/main_template.py +0 -0
  41. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/pyproject_template.toml +0 -0
  42. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/readme_template.md +0 -0
  43. {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/uv_template.lock +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FedModelKit
3
- Version: 0.6.1
4
- Summary: This package contains the core components and protocols for creating, managing, and registering federated learning models using MLflow. It provides utilities for defining local learners, aggregation strategies, and integrating them with MLflow for tracking and deployment.
3
+ Version: 0.6.6
4
+ Summary: LOW_LEVEL_API: This package contains the core components and protocols for creating, managing, and registering federated learning models using MLflow. It provides utilities for defining local learners, aggregation strategies, and integrating them with MLflow for tracking and deployment.
5
5
  Author-email: ceresale <alessandro.ceresi@upm.es>
6
6
  License-File: LICENSE
7
7
  Requires-Python: >=3.13
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "FedModelKit"
3
- version = "0.6.1"
4
- description = "This package contains the core components and protocols for creating, managing, and registering federated learning models using MLflow. It provides utilities for defining local learners, aggregation strategies, and integrating them with MLflow for tracking and deployment."
3
+ version = "0.6.6"
4
+ description = "LOW_LEVEL_API: This package contains the core components and protocols for creating, managing, and registering federated learning models using MLflow. It provides utilities for defining local learners, aggregation strategies, and integrating them with MLflow for tracking and deployment."
5
5
  readme = "README.md"
6
6
  authors = [
7
7
  { name = "ceresale", email = "alessandro.ceresi@upm.es" }
@@ -25,14 +25,14 @@ def create_structure(exp_name: str = "new_experiment") -> None:
25
25
  f"{exp_name}": {
26
26
  "__init__.py": get_init_template(),
27
27
  "task.py": get_task_template(),
28
- "server_app.py": get_server_template(),
29
- "client_app.py": get_client_template(),
28
+ "server_app.py": get_server_template(exp_name),
29
+ "client_app.py": get_client_template(exp_name),
30
30
  },
31
31
  },
32
32
  "pyproject.toml": get_extern_pyproject_template(exp_name),
33
33
  "uv.lock": get_uv_template(),
34
34
  "README.md": get_readme_template(),
35
- "ds.ipynb": get_ds_template(),
35
+ "ds.ipynb": get_ds_template(exp_name),
36
36
  }
37
37
 
38
38
 
@@ -0,0 +1,106 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import pickle
4
+ from pathlib import Path
5
+ from flwr.client import ClientApp
6
+ from flwr.common import Message, Context
7
+ from flwr.common.record import RecordDict, MetricRecord, ConfigRecord
8
+ from sklearn.preprocessing import OneHotEncoder
9
+ import FedModelKit as msi
10
+ import FedModelKit as msi
11
+
12
+ from EXPERIMENT_NAME.task import Net # Type: ignore[import]
13
+
14
+
15
+ # Initialize the Flower ClientApp
16
+ app = ClientApp()
17
+
18
@app.query()
def query(msg: Message, ctx: Context) -> Message:
    """Create this client's local learner in response to the server's query.

    The incoming message must carry a ``'fancy_config'`` config record with
    the keys ``'client_id'`` and ``'num_clients'``; both are handed to
    ``Net.prepare_data`` so the learner can load its share of the data.

    Returns:
        An empty reply addressed to ``msg``.
    """
    setup = msg.content.config_records['fancy_config']

    # Build the learner first, then let it load the data for this client.
    learner = Net()
    client_id = setup['client_id']
    num_clients = setup['num_clients']
    learner.prepare_data(client_id, num_clients)

    # Keep the learner on the context so later handlers can reuse it.
    # Other objects can be kept the same way: ctx.state.<object_name> = <object>
    ctx.state.local_learner = learner

    acknowledgement = RecordDict()
    return Message(acknowledgement, reply_to=msg)
39
+
40
@app.train()
def train(msg: Message, ctx: Context):
    """Run one round of local training and reply with the updated model.

    The learner stored on ``ctx.state.local_learner`` receives the arrays
    found under ``'fancy_model'`` in the message, is trained, and its new
    parameters are returned under ``'fancy_model_returned'`` together with
    the training metrics under ``'train_metrics'``.

    If the message carries a ``'fancy_config'`` config record with a
    ``'local_epochs'`` entry, that value is forwarded to ``train_round``;
    otherwise ``train_round`` runs with its own default.

    Returns:
        A reply to ``msg`` holding the updated arrays and the metrics.
    """
    local_learner = ctx.state.local_learner

    # Honour the number of local epochs requested by the server, if any.
    train_kwargs = {}
    config_records = msg.content.config_records
    if 'fancy_config' in config_records:
        fancy_config = config_records['fancy_config']
        if 'local_epochs' in fancy_config:
            train_kwargs['local_epochs'] = fancy_config['local_epochs']

    # Load the model parameters sent by the server into the local learner.
    fancy_parameters = msg.content.array_records['fancy_model']
    local_learner.set_parameters(fancy_parameters)

    # Train locally, then collect the resulting parameters.
    train_metrics = local_learner.train_round(**train_kwargs)
    new_array_records = local_learner.get_parameters()

    # Reply with the updated parameters and the metrics of this round.
    reply_content = RecordDict()
    reply_content.array_records['fancy_model_returned'] = new_array_records
    reply_content.metric_records['train_metrics'] = MetricRecord(train_metrics)

    # Keep the latest metrics and the learner on the context for later calls.
    ctx.state.metric_records['prev'] = MetricRecord(train_metrics)
    ctx.state.local_learner = local_learner

    return Message(reply_content, reply_to=msg)
75
+
76
@app.evaluate()
def eval(msg: Message, ctx: Context):
    """Evaluate the server-provided model with this client's local learner.

    The learner stored on ``ctx.state.local_learner`` receives the arrays
    found under ``'fancy_model'`` in the message and is evaluated; the
    resulting metrics are sent back under ``'eval_metrics'``.

    Returns:
        A reply to ``msg`` holding the evaluation metrics.
    """
    learner = ctx.state.local_learner

    # Example of reading configuration sent by the server:
    # fancy_config = msg.content.config_records['fancy_config']
    # local_epochs = fancy_config['local_epochs']

    # Load the received parameters, then score the model.
    learner.set_parameters(msg.content.array_records['fancy_model'])
    metrics = learner.evaluate()

    # Keep the latest metrics and the learner on the context for later calls.
    ctx.state.metric_records['prev'] = MetricRecord(metrics)
    ctx.state.local_learner = learner

    response = RecordDict()
    response.metric_records['eval_metrics'] = MetricRecord(metrics)
    return Message(response, reply_to=msg)
@@ -131,7 +131,7 @@
131
131
  "metadata": {},
132
132
  "outputs": [],
133
133
  "source": [
134
- "SYFT_FLWR_PROJECT_PATH = Path(\"./fl-diabetes-prediction\")\n",
134
+ "SYFT_FLWR_PROJECT_PATH = Path(\"./EXPERIMENT_NAME\")\n",
135
135
  "assert SYFT_FLWR_PROJECT_PATH.exists()"
136
136
  ]
137
137
  },
@@ -184,7 +184,7 @@
184
184
  "outputs": [],
185
185
  "source": [
186
186
  "# clean up\n",
187
- "!rm -rf {SYFT_FLWR_PROJECT_PATH / \"fl_diabetes_prediction\" / \"__pycache__\"}\n",
187
+ "!rm -rf {SYFT_FLWR_PROJECT_PATH / \"EXPERIMENT_NAME\" / \"__pycache__\"}\n",
188
188
  "!rm -rf weights/"
189
189
  ]
190
190
  },
@@ -229,7 +229,7 @@
229
229
  "outputs": [],
230
230
  "source": [
231
231
  "# clean up before submitting jobs\n",
232
- "!rm -rf {SYFT_FLWR_PROJECT_PATH / \"fl_diabetes_prediction\" / \"__pycache__\"}\n",
232
+ "!rm -rf {SYFT_FLWR_PROJECT_PATH / \"EXPERIMENT_NAME\" / \"__pycache__\"}\n",
233
233
  "!rm -rf {SYFT_FLWR_PROJECT_PATH / \"simulation_logs\"}\n",
234
234
  "!rm -rf weights/"
235
235
  ]
@@ -0,0 +1,144 @@
1
+
2
+ # from tkinter import Grid
3
+ from typing import List
4
+ import time
5
+
6
+ import flwr as fl
7
+ from flwr.common import (
8
+ Context,
9
+ NDArrays,
10
+ Message,
11
+ MessageType,
12
+ Metrics,
13
+ RecordDict,
14
+ ConfigRecord,
15
+ DEFAULT_TTL,
16
+ )
17
+ from flwr.server import Grid
18
+
19
+ from EXPERIMENT_NAME.task import Net, Strategy #type: ignore[import]
20
+
21
+
22
+ # Run via `flower-server-app server:app`
23
+ app = fl.server.ServerApp()
24
+
25
+
26
+
27
+
28
def _replies_with_content(replies, stage):
    """Return the replies that carry content and report how many do not.

    Reading ``Message.content`` on a reply without content (for example an
    error reply) is not possible, so such replies are filtered out before
    any record is accessed.
    """
    usable = [reply for reply in replies if reply.has_content()]
    dropped = len(replies) - len(usable)
    if dropped:
        print(f"Skipping {dropped} {stage} replies without content")
    return usable


@app.main()
def main(grid: Grid, context: Context):
    """
    Main function to run the federated learning server.

    Structure:
    - Send a query message to clients for creating the local learner and loading the data
    - Start global epochs loop for training and evaluation
        - Send training messages to clients
        - Aggregate parameters received from clients
        - Send evaluation messages to clients
        - Aggregate evaluation metrics

    Raises:
        RuntimeError: if a training round yields no reply with content, since
            there is then nothing to aggregate.
    """
    print("Starting test run")

    # Get node IDs of connected clients (materialised so len() is available)
    node_ids = list(grid.get_node_ids())

    # Initialize the federated model and the aggregation strategy
    global_model = Net()
    aggregation_strategy = Strategy()

    # Send a query message to clients for creating the local learner and loading the data
    messages = []
    for idx, node_id in enumerate(node_ids):
        record_dict = RecordDict()

        # Tell each client which partition it is and how many partitions exist
        record_dict.config_records["fancy_config"] = ConfigRecord(
            {"num_clients": len(node_ids), "client_id": idx}
        )

        messages.append(
            Message(
                content=record_dict,
                message_type=MessageType.QUERY,
                dst_node_id=node_id,
                group_id=str(1),
                ttl=DEFAULT_TTL,
            )
        )

    # Send query messages to clients
    all_replies = list(grid.send_and_receive(messages))
    print(f"Received {len(all_replies)} answers")
    _replies_with_content(all_replies, "query")

    # Run federated training and evaluation for a fixed number of rounds
    for server_round in range(3):
        print(f"Commencing server train and evaluation round {server_round + 1}")

        # The global parameters are the same for every client in this round,
        # so serialise them once instead of once per node.
        current_parameters = global_model.get_parameters()

        messages = []
        for node_id in node_ids:
            record_dict = RecordDict()
            record_dict.array_records["fancy_model"] = current_parameters
            # Add a config with information to send the client for training
            record_dict.config_records["fancy_config"] = ConfigRecord({"local_epochs": 3})

            messages.append(
                Message(
                    content=record_dict,
                    message_type=MessageType.TRAIN,
                    dst_node_id=node_id,
                    group_id=str(server_round),
                    ttl=DEFAULT_TTL,
                )
            )

        # Send training messages to clients
        all_replies = list(grid.send_and_receive(messages))
        print(f"Received {len(all_replies)} results")

        train_replies = _replies_with_content(all_replies, "train")
        if not train_replies:
            raise RuntimeError(
                f"No usable training replies in round {server_round + 1}; nothing to aggregate"
            )

        # Print metrics received from clients
        for reply in train_replies:
            print(reply.content.metric_records)

        # Aggregate parameters received from clients
        array_records_list = [
            reply.content.array_records["fancy_model_returned"] for reply in train_replies
        ]
        new_array_records = aggregation_strategy.aggregate_parameters(array_records_list)
        global_model.set_parameters(new_array_records)

        # Evaluate the updated global model
        messages = []
        for node_id in node_ids:
            record_dict = RecordDict()
            record_dict.array_records["fancy_model"] = new_array_records
            # Add a config with information to send the client for evaluation
            record_dict.config_records["fancy_config"] = ConfigRecord({"local_epochs": 3})

            messages.append(
                Message(
                    content=record_dict,
                    message_type=MessageType.EVALUATE,
                    dst_node_id=node_id,
                    group_id=str(server_round),
                    ttl=DEFAULT_TTL,
                )
            )

        # Send evaluation messages to clients
        all_replies = list(grid.send_and_receive(messages))
        print(f"Received {len(all_replies)} results")

        eval_replies = _replies_with_content(all_replies, "evaluate")
        if not eval_replies:
            # Training already succeeded for this round; only the report is missing.
            print("No usable evaluation replies; skipping metric aggregation")
            continue

        # Print evaluation metrics received from clients
        metrics_records_list = [
            reply.content.metric_records['eval_metrics'] for reply in eval_replies
        ]
        for i, client_metrics in enumerate(metrics_records_list):
            print(f"Client {i+1} metrics: ", client_metrics)

        # Aggregate evaluation metrics
        print("Aggregated metrics result: ", aggregation_strategy.aggregate_metrics(metrics_records_list))

    print("🎉🎉🎉 Successfully completed federated learning run! 🎉🎉🎉")
@@ -0,0 +1,373 @@
1
+ from collections import OrderedDict
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.optim as optim
6
+ import flwr
7
+ import numpy as np
8
+ from typing import Optional
9
+ from flwr_datasets import FederatedDataset
10
+ from flwr_datasets.partitioner import IidPartitioner
11
+ from imblearn.over_sampling import SMOTE
12
+ from loguru import logger
13
+ from pandas import DataFrame
14
+ from sklearn.model_selection import train_test_split
15
+ from sklearn.preprocessing import StandardScaler
16
+ from torch.utils.data import DataLoader, TensorDataset
17
+
18
+
19
+ def get_device():
20
+ if torch.cuda.is_available():
21
+ return torch.device("cuda")
22
+ elif torch.backends.mps.is_available():
23
+ return torch.device("mps")
24
+ elif torch.xpu.is_available():
25
+ return torch.device("xpu")
26
+ else:
27
+ return torch.device("cpu")
28
+
29
+
30
+ DEVICE = get_device()
31
+ logger.info(f"Using device: {DEVICE}")
32
+
33
+
34
class Net(nn.Module):
    """Binary classifier bundled with its data loading, local training and
    evaluation loops, and helpers to exchange parameters with Flower.

    The network stacks three ``Linear -> BatchNorm1d -> LeakyReLU`` stages
    (the first two followed by dropout) and a ``Linear -> Sigmoid`` head that
    outputs one value per sample.
    """

    def __init__(self, input_dim=6):
        super(Net, self).__init__()
        # Filled in by prepare_data() / load_flwr_data() / load_syftbox_dataset().
        self.trainloader = None
        self.testloader = None
        self.fds = None  # cached FederatedDataset, created on first load_flwr_data()

        self.layer1 = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.BatchNorm1d(32),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.2),
        )

        self.layer2 = nn.Sequential(
            nn.Linear(32, 24),
            nn.BatchNorm1d(24),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.25),
        )

        self.layer3 = nn.Sequential(
            nn.Linear(24, 16), nn.BatchNorm1d(16), nn.LeakyReLU(0.1)
        )

        self.output_layer = nn.Sequential(nn.Linear(16, 1), nn.Sigmoid())

    def forward(self, x):
        """Run ``x`` through the three hidden stages and the sigmoid head."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.output_layer(x)
        return x

    def dataset_processing(
        self, train_df: DataFrame, test_df: DataFrame
    ) -> tuple[DataLoader, DataLoader]:
        """Turn raw train/test frames into PyTorch data loaders.

        Both frames lose the ``SkinThickness`` and ``Insulin`` columns and get
        zeros in ``Glucose``, ``BMI`` and ``BloodPressure`` replaced by the
        mean/median of that frame's non-zero values. The first six remaining
        columns are used as features and the rest as labels. The training set
        is oversampled with SMOTE, and features are standardised with a
        scaler fitted on the resampled training set.

        Returns:
            ``(train_loader, test_loader)``. The train loader shuffles in
            batches of 10; the test loader yields the whole test set at once.
        """

        def preprocess_df(df: DataFrame) -> DataFrame:
            columns_to_drop = ["SkinThickness", "Insulin"]
            df_new: DataFrame = df.drop(columns_to_drop, axis=1)

            # Statistics are computed on non-zero entries only, since the
            # zeros are the values being replaced.
            mean_glucose = df_new[df_new["Glucose"] != 0]["Glucose"].mean()
            median_bmi = df_new[df_new["BMI"] != 0]["BMI"].median()
            median_bp = df_new[df_new["BloodPressure"] != 0]["BloodPressure"].median()

            df_new.replace(
                {
                    "Glucose": {0: mean_glucose},
                    "BMI": {0: median_bmi},
                    "BloodPressure": {0: median_bp},
                },
                inplace=True,
            )

            return df_new

        train_processed = preprocess_df(train_df)
        test_processed = preprocess_df(test_df)

        # Split features and labels for both sets
        X_train = train_processed.values[:, :6]
        y_train = train_processed.values[:, 6:]
        X_test = test_processed.values[:, :6]
        y_test = test_processed.values[:, 6:]

        from collections import Counter

        def get_minority_class_count(y):
            return min(Counter(y.flatten()).values())

        # Cap SMOTE's neighbourhood by the size of the smallest class.
        minority_count = get_minority_class_count(y_train)
        k_neighbors = min(5, minority_count - 1) if minority_count > 1 else 1

        # Resample the training data to fix the class imbalance
        smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
        X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

        # Fit the scaler on training data only and reuse it for the test set
        scaler = StandardScaler()
        X_train_resampled = scaler.fit_transform(X_train_resampled)
        X_test = scaler.transform(X_test)

        # Labels are reshaped to a column so they line up with the model output
        X_train_tensor = torch.FloatTensor(X_train_resampled)
        y_train_tensor = torch.FloatTensor(y_train_resampled).reshape(-1, 1)
        X_test_tensor = torch.FloatTensor(X_test)
        y_test_tensor = torch.FloatTensor(y_test).reshape(-1, 1)

        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

        train_loader = DataLoader(dataset=train_dataset, batch_size=10, shuffle=True)
        test_loader = DataLoader(
            dataset=test_dataset, batch_size=len(test_dataset), shuffle=False
        )

        return train_loader, test_loader

    def _parameters_to_dict(
        self, params_record: "flwr.common.ArrayRecord"
    ) -> OrderedDict:
        """Convert an ArrayRecord into an OrderedDict of NumPy arrays."""
        state_dict = OrderedDict()
        for k, v in params_record.items():
            state_dict[k] = self._basic_array_deserialisation(v)
        return state_dict

    def _dict_to_parameter_record(
        self,
        parameters: "OrderedDict[str, flwr.common.NDArray]",
    ) -> "flwr.common.ArrayRecord":
        """Convert an OrderedDict of NumPy arrays into an ArrayRecord."""
        state_dict = OrderedDict()
        for k, v in parameters.items():
            state_dict[k] = self._ndarray_to_array(v)

        return flwr.common.ArrayRecord(state_dict)

    def _ndarray_to_array(self, ndarray: "flwr.common.NDArray") -> "flwr.common.Array":
        """Represent NumPy ndarray as Array."""
        return flwr.common.Array(
            data=ndarray.tobytes(),
            dtype=str(ndarray.dtype),
            stype="numpy.ndarray.tobytes",
            shape=list(ndarray.shape),
        )

    def _basic_array_deserialisation(
        self, array: "flwr.common.Array"
    ) -> "flwr.common.NDArray":
        """Rebuild a NumPy ndarray from an Array's raw bytes, dtype and shape."""
        return np.frombuffer(buffer=array.data, dtype=array.dtype).reshape(array.shape)

    def load_syftbox_dataset(self) -> None:
        """Load ``train.csv`` and ``test.csv`` from the SyftBox dataset path
        and store the resulting loaders on the instance."""
        import pandas as pd

        from syft_flwr.utils import get_syftbox_dataset_path

        data_dir = get_syftbox_dataset_path()
        logger.info(f"Loading dataset from {data_dir}")

        train_df = pd.read_csv(data_dir / "train.csv")
        test_df = pd.read_csv(data_dir / "test.csv")

        self.trainloader, self.testloader = self.dataset_processing(train_df, test_df)

    def load_flwr_data(
        self, partition_id: int, num_partitions: int
    ) -> None:
        """
        Load the `fl-diabetes-prediction` dataset to memory

        The FederatedDataset is created on the first call and cached on the
        instance; partition ``partition_id`` of its ``train`` split is then
        divided 80/20 into train and test frames and turned into loaders.
        """
        if self.fds is None:
            partitioner = IidPartitioner(num_partitions=num_partitions)
            self.fds = FederatedDataset(
                dataset="khoaguin/pima-indians-diabetes-database",
                partitioners={"train": partitioner},
            )

        partition: DataFrame = self.fds.load_partition(partition_id, "train").with_format(
            "pandas"
        )[:]
        train_df, test_df = train_test_split(partition, test_size=0.2, random_state=95)

        self.trainloader, self.testloader = self.dataset_processing(train_df, test_df)

    def prepare_data(
        self, partition_id: int, num_partitions: int
    ) -> None:
        """Load this client's data, from SyftBox when ``run_syft_flwr()`` is
        true and from the partitioned Flower dataset otherwise."""
        from syft_flwr.utils import run_syft_flwr

        if not run_syft_flwr():
            logger.info("Running flwr locally")
            self.load_flwr_data(
                partition_id=partition_id,
                num_partitions=num_partitions,
            )
        else:
            logger.info("Running with syft_flwr")
            self.load_syftbox_dataset()

    def train_round(self, local_epochs=1):
        """Train on ``self.trainloader`` for ``local_epochs`` epochs.

        Returns:
            ``{"train_loss": [...], "train_acc": [...]}`` with one entry per
            epoch.
        """
        criterion = nn.BCELoss()
        optimizer = optim.Adam(self.parameters(), lr=0.001, weight_decay=0.0005)
        history = {"train_loss": [], "train_acc": []}
        self.to(DEVICE)

        for epoch in range(local_epochs):
            self.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, labels in self.trainloader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

                optimizer.zero_grad()
                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # loss is a batch mean; weight it by batch size before summing
                running_loss += loss.item() * inputs.size(0)
                predicted = (outputs > 0.5).float()
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            epoch_loss = running_loss / len(self.trainloader.dataset)
            epoch_acc = correct / total
            history["train_loss"].append(epoch_loss)
            history["train_acc"].append(epoch_acc)

        return history

    def evaluate(self):
        """Evaluate on ``self.testloader`` without gradient tracking.

        Returns:
            ``{"epoch_loss": ..., "epoch_accuracy": ...}``.
        """
        self.to(DEVICE)
        self.eval()
        criterion = nn.BCELoss()

        running_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in self.testloader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                outputs = self(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item() * inputs.size(0)
                predicted = (outputs > 0.5).float()
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(self.testloader.dataset)
        epoch_acc = correct / total

        return {"epoch_loss": epoch_loss, "epoch_accuracy": epoch_acc}

    def get_parameters(self) -> "flwr.common.ArrayRecord":
        """Return the model's state dict serialised as an ArrayRecord."""
        return self.pytorch_to_parameter_record(self.state_dict())

    def set_parameters(self, parameters: "flwr.common.ArrayRecord") -> None:
        """Load the state dict encoded in ``parameters`` into the model."""
        self.load_state_dict(self.parameters_to_pytorch_state_dict(parameters))

    def pytorch_to_parameter_record(
        self, state_dict: dict,
    ) -> "flwr.common.ArrayRecord":
        """Serialise your PyTorch model."""
        transformed_state_dict = OrderedDict()

        for k, v in state_dict.items():
            # Tensors are moved to the CPU before conversion to NumPy.
            transformed_state_dict[k] = self._ndarray_to_array(v.cpu().numpy())

        return flwr.common.ArrayRecord(transformed_state_dict)

    def parameters_to_pytorch_state_dict(
        self, params_record: "flwr.common.ArrayRecord",
    ) -> dict:
        """Reconstruct PyTorch state_dict from its serialised representation."""
        state_dict = {}
        for k, v in params_record.items():
            state_dict[k] = torch.tensor(self._basic_array_deserialisation(v))

        return state_dict
313
+
314
+
315
class Strategy:
    """Aggregation strategy that takes the unweighted mean over clients, for
    both model parameters and scalar metrics."""

    def _parameters_to_dict(
        self, params_record: "flwr.common.ArrayRecord"
    ) -> OrderedDict:
        """Convert an ArrayRecord into an OrderedDict of NumPy arrays."""
        state_dict = OrderedDict()
        for k, v in params_record.items():
            state_dict[k] = self._basic_array_deserialisation(v)
        return state_dict

    def _dict_to_parameter_record(
        self,
        parameters: "OrderedDict[str, flwr.common.NDArray]",
    ) -> "flwr.common.ArrayRecord":
        """Convert an OrderedDict of NumPy arrays into an ArrayRecord."""
        state_dict = OrderedDict()
        for k, v in parameters.items():
            state_dict[k] = self._ndarray_to_array(v)

        return flwr.common.ArrayRecord(state_dict)

    def _ndarray_to_array(self, ndarray: "flwr.common.NDArray") -> "flwr.common.Array":
        """Represent NumPy ndarray as Array."""
        return flwr.common.Array(
            data=ndarray.tobytes(),
            dtype=str(ndarray.dtype),
            stype="numpy.ndarray.tobytes",
            shape=list(ndarray.shape),
        )

    def _basic_array_deserialisation(
        self, array: "flwr.common.Array"
    ) -> "flwr.common.NDArray":
        """Rebuild a NumPy ndarray from an Array's raw bytes, dtype and shape."""
        return np.frombuffer(buffer=array.data, dtype=array.dtype).reshape(array.shape)

    def aggregate_parameters(
        self,
        results: "list[flwr.common.ArrayRecord]",
        config: "Optional[flwr.common.ConfigRecord]" = None,
    ) -> "flwr.common.ArrayRecord":
        """Average the given ArrayRecords element-wise, key by key.

        The keys of the first record define which arrays are aggregated.
        ``config`` is accepted but not used.

        Raises:
            ValueError: if ``results`` is empty.
        """
        if not results:
            raise ValueError("aggregate_parameters requires at least one ArrayRecord")

        parameters = [self._parameters_to_dict(param) for param in results]
        result = OrderedDict()
        for key in parameters[0]:
            # Accumulate into a fresh array; the decoded arrays are views on
            # the received bytes and must not be modified in place.
            this_array: np.ndarray = np.zeros_like(parameters[0][key])
            for p in parameters:
                this_array += p[key]
            result[key] = this_array / len(results)
        return self._dict_to_parameter_record(result)

    def aggregate_metrics(
        self,
        results: "list[flwr.common.MetricRecord]",
        config: "Optional[flwr.common.ConfigRecord]" = None,
    ) -> "flwr.common.MetricRecord":
        """Average each scalar metric over the given MetricRecords.

        The keys of the first record define which metrics are aggregated.
        ``config`` is accepted but not used.

        Raises:
            ValueError: if ``results`` is empty or a metric value is not an
                ``int`` or ``float``.
        """
        if not results:
            raise ValueError("aggregate_metrics requires at least one MetricRecord")

        result = OrderedDict()
        for key in results[0].keys():
            cumsum = 0.0
            for m in results:
                if not isinstance(m[key], (int, float)):
                    raise ValueError(
                        f"flwr.common.MetricsRecord value type not supported: {type(m[key])}"
                    )
                cumsum += m[key]  # type: ignore
            result[key] = cumsum / len(results)
        return flwr.common.MetricRecord(result)
@@ -24,17 +24,17 @@ def get_readme_template() -> str:
24
24
  return file.read()
25
25
 
26
26
 
27
def get_server_template(experiment_name: str) -> str:
    """Return the server-app template with the experiment name filled in.

    Reads ``templates/server_app_template.py`` next to this module and
    replaces every occurrence of ``EXPERIMENT_NAME`` with ``experiment_name``.
    """
    current_dir = os.path.dirname(__file__)
    template_path = os.path.join(current_dir, "templates", "server_app_template.py")
    # The template contains non-ASCII characters, so decode it explicitly as
    # UTF-8 instead of relying on the platform's default encoding.
    with open(template_path, "r", encoding="utf-8") as file:
        return file.read().replace("EXPERIMENT_NAME", experiment_name)
32
32
 
33
def get_client_template(experiment_name: str) -> str:
    """Return the client-app template with the experiment name filled in.

    Reads ``templates/client_app_template.py`` next to this module and
    replaces every occurrence of ``EXPERIMENT_NAME`` with ``experiment_name``.
    """
    current_dir = os.path.dirname(__file__)
    template_path = os.path.join(current_dir, "templates", "client_app_template.py")
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(template_path, "r", encoding="utf-8") as file:
        return file.read().replace("EXPERIMENT_NAME", experiment_name)
38
38
 
39
39
  def get_pyproject_template(experiment_name: str) -> str:
40
40
  current_dir = os.path.dirname(__file__)
@@ -61,11 +61,11 @@ def get_init_template() -> str:
61
61
  with open(template_path, 'r') as file:
62
62
  return file.read()
63
63
 
64
def get_ds_template(experiment_name: str) -> str:
    """Return the data-scientist notebook template with the experiment name
    filled in.

    Reads ``templates/ds_template.ipynb`` next to this module and replaces
    every occurrence of ``EXPERIMENT_NAME`` with ``experiment_name``. The
    name is inserted verbatim, without any JSON escaping.
    """
    current_dir = os.path.dirname(__file__)
    template_path = os.path.join(current_dir, "templates", "ds_template.ipynb")
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(template_path, "r", encoding="utf-8") as file:
        return file.read().replace("EXPERIMENT_NAME", experiment_name)
69
69
 
70
70
  def get_images(output_folder) -> None:
71
71
  current_dir = os.path.dirname(__file__)
@@ -476,7 +476,7 @@ wheels = [
476
476
 
477
477
  [[package]]
478
478
  name = "fedmodelkit"
479
- version = "0.5.0"
479
+ version = "0.6.6"
480
480
  source = { editable = "." }
481
481
  dependencies = [
482
482
  { name = "flwr" },
@@ -1,49 +0,0 @@
1
- from flwr.client import ClientApp, NumPyClient
2
- from flwr.common import Context
3
- from loguru import logger
4
-
5
- from EXPERIMENT_NAME.task import (
6
- Net,
7
- evaluate,
8
- get_weights,
9
- load_flwr_data,
10
- set_weights,
11
- train,
12
- )
13
-
14
-
15
class FlowerClient(NumPyClient):
    """NumPyClient that trains and evaluates one network on fixed loaders."""

    def __init__(self, net, trainloader, testloader):
        self.net, self.trainloader, self.testloader = net, trainloader, testloader

    def fit(self, parameters, config):
        """Load ``parameters`` into the network, train it, and return the new
        weights, the length of the train loader and an empty metrics dict."""
        set_weights(self.net, parameters)
        train(self.net, self.trainloader)
        updated_weights = get_weights(self.net)
        train_size = len(self.trainloader)
        return updated_weights, train_size, {}

    def evaluate(self, parameters, config):
        """Load ``parameters`` into the network, evaluate it, and return the
        loss, the length of the test loader and the accuracy metric."""
        set_weights(self.net, parameters)
        # Inside the method body, ``evaluate`` resolves to the module-level
        # function, not to this method.
        loss, accuracy = evaluate(self.net, self.testloader)
        test_size = len(self.testloader)
        return loss, test_size, {"accuracy": accuracy}
30
-
31
-
32
def client_fn(context: Context):
    """Build the Flower client for this node.

    Data comes from SyftBox when ``run_syft_flwr()`` is true; otherwise the
    partition named by ``partition-id`` / ``num-partitions`` in the node
    config is loaded through ``load_flwr_data``.
    """
    from EXPERIMENT_NAME.task import load_syftbox_dataset
    from syft_flwr.utils import run_syft_flwr

    if run_syft_flwr():
        logger.info("Running with syft_flwr")
        loaders = load_syftbox_dataset()
    else:
        logger.info("Running flwr locally")
        node_config = context.node_config
        loaders = load_flwr_data(
            partition_id=node_config["partition-id"],
            num_partitions=node_config["num-partitions"],
        )
    train_loader, test_loader = loaders

    model = Net()
    client = FlowerClient(model, train_loader, test_loader)
    return client.to_client()
47
-
48
-
49
- app = ClientApp(client_fn=client_fn)
@@ -1,38 +0,0 @@
1
- """fltabular: Flower Example on Adult Census Income Tabular Dataset."""
2
-
3
- from flwr.common import Context, ndarrays_to_parameters
4
- from flwr.server import ServerApp, ServerAppComponents, ServerConfig
5
-
6
- from EXPERIMENT_NAME.task import Net, get_weights
7
-
8
-
9
- def weighted_average(metrics):
10
- accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics]
11
- examples = [num_examples for num_examples, _ in metrics]
12
-
13
- return {"accuracy": sum(accuracies) / sum(examples)}
14
-
15
-
16
- def server_fn(context: Context) -> ServerAppComponents:
17
- net = Net()
18
- params = ndarrays_to_parameters(get_weights(net))
19
-
20
- from pathlib import Path
21
-
22
- from syft_flwr.strategy import FedAvgWithModelSaving
23
-
24
- strategy = FedAvgWithModelSaving(
25
- save_path=Path(__file__).parent.parent.parent / "weights",
26
- fraction_fit=1.0,
27
- fraction_evaluate=1.0,
28
- min_available_clients=2,
29
- initial_parameters=params,
30
- evaluate_metrics_aggregation_fn=weighted_average,
31
- )
32
- num_rounds = context.run_config["num-server-rounds"]
33
- config = ServerConfig(num_rounds=num_rounds)
34
-
35
- return ServerAppComponents(config=config, strategy=strategy)
36
-
37
-
38
- app = ServerApp(server_fn=server_fn)
@@ -1,243 +0,0 @@
1
- from collections import OrderedDict
2
-
3
- import torch
4
- import torch.nn as nn
5
- import torch.optim as optim
6
- from flwr_datasets import FederatedDataset
7
- from flwr_datasets.partitioner import IidPartitioner
8
- from imblearn.over_sampling import SMOTE
9
- from loguru import logger
10
- from pandas import DataFrame
11
- from sklearn.model_selection import train_test_split
12
- from sklearn.preprocessing import StandardScaler
13
- from torch.utils.data import DataLoader, TensorDataset
14
-
15
-
16
- def get_device():
17
- if torch.cuda.is_available():
18
- return torch.device("cuda")
19
- elif torch.backends.mps.is_available():
20
- return torch.device("mps")
21
- elif torch.xpu.is_available():
22
- return torch.device("xpu")
23
- else:
24
- return torch.device("cpu")
25
-
26
-
27
- DEVICE = get_device()
28
- logger.info(f"Using device: {DEVICE}")
29
-
30
-
31
- class Net(nn.Module):
32
- def __init__(self, input_dim=6):
33
- super(Net, self).__init__()
34
- # First layer with more units and batch normalization
35
- self.layer1 = nn.Sequential(
36
- nn.Linear(input_dim, 32), # Increased from 20 to 32
37
- nn.BatchNorm1d(32), # Added batch normalization
38
- nn.LeakyReLU(0.1), # LeakyReLU instead of ReLU
39
- nn.Dropout(0.2), # Increased dropout
40
- )
41
-
42
- # Second layer with more units
43
- self.layer2 = nn.Sequential(
44
- nn.Linear(32, 24), # Increased from 14 to 24
45
- nn.BatchNorm1d(24), # Added batch normalization
46
- nn.LeakyReLU(0.1),
47
- nn.Dropout(0.25),
48
- )
49
-
50
- # Third layer
51
- self.layer3 = nn.Sequential(
52
- nn.Linear(24, 16), nn.BatchNorm1d(16), nn.LeakyReLU(0.1)
53
- )
54
-
55
- # Output layer
56
- self.output_layer = nn.Sequential(nn.Linear(16, 1), nn.Sigmoid())
57
-
58
- def forward(self, x):
59
- x = self.layer1(x)
60
- x = self.layer2(x)
61
- x = self.layer3(x)
62
- x = self.output_layer(x)
63
- return x
64
-
65
-
66
- def dataset_processing(
67
- train_df: DataFrame, test_df: DataFrame
68
- ) -> tuple[DataLoader, DataLoader]:
69
- def preprocess_df(df: DataFrame) -> DataFrame:
70
- columns_to_drop = ["SkinThickness", "Insulin"]
71
- df_new: DataFrame = df.drop(columns_to_drop, axis=1)
72
-
73
- # Calculate mean and median (excluding zeros)
74
- mean_glucose = df_new[df_new["Glucose"] != 0]["Glucose"].mean()
75
- median_bmi = df_new[df_new["BMI"] != 0]["BMI"].median()
76
- median_bp = df_new[df_new["BloodPressure"] != 0]["BloodPressure"].median()
77
-
78
- # Replace zeros values with mean/median
79
- df_new.replace(
80
- {
81
- "Glucose": {0: mean_glucose},
82
- "BMI": {0: median_bmi},
83
- "BloodPressure": {0: median_bp},
84
- },
85
- inplace=True,
86
- )
87
-
88
- return df_new
89
-
90
- # Preprocess both datasets
91
- train_processed = preprocess_df(train_df)
92
- test_processed = preprocess_df(test_df)
93
-
94
- # Split features and labels for both sets
95
- X_train = train_processed.values[:, :6]
96
- y_train = train_processed.values[:, 6:]
97
- X_test = test_processed.values[:, :6]
98
- y_test = test_processed.values[:, 6:]
99
-
100
- from collections import Counter
101
-
102
- def get_minority_class_count(y):
103
- return min(Counter(y.flatten()).values())
104
-
105
- minority_count = get_minority_class_count(y_train)
106
- k_neighbors = min(5, minority_count - 1) if minority_count > 1 else 1
107
-
108
- # Resample the training data to fix the class imbalance
109
- smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
110
- X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)
111
-
112
- # Scale the data to have zero mean and unit variance
113
- scaler = StandardScaler()
114
- X_train_resampled = scaler.fit_transform(X_train_resampled)
115
- X_test = scaler.transform(X_test)
116
-
117
- # Convert numpy arrays to PyTorch tensors
118
- X_train_tensor = torch.FloatTensor(X_train_resampled)
119
- y_train_tensor = torch.FloatTensor(y_train_resampled).reshape(
120
- -1, 1
121
- ) # Add this reshape
122
- X_test_tensor = torch.FloatTensor(X_test)
123
- y_test_tensor = torch.FloatTensor(y_test).reshape(-1, 1)
124
-
125
- # Create datasets and dataloaders
126
- train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
127
- test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
128
-
129
- train_loader = DataLoader(dataset=train_dataset, batch_size=10, shuffle=True)
130
- test_loader = DataLoader(
131
- dataset=test_dataset, batch_size=len(test_dataset), shuffle=False
132
- )
133
-
134
- return train_loader, test_loader
135
-
136
-
137
- def load_syftbox_dataset() -> tuple[DataLoader, DataLoader]:
138
- import pandas as pd
139
-
140
- from syft_flwr.utils import get_syftbox_dataset_path
141
-
142
- data_dir = get_syftbox_dataset_path()
143
- logger.info(f"Loading dataset from {data_dir}")
144
-
145
- train_df = pd.read_csv(data_dir / "train.csv")
146
- test_df = pd.read_csv(data_dir / "test.csv")
147
-
148
- return dataset_processing(train_df, test_df)
149
-
150
-
151
- fds = None # Cache FederatedDataset
152
-
153
-
154
- def load_flwr_data(
155
- partition_id: int, num_partitions: int
156
- ) -> tuple[DataLoader, DataLoader]:
157
- """
158
- Load the `fl-diabetes-prediction` dataset to memory
159
- """
160
- global fds
161
- if fds is None:
162
- partitioner = IidPartitioner(num_partitions=num_partitions)
163
- fds = FederatedDataset(
164
- dataset="khoaguin/pima-indians-diabetes-database",
165
- partitioners={"train": partitioner},
166
- )
167
-
168
- partition: DataFrame = fds.load_partition(partition_id, "train").with_format(
169
- "pandas"
170
- )[:]
171
- train_df, test_df = train_test_split(partition, test_size=0.2, random_state=95)
172
-
173
- return dataset_processing(train_df, test_df)
174
-
175
-
176
- def train(model, train_loader, local_epochs=1):
177
- criterion = nn.BCELoss()
178
- optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0005)
179
- history = {"train_loss": [], "train_acc": []}
180
- model.to(DEVICE)
181
-
182
- for epoch in range(local_epochs):
183
- model.train()
184
- running_loss = 0.0
185
- correct = 0
186
- total = 0
187
-
188
- for inputs, labels in train_loader:
189
- inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
190
-
191
- optimizer.zero_grad()
192
- outputs = model(inputs)
193
- loss = criterion(outputs, labels)
194
- loss.backward()
195
- optimizer.step()
196
-
197
- running_loss += loss.item() * inputs.size(0)
198
- predicted = (outputs > 0.5).float()
199
- total += labels.size(0)
200
- correct += (predicted == labels).sum().item()
201
-
202
- epoch_loss = running_loss / len(train_loader.dataset)
203
- epoch_acc = correct / total
204
- history["train_loss"].append(epoch_loss)
205
- history["train_acc"].append(epoch_acc)
206
-
207
- return history
208
-
209
-
210
- def evaluate(model, data_loader):
211
- model.to(DEVICE)
212
- model.eval()
213
- criterion = nn.BCELoss()
214
-
215
- running_loss = 0.0
216
- correct = 0
217
- total = 0
218
-
219
- with torch.no_grad():
220
- for inputs, labels in data_loader:
221
- inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
222
- outputs = model(inputs)
223
- loss = criterion(outputs, labels)
224
- running_loss += loss.item() * inputs.size(0)
225
- predicted = (outputs > 0.5).float()
226
- total += labels.size(0)
227
- correct += (predicted == labels).sum().item()
228
-
229
- epoch_loss = running_loss / len(data_loader.dataset)
230
- epoch_acc = correct / total
231
-
232
- return epoch_loss, epoch_acc
233
-
234
-
235
- def set_weights(model, parameters):
236
- params_dict = zip(model.state_dict().keys(), parameters)
237
- state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
238
- model.load_state_dict(state_dict, strict=True)
239
-
240
-
241
- def get_weights(model):
242
- ndarrays = [val.cpu().numpy() for _, val in model.state_dict().items()]
243
- return ndarrays
File without changes
File without changes
File without changes
File without changes