messenger-logger-callback 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,229 @@
1
+ Metadata-Version: 2.4
2
+ Name: messenger-logger-callback
3
+ Version: 0.1.0
4
+ Summary: A custom logger and Hugging Face Trainer Callback for sending logs to a remote server with authentication.
5
+ Author-email: Riko0 <grigoriyalexeenko@gmail.com>
6
+ Project-URL: Homepage, https://github.com/Riko0/messenger-logger-callback
7
+ Project-URL: Bug Tracker, https://github.com/Riko0/messenger-logger-callback/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: requests>=2.25.1
16
+ Requires-Dist: transformers>=4.0.0
17
+
18
+ Messenger Logger Callback
19
+ A custom Hugging Face Trainer Callback for sending training logs and custom data to a remote server with authentication.
20
+
21
+ Overview
22
+ messenger-logger-callback is a Python library designed to easily integrate remote logging into your Hugging Face Trainer workflows. It provides a TrainerCallback that automatically captures standard training metrics (loss, learning rate, epoch, etc.) and sends them as JSON payloads to a specified HTTP endpoint. Additionally, it offers a flexible method to send arbitrary custom data from anywhere in your application.
23
+
24
+ This library is particularly useful for:
25
+
26
+ Centralized logging of machine learning experiments.
27
+
28
+ Real-time monitoring of training progress on a remote dashboard.
29
+
30
+ Integrating with custom notification systems (e.g., Telegram bots, Slack webhooks) by having a server endpoint process the received logs.
31
+
32
+ Features
33
+ Hugging Face Trainer Integration: Seamlessly plugs into the Hugging Face Trainer class.
34
+
35
+ Automatic Log Capture: Intercepts on_log, on_train_begin, on_train_end, and on_epoch_end events.
36
+
37
+ Custom Log Sending: Provides a send_custom_log method for sending any arbitrary JSON data.
38
+
39
+ Flexible Configuration: Server URL and authentication token can be provided via constructor arguments or environment variables.
40
+
41
+ Robust Error Handling: Includes try-except blocks for network requests to gracefully handle timeouts, connection errors, and HTTP errors, printing informative messages without crashing your training.
42
+
43
+ Authentication Support: Supports sending a Bearer token in the Authorization header for secure communication with your logging server.
44
+
45
+ Installation
46
+ You can install messenger-logger-callback using pip:
47
+
48
+ pip install messenger-logger-callback
49
+
50
+ Usage
51
+ 1. Basic Integration with Hugging Face Trainer
52
+ from transformers import Trainer, TrainingArguments
53
+ from messenger_logger.callback import MessengerLoggerCallback # Assuming your file is in messenger_logger/callback.py
54
+ import os
55
+
56
+ # --- Configure your server URL and optional authentication token ---
57
+ # Option A: Pass directly to the constructor
58
+ SERVER_URL = "http://your-logging-server.com/api/logs"
59
+ AUTH_TOKEN = "your_secret_api_token"
60
+
61
+ # Option B: Set as environment variables (recommended for production)
62
+ # os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://your-logging-server.com/api/logs"
63
+ # os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "your_secret_api_token"
64
+
65
+ # Initialize the callback
66
+ # If using environment variables, you can omit server_url and auth_token arguments:
67
+ # messenger_logger = MessengerLoggerCallback(
68
+ # project_name="my_awesome_model",
69
+ # run_id="experiment_v2"
70
+ # )
71
+ messenger_logger = MessengerLoggerCallback(
72
+ server_url=SERVER_URL,
73
+ project_name="my_awesome_model",
74
+ run_id="experiment_v2",
75
+ auth_token=AUTH_TOKEN
76
+ )
77
+
78
+ # Define your TrainingArguments
79
+ training_args = TrainingArguments(
80
+ output_dir="./results",
81
+ num_train_epochs=3,
82
+ per_device_train_batch_size=8,
83
+ logging_dir="./logs",
84
+ logging_steps=100,
85
+ report_to=["tensorboard"] # You can still report to other services
86
+ )
87
+
88
+ # Initialize your Trainer (replace with your actual model and dataset)
89
+ # from transformers import AutoModelForSequenceClassification, AutoTokenizer
90
+ # from datasets import load_dataset
91
+ # model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
92
+ # tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
93
+ # dataset = load_dataset("imdb")
94
+ # trainer = Trainer(
95
+ # model=model,
96
+ # args=training_args,
97
+ # train_dataset=dataset["train"],
98
+ # callbacks=[messenger_logger] # Add your custom callback here
99
+ # )
100
+
101
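+ # For demonstration purposes only: the placeholder classes below just show where the model and dataset go. Replace them with a real torch.nn.Module model and a real dataset before calling trainer.train().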
102
+ class DummyModel:
103
+ pass
104
+ class DummyDataset:
105
+ def __len__(self): return 1000
106
+ def __getitem__(self, idx): return {"input_ids": [1,2,3], "labels": 0}
107
+
108
+ trainer = Trainer(
109
+ model=DummyModel(),
110
+ args=training_args,
111
+ train_dataset=DummyDataset(),
112
+ callbacks=[messenger_logger] # Add your custom callback here
113
+ )
114
+
115
+ # Start training
116
+ print("Starting dummy training...")
117
+ trainer.train()
118
+ print("Dummy training finished.")
119
+
120
+ 2. Sending Custom Logs
121
+ You can send arbitrary data at any point using the send_custom_log method:
122
+
123
+ from messenger_logger.callback import MessengerLoggerCallback
124
+ import os
125
+
126
+ # Ensure the logger is initialized (e.g., from environment variables)
127
+ # os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://localhost:5000/api/logs"
128
+ # os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "my_custom_token"
129
+ custom_logger = MessengerLoggerCallback(
130
+ server_url="http://localhost:5000/api/logs", # Or omit if using env vars
131
+ project_name="my_inference_project",
132
+ run_id="prediction_run_1"
133
+ )
134
+
135
+ # Send custom data, e.g., after model evaluation or deployment
136
+ custom_logger.send_custom_log({
137
+ "event": "model_evaluation_complete",
138
+ "model_version": "v1.2.0",
139
+ "evaluation_metrics": {
140
+ "accuracy": 0.92,
141
+ "f1_score": 0.915,
142
+ "precision": 0.90,
143
+ "recall": 0.93
144
+ },
145
+ "dataset_info": "test_set_2023-01-15"
146
+ })
147
+
148
+ custom_logger.send_custom_log({
149
+ "event": "alert",
150
+ "level": "CRITICAL",
151
+ "message": "High GPU temperature detected on node gpu-01",
152
+ "temperature_celsius": 85,
153
+ "timestamp": "2023-10-27T10:30:00Z"
154
+ })
155
+
156
+ Configuration
157
+ The MessengerLoggerCallback can be configured using:
158
+
159
+ Constructor Arguments:
160
+
161
+ server_url (str, optional): The HTTP endpoint to send logs to.
162
+
163
+ project_name (str, optional): A string identifier for your project (defaults to "default_project").
164
+
165
+ run_id (str, optional): A unique identifier for the current training run. If not provided, a timestamp-based ID is generated.
166
+
167
+ auth_token (str, optional): An authentication token to include in the Authorization: Bearer <token> header.
168
+
169
+ Environment Variables:
170
+
171
+ MESSENGER_LOGGER_SERVER_URL: Fallback for server_url; used only when server_url is not passed to the constructor.
172
+
173
+ MESSENGER_LOGGER_AUTH_TOKEN: Fallback for auth_token; used only when auth_token is not passed to the constructor.
174
+
175
+ Precedence: Constructor arguments take precedence over environment variables. If neither is provided for server_url, a ValueError will be raised.
176
+
177
+ Error Handling
178
+ The library includes robust error handling for network requests. If the logging server is unavailable, times out, or returns an HTTP error (4xx/5xx), a warning or error message will be printed to the console, but your training script will continue to run without interruption.
179
+
180
+ Example error messages you might see:
181
+
182
+ Warning: Request to http://localhost:5000/api/logs timed out for step 10. The server did not respond within the expected time.
183
+
184
+ Error: Could not connect to server at http://localhost:9999/api/logs for step N/A. The server might be unavailable or the URL is incorrect. Error details: ...
185
+
186
+ Error: HTTP error occurred while sending logs for step 20. Status: 401, Response: Unauthorized. Check server logs for more details.
187
+
188
+ Local Testing with a Dummy Server
189
+ For local development and testing, you can use a simple Flask server to receive and print the logs:
190
+
191
+ Save dummy_server.py:
192
+
193
+ # dummy_server.py
194
+ from flask import Flask, request, jsonify
195
+ import json
196
+
197
+ app = Flask(__name__)
198
+
199
+ @app.route('/api/logs', methods=['POST'])
200
+ def receive_logs():
201
+ data = request.get_json()
202
+ print("\n--- Received data ---")
203
+ print(json.dumps(data, indent=2))
204
+ # Check for Authorization header
205
+ auth_header = request.headers.get('Authorization')
206
+ if auth_header:
207
+ print(f"Authorization Header: {auth_header}")
208
+ print("---------------------\n")
209
+ return jsonify({"status": "success", "message": "Log received"}), 200
210
+
211
+ if __name__ == '__main__':
212
+ print("Dummy server running on http://127.0.0.1:5000/api/logs")
213
+ app.run(port=5000)
214
+
215
+ Install Flask:
216
+
217
+ pip install Flask
218
+
219
+ Run the dummy server:
220
+
221
+ python dummy_server.py
222
+
223
+ Run your training script (or the example usage from messenger_logger/callback.py's if __name__ == "__main__": block). You will see the logs printed in your dummy server's console.
224
+
225
+ Contributing
226
+ Contributions are welcome! Please feel free to open issues or submit pull requests on the GitHub repository.
227
+
228
+ License
229
+ This project is licensed under the MIT License - see the LICENSE file for details.
@@ -0,0 +1,212 @@
1
+ Messenger Logger Callback
2
+ A custom Hugging Face Trainer Callback for sending training logs and custom data to a remote server with authentication.
3
+
4
+ Overview
5
+ messenger-logger-callback is a Python library designed to easily integrate remote logging into your Hugging Face Trainer workflows. It provides a TrainerCallback that automatically captures standard training metrics (loss, learning rate, epoch, etc.) and sends them as JSON payloads to a specified HTTP endpoint. Additionally, it offers a flexible method to send arbitrary custom data from anywhere in your application.
6
+
7
+ This library is particularly useful for:
8
+
9
+ Centralized logging of machine learning experiments.
10
+
11
+ Real-time monitoring of training progress on a remote dashboard.
12
+
13
+ Integrating with custom notification systems (e.g., Telegram bots, Slack webhooks) by having a server endpoint process the received logs.
14
+
15
+ Features
16
+ Hugging Face Trainer Integration: Seamlessly plugs into the Hugging Face Trainer class.
17
+
18
+ Automatic Log Capture: Intercepts on_log, on_train_begin, on_train_end, and on_epoch_end events.
19
+
20
+ Custom Log Sending: Provides a send_custom_log method for sending any arbitrary JSON data.
21
+
22
+ Flexible Configuration: Server URL and authentication token can be provided via constructor arguments or environment variables.
23
+
24
+ Robust Error Handling: Includes try-except blocks for network requests to gracefully handle timeouts, connection errors, and HTTP errors, printing informative messages without crashing your training.
25
+
26
+ Authentication Support: Supports sending a Bearer token in the Authorization header for secure communication with your logging server.
27
+
28
+ Installation
29
+ You can install messenger-logger-callback using pip:
30
+
31
+ pip install messenger-logger-callback
32
+
33
+ Usage
34
+ 1. Basic Integration with Hugging Face Trainer
35
+ from transformers import Trainer, TrainingArguments
36
+ from messenger_logger.callback import MessengerLoggerCallback # Assuming your file is in messenger_logger/callback.py
37
+ import os
38
+
39
+ # --- Configure your server URL and optional authentication token ---
40
+ # Option A: Pass directly to the constructor
41
+ SERVER_URL = "http://your-logging-server.com/api/logs"
42
+ AUTH_TOKEN = "your_secret_api_token"
43
+
44
+ # Option B: Set as environment variables (recommended for production)
45
+ # os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://your-logging-server.com/api/logs"
46
+ # os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "your_secret_api_token"
47
+
48
+ # Initialize the callback
49
+ # If using environment variables, you can omit server_url and auth_token arguments:
50
+ # messenger_logger = MessengerLoggerCallback(
51
+ # project_name="my_awesome_model",
52
+ # run_id="experiment_v2"
53
+ # )
54
+ messenger_logger = MessengerLoggerCallback(
55
+ server_url=SERVER_URL,
56
+ project_name="my_awesome_model",
57
+ run_id="experiment_v2",
58
+ auth_token=AUTH_TOKEN
59
+ )
60
+
61
+ # Define your TrainingArguments
62
+ training_args = TrainingArguments(
63
+ output_dir="./results",
64
+ num_train_epochs=3,
65
+ per_device_train_batch_size=8,
66
+ logging_dir="./logs",
67
+ logging_steps=100,
68
+ report_to=["tensorboard"] # You can still report to other services
69
+ )
70
+
71
+ # Initialize your Trainer (replace with your actual model and dataset)
72
+ # from transformers import AutoModelForSequenceClassification, AutoTokenizer
73
+ # from datasets import load_dataset
74
+ # model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
75
+ # tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
76
+ # dataset = load_dataset("imdb")
77
+ # trainer = Trainer(
78
+ # model=model,
79
+ # args=training_args,
80
+ # train_dataset=dataset["train"],
81
+ # callbacks=[messenger_logger] # Add your custom callback here
82
+ # )
83
+
84
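+ # For demonstration purposes only: the placeholder classes below just show where the model and dataset go. Replace them with a real torch.nn.Module model and a real dataset before calling trainer.train().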
85
+ class DummyModel:
86
+ pass
87
+ class DummyDataset:
88
+ def __len__(self): return 1000
89
+ def __getitem__(self, idx): return {"input_ids": [1,2,3], "labels": 0}
90
+
91
+ trainer = Trainer(
92
+ model=DummyModel(),
93
+ args=training_args,
94
+ train_dataset=DummyDataset(),
95
+ callbacks=[messenger_logger] # Add your custom callback here
96
+ )
97
+
98
+ # Start training
99
+ print("Starting dummy training...")
100
+ trainer.train()
101
+ print("Dummy training finished.")
102
+
103
+ 2. Sending Custom Logs
104
+ You can send arbitrary data at any point using the send_custom_log method:
105
+
106
+ from messenger_logger.callback import MessengerLoggerCallback
107
+ import os
108
+
109
+ # Ensure the logger is initialized (e.g., from environment variables)
110
+ # os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://localhost:5000/api/logs"
111
+ # os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "my_custom_token"
112
+ custom_logger = MessengerLoggerCallback(
113
+ server_url="http://localhost:5000/api/logs", # Or omit if using env vars
114
+ project_name="my_inference_project",
115
+ run_id="prediction_run_1"
116
+ )
117
+
118
+ # Send custom data, e.g., after model evaluation or deployment
119
+ custom_logger.send_custom_log({
120
+ "event": "model_evaluation_complete",
121
+ "model_version": "v1.2.0",
122
+ "evaluation_metrics": {
123
+ "accuracy": 0.92,
124
+ "f1_score": 0.915,
125
+ "precision": 0.90,
126
+ "recall": 0.93
127
+ },
128
+ "dataset_info": "test_set_2023-01-15"
129
+ })
130
+
131
+ custom_logger.send_custom_log({
132
+ "event": "alert",
133
+ "level": "CRITICAL",
134
+ "message": "High GPU temperature detected on node gpu-01",
135
+ "temperature_celsius": 85,
136
+ "timestamp": "2023-10-27T10:30:00Z"
137
+ })
138
+
139
+ Configuration
140
+ The MessengerLoggerCallback can be configured using:
141
+
142
+ Constructor Arguments:
143
+
144
+ server_url (str, optional): The HTTP endpoint to send logs to.
145
+
146
+ project_name (str, optional): A string identifier for your project (defaults to "default_project").
147
+
148
+ run_id (str, optional): A unique identifier for the current training run. If not provided, a timestamp-based ID is generated.
149
+
150
+ auth_token (str, optional): An authentication token to include in the Authorization: Bearer <token> header.
151
+
152
+ Environment Variables:
153
+
154
+ MESSENGER_LOGGER_SERVER_URL: Fallback for server_url; used only when server_url is not passed to the constructor.
155
+
156
+ MESSENGER_LOGGER_AUTH_TOKEN: Fallback for auth_token; used only when auth_token is not passed to the constructor.
157
+
158
+ Precedence: Constructor arguments take precedence over environment variables. If neither is provided for server_url, a ValueError will be raised.
159
+
160
+ Error Handling
161
+ The library includes robust error handling for network requests. If the logging server is unavailable, times out, or returns an HTTP error (4xx/5xx), a warning or error message will be printed to the console, but your training script will continue to run without interruption.
162
+
163
+ Example error messages you might see:
164
+
165
+ Warning: Request to http://localhost:5000/api/logs timed out for step 10. The server did not respond within the expected time.
166
+
167
+ Error: Could not connect to server at http://localhost:9999/api/logs for step N/A. The server might be unavailable or the URL is incorrect. Error details: ...
168
+
169
+ Error: HTTP error occurred while sending logs for step 20. Status: 401, Response: Unauthorized. Check server logs for more details.
170
+
171
+ Local Testing with a Dummy Server
172
+ For local development and testing, you can use a simple Flask server to receive and print the logs:
173
+
174
+ Save dummy_server.py:
175
+
176
+ # dummy_server.py
177
+ from flask import Flask, request, jsonify
178
+ import json
179
+
180
+ app = Flask(__name__)
181
+
182
+ @app.route('/api/logs', methods=['POST'])
183
+ def receive_logs():
184
+ data = request.get_json()
185
+ print("\n--- Received data ---")
186
+ print(json.dumps(data, indent=2))
187
+ # Check for Authorization header
188
+ auth_header = request.headers.get('Authorization')
189
+ if auth_header:
190
+ print(f"Authorization Header: {auth_header}")
191
+ print("---------------------\n")
192
+ return jsonify({"status": "success", "message": "Log received"}), 200
193
+
194
+ if __name__ == '__main__':
195
+ print("Dummy server running on http://127.0.0.1:5000/api/logs")
196
+ app.run(port=5000)
197
+
198
+ Install Flask:
199
+
200
+ pip install Flask
201
+
202
+ Run the dummy server:
203
+
204
+ python dummy_server.py
205
+
206
+ Run your training script (or the example usage from messenger_logger/callback.py's if __name__ == "__main__": block). You will see the logs printed in your dummy server's console.
207
+
208
+ Contributing
209
+ Contributions are welcome! Please feel free to open issues or submit pull requests on the GitHub repository.
210
+
211
+ License
212
+ This project is licensed under the MIT License - see the LICENSE file for details.
@@ -0,0 +1,245 @@
1
+ import requests
2
+ import json
3
+ import os
4
+ import datetime
5
+ import dataclasses
6
+ from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl
7
+ from typing import Dict, Any, Optional
8
+
9
class MessengerLoggerCallback(TrainerCallback):
    """
    Hugging Face Trainer callback that POSTs training logs and custom data to a remote server.

    Each Trainer event handled here (``on_log``, ``on_train_begin``,
    ``on_train_end``, ``on_epoch_end``) builds a JSON payload containing the
    project name, run id, event type, a dump of the ``TrainerState`` and a
    timestamp, and sends it to ``server_url``. ``send_custom_log`` sends an
    arbitrary user dictionary the same way.

    Network failures are reported with ``print`` and never re-raised from
    ``_send_payload``.

    Args:
        server_url (str, optional): Endpoint that receives the logs. When not
            given (or empty), the MESSENGER_LOGGER_SERVER_URL environment
            variable is used instead.
            Example: "http://your-server.com/api/logs"
        project_name (str, optional): Identifier for the training project.
            Defaults to "default_project".
        run_id (str, optional): Identifier for the current run. When not
            given, ``run_<unix timestamp>`` is generated.
        auth_token (str, optional): Token sent as ``Authorization: Bearer <token>``.
            When not given (or empty), the MESSENGER_LOGGER_AUTH_TOKEN
            environment variable is used instead.

    Raises:
        ValueError: If no server URL is available from either the argument or
            the environment.
    """

    def __init__(self, server_url: Optional[str] = None, project_name: str = "default_project",
                 run_id: Optional[str] = None, auth_token: Optional[str] = None):
        # An explicit argument wins; the environment variable is only a fallback.
        self.server_url = server_url if server_url else os.getenv("MESSENGER_LOGGER_SERVER_URL")
        if not self.server_url:
            raise ValueError(
                "server_url must be provided either as an argument or via the "
                "MESSENGER_LOGGER_SERVER_URL environment variable."
            )

        # Same precedence for the token; it stays optional.
        self.auth_token = auth_token if auth_token else os.getenv("MESSENGER_LOGGER_AUTH_TOKEN")
        if self.auth_token:
            print("Authentication token will be used for server requests.")

        self.project_name = project_name
        # Second-resolution timestamp id when the caller did not name the run.
        self.run_id = run_id if run_id else f"run_{int(datetime.datetime.now().timestamp())}"
        print(f"MessengerLoggerCallback initialized for project '{self.project_name}', run '{self.run_id}'")
        print(f"Logs will be sent to: {self.server_url}")

    def _get_trainer_state_info(self, state: TrainerState) -> Dict[str, Any]:
        """Return every field of ``state`` as a plain dictionary via ``dataclasses.asdict``."""
        return dataclasses.asdict(state)

    @staticmethod
    def _replace_non_finite(value: Any) -> Any:
        """
        Return a copy of ``value`` in which NaN/inf floats are replaced by ``None``.

        Dictionaries, lists and tuples are walked recursively (tuples come back
        as lists, which is how JSON encodes them anyway). Everything else is
        returned unchanged. The input is never mutated, so the caller's own
        log dictionaries stay intact.
        """
        import math  # local import: the module header does not import math

        if isinstance(value, float) and not math.isfinite(value):
            return None
        if isinstance(value, dict):
            return {
                key: MessengerLoggerCallback._replace_non_finite(item)
                for key, item in value.items()
            }
        if isinstance(value, (list, tuple)):
            return [MessengerLoggerCallback._replace_non_finite(item) for item in value]
        return value

    def _send_payload(self, payload: Dict[str, Any], step: Optional[int] = None):
        """
        POST ``payload`` as JSON to the configured server, reporting failures with ``print``.

        Args:
            payload: Dictionary to send as the JSON body.
            step: Training step used only in the printed messages; shown as
                'N/A' when None.
        """
        headers = {"Content-Type": "application/json"}
        if self.auth_token:
            headers["Authorization"] = f"Bearer {self.auth_token}"

        step_label = step if step is not None else 'N/A'

        try:
            # NaN/inf are not valid JSON and recent requests releases refuse to
            # encode them, which would drop the whole payload. A diverged loss
            # is exactly when a notification matters, so send null instead.
            # NOTE(review): trainer_state presumably carries earlier log entries
            # too, so one NaN there would otherwise break every later send.
            safe_payload = self._replace_non_finite(payload)
            response = requests.post(self.server_url, json=safe_payload, headers=headers, timeout=10)
            response.raise_for_status()  # turn 4xx/5xx responses into HTTPError
        except requests.exceptions.Timeout:
            print(f"Warning: Request to {self.server_url} timed out for step {step_label}. "
                  "The server did not respond within the expected time.")
        except requests.exceptions.ConnectionError as e:
            print(f"Error: Could not connect to server at {self.server_url} for step {step_label}. "
                  f"The server might be unavailable or the URL is incorrect. Error details: {e}")
        except requests.exceptions.HTTPError as e:
            print(f"Error: HTTP error occurred while sending logs for step {step_label}. "
                  f"Status: {e.response.status_code}, Response: {e.response.text}. Check server logs for more details.")
        except Exception as e:
            # Last-resort boundary: logging must not interrupt the caller.
            print(f"An unexpected error occurred while sending logs for step {step_label}: {e}")

    def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, logs: Dict[str, Any], **kwargs):
        """
        Send the metrics of a Trainer logging event.

        The payload has event type "trainer_log" and carries the state dump
        together with ``logs``.
        """
        payload = {
            "project_name": self.project_name,
            "run_id": self.run_id,
            "event_type": "trainer_log",
            "trainer_state": self._get_trainer_state_info(state),
            "logs": logs,
            "timestamp": datetime.datetime.now().isoformat()
        }
        self._send_payload(payload, state.global_step)

    def on_train_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
        """Announce the start of training locally and send a "training_started" status."""
        print(f"Training for project '{self.project_name}', run '{self.run_id}' has begun.")
        self._send_status_update("training_started", state)

    def on_train_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
        """Announce the end of training locally and send a "training_finished" status."""
        print(f"Training for project '{self.project_name}', run '{self.run_id}' has ended.")
        self._send_status_update("training_finished", state)

    def on_epoch_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
        """Announce the end of an epoch locally and send an "epoch_ended" status."""
        print(f"Epoch {state.epoch} ended for project '{self.project_name}', run '{self.run_id}'.")
        self._send_status_update("epoch_ended", state)

    def _send_status_update(self, event_type: str, state: TrainerState):
        """Send a payload that carries only ``event_type`` and the state dump (no metrics)."""
        payload = {
            "project_name": self.project_name,
            "run_id": self.run_id,
            "event_type": event_type,
            "trainer_state": self._get_trainer_state_info(state),
            "timestamp": datetime.datetime.now().isoformat()
        }
        self._send_payload(payload, state.global_step)

    def send_custom_log(self, custom_data: Dict[str, Any]):
        """
        Send an arbitrary dictionary to the remote server as a "custom_log" event.

        Can be called at any time, independently of the Trainer hooks.

        Args:
            custom_data (Dict[str, Any]): Data to send. Anything that is not a
                dictionary is rejected with a printed error and nothing is sent.
        """
        if not isinstance(custom_data, dict):
            print("Error: custom_data must be a dictionary.")
            return

        payload = {
            "project_name": self.project_name,
            "run_id": self.run_id,
            "event_type": "custom_log",
            "custom_data": custom_data,
            "timestamp": datetime.datetime.now().isoformat()
        }
        print(f"Sending custom log for project '{self.project_name}', run '{self.run_id}'.")
        self._send_payload(payload)
158
+
159
# Example usage: run this module directly to exercise the callback by hand.
if __name__ == "__main__":

    def _emit_demo_log(callback, output_dir, step, epoch, metrics):
        """Build throwaway Trainer objects and push one on_log event through `callback`."""
        demo_args = TrainingArguments(output_dir=output_dir)
        demo_state = TrainerState()
        demo_state.global_step = step
        demo_state.epoch = epoch
        demo_state.is_training = True
        demo_control = TrainerControl()
        callback.on_log(demo_args, demo_state, demo_control, metrics)

    print("--- Demonstrating MessengerLoggerCallback instantiation ---")

    # Scenario 1: URL and token are passed straight to the constructor.
    print("\n--- Scenario 1: Using direct arguments for server_url and auth_token ---")
    try:
        direct_logger = MessengerLoggerCallback(
            server_url="http://localhost:5000/api/logs",
            project_name="my_nlp_project_direct",
            run_id="experiment_direct_v1",
            auth_token="my_secret_direct_token"
        )
        print("Simulating log event for direct arguments...")
        _emit_demo_log(direct_logger, "./tmp_output_direct", 10, 0.1, {"loss": 0.1234})
        direct_logger.send_custom_log({"message": "Direct argument test complete"})
    except ValueError as e:
        print(f"Configuration Error (Direct Arguments): {e}")
    except Exception as e:
        print(f"An error occurred during direct argument demonstration: {e}")

    # Scenario 2: URL and token come from the environment.
    print("\n--- Scenario 2: Using environment variables for server_url and auth_token ---")
    os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://localhost:5000/api/logs"
    os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "my_secret_env_token"

    try:
        # No server_url / auth_token here: both are picked up from the env vars.
        env_logger = MessengerLoggerCallback(
            project_name="my_nlp_project_env",
            run_id="experiment_env_v1"
        )
        print("Simulating log event for environment variables...")
        _emit_demo_log(env_logger, "./tmp_output_env", 20, 0.2, {"loss": 0.5678, "learning_rate": 5e-5})
        env_logger.send_custom_log({"message": "Environment variable test complete"})
    except ValueError as e:
        print(f"Configuration Error (Environment Variables): {e}")
    except Exception as e:
        print(f"An error occurred during environment variable demonstration: {e}")
    finally:
        # Remove the demo variables again; the token removal tolerates absence.
        os.environ.pop("MESSENGER_LOGGER_SERVER_URL")
        os.environ.pop("MESSENGER_LOGGER_AUTH_TOKEN", None)

    # Scenario 3: point at a port where nothing is expected to listen.
    print("\n--- Scenario 3: Demonstrating server unavailability error handling ---")
    os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://localhost:9999/api/logs"
    try:
        unreachable_logger = MessengerLoggerCallback(
            project_name="my_nlp_project_unavailable",
            run_id="experiment_unavailable_v1"
        )
        print("Attempting to send log to unavailable server...")
        _emit_demo_log(unreachable_logger, "./tmp_output_un", 30, 0.3, {"loss": 0.999})
    except ValueError as e:
        print(f"Configuration Error (Unavailable Server): {e}")
    except Exception as e:
        print(f"An error occurred during unavailable server demonstration: {e}")
    finally:
        os.environ.pop("MESSENGER_LOGGER_SERVER_URL", None)

    print("\nDemonstration complete. Check the console output for messages.")
@@ -0,0 +1,229 @@
1
+ Metadata-Version: 2.4
2
+ Name: messenger-logger-callback
3
+ Version: 0.1.0
4
+ Summary: A custom logger and Hugging Face Trainer Callback for sending logs to a remote server with authentication.
5
+ Author-email: Riko0 <grigoriyalexeenko@gmail.com>
6
+ Project-URL: Homepage, https://github.com/Riko0/messenger-logger-callback
7
+ Project-URL: Bug Tracker, https://github.com/Riko0/messenger-logger-callback/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: requests>=2.25.1
16
+ Requires-Dist: transformers>=4.0.0
17
+
18
+ Messenger Logger Callback
19
+ A custom Hugging Face Trainer Callback for sending training logs and custom data to a remote server with authentication.
20
+
21
+ Overview
22
+ messenger-logger-callback is a Python library designed to easily integrate remote logging into your Hugging Face Trainer workflows. It provides a TrainerCallback that automatically captures standard training metrics (loss, learning rate, epoch, etc.) and sends them as JSON payloads to a specified HTTP endpoint. Additionally, it offers a flexible method to send arbitrary custom data from anywhere in your application.
23
+
24
+ This library is particularly useful for:
25
+
26
+ Centralized logging of machine learning experiments.
27
+
28
+ Real-time monitoring of training progress on a remote dashboard.
29
+
30
+ Integrating with custom notification systems (e.g., Telegram bots, Slack webhooks) by having a server endpoint process the received logs.
31
+
32
+ Features
33
+ Hugging Face Trainer Integration: Seamlessly plugs into the Hugging Face Trainer class.
34
+
35
+ Automatic Log Capture: Intercepts on_log, on_train_begin, on_train_end, and on_epoch_end events.
36
+
37
+ Custom Log Sending: Provides a send_custom_log method for sending any arbitrary JSON data.
38
+
39
+ Flexible Configuration: Server URL and authentication token can be provided via constructor arguments or environment variables.
40
+
41
+ Robust Error Handling: Includes try-except blocks for network requests to gracefully handle timeouts, connection errors, and HTTP errors, printing informative messages without crashing your training.
42
+
43
+ Authentication Support: Supports sending a Bearer token in the Authorization header for secure communication with your logging server.
44
+
45
+ Installation
46
+ You can install messenger-logger-callback using pip:
47
+
48
+ pip install messenger-logger-callback
49
+
50
+ Usage
51
+ 1. Basic Integration with Hugging Face Trainer
52
+ from transformers import Trainer, TrainingArguments
53
+ from messenger_logger.callback import MessengerLoggerCallback # Provided by the messenger-logger-callback package
54
+ import os
55
+
56
+ # --- Configure your server URL and optional authentication token ---
57
+ # Option A: Pass directly to the constructor
58
+ SERVER_URL = "http://your-logging-server.com/api/logs"
59
+ AUTH_TOKEN = "your_secret_api_token"
60
+
61
+ # Option B: Set as environment variables (recommended for production)
62
+ # os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://your-logging-server.com/api/logs"
63
+ # os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "your_secret_api_token"
64
+
65
+ # Initialize the callback
66
+ # If using environment variables, you can omit server_url and auth_token arguments:
67
+ # messenger_logger = MessengerLoggerCallback(
68
+ # project_name="my_awesome_model",
69
+ # run_id="experiment_v2"
70
+ # )
71
+ messenger_logger = MessengerLoggerCallback(
72
+ server_url=SERVER_URL,
73
+ project_name="my_awesome_model",
74
+ run_id="experiment_v2",
75
+ auth_token=AUTH_TOKEN
76
+ )
77
+
78
+ # Define your TrainingArguments
79
+ training_args = TrainingArguments(
80
+ output_dir="./results",
81
+ num_train_epochs=3,
82
+ per_device_train_batch_size=8,
83
+ logging_dir="./logs",
84
+ logging_steps=100,
85
+ report_to=["tensorboard"] # You can still report to other services
86
+ )
87
+
88
+ # Initialize your Trainer (replace with your actual model and dataset)
89
+ # from transformers import AutoModelForSequenceClassification, AutoTokenizer
90
+ # from datasets import load_dataset
91
+ # model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
92
+ # tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
93
+ # dataset = load_dataset("imdb")
94
+ # trainer = Trainer(
95
+ # model=model,
96
+ # args=training_args,
97
+ # train_dataset=dataset["train"],
98
+ # callbacks=[messenger_logger] # Add your custom callback here
99
+ # )
100
+
101
+ # For demonstration purposes, placeholder model and dataset classes are used below; replace them with a real model and dataset before running
102
+ class DummyModel:
103
+ pass
104
+ class DummyDataset:
105
+ def __len__(self): return 1000
106
+ def __getitem__(self, idx): return {"input_ids": [1,2,3], "labels": 0}
107
+
108
+ trainer = Trainer(
109
+ model=DummyModel(),
110
+ args=training_args,
111
+ train_dataset=DummyDataset(),
112
+ callbacks=[messenger_logger] # Add your custom callback here
113
+ )
114
+
115
+ # Start training
116
+ print("Starting dummy training...")
117
+ trainer.train()
118
+ print("Dummy training finished.")
119
+
120
+ 2. Sending Custom Logs
121
+ You can send arbitrary data at any point using the send_custom_log method:
122
+
123
+ from messenger_logger.callback import MessengerLoggerCallback
124
+ import os
125
+
126
+ # Ensure the logger is initialized (e.g., from environment variables)
127
+ # os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://localhost:5000/api/logs"
128
+ # os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "my_custom_token"
129
+ custom_logger = MessengerLoggerCallback(
130
+ server_url="http://localhost:5000/api/logs", # Or omit if using env vars
131
+ project_name="my_inference_project",
132
+ run_id="prediction_run_1"
133
+ )
134
+
135
+ # Send custom data, e.g., after model evaluation or deployment
136
+ custom_logger.send_custom_log({
137
+ "event": "model_evaluation_complete",
138
+ "model_version": "v1.2.0",
139
+ "evaluation_metrics": {
140
+ "accuracy": 0.92,
141
+ "f1_score": 0.915,
142
+ "precision": 0.90,
143
+ "recall": 0.93
144
+ },
145
+ "dataset_info": "test_set_2023-01-15"
146
+ })
147
+
148
+ custom_logger.send_custom_log({
149
+ "event": "alert",
150
+ "level": "CRITICAL",
151
+ "message": "High GPU temperature detected on node gpu-01",
152
+ "temperature_celsius": 85,
153
+ "timestamp": "2023-10-27T10:30:00Z"
154
+ })
155
+
156
+ Configuration
157
+ The MessengerLoggerCallback can be configured using:
158
+
159
+ Constructor Arguments:
160
+
161
+ server_url (str, optional): The HTTP endpoint to send logs to.
162
+
163
+ project_name (str, optional): A string identifier for your project (defaults to "default_project").
164
+
165
+ run_id (str, optional): A unique identifier for the current training run. If not provided, a timestamp-based ID is generated.
166
+
167
+ auth_token (str, optional): An authentication token to include in the Authorization: Bearer <token> header.
168
+
169
+ Environment Variables:
170
+
171
+ MESSENGER_LOGGER_SERVER_URL: Used as the server URL when the server_url constructor argument is not provided.
172
+
173
+ MESSENGER_LOGGER_AUTH_TOKEN: Used as the authentication token when the auth_token constructor argument is not provided.
174
+
175
+ Precedence: Constructor arguments take precedence over environment variables. If neither is provided for server_url, a ValueError will be raised.
176
+
177
+ Error Handling
178
+ The library includes robust error handling for network requests. If the logging server is unavailable, times out, or returns an HTTP error (4xx/5xx), a warning or error message will be printed to the console, but your training script will continue to run without interruption.
179
+
180
+ Example error messages you might see:
181
+
182
+ Warning: Request to http://localhost:5000/api/logs timed out for step 10. The server did not respond within the expected time.
183
+
184
+ Error: Could not connect to server at http://localhost:9999/api/logs for step N/A. The server might be unavailable or the URL is incorrect. Error details: ...
185
+
186
+ Error: HTTP error occurred while sending logs for step 20. Status: 401, Response: Unauthorized. Check server logs for more details.
187
+
188
+ Local Testing with a Dummy Server
189
+ For local development and testing, you can use a simple Flask server to receive and print the logs:
190
+
191
+ Save the following code as dummy_server.py:
192
+
193
+ # dummy_server.py
194
+ from flask import Flask, request, jsonify
195
+ import json
196
+
197
+ app = Flask(__name__)
198
+
199
+ @app.route('/api/logs', methods=['POST'])
200
+ def receive_logs():
201
+ data = request.get_json()
202
+ print("\n--- Received data ---")
203
+ print(json.dumps(data, indent=2))
204
+ # Check for Authorization header
205
+ auth_header = request.headers.get('Authorization')
206
+ if auth_header:
207
+ print(f"Authorization Header: {auth_header}")
208
+ print("---------------------\n")
209
+ return jsonify({"status": "success", "message": "Log received"}), 200
210
+
211
+ if __name__ == '__main__':
212
+ print("Dummy server running on http://127.0.0.1:5000/api/logs")
213
+ app.run(port=5000)
214
+
215
+ Install Flask:
216
+
217
+ pip install Flask
218
+
219
+ Run the dummy server:
220
+
221
+ python dummy_server.py
222
+
223
+ Run your training script (or the example in the if __name__ == "__main__": block of messenger_logger/callback.py). The received logs are printed in the dummy server's console.
224
+
225
+ Contributing
226
+ Contributions are welcome! Please feel free to open issues or submit pull requests on the GitHub repository.
227
+
228
+ License
229
+ This project is licensed under the MIT License - see the LICENSE file for details.
@@ -0,0 +1,9 @@
1
+ README.md
2
+ pyproject.toml
3
+ messenger_logger/__init__.py
4
+ messenger_logger/callback.py
5
+ messenger_logger_callback.egg-info/PKG-INFO
6
+ messenger_logger_callback.egg-info/SOURCES.txt
7
+ messenger_logger_callback.egg-info/dependency_links.txt
8
+ messenger_logger_callback.egg-info/requires.txt
9
+ messenger_logger_callback.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ requests>=2.25.1
2
+ transformers>=4.0.0
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "messenger-logger-callback"
7
+ version = "0.1.0"
8
+ authors = [
9
+ { name="Riko0", email="grigoriyalexeenko@gmail.com" },
10
+ ]
11
+ description = "A custom logger and Hugging Face Trainer Callback for sending logs to a remote server with authentication."
12
+ readme = "README.md"
13
+ requires-python = ">=3.8"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Operating System :: OS Independent",
18
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
19
+ "Topic :: Software Development :: Libraries :: Python Modules",
20
+ ]
21
+ dependencies = [
22
+ "requests>=2.25.1",
23
+ "transformers>=4.0.0"
24
+ ]
25
+
26
+ [project.urls]
27
+ Homepage = "https://github.com/Riko0/messenger-logger-callback"
28
+ "Bug Tracker" = "https://github.com/Riko0/messenger-logger-callback/issues"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+