messenger-logger-callback 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- messenger_logger_callback-0.1.0/PKG-INFO +229 -0
- messenger_logger_callback-0.1.0/README.md +212 -0
- messenger_logger_callback-0.1.0/messenger_logger/__init__.py +0 -0
- messenger_logger_callback-0.1.0/messenger_logger/callback.py +245 -0
- messenger_logger_callback-0.1.0/messenger_logger_callback.egg-info/PKG-INFO +229 -0
- messenger_logger_callback-0.1.0/messenger_logger_callback.egg-info/SOURCES.txt +9 -0
- messenger_logger_callback-0.1.0/messenger_logger_callback.egg-info/dependency_links.txt +1 -0
- messenger_logger_callback-0.1.0/messenger_logger_callback.egg-info/requires.txt +2 -0
- messenger_logger_callback-0.1.0/messenger_logger_callback.egg-info/top_level.txt +1 -0
- messenger_logger_callback-0.1.0/pyproject.toml +28 -0
- messenger_logger_callback-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: messenger-logger-callback
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A custom logger and Hugging Face Trainer Callback for sending logs to a remote server with authentication.
|
|
5
|
+
Author-email: Riko0 <grigoriyalexeenko@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/Riko0/messenger-logger-callback
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/Riko0/messenger-logger-callback/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: requests>=2.25.1
|
|
16
|
+
Requires-Dist: transformers>=4.0.0
|
|
17
|
+
|
|
18
|
+
Messenger Logger Callback
|
|
19
|
+
A custom Hugging Face Trainer Callback for sending training logs and custom data to a remote server with authentication.
|
|
20
|
+
|
|
21
|
+
Overview
|
|
22
|
+
messenger-logger-callback is a Python library designed to easily integrate remote logging into your Hugging Face Trainer workflows. It provides a TrainerCallback that automatically captures standard training metrics (loss, learning rate, epoch, etc.) and sends them as JSON payloads to a specified HTTP endpoint. Additionally, it offers a flexible method to send arbitrary custom data from anywhere in your application.
|
|
23
|
+
|
|
24
|
+
This library is particularly useful for:
|
|
25
|
+
|
|
26
|
+
Centralized logging of machine learning experiments.
|
|
27
|
+
|
|
28
|
+
Real-time monitoring of training progress on a remote dashboard.
|
|
29
|
+
|
|
30
|
+
Integrating with custom notification systems (e.g., Telegram bots, Slack webhooks) by having a server endpoint process the received logs.
|
|
31
|
+
|
|
32
|
+
Features
|
|
33
|
+
Hugging Face Trainer Integration: Seamlessly plugs into the Hugging Face Trainer class.
|
|
34
|
+
|
|
35
|
+
Automatic Log Capture: Intercepts on_log, on_train_begin, on_train_end, and on_epoch_end events.
|
|
36
|
+
|
|
37
|
+
Custom Log Sending: Provides a send_custom_log method for sending any arbitrary JSON data.
|
|
38
|
+
|
|
39
|
+
Flexible Configuration: Server URL and authentication token can be provided via constructor arguments or environment variables.
|
|
40
|
+
|
|
41
|
+
Robust Error Handling: Includes try-except blocks for network requests to gracefully handle timeouts, connection errors, and HTTP errors, printing informative messages without crashing your training.
|
|
42
|
+
|
|
43
|
+
Authentication Support: Supports sending a Bearer token in the Authorization header for secure communication with your logging server.
|
|
44
|
+
|
|
45
|
+
Installation
|
|
46
|
+
You can install messenger-logger-callback using pip:
|
|
47
|
+
|
|
48
|
+
pip install messenger-logger-callback
|
|
49
|
+
|
|
50
|
+
Usage
|
|
51
|
+
1. Basic Integration with Hugging Face Trainer
|
|
52
|
+
from transformers import Trainer, TrainingArguments
|
|
53
|
+
from messenger_logger.callback import MessengerLoggerCallback # Assuming your file is in messenger_logger/callback.py
|
|
54
|
+
import os
|
|
55
|
+
|
|
56
|
+
# --- Configure your server URL and optional authentication token ---
|
|
57
|
+
# Option A: Pass directly to the constructor
|
|
58
|
+
SERVER_URL = "http://your-logging-server.com/api/logs"
|
|
59
|
+
AUTH_TOKEN = "your_secret_api_token"
|
|
60
|
+
|
|
61
|
+
# Option B: Set as environment variables (recommended for production)
|
|
62
|
+
# os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://your-logging-server.com/api/logs"
|
|
63
|
+
# os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "your_secret_api_token"
|
|
64
|
+
|
|
65
|
+
# Initialize the callback
|
|
66
|
+
# If using environment variables, you can omit server_url and auth_token arguments:
|
|
67
|
+
# messenger_logger = MessengerLoggerCallback(
|
|
68
|
+
# project_name="my_awesome_model",
|
|
69
|
+
# run_id="experiment_v2"
|
|
70
|
+
# )
|
|
71
|
+
messenger_logger = MessengerLoggerCallback(
|
|
72
|
+
server_url=SERVER_URL,
|
|
73
|
+
project_name="my_awesome_model",
|
|
74
|
+
run_id="experiment_v2",
|
|
75
|
+
auth_token=AUTH_TOKEN
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Define your TrainingArguments
|
|
79
|
+
training_args = TrainingArguments(
|
|
80
|
+
output_dir="./results",
|
|
81
|
+
num_train_epochs=3,
|
|
82
|
+
per_device_train_batch_size=8,
|
|
83
|
+
logging_dir="./logs",
|
|
84
|
+
logging_steps=100,
|
|
85
|
+
report_to=["tensorboard"] # You can still report to other services
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
# Initialize your Trainer (replace with your actual model and dataset)
|
|
89
|
+
# from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
|
90
|
+
# from datasets import load_dataset
|
|
91
|
+
# model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
|
|
92
|
+
# tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
|
93
|
+
# dataset = load_dataset("imdb")
|
|
94
|
+
# trainer = Trainer(
|
|
95
|
+
# model=model,
|
|
96
|
+
# args=training_args,
|
|
97
|
+
# train_dataset=dataset["train"],
|
|
98
|
+
# callbacks=[messenger_logger] # Add your custom callback here
|
|
99
|
+
# )
|
|
100
|
+
|
|
101
|
+
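# For demonstration purposes only: DummyModel and DummyDataset below are placeholders; for a real run, use an actual model and dataset as in the commented-out setup above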
|
|
102
|
+
class DummyModel:
|
|
103
|
+
pass
|
|
104
|
+
class DummyDataset:
|
|
105
|
+
def __len__(self): return 1000
|
|
106
|
+
def __getitem__(self, idx): return {"input_ids": [1,2,3], "labels": 0}
|
|
107
|
+
|
|
108
|
+
trainer = Trainer(
|
|
109
|
+
model=DummyModel(),
|
|
110
|
+
args=training_args,
|
|
111
|
+
train_dataset=DummyDataset(),
|
|
112
|
+
callbacks=[messenger_logger] # Add your custom callback here
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# Start training
|
|
116
|
+
print("Starting dummy training...")
|
|
117
|
+
trainer.train()
|
|
118
|
+
print("Dummy training finished.")
|
|
119
|
+
|
|
120
|
+
2. Sending Custom Logs
|
|
121
|
+
You can send arbitrary data at any point using the send_custom_log method:
|
|
122
|
+
|
|
123
|
+
from messenger_logger.callback import MessengerLoggerCallback
|
|
124
|
+
import os
|
|
125
|
+
|
|
126
|
+
# Ensure the logger is initialized (e.g., from environment variables)
|
|
127
|
+
# os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://localhost:5000/api/logs"
|
|
128
|
+
# os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "my_custom_token"
|
|
129
|
+
custom_logger = MessengerLoggerCallback(
|
|
130
|
+
server_url="http://localhost:5000/api/logs", # Or omit if using env vars
|
|
131
|
+
project_name="my_inference_project",
|
|
132
|
+
run_id="prediction_run_1"
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Send custom data, e.g., after model evaluation or deployment
|
|
136
|
+
custom_logger.send_custom_log({
|
|
137
|
+
"event": "model_evaluation_complete",
|
|
138
|
+
"model_version": "v1.2.0",
|
|
139
|
+
"evaluation_metrics": {
|
|
140
|
+
"accuracy": 0.92,
|
|
141
|
+
"f1_score": 0.915,
|
|
142
|
+
"precision": 0.90,
|
|
143
|
+
"recall": 0.93
|
|
144
|
+
},
|
|
145
|
+
"dataset_info": "test_set_2023-01-15"
|
|
146
|
+
})
|
|
147
|
+
|
|
148
|
+
custom_logger.send_custom_log({
|
|
149
|
+
"event": "alert",
|
|
150
|
+
"level": "CRITICAL",
|
|
151
|
+
"message": "High GPU temperature detected on node gpu-01",
|
|
152
|
+
"temperature_celsius": 85,
|
|
153
|
+
"timestamp": "2023-10-27T10:30:00Z"
|
|
154
|
+
})
|
|
155
|
+
|
|
156
|
+
Configuration
|
|
157
|
+
The MessengerLoggerCallback can be configured using:
|
|
158
|
+
|
|
159
|
+
Constructor Arguments:
|
|
160
|
+
|
|
161
|
+
server_url (str, optional): The HTTP endpoint to send logs to.
|
|
162
|
+
|
|
163
|
+
project_name (str, optional): A string identifier for your project (defaults to "default_project").
|
|
164
|
+
|
|
165
|
+
run_id (str, optional): A unique identifier for the current training run. If not provided, a timestamp-based ID is generated.
|
|
166
|
+
|
|
167
|
+
auth_token (str, optional): An authentication token to include in the Authorization: Bearer <token> header.
|
|
168
|
+
|
|
169
|
+
Environment Variables:
|
|
170
|
+
|
|
171
|
+
MESSENGER_LOGGER_SERVER_URL: Used as the server URL when the server_url argument is not provided.
|
|
172
|
+
|
|
173
|
+
MESSENGER_LOGGER_AUTH_TOKEN: Used as the authentication token when the auth_token argument is not provided.
|
|
174
|
+
|
|
175
|
+
Precedence: Constructor arguments take precedence over environment variables. If neither is provided for server_url, a ValueError will be raised.
|
|
176
|
+
|
|
177
|
+
Error Handling
|
|
178
|
+
The library includes robust error handling for network requests. If the logging server is unavailable, times out, or returns an HTTP error (4xx/5xx), a warning or error message will be printed to the console, but your training script will continue to run without interruption.
|
|
179
|
+
|
|
180
|
+
Example error messages you might see:
|
|
181
|
+
|
|
182
|
+
Warning: Request to http://localhost:5000/api/logs timed out for step 10. The server did not respond within the expected time.
|
|
183
|
+
|
|
184
|
+
Error: Could not connect to server at http://localhost:9999/api/logs for step N/A. The server might be unavailable or the URL is incorrect. Error details: ...
|
|
185
|
+
|
|
186
|
+
Error: HTTP error occurred while sending logs for step 20. Status: 401, Response: Unauthorized. Check server logs for more details.
|
|
187
|
+
|
|
188
|
+
Local Testing with a Dummy Server
|
|
189
|
+
For local development and testing, you can use a simple Flask server to receive and print the logs:
|
|
190
|
+
|
|
191
|
+
Save dummy_server.py:
|
|
192
|
+
|
|
193
|
+
# dummy_server.py
|
|
194
|
+
from flask import Flask, request, jsonify
|
|
195
|
+
import json
|
|
196
|
+
|
|
197
|
+
app = Flask(__name__)
|
|
198
|
+
|
|
199
|
+
@app.route('/api/logs', methods=['POST'])
|
|
200
|
+
def receive_logs():
|
|
201
|
+
data = request.get_json()
|
|
202
|
+
print("\n--- Received data ---")
|
|
203
|
+
print(json.dumps(data, indent=2))
|
|
204
|
+
# Check for Authorization header
|
|
205
|
+
auth_header = request.headers.get('Authorization')
|
|
206
|
+
if auth_header:
|
|
207
|
+
print(f"Authorization Header: {auth_header}")
|
|
208
|
+
print("---------------------\n")
|
|
209
|
+
return jsonify({"status": "success", "message": "Log received"}), 200
|
|
210
|
+
|
|
211
|
+
if __name__ == '__main__':
|
|
212
|
+
print("Dummy server running on http://127.0.0.1:5000/api/logs")
|
|
213
|
+
app.run(port=5000)
|
|
214
|
+
|
|
215
|
+
Install Flask:
|
|
216
|
+
|
|
217
|
+
pip install Flask
|
|
218
|
+
|
|
219
|
+
Run the dummy server:
|
|
220
|
+
|
|
221
|
+
python dummy_server.py
|
|
222
|
+
|
|
223
|
+
Run your training script (or the example usage from messenger_logger/callback.py's if __name__ == "__main__": block). You will see the logs printed in your dummy server's console.
|
|
224
|
+
|
|
225
|
+
Contributing
|
|
226
|
+
Contributions are welcome! Please feel free to open issues or submit pull requests on the GitHub repository.
|
|
227
|
+
|
|
228
|
+
License
|
|
229
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
Messenger Logger Callback
|
|
2
|
+
A custom Hugging Face Trainer Callback for sending training logs and custom data to a remote server with authentication.
|
|
3
|
+
|
|
4
|
+
Overview
|
|
5
|
+
messenger-logger-callback is a Python library designed to easily integrate remote logging into your Hugging Face Trainer workflows. It provides a TrainerCallback that automatically captures standard training metrics (loss, learning rate, epoch, etc.) and sends them as JSON payloads to a specified HTTP endpoint. Additionally, it offers a flexible method to send arbitrary custom data from anywhere in your application.
|
|
6
|
+
|
|
7
|
+
This library is particularly useful for:
|
|
8
|
+
|
|
9
|
+
Centralized logging of machine learning experiments.
|
|
10
|
+
|
|
11
|
+
Real-time monitoring of training progress on a remote dashboard.
|
|
12
|
+
|
|
13
|
+
Integrating with custom notification systems (e.g., Telegram bots, Slack webhooks) by having a server endpoint process the received logs.
|
|
14
|
+
|
|
15
|
+
Features
|
|
16
|
+
Hugging Face Trainer Integration: Seamlessly plugs into the Hugging Face Trainer class.
|
|
17
|
+
|
|
18
|
+
Automatic Log Capture: Intercepts on_log, on_train_begin, on_train_end, and on_epoch_end events.
|
|
19
|
+
|
|
20
|
+
Custom Log Sending: Provides a send_custom_log method for sending any arbitrary JSON data.
|
|
21
|
+
|
|
22
|
+
Flexible Configuration: Server URL and authentication token can be provided via constructor arguments or environment variables.
|
|
23
|
+
|
|
24
|
+
Robust Error Handling: Includes try-except blocks for network requests to gracefully handle timeouts, connection errors, and HTTP errors, printing informative messages without crashing your training.
|
|
25
|
+
|
|
26
|
+
Authentication Support: Supports sending a Bearer token in the Authorization header for secure communication with your logging server.
|
|
27
|
+
|
|
28
|
+
Installation
|
|
29
|
+
You can install messenger-logger-callback using pip:
|
|
30
|
+
|
|
31
|
+
pip install messenger-logger-callback
|
|
32
|
+
|
|
33
|
+
Usage
|
|
34
|
+
1. Basic Integration with Hugging Face Trainer
|
|
35
|
+
from transformers import Trainer, TrainingArguments
|
|
36
|
+
from messenger_logger.callback import MessengerLoggerCallback # Assuming your file is in messenger_logger/callback.py
|
|
37
|
+
import os
|
|
38
|
+
|
|
39
|
+
# --- Configure your server URL and optional authentication token ---
|
|
40
|
+
# Option A: Pass directly to the constructor
|
|
41
|
+
SERVER_URL = "http://your-logging-server.com/api/logs"
|
|
42
|
+
AUTH_TOKEN = "your_secret_api_token"
|
|
43
|
+
|
|
44
|
+
# Option B: Set as environment variables (recommended for production)
|
|
45
|
+
# os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://your-logging-server.com/api/logs"
|
|
46
|
+
# os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "your_secret_api_token"
|
|
47
|
+
|
|
48
|
+
# Initialize the callback
|
|
49
|
+
# If using environment variables, you can omit server_url and auth_token arguments:
|
|
50
|
+
# messenger_logger = MessengerLoggerCallback(
|
|
51
|
+
# project_name="my_awesome_model",
|
|
52
|
+
# run_id="experiment_v2"
|
|
53
|
+
# )
|
|
54
|
+
messenger_logger = MessengerLoggerCallback(
|
|
55
|
+
server_url=SERVER_URL,
|
|
56
|
+
project_name="my_awesome_model",
|
|
57
|
+
run_id="experiment_v2",
|
|
58
|
+
auth_token=AUTH_TOKEN
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
# Define your TrainingArguments
|
|
62
|
+
training_args = TrainingArguments(
|
|
63
|
+
output_dir="./results",
|
|
64
|
+
num_train_epochs=3,
|
|
65
|
+
per_device_train_batch_size=8,
|
|
66
|
+
logging_dir="./logs",
|
|
67
|
+
logging_steps=100,
|
|
68
|
+
report_to=["tensorboard"] # You can still report to other services
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# Initialize your Trainer (replace with your actual model and dataset)
|
|
72
|
+
# from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
|
73
|
+
# from datasets import load_dataset
|
|
74
|
+
# model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
|
|
75
|
+
# tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
|
76
|
+
# dataset = load_dataset("imdb")
|
|
77
|
+
# trainer = Trainer(
|
|
78
|
+
# model=model,
|
|
79
|
+
# args=training_args,
|
|
80
|
+
# train_dataset=dataset["train"],
|
|
81
|
+
# callbacks=[messenger_logger] # Add your custom callback here
|
|
82
|
+
# )
|
|
83
|
+
|
|
84
|
+
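# For demonstration purposes only: DummyModel and DummyDataset below are placeholders; for a real run, use an actual model and dataset as in the commented-out setup above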
|
|
85
|
+
class DummyModel:
|
|
86
|
+
pass
|
|
87
|
+
class DummyDataset:
|
|
88
|
+
def __len__(self): return 1000
|
|
89
|
+
def __getitem__(self, idx): return {"input_ids": [1,2,3], "labels": 0}
|
|
90
|
+
|
|
91
|
+
trainer = Trainer(
|
|
92
|
+
model=DummyModel(),
|
|
93
|
+
args=training_args,
|
|
94
|
+
train_dataset=DummyDataset(),
|
|
95
|
+
callbacks=[messenger_logger] # Add your custom callback here
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Start training
|
|
99
|
+
print("Starting dummy training...")
|
|
100
|
+
trainer.train()
|
|
101
|
+
print("Dummy training finished.")
|
|
102
|
+
|
|
103
|
+
2. Sending Custom Logs
|
|
104
|
+
You can send arbitrary data at any point using the send_custom_log method:
|
|
105
|
+
|
|
106
|
+
from messenger_logger.callback import MessengerLoggerCallback
|
|
107
|
+
import os
|
|
108
|
+
|
|
109
|
+
# Ensure the logger is initialized (e.g., from environment variables)
|
|
110
|
+
# os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://localhost:5000/api/logs"
|
|
111
|
+
# os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "my_custom_token"
|
|
112
|
+
custom_logger = MessengerLoggerCallback(
|
|
113
|
+
server_url="http://localhost:5000/api/logs", # Or omit if using env vars
|
|
114
|
+
project_name="my_inference_project",
|
|
115
|
+
run_id="prediction_run_1"
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
# Send custom data, e.g., after model evaluation or deployment
|
|
119
|
+
custom_logger.send_custom_log({
|
|
120
|
+
"event": "model_evaluation_complete",
|
|
121
|
+
"model_version": "v1.2.0",
|
|
122
|
+
"evaluation_metrics": {
|
|
123
|
+
"accuracy": 0.92,
|
|
124
|
+
"f1_score": 0.915,
|
|
125
|
+
"precision": 0.90,
|
|
126
|
+
"recall": 0.93
|
|
127
|
+
},
|
|
128
|
+
"dataset_info": "test_set_2023-01-15"
|
|
129
|
+
})
|
|
130
|
+
|
|
131
|
+
custom_logger.send_custom_log({
|
|
132
|
+
"event": "alert",
|
|
133
|
+
"level": "CRITICAL",
|
|
134
|
+
"message": "High GPU temperature detected on node gpu-01",
|
|
135
|
+
"temperature_celsius": 85,
|
|
136
|
+
"timestamp": "2023-10-27T10:30:00Z"
|
|
137
|
+
})
|
|
138
|
+
|
|
139
|
+
Configuration
|
|
140
|
+
The MessengerLoggerCallback can be configured using:
|
|
141
|
+
|
|
142
|
+
Constructor Arguments:
|
|
143
|
+
|
|
144
|
+
server_url (str, optional): The HTTP endpoint to send logs to.
|
|
145
|
+
|
|
146
|
+
project_name (str, optional): A string identifier for your project (defaults to "default_project").
|
|
147
|
+
|
|
148
|
+
run_id (str, optional): A unique identifier for the current training run. If not provided, a timestamp-based ID is generated.
|
|
149
|
+
|
|
150
|
+
auth_token (str, optional): An authentication token to include in the Authorization: Bearer <token> header.
|
|
151
|
+
|
|
152
|
+
Environment Variables:
|
|
153
|
+
|
|
154
|
+
MESSENGER_LOGGER_SERVER_URL: Used as the server URL when the server_url argument is not provided.
|
|
155
|
+
|
|
156
|
+
MESSENGER_LOGGER_AUTH_TOKEN: Used as the authentication token when the auth_token argument is not provided.
|
|
157
|
+
|
|
158
|
+
Precedence: Constructor arguments take precedence over environment variables. If neither is provided for server_url, a ValueError will be raised.
|
|
159
|
+
|
|
160
|
+
Error Handling
|
|
161
|
+
The library includes robust error handling for network requests. If the logging server is unavailable, times out, or returns an HTTP error (4xx/5xx), a warning or error message will be printed to the console, but your training script will continue to run without interruption.
|
|
162
|
+
|
|
163
|
+
Example error messages you might see:
|
|
164
|
+
|
|
165
|
+
Warning: Request to http://localhost:5000/api/logs timed out for step 10. The server did not respond within the expected time.
|
|
166
|
+
|
|
167
|
+
Error: Could not connect to server at http://localhost:9999/api/logs for step N/A. The server might be unavailable or the URL is incorrect. Error details: ...
|
|
168
|
+
|
|
169
|
+
Error: HTTP error occurred while sending logs for step 20. Status: 401, Response: Unauthorized. Check server logs for more details.
|
|
170
|
+
|
|
171
|
+
Local Testing with a Dummy Server
|
|
172
|
+
For local development and testing, you can use a simple Flask server to receive and print the logs:
|
|
173
|
+
|
|
174
|
+
Save dummy_server.py:
|
|
175
|
+
|
|
176
|
+
# dummy_server.py
|
|
177
|
+
from flask import Flask, request, jsonify
|
|
178
|
+
import json
|
|
179
|
+
|
|
180
|
+
app = Flask(__name__)
|
|
181
|
+
|
|
182
|
+
@app.route('/api/logs', methods=['POST'])
|
|
183
|
+
def receive_logs():
|
|
184
|
+
data = request.get_json()
|
|
185
|
+
print("\n--- Received data ---")
|
|
186
|
+
print(json.dumps(data, indent=2))
|
|
187
|
+
# Check for Authorization header
|
|
188
|
+
auth_header = request.headers.get('Authorization')
|
|
189
|
+
if auth_header:
|
|
190
|
+
print(f"Authorization Header: {auth_header}")
|
|
191
|
+
print("---------------------\n")
|
|
192
|
+
return jsonify({"status": "success", "message": "Log received"}), 200
|
|
193
|
+
|
|
194
|
+
if __name__ == '__main__':
|
|
195
|
+
print("Dummy server running on http://127.0.0.1:5000/api/logs")
|
|
196
|
+
app.run(port=5000)
|
|
197
|
+
|
|
198
|
+
Install Flask:
|
|
199
|
+
|
|
200
|
+
pip install Flask
|
|
201
|
+
|
|
202
|
+
Run the dummy server:
|
|
203
|
+
|
|
204
|
+
python dummy_server.py
|
|
205
|
+
|
|
206
|
+
Run your training script (or the example usage from messenger_logger/callback.py's if __name__ == "__main__": block). You will see the logs printed in your dummy server's console.
|
|
207
|
+
|
|
208
|
+
Contributing
|
|
209
|
+
Contributions are welcome! Please feel free to open issues or submit pull requests on the GitHub repository.
|
|
210
|
+
|
|
211
|
+
License
|
|
212
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
File without changes
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import datetime
|
|
5
|
+
import dataclasses
|
|
6
|
+
from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl
|
|
7
|
+
from typing import Dict, Any, Optional
|
|
8
|
+
|
|
9
|
+
class MessengerLoggerCallback(TrainerCallback):
|
|
10
|
+
"""
|
|
11
|
+
A custom Hugging Face Trainer Callback to send training logs and custom data to a remote server.
|
|
12
|
+
|
|
13
|
+
This callback intercepts logging events from the Trainer and sends the
|
|
14
|
+
relevant metrics (loss, learning rate, epoch, etc.) as a JSON payload
|
|
15
|
+
to a specified HTTP endpoint. It also provides a method to send custom,
|
|
16
|
+
arbitrary data.
|
|
17
|
+
|
|
18
|
+
The server URL and an optional authentication token can be provided directly
|
|
19
|
+
or read from environment variables (MESSENGER_LOGGER_SERVER_URL and
|
|
20
|
+
MESSENGER_LOGGER_AUTH_TOKEN).
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
server_url (str, optional): The URL of the server endpoint where logs should be sent.
|
|
24
|
+
If not provided, it will attempt to read from
|
|
25
|
+
the MESSENGER_LOGGER_SERVER_URL environment variable.
|
|
26
|
+
Example: "http://your-server.com/api/logs"
|
|
27
|
+
project_name (str, optional): An identifier for the training project.
|
|
28
|
+
Defaults to "default_project".
|
|
29
|
+
run_id (str, optional): A unique identifier for the current training run.
|
|
30
|
+
If not provided, a unique ID will be generated
|
|
31
|
+
based on the current timestamp.
|
|
32
|
+
auth_token (str, optional): An authentication token to include in the request headers.
|
|
33
|
+
If not provided, it will attempt to read from
|
|
34
|
+
the MESSENGER_LOGGER_AUTH_TOKEN environment variable.
|
|
35
|
+
"""
|
|
36
|
+
def __init__(self, server_url: Optional[str] = None, project_name: str = "default_project",
|
|
37
|
+
run_id: Optional[str] = None, auth_token: Optional[str] = None):
|
|
38
|
+
|
|
39
|
+
# Determine server_url
|
|
40
|
+
self.server_url = server_url if server_url else os.getenv("MESSENGER_LOGGER_SERVER_URL")
|
|
41
|
+
if not self.server_url:
|
|
42
|
+
raise ValueError(
|
|
43
|
+
"server_url must be provided either as an argument or via the "
|
|
44
|
+
"MESSENGER_LOGGER_SERVER_URL environment variable."
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# Determine auth_token
|
|
48
|
+
self.auth_token = auth_token if auth_token else os.getenv("MESSENGER_LOGGER_AUTH_TOKEN")
|
|
49
|
+
if self.auth_token:
|
|
50
|
+
print("Authentication token will be used for server requests.")
|
|
51
|
+
|
|
52
|
+
self.project_name = project_name
|
|
53
|
+
# Simple unique ID based on timestamp if not provided
|
|
54
|
+
self.run_id = run_id if run_id else f"run_{int(datetime.datetime.now().timestamp())}"
|
|
55
|
+
print(f"MessengerLoggerCallback initialized for project '{self.project_name}', run '{self.run_id}'")
|
|
56
|
+
print(f"Logs will be sent to: {self.server_url}")
|
|
57
|
+
|
|
58
|
+
def _get_trainer_state_info(self, state: TrainerState) -> Dict[str, Any]:
|
|
59
|
+
"""
|
|
60
|
+
Extracts all attributes from TrainerState into a dictionary using dataclasses.asdict.
|
|
61
|
+
This handles serialization of basic types and nested dataclasses automatically.
|
|
62
|
+
"""
|
|
63
|
+
# TrainerState is a dataclass, so dataclasses.asdict can convert it directly.
|
|
64
|
+
# This will handle nested dataclasses and basic types correctly.
|
|
65
|
+
return dataclasses.asdict(state)
|
|
66
|
+
|
|
67
|
+
def _send_payload(self, payload: Dict[str, Any], step: Optional[int] = None):
|
|
68
|
+
"""Helper method to send a JSON payload to the server with error handling."""
|
|
69
|
+
headers = {"Content-Type": "application/json"}
|
|
70
|
+
if self.auth_token:
|
|
71
|
+
headers["Authorization"] = f"Bearer {self.auth_token}"
|
|
72
|
+
|
|
73
|
+
try:
|
|
74
|
+
response = requests.post(self.server_url, json=payload, headers=headers, timeout=10) # Increased timeout to 10 seconds
|
|
75
|
+
response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)
|
|
76
|
+
# print(f"Successfully sent logs for step {step if step is not None else 'N/A'} to server. Status: {response.status_code}")
|
|
77
|
+
except requests.exceptions.Timeout:
|
|
78
|
+
print(f"Warning: Request to {self.server_url} timed out for step {step if step is not None else 'N/A'}. "
|
|
79
|
+
"The server did not respond within the expected time.")
|
|
80
|
+
except requests.exceptions.ConnectionError as e:
|
|
81
|
+
print(f"Error: Could not connect to server at {self.server_url} for step {step if step is not None else 'N/A'}. "
|
|
82
|
+
f"The server might be unavailable or the URL is incorrect. Error details: {e}")
|
|
83
|
+
except requests.exceptions.HTTPError as e:
|
|
84
|
+
print(f"Error: HTTP error occurred while sending logs for step {step if step is not None else 'N/A'}. "
|
|
85
|
+
f"Status: {e.response.status_code}, Response: {e.response.text}. Check server logs for more details.")
|
|
86
|
+
except Exception as e:
|
|
87
|
+
print(f"An unexpected error occurred while sending logs for step {step if step is not None else 'N/A'}: {e}")
|
|
88
|
+
|
|
89
|
+
def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, logs: Dict[str, Any], **kwargs):
|
|
90
|
+
"""
|
|
91
|
+
Event called after logging.
|
|
92
|
+
|
|
93
|
+
This method is triggered by the Trainer when new logs (metrics) are available.
|
|
94
|
+
It constructs a payload with the current training state and metrics,
|
|
95
|
+
and sends it to the configured server URL.
|
|
96
|
+
"""
|
|
97
|
+
payload = {
|
|
98
|
+
"project_name": self.project_name,
|
|
99
|
+
"run_id": self.run_id,
|
|
100
|
+
"event_type": "trainer_log", # Indicate this is a standard trainer log
|
|
101
|
+
"trainer_state": self._get_trainer_state_info(state),
|
|
102
|
+
"logs": logs, # The actual metrics (loss, learning_rate, etc.) for the current step
|
|
103
|
+
"timestamp": datetime.datetime.now().isoformat()
|
|
104
|
+
}
|
|
105
|
+
self._send_payload(payload, state.global_step)
|
|
106
|
+
|
|
107
|
+
def on_train_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
|
|
108
|
+
"""Event called at the beginning of training."""
|
|
109
|
+
print(f"Training for project '{self.project_name}', run '{self.run_id}' has begun.")
|
|
110
|
+
self._send_status_update("training_started", state)
|
|
111
|
+
|
|
112
|
+
def on_train_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
|
|
113
|
+
"""Event called at the end of training."""
|
|
114
|
+
print(f"Training for project '{self.project_name}', run '{self.run_id}' has ended.")
|
|
115
|
+
self._send_status_update("training_finished", state)
|
|
116
|
+
|
|
117
|
+
def on_epoch_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
|
|
118
|
+
"""Event called at the end of an epoch."""
|
|
119
|
+
print(f"Epoch {state.epoch} ended for project '{self.project_name}', run '{self.run_id}'.")
|
|
120
|
+
self._send_status_update("epoch_ended", state)
|
|
121
|
+
|
|
122
|
+
def _send_status_update(self, event_type: str, state: TrainerState):
|
|
123
|
+
"""Helper to send general status updates."""
|
|
124
|
+
payload = {
|
|
125
|
+
"project_name": self.project_name,
|
|
126
|
+
"run_id": self.run_id,
|
|
127
|
+
"event_type": event_type,
|
|
128
|
+
"trainer_state": self._get_trainer_state_info(state),
|
|
129
|
+
"timestamp": datetime.datetime.now().isoformat()
|
|
130
|
+
}
|
|
131
|
+
self._send_payload(payload, state.global_step)
|
|
132
|
+
|
|
133
|
+
def send_custom_log(self, custom_data: Dict[str, Any]):
    """
    Send an arbitrary, user-supplied dictionary to the remote server.

    Intended to be called directly from user code, at any point, for
    information that is not part of the standard Trainer logs. The data is
    wrapped in a payload tagged with the ``"custom_log"`` event type plus
    the project name, run id and a local-time ISO-8601 timestamp.

    Args:
        custom_data (Dict[str, Any]): The custom data to send. Anything
            that is not a ``dict`` is rejected: an error line is printed
            and nothing is sent.
    """
    if isinstance(custom_data, dict):
        envelope = dict(
            project_name=self.project_name,
            run_id=self.run_id,
            event_type="custom_log",  # Marks this payload as a custom log
            custom_data=custom_data,
            timestamp=datetime.datetime.now().isoformat(),
        )
        print(f"Sending custom log for project '{self.project_name}', run '{self.run_id}'.")
        self._send_payload(envelope)
    else:
        print("Error: custom_data must be a dictionary.")
|
|
158
|
+
|
|
159
|
+
# Example Usage (how you would use this in your training script):
|
|
160
|
+
if __name__ == "__main__":
    # Manual demonstration of MessengerLoggerCallback. It walks through three
    # scenarios (direct arguments, environment variables, unreachable server),
    # each simulating a Trainer log event with hand-built dummy objects.
    # --- Demonstration of MessengerLoggerCallback ---
    print("--- Demonstrating MessengerLoggerCallback instantiation ---")

    # --- Scenario 1: Using direct arguments ---
    print("\n--- Scenario 1: Using direct arguments for server_url and auth_token ---")
    try:
        my_logger_direct = MessengerLoggerCallback(
            server_url="http://localhost:5000/api/logs",
            project_name="my_nlp_project_direct",
            run_id="experiment_direct_v1",
            auth_token="my_secret_direct_token"
        )
        print("Simulating log event for direct arguments...")
        dummy_args = TrainingArguments(output_dir="./tmp_output_direct")
        dummy_state = TrainerState()
        dummy_state.global_step = 10
        dummy_state.epoch = 0.1
        dummy_state.is_training = True
        dummy_control = TrainerControl()
        dummy_logs = {"loss": 0.1234}
        my_logger_direct.on_log(dummy_args, dummy_state, dummy_control, dummy_logs)
        my_logger_direct.send_custom_log({"message": "Direct argument test complete"})
    except ValueError as e:
        print(f"Configuration Error (Direct Arguments): {e}")
    except Exception as e:
        # Top-level demo boundary: report and carry on to the next scenario.
        print(f"An error occurred during direct argument demonstration: {e}")

    # --- Scenario 2: Using environment variables ---
    print("\n--- Scenario 2: Using environment variables for server_url and auth_token ---")
    # Set environment variables for demonstration
    os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://localhost:5000/api/logs"
    os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "my_secret_env_token"

    try:
        my_logger_env = MessengerLoggerCallback(
            project_name="my_nlp_project_env",
            run_id="experiment_env_v1"
        )  # server_url and auth_token will be read from env vars
        print("Simulating log event for environment variables...")
        dummy_args_env = TrainingArguments(output_dir="./tmp_output_env")
        dummy_state_env = TrainerState()
        dummy_state_env.global_step = 20
        dummy_state_env.epoch = 0.2
        dummy_state_env.is_training = True
        dummy_control_env = TrainerControl()
        dummy_logs_env = {"loss": 0.5678, "learning_rate": 5e-5}
        my_logger_env.on_log(dummy_args_env, dummy_state_env, dummy_control_env, dummy_logs_env)
        my_logger_env.send_custom_log({"message": "Environment variable test complete"})
    except ValueError as e:
        print(f"Configuration Error (Environment Variables): {e}")
    except Exception as e:
        print(f"An error occurred during environment variable demonstration: {e}")
    finally:
        # Clean up environment variables after demonstration. pop() with a
        # default never raises, so cleanup is safe even if a variable is
        # already gone.
        os.environ.pop("MESSENGER_LOGGER_SERVER_URL", None)
        os.environ.pop("MESSENGER_LOGGER_AUTH_TOKEN", None)

    # --- Scenario 3: Server not available (demonstrate error handling) ---
    print("\n--- Scenario 3: Demonstrating server unavailability error handling ---")
    # Point to a non-existent server or port
    os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://localhost:9999/api/logs"  # Assuming nothing runs on 9999
    try:
        my_logger_unavailable = MessengerLoggerCallback(
            project_name="my_nlp_project_unavailable",
            run_id="experiment_unavailable_v1"
        )
        print("Attempting to send log to unavailable server...")
        dummy_args_un = TrainingArguments(output_dir="./tmp_output_un")
        dummy_state_un = TrainerState()
        dummy_state_un.global_step = 30
        dummy_state_un.epoch = 0.3
        dummy_state_un.is_training = True
        dummy_control_un = TrainerControl()
        dummy_logs_un = {"loss": 0.999}
        my_logger_unavailable.on_log(dummy_args_un, dummy_state_un, dummy_control_un, dummy_logs_un)
    except ValueError as e:
        print(f"Configuration Error (Unavailable Server): {e}")
    except Exception as e:
        print(f"An error occurred during unavailable server demonstration: {e}")
    finally:
        os.environ.pop("MESSENGER_LOGGER_SERVER_URL", None)

    print("\nDemonstration complete. Check the console output for messages.")
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: messenger-logger-callback
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A custom logger and Hugging Face Trainer Callback for sending logs to a remote server with authentication.
|
|
5
|
+
Author-email: Riko0 <grigoriyalexeenko@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/Riko0/messenger-logger-callback
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/Riko0/messenger-logger-callback/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: requests>=2.25.1
|
|
16
|
+
Requires-Dist: transformers>=4.0.0
|
|
17
|
+
|
|
18
|
+
Messenger Logger Callback
|
|
19
|
+
A custom Hugging Face Trainer Callback for sending training logs and custom data to a remote server with authentication.
|
|
20
|
+
|
|
21
|
+
Overview
|
|
22
|
+
messenger-logger-callback is a Python library designed to easily integrate remote logging into your Hugging Face Trainer workflows. It provides a TrainerCallback that automatically captures standard training metrics (loss, learning rate, epoch, etc.) and sends them as JSON payloads to a specified HTTP endpoint. Additionally, it offers a flexible method to send arbitrary custom data from anywhere in your application.
|
|
23
|
+
|
|
24
|
+
This library is particularly useful for:
|
|
25
|
+
|
|
26
|
+
Centralized logging of machine learning experiments.
|
|
27
|
+
|
|
28
|
+
Real-time monitoring of training progress on a remote dashboard.
|
|
29
|
+
|
|
30
|
+
Integrating with custom notification systems (e.g., Telegram bots, Slack webhooks) by having a server endpoint process the received logs.
|
|
31
|
+
|
|
32
|
+
Features
|
|
33
|
+
Hugging Face Trainer Integration: Seamlessly plugs into the Hugging Face Trainer class.
|
|
34
|
+
|
|
35
|
+
Automatic Log Capture: Intercepts on_log, on_train_begin, on_train_end, and on_epoch_end events.
|
|
36
|
+
|
|
37
|
+
Custom Log Sending: Provides a send_custom_log method for sending any arbitrary JSON data.
|
|
38
|
+
|
|
39
|
+
Flexible Configuration: Server URL and authentication token can be provided via constructor arguments or environment variables.
|
|
40
|
+
|
|
41
|
+
Robust Error Handling: Includes try-except blocks for network requests to gracefully handle timeouts, connection errors, and HTTP errors, printing informative messages without crashing your training.
|
|
42
|
+
|
|
43
|
+
Authentication Support: Supports sending a Bearer token in the Authorization header for secure communication with your logging server.
|
|
44
|
+
|
|
45
|
+
Installation
|
|
46
|
+
You can install messenger-logger-callback using pip:
|
|
47
|
+
|
|
48
|
+
pip install messenger-logger-callback
|
|
49
|
+
|
|
50
|
+
Usage
|
|
51
|
+
1. Basic Integration with Hugging Face Trainer
|
|
52
|
+
from transformers import Trainer, TrainingArguments
|
|
53
|
+
from messenger_logger.callback import MessengerLoggerCallback  # Provided by the messenger-logger-callback package
|
|
54
|
+
import os
|
|
55
|
+
|
|
56
|
+
# --- Configure your server URL and optional authentication token ---
|
|
57
|
+
# Option A: Pass directly to the constructor
|
|
58
|
+
SERVER_URL = "http://your-logging-server.com/api/logs"
|
|
59
|
+
AUTH_TOKEN = "your_secret_api_token"
|
|
60
|
+
|
|
61
|
+
# Option B: Set as environment variables (recommended for production)
|
|
62
|
+
# os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://your-logging-server.com/api/logs"
|
|
63
|
+
# os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "your_secret_api_token"
|
|
64
|
+
|
|
65
|
+
# Initialize the callback
|
|
66
|
+
# If using environment variables, you can omit server_url and auth_token arguments:
|
|
67
|
+
# messenger_logger = MessengerLoggerCallback(
|
|
68
|
+
# project_name="my_awesome_model",
|
|
69
|
+
# run_id="experiment_v2"
|
|
70
|
+
# )
|
|
71
|
+
messenger_logger = MessengerLoggerCallback(
|
|
72
|
+
server_url=SERVER_URL,
|
|
73
|
+
project_name="my_awesome_model",
|
|
74
|
+
run_id="experiment_v2",
|
|
75
|
+
auth_token=AUTH_TOKEN
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Define your TrainingArguments
|
|
79
|
+
training_args = TrainingArguments(
|
|
80
|
+
output_dir="./results",
|
|
81
|
+
num_train_epochs=3,
|
|
82
|
+
per_device_train_batch_size=8,
|
|
83
|
+
logging_dir="./logs",
|
|
84
|
+
logging_steps=100,
|
|
85
|
+
report_to=["tensorboard"] # You can still report to other services
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
# Initialize your Trainer (replace with your actual model and dataset)
|
|
89
|
+
# from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
|
90
|
+
# from datasets import load_dataset
|
|
91
|
+
# model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
|
|
92
|
+
# tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
|
93
|
+
# dataset = load_dataset("imdb")
|
|
94
|
+
# trainer = Trainer(
|
|
95
|
+
# model=model,
|
|
96
|
+
# args=training_args,
|
|
97
|
+
# train_dataset=dataset["train"],
|
|
98
|
+
# callbacks=[messenger_logger] # Add your custom callback here
|
|
99
|
+
# )
|
|
100
|
+
|
|
101
|
+
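# For illustration only: the placeholder model and dataset classes below stand in for real ones. Replace them with your actual model and dataset before running.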
|
|
102
|
+
class DummyModel:
|
|
103
|
+
pass
|
|
104
|
+
class DummyDataset:
|
|
105
|
+
def __len__(self): return 1000
|
|
106
|
+
def __getitem__(self, idx): return {"input_ids": [1,2,3], "labels": 0}
|
|
107
|
+
|
|
108
|
+
trainer = Trainer(
|
|
109
|
+
model=DummyModel(),
|
|
110
|
+
args=training_args,
|
|
111
|
+
train_dataset=DummyDataset(),
|
|
112
|
+
callbacks=[messenger_logger] # Add your custom callback here
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# Start training
|
|
116
|
+
print("Starting dummy training...")
|
|
117
|
+
trainer.train()
|
|
118
|
+
print("Dummy training finished.")
|
|
119
|
+
|
|
120
|
+
2. Sending Custom Logs
|
|
121
|
+
You can send arbitrary data at any point using the send_custom_log method:
|
|
122
|
+
|
|
123
|
+
from messenger_logger.callback import MessengerLoggerCallback
|
|
124
|
+
import os
|
|
125
|
+
|
|
126
|
+
# Ensure the logger is initialized (e.g., from environment variables)
|
|
127
|
+
# os.environ["MESSENGER_LOGGER_SERVER_URL"] = "http://localhost:5000/api/logs"
|
|
128
|
+
# os.environ["MESSENGER_LOGGER_AUTH_TOKEN"] = "my_custom_token"
|
|
129
|
+
custom_logger = MessengerLoggerCallback(
|
|
130
|
+
server_url="http://localhost:5000/api/logs", # Or omit if using env vars
|
|
131
|
+
project_name="my_inference_project",
|
|
132
|
+
run_id="prediction_run_1"
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Send custom data, e.g., after model evaluation or deployment
|
|
136
|
+
custom_logger.send_custom_log({
|
|
137
|
+
"event": "model_evaluation_complete",
|
|
138
|
+
"model_version": "v1.2.0",
|
|
139
|
+
"evaluation_metrics": {
|
|
140
|
+
"accuracy": 0.92,
|
|
141
|
+
"f1_score": 0.915,
|
|
142
|
+
"precision": 0.90,
|
|
143
|
+
"recall": 0.93
|
|
144
|
+
},
|
|
145
|
+
"dataset_info": "test_set_2023-01-15"
|
|
146
|
+
})
|
|
147
|
+
|
|
148
|
+
custom_logger.send_custom_log({
|
|
149
|
+
"event": "alert",
|
|
150
|
+
"level": "CRITICAL",
|
|
151
|
+
"message": "High GPU temperature detected on node gpu-01",
|
|
152
|
+
"temperature_celsius": 85,
|
|
153
|
+
"timestamp": "2023-10-27T10:30:00Z"
|
|
154
|
+
})
|
|
155
|
+
|
|
156
|
+
Configuration
|
|
157
|
+
The MessengerLoggerCallback can be configured using:
|
|
158
|
+
|
|
159
|
+
Constructor Arguments:
|
|
160
|
+
|
|
161
|
+
server_url (str, optional): The HTTP endpoint to send logs to.
|
|
162
|
+
|
|
163
|
+
project_name (str, optional): A string identifier for your project (defaults to "default_project").
|
|
164
|
+
|
|
165
|
+
run_id (str, optional): A unique identifier for the current training run. If not provided, a timestamp-based ID is generated.
|
|
166
|
+
|
|
167
|
+
auth_token (str, optional): An authentication token to include in the Authorization: Bearer <token> header.
|
|
168
|
+
|
|
169
|
+
Environment Variables:
|
|
170
|
+
|
|
171
|
+
MESSENGER_LOGGER_SERVER_URL: Used as the server URL when the server_url constructor argument is not provided.
|
|
172
|
+
|
|
173
|
+
MESSENGER_LOGGER_AUTH_TOKEN: Used as the authentication token when the auth_token constructor argument is not provided.
|
|
174
|
+
|
|
175
|
+
Precedence: Constructor arguments take precedence over environment variables. If neither is provided for server_url, a ValueError will be raised.
|
|
176
|
+
|
|
177
|
+
Error Handling
|
|
178
|
+
The library includes robust error handling for network requests. If the logging server is unavailable, times out, or returns an HTTP error (4xx/5xx), a warning or error message will be printed to the console, but your training script will continue to run without interruption.
|
|
179
|
+
|
|
180
|
+
Example error messages you might see:
|
|
181
|
+
|
|
182
|
+
Warning: Request to http://localhost:5000/api/logs timed out for step 10. The server did not respond within the expected time.
|
|
183
|
+
|
|
184
|
+
Error: Could not connect to server at http://localhost:9999/api/logs for step N/A. The server might be unavailable or the URL is incorrect. Error details: ...
|
|
185
|
+
|
|
186
|
+
Error: HTTP error occurred while sending logs for step 20. Status: 401, Response: Unauthorized. Check server logs for more details.
|
|
187
|
+
|
|
188
|
+
Local Testing with a Dummy Server
|
|
189
|
+
For local development and testing, you can use a simple Flask server to receive and print the logs:
|
|
190
|
+
|
|
191
|
+
Save dummy_server.py:
|
|
192
|
+
|
|
193
|
+
# dummy_server.py
|
|
194
|
+
from flask import Flask, request, jsonify
|
|
195
|
+
import json
|
|
196
|
+
|
|
197
|
+
app = Flask(__name__)
|
|
198
|
+
|
|
199
|
+
@app.route('/api/logs', methods=['POST'])
|
|
200
|
+
def receive_logs():
|
|
201
|
+
data = request.get_json()
|
|
202
|
+
print("\n--- Received data ---")
|
|
203
|
+
print(json.dumps(data, indent=2))
|
|
204
|
+
# Check for Authorization header
|
|
205
|
+
auth_header = request.headers.get('Authorization')
|
|
206
|
+
if auth_header:
|
|
207
|
+
print(f"Authorization Header: {auth_header}")
|
|
208
|
+
print("---------------------\n")
|
|
209
|
+
return jsonify({"status": "success", "message": "Log received"}), 200
|
|
210
|
+
|
|
211
|
+
if __name__ == '__main__':
|
|
212
|
+
print("Dummy server running on http://127.0.0.1:5000/api/logs")
|
|
213
|
+
app.run(port=5000)
|
|
214
|
+
|
|
215
|
+
Install Flask:
|
|
216
|
+
|
|
217
|
+
pip install Flask
|
|
218
|
+
|
|
219
|
+
Run the dummy server:
|
|
220
|
+
|
|
221
|
+
python dummy_server.py
|
|
222
|
+
|
|
223
|
+
Run your training script (or the example usage from messenger_logger/callback.py's if __name__ == "__main__": block). You will see the logs printed in your dummy server's console.
|
|
224
|
+
|
|
225
|
+
Contributing
|
|
226
|
+
Contributions are welcome! Please feel free to open issues or submit pull requests on the GitHub repository.
|
|
227
|
+
|
|
228
|
+
License
|
|
229
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
messenger_logger/__init__.py
|
|
4
|
+
messenger_logger/callback.py
|
|
5
|
+
messenger_logger_callback.egg-info/PKG-INFO
|
|
6
|
+
messenger_logger_callback.egg-info/SOURCES.txt
|
|
7
|
+
messenger_logger_callback.egg-info/dependency_links.txt
|
|
8
|
+
messenger_logger_callback.egg-info/requires.txt
|
|
9
|
+
messenger_logger_callback.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
messenger_logger
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "messenger-logger-callback"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="Riko0", email="grigoriyalexeenko@gmail.com" },
|
|
10
|
+
]
|
|
11
|
+
description = "A custom logger and Hugging Face Trainer Callback for sending logs to a remote server with authentication."
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.8"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
19
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"requests>=2.25.1",
|
|
23
|
+
"transformers>=4.0.0"
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = "https://github.com/Riko0/messenger-logger-callback"
|
|
28
|
+
"Bug Tracker" = "https://github.com/Riko0/messenger-logger-callback/issues"
|