notiboard 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notiboard-0.1.0/LICENSE +21 -0
- notiboard-0.1.0/PKG-INFO +98 -0
- notiboard-0.1.0/README.md +63 -0
- notiboard-0.1.0/notiboard/__init__.py +33 -0
- notiboard-0.1.0/notiboard/client.py +332 -0
- notiboard-0.1.0/notiboard/config.py +65 -0
- notiboard-0.1.0/notiboard/writer.py +470 -0
- notiboard-0.1.0/notiboard.egg-info/PKG-INFO +98 -0
- notiboard-0.1.0/notiboard.egg-info/SOURCES.txt +15 -0
- notiboard-0.1.0/notiboard.egg-info/dependency_links.txt +1 -0
- notiboard-0.1.0/notiboard.egg-info/requires.txt +11 -0
- notiboard-0.1.0/notiboard.egg-info/top_level.txt +1 -0
- notiboard-0.1.0/pyproject.toml +70 -0
- notiboard-0.1.0/setup.cfg +4 -0
- notiboard-0.1.0/tests/test_client.py +215 -0
- notiboard-0.1.0/tests/test_config.py +59 -0
- notiboard-0.1.0/tests/test_writer.py +217 -0
notiboard-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Noti contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
notiboard-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: notiboard
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Monitor AI training on your phone with a drop-in SummaryWriter replacement.
|
|
5
|
+
Author-email: Noti <noti@tech-webs.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://noti.tech-webs.com
|
|
8
|
+
Project-URL: Repository, https://github.com/noti-app/noti-sdk
|
|
9
|
+
Project-URL: Documentation, https://noti.tech-webs.com/document
|
|
10
|
+
Keywords: tensorboard,machine-learning,training,monitoring,notifications,mlops
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: httpx<1,>=0.27
|
|
25
|
+
Requires-Dist: numpy<3,>=1.24
|
|
26
|
+
Requires-Dist: Pillow<12,>=10
|
|
27
|
+
Requires-Dist: tensorboardX<3,>=2.6
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
30
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == "dev"
|
|
31
|
+
Requires-Dist: ruff>=0.8; extra == "dev"
|
|
32
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
33
|
+
Requires-Dist: twine>=6.0; extra == "dev"
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
|
|
36
|
+
# notiboard
|
|
37
|
+
|
|
38
|
+
`notiboard` is both the PyPI distribution name and the Python import package.
|
|
39
|
+
|
|
40
|
+
Monitor your AI training on your phone with a drop-in replacement for TensorBoard's `SummaryWriter`.
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install notiboard
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from notiboard import NotiWriter
|
|
52
|
+
|
|
53
|
+
writer = NotiWriter(
|
|
54
|
+
log_dir="runs/experiment1",
|
|
55
|
+
noti_api_key="nk_your_key_here", # from the Noti app: Settings > API Keys
|
|
56
|
+
noti_project="my-project",
|
|
57
|
+
noti_run_name="experiment-001", # optional, auto-generated if not set
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
for step in range(1000):
|
|
61
|
+
loss = train_step()
|
|
62
|
+
writer.add_scalar("loss/train", loss, step)
|
|
63
|
+
writer.set_progress(step, 1000, f"Step {step}/1000")
|
|
64
|
+
|
|
65
|
+
writer.send_notification("Training complete", "Final loss: 0.001", "Done.")
|
|
66
|
+
writer.close()
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
All standard TensorBoard `SummaryWriter` methods are preserved. If PyTorch is installed,
|
|
70
|
+
`NotiWriter` uses `torch.utils.tensorboard`. Otherwise it falls back to `tensorboardX`,
|
|
71
|
+
which is included as a package dependency.
|
|
72
|
+
|
|
73
|
+
## Configuration
|
|
74
|
+
|
|
75
|
+
| Argument | Env Variable | Default | Description |
|
|
76
|
+
|---|---|---|---|
|
|
77
|
+
| `noti_api_key` | `NOTI_API_KEY` | — | API key from the Noti app (required) |
|
|
78
|
+
| `noti_server` | `NOTI_SERVER` | `https://notiapi.tech-webs.com` | Noti server URL |
|
|
79
|
+
| `noti_project` | — | `"default"` | Project name |
|
|
80
|
+
| `noti_run_name` | — | Auto-generated | Run name |
|
|
81
|
+
| `noti_enabled` | `NOTI_ENABLED` | `1` | Set `0` to disable Noti sync |
|
|
82
|
+
| `noti_complete_on_close` | — | `True` | Mark the run as completed when `close()` is called |
|
|
83
|
+
|
|
84
|
+
## Release Files
|
|
85
|
+
|
|
86
|
+
This package is configured for standard PyPI release tooling:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
python -m build
|
|
90
|
+
python -m twine check dist/*
|
|
91
|
+
python -m twine upload dist/*
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Helper scripts (`sdk/scripts/`) and a release checklist (`sdk/RELEASING.md`) live in the source repository; they are not shipped in this package.
|
|
95
|
+
|
|
96
|
+
## License
|
|
97
|
+
|
|
98
|
+
MIT
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# notiboard
|
|
2
|
+
|
|
3
|
+
`notiboard` is both the PyPI distribution name and the Python import package.
|
|
4
|
+
|
|
5
|
+
Monitor your AI training on your phone with a drop-in replacement for TensorBoard's `SummaryWriter`.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install notiboard
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from notiboard import NotiWriter
|
|
17
|
+
|
|
18
|
+
writer = NotiWriter(
|
|
19
|
+
log_dir="runs/experiment1",
|
|
20
|
+
noti_api_key="nk_your_key_here", # from the Noti app: Settings > API Keys
|
|
21
|
+
noti_project="my-project",
|
|
22
|
+
noti_run_name="experiment-001", # optional, auto-generated if not set
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
for step in range(1000):
|
|
26
|
+
loss = train_step()
|
|
27
|
+
writer.add_scalar("loss/train", loss, step)
|
|
28
|
+
writer.set_progress(step, 1000, f"Step {step}/1000")
|
|
29
|
+
|
|
30
|
+
writer.send_notification("Training complete", "Final loss: 0.001", "Done.")
|
|
31
|
+
writer.close()
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
All standard TensorBoard `SummaryWriter` methods are preserved. If PyTorch is installed,
|
|
35
|
+
`NotiWriter` uses `torch.utils.tensorboard`. Otherwise it falls back to `tensorboardX`,
|
|
36
|
+
which is included as a package dependency.
|
|
37
|
+
|
|
38
|
+
## Configuration
|
|
39
|
+
|
|
40
|
+
| Argument | Env Variable | Default | Description |
|
|
41
|
+
|---|---|---|---|
|
|
42
|
+
| `noti_api_key` | `NOTI_API_KEY` | — | API key from the Noti app (required) |
|
|
43
|
+
| `noti_server` | `NOTI_SERVER` | `https://notiapi.tech-webs.com` | Noti server URL |
|
|
44
|
+
| `noti_project` | — | `"default"` | Project name |
|
|
45
|
+
| `noti_run_name` | — | Auto-generated | Run name |
|
|
46
|
+
| `noti_enabled` | `NOTI_ENABLED` | `1` | Set `0` to disable Noti sync |
|
|
47
|
+
| `noti_complete_on_close` | — | `True` | Mark the run as completed when `close()` is called |
|
|
48
|
+
|
|
49
|
+
## Release Files
|
|
50
|
+
|
|
51
|
+
This package is configured for standard PyPI release tooling:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
python -m build
|
|
55
|
+
python -m twine check dist/*
|
|
56
|
+
python -m twine upload dist/*
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Helper scripts (`sdk/scripts/`) and a release checklist (`sdk/RELEASING.md`) live in the source repository; they are not shipped in this package.
|
|
60
|
+
|
|
61
|
+
## License
|
|
62
|
+
|
|
63
|
+
MIT
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""
|
|
2
|
+
notiboard - Monitor AI training on your phone.
|
|
3
|
+
|
|
4
|
+
Drop-in replacement for TensorBoard's SummaryWriter with Noti server sync.
|
|
5
|
+
|
|
6
|
+
Quick start:
|
|
7
|
+
from notiboard import NotiWriter
|
|
8
|
+
|
|
9
|
+
writer = NotiWriter(
|
|
10
|
+
log_dir="runs/experiment1",
|
|
11
|
+
noti_api_key="nk_your_key_here", # from the Noti app
|
|
12
|
+
noti_project="my-project",
|
|
13
|
+
noti_run_name="experiment-001", # optional
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
for step in range(1000):
|
|
17
|
+
loss = train_step()
|
|
18
|
+
writer.add_scalar("loss/train", loss, step)
|
|
19
|
+
writer.set_progress(step, 1000, f"Step {step}/1000")
|
|
20
|
+
|
|
21
|
+
writer.send_notification(
|
|
22
|
+
title="Training Complete",
|
|
23
|
+
preview="Loss: 0.001",
|
|
24
|
+
content="Training finished after 1000 steps. Final loss: 0.001",
|
|
25
|
+
)
|
|
26
|
+
writer.close()
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from notiboard.config import NotiConfig
|
|
30
|
+
from notiboard.writer import NotiWriter
|
|
31
|
+
|
|
32
|
+
__all__ = ["NotiWriter", "NotiConfig"]
|
|
33
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""
|
|
2
|
+
HTTP client for communicating with the Noti server.
|
|
3
|
+
|
|
4
|
+
Runs a background daemon thread with an internal queue.
|
|
5
|
+
All public enqueue_* methods are thread-safe and non-blocking from the caller.
|
|
6
|
+
|
|
7
|
+
Reliability guarantees:
|
|
8
|
+
- Failed batches are retried with exponential backoff (up to max_retries).
|
|
9
|
+
- If a batch ultimately fails, it is dropped and a warning is logged.
|
|
10
|
+
(Persistent disk-based queue is out of scope for v0.1.)
|
|
11
|
+
- close() / flush_and_close() waits up to close_timeout for in-flight uploads.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
import queue
|
|
18
|
+
import threading
|
|
19
|
+
import time
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
import httpx
|
|
23
|
+
|
|
24
|
+
from notiboard.config import NotiConfig
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger("noti")
|
|
27
|
+
|
|
28
|
+
# Sentinel to tell the background thread to exit after draining the queue.
|
|
29
|
+
_STOP = object()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class NotiClient:
|
|
33
|
+
"""
|
|
34
|
+
Manages HTTP communication with the Noti server.
|
|
35
|
+
|
|
36
|
+
Architecture:
|
|
37
|
+
caller → enqueue_* → _queue → background thread → _flush() → HTTP POST
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
def __init__(self, config: NotiConfig, run_id: str) -> None:
    """Store the configuration, open the HTTP client and start the uploader thread.

    Args:
        config: Settings object; ``server_url``, ``api_key`` and
            ``request_timeout`` are read here.
        run_id: Identifier of the run that later requests refer to.
    """
    self._config, self._run_id = config, run_id

    # Queue entries are dicts carrying a 'type' key; the _STOP sentinel is
    # also sent through this queue to end the background thread.
    self._queue: queue.Queue[Any] = queue.Queue()

    # A missing API key is sent as an empty header value.
    auth_headers = {"X-Noti-Key": config.api_key or ""}
    self._http = httpx.Client(
        base_url=config.server_url,
        headers=auth_headers,
        timeout=config.request_timeout,
    )

    uploader = threading.Thread(
        name="noti-uploader",
        target=self._flush_loop,
        daemon=True,
    )
    self._thread = uploader
    uploader.start()
|
|
56
|
+
|
|
57
|
+
# ─── Public API ──────────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
def enqueue_scalar(
    self, tag: str, step: int, value: float, wall_time: float | None = None
) -> None:
    """Queue one scalar record, then call ``_maybe_flush``.

    Args:
        tag: Metric tag name.
        step: Training step the value belongs to.
        value: Scalar value to record.
        wall_time: Optional timestamp stored with the record.
    """
    record = {
        "type": "scalar",
        "tag": tag,
        "step": step,
        "value": value,
        "wall_time": wall_time,
    }
    self._queue.put_nowait(record)
    self._maybe_flush()
|
|
66
|
+
|
|
67
|
+
def enqueue_histogram(
    self, tag: str, step: int, histogram_data: dict[str, Any]
) -> None:
    """Queue one histogram record, then call ``_maybe_flush``.

    The entries of ``histogram_data`` are merged into the record after the
    ``type``/``tag``/``step`` keys, so a key with one of those names in
    ``histogram_data`` replaces the value set here.

    Args:
        tag: Metric tag name.
        step: Training step the histogram belongs to.
        histogram_data: Extra key/value pairs describing the histogram.
    """
    record: dict[str, Any] = {"type": "histogram", "tag": tag, "step": step}
    record.update(histogram_data)
    self._queue.put_nowait(record)
    self._maybe_flush()
|
|
74
|
+
|
|
75
|
+
def enqueue_text(
    self, tag: str, step: int, content: str, wall_time: float | None = None
) -> None:
    """Queue one text record, then call ``_maybe_flush``.

    Args:
        tag: Metric tag name.
        step: Training step the text belongs to.
        content: Text payload.
        wall_time: Optional timestamp stored with the record.
    """
    record = {
        "type": "text",
        "tag": tag,
        "step": step,
        "content": content,
        "wall_time": wall_time,
    }
    self._queue.put_nowait(record)
    self._maybe_flush()
|
|
82
|
+
|
|
83
|
+
def send_progress(
    self, current_step: int, total_steps: int, message: str | None = None
) -> None:
    """PUT a progress update for this run, synchronously.

    Any exception (transport error or non-success status) is logged as a
    warning and not propagated.

    Args:
        current_step: Step reached so far.
        total_steps: Total number of steps.
        message: Optional text sent along with the progress values.
    """
    body = {
        "current_step": current_step,
        "total_steps": total_steps,
        "message": message,
    }
    try:
        endpoint = f"/api/v1/progress/{self._run_id}"
        self._http.put(endpoint, json=body).raise_for_status()
    except Exception as exc:
        logger.warning("Noti: failed to send progress update: %s", exc)
|
|
99
|
+
|
|
100
|
+
def send_notification(
    self,
    title: str,
    preview: str,
    content: str,
    notification_type: str = "info",
) -> None:
    """POST a notification for this run, synchronously.

    Any exception (transport error or non-success status) is logged as a
    warning and not propagated.

    Args:
        title: Notification title.
        preview: Short preview text.
        content: Full notification body.
        notification_type: Type label sent to the server; defaults to "info".
    """
    body = {
        "title": title,
        "preview": preview,
        "content": content,
        "run_id": self._run_id,
        "notification_type": notification_type,
    }
    try:
        self._http.post("/api/v1/notifications", json=body).raise_for_status()
    except Exception as exc:
        logger.warning("Noti: failed to send notification: %s", exc)
|
|
122
|
+
|
|
123
|
+
def send_error(self, title: str, preview: str | None = None, content: str | None = None) -> None:
    """Send a notification with ``notification_type="error"``.

    ``preview`` and ``content`` fall back to ``title`` when they are empty
    or ``None``.
    """
    fallback = title
    self.send_notification(
        title=title,
        preview=preview if preview else fallback,
        content=content if content else fallback,
        notification_type="error",
    )
|
|
131
|
+
|
|
132
|
+
def send_alert(self, title: str, preview: str | None = None, content: str | None = None) -> None:
    """Send a notification with ``notification_type="alert"``.

    ``preview`` and ``content`` fall back to ``title`` when they are empty
    or ``None``.
    """
    fallback = title
    self.send_notification(
        title=title,
        preview=preview if preview else fallback,
        content=content if content else fallback,
        notification_type="alert",
    )
|
|
140
|
+
|
|
141
|
+
def upload_image(
    self,
    tag: str,
    step: int,
    image_bytes: bytes,
    *,
    width: int,
    height: int,
    fmt: str = "PNG",
    wall_time: float | None = None,
) -> None:
    """POST one image as a multipart upload, synchronously.

    A 401 response is logged with an API-key hint; any other failure is
    logged as a generic warning. Nothing is raised to the caller.

    Args:
        tag: Metric tag name.
        step: Training step.
        image_bytes: Encoded image bytes.
        width: Image width in pixels.
        height: Image height in pixels.
        fmt: Image format string, e.g. "PNG" or "JPEG".
        wall_time: Optional timestamp sent with the upload.
    """
    suffix = fmt.lower()
    # "jpg" and "jpeg" both map to the image/jpeg content type.
    subtype = "jpeg" if suffix in ("jpg", "jpeg") else suffix
    content_type = f"image/{subtype}"
    try:
        form_fields = {
            "run_id": self._run_id,
            "tag": tag,
            "step": step,
            "wall_time": wall_time,
            "width": width,
            "height": height,
            "format": fmt.upper(),
        }
        attachment = {"file": (f"{step}.{suffix}", image_bytes, content_type)}
        resp = self._http.post(
            "/api/v1/metrics/images/upload",
            data=form_fields,
            files=attachment,
        )
        if resp.status_code == 401:
            logger.warning("Noti: image upload rejected (401). Check API key.")
            return
        resp.raise_for_status()
    except Exception as exc:
        logger.warning("Noti: failed to upload image '%s' step %d: %s", tag, step, exc)
|
|
186
|
+
|
|
187
|
+
def mark_run_status(self, status: str) -> None:
    """PATCH the status of this run on the server.

    Success is logged at info level; any exception is logged as a warning
    and not propagated.

    Args:
        status: One of 'running', 'completed', 'failed', 'stopped'.
    """
    try:
        endpoint = f"/api/v1/runs/{self._run_id}/status"
        self._http.patch(endpoint, json={"status": status}).raise_for_status()
        logger.info("Noti: run %s marked as %s", self._run_id, status)
    except Exception as exc:
        logger.warning("Noti: failed to mark run status '%s': %s", status, exc)
|
|
203
|
+
|
|
204
|
+
def flush_and_close(self, complete_on_close: bool = True) -> None:
    """Stop the uploader thread, optionally mark the run completed, close HTTP.

    Waits at most ``config.close_timeout`` seconds for the background thread;
    if it is still alive afterwards a warning is logged and shutdown
    continues anyway.

    Args:
        complete_on_close: When true, ``mark_run_status("completed")`` is
            called before the HTTP client is closed.
    """
    # The sentinel travels through the same queue as the metrics.
    self._queue.put(_STOP)

    uploader = self._thread
    uploader.join(timeout=self._config.close_timeout)
    if uploader.is_alive():
        logger.warning(
            "Noti: background upload thread did not finish within %ss. "
            "Some metrics may have been dropped.",
            self._config.close_timeout,
        )

    if complete_on_close:
        self.mark_run_status("completed")

    self._http.close()
|
|
224
|
+
|
|
225
|
+
# ─── Internal ────────────────────────────────────────────────────────────
|
|
226
|
+
|
|
227
|
+
def _maybe_flush(self) -> None:
|
|
228
|
+
"""Trigger an immediate flush if the queue has reached batch_size."""
|
|
229
|
+
if self._queue.qsize() >= self._config.batch_size:
|
|
230
|
+
# Signal flush by waking the thread (it drains on each wake)
|
|
231
|
+
self._flush_now()
|
|
232
|
+
|
|
233
|
+
def _flush_loop(self) -> None:
    """Background thread body: upload queued items until ``_STOP`` arrives.

    The thread blocks on the queue for up to ``flush_interval`` seconds.
    Whenever an item arrives, that item and everything else currently queued
    are uploaded as one batch. When ``_STOP`` is received, the remaining
    items are drained once more and the loop ends.
    """
    while True:
        try:
            item = self._queue.get(timeout=self._config.flush_interval)
        except queue.Empty:
            # Nothing arrived within the interval; drain anyway in case an
            # item slipped in right after the timeout fired.
            self._flush_batch_from_queue()
            continue

        if item is _STOP:
            # Drain remaining items, then exit.
            self._flush_batch_from_queue()
            break

        # Hand the dequeued item to the drain directly. Putting it back on
        # the FIFO queue would move it behind an already-queued _STOP; the
        # drain would then stop at the sentinel, re-enqueue it after the
        # item, and this loop would spin forever without uploading or
        # terminating.
        self._flush_batch_from_queue(first_item=item)


def _flush_batch_from_queue(self, first_item: dict[str, Any] | None = None) -> None:
    """Drain the queue and POST everything collected as one batch.

    Args:
        first_item: An item the caller already removed from the queue; it is
            placed at the front of the batch so queue order is preserved.

    Draining stops when the queue is empty or when ``_STOP`` is found. The
    sentinel is put back so ``_flush_loop`` still sees it and can exit.
    Nothing is sent when no scalar, histogram or text item was collected.
    """
    pending: list[dict[str, Any]] = [] if first_item is None else [first_item]

    while True:
        try:
            item = self._queue.get_nowait()
        except queue.Empty:
            break
        if item is _STOP:
            # Re-enqueue the stop sentinel so the loop (and the caller's
            # join()) still terminates.
            self._queue.put(_STOP)
            break
        pending.append(item)

    scalars: list[dict[str, Any]] = []
    histograms: list[dict[str, Any]] = []
    texts: list[dict[str, Any]] = []
    buckets = {"scalar": scalars, "histogram": histograms, "text": texts}

    for item in pending:
        # The 'type' key only routes the item; it is not part of the payload.
        item_type = item.pop("type", None)
        bucket = buckets.get(item_type)
        if bucket is None:
            logger.debug("Noti: ignoring queued item of unknown type %r", item_type)
            continue
        bucket.append(item)

    if not scalars and not histograms and not texts:
        return

    payload = {
        "run_id": self._run_id,
        "scalars": scalars,
        "histograms": histograms,
        "texts": texts,
    }
    self._post_with_retry("/api/v1/metrics/batch", payload, scalars + histograms + texts)
|
|
286
|
+
|
|
287
|
+
def _post_with_retry(
    self, path: str, payload: dict[str, Any], original_items: list[dict[str, Any]]
) -> None:
    """POST ``payload`` as JSON, retrying with exponential backoff.

    Up to ``config.max_retries`` attempts are made. A 401 or any other
    status below 500 ends the attempts immediately. Server errors (5xx) and
    other exceptions are retried after ``min(2**attempt, 30)`` seconds.
    When every attempt fails the data is dropped and a warning is logged;
    nothing is re-enqueued.

    Args:
        path: Request path relative to the client's base URL.
        payload: JSON body to send.
        original_items: Items contained in the payload; only their count is
            used, for the final warning.
    """
    attempts = self._config.max_retries

    for attempt in range(attempts):
        backoff = min(2**attempt, 30)
        try:
            resp = self._http.post(path, json=payload)
            if resp.status_code == 401:
                logger.warning(
                    "Noti: authentication failed (401). Check your API key. Metrics dropped."
                )
                return
            resp.raise_for_status()
        except httpx.HTTPStatusError as exc:
            code = exc.response.status_code
            if code < 500:
                # 4xx (other than 401) → don't retry
                logger.warning(
                    "Noti: server rejected batch (HTTP %d). Metrics dropped.",
                    code,
                )
                return
            logger.debug(
                "Noti: upload attempt %d/%d failed (HTTP %d), retrying in %ds",
                attempt + 1, attempts, code, backoff,
            )
        except Exception as exc:
            logger.debug(
                "Noti: upload attempt %d/%d failed (%s), retrying in %ds",
                attempt + 1, attempts, exc, backoff,
            )
        else:
            # Request succeeded.
            return

        # No point sleeping after the final attempt.
        if attempt + 1 < attempts:
            time.sleep(backoff)

    logger.warning(
        "Noti: failed to upload %d item(s) after %d attempts. Data dropped.",
        len(original_items),
        attempts,
    )
|
|
329
|
+
|
|
330
|
+
# Compatibility alias for callers that call _flush_now directly
|
|
331
|
+
def _flush_now(self) -> None:
|
|
332
|
+
pass # Flush is handled lazily by the background thread on next cycle
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Configuration management for the Noti SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
#: Default server URL — can be overridden by NOTI_SERVER env var or constructor arg.
|
|
9
|
+
DEFAULT_SERVER_URL = "https://notiapi.tech-webs.com"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class NotiConfig:
    """
    Settings bundle for a NotiWriter instance.

    Values passed to the constructor win. ``server_url``, ``api_key`` and
    ``enabled`` otherwise fall back to an environment variable and then to a
    built-in default; the remaining fields have plain defaults.
    """

    #: Base URL of the Noti server (NOTI_SERVER env var, else DEFAULT_SERVER_URL).
    server_url: str = field(
        default_factory=lambda: os.getenv("NOTI_SERVER", DEFAULT_SERVER_URL),
    )

    #: Noti API key (NOTI_API_KEY env var when not passed); None if unset.
    api_key: str | None = field(
        default_factory=lambda: os.getenv("NOTI_API_KEY"),
    )

    #: Project name.
    project: str = "default"

    #: Run name; None means no name was supplied.
    run_name: str | None = None

    #: Whether Noti sync is on. From the environment, only NOTI_ENABLED="0" turns it off.
    enabled: bool = field(
        default_factory=lambda: os.getenv("NOTI_ENABLED", "1") != "0",
    )

    #: Mark the run as 'completed' when the writer is closed.
    complete_on_close: bool = True

    #: Queue length at which an immediate upload is requested.
    batch_size: int = 100

    #: Seconds the uploader waits for new items between upload cycles.
    flush_interval: float = 5.0

    #: Maximum attempts per upload before the batch is dropped.
    max_retries: int = 5

    #: Timeout in seconds for HTTP requests to the Noti server.
    request_timeout: float = 10.0

    #: Maximum seconds to wait for in-flight uploads on close.
    close_timeout: float = 30.0

    def validate(self) -> None:
        """Raise ValueError when sync is enabled but no API key is set."""
        if self.enabled and not self.api_key:
            raise ValueError(
                "Noti API key is required. Pass noti_api_key= or set the NOTI_API_KEY "
                "environment variable. Create an API key in the Noti app under Settings > API Keys."
            )
|