huddle-cluster 2.2.0__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {huddle_cluster-2.2.0/huddle_cluster.egg-info → huddle_cluster-2.3.0}/PKG-INFO +51 -7
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/README.md +50 -6
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0/huddle_cluster.egg-info}/PKG-INFO +51 -7
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster.py +2 -2
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster_pkg/cli.py +76 -29
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster_pkg/cluster_agent.py +9 -2
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster_pkg/cluster_master.py +95 -10
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/pyproject.toml +1 -1
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_cluster_agent.py +155 -3
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_cluster_master.py +178 -6
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/LICENSE +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/MANIFEST.in +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster.egg-info/SOURCES.txt +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster.egg-info/dependency_links.txt +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster.egg-info/entry_points.txt +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster.egg-info/requires.txt +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster.egg-info/top_level.txt +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster_pkg/__init__.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster_pkg/backends_redis.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster_pkg/core.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster_pkg/discovery_k8s.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/huddle_cluster_pkg/grpc_cluster.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/py.typed +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/setup.cfg +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_admin_api.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_alerting.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_canary.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_dashboard.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_draining.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_fairness.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_grpc_cluster.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_health_checker.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_histogram.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_integration.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_k8s_discovery.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_persistent_state.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_redis_backend.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_retry.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_rotation.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_sticky_sessions.py +0 -0
- {huddle_cluster-2.2.0 → huddle_cluster-2.3.0}/tests/test_stress.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: huddle-cluster
|
|
3
|
-
Version: 2.2.0
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary: A penguin-inspired self-organizing server load balancer with adaptive thermal eviction — now with master/agent cluster management
|
|
5
5
|
Author-email: Rahad Bhuiya <rahadbhuiya2021@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -83,7 +83,7 @@ Dynamic: license-file
|
|
|
83
83
|
A penguin-inspired, self-organizing server load balancer with adaptive thermal eviction.
|
|
84
84
|
|
|
85
85
|
**Author:** Rahad Bhuiya <br>
|
|
86
|
-
**Version:** 2.2.0 <br>
|
|
86
|
+
**Version:** 2.3.0 <br>
|
|
87
87
|
**License:** MIT <br>
|
|
88
88
|
**Paper:** [HuddleCluster: A Penguin-Inspired Self-Organizing Load Balancer with Adaptive Thermal Eviction](https://github.com/rahadbhuiya/HuddleCluster/blob/main/docs/HuddleCluster.pdf)
|
|
89
89
|
|
|
@@ -524,6 +524,50 @@ agent.start()
|
|
|
524
524
|
| `POST` | `/v1/nodes/{id}/heartbeat` | Heartbeat + metrics |
|
|
525
525
|
| `DELETE` | `/v1/nodes/{id}` | Graceful departure |
|
|
526
526
|
|
|
527
|
+
### Authentication & RBAC (Level 2)
|
|
528
|
+
|
|
529
|
+
By default the master's API is open — no credentials needed, same as
|
|
530
|
+
before this existed. To lock it down, pass `api_keys`:
|
|
531
|
+
|
|
532
|
+
```python
|
|
533
|
+
from huddle_cluster_pkg import MasterNode
|
|
534
|
+
|
|
535
|
+
master = MasterNode(
|
|
536
|
+
port=7070,
|
|
537
|
+
api_keys={
|
|
538
|
+
"admin-secret-key": "admin", # can join/heartbeat/leave + read
|
|
539
|
+
"dashboard-view-key": "viewer", # read-only: status/metrics/nodes
|
|
540
|
+
},
|
|
541
|
+
)
|
|
542
|
+
master.start()
|
|
543
|
+
```
|
|
544
|
+
|
|
545
|
+
```bash
|
|
546
|
+
huddle-cluster master start --api-key admin-secret-key=admin --api-key dashboard-view-key=viewer
|
|
547
|
+
```
|
|
548
|
+
|
|
549
|
+
Every request then needs `Authorization: Bearer <key>` — except
|
|
550
|
+
`GET /v1/health`, which is deliberately exempt so liveness probes don't
|
|
551
|
+
need credentials. Agents authenticate the same way:
|
|
552
|
+
|
|
553
|
+
```python
|
|
554
|
+
agent = AgentNode(
|
|
555
|
+
node_id="web-01", master_url="http://master:7070", port=8080,
|
|
556
|
+
api_key="admin-secret-key", # agents need an admin-role key
|
|
557
|
+
)
|
|
558
|
+
```
|
|
559
|
+
|
|
560
|
+
```bash
|
|
561
|
+
huddle-cluster agent start --id web-01 --master http://master:7070 --port 8080 --api-key admin-secret-key
|
|
562
|
+
```
|
|
563
|
+
|
|
564
|
+
The CLI's read commands (`nodes list`, `nodes status`, `cluster status`,
|
|
565
|
+
`cluster metrics`) accept `--api-key` too. A missing/invalid key gets
|
|
566
|
+
`401`; a valid key without enough permission (e.g. a viewer trying to
|
|
567
|
+
join) gets `403` — both logged on the master side for visibility. An
|
|
568
|
+
unrecognized role string is treated as no access rather than full access,
|
|
569
|
+
so a typo in configuration fails closed.
|
|
570
|
+
|
|
527
571
|
### Auto Recovery (Level 2)
|
|
528
572
|
|
|
529
573
|
A node that dies and recovers repeatedly within a short window is not
|
|
@@ -640,7 +684,7 @@ HuddleCluster/
|
|
|
640
684
|
| | |-- nginx_lc.conf # NGINX least-connections config
|
|
641
685
|
| |-- run_http_benchmark.bat # Windows one-click runner
|
|
642
686
|
|
|
|
643
|
-
|-- tests/ #
|
|
687
|
+
|-- tests/ # 533 tests across 19 modules
|
|
644
688
|
| |-- test_rotation.py # Rotation, eviction, feedback loop
|
|
645
689
|
| |-- test_fairness.py # Fairness and Gini tests
|
|
646
690
|
| |-- test_stress.py # Concurrent load tests
|
|
@@ -658,8 +702,8 @@ HuddleCluster/
|
|
|
658
702
|
| |-- test_redis_backend.py # Redis backend tests (uses fakeredis mock)
|
|
659
703
|
| |-- test_grpc_cluster.py # gRPC cluster tests (uses grpc mock)
|
|
660
704
|
| |-- test_k8s_discovery.py # K8s discovery tests (uses k8s mock)
|
|
661
|
-
| |-- test_cluster_master.py # MasterNode tests —
|
|
662
|
-
| |-- test_cluster_agent.py # AgentNode tests —
|
|
705
|
+
| |-- test_cluster_master.py # MasterNode tests — 74 tests (v2.0.0)
|
|
706
|
+
| |-- test_cluster_agent.py # AgentNode tests — 32 tests (v2.0.0)
|
|
663
707
|
| |-- conftest.py # Shared fixtures
|
|
664
708
|
|
|
|
665
709
|
|-- examples/
|
|
@@ -893,10 +937,10 @@ Setup PyPI Trusted Publishing:
|
|
|
893
937
|
- [x] Node list with status, metrics, last-seen-ago — v2.0.0
|
|
894
938
|
- [x] `huddle-cluster` CLI (master start, agent start, nodes list, cluster status) — v2.0.0
|
|
895
939
|
|
|
896
|
-
**Level 2 — Production Ready (in progress,
|
|
940
|
+
**Level 2 — Production Ready (in progress, 4/6)**
|
|
897
941
|
- [x] Auto recovery — flapping detection (quarantine) and stale-node purge — v2.1.0
|
|
898
942
|
- [ ] Web dashboard — real-time cluster topology view
|
|
899
|
-
- [
|
|
943
|
+
- [x] RBAC / authentication — API key with admin/viewer roles — v2.3.0
|
|
900
944
|
- [x] Monitoring — `unhealthy_alive_ratio` threshold + on_cluster_unhealthy/recovered — v2.2.0
|
|
901
945
|
- [x] Metrics — `GET /v1/metrics` Prometheus exposition, per-node + cluster-wide — v2.2.0
|
|
902
946
|
- [ ] REST API — full OpenAPI spec with versioning
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
A penguin-inspired, self-organizing server load balancer with adaptive thermal eviction.
|
|
10
10
|
|
|
11
11
|
**Author:** Rahad Bhuiya <br>
|
|
12
|
-
**Version:** 2.2.0 <br>
|
|
12
|
+
**Version:** 2.3.0 <br>
|
|
13
13
|
**License:** MIT <br>
|
|
14
14
|
**Paper:** [HuddleCluster: A Penguin-Inspired Self-Organizing Load Balancer with Adaptive Thermal Eviction](https://github.com/rahadbhuiya/HuddleCluster/blob/main/docs/HuddleCluster.pdf)
|
|
15
15
|
|
|
@@ -450,6 +450,50 @@ agent.start()
|
|
|
450
450
|
| `POST` | `/v1/nodes/{id}/heartbeat` | Heartbeat + metrics |
|
|
451
451
|
| `DELETE` | `/v1/nodes/{id}` | Graceful departure |
|
|
452
452
|
|
|
453
|
+
### Authentication & RBAC (Level 2)
|
|
454
|
+
|
|
455
|
+
By default the master's API is open — no credentials needed, same as
|
|
456
|
+
before this existed. To lock it down, pass `api_keys`:
|
|
457
|
+
|
|
458
|
+
```python
|
|
459
|
+
from huddle_cluster_pkg import MasterNode
|
|
460
|
+
|
|
461
|
+
master = MasterNode(
|
|
462
|
+
port=7070,
|
|
463
|
+
api_keys={
|
|
464
|
+
"admin-secret-key": "admin", # can join/heartbeat/leave + read
|
|
465
|
+
"dashboard-view-key": "viewer", # read-only: status/metrics/nodes
|
|
466
|
+
},
|
|
467
|
+
)
|
|
468
|
+
master.start()
|
|
469
|
+
```
|
|
470
|
+
|
|
471
|
+
```bash
|
|
472
|
+
huddle-cluster master start --api-key admin-secret-key=admin --api-key dashboard-view-key=viewer
|
|
473
|
+
```
|
|
474
|
+
|
|
475
|
+
Every request then needs `Authorization: Bearer <key>` — except
|
|
476
|
+
`GET /v1/health`, which is deliberately exempt so liveness probes don't
|
|
477
|
+
need credentials. Agents authenticate the same way:
|
|
478
|
+
|
|
479
|
+
```python
|
|
480
|
+
agent = AgentNode(
|
|
481
|
+
node_id="web-01", master_url="http://master:7070", port=8080,
|
|
482
|
+
api_key="admin-secret-key", # agents need an admin-role key
|
|
483
|
+
)
|
|
484
|
+
```
|
|
485
|
+
|
|
486
|
+
```bash
|
|
487
|
+
huddle-cluster agent start --id web-01 --master http://master:7070 --port 8080 --api-key admin-secret-key
|
|
488
|
+
```
|
|
489
|
+
|
|
490
|
+
The CLI's read commands (`nodes list`, `nodes status`, `cluster status`,
|
|
491
|
+
`cluster metrics`) accept `--api-key` too. A missing/invalid key gets
|
|
492
|
+
`401`; a valid key without enough permission (e.g. a viewer trying to
|
|
493
|
+
join) gets `403` — both logged on the master side for visibility. An
|
|
494
|
+
unrecognized role string is treated as no access rather than full access,
|
|
495
|
+
so a typo in configuration fails closed.
|
|
496
|
+
|
|
453
497
|
### Auto Recovery (Level 2)
|
|
454
498
|
|
|
455
499
|
A node that dies and recovers repeatedly within a short window is not
|
|
@@ -566,7 +610,7 @@ HuddleCluster/
|
|
|
566
610
|
| | |-- nginx_lc.conf # NGINX least-connections config
|
|
567
611
|
| |-- run_http_benchmark.bat # Windows one-click runner
|
|
568
612
|
|
|
|
569
|
-
|-- tests/ #
|
|
613
|
+
|-- tests/ # 533 tests across 19 modules
|
|
570
614
|
| |-- test_rotation.py # Rotation, eviction, feedback loop
|
|
571
615
|
| |-- test_fairness.py # Fairness and Gini tests
|
|
572
616
|
| |-- test_stress.py # Concurrent load tests
|
|
@@ -584,8 +628,8 @@ HuddleCluster/
|
|
|
584
628
|
| |-- test_redis_backend.py # Redis backend tests (uses fakeredis mock)
|
|
585
629
|
| |-- test_grpc_cluster.py # gRPC cluster tests (uses grpc mock)
|
|
586
630
|
| |-- test_k8s_discovery.py # K8s discovery tests (uses k8s mock)
|
|
587
|
-
| |-- test_cluster_master.py # MasterNode tests —
|
|
588
|
-
| |-- test_cluster_agent.py # AgentNode tests —
|
|
631
|
+
| |-- test_cluster_master.py # MasterNode tests — 74 tests (v2.0.0)
|
|
632
|
+
| |-- test_cluster_agent.py # AgentNode tests — 32 tests (v2.0.0)
|
|
589
633
|
| |-- conftest.py # Shared fixtures
|
|
590
634
|
|
|
|
591
635
|
|-- examples/
|
|
@@ -819,10 +863,10 @@ Setup PyPI Trusted Publishing:
|
|
|
819
863
|
- [x] Node list with status, metrics, last-seen-ago — v2.0.0
|
|
820
864
|
- [x] `huddle-cluster` CLI (master start, agent start, nodes list, cluster status) — v2.0.0
|
|
821
865
|
|
|
822
|
-
**Level 2 — Production Ready (in progress,
|
|
866
|
+
**Level 2 — Production Ready (in progress, 4/6)**
|
|
823
867
|
- [x] Auto recovery — flapping detection (quarantine) and stale-node purge — v2.1.0
|
|
824
868
|
- [ ] Web dashboard — real-time cluster topology view
|
|
825
|
-
- [
|
|
869
|
+
- [x] RBAC / authentication — API key with admin/viewer roles — v2.3.0
|
|
826
870
|
- [x] Monitoring — `unhealthy_alive_ratio` threshold + on_cluster_unhealthy/recovered — v2.2.0
|
|
827
871
|
- [x] Metrics — `GET /v1/metrics` Prometheus exposition, per-node + cluster-wide — v2.2.0
|
|
828
872
|
- [ ] REST API — full OpenAPI spec with versioning
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: huddle-cluster
|
|
3
|
-
Version: 2.2.0
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary: A penguin-inspired self-organizing server load balancer with adaptive thermal eviction — now with master/agent cluster management
|
|
5
5
|
Author-email: Rahad Bhuiya <rahadbhuiya2021@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -83,7 +83,7 @@ Dynamic: license-file
|
|
|
83
83
|
A penguin-inspired, self-organizing server load balancer with adaptive thermal eviction.
|
|
84
84
|
|
|
85
85
|
**Author:** Rahad Bhuiya <br>
|
|
86
|
-
**Version:** 2.2.0 <br>
|
|
86
|
+
**Version:** 2.3.0 <br>
|
|
87
87
|
**License:** MIT <br>
|
|
88
88
|
**Paper:** [HuddleCluster: A Penguin-Inspired Self-Organizing Load Balancer with Adaptive Thermal Eviction](https://github.com/rahadbhuiya/HuddleCluster/blob/main/docs/HuddleCluster.pdf)
|
|
89
89
|
|
|
@@ -524,6 +524,50 @@ agent.start()
|
|
|
524
524
|
| `POST` | `/v1/nodes/{id}/heartbeat` | Heartbeat + metrics |
|
|
525
525
|
| `DELETE` | `/v1/nodes/{id}` | Graceful departure |
|
|
526
526
|
|
|
527
|
+
### Authentication & RBAC (Level 2)
|
|
528
|
+
|
|
529
|
+
By default the master's API is open — no credentials needed, same as
|
|
530
|
+
before this existed. To lock it down, pass `api_keys`:
|
|
531
|
+
|
|
532
|
+
```python
|
|
533
|
+
from huddle_cluster_pkg import MasterNode
|
|
534
|
+
|
|
535
|
+
master = MasterNode(
|
|
536
|
+
port=7070,
|
|
537
|
+
api_keys={
|
|
538
|
+
"admin-secret-key": "admin", # can join/heartbeat/leave + read
|
|
539
|
+
"dashboard-view-key": "viewer", # read-only: status/metrics/nodes
|
|
540
|
+
},
|
|
541
|
+
)
|
|
542
|
+
master.start()
|
|
543
|
+
```
|
|
544
|
+
|
|
545
|
+
```bash
|
|
546
|
+
huddle-cluster master start --api-key admin-secret-key=admin --api-key dashboard-view-key=viewer
|
|
547
|
+
```
|
|
548
|
+
|
|
549
|
+
Every request then needs `Authorization: Bearer <key>` — except
|
|
550
|
+
`GET /v1/health`, which is deliberately exempt so liveness probes don't
|
|
551
|
+
need credentials. Agents authenticate the same way:
|
|
552
|
+
|
|
553
|
+
```python
|
|
554
|
+
agent = AgentNode(
|
|
555
|
+
node_id="web-01", master_url="http://master:7070", port=8080,
|
|
556
|
+
api_key="admin-secret-key", # agents need an admin-role key
|
|
557
|
+
)
|
|
558
|
+
```
|
|
559
|
+
|
|
560
|
+
```bash
|
|
561
|
+
huddle-cluster agent start --id web-01 --master http://master:7070 --port 8080 --api-key admin-secret-key
|
|
562
|
+
```
|
|
563
|
+
|
|
564
|
+
The CLI's read commands (`nodes list`, `nodes status`, `cluster status`,
|
|
565
|
+
`cluster metrics`) accept `--api-key` too. A missing/invalid key gets
|
|
566
|
+
`401`; a valid key without enough permission (e.g. a viewer trying to
|
|
567
|
+
join) gets `403` — both logged on the master side for visibility. An
|
|
568
|
+
unrecognized role string is treated as no access rather than full access,
|
|
569
|
+
so a typo in configuration fails closed.
|
|
570
|
+
|
|
527
571
|
### Auto Recovery (Level 2)
|
|
528
572
|
|
|
529
573
|
A node that dies and recovers repeatedly within a short window is not
|
|
@@ -640,7 +684,7 @@ HuddleCluster/
|
|
|
640
684
|
| | |-- nginx_lc.conf # NGINX least-connections config
|
|
641
685
|
| |-- run_http_benchmark.bat # Windows one-click runner
|
|
642
686
|
|
|
|
643
|
-
|-- tests/ #
|
|
687
|
+
|-- tests/ # 533 tests across 19 modules
|
|
644
688
|
| |-- test_rotation.py # Rotation, eviction, feedback loop
|
|
645
689
|
| |-- test_fairness.py # Fairness and Gini tests
|
|
646
690
|
| |-- test_stress.py # Concurrent load tests
|
|
@@ -658,8 +702,8 @@ HuddleCluster/
|
|
|
658
702
|
| |-- test_redis_backend.py # Redis backend tests (uses fakeredis mock)
|
|
659
703
|
| |-- test_grpc_cluster.py # gRPC cluster tests (uses grpc mock)
|
|
660
704
|
| |-- test_k8s_discovery.py # K8s discovery tests (uses k8s mock)
|
|
661
|
-
| |-- test_cluster_master.py # MasterNode tests —
|
|
662
|
-
| |-- test_cluster_agent.py # AgentNode tests —
|
|
705
|
+
| |-- test_cluster_master.py # MasterNode tests — 74 tests (v2.0.0)
|
|
706
|
+
| |-- test_cluster_agent.py # AgentNode tests — 32 tests (v2.0.0)
|
|
663
707
|
| |-- conftest.py # Shared fixtures
|
|
664
708
|
|
|
|
665
709
|
|-- examples/
|
|
@@ -893,10 +937,10 @@ Setup PyPI Trusted Publishing:
|
|
|
893
937
|
- [x] Node list with status, metrics, last-seen-ago — v2.0.0
|
|
894
938
|
- [x] `huddle-cluster` CLI (master start, agent start, nodes list, cluster status) — v2.0.0
|
|
895
939
|
|
|
896
|
-
**Level 2 — Production Ready (in progress,
|
|
940
|
+
**Level 2 — Production Ready (in progress, 4/6)**
|
|
897
941
|
- [x] Auto recovery — flapping detection (quarantine) and stale-node purge — v2.1.0
|
|
898
942
|
- [ ] Web dashboard — real-time cluster topology view
|
|
899
|
-
- [
|
|
943
|
+
- [x] RBAC / authentication — API key with admin/viewer roles — v2.3.0
|
|
900
944
|
- [x] Monitoring — `unhealthy_alive_ratio` threshold + on_cluster_unhealthy/recovered — v2.2.0
|
|
901
945
|
- [x] Metrics — `GET /v1/metrics` Prometheus exposition, per-node + cluster-wide — v2.2.0
|
|
902
946
|
- [ ] REST API — full OpenAPI spec with versioning
|
|
@@ -14,7 +14,7 @@ Rotation Rules:
|
|
|
14
14
|
- No central coordinator needed — threshold-driven, self-organizing
|
|
15
15
|
|
|
16
16
|
Author : Rahad Bhuiya (inspired by Penguin Biology)
|
|
17
|
-
Version: 2.2.0
|
|
17
|
+
Version: 2.3.0
|
|
18
18
|
License: MIT
|
|
19
19
|
|
|
20
20
|
Changelog v1.4.0
|
|
@@ -125,7 +125,7 @@ from enum import Enum
|
|
|
125
125
|
from typing import Any, Callable, Generator, Optional
|
|
126
126
|
|
|
127
127
|
# Version
|
|
128
|
-
__version__ = "2.2.0"
|
|
128
|
+
__version__ = "2.3.0"
|
|
129
129
|
__author__ = "Rahad Bhuiya"
|
|
130
130
|
__license__ = "MIT"
|
|
131
131
|
|
|
@@ -9,20 +9,21 @@ Commands
|
|
|
9
9
|
huddle-cluster master start [--host HOST] [--port PORT] [--timeout SEC]
|
|
10
10
|
[--flap-window SEC] [--flap-threshold N]
|
|
11
11
|
[--quarantine-recovery N] [--purge-after SEC]
|
|
12
|
+
[--api-key KEY=ROLE ...]
|
|
12
13
|
|
|
13
14
|
huddle-cluster agent start --id ID --master URL --port PORT
|
|
14
15
|
[--address IP] [--interval SEC]
|
|
15
|
-
[--retry N] [--meta key=val ...]
|
|
16
|
+
[--retry N] [--meta key=val ...] [--api-key KEY]
|
|
16
17
|
|
|
17
|
-
huddle-cluster nodes list [--master URL]
|
|
18
|
-
huddle-cluster nodes status NODE_ID [--master URL]
|
|
18
|
+
huddle-cluster nodes list [--master URL] [--api-key KEY]
|
|
19
|
+
huddle-cluster nodes status NODE_ID [--master URL] [--api-key KEY]
|
|
19
20
|
|
|
20
|
-
huddle-cluster cluster status [--master URL]
|
|
21
|
+
huddle-cluster cluster status [--master URL] [--api-key KEY]
|
|
21
22
|
huddle-cluster cluster health [--master URL]
|
|
22
|
-
huddle-cluster cluster metrics [--master URL]
|
|
23
|
+
huddle-cluster cluster metrics [--master URL] [--api-key KEY]
|
|
23
24
|
|
|
24
25
|
Author : Rahad Bhuiya
|
|
25
|
-
Version: 2.2.0
|
|
26
|
+
Version: 2.3.0
|
|
26
27
|
License: MIT
|
|
27
28
|
"""
|
|
28
29
|
|
|
@@ -44,34 +45,53 @@ _DEFAULT_MASTER = "http://localhost:7070"
|
|
|
44
45
|
# HTTP helpers
|
|
45
46
|
|
|
46
47
|
|
|
47
|
-
def
|
|
48
|
+
def _build_get_request(master_url: str, path: str, api_key: Optional[str]) -> urllib.request.Request:
|
|
48
49
|
url = f"{master_url.rstrip('/')}{path}"
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
50
|
+
req = urllib.request.Request(url)
|
|
51
|
+
if api_key:
|
|
52
|
+
req.add_header("Authorization", f"Bearer {api_key}")
|
|
53
|
+
return req
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _report_fetch_error(master_url: str, exc: Exception) -> None:
|
|
57
|
+
if isinstance(exc, urllib.error.HTTPError):
|
|
58
|
+
if exc.code == 401:
|
|
59
|
+
print("\n[error] Authentication required — pass --api-key, "
|
|
60
|
+
"or the key was rejected")
|
|
61
|
+
elif exc.code == 403:
|
|
62
|
+
print("\n[error] This API key doesn't have permission for this "
|
|
63
|
+
"request (viewer keys can't modify the cluster)")
|
|
64
|
+
else:
|
|
65
|
+
try:
|
|
66
|
+
body = json.loads(exc.read())
|
|
67
|
+
print(f"\n[error] {body.get('error', exc.reason)}")
|
|
68
|
+
except Exception:
|
|
69
|
+
print(f"\n[error] HTTP {exc.code}: {exc.reason}")
|
|
70
|
+
elif isinstance(exc, urllib.error.URLError):
|
|
53
71
|
print(f"\n[error] Cannot reach master at {master_url}")
|
|
54
72
|
print(f" {exc.reason}")
|
|
55
73
|
print(" Is the master running? huddle-cluster master start")
|
|
56
|
-
|
|
57
|
-
except Exception as exc:
|
|
74
|
+
else:
|
|
58
75
|
print(f"\n[error] {exc}", file=sys.stderr)
|
|
59
|
-
|
|
76
|
+
sys.exit(1)
|
|
60
77
|
|
|
61
78
|
|
|
62
|
-
def
|
|
63
|
-
|
|
79
|
+
def _get(master_url: str, path: str, api_key: Optional[str] = None) -> Dict[str, Any]:
|
|
80
|
+
req = _build_get_request(master_url, path, api_key)
|
|
64
81
|
try:
|
|
65
|
-
with urllib.request.urlopen(
|
|
82
|
+
with urllib.request.urlopen(req, timeout=5.0) as resp:
|
|
83
|
+
return json.loads(resp.read())
|
|
84
|
+
except Exception as exc:
|
|
85
|
+
_report_fetch_error(master_url, exc)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _get_text(master_url: str, path: str, api_key: Optional[str] = None) -> str:
|
|
89
|
+
req = _build_get_request(master_url, path, api_key)
|
|
90
|
+
try:
|
|
91
|
+
with urllib.request.urlopen(req, timeout=5.0) as resp:
|
|
66
92
|
return resp.read().decode()
|
|
67
|
-
except urllib.error.URLError as exc:
|
|
68
|
-
print(f"\n[error] Cannot reach master at {master_url}")
|
|
69
|
-
print(f" {exc.reason}")
|
|
70
|
-
print(" Is the master running? huddle-cluster master start")
|
|
71
|
-
sys.exit(1)
|
|
72
93
|
except Exception as exc:
|
|
73
|
-
|
|
74
|
-
sys.exit(1)
|
|
94
|
+
_report_fetch_error(master_url, exc)
|
|
75
95
|
|
|
76
96
|
|
|
77
97
|
def _print_json(data: Dict) -> None:
|
|
@@ -81,7 +101,6 @@ def _print_json(data: Dict) -> None:
|
|
|
81
101
|
|
|
82
102
|
# Command handlers
|
|
83
103
|
|
|
84
|
-
|
|
85
104
|
def cmd_master_start(args: argparse.Namespace) -> None:
|
|
86
105
|
"""Start a MasterNode (blocking until Ctrl-C)."""
|
|
87
106
|
import logging
|
|
@@ -93,6 +112,17 @@ def cmd_master_start(args: argparse.Namespace) -> None:
|
|
|
93
112
|
datefmt="%H:%M:%S",
|
|
94
113
|
)
|
|
95
114
|
|
|
115
|
+
api_keys: Optional[Dict[str, str]] = None
|
|
116
|
+
if args.api_key:
|
|
117
|
+
api_keys = {}
|
|
118
|
+
for item in args.api_key:
|
|
119
|
+
if "=" not in item:
|
|
120
|
+
print(f"[warn] ignoring malformed --api-key entry: {item!r} "
|
|
121
|
+
f"(expected KEY=ROLE, e.g. --api-key secret123=admin)")
|
|
122
|
+
continue
|
|
123
|
+
k, role = item.split("=", 1)
|
|
124
|
+
api_keys[k.strip()] = role.strip()
|
|
125
|
+
|
|
96
126
|
master = MasterNode(
|
|
97
127
|
host=args.host,
|
|
98
128
|
port=args.port,
|
|
@@ -101,6 +131,7 @@ def cmd_master_start(args: argparse.Namespace) -> None:
|
|
|
101
131
|
flap_threshold=args.flap_threshold,
|
|
102
132
|
quarantine_recovery_heartbeats=args.quarantine_recovery,
|
|
103
133
|
purge_after_sec=args.purge_after,
|
|
134
|
+
api_keys=api_keys,
|
|
104
135
|
)
|
|
105
136
|
|
|
106
137
|
def on_join(node):
|
|
@@ -133,6 +164,7 @@ def cmd_master_start(args: argparse.Namespace) -> None:
|
|
|
133
164
|
f"{args.flap_window:.0f}s, recover after {args.quarantine_recovery} heartbeats")
|
|
134
165
|
if args.purge_after:
|
|
135
166
|
print(f" Purge : dead nodes removed after {args.purge_after:.0f}s")
|
|
167
|
+
print(f" Auth : {'enabled (' + str(len(api_keys)) + ' key(s))' if api_keys else 'disabled (open API)'}")
|
|
136
168
|
print(f" API prefix: http://{args.host}:{args.port}{_API_V1}/")
|
|
137
169
|
print("\n Press Ctrl-C to stop.\n")
|
|
138
170
|
|
|
@@ -171,6 +203,7 @@ def cmd_agent_start(args: argparse.Namespace) -> None:
|
|
|
171
203
|
address=args.address or None,
|
|
172
204
|
heartbeat_interval_sec=args.interval,
|
|
173
205
|
metadata=meta,
|
|
206
|
+
api_key=args.api_key,
|
|
174
207
|
)
|
|
175
208
|
agent.start(retry=args.retry)
|
|
176
209
|
|
|
@@ -192,7 +225,7 @@ def cmd_agent_start(args: argparse.Namespace) -> None:
|
|
|
192
225
|
|
|
193
226
|
|
|
194
227
|
def cmd_nodes_list(args: argparse.Namespace) -> None:
|
|
195
|
-
data = _get(args.master, f"{_API_V1}/nodes")
|
|
228
|
+
data = _get(args.master, f"{_API_V1}/nodes", args.api_key)
|
|
196
229
|
nodes = data.get("nodes", [])
|
|
197
230
|
|
|
198
231
|
if not nodes:
|
|
@@ -210,12 +243,12 @@ def cmd_nodes_list(args: argparse.Namespace) -> None:
|
|
|
210
243
|
|
|
211
244
|
|
|
212
245
|
def cmd_nodes_status(args: argparse.Namespace) -> None:
|
|
213
|
-
data = _get(args.master, f"{_API_V1}/nodes/{args.node_id}")
|
|
246
|
+
data = _get(args.master, f"{_API_V1}/nodes/{args.node_id}", args.api_key)
|
|
214
247
|
_print_json(data)
|
|
215
248
|
|
|
216
249
|
|
|
217
250
|
def cmd_cluster_status(args: argparse.Namespace) -> None:
|
|
218
|
-
data = _get(args.master, f"{_API_V1}/status")
|
|
251
|
+
data = _get(args.master, f"{_API_V1}/status", args.api_key)
|
|
219
252
|
|
|
220
253
|
print(f"\nHuddleCluster Status")
|
|
221
254
|
print(f" Master : {data.get('master')}")
|
|
@@ -238,7 +271,7 @@ def cmd_cluster_health(args: argparse.Namespace) -> None:
|
|
|
238
271
|
|
|
239
272
|
|
|
240
273
|
def cmd_cluster_metrics(args: argparse.Namespace) -> None:
|
|
241
|
-
text = _get_text(args.master, f"{_API_V1}/metrics")
|
|
274
|
+
text = _get_text(args.master, f"{_API_V1}/metrics", args.api_key)
|
|
242
275
|
print(text, end="")
|
|
243
276
|
|
|
244
277
|
|
|
@@ -277,6 +310,10 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
277
310
|
ms.add_argument("--purge-after", type=float, default=None,
|
|
278
311
|
help="Remove dead nodes from the registry after this many seconds "
|
|
279
312
|
"(default: never purge)")
|
|
313
|
+
ms.add_argument("--api-key", action="append", metavar="KEY=ROLE",
|
|
314
|
+
help="Add an API key with a role (admin or viewer); repeatable, "
|
|
315
|
+
"e.g. --api-key secret123=admin --api-key view456=viewer. "
|
|
316
|
+
"If never given, the API is open (no auth).")
|
|
280
317
|
ms.set_defaults(func=cmd_master_start)
|
|
281
318
|
|
|
282
319
|
|
|
@@ -301,6 +338,8 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
301
338
|
help="Join retry attempts (default: 5)")
|
|
302
339
|
ag.add_argument("--meta", nargs="*", metavar="KEY=VAL",
|
|
303
340
|
help="Metadata key=value pairs, e.g. --meta region=us-east role=lb")
|
|
341
|
+
ag.add_argument("--api-key", default=None,
|
|
342
|
+
help="API key to authenticate with the master, if it requires auth")
|
|
304
343
|
ag.set_defaults(func=cmd_agent_start)
|
|
305
344
|
|
|
306
345
|
|
|
@@ -313,12 +352,16 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
313
352
|
nl = nodes_s.add_parser("list", help="List all registered nodes")
|
|
314
353
|
nl.add_argument("--master", default=_DEFAULT_MASTER,
|
|
315
354
|
help=f"Master URL (default: {_DEFAULT_MASTER})")
|
|
355
|
+
nl.add_argument("--api-key", default=None,
|
|
356
|
+
help="API key, if the master requires auth")
|
|
316
357
|
nl.set_defaults(func=cmd_nodes_list)
|
|
317
358
|
|
|
318
359
|
ns = nodes_s.add_parser("status", help="Detailed status for one node")
|
|
319
360
|
ns.add_argument("node_id", help="Node ID to inspect")
|
|
320
361
|
ns.add_argument("--master", default=_DEFAULT_MASTER,
|
|
321
362
|
help=f"Master URL (default: {_DEFAULT_MASTER})")
|
|
363
|
+
ns.add_argument("--api-key", default=None,
|
|
364
|
+
help="API key, if the master requires auth")
|
|
322
365
|
ns.set_defaults(func=cmd_nodes_status)
|
|
323
366
|
|
|
324
367
|
|
|
@@ -331,6 +374,8 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
331
374
|
cs = cluster_s.add_parser("status", help="Show cluster status summary")
|
|
332
375
|
cs.add_argument("--master", default=_DEFAULT_MASTER,
|
|
333
376
|
help=f"Master URL (default: {_DEFAULT_MASTER})")
|
|
377
|
+
cs.add_argument("--api-key", default=None,
|
|
378
|
+
help="API key, if the master requires auth")
|
|
334
379
|
cs.set_defaults(func=cmd_cluster_status)
|
|
335
380
|
|
|
336
381
|
ch = cluster_s.add_parser("health", help="Quick health check (exit 1 if not ok)")
|
|
@@ -341,6 +386,8 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
341
386
|
cm = cluster_s.add_parser("metrics", help="Print Prometheus text exposition")
|
|
342
387
|
cm.add_argument("--master", default=_DEFAULT_MASTER,
|
|
343
388
|
help=f"Master URL (default: {_DEFAULT_MASTER})")
|
|
389
|
+
cm.add_argument("--api-key", default=None,
|
|
390
|
+
help="API key, if the master requires auth")
|
|
344
391
|
cm.set_defaults(func=cmd_cluster_metrics)
|
|
345
392
|
|
|
346
393
|
return parser
|
|
@@ -9,7 +9,7 @@ communication with the MasterNode:
|
|
|
9
9
|
- Deregisters gracefully on shutdown (DELETE /v1/nodes/{id})
|
|
10
10
|
|
|
11
11
|
Author : Rahad Bhuiya
|
|
12
|
-
Version: 2.2.0
|
|
12
|
+
Version: 2.3.0
|
|
13
13
|
License: MIT
|
|
14
14
|
"""
|
|
15
15
|
|
|
@@ -77,6 +77,7 @@ class AgentNode:
|
|
|
77
77
|
heartbeat_interval_sec: float = DEFAULT_HEARTBEAT_INTERVAL,
|
|
78
78
|
request_timeout_sec: float = DEFAULT_REQUEST_TIMEOUT,
|
|
79
79
|
metadata: Optional[Dict[str, Any]] = None,
|
|
80
|
+
api_key: Optional[str] = None,
|
|
80
81
|
on_master_unreachable: Optional[Callable[[], None]] = None,
|
|
81
82
|
on_recovered: Optional[Callable[[], None]] = None,
|
|
82
83
|
) -> None:
|
|
@@ -95,6 +96,7 @@ class AgentNode:
|
|
|
95
96
|
self._interval = heartbeat_interval_sec
|
|
96
97
|
self._request_timeout = request_timeout_sec
|
|
97
98
|
self._metadata = metadata or {}
|
|
99
|
+
self._api_key = api_key
|
|
98
100
|
self._on_unreachable = on_master_unreachable
|
|
99
101
|
self._on_recovered = on_recovered
|
|
100
102
|
|
|
@@ -195,6 +197,7 @@ class AgentNode:
|
|
|
195
197
|
|
|
196
198
|
# Internal — join
|
|
197
199
|
|
|
200
|
+
|
|
198
201
|
def _join_with_retry(self, max_retry: int) -> None:
|
|
199
202
|
for attempt in range(1, max_retry + 1):
|
|
200
203
|
if self._send_join():
|
|
@@ -300,7 +303,7 @@ class AgentNode:
|
|
|
300
303
|
if master_reachable and self._consecutive_failures % 3 == 0:
|
|
301
304
|
should_rejoin = True
|
|
302
305
|
|
|
303
|
-
#
|
|
306
|
+
# fire actions OUTSIDE the lock
|
|
304
307
|
|
|
305
308
|
if should_rejoin:
|
|
306
309
|
if self._send_join():
|
|
@@ -333,6 +336,8 @@ class AgentNode:
|
|
|
333
336
|
url = f"{self._master}{_API_V1}/nodes/{self._node_id}"
|
|
334
337
|
req = urllib.request.Request(url, method="DELETE")
|
|
335
338
|
req.add_header("Content-Type", "application/json")
|
|
339
|
+
if self._api_key:
|
|
340
|
+
req.add_header("Authorization", f"Bearer {self._api_key}")
|
|
336
341
|
try:
|
|
337
342
|
with urllib.request.urlopen(req, timeout=self._request_timeout):
|
|
338
343
|
pass
|
|
@@ -357,6 +362,8 @@ class AgentNode:
|
|
|
357
362
|
req = urllib.request.Request(url, data=data, method="POST")
|
|
358
363
|
req.add_header("Content-Type", "application/json")
|
|
359
364
|
req.add_header("Content-Length", str(len(data)))
|
|
365
|
+
if self._api_key:
|
|
366
|
+
req.add_header("Authorization", f"Bearer {self._api_key}")
|
|
360
367
|
try:
|
|
361
368
|
with urllib.request.urlopen(req, timeout=self._request_timeout) as resp:
|
|
362
369
|
return json.loads(resp.read())
|