clustermesh 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. clustermesh-0.9.0.dist-info/METADATA +200 -0
  2. clustermesh-0.9.0.dist-info/RECORD +95 -0
  3. clustermesh-0.9.0.dist-info/WHEEL +4 -0
  4. clustermesh-0.9.0.dist-info/entry_points.txt +10 -0
  5. clustermesh-0.9.0.dist-info/licenses/LICENSE +21 -0
  6. mesh/__init__.py +7 -0
  7. mesh/agent/__init__.py +7 -0
  8. mesh/agent/client.py +120 -0
  9. mesh/agent/config.py +28 -0
  10. mesh/agent/daemon.py +192 -0
  11. mesh/agent/executor.py +119 -0
  12. mesh/agent/host_metrics.py +232 -0
  13. mesh/agent/library.py +5 -0
  14. mesh/agent/monitor.py +197 -0
  15. mesh/agent/preemption.py +39 -0
  16. mesh/agent/server.py +108 -0
  17. mesh/agent/shell.py +84 -0
  18. mesh/api/__init__.py +6 -0
  19. mesh/api/app.py +401 -0
  20. mesh/api/auth.py +72 -0
  21. mesh/api/context.py +250 -0
  22. mesh/api/events.py +121 -0
  23. mesh/api/server.py +189 -0
  24. mesh/cli.py +196 -0
  25. mesh/discovery/__init__.py +5 -0
  26. mesh/discovery/mdns.py +160 -0
  27. mesh/driver/__init__.py +14 -0
  28. mesh/driver/cluster.py +170 -0
  29. mesh/driver/ha/__init__.py +6 -0
  30. mesh/driver/ha/coordinator.py +44 -0
  31. mesh/driver/ha/election.py +80 -0
  32. mesh/driver/job_manager.py +554 -0
  33. mesh/driver/library_installer.py +218 -0
  34. mesh/driver/server.py +246 -0
  35. mesh/execution/__init__.py +5 -0
  36. mesh/execution/executor.py +160 -0
  37. mesh/health/__init__.py +5 -0
  38. mesh/health/heartbeat.py +141 -0
  39. mesh/libraries/__init__.py +0 -0
  40. mesh/libraries/manager.py +87 -0
  41. mesh/memory/__init__.py +5 -0
  42. mesh/memory/fabric.py +174 -0
  43. mesh/meshvpn/__init__.py +6 -0
  44. mesh/meshvpn/coordinator.py +103 -0
  45. mesh/meshvpn/relay.py +128 -0
  46. mesh/meshvpn/site.py +68 -0
  47. mesh/models/__init__.py +15 -0
  48. mesh/models/enums.py +26 -0
  49. mesh/models/job.py +31 -0
  50. mesh/models/node.py +85 -0
  51. mesh/models/task.py +54 -0
  52. mesh/net/__init__.py +1 -0
  53. mesh/net/address.py +52 -0
  54. mesh/notebook/__init__.py +5 -0
  55. mesh/notebook/runner.py +100 -0
  56. mesh/proto/__init__.py +5 -0
  57. mesh/proto/mesh.proto +136 -0
  58. mesh/proto/mesh_pb2.py +68 -0
  59. mesh/proto/mesh_pb2_grpc.py +562 -0
  60. mesh/recovery/__init__.py +14 -0
  61. mesh/recovery/checkpoint.py +45 -0
  62. mesh/recovery/replication.py +102 -0
  63. mesh/recovery/speculation.py +59 -0
  64. mesh/recovery/work_stealing.py +66 -0
  65. mesh/scheduler/__init__.py +13 -0
  66. mesh/scheduler/benchmark.py +112 -0
  67. mesh/scheduler/placement.py +99 -0
  68. mesh/scheduler/pools.py +76 -0
  69. mesh/scheduler/rebalancing.py +68 -0
  70. mesh/scheduler/scoring.py +53 -0
  71. mesh/sdk/__init__.py +59 -0
  72. mesh/sdk/decorator.py +94 -0
  73. mesh/sdk/units.py +61 -0
  74. mesh/sim/__init__.py +8 -0
  75. mesh/sim/agent.py +72 -0
  76. mesh/sim/chaos.py +55 -0
  77. mesh/sim/clock.py +22 -0
  78. mesh/sim/cluster.py +186 -0
  79. mesh/sim/demo.py +76 -0
  80. mesh/sim/soak.py +171 -0
  81. mesh/state/__init__.py +7 -0
  82. mesh/state/factory.py +45 -0
  83. mesh/state/postgres_store.py +215 -0
  84. mesh/state/redis_store.py +134 -0
  85. mesh/state/serialize.py +125 -0
  86. mesh/state/sqlite_store.py +193 -0
  87. mesh/state/store.py +31 -0
  88. mesh/tasks/__init__.py +5 -0
  89. mesh/tasks/builtins.py +34 -0
  90. mesh/tasks/registry.py +30 -0
  91. mesh/worker/__init__.py +6 -0
  92. mesh/worker/runtime.py +63 -0
  93. mesh/worker/server.py +42 -0
  94. mesh/worker/state.py +114 -0
  95. mesh/worker/static/index.html +209 -0
@@ -0,0 +1,200 @@
1
+ Metadata-Version: 2.4
2
+ Name: clustermesh
3
+ Version: 0.9.0
4
+ Summary: Enterprise compute fabric — pip install on any machine to join a compute cluster
5
+ Project-URL: Homepage, https://github.com/neetishsingh/ClusterMesh
6
+ Project-URL: Documentation, https://github.com/neetishsingh/ClusterMesh/blob/main/docs/join-mesh.md
7
+ Project-URL: Repository, https://github.com/neetishsingh/ClusterMesh
8
+ Author: ClusterMesh Team
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: cluster,compute,distributed,grpc,mesh,scheduler
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: System Administrators
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: System :: Clustering
23
+ Classifier: Topic :: System :: Distributed Computing
24
+ Requires-Python: >=3.11
25
+ Requires-Dist: fastapi>=0.110
26
+ Requires-Dist: grpcio>=1.60
27
+ Requires-Dist: protobuf>=4.25
28
+ Requires-Dist: psutil>=5.9
29
+ Requires-Dist: pyyaml>=6.0
30
+ Requires-Dist: uvicorn[standard]>=0.27
31
+ Requires-Dist: websockets>=12.0
32
+ Requires-Dist: zeroconf>=0.131
33
+ Provides-Extra: dev
34
+ Requires-Dist: grpcio-tools>=1.60; extra == 'dev'
35
+ Requires-Dist: httpx>=0.27; extra == 'dev'
36
+ Requires-Dist: hypothesis>=6.100; extra == 'dev'
37
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
38
+ Requires-Dist: pytest>=8.0; extra == 'dev'
39
+ Provides-Extra: discovery
40
+ Requires-Dist: zeroconf>=0.131; extra == 'discovery'
41
+ Provides-Extra: phase6
42
+ Requires-Dist: psycopg[binary]>=3.1; extra == 'phase6'
43
+ Requires-Dist: redis>=5.0; extra == 'phase6'
44
+ Requires-Dist: zeroconf>=0.131; extra == 'phase6'
45
+ Provides-Extra: postgres
46
+ Requires-Dist: psycopg[binary]>=3.1; extra == 'postgres'
47
+ Provides-Extra: redis
48
+ Requires-Dist: redis>=5.0; extra == 'redis'
49
+ Description-Content-Type: text/markdown
50
+
51
+ # ClusterMesh (ComputeMesh)
52
+
53
+ **An operating system for enterprise compute** — turn every laptop, desktop, VM, and GPU workstation into a single elastic, fault-tolerant compute cloud.
54
+
55
+ > Full vision: [Sparkpool](./Sparkpool) · Architecture: [docs/architecture.md](./docs/architecture.md) · Roadmap: [docs/roadmap.md](./docs/roadmap.md)
56
+
57
+ ## The Problem
58
+
59
+ Organizations sit on thousands of idle cores:
60
+
61
+ | Resource | Typical utilization |
62
+ |----------|---------------------|
63
+ | CPU | 10–20% |
64
+ | RAM | 30–50% |
65
+ | GPU | 5–10% |
66
+
67
+ Databricks, Kubernetes, Spark, and Ray all require **dedicated** compute. Nobody fully solves:
68
+
69
+ > *"Use all idle enterprise hardware automatically and safely."*
70
+
71
+ ClusterMesh does.
72
+
73
+ ## What We're Building
74
+
75
+ ```
76
+ Control Plane
77
+
78
+ ┌─────────────────┼─────────────────┐
79
+ │ │ │
80
+ Metadata Service Scheduler Service Auth Service
81
+ │ │ │
82
+ └─────────────────┼─────────────────┘
83
+
84
+ Driver Cluster (Raft HA)
85
+
86
+ ┌────────────────────┼────────────────────┐
87
+ │ │ │
88
+ Agent-1 Agent-2 Agent-3
89
+ Laptop Desktop VM
90
+ ```
91
+
92
+ **Killer features:** idle compute harvesting · GPU sharing · live discovery · fault-tolerant scheduling · work stealing · preemption handling · checkpoint recovery · multi-office clustering
93
+
94
+ ## Join a worker (any Python machine)
95
+
96
+ ```bash
97
+ pip install clustermesh
98
+ clustermesh join DRIVER_IP:50050 --open # local worker UI on :50052
99
+ ```
100
+
101
+ See [docs/join-mesh.md](./docs/join-mesh.md) for full details.
102
+
103
+ ## Quick Start (development)
104
+
105
+ ```bash
106
+ # Install in development mode
107
+ python -m venv .venv
108
+ source .venv/bin/activate
109
+ pip install -e ".[dev]"
110
+
111
+ # Run tests
112
+ pytest
113
+
114
+ # Run a simulated 50-node cluster demo
115
+ python -m mesh.sim.demo
116
+
117
+ # Phase 5: platform with React dashboard (build UI first)
118
+ cd frontend && npm install && npm run build && cd ..
119
+ mesh-platform --port 8080 --db clustermesh.db # driver + API + UI
120
+ # Phase 6 options
121
+ mesh-platform --port 8080 --mdns --site bangalore # advertise via mDNS
122
+ mesh-platform --store-url postgres://user:pass@localhost/clustermesh
123
+ mesh-platform --api-key your-secret-key # require auth on API
124
+ mesh-agent --discover # auto-find driver on LAN
125
+
126
+ # Phase 7: multi-site mesh VPN
127
+ mesh-platform --mesh-config config/sites.example.yaml --site bangalore
128
+ mesh-relay --listen 0.0.0.0:6000 --target 127.0.0.1:50050 # standalone relay
129
+ mesh-soak --hours 24 --nodes 50 # accelerated 24h chaos test
130
+ mesh-bench --nodes 1000 # placement SLA benchmark
131
+ ./scripts/dogfood.sh # local dogfood run
132
+ ```
133
+
134
+ ## Project Structure
135
+
136
+ ```
137
+ ClusterMesh/
138
+ ├── docs/ # Architecture, testing strategy, roadmap
139
+ ├── mesh/ # Core Python package
140
+ │ ├── models/ # Node, Task, Job, Resource types
141
+ │ ├── health/ # Heartbeat FSM, node health tracking
142
+ │ ├── scheduler/ # Scoring, placement, pool routing
143
+ │ ├── execution/ # TaskExecutor, TaskContext
144
+ │ ├── recovery/ # Checkpointing, work stealing, replication
145
+ │ ├── driver/ # JobManager, DriverCluster, gRPC server
146
+ │ ├── agent/ # Daemon, monitor, preemption, library
147
+ │ ├── proto/ # gRPC protobuf definitions
148
+ │ ├── tasks/ # Task registry + built-ins
149
+ │ ├── sdk/ # @task decorator, submit() API
150
+ │ └── sim/ # SimAgent, SimCluster, chaos injection
151
+ ├── tests/ # Unit + integration tests
152
+ ├── frontend/ # React dashboard (Vite + Tailwind)
153
+ └── Sparkpool # Original product vision document
154
+ ```
155
+
156
+ ## Current Status (Phase 8) ✅
157
+
158
+ | Component | Status |
159
+ |-----------|--------|
160
+ | Phases 0–7 (full platform + mesh VPN) | ✅ Done |
161
+ | Distributed memory fabric | ✅ Done |
162
+ | 1000-node placement SLA (`mesh-bench`) | ✅ Done |
163
+ | Memory dashboard + dogfood script | ✅ Done |
164
+
165
+ ## Developer SDK
166
+
167
+ ```python
168
+ from mesh import task, submit, TaskContext
169
+
170
+ @task(cpu=4, ram="8GB", checkpoint=True, total_work=1_000_000)
171
+ def process_records(ctx: TaskContext):
172
+ for i in range(int(ctx.progress), 1_000_000):
173
+ ctx.set_progress(i + 1, records=i + 1)
174
+ return "done"
175
+
176
+ # Sync submit — blocks until complete
177
+ result = submit(process_records)
178
+
179
+ # Async submit — returns JobHandle
180
+ job = submit(process_records, async_=True)
181
+ result = job.wait(timeout=3600)
182
+ ```
183
+
184
+ See [docs/api-spec.md](./docs/api-spec.md) for the full SDK specification.
185
+
186
+ ## Documentation
187
+
188
+ | Document | Description |
189
+ |----------|-------------|
190
+ | [Architecture](./docs/architecture.md) | System design, components, data flows |
191
+ | [Fault Tolerance](./docs/fault-tolerance.md) | All 10 recovery mechanisms in detail |
192
+ | [Testing Strategy](./docs/testing-strategy.md) | Test pyramid, scenarios, SLAs |
193
+ | [Roadmap](./docs/roadmap.md) | Phased build plan with milestones |
194
+ | [API Spec](./docs/api-spec.md) | Developer SDK and internal APIs |
195
+ | [Join mesh](./docs/join-mesh.md) | `pip install clustermesh` and worker CLI |
196
+ | [Publish to PyPI](./docs/publish-pypi.md) | Build, token setup, and upload guide |
197
+
198
+ ## License
199
+
200
+ MIT — see [LICENSE](./LICENSE).
@@ -0,0 +1,95 @@
1
+ mesh/__init__.py,sha256=xI92Z-9vXmvuANNYOe3OpcH3QEVDGv3sVhxTcrQjRko,190
2
+ mesh/cli.py,sha256=TjRTIP2r4D7sJNcAk4FpgJrpwCIskBeLcGNMxivoAqc,6513
3
+ mesh/agent/__init__.py,sha256=iZObG1xTPMbRszs-yXkLoNFnRmVBJzefEUuLd2Ib7ak,239
4
+ mesh/agent/client.py,sha256=EIfyL7RkPF3j-RZiIyKAE1WR9aMKH1atq4bUnt-dr_M,4108
5
+ mesh/agent/config.py,sha256=lsdFsYn29m8mFMmoeknhYZk0o4TpPnH71ZsfebXrx4g,923
6
+ mesh/agent/daemon.py,sha256=pFb8-cWuT3mRf3IFT7GGtMS_p5ZgAqtQ0cTNH-gi8ao,6929
7
+ mesh/agent/executor.py,sha256=HOgAkRJBzhG5sb6goyC6uHhNi2vCHwLQG4dtnoFa7fQ,4381
8
+ mesh/agent/host_metrics.py,sha256=MctniLzdHj4RiynbpLLyURKXA5jK_L9a2VREf96_D68,7038
9
+ mesh/agent/library.py,sha256=-Yh5y9ERp-IoK8Ds4dliU8AIJw_jzVb2Eny7zSNp4A8,156
10
+ mesh/agent/monitor.py,sha256=eJERv4vId_dYv4-uV1G8lnHyhlz9xicA5jZzDB24n4s,6462
11
+ mesh/agent/preemption.py,sha256=qzAgX15dcSnuGRPLIvtLs1aiKhmtC92xI4QUZvS8ATA,1231
12
+ mesh/agent/server.py,sha256=s3HIQeRrbZOGcwWxrbnKd2K6oKnjRlxnFW5mMMOp0iM,3757
13
+ mesh/agent/shell.py,sha256=3khwdUgW5cbAJTmtl1BGjEmfkIMdTKZrwkT5C9Imhjs,2314
14
+ mesh/api/__init__.py,sha256=8nFb0OzAgWWhgq_u6tIZxsztxv1QYq3JSRrtn2AE7hg,170
15
+ mesh/api/app.py,sha256=34XcZrO-kEC-KmA-hT2ho9wnFMexh7-aUVGL054N73Q,14862
16
+ mesh/api/auth.py,sha256=SIWcDEtMJBtuLBtFNSDkOIVY0kacDpMNYHB0bBUgAm0,2173
17
+ mesh/api/context.py,sha256=Q9BzLOJkYvI7vBmjbtz48dUf_muXtd_CO5epN1IaEOE,10185
18
+ mesh/api/events.py,sha256=Fux3b6P6mYWrMWLKICQjsLDKaOPGMVLq9_UE5usNJlE,3481
19
+ mesh/api/server.py,sha256=rrOEJHT_t2A_AiXDmd4W8CtgT-8GPiHTZBXooHKXv3E,6548
20
+ mesh/discovery/__init__.py,sha256=ESEdoVQaDC6eWUSi7XxCe28orPpnwaxbJXgmNQIJoV8,226
21
+ mesh/discovery/mdns.py,sha256=DvOhhohfkJkPaTKowdGtX5hK6BIbu2kd7qtxK2QLzqI,4613
22
+ mesh/driver/__init__.py,sha256=akYI3w2KsNUSPx70yVcwbiIay9H1F5A8xNvz87LECFo,373
23
+ mesh/driver/cluster.py,sha256=ZEN8hVxPHny1XfvMAW67fvamaNVw3xLzPaq-pzgCuH8,6715
24
+ mesh/driver/job_manager.py,sha256=5IgTbbM18kRhoz5a1VtFkoFTLZ7OIUjI_Um_NgtLIso,20880
25
+ mesh/driver/library_installer.py,sha256=v1IuOHWBvSJRisuOZPa5DRxmzRBl8bQmwQN4xCQcOec,7745
26
+ mesh/driver/server.py,sha256=HZ4Z7SFM1cCAKt5jEucM-VKIRLB81zsBhLcMU-VFnmc,8956
27
+ mesh/driver/ha/__init__.py,sha256=RHLh6NhVrLRqKHUL_ZHGcMYIbgQzClKR-xREajFh8Bc,196
28
+ mesh/driver/ha/coordinator.py,sha256=HpyYVbaXSiJ8Ku0sS6eUlIpkn9vkSVTxi80Le8pFZ_4,1286
29
+ mesh/driver/ha/election.py,sha256=zDbJ5QaO1MtNm6guAhr_YTy2e4JWJPLsFlp_V0n0WLk,2501
30
+ mesh/execution/__init__.py,sha256=7GorfwmtwslGlcd0FjvHpny1mHWTtINgs_MFmeoWo2s,202
31
+ mesh/execution/executor.py,sha256=BKaf6N5gbDc0OmX4wx8Wx3spAzPBZO2VqffOB0rJ22k,4919
32
+ mesh/health/__init__.py,sha256=GiFSJVB7-qgMe-1PuSwTYebE7VltAnlTTA4v63k1zrQ,179
33
+ mesh/health/heartbeat.py,sha256=uaiaW74DFhFcIs-6P0Cd-_xTx7j2h134f5PoDvhzfL8,4629
34
+ mesh/libraries/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
+ mesh/libraries/manager.py,sha256=eJBzUEIDZpzRHmzIriaEwlbenrMd6k_8X2MPN7j8FAo,2851
36
+ mesh/memory/__init__.py,sha256=HdMKtiesOu9Pa5UMHoIzoxU4mtcmyKlOyfrJwarxafw,212
37
+ mesh/memory/fabric.py,sha256=qPon-9UXuLemXx8wNBUzU8dAhpN9CzXZVzsPpHge-NQ,5500
38
+ mesh/meshvpn/__init__.py,sha256=myCvwi_rsx8nAH5c2zx4NspnbQUzkTfvXXt6I2_l5oA,232
39
+ mesh/meshvpn/coordinator.py,sha256=tBFES6EZik5-oZjYASbuNLG8r9HqtGndSW57uK59wNw,3595
40
+ mesh/meshvpn/relay.py,sha256=XW-nIe64hS6mpXOPl93J9VL0ifeWO4Ru9yYkCv1NP0w,3923
41
+ mesh/meshvpn/site.py,sha256=2uRr-slTx51-a_0cZbNKlw0QG5Z9blBzdLxdDmbvP0Q,1939
42
+ mesh/models/__init__.py,sha256=XPX2OdNDYngfeV6hTzR9i5uN8X2Km42SV1u7aZjSNtU,352
43
+ mesh/models/enums.py,sha256=AfEWroFul_P3EaRV7Fkfip7uuB5XDaLZRl5KGB9FAZo,441
44
+ mesh/models/job.py,sha256=jvib8hmLJjBl0eY-_XdDuDS_hLEAdzuiUhra-XZdpsQ,820
45
+ mesh/models/node.py,sha256=Rpun25SXK3rmSj8I7c1HEw9mDtIaCGERGa2Xa4JV04Y,2602
46
+ mesh/models/task.py,sha256=KmHHp4NFYrlRfWAS60W26p5wQ6yvZ6nPi6tMBC6zJDw,1534
47
+ mesh/net/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
48
+ mesh/net/address.py,sha256=incCO8mi0NK1lhOFcfhkI6IaCN-p6K0jTCzjPwwNbDo,1599
49
+ mesh/notebook/__init__.py,sha256=vB789AFBOFI9WchxL1tSplTxo_93S6NHtXvA6Cfp3Nc,113
50
+ mesh/notebook/runner.py,sha256=mYBGl6uMt-xJtKQ7zqRtbAS0YhtiyQ0eEbz9GN6eLVg,3245
51
+ mesh/proto/__init__.py,sha256=3AFy1Kxx1ACSn8CEBSn_CaNeknckIE7hb5Zrh3LAVBU,142
52
+ mesh/proto/mesh_pb2.py,sha256=H725YrP_NWVUajUwT3g4Pr-DGlUB2ndjGwo23OK0RmY,6986
53
+ mesh/proto/mesh_pb2_grpc.py,sha256=dnmSNtttcxC-YliTWPlRFFDWTfO6argK7pcbfHIaxAg,20392
54
+ mesh/recovery/__init__.py,sha256=BtbadLBfX3m3pgKSqPlOZspblUC0_Hw5ZQTdkySqTRk,442
55
+ mesh/recovery/checkpoint.py,sha256=dDRO7twcYw9-HzBdlFuLQHck4I-rtHem_eT8bdHLZjk,1335
56
+ mesh/recovery/replication.py,sha256=xoLBwurIi9dEgaSIVWnX0SdYTqmqjbh9kiN_3GDNtXE,3102
57
+ mesh/recovery/speculation.py,sha256=HKFAlMfkM3qOinRwC4U8dc-AVbv6_XTNwQboi0JyUus,1919
58
+ mesh/recovery/work_stealing.py,sha256=W8DMrEkJU9zHk7q7w8wj3F5yYDveJ-Hc8eF1v5Q1Dt0,2193
59
+ mesh/scheduler/__init__.py,sha256=RAxLJC_8PHJJc806Dk890F-cAiGhC1a8hHW9xHzzQCQ,342
60
+ mesh/scheduler/benchmark.py,sha256=HI3VS9is4UDE8N2cTUK7r06L9pcvAgZiDBCFAwEtQO4,3209
61
+ mesh/scheduler/placement.py,sha256=i-kpIU81rH49gqgtp9d0YFd65tb56FZqrFsNgJFWMOo,3296
62
+ mesh/scheduler/pools.py,sha256=BM-87vZdHaAX4nGpZBCLDfo_YoXkb9aP4cjFTyCsSiU,2234
63
+ mesh/scheduler/rebalancing.py,sha256=lQahWrhphtvqsXMkJrAJhprv0w_XUnolS3e6wec2JHA,1959
64
+ mesh/scheduler/scoring.py,sha256=bHt9jUDAjrRoOz0apdhMJokVr0BYkNdV_Vqy5YxqQ-s,1662
65
+ mesh/sdk/__init__.py,sha256=RFUfRMS1aOs1Iha9UQaWlDvKsbGlRHKQeFaDg-7RolI,1644
66
+ mesh/sdk/decorator.py,sha256=bciWmlPSQvVi-bw98iRSDaN30p6GsL0C9AlLhgUuOtQ,2924
67
+ mesh/sdk/units.py,sha256=lfPKzmfEMhQiAaijQszLnhjEVxDERAy_1dwmW-2RMt0,1769
68
+ mesh/sim/__init__.py,sha256=Bxm4OhVl-lbqmtl4KTIYGSK2dz7_powTYohwVs58qSs,297
69
+ mesh/sim/agent.py,sha256=P8OuYXagCdKjRj1aV3bnprSxU3F_1oEFy4muKiu2aS0,2401
70
+ mesh/sim/chaos.py,sha256=XF5ZY-jDNXuWyr8FhzKe_EdtcqQh0JUglSvH3imzkv0,2060
71
+ mesh/sim/clock.py,sha256=RGHdgci8Q28RshiHEJ9J4T6MzCAIeRQA6xiNyeBJXtQ,509
72
+ mesh/sim/cluster.py,sha256=YFgv7LsKSJfclbVGMHDLeLuxgsrTwe1zhIeG-FtP3OA,6807
73
+ mesh/sim/demo.py,sha256=gE1MKQRtoomdGNzXE-aCyU37eicU8K57HWtxdoKnrkE,2306
74
+ mesh/sim/soak.py,sha256=xytA12gh04OfFaA7MoO4HOw5GXpO-ZgRTP5tpi629mM,5779
75
+ mesh/state/__init__.py,sha256=Sa3hNrLqaFlMR4CJtPDCOL2NX3vlphlC1TN__yLXr0g,241
76
+ mesh/state/factory.py,sha256=OaPg_s-DrpoT8vuOiqL1u6fJncnPk9WXemQeB7STm3k,1367
77
+ mesh/state/postgres_store.py,sha256=77sJK1Lq5B4pKfnB1_6JkEZFmBnnMsNQYjLGoMJHR0s,7658
78
+ mesh/state/redis_store.py,sha256=Y_yIuxisRzmDMp1-VyqqiOUOnw4DDRvI3sd5h1u1OsM,4556
79
+ mesh/state/serialize.py,sha256=t6Ze3WXH1Fn4zs9d19J1Y3Z_97JsM8-duyrzecZqACs,3770
80
+ mesh/state/sqlite_store.py,sha256=Iqi6m02_0j-RiSs19Xrhelt-p-8SSYLctqELt4rTaSA,7007
81
+ mesh/state/store.py,sha256=xofJtByw-YN4Vx9YRjdtxzj-omIGNKdvpS_NS17F73A,1176
82
+ mesh/tasks/__init__.py,sha256=vCWU1R7kA9SSGilvQGRBOyIDnAfxJD2oUM_17-AB_40,187
83
+ mesh/tasks/builtins.py,sha256=UM7kRiPMzm4ndxo7Gwfs6WV579rUGgmPa3vEcaoLQ54,1090
84
+ mesh/tasks/registry.py,sha256=_lqjocN27rkGW4Jp7dMbPf9NjxYCbQ6nxLdRw8OAEYw,622
85
+ mesh/worker/__init__.py,sha256=WBWRKh0i1h4KQDTL3_HfK1V6GApZxfyGrQW3B8ahL_U,175
86
+ mesh/worker/runtime.py,sha256=KajGdlvrvGwC6aGG-swMNoKeSCv7ic4sl17Che_125A,2028
87
+ mesh/worker/server.py,sha256=4x6Zku5sowi1nsrruFRaayE-lbVRnShkY_74AoJ29Zc,1138
88
+ mesh/worker/state.py,sha256=YQfalJ1oS4PysZ_ifCbyT1yl8rX8tNF73GNoGAYtYKo,3997
89
+ mesh/proto/mesh.proto,sha256=1ET-0aUiySdsgz_JUk968PQ5yi8v0GWLEJKSjubfEw4,2954
90
+ mesh/worker/static/index.html,sha256=SkOmFYC9lY7UWcEPUClllP3TUkcaEIyB4T4yrFR3a1Q,7800
91
+ clustermesh-0.9.0.dist-info/METADATA,sha256=mDyvKZmpvIQgsUs1iHQtENrUdvGvQh71_ATHZUVX6VY,7906
92
+ clustermesh-0.9.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
93
+ clustermesh-0.9.0.dist-info/entry_points.txt,sha256=_8bPIzZh7OV0Ep6aWDPKSqaDzGwMWZZ_ky8pSS2nkSQ,343
94
+ clustermesh-0.9.0.dist-info/licenses/LICENSE,sha256=dhGDK8xXQf9uw6Qm9asXHSQvOoDSSvMegOthAN8iGlU,1073
95
+ clustermesh-0.9.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,10 @@
1
+ [console_scripts]
2
+ clustermesh = mesh.cli:main
3
+ mesh-agent = mesh.agent.daemon:main
4
+ mesh-bench = mesh.scheduler.benchmark:main
5
+ mesh-dashboard = mesh.api.server:main
6
+ mesh-demo = mesh.sim.demo:main
7
+ mesh-driver = mesh.driver.server:main
8
+ mesh-platform = mesh.api.server:main
9
+ mesh-relay = mesh.meshvpn.relay:main_relay
10
+ mesh-soak = mesh.sim.soak:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 ClusterMesh Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
mesh/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """ClusterMesh — Enterprise Compute Fabric."""
2
+
3
+ from mesh.sdk import submit, task
4
+ from mesh.execution import TaskContext
5
+
6
+ __version__ = "0.9.0"
7
+ __all__ = ["TaskContext", "submit", "task"]
mesh/agent/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """ClusterMesh agent daemon."""
2
+
3
+ from mesh.agent.config import AgentConfig
4
+ from mesh.agent.daemon import AgentDaemon, main
5
+ from mesh.agent.monitor import ResourceMonitor
6
+
7
+ __all__ = ["AgentConfig", "AgentDaemon", "ResourceMonitor", "main"]
mesh/agent/client.py ADDED
@@ -0,0 +1,120 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import time
6
+
7
+ import grpc
8
+
9
+ from mesh.agent.monitor import ResourceMonitor, ResourceSnapshot
10
+ from mesh.proto import mesh_pb2, mesh_pb2_grpc
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class DriverClient:
    """gRPC client for agent → driver communication.

    The channel and stub are created lazily on first use of :attr:`stub`
    (or explicitly via :meth:`connect`) and torn down by :meth:`close`.
    After ``close()`` the client can be used again; the next access to
    :attr:`stub` opens a fresh channel.
    """

    def __init__(self, driver_address: str, node_id: str) -> None:
        """Store connection parameters; no network activity happens here.

        Args:
            driver_address: ``host:port`` of the driver's gRPC endpoint.
            node_id: Identifier this agent reports in every request.
        """
        self.driver_address = driver_address
        self.node_id = node_id
        self._channel: grpc.Channel | None = None
        self._stub: mesh_pb2_grpc.DriverStub | None = None
        # Default cadence; replaced by the driver's value on successful register().
        self.heartbeat_interval = 2.0

    def connect(self) -> None:
        """Open an insecure channel to the driver and build the stub.

        Any previously opened channel is closed first so that calling
        ``connect()`` repeatedly does not leak channels.
        """
        self.close()
        self._channel = grpc.insecure_channel(self.driver_address)
        self._stub = mesh_pb2_grpc.DriverStub(self._channel)

    def close(self) -> None:
        """Close the channel, if any, and forget the stub bound to it.

        Safe to call multiple times. Clearing the stub matters: otherwise
        the ``stub`` property would keep handing out a stub bound to the
        closed channel and the client could never reconnect.
        """
        channel = self._channel
        self._channel = None
        self._stub = None
        if channel is not None:
            channel.close()

    @property
    def stub(self) -> mesh_pb2_grpc.DriverStub:
        """Return the driver stub, connecting on first access."""
        if self._stub is None:
            self.connect()
        # Narrowing for type checkers; connect() always sets the stub.
        assert self._stub is not None
        return self._stub

    def register(
        self,
        hostname: str,
        agent_address: str,
        os_name: str,
        location: str,
        preemptible: bool,
        snapshot: ResourceSnapshot,
        libraries: list[str],
    ) -> mesh_pb2.RegisterResponse:
        """Register this node with the driver (10 s timeout).

        When the driver accepts the registration, ``heartbeat_interval`` is
        updated from the response; a zero/unset value falls back to 2.0.

        Returns:
            The driver's ``RegisterResponse``, whether accepted or not.
        """
        req = mesh_pb2.RegisterRequest(
            node_id=self.node_id,
            hostname=hostname,
            agent_address=agent_address,
            os_name=os_name,
            location=location,
            preemptible=preemptible,
            resources=self._resource_msg(snapshot),
            libraries=libraries,
        )
        resp = self.stub.RegisterNode(req, timeout=10)
        if resp.accepted:
            self.heartbeat_interval = resp.heartbeat_interval_seconds or 2.0
        return resp

    def heartbeat(self) -> mesh_pb2.HeartbeatResponse:
        """Send a heartbeat stamped with the current wall-clock time."""
        req = mesh_pb2.HeartbeatRequest(node_id=self.node_id, timestamp=time.time())
        return self.stub.Heartbeat(req, timeout=5)

    def report_resources(self, snapshot: ResourceSnapshot) -> mesh_pb2.Ack:
        """Send the given resource snapshot to the driver."""
        # _resource_msg already stamps node_id on the message.
        return self.stub.ReportResources(self._resource_msg(snapshot), timeout=5)

    def preemption_warning(self, cpu_utilization: float, reason: str) -> mesh_pb2.Ack:
        """Send a preemption warning carrying the CPU utilization and a reason."""
        req = mesh_pb2.PreemptionRequest(
            node_id=self.node_id,
            cpu_utilization=cpu_utilization,
            reason=reason,
        )
        return self.stub.PreemptionWarning(req, timeout=5)

    def task_progress(self, task_id: str, progress: float, total_work: float, state: dict) -> mesh_pb2.Ack:
        """Report task progress; ``state`` is sent JSON-encoded.

        Raises:
            TypeError: If ``state`` is not JSON-serializable.
        """
        req = mesh_pb2.TaskProgressReport(
            task_id=task_id,
            node_id=self.node_id,
            progress=progress,
            total_work=total_work,
            state_json=json.dumps(state),
        )
        return self.stub.TaskProgress(req, timeout=5)

    def task_complete(
        self, task_id: str, success: bool, result: object = None, error: str = ""
    ) -> mesh_pb2.Ack:
        """Report task completion.

        ``result`` is JSON-encoded unless it is ``None``, in which case an
        empty string is sent. A falsy ``error`` is sent as an empty string.

        Raises:
            TypeError: If ``result`` is not JSON-serializable.
        """
        req = mesh_pb2.TaskCompleteReport(
            task_id=task_id,
            node_id=self.node_id,
            success=success,
            result_json=json.dumps(result) if result is not None else "",
            error=error or "",
        )
        return self.stub.TaskComplete(req, timeout=5)

    def _resource_msg(self, s: ResourceSnapshot) -> mesh_pb2.ResourceReport:
        """Convert a ``ResourceSnapshot`` into a ``ResourceReport`` message."""
        # -1 is the "no battery reading" sentinel. Test against None rather
        # than truthiness so a genuine 0% reading is not reported as unknown.
        # NOTE(review): assumes battery_pct is None when unavailable — confirm
        # against ResourceSnapshot.
        battery_pct = s.battery_pct if s.battery_pct is not None else -1
        return mesh_pb2.ResourceReport(
            node_id=self.node_id,
            cpu_cores_total=s.cpu_cores_total,
            # Fall back to the logical count when no physical count is set.
            cpu_cores_physical=s.cpu_cores_physical or s.cpu_cores_total,
            cpu_cores_free=s.cpu_cores_free,
            ram_gb_total=s.ram_gb_total,
            ram_gb_free=s.ram_gb_free,
            gpu_count=s.gpu_count,
            vram_gb_free=s.vram_gb_free,
            cuda_version=s.cuda_version or "",
            network_gbps=s.network_gbps,
            battery_pct=battery_pct,
            cpu_utilization=s.cpu_utilization,
            user_active=s.user_active,
            host_metrics_json=s.host_metrics_json or "",
        )
mesh/agent/config.py ADDED
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ import os
5
+
6
+
7
@dataclass
class AgentConfig:
    """Runtime settings for a ClusterMesh agent."""

    # Identifier reported to the driver; from_env() defaults it to the hostname.
    node_id: str = ""
    # host:port of the driver endpoint.
    driver_address: str = "localhost:50050"
    # host:port of this agent's own endpoint.
    agent_address: str = "localhost:50051"
    location: str = "default"
    preemptible: bool = True
    heartbeat_interval: float = 2.0
    resource_interval: float = 1.0
    cpu_preemption_threshold: float = 0.85

    @staticmethod
    def _parse_flag(raw: str) -> bool:
        """Interpret an environment string as a boolean.

        Accepts ``1``, ``true``, ``yes`` and ``on`` (case-insensitive,
        surrounding whitespace ignored) as true; anything else is false.
        """
        return raw.strip().lower() in {"1", "true", "yes", "on"}

    @classmethod
    def from_env(cls) -> AgentConfig:
        """Build a config from ``MESH_*`` environment variables.

        Reads ``MESH_NODE_ID``, ``MESH_DRIVER_ADDRESS``, ``MESH_AGENT_ADDRESS``,
        ``MESH_LOCATION`` and ``MESH_PREEMPTIBLE``. Fields without a matching
        variable keep their dataclass defaults.
        """
        import socket

        hostname = socket.gethostname()
        return cls(
            # An unset *or empty* MESH_NODE_ID falls back to the hostname so
            # the agent never runs with a blank node id.
            node_id=os.environ.get("MESH_NODE_ID") or hostname,
            driver_address=os.environ.get("MESH_DRIVER_ADDRESS", "localhost:50050"),
            agent_address=os.environ.get("MESH_AGENT_ADDRESS", "localhost:50051"),
            location=os.environ.get("MESH_LOCATION", "default"),
            # Accept common truthy spellings: previously only the literal
            # "true" counted, so MESH_PREEMPTIBLE=1 silently disabled it.
            preemptible=cls._parse_flag(os.environ.get("MESH_PREEMPTIBLE", "true")),
        )