sawyer-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sawyer_core-0.1.0/LICENSE +30 -0
- sawyer_core-0.1.0/PKG-INFO +250 -0
- sawyer_core-0.1.0/README.md +212 -0
- sawyer_core-0.1.0/pyproject.toml +81 -0
- sawyer_core-0.1.0/sawyer/__init__.py +24 -0
- sawyer_core-0.1.0/sawyer/auth/__init__.py +19 -0
- sawyer_core-0.1.0/sawyer/auth/api.py +365 -0
- sawyer_core-0.1.0/sawyer/cli.py +554 -0
- sawyer_core-0.1.0/sawyer/config.py +93 -0
- sawyer_core-0.1.0/sawyer/dashboard/__init__.py +5 -0
- sawyer_core-0.1.0/sawyer/dashboard/server.py +490 -0
- sawyer_core-0.1.0/sawyer/identity/__init__.py +5 -0
- sawyer_core-0.1.0/sawyer/identity/bedrock.py +337 -0
- sawyer_core-0.1.0/sawyer/model/__init__.py +12 -0
- sawyer_core-0.1.0/sawyer/model/registry.py +101 -0
- sawyer_core-0.1.0/sawyer/node/__init__.py +15 -0
- sawyer_core-0.1.0/sawyer/node/agent.py +182 -0
- sawyer_core-0.1.0/sawyer/node/inference.py +344 -0
- sawyer_core-0.1.0/sawyer/node/weights.py +310 -0
- sawyer_core-0.1.0/sawyer/proto/__init__.py +4 -0
- sawyer_core-0.1.0/sawyer/proto/sawyer_pb2.py +91 -0
- sawyer_core-0.1.0/sawyer/proto/sawyer_pb2_grpc.py +528 -0
- sawyer_core-0.1.0/sawyer/provider/__init__.py +41 -0
- sawyer_core-0.1.0/sawyer/provider/earnings_sync.py +162 -0
- sawyer_core-0.1.0/sawyer/provider/manager.py +554 -0
- sawyer_core-0.1.0/sawyer/provider/stripe_connect.py +333 -0
- sawyer_core-0.1.0/sawyer/provider/webhook.py +291 -0
- sawyer_core-0.1.0/sawyer/router/__init__.py +20 -0
- sawyer_core-0.1.0/sawyer/router/client.py +197 -0
- sawyer_core-0.1.0/sawyer/router/gateway.py +73 -0
- sawyer_core-0.1.0/sawyer/router/scheduler.py +275 -0
- sawyer_core-0.1.0/sawyer/router/server.py +344 -0
- sawyer_core-0.1.0/sawyer/server.py +293 -0
- sawyer_core-0.1.0/sawyer/storage/__init__.py +6 -0
- sawyer_core-0.1.0/sawyer/storage/accountant.py +175 -0
- sawyer_core-0.1.0/sawyer/storage/database.py +463 -0
- sawyer_core-0.1.0/sawyer/token/__init__.py +34 -0
- sawyer_core-0.1.0/sawyer/token/accounting.py +320 -0
- sawyer_core-0.1.0/sawyer/token/budget.py +89 -0
- sawyer_core-0.1.0/sawyer/token/stripe.py +353 -0
- sawyer_core-0.1.0/sawyer_core.egg-info/PKG-INFO +250 -0
- sawyer_core-0.1.0/sawyer_core.egg-info/SOURCES.txt +60 -0
- sawyer_core-0.1.0/sawyer_core.egg-info/dependency_links.txt +1 -0
- sawyer_core-0.1.0/sawyer_core.egg-info/entry_points.txt +2 -0
- sawyer_core-0.1.0/sawyer_core.egg-info/requires.txt +21 -0
- sawyer_core-0.1.0/sawyer_core.egg-info/top_level.txt +1 -0
- sawyer_core-0.1.0/setup.cfg +4 -0
- sawyer_core-0.1.0/tests/test_accounting.py +253 -0
- sawyer_core-0.1.0/tests/test_auth.py +272 -0
- sawyer_core-0.1.0/tests/test_config.py +123 -0
- sawyer_core-0.1.0/tests/test_dashboard.py +336 -0
- sawyer_core-0.1.0/tests/test_earnings_sync.py +171 -0
- sawyer_core-0.1.0/tests/test_grpc.py +281 -0
- sawyer_core-0.1.0/tests/test_identity.py +140 -0
- sawyer_core-0.1.0/tests/test_integration.py +395 -0
- sawyer_core-0.1.0/tests/test_provider.py +273 -0
- sawyer_core-0.1.0/tests/test_provider_webhook.py +262 -0
- sawyer_core-0.1.0/tests/test_sawyer.py +401 -0
- sawyer_core-0.1.0/tests/test_server.py +220 -0
- sawyer_core-0.1.0/tests/test_storage.py +377 -0
- sawyer_core-0.1.0/tests/test_stripe.py +175 -0
- sawyer_core-0.1.0/tests/test_weights_inference.py +217 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
Business Source License 1.1
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 InFill Systems, LLC
|
|
4
|
+
|
|
5
|
+
The Work is licensed under the Business Source License 1.1 (the "License").
|
|
6
|
+
You may not use the Work except in compliance with the License.
|
|
7
|
+
|
|
8
|
+
You may obtain a copy of the License at:
|
|
9
|
+
|
|
10
|
+
https://mariadb.com/bsl11/
|
|
11
|
+
|
|
12
|
+
Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
See the License for the specific language governing permissions and
|
|
16
|
+
limitations under the License.
|
|
17
|
+
|
|
18
|
+
Additional Use Grant: You may use the Work for non-production purposes,
|
|
19
|
+
including development, testing, and evaluation, free of charge and without
|
|
20
|
+
a license key. Production use requires a paid license from InFill Systems, LLC.
|
|
21
|
+
|
|
22
|
+
Change Date: 2030-06-29
|
|
23
|
+
|
|
24
|
+
On the Change Date, this Work will be available under the Apache License,
|
|
25
|
+
Version 2.0.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
This software includes contributions from the open-source community.
|
|
30
|
+
All contributions are licensed under the same terms as the Work.
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sawyer-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Distributed MoE inference network — the load is split, friends help
|
|
5
|
+
Author: InFill Systems, LLC
|
|
6
|
+
License: BSL-1.1
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: License :: Other/Proprietary License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: infill-bedrock>=0.3.0
|
|
19
|
+
Requires-Dist: grpcio>=1.60.0
|
|
20
|
+
Requires-Dist: grpcio-tools>=1.60.0
|
|
21
|
+
Requires-Dist: aiohttp>=3.9.0
|
|
22
|
+
Requires-Dist: httpx>=0.27.0
|
|
23
|
+
Requires-Dist: pydantic>=2.5.0
|
|
24
|
+
Requires-Dist: cryptography>=42.0.0
|
|
25
|
+
Requires-Dist: stripe>=9.0.0
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
30
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
31
|
+
Requires-Dist: black>=24.0; extra == "dev"
|
|
32
|
+
Requires-Dist: isort>=5.13; extra == "dev"
|
|
33
|
+
Requires-Dist: mypy>=1.9; extra == "dev"
|
|
34
|
+
Provides-Extra: inference
|
|
35
|
+
Requires-Dist: vllm>=0.4.0; extra == "inference"
|
|
36
|
+
Requires-Dist: llama-cpp-python>=0.2.60; extra == "inference"
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
# Sawyer — Distributed MoE Inference Network
|
|
40
|
+
|
|
41
|
+
> **Status: Active prototype** — Provider onboarding and APIs are evolving. Sawyer is under active development toward an alpha milestone.
|
|
42
|
+
|
|
43
|
+
**"The load is split. Friends help."**
|
|
44
|
+
|
|
45
|
+
<div align="center"><img src="sawyer_logo.png" alt="Sawyer on Bedrock" width="600"></div>
|
|
46
|
+
|
|
47
|
+
Named for Tom Sawyer, who turned an impossible chore into a community effort by making participation irresistible. Sawyer turns GPU inference — a credit-draining trap — into a distributed network where each node carries a piece of the load, and everyone benefits.
|
|
48
|
+
|
|
49
|
+
**Sawyer does not require providers to host full models.** Providers host isolated MoE expert workloads that the router activates only when needed. That is why Sawyer is not just another distributed inference project — it distributes only the sparse, independently activated sub-networks that MoE architectures make possible.
|
|
50
|
+
|
|
51
|
+
Built on [Bedrock](https://github.com/drc10101/bedrock) for node identity, consent-gated routing, and auditability. Sawyer runs on Bedrock. Sawyer does not own Bedrock.
|
|
52
|
+
|
|
53
|
+
## The Problem
|
|
54
|
+
|
|
55
|
+
Cloud API credits run out. Each GPT-4-class model call costs only cents, but those cents compound into hundreds of dollars. Frontier open-weight MoE models (Mixtral 8x7B, DeepSeek-V2, Qwen MoE) can run locally when quantized, but require 2-4 GPUs at full precision. Most developers have one GPU — or none.
|
|
56
|
+
|
|
57
|
+
## The Idea
|
|
58
|
+
|
|
59
|
+
A distributed network where:
|
|
60
|
+
|
|
61
|
+
1. **Volunteers host MoE expert weights** on their hardware (a single RTX 3090 can host one or more experts)
|
|
62
|
+
2. **A router activates only the relevant experts per token** (MoE sparsity — only 2 of 8 experts fire on Mixtral)
|
|
63
|
+
3. **Users pay $5/month** for a token budget — cheap enough to experiment, paid enough to sustain
|
|
64
|
+
4. **Hosts earn a share** proportional to compute contributed — the incentive altruism alone can't provide
|
|
65
|
+
5. **Bedrock provides the trust layer** — node identity, consent tokens, audit chain
|
|
66
|
+
|
|
67
|
+
## Why It Works
|
|
68
|
+
|
|
69
|
+
- **MoE is more distributable than dense inference.** Experts are independent sub-networks: unlike tensor parallelism (which splits a single matrix across GPUs), each expert is activated independently and runs its own forward pass. Sawyer's core engineering challenge is keeping routing, expert execution, and aggregation fast enough to feel local.
|
|
70
|
+
- **Sparsity means efficiency.** Only ~25% of parameters activate per token on Mixtral. The network doesn't pay for dormant compute.
|
|
71
|
+
- **Quantized models fit on consumer hardware.** Q4_K_M Mixtral expert ≈ 1.5GB. A 3090 can host 2-3 experts comfortably alongside other workloads.
|
|
72
|
+
- **$5/mo is the sweet spot.** Below the psychological barrier of "another subscription." Enough tokens to prototype, test, and run real workloads. Revenue sustains the network without extracting from users.
|
|
73
|
+
|
|
74
|
+
## Architecture
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
[User/Client]
|
|
78
|
+
│
|
|
79
|
+
▼
|
|
80
|
+
[Sawyer Router] ←── Bedrock identity, consent-gated routing
|
|
81
|
+
│
|
|
82
|
+
├──→ [Node: Expert 0] (RTX 3090, Dallas)
|
|
83
|
+
├──→ [Node: Expert 2] (A100, Frankfurt)
|
|
84
|
+
├──→ [Node: Expert 5] (M2 Max, Tokyo)
|
|
85
|
+
└──→ [Node: Expert 7] (T4, São Paulo)
|
|
86
|
+
│
|
|
87
|
+
▼
|
|
88
|
+
[Aggregated Output] → User
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Core Modules
|
|
92
|
+
|
|
93
|
+
### 1. `sawyer/router/` — Expert Router
|
|
94
|
+
- Receives token embeddings from the user's local dense layers
|
|
95
|
+
- Routes to the correct expert(s) based on the model's gating network
|
|
96
|
+
- Aggregates expert outputs, returns to user
|
|
97
|
+
- Tracks latency per node, falls back to redundant experts on timeout
|
|
98
|
+
|
|
99
|
+
### 2. `sawyer/node/` — Node Agent
|
|
100
|
+
- Registers with the network via Bedrock node identity
|
|
101
|
+
- Advertises capabilities: GPU model, VRAM, bandwidth, latency
|
|
102
|
+
- Hosts one or more expert weight files
|
|
103
|
+
- Serves inference requests via encrypted gRPC/QUIC
|
|
104
|
+
- Reports health and throughput to the router
|
|
105
|
+
|
|
106
|
+
### 3. `sawyer/token/` — Token Economics
|
|
107
|
+
- $5/mo subscription grants a token budget (e.g., 500K tokens)
|
|
108
|
+
- Tokens debit per inference request (input + output tokens)
|
|
109
|
+
- Token budget resets monthly; unused tokens roll over for at most one month
|
|
110
|
+
- Hosts earn credits proportional to tokens served
|
|
111
|
+
- Credits convert to USD payout at thresholds ($10 minimum)
|
|
112
|
+
|
|
113
|
+
### 4. `sawyer/identity/` — Bedrock Integration
|
|
114
|
+
- Every node holds a Bedrock cryptographic identity
|
|
115
|
+
- Router verifies node certificates before routing
|
|
116
|
+
- Consent tokens gate which models a node will serve
|
|
117
|
+
- Audit chain logs every inference request for compliance
|
|
118
|
+
|
|
119
|
+
### 5. `sawyer/model/` — Model Registry
|
|
120
|
+
- Catalog of supported MoE models and their expert layouts
|
|
121
|
+
- Expert weight files versioned and checksummed
|
|
122
|
+
- Nodes download experts on registration or on-demand
|
|
123
|
+
- Supports Mixtral 8x7B, DeepSeek-V2, and Qwen MoE, and is extensible to new models
|
|
124
|
+
|
|
125
|
+
## Protocol
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
1. Node registers with Sawyer network
|
|
129
|
+
→ Bedrock identity issued (certificate, scope, audit chain)
|
|
130
|
+
→ Node advertises: GPU, VRAM, bandwidth, experts available
|
|
131
|
+
|
|
132
|
+
2. User sends inference request
|
|
133
|
+
→ Sawyer router authenticates user (token balance check)
|
|
134
|
+
→ Router runs gating network locally to select experts
|
|
135
|
+
→ Router sends expert activation request to node(s)
|
|
136
|
+
→ Node validates consent token, runs expert forward pass
|
|
137
|
+
→ Node returns expert output, logs to audit chain
|
|
138
|
+
→ Router aggregates, returns to user
|
|
139
|
+
→ Token balance debited
|
|
140
|
+
|
|
141
|
+
3. Monthly settlement
|
|
142
|
+
→ Host credits calculated from tokens served
|
|
143
|
+
→ Payouts processed at $10 threshold
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Pricing
|
|
147
|
+
|
|
148
|
+
| Tier | Price | Token Budget | Use Case |
|
|
149
|
+
|------|-------|-------------|----------|
|
|
150
|
+
| Explorer | $5/mo | 500K tokens | Prototyping, experimentation |
|
|
151
|
+
| Builder | $20/mo | 2M tokens | Development, testing |
|
|
152
|
+
| Operator | $50/mo | 5M tokens | Production workloads |
|
|
153
|
+
|
|
154
|
+
Token costs vary by model (frontier models cost more tokens per request). Quantized models get a token discount (lower quality, lower cost).
|
|
155
|
+
|
|
156
|
+
## Host Economics
|
|
157
|
+
|
|
158
|
+
- Earn credits per token of expert inference served
|
|
159
|
+
- Credits proportional to: tokens served × model complexity × response time SLA
|
|
160
|
+
- Payout at $10 threshold via Stripe
|
|
161
|
+
- A single RTX 3090 hosting 2 Mixtral experts at ~30% utilization: estimated $8-15/mo
|
|
162
|
+
|
|
163
|
+
## Supported Models (Initial)
|
|
164
|
+
|
|
165
|
+
| Model | Params | Experts | Active/Token | Q4_K_M Size | Expert Size |
|
|
166
|
+
|-------|--------|---------|-------------|-------------|-------------|
|
|
167
|
+
| Mixtral 8x7B | 46.7B | 8 | 2 | ~24GB | ~1.5GB |
|
|
168
|
+
| DeepSeek-V2 Lite | 15.7B | 64 routed + 2 shared | 6 | ~9GB | varies |
|
|
169
|
+
| Qwen1.5-MoE-A2.7B | 14.3B | 60 | 4 | ~7GB | varies |
|
|
170
|
+
| DBRX | 132B | 16 | 4 | ~65GB | ~2.5GB |
|
|
171
|
+
|
|
172
|
+
## Repository Structure
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
sawyer/
|
|
176
|
+
├── README.md
|
|
177
|
+
├── LICENSE # BSL-1.1 (same as Bedrock)
|
|
178
|
+
├── pyproject.toml
|
|
179
|
+
├── sawyer/
|
|
180
|
+
│ ├── __init__.py
|
|
181
|
+
│ ├── cli.py # sawyer register, sawyer serve, sawyer status
|
|
182
|
+
│ ├── router/
|
|
183
|
+
│ │ ├── __init__.py
|
|
184
|
+
│ │ ├── gateway.py # Main router server (gRPC/QUIC)
|
|
185
|
+
│ │ ├── scheduler.py # Expert selection, load balancing
|
|
186
|
+
│ │ ├── gating.py # Model-specific gating network runner
|
|
187
|
+
│ │ └── aggregator.py # Combine expert outputs
|
|
188
|
+
│ ├── node/
|
|
189
|
+
│ │ ├── __init__.py
|
|
190
|
+
│ │ ├── agent.py # Node agent — hosts experts, serves inference
|
|
191
|
+
│ │ ├── registry.py # Register capabilities, download experts
|
|
192
|
+
│ │ ├── inference.py # Expert forward pass (vLLM / llama.cpp)
|
|
193
|
+
│ │ └── health.py # Heartbeat, throughput reporting
|
|
194
|
+
│ ├── token/
|
|
195
|
+
│ │ ├── __init__.py
|
|
196
|
+
│ │ ├── budget.py # Token budget management
|
|
197
|
+
│ │ ├── accounting.py # Debit/credit per request
|
|
198
|
+
│ │ └── settlement.py # Host payouts, Stripe integration
|
|
199
|
+
│ ├── identity/
|
|
200
|
+
│ │ ├── __init__.py
|
|
201
|
+
│ │ ├── bedrock.py # Bedrock SDK integration (identity, consent, audit)
|
|
202
|
+
│ │ └── verification.py # Node certificate verification
|
|
203
|
+
│ ├── model/
|
|
204
|
+
│ │ ├── __init__.py
|
|
205
|
+
│ │ ├── registry.py # Model catalog, expert layouts
|
|
206
|
+
│ │ ├── download.py # Expert weight distribution
|
|
207
|
+
│ │ └── formats.py # GGUF, safetensors handling
|
|
208
|
+
│ └── config.py # Configuration management
|
|
209
|
+
├── tests/
|
|
210
|
+
│ ├── test_router.py
|
|
211
|
+
│ ├── test_node.py
|
|
212
|
+
│ ├── test_token.py
|
|
213
|
+
│ ├── test_identity.py
|
|
214
|
+
│ └── test_model.py
|
|
215
|
+
├── docs/
|
|
216
|
+
│ ├── ARCHITECTURE.md
|
|
217
|
+
│ ├── HOSTING.md # How to host an expert node
|
|
218
|
+
│ ├── MODELS.md # Supported models and expert layouts
|
|
219
|
+
│ └── TOKEN_ECONOMICS.md # Detailed token economics
|
|
220
|
+
└── site/
|
|
221
|
+
└── index.html # Landing page
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## Installation
|
|
225
|
+
|
|
226
|
+
Sawyer is currently available from source.
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
git clone https://github.com/drc10101/sawyer.git
|
|
230
|
+
cd sawyer
|
|
231
|
+
pip install -e .
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
PyPI package publishing is planned after the alpha API stabilizes.
|
|
235
|
+
|
|
236
|
+
## Dependencies
|
|
237
|
+
|
|
238
|
+
- **Bedrock** (infill-bedrock): Node identity, consent tokens, audit chain
|
|
239
|
+
- **vLLM / llama.cpp**: Expert inference backend
|
|
240
|
+
- **gRPC / QUIC**: Low-latency inter-node communication
|
|
241
|
+
- **Stripe**: Subscription and host payout management
|
|
242
|
+
- **HuggingFace Hub**: Model weight distribution
|
|
243
|
+
|
|
244
|
+
## License
|
|
245
|
+
|
|
246
|
+
BSL-1.1 — free for non-production use. Production use requires a paid license. Converts to Apache 2.0 on the Change Date (2030-06-29).
|
|
247
|
+
|
|
248
|
+
---
|
|
249
|
+
|
|
250
|
+
**Alpha milestone:** Single-router, two-node demo with one toy MoE model — real node registration, real health checks, real routing logs, fake economics. Prove the network behavior first, then graduate to larger quantized MoE weights.
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# Sawyer — Distributed MoE Inference Network
|
|
2
|
+
|
|
3
|
+
> **Status: Active prototype** — Provider onboarding and APIs are evolving. Sawyer is under active development toward an alpha milestone.
|
|
4
|
+
|
|
5
|
+
**"The load is split. Friends help."**
|
|
6
|
+
|
|
7
|
+
<div align="center"><img src="sawyer_logo.png" alt="Sawyer on Bedrock" width="600"></div>
|
|
8
|
+
|
|
9
|
+
Named for Tom Sawyer, who turned an impossible chore into a community effort by making participation irresistible. Sawyer turns GPU inference — a credit-draining trap — into a distributed network where each node carries a piece of the load, and everyone benefits.
|
|
10
|
+
|
|
11
|
+
**Sawyer does not require providers to host full models.** Providers host isolated MoE expert workloads that the router activates only when needed. That is why Sawyer is not just another distributed inference project — it distributes only the sparse, independently activated sub-networks that MoE architectures make possible.
|
|
12
|
+
|
|
13
|
+
Built on [Bedrock](https://github.com/drc10101/bedrock) for node identity, consent-gated routing, and auditability. Sawyer runs on Bedrock. Sawyer does not own Bedrock.
|
|
14
|
+
|
|
15
|
+
## The Problem
|
|
16
|
+
|
|
17
|
+
Cloud API credits run out. Each GPT-4-class model call costs only cents, but those cents compound into hundreds of dollars. Frontier open-weight MoE models (Mixtral 8x7B, DeepSeek-V2, Qwen MoE) can run locally when quantized, but require 2-4 GPUs at full precision. Most developers have one GPU — or none.
|
|
18
|
+
|
|
19
|
+
## The Idea
|
|
20
|
+
|
|
21
|
+
A distributed network where:
|
|
22
|
+
|
|
23
|
+
1. **Volunteers host MoE expert weights** on their hardware (a single RTX 3090 can host one or more experts)
|
|
24
|
+
2. **A router activates only the relevant experts per token** (MoE sparsity — only 2 of 8 experts fire on Mixtral)
|
|
25
|
+
3. **Users pay $5/month** for a token budget — cheap enough to experiment, paid enough to sustain
|
|
26
|
+
4. **Hosts earn a share** proportional to compute contributed — the incentive altruism alone can't provide
|
|
27
|
+
5. **Bedrock provides the trust layer** — node identity, consent tokens, audit chain
|
|
28
|
+
|
|
29
|
+
## Why It Works
|
|
30
|
+
|
|
31
|
+
- **MoE is more distributable than dense inference.** Experts are independent sub-networks: unlike tensor parallelism (which splits a single matrix across GPUs), each expert is activated independently and runs its own forward pass. Sawyer's core engineering challenge is keeping routing, expert execution, and aggregation fast enough to feel local.
|
|
32
|
+
- **Sparsity means efficiency.** Only ~25% of parameters activate per token on Mixtral. The network doesn't pay for dormant compute.
|
|
33
|
+
- **Quantized models fit on consumer hardware.** Q4_K_M Mixtral expert ≈ 1.5GB. A 3090 can host 2-3 experts comfortably alongside other workloads.
|
|
34
|
+
- **$5/mo is the sweet spot.** Below the psychological barrier of "another subscription." Enough tokens to prototype, test, and run real workloads. Revenue sustains the network without extracting from users.
|
|
35
|
+
|
|
36
|
+
## Architecture
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
[User/Client]
|
|
40
|
+
│
|
|
41
|
+
▼
|
|
42
|
+
[Sawyer Router] ←── Bedrock identity, consent-gated routing
|
|
43
|
+
│
|
|
44
|
+
├──→ [Node: Expert 0] (RTX 3090, Dallas)
|
|
45
|
+
├──→ [Node: Expert 2] (A100, Frankfurt)
|
|
46
|
+
├──→ [Node: Expert 5] (M2 Max, Tokyo)
|
|
47
|
+
└──→ [Node: Expert 7] (T4, São Paulo)
|
|
48
|
+
│
|
|
49
|
+
▼
|
|
50
|
+
[Aggregated Output] → User
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Core Modules
|
|
54
|
+
|
|
55
|
+
### 1. `sawyer/router/` — Expert Router
|
|
56
|
+
- Receives token embeddings from the user's local dense layers
|
|
57
|
+
- Routes to the correct expert(s) based on the model's gating network
|
|
58
|
+
- Aggregates expert outputs, returns to user
|
|
59
|
+
- Tracks latency per node, falls back to redundant experts on timeout
|
|
60
|
+
|
|
61
|
+
### 2. `sawyer/node/` — Node Agent
|
|
62
|
+
- Registers with the network via Bedrock node identity
|
|
63
|
+
- Advertises capabilities: GPU model, VRAM, bandwidth, latency
|
|
64
|
+
- Hosts one or more expert weight files
|
|
65
|
+
- Serves inference requests via encrypted gRPC/QUIC
|
|
66
|
+
- Reports health and throughput to the router
|
|
67
|
+
|
|
68
|
+
### 3. `sawyer/token/` — Token Economics
|
|
69
|
+
- $5/mo subscription grants a token budget (e.g., 500K tokens)
|
|
70
|
+
- Tokens debit per inference request (input + output tokens)
|
|
71
|
+
- Token budget resets monthly; unused tokens roll over for at most one month
|
|
72
|
+
- Hosts earn credits proportional to tokens served
|
|
73
|
+
- Credits convert to USD payout at thresholds ($10 minimum)
|
|
74
|
+
|
|
75
|
+
### 4. `sawyer/identity/` — Bedrock Integration
|
|
76
|
+
- Every node holds a Bedrock cryptographic identity
|
|
77
|
+
- Router verifies node certificates before routing
|
|
78
|
+
- Consent tokens gate which models a node will serve
|
|
79
|
+
- Audit chain logs every inference request for compliance
|
|
80
|
+
|
|
81
|
+
### 5. `sawyer/model/` — Model Registry
|
|
82
|
+
- Catalog of supported MoE models and their expert layouts
|
|
83
|
+
- Expert weight files versioned and checksummed
|
|
84
|
+
- Nodes download experts on registration or on-demand
|
|
85
|
+
- Supports Mixtral 8x7B, DeepSeek-V2, and Qwen MoE, and is extensible to new models
|
|
86
|
+
|
|
87
|
+
## Protocol
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
1. Node registers with Sawyer network
|
|
91
|
+
→ Bedrock identity issued (certificate, scope, audit chain)
|
|
92
|
+
→ Node advertises: GPU, VRAM, bandwidth, experts available
|
|
93
|
+
|
|
94
|
+
2. User sends inference request
|
|
95
|
+
→ Sawyer router authenticates user (token balance check)
|
|
96
|
+
→ Router runs gating network locally to select experts
|
|
97
|
+
→ Router sends expert activation request to node(s)
|
|
98
|
+
→ Node validates consent token, runs expert forward pass
|
|
99
|
+
→ Node returns expert output, logs to audit chain
|
|
100
|
+
→ Router aggregates, returns to user
|
|
101
|
+
→ Token balance debited
|
|
102
|
+
|
|
103
|
+
3. Monthly settlement
|
|
104
|
+
→ Host credits calculated from tokens served
|
|
105
|
+
→ Payouts processed at $10 threshold
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Pricing
|
|
109
|
+
|
|
110
|
+
| Tier | Price | Token Budget | Use Case |
|
|
111
|
+
|------|-------|-------------|----------|
|
|
112
|
+
| Explorer | $5/mo | 500K tokens | Prototyping, experimentation |
|
|
113
|
+
| Builder | $20/mo | 2M tokens | Development, testing |
|
|
114
|
+
| Operator | $50/mo | 5M tokens | Production workloads |
|
|
115
|
+
|
|
116
|
+
Token costs vary by model (frontier models cost more tokens per request). Quantized models get a token discount (lower quality, lower cost).
|
|
117
|
+
|
|
118
|
+
## Host Economics
|
|
119
|
+
|
|
120
|
+
- Earn credits per token of expert inference served
|
|
121
|
+
- Credits proportional to: tokens served × model complexity × response time SLA
|
|
122
|
+
- Payout at $10 threshold via Stripe
|
|
123
|
+
- A single RTX 3090 hosting 2 Mixtral experts at ~30% utilization: estimated $8-15/mo
|
|
124
|
+
|
|
125
|
+
## Supported Models (Initial)
|
|
126
|
+
|
|
127
|
+
| Model | Params | Experts | Active/Token | Q4_K_M Size | Expert Size |
|
|
128
|
+
|-------|--------|---------|-------------|-------------|-------------|
|
|
129
|
+
| Mixtral 8x7B | 46.7B | 8 | 2 | ~24GB | ~1.5GB |
|
|
130
|
+
| DeepSeek-V2 Lite | 15.7B | 64 routed + 2 shared | 6 | ~9GB | varies |
|
|
131
|
+
| Qwen1.5-MoE-A2.7B | 14.3B | 60 | 4 | ~7GB | varies |
|
|
132
|
+
| DBRX | 132B | 16 | 4 | ~65GB | ~2.5GB |
|
|
133
|
+
|
|
134
|
+
## Repository Structure
|
|
135
|
+
|
|
136
|
+
```
|
|
137
|
+
sawyer/
|
|
138
|
+
├── README.md
|
|
139
|
+
├── LICENSE # BSL-1.1 (same as Bedrock)
|
|
140
|
+
├── pyproject.toml
|
|
141
|
+
├── sawyer/
|
|
142
|
+
│ ├── __init__.py
|
|
143
|
+
│ ├── cli.py # sawyer register, sawyer serve, sawyer status
|
|
144
|
+
│ ├── router/
|
|
145
|
+
│ │ ├── __init__.py
|
|
146
|
+
│ │ ├── gateway.py # Main router server (gRPC/QUIC)
|
|
147
|
+
│ │ ├── scheduler.py # Expert selection, load balancing
|
|
148
|
+
│ │ ├── gating.py # Model-specific gating network runner
|
|
149
|
+
│ │ └── aggregator.py # Combine expert outputs
|
|
150
|
+
│ ├── node/
|
|
151
|
+
│ │ ├── __init__.py
|
|
152
|
+
│ │ ├── agent.py # Node agent — hosts experts, serves inference
|
|
153
|
+
│ │ ├── registry.py # Register capabilities, download experts
|
|
154
|
+
│ │ ├── inference.py # Expert forward pass (vLLM / llama.cpp)
|
|
155
|
+
│ │ └── health.py # Heartbeat, throughput reporting
|
|
156
|
+
│ ├── token/
|
|
157
|
+
│ │ ├── __init__.py
|
|
158
|
+
│ │ ├── budget.py # Token budget management
|
|
159
|
+
│ │ ├── accounting.py # Debit/credit per request
|
|
160
|
+
│ │ └── settlement.py # Host payouts, Stripe integration
|
|
161
|
+
│ ├── identity/
|
|
162
|
+
│ │ ├── __init__.py
|
|
163
|
+
│ │ ├── bedrock.py # Bedrock SDK integration (identity, consent, audit)
|
|
164
|
+
│ │ └── verification.py # Node certificate verification
|
|
165
|
+
│ ├── model/
|
|
166
|
+
│ │ ├── __init__.py
|
|
167
|
+
│ │ ├── registry.py # Model catalog, expert layouts
|
|
168
|
+
│ │ ├── download.py # Expert weight distribution
|
|
169
|
+
│ │ └── formats.py # GGUF, safetensors handling
|
|
170
|
+
│ └── config.py # Configuration management
|
|
171
|
+
├── tests/
|
|
172
|
+
│ ├── test_router.py
|
|
173
|
+
│ ├── test_node.py
|
|
174
|
+
│ ├── test_token.py
|
|
175
|
+
│ ├── test_identity.py
|
|
176
|
+
│ └── test_model.py
|
|
177
|
+
├── docs/
|
|
178
|
+
│ ├── ARCHITECTURE.md
|
|
179
|
+
│ ├── HOSTING.md # How to host an expert node
|
|
180
|
+
│ ├── MODELS.md # Supported models and expert layouts
|
|
181
|
+
│ └── TOKEN_ECONOMICS.md # Detailed token economics
|
|
182
|
+
└── site/
|
|
183
|
+
└── index.html # Landing page
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Installation
|
|
187
|
+
|
|
188
|
+
Sawyer is currently available from source.
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
git clone https://github.com/drc10101/sawyer.git
|
|
192
|
+
cd sawyer
|
|
193
|
+
pip install -e .
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
PyPI package publishing is planned after the alpha API stabilizes.
|
|
197
|
+
|
|
198
|
+
## Dependencies
|
|
199
|
+
|
|
200
|
+
- **Bedrock** (infill-bedrock): Node identity, consent tokens, audit chain
|
|
201
|
+
- **vLLM / llama.cpp**: Expert inference backend
|
|
202
|
+
- **gRPC / QUIC**: Low-latency inter-node communication
|
|
203
|
+
- **Stripe**: Subscription and host payout management
|
|
204
|
+
- **HuggingFace Hub**: Model weight distribution
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
BSL-1.1 — free for non-production use. Production use requires a paid license. Converts to Apache 2.0 on the Change Date (2030-06-29).
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
**Alpha milestone:** Single-router, two-node demo with one toy MoE model — real node registration, real health checks, real routing logs, fake economics. Prove the network behavior first, then graduate to larger quantized MoE weights.
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sawyer-core"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Distributed MoE inference network — the load is split, friends help"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "BSL-1.1"}
|
|
11
|
+
requires-python = ">=3.11"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "InFill Systems, LLC"},
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 3 - Alpha",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"License :: Other/Proprietary License",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"infill-bedrock>=0.3.0",
|
|
27
|
+
"grpcio>=1.60.0",
|
|
28
|
+
"grpcio-tools>=1.60.0",
|
|
29
|
+
"aiohttp>=3.9.0",
|
|
30
|
+
"httpx>=0.27.0",
|
|
31
|
+
"pydantic>=2.5.0",
|
|
32
|
+
"cryptography>=42.0.0",
|
|
33
|
+
"stripe>=9.0.0",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
dev = [
|
|
38
|
+
"pytest>=8.0",
|
|
39
|
+
"pytest-asyncio>=0.23",
|
|
40
|
+
"pytest-cov>=5.0",
|
|
41
|
+
"ruff>=0.4.0",
|
|
42
|
+
"black>=24.0",
|
|
43
|
+
"isort>=5.13",
|
|
44
|
+
"mypy>=1.9",
|
|
45
|
+
]
|
|
46
|
+
inference = [
|
|
47
|
+
"vllm>=0.4.0",
|
|
48
|
+
"llama-cpp-python>=0.2.60",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
[project.scripts]
|
|
52
|
+
sawyer = "sawyer.cli:main"
|
|
53
|
+
|
|
54
|
+
[tool.setuptools.packages.find]
|
|
55
|
+
where = ["."]
|
|
56
|
+
include = ["sawyer*"]
|
|
57
|
+
exclude = ["tests*"]
|
|
58
|
+
|
|
59
|
+
[tool.ruff]
|
|
60
|
+
target-version = "py311"
|
|
61
|
+
line-length = 99
|
|
62
|
+
|
|
63
|
+
[tool.ruff.lint]
|
|
64
|
+
select = ["E", "F", "W", "I", "UP", "B", "SIM"]
|
|
65
|
+
ignore = ["B008"]
|
|
66
|
+
|
|
67
|
+
[tool.black]
|
|
68
|
+
line-length = 99
|
|
69
|
+
target-version = ["py311"]
|
|
70
|
+
|
|
71
|
+
[tool.isort]
|
|
72
|
+
profile = "black"
|
|
73
|
+
line_length = 99
|
|
74
|
+
|
|
75
|
+
[tool.mypy]
|
|
76
|
+
python_version = "3.11"
|
|
77
|
+
strict = true
|
|
78
|
+
|
|
79
|
+
[tool.pytest.ini_options]
|
|
80
|
+
asyncio_mode = "auto"
|
|
81
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Sawyer — Distributed MoE Inference Network.
|
|
3
|
+
|
|
4
|
+
The load is split. Friends help.
|
|
5
|
+
|
|
6
|
+
Sawyer distributes Mixture-of-Experts model inference across a network of
|
|
7
|
+
volunteer-hosted nodes. Each node hosts one or more expert weight files, and
|
|
8
|
+
a central router activates only the relevant experts per token. Users pay a
|
|
9
|
+
low monthly subscription ($5/mo) for a token budget — cheap enough to
|
|
10
|
+
experiment, paid enough to sustain the network. Hosts earn credits
|
|
11
|
+
proportional to compute contributed.
|
|
12
|
+
|
|
13
|
+
Trust is provided by Bedrock: cryptographic node identity, consent-gated
|
|
14
|
+
routing, and a tamper-evident audit chain.
|
|
15
|
+
|
|
16
|
+
SPDX-License-Identifier: BUSL-1.1 — See LICENSE for details.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
__version__ = "0.1.0"
|
|
20
|
+
__author__ = "InFill Systems, LLC"
|
|
21
|
+
|
|
22
|
+
from sawyer.server import SawyerServer
|
|
23
|
+
|
|
24
|
+
__all__ = ["SawyerServer"]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Sawyer Auth package — API key validation and rate limiting."""
|
|
2
|
+
|
|
3
|
+
from sawyer.auth.api import (
|
|
4
|
+
APIKey,
|
|
5
|
+
AuthError,
|
|
6
|
+
InvalidAPIKey,
|
|
7
|
+
KeyStatus,
|
|
8
|
+
RateLimitExceeded,
|
|
9
|
+
SawyerAuth,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"SawyerAuth",
|
|
14
|
+
"APIKey",
|
|
15
|
+
"KeyStatus",
|
|
16
|
+
"AuthError",
|
|
17
|
+
"InvalidAPIKey",
|
|
18
|
+
"RateLimitExceeded",
|
|
19
|
+
]
|