sawyer-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. sawyer_core-0.1.0/LICENSE +30 -0
  2. sawyer_core-0.1.0/PKG-INFO +250 -0
  3. sawyer_core-0.1.0/README.md +212 -0
  4. sawyer_core-0.1.0/pyproject.toml +81 -0
  5. sawyer_core-0.1.0/sawyer/__init__.py +24 -0
  6. sawyer_core-0.1.0/sawyer/auth/__init__.py +19 -0
  7. sawyer_core-0.1.0/sawyer/auth/api.py +365 -0
  8. sawyer_core-0.1.0/sawyer/cli.py +554 -0
  9. sawyer_core-0.1.0/sawyer/config.py +93 -0
  10. sawyer_core-0.1.0/sawyer/dashboard/__init__.py +5 -0
  11. sawyer_core-0.1.0/sawyer/dashboard/server.py +490 -0
  12. sawyer_core-0.1.0/sawyer/identity/__init__.py +5 -0
  13. sawyer_core-0.1.0/sawyer/identity/bedrock.py +337 -0
  14. sawyer_core-0.1.0/sawyer/model/__init__.py +12 -0
  15. sawyer_core-0.1.0/sawyer/model/registry.py +101 -0
  16. sawyer_core-0.1.0/sawyer/node/__init__.py +15 -0
  17. sawyer_core-0.1.0/sawyer/node/agent.py +182 -0
  18. sawyer_core-0.1.0/sawyer/node/inference.py +344 -0
  19. sawyer_core-0.1.0/sawyer/node/weights.py +310 -0
  20. sawyer_core-0.1.0/sawyer/proto/__init__.py +4 -0
  21. sawyer_core-0.1.0/sawyer/proto/sawyer_pb2.py +91 -0
  22. sawyer_core-0.1.0/sawyer/proto/sawyer_pb2_grpc.py +528 -0
  23. sawyer_core-0.1.0/sawyer/provider/__init__.py +41 -0
  24. sawyer_core-0.1.0/sawyer/provider/earnings_sync.py +162 -0
  25. sawyer_core-0.1.0/sawyer/provider/manager.py +554 -0
  26. sawyer_core-0.1.0/sawyer/provider/stripe_connect.py +333 -0
  27. sawyer_core-0.1.0/sawyer/provider/webhook.py +291 -0
  28. sawyer_core-0.1.0/sawyer/router/__init__.py +20 -0
  29. sawyer_core-0.1.0/sawyer/router/client.py +197 -0
  30. sawyer_core-0.1.0/sawyer/router/gateway.py +73 -0
  31. sawyer_core-0.1.0/sawyer/router/scheduler.py +275 -0
  32. sawyer_core-0.1.0/sawyer/router/server.py +344 -0
  33. sawyer_core-0.1.0/sawyer/server.py +293 -0
  34. sawyer_core-0.1.0/sawyer/storage/__init__.py +6 -0
  35. sawyer_core-0.1.0/sawyer/storage/accountant.py +175 -0
  36. sawyer_core-0.1.0/sawyer/storage/database.py +463 -0
  37. sawyer_core-0.1.0/sawyer/token/__init__.py +34 -0
  38. sawyer_core-0.1.0/sawyer/token/accounting.py +320 -0
  39. sawyer_core-0.1.0/sawyer/token/budget.py +89 -0
  40. sawyer_core-0.1.0/sawyer/token/stripe.py +353 -0
  41. sawyer_core-0.1.0/sawyer_core.egg-info/PKG-INFO +250 -0
  42. sawyer_core-0.1.0/sawyer_core.egg-info/SOURCES.txt +60 -0
  43. sawyer_core-0.1.0/sawyer_core.egg-info/dependency_links.txt +1 -0
  44. sawyer_core-0.1.0/sawyer_core.egg-info/entry_points.txt +2 -0
  45. sawyer_core-0.1.0/sawyer_core.egg-info/requires.txt +21 -0
  46. sawyer_core-0.1.0/sawyer_core.egg-info/top_level.txt +1 -0
  47. sawyer_core-0.1.0/setup.cfg +4 -0
  48. sawyer_core-0.1.0/tests/test_accounting.py +253 -0
  49. sawyer_core-0.1.0/tests/test_auth.py +272 -0
  50. sawyer_core-0.1.0/tests/test_config.py +123 -0
  51. sawyer_core-0.1.0/tests/test_dashboard.py +336 -0
  52. sawyer_core-0.1.0/tests/test_earnings_sync.py +171 -0
  53. sawyer_core-0.1.0/tests/test_grpc.py +281 -0
  54. sawyer_core-0.1.0/tests/test_identity.py +140 -0
  55. sawyer_core-0.1.0/tests/test_integration.py +395 -0
  56. sawyer_core-0.1.0/tests/test_provider.py +273 -0
  57. sawyer_core-0.1.0/tests/test_provider_webhook.py +262 -0
  58. sawyer_core-0.1.0/tests/test_sawyer.py +401 -0
  59. sawyer_core-0.1.0/tests/test_server.py +220 -0
  60. sawyer_core-0.1.0/tests/test_storage.py +377 -0
  61. sawyer_core-0.1.0/tests/test_stripe.py +175 -0
  62. sawyer_core-0.1.0/tests/test_weights_inference.py +217 -0
@@ -0,0 +1,30 @@
1
+ Business Source License 1.1
2
+
3
+ Copyright (c) 2026 InFill Systems, LLC
4
+
5
+ The Work is licensed under the Business Source License 1.1 (the "License").
6
+ You may not use the Work except in compliance with the License.
7
+
8
+ You may obtain a copy of the License at:
9
+
10
+ https://mariadb.com/bsl11/
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+
18
+ Additional Use Grant: You may use the Work for non-production purposes,
19
+ including development, testing, and evaluation, free of charge and without
20
+ a license key. Production use requires a paid license from InFill Systems, LLC.
21
+
22
+ Change Date: 2030-06-29
23
+
24
+ On the Change Date, this Work will be available under the Apache License,
25
+ Version 2.0.
26
+
27
+ ---
28
+
29
+ This software includes contributions from the open-source community.
30
+ All contributions are licensed under the same terms as the Work.
@@ -0,0 +1,250 @@
1
+ Metadata-Version: 2.4
2
+ Name: sawyer-core
3
+ Version: 0.1.0
4
+ Summary: Distributed MoE inference network — the load is split, friends help
5
+ Author: InFill Systems, LLC
6
+ License: BSL-1.1
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: Other/Proprietary License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: infill-bedrock>=0.3.0
19
+ Requires-Dist: grpcio>=1.60.0
20
+ Requires-Dist: grpcio-tools>=1.60.0
21
+ Requires-Dist: aiohttp>=3.9.0
22
+ Requires-Dist: httpx>=0.27.0
23
+ Requires-Dist: pydantic>=2.5.0
24
+ Requires-Dist: cryptography>=42.0.0
25
+ Requires-Dist: stripe>=9.0.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=8.0; extra == "dev"
28
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
29
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
30
+ Requires-Dist: ruff>=0.4.0; extra == "dev"
31
+ Requires-Dist: black>=24.0; extra == "dev"
32
+ Requires-Dist: isort>=5.13; extra == "dev"
33
+ Requires-Dist: mypy>=1.9; extra == "dev"
34
+ Provides-Extra: inference
35
+ Requires-Dist: vllm>=0.4.0; extra == "inference"
36
+ Requires-Dist: llama-cpp-python>=0.2.60; extra == "inference"
37
+ Dynamic: license-file
38
+
39
+ # Sawyer — Distributed MoE Inference Network
40
+
41
+ > **Status: Active prototype** — Provider onboarding and APIs are evolving. Sawyer is under active development toward an alpha milestone.
42
+
43
+ **"The load is split. Friends help."**
44
+
45
+ <div align="center"><img src="sawyer_logo.png" alt="Sawyer on Bedrock" width="600"></div>
46
+
47
+ Named for Tom Sawyer, who turned an impossible chore into a community effort by making participation irresistible. Sawyer turns GPU inference — a credit-draining trap — into a distributed network where each node carries a piece of the load, and everyone benefits.
48
+
49
+ **Sawyer does not require providers to host full models.** Providers host isolated MoE expert workloads that the router activates only when needed. That is why Sawyer is not just another distributed inference project — it distributes only the sparse, independently activated sub-networks that MoE architectures make possible.
50
+
51
+ Built on [Bedrock](https://github.com/drc10101/bedrock) for node identity, consent-gated routing, and auditability. Sawyer runs on Bedrock. Sawyer does not own Bedrock.
52
+
53
+ ## The Problem
54
+
55
+ Cloud API credits run out. A single GPT-4-class model call costs only cents, but those cents compound into hundreds of dollars. Frontier open-weight MoE models (Mixtral 8x7B, DeepSeek-V2, Qwen MoE) can run locally, but require 2-4 GPUs at full precision. Most developers have one GPU — or none.
56
+
57
+ ## The Idea
58
+
59
+ A distributed network where:
60
+
61
+ 1. **Volunteers host MoE expert weights** on their hardware (a single RTX 3090 can host one or more quantized experts)
62
+ 2. **A router activates only the relevant experts per token** (MoE sparsity — only 2 of 8 experts fire on Mixtral)
63
+ 3. **Users pay $5/month** for a token budget — cheap enough to experiment, paid enough to sustain
64
+ 4. **Hosts earn a share** proportional to compute contributed — the incentive altruism alone can't provide
65
+ 5. **Bedrock provides the trust layer** — node identity, consent tokens, audit chain
66
+
67
+ ## Why It Works
68
+
69
+ - **MoE is more distributable than dense inference.** Experts are independent sub-networks. Unlike tensor parallelism (which splits a single matrix across GPUs), each expert runs its own forward pass and is activated independently. Sawyer's core engineering challenge is keeping routing, expert execution, and aggregation fast enough to feel local.
70
+ - **Sparsity means efficiency.** Only ~25% of parameters activate per token on Mixtral. The network doesn't pay for dormant compute.
71
+ - **Quantized models fit on consumer hardware.** Q4_K_M Mixtral expert ≈ 1.5GB. A 3090 can host 2-3 experts comfortably alongside other workloads.
72
+ - **$5/mo is the sweet spot.** Below the psychological barrier of "another subscription." Enough tokens to prototype, test, and run real workloads. Revenue sustains the network without extracting from users.
73
+
74
+ ## Architecture
75
+
76
+ ```
77
+ [User/Client]
78
+
79
+
80
+ [Sawyer Router] ←── Bedrock identity, consent-gated routing
81
+
82
+ ├──→ [Node: Expert 0] (RTX 3090, Dallas)
83
+ ├──→ [Node: Expert 2] (A100, Frankfurt)
84
+ ├──→ [Node: Expert 5] (M2 Max, Tokyo)
85
+ └──→ [Node: Expert 7] (T4, São Paulo)
86
+
87
+
88
+ [Aggregated Output] → User
89
+ ```
90
+
91
+ ## Core Modules
92
+
93
+ ### 1. `sawyer/router/` — Expert Router
94
+ - Receives token embeddings from the user's local dense layers
95
+ - Routes to the correct expert(s) based on the model's gating network
96
+ - Aggregates expert outputs, returns to user
97
+ - Tracks latency per node, falls back to redundant experts on timeout
98
+
99
+ ### 2. `sawyer/node/` — Node Agent
100
+ - Registers with the network via Bedrock node identity
101
+ - Advertises capabilities: GPU model, VRAM, bandwidth, latency
102
+ - Hosts one or more expert weight files
103
+ - Serves inference requests via encrypted gRPC/QUIC
104
+ - Reports health and throughput to the router
105
+
106
+ ### 3. `sawyer/token/` — Token Economics
107
+ - $5/mo subscription grants a token budget (e.g., 500K tokens)
108
+ - Tokens debit per inference request (input + output tokens)
109
+ - Token budget resets monthly; unused tokens roll over for at most one month
110
+ - Hosts earn credits proportional to tokens served
111
+ - Credits convert to USD payout at thresholds ($10 minimum)
112
+
113
+ ### 4. `sawyer/identity/` — Bedrock Integration
114
+ - Every node holds a Bedrock cryptographic identity
115
+ - Router verifies node certificates before routing
116
+ - Consent tokens gate which models a node will serve
117
+ - Audit chain logs every inference request for compliance
118
+
119
+ ### 5. `sawyer/model/` — Model Registry
120
+ - Catalog of supported MoE models and their expert layouts
121
+ - Expert weight files versioned and checksummed
122
+ - Nodes download experts on registration or on-demand
123
+ - Supports Mixtral 8x7B, DeepSeek-V2, Qwen MoE, and extensible for new models
124
+
125
+ ## Protocol
126
+
127
+ ```
128
+ 1. Node registers with Sawyer network
129
+ → Bedrock identity issued (certificate, scope, audit chain)
130
+ → Node advertises: GPU, VRAM, bandwidth, experts available
131
+
132
+ 2. User sends inference request
133
+ → Sawyer router authenticates user (token balance check)
134
+ → Router runs gating network locally to select experts
135
+ → Router sends expert activation request to node(s)
136
+ → Node validates consent token, runs expert forward pass
137
+ → Node returns expert output, logs to audit chain
138
+ → Router aggregates, returns to user
139
+ → Token balance debited
140
+
141
+ 3. Monthly settlement
142
+ → Host credits calculated from tokens served
143
+ → Payouts processed at $10 threshold
144
+ ```
145
+
146
+ ## Pricing
147
+
148
+ | Tier | Price | Token Budget | Use Case |
149
+ |------|-------|-------------|----------|
150
+ | Explorer | $5/mo | 500K tokens | Prototyping, experimentation |
151
+ | Builder | $20/mo | 2M tokens | Development, testing |
152
+ | Operator | $50/mo | 5M tokens | Production workloads |
153
+
154
+ Token costs vary by model (frontier models cost more tokens per request). Quantized models get a token discount (lower quality, lower cost).
155
+
156
+ ## Host Economics
157
+
158
+ - Earn credits per token of expert inference served
159
+ - Credits proportional to: tokens served × model complexity × response time SLA
160
+ - Payout at $10 threshold via Stripe
161
+ - A single RTX 3090 hosting 2 Mixtral experts at ~30% utilization: estimated $8-15/mo
162
+
163
+ ## Supported Models (Initial)
164
+
165
+ | Model | Params | Experts | Active/Token | Q4_K_M Size | Expert Size |
166
+ |-------|--------|---------|-------------|-------------|-------------|
167
+ | Mixtral 8x7B | 46.7B | 8 | 2 | ~24GB | ~1.5GB |
168
+ | DeepSeek-V2 Lite | 15.7B | 64 routed + 2 shared | 6 | ~9GB | varies |
169
+ | Qwen1.5-MoE-A2.7B | 14.3B | 60 | 4 | ~7GB | varies |
170
+ | DBRX | 132B | 16 | 4 | ~65GB | ~2.5GB |
171
+
172
+ ## Repository Structure
173
+
174
+ ```
175
+ sawyer/
176
+ ├── README.md
177
+ ├── LICENSE # BSL-1.1 (same as Bedrock)
178
+ ├── pyproject.toml
179
+ ├── sawyer/
180
+ │ ├── __init__.py
181
+ │ ├── cli.py # sawyer register, sawyer serve, sawyer status
182
+ │ ├── router/
183
+ │ │ ├── __init__.py
184
+ │ │ ├── gateway.py # Main router server (gRPC/QUIC)
185
+ │ │ ├── scheduler.py # Expert selection, load balancing
186
+ │ │ ├── gating.py # Model-specific gating network runner
187
+ │ │ └── aggregator.py # Combine expert outputs
188
+ │ ├── node/
189
+ │ │ ├── __init__.py
190
+ │ │ ├── agent.py # Node agent — hosts experts, serves inference
191
+ │ │ ├── registry.py # Register capabilities, download experts
192
+ │ │ ├── inference.py # Expert forward pass (vLLM / llama.cpp)
193
+ │ │ └── health.py # Heartbeat, throughput reporting
194
+ │ ├── token/
195
+ │ │ ├── __init__.py
196
+ │ │ ├── budget.py # Token budget management
197
+ │ │ ├── accounting.py # Debit/credit per request
198
+ │ │ └── settlement.py # Host payouts, Stripe integration
199
+ │ ├── identity/
200
+ │ │ ├── __init__.py
201
+ │ │ ├── bedrock.py # Bedrock SDK integration (identity, consent, audit)
202
+ │ │ └── verification.py # Node certificate verification
203
+ │ ├── model/
204
+ │ │ ├── __init__.py
205
+ │ │ ├── registry.py # Model catalog, expert layouts
206
+ │ │ ├── download.py # Expert weight distribution
207
+ │ │ └── formats.py # GGUF, safetensors handling
208
+ │ └── config.py # Configuration management
209
+ ├── tests/
210
+ │ ├── test_router.py
211
+ │ ├── test_node.py
212
+ │ ├── test_token.py
213
+ │ ├── test_identity.py
214
+ │ └── test_model.py
215
+ ├── docs/
216
+ │ ├── ARCHITECTURE.md
217
+ │ ├── HOSTING.md # How to host an expert node
218
+ │ ├── MODELS.md # Supported models and expert layouts
219
+ │ └── TOKEN_ECONOMICS.md # Detailed token economics
220
+ └── site/
221
+ └── index.html # Landing page
222
+ ```
223
+
224
+ ## Installation
225
+
226
+ Sawyer is currently available from source.
227
+
228
+ ```bash
229
+ git clone https://github.com/drc10101/sawyer.git
230
+ cd sawyer
231
+ pip install -e .
232
+ ```
233
+
234
+ PyPI package publishing is planned after the alpha API stabilizes.
235
+
236
+ ## Dependencies
237
+
238
+ - **Bedrock** (infill-bedrock): Node identity, consent tokens, audit chain
239
+ - **vLLM / llama.cpp**: Expert inference backend
240
+ - **gRPC / QUIC**: Low-latency inter-node communication
241
+ - **Stripe**: Subscription and host payout management
242
+ - **HuggingFace Hub**: Model weight distribution
243
+
244
+ ## License
245
+
246
+ BSL-1.1 — free for non-production use. Production use requires a paid license. Converts to Apache 2.0 after the change date.
247
+
248
+ ---
249
+
250
+ **Alpha milestone:** Single-router, two-node demo with one toy MoE model — real node registration, real health checks, real routing logs, fake economics. Prove the network behavior first, then graduate to larger quantized MoE weights.
@@ -0,0 +1,212 @@
1
+ # Sawyer — Distributed MoE Inference Network
2
+
3
+ > **Status: Active prototype** — Provider onboarding and APIs are evolving. Sawyer is under active development toward an alpha milestone.
4
+
5
+ **"The load is split. Friends help."**
6
+
7
+ <div align="center"><img src="sawyer_logo.png" alt="Sawyer on Bedrock" width="600"></div>
8
+
9
+ Named for Tom Sawyer, who turned an impossible chore into a community effort by making participation irresistible. Sawyer turns GPU inference — a credit-draining trap — into a distributed network where each node carries a piece of the load, and everyone benefits.
10
+
11
+ **Sawyer does not require providers to host full models.** Providers host isolated MoE expert workloads that the router activates only when needed. That is why Sawyer is not just another distributed inference project — it distributes only the sparse, independently activated sub-networks that MoE architectures make possible.
12
+
13
+ Built on [Bedrock](https://github.com/drc10101/bedrock) for node identity, consent-gated routing, and auditability. Sawyer runs on Bedrock. Sawyer does not own Bedrock.
14
+
15
+ ## The Problem
16
+
17
+ Cloud API credits run out. A single GPT-4-class model call costs only cents, but those cents compound into hundreds of dollars. Frontier open-weight MoE models (Mixtral 8x7B, DeepSeek-V2, Qwen MoE) can run locally, but require 2-4 GPUs at full precision. Most developers have one GPU — or none.
18
+
19
+ ## The Idea
20
+
21
+ A distributed network where:
22
+
23
+ 1. **Volunteers host MoE expert weights** on their hardware (a single RTX 3090 can host one or more quantized experts)
24
+ 2. **A router activates only the relevant experts per token** (MoE sparsity — only 2 of 8 experts fire on Mixtral)
25
+ 3. **Users pay $5/month** for a token budget — cheap enough to experiment, paid enough to sustain
26
+ 4. **Hosts earn a share** proportional to compute contributed — the incentive altruism alone can't provide
27
+ 5. **Bedrock provides the trust layer** — node identity, consent tokens, audit chain
28
+
29
+ ## Why It Works
30
+
31
+ - **MoE is more distributable than dense inference.** Experts are independent sub-networks. Unlike tensor parallelism (which splits a single matrix across GPUs), each expert runs its own forward pass and is activated independently. Sawyer's core engineering challenge is keeping routing, expert execution, and aggregation fast enough to feel local.
32
+ - **Sparsity means efficiency.** Only ~25% of parameters activate per token on Mixtral. The network doesn't pay for dormant compute.
33
+ - **Quantized models fit on consumer hardware.** Q4_K_M Mixtral expert ≈ 1.5GB. A 3090 can host 2-3 experts comfortably alongside other workloads.
34
+ - **$5/mo is the sweet spot.** Below the psychological barrier of "another subscription." Enough tokens to prototype, test, and run real workloads. Revenue sustains the network without extracting from users.
35
+
36
+ ## Architecture
37
+
38
+ ```
39
+ [User/Client]
40
+
41
+
42
+ [Sawyer Router] ←── Bedrock identity, consent-gated routing
43
+
44
+ ├──→ [Node: Expert 0] (RTX 3090, Dallas)
45
+ ├──→ [Node: Expert 2] (A100, Frankfurt)
46
+ ├──→ [Node: Expert 5] (M2 Max, Tokyo)
47
+ └──→ [Node: Expert 7] (T4, São Paulo)
48
+
49
+
50
+ [Aggregated Output] → User
51
+ ```
52
+
53
+ ## Core Modules
54
+
55
+ ### 1. `sawyer/router/` — Expert Router
56
+ - Receives token embeddings from the user's local dense layers
57
+ - Routes to the correct expert(s) based on the model's gating network
58
+ - Aggregates expert outputs, returns to user
59
+ - Tracks latency per node, falls back to redundant experts on timeout
60
+
61
+ ### 2. `sawyer/node/` — Node Agent
62
+ - Registers with the network via Bedrock node identity
63
+ - Advertises capabilities: GPU model, VRAM, bandwidth, latency
64
+ - Hosts one or more expert weight files
65
+ - Serves inference requests via encrypted gRPC/QUIC
66
+ - Reports health and throughput to the router
67
+
68
+ ### 3. `sawyer/token/` — Token Economics
69
+ - $5/mo subscription grants a token budget (e.g., 500K tokens)
70
+ - Tokens debit per inference request (input + output tokens)
71
+ - Token budget resets monthly; unused tokens roll over for at most one month
72
+ - Hosts earn credits proportional to tokens served
73
+ - Credits convert to USD payout at thresholds ($10 minimum)
74
+
75
+ ### 4. `sawyer/identity/` — Bedrock Integration
76
+ - Every node holds a Bedrock cryptographic identity
77
+ - Router verifies node certificates before routing
78
+ - Consent tokens gate which models a node will serve
79
+ - Audit chain logs every inference request for compliance
80
+
81
+ ### 5. `sawyer/model/` — Model Registry
82
+ - Catalog of supported MoE models and their expert layouts
83
+ - Expert weight files versioned and checksummed
84
+ - Nodes download experts on registration or on-demand
85
+ - Supports Mixtral 8x7B, DeepSeek-V2, Qwen MoE, and extensible for new models
86
+
87
+ ## Protocol
88
+
89
+ ```
90
+ 1. Node registers with Sawyer network
91
+ → Bedrock identity issued (certificate, scope, audit chain)
92
+ → Node advertises: GPU, VRAM, bandwidth, experts available
93
+
94
+ 2. User sends inference request
95
+ → Sawyer router authenticates user (token balance check)
96
+ → Router runs gating network locally to select experts
97
+ → Router sends expert activation request to node(s)
98
+ → Node validates consent token, runs expert forward pass
99
+ → Node returns expert output, logs to audit chain
100
+ → Router aggregates, returns to user
101
+ → Token balance debited
102
+
103
+ 3. Monthly settlement
104
+ → Host credits calculated from tokens served
105
+ → Payouts processed at $10 threshold
106
+ ```
107
+
108
+ ## Pricing
109
+
110
+ | Tier | Price | Token Budget | Use Case |
111
+ |------|-------|-------------|----------|
112
+ | Explorer | $5/mo | 500K tokens | Prototyping, experimentation |
113
+ | Builder | $20/mo | 2M tokens | Development, testing |
114
+ | Operator | $50/mo | 5M tokens | Production workloads |
115
+
116
+ Token costs vary by model (frontier models cost more tokens per request). Quantized models get a token discount (lower quality, lower cost).
117
+
118
+ ## Host Economics
119
+
120
+ - Earn credits per token of expert inference served
121
+ - Credits proportional to: tokens served × model complexity × response time SLA
122
+ - Payout at $10 threshold via Stripe
123
+ - A single RTX 3090 hosting 2 Mixtral experts at ~30% utilization: estimated $8-15/mo
124
+
125
+ ## Supported Models (Initial)
126
+
127
+ | Model | Params | Experts | Active/Token | Q4_K_M Size | Expert Size |
128
+ |-------|--------|---------|-------------|-------------|-------------|
129
+ | Mixtral 8x7B | 46.7B | 8 | 2 | ~24GB | ~1.5GB |
130
+ | DeepSeek-V2 Lite | 15.7B | 64 routed + 2 shared | 6 | ~9GB | varies |
131
+ | Qwen1.5-MoE-A2.7B | 14.3B | 60 | 4 | ~7GB | varies |
132
+ | DBRX | 132B | 16 | 4 | ~65GB | ~2.5GB |
133
+
134
+ ## Repository Structure
135
+
136
+ ```
137
+ sawyer/
138
+ ├── README.md
139
+ ├── LICENSE # BSL-1.1 (same as Bedrock)
140
+ ├── pyproject.toml
141
+ ├── sawyer/
142
+ │ ├── __init__.py
143
+ │ ├── cli.py # sawyer register, sawyer serve, sawyer status
144
+ │ ├── router/
145
+ │ │ ├── __init__.py
146
+ │ │ ├── gateway.py # Main router server (gRPC/QUIC)
147
+ │ │ ├── scheduler.py # Expert selection, load balancing
148
+ │ │ ├── gating.py # Model-specific gating network runner
149
+ │ │ └── aggregator.py # Combine expert outputs
150
+ │ ├── node/
151
+ │ │ ├── __init__.py
152
+ │ │ ├── agent.py # Node agent — hosts experts, serves inference
153
+ │ │ ├── registry.py # Register capabilities, download experts
154
+ │ │ ├── inference.py # Expert forward pass (vLLM / llama.cpp)
155
+ │ │ └── health.py # Heartbeat, throughput reporting
156
+ │ ├── token/
157
+ │ │ ├── __init__.py
158
+ │ │ ├── budget.py # Token budget management
159
+ │ │ ├── accounting.py # Debit/credit per request
160
+ │ │ └── settlement.py # Host payouts, Stripe integration
161
+ │ ├── identity/
162
+ │ │ ├── __init__.py
163
+ │ │ ├── bedrock.py # Bedrock SDK integration (identity, consent, audit)
164
+ │ │ └── verification.py # Node certificate verification
165
+ │ ├── model/
166
+ │ │ ├── __init__.py
167
+ │ │ ├── registry.py # Model catalog, expert layouts
168
+ │ │ ├── download.py # Expert weight distribution
169
+ │ │ └── formats.py # GGUF, safetensors handling
170
+ │ └── config.py # Configuration management
171
+ ├── tests/
172
+ │ ├── test_router.py
173
+ │ ├── test_node.py
174
+ │ ├── test_token.py
175
+ │ ├── test_identity.py
176
+ │ └── test_model.py
177
+ ├── docs/
178
+ │ ├── ARCHITECTURE.md
179
+ │ ├── HOSTING.md # How to host an expert node
180
+ │ ├── MODELS.md # Supported models and expert layouts
181
+ │ └── TOKEN_ECONOMICS.md # Detailed token economics
182
+ └── site/
183
+ └── index.html # Landing page
184
+ ```
185
+
186
+ ## Installation
187
+
188
+ Sawyer is currently available from source.
189
+
190
+ ```bash
191
+ git clone https://github.com/drc10101/sawyer.git
192
+ cd sawyer
193
+ pip install -e .
194
+ ```
195
+
196
+ PyPI package publishing is planned after the alpha API stabilizes.
197
+
198
+ ## Dependencies
199
+
200
+ - **Bedrock** (infill-bedrock): Node identity, consent tokens, audit chain
201
+ - **vLLM / llama.cpp**: Expert inference backend
202
+ - **gRPC / QUIC**: Low-latency inter-node communication
203
+ - **Stripe**: Subscription and host payout management
204
+ - **HuggingFace Hub**: Model weight distribution
205
+
206
+ ## License
207
+
208
+ BSL-1.1 — free for non-production use. Production use requires a paid license. Converts to Apache 2.0 after the change date.
209
+
210
+ ---
211
+
212
+ **Alpha milestone:** Single-router, two-node demo with one toy MoE model — real node registration, real health checks, real routing logs, fake economics. Prove the network behavior first, then graduate to larger quantized MoE weights.
@@ -0,0 +1,81 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "sawyer-core"
7
+ version = "0.1.0"
8
+ description = "Distributed MoE inference network — the load is split, friends help"
9
+ readme = "README.md"
10
+ license = {text = "BSL-1.1"}
11
+ requires-python = ">=3.11"
12
+ authors = [
13
+ {name = "InFill Systems, LLC"},
14
+ ]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Developers",
18
+ "License :: Other/Proprietary License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
24
+ ]
25
+ dependencies = [
26
+ "infill-bedrock>=0.3.0",
27
+ "grpcio>=1.60.0",
28
+ "grpcio-tools>=1.60.0",
29
+ "aiohttp>=3.9.0",
30
+ "httpx>=0.27.0",
31
+ "pydantic>=2.5.0",
32
+ "cryptography>=42.0.0",
33
+ "stripe>=9.0.0",
34
+ ]
35
+
36
+ [project.optional-dependencies]
37
+ dev = [
38
+ "pytest>=8.0",
39
+ "pytest-asyncio>=0.23",
40
+ "pytest-cov>=5.0",
41
+ "ruff>=0.4.0",
42
+ "black>=24.0",
43
+ "isort>=5.13",
44
+ "mypy>=1.9",
45
+ ]
46
+ inference = [
47
+ "vllm>=0.4.0",
48
+ "llama-cpp-python>=0.2.60",
49
+ ]
50
+
51
+ [project.scripts]
52
+ sawyer = "sawyer.cli:main"
53
+
54
+ [tool.setuptools.packages.find]
55
+ where = ["."]
56
+ include = ["sawyer*"]
57
+ exclude = ["tests*"]
58
+
59
+ [tool.ruff]
60
+ target-version = "py311"
61
+ line-length = 99
62
+
63
+ [tool.ruff.lint]
64
+ select = ["E", "F", "W", "I", "UP", "B", "SIM"]
65
+ ignore = ["B008"]
66
+
67
+ [tool.black]
68
+ line-length = 99
69
+ target-version = ["py311"]
70
+
71
+ [tool.isort]
72
+ profile = "black"
73
+ line_length = 99
74
+
75
+ [tool.mypy]
76
+ python_version = "3.11"
77
+ strict = true
78
+
79
+ [tool.pytest.ini_options]
80
+ asyncio_mode = "auto"
81
+ testpaths = ["tests"]
@@ -0,0 +1,24 @@
1
+ """
2
+ Sawyer — Distributed MoE Inference Network.
3
+
4
+ The load is split. Friends help.
5
+
6
+ Sawyer distributes Mixture-of-Experts model inference across a network of
7
+ volunteer-hosted nodes. Each node hosts one or more expert weight files, and
8
+ a central router activates only the relevant experts per token. Users pay a
9
+ low monthly subscription ($5/mo) for a token budget — cheap enough to
10
+ experiment, paid enough to sustain the network. Hosts earn credits
11
+ proportional to compute contributed.
12
+
13
+ Trust is provided by Bedrock: cryptographic node identity, consent-gated
14
+ routing, and a tamper-evident audit chain.
15
+
16
+ SPDX-License-Identifier: BUSL-1.1 — See LICENSE for details.
17
+ """
18
+
19
+ __version__ = "0.1.0"
20
+ __author__ = "InFill Systems, LLC"
21
+
22
+ from sawyer.server import SawyerServer
23
+
24
+ __all__ = ["SawyerServer"]
@@ -0,0 +1,19 @@
1
+ """Sawyer Auth package — API key validation and rate limiting."""
2
+
3
+ from sawyer.auth.api import (
4
+ APIKey,
5
+ AuthError,
6
+ InvalidAPIKey,
7
+ KeyStatus,
8
+ RateLimitExceeded,
9
+ SawyerAuth,
10
+ )
11
+
12
+ __all__ = [
13
+ "SawyerAuth",
14
+ "APIKey",
15
+ "KeyStatus",
16
+ "AuthError",
17
+ "InvalidAPIKey",
18
+ "RateLimitExceeded",
19
+ ]