@bilalimamoglu/sift 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +201 -75
- package/dist/cli.js +36 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,22 +1,45 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
<img src="assets/brand/sift-logo-minimal-teal-default.svg" alt="sift logo" width="220" />
|
|
4
|
+
|
|
1
5
|
# sift
|
|
2
6
|
|
|
7
|
+
### Turn noisy command output into actionable diagnoses for your coding agent
|
|
8
|
+
|
|
9
|
+
**Benchmark-backed test triage - Heuristic-first reductions - Agent-ready terminal workflows**
|
|
10
|
+
|
|
3
11
|
[](https://www.npmjs.com/package/@bilalimamoglu/sift)
|
|
4
12
|
[](LICENSE)
|
|
5
13
|
[](https://github.com/bilalimamoglu/sift/actions/workflows/ci.yml)
|
|
14
|
+
[](https://nodejs.org/)
|
|
6
15
|
|
|
7
|
-
|
|
16
|
+
<br />
|
|
17
|
+
|
|
18
|
+
### Get Started
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
npm install -g @bilalimamoglu/sift
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
<sub>Works with pytest, vitest, jest, tsc, ESLint, webpack, Cargo, terraform, npm audit, and more.</sub>
|
|
25
|
+
|
|
26
|
+
</div>
|
|
27
|
+
|
|
28
|
+
---
|
|
8
29
|
|
|
9
|
-
|
|
30
|
+
## Why Sift?
|
|
31
|
+
|
|
32
|
+
When an agent hits noisy output, it burns budget reading logs instead of fixing the problem.
|
|
33
|
+
|
|
34
|
+
`sift` sits in front of that output and reduces it into a small, actionable first pass. Your agent reads the diagnosis, not the wall of text.
|
|
35
|
+
|
|
36
|
+
Turn 13,000 lines of test output into 2 root causes.
|
|
10
37
|
|
|
11
38
|
<p align="center">
|
|
12
39
|
<img src="assets/readme/test-status-demo.gif" alt="sift turning a pytest failure wall into a short diagnosis" width="960" />
|
|
13
40
|
</p>
|
|
14
41
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
128 test failures. 13,000 lines of logs. The agent reads all of it.
|
|
18
|
-
|
|
19
|
-
With `sift`, it reads this instead:
|
|
42
|
+
With `sift`, the same run becomes:
|
|
20
43
|
|
|
21
44
|
```text
|
|
22
45
|
- Tests did not pass.
|
|
@@ -30,20 +53,118 @@ With `sift`, it reads this instead:
|
|
|
30
53
|
- Decision: stop and act.
|
|
31
54
|
```
|
|
32
55
|
|
|
33
|
-
|
|
56
|
+
In the largest benchmark fixture, sift compressed 198,026 raw output tokens to 129. That is what the agent reads instead of the full log.
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Benchmark Results
|
|
61
|
+
|
|
62
|
+
The output reduction above measures a single command's raw output. The table below measures the full end-to-end debug session: how many tokens, tool calls, and seconds the agent spends to reach the same diagnosis.
|
|
63
|
+
|
|
64
|
+
Real debug loop on a 640-test Python backend with 124 repeated setup errors, 3 contract failures, and 511 passing tests:
|
|
65
|
+
|
|
66
|
+
| Metric | Without sift | With sift | Reduction |
|
|
67
|
+
|--------|-------------:|----------:|----------:|
|
|
68
|
+
| Tokens | 52,944 | 20,049 | 62% fewer |
|
|
69
|
+
| Tool calls | 40.8 | 12 | 71% fewer |
|
|
70
|
+
| Wall-clock time | 244s | 85s | 65% faster |
|
|
71
|
+
| Commands | 15.5 | 6 | 61% fewer |
|
|
72
|
+
| Diagnosis | Same | Same | Same outcome |
|
|
73
|
+
|
|
74
|
+
Same diagnosis, less agent thrash.
|
|
75
|
+
|
|
76
|
+
Methodology and caveats: [BENCHMARK_NOTES.md](BENCHMARK_NOTES.md)
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## How It Works
|
|
81
|
+
|
|
82
|
+
`sift` keeps the explanation simple:
|
|
83
|
+
|
|
84
|
+
1. **Capture output.** Run the noisy command, or accept output that is already being piped in.
|
|
85
|
+
2. **Run local heuristics.** Detect known failure shapes first so common cases stay cheap and deterministic.
|
|
86
|
+
3. **Return the diagnosis.** When heuristics are confident, `sift` gives the agent the root cause, anchor, and next step.
|
|
87
|
+
4. **Fall back only when needed.** If heuristics are not enough, `sift` uses a cheaper model instead of spending your main agent budget.
|
|
88
|
+
|
|
89
|
+
Your agent spends tokens fixing, not reading.
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Key Features
|
|
94
|
+
|
|
95
|
+
<table>
|
|
96
|
+
<tr>
|
|
97
|
+
<td width="33%" valign="top">
|
|
98
|
+
|
|
99
|
+
### Test Failure Triage
|
|
100
|
+
Collapse repeated pytest, vitest, and jest failures into a short diagnosis with root-cause buckets, anchors, and fix hints.
|
|
101
|
+
|
|
102
|
+
</td>
|
|
103
|
+
<td width="33%" valign="top">
|
|
104
|
+
|
|
105
|
+
### Typecheck and Lint Reduction
|
|
106
|
+
Group noisy `tsc` and ESLint output into the few issues that actually matter instead of dumping the whole log back into the model.
|
|
107
|
+
|
|
108
|
+
</td>
|
|
109
|
+
<td width="33%" valign="top">
|
|
110
|
+
|
|
111
|
+
### Build Failure Extraction
|
|
112
|
+
Pull out the first concrete error from webpack, esbuild/Vite, Cargo, Go, GCC/Clang, and similar build output.
|
|
113
|
+
|
|
114
|
+
</td>
|
|
115
|
+
</tr>
|
|
116
|
+
<tr>
|
|
117
|
+
<td width="33%" valign="top">
|
|
118
|
+
|
|
119
|
+
### Audit and Infra Risk
|
|
120
|
+
Surface high-impact `npm audit` findings and destructive `terraform plan` signals without making the agent read everything.
|
|
121
|
+
|
|
122
|
+
</td>
|
|
123
|
+
<td width="33%" valign="top">
|
|
124
|
+
|
|
125
|
+
### Heuristic-First by Default
|
|
126
|
+
Every built-in preset tries local parsing first. When the heuristic handles the output, no provider call is needed.
|
|
127
|
+
|
|
128
|
+
</td>
|
|
129
|
+
<td width="33%" valign="top">
|
|
130
|
+
|
|
131
|
+
### Agent and Automation Friendly
|
|
132
|
+
Use `sift` in Codex, Claude, CI, hooks, or shell scripts so downstream tooling gets short, structured answers instead of raw noise.
|
|
133
|
+
|
|
134
|
+
</td>
|
|
135
|
+
</tr>
|
|
136
|
+
</table>
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Setup and Agent Integration
|
|
141
|
+
|
|
142
|
+
Most built-in presets run entirely on local heuristics with no API key needed. For presets that fall back to a model (`diff-summary`, `log-errors`, or when heuristics are not confident enough), sift supports OpenAI-compatible and OpenRouter-compatible endpoints.
|
|
143
|
+
|
|
144
|
+
Set up the provider first, then install the managed instruction block for the agent you want to steer:
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
sift config setup
|
|
148
|
+
sift doctor
|
|
149
|
+
sift agent install codex
|
|
150
|
+
sift agent install claude
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
You can also preview, inspect, or remove those blocks:
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
sift agent show codex
|
|
157
|
+
sift agent status
|
|
158
|
+
sift agent remove codex
|
|
159
|
+
```
|
|
34
160
|
|
|
35
|
-
|
|
161
|
+
Command-first details live in [docs/cli-reference.md](docs/cli-reference.md).
|
|
36
162
|
|
|
37
|
-
|
|
163
|
+
---
|
|
38
164
|
|
|
39
|
-
|
|
40
|
-
- **Lint output** → grouped by rule, no model call
|
|
41
|
-
- **Build failures** → first real error from webpack, esbuild/Vite, Cargo, Go, GCC/Clang
|
|
42
|
-
- **`npm audit`** → high/critical vulnerabilities only, no model call
|
|
43
|
-
- **`terraform plan`** → destructive risk detection, no model call
|
|
44
|
-
- **Diffs and logs** → compressed through a cheaper model before reaching your agent
|
|
165
|
+
## Quick Start
|
|
45
166
|
|
|
46
|
-
|
|
167
|
+
### 1. Install
|
|
47
168
|
|
|
48
169
|
```bash
|
|
49
170
|
npm install -g @bilalimamoglu/sift
|
|
@@ -51,101 +172,94 @@ npm install -g @bilalimamoglu/sift
|
|
|
51
172
|
|
|
52
173
|
Requires Node.js 20+.
|
|
53
174
|
|
|
54
|
-
|
|
175
|
+
### 2. Run Sift in front of a noisy command
|
|
55
176
|
|
|
56
177
|
```bash
|
|
57
178
|
sift exec --preset test-status -- pytest -q
|
|
58
|
-
sift exec --preset test-status -- npx vitest run
|
|
59
|
-
sift exec --preset test-status -- npx jest
|
|
60
179
|
```
|
|
61
180
|
|
|
62
|
-
Other
|
|
181
|
+
Other common entry points:
|
|
63
182
|
|
|
64
183
|
```bash
|
|
65
|
-
sift exec --preset
|
|
66
|
-
sift exec --preset
|
|
67
|
-
sift exec --preset build-failure -- npm run build
|
|
68
|
-
sift exec --preset audit-critical -- npm audit
|
|
69
|
-
sift exec --preset infra-risk -- terraform plan
|
|
184
|
+
sift exec --preset test-status -- npx vitest run
|
|
185
|
+
sift exec --preset test-status -- npx jest
|
|
70
186
|
sift exec "what changed?" -- git diff
|
|
71
187
|
```
|
|
72
188
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
`sift` sits between a noisy command and a coding agent.
|
|
189
|
+
### 3. Zoom only if needed
|
|
76
190
|
|
|
77
|
-
|
|
78
|
-
2. Run local heuristics for known failure shapes.
|
|
79
|
-
3. If heuristics are confident, return the diagnosis. No model call.
|
|
80
|
-
4. If not, call a cheaper model — not your agent's.
|
|
191
|
+
Think of the workflow like this:
|
|
81
192
|
|
|
82
|
-
|
|
193
|
+
- `standard` = map
|
|
194
|
+
- `focused` = zoom
|
|
195
|
+
- raw traceback = last resort
|
|
83
196
|
|
|
84
|
-
|
|
197
|
+
```bash
|
|
198
|
+
sift rerun
|
|
199
|
+
sift rerun --remaining --detail focused
|
|
200
|
+
```
|
|
85
201
|
|
|
86
|
-
|
|
202
|
+
If `standard` already gives you the root cause, anchor, and fix, stop there and act.
|
|
87
203
|
|
|
88
|
-
|
|
204
|
+
---
|
|
89
205
|
|
|
90
|
-
|
|
91
|
-
|--------|-------------|
|
|
92
|
-
| `test-status` | Groups pytest, vitest, jest failures into root-cause buckets with anchors and fix suggestions. 30+ failure patterns. |
|
|
93
|
-
| `typecheck-summary` | Parses `tsc` output, groups by error code, returns max 5 bullets. No model call. |
|
|
94
|
-
| `lint-failures` | Parses ESLint output, groups by rule, detects fixable hints. No model call. |
|
|
95
|
-
| `build-failure` | Extracts first concrete error from webpack, esbuild/Vite, Cargo, Go, GCC/Clang, `tsc --build`. Falls back to model for unsupported formats. |
|
|
96
|
-
| `audit-critical` | Extracts high/critical vulnerabilities from `npm audit`. No model call. |
|
|
97
|
-
| `infra-risk` | Detects destructive signals in `terraform plan`. No model call. |
|
|
98
|
-
| `diff-summary` | Summarizes changes and risks in diff output. |
|
|
99
|
-
| `log-errors` | Extracts top error signals from log output. |
|
|
206
|
+
## Presets
|
|
100
207
|
|
|
101
|
-
|
|
208
|
+
| Preset | What it does | Needs provider? |
|
|
209
|
+
|--------|--------------|:---------------:|
|
|
210
|
+
| `test-status` | Groups pytest, vitest, and jest failures into root-cause buckets with anchors and fix suggestions. | No |
|
|
211
|
+
| `typecheck-summary` | Parses `tsc` output and groups issues by error code. | No |
|
|
212
|
+
| `lint-failures` | Parses ESLint output and groups failures by rule. | No |
|
|
213
|
+
| `build-failure` | Extracts the first concrete build error from common toolchains. | Fallback only |
|
|
214
|
+
| `audit-critical` | Pulls high and critical `npm audit` findings. | No |
|
|
215
|
+
| `infra-risk` | Detects destructive signals in `terraform plan`. | No |
|
|
216
|
+
| `diff-summary` | Summarizes change sets and likely risks in diff output. | Yes |
|
|
217
|
+
| `log-errors` | Extracts the strongest error signals from noisy logs. | Fallback only |
|
|
102
218
|
|
|
103
|
-
|
|
219
|
+
When output already exists in a pipeline, use pipe mode instead of `exec`:
|
|
104
220
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
| Wall-clock time | 244s | 85s | 65% faster |
|
|
110
|
-
| Commands | 15.5 | 6 | 61% fewer |
|
|
111
|
-
| Diagnosis | Same | Same | — |
|
|
221
|
+
```bash
|
|
222
|
+
pytest -q 2>&1 | sift preset test-status
|
|
223
|
+
npm audit 2>&1 | sift preset audit-critical
|
|
224
|
+
```
|
|
112
225
|
|
|
113
|
-
|
|
226
|
+
---
|
|
114
227
|
|
|
115
|
-
## Test
|
|
228
|
+
## Test Debugging Workflow
|
|
116
229
|
|
|
117
|
-
|
|
118
|
-
- `standard` = map
|
|
119
|
-
- `focused` = zoom
|
|
120
|
-
- raw traceback = last resort
|
|
230
|
+
For noisy test failures, start with the `test-status` preset and let `standard` be the default stop point.
|
|
121
231
|
|
|
122
232
|
```bash
|
|
123
233
|
sift exec --preset test-status -- <test command>
|
|
124
234
|
sift rerun
|
|
125
235
|
sift rerun --remaining --detail focused
|
|
236
|
+
sift rerun --remaining --detail verbose --show-raw
|
|
126
237
|
```
|
|
127
238
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
`sift rerun --remaining` narrows automatically for cached `pytest` runs. For `vitest` and `jest`, it reruns the full command and keeps diagnosis focused on what still fails.
|
|
239
|
+
Useful rules of thumb:
|
|
131
240
|
|
|
132
|
-
|
|
241
|
+
- If `standard` ends with `Decision: stop and act`, go read the source and fix the issue.
|
|
242
|
+
- Use `sift rerun` after a change to refresh the same test command at `standard`.
|
|
243
|
+
- Use `sift rerun --remaining` to zoom into what still fails after the first pass.
|
|
244
|
+
- Treat raw traceback as the last resort, not the starting point.
|
|
133
245
|
|
|
134
|
-
|
|
246
|
+
For machine branching or automation, `test-status` can also emit its diagnosis as JSON:
|
|
135
247
|
|
|
136
248
|
```bash
|
|
137
|
-
sift
|
|
138
|
-
sift
|
|
249
|
+
sift exec --preset test-status --goal diagnose --format json -- pytest -q
|
|
250
|
+
sift rerun --goal diagnose --format json
|
|
139
251
|
```
|
|
140
252
|
|
|
141
|
-
|
|
253
|
+
---
|
|
142
254
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
sift
|
|
146
|
-
|
|
255
|
+
## Limitations
|
|
256
|
+
|
|
257
|
+
- sift adds the most value when output is long, repetitive, and shaped by a small number of root causes. For short, obvious failures it may not save much.
|
|
258
|
+
- The deepest local heuristic coverage is in test debugging (pytest, vitest, jest). Other presets have solid heuristics but less depth.
|
|
259
|
+
- sift does not help with interactive or TUI-based commands.
|
|
260
|
+
- When heuristics cannot explain the output confidently, sift falls back to a provider. If no provider is configured, it returns what the heuristics could extract and signals that raw output may still be needed.
|
|
147
261
|
|
|
148
|
-
|
|
262
|
+
---
|
|
149
263
|
|
|
150
264
|
## Docs
|
|
151
265
|
|
|
@@ -154,6 +268,18 @@ Config details: [docs/cli-reference.md](docs/cli-reference.md)
|
|
|
154
268
|
- Benchmark methodology: [BENCHMARK_NOTES.md](BENCHMARK_NOTES.md)
|
|
155
269
|
- Release notes: [release-notes](release-notes)
|
|
156
270
|
|
|
271
|
+
---
|
|
272
|
+
|
|
157
273
|
## License
|
|
158
274
|
|
|
159
275
|
MIT
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
<div align="center">
|
|
280
|
+
|
|
281
|
+
Built for agent-first terminal workflows.
|
|
282
|
+
|
|
283
|
+
[Report Bug](https://github.com/bilalimamoglu/sift/issues) | [Request Feature](https://github.com/bilalimamoglu/sift/issues)
|
|
284
|
+
|
|
285
|
+
</div>
|
package/dist/cli.js
CHANGED
|
@@ -485,7 +485,14 @@ function writeExampleConfig(options = {}) {
|
|
|
485
485
|
}
|
|
486
486
|
const yaml = YAML2.stringify(defaultConfig);
|
|
487
487
|
fs2.mkdirSync(path3.dirname(resolved), { recursive: true });
|
|
488
|
-
fs2.writeFileSync(resolved, yaml,
|
|
488
|
+
fs2.writeFileSync(resolved, yaml, {
|
|
489
|
+
encoding: "utf8",
|
|
490
|
+
mode: 384
|
|
491
|
+
});
|
|
492
|
+
try {
|
|
493
|
+
fs2.chmodSync(resolved, 384);
|
|
494
|
+
} catch {
|
|
495
|
+
}
|
|
489
496
|
return resolved;
|
|
490
497
|
}
|
|
491
498
|
function writeConfigFile(options) {
|
|
@@ -1807,8 +1814,29 @@ function escapeRegExp(value) {
|
|
|
1807
1814
|
}
|
|
1808
1815
|
|
|
1809
1816
|
// src/commands/doctor.ts
|
|
1817
|
+
var PLACEHOLDER_API_KEYS = [
|
|
1818
|
+
"YOUR_API_KEY",
|
|
1819
|
+
"your_api_key",
|
|
1820
|
+
"your-api-key",
|
|
1821
|
+
"sk-xxx",
|
|
1822
|
+
"sk-placeholder",
|
|
1823
|
+
"CHANGE_ME",
|
|
1824
|
+
"change_me",
|
|
1825
|
+
"TODO",
|
|
1826
|
+
"todo",
|
|
1827
|
+
"xxx",
|
|
1828
|
+
"XXX"
|
|
1829
|
+
];
|
|
1830
|
+
function isPlaceholderApiKey(key) {
|
|
1831
|
+
if (!key) return false;
|
|
1832
|
+
return PLACEHOLDER_API_KEYS.includes(key.trim());
|
|
1833
|
+
}
|
|
1834
|
+
function isRealApiKey(key) {
|
|
1835
|
+
return Boolean(key) && !isPlaceholderApiKey(key);
|
|
1836
|
+
}
|
|
1810
1837
|
function runDoctor(config, configPath) {
|
|
1811
1838
|
const ui = createPresentation(Boolean(process.stdout.isTTY));
|
|
1839
|
+
const apiKeyStatus = isRealApiKey(config.provider.apiKey) ? "set" : isPlaceholderApiKey(config.provider.apiKey) ? "placeholder (not a real key)" : "not set";
|
|
1812
1840
|
const lines = [
|
|
1813
1841
|
"sift doctor",
|
|
1814
1842
|
"A quick check for your local setup.",
|
|
@@ -1817,7 +1845,7 @@ function runDoctor(config, configPath) {
|
|
|
1817
1845
|
ui.labelValue("provider", config.provider.provider),
|
|
1818
1846
|
ui.labelValue("model", config.provider.model),
|
|
1819
1847
|
ui.labelValue("baseUrl", config.provider.baseUrl),
|
|
1820
|
-
ui.labelValue("apiKey",
|
|
1848
|
+
ui.labelValue("apiKey", apiKeyStatus),
|
|
1821
1849
|
ui.labelValue("maxCaptureChars", String(config.input.maxCaptureChars)),
|
|
1822
1850
|
ui.labelValue("maxInputChars", String(config.input.maxInputChars)),
|
|
1823
1851
|
ui.labelValue("rawFallback", String(config.runtime.rawFallback))
|
|
@@ -1831,8 +1859,12 @@ function runDoctor(config, configPath) {
|
|
|
1831
1859
|
if (!config.provider.model) {
|
|
1832
1860
|
problems.push("Missing provider.model");
|
|
1833
1861
|
}
|
|
1834
|
-
if ((config.provider.provider === "openai" || config.provider.provider === "openai-compatible" || config.provider.provider === "openrouter") && !config.provider.apiKey) {
|
|
1835
|
-
|
|
1862
|
+
if ((config.provider.provider === "openai" || config.provider.provider === "openai-compatible" || config.provider.provider === "openrouter") && !isRealApiKey(config.provider.apiKey)) {
|
|
1863
|
+
if (isPlaceholderApiKey(config.provider.apiKey)) {
|
|
1864
|
+
problems.push(`provider.apiKey looks like a placeholder: "${config.provider.apiKey}"`);
|
|
1865
|
+
} else {
|
|
1866
|
+
problems.push("Missing provider.apiKey");
|
|
1867
|
+
}
|
|
1836
1868
|
problems.push(
|
|
1837
1869
|
`Set one of: ${getProviderApiKeyEnvNames(
|
|
1838
1870
|
config.provider.provider,
|