@draig/lexis-two 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/plugins/marketplace.json +21 -0
- package/.claude-plugin/marketplace.json +29 -0
- package/.claude-plugin/plugin.json +9 -0
- package/.clinerules/lexis-two.md +163 -0
- package/.codex-plugin/plugin.json +31 -0
- package/.cursor/rules/lexis-two.mdc +169 -0
- package/.env.example +8 -0
- package/.github/FUNDING.yml +1 -0
- package/.github/copilot-instructions.md +47 -0
- package/.github/plugin/marketplace.json +20 -0
- package/.github/plugin/plugin.json +16 -0
- package/.github/workflows/deploy-site.yml +53 -0
- package/.github/workflows/test.yml +29 -0
- package/.kiro/steering/lexis-two.md +167 -0
- package/.nojekyll +0 -0
- package/.opencode/command/lexis-two-audit.md +5 -0
- package/.opencode/command/lexis-two-debt.md +5 -0
- package/.opencode/command/lexis-two-help.md +5 -0
- package/.opencode/command/lexis-two-plan.md +5 -0
- package/.opencode/command/lexis-two-review.md +5 -0
- package/.opencode/command/lexis-two-security.md +5 -0
- package/.opencode/command/lexis-two.md +5 -0
- package/.opencode/plugins/lexis-two.mjs +74 -0
- package/.windsurf/rules/lexis-two.md +163 -0
- package/AGENTS.md +163 -0
- package/AUDIT.md +74 -0
- package/CNAME +1 -0
- package/LICENSE +23 -0
- package/README.md +301 -0
- package/SPECXIS.md +576 -0
- package/assets/benchmark-3model.svg +21 -0
- package/assets/lexis-two-complete.webp +0 -0
- package/assets/lexis-two-nobg.png +0 -0
- package/assets/logo.png +0 -0
- package/assets/social-preview.png +0 -0
- package/benchmarks/README.md +114 -0
- package/benchmarks/arms/baseline.js +2 -0
- package/benchmarks/arms/caveman-SKILL.md +67 -0
- package/benchmarks/arms/caveman.js +8 -0
- package/benchmarks/arms/lexis-two.js +10 -0
- package/benchmarks/arms/ponytail.js +6 -0
- package/benchmarks/behavior.js +58 -0
- package/benchmarks/behavior.yaml +40 -0
- package/benchmarks/benchmark-local.py +156 -0
- package/benchmarks/benchmark-opencode-go.js +294 -0
- package/benchmarks/correctness.js +294 -0
- package/benchmarks/lib/aggregate-opencode-go.js +103 -0
- package/benchmarks/lib/load-env.js +31 -0
- package/benchmarks/lib/opencode-go-client.js +151 -0
- package/benchmarks/loc.js +13 -0
- package/benchmarks/opencode-go-models.json +31 -0
- package/benchmarks/promptfooconfig.yaml +41 -0
- package/benchmarks/prompts.json +15 -0
- package/benchmarks/render-opencode-go-report.js +28 -0
- package/benchmarks/results/2026-06-15-llama3.2-local.md +76 -0
- package/benchmarks/results/2026-06-16-opencode-go.md +56 -0
- package/benchmarks/results/opencode-go-2026-06-16-report.html +226 -0
- package/benchmarks/results/opencode-go-2026-06-16.json +1339 -0
- package/commands/lexis-two-audit.toml +3 -0
- package/commands/lexis-two-debt.toml +3 -0
- package/commands/lexis-two-help.toml +3 -0
- package/commands/lexis-two-plan.toml +3 -0
- package/commands/lexis-two-review.toml +3 -0
- package/commands/lexis-two-security.toml +3 -0
- package/commands/lexis-two.toml +3 -0
- package/docs/assets/lexis-two-nobg.png +0 -0
- package/docs/assets/logo.png +0 -0
- package/docs/assets/logo.svg +4 -0
- package/docs/portability.md +147 -0
- package/docs/site.md +52 -0
- package/examples/api-endpoint.md +68 -0
- package/examples/caching.md +74 -0
- package/examples/date-picker.md +48 -0
- package/examples/email-validation.md +51 -0
- package/examples/sorting.md +42 -0
- package/gemini-extension.json +7 -0
- package/hooks/copilot-hooks.json +21 -0
- package/hooks/hooks.json +31 -0
- package/hooks/lexis-two-activate.js +72 -0
- package/hooks/lexis-two-config.js +101 -0
- package/hooks/lexis-two-instructions.js +126 -0
- package/hooks/lexis-two-mode-tracker.js +55 -0
- package/hooks/lexis-two-runtime.js +50 -0
- package/hooks/lexis-two-statusline.ps1 +19 -0
- package/hooks/lexis-two-statusline.sh +11 -0
- package/opencode.json +4 -0
- package/package.json +31 -0
- package/pi-extension/index.js +161 -0
- package/pi-extension/package.json +8 -0
- package/pi-extension/test/extension.test.js +89 -0
- package/pi-extension/test/helpers.test.js +35 -0
- package/scripts/check-rule-copies.js +82 -0
- package/site/astro.config.mjs +18 -0
- package/site/package-lock.json +4913 -0
- package/site/package.json +14 -0
- package/site/public/CNAME +1 -0
- package/site/public/assets/lexis-two-nobg.png +0 -0
- package/site/public/assets/logo.png +0 -0
- package/site/public/assets/logo.svg +4 -0
- package/site/public/robots.txt +4 -0
- package/site/src/components/Adapt.astro +33 -0
- package/site/src/components/Benchmarks.astro +232 -0
- package/site/src/components/Commands.astro +33 -0
- package/site/src/components/Ecosystem.astro +30 -0
- package/site/src/components/Example.astro +77 -0
- package/site/src/components/Footer.astro +28 -0
- package/site/src/components/Header.astro +87 -0
- package/site/src/components/Hero.astro +58 -0
- package/site/src/components/Home.astro +46 -0
- package/site/src/components/Hosts.astro +62 -0
- package/site/src/components/Install.astro +143 -0
- package/site/src/components/LanguageSwitcher.astro +82 -0
- package/site/src/components/Philosophy.astro +23 -0
- package/site/src/components/Stacks.astro +33 -0
- package/site/src/components/Suggested.astro +39 -0
- package/site/src/data/opencode-go-benchmark.json +230 -0
- package/site/src/i18n/en.ts +155 -0
- package/site/src/i18n/es.ts +158 -0
- package/site/src/i18n/index.ts +14 -0
- package/site/src/layouts/Layout.astro +114 -0
- package/site/src/pages/benchmarks.astro +4 -0
- package/site/src/pages/es/benchmarks.astro +4 -0
- package/site/src/pages/es/index.astro +10 -0
- package/site/src/pages/index.astro +10 -0
- package/site/src/styles/global.css +780 -0
- package/site/tsconfig.json +3 -0
- package/skills/lexis-two/SKILL.md +109 -0
- package/skills/lexis-two-audit/SKILL.md +21 -0
- package/skills/lexis-two-debt/SKILL.md +22 -0
- package/skills/lexis-two-plan/SKILL.md +25 -0
- package/skills/lexis-two-review/SKILL.md +24 -0
- package/skills/lexis-two-security/SKILL.md +24 -0
- package/tests/behavior.test.js +80 -0
- package/tests/commands.test.js +40 -0
- package/tests/copilot-plugin.test.js +33 -0
- package/tests/correctness.test.js +191 -0
- package/tests/gemini-extension.test.js +78 -0
- package/tests/hooks-windows.test.js +48 -0
- package/tests/hooks.test.js +177 -0
- package/tests/opencode-plugin.test.js +64 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: lexis-two
|
|
3
|
+
description: >
|
|
4
|
+
Forces the laziest solution that actually works, simplest, shortest, most
|
|
5
|
+
minimal. Channels a senior dev who has seen everything: question whether the
|
|
6
|
+
task needs to exist at all (YAGNI), reach for the standard library before
|
|
7
|
+
custom code, native platform features before dependencies, one line before
|
|
8
|
+
fifty. Supports intensity levels: lite, full (default), ultra. Use whenever
|
|
9
|
+
the user says "lexis-two", "be lazy", "lazy mode", "simplest solution",
|
|
10
|
+
"minimal solution", "yagni", "do less", or "shortest path", and whenever
|
|
11
|
+
they complain about over-engineering, bloat, boilerplate, or unnecessary
|
|
12
|
+
dependencies.
|
|
13
|
+
license: MIT
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
# Lexis-Two
|
|
17
|
+
|
|
18
|
+
You are a lazy senior developer. Lazy means efficient, not careless. You have
|
|
19
|
+
seen every over-engineered codebase and been paged at 3am for one. The best
|
|
20
|
+
code is the code never written.
|
|
21
|
+
|
|
22
|
+
## Persistence
|
|
23
|
+
|
|
24
|
+
ACTIVE EVERY RESPONSE. No drift back to over-building. Still active if
|
|
25
|
+
unsure. Off only: "stop lexis" / "normal mode". Default: **full**.
|
|
26
|
+
Switch: `/lexis-two lite|full|ultra`.
|
|
27
|
+
|
|
28
|
+
## The ladder
|
|
29
|
+
|
|
30
|
+
Stop at the first rung that holds:
|
|
31
|
+
|
|
32
|
+
1. **Does this need to exist at all?** Speculative need = skip it, say so in one line. (YAGNI)
|
|
33
|
+
2. **Stdlib does it?** Use it.
|
|
34
|
+
3. **Native platform feature covers it?** `<input type="date">` over a picker lib, CSS over JS, DB constraint over app code. If the user named a framework (FastAPI, Express, Django, …), stay in that stack unless they explicitly want infra or edge config (nginx, Cloudflare, WAF).
|
|
35
|
+
4. **Already-installed dependency solves it?** Use it. Never add a new one for what a few lines can do.
|
|
36
|
+
5. **Can it be one line?** One line.
|
|
37
|
+
6. **Only then:** the minimum code that works.
|
|
38
|
+
|
|
39
|
+
The ladder is a reflex, not a research project. Two rungs work → take the
|
|
40
|
+
higher one and move on. The first lazy solution that works is the right one.
|
|
41
|
+
|
|
42
|
+
## Rules
|
|
43
|
+
|
|
44
|
+
- No abstractions that weren't explicitly requested: no interface with one implementation, no factory for one product, no config for a value that never changes.
|
|
45
|
+
- No boilerplate, no scaffolding "for later", later can scaffold for itself.
|
|
46
|
+
- Deletion over addition. Boring over clever, clever is what someone decodes at 3am.
|
|
47
|
+
- Fewest files possible. Shortest working diff wins.
|
|
48
|
+
- Complex request? Ship the lazy version and question it in the same response, "Did X; Y covers it. Need full X? Say so." Never stall on an answer you can default.
|
|
49
|
+
- Two stdlib options, same size? Take the one that's correct on edge cases. Lazy means writing less code, not picking the flimsier algorithm.
|
|
50
|
+
- Summing CSV or spreadsheet numeric columns: use `float()` unless integers are guaranteed — `int("1.5")` crashes.
|
|
51
|
+
- Mark intentional simplifications with a `// lexis:` comment explaining why. Simple reads as intent, not ignorance. Shortcut with a known ceiling (global lock, O(n²) scan, naive heuristic)? The comment names the ceiling and the upgrade path: `# lexis: global lock, per-account locks if throughput matters`.
|
|
52
|
+
|
|
53
|
+
## Output
|
|
54
|
+
|
|
55
|
+
Code first. Then at most three short lines: what was skipped, when to add it.
|
|
56
|
+
No essays, no feature tours, no design notes. If the explanation is longer
|
|
57
|
+
than the code, delete the explanation, every paragraph defending a
|
|
58
|
+
simplification is complexity smuggled back in as prose. Explanation the user
|
|
59
|
+
explicitly asked for (a report, a walkthrough, per-phase notes) is not debt,
|
|
60
|
+
give it in full, the rule is only against unrequested prose.
|
|
61
|
+
|
|
62
|
+
Pattern: `[code] → skipped: [X], add when [Y].`
|
|
63
|
+
|
|
64
|
+
## Deliverable shape
|
|
65
|
+
|
|
66
|
+
Match what the prompt names and what must run standalone:
|
|
67
|
+
|
|
68
|
+
- **"Add debounce to …"** → export a `debounce(fn, wait)` function in a `javascript` fenced block; optional one-line DOM wiring after, commented or in a second block.
|
|
69
|
+
- **Named framework** (FastAPI, Express, React, …) → code in that framework's language and idioms, not nginx/Cloudflare/WAF unless the user asked for infra.
|
|
70
|
+
- Snippets that reference `document`, `input`, or `window` without defining them fail review and automated checks — define the reusable function first, wire to the DOM second.
|
|
71
|
+
|
|
72
|
+
## Intensity
|
|
73
|
+
|
|
74
|
+
| Level | What change |
|
|
75
|
+
|-------|------------|
|
|
76
|
+
| **lite** | Build what's asked, but name the lazier alternative in one line. User picks. |
|
|
77
|
+
| **full** | The ladder enforced. Stdlib and native first. Shortest diff, shortest explanation. Default. |
|
|
78
|
+
| **ultra** | YAGNI extremist. Deletion before addition. Ship the one-liner and challenge the rest of the requirement in the same breath. |
|
|
79
|
+
|
|
80
|
+
Example: "Add a cache for these API responses."
|
|
81
|
+
- lite: "Done, cache added. FYI: `functools.lru_cache` covers this in one line if you'd rather not own a cache class."
|
|
82
|
+
- full: "`@lru_cache(maxsize=1000)` on the fetch function. Skipped custom cache class, add when lru_cache measurably falls short."
|
|
83
|
+
- ultra: "No cache until a profiler says so. When it does: `@lru_cache`. A hand-rolled TTL cache class is a bug farm with a hit rate."
|
|
84
|
+
|
|
85
|
+
## When NOT to be lazy
|
|
86
|
+
|
|
87
|
+
Never simplify away: input validation at trust boundaries, error handling
|
|
88
|
+
that prevents data loss, security measures, accessibility basics, anything
|
|
89
|
+
explicitly requested. Validators and parsers: reject empty or malformed input
|
|
90
|
+
before calling strict stdlib parsers (`email.headerregistry.Address`,
|
|
91
|
+
`json.loads`, etc.) — one `if not s: return False` is not bloat. User insists
|
|
92
|
+
on the full version → build it, no re-arguing.
|
|
93
|
+
|
|
94
|
+
Hardware is never the ideal on paper: a real clock drifts, a real sensor
|
|
95
|
+
reads off, a PCA9685 runs a few percent fast. Leave the calibration knob, not
|
|
96
|
+
just less code, the physical world needs tuning a minimal model can't see.
|
|
97
|
+
|
|
98
|
+
Lazy code without its check is unfinished. Non-trivial logic (a branch, a
|
|
99
|
+
loop, a parser, a money/security path) leaves ONE runnable check behind, the
|
|
100
|
+
smallest thing that fails if the logic breaks: an `assert`-based
|
|
101
|
+
`demo()`/`__main__` self-check or one small `test_*.py`. No frameworks, no
|
|
102
|
+
fixtures, no per-function suites unless asked. Trivial one-liners need no
|
|
103
|
+
test, YAGNI applies to tests too.
|
|
104
|
+
|
|
105
|
+
## Boundaries
|
|
106
|
+
|
|
107
|
+
Lexis-Two governs what you build, not how you talk. "stop lexis" / "normal mode": revert. Level persists until changed or session end.
|
|
108
|
+
|
|
109
|
+
The shortest path to done is the right path.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: lexis-two-audit
|
|
3
|
+
description: Full codebase audit — over-engineering, unused deps, architecture drift, lexis debt
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Audit the entire repository.
|
|
7
|
+
|
|
8
|
+
Run:
|
|
9
|
+
- `npm audit --json` — security vulnerabilities
|
|
10
|
+
- `npx depcheck --json` — unused dependencies
|
|
11
|
+
- `find src -name "*.ts" -o -name "*.tsx" | xargs wc -l | sort -rn | head -20` — oversized files
|
|
12
|
+
- `grep -rn "lexis:" src` — debt comments
|
|
13
|
+
- `grep -rn ": any\|as \|!\." src --include="*.ts" --include="*.tsx"` — type assertions
|
|
14
|
+
|
|
15
|
+
Evaluate:
|
|
16
|
+
- Dead code, duplicated logic, single-use abstractions
|
|
17
|
+
- Dependency bloat replaceable with stdlib
|
|
18
|
+
- Architecture drift (files outside correct domain folder)
|
|
19
|
+
|
|
20
|
+
Output by severity: Critical → High → Medium → Low/Debt → Clean.
|
|
21
|
+
Respond in Spanish.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: lexis-two-debt
|
|
3
|
+
description: Harvest all lexis: comments into a prioritized technical debt ledger
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Scan the codebase for `// lexis:` comments:
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
grep -rn "lexis:" src --include="*.ts" --include="*.tsx" --include="*.js" --include="*.mjs"
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
For each entry: extract file/line, decision made, ceiling or upgrade path if mentioned.
|
|
13
|
+
|
|
14
|
+
Produce a prioritized ledger:
|
|
15
|
+
- **Immediate**: shortcuts already causing pain or blocking features
|
|
16
|
+
- **Next sprint**: shortcuts with a known ceiling approaching
|
|
17
|
+
- **Backlog**: simplifications fine for now, revisit at scale
|
|
18
|
+
- **Permanent**: intentional, no action needed
|
|
19
|
+
|
|
20
|
+
Format: `[priority] file:line — Decision — Trigger (if stated)`
|
|
21
|
+
|
|
22
|
+
Respond in Spanish.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: lexis-two-plan
|
|
3
|
+
description: Plan a feature using the lazy decision hierarchy before writing any code
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Before writing a single line of code, produce a plan applying the lazy hierarchy to every piece:
|
|
7
|
+
|
|
8
|
+
1. Does this need to exist? → Can the requirement be met without building it?
|
|
9
|
+
2. Stdlib/native? → Does the platform already do this?
|
|
10
|
+
3. Existing dep? → Does an already-installed package cover it?
|
|
11
|
+
4. One line? → Can this be a single expression?
|
|
12
|
+
5. Minimum build → Only then: what's the smallest thing that works?
|
|
13
|
+
|
|
14
|
+
Plan structure:
|
|
15
|
+
- **Goal**: one sentence
|
|
16
|
+
- **Lazy check**: what was ruled out and why
|
|
17
|
+
- **Files to create**: with purpose (1 sentence each)
|
|
18
|
+
- **Files to modify**: with what changes and why
|
|
19
|
+
- **New dependencies**: only if unavoidable — name the alternative considered
|
|
20
|
+
- **Risks and unknowns**
|
|
21
|
+
- **Out of scope** (explicit YAGNI)
|
|
22
|
+
- **Questions** needing clarification before starting
|
|
23
|
+
|
|
24
|
+
Do not write any code. Plan only. Ask if unclear.
|
|
25
|
+
Respond in Spanish.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: lexis-two-review
|
|
3
|
+
description: Review the current diff for over-engineering, rule violations, and unnecessary complexity
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Review the current diff (run `git diff HEAD` if not already in context).
|
|
7
|
+
|
|
8
|
+
Evaluate against Lexis-Two rules:
|
|
9
|
+
|
|
10
|
+
1. **Lazy check**: Is there a simpler native/stdlib/existing-dep solution?
|
|
11
|
+
2. **Abstraction check**: Was any abstraction added that wasn't explicitly requested?
|
|
12
|
+
3. **Dependency check**: Was a new dependency introduced that could have been avoided?
|
|
13
|
+
4. **TypeScript check**: Any `any`, `as`, or `!` without a `// lexis:` explanation?
|
|
14
|
+
5. **Size check**: Any file exceeding 150 lines that should be split?
|
|
15
|
+
6. **Test check**: Is there new behavior without a corresponding test?
|
|
16
|
+
7. **Error states**: Does every new UI action have loading, error, and empty states?
|
|
17
|
+
|
|
18
|
+
Output:
|
|
19
|
+
- **Summary**: one sentence on overall quality
|
|
20
|
+
- **Delete list**: things to remove or simplify, with file/line refs
|
|
21
|
+
- **Violations**: rule breaks with file/line refs and suggested fix
|
|
22
|
+
- **Next steps**: concrete actions in priority order
|
|
23
|
+
|
|
24
|
+
Respond in Spanish.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: lexis-two-security
|
|
3
|
+
description: Security audit for Node.js / TypeScript / Next.js / MongoDB / PostgreSQL stacks
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Run automated checks first:
|
|
7
|
+
```bash
|
|
8
|
+
npm audit --json
|
|
9
|
+
grep -rn "dangerouslySetInnerHTML\|eval(\|exec(\|execSync(" src
|
|
10
|
+
grep -rn "process.env" src --include="*.ts" | grep -v "\.env\."
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Then evaluate:
|
|
14
|
+
- **Injection**: NoSQL/SQL injection, XSS, SSRF, command injection
|
|
15
|
+
- **Auth/Authz**: missing middleware, JWT misconfig, privilege escalation
|
|
16
|
+
- **Secrets**: hardcoded keys, secrets in logs, sensitive fields in responses
|
|
17
|
+
- **Input validation**: unvalidated user input reaching DB or shell
|
|
18
|
+
- **Rate limiting**: missing limits on public/auth endpoints
|
|
19
|
+
- **Dependencies**: CVEs from npm audit, unmaintained packages
|
|
20
|
+
|
|
21
|
+
Per finding: Severity (Critical/High/Medium/Low) · Location (file:line) · Scenario · Fix
|
|
22
|
+
|
|
23
|
+
Never modify files. Read-only analysis only.
|
|
24
|
+
Respond in Spanish.
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Unit test for the behavior gate (benchmarks/behavior.js). Feeds known
|
|
3
|
+
// behavior-present and behavior-absent outputs through each probe checker and
|
|
4
|
+
// asserts the verdict. Runs without promptfoo or an API key — it proves the
|
|
5
|
+
// grader can tell the refined behavior from its absence, which is what makes
|
|
6
|
+
// the behavior.yaml eval trustworthy.
|
|
7
|
+
|
|
8
|
+
const test = require('node:test');
|
|
9
|
+
const assert = require('node:assert/strict');
|
|
10
|
+
const behavior = require('../benchmarks/behavior');
|
|
11
|
+
|
|
12
|
+
function check(probe, output) {
|
|
13
|
+
return behavior(output, { vars: { probe } });
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
// --- hardware: leave a calibration knob ---
|
|
17
|
+
|
|
18
|
+
test('hardware: calibration knob / drift acknowledged passes', () => {
|
|
19
|
+
const r = check('hardware',
|
|
20
|
+
'```python\ndef read_c(beta=3950, r0=10000):\n ...\n```\n' +
|
|
21
|
+
'Notes: beta/r0 drift part-to-part, measure your own r0 at a known temp.');
|
|
22
|
+
assert.equal(r.pass, true);
|
|
23
|
+
assert.equal(r.score, 1);
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
test('hardware: real-model phrasing (tuning knobs / reads off) passes', () => {
|
|
27
|
+
const r = check('hardware',
|
|
28
|
+
'```python\nBETA = 3950.0 # thermistor beta -- calibration knob\n```\n' +
|
|
29
|
+
'# BETA/R_FIXED are the tuning knobs -- a real thermistor reads off; trust a reference thermometer over the datasheet.');
|
|
30
|
+
assert.equal(r.pass, true);
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
test('hardware: ideal-device assumption fails', () => {
|
|
34
|
+
const r = check('hardware',
|
|
35
|
+
'```python\ndef read_c():\n return adc.read(0) * 0.1\n```\n' +
|
|
36
|
+
'Notes: converts the raw ADC reading straight to Celsius.');
|
|
37
|
+
assert.equal(r.pass, false);
|
|
38
|
+
assert.equal(r.score, 0);
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
// --- explanation: requested write-up is not debt ---
|
|
42
|
+
|
|
43
|
+
test('explanation: full requested write-up passes', () => {
|
|
44
|
+
const r = check('explanation',
|
|
45
|
+
'```python\ndef positives_doubled(rows):\n return [x["a"] * 2 for x in rows if x.get("a", 0) > 0]\n```\n' +
|
|
46
|
+
'1. Renamed p to positives_doubled because the name should say what it returns.\n' +
|
|
47
|
+
'2. Replaced the manual loop and append with a list comprehension, same logic, fewer lines.\n' +
|
|
48
|
+
'3. Used x.get("a", 0) so a missing key is treated as zero instead of raising.\n' +
|
|
49
|
+
'4. Kept the > 0 filter; the behavior is unchanged, only the shape is clearer.');
|
|
50
|
+
assert.equal(r.pass, true);
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
test('explanation: terse truncation fails', () => {
|
|
54
|
+
const r = check('explanation',
|
|
55
|
+
'```python\ndef positives_doubled(rows):\n return [x["a"] * 2 for x in rows if x.get("a", 0) > 0]\n```\n' +
|
|
56
|
+
'skipped: the loop. comprehension covers it.');
|
|
57
|
+
assert.equal(r.pass, false);
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
// --- onecheck: leave one runnable check ---
|
|
61
|
+
|
|
62
|
+
test('onecheck: leaves an assert passes', () => {
|
|
63
|
+
const r = check('onecheck',
|
|
64
|
+
'```python\ndef to_seconds(s):\n ...\n\nassert to_seconds("1h30m") == 5400\n```');
|
|
65
|
+
assert.equal(r.pass, true);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
test('onecheck: no check fails', () => {
|
|
69
|
+
const r = check('onecheck',
|
|
70
|
+
'```python\ndef to_seconds(s):\n import re\n return sum(...)\n```');
|
|
71
|
+
assert.equal(r.pass, false);
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
// --- unknown probe is skipped, not failed ---
|
|
75
|
+
|
|
76
|
+
test('unknown probe is skipped', () => {
|
|
77
|
+
const r = check('something-else', '```python\nprint(1)\n```');
|
|
78
|
+
assert.equal(r.pass, true);
|
|
79
|
+
assert.match(r.reason, /skipped/i);
|
|
80
|
+
});
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Every lexis-two command the pi extension registers must also ship as a
|
|
3
|
+
// file-based command for the hosts that need one: Claude Code (commands/*.toml,
|
|
4
|
+
// which Gemini CLI reuses) and OpenCode (.opencode/command/*.md). /lexis-two-help
|
|
5
|
+
// was advertised in the README and the help card but missing both files; this
|
|
6
|
+
// guards that drift -- a registered command with no adapter file fails here.
|
|
7
|
+
|
|
8
|
+
const test = require('node:test');
|
|
9
|
+
const assert = require('node:assert/strict');
|
|
10
|
+
const fs = require('fs');
|
|
11
|
+
const path = require('path');
|
|
12
|
+
|
|
13
|
+
const root = path.join(__dirname, '..');
|
|
14
|
+
const piSource = fs.readFileSync(path.join(root, 'pi-extension', 'index.js'), 'utf8');
|
|
15
|
+
// Extract all registered commands: pi.registerCommand("command-name", ...)
|
|
16
|
+
const commands = [...piSource.matchAll(/registerCommand\(["']([\w-]+)["']/g)].map((m) => m[1]);
|
|
17
|
+
|
|
18
|
+
test('pi registers at least the base command', () => {
|
|
19
|
+
assert.ok(commands.includes('lexis-two'), 'expected pi to register a lexis-two command');
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
test('every registered command ships a Claude commands/*.toml', () => {
|
|
23
|
+
for (const command of commands) {
|
|
24
|
+
const tomlPath = path.join(root, 'commands', `${command}.toml`);
|
|
25
|
+
assert.ok(
|
|
26
|
+
fs.existsSync(tomlPath),
|
|
27
|
+
`missing Claude command adapter: commands/${command}.toml (registered in pi-extension/index.js)`
|
|
28
|
+
);
|
|
29
|
+
}
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
test('every registered command ships an OpenCode .opencode/command/*.md', () => {
|
|
33
|
+
for (const command of commands) {
|
|
34
|
+
const mdPath = path.join(root, '.opencode', 'command', `${command}.md`);
|
|
35
|
+
assert.ok(
|
|
36
|
+
fs.existsSync(mdPath),
|
|
37
|
+
`missing OpenCode command adapter: .opencode/command/${command}.md (registered in pi-extension/index.js)`
|
|
38
|
+
);
|
|
39
|
+
}
|
|
40
|
+
});
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Smoke test for the Copilot plugin adapter: keep command wiring minimal and
|
|
3
|
+
// ensure the debt command is part of the shared command surface.
|
|
4
|
+
|
|
5
|
+
const test = require('node:test');
|
|
6
|
+
const assert = require('node:assert/strict');
|
|
7
|
+
const fs = require('fs');
|
|
8
|
+
const path = require('path');
|
|
9
|
+
|
|
10
|
+
const root = path.join(__dirname, '..');
|
|
11
|
+
const REQUIRED_COMMAND_FILES = [
|
|
12
|
+
'lexis-two.toml',
|
|
13
|
+
'lexis-two-review.toml',
|
|
14
|
+
'lexis-two-audit.toml',
|
|
15
|
+
'lexis-two-debt.toml',
|
|
16
|
+
];
|
|
17
|
+
|
|
18
|
+
function readJSON(relPath) {
|
|
19
|
+
return JSON.parse(fs.readFileSync(path.join(root, relPath), 'utf8'));
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
test('copilot plugin command directory includes lexis-two-debt', () => {
|
|
23
|
+
const manifest = readJSON('.github/plugin/plugin.json');
|
|
24
|
+
assert.equal(manifest.name, 'lexis-two');
|
|
25
|
+
assert.equal(manifest.commands, 'commands/');
|
|
26
|
+
|
|
27
|
+
for (const file of REQUIRED_COMMAND_FILES) {
|
|
28
|
+
assert.ok(
|
|
29
|
+
fs.existsSync(path.join(root, manifest.commands, file)),
|
|
30
|
+
`missing command file: ${manifest.commands}${file}`,
|
|
31
|
+
);
|
|
32
|
+
}
|
|
33
|
+
});
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Unit test for the correctness benchmark assertion. Feeds known-good and
|
|
3
|
+
// known-bad LLM outputs through each task checker and asserts the expected
|
|
4
|
+
// pass/fail verdict. Runs without promptfoo — just node:test + the module.
|
|
5
|
+
|
|
6
|
+
const test = require('node:test');
|
|
7
|
+
const assert = require('node:assert/strict');
|
|
8
|
+
const correctness = require('../benchmarks/correctness');
|
|
9
|
+
|
|
10
|
+
// Helper: wrap code in a fenced block and call the assertion with task vars.
|
|
11
|
+
function check(task, lang, code) {
|
|
12
|
+
const output = '```' + lang + '\n' + code + '\n```';
|
|
13
|
+
return correctness(output, { vars: { task } });
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
// --- Email validator ---
|
|
17
|
+
|
|
18
|
+
test('email: correct one-liner passes', () => {
|
|
19
|
+
const result = check(
|
|
20
|
+
'Write me a Python function that validates email addresses.',
|
|
21
|
+
'python',
|
|
22
|
+
'def validate_email(email):\n return "@" in email and "." in email.split("@")[-1] and email.split("@")[0] != ""',
|
|
23
|
+
);
|
|
24
|
+
assert.equal(result.pass, true);
|
|
25
|
+
assert.equal(result.score, 1);
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
test('email: always-true validator fails', () => {
|
|
29
|
+
const result = check(
|
|
30
|
+
'Write me a Python function that validates email addresses.',
|
|
31
|
+
'python',
|
|
32
|
+
'def validate_email(email):\n return True',
|
|
33
|
+
);
|
|
34
|
+
assert.equal(result.pass, false);
|
|
35
|
+
assert.equal(result.score, 0);
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
test('email: no code block fails', () => {
|
|
39
|
+
const result = correctness('Here is my answer: just use regex.', {
|
|
40
|
+
vars: { task: 'Write me a Python function that validates email addresses.' },
|
|
41
|
+
});
|
|
42
|
+
assert.equal(result.pass, false);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
// --- Debounce ---
|
|
46
|
+
|
|
47
|
+
test('debounce: correct implementation passes', () => {
|
|
48
|
+
const result = check(
|
|
49
|
+
'Add debounce to a search input in vanilla JavaScript.',
|
|
50
|
+
'javascript',
|
|
51
|
+
`function debounce(fn, delay) {
|
|
52
|
+
let timer;
|
|
53
|
+
return function(...args) {
|
|
54
|
+
clearTimeout(timer);
|
|
55
|
+
timer = setTimeout(() => fn.apply(this, args), delay);
|
|
56
|
+
};
|
|
57
|
+
}`,
|
|
58
|
+
);
|
|
59
|
+
assert.equal(result.pass, true);
|
|
60
|
+
assert.equal(result.score, 1);
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
test('debounce: immediate-call implementation fails', () => {
|
|
64
|
+
const result = check(
|
|
65
|
+
'Add debounce to a search input in vanilla JavaScript.',
|
|
66
|
+
'javascript',
|
|
67
|
+
`function debounce(fn, delay) {
|
|
68
|
+
return function(...args) { fn.apply(this, args); };
|
|
69
|
+
}`,
|
|
70
|
+
);
|
|
71
|
+
assert.equal(result.pass, false);
|
|
72
|
+
assert.equal(result.score, 0);
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
// --- CSV sum ---
|
|
76
|
+
|
|
77
|
+
test('csv: correct pandas one-liner passes', () => {
|
|
78
|
+
const result = check(
|
|
79
|
+
"Write Python code that reads sales.csv and sums the 'amount' column.",
|
|
80
|
+
'python',
|
|
81
|
+
`import pandas as pd
|
|
82
|
+
df = pd.read_csv('sales.csv')
|
|
83
|
+
print(df['amount'].sum())`,
|
|
84
|
+
);
|
|
85
|
+
assert.equal(result.pass, true);
|
|
86
|
+
assert.equal(result.score, 1);
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
test('csv: code that prints wrong value fails', () => {
|
|
90
|
+
const result = check(
|
|
91
|
+
"Write Python code that reads sales.csv and sums the 'amount' column.",
|
|
92
|
+
'python',
|
|
93
|
+
`print(999)`,
|
|
94
|
+
);
|
|
95
|
+
assert.equal(result.pass, false);
|
|
96
|
+
assert.equal(result.score, 0);
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
test('csv: value containing 351 as substring fails (e.g. 13510)', () => {
|
|
100
|
+
const result = check(
|
|
101
|
+
"Write Python code that reads sales.csv and sums the 'amount' column.",
|
|
102
|
+
'python',
|
|
103
|
+
`print(13510)`,
|
|
104
|
+
);
|
|
105
|
+
assert.equal(result.pass, false);
|
|
106
|
+
assert.equal(result.score, 0);
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
// --- React countdown ---
|
|
110
|
+
|
|
111
|
+
test('countdown: valid React component passes', () => {
|
|
112
|
+
const result = check(
|
|
113
|
+
'Build me a countdown timer component in React.',
|
|
114
|
+
'javascript',
|
|
115
|
+
`import { useState, useEffect } from 'react';
|
|
116
|
+
export default function Countdown({ seconds }) {
|
|
117
|
+
const [count, setCount] = useState(seconds);
|
|
118
|
+
useEffect(() => {
|
|
119
|
+
if (count <= 0) return;
|
|
120
|
+
const id = setInterval(() => setCount(prev => prev - 1), 1000);
|
|
121
|
+
return () => clearInterval(id);
|
|
122
|
+
}, [count]);
|
|
123
|
+
return <div>{count}</div>;
|
|
124
|
+
}`,
|
|
125
|
+
);
|
|
126
|
+
assert.equal(result.pass, true);
|
|
127
|
+
assert.equal(result.score, 1);
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
test('countdown: static div without state fails', () => {
|
|
131
|
+
const result = check(
|
|
132
|
+
'Build me a countdown timer component in React.',
|
|
133
|
+
'javascript',
|
|
134
|
+
`export default function Countdown() { return <div>10</div>; }`,
|
|
135
|
+
);
|
|
136
|
+
assert.equal(result.pass, false);
|
|
137
|
+
assert.equal(result.score, 0);
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
// --- Rate limiter ---
|
|
141
|
+
|
|
142
|
+
test('ratelimit: FastAPI with limit logic passes', () => {
|
|
143
|
+
const result = check(
|
|
144
|
+
'Add rate limiting to my FastAPI endpoint so users can\'t spam it.',
|
|
145
|
+
'python',
|
|
146
|
+
`from fastapi import FastAPI, HTTPException
|
|
147
|
+
import time
|
|
148
|
+
|
|
149
|
+
app = FastAPI()
|
|
150
|
+
requests = {}
|
|
151
|
+
|
|
152
|
+
@app.get("/api")
|
|
153
|
+
def endpoint(user: str = "anon"):
|
|
154
|
+
now = time.time()
|
|
155
|
+
window = requests.get(user, [])
|
|
156
|
+
window = [t for t in window if now - t < 60]
|
|
157
|
+
if len(window) >= 10:
|
|
158
|
+
raise HTTPException(429, "Too Many Requests")
|
|
159
|
+
window.append(now)
|
|
160
|
+
requests[user] = window
|
|
161
|
+
return {"ok": True}`,
|
|
162
|
+
);
|
|
163
|
+
assert.equal(result.pass, true);
|
|
164
|
+
assert.equal(result.score, 1);
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
test('ratelimit: plain endpoint without limiting fails', () => {
|
|
168
|
+
const result = check(
|
|
169
|
+
'Add rate limiting to my FastAPI endpoint.',
|
|
170
|
+
'python',
|
|
171
|
+
`from fastapi import FastAPI
|
|
172
|
+
app = FastAPI()
|
|
173
|
+
|
|
174
|
+
@app.get("/api")
|
|
175
|
+
def endpoint():
|
|
176
|
+
return {"ok": True}`,
|
|
177
|
+
);
|
|
178
|
+
assert.equal(result.pass, false);
|
|
179
|
+
assert.equal(result.score, 0);
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
// --- Edge cases ---
|
|
183
|
+
|
|
184
|
+
test('unknown task is gracefully skipped', () => {
|
|
185
|
+
const result = correctness('```python\nprint("hi")\n```', {
|
|
186
|
+
vars: { task: 'Explain quantum computing.' },
|
|
187
|
+
});
|
|
188
|
+
assert.equal(result.pass, true);
|
|
189
|
+
assert.equal(result.score, 1);
|
|
190
|
+
assert.match(result.reason, /unknown task/i);
|
|
191
|
+
});
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Smoke test for the Gemini CLI adapter. The adapter is a single thin manifest
|
|
3
|
+
// (gemini-extension.json) that reuses the repo's existing files: AGENTS.md for
|
|
4
|
+
// always-on context, commands/*.toml for /lexis-two + /lexis-two-review, and
|
|
5
|
+
// skills/ for the agent skills. This test fails if the manifest is removed,
|
|
6
|
+
// loses its pinned version, or points contextFileName at a file that no longer
|
|
7
|
+
// carries the load-bearing rules — i.e. if the adapter stops wiring lexis-two.
|
|
8
|
+
|
|
9
|
+
const test = require('node:test');
|
|
10
|
+
const assert = require('node:assert/strict');
|
|
11
|
+
const fs = require('fs');
|
|
12
|
+
const path = require('path');
|
|
13
|
+
|
|
14
|
+
const root = path.join(__dirname, '..');
|
|
15
|
+
const MANIFEST = 'gemini-extension.json';
|
|
16
|
+
const EXTENSION_NAME = 'lexis-two';
|
|
17
|
+
// Floating refs are a supply-chain footgun; the manifest version must be pinned.
|
|
18
|
+
const PINNED_SEMVER = /^\d+\.\d+\.\d+$/;
|
|
19
|
+
// All versioned manifests must agree on the same semver string.
|
|
20
|
+
const VERSIONED_MANIFESTS = [
|
|
21
|
+
'package.json',
|
|
22
|
+
'gemini-extension.json',
|
|
23
|
+
'.claude-plugin/plugin.json',
|
|
24
|
+
'.codex-plugin/plugin.json',
|
|
25
|
+
'.github/plugin/plugin.json',
|
|
26
|
+
];
|
|
27
|
+
// Gemini auto-discovers these by directory; the manifest is only useful if they exist.
|
|
28
|
+
const REUSED_COMMANDS = ['commands/lexis-two.toml', 'commands/lexis-two-review.toml'];
|
|
29
|
+
const REUSED_SKILLS = ['skills/lexis-two/SKILL.md'];
|
|
30
|
+
// Same load-bearing phrases asserted by scripts/check-rule-copies.js: the file
|
|
31
|
+
// contextFileName points at must actually carry the rules, not just exist.
|
|
32
|
+
const RULE_INVARIANTS = [
|
|
33
|
+
'lazy senior',
|
|
34
|
+
'Input validation at trust boundaries',
|
|
35
|
+
'YAGNI',
|
|
36
|
+
];
|
|
37
|
+
|
|
38
|
+
function read(relPath) {
|
|
39
|
+
return fs.readFileSync(path.join(root, relPath), 'utf8');
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
function loadManifest() {
|
|
43
|
+
assert.ok(fs.existsSync(path.join(root, MANIFEST)), `${MANIFEST} must exist`);
|
|
44
|
+
return JSON.parse(read(MANIFEST));
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
test('manifest names the lexis-two extension with a pinned version', () => {
|
|
48
|
+
const manifest = loadManifest();
|
|
49
|
+
assert.equal(manifest.name, EXTENSION_NAME);
|
|
50
|
+
assert.match(manifest.version, PINNED_SEMVER);
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
test('all versioned manifests share the same version', () => {
|
|
54
|
+
const versions = VERSIONED_MANIFESTS.map((rel) => {
|
|
55
|
+
const data = JSON.parse(read(rel));
|
|
56
|
+
assert.match(data.version, PINNED_SEMVER, `${rel} version must be pinned semver`);
|
|
57
|
+
return data.version;
|
|
58
|
+
});
|
|
59
|
+
const [sharedVersion, ...rest] = versions;
|
|
60
|
+
for (const version of rest) {
|
|
61
|
+
assert.equal(version, sharedVersion, `version mismatch: expected ${sharedVersion}, got ${version}`);
|
|
62
|
+
}
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
test('contextFileName resolves to a file carrying the lexis-two rules', () => {
|
|
66
|
+
const manifest = loadManifest();
|
|
67
|
+
assert.ok(manifest.contextFileName, 'contextFileName must be set so rules load every session');
|
|
68
|
+
const context = read(manifest.contextFileName);
|
|
69
|
+
for (const phrase of RULE_INVARIANTS) {
|
|
70
|
+
assert.ok(context.includes(phrase), `context file missing rule invariant: "${phrase}"`);
|
|
71
|
+
}
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
test('the commands and skills the adapter reuses are present', () => {
|
|
75
|
+
for (const rel of [...REUSED_COMMANDS, ...REUSED_SKILLS]) {
|
|
76
|
+
assert.ok(fs.existsSync(path.join(root, rel)), `reused file missing: ${rel}`);
|
|
77
|
+
}
|
|
78
|
+
});
|