@draig/lexis-two 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/package.json +7 -1
  2. package/.agents/plugins/marketplace.json +0 -21
  3. package/.claude-plugin/marketplace.json +0 -29
  4. package/.claude-plugin/plugin.json +0 -9
  5. package/.clinerules/lexis-two.md +0 -163
  6. package/.codex-plugin/plugin.json +0 -31
  7. package/.cursor/rules/lexis-two.mdc +0 -169
  8. package/.env.example +0 -8
  9. package/.github/FUNDING.yml +0 -1
  10. package/.github/copilot-instructions.md +0 -47
  11. package/.github/plugin/marketplace.json +0 -20
  12. package/.github/plugin/plugin.json +0 -16
  13. package/.github/workflows/deploy-site.yml +0 -53
  14. package/.github/workflows/test.yml +0 -29
  15. package/.kiro/steering/lexis-two.md +0 -167
  16. package/.nojekyll +0 -0
  17. package/.windsurf/rules/lexis-two.md +0 -163
  18. package/AGENTS.md +0 -163
  19. package/AUDIT.md +0 -74
  20. package/CNAME +0 -1
  21. package/SPECXIS.md +0 -576
  22. package/assets/benchmark-3model.svg +0 -21
  23. package/assets/lexis-two-complete.webp +0 -0
  24. package/assets/lexis-two-nobg.png +0 -0
  25. package/assets/logo.png +0 -0
  26. package/assets/social-preview.png +0 -0
  27. package/benchmarks/README.md +0 -114
  28. package/benchmarks/arms/baseline.js +0 -2
  29. package/benchmarks/arms/caveman-SKILL.md +0 -67
  30. package/benchmarks/arms/caveman.js +0 -8
  31. package/benchmarks/arms/lexis-two.js +0 -10
  32. package/benchmarks/arms/ponytail.js +0 -6
  33. package/benchmarks/behavior.js +0 -58
  34. package/benchmarks/behavior.yaml +0 -40
  35. package/benchmarks/benchmark-local.py +0 -156
  36. package/benchmarks/benchmark-opencode-go.js +0 -294
  37. package/benchmarks/correctness.js +0 -294
  38. package/benchmarks/lib/aggregate-opencode-go.js +0 -103
  39. package/benchmarks/lib/load-env.js +0 -31
  40. package/benchmarks/lib/opencode-go-client.js +0 -151
  41. package/benchmarks/loc.js +0 -13
  42. package/benchmarks/opencode-go-models.json +0 -31
  43. package/benchmarks/promptfooconfig.yaml +0 -41
  44. package/benchmarks/prompts.json +0 -15
  45. package/benchmarks/render-opencode-go-report.js +0 -28
  46. package/benchmarks/results/2026-06-15-llama3.2-local.md +0 -76
  47. package/benchmarks/results/2026-06-16-opencode-go.md +0 -56
  48. package/benchmarks/results/opencode-go-2026-06-16-report.html +0 -226
  49. package/benchmarks/results/opencode-go-2026-06-16.json +0 -1339
  50. package/commands/lexis-two-audit.toml +0 -3
  51. package/commands/lexis-two-debt.toml +0 -3
  52. package/commands/lexis-two-help.toml +0 -3
  53. package/commands/lexis-two-plan.toml +0 -3
  54. package/commands/lexis-two-review.toml +0 -3
  55. package/commands/lexis-two-security.toml +0 -3
  56. package/commands/lexis-two.toml +0 -3
  57. package/docs/assets/lexis-two-nobg.png +0 -0
  58. package/docs/assets/logo.png +0 -0
  59. package/docs/assets/logo.svg +0 -4
  60. package/docs/portability.md +0 -147
  61. package/docs/site.md +0 -52
  62. package/examples/api-endpoint.md +0 -68
  63. package/examples/caching.md +0 -74
  64. package/examples/date-picker.md +0 -48
  65. package/examples/email-validation.md +0 -51
  66. package/examples/sorting.md +0 -42
  67. package/gemini-extension.json +0 -7
  68. package/opencode.json +0 -4
  69. package/pi-extension/index.js +0 -161
  70. package/pi-extension/package.json +0 -8
  71. package/pi-extension/test/extension.test.js +0 -89
  72. package/pi-extension/test/helpers.test.js +0 -35
  73. package/scripts/check-rule-copies.js +0 -82
  74. package/site/astro.config.mjs +0 -18
  75. package/site/package-lock.json +0 -4913
  76. package/site/package.json +0 -14
  77. package/site/public/CNAME +0 -1
  78. package/site/public/assets/lexis-two-nobg.png +0 -0
  79. package/site/public/assets/logo.png +0 -0
  80. package/site/public/assets/logo.svg +0 -4
  81. package/site/public/robots.txt +0 -4
  82. package/site/src/components/Adapt.astro +0 -33
  83. package/site/src/components/Benchmarks.astro +0 -232
  84. package/site/src/components/Commands.astro +0 -33
  85. package/site/src/components/Ecosystem.astro +0 -30
  86. package/site/src/components/Example.astro +0 -77
  87. package/site/src/components/Footer.astro +0 -28
  88. package/site/src/components/Header.astro +0 -87
  89. package/site/src/components/Hero.astro +0 -58
  90. package/site/src/components/Home.astro +0 -46
  91. package/site/src/components/Hosts.astro +0 -62
  92. package/site/src/components/Install.astro +0 -139
  93. package/site/src/components/LanguageSwitcher.astro +0 -82
  94. package/site/src/components/Philosophy.astro +0 -23
  95. package/site/src/components/Stacks.astro +0 -33
  96. package/site/src/components/Suggested.astro +0 -39
  97. package/site/src/data/opencode-go-benchmark.json +0 -230
  98. package/site/src/i18n/en.ts +0 -155
  99. package/site/src/i18n/es.ts +0 -158
  100. package/site/src/i18n/index.ts +0 -14
  101. package/site/src/layouts/Layout.astro +0 -114
  102. package/site/src/pages/benchmarks.astro +0 -4
  103. package/site/src/pages/es/benchmarks.astro +0 -4
  104. package/site/src/pages/es/index.astro +0 -10
  105. package/site/src/pages/index.astro +0 -10
  106. package/site/src/styles/global.css +0 -780
  107. package/site/tsconfig.json +0 -3
  108. package/tests/behavior.test.js +0 -80
  109. package/tests/commands.test.js +0 -40
  110. package/tests/copilot-plugin.test.js +0 -33
  111. package/tests/correctness.test.js +0 -191
  112. package/tests/gemini-extension.test.js +0 -78
  113. package/tests/hooks-windows.test.js +0 -48
  114. package/tests/hooks.test.js +0 -177
  115. package/tests/opencode-plugin.test.js +0 -64
@@ -1,31 +0,0 @@
1
- // Load KEY=VALUE lines from a .env file into process.env (no dotenv dependency).
2
- const fs = require('fs');
3
-
4
/**
 * Load KEY=VALUE pairs from a dotenv-style file into process.env.
 *
 * Blank lines, lines starting with '#', and lines with nothing before the
 * first '=' are skipped. Variables that already exist in process.env are
 * never overwritten. A value wrapped in one matching pair of single or
 * double quotes has that pair removed.
 *
 * @param {string} envPath - Path of the file to read.
 * @returns {boolean} false when the file does not exist, true once it has
 *   been read and applied.
 */
function loadEnvFile(envPath) {
  if (!fs.existsSync(envPath)) return false;

  // Drop a leading byte-order mark so it cannot end up inside the first line.
  const raw = fs.readFileSync(envPath, 'utf8').replace(/^\uFEFF/, '');
  for (const line of raw.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;

    const eq = trimmed.indexOf('=');
    if (eq <= 0) continue; // no '=' at all, or nothing before it

    const key = trimmed.slice(0, eq).trim();
    if (process.env[key] !== undefined) continue; // existing environment wins

    let value = trimmed.slice(eq + 1).trim();
    const quote = value[0];
    // Require at least two characters: a lone quote both "starts" and "ends"
    // the string and must not be stripped down to an empty value.
    if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
      value = value.slice(1, -1);
    }
    process.env[key] = value;
  }

  return true;
}
30
-
31
- module.exports = { loadEnvFile };
@@ -1,151 +0,0 @@
1
- // OpenCode Go API client — OpenAI chat/completions + Anthropic /messages transports.
2
- // Docs: https://opencode.ai/docs/go/#endpoints
3
-
4
- const fs = require('fs');
5
- const path = require('path');
6
-
7
- const DEFAULT_BASE = 'https://opencode.ai/zen/go/v1';
8
- const ENV_PATH = path.join(__dirname, '..', '..', '.env');
9
-
10
- function getApiKey() {
11
- const key = (process.env.OPENCODE_API_KEY || process.env.OPENCODE_GO_API_KEY || '').trim();
12
- if (!key) {
13
- const hint = fs.existsSync(ENV_PATH)
14
- ? `OPENCODE_API_KEY is empty in ${ENV_PATH}. Paste your OpenCode Go key after the = sign.`
15
- : `Create ${ENV_PATH} from .env.example and set OPENCODE_API_KEY=your-key`;
16
- throw new Error(`Missing OPENCODE_API_KEY. ${hint}`);
17
- }
18
- return key;
19
- }
20
-
21
- async function readJsonResponse(res) {
22
- const text = await res.text();
23
- let body;
24
- try {
25
- body = text ? JSON.parse(text) : {};
26
- } catch {
27
- body = { raw: text };
28
- }
29
- if (!res.ok) {
30
- const msg =
31
- body?.error?.message ||
32
- body?.message ||
33
- (typeof body?.error === 'string' ? body.error : null) ||
34
- text.slice(0, 500) ||
35
- res.statusText;
36
- throw new Error(`HTTP ${res.status}: ${msg}`);
37
- }
38
- return body;
39
- }
40
-
41
- function extractOpenAiText(body) {
42
- return body?.choices?.[0]?.message?.content ?? '';
43
- }
44
-
45
- function extractAnthropicText(body) {
46
- const blocks = body?.content;
47
- if (!Array.isArray(blocks)) return '';
48
- return blocks
49
- .filter((b) => b?.type === 'text' && typeof b.text === 'string')
50
- .map((b) => b.text)
51
- .join('');
52
- }
53
-
54
- function extractUsage(body, transport) {
55
- if (transport === 'openai-chat') {
56
- const u = body?.usage;
57
- if (!u) return null;
58
- return {
59
- inputTokens: u.prompt_tokens ?? 0,
60
- outputTokens: u.completion_tokens ?? 0,
61
- };
62
- }
63
- const u = body?.usage;
64
- if (!u) return null;
65
- return {
66
- inputTokens: u.input_tokens ?? 0,
67
- outputTokens: u.output_tokens ?? 0,
68
- };
69
- }
70
-
71
- async function chatOpenAi({ baseUrl, apiKey, model, system, user, maxTokens, temperature }) {
72
- const messages = [];
73
- if (system) messages.push({ role: 'system', content: system });
74
- messages.push({ role: 'user', content: user });
75
-
76
- const res = await fetch(`${baseUrl}/chat/completions`, {
77
- method: 'POST',
78
- headers: {
79
- Authorization: `Bearer ${apiKey}`,
80
- 'Content-Type': 'application/json',
81
- },
82
- body: JSON.stringify({
83
- model,
84
- messages,
85
- max_tokens: maxTokens,
86
- temperature,
87
- }),
88
- });
89
-
90
- const body = await readJsonResponse(res);
91
- return {
92
- text: extractOpenAiText(body),
93
- usage: extractUsage(body, 'openai-chat'),
94
- raw: body,
95
- };
96
- }
97
-
98
- async function chatAnthropic({ baseUrl, apiKey, model, system, user, maxTokens, temperature }) {
99
- const payload = {
100
- model,
101
- max_tokens: maxTokens,
102
- messages: [{ role: 'user', content: user }],
103
- };
104
- if (system) payload.system = system;
105
- if (typeof temperature === 'number') payload.temperature = temperature;
106
-
107
- // lexis: Go /messages rejects Bearer — x-api-key only (qwen3.7-max, minimax-m3)
108
- const res = await fetch(`${baseUrl}/messages`, {
109
- method: 'POST',
110
- headers: {
111
- 'x-api-key': apiKey,
112
- 'Content-Type': 'application/json',
113
- 'anthropic-version': '2023-06-01',
114
- },
115
- body: JSON.stringify(payload),
116
- });
117
-
118
- const body = await readJsonResponse(res);
119
- return {
120
- text: extractAnthropicText(body),
121
- usage: extractUsage(body, 'anthropic-messages'),
122
- raw: body,
123
- };
124
- }
125
-
126
- async function complete({
127
- modelId,
128
- modelConfig,
129
- system,
130
- user,
131
- baseUrl = DEFAULT_BASE,
132
- apiKey = getApiKey(),
133
- temperature = 1,
134
- }) {
135
- const maxTokens = modelConfig.maxTokens ?? 8192;
136
- const args = { baseUrl, apiKey, model: modelId, system, user, maxTokens, temperature };
137
-
138
- if (modelConfig.transport === 'anthropic-messages') {
139
- return chatAnthropic(args);
140
- }
141
- if (modelConfig.transport === 'openai-chat') {
142
- return chatOpenAi(args);
143
- }
144
- throw new Error(`Unknown transport for ${modelId}: ${modelConfig.transport}`);
145
- }
146
-
147
- module.exports = {
148
- DEFAULT_BASE,
149
- getApiKey,
150
- complete,
151
- };
package/benchmarks/loc.js DELETED
@@ -1,13 +0,0 @@
1
- // Deterministic code-size metric: non-blank, non-comment lines of code. Counts
2
- // fenced blocks, or the whole response when the model emitted bare code unfenced.
3
- // Recorded as the `code_loc` metric per arm (always passes; it is a measurement, not a gate).
4
- module.exports = (output) => {
5
- const text = String(output || '');
6
- const blocks = [...text.matchAll(/```[a-zA-Z0-9_+-]*\n([\s\S]*?)```/g)].map((m) => m[1]);
7
- const code = blocks.length ? blocks.join('\n') : text;
8
- const loc = code
9
- .split('\n')
10
- .map((l) => l.trim())
11
- .filter((l) => l && !l.startsWith('//') && !l.startsWith('#') && l !== '*/' && !l.startsWith('/*') && !l.startsWith('*')).length;
12
- return { pass: true, score: loc, reason: loc + ' code LOC' };
13
- };
@@ -1,31 +0,0 @@
1
- {
2
- "baseUrl": "https://opencode.ai/zen/go/v1",
3
- "defaultModels": [
4
- "kimi-k2.6",
5
- "deepseek-v4-pro",
6
- "qwen3.7-max",
7
- "minimax-m3"
8
- ],
9
- "models": {
10
- "kimi-k2.6": {
11
- "name": "Kimi K2.6",
12
- "transport": "openai-chat",
13
- "maxTokens": 8192
14
- },
15
- "deepseek-v4-pro": {
16
- "name": "DeepSeek V4 Pro",
17
- "transport": "openai-chat",
18
- "maxTokens": 8192
19
- },
20
- "qwen3.7-max": {
21
- "name": "Qwen3.7 Max",
22
- "transport": "anthropic-messages",
23
- "maxTokens": 8192
24
- },
25
- "minimax-m3": {
26
- "name": "MiniMax M3",
27
- "transport": "anthropic-messages",
28
- "maxTokens": 8192
29
- }
30
- }
31
- }
@@ -1,41 +0,0 @@
1
- # Ponytail benchmark: code size + cost across three arms, same model, same tasks.
2
- #
3
- # Run: npx promptfoo@latest eval -c benchmarks/promptfooconfig.yaml
4
- # View: npx promptfoo@latest view
5
- # Share: npx promptfoo@latest share (publishes a hosted report URL)
6
- #
7
- # Needs ANTHROPIC_API_KEY in the environment or a .env file (see benchmarks/README.md).
8
- # Caveman arm uses JuliusBrussee/caveman SKILL.md (MIT), vendored at arms/caveman-SKILL.md.
9
- description: "Ponytail vs caveman vs no-skill: same model, same tasks. Measures code LOC (deterministic) and tokens/cost (API telemetry)."
10
-
11
- providers:
12
- - id: anthropic:messages:claude-haiku-4-5-20251001
13
- config: { max_tokens: 8192, temperature: 1 }
14
- - id: anthropic:messages:claude-sonnet-4-6
15
- config: { max_tokens: 8192, temperature: 1 }
16
- - id: anthropic:messages:claude-opus-4-8
17
- config: { max_tokens: 8192, temperature: 1 }
18
-
19
- prompts:
20
- - id: file://arms/baseline.js
21
- label: baseline (no skill)
22
- - id: file://arms/caveman.js
23
- label: caveman
24
- - id: file://arms/ponytail.js
25
- label: ponytail
26
-
27
- defaultTest:
28
- assert:
29
- - type: javascript
30
- value: file://loc.js
31
- metric: code_loc
32
- - type: javascript
33
- value: file://correctness.js
34
- metric: correct
35
-
36
- tests:
37
- - vars: { task: "Write me a Python function that validates email addresses." }
38
- - vars: { task: "Add debounce to a search input in vanilla JavaScript. It currently fires an API call on every keystroke." }
39
- - vars: { task: "Write Python code that reads sales.csv and sums the 'amount' column." }
40
- - vars: { task: "Build me a countdown timer component in React that counts down from a given number of seconds." }
41
- - vars: { task: "Add rate limiting to my FastAPI endpoint so users can't spam it." }
@@ -1,15 +0,0 @@
1
- {
2
- "method": "One fresh Claude Code subagent per task x config, same model, no file outputs. Metrics from task telemetry: total tokens (includes thinking), duration. Code lines counted from fenced blocks in the deliverable.",
3
- "configs": [
4
- "baseline — no skill",
5
- "caveman — caveman SKILL.md (full) as operating instructions",
6
- "ponytail — ponytail SKILL.md (full) as operating instructions"
7
- ],
8
- "tasks": [
9
- { "id": "email", "prompt": "Write me a Python function that validates email addresses." },
10
- { "id": "debounce", "prompt": "Add debounce to a search input in vanilla JavaScript — it currently fires an API call on every keystroke." },
11
- { "id": "csv-sum", "prompt": "Write Python code that reads sales.csv and sums the 'amount' column." },
12
- { "id": "react-countdown", "prompt": "Build me a countdown timer component in React that counts down from a given number of seconds." },
13
- { "id": "rate-limit", "prompt": "Add rate limiting to my FastAPI endpoint so users can't spam it." }
14
- ]
15
- }
@@ -1,28 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * Sync aggregated benchmark data for the Astro site.
4
- *
5
- * Usage:
6
- * node benchmarks/render-opencode-go-report.js
7
- * node benchmarks/render-opencode-go-report.js benchmarks/results/opencode-go-2026-06-16.json
8
- */
9
-
10
- const fs = require('fs');
11
- const path = require('path');
12
- const { aggregateOpencodeGo, findLatestJson } = require('./lib/aggregate-opencode-go.js');
13
-
14
- const SITE_DATA = path.join(__dirname, '..', 'site', 'src', 'data', 'opencode-go-benchmark.json');
15
-
16
/**
 * Aggregate a benchmark results file and write the chart data consumed by
 * the site to SITE_DATA.
 *
 * The results file is the first CLI argument when given (resolved against
 * the current working directory); otherwise the path returned by
 * findLatestJson() is used.
 */
function main() {
  const [, , cliPath] = process.argv;
  const input = cliPath ? path.resolve(cliPath) : findLatestJson();

  const results = JSON.parse(fs.readFileSync(input, 'utf8'));
  const chart = aggregateOpencodeGo(results);

  // Make sure the target directory exists before writing into it.
  const targetDir = path.dirname(SITE_DATA);
  fs.mkdirSync(targetDir, { recursive: true });
  fs.writeFileSync(SITE_DATA, `${JSON.stringify(chart, null, 2)}\n`, 'utf8');

  console.log(`Site data → ${SITE_DATA}`);
  console.log('Preview charts: npm run site:dev → http://localhost:4321/benchmarks/');
}
27
-
28
- main();
@@ -1,76 +0,0 @@
1
- # Local model benchmark: llama3.2 via Ollama — 2026-06-15
2
-
3
- Same 5 tasks as the Claude benchmark, same three arms (baseline / caveman / ponytail),
4
- run against a local **llama3.2:latest** (3.2B, Q4_K_M) via Ollama on a Windows 11 machine.
5
- Tooling: `benchmarks/benchmark-local.py` (no promptfoo needed).
6
-
7
- > **Updated 2026-06-15:** the LOC counter now counts bare, unfenced code. It
8
- > previously counted only fenced code blocks and scored everything else as 0,
9
- > which silently deflated any arm whose output happened to skip the fences (small
10
- > models do this often). Numbers below use the corrected counter at n=5 median.
11
- > Absolute times reflect this machine (GPU-accelerated); compare arms within a
12
- > run, not against an earlier CPU-bound machine.
13
-
14
- ## Results (n=5, median)
15
-
16
- **Code LOC**
17
-
18
- | arm | email | debounce | csv-sum | countdown | rate-limit | **TOTAL** |
19
- |---|--:|--:|--:|--:|--:|--:|
20
- | baseline | 16 | 18 | 22 | 37 | 16 | **109** |
21
- | caveman | 16 | 21 | 18 | 46 | 32 | **133** |
22
- | ponytail | 17 | 22 | 18 | 52 | 28 | **137** |
23
-
24
- **Time (seconds)**
25
-
26
- | arm | email | debounce | csv-sum | countdown | rate-limit | **TOTAL** |
27
- |---|--:|--:|--:|--:|--:|--:|
28
- | baseline | 3.1 | 3.7 | 3.6 | 4.2 | 4.8 | **19.4** |
29
- | caveman | 4.1 | 4.2 | 3.6 | 4.4 | 4.8 | **21.1** |
30
- | ponytail | 4.1 | 4.2 | 3.8 | 4.8 | 4.9 | **21.8** |
31
-
32
- ## Key findings
33
-
34
- **On llama3.2 the LOC effect is inside the noise floor.** At temperature 0.7 the
35
- per-run totals swing hard: across the five runs, ponytail landed anywhere from
36
- 17% *below* baseline to 50% *above* it. The n=5 median came out +26%; a separate
37
- n=3 median came out −17%. The aggregate itself flips sign depending on the
38
- sample, and the countdown task alone ranged 19 to 74 LOC on baseline. There is no
39
- stable LOC reduction to report.
40
-
41
- **Ponytail does not transfer to llama3.2.** The 80-94% LOC reduction seen on
42
- Claude is simply absent: the signal is lost in run-to-run variance. The one
43
- consistent effect is on time, and it goes the wrong way: ponytail is ~10-15%
44
- *slower* than baseline (more system-prompt tokens to process), never the 3-6x
45
- speedup seen on Claude.
46
-
47
- **Why:** ponytail is a prompt-engineering skill calibrated on Claude models,
48
- which are trained to follow detailed system instructions. A 3.2B quantised model
49
- absorbs the rules only partially and adds prose justifying its choices, paying
50
- the instruction-following cost without reliably converting it into less code.
51
-
52
- ## Reproduce
53
-
54
- Install Ollama and pull a model, then run from the repo root:
55
-
56
- ```bash
57
- ollama pull llama3.2
58
- python benchmarks/benchmark-local.py --model llama3.2 --repeat 5
59
- ```
60
-
61
- At this model size the LOC signal is noisy; raise `--repeat` (or lower the
62
- sampling temperature in the script) before reading anything into the totals.
63
-
64
- Optional flags:
65
-
66
- ```
67
- --repeat N Runs per cell; median is reported (default: 1)
68
- --ollama-url URL Ollama base URL (default: http://localhost:11434)
69
- ```
70
-
71
- ## Takeaway
72
-
73
- The benchmark claims in the README are accurate for the models tested (Haiku,
74
- Sonnet, Opus). For local/small models, expect the gains to shrink into the noise
75
- until instruction-following reaches a threshold comparable to Claude Haiku or
76
- better.
@@ -1,56 +0,0 @@
1
- # Lexis-Two benchmark — OpenCode Go (2026-06-16)
2
-
3
- Provider: [OpenCode Go](https://opencode.ai/docs/go/).
4
- Repeat: 3 per cell. Temperature: 1.
5
-
6
- ## Kimi K2.6 (`kimi-k2.6`)
7
-
8
- Repeat: 3. Arms: baseline, lexis-two.
9
-
10
- **Code LOC (median)**
11
-
12
- | arm | email | debounce | csv-sum | countdown | rate-limit | TOTAL | correct |
13
- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
14
- | baseline | 46 | 63 | 18 | 413 | 62 | 602 | 12/15 |
15
- | lexis-two | 13 | 10 | 4 | 13 | 23 | 63 | 12/15 |
16
-
17
- **lexis-two vs baseline (median total LOC):** 90% less code.
18
-
19
- ## DeepSeek V4 Pro (`deepseek-v4-pro`)
20
-
21
- Repeat: 3. Arms: baseline, lexis-two.
22
-
23
- **Code LOC (median)**
24
-
25
- | arm | email | debounce | csv-sum | countdown | rate-limit | TOTAL | correct |
26
- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
27
- | baseline | 36 | 61 | 25 | 113 | 53 | 288 | 14/15 |
28
- | lexis-two | 9 | 12 | 4 | 12 | 20 | 57 | 13/15 |
29
-
30
- **lexis-two vs baseline (median total LOC):** 80% less code.
31
-
32
- ## Qwen3.7 Max (`qwen3.7-max`)
33
-
34
- Repeat: 3. Arms: baseline, lexis-two.
35
-
36
- **Code LOC (median)**
37
-
38
- | arm | email | debounce | csv-sum | countdown | rate-limit | TOTAL | correct |
39
- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
40
- | baseline | 39 | 48 | 19 | 124 | 40 | 270 | 12/15 |
41
- | lexis-two | 14 | 9 | 4 | 10 | 17 | 54 | 13/15 |
42
-
43
- **lexis-two vs baseline (median total LOC):** 80% less code.
44
-
45
- ## MiniMax M3 (`minimax-m3`)
46
-
47
- Repeat: 3. Arms: baseline, lexis-two.
48
-
49
- **Code LOC (median)**
50
-
51
- | arm | email | debounce | csv-sum | countdown | rate-limit | TOTAL | correct |
52
- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
53
- | baseline | 55 | 66 | 33 | 112 | 59 | 325 | 11/15 |
54
- | lexis-two | 9 | 10 | 4 | 18 | 15 | 56 | 15/15 |
55
-
56
- **lexis-two vs baseline (median total LOC):** 83% less code.
@@ -1,226 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width, initial-scale=1" />
6
- <title>Lexis-Two benchmark — OpenCode Go (2026-06-16)</title>
7
- <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
8
- <style>
9
- :root {
10
- --bg: #0b0d0c;
11
- --panel: #121614;
12
- --border: #1e2420;
13
- --text: #e8ece9;
14
- --muted: #8a948d;
15
- --accent: #7cba8a;
16
- --baseline: #c97a7a;
17
- }
18
- * { box-sizing: border-box; }
19
- body {
20
- margin: 0;
21
- font-family: system-ui, sans-serif;
22
- background: var(--bg);
23
- color: var(--text);
24
- line-height: 1.5;
25
- }
26
- .wrap { max-width: 1100px; margin: 0 auto; padding: 2rem 1rem 3rem; }
27
- h1 { font-size: 1.75rem; margin: 0 0 0.25rem; }
28
- .sub { color: var(--muted); margin-bottom: 2rem; font-size: 0.95rem; }
29
- .grid {
30
- display: grid;
31
- gap: 1.25rem;
32
- grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
33
- }
34
- .card {
35
- background: var(--panel);
36
- border: 1px solid var(--border);
37
- border-radius: 0.5rem;
38
- padding: 1rem 1rem 0.5rem;
39
- }
40
- .card h2 {
41
- font-size: 0.85rem;
42
- text-transform: uppercase;
43
- letter-spacing: 0.06em;
44
- color: var(--muted);
45
- margin: 0 0 0.75rem;
46
- }
47
- .card canvas { max-height: 280px; }
48
- .wide { grid-column: 1 / -1; }
49
- table {
50
- width: 100%;
51
- border-collapse: collapse;
52
- font-size: 0.875rem;
53
- }
54
- th, td {
55
- padding: 0.5rem 0.75rem;
56
- border-bottom: 1px solid var(--border);
57
- text-align: right;
58
- }
59
- th:first-child, td:first-child { text-align: left; }
60
- th { color: var(--muted); font-weight: 600; }
61
- .good { color: var(--accent); }
62
- footer { margin-top: 2rem; color: var(--muted); font-size: 0.8rem; }
63
- a { color: var(--accent); }
64
- </style>
65
- </head>
66
- <body>
67
- <div class="wrap">
68
- <h1>Lexis-Two × OpenCode Go</h1>
69
- <p class="sub">
70
- Source: <code>opencode-go-2026-06-16.json</code> · 3 runs/cell · median LOC ·
71
- arms: baseline, lexis-two
72
- </p>
73
-
74
- <div class="grid">
75
- <div class="card wide">
76
- <h2>Total code LOC (median, 5 tasks)</h2>
77
- <canvas id="chart-total-loc"></canvas>
78
- </div>
79
- <div class="card">
80
- <h2>LOC reduction vs baseline</h2>
81
- <canvas id="chart-reduction"></canvas>
82
- </div>
83
- <div class="card">
84
- <h2>Wall time (median total seconds)</h2>
85
- <canvas id="chart-time"></canvas>
86
- </div>
87
- <div class="card wide">
88
- <h2>LOC by task — lexis-two arm</h2>
89
- <canvas id="chart-by-task"></canvas>
90
- </div>
91
- <div class="card wide">
92
- <h2>Summary table</h2>
93
- <table id="summary-table">
94
- <thead>
95
- <tr>
96
- <th>Model</th>
97
- <th>Baseline LOC</th>
98
- <th>Lexis-Two LOC</th>
99
- <th>Reduction</th>
100
- <th>Correct (lexis)</th>
101
- </tr>
102
- </thead>
103
- <tbody></tbody>
104
- </table>
105
- </div>
106
- </div>
107
-
108
- <footer>
109
- Regenerate: <code>node benchmarks/render-opencode-go-report.js</code>
110
- </footer>
111
- </div>
112
-
113
- <script>
114
- const DATA = {"date":"2026-06-16","repeat":3,"models":[{"id":"kimi-k2.6","locByArmTask":{"baseline":{"email":56,"debounce":71,"csv-sum":19,"countdown":366,"rate-limit":49},"lexis-two":{"email":10,"debounce":5,"csv-sum":4,"countdown":16,"rate-limit":14}},"timeByArmTask":{"baseline":{"email":12.973,"debounce":14.098,"csv-sum":5.08,"countdown":21.987,"rate-limit":16.836},"lexis-two":{"email":16.23,"debounce":8.363,"csv-sum":9.846,"countdown":18.389,"rate-limit":16.189}},"correctByArm":{"baseline":{"pass":11,"total":15},"lexis-two":{"pass":10,"total":15}},"totals":{"baselineLoc":561,"lexisLoc":49,"reductionPct":91,"baselineTimeSec":71,"lexisTimeSec":69}},{"id":"deepseek-v4-pro","locByArmTask":{"baseline":{"email":60,"debounce":39,"csv-sum":26,"countdown":90,"rate-limit":66},"lexis-two":{"email":7,"debounce":7,"csv-sum":4,"countdown":10,"rate-limit":18}},"timeByArmTask":{"baseline":{"email":35.051,"debounce":14.509,"csv-sum":13.422,"countdown":46.762,"rate-limit":32.401},"lexis-two":{"email":27.456,"debounce":16.982,"csv-sum":24.931,"countdown":36.191,"rate-limit":39.952}},"correctByArm":{"baseline":{"pass":13,"total":15},"lexis-two":{"pass":12,"total":15}},"totals":{"baselineLoc":281,"lexisLoc":46,"reductionPct":84,"baselineTimeSec":142.1,"lexisTimeSec":145.5}},{"id":"qwen3.7-max","locByArmTask":{"baseline":{"email":34,"debounce":44,"csv-sum":17,"countdown":128,"rate-limit":47},"lexis-two":{"email":7,"debounce":5,"csv-sum":3,"countdown":10,"rate-limit":13}},"timeByArmTask":{"baseline":{"email":43.542,"debounce":17.58,"csv-sum":25.876,"countdown":32.586,"rate-limit":41.602},"lexis-two":{"email":39.323,"debounce":28.275,"csv-sum":23.769,"countdown":28.775,"rate-limit":40.786}},"correctByArm":{"baseline":{"pass":14,"total":15},"lexis-two":{"pass":11,"total":15}},"totals":{"baselineLoc":270,"lexisLoc":38,"reductionPct":86,"baselineTimeSec":161.2,"lexisTimeSec":160.9}},{"id":"minimax-m3","locByArmTask":{"baseline":{"email":62,"debounce":58,"csv-sum":43,"countdown":117,"rat
e-limit":76},"lexis-two":{"email":12,"debounce":6,"csv-sum":2,"countdown":13,"rate-limit":14}},"timeByArmTask":{"baseline":{"email":18.398,"debounce":14.916,"csv-sum":10.237,"countdown":19.516,"rate-limit":22.131},"lexis-two":{"email":8.002,"debounce":2.393,"csv-sum":2.423,"countdown":2.753,"rate-limit":4.209}},"correctByArm":{"baseline":{"pass":12,"total":15},"lexis-two":{"pass":9,"total":15}},"totals":{"baselineLoc":356,"lexisLoc":47,"reductionPct":87,"baselineTimeSec":85.2,"lexisTimeSec":19.8}}],"tasks":["email","debounce","csv-sum","countdown","rate-limit"],"arms":["baseline","lexis-two"]};
115
// Human-readable model names, in the same order as DATA.models.
const labels = DATA.models.map((m) => m.id.replace('kimi-k2.6','Kimi K2.6').replace('deepseek-v4-pro','DeepSeek V4').replace('qwen3.7-max','Qwen3.7 Max').replace('minimax-m3','MiniMax M3'));

// Global Chart.js look shared by every chart on the page.
Chart.defaults.color = '#8a948d';
Chart.defaults.borderColor = '#1e2420';
Chart.defaults.font.family = 'system-ui, sans-serif';

// Create a bar chart on the canvas with the given id; every chart uses the
// model labels on its category axis.
function renderBarChart(canvasId, datasets, options) {
  return new Chart(document.getElementById(canvasId), {
    type: 'bar',
    data: { labels, datasets },
    options,
  });
}

// Baseline/lexis-two dataset pair built from two keys of each model's totals.
function armDatasets(baselineKey, lexisKey) {
  return [
    {
      label: 'baseline',
      data: DATA.models.map((m) => m.totals[baselineKey]),
      backgroundColor: '#c97a7a',
    },
    {
      label: 'lexis-two',
      data: DATA.models.map((m) => m.totals[lexisKey]),
      backgroundColor: '#7cba8a',
    },
  ];
}

// Options for a vertical chart with a bottom legend and a titled y axis.
function verticalOptions(yTitle) {
  return {
    responsive: true,
    plugins: { legend: { position: 'bottom' } },
    scales: { y: { beginAtZero: true, title: { display: true, text: yTitle } } },
  };
}

// Total LOC per model, both arms side by side.
renderBarChart('chart-total-loc', armDatasets('baselineLoc', 'lexisLoc'), verticalOptions('lines of code'));

// Percentage reduction per model, drawn as horizontal bars on a 0–100 axis.
renderBarChart(
  'chart-reduction',
  [
    {
      label: '% less code',
      data: DATA.models.map((m) => m.totals.reductionPct),
      backgroundColor: '#7cba8a',
    },
  ],
  {
    indexAxis: 'y',
    responsive: true,
    plugins: { legend: { display: false } },
    scales: {
      x: { beginAtZero: true, max: 100, ticks: { callback: (v) => v + '%' } },
    },
  },
);

// Total wall time per model, both arms side by side.
renderBarChart('chart-time', armDatasets('baselineTimeSec', 'lexisTimeSec'), verticalOptions('seconds'));

// LOC per task for the lexis-two arm: one dataset (colour) per task.
const taskLabels = DATA.tasks;
const taskColors = ['#7cba8a', '#5a9a6a', '#9fd4a8', '#4a7356', '#3d5f48'];
renderBarChart(
  'chart-by-task',
  taskLabels.map((taskId, i) => ({
    label: taskId,
    data: DATA.models.map((m) => m.locByArmTask['lexis-two'][taskId]),
    backgroundColor: taskColors[i % taskColors.length],
  })),
  {
    responsive: true,
    plugins: { legend: { position: 'bottom' } },
    scales: {
      x: { stacked: false },
      y: { beginAtZero: true, title: { display: true, text: 'LOC (lexis-two)' } },
    },
  },
);
210
-
211
// Fill the summary table with one row per model. Cells are created as
// elements and filled through textContent, so every value taken from DATA is
// rendered as plain text and can never be parsed as markup.
const tbody = document.querySelector('#summary-table tbody');
for (const m of DATA.models) {
  const c = m.correctByArm['lexis-two'];
  const tr = document.createElement('tr');

  // [cell text, CSS class] in column order: model, baseline LOC,
  // lexis-two LOC, reduction, correctness of the lexis-two arm.
  const cells = [
    [m.id, ''],
    [m.totals.baselineLoc, ''],
    [m.totals.lexisLoc, 'good'],
    [`${m.totals.reductionPct}%`, 'good'],
    [`${c.pass}/${c.total}`, ''],
  ];

  for (const [value, className] of cells) {
    const td = document.createElement('td');
    if (className) td.className = className;
    td.textContent = String(value);
    tr.appendChild(td);
  }
  tbody.appendChild(tr);
}
224
- </script>
225
- </body>
226
- </html>