@draig/lexis-two 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -1
- package/.agents/plugins/marketplace.json +0 -21
- package/.claude-plugin/marketplace.json +0 -29
- package/.claude-plugin/plugin.json +0 -9
- package/.clinerules/lexis-two.md +0 -163
- package/.codex-plugin/plugin.json +0 -31
- package/.cursor/rules/lexis-two.mdc +0 -169
- package/.env.example +0 -8
- package/.github/FUNDING.yml +0 -1
- package/.github/copilot-instructions.md +0 -47
- package/.github/plugin/marketplace.json +0 -20
- package/.github/plugin/plugin.json +0 -16
- package/.github/workflows/deploy-site.yml +0 -53
- package/.github/workflows/test.yml +0 -29
- package/.kiro/steering/lexis-two.md +0 -167
- package/.nojekyll +0 -0
- package/.windsurf/rules/lexis-two.md +0 -163
- package/AGENTS.md +0 -163
- package/AUDIT.md +0 -74
- package/CNAME +0 -1
- package/SPECXIS.md +0 -576
- package/assets/benchmark-3model.svg +0 -21
- package/assets/lexis-two-complete.webp +0 -0
- package/assets/lexis-two-nobg.png +0 -0
- package/assets/logo.png +0 -0
- package/assets/social-preview.png +0 -0
- package/benchmarks/README.md +0 -114
- package/benchmarks/arms/baseline.js +0 -2
- package/benchmarks/arms/caveman-SKILL.md +0 -67
- package/benchmarks/arms/caveman.js +0 -8
- package/benchmarks/arms/lexis-two.js +0 -10
- package/benchmarks/arms/ponytail.js +0 -6
- package/benchmarks/behavior.js +0 -58
- package/benchmarks/behavior.yaml +0 -40
- package/benchmarks/benchmark-local.py +0 -156
- package/benchmarks/benchmark-opencode-go.js +0 -294
- package/benchmarks/correctness.js +0 -294
- package/benchmarks/lib/aggregate-opencode-go.js +0 -103
- package/benchmarks/lib/load-env.js +0 -31
- package/benchmarks/lib/opencode-go-client.js +0 -151
- package/benchmarks/loc.js +0 -13
- package/benchmarks/opencode-go-models.json +0 -31
- package/benchmarks/promptfooconfig.yaml +0 -41
- package/benchmarks/prompts.json +0 -15
- package/benchmarks/render-opencode-go-report.js +0 -28
- package/benchmarks/results/2026-06-15-llama3.2-local.md +0 -76
- package/benchmarks/results/2026-06-16-opencode-go.md +0 -56
- package/benchmarks/results/opencode-go-2026-06-16-report.html +0 -226
- package/benchmarks/results/opencode-go-2026-06-16.json +0 -1339
- package/commands/lexis-two-audit.toml +0 -3
- package/commands/lexis-two-debt.toml +0 -3
- package/commands/lexis-two-help.toml +0 -3
- package/commands/lexis-two-plan.toml +0 -3
- package/commands/lexis-two-review.toml +0 -3
- package/commands/lexis-two-security.toml +0 -3
- package/commands/lexis-two.toml +0 -3
- package/docs/assets/lexis-two-nobg.png +0 -0
- package/docs/assets/logo.png +0 -0
- package/docs/assets/logo.svg +0 -4
- package/docs/portability.md +0 -147
- package/docs/site.md +0 -52
- package/examples/api-endpoint.md +0 -68
- package/examples/caching.md +0 -74
- package/examples/date-picker.md +0 -48
- package/examples/email-validation.md +0 -51
- package/examples/sorting.md +0 -42
- package/gemini-extension.json +0 -7
- package/opencode.json +0 -4
- package/pi-extension/index.js +0 -161
- package/pi-extension/package.json +0 -8
- package/pi-extension/test/extension.test.js +0 -89
- package/pi-extension/test/helpers.test.js +0 -35
- package/scripts/check-rule-copies.js +0 -82
- package/site/astro.config.mjs +0 -18
- package/site/package-lock.json +0 -4913
- package/site/package.json +0 -14
- package/site/public/CNAME +0 -1
- package/site/public/assets/lexis-two-nobg.png +0 -0
- package/site/public/assets/logo.png +0 -0
- package/site/public/assets/logo.svg +0 -4
- package/site/public/robots.txt +0 -4
- package/site/src/components/Adapt.astro +0 -33
- package/site/src/components/Benchmarks.astro +0 -232
- package/site/src/components/Commands.astro +0 -33
- package/site/src/components/Ecosystem.astro +0 -30
- package/site/src/components/Example.astro +0 -77
- package/site/src/components/Footer.astro +0 -28
- package/site/src/components/Header.astro +0 -87
- package/site/src/components/Hero.astro +0 -58
- package/site/src/components/Home.astro +0 -46
- package/site/src/components/Hosts.astro +0 -62
- package/site/src/components/Install.astro +0 -139
- package/site/src/components/LanguageSwitcher.astro +0 -82
- package/site/src/components/Philosophy.astro +0 -23
- package/site/src/components/Stacks.astro +0 -33
- package/site/src/components/Suggested.astro +0 -39
- package/site/src/data/opencode-go-benchmark.json +0 -230
- package/site/src/i18n/en.ts +0 -155
- package/site/src/i18n/es.ts +0 -158
- package/site/src/i18n/index.ts +0 -14
- package/site/src/layouts/Layout.astro +0 -114
- package/site/src/pages/benchmarks.astro +0 -4
- package/site/src/pages/es/benchmarks.astro +0 -4
- package/site/src/pages/es/index.astro +0 -10
- package/site/src/pages/index.astro +0 -10
- package/site/src/styles/global.css +0 -780
- package/site/tsconfig.json +0 -3
- package/tests/behavior.test.js +0 -80
- package/tests/commands.test.js +0 -40
- package/tests/copilot-plugin.test.js +0 -33
- package/tests/correctness.test.js +0 -191
- package/tests/gemini-extension.test.js +0 -78
- package/tests/hooks-windows.test.js +0 -48
- package/tests/hooks.test.js +0 -177
- package/tests/opencode-plugin.test.js +0 -64
package/site/tsconfig.json
DELETED
package/tests/behavior.test.js
DELETED
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// Unit test for the behavior gate (benchmarks/behavior.js). Feeds known
|
|
3
|
-
// behavior-present and behavior-absent outputs through each probe checker and
|
|
4
|
-
// asserts the verdict. Runs without promptfoo or an API key — it proves the
|
|
5
|
-
// grader can tell the refined behavior from its absence, which is what makes
|
|
6
|
-
// the behavior.yaml eval trustworthy.
|
|
7
|
-
|
|
8
|
-
const test = require('node:test');
|
|
9
|
-
const assert = require('node:assert/strict');
|
|
10
|
-
const behavior = require('../benchmarks/behavior');
|
|
11
|
-
|
|
12
|
-
function check(probe, output) {
|
|
13
|
-
return behavior(output, { vars: { probe } });
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
// --- hardware: leave a calibration knob ---
|
|
17
|
-
|
|
18
|
-
test('hardware: calibration knob / drift acknowledged passes', () => {
|
|
19
|
-
const r = check('hardware',
|
|
20
|
-
'```python\ndef read_c(beta=3950, r0=10000):\n ...\n```\n' +
|
|
21
|
-
'Notes: beta/r0 drift part-to-part, measure your own r0 at a known temp.');
|
|
22
|
-
assert.equal(r.pass, true);
|
|
23
|
-
assert.equal(r.score, 1);
|
|
24
|
-
});
|
|
25
|
-
|
|
26
|
-
test('hardware: real-model phrasing (tuning knobs / reads off) passes', () => {
|
|
27
|
-
const r = check('hardware',
|
|
28
|
-
'```python\nBETA = 3950.0 # thermistor beta -- calibration knob\n```\n' +
|
|
29
|
-
'# BETA/R_FIXED are the tuning knobs -- a real thermistor reads off; trust a reference thermometer over the datasheet.');
|
|
30
|
-
assert.equal(r.pass, true);
|
|
31
|
-
});
|
|
32
|
-
|
|
33
|
-
test('hardware: ideal-device assumption fails', () => {
|
|
34
|
-
const r = check('hardware',
|
|
35
|
-
'```python\ndef read_c():\n return adc.read(0) * 0.1\n```\n' +
|
|
36
|
-
'Notes: converts the raw ADC reading straight to Celsius.');
|
|
37
|
-
assert.equal(r.pass, false);
|
|
38
|
-
assert.equal(r.score, 0);
|
|
39
|
-
});
|
|
40
|
-
|
|
41
|
-
// --- explanation: requested write-up is not debt ---
|
|
42
|
-
|
|
43
|
-
test('explanation: full requested write-up passes', () => {
|
|
44
|
-
const r = check('explanation',
|
|
45
|
-
'```python\ndef positives_doubled(rows):\n return [x["a"] * 2 for x in rows if x.get("a", 0) > 0]\n```\n' +
|
|
46
|
-
'1. Renamed p to positives_doubled because the name should say what it returns.\n' +
|
|
47
|
-
'2. Replaced the manual loop and append with a list comprehension, same logic, fewer lines.\n' +
|
|
48
|
-
'3. Used x.get("a", 0) so a missing key is treated as zero instead of raising.\n' +
|
|
49
|
-
'4. Kept the > 0 filter; the behavior is unchanged, only the shape is clearer.');
|
|
50
|
-
assert.equal(r.pass, true);
|
|
51
|
-
});
|
|
52
|
-
|
|
53
|
-
test('explanation: terse truncation fails', () => {
|
|
54
|
-
const r = check('explanation',
|
|
55
|
-
'```python\ndef positives_doubled(rows):\n return [x["a"] * 2 for x in rows if x.get("a", 0) > 0]\n```\n' +
|
|
56
|
-
'skipped: the loop. comprehension covers it.');
|
|
57
|
-
assert.equal(r.pass, false);
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
// --- onecheck: leave one runnable check ---
|
|
61
|
-
|
|
62
|
-
test('onecheck: leaves an assert passes', () => {
|
|
63
|
-
const r = check('onecheck',
|
|
64
|
-
'```python\ndef to_seconds(s):\n ...\n\nassert to_seconds("1h30m") == 5400\n```');
|
|
65
|
-
assert.equal(r.pass, true);
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
test('onecheck: no check fails', () => {
|
|
69
|
-
const r = check('onecheck',
|
|
70
|
-
'```python\ndef to_seconds(s):\n import re\n return sum(...)\n```');
|
|
71
|
-
assert.equal(r.pass, false);
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
// --- unknown probe is skipped, not failed ---
|
|
75
|
-
|
|
76
|
-
test('unknown probe is skipped', () => {
|
|
77
|
-
const r = check('something-else', '```python\nprint(1)\n```');
|
|
78
|
-
assert.equal(r.pass, true);
|
|
79
|
-
assert.match(r.reason, /skipped/i);
|
|
80
|
-
});
|
package/tests/commands.test.js
DELETED
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// Every lexis-two command the pi extension registers must also ship as a
|
|
3
|
-
// file-based command for the hosts that need one: Claude Code (commands/*.toml,
|
|
4
|
-
// which Gemini CLI reuses) and OpenCode (.opencode/command/*.md). /lexis-two-help
|
|
5
|
-
// was advertised in the README and the help card but missing both files; this
|
|
6
|
-
// guards that drift -- a registered command with no adapter file fails here.
|
|
7
|
-
|
|
8
|
-
const test = require('node:test');
|
|
9
|
-
const assert = require('node:assert/strict');
|
|
10
|
-
const fs = require('fs');
|
|
11
|
-
const path = require('path');
|
|
12
|
-
|
|
13
|
-
const root = path.join(__dirname, '..');
|
|
14
|
-
const piSource = fs.readFileSync(path.join(root, 'pi-extension', 'index.js'), 'utf8');
|
|
15
|
-
// Extract all registered commands: pi.registerCommand("command-name", ...)
|
|
16
|
-
const commands = [...piSource.matchAll(/registerCommand\(["']([\w-]+)["']/g)].map((m) => m[1]);
|
|
17
|
-
|
|
18
|
-
test('pi registers at least the base command', () => {
|
|
19
|
-
assert.ok(commands.includes('lexis-two'), 'expected pi to register a lexis-two command');
|
|
20
|
-
});
|
|
21
|
-
|
|
22
|
-
test('every registered command ships a Claude commands/*.toml', () => {
|
|
23
|
-
for (const command of commands) {
|
|
24
|
-
const tomlPath = path.join(root, 'commands', `${command}.toml`);
|
|
25
|
-
assert.ok(
|
|
26
|
-
fs.existsSync(tomlPath),
|
|
27
|
-
`missing Claude command adapter: commands/${command}.toml (registered in pi-extension/index.js)`
|
|
28
|
-
);
|
|
29
|
-
}
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
test('every registered command ships an OpenCode .opencode/command/*.md', () => {
|
|
33
|
-
for (const command of commands) {
|
|
34
|
-
const mdPath = path.join(root, '.opencode', 'command', `${command}.md`);
|
|
35
|
-
assert.ok(
|
|
36
|
-
fs.existsSync(mdPath),
|
|
37
|
-
`missing OpenCode command adapter: .opencode/command/${command}.md (registered in pi-extension/index.js)`
|
|
38
|
-
);
|
|
39
|
-
}
|
|
40
|
-
});
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// Smoke test for the Copilot plugin adapter: keep command wiring minimal and
|
|
3
|
-
// ensure the debt command is part of the shared command surface.
|
|
4
|
-
|
|
5
|
-
const test = require('node:test');
|
|
6
|
-
const assert = require('node:assert/strict');
|
|
7
|
-
const fs = require('fs');
|
|
8
|
-
const path = require('path');
|
|
9
|
-
|
|
10
|
-
const root = path.join(__dirname, '..');
|
|
11
|
-
const REQUIRED_COMMAND_FILES = [
|
|
12
|
-
'lexis-two.toml',
|
|
13
|
-
'lexis-two-review.toml',
|
|
14
|
-
'lexis-two-audit.toml',
|
|
15
|
-
'lexis-two-debt.toml',
|
|
16
|
-
];
|
|
17
|
-
|
|
18
|
-
function readJSON(relPath) {
|
|
19
|
-
return JSON.parse(fs.readFileSync(path.join(root, relPath), 'utf8'));
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
test('copilot plugin command directory includes lexis-two-debt', () => {
|
|
23
|
-
const manifest = readJSON('.github/plugin/plugin.json');
|
|
24
|
-
assert.equal(manifest.name, 'lexis-two');
|
|
25
|
-
assert.equal(manifest.commands, 'commands/');
|
|
26
|
-
|
|
27
|
-
for (const file of REQUIRED_COMMAND_FILES) {
|
|
28
|
-
assert.ok(
|
|
29
|
-
fs.existsSync(path.join(root, manifest.commands, file)),
|
|
30
|
-
`missing command file: ${manifest.commands}${file}`,
|
|
31
|
-
);
|
|
32
|
-
}
|
|
33
|
-
});
|
|
@@ -1,191 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// Unit test for the correctness benchmark assertion. Feeds known-good and
|
|
3
|
-
// known-bad LLM outputs through each task checker and asserts the expected
|
|
4
|
-
// pass/fail verdict. Runs without promptfoo — just node:test + the module.
|
|
5
|
-
|
|
6
|
-
const test = require('node:test');
|
|
7
|
-
const assert = require('node:assert/strict');
|
|
8
|
-
const correctness = require('../benchmarks/correctness');
|
|
9
|
-
|
|
10
|
-
// Helper: wrap code in a fenced block and call the assertion with task vars.
|
|
11
|
-
function check(task, lang, code) {
|
|
12
|
-
const output = '```' + lang + '\n' + code + '\n```';
|
|
13
|
-
return correctness(output, { vars: { task } });
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
// --- Email validator ---
|
|
17
|
-
|
|
18
|
-
test('email: correct one-liner passes', () => {
|
|
19
|
-
const result = check(
|
|
20
|
-
'Write me a Python function that validates email addresses.',
|
|
21
|
-
'python',
|
|
22
|
-
'def validate_email(email):\n return "@" in email and "." in email.split("@")[-1] and email.split("@")[0] != ""',
|
|
23
|
-
);
|
|
24
|
-
assert.equal(result.pass, true);
|
|
25
|
-
assert.equal(result.score, 1);
|
|
26
|
-
});
|
|
27
|
-
|
|
28
|
-
test('email: always-true validator fails', () => {
|
|
29
|
-
const result = check(
|
|
30
|
-
'Write me a Python function that validates email addresses.',
|
|
31
|
-
'python',
|
|
32
|
-
'def validate_email(email):\n return True',
|
|
33
|
-
);
|
|
34
|
-
assert.equal(result.pass, false);
|
|
35
|
-
assert.equal(result.score, 0);
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
test('email: no code block fails', () => {
|
|
39
|
-
const result = correctness('Here is my answer: just use regex.', {
|
|
40
|
-
vars: { task: 'Write me a Python function that validates email addresses.' },
|
|
41
|
-
});
|
|
42
|
-
assert.equal(result.pass, false);
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
// --- Debounce ---
|
|
46
|
-
|
|
47
|
-
test('debounce: correct implementation passes', () => {
|
|
48
|
-
const result = check(
|
|
49
|
-
'Add debounce to a search input in vanilla JavaScript.',
|
|
50
|
-
'javascript',
|
|
51
|
-
`function debounce(fn, delay) {
|
|
52
|
-
let timer;
|
|
53
|
-
return function(...args) {
|
|
54
|
-
clearTimeout(timer);
|
|
55
|
-
timer = setTimeout(() => fn.apply(this, args), delay);
|
|
56
|
-
};
|
|
57
|
-
}`,
|
|
58
|
-
);
|
|
59
|
-
assert.equal(result.pass, true);
|
|
60
|
-
assert.equal(result.score, 1);
|
|
61
|
-
});
|
|
62
|
-
|
|
63
|
-
test('debounce: immediate-call implementation fails', () => {
|
|
64
|
-
const result = check(
|
|
65
|
-
'Add debounce to a search input in vanilla JavaScript.',
|
|
66
|
-
'javascript',
|
|
67
|
-
`function debounce(fn, delay) {
|
|
68
|
-
return function(...args) { fn.apply(this, args); };
|
|
69
|
-
}`,
|
|
70
|
-
);
|
|
71
|
-
assert.equal(result.pass, false);
|
|
72
|
-
assert.equal(result.score, 0);
|
|
73
|
-
});
|
|
74
|
-
|
|
75
|
-
// --- CSV sum ---
|
|
76
|
-
|
|
77
|
-
test('csv: correct pandas one-liner passes', () => {
|
|
78
|
-
const result = check(
|
|
79
|
-
"Write Python code that reads sales.csv and sums the 'amount' column.",
|
|
80
|
-
'python',
|
|
81
|
-
`import pandas as pd
|
|
82
|
-
df = pd.read_csv('sales.csv')
|
|
83
|
-
print(df['amount'].sum())`,
|
|
84
|
-
);
|
|
85
|
-
assert.equal(result.pass, true);
|
|
86
|
-
assert.equal(result.score, 1);
|
|
87
|
-
});
|
|
88
|
-
|
|
89
|
-
test('csv: code that prints wrong value fails', () => {
|
|
90
|
-
const result = check(
|
|
91
|
-
"Write Python code that reads sales.csv and sums the 'amount' column.",
|
|
92
|
-
'python',
|
|
93
|
-
`print(999)`,
|
|
94
|
-
);
|
|
95
|
-
assert.equal(result.pass, false);
|
|
96
|
-
assert.equal(result.score, 0);
|
|
97
|
-
});
|
|
98
|
-
|
|
99
|
-
test('csv: value containing 351 as substring fails (e.g. 13510)', () => {
|
|
100
|
-
const result = check(
|
|
101
|
-
"Write Python code that reads sales.csv and sums the 'amount' column.",
|
|
102
|
-
'python',
|
|
103
|
-
`print(13510)`,
|
|
104
|
-
);
|
|
105
|
-
assert.equal(result.pass, false);
|
|
106
|
-
assert.equal(result.score, 0);
|
|
107
|
-
});
|
|
108
|
-
|
|
109
|
-
// --- React countdown ---
|
|
110
|
-
|
|
111
|
-
test('countdown: valid React component passes', () => {
|
|
112
|
-
const result = check(
|
|
113
|
-
'Build me a countdown timer component in React.',
|
|
114
|
-
'javascript',
|
|
115
|
-
`import { useState, useEffect } from 'react';
|
|
116
|
-
export default function Countdown({ seconds }) {
|
|
117
|
-
const [count, setCount] = useState(seconds);
|
|
118
|
-
useEffect(() => {
|
|
119
|
-
if (count <= 0) return;
|
|
120
|
-
const id = setInterval(() => setCount(prev => prev - 1), 1000);
|
|
121
|
-
return () => clearInterval(id);
|
|
122
|
-
}, [count]);
|
|
123
|
-
return <div>{count}</div>;
|
|
124
|
-
}`,
|
|
125
|
-
);
|
|
126
|
-
assert.equal(result.pass, true);
|
|
127
|
-
assert.equal(result.score, 1);
|
|
128
|
-
});
|
|
129
|
-
|
|
130
|
-
test('countdown: static div without state fails', () => {
|
|
131
|
-
const result = check(
|
|
132
|
-
'Build me a countdown timer component in React.',
|
|
133
|
-
'javascript',
|
|
134
|
-
`export default function Countdown() { return <div>10</div>; }`,
|
|
135
|
-
);
|
|
136
|
-
assert.equal(result.pass, false);
|
|
137
|
-
assert.equal(result.score, 0);
|
|
138
|
-
});
|
|
139
|
-
|
|
140
|
-
// --- Rate limiter ---
|
|
141
|
-
|
|
142
|
-
test('ratelimit: FastAPI with limit logic passes', () => {
|
|
143
|
-
const result = check(
|
|
144
|
-
'Add rate limiting to my FastAPI endpoint so users can\'t spam it.',
|
|
145
|
-
'python',
|
|
146
|
-
`from fastapi import FastAPI, HTTPException
|
|
147
|
-
import time
|
|
148
|
-
|
|
149
|
-
app = FastAPI()
|
|
150
|
-
requests = {}
|
|
151
|
-
|
|
152
|
-
@app.get("/api")
|
|
153
|
-
def endpoint(user: str = "anon"):
|
|
154
|
-
now = time.time()
|
|
155
|
-
window = requests.get(user, [])
|
|
156
|
-
window = [t for t in window if now - t < 60]
|
|
157
|
-
if len(window) >= 10:
|
|
158
|
-
raise HTTPException(429, "Too Many Requests")
|
|
159
|
-
window.append(now)
|
|
160
|
-
requests[user] = window
|
|
161
|
-
return {"ok": True}`,
|
|
162
|
-
);
|
|
163
|
-
assert.equal(result.pass, true);
|
|
164
|
-
assert.equal(result.score, 1);
|
|
165
|
-
});
|
|
166
|
-
|
|
167
|
-
test('ratelimit: plain endpoint without limiting fails', () => {
|
|
168
|
-
const result = check(
|
|
169
|
-
'Add rate limiting to my FastAPI endpoint.',
|
|
170
|
-
'python',
|
|
171
|
-
`from fastapi import FastAPI
|
|
172
|
-
app = FastAPI()
|
|
173
|
-
|
|
174
|
-
@app.get("/api")
|
|
175
|
-
def endpoint():
|
|
176
|
-
return {"ok": True}`,
|
|
177
|
-
);
|
|
178
|
-
assert.equal(result.pass, false);
|
|
179
|
-
assert.equal(result.score, 0);
|
|
180
|
-
});
|
|
181
|
-
|
|
182
|
-
// --- Edge cases ---
|
|
183
|
-
|
|
184
|
-
test('unknown task is gracefully skipped', () => {
|
|
185
|
-
const result = correctness('```python\nprint("hi")\n```', {
|
|
186
|
-
vars: { task: 'Explain quantum computing.' },
|
|
187
|
-
});
|
|
188
|
-
assert.equal(result.pass, true);
|
|
189
|
-
assert.equal(result.score, 1);
|
|
190
|
-
assert.match(result.reason, /unknown task/i);
|
|
191
|
-
});
|
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// Smoke test for the Gemini CLI adapter. The adapter is a single thin manifest
|
|
3
|
-
// (gemini-extension.json) that reuses the repo's existing files: AGENTS.md for
|
|
4
|
-
// always-on context, commands/*.toml for /lexis-two + /lexis-two-review, and
|
|
5
|
-
// skills/ for the agent skills. This test fails if the manifest is removed,
|
|
6
|
-
// loses its pinned version, or points contextFileName at a file that no longer
|
|
7
|
-
// carries the load-bearing rules — i.e. if the adapter stops wiring lexis-two.
|
|
8
|
-
|
|
9
|
-
const test = require('node:test');
|
|
10
|
-
const assert = require('node:assert/strict');
|
|
11
|
-
const fs = require('fs');
|
|
12
|
-
const path = require('path');
|
|
13
|
-
|
|
14
|
-
const root = path.join(__dirname, '..');
|
|
15
|
-
const MANIFEST = 'gemini-extension.json';
|
|
16
|
-
const EXTENSION_NAME = 'lexis-two';
|
|
17
|
-
// Floating refs are a supply-chain footgun; the manifest version must be pinned.
|
|
18
|
-
const PINNED_SEMVER = /^\d+\.\d+\.\d+$/;
|
|
19
|
-
// All versioned manifests must agree on the same semver string.
|
|
20
|
-
const VERSIONED_MANIFESTS = [
|
|
21
|
-
'package.json',
|
|
22
|
-
'gemini-extension.json',
|
|
23
|
-
'.claude-plugin/plugin.json',
|
|
24
|
-
'.codex-plugin/plugin.json',
|
|
25
|
-
'.github/plugin/plugin.json',
|
|
26
|
-
];
|
|
27
|
-
// Gemini auto-discovers these by directory; the manifest is only useful if they exist.
|
|
28
|
-
const REUSED_COMMANDS = ['commands/lexis-two.toml', 'commands/lexis-two-review.toml'];
|
|
29
|
-
const REUSED_SKILLS = ['skills/lexis-two/SKILL.md'];
|
|
30
|
-
// Same load-bearing phrases asserted by scripts/check-rule-copies.js: the file
|
|
31
|
-
// contextFileName points at must actually carry the rules, not just exist.
|
|
32
|
-
const RULE_INVARIANTS = [
|
|
33
|
-
'lazy senior',
|
|
34
|
-
'Input validation at trust boundaries',
|
|
35
|
-
'YAGNI',
|
|
36
|
-
];
|
|
37
|
-
|
|
38
|
-
function read(relPath) {
|
|
39
|
-
return fs.readFileSync(path.join(root, relPath), 'utf8');
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
function loadManifest() {
|
|
43
|
-
assert.ok(fs.existsSync(path.join(root, MANIFEST)), `${MANIFEST} must exist`);
|
|
44
|
-
return JSON.parse(read(MANIFEST));
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
test('manifest names the lexis-two extension with a pinned version', () => {
|
|
48
|
-
const manifest = loadManifest();
|
|
49
|
-
assert.equal(manifest.name, EXTENSION_NAME);
|
|
50
|
-
assert.match(manifest.version, PINNED_SEMVER);
|
|
51
|
-
});
|
|
52
|
-
|
|
53
|
-
test('all versioned manifests share the same version', () => {
|
|
54
|
-
const versions = VERSIONED_MANIFESTS.map((rel) => {
|
|
55
|
-
const data = JSON.parse(read(rel));
|
|
56
|
-
assert.match(data.version, PINNED_SEMVER, `${rel} version must be pinned semver`);
|
|
57
|
-
return data.version;
|
|
58
|
-
});
|
|
59
|
-
const [sharedVersion, ...rest] = versions;
|
|
60
|
-
for (const version of rest) {
|
|
61
|
-
assert.equal(version, sharedVersion, `version mismatch: expected ${sharedVersion}, got ${version}`);
|
|
62
|
-
}
|
|
63
|
-
});
|
|
64
|
-
|
|
65
|
-
test('contextFileName resolves to a file carrying the lexis-two rules', () => {
|
|
66
|
-
const manifest = loadManifest();
|
|
67
|
-
assert.ok(manifest.contextFileName, 'contextFileName must be set so rules load every session');
|
|
68
|
-
const context = read(manifest.contextFileName);
|
|
69
|
-
for (const phrase of RULE_INVARIANTS) {
|
|
70
|
-
assert.ok(context.includes(phrase), `context file missing rule invariant: "${phrase}"`);
|
|
71
|
-
}
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
test('the commands and skills the adapter reuses are present', () => {
|
|
75
|
-
for (const rel of [...REUSED_COMMANDS, ...REUSED_SKILLS]) {
|
|
76
|
-
assert.ok(fs.existsSync(path.join(root, rel)), `reused file missing: ${rel}`);
|
|
77
|
-
}
|
|
78
|
-
});
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// Regression test for issue #19: on Windows the lifecycle hooks run via
|
|
3
|
-
// PowerShell, which does NOT expand cmd.exe-style %VAR% — it needs $env:VAR.
|
|
4
|
-
// The hook also has to point at a script that actually ships in hooks/.
|
|
5
|
-
// This guards both failure modes: the original %CLAUDE_PLUGIN_ROOT% bug, and
|
|
6
|
-
// the "switch to a .ps1 that doesn't exist" mistake.
|
|
7
|
-
|
|
8
|
-
const test = require('node:test');
|
|
9
|
-
const assert = require('node:assert/strict');
|
|
10
|
-
const fs = require('fs');
|
|
11
|
-
const path = require('path');
|
|
12
|
-
|
|
13
|
-
const root = path.join(__dirname, '..');
|
|
14
|
-
const HOOKS_JSON = 'hooks/hooks.json';
|
|
15
|
-
// cmd.exe variable syntax (%FOO%); PowerShell leaves it literal, breaking the path.
|
|
16
|
-
const CMD_VAR_SYNTAX = /%[A-Za-z_][A-Za-z0-9_]*%/;
|
|
17
|
-
// Pull the hooks/<script> a command launches, so we can check it exists.
|
|
18
|
-
const HOOK_SCRIPT = /hooks[\\/]([\w.-]+\.(?:js|mjs|cjs|ps1|sh))/;
|
|
19
|
-
|
|
20
|
-
// Read inside each case so a missing/malformed file fails as a clean assertion,
|
|
21
|
-
// not a load-time crash.
|
|
22
|
-
function commandHooks() {
|
|
23
|
-
const config = JSON.parse(fs.readFileSync(path.join(root, HOOKS_JSON), 'utf8'));
|
|
24
|
-
return Object.values(config.hooks)
|
|
25
|
-
.flat()
|
|
26
|
-
.flatMap((entry) => entry.hooks);
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
test('every commandWindows uses PowerShell $env: syntax, not cmd.exe %VAR%', () => {
|
|
30
|
-
const windowsCommands = commandHooks()
|
|
31
|
-
.map((h) => h.commandWindows)
|
|
32
|
-
.filter(Boolean);
|
|
33
|
-
assert.ok(windowsCommands.length > 0, 'expected at least one commandWindows entry');
|
|
34
|
-
for (const cmd of windowsCommands) {
|
|
35
|
-
assert.doesNotMatch(cmd, CMD_VAR_SYNTAX, `commandWindows uses cmd.exe %VAR% (breaks under PowerShell): ${cmd}`);
|
|
36
|
-
}
|
|
37
|
-
});
|
|
38
|
-
|
|
39
|
-
test('every hook command points at a script that ships in hooks/', () => {
|
|
40
|
-
for (const hook of commandHooks()) {
|
|
41
|
-
for (const cmd of [hook.command, hook.commandWindows].filter(Boolean)) {
|
|
42
|
-
const match = cmd.match(HOOK_SCRIPT);
|
|
43
|
-
assert.ok(match, `cannot find a hooks/ script in command: ${cmd}`);
|
|
44
|
-
const script = path.join(root, 'hooks', match[1]);
|
|
45
|
-
assert.ok(fs.existsSync(script), `command references a missing hook script: ${match[1]}`);
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
});
|
package/tests/hooks.test.js
DELETED
|
@@ -1,177 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
const test = require('node:test');
|
|
4
|
-
const assert = require('node:assert/strict');
|
|
5
|
-
const fs = require('fs');
|
|
6
|
-
const os = require('os');
|
|
7
|
-
const path = require('path');
|
|
8
|
-
const { spawnSync } = require('child_process');
|
|
9
|
-
|
|
10
|
-
const root = path.join(__dirname, '..');
|
|
11
|
-
|
|
12
|
-
function run(script, env, input = '') {
|
|
13
|
-
return spawnSync(process.execPath, [path.join(root, 'hooks', script)], {
|
|
14
|
-
env: { ...process.env, ...env },
|
|
15
|
-
input,
|
|
16
|
-
encoding: 'utf8',
|
|
17
|
-
});
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
test('hooks integration tests', async (t) => {
|
|
21
|
-
let originalEnv;
|
|
22
|
-
let temp;
|
|
23
|
-
let home;
|
|
24
|
-
let pluginData;
|
|
25
|
-
let codexEnv;
|
|
26
|
-
let codexState;
|
|
27
|
-
|
|
28
|
-
t.beforeEach(() => {
|
|
29
|
-
// Save original env and clean it
|
|
30
|
-
originalEnv = { ...process.env };
|
|
31
|
-
delete process.env.CLAUDE_CONFIG_DIR;
|
|
32
|
-
delete process.env.LEXIS_TWO_DEFAULT_MODE;
|
|
33
|
-
|
|
34
|
-
temp = fs.mkdtempSync(path.join(os.tmpdir(), 'lexis-two-hooks-'));
|
|
35
|
-
home = path.join(temp, 'home');
|
|
36
|
-
pluginData = path.join(temp, 'plugin-data');
|
|
37
|
-
fs.mkdirSync(home, { recursive: true });
|
|
38
|
-
|
|
39
|
-
codexEnv = {
|
|
40
|
-
HOME: home,
|
|
41
|
-
USERPROFILE: home,
|
|
42
|
-
PLUGIN_DATA: pluginData,
|
|
43
|
-
LEXIS_TWO_DEFAULT_MODE: 'ultra',
|
|
44
|
-
};
|
|
45
|
-
codexState = path.join(pluginData, '.lexis-two-active');
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
t.afterEach(() => {
|
|
49
|
-
// Restore original env and clean temp folder
|
|
50
|
-
process.env = originalEnv;
|
|
51
|
-
try {
|
|
52
|
-
fs.rmSync(temp, { recursive: true, force: true });
|
|
53
|
-
} catch (e) {}
|
|
54
|
-
});
|
|
55
|
-
|
|
56
|
-
await t.test('lexis-two-activate.js activates ultra mode for Codex', () => {
|
|
57
|
-
const result = run('lexis-two-activate.js', codexEnv);
|
|
58
|
-
assert.equal(result.status, 0, result.stderr);
|
|
59
|
-
assert.equal(fs.readFileSync(codexState, 'utf8'), 'ultra');
|
|
60
|
-
const output = JSON.parse(result.stdout);
|
|
61
|
-
assert.equal(output.systemMessage, 'LEXIS-TWO:ULTRA');
|
|
62
|
-
assert.match(
|
|
63
|
-
output.hookSpecificOutput.additionalContext,
|
|
64
|
-
/LEXIS-TWO MODE ACTIVE — level: ultra/,
|
|
65
|
-
);
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
await t.test('lexis-two-mode-tracker.js switches mode to lite', () => {
|
|
69
|
-
// First activate
|
|
70
|
-
run('lexis-two-activate.js', codexEnv);
|
|
71
|
-
|
|
72
|
-
const result = run(
|
|
73
|
-
'lexis-two-mode-tracker.js',
|
|
74
|
-
codexEnv,
|
|
75
|
-
JSON.stringify({ prompt: '@lexis-two lite' }),
|
|
76
|
-
);
|
|
77
|
-
assert.equal(result.status, 0, result.stderr);
|
|
78
|
-
assert.equal(fs.readFileSync(codexState, 'utf8'), 'lite');
|
|
79
|
-
const output = JSON.parse(result.stdout);
|
|
80
|
-
assert.equal(output.systemMessage, 'LEXIS-TWO:LITE');
|
|
81
|
-
});
|
|
82
|
-
|
|
83
|
-
await t.test('lexis-two-mode-tracker.js deactivates mode with normal prompt', () => {
|
|
84
|
-
// First activate
|
|
85
|
-
run('lexis-two-activate.js', codexEnv);
|
|
86
|
-
|
|
87
|
-
const result = run(
|
|
88
|
-
'lexis-two-mode-tracker.js',
|
|
89
|
-
codexEnv,
|
|
90
|
-
JSON.stringify({ prompt: 'normal mode' }),
|
|
91
|
-
);
|
|
92
|
-
assert.equal(result.status, 0, result.stderr);
|
|
93
|
-
assert.equal(fs.existsSync(codexState), false);
|
|
94
|
-
const output = JSON.parse(result.stdout);
|
|
95
|
-
assert.equal(output.systemMessage, 'LEXIS-TWO:OFF');
|
|
96
|
-
});
|
|
97
|
-
|
|
98
|
-
await t.test('lexis-two-activate.js activates full mode for Claude', () => {
|
|
99
|
-
const claudeEnv = {
|
|
100
|
-
HOME: home,
|
|
101
|
-
USERPROFILE: home,
|
|
102
|
-
LEXIS_TWO_DEFAULT_MODE: 'full',
|
|
103
|
-
};
|
|
104
|
-
|
|
105
|
-
const result = run('lexis-two-activate.js', claudeEnv);
|
|
106
|
-
assert.equal(result.status, 0, result.stderr);
|
|
107
|
-
assert.equal(
|
|
108
|
-
fs.readFileSync(path.join(home, '.claude', '.lexis-two-active'), 'utf8'),
|
|
109
|
-
'full',
|
|
110
|
-
);
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
await t.test('CLAUDE_CONFIG_DIR overrides ~/.claude for Claude flag file', () => {
|
|
114
|
-
const home2 = path.join(temp, 'home2');
|
|
115
|
-
fs.mkdirSync(home2, { recursive: true });
|
|
116
|
-
const customConfigDir = path.join(temp, 'custom-claude');
|
|
117
|
-
|
|
118
|
-
const result = run('lexis-two-activate.js', {
|
|
119
|
-
HOME: home2,
|
|
120
|
-
USERPROFILE: home2,
|
|
121
|
-
CLAUDE_CONFIG_DIR: customConfigDir,
|
|
122
|
-
LEXIS_TWO_DEFAULT_MODE: 'lite',
|
|
123
|
-
});
|
|
124
|
-
assert.equal(result.status, 0, result.stderr);
|
|
125
|
-
assert.equal(
|
|
126
|
-
fs.readFileSync(path.join(customConfigDir, '.lexis-two-active'), 'utf8'),
|
|
127
|
-
'lite',
|
|
128
|
-
);
|
|
129
|
-
assert.equal(
|
|
130
|
-
fs.existsSync(path.join(home2, '.claude', '.lexis-two-active')),
|
|
131
|
-
false,
|
|
132
|
-
'flag must not land in ~/.claude when CLAUDE_CONFIG_DIR is set',
|
|
133
|
-
);
|
|
134
|
-
});
|
|
135
|
-
|
|
136
|
-
await t.test('copilot hooks isolate state from Codex', () => {
|
|
137
|
-
const copilotData = path.join(temp, 'copilot-data');
|
|
138
|
-
const codexData = path.join(temp, 'codex-data-shadow');
|
|
139
|
-
|
|
140
|
-
let result = run('lexis-two-activate.js', {
|
|
141
|
-
HOME: home,
|
|
142
|
-
USERPROFILE: home,
|
|
143
|
-
COPILOT_PLUGIN_DATA: copilotData,
|
|
144
|
-
PLUGIN_DATA: codexData,
|
|
145
|
-
LEXIS_TWO_DEFAULT_MODE: 'full',
|
|
146
|
-
});
|
|
147
|
-
assert.equal(result.status, 0, result.stderr);
|
|
148
|
-
assert.equal(fs.readFileSync(path.join(copilotData, '.lexis-two-active'), 'utf8'), 'full');
|
|
149
|
-
assert.equal(
|
|
150
|
-
fs.existsSync(path.join(codexData, '.lexis-two-active')),
|
|
151
|
-
false,
|
|
152
|
-
'copilot hooks must not write mode state to codex PLUGIN_DATA',
|
|
153
|
-
);
|
|
154
|
-
let output = JSON.parse(result.stdout);
|
|
155
|
-
assert.match(output.additionalContext, /LEXIS-TWO MODE ACTIVE — level: full/);
|
|
156
|
-
|
|
157
|
-
result = run(
|
|
158
|
-
'lexis-two-mode-tracker.js',
|
|
159
|
-
{
|
|
160
|
-
HOME: home,
|
|
161
|
-
USERPROFILE: home,
|
|
162
|
-
COPILOT_PLUGIN_DATA: copilotData,
|
|
163
|
-
PLUGIN_DATA: codexData,
|
|
164
|
-
},
|
|
165
|
-
JSON.stringify({ prompt: '/lexis-two ultra' }),
|
|
166
|
-
);
|
|
167
|
-
assert.equal(result.status, 0, result.stderr);
|
|
168
|
-
assert.equal(fs.readFileSync(path.join(copilotData, '.lexis-two-active'), 'utf8'), 'ultra');
|
|
169
|
-
assert.equal(
|
|
170
|
-
fs.existsSync(path.join(codexData, '.lexis-two-active')),
|
|
171
|
-
false,
|
|
172
|
-
'copilot mode tracker must keep codex PLUGIN_DATA untouched',
|
|
173
|
-
);
|
|
174
|
-
output = JSON.parse(result.stdout);
|
|
175
|
-
assert.deepEqual(output, {});
|
|
176
|
-
});
|
|
177
|
-
});
|