happyskills 0.46.0 → 0.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,68 @@
1
+ // CLI mirror of api/app/utils/slug_tokens.js — used by `happyskills postlex`
2
+ // to compute the same lexical-overlap signal the API uses for digest annotation
3
+ // and slug-boost behavior.
4
+ //
5
+ // Spec 260521-01 v2 § 6 — invariant: the STOP_TOKENS list MUST stay
6
+ // byte-identical to api/app/utils/slug_tokens.js. Drift causes the CLI's view
7
+ // of which candidate is "exact" to disagree with the API's slug-boost, and the
8
+ // post-lex stage promotes the wrong candidate (or none). The cross-import
9
+ // test at slug_tokens.test.js asserts set-equality on both sides — it fails
10
+ // CI on drift.
11
+ //
12
+ // If you change anything in this file, change the same thing in
13
+ // api/app/utils/slug_tokens.js in the same commit.
14
+
15
+ const STOP_TOKENS = new Set([ // lowercase tokens that slug_tokens() discards after lowercasing its input
16
+ 'dev', 'devs', 'labs', 'lab', 'ai', 'ml',
17
+ 'skills', 'skill', 'agent', 'agents', 'agentic',
18
+ 'io', 'co', 'inc', 'ltd', 'org', 'corp',
19
+ 'team', 'group', 'project', 'projects',
20
+ 'the', 'a', 'an', 'of', 'for', 'and', 'or', // 'a' would also be removed by the length <= 1 filter in slug_tokens()
21
+ 'app', 'apps', 'tool', 'tools', 'kit', 'kits',
22
+ ])
23
+
24
+ // Tokenize a slug-style identifier: lowercase it, split on runs of `-`, `_`, `.`, `/` or whitespace, then drop
25
+ // tokens of length <= 1 (this includes empty pieces left by leading/trailing separators) and STOP_TOKENS members.
26
+ // Returns a deduplicated array in first-occurrence order; falsy or non-string input returns []. Use slug_token_set() when you want a Set.
27
+ const slug_tokens = (s) => {
28
+ if (!s || typeof s !== 'string') return [] // null, undefined, '' and non-strings all tokenize to nothing
29
+ const seen = new Set() // tokens already emitted, for O(1) duplicate checks
30
+ const out = [] // result, in first-occurrence order
31
+ for (const raw of s.toLowerCase().split(/[-_\s./]+/)) { // the `+` treats a run of adjacent separators as one split point
32
+ if (raw.length <= 1) continue // String.length counts UTF-16 code units
33
+ if (STOP_TOKENS.has(raw)) continue
34
+ if (seen.has(raw)) continue // keep only the first occurrence
35
+ seen.add(raw)
36
+ out.push(raw)
37
+ }
38
+ return out
39
+ }
40
+
41
+ const slug_token_set = (s) => new Set(slug_tokens(s)) // Set form of slug_tokens(s); an empty Set whenever slug_tokens(s) returns []
42
+
43
+ // Compute the lexical tier of a candidate name against a query. Both arguments are used as Sets (.size, .has, iteration)
44
+ // (spec 260521-01 v2 § 2.3):
45
+ // 'exact' — query token set equals name token set
46
+ // 'strong' — query tokens are a subset of name tokens (and not equal)
47
+ // 'partial' — non-empty overlap but query tokens are NOT a subset of name tokens (this includes name ⊂ query)
48
+ // 'none' — no overlap, or either side is empty / null / undefined
49
+ const compute_lex_tier = (q_tokens, name_tokens) => {
50
+ if (!q_tokens?.size || !name_tokens?.size) return 'none' // `?.` lets null/undefined arguments fall through to 'none'
51
+ let overlap = 0 // number of query tokens that also appear in the name
52
+ for (const t of q_tokens) if (name_tokens.has(t)) overlap++
53
+ if (overlap === 0) return 'none'
54
+ const q_subset = overlap === q_tokens.size // every query token is in the name
55
+ let n_in_q = 0 // number of name tokens that also appear in the query
56
+ for (const t of name_tokens) if (q_tokens.has(t)) n_in_q++
57
+ const n_subset = n_in_q === name_tokens.size // every name token is in the query
58
+ if (q_subset && n_subset) return 'exact' // mutual inclusion means the sets are equal
59
+ if (q_subset) return 'strong'
60
+ return 'partial' // reached for any overlap where the query is not fully contained, even if the name is
61
+ }
62
+
63
+ module.exports = {
64
+ STOP_TOKENS,
65
+ slug_tokens,
66
+ slug_token_set,
67
+ compute_lex_tier,
68
+ }
@@ -0,0 +1,126 @@
1
+ // Unit tests + cross-import drift canary for cli/src/utils/slug_tokens.js.
2
+ // Spec 260521-01 v2 § 6 — STOP_TOKENS MUST stay byte-identical between
3
+ // CLI and API. The cross-import test at the bottom enforces that.
4
+
5
+ const { describe, it } = require('node:test')
6
+ const assert = require('node:assert/strict')
7
+ const { STOP_TOKENS, slug_tokens, slug_token_set, compute_lex_tier } = require('./slug_tokens')
8
+
9
+ describe('slug_tokens', () => { // unit tests for the tokenizer exported by ./slug_tokens
10
+ it('splits on hyphens, underscores, dots, slashes, and whitespace', () => {
11
+ assert.deepEqual(slug_tokens('deploy-aws.lambda/handler edge_runtime'), // one input exercising all five separator kinds
12
+ ['deploy', 'aws', 'lambda', 'handler', 'edge', 'runtime'])
13
+ })
14
+
15
+ it('lowercases', () => {
16
+ assert.deepEqual(slug_tokens('Deploy-AWS'), ['deploy', 'aws'])
17
+ })
18
+
19
+ it('drops 1-character tokens', () => {
20
+ assert.deepEqual(slug_tokens('a-b-deploy-c'), ['deploy']) // 'a', 'b' and 'c' all fail the length filter
21
+ })
22
+
23
+ it('drops STOP_TOKENS', () => {
24
+ assert.deepEqual(slug_tokens('the-deploy-tool-for-aws'), ['deploy', 'aws']) // 'the', 'tool', 'for' are STOP_TOKENS members
25
+ })
26
+
27
+ it('deduplicates while preserving first-occurrence order', () => {
28
+ assert.deepEqual(slug_tokens('deploy-aws-deploy'), ['deploy', 'aws'])
29
+ })
30
+
31
+ it('returns [] for null / non-string / empty', () => {
32
+ assert.deepEqual(slug_tokens(null), [])
33
+ assert.deepEqual(slug_tokens(undefined), [])
34
+ assert.deepEqual(slug_tokens(''), []) // empty string is falsy, so it takes the early return
35
+ assert.deepEqual(slug_tokens(42), []) // truthy but not a string
36
+ })
37
+ })
38
+
39
+ describe('slug_token_set', () => { // checks the Set wrapper around slug_tokens
40
+ it('returns a Set with the same members as slug_tokens', () => {
41
+ const set = slug_token_set('deploy-aws-lambda')
42
+ assert.ok(set instanceof Set)
43
+ assert.deepEqual([...set].sort(), ['aws', 'deploy', 'lambda']) // sorted so the assertion does not depend on Set iteration order
44
+ })
45
+ })
46
+
47
+ describe('compute_lex_tier', () => { // one case per tier label returned by compute_lex_tier
48
+ const set = s => slug_token_set(s) // shorthand: slug string -> token Set
49
+
50
+ it('returns "exact" when query and name token sets are equal', () => {
51
+ assert.equal(compute_lex_tier(set('deploy-aws'), set('deploy-aws')), 'exact')
52
+ assert.equal(compute_lex_tier(set('aws-deploy'), set('deploy-aws')), 'exact') // order-independent
53
+ })
54
+
55
+ it('returns "strong" when query tokens are a strict subset of name tokens', () => {
56
+ assert.equal(compute_lex_tier(set('deploy'), set('deploy-aws-lambda')), 'strong')
57
+ })
58
+
59
+ it('returns "partial" when there is overlap but neither is a subset', () => { // NOTE(review): compute_lex_tier also returns 'partial' when name ⊂ query; that case is not exercised here
60
+ assert.equal(compute_lex_tier(set('deploy-aws'), set('deploy-gcp')), 'partial')
61
+ })
62
+
63
+ it('returns "none" when there is no overlap', () => {
64
+ assert.equal(compute_lex_tier(set('deploy-aws'), set('static-site')), 'none')
65
+ })
66
+
67
+ it('returns "none" when either side is empty', () => {
68
+ assert.equal(compute_lex_tier(set(''), set('deploy-aws')), 'none') // set('') is an empty Set because slug_tokens('') returns []
69
+ assert.equal(compute_lex_tier(set('deploy-aws'), set('')), 'none')
70
+ assert.equal(compute_lex_tier(new Set(), set('deploy-aws')), 'none')
71
+ })
72
+ })
73
+
74
+ describe('drift canary — cross-imports api/app/utils/slug_tokens.js', () => {
75
+ const api = require('../../../api/app/utils/slug_tokens') // NOTE(review): path climbs three levels out of this directory — presumably only resolvable inside the full repo checkout, not from the published package; confirm
76
+
77
+ it('STOP_TOKENS is byte-equal in both directions', () => {
78
+ // Forward: every CLI token exists in the API set
79
+ for (const t of STOP_TOKENS) {
80
+ assert.ok(api.STOP_TOKENS.has(t), `API STOP_TOKENS missing CLI token "${t}"`)
81
+ }
82
+ // Reverse: every API token exists in the CLI set
83
+ for (const t of api.STOP_TOKENS) {
84
+ assert.ok(STOP_TOKENS.has(t), `CLI STOP_TOKENS missing API token "${t}"`)
85
+ }
86
+ // Size also matches. Mutual inclusion above already implies equal sizes, and a Set collapses
87
+ // duplicate entries (assuming the API side is also a Set), so this cannot catch a duplicate; kept as a cheap sanity check.
88
+ assert.equal(STOP_TOKENS.size, api.STOP_TOKENS.size)
89
+ })
90
+
91
+ it('slug_tokens produces identical output across a battery of inputs', () => {
92
+ const inputs = [
93
+ 'deploy-aws',
94
+ 'Deploy-AWS.Lambda',
95
+ 'the-deploy-tool-for-aws',
96
+ 'a-b-c-deploy',
97
+ 'acme/deploy-aws',
98
+ 'underscores_and-hyphens.and/slashes',
99
+ '', // empty, null and undefined exercise the early-return guard
100
+ null,
101
+ undefined,
102
+ '日本語-skill', // non-ASCII token; 'skill' is a stop token on the CLI side
103
+ 'AGENTS-skill-kit', // all three tokens are CLI stop tokens once lowercased
104
+ ]
105
+ for (const input of inputs) {
106
+ assert.deepEqual(slug_tokens(input), api.slug_tokens(input),
107
+ `slug_tokens drift on input ${JSON.stringify(input)}`)
108
+ }
109
+ })
110
+
111
+ it('compute_lex_tier produces identical labels across a battery of inputs', () => {
112
+ const pairs = [ // [query, name]; trailing comments give the tier the CLI implementation returns
113
+ ['deploy-aws', 'deploy-aws'], // exact
114
+ ['deploy', 'deploy-aws-lambda'], // strong
115
+ ['deploy-aws', 'deploy-gcp'], // partial
116
+ ['deploy-aws', 'static-site'], // none
117
+ ['', 'deploy-aws'], // none
118
+ ['agents-skill-the-deploy', 'deploy'], // exact (stops drop)
119
+ ]
120
+ for (const [q, n] of pairs) {
121
+ const cli_tier = compute_lex_tier(slug_token_set(q), slug_token_set(n))
122
+ const api_tier = api.compute_lex_tier(api.slug_token_set(q), api.slug_token_set(n))
123
+ assert.equal(cli_tier, api_tier, `compute_lex_tier drift on ("${q}", "${n}")`) // only CLI/API agreement is asserted, not the label itself
124
+ }
125
+ })
126
+ })