solidity-argus 0.2.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +3 -3
- package/README.md +93 -37
- package/package.json +34 -7
- package/skills/INVENTORY.md +88 -57
- package/skills/README.md +26 -23
- package/skills/case-studies/beanstalk-governance/SKILL.md +52 -0
- package/skills/case-studies/bzx-flash-loan/SKILL.md +53 -0
- package/skills/case-studies/cream-finance/SKILL.md +52 -0
- package/skills/case-studies/curve-reentrancy/SKILL.md +52 -0
- package/skills/case-studies/dao-hack/SKILL.md +51 -0
- package/skills/case-studies/euler-finance/SKILL.md +52 -0
- package/skills/case-studies/harvest-finance/SKILL.md +52 -0
- package/skills/case-studies/level-finance/SKILL.md +51 -0
- package/skills/case-studies/mango-markets/SKILL.md +53 -0
- package/skills/case-studies/nomad-bridge/SKILL.md +51 -0
- package/skills/case-studies/parity-multisig/SKILL.md +55 -0
- package/skills/case-studies/poly-network/SKILL.md +51 -0
- package/skills/case-studies/rari-fuse/SKILL.md +51 -0
- package/skills/case-studies/ronin-bridge/SKILL.md +52 -0
- package/skills/case-studies/wormhole-bridge/SKILL.md +51 -0
- package/skills/manifests/smartbugs.json +1 -3
- package/skills/manifests/sunweb3sec.json +1 -3
- package/skills/vulnerability-patterns/access-control/SKILL.md +14 -0
- package/skills/vulnerability-patterns/arbitrary-storage-location/SKILL.md +13 -1
- package/skills/vulnerability-patterns/assert-violation/SKILL.md +8 -1
- package/skills/vulnerability-patterns/asserting-contract-from-code-size/SKILL.md +12 -1
- package/skills/vulnerability-patterns/authorization-txorigin/SKILL.md +2 -1
- package/skills/vulnerability-patterns/cross-chain-bridge-vulnerabilities/SKILL.md +217 -0
- package/skills/vulnerability-patterns/default-visibility/SKILL.md +13 -1
- package/skills/vulnerability-patterns/delegatecall-untrusted-callee/SKILL.md +2 -1
- package/skills/vulnerability-patterns/dos-gas-limit/SKILL.md +8 -1
- package/skills/vulnerability-patterns/dos-revert/SKILL.md +1 -0
- package/skills/vulnerability-patterns/erc4626-exchange-rate-manipulation/SKILL.md +64 -0
- package/skills/vulnerability-patterns/fee-on-transfer-tokens/SKILL.md +93 -0
- package/skills/vulnerability-patterns/flash-loan-attacks/SKILL.md +1 -0
- package/skills/vulnerability-patterns/floating-pragma/SKILL.md +8 -1
- package/skills/vulnerability-patterns/front-running-attacks/SKILL.md +209 -0
- package/skills/vulnerability-patterns/gas-optimization-patterns/SKILL.md +203 -0
- package/skills/vulnerability-patterns/governance-attacks/SKILL.md +208 -0
- package/skills/vulnerability-patterns/hash-collision/SKILL.md +8 -1
- package/skills/vulnerability-patterns/inadherence-to-standards/SKILL.md +12 -1
- package/skills/vulnerability-patterns/incorrect-constructor/SKILL.md +8 -1
- package/skills/vulnerability-patterns/incorrect-inheritance-order/SKILL.md +8 -1
- package/skills/vulnerability-patterns/insufficient-gas-griefing/SKILL.md +12 -1
- package/skills/vulnerability-patterns/lack-of-precision/SKILL.md +7 -1
- package/skills/vulnerability-patterns/logic-errors/SKILL.md +10 -0
- package/skills/vulnerability-patterns/missing-parameter-bounds/SKILL.md +44 -0
- package/skills/vulnerability-patterns/missing-protection-signature-replay/SKILL.md +17 -1
- package/skills/vulnerability-patterns/msgvalue-loop/SKILL.md +12 -1
- package/skills/vulnerability-patterns/off-by-one/SKILL.md +7 -1
- package/skills/vulnerability-patterns/oracle-manipulation/SKILL.md +9 -0
- package/skills/vulnerability-patterns/outdated-compiler-version/SKILL.md +8 -1
- package/skills/vulnerability-patterns/overflow-underflow/SKILL.md +1 -0
- package/skills/vulnerability-patterns/proxy-vulnerabilities/SKILL.md +209 -0
- package/skills/vulnerability-patterns/reentrancy/SKILL.md +9 -0
- package/skills/vulnerability-patterns/shadowing-state-variables/SKILL.md +8 -1
- package/skills/vulnerability-patterns/share-accounting-desynchronization/SKILL.md +44 -0
- package/skills/vulnerability-patterns/signature-malleability/SKILL.md +2 -1
- package/skills/vulnerability-patterns/stateful-parameter-update-drift/SKILL.md +44 -0
- package/skills/vulnerability-patterns/unbounded-return-data/SKILL.md +12 -1
- package/skills/vulnerability-patterns/unchecked-return-values/SKILL.md +2 -1
- package/skills/vulnerability-patterns/unencrypted-private-data-on-chain/SKILL.md +8 -1
- package/skills/vulnerability-patterns/unexpected-ecrecover-null-address/SKILL.md +8 -1
- package/skills/vulnerability-patterns/uninitialized-storage-pointer/SKILL.md +8 -1
- package/skills/vulnerability-patterns/unsafe-erc20-transfers/SKILL.md +132 -0
- package/skills/vulnerability-patterns/unsafe-low-level-call/SKILL.md +12 -1
- package/skills/vulnerability-patterns/unsecure-signatures/SKILL.md +12 -1
- package/skills/vulnerability-patterns/unsupported-opcodes/SKILL.md +11 -1
- package/skills/vulnerability-patterns/unused-variables/SKILL.md +8 -1
- package/skills/vulnerability-patterns/use-of-deprecated-functions/SKILL.md +8 -1
- package/skills/vulnerability-patterns/weak-sources-randomness/SKILL.md +8 -1
- package/skills/vulnerability-patterns/weird-tokens/SKILL.md +10 -0
- package/skills/vulnerability-patterns/zero-address-misconfiguration/SKILL.md +48 -0
- package/src/agents/argus-prompt.ts +34 -7
- package/src/agents/pythia-prompt.ts +13 -4
- package/src/agents/scribe-prompt.ts +20 -2
- package/src/agents/sentinel-prompt.ts +45 -5
- package/src/cli/cli-program.ts +29 -26
- package/src/cli/commands/check-skills.ts +135 -0
- package/src/cli/commands/doctor.ts +48 -26
- package/src/cli/commands/init.ts +5 -3
- package/src/cli/commands/install.ts +7 -5
- package/src/cli/commands/lint-skills.ts +16 -12
- package/src/cli/index.ts +5 -5
- package/src/cli/types.ts +3 -3
- package/src/config/index.ts +1 -1
- package/src/config/loader.ts +4 -6
- package/src/config/schema.ts +6 -5
- package/src/config/types.ts +2 -2
- package/src/constants/defaults.ts +2 -0
- package/src/create-hooks.ts +145 -34
- package/src/create-managers.ts +10 -8
- package/src/create-tools.ts +13 -9
- package/src/features/background-agent/background-manager.ts +93 -87
- package/src/features/background-agent/index.ts +1 -1
- package/src/features/context-monitor/context-monitor.ts +3 -3
- package/src/features/context-monitor/index.ts +2 -2
- package/src/features/error-recovery/session-recovery.ts +2 -4
- package/src/features/error-recovery/tool-error-recovery.ts +12 -7
- package/src/features/index.ts +5 -5
- package/src/features/persistent-state/audit-state-manager.ts +143 -60
- package/src/features/persistent-state/global-run-index.ts +38 -0
- package/src/features/persistent-state/index.ts +1 -1
- package/src/features/persistent-state/run-journal.ts +86 -0
- package/src/hooks/config-handler.ts +28 -11
- package/src/hooks/context-budget.ts +2 -5
- package/src/hooks/event-hook.ts +47 -23
- package/src/hooks/hook-system.ts +4 -4
- package/src/hooks/index.ts +5 -5
- package/src/hooks/knowledge-sync-hook.ts +18 -21
- package/src/hooks/recon-context-builder.ts +2 -2
- package/src/hooks/safe-create-hook.ts +6 -7
- package/src/hooks/system-prompt-hook.ts +18 -1
- package/src/hooks/tool-tracking-hook.ts +110 -51
- package/src/hooks/types.ts +2 -1
- package/src/index.ts +24 -37
- package/src/knowledge/retry.ts +22 -22
- package/src/knowledge/scvd-client.ts +88 -95
- package/src/knowledge/scvd-errors.ts +35 -35
- package/src/knowledge/scvd-index.ts +78 -80
- package/src/knowledge/scvd-sync.ts +106 -101
- package/src/managers/index.ts +1 -1
- package/src/managers/types.ts +19 -14
- package/src/plugin-interface.ts +7 -9
- package/src/shared/binary-utils.ts +44 -35
- package/src/shared/deep-merge.ts +55 -36
- package/src/shared/file-utils.ts +21 -19
- package/src/shared/index.ts +11 -5
- package/src/shared/jsonc-parser.ts +123 -28
- package/src/shared/logger.ts +16 -3
- package/src/shared/project-utils.ts +30 -0
- package/src/skills/analysis/cluster.ts +414 -0
- package/src/skills/analysis/gates.ts +227 -0
- package/src/skills/analysis/index.ts +33 -0
- package/src/skills/analysis/normalize.ts +217 -0
- package/src/skills/analysis/similarity.ts +224 -0
- package/src/skills/argus-skill-resolver.ts +17 -6
- package/src/skills/skill-schema.ts +11 -10
- package/src/solodit-lifecycle.ts +203 -0
- package/src/state/audit-state.ts +8 -8
- package/src/state/finding-store.ts +68 -55
- package/src/state/types.ts +88 -67
- package/src/tools/argus-skill-load-tool.ts +12 -7
- package/src/tools/contract-analyzer-tool.ts +142 -77
- package/src/tools/forge-coverage-tool.ts +226 -0
- package/src/tools/forge-fuzz-tool.ts +127 -127
- package/src/tools/forge-test-tool.ts +201 -158
- package/src/tools/gas-analysis-tool.ts +264 -0
- package/src/tools/pattern-checker-tool.ts +203 -191
- package/src/tools/pattern-loader.ts +5 -111
- package/src/tools/pattern-schema.ts +3 -0
- package/src/tools/proxy-detection-tool.ts +224 -0
- package/src/tools/report-generator-tool.ts +305 -206
- package/src/tools/slither-tool.ts +266 -218
- package/src/tools/solodit-search-tool.ts +235 -119
- package/src/tools/sync-knowledge-tool.ts +7 -11
- package/src/utils/audit-artifact-detector.ts +28 -29
- package/src/utils/dependency-scanner.ts +37 -37
- package/src/utils/project-detector.ts +111 -124
- package/src/utils/solidity-parser.ts +175 -75
- package/skills/patterns/access-control.yaml +0 -31
- package/skills/patterns/erc4626.yaml +0 -29
- package/skills/patterns/flash-loan.yaml +0 -20
- package/skills/patterns/oracle.yaml +0 -30
- package/skills/patterns/proxy.yaml +0 -30
- package/skills/patterns/reentrancy.yaml +0 -30
- package/skills/patterns/signature.yaml +0 -31
- package/src/hooks/event-hook-v2.ts +0 -99
- package/src/state/plugin-state.ts +0 -14
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import { parseFrontmatter } from "../skill-schema"
|
|
2
|
+
|
|
3
|
+
/**
 * Normalized representation of one skill document, as returned by
 * `normalizeSkill`. The token arrays are pre-computed so similarity
 * metrics do not have to re-tokenize the text.
 */
export interface SkillDoc {
  /** Trimmed `name` value from the frontmatter. */
  name: string
  /** Frontmatter `description`, or an empty string when it is not a string. */
  description: string
  /** Frontmatter `category`, or `undefined` when it is not a string. */
  category: string | undefined
  /** Raw `regex` strings collected from the frontmatter `detection_rules` list. */
  detectionRules: string[]
  /** Lower-cased, whitespace-collapsed body with frontmatter, HTML comments and fenced code removed. */
  bodyText: string
  /** Tokens derived from `bodyText`. */
  bodyTokens: string[]
  /** Tokens derived from the name and description joined by a space. */
  nameDescTokens: string[]
  /** Identifier-like tokens derived from `detectionRules`. */
  ruleTokens: string[]
}
|
|
13
|
+
|
|
14
|
+
/** General English function words that carry no topical signal. */
const ENGLISH_STOPWORDS = [
  "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did", "will", "would", "shall", "should",
  "may", "might", "can", "could", "of", "in", "to", "for", "with", "on",
  "at", "by", "from", "as", "into", "through", "during", "before", "after", "above",
  "below", "between", "out", "off", "over", "under", "again", "further", "then", "once",
  "here", "there", "where", "when", "how", "all", "each", "every", "both", "few",
  "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own",
  "same", "than", "too", "very", "and", "but", "or", "if", "this", "that",
  "these", "those", "it", "its",
]

/** Solidity / smart-contract vocabulary that is also filtered out of token lists. */
const DOMAIN_STOPWORDS = [
  "contract", "function", "solidity", "smart", "vulnerability", "attack", "attacker", "token",
  "address", "value", "state", "require", "modifier", "external", "internal", "public",
  "private", "mapping", "uint256", "bool", "returns", "event", "emit",
]

/** Lower-case words that `tokenize` drops from its output. */
const STOPWORDS = new Set([...ENGLISH_STOPWORDS, ...DOMAIN_STOPWORDS])
|
|
123
|
+
|
|
124
|
+
/**
 * Removes a leading `---`-delimited frontmatter block from `content`.
 *
 * The closing delimiter must be a line consisting only of `---` plus optional
 * trailing spaces/tabs, terminated by a line break or the end of input.
 * Requiring that terminator stops a frontmatter line that merely *starts*
 * with `---` from being taken as the closing fence.
 *
 * @param content - Full text of a skill file.
 * @returns The text after the frontmatter, or `content` unchanged when no
 *   frontmatter block is found at the very start.
 */
function stripFrontmatter(content: string): string {
  // No `m` flag: `^` and `$` match only the start and end of the whole input.
  return content.replace(/^---[ \t]*\r?\n[\s\S]*?\r?\n---[ \t]*\r?(?:\n|$)/, "")
}
|
|
127
|
+
|
|
128
|
+
/**
 * Replaces every triple-backtick fenced span (opening fence through the next
 * closing fence) with a single space. An opening fence with no closing fence
 * is left untouched.
 */
function stripCodeBlocks(content: string): string {
  const fencedSpan = /```[\s\S]*?```/g
  return content.replace(fencedSpan, () => " ")
}
|
|
131
|
+
|
|
132
|
+
/**
 * Replaces each `<!-- ... -->` comment (including multi-line ones) with a
 * single space.
 */
function stripHtmlComments(content: string): string {
  const htmlComment = /<!--[\s\S]*?-->/g
  return content.replace(htmlComment, () => " ")
}
|
|
135
|
+
|
|
136
|
+
/**
 * Lower-cases `content`, collapses every run of whitespace to one space and
 * removes leading/trailing whitespace.
 */
function normalizeWhitespace(content: string): string {
  return content
    .toLowerCase()
    .split(/\s+/)
    .filter((chunk) => chunk.length > 0)
    .join(" ")
}
|
|
139
|
+
|
|
140
|
+
/**
 * Splits `text` into lower-case alphanumeric tokens.
 *
 * Tokens shorter than three characters and tokens listed in `STOPWORDS` are
 * discarded; order and duplicates of the remaining tokens are preserved.
 *
 * @returns The kept tokens, or an empty array for empty input.
 */
function tokenize(text: string): string[] {
  if (!text) return []

  const kept: string[] = []
  for (const candidate of text.toLowerCase().split(/[^a-z0-9]+/g)) {
    if (candidate.length < 3 || STOPWORDS.has(candidate)) continue
    kept.push(candidate)
  }
  return kept
}
|
|
149
|
+
|
|
150
|
+
/**
 * Type guard: true for any non-null value whose `typeof` is `"object"`
 * (arrays included).
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return value !== null && typeof value === "object"
}
|
|
153
|
+
|
|
154
|
+
/**
 * Collects the `regex` strings from the frontmatter `detection_rules` list.
 *
 * Entries that are not objects, or whose `regex` is not a string, are skipped.
 *
 * @returns The regex strings in list order; empty when `detection_rules` is
 *   missing or not an array.
 */
function extractDetectionRules(frontmatter: Record<string, unknown>): string[] {
  const candidates = frontmatter.detection_rules
  if (!Array.isArray(candidates)) return []

  const patterns: string[] = []
  for (const entry of candidates) {
    if (isRecord(entry) && typeof entry.regex === "string") {
      patterns.push(entry.regex)
    }
  }
  return patterns
}
|
|
167
|
+
|
|
168
|
+
/**
 * Strips leading and trailing runs of `_` and `.` from `token`, then
 * lower-cases the remainder.
 */
function normalizeRuleToken(token: string): string {
  const withoutLeading = token.replace(/^[_.]+/, "")
  const withoutEdges = withoutLeading.replace(/[_.]+$/, "")
  return withoutEdges.toLowerCase()
}
|
|
171
|
+
|
|
172
|
+
/**
 * Extracts identifier-like tokens from detection-rule regex sources.
 *
 * Each rule is split on any character outside `[a-zA-Z0-9_.]`; every piece is
 * passed through `normalizeRuleToken`, and results shorter than three
 * characters (including empty ones) are dropped.
 *
 * @returns Tokens in rule order, duplicates preserved.
 */
function extractRuleTokens(rules: string[]): string[] {
  return rules.flatMap((rule) =>
    rule
      .split(/[^a-zA-Z0-9_.]+/g)
      .map((piece) => normalizeRuleToken(piece))
      .filter((token) => token.length >= 3),
  )
}
|
|
187
|
+
|
|
188
|
+
/**
 * Parses the text of a skill file into a `SkillDoc`.
 *
 * @param content - Full text of the skill file, frontmatter included.
 * @returns The normalized document, or `null` when the content is blank,
 *   `parseFrontmatter` returns nothing, or the frontmatter has no non-blank
 *   string `name`.
 */
export function normalizeSkill(content: string): SkillDoc | null {
  if (content.trim().length === 0) return null

  const frontmatter = parseFrontmatter(content)
  if (!frontmatter) return null

  const { name: rawName, description: rawDescription, category: rawCategory } = frontmatter
  if (typeof rawName !== "string") return null

  const name = rawName.trim()
  if (!name) return null

  const description = typeof rawDescription === "string" ? rawDescription : ""
  const category = typeof rawCategory === "string" ? rawCategory : undefined
  const detectionRules = extractDetectionRules(frontmatter)

  // Body pipeline: drop frontmatter, then HTML comments, then fenced code,
  // and finally lower-case and collapse whitespace.
  const bodyText = normalizeWhitespace(
    stripCodeBlocks(stripHtmlComments(stripFrontmatter(content))),
  )

  return {
    name,
    description,
    category,
    detectionRules,
    bodyText,
    bodyTokens: tokenize(bodyText),
    nameDescTokens: tokenize(`${name} ${description}`),
    ruleTokens: extractRuleTokens(detectionRules),
  }
}
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
import type { SkillDoc } from "./normalize"
|
|
2
|
+
|
|
3
|
+
/** Per-metric and combined similarity between two skills, as built by `computeSimilarity`. */
export interface SimilarityScore {
  /** Weighted sum of the four component scores, clamped to the range 0..1. */
  composite: number
  /** TF-IDF cosine similarity of the body tokens. */
  bodyTfidf: number
  /** Jaccard similarity of 4-token shingles of the body tokens. */
  bodyShingle: number
  /** Jaccard similarity of the name + description tokens. */
  nameDesc: number
  /** Detection-rule overlap score. */
  detectionRules: number
}
|
|
10
|
+
|
|
11
|
+
/** Similarity result for one unordered pair of skills. */
export interface SimilarityPair {
  /** Name of the first skill in the pair. */
  skillA: string
  /** Name of the second skill in the pair. */
  skillB: string
  /** Similarity scores computed for the two skills. */
  score: SimilarityScore
}
|
|
16
|
+
|
|
17
|
+
/** Corpus-level statistics used to compute inverse document frequency. */
export interface TfidfCorpus {
  /** Number of documents the corpus was built from. */
  docCount: number
  /** For each token, the number of documents whose body contains it. */
  docFreq: Map<string, number>
}
|
|
21
|
+
|
|
22
|
+
// Weights applied to each component score when forming the composite
// similarity in `computeSimilarity`.

/** Weight of the body TF-IDF cosine score. */
const BODY_TFIDF_WEIGHT = 0.45
/** Weight of the body shingle Jaccard score. */
const BODY_SHINGLE_WEIGHT = 0.2
/** Weight of the name/description token Jaccard score. */
const NAME_DESC_WEIGHT = 0.2
/** Weight of the detection-rule overlap score. */
const DETECTION_RULES_WEIGHT = 0.15
|
|
26
|
+
|
|
27
|
+
/**
 * Clamps `value` to the closed interval 0..1.
 * `NaN` and both infinities map to 0.
 */
function clamp01(value: number): number {
  if (!Number.isFinite(value) || value < 0) return 0
  return value > 1 ? 1 : value
}
|
|
33
|
+
|
|
34
|
+
/**
 * Counts how many times each token occurs.
 *
 * @returns A map from token to occurrence count, keyed in first-seen order.
 */
function getTokenCounts(tokens: string[]): Map<string, number> {
  return tokens.reduce(
    (tally, token) => tally.set(token, (tally.get(token) ?? 0) + 1),
    new Map<string, number>(),
  )
}
|
|
41
|
+
|
|
42
|
+
/**
 * Builds a sparse TF-IDF vector for the body tokens of `doc`.
 *
 * For each distinct token the weight is
 * `(count / totalTokens) * ln(docCount / docFreq)`. Tokens with no positive
 * document frequency in `corpus`, and tokens whose weight is exactly zero
 * (e.g. a token present in every document), are left out of the vector.
 *
 * @returns Token → weight map; empty when the document has no body tokens or
 *   the corpus has no documents.
 */
function buildTfIdfVector(doc: SkillDoc, corpus: TfidfCorpus): Map<string, number> {
  const weights = new Map<string, number>()
  const { bodyTokens } = doc
  const { docCount, docFreq } = corpus

  if (bodyTokens.length === 0 || docCount === 0) return weights

  getTokenCounts(bodyTokens).forEach((occurrences, term) => {
    const documentFrequency = docFreq.get(term)
    // Skip missing, zero, negative and NaN frequencies alike.
    if (documentFrequency === undefined || !(documentFrequency > 0)) return

    const termFrequency = occurrences / bodyTokens.length
    const inverseDocFrequency = Math.log(docCount / documentFrequency)
    const weight = termFrequency * inverseDocFrequency
    if (weight !== 0) weights.set(term, weight)
  })

  return weights
}
|
|
67
|
+
|
|
68
|
+
/**
 * Dot product of two sparse vectors keyed by token.
 * Iterates the vector with fewer entries and looks each key up in the other.
 */
function dotProduct(a: Map<string, number>, b: Map<string, number>): number {
  if (a.size === 0 || b.size === 0) return 0

  const aIsSmaller = a.size < b.size
  const probe = aIsSmaller ? a : b
  const lookup = aIsSmaller ? b : a

  let total = 0
  probe.forEach((weight, token) => {
    total += weight * (lookup.get(token) ?? 0)
  })
  return total
}
|
|
79
|
+
|
|
80
|
+
/** Euclidean (L2) norm of a sparse vector: square root of the sum of squared weights. */
function vectorNorm(vector: Map<string, number>): number {
  const sumOfSquares = [...vector.values()].reduce((sum, weight) => sum + weight * weight, 0)
  return Math.sqrt(sumOfSquares)
}
|
|
87
|
+
|
|
88
|
+
/**
 * Builds the set of distinct `n`-token shingles (consecutive-token windows
 * joined by a single space).
 *
 * @returns An empty set when `n` is not positive or there are fewer than `n` tokens.
 */
function buildShingleSet(tokens: string[], n: number): Set<string> {
  const shingles = new Set<string>()

  if (n > 0) {
    const lastStart = tokens.length - n
    for (let start = 0; start <= lastStart; start += 1) {
      shingles.add(tokens.slice(start, start + n).join(" "))
    }
  }

  return shingles
}
|
|
98
|
+
|
|
99
|
+
/**
 * Number of elements present in both sets.
 * Iterates the set with fewer elements and probes the other.
 */
function setIntersectionSize<T>(a: Set<T>, b: Set<T>): number {
  if (a.size === 0 || b.size === 0) return 0

  const aIsSmaller = a.size < b.size
  const probe = aIsSmaller ? a : b
  const lookup = aIsSmaller ? b : a

  let shared = 0
  probe.forEach((member) => {
    if (lookup.has(member)) shared += 1
  })
  return shared
}
|
|
109
|
+
|
|
110
|
+
/**
 * Canonicalizes a rule's regex source for comparison: trims it and collapses
 * each internal run of whitespace to one space.
 */
function normalizeRegex(rule: string): string {
  return rule.trim().replace(/\s+/g, " ")
}
|
|
113
|
+
|
|
114
|
+
/**
 * Computes corpus statistics for TF-IDF.
 *
 * @param docs - Documents forming the corpus.
 * @returns The document count and, per token, the number of documents whose
 *   body tokens contain it at least once.
 */
export function buildTfidfCorpus(docs: SkillDoc[]): TfidfCorpus {
  const docFreq = new Map<string, number>()

  for (const { bodyTokens } of docs) {
    // De-duplicate so a token counts once per document.
    for (const term of new Set(bodyTokens)) {
      const seenIn = docFreq.get(term) ?? 0
      docFreq.set(term, seenIn + 1)
    }
  }

  return { docCount: docs.length, docFreq }
}
|
|
129
|
+
|
|
130
|
+
/**
 * Cosine similarity between the TF-IDF body vectors of two documents.
 *
 * @returns A value in the range 0..1; 0 when either vector is empty or has zero norm.
 */
export function tfidfCosine(a: SkillDoc, b: SkillDoc, corpus: TfidfCorpus): number {
  const left = buildTfIdfVector(a, corpus)
  const right = buildTfIdfVector(b, corpus)
  if (left.size === 0 || right.size === 0) return 0

  const leftNorm = vectorNorm(left)
  const rightNorm = vectorNorm(right)
  if (leftNorm === 0 || rightNorm === 0) return 0

  return clamp01(dotProduct(left, right) / (leftNorm * rightNorm))
}
|
|
142
|
+
|
|
143
|
+
/**
 * Jaccard similarity between the `n`-token shingle sets of two token lists.
 *
 * @param n - Shingle width in tokens (defaults to 4).
 * @returns |A ∩ B| / |A ∪ B| in the range 0..1, or 0 when neither list
 *   yields any shingle.
 */
export function shingleJaccard(a: string[], b: string[], n: number = 4): number {
  const shinglesA = buildShingleSet(a, n)
  const shinglesB = buildShingleSet(b, n)

  const shared = setIntersectionSize(shinglesA, shinglesB)
  const unionSize = shinglesA.size + shinglesB.size - shared
  // The union is empty exactly when both shingle sets are empty.
  if (unionSize === 0) return 0

  return clamp01(shared / unionSize)
}
|
|
154
|
+
|
|
155
|
+
/**
 * Jaccard similarity between the distinct tokens of two lists.
 *
 * @returns |A ∩ B| / |A ∪ B| in the range 0..1, or 0 when both lists are empty.
 */
export function tokenJaccard(a: string[], b: string[]): number {
  const distinctA = new Set(a)
  const distinctB = new Set(b)

  const shared = setIntersectionSize(distinctA, distinctB)
  const unionSize = distinctA.size + distinctB.size - shared
  // The union is empty exactly when both token sets are empty.
  if (unionSize === 0) return 0

  return clamp01(shared / unionSize)
}
|
|
166
|
+
|
|
167
|
+
/**
 * Scores how much the detection rules of two skills overlap.
 *
 * The score blends two signals:
 * - exact match (weight 0.6): distinct whitespace-normalized regexes shared by
 *   both skills, divided by the size of the larger distinct rule set;
 * - token overlap (weight 0.4): Jaccard similarity of the rule tokens.
 *
 * Both the shared count and the denominator are taken over de-duplicated rule
 * sets, so a skill that repeats a rule is not penalized and two skills with
 * the same distinct rules get an exact-match ratio of 1.
 *
 * @returns A value in the range 0..1; the exact-match part is 0 when neither
 *   skill has rules.
 */
export function detectionRuleOverlap(a: SkillDoc, b: SkillDoc): number {
  const rulesA = new Set(a.detectionRules.map((rule) => normalizeRegex(rule)))
  const rulesB = new Set(b.detectionRules.map((rule) => normalizeRegex(rule)))

  // Use set sizes (not raw array lengths) so numerator and denominator agree.
  const largerRuleSetSize = Math.max(rulesA.size, rulesB.size)
  const exactMatch =
    largerRuleSetSize === 0 ? 0 : setIntersectionSize(rulesA, rulesB) / largerRuleSetSize
  const tokenOverlap = tokenJaccard(a.ruleTokens, b.ruleTokens)

  return clamp01(exactMatch * 0.6 + tokenOverlap * 0.4)
}
|
|
180
|
+
|
|
181
|
+
/**
 * Computes all component similarity scores for a pair of skills and their
 * weighted composite.
 *
 * Every component, and the composite itself, is clamped to the range 0..1.
 * The composite is the sum of each component multiplied by its module-level
 * weight constant.
 */
export function computeSimilarity(a: SkillDoc, b: SkillDoc, corpus: TfidfCorpus): SimilarityScore {
  const components = {
    bodyTfidf: clamp01(tfidfCosine(a, b, corpus)),
    bodyShingle: clamp01(shingleJaccard(a.bodyTokens, b.bodyTokens, 4)),
    nameDesc: clamp01(tokenJaccard(a.nameDescTokens, b.nameDescTokens)),
    detectionRules: clamp01(detectionRuleOverlap(a, b)),
  }

  const weightedSum =
    components.bodyTfidf * BODY_TFIDF_WEIGHT +
    components.bodyShingle * BODY_SHINGLE_WEIGHT +
    components.nameDesc * NAME_DESC_WEIGHT +
    components.detectionRules * DETECTION_RULES_WEIGHT

  return { composite: clamp01(weightedSum), ...components }
}
|
|
202
|
+
|
|
203
|
+
/**
 * Scores every unordered pair of documents.
 *
 * @param docs - Documents to compare; missing (falsy) entries are skipped.
 * @param corpus - TF-IDF statistics passed through to `computeSimilarity`.
 * @returns One entry per pair (i < j), sorted by composite score, highest first.
 */
export function computeAllPairs(docs: SkillDoc[], corpus: TfidfCorpus): SimilarityPair[] {
  const pairs: SimilarityPair[] = []

  docs.forEach((first, firstIndex) => {
    if (!first) return

    for (let secondIndex = firstIndex + 1; secondIndex < docs.length; secondIndex += 1) {
      const second = docs[secondIndex]
      if (!second) continue

      pairs.push({
        skillA: first.name,
        skillB: second.name,
        score: computeSimilarity(first, second, corpus),
      })
    }
  })

  pairs.sort((left, right) => right.score.composite - left.score.composite)
  return pairs
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { existsSync, readdirSync, readFileSync
|
|
1
|
+
import { type Dirent, existsSync, readdirSync, readFileSync } from "node:fs"
|
|
2
2
|
import { homedir } from "node:os"
|
|
3
3
|
import { basename, extname, join, resolve } from "node:path"
|
|
4
4
|
import type { ArgusConfig } from "../config/types"
|
|
@@ -137,9 +137,14 @@ function resolveCustomSkillsRoot(projectDir: string, argusConfig?: ArgusConfig):
|
|
|
137
137
|
export function resolveSkillRoots(projectDir: string, argusConfig?: ArgusConfig): SkillRoot[] {
|
|
138
138
|
const precedence = argusConfig?.knowledge?.skillPrecedence ?? "bundled-first"
|
|
139
139
|
|
|
140
|
-
const bundledRoot: SkillRoot = {
|
|
140
|
+
const bundledRoot: SkillRoot = {
|
|
141
|
+
path: resolve(import.meta.dir, "../../skills"),
|
|
142
|
+
source: "bundled",
|
|
143
|
+
}
|
|
141
144
|
const customRoot = resolveCustomSkillsRoot(projectDir, argusConfig)
|
|
142
|
-
const customSkillRoot: SkillRoot | null = customRoot
|
|
145
|
+
const customSkillRoot: SkillRoot | null = customRoot
|
|
146
|
+
? { path: customRoot, source: "custom" }
|
|
147
|
+
: null
|
|
143
148
|
|
|
144
149
|
const roots: SkillRoot[] = []
|
|
145
150
|
|
|
@@ -169,7 +174,10 @@ export function resolveSkillRoots(projectDir: string, argusConfig?: ArgusConfig)
|
|
|
169
174
|
})
|
|
170
175
|
}
|
|
171
176
|
|
|
172
|
-
export function resolveArgusSkills(
|
|
177
|
+
export function resolveArgusSkills(
|
|
178
|
+
projectDir: string,
|
|
179
|
+
argusConfig?: ArgusConfig,
|
|
180
|
+
): Map<string, ResolvedSkill> {
|
|
173
181
|
const resolved = new Map<string, ResolvedSkill>()
|
|
174
182
|
const roots = resolveSkillRoots(projectDir, argusConfig)
|
|
175
183
|
const logger = createLogger()
|
|
@@ -188,7 +196,9 @@ export function resolveArgusSkills(projectDir: string, argusConfig?: ArgusConfig
|
|
|
188
196
|
if (frontmatter) {
|
|
189
197
|
const validation = validateSkillFrontmatter(frontmatter)
|
|
190
198
|
if (!validation.success) {
|
|
191
|
-
logger.warn(
|
|
199
|
+
logger.warn(
|
|
200
|
+
`Skipping skill with invalid frontmatter: ${markdownFile} — ${validation.errors.join(", ")}`,
|
|
201
|
+
)
|
|
192
202
|
continue
|
|
193
203
|
}
|
|
194
204
|
}
|
|
@@ -209,7 +219,8 @@ export function resolveArgusSkills(projectDir: string, argusConfig?: ArgusConfig
|
|
|
209
219
|
|
|
210
220
|
if (frontmatter) {
|
|
211
221
|
if (typeof frontmatter.source_url === "string") skill.source_url = frontmatter.source_url
|
|
212
|
-
if (typeof frontmatter.source_license === "string")
|
|
222
|
+
if (typeof frontmatter.source_license === "string")
|
|
223
|
+
skill.source_license = frontmatter.source_license
|
|
213
224
|
if (typeof frontmatter.imported_at === "string") skill.imported_at = frontmatter.imported_at
|
|
214
225
|
if (typeof frontmatter.source_hash === "string") skill.source_hash = frontmatter.source_hash
|
|
215
226
|
}
|
|
@@ -1,5 +1,9 @@
|
|
|
1
|
-
import { z } from "zod"
|
|
2
1
|
import { parse as parseYaml } from "yaml"
|
|
2
|
+
import { z } from "zod"
|
|
3
|
+
import { createLogger } from "../shared/logger"
|
|
4
|
+
import { PATTERN_CATEGORIES } from "../tools/pattern-schema"
|
|
5
|
+
|
|
6
|
+
const logger = createLogger()
|
|
3
7
|
|
|
4
8
|
export const DetectionRuleSchema = z.object({
|
|
5
9
|
regex: z.string(),
|
|
@@ -23,19 +27,14 @@ export const SkillFrontmatterSchema = z.object({
|
|
|
23
27
|
deprecated: z.boolean().optional(),
|
|
24
28
|
replacement: z.string().optional(),
|
|
25
29
|
category: z
|
|
26
|
-
.enum([
|
|
27
|
-
"vulnerability-pattern",
|
|
28
|
-
"methodology",
|
|
29
|
-
"protocol-pattern",
|
|
30
|
-
"checklist",
|
|
31
|
-
"reference",
|
|
32
|
-
])
|
|
30
|
+
.enum(["vulnerability-pattern", "methodology", "protocol-pattern", "checklist", "reference"])
|
|
33
31
|
.optional(),
|
|
34
32
|
source_url: z.string().url().optional(),
|
|
35
33
|
source_license: z.string().optional(),
|
|
36
34
|
imported_at: z.string().optional(),
|
|
37
35
|
source_hash: z.string().optional(),
|
|
38
36
|
detection_rules: z.array(DetectionRuleSchema).optional(),
|
|
37
|
+
pattern_category: z.enum(PATTERN_CATEGORIES).optional(),
|
|
39
38
|
})
|
|
40
39
|
|
|
41
40
|
export type SkillFrontmatter = z.infer<typeof SkillFrontmatterSchema>
|
|
@@ -68,7 +67,9 @@ export function parseFrontmatter(content: string): Record<string, unknown> | nul
|
|
|
68
67
|
if (typeof parsed === "object" && parsed !== null) {
|
|
69
68
|
return parsed as Record<string, unknown>
|
|
70
69
|
}
|
|
71
|
-
} catch {
|
|
70
|
+
} catch {
|
|
71
|
+
logger.debug("YAML frontmatter parse failed, falling back to line parser")
|
|
72
|
+
}
|
|
72
73
|
}
|
|
73
74
|
|
|
74
75
|
const lines = raw.split(/\r?\n/)
|
|
@@ -78,7 +79,7 @@ export function parseFrontmatter(content: string): Record<string, unknown> | nul
|
|
|
78
79
|
const kvMatch = line.match(/^([\w][\w-]*):\s*(.*)$/)
|
|
79
80
|
if (!kvMatch) continue
|
|
80
81
|
|
|
81
|
-
const key = kvMatch[1]
|
|
82
|
+
const key = kvMatch[1] ?? ""
|
|
82
83
|
let raw = kvMatch[2]?.trim() ?? ""
|
|
83
84
|
|
|
84
85
|
if ((raw.startsWith('"') && raw.endsWith('"')) || (raw.startsWith("'") && raw.endsWith("'"))) {
|