@npmcli/arborist 9.6.0 → 9.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -100,8 +100,10 @@ class Arborist extends Base {
100
100
  nodeVersion: process.version,
101
101
  ...options,
102
102
  Arborist: this.constructor,
103
+ allowScripts: options.allowScripts ?? null,
103
104
  binLinks: 'binLinks' in options ? !!options.binLinks : true,
104
105
  cache: options.cache || `${homedir()}/.npm/_cacache`,
106
+ dangerouslyAllowAllScripts: !!options.dangerouslyAllowAllScripts,
105
107
  dryRun: !!options.dryRun,
106
108
  formatPackageLock: 'formatPackageLock' in options ? !!options.formatPackageLock : true,
107
109
  force: !!options.force,
@@ -335,7 +335,8 @@ module.exports = cls => class IsolatedReifier extends cls {
335
335
  root.inventory.set(workspace.location, workspace)
336
336
  root.workspaces.set(wsName, workspace.path)
337
337
 
338
- // Create workspace Link. For root declared deps, link at root node_modules/. For undeclared deps, link at the workspace's own node_modules/ (self-link).
338
+ // Declared workspaces are symlinked at root node_modules/.
339
+ // Undeclared workspaces get a tree-only Link kept for diff/filter participation but not materialized on disk.
339
340
  const isDeclared = this.#rootDeclaredDeps.has(wsName)
340
341
  const wsLink = new IsolatedLink({
341
342
  location: isDeclared ? join('node_modules', wsName) : join(c.localLocation, 'node_modules', wsName),
@@ -348,7 +349,7 @@ module.exports = cls => class IsolatedReifier extends cls {
348
349
  target: workspace,
349
350
  })
350
351
  if (!isDeclared) {
351
- workspace.children.set(wsName, wsLink)
352
+ wsLink.isUndeclaredWorkspaceLink = true
352
353
  }
353
354
  root.children.set(wsName, wsLink)
354
355
  root.inventory.set(wsLink.location, wsLink)
@@ -12,6 +12,7 @@ const { isNodeGypPackage, defaultGypInstallScript } = require('@npmcli/node-gyp'
12
12
  const { promiseRetry } = require('@gar/promise-retry')
13
13
  const { log, time } = require('proc-log')
14
14
  const { resolve } = require('node:path')
15
+ const { isScriptAllowed } = require('../script-allowed.js')
15
16
 
16
17
  const boolEnv = b => b ? '1' : ''
17
18
  const sortNodes = (a, b) => (a.depth - b.depth) || localeCompare(a.path, b.path)
@@ -225,6 +226,18 @@ module.exports = cls => class Builder extends cls {
225
226
  return
226
227
  }
227
228
 
229
+ // Phase 1 allowScripts gate: a `false` verdict from the policy matcher
230
+ // means the user explicitly denied install scripts for this node, so skip
231
+ // it. `true` and `null` (unreviewed) both fall through to the existing
232
+ // detection logic — unreviewed nodes still run their scripts in Phase 1
233
+ // and are surfaced via the post-reify advisory warning. The global
234
+ // --ignore-scripts kill switch in #build() still takes precedence, and
235
+ // --dangerously-allow-all-scripts bypasses this gate entirely.
236
+ if (!this.options.dangerouslyAllowAllScripts &&
237
+ isScriptAllowed(node, this.options.allowScripts) === false) {
238
+ return
239
+ }
240
+
228
241
  if (this.#oldMeta === null) {
229
242
  const { root: { meta } } = node
230
243
  this.#oldMeta = meta && meta.loadedFromDisk &&
@@ -239,7 +239,7 @@ module.exports = cls => class Reifier extends cls {
239
239
  this.actualTree = this.idealTree
240
240
  this.idealTree = null
241
241
 
242
- if (!this.options.global) {
242
+ if (!this.options.global && !this.options.dryRun) {
243
243
  await this.actualTree.meta.save()
244
244
  const ignoreScripts = !!this.options.ignoreScripts
245
245
  // if we aren't doing a dry run or ignoring scripts and we actually made changes to the dep
@@ -760,6 +760,12 @@ module.exports = cls => class Reifier extends cls {
760
760
  }
761
761
 
762
762
  // node.isLink
763
+
764
+ // Tree-only Link: present in the tree for diff/filter participation, never materialized on disk.
765
+ if (node.isUndeclaredWorkspaceLink) {
766
+ return
767
+ }
768
+
763
769
  await rm(node.path, { recursive: true, force: true })
764
770
 
765
771
  // symlink
@@ -1381,6 +1387,10 @@ module.exports = cls => class Reifier extends cls {
1381
1387
  if (!child.isLink) {
1382
1388
  continue
1383
1389
  }
1390
+ // Tree-only Links never exist on disk; skipping them lets the sweep remove any stale self-link left by an older npm version.
1391
+ if (child.isUndeclaredWorkspaceLink) {
1392
+ continue
1393
+ }
1384
1394
  const nmIdx = loc.lastIndexOf(NM_PREFIX)
1385
1395
  if (nmIdx === -1 || loc.includes(STORE_MARKER)) {
1386
1396
  continue
@@ -0,0 +1,88 @@
1
+ const { isNodeGypPackage } = require('@npmcli/node-gyp')
2
+
3
+ // Returns the install-relevant lifecycle scripts that would run for a
4
+ // given arborist Node, or `{}` if there are none.
5
+ //
6
+ // Includes:
7
+ // - explicit preinstall/install/postinstall
8
+ // - prepare, but only for non-registry sources (git, file, link, remote)
9
+ // - synthetic `node-gyp rebuild`, when `binding.gyp` is present on disk
10
+ // and the package does not opt out via `gypfile: false` or define its
11
+ // own install / preinstall script
12
+
13
+ // Lifecycle-script enumeration boundary.
14
+ //
15
+ // IMPORTANT: this helper decides whether `prepare` should be included
16
+ // in the enumerated install scripts (true for non-registry sources only).
17
+ // It is NOT a policy-matching predicate. The policy matcher in
18
+ // script-allowed.js uses `isRegistryNode`, which is strictly tied to
19
+ // versionFromTgz(node.resolved). The two helpers exist separately on
20
+ // purpose:
21
+ //
22
+ // - `hasNonRegistryShape` (here): "should we consider running prepare
23
+ // on this node?" — a yes/no for what to enumerate.
24
+ // - `isRegistryNode` (script-allowed.js): "do we trust this node's
25
+ // identity enough to apply a policy entry?" — a security check.
26
+ //
27
+ // The looser fallback here (treating unknown-resolved nodes as registry,
28
+ // thus skipping `prepare`) is the safer default for enumeration: we'd
29
+ // rather omit a script we should have run than synthesise one for a
30
+ // non-registry source we couldn't confirm. The policy matcher's stricter
31
+ // behaviour is correct for its boundary; the two helpers must not be
32
+ // merged.
33
/**
 * Enumeration helper: reports whether a node looks like it came from a
 * non-registry source, which is the condition under which `prepare` is
 * included in the enumerated install scripts.
 *
 * @param {object} node - node-like object; reads `isRegistryDependency`
 *   and `resolved`.
 * @returns {boolean} `true` when the node has a non-registry shape.
 */
const hasNonRegistryShape = (node) => {
  // A boolean `isRegistryDependency` is authoritative when present.
  const registryFlag = node.isRegistryDependency
  if (registryFlag === true || registryFlag === false) {
    return !registryFlag
  }

  // With no resolved value there is nothing to inspect: treat the node as
  // registry-shaped, so `prepare` is not enumerated for it.
  const resolvedUrl = node.resolved
  const registryTarballUrl = /^https?:\/\/[^/]+\/.+\/-\/[^/]+-\d/
  return resolvedUrl ? !registryTarballUrl.test(resolvedUrl) : false
}
42
+
43
/**
 * Collects the install-relevant lifecycle scripts for a node.
 *
 * The result may contain `preinstall`, `install`, `postinstall`, and, for
 * nodes with a non-registry shape, `prepare`. When neither `preinstall` nor
 * `install` is declared and `gypfile` is not `false`, `isNodeGypPackage` is
 * consulted and a synthetic `node-gyp rebuild` install script is reported.
 * If nothing was collected but the node is flagged `hasInstallScript`, a
 * sentinel install entry is returned instead.
 *
 * @param {object} node - reads `package`, `path`, and `hasInstallScript`.
 * @returns {Promise<object>} map of lifecycle event name to script body;
 *   `{}` when there is nothing to run.
 */
const getInstallScripts = async (node) => {
  /* istanbul ignore next: arborist Nodes always carry a `package` object;
     defensive fallbacks for non-arborist callers. */
  const manifest = node.package || {}
  /* istanbul ignore next */
  const declared = manifest.scripts || {}
  const found = {}

  for (const event of ['preinstall', 'install', 'postinstall']) {
    if (declared[event]) {
      found[event] = declared[event]
    }
  }
  if (declared.prepare && hasNonRegistryShape(node)) {
    found.prepare = declared.prepare
  }

  // An explicit preinstall/install, or `gypfile: false`, suppresses the
  // implicit node-gyp build; only probe for it when none of those apply.
  const mayUseImplicitGyp =
    !found.preinstall && !found.install && manifest.gypfile !== false
  if (mayUseImplicitGyp && await isNodeGypPackage(node.path).catch(() => false)) {
    found.install = 'node-gyp rebuild'
  }

  // The node says it has install scripts but none could be enumerated:
  // report a placeholder so callers still see that scripts are present.
  const nothingFound = Object.keys(found).length === 0
  if (nothingFound && node.hasInstallScript === true) {
    found.install = '(install scripts present)'
  }

  return found
}
86
+
87
+ module.exports = getInstallScripts
88
+ module.exports.getInstallScripts = getInstallScripts
@@ -0,0 +1,340 @@
1
+ const npa = require('npm-package-arg')
2
+ const semver = require('semver')
3
+ const versionFromTgz = require('./version-from-tgz.js')
4
+
5
+ // Identity matcher for the allowScripts policy.
6
+ //
7
+ // Returns:
8
+ // - true: at least one allow entry matches and no deny entry matches
9
+ // - false: at least one deny entry matches (deny wins on conflict)
10
+ // - null: no entry matches (unreviewed)
11
+ //
12
+ // `policy` is a flat object of `spec-key -> boolean`, where spec-key is
13
+ // anything `npm-package-arg` can parse. `node` is an arborist Node.
14
+ //
15
+ // Identity rules (see RFC npm/rfcs#868):
16
+ // - registry deps match by the name+version parsed from the lockfile's
17
+ // resolved URL, NOT by `node.packageName` / `node.version`. Those two
18
+ // getters return `node.package.name` / `node.package.version`, which
19
+ // come from the tarball's own package.json and are therefore
20
+ // attacker-controlled. A package can publish a tarball claiming any
21
+ // name; the only trusted name is the one baked into the registry URL.
22
+ // - tarball / file / link / remote: exact match on node.resolved
23
+ // - git: match on hosted.ssh() plus a short-SHA prefix of the
24
+ // resolved committish
25
+
26
/**
 * Evaluates the allowScripts policy for a node.
 *
 * @param {object} node - node to evaluate; `inBundle` is read here and the
 *   node is handed to `matches` for every policy key.
 * @param {object|null|undefined} policy - flat map of spec key to boolean.
 * @returns {boolean|null} `false` when any matching entry is `false` (deny
 *   wins), `true` when a matching entry is `true` and none is `false`, and
 *   `null` when nothing matches, the node is bundled, or the policy is not
 *   an object.
 */
const isScriptAllowed = (node, policy) => {
  // Bundled dependencies always report as unreviewed, whatever the policy
  // says.
  if (node.inBundle) {
    return null
  }

  const hasPolicy = Boolean(policy) && typeof policy === 'object'
  if (!hasPolicy) {
    return null
  }

  // Gather the values of every entry whose key matches this node. Every
  // key is evaluated, in insertion order.
  const verdicts = Object.entries(policy)
    .filter(([specKey]) => matches(node, specKey))
    .map(([, verdict]) => verdict)

  // Only strict booleans count; any other value is ignored.
  if (verdicts.includes(false)) {
    return false
  }
  return verdicts.includes(true) ? true : null
}
68
+
69
/**
 * Tests whether a single policy key applies to a node.
 *
 * The key is parsed with `npa` and dispatched on the parsed spec type.
 * Keys that fail to parse, alias keys, and any type not listed below never
 * match.
 *
 * @param {object} node - node being evaluated.
 * @param {string} key - policy key.
 * @returns {boolean} whether the key matches the node.
 */
const matches = (node, key) => {
  let spec
  try {
    spec = npa(key)
  } catch {
    return false
  }

  const { type } = spec
  if (type === 'tag' || type === 'range' || type === 'version') {
    return matchRegistry(node, spec)
  }
  if (type === 'git') {
    return matchGit(node, spec)
  }
  if (type === 'file' || type === 'directory') {
    return matchFileOrDir(node, spec)
  }
  if (type === 'remote') {
    return matchRemote(node, spec)
  }

  // `alias` keys are deliberately unsupported (the real package name must
  // be used), and unknown spec types are treated the same way.
  /* istanbul ignore next: defensive fallback for future npa types. */
  return false
}
99
+
100
/**
 * Matches a registry-style policy key (`pkg`, `pkg@1.2.3`,
 * `pkg@1.0.0 || 2.0.0`) against a node.
 *
 * The node must pass `isRegistryNode`, and the name compared is the one
 * returned by `getTrustedRegistryIdentity`, not the node's own manifest
 * name.
 *
 * @param {object} node - node being evaluated.
 * @param {object} parsed - `npa` result for the policy key; reads `type`,
 *   `name`, `fetchSpec`, and `rawSpec`.
 * @returns {boolean} whether the key matches the node.
 */
const matchRegistry = (node, parsed) => {
  // Registry-style keys never apply to tarball/git/file nodes, even when
  // the names coincide.
  if (!isRegistryNode(node)) {
    return false
  }

  const identity = getTrustedRegistryIdentity(node)
  if (!identity || identity.name !== parsed.name) {
    return false
  }

  const { type, fetchSpec, rawSpec } = parsed
  switch (type) {
    case 'range': {
      // Bare name or `pkg@*`: name-only match.
      const isNameOnly = fetchSpec === '*' || rawSpec === '' || rawSpec === '*'
      if (isNameOnly) {
        return true
      }
      // Anything else must be a `||` list of exact versions, and the node
      // needs a trusted version to compare against.
      if (!identity.version || !isExactVersionDisjunction(fetchSpec)) {
        return false
      }
      return semver.satisfies(identity.version, fetchSpec, { loose: true })
    }

    case 'version':
      // Exact pin, compared verbatim.
      // NOTE(review): a key written as `pkg@v1.2.3` or `pkg@=1.2.3`
      // presumably keeps its prefix in `fetchSpec` and so would not equal
      // a URL-derived `1.2.3` — confirm the policy is normalized upstream.
      return identity.version === fetchSpec

    // `tag` keys (e.g. `pkg@latest`) never match, and neither does any
    // other type.
    /* istanbul ignore next: defensive */
    default:
      return false
  }
}
153
+
154
+ // Derive a registry node's trusted name+version.
155
+ //
156
+ // Preferred source: the lockfile's resolved URL parsed via
157
+ // versionFromTgz. arborist records the URL when it first adds the dep,
158
+ // before any tarball is unpacked, so the URL cannot be forged by the
159
+ // package's own package.json.
160
+ //
161
+ // Fallback for lockfiles produced with omit-lockfile-registry-resolved
162
+ // (where the URL is absent): take the dep name from an incoming
163
+ // dependency edge. The edge's spec was written by the consumer (or by an
164
+ // upstream package.json), not by the installed tarball. For aliases like
165
+ // `"trusted": "npm:naughty@1.0.0"`, the underlying registered package
166
+ // name is parsed out of the alias `subSpec`. The install location
167
+ // (`node_modules/trusted`) is deliberately not consulted because for
168
+ // aliases it carries only the alias name, which would let a malicious
169
+ // publisher bypass an allowScripts entry written for the real package.
170
+ //
171
+ // Version is left null in the fallback case because the only remaining
172
+ // source for it (`node.version`) reads from the tarball.
173
+ //
174
+ // Returns `{ name, version }` or `null` if no trusted identity exists.
175
/**
 * Derives a registry node's identity without reading its manifest.
 *
 * The name and version come from `versionFromTgz` applied to
 * `node.resolved` when that yields both. Otherwise the name comes from
 * `nameFromEdges` and the version is left `null`.
 *
 * @param {object} node - reads `resolved`; passed on to `nameFromEdges`.
 * @returns {{name: string, version: (string|null)}|null} the identity, or
 *   `null` when neither source produces a name.
 */
const getTrustedRegistryIdentity = (node) => {
  const resolvedUrl = node.resolved
  const fromUrl = (typeof resolvedUrl === 'string' && resolvedUrl !== '')
    ? versionFromTgz('', resolvedUrl)
    : null

  /* istanbul ignore else: versionFromTgz returns either a complete
     { name, version } or null; partial objects are not produced. */
  if (fromUrl && fromUrl.name && fromUrl.version) {
    return fromUrl
  }

  // No usable URL: fall back to the name carried by an incoming edge.
  const edgeName = nameFromEdges(node)
  return edgeName ? { name: edgeName, version: null } : null
}
190
+
191
/**
 * Finds a package name from a node's incoming dependency edges.
 *
 * Each edge's `name` and `spec` are resolved with `npa.resolve`; the first
 * edge that resolves to a registry spec decides the result. For an alias
 * whose target is a registry spec, the target's name is returned rather
 * than the alias name.
 *
 * @param {object} node - reads `edgesIn`, which must be iterable.
 * @returns {string|null} the package name, or `null` when `edgesIn` is
 *   missing, not iterable, or has no registry edge.
 */
const nameFromEdges = (node) => {
  const incoming = node.edgesIn
  const iterable = Boolean(incoming) && typeof incoming[Symbol.iterator] === 'function'
  if (!iterable) {
    return null
  }

  for (const edge of incoming) {
    let spec
    try {
      spec = npa.resolve(edge.name, edge.spec)
    } catch {
      // Unparseable edge: skip it and keep looking.
      continue
    }

    // Aliases: use the underlying registered package, not the alias.
    const aliasesRegistryPackage =
      spec.type === 'alias' && spec.subSpec && spec.subSpec.registry
    if (aliasesRegistryPackage) {
      return spec.subSpec.name
    }

    // Plain registry edge: the edge's own name is the package name.
    if (spec.registry) {
      return spec.name
    }
  }

  return null
}
215
+
216
+ // True if `rangeSpec` is one or more exact versions joined by `||`. Anything
217
+ // containing comparator operators (^, ~, >=, <, *) returns false.
218
/**
 * Checks that a range spec consists solely of exact versions joined by
 * `||`.
 *
 * @param {*} rangeSpec - candidate range string.
 * @returns {boolean} `true` only when `rangeSpec` is a non-blank string and
 *   every `||`-separated part, once trimmed, is non-empty and accepted by
 *   `semver.valid`.
 */
const isExactVersionDisjunction = (rangeSpec) => {
  /* istanbul ignore next: caller always passes parsed.fetchSpec, which
     npa guarantees to be a non-empty string for range specs. */
  if (typeof rangeSpec !== 'string' || rangeSpec.trim() === '') {
    return false
  }

  // Reject on the first part that is empty or not a single valid version.
  for (const rawPart of rangeSpec.split('||')) {
    const candidate = rawPart.trim()
    if (candidate === '' || semver.valid(candidate) === null) {
      return false
    }
  }
  return true
}
232
+
233
/**
 * Matches a git policy key against a node resolved from git.
 *
 * The repository is compared first: by canonical ssh URL when both sides
 * are hosted specs, otherwise by `fetchSpec`. If the key carries a
 * committish, the node's committish must start with it.
 *
 * @param {object} node - reads `resolved`.
 * @param {object} parsed - `npa` result for the policy key; reads `hosted`,
 *   `fetchSpec`, and `gitCommittish`.
 * @returns {boolean} whether the key matches the node.
 */
const matchGit = (node, parsed) => {
  const resolved = node.resolved
  if (!resolved || !resolved.startsWith('git')) {
    return false
  }

  let locked
  try {
    locked = npa(resolved)
  } catch {
    /* istanbul ignore next: npa parsing a git URL we already validated
       starts with `git` should not throw; defensive guard only. */
    return false
  }

  // Repository identity. Hosted specs compare by ssh URL with the
  // committish stripped; anything else compares by fetch spec. If neither
  // comparison is possible the key does not match.
  const sshOptions = { noCommittish: true }
  const keyRepo = parsed.hosted?.ssh(sshOptions)
  const lockedRepo = locked.hosted?.ssh(sshOptions)
  let sameRepo
  if (keyRepo && lockedRepo) {
    sameRepo = keyRepo === lockedRepo
  } else {
    sameRepo = Boolean(parsed.fetchSpec && locked.fetchSpec) &&
      parsed.fetchSpec === locked.fetchSpec
  }
  if (!sameRepo) {
    return false
  }

  // A key without a committish matches the repository as a whole.
  const wanted = parsed.gitCommittish || parsed.hosted?.committish
  if (!wanted) {
    return true
  }

  // Prefix direction matters: the node's committish must begin with the
  // key's, never the other way round.
  const actual = locked.gitCommittish || locked.hosted?.committish || ''
  return actual ? actual.startsWith(wanted) : false
}
283
+
284
/**
 * Matches a file/directory policy key against a node by exact comparison of
 * `node.resolved` with the key's `saveSpec` or `fetchSpec`.
 *
 * @param {object} node - reads `resolved`.
 * @param {object} parsed - `npa` result for the policy key.
 * @returns {boolean} `false` when the node has no resolved value or neither
 *   spec equals it.
 */
const matchFileOrDir = (node, parsed) => {
  const target = node.resolved
  return target
    ? [parsed.saveSpec, parsed.fetchSpec].some(spec => spec === target)
    : false
}
290
+
291
/**
 * Matches a remote-URL policy key against a node by exact comparison of
 * `node.resolved` with the key's `fetchSpec` or `saveSpec`.
 *
 * @param {object} node - reads `resolved`.
 * @param {object} parsed - `npa` result for the policy key.
 * @returns {boolean} `false` when the node has no resolved value or neither
 *   spec equals it.
 */
const matchRemote = (node, parsed) => {
  const url = node.resolved
  if (url) {
    return parsed.fetchSpec === url || parsed.saveSpec === url
  }
  return false
}
297
+
298
/**
 * Decides whether a node should be treated as a registry dependency for
 * policy matching.
 *
 * @param {object} node - reads `isRegistryDependency`, `resolved`, and
 *   `version`.
 * @returns {boolean} the node's own `isRegistryDependency` when that is a
 *   boolean; otherwise whether `resolved` looks like a registry tarball
 *   URL; otherwise (no `resolved`) whether the node has a truthy `version`.
 */
const isRegistryNode = (node) => {
  // A boolean `isRegistryDependency` is authoritative when present.
  const edgeVerdict = node.isRegistryDependency
  if (edgeVerdict === true || edgeVerdict === false) {
    return edgeVerdict
  }

  const resolvedUrl = node.resolved
  if (resolvedUrl) {
    // Registry tarballs live at `<host>/<pkg-name>/-/<pkg-name>-<version>.tgz`.
    // A path segment is required before `/-/`, so a URL such as
    // `https://evil.com/-/trusted-1.0.0.tgz` does not qualify.
    return /^https?:\/\/[^/]+\/.+\/-\/[^/]+-\d/.test(resolvedUrl)
  }

  // No resolved value: accept the node only if it reports a version.
  return Boolean(node.version)
}
321
+
322
+ // Trusted display identity for human-facing output (`npm install`
323
+ // advisory, `npm approve-scripts --allow-scripts-pending`). Same idea as
324
+ // getTrustedRegistryIdentity, but for DISPLAY only — version falls back
325
+ // to node.version when the URL doesn't carry one. Must never be used
326
+ // for policy matching.
327
/**
 * Builds a name/version pair for human-facing output only; it must not be
 * used for policy matching.
 *
 * Each field prefers the value from `getTrustedRegistryIdentity` and falls
 * back to the node's own `name` / `version`, then to `null`.
 *
 * @param {object} node - reads `name` and `version` as fallbacks.
 * @returns {{name: (string|null), version: (string|null)}} display identity.
 */
const trustedDisplay = (node) => {
  const identity = getTrustedRegistryIdentity(node)
  /* istanbul ignore next: defensive fallbacks for nodes without name/version */
  const name = identity?.name || node.name || null
  /* istanbul ignore next: defensive fallbacks for nodes without name/version */
  const version = identity?.version || node.version || null
  return { name, version }
}
335
+
336
+ module.exports = isScriptAllowed
337
+ module.exports.isScriptAllowed = isScriptAllowed
338
+ module.exports.isExactVersionDisjunction = isExactVersionDisjunction
339
+ module.exports.getTrustedRegistryIdentity = getTrustedRegistryIdentity
340
+ module.exports.trustedDisplay = trustedDisplay
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@npmcli/arborist",
3
- "version": "9.6.0",
3
+ "version": "9.7.0",
4
4
  "description": "Manage node_modules trees",
5
5
  "dependencies": {
6
6
  "@gar/promise-retry": "^1.0.0",