@npmcli/arborist 9.5.0 → 9.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,15 @@ const Shrinkwrap = require('../shrinkwrap.js')
28
28
  const { defaultLockfileVersion } = Shrinkwrap
29
29
  const Node = require('../node.js')
30
30
  const Link = require('../link.js')
31
+
32
// Maps a parsed spec.type to the name of the arborist option that gates it
// (allowGit / allowRemote / allowFile / allowDirectory). Spec types with no
// entry here are not gated at all.
// Hoisted to module scope so #checkAllow doesn't re-allocate it per call.
// The table is frozen and has no prototype: it is shared by every lookup, and
// a prototype-less object guarantees that `ALLOW_OPTION_FOR_TYPE[type]` is
// `undefined` for any type not listed below (a plain object literal would
// hand back inherited members such as `constructor` or `toString`).
const ALLOW_OPTION_FOR_TYPE = Object.freeze({
  __proto__: null,
  git: 'allowGit',
  remote: 'allowRemote',
  file: 'allowFile',
  directory: 'allowDirectory',
})
31
40
  const addRmPkgDeps = require('../add-rm-pkg-deps.js')
32
41
  const optionalSet = require('../optional-set.js')
33
42
  const { checkEngine, checkPlatform } = require('npm-install-checks')
@@ -649,6 +658,45 @@ module.exports = cls => class IdealTreeBuilder extends cls {
649
658
  return vuln.range
650
659
  }
651
660
 
661
+ // Enforces the allow-git / allow-file / allow-directory / allow-remote configs at the arborist resolution layer, before any branching into the symlink (Link) path or the manifest-fetch path.
662
+ // Pacote also enforces these inside FetcherBase.get() as defense-in-depth, but the symlink branch never reaches pacote, and the manifest cache here would bypass pacote on a cached hit.
663
+ // Throws the same { code: EALLOW${TYPE} } shape pacote uses, so callers and downstream consumers stay consistent.
664
+ #checkAllow (spec, edge) {
665
+ const optName = ALLOW_OPTION_FOR_TYPE[spec.type]
666
+ if (!optName) {
667
+ return
668
+ }
669
+ const allow = this.options[optName] ?? 'all'
670
+ if (allow === 'all') {
671
+ return
672
+ }
673
+ const isRoot = !!(edge?.from?.isProjectRoot || edge?.from?.isWorkspace)
674
+ if (allow !== 'none' && isRoot) {
675
+ return
676
+ }
677
+ throw Object.assign(
678
+ new Error(`Fetching${allow === 'root' ? ' non-root' : ''} packages of type "${spec.type}" have been disabled`),
679
+ {
680
+ code: `EALLOW${spec.type.toUpperCase()}`,
681
+ package: spec.toString(),
682
+ }
683
+ )
684
+ }
685
+
686
+ // Builds a Node representing a spec we failed to load (allow-* gate, network failure, ENOTARGET, etc.) and records it in #loadFailures so #pruneFailedOptional can later decide whether the failure is fatal or silently dropped for optional deps.
687
+ #failureNode (name, parent, error, edge) {
688
+ error.requiredBy = edge?.from?.location || '.'
689
+ const n = new Node({
690
+ name,
691
+ parent,
692
+ error,
693
+ installLinks: this.installLinks,
694
+ legacyPeerDeps: this.legacyPeerDeps,
695
+ })
696
+ this.#loadFailures.add(n)
697
+ return n
698
+ }
699
+
652
700
  #queueNamedUpdates () {
653
701
  // ignore top nodes, since they are not loaded the same way, and
654
702
  // probably have their own project associated with them.
@@ -1040,7 +1088,7 @@ This is a one-time fix-up, please be patient...
1040
1088
  // This can't be changed or removed till we figure out why
1041
1089
  // The test is named "tarball deps with transitive tarball deps"
1042
1090
  promises.push(() =>
1043
- this.#fetchManifest(npa.resolve(e.name, e.spec, fromPath(placed, e)), parent)
1091
+ this.#fetchManifest(npa.resolve(e.name, e.spec, fromPath(placed, e)), parent, e)
1044
1092
  .catch(() => null)
1045
1093
  )
1046
1094
  }
@@ -1231,12 +1279,14 @@ This is a one-time fix-up, please be patient...
1231
1279
  return problems
1232
1280
  }
1233
1281
 
1234
- async #fetchManifest (spec, parent) {
1282
+ async #fetchManifest (spec, parent, edge) {
1283
+ // Enforce allow-* gates before consulting the manifest cache so a cached entry from a different edge cannot bypass the policy.
1284
+ this.#checkAllow(spec, edge)
1235
1285
  const options = {
1236
1286
  ...this.options,
1237
1287
  avoid: this.#avoidRange(spec.name),
1238
1288
  fullMetadata: true,
1239
- _isRoot: parent?.isProjectRoot || parent?.isWorkspace,
1289
+ _isRoot: !!(edge?.from?.isProjectRoot || edge?.from?.isWorkspace),
1240
1290
  }
1241
1291
  // get the intended spec and stored metadata from yarn.lock file,
1242
1292
  // if available and valid.
@@ -1253,6 +1303,14 @@ This is a one-time fix-up, please be patient...
1253
1303
  }
1254
1304
 
1255
1305
  async #nodeFromSpec (name, spec, parent, edge) {
1306
+ // Enforce allow-git / allow-file / allow-directory / allow-remote before any branching, so the symlink (Link) path is enforced as well as the manifest-fetch path.
1307
+ // Route the failure through #loadFailures so optional-dep semantics apply (e.g. a transitive optionalDependencies entry that resolves to a disallowed git URL is silently dropped rather than failing the install).
1308
+ try {
1309
+ this.#checkAllow(spec, edge)
1310
+ } catch (error) {
1311
+ return this.#failureNode(name, parent, error, edge)
1312
+ }
1313
+
1256
1314
  // pacote will slap integrity on its options, so we have to clone the object so it doesn't get mutated.
1257
1315
  // Don't bother to load the manifest for link deps, because the target might be within another package that doesn't exist yet.
1258
1316
  const { installLinks, legacyPeerDeps } = this
@@ -1307,23 +1365,26 @@ This is a one-time fix-up, please be patient...
1307
1365
 
1308
1366
  // spec isn't a directory, and either isn't a workspace or the workspace we have
1309
1367
  // doesn't satisfy the edge. try to fetch a manifest and build a node from that.
1310
- return this.#fetchManifest(spec, parent)
1311
- .then(pkg => new Node({ name, pkg, parent, installLinks, legacyPeerDeps }), error => {
1312
- error.requiredBy = edge.from.location || '.'
1313
-
1314
- // failed to load the spec, either because of enotarget or
1315
- // fetch failure of some other sort. save it so we can verify
1316
- // later that it's optional; otherwise, the error is fatal.
1317
- const n = new Node({
1318
- name,
1319
- parent,
1320
- error,
1321
- installLinks,
1322
- legacyPeerDeps,
1323
- })
1324
- this.#loadFailures.add(n)
1325
- return n
1326
- })
1368
+ return this.#fetchManifest(spec, parent, edge)
1369
+ .then(
1370
+ pkg => {
1371
+ // When a proxy/upstream registry returns an incomplete manifest
1372
+ // (e.g. missing version field for platform-specific packages it
1373
+ // hasn't cached), treat it as a load failure so that optional deps
1374
+ // are properly pruned instead of written to the lockfile without
1375
+ // version metadata. Only apply to registry specs — file: deps
1376
+ // legitimately omit version.
1377
+ if (!pkg.version && spec.registry) {
1378
+ const error = Object.assign(
1379
+ new Error(`incomplete manifest for ${name}, missing version`),
1380
+ { code: 'EINCOMPLETEMANIFEST' }
1381
+ )
1382
+ return this.#failureNode(name, parent, error, edge)
1383
+ }
1384
+ return new Node({ name, pkg, parent, installLinks, legacyPeerDeps })
1385
+ },
1386
+ error => this.#failureNode(name, parent, error, edge)
1387
+ )
1327
1388
  }
1328
1389
 
1329
1390
  // load all peer deps and meta-peer deps into the node's parent
@@ -100,8 +100,10 @@ class Arborist extends Base {
100
100
  nodeVersion: process.version,
101
101
  ...options,
102
102
  Arborist: this.constructor,
103
+ allowScripts: options.allowScripts ?? null,
103
104
  binLinks: 'binLinks' in options ? !!options.binLinks : true,
104
105
  cache: options.cache || `${homedir()}/.npm/_cacache`,
106
+ dangerouslyAllowAllScripts: !!options.dangerouslyAllowAllScripts,
105
107
  dryRun: !!options.dryRun,
106
108
  formatPackageLock: 'formatPackageLock' in options ? !!options.formatPackageLock : true,
107
109
  force: !!options.force,
@@ -97,7 +97,9 @@ module.exports = cls => class IsolatedReifier extends cls {
97
97
  }
98
98
  this.counter = 0
99
99
 
100
- this.idealGraph.workspaces = await Promise.all(Array.from(idealTree.fsChildren.values(), w => this.#workspaceProxy(w)))
100
+ // Skip extraneous fsChildren: workspaces removed from the root manifest can linger in fsChildren via the lockfile, and re-materializing them here would re-create a directory the user just deleted.
101
+ const fsChildren = Array.from(idealTree.fsChildren.values()).filter(w => !w.extraneous)
102
+ this.idealGraph.workspaces = await Promise.all(fsChildren.map(w => this.#workspaceProxy(w)))
101
103
  const processed = new Set()
102
104
  const queue = [idealTree, ...idealTree.fsChildren]
103
105
  while (queue.length !== 0) {
@@ -333,7 +335,8 @@ module.exports = cls => class IsolatedReifier extends cls {
333
335
  root.inventory.set(workspace.location, workspace)
334
336
  root.workspaces.set(wsName, workspace.path)
335
337
 
336
- // Create workspace Link. For root declared deps, link at root node_modules/. For undeclared deps, link at the workspace's own node_modules/ (self-link).
338
+ // Declared workspaces are symlinked at root node_modules/.
339
+ // Undeclared workspaces get a tree-only Link kept for diff/filter participation but not materialized on disk.
337
340
  const isDeclared = this.#rootDeclaredDeps.has(wsName)
338
341
  const wsLink = new IsolatedLink({
339
342
  location: isDeclared ? join('node_modules', wsName) : join(c.localLocation, 'node_modules', wsName),
@@ -346,7 +349,7 @@ module.exports = cls => class IsolatedReifier extends cls {
346
349
  target: workspace,
347
350
  })
348
351
  if (!isDeclared) {
349
- workspace.children.set(wsName, wsLink)
352
+ wsLink.isUndeclaredWorkspaceLink = true
350
353
  }
351
354
  root.children.set(wsName, wsLink)
352
355
  root.inventory.set(wsLink.location, wsLink)
@@ -12,6 +12,7 @@ const { isNodeGypPackage, defaultGypInstallScript } = require('@npmcli/node-gyp'
12
12
  const { promiseRetry } = require('@gar/promise-retry')
13
13
  const { log, time } = require('proc-log')
14
14
  const { resolve } = require('node:path')
15
+ const { isScriptAllowed } = require('../script-allowed.js')
15
16
 
16
17
  const boolEnv = b => b ? '1' : ''
17
18
  const sortNodes = (a, b) => (a.depth - b.depth) || localeCompare(a.path, b.path)
@@ -225,6 +226,18 @@ module.exports = cls => class Builder extends cls {
225
226
  return
226
227
  }
227
228
 
229
+ // Phase 1 allowScripts gate: a `false` verdict from the policy matcher
230
+ // means the user explicitly denied install scripts for this node, so skip
231
+ // it. `true` and `null` (unreviewed) both fall through to the existing
232
+ // detection logic — unreviewed nodes still run their scripts in Phase 1
233
+ // and are surfaced via the post-reify advisory warning. The global
234
+ // --ignore-scripts kill switch in #build() still takes precedence, and
235
+ // --dangerously-allow-all-scripts bypasses this gate entirely.
236
+ if (!this.options.dangerouslyAllowAllScripts &&
237
+ isScriptAllowed(node, this.options.allowScripts) === false) {
238
+ return
239
+ }
240
+
228
241
  if (this.#oldMeta === null) {
229
242
  const { root: { meta } } = node
230
243
  this.#oldMeta = meta && meta.loadedFromDisk &&
@@ -4,6 +4,7 @@ const hgi = require('hosted-git-info')
4
4
  const npa = require('npm-package-arg')
5
5
  const packageContents = require('@npmcli/installed-package-contents')
6
6
  const pacote = require('pacote')
7
+ const { pickRegistry } = require('npm-registry-fetch')
7
8
  const promiseAllRejectLate = require('promise-all-reject-late')
8
9
  const runScript = require('@npmcli/run-script')
9
10
  const { callLimit: promiseCallLimit } = require('promise-call-limit')
@@ -238,7 +239,7 @@ module.exports = cls => class Reifier extends cls {
238
239
  this.actualTree = this.idealTree
239
240
  this.idealTree = null
240
241
 
241
- if (!this.options.global) {
242
+ if (!this.options.global && !this.options.dryRun) {
242
243
  await this.actualTree.meta.save()
243
244
  const ignoreScripts = !!this.options.ignoreScripts
244
245
  // if we aren't doing a dry run or ignoring scripts and we actually made changes to the dep
@@ -741,7 +742,14 @@ module.exports = cls => class Reifier extends cls {
741
742
  ...this.options,
742
743
  resolved: node.resolved,
743
744
  integrity: node.integrity,
744
- _isRoot: node.parent?.isProjectRoot || node.parent?.isWorkspace,
745
+ // A node counts as "root" for allow-* enforcement if it satisfies at least one valid dependency edge declared by the project root or a workspace.
746
+ // node.parent is unsafe here: after hoisting, transitive packages can have the project root as their tree parent.
747
+ _isRoot: [...node.edgesIn].some(e =>
748
+ e.valid && (e.from?.isProjectRoot || e.from?.isWorkspace)
749
+ ),
750
+ // pacote's npa re-parses our `name@URL` spec as type=remote, so allowRemote would mis-fire on registry tarballs.
751
+ // Override only when we can prove the URL is registry-mediated; see #isRegistryResolvedTarball.
752
+ ...(this.#isRegistryResolvedTarball(node) ? { allowRemote: 'all' } : {}),
745
753
  })
746
754
  // store nodes don't use Node class so node.package doesn't get updated
747
755
  if (node.isInStore) {
@@ -752,6 +760,12 @@ module.exports = cls => class Reifier extends cls {
752
760
  }
753
761
 
754
762
  // node.isLink
763
+
764
+ // Tree-only Link: present in the tree for diff/filter participation, never materialized on disk.
765
+ if (node.isUndeclaredWorkspaceLink) {
766
+ return
767
+ }
768
+
755
769
  await rm(node.path, { recursive: true, force: true })
756
770
 
757
771
  // symlink
@@ -865,6 +879,24 @@ module.exports = cls => class Reifier extends cls {
865
879
  return wrapper
866
880
  }
867
881
 
882
+ // When extracting a registry-resolved package, the spec we hand to pacote is name@URL.
883
+ // pacote re-parses that with npa and gets spec.type === 'remote', so without an override the allow-remote gate would fire on every registry tarball (both =none and =root mis-fire).
884
+ // Returns true only when we are confident this is a registry-mediated install: the node's inbound edges must all be registry-typed (no exotic spec smuggled the URL in) AND the resolved URL's host must match the registry npm-registry-fetch selected for this spec, so a tampered lockfile pointing at an attacker host still hits the gate.
885
+ #isRegistryResolvedTarball (node) {
886
+ if (!node.resolved || !node.isRegistryDependency) {
887
+ return false
888
+ }
889
+ try {
890
+ // Hostnames are case-insensitive; lowercase both sides for safety even though WHATWG URL already normalizes.
891
+ const resolvedHost = new URL(node.resolved).hostname.toLowerCase()
892
+ // pickRegistry only consults spec.scope, so a bare-name (tag) parse is sufficient and avoids a node.version dependency.
893
+ const registryHost = new URL(pickRegistry(npa(node.name), this.options)).hostname.toLowerCase()
894
+ return resolvedHost === registryHost
895
+ } catch {
896
+ return false
897
+ }
898
+ }
899
+
868
900
  #registryResolved (resolved) {
869
901
  // the default registry url is a magic value meaning "the currently
870
902
  // configured registry".
@@ -1355,6 +1387,10 @@ module.exports = cls => class Reifier extends cls {
1355
1387
  if (!child.isLink) {
1356
1388
  continue
1357
1389
  }
1390
+ // Tree-only Links never exist on disk; skipping them lets the sweep remove any stale self-link left by an older npm version.
1391
+ if (child.isUndeclaredWorkspaceLink) {
1392
+ continue
1393
+ }
1358
1394
  const nmIdx = loc.lastIndexOf(NM_PREFIX)
1359
1395
  if (nmIdx === -1 || loc.includes(STORE_MARKER)) {
1360
1396
  continue
@@ -0,0 +1,88 @@
1
+ const { isNodeGypPackage } = require('@npmcli/node-gyp')
2
+
3
+ // Returns the install-relevant lifecycle scripts that would run for a
4
+ // given arborist Node, or `{}` if there are none.
5
+ //
6
+ // Includes:
7
+ // - explicit preinstall/install/postinstall
8
+ // - prepare, but only for non-registry sources (git, file, link, remote)
9
+ // - synthetic `node-gyp rebuild`, when `binding.gyp` is present on disk
10
+ // and the package does not opt out via `gypfile: false` or define its
11
+ // own install / preinstall script
12
+
13
// Lifecycle-script enumeration boundary.
//
// Answers one question: "should `prepare` be included when enumerating this
// node's install scripts?" — true only for nodes that do not look like
// registry installs.
//
// IMPORTANT: this is NOT a policy-matching predicate. The policy matcher in
// script-allowed.js keeps its own, separate `isRegistryNode` check that
// decides whether a node's identity may be matched against a policy entry.
// The two helpers exist separately on purpose and must not be merged.
//
// Decision order:
//   1. If the node exposes a boolean `isRegistryDependency`, use its negation.
//   2. Otherwise a node with no `resolved` value is treated as registry-shaped
//      (returns false, so `prepare` is skipped). This looser fallback is the
//      safer default for enumeration: better to omit a script than to
//      synthesise one for a non-registry source that could not be confirmed.
//   3. Otherwise the node is non-registry unless `resolved` looks like a
//      registry tarball URL (`http(s)://<host>/<path>/-/<name>-<digit>…`).
const hasNonRegistryShape = (node) => {
  const registryFlag = node.isRegistryDependency
  if (typeof registryFlag === 'boolean') {
    return !registryFlag
  }
  const registryTarballUrl = /^https?:\/\/[^/]+\/.+\/-\/[^/]+-\d/
  return Boolean(node.resolved) && !registryTarballUrl.test(node.resolved)
}
42
+
43
// Collects the install-relevant lifecycle scripts for an arborist Node.
//
// Resolves to an object keyed by script name (`{}` when nothing applies):
//   - preinstall / install / postinstall, copied when present in package.json
//   - prepare, only when hasNonRegistryShape(node) says the node is not a
//     registry install
//   - a synthetic `install: 'node-gyp rebuild'` when the package defines no
//     preinstall/install of its own, does not opt out via `gypfile: false`,
//     and isNodeGypPackage(node.path) resolves truthy (a rejected probe is
//     treated as "not a gyp package")
//   - a sentinel `install: '(install scripts present)'` when nothing was
//     collected but the node carries `hasInstallScript: true`
const getInstallScripts = async (node) => {
  /* istanbul ignore next: arborist Nodes always carry a `package` object;
     defensive fallbacks for non-arborist callers. */
  const pkg = node.package || {}
  /* istanbul ignore next */
  const scripts = pkg.scripts || {}
  const collected = {}

  for (const event of ['preinstall', 'install', 'postinstall']) {
    if (scripts[event]) {
      collected[event] = scripts[event]
    }
  }
  if (scripts.prepare && hasNonRegistryShape(node)) {
    collected.prepare = scripts.prepare
  }

  // Only probe the filesystem for binding.gyp when the package neither
  // defines its own preinstall/install nor opts out with `gypfile: false`.
  const mayNeedGypRebuild =
    !collected.preinstall && !collected.install && pkg.gypfile !== false
  if (mayNeedGypRebuild && await isNodeGypPackage(node.path).catch(() => false)) {
    collected.install = 'node-gyp rebuild'
  }

  // Lockfile-only nodes (e.g. `npm ci` before reify) carry
  // `hasInstallScript: true` but no enumerated scripts: the lockfile records
  // the presence flag but never the script bodies. Without this fallback the
  // strict-allow-scripts preflight would miss them entirely and let
  // postinstall run. The real script body cannot be recovered without
  // fetching the manifest, so emit a sentinel instead.
  const nothingCollected = Object.keys(collected).length === 0
  if (nothingCollected && node.hasInstallScript === true) {
    collected.install = '(install scripts present)'
  }

  return collected
}
86
+
87
+ module.exports = getInstallScripts
88
+ module.exports.getInstallScripts = getInstallScripts
package/lib/link.js CHANGED
@@ -109,12 +109,24 @@ class Link extends Node {
109
109
  // so this is a no-op
110
110
  [_loadDeps] () {}
111
111
 
112
- // When a Link receives overrides (via edgesIn), forward them to the target node which holds the actual edgesOut.
113
- // Without this, overrides stop at the Link and never reach the target's dependency edges.
112
+ // When a Link receives overrides (via edgesIn), forward them to the target node which holds the actual edgesOut — but only when the OverrideSet has at least one rule that names a dep the target actually depends on.
113
+ // Without this scope, the link forwards a generic ancestor OverrideSet that has no real effect on the target's edges, but still flips the target to "has overrides", which changes downstream `canReplaceWith` / placement decisions and causes `npm ci` to re-resolve lockfile-pinned edges from the registry.
114
+ // See npm/cli#9357.
114
115
  recalculateOutEdgesOverrides () {
115
- if (this.target) {
116
- this.target.updateOverridesEdgeInAdded(this.overrides)
116
+ if (!this.target || !this.overrides) {
117
+ return
118
+ }
119
+ let hasMatchingRule = false
120
+ for (const rule of this.overrides.ruleset.values()) {
121
+ if (this.target.edgesOut.has(rule.name)) {
122
+ hasMatchingRule = true
123
+ break
124
+ }
125
+ }
126
+ if (!hasMatchingRule) {
127
+ return
117
128
  }
129
+ this.target.updateOverridesEdgeInAdded(this.overrides)
118
130
  }
119
131
 
120
132
  // links can't have children, only their targets can
@@ -0,0 +1,340 @@
1
+ const npa = require('npm-package-arg')
2
+ const semver = require('semver')
3
+ const versionFromTgz = require('./version-from-tgz.js')
4
+
5
// Identity matcher for the allowScripts policy.
//
// node:   an arborist Node.
// policy: a flat object of `spec-key -> boolean`, where spec-key is anything
//         `npm-package-arg` can parse.
//
// Returns:
//   - true:  at least one allow entry matches and no deny entry matches
//   - false: at least one deny entry matches (deny wins on conflict)
//   - null:  no entry matches (unreviewed), the policy is missing or not an
//            object, or the node is bundled
//
// Identity rules (see RFC npm/rfcs#868):
//   - registry deps match by the name+version parsed from the lockfile's
//     resolved URL, NOT by `node.packageName` / `node.version`. Those getters
//     return values from the tarball's own package.json and are therefore
//     attacker-controlled; the only trusted name is the one baked into the
//     registry URL.
//   - tarball / file / link / remote: exact match on node.resolved
//   - git: match on hosted.ssh() plus a short-SHA prefix of the resolved
//     committish
const isScriptAllowed = (node, policy) => {
  // Bundled dependencies cannot be allowlisted in Phase 1: matching by
  // name@version from a bundled tarball would reintroduce manifest confusion
  // (a bundled tarball can claim any name and version). They are reported as
  // unreviewed regardless of any policy entries.
  if (node.inBundle) {
    return null
  }
  if (!policy || typeof policy !== 'object') {
    return null
  }

  const seen = { allow: false, deny: false }
  for (const key of Object.keys(policy)) {
    if (!matches(node, key)) {
      continue
    }
    const verdict = policy[key]
    if (verdict === false) {
      seen.deny = true
    /* istanbul ignore else: policy values are strictly true/false;
       defensive guard against unexpected coercions. */
    } else if (verdict === true) {
      seen.allow = true
    }
  }

  // Deny wins over allow; no matching entry at all means "unreviewed".
  if (seen.deny) {
    return false
  }
  return seen.allow ? true : null
}
68
+
69
// Tests a single policy key against a node.
// The key is parsed with npm-package-arg and the comparison is delegated to
// the matcher for the parsed spec type. Keys that fail to parse, alias keys,
// and any spec type not listed below never match.
const matches = (node, key) => {
  let spec
  try {
    spec = npa(key)
  } catch {
    return false
  }

  const { type } = spec
  if (type === 'tag' || type === 'range' || type === 'version') {
    return matchRegistry(node, spec)
  }
  if (type === 'git') {
    return matchGit(node, spec)
  }
  if (type === 'file' || type === 'directory') {
    return matchFileOrDir(node, spec)
  }
  if (type === 'remote') {
    return matchRemote(node, spec)
  }
  // Aliases are disallowed as policy keys: the user has to address the real
  // package name. Anything else is an npa type this matcher does not handle.
  /* istanbul ignore next: the branches above cover every npa type we expect
     besides alias; defensive fallback for future npa types. */
  return false
}
99
+
100
// Matches a registry-style policy key (`pkg`, `pkg@1.2.3`, `pkg@1 || 2`)
// against a node. `parsed` is the npa result for the key, with type
// tag / range / version.
//
// The node must be a registry dep: a registry-style key must not match a
// tarball or git node even if their names happen to coincide. Name and
// version come from getTrustedRegistryIdentity — never from
// `node.packageName` / `node.version`, which read the tarball's own
// package.json and can be forged by a malicious publisher.
const matchRegistry = (node, parsed) => {
  if (!isRegistryNode(node)) {
    return false
  }
  const trusted = getTrustedRegistryIdentity(node)
  if (!trusted || trusted.name !== parsed.name) {
    return false
  }

  const { type, fetchSpec, rawSpec } = parsed
  switch (type) {
    case 'version':
      // Exact pin like `pkg@1.2.3`.
      return trusted.version === fetchSpec

    case 'range': {
      // Covers `pkg@^1`, `pkg@1 || 2`, `pkg@*`, `pkg@>=0`, and bare names
      // like `pkg` (npa parses these as range with fetchSpec='*').
      // Bare name or `pkg@*`: treat as name-only allow.
      const nameOnly = fetchSpec === '*' || rawSpec === '' || rawSpec === '*'
      if (nameOnly) {
        return true
      }
      // Only exact versions joined by `||` are honoured; ranges like
      // ^/~/>=/< would silently allow versions the user has never reviewed.
      if (!trusted.version || !isExactVersionDisjunction(fetchSpec)) {
        return false
      }
      return semver.satisfies(trusted.version, fetchSpec, { loose: true })
    }

    /* istanbul ignore next: `tag` keys (e.g. `pkg@latest`) are rejected up
       front by validatePolicy in resolve-allow-scripts.js because tags look
       like a pin but can't be verified at install time; refusing them here
       is defense-in-depth for callers that bypass the resolver. Any other
       type cannot reach this function. */
    default:
      return false
  }
}
153
+
154
// Derives a registry node's trusted name+version.
//
// Preferred source: the lockfile's resolved URL, parsed via versionFromTgz.
// arborist records the URL when it first adds the dep, before any tarball is
// unpacked, so the URL cannot be forged by the package's own package.json.
//
// Fallback for lockfiles produced with omit-lockfile-registry-resolved (where
// the URL is absent): take the dep name from an incoming dependency edge via
// nameFromEdges. The edge's spec was written by the consumer (or by an
// upstream package.json), not by the installed tarball. The install location
// (`node_modules/<alias>`) is deliberately not consulted because for aliases
// it carries only the alias name, which would let a malicious publisher
// bypass an allowScripts entry written for the real package.
//
// Version is null in the fallback case because the only remaining source for
// it (`node.version`) reads from the tarball.
//
// Returns `{ name, version }`, or `null` if no trusted identity exists.
const getTrustedRegistryIdentity = (node) => {
  const { resolved } = node
  if (resolved && typeof resolved === 'string') {
    const fromUrl = versionFromTgz('', resolved)
    /* istanbul ignore else: versionFromTgz returns either a complete
       { name, version } or null; partial objects are not produced. */
    if (fromUrl && fromUrl.name && fromUrl.version) {
      return fromUrl
    }
  }

  const edgeName = nameFromEdges(node)
  return edgeName ? { name: edgeName, version: null } : null
}
190
+
191
// Finds a trusted package name from a node's incoming edges.
// Walks `node.edgesIn` in iteration order and returns the name from the first
// edge whose spec parses as a registry spec; for alias edges whose target is
// a registry spec, the underlying registered package name is returned rather
// than the alias. Edges whose spec fails to parse are skipped.
// Returns null when `edgesIn` is missing, is not iterable, or has no usable
// edge.
const nameFromEdges = (node) => {
  const incoming = node.edgesIn
  if (!incoming || typeof incoming[Symbol.iterator] !== 'function') {
    return null
  }

  for (const edge of incoming) {
    let spec
    try {
      spec = npa.resolve(edge.name, edge.spec)
    } catch {
      continue
    }
    // Aliases: trust the underlying registered package, not the alias.
    const isRegistryAlias = spec.type === 'alias' && spec.subSpec && spec.subSpec.registry
    if (isRegistryAlias) {
      return spec.subSpec.name
    }
    // Non-aliased registry edge: the edge name is the package name as written
    // by the consumer / upstream, which is trusted (it is not read from the
    // installed tarball).
    if (spec.registry) {
      return spec.name
    }
  }
  return null
}
215
+
216
// True if `rangeSpec` is one or more exact versions joined by `||`.
// Non-string or blank input, an empty alternative (e.g. `1.0.0 ||`), or any
// alternative that `semver.valid` rejects — which includes anything using
// comparator operators (^, ~, >=, <, *) — yields false.
const isExactVersionDisjunction = (rangeSpec) => {
  /* istanbul ignore next: caller always passes parsed.fetchSpec, which
     npa guarantees to be a non-empty string for range specs. */
  if (typeof rangeSpec !== 'string' || rangeSpec.trim() === '') {
    return false
  }
  for (const alternative of rangeSpec.split('||')) {
    const candidate = alternative.trim()
    if (candidate === '' || semver.valid(candidate) === null) {
      return false
    }
  }
  return true
}
232
+
233
// Matches a git policy key against a node.
// `parsed` is the npa result for the key; the node side is obtained by
// parsing `node.resolved`, which must start with `git`.
//
// Repository identity: when both sides are hosted, their canonical ssh URLs
// (without committish) must be equal; otherwise both must have a fetchSpec
// and the two must be equal. If neither comparison is possible, no match.
//
// Committish: a key without a committish matches on repository alone.
// Otherwise the node's resolved committish must START WITH the key's
// committish. Users typically write short SHAs in the policy while the
// lockfile stores 40-char SHAs; direction matters — a longer key matching a
// shorter resolved committish would let a malformed lockfile or a divergent
// resolver allow scripts the user never approved.
const matchGit = (node, parsed) => {
  const { resolved } = node
  if (!resolved || !resolved.startsWith('git')) {
    return false
  }

  let nodeSpec
  try {
    nodeSpec = npa(resolved)
  } catch {
    /* istanbul ignore next: npa parsing a git URL we already validated
       starts with `git` should not throw; defensive guard only. */
    return false
  }

  const withoutCommittish = { noCommittish: true }
  const keyRepo = parsed.hosted?.ssh(withoutCommittish)
  const nodeRepo = nodeSpec.hosted?.ssh(withoutCommittish)
  let sameRepo
  if (keyRepo && nodeRepo) {
    sameRepo = keyRepo === nodeRepo
  } else {
    // Non-hosted git URLs: fall back to fetch spec, required on both sides.
    sameRepo = Boolean(parsed.fetchSpec && nodeSpec.fetchSpec) &&
      parsed.fetchSpec === nodeSpec.fetchSpec
  }
  if (!sameRepo) {
    return false
  }

  const wanted = parsed.gitCommittish || parsed.hosted?.committish
  if (!wanted) {
    return true
  }
  const actual = nodeSpec.gitCommittish || nodeSpec.hosted?.committish || ''
  return Boolean(actual) && actual.startsWith(wanted)
}
283
+
284
// Matches a file/directory policy key against a node: the node's `resolved`
// value must be non-empty and exactly equal to the key's saveSpec or
// fetchSpec.
const matchFileOrDir = (node, parsed) => {
  const { resolved } = node
  return Boolean(resolved) &&
    [parsed.saveSpec, parsed.fetchSpec].includes(resolved)
}
290
+
291
// Matches a remote-URL policy key against a node: the node's `resolved`
// value must be non-empty and exactly equal to the key's fetchSpec or
// saveSpec.
const matchRemote = (node, parsed) => {
  const { resolved } = node
  return Boolean(resolved) &&
    [parsed.fetchSpec, parsed.saveSpec].includes(resolved)
}
297
+
298
// Decides whether a node may be treated as a registry dependency for policy
// matching.
//
// Real arborist Nodes expose a boolean `isRegistryDependency`; that
// edge-based check is preferred because it inspects the incoming edges'
// specs and only returns true if every edge resolves to a registry spec,
// which is much harder to spoof than the URL.
//
// Nodes without the getter (e.g. test fixtures, lockfiles with
// omit-lockfile-registry-resolved) fall back to:
//   - no `resolved` value: registry only if the node has a version
//   - otherwise: registry only if `resolved` is an http(s) URL shaped like
//     `<host>/<pkg-name>/-/<pkg-name>-<version>.tgz`. A path segment is
//     required before `/-/` so an attacker can't lift a registry-style allow
//     entry to a hostile URL like `https://evil.com/-/trusted-1.0.0.tgz`.
const isRegistryNode = (node) => {
  const edgeVerdict = node.isRegistryDependency
  if (typeof edgeVerdict === 'boolean') {
    return edgeVerdict
  }
  const registryTarballUrl = /^https?:\/\/[^/]+\/.+\/-\/[^/]+-\d/
  return node.resolved
    ? registryTarballUrl.test(node.resolved)
    : !!node.version
}
321
+
322
// Trusted display identity for human-facing output (`npm install` advisory,
// `npm approve-scripts --allow-scripts-pending`).
// Starts from getTrustedRegistryIdentity, but — for DISPLAY only — falls
// back to `node.name` / `node.version` for any part the trusted identity
// does not provide, and to null when neither source has it.
// Must never be used for policy matching.
const trustedDisplay = (node) => {
  const identity = getTrustedRegistryIdentity(node) || {}
  /* istanbul ignore next: defensive fallbacks for nodes without name/version */
  return {
    name: identity.name || node.name || null,
    version: identity.version || node.version || null,
  }
}
335
+
336
+ module.exports = isScriptAllowed
337
+ module.exports.isScriptAllowed = isScriptAllowed
338
+ module.exports.isExactVersionDisjunction = isExactVersionDisjunction
339
+ module.exports.getTrustedRegistryIdentity = getTrustedRegistryIdentity
340
+ module.exports.trustedDisplay = trustedDisplay
package/lib/shrinkwrap.js CHANGED
@@ -929,10 +929,24 @@ class Shrinkwrap {
929
929
  continue
930
930
  }
931
931
  const loc = relpath(this.path, node.path)
932
- this.data.packages[loc] = Shrinkwrap.metaFromNode(
932
+ // Drop lockfile entries for extraneous nodes outside node_modules. These are stale workspace entries: the workspace was removed from package.json or its directory was deleted, so it should not be tracked in package-lock.json.
933
+ if (node.extraneous && !/(^|\/)node_modules\//.test(loc) && loc !== 'node_modules') {
934
+ continue
935
+ }
936
+ const meta = Shrinkwrap.metaFromNode(
933
937
  node,
934
938
  this.path,
935
939
  this.resolveOptions)
940
+ // Skip inert nodes — these are optional deps that failed to load
941
+ // (e.g. 404 from a proxy registry that hasn't cached the package,
942
+ // or incomplete manifest missing version field).
943
+ // #pruneFailedOptional marks them inert so they won't be reified;
944
+ // writing them to the lockfile produces invalid entries like
945
+ // {"optional": true} that cause "Invalid Version:" errors.
946
+ if (node.inert && !node.package.version) {
947
+ continue
948
+ }
949
+ this.data.packages[loc] = meta
936
950
  }
937
951
  } else if (this.#awaitingUpdate.size > 0) {
938
952
  for (const loc of this.#awaitingUpdate.keys()) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@npmcli/arborist",
3
- "version": "9.5.0",
3
+ "version": "9.7.0",
4
4
  "description": "Manage node_modules trees",
5
5
  "dependencies": {
6
6
  "@gar/promise-retry": "^1.0.0",