prod-files 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +4 -4
  2. package/index.mjs +133 -104
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -11,7 +11,7 @@ projects, or you’re just obsessed with small deployments.
11
11
 
12
12
  It's relatively fast, prunes
13
13
  [Sentry's `node_modules`](https://github.com/getsentry/sentry/blob/master/package.json)
14
- in 2.1s (M2 MacBook). Prod deps only though, installed with `pnpm i --prod`, but
14
+ in 1.8s (M2 MacBook). Prod deps only though, installed with `pnpm i --prod`, but
15
15
  that's the common use-case anyway.
16
16
 
17
17
  ## Install
@@ -62,7 +62,7 @@ Flags:
62
62
 
63
63
  -g, --globs Prints out the default globs.
64
64
 
65
- -n, --noSize Skips the size calc at the end, saves about 200-1000ms.
65
+ -n, --noSize Skips the size calculation.
66
66
 
67
67
  -q, --quiet Quiet output, suppresses stdout.
68
68
  ```
@@ -139,12 +139,12 @@ pnpm test
139
139
  ### End to end tests
140
140
 
141
141
  The `test-project` directory has Sentry's `package.json`. You can run the script
142
- against it to see how it fairs in real-world use and get some timing data.
142
+ against it to see how it does in real-world use and get some timing data.
143
143
 
144
144
  ```sh
145
145
  # Re-installs the packages and runs the script on it
146
146
  pnpm test:e2e
147
- # Disable size reportings since it's pretty slow
147
+ # Disable size reporting since it adds 200-300ms
148
148
  pnpm test:e2e --noSize
149
149
  ```
150
150
 
package/index.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  // oxlint-disable prefer-spread
2
2
  import cp from 'node:child_process'
3
3
  import fs from 'node:fs/promises'
4
- import { matchesGlob, join, isAbsolute, resolve, dirname } from 'node:path'
4
+ import { matchesGlob, join, isAbsolute, resolve } from 'node:path'
5
5
  import { parseArgs, promisify, styleText } from 'node:util'
6
6
 
7
7
  const exec = promisify(cp.exec)
@@ -215,7 +215,7 @@ function usage() {
215
215
 
216
216
  -g, --globs Prints out the default globs.
217
217
 
218
- -n, --noSize Skips the size calc at the end, saves about 200-1000ms.
218
+ -n, --noSize Skips the size calculation.
219
219
 
220
220
  -q, --quiet Quiet output, suppresses stdout.
221
221
  `
@@ -275,8 +275,8 @@ export const log = {
275
275
  }
276
276
 
277
277
  /**
278
- * Get size of node_modules
279
- * @param {string} dirPath - Path to node_modules
278
+ * Get disk usage via du (512-byte blocks)
279
+ * @param {string} dirPath
280
280
  * @returns {Promise<number>}
281
281
  */
282
282
  async function getSize(dirPath) {
@@ -286,6 +286,28 @@ async function getSize(dirPath) {
286
286
  return size ? Number.parseInt(size, 10) : 0
287
287
  }
288
288
 
289
+ /**
290
+ * Sums lstat block counts (512-byte blocks, the unit du reports) for everything under a path; a directory contributes only its children's blocks, not its own
291
+ * @param {string} path - File or directory to measure; lstat is used, so a symlink is measured itself rather than its target
292
+ * @returns {Promise<number>} Total in 512-byte blocks, 0 when the path cannot be lstat'ed
293
+ */
294
+ async function treeSize(path) {
295
+ /** @type {import('node:fs').Stats} */
296
+ let stat
297
+ try {
298
+ stat = await fs.lstat(path)
299
+ } catch {
300
+ // Any lstat failure is treated as "entry disappeared" (concurrent pruning) and counted as 0
301
+ return 0
302
+ }
303
+ if (!stat.isDirectory()) return stat.blocks
304
+ const names = await fs.readdir(path).catch(() => [])
305
+ const sizes = await Promise.all(names.map(n => treeSize(join(path, n))))
306
+ let total = 0
307
+ for (const s of sizes) total += s
308
+ return total
309
+ }
310
+
289
311
  /**
290
312
  * @param {number} originalSize
291
313
  * @param {number} prunedSize
@@ -301,25 +323,23 @@ function calcSize(originalSize, prunedSize) {
301
323
  /**
302
324
  * Prints a nice diff table
303
325
  * @param {object} opts
304
- * @param {Promise<number> | undefined} opts.prunedSize
326
+ * @param {number | undefined} opts.removedBytes
305
327
  * @param {number} opts.startTime
306
328
  * @param {number} opts.itemCount
307
- * @param {Promise<number> | undefined} opts.originalSize
329
+ * @param {number | undefined} opts.originalSize
308
330
  */
309
- export async function printDiff({
310
- prunedSize,
331
+ export function printDiff({
332
+ removedBytes,
311
333
  startTime,
312
334
  itemCount,
313
335
  originalSize,
314
336
  }) {
315
- const [original, pruned] =
316
- originalSize && prunedSize
317
- ? await Promise.all([originalSize, prunedSize])
318
- : [undefined, undefined]
319
-
320
337
  log.table([
321
338
  {
322
- ...(original && pruned && { Pruned: calcSize(original, pruned) }),
339
+ ...(originalSize &&
340
+ removedBytes && {
341
+ Pruned: calcSize(originalSize, originalSize - removedBytes),
342
+ }),
323
343
  Time: `${((Date.now() - startTime) / 1000).toFixed(1)}s`,
324
344
  Items: itemCount,
325
345
  },
@@ -696,66 +716,101 @@ export function compactPaths(paths) {
696
716
  }
697
717
 
698
718
  /**
699
- * Checks the rmdir error: ENOTEMPTY means the dir was not empty and the removal
700
- * failed, which is what we want
701
- * @param {unknown} err
702
- * @returns {boolean}
719
+ * @typedef {object} WalkResult
720
+ * @property {string[]} removed - Compacted list of removed paths
721
+ * @property {number} removedBlocks - Removed disk usage in 512-byte blocks
703
722
  */
704
- function hasContent(err) {
705
- return (
706
- !!err &&
707
- typeof err === 'object' &&
708
- 'code' in err &&
709
- (err.code === 'ENOTEMPTY' || err.code === 'ENOENT')
710
- )
711
- }
712
723
 
713
724
  /**
714
- * Removes a dir if it's empty
715
- * @param {string[]} dirs
716
- * @returns {Promise<void>[]}
725
+ * Parallel walker that finds junk, removes it, and cleans empty dirs in one
726
+ * pass. Skips recursing into junk directories (implicit path compacting) and
727
+ * removes empty ancestors bottom-up as the recursion unwinds.
728
+ * @param {string} rootDir - The directory to walk
729
+ * @param {CompiledGlobs} compiledGlobs - Precompiled glob matchers
730
+ * @param {boolean} trackSize - Whether to collect byte sizes of removed items
731
+ * @returns {Promise<WalkResult>}
717
732
  */
718
- function rmEmptyDir(dirs) {
719
- return dirs.map(dir =>
720
- fs.rmdir(dir).catch(err => {
721
- if (hasContent(err)) return
722
- throw err
723
- })
724
- )
725
- }
733
+ async function walkAndPrune(rootDir, compiledGlobs, trackSize) {
734
+ /** @type {string[]} */
735
+ const removed = []
736
+ let removedBlocks = 0
737
+ const hasAnyGlobs = compiledGlobs.any.globs.length > 0
738
+ const hasDirGlobs = compiledGlobs.dir.globs.length > 0
726
739
 
727
- /**
728
- * Removes a file and collects parent directories for later cleanup
729
- * @param {string} file - the file to remove
730
- * @param {Set<string>} visited - tracks directories we've already visited
731
- * @param {Map<number, Set<string>>} dirDepths - cleanup dirs grouped by depth
732
- * @param {string} rootDir - stop collecting once we reach this directory
733
- */
734
- async function rimraf(
735
- file,
736
- visited = new Set(),
737
- dirDepths = new Map(),
738
- rootDir = dirname(file)
739
- ) {
740
- // Remove the file/dir recursively
741
- await fs.rm(file, { recursive: true, force: true })
742
-
743
- // Walk up the tree collecting all the ancestors, we'll use them later on to
744
- // delete directories which are left empty
745
- let dir = dirname(file)
746
- while (dir !== rootDir) {
747
- if (visited.has(dir)) break
748
- visited.add(dir)
749
-
750
- const depth = dir.split('/').length
751
-
752
- // Group the dirs by depth
753
- const dirs = dirDepths.get(depth)
754
- if (dirs) dirs.add(dir)
755
- else dirDepths.set(depth, new Set([dir]))
756
-
757
- dir = dirname(dir)
740
+ /**
741
+ * Walks a directory in parallel, removes junk, and reports whether the
742
+ * directory still has content so the caller can clean up empty parents
743
+ * @param {string} dir
744
+ * @returns {Promise<boolean>} true when the directory still has content
745
+ */
746
+ async function walkDir(dir) {
747
+ const entries = await fs.readdir(dir, { withFileTypes: true })
748
+
749
+ /** @type {string[]} */
750
+ const junkPaths = []
751
+ /** @type {string[]} */
752
+ const keptDirPaths = []
753
+ let keptFiles = 0
754
+
755
+ for (const entry of entries) {
756
+ const { name } = entry
757
+ const isDir = entry.isDirectory()
758
+ const path = join(dir, name)
759
+
760
+ // Basename checks are cheapest, try them first
761
+ if (
762
+ matchesSet(name, compiledGlobs.any) ||
763
+ (isDir && matchesSet(name, compiledGlobs.dir))
764
+ ) {
765
+ junkPaths.push(path)
766
+ continue
767
+ }
768
+
769
+ // Full path glob checks only when compiled globs exist
770
+ if (hasAnyGlobs || (isDir && hasDirGlobs)) {
771
+ const escapedPath = escapeLeadingDots(isDir ? `${path}/` : path)
772
+ if (
773
+ compiledGlobs.any.globs.some(g => matchesGlob(escapedPath, g)) ||
774
+ (isDir &&
775
+ compiledGlobs.dir.globs.some(g => matchesGlob(escapedPath, g)))
776
+ ) {
777
+ junkPaths.push(path)
778
+ continue
779
+ }
780
+ }
781
+
782
+ if (isDir) keptDirPaths.push(path)
783
+ else keptFiles += 1
784
+ }
785
+
786
+ // Collect removed paths before awaiting (no junk dir recursion = compacting)
787
+ for (const p of junkPaths) removed.push(p)
788
+
789
+ // Size (when tracking), remove junk, and recurse kept subdirs in parallel
790
+ const [junkSizes, walkResults] = await Promise.all([
791
+ Promise.all(
792
+ junkPaths.map(async p => {
793
+ const size = trackSize ? await treeSize(p) : 0
794
+ await fs.rm(p, { recursive: true, force: true })
795
+ return size
796
+ })
797
+ ),
798
+ Promise.all(keptDirPaths.map(walkDir)),
799
+ ])
800
+
801
+ for (const s of junkSizes) removedBlocks += s
802
+
803
+ // Subdirs that became empty after pruning their contents
804
+ const emptyDirs = keptDirPaths.filter((_, i) => !walkResults[i])
805
+ if (emptyDirs.length > 0) {
806
+ await Promise.all(emptyDirs.map(d => fs.rmdir(d)))
807
+ }
808
+
809
+ return keptFiles + keptDirPaths.length - emptyDirs.length > 0
758
810
  }
811
+
812
+ await walkDir(rootDir)
813
+ return { removed, removedBlocks }
759
814
  }
760
815
 
761
816
  /**
@@ -770,56 +825,30 @@ export async function prune(opts) {
770
825
  const startTime = Date.now()
771
826
  log.info('Pruning:', opts.path)
772
827
 
773
- // Don't wait
774
- const originalSize = getSize(opts.path)
828
+ // Fire early so du runs concurrently with the walk
829
+ const sizePromise = opts.noSize ? undefined : getSize(opts.path)
775
830
  const excludedGlobs = new Set(opts.exclude)
776
831
  const activeGlobs = [...defaultGlobs, ...opts.include].filter(
777
832
  glob => !excludedGlobs.has(glob)
778
833
  )
779
834
  const compiledGlobs = compileGlobs(activeGlobs)
780
835
 
781
- // TODO: this could be slightly faster with an optimized walker
782
- const allFiles = await fs.readdir(opts.path, {
783
- recursive: true,
784
- withFileTypes: true,
785
- })
786
-
787
- const junk = compactPaths(findJunkFiles(allFiles, compiledGlobs))
788
-
836
+ /** @type {WalkResult} */
837
+ let result
789
838
  try {
790
- /** @type {Set<string>} */
791
- const visited = new Set()
792
- /** @type {Map<number, Set<string>>} */
793
- const dirDepths = new Map()
794
- // Rm & populate visited & dirDepths so dirs can be removed in parallel
795
- await Promise.all(junk.map(x => rimraf(x, visited, dirDepths, opts.path)))
796
- const depths = [...dirDepths.keys()].sort((a, b) => b - a)
797
-
798
- /**
799
- * Remove one depth level at a time, but parallelize within each level
800
- * @param {number} i
801
- * @returns {Promise<void>}
802
- */
803
- async function removeDepth(i) {
804
- if (i >= depths.length) return
805
- const dirs = dirDepths.get(depths[i] || 0) ?? []
806
- await Promise.all(rmEmptyDir([...dirs]))
807
- await removeDepth(i + 1)
808
- }
809
-
810
- await removeDepth(0)
839
+ result = await walkAndPrune(opts.path, compiledGlobs, !opts.noSize)
811
840
  } catch (err) {
812
841
  throw bail(undefined, err)
813
842
  }
814
843
 
815
- void printDiff({
816
- itemCount: junk.length,
817
- prunedSize: opts.noSize ? undefined : getSize(opts.path),
818
- originalSize: opts.noSize ? undefined : originalSize,
844
+ printDiff({
845
+ itemCount: result.removed.length,
846
+ removedBytes: opts.noSize ? undefined : result.removedBlocks,
847
+ originalSize: sizePromise ? await sizePromise : undefined,
819
848
  startTime,
820
849
  })
821
850
 
822
- return junk
851
+ return result.removed
823
852
  }
824
853
 
825
854
  const entry = process.argv[1]
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "prod-files",
3
- "version": "0.1.3",
3
+ "version": "0.1.4",
4
4
  "description": "Keep only prod files by pruning non-prod files from node_modules before deploying",
5
5
  "keywords": [
6
6
  "clean",