@wuyuchentr/file-splitter 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +39 -0
  2. package/package.json +29 -0
  3. package/src/index.js +80 -0
package/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # @wuyuchentr/file-splitter
2
+
3
+ Split large files by line count or size. **Zero dependencies, stream-based, memory-friendly.**
4
+
5
+ > Reads a single line at a time — works with files of any size.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm install @wuyuchentr/file-splitter
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```js
16
+ const { splitFile } = require('@wuyuchentr/file-splitter');
17
+
18
+ // Split by lines
19
+ const files = await splitFile('large.log', { linesPerFile: 10000 });
20
+ // → ['large-1.log', 'large-2.log', ...]
21
+
22
+ // Split by approximate size (respects line boundaries)
23
+ const csvFiles = await splitFile('data.csv', { sizePerFile: 1024 * 1024 });
24
+ // → ['data-1.csv', 'data-2.csv', ...]
25
+ ```
26
+
27
+ ## Options
28
+
29
+ | Option | Default | Description |
30
+ |--------|---------|-------------|
31
+ | `linesPerFile` | — | Max lines per output file |
32
+ | `sizePerFile` | — | Approximate bytes per output file; a part is closed at the first line boundary at or after this size, so it may exceed the limit by up to one line |
33
+ | `outputDir` | input dir | Directory for output files |
34
+ | `outputPrefix` | basename | Custom prefix before `-1.ext` |
35
+ | `encoding` | `'utf-8'` | File encoding |
36
+
37
+ ## How it works
38
+
39
+ Uses `readline` + `fs.createReadStream` under the hood. Reads one line at a time, writes to sequential output files. Memory usage stays constant regardless of file size.
package/package.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "name": "@wuyuchentr/file-splitter",
3
+ "version": "1.0.0",
4
+ "description": "Split large files by line count or size. Zero-dependency, stream-based, memory-friendly.",
5
+ "main": "src/index.js",
6
+ "publishConfig": {
7
+ "access": "public"
8
+ },
9
+ "files": [
10
+ "src/",
11
+ "README.md"
12
+ ],
13
+ "keywords": [
14
+ "split",
15
+ "file",
16
+ "large-file",
17
+ "stream",
18
+ "line-split",
19
+ "log-splitter"
20
+ ],
21
+ "license": "MIT",
22
+ "repository": {
23
+ "type": "git",
24
+ "url": "git+https://github.com/wuyuchentr/file-splitter.git"
25
+ },
26
+ "engines": {
27
+ "node": ">=14.0.0"
28
+ }
29
+ }
package/src/index.js ADDED
@@ -0,0 +1,80 @@
1
+ const fs = require('fs');
2
+ const readline = require('readline');
3
+ const path = require('path');
4
+
5
/**
 * Split a text file into sequentially numbered part files, always cutting on
 * line boundaries.
 *
 * The input is consumed line by line through `readline`. Each line is written
 * to the current part followed by `'\n'`. Parts are named
 * `<outputDir>/<prefix>-<n><ext>` with `n` starting at 1. A new part is
 * started before a line is written when the current part already holds
 * `linesPerFile` lines, or holds at least one line and `sizePerFile` bytes.
 * The first part is created before reading begins, so an empty input still
 * produces one (empty) part.
 *
 * @param {string} inputFile - Path of the file to split.
 * @param {object} [options]
 * @param {number} [options.linesPerFile] - Maximum number of lines per part.
 * @param {number} [options.sizePerFile] - Byte threshold per part; checked
 *   between lines, so a part can exceed it by up to one line.
 * @param {string} [options.outputDir] - Directory for the parts; defaults to
 *   the directory of `inputFile`. It is not created automatically.
 * @param {string} [options.outputPrefix] - Name used before `-<n><ext>`;
 *   defaults to the input's basename without extension.
 * @param {string} [options.encoding='utf-8'] - Encoding used for reading,
 *   writing and byte counting.
 * @returns {Promise<string[]>} Paths of the parts, in creation order.
 * @throws {Error} Rejects when neither `linesPerFile` nor `sizePerFile` is
 *   given, or with the underlying stream error when reading or writing fails.
 */
async function splitFile(inputFile, options = {}) {
  const {
    linesPerFile,
    sizePerFile,
    outputDir = path.dirname(inputFile),
    outputPrefix,
    encoding = 'utf-8',
  } = options;

  if (!linesPerFile && !sizePerFile) {
    throw new Error('Must specify either linesPerFile or sizePerFile');
  }

  const ext = path.extname(inputFile);
  const base = outputPrefix || path.basename(inputFile, ext);
  const prefix = path.join(outputDir, base);

  const input = fs.createReadStream(inputFile, { encoding });
  const rl = readline.createInterface({ input, crlfDelay: Infinity });

  // Registered after createInterface() so that any error forwarding readline
  // installs itself runs first. On runtimes where readline does not forward
  // input errors, closing the interface ends the loop and the recorded error
  // is rethrown below.
  let inputError = null;
  input.on('error', (err) => {
    inputError = err;
    rl.close();
  });

  let partNum = 1;
  let lineCount = 0;
  let byteCount = 0;
  let current = null; // { stream, error } for the part being written
  const outFiles = [];

  // Resolve when the part's stream emits `event`; reject if the stream has
  // failed already or fails first.
  function waitFor(part, event) {
    return new Promise((resolve, reject) => {
      if (part.error) {
        reject(part.error);
        return;
      }
      const onEvent = () => {
        part.stream.removeListener('error', onError);
        resolve();
      };
      const onError = (err) => {
        part.stream.removeListener(event, onEvent);
        reject(err);
      };
      part.stream.once(event, onEvent);
      part.stream.once('error', onError);
    });
  }

  // Finish the current part and wait until its data has been flushed.
  async function closeCurrent() {
    const part = current;
    current = null;
    if (!part) return;
    const finished = waitFor(part, 'finish');
    part.stream.end();
    await finished;
  }

  // Create the next numbered part and reset the per-part counters.
  function openNext() {
    const name = `${prefix}-${partNum}${ext}`;
    outFiles.push(name);
    const part = { stream: fs.createWriteStream(name, { encoding }), error: null };
    // A permanent listener keeps a failed open/write from becoming an
    // uncaught 'error' event; the failure is reported through waitFor().
    part.stream.on('error', (err) => {
      part.error = part.error || err;
    });
    current = part;
    partNum++;
    lineCount = 0;
    byteCount = 0;
  }

  function shouldSplit() {
    if (linesPerFile && lineCount >= linesPerFile) return true;
    if (sizePerFile && lineCount > 0 && byteCount >= sizePerFile) return true;
    return false;
  }

  openNext();

  try {
    for await (const line of rl) {
      if (shouldSplit()) {
        await closeCurrent();
        openNext();
      }
      const chunk = `${line}\n`;
      // Honour backpressure: when the writer's buffer is full, stop pulling
      // lines until it drains so memory stays bounded.
      if (!current.stream.write(chunk)) {
        await waitFor(current, 'drain');
      }
      lineCount++;
      // Measure the bytes actually written, newline included, so the count
      // is right for multi-byte encodings too.
      byteCount += Buffer.byteLength(chunk, encoding);
    }
    if (inputError) throw inputError;
    await closeCurrent();
  } catch (err) {
    // Best effort: flush what was already written, but report the original
    // failure rather than a secondary close error.
    await closeCurrent().catch(() => {});
    throw err;
  } finally {
    rl.close();
    // rl.close() does not release the file descriptor on an early exit.
    input.destroy();
  }

  return outFiles;
}
79
+
80
+ module.exports = { splitFile };