@wuyuchentr/file-splitter 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -0
- package/package.json +29 -0
- package/src/index.js +80 -0
package/README.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# @wuyuchentr/file-splitter
|
|
2
|
+
|
|
3
|
+
Split large files by line count or size. **Zero dependencies, stream-based, memory-friendly.**
|
|
4
|
+
|
|
5
|
+
> Reads a single line at a time — works with files of any size.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @wuyuchentr/file-splitter
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```js
|
|
16
|
+
const { splitFile } = require('@wuyuchentr/file-splitter');
|
|
17
|
+
|
|
18
|
+
// Split by lines
|
|
19
|
+
const files = await splitFile('large.log', { linesPerFile: 10000 });
|
|
20
|
+
// → ['large-1.log', 'large-2.log', ...]
|
|
21
|
+
|
|
22
|
+
// Split by approximate size (respects line boundaries)
|
|
23
|
+
const csvFiles = await splitFile('data.csv', { sizePerFile: 1024 * 1024 });
|
|
24
|
+
// → ['data-1.csv', 'data-2.csv', ...]
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Options
|
|
28
|
+
|
|
29
|
+
| Option | Default | Description |
|
|
30
|
+
|--------|---------|-------------|
|
|
31
|
+
| `linesPerFile` | — | Max lines per output file |
|
|
32
|
+
| `sizePerFile` | — | Approximate bytes per output file (boundary-safe) |
|
|
33
|
+
| `outputDir` | input dir | Directory for output files |
|
|
34
|
+
| `outputPrefix` | basename | Custom prefix before `-1.ext` |
|
|
35
|
+
| `encoding` | `'utf-8'` | File encoding |
|
|
36
|
+
|
|
37
|
+
## How it works
|
|
38
|
+
|
|
39
|
+
Uses `readline` + `fs.createReadStream` under the hood. Reads one line at a time, writes to sequential output files. Memory usage stays constant regardless of file size.
|
package/package.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@wuyuchentr/file-splitter",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Split large files by line count or size. Zero-dependency, stream-based, memory-friendly.",
|
|
5
|
+
"main": "src/index.js",
|
|
6
|
+
"publishConfig": {
|
|
7
|
+
"access": "public"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"src/",
|
|
11
|
+
"README.md"
|
|
12
|
+
],
|
|
13
|
+
"keywords": [
|
|
14
|
+
"split",
|
|
15
|
+
"file",
|
|
16
|
+
"large-file",
|
|
17
|
+
"stream",
|
|
18
|
+
"line-split",
|
|
19
|
+
"log-splitter"
|
|
20
|
+
],
|
|
21
|
+
"license": "MIT",
|
|
22
|
+
"repository": {
|
|
23
|
+
"type": "git",
|
|
24
|
+
"url": "git+https://github.com/wuyuchentr/file-splitter.git"
|
|
25
|
+
},
|
|
26
|
+
"engines": {
|
|
27
|
+
"node": ">=14.0.0"
|
|
28
|
+
}
|
|
29
|
+
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const readline = require('readline');
|
|
3
|
+
const path = require('path');
|
|
4
|
+
|
|
5
|
+
/**
 * Split a text file into sequentially numbered part files, cutting only at
 * line boundaries.
 *
 * Parts are named `<prefix>-<n><ext>`, where `<ext>` is the input file's
 * extension and `<n>` starts at 1. The first part is opened before any input
 * is read, so an empty input still produces one (empty) part. Every line is
 * written followed by `\n`, including the final one.
 *
 * @param {string} inputFile - Path of the file to split.
 * @param {object} [options]
 * @param {number} [options.linesPerFile] - Start a new part once the current
 *   one holds this many lines.
 * @param {number} [options.sizePerFile] - Start a new part once the current
 *   one holds at least this many bytes (and at least one line).
 * @param {string} [options.outputDir] - Directory for the parts; defaults to
 *   the directory of `inputFile`.
 * @param {string} [options.outputPrefix] - Base name of the parts; defaults to
 *   the input file's base name without its extension.
 * @param {string} [options.encoding='utf-8'] - Encoding used for reading,
 *   writing and byte counting.
 * @returns {Promise<string[]>} Paths of the parts, in creation order.
 * @throws {Error} If neither `linesPerFile` nor `sizePerFile` is given, or if
 *   reading the input or writing a part fails (the stream error is rethrown).
 */
async function splitFile(inputFile, options = {}) {
  const {
    linesPerFile,
    sizePerFile,
    outputDir = path.dirname(inputFile),
    outputPrefix,
    encoding = 'utf-8',
  } = options;

  if (!linesPerFile && !sizePerFile) {
    throw new Error('Must specify either linesPerFile or sizePerFile');
  }

  const ext = path.extname(inputFile);
  const base = outputPrefix || path.basename(inputFile, ext);
  const prefix = path.join(outputDir, base);

  const input = fs.createReadStream(inputFile, { encoding });
  const rl = readline.createInterface({ input, crlfDelay: Infinity });

  // Record read failures ourselves and end the line iterator, so a missing or
  // unreadable input rejects this call instead of crashing or hanging.
  let inputError = null;
  input.on('error', (err) => {
    inputError = inputError || err;
    rl.close();
  });

  let partNum = 1;
  let lineCount = 0;
  let byteCount = 0;
  let currentStream = null;
  let outputError = null;
  const outFiles = [];

  // Open the next numbered part and reset the per-part counters.
  function openNext() {
    const name = `${prefix}-${partNum}${ext}`;
    outFiles.push(name);
    currentStream = fs.createWriteStream(name, { encoding });
    // A stream without an 'error' listener would throw an uncaught exception.
    currentStream.on('error', (err) => {
      outputError = outputError || err;
    });
    partNum++;
    lineCount = 0;
    byteCount = 0;
  }

  // Finish the current part; rejects if that part (or an earlier one) failed.
  function closeCurrent() {
    const stream = currentStream;
    currentStream = null;
    if (!stream) {
      return Promise.resolve();
    }
    if (outputError || stream.destroyed) {
      stream.destroy(); // release the descriptor if it is still open
      return outputError ? Promise.reject(outputError) : Promise.resolve();
    }
    return new Promise((resolve, reject) => {
      stream.once('finish', resolve);
      stream.once('error', reject);
      stream.end();
    });
  }

  // Resolve when the stream's internal buffer has drained, reject if it fails.
  function waitForDrain(stream) {
    return new Promise((resolve, reject) => {
      const onDrain = () => {
        stream.removeListener('error', onError);
        resolve();
      };
      const onError = (err) => {
        stream.removeListener('drain', onDrain);
        reject(err);
      };
      stream.once('drain', onDrain);
      stream.once('error', onError);
    });
  }

  function shouldSplit() {
    if (linesPerFile && lineCount >= linesPerFile) return true;
    if (sizePerFile && lineCount > 0 && byteCount >= sizePerFile) return true;
    return false;
  }

  // Nothing is awaited between creating the readline interface and starting
  // the loop, so the line iterator is attached before any input is read.
  openNext();

  let failure = null;
  try {
    for await (const line of rl) {
      if (outputError) {
        throw outputError;
      }
      if (shouldSplit()) {
        await closeCurrent();
        openNext();
      }
      const text = `${line}\n`;
      const stream = currentStream;
      const hasRoom = stream.write(text);
      lineCount++;
      // Count the newline in the target encoding too (2 bytes for utf16le).
      byteCount += Buffer.byteLength(text, encoding);
      if (!hasRoom) {
        // Respect backpressure so buffered output cannot grow without bound.
        await waitForDrain(stream);
      }
    }
  } catch (err) {
    failure = err;
  }

  // Always release both ends; keep the first failure as the one reported.
  rl.close();
  input.destroy();
  try {
    await closeCurrent();
  } catch (err) {
    failure = failure || err;
  }

  failure = failure || inputError || outputError;
  if (failure) {
    throw failure;
  }

  return outFiles;
}
|
|
79
|
+
|
|
80
|
+
module.exports = { splitFile };
|