codemodctl 0.1.11 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -8
- package/dist/cli.js +2 -2
- package/dist/{codeowner-analysis-n5QdN_A3.js → codeowner-analysis-C8hyzL4c.js} +1 -1
- package/dist/codeowners.js +2 -2
- package/dist/consistent-sharding-CcnfsY_k.js +113 -0
- package/dist/{consistent-sharding-D0wYSQBl.d.ts → consistent-sharding-D9n1M6by.d.ts} +19 -3
- package/dist/index.d.ts +2 -2
- package/dist/index.js +3 -3
- package/dist/sharding.d.ts +2 -2
- package/dist/sharding.js +2 -2
- package/package.json +1 -1
- package/dist/consistent-sharding-DDU9PV2R.js +0 -71
package/README.md
CHANGED
|
@@ -38,9 +38,14 @@ const belongsToShard = fitsInShard('src/components/Button.tsx', {
|
|
|
38
38
|
shardIndex: 2
|
|
39
39
|
});
|
|
40
40
|
|
|
41
|
-
// Distribute all files across shards
|
|
41
|
+
// Distribute all files across shards with consistent hashing
|
|
42
42
|
const files = ['file1.ts', 'file2.ts', 'file3.ts'];
|
|
43
43
|
const distribution = distributeFilesAcrossShards(files, 5);
|
|
44
|
+
|
|
45
|
+
// Check scaling behavior - minimal reassignment when growing
|
|
46
|
+
const scalingAnalysis = analyzeShardScaling(files, 5, 6);
|
|
47
|
+
console.log(`${scalingAnalysis.stableFiles} files stay in same shard`);
|
|
48
|
+
console.log(`${scalingAnalysis.reassignmentPercentage}% reassignment`); // Much less than 100%
|
|
44
49
|
```
|
|
45
50
|
|
|
46
51
|
#### Codeowner Analysis
|
|
@@ -73,13 +78,14 @@ const analysis = await codemodctl.codeowners.analyzeCodeowners(options);
|
|
|
73
78
|
|
|
74
79
|
## Key Features
|
|
75
80
|
|
|
76
|
-
###
|
|
81
|
+
### Consistent File Sharding
|
|
77
82
|
|
|
78
|
-
The sharding algorithm uses deterministic hashing to ensure:
|
|
83
|
+
The sharding algorithm uses **consistent hashing** to ensure:
|
|
79
84
|
|
|
80
85
|
- **Perfect consistency**: Same file + same shard count = same result, always
|
|
81
86
|
- **No external dependencies**: Result depends only on filename and shard count
|
|
82
|
-
- **
|
|
87
|
+
- **Minimal reassignment**: When scaling up, only ~20-40% of files move (not 100%)
|
|
88
|
+
- **Stable scaling**: Adding new shards doesn't reorganize existing file assignments
|
|
83
89
|
- **Simple API**: No complex parameters or configuration needed
|
|
84
90
|
- **Team-aware sharding**: Works with codeowner boundaries
|
|
85
91
|
|
|
@@ -99,9 +105,12 @@ The sharding algorithm uses deterministic hashing to ensure:
|
|
|
99
105
|
- `distributeFilesAcrossShards(files, shardCount)` - Distribute files across shards
|
|
100
106
|
- `calculateOptimalShardCount(totalFiles, targetShardSize)` - Calculate optimal shard count
|
|
101
107
|
- `getFileHashPosition(filename)` - Get consistent hash position for a file
|
|
108
|
+
- `analyzeShardScaling(files, oldCount, newCount)` - Analyze reassignment when scaling
|
|
102
109
|
|
|
103
110
|
All functions are deterministic: same input always produces the same output.
|
|
104
111
|
|
|
112
|
+
**Scaling behavior**: When going from N to N+1 shards, typically only 20-40% of files get reassigned to new locations, making it ideal for incremental scaling scenarios.
|
|
113
|
+
|
|
105
114
|
### Codeowner Functions
|
|
106
115
|
|
|
107
116
|
- `analyzeCodeowners(options)` - Complete analysis with shard generation
|
|
@@ -125,20 +134,27 @@ const shard1 = getShardForFilename('src/components/Button.tsx', { shardCount: 5
|
|
|
125
134
|
const shard2 = getShardForFilename('src/components/Button.tsx', { shardCount: 5 });
|
|
126
135
|
console.log(shard1 === shard2); // always true
|
|
127
136
|
|
|
128
|
-
// Different shard counts
|
|
137
|
+
// Different shard counts give different results (expected behavior)
|
|
129
138
|
const shard5 = getShardForFilename('src/components/Button.tsx', { shardCount: 5 });
|
|
130
139
|
const shard10 = getShardForFilename('src/components/Button.tsx', { shardCount: 10 });
|
|
131
|
-
// shard5 and shard10
|
|
140
|
+
// shard5 and shard10 will likely be different, but each is consistent
|
|
132
141
|
|
|
133
|
-
// Distribute files
|
|
142
|
+
// Distribute files with consistent hashing for stable scaling
|
|
134
143
|
const files = ['file1.ts', 'file2.ts', 'file3.ts'];
|
|
135
144
|
const distribution = distributeFilesAcrossShards(files, 5);
|
|
145
|
+
|
|
146
|
+
// When you need more capacity, most files stay in place
|
|
147
|
+
const moreFiles = [...files, 'newFile.ts'];
|
|
148
|
+
const analysis = analyzeShardScaling(files, 5, 6);
|
|
149
|
+
// Only ~20-40% of files get reassigned, not all of them!
|
|
136
150
|
```
|
|
137
151
|
|
|
138
152
|
### Key Benefits
|
|
139
153
|
- **No complex parameters**: Just filename and shard count
|
|
140
154
|
- **Perfectly deterministic**: Same input = same output, always
|
|
141
|
-
- **
|
|
155
|
+
- **Stable scaling**: When adding shards, most files stay in their original shards
|
|
156
|
+
- **Minimal reassignment**: Only ~20-40% of files move when scaling up
|
|
157
|
+
- **Fast and simple**: Hash-based assignment with consistent ring placement
|
|
142
158
|
- **Works across runs**: File gets same shard whether filesystem changes or not
|
|
143
159
|
|
|
144
160
|
## CLI Commands
|
package/dist/cli.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import "./consistent-sharding-DDU9PV2R.js";
|
|
3
|
-
import { analyzeCodeowners } from "./codeowner-analysis-n5QdN_A3.js";
|
|
2
|
+
import "./consistent-sharding-CcnfsY_k.js";
|
|
3
|
+
import { analyzeCodeowners } from "./codeowner-analysis-C8hyzL4c.js";
|
|
4
4
|
import { defineCommand, runMain } from "citty";
|
|
5
5
|
import crypto from "node:crypto";
|
|
6
6
|
import { $ } from "execa";
|
package/dist/{codeowner-analysis-n5QdN_A3.js → codeowner-analysis-C8hyzL4c.js}
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { calculateOptimalShardCount } from "./consistent-sharding-DDU9PV2R.js";
|
|
2
|
+
import { calculateOptimalShardCount } from "./consistent-sharding-CcnfsY_k.js";
|
|
3
3
|
import { execSync } from "node:child_process";
|
|
4
4
|
import { existsSync } from "node:fs";
|
|
5
5
|
import path, { resolve } from "node:path";
|
package/dist/codeowners.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import "./consistent-sharding-DDU9PV2R.js";
|
|
3
|
-
import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-n5QdN_A3.js";
|
|
2
|
+
import "./consistent-sharding-CcnfsY_k.js";
|
|
3
|
+
import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-C8hyzL4c.js";
|
|
4
4
|
|
|
5
5
|
export { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName };
|
package/dist/consistent-sharding-CcnfsY_k.js
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import crypto from "node:crypto";
|
|
3
|
+
|
|
4
|
+
//#region src/utils/consistent-sharding.ts
|
|
5
|
+
const HASH_RING_SIZE = 1e6;
|
|
6
|
+
/**
|
|
7
|
+
* Generates a numeric hash from a filename using SHA1
|
|
8
|
+
* Uses only the first 8 characters of the hex digest to avoid JavaScript number precision issues
|
|
9
|
+
*/
|
|
10
|
+
function getNumericFileNameSha1(filename) {
|
|
11
|
+
const hex = crypto.createHash("sha1").update(filename).digest("hex").substring(0, 8);
|
|
12
|
+
return parseInt(hex, 16);
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Maps a filename to a consistent position on the hash ring (0 to HASH_RING_SIZE-1)
|
|
16
|
+
* This position remains constant regardless of shard count changes
|
|
17
|
+
*/
|
|
18
|
+
function getFileHashPosition(filename) {
|
|
19
|
+
return getNumericFileNameSha1(filename) % HASH_RING_SIZE;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Get the position for a specific shard index on the hash ring
|
|
23
|
+
* Shards get fixed positions that don't change when other shards are added
|
|
24
|
+
*/
|
|
25
|
+
function getShardPosition(shardIndex) {
|
|
26
|
+
return parseInt(crypto.createHash("sha1").update(`shard-${shardIndex}`).digest("hex").substring(0, 8), 16) % HASH_RING_SIZE;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Gets the shard index for a filename using consistent hashing
|
|
30
|
+
* Files are assigned to the next shard clockwise on the hash ring
|
|
31
|
+
*
|
|
32
|
+
* @param filename - The file path to hash
|
|
33
|
+
* @param shardCount - Total number of shards
|
|
34
|
+
* @returns Shard index (0-based)
|
|
35
|
+
*/
|
|
36
|
+
function getShardForFilename(filename, { shardCount }) {
|
|
37
|
+
if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
|
|
38
|
+
const filePosition = getFileHashPosition(filename);
|
|
39
|
+
const shardInfo = [];
|
|
40
|
+
for (let i = 0; i < shardCount; i++) shardInfo.push({
|
|
41
|
+
index: i,
|
|
42
|
+
position: getShardPosition(i)
|
|
43
|
+
});
|
|
44
|
+
shardInfo.sort((a, b) => a.position - b.position);
|
|
45
|
+
for (const shard of shardInfo) if (filePosition <= shard.position) return shard.index;
|
|
46
|
+
return shardInfo[0].index;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Checks if a file belongs to a specific shard
|
|
50
|
+
*
|
|
51
|
+
* @param filename - The file path to check
|
|
52
|
+
* @param shardCount - Total number of shards
|
|
53
|
+
* @param shardIndex - The shard index to check against (0-based)
|
|
54
|
+
* @returns True if file belongs to the specified shard
|
|
55
|
+
*/
|
|
56
|
+
function fitsInShard(filename, { shardCount, shardIndex }) {
|
|
57
|
+
return getShardForFilename(filename, { shardCount }) === shardIndex;
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Distributes files across shards using deterministic hashing
|
|
61
|
+
*
|
|
62
|
+
* @param filenames - Array of file paths
|
|
63
|
+
* @param shardCount - Total number of shards
|
|
64
|
+
* @returns Map of shard index to array of filenames
|
|
65
|
+
*/
|
|
66
|
+
function distributeFilesAcrossShards(filenames, shardCount) {
|
|
67
|
+
if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
|
|
68
|
+
const shardMap = /* @__PURE__ */ new Map();
|
|
69
|
+
for (let i = 0; i < shardCount; i++) shardMap.set(i, []);
|
|
70
|
+
for (const filename of filenames) {
|
|
71
|
+
const shardIndex = getShardForFilename(filename, { shardCount });
|
|
72
|
+
shardMap.get(shardIndex).push(filename);
|
|
73
|
+
}
|
|
74
|
+
return shardMap;
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Calculate optimal number of shards based on target shard size
|
|
78
|
+
*
|
|
79
|
+
* @param totalFiles - Total number of files
|
|
80
|
+
* @param targetShardSize - Desired number of files per shard
|
|
81
|
+
* @returns Number of shards needed
|
|
82
|
+
*/
|
|
83
|
+
function calculateOptimalShardCount(totalFiles, targetShardSize) {
|
|
84
|
+
return Math.ceil(totalFiles / targetShardSize);
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Analyzes file reassignment when scaling from oldShardCount to newShardCount
|
|
88
|
+
* Returns statistics about how many files would need to be reassigned
|
|
89
|
+
*
|
|
90
|
+
* @param filenames - Array of file paths to analyze
|
|
91
|
+
* @param oldShardCount - Current number of shards
|
|
92
|
+
* @param newShardCount - Target number of shards
|
|
93
|
+
* @returns Object with reassignment statistics
|
|
94
|
+
*/
|
|
95
|
+
function analyzeShardScaling(filenames, oldShardCount, newShardCount) {
|
|
96
|
+
let reassignedFiles = 0;
|
|
97
|
+
for (const filename of filenames) {
|
|
98
|
+
const oldShard = getShardForFilename(filename, { shardCount: oldShardCount });
|
|
99
|
+
const newShard = getShardForFilename(filename, { shardCount: newShardCount });
|
|
100
|
+
if (oldShard !== newShard) reassignedFiles++;
|
|
101
|
+
}
|
|
102
|
+
const stableFiles = filenames.length - reassignedFiles;
|
|
103
|
+
const reassignmentPercentage = filenames.length > 0 ? reassignedFiles / filenames.length * 100 : 0;
|
|
104
|
+
return {
|
|
105
|
+
totalFiles: filenames.length,
|
|
106
|
+
reassignedFiles,
|
|
107
|
+
reassignmentPercentage,
|
|
108
|
+
stableFiles
|
|
109
|
+
};
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
//#endregion
|
|
113
|
+
export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
|
package/dist/{consistent-sharding-D0wYSQBl.d.ts → consistent-sharding-D9n1M6by.d.ts}
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
//#region src/utils/consistent-sharding.d.ts
|
|
2
2
|
/**
|
|
3
3
|
* Generates a numeric hash from a filename using SHA1
|
|
4
|
+
* Uses only the first 8 characters of the hex digest to avoid JavaScript number precision issues
|
|
4
5
|
*/
|
|
5
6
|
declare function getNumericFileNameSha1(filename: string): number;
|
|
6
7
|
/**
|
|
@@ -9,8 +10,8 @@ declare function getNumericFileNameSha1(filename: string): number;
|
|
|
9
10
|
*/
|
|
10
11
|
declare function getFileHashPosition(filename: string): number;
|
|
11
12
|
/**
|
|
12
|
-
* Gets the shard index for a filename using deterministic hashing
|
|
13
|
-
* Files get assigned to a consistent preferred shard regardless of total count
|
|
13
|
+
* Gets the shard index for a filename using consistent hashing
|
|
14
|
+
* Files are assigned to the next shard clockwise on the hash ring
|
|
14
15
|
*
|
|
15
16
|
* @param filename - The file path to hash
|
|
16
17
|
* @param shardCount - Total number of shards
|
|
@@ -52,5 +53,20 @@ declare function distributeFilesAcrossShards(filenames: string[], shardCount: nu
|
|
|
52
53
|
* @returns Number of shards needed
|
|
53
54
|
*/
|
|
54
55
|
declare function calculateOptimalShardCount(totalFiles: number, targetShardSize: number): number;
|
|
56
|
+
/**
|
|
57
|
+
* Analyzes file reassignment when scaling from oldShardCount to newShardCount
|
|
58
|
+
* Returns statistics about how many files would need to be reassigned
|
|
59
|
+
*
|
|
60
|
+
* @param filenames - Array of file paths to analyze
|
|
61
|
+
* @param oldShardCount - Current number of shards
|
|
62
|
+
* @param newShardCount - Target number of shards
|
|
63
|
+
* @returns Object with reassignment statistics
|
|
64
|
+
*/
|
|
65
|
+
declare function analyzeShardScaling(filenames: string[], oldShardCount: number, newShardCount: number): {
|
|
66
|
+
totalFiles: number;
|
|
67
|
+
reassignedFiles: number;
|
|
68
|
+
reassignmentPercentage: number;
|
|
69
|
+
stableFiles: number;
|
|
70
|
+
};
|
|
55
71
|
//#endregion
|
|
56
|
-
export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
|
|
72
|
+
export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-D0wYSQBl.js";
|
|
1
|
+
import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-D9n1M6by.js";
|
|
2
2
|
import { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-D1oVulJ6.js";
|
|
3
|
-
export { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getApplicableFiles, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
|
|
3
|
+
export { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getApplicableFiles, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-DDU9PV2R.js";
|
|
3
|
-
import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-n5QdN_A3.js";
|
|
2
|
+
import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-CcnfsY_k.js";
|
|
3
|
+
import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-C8hyzL4c.js";
|
|
4
4
|
|
|
5
|
-
export { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getApplicableFiles, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
|
|
5
|
+
export { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getApplicableFiles, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
|
package/dist/sharding.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-D0wYSQBl.js";
|
|
2
|
-
export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
|
|
1
|
+
import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-D9n1M6by.js";
|
|
2
|
+
export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
|
package/dist/sharding.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-DDU9PV2R.js";
|
|
2
|
+
import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-CcnfsY_k.js";
|
|
3
3
|
|
|
4
|
-
export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
|
|
4
|
+
export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
|
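package/package.json
CHANGED
|
[The package.json hunk (+1 -1 per the file list above) is missing from this extract.]
package/dist/consistent-sharding-DDU9PV2R.js
REMOVED
|
@@ -1,71 +0,0 @@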
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import crypto from "node:crypto";
|
|
3
|
-
|
|
4
|
-
//#region src/utils/consistent-sharding.ts
|
|
5
|
-
const HASH_RING_SIZE = 1e6;
|
|
6
|
-
/**
|
|
7
|
-
* Generates a numeric hash from a filename using SHA1
|
|
8
|
-
*/
|
|
9
|
-
function getNumericFileNameSha1(filename) {
|
|
10
|
-
return parseInt(crypto.createHash("sha1").update(filename).digest("hex"), 16);
|
|
11
|
-
}
|
|
12
|
-
/**
|
|
13
|
-
* Maps a filename to a consistent position on the hash ring (0 to HASH_RING_SIZE-1)
|
|
14
|
-
* This position remains constant regardless of shard count changes
|
|
15
|
-
*/
|
|
16
|
-
function getFileHashPosition(filename) {
|
|
17
|
-
return getNumericFileNameSha1(filename) % HASH_RING_SIZE;
|
|
18
|
-
}
|
|
19
|
-
/**
|
|
20
|
-
* Gets the shard index for a filename using deterministic hashing
|
|
21
|
-
* Files get assigned to a consistent preferred shard regardless of total count
|
|
22
|
-
*
|
|
23
|
-
* @param filename - The file path to hash
|
|
24
|
-
* @param shardCount - Total number of shards
|
|
25
|
-
* @returns Shard index (0-based)
|
|
26
|
-
*/
|
|
27
|
-
function getShardForFilename(filename, { shardCount }) {
|
|
28
|
-
if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
|
|
29
|
-
return getNumericFileNameSha1(filename) % 10 % shardCount;
|
|
30
|
-
}
|
|
31
|
-
/**
|
|
32
|
-
* Checks if a file belongs to a specific shard
|
|
33
|
-
*
|
|
34
|
-
* @param filename - The file path to check
|
|
35
|
-
* @param shardCount - Total number of shards
|
|
36
|
-
* @param shardIndex - The shard index to check against (0-based)
|
|
37
|
-
* @returns True if file belongs to the specified shard
|
|
38
|
-
*/
|
|
39
|
-
function fitsInShard(filename, { shardCount, shardIndex }) {
|
|
40
|
-
return getShardForFilename(filename, { shardCount }) === shardIndex;
|
|
41
|
-
}
|
|
42
|
-
/**
|
|
43
|
-
* Distributes files across shards using deterministic hashing
|
|
44
|
-
*
|
|
45
|
-
* @param filenames - Array of file paths
|
|
46
|
-
* @param shardCount - Total number of shards
|
|
47
|
-
* @returns Map of shard index to array of filenames
|
|
48
|
-
*/
|
|
49
|
-
function distributeFilesAcrossShards(filenames, shardCount) {
|
|
50
|
-
if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
|
|
51
|
-
const shardMap = /* @__PURE__ */ new Map();
|
|
52
|
-
for (let i = 0; i < shardCount; i++) shardMap.set(i, []);
|
|
53
|
-
for (const filename of filenames) {
|
|
54
|
-
const shardIndex = getShardForFilename(filename, { shardCount });
|
|
55
|
-
shardMap.get(shardIndex).push(filename);
|
|
56
|
-
}
|
|
57
|
-
return shardMap;
|
|
58
|
-
}
|
|
59
|
-
/**
|
|
60
|
-
* Calculate optimal number of shards based on target shard size
|
|
61
|
-
*
|
|
62
|
-
* @param totalFiles - Total number of files
|
|
63
|
-
* @param targetShardSize - Desired number of files per shard
|
|
64
|
-
* @returns Number of shards needed
|
|
65
|
-
*/
|
|
66
|
-
function calculateOptimalShardCount(totalFiles, targetShardSize) {
|
|
67
|
-
return Math.ceil(totalFiles / targetShardSize);
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
//#endregion
|
|
71
|
-
export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
|