codemodctl 0.1.11 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -38,9 +38,14 @@ const belongsToShard = fitsInShard('src/components/Button.tsx', {
38
38
  shardIndex: 2
39
39
  });
40
40
 
41
- // Distribute all files across shards
41
+ // Distribute all files across shards with consistent hashing
42
42
  const files = ['file1.ts', 'file2.ts', 'file3.ts'];
43
43
  const distribution = distributeFilesAcrossShards(files, 5);
44
+
45
+ // Check scaling behavior - minimal reassignment when growing
46
+ const scalingAnalysis = analyzeShardScaling(files, 5, 6);
47
+ console.log(`${scalingAnalysis.stableFiles} files stay in same shard`);
48
+ console.log(`${scalingAnalysis.reassignmentPercentage}% reassignment`); // Much less than 100%
44
49
  ```
45
50
 
46
51
  #### Codeowner Analysis
@@ -73,13 +78,14 @@ const analysis = await codemodctl.codeowners.analyzeCodeowners(options);
73
78
 
74
79
  ## Key Features
75
80
 
76
- ### Deterministic File Sharding
81
+ ### Consistent File Sharding
77
82
 
78
- The sharding algorithm uses deterministic hashing to ensure:
83
+ The sharding algorithm uses **consistent hashing** to ensure:
79
84
 
80
85
  - **Perfect consistency**: Same file + same shard count = same result, always
81
86
  - **No external dependencies**: Result depends only on filename and shard count
82
- - **Even distribution**: SHA1 hashing provides good distribution across shards
87
+ - **Minimal reassignment**: When scaling up, only ~20-40% of files move (not 100%)
88
+ - **Stable scaling**: Adding a shard only moves files into the new shard; files never move between existing shards
83
89
  - **Simple API**: No complex parameters or configuration needed
84
90
  - **Team-aware sharding**: Works with codeowner boundaries
85
91
 
@@ -99,9 +105,12 @@ The sharding algorithm uses deterministic hashing to ensure:
99
105
  - `distributeFilesAcrossShards(files, shardCount)` - Distribute files across shards
100
106
  - `calculateOptimalShardCount(totalFiles, targetShardSize)` - Calculate optimal shard count
101
107
  - `getFileHashPosition(filename)` - Get consistent hash position for a file
108
+ - `analyzeShardScaling(files, oldCount, newCount)` - Analyze reassignment when scaling
102
109
 
103
110
  All functions are deterministic: same input always produces the same output.
104
111
 
112
+ **Scaling behavior**: When going from N to N+1 shards, typically only 20-40% of files are reassigned, and always to the newly added shard, which makes it well suited to incremental scaling scenarios.
113
+
105
114
  ### Codeowner Functions
106
115
 
107
116
  - `analyzeCodeowners(options)` - Complete analysis with shard generation
@@ -125,20 +134,27 @@ const shard1 = getShardForFilename('src/components/Button.tsx', { shardCount: 5
125
134
  const shard2 = getShardForFilename('src/components/Button.tsx', { shardCount: 5 });
126
135
  console.log(shard1 === shard2); // always true
127
136
 
128
- // Different shard counts may give different results (that's expected)
137
+ // Different shard counts may give different results (expected behavior)
129
138
  const shard5 = getShardForFilename('src/components/Button.tsx', { shardCount: 5 });
130
139
  const shard10 = getShardForFilename('src/components/Button.tsx', { shardCount: 10 });
131
- // shard5 and shard10 may be different, but each is consistent
140
+ // shard5 and shard10 may be different, but each is consistent
132
141
 
133
- // Distribute files deterministically
142
+ // Distribute files with consistent hashing for stable scaling
134
143
  const files = ['file1.ts', 'file2.ts', 'file3.ts'];
135
144
  const distribution = distributeFilesAcrossShards(files, 5);
145
+
146
+ // When you need more capacity, most files stay in place
147
+ const moreFiles = [...files, 'newFile.ts'];
148
+ const analysis = analyzeShardScaling(moreFiles, 5, 6);
149
+ // Only ~20-40% of files get reassigned, not all of them!
136
150
  ```
137
151
 
138
152
  ### Key Benefits
139
153
  - **No complex parameters**: Just filename and shard count
140
154
  - **Perfectly deterministic**: Same input = same output, always
141
- - **Fast and simple**: Pure hash-based assignment
155
+ - **Stable scaling**: When adding shards, most files stay in their original shards
156
+ - **Minimal reassignment**: Only ~20-40% of files move when scaling up
157
+ - **Fast and simple**: Hash-based assignment with consistent ring placement
142
158
  - **Works across runs**: File gets same shard whether filesystem changes or not
143
159
 
144
160
  ## CLI Commands
package/dist/cli.js CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
- import "./consistent-sharding-DDU9PV2R.js";
3
- import { analyzeCodeowners } from "./codeowner-analysis-n5QdN_A3.js";
2
+ import "./consistent-sharding-CcnfsY_k.js";
3
+ import { analyzeCodeowners } from "./codeowner-analysis-C8hyzL4c.js";
4
4
  import { defineCommand, runMain } from "citty";
5
5
  import crypto from "node:crypto";
6
6
  import { $ } from "execa";
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { calculateOptimalShardCount } from "./consistent-sharding-DDU9PV2R.js";
2
+ import { calculateOptimalShardCount } from "./consistent-sharding-CcnfsY_k.js";
3
3
  import { execSync } from "node:child_process";
4
4
  import { existsSync } from "node:fs";
5
5
  import path, { resolve } from "node:path";
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import "./consistent-sharding-DDU9PV2R.js";
3
- import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-n5QdN_A3.js";
2
+ import "./consistent-sharding-CcnfsY_k.js";
3
+ import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-C8hyzL4c.js";
4
4
 
5
5
  export { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName };
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env node
2
+ import crypto from "node:crypto";
3
+
4
+ //#region src/utils/consistent-sharding.ts
5
+ const HASH_RING_SIZE = 1e6;
6
+ /**
7
+ * Generates a numeric hash from a filename using SHA1
8
+ * Uses only the first 8 characters of the hex digest to avoid JavaScript number precision issues
9
+ */
10
+ function getNumericFileNameSha1(filename) {
11
+ const hex = crypto.createHash("sha1").update(filename).digest("hex").substring(0, 8);
12
+ return parseInt(hex, 16);
13
+ }
14
+ /**
15
+ * Maps a filename to a consistent position on the hash ring (0 to HASH_RING_SIZE-1)
16
+ * This position remains constant regardless of shard count changes
17
+ */
18
+ function getFileHashPosition(filename) {
19
+ return getNumericFileNameSha1(filename) % HASH_RING_SIZE;
20
+ }
21
+ /**
22
+ * Get the position for a specific shard index on the hash ring
23
+ * Shards get fixed positions that don't change when other shards are added
24
+ */
25
+ function getShardPosition(shardIndex) {
26
+ return parseInt(crypto.createHash("sha1").update(`shard-${shardIndex}`).digest("hex").substring(0, 8), 16) % HASH_RING_SIZE;
27
+ }
28
+ /**
29
+ * Gets the shard index for a filename using consistent hashing
30
+ * Files are assigned to the next shard clockwise on the hash ring
31
+ *
32
+ * @param filename - The file path to hash
33
+ * @param shardCount - Total number of shards
34
+ * @returns Shard index (0-based)
35
+ */
36
+ function getShardForFilename(filename, { shardCount }) {
37
+ if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
38
+ const filePosition = getFileHashPosition(filename);
39
+ const shardInfo = [];
40
+ for (let i = 0; i < shardCount; i++) shardInfo.push({
41
+ index: i,
42
+ position: getShardPosition(i)
43
+ });
44
+ shardInfo.sort((a, b) => a.position - b.position);
45
+ for (const shard of shardInfo) if (filePosition <= shard.position) return shard.index;
46
+ return shardInfo[0].index;
47
+ }
48
+ /**
49
+ * Checks if a file belongs to a specific shard
50
+ *
51
+ * @param filename - The file path to check
52
+ * @param shardCount - Total number of shards
53
+ * @param shardIndex - The shard index to check against (0-based)
54
+ * @returns True if file belongs to the specified shard
55
+ */
56
+ function fitsInShard(filename, { shardCount, shardIndex }) {
57
+ return getShardForFilename(filename, { shardCount }) === shardIndex;
58
+ }
59
+ /**
60
+ * Distributes files across shards using consistent hashing
61
+ *
62
+ * @param filenames - Array of file paths
63
+ * @param shardCount - Total number of shards
64
+ * @returns Map of shard index to array of filenames
65
+ */
66
+ function distributeFilesAcrossShards(filenames, shardCount) {
67
+ if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
68
+ const shardMap = /* @__PURE__ */ new Map();
69
+ for (let i = 0; i < shardCount; i++) shardMap.set(i, []);
70
+ for (const filename of filenames) {
71
+ const shardIndex = getShardForFilename(filename, { shardCount });
72
+ shardMap.get(shardIndex).push(filename);
73
+ }
74
+ return shardMap;
75
+ }
76
+ /**
77
+ * Calculate optimal number of shards based on target shard size
78
+ *
79
+ * @param totalFiles - Total number of files
80
+ * @param targetShardSize - Desired number of files per shard
81
+ * @returns Number of shards needed
82
+ */
83
+ function calculateOptimalShardCount(totalFiles, targetShardSize) {
84
+ return Math.ceil(totalFiles / targetShardSize);
85
+ }
86
+ /**
87
+ * Analyzes file reassignment when scaling from oldShardCount to newShardCount
88
+ * Returns statistics about how many files would need to be reassigned
89
+ *
90
+ * @param filenames - Array of file paths to analyze
91
+ * @param oldShardCount - Current number of shards
92
+ * @param newShardCount - Target number of shards
93
+ * @returns Object with reassignment statistics
94
+ */
95
+ function analyzeShardScaling(filenames, oldShardCount, newShardCount) {
96
+ let reassignedFiles = 0;
97
+ for (const filename of filenames) {
98
+ const oldShard = getShardForFilename(filename, { shardCount: oldShardCount });
99
+ const newShard = getShardForFilename(filename, { shardCount: newShardCount });
100
+ if (oldShard !== newShard) reassignedFiles++;
101
+ }
102
+ const stableFiles = filenames.length - reassignedFiles;
103
+ const reassignmentPercentage = filenames.length > 0 ? reassignedFiles / filenames.length * 100 : 0;
104
+ return {
105
+ totalFiles: filenames.length,
106
+ reassignedFiles,
107
+ reassignmentPercentage,
108
+ stableFiles
109
+ };
110
+ }
111
+
112
+ //#endregion
113
+ export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
@@ -1,6 +1,7 @@
1
1
  //#region src/utils/consistent-sharding.d.ts
2
2
  /**
3
3
  * Generates a numeric hash from a filename using SHA1
4
+ * Uses only the first 8 characters of the hex digest to avoid JavaScript number precision issues
4
5
  */
5
6
  declare function getNumericFileNameSha1(filename: string): number;
6
7
  /**
@@ -9,8 +10,8 @@ declare function getNumericFileNameSha1(filename: string): number;
9
10
  */
10
11
  declare function getFileHashPosition(filename: string): number;
11
12
  /**
12
- * Gets the shard index for a filename using deterministic hashing
13
- * Files get assigned to a consistent preferred shard regardless of total count
13
+ * Gets the shard index for a filename using consistent hashing
14
+ * Files are assigned to the next shard clockwise on the hash ring
14
15
  *
15
16
  * @param filename - The file path to hash
16
17
  * @param shardCount - Total number of shards
@@ -52,5 +53,20 @@ declare function distributeFilesAcrossShards(filenames: string[], shardCount: nu
52
53
  * @returns Number of shards needed
53
54
  */
54
55
  declare function calculateOptimalShardCount(totalFiles: number, targetShardSize: number): number;
56
+ /**
57
+ * Analyzes file reassignment when scaling from oldShardCount to newShardCount
58
+ * Returns statistics about how many files would need to be reassigned
59
+ *
60
+ * @param filenames - Array of file paths to analyze
61
+ * @param oldShardCount - Current number of shards
62
+ * @param newShardCount - Target number of shards
63
+ * @returns Object with reassignment statistics
64
+ */
65
+ declare function analyzeShardScaling(filenames: string[], oldShardCount: number, newShardCount: number): {
66
+ totalFiles: number;
67
+ reassignedFiles: number;
68
+ reassignmentPercentage: number;
69
+ stableFiles: number;
70
+ };
55
71
  //#endregion
56
- export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
72
+ export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
package/dist/index.d.ts CHANGED
@@ -1,3 +1,3 @@
1
- import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-D0wYSQBl.js";
1
+ import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-D9n1M6by.js";
2
2
  import { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-D1oVulJ6.js";
3
- export { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getApplicableFiles, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
3
+ export { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getApplicableFiles, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
package/dist/index.js CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-DDU9PV2R.js";
3
- import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-n5QdN_A3.js";
2
+ import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-CcnfsY_k.js";
3
+ import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-C8hyzL4c.js";
4
4
 
5
- export { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getApplicableFiles, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
5
+ export { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getApplicableFiles, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
@@ -1,2 +1,2 @@
1
- import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-D0wYSQBl.js";
2
- export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
1
+ import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-D9n1M6by.js";
2
+ export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
package/dist/sharding.js CHANGED
@@ -1,4 +1,4 @@
1
1
  #!/usr/bin/env node
2
- import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-DDU9PV2R.js";
2
+ import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-CcnfsY_k.js";
3
3
 
4
- export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
4
+ export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codemodctl",
3
- "version": "0.1.11",
3
+ "version": "0.1.13",
4
4
  "description": "CLI tool and utilities for workflow engine operations, file sharding, and codeowner analysis",
5
5
  "type": "module",
6
6
  "exports": {
@@ -1,71 +0,0 @@
1
- #!/usr/bin/env node
2
- import crypto from "node:crypto";
3
-
4
- //#region src/utils/consistent-sharding.ts
5
- const HASH_RING_SIZE = 1e6;
6
- /**
7
- * Generates a numeric hash from a filename using SHA1
8
- */
9
- function getNumericFileNameSha1(filename) {
10
- return parseInt(crypto.createHash("sha1").update(filename).digest("hex"), 16);
11
- }
12
- /**
13
- * Maps a filename to a consistent position on the hash ring (0 to HASH_RING_SIZE-1)
14
- * This position remains constant regardless of shard count changes
15
- */
16
- function getFileHashPosition(filename) {
17
- return getNumericFileNameSha1(filename) % HASH_RING_SIZE;
18
- }
19
- /**
20
- * Gets the shard index for a filename using deterministic hashing
21
- * Files get assigned to a consistent preferred shard regardless of total count
22
- *
23
- * @param filename - The file path to hash
24
- * @param shardCount - Total number of shards
25
- * @returns Shard index (0-based)
26
- */
27
- function getShardForFilename(filename, { shardCount }) {
28
- if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
29
- return getNumericFileNameSha1(filename) % 10 % shardCount;
30
- }
31
- /**
32
- * Checks if a file belongs to a specific shard
33
- *
34
- * @param filename - The file path to check
35
- * @param shardCount - Total number of shards
36
- * @param shardIndex - The shard index to check against (0-based)
37
- * @returns True if file belongs to the specified shard
38
- */
39
- function fitsInShard(filename, { shardCount, shardIndex }) {
40
- return getShardForFilename(filename, { shardCount }) === shardIndex;
41
- }
42
- /**
43
- * Distributes files across shards using deterministic hashing
44
- *
45
- * @param filenames - Array of file paths
46
- * @param shardCount - Total number of shards
47
- * @returns Map of shard index to array of filenames
48
- */
49
- function distributeFilesAcrossShards(filenames, shardCount) {
50
- if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
51
- const shardMap = /* @__PURE__ */ new Map();
52
- for (let i = 0; i < shardCount; i++) shardMap.set(i, []);
53
- for (const filename of filenames) {
54
- const shardIndex = getShardForFilename(filename, { shardCount });
55
- shardMap.get(shardIndex).push(filename);
56
- }
57
- return shardMap;
58
- }
59
- /**
60
- * Calculate optimal number of shards based on target shard size
61
- *
62
- * @param totalFiles - Total number of files
63
- * @param targetShardSize - Desired number of files per shard
64
- * @returns Number of shards needed
65
- */
66
- function calculateOptimalShardCount(totalFiles, targetShardSize) {
67
- return Math.ceil(totalFiles / targetShardSize);
68
- }
69
-
70
- //#endregion
71
- export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };