codemodctl 0.1.12 → 0.1.14

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -38,9 +38,14 @@ const belongsToShard = fitsInShard('src/components/Button.tsx', {
38
38
  shardIndex: 2
39
39
  });
40
40
 
41
- // Distribute all files across shards
41
+ // Distribute all files across shards with consistent hashing
42
42
  const files = ['file1.ts', 'file2.ts', 'file3.ts'];
43
43
  const distribution = distributeFilesAcrossShards(files, 5);
44
+
45
+ // Check scaling behavior - minimal reassignment when growing
46
+ const scalingAnalysis = analyzeShardScaling(files, 5, 6);
47
+ console.log(`${scalingAnalysis.stableFiles} files stay in same shard`);
48
+ console.log(`${scalingAnalysis.reassignmentPercentage}% reassignment`); // Much less than 100%
44
49
  ```
45
50
 
46
51
  #### Codeowner Analysis
@@ -73,13 +78,14 @@ const analysis = await codemodctl.codeowners.analyzeCodeowners(options);
73
78
 
74
79
  ## Key Features
75
80
 
76
- ### Deterministic File Sharding
81
+ ### Consistent File Sharding
77
82
 
78
- The sharding algorithm uses deterministic hashing to ensure:
83
+ The sharding algorithm uses **consistent hashing** to ensure:
79
84
 
80
85
  - **Perfect consistency**: Same file + same shard count = same result, always
81
86
  - **No external dependencies**: Result depends only on filename and shard count
82
- - **Even distribution**: SHA1 hashing provides good distribution across shards
87
+ - **Minimal reassignment**: When scaling up, only ~20-40% of files move (not 100%)
88
+ - **Stable scaling**: Adding new shards doesn't reorganize existing file assignments
83
89
  - **Simple API**: No complex parameters or configuration needed
84
90
  - **Team-aware sharding**: Works with codeowner boundaries
85
91
 
@@ -99,9 +105,12 @@ The sharding algorithm uses deterministic hashing to ensure:
99
105
  - `distributeFilesAcrossShards(files, shardCount)` - Distribute files across shards
100
106
  - `calculateOptimalShardCount(totalFiles, targetShardSize)` - Calculate optimal shard count
101
107
  - `getFileHashPosition(filename)` - Get consistent hash position for a file
108
+ - `analyzeShardScaling(files, oldCount, newCount)` - Analyze reassignment when scaling
102
109
 
103
110
  All functions are deterministic: same input always produces the same output.
104
111
 
112
+ **Scaling behavior**: When going from N to N+1 shards, typically only 20-40% of files get reassigned to new locations, making it ideal for incremental scaling scenarios.
113
+
105
114
  ### Codeowner Functions
106
115
 
107
116
  - `analyzeCodeowners(options)` - Complete analysis with shard generation
@@ -125,20 +134,27 @@ const shard1 = getShardForFilename('src/components/Button.tsx', { shardCount: 5
125
134
  const shard2 = getShardForFilename('src/components/Button.tsx', { shardCount: 5 });
126
135
  console.log(shard1 === shard2); // always true
127
136
 
128
- // Different shard counts may give different results (that's expected)
137
+ // Different shard counts give different results (expected behavior)
129
138
  const shard5 = getShardForFilename('src/components/Button.tsx', { shardCount: 5 });
130
139
  const shard10 = getShardForFilename('src/components/Button.tsx', { shardCount: 10 });
131
- // shard5 and shard10 may be different, but each is consistent
140
+ // shard5 and shard10 will likely be different, but each is consistent
132
141
 
133
- // Distribute files deterministically
142
+ // Distribute files with consistent hashing for stable scaling
134
143
  const files = ['file1.ts', 'file2.ts', 'file3.ts'];
135
144
  const distribution = distributeFilesAcrossShards(files, 5);
145
+
146
+ // When you need more capacity, most files stay in place
147
+ const moreFiles = [...files, 'newFile.ts'];
148
+ const analysis = analyzeShardScaling(files, 5, 6);
149
+ // Only ~20-40% of files get reassigned, not all of them!
136
150
  ```
137
151
 
138
152
  ### Key Benefits
139
153
  - **No complex parameters**: Just filename and shard count
140
154
  - **Perfectly deterministic**: Same input = same output, always
141
- - **Fast and simple**: Pure hash-based assignment
155
+ - **Stable scaling**: When adding shards, most files stay in their original shards
156
+ - **Minimal reassignment**: Only ~20-40% of files move when scaling up
157
+ - **Fast and simple**: Hash-based assignment with consistent ring placement
142
158
  - **Works across runs**: File gets same shard whether filesystem changes or not
143
159
 
144
160
  ## CLI Commands
package/dist/cli.js CHANGED
@@ -1,15 +1,16 @@
1
1
  #!/usr/bin/env node
2
- import "./consistent-sharding-DDU9PV2R.js";
3
- import { analyzeCodeowners } from "./codeowner-analysis-n5QdN_A3.js";
2
+ import { analyzeCodeowners, getApplicableFiles } from "./codeowner-analysis-pG3p0RPU.js";
3
+ import { calculateOptimalShardCount, distributeFilesAcrossShards } from "./consistent-sharding-pjG1rI6w.js";
4
4
  import { defineCommand, runMain } from "citty";
5
5
  import crypto from "node:crypto";
6
6
  import { $ } from "execa";
7
7
  import { writeFile } from "node:fs/promises";
8
+ import path from "node:path";
8
9
 
9
- //#region src/commands/pr/create.ts
10
- const createCommand = defineCommand({
10
+ //#region src/commands/git/create-pr.ts
11
+ const createPrCommand = defineCommand({
11
12
  meta: {
12
- name: "create",
13
+ name: "create-pr",
13
14
  description: "Create a pull request"
14
15
  },
15
16
  args: {
@@ -156,17 +157,30 @@ const createCommand = defineCommand({
156
157
  });
157
158
 
158
159
  //#endregion
159
- //#region src/commands/pr/index.ts
160
- const prCommand = defineCommand({
160
+ //#region src/commands/git/index.ts
161
+ const gitCommand = defineCommand({
161
162
  meta: {
162
- name: "pr",
163
- description: "Pull request operations"
163
+ name: "git",
164
+ description: "Git operations"
164
165
  },
165
- subCommands: { create: createCommand }
166
+ subCommands: { createPr: createPrCommand }
166
167
  });
167
168
 
168
169
  //#endregion
169
170
  //#region src/commands/shard/codeowner.ts
171
+ /**
172
+ * Codeowner-based sharding command
173
+ *
174
+ * Creates shards by grouping files by their CODEOWNERS team assignments.
175
+ * Uses simple file distribution within each team group, maintaining
176
+ * consistency with existing state when available.
177
+ *
178
+ * Example usage:
179
+ * npx codemodctl shard codeowner -l tsx -c ./codemod.ts -s 30 --stateProp shards --codeowners .github/CODEOWNERS
180
+ *
181
+ * This will analyze all applicable files, group them by CODEOWNERS team assignments, and create
182
+ * shards with approximately 30 files each within each team.
183
+ */
170
184
  const codeownerCommand = defineCommand({
171
185
  meta: {
172
186
  name: "codeowner",
@@ -217,12 +231,22 @@ const codeownerCommand = defineCommand({
217
231
  }
218
232
  try {
219
233
  console.log(`State property: ${stateProp}`);
234
+ const existingStateJson = process.env.CODEMOD_STATE;
235
+ let existingState;
236
+ if (existingStateJson) try {
237
+ existingState = JSON.parse(existingStateJson)[stateProp];
238
+ console.log(`Found existing state with ${existingState.length} shards`);
239
+ } catch (parseError) {
240
+ console.warn(`Warning: Failed to parse existing state: ${parseError}`);
241
+ existingState = void 0;
242
+ }
220
243
  const analysisOptions = {
221
244
  shardSize,
222
245
  codeownersPath,
223
246
  rulePath: codemodFilePath,
224
247
  projectRoot: process.cwd(),
225
- language
248
+ language,
249
+ existingState
226
250
  };
227
251
  const result = await analyzeCodeowners(analysisOptions);
228
252
  const stateOutput = `${stateProp}=${JSON.stringify(result.shards)}\n`;
@@ -238,6 +262,194 @@ const codeownerCommand = defineCommand({
238
262
  }
239
263
  });
240
264
 
265
+ //#endregion
266
+ //#region src/utils/directory-analysis.ts
267
+ /**
268
+ * Groups files by their immediate subdirectory within the target directory
269
+ *
270
+ * @param files - Array of file paths to group
271
+ * @param target - Target directory to analyze subdirectories within
272
+ * @returns Map of subdirectory paths to their file lists
273
+ */
274
+ function groupFilesByDirectory(files, target) {
275
+ const normalizedTarget = path.normalize(target);
276
+ const filesByDirectory = /* @__PURE__ */ new Map();
277
+ for (const filePath of files) {
278
+ const normalizedFile = path.normalize(filePath);
279
+ if (!normalizedFile.startsWith(normalizedTarget)) continue;
280
+ const relativePath = path.relative(normalizedTarget, normalizedFile);
281
+ if (!relativePath.includes(path.sep)) continue;
282
+ const firstDir = relativePath.split(path.sep)[0];
283
+ if (!firstDir) continue;
284
+ const subdirectory = path.join(normalizedTarget, firstDir);
285
+ if (!filesByDirectory.has(subdirectory)) filesByDirectory.set(subdirectory, []);
286
+ filesByDirectory.get(subdirectory).push(normalizedFile);
287
+ }
288
+ return filesByDirectory;
289
+ }
290
+ /**
291
+ * Creates directory-based shards using consistent hashing within each directory group.
292
+ * Maintains consistency with existing state when provided.
293
+ *
294
+ * @param filesByDirectory - Map of directory paths to their file lists
295
+ * @param shardSize - Target number of files per shard
296
+ * @param existingState - Optional existing state for consistency
297
+ * @returns Array of directory-based shards
298
+ */
299
+ function createDirectoryShards(filesByDirectory, shardSize, existingState) {
300
+ const allShards = [];
301
+ const existingByDirectory = /* @__PURE__ */ new Map();
302
+ if (existingState) for (const shard of existingState) {
303
+ if (!existingByDirectory.has(shard.directory)) existingByDirectory.set(shard.directory, []);
304
+ existingByDirectory.get(shard.directory).push(shard);
305
+ }
306
+ for (const [directory, files] of filesByDirectory.entries()) {
307
+ const fileCount = files.length;
308
+ const optimalShardCount = calculateOptimalShardCount(fileCount, shardSize);
309
+ const existingShards = existingByDirectory.get(directory) || [];
310
+ const existingShardCount = existingShards.length > 0 ? existingShards[0]?.shardCount ?? 0 : 0;
311
+ const shardCount = existingShardCount > 0 ? existingShardCount : optimalShardCount;
312
+ console.log(`Directory "${directory}" contains ${fileCount} files, ${existingShardCount > 0 ? `maintaining ${shardCount} existing shards` : `creating ${shardCount} new shards`}`);
313
+ const shardMap = distributeFilesAcrossShards(files, shardCount);
314
+ for (let shardIndex = 0; shardIndex < shardCount; shardIndex++) {
315
+ const shardFiles = shardMap.get(shardIndex) || [];
316
+ allShards.push({
317
+ directory,
318
+ shard: shardIndex + 1,
319
+ shardCount,
320
+ files: shardFiles.sort()
321
+ });
322
+ }
323
+ }
324
+ return allShards;
325
+ }
326
+ /**
327
+ * Main function to analyze directories and generate shard configuration.
328
+ * Maintains consistency with existing state when provided.
329
+ *
330
+ * @param options - Configuration options for directory analysis
331
+ * @returns Promise resolving to directory analysis result
332
+ * @throws Error if no files found in target subdirectories
333
+ */
334
+ async function analyzeDirectories(options) {
335
+ const { shardSize, target, rulePath, language, projectRoot = process.cwd(), existingState } = options;
336
+ console.debug(`Using rule file: ${rulePath}`);
337
+ console.debug(`Target directory: ${target}`);
338
+ console.debug(`Shard size: ${shardSize}`);
339
+ if (existingState) console.debug(`Using existing state with ${existingState.length} shards`);
340
+ console.log("Analyzing files with CLI command...");
341
+ const applicableFiles = await getApplicableFiles(rulePath, language, projectRoot);
342
+ console.log("Grouping files by directory...");
343
+ const filesByDirectory = groupFilesByDirectory(applicableFiles, target);
344
+ if (filesByDirectory.size === 0) throw new Error(`No files found in subdirectories of target: ${target}`);
345
+ console.log(`Found ${filesByDirectory.size} subdirectories in target`);
346
+ console.log("Generating directory-based shards...");
347
+ const shards = createDirectoryShards(filesByDirectory, shardSize, existingState);
348
+ const totalFiles = Array.from(filesByDirectory.values()).reduce((sum, files) => sum + files.length, 0);
349
+ console.log(`Generated ${shards.length} total shards for ${totalFiles} files across ${filesByDirectory.size} directories`);
350
+ return {
351
+ shards,
352
+ totalFiles
353
+ };
354
+ }
355
+
356
+ //#endregion
357
+ //#region src/commands/shard/directory.ts
358
+ /**
359
+ * Directory-based sharding command
360
+ *
361
+ * Creates shards by grouping files within subdirectories of a target directory.
362
+ * Uses consistent hashing to distribute files within each directory group, maintaining
363
+ * consistency with existing state when available.
364
+ *
365
+ * Example usage:
366
+ * npx codemodctl shard directory -l tsx -c ./codemod.ts -s 30 --stateProp shards --target packages/
367
+ *
368
+ * This will analyze all applicable files within subdirectories of 'packages/' and create
369
+ * shards with approximately 30 files each, grouped by directory.
370
+ */
371
+ const directoryCommand = defineCommand({
372
+ meta: {
373
+ name: "directory",
374
+ description: "Create directory-based sharding output"
375
+ },
376
+ args: {
377
+ shardSize: {
378
+ type: "string",
379
+ alias: "s",
380
+ description: "Number of files per shard",
381
+ required: true
382
+ },
383
+ stateProp: {
384
+ type: "string",
385
+ alias: "p",
386
+ description: "Property name for state output",
387
+ required: true
388
+ },
389
+ target: {
390
+ type: "string",
391
+ description: "Target directory to shard by subdirectories",
392
+ required: true
393
+ },
394
+ codemodFile: {
395
+ type: "string",
396
+ alias: "c",
397
+ description: "Path to codemod file",
398
+ required: true
399
+ },
400
+ language: {
401
+ type: "string",
402
+ alias: "l",
403
+ description: "Language of the codemod",
404
+ required: true
405
+ }
406
+ },
407
+ async run({ args }) {
408
+ const { shardSize: shardSizeStr, stateProp, target, codemodFile: codemodFilePath, language } = args;
409
+ const shardSize = parseInt(shardSizeStr, 10);
410
+ if (isNaN(shardSize) || shardSize <= 0) {
411
+ console.error("Error: shard-size must be a positive number");
412
+ process.exit(1);
413
+ }
414
+ const stateOutputsPath = process.env.STATE_OUTPUTS;
415
+ if (!stateOutputsPath) {
416
+ console.error("Error: STATE_OUTPUTS environment variable is required");
417
+ process.exit(1);
418
+ }
419
+ try {
420
+ console.log(`State property: ${stateProp}`);
421
+ console.log(`Target directory: ${target}`);
422
+ const existingStateJson = process.env.CODEMOD_STATE;
423
+ let existingState;
424
+ if (existingStateJson) try {
425
+ existingState = JSON.parse(existingStateJson)[stateProp];
426
+ console.log(`Found existing state with ${existingState.length} shards`);
427
+ } catch (parseError) {
428
+ console.warn(`Warning: Failed to parse existing state: ${parseError}`);
429
+ existingState = void 0;
430
+ }
431
+ const analysisOptions = {
432
+ shardSize,
433
+ target,
434
+ rulePath: codemodFilePath,
435
+ projectRoot: process.cwd(),
436
+ language,
437
+ existingState
438
+ };
439
+ const result = await analyzeDirectories(analysisOptions);
440
+ const stateOutput = `${stateProp}=${JSON.stringify(result.shards)}\n`;
441
+ console.log(`Writing state output to: ${stateOutputsPath}`);
442
+ await writeFile(stateOutputsPath, stateOutput, { flag: "a" });
443
+ console.log("✅ Directory-based sharding completed successfully!");
444
+ console.log("Generated shards:", JSON.stringify(result.shards, null, 2));
445
+ } catch (error) {
446
+ console.error("❌ Failed to process directory sharding:");
447
+ console.error(error instanceof Error ? error.message : String(error));
448
+ process.exit(1);
449
+ }
450
+ }
451
+ });
452
+
241
453
  //#endregion
242
454
  //#region src/commands/shard/index.ts
243
455
  const shardCommand = defineCommand({
@@ -245,7 +457,10 @@ const shardCommand = defineCommand({
245
457
  name: "shard",
246
458
  description: "Sharding operations for distributing work"
247
459
  },
248
- subCommands: { codeowner: codeownerCommand }
460
+ subCommands: {
461
+ codeowner: codeownerCommand,
462
+ directory: directoryCommand
463
+ }
249
464
  });
250
465
 
251
466
  //#endregion
@@ -257,7 +472,7 @@ const main = defineCommand({
257
472
  description: "CLI tool for workflow engine operations"
258
473
  },
259
474
  subCommands: {
260
- pr: prCommand,
475
+ git: gitCommand,
261
476
  shard: shardCommand
262
477
  }
263
478
  });
@@ -1,24 +1,54 @@
1
1
  //#region src/utils/codeowner-analysis.d.ts
2
+ /**
3
+ * Result for a single team-based shard
4
+ */
2
5
  interface ShardResult {
6
+ /** The team that owns these files */
3
7
  team: string;
8
+ /** The shard identifier string (e.g., "1/3") */
4
9
  shard: string;
10
+ /** The combined shard ID (e.g., "team-name 1/3") */
5
11
  shardId: string;
12
+ /** Array of file paths in this shard */
13
+ files: string[];
6
14
  }
15
+ /**
16
+ * Information about a team and their files
17
+ */
7
18
  interface TeamFileInfo {
19
+ /** Team name */
8
20
  team: string;
21
+ /** Number of files owned by this team */
9
22
  fileCount: number;
23
+ /** Array of file paths owned by this team */
10
24
  files: string[];
11
25
  }
26
+ /**
27
+ * Options for codeowner-based analysis
28
+ */
12
29
  interface CodeownerAnalysisOptions {
30
+ /** Target number of files per shard */
13
31
  shardSize: number;
32
+ /** Optional path to CODEOWNERS file */
14
33
  codeownersPath?: string;
34
+ /** Path to the codemod rule file */
15
35
  rulePath: string;
36
+ /** Programming language for the codemod */
16
37
  language: string;
38
+ /** Project root directory (defaults to process.cwd()) */
17
39
  projectRoot?: string;
40
+ /** Existing state for consistency (optional) */
41
+ existingState?: ShardResult[];
18
42
  }
43
+ /**
44
+ * Result of codeowner-based analysis
45
+ */
19
46
  interface CodeownerAnalysisResult {
47
+ /** Array of team information */
20
48
  teams: TeamFileInfo[];
49
+ /** Array of team-based shards with file assignments */
21
50
  shards: ShardResult[];
51
+ /** Total number of files processed */
22
52
  totalFiles: number;
23
53
  }
24
54
  /**
@@ -31,10 +61,6 @@ declare function findCodeownersFile(projectRoot?: string, explicitPath?: string)
31
61
  * Normalizes owner name by removing @ prefix and converting to lowercase
32
62
  */
33
63
  declare function normalizeOwnerName(owner: string): string;
34
- /**
35
- * Executes the codemod CLI command and returns applicable file paths
36
- */
37
- declare function getApplicableFiles(rulePath: string, language: string, projectRoot: string): Promise<string[]>;
38
64
  /**
39
65
  * Analyzes files and groups them by codeowner team
40
66
  */
@@ -44,16 +70,26 @@ declare function analyzeFilesByOwner(codeownersPath: string, language: string, r
44
70
  */
45
71
  declare function analyzeFilesWithoutOwner(language: string, rulePath: string, projectRoot?: string): Promise<Map<string, string[]>>;
46
72
  /**
47
- * Generates shard configuration from team file analysis
73
+ * Generates shard configuration from team file analysis with actual file distribution.
74
+ * Maintains consistency with existing state when provided.
75
+ *
76
+ * @param filesByOwner - Map of team names to their file arrays
77
+ * @param shardSize - Target number of files per shard
78
+ * @param existingState - Optional existing state for consistency
79
+ * @returns Array of team-based shards with file assignments
48
80
  */
49
- declare function generateShards(filesByOwner: Map<string, string[]>, shardSize: number): ShardResult[];
81
+ declare function generateShards(filesByOwner: Map<string, string[]>, shardSize: number, existingState?: ShardResult[]): ShardResult[];
50
82
  /**
51
83
  * Converts file ownership map to team info array
52
84
  */
53
85
  declare function getTeamFileInfo(filesByOwner: Map<string, string[]>): TeamFileInfo[];
54
86
  /**
55
- * Main function to analyze codeowners and generate shard configuration
87
+ * Main function to analyze codeowners and generate shard configuration.
88
+ * Maintains consistency with existing state when provided.
89
+ *
90
+ * @param options - Configuration options for codeowner analysis
91
+ * @returns Promise resolving to codeowner analysis result
56
92
  */
57
93
  declare function analyzeCodeowners(options: CodeownerAnalysisOptions): Promise<CodeownerAnalysisResult>;
58
94
  //#endregion
59
- export { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName };
95
+ export { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getTeamFileInfo, normalizeOwnerName };
@@ -1,10 +1,31 @@
1
1
  #!/usr/bin/env node
2
- import { calculateOptimalShardCount } from "./consistent-sharding-DDU9PV2R.js";
3
- import { execSync } from "node:child_process";
4
2
  import { existsSync } from "node:fs";
5
3
  import path, { resolve } from "node:path";
6
4
  import Codeowners from "codeowners";
5
+ import { execSync } from "node:child_process";
7
6
 
7
+ //#region src/utils/codemod-cli.ts
8
+ /**
9
+ * Executes the codemod CLI command and returns applicable file paths
10
+ */
11
+ async function getApplicableFiles(rulePath, language, projectRoot) {
12
+ try {
13
+ const command = `npx -y codemod@latest jssg list-applicable --language ${language} --target ${projectRoot} ${rulePath}`;
14
+ console.debug(`Executing: ${command}`);
15
+ const applicableFiles = execSync(command, {
16
+ encoding: "utf8",
17
+ cwd: projectRoot,
18
+ maxBuffer: 10 * 1024 * 1024
19
+ }).split("\n").filter((line) => line.startsWith("[Applicable] ")).map((line) => line.replace("[Applicable] ", "").trim()).filter((filePath) => filePath.length > 0);
20
+ console.debug(`Found ${applicableFiles.length} applicable files`);
21
+ return applicableFiles;
22
+ } catch (error) {
23
+ console.error("Error executing codemod CLI:", error);
24
+ throw new Error(`Failed to execute codemod CLI: ${error}`);
25
+ }
26
+ }
27
+
28
+ //#endregion
8
29
  //#region src/utils/codeowner-analysis.ts
9
30
  /**
10
31
  * Finds and resolves the CODEOWNERS file path
@@ -32,25 +53,6 @@ function normalizeOwnerName(owner) {
32
53
  return owner.replace("@", "").toLowerCase();
33
54
  }
34
55
  /**
35
- * Executes the codemod CLI command and returns applicable file paths
36
- */
37
- async function getApplicableFiles(rulePath, language, projectRoot) {
38
- try {
39
- const command = `npx -y codemod@latest jssg list-applicable --language ${language} --target ${projectRoot} ${rulePath}`;
40
- console.debug(`Executing: ${command}`);
41
- const applicableFiles = execSync(command, {
42
- encoding: "utf8",
43
- cwd: projectRoot,
44
- maxBuffer: 10 * 1024 * 1024
45
- }).split("\n").filter((line) => line.startsWith("[Applicable] ")).map((line) => line.replace("[Applicable] ", "").trim()).filter((filePath) => filePath.length > 0);
46
- console.debug(`Found ${applicableFiles.length} applicable files`);
47
- return applicableFiles;
48
- } catch (error) {
49
- console.error("Error executing codemod CLI:", error);
50
- throw new Error(`Failed to execute codemod CLI: ${error}`);
51
- }
52
- }
53
- /**
54
56
  * Analyzes files and groups them by codeowner team
55
57
  */
56
58
  async function analyzeFilesByOwner(codeownersPath, language, rulePath, projectRoot = process.cwd()) {
@@ -84,19 +86,48 @@ async function analyzeFilesWithoutOwner(language, rulePath, projectRoot = proces
84
86
  return filesByOwner;
85
87
  }
86
88
  /**
87
- * Generates shard configuration from team file analysis
89
+ * Calculate optimal number of shards based on target shard size
90
+ *
91
+ * @param totalFiles - Total number of files
92
+ * @param targetShardSize - Desired number of files per shard
93
+ * @returns Number of shards needed
88
94
  */
89
- function generateShards(filesByOwner, shardSize) {
95
+ function calculateOptimalShardCount(totalFiles, targetShardSize) {
96
+ return Math.ceil(totalFiles / targetShardSize);
97
+ }
98
+ /**
99
+ * Generates shard configuration from team file analysis with actual file distribution.
100
+ * Maintains consistency with existing state when provided.
101
+ *
102
+ * @param filesByOwner - Map of team names to their file arrays
103
+ * @param shardSize - Target number of files per shard
104
+ * @param existingState - Optional existing state for consistency
105
+ * @returns Array of team-based shards with file assignments
106
+ */
107
+ function generateShards(filesByOwner, shardSize, existingState) {
90
108
  const allShards = [];
109
+ const existingByTeam = /* @__PURE__ */ new Map();
110
+ if (existingState) for (const shard of existingState) {
111
+ if (!existingByTeam.has(shard.team)) existingByTeam.set(shard.team, []);
112
+ existingByTeam.get(shard.team).push(shard);
113
+ }
91
114
  for (const [team, files] of filesByOwner.entries()) {
92
115
  const fileCount = files.length;
93
- const numShards = calculateOptimalShardCount(fileCount, shardSize);
94
- console.log(`Team "${team}" owns ${fileCount} files, creating ${numShards} shards`);
95
- for (let i = 1; i <= numShards; i++) allShards.push({
96
- team,
97
- shard: `${i}/${numShards}`,
98
- shardId: `${team} ${i}/${numShards}`
99
- });
116
+ const optimalShardCount = calculateOptimalShardCount(fileCount, shardSize);
117
+ const existingShardCount = (existingByTeam.get(team) || []).length;
118
+ const numShards = existingShardCount > 0 ? existingShardCount : optimalShardCount;
119
+ console.log(`Team "${team}" owns ${fileCount} files, ${existingShardCount > 0 ? `maintaining ${numShards} existing shards` : `creating ${numShards} new shards`}`);
120
+ const sortedFiles = [...files].sort();
121
+ for (let i = 1; i <= numShards; i++) {
122
+ const shardFiles = [];
123
+ for (let fileIndex = i - 1; fileIndex < sortedFiles.length; fileIndex += numShards) shardFiles.push(sortedFiles[fileIndex] ?? "");
124
+ allShards.push({
125
+ team,
126
+ shard: `${i}/${numShards}`,
127
+ shardId: `${team} ${i}/${numShards}`,
128
+ files: shardFiles
129
+ });
130
+ }
100
131
  }
101
132
  return allShards;
102
133
  }
@@ -111,10 +142,14 @@ function getTeamFileInfo(filesByOwner) {
111
142
  }));
112
143
  }
113
144
  /**
114
- * Main function to analyze codeowners and generate shard configuration
145
+ * Main function to analyze codeowners and generate shard configuration.
146
+ * Maintains consistency with existing state when provided.
147
+ *
148
+ * @param options - Configuration options for codeowner analysis
149
+ * @returns Promise resolving to codeowner analysis result
115
150
  */
116
151
  async function analyzeCodeowners(options) {
117
- const { shardSize, codeownersPath, rulePath, language, projectRoot = process.cwd() } = options;
152
+ const { shardSize, codeownersPath, rulePath, language, projectRoot = process.cwd(), existingState } = options;
118
153
  const resolvedCodeownersPath = await findCodeownersFile(projectRoot, codeownersPath);
119
154
  let filesByOwner;
120
155
  console.debug(`Using rule file: ${rulePath}`);
@@ -129,8 +164,9 @@ async function analyzeCodeowners(options) {
129
164
  filesByOwner = await analyzeFilesWithoutOwner(language, rulePath, projectRoot);
130
165
  }
131
166
  console.log("File analysis completed. Generating shards...");
167
+ if (existingState) console.debug(`Using existing state with ${existingState.length} shards`);
132
168
  const teams = getTeamFileInfo(filesByOwner);
133
- const shards = generateShards(filesByOwner, shardSize);
169
+ const shards = generateShards(filesByOwner, shardSize, existingState);
134
170
  const totalFiles = Array.from(filesByOwner.values()).reduce((sum, files) => sum + files.length, 0);
135
171
  console.log(`Generated ${shards.length} total shards for ${totalFiles} files`);
136
172
  return {
@@ -1,2 +1,2 @@
1
- import { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-D1oVulJ6.js";
2
- export { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName };
1
+ import { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-B7Jrhm9T.js";
2
+ export { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getTeamFileInfo, normalizeOwnerName };
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env node
2
- import "./consistent-sharding-DDU9PV2R.js";
3
- import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-n5QdN_A3.js";
2
+ import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-pG3p0RPU.js";
4
3
 
5
- export { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName };
4
+ export { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getTeamFileInfo, normalizeOwnerName };
@@ -1,6 +1,7 @@
1
1
  //#region src/utils/consistent-sharding.d.ts
2
2
  /**
3
3
  * Generates a numeric hash from a filename using SHA1
4
+ * Uses only the first 8 characters of the hex digest to avoid JavaScript number precision issues
4
5
  */
5
6
  declare function getNumericFileNameSha1(filename: string): number;
6
7
  /**
@@ -9,8 +10,8 @@ declare function getNumericFileNameSha1(filename: string): number;
9
10
  */
10
11
  declare function getFileHashPosition(filename: string): number;
11
12
  /**
12
- * Gets the shard index for a filename using deterministic hashing
13
- * Files get assigned to a consistent preferred shard regardless of total count
13
+ * Gets the shard index for a filename using consistent hashing
14
+ * Files are assigned to the next shard clockwise on the hash ring
14
15
  *
15
16
  * @param filename - The file path to hash
16
17
  * @param shardCount - Total number of shards
@@ -22,19 +23,14 @@ declare function getShardForFilename(filename: string, {
22
23
  shardCount: number;
23
24
  }): number;
24
25
  /**
25
- * Checks if a file belongs to a specific shard
26
+ * Checks if a file belongs to a specific shard by simply checking if it's in the shard's files list
26
27
  *
27
28
  * @param filename - The file path to check
28
- * @param shardCount - Total number of shards
29
- * @param shardIndex - The shard index to check against (0-based)
30
- * @returns True if file belongs to the specified shard
29
+ * @param shard - Shard object containing files array
30
+ * @returns True if file is in the shard's files list
31
31
  */
32
- declare function fitsInShard(filename: string, {
33
- shardCount,
34
- shardIndex
35
- }: {
36
- shardCount: number;
37
- shardIndex: number;
32
+ declare function fitsInShard(filename: string, shard: {
33
+ files: string[];
38
34
  }): boolean;
39
35
  /**
40
36
  * Distributes files across shards using deterministic hashing
@@ -52,5 +48,20 @@ declare function distributeFilesAcrossShards(filenames: string[], shardCount: nu
52
48
  * @returns Number of shards needed
53
49
  */
54
50
  declare function calculateOptimalShardCount(totalFiles: number, targetShardSize: number): number;
51
+ /**
52
+ * Analyzes file reassignment when scaling from oldShardCount to newShardCount
53
+ * Returns statistics about how many files would need to be reassigned
54
+ *
55
+ * @param filenames - Array of file paths to analyze
56
+ * @param oldShardCount - Current number of shards
57
+ * @param newShardCount - Target number of shards
58
+ * @returns Object with reassignment statistics
59
+ */
60
+ declare function analyzeShardScaling(filenames: string[], oldShardCount: number, newShardCount: number): {
61
+ totalFiles: number;
62
+ reassignedFiles: number;
63
+ reassignmentPercentage: number;
64
+ stableFiles: number;
65
+ };
55
66
  //#endregion
56
- export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
67
+ export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env node
2
+ import crypto from "node:crypto";
3
+
4
+ //#region src/utils/consistent-sharding.ts
5
+ const HASH_RING_SIZE = 1e6;
6
+ /**
7
+ * Generates a numeric hash from a filename using SHA1
8
+ * Uses only the first 8 characters of the hex digest to avoid JavaScript number precision issues
9
+ */
10
+ function getNumericFileNameSha1(filename) {
11
+ const hex = crypto.createHash("sha1").update(filename).digest("hex").substring(0, 8);
12
+ return parseInt(hex, 16);
13
+ }
14
+ /**
15
+ * Maps a filename to a consistent position on the hash ring (0 to HASH_RING_SIZE-1)
16
+ * This position remains constant regardless of shard count changes
17
+ */
18
+ function getFileHashPosition(filename) {
19
+ return getNumericFileNameSha1(filename) % HASH_RING_SIZE;
20
+ }
21
+ /**
22
+ * Get the position for a specific shard index on the hash ring
23
+ * Shards get fixed positions that don't change when other shards are added
24
+ */
25
+ function getShardPosition(shardIndex) {
26
+ return parseInt(crypto.createHash("sha1").update(`shard-${shardIndex}`).digest("hex").substring(0, 8), 16) % HASH_RING_SIZE;
27
+ }
28
+ /**
29
+ * Gets the shard index for a filename using consistent hashing
30
+ * Files are assigned to the next shard clockwise on the hash ring
31
+ *
32
+ * @param filename - The file path to hash
33
+ * @param shardCount - Total number of shards
34
+ * @returns Shard index (0-based)
35
+ */
36
+ function getShardForFilename(filename, { shardCount }) {
37
+ if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
38
+ const filePosition = getFileHashPosition(filename);
39
+ const shardInfo = [];
40
+ for (let i = 0; i < shardCount; i++) shardInfo.push({
41
+ index: i,
42
+ position: getShardPosition(i)
43
+ });
44
+ shardInfo.sort((a, b) => a.position - b.position);
45
+ for (const shard of shardInfo) if (filePosition <= shard.position) return shard.index;
46
+ return shardInfo[0].index;
47
+ }
48
+ /**
49
+ * Checks if a file belongs to a specific shard by simply checking if it's in the shard's files list
50
+ *
51
+ * @param filename - The file path to check
52
+ * @param shard - Shard object containing files array
53
+ * @returns True if file is in the shard's files list
54
+ */
55
+ function fitsInShard(filename, shard) {
56
+ return shard.files.includes(filename);
57
+ }
58
+ /**
59
+ * Distributes files across shards using deterministic hashing
60
+ *
61
+ * @param filenames - Array of file paths
62
+ * @param shardCount - Total number of shards
63
+ * @returns Map of shard index to array of filenames
64
+ */
65
+ function distributeFilesAcrossShards(filenames, shardCount) {
66
+ if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
67
+ const shardMap = /* @__PURE__ */ new Map();
68
+ for (let i = 0; i < shardCount; i++) shardMap.set(i, []);
69
+ for (const filename of filenames) {
70
+ const shardIndex = getShardForFilename(filename, { shardCount });
71
+ shardMap.get(shardIndex).push(filename);
72
+ }
73
+ return shardMap;
74
+ }
75
+ /**
76
+ * Calculate optimal number of shards based on target shard size
77
+ *
78
+ * @param totalFiles - Total number of files
79
+ * @param targetShardSize - Desired number of files per shard
80
+ * @returns Number of shards needed
81
+ */
82
+ function calculateOptimalShardCount(totalFiles, targetShardSize) {
83
+ return Math.ceil(totalFiles / targetShardSize);
84
+ }
85
+ /**
86
+ * Analyzes file reassignment when scaling from oldShardCount to newShardCount
87
+ * Returns statistics about how many files would need to be reassigned
88
+ *
89
+ * @param filenames - Array of file paths to analyze
90
+ * @param oldShardCount - Current number of shards
91
+ * @param newShardCount - Target number of shards
92
+ * @returns Object with reassignment statistics
93
+ */
94
+ function analyzeShardScaling(filenames, oldShardCount, newShardCount) {
95
+ let reassignedFiles = 0;
96
+ for (const filename of filenames) {
97
+ const oldShard = getShardForFilename(filename, { shardCount: oldShardCount });
98
+ const newShard = getShardForFilename(filename, { shardCount: newShardCount });
99
+ if (oldShard !== newShard) reassignedFiles++;
100
+ }
101
+ const stableFiles = filenames.length - reassignedFiles;
102
+ const reassignmentPercentage = filenames.length > 0 ? reassignedFiles / filenames.length * 100 : 0;
103
+ return {
104
+ totalFiles: filenames.length,
105
+ reassignedFiles,
106
+ reassignmentPercentage,
107
+ stableFiles
108
+ };
109
+ }
110
+
111
+ //#endregion
112
+ export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
package/dist/index.d.ts CHANGED
@@ -1,3 +1,3 @@
1
- import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-D0wYSQBl.js";
2
- import { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-D1oVulJ6.js";
3
- export { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getApplicableFiles, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
1
+ import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-CwWnbSoW.js";
2
+ import { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-B7Jrhm9T.js";
3
+ export { CodeownerAnalysisOptions, CodeownerAnalysisResult, ShardResult, TeamFileInfo, analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
package/dist/index.js CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-DDU9PV2R.js";
3
- import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getApplicableFiles, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-n5QdN_A3.js";
2
+ import { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, findCodeownersFile, generateShards, getTeamFileInfo, normalizeOwnerName } from "./codeowner-analysis-pG3p0RPU.js";
3
+ import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-pjG1rI6w.js";
4
4
 
5
- export { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getApplicableFiles, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
5
+ export { analyzeCodeowners, analyzeFilesByOwner, analyzeFilesWithoutOwner, analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, findCodeownersFile, fitsInShard, generateShards, getFileHashPosition, getNumericFileNameSha1, getShardForFilename, getTeamFileInfo, normalizeOwnerName };
@@ -1,2 +1,2 @@
1
- import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-D0wYSQBl.js";
2
- export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
1
+ import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-CwWnbSoW.js";
2
+ export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
package/dist/sharding.js CHANGED
@@ -1,4 +1,4 @@
1
1
  #!/usr/bin/env node
2
- import { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-DDU9PV2R.js";
2
+ import { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename } from "./consistent-sharding-pjG1rI6w.js";
3
3
 
4
- export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
4
+ export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codemodctl",
3
- "version": "0.1.12",
3
+ "version": "0.1.14",
4
4
  "description": "CLI tool and utilities for workflow engine operations, file sharding, and codeowner analysis",
5
5
  "type": "module",
6
6
  "exports": {
@@ -1,71 +0,0 @@
1
- #!/usr/bin/env node
2
- import crypto from "node:crypto";
3
-
4
- //#region src/utils/consistent-sharding.ts
5
- const HASH_RING_SIZE = 1e6;
6
- /**
7
- * Generates a numeric hash from a filename using SHA1
8
- */
9
- function getNumericFileNameSha1(filename) {
10
- return parseInt(crypto.createHash("sha1").update(filename).digest("hex"), 16);
11
- }
12
- /**
13
- * Maps a filename to a consistent position on the hash ring (0 to HASH_RING_SIZE-1)
14
- * This position remains constant regardless of shard count changes
15
- */
16
- function getFileHashPosition(filename) {
17
- return getNumericFileNameSha1(filename) % HASH_RING_SIZE;
18
- }
19
- /**
20
- * Gets the shard index for a filename using deterministic hashing
21
- * Files get assigned to a consistent preferred shard regardless of total count
22
- *
23
- * @param filename - The file path to hash
24
- * @param shardCount - Total number of shards
25
- * @returns Shard index (0-based)
26
- */
27
- function getShardForFilename(filename, { shardCount }) {
28
- if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
29
- return getNumericFileNameSha1(filename) % 10 % shardCount;
30
- }
31
- /**
32
- * Checks if a file belongs to a specific shard
33
- *
34
- * @param filename - The file path to check
35
- * @param shardCount - Total number of shards
36
- * @param shardIndex - The shard index to check against (0-based)
37
- * @returns True if file belongs to the specified shard
38
- */
39
- function fitsInShard(filename, { shardCount, shardIndex }) {
40
- return getShardForFilename(filename, { shardCount }) === shardIndex;
41
- }
42
- /**
43
- * Distributes files across shards using deterministic hashing
44
- *
45
- * @param filenames - Array of file paths
46
- * @param shardCount - Total number of shards
47
- * @returns Map of shard index to array of filenames
48
- */
49
- function distributeFilesAcrossShards(filenames, shardCount) {
50
- if (shardCount <= 0) throw new Error("Shard count must be greater than 0");
51
- const shardMap = /* @__PURE__ */ new Map();
52
- for (let i = 0; i < shardCount; i++) shardMap.set(i, []);
53
- for (const filename of filenames) {
54
- const shardIndex = getShardForFilename(filename, { shardCount });
55
- shardMap.get(shardIndex).push(filename);
56
- }
57
- return shardMap;
58
- }
59
- /**
60
- * Calculate optimal number of shards based on target shard size
61
- *
62
- * @param totalFiles - Total number of files
63
- * @param targetShardSize - Desired number of files per shard
64
- * @returns Number of shards needed
65
- */
66
- function calculateOptimalShardCount(totalFiles, targetShardSize) {
67
- return Math.ceil(totalFiles / targetShardSize);
68
- }
69
-
70
- //#endregion
71
- export { calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };