repo-anon 0.2.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,41 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, master ]
6
+ pull_request:
7
+ branches: [ main, master ]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - name: Use Node.js
17
+ uses: actions/setup-node@v4
18
+ with:
19
+ node-version: '20.x'
20
+ cache: 'npm'
21
+
22
+ - name: Install dependencies
23
+ run: npm ci
24
+
25
+ - name: Run tests with coverage
26
+ run: npm test
27
+
28
+ - name: Upload coverage reports to Codecov
29
+ uses: codecov/codecov-action@v4
30
+ with:
31
+ token: ${{ secrets.CODECOV_TOKEN }}
32
+ file: ./coverage/clover.xml
33
+ flags: unittests
34
+ name: codecov-umbrella
35
+ fail_ci_if_error: false
36
+
37
+ - name: Archive code coverage results
38
+ uses: actions/upload-artifact@v4
39
+ with:
40
+ name: code-coverage-report
41
+ path: coverage/
package/README.md CHANGED
@@ -1,64 +1 @@
1
1
  # repo-anon
2
-
3
- A Node.js CLI tool to anonymize and de-anonymize files in a repository based on a `.phrases` configuration file. Perfect for preparing repositories for AI processing while keeping sensitive info, such as company or brand names, protected.
4
-
5
- ## Features
6
-
7
- - **Anonymize**: Replaces sensitive phrases with configured placeholders.
8
- - **Replacement History**: Writes `.repo-anon-history.json` with ordered per-file replacement events (including counts).
9
- - **De-anonymize**: Replays replacement history in reverse order, then runs a full phrase-based pass to also restore new placeholder usage added later.
10
- - **Recursive**: Traverses through all project directories (ignoring `node_modules`, `.git`, etc.).
11
- - **CI/CD Ready**: Includes GitLab pipeline configuration for publishing to the GitLab package registry.
12
-
13
- ## Installation
14
-
15
- ```bash
16
- npm install -g @your_gitlab_namespace/repo-anon
17
- ```
18
-
19
- *(Note: Replace `@your_gitlab_namespace` with your actual GitLab namespace).*
20
-
21
- ## Usage
22
-
23
- 1. Create a `.phrases` file in the current working directory:
24
-
25
- ```json
26
- {
27
- "CompanyA": "ANON_COMPANY_A",
28
- "ck": {
29
- "placeholder": "bb",
30
- "wordReplace": true
31
- }
32
- }
33
- ```
34
-
35
- String values keep the old behavior and replace matches anywhere inside a word.
36
- Object values let you opt into whole-word matching with `wordReplace: true`.
37
-
38
- 2. Run the anonymization command:
39
-
40
- ```bash
41
- repo-anon anonymize
42
- ```
43
-
44
- This also writes `.repo-anon-history.json` in the working directory.
45
-
46
- 3. Revert changes (if needed):
47
-
48
- ```bash
49
- repo-anon deanonymize
50
- ```
51
-
52
- De-anonymization uses the history file first to reverse exact prior replacements in order, then applies phrase-based de-anonymization globally so newly introduced placeholders are also restored.
53
-
54
- ## Development
55
-
56
- - **Tests**: Run unit tests using `npm test`.
57
- - **Linting**: Lint the project using `npm run lint`.
58
-
59
- ## CI/CD Deployment
60
-
61
- The project includes a `.gitlab-ci.yml` configured to automatically publish new versions to the GitLab package registry when a tag is pushed.
62
-
63
- ---
64
- Built with ❤️.
package/bin/repo-anon.js CHANGED
@@ -1,21 +1,203 @@
1
1
  #!/usr/bin/env node
2
+ const fs = require('fs');
3
+ const path = require('path');
4
+ const readline = require('readline');
5
+ const Anonymizer = require('../lib/processor');
2
6
 
3
- const { program } = require('commander');
4
- const { anonymize, deanonymize } = require('./../lib/processor');
5
- const { version } = require('../package.json');
7
+ const helpMessage = `
8
+ Anonymization Tool - CLI
6
9
 
7
- program
8
- .version(version)
9
- .description('Repository Anonymizer CLI');
10
+ Usage:
11
+ node repo-anon.js <action> [input] [options]
10
12
 
11
- program
12
- .command('anonymize')
13
- .description('Anonymize project based on .phrases file')
14
- .action(anonymize);
13
+ Actions:
14
+ anonymize: The action to perform (obscure phrases).
15
+ deanonymize: The action to perform (unobscure phrases).
15
16
 
16
- program
17
- .command('deanonymize')
18
- .description('De-anonymize project based on .phrases file')
19
- .action(deanonymize);
17
+ Arguments:
18
+ input: The literal text to process OR a file path.
20
19
 
21
- program.parse(process.argv);
20
+ Options:
21
+ -f, --file <path>: Explicitly specify a single file path to process.
22
+ -d, --dir <path>: Process all files in a directory.
23
+ -p, --pattern <glob>: Filter files by pattern (e.g., "*.txt", "**/*.js").
24
+ -o, --out-dir <path>: Save processed files here (instead of stdout/overwrite).
25
+ -r, --recursive: Process directories recursively (default with --dir).
26
+ -c, --config <path>: Path to the .phrases.json file (default: .phrases.json).
27
+ --overwrite: Overwrite existing files (only with -d or -f).
28
+
29
+ Examples:
30
+ node repo-anon.js anonymize -d ./src -p "**/*.js" -o ./anon_src -r
31
+ node repo-anon.js anonymize "Meeting with Acme Corp"
32
+ cat document.txt | node repo-anon.js anonymize
33
+ `;
34
+
35
/**
 * Collects everything piped into the process on standard input.
 *
 * @returns {Promise<string|null>} The lines read from stdin joined with "\n",
 *   or `null` when stdin is an interactive terminal (nothing was piped in).
 */
async function readFromStdin() {
  const { stdin } = process;
  if (stdin.isTTY) {
    return null;
  }

  // Lines yielded by the readline interface are gathered and re-joined with "\n".
  const reader = readline.createInterface({ input: stdin, terminal: false });
  const collected = [];
  for await (const entry of reader) {
    collected.push(entry);
  }
  return collected.join('\n');
}
42
+
43
/**
 * Reports whether `p` names an existing regular file.
 *
 * A symbolic link counts as a file when its target is a regular file.
 * `fs.existsSync` already follows links, so answering from `lstatSync` alone
 * would report a linked file as "not a file" and the CLI would then treat the
 * path as literal text.
 *
 * @param {string} p - Path to probe.
 * @returns {boolean} `true` for a regular file (directly or through a link);
 *   `false` otherwise, including when the path cannot be inspected.
 */
function isExistingFile(p) {
  try {
    if (!fs.existsSync(p)) {
      return false;
    }
    const info = fs.lstatSync(p);
    if (info.isFile()) {
      return true;
    }
    // lstat describes the link itself; resolve it to learn what it points at.
    const isLink = typeof info.isSymbolicLink === 'function' && info.isSymbolicLink();
    return isLink && fs.statSync(p).isFile();
  } catch (e) {
    // Any failure to inspect the path is treated as "not a usable file".
    return false;
  }
}
46
+
47
/**
 * Reports whether `p` names an existing directory.
 *
 * A symbolic link counts as a directory when its target is one.
 * `fs.existsSync` already follows links, so answering from `lstatSync` alone
 * would report a linked directory as "not a directory".
 *
 * @param {string} p - Path to probe.
 * @returns {boolean} `true` for a directory (directly or through a link);
 *   `false` otherwise, including when the path cannot be inspected.
 */
function isExistingDir(p) {
  try {
    if (!fs.existsSync(p)) {
      return false;
    }
    const info = fs.lstatSync(p);
    if (info.isDirectory()) {
      return true;
    }
    // lstat describes the link itself; resolve it to learn what it points at.
    const isLink = typeof info.isSymbolicLink === 'function' && info.isSymbolicLink();
    return isLink && fs.statSync(p).isDirectory();
  } catch (e) {
    // Any failure to inspect the path is treated as "not a usable directory".
    return false;
  }
}
50
+
51
+ // Minimal glob-to-regex converter for pattern matching
52
/**
 * Converts a minimal glob pattern into an anchored regular expression.
 *
 * Supported syntax (paths are expected to use "/" separators):
 *   - `*`   any run of characters within one path segment
 *   - `?`   exactly one character within a path segment
 *   - `**`  any run of characters, including "/"
 *   - `**` followed by "/" at the start of the pattern or right after a "/"
 *           matches zero or more leading directories, so a pattern such as
 *           "**" + "/*.js" also matches files at the top level
 * Every other regex metacharacter is matched literally.
 *
 * @param {string|null|undefined} glob - Pattern to convert.
 * @returns {RegExp|null} Anchored expression, or `null` when `glob` is empty.
 */
function globToRegex(glob) {
  if (!glob) return null;

  // Translate in a single pass so text produced for one token can never be
  // re-interpreted by a later replacement step.
  const source = glob.replace(
    /\*\*\/|\*\*|\*|\?|[.+^${}()|[\]\\]/g,
    (token, offset, whole) => {
      switch (token) {
        case '**/': {
          const atSegmentStart = offset === 0 || whole[offset - 1] === '/';
          // Only a whole "**" segment may match zero directories.
          return atSegmentStart ? '(?:.*/)?' : '.*/';
        }
        case '**':
          return '.*';
        case '*':
          return '[^/]*';
        case '?':
          return '[^/]';
        default:
          return `\\${token}`;
      }
    },
  );
  return new RegExp(`^${source}$`);
}
63
+
64
/**
 * Collects the paths of the files found under `dir`.
 *
 * @param {string} dir - Directory to list.
 * @param {string[]} [fileList=[]] - Accumulator the found paths are appended to.
 * @param {boolean} [recursive=true] - Descend into sub-directories when true;
 *   when false, sub-directories are ignored entirely.
 * @param {string[]} [skipNames=['.git', 'node_modules']] - Entry names that are
 *   never listed or descended into. Rewriting version-control metadata or
 *   installed dependencies would corrupt them; pass `[]` to disable.
 * @returns {string[]} `fileList`, containing one path per file found.
 * @throws {Error} When a directory cannot be read, or an entry cannot be
 *   inspected for a reason other than it being missing.
 */
function walkSync(dir, fileList = [], recursive = true, skipNames = ['.git', 'node_modules']) {
  for (const name of fs.readdirSync(dir)) {
    if (skipNames.includes(name)) {
      continue;
    }

    const filePath = path.join(dir, name);
    let isDirectory;
    try {
      isDirectory = fs.statSync(filePath).isDirectory();
    } catch (err) {
      // A dangling symlink (or an entry removed mid-walk) has nothing to
      // process; skip it instead of aborting the whole run.
      if (err && err.code === 'ENOENT') {
        continue;
      }
      throw err;
    }

    if (!isDirectory) {
      fileList.push(filePath);
    } else if (recursive) {
      walkSync(filePath, fileList, recursive, skipNames);
    }
  }
  return fileList;
}
76
+
77
/**
 * Command-line entry point: parses `process.argv`, then anonymizes or
 * de-anonymizes a directory, a single file, a literal string, or stdin.
 *
 * Input selection, in order of precedence:
 *   1. `-d/--dir`, or a positional argument naming an existing directory:
 *      every file found is processed (optionally filtered by `-p/--pattern`).
 *   2. `-f/--file`, or a positional argument naming an existing file.
 *   3. A positional argument that is not a file: treated as literal text.
 *   4. No positional argument (or "-"): text is read from stdin.
 *
 * Output selection: `-o/--out-dir` writes files there; otherwise `--overwrite`
 * rewrites the source file; otherwise the result goes to stdout.
 *
 * Usage errors are reported on the console and the function then returns
 * normally; it does not set a process exit status itself.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const args = process.argv.slice(2);
  if (args.length < 1 || args.includes('-h') || args.includes('--help')) {
    console.log(helpMessage);
    return;
  }

  const action = args[0].toLowerCase();
  if (action !== 'anonymize' && action !== 'deanonymize') {
    console.log(`Error: Unknown action: ${action}\n${helpMessage}`);
    return;
  }

  let explicitFilePath = null;
  let explicitDirPath = null;
  let configPath = '.phrases.json';
  let outDir = null;
  let pattern = null;
  let recursive = false;
  let overwrite = false;
  let inputArg = null;

  // Options that consume the next argument, keyed by every accepted spelling.
  const setFile = (value) => { explicitFilePath = value; };
  const setDir = (value) => { explicitDirPath = value; };
  const setConfig = (value) => { configPath = value; };
  const setOutDir = (value) => { outDir = value; };
  const setPattern = (value) => { pattern = value; };
  const valueSetters = new Map([
    ['-f', setFile], ['--file', setFile],
    ['-d', setDir], ['--dir', setDir],
    ['-c', setConfig], ['--config', setConfig],
    ['-o', setOutDir], ['--out-dir', setOutDir],
    ['-p', setPattern], ['--pattern', setPattern],
  ]);

  for (let i = 1; i < args.length; i++) {
    const arg = args[i];
    const assign = valueSetters.get(arg);
    if (assign) {
      // Without this check a trailing option silently became `undefined`
      // and the command fell through to a different input/output mode.
      if (i + 1 >= args.length) {
        console.error(`Error: Option ${arg} requires a value.`);
        return;
      }
      i += 1;
      assign(args[i]);
    } else if (arg === '-r' || arg === '--recursive') {
      recursive = true;
    } else if (arg === '--overwrite') {
      overwrite = true;
    } else if (!inputArg) {
      // Only the first positional argument is used.
      inputArg = arg;
    }
  }

  const anonymizer = new Anonymizer(configPath);
  const patternRegex = globToRegex(pattern);
  const transform = (text) => (
    action === 'anonymize' ? anonymizer.anonymize(text) : anonymizer.deanonymize(text)
  );

  if (explicitDirPath || (inputArg && isExistingDir(inputArg))) {
    const dirToProcess = explicitDirPath || inputArg;
    // --dir implies recursion; a positional directory recurses only with -r.
    const files = walkSync(dirToProcess, [], recursive || !!explicitDirPath);

    // The phrases file must never be transformed: anonymizing it would replace
    // its keys with their placeholders and make de-anonymization impossible.
    const configFile = path.resolve(anonymizer.configPath || configPath);

    for (const filePath of files) {
      if (path.resolve(filePath) === configFile) {
        continue;
      }

      // Relative, "/"-separated path: used for pattern matching and to mirror
      // the directory layout under the output directory.
      const relativePath = path.relative(dirToProcess, filePath).replace(/\\/g, '/');

      if (patternRegex && !patternRegex.test(relativePath) && !patternRegex.test(path.basename(filePath))) {
        continue;
      }

      const result = transform(fs.readFileSync(filePath, 'utf8'));

      if (outDir) {
        const targetPath = path.join(outDir, relativePath);
        fs.mkdirSync(path.dirname(targetPath), { recursive: true });
        fs.writeFileSync(targetPath, result, 'utf8');
      } else if (overwrite) {
        fs.writeFileSync(filePath, result, 'utf8');
      } else {
        console.log(`--- File: ${filePath} ---`);
        process.stdout.write(result + '\n');
      }
    }
    return;
  }

  // Single file, literal text, or stdin.
  let text = '';
  let isFile = false;
  let targetPath = explicitFilePath;

  if (explicitFilePath) {
    if (!isExistingFile(explicitFilePath)) {
      console.error(`Error: File not found at ${explicitFilePath}`);
      return;
    }
    text = fs.readFileSync(explicitFilePath, 'utf8');
    isFile = true;
  } else if (inputArg && inputArg !== '-') {
    if (isExistingFile(inputArg)) {
      text = fs.readFileSync(inputArg, 'utf8');
      isFile = true;
      targetPath = inputArg;
    } else {
      // Not a file on disk: the argument itself is the text to process.
      text = inputArg;
    }
  } else {
    text = await readFromStdin();
    if (text === null) {
      console.log('Error: No input provided and stdin is a terminal.\n' + helpMessage);
      return;
    }
  }

  const result = transform(text);

  if (isFile && outDir) {
    const fullOutDir = path.resolve(process.cwd(), outDir);
    fs.mkdirSync(fullOutDir, { recursive: true });
    fs.writeFileSync(path.join(fullOutDir, path.basename(targetPath)), result, 'utf8');
  } else if (isFile && overwrite) {
    fs.writeFileSync(targetPath, result, 'utf8');
  } else {
    process.stdout.write(result + '\n');
  }
}
195
+
196
+ module.exports = { run };
197
+
198
// Start the CLI only when this file is executed directly; when it is loaded
// with require() the caller invokes run() itself.
if (require.main === module) {
  const reportAndExit = (err) => {
    console.error(`An unexpected error occurred: ${err.message}`);
    process.exit(1);
  };
  run().then(undefined, reportAndExit);
}
package/lib/processor.js CHANGED
@@ -1,212 +1,79 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
3
 
4
- const PHRASES_FILE = '.phrases';
5
- const HISTORY_FILE = '.repo-anon-history.json';
6
- const WORD_CHAR_CLASS = 'A-Za-z0-9_';
7
-
8
- function escapeRegExp(value) {
9
- return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
4
/**
 * Escapes every regular-expression metacharacter in `string` so the result
 * matches the input literally when compiled with `new RegExp`.
 *
 * @param {string} string - Text to escape.
 * @returns {string} The escaped text.
 */
function escapeRegExp(string) {
  return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Replaces every occurrence of `search` in `text` with `replacement`, treating
 * both as literal text.
 *
 * The replacement is supplied through a function so that sequences such as
 * "$&" or "$$" inside it are inserted verbatim instead of being interpreted
 * as substitution patterns by `String.prototype.replace`.
 *
 * @param {string} text - Text to search.
 * @param {string} search - Non-empty literal to look for.
 * @param {*} replacement - Value to insert; converted with `String()`.
 * @returns {string} The text with all occurrences replaced.
 */
function replaceLiteral(text, search, replacement) {
  const literal = String(replacement);
  return text.replace(new RegExp(escapeRegExp(search), 'g'), () => literal);
}

/**
 * Replaces configured phrases with placeholders and back again.
 *
 * The configuration is a JSON file of the form
 * `{ "mappings": { "<phrase>": "<placeholder>", ... } }`.
 */
class Anonymizer {
  /**
   * @param {string} [configPath='.phrases.json'] - Config file location;
   *   relative paths are resolved against the current working directory.
   */
  constructor(configPath = '.phrases.json') {
    this.configPath = path.isAbsolute(configPath)
      ? configPath
      : path.resolve(process.cwd(), configPath);
    this.loadConfig();
  }

  /**
   * Loads `this.mappings` from `this.configPath`.
   *
   * When the file does not exist, a file with the same base name next to this
   * script is tried instead. A missing or blank file yields empty mappings.
   * Any read/parse/validation failure is reported on stderr as a warning and
   * also yields empty mappings; this method never throws.
   */
  loadConfig() {
    try {
      if (!fs.existsSync(this.configPath)) {
        // Fallback: look for the file in the same directory as this script.
        const fallbackPath = path.resolve(__dirname, path.basename(this.configPath));
        if (!fs.existsSync(fallbackPath)) {
          this.mappings = {};
          return;
        }
        this.configPath = fallbackPath;
      }

      const rawData = fs.readFileSync(this.configPath, 'utf8');
      if (!rawData.trim()) {
        this.mappings = {};
        return;
      }

      const mappings = JSON.parse(rawData).mappings || {};
      // An array or a scalar would be iterated as if its indices were
      // phrases; reject it so the problem is reported instead.
      if (typeof mappings !== 'object' || Array.isArray(mappings)) {
        throw new TypeError('"mappings" must be an object of phrase/placeholder pairs');
      }
      this.mappings = mappings;
    } catch (err) {
      console.error(`Warning: Could not load config from ${this.configPath}: ${err.message}`);
      this.mappings = {};
    }
  }

  /**
   * Replaces every configured phrase in `text` with its placeholder.
   *
   * @param {string} text - Text to process; falsy values are returned as-is.
   * @returns {string} The anonymized text.
   */
  anonymize(text) {
    if (!text) return text;

    // Longest phrases first, so "Acme Corp" wins over "Acme". An empty phrase
    // is skipped: it would match at every position in the text.
    const phrases = Object.keys(this.mappings)
      .filter((phrase) => phrase !== '')
      .sort((a, b) => b.length - a.length);

    let result = text;
    for (const phrase of phrases) {
      result = replaceLiteral(result, phrase, this.mappings[phrase]);
    }
    return result;
  }

  /**
   * Replaces every configured placeholder in `text` with its original phrase.
   * When several phrases share a placeholder, the last one listed wins.
   *
   * @param {string} text - Text to process; falsy values are returned as-is.
   * @returns {string} The de-anonymized text.
   */
  deanonymize(text) {
    if (!text) return text;

    // placeholder -> original phrase. A prototype-less object keeps a
    // placeholder such as "__proto__" as an ordinary key.
    const reverseMappings = Object.create(null);
    for (const [phrase, placeholder] of Object.entries(this.mappings)) {
      reverseMappings[placeholder] = phrase;
    }

    // Longest placeholders first; an empty placeholder is skipped because it
    // would match at every position in the text.
    const placeholders = Object.keys(reverseMappings)
      .filter((placeholder) => placeholder !== '')
      .sort((a, b) => b.length - a.length);

    let result = text;
    for (const placeholder of placeholders) {
      result = replaceLiteral(result, placeholder, reverseMappings[placeholder]);
    }
    return result;
  }
}
158
78
 
159
- async function anonymize() {
160
- const phrases = loadPhrases();
161
- const history = {
162
- version: 1,
163
- createdAt: new Date().toISOString(),
164
- files: {},
165
- };
166
-
167
- walk(process.cwd(), (filePath) => {
168
- const content = fs.readFileSync(filePath, 'utf8');
169
- const result = processContent(content, phrases, false, { trackHistory: true });
170
-
171
- if (result) {
172
- fs.writeFileSync(filePath, result.content, 'utf8');
173
- history.files[filePath] = result.replacements;
174
- console.log(`Updated: ${filePath}`);
175
- }
176
- });
177
-
178
- saveHistory(history);
179
- console.log(`Saved replacement history to: ${HISTORY_FILE}`);
180
- }
181
-
182
- async function deanonymize() {
183
- const phrases = loadPhrases();
184
- const history = loadHistory();
185
-
186
- walk(process.cwd(), (filePath) => {
187
- const content = fs.readFileSync(filePath, 'utf8');
188
-
189
- let newContent = null;
190
- if (history && history.files && Array.isArray(history.files[filePath])) {
191
- newContent = applyReverseReplacementHistory(content, history.files[filePath]);
192
- }
193
-
194
- const fallbackInput = newContent || content;
195
- const fallbackContent = processContent(fallbackInput, phrases, true);
196
-
197
- const finalContent = fallbackContent || newContent;
198
- if (finalContent) {
199
- fs.writeFileSync(filePath, finalContent, 'utf8');
200
- console.log(`Updated: ${filePath}`);
201
- }
202
- });
203
- }
204
-
205
- module.exports = {
206
- anonymize,
207
- deanonymize,
208
- processContent,
209
- replaceWithCount,
210
- applyReverseReplacementHistory,
211
- HISTORY_FILE,
212
- };
79
+ module.exports = Anonymizer;
package/package.json CHANGED
@@ -1,19 +1,32 @@
1
1
  {
2
2
  "name": "repo-anon",
3
- "version": "0.2.0",
3
+ "version": "1.0.0",
4
4
  "description": "CLI tool to anonymize/de-anonymize repositories.",
5
- "main": "index.js",
5
+ "main": "bin/repo-anon.js",
6
6
  "bin": {
7
7
  "repo-anon": "bin/repo-anon.js"
8
8
  },
9
- "dependencies": {
10
- "commander": "^13.1.0"
11
- },
12
9
  "scripts": {
13
- "test": "jest",
10
+ "test": "jest --coverage",
14
11
  "lint": "eslint .",
15
12
  "start": "node bin/repo-anon.js"
16
13
  },
14
+ "jest": {
15
+ "coverageThreshold": {
16
+ "global": {
17
+ "branches": 80,
18
+ "functions": 80,
19
+ "lines": 80,
20
+ "statements": 80
21
+ }
22
+ },
23
+ "coverageReporters": [
24
+ "text",
25
+ "lcov",
26
+ "clover",
27
+ "cobertura"
28
+ ]
29
+ },
17
30
  "keywords": [],
18
31
  "author": "",
19
32
  "license": "ISC",
@@ -0,0 +1,155 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const Anonymizer = require('../lib/processor');
4
+ const { run } = require('../bin/repo-anon.js');
5
+
6
+ // Mock fs and Anonymizer
7
+ jest.mock('fs');
8
+ jest.mock('../lib/processor');
9
+
10
+ describe('CLI (bin/repo-anon.js)', () => {
11
+ let originalArgv;
12
+ let consoleLogSpy;
13
+ let consoleErrorSpy;
14
+ let stdoutWriteSpy;
15
+ let exitSpy;
16
+
17
+ beforeEach(() => {
18
+ originalArgv = process.argv;
19
+ consoleLogSpy = jest.spyOn(console, 'log').mockImplementation();
20
+ consoleErrorSpy = jest.spyOn(console, 'error').mockImplementation();
21
+ stdoutWriteSpy = jest.spyOn(process.stdout, 'write').mockImplementation();
22
+ exitSpy = jest.spyOn(process, 'exit').mockImplementation();
23
+
24
+ // Reset mocks
25
+ jest.clearAllMocks();
26
+
27
+ // Default Anonymizer mock behavior
28
+ Anonymizer.prototype.anonymize.mockImplementation(text => `anon(${text})`);
29
+ Anonymizer.prototype.deanonymize.mockImplementation(text => `deanon(${text})`);
30
+ });
31
+
32
+ afterEach(() => {
33
+ process.argv = originalArgv;
34
+ consoleLogSpy.mockRestore();
35
+ consoleErrorSpy.mockRestore();
36
+ stdoutWriteSpy.mockRestore();
37
+ exitSpy.mockRestore();
38
+ });
39
+
40
+ const runCLI = async (args) => {
41
+ process.argv = ['node', 'repo-anon.js', ...args];
42
+ await run();
43
+ };
44
+
45
+ it('should show help message when no arguments are provided', async () => {
46
+ await runCLI([]);
47
+ expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('Usage:'));
48
+ });
49
+
50
+ it('should show error for unknown action', async () => {
51
+ await runCLI(['invalid-action']);
52
+ expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('Error: Unknown action'));
53
+ });
54
+
55
+ it('should anonymize direct text input', async () => {
56
+ fs.existsSync.mockReturnValue(false); // Not a file
57
+ await runCLI(['anonymize', 'hello world']);
58
+ expect(stdoutWriteSpy).toHaveBeenCalledWith('anon(hello world)\n');
59
+ });
60
+
61
+ it('should anonymize a single file', async () => {
62
+ const filePath = 'test.txt';
63
+ const fileContent = 'file content';
64
+ fs.existsSync.mockReturnValue(true);
65
+ fs.lstatSync.mockReturnValue({ isFile: () => true, isDirectory: () => false });
66
+ fs.readFileSync.mockReturnValue(fileContent);
67
+
68
+ await runCLI(['anonymize', filePath]);
69
+
70
+ expect(fs.readFileSync).toHaveBeenCalledWith(filePath, 'utf8');
71
+ expect(stdoutWriteSpy).toHaveBeenCalledWith('anon(file content)\n');
72
+ });
73
+
74
+ it('should anonymize a directory recursively', async () => {
75
+ const dirPath = 'src';
76
+ fs.existsSync.mockReturnValue(true);
77
+ fs.lstatSync.mockImplementation((p) => ({
78
+ isFile: () => p.endsWith('.js'),
79
+ isDirectory: () => p === dirPath
80
+ }));
81
+ fs.readdirSync.mockReturnValue(['file1.js', 'subdir']);
82
+ fs.statSync.mockImplementation((p) => ({
83
+ isDirectory: () => p.endsWith('subdir')
84
+ }));
85
+ // Mock second call for subdir
86
+ fs.readdirSync.mockReturnValueOnce(['file1.js', 'subdir']).mockReturnValueOnce(['file2.js']);
87
+ fs.readFileSync.mockReturnValue('content');
88
+
89
+ await runCLI(['anonymize', '-d', dirPath, '-r']);
90
+
91
+ expect(fs.readFileSync).toHaveBeenCalledTimes(2);
92
+ expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('--- File:'));
93
+ expect(stdoutWriteSpy).toHaveBeenCalledWith(expect.stringContaining('anon(content)'));
94
+ });
95
+
96
+ it('should overwrite file when --overwrite is used', async () => {
97
+ const filePath = 'test.txt';
98
+ fs.existsSync.mockReturnValue(true);
99
+ fs.lstatSync.mockReturnValue({ isFile: () => true, isDirectory: () => false });
100
+ fs.readFileSync.mockReturnValue('original');
101
+
102
+ await runCLI(['anonymize', filePath, '--overwrite']);
103
+
104
+ expect(fs.writeFileSync).toHaveBeenCalledWith(filePath, 'anon(original)', 'utf8');
105
+ });
106
+
107
+ it('should save to out-dir when -o is used', async () => {
108
+ const filePath = 'test.txt';
109
+ const outDir = 'output';
110
+ fs.existsSync.mockReturnValue(true);
111
+ fs.lstatSync.mockReturnValue({ isFile: () => true, isDirectory: () => false });
112
+ fs.readFileSync.mockReturnValue('original');
113
+
114
+ await runCLI(['anonymize', filePath, '-o', outDir]);
115
+
116
+ expect(fs.mkdirSync).toHaveBeenCalled();
117
+ expect(fs.writeFileSync).toHaveBeenCalledWith(expect.stringContaining(path.join(outDir, 'test.txt')), 'anon(original)', 'utf8');
118
+ });
119
+
120
+ it('should filter files by pattern', async () => {
121
+ const dirPath = 'src';
122
+ fs.existsSync.mockReturnValue(true);
123
+ fs.lstatSync.mockImplementation((p) => ({
124
+ isFile: () => true,
125
+ isDirectory: () => p === dirPath
126
+ }));
127
+ fs.readdirSync.mockReturnValue(['file1.js', 'file2.txt']);
128
+ fs.statSync.mockReturnValue({ isDirectory: () => false });
129
+ fs.readFileSync.mockReturnValue('content');
130
+
131
+ await runCLI(['anonymize', '-d', dirPath, '-p', '*.js']);
132
+
133
+ // Should only process file1.js
134
+ expect(fs.readFileSync).toHaveBeenCalledTimes(1);
135
+ expect(fs.readFileSync).toHaveBeenCalledWith(expect.stringContaining('file1.js'), 'utf8');
136
+ });
137
+
138
+ it('should deanonymize text input', async () => {
139
+ fs.existsSync.mockReturnValue(false);
140
+ await runCLI(['deanonymize', 'anon(hello)']);
141
+ expect(stdoutWriteSpy).toHaveBeenCalledWith('deanon(anon(hello))\n');
142
+ });
143
+
144
+ it('should handle explicit file path with --file', async () => {
145
+ const filePath = 'explicit.txt';
146
+ fs.existsSync.mockReturnValue(true);
147
+ fs.lstatSync.mockReturnValue({ isFile: () => true, isDirectory: () => false });
148
+ fs.readFileSync.mockReturnValue('content');
149
+
150
+ await runCLI(['anonymize', '--file', filePath]);
151
+
152
+ expect(fs.readFileSync).toHaveBeenCalledWith(filePath, 'utf8');
153
+ expect(stdoutWriteSpy).toHaveBeenCalledWith('anon(content)\n');
154
+ });
155
+ });
@@ -1,123 +1,129 @@
1
- const {
2
- processContent,
3
- applyReverseReplacementHistory,
4
- } = require('../lib/processor');
5
-
6
- describe('repo-anon processor', () => {
7
- test('should anonymize content', () => {
8
- const phrases = { company: 'anon' };
9
- const content = 'my company is here';
10
- const expected = 'my anon is here';
11
- const result = processContent(content, phrases);
12
- expect(result).toBe(expected);
13
- });
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const Anonymizer = require('../lib/processor');
14
4
 
15
- test('should deanonymize content', () => {
16
- const phrases = { company: 'anon' };
17
- const content = 'my anon is here';
18
- const expected = 'my company is here';
19
- const result = processContent(content, phrases, true);
20
- expect(result).toBe(expected);
21
- });
5
+ jest.mock('fs');
22
6
 
23
- test('should replace inside words by default', () => {
24
- const phrases = { th: 'BB' };
25
- const content = 'th with other';
26
- const expected = 'BB wiBB oBBer';
27
- const result = processContent(content, phrases);
28
- expect(result).toBe(expected);
29
- });
7
+ describe('Anonymizer', () => {
8
+ const mockConfig = {
9
+ mappings: {
10
+ 'Acme Corp': 'COMPANY_A',
11
+ 'John Doe': 'USER_1',
12
+ 'secret-key-123': 'SECRET_KEY'
13
+ }
14
+ };
30
15
 
31
- test('should only replace standalone words when wordReplace is true', () => {
32
- const phrases = {
33
- ck: {
34
- placeholder: 'bb',
35
- wordReplace: true,
36
- },
37
- };
38
- const content = 'ck back sack (ck) ck.';
39
- const expected = 'bb back sack (bb) bb.';
40
- const result = processContent(content, phrases);
41
- expect(result).toBe(expected);
16
+ beforeEach(() => {
17
+ jest.clearAllMocks();
42
18
  });
43
19
 
44
- test('should only deanonymize standalone placeholders when wordReplace is true', () => {
45
- const phrases = {
46
- ck: {
47
- placeholder: 'bb',
48
- wordReplace: true,
49
- },
50
- };
51
- const content = 'bb babb stubb (bb) bb.';
52
- const expected = 'ck babb stubb (ck) ck.';
53
- const result = processContent(content, phrases, true);
54
- expect(result).toBe(expected);
55
- });
20
+ describe('constructor and loadConfig', () => {
21
+ it('should load config from default path if it exists', () => {
22
+ fs.existsSync.mockReturnValue(true);
23
+ fs.readFileSync.mockReturnValue(JSON.stringify(mockConfig));
24
+
25
+ const anonymizer = new Anonymizer();
26
+ expect(anonymizer.mappings).toEqual(mockConfig.mappings);
27
+ });
56
28
 
57
- test('should return null if no change', () => {
58
- const phrases = { company: 'anon' };
59
- const content = 'nothing here';
60
- const result = processContent(content, phrases);
61
- expect(result).toBeNull();
62
- });
29
+ it('should use fallback path if default path does not exist', () => {
30
+ fs.existsSync.mockImplementation((p) => p.includes('fallback'));
31
+ fs.readFileSync.mockReturnValue(JSON.stringify(mockConfig));
32
+
33
+ // We need to trigger the fallback logic.
34
+ // The code checks if configPath exists, if not it tries fallbackPath.
35
+ fs.existsSync.mockReturnValueOnce(false).mockReturnValueOnce(true);
36
+
37
+ const anonymizer = new Anonymizer('missing.json');
38
+ expect(anonymizer.mappings).toEqual(mockConfig.mappings);
39
+ });
40
+
41
+ it('should handle empty or missing config file', () => {
42
+ fs.existsSync.mockReturnValue(true);
43
+ fs.readFileSync.mockReturnValue('');
44
+
45
+ const anonymizer = new Anonymizer();
46
+ expect(anonymizer.mappings).toEqual({});
47
+ });
63
48
 
64
- test('should track ordered replacements during anonymize', () => {
65
- const phrases = {
66
- company: 'ANON_COMPANY',
67
- project: 'ANON_PROJECT',
68
- };
69
- const content = 'company project company';
70
-
71
- const result = processContent(content, phrases, false, { trackHistory: true });
72
-
73
- expect(result.content).toBe('ANON_COMPANY ANON_PROJECT ANON_COMPANY');
74
- expect(result.replacements).toEqual([
75
- {
76
- search: 'company',
77
- replace: 'ANON_COMPANY',
78
- wordReplace: false,
79
- count: 2,
80
- },
81
- {
82
- search: 'project',
83
- replace: 'ANON_PROJECT',
84
- wordReplace: false,
85
- count: 1,
86
- },
87
- ]);
49
+ it('should handle invalid JSON in config file', () => {
50
+ fs.existsSync.mockReturnValue(true);
51
+ fs.readFileSync.mockReturnValue('invalid json');
52
+ const consoleSpy = jest.spyOn(console, 'error').mockImplementation();
53
+
54
+ const anonymizer = new Anonymizer();
55
+ expect(anonymizer.mappings).toEqual({});
56
+ expect(consoleSpy).toHaveBeenCalled();
57
+ consoleSpy.mockRestore();
58
+ });
88
59
  });
89
60
 
90
- test('should replay replacement history in reverse using recorded counts', () => {
91
- const content = 'ANON_COMPANY x ANON_COMPANY ANON_COMPANY';
92
- const replacements = [
93
- {
94
- search: 'company',
95
- replace: 'ANON_COMPANY',
96
- wordReplace: false,
97
- count: 2,
98
- },
99
- ];
61
+ describe('anonymize', () => {
62
+ let anonymizer;
63
+
64
+ beforeEach(() => {
65
+ fs.existsSync.mockReturnValue(true);
66
+ fs.readFileSync.mockReturnValue(JSON.stringify(mockConfig));
67
+ anonymizer = new Anonymizer();
68
+ });
100
69
 
101
- const restored = applyReverseReplacementHistory(content, replacements);
70
+ it('should return same text if null or empty', () => {
71
+ expect(anonymizer.anonymize(null)).toBeNull();
72
+ expect(anonymizer.anonymize('')).toBe('');
73
+ });
102
74
 
103
- expect(restored).toBe('company x company ANON_COMPANY');
75
+ it('should replace phrases with placeholders', () => {
76
+ const input = 'Welcome to Acme Corp, John Doe!';
77
+ const expected = 'Welcome to COMPANY_A, USER_1!';
78
+ expect(anonymizer.anonymize(input)).toBe(expected);
79
+ });
80
+
81
+ it('should handle overlapping phrases by length (longest first)', () => {
82
+ anonymizer.mappings = {
83
+ 'Acme': 'SHORT',
84
+ 'Acme Corp': 'LONG'
85
+ };
86
+ const input = 'Welcome to Acme Corp';
87
+ expect(anonymizer.anonymize(input)).toBe('Welcome to LONG');
88
+ });
89
+
90
+ it('should escape regex special characters in phrases', () => {
91
+ anonymizer.mappings = {
92
+ 'user.name': 'USER_NAME'
93
+ };
94
+ const input = 'The user.name is here';
95
+ expect(anonymizer.anonymize(input)).toBe('The USER_NAME is here');
96
+ });
104
97
  });
105
98
 
106
- test('should support additional deanonymization after history replay', () => {
107
- const phrases = { company: 'ANON_COMPANY' };
108
- const content = 'ANON_COMPANY x ANON_COMPANY ANON_COMPANY';
109
- const replacements = [
110
- {
111
- search: 'company',
112
- replace: 'ANON_COMPANY',
113
- wordReplace: false,
114
- count: 2,
115
- },
116
- ];
117
-
118
- const restoredFromHistory = applyReverseReplacementHistory(content, replacements);
119
- const fullyRestored = processContent(restoredFromHistory, phrases, true);
120
-
121
- expect(fullyRestored).toBe('company x company company');
99
+ describe('deanonymize', () => {
100
+ let anonymizer;
101
+
102
+ beforeEach(() => {
103
+ fs.existsSync.mockReturnValue(true);
104
+ fs.readFileSync.mockReturnValue(JSON.stringify(mockConfig));
105
+ anonymizer = new Anonymizer();
106
+ });
107
+
108
+ it('should return same text if null or empty', () => {
109
+ expect(anonymizer.deanonymize(null)).toBeNull();
110
+ expect(anonymizer.deanonymize('')).toBe('');
111
+ });
112
+
113
+ it('should replace placeholders with original phrases', () => {
114
+ const input = 'Welcome to COMPANY_A, USER_1!';
115
+ const expected = 'Welcome to Acme Corp, John Doe!';
116
+ expect(anonymizer.deanonymize(input)).toBe(expected);
117
+ });
118
+
119
+ it('should handle overlapping placeholders by length', () => {
120
+ anonymizer.mappings = {
121
+ 'Original': 'PLACEHOLDER',
122
+ 'Something Else': 'PLACEHOLDER_LONG'
123
+ };
124
+ const input = 'Testing PLACEHOLDER_LONG and PLACEHOLDER';
125
+ const expected = 'Testing Something Else and Original';
126
+ expect(anonymizer.deanonymize(input)).toBe(expected);
127
+ });
122
128
  });
123
129
  });