repo-anon 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,8 @@ name: Publish to NPM
3
3
  on:
4
4
  release:
5
5
  types: [published]
6
+ # add manual trigger for testing
7
+ workflow_dispatch:
6
8
 
7
9
  jobs:
8
10
  build:
@@ -13,7 +15,7 @@ jobs:
13
15
  with:
14
16
  node-version: '20'
15
17
  registry-url: 'https://registry.npmjs.org'
16
- - run: npm ci
18
+ - run: npm install
17
19
  - run: npm test
18
20
  - run: npm publish
19
21
  env:
package/README.md CHANGED
@@ -5,7 +5,8 @@ A Node.js CLI tool to anonymize and de-anonymize files in a repository based on
5
5
  ## Features
6
6
 
7
7
  - **Anonymize**: Replaces sensitive phrases with configured placeholders.
8
- - **De-anonymize**: Restores original phrases from placeholders.
8
+ - **Replacement History**: Writes `.repo-anon-history.json` with ordered per-file replacement events (including counts).
9
+ - **De-anonymize**: Replays replacement history in reverse order, then runs a full phrase-based pass to also restore new placeholder usage added later.
9
10
  - **Recursive**: Traverses through all project directories (ignoring `node_modules`, `.git`, etc.).
10
11
  - **CI/CD Ready**: Includes GitLab pipeline configuration for publishing to the GitLab package registry.
11
12
 
@@ -23,23 +24,33 @@ npm install -g @your_gitlab_namespace/repo-anon
23
24
 
24
25
  ```json
25
26
  {
26
- "CompanyA": "ANON_COMPANY_A",
27
- "BrandX": "ANON_BRAND_X"
27
+ "CompanyA": "ANON_COMPANY_A",
28
+ "ck": {
29
+ "placeholder": "bb",
30
+ "wordReplace": true
31
+ }
28
32
  }
29
33
  ```
30
34
 
35
+ String values keep the old behavior and replace matches anywhere inside a word.
36
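+ Object values let you opt into whole-word matching with `wordReplace: true`; a match counts as a whole word when it is not directly adjacent to an ASCII letter, digit, or underscore.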
37
+
31
38
  2. Run the anonymization command:
32
39
 
33
40
  ```bash
34
41
  repo-anon anonymize
35
42
  ```
36
43
 
44
+ This also writes `.repo-anon-history.json` in the working directory.
45
+
37
46
  3. Revert changes (if needed):
38
47
 
39
48
  ```bash
40
49
  repo-anon deanonymize
41
50
  ```
42
51
 
52
+ De-anonymization uses the history file first to reverse exact prior replacements in order, then applies phrase-based de-anonymization globally so newly introduced placeholders are also restored.
53
+
43
54
  ## Development
44
55
 
45
56
  - **Tests**: Run unit tests using `npm test`.
package/lib/processor.js CHANGED
@@ -2,6 +2,26 @@ const fs = require('fs');
2
2
  const path = require('path');
3
3
 
4
4
  const PHRASES_FILE = '.phrases';
5
+ const HISTORY_FILE = '.repo-anon-history.json';
6
+ const WORD_CHAR_CLASS = 'A-Za-z0-9_';
7
+
8
/**
 * Escapes every regular-expression metacharacter in `value` so the result can
 * be embedded in a `RegExp` source and match the text literally.
 *
 * @param {string} value - Literal text to escape.
 * @returns {string} `value` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (metaCharacter) => `\\${metaCharacter}`);
}
11
+
12
/**
 * Converts the raw phrase map into a uniform list of replacement entries.
 *
 * Each value in `phrases` is either a string (the placeholder; matches
 * anywhere, `wordReplace` is false) or an object of the form
 * `{ placeholder: string, wordReplace?: boolean }`.
 *
 * @param {Object<string, string|{placeholder: string, wordReplace?: boolean}>} phrases
 *   Map from original phrase to its placeholder configuration.
 * @returns {Array<{original: string, placeholder: string, wordReplace: boolean}>}
 *   One entry per key of `phrases`, in `Object.entries` order.
 * @throws {TypeError} When a value is neither a string nor an object carrying
 *   a string `placeholder`.
 */
function normalizePhrases(phrases) {
  return Object.entries(phrases).map(([original, config], index) => {
    if (typeof config === 'string') {
      return { original, placeholder: config, wordReplace: false };
    }

    // Fail fast on malformed entries. Letting `placeholder: undefined` through
    // either crashes later when placeholder lengths are compared, or causes
    // the phrase to be skipped without any notice.
    const hasStringPlaceholder =
      config !== null &&
      typeof config === 'object' &&
      typeof config.placeholder === 'string';
    if (!hasStringPlaceholder) {
      // The entry is identified by position so the phrase itself is not echoed
      // into logs.
      throw new TypeError(
        `Invalid phrase configuration at entry #${index + 1}: expected a string or an object with a string "placeholder"`
      );
    }

    return {
      original,
      placeholder: config.placeholder,
      // Only a literal `true` enables whole-word matching.
      wordReplace: config.wordReplace === true,
    };
  });
}
5
25
 
6
26
  function loadPhrases() {
7
27
  if (!fs.existsSync(PHRASES_FILE)) {
@@ -10,25 +30,123 @@ function loadPhrases() {
10
30
  return JSON.parse(fs.readFileSync(PHRASES_FILE, 'utf8'));
11
31
  }
12
32
 
13
- function processContent(content, phrases, reverse = false) {
33
/**
 * Builds the global regular expression used to find `search` in file content.
 *
 * Without `wordReplace` the pattern is just the escaped literal. With
 * `wordReplace` the literal must be preceded by the start of the input or a
 * character outside `WORD_CHAR_CLASS` (captured as group 1, so the caller can
 * put it back), and followed by such a character or the end of the input
 * (checked with a lookahead, so it is not consumed).
 *
 * @param {string} search - Literal text to look for.
 * @param {boolean} wordReplace - Whether to require whole-word matches.
 * @returns {RegExp} A fresh regular expression with the `g` flag.
 */
function buildPattern(search, wordReplace) {
  const literal = escapeRegExp(search);

  if (!wordReplace) {
    return new RegExp(literal, 'g');
  }

  const nonWordChar = `[^${WORD_CHAR_CLASS}]`;
  return new RegExp(`(^|${nonWordChar})(${literal})(?=${nonWordChar}|$)`, 'g');
}
39
+
40
/**
 * Replaces occurrences of `search` in `content` with `replace` and reports how
 * many replacements were made.
 *
 * @param {string} content - Text to process.
 * @param {string} search - Literal text to find.
 * @param {string} replace - Text substituted for each match.
 * @param {boolean} wordReplace - When truthy, only whole-word matches are
 *   replaced (see `buildPattern`).
 * @param {number} [maxReplacements=Infinity] - Upper bound on replacements;
 *   matches beyond it are left untouched.
 * @returns {{content: string, count: number}} The resulting text and the number
 *   of replacements performed. When `search` or `replace` is empty, the limit
 *   is not positive, or `content` does not contain `search`, the input is
 *   returned unchanged with a count of 0.
 */
function replaceWithCount(content, search, replace, wordReplace, maxReplacements = Number.POSITIVE_INFINITY) {
  const nothingToDo =
    !search || !replace || maxReplacements <= 0 || !content.includes(search);
  if (nothingToDo) {
    return { content, count: 0 };
  }

  const pattern = buildPattern(search, wordReplace);
  let count = 0;

  // A replacer function is used (rather than a replacement string) so that `$`
  // sequences in `replace` are inserted literally and the limit can be enforced.
  const substitute = (match, prefix) => {
    if (count >= maxReplacements) {
      return match;
    }
    count += 1;
    // Only the whole-word pattern has a capture group; `prefix` is the
    // boundary character it consumed and has to be put back.
    return wordReplace ? `${prefix}${replace}` : replace;
  };

  const updatedContent = content.replace(pattern, substitute);
  return { content: updatedContent, count };
}
66
+
67
/**
 * Applies every configured phrase replacement to `content`.
 *
 * Entries are processed longest search text first (presumably so that a longer
 * phrase is handled before a shorter one it contains).
 *
 * @param {string} content - Text to process.
 * @param {Object} phrases - Raw phrase map, as accepted by `normalizePhrases`.
 * @param {boolean} [reverse=false] - When true, placeholders are searched for
 *   and replaced by the original phrases.
 * @param {{trackHistory?: boolean}} [options={}] - `trackHistory: true` makes
 *   the function also return the list of replacements it performed.
 * @returns {null|string|{content: string, replacements: Array<{search: string, replace: string, wordReplace: boolean, count: number}>}}
 *   `null` when nothing was replaced; otherwise the new text, or an object with
 *   the new text and the ordered replacement events when `trackHistory` is true.
 */
function processContent(content, phrases, reverse = false, options = {}) {
  const searchKey = reverse ? 'placeholder' : 'original';
  const replaceKey = reverse ? 'original' : 'placeholder';

  const entries = normalizePhrases(phrases).sort(
    (leftEntry, rightEntry) => rightEntry[searchKey].length - leftEntry[searchKey].length
  );

  const replacements = [];
  let newContent = content;
  let changed = false;

  for (const entry of entries) {
    const search = entry[searchKey];
    const replace = entry[replaceKey];
    const { wordReplace } = entry;

    const result = replaceWithCount(newContent, search, replace, wordReplace);
    if (result.count > 0) {
      newContent = result.content;
      changed = true;

      if (options.trackHistory === true) {
        replacements.push({ search, replace, wordReplace, count: result.count });
      }
    }
  }

  if (!changed) {
    return null;
  }

  return options.trackHistory === true
    ? { content: newContent, replacements }
    : newContent;
}
109
+
110
/**
 * Undoes recorded replacement events by replaying them from last to first.
 *
 * For each event, up to `event.count` occurrences of `event.replace` are
 * turned back into `event.search`, using the event's `wordReplace` mode.
 *
 * @param {string} content - Text to restore.
 * @param {Array<{search: string, replace: string, wordReplace: boolean, count: number}>} [replacements=[]]
 *   Events in the order they were originally applied.
 * @returns {string|null} The restored text, or `null` when no event changed
 *   anything.
 */
function applyReverseReplacementHistory(content, replacements = []) {
  let restored = content;
  let changed = false;
  let index = replacements.length;

  while (index > 0) {
    index -= 1;
    const { replace, search, wordReplace, count } = replacements[index];

    // Arguments are swapped relative to the forward pass: the recorded
    // replacement text is now what gets searched for.
    const result = replaceWithCount(restored, replace, search, wordReplace, count);
    if (result.count > 0) {
      restored = result.content;
      changed = true;
    }
  }

  return changed ? restored : null;
}
27
132
 
133
/**
 * Reads the replacement history written by a previous anonymize run.
 *
 * @returns {Object|null} The parsed history, or `null` when the history file
 *   does not exist, does not contain valid JSON, or parses to a non-object
 *   value.
 */
function loadHistory() {
  if (!fs.existsSync(HISTORY_FILE)) {
    return null;
  }

  let raw;
  try {
    raw = JSON.parse(fs.readFileSync(HISTORY_FILE, 'utf8'));
  } catch (error) {
    // Only malformed JSON is tolerated; read errors still propagate.
    if (!(error instanceof SyntaxError)) {
      throw error;
    }
    // History is optional, so a truncated or hand-edited file is reported
    // and ignored rather than aborting the whole run.
    console.warn(`Ignoring invalid history file ${HISTORY_FILE}: ${error.message}`);
    return null;
  }

  return raw && typeof raw === 'object' ? raw : null;
}
141
+
142
/**
 * Writes the replacement history to `HISTORY_FILE` as JSON indented by two
 * spaces, replacing any existing file.
 *
 * @param {Object} history - History object to persist.
 * @returns {void}
 */
function saveHistory(history) {
  const serialized = JSON.stringify(history, null, 2);
  fs.writeFileSync(HISTORY_FILE, serialized, 'utf8');
}
145
+
28
146
  async function walk(dir, callback) {
29
147
  const files = fs.readdirSync(dir);
30
148
  for (const file of files) {
31
- if (['node_modules', '.git', '.phrases', 'package.json', 'package-lock.json', 'bin', 'tests', '.gitlab-ci.yml'].includes(file)) continue;
149
+ if (['node_modules', '.git', '.phrases', HISTORY_FILE, 'package.json', 'package-lock.json', 'bin', 'tests', '.gitlab-ci.yml'].includes(file)) continue;
32
150
  const filePath = path.join(dir, file);
33
151
  if (fs.statSync(filePath).isDirectory()) {
34
152
  await walk(filePath, callback);
@@ -40,26 +158,55 @@ async function walk(dir, callback) {
40
158
 
41
159
/**
 * Anonymizes every file reachable from the current working directory and
 * records the replacements that were made.
 *
 * Each changed file is rewritten in place, and its ordered replacement events
 * are stored in the history under the path that `walk` passed to the callback.
 * Once the traversal has finished, the history is written to `HISTORY_FILE`.
 *
 * @returns {Promise<void>} Resolves after all files have been processed and
 *   the history file has been saved.
 */
async function anonymize() {
  const phrases = loadPhrases();
  const history = {
    version: 1,
    createdAt: new Date().toISOString(),
    files: {},
  };

  // `walk` is async and awaits its own recursive calls, so without `await`
  // here the history would be saved before files in sub-directories are
  // visited, and a failure inside the traversal would go unhandled.
  await walk(process.cwd(), (filePath) => {
    const content = fs.readFileSync(filePath, 'utf8');
    const result = processContent(content, phrases, false, { trackHistory: true });

    if (result) {
      fs.writeFileSync(filePath, result.content, 'utf8');
      history.files[filePath] = result.replacements;
      console.log(`Updated: ${filePath}`);
    }
  });

  saveHistory(history);
  console.log(`Saved replacement history to: ${HISTORY_FILE}`);
}
52
181
 
53
182
/**
 * Restores original phrases in every file reachable from the current working
 * directory.
 *
 * For each file, recorded history (if any exists for that path) is replayed in
 * reverse first. A phrase-based reverse pass then runs on the result, so that
 * placeholders with no history entry are restored as well. A file is rewritten
 * only when at least one of the two passes changed it.
 *
 * @returns {Promise<void>} Resolves after all files have been processed.
 */
async function deanonymize() {
  const phrases = loadPhrases();
  const history = loadHistory();

  // `walk` is async and awaits its own recursive calls; awaiting it keeps this
  // function's promise pending until every file is restored and lets
  // traversal errors reach the caller.
  await walk(process.cwd(), (filePath) => {
    const content = fs.readFileSync(filePath, 'utf8');

    let newContent = null;
    if (history && history.files && Array.isArray(history.files[filePath])) {
      newContent = applyReverseReplacementHistory(content, history.files[filePath]);
    }

    // The phrase pass runs on the history-restored text when there is one,
    // otherwise on the file as read.
    const fallbackInput = newContent || content;
    const fallbackContent = processContent(fallbackInput, phrases, true);

    // `fallbackContent` is null when the phrase pass changed nothing; in that
    // case the history-only result (possibly also null) is what remains.
    const finalContent = fallbackContent || newContent;
    if (finalContent) {
      fs.writeFileSync(filePath, finalContent, 'utf8');
      console.log(`Updated: ${filePath}`);
    }
  });
}
64
204
 
65
- module.exports = { anonymize, deanonymize, processContent };
205
+ module.exports = {
206
+ anonymize,
207
+ deanonymize,
208
+ processContent,
209
+ replaceWithCount,
210
+ applyReverseReplacementHistory,
211
+ HISTORY_FILE,
212
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "repo-anon",
3
- "version": "0.1.2",
3
+ "version": "0.2.0",
4
4
  "description": "CLI tool to anonymize/de-anonymize repositories.",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -11,7 +11,8 @@
11
11
  },
12
12
  "scripts": {
13
13
  "test": "jest",
14
- "lint": "eslint ."
14
+ "lint": "eslint .",
15
+ "start": "node bin/repo-anon.js"
15
16
  },
16
17
  "keywords": [],
17
18
  "author": "",
@@ -1,27 +1,123 @@
1
- const { processContent } = require('../lib/processor');
1
+ const {
2
+ processContent,
3
+ applyReverseReplacementHistory,
4
+ } = require('../lib/processor');
2
5
 
3
6
  describe('repo-anon processor', () => {
4
- test('should anonymize content', () => {
5
- const phrases = { "company": "anon" };
6
- const content = "my company is here";
7
- const expected = "my anon is here";
8
- const result = processContent(content, phrases);
9
- expect(result).toBe(expected);
10
- });
11
-
12
- test('should deanonymize content', () => {
13
- const phrases = { "company": "anon" };
14
- const content = "my anon is here";
15
- const expected = "my company is here";
16
- // reverse = true
17
- const result = processContent(content, phrases, true);
18
- expect(result).toBe(expected);
19
- });
20
-
21
- test('should return null if no change', () => {
22
- const phrases = { "company": "anon" };
23
- const content = "nothing here";
24
- const result = processContent(content, phrases);
25
- expect(result).toBeNull();
26
- });
7
+ test('should anonymize content', () => {
8
+ const phrases = { company: 'anon' };
9
+ const content = 'my company is here';
10
+ const expected = 'my anon is here';
11
+ const result = processContent(content, phrases);
12
+ expect(result).toBe(expected);
13
+ });
14
+
15
+ test('should deanonymize content', () => {
16
+ const phrases = { company: 'anon' };
17
+ const content = 'my anon is here';
18
+ const expected = 'my company is here';
19
+ const result = processContent(content, phrases, true);
20
+ expect(result).toBe(expected);
21
+ });
22
+
23
+ test('should replace inside words by default', () => {
24
+ const phrases = { th: 'BB' };
25
+ const content = 'th with other';
26
+ const expected = 'BB wiBB oBBer';
27
+ const result = processContent(content, phrases);
28
+ expect(result).toBe(expected);
29
+ });
30
+
31
+ test('should only replace standalone words when wordReplace is true', () => {
32
+ const phrases = {
33
+ ck: {
34
+ placeholder: 'bb',
35
+ wordReplace: true,
36
+ },
37
+ };
38
+ const content = 'ck back sack (ck) ck.';
39
+ const expected = 'bb back sack (bb) bb.';
40
+ const result = processContent(content, phrases);
41
+ expect(result).toBe(expected);
42
+ });
43
+
44
+ test('should only deanonymize standalone placeholders when wordReplace is true', () => {
45
+ const phrases = {
46
+ ck: {
47
+ placeholder: 'bb',
48
+ wordReplace: true,
49
+ },
50
+ };
51
+ const content = 'bb babb stubb (bb) bb.';
52
+ const expected = 'ck babb stubb (ck) ck.';
53
+ const result = processContent(content, phrases, true);
54
+ expect(result).toBe(expected);
55
+ });
56
+
57
+ test('should return null if no change', () => {
58
+ const phrases = { company: 'anon' };
59
+ const content = 'nothing here';
60
+ const result = processContent(content, phrases);
61
+ expect(result).toBeNull();
62
+ });
63
+
64
+ test('should track ordered replacements during anonymize', () => {
65
+ const phrases = {
66
+ company: 'ANON_COMPANY',
67
+ project: 'ANON_PROJECT',
68
+ };
69
+ const content = 'company project company';
70
+
71
+ const result = processContent(content, phrases, false, { trackHistory: true });
72
+
73
+ expect(result.content).toBe('ANON_COMPANY ANON_PROJECT ANON_COMPANY');
74
+ expect(result.replacements).toEqual([
75
+ {
76
+ search: 'company',
77
+ replace: 'ANON_COMPANY',
78
+ wordReplace: false,
79
+ count: 2,
80
+ },
81
+ {
82
+ search: 'project',
83
+ replace: 'ANON_PROJECT',
84
+ wordReplace: false,
85
+ count: 1,
86
+ },
87
+ ]);
88
+ });
89
+
90
+ test('should replay replacement history in reverse using recorded counts', () => {
91
+ const content = 'ANON_COMPANY x ANON_COMPANY ANON_COMPANY';
92
+ const replacements = [
93
+ {
94
+ search: 'company',
95
+ replace: 'ANON_COMPANY',
96
+ wordReplace: false,
97
+ count: 2,
98
+ },
99
+ ];
100
+
101
+ const restored = applyReverseReplacementHistory(content, replacements);
102
+
103
+ expect(restored).toBe('company x company ANON_COMPANY');
104
+ });
105
+
106
+ test('should support additional deanonymization after history replay', () => {
107
+ const phrases = { company: 'ANON_COMPANY' };
108
+ const content = 'ANON_COMPANY x ANON_COMPANY ANON_COMPANY';
109
+ const replacements = [
110
+ {
111
+ search: 'company',
112
+ replace: 'ANON_COMPANY',
113
+ wordReplace: false,
114
+ count: 2,
115
+ },
116
+ ];
117
+
118
+ const restoredFromHistory = applyReverseReplacementHistory(content, replacements);
119
+ const fullyRestored = processContent(restoredFromHistory, phrases, true);
120
+
121
+ expect(fullyRestored).toBe('company x company company');
122
+ });
27
123
  });