repo-anon 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/publish.yml +3 -1
- package/README.md +14 -3
- package/lib/processor.js +159 -12
- package/package.json +3 -2
- package/tests/processor.test.js +120 -24
|
@@ -3,6 +3,8 @@ name: Publish to NPM
|
|
|
3
3
|
on:
|
|
4
4
|
release:
|
|
5
5
|
types: [published]
|
|
6
|
+
# add manual trigger for testing
|
|
7
|
+
workflow_dispatch:
|
|
6
8
|
|
|
7
9
|
jobs:
|
|
8
10
|
build:
|
|
@@ -13,7 +15,7 @@ jobs:
|
|
|
13
15
|
with:
|
|
14
16
|
node-version: '20'
|
|
15
17
|
registry-url: 'https://registry.npmjs.org'
|
|
16
|
-
- run: npm
|
|
18
|
+
- run: npm install
|
|
17
19
|
- run: npm test
|
|
18
20
|
- run: npm publish
|
|
19
21
|
env:
|
package/README.md
CHANGED
|
@@ -5,7 +5,8 @@ A Node.js CLI tool to anonymize and de-anonymize files in a repository based on
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
7
|
- **Anonymize**: Replaces sensitive phrases with configured placeholders.
|
|
8
|
-
- **
|
|
8
|
+
- **Replacement History**: Writes `.repo-anon-history.json` with ordered per-file replacement events (including counts).
|
|
9
|
+
- **De-anonymize**: Replays replacement history in reverse order, then runs a full phrase-based pass to also restore new placeholder usage added later.
|
|
9
10
|
- **Recursive**: Traverses through all project directories (ignoring `node_modules`, `.git`, etc.).
|
|
10
11
|
- **CI/CD Ready**: Includes GitLab pipeline configuration for publishing to the GitLab package registry.
|
|
11
12
|
|
|
@@ -23,23 +24,33 @@ npm install -g @your_gitlab_namespace/repo-anon
|
|
|
23
24
|
|
|
24
25
|
```json
|
|
25
26
|
{
|
|
26
|
-
"CompanyA": "ANON_COMPANY_A",
|
|
27
|
-
"
|
|
27
|
+
"CompanyA": "ANON_COMPANY_A",
|
|
28
|
+
"ck": {
|
|
29
|
+
"placeholder": "bb",
|
|
30
|
+
"wordReplace": true
|
|
31
|
+
}
|
|
28
32
|
}
|
|
29
33
|
```
|
|
30
34
|
|
|
35
|
+
String values keep the old behavior and replace matches anywhere inside a word.
|
|
36
|
+
Object values let you opt into whole-word matching with `wordReplace: true`.
|
|
37
|
+
|
|
31
38
|
2. Run the anonymization command:
|
|
32
39
|
|
|
33
40
|
```bash
|
|
34
41
|
repo-anon anonymize
|
|
35
42
|
```
|
|
36
43
|
|
|
44
|
+
This also writes `.repo-anon-history.json` in the working directory.
|
|
45
|
+
|
|
37
46
|
3. Revert changes (if needed):
|
|
38
47
|
|
|
39
48
|
```bash
|
|
40
49
|
repo-anon deanonymize
|
|
41
50
|
```
|
|
42
51
|
|
|
52
|
+
De-anonymization uses the history file first to reverse exact prior replacements in order, then applies phrase-based de-anonymization globally so newly introduced placeholders are also restored.
|
|
53
|
+
|
|
43
54
|
## Development
|
|
44
55
|
|
|
45
56
|
- **Tests**: Run unit tests using `npm test`.
|
package/lib/processor.js
CHANGED
|
@@ -2,6 +2,26 @@ const fs = require('fs');
|
|
|
2
2
|
const path = require('path');
|
|
3
3
|
|
|
4
4
|
const PHRASES_FILE = '.phrases';
|
|
5
|
+
const HISTORY_FILE = '.repo-anon-history.json';
|
|
6
|
+
const WORD_CHAR_CLASS = 'A-Za-z0-9_';
|
|
7
|
+
|
|
8
|
+
/**
 * Escapes every regular-expression metacharacter in `value` so the result can
 * be embedded in a RegExp source and still match the text literally.
 *
 * @param {string} value - Literal text to escape.
 * @returns {string} `value` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (character) => `\\${character}`);
}
|
|
11
|
+
|
|
12
|
+
/**
 * Converts the phrases map into a uniform list of replacement entries.
 *
 * Each key is the original phrase. A string value is the placeholder and
 * disables whole-word matching. An object value supplies `placeholder` and
 * enables whole-word matching only when `wordReplace` is exactly `true`.
 *
 * @param {Object<string, string|{placeholder: string, wordReplace?: boolean}>} phrases
 *   Map of original phrase to its configuration.
 * @returns {Array<{original: string, placeholder: string, wordReplace: boolean}>}
 *   One normalized entry per key, in the map's own key order.
 * @throws {TypeError} If an entry is neither a string nor an object carrying a
 *   string `placeholder`.
 */
function normalizePhrases(phrases) {
  return Object.entries(phrases).map(([original, config]) => {
    if (typeof config === 'string') {
      return { original, placeholder: config, wordReplace: false };
    }

    // `typeof null === 'object'`, so null has to be rejected explicitly.
    // Failing here with the offending key is clearer than an undefined
    // placeholder surfacing later as a silent no-op or an unrelated crash.
    const isValidObject =
      config !== null &&
      typeof config === 'object' &&
      typeof config.placeholder === 'string';
    if (!isValidObject) {
      throw new TypeError(
        `Invalid phrase configuration for "${original}": expected a string or an object with a string "placeholder".`,
      );
    }

    return {
      original,
      placeholder: config.placeholder,
      wordReplace: config.wordReplace === true,
    };
  });
}
|
|
5
25
|
|
|
6
26
|
function loadPhrases() {
|
|
7
27
|
if (!fs.existsSync(PHRASES_FILE)) {
|
|
@@ -10,25 +30,123 @@ function loadPhrases() {
|
|
|
10
30
|
return JSON.parse(fs.readFileSync(PHRASES_FILE, 'utf8'));
|
|
11
31
|
}
|
|
12
32
|
|
|
13
|
-
function
|
|
33
|
+
/**
 * Builds the global RegExp used to locate `search` inside file content.
 *
 * Without `wordReplace` the pattern is just the escaped literal. With it, the
 * literal must be preceded by the start of input or a non-word character
 * (captured as group 1) and followed by a non-word character or the end of
 * input (checked with a lookahead, so it is not consumed).
 *
 * @param {string} search - Literal text to look for.
 * @param {boolean} wordReplace - Whether to require word boundaries.
 * @returns {RegExp} A fresh pattern with the `g` flag.
 */
function buildPattern(search, wordReplace) {
  const literal = escapeRegExp(search);

  if (!wordReplace) {
    return new RegExp(literal, 'g');
  }

  const boundaryBefore = `(^|[^${WORD_CHAR_CLASS}])`;
  const boundaryAfter = `(?=[^${WORD_CHAR_CLASS}]|$)`;
  return new RegExp(`${boundaryBefore}(${literal})${boundaryAfter}`, 'g');
}
|
|
39
|
+
|
|
40
|
+
/**
 * Replaces occurrences of `search` in `content` and reports how many were
 * replaced.
 *
 * @param {string} content - Text to process.
 * @param {string} search - Literal text to find.
 * @param {string} replace - Text substituted for each match.
 * @param {boolean} wordReplace - Match only whole words when truthy.
 * @param {number} [maxReplacements=Infinity] - Upper bound on replacements;
 *   matches beyond the bound are left untouched.
 * @returns {{content: string, count: number}} The resulting text and the
 *   number of replacements made. `count` is 0 and `content` is returned as-is
 *   when `search` or `replace` is empty, the bound is not positive, or
 *   `search` does not occur in `content`.
 */
function replaceWithCount(content, search, replace, wordReplace, maxReplacements = Number.POSITIVE_INFINITY) {
  const nothingToDo = !search || !replace || maxReplacements <= 0 || !content.includes(search);
  if (nothingToDo) {
    return { content, count: 0 };
  }

  let count = 0;
  const updatedContent = content.replace(buildPattern(search, wordReplace), (match, prefix) => {
    if (count >= maxReplacements) {
      return match;
    }
    count += 1;
    // In whole-word mode group 1 holds the boundary character the pattern
    // consumed, so it has to be written back in front of the replacement.
    return wordReplace ? `${prefix}${replace}` : replace;
  });

  return { content: updatedContent, count };
}
|
|
66
|
+
|
|
67
|
+
/**
 * Applies every configured phrase replacement to `content`.
 *
 * Entries are processed longest search text first. In forward mode the
 * original phrase is replaced by its placeholder; with `reverse` the roles
 * are swapped.
 *
 * @param {string} content - Text to process.
 * @param {Object} phrases - Phrases map accepted by `normalizePhrases`.
 * @param {boolean} [reverse=false] - Replace placeholders with originals.
 * @param {{trackHistory?: boolean}} [options={}] - With `trackHistory: true`
 *   the per-phrase replacement events are returned alongside the content.
 * @returns {null|string|{content: string, replacements: Array}} `null` when
 *   nothing was replaced; otherwise the new content, or
 *   `{ content, replacements }` when history tracking is enabled.
 */
function processContent(content, phrases, reverse = false, options = {}) {
  const searchKey = reverse ? 'placeholder' : 'original';
  const replaceKey = reverse ? 'original' : 'placeholder';
  const wantsHistory = () => options.trackHistory === true;

  const ordered = normalizePhrases(phrases);
  ordered.sort((leftEntry, rightEntry) => rightEntry[searchKey].length - leftEntry[searchKey].length);

  const replacements = [];
  let newContent = content;
  let changed = false;

  for (const entry of ordered) {
    const search = entry[searchKey];
    const replace = entry[replaceKey];
    const { wordReplace } = entry;
    const result = replaceWithCount(newContent, search, replace, wordReplace);

    if (result.count <= 0) {
      continue;
    }

    newContent = result.content;
    changed = true;
    if (wantsHistory()) {
      replacements.push({ search, replace, wordReplace, count: result.count });
    }
  }

  if (!changed) {
    return null;
  }

  return wantsHistory() ? { content: newContent, replacements } : newContent;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Undoes recorded replacement events, last event first.
 *
 * Each event's `replace` text is turned back into its `search` text, at most
 * `count` times, using the event's own `wordReplace` setting.
 *
 * @param {string} content - Text to restore.
 * @param {Array<{search: string, replace: string, wordReplace: boolean, count: number}>} [replacements=[]]
 *   Events in the order they were originally applied.
 * @returns {string|null} The restored text, or `null` when no event changed
 *   anything.
 */
function applyReverseReplacementHistory(content, replacements = []) {
  let restored = content;
  let changed = false;
  let index = replacements.length;

  while (index-- > 0) {
    const { replace: placeholderText, search: originalText, wordReplace, count } = replacements[index];
    const outcome = replaceWithCount(restored, placeholderText, originalText, wordReplace, count);

    if (outcome.count > 0) {
      restored = outcome.content;
      changed = true;
    }
  }

  if (!changed) {
    return null;
  }
  return restored;
}
|
|
27
132
|
|
|
133
|
+
/**
 * Loads the replacement history written by a previous anonymize run.
 *
 * @returns {Object|null} The parsed history, or `null` when the history file
 *   does not exist, does not contain valid JSON, or its top-level value is not
 *   an object.
 */
function loadHistory() {
  if (!fs.existsSync(HISTORY_FILE)) {
    return null;
  }

  const text = fs.readFileSync(HISTORY_FILE, 'utf8');

  let raw;
  try {
    raw = JSON.parse(text);
  } catch (error) {
    if (!(error instanceof SyntaxError)) {
      throw error;
    }
    // The history is optional: a damaged file is reported and ignored instead
    // of aborting the whole run.
    console.warn(`Ignoring unreadable history file ${HISTORY_FILE}: ${error.message}`);
    return null;
  }

  return raw && typeof raw === 'object' ? raw : null;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Writes the replacement history to the history file as pretty-printed JSON
 * (two-space indentation, UTF-8).
 *
 * @param {Object} history - History object to persist.
 * @returns {void}
 */
function saveHistory(history) {
  const serialized = JSON.stringify(history, null, 2);
  fs.writeFileSync(HISTORY_FILE, serialized, 'utf8');
}
|
|
145
|
+
|
|
28
146
|
async function walk(dir, callback) {
|
|
29
147
|
const files = fs.readdirSync(dir);
|
|
30
148
|
for (const file of files) {
|
|
31
|
-
if (['node_modules', '.git', '.phrases', 'package.json', 'package-lock.json', 'bin', 'tests', '.gitlab-ci.yml'].includes(file)) continue;
|
|
149
|
+
if (['node_modules', '.git', '.phrases', HISTORY_FILE, 'package.json', 'package-lock.json', 'bin', 'tests', '.gitlab-ci.yml'].includes(file)) continue;
|
|
32
150
|
const filePath = path.join(dir, file);
|
|
33
151
|
if (fs.statSync(filePath).isDirectory()) {
|
|
34
152
|
await walk(filePath, callback);
|
|
@@ -40,26 +158,55 @@ async function walk(dir, callback) {
|
|
|
40
158
|
|
|
41
159
|
/**
 * Anonymizes every file reached by `walk` from the current working directory
 * and records what was replaced.
 *
 * Each changed file is rewritten in place. Its ordered replacement events are
 * stored under the file path that `walk` passed to the callback, and the
 * collected history is written to the history file once the traversal has
 * finished.
 *
 * @returns {Promise<void>} Resolves after all files are processed and the
 *   history file has been saved; rejects if the traversal fails.
 */
async function anonymize() {
  const phrases = loadPhrases();
  const history = {
    version: 1,
    createdAt: new Date().toISOString(),
    files: {},
  };

  // walk() is async and awaits its recursive calls, so it hands control back
  // to the caller at the first subdirectory. It must be awaited here,
  // otherwise the history is saved before the remaining files are processed.
  await walk(process.cwd(), (filePath) => {
    const content = fs.readFileSync(filePath, 'utf8');
    const result = processContent(content, phrases, false, { trackHistory: true });

    if (result) {
      fs.writeFileSync(filePath, result.content, 'utf8');
      history.files[filePath] = result.replacements;
      console.log(`Updated: ${filePath}`);
    }
  });

  saveHistory(history);
  console.log(`Saved replacement history to: ${HISTORY_FILE}`);
}
|
|
52
181
|
|
|
53
182
|
/**
 * Restores original phrases in every file reached by `walk` from the current
 * working directory.
 *
 * For each file, the recorded history events for that path (if any) are
 * replayed in reverse first. A full phrase-based reverse pass then runs on the
 * result, or on the untouched content when no history applied. The file is
 * rewritten only when at least one of the two steps changed it.
 *
 * @returns {Promise<void>} Resolves after all files are processed; rejects if
 *   the traversal fails.
 */
async function deanonymize() {
  const phrases = loadPhrases();
  const history = loadHistory();

  // Awaiting walk() keeps the returned promise pending until the traversal is
  // complete and lets failures reach the caller instead of becoming unhandled
  // rejections.
  await walk(process.cwd(), (filePath) => {
    const content = fs.readFileSync(filePath, 'utf8');

    let newContent = null;
    if (history && history.files && Array.isArray(history.files[filePath])) {
      newContent = applyReverseReplacementHistory(content, history.files[filePath]);
    }

    // The phrase-based pass starts from the history-restored text when there
    // is one, and from the file content otherwise.
    const fallbackInput = newContent || content;
    const fallbackContent = processContent(fallbackInput, phrases, true);

    // processContent returns null when it changed nothing; the history result
    // (possibly also null) is then what gets written.
    const finalContent = fallbackContent || newContent;
    if (finalContent) {
      fs.writeFileSync(filePath, finalContent, 'utf8');
      console.log(`Updated: ${filePath}`);
    }
  });
}
|
|
64
204
|
|
|
65
|
-
module.exports = {
|
|
205
|
+
module.exports = {
|
|
206
|
+
anonymize,
|
|
207
|
+
deanonymize,
|
|
208
|
+
processContent,
|
|
209
|
+
replaceWithCount,
|
|
210
|
+
applyReverseReplacementHistory,
|
|
211
|
+
HISTORY_FILE,
|
|
212
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "repo-anon",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "CLI tool to anonymize/de-anonymize repositories.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
},
|
|
12
12
|
"scripts": {
|
|
13
13
|
"test": "jest",
|
|
14
|
-
"lint": "eslint ."
|
|
14
|
+
"lint": "eslint .",
|
|
15
|
+
"start": "node bin/repo-anon.js"
|
|
15
16
|
},
|
|
16
17
|
"keywords": [],
|
|
17
18
|
"author": "",
|
package/tests/processor.test.js
CHANGED
|
@@ -1,27 +1,123 @@
|
|
|
1
|
-
const {
|
|
1
|
+
const {
|
|
2
|
+
processContent,
|
|
3
|
+
applyReverseReplacementHistory,
|
|
4
|
+
} = require('../lib/processor');
|
|
2
5
|
|
|
3
6
|
// Unit tests for the content-processing helpers exported by lib/processor.
describe('repo-anon processor', () => {
  // Fixture factories: every test gets its own fresh objects.
  const simplePhrases = () => ({ company: 'anon' });
  const wholeWordPhrases = () => ({
    ck: {
      placeholder: 'bb',
      wordReplace: true,
    },
  });
  // One recorded event stating that "company" was replaced twice.
  const companyHistory = () => [
    {
      search: 'company',
      replace: 'ANON_COMPANY',
      wordReplace: false,
      count: 2,
    },
  ];
  // Contains three placeholders, one more than the history accounts for.
  const anonymizedCompanyText = 'ANON_COMPANY x ANON_COMPANY ANON_COMPANY';

  test('should anonymize content', () => {
    expect(processContent('my company is here', simplePhrases())).toBe('my anon is here');
  });

  test('should deanonymize content', () => {
    expect(processContent('my anon is here', simplePhrases(), true)).toBe('my company is here');
  });

  test('should replace inside words by default', () => {
    expect(processContent('th with other', { th: 'BB' })).toBe('BB wiBB oBBer');
  });

  test('should only replace standalone words when wordReplace is true', () => {
    const output = processContent('ck back sack (ck) ck.', wholeWordPhrases());
    expect(output).toBe('bb back sack (bb) bb.');
  });

  test('should only deanonymize standalone placeholders when wordReplace is true', () => {
    const output = processContent('bb babb stubb (bb) bb.', wholeWordPhrases(), true);
    expect(output).toBe('ck babb stubb (ck) ck.');
  });

  test('should return null if no change', () => {
    expect(processContent('nothing here', simplePhrases())).toBeNull();
  });

  test('should track ordered replacements during anonymize', () => {
    const phrases = {
      company: 'ANON_COMPANY',
      project: 'ANON_PROJECT',
    };

    const { content, replacements } = processContent(
      'company project company',
      phrases,
      false,
      { trackHistory: true },
    );

    expect(content).toBe('ANON_COMPANY ANON_PROJECT ANON_COMPANY');
    expect(replacements).toEqual([
      {
        search: 'company',
        replace: 'ANON_COMPANY',
        wordReplace: false,
        count: 2,
      },
      {
        search: 'project',
        replace: 'ANON_PROJECT',
        wordReplace: false,
        count: 1,
      },
    ]);
  });

  test('should replay replacement history in reverse using recorded counts', () => {
    const restored = applyReverseReplacementHistory(anonymizedCompanyText, companyHistory());

    expect(restored).toBe('company x company ANON_COMPANY');
  });

  test('should support additional deanonymization after history replay', () => {
    const restoredFromHistory = applyReverseReplacementHistory(anonymizedCompanyText, companyHistory());
    const fullyRestored = processContent(restoredFromHistory, { company: 'ANON_COMPANY' }, true);

    expect(fullyRestored).toBe('company x company company');
  });
});
|