groove-dev 0.27.77 → 0.27.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/CLAUDE.md +0 -7
  2. package/MOE_TRAINING_PIPELINE.md +216 -12
  3. package/moe-training/DEPLOY_CENTRAL_COMMAND.md +413 -0
  4. package/moe-training/client/consent.js +96 -0
  5. package/moe-training/client/envelope-builder.js +56 -0
  6. package/moe-training/client/index.js +10 -0
  7. package/moe-training/client/parsers/claude-code.js +110 -0
  8. package/moe-training/client/parsers/codex.js +80 -0
  9. package/moe-training/client/parsers/gemini.js +80 -0
  10. package/moe-training/client/parsers/grok.js +16 -0
  11. package/moe-training/client/parsers/index.js +20 -0
  12. package/moe-training/client/scrubber.js +126 -0
  13. package/moe-training/client/session-attestation.js +114 -0
  14. package/moe-training/client/step-classifier.js +51 -0
  15. package/moe-training/client/trajectory-capture.js +227 -0
  16. package/moe-training/client/transmission-queue.js +93 -0
  17. package/moe-training/package-lock.json +1266 -0
  18. package/moe-training/package.json +20 -0
  19. package/moe-training/server/enrichment.js +24 -0
  20. package/moe-training/server/index.js +119 -0
  21. package/moe-training/server/ledger.js +110 -0
  22. package/moe-training/server/routes/ingest.js +96 -0
  23. package/moe-training/server/routes/sessions.js +43 -0
  24. package/moe-training/server/routes/stats.js +31 -0
  25. package/moe-training/server/scoring.js +63 -0
  26. package/moe-training/server/session-registry.js +156 -0
  27. package/moe-training/server/stats.js +129 -0
  28. package/moe-training/server/stitcher.js +69 -0
  29. package/moe-training/server/storage.js +147 -0
  30. package/moe-training/server/verifier.js +102 -0
  31. package/moe-training/shared/constants.js +30 -0
  32. package/moe-training/shared/crypto.js +45 -0
  33. package/moe-training/shared/envelope-schema.js +220 -0
  34. package/moe-training/test/client/consent.test.js +121 -0
  35. package/moe-training/test/client/envelope-builder.test.js +107 -0
  36. package/moe-training/test/client/parsers/claude-code.test.js +119 -0
  37. package/moe-training/test/client/parsers/codex.test.js +83 -0
  38. package/moe-training/test/client/parsers/gemini.test.js +99 -0
  39. package/moe-training/test/client/scrubber.test.js +133 -0
  40. package/moe-training/test/client/session-attestation-security.test.js +95 -0
  41. package/moe-training/test/client/step-classifier.test.js +88 -0
  42. package/moe-training/test/integration/handshake.test.js +260 -0
  43. package/moe-training/test/server/ingest-security.test.js +166 -0
  44. package/moe-training/test/server/ledger.test.js +131 -0
  45. package/moe-training/test/server/scoring.test.js +242 -0
  46. package/moe-training/test/server/session-registry.test.js +125 -0
  47. package/moe-training/test/server/stitcher.test.js +157 -0
  48. package/moe-training/test/server/verifier.test.js +232 -0
  49. package/moe-training/test/shared/crypto.test.js +87 -0
  50. package/moe-training/test/shared/envelope-schema.test.js +351 -0
  51. package/node_modules/@groove-dev/cli/package.json +1 -1
  52. package/node_modules/@groove-dev/daemon/package.json +1 -1
  53. package/node_modules/@groove-dev/daemon/src/agent-loop.js +48 -5
  54. package/node_modules/@groove-dev/daemon/src/api.js +77 -0
  55. package/node_modules/@groove-dev/daemon/src/index.js +61 -0
  56. package/node_modules/@groove-dev/daemon/src/journalist.js +64 -21
  57. package/node_modules/@groove-dev/daemon/src/process.js +199 -0
  58. package/node_modules/@groove-dev/daemon/src/providers/grok.js +15 -0
  59. package/node_modules/@groove-dev/daemon/src/state.js +20 -1
  60. package/node_modules/@groove-dev/gui/dist/assets/{index-BbmPDhuW.js → index-BJgEJ9lZ.js} +1677 -1677
  61. package/node_modules/@groove-dev/gui/dist/index.html +1 -1
  62. package/node_modules/@groove-dev/gui/package.json +1 -1
  63. package/node_modules/@groove-dev/gui/src/stores/groove.js +32 -0
  64. package/node_modules/@groove-dev/gui/src/views/settings.jsx +167 -1
  65. package/package.json +1 -1
  66. package/packages/cli/package.json +1 -1
  67. package/packages/daemon/package.json +1 -1
  68. package/packages/daemon/src/agent-loop.js +48 -5
  69. package/packages/daemon/src/api.js +77 -0
  70. package/packages/daemon/src/index.js +61 -0
  71. package/packages/daemon/src/journalist.js +64 -21
  72. package/packages/daemon/src/process.js +199 -0
  73. package/packages/daemon/src/providers/grok.js +15 -0
  74. package/packages/daemon/src/state.js +20 -1
  75. package/packages/gui/dist/assets/{index-BbmPDhuW.js → index-BJgEJ9lZ.js} +1677 -1677
  76. package/packages/gui/dist/index.html +1 -1
  77. package/packages/gui/package.json +1 -1
  78. package/packages/gui/src/stores/groove.js +32 -0
  79. package/packages/gui/src/views/settings.jsx +167 -1
@@ -0,0 +1,96 @@
1
+ // FSL-1.1-Apache-2.0 — see LICENSE
2
+
3
+ import Database from 'better-sqlite3';
4
+ import { randomUUID } from 'node:crypto';
5
+ import { existsSync, mkdirSync, readFileSync, writeFileSync, chmodSync } from 'node:fs';
6
+ import { join } from 'node:path';
7
+ import { homedir } from 'node:os';
8
+ import { CURRENT_CONSENT_VERSION } from '../shared/constants.js';
9
+
10
/**
 * Append-only store of consent decisions backed by a local SQLite database.
 *
 * Every decision is inserted as a new row in `consent_history`; the row with
 * the highest id for a user is treated as that user's current decision.
 */
export class ConsentManager {
  /**
   * Opens (creating if necessary) the consent database and its table.
   *
   * @param {string} [dbPath] Database file path. Defaults to
   *   `~/.groove/consent.db`.
   */
  constructor(dbPath) {
    this._dbPath = dbPath || join(homedir(), '.groove', 'consent.db');
    // join(path, '..') normalizes to the parent directory and yields '.' for a
    // bare file name, so a path without separators never creates a directory
    // named after the database file itself.
    const dir = join(this._dbPath, '..');
    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
    this._db = new Database(this._dbPath);
    this._db.pragma('journal_mode = WAL');
    this._db.exec(`
      CREATE TABLE IF NOT EXISTS consent_history (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_id TEXT NOT NULL,
        opted_in INTEGER NOT NULL,
        consent_version TEXT NOT NULL,
        metadata TEXT,
        created_at TEXT NOT NULL DEFAULT (datetime('now'))
      )
    `);
  }

  /**
   * Appends a consent decision for a user.
   *
   * @param {string} userId
   * @param {boolean} optedIn Stored as 1 (truthy) or 0 (falsy).
   * @param {string} consentVersion
   * @param {object} [metadata] Serialized with JSON.stringify when truthy,
   *   otherwise stored as NULL.
   */
  recordConsent(userId, optedIn, consentVersion, metadata) {
    this._db.prepare(
      'INSERT INTO consent_history (user_id, opted_in, consent_version, metadata) VALUES (?, ?, ?, ?)'
    ).run(userId, optedIn ? 1 : 0, consentVersion, metadata ? JSON.stringify(metadata) : null);
  }

  /**
   * Reports whether the user's most recent decision is an opt-in recorded
   * under CURRENT_CONSENT_VERSION.
   *
   * @param {string} userId
   * @returns {boolean} false when there is no row, when the latest row was
   *   recorded under a different consent version, or when it is an opt-out.
   */
  isOptedIn(userId) {
    const row = this._db.prepare(
      'SELECT opted_in, consent_version FROM consent_history WHERE user_id = ? ORDER BY id DESC LIMIT 1'
    ).get(userId);
    if (!row) return false;
    if (row.consent_version !== CURRENT_CONSENT_VERSION) return false;
    return row.opted_in === 1;
  }

  /**
   * Records an opt-out for the user under CURRENT_CONSENT_VERSION.
   *
   * @param {string} userId
   */
  revokeConsent(userId) {
    this.recordConsent(userId, false, CURRENT_CONSENT_VERSION);
  }

  /**
   * Counts users whose latest row is an opt-in under CURRENT_CONSENT_VERSION.
   *
   * @returns {number}
   */
  getOptedInCount() {
    const row = this._db.prepare(`
      SELECT COUNT(DISTINCT user_id) as cnt FROM consent_history ch1
      WHERE opted_in = 1
        AND consent_version = ?
        AND id = (SELECT MAX(id) FROM consent_history ch2 WHERE ch2.user_id = ch1.user_id)
    `).get(CURRENT_CONSENT_VERSION);
    return row?.cnt ?? 0;
  }

  /**
   * Returns every recorded decision for a user, oldest first.
   *
   * @param {string} userId
   * @returns {object[]} Rows with `opted_in` converted to a boolean and
   *   `metadata` parsed from JSON (null when none was stored).
   */
  getConsentHistory(userId) {
    const rows = this._db.prepare(
      'SELECT * FROM consent_history WHERE user_id = ? ORDER BY id ASC'
    ).all(userId);
    return rows.map((r) => ({
      ...r,
      opted_in: r.opted_in === 1,
      metadata: r.metadata ? JSON.parse(r.metadata) : null,
    }));
  }

  /** Closes the underlying database handle. */
  close() {
    this._db.close();
  }

  /**
   * Reads the persisted user id, creating one when none is stored yet.
   *
   * @param {string} [userIdPath] Id file path. Defaults to `~/.groove/user_id`.
   * @returns {string} 32 hex characters for a newly generated id, otherwise
   *   the trimmed file content.
   */
  static getOrCreateUserId(userIdPath) {
    const filePath = userIdPath || join(homedir(), '.groove', 'user_id');
    const dir = join(filePath, '..');
    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
    if (existsSync(filePath)) {
      const existing = readFileSync(filePath, 'utf-8').trim();
      // An empty or whitespace-only file (e.g. an interrupted write) is not a
      // usable id; fall through and generate a fresh one instead of
      // returning '' forever.
      if (existing) return existing;
    }
    const uid = randomUUID().replace(/-/g, '');
    writeFileSync(filePath, uid, { mode: 0o600 });
    return uid;
  }

  /**
   * Checks, without creating a user id, whether the stored user has opted in.
   *
   * @param {string} [userIdPath] Id file path. Defaults to `~/.groove/user_id`.
   * @param {string} [dbPath] Passed through to the ConsentManager constructor.
   * @returns {boolean} false when the id file is missing or holds no id.
   */
  static isCaptureEnabled(userIdPath, dbPath) {
    const filePath = userIdPath || join(homedir(), '.groove', 'user_id');
    if (!existsSync(filePath)) return false;
    const userId = readFileSync(filePath, 'utf-8').trim();
    // No id means nobody can have consented; skip opening the database.
    if (!userId) return false;
    const manager = new ConsentManager(dbPath);
    try {
      return manager.isOptedIn(userId);
    } finally {
      manager.close();
    }
  }
}
@@ -0,0 +1,56 @@
1
+ // FSL-1.1-Apache-2.0 — see LICENSE
2
+
3
+ import { randomUUID } from 'node:crypto';
4
+ import { CHUNK_SIZE } from '../shared/constants.js';
5
+
6
// Maximum number of characters kept from a single step's string `content`.
const MAX_STEP_CONTENT_CHARS = 10_000;
// Ceiling applied to a single step's numeric `token_count`.
const MAX_STEP_TOKEN_COUNT = 100_000;

/**
 * Buffers trajectory steps for one session and packages them into envelopes
 * once CHUNK_SIZE steps have accumulated (or on an explicit flush).
 */
export class EnvelopeBuilder {
  /**
   * @param {string} sessionId Copied into every envelope as `session_id`.
   * @param {string} contributorId Copied into chunk envelopes as `contributor_id`.
   * @param {object} [metadata] Shallow-copied into each chunk envelope.
   */
  constructor(sessionId, contributorId, metadata) {
    this._sessionId = sessionId;
    this._contributorId = contributorId;
    this._metadata = metadata;
    this._buffer = [];
    this._chunkSequence = 0;
  }

  /**
   * Buffers one step, clamping oversized `content` and `token_count`.
   *
   * The step is shallow-copied first so the clamping never alters the
   * caller's object.
   *
   * @param {object} step
   * @returns {object|null} A chunk envelope when the buffer reaches
   *   CHUNK_SIZE, otherwise null.
   * @throws {TypeError} When `step` is not an object.
   */
  addStep(step) {
    if (step === null || typeof step !== 'object') {
      throw new TypeError('addStep(step) requires a step object');
    }
    const entry = { ...step };
    if (typeof entry.content === 'string' && entry.content.length > MAX_STEP_CONTENT_CHARS) {
      entry.content = entry.content.slice(0, MAX_STEP_CONTENT_CHARS);
    }
    if (typeof entry.token_count === 'number' && entry.token_count > MAX_STEP_TOKEN_COUNT) {
      entry.token_count = MAX_STEP_TOKEN_COUNT;
    }
    this._buffer.push(entry);
    if (this._buffer.length >= CHUNK_SIZE) {
      return this._buildEnvelope();
    }
    return null;
  }

  /**
   * Emits whatever is buffered as a (possibly short) chunk envelope.
   *
   * @returns {object|null} null when nothing is buffered.
   */
  flush() {
    if (this._buffer.length === 0) return null;
    return this._buildEnvelope();
  }

  /**
   * Builds the SESSION_CLOSE envelope for this session.
   *
   * @param {*} outcome Stored verbatim under `outcome`.
   * @returns {object} Envelope whose attestation fields are left blank.
   */
  buildSessionClose(outcome) {
    return {
      envelope_id: `env_${randomUUID()}`,
      session_id: this._sessionId,
      type: 'SESSION_CLOSE',
      attestation: { session_hmac: '', sequence: 0, app_version_hash: '' },
      outcome,
    };
  }

  /**
   * Drains the buffer into a new chunk envelope and advances the chunk
   * sequence counter.
   *
   * @returns {object}
   */
  _buildEnvelope() {
    const envelope = {
      envelope_id: `env_${randomUUID()}`,
      session_id: this._sessionId,
      chunk_sequence: this._chunkSequence++,
      contributor_id: this._contributorId,
      attestation: { session_hmac: '', sequence: 0, app_version_hash: '' },
      metadata: { ...this._metadata },
      // splice(0) empties the buffer and hands back everything it held.
      trajectory_log: this._buffer.splice(0),
    };
    return envelope;
  }
}
@@ -0,0 +1,10 @@
1
+ // FSL-1.1-Apache-2.0 — see LICENSE
2
+
3
+ export { TrajectoryCapture } from './trajectory-capture.js';
4
+ export { ConsentManager } from './consent.js';
5
+ export { PIIScrubber } from './scrubber.js';
6
+ export { SessionAttestation } from './session-attestation.js';
7
+ export { TransmissionQueue } from './transmission-queue.js';
8
+ export { EnvelopeBuilder } from './envelope-builder.js';
9
+ export { StepClassifier } from './step-classifier.js';
10
+ export { getParser } from './parsers/index.js';
@@ -0,0 +1,110 @@
1
+ // FSL-1.1-Apache-2.0 — see LICENSE
2
+
3
+ import { OBSERVATION_TRUNCATE_HEAD, OBSERVATION_TRUNCATE_TAIL } from '../../shared/constants.js';
4
+
5
/**
 * Shortens a long multi-line observation to its first
 * OBSERVATION_TRUNCATE_HEAD and last OBSERVATION_TRUNCATE_TAIL lines, with a
 * marker line stating how many lines were dropped in between.
 *
 * @param {*} text Non-string or empty values are returned unchanged.
 * @returns {*} The original text when it is short enough, otherwise the
 *   shortened text.
 */
function truncateObservation(text) {
  if (!text || typeof text !== 'string') return text;
  const lines = text.split('\n');
  const kept = OBSERVATION_TRUNCATE_HEAD + OBSERVATION_TRUNCATE_TAIL;
  if (lines.length <= kept) return text;
  const head = lines.slice(0, OBSERVATION_TRUNCATE_HEAD);
  // Index from the end explicitly: slice(-0) is slice(0) and would return
  // every line when the tail size is configured as 0.
  const tail = lines.slice(lines.length - OBSERVATION_TRUNCATE_TAIL);
  const omitted = lines.length - kept;
  return [...head, `[... ${omitted} lines omitted ...]`, ...tail].join('\n');
}
14
+
15
/**
 * Converts Claude Code JSON stream events into trajectory steps.
 *
 * NOTE(review): tool_result blocks are only looked for inside `assistant`
 * events. If the CLI delivers tool results in another event type, entries in
 * `_pendingToolUse` are never removed — confirm against the stream format.
 */
export class ClaudeCodeParser {
  constructor() {
    // tool_use blocks seen so far, keyed by block id, until a matching
    // tool_result arrives.
    this._pendingToolUse = new Map();
  }

  /**
   * Parses one event.
   *
   * @param {object} jsonEvent
   * @returns {object|object[]|null} A single step, an array when the event
   *   produced several steps, or null when it produced none.
   */
  parseEvent(jsonEvent) {
    if (!jsonEvent || !jsonEvent.type) return null;

    if (jsonEvent.type === 'assistant') {
      return this._parseAssistant(jsonEvent);
    }

    if (jsonEvent.type === 'result') {
      const raw = jsonEvent.result;
      return {
        type: 'resolution',
        // JSON.stringify(undefined) is undefined; fall back to '' so content
        // is always a string even when the event carries no `result` field.
        content: typeof raw === 'string' ? raw : (JSON.stringify(raw) ?? ''),
        token_count: jsonEvent.total_tokens_used || 0,
      };
    }

    return null;
  }

  /**
   * Maps the content blocks of an `assistant` event to steps.
   *
   * @param {object} jsonEvent
   * @returns {object|object[]|null}
   */
  _parseAssistant(jsonEvent) {
    const contentBlocks = jsonEvent.message?.content;
    if (!Array.isArray(contentBlocks)) return null;

    const results = [];
    for (const block of contentBlocks) {
      // Skip holes / malformed entries instead of throwing on `block.type`.
      if (!block || typeof block !== 'object') continue;

      if (block.type === 'text') {
        results.push({
          type: 'thought',
          content: block.text || '',
          token_count: jsonEvent.message?.usage?.output_tokens || 0,
        });
      } else if (block.type === 'tool_use') {
        this._pendingToolUse.set(block.id, block);
        results.push({
          type: 'action',
          tool: block.name,
          arguments: block.input,
          content: `Using ${block.name}`,
        });
      } else if (block.type === 'tool_result') {
        results.push(this._parseToolResult(block));
      }
    }

    if (results.length === 0) return null;
    return results.length === 1 ? results[0] : results;
  }

  /**
   * Turns a tool_result block into an `error` or `observation` step and
   * clears the matching pending tool_use entry.
   *
   * @param {object} block
   * @returns {object}
   */
  _parseToolResult(block) {
    this._pendingToolUse.delete(block.tool_use_id);

    let resultContent = '';
    if (Array.isArray(block.content)) {
      // `c?.text` tolerates null parts inside the content array.
      resultContent = block.content.map((c) => c?.text || '').join('\n');
    } else if (typeof block.content === 'string') {
      resultContent = block.content;
    }

    if (block.is_error) {
      return { type: 'error', content: resultContent, is_error: true };
    }
    return {
      type: 'observation',
      content: truncateObservation(resultContent),
      is_error: false,
    };
  }

  /**
   * Extracts token usage from `assistant` and `result` events.
   *
   * @param {object} jsonEvent
   * @returns {{input: number, output: number, cacheRead: number, cacheCreation: number}|null}
   */
  extractTokens(jsonEvent) {
    if (!jsonEvent) return null;
    if (jsonEvent.type === 'assistant') {
      const usage = jsonEvent.message?.usage;
      if (!usage) return null;
      return {
        input: usage.input_tokens || 0,
        output: usage.output_tokens || 0,
        cacheRead: usage.cache_read_input_tokens || 0,
        cacheCreation: usage.cache_creation_input_tokens || 0,
      };
    }
    if (jsonEvent.type === 'result') {
      return {
        input: jsonEvent.total_input_tokens || 0,
        output: jsonEvent.total_output_tokens || 0,
        cacheRead: 0,
        cacheCreation: 0,
      };
    }
    return null;
  }

  /**
   * @param {object} jsonEvent
   * @returns {string|null} The event's `session_id`, if any.
   */
  extractSessionId(jsonEvent) {
    return jsonEvent?.session_id || null;
  }

  /**
   * @param {object} jsonEvent
   * @returns {string|null} The model named by an `assistant` event, if any.
   */
  extractModel(jsonEvent) {
    if (jsonEvent?.type === 'assistant') {
      return jsonEvent.message?.model || null;
    }
    return null;
  }
}
@@ -0,0 +1,80 @@
1
+ // FSL-1.1-Apache-2.0 — see LICENSE
2
+
3
// Maximum number of characters of tool output kept in an observation/error.
const CODEX_OUTPUT_LIMIT = 2000;

/**
 * Normalizes an item's output to a string capped at CODEX_OUTPUT_LIMIT.
 * Falsy values become ''; non-string values are JSON-encoded so the step's
 * `content` is always a string.
 */
function codexOutputText(value) {
  if (!value) return '';
  const text = typeof value === 'string' ? value : (JSON.stringify(value) ?? String(value));
  return text.slice(0, CODEX_OUTPUT_LIMIT);
}

/** Returns the file path carried by a file item (`path`, else `file`, else ''). */
function codexItemPath(item) {
  return item.path || item.file || '';
}

/**
 * Converts Codex JSON stream events into trajectory steps.
 */
export class CodexParser {
  constructor() {
    // Thread id remembered from the most recent `thread.started` event.
    this._sessionId = null;
  }

  /**
   * Parses one event.
   *
   * @param {object} jsonEvent
   * @returns {object|null} A step, or null when the event maps to none.
   */
  parseEvent(jsonEvent) {
    if (!jsonEvent || !jsonEvent.type) return null;

    switch (jsonEvent.type) {
      case 'thread.started': {
        this._sessionId = jsonEvent.thread_id || null;
        return null;
      }

      case 'item.started': {
        const item = jsonEvent.item || {};
        if (item.type === 'agent_message') {
          return { type: 'thought', content: item.text || '' };
        }
        if (item.type === 'command_execution') {
          return { type: 'action', tool: 'command_execution', arguments: { command: item.command }, content: `Executing: ${item.command || ''}` };
        }
        if (item.type === 'file_edit' || item.type === 'file_write') {
          const path = codexItemPath(item);
          return { type: 'action', tool: item.type, arguments: { path }, content: `${item.type}: ${path}` };
        }
        if (item.type === 'file_read') {
          const path = codexItemPath(item);
          return { type: 'action', tool: 'file_read', arguments: { path }, content: `Reading: ${path}` };
        }
        return null;
      }

      case 'item.completed': {
        const item = jsonEvent.item || {};
        if (item.type === 'agent_message') {
          return { type: 'thought', content: item.text || '' };
        }
        if (item.type === 'command_execution') {
          const output = codexOutputText(item.aggregated_output);
          // Any exit code other than exactly 0 (including a missing one) is
          // reported as an error step.
          if (item.exit_code !== 0) {
            return { type: 'error', content: output || `Exit code: ${item.exit_code}` };
          }
          return { type: 'observation', content: output };
        }
        if (item.type === 'file_edit' || item.type === 'file_write' || item.type === 'file_read') {
          return { type: 'observation', content: codexOutputText(item.output || item.content) };
        }
        return null;
      }

      case 'turn.completed': {
        return { type: 'resolution', content: '' };
      }

      default:
        return null;
    }
  }

  /**
   * Extracts token usage from an event's `usage` or `item.usage`.
   *
   * @param {object} jsonEvent
   * @returns {{input: number, output: number, cacheRead: number, cacheCreation: number}|null}
   */
  extractTokens(jsonEvent) {
    if (!jsonEvent) return null;
    const usage = jsonEvent.usage || jsonEvent.item?.usage;
    if (!usage) return null;
    return {
      input: usage.input_tokens || 0,
      output: usage.output_tokens || 0,
      cacheRead: 0,
      cacheCreation: 0,
    };
  }

  /**
   * @param {object} jsonEvent
   * @returns {string|null} The thread id of a `thread.started` event.
   */
  extractSessionId(jsonEvent) {
    if (jsonEvent?.type === 'thread.started') {
      return jsonEvent.thread_id || null;
    }
    return null;
  }
}
@@ -0,0 +1,80 @@
1
+ // FSL-1.1-Apache-2.0 — see LICENSE
2
+
3
// Maximum number of characters of tool output kept in an observation.
const GEMINI_OUTPUT_LIMIT = 2000;

/**
 * Normalizes an event's `content` into an array of part objects.
 * A string becomes one `{ text }` part, a single object becomes a one-element
 * array, string entries inside an array become `{ text }` parts, and
 * null / non-object entries are dropped so callers can read `part.type` and
 * `part.text` without guarding.
 */
function geminiContentParts(raw) {
  const list = Array.isArray(raw) ? raw : (raw ? [raw] : []);
  return list
    .map((part) => (typeof part === 'string' ? { text: part } : part))
    .filter((part) => part !== null && typeof part === 'object');
}

/**
 * Converts Gemini JSON stream events into trajectory steps.
 */
export class GeminiParser {
  constructor() {
    // Stream id remembered from the most recent `agent_start` event.
    this._sessionId = null;
  }

  /**
   * Parses one event.
   *
   * @param {object} jsonEvent
   * @returns {object|null} A step, or null when the event maps to none.
   */
  parseEvent(jsonEvent) {
    if (!jsonEvent || !jsonEvent.type) return null;

    switch (jsonEvent.type) {
      case 'agent_start': {
        this._sessionId = jsonEvent.streamId || null;
        return null;
      }

      case 'message': {
        if (jsonEvent.role === 'user') return null;
        const parts = geminiContentParts(jsonEvent.content);

        // An explicit thought part wins over plain text; only the first one
        // is reported.
        const thoughtPart = parts.find((part) => part.type === 'thought');
        if (thoughtPart) {
          return { type: 'thought', content: thoughtPart.thought || thoughtPart.text || '' };
        }

        const text = parts.map((p) => p.text || '').filter(Boolean).join('\n');
        return text ? { type: 'thought', content: text } : null;
      }

      case 'tool_request': {
        return {
          type: 'action',
          tool: jsonEvent.name || 'Tool',
          arguments: jsonEvent.args || {},
          content: `Using ${jsonEvent.name || 'Tool'}`,
        };
      }

      case 'tool_response': {
        const content = geminiContentParts(jsonEvent.content)
          .map((p) => p.text || '')
          .join('')
          .slice(0, GEMINI_OUTPUT_LIMIT);
        return { type: 'observation', content };
      }

      case 'error': {
        return { type: 'error', content: jsonEvent.message || 'Unknown error' };
      }

      case 'agent_end': {
        return { type: 'resolution', content: '' };
      }

      default:
        return null;
    }
  }

  /**
   * Extracts token usage from a `usage` event.
   *
   * @param {object} jsonEvent
   * @returns {{input: number, output: number, cacheRead: number, cacheCreation: number}|null}
   */
  extractTokens(jsonEvent) {
    if (!jsonEvent || jsonEvent.type !== 'usage') return null;
    return {
      input: jsonEvent.inputTokens || 0,
      output: jsonEvent.outputTokens || 0,
      cacheRead: jsonEvent.cachedTokens || 0,
      cacheCreation: 0,
    };
  }

  /**
   * @param {object} jsonEvent
   * @returns {string|null} The stream id of an `agent_start` event.
   */
  extractSessionId(jsonEvent) {
    if (jsonEvent?.type === 'agent_start') {
      return jsonEvent.streamId || null;
    }
    return null;
  }
}
@@ -0,0 +1,16 @@
1
+ // FSL-1.1-Apache-2.0 — see LICENSE
2
+
3
/**
 * Placeholder parser for the Grok provider. Every method reports "nothing to
 * extract" by returning null.
 */
export class GrokParser {
  // TODO: Grok agentic CLI not yet available. Wire up when headless CLI is built.

  /** @returns {null} Always null; no events are parsed yet. */
  parseEvent(_jsonEvent) {
    return null;
  }

  /** @returns {null} Always null; no token usage is extracted yet. */
  extractTokens(_jsonEvent) {
    return null;
  }

  /** @returns {null} Always null; no session id is extracted yet. */
  extractSessionId(_jsonEvent) {
    return null;
  }
}
@@ -0,0 +1,20 @@
1
+ // FSL-1.1-Apache-2.0 — see LICENSE
2
+
3
+ import { SUPPORTED_PROVIDERS } from '../../shared/constants.js';
4
+ import { ClaudeCodeParser } from './claude-code.js';
5
+ import { CodexParser } from './codex.js';
6
+ import { GeminiParser } from './gemini.js';
7
+ import { GrokParser } from './grok.js';
8
+
9
// Parser class for each provider name.
const constructors = {
  'claude-code': ClaudeCodeParser,
  codex: CodexParser,
  gemini: GeminiParser,
  grok: GrokParser,
};

/**
 * Creates a fresh parser for the given provider.
 *
 * @param {string} providerName
 * @returns {object|null} A new parser instance, or null when the name is not
 *   in SUPPORTED_PROVIDERS or has no parser class registered.
 */
export function getParser(providerName) {
  const isSupported = SUPPORTED_PROVIDERS.includes(providerName);
  const ParserClass = isSupported ? constructors[providerName] : undefined;
  if (!ParserClass) {
    return null;
  }
  return new ParserClass();
}
@@ -0,0 +1,126 @@
1
+ // FSL-1.1-Apache-2.0 — see LICENSE
2
+
3
/**
 * Validates a digit sequence with the Luhn checksum.
 *
 * @param {string|string[]} digits Decimal digits, as a string or an array of
 *   single-digit strings.
 * @returns {boolean} true when the checksum is valid; false for empty input,
 *   non-digit characters, or unsupported input types.
 */
function luhnCheck(digits) {
  const text = Array.isArray(digits) ? digits.join('') : digits;
  // An empty sequence sums to 0 and would otherwise pass the modulo test.
  if (typeof text !== 'string' || !/^\d+$/.test(text)) return false;

  let sum = 0;
  let alt = false;
  // Walk right to left, doubling every second digit.
  for (let i = text.length - 1; i >= 0; i--) {
    let n = Number.parseInt(text[i], 10);
    if (alt) {
      n *= 2;
      if (n > 9) n -= 9;
    }
    sum += n;
    alt = !alt;
  }
  return sum % 10 === 0;
}
17
+
18
/**
 * Replaces secrets and personal data in free text with fixed placeholders.
 *
 * Patterns are applied in array order, each to the output of the previous
 * one, so earlier patterns take precedence over later ones.
 */
export class PIIScrubber {
  constructor() {
    // Building blocks for the IPv6 pattern.
    const hexGroup = '[0-9a-fA-F]{1,4}';
    const dottedQuad = '(?:\\d{1,3}\\.){3}\\d{1,3}';
    // What may follow a "::": hex groups optionally ending in an embedded
    // IPv4 address, or hex groups alone.
    const compressedTail = `(?:(?:${hexGroup}:){0,5}${dottedQuad}|${hexGroup}(?::${hexGroup}){0,6})`;
    // - The lookbehind keeps "::" scope operators in code (std::vector,
    //   Net::HTTP) from being rewritten.
    // - The compressed forms consume every group after "::" so no trailing
    //   part of the address is left behind in the output.
    // - A bare "::" with nothing after it is intentionally not matched.
    const ipv6Regex = new RegExp(
      [
        '(?<![\\w:])(?:',
        `(?:${hexGroup}:){7}${hexGroup}`,
        `|(?:${hexGroup}:){1,7}:${compressedTail}?(?!\\w)`,
        `|::${compressedTail}(?!\\w)`,
        ')',
      ].join(''),
      'g',
    );

    this._patterns = [
      {
        name: 'pem_private_key',
        regex: /-----BEGIN[A-Z ]*PRIVATE KEY-----[\s\S]*?-----END[A-Z ]*PRIVATE KEY-----/g,
        replacement: '[PRIVATE_KEY]',
      },
      {
        name: 'aws_key',
        regex: /AKIA[0-9A-Z]{16}/g,
        replacement: '[AWS_KEY]',
      },
      {
        name: 'jwt_token',
        regex: /eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}/g,
        replacement: '[API_KEY]',
      },
      {
        name: 'bearer_token',
        regex: /Bearer\s+[A-Za-z0-9._~+/\-]+=*/g,
        replacement: '[API_KEY]',
      },
      {
        name: 'sk_pk_key',
        regex: /(?:sk|pk)_[a-zA-Z0-9_]{20,}/g,
        replacement: '[API_KEY]',
      },
      {
        name: 'credit_card',
        regex: /\b(\d{4})[- ]?(\d{4})[- ]?(\d{4})[- ]?(\d{4})\b/g,
        replacement: null, // handled in scrub() with Luhn
      },
      {
        name: 'ssn',
        regex: /\b\d{3}-\d{2}-\d{4}\b/g,
        replacement: '[SSN]',
      },
      {
        name: 'email',
        regex: /[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}/g,
        replacement: '[EMAIL]',
      },
      {
        name: 'email_urlencoded',
        regex: /[a-zA-Z0-9._%+-]+%40[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
        replacement: '[EMAIL]',
      },
      {
        name: 'ipv6',
        regex: ipv6Regex,
        replacement: '[IP]',
      },
      {
        name: 'ipv4',
        regex: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
        replacement: '[IP]',
      },
      {
        name: 'intl_phone',
        regex: /\+\d{1,3}[\s.-]?\(?\d{1,4}\)?[\s.-]?\d{2,4}[\s.-]?\d{2,4}(?:[\s.-]?\d{1,4})?/g,
        replacement: '[PHONE]',
      },
      {
        name: 'phone',
        regex: /(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
        replacement: '[PHONE]',
      },
      {
        name: 'url_with_secret',
        regex: /https?:\/\/[^\s]*[?&](?:token|key|secret|password|api_key|apikey|access_token|auth)=[^\s&]*/gi,
        replacement: '[REDACTED_URL]',
      },
      {
        name: 'long_hex',
        regex: /\b[0-9a-fA-F]{40,}\b/g,
        replacement: '[API_KEY]',
      },
      {
        name: 'home_path',
        regex: /(?:\/Users\/[^\s]+|\/home\/[^\s]+|C:\\Users\\[^\s]+)/g,
        replacement: '[FILE_PATH]',
      },
      {
        name: 'base64_secret',
        regex: /(?<![A-Za-z0-9+/])[A-Za-z0-9+/]{40,}={0,2}(?![A-Za-z0-9+/])/g,
        replacement: '[API_KEY]',
      },
    ];
  }

  /**
   * Applies every pattern to the text.
   *
   * @param {*} text Non-string or empty values are returned unchanged.
   * @returns {*} The scrubbed string.
   */
  scrub(text) {
    if (!text || typeof text !== 'string') return text;
    let result = text;

    for (const pattern of this._patterns) {
      if (pattern.name === 'credit_card') {
        // Only 16-digit groups that pass the Luhn checksum are redacted, so
        // arbitrary 16-digit numbers are left alone.
        result = result.replace(pattern.regex, (match, g1, g2, g3, g4) => {
          const digits = (g1 + g2 + g3 + g4);
          return luhnCheck(digits) ? '[CREDIT_CARD]' : match;
        });
      } else {
        result = result.replace(pattern.regex, pattern.replacement);
      }
    }

    return result;
  }
}