@ogcio/o11y-sdk-node 0.3.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/CHANGELOG.md +21 -0
  2. package/dist/lib/exporter/pii-exporter-decorator.d.ts +2 -0
  3. package/dist/lib/exporter/pii-exporter-decorator.js +27 -12
  4. package/dist/lib/index.d.ts +5 -0
  5. package/dist/lib/instrumentation.node.js +0 -8
  6. package/dist/lib/internals/redaction/pii-detection.d.ts +25 -0
  7. package/dist/lib/internals/redaction/pii-detection.js +80 -0
  8. package/dist/lib/internals/redaction/redactors/email.d.ts +8 -0
  9. package/dist/lib/internals/redaction/redactors/email.js +48 -0
  10. package/dist/lib/internals/redaction/redactors/index.d.ts +4 -0
  11. package/dist/lib/internals/redaction/redactors/index.js +6 -0
  12. package/dist/lib/internals/redaction/redactors/ip.d.ts +10 -0
  13. package/dist/lib/internals/redaction/redactors/ip.js +54 -0
  14. package/dist/lib/processor/enrich-logger-processor.d.ts +2 -2
  15. package/dist/package.json +14 -14
  16. package/dist/vitest.config.js +4 -4
  17. package/lib/exporter/pii-exporter-decorator.ts +53 -18
  18. package/lib/index.ts +5 -0
  19. package/lib/instrumentation.node.ts +0 -10
  20. package/lib/internals/redaction/pii-detection.ts +113 -0
  21. package/lib/internals/redaction/redactors/email.ts +58 -0
  22. package/lib/internals/redaction/redactors/index.ts +12 -0
  23. package/lib/internals/redaction/redactors/ip.ts +68 -0
  24. package/lib/internals/shared-metrics.ts +1 -1
  25. package/lib/processor/enrich-logger-processor.ts +2 -2
  26. package/package.json +14 -14
  27. package/test/internals/pii-detection.test.ts +157 -33
  28. package/test/internals/redactors/email.test.ts +81 -0
  29. package/test/internals/redactors/ip.test.ts +93 -0
  30. package/test/traces/active-span.test.ts +1 -1
  31. package/vitest.config.ts +4 -4
  32. package/dist/lib/internals/pii-detection.d.ts +0 -17
  33. package/dist/lib/internals/pii-detection.js +0 -116
  34. package/lib/internals/pii-detection.ts +0 -145
@@ -0,0 +1,113 @@
1
+ import type { AnyValue, AnyValueMap } from "@opentelemetry/api-logs";
2
+ import { Redactor } from "./redactors/index.js";
3
+
4
+ const decoder = new TextDecoder();
5
+ const encoder = new TextEncoder();
6
+
7
+ export type PIISource = "trace" | "log" | "metric";
8
+
/**
 * Heuristically decides whether a string is URI-encoded as a whole.
 *
 * The string counts as encoded when either:
 * - `decodeURI` and `decodeURIComponent` disagree on it, i.e. it carries an
 *   escape that only `decodeURIComponent` resolves (such as `%40` or `%2F`); or
 * - decoding changes it and re-encoding the decoded text with
 *   `encodeURIComponent` or `encodeURI` reproduces the input exactly.
 *
 * Any error raised while decoding or re-encoding (for example a malformed
 * percent sequence) yields `false`.
 *
 * @param {string} value - The string to inspect.
 * @returns {boolean} `true` if the string is considered encoded, `false` otherwise.
 */
export function _containsEncodedComponents(value: string): boolean {
  try {
    const componentDecoded = decodeURIComponent(value);
    const uriDecoded = decodeURI(value);

    // The two decoders only differ when an escape of a reserved character is present.
    if (uriDecoded !== componentDecoded) {
      return true;
    }

    // Decoding was a no-op: there is nothing encoded in the input.
    if (componentDecoded === value) {
      return false;
    }

    // Accept only inputs that survive a decode/encode round trip unchanged.
    return [encodeURIComponent, encodeURI].some(
      (encode) => encode(componentDecoded) === value,
    );
  } catch {
    return false;
  }
}
34
+
/**
 * Runs a string through every supplied redactor, in order, and returns the result.
 *
 * When `_containsEncodedComponents` reports the input as URI-encoded, the
 * redactors receive the `decodeURIComponent`-decoded text and the kind `"url"`;
 * otherwise they receive the input as-is with the kind `"string"`. The decoded
 * text is not re-encoded afterwards.
 *
 * @param {string} value - The input value to sanitize. Non-string values are returned untouched.
 * @param {"trace" | "log" | "metric"} source - The signal the value belongs to; forwarded to each redactor.
 * @param {Redactor[]} redactors - The string processors containing the redaction logic.
 *
 * @returns {string} The output of the last redactor (or the possibly-decoded input when the list is empty).
 */
export function _cleanStringPII(
  value: string,
  source: PIISource,
  redactors: Redactor[],
): string {
  // Runtime guard: callers may pass attribute values that are not strings.
  if (typeof value !== "string") {
    return value;
  }

  const isEncoded = _containsEncodedComponents(value);
  const kind: "string" | "url" = isEncoded ? "url" : "string";

  let current: string = isEncoded ? decodeURIComponent(value) : value;
  for (const redact of redactors) {
    current = redact(current, source, kind);
  }
  return current;
}
70
+
/**
 * Strict UTF-8 decoder for binary payloads.
 *
 * A default `TextDecoder` silently substitutes U+FFFD for invalid byte
 * sequences instead of throwing, which would make the fallback below
 * unreachable and rewrite non-text binary data. With `fatal: true`, invalid
 * UTF-8 raises and the payload is passed through untouched.
 */
const strictUtf8Decoder = new TextDecoder("utf-8", { fatal: true });

/**
 * Recursively walks a log/attribute value and redacts PII from every string it contains.
 *
 * - strings are cleaned with `_cleanStringPII`;
 * - numbers, booleans, `null` and `undefined` are returned as-is;
 * - `Uint8Array` values are decoded as UTF-8, cleaned and re-encoded; when the
 *   bytes are not valid UTF-8 (or cleaning fails) the original array is returned;
 * - arrays are mapped element by element into a new array;
 * - other objects are copied into a new plain object with each own enumerable
 *   property cleaned recursively;
 * - anything else is returned unchanged.
 *
 * @template T
 * @param {T} value - The value to sanitize.
 * @param {"trace" | "log" | "metric"} source - The signal the value belongs to; forwarded to the redactors.
 * @param {Redactor[]} redactors - The string processors containing the redaction logic.
 * @returns {T} A sanitized value of the same shape as the input.
 */
export function _recursiveObjectClean<T extends AnyValue>(
  value: T,
  source: PIISource,
  redactors: Redactor[],
): T {
  if (typeof value === "string") {
    return _cleanStringPII(value, source, redactors) as T;
  }

  if (
    typeof value === "number" ||
    typeof value === "boolean" ||
    value == null
  ) {
    return value;
  }

  if (value instanceof Uint8Array) {
    try {
      // Throws on invalid UTF-8, so non-text payloads fall through to the catch.
      const decoded = strictUtf8Decoder.decode(value);
      const sanitized = _cleanStringPII(decoded, source, redactors);
      return encoder.encode(sanitized) as T;
    } catch {
      // Best effort: leave undecodable binary data exactly as received.
      return value;
    }
  }

  if (Array.isArray(value)) {
    return value.map((item) =>
      _recursiveObjectClean(item, source, redactors),
    ) as T;
  }

  if (typeof value === "object") {
    const sanitized: AnyValueMap = {};
    for (const [key, val] of Object.entries(value)) {
      sanitized[key] = _recursiveObjectClean(val, source, redactors);
    }
    return sanitized as T;
  }

  return value;
}
@@ -0,0 +1,58 @@
1
+ import { _getPIICounterRedactionMetric } from "../../shared-metrics.js";
2
+
// Matches an email-like token; capture group 1 is the domain part after the "@".
const EMAIL_REGEX = /[\p{L}\p{N}._%+-]+@((?:[\p{L}\p{N}-]+\.)+[\p{L}]{2,})/giu;

/**
 * Replaces every email address found in the input and tallies what was replaced.
 *
 * @param {string} value The input string potentially containing email addresses.
 * @returns {{
 *   redacted: string,
 *   count: number,
 *   domains: Record<string, number>
 * }}
 *
 * An object containing:
 * - `redacted`: the input with each email address replaced by `[REDACTED EMAIL]`
 * - `count`: total number of email addresses replaced
 * - `domains`: for each matched domain (as written in the input), how many
 *   addresses with that domain were replaced
 */
function _redactEmails(value: string): {
  redacted: string;
  count: number;
  domains: Record<string, number>;
} {
  const domains: Record<string, number> = {};
  let count = 0;

  const tally = (_match: string, domain: string): string => {
    domains[domain] = (domains[domain] ?? 0) + 1;
    count += 1;
    return "[REDACTED EMAIL]";
  };

  const redacted = value.replace(EMAIL_REGEX, tally);

  return { redacted, count, domains };
}
36
+
/**
 * Redacts email addresses from the input and records one counter increment per
 * distinct email domain that was redacted.
 *
 * @param {string} value The input string potentially containing email addresses.
 * @param {string} source The signal the value comes from; reported as `redaction_source`.
 * @param {string} kind The format of the inspected value; reported as `pii_format`.
 * @returns {string} the redacted value
 */
export const emailRedactor = (value: string, source: string, kind: string) => {
  const outcome = _redactEmails(value);

  // Nothing matched: skip metrics entirely.
  if (outcome.count <= 0) {
    return outcome.redacted;
  }

  Object.entries(outcome.domains).forEach(([domain, occurrences]) => {
    _getPIICounterRedactionMetric().add(occurrences, {
      pii_type: "email",
      redaction_source: source,
      pii_email_domain: domain,
      pii_format: kind,
    });
  });

  return outcome.redacted;
};
@@ -0,0 +1,12 @@
1
+ import { NodeSDKConfig } from "../../../index.js";
2
+ import { emailRedactor } from "./email.js";
3
+ import { ipRedactor } from "./ip.js";
4
+
/**
 * A string processor that removes one category of PII.
 *
 * It receives the text to inspect plus two labels (`source` and `kind`) and
 * returns the text with the PII replaced.
 */
export type Redactor = (value: string, source: string, kind: string) => string;

/** Names of the detections that can be configured under `NodeSDKConfig["detection"]`. */
export type RedactorKeys = keyof NonNullable<NodeSDKConfig["detection"]>;

/** Lookup table from each configurable detection name to the redactor implementing it. */
export const redactors: Record<RedactorKeys, Redactor> = {
  email: emailRedactor,
  ip: ipRedactor,
};
@@ -0,0 +1,68 @@
1
+ import { _getPIICounterRedactionMetric } from "../../shared-metrics.js";
2
+
// Generous IP address matchers (might match some invalid addresses like 192.168.01.1).
// Both patterns also consume one optional percent-escape (e.g. "%20") directly
// before and after the address.
const IPV4_REGEX =
  /(?<!\d)(?:%[0-9A-Fa-f]{2})?(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}(?:%[0-9A-Fa-f]{2})?(?!\d)/gi;
const IPV6_REGEX =
  /(?<![0-9a-f:])(?:%[0-9A-Fa-f]{2})?((?:[0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}|(?:[0-9A-Fa-f]{1,4}:){1,7}:|:(?::[0-9A-Fa-f]{1,4}){1,7}|(?:[0-9A-Fa-f]{1,4}:){1,6}:[0-9A-Fa-f]{1,4}|(?:[0-9A-Fa-f]{1,4}:){1,5}(?::[0-9A-Fa-f]{1,4}){1,2}|(?:[0-9A-Fa-f]{1,4}:){1,4}(?::[0-9A-Fa-f]{1,4}){1,3}|(?:[0-9A-Fa-f]{1,4}:){1,3}(?::[0-9A-Fa-f]{1,4}){1,4}|(?:[0-9A-Fa-f]{1,4}:){1,2}(?::[0-9A-Fa-f]{1,4}){1,5}|[0-9A-Fa-f]{1,4}:(?::[0-9A-Fa-f]{1,4}){1,6}|:(?::[0-9A-Fa-f]{1,4}){1,7}:?|(?:[0-9A-Fa-f]{1,4}:){1,4}:(?:25[0-5]|2[0-4]\d|1\d\d|\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|\d{1,2})){3})(?:%[0-9A-Fa-f]{2})?(?![0-9a-f:])/gi;

/**
 * Redacts all IP addresses in the input string and counts them per address family.
 *
 * IPv4 addresses are replaced first; the IPv6 pass then runs on that result.
 *
 * @param {string} value The input string potentially containing IP addresses.
 * @returns {{
 *   redacted: string,
 *   counters: Record<string, number>
 * }}
 *
 * An object containing:
 * - `redacted`: the string with IP addresses replaced by `[REDACTED IPV4]` / `[REDACTED IPV6]`
 * - `counters`: number of addresses redacted, keyed by `"IPv4"` / `"IPv6"`;
 *   a key is present only when at least one address of that family was redacted
 */
function _redactIps(value: string): {
  redacted: string;
  counters: Record<string, number>;
} {
  const counters: Record<string, number> = {};
  const redacted = value
    .replace(IPV4_REGEX, () => {
      counters["IPv4"] = (counters["IPv4"] ?? 0) + 1;
      return "[REDACTED IPV4]";
    })
    .replace(IPV6_REGEX, () => {
      // Count under its own family; previously these were added to the IPv4 counter.
      counters["IPv6"] = (counters["IPv6"] ?? 0) + 1;
      return "[REDACTED IPV6]";
    });
  return { redacted, counters };
}
40
+
/**
 * Redacts IP addresses from the input and records one counter increment per
 * address family that was redacted.
 *
 * @param {string} value The input string potentially containing IP addresses.
 * @param {string} source The source of the attribute being redacted (log, span, metric).
 * @param {string} kind The type of the data structure containing the PII
 * @returns {string} the redacted value
 */
export const ipRedactor = (
  value: string,
  source: string,
  kind: string,
): string => {
  const outcome = _redactIps(value);

  for (const [family, occurrences] of Object.entries(outcome.counters)) {
    // Only report families that actually had something redacted.
    if (occurrences <= 0) {
      continue;
    }
    _getPIICounterRedactionMetric().add(occurrences, {
      pii_type: family,
      redaction_source: source,
      pii_format: kind,
    });
  }

  return outcome.redacted;
};
@@ -1,6 +1,6 @@
1
1
  import { Attributes, Counter } from "@opentelemetry/api";
2
2
  import { getMetric } from "../metrics.js";
3
- import { PIISource } from "./pii-detection.js";
3
+ import { PIISource } from "./redaction/pii-detection.js";
4
4
 
5
5
  interface RedactionMetric extends Attributes {
6
6
  /** Type of PII redacted (e.g., "email", "phone"). */
@@ -1,4 +1,4 @@
1
- import { LogRecord, LogRecordProcessor } from "@opentelemetry/sdk-logs";
1
+ import { SdkLogRecord, LogRecordProcessor } from "@opentelemetry/sdk-logs";
2
2
  import { Context } from "@opentelemetry/api";
3
3
  import { SignalAttributeValue } from "../index.js";
4
4
 
@@ -18,7 +18,7 @@ export class EnrichLogProcessor implements LogRecordProcessor {
18
18
  forceFlush(): Promise<void> {
19
19
  return Promise.resolve();
20
20
  }
21
- onEmit(logRecord: LogRecord, _context?: Context): void {
21
+ onEmit(logRecord: SdkLogRecord, _context?: Context): void {
22
22
  if (this._spanAttributes) {
23
23
  for (const [key, value] of Object.entries(this._spanAttributes)) {
24
24
  logRecord.setAttribute(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ogcio/o11y-sdk-node",
3
- "version": "0.3.1",
3
+ "version": "0.4.1",
4
4
  "description": "Opentelemetry standard instrumentation SDK for NodeJS based project",
5
5
  "main": "dist/index.js",
6
6
  "type": "module",
@@ -22,26 +22,26 @@
22
22
  "@grpc/grpc-js": "^1.13.4",
23
23
  "@opentelemetry/api": "^1.9.0",
24
24
  "@opentelemetry/api-logs": "^0.203.0",
25
- "@opentelemetry/auto-instrumentations-node": "^0.60.1",
25
+ "@opentelemetry/auto-instrumentations-node": "^0.62.1",
26
26
  "@opentelemetry/core": "^2.0.1",
27
- "@opentelemetry/exporter-logs-otlp-grpc": "^0.202.0",
28
- "@opentelemetry/exporter-logs-otlp-http": "^0.202.0",
29
- "@opentelemetry/exporter-metrics-otlp-grpc": "^0.202.0",
30
- "@opentelemetry/exporter-metrics-otlp-http": "^0.202.0",
31
- "@opentelemetry/exporter-trace-otlp-grpc": "^0.202.0",
32
- "@opentelemetry/exporter-trace-otlp-http": "^0.202.0",
33
- "@opentelemetry/instrumentation": "^0.202.0",
34
- "@opentelemetry/otlp-exporter-base": "^0.202.0",
27
+ "@opentelemetry/exporter-logs-otlp-grpc": "^0.203.0",
28
+ "@opentelemetry/exporter-logs-otlp-http": "^0.203.0",
29
+ "@opentelemetry/exporter-metrics-otlp-grpc": "^0.203.0",
30
+ "@opentelemetry/exporter-metrics-otlp-http": "^0.203.0",
31
+ "@opentelemetry/exporter-trace-otlp-grpc": "^0.203.0",
32
+ "@opentelemetry/exporter-trace-otlp-http": "^0.203.0",
33
+ "@opentelemetry/instrumentation": "^0.203.0",
34
+ "@opentelemetry/otlp-exporter-base": "^0.203.0",
35
35
  "@opentelemetry/resources": "^2.0.1",
36
- "@opentelemetry/sdk-logs": "^0.202.0",
36
+ "@opentelemetry/sdk-logs": "^0.203.0",
37
37
  "@opentelemetry/sdk-metrics": "^2.0.1",
38
- "@opentelemetry/sdk-node": "^0.202.0",
38
+ "@opentelemetry/sdk-node": "^0.203.0",
39
39
  "@opentelemetry/sdk-trace-base": "^2.0.1"
40
40
  },
41
41
  "devDependencies": {
42
- "@types/node": "^24.0.10",
42
+ "@types/node": "^24.3.0",
43
43
  "@vitest/coverage-v8": "^3.2.4",
44
- "tsx": "^4.20.3",
44
+ "tsx": "^4.20.5",
45
45
  "typescript": "^5.8.3",
46
46
  "vitest": "^3.2.4"
47
47
  },
@@ -1,9 +1,12 @@
1
1
  import { describe, expect, it, vi, beforeEach } from "vitest";
2
2
  import {
3
3
  _cleanStringPII,
4
- _cleanLogBodyPII,
5
- } from "../../lib/internals/pii-detection.js";
4
+ _containsEncodedComponents,
5
+ _recursiveObjectClean,
6
+ } from "../../lib/internals/redaction/pii-detection.js";
6
7
  import * as sharedMetrics from "../../lib/internals/shared-metrics.js";
8
+ import { emailRedactor } from "../../lib/internals/redaction/redactors/email";
9
+ import { ipRedactor } from "../../lib/internals/redaction/redactors/ip";
7
10
 
8
11
  describe("PII Detection Utils", () => {
9
12
  const mockMetricAdd = vi.fn();
@@ -16,9 +19,9 @@ describe("PII Detection Utils", () => {
16
19
  });
17
20
 
18
21
  describe("_cleanStringPII", () => {
19
- it("redacts plain email", () => {
22
+ it("redacts plain PII", () => {
20
23
  const input = "admin@example.com";
21
- const output = _cleanStringPII(input, "log");
24
+ const output = _cleanStringPII(input, "log", [emailRedactor]);
22
25
 
23
26
  expect(output).toBe("[REDACTED EMAIL]");
24
27
  expect(mockMetricAdd).toHaveBeenCalledWith(
@@ -33,7 +36,7 @@ describe("PII Detection Utils", () => {
33
36
 
34
37
  it("redacts email in URL-encoded string", () => {
35
38
  const input = "user%40gmail.com";
36
- const output = _cleanStringPII(input, "log");
39
+ const output = _cleanStringPII(input, "log", [emailRedactor]);
37
40
 
38
41
  expect(output).toBe("[REDACTED EMAIL]");
39
42
  expect(mockMetricAdd).toHaveBeenCalledWith(
@@ -45,36 +48,57 @@ describe("PII Detection Utils", () => {
45
48
  );
46
49
  });
47
50
 
48
- it("handles strings without email unchanged", () => {
49
- const input = "hello world";
50
- const output = _cleanStringPII(input, "log");
51
+ it("redacts ip in URL-encoded string", () => {
52
+ const input = "%20127.0.0.1";
53
+ const output = _cleanStringPII(input, "log", [ipRedactor]);
51
54
 
52
- expect(output).toBe("hello world");
53
- expect(mockMetricAdd).not.toHaveBeenCalled();
55
+ expect(output).toBe(" [REDACTED IPV4]");
56
+ expect(mockMetricAdd).toHaveBeenCalledWith(
57
+ 1,
58
+ expect.objectContaining({
59
+ pii_format: "url",
60
+ pii_type: "IPv4",
61
+ redaction_source: "log",
62
+ }),
63
+ );
54
64
  });
55
65
 
56
- it("handles array of strings", () => {
57
- const input = ["one@gmail.com", "two@example.com"];
58
- const output = _cleanStringPII(input, "log");
66
+ it("handles strings without PII unchanged", () => {
67
+ const input = "hello world";
68
+ const output = _cleanStringPII(input, "log", [emailRedactor]);
59
69
 
60
- expect(output).toEqual(["[REDACTED EMAIL]", "[REDACTED EMAIL]"]);
61
- expect(mockMetricAdd).toHaveBeenCalledTimes(2);
70
+ expect(output).toBe("hello world");
71
+ expect(mockMetricAdd).not.toHaveBeenCalled();
62
72
  });
63
73
 
64
74
  it("ignores non-string input", () => {
65
- expect(_cleanStringPII(1234, "trace")).toBe(1234);
66
- expect(_cleanStringPII(true, "trace")).toBe(true);
67
- expect(_cleanStringPII(undefined, "trace")).toBeUndefined();
75
+ // @ts-expect-error
76
+ expect(_cleanStringPII(1234, "trace", [emailRedactor])).toBe(1234);
77
+ // @ts-expect-error
78
+ expect(_cleanStringPII(true, "trace", [emailRedactor])).toBe(true);
79
+ expect(
80
+ _cleanStringPII(undefined, "trace", [emailRedactor]),
81
+ ).toBeUndefined();
68
82
  expect(mockMetricAdd).not.toHaveBeenCalled();
69
83
  });
70
84
  });
71
85
 
72
- describe("_cleanLogBodyPII", () => {
73
- it("cleans string email", () => {
74
- const result = _cleanLogBodyPII("demo@abc.com");
86
+ describe("_recursiveObjectClean", () => {
87
+ it("cleans string PII", () => {
88
+ const result = _recursiveObjectClean("demo@abc.com", "log", [
89
+ emailRedactor,
90
+ ]);
75
91
  expect(result).toBe("[REDACTED EMAIL]");
76
92
  });
77
93
 
94
+ it("cleans array of strings", () => {
95
+ const input = ["one@gmail.com", "two@example.com"];
96
+ const output = _recursiveObjectClean(input, "log", [emailRedactor]);
97
+
98
+ expect(output).toEqual(["[REDACTED EMAIL]", "[REDACTED EMAIL]"]);
99
+ expect(mockMetricAdd).toHaveBeenCalledTimes(2);
100
+ });
101
+
78
102
  it("cleans deeply nested object", () => {
79
103
  const input = {
80
104
  user: {
@@ -86,7 +110,7 @@ describe("PII Detection Utils", () => {
86
110
  status: "active",
87
111
  };
88
112
 
89
- const result = _cleanLogBodyPII(input);
113
+ const result = _recursiveObjectClean(input, "log", [emailRedactor]);
90
114
 
91
115
  expect(result).toEqual({
92
116
  user: {
@@ -102,7 +126,7 @@ describe("PII Detection Utils", () => {
102
126
  it("cleans Uint8Array input", () => {
103
127
  const str = "admin@gmail.com";
104
128
  const buffer = new TextEncoder().encode(str);
105
- const result = _cleanLogBodyPII(buffer);
129
+ const result = _recursiveObjectClean(buffer, "log", [emailRedactor]);
106
130
  const decoded = new TextDecoder().decode(result as Uint8Array);
107
131
 
108
132
  expect(decoded).toBe("[REDACTED EMAIL]");
@@ -110,7 +134,7 @@ describe("PII Detection Utils", () => {
110
134
 
111
135
  it("skips malformed Uint8Array decode", () => {
112
136
  const corrupted = new Uint8Array([0xff, 0xfe, 0xfd]);
113
- const result = _cleanLogBodyPII(corrupted);
137
+ const result = _recursiveObjectClean(corrupted, "log", [emailRedactor]);
114
138
 
115
139
  // Should return a Uint8Array, but unmodified/redaction should not happen
116
140
  expect(result).toBeInstanceOf(Uint8Array);
@@ -118,11 +142,11 @@ describe("PII Detection Utils", () => {
118
142
  });
119
143
 
120
144
  it("cleans arrays of values", () => {
121
- const result = _cleanLogBodyPII([
122
- "bob@abc.com",
123
- 123,
124
- { nested: "jane@example.com" },
125
- ]);
145
+ const result = _recursiveObjectClean(
146
+ ["bob@abc.com", 123, { nested: "jane@example.com" }],
147
+ "log",
148
+ [emailRedactor],
149
+ );
126
150
 
127
151
  expect(result).toEqual([
128
152
  "[REDACTED EMAIL]",
@@ -132,10 +156,110 @@ describe("PII Detection Utils", () => {
132
156
  });
133
157
 
134
158
  it("passes null and boolean through", () => {
135
- expect(_cleanLogBodyPII(null)).toBeNull();
136
- expect(_cleanLogBodyPII(undefined)).toBeUndefined();
137
- expect(_cleanLogBodyPII(true)).toBe(true);
138
- expect(_cleanLogBodyPII(false)).toBe(false);
159
+ expect(_recursiveObjectClean(null, "log", [emailRedactor])).toBeNull();
160
+ expect(
161
+ _recursiveObjectClean(undefined, "log", [emailRedactor]),
162
+ ).toBeUndefined();
163
+ expect(_recursiveObjectClean(true, "log", [emailRedactor])).toBe(true);
164
+ expect(_recursiveObjectClean(false, "log", [emailRedactor])).toBe(false);
165
+ });
166
+ });
167
+
168
+ describe("_containsEncodedComponents", () => {
169
+ describe("should return true for properly URL encoded strings", () => {
170
+ it.each([
171
+ ["hello%20world", "Space encoded as %20"],
172
+ ["test%2Bvalue", "Plus sign encoded as %2B"],
173
+ ["path%2Fto%2Ffile", "Forward slashes encoded"],
174
+ ["user%40domain.com", "@ symbol encoded"],
175
+ ["100%25%20off", "Percent and space encoded"],
176
+ ["a%3Db%26c%3Dd", "Query parameters (a=b&c=d)"],
177
+ ["caf%C3%A9", "UTF-8 encoded (café)"],
178
+ ["price%3A%20%2410", "Colon, space, dollar ($10)"],
179
+ ["%22quoted%22", "Double quotes encoded"],
180
+ ["https%3A%2F%2Fexample.com", "Full URL encoded"],
181
+ ["file%20name.txt", "filename encoded"],
182
+ ["search%3Fq%3Dhello%20world", "Query string encoded"],
183
+ ["%3C%3E%26%22%27", "HTML special chars encoded (<>&\"')"],
184
+ ["%E2%9C%93", "UTF-8 checkmark (✓) encoded"],
185
+ ["test%2b", "Lowercase hex"],
186
+ ["%E4%B8%AD%E6%96%87", "Chinese characters encoded"],
187
+ ])('should detect "%s" as URL encoded (%s)', (input, description) => {
188
+ expect(_containsEncodedComponents(input)).toBe(true);
189
+ });
190
+ });
191
+
192
+ describe("should return false for non-URL encoded strings", () => {
193
+ it.each([
194
+ ["test", "Simple ASCII string"],
195
+ ["hello world", "Unencoded space"],
196
+ ["user@domain.com", "Unencoded email"],
197
+ ["simple123", "Alphanumeric only"],
198
+ ["", "Empty string"],
199
+ ["25%%", "Literal percent signs"],
200
+ ["100% off", "Percent without hex digits"],
201
+ ["test%2", "Incomplete percent encoding"],
202
+ ["hello%ZZ", "Invalid hex digits"],
203
+ ["test%2G", "Invalid hex digit G"],
204
+ ["bad%encoding%here", "Percent without hex pairs"],
205
+ ["hello%20world and more", "Partially encoded string"],
206
+ ["hello%20world%21%20how%20are%20you", "Overly encoded string"],
207
+ ["café", "Unicode characters (unencoded)"],
208
+ ["hello+world", "Plus sign (form encoding style)"],
209
+ ["test%", "Trailing percent"],
210
+ ["hello%20world%", "Encoded content with trailing percent"],
211
+ ["%", "Single percent"],
212
+ ["%%", "Double percent"],
213
+ ["normal text with % symbols", "Text with percent but no encoding"],
214
+ ["price: $100%", "Currency with percent"],
215
+ ["file.txt", "Simple filename"],
216
+ ["path/to/file", "Unencoded path"],
217
+ ["query?param=value", "Unencoded query string"],
218
+ ["hello%world", "Percent without following hex"],
219
+ ["test%1", "Single hex digit after percent"],
220
+ ["hello%zz", "Non-hex characters after percent"],
221
+ ])('should not detect "%s" as URL encoded (%s)', (input, description) => {
222
+ expect(_containsEncodedComponents(input)).toBe(false);
223
+ });
224
+ });
225
+
226
+ describe("Error handling", () => {
227
+ it.each([
228
+ ["%C0%80", "Overlong UTF-8 encoding (security concern)"],
229
+ ["%ED%A0%80", "UTF-8 surrogate (invalid)"],
230
+ ["%FF%FE", "Invalid UTF-8 sequence"],
231
+ ["test%20%ZZ", "Mix of valid and invalid encoding"],
232
+ ])('should handle "%s" (%s)', (input, description) => {
233
+ // These should not throw errors
234
+ expect(() => _containsEncodedComponents(input)).not.toThrow();
235
+
236
+ // Most of these should return false due to invalid sequences
237
+ // or security-related encoding issues
238
+ const result = _containsEncodedComponents(input);
239
+ expect(typeof result).toBe("boolean");
240
+ });
241
+ });
242
+
243
+ describe("real-world url examples", () => {
244
+ it.each([
245
+ [
246
+ "https%3A%2F%2Fgoogle.com%2Fsearch%3Fq%3Djavascript",
247
+ "Encoded Google search URL",
248
+ ],
249
+ ["The%20quick%20brown%20fox", "Sentence with spaces encoded"],
250
+ [
251
+ "/api/user?body=here%20are%20all%20my%20secrets",
252
+ "contextually encoded API path",
253
+ ],
254
+ ["redirect_uri=https%3A%2F%2Fapp.com%2Fcallback", "OAuth redirect URI"],
255
+ ["data%3Atext%2Fplain%3Bbase64%2CSGVsbG8%3D", "Data URL encoded"],
256
+ ])('real-world case: "%s" (%s)', (input, description) => {
257
+ const result = _containsEncodedComponents(input);
258
+
259
+ // Verify the function doesn't crash
260
+ expect(typeof result).toBe("boolean");
261
+ expect(result).toBe(true);
262
+ });
139
263
  });
140
264
  });
141
265
  });
@@ -0,0 +1,81 @@
1
+ import {
2
+ describe,
3
+ expect,
4
+ it,
5
+ vi,
6
+ beforeEach,
7
+ beforeAll,
8
+ afterAll,
9
+ } from "vitest";
10
+
11
+ import * as sharedMetrics from "../../../lib/internals/shared-metrics.js";
12
+ import { ipRedactor } from "../../../lib/internals/redaction/redactors/ip";
13
+ import { emailRedactor } from "../../../lib/internals/redaction/redactors/email";
14
+
// Test suite for the email redactor: metric reporting and address matching.
describe("Email Redaction utils", () => {
  describe("tracks metrics", () => {
    const mockMetricAdd = vi.fn();

    beforeEach(() => {
      vi.restoreAllMocks();
      // Clear call history explicitly so each test starts from a clean counter mock.
      mockMetricAdd.mockClear();
      vi.spyOn(sharedMetrics, "_getPIICounterRedactionMetric").mockReturnValue({
        add: mockMetricAdd,
      });
    });

    it("redacts plain PII and tracks redaction with metric", () => {
      const input = "admin@example.com";
      const output = emailRedactor(input, "log", "string");

      expect(output).toBe("[REDACTED EMAIL]");
      expect(mockMetricAdd).toHaveBeenCalledWith(
        1,
        expect.objectContaining({
          pii_email_domain: "example.com",
          pii_type: "email",
          redaction_source: "log",
        }),
      );
    });

    it("handles strings without PII unchanged", () => {
      const input = "hello world";
      // Exercise the redactor under test (this previously called the IP redactor).
      const output = emailRedactor(input, "log", "string");

      expect(output).toBe("hello world");
      expect(mockMetricAdd).not.toHaveBeenCalled();
    });
  });

  describe("Redacts email addresses", () => {
    beforeAll(() => {
      vi.spyOn(sharedMetrics, "_getPIICounterRedactionMetric").mockReturnValue({
        add: vi.fn(),
      });
    });

    afterAll(() => {
      vi.restoreAllMocks();
    });

    it.each`
      value                                  | expectedRedactedValue
      ${"user+tag@example.com"}              | ${"[REDACTED EMAIL]"}
      ${"user.name+tag+sorting@example.com"} | ${"[REDACTED EMAIL]"}
      ${"x@example.museum"}                  | ${"[REDACTED EMAIL]"}
      ${"a.b-c_d@example.co.uk"}             | ${"[REDACTED EMAIL]"}
      ${"üser@example.de"}                   | ${"[REDACTED EMAIL]"}
      ${"john.doe@xn--exmple-cua.com"}       | ${"[REDACTED EMAIL]"}
      ${"üser@example.de"}                   | ${"[REDACTED EMAIL]"}
      ${"plainaddress"}                      | ${"plainaddress"}
      ${"@missinglocal.org"}                 | ${"@missinglocal.org"}
      ${"user@invalid_domain.com"}           | ${"user@invalid_domain.com"}
    `(
      "returns $expectedRedactedValue for value '$value'",
      ({ value, expectedRedactedValue }: Record<string, string>) => {
        const result = emailRedactor(value, "log", "string");
        expect(result).toBe(expectedRedactedValue);
      },
    );
  });
});