@elanlanguages/bridge-anonymization 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +382 -0
- package/dist/crypto/index.d.ts +6 -0
- package/dist/crypto/index.d.ts.map +1 -0
- package/dist/crypto/index.js +6 -0
- package/dist/crypto/index.js.map +1 -0
- package/dist/crypto/pii-map-crypto.d.ts +100 -0
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
- package/dist/crypto/pii-map-crypto.js +163 -0
- package/dist/crypto/pii-map-crypto.js.map +1 -0
- package/dist/index.d.ts +173 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +294 -0
- package/dist/index.js.map +1 -0
- package/dist/ner/bio-decoder.d.ts +64 -0
- package/dist/ner/bio-decoder.d.ts.map +1 -0
- package/dist/ner/bio-decoder.js +216 -0
- package/dist/ner/bio-decoder.js.map +1 -0
- package/dist/ner/index.d.ts +10 -0
- package/dist/ner/index.d.ts.map +1 -0
- package/dist/ner/index.js +10 -0
- package/dist/ner/index.js.map +1 -0
- package/dist/ner/model-manager.d.ts +102 -0
- package/dist/ner/model-manager.d.ts.map +1 -0
- package/dist/ner/model-manager.js +253 -0
- package/dist/ner/model-manager.js.map +1 -0
- package/dist/ner/ner-model.d.ts +114 -0
- package/dist/ner/ner-model.d.ts.map +1 -0
- package/dist/ner/ner-model.js +240 -0
- package/dist/ner/ner-model.js.map +1 -0
- package/dist/ner/onnx-runtime.d.ts +45 -0
- package/dist/ner/onnx-runtime.d.ts.map +1 -0
- package/dist/ner/onnx-runtime.js +99 -0
- package/dist/ner/onnx-runtime.js.map +1 -0
- package/dist/ner/tokenizer.d.ts +140 -0
- package/dist/ner/tokenizer.d.ts.map +1 -0
- package/dist/ner/tokenizer.js +341 -0
- package/dist/ner/tokenizer.js.map +1 -0
- package/dist/pipeline/index.d.ts +9 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +9 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/prenormalize.d.ts +48 -0
- package/dist/pipeline/prenormalize.d.ts.map +1 -0
- package/dist/pipeline/prenormalize.js +94 -0
- package/dist/pipeline/prenormalize.js.map +1 -0
- package/dist/pipeline/resolver.d.ts +56 -0
- package/dist/pipeline/resolver.d.ts.map +1 -0
- package/dist/pipeline/resolver.js +238 -0
- package/dist/pipeline/resolver.js.map +1 -0
- package/dist/pipeline/tagger.d.ts +74 -0
- package/dist/pipeline/tagger.d.ts.map +1 -0
- package/dist/pipeline/tagger.js +169 -0
- package/dist/pipeline/tagger.js.map +1 -0
- package/dist/pipeline/validator.d.ts +65 -0
- package/dist/pipeline/validator.d.ts.map +1 -0
- package/dist/pipeline/validator.js +264 -0
- package/dist/pipeline/validator.js.map +1 -0
- package/dist/recognizers/base.d.ts +78 -0
- package/dist/recognizers/base.d.ts.map +1 -0
- package/dist/recognizers/base.js +100 -0
- package/dist/recognizers/base.js.map +1 -0
- package/dist/recognizers/bic-swift.d.ts +10 -0
- package/dist/recognizers/bic-swift.d.ts.map +1 -0
- package/dist/recognizers/bic-swift.js +107 -0
- package/dist/recognizers/bic-swift.js.map +1 -0
- package/dist/recognizers/credit-card.d.ts +32 -0
- package/dist/recognizers/credit-card.d.ts.map +1 -0
- package/dist/recognizers/credit-card.js +160 -0
- package/dist/recognizers/credit-card.js.map +1 -0
- package/dist/recognizers/custom-id.d.ts +28 -0
- package/dist/recognizers/custom-id.d.ts.map +1 -0
- package/dist/recognizers/custom-id.js +116 -0
- package/dist/recognizers/custom-id.js.map +1 -0
- package/dist/recognizers/email.d.ts +10 -0
- package/dist/recognizers/email.d.ts.map +1 -0
- package/dist/recognizers/email.js +75 -0
- package/dist/recognizers/email.js.map +1 -0
- package/dist/recognizers/iban.d.ts +14 -0
- package/dist/recognizers/iban.d.ts.map +1 -0
- package/dist/recognizers/iban.js +67 -0
- package/dist/recognizers/iban.js.map +1 -0
- package/dist/recognizers/index.d.ts +20 -0
- package/dist/recognizers/index.d.ts.map +1 -0
- package/dist/recognizers/index.js +42 -0
- package/dist/recognizers/index.js.map +1 -0
- package/dist/recognizers/ip-address.d.ts +14 -0
- package/dist/recognizers/ip-address.d.ts.map +1 -0
- package/dist/recognizers/ip-address.js +183 -0
- package/dist/recognizers/ip-address.js.map +1 -0
- package/dist/recognizers/phone.d.ts +10 -0
- package/dist/recognizers/phone.d.ts.map +1 -0
- package/dist/recognizers/phone.js +145 -0
- package/dist/recognizers/phone.js.map +1 -0
- package/dist/recognizers/registry.d.ts +59 -0
- package/dist/recognizers/registry.d.ts.map +1 -0
- package/dist/recognizers/registry.js +113 -0
- package/dist/recognizers/registry.js.map +1 -0
- package/dist/recognizers/url.d.ts +14 -0
- package/dist/recognizers/url.d.ts.map +1 -0
- package/dist/recognizers/url.js +121 -0
- package/dist/recognizers/url.js.map +1 -0
- package/dist/types/index.d.ts +134 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +69 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/pii-types.d.ts +50 -0
- package/dist/types/pii-types.d.ts.map +1 -0
- package/dist/types/pii-types.js +114 -0
- package/dist/types/pii-types.js.map +1 -0
- package/dist/utils/iban-checksum.d.ts +23 -0
- package/dist/utils/iban-checksum.d.ts.map +1 -0
- package/dist/utils/iban-checksum.js +106 -0
- package/dist/utils/iban-checksum.js.map +1 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/luhn.d.ts +17 -0
- package/dist/utils/luhn.d.ts.map +1 -0
- package/dist/utils/luhn.js +55 -0
- package/dist/utils/luhn.js.map +1 -0
- package/dist/utils/offsets.d.ts +86 -0
- package/dist/utils/offsets.d.ts.map +1 -0
- package/dist/utils/offsets.js +124 -0
- package/dist/utils/offsets.js.map +1 -0
- package/package.json +62 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,WAAW,CAAC;AAC1B,cAAc,oBAAoB,CAAC;AACnC,cAAc,cAAc,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Luhn Algorithm (Mod 10) Implementation
|
|
3
|
+
* Used for validating credit card numbers and other identifier checksums
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Validates a number string using the Luhn algorithm
|
|
7
|
+
* @param input - String of digits (all non-digit characters, e.g. spaces and dashes, are stripped)
|
|
8
|
+
* @returns true if the checksum is valid
|
|
9
|
+
*/
|
|
10
|
+
export declare function validateLuhn(input: string): boolean;
|
|
11
|
+
/**
|
|
12
|
+
* Calculates the Luhn check digit for a partial number
|
|
13
|
+
* @param partialNumber - String of digits without the check digit (non-digit characters are stripped)
|
|
14
|
+
* @returns The check digit (0-9)
|
|
15
|
+
*/
|
|
16
|
+
export declare function calculateLuhnCheckDigit(partialNumber: string): number;
|
|
17
|
+
//# sourceMappingURL=luhn.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"luhn.d.ts","sourceRoot":"","sources":["../../src/utils/luhn.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CA2BnD;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,aAAa,EAAE,MAAM,GAAG,MAAM,CAsBrE"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Luhn Algorithm (Mod 10) Implementation
|
|
3
|
+
* Used for validating credit card numbers and other identifier checksums
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Checks whether the digits found in a string satisfy the Luhn (mod 10) checksum.
 * Every non-digit character is discarded before the check runs.
 * @param input - Text containing the digits to check
 * @returns true when the checksum holds; false when it fails or when no digits remain
 */
export function validateLuhn(input) {
    const numerals = input.replace(/\D/g, '');
    if (numerals.length === 0) {
        return false;
    }
    const lastIndex = numerals.length - 1;
    let total = 0;
    // `offset` is the distance from the rightmost digit (offset 0 = rightmost).
    for (let offset = 0; offset <= lastIndex; offset++) {
        const value = parseInt(numerals[lastIndex - offset], 10);
        if (offset % 2 === 1) {
            // Every second digit, counted from the right, is doubled;
            // a two-digit product is folded back to one digit by subtracting 9.
            const doubled = value * 2;
            total += doubled > 9 ? doubled - 9 : doubled;
        }
        else {
            total += value;
        }
    }
    return total % 10 === 0;
}
|
|
32
|
+
/**
 * Computes the Luhn check digit that would be appended to a partial number.
 * Every non-digit character is discarded before the calculation.
 * @param partialNumber - Text containing the digits that precede the check digit
 * @returns The check digit (0-9)
 */
export function calculateLuhnCheckDigit(partialNumber) {
    const numerals = partialNumber.replace(/\D/g, '');
    let total = 0;
    let position = numerals.length - 1;
    // The rightmost payload digit is doubled because the (not yet present)
    // check digit will occupy the undoubled position to its right.
    let doubleThis = true;
    while (position >= 0) {
        const value = parseInt(numerals[position], 10);
        if (doubleThis) {
            const doubled = value * 2;
            total += doubled > 9 ? doubled - 9 : doubled;
        }
        else {
            total += value;
        }
        doubleThis = !doubleThis;
        position -= 1;
    }
    // The outer modulo maps a remainder of 0 to a check digit of 0 rather than 10.
    return (10 - (total % 10)) % 10;
}
|
|
55
|
+
//# sourceMappingURL=luhn.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"luhn.js","sourceRoot":"","sources":["../../src/utils/luhn.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;;GAIG;AACH,MAAM,UAAU,YAAY,CAAC,KAAa;IACxC,wDAAwD;IACxD,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAExC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,oCAAoC;IACpC,KAAK,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,IAAI,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAE,EAAE,EAAE,CAAC,CAAC;QAErC,IAAI,MAAM,EAAE,CAAC;YACX,KAAK,IAAI,CAAC,CAAC;YACX,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACd,KAAK,IAAI,CAAC,CAAC;YACb,CAAC;QACH,CAAC;QAED,GAAG,IAAI,KAAK,CAAC;QACb,MAAM,GAAG,CAAC,MAAM,CAAC;IACnB,CAAC;IAED,OAAO,GAAG,GAAG,EAAE,KAAK,CAAC,CAAC;AACxB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,uBAAuB,CAAC,aAAqB;IAC3D,MAAM,MAAM,GAAG,aAAa,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAEhD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,MAAM,GAAG,IAAI,CAAC,CAAC,6CAA6C;IAEhE,oCAAoC;IACpC,KAAK,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,IAAI,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAE,EAAE,EAAE,CAAC,CAAC;QAErC,IAAI,MAAM,EAAE,CAAC;YACX,KAAK,IAAI,CAAC,CAAC;YACX,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACd,KAAK,IAAI,CAAC,CAAC;YACb,CAAC;QACH,CAAC;QAED,GAAG,IAAI,KAAK,CAAC;QACb,MAAM,GAAG,CAAC,MAAM,CAAC;IACnB,CAAC;IAED,OAAO,CAAC,EAAE,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC;AAChC,CAAC"}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Character Offset Utilities
|
|
3
|
+
* Handles character offset calculations for text manipulation
|
|
4
|
+
*/
|
|
5
|
+
import type { SpanMatch } from '../types/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* Checks if two spans overlap
|
|
8
|
+
*/
|
|
9
|
+
export declare function spansOverlap(a: {
|
|
10
|
+
start: number;
|
|
11
|
+
end: number;
|
|
12
|
+
}, b: {
|
|
13
|
+
start: number;
|
|
14
|
+
end: number;
|
|
15
|
+
}): boolean;
|
|
16
|
+
/**
|
|
17
|
+
* Checks if the outer span fully contains the inner span (shared boundaries count as contained)
|
|
18
|
+
*/
|
|
19
|
+
export declare function spanContains(outer: {
|
|
20
|
+
start: number;
|
|
21
|
+
end: number;
|
|
22
|
+
}, inner: {
|
|
23
|
+
start: number;
|
|
24
|
+
end: number;
|
|
25
|
+
}): boolean;
|
|
26
|
+
/**
|
|
27
|
+
* Gets the length of a span
|
|
28
|
+
*/
|
|
29
|
+
export declare function spanLength(span: {
|
|
30
|
+
start: number;
|
|
31
|
+
end: number;
|
|
32
|
+
}): number;
|
|
33
|
+
/**
|
|
34
|
+
* Extracts text for a span from the original text
|
|
35
|
+
*/
|
|
36
|
+
export declare function getSpanText(text: string, span: {
|
|
37
|
+
start: number;
|
|
38
|
+
end: number;
|
|
39
|
+
}): string;
|
|
40
|
+
/**
|
|
41
|
+
* Sorts spans by start position (ascending) then by length (descending)
|
|
42
|
+
*/
|
|
43
|
+
export declare function sortSpansByPosition<T extends {
|
|
44
|
+
start: number;
|
|
45
|
+
end: number;
|
|
46
|
+
}>(spans: T[]): T[];
|
|
47
|
+
/**
|
|
48
|
+
* Sorts spans by start position descending (for replacement operations)
|
|
49
|
+
*/
|
|
50
|
+
export declare function sortSpansByPositionDescending<T extends {
|
|
51
|
+
start: number;
|
|
52
|
+
end: number;
|
|
53
|
+
}>(spans: T[]): T[];
|
|
54
|
+
/**
|
|
55
|
+
* Removes overlapping spans, keeping the preferred ones based on a comparator
|
|
56
|
+
* @param spans - Array of spans to deduplicate
|
|
57
|
+
* @param prefer - Comparator that returns positive if 'a' should be preferred over 'b'
|
|
58
|
+
*/
|
|
59
|
+
export declare function removeOverlappingSpans<T extends SpanMatch>(spans: T[], prefer: (a: T, b: T) => number): T[];
|
|
60
|
+
/**
|
|
61
|
+
* Validates that spans don't overlap (for final validation)
|
|
62
|
+
*/
|
|
63
|
+
export declare function validateNoOverlaps(spans: {
|
|
64
|
+
start: number;
|
|
65
|
+
end: number;
|
|
66
|
+
}[]): boolean;
|
|
67
|
+
/**
|
|
68
|
+
* Calculates offset adjustments for replacements
|
|
69
|
+
* Used when you need to map positions between original and modified text
|
|
70
|
+
*/
|
|
71
|
+
export interface OffsetAdjustment {
|
|
72
|
+
originalStart: number;
|
|
73
|
+
originalEnd: number;
|
|
74
|
+
newStart: number;
|
|
75
|
+
newEnd: number;
|
|
76
|
+
delta: number;
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* Builds a list of offset adjustments from replacements
|
|
80
|
+
*/
|
|
81
|
+
export declare function buildOffsetAdjustments(replacements: Array<{
|
|
82
|
+
start: number;
|
|
83
|
+
end: number;
|
|
84
|
+
replacement: string;
|
|
85
|
+
}>): OffsetAdjustment[];
|
|
86
|
+
//# sourceMappingURL=offsets.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"offsets.d.ts","sourceRoot":"","sources":["../../src/utils/offsets.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEnD;;GAEG;AACH,wBAAgB,YAAY,CAAC,CAAC,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,EAAE,CAAC,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAE1G;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,EAAE,KAAK,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAElH;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG,MAAM,CAEvE;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG,MAAM,CAEtF;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,CAAC,SAAS;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAQ7F;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CAAC,CAAC,SAAS;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAEvG;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,CAAC,SAAS,SAAS,EACxD,KAAK,EAAE,CAAC,EAAE,EACV,MAAM,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,KAAK,MAAM,GAC7B,CAAC,EAAE,CA0CL;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,EAAE,GAAG,OAAO,CAanF;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,YAAY,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAA;CAAE,CAAC,GACvE,gBAAgB,EAAE,CAsBpB"}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Character Offset Utilities
|
|
3
|
+
* Handles character offset calculations for text manipulation
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Reports whether two spans share at least one position.
 * Spans that merely touch (one ends exactly where the other starts) do not overlap.
 */
export function spansOverlap(a, b) {
    const aBeginsBeforeBEnds = a.start < b.end;
    const bBeginsBeforeAEnds = b.start < a.end;
    return aBeginsBeforeBEnds && bBeginsBeforeAEnds;
}
|
|
11
|
+
/**
 * Reports whether `outer` fully covers `inner`.
 * Shared boundaries count as contained, so a span contains itself.
 */
export function spanContains(outer, inner) {
    if (!(outer.start <= inner.start)) {
        return false;
    }
    return outer.end >= inner.end;
}
|
|
17
|
+
/**
 * Returns the size of a span, computed as its end minus its start.
 */
export function spanLength(span) {
    const { end, start } = span;
    return end - start;
}
|
|
23
|
+
/**
 * Returns the slice of `text` that lies between the span's start and end.
 */
export function getSpanText(text, span) {
    const { start, end } = span;
    return text.slice(start, end);
}
|
|
29
|
+
/**
 * Returns a sorted copy of the spans: earliest start first, and for spans that
 * share a start, the longer span first. The input array is left untouched.
 */
export function sortSpansByPosition(spans) {
    const byStartThenLongest = (a, b) => (a.start !== b.start
        ? a.start - b.start
        : spanLength(b) - spanLength(a));
    return Array.from(spans).sort(byStartThenLongest);
}
|
|
41
|
+
/**
 * Returns a copy of the spans ordered from the latest start to the earliest
 * (the order used for replacement operations). The input array is left untouched.
 */
export function sortSpansByPositionDescending(spans) {
    const latestStartFirst = (left, right) => right.start - left.start;
    return Array.from(spans).sort(latestStartFirst);
}
|
|
47
|
+
/**
 * Resolves overlaps between spans, keeping whichever span the comparator prefers.
 * @param spans - Array of spans to deduplicate
 * @param prefer - Comparator; a positive result means 'a' should be kept instead of 'b'
 * @returns The surviving spans, sorted by position
 */
export function removeOverlappingSpans(spans, prefer) {
    if (spans.length === 0) {
        return [];
    }
    const kept = [];
    for (const candidate of sortSpansByPosition(spans)) {
        // Only the first kept span that collides with the candidate is considered.
        const clashIndex = kept.findIndex((chosen) => spansOverlap(candidate, chosen));
        if (clashIndex === -1) {
            kept.push(candidate);
            continue;
        }
        if (prefer(candidate, kept[clashIndex]) > 0) {
            // The candidate wins: it takes the place of the span it collided with.
            kept.splice(clashIndex, 1);
            kept.push(candidate);
        }
        // Otherwise the already-kept span wins and the candidate is dropped.
    }
    return sortSpansByPosition(kept);
}
|
|
88
|
+
/**
 * Final-validation helper: returns true when, after sorting by position, no span
 * ends past the start of the span that follows it.
 */
export function validateNoOverlaps(spans) {
    const ordered = sortSpansByPosition(spans);
    const hasCollision = ordered.some((current, index) => {
        const nextIndex = index + 1;
        // The last span has no successor to compare against.
        return nextIndex < ordered.length && current.end > ordered[nextIndex].start;
    });
    return !hasCollision;
}
|
|
102
|
+
/**
 * Produces one offset adjustment per replacement, in position order, recording
 * where each replaced range sits in the original text and in the modified text.
 */
export function buildOffsetAdjustments(replacements) {
    const adjustments = [];
    // Net change in length contributed by all replacements processed so far.
    let shift = 0;
    for (const { start, end, replacement } of sortSpansByPosition(replacements)) {
        const insertedLength = replacement.length;
        const delta = insertedLength - (end - start);
        const newStart = start + shift;
        adjustments.push({
            originalStart: start,
            originalEnd: end,
            newStart,
            newEnd: newStart + insertedLength,
            delta,
        });
        shift += delta;
    }
    return adjustments;
}
|
|
124
|
+
//# sourceMappingURL=offsets.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"offsets.js","sourceRoot":"","sources":["../../src/utils/offsets.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,CAAiC,EAAE,CAAiC;IAC/F,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC;AAC5C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,KAAqC,EAAE,KAAqC;IACvG,OAAO,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,GAAG,IAAI,KAAK,CAAC,GAAG,CAAC;AAC9D,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,IAAoC;IAC7D,OAAO,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC;AAC/B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY,EAAE,IAAoC;IAC5E,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;AAC1C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAA2C,KAAU;IACtF,OAAO,CAAC,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC9B,IAAI,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,KAAK,EAAE,CAAC;YACxB,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;QAC3B,CAAC;QACD,qCAAqC;QACrC,OAAO,UAAU,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,6BAA6B,CAA2C,KAAU;IAChG,OAAO,CAAC,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;AACtD,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,sBAAsB,CACpC,KAAU,EACV,MAA8B;IAE9B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,yBAAyB;IACzB,MAAM,MAAM,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAQ,EAAE,CAAC;IAEvB,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;QAC1B,6DAA6D;QAC7D,IAAI,SAAS,GAAG,IAAI,CAAC;QACrB,IAAI,aAAa,GAAkB,IAAI,CAAC;QAExC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;YAE5B,IAAI,YAAY,CAAC,IAAI,EAAE,QAAQ,CAAC,EAAE,CAAC;gBACjC,0BAA0B;gBAC1B,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;gBAE1C,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;oBACnB,kDAAkD;oBAClD,aAAa,GAAG,CAAC,CAAC;gBACpB,CAAC;qBAAM,CAAC;oBACN,gDAAgD;oBAChD,SAAS,GAAG,KAAK,CAAC;gBACpB,CAAC;gBACD,MAAM;YACR,CAAC;QACH,CAAC;QAED,IAAI,aAAa,KAAK
,IAAI,EAAE,CAAC;YAC3B,MAAM,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;QAClC,CAAC;QAED,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAED,OAAO,mBAAmB,CAAC,MAAM,CAAC,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,KAAuC;IACxE,MAAM,MAAM,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAE1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,OAAO,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC;QAE5B,IAAI,OAAO,CAAC,GAAG,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;YAC7B,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAcD;;GAEG;AACH,MAAM,UAAU,sBAAsB,CACpC,YAAwE;IAExE,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;IACjD,MAAM,WAAW,GAAuB,EAAE,CAAC;IAC3C,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,MAAM,cAAc,GAAG,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC;QAC3C,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAC,MAAM,CAAC;QACzC,MAAM,KAAK,GAAG,SAAS,GAAG,cAAc,CAAC;QAEzC,WAAW,CAAC,IAAI,CAAC;YACf,aAAa,EAAE,GAAG,CAAC,KAAK;YACxB,WAAW,EAAE,GAAG,CAAC,GAAG;YACpB,QAAQ,EAAE,GAAG,CAAC,KAAK,GAAG,eAAe;YACrC,MAAM,EAAE,GAAG,CAAC,KAAK,GAAG,eAAe,GAAG,SAAS;YAC/C,KAAK;SACN,CAAC,CAAC;QAEH,eAAe,IAAI,KAAK,CAAC;IAC3B,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@elanlanguages/bridge-anonymization",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "On-device PII anonymization module for high-privacy translation",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"files": [
|
|
9
|
+
"dist",
|
|
10
|
+
"README.md"
|
|
11
|
+
],
|
|
12
|
+
"scripts": {
|
|
13
|
+
"build": "tsc",
|
|
14
|
+
"test": "vitest",
|
|
15
|
+
"test:run": "vitest run",
|
|
16
|
+
"lint": "eslint src --ext .ts",
|
|
17
|
+
"clean": "rm -rf dist",
|
|
18
|
+
"setup:ner": "bash scripts/setup-ner-model.sh",
|
|
19
|
+
"setup:ner:quantized": "bash scripts/setup-ner-model.sh --quantize",
|
|
20
|
+
"prepublishOnly": "npm run clean && npm run build"
|
|
21
|
+
},
|
|
22
|
+
"keywords": [
|
|
23
|
+
"anonymization",
|
|
24
|
+
"pii",
|
|
25
|
+
"privacy",
|
|
26
|
+
"ner",
|
|
27
|
+
"onnx"
|
|
28
|
+
],
|
|
29
|
+
"author": "",
|
|
30
|
+
"license": "MIT",
|
|
31
|
+
"repository": {
|
|
32
|
+
"type": "git",
|
|
33
|
+
"url": "git+https://github.com/elanlanguages/bridge-anonymization.git"
|
|
34
|
+
},
|
|
35
|
+
"dependencies": {},
|
|
36
|
+
"optionalDependencies": {
|
|
37
|
+
"onnxruntime-node": "^1.17.0"
|
|
38
|
+
},
|
|
39
|
+
"peerDependencies": {
|
|
40
|
+
"onnxruntime-node": "^1.17.0",
|
|
41
|
+
"onnxruntime-web": "^1.17.0"
|
|
42
|
+
},
|
|
43
|
+
"peerDependenciesMeta": {
|
|
44
|
+
"onnxruntime-node": {
|
|
45
|
+
"optional": true
|
|
46
|
+
},
|
|
47
|
+
"onnxruntime-web": {
|
|
48
|
+
"optional": true
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"devDependencies": {
|
|
52
|
+
"@types/node": "^20.10.0",
|
|
53
|
+
"typescript": "^5.3.0",
|
|
54
|
+
"vitest": "^1.0.0",
|
|
55
|
+
"eslint": "^8.55.0",
|
|
56
|
+
"@typescript-eslint/eslint-plugin": "^6.13.0",
|
|
57
|
+
"@typescript-eslint/parser": "^6.13.0"
|
|
58
|
+
},
|
|
59
|
+
"engines": {
|
|
60
|
+
"node": ">=18.0.0"
|
|
61
|
+
}
|
|
62
|
+
}
|