@elanlanguages/bridge-anonymization 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -1
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -1
- package/dist/crypto/pii-map-crypto.js +8 -8
- package/dist/crypto/pii-map-crypto.js.map +1 -1
- package/dist/index.d.ts +25 -20
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +103 -52
- package/dist/index.js.map +1 -1
- package/dist/ner/model-manager.d.ts.map +1 -1
- package/dist/ner/model-manager.js +10 -8
- package/dist/ner/model-manager.js.map +1 -1
- package/dist/ner/ner-model.d.ts.map +1 -1
- package/dist/ner/ner-model.js +10 -10
- package/dist/ner/ner-model.js.map +1 -1
- package/dist/ner/onnx-runtime.d.ts +3 -3
- package/dist/ner/onnx-runtime.d.ts.map +1 -1
- package/dist/ner/onnx-runtime.js +1 -1
- package/dist/ner/onnx-runtime.js.map +1 -1
- package/dist/ner/tokenizer.d.ts +26 -53
- package/dist/ner/tokenizer.d.ts.map +1 -1
- package/dist/ner/tokenizer.js +174 -196
- package/dist/ner/tokenizer.js.map +1 -1
- package/dist/pipeline/index.d.ts +7 -4
- package/dist/pipeline/index.d.ts.map +1 -1
- package/dist/pipeline/index.js +7 -4
- package/dist/pipeline/index.js.map +1 -1
- package/dist/pipeline/resolver.d.ts.map +1 -1
- package/dist/pipeline/resolver.js +3 -2
- package/dist/pipeline/resolver.js.map +1 -1
- package/dist/pipeline/semantic-data-loader.d.ts +157 -0
- package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
- package/dist/pipeline/semantic-data-loader.js +662 -0
- package/dist/pipeline/semantic-data-loader.js.map +1 -0
- package/dist/pipeline/semantic-enricher.d.ts +102 -0
- package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
- package/dist/pipeline/semantic-enricher.js +268 -0
- package/dist/pipeline/semantic-enricher.js.map +1 -0
- package/dist/pipeline/tagger.d.ts +52 -12
- package/dist/pipeline/tagger.d.ts.map +1 -1
- package/dist/pipeline/tagger.js +226 -21
- package/dist/pipeline/tagger.js.map +1 -1
- package/dist/pipeline/title-extractor.d.ts +79 -0
- package/dist/pipeline/title-extractor.d.ts.map +1 -0
- package/dist/pipeline/title-extractor.js +801 -0
- package/dist/pipeline/title-extractor.js.map +1 -0
- package/dist/types/index.d.ts +66 -3
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +14 -3
- package/dist/types/index.js.map +1 -1
- package/dist/utils/index.d.ts +3 -3
- package/dist/utils/index.js +3 -3
- package/package.json +7 -5
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
 * Semantic Data Loader
 * Handles automatic downloading, caching, and parsing of semantic enrichment data.
 *
 * Data sources:
 * - nam_dict.txt: Name-gender mappings from gender-guesser (~40K names)
 * - cities15000.txt: GeoNames cities with population > 15,000 (~25K cities)
 * - countryInfo.txt: Country names and codes (~250 countries)
 * - admin1CodesASCII.txt: First-level admin divisions (~4K regions)
 *
 * Data is cached in the same location as NER models:
 * - macOS: ~/Library/Caches/bridge-anonymization/semantic-data/
 * - Linux: ~/.cache/bridge-anonymization/semantic-data/
 * - Windows: %LOCALAPPDATA%/bridge-anonymization/semantic-data/
 */
/**
 * Loaded name-gender data: the value type of the `names` map in `SemanticData`.
 */
interface NameEntry {
    /**
     * Gender recorded for the name. The set of possible string values is not
     * visible in this declaration file.
     */
    gender: string;
    /**
     * Optional string-to-string overrides.
     * NOTE(review): presumably keyed by locale and consulted by
     * `lookupGender(name, locale)` — confirm against the implementation.
     */
    localeOverrides?: Record<string, string>;
}
/**
 * Loaded location data: the value type of the `cities` map in `SemanticData`.
 */
interface CityEntry {
    /**
     * Country of the city.
     * NOTE(review): presumably a country code taken from the GeoNames data — confirm.
     */
    country: string;
    /** Population figure for the city. */
    population: number;
}
/**
 * Loaded region data: the value type of the `regions` map in `SemanticData`.
 */
interface RegionEntry {
    /**
     * Country the region belongs to.
     * NOTE(review): presumably a country code — confirm against the loader.
     */
    country: string;
    /** Name of the region. */
    name: string;
}
/**
 * Semantic data store, as returned by `loadSemanticData()` and `getSemanticData()`.
 *
 * NOTE(review): the key format of each map (for example whether keys are
 * normalised or lower-cased) is not visible in this declaration file —
 * confirm against the loader before relying on it.
 */
interface SemanticData {
    /** Name-gender entries. */
    names: Map<string, NameEntry>;
    /** City entries. */
    cities: Map<string, CityEntry>;
    /** Country entries (string-to-string; presumably name to country code — confirm). */
    countries: Map<string, string>;
    /** Region entries. */
    regions: Map<string, RegionEntry>;
    /** Whether the data has been loaded. */
    loaded: boolean;
}
/**
 * Gets the cache directory for semantic data.
 * Uses the platform-specific cache location (same as NER models).
 *
 * @returns The cache directory path.
 */
export declare function getSemanticDataCacheDir(): string;
/**
 * Gets the path to the data directory (alias kept for backwards compatibility).
 *
 * NOTE(review): presumably returns the same path as
 * `getSemanticDataCacheDir()` — confirm against the implementation.
 *
 * @returns The data directory path.
 */
export declare function getDataDirectory(): string;
/**
 * Semantic data file info: describes one downloadable data file.
 */
export interface SemanticDataFileInfo {
    /** Filename */
    filename: string;
    /** Download URL */
    url: string;
    /** Whether file is required */
    required: boolean;
    /** Description */
    description: string;
    /** Approximate size (a display string, not a byte count) */
    size: string;
}
/**
 * Registry of semantic data files and their download URLs.
 * Each element describes one file (see `SemanticDataFileInfo`).
 */
export declare const SEMANTIC_DATA_FILES: SemanticDataFileInfo[];
/**
 * Progress callback for downloads.
 *
 * The `progress` argument carries:
 * - `file`: the file the report refers to
 * - `bytesDownloaded`: number of bytes downloaded
 * - `totalBytes`: total size, or `null` (presumably when the size is unknown — confirm)
 * - `percent`: completion percentage, or `null` (presumably when `totalBytes` is unknown — confirm)
 */
export type SemanticDownloadProgressCallback = (progress: {
    file: string;
    bytesDownloaded: number;
    totalBytes: number | null;
    percent: number | null;
}) => void;
/**
 * Checks if semantic data is already downloaded (asynchronous version).
 *
 * @returns A promise resolving to `true` when the data is already downloaded.
 */
export declare function isSemanticDataDownloaded(): Promise<boolean>;
/**
 * Checks if the semantic data files are available (synchronous version).
 *
 * @returns `true` when the semantic data files are available.
 */
export declare function isSemanticDataAvailable(): boolean;
/**
 * Downloads all semantic data files.
 *
 * @param onProgress - Optional download progress callback.
 * @param onStatus - Optional callback that receives status strings.
 * @returns A promise resolving to a string.
 *   NOTE(review): presumably the directory the files were saved to — confirm
 *   against the implementation.
 */
export declare function downloadSemanticData(onProgress?: SemanticDownloadProgressCallback, onStatus?: (status: string) => void): Promise<string>;
/**
 * Ensures semantic data is available, downloading if needed.
 *
 * @param options - Optional settings:
 *   - `autoDownload`: whether missing data may be downloaded (the default is
 *     not visible in this declaration file)
 *   - `onProgress`: download progress callback
 *   - `onStatus`: callback that receives status strings
 * @returns A promise resolving to a string.
 *   NOTE(review): presumably the data directory path — confirm against the
 *   implementation.
 */
export declare function ensureSemanticData(options?: {
    autoDownload?: boolean;
    onProgress?: SemanticDownloadProgressCallback;
    onStatus?: (status: string) => void;
}): Promise<string>;
/**
 * Clears cached semantic data.
 *
 * @returns A promise that resolves with no value.
 */
export declare function clearSemanticDataCache(): Promise<void>;
/**
 * Gets info about semantic data files.
 *
 * @returns An object with:
 *   - `files`: descriptors of the semantic data files
 *   - `cacheDir`: the cache directory
 *   - `totalSize`: total size as a display string
 */
export declare function getSemanticDataInfo(): {
    files: SemanticDataFileInfo[];
    cacheDir: string;
    totalSize: string;
};
/**
 * Initializes semantic data (downloads if needed, then loads).
 *
 * @param options - Optional settings:
 *   - `autoDownload`: whether missing data may be downloaded (the default is
 *     not visible in this declaration file)
 *   - `onProgress`: download progress callback
 *   - `onStatus`: callback that receives status strings
 * @returns A promise that resolves with no value.
 */
export declare function initializeSemanticData(options?: {
    autoDownload?: boolean;
    onProgress?: SemanticDownloadProgressCallback;
    onStatus?: (status: string) => void;
}): Promise<void>;
/**
 * Loads semantic data from cached files (synchronous).
 *
 * @returns The loaded semantic data store.
 * @throws Error if required data files are not available
 */
export declare function loadSemanticData(): SemanticData;
/**
 * Gets the loaded semantic data (loads if not already loaded).
 *
 * @returns The semantic data store.
 */
export declare function getSemanticData(): SemanticData;
/**
 * Clears the loaded semantic data (useful for testing).
 */
export declare function clearSemanticData(): void;
/**
 * Looks up gender for a name.
 *
 * @param name - The name to look up.
 * @param locale - Optional locale.
 *   NOTE(review): presumably selects a locale-specific override of the
 *   gender — confirm against the implementation.
 * @returns The gender string, or `undefined` (presumably when the name is
 *   not found — confirm).
 */
export declare function lookupGender(name: string, locale?: string): string | undefined;
/**
 * Looks up location type (city, country, or region).
 * Priority: country > major city (pop > 500K) > region > other cities
 *
 * @param location - The location string to classify.
 * @returns The location type with an optional country code, or `undefined`
 *   (presumably when the location is not found — confirm).
 */
export declare function lookupLocationType(location: string): {
    type: "city" | "country" | "region";
    countryCode?: string;
} | undefined;
/**
 * Gets statistics about loaded data.
 *
 * @returns One number per data category (`names`, `cities`, `countries`,
 *   `regions`; presumably entry counts — confirm) plus the `loaded` flag.
 */
export declare function getDataStats(): {
    names: number;
    cities: number;
    countries: number;
    regions: number;
    loaded: boolean;
};
export {};
|
|
157
|
+
//# sourceMappingURL=semantic-data-loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-data-loader.d.ts","sourceRoot":"","sources":["../../src/pipeline/semantic-data-loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AA+EH;;GAEG;AACH,UAAU,SAAS;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC1C;AAED;;GAEG;AACH,UAAU,SAAS;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,UAAU,WAAW;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,UAAU,YAAY;IACpB,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAC9B,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAC/B,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;IAClC,MAAM,EAAE,OAAO,CAAC;CACjB;AASD;;;GAGG;AACH,wBAAgB,uBAAuB,IAAI,MAAM,CA0BhD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,MAAM,CAEzC;AAMD;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,eAAe;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,mBAAmB;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,+BAA+B;IAC/B,QAAQ,EAAE,OAAO,CAAC;IAClB,kBAAkB;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,eAAO,MAAM,mBAAmB,EAAE,oBAAoB,EA6BrD,CAAC;AAMF;;GAEG;AACH,MAAM,MAAM,gCAAgC,GAAG,CAAC,QAAQ,EAAE;IACxD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CACxB,KAAK,IAAI,CAAC;AAEX;;GAEG;AACH,wBAAsB,wBAAwB,IAAI,OAAO,CAAC,OAAO,CAAC,CAcjE;AAED;;GAEG;AACH,wBAAgB,uBAAuB,IAAI,OAAO,CAYjD;AAoGD;;GAEG;AACH,wBAAsB,oBAAoB,CACxC,UAAU,CAAC,EAAE,gCAAgC,EAC7C,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,GAClC,OAAO,CAAC,MAAM,CAAC,CAuCjB;AAED;;GAEG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,GAAE;IACP,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,UAAU,CAAC,EAAE,gCAAgC,CAAC;IAC9C,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CAChC,GACL,OAAO,CAAC,MAAM,CAAC,CAyBjB;AAED;;GAEG;AACH,wBAAsB,sBAAsB,IAAI,OAAO,CAAC,IAAI,CAAC,CAK5D;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI;IACrC,KAAK,EAAE,oBAAoB,EAAE,CAAC;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB,CAMA;AAsQD;;GAEG;AACH,wBAAsB,sBAAsB,CAC1C,OAAO,GAAE;IACP,YAAY,CAAC,EAAE,OAAO,CAAC;IA
CvB,UAAU,CAAC,EAAE,gCAAgC,CAAC;IAC9C,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CAChC,GACL,OAAO,CAAC,IAAI,CAAC,CAMf;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,IAAI,YAAY,CA4B/C;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,YAAY,CAK9C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,IAAI,CAExC;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,MAAM,GACd,MAAM,GAAG,SAAS,CAiBpB;AAOD;;;GAGG;AACH,wBAAgB,kBAAkB,CAChC,QAAQ,EAAE,MAAM,GACf;IAAE,IAAI,EAAE,MAAM,GAAG,SAAS,GAAG,QAAQ,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,SAAS,CA4B3E;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,CAAC;CACjB,CAYA"}
|