@elanlanguages/bridge-anonymization 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +73 -1
  2. package/dist/crypto/pii-map-crypto.d.ts.map +1 -1
  3. package/dist/crypto/pii-map-crypto.js +8 -8
  4. package/dist/crypto/pii-map-crypto.js.map +1 -1
  5. package/dist/index.d.ts +25 -20
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +103 -52
  8. package/dist/index.js.map +1 -1
  9. package/dist/ner/model-manager.d.ts.map +1 -1
  10. package/dist/ner/model-manager.js +10 -8
  11. package/dist/ner/model-manager.js.map +1 -1
  12. package/dist/ner/ner-model.d.ts.map +1 -1
  13. package/dist/ner/ner-model.js +10 -10
  14. package/dist/ner/ner-model.js.map +1 -1
  15. package/dist/ner/onnx-runtime.d.ts +3 -3
  16. package/dist/ner/onnx-runtime.d.ts.map +1 -1
  17. package/dist/ner/onnx-runtime.js +1 -1
  18. package/dist/ner/onnx-runtime.js.map +1 -1
  19. package/dist/ner/tokenizer.d.ts +26 -53
  20. package/dist/ner/tokenizer.d.ts.map +1 -1
  21. package/dist/ner/tokenizer.js +174 -196
  22. package/dist/ner/tokenizer.js.map +1 -1
  23. package/dist/pipeline/index.d.ts +7 -4
  24. package/dist/pipeline/index.d.ts.map +1 -1
  25. package/dist/pipeline/index.js +7 -4
  26. package/dist/pipeline/index.js.map +1 -1
  27. package/dist/pipeline/resolver.d.ts.map +1 -1
  28. package/dist/pipeline/resolver.js +3 -2
  29. package/dist/pipeline/resolver.js.map +1 -1
  30. package/dist/pipeline/semantic-data-loader.d.ts +157 -0
  31. package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
  32. package/dist/pipeline/semantic-data-loader.js +662 -0
  33. package/dist/pipeline/semantic-data-loader.js.map +1 -0
  34. package/dist/pipeline/semantic-enricher.d.ts +102 -0
  35. package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
  36. package/dist/pipeline/semantic-enricher.js +268 -0
  37. package/dist/pipeline/semantic-enricher.js.map +1 -0
  38. package/dist/pipeline/tagger.d.ts +52 -12
  39. package/dist/pipeline/tagger.d.ts.map +1 -1
  40. package/dist/pipeline/tagger.js +226 -21
  41. package/dist/pipeline/tagger.js.map +1 -1
  42. package/dist/pipeline/title-extractor.d.ts +79 -0
  43. package/dist/pipeline/title-extractor.d.ts.map +1 -0
  44. package/dist/pipeline/title-extractor.js +801 -0
  45. package/dist/pipeline/title-extractor.js.map +1 -0
  46. package/dist/types/index.d.ts +66 -3
  47. package/dist/types/index.d.ts.map +1 -1
  48. package/dist/types/index.js +14 -3
  49. package/dist/types/index.js.map +1 -1
  50. package/dist/utils/index.d.ts +3 -3
  51. package/dist/utils/index.js +3 -3
  52. package/package.json +7 -5
@@ -0,0 +1,157 @@
1
+ /**
2
+ * Semantic Data Loader
3
+ * Handles automatic downloading, caching, and parsing of semantic enrichment data.
4
+ *
5
+ * Data sources:
6
+ * - nam_dict.txt: Name-gender mappings from gender-guesser (~40K names)
7
+ * - cities15000.txt: GeoNames cities with population > 15,000 (~25K cities)
8
+ * - countryInfo.txt: Country names and codes (~250 countries)
9
+ * - admin1CodesASCII.txt: First-level admin divisions (~4K regions)
10
+ *
11
+ * Data is cached under the same platform-specific cache root as NER models, in a semantic-data/ subdirectory:
12
+ * - macOS: ~/Library/Caches/bridge-anonymization/semantic-data/
13
+ * - Linux: ~/.cache/bridge-anonymization/semantic-data/
14
+ * - Windows: %LOCALAPPDATA%/bridge-anonymization/semantic-data/
15
+ */
16
+ /**
17
+ * Loaded name-gender data: a `gender` string plus optional `localeOverrides` (string-to-string record; presumably locale -> gender, confirm against the loader)
18
+ */
19
+ interface NameEntry {
20
+ gender: string;
21
+ localeOverrides?: Record<string, string>;
22
+ }
23
+ /**
24
+ * Loaded location data for a city: `country` (presumably a country code; confirm against the loader) and a numeric `population`
25
+ */
26
+ interface CityEntry {
27
+ country: string;
28
+ population: number;
29
+ }
30
+ interface RegionEntry { // Loaded location data for a region (module-private: declared without `export`)
31
+ country: string; // presumably a country code; confirm against the loader
32
+ name: string;
33
+ }
34
+ /**
35
+ * Semantic data store: string-keyed lookup maps for names, cities, countries and regions, plus a `loaded` flag
36
+ */
37
+ interface SemanticData {
38
+ names: Map<string, NameEntry>;
39
+ cities: Map<string, CityEntry>;
40
+ countries: Map<string, string>;
41
+ regions: Map<string, RegionEntry>;
42
+ loaded: boolean;
43
+ }
44
+ /**
45
+ * Returns the cache directory for semantic data as a string
46
+ * Uses the platform-specific cache location (same as NER models)
47
+ */
48
+ export declare function getSemanticDataCacheDir(): string;
49
+ /**
50
+ * Gets the path to the data directory (alias kept for backwards compatibility; presumably equivalent to getSemanticDataCacheDir, confirm in the implementation)
51
+ */
52
+ export declare function getDataDirectory(): string;
53
+ /**
54
+ * Semantic data file info: metadata describing one semantic data file
55
+ */
56
+ export interface SemanticDataFileInfo {
57
+ /** Filename */
58
+ filename: string;
59
+ /** Download URL */
60
+ url: string;
61
+ /** Whether the file is required */
62
+ required: boolean;
63
+ /** Description */
64
+ description: string;
65
+ /** Approximate size (a string, not a numeric byte count) */
66
+ size: string;
67
+ }
68
+ /**
69
+ * Registry of semantic data files and their download URLs (an array of SemanticDataFileInfo entries)
70
+ */
71
+ export declare const SEMANTIC_DATA_FILES: SemanticDataFileInfo[];
72
+ /**
73
+ * Progress callback for downloads; `totalBytes` and `percent` may be null (presumably when the total size is unknown; confirm in the implementation)
74
+ */
75
+ export type SemanticDownloadProgressCallback = (progress: {
76
+ file: string;
77
+ bytesDownloaded: number;
78
+ totalBytes: number | null;
79
+ percent: number | null;
80
+ }) => void;
81
+ /**
82
+ * Checks if semantic data is already downloaded (asynchronous; resolves to a boolean)
83
+ */
84
+ export declare function isSemanticDataDownloaded(): Promise<boolean>;
85
+ /**
86
+ * Checks if the semantic data files are available (synchronous version; returns the boolean directly rather than a Promise)
87
+ */
88
+ export declare function isSemanticDataAvailable(): boolean;
89
+ /**
90
+ * Downloads all semantic data files; both callbacks are optional. Resolves to a string (presumably the cache directory path; confirm in the implementation)
91
+ */
92
+ export declare function downloadSemanticData(onProgress?: SemanticDownloadProgressCallback, onStatus?: (status: string) => void): Promise<string>;
93
+ /**
94
+ * Ensures semantic data is available, downloading if needed; every option is optional. Resolves to a string (presumably the data directory path; confirm in the implementation)
95
+ */
96
+ export declare function ensureSemanticData(options?: {
97
+ autoDownload?: boolean;
98
+ onProgress?: SemanticDownloadProgressCallback;
99
+ onStatus?: (status: string) => void;
100
+ }): Promise<string>;
101
+ /**
102
+ * Clears cached semantic data (asynchronous; resolves with no value)
103
+ */
104
+ export declare function clearSemanticDataCache(): Promise<void>;
105
+ /**
106
+ * Gets info about semantic data files: the file list, the cache directory and a total size string
107
+ */
108
+ export declare function getSemanticDataInfo(): {
109
+ files: SemanticDataFileInfo[];
110
+ cacheDir: string;
111
+ totalSize: string;
112
+ };
113
+ /**
114
+ * Initializes semantic data (downloads if needed, then loads); accepts the same option fields as ensureSemanticData and resolves with no value
115
+ */
116
+ export declare function initializeSemanticData(options?: {
117
+ autoDownload?: boolean;
118
+ onProgress?: SemanticDownloadProgressCallback;
119
+ onStatus?: (status: string) => void;
120
+ }): Promise<void>;
121
+ /**
122
+ * Loads semantic data from cached files (synchronous; returns the SemanticData store)
123
+ * @throws Error if required data files are not available
124
+ */
125
+ export declare function loadSemanticData(): SemanticData;
126
+ /**
127
+ * Gets the loaded semantic data (loads if not already loaded); returns the SemanticData store synchronously
128
+ */
129
+ export declare function getSemanticData(): SemanticData;
130
+ /**
131
+ * Clears the loaded semantic data (useful for testing); synchronous, unlike clearSemanticDataCache which returns a Promise
132
+ */
133
+ export declare function clearSemanticData(): void;
134
+ /**
135
+ * Looks up gender for a name, with an optional locale; returns a string, or undefined (presumably when the name is not found; confirm in the implementation)
136
+ */
137
+ export declare function lookupGender(name: string, locale?: string): string | undefined;
138
+ /**
139
+ * Looks up location type (city, country, or region); returns undefined, or an object with `type` and an optional `countryCode`
140
+ * Priority: country > major city (pop > 500K) > region > other cities
141
+ */
142
+ export declare function lookupLocationType(location: string): {
143
+ type: "city" | "country" | "region";
144
+ countryCode?: string;
145
+ } | undefined;
146
+ /**
147
+ * Gets statistics about loaded data: numeric values for names, cities, countries and regions (presumably entry counts; confirm), plus the `loaded` flag
148
+ */
149
+ export declare function getDataStats(): {
150
+ names: number;
151
+ cities: number;
152
+ countries: number;
153
+ regions: number;
154
+ loaded: boolean;
155
+ };
156
+ export {};
157
+ //# sourceMappingURL=semantic-data-loader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-data-loader.d.ts","sourceRoot":"","sources":["../../src/pipeline/semantic-data-loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AA+EH;;GAEG;AACH,UAAU,SAAS;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC1C;AAED;;GAEG;AACH,UAAU,SAAS;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,UAAU,WAAW;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,UAAU,YAAY;IACpB,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAC9B,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAC/B,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;IAClC,MAAM,EAAE,OAAO,CAAC;CACjB;AASD;;;GAGG;AACH,wBAAgB,uBAAuB,IAAI,MAAM,CA0BhD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,MAAM,CAEzC;AAMD;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,eAAe;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,mBAAmB;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,+BAA+B;IAC/B,QAAQ,EAAE,OAAO,CAAC;IAClB,kBAAkB;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,eAAO,MAAM,mBAAmB,EAAE,oBAAoB,EA6BrD,CAAC;AAMF;;GAEG;AACH,MAAM,MAAM,gCAAgC,GAAG,CAAC,QAAQ,EAAE;IACxD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CACxB,KAAK,IAAI,CAAC;AAEX;;GAEG;AACH,wBAAsB,wBAAwB,IAAI,OAAO,CAAC,OAAO,CAAC,CAcjE;AAED;;GAEG;AACH,wBAAgB,uBAAuB,IAAI,OAAO,CAYjD;AAoGD;;GAEG;AACH,wBAAsB,oBAAoB,CACxC,UAAU,CAAC,EAAE,gCAAgC,EAC7C,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,GAClC,OAAO,CAAC,MAAM,CAAC,CAuCjB;AAED;;GAEG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,GAAE;IACP,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,UAAU,CAAC,EAAE,gCAAgC,CAAC;IAC9C,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CAChC,GACL,OAAO,CAAC,MAAM,CAAC,CAyBjB;AAED;;GAEG;AACH,wBAAsB,sBAAsB,IAAI,OAAO,CAAC,IAAI,CAAC,CAK5D;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI;IACrC,KAAK,EAAE,oBAAoB,EAAE,CAAC;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB,CAMA;AAsQD;;GAEG;AACH,wBAAsB,sBAAsB,CAC1C,OAAO,GAAE;IACP,YAAY,CAAC,EAAE,OAAO,CAAC;
IACvB,UAAU,CAAC,EAAE,gCAAgC,CAAC;IAC9C,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CAChC,GACL,OAAO,CAAC,IAAI,CAAC,CAMf;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,IAAI,YAAY,CA4B/C;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,YAAY,CAK9C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,IAAI,CAExC;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,MAAM,GACd,MAAM,GAAG,SAAS,CAiBpB;AAOD;;;GAGG;AACH,wBAAgB,kBAAkB,CAChC,QAAQ,EAAE,MAAM,GACf;IAAE,IAAI,EAAE,MAAM,GAAG,SAAS,GAAG,QAAQ,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,SAAS,CA4B3E;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,CAAC;CACjB,CAYA"}