catalist-support-agent 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/admin-portal.d.ts +43 -0
- package/dist/admin-portal.d.ts.map +1 -0
- package/dist/admin-portal.js +166 -0
- package/dist/admin-portal.js.map +1 -0
- package/dist/analysis/entities.d.ts +73 -0
- package/dist/analysis/entities.d.ts.map +1 -0
- package/dist/analysis/entities.js +378 -0
- package/dist/analysis/entities.js.map +1 -0
- package/dist/analysis/index.d.ts +44 -0
- package/dist/analysis/index.d.ts.map +1 -0
- package/dist/analysis/index.js +243 -0
- package/dist/analysis/index.js.map +1 -0
- package/dist/analysis/intent.d.ts +49 -0
- package/dist/analysis/intent.d.ts.map +1 -0
- package/dist/analysis/intent.js +320 -0
- package/dist/analysis/intent.js.map +1 -0
- package/dist/analysis/sentiment.d.ts +57 -0
- package/dist/analysis/sentiment.d.ts.map +1 -0
- package/dist/analysis/sentiment.js +351 -0
- package/dist/analysis/sentiment.js.map +1 -0
- package/dist/brand/compliance.d.ts +122 -0
- package/dist/brand/compliance.d.ts.map +1 -0
- package/dist/brand/compliance.js +378 -0
- package/dist/brand/compliance.js.map +1 -0
- package/dist/brand/forbidden-terms.d.ts +99 -0
- package/dist/brand/forbidden-terms.d.ts.map +1 -0
- package/dist/brand/forbidden-terms.js +265 -0
- package/dist/brand/forbidden-terms.js.map +1 -0
- package/dist/brand/index.d.ts +10 -0
- package/dist/brand/index.d.ts.map +1 -0
- package/dist/brand/index.js +12 -0
- package/dist/brand/index.js.map +1 -0
- package/dist/config.d.ts +325 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +492 -0
- package/dist/config.js.map +1 -0
- package/dist/delivery/index.d.ts +84 -0
- package/dist/delivery/index.d.ts.map +1 -0
- package/dist/delivery/index.js +435 -0
- package/dist/delivery/index.js.map +1 -0
- package/dist/embeddings/cache.d.ts +96 -0
- package/dist/embeddings/cache.d.ts.map +1 -0
- package/dist/embeddings/cache.js +193 -0
- package/dist/embeddings/cache.js.map +1 -0
- package/dist/embeddings/index.d.ts +152 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +337 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/openai-client.d.ts +67 -0
- package/dist/embeddings/openai-client.d.ts.map +1 -0
- package/dist/embeddings/openai-client.js +190 -0
- package/dist/embeddings/openai-client.js.map +1 -0
- package/dist/errors.d.ts +302 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +508 -0
- package/dist/errors.js.map +1 -0
- package/dist/escalation/index.d.ts +93 -0
- package/dist/escalation/index.d.ts.map +1 -0
- package/dist/escalation/index.js +436 -0
- package/dist/escalation/index.js.map +1 -0
- package/dist/extraction/deduplication.d.ts +97 -0
- package/dist/extraction/deduplication.d.ts.map +1 -0
- package/dist/extraction/deduplication.js +271 -0
- package/dist/extraction/deduplication.js.map +1 -0
- package/dist/extraction/gmail-extractor.d.ts +160 -0
- package/dist/extraction/gmail-extractor.d.ts.map +1 -0
- package/dist/extraction/gmail-extractor.js +396 -0
- package/dist/extraction/gmail-extractor.js.map +1 -0
- package/dist/extraction/gmail-token-manager.d.ts +36 -0
- package/dist/extraction/gmail-token-manager.d.ts.map +1 -0
- package/dist/extraction/gmail-token-manager.js +146 -0
- package/dist/extraction/gmail-token-manager.js.map +1 -0
- package/dist/extraction/index.d.ts +13 -0
- package/dist/extraction/index.d.ts.map +1 -0
- package/dist/extraction/index.js +20 -0
- package/dist/extraction/index.js.map +1 -0
- package/dist/extraction/pii-handler.d.ts +100 -0
- package/dist/extraction/pii-handler.d.ts.map +1 -0
- package/dist/extraction/pii-handler.js +295 -0
- package/dist/extraction/pii-handler.js.map +1 -0
- package/dist/extraction/pipeline.d.ts +94 -0
- package/dist/extraction/pipeline.d.ts.map +1 -0
- package/dist/extraction/pipeline.js +380 -0
- package/dist/extraction/pipeline.js.map +1 -0
- package/dist/extraction/quality-filter.d.ts +99 -0
- package/dist/extraction/quality-filter.d.ts.map +1 -0
- package/dist/extraction/quality-filter.js +370 -0
- package/dist/extraction/quality-filter.js.map +1 -0
- package/dist/extraction/rate-limiter.d.ts +90 -0
- package/dist/extraction/rate-limiter.d.ts.map +1 -0
- package/dist/extraction/rate-limiter.js +242 -0
- package/dist/extraction/rate-limiter.js.map +1 -0
- package/dist/extraction/state-manager.d.ts +126 -0
- package/dist/extraction/state-manager.d.ts.map +1 -0
- package/dist/extraction/state-manager.js +344 -0
- package/dist/extraction/state-manager.js.map +1 -0
- package/dist/generation/index.d.ts +75 -0
- package/dist/generation/index.d.ts.map +1 -0
- package/dist/generation/index.js +641 -0
- package/dist/generation/index.js.map +1 -0
- package/dist/index.d.ts +96 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +233 -0
- package/dist/index.js.map +1 -0
- package/dist/intake/index.d.ts +15 -0
- package/dist/intake/index.d.ts.map +1 -0
- package/dist/intake/index.js +19 -0
- package/dist/intake/index.js.map +1 -0
- package/dist/intake/normalizer.d.ts +163 -0
- package/dist/intake/normalizer.d.ts.map +1 -0
- package/dist/intake/normalizer.js +309 -0
- package/dist/intake/normalizer.js.map +1 -0
- package/dist/intake/postmark.d.ts +72 -0
- package/dist/intake/postmark.d.ts.map +1 -0
- package/dist/intake/postmark.js +276 -0
- package/dist/intake/postmark.js.map +1 -0
- package/dist/intake/slack.d.ts +106 -0
- package/dist/intake/slack.d.ts.map +1 -0
- package/dist/intake/slack.js +378 -0
- package/dist/intake/slack.js.map +1 -0
- package/dist/intake/twilio.d.ts +86 -0
- package/dist/intake/twilio.d.ts.map +1 -0
- package/dist/intake/twilio.js +283 -0
- package/dist/intake/twilio.js.map +1 -0
- package/dist/knowledge/index.d.ts +100 -0
- package/dist/knowledge/index.d.ts.map +1 -0
- package/dist/knowledge/index.js +516 -0
- package/dist/knowledge/index.js.map +1 -0
- package/dist/knowledge/invoice-resolver.d.ts +62 -0
- package/dist/knowledge/invoice-resolver.d.ts.map +1 -0
- package/dist/knowledge/invoice-resolver.js +267 -0
- package/dist/knowledge/invoice-resolver.js.map +1 -0
- package/dist/types.d.ts +535 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +48 -0
- package/dist/types.js.map +1 -0
- package/ga-service-account.json +13 -0
- package/gmail-knowledge-migration.sql +149 -0
- package/nul +1 -0
- package/package.json +55 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PII Handler Module
|
|
3
|
+
*
|
|
4
|
+
* Detects and redacts personally identifiable information (PII) from email content.
|
|
5
|
+
* Supports email addresses, phone numbers, SSNs, credit cards, physical addresses, dates of birth, IPv4 addresses, and bank account numbers.
|
|
6
|
+
*/
|
|
7
|
+
/** Categories of PII the handler can report; each literal is also a key of the pattern and placeholder tables in pii-handler.js. */
export type PIIType = 'email' | 'phone' | 'ssn' | 'credit_card' | 'address' | 'date_of_birth' | 'ip_address' | 'bank_account';
|
|
8
|
+
/** A single PII occurrence located in the scanned text. */
export interface PIIMatch {
|
|
9
|
+
/** Category whose pattern produced this match. */
type: PIIType;
|
|
10
|
+
/** Full text matched by the pattern (for bank accounts this includes the leading "account"/"acct" keyword). */
value: string;
|
|
11
|
+
/** Index of the first matched character in the scanned text. */
start: number;
|
|
12
|
+
/** Index just past the last matched character (start + value.length). */
end: number;
|
|
13
|
+
/** Placeholder written in place of the match, e.g. "[EMAIL_REDACTED]". */
replacement: string;
|
|
14
|
+
}
|
|
15
|
+
/** Outcome of a redaction pass over one piece of text. */
export interface PIIResult {
|
|
16
|
+
/** Text with placeholders substituted; the input is returned as-is when nothing was redacted. */
text: string;
|
|
17
|
+
/** True when at least one match was found and replaced. */
redacted: boolean;
|
|
18
|
+
/** Distinct PII categories among the matches (empty when nothing was found). */
piiTypesFound: PIIType[];
|
|
19
|
+
/** Matches reported by detection, ordered by descending start offset. */
matches: PIIMatch[];
|
|
20
|
+
}
|
|
21
|
+
/** Options controlling which PII categories are scanned and which email addresses are exempt. */
export interface PIIHandlerConfig {
|
|
22
|
+
/** Master switch: when false, detection returns no matches and redaction returns the text unchanged. */
enabled: boolean;
|
|
23
|
+
/** Email suffixes such as "@example.com"; addresses ending with one of them (case-insensitive) are not reported. */
preserveCompanyEmails?: string[];
|
|
24
|
+
/** Per-category toggles; a category is scanned only when its flag is truthy. */
redactPatterns?: {
|
|
25
|
+
email?: boolean;
|
|
26
|
+
phone?: boolean;
|
|
27
|
+
ssn?: boolean;
|
|
28
|
+
/** Controls the 'credit_card' category. */
creditCard?: boolean;
|
|
29
|
+
address?: boolean;
|
|
30
|
+
/** Controls the 'date_of_birth' category. */
dateOfBirth?: boolean;
|
|
31
|
+
/** Controls the 'ip_address' category; the handler's built-in default for this flag is false. */
ipAddress?: boolean;
|
|
32
|
+
/** Controls the 'bank_account' category. */
bankAccount?: boolean;
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
export declare class PIIHandler {
|
|
36
|
+
private config;
|
|
37
|
+
private preserveEmailDomains;
|
|
38
|
+
constructor(config?: Partial<PIIHandlerConfig>);
|
|
39
|
+
/**
|
|
40
|
+
* Detect PII in text without redaction
|
|
41
|
+
*/
|
|
42
|
+
detect(text: string): PIIMatch[];
|
|
43
|
+
/**
|
|
44
|
+
* Redact PII from text
|
|
45
|
+
*/
|
|
46
|
+
redact(text: string): PIIResult;
|
|
47
|
+
/**
|
|
48
|
+
* Check if text contains any PII
|
|
49
|
+
*/
|
|
50
|
+
containsPII(text: string): boolean;
|
|
51
|
+
/**
|
|
52
|
+
* Get PII types found in text
|
|
53
|
+
*/
|
|
54
|
+
getPIITypes(text: string): PIIType[];
|
|
55
|
+
/**
|
|
56
|
+
* Check if an email should be preserved (company email)
|
|
57
|
+
*/
|
|
58
|
+
private shouldPreserveEmail;
|
|
59
|
+
/**
|
|
60
|
+
* Validate a match to reduce false positives
|
|
61
|
+
*/
|
|
62
|
+
private validateMatch;
|
|
63
|
+
/**
|
|
64
|
+
* Luhn algorithm for credit card validation
|
|
65
|
+
*/
|
|
66
|
+
private luhnCheck;
|
|
67
|
+
/**
|
|
68
|
+
* Get the config key for a PII type
|
|
69
|
+
*/
|
|
70
|
+
private getPatternKey;
|
|
71
|
+
/**
|
|
72
|
+
* Update configuration
|
|
73
|
+
*/
|
|
74
|
+
updateConfig(config: Partial<PIIHandlerConfig>): void;
|
|
75
|
+
/**
|
|
76
|
+
* Get current configuration
|
|
77
|
+
*/
|
|
78
|
+
getConfig(): PIIHandlerConfig;
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Get singleton PII handler instance
|
|
82
|
+
*/
|
|
83
|
+
export declare function getPIIHandler(): PIIHandler;
|
|
84
|
+
/**
|
|
85
|
+
* Reset the singleton (for testing)
|
|
86
|
+
*/
|
|
87
|
+
export declare function resetPIIHandler(): void;
|
|
88
|
+
/**
|
|
89
|
+
* Quick redaction function using singleton
|
|
90
|
+
*/
|
|
91
|
+
export declare function redactPII(text: string): PIIResult;
|
|
92
|
+
/**
|
|
93
|
+
* Quick detection function using singleton
|
|
94
|
+
*/
|
|
95
|
+
export declare function detectPII(text: string): PIIMatch[];
|
|
96
|
+
/**
|
|
97
|
+
* Check if text contains PII
|
|
98
|
+
*/
|
|
99
|
+
export declare function hasPII(text: string): boolean;
|
|
100
|
+
//# sourceMappingURL=pii-handler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pii-handler.d.ts","sourceRoot":"","sources":["../../src/extraction/pii-handler.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,MAAM,MAAM,OAAO,GACf,OAAO,GACP,OAAO,GACP,KAAK,GACL,aAAa,GACb,SAAS,GACT,eAAe,GACf,YAAY,GACZ,cAAc,CAAC;AAEnB,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,OAAO,CAAC;IAClB,aAAa,EAAE,OAAO,EAAE,CAAC;IACzB,OAAO,EAAE,QAAQ,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,OAAO,CAAC;IACjB,qBAAqB,CAAC,EAAE,MAAM,EAAE,CAAC;IACjC,cAAc,CAAC,EAAE;QACf,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,GAAG,CAAC,EAAE,OAAO,CAAC;QACd,UAAU,CAAC,EAAE,OAAO,CAAC;QACrB,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,SAAS,CAAC,EAAE,OAAO,CAAC;QACpB,WAAW,CAAC,EAAE,OAAO,CAAC;KACvB,CAAC;CACH;AAqDD,qBAAa,UAAU;IACrB,OAAO,CAAC,MAAM,CAAmB;IACjC,OAAO,CAAC,oBAAoB,CAAc;gBAE9B,MAAM,GAAE,OAAO,CAAC,gBAAgB,CAAM;IAuBlD;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,EAAE;IA+ChC;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS;IAyC/B;;OAEG;IACH,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IAIlC;;OAEG;IACH,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,EAAE;IAKpC;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAY3B;;OAEG;IACH,OAAO,CAAC,aAAa;IA6BrB;;OAEG;IACH,OAAO,CAAC,SAAS;IAuBjB;;OAEG;IACH,OAAO,CAAC,aAAa;IAiBrB;;OAEG;IACH,YAAY,CAAC,MAAM,EAAE,OAAO,CAAC,gBAAgB,CAAC,GAAG,IAAI;IAUrD;;OAEG;IACH,SAAS,IAAI,gBAAgB;CAG9B;AAQD;;GAEG;AACH,wBAAgB,aAAa,IAAI,UAAU,CAQ1C;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,IAAI,CAEtC;AAMD;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,CAEjD;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,EAAE,CAElD;AAED;;GAEG;AACH,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAE5C"}
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PII Handler Module
|
|
3
|
+
*
|
|
4
|
+
* Detects and redacts personally identifiable information (PII) from email content.
|
|
5
|
+
* Supports email addresses, phone numbers, SSNs, credit cards, physical addresses, dates of birth, IPv4 addresses, and bank account numbers.
|
|
6
|
+
*/
|
|
7
|
+
// =============================================================================
|
|
8
|
+
// PII Patterns
|
|
9
|
+
// =============================================================================
|
|
10
|
+
const PII_PATTERNS = {
  // Email addresses (RFC 5322 simplified)
  email: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/gi,
  // Phone numbers (US formats: XXX-XXX-XXXX, (XXX) XXX-XXXX, XXXXXXXXXX, +1XXXXXXXXXX)
  phone: /(?:\+?1[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}(?!\d)/g,
  // Social Security Numbers (XXX-XX-XXXX, separators optional)
  ssn: /\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b/g,
  // Credit card numbers (four groups of 4 digits with optional separators, or 15-16 contiguous digits)
  credit_card: /\b(?:\d{4}[-\s]?){3}\d{4}\b|\b\d{15,16}\b/g,
  // US street addresses (simplified - number + street + type)
  address: /\b\d{1,5}\s+(?:[A-Za-z]+\s+){1,4}(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Drive|Dr|Lane|Ln|Court|Ct|Way|Place|Pl|Circle|Cir|Highway|Hwy)\.?\s*(?:#?\s*\d+)?(?:,?\s*(?:Apt|Suite|Unit|Ste)\.?\s*#?\s*\d+)?\b/gi,
  // Date of birth patterns (MM/DD/YYYY, YYYY-MM-DD, etc.)
  date_of_birth: /\b(?:0?[1-9]|1[0-2])[-\/](?:0?[1-9]|[12]\d|3[01])[-\/](?:19|20)\d{2}\b|\b(?:19|20)\d{2}[-\/](?:0?[1-9]|1[0-2])[-\/](?:0?[1-9]|[12]\d|3[01])\b/g,
  // IP addresses (IPv4)
  ip_address: /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g,
  // Bank account numbers (8-17 digits introduced by an "account"/"acct" keyword)
  bank_account: /\b(?:account\s*#?:?\s*|acct\s*#?:?\s*)(\d{8,17})\b/gi,
};
// Replacement placeholders
const PII_REPLACEMENTS = {
  email: '[EMAIL_REDACTED]',
  phone: '[PHONE_REDACTED]',
  ssn: '[SSN_REDACTED]',
  credit_card: '[CC_REDACTED]',
  address: '[ADDRESS_REDACTED]',
  date_of_birth: '[DOB_REDACTED]',
  ip_address: '[IP_REDACTED]',
  bank_account: '[ACCOUNT_REDACTED]',
};
// Per-type switches applied when the caller does not override them
const DEFAULT_REDACT_PATTERNS = {
  email: true,
  phone: true,
  ssn: true,
  creditCard: true,
  address: true,
  dateOfBirth: true,
  ipAddress: false, // Often needed for debugging
  bankAccount: true,
};
/**
 * Collapse overlapping matches into disjoint spans so each region of the text
 * is replaced exactly once.
 *
 * Different patterns can hit the same characters (a contiguous 16-digit card
 * number also satisfies the phone pattern on its tail). Applying both
 * replacements by their original offsets corrupts the output, so overlapping
 * matches are merged: the span covers the union of the overlapping matches and
 * carries the placeholder of the match that starts first (the longer one on a tie).
 *
 * @param {Array<{start: number, end: number, replacement: string}>} matches
 * @returns {Array<{start: number, end: number, replacement: string}>} disjoint
 *   spans ordered by descending start, safe to apply back-to-front.
 */
function mergeOverlappingMatches(matches) {
  const ordered = [...matches].sort((a, b) => a.start - b.start || b.end - a.end);
  const spans = [];
  for (const { start, end, replacement } of ordered) {
    const last = spans[spans.length - 1];
    if (last && start < last.end) {
      last.end = Math.max(last.end, end);
    } else {
      spans.push({ start, end, replacement });
    }
  }
  return spans.reverse();
}
// =============================================================================
// PII Handler Class
// =============================================================================
export class PIIHandler {
  config;
  preserveEmailDomains;
  /**
   * @param {object} [config] - Partial configuration. `redactPatterns` is merged
   *   key-by-key over the defaults, so overriding one toggle keeps the others.
   */
  constructor(config = {}) {
    this.config = {
      enabled: true,
      preserveCompanyEmails: [],
      ...config,
      redactPatterns: { ...DEFAULT_REDACT_PATTERNS, ...config.redactPatterns },
    };
    // Build set of entries to preserve (lowercase)
    this.preserveEmailDomains = new Set((this.config.preserveCompanyEmails || []).map((d) => d.toLowerCase()));
  }
  /**
   * Detect PII in text without redaction.
   *
   * @param {string} text - Content to scan.
   * @returns {Array<object>} every validated match ({ type, value, start, end,
   *   replacement }) sorted by descending start; empty when the handler is
   *   disabled or the text is empty. Matches of different types may overlap.
   */
  detect(text) {
    if (!this.config.enabled || !text) {
      return [];
    }
    const matches = [];
    for (const [type, pattern] of Object.entries(PII_PATTERNS)) {
      // Skip categories that are switched off
      const patternKey = this.getPatternKey(type);
      if (!this.config.redactPatterns?.[patternKey]) {
        continue;
      }
      // The patterns are shared /g regexes: reset their scan position before reuse
      pattern.lastIndex = 0;
      let match;
      while ((match = pattern.exec(text)) !== null) {
        const value = match[0];
        const start = match.index;
        const end = start + value.length;
        // Company addresses on the preserve list are never reported
        if (type === 'email' && this.shouldPreserveEmail(value)) {
          continue;
        }
        // Validate match (reduce false positives)
        if (!this.validateMatch(type, value)) {
          continue;
        }
        matches.push({
          type,
          value,
          start,
          end,
          replacement: PII_REPLACEMENTS[type],
        });
      }
    }
    // Sort by position (descending)
    return matches.sort((a, b) => b.start - a.start);
  }
  /**
   * Redact PII from text.
   *
   * @param {string} text - Content to redact.
   * @returns {{text: string, redacted: boolean, piiTypesFound: string[], matches: object[]}}
   *   `matches` lists everything `detect` reported; overlapping matches are
   *   replaced by a single placeholder in `text`.
   */
  redact(text) {
    const matches = this.config.enabled && text ? this.detect(text) : [];
    if (matches.length === 0) {
      return {
        text,
        redacted: false,
        piiTypesFound: [],
        matches: [],
      };
    }
    // Apply replacements back-to-front so earlier offsets stay valid
    let redactedText = text;
    for (const span of mergeOverlappingMatches(matches)) {
      redactedText = `${redactedText.substring(0, span.start)}${span.replacement}${redactedText.substring(span.end)}`;
    }
    // Get unique PII types
    const piiTypesFound = [...new Set(matches.map((m) => m.type))];
    return {
      text: redactedText,
      redacted: true,
      piiTypesFound,
      matches,
    };
  }
  /**
   * Check if text contains any PII.
   * @param {string} text
   * @returns {boolean}
   */
  containsPII(text) {
    return this.detect(text).length > 0;
  }
  /**
   * Get the distinct PII types found in text.
   * @param {string} text
   * @returns {string[]}
   */
  getPIITypes(text) {
    const matches = this.detect(text);
    return [...new Set(matches.map((m) => m.type))];
  }
  /**
   * Check if an email should be preserved (company email).
   *
   * Entries starting with "@" match as a suffix. A bare domain matches that
   * domain and its subdomains only at a "@" or "." boundary, and a full
   * address must match exactly, so look-alike domains are not preserved.
   * Empty entries are ignored.
   *
   * @param {string} email
   * @returns {boolean}
   */
  shouldPreserveEmail(email) {
    const lowerEmail = email.toLowerCase();
    for (const entry of this.preserveEmailDomains) {
      if (!entry) {
        continue; // an empty suffix would match every address
      }
      const atIndex = entry.indexOf('@');
      if (atIndex === 0) {
        if (lowerEmail.endsWith(entry)) {
          return true;
        }
      } else if (atIndex > 0) {
        if (lowerEmail === entry) {
          return true;
        }
      } else if (lowerEmail.endsWith(`@${entry}`) || lowerEmail.endsWith(`.${entry}`)) {
        return true;
      }
    }
    return false;
  }
  /**
   * Validate a match to reduce false positives.
   * @param {string} type - PII category of the match.
   * @param {string} value - Matched text.
   * @returns {boolean} true when the match should be reported.
   */
  validateMatch(type, value) {
    switch (type) {
      case 'phone': {
        // Must have 10 digits, or 11 with a country code
        const digits = value.replace(/\D/g, '');
        return digits.length >= 10 && digits.length <= 11;
      }
      case 'ssn': {
        // SSN area number cannot be 000, 666, or 900-999
        const ssnDigits = value.replace(/\D/g, '');
        if (ssnDigits.length !== 9) {
          return false;
        }
        const areaNumber = parseInt(ssnDigits.substring(0, 3), 10);
        return !(areaNumber === 0 || areaNumber === 666 || areaNumber >= 900);
      }
      case 'credit_card': {
        // Validate using Luhn algorithm
        const ccDigits = value.replace(/\D/g, '');
        return ccDigits.length >= 15 && ccDigits.length <= 16 && this.luhnCheck(ccDigits);
      }
      case 'bank_account':
        // Must be preceded by account keyword
        return /account|acct/i.test(value);
      default:
        return true;
    }
  }
  /**
   * Luhn algorithm for credit card validation.
   * @param {string} cardNumber - Digits only.
   * @returns {boolean} true when the checksum is a multiple of 10.
   */
  luhnCheck(cardNumber) {
    let sum = 0;
    let isEven = false;
    // Walk right to left, doubling every second digit
    for (let i = cardNumber.length - 1; i >= 0; i--) {
      const char = cardNumber[i];
      if (!char) {
        continue;
      }
      let digit = parseInt(char, 10);
      if (isEven) {
        digit *= 2;
        if (digit > 9) {
          digit -= 9;
        }
      }
      sum += digit;
      isEven = !isEven;
    }
    return sum % 10 === 0;
  }
  /**
   * Get the config key for a PII type (snake_case type -> camelCase toggle).
   * @param {string} type
   * @returns {string}
   */
  getPatternKey(type) {
    switch (type) {
      case 'credit_card':
        return 'creditCard';
      case 'date_of_birth':
        return 'dateOfBirth';
      case 'ip_address':
        return 'ipAddress';
      case 'bank_account':
        return 'bankAccount';
      default:
        return type;
    }
  }
  /**
   * Update configuration. `redactPatterns` is merged key-by-key into the
   * current toggles rather than replacing them wholesale.
   * @param {object} config - Partial configuration.
   */
  updateConfig(config) {
    this.config = {
      ...this.config,
      ...config,
      redactPatterns: { ...this.config.redactPatterns, ...config.redactPatterns },
    };
    if (config.preserveCompanyEmails) {
      this.preserveEmailDomains = new Set(config.preserveCompanyEmails.map((d) => d.toLowerCase()));
    }
  }
  /**
   * Get current configuration.
   * @returns {object} a copy; mutating it (including `redactPatterns`) does not
   *   affect the handler.
   */
  getConfig() {
    const snapshot = {
      ...this.config,
      redactPatterns: { ...this.config.redactPatterns },
    };
    if (Array.isArray(this.config.preserveCompanyEmails)) {
      snapshot.preserveCompanyEmails = [...this.config.preserveCompanyEmails];
    }
    return snapshot;
  }
}
|
|
252
|
+
// =============================================================================
|
|
253
|
+
// Singleton Instance
|
|
254
|
+
// =============================================================================
|
|
255
|
+
let piiHandlerInstance = null;
/**
 * Return the shared PIIHandler, creating it on first use.
 *
 * Redaction is enabled unless the PII_REDACTION_ENABLED environment variable
 * is exactly the string 'false'; company addresses are preserved.
 */
export function getPIIHandler() {
  if (piiHandlerInstance) {
    return piiHandlerInstance;
  }
  const redactionEnabled = process.env.PII_REDACTION_ENABLED !== 'false';
  piiHandlerInstance = new PIIHandler({
    enabled: redactionEnabled,
    preserveCompanyEmails: ['@catalistgroup.co', '@catalist.deals'],
  });
  return piiHandlerInstance;
}
/**
 * Drop the shared handler so the next getPIIHandler() call builds a fresh one
 * (for testing).
 */
export function resetPIIHandler() {
  piiHandlerInstance = null;
}
|
|
274
|
+
// =============================================================================
|
|
275
|
+
// Utility Functions
|
|
276
|
+
// =============================================================================
|
|
277
|
+
/**
 * Redact PII from `text` with the shared singleton handler.
 * @param {string} text - Content to redact.
 * @returns the handler's redaction result for `text`.
 */
export function redactPII(text) {
  const handler = getPIIHandler();
  return handler.redact(text);
}
|
|
283
|
+
/**
 * Detect PII in `text` with the shared singleton handler, without redacting.
 * @param {string} text - Content to scan.
 * @returns the handler's list of matches for `text`.
 */
export function detectPII(text) {
  const handler = getPIIHandler();
  return handler.detect(text);
}
|
|
289
|
+
/**
 * Report whether the shared singleton handler finds any PII in `text`.
 * @param {string} text - Content to scan.
 * @returns {boolean} the handler's containsPII() answer for `text`.
 */
export function hasPII(text) {
  const handler = getPIIHandler();
  return handler.containsPII(text);
}
|
|
295
|
+
//# sourceMappingURL=pii-handler.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pii-handler.js","sourceRoot":"","sources":["../../src/extraction/pii-handler.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AA8CH,gFAAgF;AAChF,eAAe;AACf,gFAAgF;AAEhF,MAAM,YAAY,GAA4B;IAC5C,wCAAwC;IACxC,KAAK,EAAE,kDAAkD;IAEzD,qFAAqF;IACrF,KAAK,EACH,iEAAiE;IAEnE,wCAAwC;IACxC,GAAG,EAAE,kCAAkC;IAEvC,2DAA2D;IAC3D,WAAW,EACT,4CAA4C;IAE9C,4DAA4D;IAC5D,OAAO,EACL,qNAAqN;IAEvN,wDAAwD;IACxD,aAAa,EACX,gJAAgJ;IAElJ,sBAAsB;IACtB,UAAU,EACR,8EAA8E;IAEhF,qCAAqC;IACrC,YAAY,EAAE,sDAAsD;CACrE,CAAC;AAEF,2BAA2B;AAC3B,MAAM,gBAAgB,GAA4B;IAChD,KAAK,EAAE,kBAAkB;IACzB,KAAK,EAAE,kBAAkB;IACzB,GAAG,EAAE,gBAAgB;IACrB,WAAW,EAAE,eAAe;IAC5B,OAAO,EAAE,oBAAoB;IAC7B,aAAa,EAAE,gBAAgB;IAC/B,UAAU,EAAE,eAAe;IAC3B,YAAY,EAAE,oBAAoB;CACnC,CAAC;AAEF,gFAAgF;AAChF,oBAAoB;AACpB,gFAAgF;AAEhF,MAAM,OAAO,UAAU;IACb,MAAM,CAAmB;IACzB,oBAAoB,CAAc;IAE1C,YAAY,SAAoC,EAAE;QAChD,IAAI,CAAC,MAAM,GAAG;YACZ,OAAO,EAAE,IAAI;YACb,qBAAqB,EAAE,EAAE;YACzB,cAAc,EAAE;gBACd,KAAK,EAAE,IAAI;gBACX,KAAK,EAAE,IAAI;gBACX,GAAG,EAAE,IAAI;gBACT,UAAU,EAAE,IAAI;gBAChB,OAAO,EAAE,IAAI;gBACb,WAAW,EAAE,IAAI;gBACjB,SAAS,EAAE,KAAK,EAAE,6BAA6B;gBAC/C,WAAW,EAAE,IAAI;aAClB;YACD,GAAG,MAAM;SACV,CAAC;QAEF,+CAA+C;QAC/C,IAAI,CAAC,oBAAoB,GAAG,IAAI,GAAG,CACjC,CAAC,IAAI,CAAC,MAAM,CAAC,qBAAqB,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CACtE,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,IAAY;QACjB,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;YAClC,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,CAAwB,EAAE,CAAC;YAClF,iDAAiD;YACjD,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;YAC5C,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC;gBAC9C,SAAS;YACX,CAAC;YAED,oBAAoB;YACpB,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;YAEtB,IAAI,KAAK,CAAC;YACV,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;gBAC7C,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACvB,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC;gBAC1B,MAAM,GAAG,GAAG,KAAK,GA
AG,KAAK,CAAC,MAAM,CAAC;gBAEjC,yDAAyD;gBACzD,IAAI,IAAI,KAAK,OAAO,IAAI,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC,EAAE,CAAC;oBACxD,SAAS;gBACX,CAAC;gBAED,0CAA0C;gBAC1C,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;oBACrC,SAAS;gBACX,CAAC;gBAED,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI;oBACJ,KAAK;oBACL,KAAK;oBACL,GAAG;oBACH,WAAW,EAAE,gBAAgB,CAAC,IAAI,CAAC;iBACpC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,qDAAqD;QACrD,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACnD,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,IAAY;QACjB,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;YAClC,OAAO;gBACL,IAAI;gBACJ,QAAQ,EAAE,KAAK;gBACf,aAAa,EAAE,EAAE;gBACjB,OAAO,EAAE,EAAE;aACZ,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAElC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;gBACL,IAAI;gBACJ,QAAQ,EAAE,KAAK;gBACf,aAAa,EAAE,EAAE;gBACjB,OAAO,EAAE,EAAE;aACZ,CAAC;QACJ,CAAC;QAED,iEAAiE;QACjE,IAAI,YAAY,GAAG,IAAI,CAAC;QACxB,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,YAAY;gBACV,YAAY,CAAC,SAAS,CAAC,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC;oBACtC,KAAK,CAAC,WAAW;oBACjB,YAAY,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACtC,CAAC;QAED,uBAAuB;QACvB,MAAM,aAAa,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAE/D,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,QAAQ,EAAE,IAAI;YACd,aAAa;YACb,OAAO;SACR,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,IAAY;QACtB,OAAO,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,IAAY;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClD,CAAC;IAED;;OAEG;IACK,mBAAmB,CAAC,KAAa;QACvC,MAAM,UAAU,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QAEvC,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,oBAAoB,EAAE,CAAC;YAC/C,IAAI,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBAChC,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,IAAa,
EAAE,KAAa;QAChD,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,OAAO;gBACV,+BAA+B;gBAC/B,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;gBACxC,OAAO,MAAM,CAAC,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC;YAEpD,KAAK,KAAK;gBACR,uCAAuC;gBACvC,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;gBAC3C,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;oBAAE,OAAO,KAAK,CAAC;gBACzC,MAAM,UAAU,GAAG,QAAQ,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC3D,IAAI,UAAU,KAAK,CAAC,IAAI,UAAU,KAAK,GAAG,IAAI,UAAU,IAAI,GAAG;oBAAE,OAAO,KAAK,CAAC;gBAC9E,OAAO,IAAI,CAAC;YAEd,KAAK,aAAa;gBAChB,gCAAgC;gBAChC,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;gBAC1C,OAAO,QAAQ,CAAC,MAAM,IAAI,EAAE,IAAI,QAAQ,CAAC,MAAM,IAAI,EAAE,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;YAEpF,KAAK,cAAc;gBACjB,sCAAsC;gBACtC,OAAO,eAAe,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAErC;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,SAAS,CAAC,UAAkB;QAClC,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,IAAI,MAAM,GAAG,KAAK,CAAC;QAEnB,KAAK,IAAI,CAAC,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAChD,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC3B,IAAI,CAAC,IAAI;gBAAE,SAAS;YACpB,IAAI,KAAK,GAAG,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YAE/B,IAAI,MAAM,EAAE,CAAC;gBACX,KAAK,IAAI,CAAC,CAAC;gBACX,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;oBACd,KAAK,IAAI,CAAC,CAAC;gBACb,CAAC;YACH,CAAC;YAED,GAAG,IAAI,KAAK,CAAC;YACb,MAAM,GAAG,CAAC,MAAM,CAAC;QACnB,CAAC;QAED,OAAO,GAAG,GAAG,EAAE,KAAK,CAAC,CAAC;IACxB,CAAC;IAED;;OAEG;IACK,aAAa,CACnB,IAAa;QAEb,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,aAAa;gBAChB,OAAO,YAAY,CAAC;YACtB,KAAK,eAAe;gBAClB,OAAO,aAAa,CAAC;YACvB,KAAK,YAAY;gBACf,OAAO,WAAW,CAAC;YACrB,KAAK,cAAc;gBACjB,OAAO,aAAa,CAAC;YACvB;gBACE,OAAO,IAA6C,CAAC;QACzD,CAAC;IACH,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,MAAiC;QAC5C,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC;QAE5C,IAAI,MAAM,CAAC,qBAAqB,EAAE,CAAC;YACjC,IAAI,CAAC,oBAAoB,GAAG,IAAI,GAAG,CACjC,MAAM,CAAC,qBAAqB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CACzD,CAAC
;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,SAAS;QACP,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;IAC5B,CAAC;CACF;AAED,gFAAgF;AAChF,qBAAqB;AACrB,gFAAgF;AAEhF,IAAI,kBAAkB,GAAsB,IAAI,CAAC;AAEjD;;GAEG;AACH,MAAM,UAAU,aAAa;IAC3B,IAAI,CAAC,kBAAkB,EAAE,CAAC;QACxB,kBAAkB,GAAG,IAAI,UAAU,CAAC;YAClC,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,qBAAqB,KAAK,OAAO;YACtD,qBAAqB,EAAE,CAAC,mBAAmB,EAAE,iBAAiB,CAAC;SAChE,CAAC,CAAC;IACL,CAAC;IACD,OAAO,kBAAkB,CAAC;AAC5B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe;IAC7B,kBAAkB,GAAG,IAAI,CAAC;AAC5B,CAAC;AAED,gFAAgF;AAChF,oBAAoB;AACpB,gFAAgF;AAEhF;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,OAAO,aAAa,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;AACtC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,OAAO,aAAa,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;AACtC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,MAAM,CAAC,IAAY;IACjC,OAAO,aAAa,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;AAC3C,CAAC"}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extraction Pipeline Module
|
|
3
|
+
*
|
|
4
|
+
* Main orchestrator for Gmail knowledge base extraction.
|
|
5
|
+
* Coordinates Gmail API calls, extraction, quality filtering,
|
|
6
|
+
* deduplication, intent classification, and storage.
|
|
7
|
+
*/
|
|
8
|
+
import { StateManager } from './state-manager.js';
|
|
9
|
+
import { EmbeddingService } from '../embeddings/index.js';
|
|
10
|
+
export interface PipelineConfig {
|
|
11
|
+
supabaseUrl: string;
|
|
12
|
+
supabaseServiceRoleKey: string;
|
|
13
|
+
salesEmailAddress: string;
|
|
14
|
+
openaiApiKey: string;
|
|
15
|
+
gmailAccessToken?: string;
|
|
16
|
+
lookbackDays?: number;
|
|
17
|
+
batchSize?: number;
|
|
18
|
+
maxThreads?: number;
|
|
19
|
+
gmailLabels?: string[];
|
|
20
|
+
dryRun?: boolean;
|
|
21
|
+
generateEmbeddings?: boolean;
|
|
22
|
+
classifyIntents?: boolean;
|
|
23
|
+
onProgress?: (stage: string, progress: number, total: number) => void;
|
|
24
|
+
onError?: (error: Error, context: Record<string, unknown>) => void;
|
|
25
|
+
}
|
|
26
|
+
export interface PipelineResult {
|
|
27
|
+
batchId: string | null;
|
|
28
|
+
stats: {
|
|
29
|
+
threadsFound: number;
|
|
30
|
+
threadsProcessed: number;
|
|
31
|
+
pairsExtracted: number;
|
|
32
|
+
pairsStored: number;
|
|
33
|
+
duplicatesSkipped: number;
|
|
34
|
+
qualityFilteredOut: number;
|
|
35
|
+
embeddingsGenerated: number;
|
|
36
|
+
errors: number;
|
|
37
|
+
};
|
|
38
|
+
duration: {
|
|
39
|
+
totalMs: number;
|
|
40
|
+
extractionMs: number;
|
|
41
|
+
storageMs: number;
|
|
42
|
+
embeddingsMs: number;
|
|
43
|
+
};
|
|
44
|
+
errors: Array<{
|
|
45
|
+
stage: string;
|
|
46
|
+
message: string;
|
|
47
|
+
context?: Record<string, unknown>;
|
|
48
|
+
}>;
|
|
49
|
+
}
|
|
50
|
+
export declare class ExtractionPipeline {
|
|
51
|
+
private config;
|
|
52
|
+
private gmailExtractor;
|
|
53
|
+
private stateManager;
|
|
54
|
+
private embeddingService;
|
|
55
|
+
private rateLimiter;
|
|
56
|
+
constructor(config: PipelineConfig);
|
|
57
|
+
/**
|
|
58
|
+
* Run the full extraction pipeline
|
|
59
|
+
*/
|
|
60
|
+
run(): Promise<PipelineResult>;
|
|
61
|
+
/**
|
|
62
|
+
* Fetch Gmail threads from API
|
|
63
|
+
*/
|
|
64
|
+
private fetchGmailThreads;
|
|
65
|
+
/**
|
|
66
|
+
* Fetch a single Gmail thread with full message details
|
|
67
|
+
*/
|
|
68
|
+
private fetchGmailThread;
|
|
69
|
+
/**
|
|
70
|
+
* Classify intents for extracted pairs
|
|
71
|
+
*/
|
|
72
|
+
private classifyIntents;
|
|
73
|
+
/**
|
|
74
|
+
* Report progress callback
|
|
75
|
+
*/
|
|
76
|
+
private reportProgress;
|
|
77
|
+
/**
|
|
78
|
+
* Report error callback
|
|
79
|
+
*/
|
|
80
|
+
private reportError;
|
|
81
|
+
/**
|
|
82
|
+
* Get state manager for external use
|
|
83
|
+
*/
|
|
84
|
+
getStateManager(): StateManager;
|
|
85
|
+
/**
|
|
86
|
+
* Get embedding service for external use
|
|
87
|
+
*/
|
|
88
|
+
getEmbeddingService(): EmbeddingService | null;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* Create an extraction pipeline with environment configuration
|
|
92
|
+
*/
|
|
93
|
+
export declare function createExtractionPipeline(options?: Partial<PipelineConfig>): ExtractionPipeline;
|
|
94
|
+
//# sourceMappingURL=pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAQH,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAO1D,MAAM,WAAW,cAAc;IAC7B,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,EAAE,MAAM,CAAC;IAC/B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,YAAY,EAAE,MAAM,CAAC;IAGrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAG1B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IAKpB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IAGvB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,eAAe,CAAC,EAAE,OAAO,CAAC;IAG1B,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IACtE,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,IAAI,CAAC;CACpE;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,KAAK,EAAE;QACL,YAAY,EAAE,MAAM,CAAC;QACrB,gBAAgB,EAAE,MAAM,CAAC;QACzB,cAAc,EAAE,MAAM,CAAC;QACvB,WAAW,EAAE,MAAM,CAAC;QACpB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,kBAAkB,EAAE,MAAM,CAAC;QAC3B,mBAAmB,EAAE,MAAM,CAAC;QAC5B,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;IACF,QAAQ,EAAE;QACR,OAAO,EAAE,MAAM,CAAC;QAChB,YAAY,EAAE,MAAM,CAAC;QACrB,SAAS,EAAE,MAAM,CAAC;QAClB,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;KAAE,CAAC,CAAC;CACtF;AASD,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,MAAM,CAA2B;IACzC,OAAO,CAAC,cAAc,CAAiB;IACvC,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,gBAAgB,CAA0B;IAClD,OAAO,CAAC,WAAW,CAAc;gBAErB,MAAM,EAAE,cAAc;IAwClC;;OAEG;IACG,GAAG,IAAI,OAAO,CAAC,cAAc,CAAC;IA0LpC;;OAEG;YACW,iBAAiB;IA+E/B;;OAEG;YACW,gBAAgB;IAwB9B;;OAEG;YACW,eAAe;IAyC7B;;OAEG;IACH,OAAO,CAAC,cAAc;IAMtB;;OAEG;IACH,OAAO,CAAC,WAAW;IAQnB;;OAEG;IACH,eAAe,IAAI,YAAY;IAI/B;;OAEG;IACH,mBAAmB,IAAI,gBAAgB,GAAG,IAAI;CAG/C;AAMD;;GAEG;AACH,wBAAgB,wBAAwB,CACtC,OAAO,GAAE,OAAO,CAAC,cAAc,CAAM,GACpC,kBAAkB,CAiBpB"}
|