webpeel 0.21.57 → 0.21.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/pipeline.d.ts +3 -0
- package/dist/core/pipeline.js +45 -0
- package/dist/core/safe-browsing.d.ts +22 -0
- package/dist/core/safe-browsing.js +183 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +25 -3
- package/dist/server/pg-auth-store.js +1 -1
- package/dist/types.d.ts +26 -0
- package/package.json +1 -1
package/dist/core/pipeline.d.ts
CHANGED
|
@@ -13,6 +13,7 @@ import type { PeelOptions, PeelResult, ImageInfo } from '../types.js';
|
|
|
13
13
|
import type { BrandingProfile } from './branding.js';
|
|
14
14
|
import type { ChangeResult } from './change-tracking.js';
|
|
15
15
|
import type { DesignAnalysis } from './design-analysis.js';
|
|
16
|
+
import type { SafeBrowsingResult } from './safe-browsing.js';
|
|
16
17
|
/** Mutable context threaded through pipeline stages */
|
|
17
18
|
export interface PipelineContext {
|
|
18
19
|
url: string;
|
|
@@ -81,6 +82,8 @@ export interface PipelineContext {
|
|
|
81
82
|
warnings: string[];
|
|
82
83
|
/** Raw HTML size in characters (measured from fetched content before any conversion) */
|
|
83
84
|
rawHtmlSize?: number;
|
|
85
|
+
/** Safe Browsing check result (set early in pipeline, before fetch) */
|
|
86
|
+
safeBrowsingResult?: SafeBrowsingResult;
|
|
84
87
|
}
|
|
85
88
|
/** Create the initial PipelineContext with defaults */
|
|
86
89
|
export declare function createContext(url: string, options: PeelOptions): PipelineContext;
|
package/dist/core/pipeline.js
CHANGED
|
@@ -20,6 +20,8 @@ import { quickAnswer as runQuickAnswer } from './quick-answer.js';
|
|
|
20
20
|
import { Timer } from './timing.js';
|
|
21
21
|
import { chunkContent } from './chunker.js';
|
|
22
22
|
import { BlockedError } from '../types.js';
|
|
23
|
+
import { sanitizeForLLM } from './prompt-guard.js';
|
|
24
|
+
import { getSourceCredibility } from './source-credibility.js';
|
|
23
25
|
import { createLogger } from './logger.js';
|
|
24
26
|
const log = createLogger('pipeline');
|
|
25
27
|
/** Create the initial PipelineContext with defaults */
|
|
@@ -1245,6 +1247,48 @@ export async function finalize(ctx) {
|
|
|
1245
1247
|
export function buildResult(ctx) {
|
|
1246
1248
|
const fetchResult = ctx.fetchResult;
|
|
1247
1249
|
const elapsed = Date.now() - ctx.startTime;
|
|
1250
|
+
// --- Trust & Safety ---
|
|
1251
|
+
// Run prompt injection scan on final content
|
|
1252
|
+
const sanitizeResult = sanitizeForLLM(ctx.content);
|
|
1253
|
+
// If injection was detected, use the cleaned content
|
|
1254
|
+
if (sanitizeResult.injectionDetected) {
|
|
1255
|
+
ctx.content = sanitizeResult.content;
|
|
1256
|
+
ctx.warnings.push('Prompt injection patterns detected and stripped from content.');
|
|
1257
|
+
}
|
|
1258
|
+
// Assess source credibility
|
|
1259
|
+
const credibility = getSourceCredibility(ctx.url);
|
|
1260
|
+
// Compute composite trust score
|
|
1261
|
+
let trustScore = 1.0;
|
|
1262
|
+
if (credibility.tier === 'general')
|
|
1263
|
+
trustScore -= 0.2;
|
|
1264
|
+
if (sanitizeResult.injectionDetected)
|
|
1265
|
+
trustScore -= 0.5;
|
|
1266
|
+
if ((ctx.quality ?? 1.0) < 0.5)
|
|
1267
|
+
trustScore -= 0.1;
|
|
1268
|
+
trustScore = Math.max(0, Math.min(1, trustScore));
|
|
1269
|
+
// Build trust warnings
|
|
1270
|
+
const trustWarnings = [];
|
|
1271
|
+
if (credibility.tier === 'general')
|
|
1272
|
+
trustWarnings.push('Source is unverified (not a known official or trusted domain).');
|
|
1273
|
+
if (sanitizeResult.injectionDetected)
|
|
1274
|
+
trustWarnings.push(`Prompt injection detected: ${sanitizeResult.detectedPatterns.join(', ')}`);
|
|
1275
|
+
if (sanitizeResult.strippedChars > 0)
|
|
1276
|
+
trustWarnings.push(`Stripped ${sanitizeResult.strippedChars} suspicious characters (zero-width/Unicode smuggling).`);
|
|
1277
|
+
const trust = {
|
|
1278
|
+
source: {
|
|
1279
|
+
tier: credibility.tier,
|
|
1280
|
+
stars: credibility.stars,
|
|
1281
|
+
label: credibility.label,
|
|
1282
|
+
},
|
|
1283
|
+
contentSafety: {
|
|
1284
|
+
clean: !sanitizeResult.injectionDetected,
|
|
1285
|
+
injectionDetected: sanitizeResult.injectionDetected,
|
|
1286
|
+
detectedPatterns: sanitizeResult.detectedPatterns,
|
|
1287
|
+
strippedCount: sanitizeResult.strippedChars,
|
|
1288
|
+
},
|
|
1289
|
+
score: trustScore,
|
|
1290
|
+
warnings: trustWarnings,
|
|
1291
|
+
};
|
|
1248
1292
|
const tokens = estimateTokens(ctx.content);
|
|
1249
1293
|
const fingerprint = createHash('sha256').update(ctx.content).digest('hex').slice(0, 16);
|
|
1250
1294
|
// Token savings metrics — only when raw HTML size was captured (from actual fetch or domain extractor)
|
|
@@ -1342,5 +1386,6 @@ export function buildResult(ctx) {
|
|
|
1342
1386
|
...(rawTokenEstimate !== undefined ? { rawTokenEstimate } : {}),
|
|
1343
1387
|
...(tokenSavingsPercent !== undefined ? { tokenSavingsPercent } : {}),
|
|
1344
1388
|
...(fetchResult.autoInteract !== undefined ? { autoInteract: fetchResult.autoInteract } : {}),
|
|
1389
|
+
trust,
|
|
1345
1390
|
};
|
|
1346
1391
|
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain safety check using Google Safe Browsing Lookup API v4.
|
|
3
|
+
* Free: 10,000 lookups/day.
|
|
4
|
+
* Falls back to a local blocklist when no API key is configured.
|
|
5
|
+
*/
|
|
6
|
+
/** Outcome of a URL safety check, as returned by `checkUrlSafety`. */
export interface SafeBrowsingResult {
|
|
7
|
+
/** `false` when at least one threat was reported for the URL, `true` otherwise. */
safe: boolean;
|
|
8
|
+
/**
 * Threat labels: Google `threatType` values (e.g. `MALWARE`) or local heuristic
 * names (e.g. `PHISHING`, `SUSPICIOUS_IP`). Empty when `safe` is `true`.
 */
threats: string[];
|
|
9
|
+
/**
 * Which checker produced the verdict.
 * NOTE(review): `'unchecked'` is declared here but is not returned by the
 * safe-browsing.js code in this release — confirm whether other callers set it.
 */
source: 'google-api' | 'local-blocklist' | 'unchecked';
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Check URL safety.
|
|
13
|
+
*
|
|
14
|
+
* Flow:
|
|
15
|
+
* 1. If SAFE_BROWSING_API_KEY (or passed apiKey) is set, race Google API vs 2s timeout.
|
|
16
|
+
* Falls back to local blocklist on timeout or error.
|
|
17
|
+
* 2. Without an API key, use local heuristic blocklist only.
|
|
18
|
+
*
|
|
19
|
+
* @param url The URL to check
|
|
20
|
+
* @param apiKey Google Safe Browsing API key (optional). Falls back to SAFE_BROWSING_API_KEY env var.
|
|
21
|
+
* @returns The Google verdict when the API answers in time, otherwise the local blocklist verdict.
*/
|
|
22
|
+
export declare function checkUrlSafety(url: string, apiKey?: string): Promise<SafeBrowsingResult>;
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain safety check using Google Safe Browsing Lookup API v4.
|
|
3
|
+
* Free: 10,000 lookups/day.
|
|
4
|
+
* Falls back to a local blocklist when no API key is configured.
|
|
5
|
+
*/
|
|
6
|
+
// Known brands (and agency keywords) commonly impersonated in phishing.
// checkLocalBlocklist matches these as plain substrings of the hostname, so
// short entries such as 'ups' or 'gov' also match inside longer words.
|
|
7
|
+
const KNOWN_BRANDS = [
|
|
8
|
+
'amazon', 'google', 'facebook', 'apple', 'microsoft', 'paypal', 'netflix',
|
|
9
|
+
'instagram', 'twitter', 'linkedin', 'dropbox', 'chase', 'wellsfargo', 'bankofamerica',
|
|
10
|
+
'citibank', 'hsbc', 'ebay', 'walmart', 'target', 'bestbuy', 'fedex', 'ups', 'usps',
|
|
11
|
+
'irs', 'dmv', 'gov', 'yahoo', 'outlook', 'hotmail',
|
|
12
|
+
];
|
|
13
|
+
// TLDs heavily abused for phishing/malware (free-domain registrars).
// Entries keep their leading dot because checkLocalBlocklist looks up
// '.' + the last hostname label in this set.
|
|
14
|
+
const SUSPICIOUS_TLDS = new Set(['.tk', '.ml', '.ga', '.cf', '.gq', '.top', '.click', '.loan', '.win', '.xyz', '.club', '.work']);
|
|
15
|
+
// Private/reserved address ranges, IPv4 and IPv6 (treated as safe for local dev).
const PRIVATE_IP_RANGES = [
    /^127\.\d+\.\d+\.\d+$/, // IPv4 loopback (127.0.0.0/8)
    /^10\.\d+\.\d+\.\d+$/, // RFC 1918 (10.0.0.0/8)
    /^192\.168\.\d+\.\d+$/, // RFC 1918 (192.168.0.0/16)
    /^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$/, // RFC 1918 (172.16.0.0/12)
    /^169\.254\.\d+\.\d+$/, // IPv4 link-local
    /^::1$/, // IPv6 loopback
    // IPv6 unique local addresses, fc00::/7: the first group is always four hex
    // digits starting with "fc" or "fd". The previous pattern only matched the
    // literal "fc00:" prefix and so treated fc01:…–fcff:… as public.
    /^f[cd][0-9a-f]{2}:/i,
];
/**
 * Tell whether a bare IP literal falls in a private or reserved range.
 *
 * @param host IP address text without IPv6 brackets (e.g. "10.0.0.1", "fd00::1").
 * @returns true when `host` matches one of PRIVATE_IP_RANGES, false otherwise.
 */
function isPrivateIp(host) {
    return PRIVATE_IP_RANGES.some((re) => re.test(host));
}
|
|
29
|
+
/**
 * Tell whether a hostname is an IP literal rather than a DNS name.
 *
 * Accepts dotted-quad IPv4 and IPv6 written bare or in URL brackets.
 * An IPv6 candidate must contain at least one colon; without that check any
 * hostname made only of hex digits (e.g. "cafe", "abc") was misread as IPv6.
 *
 * @param host Hostname as found in a parsed URL.
 * @returns true for IPv4/IPv6 literals, false for everything else.
 */
function isIpAddress(host) {
    // IPv4 dotted quad
    if (/^\d{1,3}(\.\d{1,3}){3}$/.test(host))
        return true;
    // IPv6: strip a matching pair of brackets, then require hex groups with a colon
    const bare = host.startsWith('[') && host.endsWith(']') ? host.slice(1, -1) : host;
    return bare.includes(':') && /^[0-9a-f:]+$/i.test(bare);
}
|
|
38
|
+
/**
 * Local heuristic blocklist — catches common attack patterns without an API key.
 *
 * Checks, in order: data: URIs, unparseable URLs, embedded credentials
 * (the "@" trick), punycode labels, public IP literals, brand names on
 * abuse-prone TLDs, hyphen-heavy second-level domains and very deep
 * subdomain chains. The first three return immediately; the rest accumulate.
 *
 * @param url URL string to inspect.
 * @returns `{ safe, threats, source: 'local-blocklist' }`; `safe` is false
 *          whenever at least one threat label was collected.
 */
function checkLocalBlocklist(url) {
    const threats = [];
    // 1. Data URIs — always suspicious
    if (/^data:/i.test(url.trim())) {
        threats.push('DATA_URI');
        return { safe: false, threats, source: 'local-blocklist' };
    }
    let parsed = null;
    try {
        parsed = new URL(url);
    }
    catch {
        // Unparseable URL — flag as suspicious
        threats.push('INVALID_URL');
        return { safe: false, threats, source: 'local-blocklist' };
    }
    // 2. @ sign trick: http://google.com@evil.com/login → username = 'google.com'
    if (parsed.username || parsed.password) {
        threats.push('URL_CREDENTIALS_TRICK');
        return { safe: false, threats, source: 'local-blocklist' };
    }
    // Normalise once: lower-case and drop the trailing root dot of a fully
    // qualified name ("evil.tk." → "evil.tk"). Without this the last label is
    // empty, which hides the real TLD from every check below.
    const hostname = parsed.hostname.toLowerCase().replace(/\.$/, '');
    // 3. Punycode homograph attacks (xn-- internationalized domains).
    //    The last label is exempt so legitimate IDN TLDs (e.g. .xn--p1ai = .рф) pass.
    const hasPunycodeLabel = hostname.split('.').slice(0, -1).some((label) => label.startsWith('xn--'));
    if (hasPunycodeLabel) {
        threats.push('PUNYCODE_HOMOGRAPH');
    }
    // 4. IP-only URLs pointing to non-private ranges
    if (isIpAddress(hostname)) {
        const bare = hostname.replace(/^\[|\]$/g, ''); // strip brackets from IPv6
        if (!isPrivateIp(bare)) {
            threats.push('SUSPICIOUS_IP');
        }
        if (threats.length > 0)
            return { safe: false, threats, source: 'local-blocklist' };
        return { safe: true, threats: [], source: 'local-blocklist' };
    }
    // Remove www prefix for analysis
    const hostNoWww = hostname.replace(/^www\./, '');
    const parts = hostNoWww.split('.');
    const tld = parts.length >= 2 ? '.' + parts[parts.length - 1] : '';
    const sld = parts.length >= 2 ? parts[parts.length - 2] : '';
    // 5. Known-bad TLD combined with a brand name anywhere in the host
    //    (amazon-login.tk, paypal.secure.xyz). The whole host is scanned, so a
    //    brand in a subdomain is covered here and needs no separate pass.
    if (SUSPICIOUS_TLDS.has(tld) && KNOWN_BRANDS.some((brand) => hostNoWww.includes(brand))) {
        threats.push('PHISHING');
    }
    // 6. Excessive hyphens in SLD (amaz0n-login-verify-account.com)
    const hyphenCount = (sld.match(/-/g) || []).length;
    if (hyphenCount >= 3) {
        threats.push('EXCESSIVE_HYPHENS');
    }
    // 7. Excessive subdomains: login.secure.verify.account.bank.xyz.com
    if (parts.length > 5) {
        threats.push('EXCESSIVE_SUBDOMAINS');
    }
    if (threats.length > 0) {
        return { safe: false, threats, source: 'local-blocklist' };
    }
    return { safe: true, threats: [], source: 'local-blocklist' };
}
|
|
117
|
+
/**
|
|
118
|
+
* Check a URL against the Google Safe Browsing Lookup API v4.
|
|
119
|
+
* Returns null on any error (network timeout, bad key, etc.) so caller can fall back.
|
|
120
|
+
*/
|
|
121
|
+
async function checkGoogleSafeBrowsing(url, apiKey) {
|
|
122
|
+
const endpoint = `https://safebrowsing.googleapis.com/v4/threatMatches:find?key=${encodeURIComponent(apiKey)}`;
|
|
123
|
+
const body = {
|
|
124
|
+
client: { clientId: 'webpeel', clientVersion: '1.0.0' },
|
|
125
|
+
threatInfo: {
|
|
126
|
+
threatTypes: ['MALWARE', 'SOCIAL_ENGINEERING', 'UNWANTED_SOFTWARE', 'POTENTIALLY_HARMFUL_APPLICATION'],
|
|
127
|
+
platformTypes: ['ANY_PLATFORM'],
|
|
128
|
+
threatEntryTypes: ['URL'],
|
|
129
|
+
threatEntries: [{ url }],
|
|
130
|
+
},
|
|
131
|
+
};
|
|
132
|
+
const controller = new AbortController();
|
|
133
|
+
const timeoutId = setTimeout(() => controller.abort(), 2000);
|
|
134
|
+
try {
|
|
135
|
+
const resp = await fetch(endpoint, {
|
|
136
|
+
method: 'POST',
|
|
137
|
+
headers: { 'Content-Type': 'application/json' },
|
|
138
|
+
body: JSON.stringify(body),
|
|
139
|
+
signal: controller.signal,
|
|
140
|
+
});
|
|
141
|
+
clearTimeout(timeoutId);
|
|
142
|
+
if (!resp.ok)
|
|
143
|
+
return null;
|
|
144
|
+
const data = await resp.json();
|
|
145
|
+
if (!data.matches || data.matches.length === 0) {
|
|
146
|
+
return { safe: true, threats: [], source: 'google-api' };
|
|
147
|
+
}
|
|
148
|
+
const threats = [...new Set(data.matches.map((m) => m.threatType))];
|
|
149
|
+
return { safe: false, threats, source: 'google-api' };
|
|
150
|
+
}
|
|
151
|
+
catch {
|
|
152
|
+
clearTimeout(timeoutId);
|
|
153
|
+
return null;
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
/**
 * Check URL safety.
 *
 * Flow:
 *   1. If SAFE_BROWSING_API_KEY (or passed apiKey) is set, race Google API vs 2s timeout.
 *      Falls back to local blocklist on timeout or error.
 *   2. Without an API key, use local heuristic blocklist only.
 *
 * @param url    The URL to check
 * @param apiKey Google Safe Browsing API key (optional). Falls back to SAFE_BROWSING_API_KEY env var.
 * @returns The Google verdict when the API answers in time, otherwise the local blocklist verdict.
 */
export async function checkUrlSafety(url, apiKey) {
    const key = apiKey ?? process.env.SAFE_BROWSING_API_KEY;
    if (!key) {
        // No API key — local blocklist only
        return checkLocalBlocklist(url);
    }
    // Computed up front so the fallback is ready whichever way the race ends.
    const localResult = checkLocalBlocklist(url);
    let raceTimer;
    const deadline = new Promise((resolve) => {
        raceTimer = setTimeout(() => resolve(null), 2000);
    });
    try {
        // Race: Google API with 2s timeout, fallback to local
        const googleResult = await Promise.race([checkGoogleSafeBrowsing(url, key), deadline]);
        if (googleResult !== null)
            return googleResult;
        // API timed out or errored — use local blocklist result
        return localResult;
    }
    finally {
        // Release the deadline timer; left armed it kept the event loop alive
        // for up to 2s after every fast API answer.
        clearTimeout(raceTimer);
    }
}
|
package/dist/index.d.ts
CHANGED
|
@@ -42,6 +42,9 @@ export type SearchFallbackResult = {
|
|
|
42
42
|
};
|
|
43
43
|
export declare function searchFallback(..._args: any[]): Promise<SearchFallbackResult | null>;
|
|
44
44
|
export { peelTLSFetch, isPeelTLSAvailable, shutdownPeelTLS, type PeelTLSOptions, type PeelTLSResult } from './core/peel-tls.js';
|
|
45
|
+
export { sanitizeForLLM, type SanitizeResult } from './core/prompt-guard.js';
|
|
46
|
+
export { getSourceCredibility, type SourceCredibility } from './core/source-credibility.js';
|
|
47
|
+
export { checkUrlSafety, type SafeBrowsingResult } from './core/safe-browsing.js';
|
|
45
48
|
/**
|
|
46
49
|
* Fetch and extract content from a URL
|
|
47
50
|
*
|
package/dist/index.js
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { cleanup, warmup, closePool, scrollAndWait, closeProfileBrowser } from './core/fetcher.js';
|
|
7
7
|
import { createContext, normalizeOptions, handleYouTube, fetchContent, detectContentType, parseContent, postProcess, finalize, buildResult, } from './core/pipeline.js';
|
|
8
|
+
import { checkUrlSafety } from './core/safe-browsing.js';
|
|
8
9
|
export * from './types.js';
|
|
9
10
|
export { getDomainExtractor, extractDomainData } from './core/domain-extractors.js';
|
|
10
11
|
export { crawl } from './core/crawler.js';
|
|
@@ -47,6 +48,9 @@ export async function searchFallback(..._args) {
|
|
|
47
48
|
}
|
|
48
49
|
}
|
|
49
50
|
export { peelTLSFetch, isPeelTLSAvailable, shutdownPeelTLS } from './core/peel-tls.js';
|
|
51
|
+
export { sanitizeForLLM } from './core/prompt-guard.js';
|
|
52
|
+
export { getSourceCredibility } from './core/source-credibility.js';
|
|
53
|
+
export { checkUrlSafety } from './core/safe-browsing.js';
|
|
50
54
|
/**
|
|
51
55
|
* Fetch and extract content from a URL
|
|
52
56
|
*
|
|
@@ -66,16 +70,34 @@ export { peelTLSFetch, isPeelTLSAvailable, shutdownPeelTLS } from './core/peel-t
|
|
|
66
70
|
export async function peel(url, options = {}) {
|
|
67
71
|
const ctx = createContext(url, options);
|
|
68
72
|
normalizeOptions(ctx);
|
|
73
|
+
// Safe Browsing check — runs before any HTTP request, non-blocking
|
|
74
|
+
const sbResult = await checkUrlSafety(url, process.env.SAFE_BROWSING_API_KEY);
|
|
75
|
+
ctx.safeBrowsingResult = sbResult;
|
|
76
|
+
if (!sbResult.safe) {
|
|
77
|
+
const threatList = sbResult.threats.join(', ');
|
|
78
|
+
ctx.warnings.push(`⚠️ URL flagged by Safe Browsing: ${threatList}`);
|
|
79
|
+
}
|
|
69
80
|
const ytResult = await handleYouTube(ctx);
|
|
70
|
-
if (ytResult)
|
|
71
|
-
return ytResult;
|
|
81
|
+
if (ytResult) {
|
|
82
|
+
// Attach safe browsing to YouTube results too
|
|
83
|
+
return {
|
|
84
|
+
...ytResult,
|
|
85
|
+
safeBrowsing: sbResult,
|
|
86
|
+
...(ytResult.warnings || ctx.warnings.length > 0
|
|
87
|
+
? { warnings: [...(ytResult.warnings ?? []), ...ctx.warnings.filter(w => !ytResult.warnings?.includes(w))] }
|
|
88
|
+
: {}),
|
|
89
|
+
};
|
|
90
|
+
}
|
|
72
91
|
try {
|
|
73
92
|
await fetchContent(ctx);
|
|
74
93
|
detectContentType(ctx);
|
|
75
94
|
await parseContent(ctx);
|
|
76
95
|
await postProcess(ctx);
|
|
77
96
|
await finalize(ctx);
|
|
78
|
-
return buildResult(ctx);
|
|
97
|
+
const result = buildResult(ctx);
|
|
98
|
+
// Attach safe browsing result
|
|
99
|
+
result.safeBrowsing = sbResult;
|
|
100
|
+
return result;
|
|
79
101
|
}
|
|
80
102
|
catch (error) {
|
|
81
103
|
// Clean up browser resources on error
|
|
@@ -52,7 +52,7 @@ export class PostgresAuthStore {
|
|
|
52
52
|
title TEXT,
|
|
53
53
|
content TEXT NOT NULL,
|
|
54
54
|
tokens INTEGER,
|
|
55
|
-
created_by TEXT
|
|
55
|
+
created_by TEXT,
|
|
56
56
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
57
57
|
expires_at TIMESTAMPTZ DEFAULT NOW() + INTERVAL '30 days',
|
|
58
58
|
view_count INTEGER DEFAULT 0
|
package/dist/types.d.ts
CHANGED
|
@@ -339,6 +339,26 @@ export interface PeelResult {
|
|
|
339
339
|
rawTokenEstimate?: number;
|
|
340
340
|
/** Token savings percentage compared to raw HTML (how much cheaper WebPeel is) */
|
|
341
341
|
tokenSavingsPercent?: number;
|
|
342
|
+
/** Trust & safety assessment of the fetched content */
|
|
343
|
+
trust?: {
|
|
344
|
+
/** Source credibility tier */
|
|
345
|
+
source: {
|
|
346
|
+
tier: 'official' | 'verified' | 'general';
|
|
347
|
+
stars: number;
|
|
348
|
+
label: string;
|
|
349
|
+
};
|
|
350
|
+
/** Prompt injection scan result */
|
|
351
|
+
contentSafety: {
|
|
352
|
+
clean: boolean;
|
|
353
|
+
injectionDetected: boolean;
|
|
354
|
+
detectedPatterns: string[];
|
|
355
|
+
strippedCount: number;
|
|
356
|
+
};
|
|
357
|
+
/** Overall trust score 0-1 (composite of source + content safety) */
|
|
358
|
+
score: number;
|
|
359
|
+
/** Human-readable safety warnings */
|
|
360
|
+
warnings: string[];
|
|
361
|
+
};
|
|
342
362
|
/** Content chunks (when chunk option is enabled) */
|
|
343
363
|
chunks?: Array<{
|
|
344
364
|
index: number;
|
|
@@ -350,6 +370,12 @@ export interface PeelResult {
|
|
|
350
370
|
startOffset: number;
|
|
351
371
|
endOffset: number;
|
|
352
372
|
}>;
|
|
373
|
+
/** Safe Browsing check result */
|
|
374
|
+
safeBrowsing?: {
|
|
375
|
+
safe: boolean;
|
|
376
|
+
threats: string[];
|
|
377
|
+
source: 'google-api' | 'local-blocklist' | 'unchecked';
|
|
378
|
+
};
|
|
353
379
|
}
|
|
354
380
|
export interface PageMetadata {
|
|
355
381
|
/** Meta description */
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.57",
|
|
3
|
+
"version": "0.21.59",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|