webpeel 0.21.59 → 0.21.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/pipeline.js +12 -10
- package/dist/core/source-credibility.d.ts +19 -7
- package/dist/core/source-credibility.js +563 -62
- package/dist/server/routes/search.js +4 -4
- package/dist/types.d.ts +4 -2
- package/package.json +1 -1
package/dist/core/pipeline.js
CHANGED
|
@@ -1257,19 +1257,19 @@ export function buildResult(ctx) {
|
|
|
1257
1257
|
}
|
|
1258
1258
|
// Assess source credibility
|
|
1259
1259
|
const credibility = getSourceCredibility(ctx.url);
|
|
1260
|
-
// Compute composite trust score
|
|
1261
|
-
let trustScore =
|
|
1262
|
-
if (credibility.tier === 'general')
|
|
1263
|
-
trustScore -= 0.2;
|
|
1260
|
+
// Compute composite trust score from source credibility (0-100) + content safety
|
|
1261
|
+
let trustScore = credibility.score / 100; // normalize 0-100 → 0-1
|
|
1264
1262
|
if (sanitizeResult.injectionDetected)
|
|
1265
|
-
trustScore -= 0.
|
|
1263
|
+
trustScore -= 0.3;
|
|
1266
1264
|
if ((ctx.quality ?? 1.0) < 0.5)
|
|
1267
1265
|
trustScore -= 0.1;
|
|
1268
|
-
trustScore = Math.max(0, Math.min(1, trustScore));
|
|
1266
|
+
trustScore = Math.round(Math.max(0, Math.min(1, trustScore)) * 100) / 100;
|
|
1269
1267
|
// Build trust warnings
|
|
1270
|
-
const trustWarnings = [];
|
|
1271
|
-
if (credibility.tier === '
|
|
1272
|
-
trustWarnings.push('
|
|
1268
|
+
const trustWarnings = [...(credibility.warnings ?? [])];
|
|
1269
|
+
if (credibility.tier === 'new')
|
|
1270
|
+
trustWarnings.push('Domain has limited verifiable presence — exercise caution.');
|
|
1271
|
+
if (credibility.tier === 'suspicious')
|
|
1272
|
+
trustWarnings.push('Domain shows suspicious signals — treat content with caution.');
|
|
1273
1273
|
if (sanitizeResult.injectionDetected)
|
|
1274
1274
|
trustWarnings.push(`Prompt injection detected: ${sanitizeResult.detectedPatterns.join(', ')}`);
|
|
1275
1275
|
if (sanitizeResult.strippedChars > 0)
|
|
@@ -1277,8 +1277,10 @@ export function buildResult(ctx) {
|
|
|
1277
1277
|
const trust = {
|
|
1278
1278
|
source: {
|
|
1279
1279
|
tier: credibility.tier,
|
|
1280
|
-
|
|
1280
|
+
score: credibility.score,
|
|
1281
1281
|
label: credibility.label,
|
|
1282
|
+
signals: credibility.signals,
|
|
1283
|
+
warnings: credibility.warnings,
|
|
1282
1284
|
},
|
|
1283
1285
|
contentSafety: {
|
|
1284
1286
|
clean: !sanitizeResult.injectionDetected,
|
|
@@ -1,17 +1,29 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Source credibility scoring — lightweight, zero dependencies.
|
|
2
|
+
* Source credibility scoring — lightweight, zero dependencies, no network calls.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* -
|
|
6
|
-
* -
|
|
7
|
-
* -
|
|
4
|
+
* Actively investigates domain signals from the URL itself:
|
|
5
|
+
* - TLD trust score
|
|
6
|
+
* - HTTPS enforcement
|
|
7
|
+
* - Domain structure analysis
|
|
8
|
+
* - Brand/platform recognition (500+ known domains)
|
|
9
|
+
* - Content platform detection
|
|
10
|
+
*
|
|
11
|
+
* Score breakdown (0–100):
|
|
12
|
+
* TLD weight 0–20
|
|
13
|
+
* HTTPS 0–10
|
|
14
|
+
* Known domain 0–40
|
|
15
|
+
* Structure 0–15
|
|
16
|
+
* Platform 0–15
|
|
8
17
|
*/
|
|
9
18
|
/**
 * Result returned by `getSourceCredibility` for a single source URL.
 */
export interface SourceCredibility {
    /** Credibility tier assigned to the source. */
    tier: 'official' | 'established' | 'community' | 'new' | 'suspicious';
    /** Numeric credibility score; the module header describes a 0–100 breakdown. */
    score: number;
    /** Human-readable label for the source. */
    label: string;
    /** Signals collected during the assessment (presumably positive findings — confirm against the implementation). */
    signals: string[];
    /** Warnings collected during the assessment, e.g. when the URL could not be parsed. */
    warnings: string[];
}
|
|
14
25
|
/**
|
|
15
26
|
* Assess the credibility of a source URL.
|
|
27
|
+
* Fully synchronous — no network calls.
|
|
16
28
|
*/
|
|
17
29
|
export declare function getSourceCredibility(url: string): SourceCredibility;
|
|
@@ -1,83 +1,584 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Source credibility scoring — lightweight, zero dependencies.
|
|
2
|
+
* Source credibility scoring — lightweight, zero dependencies, no network calls.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* -
|
|
6
|
-
* -
|
|
7
|
-
* -
|
|
4
|
+
* Actively investigates domain signals from the URL itself:
|
|
5
|
+
* - TLD trust score
|
|
6
|
+
* - HTTPS enforcement
|
|
7
|
+
* - Domain structure analysis
|
|
8
|
+
* - Brand/platform recognition (500+ known domains)
|
|
9
|
+
* - Content platform detection
|
|
10
|
+
*
|
|
11
|
+
* Score breakdown (0–100):
|
|
12
|
+
* TLD weight 0–20
|
|
13
|
+
* HTTPS 0–10
|
|
14
|
+
* Known domain 0–40
|
|
15
|
+
* Structure 0–15
|
|
16
|
+
* Platform 0–15
|
|
8
17
|
*/
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
'
|
|
14
|
-
'
|
|
15
|
-
'
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// TLD trust map: points (0–20)
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
/**
 * Trust points (0–20) keyed by top-level domain, including the leading dot.
 * Key order is kept exactly as originally declared.
 */
const TLD_TRUST = {
    // Restricted / institutional TLDs
    '.gov': 20,
    '.edu': 20,
    '.mil': 20,
    // Long-standing generic TLDs
    '.org': 14,
    '.net': 12,
    '.com': 12,
    '.io': 11,
    // Country-code and regional TLDs
    '.co': 10,
    '.us': 10,
    '.uk': 10,
    '.ca': 10,
    '.au': 10,
    '.de': 10,
    '.fr': 10,
    '.jp': 10,
    '.br': 10,
    '.in': 10,
    '.eu': 11,
    '.int': 15,
    // Newer generic TLDs
    '.info': 8,
    '.biz': 7,
    '.me': 8,
    '.tv': 8,
    '.app': 10,
    '.dev': 10,
    '.ai': 10,
    '.tech': 8,
    '.page': 8,
    '.blog': 7,
    '.news': 8,
    '.media': 8,
    '.press': 8,
    '.shop': 7,
    '.store': 7,
    '.online': 7,
    '.site': 6,
    '.website': 6,
    '.space': 5,
    '.club': 5,
    '.pro': 7,
    // Low-trust freebies
    '.tk': 1,
    '.ml': 1,
    '.ga': 1,
    '.cf': 1,
    '.gq': 1,
    '.xyz': 4,
    '.top': 3,
    '.loan': 2,
    '.click': 3,
    '.link': 4,
    '.win': 2,
    '.bid': 2,
    '.download': 2,
    '.racing': 2,
    '.review': 4,
    '.cc': 3,
    '.pw': 3,
    '.men': 2,
    '.party': 2,
    '.stream': 3,
};
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// Suspicious TLDs (high-risk freebies used in phishing)
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
/** TLDs (with leading dot) treated as high-risk. */
const SUSPICIOUS_TLDS = new Set([
    '.tk',
    '.ml',
    '.ga',
    '.cf',
    '.gq',
    '.win',
    '.bid',
    '.men',
    '.party',
    '.loan',
]);
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Official TLDs
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
/** TLDs (with leading dot) treated as official. */
const OFFICIAL_TLDS = new Set([
    '.gov',
    '.edu',
    '.mil',
    '.int',
]);
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
// Official hostnames (beyond .gov/.edu/.mil TLD)
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
/**
 * Hostnames treated as official sources even though their TLD alone is not
 * an official one. Entries are grouped by category; order is as declared.
 */
const OFFICIAL_DOMAINS = new Set([
    // International organisations
    'who.int',
    'un.org',
    'worldbank.org',
    'imf.org',
    'oecd.org',
    'europa.eu',
    'nato.int',
    'wto.org',
    'unicef.org',
    'unhcr.org',
    'icrc.org',
    // Academic / research
    'arxiv.org',
    'pubmed.ncbi.nlm.nih.gov',
    'ncbi.nlm.nih.gov',
    'jstor.org',
    'nature.com',
    'science.org',
    'cell.com',
    'nejm.org',
    'bmj.com',
    'thelancet.com',
    'plos.org',
    'springer.com',
    'elsevier.com',
    'scholar.google.com',
    'researchgate.net',
    'semanticscholar.org',
    'acm.org',
    'ieee.org',
    // Official tech documentation
    'docs.python.org',
    'developer.mozilla.org',
    'nodejs.org',
    'rust-lang.org',
    'docs.microsoft.com',
    'learn.microsoft.com',
    'developer.apple.com',
    'developer.android.com',
    'php.net',
    'ruby-lang.org',
    'golang.org',
    'go.dev',
    // Health
    'mayoclinic.org',
    'clevelandclinic.org',
    'webmd.com',
    // Standards / specs
    'w3.org',
    'ietf.org',
    'rfc-editor.org',
    'iso.org',
    'ecma-international.org',
]);
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// Established domains (score bonus 40 pts) — 500+ entries
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
/**
 * Known, established domains, grouped by category.
 *
 * Entries must be bare hostnames: a value containing whitespace can never
 * equal the `hostname` of a parsed URL, so such an entry would be dead data.
 * A few domains appear under more than one category; that is harmless
 * because the collection is a Set.
 */
const ESTABLISHED_DOMAINS = new Set([
    // ── Major Tech ──
    'google.com', 'apple.com', 'microsoft.com', 'amazon.com', 'meta.com',
    'netflix.com', 'spotify.com', 'adobe.com', 'salesforce.com', 'oracle.com',
    'ibm.com', 'intel.com', 'nvidia.com', 'amd.com', 'qualcomm.com',
    'cisco.com', 'vmware.com', 'sap.com', 'servicenow.com', 'workday.com',
    'zoom.us', 'slack.com', 'dropbox.com', 'box.com', 'atlassian.com',
    'jira.atlassian.com', 'confluence.atlassian.com',
    'twilio.com', 'sendgrid.com', 'mailchimp.com', 'hubspot.com',
    'zendesk.com', 'intercom.com', 'freshworks.com', 'docusign.com',
    'okta.com', 'auth0.com', 'cloudflare.com', 'fastly.com', 'akamai.com',
    'digitalocean.com', 'linode.com', 'vultr.com',
    'datadog.com', 'newrelic.com', 'splunk.com', 'elastic.co',
    'mongodb.com', 'redis.io', 'postgresql.org', 'mysql.com',
    'docker.com', 'kubernetes.io', 'helm.sh',
    'terraform.io', 'ansible.com', 'chef.io', 'puppet.com',
    'heroku.com', 'render.com', 'railway.app', 'fly.io',
    'supabase.com', 'planetscale.com', 'neon.tech', 'fauna.com',
    'firebase.google.com', 'expo.dev',
    'openai.com', 'anthropic.com', 'cohere.com', 'huggingface.co',
    'stability.ai', 'midjourney.com', 'replicate.com',
    'figma.com', 'sketch.com', 'invisionapp.com', 'zeplin.io',
    'notion.so', 'airtable.com', 'monday.com', 'asana.com', 'clickup.com',
    'trello.com', 'basecamp.com', 'linear.app', 'shortcut.com',
    'postman.com', 'insomnia.rest', 'swagger.io',
    'sentry.io', 'bugsnag.com', 'rollbar.com',
    'segment.com', 'mixpanel.com', 'amplitude.com', 'heap.io',
    'looker.com', 'tableau.com', 'powerbi.microsoft.com',
    'snowflake.com', 'databricks.com', 'dbt.com', 'fivetran.com', 'airbyte.com',
    'vercel.com', 'netlify.com',
    // ── Cloud / Hosting ──
    'aws.amazon.com', 'cloud.google.com', 'azure.microsoft.com',
    'docs.aws.amazon.com', 'console.aws.amazon.com',
    // ── Developer Ecosystems ──
    'github.com', 'gitlab.com', 'bitbucket.org', 'sourcehut.com',
    'stackoverflow.com', 'superuser.com', 'serverfault.com',
    'npmjs.com', 'pypi.org', 'crates.io', 'packagist.org', 'rubygems.org',
    'nuget.org', 'pub.dev', 'hex.pm', 'opam.ocaml.org',
    'docs.rs', 'crates.io', 'pkg.go.dev',
    'codepen.io', 'jsfiddle.net', 'replit.com', 'glitch.com', 'codesandbox.io',
    'leetcode.com', 'hackerrank.com', 'codewars.com', 'exercism.org',
    'regex101.com', 'regexr.com',
    // ── Major Social ──
    'twitter.com', 'x.com', 'reddit.com', 'linkedin.com', 'instagram.com',
    'facebook.com', 'youtube.com', 'tiktok.com', 'snapchat.com', 'pinterest.com',
    'tumblr.com', 'mastodon.social', 'threads.net', 'discord.com', 'discord.gg',
    'twitch.tv', 'kick.com', 'vimeo.com', 'dailymotion.com',
    'quora.com', 'medium.com', 'substack.com', 'hashnode.com', 'dev.to',
    // ── Major News ──
    'nytimes.com', 'washingtonpost.com', 'theguardian.com', 'bbc.com', 'bbc.co.uk',
    'reuters.com', 'apnews.com', 'bloomberg.com', 'economist.com', 'ft.com',
    'wsj.com', 'cnn.com', 'foxnews.com', 'msnbc.com', 'nbcnews.com',
    'cbsnews.com', 'abcnews.go.com', 'npr.org', 'pbs.org',
    'time.com', 'usatoday.com', 'huffpost.com', 'vox.com', 'axios.com',
    'politico.com', 'thehill.com', 'rollcall.com', 'slate.com', 'salon.com',
    'theatlantic.com', 'newyorker.com', 'newrepublic.com',
    'motherjones.com', 'propublica.org', 'intercept.co',
    'aljazeera.com', 'dw.com', 'france24.com', 'rt.com',
    'spiegel.de', 'lemonde.fr', 'liberation.fr', 'lefigaro.fr',
    'elpais.com', 'elmundo.es', 'repubblica.it', 'corriere.it',
    'theglobeandmail.com', 'thestar.com', 'nationalpost.com',
    'smh.com.au', 'theage.com.au', 'abc.net.au',
    'timesofindia.com', 'hindustantimes.com', 'thehindu.com', 'ndtv.com',
    'scmp.com', 'channelnewsasia.com', 'straitstimes.com',
    'haaretz.com', 'timesofisrael.com', 'jpost.com',
    'techcrunch.com', 'wired.com', 'arstechnica.com', 'theverge.com',
    'engadget.com', 'gizmodo.com', 'cnet.com', 'pcmag.com', 'tomshardware.com',
    'anandtech.com', 'macrumors.com', '9to5mac.com', '9to5google.com',
    'androidcentral.com', 'windowscentral.com',
    'venturebeat.com', 'businessinsider.com', 'forbes.com', 'fortune.com',
    'inc.com', 'entrepreneur.com', 'fastcompany.com',
    // ── Finance ──
    'chase.com', 'bankofamerica.com', 'wellsfargo.com', 'citibank.com',
    'capitalone.com', 'usbank.com', 'tdbank.com', 'pnc.com',
    'americanexpress.com', 'discover.com', 'synchrony.com',
    'paypal.com', 'stripe.com', 'square.com', 'braintree.com', 'adyen.com',
    'coinbase.com', 'binance.com', 'kraken.com', 'gemini.com', 'crypto.com',
    'robinhood.com', 'etrade.com', 'schwab.com', 'fidelity.com',
    'vanguard.com', 'blackrock.com', 'jpmorgan.com', 'goldmansachs.com',
    'morganstanley.com', 'ubs.com', 'credit-suisse.com', 'hsbc.com',
    'barclays.com', 'lloydsbank.com', 'natwest.com', 'santander.com',
    'transferwise.com', 'wise.com', 'revolut.com', 'monzo.com',
    'quickbooks.intuit.com', 'turbotax.intuit.com', 'mint.com', 'hrblock.com',
    'experian.com', 'equifax.com', 'transunion.com',
    // ── E-commerce / Retail ──
    'amazon.com', 'ebay.com', 'etsy.com', 'walmart.com', 'target.com',
    'bestbuy.com', 'costco.com', 'homedepot.com', 'lowes.com', 'wayfair.com',
    'shopify.com', 'bigcommerce.com', 'woocommerce.com', 'squarespace.com',
    'overstock.com', 'newegg.com', 'bhphotovideo.com', 'adorama.com',
    'aliexpress.com', 'alibaba.com', 'wish.com', 'dhgate.com',
    'zappos.com', 'nordstrom.com', 'macys.com', 'bloomingdales.com', 'gap.com',
    'nike.com', 'adidas.com', 'reebok.com', 'underarmour.com', 'lululemon.com',
    'ikea.com', 'crate.com', 'potterybarn.com', 'williams-sonoma.com',
    'chewy.com', 'petco.com', 'petsmart.com',
    'instacart.com', 'doordash.com', 'ubereats.com', 'grubhub.com',
    'opentable.com', 'yelp.com', 'tripadvisor.com',
    // ── Travel ──
    'booking.com', 'expedia.com', 'airbnb.com', 'vrbo.com', 'kayak.com',
    'hotels.com', 'priceline.com', 'orbitz.com', 'travelocity.com',
    'delta.com', 'united.com', 'aa.com', 'southwest.com', 'jetblue.com',
    'marriott.com', 'hilton.com', 'hyatt.com', 'ihg.com', 'wyndham.com',
    'uber.com', 'lyft.com', 'waymo.com',
    // ── Education ──
    'coursera.org', 'edx.org', 'khanacademy.org', 'udemy.com', 'udacity.com',
    'pluralsight.com', 'lynda.com', 'linkedin.com', 'skillshare.com',
    'codecademy.com', 'freecodecamp.org', 'theodinproject.com',
    'brilliant.org', 'duolingo.com', 'babbel.com', 'rosettastone.com',
    'cambridgeinternational.org',
    'britannica.com', 'encyclopedia.com',
    // ── Reference / Knowledge ──
    'wikipedia.org', 'wikimedia.org', 'wikihow.com', 'wikidata.org',
    'imdb.com', 'rottentomatoes.com', 'metacritic.com', 'goodreads.com',
    'nationalgeographic.com', 'smithsonianmag.com', 'history.com',
    'wolframalpha.com', 'dictionary.com', 'merriam-webster.com',
    'etymonline.com', 'thesaurus.com',
    'archive.org', 'waybackmachine.org',
    // ── Health ──
    'webmd.com', 'mayoclinic.org', 'clevelandclinic.org', 'healthline.com',
    'medicalnewstoday.com', 'everydayhealth.com', 'drugs.com',
    'rxlist.com', 'medscape.com', 'uptodate.com', 'emedicinehealth.com',
    'psych.org', 'nami.org', 'betterhelp.com', 'talkspace.com',
    // ── Legal ──
    'law.cornell.edu', 'justia.com', 'findlaw.com', 'nolo.com', 'avvo.com',
    // Fixed: was 'rocket lawyer.com' (embedded space), which no hostname can match.
    'legalzoom.com', 'rocketlawyer.com',
    // ── Government / Civic (beyond TLD) ──
    'gov.uk', 'gc.ca', 'australia.gov.au',
    // ── Open Source / Misc Tech ──
    'linux.org', 'kernel.org', 'gnu.org', 'apache.org', 'mozilla.org',
    'python.org', 'perl.org', 'haskell.org',
    'jquery.com', 'reactjs.org', 'react.dev', 'vuejs.org', 'angular.io',
    'svelte.dev', 'nextjs.org', 'nuxtjs.org', 'remix.run', 'astro.build',
    'tailwindcss.com', 'getbootstrap.com', 'mui.com', 'chakra-ui.com',
    'styled-components.com', 'emotion.sh',
    'vitejs.dev', 'webpack.js.org', 'rollupjs.org', 'esbuild.github.io',
    'babeljs.io', 'eslint.org', 'prettier.io', 'typescript.dev',
    'typescriptlang.org', 'deno.com', 'deno.land', 'bun.sh',
    'expressjs.com', 'fastify.io', 'nestjs.com', 'koajs.com', 'hapi.dev',
    'graphql.org', 'apollographql.com', 'trpc.io', 'grpc.io',
    'prisma.io', 'drizzle.team', 'typeorm.io', 'sequelize.org',
    'socket.io', 'feathersjs.com',
    'git-scm.com', 'gitkraken.com',
    'homebrew.sh', 'brew.sh', 'chocolatey.org', 'scoop.sh', 'winget.run',
    'ubuntu.com', 'debian.org', 'fedoraproject.org', 'archlinux.org',
    'redhat.com', 'suse.com', 'centos.org',
    // ── Security / Privacy ──
    'haveibeenpwned.com', 'virustotal.com', '1password.com', 'bitwarden.com',
    'lastpass.com', 'dashlane.com', 'nordvpn.com', 'expressvpn.com',
    'protonmail.com', 'proton.me', 'tutanota.com', 'fastmail.com',
    'letsencrypt.org', 'ssllabs.com', 'namecheap.com', 'godaddy.com',
    'porkbun.com', 'cloudflare.com', 'dnschecker.org',
    // ── Search ──
    'google.com', 'bing.com', 'yahoo.com', 'duckduckgo.com', 'brave.com',
    'startpage.com', 'ecosia.org', 'kagi.com',
    // ── Productivity ──
    'gmail.com', 'outlook.com', 'office.com', 'office365.com',
    'docs.google.com', 'drive.google.com', 'calendar.google.com',
    'maps.google.com', 'translate.google.com',
    'evernote.com', 'onenote.com', 'bear.app', 'obsidian.md',
    'cal.com', 'calendly.com', 'doodle.com', 'when2meet.com',
    'loom.com', 'screen.studio', 'cleanshot.com',
    'canva.com', 'unsplash.com', 'pexels.com', 'pixabay.com',
    'shutterstock.com', 'gettyimages.com', 'istockphoto.com',
    'giphy.com', 'tenor.com',
    // ── Music / Media ──
    'soundcloud.com', 'bandcamp.com', 'last.fm', 'allmusic.com',
    'discogs.com', 'genius.com', 'azlyrics.com', 'musixmatch.com',
    'hulu.com', 'disneyplus.com', 'hbomax.com', 'max.com',
    'peacocktv.com', 'paramount.com', 'crunchyroll.com', 'funimation.com',
    'apple.com', 'music.apple.com',
    // ── Gaming ──
    'steam.com', 'steampowered.com', 'epicgames.com', 'gog.com',
    'itch.io', 'roblox.com', 'minecraft.net', 'ea.com',
    'activision.com', 'blizzard.com', 'battle.net', 'ubisoft.com',
    'nintendo.com', 'playstation.com', 'xbox.com',
    'ign.com', 'gamespot.com', 'kotaku.com', 'polygon.com',
    'pcgamer.com', 'rockpapershotgun.com',
    // ── Science / Research ──
    'nasa.gov', 'esa.int', 'noaa.gov', 'nist.gov', 'usgs.gov',
    'epa.gov', 'energy.gov', 'nsf.gov',
    'acs.org', 'aps.org', 'aip.org', 'ams.org',
    'newsweek.com', 'scientificamerican.com', 'popularmechanics.com',
    'livescience.com', 'space.com', 'phys.org', 'sciencedaily.com',
    'technologyreview.com',
    // ── Mapping / Location ──
    'openstreetmap.org', 'mapbox.com', 'here.com', 'waze.com',
    'zillow.com', 'redfin.com', 'realtor.com', 'trulia.com', 'apartments.com',
    // ── HR / Recruiting ──
    'indeed.com', 'glassdoor.com', 'monster.com', 'ziprecruiter.com',
    'careerbuilder.com', 'simplyhired.com', 'flexjobs.com', 'remote.com',
    'levels.fyi', 'teamblind.com', 'angellist.com', 'wellfound.com',
    // ── Misc established ──
    'hbr.org', 'mckinsey.com', 'bcg.com', 'bain.com', 'deloitte.com',
    'pwc.com', 'kpmg.com', 'ey.com', 'accenture.com',
    'gartner.com', 'idc.com', 'forrester.com',
    'ted.com', 'masterclass.com',
    'change.org', 'gofundme.com', 'kickstarter.com', 'indiegogo.com',
    'patreon.com', 'ko-fi.com', 'buymeacoffee.com',
    'webpeel.dev',
]);
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
'
|
|
33
|
-
'
|
|
34
|
-
'
|
|
35
|
-
|
|
36
|
-
'
|
|
37
|
-
'
|
|
38
|
-
|
|
39
|
-
'
|
|
40
|
-
'
|
|
41
|
-
|
|
42
|
-
'
|
|
43
|
-
|
|
44
|
-
'
|
|
270
|
+
// ---------------------------------------------------------------------------
|
|
271
|
+
// Community / content platforms — user content hosted on established infra
|
|
272
|
+
// ---------------------------------------------------------------------------
|
|
273
|
+
/**
 * Platforms that host user-generated content, mapped from the platform's
 * domain to the label used for content found there. Built from an object
 * literal; all keys are non-numeric strings, so insertion order is retained.
 */
const COMMUNITY_PLATFORMS = new Map(Object.entries({
    // Code hosting, blogging and site builders
    'github.com': 'Community Content on GitHub',
    'github.io': 'Personal Site on GitHub Pages',
    'gitlab.com': 'Community Content on GitLab',
    'medium.com': 'Article on Medium',
    'substack.com': 'Newsletter on Substack',
    'hashnode.com': 'Blog on Hashnode',
    'dev.to': 'Article on DEV Community',
    'wordpress.com': 'Blog on WordPress',
    'blogspot.com': 'Blog on Blogger',
    'blogger.com': 'Blog on Blogger',
    'tumblr.com': 'Blog on Tumblr',
    'weebly.com': 'Site on Weebly',
    'wix.com': 'Site on Wix',
    'squarespace.com': 'Site on Squarespace',
    'webflow.io': 'Site on Webflow',
    // Deployment targets and sandboxes
    'vercel.app': 'Deployed Project on Vercel',
    'netlify.app': 'Deployed Project on Netlify',
    'pages.dev': 'Deployed Project on Cloudflare Pages',
    'web.app': 'Firebase Hosted App',
    'firebaseapp.com': 'Firebase Hosted App',
    'herokuapp.com': 'App on Heroku',
    'replit.dev': 'Project on Replit',
    'glitch.me': 'Project on Glitch',
    'codesandbox.io': 'Sandbox on CodeSandbox',
    'stackblitz.com': 'Project on StackBlitz',
    'codepen.io': 'Pen on CodePen',
    'jsfiddle.net': 'Fiddle on JSFiddle',
    // Hosted docs and pages
    'notion.site': 'Notion Page',
    'gitbook.io': 'Docs on GitBook',
    'gitbook.com': 'Docs on GitBook',
    'readthedocs.io': 'Docs on Read the Docs',
    'readthedocs.org': 'Docs on Read the Docs',
    // Discussion and Q&A
    'reddit.com': 'Community Discussion on Reddit',
    'news.ycombinator.com': 'Discussion on Hacker News',
    'quora.com': 'Answer on Quora',
    'stackoverflow.com': 'Answer on Stack Overflow',
    'stackexchange.com': 'Answer on Stack Exchange',
    'producthunt.com': 'Launch on Product Hunt',
    'indiehackers.com': 'Post on Indie Hackers',
    'hackernoon.com': 'Article on HackerNoon',
    'lobste.rs': 'Discussion on Lobsters',
    'lobsters.rs': 'Discussion on Lobsters',
    // Social and media
    'twitter.com': 'Post on X (Twitter)',
    'x.com': 'Post on X (Twitter)',
    'linkedin.com': 'Post on LinkedIn',
    'youtube.com': 'Video on YouTube',
    'vimeo.com': 'Video on Vimeo',
    'twitch.tv': 'Stream on Twitch',
    'soundcloud.com': 'Audio on SoundCloud',
    'bandcamp.com': 'Music on Bandcamp',
    'pinterest.com': 'Pin on Pinterest',
    'instagram.com': 'Post on Instagram',
    'tiktok.com': 'Video on TikTok',
}));
|
|
328
|
+
// ---------------------------------------------------------------------------
|
|
329
|
+
// Brand-category labels for established domains
|
|
330
|
+
// ---------------------------------------------------------------------------
|
|
331
|
+
/**
 * Descriptive category label for selected domains, keyed by domain.
 * Assembled from ordered [domain, label] pairs so key order matches the
 * original declaration.
 */
const DOMAIN_CATEGORY = Object.fromEntries([
    // Tech
    ['google.com', 'Established Technology Company'],
    ['apple.com', 'Established Technology Company'],
    ['microsoft.com', 'Established Technology Company'],
    ['amazon.com', 'Established E-commerce & Cloud Platform'],
    ['meta.com', 'Established Technology Company'],
    ['netflix.com', 'Established Streaming Service'],
    ['spotify.com', 'Established Music Streaming Service'],
    ['openai.com', 'Established AI Research Company'],
    ['anthropic.com', 'Established AI Research Company'],
    ['github.com', 'Established Developer Platform'],
    ['gitlab.com', 'Established Developer Platform'],
    ['stackoverflow.com', 'Established Developer Q&A Platform'],
    ['npmjs.com', 'Established Package Registry'],
    ['pypi.org', 'Established Package Registry'],
    ['docker.com', 'Established Container Platform'],
    ['vercel.com', 'Established Hosting Platform'],
    ['netlify.com', 'Established Hosting Platform'],
    ['cloudflare.com', 'Established CDN & Security Provider'],
    ['figma.com', 'Established Design Platform'],
    ['notion.so', 'Established Productivity Platform'],
    ['slack.com', 'Established Business Communication Platform'],
    ['zoom.us', 'Established Video Communication Platform'],
    ['adobe.com', 'Established Creative Software Company'],
    // News
    ['nytimes.com', 'Established News Organization'],
    ['washingtonpost.com', 'Established News Organization'],
    ['theguardian.com', 'Established News Organization'],
    ['bbc.com', 'Established News Organization'],
    ['bbc.co.uk', 'Established News Organization'],
    ['reuters.com', 'Established News Agency'],
    ['apnews.com', 'Established News Agency'],
    ['bloomberg.com', 'Established Financial News Organization'],
    ['economist.com', 'Established News Publication'],
    ['ft.com', 'Established Financial News Organization'],
    ['wsj.com', 'Established Financial News Organization'],
    ['cnn.com', 'Established News Organization'],
    ['npr.org', 'Established Public Radio'],
    ['techcrunch.com', 'Established Technology News Publication'],
    ['wired.com', 'Established Technology News Publication'],
    ['arstechnica.com', 'Established Technology News Publication'],
    ['theverge.com', 'Established Technology News Publication'],
    // Finance
    ['paypal.com', 'Established Payment Platform'],
    ['stripe.com', 'Established Payment Platform'],
    ['square.com', 'Established Payment Platform'],
    ['coinbase.com', 'Established Cryptocurrency Exchange'],
    ['chase.com', 'Established Financial Institution'],
    ['bankofamerica.com', 'Established Financial Institution'],
    ['wellsfargo.com', 'Established Financial Institution'],
    // E-commerce
    ['ebay.com', 'Established E-commerce Marketplace'],
    ['etsy.com', 'Established Handmade Marketplace'],
    ['walmart.com', 'Established Retail Company'],
    ['target.com', 'Established Retail Company'],
    ['bestbuy.com', 'Established Electronics Retailer'],
    ['shopify.com', 'Established E-commerce Platform'],
    // Education
    ['coursera.org', 'Established Online Education Platform'],
    ['edx.org', 'Established Online Education Platform'],
    ['khanacademy.org', 'Non-Profit Education Platform'],
    ['udemy.com', 'Established Online Learning Marketplace'],
    ['britannica.com', 'Established Reference Encyclopedia'],
    ['wikipedia.org', 'Open Encyclopedia (Community Edited)'],
    // Reference
    ['archive.org', 'Established Digital Archive'],
    ['wolframalpha.com', 'Established Computational Knowledge Engine'],
    ['imdb.com', 'Established Movie & TV Database'],
]);
|
|
401
|
+
// ---------------------------------------------------------------------------
|
|
402
|
+
// Helpers
|
|
403
|
+
// ---------------------------------------------------------------------------
|
|
404
|
+
/**
 * Second-level labels that commonly act as part of the public suffix under a
 * two-letter country-code TLD (e.g. "co.uk", "com.au", "net.au").
 *
 * This is a small offline heuristic, not the full Public Suffix List. "gov"
 * is deliberately left out so that hosts such as "service.gov.uk" still
 * reduce to "gov.uk", which this module lists as a known domain.
 */
const SECOND_LEVEL_SUFFIX_LABELS = new Set(['co', 'com', 'net', 'org', 'ac', 'edu']);

/**
 * Split a hostname into its dot-separated labels, ignoring a single trailing
 * root dot ("example.com." is the same host as "example.com").
 *
 * @param {string} hostname
 * @returns {string[]}
 */
function splitHostLabels(hostname) {
    const withoutRootDot = hostname.endsWith('.') ? hostname.slice(0, -1) : hostname;
    return withoutRootDot.split('.');
}

/**
 * Number of trailing labels that make up the registrable domain: 3 for hosts
 * under a compound country-code suffix such as "bbc.co.uk", otherwise 2.
 *
 * @param {string[]} labels
 * @returns {number}
 */
function registrableLabelCount(labels) {
    if (labels.length >= 3) {
        const topLevel = labels[labels.length - 1];
        const secondLevel = labels[labels.length - 2];
        if (topLevel.length === 2 && SECOND_LEVEL_SUFFIX_LABELS.has(secondLevel)) {
            return 3;
        }
    }
    return 2;
}

/**
 * Return the top-level domain of a hostname with a leading dot (".com"),
 * or an empty string when the hostname has fewer than two labels.
 *
 * @param {string} hostname
 * @returns {string}
 */
function extractTLD(hostname) {
    const labels = splitHostLabels(hostname);
    if (labels.length < 2) {
        return '';
    }
    return '.' + labels[labels.length - 1];
}

/**
 * Return the registrable domain of a hostname (e.g. "docs.google.com" →
 * "google.com", "news.bbc.co.uk" → "bbc.co.uk"). Hostnames with fewer than
 * two labels are returned as-is (minus any trailing root dot).
 *
 * @param {string} hostname
 * @returns {string}
 */
function extractSLD(hostname) {
    const labels = splitHostLabels(hostname);
    if (labels.length < 2) {
        return labels.join('.');
    }
    return labels.slice(-registrableLabelCount(labels)).join('.');
}

/**
 * Count the subdomain labels in front of the registrable domain. A leading
 * "www." is not counted, so "www.example.com" and "www.bbc.co.uk" both
 * yield 0 while "news.bbc.co.uk" yields 1.
 *
 * @param {string} hostname
 * @returns {number}
 */
function countSubdomains(hostname) {
    const labels = splitHostLabels(hostname.replace(/^www\./, ''));
    return Math.max(0, labels.length - registrableLabelCount(labels));
}
|
|
423
|
+
// ---------------------------------------------------------------------------
|
|
424
|
+
// Main export
|
|
425
|
+
// ---------------------------------------------------------------------------
|
|
46
426
|
/**
 * Assess the credibility of a source URL.
 * Fully synchronous — no network calls.
 *
 * Scoring steps, in order:
 *   1. HTTPS            +10 when the protocol is `https:`, otherwise a warning.
 *   2. TLD trust        adds `TLD_TRUST[tld]`, or 5 when the TLD is not listed.
 *   3. Official source  an official TLD or domain returns immediately with
 *                       tier 'official' and the running score + 55 (max 100).
 *   4. Structure        +15 / +10 / +5 / +0 for 0 / 1 / 2 / 3+ subdomains.
 *   5–6. Reputation     +15 for a community platform, otherwise +40 for an
 *                       established domain (the two never stack).
 *   7. Suspicious TLD   caps the score at 15.
 *   8. Impersonation    keyword hit caps the score at 19 (skipped for
 *                       established domains and community platforms).
 *
 * Final score → tier: ≥90 official, ≥60 established, ≥40 community,
 * ≥20 new, otherwise suspicious.
 *
 * @param url Absolute URL to assess.
 * @returns `{ tier, score, label, signals, warnings }`. An unparseable URL
 *          yields tier 'suspicious' with score 0.
 */
export function getSourceCredibility(url) {
    const signals = [];
    const warnings = [];
    let score = 0;
    // ── Parse URL ─────────────────────────────────────────────────────────────
    let parsedUrl;
    try {
        parsedUrl = new URL(url);
    }
    catch {
        return {
            tier: 'suspicious',
            score: 0,
            label: 'Invalid URL — Cannot Assess',
            signals: [],
            warnings: ['URL could not be parsed'],
        };
    }
    const protocol = parsedUrl.protocol; // 'https:' or 'http:'
    // The URL parser keeps the root dot of a fully-qualified host
    // ("evil.tk." / "nasa.gov."). Strip it up front so the TLD tables and
    // domain sets below see the same name as for the dot-less spelling.
    const rawHostname = parsedUrl.hostname.toLowerCase().replace(/\.$/, '');
    const hostname = rawHostname.replace(/^www\./, '');
    const tld = extractTLD(hostname);
    const sld = extractSLD(hostname); // e.g. "google.com"
    const subdomainCount = countSubdomains(rawHostname);
    // ── 1. HTTPS check (0–10 pts) ─────────────────────────────────────────────
    if (protocol === 'https:') {
        score += 10;
        signals.push('HTTPS enforced');
    }
    else {
        warnings.push('HTTP only — no encryption');
    }
    // ── 2. TLD trust (0–20 pts) ───────────────────────────────────────────────
    const tldScore = TLD_TRUST[tld] ?? 5;
    score += tldScore;
    if (tldScore >= 18) {
        signals.push(`Trusted TLD (${tld})`);
    }
    else if (tldScore <= 3) {
        warnings.push(`High-risk TLD (${tld}) — commonly used in phishing`);
    }
    // ── 3. Official TLD shortcut ──────────────────────────────────────────────
    if (OFFICIAL_TLDS.has(tld) || OFFICIAL_DOMAINS.has(hostname) || OFFICIAL_DOMAINS.has(sld)) {
        const category = DOMAIN_CATEGORY[hostname] ?? DOMAIN_CATEGORY[sld] ?? 'Official Source';
        return {
            tier: 'official',
            // Flat bonus on top of the HTTPS + TLD points collected so far.
            score: Math.min(100, score + 40 + 15),
            label: tld === '.gov' ? 'Official Government Source' :
                tld === '.edu' ? 'Official Educational Institution' :
                    tld === '.mil' ? 'Official Military Source' :
                        tld === '.int' ? 'International Organization' :
                            category,
            // "Trusted TLD" may already have been added in step 2; keep the first copy only.
            signals: [...signals, 'Official domain verified', `Trusted TLD (${tld})`].filter((v, i, a) => a.indexOf(v) === i),
            warnings,
        };
    }
    // ── 4. Domain structure (0–15 pts) ────────────────────────────────────────
    if (subdomainCount === 0) {
        score += 15;
        signals.push('Clean domain structure');
    }
    else if (subdomainCount === 1) {
        score += 10;
        signals.push('Standard subdomain structure');
    }
    else if (subdomainCount === 2) {
        score += 5;
    }
    else {
        // 3+ subdomains — possible phishing pattern, no points awarded
        warnings.push(`Excessive subdomains (${subdomainCount}) — potential phishing indicator`);
    }
    // ── 5 & 6. Known domain + Community platform (mutually exclusive bonus) ──
    // Community platform detection — user content on a known hosting platform.
    // When the domain is a community platform, it gets the platform bonus (15 pts)
    // but NOT the established domain bonus (they're conceptually different tiers).
    const communityLabel = COMMUNITY_PLATFORMS.get(hostname) ?? COMMUNITY_PLATFORMS.get(sld);
    const isEstablished = ESTABLISHED_DOMAINS.has(hostname) || ESTABLISHED_DOMAINS.has(sld);
    if (communityLabel) {
        // Platform bonus only — user content hosted on verified infra
        score += 15;
        signals.push(`Hosted on verified platform (${sld})`);
    }
    else if (isEstablished) {
        // Full established domain bonus
        score += 40;
        signals.push('Recognized established domain');
    }
    // ── 7. Suspicious TLD ─────────────────────────────────────────────────────
    if (SUSPICIOUS_TLDS.has(tld)) {
        score = Math.min(score, 15); // Cap at suspicious tier
        warnings.push('Domain uses a free TLD associated with fraud');
    }
    // ── 8. Phishing keyword detection ─────────────────────────────────────────
    // NOTE(review): these are plain substring matches, so e.g. 'apple.' also
    // hits "pineapple.com" — confirm whether label-boundary matching is wanted.
    const phishingKeywords = ['paypal-', 'apple-', 'google-', 'microsoft-', 'amazon-',
        'bank-', 'login-', 'signin-', 'secure-', 'verify-', 'account-', 'update-',
        'support-', 'helpdesk-', '-login', '-signin', '-secure', '-verify', '-account',
        'paypal.', 'apple.', 'google.', 'microsoft.', 'amazon.'];
    // Known-good domains are exempt; check that first so the scan is skipped for them.
    const suspiciousPattern = !isEstablished && !communityLabel
        && phishingKeywords.some(kw => hostname.includes(kw));
    if (suspiciousPattern) {
        score = Math.min(score, 19);
        warnings.push('Domain contains impersonation keywords — potential phishing');
    }
    // ── Clamp score ───────────────────────────────────────────────────────────
    score = Math.max(0, Math.min(100, score));
    // ── Tier assignment ───────────────────────────────────────────────────────
    let tier;
    if (score >= 90)
        tier = 'official';
    else if (score >= 60)
        tier = 'established';
    else if (score >= 40)
        tier = 'community';
    else if (score >= 20)
        tier = 'new';
    else
        tier = 'suspicious';
    // ── Label generation ──────────────────────────────────────────────────────
    let label;
    if (communityLabel) {
        label = communityLabel;
    }
    else if (isEstablished) {
        label = DOMAIN_CATEGORY[hostname] ?? DOMAIN_CATEGORY[sld] ?? labelFromTier(tier, hostname, tld);
    }
    else {
        label = labelFromTier(tier, hostname, tld);
    }
    return { tier, score, label, signals, warnings };
}
|
|
561
|
+
// ---------------------------------------------------------------------------
|
|
562
|
+
// Generate a useful fallback label based on tier + domain context
|
|
563
|
+
// ---------------------------------------------------------------------------
|
|
564
|
+
/**
 * Produce a human-readable fallback label for a credibility tier.
 *
 * Only the 'established' and 'suspicious' tiers look at the TLD; every other
 * tier maps to a fixed string. An unrecognised tier gets a generic
 * "Unknown Domain" label.
 *
 * @param tier      Credibility tier name.
 * @param _hostname Unused; kept so the call signature stays stable.
 * @param tld       TLD with leading dot (e.g. ".org").
 * @returns Label text for display.
 */
function labelFromTier(tier, _hostname, tld) {
    if (tier === 'official') {
        return 'Official Source';
    }
    if (tier === 'established') {
        if (tld === '.org') {
            return 'Established Organization';
        }
        if (tld === '.net') {
            return 'Established Network Service';
        }
        if (tld === '.io') {
            return 'Established Tech Service';
        }
        return 'Established Website';
    }
    if (tier === 'community') {
        return 'Community or Independent Website';
    }
    if (tier === 'new') {
        return 'Small or Recently Established Website';
    }
    if (tier === 'suspicious') {
        // Call out free TLDs explicitly; anything else is merely unrecognised.
        if (SUSPICIOUS_TLDS.has(tld)) {
            return `Free Domain TLD (${tld}) — Exercise Caution`;
        }
        return 'Unrecognized Domain — Exercise Caution';
    }
    return 'Unknown Domain — Limited Verification Available';
}
|
|
@@ -212,16 +212,16 @@ export function createSearchRouter(authStore) {
|
|
|
212
212
|
}
|
|
213
213
|
}
|
|
214
214
|
// Add credibility scores and sort by trustworthiness
|
|
215
|
-
const tierOrder = { official: 0,
|
|
215
|
+
const tierOrder = { official: 0, established: 1, community: 2, new: 3, suspicious: 4 };
|
|
216
216
|
results = results
|
|
217
217
|
.map(r => {
|
|
218
218
|
const cred = getSourceCredibility(r.url);
|
|
219
219
|
return { ...r, credibility: cred };
|
|
220
220
|
})
|
|
221
221
|
.sort((a, b) => {
|
|
222
|
-
const aTier = tierOrder[a.credibility?.tier || '
|
|
223
|
-
const bTier = tierOrder[b.credibility?.tier || '
|
|
224
|
-
return aTier - bTier; // Official first, then
|
|
222
|
+
const aTier = tierOrder[a.credibility?.tier || 'new'] ?? 3;
|
|
223
|
+
const bTier = tierOrder[b.credibility?.tier || 'new'] ?? 3;
|
|
224
|
+
return aTier - bTier; // Official first, then established, community, new, suspicious
|
|
225
225
|
})
|
|
226
226
|
.map((r, i) => ({ ...r, rank: i + 1 }));
|
|
227
227
|
data.web = results;
|
package/dist/types.d.ts
CHANGED
|
@@ -343,9 +343,11 @@ export interface PeelResult {
|
|
|
343
343
|
trust?: {
|
|
344
344
|
/** Source credibility tier */
|
|
345
345
|
source: {
|
|
346
|
-
tier: 'official' | '
|
|
347
|
-
|
|
346
|
+
tier: 'official' | 'established' | 'community' | 'new' | 'suspicious';
|
|
347
|
+
score: number;
|
|
348
348
|
label: string;
|
|
349
|
+
signals?: string[];
|
|
350
|
+
warnings?: string[];
|
|
349
351
|
};
|
|
350
352
|
/** Prompt injection scan result */
|
|
351
353
|
contentSafety: {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.
|
|
3
|
+
"version": "0.21.60",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|