webpeel 0.21.59 → 0.21.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1257,19 +1257,19 @@ export function buildResult(ctx) {
1257
1257
  }
1258
1258
  // Assess source credibility
1259
1259
  const credibility = getSourceCredibility(ctx.url);
1260
- // Compute composite trust score
1261
- let trustScore = 1.0;
1262
- if (credibility.tier === 'general')
1263
- trustScore -= 0.2;
1260
+ // Compute composite trust score from source credibility (0-100) + content safety
1261
+ let trustScore = credibility.score / 100; // normalize 0-100 → 0-1
1264
1262
  if (sanitizeResult.injectionDetected)
1265
- trustScore -= 0.5;
1263
+ trustScore -= 0.3;
1266
1264
  if ((ctx.quality ?? 1.0) < 0.5)
1267
1265
  trustScore -= 0.1;
1268
- trustScore = Math.max(0, Math.min(1, trustScore));
1266
+ trustScore = Math.round(Math.max(0, Math.min(1, trustScore)) * 100) / 100;
1269
1267
  // Build trust warnings
1270
- const trustWarnings = [];
1271
- if (credibility.tier === 'general')
1272
- trustWarnings.push('Source is unverified (not a known official or trusted domain).');
1268
+ const trustWarnings = [...(credibility.warnings ?? [])];
1269
+ if (credibility.tier === 'new')
1270
+ trustWarnings.push('Domain has limited verifiable presence exercise caution.');
1271
+ if (credibility.tier === 'suspicious')
1272
+ trustWarnings.push('Domain shows suspicious signals — treat content with caution.');
1273
1273
  if (sanitizeResult.injectionDetected)
1274
1274
  trustWarnings.push(`Prompt injection detected: ${sanitizeResult.detectedPatterns.join(', ')}`);
1275
1275
  if (sanitizeResult.strippedChars > 0)
@@ -1277,8 +1277,10 @@ export function buildResult(ctx) {
1277
1277
  const trust = {
1278
1278
  source: {
1279
1279
  tier: credibility.tier,
1280
- stars: credibility.stars,
1280
+ score: credibility.score,
1281
1281
  label: credibility.label,
1282
+ signals: credibility.signals,
1283
+ warnings: credibility.warnings,
1282
1284
  },
1283
1285
  contentSafety: {
1284
1286
  clean: !sanitizeResult.injectionDetected,
@@ -1,17 +1,29 @@
1
1
  /**
2
- * Source credibility scoring — lightweight, zero dependencies.
2
+ * Source credibility scoring — lightweight, zero dependencies, no network calls.
3
3
  *
4
- * Classifies URLs by trustworthiness:
5
- * - Official (★★★): .gov, .edu, .mil, WHO, NIH, academic journals
6
- * - Verified (★★): Wikipedia, Reuters, BBC, GitHub, StackOverflow
7
- * - General (★): Everything else
4
+ * Actively investigates domain signals from the URL itself:
5
+ * - TLD trust score
6
+ * - HTTPS enforcement
7
+ * - Domain structure analysis
8
+ * - Brand/platform recognition (500+ known domains)
9
+ * - Content platform detection
10
+ *
11
+ * Score breakdown (0–100):
12
+ * TLD weight 0–20
13
+ * HTTPS 0–10
14
+ * Known domain 0–40
15
+ * Structure 0–15
16
+ * Platform 0–15
8
17
  */
9
18
/**
 * Outcome of assessing how trustworthy a source URL appears.
 */
export interface SourceCredibility {
    /** Coarse trust bucket the source was placed in. */
    tier: 'official' | 'established' | 'community' | 'new' | 'suspicious';
    /** Numeric credibility score on a 0–100 scale. */
    score: number;
    /** Human-readable description of the source (e.g. "Official Government Source"). */
    label: string;
    /** Positive indicators noted while scoring (e.g. "HTTPS enforced"). */
    signals: string[];
    /** Concerns noted while scoring (e.g. "HTTP only — no encryption"). */
    warnings: string[];
}
/**
 * Assess the credibility of a source URL.
 * Fully synchronous — no network calls.
 *
 * @param url URL of the source to assess.
 * @returns The tier, score, label, signals and warnings derived for the URL.
 */
export declare function getSourceCredibility(url: string): SourceCredibility;
@@ -1,83 +1,584 @@
1
1
  /**
2
- * Source credibility scoring — lightweight, zero dependencies.
2
+ * Source credibility scoring — lightweight, zero dependencies, no network calls.
3
3
  *
4
- * Classifies URLs by trustworthiness:
5
- * - Official (★★★): .gov, .edu, .mil, WHO, NIH, academic journals
6
- * - Verified (★★): Wikipedia, Reuters, BBC, GitHub, StackOverflow
7
- * - General (★): Everything else
4
+ * Actively investigates domain signals from the URL itself:
5
+ * - TLD trust score
6
+ * - HTTPS enforcement
7
+ * - Domain structure analysis
8
+ * - Brand/platform recognition (500+ known domains)
9
+ * - Content platform detection
10
+ *
11
+ * Score breakdown (0–100):
12
+ * TLD weight 0–20
13
+ * HTTPS 0–10
14
+ * Known domain 0–40
15
+ * Structure 0–15
16
+ * Platform 0–15
8
17
  */
9
- /** Official TLDs and hostnames that indicate high-authority sources */
10
- const OFFICIAL_TLDS = new Set(['.gov', '.edu', '.mil']);
11
- const OFFICIAL_HOSTNAMES = new Set([
12
- // Academic / research
13
- 'arxiv.org', 'scholar.google.com', 'pubmed.ncbi.nlm.nih.gov', 'ncbi.nlm.nih.gov',
14
- 'jstor.org', 'nature.com', 'science.org', 'cell.com', 'nejm.org', 'bmj.com',
15
- 'thelancet.com', 'plos.org', 'springer.com', 'elsevier.com',
18
+ // ---------------------------------------------------------------------------
19
+ // TLD trust map: points (0–20)
20
+ // ---------------------------------------------------------------------------
21
+ const TLD_TRUST = {
22
+ '.gov': 20, '.edu': 20, '.mil': 20,
23
+ '.org': 14, '.net': 12, '.com': 12, '.io': 11,
24
+ '.co': 10, '.us': 10, '.uk': 10, '.ca': 10, '.au': 10,
25
+ '.de': 10, '.fr': 10, '.jp': 10, '.br': 10, '.in': 10,
26
+ '.eu': 11, '.int': 15,
27
+ '.info': 8, '.biz': 7, '.me': 8, '.tv': 8, '.app': 10,
28
+ '.dev': 10, '.ai': 10, '.tech': 8, '.page': 8,
29
+ '.blog': 7, '.news': 8, '.media': 8, '.press': 8,
30
+ '.shop': 7, '.store': 7, '.online': 7, '.site': 6,
31
+ '.website': 6, '.space': 5, '.club': 5, '.pro': 7,
32
+ // Low-trust freebies
33
+ '.tk': 1, '.ml': 1, '.ga': 1, '.cf': 1, '.gq': 1,
34
+ '.xyz': 4, '.top': 3, '.loan': 2, '.click': 3, '.link': 4,
35
+ '.win': 2, '.bid': 2, '.download': 2, '.racing': 2, '.review': 4,
36
+ '.cc': 3, '.pw': 3, '.men': 2, '.party': 2, '.stream': 3,
37
+ };
38
+ // ---------------------------------------------------------------------------
39
+ // Suspicious TLDs (high-risk freebies used in phishing)
40
+ // ---------------------------------------------------------------------------
41
/** High-risk TLDs, listed with their leading dot. */
const SUSPICIOUS_TLDS = new Set([
    '.tk', '.ml', '.ga', '.cf', '.gq',
    '.win', '.bid', '.men', '.party', '.loan',
]);
// ---------------------------------------------------------------------------
// Official TLDs
// ---------------------------------------------------------------------------
/** TLDs whose hosts are classified as official sources, listed with their leading dot. */
const OFFICIAL_TLDS = new Set([
    '.gov',
    '.edu',
    '.mil',
    '.int',
]);
46
+ // ---------------------------------------------------------------------------
47
+ // Official hostnames (beyond the .gov/.edu/.mil/.int TLDs)
48
+ // ---------------------------------------------------------------------------
49
/**
 * Hostnames classified as official sources regardless of their TLD.
 * Grouped by category for readability; the groups are flattened into one Set.
 */
const OFFICIAL_DOMAINS = new Set([
    // International organisations
    [
        'who.int', 'un.org', 'worldbank.org', 'imf.org', 'oecd.org', 'europa.eu',
        'nato.int', 'wto.org', 'unicef.org', 'unhcr.org', 'icrc.org',
    ],
    // Academic / research
    [
        'arxiv.org', 'pubmed.ncbi.nlm.nih.gov', 'ncbi.nlm.nih.gov', 'jstor.org',
        'nature.com', 'science.org', 'cell.com', 'nejm.org', 'bmj.com',
        'thelancet.com', 'plos.org', 'springer.com', 'elsevier.com',
        'scholar.google.com', 'researchgate.net', 'semanticscholar.org',
        'acm.org', 'ieee.org',
    ],
    // Official tech documentation
    [
        'docs.python.org', 'developer.mozilla.org', 'nodejs.org', 'rust-lang.org',
        'docs.microsoft.com', 'learn.microsoft.com', 'developer.apple.com',
        'developer.android.com', 'php.net', 'ruby-lang.org', 'golang.org', 'go.dev',
    ],
    // Health
    [
        'mayoclinic.org', 'clevelandclinic.org', 'webmd.com',
    ],
    // Standards / specs
    [
        'w3.org', 'ietf.org', 'rfc-editor.org', 'iso.org', 'ecma-international.org',
    ],
].flat());
68
+ // ---------------------------------------------------------------------------
69
+ // Established domains (score bonus 40 pts) — 500+ entries
70
+ // ---------------------------------------------------------------------------
71
/**
 * Well-known hostnames treated as established sources.
 *
 * Entries are bare hostnames as they would appear in a parsed URL (lowercase,
 * no scheme, no whitespace). Some domains are listed under more than one
 * category; the Set collapses those repeats.
 */
const ESTABLISHED_DOMAINS = new Set([
    // ── Major Tech ──────────────────────────────────────────────────────────
    'google.com', 'apple.com', 'microsoft.com', 'amazon.com', 'meta.com',
    'netflix.com', 'spotify.com', 'adobe.com', 'salesforce.com', 'oracle.com',
    'ibm.com', 'intel.com', 'nvidia.com', 'amd.com', 'qualcomm.com',
    'cisco.com', 'vmware.com', 'sap.com', 'servicenow.com', 'workday.com',
    'zoom.us', 'slack.com', 'dropbox.com', 'box.com', 'atlassian.com',
    'jira.atlassian.com', 'confluence.atlassian.com',
    'twilio.com', 'sendgrid.com', 'mailchimp.com', 'hubspot.com',
    'zendesk.com', 'intercom.com', 'freshworks.com', 'docusign.com',
    'okta.com', 'auth0.com', 'cloudflare.com', 'fastly.com', 'akamai.com',
    'digitalocean.com', 'linode.com', 'vultr.com',
    'datadog.com', 'newrelic.com', 'splunk.com', 'elastic.co',
    'mongodb.com', 'redis.io', 'postgresql.org', 'mysql.com',
    'docker.com', 'kubernetes.io', 'helm.sh',
    'terraform.io', 'ansible.com', 'chef.io', 'puppet.com',
    'heroku.com', 'render.com', 'railway.app', 'fly.io',
    'supabase.com', 'planetscale.com', 'neon.tech', 'fauna.com',
    'firebase.google.com', 'expo.dev',
    'openai.com', 'anthropic.com', 'cohere.com', 'huggingface.co',
    'stability.ai', 'midjourney.com', 'replicate.com',
    'figma.com', 'sketch.com', 'invisionapp.com', 'zeplin.io',
    'notion.so', 'airtable.com', 'monday.com', 'asana.com', 'clickup.com',
    'trello.com', 'basecamp.com', 'linear.app', 'shortcut.com',
    'postman.com', 'insomnia.rest', 'swagger.io',
    'sentry.io', 'bugsnag.com', 'rollbar.com',
    'segment.com', 'mixpanel.com', 'amplitude.com', 'heap.io',
    'looker.com', 'tableau.com', 'powerbi.microsoft.com',
    'snowflake.com', 'databricks.com', 'dbt.com', 'fivetran.com', 'airbyte.com',
    'vercel.com', 'netlify.com',
    // ── Cloud / Hosting ──────────────────────────────────────────────────────
    'aws.amazon.com', 'cloud.google.com', 'azure.microsoft.com',
    'docs.aws.amazon.com', 'console.aws.amazon.com',
    // ── Developer Ecosystems ──────────────────────────────────────────────────
    'github.com', 'gitlab.com', 'bitbucket.org', 'sourcehut.com',
    'stackoverflow.com', 'superuser.com', 'serverfault.com',
    'npmjs.com', 'pypi.org', 'crates.io', 'packagist.org', 'rubygems.org',
    'nuget.org', 'pub.dev', 'hex.pm', 'opam.ocaml.org',
    'docs.rs', 'pkg.go.dev',
    'codepen.io', 'jsfiddle.net', 'replit.com', 'glitch.com', 'codesandbox.io',
    'leetcode.com', 'hackerrank.com', 'codewars.com', 'exercism.org',
    'regex101.com', 'regexr.com',
    // ── Major Social ──────────────────────────────────────────────────────────
    'twitter.com', 'x.com', 'reddit.com', 'linkedin.com', 'instagram.com',
    'facebook.com', 'youtube.com', 'tiktok.com', 'snapchat.com', 'pinterest.com',
    'tumblr.com', 'mastodon.social', 'threads.net', 'discord.com', 'discord.gg',
    'twitch.tv', 'kick.com', 'vimeo.com', 'dailymotion.com',
    'quora.com', 'medium.com', 'substack.com', 'hashnode.com', 'dev.to',
    // ── Major News ────────────────────────────────────────────────────────────
    'nytimes.com', 'washingtonpost.com', 'theguardian.com', 'bbc.com', 'bbc.co.uk',
    'reuters.com', 'apnews.com', 'bloomberg.com', 'economist.com', 'ft.com',
    'wsj.com', 'cnn.com', 'foxnews.com', 'msnbc.com', 'nbcnews.com',
    'cbsnews.com', 'abcnews.go.com', 'npr.org', 'pbs.org',
    'time.com', 'usatoday.com', 'huffpost.com', 'vox.com', 'axios.com',
    'politico.com', 'thehill.com', 'rollcall.com', 'slate.com', 'salon.com',
    'theatlantic.com', 'newyorker.com', 'newrepublic.com',
    'motherjones.com', 'propublica.org', 'intercept.co',
    'aljazeera.com', 'dw.com', 'france24.com', 'rt.com',
    'spiegel.de', 'lemonde.fr', 'liberation.fr', 'lefigaro.fr',
    'elpais.com', 'elmundo.es', 'repubblica.it', 'corriere.it',
    'theglobeandmail.com', 'thestar.com', 'nationalpost.com',
    'smh.com.au', 'theage.com.au', 'abc.net.au',
    'timesofindia.com', 'hindustantimes.com', 'thehindu.com', 'ndtv.com',
    'scmp.com', 'channelnewsasia.com', 'straitstimes.com',
    'haaretz.com', 'timesofisrael.com', 'jpost.com',
    'techcrunch.com', 'wired.com', 'arstechnica.com', 'theverge.com',
    'engadget.com', 'gizmodo.com', 'cnet.com', 'pcmag.com', 'tomshardware.com',
    'anandtech.com', 'macrumors.com', '9to5mac.com', '9to5google.com',
    'androidcentral.com', 'windowscentral.com',
    'venturebeat.com', 'businessinsider.com', 'forbes.com', 'fortune.com',
    'inc.com', 'entrepreneur.com', 'fastcompany.com',
    // ── Finance ───────────────────────────────────────────────────────────────
    'chase.com', 'bankofamerica.com', 'wellsfargo.com', 'citibank.com',
    'capitalone.com', 'usbank.com', 'tdbank.com', 'pnc.com',
    'americanexpress.com', 'discover.com', 'synchrony.com',
    'paypal.com', 'stripe.com', 'square.com', 'braintree.com', 'adyen.com',
    'coinbase.com', 'binance.com', 'kraken.com', 'gemini.com', 'crypto.com',
    'robinhood.com', 'etrade.com', 'schwab.com', 'fidelity.com',
    'vanguard.com', 'blackrock.com', 'jpmorgan.com', 'goldmansachs.com',
    'morganstanley.com', 'ubs.com', 'credit-suisse.com', 'hsbc.com',
    'barclays.com', 'lloydsbank.com', 'natwest.com', 'santander.com',
    'transferwise.com', 'wise.com', 'revolut.com', 'monzo.com',
    'quickbooks.intuit.com', 'turbotax.intuit.com', 'mint.com', 'hrblock.com',
    'experian.com', 'equifax.com', 'transunion.com',
    // ── E-commerce / Retail ──────────────────────────────────────────────────
    'amazon.com', 'ebay.com', 'etsy.com', 'walmart.com', 'target.com',
    'bestbuy.com', 'costco.com', 'homedepot.com', 'lowes.com', 'wayfair.com',
    'shopify.com', 'bigcommerce.com', 'woocommerce.com', 'squarespace.com',
    'overstock.com', 'newegg.com', 'bhphotovideo.com', 'adorama.com',
    'aliexpress.com', 'alibaba.com', 'wish.com', 'dhgate.com',
    'zappos.com', 'nordstrom.com', 'macys.com', 'bloomingdales.com', 'gap.com',
    'nike.com', 'adidas.com', 'reebok.com', 'underarmour.com', 'lululemon.com',
    'ikea.com', 'crate.com', 'potterybarn.com', 'williams-sonoma.com',
    'chewy.com', 'petco.com', 'petsmart.com',
    'instacart.com', 'doordash.com', 'ubereats.com', 'grubhub.com',
    'opentable.com', 'yelp.com', 'tripadvisor.com',
    // ── Travel ────────────────────────────────────────────────────────────────
    'booking.com', 'expedia.com', 'airbnb.com', 'vrbo.com', 'kayak.com',
    'hotels.com', 'priceline.com', 'orbitz.com', 'travelocity.com',
    'delta.com', 'united.com', 'aa.com', 'southwest.com', 'jetblue.com',
    'marriott.com', 'hilton.com', 'hyatt.com', 'ihg.com', 'wyndham.com',
    'uber.com', 'lyft.com', 'waymo.com',
    // ── Education ─────────────────────────────────────────────────────────────
    'coursera.org', 'edx.org', 'khanacademy.org', 'udemy.com', 'udacity.com',
    'pluralsight.com', 'lynda.com', 'linkedin.com', 'skillshare.com',
    'codecademy.com', 'freecodecamp.org', 'theodinproject.com',
    'brilliant.org', 'duolingo.com', 'babbel.com', 'rosettastone.com',
    'cambridgeinternational.org',
    'britannica.com', 'encyclopedia.com',
    // ── Reference / Knowledge ─────────────────────────────────────────────────
    'wikipedia.org', 'wikimedia.org', 'wikihow.com', 'wikidata.org',
    'imdb.com', 'rottentomatoes.com', 'metacritic.com', 'goodreads.com',
    'nationalgeographic.com', 'smithsonianmag.com', 'history.com',
    'wolframalpha.com', 'dictionary.com', 'merriam-webster.com',
    'etymonline.com', 'thesaurus.com',
    'archive.org', 'waybackmachine.org',
    // ── Health ────────────────────────────────────────────────────────────────
    'webmd.com', 'mayoclinic.org', 'clevelandclinic.org', 'healthline.com',
    'medicalnewstoday.com', 'everydayhealth.com', 'drugs.com',
    'rxlist.com', 'medscape.com', 'uptodate.com', 'emedicinehealth.com',
    'psych.org', 'nami.org', 'betterhelp.com', 'talkspace.com',
    // ── Legal ─────────────────────────────────────────────────────────────────
    'law.cornell.edu', 'justia.com', 'findlaw.com', 'nolo.com', 'avvo.com',
    // Was 'rocket lawyer.com': a hostname cannot contain a space, so the
    // entry could never match.
    'legalzoom.com', 'rocketlawyer.com',
    // ── Government / Civic (beyond TLD) ──────────────────────────────────────
    'gov.uk', 'gc.ca', 'australia.gov.au',
    // ── Open Source / Misc Tech ──────────────────────────────────────────────
    'linux.org', 'kernel.org', 'gnu.org', 'apache.org', 'mozilla.org',
    'python.org', 'perl.org', 'haskell.org',
    'jquery.com', 'reactjs.org', 'react.dev', 'vuejs.org', 'angular.io',
    'svelte.dev', 'nextjs.org', 'nuxtjs.org', 'remix.run', 'astro.build',
    'tailwindcss.com', 'getbootstrap.com', 'mui.com', 'chakra-ui.com',
    'styled-components.com', 'emotion.sh',
    'vitejs.dev', 'webpack.js.org', 'rollupjs.org', 'esbuild.github.io',
    'babeljs.io', 'eslint.org', 'prettier.io', 'typescript.dev',
    'typescriptlang.org', 'deno.com', 'deno.land', 'bun.sh',
    'expressjs.com', 'fastify.io', 'nestjs.com', 'koajs.com', 'hapi.dev',
    'graphql.org', 'apollographql.com', 'trpc.io', 'grpc.io',
    'prisma.io', 'drizzle.team', 'typeorm.io', 'sequelize.org',
    'socket.io', 'feathersjs.com',
    'git-scm.com', 'gitkraken.com',
    'homebrew.sh', 'brew.sh', 'chocolatey.org', 'scoop.sh', 'winget.run',
    'ubuntu.com', 'debian.org', 'fedoraproject.org', 'archlinux.org',
    'redhat.com', 'suse.com', 'centos.org',
    // ── Security / Privacy ───────────────────────────────────────────────────
    'haveibeenpwned.com', 'virustotal.com', '1password.com', 'bitwarden.com',
    'lastpass.com', 'dashlane.com', 'nordvpn.com', 'expressvpn.com',
    'protonmail.com', 'proton.me', 'tutanota.com', 'fastmail.com',
    'letsencrypt.org', 'ssllabs.com', 'namecheap.com', 'godaddy.com',
    'porkbun.com', 'cloudflare.com', 'dnschecker.org',
    // ── Search ────────────────────────────────────────────────────────────────
    'google.com', 'bing.com', 'yahoo.com', 'duckduckgo.com', 'brave.com',
    'startpage.com', 'ecosia.org', 'kagi.com',
    // ── Productivity ─────────────────────────────────────────────────────────
    'gmail.com', 'outlook.com', 'office.com', 'office365.com',
    'docs.google.com', 'drive.google.com', 'calendar.google.com',
    'maps.google.com', 'translate.google.com',
    'evernote.com', 'onenote.com', 'bear.app', 'obsidian.md',
    'cal.com', 'calendly.com', 'doodle.com', 'when2meet.com',
    'loom.com', 'screen.studio', 'cleanshot.com',
    'canva.com', 'unsplash.com', 'pexels.com', 'pixabay.com',
    'shutterstock.com', 'gettyimages.com', 'istockphoto.com',
    'giphy.com', 'tenor.com',
    // ── Music / Media ─────────────────────────────────────────────────────────
    'soundcloud.com', 'bandcamp.com', 'last.fm', 'allmusic.com',
    'discogs.com', 'genius.com', 'azlyrics.com', 'musixmatch.com',
    'hulu.com', 'disneyplus.com', 'hbomax.com', 'max.com',
    'peacocktv.com', 'paramount.com', 'crunchyroll.com', 'funimation.com',
    'apple.com', 'music.apple.com',
    // ── Gaming ───────────────────────────────────────────────────────────────
    'steam.com', 'steampowered.com', 'epicgames.com', 'gog.com',
    'itch.io', 'roblox.com', 'minecraft.net', 'ea.com',
    'activision.com', 'blizzard.com', 'battle.net', 'ubisoft.com',
    'nintendo.com', 'playstation.com', 'xbox.com',
    'ign.com', 'gamespot.com', 'kotaku.com', 'polygon.com',
    'pcgamer.com', 'rockpapershotgun.com',
    // ── Science / Research ───────────────────────────────────────────────────
    'nasa.gov', 'esa.int', 'noaa.gov', 'nist.gov', 'usgs.gov',
    'epa.gov', 'energy.gov', 'nsf.gov',
    'acs.org', 'aps.org', 'aip.org', 'ams.org',
    'newsweek.com', 'scientificamerican.com', 'popularmechanics.com',
    'livescience.com', 'space.com', 'phys.org', 'sciencedaily.com',
    'technologyreview.com',
    // ── Mapping / Location ────────────────────────────────────────────────────
    'openstreetmap.org', 'mapbox.com', 'here.com', 'waze.com',
    'zillow.com', 'redfin.com', 'realtor.com', 'trulia.com', 'apartments.com',
    // ── HR / Recruiting ───────────────────────────────────────────────────────
    'indeed.com', 'glassdoor.com', 'monster.com', 'ziprecruiter.com',
    'careerbuilder.com', 'simplyhired.com', 'flexjobs.com', 'remote.com',
    'levels.fyi', 'teamblind.com', 'angellist.com', 'wellfound.com',
    // ── Misc established ─────────────────────────────────────────────────────
    'hbr.org', 'mckinsey.com', 'bcg.com', 'bain.com', 'deloitte.com',
    'pwc.com', 'kpmg.com', 'ey.com', 'accenture.com',
    'gartner.com', 'idc.com', 'forrester.com',
    'ted.com', 'masterclass.com',
    'change.org', 'gofundme.com', 'kickstarter.com', 'indiegogo.com',
    'patreon.com', 'ko-fi.com', 'buymeacoffee.com',
    'webpeel.dev',
]);
28
- const VERIFIED_HOSTNAMES = new Set([
29
- // Encyclopaedia / reference
30
- 'wikipedia.org', 'en.wikipedia.org', 'britannica.com',
31
- // Reputable news agencies
32
- 'reuters.com', 'apnews.com', 'bbc.com', 'bbc.co.uk', 'nytimes.com',
33
- 'washingtonpost.com', 'theguardian.com', 'economist.com', 'ft.com',
34
- 'cnn.com', 'npr.org', 'pbs.org',
35
- // Developer resources
36
- 'github.com', 'stackoverflow.com', 'npmjs.com', 'pypi.org',
37
- 'crates.io', 'docs.rs', 'packagist.org', 'rubygems.org',
38
- // Official cloud / vendor docs
39
- 'docs.aws.amazon.com', 'cloud.google.com', 'docs.github.com',
40
- 'azure.microsoft.com', 'registry.terraform.io',
41
- // Reputable tech publications
42
- 'arstechnica.com', 'wired.com', 'techcrunch.com', 'theverge.com',
43
- // National Geographic, Smithsonian
44
- 'nationalgeographic.com', 'smithsonianmag.com',
270
+ // ---------------------------------------------------------------------------
271
+ // Community / content platforms — user content hosted on established infra
272
+ // ---------------------------------------------------------------------------
273
/**
 * Content platforms that host user-generated material, keyed by hostname and
 * mapped to the label describing content found there.
 */
const COMMUNITY_PLATFORMS = new Map(Object.entries({
    'github.com': 'Community Content on GitHub',
    'github.io': 'Personal Site on GitHub Pages',
    'gitlab.com': 'Community Content on GitLab',
    'medium.com': 'Article on Medium',
    'substack.com': 'Newsletter on Substack',
    'hashnode.com': 'Blog on Hashnode',
    'dev.to': 'Article on DEV Community',
    'wordpress.com': 'Blog on WordPress',
    'blogspot.com': 'Blog on Blogger',
    'blogger.com': 'Blog on Blogger',
    'tumblr.com': 'Blog on Tumblr',
    'weebly.com': 'Site on Weebly',
    'wix.com': 'Site on Wix',
    'squarespace.com': 'Site on Squarespace',
    'webflow.io': 'Site on Webflow',
    'vercel.app': 'Deployed Project on Vercel',
    'netlify.app': 'Deployed Project on Netlify',
    'pages.dev': 'Deployed Project on Cloudflare Pages',
    'web.app': 'Firebase Hosted App',
    'firebaseapp.com': 'Firebase Hosted App',
    'herokuapp.com': 'App on Heroku',
    'replit.dev': 'Project on Replit',
    'glitch.me': 'Project on Glitch',
    'codesandbox.io': 'Sandbox on CodeSandbox',
    'stackblitz.com': 'Project on StackBlitz',
    'codepen.io': 'Pen on CodePen',
    'jsfiddle.net': 'Fiddle on JSFiddle',
    'notion.site': 'Notion Page',
    'gitbook.io': 'Docs on GitBook',
    'gitbook.com': 'Docs on GitBook',
    'readthedocs.io': 'Docs on Read the Docs',
    'readthedocs.org': 'Docs on Read the Docs',
    'reddit.com': 'Community Discussion on Reddit',
    'news.ycombinator.com': 'Discussion on Hacker News',
    'quora.com': 'Answer on Quora',
    'stackoverflow.com': 'Answer on Stack Overflow',
    'stackexchange.com': 'Answer on Stack Exchange',
    'producthunt.com': 'Launch on Product Hunt',
    'indiehackers.com': 'Post on Indie Hackers',
    'hackernoon.com': 'Article on HackerNoon',
    'lobste.rs': 'Discussion on Lobsters',
    'lobsters.rs': 'Discussion on Lobsters',
    'twitter.com': 'Post on X (Twitter)',
    'x.com': 'Post on X (Twitter)',
    'linkedin.com': 'Post on LinkedIn',
    'youtube.com': 'Video on YouTube',
    'vimeo.com': 'Video on Vimeo',
    'twitch.tv': 'Stream on Twitch',
    'soundcloud.com': 'Audio on SoundCloud',
    'bandcamp.com': 'Music on Bandcamp',
    'pinterest.com': 'Pin on Pinterest',
    'instagram.com': 'Post on Instagram',
    'tiktok.com': 'Video on TikTok',
}));
328
+ // ---------------------------------------------------------------------------
329
+ // Brand-category labels for established domains
330
+ // ---------------------------------------------------------------------------
331
+ const DOMAIN_CATEGORY = {
332
+ // Tech
333
+ 'google.com': 'Established Technology Company',
334
+ 'apple.com': 'Established Technology Company',
335
+ 'microsoft.com': 'Established Technology Company',
336
+ 'amazon.com': 'Established E-commerce & Cloud Platform',
337
+ 'meta.com': 'Established Technology Company',
338
+ 'netflix.com': 'Established Streaming Service',
339
+ 'spotify.com': 'Established Music Streaming Service',
340
+ 'openai.com': 'Established AI Research Company',
341
+ 'anthropic.com': 'Established AI Research Company',
342
+ 'github.com': 'Established Developer Platform',
343
+ 'gitlab.com': 'Established Developer Platform',
344
+ 'stackoverflow.com': 'Established Developer Q&A Platform',
345
+ 'npmjs.com': 'Established Package Registry',
346
+ 'pypi.org': 'Established Package Registry',
347
+ 'docker.com': 'Established Container Platform',
348
+ 'vercel.com': 'Established Hosting Platform',
349
+ 'netlify.com': 'Established Hosting Platform',
350
+ 'cloudflare.com': 'Established CDN & Security Provider',
351
+ 'figma.com': 'Established Design Platform',
352
+ 'notion.so': 'Established Productivity Platform',
353
+ 'slack.com': 'Established Business Communication Platform',
354
+ 'zoom.us': 'Established Video Communication Platform',
355
+ 'adobe.com': 'Established Creative Software Company',
356
+ // News
357
+ 'nytimes.com': 'Established News Organization',
358
+ 'washingtonpost.com': 'Established News Organization',
359
+ 'theguardian.com': 'Established News Organization',
360
+ 'bbc.com': 'Established News Organization',
361
+ 'bbc.co.uk': 'Established News Organization',
362
+ 'reuters.com': 'Established News Agency',
363
+ 'apnews.com': 'Established News Agency',
364
+ 'bloomberg.com': 'Established Financial News Organization',
365
+ 'economist.com': 'Established News Publication',
366
+ 'ft.com': 'Established Financial News Organization',
367
+ 'wsj.com': 'Established Financial News Organization',
368
+ 'cnn.com': 'Established News Organization',
369
+ 'npr.org': 'Established Public Radio',
370
+ 'techcrunch.com': 'Established Technology News Publication',
371
+ 'wired.com': 'Established Technology News Publication',
372
+ 'arstechnica.com': 'Established Technology News Publication',
373
+ 'theverge.com': 'Established Technology News Publication',
374
+ // Finance
375
+ 'paypal.com': 'Established Payment Platform',
376
+ 'stripe.com': 'Established Payment Platform',
377
+ 'square.com': 'Established Payment Platform',
378
+ 'coinbase.com': 'Established Cryptocurrency Exchange',
379
+ 'chase.com': 'Established Financial Institution',
380
+ 'bankofamerica.com': 'Established Financial Institution',
381
+ 'wellsfargo.com': 'Established Financial Institution',
382
+ // E-commerce
383
+ 'ebay.com': 'Established E-commerce Marketplace',
384
+ 'etsy.com': 'Established Handmade Marketplace',
385
+ 'walmart.com': 'Established Retail Company',
386
+ 'target.com': 'Established Retail Company',
387
+ 'bestbuy.com': 'Established Electronics Retailer',
388
+ 'shopify.com': 'Established E-commerce Platform',
389
+ // Education
390
+ 'coursera.org': 'Established Online Education Platform',
391
+ 'edx.org': 'Established Online Education Platform',
392
+ 'khanacademy.org': 'Non-Profit Education Platform',
393
+ 'udemy.com': 'Established Online Learning Marketplace',
394
+ 'britannica.com': 'Established Reference Encyclopedia',
395
+ 'wikipedia.org': 'Open Encyclopedia (Community Edited)',
396
+ // Reference
397
+ 'archive.org': 'Established Digital Archive',
398
+ 'wolframalpha.com': 'Established Computational Knowledge Engine',
399
+ 'imdb.com': 'Established Movie & TV Database',
400
+ };
401
+ // ---------------------------------------------------------------------------
402
+ // Helpers
403
+ // ---------------------------------------------------------------------------
404
/**
 * Two-label public suffixes under which registrable domains sit one level
 * deeper (e.g. "bbc.co.uk"). Deliberately small — this is not the full Public
 * Suffix List — and government suffixes such as "gov.uk" are left out so those
 * hosts keep resolving to the suffix itself, as they did before.
 */
const MULTI_LABEL_SUFFIXES = new Set([
    'co.uk', 'org.uk', 'ac.uk',
    'com.au', 'net.au', 'org.au',
    'co.jp', 'co.nz', 'co.in', 'co.kr', 'co.za',
    'com.br', 'com.cn', 'com.mx',
]);
/** Split a hostname into labels, ignoring a single trailing root dot ("example.com."). */
function splitHostLabels(hostname) {
    return hostname.replace(/\.$/, '').split('.');
}
/** Number of trailing labels that make up the registrable domain (2, or 3 under a multi-label suffix). */
function registrableLabelCount(labels) {
    const lastTwo = labels.slice(-2).join('.');
    return labels.length >= 3 && MULTI_LABEL_SUFFIXES.has(lastTwo) ? 3 : 2;
}
/**
 * Extract the top-level domain of a hostname, including the leading dot.
 *
 * @param hostname Hostname to inspect (e.g. "docs.example.com").
 * @returns The final label prefixed with "." (e.g. ".com"), or "" when the
 *          hostname has fewer than two labels.
 */
function extractTLD(hostname) {
    const labels = splitHostLabels(hostname);
    if (labels.length < 2)
        return '';
    return '.' + labels[labels.length - 1];
}
/**
 * Extract the registrable domain of a hostname (e.g. "google.com").
 *
 * Hosts under a known two-label suffix keep three labels, so
 * "news.bbc.co.uk" yields "bbc.co.uk" rather than "co.uk".
 *
 * @param hostname Hostname to inspect.
 * @returns The registrable domain, or the hostname itself (minus any trailing
 *          dot) when it has fewer than two labels.
 */
function extractSLD(hostname) {
    const labels = splitHostLabels(hostname);
    if (labels.length < 2)
        return labels.join('.');
    return labels.slice(-registrableLabelCount(labels)).join('.');
}
/**
 * Count the subdomain labels in front of the registrable domain.
 *
 * A leading "www." is not counted (www.example.com → 0), and labels belonging
 * to a known two-label suffix are not counted either (www.bbc.co.uk → 0).
 *
 * @param hostname Hostname to inspect; may still carry its "www." prefix.
 * @returns The number of subdomain labels, never negative.
 */
function countSubdomains(hostname) {
    const labels = splitHostLabels(hostname.replace(/^www\./, ''));
    return Math.max(0, labels.length - registrableLabelCount(labels));
}
423
+ // ---------------------------------------------------------------------------
424
+ // Main export
425
+ // ---------------------------------------------------------------------------
46
426
  /**
47
427
  * Assess the credibility of a source URL.
428
+ * Fully synchronous — no network calls.
48
429
  */
49
430
  export function getSourceCredibility(url) {
431
+ const signals = [];
432
+ const warnings = [];
433
+ let score = 0;
434
+ // ── Parse URL ─────────────────────────────────────────────────────────────
435
+ let parsedUrl;
50
436
  try {
51
- const hostname = new URL(url).hostname.toLowerCase().replace(/^www\./, '');
52
- // Check official TLDs
53
- for (const tld of OFFICIAL_TLDS) {
54
- if (hostname.endsWith(tld)) {
55
- return { tier: 'official', stars: 3, label: 'OFFICIAL SOURCE' };
56
- }
57
- }
58
- // Check known official hostnames
59
- if (OFFICIAL_HOSTNAMES.has(hostname)) {
60
- return { tier: 'official', stars: 3, label: 'OFFICIAL SOURCE' };
61
- }
62
- // Check parent domain (e.g. en.wikipedia.org → wikipedia.org)
63
- const parts = hostname.split('.');
64
- if (parts.length > 2) {
65
- const parentDomain = parts.slice(-2).join('.');
66
- if (OFFICIAL_HOSTNAMES.has(parentDomain)) {
67
- return { tier: 'official', stars: 3, label: 'OFFICIAL SOURCE' };
68
- }
69
- if (VERIFIED_HOSTNAMES.has(parentDomain)) {
70
- return { tier: 'verified', stars: 2, label: 'VERIFIED' };
71
- }
72
- }
73
- // Check known verified hostnames
74
- if (VERIFIED_HOSTNAMES.has(hostname)) {
75
- return { tier: 'verified', stars: 2, label: 'VERIFIED' };
76
- }
77
- // Everything else
78
- return { tier: 'general', stars: 1, label: 'UNVERIFIED' };
437
+ parsedUrl = new URL(url);
79
438
  }
80
439
  catch {
81
- return { tier: 'general', stars: 1, label: 'UNVERIFIED' };
440
+ return {
441
+ tier: 'suspicious',
442
+ score: 0,
443
+ label: 'Invalid URL — Cannot Assess',
444
+ signals: [],
445
+ warnings: ['URL could not be parsed'],
446
+ };
447
+ }
448
+ const protocol = parsedUrl.protocol; // 'https:' or 'http:'
449
+ const rawHostname = parsedUrl.hostname.toLowerCase();
450
+ const hostname = rawHostname.replace(/^www\./, '');
451
+ const tld = extractTLD(hostname);
452
+ const sld = extractSLD(hostname); // e.g. "google.com"
453
+ const subdomainCount = countSubdomains(rawHostname);
454
+ // ── 1. HTTPS check (0–10 pts) ─────────────────────────────────────────────
455
+ if (protocol === 'https:') {
456
+ score += 10;
457
+ signals.push('HTTPS enforced');
458
+ }
459
+ else {
460
+ warnings.push('HTTP only — no encryption');
461
+ }
462
+ // ── 2. TLD trust (0–20 pts) ───────────────────────────────────────────────
463
+ const tldScore = TLD_TRUST[tld] ?? 5;
464
+ score += tldScore;
465
+ if (tldScore >= 18) {
466
+ signals.push(`Trusted TLD (${tld})`);
467
+ }
468
+ else if (tldScore <= 3) {
469
+ warnings.push(`High-risk TLD (${tld}) — commonly used in phishing`);
470
+ }
471
+ // ── 3. Official TLD shortcut ──────────────────────────────────────────────
472
+ if (OFFICIAL_TLDS.has(tld) || OFFICIAL_DOMAINS.has(hostname) || OFFICIAL_DOMAINS.has(sld)) {
473
+ const category = DOMAIN_CATEGORY[hostname] ?? DOMAIN_CATEGORY[sld] ?? 'Official Source';
474
+ return {
475
+ tier: 'official',
476
+ score: Math.min(100, score + 40 + 15),
477
+ label: tld === '.gov' ? 'Official Government Source' :
478
+ tld === '.edu' ? 'Official Educational Institution' :
479
+ tld === '.mil' ? 'Official Military Source' :
480
+ tld === '.int' ? 'International Organization' :
481
+ category,
482
+ signals: [...signals, 'Official domain verified', `Trusted TLD (${tld})`].filter((v, i, a) => a.indexOf(v) === i),
483
+ warnings,
484
+ };
485
+ }
486
+ // ── 4. Domain structure (0–15 pts) ────────────────────────────────────────
487
+ if (subdomainCount === 0) {
488
+ score += 15;
489
+ signals.push('Clean domain structure');
490
+ }
491
+ else if (subdomainCount === 1) {
492
+ score += 10;
493
+ signals.push('Standard subdomain structure');
494
+ }
495
+ else if (subdomainCount === 2) {
496
+ score += 5;
497
+ }
498
+ else {
499
+ // 3+ subdomains — possible phishing pattern
500
+ score += 0;
501
+ warnings.push(`Excessive subdomains (${subdomainCount}) — potential phishing indicator`);
502
+ }
503
+ // ── 5 & 6. Known domain + Community platform (mutually exclusive bonus) ──
504
+ // Community platform detection — user content on a known hosting platform.
505
+ // When the domain is a community platform, it gets the platform bonus (15 pts)
506
+ // but NOT the established domain bonus (they're conceptually different tiers).
507
+ const communityLabel = COMMUNITY_PLATFORMS.get(hostname) ?? COMMUNITY_PLATFORMS.get(sld);
508
+ const isEstablished = ESTABLISHED_DOMAINS.has(hostname) || ESTABLISHED_DOMAINS.has(sld);
509
+ if (communityLabel) {
510
+ // Platform bonus only — user content hosted on verified infra
511
+ score += 15;
512
+ signals.push(`Hosted on verified platform (${sld})`);
513
+ }
514
+ else if (isEstablished) {
515
+ // Full established domain bonus
516
+ score += 40;
517
+ signals.push('Recognized established domain');
518
+ }
519
+ // ── 7. Suspicious TLD ─────────────────────────────────────────────────────
520
+ if (SUSPICIOUS_TLDS.has(tld)) {
521
+ score = Math.min(score, 15); // Cap at suspicious tier
522
+ warnings.push('Domain uses a free TLD associated with fraud');
523
+ }
524
+ // ── 8. Phishing keyword detection ─────────────────────────────────────────
525
+ const phishingKeywords = ['paypal-', 'apple-', 'google-', 'microsoft-', 'amazon-',
526
+ 'bank-', 'login-', 'signin-', 'secure-', 'verify-', 'account-', 'update-',
527
+ 'support-', 'helpdesk-', '-login', '-signin', '-secure', '-verify', '-account',
528
+ 'paypal.', 'apple.', 'google.', 'microsoft.', 'amazon.'];
529
+ const suspiciousPattern = phishingKeywords.some(kw => hostname.includes(kw) && !isEstablished && !communityLabel);
530
+ if (suspiciousPattern) {
531
+ score = Math.min(score, 19);
532
+ warnings.push('Domain contains impersonation keywords — potential phishing');
533
+ }
534
+ // ── Clamp score ───────────────────────────────────────────────────────────
535
+ score = Math.max(0, Math.min(100, score));
536
+ // ── Tier assignment ───────────────────────────────────────────────────────
537
+ let tier;
538
+ if (score >= 90)
539
+ tier = 'official';
540
+ else if (score >= 60)
541
+ tier = 'established';
542
+ else if (score >= 40)
543
+ tier = 'community';
544
+ else if (score >= 20)
545
+ tier = 'new';
546
+ else
547
+ tier = 'suspicious';
548
+ // ── Label generation ──────────────────────────────────────────────────────
549
+ let label;
550
+ if (communityLabel) {
551
+ label = communityLabel;
552
+ }
553
+ else if (isEstablished) {
554
+ label = DOMAIN_CATEGORY[hostname] ?? DOMAIN_CATEGORY[sld] ?? labelFromTier(tier, hostname, tld);
555
+ }
556
+ else {
557
+ label = labelFromTier(tier, hostname, tld);
558
+ }
559
+ return { tier, score, label, signals, warnings };
560
+ }
561
+ // ---------------------------------------------------------------------------
562
+ // Generate a useful fallback label based on tier + domain context
563
+ // ---------------------------------------------------------------------------
564
+ function labelFromTier(tier, _hostname, tld) {
565
+ switch (tier) {
566
+ case 'official':
567
+ return 'Official Source';
568
+ case 'established':
569
+ return tld === '.org' ? 'Established Organization' :
570
+ tld === '.net' ? 'Established Network Service' :
571
+ tld === '.io' ? 'Established Tech Service' :
572
+ 'Established Website';
573
+ case 'community':
574
+ return 'Community or Independent Website';
575
+ case 'new':
576
+ return 'Small or Recently Established Website';
577
+ case 'suspicious':
578
+ return SUSPICIOUS_TLDS.has(tld)
579
+ ? `Free Domain TLD (${tld}) — Exercise Caution`
580
+ : 'Unrecognized Domain — Exercise Caution';
581
+ default:
582
+ return 'Unknown Domain — Limited Verification Available';
82
583
  }
83
584
  }
@@ -212,16 +212,16 @@ export function createSearchRouter(authStore) {
212
212
  }
213
213
  }
214
214
  // Add credibility scores and sort by trustworthiness
215
- const tierOrder = { official: 0, verified: 1, general: 2 };
215
+ const tierOrder = { official: 0, established: 1, community: 2, new: 3, suspicious: 4 };
216
216
  results = results
217
217
  .map(r => {
218
218
  const cred = getSourceCredibility(r.url);
219
219
  return { ...r, credibility: cred };
220
220
  })
221
221
  .sort((a, b) => {
222
- const aTier = tierOrder[a.credibility?.tier || 'general'] ?? 2;
223
- const bTier = tierOrder[b.credibility?.tier || 'general'] ?? 2;
224
- return aTier - bTier; // Official first, then verified, then general
222
+ const aTier = tierOrder[a.credibility?.tier || 'new'] ?? 3;
223
+ const bTier = tierOrder[b.credibility?.tier || 'new'] ?? 3;
224
+ return aTier - bTier; // Official first, then established, community, new, suspicious
225
225
  })
226
226
  .map((r, i) => ({ ...r, rank: i + 1 }));
227
227
  data.web = results;
package/dist/types.d.ts CHANGED
@@ -343,9 +343,11 @@ export interface PeelResult {
343
343
  trust?: {
344
344
  /** Source credibility tier */
345
345
  source: {
346
- tier: 'official' | 'verified' | 'general';
347
- stars: number;
346
+ tier: 'official' | 'established' | 'community' | 'new' | 'suspicious';
347
+ score: number;
348
348
  label: string;
349
+ signals?: string[];
350
+ warnings?: string[];
349
351
  };
350
352
  /** Prompt injection scan result */
351
353
  contentSafety: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.59",
3
+ "version": "0.21.60",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",