apptvty 0.3.2 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1205,7 +1205,8 @@ function withApptvty(config, next) {
1205
1205
  const crawlerInfo = detectCrawler(userAgent);
1206
1206
  const scraperService = detectScraperService(userAgent);
1207
1207
  const aiCrawlerParam = parseBoolParam(request.nextUrl.searchParams.get("ai_crawler"), false);
1208
- const isCrawler = crawlerInfo.isAi || aiCrawlerParam || scraperService.isScraperService;
1208
+ const isAi = crawlerInfo.isAi || aiCrawlerParam;
1209
+ const isScraper = isAi || scraperService.isScraperService || crawlerInfo.name === "unknown_bot";
1209
1210
  if (request.nextUrl.pathname === "/api/apptvty/verify") {
1210
1211
  const challenge = request.nextUrl.searchParams.get("challenge");
1211
1212
  if (challenge) {
@@ -1250,11 +1251,12 @@ function withApptvty(config, next) {
1250
1251
  ip_address: getClientIp(headers),
1251
1252
  user_agent: userAgent,
1252
1253
  referrer: request.headers.get("referer"),
1253
- is_ai_crawler: crawlerInfo.isAi,
1254
+ is_ai_crawler: isAi,
1254
1255
  crawler_type: crawlerInfo.name,
1255
1256
  crawler_organization: crawlerInfo.organization,
1256
1257
  confidence_score: crawlerInfo.confidence,
1257
- scraper_service: scraperService.name
1258
+ scraper_service: scraperService.name,
1259
+ attribution_id: request.nextUrl.searchParams.get("atid")
1258
1260
  };
1259
1261
  const isInternalRequest = request.headers.get("x-apptvty-internal") === "true";
1260
1262
  if (!isInternalRequest && !pathname.startsWith(queryPath)) {
@@ -1263,46 +1265,66 @@ function withApptvty(config, next) {
1263
1265
  event.waitUntil(logger.flush());
1264
1266
  }
1265
1267
  }
1266
- if (isCrawler && !isInternalRequest && !pathname.startsWith(queryPath)) {
1268
+ if (!isInternalRequest && !pathname.startsWith(queryPath) && response.status === 200) {
1267
1269
  try {
1268
- const proxyReq = new Request(request.url, {
1269
- headers: new Headers(request.headers)
1270
- });
1271
- proxyReq.headers.set("x-apptvty-internal", "true");
1272
- const res = await fetch(proxyReq);
1273
- const contentType = res.headers.get("content-type") ?? "";
1274
- if (contentType.includes("text/html")) {
1275
- const html = await res.text();
1276
- let markdown = convertHtmlToMarkdown(html);
1277
- const pageAds = await client.getAdsForPage({ site_id: config.siteId, page_path: pathname });
1278
- if (pageAds.ads && pageAds.ads.length > 0) {
1279
- const ad = pageAds.ads[0];
1280
- markdown += `
1270
+ const pageAds = await client.getAdsForPage({ site_id: config.siteId, page_path: pathname });
1271
+ if (pageAds.ads && pageAds.ads.length > 0) {
1272
+ const ad = pageAds.ads[0];
1273
+ client.logImpression({
1274
+ impression_id: ad.impression_id,
1275
+ site_id: config.siteId,
1276
+ page_path: pathname,
1277
+ agent_ua: userAgent,
1278
+ agent_ip: getClientIp(headers),
1279
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
1280
+ }).catch(() => {
1281
+ });
1282
+ if (isScraper) {
1283
+ const proxyReq = new Request(request.url, { headers: new Headers(request.headers) });
1284
+ proxyReq.headers.set("x-apptvty-internal", "true");
1285
+ const res = await fetch(proxyReq);
1286
+ const contentType = res.headers.get("content-type") ?? "";
1287
+ if (contentType.includes("text/html")) {
1288
+ const html = await res.text();
1289
+ if (isAi || scraperService.isScraperService) {
1290
+ let markdown = convertHtmlToMarkdown(html);
1291
+ markdown += `
1281
1292
 
1282
1293
  ---
1283
1294
  > **Sponsored:** [${ad.text}](${ad.url}) - ${ad.advertiser}
1284
1295
  `;
1285
- client.logImpression({
1286
- impression_id: ad.impression_id,
1287
- site_id: config.siteId,
1288
- page_path: pathname,
1289
- agent_ua: userAgent,
1290
- agent_ip: getClientIp(headers),
1291
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
1292
- }).catch(() => {
1293
- });
1294
- }
1295
- return new import_server.NextResponse(markdown, {
1296
- status: res.status,
1297
- headers: {
1298
- "Content-Type": "text/markdown",
1299
- "X-Apptvty-AEO": "true"
1296
+ return new import_server.NextResponse(markdown, {
1297
+ status: res.status,
1298
+ headers: {
1299
+ "Content-Type": "text/markdown",
1300
+ "X-Apptvty-AEO": "true",
1301
+ "X-Sponsored-Content": `${ad.text}; url=${ad.url}`
1302
+ }
1303
+ });
1304
+ }
1305
+ const jsonLd = `
1306
+ <script type="application/ld+json">{"@context":"https://schema.org","@type":"CreativeWork","author":{"@type":"Organization","name":"${ad.advertiser}"},"mainEntityOfPage":{"@type":"WebPage","@id":"${ad.url}"},"headline":"Sponsored: ${ad.text}"}</script>
1307
+ `;
1308
+ const stealthDiv = `
1309
+ <div style="display:none !important;visibility:hidden;height:0;width:0;overflow:hidden;" aria-hidden="true" data-apptvty-ad="${ad.impression_id}">Sponsored by ${ad.advertiser}: <a href="${ad.url}">${ad.text}</a></div>
1310
+ `;
1311
+ let modifiedHtml = html;
1312
+ if (html.includes("</head>")) modifiedHtml = modifiedHtml.replace("</head>", `${jsonLd}</head>`);
1313
+ if (modifiedHtml.includes("</body>")) modifiedHtml = modifiedHtml.replace("</body>", `${stealthDiv}</body>`);
1314
+ else modifiedHtml += stealthDiv;
1315
+ return new import_server.NextResponse(modifiedHtml, {
1316
+ status: res.status,
1317
+ headers: {
1318
+ ...headersToRecord(res.headers),
1319
+ "X-Sponsored-Content": `${ad.text}; url=${ad.url}`
1320
+ }
1321
+ });
1300
1322
  }
1301
- });
1323
+ }
1324
+ response.headers.set("X-Sponsored-Content", `${ad.text}; url=${ad.url}`);
1302
1325
  }
1303
- return res;
1304
1326
  } catch (err) {
1305
- if (config.debug) console.warn("[apptvty] Markdown proxy failed:", err);
1327
+ if (config.debug) console.warn("[apptvty] Stealth injection failed:", err);
1306
1328
  }
1307
1329
  }
1308
1330
  return response;