apptvty 0.3.2 → 0.4.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -887,7 +887,8 @@ function withApptvty(config, next) {
887
887
  const crawlerInfo = detectCrawler(userAgent);
888
888
  const scraperService = detectScraperService(userAgent);
889
889
  const aiCrawlerParam = parseBoolParam(request.nextUrl.searchParams.get("ai_crawler"), false);
890
- const isCrawler = crawlerInfo.isAi || aiCrawlerParam || scraperService.isScraperService;
890
+ const isAi = crawlerInfo.isAi || aiCrawlerParam;
891
+ const isScraper = isAi || scraperService.isScraperService || crawlerInfo.name === "unknown_bot";
891
892
  if (request.nextUrl.pathname === "/api/apptvty/verify") {
892
893
  const challenge = request.nextUrl.searchParams.get("challenge");
893
894
  if (challenge) {
@@ -932,59 +933,80 @@ function withApptvty(config, next) {
932
933
  ip_address: getClientIp(headers),
933
934
  user_agent: userAgent,
934
935
  referrer: request.headers.get("referer"),
935
- is_ai_crawler: crawlerInfo.isAi,
936
+ is_ai_crawler: isAi,
936
937
  crawler_type: crawlerInfo.name,
937
938
  crawler_organization: crawlerInfo.organization,
938
939
  confidence_score: crawlerInfo.confidence,
939
- scraper_service: scraperService.name
940
+ scraper_service: scraperService.name,
941
+ attribution_id: request.nextUrl.searchParams.get("atid")
940
942
  };
941
- const isInternalRequest = request.headers.get("x-apptvty-internal") === "true";
942
- if (!isInternalRequest && !pathname.startsWith(queryPath)) {
943
+ const isInternalRequest2 = request.headers.get("x-apptvty-internal") === "true";
944
+ if (!isInternalRequest2 && !pathname.startsWith(queryPath)) {
943
945
  logger.enqueue(entry);
944
946
  if (event && typeof event.waitUntil === "function") {
945
947
  event.waitUntil(logger.flush());
946
948
  }
947
949
  }
948
- if (isCrawler && !isInternalRequest && !pathname.startsWith(queryPath)) {
950
+ if (!isInternalRequest2 && !pathname.startsWith(queryPath) && response.status === 200) {
949
951
  try {
950
- const proxyReq = new Request(request.url, {
951
- headers: new Headers(request.headers)
952
- });
953
- proxyReq.headers.set("x-apptvty-internal", "true");
954
- const res = await fetch(proxyReq);
955
- const contentType = res.headers.get("content-type") ?? "";
956
- if (contentType.includes("text/html")) {
957
- const html = await res.text();
958
- let markdown = convertHtmlToMarkdown(html);
959
- const pageAds = await client.getAdsForPage({ site_id: config.siteId, page_path: pathname });
960
- if (pageAds.ads && pageAds.ads.length > 0) {
961
- const ad = pageAds.ads[0];
962
- markdown += `
952
+ const pageAds = await client.getAdsForPage({ site_id: config.siteId, page_path: pathname });
953
+ if (pageAds.ads && pageAds.ads.length > 0) {
954
+ const ad = pageAds.ads[0];
955
+ client.logImpression({
956
+ impression_id: ad.impression_id,
957
+ site_id: config.siteId,
958
+ page_path: pathname,
959
+ agent_ua: userAgent,
960
+ agent_ip: getClientIp(headers),
961
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
962
+ }).catch(() => {
963
+ });
964
+ if (isScraper) {
965
+ const proxyReq = new Request(request.url, { headers: new Headers(request.headers) });
966
+ proxyReq.headers.set("x-apptvty-internal", "true");
967
+ const res = await fetch(proxyReq);
968
+ const contentType = res.headers.get("content-type") ?? "";
969
+ if (contentType.includes("text/html")) {
970
+ const html = await res.text();
971
+ if (isAi || scraperService.isScraperService) {
972
+ let markdown = convertHtmlToMarkdown(html);
973
+ markdown += `
963
974
 
964
975
  ---
965
976
  > **Sponsored:** [${ad.text}](${ad.url}) - ${ad.advertiser}
966
977
  `;
967
- client.logImpression({
968
- impression_id: ad.impression_id,
969
- site_id: config.siteId,
970
- page_path: pathname,
971
- agent_ua: userAgent,
972
- agent_ip: getClientIp(headers),
973
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
974
- }).catch(() => {
975
- });
976
- }
977
- return new import_server.NextResponse(markdown, {
978
- status: res.status,
979
- headers: {
980
- "Content-Type": "text/markdown",
981
- "X-Apptvty-AEO": "true"
978
+ return new import_server.NextResponse(markdown, {
979
+ status: res.status,
980
+ headers: {
981
+ "Content-Type": "text/markdown",
982
+ "X-Apptvty-AEO": "true",
983
+ "X-Sponsored-Content": `${ad.text}; url=${ad.url}`
984
+ }
985
+ });
986
+ }
987
+ const jsonLd = `
988
+ <script type="application/ld+json">{"@context":"https://schema.org","@type":"CreativeWork","author":{"@type":"Organization","name":"${ad.advertiser}"},"mainEntityOfPage":{"@type":"WebPage","@id":"${ad.url}"},"headline":"Sponsored: ${ad.text}"}</script>
989
+ `;
990
+ const stealthDiv = `
991
+ <div style="display:none !important;visibility:hidden;height:0;width:0;overflow:hidden;" aria-hidden="true" data-apptvty-ad="${ad.impression_id}">Sponsored by ${ad.advertiser}: <a href="${ad.url}">${ad.text}</a></div>
992
+ `;
993
+ let modifiedHtml = html;
994
+ if (html.includes("</head>")) modifiedHtml = modifiedHtml.replace("</head>", `${jsonLd}</head>`);
995
+ if (modifiedHtml.includes("</body>")) modifiedHtml = modifiedHtml.replace("</body>", `${stealthDiv}</body>`);
996
+ else modifiedHtml += stealthDiv;
997
+ return new import_server.NextResponse(modifiedHtml, {
998
+ status: res.status,
999
+ headers: {
1000
+ ...headersToRecord(res.headers),
1001
+ "X-Sponsored-Content": `${ad.text}; url=${ad.url}`
1002
+ }
1003
+ });
982
1004
  }
983
- });
1005
+ }
1006
+ response.headers.set("X-Sponsored-Content", `${ad.text}; url=${ad.url}`);
984
1007
  }
985
- return res;
986
1008
  } catch (err) {
987
- if (config.debug) console.warn("[apptvty] Markdown proxy failed:", err);
1009
+ if (config.debug) console.warn("[apptvty] Stealth injection failed:", err);
988
1010
  }
989
1011
  }
990
1012
  return response;
@@ -1100,13 +1122,18 @@ function createExpressMiddleware(config) {
1100
1122
  const crawlerInfo = detectCrawler(userAgent);
1101
1123
  const scraperService = detectScraperService(userAgent);
1102
1124
  const path = req.url ?? "/";
1103
- const isCrawler = crawlerInfo.isAi || scraperService.isScraperService;
1104
1125
  const ipAddress = getClientIp(req.headers);
1105
- const adsPromise = isCrawler && !shouldSkip2(path) ? client.getAdsForPage({ site_id: config.siteId, page_path: path }).catch(() => ({ ads: [] })) : Promise.resolve({ ads: [] });
1106
- if (isCrawler && !shouldSkip2(path)) {
1107
- const chunks = [];
1108
- const originalWrite = res.write.bind(res);
1109
- const originalEnd = res.end.bind(res);
1126
+ const urlObj = new URL(path, `http://${req.headers.host ?? "localhost"}`);
1127
+ const aiCrawlerParam = parseBoolParam2(urlObj.searchParams.get("ai_crawler"), false);
1128
+ const attributionId = urlObj.searchParams.get("atid");
1129
+ const isAi = crawlerInfo.isAi || aiCrawlerParam;
1130
+ const isScraper = isAi || scraperService.isScraperService || crawlerInfo.name === "unknown_bot";
1131
+ const adsPromise = !isInternalRequest(req) && !shouldSkip2(path) ? client.getAdsForPage({ site_id: config.siteId, page_path: path }).catch(() => ({ ads: [] })) : Promise.resolve({ ads: [] });
1132
+ const chunks = [];
1133
+ const originalWrite = res.write.bind(res);
1134
+ const originalEnd = res.end.bind(res);
1135
+ const shouldBuffer = !isInternalRequest(req) && !shouldSkip2(path);
1136
+ if (shouldBuffer) {
1110
1137
  res.write = function(chunk, encodingOrCallback, callback) {
1111
1138
  if (chunk != null) {
1112
1139
  chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
@@ -1121,17 +1148,12 @@ function createExpressMiddleware(config) {
1121
1148
  }
1122
1149
  const contentType = res.getHeader("content-type") ?? "";
1123
1150
  const isHtml = contentType.includes("text/html");
1124
- if (!isHtml || chunks.length === 0) {
1151
+ if (!isHtml || chunks.length === 0 || res.statusCode !== 200) {
1125
1152
  res.write = originalWrite;
1126
1153
  res.end = originalEnd;
1127
1154
  return originalEnd(Buffer.concat(chunks), encodingOrCallback, callback);
1128
1155
  }
1129
1156
  const html = Buffer.concat(chunks).toString("utf-8");
1130
- if (html.includes(AD_INJECTION_MARKER)) {
1131
- res.write = originalWrite;
1132
- res.end = originalEnd;
1133
- return originalEnd(html, encodingOrCallback, callback);
1134
- }
1135
1157
  adsPromise.then((pageAds) => {
1136
1158
  res.write = originalWrite;
1137
1159
  res.end = originalEnd;
@@ -1139,14 +1161,33 @@ function createExpressMiddleware(config) {
1139
1161
  originalEnd(html, encodingOrCallback, callback);
1140
1162
  return;
1141
1163
  }
1142
- const modified = injectIntoHtml(html, pageAds.ads, scraperService.isScraperService);
1164
+ const ad = pageAds.ads[0];
1165
+ let modified = html;
1166
+ if (isAi || scraperService.isScraperService) {
1167
+ modified = injectIntoHtml(html, pageAds.ads, scraperService.isScraperService);
1168
+ } else {
1169
+ const jsonLd = `
1170
+ <script type="application/ld+json">{"@context":"https://schema.org","@type":"CreativeWork","author":{"@type":"Organization","name":"${ad.advertiser}"},"mainEntityOfPage":{"@type":"WebPage","@id":"${ad.url}"},"headline":"Sponsored: ${ad.text}"}</script>
1171
+ `;
1172
+ const stealthDiv = `
1173
+ <div style="display:none !important;visibility:hidden;height:0;width:0;overflow:hidden;" aria-hidden="true" data-apptvty-ad="${ad.impression_id}">Sponsored by ${ad.advertiser}: <a href="${ad.url}">${ad.text}</a></div>
1174
+ `;
1175
+ if (html.includes("</head>")) {
1176
+ modified = html.replace("</head>", `${jsonLd}</head>`);
1177
+ }
1178
+ if (modified.includes("</body>")) {
1179
+ modified = modified.replace("</body>", `${stealthDiv}</body>`);
1180
+ } else {
1181
+ modified += stealthDiv;
1182
+ }
1183
+ }
1143
1184
  res.setHeader("X-Sponsored-Content", buildSponsoredHeader(pageAds.ads));
1144
1185
  const buf = Buffer.from(modified, "utf-8");
1145
1186
  res.setHeader("Content-Length", buf.length);
1146
1187
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
1147
- for (const ad of pageAds.ads) {
1188
+ for (const adItem of pageAds.ads) {
1148
1189
  client.logImpression({
1149
- impression_id: ad.impression_id,
1190
+ impression_id: adItem.impression_id,
1150
1191
  site_id: config.siteId,
1151
1192
  page_path: path,
1152
1193
  agent_ua: userAgent,
@@ -1165,7 +1206,7 @@ function createExpressMiddleware(config) {
1165
1206
  };
1166
1207
  }
1167
1208
  res.on("finish", () => {
1168
- if (shouldSkip2(path)) return;
1209
+ if (shouldSkip2(path) || isInternalRequest(req)) return;
1169
1210
  const entry = {
1170
1211
  site_id: config.siteId,
1171
1212
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
@@ -1176,17 +1217,21 @@ function createExpressMiddleware(config) {
1176
1217
  ip_address: ipAddress,
1177
1218
  user_agent: userAgent,
1178
1219
  referrer: req.headers["referer"] ?? null,
1179
- is_ai_crawler: crawlerInfo.isAi,
1220
+ is_ai_crawler: isAi,
1180
1221
  crawler_type: crawlerInfo.name,
1181
1222
  crawler_organization: crawlerInfo.organization,
1182
1223
  confidence_score: crawlerInfo.confidence,
1183
- scraper_service: scraperService.name
1224
+ scraper_service: scraperService.name,
1225
+ attribution_id: attributionId
1184
1226
  };
1185
1227
  logger.enqueue(entry);
1186
1228
  });
1187
1229
  next();
1188
1230
  };
1189
1231
  }
1232
+ function isInternalRequest(req) {
1233
+ return req.headers["x-apptvty-internal"] === "true";
1234
+ }
1190
1235
  function createExpressQueryHandler(config) {
1191
1236
  const { client } = getInstance2(config);
1192
1237
  const handleQuery = createQueryHandler(client, config);