apptvty 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -887,7 +887,8 @@ function withApptvty(config, next) {
887
887
  const crawlerInfo = detectCrawler(userAgent);
888
888
  const scraperService = detectScraperService(userAgent);
889
889
  const aiCrawlerParam = parseBoolParam(request.nextUrl.searchParams.get("ai_crawler"), false);
890
- const isCrawler = crawlerInfo.isAi || aiCrawlerParam || scraperService.isScraperService;
890
+ const isAi = crawlerInfo.isAi || aiCrawlerParam;
891
+ const isScraper = isAi || scraperService.isScraperService || crawlerInfo.name === "unknown_bot";
891
892
  if (request.nextUrl.pathname === "/api/apptvty/verify") {
892
893
  const challenge = request.nextUrl.searchParams.get("challenge");
893
894
  if (challenge) {
@@ -932,59 +933,87 @@ function withApptvty(config, next) {
932
933
  ip_address: getClientIp(headers),
933
934
  user_agent: userAgent,
934
935
  referrer: request.headers.get("referer"),
935
- is_ai_crawler: crawlerInfo.isAi,
936
+ is_ai_crawler: isAi,
936
937
  crawler_type: crawlerInfo.name,
937
938
  crawler_organization: crawlerInfo.organization,
938
939
  confidence_score: crawlerInfo.confidence,
939
- scraper_service: scraperService.name
940
+ scraper_service: scraperService.name,
941
+ attribution_id: request.nextUrl.searchParams.get("atid")
940
942
  };
941
- const isInternalRequest = request.headers.get("x-apptvty-internal") === "true";
942
- if (!isInternalRequest && !pathname.startsWith(queryPath)) {
943
+ const isInternalRequest2 = request.headers.get("x-apptvty-internal") === "true";
944
+ if (!isInternalRequest2 && !pathname.startsWith(queryPath)) {
943
945
  logger.enqueue(entry);
944
946
  if (event && typeof event.waitUntil === "function") {
945
947
  event.waitUntil(logger.flush());
946
948
  }
947
949
  }
948
- if (isCrawler && !isInternalRequest && !pathname.startsWith(queryPath)) {
950
+ if (!isInternalRequest2 && !pathname.startsWith(queryPath) && response.status === 200) {
949
951
  try {
950
- const proxyReq = new Request(request.url, {
951
- headers: new Headers(request.headers)
952
- });
953
- proxyReq.headers.set("x-apptvty-internal", "true");
954
- const res = await fetch(proxyReq);
955
- const contentType = res.headers.get("content-type") ?? "";
956
- if (contentType.includes("text/html")) {
957
- const html = await res.text();
958
- let markdown = convertHtmlToMarkdown(html);
959
- const pageAds = await client.getAdsForPage({ site_id: config.siteId, page_path: pathname });
960
- if (pageAds.ads && pageAds.ads.length > 0) {
961
- const ad = pageAds.ads[0];
962
- markdown += `
952
+ const pageAds = await client.getAdsForPage({ site_id: config.siteId, page_path: pathname });
953
+ if (pageAds.ads && pageAds.ads.length > 0) {
954
+ const ad = pageAds.ads[0];
955
+ client.logImpression({
956
+ impression_id: ad.impression_id,
957
+ site_id: config.siteId,
958
+ page_path: pathname,
959
+ agent_ua: userAgent,
960
+ agent_ip: getClientIp(headers),
961
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
962
+ }).catch(() => {
963
+ });
964
+ if (isAi || scraperService.isScraperService) {
965
+ const proxyReq = new Request(request.url, { headers: new Headers(request.headers) });
966
+ proxyReq.headers.set("x-apptvty-internal", "true");
967
+ const res = await fetch(proxyReq);
968
+ const contentType = res.headers.get("content-type") ?? "";
969
+ if (contentType.includes("text/html")) {
970
+ const html = await res.text();
971
+ let markdown = convertHtmlToMarkdown(html);
972
+ markdown += `
963
973
 
964
974
  ---
965
975
  > **Sponsored:** [${ad.text}](${ad.url}) - ${ad.advertiser}
966
976
  `;
967
- client.logImpression({
968
- impression_id: ad.impression_id,
969
- site_id: config.siteId,
970
- page_path: pathname,
971
- agent_ua: userAgent,
972
- agent_ip: getClientIp(headers),
973
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
974
- }).catch(() => {
975
- });
977
+ return new import_server.NextResponse(markdown, {
978
+ status: res.status,
979
+ headers: {
980
+ "Content-Type": "text/markdown",
981
+ "X-Apptvty-AEO": "true",
982
+ "X-Sponsored-Content": `${ad.text}; url=${ad.url}`
983
+ }
984
+ });
985
+ }
976
986
  }
977
- return new import_server.NextResponse(markdown, {
978
- status: res.status,
979
- headers: {
980
- "Content-Type": "text/markdown",
981
- "X-Apptvty-AEO": "true"
987
+ const originalHeaders = headersToRecord(response.headers);
988
+ if (originalHeaders["content-type"]?.includes("text/html")) {
989
+ const html = await response.text();
990
+ const jsonLd = `
991
+ <script type="application/ld+json">{"@context":"https://schema.org","@type":"CreativeWork","author":{"@type":"Organization","name":"${ad.advertiser}"},"mainEntityOfPage":{"@type":"WebPage","@id":"${ad.url}"},"headline":"Sponsored: ${ad.text}"}</script>
992
+ `;
993
+ const stealthDiv = `
994
+ <div style="display:none !important;visibility:hidden;height:0;width:0;overflow:hidden;" aria-hidden="true" data-apptvty-ad="${ad.impression_id}">Sponsored by ${ad.advertiser}: <a href="${ad.url}">${ad.text}</a></div>
995
+ `;
996
+ let modifiedHtml = html;
997
+ if (html.includes("</head>")) {
998
+ modifiedHtml = html.replace("</head>", `${jsonLd}</head>`);
982
999
  }
983
- });
1000
+ if (modifiedHtml.includes("</body>")) {
1001
+ modifiedHtml = modifiedHtml.replace("</body>", `${stealthDiv}</body>`);
1002
+ } else {
1003
+ modifiedHtml += stealthDiv;
1004
+ }
1005
+ return new import_server.NextResponse(modifiedHtml, {
1006
+ status: response.status,
1007
+ headers: {
1008
+ ...originalHeaders,
1009
+ "X-Sponsored-Content": `${ad.text}; url=${ad.url}`
1010
+ }
1011
+ });
1012
+ }
1013
+ response.headers.set("X-Sponsored-Content", `${ad.text}; url=${ad.url}`);
984
1014
  }
985
- return res;
986
1015
  } catch (err) {
987
- if (config.debug) console.warn("[apptvty] Markdown proxy failed:", err);
1016
+ if (config.debug) console.warn("[apptvty] Stealth injection failed:", err);
988
1017
  }
989
1018
  }
990
1019
  return response;
@@ -1100,13 +1129,18 @@ function createExpressMiddleware(config) {
1100
1129
  const crawlerInfo = detectCrawler(userAgent);
1101
1130
  const scraperService = detectScraperService(userAgent);
1102
1131
  const path = req.url ?? "/";
1103
- const isCrawler = crawlerInfo.isAi || scraperService.isScraperService;
1104
1132
  const ipAddress = getClientIp(req.headers);
1105
- const adsPromise = isCrawler && !shouldSkip2(path) ? client.getAdsForPage({ site_id: config.siteId, page_path: path }).catch(() => ({ ads: [] })) : Promise.resolve({ ads: [] });
1106
- if (isCrawler && !shouldSkip2(path)) {
1107
- const chunks = [];
1108
- const originalWrite = res.write.bind(res);
1109
- const originalEnd = res.end.bind(res);
1133
+ const urlObj = new URL(path, `http://${req.headers.host ?? "localhost"}`);
1134
+ const aiCrawlerParam = parseBoolParam2(urlObj.searchParams.get("ai_crawler"), false);
1135
+ const attributionId = urlObj.searchParams.get("atid");
1136
+ const isAi = crawlerInfo.isAi || aiCrawlerParam;
1137
+ const isScraper = isAi || scraperService.isScraperService || crawlerInfo.name === "unknown_bot";
1138
+ const adsPromise = !isInternalRequest(req) && !shouldSkip2(path) ? client.getAdsForPage({ site_id: config.siteId, page_path: path }).catch(() => ({ ads: [] })) : Promise.resolve({ ads: [] });
1139
+ const chunks = [];
1140
+ const originalWrite = res.write.bind(res);
1141
+ const originalEnd = res.end.bind(res);
1142
+ const shouldBuffer = !isInternalRequest(req) && !shouldSkip2(path);
1143
+ if (shouldBuffer) {
1110
1144
  res.write = function(chunk, encodingOrCallback, callback) {
1111
1145
  if (chunk != null) {
1112
1146
  chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
@@ -1121,17 +1155,12 @@ function createExpressMiddleware(config) {
1121
1155
  }
1122
1156
  const contentType = res.getHeader("content-type") ?? "";
1123
1157
  const isHtml = contentType.includes("text/html");
1124
- if (!isHtml || chunks.length === 0) {
1158
+ if (!isHtml || chunks.length === 0 || res.statusCode !== 200) {
1125
1159
  res.write = originalWrite;
1126
1160
  res.end = originalEnd;
1127
1161
  return originalEnd(Buffer.concat(chunks), encodingOrCallback, callback);
1128
1162
  }
1129
1163
  const html = Buffer.concat(chunks).toString("utf-8");
1130
- if (html.includes(AD_INJECTION_MARKER)) {
1131
- res.write = originalWrite;
1132
- res.end = originalEnd;
1133
- return originalEnd(html, encodingOrCallback, callback);
1134
- }
1135
1164
  adsPromise.then((pageAds) => {
1136
1165
  res.write = originalWrite;
1137
1166
  res.end = originalEnd;
@@ -1139,14 +1168,33 @@ function createExpressMiddleware(config) {
1139
1168
  originalEnd(html, encodingOrCallback, callback);
1140
1169
  return;
1141
1170
  }
1142
- const modified = injectIntoHtml(html, pageAds.ads, scraperService.isScraperService);
1171
+ const ad = pageAds.ads[0];
1172
+ let modified = html;
1173
+ if (isAi || scraperService.isScraperService) {
1174
+ modified = injectIntoHtml(html, pageAds.ads, scraperService.isScraperService);
1175
+ } else {
1176
+ const jsonLd = `
1177
+ <script type="application/ld+json">{"@context":"https://schema.org","@type":"CreativeWork","author":{"@type":"Organization","name":"${ad.advertiser}"},"mainEntityOfPage":{"@type":"WebPage","@id":"${ad.url}"},"headline":"Sponsored: ${ad.text}"}</script>
1178
+ `;
1179
+ const stealthDiv = `
1180
+ <div style="display:none !important;visibility:hidden;height:0;width:0;overflow:hidden;" aria-hidden="true" data-apptvty-ad="${ad.impression_id}">Sponsored by ${ad.advertiser}: <a href="${ad.url}">${ad.text}</a></div>
1181
+ `;
1182
+ if (html.includes("</head>")) {
1183
+ modified = html.replace("</head>", `${jsonLd}</head>`);
1184
+ }
1185
+ if (modified.includes("</body>")) {
1186
+ modified = modified.replace("</body>", `${stealthDiv}</body>`);
1187
+ } else {
1188
+ modified += stealthDiv;
1189
+ }
1190
+ }
1143
1191
  res.setHeader("X-Sponsored-Content", buildSponsoredHeader(pageAds.ads));
1144
1192
  const buf = Buffer.from(modified, "utf-8");
1145
1193
  res.setHeader("Content-Length", buf.length);
1146
1194
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
1147
- for (const ad of pageAds.ads) {
1195
+ for (const adItem of pageAds.ads) {
1148
1196
  client.logImpression({
1149
- impression_id: ad.impression_id,
1197
+ impression_id: adItem.impression_id,
1150
1198
  site_id: config.siteId,
1151
1199
  page_path: path,
1152
1200
  agent_ua: userAgent,
@@ -1165,7 +1213,7 @@ function createExpressMiddleware(config) {
1165
1213
  };
1166
1214
  }
1167
1215
  res.on("finish", () => {
1168
- if (shouldSkip2(path)) return;
1216
+ if (shouldSkip2(path) || isInternalRequest(req)) return;
1169
1217
  const entry = {
1170
1218
  site_id: config.siteId,
1171
1219
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
@@ -1176,17 +1224,21 @@ function createExpressMiddleware(config) {
1176
1224
  ip_address: ipAddress,
1177
1225
  user_agent: userAgent,
1178
1226
  referrer: req.headers["referer"] ?? null,
1179
- is_ai_crawler: crawlerInfo.isAi,
1227
+ is_ai_crawler: isAi,
1180
1228
  crawler_type: crawlerInfo.name,
1181
1229
  crawler_organization: crawlerInfo.organization,
1182
1230
  confidence_score: crawlerInfo.confidence,
1183
- scraper_service: scraperService.name
1231
+ scraper_service: scraperService.name,
1232
+ attribution_id: attributionId
1184
1233
  };
1185
1234
  logger.enqueue(entry);
1186
1235
  });
1187
1236
  next();
1188
1237
  };
1189
1238
  }
1239
+ function isInternalRequest(req) {
1240
+ return req.headers["x-apptvty-internal"] === "true";
1241
+ }
1190
1242
  function createExpressQueryHandler(config) {
1191
1243
  const { client } = getInstance2(config);
1192
1244
  const handleQuery = createQueryHandler(client, config);