apptvty 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1192,13 +1192,18 @@ function createExpressMiddleware(config) {
1192
1192
  const crawlerInfo = detectCrawler(userAgent);
1193
1193
  const scraperService = detectScraperService(userAgent);
1194
1194
  const path = req.url ?? "/";
1195
- const isCrawler = crawlerInfo.isAi || scraperService.isScraperService;
1196
1195
  const ipAddress = getClientIp(req.headers);
1197
- const adsPromise = isCrawler && !shouldSkip(path) ? client.getAdsForPage({ site_id: config.siteId, page_path: path }).catch(() => ({ ads: [] })) : Promise.resolve({ ads: [] });
1198
- if (isCrawler && !shouldSkip(path)) {
1199
- const chunks = [];
1200
- const originalWrite = res.write.bind(res);
1201
- const originalEnd = res.end.bind(res);
1196
+ const urlObj = new URL(path, `http://${req.headers.host ?? "localhost"}`);
1197
+ const aiCrawlerParam = parseBoolParam(urlObj.searchParams.get("ai_crawler"), false);
1198
+ const attributionId = urlObj.searchParams.get("atid");
1199
+ const isAi = crawlerInfo.isAi || aiCrawlerParam;
1200
+ const isScraper = isAi || scraperService.isScraperService || crawlerInfo.name === "unknown_bot";
1201
+ const adsPromise = !isInternalRequest(req) && !shouldSkip(path) ? client.getAdsForPage({ site_id: config.siteId, page_path: path }).catch(() => ({ ads: [] })) : Promise.resolve({ ads: [] });
1202
+ const chunks = [];
1203
+ const originalWrite = res.write.bind(res);
1204
+ const originalEnd = res.end.bind(res);
1205
+ const shouldBuffer = !isInternalRequest(req) && !shouldSkip(path);
1206
+ if (shouldBuffer) {
1202
1207
  res.write = function(chunk, encodingOrCallback, callback) {
1203
1208
  if (chunk != null) {
1204
1209
  chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
@@ -1213,17 +1218,12 @@ function createExpressMiddleware(config) {
1213
1218
  }
1214
1219
  const contentType = res.getHeader("content-type") ?? "";
1215
1220
  const isHtml = contentType.includes("text/html");
1216
- if (!isHtml || chunks.length === 0) {
1221
+ if (!isHtml || chunks.length === 0 || res.statusCode !== 200) {
1217
1222
  res.write = originalWrite;
1218
1223
  res.end = originalEnd;
1219
1224
  return originalEnd(Buffer.concat(chunks), encodingOrCallback, callback);
1220
1225
  }
1221
1226
  const html = Buffer.concat(chunks).toString("utf-8");
1222
- if (html.includes(AD_INJECTION_MARKER)) {
1223
- res.write = originalWrite;
1224
- res.end = originalEnd;
1225
- return originalEnd(html, encodingOrCallback, callback);
1226
- }
1227
1227
  adsPromise.then((pageAds) => {
1228
1228
  res.write = originalWrite;
1229
1229
  res.end = originalEnd;
@@ -1231,14 +1231,33 @@ function createExpressMiddleware(config) {
1231
1231
  originalEnd(html, encodingOrCallback, callback);
1232
1232
  return;
1233
1233
  }
1234
- const modified = injectIntoHtml(html, pageAds.ads, scraperService.isScraperService);
1234
+ const ad = pageAds.ads[0];
1235
+ let modified = html;
1236
+ if (isAi || scraperService.isScraperService) {
1237
+ modified = injectIntoHtml(html, pageAds.ads, scraperService.isScraperService);
1238
+ } else {
1239
+ const jsonLd = `
1240
+ <script type="application/ld+json">{"@context":"https://schema.org","@type":"CreativeWork","author":{"@type":"Organization","name":"${ad.advertiser}"},"mainEntityOfPage":{"@type":"WebPage","@id":"${ad.url}"},"headline":"Sponsored: ${ad.text}"}</script>
1241
+ `;
1242
+ const stealthDiv = `
1243
+ <div style="display:none !important;visibility:hidden;height:0;width:0;overflow:hidden;" aria-hidden="true" data-apptvty-ad="${ad.impression_id}">Sponsored by ${ad.advertiser}: <a href="${ad.url}">${ad.text}</a></div>
1244
+ `;
1245
+ if (html.includes("</head>")) {
1246
+ modified = html.replace("</head>", `${jsonLd}</head>`);
1247
+ }
1248
+ if (modified.includes("</body>")) {
1249
+ modified = modified.replace("</body>", `${stealthDiv}</body>`);
1250
+ } else {
1251
+ modified += stealthDiv;
1252
+ }
1253
+ }
1235
1254
  res.setHeader("X-Sponsored-Content", buildSponsoredHeader(pageAds.ads));
1236
1255
  const buf = Buffer.from(modified, "utf-8");
1237
1256
  res.setHeader("Content-Length", buf.length);
1238
1257
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
1239
- for (const ad of pageAds.ads) {
1258
+ for (const adItem of pageAds.ads) {
1240
1259
  client.logImpression({
1241
- impression_id: ad.impression_id,
1260
+ impression_id: adItem.impression_id,
1242
1261
  site_id: config.siteId,
1243
1262
  page_path: path,
1244
1263
  agent_ua: userAgent,
@@ -1257,7 +1276,7 @@ function createExpressMiddleware(config) {
1257
1276
  };
1258
1277
  }
1259
1278
  res.on("finish", () => {
1260
- if (shouldSkip(path)) return;
1279
+ if (shouldSkip(path) || isInternalRequest(req)) return;
1261
1280
  const entry = {
1262
1281
  site_id: config.siteId,
1263
1282
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
@@ -1268,17 +1287,21 @@ function createExpressMiddleware(config) {
1268
1287
  ip_address: ipAddress,
1269
1288
  user_agent: userAgent,
1270
1289
  referrer: req.headers["referer"] ?? null,
1271
- is_ai_crawler: crawlerInfo.isAi,
1290
+ is_ai_crawler: isAi,
1272
1291
  crawler_type: crawlerInfo.name,
1273
1292
  crawler_organization: crawlerInfo.organization,
1274
1293
  confidence_score: crawlerInfo.confidence,
1275
- scraper_service: scraperService.name
1294
+ scraper_service: scraperService.name,
1295
+ attribution_id: attributionId
1276
1296
  };
1277
1297
  logger.enqueue(entry);
1278
1298
  });
1279
1299
  next();
1280
1300
  };
1281
1301
  }
1302
+ function isInternalRequest(req) {
1303
+ return req.headers["x-apptvty-internal"] === "true";
1304
+ }
1282
1305
  function createExpressQueryHandler(config) {
1283
1306
  const { client } = getInstance(config);
1284
1307
  const handleQuery = createQueryHandler(client, config);