apptvty 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,9 @@
1
1
  "use strict";
2
+ var __create = Object.create;
2
3
  var __defProp = Object.defineProperty;
3
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
5
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
6
8
  var __export = (target, all) => {
7
9
  for (var name in all)
@@ -15,6 +17,14 @@ var __copyProps = (to, from, except, desc) => {
15
17
  }
16
18
  return to;
17
19
  };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
18
28
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
29
 
20
30
  // src/middleware/nextjs.ts
@@ -1091,61 +1101,58 @@ function getDashboardHtml(config) {
1091
1101
  `;
1092
1102
  }
1093
1103
 
1094
- // src/ad-injection.ts
1095
- var AD_INJECTION_MARKER = "<!-- apptvty-sponsored -->";
1096
- function injectIntoHtml(html, ads, isScraperService) {
1097
- if (!html || ads.length === 0) return html;
1098
- if (html.includes(AD_INJECTION_MARKER)) return html;
1099
- let modified = html;
1100
- const contentBlock = buildContentStreamBlock(ads);
1101
- if (modified.includes("</article>")) {
1102
- modified = modified.replace("</article>", `${contentBlock}
1103
- </article>`);
1104
- } else if (modified.includes("</main>")) {
1105
- modified = modified.replace("</main>", `${contentBlock}
1106
- </main>`);
1107
- } else if (!isScraperService && modified.includes("</body>")) {
1108
- modified = modified.replace("</body>", `${contentBlock}
1109
- </body>`);
1110
- }
1111
- if (!isScraperService && modified.includes("</head>")) {
1112
- const jsonLdBlock = buildJsonLdBlock(ads);
1113
- modified = modified.replace("</head>", `${jsonLdBlock}
1114
- </head>`);
1104
+ // src/markdown.ts
1105
+ var cheerio = __toESM(require("cheerio"));
1106
+ function convertHtmlToMarkdown(html) {
1107
+ if (!html) return "";
1108
+ const $ = cheerio.load(html);
1109
+ $("script, style, nav, footer, header, aside, svg, .ad, .sponsor, noscript").remove();
1110
+ const main = $('main, article, [role="main"], #content, .content').first();
1111
+ const root = main.length ? main : $("body");
1112
+ let markdown = "";
1113
+ root.find("h1, h2, h3, h4, h5, h6, p, ul, ol").each((_, el) => {
1114
+ const $el = $(el);
1115
+ const tagName = el.tagName.toLowerCase();
1116
+ if (tagName === "ul" || tagName === "ol") {
1117
+ $el.find("li").each((_2, li) => {
1118
+ const text2 = cleanText($(li).text());
1119
+ if (text2) markdown += `- ${text2}
1120
+ `;
1121
+ });
1122
+ markdown += "\n";
1123
+ return;
1124
+ }
1125
+ const text = cleanText($el.text());
1126
+ if (!text) return;
1127
+ if (tagName === "h1") markdown += `# ${text}
1128
+
1129
+ `;
1130
+ else if (tagName === "h2") markdown += `## ${text}
1131
+
1132
+ `;
1133
+ else if (tagName === "h3") markdown += `### ${text}
1134
+
1135
+ `;
1136
+ else if (tagName === "h4") markdown += `#### ${text}
1137
+
1138
+ `;
1139
+ else if (tagName === "h5") markdown += `##### ${text}
1140
+
1141
+ `;
1142
+ else if (tagName === "h6") markdown += `###### ${text}
1143
+
1144
+ `;
1145
+ else if (tagName === "p") markdown += `${text}
1146
+
1147
+ `;
1148
+ });
1149
+ if (markdown.trim().length < 50) {
1150
+ markdown = cleanText(root.text()) + "\n\n";
1115
1151
  }
1116
- return modified;
1117
- }
1118
- function buildSponsoredHeader(ads) {
1119
- return JSON.stringify(
1120
- ads.map((ad) => ({ text: ad.text, url: ad.url, advertiser: ad.advertiser }))
1121
- );
1122
- }
1123
- function buildContentStreamBlock(ads) {
1124
- const paragraphs = ads.map(
1125
- (ad) => `<p data-apptvty-sponsored="${escapeAttr(ad.impression_id)}"><strong>[Sponsored]</strong> <a href="${escapeAttr(ad.url)}" rel="sponsored noopener">${escapeHtml(ad.text)}</a> \u2014 <span>${escapeHtml(ad.advertiser)}</span></p>`
1126
- ).join("\n");
1127
- return `${AD_INJECTION_MARKER}
1128
- ${paragraphs}`;
1129
- }
1130
- function buildJsonLdBlock(ads) {
1131
- const entries = ads.map((ad) => ({
1132
- "@context": "https://schema.org",
1133
- "@type": "WPAdBlock",
1134
- sponsor: {
1135
- "@type": "Organization",
1136
- name: ad.advertiser,
1137
- url: ad.url
1138
- },
1139
- description: ad.text
1140
- }));
1141
- const ld = entries.length === 1 ? entries[0] : entries;
1142
- return `<script type="application/ld+json">${JSON.stringify(ld)}</script>`;
1152
+ return markdown.trim();
1143
1153
  }
1144
- function escapeHtml(s) {
1145
- return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
1146
- }
1147
- function escapeAttr(s) {
1148
- return s.replace(/"/g, "&quot;").replace(/'/g, "&#39;");
1154
+ function cleanText(text) {
1155
+ return text.trim().replace(/\s+/g, " ");
1149
1156
  }
1150
1157
 
1151
1158
  // src/middleware/nextjs.ts
@@ -1223,27 +1230,53 @@ function withApptvty(config, next) {
1223
1230
  confidence_score: crawlerInfo.confidence,
1224
1231
  scraper_service: scraperService.name
1225
1232
  };
1226
- logger.enqueue(entry);
1227
- if (event && typeof event.waitUntil === "function") {
1228
- event.waitUntil(logger.flush());
1233
+ const isInternalRequest = request.headers.get("x-apptvty-internal") === "true";
1234
+ if (!isInternalRequest && !pathname.startsWith(queryPath)) {
1235
+ logger.enqueue(entry);
1236
+ if (event && typeof event.waitUntil === "function") {
1237
+ event.waitUntil(logger.flush());
1238
+ }
1229
1239
  }
1230
- if (isCrawler && response.ok && !pathname.startsWith(queryPath)) {
1231
- const contentType = response.headers.get("content-type") ?? "";
1232
- if (contentType.includes("text/html")) {
1233
- try {
1234
- const modified = await injectAdsIntoResponse(
1235
- response,
1236
- client,
1237
- config,
1238
- pathname,
1239
- userAgent,
1240
- getClientIp(headers),
1241
- scraperService.isScraperService
1242
- );
1243
- if (modified) return modified;
1244
- } catch (err) {
1245
- if (config.debug) console.warn("[apptvty] Ad injection failed:", err);
1240
+ if (isCrawler && !isInternalRequest && !pathname.startsWith(queryPath)) {
1241
+ try {
1242
+ const proxyReq = new Request(request.url, {
1243
+ headers: new Headers(request.headers)
1244
+ });
1245
+ proxyReq.headers.set("x-apptvty-internal", "true");
1246
+ const res = await fetch(proxyReq);
1247
+ const contentType = res.headers.get("content-type") ?? "";
1248
+ if (contentType.includes("text/html")) {
1249
+ const html = await res.text();
1250
+ let markdown = convertHtmlToMarkdown(html);
1251
+ const pageAds = await client.getAdsForPage({ site_id: config.siteId, page_path: pathname });
1252
+ if (pageAds.ads && pageAds.ads.length > 0) {
1253
+ const ad = pageAds.ads[0];
1254
+ markdown += `
1255
+
1256
+ ---
1257
+ > **Sponsored:** [${ad.text}](${ad.url}) - ${ad.advertiser}
1258
+ `;
1259
+ client.logImpression({
1260
+ impression_id: ad.impression_id,
1261
+ site_id: config.siteId,
1262
+ page_path: pathname,
1263
+ agent_ua: userAgent,
1264
+ agent_ip: getClientIp(headers),
1265
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
1266
+ }).catch(() => {
1267
+ });
1268
+ }
1269
+ return new import_server.NextResponse(markdown, {
1270
+ status: res.status,
1271
+ headers: {
1272
+ "Content-Type": "text/markdown",
1273
+ "X-Apptvty-AEO": "true"
1274
+ }
1275
+ });
1246
1276
  }
1277
+ return res;
1278
+ } catch (err) {
1279
+ if (config.debug) console.warn("[apptvty] Markdown proxy failed:", err);
1247
1280
  }
1248
1281
  }
1249
1282
  return response;
@@ -1298,33 +1331,6 @@ function createNextjsDashboardHandler(config) {
1298
1331
  });
1299
1332
  };
1300
1333
  }
1301
- async function injectAdsIntoResponse(response, client, config, pathname, userAgent, ipAddress, isScraperService) {
1302
- const html = await response.text();
1303
- if (!html) return null;
1304
- const pageAds = await client.getAdsForPage({ site_id: config.siteId, page_path: pathname });
1305
- if (!pageAds.ads || pageAds.ads.length === 0) return null;
1306
- const modified = injectIntoHtml(html, pageAds.ads, isScraperService);
1307
- if (modified === html) return null;
1308
- const newHeaders = new Headers(response.headers);
1309
- newHeaders.set("X-Sponsored-Content", buildSponsoredHeader(pageAds.ads));
1310
- const timestamp = (/* @__PURE__ */ new Date()).toISOString();
1311
- for (const ad of pageAds.ads) {
1312
- client.logImpression({
1313
- impression_id: ad.impression_id,
1314
- site_id: config.siteId,
1315
- page_path: pathname,
1316
- agent_ua: userAgent,
1317
- agent_ip: ipAddress,
1318
- timestamp
1319
- }).catch(() => {
1320
- });
1321
- }
1322
- return new import_server.NextResponse(modified, {
1323
- status: response.status,
1324
- statusText: response.statusText,
1325
- headers: newHeaders
1326
- });
1327
- }
1328
1334
  function parseBoolParam(value, defaultValue) {
1329
1335
  if (value === null) return defaultValue;
1330
1336
  return value === "1" || value === "true" || value === "yes";