apptvty 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-EFCEIG74.mjs → chunk-DFSZAE6R.mjs} +60 -6
- package/dist/chunk-DFSZAE6R.mjs.map +1 -0
- package/dist/{chunk-A6GFTPJ5.mjs → chunk-EZYPAN7G.mjs} +2 -62
- package/dist/chunk-EZYPAN7G.mjs.map +1 -0
- package/dist/{chunk-4WO7W6JR.mjs → chunk-RNZYGUMF.mjs} +104 -51
- package/dist/chunk-RNZYGUMF.mjs.map +1 -0
- package/dist/index.js +162 -99
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +3 -3
- package/dist/middleware/express.mjs +2 -2
- package/dist/middleware/nextjs.js +105 -99
- package/dist/middleware/nextjs.js.map +1 -1
- package/dist/middleware/nextjs.mjs +2 -2
- package/package.json +8 -3
- package/dist/chunk-4WO7W6JR.mjs.map +0 -1
- package/dist/chunk-A6GFTPJ5.mjs.map +0 -1
- package/dist/chunk-EFCEIG74.mjs.map +0 -1
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __create = Object.create;
|
|
2
3
|
var __defProp = Object.defineProperty;
|
|
3
4
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
5
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
5
7
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
8
|
var __export = (target, all) => {
|
|
7
9
|
for (var name in all)
|
|
@@ -15,6 +17,14 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
15
17
|
}
|
|
16
18
|
return to;
|
|
17
19
|
};
|
|
20
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
+
mod
|
|
27
|
+
));
|
|
18
28
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
29
|
|
|
20
30
|
// src/middleware/nextjs.ts
|
|
@@ -1091,61 +1101,58 @@ function getDashboardHtml(config) {
|
|
|
1091
1101
|
`;
|
|
1092
1102
|
}
|
|
1093
1103
|
|
|
1094
|
-
// src/
|
|
1095
|
-
var
|
|
1096
|
-
function
|
|
1097
|
-
if (!html
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
const
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1104
|
+
// src/markdown.ts
|
|
1105
|
+
var cheerio = __toESM(require("cheerio"));
|
|
1106
|
+
function convertHtmlToMarkdown(html) {
|
|
1107
|
+
if (!html) return "";
|
|
1108
|
+
const $ = cheerio.load(html);
|
|
1109
|
+
$("script, style, nav, footer, header, aside, svg, .ad, .sponsor, noscript").remove();
|
|
1110
|
+
const main = $('main, article, [role="main"], #content, .content').first();
|
|
1111
|
+
const root = main.length ? main : $("body");
|
|
1112
|
+
let markdown = "";
|
|
1113
|
+
root.find("h1, h2, h3, h4, h5, h6, p, ul, ol").each((_, el) => {
|
|
1114
|
+
const $el = $(el);
|
|
1115
|
+
const tagName = el.tagName.toLowerCase();
|
|
1116
|
+
if (tagName === "ul" || tagName === "ol") {
|
|
1117
|
+
$el.find("li").each((_2, li) => {
|
|
1118
|
+
const text2 = cleanText($(li).text());
|
|
1119
|
+
if (text2) markdown += `- ${text2}
|
|
1120
|
+
`;
|
|
1121
|
+
});
|
|
1122
|
+
markdown += "\n";
|
|
1123
|
+
return;
|
|
1124
|
+
}
|
|
1125
|
+
const text = cleanText($el.text());
|
|
1126
|
+
if (!text) return;
|
|
1127
|
+
if (tagName === "h1") markdown += `# ${text}
|
|
1128
|
+
|
|
1129
|
+
`;
|
|
1130
|
+
else if (tagName === "h2") markdown += `## ${text}
|
|
1131
|
+
|
|
1132
|
+
`;
|
|
1133
|
+
else if (tagName === "h3") markdown += `### ${text}
|
|
1134
|
+
|
|
1135
|
+
`;
|
|
1136
|
+
else if (tagName === "h4") markdown += `#### ${text}
|
|
1137
|
+
|
|
1138
|
+
`;
|
|
1139
|
+
else if (tagName === "h5") markdown += `##### ${text}
|
|
1140
|
+
|
|
1141
|
+
`;
|
|
1142
|
+
else if (tagName === "h6") markdown += `###### ${text}
|
|
1143
|
+
|
|
1144
|
+
`;
|
|
1145
|
+
else if (tagName === "p") markdown += `${text}
|
|
1146
|
+
|
|
1147
|
+
`;
|
|
1148
|
+
});
|
|
1149
|
+
if (markdown.trim().length < 50) {
|
|
1150
|
+
markdown = cleanText(root.text()) + "\n\n";
|
|
1115
1151
|
}
|
|
1116
|
-
return
|
|
1117
|
-
}
|
|
1118
|
-
function buildSponsoredHeader(ads) {
|
|
1119
|
-
return JSON.stringify(
|
|
1120
|
-
ads.map((ad) => ({ text: ad.text, url: ad.url, advertiser: ad.advertiser }))
|
|
1121
|
-
);
|
|
1122
|
-
}
|
|
1123
|
-
function buildContentStreamBlock(ads) {
|
|
1124
|
-
const paragraphs = ads.map(
|
|
1125
|
-
(ad) => `<p data-apptvty-sponsored="${escapeAttr(ad.impression_id)}"><strong>[Sponsored]</strong> <a href="${escapeAttr(ad.url)}" rel="sponsored noopener">${escapeHtml(ad.text)}</a> \u2014 <span>${escapeHtml(ad.advertiser)}</span></p>`
|
|
1126
|
-
).join("\n");
|
|
1127
|
-
return `${AD_INJECTION_MARKER}
|
|
1128
|
-
${paragraphs}`;
|
|
1129
|
-
}
|
|
1130
|
-
function buildJsonLdBlock(ads) {
|
|
1131
|
-
const entries = ads.map((ad) => ({
|
|
1132
|
-
"@context": "https://schema.org",
|
|
1133
|
-
"@type": "WPAdBlock",
|
|
1134
|
-
sponsor: {
|
|
1135
|
-
"@type": "Organization",
|
|
1136
|
-
name: ad.advertiser,
|
|
1137
|
-
url: ad.url
|
|
1138
|
-
},
|
|
1139
|
-
description: ad.text
|
|
1140
|
-
}));
|
|
1141
|
-
const ld = entries.length === 1 ? entries[0] : entries;
|
|
1142
|
-
return `<script type="application/ld+json">${JSON.stringify(ld)}</script>`;
|
|
1152
|
+
return markdown.trim();
|
|
1143
1153
|
}
|
|
1144
|
-
function
|
|
1145
|
-
return
|
|
1146
|
-
}
|
|
1147
|
-
function escapeAttr(s) {
|
|
1148
|
-
return s.replace(/"/g, "&quot;").replace(/'/g, "&#39;");
|
|
1154
|
+
function cleanText(text) {
|
|
1155
|
+
return text.trim().replace(/\s+/g, " ");
|
|
1149
1156
|
}
|
|
1150
1157
|
|
|
1151
1158
|
// src/middleware/nextjs.ts
|
|
@@ -1223,27 +1230,53 @@ function withApptvty(config, next) {
|
|
|
1223
1230
|
confidence_score: crawlerInfo.confidence,
|
|
1224
1231
|
scraper_service: scraperService.name
|
|
1225
1232
|
};
|
|
1226
|
-
|
|
1227
|
-
if (
|
|
1228
|
-
|
|
1233
|
+
const isInternalRequest = request.headers.get("x-apptvty-internal") === "true";
|
|
1234
|
+
if (!isInternalRequest && !pathname.startsWith(queryPath)) {
|
|
1235
|
+
logger.enqueue(entry);
|
|
1236
|
+
if (event && typeof event.waitUntil === "function") {
|
|
1237
|
+
event.waitUntil(logger.flush());
|
|
1238
|
+
}
|
|
1229
1239
|
}
|
|
1230
|
-
if (isCrawler &&
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
)
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1240
|
+
if (isCrawler && !isInternalRequest && !pathname.startsWith(queryPath)) {
|
|
1241
|
+
try {
|
|
1242
|
+
const proxyReq = new Request(request.url, {
|
|
1243
|
+
headers: new Headers(request.headers)
|
|
1244
|
+
});
|
|
1245
|
+
proxyReq.headers.set("x-apptvty-internal", "true");
|
|
1246
|
+
const res = await fetch(proxyReq);
|
|
1247
|
+
const contentType = res.headers.get("content-type") ?? "";
|
|
1248
|
+
if (contentType.includes("text/html")) {
|
|
1249
|
+
const html = await res.text();
|
|
1250
|
+
let markdown = convertHtmlToMarkdown(html);
|
|
1251
|
+
const pageAds = await client.getAdsForPage({ site_id: config.siteId, page_path: pathname });
|
|
1252
|
+
if (pageAds.ads && pageAds.ads.length > 0) {
|
|
1253
|
+
const ad = pageAds.ads[0];
|
|
1254
|
+
markdown += `
|
|
1255
|
+
|
|
1256
|
+
---
|
|
1257
|
+
> **Sponsored:** [${ad.text}](${ad.url}) - ${ad.advertiser}
|
|
1258
|
+
`;
|
|
1259
|
+
client.logImpression({
|
|
1260
|
+
impression_id: ad.impression_id,
|
|
1261
|
+
site_id: config.siteId,
|
|
1262
|
+
page_path: pathname,
|
|
1263
|
+
agent_ua: userAgent,
|
|
1264
|
+
agent_ip: getClientIp(headers),
|
|
1265
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
1266
|
+
}).catch(() => {
|
|
1267
|
+
});
|
|
1268
|
+
}
|
|
1269
|
+
return new import_server.NextResponse(markdown, {
|
|
1270
|
+
status: res.status,
|
|
1271
|
+
headers: {
|
|
1272
|
+
"Content-Type": "text/markdown",
|
|
1273
|
+
"X-Apptvty-AEO": "true"
|
|
1274
|
+
}
|
|
1275
|
+
});
|
|
1246
1276
|
}
|
|
1277
|
+
return res;
|
|
1278
|
+
} catch (err) {
|
|
1279
|
+
if (config.debug) console.warn("[apptvty] Markdown proxy failed:", err);
|
|
1247
1280
|
}
|
|
1248
1281
|
}
|
|
1249
1282
|
return response;
|
|
@@ -1298,33 +1331,6 @@ function createNextjsDashboardHandler(config) {
|
|
|
1298
1331
|
});
|
|
1299
1332
|
};
|
|
1300
1333
|
}
|
|
1301
|
-
async function injectAdsIntoResponse(response, client, config, pathname, userAgent, ipAddress, isScraperService) {
|
|
1302
|
-
const html = await response.text();
|
|
1303
|
-
if (!html) return null;
|
|
1304
|
-
const pageAds = await client.getAdsForPage({ site_id: config.siteId, page_path: pathname });
|
|
1305
|
-
if (!pageAds.ads || pageAds.ads.length === 0) return null;
|
|
1306
|
-
const modified = injectIntoHtml(html, pageAds.ads, isScraperService);
|
|
1307
|
-
if (modified === html) return null;
|
|
1308
|
-
const newHeaders = new Headers(response.headers);
|
|
1309
|
-
newHeaders.set("X-Sponsored-Content", buildSponsoredHeader(pageAds.ads));
|
|
1310
|
-
const timestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
1311
|
-
for (const ad of pageAds.ads) {
|
|
1312
|
-
client.logImpression({
|
|
1313
|
-
impression_id: ad.impression_id,
|
|
1314
|
-
site_id: config.siteId,
|
|
1315
|
-
page_path: pathname,
|
|
1316
|
-
agent_ua: userAgent,
|
|
1317
|
-
agent_ip: ipAddress,
|
|
1318
|
-
timestamp
|
|
1319
|
-
}).catch(() => {
|
|
1320
|
-
});
|
|
1321
|
-
}
|
|
1322
|
-
return new import_server.NextResponse(modified, {
|
|
1323
|
-
status: response.status,
|
|
1324
|
-
statusText: response.statusText,
|
|
1325
|
-
headers: newHeaders
|
|
1326
|
-
});
|
|
1327
|
-
}
|
|
1328
1334
|
function parseBoolParam(value, defaultValue) {
|
|
1329
1335
|
if (value === null) return defaultValue;
|
|
1330
1336
|
return value === "1" || value === "true" || value === "yes";
|