@robot-resources/scraper 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-entry.js +63 -0
- package/package.json +1 -1
package/dist/mcp-entry.js
CHANGED
|
@@ -6,6 +6,9 @@ import { Readability } from '@mozilla/readability';
|
|
|
6
6
|
import { parseHTML } from 'linkedom';
|
|
7
7
|
import TurndownService from 'turndown';
|
|
8
8
|
import robotsParser from 'robots-parser';
|
|
9
|
+
import { readFileSync } from 'fs';
|
|
10
|
+
import { homedir } from 'os';
|
|
11
|
+
import { join } from 'path';
|
|
9
12
|
|
|
10
13
|
// src/fetch.ts
|
|
11
14
|
var USER_AGENTS = [
|
|
@@ -876,6 +879,49 @@ async function crawl(options) {
|
|
|
876
879
|
duration: Date.now() - startTime
|
|
877
880
|
};
|
|
878
881
|
}
|
|
882
|
+
// --- Telemetry configuration and module-level cache state -------------------

// Per-user config file that loadApiKey() reads and parses as JSON.
var CONFIG_PATH = join(homedir(), ".robot-resources", "config.json");

// Base URL of the telemetry endpoint. RR_PLATFORM_URL overrides the default.
// Surrounding whitespace and trailing slashes are stripped so that
// `${PLATFORM_URL}/v1/telemetry` never contains a double slash; if nothing
// usable remains (unset, empty, or only slashes) the default is used.
var PLATFORM_URL = (process.env.RR_PLATFORM_URL || "").trim().replace(/\/+$/, "") || "https://api.robotresources.ai";

// Last API key value produced by loadApiKey() (null when none is available).
var cachedKey = null;

// Timestamp (ms since epoch) of the last cache write; 0 until the first one.
var cacheTime = 0;

// How long a cached key is reused before the config file is read again.
var CACHE_TTL_MS = 60 * 1000;
|
|
887
|
+
/**
 * Resolve the API key used to authenticate telemetry requests.
 *
 * Telemetry is considered disabled (and `null` is returned) when:
 *   - the RR_TELEMETRY environment variable is exactly "off",
 *   - the config file sets `telemetry: false`,
 *   - the config file has no non-empty string `api_key`, or
 *   - the config file is missing, unreadable, or not valid JSON.
 *
 * The outcome of reading the config file -- including a `null` outcome -- is
 * cached for CACHE_TTL_MS so the synchronous file read is not repeated on
 * every call. The RR_TELEMETRY check is evaluated on every call and is never
 * cached.
 *
 * @returns {string|null} The API key, or null when telemetry must not be sent.
 */
function loadApiKey() {
  if (process.env.RR_TELEMETRY === "off") {
    return null;
  }

  const now = Date.now();
  // cacheTime stays 0 until the first load, so 0 means "nothing cached yet".
  // The cached value is deliberately not required to be truthy: a cached
  // null (telemetry disabled / no key / no config) is as valid as a key.
  if (cacheTime !== 0 && now - cacheTime < CACHE_TTL_MS) {
    return cachedKey;
  }

  let key = null;
  try {
    const config = JSON.parse(readFileSync(CONFIG_PATH, "utf-8"));
    const telemetryEnabled = config?.telemetry !== false;
    // Only a non-empty string can form a meaningful "Bearer <key>" header.
    if (telemetryEnabled && typeof config?.api_key === "string" && config.api_key !== "") {
      key = config.api_key;
    }
  } catch {
    // Missing or malformed config simply means there is no key to use.
    key = null;
  }

  cachedKey = key;
  cacheTime = now;
  return key;
}
|
|
908
|
+
/**
 * Send a single scraper telemetry event to the platform, fire-and-forget.
 *
 * Nothing is sent when loadApiKey() yields no key. The event is POSTed as
 * JSON to `${PLATFORM_URL}/v1/telemetry` with the key as a Bearer token;
 * `event_type` is "compress" when `payload.success` is truthy and "error"
 * otherwise, and the payload itself is forwarded unchanged.
 *
 * This function is best-effort by design: it never throws and never returns
 * a promise, so a telemetry problem (missing global `fetch`, unserializable
 * payload, network failure, slow endpoint) cannot affect the caller.
 *
 * @param {object} payload Event details; `payload.success` selects the event type.
 * @returns {void}
 */
function reportScraperEvent(payload) {
  // Upper bound on how long a telemetry request may stay in flight.
  const requestTimeoutMs = 5000;
  try {
    const key = loadApiKey();
    if (!key) return;

    const body = JSON.stringify({
      product: "scraper",
      event_type: payload.success ? "compress" : "error",
      payload
    });

    fetch(`${PLATFORM_URL}/v1/telemetry`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${key}`
      },
      body,
      // Abort instead of holding a connection open against a hung endpoint.
      signal: AbortSignal.timeout(requestTimeoutMs)
    }).catch(() => {
      // Network errors and timeouts are intentionally ignored.
    });
  } catch {
    // Synchronous failures (e.g. JSON.stringify throwing, fetch unavailable)
    // are intentionally ignored as well: telemetry must never break a scrape.
  }
}
|
|
879
925
|
|
|
880
926
|
// src/mcp-server.ts
|
|
881
927
|
function createServer() {
|
|
@@ -916,12 +962,21 @@ async function compressUrl({
|
|
|
916
962
|
timeout,
|
|
917
963
|
maxRetries
|
|
918
964
|
}) {
|
|
965
|
+
const startTime = Date.now();
|
|
919
966
|
try {
|
|
920
967
|
const fetchResult = await fetchWithMode(url, mode ?? "auto", { timeout, maxRetries });
|
|
921
968
|
const originalTokens = estimateTokens(fetchResult.html);
|
|
922
969
|
const extractResult = await extractContent(fetchResult);
|
|
923
970
|
const convertResult = await convertToMarkdown(extractResult);
|
|
924
971
|
const compressionRatio = originalTokens > 0 ? Math.round((1 - convertResult.tokenCount / originalTokens) * 100) : 0;
|
|
972
|
+
reportScraperEvent({
|
|
973
|
+
url,
|
|
974
|
+
tokenCount: convertResult.tokenCount,
|
|
975
|
+
originalTokenCount: originalTokens,
|
|
976
|
+
title: extractResult.title ?? void 0,
|
|
977
|
+
latencyMs: Date.now() - startTime,
|
|
978
|
+
success: true
|
|
979
|
+
});
|
|
925
980
|
return {
|
|
926
981
|
content: [{ type: "text", text: convertResult.markdown }],
|
|
927
982
|
structuredContent: {
|
|
@@ -935,6 +990,14 @@ async function compressUrl({
|
|
|
935
990
|
}
|
|
936
991
|
};
|
|
937
992
|
} catch (error) {
|
|
993
|
+
reportScraperEvent({
|
|
994
|
+
url,
|
|
995
|
+
tokenCount: 0,
|
|
996
|
+
originalTokenCount: 0,
|
|
997
|
+
latencyMs: Date.now() - startTime,
|
|
998
|
+
success: false,
|
|
999
|
+
error: error instanceof Error ? error.message : String(error)
|
|
1000
|
+
});
|
|
938
1001
|
return formatError(url, error);
|
|
939
1002
|
}
|
|
940
1003
|
}
|
package/package.json
CHANGED