novada-proxy-core 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/build/adapters/brightdata.d.ts +24 -0
  2. package/build/adapters/brightdata.js +56 -0
  3. package/build/adapters/generic.d.ts +32 -0
  4. package/build/adapters/generic.js +63 -0
  5. package/build/adapters/index.d.ts +16 -0
  6. package/build/adapters/index.js +42 -0
  7. package/build/adapters/novada.d.ts +23 -0
  8. package/build/adapters/novada.js +61 -0
  9. package/build/adapters/oxylabs.d.ts +22 -0
  10. package/build/adapters/oxylabs.js +54 -0
  11. package/build/adapters/smartproxy.d.ts +22 -0
  12. package/build/adapters/smartproxy.js +54 -0
  13. package/build/adapters/types.d.ts +58 -0
  14. package/build/adapters/types.js +7 -0
  15. package/build/config.d.ts +4 -0
  16. package/build/config.js +7 -0
  17. package/build/errors.d.ts +2 -0
  18. package/build/errors.js +58 -0
  19. package/build/index.d.ts +28 -0
  20. package/build/index.js +22 -0
  21. package/build/redact.d.ts +2 -0
  22. package/build/redact.js +24 -0
  23. package/build/tools/batch.d.ts +24 -0
  24. package/build/tools/batch.js +156 -0
  25. package/build/tools/crawl.d.ts +33 -0
  26. package/build/tools/crawl.js +604 -0
  27. package/build/tools/extract.d.ts +22 -0
  28. package/build/tools/extract.js +454 -0
  29. package/build/tools/fetch.d.ts +17 -0
  30. package/build/tools/fetch.js +243 -0
  31. package/build/tools/index.d.ts +19 -0
  32. package/build/tools/index.js +10 -0
  33. package/build/tools/map.d.ts +19 -0
  34. package/build/tools/map.js +131 -0
  35. package/build/tools/render.d.ts +8 -0
  36. package/build/tools/render.js +98 -0
  37. package/build/tools/research.d.ts +9 -0
  38. package/build/tools/research.js +126 -0
  39. package/build/tools/search.d.ts +9 -0
  40. package/build/tools/search.js +104 -0
  41. package/build/tools/session.d.ts +12 -0
  42. package/build/tools/session.js +108 -0
  43. package/build/tools/status.d.ts +2 -0
  44. package/build/tools/status.js +66 -0
  45. package/build/types.d.ts +34 -0
  46. package/build/types.js +1 -0
  47. package/build/utils.d.ts +18 -0
  48. package/build/utils.js +151 -0
  49. package/build/validation.d.ts +4 -0
  50. package/build/validation.js +6 -0
  51. package/package.json +50 -0
@@ -0,0 +1,9 @@
1
/** Input parameters for the novada_proxy_search tool. */
export interface SearchParams {
    /** Search query text; validateSearchParams enforces max 500 chars. */
    query: string;
    /** Only "google" is currently accepted by validateSearchParams. */
    engine?: "google";
    /** Number of results to return, 1-20 (default 10). */
    num?: number;
    /** Optional locale code (e.g. "us"); must match [a-zA-Z0-9_-]{1,10}. */
    country?: string;
    /** Optional language code (e.g. "en"); must match [a-zA-Z0-9_-]{1,10}. */
    language?: string;
}
/** Run a search via the Novada Scraper API; resolves to a JSON-stringified success envelope. */
export declare function novadaProxySearch(params: SearchParams, novadaApiKey: string): Promise<string>;
/** Validate untyped tool input and normalise it; throws Error on any invalid field. */
export declare function validateSearchParams(raw: Record<string, unknown>): SearchParams;
@@ -0,0 +1,104 @@
1
+ import axios from "axios";
2
+ import { NOVADA_SEARCH_URL, DEFAULT_USER_AGENT } from "../config.js";
3
+ import { QUOTA_NOTE } from "../validation.js";
4
// Locale codes may contain hyphens (e.g. "en-US"). These values are only ever
// sent as URL query params (country/language) below — unlike the session/city
// tokens in validation.js, they never enter a proxy auth string.
const SAFE_LOCALE = /^[a-zA-Z0-9_-]{1,10}$/;
5
/**
 * Run a search through the Novada Scraper API and return a JSON-stringified
 * success envelope with normalised organic results.
 *
 * Note: the Novada Scraper API authenticates via a query param (api_key), not
 * a header, so the key is embedded in the request URL and visible in
 * server-side access logs — an API design constraint of the current Novada
 * endpoint. We mitigate by sanitizing the key out of EVERY error message
 * surfaced to the agent.
 *
 * @param {object} params - SearchParams (query, engine, num, country, language).
 * @param {string} novadaApiKey - Novada API key; never echoed in errors.
 * @returns {Promise<string>} JSON-stringified ProxySuccessResponse.
 * @throws {Error} on invalid locale params, HTTP failure, or a Novada error code.
 */
export async function novadaProxySearch(params, novadaApiKey) {
    const { query, engine = "google", num = 10, country = "", language = "" } = params;
    // Guard: validate locale params even when called directly (not via validateSearchParams)
    if (country && !SAFE_LOCALE.test(country))
        throw new Error("country contains invalid characters");
    if (language && !SAFE_LOCALE.test(language))
        throw new Error("language contains invalid characters");
    // Sanitize api_key from ALL error paths — it is embedded in the request
    // URL and a provider error payload may echo that URL back.
    const sanitize = (s) => s.replaceAll(novadaApiKey, "***")
        .replaceAll(encodeURIComponent(novadaApiKey), "***");
    const searchParams = new URLSearchParams({
        q: query,
        api_key: novadaApiKey,
        engine,
        num: String(num),
    });
    if (country)
        searchParams.set("country", country);
    if (language)
        searchParams.set("language", language);
    const requestUrl = `${NOVADA_SEARCH_URL}?${searchParams.toString()}`;
    const startTime = Date.now();
    let response;
    try {
        response = await axios.get(requestUrl, {
            headers: {
                "User-Agent": DEFAULT_USER_AGENT,
                Origin: "https://www.novada.com",
                Referer: "https://www.novada.com/",
            },
            timeout: 30000,
        });
    }
    catch (err) {
        if (axios.isAxiosError(err)) {
            const status = err.response?.status;
            const msg = sanitize(String(err.response?.data?.msg || err.message));
            throw new Error(status ? `Search API HTTP ${status}: ${msg}` : `Search API error: ${msg}`);
        }
        throw new Error(sanitize(String(err instanceof Error ? err.message : err)));
    }
    const latency_ms = Date.now() - startTime;
    const data = response.data;
    if (data.code && data.code !== 200 && data.code !== 0) {
        // Fix: sanitize provider-supplied msg here too — previously this path
        // could leak the api_key if Novada echoed the request URL in `msg`.
        throw new Error(`Novada search error (${data.code}): ${sanitize(String(data.msg || "unknown"))}`);
    }
    // Accept the multiple response shapes Novada has returned for results.
    const rawResults = data.data?.organic_results || data.organic_results || data.data?.results || data.results || [];
    const results = rawResults.slice(0, num).map(r => ({
        title: r.title || "Untitled",
        url: r.redirection_link || r.url || r.link || "",
        snippet: r.description || r.snippet || "",
    }));
    const result = {
        ok: true,
        tool: "novada_proxy_search",
        data: {
            query,
            engine,
            count: results.length,
            results,
        },
        meta: {
            latency_ms,
            quota: { credits_estimated: 1, note: QUOTA_NOTE },
        },
    };
    return JSON.stringify(result);
}
78
/**
 * Validate raw tool input and normalise it into SearchParams.
 * Throws an Error with an agent-readable message on any invalid field;
 * absent optional fields are filled with their defaults.
 */
export function validateSearchParams(raw) {
    const { query, engine, country, language } = raw;
    if (!query || typeof query !== "string") {
        throw new Error("query is required");
    }
    if (query.length > 500) {
        throw new Error("query must be 500 characters or less");
    }
    if (engine && engine !== "google") {
        throw new Error("engine must be 'google' — other engines have known quality issues");
    }
    const num = raw.num === undefined ? 10 : Number(raw.num);
    if (!Number.isFinite(num) || num < 1 || num > 20) {
        throw new Error("num must be between 1 and 20");
    }
    // Short alphanumeric locale tokens only (hyphen allowed, e.g. "en-US").
    const isLocale = (v) => typeof v === "string" && /^[a-zA-Z0-9_-]{1,10}$/.test(v);
    if (country && !isLocale(country)) {
        throw new Error("country must be a short locale code (e.g. us, uk, de)");
    }
    if (language && !isLocale(language)) {
        throw new Error("language must be a short language code (e.g. en, zh, de)");
    }
    return {
        query,
        engine: engine || "google",
        num,
        country: country || "",
        language: language || "",
    };
}
@@ -0,0 +1,12 @@
1
+ import type { ProxyAdapter, ProxyCredentials } from "../adapters/index.js";
2
/** Input parameters for the novada_proxy_session tool. */
export interface SessionParams {
    /** Sticky-session identifier — letters/digits/underscores only, max 64 chars (no hyphens). */
    session_id: string;
    /** Target URL; must start with http:// or https://. */
    url: string;
    /** Optional country token — letters/digits/underscores only (no hyphens). */
    country?: string;
    /** Optional city token — letters/digits/underscores only, max 50 chars. */
    city?: string;
    /** Response body format (default "markdown"). */
    format?: "raw" | "markdown";
    /** Request timeout in seconds, 1-120 (default 60). */
    timeout?: number;
    /** When true, performs two extra IP checks to confirm the session exit IP is stable. */
    verify_sticky?: boolean;
}
/** Fetch a URL through a session-pinned proxy; resolves to a JSON-stringified response envelope. */
export declare function novadaProxySession(params: SessionParams, adapter: ProxyAdapter, credentials: ProxyCredentials): Promise<string>;
/** Validate untyped tool input and normalise it; throws Error on any invalid field. */
export declare function validateSessionParams(raw: Record<string, unknown>): SessionParams;
@@ -0,0 +1,108 @@
1
+ import axios from "axios";
2
+ import { HttpsProxyAgent } from "https-proxy-agent";
3
+ import { novadaProxyFetch } from "./fetch.js";
4
+ import { SAFE_COUNTRY, SAFE_CITY, SAFE_SESSION_ID, QUOTA_NOTE } from "../validation.js";
5
/**
 * Fetch a URL through a sticky proxy session and, optionally, verify that the
 * session actually pins a stable exit IP (two httpbin.org/ip checks with the
 * same session must return the same IP).
 *
 * @param {object} params - SessionParams (session_id, url, country, city, format, timeout, verify_sticky).
 * @param {object} adapter - ProxyAdapter providing capabilities and buildProxyUrl.
 * @param {object} credentials - ProxyCredentials passed to the adapter.
 * @returns {Promise<string>} JSON-stringified response envelope with session metadata.
 */
export async function novadaProxySession(params, adapter, credentials) {
    const { verify_sticky = false } = params;
    // Main fetch through the session-pinned proxy.
    const fetchResultStr = await novadaProxyFetch({
        url: params.url,
        session_id: params.session_id,
        country: params.country,
        city: params.city,
        format: params.format || "markdown",
        timeout: params.timeout,
    }, adapter, credentials);
    // Parse the fetch result JSON
    let fetchResult;
    try {
        fetchResult = JSON.parse(fetchResultStr);
    }
    catch {
        // Fallback: return raw fetch result if JSON parsing fails
        return fetchResultStr;
    }
    // Verification only runs when requested AND the adapter supports sticky
    // sessions — track that explicitly so credits are not over-reported.
    const verificationAttempted = verify_sticky && adapter.capabilities.sticky;
    let session_verified;
    if (verificationAttempted) {
        try {
            const proxyUrl = adapter.buildProxyUrl(credentials, {
                session_id: params.session_id,
                country: params.country,
            });
            const httpsAgent = new HttpsProxyAgent(proxyUrl);
            // First IP check via httpbin
            const ip1Resp = await axios.get("https://httpbin.org/ip", {
                httpsAgent,
                proxy: false,
                timeout: 15000,
            });
            const ip1 = ip1Resp.data.origin?.split(",")[0]?.trim();
            // Second IP check — same session, should return same IP
            const ip2Resp = await axios.get("https://httpbin.org/ip", {
                httpsAgent,
                proxy: false,
                timeout: 15000,
            });
            const ip2 = ip2Resp.data.origin?.split(",")[0]?.trim();
            session_verified = ip1 !== undefined && ip2 !== undefined && ip1 === ip2;
        }
        catch {
            // Verification call failed — report explicitly unverified.
            session_verified = false;
        }
    }
    // credits: 1 base + 2 when verification actually ran (the 2 httpbin calls).
    // Fix: previously verify_sticky=true was billed as 3 even when the adapter
    // lacked sticky support and no verification calls were ever made.
    const creditsEstimated = verificationAttempted ? 3 : 1;
    // Rebuild response with session metadata in meta
    const result = {
        ...fetchResult,
        tool: "novada_proxy_session",
        meta: {
            ...fetchResult.meta,
            session_id: params.session_id,
            session_verified,
            quota: { credits_estimated: creditsEstimated, note: QUOTA_NOTE },
        },
    };
    // Omit the key entirely when verification was not attempted.
    if (result.meta.session_verified === undefined)
        delete result.meta.session_verified;
    return JSON.stringify(result);
}
72
/**
 * Validate raw tool input and normalise it into SessionParams.
 * Identifier-like fields (session_id, country, city) forbid hyphens because
 * proxy providers use `-` as the segment delimiter inside auth strings.
 */
export function validateSessionParams(raw) {
    // Same character class as SAFE_SESSION_ID / SAFE_COUNTRY / SAFE_CITY.
    const safeToken = /^[a-zA-Z0-9_]+$/;
    const { session_id, url, country, city, format } = raw;
    if (typeof session_id !== "string" || session_id.length > 64 || !safeToken.test(session_id)) {
        throw new Error("session_id is required — letters, numbers, underscores only, max 64 chars (no hyphens)");
    }
    if (typeof url !== "string" || url === "") {
        throw new Error("url is required");
    }
    if (!/^https?:\/\//.test(url)) {
        throw new Error("url must start with http:// or https://");
    }
    if (country !== undefined
        && (typeof country !== "string" || country.length > 10 || !safeToken.test(country))) {
        throw new Error("country must be a 2-letter ISO code with no hyphens (e.g. US, DE, GB)");
    }
    if (city !== undefined
        && (typeof city !== "string" || city.length > 50 || !safeToken.test(city))) {
        throw new Error("city must contain only letters, numbers, underscores, max 50 chars (e.g. newyork, london)");
    }
    if (format && format !== "raw" && format !== "markdown") {
        throw new Error("format must be 'raw' or 'markdown'");
    }
    const timeout = raw.timeout === undefined ? 60 : Number(raw.timeout);
    if (!Number.isFinite(timeout) || timeout < 1 || timeout > 120) {
        throw new Error("timeout must be between 1 and 120 seconds");
    }
    return {
        session_id,
        url,
        country,
        city,
        format: format || "markdown",
        timeout,
        verify_sticky: raw.verify_sticky === true,
    };
}
@@ -0,0 +1,2 @@
1
+ import type { ProxyAdapter, ProxyCredentials } from "../adapters/index.js";
2
/**
 * Report the configured provider, its capability flags, and live proxy
 * connectivity (HEALTHY / DEGRADED / UNAVAILABLE, checked via httpbin.org/ip).
 * With no adapter/credentials configured, reports "none configured" and
 * UNAVAILABLE. Resolves to a JSON-stringified response envelope.
 */
export declare function novadaProxyStatus(adapter?: ProxyAdapter, credentials?: ProxyCredentials): Promise<string>;
@@ -0,0 +1,66 @@
1
+ import axios from "axios";
2
+ import { HttpsProxyAgent } from "https-proxy-agent";
3
+ import { VERSION } from "../config.js";
4
+ import { QUOTA_NOTE } from "../validation.js";
5
/**
 * Report the configured provider, its capability flags, and live proxy
 * connectivity, checked by fetching https://httpbin.org/ip through the proxy.
 * Never throws on connectivity failure — that is reported as "UNAVAILABLE".
 */
export async function novadaProxyStatus(adapter, credentials) {
    const checkStarted = Date.now();
    const verified_via = "https://httpbin.org/ip";
    let status = "UNAVAILABLE";
    let proxyIp;
    if (adapter && credentials) {
        try {
            const agent = new HttpsProxyAgent(adapter.buildProxyUrl(credentials, {}));
            const resp = await axios.get(verified_via, {
                httpsAgent: agent,
                proxy: false,
                timeout: 10000,
            });
            // httpbin may return "clientIp, proxyIp" — take the first entry.
            const origin = resp.data.origin?.split(",")[0]?.trim();
            if (origin) {
                proxyIp = origin;
                status = "HEALTHY";
            }
            else {
                status = "DEGRADED";
            }
        }
        catch {
            status = "UNAVAILABLE";
        }
    }
    const latency_ms = Date.now() - checkStarted;
    // Table-driven capability listing (order matters for output stability).
    const capabilities = [];
    if (adapter) {
        const flags = [
            [adapter.capabilities.country, "country_targeting"],
            [adapter.capabilities.city, "city_targeting"],
            [adapter.capabilities.sticky, "sticky_sessions"],
        ];
        for (const [enabled, label] of flags) {
            if (enabled)
                capabilities.push(label);
        }
    }
    // Conditional spread keeps proxy_ip out of the payload when unknown,
    // and preserves the status/verified_via/proxy_ip/latency_ms key order.
    const connectivity = {
        status,
        verified_via,
        ...(proxyIp ? { proxy_ip: proxyIp } : {}),
        latency_ms,
    };
    return JSON.stringify({
        ok: true,
        tool: "novada_proxy_status",
        data: {
            provider: adapter?.displayName || "none configured",
            version: VERSION,
            capabilities,
            connectivity,
        },
        meta: {
            latency_ms,
            quota: { credits_estimated: 1, note: QUOTA_NOTE },
        },
    });
}
@@ -0,0 +1,34 @@
1
/** Machine-readable error codes surfaced to the calling agent. */
export type ProxyErrorCode = "BOT_DETECTION_SUSPECTED" | "SESSION_STICKINESS_FAILED" | "RATE_LIMITED" | "INVALID_INPUT" | "TIMEOUT" | "TLS_ERROR" | "NETWORK_ERROR" | "PROVIDER_NOT_CONFIGURED" | "UNKNOWN_ERROR";
/** Rough credit-usage estimate attached to tool responses. */
export interface QuotaMeta {
    credits_estimated: number;
    note: string;
}
/** Envelope returned (JSON-stringified) by tools on success. */
export interface ProxySuccessResponse {
    ok: true;
    /** Tool identifier, e.g. "novada_proxy_search". */
    tool: string;
    data: Record<string, unknown>;
    meta: {
        latency_ms: number;
        proxy_ip?: string;
        country?: string;
        session_id?: string;
        /** Present only when sticky-session verification was attempted. */
        session_verified?: boolean;
        truncated?: boolean;
        content_density?: number;
        concurrency?: number;
        quota?: QuotaMeta;
        cache_hit?: boolean;
        cache_age_seconds?: number;
    };
}
/** Envelope returned (JSON-stringified) by tools on failure. */
export interface ProxyErrorResponse {
    ok: false;
    error: {
        code: ProxyErrorCode;
        message: string;
        /** True when the agent may retry (optionally after retry_after_seconds). */
        recoverable: boolean;
        /** Plain-English guidance telling the calling agent what to do next. */
        agent_instruction: string;
        retry_after_seconds?: number;
    };
}
/** Discriminated on the `ok` literal. */
export type ProxyResponse = ProxySuccessResponse | ProxyErrorResponse;
package/build/types.js ADDED
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,18 @@
1
/** Truncate to at most maxChars UTF-16 code units without splitting a surrogate pair. */
export declare function unicodeSafeTruncate(s: string, maxChars: number): string;
/** Decode the common HTML entities handled by this package (&amp; &lt; &gt; &quot; &#39; &nbsp;). */
export declare function decodeHtmlEntities(s: string): string;
/**
 * Remove noise elements from HTML BEFORE markdown conversion.
 * Conservative — only strips elements where the tag name or class/id strongly indicates noise.
 */
export declare function stripNoiseElements(html: string): string;
/** Convert HTML to lightweight markdown: headings, list bullets, links; strips scripts, styles, and noise. */
export declare function htmlToMarkdown(html: string): string;
/**
 * Count rough number of HTML tags in a string.
 */
export declare function countHtmlTags(html: string): number;
/**
 * Compute content density score: ratio of text content to total content + tag overhead.
 * Higher = cleaner content. Range: 0.0 to 1.0.
 */
export declare function contentDensity(markdownLength: number, tagCount: number): number;
/** Convert HTML to plain text: markdown conversion, then strip link URLs, heading markers, bullets. */
export declare function htmlToText(html: string): string;
package/build/utils.js ADDED
@@ -0,0 +1,151 @@
1
/**
 * Truncate `s` to at most `maxChars` UTF-16 code units without splitting a
 * surrogate pair at the cut point.
 *
 * Only a stranded HIGH surrogate at the boundary means a pair was cut in
 * half, so only that case trims one extra unit. A LOW surrogate at the
 * boundary means any pair ends exactly there and is kept intact.
 * (Previously that case dropped two units — losing a complete final
 * character — and for maxChars=1 the resulting negative slice index could
 * even return a string LONGER than maxChars.)
 *
 * @param {string} s - input string.
 * @param {number} maxChars - maximum length in UTF-16 code units.
 * @returns {string} prefix of `s` with length <= maxChars.
 */
export function unicodeSafeTruncate(s, maxChars) {
    if (s.length <= maxChars)
        return s;
    let end = maxChars;
    const boundary = s.charCodeAt(end - 1);
    if (boundary >= 0xD800 && boundary <= 0xDBFF)
        end--; // high surrogate stranded at the cut → drop it
    return s.slice(0, end);
}
13
/**
 * Decode the small set of HTML entities this package handles.
 *
 * `&amp;` is decoded LAST. Decoding it first double-decodes escaped
 * entities: "&amp;lt;" (the literal text "&lt;") previously became "<".
 *
 * @param {string} s - text possibly containing HTML entities.
 * @returns {string} decoded text.
 */
export function decodeHtmlEntities(s) {
    return s
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&quot;/g, '"')
        .replace(/&#39;/g, "'")
        .replace(/&nbsp;/g, " ")
        .replace(/&amp;/g, "&");
}
22
/**
 * Noise class/id patterns that strongly indicate non-content elements.
 * Conservative: only match when the pattern is a clear indicator of noise.
 * NOTE: this regex's .source is interpolated into larger regexes below; its
 * single capture group is never back-referenced, so embedding it is safe.
 */
const NOISE_ATTR_PATTERN = /\b(cookie[-_]?banner|cookie[-_]?consent|cookie[-_]?notice|popup|modal|overlay|sidebar|nav[-_]?bar|navigation|footer|header|advertisement|ad[-_]?banner|social[-_]?share|share[-_]?buttons|comments?[-_]?section|menu[-_]?toggle|skip[-_]?nav|breadcrumb)\b/i;
/**
 * Remove noise elements from HTML BEFORE markdown conversion.
 * Conservative — only strips elements where the tag name or class/id strongly indicates noise.
 *
 * Four passes: (1) structural noise tags, (2) depth-matched removal of
 * tags whose class/id matches NOISE_ATTR_PATTERN, (3) hidden elements,
 * (4) empty divs/spans.
 */
export function stripNoiseElements(html) {
    let result = html;
    // 1. Strip structural noise tags and their content: nav, header, footer, aside, form
    // NOTE(review): the non-greedy [\s\S]*? stops at the FIRST closing tag, so
    // a nested <nav> inside <nav> leaves a trailing fragment — presumably
    // acceptable for noise stripping, but confirm against real pages.
    result = result.replace(/<nav[\s>][\s\S]*?<\/nav>/gi, "");
    result = result.replace(/<header[\s>][\s\S]*?<\/header>/gi, "");
    result = result.replace(/<footer[\s>][\s\S]*?<\/footer>/gi, "");
    result = result.replace(/<aside[\s>][\s\S]*?<\/aside>/gi, "");
    result = result.replace(/<form[\s>][\s\S]*?<\/form>/gi, "");
    // 2. Strip elements with noise class/id patterns
    // Match opening tags with class="..." or id="..." containing noise keywords,
    // then remove through the matching closing tag.
    // We handle <div>, <section>, <span>, <ul>, <ol> with noise attributes.
    const noiseTagNames = ["div", "section", "span", "ul", "ol", "p"];
    for (const tag of noiseTagNames) {
        // Match opening tag with class or id containing noise pattern
        const openTagRe = new RegExp(`<${tag}\\s[^>]*(?:class|id)\\s*=\\s*["'][^"']*${NOISE_ATTR_PATTERN.source}[^"']*["'][^>]*>`, "gi");
        // For each match, find the corresponding closing tag and remove everything
        let match;
        while ((match = openTagRe.exec(result)) !== null) {
            const startIdx = match.index;
            // Simple depth-based closing tag finder
            const closeTag = `</${tag}>`;
            let depth = 1;
            let searchPos = startIdx + match[0].length;
            const openRe = new RegExp(`<${tag}[\\s>]`, "gi");
            const closeRe = new RegExp(`</${tag}>`, "gi");
            let endIdx = -1;
            // Walk forward counting same-tag open/close pairs until the
            // opening tag we matched is balanced (depth returns to 0).
            while (depth > 0 && searchPos < result.length) {
                openRe.lastIndex = searchPos;
                closeRe.lastIndex = searchPos;
                const nextOpen = openRe.exec(result);
                const nextClose = closeRe.exec(result);
                if (!nextClose)
                    break; // malformed HTML, bail
                if (nextOpen && nextOpen.index < nextClose.index) {
                    depth++;
                    searchPos = nextOpen.index + nextOpen[0].length;
                }
                else {
                    depth--;
                    if (depth === 0) {
                        endIdx = nextClose.index + closeTag.length;
                    }
                    searchPos = nextClose.index + nextClose[0].length;
                }
            }
            if (endIdx !== -1) {
                // Removal shifts everything after startIdx left, so rewind
                // lastIndex to re-scan from the same position.
                result = result.slice(0, startIdx) + result.slice(endIdx);
                openTagRe.lastIndex = startIdx; // re-scan from same position
            }
        }
    }
    // 3. Strip hidden elements
    // NOTE(review): these pair an opening tag with the NEAREST closing tag of
    // ANY name, so a hidden element containing children may be only partially
    // removed — confirm this is intentional best-effort behavior.
    result = result.replace(/<[^>]+style\s*=\s*["'][^"']*display\s*:\s*none[^"']*["'][^>]*>[\s\S]*?<\/[^>]+>/gi, "");
    result = result.replace(/<[^>]+style\s*=\s*["'][^"']*visibility\s*:\s*hidden[^"']*["'][^>]*>[\s\S]*?<\/[^>]+>/gi, "");
    result = result.replace(/<[^>]+aria-hidden\s*=\s*["']true["'][^>]*>[\s\S]*?<\/[^>]+>/gi, "");
    // 4. Strip empty divs and spans (only whitespace content)
    result = result.replace(/<div[^>]*>\s*<\/div>/gi, "");
    result = result.replace(/<span[^>]*>\s*<\/span>/gi, "");
    return result;
}
92
/**
 * Convert an HTML fragment to lightweight markdown: strips noise/script/style,
 * maps headings, list items, line breaks, and links, then cleans up whitespace.
 *
 * @param {string} html - raw HTML.
 * @returns {string} trimmed markdown text.
 */
export function htmlToMarkdown(html) {
    // Step 1: Strip noise elements before conversion
    const cleaned = stripNoiseElements(html);
    let md = cleaned
        .replace(/<script[\s\S]*?<\/script>/gi, "")
        .replace(/<style[\s\S]*?<\/style>/gi, "")
        .replace(/<noscript[\s\S]*?<\/noscript>/gi, "")
        .replace(/<br\s*\/?>/gi, "\n")
        .replace(/<\/p>/gi, "\n\n")
        .replace(/<\/h[1-6]>/gi, "\n\n")
        .replace(/<\/li>/gi, "\n")
        .replace(/<li[^>]*>/gi, "- ")
        .replace(/<h([1-6])[^>]*>/gi, (_, n) => "#".repeat(Number(n)) + " ")
        .replace(/<a[^>]+href=["']([^"']+)["'][^>]*>([^<]*)<\/a>/gi, (_, href, text) => {
            const decoded = decodeHtmlEntities(href);
            // Drop executable/inline URL schemes; keep only the anchor text.
            if (decoded.startsWith("data:") || decoded.startsWith("javascript:"))
                return text;
            return `[${text}](${decoded})`;
        })
        .replace(/<[^>]+>/g, "")
        // Fix: decode entities with &amp; LAST — decoding it first double-
        // decoded strings like "&amp;lt;" into "<" instead of the literal "&lt;".
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&quot;/g, '"')
        .replace(/&#39;/g, "'")
        .replace(/&nbsp;/g, " ")
        .replace(/&amp;/g, "&");
    // Step 2: Post-conversion cleanup
    // Collapse 3+ consecutive newlines to 2
    md = md.replace(/\n{3,}/g, "\n\n");
    // Remove lines that are only dashes or underscores (visual separators)
    md = md.replace(/^\s*[-_]{3,}\s*$/gm, "");
    // Trim trailing whitespace per line
    md = md.replace(/[^\S\n]+$/gm, "");
    // Final collapse after separator removal
    md = md.replace(/\n{3,}/g, "\n\n");
    return md.trim();
}
129
/**
 * Rough count of HTML opening/self-closing tags in a string.
 * (The pattern requires a letter after '<', so closing tags like </p>
 * and comments are not counted.)
 */
export function countHtmlTags(html) {
    return (html.match(/<[a-zA-Z][^>]*>/g) ?? []).length;
}
136
/**
 * Content density score in [0.0, 1.0]: markdown text length relative to
 * text length plus a 10-characters-per-tag overhead penalty.
 * Higher = cleaner content. Rounded to two decimals.
 */
export function contentDensity(markdownLength, tagCount) {
    if (markdownLength === 0 && tagCount === 0)
        return 0; // avoid 0/0 -> NaN
    const density = markdownLength / (markdownLength + tagCount * 10);
    return Number(density.toFixed(2));
}
145
/**
 * Convert HTML to plain text: run the markdown conversion, then strip the
 * markdown syntax that conversion introduced.
 *
 * @param {string} html - raw HTML.
 * @returns {string} trimmed plain text.
 */
export function htmlToText(html) {
    return htmlToMarkdown(html)
        .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // strip link URLs, keep text
        // Fix: anchor to line start — htmlToMarkdown only emits heading
        // markers at line start, and the unanchored /#+\s/g also mangled
        // literal '#' in prose (e.g. "C# is" became "Cis").
        .replace(/^#+\s/gm, "")
        .replace(/^-\s/gm, "") // strip list bullets
        .trim();
}
@@ -0,0 +1,4 @@
1
/** Allowed country token — letters/digits/underscores, no hyphens (proxy auth-string delimiter). */
export declare const SAFE_COUNTRY: RegExp;
/** Allowed city token — letters/digits/underscores, no hyphens (proxy auth-string delimiter). */
export declare const SAFE_CITY: RegExp;
/** Allowed sticky-session id token — letters/digits/underscores, no hyphens. */
export declare const SAFE_SESSION_ID: RegExp;
/** Note attached to every quota estimate in response meta. */
export declare const QUOTA_NOTE = "Check dashboard.novada.com for real-time balance";
@@ -0,0 +1,6 @@
1
// Proxy username injection prevention — no hyphens allowed
// (providers use `-` as segment delimiter in auth strings)
// All three share one character class so a validated value can never
// terminate or extend a `-`-delimited segment of the proxy username.
export const SAFE_COUNTRY = /^[a-zA-Z0-9_]+$/;
export const SAFE_CITY = /^[a-zA-Z0-9_]+$/;
export const SAFE_SESSION_ID = /^[a-zA-Z0-9_]+$/;
// Surfaced to the agent in every response's meta.quota.note.
export const QUOTA_NOTE = "Check dashboard.novada.com for real-time balance";
package/package.json ADDED
@@ -0,0 +1,50 @@
1
+ {
2
+ "name": "novada-proxy-core",
3
+ "version": "0.0.1",
4
+ "description": "Core proxy engine \u2014 adapters, tools, types for Novada Proxy",
5
+ "type": "module",
6
+ "main": "build/index.js",
7
+ "types": "build/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./build/index.d.ts",
11
+ "import": "./build/index.js"
12
+ },
13
+ "./tools": {
14
+ "types": "./build/tools/index.d.ts",
15
+ "import": "./build/tools/index.js"
16
+ },
17
+ "./adapters": {
18
+ "types": "./build/adapters/index.d.ts",
19
+ "import": "./build/adapters/index.js"
20
+ },
21
+ "./errors": {
22
+ "types": "./build/errors.d.ts",
23
+ "import": "./build/errors.js"
24
+ }
25
+ },
26
+ "files": [
27
+ "build/**/*.js",
28
+ "build/**/*.d.ts",
29
+ "README.md"
30
+ ],
31
+ "scripts": {
32
+ "build": "tsc",
33
+ "test": "vitest run"
34
+ },
35
+ "dependencies": {
36
+ "axios": "^1.7.0",
37
+ "http-proxy-agent": "^7.0.0",
38
+ "https-proxy-agent": "^9.0.0",
39
+ "puppeteer-core": "^22.15.0"
40
+ },
41
+ "devDependencies": {
42
+ "@types/node": "^20.11.24",
43
+ "typescript": "^5.3.3",
44
+ "vitest": "^4.1.4"
45
+ },
46
+ "engines": {
47
+ "node": ">=18.0.0"
48
+ },
49
+ "license": "MIT"
50
+ }