novada-proxy-core 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/build/adapters/brightdata.d.ts +24 -0
  2. package/build/adapters/brightdata.js +56 -0
  3. package/build/adapters/generic.d.ts +32 -0
  4. package/build/adapters/generic.js +63 -0
  5. package/build/adapters/index.d.ts +16 -0
  6. package/build/adapters/index.js +42 -0
  7. package/build/adapters/novada.d.ts +23 -0
  8. package/build/adapters/novada.js +61 -0
  9. package/build/adapters/oxylabs.d.ts +22 -0
  10. package/build/adapters/oxylabs.js +54 -0
  11. package/build/adapters/smartproxy.d.ts +22 -0
  12. package/build/adapters/smartproxy.js +54 -0
  13. package/build/adapters/types.d.ts +58 -0
  14. package/build/adapters/types.js +7 -0
  15. package/build/config.d.ts +4 -0
  16. package/build/config.js +7 -0
  17. package/build/errors.d.ts +2 -0
  18. package/build/errors.js +58 -0
  19. package/build/index.d.ts +28 -0
  20. package/build/index.js +22 -0
  21. package/build/redact.d.ts +2 -0
  22. package/build/redact.js +24 -0
  23. package/build/tools/batch.d.ts +24 -0
  24. package/build/tools/batch.js +156 -0
  25. package/build/tools/crawl.d.ts +33 -0
  26. package/build/tools/crawl.js +604 -0
  27. package/build/tools/extract.d.ts +22 -0
  28. package/build/tools/extract.js +454 -0
  29. package/build/tools/fetch.d.ts +17 -0
  30. package/build/tools/fetch.js +243 -0
  31. package/build/tools/index.d.ts +19 -0
  32. package/build/tools/index.js +10 -0
  33. package/build/tools/map.d.ts +19 -0
  34. package/build/tools/map.js +131 -0
  35. package/build/tools/render.d.ts +8 -0
  36. package/build/tools/render.js +98 -0
  37. package/build/tools/research.d.ts +9 -0
  38. package/build/tools/research.js +126 -0
  39. package/build/tools/search.d.ts +9 -0
  40. package/build/tools/search.js +104 -0
  41. package/build/tools/session.d.ts +12 -0
  42. package/build/tools/session.js +108 -0
  43. package/build/tools/status.d.ts +2 -0
  44. package/build/tools/status.js +66 -0
  45. package/build/types.d.ts +34 -0
  46. package/build/types.js +1 -0
  47. package/build/utils.d.ts +18 -0
  48. package/build/utils.js +151 -0
  49. package/build/validation.d.ts +4 -0
  50. package/build/validation.js +6 -0
  51. package/package.json +50 -0
package/build/index.js ADDED
@@ -0,0 +1,22 @@
1
// Public API barrel: re-exports the package's stable surface so consumers can
// import from "novada-proxy-core" without reaching into build paths.
// ─── Config ──────────────────────────────────────────────────────────────────
export { VERSION, NPM_PACKAGE } from "./config.js";
// ─── Errors ──────────────────────────────────────────────────────────────────
export { classifyError } from "./errors.js";
// ─── Adapters ────────────────────────────────────────────────────────────────
export { resolveAdapter, listAdapters } from "./adapters/index.js";
// ─── Tools ───────────────────────────────────────────────────────────────────
export { novadaProxyFetch, validateFetchParams, getCacheTtl, makeCacheKey, clearResponseCache, } from "./tools/fetch.js";
export { novadaProxyBatchFetch, validateBatchFetchParams } from "./tools/batch.js";
export { novadaProxySearch, validateSearchParams } from "./tools/search.js";
export { novadaProxySession, validateSessionParams } from "./tools/session.js";
export { novadaProxyStatus } from "./tools/status.js";
export { novadaProxyRender, validateRenderParams } from "./tools/render.js";
export { novadaProxyExtract, validateExtractParams, extractField, deepFind, shouldEscalateToRender, } from "./tools/extract.js";
export { novadaProxyMap, validateMapParams } from "./tools/map.js";
export { novadaProxyCrawl, validateCrawlParams } from "./tools/crawl.js";
export { novadaProxyResearch, validateResearchParams } from "./tools/research.js";
// ─── Utils ───────────────────────────────────────────────────────────────────
export { unicodeSafeTruncate, decodeHtmlEntities, htmlToMarkdown, htmlToText, stripNoiseElements, countHtmlTags, contentDensity, } from "./utils.js";
// ─── Validation constants ─────────────────────────────────────────────────────
export { SAFE_COUNTRY, SAFE_CITY, SAFE_SESSION_ID, QUOTA_NOTE } from "./validation.js";
// ─── Redaction ───────────────────────────────────────────────────────────────
export { redactCredentials } from "./redact.js";
@@ -0,0 +1,2 @@
1
import type { ProxyAdapter, ProxyCredentials } from "./adapters/index.js";
/**
 * Replaces every occurrence of the adapter's sensitive credential values,
 * the `user` credential, and the NOVADA_API_KEY / NOVADA_BROWSER_WS env
 * values in `message` with "***" — in both raw and URI-encoded form.
 *
 * @param message     text that may contain secrets (e.g. an error string)
 * @param adapter     proxy adapter whose `sensitiveFields` name the secrets
 * @param credentials credential map the secret values are read from
 * @returns the redacted message
 */
export declare function redactCredentials(message: string, adapter: ProxyAdapter, credentials: ProxyCredentials): string;
@@ -0,0 +1,24 @@
1
/**
 * Scrubs credential material from an arbitrary message (typically an error
 * string) before it is surfaced to callers or logs.
 *
 * Each secret is replaced in both its raw and URI-encoded form, since proxy
 * credentials frequently appear percent-encoded inside URLs.
 *
 * Redacted values:
 *  - every field listed in `adapter.sensitiveFields`
 *  - the `user` credential field
 *  - the NOVADA_API_KEY / NOVADA_BROWSER_WS environment variables
 *
 * @param message     text that may contain secrets
 * @param adapter     proxy adapter exposing `sensitiveFields`
 * @param credentials credential map the secret values are read from
 * @returns the message with every occurrence replaced by "***"
 */
export function redactCredentials(message, adapter, credentials) {
    // Replace one secret (raw + URI-encoded) everywhere in `text`; no-op for
    // empty/missing secrets. Extracted to avoid triplicating the scrub pair.
    const scrub = (text, secret) => secret
        ? text.replaceAll(secret, "***").replaceAll(encodeURIComponent(secret), "***")
        : text;
    let redacted = message;
    for (const field of adapter.sensitiveFields) {
        redacted = scrub(redacted, credentials[field]);
    }
    redacted = scrub(redacted, credentials["user"]);
    // Also redact API keys that may appear in URLs
    for (const envKey of ["NOVADA_API_KEY", "NOVADA_BROWSER_WS"]) {
        redacted = scrub(redacted, process.env[envKey]);
    }
    return redacted;
}
@@ -0,0 +1,24 @@
1
import type { ProxyAdapter, ProxyCredentials } from "../adapters/index.js";
/** Input parameters for novada_proxy_batch_fetch. */
export interface BatchFetchParams {
    /** Target URLs; the validator requires between 2 and 20 entries. */
    urls: string[];
    /** Optional proxy exit country code, applied to every URL in the batch. */
    country?: string;
    /** Optional sticky session identifier shared by all fetches. */
    session_id?: string;
    /** Per-page output shape; defaults to "markdown". */
    format?: "markdown" | "raw";
    /** Per-URL timeout in seconds (1–120, default 60). */
    timeout?: number;
    /** Maximum parallel fetches (1–5, default 3). */
    concurrency?: number;
}
/** Outcome for one URL inside the aggregated batch report. */
export interface BatchFetchResult {
    url: string;
    /** true when the fetch succeeded; `error` is set when false. */
    ok: boolean;
    status_code?: number;
    content?: string;
    size_bytes?: number;
    /** true when the response came from the local response cache. */
    cache_hit?: boolean;
    /** Present only on failure; `message` is credential-redacted. */
    error?: {
        code: string;
        message: string;
    };
    /** Wall time for this URL, including time spent queued for a slot. */
    latency_ms: number;
}
export declare function novadaProxyBatchFetch(params: BatchFetchParams, adapter: ProxyAdapter, credentials: ProxyCredentials): Promise<string>;
export declare function validateBatchFetchParams(raw: Record<string, unknown>): BatchFetchParams;
@@ -0,0 +1,156 @@
1
+ import { novadaProxyFetch } from "./fetch.js";
2
+ import { SAFE_COUNTRY, SAFE_SESSION_ID, QUOTA_NOTE } from "../validation.js";
3
/**
 * Fetches every URL in `params.urls` through the proxy with bounded
 * concurrency and returns a JSON string aggregating the per-URL results
 * plus quota/latency metadata.
 *
 * Failures are captured per URL (never thrown): error messages are
 * credential-redacted and mapped to a coarse error code.
 *
 * @param params      validated BatchFetchParams
 * @param adapter     proxy adapter (supplies `sensitiveFields`)
 * @param credentials credential map forwarded to each fetch
 * @returns JSON string: { ok, tool, data: {requested, succeeded, failed, results}, meta }
 */
export async function novadaProxyBatchFetch(params, adapter, credentials) {
    const { urls, country, session_id, format = "markdown", timeout = 60, concurrency = 3, } = params;
    const wallStart = Date.now();
    // FIFO counting semaphore: at most `concurrency` fetches run at once.
    let inFlight = 0;
    const waiters = [];
    const acquire = () => new Promise((grant) => {
        if (inFlight < concurrency) {
            inFlight += 1;
            grant();
        }
        else {
            waiters.push(() => {
                inFlight += 1;
                grant();
            });
        }
    });
    const release = () => {
        inFlight -= 1;
        const wake = waiters.shift();
        if (wake)
            wake();
    };
    async function fetchOne(url) {
        // Latency is measured from enqueue time, so it includes queue wait.
        const start = Date.now();
        await acquire();
        try {
            const rawResult = await novadaProxyFetch({ url, country, session_id, format, timeout }, adapter, credentials);
            const parsed = JSON.parse(rawResult);
            return {
                url,
                ok: true,
                status_code: parsed.data.status_code,
                content: parsed.data.content,
                size_bytes: parsed.data.size_bytes,
                cache_hit: parsed.meta.cache_hit,
                latency_ms: Date.now() - start,
            };
        }
        catch (err) {
            const latency_ms = Date.now() - start;
            let msg = err instanceof Error ? err.message : String(err);
            // Redact credentials from per-URL error messages (proxy URLs may leak
            // in TLS errors): each secret is scrubbed raw and URI-encoded, the
            // adapter's sensitive fields first and the `user` field last.
            const secrets = adapter.sensitiveFields.map((field) => credentials[field]);
            secrets.push(credentials["user"]);
            for (const secret of secrets) {
                if (secret) {
                    msg = msg.replaceAll(secret, "***");
                    msg = msg.replaceAll(encodeURIComponent(secret), "***");
                }
            }
            // Infer a coarse error code from the message; first match wins.
            const rules = [
                ["RATE_LIMITED", msg.includes("429") || msg.includes("rate limit")],
                ["TIMEOUT", msg.includes("timeout") || msg.includes("ECONNABORTED")],
                ["TLS_ERROR", msg.includes("TLS") || msg.includes("SSL")],
                ["BOT_DETECTION_SUSPECTED", msg.includes("blocked") || msg.includes("403") || msg.includes("401")],
                ["INVALID_INPUT", msg.includes("must start with")],
            ];
            const hit = rules.find(([, matched]) => matched);
            return {
                url,
                ok: false,
                error: { code: hit ? hit[0] : "UNKNOWN_ERROR", message: msg },
                latency_ms,
            };
        }
        finally {
            release();
        }
    }
    const results = await Promise.all(urls.map(fetchOne));
    const succeeded = results.filter((r) => r.ok).length;
    const cachedCount = results.filter((r) => r.ok && r.cache_hit).length;
    const result = {
        ok: true,
        tool: "novada_proxy_batch_fetch",
        data: {
            requested: urls.length,
            succeeded,
            failed: results.length - succeeded,
            results: results,
        },
        meta: {
            latency_ms: Date.now() - wallStart,
            concurrency,
            // `country` is only emitted when one was actually supplied.
            ...(country ? { country } : {}),
            quota: {
                // Cache hits do not consume upstream credits.
                credits_estimated: urls.length - cachedCount,
                note: QUOTA_NOTE,
            },
        },
    };
    return JSON.stringify(result);
}
113
/**
 * Validates raw tool input for novada_proxy_batch_fetch and normalizes it
 * into a BatchFetchParams object (defaults applied, numbers coerced).
 * Throws an Error with a caller-facing message on the first violation.
 */
export function validateBatchFetchParams(raw) {
    const { urls } = raw;
    if (!urls || !Array.isArray(urls)) {
        throw new Error("urls is required and must be an array");
    }
    if (urls.length < 2 || urls.length > 20) {
        throw new Error("urls must contain between 2 and 20 URLs");
    }
    // Note: URL format is intentionally NOT validated here — invalid URLs are
    // captured as per-item errors in data.results rather than failing the whole
    // batch. This allows agents to pass mixed lists and inspect failures per-URL.
    if (urls.some((u) => typeof u !== "string")) {
        throw new Error("each url must be a string");
    }
    const { country, session_id } = raw;
    if (country !== undefined
        && (typeof country !== "string" || country.length > 10 || !SAFE_COUNTRY.test(country))) {
        throw new Error("country must be a 2-letter ISO code with no hyphens (e.g. US, DE, GB)");
    }
    if (session_id !== undefined
        && (typeof session_id !== "string" || session_id.length > 64 || !SAFE_SESSION_ID.test(session_id))) {
        throw new Error("session_id must contain only letters, numbers, and underscores, max 64 chars (no hyphens)");
    }
    if (raw.format && raw.format !== "raw" && raw.format !== "markdown") {
        throw new Error("format must be 'raw' or 'markdown'");
    }
    const timeout = raw.timeout === undefined ? 60 : Number(raw.timeout);
    if (!Number.isFinite(timeout) || timeout < 1 || timeout > 120) {
        throw new Error("timeout must be between 1 and 120 seconds");
    }
    const concurrency = raw.concurrency === undefined ? 3 : Number(raw.concurrency);
    if (!Number.isFinite(concurrency) || concurrency < 1 || concurrency > 5) {
        throw new Error("concurrency must be between 1 and 5");
    }
    return {
        urls,
        country,
        session_id,
        format: raw.format || "markdown",
        timeout,
        concurrency,
    };
}
@@ -0,0 +1,33 @@
1
import type { ProxyAdapter, ProxyCredentials } from "../adapters/index.js";
/** Input parameters for novada_proxy_crawl. */
export interface CrawlParams {
    /** Seed URL the crawl starts from. */
    url: string;
    depth?: number;
    limit?: number;
    // NOTE(review): max_pages/max_depth look like aliases of limit/depth —
    // confirm the precedence rules against crawl.js.
    max_pages?: number;
    max_depth?: number;
    /** Include page content in each CrawlPageResult. */
    include_content?: boolean;
    // Pattern syntax (substring/glob/regex) is not visible here — see crawl.js.
    include_patterns?: string[];
    exclude_patterns?: string[];
    render?: "none" | "render" | "browser";
    output_format?: "markdown" | "html" | "text";
    extract_fields?: string[];
    country?: string;
    timeout?: number;
    format?: "markdown" | "raw";
    rate_limit?: number;
    /** WebSocket endpoint for browser rendering — presumably overrides NOVADA_BROWSER_WS; verify. */
    browser_ws?: string;
}
/** Result for a single crawled page. */
export interface CrawlPageResult {
    url: string;
    /** Crawl depth at which this page was visited. */
    depth: number;
    title?: string;
    status_code?: number;
    total_links: number;
    new_links: number;
    /** Page content; present when include_content was requested. */
    content?: string;
    /** Values for extract_fields; null presumably means "not found" — verify. */
    extracted_fields?: Record<string, string | null>;
    links_found?: string[];
    /** Set when fetching/processing this page failed. */
    error?: string;
}
export declare function novadaProxyCrawl(params: CrawlParams, adapter: ProxyAdapter, credentials: ProxyCredentials): Promise<string>;
export declare function validateCrawlParams(raw: Record<string, unknown>): CrawlParams;