@apify/actors-mcp-server 0.4.9 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/const.d.ts +5 -1
- package/dist/const.d.ts.map +1 -1
- package/dist/const.js +5 -1
- package/dist/const.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.d.ts.map +1 -1
- package/dist/state.js +3 -1
- package/dist/state.js.map +1 -1
- package/dist/tools/get-html-skeleton.d.ts +3 -0
- package/dist/tools/get-html-skeleton.d.ts.map +1 -0
- package/dist/tools/get-html-skeleton.js +76 -0
- package/dist/tools/get-html-skeleton.js.map +1 -0
- package/dist/tools/index.d.ts +1 -0
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +4 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/utils/generic.d.ts +15 -9
- package/dist/utils/generic.d.ts.map +1 -1
- package/dist/utils/generic.js +28 -10
- package/dist/utils/generic.js.map +1 -1
- package/dist/utils/html.d.ts +11 -0
- package/dist/utils/html.d.ts.map +1 -0
- package/dist/utils/html.js +48 -0
- package/dist/utils/html.js.map +1 -0
- package/dist/utils/mcp.d.ts +10 -0
- package/dist/utils/mcp.d.ts.map +1 -0
- package/dist/utils/mcp.js +9 -0
- package/dist/utils/mcp.js.map +1 -0
- package/package.json +5 -1
package/dist/utils/generic.d.ts
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parses a comma-separated string into an array of trimmed strings.
|
|
3
|
+
* Empty strings are filtered out after trimming.
|
|
4
|
+
*
|
|
5
|
+
* @param input - The comma-separated string to parse. If undefined, returns an empty array.
|
|
6
|
+
* @returns An array of trimmed, non-empty strings.
|
|
7
|
+
* @example
|
|
8
|
+
* parseCommaSeparatedList("a, b, c"); // ["a", "b", "c"]
|
|
9
|
+
* parseCommaSeparatedList("a, , b"); // ["a", "b"]
|
|
10
|
+
*/
|
|
11
|
+
export declare function parseCommaSeparatedList(input?: string): string[];
|
|
1
12
|
/**
|
|
2
13
|
* Recursively gets the value in a nested object for each key in the keys array.
|
|
3
14
|
* Each key can be a dot-separated path (e.g. 'a.b.c').
|
|
@@ -8,16 +19,11 @@
|
|
|
8
19
|
* const value = getValuesByDotKeys(obj, ['a.b.c', 'a.b.d', 'nested']);
|
|
9
20
|
* value; // { 'a.b.c': 42, 'a.b.d': undefined, 'nested': { d: 100 } }
|
|
10
21
|
*/
|
|
22
|
+
export declare function getValuesByDotKeys(obj: Record<string, unknown>, keys: string[]): Record<string, unknown>;
|
|
11
23
|
/**
|
|
12
|
-
* Parses a comma-separated string into an array of trimmed strings.
|
|
13
|
-
* Empty strings are filtered out after trimming.
|
|
24
|
+
* Validates whether a given string is a well-formed URL.
|
|
14
25
|
*
|
|
15
|
-
* @param input - The comma-separated string to parse. If undefined, returns an empty array.
|
|
16
|
-
* @returns An array of trimmed, non-empty strings.
|
|
17
|
-
* @example
|
|
18
|
-
* parseCommaSeparatedList("a, b, c"); // ["a", "b", "c"]
|
|
19
|
-
* parseCommaSeparatedList("a, , b"); // ["a", "b"]
|
|
26
|
+
* Allows only valid HTTP or HTTPS URLs.
|
|
20
27
|
*/
|
|
21
|
-
export declare function parseCommaSeparatedList(input?: string): string[];
|
|
22
|
-
export declare function getValuesByDotKeys(obj: Record<string, unknown>, keys: string[]): Record<string, unknown>;
|
|
28
|
+
export declare function isValidHttpUrl(urlString: string): boolean;
|
|
23
29
|
//# sourceMappingURL=generic.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generic.d.ts","sourceRoot":"","sources":["../../src/utils/generic.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH
|
|
1
|
+
{"version":3,"file":"generic.d.ts","sourceRoot":"","sources":["../../src/utils/generic.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAKhE;AAED;;;;;;;;;GASG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAqBxG;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAWzD"}
|
package/dist/utils/generic.js
CHANGED
|
@@ -1,13 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Recursively gets the value in a nested object for each key in the keys array.
|
|
3
|
-
* Each key can be a dot-separated path (e.g. 'a.b.c').
|
|
4
|
-
* Returns an object mapping each key to its resolved value (or undefined if not found).
|
|
5
|
-
*
|
|
6
|
-
* @example
|
|
7
|
-
* const obj = { a: { b: { c: 42 } }, nested: { d: 100 } };
|
|
8
|
-
* const value = getValuesByDotKeys(obj, ['a.b.c', 'a.b.d', 'nested']);
|
|
9
|
-
* value; // { 'a.b.c': 42, 'a.b.d': undefined, 'nested': { d: 100 } }
|
|
10
|
-
*/
|
|
11
1
|
/**
|
|
12
2
|
* Parses a comma-separated string into an array of trimmed strings.
|
|
13
3
|
* Empty strings are filtered out after trimming.
|
|
@@ -24,6 +14,16 @@ export function parseCommaSeparatedList(input) {
|
|
|
24
14
|
}
|
|
25
15
|
return input.split(',').map((s) => s.trim()).filter((s) => s.length > 0);
|
|
26
16
|
}
|
|
17
|
+
/**
|
|
18
|
+
* Recursively gets the value in a nested object for each key in the keys array.
|
|
19
|
+
* Each key can be a dot-separated path (e.g. 'a.b.c').
|
|
20
|
+
* Returns an object mapping each key to its resolved value (or undefined if not found).
|
|
21
|
+
*
|
|
22
|
+
* @example
|
|
23
|
+
* const obj = { a: { b: { c: 42 } }, nested: { d: 100 } };
|
|
24
|
+
* const value = getValuesByDotKeys(obj, ['a.b.c', 'a.b.d', 'nested']);
|
|
25
|
+
* value; // { 'a.b.c': 42, 'a.b.d': undefined, 'nested': { d: 100 } }
|
|
26
|
+
*/
|
|
27
27
|
export function getValuesByDotKeys(obj, keys) {
|
|
28
28
|
const result = {};
|
|
29
29
|
for (const key of keys) {
|
|
@@ -45,4 +45,22 @@ export function getValuesByDotKeys(obj, keys) {
|
|
|
45
45
|
}
|
|
46
46
|
return result;
|
|
47
47
|
}
|
|
48
|
+
/**
|
|
49
|
+
* Validates whether a given string is a well-formed URL.
|
|
50
|
+
*
|
|
51
|
+
* Allows only valid HTTP or HTTPS URLs.
|
|
52
|
+
*/
|
|
53
|
+
export function isValidHttpUrl(urlString) {
|
|
54
|
+
if (!urlString.startsWith('http://') && !urlString.startsWith('https://')) {
|
|
55
|
+
return false;
|
|
56
|
+
}
|
|
57
|
+
try {
|
|
58
|
+
/* eslint-disable no-new */
|
|
59
|
+
new URL(urlString);
|
|
60
|
+
return true;
|
|
61
|
+
}
|
|
62
|
+
catch {
|
|
63
|
+
return false;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
48
66
|
//# sourceMappingURL=generic.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generic.js","sourceRoot":"","sources":["../../src/utils/generic.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH
|
|
1
|
+
{"version":3,"file":"generic.js","sourceRoot":"","sources":["../../src/utils/generic.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AACH,MAAM,UAAU,uBAAuB,CAAC,KAAc;IAClD,IAAI,CAAC,KAAK,EAAE,CAAC;QACT,OAAO,EAAE,CAAC;IACd,CAAC;IACD,OAAO,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AAC7E,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,kBAAkB,CAAC,GAA4B,EAAE,IAAc;IAC3E,MAAM,MAAM,GAA4B,EAAE,CAAC;IAC3C,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACrB,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,OAAO,GAAY,GAAG,CAAC;QAC3B,KAAK,MAAM,OAAO,IAAI,IAAI,EAAE,CAAC;YACzB,IACI,OAAO,KAAK,IAAI;mBACb,OAAO,OAAO,KAAK,QAAQ;mBAC3B,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,EAC3D,CAAC;gBACC,qDAAqD;gBACrD,OAAO,GAAI,OAAmC,CAAC,OAAO,CAAC,CAAC;YAC5D,CAAC;iBAAM,CAAC;gBACJ,OAAO,GAAG,SAAS,CAAC;gBACpB,MAAM;YACV,CAAC;QACL,CAAC;QACD,MAAM,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC;IAC1B,CAAC;IACD,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,SAAiB;IAC5C,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QACxE,OAAO,KAAK,CAAC;IACjB,CAAC;IACD,IAAI,CAAC;QACD,2BAA2B;QAC3B,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;QACnB,OAAO,IAAI,CAAC;IAChB,CAAC;IAAC,MAAM,CAAC;QACL,OAAO,KAAK,CAAC;IACjB,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Strips HTML and keeps only the structure.
|
|
3
|
+
*
|
|
4
|
+
* Removes styles, scripts, and other non-content elements.
|
|
5
|
+
* Collapses whitespace and trims the result.
|
|
6
|
+
* Keeps only href, src, alt, id, class, title, name, data-* attributes.
|
|
7
|
+
* Removes HTML comments and spaces between tags.
|
|
8
|
+
* Removes base64 encoded images.
|
|
9
|
+
*/
|
|
10
|
+
export declare function stripHtml(html: string): string;
|
|
11
|
+
//# sourceMappingURL=html.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../src/utils/html.ts"],"names":[],"mappings":"AAWA;;;;;;;;GAQG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAuC9C"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
/**
|
|
3
|
+
* Strips HTML and keeps only the structure.
|
|
4
|
+
*
|
|
5
|
+
* Removes styles, scripts, and other non-content elements.
|
|
6
|
+
* Collapses whitespace and trims the result.
|
|
7
|
+
* Keeps only href, src, alt, id, class, title, name, data-* attributes.
|
|
8
|
+
* Removes HTML comments and spaces between tags.
|
|
9
|
+
* Removes base64 encoded images.
|
|
10
|
+
*/
|
|
11
|
+
export function stripHtml(html) {
|
|
12
|
+
const $ = cheerio.load(html);
|
|
13
|
+
// Remove all attributes except href (only on a), src, alt, id, class, title, name, data-*
|
|
14
|
+
const allowedAttrs = ['href', 'src', 'alt', 'id', 'class', 'title', 'name'];
|
|
15
|
+
$('*').each((_, element) => {
|
|
16
|
+
const { attribs } = element;
|
|
17
|
+
if (attribs) {
|
|
18
|
+
Object.keys(attribs).forEach((attr) => {
|
|
19
|
+
if (attr === 'href' && element.tagName !== 'a') {
|
|
20
|
+
$(element).removeAttr(attr);
|
|
21
|
+
}
|
|
22
|
+
else if (!allowedAttrs.includes(attr) && !attr.startsWith('data-')) {
|
|
23
|
+
$(element).removeAttr(attr);
|
|
24
|
+
}
|
|
25
|
+
});
|
|
26
|
+
}
|
|
27
|
+
});
|
|
28
|
+
// Remove <style>, <script>, <noscript>, <iframe>, <svg>, <canvas>, <math> tags and their content
|
|
29
|
+
$('style, script, noscript, iframe, svg, canvas, math').remove();
|
|
30
|
+
// Remove HTML comments
|
|
31
|
+
$('*').contents().filter((_, element) => element.type === 'comment').remove();
|
|
32
|
+
// Remove base64 encoded images
|
|
33
|
+
$('img[src^="data:image/"]').remove();
|
|
34
|
+
let result;
|
|
35
|
+
if (html.trim() === '') {
|
|
36
|
+
result = '';
|
|
37
|
+
}
|
|
38
|
+
else if (html.includes('<html')) {
|
|
39
|
+
result = $.html();
|
|
40
|
+
}
|
|
41
|
+
else {
|
|
42
|
+
result = $('body').html() || '';
|
|
43
|
+
}
|
|
44
|
+
// Collapse multiple spaces into one, remove spaces between tags, and trim
|
|
45
|
+
result = result.replace(/\s+/g, ' ').replace(/>\s+</g, '><').trim();
|
|
46
|
+
return result;
|
|
47
|
+
}
|
|
48
|
+
//# sourceMappingURL=html.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html.js","sourceRoot":"","sources":["../../src/utils/html.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAWnC;;;;;;;;GAQG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY;IAClC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,0FAA0F;IAC1F,MAAM,YAAY,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;IAC5E,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;QACvB,MAAM,EAAE,OAAO,EAAE,GAAI,OAA8B,CAAC;QACpD,IAAI,OAAO,EAAE,CAAC;YACV,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;gBAClC,IAAI,IAAI,KAAK,MAAM,IAAK,OAA8B,CAAC,OAAO,KAAK,GAAG,EAAE,CAAC;oBACrE,CAAC,CAAC,OAAO,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;gBAChC,CAAC;qBAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;oBACnE,CAAC,CAAC,OAAO,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;gBAChC,CAAC;YACL,CAAC,CAAC,CAAC;QACP,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,iGAAiG;IACjG,CAAC,CAAC,oDAAoD,CAAC,CAAC,MAAM,EAAE,CAAC;IAEjE,uBAAuB;IACvB,CAAC,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE,CAAE,OAAoB,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,MAAM,EAAE,CAAC;IAE5F,+BAA+B;IAC/B,CAAC,CAAC,yBAAyB,CAAC,CAAC,MAAM,EAAE,CAAC;IAEtC,IAAI,MAAM,CAAC;IACX,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACrB,MAAM,GAAG,EAAE,CAAC;IAChB,CAAC;SAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACtB,CAAC;SAAM,CAAC;QACJ,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC;IAED,0EAA0E;IAC1E,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;IACpE,OAAO,MAAM,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mcp.d.ts","sourceRoot":"","sources":["../../src/utils/mcp.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE;;;;;EAI/C"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mcp.js","sourceRoot":"","sources":["../../src/utils/mcp.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAe;IAC5C,OAAO;QACH,OAAO,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;KACzD,CAAC;AACN,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@apify/actors-mcp-server",
|
|
3
|
-
"version": "0.4.9",
|
|
3
|
+
"version": "0.4.11",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Apify MCP Server",
|
|
6
|
+
"mcpName": "com.apify/apify-mcp-server",
|
|
6
7
|
"engines": {
|
|
7
8
|
"node": ">=18.0.0"
|
|
8
9
|
},
|
|
@@ -38,11 +39,13 @@
|
|
|
38
39
|
"@apify/datastructures": "^2.0.3",
|
|
39
40
|
"@apify/log": "^2.5.16",
|
|
40
41
|
"@modelcontextprotocol/sdk": "^1.17.4",
|
|
42
|
+
"@types/cheerio": "^0.22.35",
|
|
41
43
|
"@types/turndown": "^5.0.5",
|
|
42
44
|
"ajv": "^8.17.1",
|
|
43
45
|
"algoliasearch": "^5.31.0",
|
|
44
46
|
"apify": "^3.4.2",
|
|
45
47
|
"apify-client": "^2.12.6",
|
|
48
|
+
"cheerio": "^1.1.2",
|
|
46
49
|
"express": "^4.21.2",
|
|
47
50
|
"to-json-schema": "^0.2.5",
|
|
48
51
|
"turndown": "^7.2.0",
|
|
@@ -71,6 +74,7 @@
|
|
|
71
74
|
"start": "npm run start:dev",
|
|
72
75
|
"start:prod": "node dist/main.js",
|
|
73
76
|
"start:dev": "tsx src/main.ts",
|
|
77
|
+
"start:standby": "APIFY_META_ORIGIN=STANDBY npm run start",
|
|
74
78
|
"lint": "eslint .",
|
|
75
79
|
"lint:fix": "eslint . --fix",
|
|
76
80
|
"build": "tsc -b src",
|