@apify/actors-mcp-server 0.9.11-beta.2 → 0.9.12-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/manifest.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "manifest_version": "0.2",
3
3
  "name": "apify-mcp-server",
4
4
  "display_name": "Apify",
5
- "version": "0.9.11",
5
+ "version": "0.9.12",
6
6
  "description": "Extract data from any website with thousands of scrapers, crawlers, and automations on Apify Store.",
7
7
  "long_description": "Apify is the world's largest marketplace of tools for web scraping, data extraction, and web automation. You can extract structured data from social media, e-commerce, search engines, maps, travel sites, or any other website.",
8
8
  "keywords": [
@@ -88,7 +88,7 @@
88
88
  "linux"
89
89
  ],
90
90
  "runtimes": {
91
- "node": ">=22.0.0"
91
+ "node": ">=18.0.0"
92
92
  }
93
93
  }
94
94
  }
package/package.json CHANGED
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "name": "@apify/actors-mcp-server",
3
- "version": "0.9.11-beta.2",
3
+ "version": "0.9.12-beta.0",
4
4
  "type": "module",
5
5
  "description": "Apify MCP Server",
6
6
  "mcpName": "com.apify/apify-mcp-server",
7
7
  "engines": {
8
- "node": ">=22.0.0"
8
+ "node": ">=18.0.0"
9
9
  },
10
10
  "main": "dist/index.js",
11
11
  "exports": {
@@ -47,13 +47,11 @@
47
47
  "@modelcontextprotocol/sdk": "^1.25.2",
48
48
  "@segment/analytics-node": "^2.3.0",
49
49
  "@sentry/node": "^10.38.0",
50
- "@types/cheerio": "^0.22.35",
51
50
  "@types/turndown": "^5.0.5",
52
51
  "ajv": "^8.17.1",
53
52
  "algoliasearch": "^5.31.0",
54
53
  "apify": "^3.4.2",
55
54
  "apify-client": "^2.22.1",
56
- "cheerio": "^1.1.2",
57
55
  "dotenv": "^16.4.7",
58
56
  "express": "^4.21.2",
59
57
  "mcp-client-capabilities": "^0.0.5",
package/server.json CHANGED
@@ -6,7 +6,7 @@
6
6
  "url": "https://github.com/apify/apify-mcp-server",
7
7
  "source": "github"
8
8
  },
9
- "version": "0.9.11",
9
+ "version": "0.9.12",
10
10
  "remotes": [
11
11
  {
12
12
  "type": "streamable-http",
@@ -1,3 +0,0 @@
1
- import type { ToolEntry } from '../../types.js';
2
- export declare const getHtmlSkeleton: ToolEntry;
3
- //# sourceMappingURL=get_html_skeleton.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"get_html_skeleton.d.ts","sourceRoot":"","sources":["../../../src/tools/common/get_html_skeleton.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAoB,SAAS,EAAmB,MAAM,gBAAgB,CAAC;AAgCnF,eAAO,MAAM,eAAe,EAAE,SAyFnB,CAAC"}
@@ -1,102 +0,0 @@
1
- import { z } from 'zod';
2
- import { ApifyClient } from '../../apify_client.js';
3
- import { HelperTools, RAG_WEB_BROWSER, TOOL_MAX_OUTPUT_CHARS, TOOL_STATUS } from '../../const.js';
4
- import { getHtmlSkeletonCache } from '../../state.js';
5
- import { compileSchema } from '../../utils/ajv.js';
6
- import { isValidHttpUrl } from '../../utils/generic.js';
7
- import { stripHtml } from '../../utils/html.js';
8
- import { buildMCPResponse } from '../../utils/mcp.js';
9
- const getHtmlSkeletonArgs = z.object({
10
- url: z.string()
11
- .min(1)
12
- .describe('URL of the webpage to retrieve HTML skeleton from.'),
13
- enableJavascript: z.boolean()
14
- .optional()
15
- .default(false)
16
- .describe('Whether to enable JavaScript rendering. Enabling this may increase the time taken to retrieve the HTML skeleton.'),
17
- chunk: z.number()
18
- .optional()
19
- .default(1)
20
- .describe('Chunk number to retrieve when getting the content. The content is split into chunks to prevent exceeding the maximum tool output length.'),
21
- });
22
- export const getHtmlSkeleton = Object.freeze({
23
- type: 'internal',
24
- name: HelperTools.GET_HTML_SKELETON,
25
- description: `Retrieve the HTML skeleton (clean structure) of a webpage by stripping scripts, styles, and non-essential attributes.
26
- This keeps the core HTML structure, links, images, and data attributes for analysis. Supports optional JavaScript rendering for dynamic pages.
27
-
28
- The results will include a chunked HTML skeleton if the content is large. Use the chunk parameter to paginate through the output.
29
-
30
- USAGE:
31
- - Use when you need a clean HTML structure to design selectors or parsers for scraping.
32
-
33
- USAGE EXAMPLES:
34
- - user_input: Get HTML skeleton for https://example.com
35
- - user_input: Get next chunk of HTML skeleton for https://example.com (chunk=2)`,
36
- inputSchema: z.toJSONSchema(getHtmlSkeletonArgs),
37
- ajvValidate: compileSchema(z.toJSONSchema(getHtmlSkeletonArgs)),
38
- annotations: {
39
- title: 'Get HTML skeleton',
40
- readOnlyHint: true,
41
- destructiveHint: false,
42
- idempotentHint: true,
43
- openWorldHint: true,
44
- },
45
- call: async (toolArgs) => {
46
- const { args, apifyToken } = toolArgs;
47
- const parsed = getHtmlSkeletonArgs.parse(args);
48
- if (!isValidHttpUrl(parsed.url)) {
49
- return buildMCPResponse({
50
- texts: [`The provided URL is not a valid HTTP or HTTPS URL: ${parsed.url}`],
51
- isError: true,
52
- toolStatus: TOOL_STATUS.SOFT_FAIL,
53
- });
54
- }
55
- // Try to get from cache first
56
- let strippedHtml = getHtmlSkeletonCache.get(parsed.url);
57
- if (!strippedHtml) {
58
- // Not in cache, call the Actor for scraping
59
- const client = new ApifyClient({ token: apifyToken });
60
- const run = await client.actor(RAG_WEB_BROWSER).call({
61
- query: parsed.url,
62
- outputFormats: [
63
- 'html',
64
- ],
65
- scrapingTool: parsed.enableJavascript ? 'browser-playwright' : 'raw-http',
66
- });
67
- const datasetItems = await client.dataset(run.defaultDatasetId).listItems();
68
- if (datasetItems.items.length === 0) {
69
- return buildMCPResponse({
70
- texts: [`The scraping Actor (${RAG_WEB_BROWSER}) did not return any output for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`],
71
- isError: true,
72
- });
73
- }
74
- const firstItem = datasetItems.items[0];
75
- if (firstItem.crawl.httpStatusMessage.toLocaleLowerCase() !== 'ok') {
76
- return buildMCPResponse({
77
- texts: [`The scraping Actor (${RAG_WEB_BROWSER}) returned an HTTP status ${firstItem.crawl.httpStatusCode} (${firstItem.crawl.httpStatusMessage}) for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`],
78
- isError: true,
79
- });
80
- }
81
- if (!firstItem.html) {
82
- return buildMCPResponse({
83
- texts: [`The scraping Actor (${RAG_WEB_BROWSER}) did not return any HTML content for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`],
84
- isError: true,
85
- });
86
- }
87
- strippedHtml = stripHtml(firstItem.html);
88
- getHtmlSkeletonCache.set(parsed.url, strippedHtml);
89
- }
90
- // Pagination logic
91
- const totalLength = strippedHtml.length;
92
- const chunkSize = TOOL_MAX_OUTPUT_CHARS;
93
- const totalChunks = Math.ceil(totalLength / chunkSize);
94
- const startIndex = (parsed.chunk - 1) * chunkSize;
95
- const endIndex = Math.min(startIndex + chunkSize, totalLength);
96
- const chunkContent = strippedHtml.slice(startIndex, endIndex);
97
- const hasNextChunk = parsed.chunk < totalChunks;
98
- const chunkInfo = `\n\n--- Chunk ${parsed.chunk} of ${totalChunks} ---\n${hasNextChunk ? `Next chunk: ${parsed.chunk + 1}` : 'End of content'}`;
99
- return buildMCPResponse({ texts: [chunkContent + chunkInfo] });
100
- },
101
- });
102
- //# sourceMappingURL=get_html_skeleton.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"get_html_skeleton.js","sourceRoot":"","sources":["../../../src/tools/common/get_html_skeleton.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AACpD,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,qBAAqB,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAClG,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAEtD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AActD,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;IACjC,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE;SACV,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACnE,gBAAgB,EAAE,CAAC,CAAC,OAAO,EAAE;SACxB,QAAQ,EAAE;SACV,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CAAC,kHAAkH,CAAC;IACjI,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;SACZ,QAAQ,EAAE;SACV,OAAO,CAAC,CAAC,CAAC;SACV,QAAQ,CAAC,0IAA0I,CAAC;CAC5J,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,eAAe,GAAc,MAAM,CAAC,MAAM,CAAC;IACpD,IAAI,EAAE,UAAU;IAChB,IAAI,EAAE,WAAW,CAAC,iBAAiB;IACnC,WAAW,EAAE;;;;;;;;;;gFAU+D;IAC5E,WAAW,EAAE,CAAC,CAAC,YAAY,CAAC,mBAAmB,CAAoB;IACnE,WAAW,EAAE,aAAa,CAAC,CAAC,CAAC,YAAY,CAAC,mBAAmB,CAAC,CAAC;IAC/D,WAAW,EAAE;QACT,KAAK,EAAE,mBAAmB;QAC1B,YAAY,EAAE,IAAI;QAClB,eAAe,EAAE,KAAK;QACtB,cAAc,EAAE,IAAI;QACpB,aAAa,EAAE,IAAI;KACtB;IACD,IAAI,EAAE,KAAK,EAAE,QAA0B,EAAE,EAAE;QACvC,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,QAAQ,CAAC;QACtC,MAAM,MAAM,GAAG,mBAAmB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE/C,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YAC9B,OAAO,gBAAgB,CAAC;gBACpB,KAAK,EAAE,CAAC,sDAAsD,MAAM,CAAC,GAAG,EAAE,CAAC;gBAC3E,OAAO,EAAE,IAAI;gBACb,UAAU,EAAE,WAAW,CAAC,SAAS;aACpC,CAAC,CAAC;QACP,CAAC;QAED,8BAA8B;QAC9B,IAAI,YAAY,GAAG,oBAAoB,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACxD,IAAI,CAAC,YAAY,EAAE,CAAC;YAChB,4CAA4C;YAC5C,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC;YAEtD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC;gBACjD,KAAK,EAAE,MAAM,CAAC,GAAG;gBACjB,aAAa,EAAE;oBACX,MAAM;iBACT;gBACD,YAAY,EAAE,MAAM,CAAC,gBAAgB,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,UAAU;aAC5E,CAAC,CAAC;YAEH,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC,SAAS,EAAE,CAAC;YAC5E,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAClC,OAAO,gBAAgB,CAAC;oBACpB,KAAK,EAAE,CAAC,uBAAuB,eAAe,4CAA4C,MAAM,CAAC,GAAG,kDAAkD,GAAG,CAAC,EAAE,EAAE,CAAC;oBAC/J,OAAO,EAAE,IAAI;iBAChB,CAAC,CAAC;YACP,CAAC;YAED,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,CAA+B,CAAC;YACtE,IAAI,SAAS,CAAC,KAAK,CAAC,iBAAiB,CAAC,iBAAiB,EAAE,KAAK,IAAI,EAAE,CAAC;gBACjE,OAAO,gBAAgB,CAAC;oBACpB,KAAK,EAAE,CAAC,uBAAuB,eAAe,6BAA6B,SAAS,CAAC,KAAK,CAAC,cAAc,KAAK,SAAS,CAAC,KAAK,CAAC,iBAAiB,kBAAkB,MAAM,CAAC,GAAG,kDAAkD,GAAG,CAAC,EAAE,EAAE,CAAC;oBACtO,OAAO,EAAE,IAAI;iBAChB,CAAC,CAAC;YACP,CAAC;YAED,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;gBAClB,OAAO,gBAAgB,CAAC;oBACpB,KAAK,EAAE,CAAC,uBAAuB,eAAe,kDAAkD,MAAM,CAAC,GAAG,kDAAkD,GAAG,CAAC,EAAE,EAAE,CAAC;oBACrK,OAAO,EAAE,IAAI;iBAChB,CAAC,CAAC;YACP,CAAC;YAED,YAAY,GAAG,SAAS,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACzC,oBAAoB,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;QACvD,CAAC;QAED,mBAAmB;QACnB,MAAM,WAAW,GAAG,YAAY,CAAC,MAAM,CAAC;QACxC,MAAM,SAAS,GAAG,qBAAqB,CAAC;QACxC,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,GAAG,SAAS,CAAC,CAAC;QACvD,MAAM,UAAU,GAAG,CAAC,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;QAClD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,SAAS,EAAE,WAAW,CAAC,CAAC;QAC/D,MAAM,YAAY,GAAG,YAAY,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QAC9D,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,GAAG,WAAW,CAAC;QAEhD,MAAM,SAAS,GAAG,iBAAiB,MAAM,CAAC,KAAK,OAAO,WAAW,SAAS,YAAY,CAAC,CAAC,CAAC,eAAe,MAAM,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,EAAE,CAAC;QAEhJ,OAAO,gBAAgB,CAAC,EAAE,KAAK,EAAE,CAAC,YAAY,GAAG,SAAS,CAAC,EAAE,CAAC,CAAC;IACnE,CAAC;CACK,CAAC,CAAC"}
@@ -1,11 +0,0 @@
1
- /**
2
- * Strips HTML and keeps only the structure.
3
- *
4
- * Removes styles, scripts, and other non-content elements.
5
- * Collapses whitespace and trims the result.
6
- * Keeps only href, src, alt, id, class, title, name, data-* attributes.
7
- * Removes HTML comments and spaces between tags.
8
- * Removes base64 encoded images.
9
- */
10
- export declare function stripHtml(html: string): string;
11
- //# sourceMappingURL=html.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../src/utils/html.ts"],"names":[],"mappings":"AAWA;;;;;;;;GAQG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAuC9C"}
@@ -1,48 +0,0 @@
1
- import * as cheerio from 'cheerio';
2
- /**
3
- * Strips HTML and keeps only the structure.
4
- *
5
- * Removes styles, scripts, and other non-content elements.
6
- * Collapses whitespace and trims the result.
7
- * Keeps only href, src, alt, id, class, title, name, data-* attributes.
8
- * Removes HTML comments and spaces between tags.
9
- * Removes base64 encoded images.
10
- */
11
- export function stripHtml(html) {
12
- const $ = cheerio.load(html);
13
- // Remove all attributes except href (only on a), src, alt, id, class, title, name, data-*
14
- const allowedAttrs = ['href', 'src', 'alt', 'id', 'class', 'title', 'name'];
15
- $('*').each((_, element) => {
16
- const { attribs } = element;
17
- if (attribs) {
18
- Object.keys(attribs).forEach((attr) => {
19
- if (attr === 'href' && element.tagName !== 'a') {
20
- $(element).removeAttr(attr);
21
- }
22
- else if (!allowedAttrs.includes(attr) && !attr.startsWith('data-')) {
23
- $(element).removeAttr(attr);
24
- }
25
- });
26
- }
27
- });
28
- // Remove <style>, <script>, <noscript>, <iframe>, <svg>, <canvas>, <math> tags and their content
29
- $('style, script, noscript, iframe, svg, canvas, math').remove();
30
- // Remove HTML comments
31
- $('*').contents().filter((_, element) => element.type === 'comment').remove();
32
- // Remove base64 encoded images
33
- $('img[src^="data:image/"]').remove();
34
- let result;
35
- if (html.trim() === '') {
36
- result = '';
37
- }
38
- else if (html.includes('<html')) {
39
- result = $.html();
40
- }
41
- else {
42
- result = $('body').html() || '';
43
- }
44
- // Collapse multiple spaces into one, remove spaces between tags, and trim
45
- result = result.replace(/\s+/g, ' ').replace(/>\s+</g, '><').trim();
46
- return result;
47
- }
48
- //# sourceMappingURL=html.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"html.js","sourceRoot":"","sources":["../../src/utils/html.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAWnC;;;;;;;;GAQG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY;IAClC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,0FAA0F;IAC1F,MAAM,YAAY,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;IAC5E,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;QACvB,MAAM,EAAE,OAAO,EAAE,GAAI,OAA8B,CAAC;QACpD,IAAI,OAAO,EAAE,CAAC;YACV,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;gBAClC,IAAI,IAAI,KAAK,MAAM,IAAK,OAA8B,CAAC,OAAO,KAAK,GAAG,EAAE,CAAC;oBACrE,CAAC,CAAC,OAAO,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;gBAChC,CAAC;qBAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;oBACnE,CAAC,CAAC,OAAO,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;gBAChC,CAAC;YACL,CAAC,CAAC,CAAC;QACP,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,iGAAiG;IACjG,CAAC,CAAC,oDAAoD,CAAC,CAAC,MAAM,EAAE,CAAC;IAEjE,uBAAuB;IACvB,CAAC,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE,CAAE,OAAoB,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,MAAM,EAAE,CAAC;IAE5F,+BAA+B;IAC/B,CAAC,CAAC,yBAAyB,CAAC,CAAC,MAAM,EAAE,CAAC;IAEtC,IAAI,MAAM,CAAC;IACX,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACrB,MAAM,GAAG,EAAE,CAAC;IAChB,CAAC;SAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACtB,CAAC;SAAM,CAAC;QACJ,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC;IAED,0EAA0E;IAC1E,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;IACpE,OAAO,MAAM,CAAC;AAClB,CAAC"}