@stubber/virtual-worker 1.5.2 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app/commands/browser/browser_extract_html.js +6 -3
- package/app/commands/browser/browser_extract_markdown.js +45 -0
- package/app/commands/browser/browser_find.js +49 -0
- package/app/commands/browser/browser_wait.js +46 -0
- package/app/commands/index.js +11 -5
- package/app/commands/run_commands.js +5 -0
- package/devel/tests.sh +46 -2
- package/package.json +2 -1
|
@@ -22,11 +22,14 @@ export const browser_extract_html = async (params, stubber_context) => {
|
|
|
22
22
|
const { locator } = params || {};
|
|
23
23
|
|
|
24
24
|
if (locator) {
|
|
25
|
-
const target = page.locator(locator);
|
|
26
|
-
if (!
|
|
25
|
+
const target = await page.locator(locator).all();
|
|
26
|
+
if (!target.length) {
|
|
27
27
|
return create_error_conceptual({ message: "Locator did not match any elements", details: { locator } });
|
|
28
28
|
}
|
|
29
|
-
|
|
29
|
+
for (const element of target) {
|
|
30
|
+
const outer_html = await element.evaluate((el) => el.outerHTML);
|
|
31
|
+
html_content += outer_html; // Concatenate HTML of all matched elements
|
|
32
|
+
}
|
|
30
33
|
} else {
|
|
31
34
|
html_content = await page.content();
|
|
32
35
|
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { create_error_conceptual } from "#app/functions/create_error_conceptual.js";
|
|
2
|
+
import { create_error_technical } from "#app/functions/create_error_technical.js";
|
|
3
|
+
import { create_success } from "#root/app/functions/create_success.js";
|
|
4
|
+
import { get_chromium_page } from "../../helpers/get_chromium_page.js";
|
|
5
|
+
import { NodeHtmlMarkdown as node_html_md } from "node-html-markdown";
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
*
|
|
9
|
+
* @param {Object} params
|
|
10
|
+
* @param {string} [params.locator] - Optional locator to extract HTML from a specific element
|
|
11
|
+
*/
|
|
12
|
+
export const browser_extract_markdown = async (params, stubber_context) => {
|
|
13
|
+
// Get the Chromium page
|
|
14
|
+
const page_result = await get_chromium_page(params, stubber_context);
|
|
15
|
+
if (!page_result.success) {
|
|
16
|
+
return page_result;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
const page = page_result.payload;
|
|
20
|
+
let html_content = "";
|
|
21
|
+
let markdown_content = "";
|
|
22
|
+
|
|
23
|
+
try {
|
|
24
|
+
const { locator } = params || {};
|
|
25
|
+
|
|
26
|
+
if (locator) {
|
|
27
|
+
const target = await page.locator(locator).all();
|
|
28
|
+
if (!target.length) {
|
|
29
|
+
return create_error_conceptual({ message: "Locator did not match any elements", details: { locator } });
|
|
30
|
+
}
|
|
31
|
+
for (const element of target) {
|
|
32
|
+
const outer_html = await element.evaluate((el) => el.outerHTML);
|
|
33
|
+
html_content += outer_html; // Concatenate HTML of all matched elements
|
|
34
|
+
}
|
|
35
|
+
markdown_content = node_html_md.translate(html_content);
|
|
36
|
+
} else {
|
|
37
|
+
html_content = await page.content();
|
|
38
|
+
markdown_content = node_html_md.translate(html_content);
|
|
39
|
+
}
|
|
40
|
+
} catch (error) {
|
|
41
|
+
return create_error_technical(error);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
return create_success({ message: `Markdown content retrieved successfully`, payload: { markdown_content } });
|
|
45
|
+
};
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { create_error_conceptual } from "#app/functions/create_error_conceptual.js";
|
|
2
|
+
import { create_error_technical } from "#app/functions/create_error_technical.js";
|
|
3
|
+
import { create_success } from "#root/app/functions/create_success.js";
|
|
4
|
+
import { get_chromium_page } from "../../helpers/get_chromium_page.js";
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Used to find contents in the page and returns surrounding HTML.
|
|
8
|
+
* @param {Object} params
|
|
9
|
+
* @param {string|{pattern:string, flags?:string}} [params.text] - Optional find by text content
|
|
10
|
+
* @param {number} [params.parent_levels] - Optional number of parent levels to include in the result (default: 0, meaning only the element itself)
|
|
11
|
+
*/
|
|
12
|
+
export const browser_find = async (params, stubber_context) => {
|
|
13
|
+
// Get the Chromium page
|
|
14
|
+
const page_result = await get_chromium_page(params, stubber_context);
|
|
15
|
+
if (!page_result.success) {
|
|
16
|
+
return page_result;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
const page = page_result.payload;
|
|
20
|
+
|
|
21
|
+
if (!params?.text) {
|
|
22
|
+
return create_error_conceptual({ message: "Missing required parameter: text" });
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
try {
|
|
26
|
+
const text = typeof params.text === "string" ? params.text : new RegExp(params.text.pattern, params.text.flags);
|
|
27
|
+
|
|
28
|
+
const locators = await page.getByText(text).all();
|
|
29
|
+
|
|
30
|
+
if (!locators.length) {
|
|
31
|
+
return create_error_conceptual({ message: "Text not found on the page", details: { text: params.text } });
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
const results = [];
|
|
35
|
+
for (const locator of locators) {
|
|
36
|
+
let element = locator;
|
|
37
|
+
for (let i = 0; i < (params.parent_levels || 0); i++) {
|
|
38
|
+
element = await element.evaluateHandle((el) => el.parentElement);
|
|
39
|
+
if (!element) break;
|
|
40
|
+
}
|
|
41
|
+
const html_content = await element.evaluate((el) => el.outerHTML);
|
|
42
|
+
results.push({ html_content });
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
return create_success({ message: `Found ${results.length} element(s) matching the text`, payload: { results } });
|
|
46
|
+
} catch (error) {
|
|
47
|
+
return create_error_technical(error);
|
|
48
|
+
}
|
|
49
|
+
};
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { create_error_conceptual } from "#app/functions/create_error_conceptual.js";
|
|
2
|
+
import { create_error_technical } from "#app/functions/create_error_technical.js";
|
|
3
|
+
import { create_success } from "#root/app/functions/create_success.js";
|
|
4
|
+
import { get_chromium_page } from "../../helpers/get_chromium_page.js";
|
|
5
|
+
|
|
6
|
+
// * @param {string} [params.text] - wait for specific text to appear
|
|
7
|
+
// * @param {string} [params.url] - wait for URL pattern
|
|
8
|
+
// * @param {string} [params.function] - wait for a custom function to return true, `window.ready == true` etc
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
*
|
|
12
|
+
* @param {Object} params
|
|
13
|
+
* @param {string} [params.locator] - wait for a specific element to appear using a locator
|
|
14
|
+
* @param {number} [params.ms] - wait for a specific amount of milliseconds
|
|
15
|
+
* @param {string} [params.load_state] - wait for specific load state (e.g. "load", "domcontentloaded", "networkidle")
|
|
16
|
+
*/
|
|
17
|
+
export const browser_wait = async (params, stubber_context) => {
|
|
18
|
+
// Get the Chromium page
|
|
19
|
+
const page_result = await get_chromium_page(params, stubber_context);
|
|
20
|
+
if (!page_result.success) {
|
|
21
|
+
return page_result;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
const page = page_result.payload;
|
|
25
|
+
|
|
26
|
+
if (params.locator) {
|
|
27
|
+
await page.waitForSelector(params.locator);
|
|
28
|
+
|
|
29
|
+
return create_success({ message: `Element matching locator "${params.locator}" is now present on the page` });
|
|
30
|
+
} else if (params.ms) {
|
|
31
|
+
await page.waitForTimeout(params.ms);
|
|
32
|
+
|
|
33
|
+
return create_success({ message: `Waited for ${params.ms} milliseconds` });
|
|
34
|
+
} else if (params.load_state) {
|
|
35
|
+
await page.waitForLoadState(params.load_state);
|
|
36
|
+
|
|
37
|
+
return create_success({ message: `Page reached load state "${params.load_state}"` });
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
// return create_error_conceptual({ message: "Locator did not match any elements", details: { locator } });
|
|
41
|
+
// return create_error_technical(error);
|
|
42
|
+
|
|
43
|
+
return create_error_conceptual({
|
|
44
|
+
message: "No valid wait condition provided. Please specify a locator, ms, or load_state in the parameters.",
|
|
45
|
+
});
|
|
46
|
+
};
|
package/app/commands/index.js
CHANGED
|
@@ -1,19 +1,22 @@
|
|
|
1
1
|
import { browser_click } from "./browser/browser_click.js";
|
|
2
2
|
import { browser_extract_data } from "./browser/browser_extract_data.js";
|
|
3
|
+
import { browser_extract_html } from "./browser/browser_extract_html.js";
|
|
4
|
+
import { browser_get_clipboard_text } from "./browser/browser_get_clipboard_text.js";
|
|
3
5
|
import { browser_get_localstorage } from "./browser/browser_get_localstorage.js";
|
|
6
|
+
import { browser_manage_sessions } from "./browser/browser_manage_sessions.js";
|
|
4
7
|
import { browser_navigate } from "./browser/browser_navigate.js";
|
|
5
|
-
import { browser_write_text } from "./browser/browser_write_text.js";
|
|
6
8
|
import { browser_press_key } from "./browser/browser_press_key.js";
|
|
7
|
-
import { browser_extract_html } from "./browser/browser_extract_html.js";
|
|
8
9
|
import { browser_screenshot } from "./browser/browser_screenshot.js";
|
|
9
|
-
import { browser_get_clipboard_text } from "./browser/browser_get_clipboard_text.js";
|
|
10
|
-
import { browser_manage_sessions } from "./browser/browser_manage_sessions.js";
|
|
10
|
+
import { browser_write_text } from "./browser/browser_write_text.js";
|
|
11
11
|
import { cli_run } from "./cli/cli_run.js";
|
|
12
12
|
|
|
13
|
-
import { upload_files } from "./file-server/upload_files.js";
|
|
14
13
|
import { download_files } from "./file-server/download_files.js";
|
|
14
|
+
import { upload_files } from "./file-server/upload_files.js";
|
|
15
15
|
|
|
16
16
|
import { api_proxy } from "./api_proxy/api_proxy.js";
|
|
17
|
+
import { browser_extract_markdown } from "./browser/browser_extract_markdown.js";
|
|
18
|
+
import { browser_wait } from "./browser/browser_wait.js";
|
|
19
|
+
import { browser_find } from "./browser/browser_find.js";
|
|
17
20
|
|
|
18
21
|
const all_commands = {
|
|
19
22
|
browser_click,
|
|
@@ -23,9 +26,12 @@ const all_commands = {
|
|
|
23
26
|
browser_write_text,
|
|
24
27
|
browser_press_key,
|
|
25
28
|
browser_extract_html,
|
|
29
|
+
browser_extract_markdown,
|
|
26
30
|
browser_screenshot,
|
|
27
31
|
browser_get_clipboard_text,
|
|
28
32
|
browser_manage_sessions,
|
|
33
|
+
browser_wait,
|
|
34
|
+
browser_find,
|
|
29
35
|
|
|
30
36
|
cli_run,
|
|
31
37
|
|
|
@@ -31,6 +31,7 @@ export const run_commands = async (task, _stubber) => {
|
|
|
31
31
|
cloned_context.commands = payload.commands;
|
|
32
32
|
|
|
33
33
|
let important_command_failed = false;
|
|
34
|
+
let command_index = 0;
|
|
34
35
|
for (const [command_name, command] of command_entries) {
|
|
35
36
|
console.log(`Starting command: ${command_name}`, command);
|
|
36
37
|
|
|
@@ -79,6 +80,10 @@ export const run_commands = async (task, _stubber) => {
|
|
|
79
80
|
delete payload.commands[command_name].payload.attachments;
|
|
80
81
|
}
|
|
81
82
|
|
|
83
|
+
// ensure a __order key is set on the payload.commands[command_name] for clearer debugging
|
|
84
|
+
payload.commands[command_name].__order = command_index;
|
|
85
|
+
command_index++;
|
|
86
|
+
|
|
82
87
|
if (!payload.commands[command_name].success && !command.continue_on_error) {
|
|
83
88
|
important_command_failed = true;
|
|
84
89
|
break; // Stop execution if continue_on_error is falsy
|
package/devel/tests.sh
CHANGED
|
@@ -10,8 +10,52 @@ curl -X POST "http://localhost:3000/api/v1/task-gateway/virtual_worker_send_comm
|
|
|
10
10
|
"command_1": {
|
|
11
11
|
"commandtype": "browser_navigate",
|
|
12
12
|
"params": {
|
|
13
|
-
"url": "https://fast.com"
|
|
14
|
-
|
|
13
|
+
"url": "https://fast.com"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"command_2": {
|
|
17
|
+
"commandtype": "browser_wait",
|
|
18
|
+
"params": {
|
|
19
|
+
"ms": 2000
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"command_3": {
|
|
23
|
+
"commandtype": "browser_extract_markdown",
|
|
24
|
+
"params": {
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
"_stubber": {
|
|
30
|
+
"orguuid": "c8cfd7f1-8015-43ff-8878-d22c136a2325",
|
|
31
|
+
"stubref": "my-stub-ref"
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
}'
|
|
35
|
+
|
|
36
|
+
curl -X POST "http://localhost:3000/api/v1/task-gateway/virtual_worker_send_commands" \
|
|
37
|
+
-H "Content-Type: application/json" \
|
|
38
|
+
-H "stubber-virtual-worker-apikey: 123-456-789" \
|
|
39
|
+
-d '{
|
|
40
|
+
"task": {
|
|
41
|
+
"tasktype": "virtual_worker_send_commands",
|
|
42
|
+
"task_name": "virtual_worker_send_commands",
|
|
43
|
+
"params": {
|
|
44
|
+
"commands": {
|
|
45
|
+
"command_1": {
|
|
46
|
+
"commandtype": "browser_navigate",
|
|
47
|
+
"params": {
|
|
48
|
+
"url": "https://berkshirehathaway.com"
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"command_2": {
|
|
52
|
+
"commandtype": "browser_find",
|
|
53
|
+
"params": {
|
|
54
|
+
"text": {
|
|
55
|
+
"pattern": "Warren.*Buffett",
|
|
56
|
+
"flags": "i"
|
|
57
|
+
},
|
|
58
|
+
"parent_levels": 1
|
|
15
59
|
}
|
|
16
60
|
}
|
|
17
61
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stubber/virtual-worker",
|
|
3
|
-
"version": "1.5.2",
|
|
3
|
+
"version": "1.5.3",
|
|
4
4
|
"description": "Template to easily create a node app and keep development standards",
|
|
5
5
|
"main": "app.js",
|
|
6
6
|
"directories": {
|
|
@@ -38,6 +38,7 @@
|
|
|
38
38
|
"lodash-es": "^4.17.21",
|
|
39
39
|
"mime-types": "^3.0.1",
|
|
40
40
|
"net": "^1.0.2",
|
|
41
|
+
"node-html-markdown": "^2.0.0",
|
|
41
42
|
"playwright": "^1.53.0",
|
|
42
43
|
"socket.io-client": "^4.8.1",
|
|
43
44
|
"uuid": "^9.0.0",
|