@intuned/browser-dev 0.1.4-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +21 -0
- package/.eslintignore +10 -0
- package/.eslintrc.js +39 -0
- package/BROWSER_SCRIPTS_SETUP.md +84 -0
- package/LICENSE +43 -0
- package/README.md +160 -0
- package/RELEASE.md +60 -0
- package/dist/ai/export.d.js +5 -0
- package/dist/ai/export.d.ts +641 -0
- package/dist/ai/extractStructuredData.js +320 -0
- package/dist/ai/extractStructuredDataUsingAi.js +142 -0
- package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
- package/dist/ai/extractionHelpers/validateSchema.js +148 -0
- package/dist/ai/index.d.ts +641 -0
- package/dist/ai/index.js +19 -0
- package/dist/ai/isPageLoaded.js +80 -0
- package/dist/ai/prompt.js +39 -0
- package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
- package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
- package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
- package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
- package/dist/ai/tools/index.js +48 -0
- package/dist/ai/types/errors.js +67 -0
- package/dist/ai/types/models.js +45 -0
- package/dist/ai/types/types.js +48 -0
- package/dist/ai/validators.js +167 -0
- package/dist/common/Logger/index.js +60 -0
- package/dist/common/Logger/types.js +5 -0
- package/dist/common/SdkError.js +50 -0
- package/dist/common/aiModelsValidations.js +32 -0
- package/dist/common/ensureBrowserScripts.js +14 -0
- package/dist/common/extendedTest.js +157 -0
- package/dist/common/extractionHelpers.js +19 -0
- package/dist/common/formatZodError.js +18 -0
- package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
- package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
- package/dist/common/fuzzySearch/utils.js +23 -0
- package/dist/common/getModelProvider.js +18 -0
- package/dist/common/getSimplifiedHtml.js +122 -0
- package/dist/common/hashObject.js +32 -0
- package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
- package/dist/common/html2markdown/index.js +19 -0
- package/dist/common/jwtTokenManager.js +57 -0
- package/dist/common/loadRuntime.js +16 -0
- package/dist/common/locatorHelpers.js +41 -0
- package/dist/common/matching/collectStrings.js +32 -0
- package/dist/common/matching/levenshtein.js +40 -0
- package/dist/common/matching/matching.js +317 -0
- package/dist/common/matching/types.js +1 -0
- package/dist/common/noEmpty.js +9 -0
- package/dist/common/saveSnapshotWithExamples.js +60 -0
- package/dist/common/script.js +2602 -0
- package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
- package/dist/common/xpathMapping.js +107 -0
- package/dist/helpers/clickUntilExhausted.js +85 -0
- package/dist/helpers/downloadFile.js +125 -0
- package/dist/helpers/export.d.js +5 -0
- package/dist/helpers/export.d.ts +1220 -0
- package/dist/helpers/extractMarkdown.js +35 -0
- package/dist/helpers/filterEmptyValues.js +54 -0
- package/dist/helpers/gotoUrl.js +98 -0
- package/dist/helpers/index.d.ts +1220 -0
- package/dist/helpers/index.js +122 -0
- package/dist/helpers/processDate.js +25 -0
- package/dist/helpers/resolveUrl.js +64 -0
- package/dist/helpers/sanitizeHtml.js +74 -0
- package/dist/helpers/saveFileToS3.js +50 -0
- package/dist/helpers/scrollToLoadContent.js +57 -0
- package/dist/helpers/tests/testClickUntilExhausted.spec.js +372 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +206 -0
- package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
- package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
- package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
- package/dist/helpers/tests/testProcessDate.spec.js +13 -0
- package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
- package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
- package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
- package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
- package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
- package/dist/helpers/types/Attachment.js +115 -0
- package/dist/helpers/types/CustomTypeRegistry.js +48 -0
- package/dist/helpers/types/RunEnvironment.js +18 -0
- package/dist/helpers/types/ValidationError.js +17 -0
- package/dist/helpers/types/index.js +51 -0
- package/dist/helpers/uploadFileToS3.js +154 -0
- package/dist/helpers/utils/getS3Client.js +22 -0
- package/dist/helpers/utils/index.js +73 -0
- package/dist/helpers/utils/isDownload.js +10 -0
- package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
- package/dist/helpers/utils/isLocator.js +9 -0
- package/dist/helpers/utils/jwtTokenManager.js +18 -0
- package/dist/helpers/validateDataUsingSchema.js +103 -0
- package/dist/helpers/waitForDomSettled.js +90 -0
- package/dist/helpers/withNetworkSettledWait.js +91 -0
- package/dist/index.d.js +16 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +16 -0
- package/dist/intunedServices/ApiGateway/aiApiGateway.js +143 -0
- package/dist/intunedServices/ApiGateway/factory.js +16 -0
- package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
- package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
- package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +355 -0
- package/dist/intunedServices/ApiGateway/types.js +11 -0
- package/dist/intunedServices/cache/cache.js +61 -0
- package/dist/intunedServices/cache/index.js +12 -0
- package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
- package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
- package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
- package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
- package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
- package/dist/optimized-extractors/common/index.js +55 -0
- package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
- package/dist/optimized-extractors/common/matching/matching.js +212 -0
- package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
- package/dist/optimized-extractors/common/matching/types.js +18 -0
- package/dist/optimized-extractors/common/matching/utils.js +184 -0
- package/dist/optimized-extractors/common/utils.js +58 -0
- package/dist/optimized-extractors/export.d.js +5 -0
- package/dist/optimized-extractors/export.d.ts +397 -0
- package/dist/optimized-extractors/extractArray.js +120 -0
- package/dist/optimized-extractors/extractObject.js +104 -0
- package/dist/optimized-extractors/index.d.ts +397 -0
- package/dist/optimized-extractors/index.js +31 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +269 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +146 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js +130 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
- package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +160 -0
- package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
- package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +243 -0
- package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
- package/dist/optimized-extractors/models/anthropicModel.js +23 -0
- package/dist/optimized-extractors/models/openaiModel.js +23 -0
- package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
- package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
- package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
- package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
- package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
- package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
- package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
- package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
- package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
- package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
- package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
- package/dist/optimized-extractors/types/errors.js +42 -0
- package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
- package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
- package/dist/optimized-extractors/types/types.js +5 -0
- package/dist/optimized-extractors/validators.js +152 -0
- package/dist/types/intuned-runtime.d.js +1 -0
- package/dist/types/intuned-runtime.d.ts +64 -0
- package/dist/vite-env.d.js +1 -0
- package/dist/vite-env.d.ts +9 -0
- package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
- package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
- package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
- package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
- package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
- package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
- package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
- package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
- package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
- package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
- package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
- package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
- package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
- package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
- package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
- package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
- package/generated-docs/helpers/functions/processDate.mdx +55 -0
- package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
- package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
- package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
- package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
- package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
- package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
- package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
- package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
- package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
- package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
- package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
- package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
- package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
- package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
- package/how-to-generate-docs.md +61 -0
- package/how-to-run-tests.md +42 -0
- package/intuned-runtime-setup.md +13 -0
- package/package.json +124 -0
- package/tsconfig.eslint.json +5 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.isTableHeaderOrFooter = isTableHeaderOrFooter;
|
|
7
|
+
var _anthropicModel = require("../models/anthropicModel");
|
|
8
|
+
var _zod = require("zod");
|
|
9
|
+
var _neverthrow = require("neverthrow");
|
|
10
|
+
var Errors = _interopRequireWildcard(require("../types/errors"));
|
|
11
|
+
// Module-interop helper (appears to be transpiler-emitted; confirm before hand-editing).
// - If `e` is flagged `__esModule` and `t` is falsy, `e` is returned unchanged.
// - Otherwise a null-prototype namespace object is returned with `default: e` plus a copy of
//   every own enumerable property of `e` except "default"; properties that have a getter or
//   setter are copied as accessors via Object.defineProperty instead of by value.
// - When WeakMap is available, results are cached per input object (separate caches for
//   truthy and falsy `t`), and the function replaces itself with the inner implementation
//   on first call.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
12
|
+
/**
 * Asks an Anthropic model whether `content` is a table header or a table footer.
 *
 * The model is offered a single tool, `is_table_header_or_footer`, whose boolean input
 * carries the verdict. Every failure other than "insufficient AI credits" is treated as
 * "not a header/footer" so callers get a best-effort answer instead of an exception.
 *
 * @param {string|Buffer} content - Text to classify; any non-string value is sent as a
 *   base64 image labelled `image/png` (assumes a Buffer of PNG bytes — confirm with callers).
 * @returns {Promise<object>} A neverthrow Result: `ok({ isHeader })`, where `isHeader` is
 *   true for a header OR a footer, or `err(...)` carrying the insufficient-credits error
 *   when the request fails with status 449.
 */
async function isTableHeaderOrFooter(content) {
  // Nothing to classify: skip the model call entirely.
  if (!content) {
    return (0, _neverthrow.ok)({
      isHeader: false
    });
  }

  const anthropic = (0, _anthropicModel.createAnthropicInstance)();
  const itemContent = typeof content === "string" ? {
    type: "text",
    text: content
  } : {
    type: "image",
    source: {
      data: content.toString("base64"),
      media_type: "image/png",
      type: "base64"
    }
  };

  const response = await (0, _neverthrow.fromPromise)(anthropic.messages.create({
    max_tokens: 4096,
    temperature: 0,
    messages: [{
      role: "user",
      content: [{
        type: "text",
        text: `given the following content, you need to decide if the content is a html table header or a table footer,
a table header is a row that contains labels for table columns, and footer usually has pagination information or summary of the table`
      }, itemContent]
    }],
    model: "claude-3-haiku-20240307",
    tools: [{
      input_schema: {
        type: "object",
        properties: {
          isTableHeaderOrFooter: {
            type: "boolean"
          }
        },
        required: ["isTableHeaderOrFooter"]
      },
      name: "is_table_header_or_footer",
      description: `given a text or image content, decide if the content is a table header or footer or not.`
    }]
  }), error => {
    // Status 449 is mapped to the insufficient-credits error. The body shape is not
    // guaranteed, so read it defensively: throwing inside this mapper would reject the
    // awaited result instead of producing an Err.
    if (error?.status === 449) {
      const detail = error.error?.error ?? error.message;
      return Errors.insufficientAiCredits(`🔴 ${detail}`);
    }
    // Any other failure carries no payload; it is downgraded to "not a header" below.
    return undefined;
  });

  if (response.isErr()) {
    if (response.error?.type === "InsufficientAiCredits") {
      return (0, _neverthrow.err)(response.error);
    }
    return (0, _neverthrow.ok)({
      isHeader: false
    });
  }

  // The verdict is only available if the model actually invoked the tool.
  const tool = response.value.content.find(t => t.type === "tool_use");
  if (!tool) {
    return (0, _neverthrow.ok)({
      isHeader: false
    });
  }

  const headersParsingResults = _zod.z.object({
    isTableHeaderOrFooter: _zod.z.boolean()
  }).safeParse(tool.input);

  return (0, _neverthrow.ok)({
    isHeader: headersParsingResults.success ? headersParsingResults.data.isTableHeaderOrFooter : false
  });
}
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.MatchSource = exports.MatchMode = void 0;
|
|
7
|
+
exports.filterAndRankMatches = filterAndRankMatches;
|
|
8
|
+
exports.matchStringsWithDomContent = matchStringsWithDomContent;
|
|
9
|
+
exports.matchStringsWithDomContentInBrowser = matchStringsWithDomContentInBrowser;
|
|
10
|
+
exports.normalizeSpacing = normalizeSpacing;
|
|
11
|
+
exports.rankMatch = rankMatch;
|
|
12
|
+
exports.removePunctuationAndSpaces = removePunctuationAndSpaces;
|
|
13
|
+
exports.replaceWithBestMatches = replaceWithBestMatches;
|
|
14
|
+
exports.selectBestMatch = selectBestMatch;
|
|
15
|
+
var _levenshtein = require("../../../common/matching/levenshtein");
|
|
16
|
+
var _ensureBrowserScripts = require("../../../common/ensureBrowserScripts");
|
|
17
|
+
// Enumeration of the string values compared against a match's `match_mode` field.
const MatchMode = exports.MatchMode = {
  FULL: "full",
  PARTIAL: "partial",
  FUZZY: "fuzzy"
};
|
|
23
|
+
// Enumeration of string values naming where in the DOM a match was found
// (presumably the values carried in a match's `match_source` field — confirm with producer).
const MatchSource = exports.MatchSource = {
  ATTRIBUTE: "attribute",
  TEXT_CONTENT: "text_content",
  DIRECT_TEXT_NODE: "direct_text_node"
};
|
|
29
|
+
/**
 * Collapses every run of whitespace (spaces, tabs, newlines, ...) into a single space
 * and strips leading/trailing whitespace.
 *
 * @param {string} text - Text to normalize.
 * @returns {string} The text with single-space separation and no surrounding whitespace.
 */
function normalizeSpacing(text) {
  // `\s` already covers "\n" and "\t", so one pass is enough.
  return text.replace(/\s+/g, " ").trim();
}
|
|
34
|
+
/**
 * Strips ASCII punctuation characters and all whitespace from a string.
 *
 * @param {string} s - Input text.
 * @returns {string} `s` with every ASCII punctuation and whitespace character removed.
 */
function removePunctuationAndSpaces(s) {
  const punctuationOrSpace = /[!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~\s]/g;
  // Splitting on each unwanted character and re-joining drops those characters.
  const keptPieces = s.split(punctuationOrSpace);
  return keptPieces.join("");
}
|
|
37
|
+
/**
 * Rates how well `match` corresponds to `original`.
 *
 * A match is "HIGH" when either
 *  - the normalized original is longer than 20 characters and the Levenshtein ratio of the
 *    two normalized strings exceeds 0.85, or
 *  - the two strings are equal once case, punctuation and whitespace are ignored.
 * Everything else is "LOW".
 *
 * @param {string} original - The string that was searched for.
 * @param {string} match - The candidate value found for it.
 * @returns {"HIGH"|"LOW"} Confidence bucket for the candidate.
 */
function rankMatch(original, match) {
  // A missing side can never be a confident match (and would make normalization throw).
  if (original == null || match == null) {
    return "LOW";
  }

  // Normalize once, outside the guarded section: the fallback comparison needs the same
  // values, so a failure here must not be retried from inside a catch handler.
  const normalizedOriginal = normalizeSpacing(String(original)).toLowerCase();
  const normalizedMatch = normalizeSpacing(String(match)).toLowerCase();

  try {
    const ratio = (0, _levenshtein.levenshteinRatio)(normalizedOriginal, normalizedMatch);
    if (normalizedOriginal.length > 20 && ratio > 0.85) {
      return "HIGH";
    }
  } catch (error) {
    console.warn("Error in rankMatch, falling back to simple comparison", error);
  }

  const originalKey = removePunctuationAndSpaces(normalizedOriginal);
  const matchKey = removePunctuationAndSpaces(normalizedMatch);
  return originalKey === matchKey ? "HIGH" : "LOW";
}
|
|
62
|
+
/**
 * Picks the value that should stand in for `original` from a list of match records.
 *
 * The first non-fuzzy match wins outright. Otherwise only fuzzy matches that `rankMatch`
 * rates "HIGH" are considered, and the one with the smallest `fuzzy_distance` is chosen
 * (a missing distance counts as Infinity).
 *
 * @param {string} original - The string that was searched for.
 * @param {Array<object>} matches - Records with `match_mode`, `matched_value` and,
 *   optionally, `fuzzy_distance`.
 * @returns {*} The chosen record's `matched_value`, or null when nothing qualifies.
 */
function selectBestMatch(original, matches) {
  const nonFuzzy = [];
  const fuzzy = [];
  for (const candidate of matches) {
    if (candidate.match_mode === MatchMode.FUZZY) {
      fuzzy.push(candidate);
    } else {
      nonFuzzy.push(candidate);
    }
  }

  if (nonFuzzy.length > 0) {
    return nonFuzzy[0].matched_value;
  }

  const confident = fuzzy.filter(candidate => rankMatch(original, candidate.matched_value) === "HIGH");
  if (confident.length === 0) {
    return null;
  }

  const distanceOf = candidate => candidate.fuzzy_distance ?? Infinity;
  confident.sort((left, right) => distanceOf(left) - distanceOf(right));
  return confident[0].matched_value;
}
|
|
84
|
+
/**
 * Looks up each string of `stringsList` in the page DOM using the in-page
 * `window.__INTUNED__.matchStringsWithDomContent` helper.
 *
 * Best-effort: if the helper is missing, or anything fails on either side, every string
 * maps to an empty array instead of the call rejecting.
 *
 * @param {object} pageObject - Object exposing `locator()` and `evaluate()`
 *   (presumably a Playwright Page — confirm with callers).
 * @param {string[]} stringsList - Strings to search for.
 * @param {object} [container] - Element handle to restrict the search to; the `<html>`
 *   element is used when omitted.
 * @returns {Promise<Object<string, Array>>} Map from each searched string to its matches.
 */
async function matchStringsWithDomContent(pageObject, stringsList, container) {
  // Only a handle created here is ours to release; a caller-supplied one is left alone.
  let ownedHandle = null;
  try {
    await (0, _ensureBrowserScripts.ensureBrowserScripts)(pageObject);
    let handle = container;
    if (!container) {
      ownedHandle = await pageObject.locator("html").elementHandle();
      handle = ownedHandle;
    }
    const matches = await pageObject.evaluate(async ([container, searchTexts]) => {
      // Runs inside the page, so it cannot reference anything from the outer scope.
      const emptyResult = () => searchTexts.reduce((acc, text) => {
        acc[text] = [];
        return acc;
      }, {});
      try {
        if (typeof window.__INTUNED__ !== "undefined" && typeof window.__INTUNED__.matchStringsWithDomContent === "function") {
          return await window.__INTUNED__.matchStringsWithDomContent(container, searchTexts);
        }
        return emptyResult();
      } catch (error) {
        console.error("Error matching strings with DOM content:", error);
        return emptyResult();
      }
    }, [handle, stringsList]);
    return matches;
  } catch (e) {
    console.warn("Error matching strings with DOM content:", e);
    return stringsList.reduce((acc, string) => {
      acc[string] = [];
      return acc;
    }, {});
  } finally {
    if (ownedHandle) {
      try {
        await ownedHandle.dispose();
      } catch {
        // Best-effort cleanup: the page may already be closed.
      }
    }
  }
}
|
|
120
|
+
/**
 * Maps each input string to the best value found for it in the DOM.
 *
 * Strings with no matches map to themselves; strings with matches map to whatever
 * `selectBestMatch` returns for them (which may be null).
 *
 * @param {string[]} stringsToMatch - Strings to look up.
 * @param {object} pageObject - Forwarded to `matchStringsWithDomContent`.
 * @param {object} [container] - Forwarded to `matchStringsWithDomContent`.
 * @returns {Promise<Object<string, *>>} Map from each string to its replacement.
 */
async function replaceWithBestMatches(stringsToMatch, pageObject, container) {
  const matchesByString = await matchStringsWithDomContent(pageObject, stringsToMatch, container);
  const replacementPairs = [];
  for (const [needle, candidates] of Object.entries(matchesByString)) {
    const replacement = candidates.length > 0 ? selectBestMatch(needle, candidates) : needle;
    replacementPairs.push([needle, replacement]);
  }
  return Object.fromEntries(replacementPairs);
}
|
|
124
|
+
/**
 * Filters out matches located in SVG/path/style nodes, orders the rest from best to worst
 * and keeps only the best match per xpath.
 *
 * A match's score is the sum of:
 *  - match mode: full 3, partial 2, fuzzy 1 (anything else 0);
 *  - match source: direct_text_node 3, text_content 2, attribute 1 (anything else 0);
 *  - 1 if the element is currently visible;
 *  - for partial matches only, a bonus that shrinks as the source value has more
 *    characters beyond the matched value (1 when there are none).
 * Ties keep their input order.
 *
 * @param {object} frame - Object exposing `locator(selector)` whose result has
 *   `isVisible()` (presumably a Playwright Page or Frame — confirm with callers).
 * @param {Array<object>} matches - Records with `xpath`, `match_mode`, `match_source`
 *   and, optionally, `matched_value` / `matched_source_value`.
 * @returns {Promise<Array<object>>} De-duplicated matches, best first.
 */
async function filterAndRankMatches(frame, matches) {
  const excludedXpathFragments = ["[name()='svg']/", "/path", "/style"];
  const candidates = matches.filter(({
    xpath
  }) => !excludedXpathFragments.some(fragment => xpath.includes(fragment)));

  // Any failure to resolve visibility counts as "not visible".
  const isVisible = async match => {
    try {
      return await frame.locator(`xpath=${match.xpath}`).isVisible({
        timeout: 100
      });
    } catch {
      return false;
    }
  };
  const visibilityResults = await Promise.all(candidates.map(isVisible));

  const modeOrder = {
    full: 3,
    partial: 2,
    fuzzy: 1
  };
  const sourceOrder = {
    direct_text_node: 3,
    text_content: 2,
    attribute: 1
  };
  const scoreOf = (match, visible) => {
    // A record missing its mode/source simply scores 0 for that part instead of throwing.
    const mode = String(match.match_mode ?? "").toLowerCase();
    const source = String(match.match_source ?? "").toLowerCase();
    let partialScore = 0;
    if (mode === "partial" && match.matched_source_value && match.matched_value) {
      const extraChars = match.matched_source_value.length - match.matched_value.length;
      partialScore = extraChars === 0 ? 1 : 1 / extraChars;
    }
    return (modeOrder[mode] || 0) + (sourceOrder[source] || 0) + (visible ? 1 : 0) + partialScore;
  };

  // Score each match once up front rather than on every comparison made by the sort.
  const ranked = candidates.map((match, index) => ({
    match,
    score: scoreOf(match, visibilityResults[index])
  })).sort((a, b) => b.score - a.score);

  const seenXpaths = new Set();
  const uniqueMatches = [];
  for (const {
    match
  } of ranked) {
    if (seenXpaths.has(match.xpath)) {
      continue;
    }
    seenXpaths.add(match.xpath);
    uniqueMatches.push(match);
  }
  return uniqueMatches;
}
|
|
179
|
+
/**
 * Searches `frame` and, recursively, every frame nested below it for the given strings
 * using the in-page `window.__INTUNED__.matchStringsWithDomContent` helper, and merges the
 * per-frame results.
 *
 * Unlike the top-level frame, a nested frame that fails is skipped silently.
 *
 * @param {object} frame - Object exposing `locator()` and `evaluate()` plus either
 *   `childFrames()` or `mainFrame()`/`frames()` (presumably a Playwright Frame or Page —
 *   confirm with callers).
 * @param {string[]} stringsList - Strings to search for.
 * @param {object} [container] - Locator restricting the search in the top-level frame;
 *   the `<html>` element is used when omitted. Nested frames are always searched whole.
 * @returns {Promise<Object<string, Array>>} Map from each string to its matches across
 *   all visited frames.
 */
async function matchStringsWithDomContentInBrowser(frame, stringsList, container) {
  const scope = container ? container : frame.locator("html");
  const handle = await scope.elementHandle();
  console.info(`Searching for ${stringsList.length} strings in the DOM:`, stringsList);

  let matches;
  try {
    matches = await frame.evaluate(async ([container, searchTexts]) => {
      const result = await window.__INTUNED__.matchStringsWithDomContent(container, searchTexts);
      return result;
    }, [handle, stringsList]);
  } finally {
    // The handle was created above solely for this call; release it either way.
    if (handle) {
      try {
        await handle.dispose();
      } catch {
        // Best-effort cleanup: the frame may already be detached.
      }
    }
  }

  // Recurse through direct children only. A page-level `frames()` list also contains the
  // main frame and every descendant, so walking it recursively would scan the main frame
  // again and visit nested frames several times, duplicating their matches.
  let frames = [];
  if (typeof frame.childFrames === "function") {
    frames = frame.childFrames();
  } else if (typeof frame.mainFrame === "function") {
    frames = frame.mainFrame().childFrames();
  } else if ("frames" in frame) {
    frames = frame.frames();
  }

  for (const subframe of frames) {
    try {
      const frameMatches = await matchStringsWithDomContentInBrowser(subframe, stringsList, null);
      for (const [string, stringMatches] of Object.entries(frameMatches)) {
        if (string in matches) {
          matches[string].push(...stringMatches);
        } else {
          matches[string] = stringMatches;
        }
      }
    } catch {
      continue;
    }
  }
  return matches;
}
|