@intuned/browser-dev 2.2.3-test-build.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +21 -0
- package/.eslintignore +10 -0
- package/.eslintrc.js +39 -0
- package/LICENSE +43 -0
- package/dist/ai/export.d.js +5 -0
- package/dist/ai/export.d.ts +641 -0
- package/dist/ai/extractStructuredData.js +320 -0
- package/dist/ai/extractStructuredDataUsingAi.js +139 -0
- package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
- package/dist/ai/extractionHelpers/validateSchema.js +148 -0
- package/dist/ai/index.d.ts +641 -0
- package/dist/ai/index.js +19 -0
- package/dist/ai/isPageLoaded.js +77 -0
- package/dist/ai/prompt.js +39 -0
- package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
- package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
- package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
- package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
- package/dist/ai/tools/index.js +48 -0
- package/dist/ai/types/errors.js +67 -0
- package/dist/ai/types/models.js +45 -0
- package/dist/ai/types/types.js +48 -0
- package/dist/ai/validators.js +167 -0
- package/dist/common/Logger/index.js +60 -0
- package/dist/common/Logger/types.js +5 -0
- package/dist/common/SdkError.js +50 -0
- package/dist/common/aiModelsValidations.js +32 -0
- package/dist/common/browser_scripts.js +2596 -0
- package/dist/common/ensureBrowserScripts.js +18 -0
- package/dist/common/extendedTest.js +148 -0
- package/dist/common/extractionHelpers.js +19 -0
- package/dist/common/formatZodError.js +18 -0
- package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
- package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
- package/dist/common/fuzzySearch/utils.js +23 -0
- package/dist/common/getModelProvider.js +18 -0
- package/dist/common/getSimplifiedHtml.js +122 -0
- package/dist/common/hashObject.js +32 -0
- package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
- package/dist/common/html2markdown/index.js +19 -0
- package/dist/common/jwtTokenManager.js +18 -0
- package/dist/common/loadRuntime.js +16 -0
- package/dist/common/locatorHelpers.js +41 -0
- package/dist/common/matching/collectStrings.js +32 -0
- package/dist/common/matching/levenshtein.js +40 -0
- package/dist/common/matching/matching.js +317 -0
- package/dist/common/matching/types.js +1 -0
- package/dist/common/noEmpty.js +9 -0
- package/dist/common/saveSnapshotWithExamples.js +60 -0
- package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
- package/dist/common/xpathMapping.js +107 -0
- package/dist/helpers/clickUntilExhausted.js +85 -0
- package/dist/helpers/downloadFile.js +125 -0
- package/dist/helpers/export.d.js +5 -0
- package/dist/helpers/export.d.ts +1220 -0
- package/dist/helpers/extractMarkdown.js +35 -0
- package/dist/helpers/filterEmptyValues.js +54 -0
- package/dist/helpers/gotoUrl.js +98 -0
- package/dist/helpers/index.d.ts +1220 -0
- package/dist/helpers/index.js +128 -0
- package/dist/helpers/processDate.js +25 -0
- package/dist/helpers/resolveUrl.js +64 -0
- package/dist/helpers/sanitizeHtml.js +74 -0
- package/dist/helpers/saveFileToS3.js +50 -0
- package/dist/helpers/scrollToLoadContent.js +57 -0
- package/dist/helpers/tests/extendedTest.js +130 -0
- package/dist/helpers/tests/testClickUntilExhausted.spec.js +387 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +204 -0
- package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
- package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
- package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
- package/dist/helpers/tests/testProcessDate.spec.js +13 -0
- package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
- package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
- package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
- package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
- package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
- package/dist/helpers/types/Attachment.js +115 -0
- package/dist/helpers/types/CustomTypeRegistry.js +48 -0
- package/dist/helpers/types/RunEnvironment.js +18 -0
- package/dist/helpers/types/ValidationError.js +17 -0
- package/dist/helpers/types/index.js +51 -0
- package/dist/helpers/uploadFileToS3.js +154 -0
- package/dist/helpers/utils/getS3Client.js +22 -0
- package/dist/helpers/utils/index.js +73 -0
- package/dist/helpers/utils/isDownload.js +10 -0
- package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
- package/dist/helpers/utils/isLocator.js +9 -0
- package/dist/helpers/utils/jwtTokenManager.js +18 -0
- package/dist/helpers/validateDataUsingSchema.js +103 -0
- package/dist/helpers/waitForDomSettled.js +90 -0
- package/dist/helpers/withNetworkSettledWait.js +91 -0
- package/dist/index.d.js +16 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +16 -0
- package/dist/intunedServices/ApiGateway/aiApiGateway.js +99 -0
- package/dist/intunedServices/ApiGateway/factory.js +13 -0
- package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
- package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
- package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +224 -0
- package/dist/intunedServices/ApiGateway/types.js +11 -0
- package/dist/intunedServices/cache/cache.js +61 -0
- package/dist/intunedServices/cache/index.js +12 -0
- package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
- package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
- package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
- package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
- package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
- package/dist/optimized-extractors/common/index.js +55 -0
- package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
- package/dist/optimized-extractors/common/matching/matching.js +212 -0
- package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
- package/dist/optimized-extractors/common/matching/types.js +18 -0
- package/dist/optimized-extractors/common/matching/utils.js +184 -0
- package/dist/optimized-extractors/common/utils.js +58 -0
- package/dist/optimized-extractors/export.d.js +5 -0
- package/dist/optimized-extractors/export.d.ts +397 -0
- package/dist/optimized-extractors/extractArray.js +120 -0
- package/dist/optimized-extractors/extractObject.js +104 -0
- package/dist/optimized-extractors/index.d.ts +397 -0
- package/dist/optimized-extractors/index.js +31 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
- package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
- package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
- package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
- package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
- package/dist/optimized-extractors/models/anthropicModel.js +23 -0
- package/dist/optimized-extractors/models/openaiModel.js +23 -0
- package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
- package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
- package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
- package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
- package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
- package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
- package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
- package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
- package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
- package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
- package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
- package/dist/optimized-extractors/types/errors.js +42 -0
- package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
- package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
- package/dist/optimized-extractors/types/types.js +5 -0
- package/dist/optimized-extractors/validators.js +152 -0
- package/dist/vite-env.d.js +1 -0
- package/dist/vite-env.d.ts +9 -0
- package/docs.md +14 -0
- package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
- package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
- package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
- package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
- package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
- package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
- package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
- package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
- package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
- package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
- package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
- package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
- package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
- package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
- package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
- package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
- package/generated-docs/helpers/functions/processDate.mdx +55 -0
- package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
- package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
- package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
- package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
- package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
- package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
- package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
- package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
- package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
- package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
- package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
- package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
- package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
- package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
- package/how-to-run-tests.md +10 -0
- package/intuned-runtime-setup.md +13 -0
- package/package.json +119 -0
- package/tsconfig.eslint.json +5 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.getSimplifiedHtmlPerListItem = getSimplifiedHtmlPerListItem;
|
|
7
|
+
var _getSimplifiedHtml = require("../../objectExtractionHelpers/getSimplifiedHtml");
|
|
8
|
+
/**
 * Builds the simplified HTML of every list item, one entry per locator, in
 * the same order as the input.
 *
 * Items are handled strictly one after another: each locator is first
 * resolved to an element handle, and that handle is then passed to
 * `getSimplifiedHtml` with visibility filtering disabled and both the
 * content-as-prop and on-click options enabled.
 *
 * @param itemsLocators Array of locator-like objects exposing `elementHandle()`.
 * @returns Promise of an array holding whatever `getSimplifiedHtml` returned
 *          for each item.
 */
async function getSimplifiedHtmlPerListItem(itemsLocators) {
  const htmlPerItem = [];
  for (const itemLocator of itemsLocators) {
    const itemHandle = await itemLocator.elementHandle();
    // A fresh options object is created for every call, as the callee owns it.
    const simplified = await (0, _getSimplifiedHtml.getSimplifiedHtml)(itemHandle, {
      keepOnlyVisibleElements: false,
      shouldIncludeContentAsProp: true,
      shouldIncludeOnClick: true
    });
    htmlPerItem.push(simplified);
  }
  return htmlPerItem;
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.createJsonFromTable = createJsonFromTable;
|
|
7
|
+
exports.isListTable = isListTable;
|
|
8
|
+
/**
 * Decides whether a list container lives inside an HTML table.
 *
 * The nearest `<table>` ancestor of `containerLocator` is looked up via XPath.
 * When there is none the answer is negative straight away. Otherwise the
 * table's element handle is fetched (1 second timeout) and the list counts as
 * a table only if that handle is truthy and every simplified item HTML string
 * contains the text "<tr".
 *
 * @param containerLocator    Locator-like object for the list container.
 * @param itemsSimplifiedHtml Array of simplified HTML strings, one per item.
 * @returns Promise of `{ tableLocater, isTable }`; `tableLocater` is the table
 *          element handle, or `undefined` when no table ancestor exists.
 */
async function isListTable(containerLocator, itemsSimplifiedHtml) {
  const closestTable = containerLocator.locator("xpath=ancestor::table[1]");
  const tableMatches = await closestTable.all();
  if (tableMatches.length === 0) {
    return {
      tableLocater: undefined,
      isTable: false
    };
  }

  const tableHandle = await closestTable.elementHandle({
    timeout: 1_000
  });
  const everyItemHasRow = itemsSimplifiedHtml.every(itemHtml => itemHtml.includes("<tr"));
  return {
    tableLocater: tableHandle,
    isTable: Boolean(tableHandle) && everyItemHasRow
  };
}
|
|
26
|
+
/**
 * Converts the first `<table>` of the page into an array of plain objects.
 *
 * Everything runs inside `page.evaluate`, so the callback must not reference
 * anything from the enclosing module scope.
 *
 * - Headers are the text of every `<th>` in the table, with runs of
 *   whitespace collapsed to one space and the ends trimmed.
 * - The first `<tr>` is skipped; every following `<tr>` becomes one object.
 * - Cell `i` (the i-th `<td>` of the row) is stored under header `i`. Headers
 *   that are empty are skipped, missing cells become "". Cell text is stored
 *   as-is (not whitespace-normalized).
 *
 * @param page Page-like object exposing `evaluate(fn)`.
 * @returns Promise of the row objects, or `[]` when the page has no table.
 */
async function createJsonFromTable(page) {
  return await page.evaluate(() => {
    const collapseWhitespace = text => text.replace(/\s+/g, " ").trim();

    const firstTable = document.querySelector("table");
    if (!firstTable) {
      return [];
    }

    const columnNames = Array.from(firstTable.querySelectorAll("th"), headerCell => {
      const rawText = headerCell?.textContent;
      return rawText && collapseWhitespace(rawText);
    });

    // Drop the first row; the remaining rows carry the data.
    const [, ...dataRows] = Array.from(firstTable.querySelectorAll("tr"));

    return dataRows.map(row => {
      const rowCells = Array.from(row.querySelectorAll("td"));
      const record = {};
      columnNames.forEach((columnName, position) => {
        if (columnName) {
          record[columnName] = rowCells[position]?.textContent ?? "";
        }
      });
      return record;
    });
  });
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.validateDynamicListExtractorOptions = validateDynamicListExtractorOptions;
|
|
7
|
+
var _neverthrow = require("neverthrow");
|
|
8
|
+
var Errors = _interopRequireWildcard(require("../errors"));
|
|
9
|
+
var _locatorHelpers = require("../../../common/locatorHelpers");
|
|
10
|
+
/**
 * Generated interop helper used for `import * as X` style requires.
 *
 * On its first call it creates two WeakMap caches (when WeakMap exists),
 * replaces itself with the inner implementation and delegates to it.
 * The inner implementation:
 *  - returns `e` unchanged when `t` is falsy and `e.__esModule` is truthy;
 *  - otherwise builds a null-prototype namespace object `f` with `default: e`;
 *  - returns `f` right away when `e` is null or neither an object nor a function;
 *  - caches `f` per module in the WeakMap selected by `t`, reusing a cached
 *    namespace when one exists;
 *  - copies every own enumerable key of `e` except "default" onto `f`,
 *    re-defining accessor properties (get/set) with their descriptor instead
 *    of reading their value.
 */
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
11
|
+
/**
 * Checks the inputs of the dynamic list extractor and returns them in a
 * normalized shape, wrapped in a neverthrow result.
 *
 * Failure cases (returned as `err`, never thrown by this function itself):
 *  - `label` is not a string                      -> `Errors.invalidInput`
 *  - the search region matches no element, or
 *    counting its matches rejects                 -> `Errors.invalidSearchRegion`
 *  - the search region yields no element handle   -> `Errors.invalidSearchRegion`
 *  - no `variantKey` was given and the page URL
 *    cannot be parsed to obtain its origin        -> `Errors.invalidAddressUrl`
 *
 * Defaults applied on success:
 *  - search region: `options.searchRegion`, else `page.locator("html")`
 *  - variant key:   `options.variantKey`, else the origin of the page URL
 *  - invalidate:    `options.optionalPropertiesInvalidator`, else a function
 *                   returning an empty array
 *
 * @param page    Page-like object exposing `locator()` and `url()`.
 * @param label   Label of the extraction; must be a string.
 * @param options Extractor options as passed by the caller.
 * @returns Promise of `ok(normalizedOptions)` or `err(error)`.
 */
async function validateDynamicListExtractorOptions(page, label, options) {
  const { err, ok, fromThrowable } = _neverthrow;

  if (typeof label !== "string") {
    return err(Errors.invalidInput("Identifier must be a string."));
  }

  // Without an explicit region the whole document is searched.
  const searchRegion = options.searchRegion ?? page.locator("html");

  // A rejected count() is handled exactly like "nothing matched".
  const matchedElements = await searchRegion.count().catch(() => 0);
  if (!(matchedElements > 0)) {
    return err(Errors.invalidSearchRegion());
  }

  const searchRegionHandler = await searchRegion.elementHandle();
  if (!searchRegionHandler) {
    return err(Errors.invalidSearchRegion());
  }

  const pageUrl = page.url();
  const readPageOrigin = fromThrowable(
    () => new URL(pageUrl).origin,
    () => Errors.invalidAddressUrl("Cannot get page url origin.")
  );
  const variantKey = options.variantKey ? ok(options.variantKey) : readPageOrigin();
  if (variantKey.isErr()) {
    return err(variantKey.error);
  }

  const invalidate = options.optionalPropertiesInvalidator ?? (() => []);

  // First schema property flagged as `primary`, as a [name, definition] pair.
  const primaryProperty = Object.entries(options.itemEntitySchema.properties)
    .find(([, definition]) => definition.primary);

  const callerRegion = options.searchRegion;
  const searchRegionKey = callerRegion
    ? (0, _locatorHelpers.getLocatorInternalKey)(callerRegion)
    : null;
  const searchRegionXpath = callerRegion
    ? await (0, _locatorHelpers.findXPathForLocator)(callerRegion)
    : undefined;

  return ok({
    itemEntityName: options.itemEntityName,
    itemEntitySchema: options.itemEntitySchema,
    variantKey: variantKey.value ?? "about:blank",
    invalidate,
    pageUrl,
    primaryProperty,
    searchRegionHandler,
    searchRegion,
    hasSearchRegionContainer: !!callerRegion,
    label,
    searchRegionKey,
    strategy: options.strategy,
    prompt: options.prompt,
    searchRegionXpath,
    apiKey: options.apiKey
  });
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.createAnthropicInstance = createAnthropicInstance;
|
|
7
|
+
var _dotenv = require("dotenv");
|
|
8
|
+
var _jwtTokenManager = require("../../common/jwtTokenManager");
|
|
9
|
+
var _sdk = _interopRequireDefault(require("@anthropic-ai/sdk"));
|
|
10
|
+
/**
 * Interop helper for default imports: a module flagged with `__esModule` is
 * returned untouched, anything else is wrapped as `{ default: value }`.
 */
function _interopRequireDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
|
|
11
|
+
(0, _dotenv.config)();
|
|
12
|
+
/**
 * Creates an Anthropic SDK client.
 *
 * - When `options.apiKey` is truthy the client is built with that key only.
 * - Otherwise the client is built with a placeholder key, a base URL composed
 *   from the FUNCTIONS_DOMAIN, INTUNED_WORKSPACE_ID and INTUNED_INTEGRATION_ID
 *   environment variables, and a `fetch` bound to the backend functions token
 *   manager's `fetchWithToken`.
 *
 * @param options Optional object; only `apiKey` is read.
 * @returns A new instance of the SDK's default export.
 */
function createAnthropicInstance(options) {
  const Anthropic = _sdk.default;

  const callerProvidedKey = options != null && Boolean(options.apiKey);
  if (callerProvidedKey) {
    return new Anthropic({
      apiKey: options.apiKey
    });
  }

  const {
    FUNCTIONS_DOMAIN: functionsDomain,
    INTUNED_WORKSPACE_ID: workspaceId,
    INTUNED_INTEGRATION_ID: integrationId
  } = process.env;
  const tokenManager = _jwtTokenManager.backendFunctionsTokenManager;

  return new Anthropic({
    apiKey: "--THI_VALUE_WILL_BE_REPLACED_BY_INTUNED_BE--",
    baseURL: `${functionsDomain}/api/${workspaceId}/functions/${integrationId}/anthropic`,
    fetch: tokenManager.fetchWithToken.bind(tokenManager)
  });
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.createOpenAIInstance = createOpenAIInstance;
|
|
7
|
+
var _openai = require("openai");
|
|
8
|
+
var _dotenv = require("dotenv");
|
|
9
|
+
var _jwtTokenManager = require("../../common/jwtTokenManager");
|
|
10
|
+
(0, _dotenv.config)();
|
|
11
|
+
/**
 * Creates an OpenAI SDK client.
 *
 * - When `options.apiKey` is truthy the client is built with that key only.
 * - Otherwise the client is built with an empty key, a base URL composed from
 *   the FUNCTIONS_DOMAIN, INTUNED_WORKSPACE_ID and INTUNED_INTEGRATION_ID
 *   environment variables, and a `fetch` bound to the backend functions token
 *   manager's `fetchWithToken`.
 *
 * @param options Optional object; only `apiKey` is read.
 * @returns A new `OpenAI` instance.
 */
function createOpenAIInstance(options) {
  const callerProvidedKey = options != null && Boolean(options.apiKey);
  if (callerProvidedKey) {
    return new _openai.OpenAI({
      apiKey: options.apiKey
    });
  }

  const {
    FUNCTIONS_DOMAIN: functionsDomain,
    INTUNED_WORKSPACE_ID: workspaceId,
    INTUNED_INTEGRATION_ID: integrationId
  } = process.env;
  const tokenManager = _jwtTokenManager.backendFunctionsTokenManager;

  return new _openai.OpenAI({
    apiKey: "",
    baseURL: `${functionsDomain}/api/${workspaceId}/functions/${integrationId}/openai`,
    fetch: tokenManager.fetchWithToken.bind(tokenManager)
  });
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.runAIExtraction = runAIExtraction;
|
|
7
|
+
var _neverthrow = require("neverthrow");
|
|
8
|
+
var z = _interopRequireWildcard(require("zod"));
|
|
9
|
+
var Errors = _interopRequireWildcard(require("./errors"));
|
|
10
|
+
var _findDomMatches = require("./findDomMatches");
|
|
11
|
+
var _noEmpty = _interopRequireDefault(require("../../common/noEmpty"));
|
|
12
|
+
var _common = require("../common");
|
|
13
|
+
var _buildImagesFromPage = require("../common/buildImagesFromPage");
|
|
14
|
+
var _Logger = require("../../common/Logger");
|
|
15
|
+
var _utils = require("../common/matching/utils");
|
|
16
|
+
/**
 * Interop helper for default imports: a module flagged with `__esModule` is
 * returned untouched, anything else is wrapped as `{ default: value }`.
 */
function _interopRequireDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
|
|
17
|
+
/**
 * Generated interop helper used for `import * as X` style requires.
 *
 * On its first call it creates two WeakMap caches (when WeakMap exists),
 * replaces itself with the inner implementation and delegates to it.
 * The inner implementation:
 *  - returns `e` unchanged when `t` is falsy and `e.__esModule` is truthy;
 *  - otherwise builds a null-prototype namespace object `f` with `default: e`;
 *  - returns `f` right away when `e` is null or neither an object nor a function;
 *  - caches `f` per module in the WeakMap selected by `t`, reusing a cached
 *    namespace when one exists;
 *  - copies every own enumerable key of `e` except "default" onto `f`,
 *    re-defining accessor properties (get/set) with their descriptor instead
 *    of reading their value.
 */
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
18
|
+
/**
 * Runs one AI extraction for a single object and keeps only the values that
 * can be located in the DOM.
 *
 * Steps:
 *  1. For the "IMAGE" strategy, images are built from the page (or from the
 *     search-region handle when `hasSearchRegionContainer` is set). A failure
 *     here is returned as `Errors.other`.
 *  2. For the "HTML" strategy, the snapshot HTML is sent as the only text entry.
 *  3. The AI extractor is called; a failure is returned as
 *     `Errors.invalidExtractionResult`.
 *  4. The result must be a record of string -> (string | null); otherwise
 *     `Errors.invalidExtractionResult` is returned with the validation message.
 *  5. The non-empty extracted values are looked up in the DOM; a rejection is
 *     returned as `Errors.other`.
 *  6. For every extracted property the best DOM match is selected. Properties
 *     without a match are dropped (and logged at debug level).
 *
 * @param pageAndSearchRegion      Object with `page` and `searchRegionHandler`.
 * @param extractionInfo           Object with `entityName`, `entitySchema`, `identifier`.
 * @param snapshot                 Object with the `html` used for the "HTML" strategy.
 * @param strategy                 Object with `type` and `model`.
 * @param hasSearchRegionContainer Whether images should be limited to the search region.
 * @param prompt                   Passed to the extractor as `systemMessage`.
 * @param apiKey                   Passed through to the extractor.
 * @returns Promise of `ok({ extractionResultObject, valueDomMatches, xpathMapping })`
 *          or `err(error)`.
 */
async function runAIExtraction(pageAndSearchRegion, extractionInfo, snapshot, strategy, hasSearchRegionContainer, prompt, apiKey) {
  const { err, ok, fromPromise } = _neverthrow;

  let imagesResult;
  if (strategy.type === "IMAGE") {
    imagesResult = await (0, _buildImagesFromPage.buildImagesFromPageOrHandle)(
      pageAndSearchRegion.page,
      hasSearchRegionContainer ? pageAndSearchRegion.searchRegionHandler : undefined
    );
  }
  if (imagesResult && imagesResult.isErr()) {
    return err(Errors.other(imagesResult.error.context));
  }

  const textParts = strategy.type === "HTML" ? [snapshot.html] : [];
  const imageParts = imagesResult == null
    ? undefined
    : imagesResult.value.map(imageData => ({
      data: imageData,
      image_type: "png"
    }));

  const aiResponse = await (0, _common.extractStructuredDataUsingAi)({
    entityName: extractionInfo.entityName,
    jsonSchema: extractionInfo.entitySchema,
    model: strategy.model,
    text: textParts,
    images: imageParts,
    identifier: extractionInfo.identifier,
    systemMessage: prompt,
    apiKey
  });
  if (aiResponse.isErr()) {
    return err(Errors.invalidExtractionResult(aiResponse.error.context));
  }

  const rawExtraction = aiResponse.value.result;
  _Logger.logger.debug("Extracted data from AI", rawExtraction);

  const parsed = z.record(z.string(), z.string().nullable()).safeParse(rawExtraction);
  if (!parsed.success) {
    return err(Errors.invalidExtractionResult(`Failed to parse extraction result. ${parsed.error.message}`));
  }
  const extractedObject = parsed.data;

  const valuesToLocate = Object.values(extractedObject).filter(_noEmpty.default);
  const domLookup = await fromPromise(
    (0, _findDomMatches.getDomMatches)(pageAndSearchRegion.page, pageAndSearchRegion.searchRegionHandler, valuesToLocate),
    cause => Errors.other("Failed to get dom matches.", cause)
  );
  if (domLookup.isErr()) {
    return err(domLookup.error);
  }
  const matchesByValue = domLookup.value;

  const confirmedValues = {};
  const xpathMapping = {};
  for (const [propertyName, extractedValue] of Object.entries(extractedObject)) {
    let bestMatch;
    if (extractedValue) {
      const candidates = matchesByValue.get(extractedValue);
      if (candidates) {
        bestMatch = (0, _utils.selectBestMatch)(extractedValue, candidates);
      }
    }

    if (!bestMatch) {
      _Logger.logger.debug(`Property ${propertyName} not found in the page HTML, dropped for hallucination protection`);
      continue;
    }

    // The text found in the page wins over the text the model returned.
    confirmedValues[propertyName] = bestMatch.matchText;
    xpathMapping[propertyName] = {
      matchXpath: bestMatch.matchXpath,
      matchText: bestMatch.matchText,
      matchType: bestMatch.matchType
    };
  }

  return ok({
    extractionResultObject: confirmedValues,
    valueDomMatches: domLookup.value,
    xpathMapping
  });
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
var _vitest = require("vitest");
|
|
4
|
+
var _checksumUtils = require("../checksumUtils");
|
|
5
|
+
// Test suite for `hashObject` from ../checksumUtils: the hash must not depend
// on key insertion order, must react to a one-character string difference, and
// must be equal for two Maps whose match arrays are listed in a different
// order when the second argument is `true`.
// NOTE(review): the meaning of that second argument is not visible from this
// file — confirm against checksumUtils.
(0, _vitest.describe)("hashObject", () => {
  const { test, expect } = _vitest;
  const { hashObject } = _checksumUtils;

  // Returns two objects with the same content but opposite key insertion
  // order; the `test` field of each object can be set independently.
  const buildReorderedPair = (firstTestValue, secondTestValue) => {
    const original = {
      foo: [{
        c: 1,
        test: firstTestValue
      }, {
        d: 2,
        e: 3
      }],
      bar: {
        a: 2,
        b: undefined
      }
    };
    const reordered = {
      bar: {
        b: undefined,
        a: 2
      },
      foo: [{
        test: secondTestValue,
        c: 1
      }, {
        e: 3,
        d: 2
      }]
    };
    return [original, reordered];
  };

  // Fresh match fixtures per call so the two maps never share object identity.
  const fooMatch = () => ({
    nodeXpath: "foo",
    matchXpath: "foo",
    exact: true,
    matchType: "direct-text",
    matchText: "foo",
    sourceText: "foo"
  });
  const booMatch = () => ({
    nodeXpath: "boo",
    matchXpath: "coo",
    exact: true,
    matchType: "direct-text",
    matchText: "doo",
    sourceText: "foo"
  });

  test("should work regardless of property order", () => {
    const [obj1, obj2] = buildReorderedPair(true, true);
    const res1 = hashObject(obj1);
    expect(res1).toBeTruthy();
    expect(hashObject(obj1)).toEqual(hashObject(obj2));
  });

  test("should result in different hashes with a diff in a space in a string", () => {
    const [obj1, obj2] = buildReorderedPair("true", "true ");
    const res1 = hashObject(obj1);
    expect(res1).toBeTruthy();
    expect(hashObject(obj1)).not.toEqual(hashObject(obj2));
  });

  test("should work with Map<string, Match[]>", () => {
    const map1 = new Map();
    map1.set("foo", [fooMatch(), booMatch()]);
    const map2 = new Map();
    map2.set("foo", [booMatch(), fooMatch()]);
    expect(hashObject(map1, true)).toEqual(hashObject(map2, true));
  });
});
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
var _extendedTest = require("../../../common/extendedTest");
|
|
4
|
+
var _ = require("../..");
|
|
5
|
+
var _uuid = require("uuid");
|
|
6
|
+
const productTemplate = `
|
|
7
|
+
<div class="product-page">
|
|
8
|
+
<div class="product-info">
|
|
9
|
+
<h1 class="product-title">iPhone 14 Pro</h1>
|
|
10
|
+
<div class="price-container">
|
|
11
|
+
<span class="price">$999</span>
|
|
12
|
+
</div>
|
|
13
|
+
<div class="description">
|
|
14
|
+
<p class="product-description">Latest iPhone with advanced camera system</p>
|
|
15
|
+
</div>
|
|
16
|
+
</div>
|
|
17
|
+
<div class="additional-info">
|
|
18
|
+
<div class="shipping-info">Free shipping available</div>
|
|
19
|
+
<div class="warranty">2-year warranty included</div>
|
|
20
|
+
</div>
|
|
21
|
+
<div class="reviews-section">
|
|
22
|
+
<div class="review-count">125 reviews</div>
|
|
23
|
+
<div class="rating">4.8/5 stars</div>
|
|
24
|
+
</div>
|
|
25
|
+
</div>
|
|
26
|
+
`;
|
|
27
|
+
// Runs extractObjectFromLocator against four successive versions of the product
// markup and checks how the returned object changes (or does not change) between them.
(0, _extendedTest.describe)("Object Extractor Caching Tests", () => {
  (0, _extendedTest.describe)("DOM Changes and Cache Behavior", () => {
    (0, _extendedTest.test)("should demonstrate caching behavior with different types of DOM changes", async ({ page }) => {
      const { expect } = _extendedTest;

      // A random UUID is embedded in the label; the same string is also used as the variant key.
      const runLabel = `product-cache-test-${(0, _uuid.v4)()}`;
      const productSchema = {
        type: "object",
        required: ["title", "price"],
        properties: {
          title: { type: "string", description: "Product title" },
          price: { type: "string", description: "Product price" },
          description: { type: "string", description: "Product description" }
        }
      };
      const options = {
        entityName: "product",
        label: runLabel,
        entitySchema: productSchema,
        strategy: { model: "claude-3-5-sonnet-20240620", type: "HTML" },
        variantKey: runLabel,
        apiKey: process.env.ANTHROPIC_API_KEY
      };

      // Loads the markup into the page, then extracts from the .product-page element.
      const loadAndExtract = async markup => {
        await page.setContent(markup);
        return (0, _.extractObjectFromLocator)(page.locator(".product-page"), options);
      };

      // Stage 1: the untouched template.
      const firstResult = await loadAndExtract(productTemplate);
      console.log("First extraction result:", firstResult);
      expect(firstResult).toHaveProperty("title", "iPhone 14 Pro");
      expect(firstResult).toHaveProperty("price", "$999");
      expect(firstResult).toHaveProperty("description", "Latest iPhone with advanced camera system");

      // Stage 2: title and price (both requested by the schema) are changed.
      const retitledTemplate = productTemplate.replace("iPhone 14 Pro", "iPhone 15 Pro Max");
      const modifiedTemplate = retitledTemplate.replace("$999", "$1199");
      const secondResult = await loadAndExtract(modifiedTemplate);
      console.log("Second extraction result (after relevant change):", secondResult);
      expect(secondResult).not.toEqual(firstResult);
      expect(secondResult).toHaveProperty("title", "iPhone 15 Pro Max");
      expect(secondResult).toHaveProperty("price", "$1199");

      // Stage 3: only shipping, warranty and review-count text change; none of these are in the schema.
      const irrelevantEdits = [
        ["Free shipping available", "Express shipping available"],
        ["2-year warranty included", "3-year warranty included"],
        ["125 reviews", "200 reviews"]
      ];
      let irrelevantChangeTemplate = modifiedTemplate;
      for (const [before, after] of irrelevantEdits) {
        irrelevantChangeTemplate = irrelevantChangeTemplate.replace(before, after);
      }
      const thirdResult = await loadAndExtract(irrelevantChangeTemplate);
      console.log("Third extraction result (after irrelevant change):", thirdResult);
      expect(thirdResult).toEqual(secondResult);
      expect(thirdResult).toHaveProperty("title", "iPhone 15 Pro Max");
      expect(thirdResult).toHaveProperty("price", "$1199");

      // Stage 4: extra markup is appended after the existing template text.
      const extraSections = `
<div class="newly-added-section">
<div class="social-media">
<button class="share-facebook">Share on Facebook</button>
<button class="share-twitter">Share on Twitter</button>
</div>
<div class="related-products">
<h3>Related Products</h3>
<div class="product-list">
<div class="related-item">AirPods Pro</div>
<div class="related-item">MacBook Air</div>
</div>
</div>
</div>
`;
      const appendedTemplate = irrelevantChangeTemplate + extraSections;
      const fourthResult = await loadAndExtract(appendedTemplate);
      console.log("Fourth extraction result (after appending content):", fourthResult);
      expect(fourthResult).toEqual(thirdResult);
      expect(fourthResult).toHaveProperty("title", "iPhone 15 Pro Max");
      expect(fourthResult).toHaveProperty("price", "$1199");

      console.log("All cache behavior tests completed successfully!");
    });
  });
});
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
var _extendedTest = require("../../../common/extendedTest");
|
|
4
|
+
var _ = require("../..");
|
|
5
|
+
var _uuid = require("uuid");
|
|
6
|
+
// Product page markup used as the starting point of the caching test.
// The joined string begins and ends with a newline.
const productTemplate = [
  "",
  '<div class="product-page">',
  '<div class="product-info">',
  '<h1 class="product-title">iPhone 14 Pro</h1>',
  '<div class="price-container">',
  '<span class="price">$999</span>',
  "</div>",
  '<div class="description">',
  '<p class="product-description">Latest iPhone with advanced camera system</p>',
  "</div>",
  "</div>",
  '<div class="additional-info">',
  '<div class="shipping-info">Free shipping available</div>',
  '<div class="warranty">2-year warranty included</div>',
  "</div>",
  '<div class="reviews-section">',
  '<div class="review-count">125 reviews</div>',
  '<div class="rating">4.8/5 stars</div>',
  "</div>",
  "</div>",
  ""
].join("\n");
|
|
27
|
+
// Runs extractObjectFromPage against four successive versions of the product
// markup and checks how the returned object changes (or does not change) between them.
(0, _extendedTest.describe)("Object Extractor Caching Tests", () => {
  (0, _extendedTest.describe)("DOM Changes and Cache Behavior", () => {
    (0, _extendedTest.test)("should demonstrate caching behavior with different types of DOM changes", async ({ page }) => {
      const { expect } = _extendedTest;

      // A random UUID is embedded in the label; the same string is also used as the variant key.
      const runLabel = `product-cache-test-${(0, _uuid.v4)()}`;
      const productSchema = {
        type: "object",
        required: ["title", "price"],
        properties: {
          title: { type: "string", description: "Product title" },
          price: { type: "string", description: "Product price" },
          description: { type: "string", description: "Product description" }
        }
      };
      const options = {
        entityName: "product",
        label: runLabel,
        entitySchema: productSchema,
        strategy: { model: "claude-3-5-sonnet-20240620", type: "HTML" },
        variantKey: runLabel,
        apiKey: process.env.ANTHROPIC_API_KEY
      };

      // Loads the markup into the page, then extracts from the whole page.
      const loadAndExtract = async markup => {
        await page.setContent(markup);
        return (0, _.extractObjectFromPage)(page, options);
      };

      // Stage 1: the untouched template.
      const firstResult = await loadAndExtract(productTemplate);
      console.log("First extraction result:", firstResult);
      expect(firstResult).toHaveProperty("title", "iPhone 14 Pro");
      expect(firstResult).toHaveProperty("price", "$999");
      expect(firstResult).toHaveProperty("description", "Latest iPhone with advanced camera system");

      // Stage 2: title and price (both requested by the schema) are changed.
      const retitledTemplate = productTemplate.replace("iPhone 14 Pro", "iPhone 15 Pro Max");
      const modifiedTemplate = retitledTemplate.replace("$999", "$1199");
      const secondResult = await loadAndExtract(modifiedTemplate);
      console.log("Second extraction result (after relevant change):", secondResult);
      expect(secondResult).not.toEqual(firstResult);
      expect(secondResult).toHaveProperty("title", "iPhone 15 Pro Max");
      expect(secondResult).toHaveProperty("price", "$1199");

      // Stage 3: only shipping, warranty and review-count text change; none of these are in the schema.
      const irrelevantEdits = [
        ["Free shipping available", "Express shipping available"],
        ["2-year warranty included", "3-year warranty included"],
        ["125 reviews", "200 reviews"]
      ];
      let irrelevantChangeTemplate = modifiedTemplate;
      for (const [before, after] of irrelevantEdits) {
        irrelevantChangeTemplate = irrelevantChangeTemplate.replace(before, after);
      }
      const thirdResult = await loadAndExtract(irrelevantChangeTemplate);
      console.log("Third extraction result (after irrelevant change):", thirdResult);
      expect(thirdResult).toEqual(secondResult);
      expect(thirdResult).toHaveProperty("title", "iPhone 15 Pro Max");
      expect(thirdResult).toHaveProperty("price", "$1199");

      // Stage 4: extra markup is appended after the existing template text.
      const extraSections = `
<div class="newly-added-section">
<div class="social-media">
<button class="share-facebook">Share on Facebook</button>
<button class="share-twitter">Share on Twitter</button>
</div>
<div class="related-products">
<h3>Related Products</h3>
<div class="product-list">
<div class="related-item">AirPods Pro</div>
<div class="related-item">MacBook Air</div>
</div>
</div>
</div>
`;
      const appendedTemplate = irrelevantChangeTemplate + extraSections;
      const fourthResult = await loadAndExtract(appendedTemplate);
      console.log("Fourth extraction result (after appending content):", fourthResult);
      expect(fourthResult).toEqual(thirdResult);
      expect(fourthResult).toHaveProperty("title", "iPhone 15 Pro Max");
      expect(fourthResult).toHaveProperty("price", "$1199");

      console.log("All cache behavior tests completed successfully!");
    });
  });
});
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.calculateObjectExampleHash = calculateObjectExampleHash;
|
|
7
|
+
var _checksumUtils = require("./checksumUtils");
|
|
8
|
+
/**
 * Hashes the set of fields that identify an object-extraction example.
 *
 * All eight fields are forwarded to `hashObject` as a single object, including
 * any that are undefined.
 *
 * @param {object} params
 * @param {*} params.currentPageUrl
 * @param {*} params.entityDescription
 * @param {*} params.entityName
 * @param {*} params.entitySchema
 * @param {*} params.strategy
 * @param {*} params.variantKey
 * @param {*} params.searchRegionLocator
 * @param {*} params.prompt
 * @returns {*} Whatever `hashObject` returns for the collected fields.
 */
function calculateObjectExampleHash({
  currentPageUrl,
  entityDescription,
  entityName,
  entitySchema,
  strategy,
  variantKey,
  searchRegionLocator,
  prompt
}) {
  const identityFields = {
    currentPageUrl,
    entityName,
    entityDescription,
    entitySchema,
    variantKey,
    strategy,
    searchRegionLocator,
    prompt
  };
  // Second argument of hashObject; always enabled here.
  const treatArraysAsUnsortedLists = true;
  return (0, _checksumUtils.hashObject)(identityFields, treatArraysAsUnsortedLists);
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.captureSnapshot = captureSnapshot;
|
|
7
|
+
var _neverthrow = require("neverthrow");
|
|
8
|
+
var Errors = _interopRequireWildcard(require("./errors"));
|
|
9
|
+
var _getSimplifiedHtml = require("./getSimplifiedHtml");
|
|
10
|
+
// Compiler-generated interop helper for namespace-style imports of CommonJS modules.
// First call: creates two WeakMap caches (when WeakMap exists), replaces itself with the
// inner function, and invokes it. The inner function:
//   - returns `e` untouched when `t` is falsy and `e.__esModule` is truthy;
//   - otherwise builds a null-prototype object `f` with `default: e`, and returns it
//     immediately when `e` is null or is neither an object nor a function;
//   - consults the cache (`n` when `t` is truthy, else `r`): returns the cached wrapper
//     for `e` if present, otherwise stores `f`;
//   - copies every own property visited by `for...in` except "default" onto `f`, using
//     the property descriptor when it has a getter or setter and plain assignment otherwise.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
11
|
+
/**
 * Gathers a snapshot of the current page and search region.
 *
 * The snapshot holds the simplified HTML of `searchRegionHandler`, an ISO
 * timestamp, the page URL, the outer HTML of `searchRegion`, and the full page
 * content. The work is wrapped with neverthrow's `fromPromise`, so a failure in
 * any step is logged and mapped to `Errors.other(...)` instead of being thrown.
 *
 * @param {object} pageAndSearchRegion Must expose `page`, `searchRegion` and
 *   `searchRegionHandler`, as read below.
 * @returns The value produced by `fromPromise` for the snapshot promise.
 */
async function captureSnapshot(pageAndSearchRegion) {
  // Steps run strictly one after another, in the same order as the fields are listed.
  const collectSnapshot = async () => {
    const html = await (0, _getSimplifiedHtml.getSimplifiedHtml)(pageAndSearchRegion.searchRegionHandler);
    const dateCreated = new Date().toISOString();
    const url = pageAndSearchRegion.page.url();
    const extractionHtml = await pageAndSearchRegion.searchRegion.evaluate(element => element.outerHTML);
    const pageHtml = await pageAndSearchRegion.page.content();
    return {
      html,
      dateCreated,
      url,
      extractionHtml,
      pageHtml
    };
  };

  // Logs the original failure, then wraps it in the module's generic error.
  const toSnapshotError = cause => {
    console.log(cause);
    return Errors.other("Failed to get simplified html.", cause);
  };

  return (0, _neverthrow.fromPromise)(collectSnapshot(), toSnapshotError);
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.hashObject = hashObject;
|
|
7
|
+
var crypto = _interopRequireWildcard(require("crypto"));
|
|
8
|
+
// Compiler-generated interop helper for namespace-style imports of CommonJS modules.
// First call: creates two WeakMap caches (when WeakMap exists), replaces itself with the
// inner function, and invokes it. The inner function:
//   - returns `e` untouched when `t` is falsy and `e.__esModule` is truthy;
//   - otherwise builds a null-prototype object `f` with `default: e`, and returns it
//     immediately when `e` is null or is neither an object nor a function;
//   - consults the cache (`n` when `t` is truthy, else `r`): returns the cached wrapper
//     for `e` if present, otherwise stores `f`;
//   - copies every own property visited by `for...in` except "default" onto `f`, using
//     the property descriptor when it has a getter or setter and plain assignment otherwise.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
9
|
+
/**
 * Produces a deterministic string form of `obj`, suitable for hashing.
 *
 * - Map: serialized as the array of its `[key, value]` entries.
 * - Set: serialized as the array of its elements.
 * - Date: serialized as its JSON form (ISO string, or `null` for an invalid date).
 * - Array: `[a,b,...]`; the serialized elements are sorted first when
 *   `treatArraysAsUnsortedLists` is truthy, so element order stops mattering.
 * - Other non-null objects: `{` + JSON array of the sorted own enumerable keys +
 *   each value (in key order) followed by `,` + `}`.
 * - Everything else: `JSON.stringify(obj)` interpolated into a string, so
 *   `undefined` becomes the text "undefined".
 *
 * Circular structures are not detected and recurse without bound.
 *
 * @param {*} obj Value to serialize.
 * @param {boolean} treatArraysAsUnsortedLists Sort serialized array elements when truthy.
 * @returns {string} The serialized form.
 */
function _serialize(obj, treatArraysAsUnsortedLists) {
  if (obj instanceof Map) {
    return _serialize(Array.from(obj.entries()), treatArraysAsUnsortedLists);
  }
  if (obj instanceof Set) {
    // A Set has no enumerable own keys, so the generic object branch would turn
    // every Set into "{[]}" and make all Sets hash identically.
    return _serialize(Array.from(obj), treatArraysAsUnsortedLists);
  }
  if (obj instanceof Date) {
    // Same problem as Set: without this branch every Date collapses to "{[]}".
    return `${JSON.stringify(obj)}`;
  }
  if (Array.isArray(obj)) {
    const serializedElements = obj.map(el => _serialize(el, treatArraysAsUnsortedLists));
    if (treatArraysAsUnsortedLists) {
      // Elements are already strings, so the default lexicographic sort is intended.
      serializedElements.sort();
    }
    return `[${serializedElements.join(",")}]`;
  }
  if (typeof obj === "object" && obj !== null) {
    const keys = Object.keys(obj).sort();
    let acc = `{${JSON.stringify(keys)}`;
    for (const key of keys) {
      acc += `${_serialize(obj[key], treatArraysAsUnsortedLists)},`;
    }
    return `${acc}}`;
  }
  return `${JSON.stringify(obj)}`;
}
|
|
29
|
+
/**
 * Hashes an arbitrary value by digesting its `_serialize` string form.
 *
 * @param {*} obj Value to hash.
 * @param {boolean} [treatArraysAsUnsortedLists=false] Forwarded to `_serialize`.
 * @param {string} [hashAlgorithm="SHA256"] Algorithm name passed to `crypto.createHash`.
 * @param {string} [encoding="hex"] Encoding passed to `digest`.
 * @returns The digest in the requested encoding.
 */
function hashObject(obj, treatArraysAsUnsortedLists = false, hashAlgorithm = "SHA256", encoding = "hex") {
  // The hasher is created before serializing, so an unsupported algorithm fails first.
  const hasher = crypto.createHash(hashAlgorithm);
  const canonicalText = _serialize(obj, treatArraysAsUnsortedLists);
  hasher.update(canonicalText);
  return hasher.digest(encoding);
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.ALLOWED_ATTRIBUTES = void 0;
|
|
7
|
+
// Attribute matchers exported as ALLOWED_ATTRIBUTES: exact attribute names as
// strings, plus one pattern for names that start with "data-".
const ALLOWED_ATTRIBUTES = [
  "aria-label",
  "data-name",
  "name",
  "type",
  "placeholder",
  "value",
  "role",
  "title",
  "href",
  "id",
  "alt",
  /^data-/
];
exports.ALLOWED_ATTRIBUTES = ALLOWED_ATTRIBUTES;
|