@intuned/browser-dev 2.2.3-unify-sdks.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +21 -0
- package/.eslintignore +10 -0
- package/.eslintrc.js +39 -0
- package/LICENSE +43 -0
- package/dist/ai-extractors/AnthropicClient/index.js +23 -0
- package/dist/ai-extractors/export.d.js +5 -0
- package/dist/ai-extractors/export.d.ts +422 -0
- package/dist/ai-extractors/extractStructuredData.js +79 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/constants.js +7 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/errors.js +42 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingClaude.js +149 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingOpenAi.js +144 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStrucutredDataUsingAiInstance.js +123 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/index.js +55 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/isItemTableHeaderOrFooter.js +96 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/screenshotHelpers.js +55 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/types.js +5 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/utils.js +53 -0
- package/dist/ai-extractors/extractionHelpers/types.js +5 -0
- package/dist/ai-extractors/fileExtractors.js +176 -0
- package/dist/ai-extractors/index.js +31 -0
- package/dist/ai-extractors/jsonSchema.d.js +5 -0
- package/dist/ai-extractors/jsonSchema.d.ts +49 -0
- package/dist/ai-extractors/openAiClients/index.js +23 -0
- package/dist/ai-extractors/validators.js +239 -0
- package/dist/browser/ai/export.d.js +3 -0
- package/dist/browser/ai/export.d.ts +587 -0
- package/dist/browser/ai/extractMarkdown.js +15 -0
- package/dist/browser/ai/extractStructuredData.js +231 -0
- package/dist/browser/ai/extractStructuredDataUsingAi.js +140 -0
- package/dist/browser/ai/extractionHelpers/screenshotHelpers.js +55 -0
- package/dist/browser/ai/extractionHelpers/validateSchema.js +148 -0
- package/dist/browser/ai/index.d.ts +587 -0
- package/dist/browser/ai/index.js +19 -0
- package/dist/browser/ai/isPageLoaded.js +67 -0
- package/dist/browser/ai/prompt.js +39 -0
- package/dist/browser/ai/tests/testCheckAllTypesAreStrings.spec.js +143 -0
- package/dist/browser/ai/tests/testExtractStructuredData.spec.js +622 -0
- package/dist/browser/ai/tools/index.js +48 -0
- package/dist/browser/ai/types/errors.js +67 -0
- package/dist/browser/ai/types/models.js +45 -0
- package/dist/browser/ai/types/types.js +48 -0
- package/dist/browser/ai/validators.js +136 -0
- package/dist/common/Logger/index.js +60 -0
- package/dist/common/Logger/types.js +5 -0
- package/dist/common/SdkError.js +50 -0
- package/dist/common/aiModelsValidations.js +50 -0
- package/dist/common/browser_scripts.js +2596 -0
- package/dist/common/ensureBrowserScripts.js +17 -0
- package/dist/common/environmentVariables.js +16 -0
- package/dist/common/eventTracking/getAiTrackingHeaders.js +31 -0
- package/dist/common/eventTracking/getFileTrackingHeaders.js +23 -0
- package/dist/common/extendedTest.js +148 -0
- package/dist/common/extractionHelpers.js +19 -0
- package/dist/common/formatZodError.js +18 -0
- package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
- package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
- package/dist/common/fuzzySearch/utils.js +23 -0
- package/dist/common/getModelProvider.js +18 -0
- package/dist/common/getSimplifiedHtml.js +122 -0
- package/dist/common/hashObject.js +32 -0
- package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
- package/dist/common/html2markdown/index.js +19 -0
- package/dist/common/jwtTokenManager.js +18 -0
- package/dist/common/loadRuntime.js +16 -0
- package/dist/common/locatorHelpers.js +41 -0
- package/dist/common/matching/collectStrings.js +32 -0
- package/dist/common/matching/levenshtein.js +40 -0
- package/dist/common/matching/matching.js +317 -0
- package/dist/common/matching/types.js +1 -0
- package/dist/common/noEmpty.js +9 -0
- package/dist/common/saveSnapshotWithExamples.js +60 -0
- package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
- package/dist/common/xpathMapping.js +107 -0
- package/dist/helpers/downloadFile.js +125 -0
- package/dist/helpers/export.d.js +1 -0
- package/dist/helpers/export.d.ts +1294 -0
- package/dist/helpers/extractMarkdown.js +35 -0
- package/dist/helpers/filterEmptyValues.js +54 -0
- package/dist/helpers/gotoUrl.js +93 -0
- package/dist/helpers/index.d.ts +1294 -0
- package/dist/helpers/index.js +115 -0
- package/dist/helpers/processDate.js +25 -0
- package/dist/helpers/resolveUrl.js +63 -0
- package/dist/helpers/sanitizeHtml.js +73 -0
- package/dist/helpers/saveFileToS3.js +46 -0
- package/dist/helpers/scrollToLoadContent.js +50 -0
- package/dist/helpers/tests/extendedTest.js +130 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +197 -0
- package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
- package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
- package/dist/helpers/tests/testIsPageLoaded.spec.js +285 -0
- package/dist/helpers/tests/testProcessDate.spec.js +13 -0
- package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
- package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
- package/dist/helpers/tests/testSimplifyHtml.spec.js +251 -0
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +380 -0
- package/dist/helpers/tests/testWaitForDomSettled.spec.js +169 -0
- package/dist/helpers/tests/testWaitForNetworkIdle.spec.js +115 -0
- package/dist/helpers/types/Attachment.js +81 -0
- package/dist/helpers/types/CustomTypeRegistry.js +48 -0
- package/dist/helpers/types/RunEnvironment.js +18 -0
- package/dist/helpers/types/ValidationError.js +17 -0
- package/dist/helpers/types/index.js +51 -0
- package/dist/helpers/uploadFileToS3.js +153 -0
- package/dist/helpers/utils/getS3Client.js +21 -0
- package/dist/helpers/utils/index.js +73 -0
- package/dist/helpers/utils/isDownload.js +10 -0
- package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
- package/dist/helpers/utils/isLocator.js +9 -0
- package/dist/helpers/utils/jwtTokenManager.js +18 -0
- package/dist/helpers/validateDataUsingSchema.js +119 -0
- package/dist/helpers/waitForDomSettled.js +182 -0
- package/dist/helpers/waitForNetworkIdle.js +191 -0
- package/dist/index.d.js +82 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.js +84 -0
- package/dist/intunedServices/ApiGateway/aiApiGateway.js +87 -0
- package/dist/intunedServices/ApiGateway/factory.js +13 -0
- package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
- package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
- package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +221 -0
- package/dist/intunedServices/ApiGateway/types.js +11 -0
- package/dist/intunedServices/cache/cache.js +61 -0
- package/dist/intunedServices/cache/index.js +12 -0
- package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
- package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
- package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +149 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +145 -0
- package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
- package/dist/optimized-extractors/common/findTableHeaders.js +175 -0
- package/dist/optimized-extractors/common/index.js +55 -0
- package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +97 -0
- package/dist/optimized-extractors/common/matching/matching.js +212 -0
- package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
- package/dist/optimized-extractors/common/matching/types.js +18 -0
- package/dist/optimized-extractors/common/matching/utils.js +184 -0
- package/dist/optimized-extractors/common/utils.js +58 -0
- package/dist/optimized-extractors/export.d.js +5 -0
- package/dist/optimized-extractors/export.d.ts +397 -0
- package/dist/optimized-extractors/extractArray.js +120 -0
- package/dist/optimized-extractors/extractObject.js +104 -0
- package/dist/optimized-extractors/index.d.ts +397 -0
- package/dist/optimized-extractors/index.js +31 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
- package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
- package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
- package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
- package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
- package/dist/optimized-extractors/models/anthropicModel.js +23 -0
- package/dist/optimized-extractors/models/openaiModel.js +23 -0
- package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
- package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
- package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
- package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
- package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
- package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
- package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
- package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
- package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
- package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
- package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
- package/dist/optimized-extractors/types/errors.js +42 -0
- package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
- package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
- package/dist/optimized-extractors/types/types.js +5 -0
- package/dist/optimized-extractors/validators.js +152 -0
- package/dist/vite-env.d.js +1 -0
- package/dist/vite-env.d.ts +9 -0
- package/docs.md +14 -0
- package/how-to-run-tests.md +10 -0
- package/intuned-runtime-setup.md +13 -0
- package/package.json +124 -0
- package/tsconfig.eslint.json +5 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports._expand = _expand;
|
|
7
|
+
exports.editDistance = editDistance;
|
|
8
|
+
exports.fuzzySearch = fuzzySearch;
|
|
9
|
+
exports.fuzzySearchCandidates = fuzzySearchCandidates;
|
|
10
|
+
exports.fuzzySearchNgrams = fuzzySearchNgrams;
|
|
11
|
+
exports.isEditDistanceNoGreaterThan = isEditDistanceNoGreaterThan;
|
|
12
|
+
var _utils = require("./utils");
|
|
13
|
+
/**
 * Tells whether the Levenshtein distance between `a` and `b` is at most
 * `maxDist`.
 *
 * The previous implementation added the unmatched haystack tail to the best
 * *prefix* match reported by `_expand`. That sum is only an upper bound on the
 * real distance, so some pairs within range were rejected (for example
 * "dbcdX" vs "bcdbcdY" with maxDist 3). The distance is now computed exactly
 * on the trimmed strings, stopping as soon as it is certain to exceed maxDist.
 *
 * @param {string} a - First sequence.
 * @param {string} b - Second sequence.
 * @param {number} maxDist - Largest distance still accepted.
 * @returns {boolean} True when the edit distance is <= maxDist.
 */
function isEditDistanceNoGreaterThan(a, b, maxDist) {
  // Keep `a` as the shorter sequence.
  if (a.length > b.length) {
    [a, b] = [b, a];
  }
  const lenDelta = b.length - a.length;
  // The length difference alone is a lower bound on the distance.
  if (lenDelta > maxDist) {
    return false;
  }
  if (maxDist === 0) {
    return a === b;
  }

  // A shared prefix never contributes to the distance.
  let prefixLen = 0;
  while (prefixLen < a.length && a[prefixLen] === b[prefixLen]) {
    prefixLen++;
  }
  if (prefixLen === a.length) {
    // `a` is a prefix of `b`; the distance is lenDelta, already checked above.
    return true;
  }

  // Same for a shared suffix, without overlapping the trimmed prefix.
  const maxSuffixLen = a.length - prefixLen;
  let suffixLen = 0;
  while (suffixLen < maxSuffixLen && a[a.length - 1 - suffixLen] === b[b.length - 1 - suffixLen]) {
    suffixLen++;
  }

  const shorter = a.slice(prefixLen, a.length - suffixLen);
  const longer = b.slice(prefixLen, b.length - suffixLen);
  const cols = shorter.length;
  const rows = longer.length;
  if (cols === 0) {
    return rows <= maxDist;
  }

  // Two-row Levenshtein DP over the trimmed strings.
  let prevRow = new Array(cols + 1);
  for (let j = 0; j <= cols; j++) {
    prevRow[j] = j;
  }
  let currRow = new Array(cols + 1);
  for (let i = 0; i < rows; i++) {
    currRow[0] = i + 1;
    let rowMin = currRow[0];
    for (let j = 0; j < cols; j++) {
      const substitution = prevRow[j] + (shorter[j] === longer[i] ? 0 : 1);
      const best = Math.min(substitution, prevRow[j + 1] + 1, currRow[j] + 1);
      currRow[j + 1] = best;
      if (best < rowMin) {
        rowMin = best;
      }
    }
    // Row minima never decrease, so the final distance cannot recover.
    if (rowMin > maxDist) {
      return false;
    }
    [prevRow, currRow] = [currRow, prevRow];
  }
  return prevRow[cols] <= maxDist;
}
|
|
40
|
+
/**
 * Computes the Levenshtein (edit) distance between two sequences using a
 * single rolling row of scores.
 *
 * @param {string} a - First sequence.
 * @param {string} b - Second sequence.
 * @returns {number} Minimum number of single-element insertions, deletions
 *   and substitutions needed to turn one sequence into the other.
 */
function editDistance(a, b) {
  // The row is sized by the shorter sequence.
  const [shorter, longer] = a.length > b.length ? [b, a] : [a, b];
  const width = shorter.length;
  const row = Array.from({ length: width + 1 }, (_, idx) => idx);

  for (let r = 0; r < longer.length; r++) {
    // `diagonal` holds the previous row's value one column to the left.
    let diagonal = r;
    row[0] = r + 1;
    for (let c = 0; c < width; c++) {
      const above = row[c + 1];
      const substitution = diagonal + (shorter[c] !== longer[r] ? 1 : 0);
      row[c + 1] = Math.min(substitution, row[c] + 1, above + 1);
      diagonal = above;
    }
  }
  return row[width];
}
|
|
61
|
+
/**
 * Maps each element found in the first `maxDist + 1` positions of `needle`
 * to the lowest index at which it occurs there.
 *
 * @param {string} needle - Sequence being searched for.
 * @param {number} maxDist - Maximum allowed edit distance.
 * @returns {Object<string, number>} Element -> first index within the window.
 */
function makeChar2needleIdx(needle, maxDist) {
  const firstIndexByChar = {};
  // Walk backwards so that earlier occurrences overwrite later ones.
  let idx = Math.min(needle.length - 1, maxDist);
  while (idx >= 0) {
    firstIndexByChar[needle[idx]] = idx;
    idx -= 1;
  }
  return firstIndexByChar;
}
|
|
68
|
+
/**
 * Lazily yields approximate occurrences of `needle` in `haystack` whose edit
 * distance is at most `maxDist`, choosing a search strategy from the inputs:
 * exact search when maxDist is 0, n-gram search when the n-gram length
 * (needle length / (maxDist + 1), floored) is at least 10, and the
 * candidate-tracking search otherwise.
 *
 * @param {string} needle - Sequence to look for.
 * @param {string} haystack - Sequence to search in.
 * @param {number} maxDist - Maximum allowed edit distance.
 * @yields {{start: number, end: number, dist: number}} Match boundaries
 *   (end exclusive) and the distance of the match.
 */
function* fuzzySearch(needle, haystack, maxDist) {
  const needleLen = needle.length;
  // The needle is too long to fit even with maxDist deletions.
  if (needleLen > haystack.length + maxDist) {
    return;
  }

  if (maxDist === 0) {
    for (const start of (0, _utils.searchExact)(needle, haystack)) {
      yield { start, end: start + needleLen, dist: 0 };
    }
    return;
  }

  const ngramLen = Math.floor(needleLen / (maxDist + 1));
  const search = ngramLen >= 10 ? fuzzySearchNgrams : fuzzySearchCandidates;
  yield* search(needle, haystack, maxDist);
}
|
|
85
|
+
/**
 * Finds the prefix of `haystack` that is closest (by edit distance) to the
 * whole `needle`, provided that distance does not exceed `maxDist`.
 *
 * @param {string} needle - Sequence to match in full.
 * @param {string} haystack - Sequence whose prefixes are considered.
 * @param {number} maxDist - Maximum allowed edit distance.
 * @returns {[number, number] | [null, null]} `[distance, prefixLength]` of the
 *   best prefix (ties go to the longer prefix), or `[null, null]` when no
 *   prefix is within `maxDist`.
 */
function _expand(needle, haystack, maxDist) {
  maxDist = +maxDist;

  // Skip the part both strings share at the start; it costs nothing.
  let firstDiff = 0;
  while (
    firstDiff < Math.min(needle.length, haystack.length) &&
    needle.charCodeAt(firstDiff) === haystack.charCodeAt(firstDiff)
  ) {
    firstDiff++;
  }
  if (firstDiff > 0) {
    needle = needle.slice(firstDiff);
    haystack = haystack.slice(firstDiff);
  }

  if (!needle) {
    return [0, firstDiff];
  }
  if (!haystack) {
    // Only deletions of the remaining needle are possible.
    return needle.length <= maxDist ? [needle.length, firstDiff] : [null, null];
  }
  if (maxDist === 0) {
    return [null, null];
  }

  const needleLast = needle.length - 1;
  let prevRow = new Array(needle.length + 1);
  for (let i = 0; i <= maxDist; i++) {
    prevRow[i] = i;
  }
  let currRow = new Array(needle.length + 1);

  let bestScore = null;
  let bestHaystackIdx = null;
  // Scores above this cap cannot lead to an acceptable or better result.
  let scoreCap = maxDist;
  let firstGoodIdx = 0;
  let lastGoodIdx = needleLast;

  for (let hIdx = 0; hIdx < haystack.length; hIdx++) {
    const hCode = haystack.charCodeAt(hIdx);
    // Only the window of needle positions that can still score well is visited.
    const nStart = Math.max(0, firstGoodIdx - 1);
    const nLimit = Math.min(hIdx + maxDist, needleLast, lastGoodIdx);

    currRow[0] = prevRow[0] + 1;
    if (currRow[0] <= scoreCap) {
      firstGoodIdx = 0;
      lastGoodIdx = 0;
    } else {
      firstGoodIdx = null;
      lastGoodIdx = -1;
    }

    let nIdx = nStart;
    while (nIdx < nLimit) {
      const mismatch = hCode !== needle.charCodeAt(nIdx) ? 1 : 0;
      const score = Math.min(prevRow[nIdx] + mismatch, prevRow[nIdx + 1] + 1, currRow[nIdx] + 1);
      currRow[nIdx + 1] = score;
      if (score <= scoreCap) {
        if (firstGoodIdx === null) {
          firstGoodIdx = nIdx + 1;
        }
        lastGoodIdx = Math.max(lastGoodIdx, nIdx + 1 + (scoreCap - score));
      }
      nIdx++;
    }

    // Last cell of the window: the previous row has no usable value above it.
    const tailMismatch = hCode !== needle.charCodeAt(nIdx) ? 1 : 0;
    const lastScore = Math.min(prevRow[nIdx] + tailMismatch, currRow[nIdx] + 1);
    currRow[nIdx + 1] = lastScore;
    if (lastScore <= scoreCap) {
      if (firstGoodIdx === null) {
        firstGoodIdx = nIdx + 1;
      }
      lastGoodIdx = nIdx + 1;
    }

    // A score for the complete needle was produced on this row.
    if (nIdx === needleLast && (bestScore === null || lastScore <= bestScore)) {
      bestScore = lastScore;
      bestHaystackIdx = hIdx;
      if (bestScore < scoreCap) {
        scoreCap = bestScore;
      }
    }

    [prevRow, currRow] = [currRow, prevRow];
    // Nothing on this row is within the cap, so later rows cannot be either.
    if (firstGoodIdx === null) {
      break;
    }
  }

  if (bestScore !== null && bestScore <= maxDist) {
    return [bestScore, bestHaystackIdx + 1 + firstDiff];
  }
  return [null, null];
}
|
|
149
|
+
/**
 * Approximate search driven by exact n-gram hits: the needle is cut into
 * consecutive n-grams of length floor(needle.length / (maxDist + 1)); every
 * exact occurrence of an n-gram in the haystack is then extended to the right
 * and to the left with `_expand`, and reported when the combined distance
 * stays within `maxDist`.
 *
 * @param {string} needle - Sequence to look for.
 * @param {string} haystack - Sequence to search in.
 * @param {number} maxDist - Maximum allowed edit distance.
 * @yields {{start: number, end: number, dist: number}} Match boundaries
 *   (end exclusive) and the distance of the match. The same span may be
 *   yielded more than once when several n-grams lead to it.
 * @throws {RangeError} When the n-gram length is not positive (needle shorter
 *   than maxDist + 1, or empty). Previously this made the outer loop spin
 *   forever because the start index never advanced.
 */
function* fuzzySearchNgrams(needle, haystack, maxDist) {
  const needleLen = needle.length;
  const haystackLen = haystack.length;
  const ngramLen = Math.floor(needleLen / (maxDist + 1));
  if (ngramLen <= 0) {
    throw new RangeError("fuzzySearchNgrams requires needle.length >= maxDist + 1");
  }

  for (let ngramStartIdx = 0; ngramStartIdx <= needleLen - ngramLen; ngramStartIdx += ngramLen) {
    const ngramEnd = ngramStartIdx + ngramLen;
    const ngram = needle.slice(ngramStartIdx, ngramEnd);
    // The left side is matched right-to-left, hence the reversal.
    const needleBeforeReversed = (0, _utils.reverse)(needle.slice(0, ngramStartIdx));
    const needleAfter = needle.slice(ngramEnd);
    // Haystack range in which this n-gram can sit while the rest still fits.
    const startIdx = Math.max(0, ngramStartIdx - maxDist);
    const endIdx = Math.min(haystackLen, haystackLen - needleLen + ngramEnd + maxDist);

    for (const haystackMatchIdx of (0, _utils.searchExact)(ngram, haystack, startIdx, endIdx)) {
      const [distRight, rightExpandSize] = _expand(
        needleAfter,
        haystack.slice(haystackMatchIdx + ngramLen, haystackMatchIdx - ngramStartIdx + needleLen + maxDist),
        maxDist
      );
      if (distRight === null) continue;

      // Whatever distance the right side used is no longer available on the left.
      const [distLeft, leftExpandSize] = _expand(
        needleBeforeReversed,
        (0, _utils.reverse)(
          haystack.slice(Math.max(0, haystackMatchIdx - ngramStartIdx - (maxDist - distRight)), haystackMatchIdx)
        ),
        maxDist - distRight
      );
      if (distLeft === null) continue;

      yield {
        start: haystackMatchIdx - leftExpandSize,
        end: haystackMatchIdx + ngramLen + rightExpandSize,
        dist: distLeft + distRight
      };
    }
  }
}
|
|
173
|
+
/**
 * Approximate search that scans the haystack once while tracking a set of
 * partial-match candidates. Each candidate records where it started, how much
 * of the needle it has consumed and the edit distance accumulated so far;
 * candidates are de-duplicated on that triple.
 *
 * Changes from the previous revision: the debug logging guarded by a constant
 * `false` flag was removed (its `${{...}}` template could only ever print
 * "[object Object]"), candidate registration goes through one helper, and the
 * final loop no longer mutates candidate records.
 *
 * @param {string} needle - Sequence to look for.
 * @param {string} haystack - Sequence to search in.
 * @param {number} maxDist - Maximum allowed edit distance.
 * @yields {{start: number, end: number, dist: number}} Match boundaries
 *   (end exclusive) and the distance of the match.
 */
function* fuzzySearchCandidates(needle, haystack, maxDist) {
  const needleLen = needle.length;
  const haystackLen = haystack.length;
  if (needleLen > haystackLen + maxDist) return;

  const char2needleIdx = makeChar2needleIdx(needle, maxDist);
  let candidates = new Map();

  // Registers a candidate in the current generation, keyed by its full state.
  const addCandidate = (startIdx, needleIdx, dist) => {
    candidates.set(`${startIdx},${needleIdx},${dist}`, { startIdx, needleIdx, dist });
  };

  for (let i = 0; i < haystackLen; i++) {
    const haystackChar = haystack[i];
    const prevCandidates = candidates;
    candidates = new Map();

    // A match may start here if this char occurs within the first
    // maxDist + 1 needle chars; the skipped needle chars count as distance.
    const needleIdx = char2needleIdx[haystackChar];
    if (needleIdx !== undefined) {
      if (needleIdx + 1 === needleLen) {
        yield { start: i, end: i + 1, dist: needleIdx };
      } else {
        addCandidate(i, needleIdx + 1, needleIdx);
      }
    }

    for (const [, candidate] of prevCandidates) {
      if (needle[candidate.needleIdx] === haystackChar) {
        // Expected char: advance, or report when the needle is exhausted.
        if (candidate.needleIdx + 1 === needleLen) {
          yield { start: candidate.startIdx, end: i + 1, dist: candidate.dist };
        } else {
          addCandidate(candidate.startIdx, candidate.needleIdx + 1, candidate.dist);
        }
        continue;
      }

      // Unexpected char: nothing more to try once the budget is used up.
      if (candidate.dist === maxDist) continue;

      // Option 1: skip this haystack char.
      addCandidate(candidate.startIdx, candidate.needleIdx, candidate.dist + 1);

      // Option 2: skip needle chars until one lines up with this haystack char.
      for (let nSkipped = 1; nSkipped <= maxDist - candidate.dist; nSkipped++) {
        if (candidate.needleIdx + nSkipped === needleLen) {
          yield { start: candidate.startIdx, end: i + 1, dist: candidate.dist + nSkipped };
          break;
        } else if (needle[candidate.needleIdx + nSkipped] === haystackChar) {
          if (candidate.needleIdx + nSkipped + 1 === needleLen) {
            yield { start: candidate.startIdx, end: i + 1, dist: candidate.dist + nSkipped };
          } else {
            addCandidate(candidate.startIdx, candidate.needleIdx + 1 + nSkipped, candidate.dist + nSkipped);
          }
          break;
        }
      }

      // Option 3: substitute, i.e. skip one char on both sides.
      if (i + 1 < haystackLen && candidate.needleIdx + 1 < needleLen) {
        addCandidate(candidate.startIdx, candidate.needleIdx + 1, candidate.dist + 1);
      }
    }
  }

  // Candidates still open at the end must drop the rest of the needle.
  for (const [, candidate] of candidates) {
    const dist = candidate.dist + (needleLen - candidate.needleIdx);
    if (dist <= maxDist) {
      yield { start: candidate.startIdx, end: haystackLen, dist };
    }
  }
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.reverse = reverse;
|
|
7
|
+
exports.searchExact = searchExact;
|
|
8
|
+
/**
 * Lazily yields the start index of every (possibly overlapping) exact
 * occurrence of `needle` in `haystack`.
 *
 * @param {string} needle - Sequence to look for; an empty needle yields nothing.
 * @param {string} haystack - Sequence to search in.
 * @param {number} [startIndex=0] - Index at which the search begins.
 * @param {number|null} [endIndex=null] - Exclusive bound that a match must end
 *   at or before; `null` means the end of the haystack.
 * @yields {number} Start index of an occurrence, in increasing order.
 */
function* searchExact(needle, haystack, startIndex = 0, endIndex = null) {
  const needleLen = needle.length;
  if (needleLen === 0) {
    return;
  }
  const limit = endIndex === null ? haystack.length : endIndex;

  let from = startIndex;
  for (;;) {
    const hit = haystack.indexOf(needle, from);
    // Stop when nothing is left or the occurrence would run past the limit.
    if (hit === -1 || hit + needleLen > limit) {
      return;
    }
    yield hit;
    from = hit + 1;
  }
}
|
|
21
|
+
/**
 * Returns `string` with its UTF-16 code units in reverse order.
 *
 * @param {string} string - Text to reverse.
 * @returns {string} The reversed text.
 */
function reverse(string) {
  return string.split("").reduceRight((reversed, unit) => reversed + unit, "");
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.getModelProvider = getModelProvider;
|
|
7
|
+
/**
 * Derives the AI provider identifier from the prefix of a model name.
 *
 * @param {string} modelName - Model name, e.g. "gpt-4o" or "claude-3-opus".
 * @returns {"openai" | "anthropic" | "google_vertexai" | "unknown"} Provider
 *   whose prefix list matches first, or "unknown" when none does.
 */
function getModelProvider(modelName) {
  // Checked in order; the first provider with a matching prefix wins.
  const prefixesByProvider = [
    ["openai", ["gpt-3", "gpt-4", "o1", "o3", "gpt", "o4"]],
    ["anthropic", ["claude"]],
    ["google_vertexai", ["gemini"]]
  ];
  for (const [provider, prefixes] of prefixesByProvider) {
    if (prefixes.some(prefix => modelName.startsWith(prefix))) {
      return provider;
    }
  }
  return "unknown";
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.getSimplifiedHtml = getSimplifiedHtml;
|
|
7
|
+
async function getSimplifiedHtml(containerHandle, options) {
|
|
8
|
+
const tagName = await containerHandle.evaluateHandle(element => element.tagName.toLowerCase());
|
|
9
|
+
const ALLOWED_ATTRIBUTES = ["aria-label", "data-name", "name", "type", "placeholder", "value", "role", "title", "href", "id", "alt", new RegExp(/^data-/)];
|
|
10
|
+
const shouldReturnFullHtml = (await tagName.jsonValue()) === "html";
|
|
11
|
+
const optionsWithDefault = {
|
|
12
|
+
shouldIncludeOnClick: false,
|
|
13
|
+
shouldIncludeContentAsProp: false,
|
|
14
|
+
keepOnlyVisibleElements: true,
|
|
15
|
+
shouldReturnFullHtml,
|
|
16
|
+
...options
|
|
17
|
+
};
|
|
18
|
+
const simplifiedHtml = await containerHandle.evaluate((element, {
|
|
19
|
+
optionsWithDefault,
|
|
20
|
+
ALLOWED_ATTRIBUTES
|
|
21
|
+
}) => {
|
|
22
|
+
function truthyFilter(value) {
|
|
23
|
+
return Boolean(value);
|
|
24
|
+
}
|
|
25
|
+
function isElementNode(node) {
|
|
26
|
+
return node.nodeType === node.ELEMENT_NODE;
|
|
27
|
+
}
|
|
28
|
+
const hasAnyAllowedAttribute = (element, allowedAttributes) => {
|
|
29
|
+
const elementAttributes = element.getAttributeNames();
|
|
30
|
+
const hasAllowedAttribute = !!allowedAttributes.some(attr => {
|
|
31
|
+
if (typeof attr === "string") {
|
|
32
|
+
return element.hasAttribute(attr);
|
|
33
|
+
} else if (attr instanceof RegExp) {
|
|
34
|
+
return elementAttributes.some(attrName => attr.test(attrName));
|
|
35
|
+
}
|
|
36
|
+
});
|
|
37
|
+
return hasAllowedAttribute;
|
|
38
|
+
};
|
|
39
|
+
function isElementVisible(style) {
|
|
40
|
+
return style.opacity !== "" && style.display !== "none" && style.visibility !== "hidden" && style.opacity !== "0";
|
|
41
|
+
}
|
|
42
|
+
function isElementInteractive(element, style) {
|
|
43
|
+
return element.tagName === "A" || element.tagName === "INPUT" || element.tagName === "BUTTON" || element.tagName === "SELECT" || element.tagName === "TEXTAREA" || element.hasAttribute("onclick") || element.hasAttribute("onmousedown") || element.hasAttribute("onmouseup") || element.hasAttribute("onkeydown") || element.hasAttribute("onkeyup") || style.cursor === "pointer";
|
|
44
|
+
}
|
|
45
|
+
function getDocumentFromIframeElementSafely(element) {
|
|
46
|
+
try {
|
|
47
|
+
if (element.contentWindow && element.contentWindow.document) {
|
|
48
|
+
return element.contentWindow.document.documentElement;
|
|
49
|
+
}
|
|
50
|
+
} catch (error) {
|
|
51
|
+
return undefined;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
function isInputWithValue(element) {
|
|
55
|
+
return element.tagName === "INPUT" && element.value && element.value.trim();
|
|
56
|
+
}
|
|
57
|
+
function generateSimplifiedDom(element, interactiveElements, document, allowedAttributes, shouldIncludeContentAsProp, keepOnlyVisibleElements) {
|
|
58
|
+
var _element$textContent;
|
|
59
|
+
if (element.nodeType === 3 && (_element$textContent = element.textContent) !== null && _element$textContent !== void 0 && _element$textContent.trim()) {
|
|
60
|
+
return document.createTextNode(element.textContent + " ");
|
|
61
|
+
}
|
|
62
|
+
const shouldSkipElementChecks = optionsWithDefault.shouldIncludeIframes ? ["BODY", "HTML", "IFRAME"] : ["BODY", "HTML"].includes(element.nodeName);
|
|
63
|
+
if (!isElementNode(element)) {
|
|
64
|
+
return null;
|
|
65
|
+
}
|
|
66
|
+
const style = window.getComputedStyle(element);
|
|
67
|
+
const isVisible = isElementVisible(style) || shouldSkipElementChecks;
|
|
68
|
+
if (keepOnlyVisibleElements && !isVisible && !isInputWithValue(element)) {
|
|
69
|
+
return null;
|
|
70
|
+
}
|
|
71
|
+
let children = optionsWithDefault.shouldIncludeIframes && element.nodeName === "IFRAME" ? [getDocumentFromIframeElementSafely(element)].filter(Boolean) : Array.from(element.childNodes).map(c => generateSimplifiedDom(c, interactiveElements, document, allowedAttributes, shouldIncludeContentAsProp, keepOnlyVisibleElements)).filter(truthyFilter);
|
|
72
|
+
if (element.tagName === "BODY") children = children.filter(c => c.nodeType !== 3);
|
|
73
|
+
const interactive = isElementInteractive(element, style) || element.hasAttribute("role");
|
|
74
|
+
const hasLabel = element.hasAttribute("aria-label") || element.hasAttribute("name");
|
|
75
|
+
const hasAllowedAttribute = hasAnyAllowedAttribute(element, allowedAttributes);
|
|
76
|
+
let includeNode = interactive || hasLabel || hasAllowedAttribute || shouldSkipElementChecks;
|
|
77
|
+
if (children.length === 0 && !hasAnyAllowedAttribute(element, allowedAttributes)) {
|
|
78
|
+
return null;
|
|
79
|
+
}
|
|
80
|
+
if (children.length === 1 && !hasAnyAllowedAttribute(element, allowedAttributes) && children[0].nodeType !== 3 && !(shouldSkipElementChecks && optionsWithDefault.shouldReturnFullHtml)) {
|
|
81
|
+
return children[0];
|
|
82
|
+
}
|
|
83
|
+
if (!includeNode && children.length === 0) return null;
|
|
84
|
+
if (!includeNode && children.some(c => c.nodeType === 3)) {
|
|
85
|
+
includeNode = true;
|
|
86
|
+
}
|
|
87
|
+
if (!includeNode && children.length === 1) {
|
|
88
|
+
return children[0];
|
|
89
|
+
}
|
|
90
|
+
const container = element.cloneNode();
|
|
91
|
+
const allAttributes = element.getAttributeNames();
|
|
92
|
+
const listOfAttributesToRemove = allAttributes.filter(attr => {
|
|
93
|
+
const isAllowedString = allowedAttributes.includes(attr);
|
|
94
|
+
const isAllowedRegExp = allowedAttributes.some(regex => regex instanceof RegExp && regex.test(attr));
|
|
95
|
+
return !isAllowedString && !isAllowedRegExp;
|
|
96
|
+
});
|
|
97
|
+
for (const attr of listOfAttributesToRemove) {
|
|
98
|
+
container.removeAttribute(attr);
|
|
99
|
+
}
|
|
100
|
+
if (interactive) {
|
|
101
|
+
interactiveElements.push(element);
|
|
102
|
+
}
|
|
103
|
+
if (shouldIncludeContentAsProp && element.textContent) {
|
|
104
|
+
container.setAttribute("content", element.textContent);
|
|
105
|
+
}
|
|
106
|
+
children.forEach(child => container.appendChild(child));
|
|
107
|
+
return container;
|
|
108
|
+
}
|
|
109
|
+
/**
 * Runs the DOM simplification starting at `htmlElement` and returns the
 * result as an HTML string.
 *
 * @param {Element} htmlElement - Root node handed to generateSimplifiedDom.
 * @param {boolean} shouldIncludeOnClick - When truthy, "onclick" is added to
 *   the list of attributes that survive simplification.
 * @param {boolean} shouldIncludeContentAsProp - Forwarded unchanged to
 *   generateSimplifiedDom.
 * @param {boolean} keepOnlyVisibleElements - Forwarded unchanged to
 *   generateSimplifiedDom.
 * @returns {string} `outerHTML` of the simplified root, or "" when the
 *   simplification yields nothing.
 */
function getSimplifiedDomFromElement(htmlElement, shouldIncludeOnClick, shouldIncludeContentAsProp, keepOnlyVisibleElements) {
  // Filled in by generateSimplifiedDom while it walks the tree; this function
  // only supplies the accumulator and does not read it afterwards.
  const interactiveElements = [];

  let allowedAttributes = ALLOWED_ATTRIBUTES;
  if (shouldIncludeOnClick) {
    // Copy rather than mutate so the shared ALLOWED_ATTRIBUTES list stays intact.
    allowedAttributes = [...ALLOWED_ATTRIBUTES, "onclick"];
  }

  const simplifiedRoot = generateSimplifiedDom(
    htmlElement,
    interactiveElements,
    htmlElement.ownerDocument,
    allowedAttributes,
    shouldIncludeContentAsProp,
    keepOnlyVisibleElements
  );

  return simplifiedRoot ? simplifiedRoot.outerHTML : "";
}
|
|
116
|
+
return getSimplifiedDomFromElement(element, optionsWithDefault.shouldIncludeOnClick, optionsWithDefault.shouldIncludeContentAsProp, optionsWithDefault.keepOnlyVisibleElements);
|
|
117
|
+
}, {
|
|
118
|
+
optionsWithDefault,
|
|
119
|
+
ALLOWED_ATTRIBUTES
|
|
120
|
+
});
|
|
121
|
+
return simplifiedHtml;
|
|
122
|
+
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.hashObject = hashObject;
|
|
7
|
+
var crypto = _interopRequireWildcard(require("crypto"));
|
|
8
|
+
/**
 * Module-interop helper (appears to be the compiler-generated
 * `interopRequireWildcard` shim; not hand-written — confirm before editing).
 *
 * Wraps a required module `e` in a namespace-like object:
 *  - if `t` is falsy and `e.__esModule` is truthy, `e` is returned as-is;
 *  - otherwise a null-prototype object `{ default: e }` is created; for
 *    null or non-object/non-function `e` that object is returned directly;
 *  - otherwise every own enumerable-in-`for...in` property except "default"
 *    is copied onto it, re-defining accessors (get/set) via
 *    Object.defineProperty and plain-assigning everything else.
 *
 * When WeakMap exists, results are cached per module in one of two WeakMaps
 * (selected by `t`), and the cached wrapper is returned on repeat calls.
 * On first invocation the function replaces itself with the inner
 * implementation so the WeakMaps are created only once.
 */
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
9
|
+
/**
 * Builds a deterministic string form of `obj` that is suitable as hash input.
 *
 * Encoding rules:
 *  - Map   -> serialized as the array of its `[key, value]` entries.
 *  - Set   -> serialized as the array of its values (same treatment as Map).
 *  - Date  -> `Date(<epoch milliseconds>)`.
 *  - Array -> `[item,item,...]`; items are sorted by their serialized text
 *             when `treatArraysAsUnsortedLists` is truthy.
 *  - other non-null objects -> `{` + JSON array of the sorted own enumerable
 *             keys + each value's serialization followed by `,` + `}`.
 *  - everything else -> `JSON.stringify(obj)` interpolated into a string
 *             (so `undefined` becomes the text "undefined").
 *
 * Set and Date get dedicated branches because they expose no enumerable own
 * keys: without them every Set and every Date would collapse to `{[]}` and
 * therefore hash identically to each other and to an empty object.
 *
 * Circular references are not detected and will recurse until the stack
 * overflows.
 *
 * @param {*} obj - Value to serialize.
 * @param {boolean} treatArraysAsUnsortedLists - Sort array items so element
 *   order does not affect the output.
 * @returns {string} Canonical string for `obj`.
 */
function _serialize(obj, treatArraysAsUnsortedLists) {
  if (obj instanceof Map) {
    return _serialize(Array.from(obj.entries()), treatArraysAsUnsortedLists);
  }
  if (obj instanceof Set) {
    return _serialize(Array.from(obj), treatArraysAsUnsortedLists);
  }
  if (obj instanceof Date) {
    // getTime() never throws (invalid dates yield NaN), unlike toISOString().
    return `Date(${obj.getTime()})`;
  }
  if (Array.isArray(obj)) {
    const serializedItems = obj.map(el => _serialize(el, treatArraysAsUnsortedLists));
    if (treatArraysAsUnsortedLists) {
      // Default sort compares the serialized strings, which is deterministic.
      serializedItems.sort();
    }
    return `[${serializedItems.join(",")}]`;
  }
  if (typeof obj === "object" && obj !== null) {
    const keys = Object.keys(obj).sort();
    let acc = `{${JSON.stringify(keys)}`;
    for (const key of keys) {
      acc += `${_serialize(obj[key], treatArraysAsUnsortedLists)},`;
    }
    return `${acc}}`;
  }
  return `${JSON.stringify(obj)}`;
}
|
|
29
|
+
/**
 * Hashes an arbitrary value by first turning it into a canonical string via
 * `_serialize` and then digesting that string.
 *
 * @param {*} obj - Value to hash.
 * @param {boolean} [treatArraysAsUnsortedLists=false] - Passed through to
 *   `_serialize`; when truthy, array element order does not affect the hash.
 * @param {string} [hashAlgorithm="SHA256"] - Algorithm name handed to
 *   `crypto.createHash`.
 * @param {string} [encoding="hex"] - Output encoding handed to `digest`.
 * @returns {string} The digest in the requested encoding.
 */
function hashObject(obj, treatArraysAsUnsortedLists = false, hashAlgorithm = "SHA256", encoding = "hex") {
  // Create the hasher first so an unsupported algorithm fails before any
  // serialization work is done.
  const hasher = crypto.createHash(hashAlgorithm);
  const canonicalText = _serialize(obj, treatArraysAsUnsortedLists);
  hasher.update(canonicalText);
  return hasher.digest(encoding);
}
|