@intuned/browser-dev 0.1.8-dev.0 → 0.1.10-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -143
- package/dist/ai/export.d.ts +291 -143
- package/dist/ai/extractStructuredData.js +21 -27
- package/dist/ai/extractStructuredDataUsingAi.js +24 -1
- package/dist/ai/index.d.ts +291 -143
- package/dist/ai/tests/testCreateMatchesMapping.spec.js +216 -0
- package/dist/ai/tests/testExtractStructuredData.spec.js +348 -2
- package/dist/ai/tests/testExtractStructuredDataDomMatchingIframes.spec.js +459 -0
- package/dist/ai/tests/testExtractStructuredDataUnit.spec.js +375 -0
- package/dist/ai/tests/testMatching.spec.js +342 -0
- package/dist/ai/tests/testValidateMatchesMapping.spec.js +265 -0
- package/dist/common/Logger/index.js +2 -2
- package/dist/common/extendedTest.js +38 -30
- package/dist/common/frame_utils/frameTree.js +116 -0
- package/dist/common/frame_utils/getContentWithNestedIframes.js +13 -0
- package/dist/common/frame_utils/index.js +95 -0
- package/dist/common/frame_utils/stitchIframe.js +105 -0
- package/dist/{helpers → common}/frame_utils/tests/testFindAllIframes.spec.js +24 -15
- package/dist/common/frame_utils/tests/testGetContentWithNestedIframes.spec.js +241 -0
- package/dist/common/frame_utils/utils.js +91 -0
- package/dist/common/getSimplifiedHtml.js +20 -20
- package/dist/common/matching/matching.js +91 -16
- package/dist/common/tests/matching.test.js +225 -0
- package/dist/common/tests/testGetSimplifiedHtml.spec.js +324 -0
- package/dist/helpers/export.d.ts +702 -575
- package/dist/helpers/extractMarkdown.js +16 -7
- package/dist/helpers/index.d.ts +702 -575
- package/dist/helpers/tests/testExtractMarkdown.spec.js +29 -0
- package/dist/helpers/waitForDomSettled.js +4 -4
- package/dist/helpers/withNetworkSettledWait.js +2 -7
- package/dist/optimized-extractors/export.d.ts +17 -18
- package/dist/optimized-extractors/index.d.ts +17 -18
- package/dist/types/intuned-runtime.d.ts +6 -32
- package/how-to-generate-docs.md +40 -28
- package/package.json +2 -2
- package/dist/helpers/frame_utils/constants.js +0 -8
- package/dist/helpers/frame_utils/findAllIframes.js +0 -82
- package/dist/helpers/frame_utils/index.js +0 -44
- /package/dist/{helpers → common}/frame_utils/checkFrameAllowsAsyncScripts.js +0 -0
- /package/dist/{helpers → common}/frame_utils/getContainerFrame.js +0 -0
|
@@ -15,7 +15,8 @@ var _getSimplifiedHtml = require("../common/getSimplifiedHtml");
|
|
|
15
15
|
var _hashObject = require("../common/hashObject");
|
|
16
16
|
var _Logger = require("../common/Logger");
|
|
17
17
|
var _helpers = require("../helpers");
|
|
18
|
-
var
|
|
18
|
+
var _frame_utils = require("../common/frame_utils");
|
|
19
|
+
var _matching = require("../common/matching/matching");
|
|
19
20
|
const extractStructuredData = async options => {
|
|
20
21
|
if ("content" in options && !("source" in options)) {
|
|
21
22
|
return await extractStructuredDataFromContent(options);
|
|
@@ -57,11 +58,7 @@ const extractStructuredData = async options => {
|
|
|
57
58
|
}
|
|
58
59
|
let cacheKey = "";
|
|
59
60
|
if (validatedData.strategy === "HTML") {
|
|
60
|
-
const
|
|
61
|
-
if (!containerHandle) {
|
|
62
|
-
throw new Error("No HTML content found in the specified region.");
|
|
63
|
-
}
|
|
64
|
-
const simplifiedHtml = await (0, _getSimplifiedHtml.getSimplifiedHtml)(containerHandle);
|
|
61
|
+
const simplifiedHtml = await (0, _frame_utils.getContentWithNestedIframes)(pageOrLocator, 10000, _getSimplifiedHtml.getSimplifiedHtml);
|
|
65
62
|
if (validatedData.enableCache) {
|
|
66
63
|
cacheKey = (0, _hashObject.hashObject)({
|
|
67
64
|
pageUrl: pageObject.url(),
|
|
@@ -76,7 +73,7 @@ const extractStructuredData = async options => {
|
|
|
76
73
|
}, true);
|
|
77
74
|
const cachedResult = await _cache.cache.get(cacheKey);
|
|
78
75
|
if (validatedData.enableDomMatching && cachedResult && cachedResult.matchesMapping) {
|
|
79
|
-
const isValid = await (0,
|
|
76
|
+
const isValid = await (0, _matching.validateMatchesMapping)(pageObject, cachedResult.matchesMapping);
|
|
80
77
|
if (isValid) {
|
|
81
78
|
_Logger.logger.info("Returning cached result with valid DOM matching");
|
|
82
79
|
return cachedResult.result;
|
|
@@ -106,9 +103,10 @@ const extractStructuredData = async options => {
|
|
|
106
103
|
if (!validatedData.enableDomMatching) {
|
|
107
104
|
await _cache.cache.set(cacheKey, result.value.result);
|
|
108
105
|
} else {
|
|
106
|
+
const domValidationHash = await (0, _matching.createMatchesMapping)(pageObject, result.value.result);
|
|
109
107
|
const resultsToCache = {
|
|
110
108
|
result: result.value.result,
|
|
111
|
-
matchesMapping:
|
|
109
|
+
matchesMapping: domValidationHash
|
|
112
110
|
};
|
|
113
111
|
await _cache.cache.set(cacheKey, resultsToCache);
|
|
114
112
|
}
|
|
@@ -132,7 +130,7 @@ const extractStructuredData = async options => {
|
|
|
132
130
|
}, true);
|
|
133
131
|
const cachedResult = await _cache.cache.get(cacheKey);
|
|
134
132
|
if (validatedData.enableDomMatching && cachedResult && cachedResult.matchesMapping) {
|
|
135
|
-
const isValid = await (0,
|
|
133
|
+
const isValid = await (0, _matching.validateMatchesMapping)(pageObject, cachedResult.matchesMapping);
|
|
136
134
|
if (isValid) {
|
|
137
135
|
_Logger.logger.info("Returning cached result with valid DOM matching");
|
|
138
136
|
return cachedResult.result;
|
|
@@ -168,9 +166,10 @@ const extractStructuredData = async options => {
|
|
|
168
166
|
if (!validatedData.enableDomMatching) {
|
|
169
167
|
await _cache.cache.set(cacheKey, result.value.result);
|
|
170
168
|
} else {
|
|
169
|
+
const domValidationHash = await (0, _matching.createMatchesMapping)(pageObject, result.value.result);
|
|
171
170
|
const resultsToCache = {
|
|
172
171
|
result: result.value.result,
|
|
173
|
-
matchesMapping:
|
|
172
|
+
matchesMapping: domValidationHash
|
|
174
173
|
};
|
|
175
174
|
await _cache.cache.set(cacheKey, resultsToCache);
|
|
176
175
|
}
|
|
@@ -178,13 +177,8 @@ const extractStructuredData = async options => {
|
|
|
178
177
|
return result.value.result;
|
|
179
178
|
}
|
|
180
179
|
if (validatedData.strategy === "MARKDOWN") {
|
|
181
|
-
const containerHandle = isPageInput ? await pageOrLocator.locator("html").elementHandle() : await pageOrLocator.elementHandle();
|
|
182
|
-
const html = await (containerHandle === null || containerHandle === void 0 ? void 0 : containerHandle.innerHTML());
|
|
183
|
-
if (!html) {
|
|
184
|
-
throw new Error("No HTML content found in the specified region.");
|
|
185
|
-
}
|
|
186
180
|
const markdown = await (0, _helpers.extractMarkdown)({
|
|
187
|
-
source:
|
|
181
|
+
source: pageOrLocator
|
|
188
182
|
});
|
|
189
183
|
if (validatedData.enableCache) {
|
|
190
184
|
cacheKey = (0, _hashObject.hashObject)({
|
|
@@ -200,14 +194,13 @@ const extractStructuredData = async options => {
|
|
|
200
194
|
})
|
|
201
195
|
}, true);
|
|
202
196
|
const cachedResult = await _cache.cache.get(cacheKey);
|
|
203
|
-
if (enableDomMatching && cachedResult && cachedResult.matchesMapping) {
|
|
204
|
-
const
|
|
205
|
-
const isValid = await (0, _xpathMapping.validateXPathMapping)(pageObject, cachedXpathMapping);
|
|
197
|
+
if (validatedData.enableDomMatching && cachedResult && cachedResult.matchesMapping) {
|
|
198
|
+
const isValid = await (0, _matching.validateMatchesMapping)(pageObject, cachedResult.matchesMapping);
|
|
206
199
|
if (isValid) {
|
|
207
200
|
_Logger.logger.info("Returning cached result with valid DOM matching");
|
|
208
201
|
return cachedResult.result;
|
|
209
202
|
}
|
|
210
|
-
} else if (cachedResult && !enableDomMatching) {
|
|
203
|
+
} else if (cachedResult && !validatedData.enableDomMatching) {
|
|
211
204
|
_Logger.logger.info("Returning cached result");
|
|
212
205
|
return cachedResult;
|
|
213
206
|
}
|
|
@@ -229,15 +222,16 @@ const extractStructuredData = async options => {
|
|
|
229
222
|
throw new Error(result.error.context);
|
|
230
223
|
}
|
|
231
224
|
if (validatedData.enableCache) {
|
|
232
|
-
if (!enableDomMatching) {
|
|
225
|
+
if (!validatedData.enableDomMatching) {
|
|
233
226
|
await _cache.cache.set(cacheKey, result.value.result);
|
|
234
|
-
|
|
227
|
+
} else {
|
|
228
|
+
const domValidationHash = await (0, _matching.createMatchesMapping)(pageObject, result.value.result);
|
|
229
|
+
const resultsToCache = {
|
|
230
|
+
result: result.value.result,
|
|
231
|
+
matchesMapping: domValidationHash
|
|
232
|
+
};
|
|
233
|
+
await _cache.cache.set(cacheKey, resultsToCache);
|
|
235
234
|
}
|
|
236
|
-
const resultsToCache = {
|
|
237
|
-
result: result.value.result,
|
|
238
|
-
matchesMapping: result.value.xpathMapping || {}
|
|
239
|
-
};
|
|
240
|
-
await _cache.cache.set(cacheKey, resultsToCache);
|
|
241
235
|
}
|
|
242
236
|
return result.value.result;
|
|
243
237
|
}
|
|
@@ -154,7 +154,30 @@ async function extractStructuredDataUsingAi(input) {
|
|
|
154
154
|
});
|
|
155
155
|
_Logger.logger.info(`Extraction failed,
|
|
156
156
|
Total LLM ${isGateway ? "Cost In Cents" : "Tokens"}: ${accumulatedTokens}`);
|
|
157
|
-
|
|
157
|
+
let errorMessage = "Unknown error during extraction";
|
|
158
|
+
if (error instanceof Error) {
|
|
159
|
+
errorMessage = error.message;
|
|
160
|
+
const apiError = error;
|
|
161
|
+
if (apiError.responseBody) {
|
|
162
|
+
try {
|
|
163
|
+
const responseBody = JSON.parse(apiError.responseBody);
|
|
164
|
+
if (responseBody.error) {
|
|
165
|
+
if (typeof responseBody.error === "string") {
|
|
166
|
+
errorMessage = responseBody.error;
|
|
167
|
+
} else if (responseBody.error.message) {
|
|
168
|
+
errorMessage = responseBody.error.message;
|
|
169
|
+
} else {
|
|
170
|
+
errorMessage = JSON.stringify(responseBody.error);
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
} catch {
|
|
174
|
+
if (typeof apiError.responseBody === "string") {
|
|
175
|
+
errorMessage = apiError.responseBody;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
return (0, _neverthrow.err)(Errors.invalidExtractionResult(errorMessage));
|
|
158
181
|
}
|
|
159
182
|
}
|
|
160
183
|
_Logger.logger.info(`Extraction failed.
|