@intuned/browser-dev 0.1.8-dev.0 → 0.1.10-dev.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +85 -143
  2. package/dist/ai/export.d.ts +291 -143
  3. package/dist/ai/extractStructuredData.js +21 -27
  4. package/dist/ai/extractStructuredDataUsingAi.js +24 -1
  5. package/dist/ai/index.d.ts +291 -143
  6. package/dist/ai/tests/testCreateMatchesMapping.spec.js +216 -0
  7. package/dist/ai/tests/testExtractStructuredData.spec.js +348 -2
  8. package/dist/ai/tests/testExtractStructuredDataDomMatchingIframes.spec.js +459 -0
  9. package/dist/ai/tests/testExtractStructuredDataUnit.spec.js +375 -0
  10. package/dist/ai/tests/testMatching.spec.js +342 -0
  11. package/dist/ai/tests/testValidateMatchesMapping.spec.js +265 -0
  12. package/dist/common/Logger/index.js +2 -2
  13. package/dist/common/extendedTest.js +38 -30
  14. package/dist/common/frame_utils/frameTree.js +116 -0
  15. package/dist/common/frame_utils/getContentWithNestedIframes.js +13 -0
  16. package/dist/common/frame_utils/index.js +95 -0
  17. package/dist/common/frame_utils/stitchIframe.js +105 -0
  18. package/dist/{helpers → common}/frame_utils/tests/testFindAllIframes.spec.js +24 -15
  19. package/dist/common/frame_utils/tests/testGetContentWithNestedIframes.spec.js +241 -0
  20. package/dist/common/frame_utils/utils.js +91 -0
  21. package/dist/common/getSimplifiedHtml.js +20 -20
  22. package/dist/common/matching/matching.js +91 -16
  23. package/dist/common/tests/matching.test.js +225 -0
  24. package/dist/common/tests/testGetSimplifiedHtml.spec.js +324 -0
  25. package/dist/helpers/export.d.ts +702 -575
  26. package/dist/helpers/extractMarkdown.js +16 -7
  27. package/dist/helpers/index.d.ts +702 -575
  28. package/dist/helpers/tests/testExtractMarkdown.spec.js +29 -0
  29. package/dist/helpers/waitForDomSettled.js +4 -4
  30. package/dist/helpers/withNetworkSettledWait.js +2 -7
  31. package/dist/optimized-extractors/export.d.ts +17 -18
  32. package/dist/optimized-extractors/index.d.ts +17 -18
  33. package/dist/types/intuned-runtime.d.ts +6 -32
  34. package/how-to-generate-docs.md +40 -28
  35. package/package.json +2 -2
  36. package/dist/helpers/frame_utils/constants.js +0 -8
  37. package/dist/helpers/frame_utils/findAllIframes.js +0 -82
  38. package/dist/helpers/frame_utils/index.js +0 -44
  39. /package/dist/{helpers → common}/frame_utils/checkFrameAllowsAsyncScripts.js +0 -0
  40. /package/dist/{helpers → common}/frame_utils/getContainerFrame.js +0 -0
@@ -15,7 +15,8 @@ var _getSimplifiedHtml = require("../common/getSimplifiedHtml");
15
15
  var _hashObject = require("../common/hashObject");
16
16
  var _Logger = require("../common/Logger");
17
17
  var _helpers = require("../helpers");
18
- var _xpathMapping = require("../common/xpathMapping");
18
+ var _frame_utils = require("../common/frame_utils");
19
+ var _matching = require("../common/matching/matching");
19
20
  const extractStructuredData = async options => {
20
21
  if ("content" in options && !("source" in options)) {
21
22
  return await extractStructuredDataFromContent(options);
@@ -57,11 +58,7 @@ const extractStructuredData = async options => {
57
58
  }
58
59
  let cacheKey = "";
59
60
  if (validatedData.strategy === "HTML") {
60
- const containerHandle = isPageInput ? await pageOrLocator.locator("html").elementHandle() : await pageOrLocator.elementHandle();
61
- if (!containerHandle) {
62
- throw new Error("No HTML content found in the specified region.");
63
- }
64
- const simplifiedHtml = await (0, _getSimplifiedHtml.getSimplifiedHtml)(containerHandle);
61
+ const simplifiedHtml = await (0, _frame_utils.getContentWithNestedIframes)(pageOrLocator, 10000, _getSimplifiedHtml.getSimplifiedHtml);
65
62
  if (validatedData.enableCache) {
66
63
  cacheKey = (0, _hashObject.hashObject)({
67
64
  pageUrl: pageObject.url(),
@@ -76,7 +73,7 @@ const extractStructuredData = async options => {
76
73
  }, true);
77
74
  const cachedResult = await _cache.cache.get(cacheKey);
78
75
  if (validatedData.enableDomMatching && cachedResult && cachedResult.matchesMapping) {
79
- const isValid = await (0, _xpathMapping.validateXPathMapping)(pageObject, cachedResult.matchesMapping);
76
+ const isValid = await (0, _matching.validateMatchesMapping)(pageObject, cachedResult.matchesMapping);
80
77
  if (isValid) {
81
78
  _Logger.logger.info("Returning cached result with valid DOM matching");
82
79
  return cachedResult.result;
@@ -106,9 +103,10 @@ const extractStructuredData = async options => {
106
103
  if (!validatedData.enableDomMatching) {
107
104
  await _cache.cache.set(cacheKey, result.value.result);
108
105
  } else {
106
+ const domValidationHash = await (0, _matching.createMatchesMapping)(pageObject, result.value.result);
109
107
  const resultsToCache = {
110
108
  result: result.value.result,
111
- matchesMapping: result.value.xpathMapping || {}
109
+ matchesMapping: domValidationHash
112
110
  };
113
111
  await _cache.cache.set(cacheKey, resultsToCache);
114
112
  }
@@ -132,7 +130,7 @@ const extractStructuredData = async options => {
132
130
  }, true);
133
131
  const cachedResult = await _cache.cache.get(cacheKey);
134
132
  if (validatedData.enableDomMatching && cachedResult && cachedResult.matchesMapping) {
135
- const isValid = await (0, _xpathMapping.validateXPathMapping)(pageObject, cachedResult.matchesMapping);
133
+ const isValid = await (0, _matching.validateMatchesMapping)(pageObject, cachedResult.matchesMapping);
136
134
  if (isValid) {
137
135
  _Logger.logger.info("Returning cached result with valid DOM matching");
138
136
  return cachedResult.result;
@@ -168,9 +166,10 @@ const extractStructuredData = async options => {
168
166
  if (!validatedData.enableDomMatching) {
169
167
  await _cache.cache.set(cacheKey, result.value.result);
170
168
  } else {
169
+ const domValidationHash = await (0, _matching.createMatchesMapping)(pageObject, result.value.result);
171
170
  const resultsToCache = {
172
171
  result: result.value.result,
173
- matchesMapping: result.value.xpathMapping || {}
172
+ matchesMapping: domValidationHash
174
173
  };
175
174
  await _cache.cache.set(cacheKey, resultsToCache);
176
175
  }
@@ -178,13 +177,8 @@ const extractStructuredData = async options => {
178
177
  return result.value.result;
179
178
  }
180
179
  if (validatedData.strategy === "MARKDOWN") {
181
- const containerHandle = isPageInput ? await pageOrLocator.locator("html").elementHandle() : await pageOrLocator.elementHandle();
182
- const html = await (containerHandle === null || containerHandle === void 0 ? void 0 : containerHandle.innerHTML());
183
- if (!html) {
184
- throw new Error("No HTML content found in the specified region.");
185
- }
186
180
  const markdown = await (0, _helpers.extractMarkdown)({
187
- source: pageObject
181
+ source: pageOrLocator
188
182
  });
189
183
  if (validatedData.enableCache) {
190
184
  cacheKey = (0, _hashObject.hashObject)({
@@ -200,14 +194,13 @@ const extractStructuredData = async options => {
200
194
  })
201
195
  }, true);
202
196
  const cachedResult = await _cache.cache.get(cacheKey);
203
- if (enableDomMatching && cachedResult && cachedResult.matchesMapping) {
204
- const cachedXpathMapping = cachedResult.matchesMapping;
205
- const isValid = await (0, _xpathMapping.validateXPathMapping)(pageObject, cachedXpathMapping);
197
+ if (validatedData.enableDomMatching && cachedResult && cachedResult.matchesMapping) {
198
+ const isValid = await (0, _matching.validateMatchesMapping)(pageObject, cachedResult.matchesMapping);
206
199
  if (isValid) {
207
200
  _Logger.logger.info("Returning cached result with valid DOM matching");
208
201
  return cachedResult.result;
209
202
  }
210
- } else if (cachedResult && !enableDomMatching) {
203
+ } else if (cachedResult && !validatedData.enableDomMatching) {
211
204
  _Logger.logger.info("Returning cached result");
212
205
  return cachedResult;
213
206
  }
@@ -229,15 +222,16 @@ const extractStructuredData = async options => {
229
222
  throw new Error(result.error.context);
230
223
  }
231
224
  if (validatedData.enableCache) {
232
- if (!enableDomMatching) {
225
+ if (!validatedData.enableDomMatching) {
233
226
  await _cache.cache.set(cacheKey, result.value.result);
234
- return result.value.result;
227
+ } else {
228
+ const domValidationHash = await (0, _matching.createMatchesMapping)(pageObject, result.value.result);
229
+ const resultsToCache = {
230
+ result: result.value.result,
231
+ matchesMapping: domValidationHash
232
+ };
233
+ await _cache.cache.set(cacheKey, resultsToCache);
235
234
  }
236
- const resultsToCache = {
237
- result: result.value.result,
238
- matchesMapping: result.value.xpathMapping || {}
239
- };
240
- await _cache.cache.set(cacheKey, resultsToCache);
241
235
  }
242
236
  return result.value.result;
243
237
  }
@@ -154,7 +154,30 @@ async function extractStructuredDataUsingAi(input) {
154
154
  });
155
155
  _Logger.logger.info(`Extraction failed,
156
156
  Total LLM ${isGateway ? "Cost In Cents" : "Tokens"}: ${accumulatedTokens}`);
157
- return (0, _neverthrow.err)(Errors.invalidExtractionResult(error instanceof Error ? error.message : "Unknown error during extraction"));
157
+ let errorMessage = "Unknown error during extraction";
158
+ if (error instanceof Error) {
159
+ errorMessage = error.message;
160
+ const apiError = error;
161
+ if (apiError.responseBody) {
162
+ try {
163
+ const responseBody = JSON.parse(apiError.responseBody);
164
+ if (responseBody.error) {
165
+ if (typeof responseBody.error === "string") {
166
+ errorMessage = responseBody.error;
167
+ } else if (responseBody.error.message) {
168
+ errorMessage = responseBody.error.message;
169
+ } else {
170
+ errorMessage = JSON.stringify(responseBody.error);
171
+ }
172
+ }
173
+ } catch {
174
+ if (typeof apiError.responseBody === "string") {
175
+ errorMessage = apiError.responseBody;
176
+ }
177
+ }
178
+ }
179
+ }
180
+ return (0, _neverthrow.err)(Errors.invalidExtractionResult(errorMessage));
158
181
  }
159
182
  }
160
183
  _Logger.logger.info(`Extraction failed.