@intuned/browser-dev 2.2.3-unify-sdks.21 → 2.2.3-unify-sdks.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/export.d.js +3 -1
- package/dist/ai/export.d.ts +76 -53
- package/dist/ai/extractStructuredDataUsingAi.js +12 -8
- package/dist/ai/index.d.ts +76 -53
- package/dist/ai/isPageLoaded.js +5 -0
- package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +2 -4
- package/dist/ai/validators.js +8 -6
- package/dist/helpers/downloadFile.js +2 -2
- package/dist/helpers/export.d.ts +135 -132
- package/dist/helpers/gotoUrl.js +3 -4
- package/dist/helpers/index.d.ts +135 -132
- package/dist/helpers/sanitizeHtml.js +5 -4
- package/dist/helpers/scrollToLoadContent.js +1 -1
- package/dist/helpers/tests/testExtractMarkdown.spec.js +4 -6
- package/dist/playwright/export.d.js +5 -0
- package/dist/playwright/export.d.ts +220 -0
- package/dist/playwright/index.d.ts +220 -0
- package/dist/playwright/index.js +18 -0
- package/dist/playwright/staticExtractors/extractHelpers.js +170 -0
- package/dist/playwright/staticExtractors/getArrayUsingArrayExtractor.js +84 -0
- package/dist/playwright/staticExtractors/getObjectUsingObjectExtractor.js +45 -0
- package/dist/playwright/staticExtractors/index.js +37 -0
- package/dist/playwright/staticExtractors/types.js +26 -0
- package/package.json +8 -2
package/dist/ai/export.d.js
CHANGED
package/dist/ai/export.d.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { Locator, Page } from "playwright-core";
|
|
2
|
-
import { ObjectSchema } from "./jsonSchema";
|
|
3
2
|
import { JSONSchema7TypeName } from "json-schema";
|
|
4
3
|
|
|
5
4
|
/**
|
|
@@ -278,62 +277,85 @@ export declare function extractStructuredData(options: {
|
|
|
278
277
|
maxRetries?: number;
|
|
279
278
|
}): Promise<any>;
|
|
280
279
|
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
280
|
+
type SUPPORTED_CLAUDE_MODELS =
|
|
281
|
+
| "claude-3-5-haiku-20241022"
|
|
282
|
+
| "claude-3-5-haiku-latest"
|
|
283
|
+
| "claude-3-5-sonnet-20240620"
|
|
284
|
+
| "claude-3-5-sonnet-20241022"
|
|
285
|
+
| "claude-3-5-sonnet-latest"
|
|
286
|
+
| "claude-3-7-sonnet-20250219"
|
|
287
|
+
| "claude-3-7-sonnet-latest"
|
|
288
|
+
| "claude-3-haiku-20240307"
|
|
289
|
+
| "claude-4-opus-20250514"
|
|
290
|
+
| "claude-4-sonnet-20250514"
|
|
291
|
+
| "claude-opus-4-1"
|
|
288
292
|
| "claude-opus-4-1-20250805"
|
|
289
293
|
| "claude-opus-4-20250514"
|
|
290
|
-
| "claude-sonnet-4-20250514"
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
| "claude-3-5-haiku-latest"
|
|
294
|
-
| "gpt-5"
|
|
295
|
-
| "gpt-5-mini"
|
|
296
|
-
| "gpt-5-nano"
|
|
297
|
-
| "gpt-5-chat"
|
|
298
|
-
| "gpt-5-chat-latest"
|
|
299
|
-
| "gpt-5-2025-08-07"
|
|
300
|
-
| "gpt-5-mini-2025-08-07"
|
|
301
|
-
| "gpt-5-nano-2025-08-07"
|
|
302
|
-
| "gpt-4.1"
|
|
303
|
-
| "gpt-4.1-mini"
|
|
304
|
-
| "gpt-4.1-nano"
|
|
305
|
-
| "o4-mini"
|
|
306
|
-
| "o3-mini"
|
|
307
|
-
| "o3"
|
|
308
|
-
| "o1-mini"
|
|
309
|
-
| "o1-preview"
|
|
310
|
-
| "gpt-4o-mini"
|
|
311
|
-
| "gpt-4o-mini-2024-07-18"
|
|
312
|
-
| "gpt-4o"
|
|
313
|
-
| "gpt-4o-2024-08-06"
|
|
314
|
-
| "gpt-4o-2024-05-13"
|
|
315
|
-
| "gpt-4o-2024-05-13"
|
|
316
|
-
| "gpt-4-turbo"
|
|
317
|
-
| "gpt-4-turbo-preview"
|
|
318
|
-
| "gpt-4-0125-preview"
|
|
319
|
-
| "gpt-4-1106-preview"
|
|
320
|
-
| "gpt-3.5-turbo-1106"
|
|
294
|
+
| "claude-sonnet-4-20250514";
|
|
295
|
+
|
|
296
|
+
type SUPPORTED_OPENAI_MODELS =
|
|
321
297
|
| "gpt-3.5-turbo"
|
|
298
|
+
| "gpt-3.5-turbo-0125"
|
|
322
299
|
| "gpt-3.5-turbo-0301"
|
|
323
300
|
| "gpt-3.5-turbo-0613"
|
|
301
|
+
| "gpt-3.5-turbo-1106"
|
|
324
302
|
| "gpt-3.5-turbo-16k"
|
|
325
303
|
| "gpt-3.5-turbo-16k-0613"
|
|
304
|
+
| "gpt-3.5-turbo-instruct"
|
|
305
|
+
| "gpt-3.5-turbo-instruct-0914"
|
|
326
306
|
| "gpt-4"
|
|
327
307
|
| "gpt-4-0314"
|
|
328
308
|
| "gpt-4-0613"
|
|
329
309
|
| "gpt-4-32k"
|
|
330
310
|
| "gpt-4-32k-0314"
|
|
331
311
|
| "gpt-4-32k-0613"
|
|
332
|
-
| "
|
|
333
|
-
| "
|
|
334
|
-
| "
|
|
335
|
-
| "
|
|
336
|
-
| "
|
|
312
|
+
| "gpt-4-turbo"
|
|
313
|
+
| "gpt-4-turbo-2024-04-09"
|
|
314
|
+
| "gpt-4.1"
|
|
315
|
+
| "gpt-4.1-2025-04-14"
|
|
316
|
+
| "gpt-4.1-mini"
|
|
317
|
+
| "gpt-4.1-mini-2025-04-14"
|
|
318
|
+
| "gpt-4.1-nano"
|
|
319
|
+
| "gpt-4.1-nano-2025-04-14"
|
|
320
|
+
| "gpt-4o"
|
|
321
|
+
| "gpt-4o-2024-05-13"
|
|
322
|
+
| "gpt-4o-2024-08-06"
|
|
323
|
+
| "gpt-4o-2024-11-20"
|
|
324
|
+
| "gpt-4o-mini"
|
|
325
|
+
| "gpt-4o-mini-2024-07-18"
|
|
326
|
+
| "gpt-5"
|
|
327
|
+
| "gpt-5-2025-08-07"
|
|
328
|
+
| "gpt-5-chat"
|
|
329
|
+
| "gpt-5-chat-latest"
|
|
330
|
+
| "gpt-5-mini"
|
|
331
|
+
| "gpt-5-mini-2025-08-07"
|
|
332
|
+
| "gpt-5-nano"
|
|
333
|
+
| "gpt-5-nano-2025-08-07"
|
|
334
|
+
| "o1"
|
|
335
|
+
| "o1-2024-12-17"
|
|
336
|
+
| "o1-mini"
|
|
337
|
+
| "o1-mini-2024-09-12"
|
|
338
|
+
| "o1-pro"
|
|
339
|
+
| "o1-pro-2025-03-19"
|
|
340
|
+
| "o3"
|
|
341
|
+
| "o3-2025-04-16"
|
|
342
|
+
| "o3-deep-research"
|
|
343
|
+
| "o3-deep-research-2025-06-26"
|
|
344
|
+
| "o3-mini"
|
|
345
|
+
| "o3-mini-2025-01-31"
|
|
346
|
+
| "o3-pro"
|
|
347
|
+
| "o3-pro-2025-06-10"
|
|
348
|
+
| "o4-mini"
|
|
349
|
+
| "o4-mini-2025-04-16"
|
|
350
|
+
| "o4-mini-deep-research"
|
|
351
|
+
| "o4-mini-deep-research-2025-06-26";
|
|
352
|
+
/**
|
|
353
|
+
* This type defines the supported AI models for data extraction.
|
|
354
|
+
* It includes models from OpenAI and Anthropic.
|
|
355
|
+
* The models are used in the extraction strategies to process and analyze the content of web pages or elements.
|
|
356
|
+
* @type SUPPORTED_MODELS
|
|
357
|
+
*/
|
|
358
|
+
type SUPPORTED_MODELS = SUPPORTED_CLAUDE_MODELS | SUPPORTED_OPENAI_MODELS;
|
|
337
359
|
|
|
338
360
|
/**
|
|
339
361
|
* Represents a JSON Schema definition for validating data structures.
|
|
@@ -394,13 +416,6 @@ type SUPPORTED_MODELS =
|
|
|
394
416
|
* };
|
|
395
417
|
* ```
|
|
396
418
|
*/
|
|
397
|
-
export type JSONSchema =
|
|
398
|
-
| StringSchema
|
|
399
|
-
| NumberSchema
|
|
400
|
-
| BooleanSchema
|
|
401
|
-
| ArraySchema
|
|
402
|
-
| ObjectSchema
|
|
403
|
-
| BaseSchema;
|
|
404
419
|
|
|
405
420
|
/**
|
|
406
421
|
* @interface HtmlStrategy
|
|
@@ -592,7 +607,7 @@ export interface HtmlStrategy {
|
|
|
592
607
|
*
|
|
593
608
|
* @param {Page} page - The Playwright page to check
|
|
594
609
|
* @param {Object} [options] - Optional configuration object
|
|
595
|
-
* @param {
|
|
610
|
+
* @param {SUPPORTED_MODELS} [options.model="gpt-4o-2024-08-06"] - [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) to use for the AI Check. default gpt-4o-2024-08-06
|
|
596
611
|
* @param {number} [options.timeoutInMs=10000] - Screenshot timeout in milliseconds
|
|
597
612
|
* @param {string} [options.apiKey] - Optional API key for the AI service
|
|
598
613
|
* @returns {Promise.<{status: LoadingStatus, reason: (string|null|undefined), cost: (number|undefined)}>}
|
|
@@ -636,7 +651,7 @@ export declare function isPageLoaded(
|
|
|
636
651
|
page: Page,
|
|
637
652
|
options?: {
|
|
638
653
|
timeoutInMs?: number;
|
|
639
|
-
model?:
|
|
654
|
+
model?: SUPPORTED_MODELS;
|
|
640
655
|
apiKey?: string;
|
|
641
656
|
}
|
|
642
657
|
): Promise<{
|
|
@@ -649,3 +664,11 @@ export declare function isPageLoaded(
|
|
|
649
664
|
* LoadingStatus is a union of true, false, and "Dont know".
|
|
650
665
|
*/
|
|
651
666
|
export type LoadingStatus = true | false | "Dont know";
|
|
667
|
+
|
|
668
|
+
export type JSONSchema =
|
|
669
|
+
| StringSchema
|
|
670
|
+
| NumberSchema
|
|
671
|
+
| BooleanSchema
|
|
672
|
+
| ArraySchema
|
|
673
|
+
| ObjectSchema
|
|
674
|
+
| BaseSchema;
|
|
@@ -19,7 +19,7 @@ var _ai = require("ai");
|
|
|
19
19
|
var _loadRuntime = require("../common/loadRuntime");
|
|
20
20
|
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
21
21
|
async function extractStructuredDataUsingAi(page, input) {
|
|
22
|
-
var _getExecutionContext, _getExecutionContext2, _getExecutionContext3;
|
|
22
|
+
var _getExecutionContext, _getExecutionContext2, _getExecutionContext3, _result$usage6;
|
|
23
23
|
const {
|
|
24
24
|
apiKey,
|
|
25
25
|
enableDomMatching,
|
|
@@ -30,7 +30,7 @@ async function extractStructuredDataUsingAi(page, input) {
|
|
|
30
30
|
images,
|
|
31
31
|
maxRetries = 3
|
|
32
32
|
} = input;
|
|
33
|
-
let
|
|
33
|
+
let accumulatedTokens = 0;
|
|
34
34
|
const getExecutionContext = await (0, _loadRuntime.loadRuntime)();
|
|
35
35
|
const toolName = `extract_data`;
|
|
36
36
|
const headers = (0, _getAiTrackingHeaders.getAiTrackingHeaders)({
|
|
@@ -54,7 +54,7 @@ async function extractStructuredDataUsingAi(page, input) {
|
|
|
54
54
|
let result;
|
|
55
55
|
while (currentRetry < maxRetries) {
|
|
56
56
|
try {
|
|
57
|
-
var _result$usage;
|
|
57
|
+
var _result$usage, _result$usage4;
|
|
58
58
|
result = await (0, _ai.generateText)({
|
|
59
59
|
model: gatewayModel,
|
|
60
60
|
messages: messagesHistory,
|
|
@@ -63,8 +63,7 @@ async function extractStructuredDataUsingAi(page, input) {
|
|
|
63
63
|
maxRetries,
|
|
64
64
|
headers
|
|
65
65
|
});
|
|
66
|
-
|
|
67
|
-
_Logger.logger.info(`AI extraction cost: ${accumulatedCost}`);
|
|
66
|
+
accumulatedTokens += ((_result$usage = result.usage) === null || _result$usage === void 0 ? void 0 : _result$usage.totalTokens) ?? 0;
|
|
68
67
|
const toolCall = result.toolCalls[0] ?? null;
|
|
69
68
|
let extractedData = toolCall.input;
|
|
70
69
|
const isArray = jsonSchema.type === "array";
|
|
@@ -95,9 +94,10 @@ async function extractStructuredDataUsingAi(page, input) {
|
|
|
95
94
|
continue;
|
|
96
95
|
}
|
|
97
96
|
if (!enableDomMatching) {
|
|
97
|
+
var _result$usage2;
|
|
98
|
+
_Logger.logger.info(`Total LLM Tokens: ${(_result$usage2 = result.usage) === null || _result$usage2 === void 0 ? void 0 : _result$usage2.totalTokens}`);
|
|
98
99
|
return (0, _neverthrow.ok)({
|
|
99
100
|
result: extractedData,
|
|
100
|
-
usage: accumulatedCost,
|
|
101
101
|
xpathMapping: {}
|
|
102
102
|
});
|
|
103
103
|
}
|
|
@@ -105,9 +105,10 @@ async function extractStructuredDataUsingAi(page, input) {
|
|
|
105
105
|
dataStructure: extractedData
|
|
106
106
|
});
|
|
107
107
|
if (!stringsToMatch || stringsToMatch.length === 0) {
|
|
108
|
+
var _result$usage3;
|
|
109
|
+
_Logger.logger.info(`Total LLM Tokens: ${(_result$usage3 = result.usage) === null || _result$usage3 === void 0 ? void 0 : _result$usage3.totalTokens}`);
|
|
108
110
|
return (0, _neverthrow.ok)({
|
|
109
111
|
result: [],
|
|
110
|
-
usage: accumulatedCost,
|
|
111
112
|
xpathMapping: {}
|
|
112
113
|
});
|
|
113
114
|
}
|
|
@@ -123,18 +124,21 @@ async function extractStructuredDataUsingAi(page, input) {
|
|
|
123
124
|
stringReplacements[key] = (value === null || value === void 0 ? void 0 : value.matchText) || null;
|
|
124
125
|
});
|
|
125
126
|
const matchesData = await (0, _validateSchema.recursivelyReplaceStrings)(extractedData, stringReplacements);
|
|
127
|
+
_Logger.logger.info(`Total LLM Tokens: ${(_result$usage4 = result.usage) === null || _result$usage4 === void 0 ? void 0 : _result$usage4.totalTokens}`);
|
|
126
128
|
return (0, _neverthrow.ok)({
|
|
127
129
|
result: matchesData,
|
|
128
|
-
usage: accumulatedCost,
|
|
129
130
|
xpathMapping
|
|
130
131
|
});
|
|
131
132
|
} catch (error) {
|
|
133
|
+
var _result$usage5;
|
|
132
134
|
_Logger.logger.error("Error during AI extraction", {
|
|
133
135
|
error,
|
|
134
136
|
model
|
|
135
137
|
});
|
|
138
|
+
_Logger.logger.info(`Total LLM Tokens: ${(_result$usage5 = result.usage) === null || _result$usage5 === void 0 ? void 0 : _result$usage5.totalTokens}`);
|
|
136
139
|
return (0, _neverthrow.err)(Errors.invalidExtractionResult(error instanceof Error ? error.message : "Unknown error during extraction"));
|
|
137
140
|
}
|
|
138
141
|
}
|
|
142
|
+
_Logger.logger.info(`Total LLM Tokens: ${(_result$usage6 = result.usage) === null || _result$usage6 === void 0 ? void 0 : _result$usage6.totalTokens}`);
|
|
139
143
|
return (0, _neverthrow.err)(Errors.maxRetriesExceeded(`Max retries of ${maxRetries} exceeded for extraction`));
|
|
140
144
|
}
|
package/dist/ai/index.d.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { Locator, Page } from "playwright-core";
|
|
2
|
-
import { ObjectSchema } from "./jsonSchema";
|
|
3
2
|
import { JSONSchema7TypeName } from "json-schema";
|
|
4
3
|
|
|
5
4
|
/**
|
|
@@ -278,62 +277,85 @@ export declare function extractStructuredData(options: {
|
|
|
278
277
|
maxRetries?: number;
|
|
279
278
|
}): Promise<any>;
|
|
280
279
|
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
280
|
+
type SUPPORTED_CLAUDE_MODELS =
|
|
281
|
+
| "claude-3-5-haiku-20241022"
|
|
282
|
+
| "claude-3-5-haiku-latest"
|
|
283
|
+
| "claude-3-5-sonnet-20240620"
|
|
284
|
+
| "claude-3-5-sonnet-20241022"
|
|
285
|
+
| "claude-3-5-sonnet-latest"
|
|
286
|
+
| "claude-3-7-sonnet-20250219"
|
|
287
|
+
| "claude-3-7-sonnet-latest"
|
|
288
|
+
| "claude-3-haiku-20240307"
|
|
289
|
+
| "claude-4-opus-20250514"
|
|
290
|
+
| "claude-4-sonnet-20250514"
|
|
291
|
+
| "claude-opus-4-1"
|
|
288
292
|
| "claude-opus-4-1-20250805"
|
|
289
293
|
| "claude-opus-4-20250514"
|
|
290
|
-
| "claude-sonnet-4-20250514"
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
| "claude-3-5-haiku-latest"
|
|
294
|
-
| "gpt-5"
|
|
295
|
-
| "gpt-5-mini"
|
|
296
|
-
| "gpt-5-nano"
|
|
297
|
-
| "gpt-5-chat"
|
|
298
|
-
| "gpt-5-chat-latest"
|
|
299
|
-
| "gpt-5-2025-08-07"
|
|
300
|
-
| "gpt-5-mini-2025-08-07"
|
|
301
|
-
| "gpt-5-nano-2025-08-07"
|
|
302
|
-
| "gpt-4.1"
|
|
303
|
-
| "gpt-4.1-mini"
|
|
304
|
-
| "gpt-4.1-nano"
|
|
305
|
-
| "o4-mini"
|
|
306
|
-
| "o3-mini"
|
|
307
|
-
| "o3"
|
|
308
|
-
| "o1-mini"
|
|
309
|
-
| "o1-preview"
|
|
310
|
-
| "gpt-4o-mini"
|
|
311
|
-
| "gpt-4o-mini-2024-07-18"
|
|
312
|
-
| "gpt-4o"
|
|
313
|
-
| "gpt-4o-2024-08-06"
|
|
314
|
-
| "gpt-4o-2024-05-13"
|
|
315
|
-
| "gpt-4o-2024-05-13"
|
|
316
|
-
| "gpt-4-turbo"
|
|
317
|
-
| "gpt-4-turbo-preview"
|
|
318
|
-
| "gpt-4-0125-preview"
|
|
319
|
-
| "gpt-4-1106-preview"
|
|
320
|
-
| "gpt-3.5-turbo-1106"
|
|
294
|
+
| "claude-sonnet-4-20250514";
|
|
295
|
+
|
|
296
|
+
type SUPPORTED_OPENAI_MODELS =
|
|
321
297
|
| "gpt-3.5-turbo"
|
|
298
|
+
| "gpt-3.5-turbo-0125"
|
|
322
299
|
| "gpt-3.5-turbo-0301"
|
|
323
300
|
| "gpt-3.5-turbo-0613"
|
|
301
|
+
| "gpt-3.5-turbo-1106"
|
|
324
302
|
| "gpt-3.5-turbo-16k"
|
|
325
303
|
| "gpt-3.5-turbo-16k-0613"
|
|
304
|
+
| "gpt-3.5-turbo-instruct"
|
|
305
|
+
| "gpt-3.5-turbo-instruct-0914"
|
|
326
306
|
| "gpt-4"
|
|
327
307
|
| "gpt-4-0314"
|
|
328
308
|
| "gpt-4-0613"
|
|
329
309
|
| "gpt-4-32k"
|
|
330
310
|
| "gpt-4-32k-0314"
|
|
331
311
|
| "gpt-4-32k-0613"
|
|
332
|
-
| "
|
|
333
|
-
| "
|
|
334
|
-
| "
|
|
335
|
-
| "
|
|
336
|
-
| "
|
|
312
|
+
| "gpt-4-turbo"
|
|
313
|
+
| "gpt-4-turbo-2024-04-09"
|
|
314
|
+
| "gpt-4.1"
|
|
315
|
+
| "gpt-4.1-2025-04-14"
|
|
316
|
+
| "gpt-4.1-mini"
|
|
317
|
+
| "gpt-4.1-mini-2025-04-14"
|
|
318
|
+
| "gpt-4.1-nano"
|
|
319
|
+
| "gpt-4.1-nano-2025-04-14"
|
|
320
|
+
| "gpt-4o"
|
|
321
|
+
| "gpt-4o-2024-05-13"
|
|
322
|
+
| "gpt-4o-2024-08-06"
|
|
323
|
+
| "gpt-4o-2024-11-20"
|
|
324
|
+
| "gpt-4o-mini"
|
|
325
|
+
| "gpt-4o-mini-2024-07-18"
|
|
326
|
+
| "gpt-5"
|
|
327
|
+
| "gpt-5-2025-08-07"
|
|
328
|
+
| "gpt-5-chat"
|
|
329
|
+
| "gpt-5-chat-latest"
|
|
330
|
+
| "gpt-5-mini"
|
|
331
|
+
| "gpt-5-mini-2025-08-07"
|
|
332
|
+
| "gpt-5-nano"
|
|
333
|
+
| "gpt-5-nano-2025-08-07"
|
|
334
|
+
| "o1"
|
|
335
|
+
| "o1-2024-12-17"
|
|
336
|
+
| "o1-mini"
|
|
337
|
+
| "o1-mini-2024-09-12"
|
|
338
|
+
| "o1-pro"
|
|
339
|
+
| "o1-pro-2025-03-19"
|
|
340
|
+
| "o3"
|
|
341
|
+
| "o3-2025-04-16"
|
|
342
|
+
| "o3-deep-research"
|
|
343
|
+
| "o3-deep-research-2025-06-26"
|
|
344
|
+
| "o3-mini"
|
|
345
|
+
| "o3-mini-2025-01-31"
|
|
346
|
+
| "o3-pro"
|
|
347
|
+
| "o3-pro-2025-06-10"
|
|
348
|
+
| "o4-mini"
|
|
349
|
+
| "o4-mini-2025-04-16"
|
|
350
|
+
| "o4-mini-deep-research"
|
|
351
|
+
| "o4-mini-deep-research-2025-06-26";
|
|
352
|
+
/**
|
|
353
|
+
* This type defines the supported AI models for data extraction.
|
|
354
|
+
* It includes models from OpenAI and Anthropic.
|
|
355
|
+
* The models are used in the extraction strategies to process and analyze the content of web pages or elements.
|
|
356
|
+
* @type SUPPORTED_MODELS
|
|
357
|
+
*/
|
|
358
|
+
type SUPPORTED_MODELS = SUPPORTED_CLAUDE_MODELS | SUPPORTED_OPENAI_MODELS;
|
|
337
359
|
|
|
338
360
|
/**
|
|
339
361
|
* Represents a JSON Schema definition for validating data structures.
|
|
@@ -394,13 +416,6 @@ type SUPPORTED_MODELS =
|
|
|
394
416
|
* };
|
|
395
417
|
* ```
|
|
396
418
|
*/
|
|
397
|
-
export type JSONSchema =
|
|
398
|
-
| StringSchema
|
|
399
|
-
| NumberSchema
|
|
400
|
-
| BooleanSchema
|
|
401
|
-
| ArraySchema
|
|
402
|
-
| ObjectSchema
|
|
403
|
-
| BaseSchema;
|
|
404
419
|
|
|
405
420
|
/**
|
|
406
421
|
* @interface HtmlStrategy
|
|
@@ -592,7 +607,7 @@ export interface HtmlStrategy {
|
|
|
592
607
|
*
|
|
593
608
|
* @param {Page} page - The Playwright page to check
|
|
594
609
|
* @param {Object} [options] - Optional configuration object
|
|
595
|
-
* @param {
|
|
610
|
+
* @param {SUPPORTED_MODELS} [options.model="gpt-4o-2024-08-06"] - [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) to use for the AI Check. default gpt-4o-2024-08-06
|
|
596
611
|
* @param {number} [options.timeoutInMs=10000] - Screenshot timeout in milliseconds
|
|
597
612
|
* @param {string} [options.apiKey] - Optional API key for the AI service
|
|
598
613
|
* @returns {Promise.<{status: LoadingStatus, reason: (string|null|undefined), cost: (number|undefined)}>}
|
|
@@ -636,7 +651,7 @@ export declare function isPageLoaded(
|
|
|
636
651
|
page: Page,
|
|
637
652
|
options?: {
|
|
638
653
|
timeoutInMs?: number;
|
|
639
|
-
model?:
|
|
654
|
+
model?: SUPPORTED_MODELS;
|
|
640
655
|
apiKey?: string;
|
|
641
656
|
}
|
|
642
657
|
): Promise<{
|
|
@@ -649,3 +664,11 @@ export declare function isPageLoaded(
|
|
|
649
664
|
* LoadingStatus is a union of true, false, and "Dont know".
|
|
650
665
|
*/
|
|
651
666
|
export type LoadingStatus = true | false | "Dont know";
|
|
667
|
+
|
|
668
|
+
export type JSONSchema =
|
|
669
|
+
| StringSchema
|
|
670
|
+
| NumberSchema
|
|
671
|
+
| BooleanSchema
|
|
672
|
+
| ArraySchema
|
|
673
|
+
| ObjectSchema
|
|
674
|
+
| BaseSchema;
|
package/dist/ai/isPageLoaded.js
CHANGED
|
@@ -4,6 +4,7 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
4
4
|
value: true
|
|
5
5
|
});
|
|
6
6
|
exports.isPageLoaded = void 0;
|
|
7
|
+
var _Logger = require("../common/Logger");
|
|
7
8
|
var _utils = require("../helpers/utils");
|
|
8
9
|
var _ai = require("ai");
|
|
9
10
|
const isPageLoaded = async (page, options) => {
|
|
@@ -39,6 +40,7 @@ Some good cues for determining if a page is loading:
|
|
|
39
40
|
}]
|
|
40
41
|
});
|
|
41
42
|
let llmResult = response.text.trim();
|
|
43
|
+
_Logger.logger.info(`Total LLM Tokens: ${response.usage.totalTokens}`);
|
|
42
44
|
if (!llmResult) {
|
|
43
45
|
throw new Error("LLM response is empty");
|
|
44
46
|
}
|
|
@@ -50,10 +52,13 @@ Some good cues for determining if a page is loading:
|
|
|
50
52
|
const reason = lines.length > 1 ? lines[1] : null;
|
|
51
53
|
let result;
|
|
52
54
|
if (isTrue) {
|
|
55
|
+
_Logger.logger.info(`Page is loaded.`);
|
|
53
56
|
result = true;
|
|
54
57
|
} else if (isFalse) {
|
|
58
|
+
_Logger.logger.info(`Page is not loaded.`);
|
|
55
59
|
result = false;
|
|
56
60
|
} else if (isDontKnow) {
|
|
61
|
+
_Logger.logger.info(`Page loading status is unknown.`);
|
|
57
62
|
result = "Dont know";
|
|
58
63
|
} else {
|
|
59
64
|
throw new Error("LLM result is not valid");
|
|
@@ -36,11 +36,9 @@ var _validators = require("../validators");
|
|
|
36
36
|
(0, _extendedTest.test)("should return false for array of mixed types", async () => {
|
|
37
37
|
const schema = {
|
|
38
38
|
type: "array",
|
|
39
|
-
items:
|
|
40
|
-
type: "string"
|
|
41
|
-
}, {
|
|
39
|
+
items: {
|
|
42
40
|
type: "number"
|
|
43
|
-
}
|
|
41
|
+
}
|
|
44
42
|
};
|
|
45
43
|
(0, _extendedTest.expect)((0, _validators.checkAllTypesAreStrings)(schema)).toBe(false);
|
|
46
44
|
});
|
package/dist/ai/validators.js
CHANGED
|
@@ -118,19 +118,21 @@ function checkAllTypesAreStrings(schema) {
|
|
|
118
118
|
return true;
|
|
119
119
|
}
|
|
120
120
|
if (schema.type === "array") {
|
|
121
|
-
|
|
121
|
+
const arraySchema = schema;
|
|
122
|
+
if (!arraySchema.items) {
|
|
122
123
|
return true;
|
|
123
124
|
}
|
|
124
|
-
if (Array.isArray(
|
|
125
|
-
return
|
|
125
|
+
if (Array.isArray(arraySchema.items)) {
|
|
126
|
+
return arraySchema.items.every(item => checkAllTypesAreStrings(item));
|
|
126
127
|
}
|
|
127
|
-
return checkAllTypesAreStrings(
|
|
128
|
+
return checkAllTypesAreStrings(arraySchema.items);
|
|
128
129
|
}
|
|
129
130
|
if (schema.type === "object") {
|
|
130
|
-
|
|
131
|
+
const objectSchema = schema;
|
|
132
|
+
if (!objectSchema.properties) {
|
|
131
133
|
return true;
|
|
132
134
|
}
|
|
133
|
-
return Object.values(
|
|
135
|
+
return Object.values(objectSchema.properties).every(prop => checkAllTypesAreStrings(prop));
|
|
134
136
|
}
|
|
135
137
|
return false;
|
|
136
138
|
}
|
|
@@ -32,12 +32,12 @@ async function getAbsoluteUrl(page, url) {
|
|
|
32
32
|
});
|
|
33
33
|
return absUrl;
|
|
34
34
|
}
|
|
35
|
-
const downloadFile = async
|
|
35
|
+
const downloadFile = async input => {
|
|
36
36
|
const {
|
|
37
37
|
page,
|
|
38
38
|
trigger,
|
|
39
39
|
timeoutInMs
|
|
40
|
-
} =
|
|
40
|
+
} = input;
|
|
41
41
|
let pageToDownloadFrom = page;
|
|
42
42
|
let shouldClosePage = false;
|
|
43
43
|
let downloadPromise;
|