@intuned/browser-dev 0.1.16-dev.0 → 0.1.16-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/optimized-extractors/common/aiModelsValidations.js +2 -21
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +2 -3
- package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +1 -4
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +1 -2
- package/dist/optimized-extractors/common/findTableHeaders.js +2 -2
- package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +2 -2
- package/dist/optimized-extractors/common/modelStringSupport.test.js +1 -1
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +1 -1
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +2 -2
- package/dist/optimized-extractors/validators.js +1 -1
- package/package.json +1 -1
- package/dist/optimized-extractors/types/aiModelsValidation.js +0 -45
|
@@ -3,30 +3,11 @@
|
|
|
3
3
|
Object.defineProperty(exports, "__esModule", {
|
|
4
4
|
value: true
|
|
5
5
|
});
|
|
6
|
-
exports.
|
|
7
|
-
const CLAUDE_MODELS = exports.CLAUDE_MODELS = ["claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-5-haiku-20241022", "claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"];
|
|
6
|
+
exports.MAX_TOKENS_OVERRIDES = void 0;
|
|
8
7
|
const MAX_TOKENS_OVERRIDES = exports.MAX_TOKENS_OVERRIDES = {
|
|
9
8
|
"claude-3-5-sonnet-20240620": 8192,
|
|
10
9
|
"gemini-1.5-pro-002": 8192,
|
|
11
10
|
"gemini-1.5-flash-8b-002": 8192,
|
|
12
11
|
"gemini-1.5-flash-002": 8192,
|
|
13
12
|
"gemini-2.0-flash-exp": 8192
|
|
14
|
-
};
|
|
15
|
-
const CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = [...CLAUDE_MODELS];
|
|
16
|
-
const CLAUDE_MODELS_MAPPINGS = exports.CLAUDE_MODELS_MAPPINGS = {
|
|
17
|
-
"claude-3-haiku": "claude-3-haiku-20240307",
|
|
18
|
-
"claude-3-5-haiku": "claude-3-5-haiku-20241022",
|
|
19
|
-
"claude-3-opus": "claude-3-opus-20240229",
|
|
20
|
-
"claude-3-sonnet": "claude-3-sonnet-20240229",
|
|
21
|
-
"claude-3.5-sonnet": "claude-3-5-sonnet-20241022",
|
|
22
|
-
"claude-4-sonnet": "claude-sonnet-4-20250514",
|
|
23
|
-
"claude-4-opus": "claude-opus-4-20250514"
|
|
24
|
-
};
|
|
25
|
-
const GPT_MODELS = exports.GPT_MODELS = ["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "gpt-4o", "gpt-4o-mini", "gpt-4o-audio-preview", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo", "o1", "o1-mini", "o1-preview", "o3-mini", "o3", "o4-mini", "chatgpt-4o-latest", "gpt4-turbo"];
|
|
26
|
-
const GOOGLE_MODELS = exports.GOOGLE_MODELS = ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.0-flash", "gemini-1.5-pro", "gemini-1.5-pro-latest", "gemini-1.5-flash", "gemini-1.5-flash-latest", "gemini-1.5-flash-8b", "gemini-1.5-flash-8b-latest"];
|
|
27
|
-
const MODELS_MAPPINGS = exports.MODELS_MAPPINGS = {
|
|
28
|
-
...CLAUDE_MODELS_MAPPINGS
|
|
29
|
-
};
|
|
30
|
-
const SUPPPORTED_CLAUDE_MODELS = exports.SUPPPORTED_CLAUDE_MODELS = ["claude-3-5-haiku-20241022", "claude-3-5-haiku-latest", "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-latest", "claude-3-7-sonnet-20250219", "claude-3-7-sonnet-latest", "claude-3-haiku-20240307", "claude-4-opus-20250514", "claude-4-sonnet-20250514", "claude-opus-4-1", "claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514"];
|
|
31
|
-
const SUPPPORTED_GPT_MODELS = exports.SUPPPORTED_GPT_MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-instruct", "gpt-3.5-turbo-instruct-0914", "gpt-4", "gpt-4-0314", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613", "gpt-4-turbo", "gpt-4-turbo-2024-04-09", "gpt-4.1", "gpt-4.1-2025-04-14", "gpt-4.1-mini", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano", "gpt-4.1-nano-2025-04-14", "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", "gpt-4o-2024-11-20", "gpt-4o-mini", "gpt-4o-mini-2024-07-18", "gpt-5", "gpt-5-2025-08-07", "gpt-5-chat", "gpt-5-chat-latest", "gpt-5-mini", "gpt-5-mini-2025-08-07", "gpt-5-nano", "gpt-5-nano-2025-08-07", "o1", "o1-2024-12-17", "o1-mini", "o1-mini-2024-09-12", "o1-pro", "o1-pro-2025-03-19", "o3", "o3-2025-04-16", "o3-deep-research", "o3-deep-research-2025-06-26", "o3-mini", "o3-mini-2025-01-31", "o3-pro", "o3-pro-2025-06-10", "o4-mini", "o4-mini-2025-04-16", "o4-mini-deep-research", "o4-mini-deep-research-2025-06-26"];
|
|
32
|
-
const SUPPORTED_MODELS = exports.SUPPORTED_MODELS = [...SUPPPORTED_CLAUDE_MODELS, ...SUPPPORTED_GPT_MODELS];
|
|
13
|
+
};
|
|
@@ -56,8 +56,7 @@ async function extractStructuredDataUsingClaude(input) {
|
|
|
56
56
|
const anthropic = (0, _anthropicModel.createAnthropicInstance)({
|
|
57
57
|
apiKey
|
|
58
58
|
});
|
|
59
|
-
const
|
|
60
|
-
const maxTokens = _aiModelsValidations.MAX_TOKENS_OVERRIDES[modelName] ?? 4096;
|
|
59
|
+
const maxTokens = _aiModelsValidations.MAX_TOKENS_OVERRIDES[model] ?? 4096;
|
|
61
60
|
const response = await (0, _neverthrow.fromPromise)(anthropic.messages.create({
|
|
62
61
|
max_tokens: maxTokens,
|
|
63
62
|
temperature: 0,
|
|
@@ -66,7 +65,7 @@ async function extractStructuredDataUsingClaude(input) {
|
|
|
66
65
|
role: "user",
|
|
67
66
|
content
|
|
68
67
|
}],
|
|
69
|
-
model
|
|
68
|
+
model,
|
|
70
69
|
tools: [{
|
|
71
70
|
input_schema: processedJsonSchema,
|
|
72
71
|
name: toolName,
|
|
@@ -15,10 +15,7 @@ async function extractStructuredDataUsingGoogle(input) {
|
|
|
15
15
|
if (!input.apiKey) {
|
|
16
16
|
return (0, _neverthrow.err)(Errors.invalidInput("Google AI is only supported with a custom API key. Please provide it or use a different AI provider."));
|
|
17
17
|
}
|
|
18
|
-
|
|
19
|
-
if (input.model in _aiModelsValidations.MODELS_MAPPINGS) {
|
|
20
|
-
model = _aiModelsValidations.MODELS_MAPPINGS[input.model];
|
|
21
|
-
}
|
|
18
|
+
const model = input.model;
|
|
22
19
|
const googleGenAi = (0, _google.createGoogleGenerativeAI)({
|
|
23
20
|
apiKey: input.apiKey
|
|
24
21
|
});
|
|
@@ -8,7 +8,6 @@ var _neverthrow = require("neverthrow");
|
|
|
8
8
|
var Errors = _interopRequireWildcard(require("../types/errors"));
|
|
9
9
|
var _utils = require("./utils");
|
|
10
10
|
var _Logger = require("../../common/Logger");
|
|
11
|
-
var _aiModelsValidations = require("../common/aiModelsValidations");
|
|
12
11
|
var _openaiModel = require("../models/openaiModel");
|
|
13
12
|
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
14
13
|
async function extractStructuredDataUsingOpenAi(input) {
|
|
@@ -50,7 +49,7 @@ async function extractStructuredDataUsingOpenAi(input) {
|
|
|
50
49
|
}));
|
|
51
50
|
content.push(...imageContent);
|
|
52
51
|
}
|
|
53
|
-
const modelName =
|
|
52
|
+
const modelName = input.model;
|
|
54
53
|
const toolName = `extract_${entityName}`;
|
|
55
54
|
const openAiInstance = (0, _openaiModel.createOpenAIInstance)({
|
|
56
55
|
apiKey
|
|
@@ -10,7 +10,7 @@ var _imageSize = require("image-size");
|
|
|
10
10
|
var _neverthrow = require("neverthrow");
|
|
11
11
|
var Errors = _interopRequireWildcard(require("../types/errors"));
|
|
12
12
|
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
13
|
-
async function getTableHeadersUsingAi(handle,
|
|
13
|
+
async function getTableHeadersUsingAi(handle, model) {
|
|
14
14
|
var _response$error;
|
|
15
15
|
let image;
|
|
16
16
|
try {
|
|
@@ -57,7 +57,7 @@ async function getTableHeadersUsingAi(handle, identifier) {
|
|
|
57
57
|
}
|
|
58
58
|
}]
|
|
59
59
|
}],
|
|
60
|
-
model
|
|
60
|
+
model,
|
|
61
61
|
tools: [{
|
|
62
62
|
input_schema: {
|
|
63
63
|
type: "object",
|
|
@@ -9,7 +9,7 @@ var _zod = require("zod");
|
|
|
9
9
|
var _neverthrow = require("neverthrow");
|
|
10
10
|
var Errors = _interopRequireWildcard(require("../types/errors"));
|
|
11
11
|
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
12
|
-
async function isTableHeaderOrFooter(content) {
|
|
12
|
+
async function isTableHeaderOrFooter(content, model) {
|
|
13
13
|
var _response$error;
|
|
14
14
|
if (!content) {
|
|
15
15
|
return (0, _neverthrow.ok)({
|
|
@@ -39,7 +39,7 @@ async function isTableHeaderOrFooter(content) {
|
|
|
39
39
|
a table header is a row that contains labels for table columns, and footer usually has pagination information or summary of the table`
|
|
40
40
|
}, itemContent]
|
|
41
41
|
}],
|
|
42
|
-
model
|
|
42
|
+
model,
|
|
43
43
|
tools: [{
|
|
44
44
|
input_schema: {
|
|
45
45
|
type: "object",
|
|
@@ -209,7 +209,7 @@ async function splitDomAndExtractData({
|
|
|
209
209
|
tableLocater
|
|
210
210
|
} = await (0, _tablesUtils.isListTable)(listItemsContainerLocator, itemsSimplifiedHtml);
|
|
211
211
|
const tableAsJsonArray = isTable ? await (0, _tablesUtils.createJsonFromTable)(pageAndSearchRegion.page) : [];
|
|
212
|
-
const tableHeaders = tableLocater ? await (0, _findTableHeaders.getTableHeadersUsingAi)(tableLocater,
|
|
212
|
+
const tableHeaders = tableLocater ? await (0, _findTableHeaders.getTableHeadersUsingAi)(tableLocater, strategy.model) : undefined;
|
|
213
213
|
if (tableHeaders && tableHeaders.isErr()) {
|
|
214
214
|
return (0, _neverthrow.err)(tableHeaders.error);
|
|
215
215
|
}
|
package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js
CHANGED
|
@@ -44,7 +44,7 @@ async function extractPropertiesUsingGPT({
|
|
|
44
44
|
}
|
|
45
45
|
if (possibleTableHeaderOrFooter) {
|
|
46
46
|
const content = text ?? image;
|
|
47
|
-
const isHeader = await (0, _isTableHeaderOrFooter.isTableHeaderOrFooter)(content);
|
|
47
|
+
const isHeader = await (0, _isTableHeaderOrFooter.isTableHeaderOrFooter)(content, strategy.model);
|
|
48
48
|
if (isHeader.isErr()) {
|
|
49
49
|
return (0, _neverthrow.err)(isHeader.error);
|
|
50
50
|
}
|
|
@@ -146,7 +146,7 @@ async function extractPropertiesWithHTMLStrategy({
|
|
|
146
146
|
apiKey
|
|
147
147
|
}) {
|
|
148
148
|
const shouldUseTableData = !!tableAsJsonArray && tableAsJsonArray.length === items.length;
|
|
149
|
-
const isWeakModel = strategy.model
|
|
149
|
+
const isWeakModel = strategy.model.includes("haiku") || strategy.model.includes("turbo");
|
|
150
150
|
const averageItemLength = items.reduce((sum, item) => {
|
|
151
151
|
if (item.type !== "text") return sum;
|
|
152
152
|
return sum + (0, _extractionHelpers.compressStringSpaces)(item.text).length;
|
|
@@ -113,7 +113,7 @@ const strategySchema = exports.strategySchema = _zod.z.union([htmlStrategySchema
|
|
|
113
113
|
};
|
|
114
114
|
}
|
|
115
115
|
}).optional().default({
|
|
116
|
-
model: "claude-
|
|
116
|
+
model: "claude-haiku-4-5-20251001",
|
|
117
117
|
type: "HTML"
|
|
118
118
|
});
|
|
119
119
|
const labelSchema = _zod.z.string({
|
package/package.json
CHANGED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
Object.defineProperty(exports, "__esModule", {
|
|
4
|
-
value: true
|
|
5
|
-
});
|
|
6
|
-
exports.SUPPORTED_VISION_MODELS = exports.SUPPORTED_TEXT_MODELS = exports.SUPPORTED_GPT_MODELS = exports.SUPPORTED_GOOGLE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = exports.MODELS_MAPPINGS = exports.MAX_TOKENS_OVERRIDES = exports.GPT_MODELS_MAPPINGS = exports.GOOGLE_MODELS_MAPPINGS = exports.CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_MODELS_MAPPINGS = void 0;
|
|
7
|
-
const CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = ["claude-3-5-haiku", "claude-3-5-haiku-20241022"];
|
|
8
|
-
const CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_VISION_SUPPORTED_MODELS = ["claude-3-haiku", "claude-3-haiku-20240307", "claude-3.5-sonnet", "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022", "claude-opus-4", "claude-opus-4-20250514", "claude-sonnet-4", "claude-sonnet-4-20250514"];
|
|
9
|
-
const SUPPORTED_CLAUDE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = [...CLAUDE_ONLY_TEXT_MODELS, ...CLAUDE_VISION_SUPPORTED_MODELS];
|
|
10
|
-
const CLAUDE_MODELS_MAPPINGS = exports.CLAUDE_MODELS_MAPPINGS = {
|
|
11
|
-
"claude-3-haiku": "claude-3-haiku-20240307",
|
|
12
|
-
"claude-3-5-haiku": "claude-3-5-haiku-20241022",
|
|
13
|
-
"claude-3.5-sonnet": "claude-3-5-sonnet-20241022",
|
|
14
|
-
"claude-opus-4": "claude-opus-4-20250514",
|
|
15
|
-
"claude-sonnet-4": "claude-sonnet-4-20250514"
|
|
16
|
-
};
|
|
17
|
-
const GPT_ONLY_TEXT_GPT_MODELS = ["gpt3.5-turbo", "gpt-3.5-turbo-0125"];
|
|
18
|
-
const GPT_VISION_SUPPORTED_MODELS = ["gpt4-turbo", "gpt-4-turbo-2024-04-09", "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-mini", "gpt-4o-mini-2024-07-18"];
|
|
19
|
-
const SUPPORTED_GPT_MODELS = exports.SUPPORTED_GPT_MODELS = [...GPT_ONLY_TEXT_GPT_MODELS, ...GPT_VISION_SUPPORTED_MODELS];
|
|
20
|
-
const GPT_MODELS_MAPPINGS = exports.GPT_MODELS_MAPPINGS = {
|
|
21
|
-
"gpt4-turbo": "gpt-4-turbo-2024-04-09",
|
|
22
|
-
"gpt3.5-turbo": "gpt-3.5-turbo-0125",
|
|
23
|
-
"gpt-4o": "gpt-4o-2024-05-13",
|
|
24
|
-
"gpt-4o-mini": "gpt-4o-mini-2024-07-18"
|
|
25
|
-
};
|
|
26
|
-
const SUPPORTED_GOOGLE_MODELS = exports.SUPPORTED_GOOGLE_MODELS = ["gemini-1.5-pro", "gemini-1.5-pro-002", "gemini-1.5-flash-8b", "gemini-1.5-flash-8b-002", "gemini-1.5-flash", "gemini-1.5-flash-002", "gemini-2.0-flash-exp"];
|
|
27
|
-
const GOOGLE_MODELS_MAPPINGS = exports.GOOGLE_MODELS_MAPPINGS = {
|
|
28
|
-
"gemini-1.5-pro": "gemini-1.5-pro-002",
|
|
29
|
-
"gemini-1.5-flash-8b": "gemini-1.5-flash-8b-002",
|
|
30
|
-
"gemini-1.5-flash": "gemini-1.5-flash-002"
|
|
31
|
-
};
|
|
32
|
-
const SUPPORTED_TEXT_MODELS = exports.SUPPORTED_TEXT_MODELS = [...SUPPORTED_CLAUDE_MODELS, ...SUPPORTED_GPT_MODELS, ...SUPPORTED_GOOGLE_MODELS];
|
|
33
|
-
const SUPPORTED_VISION_MODELS = exports.SUPPORTED_VISION_MODELS = [...CLAUDE_VISION_SUPPORTED_MODELS, ...GPT_VISION_SUPPORTED_MODELS, ...SUPPORTED_GOOGLE_MODELS];
|
|
34
|
-
const MODELS_MAPPINGS = exports.MODELS_MAPPINGS = {
|
|
35
|
-
...GPT_MODELS_MAPPINGS,
|
|
36
|
-
...CLAUDE_MODELS_MAPPINGS,
|
|
37
|
-
...GOOGLE_MODELS_MAPPINGS
|
|
38
|
-
};
|
|
39
|
-
const MAX_TOKENS_OVERRIDES = exports.MAX_TOKENS_OVERRIDES = {
|
|
40
|
-
"claude-3-5-sonnet-20240620": 8192,
|
|
41
|
-
"gemini-1.5-pro-002": 8192,
|
|
42
|
-
"gemini-1.5-flash-8b-002": 8192,
|
|
43
|
-
"gemini-1.5-flash-002": 8192,
|
|
44
|
-
"gemini-2.0-flash-exp": 8192
|
|
45
|
-
};
|