@intuned/browser-dev 2.2.3-test-build.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. package/.babelrc +21 -0
  2. package/.eslintignore +10 -0
  3. package/.eslintrc.js +39 -0
  4. package/LICENSE +43 -0
  5. package/dist/ai/export.d.js +5 -0
  6. package/dist/ai/export.d.ts +641 -0
  7. package/dist/ai/extractStructuredData.js +320 -0
  8. package/dist/ai/extractStructuredDataUsingAi.js +139 -0
  9. package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
  10. package/dist/ai/extractionHelpers/validateSchema.js +148 -0
  11. package/dist/ai/index.d.ts +641 -0
  12. package/dist/ai/index.js +19 -0
  13. package/dist/ai/isPageLoaded.js +77 -0
  14. package/dist/ai/prompt.js +39 -0
  15. package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
  16. package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
  17. package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
  18. package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
  19. package/dist/ai/tools/index.js +48 -0
  20. package/dist/ai/types/errors.js +67 -0
  21. package/dist/ai/types/models.js +45 -0
  22. package/dist/ai/types/types.js +48 -0
  23. package/dist/ai/validators.js +167 -0
  24. package/dist/common/Logger/index.js +60 -0
  25. package/dist/common/Logger/types.js +5 -0
  26. package/dist/common/SdkError.js +50 -0
  27. package/dist/common/aiModelsValidations.js +32 -0
  28. package/dist/common/browser_scripts.js +2596 -0
  29. package/dist/common/ensureBrowserScripts.js +18 -0
  30. package/dist/common/extendedTest.js +148 -0
  31. package/dist/common/extractionHelpers.js +19 -0
  32. package/dist/common/formatZodError.js +18 -0
  33. package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
  34. package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
  35. package/dist/common/fuzzySearch/utils.js +23 -0
  36. package/dist/common/getModelProvider.js +18 -0
  37. package/dist/common/getSimplifiedHtml.js +122 -0
  38. package/dist/common/hashObject.js +32 -0
  39. package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
  40. package/dist/common/html2markdown/index.js +19 -0
  41. package/dist/common/jwtTokenManager.js +18 -0
  42. package/dist/common/loadRuntime.js +16 -0
  43. package/dist/common/locatorHelpers.js +41 -0
  44. package/dist/common/matching/collectStrings.js +32 -0
  45. package/dist/common/matching/levenshtein.js +40 -0
  46. package/dist/common/matching/matching.js +317 -0
  47. package/dist/common/matching/types.js +1 -0
  48. package/dist/common/noEmpty.js +9 -0
  49. package/dist/common/saveSnapshotWithExamples.js +60 -0
  50. package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
  51. package/dist/common/xpathMapping.js +107 -0
  52. package/dist/helpers/clickUntilExhausted.js +85 -0
  53. package/dist/helpers/downloadFile.js +125 -0
  54. package/dist/helpers/export.d.js +5 -0
  55. package/dist/helpers/export.d.ts +1220 -0
  56. package/dist/helpers/extractMarkdown.js +35 -0
  57. package/dist/helpers/filterEmptyValues.js +54 -0
  58. package/dist/helpers/gotoUrl.js +98 -0
  59. package/dist/helpers/index.d.ts +1220 -0
  60. package/dist/helpers/index.js +128 -0
  61. package/dist/helpers/processDate.js +25 -0
  62. package/dist/helpers/resolveUrl.js +64 -0
  63. package/dist/helpers/sanitizeHtml.js +74 -0
  64. package/dist/helpers/saveFileToS3.js +50 -0
  65. package/dist/helpers/scrollToLoadContent.js +57 -0
  66. package/dist/helpers/tests/extendedTest.js +130 -0
  67. package/dist/helpers/tests/testClickUntilExhausted.spec.js +387 -0
  68. package/dist/helpers/tests/testDownloadFile.spec.js +204 -0
  69. package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
  70. package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
  71. package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
  72. package/dist/helpers/tests/testProcessDate.spec.js +13 -0
  73. package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
  74. package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
  75. package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
  76. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
  77. package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
  78. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
  79. package/dist/helpers/types/Attachment.js +115 -0
  80. package/dist/helpers/types/CustomTypeRegistry.js +48 -0
  81. package/dist/helpers/types/RunEnvironment.js +18 -0
  82. package/dist/helpers/types/ValidationError.js +17 -0
  83. package/dist/helpers/types/index.js +51 -0
  84. package/dist/helpers/uploadFileToS3.js +154 -0
  85. package/dist/helpers/utils/getS3Client.js +22 -0
  86. package/dist/helpers/utils/index.js +73 -0
  87. package/dist/helpers/utils/isDownload.js +10 -0
  88. package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
  89. package/dist/helpers/utils/isLocator.js +9 -0
  90. package/dist/helpers/utils/jwtTokenManager.js +18 -0
  91. package/dist/helpers/validateDataUsingSchema.js +103 -0
  92. package/dist/helpers/waitForDomSettled.js +90 -0
  93. package/dist/helpers/withNetworkSettledWait.js +91 -0
  94. package/dist/index.d.js +16 -0
  95. package/dist/index.d.ts +10 -0
  96. package/dist/index.js +16 -0
  97. package/dist/intunedServices/ApiGateway/aiApiGateway.js +99 -0
  98. package/dist/intunedServices/ApiGateway/factory.js +13 -0
  99. package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
  100. package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
  101. package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
  102. package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +224 -0
  103. package/dist/intunedServices/ApiGateway/types.js +11 -0
  104. package/dist/intunedServices/cache/cache.js +61 -0
  105. package/dist/intunedServices/cache/index.js +12 -0
  106. package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
  107. package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
  108. package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
  109. package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
  110. package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
  111. package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
  112. package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
  113. package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
  114. package/dist/optimized-extractors/common/index.js +55 -0
  115. package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
  116. package/dist/optimized-extractors/common/matching/matching.js +212 -0
  117. package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
  118. package/dist/optimized-extractors/common/matching/types.js +18 -0
  119. package/dist/optimized-extractors/common/matching/utils.js +184 -0
  120. package/dist/optimized-extractors/common/utils.js +58 -0
  121. package/dist/optimized-extractors/export.d.js +5 -0
  122. package/dist/optimized-extractors/export.d.ts +397 -0
  123. package/dist/optimized-extractors/extractArray.js +120 -0
  124. package/dist/optimized-extractors/extractObject.js +104 -0
  125. package/dist/optimized-extractors/index.d.ts +397 -0
  126. package/dist/optimized-extractors/index.js +31 -0
  127. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
  128. package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
  129. package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
  130. package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
  131. package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
  132. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
  133. package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
  134. package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
  135. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
  136. package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
  137. package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
  138. package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
  139. package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
  140. package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
  141. package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
  142. package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
  143. package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
  144. package/dist/optimized-extractors/models/anthropicModel.js +23 -0
  145. package/dist/optimized-extractors/models/openaiModel.js +23 -0
  146. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
  147. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
  148. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
  149. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
  150. package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
  151. package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
  152. package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
  153. package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
  154. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
  155. package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
  156. package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
  157. package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
  158. package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
  159. package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
  160. package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
  161. package/dist/optimized-extractors/types/errors.js +42 -0
  162. package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
  163. package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
  164. package/dist/optimized-extractors/types/types.js +5 -0
  165. package/dist/optimized-extractors/validators.js +152 -0
  166. package/dist/vite-env.d.js +1 -0
  167. package/dist/vite-env.d.ts +9 -0
  168. package/docs.md +14 -0
  169. package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
  170. package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
  171. package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
  172. package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
  173. package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
  174. package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
  175. package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
  176. package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
  177. package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
  178. package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
  179. package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
  180. package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
  181. package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
  182. package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
  183. package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
  184. package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
  185. package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
  186. package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
  187. package/generated-docs/helpers/functions/processDate.mdx +55 -0
  188. package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
  189. package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
  190. package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
  191. package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
  192. package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
  193. package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
  194. package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
  195. package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
  196. package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
  197. package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
  198. package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
  199. package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
  200. package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
  201. package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
  202. package/how-to-run-tests.md +10 -0
  203. package/intuned-runtime-setup.md +13 -0
  204. package/package.json +119 -0
  205. package/tsconfig.eslint.json +5 -0
  206. package/tsconfig.json +26 -0
@@ -0,0 +1,641 @@
1
+ import { Locator, Page } from "playwright-core";
2
+ import { z } from "zod";
3
+
4
+ /**
5
+ * Base schema interface that all JSON schema types extend from.
6
+ * Provides common properties like type and description.
7
+ *
8
+ * @interface BasicSchema
9
+ */
10
+ export interface BasicSchema {
11
+ /** The JSON schema type for this schema definition (exactly one of the listed literals) */
12
+ type: "string" | "number" | "integer" | "boolean" | "array" | "object";
13
+ /** Optional description of what this schema represents */
14
+ description?: string;
15
+ }
16
+
17
+ /**
18
+ * Schema definition for string values with validation constraints.
19
+ *
20
+ * @interface StringSchema
21
+ * @extends BasicSchema
22
+ * @example
23
+ * ```typescript String Schema
24
+ * import { StringSchema } from "@intuned/browser/ai";
25
+ * export default async function handler(params, page, context){
26
+ * const nameSchema: StringSchema = {
27
+ * type: "string",
28
+ * minLength: 2,
29
+ * maxLength: 50,
30
+ * pattern: "^[A-Za-z\\s]+$",
31
+ * description: "Person's full name"
32
+ * };
33
+ * }
34
+ * ```
35
+ */
36
+ export interface StringSchema extends BasicSchema {
37
+ /** Must be "string" for string schemas */
38
+ type: "string";
39
+ /** Array of allowed string values (enumeration) */
40
+ enum?: string[];
41
+ /** Maximum allowed string length */
42
+ maxLength?: number;
43
+ /** Minimum required string length */
44
+ minLength?: number;
45
+ /** Regular expression pattern the string must match */
46
+ pattern?: string;
47
+ }
48
+
49
+ /**
50
+ * Schema definition for numeric values (numbers and integers) with validation constraints.
51
+ *
52
+ * @interface NumberSchema
53
+ * @extends BasicSchema
54
+ * @example
55
+ * ```typescript Number Schema
56
+ * import { NumberSchema } from "@intuned/browser/ai";
57
+ * export default async function handler(params, page, context){
58
+ * const ageSchema: NumberSchema = {
59
+ * type: "integer",
60
+ * minimum: 0,
61
+ * maximum: 150,
62
+ * description: "Person's age in years"
63
+ * };
64
+ * }
65
+ * ```
66
+ */
67
+ export interface NumberSchema extends BasicSchema {
68
+ /** Must be "number" or "integer" for numeric schemas */
69
+ type: "number" | "integer";
70
+ /** Number must be a multiple of this value */
71
+ multipleOf?: number;
72
+ /** Maximum allowed value (inclusive) */
73
+ maximum?: number;
74
+ /** Maximum allowed value (exclusive) */
75
+ exclusiveMaximum?: number;
76
+ /** Minimum allowed value (inclusive) */
77
+ minimum?: number;
78
+ /** Minimum allowed value (exclusive) */
79
+ exclusiveMinimum?: number;
80
+ }
81
+
82
+ /**
83
+ * Schema definition for boolean values.
84
+ *
85
+ * @interface BooleanSchema
86
+ * @extends BasicSchema
87
+ * @example
88
+ * ```typescript Boolean Schema
89
+ * import { BooleanSchema } from "@intuned/browser/ai";
90
+ * export default async function handler(params, page, context){
91
+ * const isActiveSchema: BooleanSchema = {
92
+ * type: "boolean",
93
+ * description: "Whether the user account is active"
94
+ * };
95
+ * }
96
+ * ```
97
+ */
98
+ export interface BooleanSchema extends BasicSchema {
99
+ /** Must be "boolean" for boolean schemas */
100
+ type: "boolean";
101
+ }
102
+
103
+ /**
104
+ * Schema definition for array values with item validation and constraints.
105
+ *
106
+ * @interface ArraySchema
107
+ * @extends BasicSchema
108
+ * @example
109
+ * ```typescript Array Schema
110
+ * import { ArraySchema } from "@intuned/browser/ai";
111
+ * export default async function handler(params, page, context){
112
+ * const tagsSchema: ArraySchema = {
113
+ * type: "array",
114
+ * items: { type: "string" },
115
+ * minItems: 1,
116
+ * maxItems: 10,
117
+ * uniqueItems: true,
118
+ * description: "List of tags"
119
+ * };
120
+ * }
121
+ * ```
122
+ */
123
+ export interface ArraySchema extends BasicSchema {
124
+ /** Must be "array" for array schemas */
125
+ type: "array";
126
+ /** Schema definition for array items */
127
+ items: JsonSchema | z.ZodSchema;
128
+ /** Maximum number of items allowed */
129
+ maxItems?: number;
130
+ /** Minimum number of items required */
131
+ minItems?: number;
132
+ /** Whether all items must be unique */
133
+ uniqueItems?: boolean;
134
+ }
135
+
136
+ /**
137
+ * Schema definition for object values with property validation and constraints.
138
+ *
139
+ * @interface ObjectSchema
140
+ * @extends BasicSchema
141
+ * @example
142
+ * ```typescript Object Schema
143
+ * import { ObjectSchema } from "@intuned/browser/ai";
144
+ * export default async function handler(params, page, context){
145
+ * const userSchema: ObjectSchema = {
146
+ * type: "object",
147
+ * properties: {
148
+ * name: { type: "string" },
149
+ * email: { type: "string", pattern: "^[^@]+@[^@]+\\.[^@]+$" },
150
+ * age: { type: "integer", minimum: 0 }
151
+ * },
152
+ * required: ["name", "email"],
153
+ * description: "User profile information"
154
+ * };
155
+ * }
156
+ * ```
157
+ */
158
+ export interface ObjectSchema extends BasicSchema {
159
+ /** Must be "object" for object schemas */
160
+ type: "object";
161
+ /** Schema definitions for object properties */
162
+ properties: Record<string, JsonSchema | z.ZodSchema>;
163
+ /** Array of required property names */
164
+ required?: string[];
165
+ /** Maximum number of properties allowed */
166
+ maxProperties?: number;
167
+ /** Minimum number of properties required */
168
+ minProperties?: number;
169
+ }
170
+
171
+ /**
172
+ * Union type representing all supported JSON schema types.
173
+ * Can be a StringSchema, NumberSchema, BooleanSchema, ArraySchema, or ObjectSchema.
174
+ * Each schema type provides validation constraints for its respective data type.
175
+ *
176
+ * @type JsonSchema
177
+ * @example
178
+ * ```typescript Object Schema
179
+ * import { JsonSchema } from "@intuned/browser/ai";
180
+ * export default async function handler(params, page, context){
181
+ * const schema: JsonSchema = {
182
+ * type: "object",
183
+ * properties: {
184
+ * name: { type: "string" },
185
+ * age: { type: "number" }
186
+ * },
187
+ * required: ["name"]
188
+ * };
189
+ * }
190
+ * ```
191
+ * @example
192
+ * ```typescript Array Schema
193
+ * import { JsonSchema } from "@intuned/browser/ai";
194
+ * export default async function handler(params, page, context){
195
+ * const schema: JsonSchema = {
196
+ * type: "array",
197
+ * items: { type: "string" },
198
+ * minItems: 1
199
+ * };
200
+ * }
201
+ * ```
202
+ */
203
+ export type JsonSchema =
204
+ | StringSchema
205
+ | NumberSchema
206
+ | BooleanSchema
207
+ | ArraySchema
208
+ | ObjectSchema;
209
+ /**
210
+ * Extract structured data from web pages using AI-powered content analysis.
211
+ * @overload From Page or Locator
212
+ * This function provides intelligent data extraction from web pages using various strategies
213
+ * including HTML parsing, image analysis, and Markdown conversion. It supports extraction
214
+ * from entire pages or specific elements, with built-in caching and retry mechanisms.
215
+ *
216
+ * @param {Object} options - Configuration object containing extraction parameters
217
+ * @param {Page | Locator} options.source - Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element
218
+ * @param {JsonSchema | z.ZodSchema} options.dataSchema - Schema defining the structure of the data to extract. Accepts either a JsonSchema or a ZodSchema
219
+ * @param {string} [options.strategy="HTML"] - Type of extraction: "HTML", "IMAGE", or "MARKDOWN". Defaults to "HTML"
220
+ * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
221
+ * @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
222
+ * @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
223
+ * @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
224
+ * @param {SUPPORTED_MODELS} [options.model="claude-3-5-haiku-latest"] - AI model to use for extraction. See [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models. Defaults to "claude-3-5-haiku-latest"
225
+ * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
226
+ *
227
+ * @returns Promise resolving to the extracted structured data matching the provided schema
228
+ *
229
+ * @example
230
+ * ```typescript Page source
231
+ * import { extractStructuredData } from '@intuned/browser/ai';
232
+ * export default async function handler(params, page, context){
233
+ * await page.goto("https://books.toscrape.com/")
234
+ * const product = await extractStructuredData({
235
+ * source: page,
236
+ * strategy: "HTML",
237
+ * model: "gpt-4o",
238
+ * dataSchema: {
239
+ * type: "object",
240
+ * properties: {
241
+ * name: { type: "string" },
242
+ * price: { type: "string" },
243
+ * description: { type: "string" },
244
+ * inStock: { type: "boolean" }
245
+ * },
246
+ * required: ["name", "price"]
247
+ * },
248
+ * prompt: "Extract product details from this e-commerce page"
249
+ * });
250
+ * console.log(`Found book: ${product.name} - ${product.price}`);
251
+ * }
252
+ * ```
253
+ *
254
+ * @example
255
+ * ```typescript Locator source
256
+ * import { extractStructuredData } from '@intuned/browser/ai';
257
+ * export default async function handler(params, page, context){
258
+ * await page.goto("https://books.toscrape.com/")
259
+ * const articleContainer = page.locator("article").first()
260
+ * const article = await extractStructuredData({
261
+ * source: articleContainer,
262
+ * strategy: "MARKDOWN",
263
+ * model: "claude-3-5-sonnet-20240620",
264
+ * dataSchema: {
265
+ * type: "object",
266
+ * properties: {
267
+ * title: { type: "string" },
268
+ * author: { type: "string" },
269
+ * publishDate: { type: "string" },
270
+ * content: { type: "string" },
271
+ * },
272
+ * required: ["title"]
273
+ * },
274
+ * maxRetries: 5
275
+ * });
276
+ * console.log(`Found book: ${article.title}`);
277
+ * }
278
+ * ```
279
+ */
280
+ export declare function extractStructuredData(options: {
281
+ source: Page | Locator;
282
+ dataSchema: JsonSchema | z.ZodSchema;
283
+ prompt?: string;
284
+ strategy?: "IMAGE" | "MARKDOWN" | "HTML";
285
+ enableDomMatching?: boolean;
286
+ enableCache?: boolean;
287
+ maxRetries?: number;
288
+ model?: SUPPORTED_MODELS;
289
+ apiKey?: string;
290
+ }): Promise<any>;
291
+
292
+ /**
293
+ * Extract structured data from content items (text, images) using AI-powered analysis.
294
+ * @overload From Content
295
+ * This overload provides a simplified interface for data extraction from various content types
296
+ * without requiring a page source or extraction strategy. It accepts text content, image buffers,
297
+ * or image URLs and extracts structured data according to the provided schema.
298
+ *
299
+ * @param {Object} options - Configuration object containing extraction parameters
300
+ * @param {ContentItem[] | ContentItem} options.content - Content to extract data from - can be a single content item or array of content items
301
+ * @param {JsonSchema | z.ZodSchema} options.dataSchema - Schema defining the structure of the data to extract. Accepts either a JsonSchema or a ZodSchema
302
+ * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
303
+ * @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
304
+ * @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
305
+ * @param {SUPPORTED_MODELS} options.model - AI model to use for extraction (e.g., "gpt-4o", "claude-3-5-haiku-latest"). Required for this overload. See [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models
306
+ * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
307
+ *
308
+ * @returns Promise resolving to the extracted structured data matching the provided schema
309
+ *
310
+ * @example
311
+ * ```typescript Text Content
312
+ * import { extractStructuredData, TextContentItem } from '@intuned/browser/ai';
313
+ * export default async function handler(params, page, context){
314
+ * const textContent: TextContentItem = {
315
+ * type: "text",
316
+ * data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
317
+ * };
318
+ *
319
+ * const person = await extractStructuredData({
320
+ * content: textContent,
321
+ * model: "gpt-4o",
322
+ * dataSchema: {
323
+ * type: "object",
324
+ * properties: {
325
+ * name: { type: "string" },
326
+ * age: { type: "number" },
327
+ * occupation: { type: "string" },
328
+ * company: { type: "string" }
329
+ * },
330
+ * required: ["name"]
331
+ * },
332
+ * prompt: "Extract person information from the text"
333
+ * });
334
+ *
335
+ * console.log(`Found person: ${person.name}, ${person.age} years old`);
336
+ * }
337
+ * ```
338
+ *
339
+ * @example
340
+ * ```typescript Multiple Content Items
341
+ * import { extractStructuredData } from '@intuned/browser/ai';
342
+ * export default async function handler(params, page, context){
343
+ * const mixedContent = [
344
+ * { type: "text", data: "Product: iPhone 15" },
345
+ * { type: "image-url", image_type: "jpeg", data: "https://mintcdn.com/intuned-7/asXJUUPBWwDlStUB/logo/light.svg?fit=max&auto=format&n=asXJUUPBWwDlStUB&q=85&s=6525c0b299b3226464eba6afa9b7ebe6" }
346
+ * ];
347
+ *
348
+ *
349
+ * const product = await extractStructuredData({
350
+ * content: mixedContent,
351
+ * model: "claude-3-5-sonnet-latest",
352
+ * dataSchema: {
353
+ * type: "object",
354
+ * properties: {
355
+ * name: { type: "string" },
356
+ * price: { type: "string" },
357
+ * features: { type: "array", items: { type: "string" } }
358
+ * }
359
+ * },
360
+ * maxRetries: 1,
361
+ * enableCache: true
362
+ * });
363
+ * }
364
+ * ```
365
+ */
366
+ export declare function extractStructuredData(options: {
367
+ content: ContentItem[] | ContentItem;
368
+ dataSchema: JsonSchema | z.ZodSchema;
369
+ prompt?: string;
370
+ maxRetries?: number;
371
+ enableCache?: boolean;
372
+ model: SUPPORTED_MODELS;
373
+ apiKey?: string;
374
+ }): Promise<any>;
375
+
376
+ type SUPPORTED_CLAUDE_MODELS =
377
+ | "claude-3-5-haiku-20241022"
378
+ | "claude-3-5-haiku-latest"
379
+ | "claude-3-5-sonnet-20240620"
380
+ | "claude-3-5-sonnet-20241022"
381
+ | "claude-3-5-sonnet-latest"
382
+ | "claude-3-7-sonnet-20250219"
383
+ | "claude-3-7-sonnet-latest"
384
+ | "claude-3-haiku-20240307"
385
+ | "claude-4-opus-20250514"
386
+ | "claude-4-sonnet-20250514"
387
+ | "claude-opus-4-1"
388
+ | "claude-opus-4-1-20250805"
389
+ | "claude-opus-4-20250514"
390
+ | "claude-sonnet-4-20250514";
391
+
392
+ type SUPPORTED_OPENAI_MODELS =
393
+ | "gpt-3.5-turbo"
394
+ | "gpt-3.5-turbo-0125"
395
+ | "gpt-3.5-turbo-0301"
396
+ | "gpt-3.5-turbo-0613"
397
+ | "gpt-3.5-turbo-1106"
398
+ | "gpt-3.5-turbo-16k"
399
+ | "gpt-3.5-turbo-16k-0613"
400
+ | "gpt-3.5-turbo-instruct"
401
+ | "gpt-3.5-turbo-instruct-0914"
402
+ | "gpt-4"
403
+ | "gpt-4-0314"
404
+ | "gpt-4-0613"
405
+ | "gpt-4-32k"
406
+ | "gpt-4-32k-0314"
407
+ | "gpt-4-32k-0613"
408
+ | "gpt-4-turbo"
409
+ | "gpt-4-turbo-2024-04-09"
410
+ | "gpt-4.1"
411
+ | "gpt-4.1-2025-04-14"
412
+ | "gpt-4.1-mini"
413
+ | "gpt-4.1-mini-2025-04-14"
414
+ | "gpt-4.1-nano"
415
+ | "gpt-4.1-nano-2025-04-14"
416
+ | "gpt-4o"
417
+ | "gpt-4o-2024-05-13"
418
+ | "gpt-4o-2024-08-06"
419
+ | "gpt-4o-2024-11-20"
420
+ | "gpt-4o-mini"
421
+ | "gpt-4o-mini-2024-07-18"
422
+ | "gpt-5"
423
+ | "gpt-5-2025-08-07"
424
+ | "gpt-5-chat"
425
+ | "gpt-5-chat-latest"
426
+ | "gpt-5-mini"
427
+ | "gpt-5-mini-2025-08-07"
428
+ | "gpt-5-nano"
429
+ | "gpt-5-nano-2025-08-07"
430
+ | "o1"
431
+ | "o1-2024-12-17"
432
+ | "o1-mini"
433
+ | "o1-mini-2024-09-12"
434
+ | "o1-pro"
435
+ | "o1-pro-2025-03-19"
436
+ | "o3"
437
+ | "o3-2025-04-16"
438
+ | "o3-deep-research"
439
+ | "o3-deep-research-2025-06-26"
440
+ | "o3-mini"
441
+ | "o3-mini-2025-01-31"
442
+ | "o3-pro"
443
+ | "o3-pro-2025-06-10"
444
+ | "o4-mini"
445
+ | "o4-mini-2025-04-16"
446
+ | "o4-mini-deep-research"
447
+ | "o4-mini-deep-research-2025-06-26";
448
+ /**
449
+ * Union type representing all supported AI models for data extraction.
450
+ * Includes models from both OpenAI and Anthropic (it is the union of SUPPORTED_OPENAI_MODELS and SUPPORTED_CLAUDE_MODELS).
451
+ *
452
+ * **Supported OpenAI Models:**
453
+ * "gpt-3.5-turbo"
454
+ * "gpt-3.5-turbo-0125"
455
+ * "gpt-3.5-turbo-0301"
456
+ * "gpt-3.5-turbo-0613"
457
+ * "gpt-3.5-turbo-1106"
458
+ * "gpt-3.5-turbo-16k"
459
+ * "gpt-3.5-turbo-16k-0613"
460
+ * "gpt-3.5-turbo-instruct"
461
+ * "gpt-3.5-turbo-instruct-0914"
462
+ * "gpt-4"
463
+ * "gpt-4-0314"
464
+ * "gpt-4-0613"
465
+ * "gpt-4-32k"
466
+ * "gpt-4-32k-0314"
467
+ * "gpt-4-32k-0613"
468
+ * "gpt-4-turbo"
469
+ * "gpt-4-turbo-2024-04-09"
470
+ * "gpt-4.1"
471
+ * "gpt-4.1-2025-04-14"
472
+ * "gpt-4.1-mini"
473
+ * "gpt-4.1-mini-2025-04-14"
474
+ * "gpt-4.1-nano"
475
+ * "gpt-4.1-nano-2025-04-14"
476
+ * "gpt-4o"
477
+ * "gpt-4o-2024-05-13"
478
+ * "gpt-4o-2024-08-06"
479
+ * "gpt-4o-2024-11-20"
480
+ * "gpt-4o-mini"
481
+ * "gpt-4o-mini-2024-07-18"
482
+ * "gpt-5"
483
+ * "gpt-5-2025-08-07"
484
+ * "gpt-5-chat"
485
+ * "gpt-5-chat-latest"
486
+ * "gpt-5-mini"
487
+ * "gpt-5-mini-2025-08-07"
488
+ * "gpt-5-nano"
489
+ * "gpt-5-nano-2025-08-07"
490
+ * "o1"
491
+ * "o1-2024-12-17"
492
+ * "o1-mini"
493
+ * "o1-mini-2024-09-12"
494
+ * "o1-pro"
495
+ * "o1-pro-2025-03-19"
496
+ * "o3"
497
+ * "o3-2025-04-16"
498
+ * "o3-deep-research"
499
+ * "o3-deep-research-2025-06-26"
500
+ * "o3-mini"
501
+ * "o3-mini-2025-01-31"
502
+ * "o3-pro"
503
+ * "o3-pro-2025-06-10"
504
+ * "o4-mini"
505
+ * "o4-mini-2025-04-16"
506
+ * "o4-mini-deep-research"
507
+ * "o4-mini-deep-research-2025-06-26"
508
+ *
509
+ * **Supported Anthropic (Claude) Models:**
510
+ * "claude-3-5-haiku-20241022"
511
+ * "claude-3-5-haiku-latest"
512
+ * "claude-3-5-sonnet-20240620"
513
+ * "claude-3-5-sonnet-20241022"
514
+ * "claude-3-5-sonnet-latest"
515
+ * "claude-3-7-sonnet-20250219"
516
+ * "claude-3-7-sonnet-latest"
517
+ * "claude-3-haiku-20240307"
518
+ * "claude-4-opus-20250514"
519
+ * "claude-4-sonnet-20250514"
520
+ * "claude-opus-4-1"
521
+ * "claude-opus-4-1-20250805"
522
+ * "claude-opus-4-20250514"
523
+ * "claude-sonnet-4-20250514"
524
+ *
525
+ * @type SUPPORTED_MODELS
526
+ */
527
+ export type SUPPORTED_MODELS =
528
+ | SUPPORTED_CLAUDE_MODELS
529
+ | SUPPORTED_OPENAI_MODELS;
530
+
531
+ /**
532
+ * Uses AI vision to determine if a webpage has finished loading by analyzing a screenshot.
533
+ * Detects loading spinners, blank content, or incomplete page states.
534
+ *
535
+ * @param {Object} input - Input object containing the page to check
536
+ * @param {Page} input.page - The Playwright page to check
537
+ * @param {number} [input.timeoutInMs=10000] - Screenshot timeout in milliseconds. Defaults to 10000
538
+ * @param {SUPPORTED_MODELS} [input.model="gpt-4o-2024-08-06"] - AI model to use for the check. See [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models. Defaults to "gpt-4o-2024-08-06"
539
+ * @param {string} [input.apiKey] - Optional API key for the AI service (if provided, will not be billed to your account)
540
+ * @returns {Promise<boolean>} Promise resolving to true if page is loaded, false if still loading
541
+ * @example
542
+ * ```typescript Check Page Loading
543
+ * import { isPageLoaded } from "@intuned/browser/ai";
544
+ * export default async function handler(params, page, context){
545
+ * // Wait for page to finish loading
546
+ * await page.goto('https://example.com');
547
+ *
548
+ * const pageLoaded = await isPageLoaded({page});
549
+ * if (pageLoaded) {
550
+ * // Continue with scraping or interactions
551
+ * } else {
552
+ * // Wait longer or retry
553
+ * }
554
+ * }
555
+ * ```
556
+ *
557
+ * @example
558
+ * ```typescript Loading Loop
559
+ * import { isPageLoaded } from "@intuned/browser/ai";
560
+ * export default async function handler(params, page, context){
561
+ * // Keep checking until page loads
562
+ * await page.goto("https://example.com");
563
+ * let attempts = 0;
564
+ * while (attempts < 10) {
565
+ * const pageLoaded = await isPageLoaded({
566
+ * page,
567
+ * model: "gpt-4o",
568
+ * timeoutInMs: 5000
569
+ * });
570
+ * if (pageLoaded) break;
571
+ *
572
+ * await page.waitForTimeout(2000);
573
+ * attempts++;
574
+ * }
575
+ * }
576
+ * ```
577
+ *
578
+ */
579
+ export declare function isPageLoaded(input: {
580
+ page: Page;
581
+ timeoutInMs?: number;
582
+ model?: SUPPORTED_MODELS;
583
+ apiKey?: string;
584
+ }): Promise<boolean>;
585
+
586
+ /**
587
+ * Represents text content for AI extraction.
588
+ * Used when passing text data directly to extractStructuredData without a page source.
589
+ *
590
+ * @interface TextContentItem
591
+ * @property {"text"} type - Discriminator for the content item; always the literal "text"
592
+ * @property {string} data - The text content to extract data from
593
+ */
594
+ export interface TextContentItem {
595
+ type: "text";
596
+ data: string;
597
+ }
598
+
599
+ /**
600
+ * Represents image content provided as a Buffer for AI extraction.
601
+ * Used when passing image data directly to extractStructuredData without a page source.
602
+ * The image will be analyzed by AI vision models for data extraction.
603
+ *
604
+ * @interface ImageBufferContentItem
605
+ * @property {"image-buffer"} type - Discriminator for the content item; always the literal "image-buffer"
606
+ * @property {"png" | "jpeg" | "gif" | "webp"} image_type - The image format; must be one of "png", "jpeg", "gif", or "webp"
607
+ * @property {Buffer} data - The Buffer containing the raw image data
608
+ */
609
+ export interface ImageBufferContentItem {
610
+ type: "image-buffer";
611
+ image_type: "png" | "jpeg" | "gif" | "webp";
612
+ data: Buffer;
613
+ }
614
+
615
+ /**
616
+ * Represents image content provided as a URL for AI extraction.
617
+ * Used when passing image URLs directly to extractStructuredData without a page source.
618
+ * The image will be fetched from the URL and analyzed by AI vision models for data extraction.
619
+ *
620
+ * @interface ImageUrlContentItem
621
+ * @property {"image-url"} type - Discriminator for the content item; always the literal "image-url"
622
+ * @property {"png" | "jpeg" | "gif" | "webp"} image_type - The image format; must be one of "png", "jpeg", "gif", or "webp"
623
+ * @property {string} data - The URL of the image to fetch and analyze
624
+ */
625
+ export interface ImageUrlContentItem {
626
+ type: "image-url";
627
+ image_type: "png" | "jpeg" | "gif" | "webp";
628
+ data: string;
629
+ }
630
+
631
+ /**
632
+ * Union type representing all content items for AI data extraction. A value is exactly one of the following members:
633
+ * @type ContentItem
634
+ * - [TextContentItem](../interfaces/TextContentItem): used when passing text data directly to extractStructuredData without a page source.
635
+ * - [ImageBufferContentItem](../interfaces/ImageBufferContentItem): used when passing image data directly to extractStructuredData without a page source.
636
+ * - [ImageUrlContentItem](../interfaces/ImageUrlContentItem): used when passing image URLs directly to extractStructuredData without a page source.
637
+ */
638
+ export type ContentItem =
639
+ | TextContentItem
640
+ | ImageBufferContentItem
641
+ | ImageUrlContentItem;
@@ -0,0 +1,19 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", { // sets exports.__esModule = true (interop flag conventionally emitted by ES-module transpilers)
4
+ value: true
5
+ });
6
+ Object.defineProperty(exports, "extractStructuredData", { // re-export of extractStructuredData from ./extractStructuredData
7
+ enumerable: true,
8
+ get: function () { // getter reads the binding from the required module on every access instead of copying it once
9
+ return _extractStructuredData.extractStructuredData;
10
+ }
11
+ });
12
+ Object.defineProperty(exports, "isPageLoaded", { // re-export of isPageLoaded from ./isPageLoaded
13
+ enumerable: true,
14
+ get: function () { // getter reads the binding from the required module on every access instead of copying it once
15
+ return _isPageLoaded.isPageLoaded;
16
+ }
17
+ });
18
+ var _extractStructuredData = require("./extractStructuredData"); // `var` is hoisted, so the getter defined above can name this binding; it is assigned here at module load
19
+ var _isPageLoaded = require("./isPageLoaded"); // same pattern: module object backing the isPageLoaded getter