@intuned/browser-dev 2.2.3-test-build.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. package/.babelrc +21 -0
  2. package/.eslintignore +10 -0
  3. package/.eslintrc.js +39 -0
  4. package/LICENSE +43 -0
  5. package/dist/ai/export.d.js +5 -0
  6. package/dist/ai/export.d.ts +641 -0
  7. package/dist/ai/extractStructuredData.js +320 -0
  8. package/dist/ai/extractStructuredDataUsingAi.js +139 -0
  9. package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
  10. package/dist/ai/extractionHelpers/validateSchema.js +148 -0
  11. package/dist/ai/index.d.ts +641 -0
  12. package/dist/ai/index.js +19 -0
  13. package/dist/ai/isPageLoaded.js +77 -0
  14. package/dist/ai/prompt.js +39 -0
  15. package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
  16. package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
  17. package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
  18. package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
  19. package/dist/ai/tools/index.js +48 -0
  20. package/dist/ai/types/errors.js +67 -0
  21. package/dist/ai/types/models.js +45 -0
  22. package/dist/ai/types/types.js +48 -0
  23. package/dist/ai/validators.js +167 -0
  24. package/dist/common/Logger/index.js +60 -0
  25. package/dist/common/Logger/types.js +5 -0
  26. package/dist/common/SdkError.js +50 -0
  27. package/dist/common/aiModelsValidations.js +32 -0
  28. package/dist/common/browser_scripts.js +2596 -0
  29. package/dist/common/ensureBrowserScripts.js +18 -0
  30. package/dist/common/extendedTest.js +148 -0
  31. package/dist/common/extractionHelpers.js +19 -0
  32. package/dist/common/formatZodError.js +18 -0
  33. package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
  34. package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
  35. package/dist/common/fuzzySearch/utils.js +23 -0
  36. package/dist/common/getModelProvider.js +18 -0
  37. package/dist/common/getSimplifiedHtml.js +122 -0
  38. package/dist/common/hashObject.js +32 -0
  39. package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
  40. package/dist/common/html2markdown/index.js +19 -0
  41. package/dist/common/jwtTokenManager.js +18 -0
  42. package/dist/common/loadRuntime.js +16 -0
  43. package/dist/common/locatorHelpers.js +41 -0
  44. package/dist/common/matching/collectStrings.js +32 -0
  45. package/dist/common/matching/levenshtein.js +40 -0
  46. package/dist/common/matching/matching.js +317 -0
  47. package/dist/common/matching/types.js +1 -0
  48. package/dist/common/noEmpty.js +9 -0
  49. package/dist/common/saveSnapshotWithExamples.js +60 -0
  50. package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
  51. package/dist/common/xpathMapping.js +107 -0
  52. package/dist/helpers/clickUntilExhausted.js +85 -0
  53. package/dist/helpers/downloadFile.js +125 -0
  54. package/dist/helpers/export.d.js +5 -0
  55. package/dist/helpers/export.d.ts +1220 -0
  56. package/dist/helpers/extractMarkdown.js +35 -0
  57. package/dist/helpers/filterEmptyValues.js +54 -0
  58. package/dist/helpers/gotoUrl.js +98 -0
  59. package/dist/helpers/index.d.ts +1220 -0
  60. package/dist/helpers/index.js +128 -0
  61. package/dist/helpers/processDate.js +25 -0
  62. package/dist/helpers/resolveUrl.js +64 -0
  63. package/dist/helpers/sanitizeHtml.js +74 -0
  64. package/dist/helpers/saveFileToS3.js +50 -0
  65. package/dist/helpers/scrollToLoadContent.js +57 -0
  66. package/dist/helpers/tests/extendedTest.js +130 -0
  67. package/dist/helpers/tests/testClickUntilExhausted.spec.js +387 -0
  68. package/dist/helpers/tests/testDownloadFile.spec.js +204 -0
  69. package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
  70. package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
  71. package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
  72. package/dist/helpers/tests/testProcessDate.spec.js +13 -0
  73. package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
  74. package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
  75. package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
  76. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
  77. package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
  78. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
  79. package/dist/helpers/types/Attachment.js +115 -0
  80. package/dist/helpers/types/CustomTypeRegistry.js +48 -0
  81. package/dist/helpers/types/RunEnvironment.js +18 -0
  82. package/dist/helpers/types/ValidationError.js +17 -0
  83. package/dist/helpers/types/index.js +51 -0
  84. package/dist/helpers/uploadFileToS3.js +154 -0
  85. package/dist/helpers/utils/getS3Client.js +22 -0
  86. package/dist/helpers/utils/index.js +73 -0
  87. package/dist/helpers/utils/isDownload.js +10 -0
  88. package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
  89. package/dist/helpers/utils/isLocator.js +9 -0
  90. package/dist/helpers/utils/jwtTokenManager.js +18 -0
  91. package/dist/helpers/validateDataUsingSchema.js +103 -0
  92. package/dist/helpers/waitForDomSettled.js +90 -0
  93. package/dist/helpers/withNetworkSettledWait.js +91 -0
  94. package/dist/index.d.js +16 -0
  95. package/dist/index.d.ts +10 -0
  96. package/dist/index.js +16 -0
  97. package/dist/intunedServices/ApiGateway/aiApiGateway.js +99 -0
  98. package/dist/intunedServices/ApiGateway/factory.js +13 -0
  99. package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
  100. package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
  101. package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
  102. package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +224 -0
  103. package/dist/intunedServices/ApiGateway/types.js +11 -0
  104. package/dist/intunedServices/cache/cache.js +61 -0
  105. package/dist/intunedServices/cache/index.js +12 -0
  106. package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
  107. package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
  108. package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
  109. package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
  110. package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
  111. package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
  112. package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
  113. package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
  114. package/dist/optimized-extractors/common/index.js +55 -0
  115. package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
  116. package/dist/optimized-extractors/common/matching/matching.js +212 -0
  117. package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
  118. package/dist/optimized-extractors/common/matching/types.js +18 -0
  119. package/dist/optimized-extractors/common/matching/utils.js +184 -0
  120. package/dist/optimized-extractors/common/utils.js +58 -0
  121. package/dist/optimized-extractors/export.d.js +5 -0
  122. package/dist/optimized-extractors/export.d.ts +397 -0
  123. package/dist/optimized-extractors/extractArray.js +120 -0
  124. package/dist/optimized-extractors/extractObject.js +104 -0
  125. package/dist/optimized-extractors/index.d.ts +397 -0
  126. package/dist/optimized-extractors/index.js +31 -0
  127. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
  128. package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
  129. package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
  130. package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
  131. package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
  132. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
  133. package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
  134. package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
  135. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
  136. package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
  137. package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
  138. package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
  139. package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
  140. package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
  141. package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
  142. package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
  143. package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
  144. package/dist/optimized-extractors/models/anthropicModel.js +23 -0
  145. package/dist/optimized-extractors/models/openaiModel.js +23 -0
  146. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
  147. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
  148. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
  149. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
  150. package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
  151. package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
  152. package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
  153. package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
  154. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
  155. package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
  156. package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
  157. package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
  158. package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
  159. package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
  160. package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
  161. package/dist/optimized-extractors/types/errors.js +42 -0
  162. package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
  163. package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
  164. package/dist/optimized-extractors/types/types.js +5 -0
  165. package/dist/optimized-extractors/validators.js +152 -0
  166. package/dist/vite-env.d.js +1 -0
  167. package/dist/vite-env.d.ts +9 -0
  168. package/docs.md +14 -0
  169. package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
  170. package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
  171. package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
  172. package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
  173. package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
  174. package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
  175. package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
  176. package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
  177. package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
  178. package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
  179. package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
  180. package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
  181. package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
  182. package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
  183. package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
  184. package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
  185. package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
  186. package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
  187. package/generated-docs/helpers/functions/processDate.mdx +55 -0
  188. package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
  189. package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
  190. package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
  191. package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
  192. package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
  193. package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
  194. package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
  195. package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
  196. package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
  197. package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
  198. package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
  199. package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
  200. package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
  201. package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
  202. package/how-to-run-tests.md +10 -0
  203. package/intuned-runtime-setup.md +13 -0
  204. package/package.json +119 -0
  205. package/tsconfig.eslint.json +5 -0
  206. package/tsconfig.json +26 -0
@@ -0,0 +1,152 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.strategySchema = exports.simpleObjectJsonSchema = exports.simpleArrayItemJsonSchema = exports.extractObjectOptimizedInputSchema = exports.extractArrayOptimizedInputSchema = void 0;
7
+ var _zod = require("zod");
8
+ var _aiModelsValidation = require("./types/aiModelsValidation");
9
+ const htmlStrategySchema = _zod.z.object({
10
+ model: _zod.z.enum(_aiModelsValidation.SUPPORTED_TEXT_MODELS, {
11
+ required_error: "strategy model is required",
12
+ invalid_type_error: "strategy model is invalid"
13
+ }),
14
+ type: _zod.z.literal("HTML", {
15
+ required_error: "strategy type is required",
16
+ invalid_type_error: "strategy type is invalid"
17
+ })
18
+ });
19
+ const imageStrategySchema = _zod.z.object({
20
+ model: _zod.z.enum(_aiModelsValidation.SUPPORTED_VISION_MODELS, {
21
+ required_error: "strategy model is required",
22
+ invalid_type_error: "strategy model is invalid"
23
+ }),
24
+ type: _zod.z.literal("IMAGE", {
25
+ required_error: "strategy type is required",
26
+ invalid_type_error: "strategy type is invalid"
27
+ })
28
+ });
29
+ const simpleStringSchema = _zod.z.object({
30
+ type: _zod.z.literal("string", {
31
+ required_error: "property type is required",
32
+ invalid_type_error: "optimized extractors only support string types"
33
+ }),
34
+ description: _zod.z.string({
35
+ required_error: "property description is required",
36
+ invalid_type_error: "property description must be a string"
37
+ }).optional()
38
+ });
39
+ const simpleArrayStringSchema = simpleStringSchema.extend({
40
+ primary: _zod.z.boolean().optional()
41
+ });
42
+ const simpleObjectJsonSchema = exports.simpleObjectJsonSchema = _zod.z.object({
43
+ type: _zod.z.literal("object", {
44
+ errorMap: () => ({
45
+ message: 'schema type is required, and must have the value "object"'
46
+ })
47
+ }),
48
+ description: _zod.z.string().optional(),
49
+ properties: _zod.z.record(_zod.z.string(), simpleStringSchema, {
50
+ required_error: "properties is required in object schemas"
51
+ }),
52
+ required: _zod.z.array(_zod.z.string(), {
53
+ required_error: "required must be an array of strings",
54
+ invalid_type_error: "required must be an array of strings"
55
+ }).min(1, {
56
+ message: "at least one property must be required"
57
+ })
58
+ }, {
59
+ required_error: "schema is required"
60
+ }).refine(data => {
61
+ return data.required.every(key => Object.keys(data.properties).includes(key));
62
+ }, {
63
+ message: "All required keys must be defined in the properties object"
64
+ }).refine(data => {
65
+ return Object.keys(data.properties).length > 0;
66
+ }, {
67
+ message: "you must have at least one property in the properties object"
68
+ });
69
+ const simpleArrayItemJsonSchema = exports.simpleArrayItemJsonSchema = _zod.z.object({
70
+ type: _zod.z.literal("object", {
71
+ errorMap: () => ({
72
+ message: 'schema type is required, and must have the value "object"'
73
+ })
74
+ }),
75
+ description: _zod.z.string().optional(),
76
+ properties: _zod.z.record(_zod.z.string(), simpleArrayStringSchema, {
77
+ required_error: "properties is required in object schemas"
78
+ }),
79
+ required: _zod.z.array(_zod.z.string(), {
80
+ required_error: "required must be an array of strings",
81
+ invalid_type_error: "required must be an array of strings"
82
+ }).min(1, {
83
+ message: "at least one property must be required"
84
+ })
85
+ }, {
86
+ required_error: "schema is required"
87
+ }).refine(data => {
88
+ return data.required.every(key => Object.keys(data.properties).includes(key));
89
+ }, {
90
+ message: "All required keys must be defined in the properties object"
91
+ }).refine(data => {
92
+ const primaryField = Object.entries(data.properties).find(([k, v]) => v.primary);
93
+ return primaryField;
94
+ }, {
95
+ message: "you must have one primary property"
96
+ }).refine(data => {
97
+ const primaryField = Object.entries(data.properties).find(([k, v]) => v.primary);
98
+ if (!primaryField) {
99
+ return false;
100
+ }
101
+ return data.required.includes(primaryField[0]);
102
+ }, {
103
+ message: "The primary field must be required"
104
+ });
105
+ const strategySchema = exports.strategySchema = _zod.z.union([htmlStrategySchema, imageStrategySchema], {
106
+ errorMap: (err, context) => {
107
+ if (err.code === "invalid_union" && context.data.model === "gpt3.5-turbo" && context.data.type === "IMAGE") {
108
+ return {
109
+ message: "gpt3.5-turbo does not support IMAGE strategy"
110
+ };
111
+ }
112
+ return {
113
+ message: err.message ?? "invalid strategy configuration"
114
+ };
115
+ }
116
+ }).optional().default({
117
+ model: "claude-3-haiku",
118
+ type: "HTML"
119
+ });
120
+ const labelSchema = _zod.z.string({
121
+ invalid_type_error: "label must be a string",
122
+ required_error: "label is required"
123
+ }).min(1, "label must be at least 1 character long");
124
+ const entityNameSchema = _zod.z.string().min(1, {
125
+ message: "entity name must be at least 1 character long."
126
+ }).max(50, {
127
+ message: "entity name must be no more than 50 characters long."
128
+ }).regex(/^[a-zA-Z0-9_-]+$/, {
129
+ message: "entity name can only contain letters, digits, underscores, and hyphens."
130
+ });
131
+ const extractObjectOptimizedInputSchema = exports.extractObjectOptimizedInputSchema = _zod.z.object({
132
+ label: labelSchema,
133
+ strategy: strategySchema,
134
+ entityName: entityNameSchema,
135
+ entitySchema: simpleObjectJsonSchema,
136
+ variantKey: _zod.z.string().optional().default("about:blank"),
137
+ prompt: _zod.z.string().optional(),
138
+ optionalPropertiesInvalidator: _zod.z.function().returns(_zod.z.array(_zod.z.string())).optional().default(() => () => []),
139
+ apiKey: _zod.z.string().optional()
140
+ }, {
141
+ required_error: "extractObjectOptimized function missing configurations"
142
+ });
143
+ const extractArrayOptimizedInputSchema = exports.extractArrayOptimizedInputSchema = _zod.z.object({
144
+ label: labelSchema,
145
+ strategy: strategySchema,
146
+ prompt: _zod.z.string().optional(),
147
+ itemEntityName: entityNameSchema,
148
+ itemEntitySchema: simpleArrayItemJsonSchema,
149
+ variantKey: _zod.z.string().optional(),
150
+ optionalPropertiesInvalidator: _zod.z.function().returns(_zod.z.array(_zod.z.string())).optional().default(() => () => []),
151
+ apiKey: _zod.z.string().optional()
152
+ });
@@ -0,0 +1 @@
1
+ "use strict";
@@ -0,0 +1,9 @@
1
+ interface ImportMetaEnv {
2
+ readonly VITE_S3_BUCKET: string;
3
+ readonly VITE_AWS_REGION?: string;
4
+ readonly VITE_API_URL?: string;
5
+ }
6
+
7
+ interface ImportMeta {
8
+ readonly env: ImportMetaEnv;
9
+ }
package/docs.md ADDED
@@ -0,0 +1,14 @@
1
+ to generate all docs run:
2
+ ```bash
3
+ yarn generate-all-docs
4
+ ```
5
+
6
+ or run:
7
+ ```bash
8
+ yarn generate-docs <input.d.ts> [outputdir]
9
+ ```
10
+ These commands will go through all export.d.ts files of the 5 namespaces and generate their docs.
11
+ The script in ./scripts/generate-docs reads the JSDoc comments in the directories and passes them to the markdown converters, then outputs functions in outputdir/functions and interfaces in outputdir/interfaces.
12
+ The markdown converter scripts parse JSDoc comments into a Mintlify-compatible format.
13
+
14
+ To write a jsdoc, follow the conventions found in any export.d.ts file, especially the one in `helpers/export.d.ts`.
@@ -0,0 +1,255 @@
1
+ ---
2
+ title: extractStructuredData
3
+ description: ""
4
+ ---
5
+
6
+ <Tabs>
7
+
8
+ <Tab title="From Page or Locator">
9
+
10
+ ```typescript
11
+ export declare function extractStructuredData(options: {
12
+ source: Page | Locator;
13
+ dataSchema: JsonSchema | z.ZodSchema;
14
+ prompt?: string;
15
+ strategy?: "IMAGE" | "MARKDOWN" | "HTML";
16
+ enableDomMatching?: boolean;
17
+ enableCache?: boolean;
18
+ maxRetries?: number;
19
+ model?: SUPPORTED_MODELS;
20
+ apiKey?: string;
21
+ }): Promise<any>;
22
+ ```
23
+
24
+ Extract structured data from web pages using AI-powered content analysis.
25
+
26
+ ## Examples
27
+
28
+ <CodeGroup>
29
+
30
+ ```typescript Page source
31
+ import { extractStructuredData } from '@intuned/browser/ai';
32
+ export default async function handler(params, page, context){
33
+ await page.goto("https://books.toscrape.com/")
34
+ const product = await extractStructuredData({
35
+ source: page,
36
+ strategy: "HTML",
37
+ model: "gpt-4o",
38
+ dataSchema: {
39
+ type: "object",
40
+ properties: {
41
+ name: { type: "string" },
42
+ price: { type: "string" },
43
+ description: { type: "string" },
44
+ inStock: { type: "boolean" }
45
+ },
46
+ required: ["name", "price"]
47
+ },
48
+ prompt: "Extract product details from this e-commerce page"
49
+ });
50
+ console.log(`Found book: ${product.name} - ${product.price}`);
51
+ }
52
+ ```
53
+
54
+ ```typescript Locator source
55
+ import { extractStructuredData } from '@intuned/browser/ai';
56
+ export default async function handler(params, page, context){
57
+ await page.goto("https://books.toscrape.com/")
58
+ const articleContainer = page.locator("article").first()
59
+ const article = await extractStructuredData({
60
+ source: articleContainer,
61
+ strategy: "MARKDOWN",
62
+ model: "claude-3",
63
+ dataSchema: {
64
+ type: "object",
65
+ properties: {
66
+ title: { type: "string" },
67
+ author: { type: "string" },
68
+ publishDate: { type: "string" },
69
+ content: { type: "string" },
70
+ },
71
+ required: ["title"]
72
+ },
73
+ maxRetries: 5
74
+ });
75
+ console.log(`Found book: ${article.title}`);
76
+ }
77
+ ```
78
+
79
+ </CodeGroup>
80
+
81
+ ## Arguments
82
+
83
+ <ParamField path="options" type="Object" required
84
+ >
85
+ Configuration object containing extraction parameters
86
+
87
+ <Expandable title="options">
88
+ <ParamField path="options.source" type="Page | Locator">
89
+ Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element
90
+ </ParamField>
91
+
92
+ <ParamField path="options.dataSchema" type="JsonSchema | z.ZodSchema">
93
+ Schema defining the structure of the data to extract. This can be a JsonSchema or a ZodSchema
94
+ </ParamField>
95
+
96
+ <ParamField path="options.strategy" type="string">
97
+ Type of extraction: "HTML", "IMAGE", or "MARKDOWN". Defaults to "HTML"
98
+ </ParamField>
99
+
100
+ <ParamField path="options.prompt" type="string">
101
+ Optional prompt to guide the extraction process and provide more context
102
+ </ParamField>
103
+
104
+ <ParamField path="options.enableDomMatching" type="boolean">
105
+ Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
106
+ </ParamField>
107
+
108
+ <ParamField path="options.enableCache" type="boolean">
109
+ Whether to enable caching of the extracted data. Defaults to true
110
+ </ParamField>
111
+
112
+ <ParamField path="options.maxRetries" type="number">
113
+ Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
114
+ </ParamField>
115
+
116
+ <ParamField path="options.model" type="SUPPORTED_MODELS">
117
+ AI model to use for extraction. See [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models. Defaults to "claude-3-5-haiku-latest"
118
+ </ParamField>
119
+
120
+ <ParamField path="options.apiKey" type="string">
121
+ Optional API key for AI extraction (if provided, will not be billed to your account)
122
+ </ParamField>
123
+
124
+ </Expandable>
125
+
126
+ </ParamField>
127
+
128
+ ## Returns: `any`
129
+
130
+ Promise resolving to the extracted structured data matching the provided schema
131
+
132
+ </Tab>
133
+
134
+ <Tab title="From Content">
135
+
136
+ ```typescript
137
+ export declare function extractStructuredData(options: {
138
+ content: ContentItem[] | ContentItem;
139
+ dataSchema: JsonSchema | z.ZodSchema;
140
+ prompt?: string;
141
+ maxRetries?: number;
142
+ enableCache?: boolean;
143
+ model: SUPPORTED_MODELS;
144
+ apiKey?: string;
145
+ }): Promise<any>;
146
+ ```
147
+
148
+ Extract structured data from content items (text, images) using AI-powered analysis.
149
+
150
+ ## Examples
151
+
152
+ <CodeGroup>
153
+
154
+ ```typescript Text Content
155
+ import { extractStructuredData } from '@intuned/browser/ai';
156
+ export default async function handler(params, page, context){
157
+ const textContent: TextContentItem = {
158
+ type: "text",
159
+ data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
160
+ };
161
+
162
+ const person = await extractStructuredData({
163
+ content: textContent,
164
+ model: "gpt-4o",
165
+ dataSchema: {
166
+ type: "object",
167
+ properties: {
168
+ name: { type: "string" },
169
+ age: { type: "number" },
170
+ occupation: { type: "string" },
171
+ company: { type: "string" }
172
+ },
173
+ required: ["name"]
174
+ },
175
+ prompt: "Extract person information from the text"
176
+ });
177
+
178
+ console.log(`Found person: ${person.name}, ${person.age} years old`);
179
+ }
180
+ ```
181
+
182
+ ```typescript Multiple Content Items
183
+ import { extractStructuredData } from '@intuned/browser/ai';
184
+ export default async function handler(params, page, context){
185
+ const mixedContent = [
186
+ { type: "text", data: "Product: iPhone 15" },
187
+ { type: "image-url", image_type: "jpeg", data: "https://mintcdn.com/intuned-7/asXJUUPBWwDlStUB/logo/light.svg?fit=max&auto=format&n=asXJUUPBWwDlStUB&q=85&s=6525c0b299b3226464eba6afa9b7ebe6" }
188
+ ];
189
+
190
+
191
+ const product = await extractStructuredData({
192
+ content: mixedContent,
193
+ model: "claude-3",
194
+ dataSchema: {
195
+ type: "object",
196
+ properties: {
197
+ name: { type: "string" },
198
+ price: { type: "string" },
199
+ features: { type: "array", items: { type: "string" } }
200
+ }
201
+ },
202
+ maxRetries: 1,
203
+ enableCache: true
204
+ });
205
+ }
206
+ ```
207
+
208
+ </CodeGroup>
209
+
210
+ ## Arguments
211
+
212
+ <ParamField path="options" type="Object" required
213
+ >
214
+ Configuration object containing extraction parameters
215
+
216
+ <Expandable title="options">
217
+ <ParamField path="options.content" type="Array<ContentItem> | ContentItem">
218
+ Content to extract data from - can be a single content item or array of content items
219
+ </ParamField>
220
+
221
+ <ParamField path="options.dataSchema" type="JsonSchema | z.ZodSchema">
222
+ JsonSchema defining the structure of the data to extract
223
+ </ParamField>
224
+
225
+ <ParamField path="options.prompt" type="string">
226
+ Optional prompt to guide the extraction process and provide more context
227
+ </ParamField>
228
+
229
+ <ParamField path="options.enableCache" type="boolean">
230
+ Whether to enable caching of the extracted data. Defaults to true
231
+ </ParamField>
232
+
233
+ <ParamField path="options.maxRetries" type="number">
234
+ Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
235
+ </ParamField>
236
+
237
+ <ParamField path="options.model" type="SUPPORTED_MODELS">
238
+ AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models
239
+ </ParamField>
240
+
241
+ <ParamField path="options.apiKey" type="string">
242
+ Optional API key for AI extraction (if provided, will not be billed to your account)
243
+ </ParamField>
244
+
245
+ </Expandable>
246
+
247
+ </ParamField>
248
+
249
+ ## Returns: `any`
250
+
251
+ Promise resolving to the extracted structured data matching the provided schema
252
+
253
+ </Tab>
254
+
255
+ </Tabs>
@@ -0,0 +1,88 @@
1
+ ---
2
+ title: isPageLoaded
3
+ description: ""
4
+ ---
5
+
6
+ ```typescript
7
+ export declare function isPageLoaded(input: {
8
+ page: Page;
9
+ timeoutInMs?: number;
10
+ model?: SUPPORTED_MODELS;
11
+ apiKey?: string;
12
+ }): Promise<boolean>;
13
+ ```
14
+
15
+ Uses AI vision to determine if a webpage has finished loading by analyzing a screenshot.
16
+ Detects loading spinners, blank content, or incomplete page states.
17
+
18
+ ## Examples
19
+
20
+ <CodeGroup>
21
+
22
+ ```typescript Check Page Loading
23
+ import { isPageLoaded } from "@intuned/browser/ai";
24
+ export default async function handler(params, page, context){
25
+ // Wait for page to finish loading
26
+ await page.goto('https://example.com');
27
+
28
+ const pageLoaded = await isPageLoaded({page});
29
+ if (pageLoaded) {
30
+ // Continue with scraping or interactions
31
+ } else {
32
+ // Wait longer or retry
33
+ }
34
+ }
35
+ ```
36
+
37
+ ```typescript Loading Loop
38
+ import { isPageLoaded } from "@intuned/browser/ai";
39
+ export default async function handler(params, page, context){
40
+ // Keep checking until page loads
41
+ await page.goto("https://example.com");
42
+ let attempts = 0;
43
+ while (attempts < 10) {
44
+ const pageLoaded = await isPageLoaded({
45
+ page,
46
+ model: "gpt-4o",
47
+ timeoutInMs: 5000
48
+ });
49
+ if (pageLoaded) break;
50
+
51
+ await page.waitForTimeout(2000);
52
+ attempts++;
53
+ }
54
+ }
55
+ ```
56
+
57
+ </CodeGroup>
58
+
59
+ ## Arguments
60
+
61
+ <ParamField path="input" type="Object" required
62
+ >
63
+ Input object containing the page to check
64
+
65
+ <Expandable title="input">
66
+ <ParamField path="input.page" type="Page">
67
+ The Playwright page to check
68
+ </ParamField>
69
+
70
+ <ParamField path="input.timeoutInMs" type="number">
71
+ Screenshot timeout in milliseconds. Defaults to 10000
72
+ </ParamField>
73
+
74
+ <ParamField path="input.model" type="SUPPORTED_MODELS">
75
+ AI model to use for the check. See [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models. Defaults to "gpt-4o-2024-08-06"
76
+ </ParamField>
77
+
78
+ <ParamField path="input.apiKey" type="string">
79
+ Optional API key for the AI service (if provided, will not be billed to your account)
80
+ </ParamField>
81
+
82
+ </Expandable>
83
+
84
+ </ParamField>
85
+
86
+ ## Returns: `Promise<boolean>`
87
+
88
+ Promise resolving to true if page is loaded, false if still loading
@@ -0,0 +1,36 @@
1
+ ---
2
+ title: ArraySchema
3
+ description: ""
4
+ ---
5
+
6
+ ```typescript
7
+ export interface ArraySchema extends BasicSchema {
8
+ type: "array";
9
+ items: JsonSchema | z.ZodSchema;
10
+ maxItems?: number;
11
+ minItems?: number;
12
+ uniqueItems?: boolean;
13
+ }
14
+ ```
15
+
16
+ Schema definition for array values with item validation and constraints.
17
+
18
+ ## Examples
19
+
20
+ <CodeGroup>
21
+
22
+ ```typescript Array Schema
23
+ import { ArraySchema } from "@intuned/browser/ai";
24
+ export default async function handler(params, page, context){
25
+ const tagsSchema: ArraySchema = {
26
+ type: "array",
27
+ items: { type: "string" },
28
+ minItems: 1,
29
+ maxItems: 10,
30
+ uniqueItems: true,
31
+ description: "List of tags"
32
+ };
33
+ }
34
+ ```
35
+
36
+ </CodeGroup>
@@ -0,0 +1,14 @@
1
+ ---
2
+ title: BasicSchema
3
+ description: ""
4
+ ---
5
+
6
+ ```typescript
7
+ export interface BasicSchema {
8
+ type: "string" | "number" | "integer" | "boolean" | "array" | "object";
9
+ description?: string;
10
+ }
11
+ ```
12
+
13
+ Base schema interface that all JSON schema types extend from.
14
+ Provides common properties like type and description.
@@ -0,0 +1,28 @@
1
+ ---
2
+ title: BooleanSchema
3
+ description: ""
4
+ ---
5
+
6
+ ```typescript
7
+ export interface BooleanSchema extends BasicSchema {
8
+ type: "boolean";
9
+ }
10
+ ```
11
+
12
+ Schema definition for boolean values.
13
+
14
+ ## Examples
15
+
16
+ <CodeGroup>
17
+
18
+ ```typescript Boolean Schema
19
+ import { BooleanSchema } from "@intuned/browser/ai";
20
+ export default async function handler(params, page, context){
21
+ const isActiveSchema: BooleanSchema = {
22
+ type: "boolean",
23
+ description: "Whether the user account is active"
24
+ };
25
+ }
26
+ ```
27
+
28
+ </CodeGroup>
@@ -0,0 +1,16 @@
1
+ ---
2
+ title: ImageBufferContentItem
3
+ description: ""
4
+ ---
5
+
6
+ ```typescript
7
+ export interface ImageBufferContentItem {
8
+ type: "image-buffer";
9
+ image_type: "png" | "jpeg" | "gif" | "webp";
10
+ data: Buffer;
11
+ }
12
+ ```
13
+
14
+ Represents image content provided as a Buffer for AI extraction.
15
+ Used when passing image data directly to extractStructuredData without a page source.
16
+ The image will be analyzed by AI vision models for data extraction.
@@ -0,0 +1,16 @@
1
+ ---
2
+ title: ImageUrlContentItem
3
+ description: ""
4
+ ---
5
+
6
+ ```typescript
7
+ export interface ImageUrlContentItem {
8
+ type: "image-url";
9
+ image_type: "png" | "jpeg" | "gif" | "webp";
10
+ data: string;
11
+ }
12
+ ```
13
+
14
+ Represents image content provided as a URL for AI extraction.
15
+ Used when passing image URLs directly to extractStructuredData without a page source.
16
+ The image will be fetched from the URL and analyzed by AI vision models for data extraction.
@@ -0,0 +1,35 @@
1
+ ---
2
+ title: NumberSchema
3
+ description: ""
4
+ ---
5
+
6
+ ```typescript
7
+ export interface NumberSchema extends BasicSchema {
8
+ type: "number" | "integer";
9
+ multipleOf?: number;
10
+ maximum?: number;
11
+ exclusiveMaximum?: number;
12
+ minimum?: number;
13
+ exclusiveMinimum?: number;
14
+ }
15
+ ```
16
+
17
+ Schema definition for numeric values (numbers and integers) with validation constraints.
18
+
19
+ ## Examples
20
+
21
+ <CodeGroup>
22
+
23
+ ```typescript Number Schema
24
+ import { NumberSchema } from "@intuned/browser/ai";
25
+ export default async function handler(params, page, context){
26
+ const ageSchema: NumberSchema = {
27
+ type: "integer",
28
+ minimum: 0,
29
+ maximum: 150,
30
+ description: "Person's age in years"
31
+ };
32
+ }
33
+ ```
34
+
35
+ </CodeGroup>