@intuned/browser-dev 0.1.4-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/.babelrc +21 -0
  2. package/.eslintignore +10 -0
  3. package/.eslintrc.js +39 -0
  4. package/BROWSER_SCRIPTS_SETUP.md +84 -0
  5. package/LICENSE +43 -0
  6. package/README.md +160 -0
  7. package/RELEASE.md +60 -0
  8. package/dist/ai/export.d.js +5 -0
  9. package/dist/ai/export.d.ts +641 -0
  10. package/dist/ai/extractStructuredData.js +320 -0
  11. package/dist/ai/extractStructuredDataUsingAi.js +142 -0
  12. package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
  13. package/dist/ai/extractionHelpers/validateSchema.js +148 -0
  14. package/dist/ai/index.d.ts +641 -0
  15. package/dist/ai/index.js +19 -0
  16. package/dist/ai/isPageLoaded.js +80 -0
  17. package/dist/ai/prompt.js +39 -0
  18. package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
  19. package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
  20. package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
  21. package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
  22. package/dist/ai/tools/index.js +48 -0
  23. package/dist/ai/types/errors.js +67 -0
  24. package/dist/ai/types/models.js +45 -0
  25. package/dist/ai/types/types.js +48 -0
  26. package/dist/ai/validators.js +167 -0
  27. package/dist/common/Logger/index.js +60 -0
  28. package/dist/common/Logger/types.js +5 -0
  29. package/dist/common/SdkError.js +50 -0
  30. package/dist/common/aiModelsValidations.js +32 -0
  31. package/dist/common/ensureBrowserScripts.js +14 -0
  32. package/dist/common/extendedTest.js +157 -0
  33. package/dist/common/extractionHelpers.js +19 -0
  34. package/dist/common/formatZodError.js +18 -0
  35. package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
  36. package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
  37. package/dist/common/fuzzySearch/utils.js +23 -0
  38. package/dist/common/getModelProvider.js +18 -0
  39. package/dist/common/getSimplifiedHtml.js +122 -0
  40. package/dist/common/hashObject.js +32 -0
  41. package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
  42. package/dist/common/html2markdown/index.js +19 -0
  43. package/dist/common/jwtTokenManager.js +57 -0
  44. package/dist/common/loadRuntime.js +16 -0
  45. package/dist/common/locatorHelpers.js +41 -0
  46. package/dist/common/matching/collectStrings.js +32 -0
  47. package/dist/common/matching/levenshtein.js +40 -0
  48. package/dist/common/matching/matching.js +317 -0
  49. package/dist/common/matching/types.js +1 -0
  50. package/dist/common/noEmpty.js +9 -0
  51. package/dist/common/saveSnapshotWithExamples.js +60 -0
  52. package/dist/common/script.js +2602 -0
  53. package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
  54. package/dist/common/xpathMapping.js +107 -0
  55. package/dist/helpers/clickUntilExhausted.js +85 -0
  56. package/dist/helpers/downloadFile.js +125 -0
  57. package/dist/helpers/export.d.js +5 -0
  58. package/dist/helpers/export.d.ts +1220 -0
  59. package/dist/helpers/extractMarkdown.js +35 -0
  60. package/dist/helpers/filterEmptyValues.js +54 -0
  61. package/dist/helpers/gotoUrl.js +98 -0
  62. package/dist/helpers/index.d.ts +1220 -0
  63. package/dist/helpers/index.js +122 -0
  64. package/dist/helpers/processDate.js +25 -0
  65. package/dist/helpers/resolveUrl.js +64 -0
  66. package/dist/helpers/sanitizeHtml.js +74 -0
  67. package/dist/helpers/saveFileToS3.js +50 -0
  68. package/dist/helpers/scrollToLoadContent.js +57 -0
  69. package/dist/helpers/tests/testClickUntilExhausted.spec.js +372 -0
  70. package/dist/helpers/tests/testDownloadFile.spec.js +206 -0
  71. package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
  72. package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
  73. package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
  74. package/dist/helpers/tests/testProcessDate.spec.js +13 -0
  75. package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
  76. package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
  77. package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
  78. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
  79. package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
  80. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
  81. package/dist/helpers/types/Attachment.js +115 -0
  82. package/dist/helpers/types/CustomTypeRegistry.js +48 -0
  83. package/dist/helpers/types/RunEnvironment.js +18 -0
  84. package/dist/helpers/types/ValidationError.js +17 -0
  85. package/dist/helpers/types/index.js +51 -0
  86. package/dist/helpers/uploadFileToS3.js +154 -0
  87. package/dist/helpers/utils/getS3Client.js +22 -0
  88. package/dist/helpers/utils/index.js +73 -0
  89. package/dist/helpers/utils/isDownload.js +10 -0
  90. package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
  91. package/dist/helpers/utils/isLocator.js +9 -0
  92. package/dist/helpers/utils/jwtTokenManager.js +18 -0
  93. package/dist/helpers/validateDataUsingSchema.js +103 -0
  94. package/dist/helpers/waitForDomSettled.js +90 -0
  95. package/dist/helpers/withNetworkSettledWait.js +91 -0
  96. package/dist/index.d.js +16 -0
  97. package/dist/index.d.ts +10 -0
  98. package/dist/index.js +16 -0
  99. package/dist/intunedServices/ApiGateway/aiApiGateway.js +143 -0
  100. package/dist/intunedServices/ApiGateway/factory.js +16 -0
  101. package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
  102. package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
  103. package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
  104. package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +355 -0
  105. package/dist/intunedServices/ApiGateway/types.js +11 -0
  106. package/dist/intunedServices/cache/cache.js +61 -0
  107. package/dist/intunedServices/cache/index.js +12 -0
  108. package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
  109. package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
  110. package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
  111. package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
  112. package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
  113. package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
  114. package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
  115. package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
  116. package/dist/optimized-extractors/common/index.js +55 -0
  117. package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
  118. package/dist/optimized-extractors/common/matching/matching.js +212 -0
  119. package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
  120. package/dist/optimized-extractors/common/matching/types.js +18 -0
  121. package/dist/optimized-extractors/common/matching/utils.js +184 -0
  122. package/dist/optimized-extractors/common/utils.js +58 -0
  123. package/dist/optimized-extractors/export.d.js +5 -0
  124. package/dist/optimized-extractors/export.d.ts +397 -0
  125. package/dist/optimized-extractors/extractArray.js +120 -0
  126. package/dist/optimized-extractors/extractObject.js +104 -0
  127. package/dist/optimized-extractors/index.d.ts +397 -0
  128. package/dist/optimized-extractors/index.js +31 -0
  129. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +269 -0
  130. package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
  131. package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
  132. package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
  133. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +146 -0
  134. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js +130 -0
  135. package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
  136. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +160 -0
  137. package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
  138. package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
  139. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +243 -0
  140. package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
  141. package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
  142. package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
  143. package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
  144. package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
  145. package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
  146. package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
  147. package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
  148. package/dist/optimized-extractors/models/anthropicModel.js +23 -0
  149. package/dist/optimized-extractors/models/openaiModel.js +23 -0
  150. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
  151. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
  152. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
  153. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
  154. package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
  155. package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
  156. package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
  157. package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
  158. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
  159. package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
  160. package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
  161. package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
  162. package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
  163. package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
  164. package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
  165. package/dist/optimized-extractors/types/errors.js +42 -0
  166. package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
  167. package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
  168. package/dist/optimized-extractors/types/types.js +5 -0
  169. package/dist/optimized-extractors/validators.js +152 -0
  170. package/dist/types/intuned-runtime.d.js +1 -0
  171. package/dist/types/intuned-runtime.d.ts +64 -0
  172. package/dist/vite-env.d.js +1 -0
  173. package/dist/vite-env.d.ts +9 -0
  174. package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
  175. package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
  176. package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
  177. package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
  178. package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
  179. package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
  180. package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
  181. package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
  182. package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
  183. package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
  184. package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
  185. package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
  186. package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
  187. package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
  188. package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
  189. package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
  190. package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
  191. package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
  192. package/generated-docs/helpers/functions/processDate.mdx +55 -0
  193. package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
  194. package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
  195. package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
  196. package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
  197. package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
  198. package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
  199. package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
  200. package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
  201. package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
  202. package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
  203. package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
  204. package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
  205. package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
  206. package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
  207. package/how-to-generate-docs.md +61 -0
  208. package/how-to-run-tests.md +42 -0
  209. package/intuned-runtime-setup.md +13 -0
  210. package/package.json +124 -0
  211. package/tsconfig.eslint.json +5 -0
  212. package/tsconfig.json +26 -0
@@ -0,0 +1,277 @@
1
+ "use strict";
2
+
3
+ var _extendedTest = require("../../common/extendedTest");
4
+ var _playwrightCore = require("playwright-core");
5
+ var _isPageLoaded = require("../isPageLoaded");
6
+ var dotenv = _interopRequireWildcard(require("dotenv"));
7
// Babel-generated interop helper used to wrap the result of `require("dotenv")`.
// First call: if `WeakMap` is available it creates two caches (`r` and `n`), then
// replaces `_interopRequireWildcard` with the inner function and invokes it, so
// later calls reuse the same caches.
// Inner function, given module value `e` and flag `t`:
//   - returns `e` untouched when `t` is falsy and `e.__esModule` is truthy;
//   - otherwise builds `f = { __proto__: null, default: e }` and returns it
//     immediately when `e` is null or neither an object nor a function;
//   - uses cache `n` when `t` is truthy, else `r` (no caching if they were never
//     created), returning a previously stored wrapper for `e` when present;
//   - copies every own key of `e` except "default" onto `f`, re-defining accessor
//     properties via their descriptor and plainly assigning everything else.
+ function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
8
// Load environment variables at module load, before any test is registered.
// The tests below read OPENAI_API_KEY and ANTHROPIC_API_KEY from process.env.
// `override: true` is dotenv's documented option for letting values from the
// .env file replace variables that are already set in the environment.
+ dotenv.config({
9
+ override: true
10
+ });
11
+ const FULLY_LOADED_PAGE = `
12
+ <!DOCTYPE html>
13
+ <html lang="en">
14
+ <head>
15
+ <meta charset="UTF-8">
16
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
17
+ <title>Fully Loaded Page</title>
18
+ <style>
19
+ body { font-family: Arial, sans-serif; margin: 20px; }
20
+ .header { background: #f0f0f0; padding: 20px; }
21
+ .content { margin: 20px 0; }
22
+ .footer { background: #e0e0e0; padding: 20px; }
23
+ .image { width: 200px; height: 150px; background: #ccc; }
24
+ </style>
25
+ </head>
26
+ <body>
27
+ <div class="header">
28
+ <h1>Welcome to Our Website</h1>
29
+ <nav>
30
+ <a href="#home">Home</a> |
31
+ <a href="#about">About</a> |
32
+ <a href="#contact">Contact</a>
33
+ </nav>
34
+ </div>
35
+ <div class="content">
36
+ <h2>Main Content</h2>
37
+ <p>This page is fully loaded with all content visible.</p>
38
+ <div class="image">Image Placeholder</div>
39
+ <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
40
+ </div>
41
+ <div class="footer">
42
+ <p>&copy; 2024 Test Company. All rights reserved.</p>
43
+ </div>
44
+ </body>
45
+ </html>
46
+ `;
47
+ const LOADING_PAGE = `
48
+ <!DOCTYPE html>
49
+ <html lang="en">
50
+ <head>
51
+ <meta charset="UTF-8">
52
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
53
+ <title>Loading Page</title>
54
+ <style>
55
+ body {
56
+ font-family: Arial, sans-serif;
57
+ display: flex;
58
+ justify-content: center;
59
+ align-items: center;
60
+ height: 100vh;
61
+ margin: 0;
62
+ background: #f5f5f5;
63
+ }
64
+ .loading-container {
65
+ text-align: center;
66
+ }
67
+ .spinner {
68
+ border: 4px solid #f3f3f3;
69
+ border-top: 4px solid #3498db;
70
+ border-radius: 50%;
71
+ width: 50px;
72
+ height: 50px;
73
+ animation: spin 1s linear infinite;
74
+ margin: 0 auto 20px;
75
+ }
76
+ @keyframes spin {
77
+ 0% { transform: rotate(0deg); }
78
+ 100% { transform: rotate(360deg); }
79
+ }
80
+ .loading-text {
81
+ font-size: 18px;
82
+ color: #666;
83
+ }
84
+ </style>
85
+ </head>
86
+ <body>
87
+ <div class="loading-container">
88
+ <div class="spinner"></div>
89
+ <div class="loading-text">Loading, please wait...</div>
90
+ </div>
91
+ </body>
92
+ </html>
93
+ `;
94
+ const BLANK_PAGE = `
95
+ <!DOCTYPE html>
96
+ <html lang="en">
97
+ <head>
98
+ <meta charset="UTF-8">
99
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
100
+ <title>Blank Page</title>
101
+ <style>
102
+ body { margin: 0; padding: 0; background: white; }
103
+ </style>
104
+ </head>
105
+ <body>
106
+ <!-- Completely blank page -->
107
+ </body>
108
+ </html>
109
+ `;
110
+ const PARTIALLY_LOADED_PAGE = `
111
+ <!DOCTYPE html>
112
+ <html lang="en">
113
+ <head>
114
+ <meta charset="UTF-8">
115
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
116
+ <title>Partially Loaded Page</title>
117
+ <style>
118
+ body { font-family: Arial, sans-serif; margin: 20px; }
119
+ .loaded { opacity: 1; }
120
+ .loading { opacity: 0.3; }
121
+ .placeholder {
122
+ background: #ddd;
123
+ height: 100px;
124
+ margin: 10px 0;
125
+ display: flex;
126
+ align-items: center;
127
+ justify-content: center;
128
+ color: #666;
129
+ }
130
+ </style>
131
+ </head>
132
+ <body>
133
+ <h1 class="loaded">Website Header</h1>
134
+ <div class="placeholder loading">Content Loading...</div>
135
+ <div class="placeholder loading">Images Loading...</div>
136
+ <div class="placeholder loading">More Content Loading...</div>
137
+ <p class="loaded">Some content has loaded but other parts are still loading.</p>
138
+ </body>
139
+ </html>
140
+ `;
141
+ const ERROR_PAGE = `
142
+ <!DOCTYPE html>
143
+ <html lang="en">
144
+ <head>
145
+ <meta charset="UTF-8">
146
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
147
+ <title>Error Page</title>
148
+ <style>
149
+ body {
150
+ font-family: Arial, sans-serif;
151
+ display: flex;
152
+ justify-content: center;
153
+ align-items: center;
154
+ height: 100vh;
155
+ margin: 0;
156
+ background: #ffe6e6;
157
+ }
158
+ .error-container {
159
+ text-align: center;
160
+ background: white;
161
+ padding: 40px;
162
+ border-radius: 10px;
163
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
164
+ }
165
+ .error-code { font-size: 48px; color: #e74c3c; font-weight: bold; }
166
+ .error-message { font-size: 18px; color: #666; margin: 20px 0; }
167
+ </style>
168
+ </head>
169
+ <body>
170
+ <div class="error-container">
171
+ <div class="error-code">404</div>
172
+ <div class="error-message">Page Not Found</div>
173
+ <p>The page you are looking for could not be found.</p>
174
+ </div>
175
+ </body>
176
+ </html>
177
+ `;
178
/**
 * Integration suite for `isPageLoaded`.
 *
 * A single headless Chromium instance is launched for the whole suite and each
 * test gets a fresh page. Every test renders one of the HTML fixtures declared
 * above and passes the page to `isPageLoaded` together with a model name and
 * the API key read from `process.env` at test time.
 */
(0, _extendedTest.describe)("isPageLoaded Tests", () => {
  const {
    describe,
    test,
    expect,
    beforeAll,
    afterAll,
    beforeEach,
    afterEach,
  } = _extendedTest;

  const OPENAI_MODEL = "gpt-4o-2024-05-13";
  const ANTHROPIC_MODEL = "claude-3-5-sonnet-20240620";
  const SELECTOR_TIMEOUT_MS = 5000;
  const MODEL_TIMEOUT_MS = 10000;

  let browser;
  let page;

  beforeAll(async () => {
    browser = await _playwrightCore.chromium.launch({
      headless: true,
    });
  });

  afterAll(async () => {
    // `browser` is still undefined when the launch above rejected; closing it
    // unguarded would raise a TypeError that hides the real launch failure.
    if (browser) {
      await browser.close();
    }
  });

  beforeEach(async () => {
    page = await browser.newPage();
  });

  afterEach(async () => {
    // Same reasoning as afterAll: only close a page that was actually created,
    // and forget it afterwards so a stale handle is never closed twice.
    if (page) {
      await page.close();
      page = undefined;
    }
  });

  /**
   * Render `html` in the current page, optionally wait for `waitFor` to appear,
   * then ask `isPageLoaded` for its verdict.
   *
   * @param {{html: string, waitFor?: string|null, model: string, apiKey: (string|undefined)}} options
   * @returns {Promise<*>} Whatever `isPageLoaded` resolves to.
   */
  async function renderAndCheck({ html, waitFor, model, apiKey }) {
    await page.setContent(html);
    if (waitFor) {
      await page.waitForSelector(waitFor, {
        timeout: SELECTOR_TIMEOUT_MS,
      });
    }
    return (0, _isPageLoaded.isPageLoaded)({
      page,
      model,
      apiKey,
      timeoutInMs: MODEL_TIMEOUT_MS,
    });
  }

  describe("Real LLM calls", () => {
    // One row per scenario: fixture, selector to wait for (null = none),
    // expected verdict, and the label used when logging the result.
    const scenarios = [
      {
        title: "should detect fully loaded page",
        html: FULLY_LOADED_PAGE,
        waitFor: "h1",
        expected: true,
        logLabel: "Fully loaded page result:",
      },
      {
        title: "should detect loading page with spinner",
        html: LOADING_PAGE,
        waitFor: ".spinner",
        expected: false,
        logLabel: "Loading page result:",
      },
      {
        title: "should detect blank page",
        html: BLANK_PAGE,
        waitFor: null,
        expected: false,
        logLabel: "Blank page result:",
      },
      {
        title: "should handle partially loaded page",
        html: PARTIALLY_LOADED_PAGE,
        waitFor: "h1",
        expected: false,
        logLabel: "Partially loaded page result:",
      },
      {
        title: "should handle error page",
        html: ERROR_PAGE,
        waitFor: ".error-code",
        expected: true,
        logLabel: "Error page result:",
      },
    ];

    for (const { title, html, waitFor, expected, logLabel } of scenarios) {
      test(title, async () => {
        const result = await renderAndCheck({
          html,
          waitFor,
          model: OPENAI_MODEL,
          apiKey: process.env.OPENAI_API_KEY,
        });
        // Log before asserting so the actual value is visible on failure too.
        console.log(logLabel, result);
        expect(result).toBe(expected);
      });
    }
  });

  describe("Different model types", () => {
    test("should work with claude-3-5-sonnet-20240620", async () => {
      const result = await renderAndCheck({
        html: FULLY_LOADED_PAGE,
        waitFor: null,
        model: ANTHROPIC_MODEL,
        apiKey: process.env.ANTHROPIC_API_KEY,
      });
      expect(result).toBe(true);
    });
  });
});
@@ -0,0 +1,48 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.getTools = getTools;
7
+ var _ai = require("ai");
8
+ var _validateSchema = require("../extractionHelpers/validateSchema");
9
+ var _neverthrow = require("neverthrow");
10
/**
 * Build the tool set offered to the model for a structured-extraction call.
 *
 * Two tools are returned:
 *  - `[toolName]`: takes the (validated) schema as its input and echoes the
 *    arguments it receives.
 *  - `no_data_found`: takes no properties and returns an empty object.
 *
 * When `inputSchema.type` is "array" the schema is wrapped in an object with
 * two required properties, `extracted_data` (the original schema) and
 * `number_of_entities`; any other schema is used as-is.
 *
 * @param {string} toolName - Key of the extraction tool; its first
 *   "extract_" occurrence is stripped to build the tool description.
 * @param {object} inputSchema - JSON schema describing the data to extract.
 * @returns A neverthrow result: `ok(tools)` on success, or `err(...)` carrying
 *   the error reported by `validateJSONSchema`.
 */
function getTools(toolName, inputSchema) {
  const isArray = inputSchema.type === "array";

  // NOTE(review): the description below tells the model to answer null when the
  // count is not stated, yet the property is typed "number" and is required.
  // Confirm whether validateJSONSchema / the provider accept a nullable type
  // before widening it.
  const formattedSchema = isArray
    ? {
        type: "object",
        properties: {
          extracted_data: inputSchema,
          number_of_entities: {
            type: "number",
            description:
              "The number of entities items in the text - not the overall total. Rely on the text to find this, if the number is not mentioned in the text, this should be null. For example, some lists say 'showing 5 out of 20 items' - 5 is the number of items in the list.",
          },
        },
        required: ["extracted_data", "number_of_entities"],
        additionalProperties: false,
      }
    : inputSchema;

  const validationResult = (0, _validateSchema.validateJSONSchema)(formattedSchema);
  if (validationResult.isErr()) {
    return (0, _neverthrow.err)(validationResult.error);
  }
  const validatedSchema = validationResult.value;

  const tools = {
    [toolName]: (0, _ai.tool)({
      description: `Extract ${toolName.replace("extract_", "")} data from the provided content`,
      inputSchema: (0, _ai.jsonSchema)(validatedSchema),
      // The tool has no side effects: it hands the model's arguments back.
      execute: async (args) => args,
    }),
    no_data_found: (0, _ai.tool)({
      description: "Called when no data matching the schema can be found in the content.",
      inputSchema: (0, _ai.jsonSchema)({
        type: "object",
        description: "No data found in the content.",
        properties: {},
      }),
      execute: () => {
        return {};
      },
    }),
  };
  return (0, _neverthrow.ok)(tools);
}
@@ -0,0 +1,67 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.other = exports.maxRetriesExceeded = exports.invalidSearchRegion = exports.invalidModel = exports.invalidJsonSchema = exports.invalidInput = exports.invalidExtractionResult = exports.invalidAddressUrl = exports.insufficientAiCredits = exports.NoToolUsage = exports.NoDataFound = exports.AiCallFailed = void 0;
7
/**
 * Create a `NoDataFound` error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @returns {{type: string, context: *}} Object tagged `"NoDataFound"`.
 */
const NoDataFound = (context) => {
  return { type: "NoDataFound", context: context };
};
11
+ exports.NoDataFound = NoDataFound;
12
/**
 * Create a `NoToolUsage` error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @returns {{type: string, context: *}} Object tagged `"NoToolUsage"`.
 */
const NoToolUsage = (context) => {
  return { type: "NoToolUsage", context: context };
};
16
+ exports.NoToolUsage = NoToolUsage;
17
/**
 * Create a catch-all error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @returns {{type: string, context: *}} Object tagged `"other"`.
 */
const other = (context) => {
  return { type: "other", context: context };
};
21
+ exports.other = other;
22
/**
 * Create an `AiCallFailed` error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @param {*} error - The underlying failure, stored unchanged.
 * @returns {{type: string, context: *, error: *}} Object tagged `"AiCallFailed"`.
 */
const AiCallFailed = (context, error) => {
  return { type: "AiCallFailed", context: context, error: error };
};
27
+ exports.AiCallFailed = AiCallFailed;
28
/**
 * Create an `InvalidExtractionResult` error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @returns {{type: string, context: *}} Object tagged `"InvalidExtractionResult"`.
 */
const invalidExtractionResult = (context) => {
  return { type: "InvalidExtractionResult", context: context };
};
32
+ exports.invalidExtractionResult = invalidExtractionResult;
33
/**
 * Create an invalid-input error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @returns {{type: string, context: *}} Object tagged `"invalidInput"`.
 */
const invalidInput = (context) => {
  return { type: "invalidInput", context: context };
};
37
+ exports.invalidInput = invalidInput;
38
/**
 * Create an `InsufficientAiCredits` error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @returns {{type: string, context: *}} Object tagged `"InsufficientAiCredits"`.
 */
const insufficientAiCredits = (context) => {
  return { type: "InsufficientAiCredits", context: context };
};
42
+ exports.insufficientAiCredits = insufficientAiCredits;
43
/**
 * Create an `InvalidModel` error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @returns {{type: string, context: *}} Object tagged `"InvalidModel"`.
 */
const invalidModel = (context) => {
  return { type: "InvalidModel", context: context };
};
47
+ exports.invalidModel = invalidModel;
48
/**
 * Create an `InvalidJsonSchema` error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @returns {{type: string, context: *}} Object tagged `"InvalidJsonSchema"`.
 */
const invalidJsonSchema = (context) => {
  return { type: "InvalidJsonSchema", context: context };
};
52
+ exports.invalidJsonSchema = invalidJsonSchema;
53
/**
 * Create a `MaxRetriesExceeded` error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @returns {{type: string, context: *}} Object tagged `"MaxRetriesExceeded"`.
 */
const maxRetriesExceeded = (context) => {
  return { type: "MaxRetriesExceeded", context: context };
};
57
+ exports.maxRetriesExceeded = maxRetriesExceeded;
58
/**
 * Create an `InvalidSearchRegion` error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @returns {{type: string, context: *}} Object tagged `"InvalidSearchRegion"`.
 */
const invalidSearchRegion = (context) => {
  return { type: "InvalidSearchRegion", context: context };
};
62
+ exports.invalidSearchRegion = invalidSearchRegion;
63
/**
 * Create an `InvalidAddressUrl` error value.
 *
 * @param {*} context - Caller-supplied details, stored unchanged.
 * @returns {{type: string, context: *}} Object tagged `"InvalidAddressUrl"`.
 */
const invalidAddressUrl = (context) => {
  return { type: "InvalidAddressUrl", context: context };
};
67
+ exports.invalidAddressUrl = invalidAddressUrl;
@@ -0,0 +1,45 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.SUPPORTED_VISION_MODELS = exports.SUPPORTED_TEXT_MODELS = exports.SUPPORTED_GPT_MODELS = exports.SUPPORTED_GOOGLE_MODELS = exports.SUPPORTED_CLAUDE_MODELS = exports.MODELS_MAPPINGS = exports.MAX_TOKENS_OVERRIDES = exports.GPT_MODELS_MAPPINGS = exports.GOOGLE_MODELS_MAPPINGS = exports.CLAUDE_VISION_SUPPORTED_MODELS = exports.CLAUDE_ONLY_TEXT_MODELS = exports.CLAUDE_MODELS_MAPPINGS = void 0;
7
/** Claude model names listed as text-only. */
const CLAUDE_ONLY_TEXT_MODELS = [
  "claude-3-5-haiku",
  "claude-3-5-haiku-20241022",
];
exports.CLAUDE_ONLY_TEXT_MODELS = CLAUDE_ONLY_TEXT_MODELS;

/** Claude model names listed as vision-capable. */
const CLAUDE_VISION_SUPPORTED_MODELS = [
  "claude-3.5-sonnet",
  "claude-3-5-sonnet-20240620",
  "claude-3-5-sonnet-20241022",
];
exports.CLAUDE_VISION_SUPPORTED_MODELS = CLAUDE_VISION_SUPPORTED_MODELS;

/** Every Claude model name: text-only entries first, then vision entries. */
const SUPPORTED_CLAUDE_MODELS = CLAUDE_ONLY_TEXT_MODELS.concat(
  CLAUDE_VISION_SUPPORTED_MODELS
);
exports.SUPPORTED_CLAUDE_MODELS = SUPPORTED_CLAUDE_MODELS;

/**
 * Short Claude alias -> dated model identifier.
 *
 * NOTE(review): "claude-3-haiku", "claude-3-opus" and "claude-3-sonnet" are
 * mapped here but do not appear in SUPPORTED_CLAUDE_MODELS; confirm whether
 * that is intentional.
 */
const CLAUDE_MODELS_MAPPINGS = {
  "claude-3-haiku": "claude-3-haiku-20240307",
  "claude-3-5-haiku": "claude-3-5-haiku-20241022",
  "claude-3-opus": "claude-3-opus-20240229",
  "claude-3-sonnet": "claude-3-sonnet-20240229",
  "claude-3.5-sonnet": "claude-3-5-sonnet-20241022",
};
exports.CLAUDE_MODELS_MAPPINGS = CLAUDE_MODELS_MAPPINGS;
17
/** GPT model names listed as text-only (module-private). */
const GPT_ONLY_TEXT_GPT_MODELS = [
  "gpt3.5-turbo",
  "gpt-3.5-turbo-0125",
];

/** GPT model names listed as vision-capable (module-private). */
const GPT_VISION_SUPPORTED_MODELS = [
  "gpt4-turbo",
  "gpt-4-turbo-2024-04-09",
  "gpt-4o",
  "gpt-4o-2024-05-13",
  "gpt-4o-mini",
  "gpt-4o-mini-2024-07-18",
];

/** Every GPT model name: text-only entries first, then vision entries. */
const SUPPORTED_GPT_MODELS = GPT_ONLY_TEXT_GPT_MODELS.concat(
  GPT_VISION_SUPPORTED_MODELS
);
exports.SUPPORTED_GPT_MODELS = SUPPORTED_GPT_MODELS;

/** Short GPT alias -> dated model identifier. */
const GPT_MODELS_MAPPINGS = {
  "gpt4-turbo": "gpt-4-turbo-2024-04-09",
  "gpt3.5-turbo": "gpt-3.5-turbo-0125",
  "gpt-4o": "gpt-4o-2024-05-13",
  "gpt-4o-mini": "gpt-4o-mini-2024-07-18",
};
exports.GPT_MODELS_MAPPINGS = GPT_MODELS_MAPPINGS;
26
/** Every Google (Gemini) model name accepted by the package. */
const SUPPORTED_GOOGLE_MODELS = [
  "gemini-1.5-pro",
  "gemini-1.5-pro-002",
  "gemini-1.5-flash-8b",
  "gemini-1.5-flash-8b-002",
  "gemini-1.5-flash",
  "gemini-1.5-flash-002",
  "gemini-2.0-flash-exp",
];
exports.SUPPORTED_GOOGLE_MODELS = SUPPORTED_GOOGLE_MODELS;

/** Short Gemini alias -> versioned model identifier. */
const GOOGLE_MODELS_MAPPINGS = {
  "gemini-1.5-pro": "gemini-1.5-pro-002",
  "gemini-1.5-flash-8b": "gemini-1.5-flash-8b-002",
  "gemini-1.5-flash": "gemini-1.5-flash-002",
};
exports.GOOGLE_MODELS_MAPPINGS = GOOGLE_MODELS_MAPPINGS;
32
/** All model names usable for text input: Claude, then GPT, then Google. */
const SUPPORTED_TEXT_MODELS = SUPPORTED_CLAUDE_MODELS.concat(
  SUPPORTED_GPT_MODELS,
  SUPPORTED_GOOGLE_MODELS
);
exports.SUPPORTED_TEXT_MODELS = SUPPORTED_TEXT_MODELS;

/**
 * All model names usable for vision input: the Claude and GPT vision lists
 * followed by the full Google list.
 */
const SUPPORTED_VISION_MODELS = CLAUDE_VISION_SUPPORTED_MODELS.concat(
  GPT_VISION_SUPPORTED_MODELS,
  SUPPORTED_GOOGLE_MODELS
);
exports.SUPPORTED_VISION_MODELS = SUPPORTED_VISION_MODELS;

/**
 * Alias table across all providers. Sources are merged in the order GPT,
 * Claude, Google, so a later source wins on a duplicate key.
 */
const MODELS_MAPPINGS = Object.assign(
  {},
  GPT_MODELS_MAPPINGS,
  CLAUDE_MODELS_MAPPINGS,
  GOOGLE_MODELS_MAPPINGS
);
exports.MODELS_MAPPINGS = MODELS_MAPPINGS;

/** Per-model token-limit overrides, keyed by full model identifier. */
const MAX_TOKENS_OVERRIDES = {
  "claude-3-5-sonnet-20240620": 8192,
  "gemini-1.5-pro-002": 8192,
  "gemini-1.5-flash-8b-002": 8192,
  "gemini-1.5-flash-002": 8192,
  "gemini-2.0-flash-exp": 8192,
};
exports.MAX_TOKENS_OVERRIDES = MAX_TOKENS_OVERRIDES;
@@ -0,0 +1,48 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
// Re-export the schema names from "../export" as enumerable getter properties.
// Each getter reads `_export.<Name>` lazily, so the `var _export = require(...)`
// that follows these definitions is already assigned by the time a consumer
// accesses a property.
// NOTE(review): these names look like type-only declarations; if "../export"
// carries no runtime values the getters will yield undefined - confirm.
+ Object.defineProperty(exports, "ArraySchema", {
7
+ enumerable: true,
8
+ get: function () {
9
+ return _export.ArraySchema;
10
+ }
11
+ });
12
+ Object.defineProperty(exports, "BasicSchema", {
13
+ enumerable: true,
14
+ get: function () {
15
+ return _export.BasicSchema;
16
+ }
17
+ });
18
+ Object.defineProperty(exports, "BooleanSchema", {
19
+ enumerable: true,
20
+ get: function () {
21
+ return _export.BooleanSchema;
22
+ }
23
+ });
24
+ Object.defineProperty(exports, "JsonSchema", {
25
+ enumerable: true,
26
+ get: function () {
27
+ return _export.JsonSchema;
28
+ }
29
+ });
30
+ Object.defineProperty(exports, "NumberSchema", {
31
+ enumerable: true,
32
+ get: function () {
33
+ return _export.NumberSchema;
34
+ }
35
+ });
36
+ Object.defineProperty(exports, "ObjectSchema", {
37
+ enumerable: true,
38
+ get: function () {
39
+ return _export.ObjectSchema;
40
+ }
41
+ });
42
+ Object.defineProperty(exports, "StringSchema", {
43
+ enumerable: true,
44
+ get: function () {
45
+ return _export.StringSchema;
46
+ }
47
+ });
48
+ var _export = require("../export");
@@ -0,0 +1,167 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.checkAllTypesAreStrings = checkAllTypesAreStrings;
7
+ exports.contentValidationSchema = void 0;
8
+ exports.convertZodToJsonSchema = convertZodToJsonSchema;
9
+ exports.genericExtractDataInputSchema = exports.extractDataInputJsonSchema = void 0;
10
+ exports.isZodSchema = isZodSchema;
11
+ exports.jsonSchema = void 0;
12
+ exports.normalizeJsonSchema = normalizeJsonSchema;
13
+ var _zod = require("zod");
14
+ var _locatorHelpers = require("../common/locatorHelpers");
15
+ var _ajv = _interopRequireDefault(require("ajv"));
16
+ var _aiModelsValidations = require("../common/aiModelsValidations");
17
+ var _zodToJsonSchema = require("zod-to-json-schema");
18
+ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
19
// Zod descriptions of the JSON-schema node shapes accepted by this module.
// Small factories keep the repeated field definitions in one place; each call
// still builds a fresh zod instance.
const optionalNumberField = () => _zod.z.number().optional();
const optionalCountField = lowest => _zod.z.number().min(lowest).optional();
// Child nodes refer back to the `jsonSchema` union declared at the end of this
// block, so the reference has to be resolved lazily.
const nestedSchemaField = () => _zod.z.lazy(() => jsonSchema);

// Fields shared by every node: a free-form `type` and an optional description.
const basicSchema = _zod.z.object({
  type: _zod.z.string(),
  description: _zod.z.string().optional()
});

// `type: "string"` node with optional enum / length / pattern keywords.
const stringSchema = basicSchema.extend({
  type: _zod.z.literal("string"),
  enum: _zod.z.array(_zod.z.string()).optional(),
  maxLength: optionalNumberField(),
  minLength: optionalNumberField(),
  pattern: _zod.z.string().optional()
});

// `type: "number"` or `type: "integer"` node with optional range keywords.
const numberSchema = basicSchema.extend({
  type: _zod.z.union([_zod.z.literal("number"), _zod.z.literal("integer")]),
  multipleOf: optionalNumberField(),
  maximum: optionalNumberField(),
  exclusiveMaximum: optionalNumberField(),
  minimum: optionalNumberField(),
  exclusiveMinimum: optionalNumberField()
});

// `type: "boolean"` node; no extra keywords.
const booleanSchema = basicSchema.extend({
  type: _zod.z.literal("boolean")
});

// `type: "array"` node; `items` is mandatory, item counts must be >= 0.
const arraySchema = basicSchema.extend({
  type: _zod.z.literal("array"),
  items: nestedSchemaField(),
  maxItems: optionalCountField(0),
  minItems: optionalCountField(0),
  uniqueItems: _zod.z.boolean().optional()
});

// `type: "object"` node; both `properties` and `required` are mandatory here,
// property counts must be >= 1.
const objectSchema = basicSchema.extend({
  type: _zod.z.literal("object"),
  properties: _zod.z.record(_zod.z.string(), nestedSchemaField()),
  required: _zod.z.array(_zod.z.string()),
  maxProperties: optionalCountField(1),
  minProperties: optionalCountField(1)
});

// Union of all node kinds, tried in this order.
const jsonSchema = _zod.z.union([
  stringSchema,
  numberSchema,
  booleanSchema,
  arraySchema,
  objectSchema
]);
exports.jsonSchema = jsonSchema;
56
// Turns an Ajv "schema is invalid" error into a short dotted path message:
// drops the fixed prefix, keeps only the first comma-separated problem,
// removes the "data/" root and replaces the remaining slashes with dots.
const summarizeAjvSchemaError = error => {
  const withoutPrefix = error.message.replace("schema is invalid: ", "");
  const [firstProblem] = withoutPrefix.split(", ");
  return firstProblem.replace("data/", "").replaceAll("/", ".");
};

// Passes for falsy input and for roots typed "object" or "array".
const hasObjectOrArrayRoot = schema =>
  !(schema && schema.type !== "object" && schema.type !== "array");

// Passes for any truthy non-object root; otherwise `properties` must be truthy.
const objectRootHasProperties = schema => {
  if (schema && schema.type !== "object") {
    return true;
  }
  return Boolean(schema != null && schema.properties);
};

// Passes for any truthy non-array root; otherwise `items` must be truthy.
const arrayRootHasItems = schema => {
  if (schema && schema.type !== "array") {
    return true;
  }
  return Boolean(schema != null && schema.items);
};

// Validation pipeline for a user-supplied data schema:
//   1. a zod schema is converted to JSON schema, anything else passes through;
//   2. truthy values are checked with Ajv's strict schema validation, and a
//      failure is reported as a single custom zod issue;
//   3. three structural checks run on the root node, in this order.
const jsonSchemaCustomValidation = _zod.z
  .any()
  .transform(input => (isZodSchema(input) ? convertZodToJsonSchema(input) : input))
  .superRefine((candidate, ctx) => {
    try {
      const validator = new _ajv.default({
        strict: true,
        strictRequired: true
      });
      if (candidate) {
        // Second argument `true` makes Ajv throw instead of returning false.
        validator.validateSchema(candidate, true);
      }
    } catch (error) {
      ctx.addIssue({
        code: _zod.z.ZodIssueCode.custom,
        message: summarizeAjvSchemaError(error)
      });
    }
  })
  .refine(hasObjectOrArrayRoot, {
    message: "root schema must be an object or an array"
  })
  .refine(objectRootHasProperties, {
    message: "object schema must have properties"
  })
  .refine(arrayRootHasItems, {
    message: "array schema must have items"
  });
99
// Input schema for page/locator based extraction.
// `source` must satisfy either the locator check or the page check from
// ../common/locatorHelpers; both branches report the same message.
const invalidSourceIssue = () => ({
  message: "Invalid PageOrLocator"
});
const locatorSourceSchema = _zod.z.any().refine(_locatorHelpers.isLocator, invalidSourceIssue());
const pageSourceSchema = _zod.z.any().refine(_locatorHelpers.isPage, invalidSourceIssue());

const extractDataInputJsonSchema = _zod.z.object({
  source: _zod.z.union([locatorSourceSchema, pageSourceSchema]),
  // Falls back to "claude-3-5-haiku-latest" when omitted.
  model: _zod.z
    .enum(_aiModelsValidations.SUPPORTED_MODELS)
    .optional()
    .default("claude-3-5-haiku-latest"),
  dataSchema: jsonSchemaCustomValidation,
  // Falls back to "HTML" when omitted.
  strategy: _zod.z
    .enum(["IMAGE", "MARKDOWN", "HTML"])
    .optional()
    .default("HTML"),
  prompt: _zod.z.string().optional(),
  apiKey: _zod.z.string().optional(),
  enableDomMatching: _zod.z.boolean().optional().default(false),
  enableCache: _zod.z.boolean().optional().default(true),
  maxRetries: _zod.z.number().optional().default(3)
});
exports.extractDataInputJsonSchema = extractDataInputJsonSchema;
114
// Image formats accepted for both buffer and URL content items.
const imageTypeField = () => _zod.z.enum(["png", "jpeg", "gif", "webp"]);

// Plain text item: `data` is the text itself.
const textContentItem = _zod.z.object({
  type: _zod.z.literal("text"),
  data: _zod.z.string()
});

// In-memory image: `data` must be a Buffer instance.
const imageBufferContentItem = _zod.z.object({
  type: _zod.z.literal("image-buffer"),
  data: _zod.z.instanceof(Buffer),
  image_type: imageTypeField()
});

// Remote image: `data` must be a string that parses as a URL.
const imageUrlContentItem = _zod.z.object({
  type: _zod.z.literal("image-url"),
  data: _zod.z.string().url(),
  image_type: imageTypeField()
});

// One content item is exactly one of the three shapes above.
const contentItemValidationSchema = _zod.z.union([
  textContentItem,
  imageBufferContentItem,
  imageUrlContentItem
]);

// Content is either a non-empty array of items or a single item; the array
// form is tried first.
const contentValidationSchema = _zod.z.union([
  _zod.z.array(contentItemValidationSchema).min(1, "content should have at least one item"),
  contentItemValidationSchema
]);
exports.contentValidationSchema = contentValidationSchema;
127
/**
 * Converts a zod schema into its JSON-schema form by delegating to
 * `zodToJsonSchema` from the "zod-to-json-schema" package.
 *
 * @param {object} zodSchema - The zod schema to convert.
 * @returns {object} Whatever `zodToJsonSchema` returns for that schema.
 */
function convertZodToJsonSchema(zodSchema) {
  // Pull the function off the module first so it is invoked without a receiver.
  const { zodToJsonSchema } = _zodToJsonSchema;
  return zodToJsonSchema(zodSchema);
}
130
/**
 * Duck-type check for a zod schema: a non-null object that carries a `_def`
 * property (own or inherited) and a callable `parse` member.
 *
 * Always returns a real boolean. The previous `value && ...` chain leaked the
 * falsy operand itself (`null`, `undefined`, `""`, `0`) to the caller.
 *
 * @param {*} value - Anything; non-objects are rejected.
 * @returns {boolean} `true` when `value` looks like a zod schema.
 */
function isZodSchema(value) {
  return (
    value !== null &&
    typeof value === "object" &&
    "_def" in value &&
    typeof value.parse === "function"
  );
}
133
/**
 * Returns a JSON schema for the given input: zod schemas are converted via
 * `convertZodToJsonSchema`, every other value is handed back unchanged.
 *
 * @param {*} schema - A zod schema or an already-plain JSON schema.
 * @returns {*} The converted schema, or `schema` itself.
 */
function normalizeJsonSchema(schema) {
  return isZodSchema(schema) ? convertZodToJsonSchema(schema) : schema;
}
139
/**
 * Reports whether every typed leaf of a JSON schema is `type: "string"`.
 *
 * - `"string"` nodes pass.
 * - `"array"` nodes pass when `items` is missing, or when the single item
 *   schema (or every entry of a tuple-style `items` array) passes.
 * - `"object"` nodes pass when `properties` is missing, or when every
 *   property schema passes.
 * - Any other node, including one without a `type`, fails.
 *
 * Non-object nodes (`null`, `undefined`, booleans, ...) now fail instead of
 * throwing a TypeError, so a malformed nested entry yields `false`.
 *
 * @param {*} schema - JSON-schema node to inspect.
 * @returns {boolean} `true` when only string leaves are reachable.
 */
function checkAllTypesAreStrings(schema) {
  // Guard first: reading `.type` from null/undefined would throw.
  if (schema === null || typeof schema !== "object") {
    return false;
  }
  switch (schema.type) {
    case "string":
      return true;
    case "array": {
      const { items } = schema;
      if (!items) {
        return true;
      }
      return Array.isArray(items)
        ? items.every(item => checkAllTypesAreStrings(item))
        : checkAllTypesAreStrings(items);
    }
    case "object": {
      const { properties } = schema;
      if (!properties) {
        return true;
      }
      return Object.values(properties).every(prop => checkAllTypesAreStrings(prop));
    }
    default:
      return false;
  }
}
160
// Input schema for extraction that is not tied to a page or locator: only the
// data schema is mandatory, every other field is optional and the model,
// cache flag and retry count fall back to the defaults shown below.
const genericExtractDataInputShape = {
  dataSchema: jsonSchemaCustomValidation,
  prompt: _zod.z.string().optional(),
  model: _zod.z
    .enum(_aiModelsValidations.SUPPORTED_MODELS)
    .optional()
    .default("claude-3-5-haiku-latest"),
  apiKey: _zod.z.string().optional(),
  enableCache: _zod.z.boolean().optional().default(true),
  maxRetries: _zod.z.number().optional().default(3)
};
const genericExtractDataInputSchema = _zod.z.object(genericExtractDataInputShape);
exports.genericExtractDataInputSchema = genericExtractDataInputSchema;