@intuned/browser-dev 0.1.15-dev.0 → 0.1.16-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25):
  1. package/.prettierrc +3 -0
  2. package/dist/ai/isPageLoaded.js +6 -6
  3. package/dist/ai/tests/extractStructuredDataDomMatchingCache.spec.js +87 -0
  4. package/dist/ai/tests/testMatching.spec.js +38 -0
  5. package/dist/ai/tests/testValidateMatchesMapping.spec.js +58 -0
  6. package/dist/common/matching/matching.js +3 -3
  7. package/dist/common/xpathMapping.js +23 -10
  8. package/dist/helpers/downloadFile.js +3 -0
  9. package/dist/helpers/saveFileToS3.js +3 -0
  10. package/dist/helpers/tests/testDownloadFile.spec.js +16 -0
  11. package/dist/optimized-extractors/common/index.js +4 -4
  12. package/dist/optimized-extractors/common/matching/utils.js +4 -2
  13. package/dist/optimized-extractors/common/modelStringSupport.test.js +82 -0
  14. package/dist/optimized-extractors/export.d.ts +2 -50
  15. package/dist/optimized-extractors/extractArray.js +3 -1
  16. package/dist/optimized-extractors/index.d.ts +2 -50
  17. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +366 -1
  18. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +43 -0
  19. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +4 -3
  20. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +31 -22
  21. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +2 -1
  22. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +208 -0
  23. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +4 -2
  24. package/dist/optimized-extractors/validators.js +4 -5
  25. package/package.json +1 -1
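package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js CHANGED
@@ -2,7 +2,9 @@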
2
2
 
3
3
  var _extendedTest = require("../../../common/extendedTest");
4
4
  var _ = require("../..");
5
+ var _neverthrow = require("neverthrow");
5
6
  var _uuid = require("uuid");
7
+ function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
6
8
  const productTemplate = `
7
9
  <div class="product-page">
8
10
  <div class="product-info">
@@ -104,4 +106,210 @@ _extendedTest.describe.skip("Object Extractor Caching Tests", () => {
104
106
  console.log("All cache behavior tests completed successfully!");
105
107
  });
106
108
  });
109
+ });
110
+ async function mockExtractStructuredDataUsingAi(result) {
111
+ const commonModule = await Promise.resolve().then(() => _interopRequireWildcard(require("../../common")));
112
+ return _extendedTest.vi.spyOn(commonModule, "extractStructuredDataUsingAi").mockResolvedValue((0, _neverthrow.ok)({
113
+ result
114
+ }));
115
+ }
116
+ async function useInMemoryCache() {
117
+ const cacheModule = await Promise.resolve().then(() => _interopRequireWildcard(require("../../../intunedServices/cache")));
118
+ const store = new Map();
119
+ _extendedTest.vi.spyOn(cacheModule.cache, "get").mockImplementation(async key => store.has(key) ? store.get(key) : null);
120
+ _extendedTest.vi.spyOn(cacheModule.cache, "set").mockImplementation(async (key, value) => {
121
+ store.set(key, value);
122
+ });
123
+ }
124
+ (0, _extendedTest.describe)("extractObjectFromLocator - cache round-trip (real matcher, mocked AI)", () => {
125
+ (0, _extendedTest.afterEach)(() => {
126
+ _extendedTest.vi.restoreAllMocks();
127
+ });
128
+ (0, _extendedTest.test)("reuses the cache on a second identical call when the match was partial (PH: prefix)", async ({
129
+ page
130
+ }) => {
131
+ const label = `contact-roundtrip-${(0, _uuid.v4)()}`;
132
+ const aiResult = {
133
+ contact_information_name: "Andreas Jansson",
134
+ contact_information_phone_number: "(727) 471-4768",
135
+ contact_information_email: "andreas.jansson@flowbird.group"
136
+ };
137
+ const aiSpy = await mockExtractStructuredDataUsingAi(aiResult);
138
+ await useInMemoryCache();
139
+ await page.setContent(`
140
+ <div class="vendor">
141
+ <h2>Contact Information</h2>
142
+ <p class="c-name">Andreas Jansson</p>
143
+ <p class="c-phone">PH: (727) 471-4768</p>
144
+ <p class="c-email">andreas.jansson@flowbird.group</p>
145
+ </div>
146
+ `);
147
+ const options = {
148
+ entityName: "supplier_contacts",
149
+ label,
150
+ entitySchema: {
151
+ type: "object",
152
+ required: ["contact_information_email"],
153
+ properties: {
154
+ contact_information_name: {
155
+ type: "string",
156
+ description: "the contact person name"
157
+ },
158
+ contact_information_phone_number: {
159
+ type: "string",
160
+ description: "the contact phone number"
161
+ },
162
+ contact_information_email: {
163
+ type: "string",
164
+ description: "the contact email"
165
+ }
166
+ }
167
+ },
168
+ strategy: {
169
+ model: "claude-sonnet-4-20250514",
170
+ type: "HTML"
171
+ },
172
+ variantKey: label
173
+ };
174
+ const first = await (0, _.extractObjectFromLocator)(page.locator("div.vendor"), options);
175
+ (0, _extendedTest.expect)(first).toEqual(aiResult);
176
+ (0, _extendedTest.expect)(aiSpy).toHaveBeenCalledTimes(1);
177
+ const second = await (0, _.extractObjectFromLocator)(page.locator("div.vendor"), options);
178
+ (0, _extendedTest.expect)(second).toEqual(first);
179
+ (0, _extendedTest.expect)(aiSpy).toHaveBeenCalledTimes(1);
180
+ });
181
+ (0, _extendedTest.test)("reuses the cache when fields sit in separate direct text nodes of one element (br-separated block)", async ({
182
+ page
183
+ }) => {
184
+ const label = `vendor-block-${(0, _uuid.v4)()}`;
185
+ const vendorAiResult = {
186
+ primary_vendor_name: "V00000110 - International Cleaning Services, Inc.",
187
+ primary_vendor_contact_email: "monaco.intl@sbcglobal.net",
188
+ primary_vendor_contact_phone: "(630) 904-4118"
189
+ };
190
+ const aiSpy = await mockExtractStructuredDataUsingAi(vendorAiResult);
191
+ await useInMemoryCache();
192
+ await page.setContent(`
193
+ <table><tbody><tr><td class="vendor">
194
+ <a href="#">V00000110 - International Cleaning Services, Inc.</a>
195
+ 2415 Comstock Court<br/>
196
+ Naperville, IL 60564<br/>
197
+ US<br/>
198
+ Email: monaco.intl@sbcglobal.net<br/>
199
+ FAX: (630) 904-4118
200
+ </td></tr></tbody></table>
201
+ `);
202
+ const options = {
203
+ entityName: "supplier_contacts",
204
+ label,
205
+ entitySchema: {
206
+ type: "object",
207
+ required: ["primary_vendor_name"],
208
+ properties: {
209
+ primary_vendor_name: {
210
+ type: "string",
211
+ description: "vendor id - company name"
212
+ },
213
+ primary_vendor_contact_email: {
214
+ type: "string",
215
+ description: "vendor email"
216
+ },
217
+ primary_vendor_contact_phone: {
218
+ type: "string",
219
+ description: "vendor phone"
220
+ }
221
+ }
222
+ },
223
+ strategy: {
224
+ model: "claude-sonnet-4-20250514",
225
+ type: "HTML"
226
+ },
227
+ variantKey: label
228
+ };
229
+ const first = await (0, _.extractObjectFromLocator)(page.locator("td.vendor"), options);
230
+ (0, _extendedTest.expect)(first).toEqual(vendorAiResult);
231
+ (0, _extendedTest.expect)(aiSpy).toHaveBeenCalledTimes(1);
232
+ const second = await (0, _.extractObjectFromLocator)(page.locator("td.vendor"), options);
233
+ (0, _extendedTest.expect)(second).toEqual(first);
234
+ (0, _extendedTest.expect)(aiSpy).toHaveBeenCalledTimes(1);
235
+ });
236
+ (0, _extendedTest.test)("reuses the cache for a link whose href is stored resolved but read raw (relative href)", async ({
237
+ page
238
+ }) => {
239
+ const label = `href-${(0, _uuid.v4)()}`;
240
+ const hrefAiResult = {
241
+ attachment_href: "https://example.com/files/annual-report.pdf",
242
+ attachment_name: "Annual Report"
243
+ };
244
+ const aiSpy = await mockExtractStructuredDataUsingAi(hrefAiResult);
245
+ await useInMemoryCache();
246
+ await page.setContent(`<!DOCTYPE html><html><head><base href="https://example.com/files/"></head><body>` + `<div class="doc"><a href="annual-report.pdf">Annual Report</a></div>` + `</body></html>`);
247
+ const options = {
248
+ entityName: "attachment",
249
+ label,
250
+ entitySchema: {
251
+ type: "object",
252
+ required: ["attachment_href"],
253
+ properties: {
254
+ attachment_href: {
255
+ type: "string",
256
+ description: "the file url (href)"
257
+ },
258
+ attachment_name: {
259
+ type: "string",
260
+ description: "the file name"
261
+ }
262
+ }
263
+ },
264
+ strategy: {
265
+ model: "claude-sonnet-4-20250514",
266
+ type: "HTML"
267
+ },
268
+ variantKey: label
269
+ };
270
+ const first = await (0, _.extractObjectFromLocator)(page.locator("div.doc"), options);
271
+ (0, _extendedTest.expect)(first).toEqual(hrefAiResult);
272
+ (0, _extendedTest.expect)(aiSpy).toHaveBeenCalledTimes(1);
273
+ const second = await (0, _.extractObjectFromLocator)(page.locator("div.doc"), options);
274
+ (0, _extendedTest.expect)(second).toEqual(first);
275
+ (0, _extendedTest.expect)(aiSpy).toHaveBeenCalledTimes(1);
276
+ });
277
+ (0, _extendedTest.test)("reuses the cache for a FUZZY match (AI value differs slightly from the DOM)", async ({
278
+ page
279
+ }) => {
280
+ const label = `fuzzy-${(0, _uuid.v4)()}`;
281
+ const fuzzyAiResult = {
282
+ company_name: "Internationl Cleaning Servces"
283
+ };
284
+ const aiSpy = await mockExtractStructuredDataUsingAi(fuzzyAiResult);
285
+ await useInMemoryCache();
286
+ await page.setContent(`<div class="vendor"><span class="name">International Cleaning Services</span></div>`);
287
+ const options = {
288
+ entityName: "vendor",
289
+ label,
290
+ entitySchema: {
291
+ type: "object",
292
+ required: ["company_name"],
293
+ properties: {
294
+ company_name: {
295
+ type: "string",
296
+ description: "the company name"
297
+ }
298
+ }
299
+ },
300
+ strategy: {
301
+ model: "claude-sonnet-4-20250514",
302
+ type: "HTML"
303
+ },
304
+ variantKey: label
305
+ };
306
+ const first = await (0, _.extractObjectFromLocator)(page.locator("div.vendor"), options);
307
+ (0, _extendedTest.expect)(first).toEqual({
308
+ company_name: "International Cleaning Services"
309
+ });
310
+ (0, _extendedTest.expect)(aiSpy).toHaveBeenCalledTimes(1);
311
+ const second = await (0, _.extractObjectFromLocator)(page.locator("div.vendor"), options);
312
+ (0, _extendedTest.expect)(second).toEqual(first);
313
+ (0, _extendedTest.expect)(aiSpy).toHaveBeenCalledTimes(1);
314
+ });
107
315
  });
package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js CHANGED
@@ -88,11 +88,13 @@ async function dynamicObjectExtractor(page, identifier, options) {
88
88
  Object.entries(xpathMappingFromAI).forEach(([_propertyName, {
89
89
  matchXpath,
90
90
  matchText,
91
- matchType
91
+ matchType,
92
+ sourceText
92
93
  }]) => {
93
94
  xpathMapping[matchText] = [{
94
95
  xpath: matchXpath,
95
- matchType
96
+ matchType,
97
+ sourceText
96
98
  }];
97
99
  });
98
100
  const resultsToCache = {
package/dist/optimized-extractors/validators.js CHANGED
@@ -5,22 +5,21 @@ Object.defineProperty(exports, "__esModule", {
5
5
  });
6
6
  exports.strategySchema = exports.simpleObjectJsonSchema = exports.simpleArrayItemJsonSchema = exports.extractObjectOptimizedInputSchema = exports.extractArrayOptimizedInputSchema = void 0;
7
7
  var _zod = require("zod");
8
- var _aiModelsValidation = require("./types/aiModelsValidation");
9
8
  const htmlStrategySchema = _zod.z.object({
10
- model: _zod.z.enum(_aiModelsValidation.SUPPORTED_TEXT_MODELS, {
9
+ model: _zod.z.string({
11
10
  required_error: "strategy model is required",
12
11
  invalid_type_error: "strategy model is invalid"
13
- }),
12
+ }).min(1, "strategy model is required"),
14
13
  type: _zod.z.literal("HTML", {
15
14
  required_error: "strategy type is required",
16
15
  invalid_type_error: "strategy type is invalid"
17
16
  })
18
17
  });
19
18
  const imageStrategySchema = _zod.z.object({
20
- model: _zod.z.enum(_aiModelsValidation.SUPPORTED_VISION_MODELS, {
19
+ model: _zod.z.string({
21
20
  required_error: "strategy model is required",
22
21
  invalid_type_error: "strategy model is invalid"
23
- }),
22
+ }).min(1, "strategy model is required"),
24
23
  type: _zod.z.literal("IMAGE", {
25
24
  required_error: "strategy type is required",
26
25
  invalid_type_error: "strategy type is invalid"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@intuned/browser-dev",
3
- "version": "0.1.15-dev.0",
3
+ "version": "0.1.16-dev.0",
4
4
  "description": "runner package for intuned functions",
5
5
  "types": "./dist/index.d.ts",
6
6
  "typesVersions": {