@adobe/spacecat-shared-data-access 2.39.0 → 2.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [@adobe/spacecat-shared-data-access-v2.40.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.40.0...@adobe/spacecat-shared-data-access-v2.40.1) (2025-07-27)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * **deps:** update external fixes ([#878](https://github.com/adobe/spacecat-shared/issues/878)) ([b049828](https://github.com/adobe/spacecat-shared/commit/b04982839c0ff5e4de4ab0e37508c5eb5272a679))
7
+
8
+ # [@adobe/spacecat-shared-data-access-v2.40.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.39.0...@adobe/spacecat-shared-data-access-v2.40.0) (2025-07-24)
9
+
10
+
11
+ ### Features
12
+
13
+ * amend llmo config with URL patterns ([#869](https://github.com/adobe/spacecat-shared/issues/869)) ([c6cb047](https://github.com/adobe/spacecat-shared/commit/c6cb0476c6d2b3dad048855ae49481ae8fcfb5ce))
14
+
1
15
  # [@adobe/spacecat-shared-data-access-v2.39.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.38.1...@adobe/spacecat-shared-data-access-v2.39.0) (2025-07-22)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-data-access",
3
- "version": "2.39.0",
3
+ "version": "2.40.1",
4
4
  "description": "Shared modules of the Spacecat Services - Data Access",
5
5
  "type": "module",
6
6
  "engines": {
@@ -37,7 +37,7 @@
37
37
  "dependencies": {
38
38
  "@adobe/spacecat-shared-utils": "1.39.1",
39
39
  "@aws-sdk/client-dynamodb": "3.848.0",
40
- "@aws-sdk/lib-dynamodb": "3.848.0",
40
+ "@aws-sdk/lib-dynamodb": "3.850.0",
41
41
  "@types/joi": "17.2.3",
42
42
  "aws-xray-sdk": "3.10.3",
43
43
  "electrodb": "3.4.3",
@@ -49,7 +49,7 @@
49
49
  "chai": "5.2.1",
50
50
  "chai-as-promised": "8.0.1",
51
51
  "dynamo-db-local": "9.6.0",
52
- "nock": "14.0.6",
52
+ "nock": "14.0.7",
53
53
  "sinon": "20.0.0",
54
54
  "sinon-chai": "4.0.0"
55
55
  }
@@ -37,20 +37,32 @@ export const IMPORT_SOURCES = {
37
37
  RUM: 'rum',
38
38
  };
39
39
 
// Well-known LLMO tags are written as "<kind>: <value>" where <kind> is one of
// market, product or topic, e.g. "market: ch", "product: firefly", "topic: copyright".
const LLMO_TAG_PATTERN = /^(market|product|topic):\s?.+/;

// A tag is validated against two alternatives: the well-known format above, or any
// other value accepted by a plain Joi.string().
const WELL_KNOWN_LLMO_TAG = Joi.string().pattern(LLMO_TAG_PATTERN);
const FREE_FORM_LLMO_TAG = Joi.string();
const LLMO_TAG = Joi.alternatives().try(WELL_KNOWN_LLMO_TAG, FREE_FORM_LLMO_TAG);
47
+
40
48
  // LLMO question schema for both Human and AI questions
41
49
  const QUESTION_SCHEMA = Joi.object({
42
50
  key: Joi.string().required(),
43
51
  question: Joi.string().required(),
44
52
  source: Joi.string().optional(),
45
- country: Joi.string().optional(),
46
- product: Joi.string().optional(),
47
53
  volume: Joi.string().optional(),
48
54
  keyword: Joi.string().optional(),
49
55
  url: Joi.string().uri().optional(),
50
- tags: Joi.array().items(Joi.string()).optional(),
56
+ tags: Joi.array().items(LLMO_TAG).optional(),
51
57
  importTime: Joi.string().isoDate().optional(),
52
58
  });
53
59
 
// Keys of a single LLMO URL pattern entry: a required URI string plus optional tags.
const LLMO_URL_PATTERN_SCHEMA = {
  urlPattern: Joi.string().uri().required(),
  tags: Joi.array().items(LLMO_TAG).optional(),
};

// A list of URL pattern entries; each item is validated as an object with the keys above.
const LLMO_URL_PATTERNS_SCHEMA = Joi.array().items(
  Joi.object(LLMO_URL_PATTERN_SCHEMA),
);
65
+
54
66
  const IMPORT_BASE_KEYS = {
55
67
  destinations: Joi.array().items(Joi.string().valid(IMPORT_DESTINATIONS.DEFAULT)).required(),
56
68
  sources: Joi.array().items(Joi.string().valid(...Object.values(IMPORT_SOURCES))).required(),
@@ -222,6 +234,7 @@ export const configSchema = Joi.object({
222
234
  Human: Joi.array().items(QUESTION_SCHEMA).optional(),
223
235
  AI: Joi.array().items(QUESTION_SCHEMA).optional(),
224
236
  }).optional(),
237
+ urlPatterns: LLMO_URL_PATTERNS_SCHEMA.optional(),
225
238
  }).optional(),
226
239
  cdnLogsConfig: Joi.object({
227
240
  bucketName: Joi.string().required(),
@@ -279,6 +292,20 @@ export function validateConfiguration(config) {
279
292
  return value; // Validated and sanitized configuration
280
293
  }
281
294
 
295
+ export function extractWellKnownTags(tags) {
296
+ const wellKnownTags = {};
297
+ for (const tag of tags) {
298
+ if (LLMO_TAG_PATTERN.test(tag)) {
299
+ const colonIdx = tag.indexOf(':');
300
+ const value = tag.slice(colonIdx + 1).trim();
301
+ if (colonIdx !== -1 && value) {
302
+ wellKnownTags[tag.slice(0, colonIdx).trim()] = value;
303
+ }
304
+ }
305
+ }
306
+ return wellKnownTags;
307
+ }
308
+
282
309
  export const Config = (data = {}) => {
283
310
  let configData;
284
311
 
@@ -296,7 +323,7 @@ export const Config = (data = {}) => {
296
323
  }
297
324
 
298
325
  const state = { ...configData };
299
- const self = { state };
326
+ const self = { state, extractWellKnownTags };
300
327
  self.getSlackConfig = () => state.slack;
301
328
  self.isInternalCustomer = () => state?.slack?.workspace === 'internal';
302
329
  self.getSlackMentions = (type) => state?.handlers?.[type]?.mentions?.slack;
@@ -318,6 +345,7 @@ export const Config = (data = {}) => {
318
345
  self.getLlmoBrand = () => state?.llmo?.brand;
319
346
  self.getLlmoHumanQuestions = () => state?.llmo?.questions?.Human;
320
347
  self.getLlmoAIQuestions = () => state?.llmo?.questions?.AI;
348
+ self.getLlmoUrlPatterns = () => state?.llmo?.urlPatterns;
321
349
 
322
350
  self.updateSlackConfig = (channel, workspace, invitedUserCount) => {
323
351
  state.slack = {
@@ -327,11 +355,12 @@ export const Config = (data = {}) => {
327
355
  };
328
356
  };
329
357
 
330
- self.updateLlmoConfig = (dataFolder, brand, questions) => {
358
+ self.updateLlmoConfig = (dataFolder, brand, questions = {}, urlPatterns = undefined) => {
331
359
  state.llmo = {
332
360
  dataFolder,
333
361
  brand,
334
- ...(questions !== undefined ? { questions } : {}),
362
+ questions,
363
+ urlPatterns,
335
364
  };
336
365
  };
337
366
 
@@ -385,6 +414,36 @@ export const Config = (data = {}) => {
385
414
  );
386
415
  };
387
416
 
/**
 * Adds URL patterns to the LLMO configuration, merging them with the existing ones.
 *
 * Entries are keyed by `urlPattern`: an incoming entry replaces an existing entry with
 * the same `urlPattern`, and entries with a new `urlPattern` are appended.
 *
 * @param {Array<{urlPattern: string, tags?: string[]}>} [urlPatterns] - Entries to merge.
 *   `undefined` is treated as "nothing to add".
 * @throws When `urlPatterns` does not validate against LLMO_URL_PATTERNS_SCHEMA.
 */
self.addLlmoUrlPatterns = (urlPatterns) => {
  Joi.assert(urlPatterns, LLMO_URL_PATTERNS_SCHEMA, 'Invalid URL patterns');

  state.llmo ??= {};
  state.llmo.urlPatterns ??= [];
  const byPattern = new Map(
    state.llmo.urlPatterns.map((p) => [p.urlPattern, p]),
  );
  // The array schema is not marked required, so `undefined` passes the assertion above;
  // iterate an empty list in that case instead of throwing a TypeError.
  for (const p of urlPatterns ?? []) {
    byPattern.set(p.urlPattern, p);
  }

  state.llmo.urlPatterns = [...byPattern.values()];
};
431
+
/**
 * Overwrites the LLMO URL patterns with the given list.
 *
 * The list is validated first; on success it is stored as-is (same array reference)
 * under `state.llmo.urlPatterns`, creating `state.llmo` when it does not exist yet.
 *
 * @param {Array<{urlPattern: string, tags?: string[]}>} urlPatterns - The new list.
 * @throws When `urlPatterns` does not validate against LLMO_URL_PATTERNS_SCHEMA.
 */
self.replaceLlmoUrlPatterns = (urlPatterns) => {
  Joi.assert(urlPatterns, LLMO_URL_PATTERNS_SCHEMA, 'Invalid URL patterns');

  if (state.llmo === null || state.llmo === undefined) {
    state.llmo = {};
  }
  const llmoConfig = state.llmo;
  llmoConfig.urlPatterns = urlPatterns;
};
437
+
/**
 * Removes every LLMO URL pattern entry whose `urlPattern` equals the given value.
 * Does nothing when no URL patterns are configured.
 *
 * @param {string} urlPattern - The `urlPattern` value of the entries to drop.
 */
self.removeLlmoUrlPattern = (urlPattern) => {
  const existing = state.llmo?.urlPatterns;
  if (existing) {
    const isOtherPattern = (entry) => entry.urlPattern !== urlPattern;
    state.llmo.urlPatterns = existing.filter(isOtherPattern);
  }
};
446
+
388
447
  self.updateImports = (imports) => {
389
448
  state.imports = imports;
390
449
  };
@@ -21,7 +21,7 @@ import type {
21
21
  Organization,
22
22
  SiteCandidate,
23
23
  SiteTopPage,
24
- } from '../index';
24
+ } from '../index.js';
25
25
 
26
26
  export interface HlxConfig {
27
27
  hlxVersion: number; // helix (AEM Edge Delivery) major version
@@ -61,17 +61,23 @@ export interface ImportConfig {
61
61
  limit?: number;
62
62
  }
63
63
 
/** Kinds of LLMO tags that carry a structured "<kind>:<value>" meaning. */
export type WellKnownLlmoTag = 'market' | 'product' | 'topic';

/** An LLMO tag: either a well-known "<kind>:<value>" tag or any free-form string. */
export type LlmoTag = `${WellKnownLlmoTag}:${string}` | string;

/** @deprecated Misspelled name kept for backward compatibility; use `WellKnownLlmoTag`. */
export type WellKnownLmmoTag = WellKnownLlmoTag;

/** @deprecated Misspelled name kept for backward compatibility; use `LlmoTag`. */
export type LmmoTag = LlmoTag;
66
+
64
67
  export interface LlmoQuestion {
65
68
  key: string;
66
69
  question: string;
67
70
  source?: string;
68
- country?: string;
69
- product?: string;
70
71
  volume?: string;
71
72
  importTime?: string;
72
73
  keyword?: string;
73
74
  url?: string;
74
- tags?: string[];
75
+ tags?: LmmoTag[];
76
+ }
77
+
78
+ export interface LlmoUrlPattern {
79
+ urlPattern: string;
80
+ tags?: LmmoTag[];
75
81
  }
76
82
 
77
83
  export interface SiteConfig {
@@ -117,8 +123,10 @@ export interface SiteConfig {
117
123
  Human?: Array<LlmoQuestion>;
118
124
  AI?: Array<LlmoQuestion>;
119
125
  };
126
+ urlPatterns?: Array<LlmoUrlPattern>
120
127
  };
121
128
  };
129
+ extractWellKnownTags(tags: Array<string>): Partial<Record<WellKnownLmmoTag, string>>;
122
130
  getSlackConfig(): { workspace?: string; channel?: string; invitedUserCount?: number };
123
131
  getImports(): ImportConfig[];
124
132
  getImportConfig(type: ImportType): ImportConfig | undefined;
@@ -145,17 +153,20 @@ export interface SiteConfig {
145
153
  updateLlmoConfig(dataFolder: string, brand: string, questions?: {
146
154
  Human?: Array<LlmoQuestion>;
147
155
  AI?: Array<LlmoQuestion>;
148
- }): void;
156
+ }, urlPatterns?: Array<LlmoUrlPattern>): void;
149
157
  updateLlmoDataFolder(dataFolder: string): void;
150
158
  updateLlmoBrand(brand: string): void;
151
159
  getLlmoDataFolder(): string | undefined;
152
160
  getLlmoBrand(): string | undefined;
153
161
  getLlmoHumanQuestions(): LlmoQuestion[] | undefined;
154
162
  getLlmoAIQuestions(): LlmoQuestion[] | undefined;
163
+ getLlmoUrlPatterns(): Array<LlmoUrlPattern> | undefined;
155
164
  addLlmoHumanQuestions(questions: LlmoQuestion[]): void;
156
165
  addLlmoAIQuestions(questions: LlmoQuestion[]): void;
157
166
  removeLlmoQuestion(key: string): void;
158
167
  updateLlmoQuestion(key: string, questionUpdate: Partial<LlmoQuestion>): void;
/**
 * Merges the given entries into the LLMO URL patterns; an entry replaces an existing
 * one with the same `urlPattern`.
 */
addLlmoUrlPatterns(urlPatterns: Array<LlmoUrlPattern>): void;
/** Overwrites the LLMO URL patterns with the given list. */
replaceLlmoUrlPatterns(urlPatterns: Array<LlmoUrlPattern>): void;
/** Removes all LLMO URL pattern entries whose `urlPattern` equals the given value. */
removeLlmoUrlPattern(urlPattern: string): void;
159
170
  }
160
171
 
161
172
  export interface Site extends BaseModel {