@adobe/spacecat-shared-data-access 2.38.1 → 2.40.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [@adobe/spacecat-shared-data-access-v2.40.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.39.0...@adobe/spacecat-shared-data-access-v2.40.0) (2025-07-24)
2
+
3
+
4
+ ### Features
5
+
6
+ * amend llmo config with URL patterns ([#869](https://github.com/adobe/spacecat-shared/issues/869)) ([c6cb047](https://github.com/adobe/spacecat-shared/commit/c6cb0476c6d2b3dad048855ae49481ae8fcfb5ce))
7
+
8
+ # [@adobe/spacecat-shared-data-access-v2.39.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.38.1...@adobe/spacecat-shared-data-access-v2.39.0) (2025-07-22)
9
+
10
+
11
+ ### Features
12
+
13
+ * add llmo specific configuration in site config ([#853](https://github.com/adobe/spacecat-shared/issues/853)) ([476b8e8](https://github.com/adobe/spacecat-shared/commit/476b8e84009602a4c9d7ef8d42b9efcae3beae9e))
14
+
1
15
  # [@adobe/spacecat-shared-data-access-v2.38.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.38.0...@adobe/spacecat-shared-data-access-v2.38.1) (2025-07-22)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-data-access",
3
- "version": "2.38.1",
3
+ "version": "2.40.0",
4
4
  "description": "Shared modules of the Spacecat Services - Data Access",
5
5
  "type": "module",
6
6
  "engines": {
@@ -37,6 +37,32 @@ export const IMPORT_SOURCES = {
37
37
  RUM: 'rum',
38
38
  };
39
39
 
40
+ const LLMO_TAG_PATTERN = /^(market|product|topic):\s?.+/;
41
+ const LLMO_TAG = Joi.alternatives()
42
+ .try(
43
+ // Tag market, product, topic like this: "market: ch", "product: firefly", "topic: copyright"
44
+ Joi.string().pattern(LLMO_TAG_PATTERN),
45
+ Joi.string(),
46
+ );
47
+
48
+ // LLMO question schema for both Human and AI questions
49
+ const QUESTION_SCHEMA = Joi.object({
50
+ key: Joi.string().required(),
51
+ question: Joi.string().required(),
52
+ source: Joi.string().optional(),
53
+ volume: Joi.string().optional(),
54
+ keyword: Joi.string().optional(),
55
+ url: Joi.string().uri().optional(),
56
+ tags: Joi.array().items(LLMO_TAG).optional(),
57
+ importTime: Joi.string().isoDate().optional(),
58
+ });
59
+
60
+ const LLMO_URL_PATTERN_SCHEMA = {
61
+ urlPattern: Joi.string().uri().required(),
62
+ tags: Joi.array().items(LLMO_TAG).optional(),
63
+ };
64
+ const LLMO_URL_PATTERNS_SCHEMA = Joi.array().items(LLMO_URL_PATTERN_SCHEMA);
65
+
40
66
  const IMPORT_BASE_KEYS = {
41
67
  destinations: Joi.array().items(Joi.string().valid(IMPORT_DESTINATIONS.DEFAULT)).required(),
42
68
  sources: Joi.array().items(Joi.string().valid(...Object.values(IMPORT_SOURCES))).required(),
@@ -201,6 +227,15 @@ export const configSchema = Joi.object({
201
227
  headers: Joi.object().pattern(Joi.string(), Joi.string()),
202
228
  overrideBaseURL: Joi.string().uri().optional(),
203
229
  }).optional(),
230
+ llmo: Joi.object({
231
+ dataFolder: Joi.string().required(),
232
+ brand: Joi.string().required(),
233
+ questions: Joi.object({
234
+ Human: Joi.array().items(QUESTION_SCHEMA).optional(),
235
+ AI: Joi.array().items(QUESTION_SCHEMA).optional(),
236
+ }).optional(),
237
+ urlPatterns: LLMO_URL_PATTERNS_SCHEMA.optional(),
238
+ }).optional(),
204
239
  cdnLogsConfig: Joi.object({
205
240
  bucketName: Joi.string().required(),
206
241
  filters: Joi.array().items(
@@ -257,6 +292,20 @@ export function validateConfiguration(config) {
257
292
  return value; // Validated and sanitized configuration
258
293
  }
259
294
 
295
+ export function extractWellKnownTags(tags) {
296
+ const wellKnownTags = {};
297
+ for (const tag of tags) {
298
+ if (LLMO_TAG_PATTERN.test(tag)) {
299
+ const colonIdx = tag.indexOf(':');
300
+ const value = tag.slice(colonIdx + 1).trim();
301
+ if (colonIdx !== -1 && value) {
302
+ wellKnownTags[tag.slice(0, colonIdx).trim()] = value;
303
+ }
304
+ }
305
+ }
306
+ return wellKnownTags;
307
+ }
308
+
260
309
  export const Config = (data = {}) => {
261
310
  let configData;
262
311
 
@@ -274,7 +323,7 @@ export const Config = (data = {}) => {
274
323
  }
275
324
 
276
325
  const state = { ...configData };
277
- const self = { state };
326
+ const self = { state, extractWellKnownTags };
278
327
  self.getSlackConfig = () => state.slack;
279
328
  self.isInternalCustomer = () => state?.slack?.workspace === 'internal';
280
329
  self.getSlackMentions = (type) => state?.handlers?.[type]?.mentions?.slack;
@@ -291,6 +340,12 @@ export const Config = (data = {}) => {
291
340
  self.getFetchConfig = () => state?.fetchConfig;
292
341
  self.getBrandConfig = () => state?.brandConfig;
293
342
  self.getCdnLogsConfig = () => state?.cdnLogsConfig;
343
+ self.getLlmoConfig = () => state?.llmo;
344
+ self.getLlmoDataFolder = () => state?.llmo?.dataFolder;
345
+ self.getLlmoBrand = () => state?.llmo?.brand;
346
+ self.getLlmoHumanQuestions = () => state?.llmo?.questions?.Human;
347
+ self.getLlmoAIQuestions = () => state?.llmo?.questions?.AI;
348
+ self.getLlmoUrlPatterns = () => state?.llmo?.urlPatterns;
294
349
 
295
350
  self.updateSlackConfig = (channel, workspace, invitedUserCount) => {
296
351
  state.slack = {
@@ -300,6 +355,95 @@ export const Config = (data = {}) => {
300
355
  };
301
356
  };
302
357
 
358
+ self.updateLlmoConfig = (dataFolder, brand, questions = {}, urlPatterns = undefined) => {
359
+ state.llmo = {
360
+ dataFolder,
361
+ brand,
362
+ questions,
363
+ urlPatterns,
364
+ };
365
+ };
366
+
367
+ self.updateLlmoDataFolder = (dataFolder) => {
368
+ state.llmo = state.llmo || {};
369
+ state.llmo.dataFolder = dataFolder;
370
+ };
371
+
372
+ self.updateLlmoBrand = (brand) => {
373
+ state.llmo = state.llmo || {};
374
+ state.llmo.brand = brand;
375
+ };
376
+
377
+ self.addLlmoHumanQuestions = (questions) => {
378
+ state.llmo = state.llmo || {};
379
+ state.llmo.questions = state.llmo.questions || {};
380
+ state.llmo.questions.Human = state.llmo.questions.Human || [];
381
+ state.llmo.questions.Human.push(...questions);
382
+ };
383
+
384
+ self.addLlmoAIQuestions = (questions) => {
385
+ state.llmo = state.llmo || {};
386
+ state.llmo.questions = state.llmo.questions || {};
387
+ state.llmo.questions.AI = state.llmo.questions.AI || [];
388
+ state.llmo.questions.AI.push(...questions);
389
+ };
390
+
391
+ self.removeLlmoQuestion = (key) => {
392
+ state.llmo = state.llmo || {};
393
+ state.llmo.questions = state.llmo.questions || {};
394
+ state.llmo.questions.Human = state.llmo.questions.Human || [];
395
+ state.llmo.questions.Human = state.llmo.questions.Human.filter(
396
+ (question) => question.key !== key,
397
+ );
398
+ state.llmo.questions.AI = state.llmo.questions.AI || [];
399
+ state.llmo.questions.AI = state.llmo.questions.AI.filter(
400
+ (question) => question.key !== key,
401
+ );
402
+ };
403
+
404
+ self.updateLlmoQuestion = (key, questionUpdate) => {
405
+ state.llmo = state.llmo || {};
406
+ state.llmo.questions = state.llmo.questions || {};
407
+ state.llmo.questions.Human = state.llmo.questions.Human || [];
408
+ state.llmo.questions.Human = state.llmo.questions.Human.map(
409
+ (question) => (question.key === key ? { ...question, ...questionUpdate, key } : question),
410
+ );
411
+ state.llmo.questions.AI = state.llmo.questions.AI || [];
412
+ state.llmo.questions.AI = state.llmo.questions.AI.map(
413
+ (question) => (question.key === key ? { ...question, ...questionUpdate, key } : question),
414
+ );
415
+ };
416
+
417
+ self.addLlmoUrlPatterns = (urlPatterns) => {
418
+ Joi.assert(urlPatterns, LLMO_URL_PATTERNS_SCHEMA, 'Invalid URL patterns');
419
+
420
+ state.llmo ??= {};
421
+ state.llmo.urlPatterns ??= [];
422
+ const byPattern = new Map(
423
+ state.llmo.urlPatterns.map((p) => [p.urlPattern, p]),
424
+ );
425
+ for (const p of urlPatterns) {
426
+ byPattern.set(p.urlPattern, p);
427
+ }
428
+
429
+ state.llmo.urlPatterns = [...byPattern.values()];
430
+ };
431
+
432
+ self.replaceLlmoUrlPatterns = (urlPatterns) => {
433
+ Joi.assert(urlPatterns, LLMO_URL_PATTERNS_SCHEMA, 'Invalid URL patterns');
434
+ state.llmo ??= {};
435
+ state.llmo.urlPatterns = urlPatterns;
436
+ };
437
+
438
+ self.removeLlmoUrlPattern = (urlPattern) => {
439
+ const urlPatterns = state.llmo?.urlPatterns;
440
+ if (!urlPatterns) return;
441
+
442
+ state.llmo.urlPatterns = urlPatterns.filter(
443
+ (pattern) => pattern.urlPattern !== urlPattern,
444
+ );
445
+ };
446
+
303
447
  self.updateImports = (imports) => {
304
448
  state.imports = imports;
305
449
  };
@@ -409,4 +553,5 @@ Config.toDynamoItem = (config) => ({
409
553
  fetchConfig: config.getFetchConfig(),
410
554
  brandConfig: config.getBrandConfig(),
411
555
  cdnLogsConfig: config.getCdnLogsConfig(),
556
+ llmo: config.getLlmoConfig(),
412
557
  });
@@ -21,7 +21,7 @@ import type {
21
21
  Organization,
22
22
  SiteCandidate,
23
23
  SiteTopPage,
24
- } from '../index';
24
+ } from '../index.js';
25
25
 
26
26
  export interface HlxConfig {
27
27
  hlxVersion: number; // helix (AEM Edge Delivery) major version
@@ -61,6 +61,25 @@ export interface ImportConfig {
61
61
  limit?: number;
62
62
  }
63
63
 
64
+ export type WellKnownLmmoTag = 'market' | 'product' | 'topic';
65
+ export type LmmoTag = `${WellKnownLmmoTag}:${string}` | string;
66
+
67
+ export interface LlmoQuestion {
68
+ key: string;
69
+ question: string;
70
+ source?: string;
71
+ volume?: string;
72
+ importTime?: string;
73
+ keyword?: string;
74
+ url?: string;
75
+ tags?: LmmoTag[];
76
+ }
77
+
78
+ export interface LlmoUrlPattern {
79
+ urlPattern: string;
80
+ tags?: LmmoTag[];
81
+ }
82
+
64
83
  export interface SiteConfig {
65
84
  state: {
66
85
  slack?: {
@@ -97,7 +116,17 @@ export interface SiteConfig {
97
116
  headers?: Record<string, string>;
98
117
  overrideBaseURL?: string;
99
118
  };
119
+ llmo?: {
120
+ dataFolder: string;
121
+ brand: string;
122
+ questions?: {
123
+ Human?: Array<LlmoQuestion>;
124
+ AI?: Array<LlmoQuestion>;
125
+ };
126
+ urlPatterns?: Array<LlmoUrlPattern>
127
+ };
100
128
  };
129
+ extractWellKnownTags(tags: Array<string>): Partial<Record<WellKnownLmmoTag, string>>;
101
130
  getSlackConfig(): { workspace?: string; channel?: string; invitedUserCount?: number };
102
131
  getImports(): ImportConfig[];
103
132
  getImportConfig(type: ImportType): ImportConfig | undefined;
@@ -116,6 +145,28 @@ export interface SiteConfig {
116
145
  getLatestMetrics(type: string):
117
146
  { pageViewsChange: number; ctrChange: number; projectedTrafficValue: number } | undefined;
118
147
  getFetchConfig(): { headers?: Record<string, string>, overrideBaseURL?: string } | undefined;
148
+ getLlmoConfig(): {
149
+ dataFolder: string;
150
+ brand: string;
151
+ questions?: { Human?: Array<LlmoQuestion>; AI?: Array<LlmoQuestion> };
152
+ } | undefined;
153
+ updateLlmoConfig(dataFolder: string, brand: string, questions?: {
154
+ Human?: Array<LlmoQuestion>;
155
+ AI?: Array<LlmoQuestion>;
156
+ }, urlPatterns?: Array<LlmoUrlPattern>): void;
157
+ updateLlmoDataFolder(dataFolder: string): void;
158
+ updateLlmoBrand(brand: string): void;
159
+ getLlmoDataFolder(): string | undefined;
160
+ getLlmoBrand(): string | undefined;
161
+ getLlmoHumanQuestions(): LlmoQuestion[] | undefined;
162
+ getLlmoAIQuestions(): LlmoQuestion[] | undefined;
163
+ getLlmoUrlPatterns(): Array<LlmoUrlPattern> | undefined;
164
+ addLlmoHumanQuestions(questions: LlmoQuestion[]): void;
165
+ addLlmoAIQuestions(questions: LlmoQuestion[]): void;
166
+ removeLlmoQuestion(key: string): void;
167
+ updateLlmoQuestion(key: string, questionUpdate: Partial<LlmoQuestion>): void;
168
+ addLlmoUrlPatterns(urlPatterns: Array<LlmoUrlPattern>): void;
169
+ removeLlmoUrlPattern(urlPattern: string): void;
119
170
  }
120
171
 
121
172
  export interface Site extends BaseModel {