@adobe/spacecat-shared-utils 1.58.0 → 1.59.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [@adobe/spacecat-shared-utils-v1.59.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.58.1...@adobe/spacecat-shared-utils-v1.59.0) (2025-10-09)
2
+
3
+
4
+ ### Features
5
+
6
+ * Add locale detection util ([#1006](https://github.com/adobe/spacecat-shared/issues/1006)) ([cb8dcd6](https://github.com/adobe/spacecat-shared/commit/cb8dcd69e85cf673f9c791f8653e164e1e5a06d8))
7
+
8
+ # [@adobe/spacecat-shared-utils-v1.58.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.58.0...@adobe/spacecat-shared-utils-v1.58.1) (2025-10-09)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * adding calendar util functions to interface ([#1016](https://github.com/adobe/spacecat-shared/issues/1016)) ([245ff41](https://github.com/adobe/spacecat-shared/commit/245ff41dd05cf4348cfe81d2b187487744a2ad57))
14
+
1
15
  # [@adobe/spacecat-shared-utils-v1.58.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.57.1...@adobe/spacecat-shared-utils-v1.58.0) (2025-10-09)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-utils",
3
- "version": "1.58.0",
3
+ "version": "1.59.0",
4
4
  "description": "Shared modules of the Spacecat Services - utils",
5
5
  "type": "module",
6
6
  "exports": {
@@ -25,7 +25,7 @@
25
25
  "require": "test/setup-env.js",
26
26
  "reporter": "mocha-multi-reporters",
27
27
  "reporter-options": "configFile=.mocha-multi.json",
28
- "spec": "test/*.test.js"
28
+ "spec": "test/**/*.test.js"
29
29
  },
30
30
  "repository": {
31
31
  "type": "git",
@@ -60,8 +60,12 @@
60
60
  "@aws-sdk/client-sqs": "3.893.0",
61
61
  "@json2csv/plainjs": "7.0.6",
62
62
  "aws-xray-sdk": "3.10.3",
63
+ "cheerio": "1.1.2",
63
64
  "date-fns": "4.1.0",
65
+ "franc-min": "6.2.0",
66
+ "iso-639-3": "3.0.1",
64
67
  "validator": "^13.15.15",
68
+ "world-countries": "5.1.0",
65
69
  "zod": "^4.1.11"
66
70
  }
67
71
  }
package/src/index.d.ts CHANGED
@@ -265,5 +265,13 @@ export function retrievePageAuthentication(site: object, context: object): Promi
265
265
 
266
266
  export function prettifyLogForwardingConfig(payload: object): object;
267
267
 
268
+ export function isoCalendarWeek(date: Date): object;
269
+
270
+ export function isoCalendarWeekSunday(date: Date): Date;
271
+
272
+ export function isoCalendarWeekMonday(date: Date): Date;
273
+
268
274
  export * as llmoConfig from './llmo-config.js';
269
275
  export * as schemas from './schemas.js';
276
+
277
// Re-export detectLocale as a value (not `type`-only) so TypeScript consumers can actually call it;
// a `type` modifier here would make `detectLocale(...)` fail with TS1362 even though index.js exports the function.
export { detectLocale } from './locale-detect/index.js';
package/src/index.js CHANGED
@@ -89,6 +89,9 @@ export {
89
89
  getWeekInfo,
90
90
  getMonthInfo,
91
91
  getTemporalCondition,
92
+ isoCalendarWeek,
93
+ isoCalendarWeekSunday,
94
+ isoCalendarWeekMonday,
92
95
  } from './calendar-week-helper.js';
93
96
 
94
97
  export { detectAEMVersion, DELIVERY_TYPES } from './aem.js';
@@ -98,4 +101,5 @@ export { determineAEMCSPageId, getPageEditUrl } from './aem-content-api-utils.js
98
101
  export * as llmoConfig from './llmo-config.js';
99
102
  export * as schemas from './schemas.js';
100
103
 
104
+ export { detectLocale } from './locale-detect/locale-detect.js';
101
105
  export { prettifyLogForwardingConfig } from './cdn-helpers.js';
@@ -0,0 +1,24 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ interface LocaleDetectConfig { // Input accepted by detectLocale.
14
+ baseUrl: string; // URL of the page to inspect; detectLocale throws 'Invalid baseUrl' when it is missing or not a valid URL.
15
+ headers?: object; // Response headers, if already known; when html is supplied but headers are empty, detectLocale issues a HEAD request to obtain them.
16
+ html?: string; // Page markup, if already known; when absent or blank, detectLocale fetches baseUrl and uses the fetched body and headers.
17
+ }
18
+
19
+ interface LocaleDetectResponse { // Result produced by detectLocale.
20
+ language: string; // Most frequently indicated language; 'en' when no indicator reports one.
21
+ region: string; // Most frequently indicated region; 'US' when no indicator reports one.
22
+ }
23
+
24
+ export function detectLocale(config: LocaleDetectConfig): Promise<LocaleDetectResponse>; // Resolves the locale by tallying the results of all indicator checks.
@@ -0,0 +1,201 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import worldCountries from 'world-countries';
14
+ import { franc } from 'franc-min';
15
+
16
+ import { parseLocale } from './utils.js';
17
+
18
/**
 * Derives a region indicator from the top-level domain of the inspected URL.
 *
 * The last hostname label is lower-cased, prefixed with a dot and looked up in the
 * `tld` lists of the world-countries data set.
 *
 * @param {object} context - Indicator context.
 * @param {URL} context.baseUrl - Parsed URL of the page being inspected.
 * @returns {Array<{region: string, type: string}>} A single `tld` indicator carrying the
 *   upper-cased two-letter country code, or an empty array when the hostname has fewer
 *   than two labels or no country claims the TLD.
 */
export function checkTld({ baseUrl }) {
  const labels = baseUrl.hostname.split('.');
  if (labels.length < 2) {
    return [];
  }

  const suffix = `.${labels[labels.length - 1].toLowerCase()}`;
  const match = worldCountries.find((entry) => entry.tld.includes(suffix));

  return match
    ? [{ region: match.cca2.toUpperCase(), type: 'tld' }]
    : [];
}
35
+
36
/**
 * Derives a locale indicator from the left-most hostname label.
 *
 * Only hostnames with at least three labels are considered, and the first label must be
 * two or three characters long and different from `www`.
 *
 * @param {object} context - Indicator context.
 * @param {URL} context.baseUrl - Parsed URL of the page being inspected.
 * @returns {Array<object>} A single `subdomain` indicator, or an empty array when the
 *   label is not usable or cannot be parsed as a locale.
 */
export function checkSubdomain({ baseUrl }) {
  const labels = baseUrl.hostname.split('.');
  if (labels.length < 3) {
    return [];
  }

  const [candidate] = labels;
  const hasCodeLength = Boolean(candidate) && candidate.length >= 2 && candidate.length <= 3;
  if (!hasCodeLength || candidate === 'www') {
    return [];
  }

  // The label could be either a language or a region code, so offer it to the parser as both.
  const parsed = parseLocale(`${candidate}_${candidate}`);
  return parsed ? [{ ...parsed, type: 'subdomain' }] : [];
}
52
+
53
/**
 * Derives a locale indicator from the URL path.
 *
 * Two strategies are tried in order:
 * 1. the first path segment that is exactly five characters long and contains `-` or `_`
 *    (for example `en-us`) is parsed as a full locale;
 * 2. otherwise the first one or two segments of length two or three are used. Two
 *    segments are assumed to be `region/language` and are therefore swapped before parsing.
 *
 * @param {object} context - Indicator context.
 * @param {URL} context.baseUrl - Parsed URL of the page being inspected.
 * @returns {Array<object>} A single `path` indicator, or an empty array when nothing in
 *   the path parses as a locale.
 */
export function checkPath({ baseUrl }) {
  // Drop a trailing file extension such as ".html" before looking at the segments.
  const trimmedPath = baseUrl.pathname.replace(/\.[^/.]+$/, '');
  if (!trimmedPath || trimmedPath === '/') {
    return [];
  }

  const rawSegments = trimmedPath.split('/');

  // Strategy 1: a five-character "xx-yy" / "xx_yy" segment.
  const fullTag = rawSegments.find(
    (segment) => segment.length === 5 && (segment.includes('-') || segment.includes('_')),
  );
  const fullLocale = fullTag ? parseLocale(fullTag) : null;
  if (fullLocale) {
    return [{ ...fullLocale, type: 'path' }];
  }

  // Strategy 2: up to two short (2-3 character) segments.
  const shortCodes = rawSegments
    .map((segment) => segment.toLowerCase().trim())
    .filter((segment) => segment.length === 2 || segment.length === 3)
    .slice(0, 2);
  if (shortCodes.length === 0) {
    return [];
  }

  const [first, second] = shortCodes;
  // With two codes the path is assumed to read region/language, so put the language first.
  const candidate = second === undefined ? first : `${second}_${first}`;

  const shortLocale = parseLocale(candidate);
  return shortLocale ? [{ ...shortLocale, type: 'path' }] : [];
}
97
+
98
/**
 * Derives locale indicators from the `content-language` and `x-content-language`
 * response headers.
 *
 * Headers may be supplied either as a plain object or as a fetch-style headers object
 * that exposes values through `get(name)`. Plain bracket access does not work on the
 * latter, so both shapes are handled explicitly. Plain-object keys are matched
 * case-insensitively.
 *
 * @param {object} context - Indicator context.
 * @param {object} [context.headers] - Response headers (plain object or object with `get()`).
 * @returns {Array<object>} One `header` indicator per comma-separated header value that
 *   parses as a locale; empty when no headers are available.
 */
export function checkHeaders({ headers }) {
  const indicators = [];
  if (!headers) {
    return indicators;
  }

  const readHeader = (name) => {
    if (typeof headers.get === 'function') {
      return headers.get(name);
    }
    if (headers[name] !== undefined) {
      return headers[name];
    }
    // Fall back to a case-insensitive match, e.g. "Content-Language".
    const matchingKey = Object.keys(headers).find((key) => key.toLowerCase() === name);
    return matchingKey === undefined ? undefined : headers[matchingKey];
  };

  const headerKeys = ['content-language', 'x-content-language'];

  for (const headerKey of headerKeys) {
    const raw = readHeader(headerKey);
    // Some header containers expose repeated headers as arrays; flatten them to one list.
    const headerValue = Array.isArray(raw) ? raw.join(',') : raw;
    if (typeof headerValue === 'string' && headerValue) {
      for (const value of headerValue.split(',')) {
        const locale = parseLocale(value.trim());
        if (locale) {
          indicators.push({ ...locale, type: 'header' });
        }
      }
    }
  }

  return indicators;
}
117
+
118
/**
 * Derives a locale indicator from the `lang` attribute of the `<html>` element.
 *
 * @param {object} context - Indicator context.
 * @param {Function} context.$ - Cheerio-style selector function for the loaded document.
 * @returns {Array<object>} A single `html` indicator, or an empty array when the
 *   attribute is missing, empty or not parseable as a locale.
 */
export function checkHtmlLang({ $ }) {
  const declaredLang = $('html').attr('lang');
  const parsed = declaredLang ? parseLocale(declaredLang) : null;
  return parsed ? [{ ...parsed, type: 'html' }] : [];
}
129
+
130
/**
 * Derives locale indicators from `<meta http-equiv="content-language">` and
 * `<meta property="og:locale">` tags.
 *
 * The `content` attribute of each matching selector is split on commas and every part
 * that parses as a locale yields one indicator.
 *
 * @param {object} context - Indicator context.
 * @param {Function} context.$ - Cheerio-style selector function for the loaded document.
 * @returns {Array<object>} Zero or more `metaTag` indicators.
 */
export function checkMetaTags({ $ }) {
  const selectors = ['meta[http-equiv="content-language"]', 'meta[property="og:locale"]'];
  const found = [];

  selectors.forEach((selector) => {
    const matches = $(selector);
    if (!matches || matches.length === 0) {
      return;
    }

    const content = matches.attr('content');
    if (!content) {
      return;
    }

    content
      .split(',')
      .map((part) => part.trim())
      .forEach((part) => {
        const parsed = parseLocale(part);
        if (parsed) {
          found.push({ ...parsed, type: 'metaTag' });
        }
      });
  });

  return found;
}
155
+
156
/**
 * Derives a locale indicator from the `<link hreflang>` tag that points at the inspected page.
 *
 * A link qualifies when its `hreflang` does not contain `default` (e.g. `x-default`) and
 * its resolved `hostname + pathname` contains the `hostname + pathname` of `baseUrl`.
 * Relative `href` values are resolved against `baseUrl`; links with a missing or
 * unparseable `href`, or an empty `hreflang`, are skipped instead of aborting detection.
 *
 * @param {object} context - Indicator context.
 * @param {URL} context.baseUrl - Parsed URL of the page being inspected.
 * @param {Function} context.$ - Cheerio-style selector function for the loaded document.
 * @returns {Array<object>} A single `hreflang` indicator, or an empty array when no link
 *   matches or its `hreflang` does not parse as a locale.
 */
export function checkHrefLang({ baseUrl, $ }) {
  const pageKey = `${baseUrl.hostname}${baseUrl.pathname}`;

  const matchingLinkTag = Array.from($('link[hreflang]')).find((element) => {
    const link = $(element);

    const hreflang = link.attr('hreflang');
    if (!hreflang || hreflang.includes('default')) {
      return false;
    }

    const href = link.attr('href');
    if (!href) {
      return false;
    }

    let target;
    try {
      // Passing baseUrl lets relative hrefs resolve; absolute hrefs ignore the base.
      target = new URL(href, baseUrl);
    } catch {
      return false;
    }

    return `${target.hostname}${target.pathname}`.includes(pageKey);
  });

  if (!matchingLinkTag) {
    return [];
  }

  const locale = parseLocale($(matchingLinkTag).attr('hreflang'));
  return locale ? [{ ...locale, type: 'hreflang' }] : [];
}
177
+
178
/**
 * Derives a language indicator by running statistical language detection (franc) on the
 * page's `<meta name="description">` content.
 *
 * @param {object} context - Indicator context.
 * @param {Function} context.$ - Cheerio-style selector function for the loaded document.
 * @returns {Array<object>} A single `content` indicator, or an empty array when there is
 *   no description or the detected code does not parse as a locale.
 */
export function checkContentLanguage({ $ }) {
  const description = $('meta[name="description"]').attr('content');
  if (!description) {
    return [];
  }

  const parsed = parseLocale(franc(description));
  if (!parsed) {
    return [];
  }
  return [{ ...parsed, type: 'content' }];
}
190
+
191
+ // Default indicator list: each entry takes the detection context and returns an array of indicator objects (possibly empty).
192
+ export const indicators = [
193
+ checkTld,
194
+ checkSubdomain,
195
+ checkPath,
196
+ checkHeaders,
197
+ checkHtmlLang,
198
+ checkMetaTags,
199
+ checkHrefLang,
200
+ checkContentLanguage,
201
+ ];
@@ -0,0 +1,66 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import * as cheerio from 'cheerio';
14
+
15
+ import { hasText, isNonEmptyObject, isValidUrl } from '../functions.js';
16
+ import { tracingFetch } from '../tracing-fetch.js';
17
+ import { indicators } from './indicators.js';
18
+
19
/**
 * Detects the most likely language and region of a web page.
 *
 * Every indicator function is run against the parsed URL, the response headers and the
 * loaded document. Each returned indicator casts one vote for its `language` and/or
 * `region`; the value with the most votes wins, with ties going to the value seen first.
 *
 * @param {object} config - Detection input.
 * @param {string} config.baseUrl - URL of the page to inspect.
 * @param {string} [config.html] - Page markup; when absent or blank the page is fetched
 *   and both the fetched body and the fetched headers are used.
 * @param {object} [config.headers] - Response headers; when `html` is supplied but no
 *   headers are, a HEAD request is issued to obtain them.
 * @param {Function[]} [config.indicatorFuncs] - Indicator functions to run; defaults to
 *   the built-in list.
 * @returns {Promise<{region: string, language: string}>} The winning region and language,
 *   falling back to `US` and `en` respectively when no indicator reports a value.
 * @throws {Error} `Invalid baseUrl` when `baseUrl` is missing or not a valid URL.
 */
export async function detectLocale(config) {
  const { baseUrl, indicatorFuncs = indicators } = config;

  if (!baseUrl || !isValidUrl(baseUrl)) {
    throw new Error('Invalid baseUrl');
  }
  const parsedBaseUrl = new URL(baseUrl);

  // Fill in whatever the caller did not supply.
  let { html, headers } = config;
  if (!hasText(config.html)) {
    const pageResponse = await tracingFetch(baseUrl, { timeout: 5000 });
    headers = pageResponse.headers;
    html = await pageResponse.text();
  } else if (!isNonEmptyObject(config.headers)) {
    const headResponse = await tracingFetch(baseUrl, { timeout: 5000, method: 'HEAD' });
    headers = headResponse.headers;
  }

  const $ = cheerio.load(html);

  // Run every indicator and collect its findings.
  const findings = [];
  for (const runIndicator of indicatorFuncs) {
    findings.push(...runIndicator({ baseUrl: parsedBaseUrl, headers, $ }));
  }

  // Count one vote per indicator for each value it reports.
  const votes = { region: {}, language: {} };
  for (const finding of findings) {
    if (finding.region) {
      votes.region[finding.region] = (votes.region[finding.region] || 0) + 1;
    }
    if (finding.language) {
      votes.language[finding.language] = (votes.language[finding.language] || 0) + 1;
    }
  }

  // Highest count wins; a strict comparison keeps the earliest key on ties.
  const mostVoted = (counts, fallback) => {
    let winner = fallback;
    let highest = 0;
    for (const key of Object.keys(counts)) {
      if (counts[key] > highest) {
        highest = counts[key];
        winner = key;
      }
    }
    return winner;
  };

  return {
    region: mostVoted(votes.region, 'US'),
    language: mostVoted(votes.language, 'en'),
  };
}
@@ -0,0 +1,56 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import { iso6393 } from 'iso-639-3';
13
+ import worldCountries from 'world-countries';
14
+
15
/**
 * Parses a locale-like string (e.g. `en`, `en-US`, `de_CH`, `zh-Hant-TW`) into a
 * validated language and/or region.
 *
 * The input is trimmed, lower-cased and split on `-` / `_`. The first subtag is looked up
 * as an ISO 639-1 or ISO 639-3 language code and reported as its ISO 639-1 code;
 * languages without an ISO 639-1 code are not reported. The remaining subtags are
 * scanned in order for the first one that is a known two- or three-letter country code,
 * so tags carrying a script subtag before the region still yield a region.
 *
 * @param {string} locale - Locale string to parse.
 * @returns {{language?: string, region?: string} | null} The recognised parts (language
 *   in lower case, region as upper-case two-letter code), or `null` when the input is not
 *   a string or neither part is recognised.
 */
export function parseLocale(locale) {
  if (typeof locale !== 'string') {
    return null;
  }

  const [languageTag, ...otherTags] = locale.trim().toLowerCase().split(/[-_]/);

  // Validate language; entries lacking a two-letter code leave `language` undefined.
  const languageEntry = iso6393.find(
    (l) => l.iso6393 === languageTag || l.iso6391 === languageTag,
  );
  const language = languageEntry ? languageEntry.iso6391 : null;

  // Validate region: first remaining subtag that names a country wins.
  let country;
  for (const tag of otherTags) {
    country = worldCountries.find(
      (c) => c.cca2.toLowerCase() === tag || c.cca3.toLowerCase() === tag,
    );
    if (country) {
      break;
    }
  }
  const region = country ? country.cca2.toUpperCase() : null;

  if (!language && !region) {
    return null;
  }

  const result = {};
  if (language) {
    result.language = language;
  }
  if (region) {
    result.region = region;
  }
  return result;
}