@adobe/spacecat-shared-utils 1.58.1 → 1.59.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/package.json +6 -2
- package/src/index.d.ts +2 -0
- package/src/index.js +1 -0
- package/src/locale-detect/index.d.ts +24 -0
- package/src/locale-detect/indicators.js +201 -0
- package/src/locale-detect/locale-detect.js +66 -0
- package/src/locale-detect/utils.js +56 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
# [@adobe/spacecat-shared-utils-v1.59.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.58.1...@adobe/spacecat-shared-utils-v1.59.0) (2025-10-09)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* Add locale detection util ([#1006](https://github.com/adobe/spacecat-shared/issues/1006)) ([cb8dcd6](https://github.com/adobe/spacecat-shared/commit/cb8dcd69e85cf673f9c791f8653e164e1e5a06d8))
|
|
7
|
+
|
|
1
8
|
# [@adobe/spacecat-shared-utils-v1.58.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.58.0...@adobe/spacecat-shared-utils-v1.58.1) (2025-10-09)
|
|
2
9
|
|
|
3
10
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/spacecat-shared-utils",
|
|
3
|
-
"version": "1.58.1",
|
|
3
|
+
"version": "1.59.0",
|
|
4
4
|
"description": "Shared modules of the Spacecat Services - utils",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
"require": "test/setup-env.js",
|
|
26
26
|
"reporter": "mocha-multi-reporters",
|
|
27
27
|
"reporter-options": "configFile=.mocha-multi.json",
|
|
28
|
-
"spec": "test
|
|
28
|
+
"spec": "test/**/*.test.js"
|
|
29
29
|
},
|
|
30
30
|
"repository": {
|
|
31
31
|
"type": "git",
|
|
@@ -60,8 +60,12 @@
|
|
|
60
60
|
"@aws-sdk/client-sqs": "3.893.0",
|
|
61
61
|
"@json2csv/plainjs": "7.0.6",
|
|
62
62
|
"aws-xray-sdk": "3.10.3",
|
|
63
|
+
"cheerio": "1.1.2",
|
|
63
64
|
"date-fns": "4.1.0",
|
|
65
|
+
"franc-min": "6.2.0",
|
|
66
|
+
"iso-639-3": "3.0.1",
|
|
64
67
|
"validator": "^13.15.15",
|
|
68
|
+
"world-countries": "5.1.0",
|
|
65
69
|
"zod": "^4.1.11"
|
|
66
70
|
}
|
|
67
71
|
}
|
package/src/index.d.ts
CHANGED
package/src/index.js
CHANGED
|
@@ -101,4 +101,5 @@ export { determineAEMCSPageId, getPageEditUrl } from './aem-content-api-utils.js
|
|
|
101
101
|
export * as llmoConfig from './llmo-config.js';
|
|
102
102
|
export * as schemas from './schemas.js';
|
|
103
103
|
|
|
104
|
+
export { detectLocale } from './locale-detect/locale-detect.js';
|
|
104
105
|
export { prettifyLogForwardingConfig } from './cdn-helpers.js';
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Options accepted by `detectLocale`.
 */
interface LocaleDetectConfig {
|
|
14
|
+
baseUrl: string; // URL of the page to analyse; `detectLocale` rejects with 'Invalid baseUrl' when it is missing or not a valid URL.
|
|
15
|
+
headers?: object; // Response headers of the page; fetched from `baseUrl` when this is not a non-empty object or when `html` is not supplied.
|
|
16
|
+
html?: string; // HTML of the page; fetched from `baseUrl` when omitted or blank.
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/**
 * Locale resolved by `detectLocale`.
 */
interface LocaleDetectResponse {
|
|
20
|
+
language: string; // Language reported by the most indicators; 'en' when no indicator reports a language.
|
|
21
|
+
region: string; // Region reported by the most indicators; 'US' when no indicator reports a region.
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
 * Detects the language and region of a page by running a set of indicators over its
 * URL, response headers and HTML, and picking the most frequently reported values.
 *
 * @param config - Page URL plus optional pre-fetched headers and HTML.
 * @returns The detected locale, falling back to language 'en' and region 'US'.
 * @throws Rejects with `Error('Invalid baseUrl')` when `config.baseUrl` is missing or invalid.
 */
export function detectLocale(config: LocaleDetectConfig): Promise<LocaleDetectResponse>;
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import worldCountries from 'world-countries';
|
|
14
|
+
import { franc } from 'franc-min';
|
|
15
|
+
|
|
16
|
+
import { parseLocale } from './utils.js';
|
|
17
|
+
|
|
18
|
+
/**
 * Region indicator based on the top-level domain of the page URL.
 *
 * @param {object} context - Indicator context.
 * @param {URL} context.baseUrl - Parsed URL of the page being analysed.
 * @returns {Array<{region: string, type: string}>} One `tld` indicator when the last
 *   hostname label is listed as a TLD of a country in `world-countries`, otherwise `[]`.
 */
export function checkTld({ baseUrl }) {
  const labels = baseUrl.hostname.split('.');
  // A hostname without a dot (e.g. "localhost") has no TLD to inspect.
  if (labels.length < 2) {
    return [];
  }

  const suffix = `.${labels[labels.length - 1].toLowerCase()}`;
  const match = worldCountries.find((entry) => entry.tld.includes(suffix));

  return match ? [{ region: match.cca2.toUpperCase(), type: 'tld' }] : [];
}
|
|
35
|
+
|
|
36
|
+
/**
 * Locale indicator based on the left-most hostname label (e.g. `de.example.com`).
 *
 * @param {object} context - Indicator context.
 * @param {URL} context.baseUrl - Parsed URL of the page being analysed.
 * @returns {Array<object>} One `subdomain` indicator when the label parses as a
 *   language and/or region, otherwise `[]`.
 */
export function checkSubdomain({ baseUrl }) {
  const labels = baseUrl.hostname.split('.');
  // Need at least "<sub>.<domain>.<tld>" for a subdomain to exist.
  if (labels.length < 3) {
    return [];
  }

  const [candidate] = labels;
  const looksLikeCode = Boolean(candidate)
    && candidate !== 'www'
    && candidate.length >= 2
    && candidate.length <= 3;
  if (!looksLikeCode) {
    return [];
  }

  // The label may be a language or a region, so it is offered to the parser as both.
  const parsed = parseLocale(`${candidate}_${candidate}`);
  return parsed ? [{ ...parsed, type: 'subdomain' }] : [];
}
|
|
52
|
+
|
|
53
|
+
/**
 * Locale indicator based on the URL path (e.g. `/en-us/...` or `/us/en/...`).
 *
 * @param {object} context - Indicator context.
 * @param {URL} context.baseUrl - Parsed URL of the page being analysed.
 * @returns {Array<object>} One `path` indicator when a locale can be parsed from the
 *   path segments, otherwise `[]`.
 */
export function checkPath({ baseUrl }) {
  // Drop a trailing file extension such as ".html".
  const strippedPath = baseUrl.pathname.replace(/\.[^/.]+$/, '');
  if (!strippedPath || strippedPath === '/') {
    return [];
  }

  const parts = strippedPath.split('/');

  // First preference: a five-character "xx-yy" / "xx_yy" segment.
  const fullTag = parts.find(
    (part) => part.length === 5 && (part.includes('-') || part.includes('_')),
  );
  if (fullTag) {
    const parsedTag = parseLocale(fullTag);
    if (parsedTag) {
      return [{ ...parsedTag, type: 'path' }];
    }
  }

  // Otherwise use at most the first two segments that are 2 or 3 characters long.
  const shortParts = parts
    .map((part) => part.toLowerCase().trim())
    .filter((part) => part.length === 2 || part.length === 3)
    .slice(0, 2);
  if (shortParts.length === 0) {
    return [];
  }

  // Two segments are assumed to be "<region>/<language>", so they are swapped
  // into "<language>_<region>" before parsing.
  const parsed = parseLocale(shortParts.reverse().join('_'));
  return parsed ? [{ ...parsed, type: 'path' }] : [];
}
|
|
97
|
+
|
|
98
|
+
/**
 * Locale indicator based on the `content-language` / `x-content-language`
 * response headers.
 *
 * Accepts either a plain object of headers (names matched case-insensitively) or a
 * Headers-like object exposing `get(name)`, which is what a fetch response provides.
 *
 * @param {object} context - Indicator context.
 * @param {object} [context.headers] - Response headers of the page.
 * @returns {Array<object>} One `header` indicator per header value that parses as a
 *   locale; `[]` when headers are missing or contain no usable value.
 */
export function checkHeaders({ headers }) {
  const found = [];
  if (!headers) {
    return found;
  }

  // Header names are case-insensitive; Headers-like objects handle that in get().
  const readHeader = (name) => {
    if (typeof headers.get === 'function') {
      return headers.get(name);
    }
    const key = Object.keys(headers).find((k) => k.toLowerCase() === name);
    return key === undefined ? undefined : headers[key];
  };

  const headerKeys = ['content-language', 'x-content-language'];

  for (const headerKey of headerKeys) {
    const raw = readHeader(headerKey);
    // Ignore absent or non-string values instead of failing on .split().
    if (typeof raw === 'string' && raw) {
      const values = raw.split(',').map((v) => v.trim());
      for (const value of values) {
        const locale = parseLocale(value);
        if (locale) {
          found.push({ ...locale, type: 'header' });
        }
      }
    }
  }

  return found;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Locale indicator based on the `lang` attribute of the `<html>` element.
 *
 * @param {object} context - Indicator context.
 * @param {Function} context.$ - Cheerio instance loaded with the page HTML.
 * @returns {Array<object>} One `html` indicator when the attribute parses as a
 *   locale, otherwise `[]`.
 */
export function checkHtmlLang({ $ }) {
  const declared = $('html').attr('lang');
  const parsed = declared ? parseLocale(declared) : null;
  return parsed ? [{ ...parsed, type: 'html' }] : [];
}
|
|
129
|
+
|
|
130
|
+
/**
 * Locale indicator based on the `content-language` and `og:locale` meta tags.
 *
 * @param {object} context - Indicator context.
 * @param {Function} context.$ - Cheerio instance loaded with the page HTML.
 * @returns {Array<object>} One `metaTag` indicator per comma-separated value that
 *   parses as a locale, in selector order.
 */
export function checkMetaTags({ $ }) {
  const selectors = ['meta[http-equiv="content-language"]', 'meta[property="og:locale"]'];

  return selectors.flatMap((selector) => {
    const tag = $(selector);
    if (!(tag && tag.length > 0)) {
      return [];
    }
    const content = tag.attr('content');
    if (!content) {
      return [];
    }
    return content
      .split(',')
      .map((value) => parseLocale(value.trim()))
      .filter(Boolean)
      .map((parsed) => ({ ...parsed, type: 'metaTag' }));
  });
}
|
|
155
|
+
|
|
156
|
+
export function checkHrefLang({ baseUrl, $ }) {
|
|
157
|
+
const linkTags = $('link[hreflang]');
|
|
158
|
+
const matchingLinkTag = Array.from(linkTags).find((element) => {
|
|
159
|
+
const elementHref = new URL($(element).attr('href'));
|
|
160
|
+
if (!`${elementHref.hostname}${elementHref.pathname}`.includes(`${baseUrl.hostname}${baseUrl.pathname}`)) {
|
|
161
|
+
return false;
|
|
162
|
+
}
|
|
163
|
+
if ($(element).attr('hreflang').includes('default')) {
|
|
164
|
+
return false;
|
|
165
|
+
}
|
|
166
|
+
return true;
|
|
167
|
+
});
|
|
168
|
+
if (!matchingLinkTag) {
|
|
169
|
+
return [];
|
|
170
|
+
}
|
|
171
|
+
const locale = parseLocale($(matchingLinkTag).attr('hreflang'));
|
|
172
|
+
if (locale) {
|
|
173
|
+
return [{ ...locale, type: 'hreflang' }];
|
|
174
|
+
}
|
|
175
|
+
return [];
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
/**
 * Language indicator based on the text of the page's meta description, using
 * `franc` to guess the language of that text.
 *
 * @param {object} context - Indicator context.
 * @param {Function} context.$ - Cheerio instance loaded with the page HTML.
 * @returns {Array<object>} One `content` indicator when the guessed language parses
 *   as a locale, otherwise `[]`.
 */
export function checkContentLanguage({ $ }) {
  const description = $('meta[name="description"]').attr('content');
  if (!description) {
    return [];
  }

  const parsed = parseLocale(franc(description));
  return parsed ? [{ ...parsed, type: 'content' }] : [];
}
|
|
190
|
+
|
|
191
|
+
// Export all indicators as array
|
|
192
|
+
/**
 * Built-in locale indicators, in the order `detectLocale` runs them when no
 * custom list is supplied. Each entry takes `{ baseUrl, headers, $ }` and returns
 * an array of `{ language?, region?, type }` results.
 */
export const indicators = [
  checkTld,
  checkSubdomain,
  checkPath,
  checkHeaders,
  checkHtmlLang,
  checkMetaTags,
  checkHrefLang,
  checkContentLanguage,
];
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import * as cheerio from 'cheerio';
|
|
14
|
+
|
|
15
|
+
import { hasText, isNonEmptyObject, isValidUrl } from '../functions.js';
|
|
16
|
+
import { tracingFetch } from '../tracing-fetch.js';
|
|
17
|
+
import { indicators } from './indicators.js';
|
|
18
|
+
|
|
19
|
+
/**
 * Converts fetched response headers into a plain object with lower-cased names so
 * indicators can read them by key. Plain objects are returned unchanged.
 */
function toPlainHeaders(headers) {
  if (!headers) {
    return {};
  }
  if (typeof headers.entries !== 'function') {
    return headers;
  }
  const plain = {};
  for (const [name, value] of headers.entries()) {
    plain[String(name).toLowerCase()] = value;
  }
  return plain;
}

/**
 * Detects the language and region of a page.
 *
 * Runs every indicator over the parsed URL, the response headers and the parsed
 * HTML, counts how often each language and region is reported, and returns the
 * most frequent of each. HTML and headers are fetched from `baseUrl` when they are
 * not supplied.
 *
 * @param {object} config - Detection options.
 * @param {string} config.baseUrl - URL of the page to analyse.
 * @param {object} [config.headers] - Pre-fetched response headers.
 * @param {string} [config.html] - Pre-fetched HTML of the page.
 * @param {Function[]} [config.indicatorFuncs] - Indicators to run; defaults to the
 *   built-in `indicators` list.
 * @returns {Promise<{region: string, language: string}>} Detected locale, defaulting
 *   to region 'US' and language 'en' when nothing is reported.
 * @throws {Error} 'Invalid baseUrl' when `baseUrl` is missing or not a valid URL.
 */
export async function detectLocale(config) {
  const { baseUrl, indicatorFuncs = indicators } = config;

  // Abort if baseUrl was not provided or cannot be parsed
  if (!baseUrl || !isValidUrl(baseUrl)) {
    throw new Error('Invalid baseUrl');
  }
  const indicatorResults = [];

  const parsedBaseUrl = new URL(baseUrl);

  // If not provided, fetch HTML and headers
  let { html, headers } = config;
  if (!hasText(config.html)) {
    const response = await tracingFetch(baseUrl, { timeout: 5000 });
    // Fetched headers are not a plain object; normalize so key lookups work.
    headers = toPlainHeaders(response.headers);
    html = await response.text();
  } else if (!isNonEmptyObject(config.headers)) {
    // HTML was supplied, so a HEAD request is enough to obtain the headers.
    const response = await tracingFetch(baseUrl, { timeout: 5000, method: 'HEAD' });
    headers = toPlainHeaders(response.headers);
  }

  const $ = cheerio.load(html);

  // Execute language detection indicators
  for (const indicator of indicatorFuncs) {
    const results = indicator({ baseUrl: parsedBaseUrl, headers, $ });
    indicatorResults.push(...results);
  }

  // Derive locale from results: count votes per region and per language
  const summary = indicatorResults.reduce((acc, indicator) => {
    if (indicator.region) {
      acc.region[indicator.region] = (acc.region[indicator.region] || 0) + 1;
    }
    if (indicator.language) {
      acc.language[indicator.language] = (acc.language[indicator.language] || 0) + 1;
    }
    return acc;
  }, { region: {}, language: {} });

  // Highest count wins; the sort is stable, so ties go to the value reported first.
  const region = Object.keys(summary.region).length > 0
    ? Object.keys(summary.region).sort((a, b) => summary.region[b] - summary.region[a])[0]
    : 'US';
  const language = Object.keys(summary.language).length > 0
    ? Object.keys(summary.language).sort((a, b) => summary.language[b] - summary.language[a])[0]
    : 'en';

  return {
    region,
    language,
  };
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import { iso6393 } from 'iso-639-3';
|
|
13
|
+
import worldCountries from 'world-countries';
|
|
14
|
+
|
|
15
|
+
/**
 * Parses a locale-like string (e.g. `en`, `en-US`, `de_CH`, `zh-Hant-TW`) into a
 * validated language and/or region.
 *
 * The first subtag is looked up as an ISO 639-1 or ISO 639-3 language code and
 * reported as its ISO 639-1 code. The region subtag is looked up as an ISO 3166-1
 * alpha-2 or alpha-3 country code and reported as upper-case alpha-2. A four-letter
 * script subtag between language and region is skipped.
 *
 * @param {string} locale - Locale string using `-` or `_` as separator.
 * @returns {{language?: string, region?: string} | null} Object holding whichever
 *   parts were recognised, or `null` when neither was (or the input is not a string).
 */
export function parseLocale(locale) {
  if (typeof locale !== 'string') {
    return null;
  }

  const [languageTag, ...rest] = locale.trim().toLowerCase().split(/[-_]/);
  // In "language-script-region" tags the script is a 4-letter subtag; step over it.
  const regionTag = /^[a-z]{4}$/.test(rest[0] || '') ? rest[1] : rest[0];

  // Validate language; entries without a two-letter code yield no language.
  const languageEntry = iso6393.find(
    (l) => l.iso6393 === languageTag || l.iso6391 === languageTag,
  );
  const language = languageEntry && languageEntry.iso6391 ? languageEntry.iso6391 : null;

  // Validate region
  const country = regionTag
    ? worldCountries.find(
      (c) => c.cca2.toLowerCase() === regionTag || c.cca3.toLowerCase() === regionTag,
    )
    : undefined;
  const region = country ? country.cca2.toUpperCase() : null;

  if (!language && !region) {
    return null;
  }

  const result = {};
  if (language) {
    result.language = language;
  }
  if (region) {
    result.region = region;
  }
  return result;
}
|