@socialgouv/fiches-travail-data-types 4.532.0 → 4.533.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/fetch-data/parseDom.js +32 -14
- package/package.json +1 -1
|
@@ -34,9 +34,7 @@ function getCleanSrc(src) {
|
|
|
34
34
|
}
|
|
35
35
|
const formatPicture = (node) => {
|
|
36
36
|
let comment;
|
|
37
|
-
node.parentElement
|
|
38
|
-
.childNodes
|
|
39
|
-
.forEach(function (childNode) {
|
|
37
|
+
node.parentElement.childNodes.forEach(function (childNode) {
|
|
40
38
|
if (childNode.nodeName === "#comment" || childNode.nodeType === 8) {
|
|
41
39
|
if (childNode.data.match(SRC_REGEX)) {
|
|
42
40
|
comment = childNode;
|
|
@@ -52,9 +50,7 @@ const formatPicture = (node) => {
|
|
|
52
50
|
}
|
|
53
51
|
}
|
|
54
52
|
let image;
|
|
55
|
-
node
|
|
56
|
-
.childNodes
|
|
57
|
-
.forEach(function (childNode) {
|
|
53
|
+
node.childNodes.forEach(function (childNode) {
|
|
58
54
|
if (childNode.nodeName === "IMG") {
|
|
59
55
|
image = childNode;
|
|
60
56
|
}
|
|
@@ -150,20 +146,32 @@ function parseDom(dom, id, url) {
|
|
|
150
146
|
}
|
|
151
147
|
}
|
|
152
148
|
const title = titleElement.textContent.trim();
|
|
153
|
-
const dateRaw = $(dom.window.document, "meta[property*=modified_time]") ||
|
|
149
|
+
const dateRaw = $(dom.window.document, "meta[property*=modified_time]") ||
|
|
150
|
+
$(dom.window.document, "meta[property$=published_time]");
|
|
154
151
|
const [year, month, day] = dateRaw.getAttribute("content").split("-");
|
|
155
152
|
let intro = $(article, ".main-article__chapo") || "";
|
|
156
|
-
intro =
|
|
157
|
-
|
|
153
|
+
intro =
|
|
154
|
+
intro &&
|
|
155
|
+
intro.innerHTML
|
|
156
|
+
.replace(/\n/g, "")
|
|
157
|
+
.replace(/\s+/g, " ")
|
|
158
|
+
.trim()
|
|
159
|
+
.replace(/<script[^>]*>([\s\S]*?)<\/script>/g, "");
|
|
160
|
+
const description = $(dom.window.document, "meta[name=description]")?.getAttribute("content") ??
|
|
161
|
+
"";
|
|
158
162
|
const sections = [];
|
|
159
163
|
const sectionTag = getSectionTag(article);
|
|
160
164
|
// First pass is only to get a potential untitled section at the top of the article
|
|
161
165
|
// This section has neither anchor nor title
|
|
162
166
|
let nextArticleElement = $(article, ".main-article__texte > *");
|
|
163
167
|
const untitledSection = {
|
|
164
|
-
anchor: "",
|
|
168
|
+
anchor: "",
|
|
169
|
+
html: "",
|
|
170
|
+
text: "",
|
|
171
|
+
title: title,
|
|
165
172
|
};
|
|
166
|
-
while (nextArticleElement &&
|
|
173
|
+
while (nextArticleElement &&
|
|
174
|
+
nextArticleElement.tagName.toLowerCase() !== sectionTag) {
|
|
167
175
|
if (nextArticleElement.textContent) {
|
|
168
176
|
if (!untitledSection.description) {
|
|
169
177
|
untitledSection.description = "temp description";
|
|
@@ -172,7 +180,8 @@ function parseDom(dom, id, url) {
|
|
|
172
180
|
.replace(/\n+/g, "")
|
|
173
181
|
.replace(/>\s+</g, "><")
|
|
174
182
|
.replace(/\s+/g, " ");
|
|
175
|
-
untitledSection.text +=
|
|
183
|
+
untitledSection.text +=
|
|
184
|
+
" " + nextArticleElement.textContent.replace(/\s+/g, " ").trim();
|
|
176
185
|
}
|
|
177
186
|
nextArticleElement = nextArticleElement.nextElementSibling;
|
|
178
187
|
}
|
|
@@ -198,7 +207,10 @@ function parseDom(dom, id, url) {
|
|
|
198
207
|
sections.push({
|
|
199
208
|
anchor: el.getAttribute("id") || (0, cdtn_slugify_1.default)(el.textContent),
|
|
200
209
|
description: sectionText.slice(0, 200).trim(),
|
|
201
|
-
html: html
|
|
210
|
+
html: html
|
|
211
|
+
.replace(/\n+/g, "")
|
|
212
|
+
.replace(/>\s+</g, "><")
|
|
213
|
+
.replace(/\s+/g, " "),
|
|
202
214
|
references: getReferences(sectionText),
|
|
203
215
|
text: sectionText,
|
|
204
216
|
title: el.textContent.trim(),
|
|
@@ -209,7 +221,13 @@ function parseDom(dom, id, url) {
|
|
|
209
221
|
throw new got_1.ParseError(`No sections`);
|
|
210
222
|
}
|
|
211
223
|
return {
|
|
212
|
-
date: `${day}/${month}/${year}`,
|
|
224
|
+
date: `${day}/${month}/${year}`,
|
|
225
|
+
description,
|
|
226
|
+
intro,
|
|
227
|
+
pubId: id,
|
|
228
|
+
sections,
|
|
229
|
+
title,
|
|
230
|
+
url,
|
|
213
231
|
};
|
|
214
232
|
}
|
|
215
233
|
exports.parseDom = parseDom;
|