@socialgouv/fiches-travail-data 4.374.0 → 4.376.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/build/checkRefs.d.ts +1 -0
- package/build/checkRefs.js +24 -0
- package/build/email.d.ts +3 -0
- package/build/email.js +12 -0
- package/build/fetch-data/generateHeaders.d.ts +4 -0
- package/build/fetch-data/generateHeaders.js +16 -0
- package/build/fetch-data/index.d.ts +2 -0
- package/build/fetch-data/index.js +66 -0
- package/build/fetch-data/parseDom.d.ts +14 -0
- package/build/fetch-data/parseDom.js +211 -0
- package/build/fetch-data/postProcess.d.ts +1 -0
- package/build/fetch-data/postProcess.js +60 -0
- package/build/fetch-data/referenceExtractor.d.ts +18 -0
- package/build/fetch-data/referenceExtractor.js +239 -0
- package/build/fetch-data/referenceResolver.d.ts +1 -0
- package/build/fetch-data/referenceResolver.js +176 -0
- package/build/fetch-data/scrapUrl.d.ts +14 -0
- package/build/fetch-data/scrapUrl.js +52 -0
- package/build/index.d.ts +3 -0
- package/build/index.js +20 -0
- package/build/types.d.ts +29 -0
- package/build/types.js +2 -0
- package/package.json +26 -40
- package/data/fiches-travail.json +0 -34908
- package/index.d.ts +0 -32
- package/index.esm.js +0 -7
- package/index.js +0 -11
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,23 @@
|
|
|
1
|
+
# [4.376.0](https://github.com/SocialGouv/fiches-travail-data/compare/v4.375.0...v4.376.0) (2022-12-06)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Bug Fixes
|
|
5
|
+
|
|
6
|
+
* **build:** ajout de la gestion build full typescript ✨ ([#386](https://github.com/SocialGouv/fiches-travail-data/issues/386)) ([46e5ade](https://github.com/SocialGouv/fiches-travail-data/commit/46e5ade139b31b3e4ffd220dff1136248cd3aeaa))
|
|
7
|
+
* **picture:** ajout du scénario pour la balise picture + typescript / eslint ([#385](https://github.com/SocialGouv/fiches-travail-data/issues/385)) ([5dce100](https://github.com/SocialGouv/fiches-travail-data/commit/5dce100607fbfc938a96468ac6774e3924df559b))
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
### Features
|
|
11
|
+
|
|
12
|
+
* **data:** 20221206_1018 update ([8ee594e](https://github.com/SocialGouv/fiches-travail-data/commit/8ee594e4e29b30300a501030f37dbeeac83980a8))
|
|
13
|
+
|
|
14
|
+
# [4.375.0](https://github.com/SocialGouv/fiches-travail-data/compare/v4.374.0...v4.375.0) (2022-12-01)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
### Features
|
|
18
|
+
|
|
19
|
+
* **data:** 20221201_2211 update ([9bf8887](https://github.com/SocialGouv/fiches-travail-data/commit/9bf888781d07d7c8d1bdc87af5dc038fa1dcf9b3))
|
|
20
|
+
|
|
1
21
|
# [4.374.0](https://github.com/SocialGouv/fiches-travail-data/compare/v4.373.0...v4.374.0) (2022-11-30)
|
|
2
22
|
|
|
3
23
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const fiches = require("../data/fiches-travail.json");
|
|
3
|
+
// Key under which a section's `references` object stores the references that
// could not be resolved.
const UNDEFINED_KEY = "UNDEFINED";
// Fiches having at least one section whose references contain UNDEFINED_KEY.
// As a side effect, logs one line per section that has no `references` at all.
const undefinedReferences = fiches.filter((fiche) => {
    let unresolvedSections = 0;
    for (const section of fiche.sections) {
        if (!section.references) {
            console.log("no refs in " + fiche.title);
            continue;
        }
        if (UNDEFINED_KEY in section.references) {
            unresolvedSections += 1;
        }
    }
    return unresolvedSections > 0;
});
|
|
13
|
+
/**
 * Prints a markdown report for one fiche: a linked heading, then for every
 * section holding unresolved references its anchor followed by the
 * de-duplicated reference texts joined with " / ".
 */
const printMissingRef = (fiche) => {
    console.log(`#### [${fiche.title}](${fiche.url})`);
    for (const section of fiche.sections) {
        const { references } = section;
        if (!references || !(UNDEFINED_KEY in references)) {
            continue;
        }
        console.log(`- ${section.anchor}`);
        const uniqueTexts = new Set(references[UNDEFINED_KEY].articles.map((ref) => ref.text));
        console.log(`> ${[...uniqueTexts].join(" / ")}`);
    }
};
|
|
23
|
+
// Summary line first, then one detailed report per fiche with unresolved references.
const unresolvedSummary = `### ${undefinedReferences.length}/${fiches.length} fiches aux références non résolues.`;
console.log(unresolvedSummary);
for (const fiche of undefinedReferences) {
    printMissingRef(fiche);
}
|
package/build/email.d.ts
ADDED
package/build/email.js
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.encode = exports.decode = void 0;
|
|
4
|
+
// Marker inserted in front of every "@" by encode() and stripped by decode().
const char = "_";
/**
 * Returns `str` with every "@" prefixed by the marker character.
 */
function encode(str) {
    const marked = `${char}@`;
    return str.split("@").join(marked);
}
|
|
8
|
+
exports.encode = encode;
|
|
9
|
+
/**
 * Reverses encode(): every occurrence of the marker character followed by
 * "@" is replaced by a plain "@".
 */
function decode(str) {
    const marked = `${char}@`;
    return str.split(marked).join("@");
}
|
|
12
|
+
exports.decode = decode;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.generateHeaders = void 0;
|
|
4
|
+
/**
 * Builds the request headers, adding the `cgtoken` cookie used to bypass the
 * bot protection.
 *
 * The token is read from the TOKEN_MT environment variable. The Cookie entry
 * is written last, so it overrides any `Cookie` key present in `extras`.
 *
 * @param extras additional headers copied into the result
 * @returns a new object holding `extras` plus the Cookie header
 * @throws Error when TOKEN_MT is not set (or empty)
 */
function generateHeaders(extras) {
    const token = process.env.TOKEN_MT;
    if (!token) {
        throw Error("Token (cgtoken) is required to fetch the data. This token is provided by the travail-emploi.gouv.fr team.");
    }
    return Object.assign({}, extras, { Cookie: `cgtoken=${token};` });
}
|
|
16
|
+
exports.generateHeaders = generateHeaders;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.scrap = exports.fetchFeed = void 0;
|
|
7
|
+
const fs_1 = __importDefault(require("fs"));
|
|
8
|
+
const got_1 = __importDefault(require("got"));
|
|
9
|
+
const p_limit_1 = __importDefault(require("p-limit"));
|
|
10
|
+
const path_1 = __importDefault(require("path"));
|
|
11
|
+
const generateHeaders_1 = require("./generateHeaders");
|
|
12
|
+
const scrapUrl_1 = require("./scrapUrl");
|
|
13
|
+
const FEED_URL = "https://travail-emploi.gouv.fr/?page=oseo_json";
|
|
14
|
+
const limit = (0, p_limit_1.default)(10);
|
|
15
|
+
/**
 * POSTs to `url` (with the token cookie headers, HTTP/2 and 3 retries) and
 * returns the `fiches` property of the JSON response body.
 */
async function fetchFeed(url) {
    const requestOptions = {
        headers: (0, generateHeaders_1.generateHeaders)({
            "Content-Type": "application/json",
        }),
        http2: true,
        retry: 3,
    };
    const { body } = await got_1.default.post(url, requestOptions);
    const payload = JSON.parse(body);
    return payload.fiches;
}
|
|
26
|
+
exports.fetchFeed = fetchFeed;
|
|
27
|
+
async function scrap(urls) {
|
|
28
|
+
const inputs = urls.map(({ id, url }) => limit(() => (0, scrapUrl_1.scrapUrl)(id, url)));
|
|
29
|
+
const results = await Promise.allSettled(inputs);
|
|
30
|
+
const failedPromise = results.filter(({ status }) => status === "rejected");
|
|
31
|
+
if (failedPromise.length > 0) {
|
|
32
|
+
console.error("scrap fail", failedPromise.map(({ reason }) => reason));
|
|
33
|
+
throw new Error("Error - fetching pages fail. Some pages are missing");
|
|
34
|
+
}
|
|
35
|
+
const resolvedPromise = results.flatMap(({ status, value }) => status === "fulfilled" ? [value] : []);
|
|
36
|
+
// ensure we not have duplicate url
|
|
37
|
+
let hasDuplicate = false;
|
|
38
|
+
for (const { pubId, url } of resolvedPromise) {
|
|
39
|
+
const count = resolvedPromise.filter((fiche) => fiche.pubId === pubId && pubId !== undefined).length;
|
|
40
|
+
if (count > 1) {
|
|
41
|
+
hasDuplicate = true;
|
|
42
|
+
console.error(`[error] la fiche ${url} est présente ${count} fois. Veuillez supprimer le doublon du datafiller`);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
if (hasDuplicate) {
|
|
46
|
+
throw new Error(`[error] fiches en doublons. Veuillez supprimer les doublons du datafiller`);
|
|
47
|
+
}
|
|
48
|
+
return resolvedPromise;
|
|
49
|
+
}
|
|
50
|
+
exports.scrap = scrap;
|
|
51
|
+
// CLI entry point: when this file is run directly, fetch the feed, scrape
// every fiche and write the result to <package>/data/fiches-travail.json.
// Any failure is logged and the process exits with code 1.
if (module === require.main) {
    const startedAt = Date.now();
    const elapsedSeconds = () => Math.round((Date.now() - startedAt) / 1000);
    (async () => {
        try {
            const feed = await fetchFeed(FEED_URL);
            const fiches = await scrap(feed);
            console.log(`done in ${elapsedSeconds()} sec`);
            const dataFilePath = path_1.default.join(__dirname, "..", "..", "data", "fiches-travail.json");
            fs_1.default.mkdirSync(path_1.default.dirname(dataFilePath), { recursive: true });
            fs_1.default.writeFileSync(dataFilePath, JSON.stringify(fiches, null, 2));
        }
        catch (error) {
            console.error(error);
            console.error(`fail in ${elapsedSeconds()} sec`);
            process.exit(1);
        }
    })();
}
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.parseDom = void 0;
|
|
7
|
+
const cdtn_slugify_1 = __importDefault(require("@socialgouv/cdtn-slugify"));
|
|
8
|
+
const got_1 = require("got");
|
|
9
|
+
const email_1 = require("../email");
|
|
10
|
+
const postProcess_1 = require("./postProcess");
|
|
11
|
+
const referenceExtractor_1 = require("./referenceExtractor");
|
|
12
|
+
const referenceResolver_1 = require("./referenceResolver");
|
|
13
|
+
const $$ = (node, selector) => Array.from(node.querySelectorAll(selector));
|
|
14
|
+
const $ = (node, selector) => node.querySelector(selector);
|
|
15
|
+
/**
 * Decodes an obfuscated e-mail given as a hexadecimal string.
 *
 * The string is read as a sequence of two-digit hex bytes: the first byte is
 * the XOR key, every following byte is XOR-ed with it to recover one
 * character. The recovered bytes are then interpreted as UTF-8 and the
 * result goes through the e-mail `encode` helper.
 * (presumably the format produced by Cloudflare's e-mail protection — confirm)
 */
function unwrapEmail(data = "") {
    // A trailing unpaired hex digit is ignored.
    const pairCount = Math.floor(data.length / 2);
    const bytes = [];
    for (let i = 0; i < pairCount; i += 1) {
        bytes.push(parseInt(data.slice(2 * i, 2 * i + 2), 16));
    }
    const [key, ...cipher] = bytes;
    let rawValue = "";
    for (const byte of cipher) {
        rawValue += String.fromCharCode(byte ^ key);
    }
    // escape + decodeURIComponent re-reads the byte string as UTF-8.
    return (0, email_1.encode)(decodeURIComponent(escape(rawValue)));
}
|
|
20
|
+
// Replaces the text of an element carrying a `data-cfemail` attribute with the
// decoded address and drops the attribute.
const formatEmail = (node) => {
    const decodedEmail = unwrapEmail(node.getAttribute("data-cfemail"));
    node.removeAttribute("data-cfemail");
    node.textContent = decodedEmail;
};
|
|
25
|
+
const formatPicture = (node) => {
|
|
26
|
+
const comment = node.parentElement.childNodes[0];
|
|
27
|
+
if (comment.nodeName !== "#comment") {
|
|
28
|
+
//upper sibbling node is not a comment so it's not a case we handle
|
|
29
|
+
return;
|
|
30
|
+
}
|
|
31
|
+
const [, src = ""] = comment.data.match(/src=["']([^'"]*)["']/);
|
|
32
|
+
if (src.length === 0) {
|
|
33
|
+
return;
|
|
34
|
+
}
|
|
35
|
+
let [srcClean] = src.split("?");
|
|
36
|
+
if (!srcClean.match(/^https?:\/\//)) {
|
|
37
|
+
if (srcClean.slice(0, 1) !== "/") {
|
|
38
|
+
srcClean = "/" + srcClean;
|
|
39
|
+
}
|
|
40
|
+
srcClean = `https://travail-emploi.gouv.fr${srcClean}`;
|
|
41
|
+
}
|
|
42
|
+
// we remove the ie comment that have timestamp in the url
|
|
43
|
+
comment.remove();
|
|
44
|
+
// we add e
|
|
45
|
+
const sourceNode = node.ownerDocument.createElement("source");
|
|
46
|
+
sourceNode.setAttribute("srcset", srcClean);
|
|
47
|
+
sourceNode.setAttribute("media", "(min-width: 300px)");
|
|
48
|
+
node.appendChild(sourceNode);
|
|
49
|
+
return node;
|
|
50
|
+
};
|
|
51
|
+
const formatAnchor = (node) => {
|
|
52
|
+
if (node.innerHTML.trim() === "") {
|
|
53
|
+
node.remove();
|
|
54
|
+
return;
|
|
55
|
+
}
|
|
56
|
+
if (node.getElementsByTagName("img").length) {
|
|
57
|
+
node.classList.add("no-after");
|
|
58
|
+
}
|
|
59
|
+
let href = node.getAttribute("href");
|
|
60
|
+
// remove ATTAg(...) on pdf link
|
|
61
|
+
node.removeAttribute("onclick");
|
|
62
|
+
if (!href)
|
|
63
|
+
return;
|
|
64
|
+
// unwrap link with href="javascript:"
|
|
65
|
+
if (/^javascript:/.test(href)) {
|
|
66
|
+
node.parentNode.innerHTML = node.textContent;
|
|
67
|
+
}
|
|
68
|
+
if (/email-protection/.test(href)) {
|
|
69
|
+
const [, data = ""] = href.split("#");
|
|
70
|
+
const value = unwrapEmail(data);
|
|
71
|
+
node.setAttribute("href", `mailto:${value}`);
|
|
72
|
+
return;
|
|
73
|
+
}
|
|
74
|
+
if (!href.match(/^https?:\/\//)) {
|
|
75
|
+
if (href.slice(0, 1) !== "/") {
|
|
76
|
+
href = "/" + href;
|
|
77
|
+
}
|
|
78
|
+
node.setAttribute("href", `https://travail-emploi.gouv.fr${href}`);
|
|
79
|
+
node.setAttribute("target", "_blank");
|
|
80
|
+
node.setAttribute("rel", "nofollow, noopener");
|
|
81
|
+
}
|
|
82
|
+
};
|
|
83
|
+
const flattenCsBlocs = (node) => {
|
|
84
|
+
node.insertAdjacentHTML("afterend", node.innerHTML);
|
|
85
|
+
node.parentNode.removeChild(node);
|
|
86
|
+
};
|
|
87
|
+
// Picks the heading level used to delimit sections: the first of h3, h4, h5
// that appears as a direct child of `.main-article__texte`. When none is
// present the placeholder "sectionTag" is returned.
const getSectionTag = (article) => {
    for (const tag of ["h3", "h4", "h5"]) {
        if ($$(article, `.main-article__texte > ${tag}`).length) {
            return tag;
        }
    }
    return "sectionTag";
};
|
|
93
|
+
// Two-step lookup: extract the tokens referencing articles from `text`, then
// try to resolve the actual article ids (using legi-data).
const getReferences = (text) => {
    const extracted = (0, referenceExtractor_1.extractReferences)(text);
    const resolved = (0, referenceResolver_1.resolveReferences)(extracted);
    return resolved;
};
|
|
99
|
+
/**
 * Extracts a fiche from a parsed page.
 *
 * The `<main>` element is first cleaned in place (anchors, pictures,
 * obfuscated e-mails, `.cs_blocs` wrappers, image urls). The content of
 * `.main-article__texte` is then split into sections delimited by the heading
 * level returned by getSectionTag; content located before the first heading
 * becomes an untitled section carrying the page title.
 *
 * @param dom object exposing `window.document` (presumably a JSDOM instance — confirm)
 * @param id publication id, returned as `pubId`
 * @param url page url, returned as is
 * @returns `{ date, description, intro, pubId, sections, title, url }`
 * @throws ParseError when `<main>`, the title, the id, the date or description
 *         `<meta>`, or every section is missing
 */
function parseDom(dom, id, url) {
    // NOTE(review): ParseError is built from a bare message everywhere in this
    // function — confirm this matches the constructor of the installed got version.
    const { document } = dom.window;
    const article = $(document, "main");
    if (!article) {
        throw new got_1.ParseError("no <main>");
    }
    $$(article, "a").forEach(formatAnchor);
    $$(article, "picture").forEach(formatPicture);
    $$(article, "[data-cfemail]").forEach(formatEmail);
    $$(article, ".cs_blocs").forEach(flattenCsBlocs);
    for (const img of $$(article, "img")) {
        // remove adaptImgFix(this) on hero img
        img.removeAttribute("onmousedown");
        const src = img.getAttribute("src");
        // An <img> without src used to crash on `null.indexOf`; inline data
        // URIs are left untouched.
        if (src === null || src.indexOf("data:image") !== -1) {
            continue;
        }
        if (!src.match(/^https?:\/\//)) {
            const absolutePath = src.slice(0, 1) !== "/" ? "/" + src : src;
            img.setAttribute("src", `https://travail-emploi.gouv.fr${absolutePath}`);
        }
    }
    const titleElement = $(article, "h1") || $(article, "h2");
    if (!titleElement) {
        throw new got_1.ParseError("No <h1> or <h2> element");
    }
    const title = titleElement.textContent.trim();
    if (!id) {
        throw new got_1.ParseError(`No id`);
    }
    const dateRaw = $(document, "meta[property*=modified_time]") ||
        $(document, "meta[property$=published_time]");
    const dateContent = dateRaw ? dateRaw.getAttribute("content") : null;
    if (!dateContent) {
        // previously an opaque TypeError on null
        throw new got_1.ParseError("No modified_time or published_time <meta>");
    }
    // NOTE(review): assumes `content` is a plain YYYY-MM-DD value; a full ISO
    // timestamp would leave the time part in `day` — confirm.
    const [year, month, day] = dateContent.split("-");
    const introElement = $(article, ".main-article__chapo");
    const intro = introElement
        ? introElement.innerHTML.replace(/\n/g, "").replace(/\s+/g, " ").trim()
        : "";
    const descriptionMeta = $(document, "meta[name=description]");
    if (!descriptionMeta) {
        // previously an opaque TypeError on null
        throw new got_1.ParseError("No description <meta>");
    }
    const description = descriptionMeta.getAttribute("content");
    const sections = [];
    const sectionTag = getSectionTag(article);
    // First pass is only to get a potential untitled section at the top of the article
    // This section has neither anchor nor title
    let nextArticleElement = $(article, ".main-article__texte > *");
    const untitledSection = {
        anchor: "",
        html: "",
        text: "",
        title: title,
    };
    let hasUntitledContent = false;
    while (nextArticleElement &&
        nextArticleElement.tagName.toLowerCase() !== sectionTag) {
        if (nextArticleElement.textContent) {
            hasUntitledContent = true;
            untitledSection.html += nextArticleElement.outerHTML
                .replace(/\n+/g, "")
                .replace(/>\s+</g, "><")
                .replace(/\s+/g, " ");
            untitledSection.text +=
                " " + nextArticleElement.textContent.replace(/\s+/g, " ").trim();
        }
        nextArticleElement = nextArticleElement.nextElementSibling;
    }
    if (hasUntitledContent) {
        // The trimmed value is now stored: the former bare `text.trim()` call
        // discarded its result and left a leading space in the section text.
        untitledSection.text = untitledSection.text.trim();
        untitledSection.description = untitledSection.text.slice(0, 200).trim();
        untitledSection.references = getReferences(untitledSection.text);
        sections.push(untitledSection);
    }
    // Gets all the titled content
    for (const heading of $$(article, `.main-article__texte > ${sectionTag}`)) {
        if (heading.tagName.toLowerCase() !== sectionTag) {
            continue;
        }
        // A section is made of every sibling up to the next heading of the same level.
        let html = "";
        let sibling = heading.nextElementSibling;
        while (sibling && sibling.tagName.toLowerCase() !== sectionTag) {
            html += sibling.outerHTML;
            sibling = sibling.nextElementSibling;
        }
        // Detached container, only used to compute the text of the section.
        const container = document.createElement("div");
        container.innerHTML = html;
        const sectionText = container.textContent.replace(/\s+/g, " ").trim();
        sections.push({
            anchor: heading.getAttribute("id") || (0, cdtn_slugify_1.default)(heading.textContent),
            description: sectionText.slice(0, 200).trim(),
            html: (0, postProcess_1.htmlPostParser)(html.replace(/\n+/g, "").replace(/>\s+</g, "><").replace(/\s+/g, " ")),
            references: getReferences(sectionText),
            text: sectionText,
            title: heading.textContent.trim(),
        });
    }
    if (sections.length === 0) {
        throw new got_1.ParseError(`No sections`);
    }
    return {
        date: `${day}/${month}/${year}`,
        description,
        intro,
        pubId: id,
        sections,
        title,
        url,
    };
}
|
|
211
|
+
exports.parseDom = parseDom;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const htmlPostParser: (html: string) => string;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
+
if (mod && mod.__esModule) return mod;
|
|
20
|
+
var result = {};
|
|
21
|
+
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
+
__setModuleDefault(result, mod);
|
|
23
|
+
return result;
|
|
24
|
+
};
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.htmlPostParser = void 0;
|
|
27
|
+
const cheerio = __importStar(require("cheerio"));
|
|
28
|
+
/**
 * Cleans the HTML of a section.
 *
 * `<style>`, `<button>` and `.oembed-source` elements are removed. Every
 * `<picture>` is then replaced by a plain `<img>` whose url comes from the
 * `src="…"` found in the HTML comments of the fragment: the n-th picture uses
 * the n-th url collected. A picture left without url is replaced by its own
 * inner HTML.
 *
 * @param html HTML fragment
 * @returns the cleaned HTML fragment
 */
const htmlPostParser = (html) => {
    const $ = cheerio.load(html, null, false);
    const arrImgSrc = [];
    let currentIndex = 0;
    // Relative urls are resolved against the site origin without doubling the
    // slash; absolute urls are kept as they are.
    const toAbsoluteUrl = (src) => {
        if (/^https?:\/\//.test(src)) {
            return src;
        }
        return `https://travail-emploi.gouv.fr/${src.replace(/^\/+/, "")}`;
    };
    $("style").remove();
    // https://travail-emploi.gouv.fr/le-ministere-en-action/coronavirus-covid-19/questions-reponses-par-theme/article/mesures-de-prevention-dans-l-entreprise-contre-la-covid-19
    $("button").remove();
    $(".oembed-source").remove();
    // https://travail-emploi.gouv.fr/emploi-et-insertion/accompagnement-des-mutations-economiques/activite-partielle-chomage-partiel/article/activite-partielle-chomage-partiel
    $("*")
        .contents()
        .each((_, node) => {
        // 8 === comment node
        if (node.nodeType !== 8) {
            return;
        }
        const match = /src=['"](.*?)['"]/.exec(node.nodeValue);
        if (match?.[1]) {
            arrImgSrc.push(match[1]);
        }
    });
    // The element is taken from the callback arguments: `this` inside an
    // arrow function is never the <picture>, so the former `$(this).html()`
    // fallback always produced an empty string and the picture was dropped.
    $("picture").replaceWith((_, picture) => {
        const src = arrImgSrc[currentIndex];
        if (src) {
            currentIndex++;
            return `<img src="${toAbsoluteUrl(src)}" style="width:100%;height:auto;"/>`;
        }
        return $(picture).html() ?? "";
    });
    return $.html();
};
|
|
60
|
+
exports.htmlPostParser = htmlPostParser;
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/** Parameter and return types are not described by this declaration file. */
export declare function classifyTokens(tokens: any): any;
/** Identifier / display-name pair (presumably the "code de la sécurité sociale" — confirm). */
export declare namespace CODE_SECU {
    const id: string;
    const name: string;
}
/** Identifier / display-name pair (presumably the "code du travail" — confirm). */
export declare namespace CODE_TRAVAIL {
    const id: string;
    const name: string;
}
/** Dictionary of `{ id, name }` entries indexed by string keys. */
export declare const codesFullNames: Record<string, {
    id: string;
    name: string;
}>;
/** Extracts references from `text`; parameter and return types are not described here. */
export declare function extractReferences(text: any): any;
|