@tricoteuses/tisseuse 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/LICENSE.md +22 -0
  2. package/README.md +5 -0
  3. package/dist/index.js +707 -0
  4. package/dist/lib/asserts.d.ts +1 -0
  5. package/dist/lib/index.d.ts +3 -0
  6. package/dist/lib/numbers.d.ts +6 -0
  7. package/dist/lib/server/auditors/config.d.ts +4 -0
  8. package/dist/lib/server/config.d.ts +18 -0
  9. package/dist/lib/server/databases/index.d.ts +10 -0
  10. package/dist/lib/server/text_links.d.ts +46 -0
  11. package/dist/lib/server/text_links.test.d.ts +1 -0
  12. package/dist/lib/server/text_parsers/transformers.d.ts +3 -0
  13. package/dist/lib/strings.d.ts +1 -0
  14. package/dist/lib/text_parsers/actions.d.ts +1 -0
  15. package/dist/lib/text_parsers/actions.test.d.ts +1 -0
  16. package/dist/lib/text_parsers/articles.d.ts +53 -0
  17. package/dist/lib/text_parsers/articles.test.d.ts +1 -0
  18. package/dist/lib/text_parsers/ast.d.ts +136 -0
  19. package/dist/lib/text_parsers/citations.d.ts +7 -0
  20. package/dist/lib/text_parsers/citations.test.d.ts +1 -0
  21. package/dist/lib/text_parsers/dates.d.ts +6 -0
  22. package/dist/lib/text_parsers/dates.test.d.ts +1 -0
  23. package/dist/lib/text_parsers/divisions.d.ts +29 -0
  24. package/dist/lib/text_parsers/divisions.test.d.ts +1 -0
  25. package/dist/lib/text_parsers/helpers.d.ts +10 -0
  26. package/dist/lib/text_parsers/index.d.ts +4 -0
  27. package/dist/lib/text_parsers/index.test.d.ts +1 -0
  28. package/dist/lib/text_parsers/numbers.d.ts +17 -0
  29. package/dist/lib/text_parsers/numbers.test.d.ts +1 -0
  30. package/dist/lib/text_parsers/parsers.d.ts +51 -0
  31. package/dist/lib/text_parsers/parsers.test.d.ts +1 -0
  32. package/dist/lib/text_parsers/portions.d.ts +53 -0
  33. package/dist/lib/text_parsers/portions.test.d.ts +1 -0
  34. package/dist/lib/text_parsers/positions.d.ts +4 -0
  35. package/dist/lib/text_parsers/prepositions.d.ts +6 -0
  36. package/dist/lib/text_parsers/prepositions.test.d.ts +1 -0
  37. package/dist/lib/text_parsers/references.d.ts +13 -0
  38. package/dist/lib/text_parsers/references.test.d.ts +1 -0
  39. package/dist/lib/text_parsers/relative_locations.d.ts +9 -0
  40. package/dist/lib/text_parsers/relative_locations.test.d.ts +1 -0
  41. package/dist/lib/text_parsers/separators.d.ts +7 -0
  42. package/dist/lib/text_parsers/simplifiers.d.ts +13 -0
  43. package/dist/lib/text_parsers/simplifiers.test.d.ts +1 -0
  44. package/dist/lib/text_parsers/texts.d.ts +42 -0
  45. package/dist/lib/text_parsers/texts.test.d.ts +1 -0
  46. package/dist/lib/text_parsers/transformers.d.ts +53 -0
  47. package/dist/lib/text_parsers/typography.d.ts +9 -0
  48. package/dist/lib/text_parsers/typography.test.d.ts +1 -0
  49. package/dist/scripts/add_links_to_html_document.d.ts +1 -0
  50. package/dist/scripts/add_references_to_html_document.d.ts +6 -0
  51. package/dist/scripts/extract_texts_infos.d.ts +1 -0
  52. package/dist/scripts/extract_texts_titles_infos.d.ts +1 -0
  53. package/dist/scripts/html_document_to_text.d.ts +1 -0
  54. package/package.json +69 -0
package/dist/index.js ADDED
@@ -0,0 +1,707 @@
1
+ function _(u, n) {
2
+ throw `Unexpected type ${u}: ${n}`;
3
+ }
4
// Matches the beginning of a tag (opening `<` or closing `</`) and captures its
// name in group 1: `!DOCTYPE`, `?XML`, or a name made of a letter followed by
// letters/digits. The match is case-insensitive.
+ const N = /^<\/?(!DOCTYPE|\?XML|[A-Z][A-Z0-9]*)/i;
5
+ function H(u, n) {
6
+ return (t) => {
7
+ const s = [];
8
+ let e = t;
9
+ for (const i of n) {
10
+ const r = i(e);
11
+ (r.transformations !== void 0 || r.sourceMap.length !== 0) && (s.push(r), e = r.output);
12
+ }
13
+ return { input: t, output: e, title: u, transformations: s };
14
+ };
15
+ }
16
+ function* $(u) {
17
+ if (u.transformations === void 0)
18
+ yield u;
19
+ else
20
+ for (const n of u.transformations)
21
+ yield* $(n);
22
+ }
23
// Generator that converts positions expressed in the output of transformation
// `u` back to positions in its input.
// Protocol: the first `next()` primes the generator (it yields a zero
// position); each following `next(position)` yields the converted result
// object; `next(undefined)` ends the conversion.
+ function* U(u) {
24
// Leaves of the transformation tree, last applied first, because positions
// are walked back from the final output towards the original input.
+ const n = [
25
+ ...$(u)
26
+ ].reverse();
27
+ let t = {
28
+ position: { start: 0, stop: 0 }
29
+ };
30
// One `y` generator per leaf; each is primed immediately so that it is
// suspended at its first `yield` and ready to receive a request.
+ const s = n.map(
31
+ (i) => {
32
+ const r = y(
33
+ i
34
+ );
35
+ return r.next(t), r;
36
+ }
37
+ );
38
+ let e = yield t;
39
+ for (; e !== void 0; ) {
40
+ t = {
41
+ position: e
42
+ };
43
// Thread the request through every leaf generator in turn: the result of
// one leaf is the request sent to the next.
+ for (const i of s) {
44
+ const r = i.next(t);
45
// A leaf generator that has finished cannot convert anything: stop.
+ if (r.done)
46
+ return;
47
+ t = r.value;
48
+ }
49
+ e = yield t;
50
+ }
51
// The caller sent `undefined`: forward it so each leaf generator can return.
+ for (const i of s)
52
+ i.next(void 0);
53
+ }
54
// Generator converting positions from the output of a single transformation
// leaf `u` (an object with a `sourceMap`) back to its input.
// Each request received through `next()` is an object with a `position`
// ({ start, stop }) and optional innerPrefix / innerSuffix / outerPrefix /
// outerSuffix strings; each yielded value has the same shape with `position`
// expressed in input coordinates. Receiving `undefined` ends the generator.
// NOTE(review): the segment cursor only moves forward, so requests are
// presumably expected in ascending position order; confirm with callers.
+ function* y(u) {
55
// n: index of the segment where the current range starts; t: state of the
// scan; s: the current request (the first yield only primes the generator).
+ let n, t = "looking_for_start_segment", s = yield {
56
+ position: { start: 0, stop: 0 }
57
+ };
58
// Source map padded with a zero-length sentinel at each end, so that every
// real segment has a predecessor and a successor. Because of the leading
// sentinel, source-map index k is found at e[k + 1].
+ const e = [
59
+ { inputIndex: 0, inputLength: 0, outputIndex: 0, outputLength: 0 },
60
+ ...u.sourceMap,
61
+ {
62
+ inputIndex: Number.MAX_SAFE_INTEGER,
63
+ inputLength: 0,
64
+ outputIndex: Number.MAX_SAFE_INTEGER,
65
+ outputLength: 0
66
+ }
67
+ ];
68
+ for (const [i, r] of e.entries())
69
// c becomes true when the scan must move on to the next segment; until then
// the same segment is re-examined (possibly for several requests).
+ for (let c = !1; !c; ) {
70
+ if (s === void 0)
71
+ return;
72
// o: requested output range; p / l / d / g: inner prefix, inner suffix,
// outer prefix and outer suffix contributed by this leaf.
+ let { position: o } = s, p, l, d, g;
73
+ switch (t) {
74
+ case "looking_for_start_segment": {
75
+ const x = e[i + 1];
76
// Skip this segment while the next one still ends at or before the start of
// the range; otherwise remember it as the start segment.
+ x !== void 0 && x.outputIndex + x.outputLength <= o.start ? c = !0 : (n = i, t = "looking_for_stop_segment");
77
+ break;
78
+ }
79
+ case "looking_for_stop_segment": {
80
// Advance until a segment that begins at or after the end of the range.
+ if (r.outputIndex < o.stop)
81
+ c = !0;
82
+ else {
83
+ const x = e[n], T = i, f = r;
84
// m / a: first and last segment indexes considered part of the range.
+ let m = o.start < x.outputIndex + x.outputLength ? n : n + 1, a = o.stop > f.outputIndex ? T : T - 1;
85
// Repeat until no segment of the range forces it to be widened (M is set
// again each time the range grows).
+ for (let M = !0; M; ) {
86
+ M = !1, p = void 0, l = void 0, d = void 0, g = void 0;
87
+ for (let P = m; P <= a; P++) {
88
+ const E = e[P].matchingSegmentIndex;
89
+ if (E !== void 0) {
90
// The segment paired with this one lies before the range.
+ if (E + 1 < m) {
91
+ let R = !1;
92
+ const L = e[E + 1];
93
+ if (L.openingTag !== void 0 && L.outputIndex + L.outputLength < o.start) {
94
+ const A = L.openingTag.match(N);
95
+ if (A !== null) {
96
+ const v = A[1], O = v.toUpperCase();
97
// For these inline tags the range is not widened: the tag is closed in the
// outer prefix and reopened in the inner prefix.
+ [
98
+ "B",
99
+ "EM",
100
+ "I",
101
+ "SPAN",
102
+ "STRONG",
103
+ "SUB",
104
+ "SUP"
105
+ ].includes(O) && (d = `${d ?? ""}</${v}>`, p = `${L.openingTag}${p ?? ""}`, R = !0);
106
+ }
107
+ }
// Otherwise move the start of the range back to the paired segment and
// restart the scan of the range.
108
+ if (!R) {
109
+ p = void 0, d = void 0, m = E + 1, o = {
110
+ start: e[m].outputIndex,
111
+ stop: o.stop
112
+ }, M = !0;
113
+ break;
114
+ }
115
+ }
116
// The segment paired with this one lies after the range.
+ if (E + 1 > a) {
117
+ let R = !1;
118
+ const L = e[P], A = e[E + 1];
119
+ if (L.openingTag !== void 0 && o.stop < A.outputIndex) {
120
+ const v = L.openingTag.match(N);
121
+ if (v !== null) {
122
+ const O = v[1], F = O.toUpperCase();
123
// Same inline tags: closed in the inner suffix, reopened in the outer suffix.
+ [
124
+ "B",
125
+ "EM",
126
+ "I",
127
+ "SPAN",
128
+ "STRONG",
129
+ "SUB",
130
+ "SUP"
131
+ ].includes(F) && (l = `${l ?? ""}</${O}>`, g = `${L.openingTag}${g ?? ""}`, R = !0);
132
+ }
133
+ }
// Otherwise extend the end of the range up to the end of the paired segment
// and restart the scan of the range.
134
+ if (!R) {
135
+ l = void 0, g = void 0, a = E + 1;
136
+ const v = e[a];
137
+ o = {
138
+ start: o.start,
139
+ stop: v.outputIndex + v.outputLength
140
+ }, M = !0;
141
+ break;
142
+ }
143
+ }
144
+ }
145
+ }
146
+ }
147
// Convert both bounds to input coordinates: the start is measured from the
// end of the segment preceding the range (h), the stop from the end of the
// last segment of the range (I).
+ const h = e[m - 1], S = h.inputIndex + h.inputLength + o.start - (h.outputIndex + h.outputLength), I = e[a], C = I.inputIndex + I.inputLength + o.stop - (I.outputIndex + I.outputLength);
148
// Yield the converted request, combining the prefixes/suffixes received with
// the ones added here; empty strings become undefined and are filtered out.
+ s = yield Object.fromEntries(
149
+ Object.entries({
150
+ innerPrefix: `${p ?? ""}${s.innerPrefix ?? ""}` || void 0,
151
+ innerSuffix: `${s.innerSuffix ?? ""}${l ?? ""}` || void 0,
152
+ outerPrefix: `${s.outerPrefix ?? ""}${d ?? ""}` || void 0,
153
+ outerSuffix: `${g ?? ""}${s.outerSuffix ?? ""}` || void 0,
154
+ position: {
155
+ start: S,
156
+ stop: C
157
+ }
158
+ }).filter(([, M]) => M !== void 0)
159
+ ), t = "looking_for_start_segment";
160
+ }
161
+ break;
162
+ }
163
+ default:
164
// Unknown state: report it through the exhaustiveness helper, which throws.
+ _(
165
+ "iterOriginalMergedPositionsFromTransformedUsingTransformationLeaf.state",
166
+ t
167
+ );
168
+ }
169
+ }
170
+ }
171
/**
 * Converts a list of positions expressed in the output of transformation `u`
 * into merged positions in its input, using the `U` generator.
 *
 * @param u Transformation (node or leaf) whose output the positions refer to.
 * @param n Array of `{ start, stop }` positions in the transformed text.
 * @returns One converted result object per requested position, in the same order.
 * @throws {Error} When the underlying generator ends before a position is converted.
 */
function q(u, n) {
  const converter = U(u);
  // Prime the generator: it must reach its first `yield` before it can
  // receive positions.
  converter.next({ start: 0, stop: 0 });
  return n.map((position) => {
    const step = converter.next(position);
    if (step.done) {
      // Serialize the position: interpolating the object directly would
      // print "[object Object]" and hide which position failed.
      throw new Error(
        `Reverse transformation of position failed: ${JSON.stringify(position)}`
      );
    }
    return step.value;
  });
}
182
/**
 * Converts positions expressed in the output of transformation `u` into
 * (possibly split) positions in its input.
 *
 * The leaves of the transformation tree are walked from the last applied to
 * the first, and the positions are passed through `b` once per leaf.
 *
 * @param u Transformation (node or leaf).
 * @param n Positions in the transformed text.
 * @returns Positions in the original text.
 */
function W(u, n) {
  const leaves = Array.from($(u));
  let positions = n;
  for (let index = leaves.length - 1; index >= 0; index--) {
    positions = b(leaves[index].sourceMap, positions);
  }
  return positions;
}
192
// Converts positions expressed in the output of one source map into positions
// in its input. A position may be split into several fragments, so the
// returned array can be longer than the input list.
// u: source map segments; n: iterable of { start, stop } output positions.
// NOTE(review): the segment cursor (s, e) is shared by all positions and only
// moves forward, so positions are presumably expected in ascending order;
// confirm with callers.
+ function b(u, n) {
193
+ const t = [];
194
// Pad the source map with a zero-length sentinel at each end; because of the
// leading sentinel, source-map index k is found at u[k + 1].
+ u = [
195
+ { inputIndex: 0, inputLength: 0, outputIndex: 0, outputLength: 0 },
196
+ ...u,
197
+ {
198
+ inputIndex: Number.MAX_SAFE_INTEGER,
199
+ inputLength: 0,
200
+ outputIndex: Number.MAX_SAFE_INTEGER,
201
+ outputLength: 0
202
+ }
203
+ ];
204
+ let s = 0, e = u[s];
205
+ for (const i of n) {
206
// r: current start in output coordinates (moves forward when the position
// is split); c: stop in output coordinates.
+ let { start: r } = i;
207
+ const { stop: c } = i;
208
// The label `t` below lives in the label namespace and is unrelated to the
// result array `t`. `continue t` restarts the conversion of the remaining
// part of the position.
+ t: for (let o = !1; !o; ) {
209
// Advance the cursor to the first segment that ends after the start.
+ for (; e.outputIndex + e.outputLength <= r; s++, e = u[s]) ;
210
+ let p = s;
211
+ const l = u[p - 1];
212
// d: start in input coordinates, measured from the end of the previous
// segment; g: index of the last segment beginning before the stop.
+ let d = l.inputIndex + l.inputLength + r - (l.outputIndex + l.outputLength), g;
213
+ for (g = p - 1; u[g + 1].outputIndex < c; g++) ;
214
+ const x = u[g];
215
// T: stop in input coordinates, measured from the end of segment g.
+ let T = x.inputIndex + x.inputLength + c - (x.outputIndex + x.outputLength);
216
+ for (let f = p; f <= g; f++) {
217
+ const m = u[f], a = m.matchingSegmentIndex;
218
+ if (a !== void 0) {
219
// The segment paired with m lies before the range.
+ if (a + 1 < p) {
220
+ const h = u[a + 1];
221
+ if (h.outputIndex < r) {
222
// The paired segment begins before the start: emit what precedes m (if not
// empty), then restart right after m.
+ m.inputIndex > d && t.push({
223
+ start: d,
224
+ stop: m.inputIndex
225
+ }), r = m.outputIndex + m.outputLength;
226
// Move the cursor past the consecutive segments that end exactly at the new start.
+ for (let S = f, I = m; I.outputIndex + I.outputLength === r; S++, I = u[S])
227
+ s = S;
228
+ continue t;
229
+ }
230
// Otherwise extend the range backwards to include the paired segment.
+ p = a + 1, d = h.inputIndex;
231
+ } else if (a + 1 > g) {
232
// The segment paired with m lies after the range.
+ const h = u[a + 1];
233
+ if (h.outputIndex + h.outputLength > c) {
234
// The paired segment ends after the stop: emit what precedes m (if not
// empty), then restart right after m.
+ m.inputIndex > d && t.push({
235
+ start: d,
236
+ stop: m.inputIndex
237
+ }), r = m.outputIndex + m.outputLength;
238
+ for (let S = f, I = m; I.outputIndex + I.outputLength === r; S++, I = u[S])
239
+ s = S;
240
+ continue t;
241
+ }
242
// Otherwise extend the range forwards to include the paired segment.
+ g = a + 1, T = h.inputIndex + h.inputLength;
243
+ }
244
+ }
245
+ }
246
// No more split needed: emit the (remaining) fragment and end this position.
+ t.push({
247
+ start: d,
248
+ stop: T
249
+ }), o = !0;
250
+ }
251
+ }
252
+ return t;
253
+ }
254
/**
 * Replacement text for each supported named HTML entity, keyed by the
 * lower-cased entity name (without `&` and `;`).
 */
const B = {
  amp: "&",
  apos: "'",
  asymp: "≈",
  copy: "©",
  deg: "°",
  euro: "€",
  gt: ">",
  lt: "<",
  mdash: "—",
  // Written as an escape: a literal no-break space is indistinguishable from a
  // regular space in source code and is easily lost by editors or formatters.
  nbsp: "\u00A0",
  ndash: "–",
  ne: "≠",
  pound: "£",
  quot: '"',
  reg: "®",
  trade: "™"
};
272
// Builds a transformer that converts HTML markup to text while recording a
// source map (input index/length -> output index/length) for every tag that
// is removed or replaced.
// Option `removeAWithHref`: when truthy, <a> elements whose opening tag
// contains " href=" are removed together with their content.
// NOTE(review): if the input ends while an "ignore" entry is still on the
// stack (the ignored element is never closed), `e` and `i` still reference the
// scratch arrays created when it was opened, so the returned output and
// sourceMap appear to hold only what followed that opening tag; confirm.
+ function k({
273
+ removeAWithHref: u
274
+ } = {}) {
275
+ return (n) => {
276
// t: read cursor in the input; s: running offset to add to an input index to
// get the matching output index; e: output fragments; i: source map.
+ let t = 0, s = 0, e = [], i = [];
277
// r: stack of the elements currently open; c: title of the transformation.
+ const r = [], c = "Conversion des éléments HTML en texte";
278
+ for (; t < n.length; ) {
279
+ const o = n.indexOf("<", t);
280
// No more tag: copy the rest (line breaks become spaces) and finish.
+ if (o === -1)
281
+ return e.push(n.slice(t).replace(/[\n\r]/g, " ")), {
282
+ input: n,
283
+ output: e.join(""),
284
+ sourceMap: i,
285
+ title: c
286
+ };
287
+ const p = n.indexOf(">", o);
288
// A "<" without a closing ">": same treatment as plain text.
+ if (p === -1)
289
+ return e.push(n.slice(t).replace(/[\n\r]/g, " ")), {
290
+ input: n,
291
+ output: e.join(""),
292
+ sourceMap: i,
293
+ title: c
294
+ };
295
// l: text of the tag; d: true for a closing tag; g: length of the tag;
// x: match giving the tag name.
+ const l = n.slice(o, p + 1), d = l.startsWith("</"), g = l.length, x = l.match(N);
296
// Not recognized as a tag: keep the text up to and including ">" as is.
+ if (x === null) {
297
+ e.push(
298
+ n.slice(t, p + 1).replace(/[\n\r]/g, " ")
299
+ ), t = p + 1;
300
+ continue;
301
+ }
302
+ const f = x[1].toUpperCase();
303
// Self-closed tags and tags of elements that have no content.
+ if (l.endsWith("/>") || [
304
+ "!DOCTYPE",
305
+ "?XML",
306
+ "AREA",
307
+ "BASE",
308
+ "BR",
309
+ "COL",
310
+ "EMBED",
311
+ "HR",
312
+ "IMG",
313
+ "INPUT",
314
+ "LINK",
315
+ "META",
316
+ "PARAM",
317
+ "SOURCE",
318
+ "TRACK",
319
+ "WBR"
320
+ ].includes(f))
321
// Closing form of such a tag: removed from the output.
+ if (d)
322
+ o > t && e.push(
323
+ n.slice(t, o).replace(/[\n\r]/g, " ")
324
+ ), i.push({
325
+ inputIndex: o,
326
+ inputLength: g,
327
+ outputIndex: o + s,
328
+ outputLength: 0
329
+ }), s -= g;
330
// <br> and <hr> are replaced by a one-character line break.
+ else if (["BR", "HR"].includes(f)) {
331
+ o > t && e.push(
332
+ n.slice(t, o).replace(/[\n\r]/g, " ")
333
+ );
334
+ const a = p + 1 - o;
335
+ e.push(`
336
+ `), i.push({
337
+ inputIndex: o,
338
+ inputLength: a,
339
+ openingTag: l,
340
+ outputIndex: o + s,
341
+ outputLength: 1
342
+ }), s += 1 - a;
343
// These tags are removed; any other tag of this group is kept verbatim.
+ } else ["!DOCTYPE", "?XML", "COL", "IMG", "INPUT"].includes(f) ? (o > t && e.push(
344
+ n.slice(t, o).replace(/[\n\r]/g, " ")
345
+ ), i.push({
346
+ inputIndex: o,
347
+ inputLength: g,
348
+ openingTag: l,
349
+ outputIndex: o + s,
350
+ outputLength: 0
351
+ }), s -= g) : e.push(
352
+ n.slice(t, p + 1).replace(/[\n\r]/g, " ")
353
+ );
354
// Closing tag of a regular element: compare it with the top of the stack.
+ else if (d) {
355
+ const a = r.at(-1);
356
+ if (f === a?.name)
357
+ switch (r.pop(), a.action) {
358
// Element without action: its closing tag is kept verbatim.
+ case void 0: {
359
+ e.push(
360
+ n.slice(t, p + 1).replace(/[\n\r]/g, " ")
361
+ );
362
+ break;
363
+ }
364
// Ignored element: restore the state saved when it was opened and record one
// segment covering the whole element, with an empty output.
+ case "ignore": {
365
+ e = a.outputFragments, s = a.outputOffset, i = a.sourceMap;
366
+ const h = p + 1 - a.inputIndex;
367
+ i.push({
368
+ inputIndex: a.inputIndex,
369
+ inputLength: h,
370
+ outputIndex: a.inputIndex + s,
371
+ outputLength: 0
372
+ }), s = s - h;
373
+ break;
374
+ }
375
// Content kept: the closing tag is replaced by `closingTagReplacement`, and
// the opening and closing segments reference each other through
// `matchingSegmentIndex`.
+ case "keep_content": {
376
+ e.push(
377
+ n.slice(t, o).replace(/[\n\r]/g, " ")
378
+ ), a.closingTagReplacement.length !== 0 && e.push(a.closingTagReplacement);
379
+ const h = p + 1 - o;
380
+ i[a.openingSegmentIndex].matchingSegmentIndex = i.length, i.push({
381
+ inputIndex: o,
382
+ inputLength: h,
383
+ matchingSegmentIndex: a.openingSegmentIndex,
384
+ outputIndex: o + s,
385
+ outputLength: a.closingTagReplacement.length
386
+ }), s += a.closingTagReplacement.length - h;
387
+ break;
388
+ }
389
+ default:
390
// Unknown action: report it through the exhaustiveness helper, which throws.
+ _("TagInfos.action", a);
391
+ }
392
// Closing tag that does not match the element on top of the stack: removed.
+ else
393
+ o > t && e.push(
394
+ n.slice(t, o).replace(/[\n\r]/g, " ")
395
+ ), i.push({
396
+ inputIndex: o,
397
+ inputLength: g,
398
+ outputIndex: o + s,
399
+ outputLength: 0
400
+ }), s -= g;
401
// Opening tag of an element removed with its content (and of <a href> when
// `removeAWithHref` is set).
+ } else if (["COLGROUP", "HEAD", "SCRIPT", "STYLE"].includes(f) || u && f === "A" && / href=/i.test(l))
402
+ o > t && e.push(
403
+ n.slice(t, o).replace(/[\n\r]/g, " ")
404
+ ), r.push({
405
+ action: "ignore",
406
+ inputIndex: o,
407
+ name: f,
408
+ // Backup outputFragments, outputOffset & sourceMap, because
409
+ // every changes made inside ignored element will be ignored.
410
+ outputFragments: e,
411
+ outputOffset: s,
412
+ sourceMap: i
413
+ }), e = [], i = [];
414
// Opening tag of an element whose tags are removed and content kept, with
// nothing inserted in place of the tags.
+ else if ([
415
+ "A",
416
+ // When removeAWithHref is false or no href
417
+ "B",
418
+ "BODY",
419
+ "DL",
420
+ "EM",
421
+ "HTML",
422
+ "I",
423
+ "OL",
424
+ "SPAN",
425
+ "STRONG",
426
+ "SUB",
427
+ "SUP",
428
+ "TABLE",
429
+ "TBODY",
430
+ "THEAD",
431
+ "TR",
432
+ "UL"
433
+ ].includes(f)) {
434
+ o > t && e.push(
435
+ n.slice(t, o).replace(/[\n\r]/g, " ")
436
+ );
437
+ const a = i.length;
438
+ i.push({
439
+ inputIndex: o,
440
+ inputLength: g,
441
+ openingTag: l,
442
+ outputIndex: o + s,
443
+ outputLength: 0
444
+ }), s -= g, r.push({
445
+ action: "keep_content",
446
+ closingTagReplacement: "",
447
+ name: f,
448
+ openingSegmentIndex: a
449
+ });
450
// Opening tag of an element whose opening and closing tags are each replaced
// by a one-character line break.
+ } else if ([
451
+ "CAPTION",
452
+ "DD",
453
+ "DT",
454
+ "H1",
455
+ "H2",
456
+ "H3",
457
+ "H4",
458
+ "H5",
459
+ "H6",
460
+ "DIV",
461
+ "FORM",
462
+ "LI",
463
+ "P",
464
+ "TD",
465
+ "TH"
466
+ ].includes(f)) {
467
+ o > t && e.push(
468
+ n.slice(t, o).replace(/[\n\r]/g, " ")
469
+ ), e.push(`
470
+ `);
471
+ const a = i.length;
472
+ i.push({
473
+ inputIndex: o,
474
+ inputLength: g,
475
+ openingTag: l,
476
+ outputIndex: o + s,
477
+ outputLength: 1
478
+ }), s += 1 - g, r.push({
479
+ action: "keep_content",
480
+ closingTagReplacement: `
481
+ `,
482
+ name: f,
483
+ openingSegmentIndex: a
484
+ });
485
// Any other opening tag is kept verbatim and pushed without action, so that
// its closing tag is kept verbatim too.
+ } else
486
+ e.push(
487
+ n.slice(t, p + 1).replace(/[\n\r]/g, " ")
488
+ ), r.push({
489
+ name: f
490
+ });
491
// Resume reading right after the tag.
+ t = p + 1;
492
+ }
493
+ return {
494
+ input: n,
495
+ output: e.join(""),
496
+ sourceMap: i,
497
+ title: c
498
+ };
499
+ };
500
+ }
501
/**
 * Decodes the supported named HTML entities (`&amp;`, `&nbsp;`, ...) found in
 * a string; entity names are matched case-insensitively.
 *
 * @param u Text to decode.
 * @returns A transformation leaf: `input`, decoded `output`, a `sourceMap`
 *   with one segment per decoded entity, and a `title`.
 */
function D(u) {
  const namedEntity =
    /&(amp|apos|asymp|copy|deg|euro|gt|lt|mdash|nbsp|ndash|ne|pound|quot|reg|trade);/gi;
  const sourceMap = [];
  const pieces = [];
  let cursor = 0; // End (in the input) of the last entity handled.
  let drift = 0; // Output index minus input index, so far.

  for (const found of u.matchAll(namedEntity)) {
    const [entity, name] = found;
    const offset = found.index;
    const decoded = B[name.toLowerCase()];
    pieces.push(u.slice(cursor, offset), decoded);
    sourceMap.push({
      inputIndex: offset,
      inputLength: entity.length,
      outputIndex: offset + drift,
      outputLength: decoded.length
    });
    drift += decoded.length - entity.length;
    cursor = offset + entity.length;
  }
  pieces.push(u.slice(cursor));

  return {
    input: u,
    output: pieces.join(""),
    sourceMap,
    title: "Décodage des entités HTML nommées"
  };
}
522
/**
 * Decodes numeric HTML character references, decimal (`&#233;`) or
 * hexadecimal (`&#xE9;`, case-insensitive), found in a string.
 *
 * @param u Text to decode.
 * @returns A transformation leaf: `input`, decoded `output`, a `sourceMap`
 *   with one segment per decoded reference, and a `title`.
 */
function G(u) {
  let drift = 0; // Output index minus input index, so far.
  const sourceMap = [];
  const output = u.replace(
    /&#(?:(\d+)|x([0-9A-F]+));/gi,
    (entity, decimal, hexadecimal, offset) => {
      const codePoint =
        decimal === undefined ? parseInt(hexadecimal, 16) : parseInt(decimal, 10);
      // `String.fromCharCode` keeps only the low 16 bits, which corrupts every
      // character outside the Basic Multilingual Plane (emoji, for example).
      // `String.fromCodePoint` handles them but throws above U+10FFFF, so such
      // references decode to U+FFFD (replacement character) instead.
      const decoded =
        codePoint > 0x10ffff ? "\uFFFD" : String.fromCodePoint(codePoint);
      sourceMap.push({
        inputIndex: offset,
        inputLength: entity.length,
        outputIndex: offset + drift,
        // Two UTF-16 code units for characters above U+FFFF.
        outputLength: decoded.length
      });
      drift += decoded.length - entity.length;
      return decoded;
    }
  );
  return {
    input: u,
    output,
    sourceMap,
    title: "Décodage des entités HTML numériques"
  };
}
546
+ function w(u, n) {
547
+ return (t) => {
548
+ const s = [], e = t.replaceAll(u, (r, ...c) => {
549
+ const o = c.at(-2);
550
+ let p = n;
551
+ for (const [l, d] of c.slice(0, -2).entries())
552
+ p = p.replaceAll(`$${l + 1}`, d);
553
+ return s.push({
554
+ inputIndex: o,
555
+ inputLength: r.length,
556
+ // Note: `outputIndex` is added below.
557
+ outputLength: p.length
558
+ }), p;
559
+ });
560
+ let i = 0;
561
+ for (const r of s)
562
+ r.outputIndex = r.inputIndex + i, i += r.outputLength - r.inputLength;
563
+ return {
564
+ input: t,
565
+ output: e,
566
+ sourceMap: s,
567
+ title: `Remplacement de ${u} par ${JSON.stringify(n)}`
568
+ };
569
+ };
570
+ }
571
/**
 * Applies a fixed list of pattern replacements to an HTML string: removal of
 * comments, <script> and <style> elements, insertion of a space after "n°",
 * and removal of private-use "pastillage" characters.
 *
 * @param u HTML text.
 * @returns A transformation node whose `transformations` hold one leaf per
 *   rule that matched at least once.
 */
function X(u) {
  // Note: The most englobing patterns must be first.
  const rules = [
    // Remove HTML comment.
    [/<!--.*?-->/gs, ""],
    // Remove <script> element.
    [/<script.*?>.*?<\/script>/gis, ""],
    // Remove <style> element.
    [/<style.*?>.*?<\/style>/gis, ""],
    // Ensure that there is always a space after "n°".
    [/(\sn°)([^\s])/gi, "$1 $2"],
    // Remove Sénat "pastillage":
    // - \uF04B-\uF054 are circled numbers 0-9.
    // - \uF031-\uF039 are left-half circled numbers 1-9.
    // - \uF041-\uF04A are numbers 0-9 with a circle fragment on their top & bottom only.
    // - \uF061-\uF06A are right-half circled numbers 0-9.
    [/[\uF031-\uF039\uF041-\uF054\uF061-\uF06A]/g, ""]
  ];

  const transformations = [];
  let output = u;
  rules.forEach(([pattern, replacement]) => {
    const step = w(pattern, replacement)(output);
    // A rule that matched nothing leaves no trace in the result.
    if (step.sourceMap.length === 0) {
      return;
    }
    transformations.push(step);
    output = step.output;
  });

  return {
    input: u,
    output,
    title: "Suppression des commentaires, scripts et styles HTML et nettoyage d'expressions",
    transformations
  };
}
601
/**
 * Builds the complete HTML simplification transformer: decoding of named and
 * numeric entities, pattern clean-up, Unicode simplification, conversion of
 * HTML elements to text, then whitespace simplification.
 *
 * NOTE(review): entities are decoded before tags are processed, so an escaped
 * `&lt;b&gt;` presumably reaches the later steps as a real `<b>` tag; confirm
 * that this is intended.
 *
 * @param options.removeAWithHref Forwarded to the HTML-elements-to-text step.
 * @returns A transformer taking the HTML text and returning a transformation node.
 */
function K({
  removeAWithHref: u
} = {}) {
  const simplify = H("Simplification du HTML", [
    D,
    G,
    X,
    j,
    k({ removeAWithHref: u }),
    Y
  ]);
  return (n) => simplify(n);
}
613
+ function Y(u) {
614
+ const n = [];
615
+ let t = u;
616
+ for (const [s, e, i] of [
617
+ ["Remplacement des espaces multiples par une espace unique", / +/g, " "],
618
+ ["Suppression d'une espace en début de ligne", /^ /gm, ""],
619
+ ["Suppression d'une espace en fin de ligne", / $/gm, ""],
620
+ [
621
+ "Remplacement des sauts de lignes multiples par un saut de ligne unique",
622
+ /\n\n+/g,
623
+ `
624
+ `
625
+ ],
626
+ ["Suppression d'un saut de ligne en début de texte", /^\n/g, ""],
627
+ ["Suppression d'un saut de ligne en fin de texte", /\n$/g, ""]
628
+ ]) {
629
+ let r = 0;
630
+ const c = [], o = t.replaceAll(e, (p, ...l) => {
631
+ const d = l.at(-2);
632
+ let g = i;
633
+ for (const [x, T] of l.slice(0, -2).entries())
634
+ g = g.replaceAll(`$${x + 1}`, T);
635
+ return c.push({
636
+ inputIndex: d,
637
+ inputLength: p.length,
638
+ outputIndex: d + r,
639
+ outputLength: g.length
640
+ }), r += g.length - p.length, g;
641
+ });
642
+ c.length !== 0 && (n.push({
643
+ input: t,
644
+ output: o,
645
+ sourceMap: c,
646
+ title: s
647
+ }), t = o);
648
+ }
649
+ return {
650
+ input: u,
651
+ output: t,
652
+ title: "Simplification du texte",
653
+ transformations: n
654
+ };
655
+ }
656
+ function j(u) {
657
+ const n = [];
658
+ let t = u;
659
+ for (const [e, i] of [
660
+ // Replace U+00A0 (no-break space) and tab with a normal space.
661
+ [/[ \t]/g, " "],
662
+ // Replace three non-ASCII dashes (U+2010, U+2011 et U+2013) with a minus sign.
663
+ [/[‐‑–]/g, "-"],
664
+ // Replace non-ASCII apostrophe.
665
+ [/’/g, "'"],
666
+ // Replace İ (I with a point) with normal I.
667
+ // The İ can be used, probably to differentiate the letter I from the Roman numeral I.
668
+ // For example: Article 199 decies İ of the General Tax Code.
669
+ // But Légifrance uses a classic I…
670
+ ["İ", "I"]
671
+ ])
672
+ t = t.replaceAll(e, (r, ...c) => {
673
+ const o = c.at(-2), p = {
674
+ inputIndex: o,
675
+ inputLength: 1,
676
+ outputIndex: o,
677
+ // Note: `outputIndex` is added below.
678
+ outputLength: 1
679
+ }, l = n.findIndex(
680
+ (d) => d.inputIndex > o
681
+ );
682
+ return l === -1 ? n.push(p) : n.splice(l, 0, p), i;
683
+ });
684
+ let s = 0;
685
+ for (const e of n)
686
+ e.outputIndex = e.inputIndex + s, s += e.outputLength - e.inputLength;
687
+ return {
688
+ input: u,
689
+ output: t,
690
+ sourceMap: n,
691
+ title: "Simplification des caractères unicodes"
692
+ };
693
+ }
694
+ export {
695
+ H as chainTransformers,
696
+ k as convertHtmlElementsToText,
697
+ D as decodeNamedHtmlEntities,
698
+ G as decodeNumericHtmlEntities,
699
+ U as iterOriginalMergedPositionsFromTransformed,
700
+ q as originalMergedPositionsFromTransformed,
701
+ W as originalSplitPositionsFromTransformed,
702
+ w as replacePattern,
703
+ X as replacePatterns,
704
+ K as simplifyHtml,
705
+ Y as simplifyText,
706
+ j as simplifyUnicodeCharacters
707
+ };
@@ -0,0 +1 @@
1
// Declares `assertNever`: takes a label and a value typed `never`, and never
// returns. Presumably the typed declaration of the exhaustiveness helper that
// throws "Unexpected type ..." in the bundle; confirm against the source.
+ export declare function assertNever(type: string, value: never): never;
@@ -0,0 +1,3 @@
1
+ export type { TextPosition } from './text_parsers/positions.js';
2
+ export { convertHtmlElementsToText, decodeNamedHtmlEntities, decodeNumericHtmlEntities, replacePattern, replacePatterns, simplifyHtml, simplifyText, simplifyUnicodeCharacters, } from './text_parsers/simplifiers.js';
3
+ export { chainTransformers, iterOriginalMergedPositionsFromTransformed, originalMergedPositionsFromTransformed, originalSplitPositionsFromTransformed, type FragmentReverseTransformation, type SourceMapSegment, type Transformation, type TransformationLeaf, type TransformationNode, type Transformer, type TransformerLeaf, type TransformerNode, } from './text_parsers/transformers.js';