@mdgf11/filesystem-lib 2.2.17 → 2.2.18
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
|
@@ -5,6 +5,13 @@ export declare function loadFilesystemDocument(jsonPath: string): FilesystemDocu
|
|
|
5
5
|
export type DocumentFileType = "nlp" | "details" | "pdf" | "docx" | "txt";
|
|
6
6
|
export declare function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: "nlp" | "details"): string;
|
|
7
7
|
export declare function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: "pdf" | "docx" | "txt"): Buffer;
|
|
8
|
+
export declare const ANONIMIZADO_NAME = "Anonimizado";
|
|
9
|
+
/**
|
|
10
|
+
* Converts the anonymized HTML text (and optionally summary) to DOCX and PDF files
|
|
11
|
+
* saved as Anonimizado.docx / Anonimizado.pdf in the document's filesystem directory.
|
|
12
|
+
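* Requires pandoc (and xelatex for PDF) to be available on PATH. A failed conversion is only logged with console.error, so the returned promise still resolves when the DOCX and/or PDF could not be generated.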
|
|
13
|
+
*/
|
|
14
|
+
export declare function saveAnonimizedDocument(doc: PartialJurisprudenciaDocument, textoHtml: string, sumarioHtml?: string): Promise<void>;
|
|
8
15
|
/** @deprecated use loadDocumentFile(doc, "nlp") instead */
|
|
9
16
|
export declare function loadNlpDocument(jurisprudencia_document: JurisprudenciaDocument): string;
|
|
10
17
|
export declare function hasSelectableText(buffer: Buffer): Promise<boolean>;
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
import { execFileSync } from "child_process";
|
|
1
2
|
import fs from "fs";
|
|
2
3
|
import mammoth from "mammoth";
|
|
4
|
+
import path from "path";
|
|
3
5
|
import { DETAILS_NAME, FILESYSTEM_PATH, ORIGINAL_NAME, ROOT_PATH, SHAREPOINT_COPY_PATH } from "./types.js";
|
|
4
6
|
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
5
7
|
export function writeFilesystemDocument(filesystem_document) {
|
|
@@ -64,6 +66,44 @@ export function loadDocumentFile(doc, type) {
|
|
|
64
66
|
}
|
|
65
67
|
return fs.readFileSync(`${dirPath}/${ORIGINAL_NAME}.${type}`);
|
|
66
68
|
}
|
|
69
|
+
export const ANONIMIZADO_NAME = "Anonimizado";
|
|
70
|
+
/**
|
|
71
|
+
* Converts the anonymized HTML text (and optionally summary) to DOCX and PDF files
|
|
72
|
+
* saved as Anonimizado.docx / Anonimizado.pdf in the document's filesystem directory.
|
|
73
|
+
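* Requires pandoc (and xelatex for PDF) to be available on PATH. A failed conversion is only logged with console.error, so the returned promise still resolves when the DOCX and/or PDF could not be generated.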
|
|
74
|
+
*/
|
|
75
|
+
export async function saveAnonimizedDocument(doc, textoHtml, sumarioHtml) {
|
|
76
|
+
const dirPath = `${ROOT_PATH}${FILESYSTEM_PATH}${generateFilePath(doc)}`;
|
|
77
|
+
fs.mkdirSync(dirPath, { recursive: true });
|
|
78
|
+
const bodyHtml = sumarioHtml
|
|
79
|
+
? `<h2>Sumário</h2>${sumarioHtml}<h2>Texto</h2>${textoHtml}`
|
|
80
|
+
: textoHtml;
|
|
81
|
+
const fullHtml = `<!DOCTYPE html><html><head><meta charset="UTF-8"></head><body>${bodyHtml}</body></html>`;
|
|
82
|
+
const tmpHtml = path.join(dirPath, `_anonimizado_tmp.html`);
|
|
83
|
+
fs.writeFileSync(tmpHtml, fullHtml, { encoding: "utf-8" });
|
|
84
|
+
try {
|
|
85
|
+
const docxPath = path.join(dirPath, `${ANONIMIZADO_NAME}.docx`);
|
|
86
|
+
const pdfPath = path.join(dirPath, `${ANONIMIZADO_NAME}.pdf`);
|
|
87
|
+
try {
|
|
88
|
+
execFileSync("pandoc", [tmpHtml, "-o", docxPath]);
|
|
89
|
+
}
|
|
90
|
+
catch (err) {
|
|
91
|
+
console.error("saveAnonimizedDocument: failed to generate DOCX:", err);
|
|
92
|
+
}
|
|
93
|
+
try {
|
|
94
|
+
execFileSync("pandoc", [tmpHtml, "--pdf-engine=xelatex", "-o", pdfPath]);
|
|
95
|
+
}
|
|
96
|
+
catch (err) {
|
|
97
|
+
console.error("saveAnonimizedDocument: failed to generate PDF:", err);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
finally {
|
|
101
|
+
try {
|
|
102
|
+
fs.unlinkSync(tmpHtml);
|
|
103
|
+
}
|
|
104
|
+
catch { /* ignore */ }
|
|
105
|
+
}
|
|
106
|
+
}
|
|
67
107
|
/** @deprecated use loadDocumentFile(doc, "nlp") instead */
|
|
68
108
|
export function loadNlpDocument(jurisprudencia_document) {
|
|
69
109
|
return loadDocumentFile(jurisprudencia_document, "nlp");
|
package/package.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { JurisprudenciaDocument, PartialJurisprudenciaDocument } from "@stjiris/jurisprudencia-document";
|
|
2
|
+
import { execFileSync } from "child_process";
|
|
2
3
|
import fs from "fs";
|
|
3
4
|
import mammoth from "mammoth";
|
|
5
|
+
import path from "path";
|
|
4
6
|
import { ContentType, Date_Area_Section, DETAILS_NAME, FILESYSTEM_PATH, FilesystemDocument, ORIGINAL_NAME, Retrievable_Metadata, ROOT_PATH, SHAREPOINT_COPY_PATH, Sharepoint_Metadata, SupportedUpdateSources } from "./types.js";
|
|
5
7
|
import { DescritorOficial } from "./descritores.js";
|
|
6
8
|
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
@@ -78,6 +80,50 @@ export function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: Docum
|
|
|
78
80
|
return fs.readFileSync(`${dirPath}/${ORIGINAL_NAME}.${type}`);
|
|
79
81
|
}
|
|
80
82
|
|
|
83
|
+
export const ANONIMIZADO_NAME = "Anonimizado";
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Converts the anonymized HTML text (and optionally summary) to DOCX and PDF files
|
|
87
|
+
* saved as Anonimizado.docx / Anonimizado.pdf in the document's filesystem directory.
|
|
88
|
+
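* Requires pandoc (and xelatex for PDF) to be available on PATH. A failed conversion is only logged with console.error, so the returned promise still resolves when the DOCX and/or PDF could not be generated.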
|
|
89
|
+
*/
|
|
90
|
+
export async function saveAnonimizedDocument(
|
|
91
|
+
doc: PartialJurisprudenciaDocument,
|
|
92
|
+
textoHtml: string,
|
|
93
|
+
sumarioHtml?: string
|
|
94
|
+
): Promise<void> {
|
|
95
|
+
const dirPath = `${ROOT_PATH}${FILESYSTEM_PATH}${generateFilePath(doc)}`;
|
|
96
|
+
fs.mkdirSync(dirPath, { recursive: true });
|
|
97
|
+
|
|
98
|
+
const bodyHtml = sumarioHtml
|
|
99
|
+
? `<h2>Sumário</h2>${sumarioHtml}<h2>Texto</h2>${textoHtml}`
|
|
100
|
+
: textoHtml;
|
|
101
|
+
|
|
102
|
+
const fullHtml = `<!DOCTYPE html><html><head><meta charset="UTF-8"></head><body>${bodyHtml}</body></html>`;
|
|
103
|
+
|
|
104
|
+
const tmpHtml = path.join(dirPath, `_anonimizado_tmp.html`);
|
|
105
|
+
fs.writeFileSync(tmpHtml, fullHtml, { encoding: "utf-8" });
|
|
106
|
+
|
|
107
|
+
try {
|
|
108
|
+
const docxPath = path.join(dirPath, `${ANONIMIZADO_NAME}.docx`);
|
|
109
|
+
const pdfPath = path.join(dirPath, `${ANONIMIZADO_NAME}.pdf`);
|
|
110
|
+
|
|
111
|
+
try {
|
|
112
|
+
execFileSync("pandoc", [tmpHtml, "-o", docxPath]);
|
|
113
|
+
} catch (err) {
|
|
114
|
+
console.error("saveAnonimizedDocument: failed to generate DOCX:", err);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
try {
|
|
118
|
+
execFileSync("pandoc", [tmpHtml, "--pdf-engine=xelatex", "-o", pdfPath]);
|
|
119
|
+
} catch (err) {
|
|
120
|
+
console.error("saveAnonimizedDocument: failed to generate PDF:", err);
|
|
121
|
+
}
|
|
122
|
+
} finally {
|
|
123
|
+
try { fs.unlinkSync(tmpHtml); } catch { /* ignore */ }
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
81
127
|
/** @deprecated use loadDocumentFile(doc, "nlp") instead */
|
|
82
128
|
export function loadNlpDocument(jurisprudencia_document: JurisprudenciaDocument): string {
|
|
83
129
|
return loadDocumentFile(jurisprudencia_document, "nlp");
|