@mdgf11/filesystem-lib 2.2.16 → 2.2.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -2,6 +2,17 @@ import { JurisprudenciaDocument, PartialJurisprudenciaDocument } from "@stjiris/
|
|
|
2
2
|
import { ContentType, FilesystemDocument } from "./types.js";
|
|
3
3
|
export declare function writeFilesystemDocument(filesystem_document: FilesystemDocument): void;
|
|
4
4
|
export declare function loadFilesystemDocument(jsonPath: string): FilesystemDocument;
|
|
5
|
+
/**
 * Kinds of per-document file that `loadDocumentFile` can read.
 * `"nlp"` and `"details"` select JSON files returned as UTF-8 text;
 * `"pdf"`, `"docx"` and `"txt"` select files returned as raw bytes.
 */
export type DocumentFileType = "nlp" | "details" | "pdf" | "docx" | "txt";
|
|
6
|
+
/**
 * Reads a JSON file from the document's filesystem directory and returns its
 * contents as a UTF-8 string (the JSON is not parsed).
 */
export declare function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: "nlp" | "details"): string;
|
|
7
|
+
/**
 * Reads the file with the given extension from the document's filesystem
 * directory and returns its raw bytes.
 */
export declare function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: "pdf" | "docx" | "txt"): Buffer;
|
|
8
|
+
/** Base file name (without extension) of the DOCX/PDF files written by `saveAnonimizedDocument`. */
export declare const ANONIMIZADO_NAME = "Anonimizado";
|
|
9
|
+
/**
 * Converts the anonymized HTML text (and optionally summary) to DOCX and PDF files
 * saved as Anonimizado.docx / Anonimizado.pdf in the document's filesystem directory.
 * Requires pandoc (and xelatex for PDF) to be available on PATH.
 *
 * Conversion is best-effort: a failed DOCX or PDF conversion is logged with
 * `console.error` and does not reject the returned promise.
 *
 * @param doc - Document whose filesystem directory receives the output files.
 * @param textoHtml - Anonymized body HTML.
 * @param sumarioHtml - Optional anonymized summary HTML; when provided and
 *   non-empty it is placed before the text, each under its own heading.
 */
export declare function saveAnonimizedDocument(doc: PartialJurisprudenciaDocument, textoHtml: string, sumarioHtml?: string): Promise<void>;
|
|
15
|
+
/**
 * Returns the document's `"nlp"` file contents as a UTF-8 string.
 *
 * @deprecated use loadDocumentFile(doc, "nlp") instead
 */
export declare function loadNlpDocument(jurisprudencia_document: JurisprudenciaDocument): string;
|
|
6
17
|
export declare function hasSelectableText(buffer: Buffer): Promise<boolean>;
|
|
7
18
|
export declare function generateFilePath(jurisprudencia_document: PartialJurisprudenciaDocument): string;
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
import { execFileSync } from "child_process";
|
|
1
2
|
import fs from "fs";
|
|
2
3
|
import mammoth from "mammoth";
|
|
4
|
+
import path from "path";
|
|
3
5
|
import { DETAILS_NAME, FILESYSTEM_PATH, ORIGINAL_NAME, ROOT_PATH, SHAREPOINT_COPY_PATH } from "./types.js";
|
|
4
6
|
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
5
7
|
export function writeFilesystemDocument(filesystem_document) {
|
|
@@ -54,8 +56,57 @@ export function loadFilesystemDocument(jsonPath) {
|
|
|
54
56
|
}))
|
|
55
57
|
};
|
|
56
58
|
}
|
|
59
|
+
/**
 * Reads one of the files stored in a document's filesystem directory.
 *
 * - `"nlp"` reads `<ORIGINAL_NAME>.json` as UTF-8 text.
 * - `"details"` reads `<DETAILS_NAME>.json` as UTF-8 text.
 * - any other type is used as the extension of `<ORIGINAL_NAME>` and the file
 *   is returned as a Buffer.
 *
 * The read is synchronous; errors from `fs.readFileSync` (for example a
 * missing file) propagate to the caller.
 */
export function loadDocumentFile(doc, type) {
    const documentDir = `${ROOT_PATH}${FILESYSTEM_PATH}${generateFilePath(doc)}`;
    const inDocumentDir = (fileName) => `${documentDir}/${fileName}`;
    switch (type) {
        case "nlp":
            return fs.readFileSync(inDocumentDir(`${ORIGINAL_NAME}.json`), "utf-8");
        case "details":
            return fs.readFileSync(inDocumentDir(`${DETAILS_NAME}.json`), "utf-8");
        default:
            // No encoding argument: binary formats are returned as a Buffer.
            return fs.readFileSync(inDocumentDir(`${ORIGINAL_NAME}.${type}`));
    }
}
|
|
69
|
+
export const ANONIMIZADO_NAME = "Anonimizado";
|
|
70
|
+
/**
 * Converts the anonymized HTML text (and optionally summary) to DOCX and PDF files
 * saved as Anonimizado.docx / Anonimizado.pdf in the document's filesystem directory.
 * Requires pandoc (and xelatex for PDF) to be available on PATH.
 *
 * The HTML is first written to a temporary file inside the document directory,
 * which is removed again once both conversions have been attempted.
 *
 * Conversion is best-effort: a failed DOCX or PDF conversion is logged with
 * `console.error` and does not reject the returned promise, and one format
 * failing does not stop the other from being attempted. Failing to create the
 * directory or to write the temporary HTML does reject.
 *
 * @param doc Document whose filesystem directory receives the output files.
 * @param textoHtml Anonymized body HTML.
 * @param sumarioHtml Optional anonymized summary HTML; when truthy it is placed
 *   before the text, under a "Sumário" heading, with the text under "Texto".
 */
export async function saveAnonimizedDocument(doc, textoHtml, sumarioHtml) {
    const dirPath = `${ROOT_PATH}${FILESYSTEM_PATH}${generateFilePath(doc)}`;
    fs.mkdirSync(dirPath, { recursive: true });
    const bodyHtml = sumarioHtml
        ? `<h2>Sumário</h2>${sumarioHtml}<h2>Texto</h2>${textoHtml}`
        : textoHtml;
    const fullHtml = `<!DOCTYPE html><html><head><meta charset="UTF-8"></head><body>${bodyHtml}</body></html>`;
    const tmpHtml = path.join(dirPath, `_anonimizado_tmp.html`);
    const docxPath = path.join(dirPath, `${ANONIMIZADO_NAME}.docx`);
    const pdfPath = path.join(dirPath, `${ANONIMIZADO_NAME}.pdf`);
    // One entry per output format; each is attempted independently.
    const conversions = [
        { label: "DOCX", args: [tmpHtml, "-o", docxPath] },
        { label: "PDF", args: [tmpHtml, "--pdf-engine=xelatex", "-o", pdfPath] },
    ];
    try {
        // Written inside the try so that a partially written temporary file is
        // still removed by the finally block when the write itself fails.
        fs.writeFileSync(tmpHtml, fullHtml, { encoding: "utf-8" });
        for (const { label, args } of conversions) {
            try {
                execFileSync("pandoc", args);
            }
            catch (err) {
                console.error(`saveAnonimizedDocument: failed to generate ${label}:`, err);
            }
        }
    }
    finally {
        try {
            fs.unlinkSync(tmpHtml);
        }
        catch { /* best-effort cleanup: the temporary file may not exist */ }
    }
}
|
|
107
|
+
/**
 * Returns the document's `"nlp"` file contents as a UTF-8 string.
 *
 * Kept for backward compatibility; it only delegates to `loadDocumentFile`.
 *
 * @deprecated use loadDocumentFile(doc, "nlp") instead
 */
export function loadNlpDocument(jurisprudencia_document) {
    return loadDocumentFile(jurisprudencia_document, "nlp");
}
|
|
60
111
|
export async function hasSelectableText(buffer) {
|
|
61
112
|
try {
|
package/package.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { JurisprudenciaDocument, PartialJurisprudenciaDocument } from "@stjiris/jurisprudencia-document";
|
|
2
|
+
import { execFileSync } from "child_process";
|
|
2
3
|
import fs from "fs";
|
|
3
4
|
import mammoth from "mammoth";
|
|
5
|
+
import path from "path";
|
|
4
6
|
import { ContentType, Date_Area_Section, DETAILS_NAME, FILESYSTEM_PATH, FilesystemDocument, ORIGINAL_NAME, Retrievable_Metadata, ROOT_PATH, SHAREPOINT_COPY_PATH, Sharepoint_Metadata, SupportedUpdateSources } from "./types.js";
|
|
5
7
|
import { DescritorOficial } from "./descritores.js";
|
|
6
8
|
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
@@ -63,8 +65,68 @@ export function loadFilesystemDocument(jsonPath: string): FilesystemDocument {
|
|
|
63
65
|
};
|
|
64
66
|
}
|
|
65
67
|
|
|
68
|
+
/**
 * Kinds of per-document file that `loadDocumentFile` can read.
 * `"nlp"` and `"details"` select JSON files returned as UTF-8 text;
 * `"pdf"`, `"docx"` and `"txt"` select files returned as raw bytes.
 */
export type DocumentFileType = "nlp" | "details" | "pdf" | "docx" | "txt";
|
|
69
|
+
|
|
70
|
+
/**
 * Reads one of the files stored in a document's filesystem directory.
 *
 * - `"nlp"` reads `<ORIGINAL_NAME>.json` as UTF-8 text.
 * - `"details"` reads `<DETAILS_NAME>.json` as UTF-8 text.
 * - `"pdf"`, `"docx"` and `"txt"` are used as the extension of
 *   `<ORIGINAL_NAME>` and the file is returned as a Buffer.
 *
 * The read is synchronous; errors from `fs.readFileSync` (for example a
 * missing file) propagate to the caller.
 *
 * @param doc - Document whose directory is derived via `generateFilePath`.
 * @param type - Which of the document's files to read.
 * @returns The file contents: a string for JSON types, a Buffer otherwise.
 */
export function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: "nlp" | "details"): string;
export function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: "pdf" | "docx" | "txt"): Buffer;
export function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: DocumentFileType): string | Buffer {
    const documentDir = `${ROOT_PATH}${FILESYSTEM_PATH}${generateFilePath(doc)}`;
    const inDocumentDir = (fileName: string): string => `${documentDir}/${fileName}`;

    switch (type) {
        case "nlp":
            return fs.readFileSync(inDocumentDir(`${ORIGINAL_NAME}.json`), "utf-8");
        case "details":
            return fs.readFileSync(inDocumentDir(`${DETAILS_NAME}.json`), "utf-8");
        default:
            // No encoding argument: binary formats are returned as a Buffer.
            return fs.readFileSync(inDocumentDir(`${ORIGINAL_NAME}.${type}`));
    }
}
|
|
82
|
+
|
|
83
|
+
/** Base file name (without extension) of the DOCX/PDF files written by `saveAnonimizedDocument`. */
export const ANONIMIZADO_NAME = "Anonimizado";
|
|
84
|
+
|
|
85
|
+
/**
 * Converts the anonymized HTML text (and optionally summary) to DOCX and PDF files
 * saved as Anonimizado.docx / Anonimizado.pdf in the document's filesystem directory.
 * Requires pandoc (and xelatex for PDF) to be available on PATH.
 *
 * The HTML is first written to a temporary file inside the document directory,
 * which is removed again once both conversions have been attempted.
 *
 * Conversion is best-effort: a failed DOCX or PDF conversion is logged with
 * `console.error` and does not reject the returned promise, and one format
 * failing does not stop the other from being attempted. Failing to create the
 * directory or to write the temporary HTML does reject.
 *
 * @param doc - Document whose filesystem directory receives the output files.
 * @param textoHtml - Anonymized body HTML.
 * @param sumarioHtml - Optional anonymized summary HTML; when truthy it is
 *   placed before the text, under a "Sumário" heading, with the text under "Texto".
 */
export async function saveAnonimizedDocument(
    doc: PartialJurisprudenciaDocument,
    textoHtml: string,
    sumarioHtml?: string
): Promise<void> {
    const dirPath = `${ROOT_PATH}${FILESYSTEM_PATH}${generateFilePath(doc)}`;
    fs.mkdirSync(dirPath, { recursive: true });

    const bodyHtml = sumarioHtml
        ? `<h2>Sumário</h2>${sumarioHtml}<h2>Texto</h2>${textoHtml}`
        : textoHtml;

    const fullHtml = `<!DOCTYPE html><html><head><meta charset="UTF-8"></head><body>${bodyHtml}</body></html>`;

    const tmpHtml = path.join(dirPath, `_anonimizado_tmp.html`);
    const docxPath = path.join(dirPath, `${ANONIMIZADO_NAME}.docx`);
    const pdfPath = path.join(dirPath, `${ANONIMIZADO_NAME}.pdf`);

    // One entry per output format; each is attempted independently.
    const conversions: ReadonlyArray<{ label: string; args: string[] }> = [
        { label: "DOCX", args: [tmpHtml, "-o", docxPath] },
        { label: "PDF", args: [tmpHtml, "--pdf-engine=xelatex", "-o", pdfPath] },
    ];

    try {
        // Written inside the try so that a partially written temporary file is
        // still removed by the finally block when the write itself fails.
        fs.writeFileSync(tmpHtml, fullHtml, { encoding: "utf-8" });

        for (const { label, args } of conversions) {
            try {
                execFileSync("pandoc", args);
            } catch (err) {
                console.error(`saveAnonimizedDocument: failed to generate ${label}:`, err);
            }
        }
    } finally {
        try { fs.unlinkSync(tmpHtml); } catch { /* best-effort cleanup: the temporary file may not exist */ }
    }
}
|
|
126
|
+
|
|
127
|
+
/**
 * Returns the document's `"nlp"` file contents as a UTF-8 string.
 *
 * Kept for backward compatibility; it only delegates to `loadDocumentFile`.
 *
 * @deprecated use loadDocumentFile(doc, "nlp") instead
 */
export function loadNlpDocument(jurisprudencia_document: JurisprudenciaDocument): string {
    return loadDocumentFile(jurisprudencia_document, "nlp");
}
|
|
69
131
|
|
|
70
132
|
export async function hasSelectableText(buffer: Buffer): Promise<boolean> {
|