@mdgf11/filesystem-lib 2.2.16 → 2.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,17 @@ import { JurisprudenciaDocument, PartialJurisprudenciaDocument } from "@stjiris/
2
2
  import { ContentType, FilesystemDocument } from "./types.js";
3
3
  export declare function writeFilesystemDocument(filesystem_document: FilesystemDocument): void;
4
4
  export declare function loadFilesystemDocument(jsonPath: string): FilesystemDocument;
5
+ export type DocumentFileType = "nlp" | "details" | "pdf" | "docx" | "txt";
6
+ export declare function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: "nlp" | "details"): string;
7
+ export declare function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: "pdf" | "docx" | "txt"): Buffer;
8
+ export declare const ANONIMIZADO_NAME = "Anonimizado";
9
+ /**
10
+ * Converts the anonymized HTML text (and optionally summary) to DOCX and PDF files
11
+ * saved as Anonimizado.docx / Anonimizado.pdf in the document's filesystem directory.
12
+ * Requires pandoc (and xelatex for PDF) to be available on PATH.
13
+ */
14
+ export declare function saveAnonimizedDocument(doc: PartialJurisprudenciaDocument, textoHtml: string, sumarioHtml?: string): Promise<void>;
15
+ /** @deprecated use loadDocumentFile(doc, "nlp") instead */
5
16
  export declare function loadNlpDocument(jurisprudencia_document: JurisprudenciaDocument): string;
6
17
  export declare function hasSelectableText(buffer: Buffer): Promise<boolean>;
7
18
  export declare function generateFilePath(jurisprudencia_document: PartialJurisprudenciaDocument): string;
@@ -1,5 +1,7 @@
1
+ import { execFileSync } from "child_process";
1
2
  import fs from "fs";
2
3
  import mammoth from "mammoth";
4
+ import path from "path";
3
5
  import { DETAILS_NAME, FILESYSTEM_PATH, ORIGINAL_NAME, ROOT_PATH, SHAREPOINT_COPY_PATH } from "./types.js";
4
6
  import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
5
7
  export function writeFilesystemDocument(filesystem_document) {
@@ -54,8 +56,57 @@ export function loadFilesystemDocument(jsonPath) {
54
56
  }))
55
57
  };
56
58
  }
59
+ export function loadDocumentFile(doc, type) {
60
+ const dirPath = `${ROOT_PATH}${FILESYSTEM_PATH}${generateFilePath(doc)}`;
61
+ if (type === "nlp") {
62
+ return fs.readFileSync(`${dirPath}/${ORIGINAL_NAME}.json`, "utf-8");
63
+ }
64
+ if (type === "details") {
65
+ return fs.readFileSync(`${dirPath}/${DETAILS_NAME}.json`, "utf-8");
66
+ }
67
+ return fs.readFileSync(`${dirPath}/${ORIGINAL_NAME}.${type}`);
68
+ }
69
+ export const ANONIMIZADO_NAME = "Anonimizado";
70
+ /**
71
+ * Converts the anonymized HTML text (and optionally summary) to DOCX and PDF files
72
+ * saved as Anonimizado.docx / Anonimizado.pdf in the document's filesystem directory.
73
+ * Requires pandoc (and xelatex for PDF) to be available on PATH.
74
+ */
75
+ export async function saveAnonimizedDocument(doc, textoHtml, sumarioHtml) {
76
+ const dirPath = `${ROOT_PATH}${FILESYSTEM_PATH}${generateFilePath(doc)}`;
77
+ fs.mkdirSync(dirPath, { recursive: true });
78
+ const bodyHtml = sumarioHtml
79
+ ? `<h2>Sum&aacute;rio</h2>${sumarioHtml}<h2>Texto</h2>${textoHtml}`
80
+ : textoHtml;
81
+ const fullHtml = `<!DOCTYPE html><html><head><meta charset="UTF-8"></head><body>${bodyHtml}</body></html>`;
82
+ const tmpHtml = path.join(dirPath, `_anonimizado_tmp.html`);
83
+ fs.writeFileSync(tmpHtml, fullHtml, { encoding: "utf-8" });
84
+ try {
85
+ const docxPath = path.join(dirPath, `${ANONIMIZADO_NAME}.docx`);
86
+ const pdfPath = path.join(dirPath, `${ANONIMIZADO_NAME}.pdf`);
87
+ try {
88
+ execFileSync("pandoc", [tmpHtml, "-o", docxPath]);
89
+ }
90
+ catch (err) {
91
+ console.error("saveAnonimizedDocument: failed to generate DOCX:", err);
92
+ }
93
+ try {
94
+ execFileSync("pandoc", [tmpHtml, "--pdf-engine=xelatex", "-o", pdfPath]);
95
+ }
96
+ catch (err) {
97
+ console.error("saveAnonimizedDocument: failed to generate PDF:", err);
98
+ }
99
+ }
100
+ finally {
101
+ try {
102
+ fs.unlinkSync(tmpHtml);
103
+ }
104
+ catch { /* ignore */ }
105
+ }
106
+ }
107
+ /** @deprecated use loadDocumentFile(doc, "nlp") instead */
57
108
  export function loadNlpDocument(jurisprudencia_document) {
58
- return fs.readFileSync(`${generateFilePath(jurisprudencia_document)}/${ORIGINAL_NAME}.json`, 'utf-8');
109
+ return loadDocumentFile(jurisprudencia_document, "nlp");
59
110
  }
60
111
  export async function hasSelectableText(buffer) {
61
112
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mdgf11/filesystem-lib",
3
- "version": "2.2.16",
3
+ "version": "2.2.18",
4
4
  "description": "Library to extend usage of jurisprudencia-document",
5
5
  "license": "ISC",
6
6
  "author": "Miguel Fonseca",
@@ -1,6 +1,8 @@
1
1
  import { JurisprudenciaDocument, PartialJurisprudenciaDocument } from "@stjiris/jurisprudencia-document";
2
+ import { execFileSync } from "child_process";
2
3
  import fs from "fs";
3
4
  import mammoth from "mammoth";
5
+ import path from "path";
4
6
  import { ContentType, Date_Area_Section, DETAILS_NAME, FILESYSTEM_PATH, FilesystemDocument, ORIGINAL_NAME, Retrievable_Metadata, ROOT_PATH, SHAREPOINT_COPY_PATH, Sharepoint_Metadata, SupportedUpdateSources } from "./types.js";
5
7
  import { DescritorOficial } from "./descritores.js";
6
8
  import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
@@ -63,8 +65,68 @@ export function loadFilesystemDocument(jsonPath: string): FilesystemDocument {
63
65
  };
64
66
  }
65
67
 
68
+ export type DocumentFileType = "nlp" | "details" | "pdf" | "docx" | "txt";
69
+
70
+ export function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: "nlp" | "details"): string;
71
+ export function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: "pdf" | "docx" | "txt"): Buffer;
72
+ export function loadDocumentFile(doc: PartialJurisprudenciaDocument, type: DocumentFileType): string | Buffer {
73
+ const dirPath = `${ROOT_PATH}${FILESYSTEM_PATH}${generateFilePath(doc)}`;
74
+ if (type === "nlp") {
75
+ return fs.readFileSync(`${dirPath}/${ORIGINAL_NAME}.json`, "utf-8");
76
+ }
77
+ if (type === "details") {
78
+ return fs.readFileSync(`${dirPath}/${DETAILS_NAME}.json`, "utf-8");
79
+ }
80
+ return fs.readFileSync(`${dirPath}/${ORIGINAL_NAME}.${type}`);
81
+ }
82
+
83
+ export const ANONIMIZADO_NAME = "Anonimizado";
84
+
85
+ /**
86
+ * Converts the anonymized HTML text (and optionally summary) to DOCX and PDF files
87
+ * saved as Anonimizado.docx / Anonimizado.pdf in the document's filesystem directory.
88
+ * Requires pandoc (and xelatex for PDF) to be available on PATH.
89
+ */
90
+ export async function saveAnonimizedDocument(
91
+ doc: PartialJurisprudenciaDocument,
92
+ textoHtml: string,
93
+ sumarioHtml?: string
94
+ ): Promise<void> {
95
+ const dirPath = `${ROOT_PATH}${FILESYSTEM_PATH}${generateFilePath(doc)}`;
96
+ fs.mkdirSync(dirPath, { recursive: true });
97
+
98
+ const bodyHtml = sumarioHtml
99
+ ? `<h2>Sum&aacute;rio</h2>${sumarioHtml}<h2>Texto</h2>${textoHtml}`
100
+ : textoHtml;
101
+
102
+ const fullHtml = `<!DOCTYPE html><html><head><meta charset="UTF-8"></head><body>${bodyHtml}</body></html>`;
103
+
104
+ const tmpHtml = path.join(dirPath, `_anonimizado_tmp.html`);
105
+ fs.writeFileSync(tmpHtml, fullHtml, { encoding: "utf-8" });
106
+
107
+ try {
108
+ const docxPath = path.join(dirPath, `${ANONIMIZADO_NAME}.docx`);
109
+ const pdfPath = path.join(dirPath, `${ANONIMIZADO_NAME}.pdf`);
110
+
111
+ try {
112
+ execFileSync("pandoc", [tmpHtml, "-o", docxPath]);
113
+ } catch (err) {
114
+ console.error("saveAnonimizedDocument: failed to generate DOCX:", err);
115
+ }
116
+
117
+ try {
118
+ execFileSync("pandoc", [tmpHtml, "--pdf-engine=xelatex", "-o", pdfPath]);
119
+ } catch (err) {
120
+ console.error("saveAnonimizedDocument: failed to generate PDF:", err);
121
+ }
122
+ } finally {
123
+ try { fs.unlinkSync(tmpHtml); } catch { /* ignore */ }
124
+ }
125
+ }
126
+
127
+ /** @deprecated use loadDocumentFile(doc, "nlp") instead */
66
128
  export function loadNlpDocument(jurisprudencia_document: JurisprudenciaDocument): string {
67
- return fs.readFileSync(`${generateFilePath(jurisprudencia_document)}/${ORIGINAL_NAME}.json`, 'utf-8');
129
+ return loadDocumentFile(jurisprudencia_document, "nlp");
68
130
  }
69
131
 
70
132
  export async function hasSelectableText(buffer: Buffer): Promise<boolean> {