@mdgf11/filesystem-lib 2.2.11 → 2.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { PartialJurisprudenciaDocument } from "@stjiris/jurisprudencia-document";
|
|
2
|
-
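import { ContentType, Date_Area_Section, FilesystemDocument, Retrievable_Metadata, Sharepoint_Metadata } from "./types.js";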
|
|
2
|
+
import { ContentType, FilesystemDocument } from "./types.js";
|
|
3
3
|
export declare function writeFilesystemDocument(filesystem_document: FilesystemDocument): void;
|
|
4
4
|
export declare function loadFilesystemDocument(jsonPath: string): FilesystemDocument;
|
|
5
|
-
export declare function createJurisprudenciaDocument(retrievable_Metadata: Retrievable_Metadata, contents: ContentType[], date_area_section: Date_Area_Section, sharepoint_metadata?: Sharepoint_Metadata): Promise<PartialJurisprudenciaDocument>;
|
|
6
5
|
export declare function hasSelectableText(buffer: Buffer): Promise<boolean>;
|
|
7
6
|
export declare function generateFilePath(jurisprudencia_document: PartialJurisprudenciaDocument): string;
|
|
7
|
+
export declare function extractContent(contents: ContentType[]): Promise<string[]>;
|
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
import { calculateHASH, calculateUUID } from "@stjiris/jurisprudencia-document";
|
|
2
1
|
import fs from "fs";
|
|
3
2
|
import mammoth from "mammoth";
|
|
4
3
|
import { DETAILS_NAME, FILESYSTEM_PATH, ORIGINAL_NAME, ROOT_PATH, SHAREPOINT_COPY_PATH } from "./types.js";
|
|
5
|
-
import { DescritorOficial } from "./descritores.js";
|
|
6
4
|
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
7
5
|
export function writeFilesystemDocument(filesystem_document) {
|
|
8
6
|
if (!filesystem_document.content)
|
|
@@ -56,77 +54,6 @@ export function loadFilesystemDocument(jsonPath) {
|
|
|
56
54
|
}))
|
|
57
55
|
};
|
|
58
56
|
}
|
|
59
|
-
export async function createJurisprudenciaDocument(retrievable_Metadata, contents, date_area_section, sharepoint_metadata) {
|
|
60
|
-
if (!retrievable_Metadata) {
|
|
61
|
-
throw new Error("Missing metadata.");
|
|
62
|
-
}
|
|
63
|
-
const content = await extractContent(contents);
|
|
64
|
-
const url = sharepoint_metadata ? sharepoint_metadata.sharepoint_url : "";
|
|
65
|
-
let Original = {};
|
|
66
|
-
let CONTENT = content;
|
|
67
|
-
let numProc = retrievable_Metadata.process_number;
|
|
68
|
-
let Data = Intl.DateTimeFormat("pt-PT").format(date_area_section.file_date);
|
|
69
|
-
let origin = "STJ (Sharepoint)";
|
|
70
|
-
Original["Decisão Texto Integral"] = content.map(line => `<p><font>${line}</font><br>`).join('');
|
|
71
|
-
Original["Data"] = Data;
|
|
72
|
-
Original["Número de Processo"] = numProc;
|
|
73
|
-
Original["Fonte"] = origin;
|
|
74
|
-
Original["URL"] = url;
|
|
75
|
-
Original["Jurisprudência"] = "Simples";
|
|
76
|
-
let obj = {
|
|
77
|
-
"Original": Original,
|
|
78
|
-
"CONTENT": CONTENT,
|
|
79
|
-
"Data": Data,
|
|
80
|
-
"Número de Processo": numProc,
|
|
81
|
-
"Fonte": origin,
|
|
82
|
-
"URL": url,
|
|
83
|
-
"Jurisprudência": { Index: ["Simples"], Original: ["Simples"], Show: ["Simples"] },
|
|
84
|
-
"STATE": "importação",
|
|
85
|
-
};
|
|
86
|
-
if (retrievable_Metadata.process_mean.includes("Sumário")) {
|
|
87
|
-
obj.Sumário = content.map(line => `<p><font>${line}</font><br>`).join('');
|
|
88
|
-
obj.Texto = "";
|
|
89
|
-
}
|
|
90
|
-
else {
|
|
91
|
-
obj.Sumário = "";
|
|
92
|
-
obj.Texto = content.map(line => `<p><font>${line}</font><br>`).join('');
|
|
93
|
-
}
|
|
94
|
-
if (retrievable_Metadata.descriptors && retrievable_Metadata.descriptors.length > 0) {
|
|
95
|
-
obj.Descritores = {
|
|
96
|
-
Index: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc]),
|
|
97
|
-
Original: retrievable_Metadata.descriptors,
|
|
98
|
-
Show: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc])
|
|
99
|
-
};
|
|
100
|
-
}
|
|
101
|
-
if (date_area_section.area && date_area_section.area.length > 0) {
|
|
102
|
-
obj.Área = { Index: [date_area_section.area], Original: [date_area_section.area], Show: [date_area_section.area] };
|
|
103
|
-
}
|
|
104
|
-
if (date_area_section.section && date_area_section.section.length > 0) {
|
|
105
|
-
obj.Secção = { Index: [date_area_section.section], Original: [date_area_section.section], Show: [date_area_section.section] };
|
|
106
|
-
}
|
|
107
|
-
if (retrievable_Metadata.judge && retrievable_Metadata.judge.length > 0) {
|
|
108
|
-
obj["Relator Nome Profissional"] = { Index: [retrievable_Metadata.judge], Original: [retrievable_Metadata.judge], Show: [retrievable_Metadata.judge] };
|
|
109
|
-
}
|
|
110
|
-
if (retrievable_Metadata.process_mean && retrievable_Metadata.process_mean.length > 0) {
|
|
111
|
-
obj["Meio Processual"] = { Index: retrievable_Metadata.process_mean, Original: retrievable_Metadata.process_mean, Show: retrievable_Metadata.process_mean };
|
|
112
|
-
}
|
|
113
|
-
if (retrievable_Metadata.decision && retrievable_Metadata.decision.length > 0) {
|
|
114
|
-
obj["Decisão"] = { Index: [retrievable_Metadata.decision], Original: [retrievable_Metadata.decision], Show: [retrievable_Metadata.decision] };
|
|
115
|
-
}
|
|
116
|
-
obj["HASH"] = calculateHASH({
|
|
117
|
-
...obj,
|
|
118
|
-
Original: obj.Original,
|
|
119
|
-
"Número de Processo": obj["Número de Processo"],
|
|
120
|
-
Data: obj.Data,
|
|
121
|
-
"Meio Processual": obj["Meio Processual"],
|
|
122
|
-
"Texto": obj.Texto,
|
|
123
|
-
"Texto Não Anonimizado": obj.Texto,
|
|
124
|
-
"Sumário": obj.Sumário,
|
|
125
|
-
"Sumário Não Anonimizado": obj.Sumário,
|
|
126
|
-
});
|
|
127
|
-
obj["UUID"] = calculateUUID(obj["HASH"]);
|
|
128
|
-
return obj;
|
|
129
|
-
}
|
|
130
57
|
export async function hasSelectableText(buffer) {
|
|
131
58
|
try {
|
|
132
59
|
const uint8Array = new Uint8Array(buffer);
|
|
@@ -157,14 +84,7 @@ export function generateFilePath(jurisprudencia_document) {
|
|
|
157
84
|
const data = parseDateFromString(jurisprudencia_document.Data);
|
|
158
85
|
return `/${jurisprudencia_document.Área.Show}/${data.getFullYear()}/${data.getMonth() + 1}/${data.getDate()}/${jurisprudencia_document["Número de Processo"].replace("/", "-")}/${jurisprudencia_document.UUID}`;
|
|
159
86
|
}
|
|
160
|
-
function parseDateFromString(dateStr) {
|
|
161
|
-
const [dayStr, monthStr, yearStr] = dateStr.split("/");
|
|
162
|
-
const day = parseInt(dayStr, 10);
|
|
163
|
-
const month = parseInt(monthStr, 10) - 1;
|
|
164
|
-
const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
|
|
165
|
-
return new Date(year, month, day);
|
|
166
|
-
}
|
|
167
|
-
async function extractContent(contents) {
|
|
87
|
+
export async function extractContent(contents) {
|
|
168
88
|
for (const content of contents) {
|
|
169
89
|
if (content.extension === "txt") {
|
|
170
90
|
return content.data.toString('utf-8').split(/\r?\n/).filter(line => line.trim().length > 0);
|
|
@@ -178,6 +98,13 @@ async function extractContent(contents) {
|
|
|
178
98
|
}
|
|
179
99
|
throw new Error("Contents are not a supported format.");
|
|
180
100
|
}
|
|
101
|
+
function parseDateFromString(dateStr) {
|
|
102
|
+
const [dayStr, monthStr, yearStr] = dateStr.split("/");
|
|
103
|
+
const day = parseInt(dayStr, 10);
|
|
104
|
+
const month = parseInt(monthStr, 10) - 1;
|
|
105
|
+
const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
|
|
106
|
+
return new Date(year, month, day);
|
|
107
|
+
}
|
|
181
108
|
async function pdfToLines(buffer) {
|
|
182
109
|
const uint8Array = new Uint8Array(buffer);
|
|
183
110
|
const loadingTask = getDocument({ data: uint8Array, verbosity: 0 });
|
package/dist/types.d.ts
CHANGED
|
@@ -48,7 +48,7 @@ export type Date_Area_Section = {
|
|
|
48
48
|
area: string;
|
|
49
49
|
section: string;
|
|
50
50
|
};
|
|
51
|
-
export declare const SUPPORTED_EXTENSIONS: readonly ["txt", "pdf", "docx", "html"];
|
|
51
|
+
export declare const SUPPORTED_EXTENSIONS: readonly ["txt", "pdf", "docx", "html", "json"];
|
|
52
52
|
export type Supported_Content_Extensions = typeof SUPPORTED_EXTENSIONS[number];
|
|
53
53
|
export type ContentType = {
|
|
54
54
|
extension: Supported_Content_Extensions;
|
package/dist/types.js
CHANGED
|
@@ -12,7 +12,7 @@ export const DGSI_LOGS_PATH = `${LOGS_PATH}/DGSI`;
|
|
|
12
12
|
export const SHAREPOINT_UPDATE_DIR = `${ROOT_PATH}${SHAREPOINT_LOGS_PATH}`;
|
|
13
13
|
export const DGSI_UPDATE_DIR = `${ROOT_PATH}${DGSI_LOGS_PATH}`;
|
|
14
14
|
export const SOURCE_TO_PATH = { "STJ (Sharepoint)": SHAREPOINT_UPDATE_DIR, "DGSI": DGSI_UPDATE_DIR };
|
|
15
|
-
export const SUPPORTED_EXTENSIONS = ["txt", "pdf", "docx", "html"];
|
|
15
|
+
export const SUPPORTED_EXTENSIONS = ["txt", "pdf", "docx", "html", "json"];
|
|
16
16
|
export function isSupportedExtension(ext) {
|
|
17
17
|
return SUPPORTED_EXTENSIONS.includes(ext);
|
|
18
18
|
}
|
package/package.json
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
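import { calculateHASH, calculateUUID, JurisprudenciaDocument, PartialJurisprudenciaDocument } from "@stjiris/jurisprudencia-document";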
|
|
1
|
+
import { PartialJurisprudenciaDocument } from "@stjiris/jurisprudencia-document";
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import mammoth from "mammoth";
|
|
4
4
|
import { ContentType, Date_Area_Section, DETAILS_NAME, FILESYSTEM_PATH, FilesystemDocument, ORIGINAL_NAME, Retrievable_Metadata, ROOT_PATH, SHAREPOINT_COPY_PATH, Sharepoint_Metadata, SupportedUpdateSources } from "./types.js";
|
|
@@ -63,82 +63,6 @@ export function loadFilesystemDocument(jsonPath: string): FilesystemDocument {
|
|
|
63
63
|
};
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
-
export async function createJurisprudenciaDocument(retrievable_Metadata: Retrievable_Metadata, contents: ContentType[], date_area_section: Date_Area_Section, sharepoint_metadata?: Sharepoint_Metadata): Promise<PartialJurisprudenciaDocument> {
|
|
67
|
-
if (!retrievable_Metadata) {
|
|
68
|
-
throw new Error("Missing metadata.");
|
|
69
|
-
}
|
|
70
|
-
const content = await extractContent(contents);
|
|
71
|
-
const url = sharepoint_metadata ? sharepoint_metadata.sharepoint_url : "";
|
|
72
|
-
|
|
73
|
-
let Original: JurisprudenciaDocument["Original"] = {};
|
|
74
|
-
let CONTENT: JurisprudenciaDocument["CONTENT"] = content;
|
|
75
|
-
let numProc: JurisprudenciaDocument["Número de Processo"] = retrievable_Metadata.process_number;
|
|
76
|
-
let Data: JurisprudenciaDocument["Data"] = Intl.DateTimeFormat("pt-PT").format(date_area_section.file_date);
|
|
77
|
-
let origin: SupportedUpdateSources = "STJ (Sharepoint)";
|
|
78
|
-
|
|
79
|
-
Original["Decisão Texto Integral"] = content.map(line => `<p><font>${line}</font><br>`).join('');
|
|
80
|
-
Original["Data"] = Data;
|
|
81
|
-
Original["Número de Processo"] = numProc;
|
|
82
|
-
Original["Fonte"] = origin;
|
|
83
|
-
Original["URL"] = url;
|
|
84
|
-
Original["Jurisprudência"] = "Simples";
|
|
85
|
-
|
|
86
|
-
let obj: PartialJurisprudenciaDocument = {
|
|
87
|
-
"Original": Original,
|
|
88
|
-
"CONTENT": CONTENT,
|
|
89
|
-
"Data": Data,
|
|
90
|
-
"Número de Processo": numProc,
|
|
91
|
-
"Fonte": origin,
|
|
92
|
-
"URL": url,
|
|
93
|
-
"Jurisprudência": { Index: ["Simples"], Original: ["Simples"], Show: ["Simples"] },
|
|
94
|
-
"STATE": "importação",
|
|
95
|
-
}
|
|
96
|
-
if (retrievable_Metadata.process_mean.includes("Sumário")) {
|
|
97
|
-
obj.Sumário = content.map(line => `<p><font>${line}</font><br>`).join('');
|
|
98
|
-
obj.Texto = "";
|
|
99
|
-
} else {
|
|
100
|
-
obj.Sumário = "";
|
|
101
|
-
obj.Texto = content.map(line => `<p><font>${line}</font><br>`).join('');
|
|
102
|
-
}
|
|
103
|
-
if (retrievable_Metadata.descriptors && retrievable_Metadata.descriptors.length > 0) {
|
|
104
|
-
obj.Descritores = {
|
|
105
|
-
Index: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc]),
|
|
106
|
-
Original: retrievable_Metadata.descriptors,
|
|
107
|
-
Show: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc])
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
if (date_area_section.area && date_area_section.area.length > 0) {
|
|
111
|
-
obj.Área = { Index: [date_area_section.area], Original: [date_area_section.area], Show: [date_area_section.area] };
|
|
112
|
-
}
|
|
113
|
-
if (date_area_section.section && date_area_section.section.length > 0) {
|
|
114
|
-
obj.Secção = { Index: [date_area_section.section], Original: [date_area_section.section], Show: [date_area_section.section] };
|
|
115
|
-
}
|
|
116
|
-
if (retrievable_Metadata.judge && retrievable_Metadata.judge.length > 0) {
|
|
117
|
-
obj["Relator Nome Profissional"] = { Index: [retrievable_Metadata.judge], Original: [retrievable_Metadata.judge], Show: [retrievable_Metadata.judge] };
|
|
118
|
-
}
|
|
119
|
-
if (retrievable_Metadata.process_mean && retrievable_Metadata.process_mean.length > 0) {
|
|
120
|
-
obj["Meio Processual"] = { Index: retrievable_Metadata.process_mean, Original: retrievable_Metadata.process_mean, Show: retrievable_Metadata.process_mean };
|
|
121
|
-
}
|
|
122
|
-
if (retrievable_Metadata.decision && retrievable_Metadata.decision.length > 0) {
|
|
123
|
-
obj["Decisão"] = { Index: [retrievable_Metadata.decision], Original: [retrievable_Metadata.decision], Show: [retrievable_Metadata.decision] };
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
obj["HASH"] = calculateHASH({
|
|
127
|
-
...obj,
|
|
128
|
-
Original: obj.Original,
|
|
129
|
-
"Número de Processo": obj["Número de Processo"],
|
|
130
|
-
Data: obj.Data,
|
|
131
|
-
"Meio Processual": obj["Meio Processual"],
|
|
132
|
-
"Texto": obj.Texto,
|
|
133
|
-
"Texto Não Anonimizado": obj.Texto,
|
|
134
|
-
"Sumário": obj.Sumário,
|
|
135
|
-
"Sumário Não Anonimizado": obj.Sumário,
|
|
136
|
-
})
|
|
137
|
-
|
|
138
|
-
obj["UUID"] = calculateUUID(obj["HASH"]);
|
|
139
|
-
return obj;
|
|
140
|
-
}
|
|
141
|
-
|
|
142
66
|
export async function hasSelectableText(buffer: Buffer): Promise<boolean> {
|
|
143
67
|
try {
|
|
144
68
|
const uint8Array = new Uint8Array(buffer);
|
|
@@ -175,17 +99,7 @@ export function generateFilePath(jurisprudencia_document: PartialJurisprudenciaD
|
|
|
175
99
|
return `/${jurisprudencia_document.Área.Show}/${data.getFullYear()}/${data.getMonth() + 1}/${data.getDate()}/${jurisprudencia_document["Número de Processo"].replace("/", "-")}/${jurisprudencia_document.UUID}`
|
|
176
100
|
}
|
|
177
101
|
|
|
178
|
-
function parseDateFromString(dateStr: string): Date {
|
|
179
|
-
const [dayStr, monthStr, yearStr] = dateStr.split("/");
|
|
180
|
-
|
|
181
|
-
const day = parseInt(dayStr, 10);
|
|
182
|
-
const month = parseInt(monthStr, 10) - 1;
|
|
183
|
-
const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
|
|
184
|
-
|
|
185
|
-
return new Date(year, month, day);
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
async function extractContent(contents: ContentType[]): Promise<string[]> {
|
|
102
|
+
export async function extractContent(contents: ContentType[]): Promise<string[]> {
|
|
189
103
|
for (const content of contents) {
|
|
190
104
|
if (content.extension === "txt") {
|
|
191
105
|
return content.data.toString('utf-8').split(/\r?\n/).filter(line => line.trim().length > 0);
|
|
@@ -200,6 +114,16 @@ async function extractContent(contents: ContentType[]): Promise<string[]> {
|
|
|
200
114
|
throw new Error("Contents are not a supported format.");
|
|
201
115
|
}
|
|
202
116
|
|
|
117
|
+
function parseDateFromString(dateStr: string): Date {
|
|
118
|
+
const [dayStr, monthStr, yearStr] = dateStr.split("/");
|
|
119
|
+
|
|
120
|
+
const day = parseInt(dayStr, 10);
|
|
121
|
+
const month = parseInt(monthStr, 10) - 1;
|
|
122
|
+
const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
|
|
123
|
+
|
|
124
|
+
return new Date(year, month, day);
|
|
125
|
+
}
|
|
126
|
+
|
|
203
127
|
async function pdfToLines(buffer: Buffer): Promise<string[]> {
|
|
204
128
|
const uint8Array = new Uint8Array(buffer);
|
|
205
129
|
const loadingTask = getDocument({ data: uint8Array, verbosity: 0 });
|
package/src/types.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
|
|
2
2
|
export const UpdateSources = ["STJ (Sharepoint)", "DGSI"] as const;
|
|
3
3
|
export type SupportedUpdateSources = typeof UpdateSources[number];
|
|
4
|
-
import { PartialJurisprudenciaDocument } from '@stjiris/jurisprudencia-document';
|
|
5
4
|
import dotenv from 'dotenv';
|
|
6
5
|
|
|
7
6
|
dotenv.config();
|
|
@@ -46,7 +45,7 @@ export type Sharepoint_Metadata = {
|
|
|
46
45
|
export type Retrievable_Metadata = { process_number: string, judge: string, process_mean: string[], decision: string, descriptors?: string[] };
|
|
47
46
|
export type Date_Area_Section = { file_date: Date, area: string, section: string };
|
|
48
47
|
|
|
49
|
-
export const SUPPORTED_EXTENSIONS = ["txt", "pdf", "docx", "html"] as const;
|
|
48
|
+
export const SUPPORTED_EXTENSIONS = ["txt", "pdf", "docx", "html", "json"] as const;
|
|
50
49
|
export type Supported_Content_Extensions = typeof SUPPORTED_EXTENSIONS[number];
|
|
51
50
|
|
|
52
51
|
export type ContentType = {
|