@mdgf11/filesystem-lib 2.2.11 → 2.2.12

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import { PartialJurisprudenciaDocument } from "@stjiris/jurisprudencia-document";
2
- import { ContentType, Date_Area_Section, FilesystemDocument, Retrievable_Metadata, Sharepoint_Metadata } from "./types.js";
2
+ import { ContentType, FilesystemDocument } from "./types.js";
3
3
  export declare function writeFilesystemDocument(filesystem_document: FilesystemDocument): void;
4
4
  export declare function loadFilesystemDocument(jsonPath: string): FilesystemDocument;
5
- export declare function createJurisprudenciaDocument(retrievable_Metadata: Retrievable_Metadata, contents: ContentType[], date_area_section: Date_Area_Section, sharepoint_metadata?: Sharepoint_Metadata): Promise<PartialJurisprudenciaDocument>;
6
5
  export declare function hasSelectableText(buffer: Buffer): Promise<boolean>;
7
6
  export declare function generateFilePath(jurisprudencia_document: PartialJurisprudenciaDocument): string;
7
+ export declare function extractContent(contents: ContentType[]): Promise<string[]>;
@@ -1,8 +1,6 @@
1
- import { calculateHASH, calculateUUID } from "@stjiris/jurisprudencia-document";
2
1
  import fs from "fs";
3
2
  import mammoth from "mammoth";
4
3
  import { DETAILS_NAME, FILESYSTEM_PATH, ORIGINAL_NAME, ROOT_PATH, SHAREPOINT_COPY_PATH } from "./types.js";
5
- import { DescritorOficial } from "./descritores.js";
6
4
  import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
7
5
  export function writeFilesystemDocument(filesystem_document) {
8
6
  if (!filesystem_document.content)
@@ -56,77 +54,6 @@ export function loadFilesystemDocument(jsonPath) {
56
54
  }))
57
55
  };
58
56
  }
59
- export async function createJurisprudenciaDocument(retrievable_Metadata, contents, date_area_section, sharepoint_metadata) {
60
- if (!retrievable_Metadata) {
61
- throw new Error("Missing metadata.");
62
- }
63
- const content = await extractContent(contents);
64
- const url = sharepoint_metadata ? sharepoint_metadata.sharepoint_url : "";
65
- let Original = {};
66
- let CONTENT = content;
67
- let numProc = retrievable_Metadata.process_number;
68
- let Data = Intl.DateTimeFormat("pt-PT").format(date_area_section.file_date);
69
- let origin = "STJ (Sharepoint)";
70
- Original["Decisão Texto Integral"] = content.map(line => `<p><font>${line}</font><br>`).join('');
71
- Original["Data"] = Data;
72
- Original["Número de Processo"] = numProc;
73
- Original["Fonte"] = origin;
74
- Original["URL"] = url;
75
- Original["Jurisprudência"] = "Simples";
76
- let obj = {
77
- "Original": Original,
78
- "CONTENT": CONTENT,
79
- "Data": Data,
80
- "Número de Processo": numProc,
81
- "Fonte": origin,
82
- "URL": url,
83
- "Jurisprudência": { Index: ["Simples"], Original: ["Simples"], Show: ["Simples"] },
84
- "STATE": "importação",
85
- };
86
- if (retrievable_Metadata.process_mean.includes("Sumário")) {
87
- obj.Sumário = content.map(line => `<p><font>${line}</font><br>`).join('');
88
- obj.Texto = "";
89
- }
90
- else {
91
- obj.Sumário = "";
92
- obj.Texto = content.map(line => `<p><font>${line}</font><br>`).join('');
93
- }
94
- if (retrievable_Metadata.descriptors && retrievable_Metadata.descriptors.length > 0) {
95
- obj.Descritores = {
96
- Index: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc]),
97
- Original: retrievable_Metadata.descriptors,
98
- Show: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc])
99
- };
100
- }
101
- if (date_area_section.area && date_area_section.area.length > 0) {
102
- obj.Área = { Index: [date_area_section.area], Original: [date_area_section.area], Show: [date_area_section.area] };
103
- }
104
- if (date_area_section.section && date_area_section.section.length > 0) {
105
- obj.Secção = { Index: [date_area_section.section], Original: [date_area_section.section], Show: [date_area_section.section] };
106
- }
107
- if (retrievable_Metadata.judge && retrievable_Metadata.judge.length > 0) {
108
- obj["Relator Nome Profissional"] = { Index: [retrievable_Metadata.judge], Original: [retrievable_Metadata.judge], Show: [retrievable_Metadata.judge] };
109
- }
110
- if (retrievable_Metadata.process_mean && retrievable_Metadata.process_mean.length > 0) {
111
- obj["Meio Processual"] = { Index: retrievable_Metadata.process_mean, Original: retrievable_Metadata.process_mean, Show: retrievable_Metadata.process_mean };
112
- }
113
- if (retrievable_Metadata.decision && retrievable_Metadata.decision.length > 0) {
114
- obj["Decisão"] = { Index: [retrievable_Metadata.decision], Original: [retrievable_Metadata.decision], Show: [retrievable_Metadata.decision] };
115
- }
116
- obj["HASH"] = calculateHASH({
117
- ...obj,
118
- Original: obj.Original,
119
- "Número de Processo": obj["Número de Processo"],
120
- Data: obj.Data,
121
- "Meio Processual": obj["Meio Processual"],
122
- "Texto": obj.Texto,
123
- "Texto Não Anonimizado": obj.Texto,
124
- "Sumário": obj.Sumário,
125
- "Sumário Não Anonimizado": obj.Sumário,
126
- });
127
- obj["UUID"] = calculateUUID(obj["HASH"]);
128
- return obj;
129
- }
130
57
  export async function hasSelectableText(buffer) {
131
58
  try {
132
59
  const uint8Array = new Uint8Array(buffer);
@@ -157,14 +84,7 @@ export function generateFilePath(jurisprudencia_document) {
157
84
  const data = parseDateFromString(jurisprudencia_document.Data);
158
85
  return `/${jurisprudencia_document.Área.Show}/${data.getFullYear()}/${data.getMonth() + 1}/${data.getDate()}/${jurisprudencia_document["Número de Processo"].replace("/", "-")}/${jurisprudencia_document.UUID}`;
159
86
  }
160
- function parseDateFromString(dateStr) {
161
- const [dayStr, monthStr, yearStr] = dateStr.split("/");
162
- const day = parseInt(dayStr, 10);
163
- const month = parseInt(monthStr, 10) - 1;
164
- const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
165
- return new Date(year, month, day);
166
- }
167
- async function extractContent(contents) {
87
+ export async function extractContent(contents) {
168
88
  for (const content of contents) {
169
89
  if (content.extension === "txt") {
170
90
  return content.data.toString('utf-8').split(/\r?\n/).filter(line => line.trim().length > 0);
@@ -178,6 +98,13 @@ async function extractContent(contents) {
178
98
  }
179
99
  throw new Error("Contents are not a supported format.");
180
100
  }
101
+ function parseDateFromString(dateStr) {
102
+ const [dayStr, monthStr, yearStr] = dateStr.split("/");
103
+ const day = parseInt(dayStr, 10);
104
+ const month = parseInt(monthStr, 10) - 1;
105
+ const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
106
+ return new Date(year, month, day);
107
+ }
181
108
  async function pdfToLines(buffer) {
182
109
  const uint8Array = new Uint8Array(buffer);
183
110
  const loadingTask = getDocument({ data: uint8Array, verbosity: 0 });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mdgf11/filesystem-lib",
3
- "version": "2.2.11",
3
+ "version": "2.2.12",
4
4
  "description": "Library to extend usage of jurisprudencia-document",
5
5
  "license": "ISC",
6
6
  "author": "Miguel Fonseca",
@@ -63,82 +63,6 @@ export function loadFilesystemDocument(jsonPath: string): FilesystemDocument {
63
63
  };
64
64
  }
65
65
 
66
- export async function createJurisprudenciaDocument(retrievable_Metadata: Retrievable_Metadata, contents: ContentType[], date_area_section: Date_Area_Section, sharepoint_metadata?: Sharepoint_Metadata): Promise<PartialJurisprudenciaDocument> {
67
- if (!retrievable_Metadata) {
68
- throw new Error("Missing metadata.");
69
- }
70
- const content = await extractContent(contents);
71
- const url = sharepoint_metadata ? sharepoint_metadata.sharepoint_url : "";
72
-
73
- let Original: JurisprudenciaDocument["Original"] = {};
74
- let CONTENT: JurisprudenciaDocument["CONTENT"] = content;
75
- let numProc: JurisprudenciaDocument["Número de Processo"] = retrievable_Metadata.process_number;
76
- let Data: JurisprudenciaDocument["Data"] = Intl.DateTimeFormat("pt-PT").format(date_area_section.file_date);
77
- let origin: SupportedUpdateSources = "STJ (Sharepoint)";
78
-
79
- Original["Decisão Texto Integral"] = content.map(line => `<p><font>${line}</font><br>`).join('');
80
- Original["Data"] = Data;
81
- Original["Número de Processo"] = numProc;
82
- Original["Fonte"] = origin;
83
- Original["URL"] = url;
84
- Original["Jurisprudência"] = "Simples";
85
-
86
- let obj: PartialJurisprudenciaDocument = {
87
- "Original": Original,
88
- "CONTENT": CONTENT,
89
- "Data": Data,
90
- "Número de Processo": numProc,
91
- "Fonte": origin,
92
- "URL": url,
93
- "Jurisprudência": { Index: ["Simples"], Original: ["Simples"], Show: ["Simples"] },
94
- "STATE": "importação",
95
- }
96
- if (retrievable_Metadata.process_mean.includes("Sumário")) {
97
- obj.Sumário = content.map(line => `<p><font>${line}</font><br>`).join('');
98
- obj.Texto = "";
99
- } else {
100
- obj.Sumário = "";
101
- obj.Texto = content.map(line => `<p><font>${line}</font><br>`).join('');
102
- }
103
- if (retrievable_Metadata.descriptors && retrievable_Metadata.descriptors.length > 0) {
104
- obj.Descritores = {
105
- Index: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc]),
106
- Original: retrievable_Metadata.descriptors,
107
- Show: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc])
108
- }
109
- }
110
- if (date_area_section.area && date_area_section.area.length > 0) {
111
- obj.Área = { Index: [date_area_section.area], Original: [date_area_section.area], Show: [date_area_section.area] };
112
- }
113
- if (date_area_section.section && date_area_section.section.length > 0) {
114
- obj.Secção = { Index: [date_area_section.section], Original: [date_area_section.section], Show: [date_area_section.section] };
115
- }
116
- if (retrievable_Metadata.judge && retrievable_Metadata.judge.length > 0) {
117
- obj["Relator Nome Profissional"] = { Index: [retrievable_Metadata.judge], Original: [retrievable_Metadata.judge], Show: [retrievable_Metadata.judge] };
118
- }
119
- if (retrievable_Metadata.process_mean && retrievable_Metadata.process_mean.length > 0) {
120
- obj["Meio Processual"] = { Index: retrievable_Metadata.process_mean, Original: retrievable_Metadata.process_mean, Show: retrievable_Metadata.process_mean };
121
- }
122
- if (retrievable_Metadata.decision && retrievable_Metadata.decision.length > 0) {
123
- obj["Decisão"] = { Index: [retrievable_Metadata.decision], Original: [retrievable_Metadata.decision], Show: [retrievable_Metadata.decision] };
124
- }
125
-
126
- obj["HASH"] = calculateHASH({
127
- ...obj,
128
- Original: obj.Original,
129
- "Número de Processo": obj["Número de Processo"],
130
- Data: obj.Data,
131
- "Meio Processual": obj["Meio Processual"],
132
- "Texto": obj.Texto,
133
- "Texto Não Anonimizado": obj.Texto,
134
- "Sumário": obj.Sumário,
135
- "Sumário Não Anonimizado": obj.Sumário,
136
- })
137
-
138
- obj["UUID"] = calculateUUID(obj["HASH"]);
139
- return obj;
140
- }
141
-
142
66
  export async function hasSelectableText(buffer: Buffer): Promise<boolean> {
143
67
  try {
144
68
  const uint8Array = new Uint8Array(buffer);
@@ -175,17 +99,7 @@ export function generateFilePath(jurisprudencia_document: PartialJurisprudenciaD
175
99
  return `/${jurisprudencia_document.Área.Show}/${data.getFullYear()}/${data.getMonth() + 1}/${data.getDate()}/${jurisprudencia_document["Número de Processo"].replace("/", "-")}/${jurisprudencia_document.UUID}`
176
100
  }
177
101
 
178
- function parseDateFromString(dateStr: string): Date {
179
- const [dayStr, monthStr, yearStr] = dateStr.split("/");
180
-
181
- const day = parseInt(dayStr, 10);
182
- const month = parseInt(monthStr, 10) - 1;
183
- const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
184
-
185
- return new Date(year, month, day);
186
- }
187
-
188
- async function extractContent(contents: ContentType[]): Promise<string[]> {
102
+ export async function extractContent(contents: ContentType[]): Promise<string[]> {
189
103
  for (const content of contents) {
190
104
  if (content.extension === "txt") {
191
105
  return content.data.toString('utf-8').split(/\r?\n/).filter(line => line.trim().length > 0);
@@ -200,6 +114,16 @@ async function extractContent(contents: ContentType[]): Promise<string[]> {
200
114
  throw new Error("Contents are not a supported format.");
201
115
  }
202
116
 
117
+ function parseDateFromString(dateStr: string): Date {
118
+ const [dayStr, monthStr, yearStr] = dateStr.split("/");
119
+
120
+ const day = parseInt(dayStr, 10);
121
+ const month = parseInt(monthStr, 10) - 1;
122
+ const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
123
+
124
+ return new Date(year, month, day);
125
+ }
126
+
203
127
  async function pdfToLines(buffer: Buffer): Promise<string[]> {
204
128
  const uint8Array = new Uint8Array(buffer);
205
129
  const loadingTask = getDocument({ data: uint8Array, verbosity: 0 });