@mdgf11/filesystem-lib 2.2.10 → 2.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import { PartialJurisprudenciaDocument } from "@stjiris/jurisprudencia-document";
2
- import { ContentType, Date_Area_Section, FilesystemDocument, Retrievable_Metadata, Sharepoint_Metadata } from "./types.js";
2
+ import { ContentType, FilesystemDocument } from "./types.js";
3
3
  export declare function writeFilesystemDocument(filesystem_document: FilesystemDocument): void;
4
4
  export declare function loadFilesystemDocument(jsonPath: string): FilesystemDocument;
5
- export declare function createJurisprudenciaDocument(retrievable_Metadata: Retrievable_Metadata, contents: ContentType[], date_area_section: Date_Area_Section, sharepoint_metadata?: Sharepoint_Metadata): Promise<PartialJurisprudenciaDocument>;
6
5
  export declare function hasSelectableText(buffer: Buffer): Promise<boolean>;
7
6
  export declare function generateFilePath(jurisprudencia_document: PartialJurisprudenciaDocument): string;
7
+ export declare function extractContent(contents: ContentType[]): Promise<string[]>;
@@ -1,8 +1,6 @@
1
- import { calculateHASH, calculateUUID } from "@stjiris/jurisprudencia-document";
2
1
  import fs from "fs";
3
2
  import mammoth from "mammoth";
4
3
  import { DETAILS_NAME, FILESYSTEM_PATH, ORIGINAL_NAME, ROOT_PATH, SHAREPOINT_COPY_PATH } from "./types.js";
5
- import { DescritorOficial } from "./descritores.js";
6
4
  import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
7
5
  export function writeFilesystemDocument(filesystem_document) {
8
6
  if (!filesystem_document.content)
@@ -10,9 +8,6 @@ export function writeFilesystemDocument(filesystem_document) {
10
8
  const safe = {
11
9
  ...filesystem_document,
12
10
  content: filesystem_document.content?.map(({ extension }) => ({ extension })),
13
- jurisprudencia_document: {
14
- uuid: filesystem_document.jurisprudencia_document.UUID
15
- }
16
11
  };
17
12
  const content = filesystem_document.content;
18
13
  if (filesystem_document.file_path) {
@@ -59,76 +54,6 @@ export function loadFilesystemDocument(jsonPath) {
59
54
  }))
60
55
  };
61
56
  }
62
- export async function createJurisprudenciaDocument(retrievable_Metadata, contents, date_area_section, sharepoint_metadata) {
63
- if (!retrievable_Metadata) {
64
- throw new Error("Missing metadata.");
65
- }
66
- const content = await extractContent(contents);
67
- const url = sharepoint_metadata ? sharepoint_metadata.sharepoint_url : "";
68
- let Original = {};
69
- let CONTENT = content;
70
- let numProc = retrievable_Metadata.process_number;
71
- let Data = Intl.DateTimeFormat("pt-PT").format(date_area_section.file_date);
72
- let origin = "STJ (Sharepoint)";
73
- Original["Decisão Texto Integral"] = content.map(line => `<p><font>${line}</font><br>`).join('');
74
- Original["Data"] = Data;
75
- Original["Número de Processo"] = numProc;
76
- Original["Fonte"] = origin;
77
- Original["URL"] = url;
78
- Original["Jurisprudência"] = "Simples";
79
- let obj = {
80
- "Original": Original,
81
- "CONTENT": CONTENT,
82
- "Data": Data,
83
- "Número de Processo": numProc,
84
- "Fonte": origin,
85
- "URL": url,
86
- "Jurisprudência": { Index: ["Simples"], Original: ["Simples"], Show: ["Simples"] },
87
- "STATE": "importação",
88
- };
89
- if (retrievable_Metadata.process_mean.includes("Sumário")) {
90
- obj.Sumário = content.map(line => `<p><font>${line}</font><br>`).join('');
91
- obj.Texto = "";
92
- }
93
- else {
94
- obj.Sumário = "";
95
- obj.Texto = content.map(line => `<p><font>${line}</font><br>`).join('');
96
- }
97
- if (retrievable_Metadata.descriptors && retrievable_Metadata.descriptors.length > 0) {
98
- obj.Descritores = {
99
- Index: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc]),
100
- Original: retrievable_Metadata.descriptors,
101
- Show: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc])
102
- };
103
- }
104
- if (date_area_section.area && date_area_section.area.length > 0) {
105
- obj.Área = { Index: [date_area_section.area], Original: [date_area_section.area], Show: [date_area_section.area] };
106
- }
107
- if (date_area_section.section && date_area_section.section.length > 0) {
108
- obj.Secção = { Index: [date_area_section.section], Original: [date_area_section.section], Show: [date_area_section.section] };
109
- }
110
- if (retrievable_Metadata.judge && retrievable_Metadata.judge.length > 0) {
111
- obj["Relator Nome Profissional"] = { Index: [retrievable_Metadata.judge], Original: [retrievable_Metadata.judge], Show: [retrievable_Metadata.judge] };
112
- }
113
- if (retrievable_Metadata.process_mean && retrievable_Metadata.process_mean.length > 0) {
114
- obj["Meio Processual"] = { Index: retrievable_Metadata.process_mean, Original: retrievable_Metadata.process_mean, Show: retrievable_Metadata.process_mean };
115
- }
116
- if (retrievable_Metadata.decision && retrievable_Metadata.decision.length > 0) {
117
- obj["Decisão"] = { Index: [retrievable_Metadata.decision], Original: [retrievable_Metadata.decision], Show: [retrievable_Metadata.decision] };
118
- }
119
- obj["HASH"] = calculateHASH({
120
- ...obj,
121
- Original: obj.Original,
122
- "Número de Processo": obj["Número de Processo"],
123
- Data: obj.Data,
124
- "Meio Processual": obj["Meio Processual"],
125
- "Texto": obj.Texto,
126
- "Sumário": obj.Sumário,
127
- STATE: obj.STATE
128
- });
129
- obj["UUID"] = calculateUUID(obj["HASH"]);
130
- return obj;
131
- }
132
57
  export async function hasSelectableText(buffer) {
133
58
  try {
134
59
  const uint8Array = new Uint8Array(buffer);
@@ -159,14 +84,7 @@ export function generateFilePath(jurisprudencia_document) {
159
84
  const data = parseDateFromString(jurisprudencia_document.Data);
160
85
  return `/${jurisprudencia_document.Área.Show}/${data.getFullYear()}/${data.getMonth() + 1}/${data.getDate()}/${jurisprudencia_document["Número de Processo"].replace("/", "-")}/${jurisprudencia_document.UUID}`;
161
86
  }
162
- function parseDateFromString(dateStr) {
163
- const [dayStr, monthStr, yearStr] = dateStr.split("/");
164
- const day = parseInt(dayStr, 10);
165
- const month = parseInt(monthStr, 10) - 1;
166
- const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
167
- return new Date(year, month, day);
168
- }
169
- async function extractContent(contents) {
87
+ export async function extractContent(contents) {
170
88
  for (const content of contents) {
171
89
  if (content.extension === "txt") {
172
90
  return content.data.toString('utf-8').split(/\r?\n/).filter(line => line.trim().length > 0);
@@ -180,6 +98,13 @@ async function extractContent(contents) {
180
98
  }
181
99
  throw new Error("Contents are not a supported format.");
182
100
  }
101
+ function parseDateFromString(dateStr) {
102
+ const [dayStr, monthStr, yearStr] = dateStr.split("/");
103
+ const day = parseInt(dayStr, 10);
104
+ const month = parseInt(monthStr, 10) - 1;
105
+ const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
106
+ return new Date(year, month, day);
107
+ }
183
108
  async function pdfToLines(buffer) {
184
109
  const uint8Array = new Uint8Array(buffer);
185
110
  const loadingTask = getDocument({ data: uint8Array, verbosity: 0 });
package/dist/types.d.ts CHANGED
@@ -1,6 +1,5 @@
1
1
  export declare const UpdateSources: readonly ["STJ (Sharepoint)", "DGSI"];
2
2
  export type SupportedUpdateSources = typeof UpdateSources[number];
3
- import { PartialJurisprudenciaDocument } from '@stjiris/jurisprudencia-document';
4
3
  export declare const ROOT_PATH: string;
5
4
  export declare const FILESYSTEM_PATH = "/FileSystem";
6
5
  export declare const SHAREPOINT_COPY_PATH = "/Sharepoint";
@@ -58,7 +57,7 @@ export type ContentType = {
58
57
  export type FilesystemDocument = {
59
58
  creation_date: Date;
60
59
  last_update_date: Date;
61
- jurisprudencia_document: PartialJurisprudenciaDocument;
60
+ jurisprudencia_document: string;
62
61
  file_path: string;
63
62
  sharepoint_metadata?: Sharepoint_Metadata;
64
63
  content?: ContentType[];
package/dist/types.js CHANGED
@@ -16,3 +16,4 @@ export const SUPPORTED_EXTENSIONS = ["txt", "pdf", "docx", "html"];
16
16
  export function isSupportedExtension(ext) {
17
17
  return SUPPORTED_EXTENSIONS.includes(ext);
18
18
  }
19
+ ;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mdgf11/filesystem-lib",
3
- "version": "2.2.10",
3
+ "version": "2.2.12",
4
4
  "description": "Library to extend usage of jurisprudencia-document",
5
5
  "license": "ISC",
6
6
  "author": "Miguel Fonseca",
@@ -13,7 +13,7 @@
13
13
  "start": "npm run build && npm run run"
14
14
  },
15
15
  "dependencies": {
16
- "@stjiris/jurisprudencia-document": "npm:@mdgf11/jurisprudencia-document@^13.2.5",
16
+ "@stjiris/jurisprudencia-document": "npm:@mdgf11/jurisprudencia-document@^13.2.6",
17
17
  "axios": "^1.13.1",
18
18
  "body-parser": "^2.2.0",
19
19
  "canvas": "^2.11.2",
@@ -12,9 +12,6 @@ export function writeFilesystemDocument(filesystem_document: FilesystemDocument)
12
12
  const safe = {
13
13
  ...filesystem_document,
14
14
  content: filesystem_document.content?.map(({ extension }) => ({ extension })),
15
- jurisprudencia_document: {
16
- uuid: filesystem_document.jurisprudencia_document.UUID
17
- }
18
15
  };
19
16
 
20
17
  const content: ContentType[] = filesystem_document.content
@@ -66,81 +63,6 @@ export function loadFilesystemDocument(jsonPath: string): FilesystemDocument {
66
63
  };
67
64
  }
68
65
 
69
- export async function createJurisprudenciaDocument(retrievable_Metadata: Retrievable_Metadata, contents: ContentType[], date_area_section: Date_Area_Section, sharepoint_metadata?: Sharepoint_Metadata): Promise<PartialJurisprudenciaDocument> {
70
- if (!retrievable_Metadata) {
71
- throw new Error("Missing metadata.");
72
- }
73
- const content = await extractContent(contents);
74
- const url = sharepoint_metadata ? sharepoint_metadata.sharepoint_url : "";
75
-
76
- let Original: JurisprudenciaDocument["Original"] = {};
77
- let CONTENT: JurisprudenciaDocument["CONTENT"] = content;
78
- let numProc: JurisprudenciaDocument["Número de Processo"] = retrievable_Metadata.process_number;
79
- let Data: JurisprudenciaDocument["Data"] = Intl.DateTimeFormat("pt-PT").format(date_area_section.file_date);
80
- let origin: SupportedUpdateSources = "STJ (Sharepoint)";
81
-
82
- Original["Decisão Texto Integral"] = content.map(line => `<p><font>${line}</font><br>`).join('');
83
- Original["Data"] = Data;
84
- Original["Número de Processo"] = numProc;
85
- Original["Fonte"] = origin;
86
- Original["URL"] = url;
87
- Original["Jurisprudência"] = "Simples";
88
-
89
- let obj: PartialJurisprudenciaDocument = {
90
- "Original": Original,
91
- "CONTENT": CONTENT,
92
- "Data": Data,
93
- "Número de Processo": numProc,
94
- "Fonte": origin,
95
- "URL": url,
96
- "Jurisprudência": { Index: ["Simples"], Original: ["Simples"], Show: ["Simples"] },
97
- "STATE": "importação",
98
- }
99
- if (retrievable_Metadata.process_mean.includes("Sumário")) {
100
- obj.Sumário = content.map(line => `<p><font>${line}</font><br>`).join('');
101
- obj.Texto = "";
102
- } else {
103
- obj.Sumário = "";
104
- obj.Texto = content.map(line => `<p><font>${line}</font><br>`).join('');
105
- }
106
- if (retrievable_Metadata.descriptors && retrievable_Metadata.descriptors.length > 0) {
107
- obj.Descritores = {
108
- Index: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc]),
109
- Original: retrievable_Metadata.descriptors,
110
- Show: retrievable_Metadata.descriptors.map(desc => DescritorOficial[desc])
111
- }
112
- }
113
- if (date_area_section.area && date_area_section.area.length > 0) {
114
- obj.Área = { Index: [date_area_section.area], Original: [date_area_section.area], Show: [date_area_section.area] };
115
- }
116
- if (date_area_section.section && date_area_section.section.length > 0) {
117
- obj.Secção = { Index: [date_area_section.section], Original: [date_area_section.section], Show: [date_area_section.section] };
118
- }
119
- if (retrievable_Metadata.judge && retrievable_Metadata.judge.length > 0) {
120
- obj["Relator Nome Profissional"] = { Index: [retrievable_Metadata.judge], Original: [retrievable_Metadata.judge], Show: [retrievable_Metadata.judge] };
121
- }
122
- if (retrievable_Metadata.process_mean && retrievable_Metadata.process_mean.length > 0) {
123
- obj["Meio Processual"] = { Index: retrievable_Metadata.process_mean, Original: retrievable_Metadata.process_mean, Show: retrievable_Metadata.process_mean };
124
- }
125
- if (retrievable_Metadata.decision && retrievable_Metadata.decision.length > 0) {
126
- obj["Decisão"] = { Index: [retrievable_Metadata.decision], Original: [retrievable_Metadata.decision], Show: [retrievable_Metadata.decision] };
127
- }
128
-
129
- obj["HASH"] = calculateHASH({
130
- ...obj,
131
- Original: obj.Original,
132
- "Número de Processo": obj["Número de Processo"],
133
- Data: obj.Data,
134
- "Meio Processual": obj["Meio Processual"],
135
- "Texto": obj.Texto,
136
- "Sumário": obj.Sumário,
137
- STATE: obj.STATE
138
- })
139
-
140
- obj["UUID"] = calculateUUID(obj["HASH"]);
141
- return obj;
142
- }
143
-
144
66
  export async function hasSelectableText(buffer: Buffer): Promise<boolean> {
145
67
  try {
146
68
  const uint8Array = new Uint8Array(buffer);
@@ -177,17 +99,7 @@ export function generateFilePath(jurisprudencia_document: PartialJurisprudenciaD
177
99
  return `/${jurisprudencia_document.Área.Show}/${data.getFullYear()}/${data.getMonth() + 1}/${data.getDate()}/${jurisprudencia_document["Número de Processo"].replace("/", "-")}/${jurisprudencia_document.UUID}`
178
100
  }
179
101
 
180
- function parseDateFromString(dateStr: string): Date {
181
- const [dayStr, monthStr, yearStr] = dateStr.split("/");
182
-
183
- const day = parseInt(dayStr, 10);
184
- const month = parseInt(monthStr, 10) - 1;
185
- const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
186
-
187
- return new Date(year, month, day);
188
- }
189
-
190
- async function extractContent(contents: ContentType[]): Promise<string[]> {
102
+ export async function extractContent(contents: ContentType[]): Promise<string[]> {
191
103
  for (const content of contents) {
192
104
  if (content.extension === "txt") {
193
105
  return content.data.toString('utf-8').split(/\r?\n/).filter(line => line.trim().length > 0);
@@ -202,6 +114,16 @@ async function extractContent(contents: ContentType[]): Promise<string[]> {
202
114
  throw new Error("Contents are not a supported format.");
203
115
  }
204
116
 
117
+ function parseDateFromString(dateStr: string): Date {
118
+ const [dayStr, monthStr, yearStr] = dateStr.split("/");
119
+
120
+ const day = parseInt(dayStr, 10);
121
+ const month = parseInt(monthStr, 10) - 1;
122
+ const year = yearStr ? parseInt(yearStr, 10) : new Date().getFullYear();
123
+
124
+ return new Date(year, month, day);
125
+ }
126
+
205
127
  async function pdfToLines(buffer: Buffer): Promise<string[]> {
206
128
  const uint8Array = new Uint8Array(buffer);
207
129
  const loadingTask = getDocument({ data: uint8Array, verbosity: 0 });
@@ -1,6 +1,6 @@
1
1
  import path from "path";
2
2
  import fs from "fs";
3
- import { DGSI_LOGS_PATH, DGSI_UPDATE_DIR, FilesystemDocument, FilesystemUpdate, SHAREPOINT_UPDATE_DIR, SOURCE_TO_PATH, SupportedUpdateSources } from "./types.js";
3
+ import { FilesystemDocument, FilesystemUpdate, SHAREPOINT_UPDATE_DIR, SOURCE_TO_PATH, SupportedUpdateSources } from "./types.js";
4
4
 
5
5
  export function logDocumentProcessingError(update: FilesystemUpdate, err: string) {
6
6
  update.file_errors.push(err);
package/src/types.ts CHANGED
@@ -6,14 +6,14 @@ import dotenv from 'dotenv';
6
6
 
7
7
  dotenv.config();
8
8
  export const ROOT_PATH = process.env['LOCAL_ROOT'] || 'results';
9
- export const FILESYSTEM_PATH = `/FileSystem`
10
- export const SHAREPOINT_COPY_PATH = `/Sharepoint`
11
- export const DETAILS_NAME = "Detalhes"
12
- export const ORIGINAL_NAME = "Original"
13
- export const LOGS_PATH = "/Updates"
9
+ export const FILESYSTEM_PATH = `/FileSystem`;
10
+ export const SHAREPOINT_COPY_PATH = `/Sharepoint`;
11
+ export const DETAILS_NAME = "Detalhes";
12
+ export const ORIGINAL_NAME = "Original";
13
+ export const LOGS_PATH = "/Updates";
14
14
 
15
- export const SHAREPOINT_LOGS_PATH = `${LOGS_PATH}/Sharepoint`
16
- export const DGSI_LOGS_PATH = `${LOGS_PATH}/DGSI`
15
+ export const SHAREPOINT_LOGS_PATH = `${LOGS_PATH}/Sharepoint`;
16
+ export const DGSI_LOGS_PATH = `${LOGS_PATH}/DGSI`;
17
17
 
18
18
  export const SHAREPOINT_UPDATE_DIR = `${ROOT_PATH}${SHAREPOINT_LOGS_PATH}`;
19
19
  export const DGSI_UPDATE_DIR = `${ROOT_PATH}${DGSI_LOGS_PATH}`;
@@ -41,7 +41,7 @@ export type Sharepoint_Metadata = {
41
41
  sharepoint_url: string,
42
42
  extensions: Supported_Content_Extensions[],
43
43
  xor_hash?: string,
44
- }
44
+ };
45
45
 
46
46
  export type Retrievable_Metadata = { process_number: string, judge: string, process_mean: string[], decision: string, descriptors?: string[] };
47
47
  export type Date_Area_Section = { file_date: Date, area: string, section: string };
@@ -53,15 +53,16 @@ export type ContentType = {
53
53
  extension: Supported_Content_Extensions;
54
54
  data: Buffer;
55
55
  };
56
+
56
57
  export type FilesystemDocument = {
57
58
  creation_date: Date,
58
59
  last_update_date: Date,
59
- jurisprudencia_document: PartialJurisprudenciaDocument,
60
+ jurisprudencia_document: string,
60
61
  file_path: string,
61
62
  sharepoint_metadata?: Sharepoint_Metadata
62
63
  content?: ContentType[],
63
- }
64
+ };
64
65
 
65
66
  export function isSupportedExtension(ext: string): ext is Supported_Content_Extensions {
66
67
  return (SUPPORTED_EXTENSIONS as readonly string[]).includes(ext);
67
- }
68
+ };