@igea/oac_backend 1.0.46 → 1.0.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/controllers/fuseki.js +52 -24
- package/src/models/vocabolaries/parser.js +61 -34
- package/src/models/vocabolaries/splitter.js +69 -0
- package/test/models/vocabolaries/parser.test.js +4 -1
- package/test/models/vocabolaries/splitter.test.js +36 -0
- package/test/models/vocabolaries/vocabolaries_2.xml +4508 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@igea/oac_backend",
|
|
3
|
-
"version": "1.0.46",
|
|
3
|
+
"version": "1.0.47",
|
|
4
4
|
"description": "Backend service for the OAC project",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
"crypto": "1.0.1",
|
|
30
30
|
"express": "5.1.0",
|
|
31
31
|
"express-rate-limit": "8.1.0",
|
|
32
|
+
"fast-xml-parser": "^5.3.3",
|
|
32
33
|
"get-port": "7.1.0",
|
|
33
34
|
"knex": "3.1.0",
|
|
34
35
|
"libxmljs2": "0.37.0",
|
|
@@ -75,33 +75,61 @@ router.post('/upload/vocabularies', upload.array('files'), (req, res) => {
|
|
|
75
75
|
deleteFiles(uploadedFiles)
|
|
76
76
|
return res.status(400).json({ message: 'Uploaded XML file is not valid' });
|
|
77
77
|
}
|
|
78
|
-
VocabParser.insertQuery(xmlFile.path).then(query => {
|
|
78
|
+
VocabParser.insertQuery(xmlFile.path).then(queries => {
|
|
79
79
|
//console.log("Query to insert vocabularies: ", query);
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
80
|
+
let results = Array(queries.length, null);
|
|
81
|
+
let checkCompleted = function(){
|
|
82
|
+
console.log(results)
|
|
83
|
+
deleteFiles(uploadedFiles)
|
|
84
|
+
let failed = results.filter(r => r.status === false);
|
|
85
|
+
console.log(failed);
|
|
86
|
+
if(failed.length > 0){
|
|
87
|
+
let message = `Error inserting vocabularies in ${failed.length} files.`;
|
|
88
|
+
return res.status(500).json({
|
|
89
|
+
message,
|
|
90
|
+
files: uploadedFiles,
|
|
91
|
+
results
|
|
92
|
+
});
|
|
93
|
+
}else{
|
|
94
|
+
res.json({
|
|
95
|
+
message: 'File correctly uploaded and vocabularies updated in the triple store',
|
|
96
|
+
files: uploadedFiles
|
|
97
|
+
});
|
|
87
98
|
}
|
|
99
|
+
}
|
|
100
|
+
let fusekiCall = function(index){
|
|
101
|
+
return new Promise((resolve, reject) => {
|
|
102
|
+
let query = queries[index];
|
|
103
|
+
try{
|
|
104
|
+
axios.post(fusekiUrlUpdate, query, {
|
|
105
|
+
headers: {
|
|
106
|
+
'Content-Type': 'application/sparql-update',
|
|
107
|
+
'Accept': 'application/sparql-results+json'
|
|
108
|
+
}
|
|
109
|
+
})
|
|
110
|
+
.then(() => {
|
|
111
|
+
resolve({
|
|
112
|
+
index, success: true, message: 'Vocabulary inserted correctly'
|
|
113
|
+
});
|
|
114
|
+
}).catch(error => {
|
|
115
|
+
let msg = (error.response?.status + error.response?.data) || error.message
|
|
116
|
+
resolve({
|
|
117
|
+
index, success: false, message: `Error from SPARQL end-point: ${msg}`
|
|
118
|
+
});
|
|
119
|
+
});
|
|
120
|
+
}catch(e){
|
|
121
|
+
resolve({
|
|
122
|
+
index, success: false, message: `Error: ${e}`
|
|
123
|
+
});
|
|
124
|
+
}
|
|
125
|
+
})
|
|
126
|
+
}
|
|
127
|
+
setTimeout(async ()=>{
|
|
128
|
+
for(let index=0; index<queries.length; index++){
|
|
129
|
+
results[index] = await fusekiCall(index);
|
|
130
|
+
}
|
|
131
|
+
checkCompleted();
|
|
88
132
|
})
|
|
89
|
-
.then(response => {
|
|
90
|
-
res.json({
|
|
91
|
-
message: 'File correctly uploaded and vocabularies updated: ' + response.data,
|
|
92
|
-
files: uploadedFiles
|
|
93
|
-
});
|
|
94
|
-
}).catch(error => {
|
|
95
|
-
let message = (error.response?.status + error.response?.data) || error.message
|
|
96
|
-
console.log(message);
|
|
97
|
-
//fs.writeFileSync('/home/nicole/Scaricati/spqr_error.txt', query);
|
|
98
|
-
res.status(500).json({
|
|
99
|
-
message: 'Error from SPARQL end-point: ' + message,
|
|
100
|
-
files: uploadedFiles,
|
|
101
|
-
query
|
|
102
|
-
});
|
|
103
|
-
});
|
|
104
|
-
|
|
105
133
|
}).catch(err => {
|
|
106
134
|
deleteFiles(uploadedFiles)
|
|
107
135
|
console.error('Error transforming XML:', err);
|
|
@@ -5,7 +5,7 @@ const { exec } = require('child_process');
|
|
|
5
5
|
const stripBom = require('strip-bom').default;
|
|
6
6
|
const config = require('../../config');
|
|
7
7
|
const VocabPrefix = config.fuseki.vocabularies.prefix || 'diagnostica';
|
|
8
|
-
|
|
8
|
+
const Splitter = require('./splitter');
|
|
9
9
|
|
|
10
10
|
const transformMode = {
|
|
11
11
|
default: 'default',
|
|
@@ -75,46 +75,73 @@ class Parser{
|
|
|
75
75
|
}catch(e){
|
|
76
76
|
reject(e)
|
|
77
77
|
}
|
|
78
|
-
|
|
79
|
-
/*
|
|
80
|
-
let xmlData = this._getXmlData(xmlPath);
|
|
81
|
-
const xml = xmlParser.xmlParse(xmlData);
|
|
82
|
-
xslt.xsltProcess(xml, this.xslt)
|
|
83
|
-
.then(result => {
|
|
84
|
-
var terms = result.split(' ')
|
|
85
|
-
resolve(terms.map(line => line.trim()).filter(line => line.length > 0));
|
|
86
|
-
}).catch(err => {
|
|
87
|
-
reject(err)
|
|
88
|
-
});
|
|
89
|
-
*/
|
|
90
78
|
});
|
|
91
79
|
}
|
|
92
80
|
|
|
93
81
|
insertQuery(xmlPath, className='crm:E55_Type'){
|
|
94
82
|
return new Promise((resolve, reject) => {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
83
|
+
const xmlFolder = path.dirname(xmlPath);
|
|
84
|
+
const xmlFile = path.basename(xmlPath);
|
|
85
|
+
const splitter = new Splitter(xmlFolder, xmlFile);
|
|
86
|
+
const files = splitter.splitFiles();
|
|
87
|
+
let queries = new Array(files.length);
|
|
88
|
+
let cleanTempFiles = function(queries){
|
|
89
|
+
for(let file of files){
|
|
90
|
+
try{
|
|
91
|
+
fs.unlinkSync(file.path);
|
|
92
|
+
}catch(e){
|
|
93
|
+
//console.log(`Error deleting temp file ${file.path}: ${e.message}`);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
resolve(queries);
|
|
97
|
+
}
|
|
98
|
+
let checkCompleted = function(index, query, status, message){
|
|
99
|
+
console.log(`Completed ${index+1}/${files.length}`);
|
|
100
|
+
files[index].status = status;
|
|
101
|
+
files[index].message = message;
|
|
102
|
+
if(status) queries[index]=query;
|
|
103
|
+
let allDone = files.every(f => f.status !== null);
|
|
104
|
+
if(allDone){
|
|
105
|
+
cleanTempFiles(queries);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
let transformFn = function(instance, index, className, xmlPath){
|
|
109
|
+
return function(){
|
|
110
|
+
instance.transform(xmlPath, transformMode.forInsert).then(terms => {
|
|
111
|
+
let query = `
|
|
112
|
+
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
|
113
|
+
PREFIX basecpm: <http://ontome.net/ns/cpm/>
|
|
114
|
+
PREFIX j.0: <http://www.cidoc-crm.org/extensions/crmsci/>
|
|
115
|
+
PREFIX crmsci: <http://www.ics.forth.gr/isl/CRMsci>
|
|
116
|
+
PREFIX base: <http://www.ics.forth.gr/isl/CRMinf/>
|
|
117
|
+
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
|
99
118
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
119
|
+
INSERT {
|
|
120
|
+
?term a ${className} ;
|
|
121
|
+
rdfs:label ?label ;
|
|
122
|
+
crm:P127_has_broader_term ?broader .
|
|
123
|
+
}
|
|
124
|
+
WHERE {
|
|
125
|
+
VALUES (?term ?label ?broader) {
|
|
126
|
+
${terms.join(' \n')}
|
|
127
|
+
}
|
|
128
|
+
FILTER NOT EXISTS {
|
|
129
|
+
?term rdfs:label ?label .
|
|
130
|
+
}
|
|
131
|
+
}`
|
|
132
|
+
checkCompleted(index, query, true, null);
|
|
133
|
+
}).catch(err => {
|
|
134
|
+
console.log(err);
|
|
135
|
+
checkCompleted(index, null, false, `${err}`);
|
|
136
|
+
});
|
|
104
137
|
}
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
}`
|
|
113
|
-
resolve(query);
|
|
114
|
-
}).catch(err => {
|
|
115
|
-
console.log(err);
|
|
116
|
-
reject(err);
|
|
117
|
-
});
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
for(let index=0; index<files.length; index++){
|
|
141
|
+
let fn = transformFn(this, index, files[index].class, files[index].path);
|
|
142
|
+
fn();
|
|
143
|
+
}
|
|
144
|
+
|
|
118
145
|
})
|
|
119
146
|
}
|
|
120
147
|
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const { XMLParser, XMLBuilder } = require('fast-xml-parser');
|
|
4
|
+
|
|
5
|
+
class Splitter {
|
|
6
|
+
|
|
7
|
+
constructor(xmlFolder, xmlName, skipWrite=false) {
|
|
8
|
+
this.xmlFolder = xmlFolder;
|
|
9
|
+
this.xmlName = xmlName;
|
|
10
|
+
this.skipWrite = skipWrite;
|
|
11
|
+
const xmlPath = path.join(this.xmlFolder, this.xmlName);
|
|
12
|
+
this.xml = fs.readFileSync(xmlPath, 'utf8');
|
|
13
|
+
// Parser
|
|
14
|
+
this.parser = new XMLParser({
|
|
15
|
+
ignoreAttributes: false,
|
|
16
|
+
attributeNamePrefix: '@_'
|
|
17
|
+
});
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
splitByClass() {
|
|
21
|
+
const data = this.parser.parse(this.xml);
|
|
22
|
+
const vocabularies = data.vocabularies.vocabulary;
|
|
23
|
+
|
|
24
|
+
// Normalizza a array
|
|
25
|
+
const vocabArray = Array.isArray(vocabularies)
|
|
26
|
+
? vocabularies
|
|
27
|
+
: [vocabularies];
|
|
28
|
+
|
|
29
|
+
// Raggruppa per class
|
|
30
|
+
const grouped = {};
|
|
31
|
+
for (const vocab of vocabArray) {
|
|
32
|
+
const cls = vocab['@_class'] || 'NO_CLASS';
|
|
33
|
+
if (!grouped[cls]) grouped[cls] = [];
|
|
34
|
+
grouped[cls].push(vocab);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
return grouped;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
splitFiles() {
|
|
41
|
+
const grouped = this.splitByClass();
|
|
42
|
+
const builder = new XMLBuilder({
|
|
43
|
+
ignoreAttributes: false,
|
|
44
|
+
attributeNamePrefix: '@_',
|
|
45
|
+
format: true,
|
|
46
|
+
indentBy: ' '
|
|
47
|
+
});
|
|
48
|
+
let index = 0
|
|
49
|
+
let results = [];
|
|
50
|
+
for (const cls in grouped) {
|
|
51
|
+
const outObj = {
|
|
52
|
+
vocabularies: {
|
|
53
|
+
vocabulary: grouped[cls]
|
|
54
|
+
}
|
|
55
|
+
};
|
|
56
|
+
index++;
|
|
57
|
+
const xmlOut = builder.build(outObj);
|
|
58
|
+
const xmlOutPath = path.join(this.xmlFolder, `${this.xmlName}_split_${index}.xml`);
|
|
59
|
+
if(!this.skipWrite)
|
|
60
|
+
fs.writeFileSync(xmlOutPath, xmlOut, 'utf8');
|
|
61
|
+
results.push({ class: cls, path: xmlOutPath, status:null, message:null });
|
|
62
|
+
}
|
|
63
|
+
return results.sort((a, b) => a.class.localeCompare(b.class));
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
module.exports = Splitter;
|
|
@@ -44,7 +44,10 @@ describe('Vocabolaries.Parsers', () => {
|
|
|
44
44
|
it('should get the insert query for the vocabolaries.xml file', async () => {
|
|
45
45
|
const parser = Parser.GET_INSTANCE();
|
|
46
46
|
var query = await parser.insertQuery(__dirname + '/vocabolaries.xml');
|
|
47
|
-
expect(query.length).to.be.equal(
|
|
47
|
+
expect(query.length).to.be.equal(3);
|
|
48
|
+
expect(query[0].length).to.be.equal(5279);
|
|
49
|
+
expect(query[1].length).to.be.equal(2665);
|
|
50
|
+
expect(query[2].length).to.be.equal(35150);
|
|
48
51
|
});
|
|
49
52
|
|
|
50
53
|
});
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
const chai = require('chai');
|
|
2
|
+
const expect = chai.expect;
|
|
3
|
+
const request = require('supertest');
|
|
4
|
+
const Splitter = require('../../../src/models/vocabolaries/splitter');
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
describe('Vocabolaries.Splitter', () => {
|
|
8
|
+
|
|
9
|
+
beforeEach(() => {
|
|
10
|
+
|
|
11
|
+
});
|
|
12
|
+
|
|
13
|
+
it('should create a parser instance', () => {
|
|
14
|
+
const splitter = new Splitter(__dirname, 'vocabolaries_2.xml');
|
|
15
|
+
expect(splitter).to.be.an.instanceof(Splitter);
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
it('should get group of classes', () => {
|
|
19
|
+
const splitter = new Splitter(__dirname, 'vocabolaries_2.xml');
|
|
20
|
+
const groups = splitter.splitByClass();
|
|
21
|
+
expect(Object.keys(groups).length).to.be.equal(6);
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it('should get group of classes', () => {
|
|
25
|
+
const splitter = new Splitter(__dirname, 'vocabolaries_2.xml', true);
|
|
26
|
+
const files = splitter.splitFiles();
|
|
27
|
+
expect(files.length).to.be.equal(6);
|
|
28
|
+
expect(files[0].class).to.be.equal("base:I2_Belief");
|
|
29
|
+
expect(files[1].class).to.be.equal("basecpm:CP42_Material_Decay");
|
|
30
|
+
expect(files[2].class).to.be.equal("basecpm:CP43_Structural_Damage");
|
|
31
|
+
expect(files[3].class).to.be.equal("crm:E29_Design_or_Procedure");
|
|
32
|
+
expect(files[4].class).to.be.equal("crm:E55_Type");
|
|
33
|
+
expect(files[5].class).to.be.equal("crm:E58_Measurement_Unit");
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
});
|