@igea/oac_backend 1.0.46 → 1.0.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@igea/oac_backend",
3
- "version": "1.0.46",
3
+ "version": "1.0.48",
4
4
  "description": "Backend service for the OAC project",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -29,6 +29,7 @@
29
29
  "crypto": "1.0.1",
30
30
  "express": "5.1.0",
31
31
  "express-rate-limit": "8.1.0",
32
+ "fast-xml-parser": "^5.3.3",
32
33
  "get-port": "7.1.0",
33
34
  "knex": "3.1.0",
34
35
  "libxmljs2": "0.37.0",
@@ -3,16 +3,10 @@ const multer = require('multer');
3
3
  const router = express.Router();
4
4
  const path = require('path');
5
5
  const fs = require('fs');
6
- const config = require('../config')
7
- const configFuseki = config.fuseki || {
8
- "protocol": "http",
9
- "host": "127.0.0.1",
10
- "port": "3030",
11
- "dataset": "oac"
12
- }
13
- const fusekiUrlDataset = `${configFuseki.protocol}://${configFuseki.host}:${configFuseki.port}/${configFuseki.dataset}`;
14
- const fusekiUrl = `${fusekiUrlDataset}/sparql`;
15
- const fusekiUrlUpdate = `${fusekiUrlDataset}/update`;
6
+ const {
7
+ fusekiUrl,
8
+ fusekiUrlUpdate
9
+ } = require('../models/fusekiConfig');
16
10
  const axios = require('axios');
17
11
  const Fuseki = require('../models/fuseki');
18
12
  const { Parser, transformMode } = require('../models/vocabolaries/parser');
@@ -75,33 +69,61 @@ router.post('/upload/vocabularies', upload.array('files'), (req, res) => {
75
69
  deleteFiles(uploadedFiles)
76
70
  return res.status(400).json({ message: 'Uploaded XML file is not valid' });
77
71
  }
78
- VocabParser.insertQuery(xmlFile.path).then(query => {
72
+ VocabParser.insertQuery(xmlFile.path).then(queries => {
79
73
  //console.log("Query to insert vocabularies: ", query);
80
-
81
- deleteFiles(uploadedFiles)
82
-
83
- axios.post(fusekiUrlUpdate, query, {
84
- headers: {
85
- 'Content-Type': 'application/sparql-update',
86
- 'Accept': 'application/sparql-results+json'
74
+ let results = Array(queries.length, null);
75
+ let checkCompleted = function(){
76
+ console.log(results)
77
+ deleteFiles(uploadedFiles)
78
+ let failed = results.filter(r => r.status === false);
79
+ console.log(failed);
80
+ if(failed.length > 0){
81
+ let message = `Error inserting vocabularies in ${failed.length} files.`;
82
+ return res.status(500).json({
83
+ message,
84
+ files: uploadedFiles,
85
+ results
86
+ });
87
+ }else{
88
+ res.json({
89
+ message: 'File correctly uploaded and vocabularies updated in the triple store',
90
+ files: uploadedFiles
91
+ });
87
92
  }
93
+ }
94
+ let fusekiCall = function(index){
95
+ return new Promise((resolve, reject) => {
96
+ let query = queries[index];
97
+ try{
98
+ axios.post(fusekiUrlUpdate, query, {
99
+ headers: {
100
+ 'Content-Type': 'application/sparql-update',
101
+ 'Accept': 'application/sparql-results+json'
102
+ }
103
+ })
104
+ .then(() => {
105
+ resolve({
106
+ index, success: true, message: 'Vocabulary inserted correctly'
107
+ });
108
+ }).catch(error => {
109
+ let msg = (error.response?.status + error.response?.data) || error.message
110
+ resolve({
111
+ index, success: false, message: `Error from SPARQL end-point: ${msg}`
112
+ });
113
+ });
114
+ }catch(e){
115
+ resolve({
116
+ index, success: false, message: `Error: ${e}`
117
+ });
118
+ }
119
+ })
120
+ }
121
+ setTimeout(async ()=>{
122
+ for(let index=0; index<queries.length; index++){
123
+ results[index] = await fusekiCall(index);
124
+ }
125
+ checkCompleted();
88
126
  })
89
- .then(response => {
90
- res.json({
91
- message: 'File correctly uploaded and vocabularies updated: ' + response.data,
92
- files: uploadedFiles
93
- });
94
- }).catch(error => {
95
- let message = (error.response?.status + error.response?.data) || error.message
96
- console.log(message);
97
- //fs.writeFileSync('/home/nicole/Scaricati/spqr_error.txt', query);
98
- res.status(500).json({
99
- message: 'Error from SPARQL end-point: ' + message,
100
- files: uploadedFiles,
101
- query
102
- });
103
- });
104
-
105
127
  }).catch(err => {
106
128
  deleteFiles(uploadedFiles)
107
129
  console.error('Error transforming XML:', err);
@@ -7,6 +7,12 @@ const Converter = require('../models/converter');
7
7
  const Validator = require('../models/validator');
8
8
  const tmp = require('tmp');
9
9
  const Investigations = require('../models/investigations');
10
+ const {
11
+ fusekiUrlDataset,
12
+ fusekiUrl,
13
+ fusekiUrlUpdate
14
+ } = require('../models/fusekiConfig');
15
+ const axios = require('axios');
10
16
 
11
17
  let SCHEMAS = {}
12
18
 
@@ -112,19 +118,43 @@ router.post('/validate', (req, res) => {
112
118
router.post('/form/save', (req, res) => {
  // Persists an investigation (Turtle payload) and mirrors it into the
  // Fuseki triple store as a SPARQL upsert.
  // Body: { turtle: <Turtle document string>, uuid: <investigation id> }
  let dataset = req.body.turtle;
  let uuid = req.body.uuid;
  try{
    // May throw on unparsable Turtle — handled by the catch(e) below.
    let updateQuery = Converter.turtle2Sparql(dataset);
    Investigations.save({
      uuid, dataset, format: 'turtle'
    }).then( () => {
      // Investigation stored locally; now push the upsert to Fuseki.
      axios.post(fusekiUrlUpdate, updateQuery, {
        headers: {
          'Content-Type': 'application/sparql-update',
          'Accept': 'application/sparql-results+json'
        }
      }).then(response => {
        console.log(response.data);
        res.status(200).json({
          success: true
        });
      }).catch(error => {
        //TODO: rollback investigation save
        // NOTE(review): status + data concatenation yields NaN-ish text when
        // either part is undefined; error.message is the fallback.
        let message = (error.response?.status + error.response?.data) || error.message
        res.status(500).json({
          message: 'Error from SPARQL end-point: ' + message,
          success: false
        });
      });
    }).catch( (err) => {
      // Local persistence failed — Fuseki was never contacted.
      // NOTE(review): responds 200 with success:false — confirm clients
      // inspect the `success` flag rather than the HTTP status.
      console.log("Error saving investigation: ", err);
      res.json({
        success: false,
        message: `Error: ${err}`
      });
    });
  }catch(e){
    // Synchronous failure (typically Turtle parsing in turtle2Sparql).
    res.json({
      success: false,
      message: `Error: ${e.message}`
    });
    return;
  }
});
129
159
 
130
160
  router.get('/form/:uuid', (req, res) => {
@@ -17,6 +17,83 @@ class Converter {
17
17
  })
18
18
  }
19
19
 
20
+ static termToSparql(term) {
21
+ if (!term) return '';
22
+ const t = term.termType || term.type; // some versions use .type
23
+ const value = term.value;
24
+ if (t === 'NamedNode' || t === 'IRI') {
25
+ return `<${value}>`;
26
+ }
27
+ if (t === 'BlankNode' || t === 'Blank') {
28
+ return `_:${value}`;
29
+ }
30
+ if (t === 'Literal' || t === 'literal') {
31
+ // escape per basic N-Triples rules
32
+ const esc = value
33
+ .replace(/\\/g, '\\\\')
34
+ .replace(/"/g, '\\"')
35
+ .replace(/\r/g, '\\r')
36
+ .replace(/\n/g, '\\n');
37
+ const lang = term.language;
38
+ const dt = term.datatype && term.datatype.value;
39
+ if (lang) return `"${esc}"@${lang}`;
40
+ if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string')
41
+ return `"${esc}"^^<${dt}>`;
42
+ return `"${esc}"`;
43
+ }
44
+ return `${value}`;
45
+ }
46
+
47
  /**
   * Converts a Turtle document into a SPARQL Update "upsert" string:
   * for every (subject, predicate) pair found, a DELETE WHERE removes
   * any existing objects, then an INSERT DATA writes the parsed ones.
   * @param {string} turtle - Turtle document to convert.
   * @param {object} [opts] - Options; opts.graph (IRI string) targets a
   *   named graph, otherwise the default graph is used.
   * @returns {string} SPARQL Update text (empty string for no triples).
   * NOTE(review): blank-node subjects/objects serialize as `_:x`, which is
   * not legal inside DELETE templates per SPARQL 1.1 — confirm inputs have
   * no blank nodes, or skip the DELETE for them.
   */
  static turtle2Sparql(turtle, opts={}){
    const graph = opts.graph || null; // if null -> default graph
    const parser = new Parser();
    const quads = parser.parse(turtle);
    const termToSparql = Converter.termToSparql;
    // group objects by subject+predicate
    const groups = new Map(); // key -> { subj, pred, objects: Set() }
    for (const q of quads) {
      const s = q.subject;
      const p = q.predicate;
      const o = q.object;
      const key = `${termToSparql(s)} ${termToSparql(p)}`;
      if (!groups.has(key)) groups.set(key, { subj: s, pred: p, objects: new Set() });
      // Set of serialized objects dedupes repeated triples.
      groups.get(key).objects.add(termToSparql(o));
    }

    // build SPARQL update parts
    const parts = [];
    for (const [, { subj, pred, objects }] of groups) {
      const sStr = termToSparql(subj);
      const pStr = termToSparql(pred);

      // DELETE WHERE: remove any existing object for the subject/predicate
      // we use a variable ?o to delete any existing triples with same s,p
      let deleteBlock;
      if (graph) {
        deleteBlock = `DELETE WHERE { GRAPH <${graph}> { ${sStr} ${pStr} ?o } };`;
      } else {
        deleteBlock = `DELETE WHERE { ${sStr} ${pStr} ?o } ;`;
      }

      // INSERT DATA: insert the objects we parsed. If multiple objects, join with comma.
      const objs = Array.from(objects);
      const objectsList = objs.join(' ,\n '); // pretty print
      let insertBlock;
      if (graph) {
        insertBlock = `INSERT DATA { GRAPH <${graph}> { ${sStr} ${pStr} ${objectsList} . } };`;
      } else {
        insertBlock = `INSERT DATA { ${sStr} ${pStr} ${objectsList} . } ;`;
      }

      // append as one atomic unit (delete then insert)
      parts.push(`${deleteBlock}\n${insertBlock}`);
    }

    // join with double newline for readability
    return parts.join('\n\n');

  }
96
+
20
97
  static async turtle2RdfXml(inTurtlePath, outRdfXmlPath) {
21
98
  return new Promise((resolve, reject) => {
22
99
  const command = `rapper -i turtle -o rdfxml "${inTurtlePath}" > "${outRdfXmlPath}"`;
@@ -0,0 +1,16 @@
1
+ const config = require('../config')
2
+ const configFuseki = config.fuseki || {
3
+ "protocol": "http",
4
+ "host": "127.0.0.1",
5
+ "port": "3030",
6
+ "dataset": "oac"
7
+ }
8
+ const fusekiUrlDataset = `${configFuseki.protocol}://${configFuseki.host}:${configFuseki.port}/${configFuseki.dataset}`;
9
+ const fusekiUrl = `${fusekiUrlDataset}/sparql`;
10
+ const fusekiUrlUpdate = `${fusekiUrlDataset}/update`;
11
+
12
+ module.exports = {
13
+ fusekiUrlDataset,
14
+ fusekiUrl,
15
+ fusekiUrlUpdate
16
+ }
@@ -5,7 +5,7 @@ const { exec } = require('child_process');
5
5
  const stripBom = require('strip-bom').default;
6
6
  const config = require('../../config');
7
7
  const VocabPrefix = config.fuseki.vocabularies.prefix || 'diagnostica';
8
-
8
+ const Splitter = require('./splitter');
9
9
 
10
10
  const transformMode = {
11
11
  default: 'default',
@@ -75,46 +75,73 @@ class Parser{
75
75
  }catch(e){
76
76
  reject(e)
77
77
  }
78
-
79
- /*
80
- let xmlData = this._getXmlData(xmlPath);
81
- const xml = xmlParser.xmlParse(xmlData);
82
- xslt.xsltProcess(xml, this.xslt)
83
- .then(result => {
84
- var terms = result.split('&#10;')
85
- resolve(terms.map(line => line.trim()).filter(line => line.length > 0));
86
- }).catch(err => {
87
- reject(err)
88
- });
89
- */
90
78
  });
91
79
  }
92
80
 
93
81
  /**
   * Builds the SPARQL INSERT queries for a vocabulary XML file.
   * The file is first split into one temp file per vocabulary class
   * (via Splitter), each split file is transformed independently, and
   * the promise resolves with an ARRAY of query strings (one per class,
   * in the Splitter's class-sorted order).
   * @param {string} xmlPath - path to the vocabularies XML file.
   * @param {string} [className] - NOTE(review): no longer used — each
   *   split file supplies its own class; kept for backward compatibility.
   * @returns {Promise<string[]>} queries; entries for files whose
   *   transform failed stay undefined (callers should handle holes).
   */
  insertQuery(xmlPath, className='crm:E55_Type'){
    return new Promise((resolve, reject) => {
      const xmlFolder = path.dirname(xmlPath);
      const xmlFile = path.basename(xmlPath);
      const splitter = new Splitter(xmlFolder, xmlFile);
      // files: [{ class, path, status: null, message: null }, ...]
      const files = splitter.splitFiles();
      let queries = new Array(files.length);
      // Deletes the split temp files, then resolves with the queries.
      // Unlink errors are ignored on purpose (best-effort cleanup).
      let cleanTempFiles = function(queries){
        for(let file of files){
          try{
            fs.unlinkSync(file.path);
          }catch(e){
            //console.log(`Error deleting temp file ${file.path}: ${e.message}`);
          }
        }
        resolve(queries);
      }
      // Records one transform outcome; when every file's status is set
      // (true or false — null means pending) finishes the whole job.
      let checkCompleted = function(index, query, status, message){
        console.log(`Completed ${index+1}/${files.length}`);
        files[index].status = status;
        files[index].message = message;
        if(status) queries[index]=query;
        let allDone = files.every(f => f.status !== null);
        if(allDone){
          cleanTempFiles(queries);
        }
      }
      // Returns a thunk that transforms one split file and builds its
      // INSERT query. `instance` is bound explicitly because the inner
      // callbacks must not depend on `this`.
      let transformFn = function(instance, index, className, xmlPath){
        return function(){
          instance.transform(xmlPath, transformMode.forInsert).then(terms => {
            // Upsert-style insert: FILTER NOT EXISTS skips terms whose
            // label is already present in the store.
            let query = `
            PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
            PREFIX basecpm: <http://ontome.net/ns/cpm/>
            PREFIX j.0: <http://www.cidoc-crm.org/extensions/crmsci/>
            PREFIX crmsci: <http://www.ics.forth.gr/isl/CRMsci>
            PREFIX base: <http://www.ics.forth.gr/isl/CRMinf/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

            INSERT {
              ?term a ${className} ;
                rdfs:label ?label ;
                crm:P127_has_broader_term ?broader .
            }
            WHERE {
              VALUES (?term ?label ?broader) {
                ${terms.join(' \n')}
              }
              FILTER NOT EXISTS {
                ?term rdfs:label ?label .
              }
            }`
            checkCompleted(index, query, true, null);
          }).catch(err => {
            // A failed transform marks the slot failed; the overall
            // promise still resolves (never rejects from here).
            console.log(err);
            checkCompleted(index, null, false, `${err}`);
          });
        }
      }

      // Kick off every per-file transform; completion is tracked by
      // checkCompleted above, not by this loop.
      for(let index=0; index<files.length; index++){
        let fn = transformFn(this, index, files[index].class, files[index].path);
        fn();
      }

    })
  }
120
147
 
@@ -0,0 +1,69 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const { XMLParser, XMLBuilder } = require('fast-xml-parser');
4
+
5
/**
 * Splits a vocabularies XML file into one XML file per vocabulary class.
 * Used so each class can be transformed and inserted independently.
 */
class Splitter {

  /**
   * @param {string} xmlFolder - directory containing the XML file.
   * @param {string} xmlName - file name of the vocabularies XML.
   * @param {boolean} [skipWrite=false] - when true, splitFiles() computes
   *   output paths but writes nothing to disk (used by tests).
   */
  constructor(xmlFolder, xmlName, skipWrite = false) {
    this.xmlFolder = xmlFolder;
    this.xmlName = xmlName;
    this.skipWrite = skipWrite;
    this.xml = fs.readFileSync(path.join(xmlFolder, xmlName), 'utf8');
    // Parser keeps attributes (prefixed '@_') so '@_class' is readable.
    this.parser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_'
    });
  }

  /**
   * Groups <vocabulary> entries by their `class` attribute.
   * @returns {Object<string, Array>} map of class name -> vocabulary list;
   *   entries without a class attribute land under 'NO_CLASS'.
   */
  splitByClass() {
    const parsed = this.parser.parse(this.xml);
    let vocabList = parsed.vocabularies.vocabulary;
    // A single <vocabulary> element parses as an object, not an array.
    if (!Array.isArray(vocabList)) {
      vocabList = [vocabList];
    }

    const byClass = {};
    for (const entry of vocabList) {
      const cls = entry['@_class'] || 'NO_CLASS';
      if (!byClass[cls]) {
        byClass[cls] = [];
      }
      byClass[cls].push(entry);
    }
    return byClass;
  }

  /**
   * Writes one XML file per class next to the source file.
   * @returns {Array<{class: string, path: string, status: null, message: null}>}
   *   one descriptor per class, sorted by class name. status/message are
   *   placeholders filled in later by the caller.
   */
  splitFiles() {
    const byClass = this.splitByClass();
    const builder = new XMLBuilder({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      format: true,
      indentBy: ' '
    });

    const results = [];
    let fileIndex = 0;
    for (const cls of Object.keys(byClass)) {
      fileIndex += 1;
      const xmlOut = builder.build({
        vocabularies: {
          vocabulary: byClass[cls]
        }
      });
      const outPath = path.join(this.xmlFolder, `${this.xmlName}_split_${fileIndex}.xml`);
      if (!this.skipWrite) {
        fs.writeFileSync(outPath, xmlOut, 'utf8');
      }
      results.push({ class: cls, path: outPath, status: null, message: null });
    }
    // Deterministic order regardless of XML order.
    return results.sort((a, b) => a.class.localeCompare(b.class));
  }

}


module.exports = Splitter;
@@ -27,4 +27,13 @@ describe('Converter', () => {
27
27
  });
28
28
  });
29
29
 
30
+ it('should convert a turtle string to sparql UPSERT', (done) => {
31
+ let inputTurtle = fs.readFileSync(__dirname + '/example-investigation-01.ttl', 'utf8');
32
+ let sparql = converter.turtle2Sparql(inputTurtle)
33
+ expect(sparql).to.be.a('string');
34
+ expect(sparql.length).to.be.greaterThan(0);
35
+ console.log(sparql);
36
+ done();
37
+ });
38
+
30
39
  })
@@ -0,0 +1,32 @@
1
+ @prefix sh: <http://www.w3.org/ns/shacl#>.
2
+ @prefix ex: <http://example.org/shapes/>.
3
+ @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
4
+ @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
5
+ @prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
6
+ @prefix owl: <http://www.w3.org/2002/07/owl#>.
7
+ @prefix crm: <http://www.cidoc-crm.org/cidoc-crm/>.
8
+ @prefix basecpm: <http://ontome.net/ns/cpm/>.
9
+ @prefix base: <http://www.ics.forth.gr/isl/CRMinf/>.
10
+ @prefix cpm: <http://ontome.net/ns/cpm/>.
11
+ @prefix crmsci: <http://www.cidoc-crm.org/extensions/crmsci/>.
12
+ @prefix pref: <http://diagnostica/>.
13
+ @prefix skos: <http://www.w3.org/2004/02/skos/core#>.
14
+
15
+ pref:3cc24c8c-ce06-40ef-bdce-9da2c4c5394b ex:P48haspreferredidentifier01 <http://indagine/3>;
16
+ a crm:E42_Identifier.
17
+ pref:1adba9d5-c83b-41a8-b97f-c6e5b168658a a crm:E7_Activity;
18
+ crm:P48_has_preferred_identifier pref:3cc24c8c-ce06-40ef-bdce-9da2c4c5394b;
19
+ crm:P17_was_motivated_by pref:f30ead0e-edcc-4f6f-868e-8bc6f9d13813;
20
+ crm:P14_carried_out_by pref:57701c6c-ece4-472b-bc24-b75c1c254568;
21
+ <http://purl.org/dc/terms/conformsTo> ex:E7ActivityShape.
22
+ pref:40a75ae6-daa4-4ae1-bd41-2e07175f6f75 a crm:E55_Type;
23
+ ex:P2hastype02 <http://diagnostica/vocabularies/quesito-diagnostico/tecnologia-di-produzione>.
24
+ pref:f30ead0e-edcc-4f6f-868e-8bc6f9d13813 a base:I12_Adopted_Belief;
25
+ crm:P2_has_type pref:40a75ae6-daa4-4ae1-bd41-2e07175f6f75.
26
+ pref:95e957ad-55b5-45d3-bf62-6a292959c4f9 a crm:E41_Appellation;
27
+ ex:ente_richiedente "Università";
28
+ ex:schedatore "Christian".
29
+ pref:956f0c5a-6fc5-4a79-b399-6c6288fb000f a crm:E55_Type;
30
+ crm:P1_is_defined_by pref:95e957ad-55b5-45d3-bf62-6a292959c4f9.
31
+ pref:57701c6c-ece4-472b-bc24-b75c1c254568 a crm:E29_Actor;
32
+ crm:P2_has_type pref:956f0c5a-6fc5-4a79-b399-6c6288fb000f.
@@ -44,7 +44,10 @@ describe('Vocabolaries.Parsers', () => {
44
44
  it('should get the insert query for the vocabolaries.xml file', async () => {
45
45
  const parser = Parser.GET_INSTANCE();
46
46
  var query = await parser.insertQuery(__dirname + '/vocabolaries.xml');
47
- expect(query.length).to.be.equal(40619);
47
+ expect(query.length).to.be.equal(3);
48
+ expect(query[0].length).to.be.equal(5279);
49
+ expect(query[1].length).to.be.equal(2665);
50
+ expect(query[2].length).to.be.equal(35150);
48
51
  });
49
52
 
50
53
  });
@@ -0,0 +1,36 @@
1
+ const chai = require('chai');
2
+ const expect = chai.expect;
3
+ const request = require('supertest');
4
+ const Splitter = require('../../../src/models/vocabolaries/splitter');
5
+
6
+
7
// Tests for the vocabulary Splitter against the vocabolaries_2.xml fixture.
describe('Vocabolaries.Splitter', () => {

  beforeEach(() => {

  });

  it('should create a parser instance', () => {
    const splitter = new Splitter(__dirname, 'vocabolaries_2.xml');
    expect(splitter).to.be.an.instanceof(Splitter);
  });

  it('should get group of classes', () => {
    const splitter = new Splitter(__dirname, 'vocabolaries_2.xml');
    const groups = splitter.splitByClass();
    expect(Object.keys(groups).length).to.be.equal(6);
  });

  // FIX: this test duplicated the previous one's name ('should get group
  // of classes'), which hides it in reporters and --grep selections.
  it('should split files by class, sorted by class name', () => {
    // skipWrite=true: paths are computed but nothing is written to disk.
    const splitter = new Splitter(__dirname, 'vocabolaries_2.xml', true);
    const files = splitter.splitFiles();
    expect(files.length).to.be.equal(6);
    expect(files[0].class).to.be.equal("base:I2_Belief");
    expect(files[1].class).to.be.equal("basecpm:CP42_Material_Decay");
    expect(files[2].class).to.be.equal("basecpm:CP43_Structural_Damage");
    expect(files[3].class).to.be.equal("crm:E29_Design_or_Procedure");
    expect(files[4].class).to.be.equal("crm:E55_Type");
    expect(files[5].class).to.be.equal("crm:E58_Measurement_Unit");
  });

});