@lblod/graph-rdfa-processor 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- package/.woodpecker/release.yml +16 -0
- package/LICENSE +201 -0
- package/README.md +34 -0
- package/dist/graph-rdfa-processor.js +256 -0
- package/dist/index.js +32 -0
- package/dist/node.js +23 -0
- package/dist/rdfa-graph.js +603 -0
- package/dist/rdfa-processor.js +1106 -0
- package/dist/uri-resolver.js +274 -0
- package/package.json +47 -0
- package/release.sh +15 -0
- package/src/graph-rdfa-processor.js +177 -0
- package/src/index.js +21 -0
- package/src/node.js +16 -0
- package/src/rdfa-graph.js +518 -0
- package/src/rdfa-processor.js +1156 -0
- package/src/uri-resolver.js +203 -0
- package/test/test.js +50 -0
- package/test-page.html +405 -0
- package/test-prov-value.html +87 -0
@@ -0,0 +1,203 @@
|
|
1
|
+
/**
 * Parses absolute URIs into plain component objects and offers helpers to
 * normalize them, resolve references against them and relativize them.
 *
 * The object returned by {@link URIResolver#parseURI} carries:
 *   - `spec`               the full URI string
 *   - `scheme`             the scheme without the trailing ":"
 *   - `schemeSpecificPart` everything after "scheme:"
 *   - `isGeneric`          true when the scheme specific part starts with "//"
 *   - for generic URIs: `authority`, `path`, `segments` and, only when present
 *     in the input, `query` and `fragment` (both without their delimiter)
 *   - the methods `normalize()`, `resolve(href)` and `relativeTo(otherURI)`
 */
export default class URIResolver {
  /**
   * Splits a URI into its components.
   *
   * @param {string} uri - URI that must start with a scheme ("name:").
   * @returns {object} The parsed URI object described on the class.
   * @throws {Error} When `uri` has no scheme, or when a generic URI contains
   *   an unescaped character in one of its path segments.
   */
  parseURI(uri) {
    const match = URIResolver.SCHEME.exec(uri);
    if (!match) {
      throw new Error("Bad URI value, no scheme: " + uri);
    }

    const parsed = { spec: uri };
    // match[0] is "scheme:"; drop the colon for the scheme name.
    parsed.scheme = match[0].slice(0, -1);
    parsed.schemeSpecificPart = parsed.spec.substring(match[0].length);

    if (parsed.schemeSpecificPart.startsWith("//")) {
      this.parseGeneric(parsed);
    } else {
      parsed.isGeneric = false;
    }

    // The helpers rely on `this`, so they work as methods of the parsed object.
    parsed.normalize = normalizeParsedURI;
    parsed.resolve = resolveAgainstParsedURI;
    parsed.relativeTo = relativizeParsedURI;
    return parsed;
  }

  /**
   * Fills in `authority`, `path`, `query`, `fragment`, `segments` and
   * `isGeneric` on a parsed URI whose scheme specific part starts with "//".
   *
   * The fragment and query are split off before the authority is located, so
   * that URIs without a path ("http://example.com#frag",
   * "http://example.com?a=/b") do not leak them into the authority.
   *
   * @param {object} parsed - Object with `spec` and `schemeSpecificPart` set.
   * @throws {Error} When the scheme specific part does not start with "//" or
   *   a path segment contains a character that should have been escaped.
   */
  parseGeneric(parsed) {
    const schemeSpecificPart = parsed.schemeSpecificPart;
    if (schemeSpecificPart.charAt(0) !== "/" || schemeSpecificPart.charAt(1) !== "/") {
      throw new Error("Generic URI values should start with '//':" + parsed.spec);
    }

    let work = schemeSpecificPart.substring(2);

    // The fragment starts at the first "#"...
    const hash = work.indexOf("#");
    if (hash >= 0) {
      parsed.fragment = work.substring(hash + 1);
      work = work.substring(0, hash);
    }
    // ...and the query at the first "?" in front of it.
    const questionMark = work.indexOf("?");
    if (questionMark >= 0) {
      parsed.query = work.substring(questionMark + 1);
      work = work.substring(0, questionMark);
    }

    const pathStart = work.indexOf("/");
    parsed.authority = pathStart < 0 ? work : work.substring(0, pathStart);
    parsed.path = pathStart < 0 ? "" : work.substring(pathStart);

    if (parsed.path === "/" || parsed.path === "") {
      parsed.segments = [];
    } else {
      const path = parsed.path;
      const segments = path.split("/");
      if (segments[0] === "" && path.length > 1 && path.charAt(1) !== "/") {
        // empty segment produced by the leading "/", remove it
        segments.shift();
      }
      if (
        segments.length > 0 &&
        path.charAt(path.length - 1) === "/" &&
        segments[segments.length - 1] === "" &&
        path.charAt(path.length - 2) !== "/"
      ) {
        // The empty segment at the end only stems from a single trailing "/".
        // It is kept when the path ends in "//", where it is a real segment.
        segments.pop();
      }
      parsed.segments = segments;

      // Check for non-escaped characters: splitting on every allowed token
      // must leave nothing but empty strings behind.
      const allowed = /%[A-Za-z0-9][A-Za-z0-9]|[\ud800-\udfff][\ud800-\udfff]|[A-Za-z0-9\-\._~!$&'()*+,;=@:\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+/;
      for (const segment of segments) {
        for (const leftover of segment.split(allowed)) {
          if (leftover.length > 0) {
            throw new Error("Unecaped character "+leftover.charAt(0)+" ("+leftover.charCodeAt(0)+") in URI " + parsed.spec);
          }
        }
      }
    }
    parsed.isGeneric = true;
  }
}

URIResolver.SCHEME = /^[A-Za-z][A-Za-z0-9\+\-\.]*\:/;

/** Tells whether a path segment is one of the dot segments "." or "..". */
function isDotSegment(segment) {
  return segment === "." || segment === "..";
}

/** Recomputes `schemeSpecificPart` and `spec` of a generic URI from its components. */
function rebuildSpec(uri) {
  let schemeSpecificPart = "//" + uri.authority + uri.path;
  if (typeof uri.query !== "undefined") {
    schemeSpecificPart += "?" + uri.query;
  }
  if (typeof uri.fragment !== "undefined") {
    schemeSpecificPart += "#" + uri.fragment;
  }
  uri.schemeSpecificPart = schemeSpecificPart;
  uri.spec = uri.scheme + ":" + schemeSpecificPart;
}

/**
 * `normalize()` of a parsed URI: removes "." and ".." path segments in place
 * and updates `path`, `segments`, `schemeSpecificPart` and `spec`.
 * Non-generic URIs and URIs without path segments are left untouched.
 * A ".." that has no preceding segment to cancel is kept.
 */
function normalizeParsedURI() {
  if (!this.isGeneric || this.segments.length === 0) {
    return;
  }

  // Edge case of ending in "/.": the dot goes, the trailing slash stays.
  // Any remaining dot segments are still processed below.
  if (this.path.length > 1 && this.path.endsWith("/.")) {
    this.path = this.path.slice(0, -1);
    this.segments.pop();
  }

  // Only rebuild the path when there is something to remove; a path without
  // dot segments is already normalized and must not be altered.
  if (this.segments.some(isDotSegment)) {
    const end = this.path.endsWith("/") ? "/" : "";
    const kept = [];
    for (const segment of this.segments) {
      if (segment === ".." && kept.length > 0) {
        kept.pop();
      } else if (segment !== ".") {
        kept.push(segment);
      }
    }
    this.segments = kept;
    this.path = kept.length === 0 ? "/" : "/" + kept.join("/") + end;
  }

  rebuildSpec(this);
}

/**
 * `resolve(href)` of a parsed URI: resolves a reference against this URI.
 * The result is not normalized; dot segments in `href` are kept as they are.
 *
 * @param {string} href - Reference to resolve; an empty value yields `spec`.
 * @returns {string} The resolved URI.
 * @throws {Error} When this URI is not generic and `href` is neither empty
 *   nor a fragment reference.
 */
function resolveAgainstParsedURI(href) {
  if (!href) {
    return this.spec;
  }
  if (href.charAt(0) === "#") {
    // Replace the current fragment, or append one if there is none.
    const lastHash = this.spec.lastIndexOf("#");
    return lastHash < 0 ? this.spec + href : this.spec.substring(0, lastHash) + href;
  }
  if (!this.isGeneric) {
    throw new Error("Cannot resolve uri against non-generic URI: " + this.spec);
  }

  const root = this.scheme + "://" + this.authority;
  if (href.charAt(0) === "/") {
    // "//host/path" only inherits the scheme; "/path" also keeps the authority.
    return href.charAt(1) === "/" ? this.scheme + ":" + href : root + href;
  }

  // Path up to and including its last "/". A base without a path
  // ("http://localhost") resolves relative references against "/".
  const baseDir = this.path.substring(0, this.path.lastIndexOf("/") + 1) || "/";

  if (href.charAt(0) === "." && href.charAt(1) === "/") {
    return root + baseDir + href.substring(2);
  }
  if (URIResolver.SCHEME.test(href)) {
    // Already absolute.
    return href;
  }
  if (href.charAt(0) === "?") {
    return root + this.path + href;
  }
  return root + baseDir + href;
}

/**
 * `relativeTo(otherURI)` of a parsed URI: expresses this URI as a reference
 * relative to `otherURI`.
 *
 * @param {object} otherURI - Parsed URI to relativize against.
 * @returns {string} `spec` when scheme or authority differ, otherwise a
 *   relative reference; when both paths have the same segments this is the
 *   "?query#fragment" suffix of this URI (possibly empty).
 * @throws {Error} When either URI is not generic, or when `otherURI` has more
 *   path segments than this URI while sharing all of this URI's segments.
 */
function relativizeParsedURI(otherURI) {
  if (otherURI.scheme !== this.scheme) {
    return this.spec;
  }
  if (!this.isGeneric) {
    throw new Error("A non generic URI cannot be made relative: " + this.spec);
  }
  if (!otherURI.isGeneric) {
    throw new Error("Cannot make a relative URI against a non-generic URI: " + otherURI.spec);
  }
  if (otherURI.authority !== this.authority) {
    return this.spec;
  }

  const mine = this.segments;
  const theirs = otherURI.segments;
  const trailingSlash = this.path.charAt(this.path.length - 1) === "/" ? "/" : "";
  const shared = Math.min(mine.length, theirs.length);

  for (let i = 0; i < shared; i++) {
    if (mine[i] !== theirs[i]) {
      // The last segment of otherURI is not a directory unless its path ends in "/".
      const offset = otherURI.path.charAt(otherURI.path.length - 1) === "/" ? 0 : -1;
      const levelsUp = Math.max(0, theirs.length + offset - i);
      return "../".repeat(levelsUp) + mine.slice(i).join("/") + trailingSlash;
    }
  }

  if (mine.length === theirs.length) {
    // Same path: only query and fragment are left, each with its delimiter so
    // the result is a valid relative reference.
    let suffix = "";
    if (this.query) {
      suffix += "?" + this.query;
    }
    if (this.fragment) {
      suffix += "#" + this.fragment;
    }
    return suffix;
  }
  if (shared < mine.length) {
    return mine.slice(shared).join("/") + trailingSlash;
  }
  throw new Error("Cannot calculate a relative URI for "+this.spec+" against " + otherURI.spec);
}
|
package/test/test.js
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
import { jsdom } from "jsdom";
|
2
|
+
import assert from "assert";
|
3
|
+
import getRDFaGraph from "../src";
|
4
|
+
import { readFileSync, writeFileSync } from "fs";
|
5
|
+
// Mocha suite for getRDFaGraph: the same RDFa snippet is processed once as a
// whole document and once as a single element, and the test page shipped with
// the package is processed with a `specialHtmlPredicates` mapping.
describe("getRDFaGraph", function() {
  // RDFa fixture describing schema.org's CreativeWork class.
  let html = `<div typeof="rdfs:Class" resource="http://schema.org/CreativeWork">
<span class="h" property="rdfs:label">CreativeWork</span>
<span property="rdfs:comment">The most generic kind of creative work, including books, movies, photographs, software programs, etc.</span>
<span>Subclass of: <a property="rdfs:subClassOf" href="http://schema.org/Thing">Thing</a></span>
<span>Source: <a property="dc:source" href="http://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources#source_rNews">rNews</a></span>
</div>`;

  // Serialization that graph.toString() is compared against for the fixture above.
  let expected = `<http://schema.org/CreativeWork> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class>;
<http://www.w3.org/2000/01/rdf-schema#label> "CreativeWork";
<http://www.w3.org/2000/01/rdf-schema#comment> "The most generic kind of creative work, including books, movies, photographs, software programs, etc.";
<http://www.w3.org/2000/01/rdf-schema#subClassOf> <http://schema.org/Thing>;
<http://purl.org/dc/terms/source> <http://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources#source_rNews> .
`;

  it("should getRDFaGraph from a document", function() {
    let { document } = jsdom(html).defaultView.window;
    let graph = getRDFaGraph(document, { baseURI: "http://localhost" });
    assert.equal(graph.toString(), expected);
  });

  // This case used to be an `it.only` that wrote its output to /tmp/x.out and
  // asserted nothing, which also kept the other tests in this suite from running.
  it("should getRDFaGraph from the test page with special HTML predicates", function() {
    // Read as text so jsdom receives a string rather than a Buffer.
    let pageHtml = readFileSync("./test-page.html", "utf8");
    let { document } = jsdom(pageHtml).defaultView.window;

    let graph = getRDFaGraph(document, {
      baseURI: "http://localhost",
      specialHtmlPredicates: [
        {
          source: "http://www.w3.org/ns/prov#value",
          target:
            "http://lblod.data.gift/vocabularies/besluit/extractedDecisionContent",
        },
      ],
    });

    // Smoke test: this file holds no expected serialization for the test page,
    // so only check that processing completes and the graph serializes.
    assert.equal(typeof graph.toString(), "string");
  });

  it("should getRDFaGraph from a node", function() {
    let { document } = jsdom(html).defaultView.window;
    let graph = getRDFaGraph(document.getElementsByTagName("div")[0], {
      baseURI: "http://localhost",
    });
    assert.equal(graph.toString(), expected);
  });
});
|