@lblod/graph-rdfa-processor 0.13.2
Sign up to get free protection for your applications and to get access to all the features.
- package/.woodpecker/release.yml +16 -0
- package/LICENSE +201 -0
- package/README.md +34 -0
- package/bug.html +2617 -0
- package/bug2.html +3247 -0
- package/bug3.html +455 -0
- package/bug4.html +2212 -0
- package/bug5.html +952 -0
- package/bug6.html +940 -0
- package/dist/graph-rdfa-processor.js +256 -0
- package/dist/index.js +32 -0
- package/dist/node.js +23 -0
- package/dist/rdfa-graph.js +603 -0
- package/dist/rdfa-processor.js +1110 -0
- package/dist/uri-resolver.js +276 -0
- package/package.json +47 -0
- package/release.sh +15 -0
- package/src/graph-rdfa-processor.js +177 -0
- package/src/index.js +21 -0
- package/src/node.js +16 -0
- package/src/rdfa-graph.js +526 -0
- package/src/rdfa-processor.js +1160 -0
- package/src/uri-resolver.js +264 -0
- package/test/test.js +50 -0
- package/test-page.html +405 -0
- package/test-prov-value.html +87 -0
@@ -0,0 +1,264 @@
|
|
1
|
+
/**
 * Small URI parser used to resolve and relativize references.
 *
 * `parseURI` returns a plain record with the fields `spec`, `scheme`,
 * `schemeSpecificPart`, `isGeneric` and, for generic ("scheme://...") URIs,
 * `authority`, `path`, `segments` and optionally `query` / `fragment`.
 * The record also carries three methods: `normalize()`, `resolve(href)` and
 * `relativeTo(otherURI)`.
 */
export default class URIResolver {
  /**
   * Parse an absolute URI into a record.
   *
   * @param {string} uri - Absolute URI; line feeds inside it are dropped.
   * @returns {object} The parsed record described on the class.
   * @throws {Error} When `uri` has no scheme, or when a generic URI contains
   *   an unescaped character in one of its path segments.
   */
  parseURI(uri) {
    // Attribute values sometimes contain line feeds; strip every one of them
    // (a plain string pattern would only remove the first occurrence).
    const spec = uri.replace(/\n/g, "");
    const match = URIResolver.SCHEME.exec(spec);
    if (!match) {
      throw new Error("Bad URI value, no scheme: " + spec);
    }

    const parsed = { spec };
    // match[0] includes the trailing ":"; the scheme itself does not.
    parsed.scheme = match[0].substring(0, match[0].length - 1);
    parsed.schemeSpecificPart = spec.substring(match[0].length);
    if (parsed.schemeSpecificPart.startsWith("//")) {
      this.parseGeneric(parsed);
    } else {
      parsed.isGeneric = false;
    }

    parsed.normalize = normalizeParsedURI;
    parsed.resolve = resolveAgainstParsedURI;
    parsed.relativeTo = relativizeParsedURI;
    return parsed;
  }

  /**
   * Fill in `authority`, `path`, `query`, `fragment` and `segments` on a
   * record whose scheme-specific part starts with "//", then mark it generic.
   *
   * @param {object} parsed - Record with `spec` and `schemeSpecificPart` set.
   * @throws {Error} When the scheme-specific part does not start with "//",
   *   or when a path segment contains a character that should be escaped.
   */
  parseGeneric(parsed) {
    if (!parsed.schemeSpecificPart.startsWith("//")) {
      throw new Error(
        "Generic URI values should start with '//':" + parsed.spec,
      );
    }

    const work = parsed.schemeSpecificPart.substring(2);
    // The authority ends at the first "/", "?" or "#", so a URI without a
    // path ("http://host?q#f") does not leak its query/fragment into it.
    const authorityEnd = work.search(/[\/?#]/);
    parsed.authority =
      authorityEnd < 0 ? work : work.substring(0, authorityEnd);
    parsed.path = authorityEnd < 0 ? "" : work.substring(authorityEnd);

    // Split off the fragment first: a fragment may itself contain "?".
    const hash = parsed.path.indexOf("#");
    if (hash >= 0) {
      parsed.fragment = parsed.path.substring(hash + 1);
      parsed.path = parsed.path.substring(0, hash);
    }
    const questionMark = parsed.path.indexOf("?");
    if (questionMark >= 0) {
      parsed.query = parsed.path.substring(questionMark + 1);
      parsed.path = parsed.path.substring(0, questionMark);
    }

    if (parsed.path === "/" || parsed.path === "") {
      parsed.segments = [];
    } else {
      parsed.segments = parsed.path.split(/\//);
      if (
        parsed.segments.length > 0 &&
        parsed.segments[0] === "" &&
        parsed.path.length > 1 &&
        parsed.path.charAt(1) !== "/"
      ) {
        // The leading "/" yields an empty first segment; remove it.
        parsed.segments.shift();
      }
      if (
        parsed.segments.length > 0 &&
        parsed.path.length > 0 &&
        parsed.path.charAt(parsed.path.length - 1) === "/" &&
        parsed.segments[parsed.segments.length - 1] === ""
      ) {
        // A single trailing "/" yields an empty last segment that is not a
        // real segment; "//" at the end is kept as a legitimate empty one.
        if (
          parsed.path.length > 1 &&
          parsed.path.charAt(parsed.path.length - 2) !== "/"
        ) {
          parsed.segments.pop();
        }
      }

      // Splitting a segment on runs of permitted text leaves only empty
      // strings when every character is permitted; anything left over is a
      // character that should have been escaped.
      const permittedRun = /%[A-Za-z0-9][A-Za-z0-9]|[\ud800-\udfff][\ud800-\udfff]|[A-Za-z0-9\-\._~!$&'()*+,;=@:\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+/;
      for (const segment of parsed.segments) {
        for (const leftover of segment.split(permittedRun)) {
          if (leftover.length > 0) {
            throw new Error(
              "Unescaped character " +
                leftover.charAt(0) +
                " (" +
                leftover.charCodeAt(0) +
                ") in URI " +
                parsed.spec,
            );
          }
        }
      }
    }
    parsed.isGeneric = true;
  }
}

/**
 * Recompute `schemeSpecificPart` and `spec` of a generic record from its
 * `scheme`, `authority`, `path`, `query` and `fragment` fields.
 */
function rebuildSpec(record) {
  record.schemeSpecificPart = "//" + record.authority + record.path;
  if (typeof record.query !== "undefined") {
    record.schemeSpecificPart += "?" + record.query;
  }
  if (typeof record.fragment !== "undefined") {
    record.schemeSpecificPart += "#" + record.fragment;
  }
  record.spec = record.scheme + ":" + record.schemeSpecificPart;
}

/**
 * `normalize()` method of a parsed record: removes "." segments and collapses
 * "segment/.." pairs in place, then rebuilds `path` and `spec`.
 * Does nothing for non-generic URIs or URIs without path segments.
 */
function normalizeParsedURI() {
  if (!this.isGeneric) {
    return;
  }
  if (this.segments.length === 0) {
    return;
  }
  // Edge case of a path ending in "/.": only the trailing dot is dropped.
  if (this.path.length > 1 && this.path.endsWith("/.")) {
    this.path = this.path.substring(0, this.path.length - 1);
    this.segments.splice(this.segments.length - 1, 1);
    rebuildSpec(this);
    return;
  }

  // Remember whether the path had a trailing "/" so it can be restored.
  const end = this.path.endsWith("/") ? "/" : "";
  for (let i = 0; i < this.segments.length; i++) {
    if (i > 0 && this.segments[i] === "..") {
      // Drop the ".." together with the segment before it, then step back
      // so the loop continues with the element that slid into that place.
      this.segments.splice(i - 1, 2);
      i -= 2;
    }
    if (this.segments[i] === ".") {
      this.segments.splice(i, 1);
      i--;
    }
  }
  this.path =
    this.segments.length === 0 ? "/" : "/" + this.segments.join("/") + end;
  rebuildSpec(this);
}

/**
 * `resolve(href)` method of a parsed record: resolve a reference against
 * this URI.
 *
 * @param {string} href - Reference to resolve; an empty value yields `spec`.
 * @returns {string} The resolved URI (not normalized).
 * @throws {Error} When this URI is not generic and `href` is not a fragment.
 */
function resolveAgainstParsedURI(href) {
  if (!href) {
    return this.spec;
  }
  if (href.charAt(0) === "#") {
    // Fragment-only reference: replace any fragment the base already has.
    const lastHash = this.spec.lastIndexOf("#");
    return lastHash < 0
      ? this.spec + href
      : this.spec.substring(0, lastHash) + href;
  }
  if (!this.isGeneric) {
    throw new Error(
      "Cannot resolve uri against non-generic URI: " + this.spec,
    );
  }

  const root = this.scheme + "://" + this.authority;
  if (href.startsWith("//")) {
    // Network-path reference: it brings its own authority, keep our scheme.
    return this.scheme + ":" + href;
  }
  if (href.charAt(0) === "/") {
    return root + href;
  }
  if (href.startsWith("./")) {
    if (this.path.endsWith("/")) {
      return root + this.path + href.substring(2);
    }
    // Replace the last path segment; href.substring(1) keeps the "/".
    return (
      root +
      this.path.substring(0, this.path.lastIndexOf("/")) +
      href.substring(1)
    );
  }
  if (URIResolver.SCHEME.test(href)) {
    // Already absolute.
    return href;
  }
  if (href.charAt(0) === "?") {
    return root + this.path + href;
  }
  // Relative path: append to the base "directory". A base with an empty
  // path ("http://host") still needs the "/" between authority and href.
  const directory =
    this.path.substring(0, this.path.lastIndexOf("/") + 1) || "/";
  return root + directory + href;
}

/**
 * `relativeTo(otherURI)` method of a parsed record: express this URI as a
 * reference relative to `otherURI`.
 *
 * @param {object} otherURI - Parsed record to make this URI relative to.
 * @returns {string} A relative reference, or `spec` when the scheme or
 *   authority differ.
 * @throws {Error} When either URI is not generic, or when this URI's path
 *   is a strict prefix of the other's (that case is not supported).
 */
function relativizeParsedURI(otherURI) {
  if (otherURI.scheme !== this.scheme) {
    return this.spec;
  }
  if (!this.isGeneric) {
    throw new Error(
      "A non generic URI cannot be made relative: " + this.spec,
    );
  }
  if (!otherURI.isGeneric) {
    throw new Error(
      "Cannot make a relative URI against a non-generic URI: " +
        otherURI.spec,
    );
  }
  if (otherURI.authority !== this.authority) {
    return this.spec;
  }

  const mine = this.segments;
  const theirs = otherURI.segments;
  // Length of the common leading run of segments.
  let shared = 0;
  while (
    shared < mine.length &&
    shared < theirs.length &&
    mine[shared] === theirs[shared]
  ) {
    shared++;
  }
  const trailingSlash = this.path.endsWith("/") ? "/" : "";

  if (shared < mine.length && shared < theirs.length) {
    // The paths diverge: climb out of the other URI's remaining directories
    // (its last segment is a file name unless its path ends in "/").
    const offset = otherURI.path.endsWith("/") ? 0 : -1;
    const climbs = Math.max(0, theirs.length + offset - shared);
    return "../".repeat(climbs) + mine.slice(shared).join("/") + trailingSlash;
  }
  if (mine.length === theirs.length) {
    // Same path: only the query and fragment can differ.
    let suffix = "";
    if (typeof this.query !== "undefined") {
      suffix += "?" + this.query;
    }
    if (typeof this.fragment !== "undefined") {
      suffix += "#" + this.fragment;
    }
    return suffix;
  }
  if (shared < mine.length) {
    return mine.slice(shared).join("/") + trailingSlash;
  }
  throw new Error(
    "Cannot calculate a relative URI for " +
      this.spec +
      " against " +
      otherURI.spec,
  );
}

// Matches a leading URI scheme including its ":" (e.g. "http:").
URIResolver.SCHEME = /^[A-Za-z][A-Za-z0-9\+\-\.]*\:/;
|
package/test/test.js
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
import { jsdom } from "jsdom";
|
2
|
+
import assert from "assert";
|
3
|
+
import getRDFaGraph from "../src";
|
4
|
+
import { readFileSync, writeFileSync } from "fs";
|
5
|
+
// Mocha suite for getRDFaGraph: extracts triples from an RDFa snippet, once
// from the whole document and once from a single element.
describe("getRDFaGraph", function() {
  // NOTE: whitespace inside the two template literals below is part of the
  // fixture / expected serialization; do not re-indent their contents.
  let html = `<div typeof="rdfs:Class" resource="http://schema.org/CreativeWork">
<span class="h" property="rdfs:label">CreativeWork</span>
<span property="rdfs:comment">The most generic kind of creative work, including books, movies, photographs, software programs, etc.</span>
<span>Subclass of: <a property="rdfs:subClassOf" href="http://schema.org/Thing">Thing</a></span>
<span>Source: <a property="dc:source" href="http://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources#source_rNews">rNews</a></span>
</div>`;

  let expected = `<http://schema.org/CreativeWork> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class>;
<http://www.w3.org/2000/01/rdf-schema#label> "CreativeWork";
<http://www.w3.org/2000/01/rdf-schema#comment> "The most generic kind of creative work, including books, movies, photographs, software programs, etc.";
<http://www.w3.org/2000/01/rdf-schema#subClassOf> <http://schema.org/Thing>;
<http://purl.org/dc/terms/source> <http://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources#source_rNews> .
`;

  it("should getRDFaGraph from a document", function() {
    let { document } = jsdom(html).defaultView.window;
    let graph = getRDFaGraph(document, { baseURI: "http://localhost" });
    assert.equal(graph.toString(), expected);
  });

  // Regression smoke test for the bug6.html fixture. This used to be an
  // exclusive `it.only` debugging case, which silently disabled every other
  // test in the suite and dumped its output to a hard-coded /tmp path.
  it("should process bug6.html with specialHtmlPredicates without throwing", () => {
    // Read as text so jsdom receives a string rather than a Buffer.
    let ht = readFileSync("./bug6.html", "utf8");
    let { document } = jsdom(ht).defaultView.window;

    let graph = getRDFaGraph(document, {
      baseURI: "http://localhost",
      specialHtmlPredicates: [
        {
          source: "http://www.w3.org/ns/prov#value",
          target:
            "http://lblod.data.gift/vocabularies/besluit/extractedDecisionContent",
        },
      ],
    });
    assert.equal(typeof graph.toString(), "string");
  });

  it("should getRDFaGraph from a node", function() {
    let { document } = jsdom(html).defaultView.window;
    let graph = getRDFaGraph(document.getElementsByTagName("div")[0], {
      baseURI: "http://localhost",
    });
    assert.equal(graph.toString(), expected);
  });
});
|