@lblod/graph-rdfa-processor 0.13.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,264 @@
1
/**
 * Small URI parser that produces plain "parsed URI" objects able to
 * normalize themselves, resolve references against themselves and express
 * themselves relative to another parsed URI.
 *
 * Only URIs whose scheme-specific part starts with "//" (called "generic"
 * here) are broken down into authority, path, query and fragment.
 */
export default class URIResolver {
  /**
   * Parses a URI string.
   *
   * @param {string} uri - Absolute URI; every line feed in it is discarded.
   * @returns {object} Parsed URI with `spec`, `scheme`, `schemeSpecificPart`,
   *   `isGeneric` and, for generic URIs, `authority`, `path`, `segments` and
   *   optionally `query` / `fragment`, plus the methods `normalize()`,
   *   `resolve(href)` and `relativeTo(otherURI)`.
   * @throws {Error} When the value has no scheme, or when a path segment of a
   *   generic URI contains a character that should have been escaped.
   */
  parseURI(uri) {
    // A string pattern would only remove the FIRST line feed; the URI may
    // contain several, so strip them all.
    const spec = uri.replace(/\n/g, "");
    const match = URIResolver.SCHEME.exec(spec);
    if (!match) {
      throw new Error("Bad URI value, no scheme: " + spec);
    }

    const parsed = { spec };
    // match[0] is the scheme including its trailing ":".
    parsed.scheme = match[0].substring(0, match[0].length - 1);
    parsed.schemeSpecificPart = spec.substring(match[0].length);
    if (parsed.schemeSpecificPart.startsWith("//")) {
      this.parseGeneric(parsed);
    } else {
      parsed.isGeneric = false;
    }

    // Recomputes schemeSpecificPart and spec from the individual components.
    const rebuildSpec = (target) => {
      target.schemeSpecificPart = "//" + target.authority + target.path;
      if (typeof target.query !== "undefined") {
        target.schemeSpecificPart += "?" + target.query;
      }
      if (typeof target.fragment !== "undefined") {
        target.schemeSpecificPart += "#" + target.fragment;
      }
      target.spec = target.scheme + ":" + target.schemeSpecificPart;
    };

    /**
     * Removes "." and ".." segments in place and refreshes `path`,
     * `schemeSpecificPart` and `spec`. Does nothing for non-generic URIs or
     * URIs without path segments.
     */
    parsed.normalize = function() {
      if (!this.isGeneric || this.segments.length === 0) {
        return;
      }
      const lastSegment = this.segments[this.segments.length - 1];
      // A final "." or ".." denotes a directory, so the result keeps a
      // trailing slash exactly as a path that already ends in "/" does.
      const keepTrailingSlash =
        this.path.endsWith("/") || lastSegment === "." || lastSegment === "..";

      const kept = [];
      for (const segment of this.segments) {
        if (segment === ".") {
          continue;
        }
        if (segment === "..") {
          // ".." above the root is simply dropped.
          kept.pop();
          continue;
        }
        kept.push(segment);
      }
      // Keep the same array instance, only its content changes.
      this.segments.splice(0, this.segments.length, ...kept);

      this.path =
        kept.length === 0
          ? "/"
          : "/" + kept.join("/") + (keepTrailingSlash ? "/" : "");
      rebuildSpec(this);
    };

    /**
     * Resolves a reference against this URI.
     *
     * @param {string} href - Absolute URI or relative reference. Dot segments
     *   inside `href` are not collapsed here.
     * @returns {string} The resolved URI; `spec` itself when `href` is empty.
     * @throws {Error} When this URI is not generic and `href` is anything
     *   other than empty or a fragment reference.
     */
    parsed.resolve = function(href) {
      if (!href) {
        return this.spec;
      }
      if (href.charAt(0) === "#") {
        // Replace an existing fragment, otherwise append one.
        const lastHash = this.spec.lastIndexOf("#");
        return lastHash < 0
          ? this.spec + href
          : this.spec.substring(0, lastHash) + href;
      }
      if (!this.isGeneric) {
        throw new Error(
          "Cannot resolve uri against non-generic URI: " + this.spec,
        );
      }

      const root = this.scheme + "://" + this.authority;
      if (href.startsWith("//")) {
        // Network-path reference: only the scheme is inherited.
        return this.scheme + ":" + href;
      }
      if (href.charAt(0) === "/") {
        return root + href;
      }
      if (URIResolver.SCHEME.test(href)) {
        return href;
      }
      if (href.charAt(0) === "?") {
        return root + this.path + href;
      }

      // Relative path: merge with the directory part of the base path. A
      // base without any path (e.g. "http://localhost") resolves from "/".
      const directory =
        this.path === ""
          ? "/"
          : this.path.substring(0, this.path.lastIndexOf("/") + 1);
      const relative = href.startsWith("./") ? href.substring(2) : href;
      return root + directory + relative;
    };

    /**
     * Expresses this URI relative to another parsed URI.
     *
     * @param {object} otherURI - Parsed URI to be relative to.
     * @returns {string} `spec` unchanged when scheme or authority differ,
     *   otherwise a relative reference.
     * @throws {Error} When either URI is not generic (and the schemes match),
     *   or when this path is a strict prefix of the other one.
     */
    parsed.relativeTo = function(otherURI) {
      if (otherURI.scheme !== this.scheme) {
        return this.spec;
      }
      if (!this.isGeneric) {
        throw new Error(
          "A non generic URI cannot be made relative: " + this.spec,
        );
      }
      if (!otherURI.isGeneric) {
        throw new Error(
          "Cannot make a relative URI against a non-generic URI: " +
            otherURI.spec,
        );
      }
      if (otherURI.authority !== this.authority) {
        return this.spec;
      }

      // Remaining segments of this path from `from` on, keeping a trailing
      // slash when the path has one.
      const tailFrom = (from) =>
        this.segments.slice(from).join("/") +
        (this.path.endsWith("/") ? "/" : "");

      const shared = Math.min(this.segments.length, otherURI.segments.length);
      let i = 0;
      for (; i < shared; i++) {
        if (this.segments[i] !== otherURI.segments[i]) {
          // The last segment of the other path is a "file" unless the path
          // ends in "/", in which case it is a directory to climb out of.
          const offset = otherURI.path.endsWith("/") ? 0 : -1;
          const ups = Math.max(0, otherURI.segments.length + offset - i);
          return "../".repeat(ups) + tailFrom(i);
        }
      }

      if (this.segments.length === otherURI.segments.length) {
        // Same path: only query and fragment can differ. They are emitted
        // with their delimiters so the result is a valid relative reference.
        let suffix = "";
        if (typeof this.query !== "undefined") {
          suffix += "?" + this.query;
        }
        if (typeof this.fragment !== "undefined") {
          suffix += "#" + this.fragment;
        }
        return suffix;
      }
      if (i < this.segments.length) {
        return tailFrom(i);
      }
      throw new Error(
        "Cannot calculate a relative URI for " +
          this.spec +
          " against " +
          otherURI.spec,
      );
    };

    return parsed;
  }

  /**
   * Fills in `authority`, `path`, `query`, `fragment`, `segments` and
   * `isGeneric` on a parsed URI whose scheme-specific part starts with "//".
   *
   * @param {object} parsed - Object carrying `spec` and `schemeSpecificPart`;
   *   it is mutated in place.
   * @throws {Error} When the scheme-specific part does not start with "//"
   *   or a path segment contains an unescaped character.
   */
  parseGeneric(parsed) {
    if (!parsed.schemeSpecificPart.startsWith("//")) {
      throw new Error(
        "Generic URI values should start with '//':" + parsed.spec,
      );
    }

    const work = parsed.schemeSpecificPart.substring(2);
    // The authority ends at the first "/", "?" or "#", so a URI without a
    // path keeps its query/fragment out of the authority.
    const authorityEnd = work.search(/[\/?#]/);
    parsed.authority =
      authorityEnd < 0 ? work : work.substring(0, authorityEnd);
    parsed.path = authorityEnd < 0 ? "" : work.substring(authorityEnd);

    const hash = parsed.path.indexOf("#");
    if (hash >= 0) {
      parsed.fragment = parsed.path.substring(hash + 1);
      parsed.path = parsed.path.substring(0, hash);
    }
    const questionMark = parsed.path.indexOf("?");
    if (questionMark >= 0) {
      parsed.query = parsed.path.substring(questionMark + 1);
      parsed.path = parsed.path.substring(0, questionMark);
    }

    if (parsed.path === "/" || parsed.path === "") {
      parsed.segments = [];
    } else {
      const path = parsed.path;
      parsed.segments = path.split(/\//);
      if (
        parsed.segments.length > 0 &&
        parsed.segments[0] === "" &&
        path.length > 1 &&
        path.charAt(1) !== "/"
      ) {
        // The leading "/" produced an empty first segment; remove it.
        parsed.segments.shift();
      }
      if (
        parsed.segments.length > 0 &&
        path.endsWith("/") &&
        parsed.segments[parsed.segments.length - 1] === "" &&
        // A trailing "//" denotes a legitimate empty segment; a single
        // trailing "/" does not.
        path.length > 1 &&
        path.charAt(path.length - 2) !== "/"
      ) {
        parsed.segments.pop();
      }

      // Splitting on every run of allowed content leaves only the
      // disallowed characters behind; any non-empty leftover is an error.
      const allowed = /%[A-Za-z0-9][A-Za-z0-9]|[\ud800-\udfff][\ud800-\udfff]|[A-Za-z0-9\-\._~!$&'()*+,;=@:\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+/;
      for (const segment of parsed.segments) {
        for (const leftover of segment.split(allowed)) {
          if (leftover.length > 0) {
            throw new Error(
              "Unescaped character " +
                leftover.charAt(0) +
                " (" +
                leftover.charCodeAt(0) +
                ") in URI " +
                parsed.spec,
            );
          }
        }
      }
    }
    parsed.isGeneric = true;
  }
}

// Matches the scheme of a URI including the trailing ":".
URIResolver.SCHEME = /^[A-Za-z][A-Za-z0-9\+\-\.]*\:/;
package/test/test.js ADDED
@@ -0,0 +1,50 @@
1
+ import { jsdom } from "jsdom";
2
+ import assert from "assert";
3
+ import getRDFaGraph from "../src";
4
+ import { readFileSync, writeFileSync } from "fs";
5
// Mocha suite for getRDFaGraph: extracts RDFa from a small schema.org
// snippet and compares the serialized graph with a fixed expectation.
describe("getRDFaGraph", function() {
  const html = `<div typeof="rdfs:Class" resource="http://schema.org/CreativeWork">
<span class="h" property="rdfs:label">CreativeWork</span>
<span property="rdfs:comment">The most generic kind of creative work, including books, movies, photographs, software programs, etc.</span>
<span>Subclass of: <a property="rdfs:subClassOf" href="http://schema.org/Thing">Thing</a></span>
<span>Source: <a property="dc:source" href="http://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources#source_rNews">rNews</a></span>
</div>`;

  // NOTE(review): this literal is compared byte-for-byte with
  // graph.toString(); confirm that the leading whitespace of the
  // continuation lines matches what the serializer actually emits.
  const expected = `<http://schema.org/CreativeWork> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class>;
<http://www.w3.org/2000/01/rdf-schema#label> "CreativeWork";
<http://www.w3.org/2000/01/rdf-schema#comment> "The most generic kind of creative work, including books, movies, photographs, software programs, etc.";
<http://www.w3.org/2000/01/rdf-schema#subClassOf> <http://schema.org/Thing>;
<http://purl.org/dc/terms/source> <http://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources#source_rNews> .
`;

  it("should getRDFaGraph from a document", function() {
    const { document } = jsdom(html).defaultView.window;
    const graph = getRDFaGraph(document, { baseURI: "http://localhost" });
    assert.equal(graph.toString(), expected);
  });

  // Manual debugging aid: it asserts nothing, needs a local ./bug6.html and
  // writes to /tmp. It used to be `it.only`, which silently disabled the two
  // real tests of this suite; it is skipped instead so they run again.
  // Switch to `it` temporarily when the dump is needed.
  it.skip("whatever", () => {
    // Read as text so jsdom receives a string rather than a Buffer.
    const ht = readFileSync("./bug6.html", "utf8");
    const { document } = jsdom(ht).defaultView.window;

    const graph = getRDFaGraph(document, {
      baseURI: "http://localhost",
      specialHtmlPredicates: [
        {
          source: "http://www.w3.org/ns/prov#value",
          target:
            "http://lblod.data.gift/vocabularies/besluit/extractedDecisionContent",
        },
      ],
    });
    writeFileSync("/tmp/x.ttl", graph.toString(), "utf8");
  });

  it("should getRDFaGraph from a node", function() {
    const { document } = jsdom(html).defaultView.window;
    const graph = getRDFaGraph(document.getElementsByTagName("div")[0], {
      baseURI: "http://localhost",
    });
    assert.equal(graph.toString(), expected);
  });
});