@lblod/graph-rdfa-processor 0.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,264 @@
1
/**
 * Small URI parser used to resolve and relativize URIs.
 *
 * `parseURI` returns a plain object describing the URI. Every parsed URI has
 * `spec`, `scheme`, `schemeSpecificPart` and `isGeneric`; generic URIs
 * (`scheme://authority/path?query#fragment`) additionally carry `authority`,
 * `path`, `segments` and, when present, `query` and `fragment`. The object
 * also exposes three methods: `normalize()`, `resolve(href)` and
 * `relativeTo(otherURI)`.
 */
export default class URIResolver {
  /**
   * Parses a URI string into a descriptor object.
   *
   * @param {string} uri - Absolute URI. Line breaks inside it are dropped.
   * @returns {object} Parsed URI with `normalize`, `resolve` and `relativeTo`.
   * @throws {Error} If the value has no scheme, or (for generic URIs) a path
   *   segment contains a character that should have been escaped.
   */
  parseURI(uri) {
    // Input sometimes contains line breaks; strip all of them, not only the
    // first one (a string pattern passed to replace() matches just once).
    const spec = uri.replace(/[\r\n]/g, "");
    const match = URIResolver.SCHEME.exec(spec);
    if (!match) {
      throw new Error("Bad URI value, no scheme: " + spec);
    }

    const parsed = { spec };
    parsed.scheme = match[0].substring(0, match[0].length - 1);
    parsed.schemeSpecificPart = spec.substring(match[0].length);
    if (parsed.schemeSpecificPart.startsWith("//")) {
      this.parseGeneric(parsed);
    } else {
      parsed.isGeneric = false;
    }

    // Recomputes schemeSpecificPart and spec from the individual components.
    const rebuildSpec = (target) => {
      target.schemeSpecificPart = "//" + target.authority + target.path;
      if (typeof target.query !== "undefined") {
        target.schemeSpecificPart += "?" + target.query;
      }
      if (typeof target.fragment !== "undefined") {
        target.schemeSpecificPart += "#" + target.fragment;
      }
      target.spec = target.scheme + ":" + target.schemeSpecificPart;
    };

    /**
     * Removes "." and ".." segments from the path in place and refreshes
     * `path`, `schemeSpecificPart` and `spec`. No-op for non-generic URIs and
     * for URIs without path segments.
     */
    parsed.normalize = function() {
      if (!this.isGeneric || this.segments.length === 0) {
        return;
      }
      // Edge case: a path ending in "/." only loses that trailing dot.
      if (this.path.endsWith("/.")) {
        this.path = this.path.substring(0, this.path.length - 1);
        this.segments.pop();
        rebuildSpec(this);
        return;
      }
      const trailing = this.path.endsWith("/") ? "/" : "";
      for (let i = 0; i < this.segments.length; i++) {
        if (i > 0 && this.segments[i] === "..") {
          // Drop ".." together with the segment it cancels, then step back
          // so the segment preceding the removed pair is examined again.
          this.segments.splice(i - 1, 2);
          i -= 2;
        }
        // i may be -1 here; segments[-1] is undefined, so nothing happens.
        if (this.segments[i] === ".") {
          this.segments.splice(i, 1);
          i--;
        }
      }
      this.path =
        this.segments.length === 0
          ? "/"
          : "/" + this.segments.join("/") + trailing;
      rebuildSpec(this);
    };

    /**
     * Resolves a reference against this URI.
     *
     * @param {string} href - Absolute URI or relative reference.
     * @returns {string} The resolved URI (dot segments are not collapsed).
     * @throws {Error} If href is a relative path or query reference and this
     *   URI is not generic.
     */
    parsed.resolve = function(href) {
      if (!href) {
        return this.spec;
      }
      if (href.charAt(0) === "#") {
        const lastHash = this.spec.lastIndexOf("#");
        return lastHash < 0
          ? this.spec + href
          : this.spec.substring(0, lastHash) + href;
      }
      // An absolute reference never depends on the base, so it must also
      // work when the base is non-generic (e.g. "about:blank").
      if (URIResolver.SCHEME.test(href)) {
        return href;
      }
      if (!this.isGeneric) {
        throw new Error(
          "Cannot resolve uri against non-generic URI: " + this.spec,
        );
      }
      // Network-path reference: keeps only the base scheme.
      if (href.startsWith("//")) {
        return this.scheme + ":" + href;
      }
      const root = this.scheme + "://" + this.authority;
      if (href.charAt(0) === "/") {
        return root + href;
      }
      if (href.charAt(0) === "?") {
        return root + this.path + href;
      }
      // Relative path: merge with the base path up to its last "/". A base
      // with an authority but an empty path merges as "/" so the reference
      // is not glued onto the host name.
      const directory =
        this.path.substring(0, this.path.lastIndexOf("/") + 1) || "/";
      const relative = href.startsWith("./") ? href.substring(2) : href;
      return root + directory + relative;
    };

    /**
     * Expresses this URI relative to another one.
     *
     * @param {object} otherURI - Parsed URI to be relative to.
     * @returns {string} Relative reference, or this URI's full spec when the
     *   scheme or authority differ.
     * @throws {Error} If either URI is non-generic (and schemes match), or
     *   this URI has fewer segments than otherURI with no differing segment.
     */
    parsed.relativeTo = function(otherURI) {
      if (otherURI.scheme !== this.scheme) {
        return this.spec;
      }
      if (!this.isGeneric) {
        throw new Error(
          "A non generic URI cannot be made relative: " + this.spec,
        );
      }
      if (!otherURI.isGeneric) {
        throw new Error(
          "Cannot make a relative URI against a non-generic URI: " +
            otherURI.spec,
        );
      }
      if (otherURI.authority !== this.authority) {
        return this.spec;
      }

      // Joins this URI's segments from `start`, keeping a trailing slash.
      const tailFrom = (start) => {
        const tail = this.segments.slice(start).join("/");
        return this.path.endsWith("/") ? tail + "/" : tail;
      };

      const shared = Math.min(this.segments.length, otherURI.segments.length);
      let i = 0;
      for (; i < shared; i++) {
        if (this.segments[i] !== otherURI.segments[i]) {
          // The last segment of otherURI is a "file" unless its path ends
          // with "/", in which case it counts as a directory to climb out of.
          const offset = otherURI.path.endsWith("/") ? 0 : -1;
          const climbs = Math.max(0, otherURI.segments.length + offset - i);
          return "../".repeat(climbs) + tailFrom(i);
        }
      }
      if (this.segments.length === otherURI.segments.length) {
        // Same path: only query and fragment remain, each with its
        // delimiter so the result is a valid relative reference.
        let suffix = "";
        if (typeof this.query !== "undefined") {
          suffix += "?" + this.query;
        }
        if (typeof this.fragment !== "undefined") {
          suffix += "#" + this.fragment;
        }
        return suffix;
      }
      if (i < this.segments.length) {
        return tailFrom(i);
      }
      throw new Error(
        "Cannot calculate a relative URI for " +
          this.spec +
          " against " +
          otherURI.spec,
      );
    };
    return parsed;
  }

  /**
   * Fills in the generic-URI components of `parsed` (`authority`, `path`,
   * `segments`, optional `query` / `fragment`) and sets `isGeneric` to true.
   *
   * @param {object} parsed - Object with `spec` and `schemeSpecificPart`.
   * @throws {Error} If schemeSpecificPart does not start with "//" or a path
   *   segment contains an unescaped character.
   */
  parseGeneric(parsed) {
    if (!parsed.schemeSpecificPart.startsWith("//")) {
      throw new Error(
        "Generic URI values should start with '//':" + parsed.spec,
      );
    }

    const work = parsed.schemeSpecificPart.substring(2);
    // The authority ends at the first "/", "?" or "#", so a URI without a
    // path keeps its query and fragment out of the authority.
    const authorityEnd = work.search(/[\/?#]/);
    parsed.authority =
      authorityEnd < 0 ? work : work.substring(0, authorityEnd);
    let rest = authorityEnd < 0 ? "" : work.substring(authorityEnd);

    // Fragment first: a "?" after "#" belongs to the fragment.
    const hash = rest.indexOf("#");
    if (hash >= 0) {
      parsed.fragment = rest.substring(hash + 1);
      rest = rest.substring(0, hash);
    }
    const questionMark = rest.indexOf("?");
    if (questionMark >= 0) {
      parsed.query = rest.substring(questionMark + 1);
      rest = rest.substring(0, questionMark);
    }
    parsed.path = rest;

    if (parsed.path === "/" || parsed.path === "") {
      parsed.segments = [];
    } else {
      const path = parsed.path;
      parsed.segments = path.split(/\//);
      if (
        parsed.segments.length > 0 &&
        parsed.segments[0] === "" &&
        path.length > 1 &&
        path.charAt(1) !== "/"
      ) {
        // Empty segment produced by the leading "/": remove it.
        parsed.segments.shift();
      }
      if (
        parsed.segments.length > 0 &&
        path.endsWith("/") &&
        parsed.segments[parsed.segments.length - 1] === "" &&
        path.length > 1 &&
        path.charAt(path.length - 2) !== "/"
      ) {
        // Empty segment produced by a single trailing "/": remove it. A
        // trailing "//" keeps its empty segment.
        parsed.segments.pop();
      }

      // Splitting on every run of allowed characters leaves only empty
      // strings when the segment is clean; any leftover is unescaped.
      const allowed = /%[A-Za-z0-9][A-Za-z0-9]|[\ud800-\udfff][\ud800-\udfff]|[A-Za-z0-9\-\._~!$&'()*+,;=@:\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+/;
      for (const segment of parsed.segments) {
        for (const leftover of segment.split(allowed)) {
          if (leftover.length > 0) {
            throw new Error(
              "Unescaped character " +
                leftover.charAt(0) +
                " (" +
                leftover.charCodeAt(0) +
                ") in URI " +
                parsed.spec,
            );
          }
        }
      }
    }
    parsed.isGeneric = true;
  }
}

// Matches the leading "scheme:" of an absolute URI.
URIResolver.SCHEME = /^[A-Za-z][A-Za-z0-9\+\-\.]*\:/;
package/test/test.js ADDED
@@ -0,0 +1,50 @@
1
+ import { jsdom } from "jsdom";
2
+ import assert from "assert";
3
+ import getRDFaGraph from "../src";
4
+ import { readFileSync, writeFileSync } from "fs";
5
// Mocha suite for getRDFaGraph: serializes the RDFa found in a small HTML
// fixture and compares the result with the expected Turtle output.
describe("getRDFaGraph", function() {
  // HTML fixture: one rdfs:Class resource with label, comment, subClassOf and
  // dc:source properties.
  const html = `<div typeof="rdfs:Class" resource="http://schema.org/CreativeWork">
<span class="h" property="rdfs:label">CreativeWork</span>
<span property="rdfs:comment">The most generic kind of creative work, including books, movies, photographs, software programs, etc.</span>
<span>Subclass of: <a property="rdfs:subClassOf" href="http://schema.org/Thing">Thing</a></span>
<span>Source: <a property="dc:source" href="http://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources#source_rNews">rNews</a></span>
</div>`;

  // Serialization expected from graph.toString() for the fixture above.
  const expected = `<http://schema.org/CreativeWork> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class>;
<http://www.w3.org/2000/01/rdf-schema#label> "CreativeWork";
<http://www.w3.org/2000/01/rdf-schema#comment> "The most generic kind of creative work, including books, movies, photographs, software programs, etc.";
<http://www.w3.org/2000/01/rdf-schema#subClassOf> <http://schema.org/Thing>;
<http://purl.org/dc/terms/source> <http://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources#source_rNews> .
`;

  it("should getRDFaGraph from a document", function() {
    const { document } = jsdom(html).defaultView.window;
    const graph = getRDFaGraph(document, { baseURI: "http://localhost" });
    assert.equal(graph.toString(), expected);
  });

  // Was `it.only`, which made mocha skip every other test in the run.
  // NOTE(review): relies on ./bug6.html existing in the working directory and
  // dumps the result to /tmp/x.ttl for manual inspection — confirm this
  // fixture is meant to be part of the suite.
  it("whatever", () => {
    // Read as text so jsdom receives a string, like the other tests.
    const ht = readFileSync("./bug6.html", "utf8");
    const { document } = jsdom(ht).defaultView.window;

    const graph = getRDFaGraph(document, {
      baseURI: "http://localhost",
      specialHtmlPredicates: [
        {
          source: "http://www.w3.org/ns/prov#value",
          target:
            "http://lblod.data.gift/vocabularies/besluit/extractedDecisionContent",
        },
      ],
    });
    const turtle = graph.toString();
    assert.equal(typeof turtle, "string");
    writeFileSync("/tmp/x.ttl", turtle, "utf8");
  });

  it("should getRDFaGraph from a node", function() {
    const { document } = jsdom(html).defaultView.window;
    const graph = getRDFaGraph(document.getElementsByTagName("div")[0], {
      baseURI: "http://localhost",
    });
    assert.equal(graph.toString(), expected);
  });
});