@lblod/graph-rdfa-processor 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,203 @@
1
+ export default class URIResolver {
2
+
3
+ parseURI(uri) {
4
+ var match = URIResolver.SCHEME.exec(uri);
5
+ if (!match) {
6
+ throw new Error("Bad URI value, no scheme: " + uri);
7
+ }
8
+ var parsed = { spec: uri };
9
+ parsed.scheme = match[0].substring(0,match[0].length-1);
10
+ parsed.schemeSpecificPart = parsed.spec.substring(match[0].length);
11
+ if (parsed.schemeSpecificPart.charAt(0)=='/' && parsed.schemeSpecificPart.charAt(1)=='/') {
12
+ this.parseGeneric(parsed);
13
+ } else {
14
+ parsed.isGeneric = false;
15
+ }
16
+ parsed.normalize = function() {
17
+ if (!this.isGeneric) {
18
+ return;
19
+ }
20
+ if (this.segments.length==0) {
21
+ return;
22
+ }
23
+ // edge case of ending in "/."
24
+ if (this.path.length>1 && this.path.substring(this.path.length-2)=="/.") {
25
+ this.path = this.path.substring(0,this.path.length-1);
26
+ this.segments.splice(this.segments.length-1,1);
27
+ this.schemeSpecificPart = "//"+this.authority+this.path;
28
+ if (typeof this.query != "undefined") {
29
+ this.schemeSpecificPart += "?" + this.query;
30
+ }
31
+ if (typeof this.fragment != "undefined") {
32
+ this.schemeSpecificPart += "#" + this.fragment;
33
+ }
34
+ this.spec = this.scheme+":"+this.schemeSpecificPart;
35
+ return;
36
+ }
37
+ var end = this.path.charAt(this.path.length-1);
38
+ if (end!="/") {
39
+ end = "";
40
+ }
41
+ for (var i=0; i < this.segments.length; i++) {
42
+ if (i>0 && this.segments[i]=="..") {
43
+ this.segments.splice(i-1,2);
44
+ i -= 2;
45
+ }
46
+ if (this.segments[i]==".") {
47
+ this.segments.splice(i,1);
48
+ i--;
49
+ }
50
+ }
51
+ this.path = this.segments.length==0 ? "/" : "/" + this.segments.join("/") + end;
52
+ this.schemeSpecificPart = "//" + this.authority + this.path;
53
+ if (typeof this.query != "undefined") {
54
+ this.schemeSpecificPart += "?" + this.query;
55
+ }
56
+ if (typeof this.fragment != "undefined") {
57
+ this.schemeSpecificPart += "#" + this.fragment;
58
+ }
59
+ this.spec = this.scheme+":"+this.schemeSpecificPart;
60
+ }
61
+
62
+ parsed.resolve = function(href) {
63
+ if (!href) {
64
+ return this.spec;
65
+ }
66
+ if (href.charAt(0)=='#') {
67
+ var lastHash = this.spec.lastIndexOf('#');
68
+ return lastHash<0 ? this.spec+href : this.spec.substring(0,lastHash)+href;
69
+ }
70
+ if (!this.isGeneric) {
71
+ throw new Error("Cannot resolve uri against non-generic URI: " + this.spec);
72
+ }
73
+ var colon = href.indexOf(':');
74
+ if (href.charAt(0)=='/') {
75
+ return this.scheme+"://"+this.authority+href;
76
+ } else if (href.charAt(0)=='.' && href.charAt(1)=='/') {
77
+ if (this.path.charAt(this.path.length-1)=='/') {
78
+ return this.scheme+"://"+this.authority+this.path+href.substring(2);
79
+ } else {
80
+ var last = this.path.lastIndexOf('/');
81
+ return this.scheme+"://"+this.authority+this.path.substring(0,last)+href.substring(1);
82
+ }
83
+ } else if (URIResolver.SCHEME.test(href)) {
84
+ return href;
85
+ } else if (href.charAt(0)=="?") {
86
+ return this.scheme+"://"+this.authority+this.path+href;
87
+ } else {
88
+ if (this.path.charAt(this.path.length-1)=='/') {
89
+ return this.scheme+"://"+this.authority+this.path+href;
90
+ } else {
91
+ var last = this.path.lastIndexOf('/');
92
+ return this.scheme+"://"+this.authority+this.path.substring(0,last+1)+href;
93
+ }
94
+ }
95
+ };
96
+
97
+ parsed.relativeTo = function(otherURI) {
98
+ if (otherURI.scheme!=this.scheme) {
99
+ return this.spec;
100
+ }
101
+ if (!this.isGeneric) {
102
+ throw new Error("A non generic URI cannot be made relative: " + this.spec);
103
+ }
104
+ if (!otherURI.isGeneric) {
105
+ throw new Error("Cannot make a relative URI against a non-generic URI: " + otherURI.spec);
106
+ }
107
+ if (otherURI.authority!=this.authority) {
108
+ return this.spec;
109
+ }
110
+ var i=0;
111
+ for (; i < this.segments.length && i < otherURI.segments.length; i++) {
112
+ if (this.segments[i]!=otherURI.segments[i]) {
113
+ //alert(this.path+" different from "+otherURI.path+" at '"+this.segments[i]+"' vs '"+otherURI.segments[i]+"'");
114
+ var offset = otherURI.path.charAt(otherURI.path.length-1)=='/' ? 0 : -1;
115
+ var relative = "";
116
+ for (var j=i; j < otherURI.segments.length+offset; j++) {
117
+ relative += "../";
118
+ }
119
+ for (var j=i; j < this.segments.length; j++) {
120
+ relative += this.segments[j];
121
+ if ((j+1) < this.segments.length) {
122
+ relative += "/";
123
+ }
124
+ }
125
+ if (this.path.charAt(this.path.length-1)=='/') {
126
+ relative += "/";
127
+ }
128
+ return relative;
129
+ }
130
+ }
131
+ if (this.segments.length==otherURI.segments.length) {
132
+ return this.hash ? this.hash : (this.query ? this.query : "");
133
+ } else if (i < this.segments.length) {
134
+ var relative = "";
135
+ for (var j=i; j < this.segments.length; j++) {
136
+ relative += this.segments[j];
137
+ if ((j+1) < this.segments.length) {
138
+ relative += "/";
139
+ }
140
+ }
141
+ if (this.path.charAt(this.path.length-1)=='/') {
142
+ relative += "/";
143
+ }
144
+ return relative;
145
+ } else {
146
+ throw new Error("Cannot calculate a relative URI for "+this.spec+" against " + otherURI.spec);
147
+ }
148
+ };
149
+ return parsed;
150
+ }
151
+
152
+ parseGeneric(parsed) {
153
+ if (parsed.schemeSpecificPart.charAt(0)!='/' || parsed.schemeSpecificPart.charAt(1)!='/') {
154
+ throw new Error("Generic URI values should start with '//':" + parsed.spec);
155
+ }
156
+
157
+ var work = parsed.schemeSpecificPart.substring(2);
158
+ var pathStart = work.indexOf("/");
159
+ parsed.authority = pathStart<0 ? work : work.substring(0,pathStart);
160
+ parsed.path = pathStart<0 ? "" : work.substring(pathStart);
161
+ var hash = parsed.path.indexOf('#');
162
+ if (hash>=0) {
163
+ parsed.fragment = parsed.path.substring(hash+1);
164
+ parsed.path = parsed.path.substring(0,hash);
165
+ }
166
+ var questionMark = parsed.path.indexOf('?');
167
+ if (questionMark>=0) {
168
+ parsed.query = parsed.path.substring(questionMark+1);
169
+ parsed.path = parsed.path.substring(0,questionMark);
170
+ }
171
+ if (parsed.path=="/" || parsed.path=="") {
172
+ parsed.segments = [];
173
+ } else {
174
+ parsed.segments = parsed.path.split(/\//);
175
+ if (parsed.segments.length>0 && parsed.segments[0] == '' && parsed.path.length>1 && parsed.path.charAt(1) != '/') {
176
+ // empty segment at the start, remove it
177
+ parsed.segments.shift();
178
+ }
179
+ if (parsed.segments.length > 0 && parsed.path.length>0 && parsed.path.charAt(parsed.path.length-1) == '/' && parsed.segments[parsed.segments.length-1] == '') {
180
+ // we may have an empty the end
181
+ // check to see if it is legimate
182
+ if (parsed.path.length> 1 && parsed.path.charAt(parsed.path.length-2) != '/') {
183
+ parsed.segments.pop();
184
+ }
185
+ }
186
+ // check for non-escaped characters
187
+ for (var i=0; i < parsed.segments.length; i++) {
188
+ var check = parsed.segments[i].split(/%[A-Za-z0-9][A-Za-z0-9]|[\ud800-\udfff][\ud800-\udfff]|[A-Za-z0-9\-\._~!$&'()*+,;=@:\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+/);
189
+
190
+ for (var j=0; j < check.length; j++) {
191
+ if (check[j].length>0) {
192
+ throw new Error("Unecaped character "+check[j].charAt(0)+" ("+check[j].charCodeAt(0)+") in URI " + parsed.spec);
193
+ }
194
+ }
195
+ }
196
+ }
197
+ parsed.isGeneric = true;
198
+ }
199
+
200
+ }
201
+
202
+
203
+ URIResolver.SCHEME = /^[A-Za-z][A-Za-z0-9\+\-\.]*\:/;
package/test/test.js ADDED
@@ -0,0 +1,50 @@
1
+ import { jsdom } from "jsdom";
2
+ import assert from "assert";
3
+ import getRDFaGraph from "../src";
4
+ import { readFileSync, writeFileSync } from "fs";
5
+ describe("getRDFaGraph", function() {
6
+ let html = `<div typeof="rdfs:Class" resource="http://schema.org/CreativeWork">
7
+ <span class="h" property="rdfs:label">CreativeWork</span>
8
+ <span property="rdfs:comment">The most generic kind of creative work, including books, movies, photographs, software programs, etc.</span>
9
+ <span>Subclass of: <a property="rdfs:subClassOf" href="http://schema.org/Thing">Thing</a></span>
10
+ <span>Source: <a property="dc:source" href="http://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources#source_rNews">rNews</a></span>
11
+ </div>`;
12
+
13
+ let expected = `<http://schema.org/CreativeWork> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class>;
14
+ <http://www.w3.org/2000/01/rdf-schema#label> "CreativeWork";
15
+ <http://www.w3.org/2000/01/rdf-schema#comment> "The most generic kind of creative work, including books, movies, photographs, software programs, etc.";
16
+ <http://www.w3.org/2000/01/rdf-schema#subClassOf> <http://schema.org/Thing>;
17
+ <http://purl.org/dc/terms/source> <http://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources#source_rNews> .
18
+ `;
19
+
20
+ it("should getRDFaGraph from a document", function() {
21
+ let { document } = jsdom(html).defaultView.window;
22
+ let graph = getRDFaGraph(document, { baseURI: "http://localhost" });
23
+ assert.equal(graph.toString(), expected);
24
+ });
25
+
26
+ it.only("whatever", () => {
27
+ let ht = readFileSync("./test-page.html");
28
+ let { document } = jsdom(ht).defaultView.window;
29
+
30
+ let graph = getRDFaGraph(document, {
31
+ baseURI: "http://localhost",
32
+ specialHtmlPredicates: [
33
+ {
34
+ source: "http://www.w3.org/ns/prov#value",
35
+ target:
36
+ "http://lblod.data.gift/vocabularies/besluit/extractedDecisionContent",
37
+ },
38
+ ],
39
+ });
40
+ writeFileSync("/tmp/x.out", graph.toString(), "utf8");
41
+ });
42
+
43
+ it("should getRDFaGraph from a node", function() {
44
+ let { document } = jsdom(html).defaultView.window;
45
+ let graph = getRDFaGraph(document.getElementsByTagName("div")[0], {
46
+ baseURI: "http://localhost",
47
+ });
48
+ assert.equal(graph.toString(), expected);
49
+ });
50
+ });