@lde/pipeline 0.6.29 → 0.6.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -31,7 +31,9 @@ export declare class SparqlProbeResult extends ProbeResult {
|
|
|
31
31
|
*/
|
|
32
32
|
export declare class DataDumpProbeResult extends ProbeResult {
|
|
33
33
|
readonly contentSize: number | null;
|
|
34
|
-
|
|
34
|
+
readonly failureReason: string | null;
|
|
35
|
+
constructor(url: string, response: Response, failureReason?: string | null);
|
|
36
|
+
isSuccess(): boolean;
|
|
35
37
|
}
|
|
36
38
|
export type ProbeResultType = SparqlProbeResult | DataDumpProbeResult | NetworkError;
|
|
37
39
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../../src/distribution/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../../src/distribution/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAG5C;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;gBADf,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM;CAElC;AAED;;GAEG;AACH,uBAAe,WAAW;aAON,GAAG,EAAE,MAAM;IAN7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;gBAGzB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ;IAWb,SAAS,IAAI,OAAO;CAG5B;AAED;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD,SAAgB,mBAAmB,qCAAqC;IAE/D,SAAS,IAAI,OAAO;CAM9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;IAClD,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;gBAG3C,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,aAAa,GAAE,MAAM,GAAG,IAAW;IAU5B,SAAS,IAAI,OAAO;CAG9B;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAEjB;;;;;;;GAOG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,SAAO,GACb,OAAO,CAAC,eAAe,CAAC,CAY1B"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { Parser } from 'n3';
|
|
1
2
|
/**
|
|
2
3
|
* Result of a network error during probing.
|
|
3
4
|
*/
|
|
@@ -47,13 +48,18 @@ export class SparqlProbeResult extends ProbeResult {
|
|
|
47
48
|
*/
|
|
48
49
|
export class DataDumpProbeResult extends ProbeResult {
|
|
49
50
|
contentSize = null;
|
|
50
|
-
|
|
51
|
+
failureReason;
|
|
52
|
+
constructor(url, response, failureReason = null) {
|
|
51
53
|
super(url, response);
|
|
54
|
+
this.failureReason = failureReason;
|
|
52
55
|
const contentLengthHeader = response.headers.get('Content-Length');
|
|
53
56
|
if (contentLengthHeader) {
|
|
54
57
|
this.contentSize = parseInt(contentLengthHeader);
|
|
55
58
|
}
|
|
56
59
|
}
|
|
60
|
+
isSuccess() {
|
|
61
|
+
return super.isSuccess() && this.failureReason === null;
|
|
62
|
+
}
|
|
57
63
|
}
|
|
58
64
|
/**
|
|
59
65
|
* Probe a distribution to check availability and gather metadata.
|
|
@@ -96,19 +102,48 @@ async function probeDataDump(distribution, timeout) {
|
|
|
96
102
|
'Accept-Encoding': 'identity', // Return uncompressed responses.
|
|
97
103
|
},
|
|
98
104
|
};
|
|
99
|
-
|
|
105
|
+
const headResponse = await fetch(url, {
|
|
100
106
|
method: 'HEAD',
|
|
101
107
|
...requestOptions,
|
|
102
108
|
});
|
|
103
|
-
const contentLength =
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
+
const contentLength = headResponse.headers.get('Content-Length');
|
|
110
|
+
const contentLengthBytes = contentLength ? parseInt(contentLength) : 0;
|
|
111
|
+
// For small or unknown-size files, do a GET to validate body content.
|
|
112
|
+
// This also handles servers that incorrectly return 0 Content-Length for HEAD.
|
|
113
|
+
if (contentLengthBytes <= 10_240) {
|
|
114
|
+
const getResponse = await fetch(url, {
|
|
109
115
|
method: 'GET',
|
|
110
116
|
...requestOptions,
|
|
111
117
|
});
|
|
118
|
+
const body = await getResponse.text();
|
|
119
|
+
const isHttpSuccess = getResponse.status >= 200 && getResponse.status < 400;
|
|
120
|
+
const failureReason = isHttpSuccess
|
|
121
|
+
? validateBody(body, getResponse.headers.get('Content-Type'))
|
|
122
|
+
: null;
|
|
123
|
+
return new DataDumpProbeResult(url, getResponse, failureReason);
|
|
124
|
+
}
|
|
125
|
+
return new DataDumpProbeResult(url, headResponse);
|
|
126
|
+
}
|
|
127
|
+
const rdfContentTypes = [
|
|
128
|
+
'text/turtle',
|
|
129
|
+
'application/n-triples',
|
|
130
|
+
'application/n-quads',
|
|
131
|
+
];
|
|
132
|
+
function validateBody(body, contentType) {
|
|
133
|
+
if (body.length === 0) {
|
|
134
|
+
return 'Distribution is empty';
|
|
135
|
+
}
|
|
136
|
+
if (contentType && rdfContentTypes.some((t) => contentType.startsWith(t))) {
|
|
137
|
+
try {
|
|
138
|
+
const parser = new Parser();
|
|
139
|
+
const quads = parser.parse(body);
|
|
140
|
+
if (quads.length === 0) {
|
|
141
|
+
return 'Distribution contains no RDF triples';
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
catch (e) {
|
|
145
|
+
return e instanceof Error ? e.message : String(e);
|
|
146
|
+
}
|
|
112
147
|
}
|
|
113
|
-
return
|
|
148
|
+
return null;
|
|
114
149
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/distribution/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAe,KAAK,IAAI,EAAE,MAAM,IAAI,CAAC;AAC5C,OAAO,EAIL,KAAK,eAAe,EACrB,MAAM,YAAY,CAAC;AAUpB;;;;;;;;;GASG;AACH,wBAAuB,mBAAmB,CACxC,YAAY,EAAE,eAAe,EAAE,EAC/B,UAAU,EAAE,MAAM,EAClB,YAAY,CAAC,EAAE,YAAY,GAC1B,aAAa,CAAC,IAAI,CAAC,
|
|
1
|
+
{"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/distribution/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAe,KAAK,IAAI,EAAE,MAAM,IAAI,CAAC;AAC5C,OAAO,EAIL,KAAK,eAAe,EACrB,MAAM,YAAY,CAAC;AAUpB;;;;;;;;;GASG;AACH,wBAAuB,mBAAmB,CACxC,YAAY,EAAE,eAAe,EAAE,EAC/B,UAAU,EAAE,MAAM,EAClB,YAAY,CAAC,EAAE,YAAY,GAC1B,aAAa,CAAC,IAAI,CAAC,CAwCrB"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { DataFactory } from 'n3';
|
|
2
|
-
import { NetworkError, SparqlProbeResult, } from './probe.js';
|
|
2
|
+
import { NetworkError, SparqlProbeResult, DataDumpProbeResult, } from './probe.js';
|
|
3
3
|
const { quad, namedNode, blankNode, literal } = DataFactory;
|
|
4
4
|
const RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
|
|
5
5
|
const SCHEMA = 'https://schema.org/';
|
|
@@ -30,6 +30,9 @@ export async function* probeResultsToQuads(probeResults, datasetIri, importResul
|
|
|
30
30
|
else if (result.isSuccess()) {
|
|
31
31
|
yield* successQuads(action, result, datasetIri);
|
|
32
32
|
}
|
|
33
|
+
else if (result instanceof DataDumpProbeResult && result.failureReason) {
|
|
34
|
+
yield quad(action, namedNode(`${SCHEMA}error`), literal(result.failureReason));
|
|
35
|
+
}
|
|
33
36
|
else {
|
|
34
37
|
// HTTP error
|
|
35
38
|
const statusUri = `${HTTP_STATUS}${result.statusText.replace(/ /g, '')}`;
|