@comunica/actor-rdf-parse-html-script 4.4.2-alpha.49.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/components/ActorRdfParseHtmlScript.jsonld +15 -15
- package/components/components.jsonld +1 -1
- package/components/context.jsonld +1 -1
- package/lib/ActorRdfParseHtmlScript.js +4 -0
- package/lib/ActorRdfParseHtmlScript.js.map +1 -1
- package/lib/HtmlScriptListener.js +16 -4
- package/lib/HtmlScriptListener.js.map +1 -1
- package/package.json +8 -8
package/README.md
CHANGED
|
@@ -23,7 +23,7 @@ After installing, this package can be added to your engine's configuration as fo
|
|
|
23
23
|
{
|
|
24
24
|
"@context": [
|
|
25
25
|
...
|
|
26
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html-script/^
|
|
26
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html-script/^5.0.0/components/context.jsonld"
|
|
27
27
|
],
|
|
28
28
|
"actors": [
|
|
29
29
|
...
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"@context": [
|
|
3
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html-script/^
|
|
4
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/core/^
|
|
5
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse-html/^
|
|
3
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html-script/^5.0.0/components/context.jsonld",
|
|
4
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/core/^5.0.0/components/context.jsonld",
|
|
5
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse-html/^5.0.0/components/context.jsonld"
|
|
6
6
|
],
|
|
7
7
|
"@id": "npmd:@comunica/actor-rdf-parse-html-script",
|
|
8
8
|
"components": [
|
|
@@ -45,37 +45,37 @@
|
|
|
45
45
|
"genericTypeInstances": [
|
|
46
46
|
{
|
|
47
47
|
"@type": "ParameterRangeGenericTypeReference",
|
|
48
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
48
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
|
|
49
49
|
},
|
|
50
50
|
{
|
|
51
51
|
"@type": "ParameterRangeGenericTypeReference",
|
|
52
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
52
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
|
|
53
53
|
},
|
|
54
54
|
{
|
|
55
55
|
"@type": "ParameterRangeGenericTypeReference",
|
|
56
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
56
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
|
|
57
57
|
},
|
|
58
58
|
{
|
|
59
59
|
"@type": "ParameterRangeGenericTypeReference",
|
|
60
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
60
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_TS"
|
|
61
61
|
}
|
|
62
62
|
]
|
|
63
63
|
},
|
|
64
64
|
{
|
|
65
65
|
"@type": "ParameterRangeGenericTypeReference",
|
|
66
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
66
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
|
|
67
67
|
},
|
|
68
68
|
{
|
|
69
69
|
"@type": "ParameterRangeGenericTypeReference",
|
|
70
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
70
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
|
|
71
71
|
},
|
|
72
72
|
{
|
|
73
73
|
"@type": "ParameterRangeGenericTypeReference",
|
|
74
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
74
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
|
|
75
75
|
},
|
|
76
76
|
{
|
|
77
77
|
"@type": "ParameterRangeGenericTypeReference",
|
|
78
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
78
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_TS"
|
|
79
79
|
}
|
|
80
80
|
]
|
|
81
81
|
},
|
|
@@ -112,19 +112,19 @@
|
|
|
112
112
|
"genericTypeInstances": [
|
|
113
113
|
{
|
|
114
114
|
"@type": "ParameterRangeGenericTypeReference",
|
|
115
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
115
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
|
|
116
116
|
},
|
|
117
117
|
{
|
|
118
118
|
"@type": "ParameterRangeGenericTypeReference",
|
|
119
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
119
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
|
|
120
120
|
},
|
|
121
121
|
{
|
|
122
122
|
"@type": "ParameterRangeGenericTypeReference",
|
|
123
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
123
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
|
|
124
124
|
},
|
|
125
125
|
{
|
|
126
126
|
"@type": "ParameterRangeGenericTypeReference",
|
|
127
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
127
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^5.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_TS"
|
|
128
128
|
}
|
|
129
129
|
]
|
|
130
130
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"@context": [
|
|
3
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html-script/^
|
|
3
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html-script/^5.0.0/components/context.jsonld"
|
|
4
4
|
],
|
|
5
5
|
"@id": "npmd:@comunica/actor-rdf-parse-html-script",
|
|
6
6
|
"@type": "Module",
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"https://linkedsoftwaredependencies.org/bundles/npm/componentsjs/^6.0.0/components/context.jsonld",
|
|
4
4
|
{
|
|
5
5
|
"npmd": "https://linkedsoftwaredependencies.org/bundles/npm/",
|
|
6
|
-
"carphs": "npmd:@comunica/actor-rdf-parse-html-script/^
|
|
6
|
+
"carphs": "npmd:@comunica/actor-rdf-parse-html-script/^5.0.0/",
|
|
7
7
|
"ActorRdfParseHtmlScript": {
|
|
8
8
|
"@id": "carphs:components/ActorRdfParseHtmlScript.jsonld#ActorRdfParseHtmlScript",
|
|
9
9
|
"@prefix": true,
|
|
@@ -11,8 +11,12 @@ const HtmlScriptListener_1 = require("./HtmlScriptListener");
|
|
|
11
11
|
* and announce the presence of them by media type.
|
|
12
12
|
*/
|
|
13
13
|
class ActorRdfParseHtmlScript extends bus_rdf_parse_html_1.ActorRdfParseHtml {
|
|
14
|
+
mediatorRdfParseMediatypes;
|
|
15
|
+
mediatorRdfParseHandle;
|
|
14
16
|
constructor(args) {
|
|
15
17
|
super(args);
|
|
18
|
+
this.mediatorRdfParseMediatypes = args.mediatorRdfParseMediatypes;
|
|
19
|
+
this.mediatorRdfParseHandle = args.mediatorRdfParseHandle;
|
|
16
20
|
}
|
|
17
21
|
async test(_action) {
|
|
18
22
|
return (0, core_1.passTestVoid)();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ActorRdfParseHtmlScript.js","sourceRoot":"","sources":["ActorRdfParseHtmlScript.ts"],"names":[],"mappings":";;;AAMA,qEAAiE;AAEjE,yCAA8C;AAC9C,6DAA0D;AAE1D;;;;;GAKG;AACH,MAAa,uBAAwB,SAAQ,sCAAiB;
|
|
1
|
+
{"version":3,"file":"ActorRdfParseHtmlScript.js","sourceRoot":"","sources":["ActorRdfParseHtmlScript.ts"],"names":[],"mappings":";;;AAMA,qEAAiE;AAEjE,yCAA8C;AAC9C,6DAA0D;AAE1D;;;;;GAKG;AACH,MAAa,uBAAwB,SAAQ,sCAAiB;IAC3C,0BAA0B,CAA6B;IACvD,sBAAsB,CAAyB;IAEhE,YAAmB,IAAkC;QACnD,KAAK,CAAC,IAAI,CAAC,CAAC;QACZ,IAAI,CAAC,0BAA0B,GAAG,IAAI,CAAC,0BAA0B,CAAC;QAClE,IAAI,CAAC,sBAAsB,GAAG,IAAI,CAAC,sBAAsB,CAAC;IAC5D,CAAC;IAEM,KAAK,CAAC,IAAI,CAAC,OAA4B;QAC5C,OAAO,IAAA,mBAAY,GAAE,CAAC;IACxB,CAAC;IAEM,KAAK,CAAC,GAAG,CAAC,MAA2B;QAC1C,MAAM,cAAc,GAA2B,CAAC,MAAM,IAAI,CAAC,0BAA0B;aAClF,OAAO,CAAC,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC;QACtE,MAAM,iBAAiB,GAAG,IAAI,uCAAkB,CAC9C,IAAI,CAAC,sBAAsB,EAC3B,MAAM,CAAC,IAAI,EACX,MAAM,CAAC,KAAK,EACZ,MAAM,CAAC,GAAG,EACV,cAAc,EACd,MAAM,CAAC,OAAO,EACd,MAAM,CAAC,OAAO,EACd,MAAM,CAAC,OAAO,CACf,CAAC;QACF,OAAO,EAAE,iBAAiB,EAAE,CAAC;IAC/B,CAAC;CACF;AA7BD,0DA6BC","sourcesContent":["import type { MediatorRdfParseHandle, MediatorRdfParseMediaTypes } from '@comunica/bus-rdf-parse';\nimport type {\n IActionRdfParseHtml,\n IActorRdfParseHtmlOutput,\n IActorRdfParseHtmlArgs,\n} from '@comunica/bus-rdf-parse-html';\nimport { ActorRdfParseHtml } from '@comunica/bus-rdf-parse-html';\nimport type { IActorTest, TestResult } from '@comunica/core';\nimport { passTestVoid } from '@comunica/core';\nimport { HtmlScriptListener } from './HtmlScriptListener';\n\n/**\n * A HTML script RDF Parse actor that listens on the 'rdf-parse' bus.\n *\n * It is able to extract and parse any RDF serialization from script tags in HTML files\n * and announce the presence of them by media type.\n */\nexport class ActorRdfParseHtmlScript extends ActorRdfParseHtml {\n private readonly mediatorRdfParseMediatypes: MediatorRdfParseMediaTypes;\n private readonly mediatorRdfParseHandle: MediatorRdfParseHandle;\n\n public constructor(args: IActorRdfParseHtmlScriptArgs) {\n super(args);\n this.mediatorRdfParseMediatypes = args.mediatorRdfParseMediatypes;\n this.mediatorRdfParseHandle = args.mediatorRdfParseHandle;\n }\n\n public async test(_action: IActionRdfParseHtml): Promise<TestResult<IActorTest>> {\n return passTestVoid();\n }\n\n public async run(action: IActionRdfParseHtml): Promise<IActorRdfParseHtmlOutput> {\n const supportedTypes: Record<string, number> = (await this.mediatorRdfParseMediatypes\n .mediate({ context: action.context, mediaTypes: true })).mediaTypes;\n const htmlParseListener = new HtmlScriptListener(\n this.mediatorRdfParseHandle,\n action.emit,\n action.error,\n action.end,\n supportedTypes,\n action.context,\n action.baseIRI,\n action.headers,\n );\n return { htmlParseListener };\n }\n}\n\nexport interface IActorRdfParseHtmlScriptArgs extends IActorRdfParseHtmlArgs {\n /**\n * The RDF Parse mediator for collecting media types\n */\n mediatorRdfParseMediatypes: MediatorRdfParseMediaTypes;\n /**\n * The RDF Parse mediator for handling parsing\n */\n mediatorRdfParseHandle: MediatorRdfParseHandle;\n}\n"]}
|
|
@@ -9,11 +9,23 @@ const relative_to_absolute_iri_1 = require("relative-to-absolute-iri");
|
|
|
9
9
|
* parses them, and outputs the resulting quads.
|
|
10
10
|
*/
|
|
11
11
|
class HtmlScriptListener {
|
|
12
|
+
mediatorRdfParseHandle;
|
|
13
|
+
cbQuad;
|
|
14
|
+
cbError;
|
|
15
|
+
cbEnd;
|
|
16
|
+
supportedTypes;
|
|
17
|
+
context;
|
|
18
|
+
baseIRI;
|
|
19
|
+
headers;
|
|
20
|
+
onlyFirstScript;
|
|
21
|
+
targetScriptId;
|
|
22
|
+
handleMediaType;
|
|
23
|
+
textChunks;
|
|
24
|
+
textChunksJsonLd = [];
|
|
25
|
+
endBarrier = 1;
|
|
26
|
+
passedScripts = 0;
|
|
27
|
+
isFinalJsonLdProcessing = false;
|
|
12
28
|
constructor(mediatorRdfParseHandle, cbQuad, cbError, cbEnd, supportedTypes, context, baseIRI, headers) {
|
|
13
|
-
this.textChunksJsonLd = [];
|
|
14
|
-
this.endBarrier = 1;
|
|
15
|
-
this.passedScripts = 0;
|
|
16
|
-
this.isFinalJsonLdProcessing = false;
|
|
17
29
|
this.mediatorRdfParseHandle = mediatorRdfParseHandle;
|
|
18
30
|
this.cbQuad = cbQuad;
|
|
19
31
|
this.cbError = cbError;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"HtmlScriptListener.js","sourceRoot":"","sources":["HtmlScriptListener.ts"],"names":[],"mappings":";;;AAEA,+DAAmE;AAGnE,qDAA2C;AAC3C,uEAAiE;AAEjE;;;GAGG;AACH,MAAa,kBAAkB;IAoB7B,YACE,sBAA8C,EAC9C,MAAgC,EAChC,OAA+B,EAC/B,KAAiB,EACjB,cAAsC,EACtC,OAAuB,EACvB,OAAe,EACf,OAA4B;QAbtB,qBAAgB,GAAa,EAAE,CAAC;QAChC,eAAU,GAAG,CAAC,CAAC;QACf,kBAAa,GAAG,CAAC,CAAC;QAClB,4BAAuB,GAAG,KAAK,CAAC;QAYtC,IAAI,CAAC,sBAAsB,GAAG,sBAAsB,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,OAAO,GAAG,OAAO;aACnB,GAAG,CAAC,wCAAsB,CAAC,oBAAoB,EAAE,IAAI,CAAC,CAAC;QAC1D,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,eAAe,GAAG,OAAO,CAAC,GAAG,CAAC,wCAAsB,CAAC,iBAAiB,CAAC,KAAK,KAAK,CAAC;QACvF,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAC9C,IAAI,CAAC,cAAc,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,WAAW,GAAG,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC1G,CAAC;IAEM,MAAM,CAAC,aAAa,CAAC,OAAe,EAAE,IAAY;QACvD,+CAA+C;QAC/C,MAAM,KAAK,GAAG,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC;QAC1B,KAAM,CAAC,IAAI,GAAG,IAAI,CAAC;QAC1B,OAAO,KAAK,CAAC;IACf,CAAC;IAEM,KAAK;QACV,IAAI,EAAE,IAAI,CAAC,UAAU,KAAK,CAAC,EAAE,CAAC;YAC5B,IAAI,IAAI,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrC,wDAAwD;gBACxD,IAAI,CAAC,eAAe,GAAG,qBAAqB,CAAC;gBAC7C,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC;gBACxC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC1B,IAAI,CAAC,gBAAgB,GAAG,EAAE,CAAC;gBAC3B,IAAI,CAAC,uBAAuB,GAAG,IAAI,CAAC;gBAEpC,IAAI,CAAC,UAAU,EAAE,CAAC;gBAElB,6BAA6B;gBAC7B,IAAI,CAAC,UAAU,EAAE,CAAC;YACpB,CAAC;iBAAM,CAAC;gBACN,4BAA4B;gBAC5B,IAAI,IAAI,CAAC,aAAa,KAAK,CAAC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;oBACpD,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAAC,sCAAsC,IAAI,CAAC,cAAc,GAAG,EAAE,yBAAyB,CAAC,CAAC,CAAC;gBAC1I,CAAC;gBACD,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,CAAC;YACD,IAAI,CAAC,uBAAuB,GAAG,KAAK,CAAC;QACvC,CAAC;IACH,CAAC;IAEM,UAAU;QACf,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,IAAI,IAAI,CAAC,4BAA4B,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,uBAAuB,EAAE,CAAC;gBAC7F,uCAAuC;gBACvC,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;gBACjC,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;gBAE5B,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,iEAAiE;gBACjE,MAAM,UAAU,GAAG,IAAI,0BAAQ,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;gBACtD,UAAU,CAAC,KAAK,GAAG,GAAG,EAAE;oBACtB,aAAa;gBACf,CAAC,CAAC;gBACF,MAAM,eAAe,GAAG,IAAI,CAAC,UAAW,CAAC;gBAEzC,oCAAoC;gBACpC,MAAM,WAAW,GAAG;oBAClB,OAAO,EAAE,IAAI,CAAC,OAAO;oBACrB,MAAM,EAAE;wBACN,QAAQ,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE;wBACnC,IAAI,EAAE,UAAU;wBAChB,OAAO,EAAE,IAAI,CAAC,OAAO;wBACrB,OAAO,EAAE,IAAI,CAAC,OAAO;qBACtB;oBACD,eAAe,EAAE,IAAI,CAAC,eAAe;iBACtC,CAAC;gBACF,IAAI,CAAC,sBAAsB,CAAC,OAAO,CAAC,WAAW,CAAC;qBAC7C,IAAI,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE;oBACnB,0BAA0B;oBAC1B,MAAM,CAAC,IAAI;yBACR,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB;yBAClD,aAAa,CAAC,KAAK,CAAC,OAAO,EAAE,wBAAwB,CAAC,CAAC,CAAC;yBAC1D,EAAE,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC;yBACvB,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;oBAEjC,2DAA2D;oBAC3D,KAAK,MAAM,SAAS,IAAI,eAAe,EAAE,CAAC;wBACxC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBAC7B,CAAC;oBACD,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC,CAAC;qBACD,KAAK,CAAC,CAAC,KAAY,EAAE,EAAE;oBACtB,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;wBACxB,yDAAyD;wBACzD,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAC3C,KAAK,CAAC,OAAO,EACb,yBAAyB,CAC1B,CAAC,CAAC;oBACL,CAAC;yBAAM,CAAC;wBACN,8CAA8C;wBAC9C,IAAI,CAAC,KAAK,EAAE,CAAC;oBACf,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEL,uCAAuC;gBACvC,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;gBACjC,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;YAC9B,CAAC;QACH,CAAC;IACH,CAAC;IAEM,SAAS,CAAC,IAAY,EAAE,UAAkC;QAC/D,sCAAsC;QACtC,IAAI,IAAI,KAAK,MAAM,IAAI,UAAU,CAAC,IAAI,EAAE,CAAC;YACvC,IAAI,CAAC,OAAO,GAAG,IAAA,kCAAU,EAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAC3D,CAAC;QAED,wDAAwD;QACxD,sEAAsE;QACtE,IAAI,IAAI,KAAK,QAAQ,IAAI,CAAC,CAAC,IAAI,CAAC,cAAc,IAAI,UAAU,CAAC,EAAE,KAAK,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC;YACzF,IAAI,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzC,IAAI,IAAI,CAAC,eAAe,IAAI,IAAI,CAAC,aAAa,GAAG,CAAC,EAAE,CAAC;oBACnD,oDAAoD;oBACpD,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;gBACnC,CAAC;qBAAM,CAAC;oBACN,IAAI,CAAC,aAAa,EAAE,CAAC;oBACrB,IAAI,CAAC,eAAe,GAAG,UAAU,CAAC,IAAI,CAAC;oBACvC,IAAI,CAAC,UAAU,EAAE,CAAC;oBAClB,IAAI,IAAI,CAAC,4BAA4B,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;wBAC5D,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC;wBACxC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;oBACjE,CAAC;yBAAM,CAAC;wBACN,IAAI,CAAC,UAAU,GAAG,EAAE,CAAC;oBACvB,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;gBAC/B,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAC3C,oBAAoB,IAAI,CAAC,cAAc,kCAAkC,EACzE,yBAAyB,CAC1B,CAAC,CAAC;YACL,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;QACnC,CAAC;IACH,CAAC;IAEM,MAAM,CAAC,IAAY;QACxB,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,IAAI,CAAC,UAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;IAED;;;;;;;;OAQG;IACI,4BAA4B,CAAC,SAAiB;QACnD,OAAO,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,IAAI,CAAC,cAAc,IAAI,SAAS,KAAK,qBAAqB,CAAC;IAC9F,CAAC;CACF;AA/LD,gDA+LC","sourcesContent":["import type { MediatorRdfParseHandle } from '@comunica/bus-rdf-parse';\nimport type { IHtmlParseListener } from '@comunica/bus-rdf-parse-html';\nimport { KeysRdfParseHtmlScript } from '@comunica/context-entries';\nimport type { IActionContext } from '@comunica/types';\nimport type * as RDF from '@rdfjs/types';\nimport { Readable } from 'readable-stream';\nimport { resolve as resolveIri } from 'relative-to-absolute-iri';\n\n/**\n * An HTML parse listeners that detects <script> data blocks with known RDF media types,\n * parses them, and outputs the resulting quads.\n */\nexport class HtmlScriptListener implements IHtmlParseListener {\n private readonly mediatorRdfParseHandle: MediatorRdfParseHandle;\n\n private readonly cbQuad: (quad: RDF.Quad) => void;\n private readonly cbError: (error: Error) => void;\n private readonly cbEnd: () => void;\n private readonly supportedTypes: Record<string, number>;\n private readonly context: IActionContext;\n private baseIRI: string;\n private readonly headers?: Headers;\n private readonly onlyFirstScript: boolean;\n private readonly targetScriptId: string | null;\n\n private handleMediaType?: string;\n private textChunks?: string[];\n private textChunksJsonLd: string[] = [];\n private endBarrier = 1;\n private passedScripts = 0;\n private isFinalJsonLdProcessing = false;\n\n public constructor(\n mediatorRdfParseHandle: MediatorRdfParseHandle,\n cbQuad: (quad: RDF.Quad) => void,\n cbError: (error: Error) => void,\n cbEnd: () => void,\n supportedTypes: Record<string, number>,\n context: IActionContext,\n baseIRI: string,\n headers: Headers | undefined,\n ) {\n this.mediatorRdfParseHandle = mediatorRdfParseHandle;\n this.cbQuad = cbQuad;\n this.cbError = cbError;\n this.cbEnd = cbEnd;\n this.supportedTypes = supportedTypes;\n this.context = context\n .set(KeysRdfParseHtmlScript.processingHtmlScript, true);\n this.baseIRI = baseIRI;\n this.headers = headers;\n this.onlyFirstScript = context.get(KeysRdfParseHtmlScript.extractAllScripts) === false;\n const fragmentPos = this.baseIRI.indexOf('#');\n this.targetScriptId = fragmentPos > 0 ? this.baseIRI.slice(fragmentPos + 1, this.baseIRI.length) : null;\n }\n\n public static newErrorCoded(message: string, code: string): Error {\n // Error codes are required by the JSON-LD spec\n const error = new Error(message);\n (<any> error).code = code;\n return error;\n }\n\n public onEnd(): void {\n if (--this.endBarrier === 0) {\n if (this.textChunksJsonLd.length > 0) {\n // First process buffered JSON-LD chunks if we have any.\n this.handleMediaType = 'application/ld+json';\n this.textChunks = this.textChunksJsonLd;\n this.textChunks.push(']');\n this.textChunksJsonLd = [];\n this.isFinalJsonLdProcessing = true;\n\n this.endBarrier++;\n\n // This will call onEnd again\n this.onTagClose();\n } else {\n // Otherwise, end processing\n if (this.passedScripts === 0 && this.targetScriptId) {\n this.cbError(HtmlScriptListener.newErrorCoded(`Failed to find targeted script id \"${this.targetScriptId}\"`, 'loading document failed'));\n }\n this.cbEnd();\n }\n this.isFinalJsonLdProcessing = false;\n }\n }\n\n public onTagClose(): void {\n if (this.handleMediaType) {\n if (this.requiresCustomJsonLdHandling(this.handleMediaType) && !this.isFinalJsonLdProcessing) {\n // Reset the media type and text stream\n this.handleMediaType = undefined;\n this.textChunks = undefined;\n\n this.onEnd();\n } else {\n // Create a temporary text stream for pushing all the text chunks\n const textStream = new Readable({ objectMode: true });\n textStream._read = () => {\n // Do nothing\n };\n const textChunksLocal = this.textChunks!;\n\n // Send all collected text to parser\n const parseAction = {\n context: this.context,\n handle: {\n metadata: { baseIRI: this.baseIRI },\n data: textStream,\n headers: this.headers,\n context: this.context,\n },\n handleMediaType: this.handleMediaType,\n };\n this.mediatorRdfParseHandle.mediate(parseAction)\n .then(({ handle }) => {\n // Initialize text parsing\n handle.data\n .on('error', error => this.cbError(HtmlScriptListener\n .newErrorCoded(error.message, 'invalid script element')))\n .on('data', this.cbQuad)\n .on('end', () => this.onEnd());\n\n // Push the text stream after all events have been attached\n for (const textChunk of textChunksLocal) {\n textStream.push(textChunk);\n }\n textStream.push(null);\n })\n .catch((error: Error) => {\n if (this.targetScriptId) {\n // Error if we are targeting this script tag specifically\n this.cbError(HtmlScriptListener.newErrorCoded(\n error.message,\n 'loading document failed',\n ));\n } else {\n // Ignore script tags that we don't understand\n this.onEnd();\n }\n });\n\n // Reset the media type and text stream\n this.handleMediaType = undefined;\n this.textChunks = undefined;\n }\n }\n }\n\n public onTagOpen(name: string, attributes: Record<string, string>): void {\n // Take into account baseIRI overrides\n if (name === 'base' && attributes.href) {\n this.baseIRI = resolveIri(attributes.href, this.baseIRI);\n }\n\n // Only handle script tags with a parseable content type\n // If targetScriptId is defined, only extract from script with that id\n if (name === 'script' && (!this.targetScriptId || attributes.id === this.targetScriptId)) {\n if (this.supportedTypes[attributes.type]) {\n if (this.onlyFirstScript && this.passedScripts > 0) {\n // Ignore script tag if only one should be extracted\n this.handleMediaType = undefined;\n } else {\n this.passedScripts++;\n this.handleMediaType = attributes.type;\n this.endBarrier++;\n if (this.requiresCustomJsonLdHandling(this.handleMediaType)) {\n this.textChunks = this.textChunksJsonLd;\n this.textChunks.push(this.textChunks.length === 0 ? '[' : ',');\n } else {\n this.textChunks = [];\n }\n }\n } else if (this.targetScriptId) {\n this.cbError(HtmlScriptListener.newErrorCoded(\n `Targeted script \"${this.targetScriptId}\" does not have a supported type`,\n 'loading document failed',\n ));\n }\n } else {\n this.handleMediaType = undefined;\n }\n }\n\n public onText(data: string): void {\n if (this.handleMediaType) {\n this.textChunks!.push(data);\n }\n }\n\n /**\n * If we require custom JSON-LD handling for the given media type.\n *\n * The JSON-LD spec requires JSON-LD within script tags to be seen as a single document.\n * As such, we have to buffer all JSON-LD until the end of HTML processing,\n * and encapsulate all found contents in an array.\n *\n * @param mediaType A: IActionRdfParseHtml media type.\n */\n public requiresCustomJsonLdHandling(mediaType: string): boolean {\n return !this.onlyFirstScript && !this.targetScriptId && mediaType === 'application/ld+json';\n }\n}\n"]}
|
|
1
|
+
{"version":3,"file":"HtmlScriptListener.js","sourceRoot":"","sources":["HtmlScriptListener.ts"],"names":[],"mappings":";;;AAEA,+DAAmE;AAGnE,qDAA2C;AAC3C,uEAAiE;AAEjE;;;GAGG;AACH,MAAa,kBAAkB;IACZ,sBAAsB,CAAyB;IAE/C,MAAM,CAA2B;IACjC,OAAO,CAAyB;IAChC,KAAK,CAAa;IAClB,cAAc,CAAyB;IACvC,OAAO,CAAiB;IACjC,OAAO,CAAS;IACP,OAAO,CAAW;IAClB,eAAe,CAAU;IACzB,cAAc,CAAgB;IAEvC,eAAe,CAAU;IACzB,UAAU,CAAY;IACtB,gBAAgB,GAAa,EAAE,CAAC;IAChC,UAAU,GAAG,CAAC,CAAC;IACf,aAAa,GAAG,CAAC,CAAC;IAClB,uBAAuB,GAAG,KAAK,CAAC;IAExC,YACE,sBAA8C,EAC9C,MAAgC,EAChC,OAA+B,EAC/B,KAAiB,EACjB,cAAsC,EACtC,OAAuB,EACvB,OAAe,EACf,OAA4B;QAE5B,IAAI,CAAC,sBAAsB,GAAG,sBAAsB,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,OAAO,GAAG,OAAO;aACnB,GAAG,CAAC,wCAAsB,CAAC,oBAAoB,EAAE,IAAI,CAAC,CAAC;QAC1D,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,eAAe,GAAG,OAAO,CAAC,GAAG,CAAC,wCAAsB,CAAC,iBAAiB,CAAC,KAAK,KAAK,CAAC;QACvF,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAC9C,IAAI,CAAC,cAAc,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,WAAW,GAAG,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC1G,CAAC;IAEM,MAAM,CAAC,aAAa,CAAC,OAAe,EAAE,IAAY;QACvD,+CAA+C;QAC/C,MAAM,KAAK,GAAG,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC;QAC1B,KAAM,CAAC,IAAI,GAAG,IAAI,CAAC;QAC1B,OAAO,KAAK,CAAC;IACf,CAAC;IAEM,KAAK;QACV,IAAI,EAAE,IAAI,CAAC,UAAU,KAAK,CAAC,EAAE,CAAC;YAC5B,IAAI,IAAI,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrC,wDAAwD;gBACxD,IAAI,CAAC,eAAe,GAAG,qBAAqB,CAAC;gBAC7C,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC;gBACxC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC1B,IAAI,CAAC,gBAAgB,GAAG,EAAE,CAAC;gBAC3B,IAAI,CAAC,uBAAuB,GAAG,IAAI,CAAC;gBAEpC,IAAI,CAAC,UAAU,EAAE,CAAC;gBAElB,6BAA6B;gBAC7B,IAAI,CAAC,UAAU,EAAE,CAAC;YACpB,CAAC;iBAAM,CAAC;gBACN,4BAA4B;gBAC5B,IAAI,IAAI,CAAC,aAAa,KAAK,CAAC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;oBACpD,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAAC,sCAAsC,IAAI,CAAC,cAAc,GAAG,EAAE,yBAAyB,CAAC,CAAC,CAAC;gBAC1I,CAAC;gBACD,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,CAAC;YACD,IAAI,CAAC,uBAAuB,GAAG,KAAK,CAAC;QACvC,CAAC;IACH,CAAC;IAEM,UAAU;QACf,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,IAAI,IAAI,CAAC,4BAA4B,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,uBAAuB,EAAE,CAAC;gBAC7F,uCAAuC;gBACvC,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;gBACjC,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;gBAE5B,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,iEAAiE;gBACjE,MAAM,UAAU,GAAG,IAAI,0BAAQ,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;gBACtD,UAAU,CAAC,KAAK,GAAG,GAAG,EAAE;oBACtB,aAAa;gBACf,CAAC,CAAC;gBACF,MAAM,eAAe,GAAG,IAAI,CAAC,UAAW,CAAC;gBAEzC,oCAAoC;gBACpC,MAAM,WAAW,GAAG;oBAClB,OAAO,EAAE,IAAI,CAAC,OAAO;oBACrB,MAAM,EAAE;wBACN,QAAQ,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE;wBACnC,IAAI,EAAE,UAAU;wBAChB,OAAO,EAAE,IAAI,CAAC,OAAO;wBACrB,OAAO,EAAE,IAAI,CAAC,OAAO;qBACtB;oBACD,eAAe,EAAE,IAAI,CAAC,eAAe;iBACtC,CAAC;gBACF,IAAI,CAAC,sBAAsB,CAAC,OAAO,CAAC,WAAW,CAAC;qBAC7C,IAAI,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE;oBACnB,0BAA0B;oBAC1B,MAAM,CAAC,IAAI;yBACR,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB;yBAClD,aAAa,CAAC,KAAK,CAAC,OAAO,EAAE,wBAAwB,CAAC,CAAC,CAAC;yBAC1D,EAAE,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC;yBACvB,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;oBAEjC,2DAA2D;oBAC3D,KAAK,MAAM,SAAS,IAAI,eAAe,EAAE,CAAC;wBACxC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBAC7B,CAAC;oBACD,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC,CAAC;qBACD,KAAK,CAAC,CAAC,KAAY,EAAE,EAAE;oBACtB,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;wBACxB,yDAAyD;wBACzD,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAC3C,KAAK,CAAC,OAAO,EACb,yBAAyB,CAC1B,CAAC,CAAC;oBACL,CAAC;yBAAM,CAAC;wBACN,8CAA8C;wBAC9C,IAAI,CAAC,KAAK,EAAE,CAAC;oBACf,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEL,uCAAuC;gBACvC,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;gBACjC,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;YAC9B,CAAC;QACH,CAAC;IACH,CAAC;IAEM,SAAS,CAAC,IAAY,EAAE,UAAkC;QAC/D,sCAAsC;QACtC,IAAI,IAAI,KAAK,MAAM,IAAI,UAAU,CAAC,IAAI,EAAE,CAAC;YACvC,IAAI,CAAC,OAAO,GAAG,IAAA,kCAAU,EAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAC3D,CAAC;QAED,wDAAwD;QACxD,sEAAsE;QACtE,IAAI,IAAI,KAAK,QAAQ,IAAI,CAAC,CAAC,IAAI,CAAC,cAAc,IAAI,UAAU,CAAC,EAAE,KAAK,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC;YACzF,IAAI,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzC,IAAI,IAAI,CAAC,eAAe,IAAI,IAAI,CAAC,aAAa,GAAG,CAAC,EAAE,CAAC;oBACnD,oDAAoD;oBACpD,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;gBACnC,CAAC;qBAAM,CAAC;oBACN,IAAI,CAAC,aAAa,EAAE,CAAC;oBACrB,IAAI,CAAC,eAAe,GAAG,UAAU,CAAC,IAAI,CAAC;oBACvC,IAAI,CAAC,UAAU,EAAE,CAAC;oBAClB,IAAI,IAAI,CAAC,4BAA4B,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;wBAC5D,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC;wBACxC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;oBACjE,CAAC;yBAAM,CAAC;wBACN,IAAI,CAAC,UAAU,GAAG,EAAE,CAAC;oBACvB,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;gBAC/B,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAC3C,oBAAoB,IAAI,CAAC,cAAc,kCAAkC,EACzE,yBAAyB,CAC1B,CAAC,CAAC;YACL,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;QACnC,CAAC;IACH,CAAC;IAEM,MAAM,CAAC,IAAY;QACxB,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,IAAI,CAAC,UAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;IAED;;;;;;;;OAQG;IACI,4BAA4B,CAAC,SAAiB;QACnD,OAAO,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,IAAI,CAAC,cAAc,IAAI,SAAS,KAAK,qBAAqB,CAAC;IAC9F,CAAC;CACF;AA/LD,gDA+LC","sourcesContent":["import type { MediatorRdfParseHandle } from '@comunica/bus-rdf-parse';\nimport type { IHtmlParseListener } from '@comunica/bus-rdf-parse-html';\nimport { KeysRdfParseHtmlScript } from '@comunica/context-entries';\nimport type { IActionContext } from '@comunica/types';\nimport type * as RDF from '@rdfjs/types';\nimport { Readable } from 'readable-stream';\nimport { resolve as resolveIri } from 'relative-to-absolute-iri';\n\n/**\n * An HTML parse listeners that detects <script> data blocks with known RDF media types,\n * parses them, and outputs the resulting quads.\n */\nexport class HtmlScriptListener implements IHtmlParseListener {\n private readonly mediatorRdfParseHandle: MediatorRdfParseHandle;\n\n private readonly cbQuad: (quad: RDF.Quad) => void;\n private readonly cbError: (error: Error) => void;\n private readonly cbEnd: () => void;\n private readonly supportedTypes: Record<string, number>;\n private readonly context: IActionContext;\n private baseIRI: string;\n private readonly headers?: Headers;\n private readonly onlyFirstScript: boolean;\n private readonly targetScriptId: string | null;\n\n private handleMediaType?: string;\n private textChunks?: string[];\n private textChunksJsonLd: string[] = [];\n private endBarrier = 1;\n private passedScripts = 0;\n private isFinalJsonLdProcessing = false;\n\n public constructor(\n mediatorRdfParseHandle: MediatorRdfParseHandle,\n cbQuad: (quad: RDF.Quad) => void,\n cbError: (error: Error) => void,\n cbEnd: () => void,\n supportedTypes: Record<string, number>,\n context: IActionContext,\n baseIRI: string,\n headers: Headers | undefined,\n ) {\n this.mediatorRdfParseHandle = mediatorRdfParseHandle;\n this.cbQuad = cbQuad;\n this.cbError = cbError;\n this.cbEnd = cbEnd;\n this.supportedTypes = supportedTypes;\n this.context = context\n .set(KeysRdfParseHtmlScript.processingHtmlScript, true);\n this.baseIRI = baseIRI;\n this.headers = headers;\n this.onlyFirstScript = context.get(KeysRdfParseHtmlScript.extractAllScripts) === false;\n const fragmentPos = this.baseIRI.indexOf('#');\n this.targetScriptId = fragmentPos > 0 ? this.baseIRI.slice(fragmentPos + 1, this.baseIRI.length) : null;\n }\n\n public static newErrorCoded(message: string, code: string): Error {\n // Error codes are required by the JSON-LD spec\n const error = new Error(message);\n (<any> error).code = code;\n return error;\n }\n\n public onEnd(): void {\n if (--this.endBarrier === 0) {\n if (this.textChunksJsonLd.length > 0) {\n // First process buffered JSON-LD chunks if we have any.\n this.handleMediaType = 'application/ld+json';\n this.textChunks = this.textChunksJsonLd;\n this.textChunks.push(']');\n this.textChunksJsonLd = [];\n this.isFinalJsonLdProcessing = true;\n\n this.endBarrier++;\n\n // This will call onEnd again\n this.onTagClose();\n } else {\n // Otherwise, end processing\n if (this.passedScripts === 0 && this.targetScriptId) {\n this.cbError(HtmlScriptListener.newErrorCoded(`Failed to find targeted script id \"${this.targetScriptId}\"`, 'loading document failed'));\n }\n this.cbEnd();\n }\n this.isFinalJsonLdProcessing = false;\n }\n }\n\n public onTagClose(): void {\n if (this.handleMediaType) {\n if (this.requiresCustomJsonLdHandling(this.handleMediaType) && !this.isFinalJsonLdProcessing) {\n // Reset the media type and text stream\n this.handleMediaType = undefined;\n this.textChunks = undefined;\n\n this.onEnd();\n } else {\n // Create a temporary text stream for pushing all the text chunks\n const textStream = new Readable({ objectMode: true });\n textStream._read = () => {\n // Do nothing\n };\n const textChunksLocal = this.textChunks!;\n\n // Send all collected text to parser\n const parseAction = {\n context: this.context,\n handle: {\n metadata: { baseIRI: this.baseIRI },\n data: textStream,\n headers: this.headers,\n context: this.context,\n },\n handleMediaType: this.handleMediaType,\n };\n this.mediatorRdfParseHandle.mediate(parseAction)\n .then(({ handle }) => {\n // Initialize text parsing\n handle.data\n .on('error', error => this.cbError(HtmlScriptListener\n .newErrorCoded(error.message, 'invalid script element')))\n .on('data', this.cbQuad)\n .on('end', () => this.onEnd());\n\n // Push the text stream after all events have been attached\n for (const textChunk of textChunksLocal) {\n textStream.push(textChunk);\n }\n textStream.push(null);\n })\n .catch((error: Error) => {\n if (this.targetScriptId) {\n // Error if we are targeting this script tag specifically\n this.cbError(HtmlScriptListener.newErrorCoded(\n error.message,\n 'loading document failed',\n ));\n } else {\n // Ignore script tags that we don't understand\n this.onEnd();\n }\n });\n\n // Reset the media type and text stream\n this.handleMediaType = undefined;\n this.textChunks = undefined;\n }\n }\n }\n\n public onTagOpen(name: string, attributes: Record<string, string>): void {\n // Take into account baseIRI overrides\n if (name === 'base' && attributes.href) {\n this.baseIRI = resolveIri(attributes.href, this.baseIRI);\n }\n\n // Only handle script tags with a parseable content type\n // If targetScriptId is defined, only extract from script with that id\n if (name === 'script' && (!this.targetScriptId || attributes.id === this.targetScriptId)) {\n if (this.supportedTypes[attributes.type]) {\n if (this.onlyFirstScript && this.passedScripts > 0) {\n // Ignore script tag if only one should be extracted\n this.handleMediaType = undefined;\n } else {\n this.passedScripts++;\n this.handleMediaType = attributes.type;\n this.endBarrier++;\n if (this.requiresCustomJsonLdHandling(this.handleMediaType)) {\n this.textChunks = this.textChunksJsonLd;\n this.textChunks.push(this.textChunks.length === 0 ? '[' : ',');\n } else {\n this.textChunks = [];\n }\n }\n } else if (this.targetScriptId) {\n this.cbError(HtmlScriptListener.newErrorCoded(\n `Targeted script \"${this.targetScriptId}\" does not have a supported type`,\n 'loading document failed',\n ));\n }\n } else {\n this.handleMediaType = undefined;\n }\n }\n\n public onText(data: string): void {\n if (this.handleMediaType) {\n this.textChunks!.push(data);\n }\n }\n\n /**\n * If we require custom JSON-LD handling for the given media type.\n *\n * The JSON-LD spec requires JSON-LD within script tags to be seen as a single document.\n * As such, we have to buffer all JSON-LD until the end of HTML processing,\n * and encapsulate all found contents in an array.\n *\n * @param mediaType A: IActionRdfParseHtml media type.\n */\n public requiresCustomJsonLdHandling(mediaType: string): boolean {\n return !this.onlyFirstScript && !this.targetScriptId && mediaType === 'application/ld+json';\n }\n}\n"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@comunica/actor-rdf-parse-html-script",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "5.0.0",
|
|
4
4
|
"description": "A html-script rdf-parse actor",
|
|
5
5
|
"lsd:module": true,
|
|
6
6
|
"license": "MIT",
|
|
@@ -41,14 +41,14 @@
|
|
|
41
41
|
"build:components": "componentsjs-generator"
|
|
42
42
|
},
|
|
43
43
|
"dependencies": {
|
|
44
|
-
"@comunica/bus-rdf-parse": "
|
|
45
|
-
"@comunica/bus-rdf-parse-html": "
|
|
46
|
-
"@comunica/context-entries": "
|
|
47
|
-
"@comunica/core": "
|
|
48
|
-
"@comunica/types": "
|
|
44
|
+
"@comunica/bus-rdf-parse": "^5.0.0",
|
|
45
|
+
"@comunica/bus-rdf-parse-html": "^5.0.0",
|
|
46
|
+
"@comunica/context-entries": "^5.0.0",
|
|
47
|
+
"@comunica/core": "^5.0.0",
|
|
48
|
+
"@comunica/types": "^5.0.0",
|
|
49
49
|
"@rdfjs/types": "*",
|
|
50
|
-
"readable-stream": "^4.
|
|
50
|
+
"readable-stream": "^4.7.0",
|
|
51
51
|
"relative-to-absolute-iri": "^1.0.7"
|
|
52
52
|
},
|
|
53
|
-
"gitHead": "
|
|
53
|
+
"gitHead": "0b1756fdb9bef014133432489627c1bd71779bd0"
|
|
54
54
|
}
|