@comunica/actor-rdf-parse-html 2.8.2 → 3.0.1-alpha.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -24,7 +24,7 @@ After installing, this package can be added to your engine's configuration as fo
|
|
|
24
24
|
{
|
|
25
25
|
"@context": [
|
|
26
26
|
...
|
|
27
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^
|
|
27
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^3.0.0/components/context.jsonld"
|
|
28
28
|
],
|
|
29
29
|
"actors": [
|
|
30
30
|
...
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"@context": [
|
|
3
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^
|
|
4
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/core/^
|
|
5
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse-html/^
|
|
6
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse/^
|
|
3
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^3.0.0/components/context.jsonld",
|
|
4
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/core/^3.0.0/components/context.jsonld",
|
|
5
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse-html/^3.0.0/components/context.jsonld",
|
|
6
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse/^3.0.0/components/context.jsonld"
|
|
7
7
|
],
|
|
8
8
|
"@id": "npmd:@comunica/actor-rdf-parse-html",
|
|
9
9
|
"components": [
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
]
|
|
46
46
|
},
|
|
47
47
|
"default": {
|
|
48
|
-
"@id": "npmd:@comunica/bus-rdf-parse-html/^
|
|
48
|
+
"@id": "npmd:@comunica/bus-rdf-parse-html/^3.0.0/components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_default_bus"
|
|
49
49
|
},
|
|
50
50
|
"comment": "The RDF Parse HTML bus for fetching HTML listeners"
|
|
51
51
|
},
|
|
@@ -106,29 +106,29 @@
|
|
|
106
106
|
"genericTypeInstances": [
|
|
107
107
|
{
|
|
108
108
|
"@type": "ParameterRangeGenericTypeReference",
|
|
109
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
109
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
|
|
110
110
|
},
|
|
111
111
|
{
|
|
112
112
|
"@type": "ParameterRangeGenericTypeReference",
|
|
113
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
113
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
|
|
114
114
|
},
|
|
115
115
|
{
|
|
116
116
|
"@type": "ParameterRangeGenericTypeReference",
|
|
117
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
117
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
|
|
118
118
|
}
|
|
119
119
|
]
|
|
120
120
|
},
|
|
121
121
|
{
|
|
122
122
|
"@type": "ParameterRangeGenericTypeReference",
|
|
123
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
123
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
|
|
124
124
|
},
|
|
125
125
|
{
|
|
126
126
|
"@type": "ParameterRangeGenericTypeReference",
|
|
127
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
127
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
|
|
128
128
|
},
|
|
129
129
|
{
|
|
130
130
|
"@type": "ParameterRangeGenericTypeReference",
|
|
131
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
131
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
|
|
132
132
|
}
|
|
133
133
|
]
|
|
134
134
|
},
|
|
@@ -151,15 +151,15 @@
|
|
|
151
151
|
"genericTypeInstances": [
|
|
152
152
|
{
|
|
153
153
|
"@type": "ParameterRangeGenericTypeReference",
|
|
154
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
154
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
|
|
155
155
|
},
|
|
156
156
|
{
|
|
157
157
|
"@type": "ParameterRangeGenericTypeReference",
|
|
158
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
158
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
|
|
159
159
|
},
|
|
160
160
|
{
|
|
161
161
|
"@type": "ParameterRangeGenericTypeReference",
|
|
162
|
-
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^
|
|
162
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
|
|
163
163
|
}
|
|
164
164
|
]
|
|
165
165
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"@context": [
|
|
3
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^
|
|
3
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^3.0.0/components/context.jsonld"
|
|
4
4
|
],
|
|
5
5
|
"@id": "npmd:@comunica/actor-rdf-parse-html",
|
|
6
6
|
"@type": "Module",
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"https://linkedsoftwaredependencies.org/bundles/npm/componentsjs/^5.0.0/components/context.jsonld",
|
|
4
4
|
{
|
|
5
5
|
"npmd": "https://linkedsoftwaredependencies.org/bundles/npm/",
|
|
6
|
-
"carph": "npmd:@comunica/actor-rdf-parse-html/^
|
|
6
|
+
"carph": "npmd:@comunica/actor-rdf-parse-html/^3.0.0/",
|
|
7
7
|
"ActorRdfParseHtml": {
|
|
8
8
|
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml",
|
|
9
9
|
"@prefix": true,
|
|
@@ -26,7 +26,7 @@ export declare class ActorRdfParseHtml extends ActorRdfParseFixedMediaTypes {
|
|
|
26
26
|
export interface IActorRdfParseHtmlArgs extends IActorRdfParseFixedMediaTypesArgs {
|
|
27
27
|
/**
|
|
28
28
|
* The RDF Parse HTML bus for fetching HTML listeners
|
|
29
|
-
* @default {<npmd:@comunica/bus-rdf-parse-html/^
|
|
29
|
+
* @default {<npmd:@comunica/bus-rdf-parse-html/^3.0.0/components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_default_bus>}
|
|
30
30
|
*/
|
|
31
31
|
busRdfParseHtml: Bus<Actor<IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>, IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>;
|
|
32
32
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ActorRdfParseHtml.js","sourceRoot":"","sources":["ActorRdfParseHtml.ts"],"names":[],"mappings":";;;AAGA,2DAEiC;AASjC,6CAAqC;AACrC,qDAA2C;AAE3C;;;GAGG;AACH,MAAa,iBAAkB,SAAQ,4CAA4B;IAIjE;;;;;;;;;;OAUG;IACH,YAAmB,IAA4B;QAC7C,KAAK,CAAC,IAAI,CAAC,CAAC;IACd,CAAC;IAEM,KAAK,CAAC,SAAS,CAAC,MAAuB,EAAE,SAAiB,EAAE,OAAuB;QAExF,MAAM,IAAI,GAAG,IAAI,0BAAQ,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,KAAK,GAAG,KAAK,IAAG,EAAE;YACrB,uBAAuB;YACvB,IAAI,CAAC,KAAK,GAAG,GAAG,EAAE;gBAChB,aAAa;YACf,CAAC,CAAC;YAEF,0BAA0B;YAC1B,IAAI,UAAU,GAAG,CAAC,CAAC;YACnB,SAAS,IAAI,CAAC,IAAc;gBAC1B,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAC1B,CAAC;YACD,SAAS,KAAK,CAAC,QAAiB;gBAC9B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;YAC/B,CAAC;YACD,SAAS,GAAG;gBACV,IAAI,EAAE,UAAU,KAAK,CAAC,EAAE;oBACtB,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBACjB;YACH,CAAC;YACD,MAAM,UAAU,GAAwB;gBACtC,OAAO,EAAE,MAAM,CAAC,QAAQ,EAAE,OAAO,IAAI,EAAE;gBACvC,OAAO;gBACP,IAAI;gBACJ,GAAG;gBACH,KAAK;gBACL,OAAO,EAAE,MAAM,CAAC,OAAO;aACxB,CAAC;YAEF,gCAAgC;YAChC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;iBAClD,IAAI,CAAC,KAAK,EAAC,OAAO,EAAC,EAAE;gBACpB,UAAU,IAAI,OAAO,CAAC,MAAM,CAAC;gBAE7B,MAAM,kBAAkB,GAAyB,EAAE,CAAC;gBACpD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;oBAC5B,MAAM,EAAE,iBAAiB,EAAE,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;oBACjE,kBAAkB,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;iBAC5C;gBAED,gBAAgB;gBAChB,MAAM,MAAM,GAAG,IAAI,oBAAM,CAAC;oBACxB,UAAU;wBACR,IAAI;4BACF,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE;gCAClD,iBAAiB,CAAC,UAAU,EAAE,CAAC;6BAChC;yBACF;wBAAC,OAAO,MAAe,EAAE;4BACxB,KAAK,CAAC,MAAM,CAAC,CAAC;yBACf;oBACH,CAAC;oBACD,KAAK;wBACH,IAAI;4BACF,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE;gCAClD,iBAAiB,CAAC,KAAK,EAAE,CAAC;6BAC3B;yBACF;wBAAC,OAAO,MAAe,EAAE;4BACxB,KAAK,CAAC,MAAM,CAAC,CAAC;yBACf;wBACD,GAAG,EAAE,CAAC;oBACR,CAAC;oBACD,SAAS,CAAC,IAAY,EAAE,UAAkC;wBACxD,IAAI;4BACF,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE;gCAClD,iBAAiB,CAAC,SAAS,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;6BAC/C;yBACF;wBAAC,OAAO,MAAe,EAAE;4BACxB,KAAK,CAA
C,MAAM,CAAC,CAAC;yBACf;oBACH,CAAC;oBACD,MAAM,CAAC,IAAY;wBACjB,IAAI;4BACF,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE;gCAClD,iBAAiB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;6BAChC;yBACF;wBAAC,OAAO,MAAe,EAAE;4BACxB,KAAK,CAAC,MAAM,CAAC,CAAC;yBACf;oBACH,CAAC;iBACF,EAAE;oBACD,cAAc,EAAE,IAAI;oBACpB,oBAAoB,EAAE,IAAI;oBAC1B,OAAO,EAAE,KAAK;iBACf,CAAC,CAAC;gBAEH,wBAAwB;gBACxB,MAAM,CAAC,IAAI;qBACR,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;qBAClB,EAAE,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;qBACnD,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;YACnC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC,CAAC;QAEF,OAAO,EAAE,IAAI,EAAE,CAAC;IAClB,CAAC;CACF;AApHD,8CAoHC","sourcesContent":["import type { IActionRdfParse,\n IActorRdfParseFixedMediaTypesArgs,\n IActorRdfParseOutput } from '@comunica/bus-rdf-parse';\nimport {\n ActorRdfParseFixedMediaTypes,\n} from '@comunica/bus-rdf-parse';\nimport type {\n IActionRdfParseHtml,\n IActorRdfParseHtmlOutput,\n IHtmlParseListener,\n} from '@comunica/bus-rdf-parse-html';\nimport type { Actor, Bus, IActorTest } from '@comunica/core';\nimport type { IActionContext } from '@comunica/types';\nimport type * as RDF from '@rdfjs/types';\nimport { Parser } from 'htmlparser2';\nimport { Readable } from 'readable-stream';\n\n/**\n * A comunica HTML RDF Parse Actor.\n * It creates an HTML parser, and delegates its events via the bus-rdf-parse-html bus to other HTML parsing actors.\n */\nexport class ActorRdfParseHtml extends ActorRdfParseFixedMediaTypes {\n private readonly busRdfParseHtml: Bus<Actor<IActionRdfParseHtml, IActorTest,\n IActorRdfParseHtmlOutput>, IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>;\n\n /**\n * @param args -\n * \\ @defaultNested {{\n * \"text/html\": 1.0,\n * \"application/xhtml+xml\": 0.9\n * }} mediaTypePriorities\n * \\ @defaultNested {{\n * \"text/html\": \"http://www.w3.org/ns/formats/HTML\",\n * \"application/xhtml+xml\": \"http://www.w3.org/ns/formats/HTML\"\n * }} 
mediaTypeFormats\n */\n public constructor(args: IActorRdfParseHtmlArgs) {\n super(args);\n }\n\n public async runHandle(action: IActionRdfParse, mediaType: string, context: IActionContext):\n Promise<IActorRdfParseOutput> {\n const data = new Readable({ objectMode: true });\n data._read = async() => {\n // Only initialize once\n data._read = () => {\n // Do nothing\n };\n\n // Create callbacks action\n let endBarrier = 1;\n function emit(quad: RDF.Quad): void {\n data.emit('data', quad);\n }\n function error(subError: unknown): void {\n data.emit('error', subError);\n }\n function end(): void {\n if (--endBarrier === 0) {\n data.push(null);\n }\n }\n const htmlAction: IActionRdfParseHtml = {\n baseIRI: action.metadata?.baseIRI ?? '',\n context,\n emit,\n end,\n error,\n headers: action.headers,\n };\n\n // Register html parse listeners\n Promise.all(this.busRdfParseHtml.publish(htmlAction))\n .then(async outputs => {\n endBarrier += outputs.length;\n\n const htmlParseListeners: IHtmlParseListener[] = [];\n for (const output of outputs) {\n const { htmlParseListener } = await output.actor.run(htmlAction);\n htmlParseListeners.push(htmlParseListener);\n }\n\n // Create parser\n const parser = new Parser({\n onclosetag() {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onTagClose();\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n onend() {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onEnd();\n }\n } catch (error_: unknown) {\n error(error_);\n }\n end();\n },\n onopentag(name: string, attributes: Record<string, string>) {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onTagOpen(name, attributes);\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n ontext(text: string) {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onText(text);\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n }, 
{\n decodeEntities: true,\n recognizeSelfClosing: true,\n xmlMode: false,\n });\n\n // Push stream to parser\n action.data\n .on('error', error)\n .on('data', chunk => parser.write(chunk.toString()))\n .on('end', () => parser.end());\n }).catch(error);\n };\n\n return { data };\n }\n}\n\nexport interface IActorRdfParseHtmlArgs extends IActorRdfParseFixedMediaTypesArgs {\n /* eslint-disable max-len */\n /**\n * The RDF Parse HTML bus for fetching HTML listeners\n * @default {<npmd:@comunica/bus-rdf-parse-html/^
|
|
1
|
+
{"version":3,"file":"ActorRdfParseHtml.js","sourceRoot":"","sources":["ActorRdfParseHtml.ts"],"names":[],"mappings":";;;AAGA,2DAEiC;AASjC,6CAAqC;AACrC,qDAA2C;AAE3C;;;GAGG;AACH,MAAa,iBAAkB,SAAQ,4CAA4B;IAIjE;;;;;;;;;;OAUG;IACH,YAAmB,IAA4B;QAC7C,KAAK,CAAC,IAAI,CAAC,CAAC;IACd,CAAC;IAEM,KAAK,CAAC,SAAS,CAAC,MAAuB,EAAE,SAAiB,EAAE,OAAuB;QAExF,MAAM,IAAI,GAAG,IAAI,0BAAQ,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,KAAK,GAAG,KAAK,IAAG,EAAE;YACrB,uBAAuB;YACvB,IAAI,CAAC,KAAK,GAAG,GAAG,EAAE;gBAChB,aAAa;YACf,CAAC,CAAC;YAEF,0BAA0B;YAC1B,IAAI,UAAU,GAAG,CAAC,CAAC;YACnB,SAAS,IAAI,CAAC,IAAc;gBAC1B,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAC1B,CAAC;YACD,SAAS,KAAK,CAAC,QAAiB;gBAC9B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;YAC/B,CAAC;YACD,SAAS,GAAG;gBACV,IAAI,EAAE,UAAU,KAAK,CAAC,EAAE;oBACtB,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBACjB;YACH,CAAC;YACD,MAAM,UAAU,GAAwB;gBACtC,OAAO,EAAE,MAAM,CAAC,QAAQ,EAAE,OAAO,IAAI,EAAE;gBACvC,OAAO;gBACP,IAAI;gBACJ,GAAG;gBACH,KAAK;gBACL,OAAO,EAAE,MAAM,CAAC,OAAO;aACxB,CAAC;YAEF,gCAAgC;YAChC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;iBAClD,IAAI,CAAC,KAAK,EAAC,OAAO,EAAC,EAAE;gBACpB,UAAU,IAAI,OAAO,CAAC,MAAM,CAAC;gBAE7B,MAAM,kBAAkB,GAAyB,EAAE,CAAC;gBACpD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;oBAC5B,MAAM,EAAE,iBAAiB,EAAE,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;oBACjE,kBAAkB,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;iBAC5C;gBAED,gBAAgB;gBAChB,MAAM,MAAM,GAAG,IAAI,oBAAM,CAAC;oBACxB,UAAU;wBACR,IAAI;4BACF,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE;gCAClD,iBAAiB,CAAC,UAAU,EAAE,CAAC;6BAChC;yBACF;wBAAC,OAAO,MAAe,EAAE;4BACxB,KAAK,CAAC,MAAM,CAAC,CAAC;yBACf;oBACH,CAAC;oBACD,KAAK;wBACH,IAAI;4BACF,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE;gCAClD,iBAAiB,CAAC,KAAK,EAAE,CAAC;6BAC3B;yBACF;wBAAC,OAAO,MAAe,EAAE;4BACxB,KAAK,CAAC,MAAM,CAAC,CAAC;yBACf;wBACD,GAAG,EAAE,CAAC;oBACR,CAAC;oBACD,SAAS,CAAC,IAAY,EAAE,UAAkC;wBACxD,IAAI;4BACF,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE;gCAClD,iBAAiB,CAAC,SAAS,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;6BAC/C;yBACF;wBAAC,OAAO,MAAe,EAAE;4BACxB,KAAK,CAA
C,MAAM,CAAC,CAAC;yBACf;oBACH,CAAC;oBACD,MAAM,CAAC,IAAY;wBACjB,IAAI;4BACF,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE;gCAClD,iBAAiB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;6BAChC;yBACF;wBAAC,OAAO,MAAe,EAAE;4BACxB,KAAK,CAAC,MAAM,CAAC,CAAC;yBACf;oBACH,CAAC;iBACF,EAAE;oBACD,cAAc,EAAE,IAAI;oBACpB,oBAAoB,EAAE,IAAI;oBAC1B,OAAO,EAAE,KAAK;iBACf,CAAC,CAAC;gBAEH,wBAAwB;gBACxB,MAAM,CAAC,IAAI;qBACR,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;qBAClB,EAAE,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;qBACnD,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;YACnC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC,CAAC;QAEF,OAAO,EAAE,IAAI,EAAE,CAAC;IAClB,CAAC;CACF;AApHD,8CAoHC","sourcesContent":["import type { IActionRdfParse,\n IActorRdfParseFixedMediaTypesArgs,\n IActorRdfParseOutput } from '@comunica/bus-rdf-parse';\nimport {\n ActorRdfParseFixedMediaTypes,\n} from '@comunica/bus-rdf-parse';\nimport type {\n IActionRdfParseHtml,\n IActorRdfParseHtmlOutput,\n IHtmlParseListener,\n} from '@comunica/bus-rdf-parse-html';\nimport type { Actor, Bus, IActorTest } from '@comunica/core';\nimport type { IActionContext } from '@comunica/types';\nimport type * as RDF from '@rdfjs/types';\nimport { Parser } from 'htmlparser2';\nimport { Readable } from 'readable-stream';\n\n/**\n * A comunica HTML RDF Parse Actor.\n * It creates an HTML parser, and delegates its events via the bus-rdf-parse-html bus to other HTML parsing actors.\n */\nexport class ActorRdfParseHtml extends ActorRdfParseFixedMediaTypes {\n private readonly busRdfParseHtml: Bus<Actor<IActionRdfParseHtml, IActorTest,\n IActorRdfParseHtmlOutput>, IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>;\n\n /**\n * @param args -\n * \\ @defaultNested {{\n * \"text/html\": 1.0,\n * \"application/xhtml+xml\": 0.9\n * }} mediaTypePriorities\n * \\ @defaultNested {{\n * \"text/html\": \"http://www.w3.org/ns/formats/HTML\",\n * \"application/xhtml+xml\": \"http://www.w3.org/ns/formats/HTML\"\n * }} 
mediaTypeFormats\n */\n public constructor(args: IActorRdfParseHtmlArgs) {\n super(args);\n }\n\n public async runHandle(action: IActionRdfParse, mediaType: string, context: IActionContext):\n Promise<IActorRdfParseOutput> {\n const data = new Readable({ objectMode: true });\n data._read = async() => {\n // Only initialize once\n data._read = () => {\n // Do nothing\n };\n\n // Create callbacks action\n let endBarrier = 1;\n function emit(quad: RDF.Quad): void {\n data.emit('data', quad);\n }\n function error(subError: unknown): void {\n data.emit('error', subError);\n }\n function end(): void {\n if (--endBarrier === 0) {\n data.push(null);\n }\n }\n const htmlAction: IActionRdfParseHtml = {\n baseIRI: action.metadata?.baseIRI ?? '',\n context,\n emit,\n end,\n error,\n headers: action.headers,\n };\n\n // Register html parse listeners\n Promise.all(this.busRdfParseHtml.publish(htmlAction))\n .then(async outputs => {\n endBarrier += outputs.length;\n\n const htmlParseListeners: IHtmlParseListener[] = [];\n for (const output of outputs) {\n const { htmlParseListener } = await output.actor.run(htmlAction);\n htmlParseListeners.push(htmlParseListener);\n }\n\n // Create parser\n const parser = new Parser({\n onclosetag() {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onTagClose();\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n onend() {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onEnd();\n }\n } catch (error_: unknown) {\n error(error_);\n }\n end();\n },\n onopentag(name: string, attributes: Record<string, string>) {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onTagOpen(name, attributes);\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n ontext(text: string) {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onText(text);\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n }, 
{\n decodeEntities: true,\n recognizeSelfClosing: true,\n xmlMode: false,\n });\n\n // Push stream to parser\n action.data\n .on('error', error)\n .on('data', chunk => parser.write(chunk.toString()))\n .on('end', () => parser.end());\n }).catch(error);\n };\n\n return { data };\n }\n}\n\nexport interface IActorRdfParseHtmlArgs extends IActorRdfParseFixedMediaTypesArgs {\n /* eslint-disable max-len */\n /**\n * The RDF Parse HTML bus for fetching HTML listeners\n * @default {<npmd:@comunica/bus-rdf-parse-html/^3.0.0/components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_default_bus>}\n */\n busRdfParseHtml: Bus<Actor<IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>,\n IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>;\n /* eslint-enable max-len */\n}\n"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@comunica/actor-rdf-parse-html",
|
|
3
|
-
"version": "2.8.2",
|
|
3
|
+
"version": "3.0.1-alpha.43.0",
|
|
4
4
|
"description": "A html rdf-parse actor",
|
|
5
5
|
"lsd:module": true,
|
|
6
6
|
"main": "lib/index.js",
|
|
@@ -32,18 +32,18 @@
|
|
|
32
32
|
"lib/**/*.js.map"
|
|
33
33
|
],
|
|
34
34
|
"dependencies": {
|
|
35
|
-
"@comunica/bus-rdf-parse": "
|
|
36
|
-
"@comunica/bus-rdf-parse-html": "
|
|
37
|
-
"@comunica/core": "
|
|
38
|
-
"@comunica/types": "
|
|
35
|
+
"@comunica/bus-rdf-parse": "3.0.1-alpha.43.0",
|
|
36
|
+
"@comunica/bus-rdf-parse-html": "3.0.1-alpha.43.0",
|
|
37
|
+
"@comunica/core": "3.0.1-alpha.43.0",
|
|
38
|
+
"@comunica/types": "3.0.1-alpha.43.0",
|
|
39
39
|
"@rdfjs/types": "*",
|
|
40
40
|
"htmlparser2": "^9.0.0",
|
|
41
|
-
"readable-stream": "^4.2
|
|
41
|
+
"readable-stream": "^4.4.2"
|
|
42
42
|
},
|
|
43
43
|
"scripts": {
|
|
44
44
|
"build": "npm run build:ts && npm run build:components",
|
|
45
45
|
"build:ts": "node \"../../node_modules/typescript/bin/tsc\"",
|
|
46
46
|
"build:components": "componentsjs-generator"
|
|
47
47
|
},
|
|
48
|
-
"gitHead": "
|
|
48
|
+
"gitHead": "d11e44cf07d4699f9d2c51d5851b5ed443de1997"
|
|
49
49
|
}
|