@comunica/actor-rdf-parse-html 3.3.0 → 4.0.1-alpha.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,7 +24,7 @@ After installing, this package can be added to your engine's configuration as fo
24
24
  {
25
25
  "@context": [
26
26
  ...
27
- "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^3.0.0/components/context.jsonld"
27
+ "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^4.0.0/components/context.jsonld"
28
28
  ],
29
29
  "actors": [
30
30
  ...
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "@context": [
3
- "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^3.0.0/components/context.jsonld",
4
- "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/core/^3.0.0/components/context.jsonld",
5
- "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse-html/^3.0.0/components/context.jsonld",
6
- "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse/^3.0.0/components/context.jsonld"
3
+ "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^4.0.0/components/context.jsonld",
4
+ "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/core/^4.0.0/components/context.jsonld",
5
+ "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse-html/^4.0.0/components/context.jsonld",
6
+ "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse/^4.0.0/components/context.jsonld"
7
7
  ],
8
8
  "@id": "npmd:@comunica/actor-rdf-parse-html",
9
9
  "components": [
@@ -32,6 +32,9 @@
32
32
  },
33
33
  {
34
34
  "@type": "ParameterRangeWildcard"
35
+ },
36
+ {
37
+ "@type": "ParameterRangeUndefined"
35
38
  }
36
39
  ]
37
40
  },
@@ -45,7 +48,7 @@
45
48
  ]
46
49
  },
47
50
  "default": {
48
- "@id": "npmd:@comunica/bus-rdf-parse-html/^3.0.0/components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_default_bus"
51
+ "@id": "npmd:@comunica/bus-rdf-parse-html/^4.0.0/components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_default_bus"
49
52
  },
50
53
  "comment": "The RDF Parse HTML bus for fetching HTML listeners"
51
54
  },
@@ -106,29 +109,37 @@
106
109
  "genericTypeInstances": [
107
110
  {
108
111
  "@type": "ParameterRangeGenericTypeReference",
109
- "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
112
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
110
113
  },
111
114
  {
112
115
  "@type": "ParameterRangeGenericTypeReference",
113
- "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
116
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
114
117
  },
115
118
  {
116
119
  "@type": "ParameterRangeGenericTypeReference",
117
- "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
120
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
121
+ },
122
+ {
123
+ "@type": "ParameterRangeGenericTypeReference",
124
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_TS"
118
125
  }
119
126
  ]
120
127
  },
121
128
  {
122
129
  "@type": "ParameterRangeGenericTypeReference",
123
- "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
130
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
124
131
  },
125
132
  {
126
133
  "@type": "ParameterRangeGenericTypeReference",
127
- "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
134
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
128
135
  },
129
136
  {
130
137
  "@type": "ParameterRangeGenericTypeReference",
131
- "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
138
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
139
+ },
140
+ {
141
+ "@type": "ParameterRangeGenericTypeReference",
142
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_TS"
132
143
  }
133
144
  ]
134
145
  },
@@ -138,6 +149,20 @@
138
149
  },
139
150
  "comment": "The bus this actor subscribes to."
140
151
  },
152
+ {
153
+ "@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_busFailMessage",
154
+ "range": {
155
+ "@type": "ParameterRangeUnion",
156
+ "parameterRangeElements": [
157
+ "xsd:string",
158
+ {
159
+ "@type": "ParameterRangeUndefined"
160
+ }
161
+ ]
162
+ },
163
+ "default": "RDF parsing failed: none of the configured parsers were able to handle the media type ${action.handle.mediaType} for ${action.handle.url}",
164
+ "comment": "The message that will be configured in the bus for reporting failures. This message may be a template string that contains references to the executed `action`. For example, the following templated string is allowed: \"RDF dereferencing failed: no actors could handle ${action.handle.mediaType}\""
165
+ },
141
166
  {
142
167
  "@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_beforeActors",
143
168
  "range": {
@@ -151,15 +176,19 @@
151
176
  "genericTypeInstances": [
152
177
  {
153
178
  "@type": "ParameterRangeGenericTypeReference",
154
- "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
179
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
180
+ },
181
+ {
182
+ "@type": "ParameterRangeGenericTypeReference",
183
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
155
184
  },
156
185
  {
157
186
  "@type": "ParameterRangeGenericTypeReference",
158
- "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
187
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
159
188
  },
160
189
  {
161
190
  "@type": "ParameterRangeGenericTypeReference",
162
- "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^3.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
191
+ "parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^4.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_TS"
163
192
  }
164
193
  ]
165
194
  }
@@ -226,6 +255,12 @@
226
255
  "@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_bus"
227
256
  }
228
257
  },
258
+ {
259
+ "keyRaw": "busFailMessage",
260
+ "value": {
261
+ "@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_busFailMessage"
262
+ }
263
+ },
229
264
  {
230
265
  "keyRaw": "beforeActors",
231
266
  "value": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "@context": [
3
- "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^3.0.0/components/context.jsonld"
3
+ "https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^4.0.0/components/context.jsonld"
4
4
  ],
5
5
  "@id": "npmd:@comunica/actor-rdf-parse-html",
6
6
  "@type": "Module",
@@ -3,7 +3,7 @@
3
3
  "https://linkedsoftwaredependencies.org/bundles/npm/componentsjs/^6.0.0/components/context.jsonld",
4
4
  {
5
5
  "npmd": "https://linkedsoftwaredependencies.org/bundles/npm/",
6
- "carph": "npmd:@comunica/actor-rdf-parse-html/^3.0.0/",
6
+ "carph": "npmd:@comunica/actor-rdf-parse-html/^4.0.0/",
7
7
  "ActorRdfParseHtml": {
8
8
  "@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml",
9
9
  "@prefix": true,
@@ -28,6 +28,9 @@
28
28
  "args_bus": {
29
29
  "@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_bus"
30
30
  },
31
+ "args_busFailMessage": {
32
+ "@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_busFailMessage"
33
+ },
31
34
  "args_beforeActors": {
32
35
  "@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_beforeActors",
33
36
  "@container": "@list"
@@ -52,6 +55,9 @@
52
55
  "bus": {
53
56
  "@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_bus"
54
57
  },
58
+ "busFailMessage": {
59
+ "@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_busFailMessage"
60
+ },
55
61
  "beforeActors": {
56
62
  "@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_beforeActors",
57
63
  "@container": "@list"
@@ -26,7 +26,7 @@ export declare class ActorRdfParseHtml extends ActorRdfParseFixedMediaTypes {
26
26
  export interface IActorRdfParseHtmlArgs extends IActorRdfParseFixedMediaTypesArgs {
27
27
  /**
28
28
  * The RDF Parse HTML bus for fetching HTML listeners
29
- * @default {<npmd:@comunica/bus-rdf-parse-html/^3.0.0/components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_default_bus>}
29
+ * @default {<npmd:@comunica/bus-rdf-parse-html/^4.0.0/components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_default_bus>}
30
30
  */
31
- busRdfParseHtml: Bus<Actor<IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>, IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>;
31
+ busRdfParseHtml: Bus<Actor<IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput, undefined>, IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>;
32
32
  }
@@ -55,7 +55,8 @@ class ActorRdfParseHtml extends bus_rdf_parse_1.ActorRdfParseFixedMediaTypes {
55
55
  endBarrier += outputs.length;
56
56
  const htmlParseListeners = [];
57
57
  for (const output of outputs) {
58
- const { htmlParseListener } = await output.actor.run(htmlAction);
58
+ // eslint-disable-next-line unicorn/no-useless-undefined
59
+ const { htmlParseListener } = await output.actor.run(htmlAction, undefined);
59
60
  htmlParseListeners.push(htmlParseListener);
60
61
  }
61
62
  // Create parser
@@ -1 +1 @@
1
- {"version":3,"file":"ActorRdfParseHtml.js","sourceRoot":"","sources":["ActorRdfParseHtml.ts"],"names":[],"mappings":";;;AACA,2DAEiC;AASjC,6CAAqC;AACrC,qDAA2C;AAE3C;;;GAGG;AACH,MAAa,iBAAkB,SAAQ,4CAA4B;IAOjE;;;;;;;;;;OAUG;IACH,YAAmB,IAA4B;QAC7C,KAAK,CAAC,IAAI,CAAC,CAAC;IACd,CAAC;IAEM,KAAK,CAAC,SAAS,CAAC,MAAuB,EAAE,SAAiB,EAAE,OAAuB;QAExF,MAAM,IAAI,GAAG,IAAI,0BAAQ,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,KAAK,GAAG,GAAG,EAAE;YAChB,aAAa;QACf,CAAC,CAAC;QAEF,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,0BAA0B;QAC1B,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,SAAS,KAAK,CAAC,QAAiB;YAC9B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAC/B,CAAC;QACD,SAAS,GAAG;YACV,IAAI,EAAE,UAAU,KAAK,CAAC,EAAE,CAAC;gBACvB,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;QACD,MAAM,UAAU,GAAwB;YACtC,OAAO,EAAE,MAAM,CAAC,QAAQ,EAAE,OAAO,IAAI,EAAE;YACvC,OAAO;YACP,IAAI,EAAE,CAAC,IAAc,EAAE,EAAE;gBACvB,OAAO,EAAE,CAAC;gBACV,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClB,CAAC;YACD,GAAG;YACH,KAAK;YACL,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;YAC5E,UAAU,IAAI,OAAO,CAAC,MAAM,CAAC;YAE7B,MAAM,kBAAkB,GAAyB,EAAE,CAAC;YACpD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,EAAE,iBAAiB,EAAE,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;gBACjE,kBAAkB,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;YAC7C,CAAC;YAED,gBAAgB;YAChB,MAAM,MAAM,GAAG,IAAI,oBAAM,CAAC;gBACxB,UAAU;oBACR,IAAI,CAAC;wBACH,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE,CAAC;4BACnD,iBAAiB,CAAC,UAAU,EAAE,CAAC;wBACjC,CAAC;oBACH,CAAC;oBAAC,OAAO,MAAe,EAAE,CAAC;wBACzB,KAAK,CAAC,MAAM,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBACD,KAAK;oBACH,IAAI,CAAC;wBACH,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE,CAAC;4BACnD,iBAAiB,CAAC,KAAK,EAAE,CAAC;wBAC5B,CAAC;oBACH,CAAC;oBAAC,OAAO,MAAe,EAAE,CAAC;wBACzB,KAAK,CAAC,MAAM,CAAC,CAAC;oBAChB,CAAC;oBACD,GAAG,EAAE,CAAC;gBACR,CAAC;gBACD,SAAS,CAAC,IAAY,EAAE,UAAkC;oBACxD,IAAI,CAAC;wBACH,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE,CAAC;4BACnD,iBAAiB,CAAC,SAAS,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;wBAChD,CAAC;oBACH,CAAC;oBAAC,OAAO,MAAe,EAAE,CAAC;wBACzB,KAAK,CAAC,MAAM,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBACD,MAAM,CAAC,IAAY;oBACjB,IAAI,CAAC;wBACH,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE,CAAC;4BACnD,iBAAiB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;wBACjC,CAAC;oBACH,CAAC;oBAAC,OAAO,MAAe,EAAE,CAAC;wBACzB,KAAK,CAAC,MAAM,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;aACF,EAAE;gBACD,cAAc,EAAE,IAAI;gBACpB,oBAAoB,EAAE,IAAI;gBAC1B,OAAO,EAAE,KAAK;aACf,CAAC,CAAC;YAEH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,GAAG,CAAC,IAAY,EAAE,EAAE;gBACzC,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;gBAClC,wDAAwD;gBACxD,OAAO,OAAO,GAAG,CAAC,EAAE,CAAC;oBACnB,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;oBAChC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;wBAClB,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;wBAC5C,OAAO;oBACT,CAAC;oBACD,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAChC,CAAC;YACH,CAAC,CAAC;YAEF,MAAM,CAAC,IAAI;iBACR,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;iBAClB,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;QACnC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,UAAU,CAAC,GAAG,EAAE;gBACd,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YACxB,CAAC,CAAC,CAAC;QACL,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,CAAC;IAClB,CAAC;CACF;AApID,8CAoIC","sourcesContent":["import type { IActionRdfParse, IActorRdfParseFixedMediaTypesArgs, IActorRdfParseOutput } from '@comunica/bus-rdf-parse';\nimport {\n ActorRdfParseFixedMediaTypes,\n} from '@comunica/bus-rdf-parse';\nimport type {\n IActionRdfParseHtml,\n IActorRdfParseHtmlOutput,\n IHtmlParseListener,\n} from '@comunica/bus-rdf-parse-html';\nimport type { Actor, Bus, IActorTest } from '@comunica/core';\nimport type { IActionContext } from '@comunica/types';\nimport type * as RDF from '@rdfjs/types';\nimport { Parser } from 'htmlparser2';\nimport { Readable } from 'readable-stream';\n\n/**\n * A comunica HTML RDF Parse Actor.\n * It creates an HTML parser, and delegates its events via the bus-rdf-parse-html bus to other HTML parsing actors.\n */\nexport class ActorRdfParseHtml extends ActorRdfParseFixedMediaTypes {\n private readonly busRdfParseHtml: Bus<Actor<\n IActionRdfParseHtml,\n IActorTest,\n IActorRdfParseHtmlOutput\n >, IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>;\n\n /**\n * @param args -\n * \\ @defaultNested {{\n * \"text/html\": 1.0,\n * \"application/xhtml+xml\": 0.9\n * }} mediaTypePriorities\n * \\ @defaultNested {{\n * \"text/html\": \"http://www.w3.org/ns/formats/HTML\",\n * \"application/xhtml+xml\": \"http://www.w3.org/ns/formats/HTML\"\n * }} mediaTypeFormats\n */\n public constructor(args: IActorRdfParseHtmlArgs) {\n super(args);\n }\n\n public async runHandle(action: IActionRdfParse, mediaType: string, context: IActionContext):\n Promise<IActorRdfParseOutput> {\n const data = new Readable({ objectMode: true });\n data._read = () => {\n // Do nothing\n };\n\n let maxSize = 0;\n\n // Create callbacks action\n let endBarrier = 1;\n function error(subError: unknown): void {\n data.emit('error', subError);\n }\n function end(): void {\n if (--endBarrier === 0) {\n data.push(null);\n }\n }\n const htmlAction: IActionRdfParseHtml = {\n baseIRI: action.metadata?.baseIRI ?? '',\n context,\n emit: (quad: RDF.Quad) => {\n maxSize--;\n data.push(quad);\n },\n end,\n error,\n headers: action.headers,\n };\n\n try {\n const outputs = await Promise.all(this.busRdfParseHtml.publish(htmlAction));\n endBarrier += outputs.length;\n\n const htmlParseListeners: IHtmlParseListener[] = [];\n for (const output of outputs) {\n const { htmlParseListener } = await output.actor.run(htmlAction);\n htmlParseListeners.push(htmlParseListener);\n }\n\n // Create parser\n const parser = new Parser({\n onclosetag() {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onTagClose();\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n onend() {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onEnd();\n }\n } catch (error_: unknown) {\n error(error_);\n }\n end();\n },\n onopentag(name: string, attributes: Record<string, string>) {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onTagOpen(name, attributes);\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n ontext(text: string) {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onText(text);\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n }, {\n decodeEntities: true,\n recognizeSelfClosing: true,\n xmlMode: false,\n });\n\n const read = data._read = (size: number) => {\n maxSize = Math.max(size, maxSize);\n // eslint-disable-next-line no-unmodified-loop-condition\n while (maxSize > 0) {\n const item = action.data.read();\n if (item === null) {\n action.data.once('readable', () => read(0));\n return;\n }\n parser.write(item.toString());\n }\n };\n\n action.data\n .on('error', error)\n .on('end', () => parser.end());\n } catch (e) {\n setTimeout(() => {\n data.emit('error', e);\n });\n }\n\n return { data };\n }\n}\n\nexport interface IActorRdfParseHtmlArgs extends IActorRdfParseFixedMediaTypesArgs {\n /* eslint-disable max-len */\n /**\n * The RDF Parse HTML bus for fetching HTML listeners\n * @default {<npmd:@comunica/bus-rdf-parse-html/^3.0.0/components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_default_bus>}\n */\n busRdfParseHtml: Bus<Actor<IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>, IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>;\n /* eslint-enable max-len */\n}\n"]}
1
+ {"version":3,"file":"ActorRdfParseHtml.js","sourceRoot":"","sources":["ActorRdfParseHtml.ts"],"names":[],"mappings":";;;AACA,2DAEiC;AASjC,6CAAqC;AACrC,qDAA2C;AAE3C;;;GAGG;AACH,MAAa,iBAAkB,SAAQ,4CAA4B;IAQjE;;;;;;;;;;OAUG;IACH,YAAmB,IAA4B;QAC7C,KAAK,CAAC,IAAI,CAAC,CAAC;IACd,CAAC;IAEM,KAAK,CAAC,SAAS,CAAC,MAAuB,EAAE,SAAiB,EAAE,OAAuB;QAExF,MAAM,IAAI,GAAG,IAAI,0BAAQ,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,KAAK,GAAG,GAAG,EAAE;YAChB,aAAa;QACf,CAAC,CAAC;QAEF,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,0BAA0B;QAC1B,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,SAAS,KAAK,CAAC,QAAiB;YAC9B,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAC/B,CAAC;QACD,SAAS,GAAG;YACV,IAAI,EAAE,UAAU,KAAK,CAAC,EAAE,CAAC;gBACvB,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;QACD,MAAM,UAAU,GAAwB;YACtC,OAAO,EAAE,MAAM,CAAC,QAAQ,EAAE,OAAO,IAAI,EAAE;YACvC,OAAO;YACP,IAAI,EAAE,CAAC,IAAc,EAAE,EAAE;gBACvB,OAAO,EAAE,CAAC;gBACV,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClB,CAAC;YACD,GAAG;YACH,KAAK;YACL,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;YAC5E,UAAU,IAAI,OAAO,CAAC,MAAM,CAAC;YAE7B,MAAM,kBAAkB,GAAyB,EAAE,CAAC;YACpD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,wDAAwD;gBACxD,MAAM,EAAE,iBAAiB,EAAE,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;gBAC5E,kBAAkB,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;YAC7C,CAAC;YAED,gBAAgB;YAChB,MAAM,MAAM,GAAG,IAAI,oBAAM,CAAC;gBACxB,UAAU;oBACR,IAAI,CAAC;wBACH,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE,CAAC;4BACnD,iBAAiB,CAAC,UAAU,EAAE,CAAC;wBACjC,CAAC;oBACH,CAAC;oBAAC,OAAO,MAAe,EAAE,CAAC;wBACzB,KAAK,CAAC,MAAM,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBACD,KAAK;oBACH,IAAI,CAAC;wBACH,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE,CAAC;4BACnD,iBAAiB,CAAC,KAAK,EAAE,CAAC;wBAC5B,CAAC;oBACH,CAAC;oBAAC,OAAO,MAAe,EAAE,CAAC;wBACzB,KAAK,CAAC,MAAM,CAAC,CAAC;oBAChB,CAAC;oBACD,GAAG,EAAE,CAAC;gBACR,CAAC;gBACD,SAAS,CAAC,IAAY,EAAE,UAAkC;oBACxD,IAAI,CAAC;wBACH,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE,CAAC;4BACnD,iBAAiB,CAAC,SAAS,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;wBAChD,CAAC;oBACH,CAAC;oBAAC,OAAO,MAAe,EAAE,CAAC;wBACzB,KAAK,CAAC,MAAM,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBACD,MAAM,CAAC,IAAY;oBACjB,IAAI,CAAC;wBACH,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE,CAAC;4BACnD,iBAAiB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;wBACjC,CAAC;oBACH,CAAC;oBAAC,OAAO,MAAe,EAAE,CAAC;wBACzB,KAAK,CAAC,MAAM,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;aACF,EAAE;gBACD,cAAc,EAAE,IAAI;gBACpB,oBAAoB,EAAE,IAAI;gBAC1B,OAAO,EAAE,KAAK;aACf,CAAC,CAAC;YAEH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,GAAG,CAAC,IAAY,EAAE,EAAE;gBACzC,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;gBAClC,wDAAwD;gBACxD,OAAO,OAAO,GAAG,CAAC,EAAE,CAAC;oBACnB,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;oBAChC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;wBAClB,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;wBAC5C,OAAO;oBACT,CAAC;oBACD,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAChC,CAAC;YACH,CAAC,CAAC;YAEF,MAAM,CAAC,IAAI;iBACR,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;iBAClB,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;QACnC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,UAAU,CAAC,GAAG,EAAE;gBACd,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YACxB,CAAC,CAAC,CAAC;QACL,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,CAAC;IAClB,CAAC;CACF;AAtID,8CAsIC","sourcesContent":["import type { IActionRdfParse, IActorRdfParseFixedMediaTypesArgs, IActorRdfParseOutput } from '@comunica/bus-rdf-parse';\nimport {\n ActorRdfParseFixedMediaTypes,\n} from '@comunica/bus-rdf-parse';\nimport type {\n IActionRdfParseHtml,\n IActorRdfParseHtmlOutput,\n IHtmlParseListener,\n} from '@comunica/bus-rdf-parse-html';\nimport type { Actor, Bus, IActorTest } from '@comunica/core';\nimport type { IActionContext } from '@comunica/types';\nimport type * as RDF from '@rdfjs/types';\nimport { Parser } from 'htmlparser2';\nimport { Readable } from 'readable-stream';\n\n/**\n * A comunica HTML RDF Parse Actor.\n * It creates an HTML parser, and delegates its events via the bus-rdf-parse-html bus to other HTML parsing actors.\n */\nexport class ActorRdfParseHtml extends ActorRdfParseFixedMediaTypes {\n private readonly busRdfParseHtml: Bus<Actor<\n IActionRdfParseHtml,\n IActorTest,\n IActorRdfParseHtmlOutput,\n undefined\n >, IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>;\n\n /**\n * @param args -\n * \\ @defaultNested {{\n * \"text/html\": 1.0,\n * \"application/xhtml+xml\": 0.9\n * }} mediaTypePriorities\n * \\ @defaultNested {{\n * \"text/html\": \"http://www.w3.org/ns/formats/HTML\",\n * \"application/xhtml+xml\": \"http://www.w3.org/ns/formats/HTML\"\n * }} mediaTypeFormats\n */\n public constructor(args: IActorRdfParseHtmlArgs) {\n super(args);\n }\n\n public async runHandle(action: IActionRdfParse, mediaType: string, context: IActionContext):\n Promise<IActorRdfParseOutput> {\n const data = new Readable({ objectMode: true });\n data._read = () => {\n // Do nothing\n };\n\n let maxSize = 0;\n\n // Create callbacks action\n let endBarrier = 1;\n function error(subError: unknown): void {\n data.emit('error', subError);\n }\n function end(): void {\n if (--endBarrier === 0) {\n data.push(null);\n }\n }\n const htmlAction: IActionRdfParseHtml = {\n baseIRI: action.metadata?.baseIRI ?? '',\n context,\n emit: (quad: RDF.Quad) => {\n maxSize--;\n data.push(quad);\n },\n end,\n error,\n headers: action.headers,\n };\n\n try {\n const outputs = await Promise.all(this.busRdfParseHtml.publish(htmlAction));\n endBarrier += outputs.length;\n\n const htmlParseListeners: IHtmlParseListener[] = [];\n for (const output of outputs) {\n // eslint-disable-next-line unicorn/no-useless-undefined\n const { htmlParseListener } = await output.actor.run(htmlAction, undefined);\n htmlParseListeners.push(htmlParseListener);\n }\n\n // Create parser\n const parser = new Parser({\n onclosetag() {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onTagClose();\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n onend() {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onEnd();\n }\n } catch (error_: unknown) {\n error(error_);\n }\n end();\n },\n onopentag(name: string, attributes: Record<string, string>) {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onTagOpen(name, attributes);\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n ontext(text: string) {\n try {\n for (const htmlParseListener of htmlParseListeners) {\n htmlParseListener.onText(text);\n }\n } catch (error_: unknown) {\n error(error_);\n }\n },\n }, {\n decodeEntities: true,\n recognizeSelfClosing: true,\n xmlMode: false,\n });\n\n const read = data._read = (size: number) => {\n maxSize = Math.max(size, maxSize);\n // eslint-disable-next-line no-unmodified-loop-condition\n while (maxSize > 0) {\n const item = action.data.read();\n if (item === null) {\n action.data.once('readable', () => read(0));\n return;\n }\n parser.write(item.toString());\n }\n };\n\n action.data\n .on('error', error)\n .on('end', () => parser.end());\n } catch (e) {\n setTimeout(() => {\n data.emit('error', e);\n });\n }\n\n return { data };\n }\n}\n\nexport interface IActorRdfParseHtmlArgs extends IActorRdfParseFixedMediaTypesArgs {\n /* eslint-disable max-len */\n /**\n * The RDF Parse HTML bus for fetching HTML listeners\n * @default {<npmd:@comunica/bus-rdf-parse-html/^4.0.0/components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_default_bus>}\n */\n busRdfParseHtml: Bus<Actor<IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput, undefined>, IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>;\n /* eslint-enable max-len */\n}\n"]}
package/package.json CHANGED
@@ -1,9 +1,13 @@
1
1
  {
2
2
  "name": "@comunica/actor-rdf-parse-html",
3
- "version": "3.3.0",
3
+ "version": "4.0.1-alpha.48.0",
4
4
  "description": "A html rdf-parse actor",
5
5
  "lsd:module": true,
6
6
  "license": "MIT",
7
+ "funding": {
8
+ "type": "opencollective",
9
+ "url": "https://opencollective.com/comunica-association"
10
+ },
7
11
  "homepage": "https://comunica.dev/",
8
12
  "repository": {
9
13
  "type": "git",
@@ -37,13 +41,13 @@
37
41
  "build:components": "componentsjs-generator"
38
42
  },
39
43
  "dependencies": {
40
- "@comunica/bus-rdf-parse": "^3.3.0",
41
- "@comunica/bus-rdf-parse-html": "^3.3.0",
42
- "@comunica/core": "^3.3.0",
43
- "@comunica/types": "^3.3.0",
44
+ "@comunica/bus-rdf-parse": "4.0.1-alpha.48.0",
45
+ "@comunica/bus-rdf-parse-html": "4.0.1-alpha.48.0",
46
+ "@comunica/core": "4.0.1-alpha.48.0",
47
+ "@comunica/types": "4.0.1-alpha.48.0",
44
48
  "@rdfjs/types": "*",
45
49
  "htmlparser2": "^9.0.0",
46
- "readable-stream": "^4.4.2"
50
+ "readable-stream": "^4.5.2"
47
51
  },
48
- "gitHead": "02bde397d206f1f5a523643a6a604c89e792e2f9"
52
+ "gitHead": "4cc37905eab2c94a2e1d9cee0f51c53604f07da1"
49
53
  }