@comunica/actor-rdf-parse-html 1.21.0 → 2.0.1-alpha.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.txt +2 -2
- package/README.md +3 -3
- package/components/ActorRdfParseHtml.jsonld +253 -0
- package/components/components.jsonld +5 -3
- package/components/context.jsonld +62 -6
- package/lib/ActorRdfParseHtml.d.ts +18 -2
- package/lib/ActorRdfParseHtml.js +23 -12
- package/lib/index.d.ts +1 -0
- package/{index.js → lib/index.js} +1 -1
- package/package.json +13 -38
- package/components/Actor/RdfParse/Html.jsonld +0 -51
- package/index.d.ts +0 -1
package/LICENSE.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
The MIT License (MIT)
|
|
2
2
|
|
|
3
|
-
Copyright © 2017–
|
|
4
|
-
Ghent University – imec, Belgium
|
|
3
|
+
Copyright © 2017–now Ruben Taelman, Joachim Van Herwegen
|
|
4
|
+
Comunica Association and Ghent University – imec, Belgium
|
|
5
5
|
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
7
|
of this software and associated documentation files (the "Software"), to deal
|
package/README.md
CHANGED
|
@@ -24,12 +24,12 @@ After installing, this package can be added to your engine's configuration as fo
|
|
|
24
24
|
{
|
|
25
25
|
"@context": [
|
|
26
26
|
...
|
|
27
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^
|
|
27
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^2.0.0/components/context.jsonld"
|
|
28
28
|
],
|
|
29
29
|
"actors": [
|
|
30
30
|
...
|
|
31
31
|
{
|
|
32
|
-
"@id": "
|
|
32
|
+
"@id": "urn:comunica:default:rdf-parse/actors#html",
|
|
33
33
|
"@type": "ActorRdfParseHtml",
|
|
34
34
|
"priorityScale": 0.2
|
|
35
35
|
}
|
|
@@ -39,4 +39,4 @@ After installing, this package can be added to your engine's configuration as fo
|
|
|
39
39
|
|
|
40
40
|
### Config Parameters
|
|
41
41
|
|
|
42
|
-
* `
|
|
42
|
+
* `priorityScale`: An optional priority for this parser, used for content negotiation, defaults to `1`.
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
{
|
|
2
|
+
"@context": [
|
|
3
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^2.0.0/components/context.jsonld",
|
|
4
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/core/^2.0.0/components/context.jsonld",
|
|
5
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse-html/^2.0.0/components/context.jsonld",
|
|
6
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse/^2.0.0/components/context.jsonld"
|
|
7
|
+
],
|
|
8
|
+
"@id": "npmd:@comunica/actor-rdf-parse-html",
|
|
9
|
+
"components": [
|
|
10
|
+
{
|
|
11
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml",
|
|
12
|
+
"@type": "Class",
|
|
13
|
+
"requireElement": "ActorRdfParseHtml",
|
|
14
|
+
"extends": [
|
|
15
|
+
"cbrp:components/ActorRdfParseFixedMediaTypes.jsonld#ActorRdfParseFixedMediaTypes"
|
|
16
|
+
],
|
|
17
|
+
"comment": "A comunica HTML RDF Parse Actor. It creates an HTML parser, and delegates its events via the bus-rdf-parse-html bus to other HTML parsing actors.",
|
|
18
|
+
"parameters": [
|
|
19
|
+
{
|
|
20
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_busRdfParseHtml",
|
|
21
|
+
"range": {
|
|
22
|
+
"@type": "ParameterRangeGenericComponent",
|
|
23
|
+
"component": "cc:components/Bus.jsonld#Bus",
|
|
24
|
+
"genericTypeInstances": [
|
|
25
|
+
{
|
|
26
|
+
"@type": "ParameterRangeGenericComponent",
|
|
27
|
+
"component": "cc:components/Actor.jsonld#Actor",
|
|
28
|
+
"genericTypeInstances": [
|
|
29
|
+
"cbrph:components/ActorRdfParseHtml.jsonld#IActionRdfParseHtml",
|
|
30
|
+
{
|
|
31
|
+
"@type": "ParameterRangeWildcard"
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"@type": "ParameterRangeWildcard"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
"cbrph:components/ActorRdfParseHtml.jsonld#IActionRdfParseHtml",
|
|
39
|
+
{
|
|
40
|
+
"@type": "ParameterRangeWildcard"
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"@type": "ParameterRangeWildcard"
|
|
44
|
+
}
|
|
45
|
+
]
|
|
46
|
+
},
|
|
47
|
+
"default": {
|
|
48
|
+
"@id": "npmd:@comunica/bus-rdf-parse-html/^2.0.0/components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_default_bus"
|
|
49
|
+
},
|
|
50
|
+
"comment": "The RDF Parse HTML bus for fetching HTML listeners"
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_mediaTypePriorities",
|
|
54
|
+
"range": "rdf:JSON",
|
|
55
|
+
"default": {
|
|
56
|
+
"@type": "@json",
|
|
57
|
+
"@value": {
|
|
58
|
+
"text/html": 1,
|
|
59
|
+
"application/xhtml+xml": 0.9
|
|
60
|
+
}
|
|
61
|
+
},
|
|
62
|
+
"comment": "A record of media types, with media type name as key, and its priority as value. Priorities are numbers between [0, 1]."
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_mediaTypeFormats",
|
|
66
|
+
"range": "rdf:JSON",
|
|
67
|
+
"default": {
|
|
68
|
+
"@type": "@json",
|
|
69
|
+
"@value": {
|
|
70
|
+
"text/html": "http://www.w3.org/ns/formats/HTML",
|
|
71
|
+
"application/xhtml+xml": "http://www.w3.org/ns/formats/HTML"
|
|
72
|
+
}
|
|
73
|
+
},
|
|
74
|
+
"comment": "A record of media types, with media type name as key, and its format IRI as value."
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_priorityScale",
|
|
78
|
+
"range": {
|
|
79
|
+
"@type": "ParameterRangeUnion",
|
|
80
|
+
"parameterRangeElements": [
|
|
81
|
+
"xsd:double",
|
|
82
|
+
{
|
|
83
|
+
"@type": "ParameterRangeUndefined"
|
|
84
|
+
}
|
|
85
|
+
]
|
|
86
|
+
},
|
|
87
|
+
"comment": "A multiplier for media type priorities. This can be used for keeping the original media types in place, but scaling all of their scores with a certain value."
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_name",
|
|
91
|
+
"range": "xsd:string",
|
|
92
|
+
"default": {
|
|
93
|
+
"@id": "rdf:subject"
|
|
94
|
+
},
|
|
95
|
+
"comment": "The name for this actor."
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_bus",
|
|
99
|
+
"range": {
|
|
100
|
+
"@type": "ParameterRangeGenericComponent",
|
|
101
|
+
"component": "cc:components/Bus.jsonld#Bus",
|
|
102
|
+
"genericTypeInstances": [
|
|
103
|
+
{
|
|
104
|
+
"@type": "ParameterRangeGenericComponent",
|
|
105
|
+
"component": "cc:components/Actor.jsonld#Actor",
|
|
106
|
+
"genericTypeInstances": [
|
|
107
|
+
{
|
|
108
|
+
"@type": "ParameterRangeGenericTypeReference",
|
|
109
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^2.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"@type": "ParameterRangeGenericTypeReference",
|
|
113
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^2.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"@type": "ParameterRangeGenericTypeReference",
|
|
117
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^2.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
|
|
118
|
+
}
|
|
119
|
+
]
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"@type": "ParameterRangeGenericTypeReference",
|
|
123
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^2.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
"@type": "ParameterRangeGenericTypeReference",
|
|
127
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^2.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
"@type": "ParameterRangeGenericTypeReference",
|
|
131
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^2.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
|
|
132
|
+
}
|
|
133
|
+
]
|
|
134
|
+
},
|
|
135
|
+
"default": {
|
|
136
|
+
"@id": "cbrp:components/ActorRdfParse.jsonld#ActorRdfParse_default_bus",
|
|
137
|
+
"@type": "cc:components/Bus.jsonld#Bus"
|
|
138
|
+
},
|
|
139
|
+
"comment": "The bus this actor subscribes to."
|
|
140
|
+
},
|
|
141
|
+
{
|
|
142
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_beforeActors",
|
|
143
|
+
"range": {
|
|
144
|
+
"@type": "ParameterRangeUnion",
|
|
145
|
+
"parameterRangeElements": [
|
|
146
|
+
{
|
|
147
|
+
"@type": "ParameterRangeArray",
|
|
148
|
+
"parameterRangeValue": {
|
|
149
|
+
"@type": "ParameterRangeGenericComponent",
|
|
150
|
+
"component": "cc:components/Actor.jsonld#Actor",
|
|
151
|
+
"genericTypeInstances": [
|
|
152
|
+
{
|
|
153
|
+
"@type": "ParameterRangeGenericTypeReference",
|
|
154
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^2.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_I"
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
"@type": "ParameterRangeGenericTypeReference",
|
|
158
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^2.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_T"
|
|
159
|
+
},
|
|
160
|
+
{
|
|
161
|
+
"@type": "ParameterRangeGenericTypeReference",
|
|
162
|
+
"parameterRangeGenericType": "npmd:@comunica/actor-abstract-mediatyped/^2.0.0/components/ActorAbstractMediaTyped.jsonld#ActorAbstractMediaTyped__generic_O"
|
|
163
|
+
}
|
|
164
|
+
]
|
|
165
|
+
}
|
|
166
|
+
},
|
|
167
|
+
{
|
|
168
|
+
"@type": "ParameterRangeUndefined"
|
|
169
|
+
}
|
|
170
|
+
]
|
|
171
|
+
},
|
|
172
|
+
"comment": "Actor that must be registered in the bus before this actor."
|
|
173
|
+
}
|
|
174
|
+
],
|
|
175
|
+
"memberFields": [
|
|
176
|
+
{
|
|
177
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml__member_busRdfParseHtml",
|
|
178
|
+
"memberFieldName": "busRdfParseHtml"
|
|
179
|
+
},
|
|
180
|
+
{
|
|
181
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml__member_constructor",
|
|
182
|
+
"memberFieldName": "constructor"
|
|
183
|
+
},
|
|
184
|
+
{
|
|
185
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml__member_runHandle",
|
|
186
|
+
"memberFieldName": "runHandle"
|
|
187
|
+
}
|
|
188
|
+
],
|
|
189
|
+
"constructorArguments": [
|
|
190
|
+
{
|
|
191
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args__constructorArgument",
|
|
192
|
+
"fields": [
|
|
193
|
+
{
|
|
194
|
+
"keyRaw": "busRdfParseHtml",
|
|
195
|
+
"value": {
|
|
196
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_busRdfParseHtml"
|
|
197
|
+
}
|
|
198
|
+
},
|
|
199
|
+
{
|
|
200
|
+
"keyRaw": "mediaTypePriorities",
|
|
201
|
+
"value": {
|
|
202
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_mediaTypePriorities"
|
|
203
|
+
}
|
|
204
|
+
},
|
|
205
|
+
{
|
|
206
|
+
"keyRaw": "mediaTypeFormats",
|
|
207
|
+
"value": {
|
|
208
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_mediaTypeFormats"
|
|
209
|
+
}
|
|
210
|
+
},
|
|
211
|
+
{
|
|
212
|
+
"keyRaw": "priorityScale",
|
|
213
|
+
"value": {
|
|
214
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_priorityScale"
|
|
215
|
+
}
|
|
216
|
+
},
|
|
217
|
+
{
|
|
218
|
+
"keyRaw": "name",
|
|
219
|
+
"value": {
|
|
220
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_name"
|
|
221
|
+
}
|
|
222
|
+
},
|
|
223
|
+
{
|
|
224
|
+
"keyRaw": "bus",
|
|
225
|
+
"value": {
|
|
226
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_bus"
|
|
227
|
+
}
|
|
228
|
+
},
|
|
229
|
+
{
|
|
230
|
+
"keyRaw": "beforeActors",
|
|
231
|
+
"value": {
|
|
232
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_beforeActors"
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
]
|
|
236
|
+
}
|
|
237
|
+
]
|
|
238
|
+
},
|
|
239
|
+
{
|
|
240
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#IActorRdfParseHtmlArgs",
|
|
241
|
+
"@type": "AbstractClass",
|
|
242
|
+
"requireElement": "IActorRdfParseHtmlArgs",
|
|
243
|
+
"parameters": [],
|
|
244
|
+
"memberFields": [
|
|
245
|
+
{
|
|
246
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#IActorRdfParseHtmlArgs__member_busRdfParseHtml",
|
|
247
|
+
"memberFieldName": "busRdfParseHtml"
|
|
248
|
+
}
|
|
249
|
+
],
|
|
250
|
+
"constructorArguments": []
|
|
251
|
+
}
|
|
252
|
+
]
|
|
253
|
+
}
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
{
|
|
2
|
-
"@context":
|
|
2
|
+
"@context": [
|
|
3
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^2.0.0/components/context.jsonld"
|
|
4
|
+
],
|
|
3
5
|
"@id": "npmd:@comunica/actor-rdf-parse-html",
|
|
4
6
|
"@type": "Module",
|
|
5
7
|
"requireName": "@comunica/actor-rdf-parse-html",
|
|
6
8
|
"import": [
|
|
7
|
-
"
|
|
9
|
+
"carph:components/ActorRdfParseHtml.jsonld"
|
|
8
10
|
]
|
|
9
|
-
}
|
|
11
|
+
}
|
|
@@ -1,12 +1,68 @@
|
|
|
1
1
|
{
|
|
2
2
|
"@context": [
|
|
3
|
-
"https://linkedsoftwaredependencies.org/bundles/npm
|
|
3
|
+
"https://linkedsoftwaredependencies.org/bundles/npm/componentsjs/^4.0.0/components/context.jsonld",
|
|
4
4
|
{
|
|
5
5
|
"npmd": "https://linkedsoftwaredependencies.org/bundles/npm/",
|
|
6
|
-
"carph": "npmd:@comunica/actor-rdf-parse-html/",
|
|
7
|
-
"
|
|
8
|
-
|
|
9
|
-
|
|
6
|
+
"carph": "npmd:@comunica/actor-rdf-parse-html/^2.0.0/",
|
|
7
|
+
"ActorRdfParseHtml": {
|
|
8
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml",
|
|
9
|
+
"@prefix": true,
|
|
10
|
+
"@context": {
|
|
11
|
+
"args_busRdfParseHtml": {
|
|
12
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_busRdfParseHtml"
|
|
13
|
+
},
|
|
14
|
+
"args_mediaTypePriorities": {
|
|
15
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_mediaTypePriorities",
|
|
16
|
+
"@type": "@json"
|
|
17
|
+
},
|
|
18
|
+
"args_mediaTypeFormats": {
|
|
19
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_mediaTypeFormats",
|
|
20
|
+
"@type": "@json"
|
|
21
|
+
},
|
|
22
|
+
"args_priorityScale": {
|
|
23
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_priorityScale"
|
|
24
|
+
},
|
|
25
|
+
"args_name": {
|
|
26
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_name"
|
|
27
|
+
},
|
|
28
|
+
"args_bus": {
|
|
29
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_bus"
|
|
30
|
+
},
|
|
31
|
+
"args_beforeActors": {
|
|
32
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_beforeActors",
|
|
33
|
+
"@container": "@list"
|
|
34
|
+
},
|
|
35
|
+
"busRdfParseHtml": {
|
|
36
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_busRdfParseHtml"
|
|
37
|
+
},
|
|
38
|
+
"mediaTypePriorities": {
|
|
39
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_mediaTypePriorities",
|
|
40
|
+
"@type": "@json"
|
|
41
|
+
},
|
|
42
|
+
"mediaTypeFormats": {
|
|
43
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_mediaTypeFormats",
|
|
44
|
+
"@type": "@json"
|
|
45
|
+
},
|
|
46
|
+
"priorityScale": {
|
|
47
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_priorityScale"
|
|
48
|
+
},
|
|
49
|
+
"name": {
|
|
50
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_name"
|
|
51
|
+
},
|
|
52
|
+
"bus": {
|
|
53
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_bus"
|
|
54
|
+
},
|
|
55
|
+
"beforeActors": {
|
|
56
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_args_beforeActors",
|
|
57
|
+
"@container": "@list"
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
"IActorRdfParseHtmlArgs": {
|
|
62
|
+
"@id": "carph:components/ActorRdfParseHtml.jsonld#IActorRdfParseHtmlArgs",
|
|
63
|
+
"@prefix": true,
|
|
64
|
+
"@context": {}
|
|
65
|
+
}
|
|
10
66
|
}
|
|
11
67
|
]
|
|
12
|
-
}
|
|
68
|
+
}
|
|
@@ -1,16 +1,32 @@
|
|
|
1
1
|
import type { IActionRdfParse, IActorRdfParseFixedMediaTypesArgs, IActorRdfParseOutput } from '@comunica/bus-rdf-parse';
|
|
2
2
|
import { ActorRdfParseFixedMediaTypes } from '@comunica/bus-rdf-parse';
|
|
3
3
|
import type { IActionRdfParseHtml, IActorRdfParseHtmlOutput } from '@comunica/bus-rdf-parse-html';
|
|
4
|
-
import type {
|
|
4
|
+
import type { Actor, Bus, IActorTest } from '@comunica/core';
|
|
5
|
+
import type { IActionContext } from '@comunica/types';
|
|
5
6
|
/**
|
|
6
7
|
* A comunica HTML RDF Parse Actor.
|
|
7
8
|
* It creates an HTML parser, and delegates its events via the bus-rdf-parse-html bus to other HTML parsing actors.
|
|
8
9
|
*/
|
|
9
10
|
export declare class ActorRdfParseHtml extends ActorRdfParseFixedMediaTypes {
|
|
10
11
|
private readonly busRdfParseHtml;
|
|
12
|
+
/**
|
|
13
|
+
* @param args -
|
|
14
|
+
* \ @defaultNested {{
|
|
15
|
+
* "text/html": 1.0,
|
|
16
|
+
* "application/xhtml+xml": 0.9
|
|
17
|
+
* }} mediaTypePriorities
|
|
18
|
+
* \ @defaultNested {{
|
|
19
|
+
* "text/html": "http://www.w3.org/ns/formats/HTML",
|
|
20
|
+
* "application/xhtml+xml": "http://www.w3.org/ns/formats/HTML"
|
|
21
|
+
* }} mediaTypeFormats
|
|
22
|
+
*/
|
|
11
23
|
constructor(args: IActorRdfParseHtmlArgs);
|
|
12
|
-
runHandle(action: IActionRdfParse, mediaType: string, context:
|
|
24
|
+
runHandle(action: IActionRdfParse, mediaType: string, context: IActionContext): Promise<IActorRdfParseOutput>;
|
|
13
25
|
}
|
|
14
26
|
export interface IActorRdfParseHtmlArgs extends IActorRdfParseFixedMediaTypesArgs {
|
|
27
|
+
/**
|
|
28
|
+
* The RDF Parse HTML bus for fetching HTML listeners
|
|
29
|
+
* @default {<npmd:@comunica/bus-rdf-parse-html/^2.0.0/components/ActorRdfParseHtml.jsonld#ActorRdfParseHtml_default_bus>}
|
|
30
|
+
*/
|
|
15
31
|
busRdfParseHtml: Bus<Actor<IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>, IActionRdfParseHtml, IActorTest, IActorRdfParseHtmlOutput>;
|
|
16
32
|
}
|
package/lib/ActorRdfParseHtml.js
CHANGED
|
@@ -9,31 +9,42 @@ const WritableStream_1 = require("htmlparser2/lib/WritableStream");
|
|
|
9
9
|
* It creates an HTML parser, and delegates its events via the bus-rdf-parse-html bus to other HTML parsing actors.
|
|
10
10
|
*/
|
|
11
11
|
class ActorRdfParseHtml extends bus_rdf_parse_1.ActorRdfParseFixedMediaTypes {
|
|
12
|
+
/**
|
|
13
|
+
* @param args -
|
|
14
|
+
* \ @defaultNested {{
|
|
15
|
+
* "text/html": 1.0,
|
|
16
|
+
* "application/xhtml+xml": 0.9
|
|
17
|
+
* }} mediaTypePriorities
|
|
18
|
+
* \ @defaultNested {{
|
|
19
|
+
* "text/html": "http://www.w3.org/ns/formats/HTML",
|
|
20
|
+
* "application/xhtml+xml": "http://www.w3.org/ns/formats/HTML"
|
|
21
|
+
* }} mediaTypeFormats
|
|
22
|
+
*/
|
|
12
23
|
constructor(args) {
|
|
13
24
|
super(args);
|
|
14
25
|
}
|
|
15
26
|
async runHandle(action, mediaType, context) {
|
|
16
|
-
const
|
|
17
|
-
|
|
27
|
+
const data = new stream_1.Readable({ objectMode: true });
|
|
28
|
+
data._read = async () => {
|
|
18
29
|
// Only initialize once
|
|
19
|
-
|
|
30
|
+
data._read = () => {
|
|
20
31
|
// Do nothing
|
|
21
32
|
};
|
|
22
33
|
// Create callbacks action
|
|
23
34
|
let endBarrier = 1;
|
|
24
35
|
function emit(quad) {
|
|
25
|
-
|
|
36
|
+
data.emit('data', quad);
|
|
26
37
|
}
|
|
27
38
|
function error(subError) {
|
|
28
|
-
|
|
39
|
+
data.emit('error', subError);
|
|
29
40
|
}
|
|
30
41
|
function end() {
|
|
31
42
|
if (--endBarrier === 0) {
|
|
32
|
-
|
|
43
|
+
data.push(null);
|
|
33
44
|
}
|
|
34
45
|
}
|
|
35
46
|
const htmlAction = {
|
|
36
|
-
baseIRI: action.baseIRI,
|
|
47
|
+
baseIRI: action.metadata?.baseIRI ?? '',
|
|
37
48
|
context,
|
|
38
49
|
emit,
|
|
39
50
|
end,
|
|
@@ -82,10 +93,10 @@ class ActorRdfParseHtml extends bus_rdf_parse_1.ActorRdfParseFixedMediaTypes {
|
|
|
82
93
|
error(error_);
|
|
83
94
|
}
|
|
84
95
|
},
|
|
85
|
-
ontext(
|
|
96
|
+
ontext(text) {
|
|
86
97
|
try {
|
|
87
98
|
for (const htmlParseListener of htmlParseListeners) {
|
|
88
|
-
htmlParseListener.onText(
|
|
99
|
+
htmlParseListener.onText(text);
|
|
89
100
|
}
|
|
90
101
|
}
|
|
91
102
|
catch (error_) {
|
|
@@ -98,11 +109,11 @@ class ActorRdfParseHtml extends bus_rdf_parse_1.ActorRdfParseFixedMediaTypes {
|
|
|
98
109
|
xmlMode: false,
|
|
99
110
|
});
|
|
100
111
|
// Push stream to parser
|
|
101
|
-
action.
|
|
102
|
-
action.
|
|
112
|
+
action.data.on('error', error);
|
|
113
|
+
action.data.pipe(parser);
|
|
103
114
|
}).catch(error);
|
|
104
115
|
};
|
|
105
|
-
return {
|
|
116
|
+
return { data };
|
|
106
117
|
}
|
|
107
118
|
}
|
|
108
119
|
exports.ActorRdfParseHtml = ActorRdfParseHtml;
|
package/lib/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './ActorRdfParseHtml';
|
|
@@ -10,5 +10,5 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
10
10
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
11
11
|
};
|
|
12
12
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
13
|
-
__exportStar(require("./
|
|
13
|
+
__exportStar(require("./ActorRdfParseHtml"), exports);
|
|
14
14
|
//# sourceMappingURL=index.js.map
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@comunica/actor-rdf-parse-html",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "2.0.1-alpha.5.0",
|
|
4
4
|
"description": "A html rdf-parse actor",
|
|
5
5
|
"lsd:module": true,
|
|
6
|
-
"main": "index.js",
|
|
7
|
-
"typings": "index",
|
|
6
|
+
"main": "lib/index.js",
|
|
7
|
+
"typings": "lib/index",
|
|
8
8
|
"repository": {
|
|
9
9
|
"type": "git",
|
|
10
10
|
"url": "https://github.com/comunica/comunica.git",
|
|
@@ -27,44 +27,19 @@
|
|
|
27
27
|
"files": [
|
|
28
28
|
"components",
|
|
29
29
|
"lib/**/*.d.ts",
|
|
30
|
-
"lib/**/*.js"
|
|
31
|
-
"index.d.ts",
|
|
32
|
-
"index.js"
|
|
30
|
+
"lib/**/*.js"
|
|
33
31
|
],
|
|
34
32
|
"dependencies": {
|
|
35
|
-
"@comunica/bus-rdf-parse
|
|
36
|
-
"@
|
|
37
|
-
"
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
"@comunica/bus-rdf-parse": "^1.8.0",
|
|
41
|
-
"@comunica/core": "^1.8.0"
|
|
42
|
-
},
|
|
43
|
-
"devDependencies": {
|
|
44
|
-
"@comunica/bus-rdf-parse": "^1.21.0",
|
|
45
|
-
"@comunica/core": "^1.21.0"
|
|
46
|
-
},
|
|
47
|
-
"jest": {
|
|
48
|
-
"globals": {
|
|
49
|
-
"ts-jest": {
|
|
50
|
-
"tsConfig": "../../tsconfig.json"
|
|
51
|
-
}
|
|
52
|
-
},
|
|
53
|
-
"transform": {
|
|
54
|
-
"^.+\\.ts$": "ts-jest"
|
|
55
|
-
},
|
|
56
|
-
"testRegex": "(/test/.*|(\\.|/)(test|spec))\\.ts$",
|
|
57
|
-
"moduleFileExtensions": [
|
|
58
|
-
"ts",
|
|
59
|
-
"js"
|
|
60
|
-
],
|
|
61
|
-
"collectCoverage": true
|
|
33
|
+
"@comunica/bus-rdf-parse": "2.0.1-alpha.5.0",
|
|
34
|
+
"@comunica/bus-rdf-parse-html": "2.0.1-alpha.5.0",
|
|
35
|
+
"@comunica/core": "2.0.1-alpha.5.0",
|
|
36
|
+
"@rdfjs/types": "*",
|
|
37
|
+
"htmlparser2": "^7.0.0"
|
|
62
38
|
},
|
|
63
39
|
"scripts": {
|
|
64
|
-
"
|
|
65
|
-
"
|
|
66
|
-
"build": "
|
|
67
|
-
"validate": "npm ls"
|
|
40
|
+
"build": "npm run build:ts && npm run build:components",
|
|
41
|
+
"build:ts": "node \"../../node_modules/typescript/bin/tsc\"",
|
|
42
|
+
"build:components": "componentsjs-generator"
|
|
68
43
|
},
|
|
69
|
-
"gitHead": "
|
|
44
|
+
"gitHead": "e2ae2e9e924bf0656df60cc99774f7e560d47695"
|
|
70
45
|
}
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"@context": [
|
|
3
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-abstract-mediatyped/^1.0.0/components/context.jsonld",
|
|
4
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-html/^1.0.0/components/context.jsonld",
|
|
5
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse-html/^1.0.0/components/context.jsonld",
|
|
6
|
-
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse/^1.0.0/components/context.jsonld"
|
|
7
|
-
],
|
|
8
|
-
"@id": "npmd:@comunica/actor-rdf-parse-html",
|
|
9
|
-
"components": [
|
|
10
|
-
{
|
|
11
|
-
"@id": "carph:Actor/RdfParse/Html",
|
|
12
|
-
"@type": "Class",
|
|
13
|
-
"extends": "cbrp:Actor/RdfParseFixedMediaTypes",
|
|
14
|
-
"requireElement": "ActorRdfParseHtml",
|
|
15
|
-
"comment": "A comunica HTML RDF Parse Actor.",
|
|
16
|
-
"parameters": [
|
|
17
|
-
{
|
|
18
|
-
"@id": "caam:Actor/AbstractMediaTypedFixed/mediaTypeEntry",
|
|
19
|
-
"defaultScoped": {
|
|
20
|
-
"defaultScope": "carph:Actor/RdfParse/Html",
|
|
21
|
-
"defaultScopedValue": [
|
|
22
|
-
{ "mediaTypeKey": "text/html", "priorityValue": "1.0", "mediaTypeFormat": "http://www.w3.org/ns/formats/HTML" },
|
|
23
|
-
{ "mediaTypeKey": "application/xhtml+xml", "priorityValue": "0.9", "mediaTypeFormat": "http://www.w3.org/ns/formats/HTML" }
|
|
24
|
-
]
|
|
25
|
-
}
|
|
26
|
-
},
|
|
27
|
-
{
|
|
28
|
-
"@id": "carph:busRdfParseHtml",
|
|
29
|
-
"comment": "The RDF Parse HTML bus for fetching HTML listeners",
|
|
30
|
-
"required": true,
|
|
31
|
-
"unique": true,
|
|
32
|
-
"defaultScoped": {
|
|
33
|
-
"defaultScope": "carph:Actor/RdfParse/Html",
|
|
34
|
-
"defaultScopedValue": { "@id": "cbrph:Bus/RdfParseHtml" }
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
],
|
|
38
|
-
"constructorArguments": [
|
|
39
|
-
{
|
|
40
|
-
"extends": "cbrp:Actor/RdfParseFixedMediaTypes/constructorArgumentsObject",
|
|
41
|
-
"fields": [
|
|
42
|
-
{
|
|
43
|
-
"keyRaw": "busRdfParseHtml",
|
|
44
|
-
"value": "carph:busRdfParseHtml"
|
|
45
|
-
}
|
|
46
|
-
]
|
|
47
|
-
}
|
|
48
|
-
]
|
|
49
|
-
}
|
|
50
|
-
]
|
|
51
|
-
}
|
package/index.d.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export * from './lib/ActorRdfParseHtml';
|