@markuplint/spec-generator 4.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2017-2024 Yusuke Hirao
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # @markuplint/spec-generator
2
+
3
+ This is a private package for generating `@markuplint/html-spec`.
package/lib/aria.d.ts ADDED
@@ -0,0 +1,18 @@
1
+ import type { ARIAProperty, ARIARoleInSchema } from '@markuplint/ml-spec';
2
+ export declare function getAria(): Promise<{
3
+ '1.3': {
4
+ roles: ARIARoleInSchema[];
5
+ props: ARIAProperty[];
6
+ graphicsRoles: ARIARoleInSchema[];
7
+ };
8
+ '1.2': {
9
+ roles: ARIARoleInSchema[];
10
+ props: ARIAProperty[];
11
+ graphicsRoles: ARIARoleInSchema[];
12
+ };
13
+ '1.1': {
14
+ roles: ARIARoleInSchema[];
15
+ props: ARIAProperty[];
16
+ graphicsRoles: ARIARoleInSchema[];
17
+ };
18
+ }>;
package/lib/aria.js ADDED
@@ -0,0 +1,301 @@
1
+ /* global cheerio */
2
+ import { fetch } from './fetch.js';
3
+ import { arrayUnique, nameCompare } from './utils.js';
4
/**
 * Collects WAI-ARIA data for the versions 1.3, 1.2 and 1.1.
 *
 * For every version the result holds the roles, the states/properties owned
 * by those roles, and the roles read from the Graphics-ARIA document.
 *
 * @returns {Promise<object>} An object keyed by version (`'1.3'`, `'1.2'`,
 *   `'1.1'`), each entry shaped `{ roles, props, graphicsRoles }`.
 */
export async function getAria() {
    const versions = ['1.3', '1.2', '1.1'];
    // The role lists of all versions are resolved first, one after another.
    const rolesByVersion = new Map();
    for (const version of versions) {
        rolesByVersion.set(version, await getRoles(version));
    }
    const aria = {};
    for (const version of versions) {
        const roles = rolesByVersion.get(version);
        const props = await getProps(version, roles);
        const graphicsRoles = await getRoles(version, true);
        aria[version] = { roles, props, graphicsRoles };
    }
    return aria;
}
26
/**
 * Resolves the specification URL to scrape for an ARIA version.
 *
 * @param {string} version - One of `'1.3'`, `'1.2'` or `'1.1'`.
 * @param {boolean} [graphicsAria=false] - When truthy, the Graphics-ARIA
 *   document for that version is returned instead of the core document.
 * @returns {string | undefined} The URL, or `undefined` for any other version.
 */
function getARIASpecURLByVersion(version, graphicsAria = false) {
    const urls = new Map([
        ['1.3', { core: 'https://w3c.github.io/aria/', graphics: 'https://w3c.github.io/graphics-aria/' }],
        ['1.2', { core: 'https://www.w3.org/TR/wai-aria-1.2/', graphics: 'https://w3c.github.io/graphics-aria/' }],
        ['1.1', { core: 'https://www.w3.org/TR/wai-aria-1.1/', graphics: 'https://www.w3.org/TR/graphics-aria-1.0/' }],
    ]);
    const entry = urls.get(version);
    if (entry === undefined) {
        return undefined;
    }
    return graphicsAria ? entry.graphics : entry.core;
}
48
/**
 * Scrapes the role definitions of one ARIA specification document.
 *
 * Every `#role_definitions section.role` element becomes one role record.
 * Afterwards the `none` / `presentation` pair is aligned: for 1.1 and 1.2 the
 * `none` entry takes over the features of `presentation`; for any other
 * version `presentation` takes over the features of `none`. The overwritten
 * entry keeps its own name and description.
 *
 * @param {string} version - ARIA version handed to `getARIASpecURLByVersion`.
 * @param {boolean} [graphicsAria=false] - Read the Graphics-ARIA document instead.
 * @returns {Promise<object[]>} Role records sorted with `nameCompare`.
 */
async function getRoles(version, graphicsAria = false) {
    const $ = await fetch(getARIASpecURLByVersion(version, graphicsAria));
    const $roleList = $('#role_definitions section.role');
    const roles = [];
    // Turns one list item (or table cell) into `{ name, deprecated }`.
    const getAttr = (
    // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
    li) => {
        const $li = $(li);
        const text = $li.text();
        const isDeprecated = /deprecated/i.test(text) || undefined;
        const $a = $li.find('a');
        const name = $a.length > 0
            ? $a
                .text()
                .replace(/\s*\(\s*state\s*\)\s*/i, '')
                .trim()
            : text.trim();
        return {
            name,
            deprecated: isDeprecated,
        };
    };
    $roleList.each((_, el) => {
        const $el = $(el);
        const name = $el.find('.role-name').attr('title')?.trim() ?? '';
        const description = $el
            .find('.role-description p')
            .toArray()
            // Collapsing all whitespace runs also removes tabs.
            .map(p => $(p).text().trim().replaceAll(/\s+/g, ' '))
            .join('\n\n');
        const $features = $el.find('.role-features tr');
        const generalization = $features
            .find('.role-parent a')
            .toArray()
            .map(a => $(a).text().trim());
        const isAbstract = $features.find('.role-abstract').text().trim().toLowerCase() === 'true' || undefined;
        // Required properties are either list items or, without a list, the cell itself.
        let $ownedRequiredProps = $features.find('.role-required-properties li').toArray();
        if ($ownedRequiredProps.length === 0) {
            $ownedRequiredProps = $features.find('.role-required-properties').toArray();
        }
        const ownedRequiredProps = $ownedRequiredProps.map(getAttr).map(p => ({ ...p, required: true }));
        const ownedInheritedProps = $features
            .find('.role-inherited li')
            .toArray()
            .map(getAttr)
            .map(p => ({ ...p, inherited: true }));
        const ownedProps = $features.find('.role-properties li, .role-properties > a').toArray().map(getAttr);
        const requiredContextRole = $$($features, ['.role-scope li', '.role-scope a'])
            .toArray()
            .map(scopeEl => {
                const text = $(scopeEl).text().trim();
                // "child owned by parent" style phrases are rewritten to "parent > child".
                if (/owned\s+by|with\s+parent|with\s+accessibility\s+parent/i.test(text)) {
                    return text.replaceAll(/([a-z]+)\s+(?:owned\s+by|with\s+parent|with\s+accessibility\s+parent)\s+([a-z]+)/gi, '$2 > $1');
                }
                return text;
            });
        const requiredOwnedElements = $$($features, ['.role-mustcontain li', '.role-mustcontain a'])
            .toArray()
            .map(ownedEl => $(ownedEl)
                .text()
                .trim()
                .replaceAll(/\s+(?:owning|→|with\s+child|with\s+accessibility\s+child)\s+/gi, ' > '));
        const nameFrom = $features.find('.role-namefrom').text();
        const accessibleNameRequired = /true/i.test($features.find('.role-namerequired').text());
        const accessibleNameFromAuthor = /author/i.test(nameFrom);
        const accessibleNameFromContent = /content/i.test(nameFrom);
        const accessibleNameProhibited = /prohibited/i.test(nameFrom);
        const childPresentationalText = $features.find('.role-childpresentational').text();
        const childrenPresentational = /true/i.test(childPresentationalText)
            ? true
            : /false/i.test(childPresentationalText)
                ? false
                : undefined;
        // Sorted first so that, among same-named entries, the first one after sorting is kept.
        const ownedProperties = arrayUnique([...ownedRequiredProps, ...ownedInheritedProps, ...ownedProps].sort(nameCompare));
        const prohibitedProperties = $features
            .find('.role-disallowed li code')
            .toArray()
            .map(codeEl => $(codeEl).text().trim());
        roles.push({
            name,
            description,
            isAbstract,
            generalization,
            requiredContextRole,
            requiredOwnedElements,
            accessibleNameRequired,
            accessibleNameFromAuthor,
            accessibleNameFromContent,
            accessibleNameProhibited,
            childrenPresentational,
            ownedProperties,
            prohibitedProperties,
        });
    });
    // the "none" role is synonym
    const [sourceName, synonymName] = version === '1.1' || version === '1.2'
        ? ['presentation', 'none']
        : ['none', 'presentation'];
    const sourceRole = roles.find(role => role.name === sourceName);
    const synonymIndex = roles.findIndex(role => role.name === synonymName);
    // Only overwrite an entry that exists; assigning to index -1 would merely
    // attach a stray "-1" property to the array.
    if (sourceRole && synonymIndex !== -1) {
        roles[synonymIndex] = {
            ...sourceRole,
            name: synonymName,
            description: roles[synonymIndex].description,
        };
    }
    roles.sort(nameCompare);
    return roles;
}
167
/**
 * Builds the list of ARIA states and properties referenced by the given roles.
 *
 * Each name found in any role's `ownedProperties` is looked up by id in the
 * ARIA document of `version`, and enriched with the equivalent HTML
 * attributes reported by `getAriaInHtml`.
 *
 * @param {string} version - ARIA version handed to `getARIASpecURLByVersion`.
 * @param {object[]} roles - Role records; only `ownedProperties[].name` is read.
 * @returns {Promise<object[]>} Property records sorted with `nameCompare`.
 */
async function getProps(version, roles) {
    const $ = await fetch(getARIASpecURLByVersion(version));
    // Distinct state/property names across all roles.
    const propNames = new Set();
    for (const role of roles) {
        if (!role.ownedProperties) {
            continue;
        }
        for (const prop of role.ownedProperties) {
            propNames.add(prop.name);
        }
    }
    const { implicitProps } = await getAriaInHtml();
    const globalNames = new Set(
        $('#global_states li a')
            .toArray()
            .map(anchor => $(anchor).attr('href')?.replace('#', ''))
            .filter(Boolean),
    );
    // Removes the "(default)" marker (and an optional colon) from a value name.
    const withoutDefaultMarker = (text) => text.replaceAll(/\(default\)\s*:?/gi, '').trim();
    const describe = (name) => {
        const $section = $(`#${name}`);
        const className = $section.attr('class');
        const type = className && /property/i.test(className) ? 'property' : 'state';
        const deprecated = (className && /deprecated/i.test(className)) || undefined;
        const value = $section
            .find(`table.${type}-features .${type}-value, .state-features .property-value`)
            .text()
            .trim();
        const $rows = $section.find('table.value-descriptions tbody tr');
        const valueDescriptions = {};
        $rows.each((_, tr) => {
            const $tr = $(tr);
            const valueName = withoutDefaultMarker($tr.find('.value-name').text());
            valueDescriptions[valueName] = $tr.find('.value-description').text().trim();
        });
        // Only token-typed values enumerate their allowed values.
        const enumValues = value === 'token' || value === 'token list'
            ? $rows
                .find('.value-name')
                .toArray()
                .map(cell => withoutDefaultMarker($(cell).text()))
            : [];
        const defaultValue = $section
            .find('table.value-descriptions .value-name .default')
            .text()
            .replaceAll(/\(default\)/gi, '')
            .trim() || undefined;
        const isGlobal = globalNames.has(name) || undefined;
        const htmlEquivalents = implicitProps.filter(p => p.name === name);
        const equivalentHtmlAttrs = htmlEquivalents.length > 0
            ? htmlEquivalents.map(attr => ({
                htmlAttrName: attr.htmlAttrName,
                value: attr.value,
            }))
            : undefined;
        const aria = {
            name,
            type,
            deprecated,
            value,
            enum: enumValues,
            defaultValue,
            isGlobal,
            equivalentHtmlAttrs,
            valueDescriptions: Object.keys(valueDescriptions).length > 0 ? valueDescriptions : undefined,
        };
        // Conditional Value
        if (name === 'aria-checked') {
            aria.value = 'true/false';
            aria.conditionalValue = [
                {
                    role: ['checkbox', 'menuitemcheckbox'],
                    value: 'tristate',
                },
            ];
        }
        else if (name === 'aria-hidden' && aria.equivalentHtmlAttrs) {
            for (const attr of aria.equivalentHtmlAttrs) {
                if (attr.htmlAttrName === 'hidden') {
                    attr.isNotStrictEquivalent = true;
                }
            }
        }
        return aria;
    };
    const arias = [...propNames].sort().map(name => describe(name));
    arias.sort(nameCompare);
    return arias;
}
262
/**
 * Reads, from the "ARIA in HTML" document, the table of ARIA attributes that
 * have an equivalent HTML attribute.
 *
 * @returns {Promise<{ implicitProps: { name: string, value: string | null, htmlAttrName: string }[] }>}
 *   One entry per table row that yields an ARIA attribute name.
 */
async function getAriaInHtml() {
    const $ = await fetch('https://www.w3.org/TR/html-aria/');
    const rows = $('#requirements-for-use-of-aria-attributes-in-place-of-equivalent-html-attributes table tbody tr').toArray();
    const implicitProps = rows.flatMap(row => {
        const $row = $(row);
        const htmlAttrName = $row.find('th:nth-of-type(1) a').eq(0).text();
        if (htmlAttrName === 'contenteditable') {
            // FIXME:
            // Cannot design yet because the contenteditable attribute is evaluated with ancestors.
            return [];
        }
        // The cell holds e.g. `aria-foo="bar"`; split it into name and raw value.
        const [name, rawValue] = $row.find('td:nth-of-type(1) code').eq(0).text().split('=');
        if (!name) {
            return [];
        }
        const value = rawValue?.replace(/"|'/g, '').trim() ?? null;
        return [
            {
                name,
                // "..." is treated the same as having no value.
                value: value === '...' ? null : value,
                htmlAttrName,
            },
        ];
    });
    return { implicitProps };
}
290
/**
 * Tries several selectors against `$el` and returns the first non-empty match.
 *
 * @param {object} $el - Selection exposing `find(selector)`.
 * @param {string[]} selectors - Selectors to try, in order.
 * @returns {object} The first result with `length > 0`; otherwise the result
 *   of the last selector tried, or `$el` itself when `selectors` is empty.
 */
function $$(
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
$el, selectors) {
    let $lastTried = $el;
    for (const selector of selectors) {
        $lastTried = $el.find(selector);
        if ($lastTried.length > 0) {
            break;
        }
    }
    return $lastTried;
}
package/lib/fetch.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ /// <reference types="cheerio" />
2
+ export declare function fetch(url: string): Promise<cheerio.Root>;
3
+ export declare function fetchText(url: string): Promise<string>;
4
+ export declare function getReferences(): string[];
package/lib/fetch.js ADDED
@@ -0,0 +1,47 @@
1
+ import cheerio from 'cheerio';
2
+ import { Bar, Presets } from 'cli-progress';
3
// Response bodies keyed by URL (an empty string is stored for failed requests).
const cache = new Map();
// Parsed documents keyed by URL.
const domCache = new Map();
// Progress counters. `total` starts at 1 and `current` at 0; `getReferences`
// performs one extra `current` increment when it reports completion.
let total = 1;
let current = 0;
const bar = new Bar(
    {
        format: '🔎 Fetch references... {bar} {percentage}% | ETA: {eta}s | {value}/{total} {process}',
    },
    Presets.shades_grey,
);
// The progress bar is started as soon as this module is loaded.
bar.start(total, current, { process: '🚀 Started.' });
11
/**
 * Loads a URL and returns it parsed by `cheerio.load`.
 *
 * Parsed documents are memoised in `domCache`, so repeated calls for the same
 * URL return the same object once the first call has completed.
 *
 * @param {string} url - Address of the document.
 * @returns {Promise<object>} The value produced by `cheerio.load`.
 */
export async function fetch(url) {
    if (!domCache.has(url)) {
        const $ = cheerio.load(await fetchText(url));
        domCache.set(url, $);
        return $;
    }
    return domCache.get(url);
}
20
/**
 * Returns the body of `url` as text, using `cache` to avoid repeated requests.
 *
 * Failures are best-effort: when the request or reading the body throws, an
 * empty string is cached and returned. Every call advances the progress bar.
 *
 * @param {string} url - Address to request through `global.fetch`.
 * @returns {Promise<string>} The response text, or `''` on failure.
 */
export async function fetchText(url) {
    total += 1;
    bar.setTotal(total);
    if (!cache.has(url)) {
        let body = '';
        try {
            const response = await global.fetch(url);
            body = await response.text();
        }
        catch {
            // Deliberately swallowed: an unreachable reference yields an empty body.
            body = '';
        }
        cache.set(url, body);
    }
    const text = cache.get(url);
    current += 1;
    // Long URLs are abbreviated to their first and last 15 characters.
    const label = url.length > 30 ? `${url.slice(0, 15)}...${url.slice(-15)}` : url;
    bar.update(current, { process: `🔗 ${label}` });
    return text;
}
42
/**
 * Finishes the progress bar and lists every URL requested so far.
 *
 * @returns {string[]} The keys of `cache`, sorted with the default sort order.
 */
export function getReferences() {
    current += 1;
    bar.update(current, { process: '🎉 Finished.' });
    bar.stop();
    const urls = Array.from(cache.keys());
    urls.sort();
    return urls;
}
@@ -0,0 +1,3 @@
1
+ export declare function getGlobalAttrs(filePath: string): {
2
+ readonly [category: string]: Readonly<Record<string, Partial<import("@markuplint/ml-spec").Attribute>>>;
3
+ };
@@ -0,0 +1,5 @@
1
+ import { readJson } from './read-json.js';
2
/**
 * Loads the global-attribute definitions from a JSON file.
 *
 * @param {string} filePath - Path handed to `readJson` unchanged.
 * @returns {object} The parsed content of the file.
 */
export function getGlobalAttrs(filePath) {
    return readJson(filePath);
}
@@ -0,0 +1,2 @@
1
+ import type { ExtendedElementSpec } from '@markuplint/ml-spec';
2
+ export declare function getElements(filePattern: string): Promise<ExtendedElementSpec[]>;
@@ -0,0 +1,107 @@
1
+ import { readJsons } from './read-json.js';
2
+ import { fetchHTMLElement, fetchObsoleteElements } from './scraping.js';
3
+ import { getSVGElementList } from './svg.js';
4
+ import { getName, nameCompare, sortObjectByKey } from './utils.js';
5
/**
 * Names of HTML elements registered as obsolete when no spec file covers them.
 *
 * @see https://html.spec.whatwg.org/multipage/obsolete.html#non-conforming-features
 */
const obsoleteList = [
    'applet', 'acronym', 'bgsound', 'dir',
    'frame', 'frameset', 'noframes',
    'isindex', 'keygen', 'listing', 'menuitem', 'nextid', 'noembed',
    'param', 'plaintext', 'rb', 'rtc', 'strike', 'xmp',
    'basefont', 'big', 'blink', 'center', 'font',
    'marquee', 'multicol', 'nobr', 'spacer', 'tt',
];
39
/**
 * Builds the element specs from the spec JSON files plus scraped MDN data.
 *
 * Steps:
 * 1. Read every file matching `filePattern`; the element name is taken from
 *    the `spec.<name>.json` part of the file path.
 * 2. Add placeholder specs for obsolete HTML elements and for the SVG
 *    elements returned by `getSVGElementList` that have no file.
 * 3. For each spec, fetch the MDN page and merge: values from the JSON file
 *    win over scraped values; attributes known only to MDN are added.
 *
 * @param {string} filePattern - Glob pattern handed to `readJsons`.
 * @returns {Promise<object[]>} Specs sorted by name, with the SVG namespace
 *   after the others, and without any entry named `h1-h6`.
 */
export async function getElements(filePattern) {
    const specs = await readJsons(filePattern, (file, body) => {
        const name = file.replace(/^.+spec\.([\w-]+)\.json$/i, '$1');
        return {
            // @ts-ignore
            name,
            ...body,
        };
    });
    const deprecatedList = await getSVGElementList();
    const obsoleteElements = fetchObsoleteElements([...obsoleteList, ...deprecatedList], specs);
    specs.push(...obsoleteElements);
    const merged = await Promise.all(specs.map(async (el) => {
        const { localName, namespace, ml } = getName(el.name);
        const cite = `https://developer.mozilla.org/en-US/docs/Web/${ml}/Element/${localName}`;
        const mdnData = await fetchHTMLElement(cite);
        // The file-derived name and namespace are replaced by the resolved ones below.
        const { name: _fileName, namespace: _fileNamespace, ...rest } = el;
        return {
            // @ts-ignore
            name: namespace === 'http://www.w3.org/2000/svg' ? `svg:${localName}` : localName,
            namespace,
            cite: rest.cite ?? mdnData.cite,
            description: mdnData.description,
            categories: mdnData.categories,
            contentModel: rest.contentModel,
            aria: rest.aria,
            omission: mdnData.omission,
            // Anything defined in the JSON file overrides the scraped defaults above.
            ...rest,
            globalAttrs: sortObjectByKey(rest.globalAttrs ?? {}),
            attributes: sortObjectByKey(mergeMdnAttributes(rest.attributes, mdnData.attributes)),
        };
    }));
    return merged
        .sort(nameCompare)
        // Relies on the sort being stable to keep the name order within each namespace.
        .sort((a, b) => (a.namespace === b.namespace ? 0 : a.namespace === 'http://www.w3.org/2000/svg' ? 1 : -1))
        .filter(spec => spec.name !== 'h1-h6');
}
/**
 * Merges scraped MDN attribute data into the attributes declared in a spec file.
 *
 * Attributes missing from the file are added with the MDN flags only. File
 * attributes that carry their own `name` key get the MDN fields of that same
 * attribute as defaults; any other existing entry is left untouched.
 */
function mergeMdnAttributes(fileAttributes, mdnAttributes) {
    const attrs = { ...fileAttributes };
    for (const mdnAttr of Object.values(mdnAttributes ?? {})) {
        if (!mdnAttr.name) {
            continue;
        }
        const mdnFields = {
            description: mdnAttr.description,
            experimental: mdnAttr.experimental,
            obsolete: mdnAttr.obsolete,
            deprecated: mdnAttr.deprecated,
            nonStandard: mdnAttr.nonStandard,
        };
        const current = attrs[mdnAttr.name];
        if (!current) {
            attrs[mdnAttr.name] = mdnFields;
            continue;
        }
        if (typeof current === 'object' && 'name' in current) {
            attrs[mdnAttr.name] = {
                // @ts-ignore for key order that "name" is first
                name: mdnAttr.name,
                // Only this attribute's own fields; not the whole MDN attribute map.
                ...mdnFields,
                // @ts-ignore
                ...current,
            };
        }
    }
    return attrs;
}
package/lib/index.d.ts ADDED
@@ -0,0 +1,7 @@
1
+ export type Options = {
2
+ readonly outputFilePath: string;
3
+ readonly htmlFilePattern: string;
4
+ readonly commonAttrsFilePath: string;
5
+ readonly commonContentsFilePath: string;
6
+ };
7
+ export declare function main({ outputFilePath, htmlFilePattern, commonAttrsFilePath, commonContentsFilePath }: Options): Promise<void>;
package/lib/index.js ADDED
@@ -0,0 +1,27 @@
1
+ import { writeFile } from 'node:fs/promises';
2
+ import { getAria } from './aria.js';
3
+ import { getReferences } from './fetch.js';
4
+ import { getGlobalAttrs } from './global-attrs.js';
5
+ import { getElements } from './html-elements.js';
6
+ import { readJson } from './read-json.js';
7
/**
 * Generates the spec JSON and writes it to `outputFilePath`.
 *
 * @param {object} options
 * @param {string} options.outputFilePath - Destination of the generated JSON.
 * @param {string} options.htmlFilePattern - Pattern handed to `getElements`.
 * @param {string} options.commonAttrsFilePath - File handed to `getGlobalAttrs`.
 * @param {string} options.commonContentsFilePath - JSON file whose `models`
 *   member becomes `def['#contentModels']`.
 * @returns {Promise<void>}
 */
export async function main({ outputFilePath, htmlFilePattern, commonAttrsFilePath, commonContentsFilePath }) {
    const [specs, globalAttrs, aria] = await Promise.all([
        getElements(htmlFilePattern),
        getGlobalAttrs(commonAttrsFilePath),
        getAria(),
    ]);
    // Collected only after all scraping is done, so every fetched URL is listed.
    const cites = getReferences();
    const contentModels = readJson(commonContentsFilePath).models;
    const output = {
        cites,
        def: {
            '#globalAttrs': globalAttrs,
            '#aria': aria,
            '#contentModels': contentModels,
        },
        specs: [...specs],
    };
    await writeFile(outputFilePath, JSON.stringify(output, null, 2));
    // eslint-disable-next-line no-console
    console.log(`🎁 Output: ${outputFilePath}`);
}
@@ -0,0 +1,2 @@
1
+ export declare function readJson<T = Record<string, any>>(filePath: string): T;
2
+ export declare function readJsons<T = Record<string, any>>(pattern: string, hook?: (fileName: string, body: T) => T | Promise<T>): Promise<T[]>;
@@ -0,0 +1,22 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { glob } from 'glob';
4
+ import strip from 'strip-json-comments';
5
/**
 * Synchronously reads a JSON file, passing its text through `strip` before parsing.
 *
 * @param {string} filePath - Absolute path of the file.
 * @returns {any} The parsed content.
 * @throws {Error} When `filePath` is not absolute.
 */
export function readJson(filePath) {
    if (path.isAbsolute(filePath)) {
        const raw = fs.readFileSync(filePath, { encoding: 'utf8' });
        return JSON.parse(strip(raw));
    }
    throw new Error(`The path must be absolute path: ${filePath}`);
}
13
/**
 * Reads every JSON file matching a glob pattern.
 *
 * @param {string} pattern - Absolute glob pattern.
 * @param {(fileName: string, body: any) => any} [hook] - Maps each parsed
 *   file to the value placed in the result; by default the body itself.
 * @returns {Promise<any[]>} The (awaited) hook results, one per matched file.
 * @throws {Error} (as a rejection) when `pattern` is not absolute.
 */
export async function readJsons(pattern, hook = (_, body) => body) {
    if (!path.isAbsolute(pattern)) {
        throw new Error(`The pattern must be absolute path: ${pattern}`);
    }
    const files = await glob(pattern);
    const pending = files.map(file => hook(file, readJson(file)));
    return Promise.all(pending);
}
@@ -0,0 +1,4 @@
1
+ import type { ExtendedElementSpec } from '@markuplint/ml-spec';
2
+ export declare function fetchHTMLElementLinks(): Promise<string[]>;
3
+ export declare function fetchObsoleteElements(obsoleteList: readonly string[], specs: readonly ExtendedElementSpec[]): ExtendedElementSpec[];
4
+ export declare function fetchHTMLElement(link: string): Promise<ExtendedElementSpec>;
@@ -0,0 +1,232 @@
1
+ /* global cheerio */
2
+ import { fetch } from './fetch.js';
3
+ import { getThisOutline, sortObjectByKey } from './utils.js';
4
+ const MAIN_ARTICLE_SELECTOR = 'article.main-page-content, article.article';
5
/**
 * Lists the links found under the "HTML elements" entry of the MDN sidebar.
 *
 * @returns {Promise<string[]>} Each `href` prefixed with `https://developer.mozilla.org`.
 */
export async function fetchHTMLElementLinks() {
    const $ = await fetch('https://developer.mozilla.org/en-US/docs/Web/HTML/Element');
    const summaries = $('#sidebar-quicklinks summary').toArray();
    // The sidebar entry whose label mentions "HTML elements".
    const headingEl = summaries.find(el => /html elements/i.test($(el).text()));
    return $(headingEl)
        .siblings('ol,ul')
        .find('li a')
        .toArray()
        .map(anchor => `https://developer.mozilla.org${$(anchor).attr('href')}`);
}
17
/**
 * Creates placeholder specs for obsolete elements that have no spec yet.
 *
 * @param {readonly string[]} obsoleteList - Candidate element names.
 * @param {readonly { name: string }[]} specs - Existing specs; a candidate
 *   whose name equals a spec's `name` is skipped.
 * @returns {object[]} One placeholder spec per remaining name, in input order.
 */
export function fetchObsoleteElements(obsoleteList, specs) {
    const placeholders = [];
    for (const name of obsoleteList) {
        if (specs.some(spec => spec.name === name)) {
            continue;
        }
        placeholders.push({
            name,
            cite: 'https://html.spec.whatwg.org/multipage/obsolete.html#non-conforming-features',
            obsolete: true,
            categories: [],
            contentModel: {
                contents: true,
            },
            aria: {
                permittedRoles: true,
                implicitRole: false,
            },
            omission: false,
            globalAttrs: {},
            attributes: {},
        });
    }
    return placeholders;
}
43
/**
 * Scrapes one MDN element reference page into a spec-shaped object.
 *
 * @param {string} link - URL of the MDN page; its last path segment,
 *   lower-cased, is used as the element name (`heading_elements` becomes `h1-h6`).
 * @returns {Promise<object>} Name, cite, description, status flags, content
 *   categories and attributes from the page. `contentModel`, `aria` and
 *   `omission` are fixed placeholder values.
 */
export async function fetchHTMLElement(link) {
    const $ = await fetch(link);
    const slug = link.replace(/.+\/([\w-]+)$/, '$1').toLowerCase();
    const name = slug === 'heading_elements' ? 'h1-h6' : slug;
    const $article = $(MAIN_ARTICLE_SELECTOR);
    $article.find('p:empty').remove();
    // Text of a selection, trimmed, with whitespace runs collapsed.
    const squash = ($selection) => $selection.text().trim().replaceAll(/\s+/g, ' ');
    const exists = ($scope, selector) => $scope.find(selector).length > 0;
    // Several page layouts are tried in turn; the first non-empty text wins.
    const description = squash($article.find('h2#summary').next('div').find('> p:first-of-type')) ||
        squash($article.find('.seoSummary').closest('p')) ||
        squash($article.find('h1').next('div').find('> p:first-of-type')) ||
        squash($article.find('.section-content:eq(0)').find('> p:eq(0)'));
    const $bcHeader = $article.find('.bc-table').find('tbody tr:first-child th');
    let flags;
    if ($bcHeader.find('code').text().trim() === name) {
        // The compatibility table's first row describes this element: use its icons.
        flags = {
            experimental: exists($bcHeader, '.ic-experimental') || undefined,
            obsolete: exists($bcHeader, '.ic-obsolete') || undefined,
            deprecated: exists($bcHeader, '.ic-deprecated') || undefined,
            nonStandard: exists($bcHeader, '.ic-non-standard') || undefined,
        };
    }
    else {
        // Otherwise fall back to banners and notecards in the article.
        flags = {
            experimental: exists($article, '.blockIndicator.experimental, > div .notecard.experimental') || undefined,
            obsolete: exists($article, '.obsoleteHeader') ||
                /obsolete/i.test($('h1').text()) ||
                exists($article, '> div:first-child .notecard.obsolete') ||
                undefined,
            deprecated: exists($article, '.deprecatedHeader, > div:first-child .notecard.deprecated') ||
                exists($article.find('h1').next(), '.notecard.deprecated') ||
                undefined,
            nonStandard: exists($article, '.nonStandardHeader, h4#Non-standard') || undefined,
        };
    }
    const { experimental, obsolete, deprecated, nonStandard } = flags;
    const cat = getProperty($, 'Content categories');
    const categoryPatterns = [
        [/metadata content/i, '#metadata'],
        [/flow content/i, '#flow'],
        [/sectioning content/i, '#sectioning'],
        [/heading content/i, '#heading'],
        [/phrasing content/i, '#phrasing'],
        [/embedded content/i, '#embedded'],
        [/interactive content/i, '#interactive'],
        [/palpable content/i, '#palpable'],
        [/script-supporting/i, '#script-supporting'],
    ];
    const categories = categoryPatterns
        .filter(([pattern]) => pattern.test(cat))
        .map(([, category]) => category);
    // Later headings override earlier ones when the same attribute appears twice.
    const attributeHeadings = [
        '#attributes',
        '#deprecated_attributes',
        '#individual_attributes',
        '#non-standard_attributes',
        '#obsolete_attributes',
    ];
    const collected = attributeHeadings.reduce((acc, heading) => ({ ...acc, ...getAttributes($, heading, name).attributes }), {});
    const attributes = sortObjectByKey(collected);
    return {
        name,
        cite: link,
        description,
        experimental,
        obsolete,
        deprecated,
        nonStandard,
        categories,
        contentModel: {
            contents: false,
        },
        aria: {
            implicitRole: false,
            permittedRoles: true,
        },
        omission: false,
        attributes,
    };
}
135
/**
 * Reads one row of a page's properties table.
 *
 * Rows are taken from `table.properties` inside the main article; when that
 * yields nothing, the table directly following `#Technical_summary` is used.
 *
 * @param {Function} $ - Document-level query function for the page.
 * @param {string} prop - Row label, used as a case-insensitive regular
 *   expression source against each header cell's text.
 * @returns {string} Text of the `td` siblings of the matching header cells,
 *   with whitespace collapsed; `''` when nothing matches.
 */
function getProperty(
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
$, prop) {
    let $tr = $(MAIN_ARTICLE_SELECTOR).find('table.properties tr');
    // `.find()` always returns a selection, so emptiness has to be detected
    // through `length`; a nullish check can never trigger the fallback.
    if ($tr.length === 0) {
        $tr = $('#Technical_summary').next('table').find('tr');
    }
    const labelPattern = new RegExp(prop, 'i');
    const $th = $($tr
        .find('th')
        .toArray()
        .filter(el => labelPattern.test($(el).text())));
    return $th.siblings('td').text().trim().replaceAll(/\s+/g, ' ');
}
145
/**
 * Collects the attributes documented in one section of an MDN element page.
 *
 * The section is the outline returned by `getThisOutline` for `heading`;
 * every `dt` under `> div > dl` containing a `code` name becomes an entry.
 *
 * @param {Function} $ - Document-level query function for the page.
 * @param {string} heading - Selector of the section heading, e.g. `'#attributes'`.
 * @param {string} tagName - Accepted for call compatibility; not read here.
 * @returns {{ attributes: object }} Attribute records keyed by name. When a
 *   name occurs more than once, fields of the first occurrence take precedence.
 */
function getAttributes(
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
$, heading, tagName) {
    const $heading = $(heading);
    const $outline = getThisOutline($, $heading);
    const attributes = {};
    for (const dt of $outline.find('> div > dl > dt').toArray()) {
        const $dt = $(dt);
        const name = $dt.find('code').text().trim();
        if (!name) {
            continue;
        }
        // Id of the closest heading above this term, if any.
        const headingId = getItsHeading($dt)?.attr('id');
        const hasMarker = (selector) => $dt.find(selector).length > 0;
        const experimental = hasMarker('.icon-beaker, .icon.experimental, .icon.icon-experimental') || undefined;
        const obsolete = hasMarker('.icon-trash, .icon.obsolete, .icon.icon-obsolete') ||
            hasMarker('.obsolete') ||
            headingId === 'obsolete_attributes' ||
            undefined;
        const deprecated = hasMarker('.icon-thumbs-down-alt, .icon.deprecated, .icon.icon-deprecated') ||
            headingId === 'deprecated_attributes' ||
            undefined;
        const nonStandard = hasMarker('.icon-warning-sign, .icon.non-standard, .icon.icon-nonstandard') || undefined;
        const description = $dt
            .next('dd')
            .toArray()
            .map(el => $(el).text())
            .join('')
            .trim()
            .replaceAll(/\s+/g, ' ');
        const scraped = { description, experimental, obsolete, deprecated, nonStandard };
        const current = attributes[name];
        if (!current) {
            attributes[name] = {
                name,
                type: 'Any',
                // @ts-ignore
                ...scraped,
            };
            continue;
        }
        if (typeof current === 'object' && 'name' in current) {
            attributes[name] = {
                // @ts-ignore for key order that "name" is first
                name,
                // @ts-ignore for key order that "description" is second
                ...scraped,
                // @ts-ignore
                ...current,
            };
        }
    }
    return { attributes };
}
206
/**
 * Walks backwards/upwards from `$start` until a heading element is found.
 *
 * Each step goes to the previous sibling or, if there is none, to the parent
 * (see `upToPrevOrParent`). `$start` itself is never tested.
 *
 * @param {object} $start - Selection to start from.
 * @returns {object | null} The first heading reached, or `null` when the
 *   walk runs out of elements.
 */
function getItsHeading(
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
$start) {
    for (let $cursor = upToPrevOrParent($start); $cursor.length > 0; $cursor = upToPrevOrParent($cursor)) {
        if (isHeading($cursor)) {
            return $cursor;
        }
    }
    return null;
}
218
/**
 * Moves one step towards the start of the document.
 *
 * @param {object} $start - Selection exposing `prev()` and `parent()`.
 * @returns {object} The previous sibling; when that selection is empty, the parent.
 */
function upToPrevOrParent(
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
$start) {
    const $prev = $start.prev();
    return $prev.length === 0 ? $start.parent() : $prev;
}
227
/**
 * Tells whether the first element of a selection is an `h1`–`h6` element.
 *
 * @param {object} $el - Selection whose element at index 0 is inspected.
 * @returns {boolean} `true` when that element's `tagName` is h1..h6 (any case).
 */
function isHeading(
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
$el) {
    // @ts-ignore
    const { tagName } = $el[0];
    return /^h[1-6]$/i.test(tagName);
}
package/lib/svg.d.ts ADDED
@@ -0,0 +1 @@
1
+ export declare function getSVGElementList(): Promise<string[]>;
package/lib/svg.js ADDED
@@ -0,0 +1,18 @@
1
+ import { fetch } from './fetch.js';
2
+ import { getThisOutline } from './utils.js';
3
/**
 * Lists the SVG elements MDN files under "obsolete and deprecated elements".
 *
 * @returns {Promise<string[]>} Element names prefixed with `svg_`, taken from
 *   the link texts of that section with `<` and `>` removed.
 */
export async function getSVGElementList() {
    const $ = await fetch('https://developer.mozilla.org/en-US/docs/Web/SVG/Element');
    // Unwrap every <section> so its children become siblings of what surrounded it.
    $('section').each((_, section) => {
        const $section = $(section);
        $section.before($section.children());
        $section.remove();
    });
    const $deprecatedIndex = getThisOutline($, $('#obsolete_and_deprecated_elements'));
    const links = $deprecatedIndex.find('div > a').toArray();
    return links.map(link => 'svg_' + $(link).text().trim().replaceAll(/<|>/g, ''));
}
package/lib/utils.d.ts ADDED
@@ -0,0 +1,16 @@
1
+ /// <reference types="cheerio" />
2
+ type HasName = {
3
+ readonly name: string;
4
+ };
5
+ export declare function nameCompare(a: HasName | string, b: HasName | string): 1 | 0 | -1;
6
+ export declare function sortObjectByKey<T>(o: T): T;
7
+ export declare function arrayUnique<T extends HasName>(array: readonly T[]): T[];
8
+ export declare function getThisOutline($: cheerio.Root, $start: cheerio.Cheerio): cheerio.Cheerio;
9
+ export declare function mergeAttributes<T>(fromDocs: T, fromJSON: T): T;
10
+ export declare function keys<T, K = keyof T>(object: T): K[];
11
+ export declare function getName(origin: string): {
12
+ localName: string;
13
+ namespace: "http://www.w3.org/2000/svg" | undefined;
14
+ ml: string;
15
+ };
16
+ export {};
package/lib/utils.js ADDED
@@ -0,0 +1,73 @@
1
+ /* global cheerio */
2
/**
 * Comparator for sorting by name.
 *
 * Strings are compared as they are. For any other value the upper-cased
 * `name` is used, falling back to `String(value)` when there is no `name`.
 *
 * @param {{ name: string } | string} a
 * @param {{ name: string } | string} b
 * @returns {1 | 0 | -1}
 */
export function nameCompare(a, b) {
    const toKey = (item) => (typeof item === 'string' ? item : item.name?.toUpperCase() ?? String(item));
    const keyA = toKey(a);
    const keyB = toKey(b);
    return keyA < keyB ? -1 : keyA > keyB ? 1 : 0;
}
13
/**
 * Returns a shallow copy of an object whose keys were inserted in the order
 * given by `nameCompare`.
 *
 * @template T
 * @param {T} o - Source object; it is not modified.
 * @returns {T} A new object with the same own enumerable string-keyed entries.
 */
export function sortObjectByKey(o) {
    // @ts-ignore
    const sorted = {};
    // @ts-ignore
    for (const key of Object.keys(o).sort(nameCompare)) {
        // @ts-ignore
        sorted[key] = o[key];
    }
    return sorted;
}
25
/**
 * Removes items whose `name` was already seen, keeping the first occurrence.
 *
 * @template {{ name: string }} T
 * @param {readonly T[]} array - Items to filter; it is not modified.
 * @returns {T[]} A new array holding the first item for each distinct name.
 */
export function arrayUnique(array) {
    const seenNames = new Set();
    return array.filter(item => {
        if (seenNames.has(item.name)) {
            return false;
        }
        seenNames.add(item.name);
        return true;
    });
}
37
/**
 * Copies a heading and the siblings that follow it, up to the next `h2`,
 * into a fresh `<div>` container.
 *
 * @param {Function} $ - Query function used to create the container.
 * @param {object} $start - Selection of the starting element (typically a heading).
 * @returns {object} The container holding clones of `$start` and of every
 *   following sibling before the next `h2`.
 */
export function getThisOutline(
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
$,
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
$start) {
    const $container = $('<div></div>');
    const clones = [$start.clone()];
    for (let $sibling = $start.next(); $sibling.length > 0 && $sibling.filter('h2').length === 0; $sibling = $sibling.next()) {
        clones.push($sibling.clone());
    }
    for (const clone of clones) {
        $container.append(clone);
    }
    return $container;
}
53
/**
 * Shallow-merges two attribute maps; entries of `fromJSON` win on conflict.
 *
 * @template T
 * @param {T} fromDocs - Base entries.
 * @param {T} fromJSON - Overriding entries.
 * @returns {T} A new object; neither input is modified.
 */
export function mergeAttributes(fromDocs, fromJSON) {
    const merged = { ...fromDocs, ...fromJSON };
    return merged;
}
59
/**
 * Thin wrapper around `Object.keys`.
 *
 * @template T
 * @param {T} object - Object to inspect.
 * @returns {string[]} Its own enumerable string keys, in `Object.keys` order.
 */
export function keys(object) {
    // @ts-ignore
    const ownKeys = Object.keys(object);
    return ownKeys;
}
63
/**
 * Splits a spec name such as `a` or `svg_font-face` into its parts.
 *
 * An `svg_` prefix (matched case-insensitively) marks an SVG element. The
 * local name may contain word characters and hyphens, so hyphenated element
 * names such as `font-face` are kept whole.
 *
 * @param {string} origin - Name as used in spec file names.
 * @returns {{ localName: string, namespace: 'http://www.w3.org/2000/svg' | undefined, ml: string }}
 *   `ml` is `'SVG'` for prefixed names and `'HTML'` otherwise. When the
 *   pattern does not match, `localName` is `origin` unchanged.
 */
export function getName(origin) {
    const [, ns, localName] = origin.match(/^(?:(svg)_)?([\w-]+)/i) ?? [];
    const name = localName ?? origin;
    // The prefix is matched case-insensitively, so compare it case-insensitively too.
    const isSvg = ns?.toLowerCase() === 'svg';
    return {
        localName: name,
        namespace: isSvg ? 'http://www.w3.org/2000/svg' : undefined,
        ml: isSvg ? 'SVG' : 'HTML',
    };
}
package/package.json ADDED
@@ -0,0 +1,39 @@
1
+ {
2
+ "name": "@markuplint/spec-generator",
3
+ "version": "4.0.0-alpha.10",
4
+ "description": "Generates @markuplint/html-spec",
5
+ "repository": "git@github.com:markuplint/markuplint.git",
6
+ "author": "Yusuke Hirao <yusukehirao@me.com>",
7
+ "license": "MIT",
8
+ "private": false,
9
+ "type": "module",
10
+ "exports": {
11
+ ".": {
12
+ "import": "./lib/index.js",
13
+ "types": "./lib/index.d.ts"
14
+ }
15
+ },
16
+ "publishConfig": {
17
+ "access": "public"
18
+ },
19
+ "scripts": {
20
+ "build": "tsc",
21
+ "clean": "tsc --build --clean"
22
+ },
23
+ "dependencies": {
24
+ "@types/cheerio": "^0.22.35",
25
+ "ajv": "^8.12.0",
26
+ "cheerio": "^1.0.0-rc.12",
27
+ "cli-progress": "^3.12.0",
28
+ "fast-xml-parser": "^4.3.3",
29
+ "glob": "^10.3.6",
30
+ "strip-json-comments": "^5.0.1"
31
+ },
32
+ "devDependencies": {
33
+ "@markuplint/ml-spec": "4.0.0-alpha.10",
34
+ "@markuplint/test-tools": "4.0.0-alpha.10",
35
+ "@types/cli-progress": "^3.11.5",
36
+ "type-fest": "^4.10.1"
37
+ },
38
+ "gitHead": "b41153ea665aa8f091daf6114a06047f4ccb8350"
39
+ }