@sorane/core 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -4
- package/src/build.ts +45 -21
- package/src/catalog.ts +171 -33
- package/src/creative-work-type.ts +28 -0
- package/src/dataset-page.ts +102 -0
- package/src/migrate.ts +2 -2
- package/src/open-data.ts +124 -0
- package/src/site-meta.ts +1 -1
- package/src/ssg.ts +32 -3
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sorane/core",
|
|
3
|
-
"version": "0.2.6",
|
|
3
|
+
"version": "0.2.7",
|
|
4
4
|
"description": "OKF-native static site build engine for sorane",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -25,9 +25,9 @@
|
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"@mermaid-js/mermaid-cli": "~11.15.0",
|
|
28
|
-
"@sorane/font": "0.2.
|
|
29
|
-
"@sorane/okf": "0.2.
|
|
30
|
-
"@sorane/search": "0.2.
|
|
28
|
+
"@sorane/font": "0.2.7",
|
|
29
|
+
"@sorane/okf": "0.2.7",
|
|
30
|
+
"@sorane/search": "0.2.7",
|
|
31
31
|
"mermaid": "~11.15.0",
|
|
32
32
|
"rehype-autolink-headings": "^7.1.0",
|
|
33
33
|
"rehype-raw": "^7.0.0",
|
package/src/build.ts
CHANGED
|
@@ -3,6 +3,8 @@ import { emitSearchAssets } from "@sorane/search";
|
|
|
3
3
|
import {
|
|
4
4
|
parseConcept,
|
|
5
5
|
buildBundleEntries,
|
|
6
|
+
isBuildableContentType,
|
|
7
|
+
resolveEffectiveType,
|
|
6
8
|
type ParsedConcept,
|
|
7
9
|
} from "@sorane/okf";
|
|
8
10
|
import {
|
|
@@ -23,7 +25,9 @@ import {
|
|
|
23
25
|
parseAiDisclosure,
|
|
24
26
|
resolveAiDisclosureFlags,
|
|
25
27
|
} from "./ai-disclosure.ts";
|
|
26
|
-
import { buildCatalogJsonLd } from "./catalog.ts";
|
|
28
|
+
import { buildCatalogJsonLd, buildDatasetPageJsonLd } from "./catalog.ts";
|
|
29
|
+
import { resolveCatalogCreativeWorkType } from "./creative-work-type.ts";
|
|
30
|
+
import { renderDatasetPageBody } from "./dataset-page.ts";
|
|
27
31
|
import {
|
|
28
32
|
DEFAULT_DIAGRAMS_CONFIG,
|
|
29
33
|
mergeConfig,
|
|
@@ -31,7 +35,7 @@ import {
|
|
|
31
35
|
type SoraneConfig,
|
|
32
36
|
} from "./config.ts";
|
|
33
37
|
import {
|
|
34
|
-
|
|
38
|
+
buildCreativeWorkJsonLd,
|
|
35
39
|
extractDescription,
|
|
36
40
|
renderArticleBodyWithMetaForConfig,
|
|
37
41
|
renderBlogIndexBody,
|
|
@@ -152,7 +156,7 @@ function isSystemPage(concept: ParsedConcept["concept"]): boolean {
|
|
|
152
156
|
|
|
153
157
|
function isBlogArticle(concept: ParsedConcept["concept"], relPath: string): boolean {
|
|
154
158
|
return (
|
|
155
|
-
concept.type === "article" &&
|
|
159
|
+
resolveEffectiveType(concept.type, concept.profile) === "article" &&
|
|
156
160
|
!isSystemPage(concept) &&
|
|
157
161
|
!isNotFoundSource(relPath) &&
|
|
158
162
|
!isSearchView(concept.frontmatter) &&
|
|
@@ -478,21 +482,23 @@ export async function runBuild(opts: BuildOptions): Promise<BuildResult> {
|
|
|
478
482
|
);
|
|
479
483
|
const staticDirName = config.build.static_dir ?? "static";
|
|
480
484
|
|
|
481
|
-
// --- Phase A:
|
|
485
|
+
// --- Phase A: content pages(article, dataset, reference, glossary, faq)---
|
|
482
486
|
for (const p of parsed) {
|
|
483
487
|
if (
|
|
484
|
-
p.concept.type
|
|
488
|
+
!isBuildableContentType(p.concept.type, p.concept.profile) ||
|
|
485
489
|
isSystemPage(p.concept) ||
|
|
486
490
|
isNotFoundSource(p.relPath)
|
|
487
491
|
) {
|
|
488
492
|
continue;
|
|
489
493
|
}
|
|
490
494
|
|
|
495
|
+
const effectiveType = resolveEffectiveType(p.concept.type, p.concept.profile);
|
|
491
496
|
const slug = slugFromRel(p.relPath);
|
|
492
497
|
const outRel = resolvePermalink(config.build.permalink, slug, p.concept.timestamp);
|
|
493
498
|
const depth = outRel.replace(/\\/g, "/").split("/").length - 1;
|
|
494
499
|
const rootPrefix = depth > 0 ? "../".repeat(depth) : "./";
|
|
495
|
-
const isSearch =
|
|
500
|
+
const isSearch =
|
|
501
|
+
effectiveType === "article" && isSearchView(p.concept.frontmatter);
|
|
496
502
|
const isDocsPage = docsMode && docsHrefSet.has(outRel);
|
|
497
503
|
const nav = isSearch
|
|
498
504
|
? undefined
|
|
@@ -514,7 +520,18 @@ export async function runBuild(opts: BuildOptions): Promise<BuildResult> {
|
|
|
514
520
|
: "";
|
|
515
521
|
let pageDiagrams = emptyDiagramMeta();
|
|
516
522
|
let bodyHtml: string;
|
|
517
|
-
|
|
523
|
+
const canonicalUrl = baseUrl.length > 0 ? `${baseUrl}/${outRel}` : undefined;
|
|
524
|
+
if (effectiveType === "dataset") {
|
|
525
|
+
const section = await renderBodySectionForConfig(
|
|
526
|
+
p.concept.body,
|
|
527
|
+
bodySectionOpts(rootPrefix),
|
|
528
|
+
);
|
|
529
|
+
pageDiagrams = section.diagrams;
|
|
530
|
+
bodyHtml = renderDatasetPageBody(p.concept, section.html, {
|
|
531
|
+
pageUrl: canonicalUrl ?? outRel,
|
|
532
|
+
baseUrl,
|
|
533
|
+
});
|
|
534
|
+
} else if (isSearch) {
|
|
518
535
|
const searchIntro = p.concept.body.trim()
|
|
519
536
|
? await renderBodySectionForConfig(p.concept.body, bodySectionOpts(rootPrefix))
|
|
520
537
|
: undefined;
|
|
@@ -552,7 +569,6 @@ export async function runBuild(opts: BuildOptions): Promise<BuildResult> {
|
|
|
552
569
|
|
|
553
570
|
const updated = frontmatterString(p.concept.frontmatter, "updated");
|
|
554
571
|
const author = frontmatterString(p.concept.frontmatter, "author");
|
|
555
|
-
const canonicalUrl = baseUrl.length > 0 ? `${baseUrl}/${outRel}` : undefined;
|
|
556
572
|
const pageImageRefs = collectMarkdownImageRefs({
|
|
557
573
|
body: p.concept.body,
|
|
558
574
|
sourceMdRel: p.relPath,
|
|
@@ -572,19 +588,26 @@ export async function runBuild(opts: BuildOptions): Promise<BuildResult> {
|
|
|
572
588
|
|
|
573
589
|
const jsonLd = isSearch
|
|
574
590
|
? ""
|
|
575
|
-
:
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
591
|
+
: effectiveType === "dataset"
|
|
592
|
+
? buildDatasetPageJsonLd(
|
|
593
|
+
{ slug, url: canonicalUrl ?? outRel, concept: p.concept },
|
|
594
|
+
pageAiFlags.jsonLd,
|
|
595
|
+
)
|
|
596
|
+
: buildCreativeWorkJsonLd({
|
|
597
|
+
workType: resolveCatalogCreativeWorkType(p.concept, docsMode),
|
|
598
|
+
title: p.concept.title,
|
|
599
|
+
description:
|
|
600
|
+
p.concept.description ?? extractDescription(p.concept.body) ?? undefined,
|
|
601
|
+
url: canonicalUrl ?? outRel,
|
|
602
|
+
datePublished: p.concept.timestamp,
|
|
603
|
+
dateModified: updated ?? p.concept.timestamp,
|
|
604
|
+
author,
|
|
605
|
+
siteTitle: config.site.title,
|
|
606
|
+
lang: config.site.lang,
|
|
607
|
+
aiDisclosure:
|
|
608
|
+
pageAiFlags.jsonLd && aiDisclosure ? aiDisclosure : undefined,
|
|
609
|
+
associatedMedia: associatedMedia.length > 0 ? associatedMedia : undefined,
|
|
610
|
+
});
|
|
588
611
|
|
|
589
612
|
const fontCss = await fontCssFor(p.concept, rootPrefix, bodyHtml);
|
|
590
613
|
const headerSearch = headerSearchFor(rootPrefix, {
|
|
@@ -963,6 +986,7 @@ export async function runBuild(opts: BuildOptions): Promise<BuildResult> {
|
|
|
963
986
|
join(outDir, "catalog.jsonld"),
|
|
964
987
|
buildCatalogJsonLd(catalogInputs, config.site.title, baseUrl, {
|
|
965
988
|
machineReadable: siteAiFlags.machineReadable,
|
|
989
|
+
docsMode,
|
|
966
990
|
}),
|
|
967
991
|
"utf8",
|
|
968
992
|
);
|
package/src/catalog.ts
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
import type { OkfConcept } from "@sorane/okf";
|
|
2
|
+
import { resolveEffectiveType } from "@sorane/okf";
|
|
2
3
|
import { parseAiDisclosure } from "./ai-disclosure.ts";
|
|
4
|
+
import {
|
|
5
|
+
isDatasetCatalogEntry,
|
|
6
|
+
resolveCatalogCreativeWorkType,
|
|
7
|
+
} from "./creative-work-type.ts";
|
|
8
|
+
import {
|
|
9
|
+
parseDistributions,
|
|
10
|
+
parsePublisher,
|
|
11
|
+
resolveDistributionUrl,
|
|
12
|
+
resolveLicenseUrl,
|
|
13
|
+
resolveMediaType,
|
|
14
|
+
} from "./open-data.ts";
|
|
3
15
|
|
|
4
16
|
export interface CatalogEntry {
|
|
5
17
|
readonly slug: string;
|
|
@@ -7,46 +19,151 @@ export interface CatalogEntry {
|
|
|
7
19
|
readonly concept: OkfConcept;
|
|
8
20
|
}
|
|
9
21
|
|
|
22
|
+
export interface CatalogPublisher {
|
|
23
|
+
readonly name: string;
|
|
24
|
+
readonly url?: string;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export interface BuildCatalogOptions {
|
|
28
|
+
readonly machineReadable?: boolean;
|
|
29
|
+
readonly docsMode?: boolean;
|
|
30
|
+
readonly publisher?: CatalogPublisher;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/**
 * Describe a page's Markdown source as a schema.org `DataDownload` node.
 *
 * The Markdown URL is derived from the page URL by swapping a trailing
 * `.html` for `.md`; a URL that does not end in `.html` is used unchanged.
 *
 * @param pageUrl - URL (or site-relative path) of the rendered HTML page.
 * @returns A JSON-LD node with `@type`, `encodingFormat` and `contentUrl`.
 */
function markdownDistribution(pageUrl: string): Record<string, unknown> {
  const markdownUrl = pageUrl.replace(/\.html$/, ".md");
  const download: Record<string, unknown> = {
    "@type": "DataDownload",
    encodingFormat: "text/markdown",
    contentUrl: markdownUrl,
  };
  return download;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Copy AI-disclosure metadata from a concept's frontmatter onto a JSON-LD node.
 *
 * Nothing is written when `machineReadable` is false or when
 * `parseAiDisclosure` yields no disclosure. Otherwise `digitalSourceType` is
 * set and, if the disclosure lists systems, one `ai-system:<name>` keyword per
 * system is appended after whatever keywords the node already carries.
 *
 * @param target - Node to annotate; mutated in place.
 * @param concept - Concept whose frontmatter is inspected.
 * @param machineReadable - Gate for emitting the disclosure at all.
 */
function applyAiDisclosure(
  target: Record<string, unknown>,
  concept: OkfConcept,
  machineReadable: boolean,
): void {
  if (!machineReadable) return;

  const disclosure = parseAiDisclosure(concept.frontmatter);
  if (!disclosure) return;

  target.digitalSourceType = disclosure.digitalSourceType;

  if (!disclosure.systems?.length) return;

  const existingKeywords = (target.keywords as string[] | undefined) ?? [];
  const systemKeywords = disclosure.systems.map((s) => `ai-system:${s.name}`);
  target.keywords = [...existingKeywords, ...systemKeywords];
}
|
|
57
|
+
|
|
58
|
+
/**
 * Build the schema.org `Dataset` JSON-LD node for one catalog entry.
 *
 * Always sets `@type`, `@id` (the entry URL), `name`, `keywords` (effective
 * type followed by the concept's tags) and `distribution`. The remaining
 * properties are added only when the concept or its frontmatter provides a
 * value: `description`, `dateModified` (from `timestamp`), `url` (from
 * `resource`), `identifier`, `inLanguage` (from `language`), a
 * `theme:<value>` keyword, `license` (via `resolveLicenseUrl`) and
 * `publisher` (as an `Organization`).
 *
 * `distribution` lists one `DataDownload` per valid frontmatter distribution,
 * followed by the page's own Markdown source.
 *
 * @param e - Catalog entry; `e.url` is used as `@id` and as the base for
 *   resolving relative distribution URLs.
 * @param machineReadable - When false, no AI-disclosure metadata is added.
 * @returns The node as a plain object (no `@context`).
 */
function buildDatasetNode(
  e: CatalogEntry,
  machineReadable: boolean,
): Record<string, unknown> {
  const concept = e.concept;
  const dataset: Record<string, unknown> = {
    "@type": "Dataset",
    "@id": e.url,
    name: concept.title,
    keywords: [resolveEffectiveType(concept.type, concept.profile), ...(concept.tags ?? [])],
  };
  if (concept.description) dataset.description = concept.description;
  if (concept.timestamp) dataset.dateModified = concept.timestamp;
  if (concept.resource) dataset.url = concept.resource;

  const identifier = concept.frontmatter.identifier;
  if (typeof identifier === "string" && identifier.length > 0) {
    dataset.identifier = identifier;
  }
  const language = concept.frontmatter.language;
  if (typeof language === "string" && language.length > 0) {
    dataset.inLanguage = language;
  }
  const theme = concept.frontmatter.theme;
  if (typeof theme === "string" && theme.length > 0) {
    const kw = (dataset.keywords as string[]) ?? [];
    dataset.keywords = [...kw, `theme:${theme}`];
  }

  const licenseRaw = concept.frontmatter.license;
  if (typeof licenseRaw === "string" && licenseRaw.length > 0) {
    dataset.license = resolveLicenseUrl(licenseRaw);
  }

  const publisher = parsePublisher(concept.frontmatter.publisher);
  if (publisher) {
    const org: Record<string, unknown> = {
      "@type": "Organization",
      name: publisher.name,
    };
    if (publisher.url) org.url = publisher.url;
    dataset.publisher = org;
  }

  const distributions = parseDistributions(concept.frontmatter.distributions);
  const downloads: Record<string, unknown>[] = distributions.map((d) => {
    // `contentUrl` is meant to point at the file itself, so a declared direct
    // download link wins over the access URL (which may be a landing page).
    // The site base is passed as empty here, so root-relative URLs are kept
    // as written and only page-relative ones are resolved against `e.url`.
    const contentUrl = resolveDistributionUrl(d.downloadURL ?? d.accessURL, "", e.url);
    const node: Record<string, unknown> = {
      "@type": "DataDownload",
      name: d.title,
      encodingFormat: resolveMediaType(d.format),
      contentUrl,
    };
    if (d.byteSize !== undefined) node.contentSize = d.byteSize;
    return node;
  });
  // The page's Markdown source is always offered as the last distribution.
  downloads.push(markdownDistribution(e.url));
  dataset.distribution = downloads;

  applyAiDisclosure(dataset, concept, machineReadable);
  return dataset;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Build the JSON-LD node for a non-dataset catalog entry.
 *
 * `@type` comes from `resolveCatalogCreativeWorkType`; `keywords` holds the
 * effective concept type followed by the concept's tags; the only
 * distribution is the page's Markdown source. `description`, `dateModified`
 * (from `timestamp`) and `url` (from `resource`) are added when present,
 * and AI-disclosure metadata is applied last.
 *
 * @param e - Catalog entry; `e.url` is used as `@id`.
 * @param docsMode - Forwarded to the creative-work type resolver.
 * @param machineReadable - When false, no AI-disclosure metadata is added.
 */
function buildCreativeWorkNode(
  e: CatalogEntry,
  docsMode: boolean,
  machineReadable: boolean,
): Record<string, unknown> {
  const { concept, url } = e;
  const workType = resolveCatalogCreativeWorkType(concept, docsMode);
  const effectiveType = resolveEffectiveType(concept.type, concept.profile);

  const node: Record<string, unknown> = {
    "@type": workType,
    "@id": url,
    name: concept.title,
    keywords: [effectiveType, ...(concept.tags ?? [])],
    distribution: [markdownDistribution(url)],
  };

  if (concept.description) {
    node.description = concept.description;
  }
  if (concept.timestamp) {
    node.dateModified = concept.timestamp;
  }
  if (concept.resource) {
    node.url = concept.resource;
  }

  applyAiDisclosure(node, concept, machineReadable);
  return node;
}
|
|
143
|
+
|
|
10
144
|
export function buildCatalogJsonLd(
|
|
11
145
|
entries: readonly CatalogEntry[],
|
|
12
146
|
siteTitle: string,
|
|
13
147
|
baseUrl: string,
|
|
14
|
-
opts?:
|
|
148
|
+
opts?: BuildCatalogOptions,
|
|
15
149
|
): string {
|
|
16
150
|
const machineReadable = opts?.machineReadable !== false;
|
|
17
|
-
const
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
],
|
|
30
|
-
};
|
|
31
|
-
if (e.concept.description) dataset.description = e.concept.description;
|
|
32
|
-
if (e.concept.timestamp) dataset.dateModified = e.concept.timestamp;
|
|
33
|
-
const disclosure = machineReadable
|
|
34
|
-
? parseAiDisclosure(e.concept.frontmatter)
|
|
35
|
-
: null;
|
|
36
|
-
if (disclosure) {
|
|
37
|
-
dataset.digitalSourceType = disclosure.digitalSourceType;
|
|
38
|
-
if (disclosure.systems?.length) {
|
|
39
|
-
const kw = dataset.keywords as string[];
|
|
40
|
-
dataset.keywords = [
|
|
41
|
-
...kw,
|
|
42
|
-
...disclosure.systems.map((s) => `ai-system:${s.name}`),
|
|
43
|
-
];
|
|
44
|
-
}
|
|
151
|
+
const docsMode = opts?.docsMode === true;
|
|
152
|
+
|
|
153
|
+
const datasets: Record<string, unknown>[] = [];
|
|
154
|
+
const hasPart: Record<string, unknown>[] = [];
|
|
155
|
+
|
|
156
|
+
for (const e of entries) {
|
|
157
|
+
if (isDatasetCatalogEntry(e.concept)) {
|
|
158
|
+
datasets.push(buildDatasetNode(e, machineReadable));
|
|
159
|
+
} else {
|
|
160
|
+
const effective = resolveEffectiveType(e.concept.type, e.concept.profile);
|
|
161
|
+
if (effective === "index") continue;
|
|
162
|
+
hasPart.push(buildCreativeWorkNode(e, docsMode, machineReadable));
|
|
45
163
|
}
|
|
46
|
-
|
|
47
|
-
});
|
|
164
|
+
}
|
|
48
165
|
|
|
49
|
-
const catalog = {
|
|
166
|
+
const catalog: Record<string, unknown> = {
|
|
50
167
|
"@context": {
|
|
51
168
|
"@vocab": "https://schema.org/",
|
|
52
169
|
dcat: "http://www.w3.org/ns/dcat#",
|
|
@@ -54,8 +171,29 @@ export function buildCatalogJsonLd(
|
|
|
54
171
|
"@type": "DataCatalog",
|
|
55
172
|
name: siteTitle,
|
|
56
173
|
url: baseUrl.length > 0 ? baseUrl : undefined,
|
|
57
|
-
dataset: graph,
|
|
58
174
|
};
|
|
59
175
|
|
|
176
|
+
if (opts?.publisher) {
|
|
177
|
+
const org: Record<string, unknown> = {
|
|
178
|
+
"@type": "Organization",
|
|
179
|
+
name: opts.publisher.name,
|
|
180
|
+
};
|
|
181
|
+
if (opts.publisher.url) org.url = opts.publisher.url;
|
|
182
|
+
catalog.publisher = org;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
if (datasets.length > 0) catalog.dataset = datasets;
|
|
186
|
+
if (hasPart.length > 0) catalog.hasPart = hasPart;
|
|
187
|
+
|
|
60
188
|
return JSON.stringify(catalog, null, 2) + "\n";
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
/**
 * Per-page Dataset JSON-LD, returned as a ready-to-embed `<script>` element.
 *
 * The node is the same one the site catalog uses for this entry, with a
 * schema.org `@context` added so it stands alone on the page.
 *
 * @param entry - Catalog entry for the dataset page.
 * @param machineReadable - When false, no AI-disclosure metadata is added.
 * @returns `<script type="application/ld+json">…</script>` HTML.
 */
export function buildDatasetPageJsonLd(
  entry: CatalogEntry,
  machineReadable = true,
): string {
  const node = buildDatasetNode(entry, machineReadable);
  node["@context"] = "https://schema.org";
  // JSON.stringify leaves "<" untouched, so a title or description containing
  // "</script>" or "<!--" would terminate or corrupt the inline script
  // element. "\u003c" is the same character to any JSON parser.
  const json = JSON.stringify(node).replace(/</g, "\\u003c");
  return `<script type="application/ld+json">${json}</script>`;
}
|
package/src/creative-work-type.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { OkfConcept } from "@sorane/okf";
|
|
2
|
+
import { resolveEffectiveType } from "@sorane/okf";
|
|
3
|
+
|
|
4
|
+
export type CatalogCreativeWorkType =
|
|
5
|
+
| "BlogPosting"
|
|
6
|
+
| "TechArticle"
|
|
7
|
+
| "FAQPage"
|
|
8
|
+
| "DefinedTermSet";
|
|
9
|
+
|
|
10
|
+
/**
 * Choose the schema.org creative-work type for a non-dataset concept.
 *
 * The effective concept type decides first: `reference` → `TechArticle`,
 * `faq` → `FAQPage`, `glossary` → `DefinedTermSet`. For every other type a
 * frontmatter `creativeWorkType` of `TechArticle` or `BlogPosting` is
 * honoured; any other value falls back to `TechArticle` in docs mode and
 * `BlogPosting` otherwise.
 */
export function resolveCatalogCreativeWorkType(
  concept: OkfConcept,
  docsMode: boolean,
): CatalogCreativeWorkType {
  switch (resolveEffectiveType(concept.type, concept.profile)) {
    case "reference":
      return "TechArticle";
    case "faq":
      return "FAQPage";
    case "glossary":
      return "DefinedTermSet";
    default:
      break;
  }

  const requested = concept.frontmatter.creativeWorkType;
  if (requested === "TechArticle" || requested === "BlogPosting") {
    return requested;
  }

  if (docsMode) {
    return "TechArticle";
  }
  return "BlogPosting";
}
|
|
25
|
+
|
|
26
|
+
/** True when the concept's effective type (after profile resolution) is `dataset`. */
export function isDatasetCatalogEntry(concept: OkfConcept): boolean {
  const effectiveType = resolveEffectiveType(concept.type, concept.profile);
  return effectiveType === "dataset";
}
|
package/src/dataset-page.ts
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import type { OkfConcept } from "@sorane/okf";
|
|
2
|
+
import {
|
|
3
|
+
parseDistributions,
|
|
4
|
+
parsePublisher,
|
|
5
|
+
resolveDistributionUrl,
|
|
6
|
+
resolveLicenseUrl,
|
|
7
|
+
type DistributionRef,
|
|
8
|
+
} from "./open-data.ts";
|
|
9
|
+
import { escapeHtml } from "./render.ts";
|
|
10
|
+
|
|
11
|
+
/**
 * Format a byte count for display using 1024-based units.
 *
 * Values below 1024 are printed as-is with a `B` suffix; larger values are
 * printed with one decimal in KB, MB, GB or TB (TB is the largest unit, so
 * bigger values stay in TB).
 *
 * @param n - Size in bytes.
 * @returns For example `"512 B"`, `"1.5 KB"`, `"1.0 GB"`.
 */
function formatBytes(n: number): string {
  if (n < 1024) return `${n} B`;

  const units = ["KB", "MB", "GB", "TB"];
  let value = n / 1024;
  let unit = 0;
  // Compare the value as it will be printed: 1048575 bytes is 1023.999 KB,
  // which rounds to "1024.0" and therefore belongs in the next unit.
  while (unit < units.length - 1 && Number(value.toFixed(1)) >= 1024) {
    value /= 1024;
    unit += 1;
  }
  return `${value.toFixed(1)} ${units[unit]}`;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Render one distribution as a `<tr>` of the dataset distribution table.
 *
 * Cells are: title, format (in `<code>`), and a link to the resolved access
 * URL followed by the optional size and checksum. All text is HTML-escaped.
 *
 * @param dist - Parsed distribution entry.
 * @param pageUrl - URL of the dataset page, base for relative access URLs.
 * @param baseUrl - Site base URL, prefix for root-relative access URLs.
 */
function distributionRow(
  dist: DistributionRef,
  pageUrl: string,
  baseUrl: string,
): string {
  const resolvedUrl = resolveDistributionUrl(dist.accessURL, baseUrl, pageUrl);

  let sizeHtml = "";
  if (dist.byteSize !== undefined) {
    sizeHtml = ` <span class="dataset-size">(${formatBytes(dist.byteSize)})</span>`;
  }

  let checksumHtml = "";
  if (dist.checksum) {
    checksumHtml = `<div class="dataset-checksum"><code>${escapeHtml(dist.checksum)}</code></div>`;
  }

  const cells = [
    `<td>${escapeHtml(dist.title)}</td>`,
    `<td><code>${escapeHtml(dist.format)}</code></td>`,
    `<td><a href="${escapeHtml(resolvedUrl)}">${escapeHtml(resolvedUrl)}</a>${sizeHtml}${checksumHtml}</td>`,
  ];
  return `<tr>${cells.join("")}</tr>`;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Dataset landing page body: metadata block + distribution table + rendered body.
 *
 * Metadata lines are emitted only for values present on the concept, in this
 * order: license, publisher, identifier, theme, language, resource. The
 * distribution table is omitted when the frontmatter yields no valid
 * distributions. All frontmatter-derived text is HTML-escaped; `bodyHtml` is
 * inserted as-is.
 *
 * @param concept - Dataset concept supplying frontmatter and `resource`.
 * @param bodyHtml - Already-rendered HTML of the concept body.
 * @param opts - `pageUrl` and `baseUrl` used to resolve distribution links.
 * @returns HTML for the page's main content.
 */
export function renderDatasetPageBody(
  concept: OkfConcept,
  bodyHtml: string,
  opts: { readonly pageUrl: string; readonly baseUrl: string },
): string {
  const publisher = parsePublisher(concept.frontmatter.publisher);
  const distributions = parseDistributions(concept.frontmatter.distributions);
  const identifier = concept.frontmatter.identifier;
  const theme = concept.frontmatter.theme;
  const language = concept.frontmatter.language;

  const meta: string[] = [];

  const licenseRaw = concept.frontmatter.license;
  if (typeof licenseRaw === "string" && licenseRaw.length > 0) {
    const license = resolveLicenseUrl(licenseRaw);
    if (license.length > 0) {
      const label = escapeHtml(licenseRaw);
      // Only link when the license resolved to an absolute http(s) URL. An
      // unmapped identifier such as "MIT" would otherwise become a broken
      // page-relative link, so it is shown as plain text instead.
      const value = /^https?:\/\//i.test(license)
        ? `<a href="${escapeHtml(license)}">${label}</a>`
        : label;
      meta.push(`<p class="dataset-meta"><strong>License:</strong> ${value}</p>`);
    }
  }
  if (publisher) {
    const pub =
      publisher.url !== undefined
        ? `<a href="${escapeHtml(publisher.url)}">${escapeHtml(publisher.name)}</a>`
        : escapeHtml(publisher.name);
    meta.push(`<p class="dataset-meta"><strong>Publisher:</strong> ${pub}</p>`);
  }
  if (typeof identifier === "string" && identifier.length > 0) {
    meta.push(
      `<p class="dataset-meta"><strong>Identifier:</strong> <code>${escapeHtml(identifier)}</code></p>`,
    );
  }
  if (typeof theme === "string" && theme.length > 0) {
    meta.push(`<p class="dataset-meta"><strong>Theme:</strong> <code>${escapeHtml(theme)}</code></p>`);
  }
  if (typeof language === "string" && language.length > 0) {
    meta.push(`<p class="dataset-meta"><strong>Language:</strong> <code>${escapeHtml(language)}</code></p>`);
  }
  if (concept.resource) {
    meta.push(
      `<p class="dataset-meta"><strong>Resource:</strong> <a href="${escapeHtml(concept.resource)}">${escapeHtml(concept.resource)}</a></p>`,
    );
  }

  const table =
    distributions.length > 0
      ? `<section class="dataset-distributions" aria-labelledby="dataset-distributions-heading">` +
        `<h2 id="dataset-distributions-heading">Distributions</h2>` +
        `<table class="dataset-table"><thead><tr><th>Name</th><th>Format</th><th>Access</th></tr></thead><tbody>` +
        distributions
          .map((d) => distributionRow(d, opts.pageUrl, opts.baseUrl))
          .join("") +
        `</tbody></table></section>`
      : "";

  return (
    `<div class="dataset-landing">` +
    meta.join("") +
    table +
    `</div>` +
    `<div class="dataset-body">${bodyHtml}</div>`
  );
}
|
package/src/migrate.ts
CHANGED
|
@@ -10,7 +10,7 @@ function slugFromPath(filePath: string): string {
|
|
|
10
10
|
return base.replace(/\.md$/i, "");
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
const SUPPORTED_BUMP_PROFILES = new Set(["0.1", "0.2"]);
|
|
13
|
+
const SUPPORTED_BUMP_PROFILES = new Set(["0.1", "0.2", "0.3"]);
|
|
14
14
|
|
|
15
15
|
export interface MigrateToOkfOptions {
|
|
16
16
|
readonly bumpProfile?: string;
|
|
@@ -54,7 +54,7 @@ export function parseBumpProfileArg(argv: readonly string[]): string | undefined
|
|
|
54
54
|
if (i < 0 || i + 1 >= argv.length) return undefined;
|
|
55
55
|
const version = argv[i + 1]!.trim();
|
|
56
56
|
if (!SUPPORTED_BUMP_PROFILES.has(version)) {
|
|
57
|
-
throw new Error(`unsupported --bump-profile version: ${version} (supported: 0.1, 0.2)`);
|
|
57
|
+
throw new Error(`unsupported --bump-profile version: ${version} (supported: 0.1, 0.2, 0.3)`);
|
|
58
58
|
}
|
|
59
59
|
return version;
|
|
60
60
|
}
|
package/src/open-data.ts
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
export interface PublisherRef {
|
|
2
|
+
readonly name: string;
|
|
3
|
+
readonly url?: string;
|
|
4
|
+
readonly email?: string;
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
export interface DistributionRef {
|
|
8
|
+
readonly title: string;
|
|
9
|
+
readonly format: string;
|
|
10
|
+
readonly accessURL: string;
|
|
11
|
+
readonly downloadURL?: string;
|
|
12
|
+
readonly byteSize?: number;
|
|
13
|
+
readonly checksum?: string;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/** Known SPDX license identifiers mapped to a canonical license URL. */
const SPDX_LICENSE_URL: Record<string, string> = {
  "CC-BY-4.0": "https://creativecommons.org/licenses/by/4.0/",
  "CC-BY-3.0": "https://creativecommons.org/licenses/by/3.0/",
  "CC0-1.0": "https://creativecommons.org/publicdomain/zero/1.0/",
  "EUPL-1.2": "https://interoperable.europe.eu/collection/eupl/eupl-text-eupl-12",
};

/** Short, lower-case format labels mapped to their media type. */
const FORMAT_MEDIA: Record<string, string> = {
  csv: "text/csv",
  tsv: "text/tab-separated-values",
  json: "application/json",
  jsonld: "application/ld+json",
  xml: "application/xml",
  pdf: "application/pdf",
  md: "text/markdown",
  html: "text/html",
  parquet: "application/vnd.apache.parquet",
  xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
};

/**
 * Look up `key` among the table's own entries only.
 *
 * Plain indexing would also find inherited `Object.prototype` members, so a
 * frontmatter value like "constructor" would yield a function, not a string.
 */
function lookupOwnEntry(
  table: Readonly<Record<string, string>>,
  key: string,
): string | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Resolve a frontmatter license value to a URL where possible.
 *
 * An `http(s)` URL is returned as-is (trimmed). A known SPDX identifier is
 * mapped to its license URL, matched exactly first and then ignoring letter
 * case. Anything else is returned trimmed and unchanged, so callers must not
 * assume the result is a URL.
 */
export function resolveLicenseUrl(license: string): string {
  const trimmed = license.trim();
  if (/^https?:\/\//i.test(trimmed)) return trimmed;

  const exact = lookupOwnEntry(SPDX_LICENSE_URL, trimmed);
  if (exact !== undefined) return exact;

  const wanted = trimmed.toUpperCase();
  for (const [id, url] of Object.entries(SPDX_LICENSE_URL)) {
    if (id.toUpperCase() === wanted) return url;
  }
  return trimmed;
}

/**
 * Resolve a distribution `format` label to a media type.
 *
 * The label is trimmed and lower-cased; a known short label (with or without
 * a leading dot, e.g. `csv` or `.csv`) maps to its media type. Any other
 * value, including one that already looks like `type/subtype`, is returned
 * in its trimmed, lower-cased form.
 */
export function resolveMediaType(format: string): string {
  const key = format.trim().toLowerCase();
  return lookupOwnEntry(FORMAT_MEDIA, key.replace(/^\./, "")) ?? key;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Validate a frontmatter `publisher` value.
 *
 * Accepts only a plain object with a non-blank string `name`; `url` and
 * `email` are kept when they are non-blank strings and are `undefined`
 * otherwise. String values are trimmed, so a whitespace-only `name` is
 * rejected and never rendered as an empty publisher.
 *
 * @param raw - Untrusted frontmatter value.
 * @returns The publisher, or `undefined` when `raw` is not usable.
 */
export function parsePublisher(
  raw: unknown,
): PublisherRef | undefined {
  if (raw === null || typeof raw !== "object" || Array.isArray(raw)) {
    return undefined;
  }
  const text = (value: unknown): string | undefined => {
    if (typeof value !== "string") return undefined;
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : undefined;
  };

  const fields = raw as { name?: unknown; url?: unknown; email?: unknown };
  const name = text(fields.name);
  if (name === undefined) return undefined;
  return {
    name,
    url: text(fields.url),
    email: text(fields.email),
  };
}
|
|
65
|
+
|
|
66
|
+
/**
 * Validate a frontmatter `distributions` list.
 *
 * Non-array input yields an empty list. Each item must be a plain object
 * with non-blank string `title`, `format` and `accessURL`; items failing
 * that are dropped. `downloadURL` and `checksum` are kept when they are
 * non-blank strings, `byteSize` when it is a finite number >= 0. String
 * values are trimmed, so a whitespace-only `accessURL` is rejected here
 * rather than later resolving to the page's own URL.
 *
 * @param raw - Untrusted frontmatter value.
 * @returns The valid distributions, in input order.
 */
export function parseDistributions(
  raw: unknown,
): readonly DistributionRef[] {
  if (!Array.isArray(raw)) return [];

  const text = (value: unknown): string | undefined => {
    if (typeof value !== "string") return undefined;
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : undefined;
  };

  const out: DistributionRef[] = [];
  for (const item of raw) {
    if (item === null || typeof item !== "object" || Array.isArray(item)) continue;
    const fields = item as {
      title?: unknown;
      format?: unknown;
      accessURL?: unknown;
      downloadURL?: unknown;
      byteSize?: unknown;
      checksum?: unknown;
    };
    const title = text(fields.title);
    const format = text(fields.format);
    const accessURL = text(fields.accessURL);
    if (title === undefined || format === undefined || accessURL === undefined) {
      continue;
    }
    const byteSize = fields.byteSize;
    out.push({
      title,
      format,
      accessURL,
      downloadURL: text(fields.downloadURL),
      byteSize:
        typeof byteSize === "number" && Number.isFinite(byteSize) && byteSize >= 0
          ? byteSize
          : undefined,
      checksum: text(fields.checksum),
    });
  }
  return out;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Resolve a distribution URL for output.
 *
 * - Absolute `http(s)` URLs are returned unchanged (trimmed).
 * - Root-relative paths (`/files/x.csv`) are prefixed with the site base
 *   when one is given, otherwise returned as written.
 * - Everything else, including protocol-relative `//host/path`, is resolved
 *   against the page URL; if that fails (for example the page URL is itself
 *   relative) the trimmed input is returned.
 *
 * @param accessURL - URL as written in frontmatter.
 * @param baseUrl - Site base URL, or `""` when none is configured.
 * @param pageUrl - URL of the page that lists the distribution.
 */
export function resolveDistributionUrl(
  accessURL: string,
  baseUrl: string,
  pageUrl: string,
): string {
  const trimmed = accessURL.trim();
  if (/^https?:\/\//i.test(trimmed)) return trimmed;
  // "//host/path" names another host and must not be glued onto the site
  // base, so only a single leading slash counts as root-relative.
  if (trimmed.startsWith("/") && !trimmed.startsWith("//")) {
    // Drop trailing slashes on the base so the join never produces "//".
    const siteBase = baseUrl.replace(/\/+$/, "");
    return siteBase.length > 0 ? `${siteBase}${trimmed}` : trimmed;
  }
  try {
    return new URL(trimmed, pageUrl).href;
  } catch {
    return trimmed;
  }
}
|
package/src/site-meta.ts
CHANGED
|
@@ -110,7 +110,7 @@ export function buildLlmsTxt(opts: LlmsTxtOptions): string {
|
|
|
110
110
|
"## Machine-readable",
|
|
111
111
|
"",
|
|
112
112
|
`- [OKF bundle](${abs("okf/bundle.tar.gz")}): all concepts as {type}/{slug}.md`,
|
|
113
|
-
`- [
|
|
113
|
+
`- [Site catalog](${abs("catalog.jsonld")}): open datasets in \`dataset[]\`; other pages in \`hasPart[]\``,
|
|
114
114
|
`- [Sitemap](${abs("sitemap.xml")})`,
|
|
115
115
|
];
|
|
116
116
|
if (opts.diagramsEnabled) {
|
package/src/ssg.ts
CHANGED
|
@@ -334,7 +334,14 @@ function articleNavHtml(nav?: ArticleNav): string {
|
|
|
334
334
|
return `<nav class="article-nav" aria-label="記事">${parts.join(" · ")}</nav>`;
|
|
335
335
|
}
|
|
336
336
|
|
|
337
|
-
export
|
|
337
|
+
export type PageCreativeWorkType =
|
|
338
|
+
| "BlogPosting"
|
|
339
|
+
| "TechArticle"
|
|
340
|
+
| "FAQPage"
|
|
341
|
+
| "DefinedTermSet";
|
|
342
|
+
|
|
343
|
+
export function buildCreativeWorkJsonLd(opts: {
|
|
344
|
+
workType: PageCreativeWorkType;
|
|
338
345
|
title: string;
|
|
339
346
|
description?: string;
|
|
340
347
|
url: string;
|
|
@@ -346,13 +353,19 @@ export function buildBlogPostingJsonLd(opts: {
|
|
|
346
353
|
aiDisclosure?: AiDisclosure;
|
|
347
354
|
associatedMedia?: readonly AssociatedMediaItem[];
|
|
348
355
|
}): string {
|
|
356
|
+
const isPartOf =
|
|
357
|
+
opts.workType === "BlogPosting"
|
|
358
|
+
? { "@type": "Blog", name: opts.siteTitle }
|
|
359
|
+
: { "@type": "WebSite", name: opts.siteTitle };
|
|
360
|
+
|
|
349
361
|
const data: Record<string, unknown> = {
|
|
350
362
|
"@context": "https://schema.org",
|
|
351
|
-
"@type":
|
|
363
|
+
"@type": opts.workType,
|
|
352
364
|
headline: opts.title,
|
|
365
|
+
name: opts.title,
|
|
353
366
|
url: opts.url,
|
|
354
367
|
inLanguage: opts.lang,
|
|
355
|
-
isPartOf
|
|
368
|
+
isPartOf,
|
|
356
369
|
};
|
|
357
370
|
if (opts.description) data.description = opts.description;
|
|
358
371
|
if (opts.datePublished) data.datePublished = opts.datePublished;
|
|
@@ -368,6 +381,22 @@ export function buildBlogPostingJsonLd(opts: {
|
|
|
368
381
|
return `<script type="application/ld+json">${JSON.stringify(data)}</script>`;
|
|
369
382
|
}
|
|
370
383
|
|
|
384
|
+
/**
 * Render `BlogPosting` JSON-LD for a page.
 *
 * Kept as a thin compatibility wrapper: it forwards every option to
 * `buildCreativeWorkJsonLd` with `workType` fixed to `"BlogPosting"`.
 *
 * @deprecated Use buildCreativeWorkJsonLd({ workType: "BlogPosting", ... })
 */
export function buildBlogPostingJsonLd(opts: {
  title: string;
  description?: string;
  url: string;
  datePublished?: string;
  dateModified?: string;
  author?: string;
  siteTitle: string;
  lang: string;
  aiDisclosure?: AiDisclosure;
  associatedMedia?: readonly AssociatedMediaItem[];
}): string {
  const blogPostingOpts = { ...opts, workType: "BlogPosting" as const };
  return buildCreativeWorkJsonLd(blogPostingOpts);
}
|
|
399
|
+
|
|
371
400
|
const SERIF_FONT_STYLES = new Set(["GJM", "serif", "mincho"]);
|
|
372
401
|
|
|
373
402
|
/** frontmatter の font スタイルが明朝指定なら class を返す。 */
|