@glossarist/concept-browser 0.7.43 → 0.7.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/index.mjs +12 -13
- package/package.json +3 -2
- package/scripts/__tests__/fetch-datasets.test.mjs +105 -0
- package/scripts/fetch-datasets.mjs +53 -51
- package/scripts/generate-data.mjs +41 -19
- package/scripts/lib/build/image-assets.mjs +190 -0
- package/scripts/lib/build/non-verbal-consumer.mjs +221 -0
- package/scripts/lib/local-path-safety.mjs +68 -0
- package/src/__tests__/bibliography-adapter.test.ts +79 -0
- package/src/__tests__/content-renderer-nvr-mentions.test.ts +57 -0
- package/src/__tests__/locale.test.ts +46 -0
- package/src/__tests__/model-bridge-entity-refs.test.ts +114 -0
- package/src/__tests__/non-verbal-anchor.test.ts +33 -0
- package/src/__tests__/non-verbal-cross-ref.test.ts +146 -0
- package/src/__tests__/non-verbal-highlight.test.ts +56 -0
- package/src/__tests__/non-verbal-kind.test.ts +77 -0
- package/src/__tests__/non-verbal-list.test.ts +67 -0
- package/src/__tests__/non-verbal-rep-display.test.ts +85 -0
- package/src/__tests__/non-verbal-scroll-guard.test.ts +116 -0
- package/src/__tests__/use-concept-entities.test.ts +76 -0
- package/src/adapters/bibliography-adapter.ts +49 -0
- package/src/adapters/factory.ts +14 -0
- package/src/adapters/model-bridge.ts +51 -0
- package/src/adapters/non-verbal/figure-bridge.ts +101 -0
- package/src/adapters/non-verbal/formula-bridge.ts +48 -0
- package/src/adapters/non-verbal/index.ts +55 -0
- package/src/adapters/non-verbal/kind.ts +46 -0
- package/src/adapters/non-verbal/prefix.ts +67 -0
- package/src/adapters/non-verbal/source-bridge.ts +81 -0
- package/src/adapters/non-verbal/table-bridge.ts +98 -0
- package/src/adapters/non-verbal/types.ts +133 -0
- package/src/adapters/non-verbal-resolver.ts +101 -0
- package/src/components/ConceptDetail.vue +17 -4
- package/src/components/LanguageDetail.vue +0 -3
- package/src/components/NonVerbalRepDisplay.vue +82 -24
- package/src/components/figure/FigureDisplay.vue +132 -0
- package/src/components/figure/FigureImages.vue +111 -0
- package/src/components/figure/figure-image-pick.ts +56 -0
- package/src/components/figure/figure-layout.ts +26 -0
- package/src/components/formula/FormulaDisplay.vue +90 -0
- package/src/components/formula/FormulaExpression.vue +70 -0
- package/src/components/non-verbal/NonVerbalCaption.vue +104 -0
- package/src/components/non-verbal/NonVerbalFallback.vue +69 -0
- package/src/components/non-verbal/NonVerbalList.vue +118 -0
- package/src/components/non-verbal/NonVerbalSources.vue +61 -0
- package/src/components/table/TableDisplay.vue +99 -0
- package/src/components/table/TableMarkup.vue +63 -0
- package/src/components/table/TableStructured.vue +66 -0
- package/src/composables/use-concept-entities.ts +70 -0
- package/src/composables/use-non-verbal-cross-ref.ts +79 -0
- package/src/composables/use-non-verbal-entity.ts +58 -0
- package/src/composables/use-reduced-motion.ts +26 -0
- package/src/composables/use-render-options.ts +30 -33
- package/src/router/index.ts +3 -0
- package/src/router/non-verbal-scroll-guard.ts +56 -0
- package/src/style.css +17 -0
- package/src/utils/content-renderer.ts +76 -64
- package/src/utils/locale.ts +92 -0
- package/src/utils/non-verbal-anchor.ts +51 -0
- package/src/utils/non-verbal-highlight.ts +27 -0
package/cli/index.mjs
CHANGED
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
*/
|
|
21
21
|
|
|
22
22
|
import { loadSiteConfig } from '../scripts/load-site-config.mjs';
|
|
23
|
-
import {
|
|
23
|
+
import { existsSync } from 'fs';
|
|
24
24
|
import { resolve, dirname } from 'path';
|
|
25
25
|
import { fileURLToPath } from 'url';
|
|
26
26
|
|
|
@@ -166,22 +166,21 @@ Environment:
|
|
|
166
166
|
}
|
|
167
167
|
}
|
|
168
168
|
|
|
169
|
-
// Run vite build using the package's vite.config.ts
|
|
169
|
+
// Run vite build using the package's vite.config.ts via programmatic API
|
|
170
170
|
console.log(`\n=== BUILD SPA ===\n`);
|
|
171
171
|
const viteConfig = resolve(pkgRoot, 'vite.config.ts');
|
|
172
|
-
const
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
env: { ...process.env },
|
|
172
|
+
const { build: viteBuild } = await import('vite');
|
|
173
|
+
await viteBuild({
|
|
174
|
+
configFile: viteConfig,
|
|
175
|
+
root: pkgRoot,
|
|
176
|
+
mode: 'production',
|
|
178
177
|
});
|
|
179
178
|
|
|
180
|
-
// Run postbuild (404 page)
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
}
|
|
179
|
+
// Run postbuild (404 page) via dynamic import
|
|
180
|
+
const postbuild = resolve(pkgRoot, 'scripts', 'generate-404.js');
|
|
181
|
+
if (existsSync(postbuild)) {
|
|
182
|
+
await import(`file://${postbuild}`);
|
|
183
|
+
}
|
|
185
184
|
|
|
186
185
|
return;
|
|
187
186
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@glossarist/concept-browser",
|
|
3
|
-
"version": "0.7.
|
|
3
|
+
"version": "0.7.45",
|
|
4
4
|
"description": "Vue SPA for browsing Glossarist terminology datasets with cross-reference resolution, graph visualization, and multi-language support",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -25,8 +25,9 @@
|
|
|
25
25
|
"autoprefixer": "^10.4.21",
|
|
26
26
|
"d3": "^7.9.0",
|
|
27
27
|
"favicons": "^7.2.0",
|
|
28
|
-
"glossarist": "^0.
|
|
28
|
+
"glossarist": "^0.4.0",
|
|
29
29
|
"js-yaml": "^4.1.0",
|
|
30
|
+
"jszip": "^3.10.1",
|
|
30
31
|
"pinia": "^2.3.1",
|
|
31
32
|
"postcss": "^8.5.3",
|
|
32
33
|
"sharp": "^0.34.5",
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import os from 'os';
|
|
5
|
+
import { assertLocalPathSafe } from '../lib/local-path-safety.mjs';
|
|
6
|
+
|
|
7
|
+
/**
 * Build a throwaway directory tree for one test case.
 *
 * Layout created under a fresh `cb-fetch-*` directory in the OS temp dir:
 *   <root>/.datasets/                    (empty staging dir)
 *   <root>/source-data/concepts/a.yaml   (one sample concept file)
 *
 * @returns {{ root: string, datasetsDir: string, sourceDir: string }}
 */
function makeTmpTree() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'cb-fetch-'));
  const tree = {
    root,
    datasetsDir: path.join(root, '.datasets'),
    sourceDir: path.join(root, 'source-data'),
  };

  fs.mkdirSync(tree.datasetsDir);

  const conceptsDir = path.join(tree.sourceDir, 'concepts');
  fs.mkdirSync(conceptsDir, { recursive: true });
  fs.writeFileSync(path.join(conceptsDir, 'a.yaml'), 'termid: 1\n');

  return tree;
}
|
|
16
|
+
|
|
17
|
+
describe('assertLocalPathSafe', () => {
  let tree;

  // Run the guard for dataset id "foo" against the current temp tree.
  const check = (localPath) =>
    assertLocalPathSafe('foo', localPath, {
      root: tree.root,
      datasetsDir: tree.datasetsDir,
    });

  // Path of the staging directory for dataset "foo" (.datasets/foo).
  const stagedDir = () => path.join(tree.datasetsDir, 'foo');

  beforeEach(() => {
    tree = makeTmpTree();
  });

  afterEach(() => {
    fs.rmSync(tree.root, { recursive: true, force: true });
  });

  it('returns resolved path for a safe external location', () => {
    const resolved = check(tree.sourceDir);
    // The guard hands back a realpath; on macOS the temp dir lives behind a
    // /var → /private/var symlink, so the expectation must be realpath'd too.
    const expected = fs.realpathSync(path.resolve(tree.root, tree.sourceDir));
    expect(resolved).toBe(expected);
  });

  it('throws when localPath does not exist', () => {
    const missing = path.join(tree.root, 'nope');
    expect(() => check(missing)).toThrow(/does not exist/);
  });

  it('throws when localPath equals .datasets/<id>', () => {
    const staged = stagedDir();
    fs.mkdirSync(staged, { recursive: true });
    expect(() => check(staged)).toThrow(/same physical location/);
  });

  it('throws when localPath is nested inside .datasets/<id>', () => {
    const nested = path.join(stagedDir(), 'subdir');
    fs.mkdirSync(nested, { recursive: true });
    expect(() => check(nested)).toThrow(/nested inside/);
  });

  it('throws when localPath contains .datasets/<id> (parent-of-staging hazard)', () => {
    // Here localPath is the project root and the staging dir sits beneath it,
    // so wiping .datasets/<id> during staging would delete files that belong
    // to localPath.
    expect(() => check(tree.root)).toThrow(/contains .datasets/);
  });

  it('throws when localPath is a symlink to .datasets/<id> (the reported bug)', () => {
    const staged = stagedDir();
    fs.mkdirSync(staged, { recursive: true });
    const symlinkPath = path.join(tree.root, 'evil-link');
    fs.symlinkSync(staged, symlinkPath);
    expect(() => check(symlinkPath)).toThrow(/same physical location/);
  });

  it('does NOT modify the source directory (regression for data-loss bug)', () => {
    const conceptsDir = path.join(tree.sourceDir, 'concepts');
    const sentinel = path.join(conceptsDir, 'SENTINEL.yaml');
    fs.writeFileSync(sentinel, 'termid: sentinel\n');
    const beforeMtime = fs.statSync(sentinel).mtimeMs;

    check(tree.sourceDir);

    // The safety check is read-only: the sentinel must still be there,
    // unmodified, and still listed in its directory.
    expect(fs.existsSync(sentinel)).toBe(true);
    expect(fs.statSync(sentinel).mtimeMs).toBe(beforeMtime);
    expect(fs.readdirSync(conceptsDir)).toContain('SENTINEL.yaml');
  });
});
|
|
@@ -1,23 +1,26 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
|
-
* fetch-datasets.mjs — Load datasets from .gcr files or
|
|
3
|
+
* fetch-datasets.mjs — Load datasets from .gcr files, local paths, or git repos.
|
|
4
4
|
*
|
|
5
5
|
* Reads site config (via load-site-config.mjs), for each dataset:
|
|
6
6
|
* 1. If .gcr/{id}.gcr exists, extract to .datasets/{id}/
|
|
7
7
|
* 2. Else download from gcrPackage URL and extract
|
|
8
|
-
* 3. Else
|
|
8
|
+
* 3. Else if localPath is set, use it in-place (NO copy, NO staging)
|
|
9
|
+
* 4. Else clone/update source repo into .datasets/{id}/
|
|
9
10
|
*
|
|
10
11
|
* After fetching, validates that all GCR dependencies are satisfiable
|
|
11
12
|
* (either provided locally or routed externally).
|
|
12
13
|
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
14
|
+
* No shell commands. All file ops use Node fs; ZIP uses JSZip; git uses
|
|
15
|
+
* execFileSync with array args (no shell interpolation).
|
|
15
16
|
*/
|
|
16
17
|
import fs from 'fs';
|
|
17
18
|
import path from 'path';
|
|
19
|
+
import JSZip from 'jszip';
|
|
18
20
|
import { loadGcr } from 'glossarist';
|
|
19
|
-
import {
|
|
21
|
+
import { execFileSync } from 'child_process';
|
|
20
22
|
import { loadSiteConfig } from './load-site-config.mjs';
|
|
23
|
+
import { assertLocalPathSafe } from './lib/local-path-safety.mjs';
|
|
21
24
|
|
|
22
25
|
const ROOT = process.cwd();
|
|
23
26
|
const DATASETS_DIR = path.join(ROOT, '.datasets');
|
|
@@ -39,23 +42,29 @@ async function downloadGcr(url, destPath) {
|
|
|
39
42
|
console.log(` Saved to ${destPath} (${(buf.length / 1024).toFixed(0)} KB)`);
|
|
40
43
|
}
|
|
41
44
|
|
|
42
|
-
// --- GCR extraction ---
|
|
43
|
-
function extractGcr(gcrPath, targetDir) {
|
|
44
|
-
|
|
45
|
-
|
|
45
|
+
// --- GCR extraction (pure JSZip; no shell, cross-platform) ---
|
|
46
|
+
/**
 * Unpack a .gcr (ZIP) archive into `targetDir` using JSZip only.
 *
 * Any existing `targetDir` is removed first, so the result is exactly the
 * archive's file entries. Directory entries are skipped; parent directories
 * are created on demand for each file.
 *
 * @param {string} gcrPath   Path to the .gcr archive.
 * @param {string} targetDir Directory to (re)create and extract into.
 * @returns {Promise<void>}
 * @throws {Error} If an entry would resolve outside `targetDir`.
 */
async function extractGcr(gcrPath, targetDir) {
  const targetAbs = path.resolve(targetDir);

  // Start from a clean directory.
  if (fs.existsSync(targetAbs)) {
    fs.rmSync(targetAbs, { recursive: true, force: true });
  }
  fs.mkdirSync(targetAbs, { recursive: true });

  const archive = await JSZip.loadAsync(fs.readFileSync(gcrPath));
  const insidePrefix = targetAbs + path.sep;

  for (const entry of Object.values(archive.files)) {
    if (entry.dir) {
      continue;
    }

    // Zip-slip guard: the resolved destination must stay under targetAbs.
    const dest = path.resolve(targetAbs, entry.name);
    const escapes = dest !== targetAbs && !dest.startsWith(insidePrefix);
    if (escapes) {
      throw new Error(`Refusing to extract entry outside target dir: ${entry.name}`);
    }

    fs.mkdirSync(path.dirname(dest), { recursive: true });
    fs.writeFileSync(dest, await entry.async('nodebuffer'));
  }

  console.log(`  Extracted to ${targetAbs}`);
}
|
|
60
69
|
|
|
61
70
|
// --- Read GCR metadata from ZIP without extraction ---
|
|
@@ -96,7 +105,7 @@ function validateDependencies(config, gcrMetadata) {
|
|
|
96
105
|
return errors;
|
|
97
106
|
}
|
|
98
107
|
|
|
99
|
-
// --- Git operations ---
|
|
108
|
+
// --- Git operations (execFileSync with array args — no shell) ---
|
|
100
109
|
function cloneOrUpdate(sourceRepo, targetDir) {
|
|
101
110
|
const env = { ...process.env };
|
|
102
111
|
let repoUrl = sourceRepo;
|
|
@@ -104,24 +113,28 @@ function cloneOrUpdate(sourceRepo, targetDir) {
|
|
|
104
113
|
repoUrl = sourceRepo.replace('https://', `https://x-access-token:${env.GITHUB_TOKEN}@`);
|
|
105
114
|
}
|
|
106
115
|
|
|
107
|
-
|
|
116
|
+
const targetAbs = path.resolve(targetDir);
|
|
117
|
+
|
|
118
|
+
if (fs.existsSync(path.join(targetAbs, '.git'))) {
|
|
108
119
|
console.log(` Updating existing clone...`);
|
|
109
120
|
try {
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
121
|
+
execFileSync('git', ['fetch', 'origin'], { cwd: targetAbs, stdio: 'pipe', env });
|
|
122
|
+
execFileSync('git', ['reset', '--hard', 'origin/HEAD'], { cwd: targetAbs, stdio: 'pipe', env });
|
|
123
|
+
execFileSync('git', ['clean', '-fd'], { cwd: targetAbs, stdio: 'pipe', env });
|
|
113
124
|
} catch {
|
|
114
125
|
console.warn(` git update failed, re-cloning`);
|
|
115
|
-
fs.rmSync(
|
|
116
|
-
|
|
126
|
+
fs.rmSync(targetAbs, { recursive: true, force: true });
|
|
127
|
+
execFileSync('git', ['clone', '--depth', '1', repoUrl, targetAbs], { stdio: 'pipe', env });
|
|
117
128
|
}
|
|
118
129
|
} else {
|
|
119
|
-
fs.mkdirSync(
|
|
130
|
+
fs.mkdirSync(targetAbs, { recursive: true });
|
|
120
131
|
console.log(` Cloning ${sourceRepo}...`);
|
|
121
|
-
|
|
132
|
+
execFileSync('git', ['clone', '--depth', '1', repoUrl, targetAbs], { stdio: 'pipe', env });
|
|
122
133
|
}
|
|
123
134
|
}
|
|
124
135
|
|
|
136
|
+
// --- localPath safety check: see scripts/lib/local-path-safety.mjs ---
|
|
137
|
+
|
|
125
138
|
// --- Main ---
|
|
126
139
|
console.log('Fetching glossarist datasets...\n');
|
|
127
140
|
|
|
@@ -137,7 +150,7 @@ for (const ds of config.datasets) {
|
|
|
137
150
|
try {
|
|
138
151
|
if (fs.existsSync(gcrPath)) {
|
|
139
152
|
console.log(` Using local .gcr/${ds.id}.gcr`);
|
|
140
|
-
extractGcr(gcrPath, targetDir);
|
|
153
|
+
await extractGcr(gcrPath, targetDir);
|
|
141
154
|
} else if (ds.gcrPackage) {
|
|
142
155
|
console.log(` Using GCR package: ${ds.gcrPackage}`);
|
|
143
156
|
try {
|
|
@@ -148,29 +161,18 @@ for (const ds of config.datasets) {
|
|
|
148
161
|
console.log();
|
|
149
162
|
continue;
|
|
150
163
|
}
|
|
151
|
-
extractGcr(gcrPath, targetDir);
|
|
164
|
+
await extractGcr(gcrPath, targetDir);
|
|
165
|
+
} else if (ds.localPath) {
|
|
166
|
+
// localPath means "data is here, use in-place." No copy, no staging.
|
|
167
|
+
// generate-data.mjs reads from localPath directly via datasetDir(ds).
|
|
168
|
+
const localResolved = assertLocalPathSafe(ds.id, ds.localPath);
|
|
169
|
+
console.log(` Using localPath in-place: ${localResolved}`);
|
|
170
|
+
} else if (ds.sourceRepo) {
|
|
171
|
+
cloneOrUpdate(ds.sourceRepo, targetDir);
|
|
152
172
|
} else {
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
if (!fs.existsSync(targetDir)) fs.mkdirSync(targetDir, { recursive: true });
|
|
157
|
-
const localConcepts = path.join(envOverride, 'concepts');
|
|
158
|
-
const targetConcepts = path.join(targetDir, 'concepts');
|
|
159
|
-
if (fs.existsSync(localConcepts)) {
|
|
160
|
-
if (fs.existsSync(targetConcepts)) fs.rmSync(targetConcepts, { recursive: true, force: true });
|
|
161
|
-
execSync(`cp -r "${localConcepts}" "${targetConcepts}"`, { stdio: 'pipe' });
|
|
162
|
-
}
|
|
163
|
-
const registerYaml = path.join(envOverride, 'register.yaml');
|
|
164
|
-
if (fs.existsSync(registerYaml)) {
|
|
165
|
-
fs.copyFileSync(registerYaml, path.join(targetDir, 'register.yaml'));
|
|
166
|
-
}
|
|
167
|
-
} else if (ds.sourceRepo) {
|
|
168
|
-
cloneOrUpdate(ds.sourceRepo, targetDir);
|
|
169
|
-
} else {
|
|
170
|
-
console.warn(` No source configured, skipping`);
|
|
171
|
-
console.log();
|
|
172
|
-
continue;
|
|
173
|
-
}
|
|
173
|
+
console.warn(` No source configured, skipping`);
|
|
174
|
+
console.log();
|
|
175
|
+
continue;
|
|
174
176
|
}
|
|
175
177
|
|
|
176
178
|
// Read metadata for dependency validation (from GCR ZIP, not extracted dir)
|
|
@@ -4,11 +4,25 @@ import yaml from 'js-yaml';
|
|
|
4
4
|
import { naturalSort, Register, parseMention } from 'glossarist';
|
|
5
5
|
import { loadSiteConfig } from './load-site-config.mjs';
|
|
6
6
|
import { getGroups } from './lib/concept-groups.mjs';
|
|
7
|
+
import { consumeDatasetEntities } from './lib/build/non-verbal-consumer.mjs';
|
|
8
|
+
import { copyImageAssets } from './lib/build/image-assets.mjs';
|
|
7
9
|
const __dirname = path.dirname(new URL(import.meta.url).pathname);
|
|
8
10
|
const ROOT = process.cwd();
|
|
9
11
|
const PUBLIC = path.join(ROOT, 'public');
|
|
10
12
|
const DATA = path.join(PUBLIC, 'data');
|
|
11
13
|
|
|
14
|
+
/**
 * Locate the directory a dataset's source files are read from.
 *
 * A dataset with `localPath` is read in place: the path is resolved against
 * ROOT and nothing is copied or staged. Every other dataset is read from its
 * staging directory, ROOT/.datasets/<id>.
 *
 * @param {{ id: string, localPath?: string }} ds Dataset entry from site config.
 * @returns {string} Absolute path of the dataset's source directory.
 */
function datasetDir(ds) {
  if (ds.localPath) {
    return path.resolve(ROOT, ds.localPath);
  }
  return path.join(ROOT, '.datasets', ds.id);
}
|
|
25
|
+
|
|
12
26
|
const DS_PALETTE = [
|
|
13
27
|
'#3366ff', '#0d9488', '#d97706', '#8b5cf6',
|
|
14
28
|
'#ec4899', '#059669', '#dc2626', '#6366f1',
|
|
@@ -934,7 +948,8 @@ function processDataset(dir, register, opts) {
|
|
|
934
948
|
}
|
|
935
949
|
|
|
936
950
|
// Copy bulk format files from compiled/ directory (full GCR)
|
|
937
|
-
const
|
|
951
|
+
const sourceRoot = path.dirname(dir);
|
|
952
|
+
const compiledDir = path.join(sourceRoot, 'compiled');
|
|
938
953
|
const bulkFormats = [];
|
|
939
954
|
if (fs.existsSync(compiledDir)) {
|
|
940
955
|
for (const file of fs.readdirSync(compiledDir)) {
|
|
@@ -993,27 +1008,34 @@ function processDataset(dir, register, opts) {
|
|
|
993
1008
|
writeJson(path.join(DATA, register, 'manifest.json'), manifest);
|
|
994
1009
|
|
|
995
1010
|
// Copy bibliography.yaml → bibliography.json
|
|
996
|
-
const bibPath = path.join(
|
|
1011
|
+
const bibPath = path.join(sourceRoot, 'bibliography.yaml');
|
|
997
1012
|
if (fs.existsSync(bibPath)) {
|
|
998
1013
|
const bibData = readYaml(bibPath);
|
|
999
1014
|
writeJson(path.join(DATA, register, 'bibliography.json'), bibData);
|
|
1000
|
-
|
|
1015
|
+
const bibCount = Array.isArray(bibData?.bibliography) ? bibData.bibliography.length : 0;
|
|
1016
|
+
console.log(` Copied bibliography (${bibCount} entries)`);
|
|
1001
1017
|
}
|
|
1002
1018
|
|
|
1003
|
-
// Copy images/
|
|
1004
|
-
const imagesSrcDir = path.join(
|
|
1019
|
+
// Copy images/ with magic-byte validation + manifest emission.
|
|
1020
|
+
const imagesSrcDir = path.join(sourceRoot, 'images');
|
|
1005
1021
|
if (fs.existsSync(imagesSrcDir) && fs.statSync(imagesSrcDir).isDirectory()) {
|
|
1006
1022
|
const imagesDestDir = path.join(DATA, register, 'images');
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
for (const
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1023
|
+
const result = await copyImageAssets(imagesSrcDir, imagesDestDir);
|
|
1024
|
+
console.log(` Copied ${result.count} images (skipped ${result.skipped.length})`);
|
|
1025
|
+
for (const w of result.skipped) {
|
|
1026
|
+
console.warn(` Warning: skipped image ${w}`);
|
|
1027
|
+
}
|
|
1028
|
+
}
|
|
1029
|
+
|
|
1030
|
+
// Consume non-verbal entities (figures/tables/formulas) — JSON-LD preferred,
|
|
1031
|
+
// YAML fallback. Writes per-entity JSON + indexes.
|
|
1032
|
+
const nvResult = await consumeDatasetEntities(sourceRoot, path.join(DATA, register));
|
|
1033
|
+
const nvTotal = nvResult.figures + nvResult.tables + nvResult.formulas;
|
|
1034
|
+
if (nvTotal > 0) {
|
|
1035
|
+
console.log(` Consumed ${nvResult.figures} figures, ${nvResult.tables} tables, ${nvResult.formulas} formulas`);
|
|
1036
|
+
for (const w of nvResult.warnings) {
|
|
1037
|
+
console.warn(` Warning: ${w}`);
|
|
1015
1038
|
}
|
|
1016
|
-
console.log(` Copied ${imgCount} images`);
|
|
1017
1039
|
}
|
|
1018
1040
|
|
|
1019
1041
|
console.log(` Generated ${concepts.length} concepts, manifest, ${chunks.length} index chunks`);
|
|
@@ -1030,8 +1052,8 @@ const registerCache = {};
|
|
|
1030
1052
|
|
|
1031
1053
|
// Pre-load all register.yaml files (needed before buildRefMaps for URI pattern indexing)
|
|
1032
1054
|
for (const ds of config.datasets) {
|
|
1033
|
-
const
|
|
1034
|
-
const registerYamlPath = path.join(
|
|
1055
|
+
const dsDir = datasetDir(ds);
|
|
1056
|
+
const registerYamlPath = path.join(dsDir, 'register.yaml');
|
|
1035
1057
|
if (fs.existsSync(registerYamlPath)) {
|
|
1036
1058
|
try {
|
|
1037
1059
|
const raw = yaml.load(fs.readFileSync(registerYamlPath, 'utf8'));
|
|
@@ -1047,7 +1069,7 @@ const refMaps = buildRefMaps(config, registerCache);
|
|
|
1047
1069
|
for (let i = 0; i < config.datasets.length; i++) {
|
|
1048
1070
|
const ds = config.datasets[i];
|
|
1049
1071
|
|
|
1050
|
-
const dir = path.join(
|
|
1072
|
+
const dir = path.join(datasetDir(ds), 'concepts');
|
|
1051
1073
|
if (!fs.existsSync(dir)) {
|
|
1052
1074
|
console.warn(`Skipping ${ds.id}: source directory not found (${dir})`);
|
|
1053
1075
|
console.warn(` Run: npm run fetch-datasets`);
|
|
@@ -1089,8 +1111,8 @@ for (let i = 0; i < config.datasets.length; i++) {
|
|
|
1089
1111
|
status: ds.editionStatus || reg?.status,
|
|
1090
1112
|
ordering: reg?.ordering || null,
|
|
1091
1113
|
sections: reg?.sections ? reg.sections.map(s => s.toJSON()) : [],
|
|
1092
|
-
hasBibliography: fs.existsSync(path.join(
|
|
1093
|
-
hasImages: fs.existsSync(path.join(
|
|
1114
|
+
hasBibliography: fs.existsSync(path.join(datasetDir(ds), 'bibliography.yaml')),
|
|
1115
|
+
hasImages: fs.existsSync(path.join(datasetDir(ds), 'images')),
|
|
1094
1116
|
});
|
|
1095
1117
|
registry.push({
|
|
1096
1118
|
id: ds.id,
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image asset pipeline — pure Node, no shell commands.
|
|
3
|
+
*
|
|
4
|
+
* Copies image bytes from the source directory to the runtime data
|
|
5
|
+
* directory, validates magic bytes for known formats, parses intrinsic
|
|
6
|
+
* dimensions for raster formats, and emits `images-manifest.json` with
|
|
7
|
+
* SHA-256 hashes + dimensions.
|
|
8
|
+
*
|
|
9
|
+
* If the source directory contains its own `manifest.json`, that file is
|
|
10
|
+
* reused as the SSOT (single source of truth) and only missing entries
|
|
11
|
+
* are computed. This lets glossarist-ruby ship a manifest if it wants to.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { createHash } from 'node:crypto';
|
|
15
|
+
import { readFile, writeFile, mkdir, readdir, stat, copyFile } from 'node:fs/promises';
|
|
16
|
+
import path from 'node:path';
|
|
17
|
+
|
|
18
|
+
/** @typedef {{ src: string, format: string, sha256: string, width?: number, height?: number, bytes: number }} ImageManifestEntry */
|
|
19
|
+
|
|
20
|
+
export const SUPPORTED_FORMATS = new Set(['svg', 'png', 'jpg', 'jpeg', 'gif', 'webp', 'avif']);

// Leading signature bytes of the raster formats that are actually validated.
const MAGIC_BYTES = {
  png: [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A],
  jpg: [0xFF, 0xD8, 0xFF],
  gif: [0x47, 0x49, 0x46, 0x38],
  webp: [0x52, 0x49, 0x46, 0x46], // "RIFF" — full match also needs bytes 8-11 = "WEBP"
};

// Wire-format rule for image filenames (matched case-insensitively).
const FILENAME_RE = /^[a-z0-9._-]+$/i;

/**
 * Check whether `buf` begins with the given signature bytes.
 *
 * @param {Buffer} buf
 * @param {number[]} signature
 * @returns {boolean}
 */
function startsWithSignature(buf, signature) {
  return buf.length >= signature.length && signature.every((byte, i) => buf[i] === byte);
}

/**
 * Validate the file's magic bytes against its declared extension.
 * Returns the canonical format (e.g. "jpg" for ".jpeg"), or null if
 * validation fails.
 *
 * - svg: accepted when the content contains `<svg` or `<SVG`.
 * - png / jpg / jpeg / gif / webp: accepted only when the leading bytes match
 *   the format's signature. A mismatch returns null; it must NOT fall through
 *   to the trust path below, otherwise a mislabelled file is never rejected.
 * - any other supported extension (currently avif): accepted on trust,
 *   because no signature check is implemented for it.
 * - unsupported extension: null.
 *
 * @param {Buffer} buf Full file content.
 * @param {string} ext Extension without the leading dot (any case).
 * @returns {string | null}
 */
export function detectFormat(buf, ext) {
  const e = ext.toLowerCase();

  switch (e) {
    case 'svg':
      return buf.includes('<svg') || buf.includes('<SVG') ? 'svg' : null;
    case 'png':
      return startsWithSignature(buf, MAGIC_BYTES.png) ? 'png' : null;
    case 'jpg':
    case 'jpeg':
      return startsWithSignature(buf, MAGIC_BYTES.jpg) ? 'jpg' : null;
    case 'gif':
      return startsWithSignature(buf, MAGIC_BYTES.gif) ? 'gif' : null;
    case 'webp': {
      // RIFF container whose form type (bytes 8-11) is "WEBP".
      const isWebp =
        buf.length >= 12 &&
        startsWithSignature(buf, MAGIC_BYTES.webp) &&
        buf.toString('ascii', 8, 12) === 'WEBP';
      return isWebp ? 'webp' : null;
    }
    default:
      // Formats without a signature check (avif, etc.) — accept on trust;
      // authors are responsible.
      return SUPPORTED_FORMATS.has(e) ? e : null;
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Parse intrinsic dimensions for PNG/JPEG. Returns an empty object for any
 * other format, or when the dimensions cannot be located.
 * WebP/AVIF parsing is intentionally V1-light — authors declare via YAML.
 *
 * @param {Buffer} buf    Full file content.
 * @param {string} format Canonical format name ("png", "jpg" or "jpeg" are parsed).
 * @returns {{ width?: number, height?: number }}
 */
export function readIntrinsicDimensions(buf, format) {
  // PNG: 8-byte signature, then the IHDR chunk (4-byte length + 4-byte type),
  // so width and height are the big-endian uint32s at offsets 16 and 20.
  if (format === 'png' && buf.length >= 24) {
    return { width: buf.readUInt32BE(16), height: buf.readUInt32BE(20) };
  }
  if (format === 'jpg' || format === 'jpeg') {
    return readJpegDimensions(buf);
  }
  return {};
}

/**
 * Walk the JPEG marker segments and return the size stored in the first
 * frame header (SOFn: 0xFFC0–0xFFCF, excluding 0xFFC4 DHT, 0xFFC8 JPG and
 * 0xFFCC DAC).
 *
 * Handles the cases where a marker is not followed by a length field:
 *   - 0xFF fill bytes in front of a marker are skipped one at a time;
 *   - standalone markers (0x00 stuffing, TEM 0x01, RSTn 0xD0–0xD7, SOI 0xD8)
 *     are two bytes long and carry no payload;
 *   - SOS (0xDA) and EOI (0xD9) end the search, because the frame header must
 *     come before the first scan and entropy-coded data is not segment-framed.
 *
 * @param {Buffer} buf
 * @returns {{ width?: number, height?: number }} Empty when no frame header is found.
 */
function readJpegDimensions(buf) {
  let i = 2; // skip the SOI marker (FF D8)

  // Need the marker byte plus a 2-byte segment length to make progress.
  while (i + 4 <= buf.length) {
    if (buf[i] !== 0xFF) {
      i += 1;
      continue;
    }

    const marker = buf[i + 1];

    if (marker === 0xFF) {
      // Fill byte: the real marker starts at the next 0xFF.
      i += 1;
      continue;
    }

    const isFrameHeader =
      marker >= 0xC0 && marker <= 0xCF &&
      marker !== 0xC4 && marker !== 0xC8 && marker !== 0xCC;
    if (isFrameHeader) {
      // Layout: FF Cn | length(2) | precision(1) | height(2) | width(2)
      if (i + 9 > buf.length) return {};
      const height = buf.readUInt16BE(i + 5);
      const width = buf.readUInt16BE(i + 7);
      return { width, height };
    }

    if (marker === 0xDA || marker === 0xD9) {
      return {};
    }

    const isStandalone =
      marker === 0x00 || marker === 0x01 || (marker >= 0xD0 && marker <= 0xD8);
    if (isStandalone) {
      i += 2;
      continue;
    }

    const segLen = buf.readUInt16BE(i + 2);
    if (segLen < 2) {
      // The length field counts its own two bytes; anything smaller is corrupt.
      return {};
    }
    i += 2 + segLen;
  }

  return {};
}
|
|
92
|
+
|
|
93
|
+
/**
 * Normalise an image filename to the wire-format alphabet `[a-z0-9._-]`.
 *
 * The name is lowercased; every run of characters outside `[a-z0-9._]`
 * (hyphens included) becomes a single `-`; a leading or trailing `-` is then
 * dropped. The result can be an empty string when nothing usable remains.
 *
 * @param {string} name
 * @returns {string}
 */
export function sanitizeImageFilename(name) {
  // Treating '-' itself as part of the "unsafe" run collapses adjacent
  // hyphens in the same pass that replaces the unsafe characters.
  const hyphenated = name.toLowerCase().replace(/[^a-z0-9._]+/g, '-');

  const start = hyphenated.startsWith('-') ? 1 : 0;
  const end = hyphenated.endsWith('-') ? hyphenated.length - 1 : hyphenated.length;
  return start < end ? hyphenated.slice(start, end) : '';
}
|
|
106
|
+
|
|
107
|
+
/**
 * Hex-encoded SHA-256 digest of the given bytes.
 *
 * @param {Buffer} buf
 * @returns {string} 64 lowercase hex characters.
 */
function sha256(buf) {
  const hasher = createHash('sha256');
  hasher.update(buf);
  return hasher.digest('hex');
}
|
|
110
|
+
|
|
111
|
+
/**
 * Best-effort read of an upstream `manifest.json`.
 *
 * A missing or unreadable file simply means "no upstream manifest". A file
 * that exists but is not a JSON object is reported with a warning and then
 * ignored, so a broken manifest does not silently change the output.
 *
 * @param {string} manifestPath
 * @returns {Promise<Record<string, unknown> | null>}
 */
async function loadUpstreamManifest(manifestPath) {
  let raw;
  try {
    raw = await readFile(manifestPath, 'utf8');
  } catch {
    return null; // no upstream manifest — compute fresh
  }

  try {
    const parsed = JSON.parse(raw);
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed;
    }
    console.warn(`  Warning: ignoring images manifest ${manifestPath}: not a JSON object`);
  } catch (err) {
    console.warn(`  Warning: ignoring images manifest ${manifestPath}: ${err.message}`);
  }
  return null;
}

/**
 * Copy image assets from srcDir into destDir, validate, and emit
 * images-manifest.json (written next to destDir, i.e. in its parent).
 *
 * For every regular file in srcDir other than `manifest.json`:
 *   1. the name is sanitized; names that sanitize to nothing usable, or that
 *      have an unsupported extension, are skipped;
 *   2. the content is checked with detectFormat(); failures are skipped with
 *      a "(format mismatch)" note;
 *   3. a name that sanitizes to the same destination as an earlier file is
 *      skipped with a collision note, instead of overwriting that file and
 *      being counted twice;
 *   4. the file is copied and a manifest entry recorded — taken from the
 *      upstream manifest when it has an object entry for the sanitized name,
 *      otherwise computed (SHA-256, byte size, intrinsic dimensions).
 *
 * Files are processed in name order so that collision handling does not
 * depend on the directory listing order.
 *
 * @param {string} srcDir   Absolute path to source images/
 * @param {string} destDir  Absolute path to public/data/{ds}/images/
 * @returns {Promise<{ count: number, manifest: Record<string, ImageManifestEntry>, skipped: string[] }>}
 */
export async function copyImageAssets(srcDir, destDir) {
  await mkdir(destDir, { recursive: true });

  const dirEntries = await readdir(srcDir, { withFileTypes: true });
  const files = dirEntries
    .filter((entry) => entry.isFile() && entry.name !== 'manifest.json')
    .sort((a, b) => {
      if (a.name < b.name) return -1;
      return a.name > b.name ? 1 : 0;
    });

  // Reuse upstream manifest if present (SSOT).
  const upstream = await loadUpstreamManifest(path.join(srcDir, 'manifest.json'));

  /** @type {Record<string, ImageManifestEntry>} */
  const manifest = {};
  const skipped = [];
  // sanitized destination name → original source name that claimed it
  const claimedBy = new Map();
  let count = 0;

  for (const entry of files) {
    const safeName = sanitizeImageFilename(entry.name);
    if (!FILENAME_RE.test(safeName)) {
      skipped.push(entry.name);
      continue;
    }

    const ext = path.extname(safeName).slice(1);
    if (!ext || !SUPPORTED_FORMATS.has(ext)) {
      skipped.push(entry.name);
      continue;
    }

    const src = path.join(srcDir, entry.name);
    const buf = await readFile(src);

    const format = detectFormat(buf, ext);
    if (!format) {
      skipped.push(`${entry.name} (format mismatch)`);
      continue;
    }

    const earlier = claimedBy.get(safeName);
    if (earlier !== undefined) {
      skipped.push(`${entry.name} (name collides with ${earlier} as ${safeName})`);
      continue;
    }
    claimedBy.set(safeName, entry.name);

    await copyFile(src, path.join(destDir, safeName));

    const upstreamEntry = upstream?.[safeName];
    if (upstreamEntry !== null && typeof upstreamEntry === 'object') {
      // Upstream values win; only the name and detected format are enforced.
      manifest[safeName] = { ...upstreamEntry, src: safeName, format };
    } else {
      manifest[safeName] = {
        src: safeName,
        format,
        sha256: sha256(buf),
        bytes: buf.length,
        ...readIntrinsicDimensions(buf, format),
      };
    }
    count++;
  }

  await writeFile(
    path.join(destDir, '..', 'images-manifest.json'),
    JSON.stringify(manifest, null, 2),
  );

  return { count, manifest, skipped };
}
|