@glossarist/concept-browser 0.7.43 → 0.7.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/index.mjs
CHANGED
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
*/
|
|
21
21
|
|
|
22
22
|
import { loadSiteConfig } from '../scripts/load-site-config.mjs';
|
|
23
|
-
import {
|
|
23
|
+
import { existsSync } from 'fs';
|
|
24
24
|
import { resolve, dirname } from 'path';
|
|
25
25
|
import { fileURLToPath } from 'url';
|
|
26
26
|
|
|
@@ -166,22 +166,21 @@ Environment:
|
|
|
166
166
|
}
|
|
167
167
|
}
|
|
168
168
|
|
|
169
|
-
// Run vite build using the package's vite.config.ts
|
|
169
|
+
// Run vite build using the package's vite.config.ts via programmatic API
|
|
170
170
|
console.log(`\n=== BUILD SPA ===\n`);
|
|
171
171
|
const viteConfig = resolve(pkgRoot, 'vite.config.ts');
|
|
172
|
-
const
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
env: { ...process.env },
|
|
172
|
+
const { build: viteBuild } = await import('vite');
|
|
173
|
+
await viteBuild({
|
|
174
|
+
configFile: viteConfig,
|
|
175
|
+
root: pkgRoot,
|
|
176
|
+
mode: 'production',
|
|
178
177
|
});
|
|
179
178
|
|
|
180
|
-
// Run postbuild (404 page)
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
}
|
|
179
|
+
// Run postbuild (404 page) via dynamic import.
// import() of an absolute filesystem location needs a well-formed file: URL.
// Prefixing the path with 'file://' by hand is not one on Windows (drive
// letter and backslashes) and misparses any path containing '#', '?' or '%',
// so let Node build the URL from the path instead.
const postbuild = resolve(pkgRoot, 'scripts', 'generate-404.js');
if (existsSync(postbuild)) {
  const { pathToFileURL } = await import('url');
  await import(pathToFileURL(postbuild).href);
}
|
|
185
184
|
|
|
186
185
|
return;
|
|
187
186
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@glossarist/concept-browser",
|
|
3
|
-
"version": "0.7.
|
|
3
|
+
"version": "0.7.44",
|
|
4
4
|
"description": "Vue SPA for browsing Glossarist terminology datasets with cross-reference resolution, graph visualization, and multi-language support",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -27,6 +27,7 @@
|
|
|
27
27
|
"favicons": "^7.2.0",
|
|
28
28
|
"glossarist": "^0.3.7",
|
|
29
29
|
"js-yaml": "^4.1.0",
|
|
30
|
+
"jszip": "^3.10.1",
|
|
30
31
|
"pinia": "^2.3.1",
|
|
31
32
|
"postcss": "^8.5.3",
|
|
32
33
|
"sharp": "^0.34.5",
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import os from 'os';
|
|
5
|
+
import { assertLocalPathSafe } from '../lib/local-path-safety.mjs';
|
|
6
|
+
|
|
7
|
+
/**
 * Create a throwaway project layout under the OS temp directory:
 *
 *   <root>/.datasets/                      (empty staging dir)
 *   <root>/source-data/concepts/a.yaml     (one sample concept file)
 *
 * @returns {{ root: string, datasetsDir: string, sourceDir: string }}
 *   paths of the created directories; the caller removes `root` when done.
 */
function makeTmpTree() {
  const prefix = path.join(os.tmpdir(), 'cb-fetch-');
  const root = fs.mkdtempSync(prefix);
  const tree = {
    root,
    datasetsDir: path.join(root, '.datasets'),
    sourceDir: path.join(root, 'source-data'),
  };
  const conceptsDir = path.join(tree.sourceDir, 'concepts');

  fs.mkdirSync(tree.datasetsDir);
  fs.mkdirSync(conceptsDir, { recursive: true });
  fs.writeFileSync(path.join(conceptsDir, 'a.yaml'), 'termid: 1\n');

  return tree;
}
|
|
16
|
+
|
|
17
|
+
describe('assertLocalPathSafe', () => {
  let tree;

  // Run the safety check for dataset "foo" against the per-test temp project.
  const check = (localPath) =>
    assertLocalPathSafe('foo', localPath, {
      root: tree.root,
      datasetsDir: tree.datasetsDir,
    });

  // Create the staging directory .datasets/foo (plus optional nested
  // segments) and return the path of .datasets/foo itself.
  const makeStaged = (...nested) => {
    const staged = path.join(tree.datasetsDir, 'foo');
    fs.mkdirSync(path.join(staged, ...nested), { recursive: true });
    return staged;
  };

  beforeEach(() => { tree = makeTmpTree(); });
  afterEach(() => {
    fs.rmSync(tree.root, { recursive: true, force: true });
  });

  it('returns resolved path for a safe external location', () => {
    const resolved = check(tree.sourceDir);
    // Returns the realpath (symlinks resolved); on macOS tmpdir resolves
    // /var → /private/var, so compare against realpath, not path.resolve.
    expect(resolved).toBe(fs.realpathSync(path.resolve(tree.root, tree.sourceDir)));
  });

  it('throws when localPath does not exist', () => {
    expect(() => check(path.join(tree.root, 'nope'))).toThrow(/does not exist/);
  });

  it('throws when localPath equals .datasets/<id>', () => {
    const staged = makeStaged();
    expect(() => check(staged)).toThrow(/same physical location/);
  });

  it('throws when localPath is nested inside .datasets/<id>', () => {
    const staged = makeStaged('subdir');
    expect(() => check(path.join(staged, 'subdir'))).toThrow(/nested inside/);
  });

  it('throws when localPath contains .datasets/<id> (parent-of-staging hazard)', () => {
    // localPath = root itself, datasetsDir = root/.datasets — staging ops
    // (rm -rf .datasets/<id>) would touch files inside localPath.
    // The dot is escaped so the pattern matches the literal ".datasets".
    expect(() => check(tree.root)).toThrow(/contains \.datasets/);
  });

  it('throws when localPath is a symlink to .datasets/<id> (the reported bug)', () => {
    const staged = makeStaged();
    const symlinkPath = path.join(tree.root, 'evil-link');
    // 'junction' lets Windows create the directory link without elevated
    // privileges; the type argument is ignored on other platforms.
    fs.symlinkSync(staged, symlinkPath, 'junction');
    expect(() => check(symlinkPath)).toThrow(/same physical location/);
  });

  it('does NOT modify the source directory (regression for data-loss bug)', () => {
    const conceptsDir = path.join(tree.sourceDir, 'concepts');
    const sentinel = path.join(conceptsDir, 'SENTINEL.yaml');
    fs.writeFileSync(sentinel, 'termid: sentinel\n');
    const beforeMtime = fs.statSync(sentinel).mtimeMs;

    check(tree.sourceDir);

    // Source directory must be completely untouched after the safety check.
    expect(fs.existsSync(sentinel)).toBe(true);
    expect(fs.statSync(sentinel).mtimeMs).toBe(beforeMtime);
    expect(fs.readdirSync(conceptsDir)).toContain('SENTINEL.yaml');
  });
});
|
|
@@ -1,23 +1,26 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
|
-
* fetch-datasets.mjs — Load datasets from .gcr files or
|
|
3
|
+
* fetch-datasets.mjs — Load datasets from .gcr files, local paths, or git repos.
|
|
4
4
|
*
|
|
5
5
|
* Reads site config (via load-site-config.mjs), for each dataset:
|
|
6
6
|
* 1. If .gcr/{id}.gcr exists, extract to .datasets/{id}/
|
|
7
7
|
* 2. Else download from gcrPackage URL and extract
|
|
8
|
-
* 3. Else
|
|
8
|
+
* 3. Else if localPath is set, use it in-place (NO copy, NO staging)
|
|
9
|
+
* 4. Else clone/update source repo into .datasets/{id}/
|
|
9
10
|
*
|
|
10
11
|
* After fetching, validates that all GCR dependencies are satisfiable
|
|
11
12
|
* (either provided locally or routed externally).
|
|
12
13
|
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
14
|
+
* No shell commands. All file ops use Node fs; ZIP uses JSZip; git uses
|
|
15
|
+
* execFileSync with array args (no shell interpolation).
|
|
15
16
|
*/
|
|
16
17
|
import fs from 'fs';
|
|
17
18
|
import path from 'path';
|
|
19
|
+
import JSZip from 'jszip';
|
|
18
20
|
import { loadGcr } from 'glossarist';
|
|
19
|
-
import {
|
|
21
|
+
import { execFileSync } from 'child_process';
|
|
20
22
|
import { loadSiteConfig } from './load-site-config.mjs';
|
|
23
|
+
import { assertLocalPathSafe } from './lib/local-path-safety.mjs';
|
|
21
24
|
|
|
22
25
|
const ROOT = process.cwd();
|
|
23
26
|
const DATASETS_DIR = path.join(ROOT, '.datasets');
|
|
@@ -39,23 +42,29 @@ async function downloadGcr(url, destPath) {
|
|
|
39
42
|
console.log(` Saved to ${destPath} (${(buf.length / 1024).toFixed(0)} KB)`);
|
|
40
43
|
}
|
|
41
44
|
|
|
42
|
-
// --- GCR extraction ---
|
|
43
|
-
function extractGcr(gcrPath, targetDir) {
|
|
44
|
-
|
|
45
|
-
|
|
45
|
+
// --- GCR extraction (pure JSZip; no shell, cross-platform) ---
|
|
46
|
+
/**
 * Unpack a .gcr archive (read as a ZIP via JSZip) into `targetDir`.
 *
 * Any existing `targetDir` is removed first, so the directory ends up holding
 * only the archive's files. Directory entries are skipped; parent directories
 * are created on demand for each file entry.
 *
 * @param {string} gcrPath - path of the .gcr file to read
 * @param {string} targetDir - directory to (re)create and fill
 * @returns {Promise<void>}
 * @throws {Error} when an entry would be written outside `targetDir`
 */
async function extractGcr(gcrPath, targetDir) {
  const targetAbs = path.resolve(targetDir);
  const insidePrefix = targetAbs + path.sep;

  if (fs.existsSync(targetAbs)) {
    fs.rmSync(targetAbs, { recursive: true, force: true });
  }
  fs.mkdirSync(targetAbs, { recursive: true });

  const archive = await JSZip.loadAsync(fs.readFileSync(gcrPath));
  const fileEntries = Object.values(archive.files).filter((entry) => !entry.dir);

  for (const entry of fileEntries) {
    // zip-slip guard: refuse entries that escape targetDir
    const dest = path.resolve(targetAbs, entry.name);
    const staysInside = dest === targetAbs || dest.startsWith(insidePrefix);
    if (!staysInside) {
      throw new Error(`Refusing to extract entry outside target dir: ${entry.name}`);
    }

    fs.mkdirSync(path.dirname(dest), { recursive: true });
    const content = await entry.async('nodebuffer');
    fs.writeFileSync(dest, content);
  }

  console.log(` Extracted to ${targetAbs}`);
}
|
|
60
69
|
|
|
61
70
|
// --- Read GCR metadata from ZIP without extraction ---
|
|
@@ -96,7 +105,7 @@ function validateDependencies(config, gcrMetadata) {
|
|
|
96
105
|
return errors;
|
|
97
106
|
}
|
|
98
107
|
|
|
99
|
-
// --- Git operations ---
|
|
108
|
+
// --- Git operations (execFileSync with array args — no shell) ---
|
|
100
109
|
function cloneOrUpdate(sourceRepo, targetDir) {
|
|
101
110
|
const env = { ...process.env };
|
|
102
111
|
let repoUrl = sourceRepo;
|
|
@@ -104,24 +113,28 @@ function cloneOrUpdate(sourceRepo, targetDir) {
|
|
|
104
113
|
repoUrl = sourceRepo.replace('https://', `https://x-access-token:${env.GITHUB_TOKEN}@`);
|
|
105
114
|
}
|
|
106
115
|
|
|
107
|
-
|
|
116
|
+
const targetAbs = path.resolve(targetDir);
|
|
117
|
+
|
|
118
|
+
if (fs.existsSync(path.join(targetAbs, '.git'))) {
|
|
108
119
|
console.log(` Updating existing clone...`);
|
|
109
120
|
try {
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
121
|
+
execFileSync('git', ['fetch', 'origin'], { cwd: targetAbs, stdio: 'pipe', env });
|
|
122
|
+
execFileSync('git', ['reset', '--hard', 'origin/HEAD'], { cwd: targetAbs, stdio: 'pipe', env });
|
|
123
|
+
execFileSync('git', ['clean', '-fd'], { cwd: targetAbs, stdio: 'pipe', env });
|
|
113
124
|
} catch {
|
|
114
125
|
console.warn(` git update failed, re-cloning`);
|
|
115
|
-
fs.rmSync(
|
|
116
|
-
|
|
126
|
+
fs.rmSync(targetAbs, { recursive: true, force: true });
|
|
127
|
+
execFileSync('git', ['clone', '--depth', '1', repoUrl, targetAbs], { stdio: 'pipe', env });
|
|
117
128
|
}
|
|
118
129
|
} else {
|
|
119
|
-
fs.mkdirSync(
|
|
130
|
+
fs.mkdirSync(targetAbs, { recursive: true });
|
|
120
131
|
console.log(` Cloning ${sourceRepo}...`);
|
|
121
|
-
|
|
132
|
+
execFileSync('git', ['clone', '--depth', '1', repoUrl, targetAbs], { stdio: 'pipe', env });
|
|
122
133
|
}
|
|
123
134
|
}
|
|
124
135
|
|
|
136
|
+
// --- localPath safety check: see scripts/lib/local-path-safety.mjs ---
|
|
137
|
+
|
|
125
138
|
// --- Main ---
|
|
126
139
|
console.log('Fetching glossarist datasets...\n');
|
|
127
140
|
|
|
@@ -137,7 +150,7 @@ for (const ds of config.datasets) {
|
|
|
137
150
|
try {
|
|
138
151
|
if (fs.existsSync(gcrPath)) {
|
|
139
152
|
console.log(` Using local .gcr/${ds.id}.gcr`);
|
|
140
|
-
extractGcr(gcrPath, targetDir);
|
|
153
|
+
await extractGcr(gcrPath, targetDir);
|
|
141
154
|
} else if (ds.gcrPackage) {
|
|
142
155
|
console.log(` Using GCR package: ${ds.gcrPackage}`);
|
|
143
156
|
try {
|
|
@@ -148,29 +161,18 @@ for (const ds of config.datasets) {
|
|
|
148
161
|
console.log();
|
|
149
162
|
continue;
|
|
150
163
|
}
|
|
151
|
-
extractGcr(gcrPath, targetDir);
|
|
164
|
+
await extractGcr(gcrPath, targetDir);
|
|
165
|
+
} else if (ds.localPath) {
|
|
166
|
+
// localPath means "data is here, use in-place." No copy, no staging.
|
|
167
|
+
// generate-data.mjs reads from localPath directly via datasetDir(ds).
|
|
168
|
+
const localResolved = assertLocalPathSafe(ds.id, ds.localPath);
|
|
169
|
+
console.log(` Using localPath in-place: ${localResolved}`);
|
|
170
|
+
} else if (ds.sourceRepo) {
|
|
171
|
+
cloneOrUpdate(ds.sourceRepo, targetDir);
|
|
152
172
|
} else {
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
if (!fs.existsSync(targetDir)) fs.mkdirSync(targetDir, { recursive: true });
|
|
157
|
-
const localConcepts = path.join(envOverride, 'concepts');
|
|
158
|
-
const targetConcepts = path.join(targetDir, 'concepts');
|
|
159
|
-
if (fs.existsSync(localConcepts)) {
|
|
160
|
-
if (fs.existsSync(targetConcepts)) fs.rmSync(targetConcepts, { recursive: true, force: true });
|
|
161
|
-
execSync(`cp -r "${localConcepts}" "${targetConcepts}"`, { stdio: 'pipe' });
|
|
162
|
-
}
|
|
163
|
-
const registerYaml = path.join(envOverride, 'register.yaml');
|
|
164
|
-
if (fs.existsSync(registerYaml)) {
|
|
165
|
-
fs.copyFileSync(registerYaml, path.join(targetDir, 'register.yaml'));
|
|
166
|
-
}
|
|
167
|
-
} else if (ds.sourceRepo) {
|
|
168
|
-
cloneOrUpdate(ds.sourceRepo, targetDir);
|
|
169
|
-
} else {
|
|
170
|
-
console.warn(` No source configured, skipping`);
|
|
171
|
-
console.log();
|
|
172
|
-
continue;
|
|
173
|
-
}
|
|
173
|
+
console.warn(` No source configured, skipping`);
|
|
174
|
+
console.log();
|
|
175
|
+
continue;
|
|
174
176
|
}
|
|
175
177
|
|
|
176
178
|
// Read metadata for dependency validation (from GCR ZIP, not extracted dir)
|
|
@@ -9,6 +9,18 @@ const ROOT = process.cwd();
|
|
|
9
9
|
const PUBLIC = path.join(ROOT, 'public');
|
|
10
10
|
const DATA = path.join(PUBLIC, 'data');
|
|
11
11
|
|
|
12
|
+
/**
 * Pick the directory a dataset's files are read from.
 *
 * - A dataset that sets `ds.localPath` is read in place from that path,
 *   resolved against ROOT. Nothing is copied or staged here;
 *   fetch-datasets.mjs is where the path is checked for safety.
 * - Any other dataset is read from its staging directory, .datasets/<id>/
 *   under ROOT.
 */
function datasetDir(ds) {
  if (ds.localPath) {
    return path.resolve(ROOT, ds.localPath);
  }
  return path.join(ROOT, '.datasets', ds.id);
}
|
|
23
|
+
|
|
12
24
|
const DS_PALETTE = [
|
|
13
25
|
'#3366ff', '#0d9488', '#d97706', '#8b5cf6',
|
|
14
26
|
'#ec4899', '#059669', '#dc2626', '#6366f1',
|
|
@@ -934,7 +946,8 @@ function processDataset(dir, register, opts) {
|
|
|
934
946
|
}
|
|
935
947
|
|
|
936
948
|
// Copy bulk format files from compiled/ directory (full GCR)
|
|
937
|
-
const
|
|
949
|
+
const sourceRoot = path.dirname(dir);
|
|
950
|
+
const compiledDir = path.join(sourceRoot, 'compiled');
|
|
938
951
|
const bulkFormats = [];
|
|
939
952
|
if (fs.existsSync(compiledDir)) {
|
|
940
953
|
for (const file of fs.readdirSync(compiledDir)) {
|
|
@@ -993,7 +1006,7 @@ function processDataset(dir, register, opts) {
|
|
|
993
1006
|
writeJson(path.join(DATA, register, 'manifest.json'), manifest);
|
|
994
1007
|
|
|
995
1008
|
// Copy bibliography.yaml → bibliography.json
|
|
996
|
-
const bibPath = path.join(
|
|
1009
|
+
const bibPath = path.join(sourceRoot, 'bibliography.yaml');
|
|
997
1010
|
if (fs.existsSync(bibPath)) {
|
|
998
1011
|
const bibData = readYaml(bibPath);
|
|
999
1012
|
writeJson(path.join(DATA, register, 'bibliography.json'), bibData);
|
|
@@ -1001,7 +1014,7 @@ function processDataset(dir, register, opts) {
|
|
|
1001
1014
|
}
|
|
1002
1015
|
|
|
1003
1016
|
// Copy images/
|
|
1004
|
-
const imagesSrcDir = path.join(
|
|
1017
|
+
const imagesSrcDir = path.join(sourceRoot, 'images');
|
|
1005
1018
|
if (fs.existsSync(imagesSrcDir) && fs.statSync(imagesSrcDir).isDirectory()) {
|
|
1006
1019
|
const imagesDestDir = path.join(DATA, register, 'images');
|
|
1007
1020
|
fs.mkdirSync(imagesDestDir, { recursive: true });
|
|
@@ -1030,8 +1043,8 @@ const registerCache = {};
|
|
|
1030
1043
|
|
|
1031
1044
|
// Pre-load all register.yaml files (needed before buildRefMaps for URI pattern indexing)
|
|
1032
1045
|
for (const ds of config.datasets) {
|
|
1033
|
-
const
|
|
1034
|
-
const registerYamlPath = path.join(
|
|
1046
|
+
const dsDir = datasetDir(ds);
|
|
1047
|
+
const registerYamlPath = path.join(dsDir, 'register.yaml');
|
|
1035
1048
|
if (fs.existsSync(registerYamlPath)) {
|
|
1036
1049
|
try {
|
|
1037
1050
|
const raw = yaml.load(fs.readFileSync(registerYamlPath, 'utf8'));
|
|
@@ -1047,7 +1060,7 @@ const refMaps = buildRefMaps(config, registerCache);
|
|
|
1047
1060
|
for (let i = 0; i < config.datasets.length; i++) {
|
|
1048
1061
|
const ds = config.datasets[i];
|
|
1049
1062
|
|
|
1050
|
-
const dir = path.join(
|
|
1063
|
+
const dir = path.join(datasetDir(ds), 'concepts');
|
|
1051
1064
|
if (!fs.existsSync(dir)) {
|
|
1052
1065
|
console.warn(`Skipping ${ds.id}: source directory not found (${dir})`);
|
|
1053
1066
|
console.warn(` Run: npm run fetch-datasets`);
|
|
@@ -1089,8 +1102,8 @@ for (let i = 0; i < config.datasets.length; i++) {
|
|
|
1089
1102
|
status: ds.editionStatus || reg?.status,
|
|
1090
1103
|
ordering: reg?.ordering || null,
|
|
1091
1104
|
sections: reg?.sections ? reg.sections.map(s => s.toJSON()) : [],
|
|
1092
|
-
hasBibliography: fs.existsSync(path.join(
|
|
1093
|
-
hasImages: fs.existsSync(path.join(
|
|
1105
|
+
hasBibliography: fs.existsSync(path.join(datasetDir(ds), 'bibliography.yaml')),
|
|
1106
|
+
hasImages: fs.existsSync(path.join(datasetDir(ds), 'images')),
|
|
1094
1107
|
});
|
|
1095
1108
|
registry.push({
|
|
1096
1109
|
id: ds.id,
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
|
|
4
|
+
/**
 * Compute the canonical physical path of `p`, resolving symlinks on the
 * existing prefix. If `p` itself exists, this is just `realpathSync(p)`.
 * If not, we walk up to the nearest existing ancestor, realpath it, and
 * re-append the non-existent tail. This is needed because macOS tmpdir
 * (`/var/folders/...`) is a symlink to `/private/var/folders/...`; without
 * this, prefix comparisons across the symlink boundary silently fail.
 *
 * @param {string} p
 * @returns {string}
 */
function physicalPath(p) {
  if (fs.existsSync(p)) return fs.realpathSync(p);
  const parent = path.dirname(p);
  // dirname() returns its input once a filesystem root is reached. If that
  // root does not exist (e.g. an unmapped drive), stop instead of recursing
  // forever.
  if (parent === p) return p;
  return path.join(physicalPath(parent), path.basename(p));
}

/**
 * True when `candidate` lies strictly below `ancestor`.
 *
 * Uses path.relative rather than a `startsWith(ancestor + sep)` prefix test:
 * the prefix test never matches when `ancestor` is a filesystem root, because
 * the root already ends with a separator.
 *
 * @param {string} ancestor - absolute path
 * @param {string} candidate - absolute path
 * @returns {boolean}
 */
function isStrictlyInside(ancestor, candidate) {
  const rel = path.relative(ancestor, candidate);
  // '' means the same path; an absolute result means a different drive.
  if (rel === '' || path.isAbsolute(rel)) return false;
  return rel !== '..' && !rel.startsWith(`..${path.sep}`);
}

/**
 * Assert that a dataset's `localPath` is safe to use as an in-place source.
 *
 * "Safe" means: the resolved physical location of `localPath` is disjoint
 * from the staging directory (`.datasets/<id>`). If they overlap, staging
 * operations (rm, extract, clone) would destroy the user's source data —
 * the data-loss bug reported in v0.7.43.
 *
 * Returns the resolved absolute path on success; throws on any hazard.
 *
 * @param {string} datasetId
 * @param {string} localPath - relative to `root` (or absolute)
 * @param {{ root?: string, datasetsDir?: string }} [opts] - `root` defaults
 *   to the current working directory; `datasetsDir` defaults to
 *   `<root>/.datasets` and, when relative, is resolved against `root`.
 * @returns {string} resolved absolute path (symlinks resolved)
 * @throws {Error} when `localPath` does not exist, or when it is the same
 *   location as, nested inside, or a parent of `.datasets/<datasetId>`
 */
export function assertLocalPathSafe(datasetId, localPath, { root = process.cwd(), datasetsDir } = {}) {
  // Resolve against `root` so a relative datasetsDir cannot end up being
  // interpreted against an unrelated working directory.
  const datasetsRoot = path.resolve(root, datasetsDir || '.datasets');
  const localResolved = path.resolve(root, localPath);

  if (!fs.existsSync(localResolved)) {
    throw new Error(`localPath for ${datasetId} does not exist: ${localResolved}`);
  }

  const localReal = fs.realpathSync(localResolved);
  const stagedAbs = path.join(datasetsRoot, datasetId);
  // The staging dir may not exist yet, so resolve what exists of it.
  const stagedReal = physicalPath(stagedAbs);

  if (localReal === stagedReal) {
    throw new Error(
      `localPath for ${datasetId} resolves to the same physical location as .datasets/${datasetId} ` +
      `(${localReal}). Refusing to operate — source and staging would clobber. ` +
      `Use a path outside .datasets/.`
    );
  }
  if (isStrictlyInside(stagedReal, localReal)) {
    throw new Error(
      `localPath for ${datasetId} is nested inside .datasets/${datasetId}. ` +
      `Refusing to operate — staging operations would destroy source data. ` +
      `Use a path outside .datasets/.`
    );
  }
  if (isStrictlyInside(localReal, stagedReal)) {
    throw new Error(
      `localPath for ${datasetId} contains .datasets/${datasetId}. ` +
      `Refusing to operate — staging operations would destroy source data. ` +
      `Use a path outside localPath.`
    );
  }
  return localReal;
}
|