zj-to-hydrooj 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +166 -0
- package/package.json +19 -0
- package/src/zjHtmlToMarkdown.js +227 -0
package/index.ts
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import AdmZip from 'adm-zip';
|
|
2
|
+
import { promises as fsNative } from 'fs';
|
|
3
|
+
import {
|
|
4
|
+
buildContent, Context, Handler, PERM,
|
|
5
|
+
ProblemModel, Schema, ValidationError, yaml,
|
|
6
|
+
} from 'hydrooj';
|
|
7
|
+
|
|
8
|
+
import { htmlToOJMarkdown } from './src/zjHtmlToMarkdown';
|
|
9
|
+
|
|
10
|
+
// define ZJson Schema
|
|
11
|
+
/**
 * Validation schema for one ZJSON problem record.
 *
 * Only `title` and `problemid` are required. `testfilelength` falls back to 0
 * when absent; every other field is optional.
 */
const ZJsonSchema = Schema.object({
    // --- identity ---
    title: Schema.string().required(),
    problemid: Schema.string().required(),
    author: Schema.string(),

    // --- statement sections ---
    content: Schema.string(),
    theinput: Schema.string(),
    theoutput: Schema.string(),
    sampleinput: Schema.string(),
    sampleoutput: Schema.string(),
    hint: Schema.string(),

    // Left untyped by the schema (any value is accepted).
    keywords: Schema.any(),

    // --- test data: a count plus parallel lists of input/output contents ---
    testfilelength: Schema.number().default(0),
    testinfiles: Schema.array(Schema.string()),
    testoutfiles: Schema.array(Schema.string()),

    // --- limits (`timelimits` is left untyped by the schema) ---
    timelimits: Schema.any(),
    memorylimit: Schema.number(),
});
|
|
28
|
+
|
|
29
|
+
/** Returns a printable message for any thrown value (not only `Error` instances). */
function describeError(e: unknown): string {
    return e instanceof Error ? e.message : String(e);
}

/**
 * Request handler that imports problems from an uploaded ZJSON file.
 *
 * The upload is either a single JSON document or a ZIP archive containing one
 * or more `.zjson` entries. Each record is validated against `ZJsonSchema`,
 * its HTML sections are converted to Markdown, and the problem plus its test
 * data and a generated `config.yaml` are stored through `ProblemModel`.
 */
class ImportJsonHandler extends Handler {
    /**
     * Normalizes the loosely-typed `keywords` field into a list of tag strings.
     *
     * Accepts an array, a JSON-encoded string, or a plain comma-separated
     * string. A string that is not valid JSON no longer aborts the import;
     * it is split on commas instead.
     */
    private static parseTags(keywords: unknown): string[] {
        let value: unknown = keywords;
        if (typeof value === 'string') {
            const text = value.trim();
            if (!text) return [];
            try {
                value = JSON.parse(text);
            } catch {
                value = text.split(',');
            }
        }
        let list: unknown[];
        if (Array.isArray(value)) list = value;
        else if (value === null || value === undefined) list = [];
        else list = [value];
        return list
            .filter((tag) => typeof tag === 'string' || typeof tag === 'number')
            .map((tag) => String(tag).trim())
            .filter((tag) => tag.length > 0);
    }

    /**
     * Validates one ZJSON record and creates the corresponding problem.
     *
     * @param domainId Domain the problem is created in.
     * @param rawData  Parsed (but not yet validated) ZJSON record.
     * @throws ValidationError when the record fails schema validation, the
     *         problem id is not "one letter + 3 digits", or the id already exists.
     */
    async processZJson(domainId: string, rawData: any) {
        let data;
        try {
            data = ZJsonSchema(rawData);
        } catch (e: unknown) {
            throw new ValidationError('file', null, `Invalid ZJSON content: ${describeError(e)}`);
        }

        // The three-argument (field, null, detail) form matches the other
        // ValidationError call sites in this file.
        if (!/^[a-zA-Z]\d{3}$/.test(data.problemid)) {
            throw new ValidationError('problemid', null, `Invalid PID: ${data.problemid}. Must be one letter + 3 digits.`);
        }
        if (await ProblemModel.get(domainId, data.problemid)) {
            throw new ValidationError('problemid', null, `PID ${data.problemid} already exists.`);
        }

        // Missing or empty sections become empty strings.
        const toMarkdown = (html?: string): string => (html ? htmlToOJMarkdown(html) || '' : '');

        let descriptionMarkdown = toMarkdown(data.content);
        if (data.author) {
            const authorUrl = `https://dandanjudge.fdhs.tyc.edu.tw/UserStatistic?account=${encodeURIComponent(data.author)}`;
            descriptionMarkdown = `**Author**: [${data.author}](${authorUrl})\n\n${descriptionMarkdown}`;
        }

        const contentMarkdown = buildContent({
            description: descriptionMarkdown,
            input: toMarkdown(data.theinput),
            output: toMarkdown(data.theoutput),
            // Avoid rendering the literal text "undefined" when a sample half is absent.
            samples: [[data.sampleinput ?? '', data.sampleoutput ?? '']],
            hint: toMarkdown(data.hint),
        }, 'markdown');

        const tags = ImportJsonHandler.parseTags(data.keywords);
        const pid = await ProblemModel.add(
            domainId, data.problemid, data.title, contentMarkdown,
            this.user._id, tags,
        );

        // `timelimits` may be a list or a single value; only the first entry is used.
        // An empty list or a missing/zero value falls back to the defaults.
        const firstTimeLimit = Array.isArray(data.timelimits) ? data.timelimits[0] : data.timelimits;
        const config = {
            type: 'default',
            time: firstTimeLimit ? `${firstTimeLimit}s` : '3s',
            memory: data.memorylimit ? `${data.memorylimit}mb` : '100mb',
            subtasks: [] as any[],
        };

        const tasks = [];
        for (let i = 0; i < data.testfilelength; i++) {
            const inName = `${i + 1}.in`;
            const outName = `${i + 1}.out`;
            // Missing test files are stored as empty files so the case list stays aligned.
            const inContent = data.testinfiles?.[i] || '';
            const outContent = data.testoutfiles?.[i] || '';
            tasks.push(ProblemModel.addTestdata(domainId, pid, inName, Buffer.from(inContent)));
            tasks.push(ProblemModel.addTestdata(domainId, pid, outName, Buffer.from(outContent)));
            // One subtask per test case.
            config.subtasks.push({
                cases: [{ input: inName, output: outName }],
            });
        }
        tasks.push(ProblemModel.addTestdata(domainId, pid, 'config.yaml', Buffer.from(yaml.dump(config))));
        await Promise.all(tasks);
    }

    /**
     * Imports every problem found in the uploaded file.
     *
     * A file starting with the "PK" signature is treated as a ZIP archive and
     * each `.zjson` entry is imported independently (a failing entry is logged
     * and skipped). Anything else is parsed as a single JSON document.
     *
     * @param domainId Domain the problems are created in.
     * @param filePath Path of the uploaded file on disk.
     * @throws ValidationError when the file cannot be parsed, the archive has
     *         no `.zjson` entry, or no entry at all could be imported.
     */
    async fromFile(domainId: string, filePath: string) {
        const buf = await fsNative.readFile(filePath);
        const isZip = buf[0] === 0x50 && buf[1] === 0x4b; // "PK"

        if (!isZip) {
            let rawData: unknown;
            try {
                rawData = JSON.parse(buf.toString('utf8'));
            } catch (e: unknown) {
                throw new ValidationError('file', null, `Invalid JSON file: ${describeError(e)}`);
            }
            // Validation errors from processZJson propagate unchanged.
            await this.processZJson(domainId, rawData);
            return;
        }

        let jsonEntries: AdmZip.IZipEntry[];
        try {
            jsonEntries = new AdmZip(buf).getEntries().filter(
                (entry: AdmZip.IZipEntry) => entry.entryName.toLowerCase().endsWith('.zjson'),
            );
        } catch (e: unknown) {
            throw new ValidationError('file', null, `Failed to read ZIP archive: ${describeError(e)}`);
        }
        if (jsonEntries.length === 0) {
            throw new ValidationError('file', null, 'No .zjson file found in the ZIP archive');
        }

        const failures: string[] = [];
        for (const jsonEntry of jsonEntries) {
            try {
                // Parsing is inside the try so one malformed entry does not abort the rest.
                const rawData = JSON.parse(jsonEntry.getData().toString('utf8'));
                await this.processZJson(domainId, rawData);
            } catch (e: unknown) {
                console.error(`Error processing ${jsonEntry.entryName}:`, e);
                failures.push(`${jsonEntry.entryName}: ${describeError(e)}`);
            }
        }
        // Partial success stays best-effort, but importing nothing is reported as a failure.
        if (failures.length === jsonEntries.length) {
            throw new ValidationError('file', null, `No problem could be imported from the ZIP archive: ${failures.join('; ')}`);
        }
    }

    /** Renders the import page using the `problem_import.html` template. */
    async get() {
        this.response.body = { type: 'JSON' };
        this.response.template = 'problem_import.html';
    }

    /**
     * Handles the upload: imports the file, then redirects to the problem list.
     *
     * @throws ValidationError when no file was uploaded or the import fails.
     */
    async post({ domainId }: { domainId: string }) {
        const file = this.request.files.file;
        if (!file) throw new ValidationError('file');

        try {
            await this.fromFile(domainId, file.filepath);
        } catch (e: unknown) {
            console.error('Import Error Trace:', e);
            // Already a ValidationError: rethrow as-is instead of wrapping it again.
            if (e instanceof ValidationError) throw e;
            throw new ValidationError('file', null, `Import failed: ${describeError(e)}`);
        }
        this.response.redirect = this.url('problem_main', { domainId });
    }
}
|
|
158
|
+
|
|
159
|
+
/**
 * Plugin entry point: registers the import route, adds its entry to the
 * `ProblemAdd` UI slot, and loads the `zh` label for that entry.
 */
export async function apply(ctx : Context) {
    const routeName = 'problem_import_json';
    const menuLabel = 'From JSON/ZIP Export';

    ctx.Route(routeName, '/problem/import/json', ImportJsonHandler, PERM.PERM_CREATE_PROBLEM);
    ctx.injectUI('ProblemAdd', routeName, { icon: 'copy', text: menuLabel });

    const zhLabels = { [menuLabel]: 'Import from DDJ-v1' };
    ctx.i18n.load('zh', zhLabels);
}
|
|
166
|
+
|
package/package.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "zj-to-hydrooj",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "JSON to Hydro import plugin",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"keywords": [
|
|
8
|
+
"hydrooj",
|
|
9
|
+
"fdcs"
|
|
10
|
+
],
|
|
11
|
+
"author": "Miyun",
|
|
12
|
+
"license": "MIT",
|
|
13
|
+
"dependencies": {
|
|
14
|
+
"js-yaml": "^4.1.1",
|
|
15
|
+
"jsdom": "^27.4.0",
|
|
16
|
+
"jszip": "^3.10.1",
|
|
17
|
+
"pandoc-ts": "^1.0.5"
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
const { JSDOM } = require('jsdom');
|
|
2
|
+
|
|
3
|
+
/**
 * Converts an HTML fragment into the Markdown/HTML mix produced by `walk`.
 *
 * The HTML is parsed with JSDOM, the body is rendered, runs of three or more
 * newlines are collapsed to two, a newline is appended after every `$$...$$`
 * span, and the result is trimmed.
 */
export function htmlToOJMarkdown(html) {
  const { body } = new JSDOM(html).window.document;

  const rendered = walk(body);
  const collapsed = rendered.replace(/\n{3,}/g, '\n\n');
  const withMathBreaks = collapsed.replace(/(\$\$[^\$]*\$\$)/g, '$1\n');

  return withMathBreaks.trim();
}
|
|
12
|
+
|
|
13
|
+
/**
 * Recursively renders a DOM node to text.
 *
 * Text nodes yield their text content. Elements are rendered according to
 * their tag; unknown tags yield just their rendered children. Any other node
 * type yields an empty string.
 */
function walk(node) {
  if (node.nodeType === node.TEXT_NODE) {
    return node.textContent ?? '';
  }
  if (node.nodeType !== node.ELEMENT_NODE) {
    return '';
  }

  const tag = node.tagName.toLowerCase();
  const inner = Array.from(node.childNodes).map(walk).join('');

  // Headings are kept as HTML, with a space after the opening tag.
  if (/^h[1-6]$/.test(tag)) {
    return `<${tag}> ${inner}</${tag}>\n\n`;
  }

  // Inline formatting that is passed through as HTML.
  if (tag === 'strong' || tag === 'em' || tag === 'sup' || tag === 'sub') {
    return `<${tag}>${inner}</${tag}>`;
  }

  if (tag === 'code') {
    // Inside <pre> the surrounding fence already marks the code.
    const insidePre = node.parentElement?.tagName.toLowerCase() === 'pre';
    return insidePre ? inner : `<code>${inner}</code>`;
  }

  if (tag === 'p') return `${inner}\n\n`;
  if (tag === 'pre') return `\n\`\`\`\n${inner}\n\`\`\`\n`;
  if (tag === 'ul') return `\n${inner}`;
  if (tag === 'ol') return `\n${renderOrderedList(node)}`;
  if (tag === 'li') return `- ${inner}\n`;
  if (tag === 'img') return renderImage(node);
  if (tag === 'span') return renderSpan(node);
  if (tag === 'table') return renderTable(node);
  if (tag === 'hr') return `\n---\n\n`;
  if (tag === 'br') return `\n`;
  if (tag === 'div') return `${inner}\n`;

  return inner;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Renders a <span>.
 *
 * Spans whose class contains `math-inline` or `MathJax` become `$...$`, and
 * `math-display` spans become a `$$` block. Otherwise an underline or
 * line-through text decoration (from `el.style`, then from the raw `style`
 * attribute) becomes `<u>` or `~~`. Any other span yields its children.
 */
function renderSpan(el) {
  const classes = el.className;
  const inner = Array.from(el.childNodes).map(walk).join('');

  const isInlineMath = classes.includes('math-inline') || classes.includes('MathJax');
  if (isInlineMath) {
    return `$${extractMath(el)}$`;
  }
  if (classes.includes('math-display')) {
    return `\n$$\n${extractMath(el)}\n$$\n`;
  }

  const underlined = `<u>${inner}</u>`;
  const struck = `~~${inner}~~`;

  // First choice: the parsed style object.
  const hasParsedDecoration = el.style && typeof el.style.textDecoration === 'string';
  const decoration = hasParsedDecoration ? el.style.textDecoration.toLowerCase() : '';
  if (decoration.includes('underline')) return underlined;
  if (decoration.includes('line-through')) return struck;

  // Second choice: substring checks on the raw style attribute.
  const rawStyle = (el.getAttribute('style') || '').toLowerCase();
  if (rawStyle.includes('text-decoration')) {
    if (rawStyle.includes('underline')) return underlined;
    if (rawStyle.includes('line-through')) return struck;
  }

  return inner;
}
|
|
100
|
+
|
|
101
|
+
/** Returns the element's text content (empty string if null) passed through `normalizeMath`. */
function extractMath(el) {
  const rawText = el.textContent ?? '';
  return normalizeMath(rawText);
}
|
|
104
|
+
|
|
105
|
+
// LaTeX command to Unicode symbol mapping
|
|
106
|
+
const LATEX_TO_UNICODE = {
  // Comparison operators
  '\\leq': '≤', '\\le': '≤',
  '\\geq': '≥', '\\ge': '≥',
  '\\neq': '≠', '\\ne': '≠',
  '\\approx': '≈',
  '\\equiv': '≡',

  // Arithmetic operators
  '\\times': '×',
  '\\cdot': '·',
  '\\div': '÷',
  '\\pm': '±',
  '\\mp': '∓',

  // Arrows
  '\\to': '→', '\\rightarrow': '→',
  '\\leftarrow': '←',
  '\\leftrightarrow': '↔',
  '\\Rightarrow': '⇒',
  '\\Leftarrow': '⇐',
  '\\Leftrightarrow': '⇔',

  // Greek letters (lowercase)
  '\\alpha': 'α', '\\beta': 'β', '\\gamma': 'γ', '\\delta': 'δ',
  '\\epsilon': 'ε', '\\zeta': 'ζ', '\\eta': 'η', '\\theta': 'θ',
  '\\iota': 'ι', '\\kappa': 'κ', '\\lambda': 'λ', '\\mu': 'μ',
  '\\nu': 'ν', '\\xi': 'ξ', '\\pi': 'π', '\\rho': 'ρ',
  '\\sigma': 'σ', '\\tau': 'τ', '\\upsilon': 'υ', '\\phi': 'φ',
  '\\chi': 'χ', '\\psi': 'ψ', '\\omega': 'ω',

  // Greek letters (uppercase)
  '\\Gamma': 'Γ', '\\Delta': 'Δ', '\\Theta': 'Θ', '\\Lambda': 'Λ',
  '\\Xi': 'Ξ', '\\Pi': 'Π', '\\Sigma': 'Σ', '\\Phi': 'Φ',
  '\\Psi': 'Ψ', '\\Omega': 'Ω',

  // Logic symbols
  '\\forall': '∀', '\\exists': '∃',
  '\\wedge': '∧', '\\vee': '∨', '\\neg': '¬',
  '\\land': '∧', '\\lor': '∨',

  // Set theory
  '\\in': '∈', '\\notin': '∉',
  '\\subset': '⊂', '\\subseteq': '⊆',
  '\\supset': '⊃', '\\supseteq': '⊇',
  '\\cup': '∪', '\\cap': '∩',
  '\\emptyset': '∅',

  // Misc
  '\\infty': '∞', '\\partial': '∂',
  '\\nabla': '∇', '\\sum': '∑',
  '\\prod': '∏', '\\int': '∫',
};

/**
 * Simplifies the LaTeX text of a math span.
 *
 * Steps: collapse whitespace runs to a single space, replace each known
 * command with its Unicode symbol, drop unknown commands, remove any
 * remaining backslashes, and trim.
 *
 * Commands are matched as whole tokens (a backslash followed by the full run
 * of letters). Matching each table key as a bare substring would corrupt
 * longer commands that merely start with a known one, e.g. `\left(` would
 * become `≤ft(` and `\pmod` would become `±od`.
 *
 * @param s Raw LaTeX source.
 * @returns The simplified text.
 */
function normalizeMath(s) {
  return s
    .replace(/\s+/g, ' ')
    // One pass over whole command tokens: known -> symbol, unknown -> removed.
    .replace(/\\[a-zA-Z]+/g, (command) => LATEX_TO_UNICODE[command] ?? '')
    // Whatever backslashes are left (e.g. `\\`, `\{`) are dropped.
    .replace(/\\/g, '')
    .trim();
}
|
|
179
|
+
|
|
180
|
+
function renderImage(el) {
|
|
181
|
+
const src = el.getAttribute('src') ?? '';
|
|
182
|
+
const alt = el.getAttribute('alt') ?? '';
|
|
183
|
+
return ``;
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
/**
 * Renders the direct <li> children of an ordered list as `1. `, `2. `, ...
 * lines. Child nodes that are not <li> elements are ignored and do not
 * consume a number.
 */
function renderOrderedList(el) {
  const lines = [];

  for (const child of Array.from(el.childNodes)) {
    const isListItem = child.nodeType === child.ELEMENT_NODE
      && child.tagName.toLowerCase() === 'li';
    if (!isListItem) continue;

    const text = Array.from(child.childNodes).map(walk).join('');
    lines.push(`${lines.length + 1}. ${text}\n`);
  }

  return lines.join('');
}
|
|
196
|
+
|
|
197
|
+
/**
 * Renders a <table> as a Markdown pipe table.
 *
 * The first <tr> found becomes the header row, followed by a `---` separator
 * with one column per header cell. Every later <tr> becomes a data row, with
 * empty data cells rendered as a single space. A table without rows yields
 * an empty string.
 */
function renderTable(el) {
  const [headRow, ...bodyRows] = Array.from(el.querySelectorAll('tr'));
  if (headRow === undefined) return '';

  const cellsOf = (row) => Array.from(row.querySelectorAll('td, th'));
  const cellText = (cell) => Array.from(cell.childNodes).map(walk).join('').trim();

  const headCells = cellsOf(headRow);
  const headerLine = `\n| ${headCells.map((cell) => cellText(cell)).join(' | ')} |\n`;
  const separatorLine = `|${' --- |'.repeat(headCells.length)}\n`;

  const bodyLines = bodyRows.map((row) => {
    const texts = cellsOf(row).map((cell) => cellText(cell) || ' ');
    return `| ${texts.join(' | ')} |\n`;
  });

  return `${headerLine}${separatorLine}${bodyLines.join('')}\n`;
}
|