erdos-problems 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -3
- package/docs/ERDOS_PROBLEMS_PROBLEM_SCHEMA.md +20 -2
- package/package.json +1 -1
- package/src/cli/index.js +4 -0
- package/src/commands/pull.js +203 -0
- package/src/commands/workspace.js +1 -0
- package/src/runtime/paths.js +8 -0
- package/src/runtime/workspace.js +2 -0
- package/src/upstream/site.js +80 -0
package/README.md
CHANGED
|
@@ -18,9 +18,10 @@ Official binary:
|
|
|
18
18
|
|
|
19
19
|
- atlas layer with canonical local `problems/<id>/problem.yaml` records
|
|
20
20
|
- bundled upstream snapshot from `teorth/erdosproblems`
|
|
21
|
-
- workspace `.erdos/` state for active-problem selection, upstream refreshes, reports, and
|
|
21
|
+
- workspace `.erdos/` state for active-problem selection, upstream refreshes, reports, scaffolds, and pull bundles
|
|
22
22
|
- sunflower cluster as the first deep harness pack
|
|
23
23
|
- seeded atlas now includes open and solved problems beyond sunflower
|
|
24
|
+
- unseeded problems can still be pulled into a workspace from the bundled upstream snapshot
|
|
24
25
|
|
|
25
26
|
Seeded problems:
|
|
26
27
|
- `18`, `20`, `89`, `536`, `542`, `856`, `857`, `1008`
|
|
@@ -30,7 +31,7 @@ Seeded problems:
|
|
|
30
31
|
```bash
|
|
31
32
|
erdos problem list --cluster sunflower
|
|
32
33
|
erdos bootstrap problem 857
|
|
33
|
-
erdos problem artifacts 857
|
|
34
|
+
erdos problem artifacts 857 --json
|
|
34
35
|
erdos dossier show 857
|
|
35
36
|
```
|
|
36
37
|
|
|
@@ -40,6 +41,22 @@ What `bootstrap` does:
|
|
|
40
41
|
- includes the upstream record when a bundled or workspace snapshot is available
|
|
41
42
|
- gives an agent a ready-to-read local artifact bundle immediately after install
|
|
42
43
|
|
|
44
|
+
## Pull bundles
|
|
45
|
+
|
|
46
|
+
For any problem number in the upstream snapshot, you can create a workspace bundle even if the problem is not yet seeded locally:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
erdos pull problem 857
|
|
50
|
+
erdos pull problem 999 --include-site
|
|
51
|
+
erdos pull problem 999 --refresh-upstream
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
What `pull` does:
|
|
55
|
+
- creates `.erdos/pulls/<id>/`
|
|
56
|
+
- includes the upstream record when available
|
|
57
|
+
- includes the local canonical dossier too when the problem is seeded locally
|
|
58
|
+
- can optionally add a live site snapshot and plain-text extract
|
|
59
|
+
|
|
43
60
|
## CLI
|
|
44
61
|
|
|
45
62
|
```bash
|
|
@@ -62,6 +79,8 @@ erdos upstream diff
|
|
|
62
79
|
erdos scaffold problem 857
|
|
63
80
|
erdos bootstrap problem 857
|
|
64
81
|
erdos bootstrap problem 857 --sync-upstream
|
|
82
|
+
erdos pull problem 857
|
|
83
|
+
erdos pull problem 857 --include-site
|
|
65
84
|
```
|
|
66
85
|
|
|
67
86
|
## Canonical Sources
|
|
@@ -84,8 +103,9 @@ For each seeded problem, the canonical local dossier lives in `problems/<id>/`:
|
|
|
84
103
|
The CLI can surface these directly:
|
|
85
104
|
- `erdos problem artifacts <id>` shows the canonical inventory
|
|
86
105
|
- `erdos problem artifacts <id> --json` emits machine-readable inventory
|
|
87
|
-
- `erdos scaffold problem <id>` copies the
|
|
106
|
+
- `erdos scaffold problem <id>` copies the seeded dossier into the active workspace
|
|
88
107
|
- `erdos bootstrap problem <id>` selects the problem and creates the scaffold in one step
|
|
108
|
+
- `erdos pull problem <id>` creates a workspace bundle for any problem in the upstream snapshot
|
|
89
109
|
|
|
90
110
|
## Notes
|
|
91
111
|
|
|
@@ -12,10 +12,11 @@ The goal is:
|
|
|
12
12
|
- open and solved problems use the same shape
|
|
13
13
|
- local dossier truth and upstream public truth stay explicitly separated
|
|
14
14
|
- packaged CLI installs can scaffold problem workspaces from canonical artifacts immediately
|
|
15
|
+
- unseeded problems can still be pulled into a workspace bundle from upstream truth
|
|
15
16
|
|
|
16
17
|
## Canonical Files
|
|
17
18
|
|
|
18
|
-
Each problem should have:
|
|
19
|
+
Each seeded problem should have:
|
|
19
20
|
|
|
20
21
|
- `problems/<id>/problem.yaml`
|
|
21
22
|
- `problems/<id>/STATEMENT.md`
|
|
@@ -29,6 +30,12 @@ Bundled upstream snapshot artifacts live in:
|
|
|
29
30
|
- `data/upstream/erdosproblems/PROBLEMS_INDEX.json`
|
|
30
31
|
- `data/upstream/erdosproblems/SYNC_MANIFEST.json`
|
|
31
32
|
|
|
33
|
+
Workspace-generated artifacts may live in:
|
|
34
|
+
|
|
35
|
+
- `.erdos/scaffolds/<id>/`
|
|
36
|
+
- `.erdos/pulls/<id>/`
|
|
37
|
+
- `.erdos/upstream/erdosproblems/`
|
|
38
|
+
|
|
32
39
|
## Canonical Truth Split
|
|
33
40
|
|
|
34
41
|
### External public truth
|
|
@@ -178,4 +185,15 @@ The sync commands should produce:
|
|
|
178
185
|
- upstream record snapshot for that problem when available
|
|
179
186
|
- generated artifact index for agent consumption
|
|
180
187
|
|
|
181
|
-
This
|
|
188
|
+
This is the seeded-problem path.
|
|
189
|
+
|
|
190
|
+
## Pull Contract
|
|
191
|
+
|
|
192
|
+
`erdos pull problem <id>` should create a broader workspace-ready bundle containing:
|
|
193
|
+
|
|
194
|
+
- upstream record snapshot for that problem when available
|
|
195
|
+
- generated artifact index for agent consumption
|
|
196
|
+
- seeded local dossier files too when the problem already exists in `problems/<id>/`
|
|
197
|
+
- optional live site snapshot and extracted text when `--include-site` is used
|
|
198
|
+
|
|
199
|
+
This makes a fresh npm-installed CLI immediately useful to an agentic workflow even for problems that are not yet fully seeded as local dossiers.
|
package/package.json
CHANGED
package/src/cli/index.js
CHANGED
|
@@ -2,6 +2,7 @@ import { runBootstrapCommand } from '../commands/bootstrap.js';
|
|
|
2
2
|
import { runClusterCommand } from '../commands/cluster.js';
|
|
3
3
|
import { runDossierCommand } from '../commands/dossier.js';
|
|
4
4
|
import { runProblemCommand } from '../commands/problem.js';
|
|
5
|
+
import { runPullCommand } from '../commands/pull.js';
|
|
5
6
|
import { runScaffoldCommand } from '../commands/scaffold.js';
|
|
6
7
|
import { runUpstreamCommand } from '../commands/upstream.js';
|
|
7
8
|
import { runWorkspaceCommand } from '../commands/workspace.js';
|
|
@@ -24,6 +25,7 @@ function printUsage() {
|
|
|
24
25
|
console.log(' erdos upstream diff [--write-package-report]');
|
|
25
26
|
console.log(' erdos scaffold problem <id> [--dest <path>]');
|
|
26
27
|
console.log(' erdos bootstrap problem <id> [--dest <path>] [--sync-upstream]');
|
|
28
|
+
console.log(' erdos pull problem <id> [--dest <path>] [--include-site] [--refresh-upstream]');
|
|
27
29
|
}
|
|
28
30
|
|
|
29
31
|
const args = process.argv.slice(2);
|
|
@@ -47,6 +49,8 @@ if (!command || command === 'help' || command === '--help') {
|
|
|
47
49
|
exitCode = runScaffoldCommand(rest);
|
|
48
50
|
} else if (command === 'bootstrap') {
|
|
49
51
|
exitCode = await runBootstrapCommand(rest);
|
|
52
|
+
} else if (command === 'pull') {
|
|
53
|
+
exitCode = await runPullCommand(rest);
|
|
50
54
|
} else {
|
|
51
55
|
console.error(`Unknown command: ${command}`);
|
|
52
56
|
printUsage();
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { getProblem } from '../atlas/catalog.js';
|
|
3
|
+
import { ensureDir, writeJson, writeText } from '../runtime/files.js';
|
|
4
|
+
import { getWorkspaceProblemPullDir } from '../runtime/paths.js';
|
|
5
|
+
import { scaffoldProblem } from '../runtime/problem-artifacts.js';
|
|
6
|
+
import { loadActiveUpstreamSnapshot, syncUpstream } from '../upstream/sync.js';
|
|
7
|
+
import { fetchProblemSiteSnapshot } from '../upstream/site.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Parse the tokens that follow `erdos pull` on the command line.
 *
 * Accepted shape:
 *   `problem <id> [--dest <path>] [--include-site] [--refresh-upstream]`
 *
 * @param {string[]} args - Raw CLI tokens after the `pull` command word.
 * @returns {{ error: string } | {
 *   problemId: string | undefined,
 *   destination: string | null,
 *   includeSite: boolean,
 *   refreshUpstream: boolean,
 * }} Either an `error` description, or the parsed options. `problemId` is
 *   `undefined` when no id token was supplied; the caller reports that case.
 */
function parsePullArgs(args) {
  const [kind, value, ...rest] = args;
  if (kind !== 'problem') {
    return { error: 'Only `erdos pull problem <id>` is supported right now.' };
  }

  // The id ends up inside a workspace directory path and the problem page
  // URL, so anything other than a plain decimal number (for example `../x`
  // or a stray `--flag`) is rejected here instead of being passed along.
  if (value !== undefined && !/^\d+$/.test(String(value))) {
    return { error: `Invalid problem id: ${value}` };
  }

  let destination = null;
  let includeSite = false;
  let refreshUpstream = false;

  for (let index = 0; index < rest.length; index += 1) {
    const token = rest[index];
    if (token === '--dest') {
      const next = rest[index + 1];
      // A following `--option` is another flag, not a path: treat the
      // destination as missing rather than creating a directory named after it.
      if (!next || next.startsWith('--')) {
        return { error: 'Missing destination path after --dest.' };
      }
      destination = next;
      index += 1; // skip the consumed path token
      continue;
    }
    if (token === '--include-site') {
      includeSite = true;
      continue;
    }
    if (token === '--refresh-upstream') {
      refreshUpstream = true;
      continue;
    }
    return { error: `Unknown pull option: ${token}` };
  }

  return {
    problemId: value,
    destination,
    includeSite,
    refreshUpstream,
  };
}
|
|
47
|
+
|
|
48
|
+
/**
 * Write a pull bundle for a problem that has no local dossier.
 *
 * Files written under `destination`:
 *   - `UPSTREAM_RECORD.json` (only when `upstreamRecord` is truthy)
 *   - `PROBLEM.json`
 *   - `ARTIFACT_INDEX.json`
 *   - `README.md`
 *
 * @param {string} problemId - Problem id used in titles and the source URL.
 * @param {string} destination - Directory that receives the bundle files.
 * @param {object|null} upstreamRecord - Upstream record for the problem, if any.
 * @param {object|null} snapshot - Active upstream snapshot descriptor, if any;
 *   when present its `manifest` object is read.
 */
function writeUpstreamOnlyBundle(problemId, destination, upstreamRecord, snapshot) {
  const inBundle = (fileName) => path.join(destination, fileName);
  const sourceUrl = `https://www.erdosproblems.com/${problemId}`;

  ensureDir(destination);

  if (upstreamRecord) {
    writeJson(inBundle('UPSTREAM_RECORD.json'), upstreamRecord);
  }

  // One timestamp is shared by both generated JSON documents.
  const generatedAt = new Date().toISOString();

  const problemSummary = {
    generatedAt,
    problemId,
    title: `Erdos Problem #${problemId}`,
    cluster: null,
    siteStatus: upstreamRecord?.status?.state ?? 'unknown',
    repoStatus: 'upstream-only',
    harnessDepth: 'unseeded',
    sourceUrl,
    activeRoute: null,
  };
  writeJson(inBundle('PROBLEM.json'), problemSummary);

  let upstreamSnapshot = null;
  if (snapshot) {
    upstreamSnapshot = {
      kind: snapshot.kind,
      manifestPath: snapshot.manifestPath,
      indexPath: snapshot.indexPath,
      yamlPath: snapshot.yamlPath,
      upstreamCommit: snapshot.manifest.upstream_commit ?? null,
      fetchedAt: snapshot.manifest.fetched_at,
    };
  }
  writeJson(inBundle('ARTIFACT_INDEX.json'), {
    generatedAt,
    problemId,
    // Nothing is copied for an upstream-only bundle, so both lists stay empty.
    copiedArtifacts: [],
    canonicalArtifacts: [],
    upstreamSnapshot,
    includedUpstreamRecord: Boolean(upstreamRecord),
  });

  const readmeLines = [
    `# Erdos Problem ${problemId} Pull Bundle`,
    '',
    'This bundle was generated from upstream public metadata.',
    '',
    `- Source: ${sourceUrl}`,
    `- Upstream record included: ${upstreamRecord ? 'yes' : 'no'}`,
    '',
    'This problem is not yet seeded locally as a canonical dossier in this package.',
    '',
  ];
  writeText(inBundle('README.md'), readmeLines.join('\n'));
}
|
|
101
|
+
|
|
102
|
+
/**
 * Optionally add a live site snapshot to a pull bundle.
 *
 * When `includeSite` is falsy nothing is fetched or written. Otherwise the
 * problem page is fetched and `SITE_SNAPSHOT.html`, `SITE_EXTRACT.txt`,
 * `SITE_EXTRACT.json` and `SITE_SUMMARY.md` are written into `destination`.
 * Any error raised along the way is recorded in `SITE_FETCH_ERROR.txt` and
 * reported in the result instead of being rethrown.
 *
 * @param {string} problemId - Problem id to fetch.
 * @param {string} destination - Bundle directory that receives the files.
 * @param {boolean} includeSite - Whether a site snapshot was requested.
 * @returns {Promise<{ attempted: boolean, included: boolean, error: string | null }>}
 */
async function maybeWriteSiteBundle(problemId, destination, includeSite) {
  if (!includeSite) {
    return { attempted: false, included: false, error: null };
  }

  const inBundle = (fileName) => path.join(destination, fileName);

  try {
    const siteSnapshot = await fetchProblemSiteSnapshot(problemId);
    const { url, fetchedAt, title, previewLines } = siteSnapshot;

    writeText(inBundle('SITE_SNAPSHOT.html'), siteSnapshot.html);
    writeText(inBundle('SITE_EXTRACT.txt'), siteSnapshot.text);
    writeJson(inBundle('SITE_EXTRACT.json'), { url, fetchedAt, title, previewLines });

    const summaryLines = [
      `# Erdős Problem #${problemId} Site Summary`,
      '',
      `Source: ${url}`,
      `Fetched at: ${fetchedAt}`,
      `Title: ${title}`,
      '',
      '## Preview',
      '',
      ...previewLines.map((line) => `- ${line}`),
      '',
    ];
    writeText(inBundle('SITE_SUMMARY.md'), summaryLines.join('\n'));

    return { attempted: true, included: true, error: null };
  } catch (error) {
    // Best effort: a failed snapshot is noted in the bundle, not fatal.
    const reason = String(error.message ?? error);
    writeText(inBundle('SITE_FETCH_ERROR.txt'), reason);
    return { attempted: true, included: false, error: reason };
  }
}
|
|
138
|
+
|
|
139
|
+
/**
 * Entry point for `erdos pull`.
 *
 * Prints usage for an empty argument list, `help` or `--help`. Otherwise it
 * parses the arguments, optionally refreshes the upstream snapshot, looks the
 * problem up in the local catalog and the active upstream snapshot, writes
 * the bundle (scaffolded from the local dossier when one exists, otherwise
 * upstream-only), optionally adds a site snapshot, and finally writes
 * `PULL_STATUS.json` describing what was included.
 *
 * @param {string[]} args - CLI tokens after the `pull` command word.
 * @returns {Promise<number>} Process exit code: 0 on success or help, 1 on a
 *   usage error or when the problem is unknown.
 */
export async function runPullCommand(args) {
  const helpRequested = args.length === 0 || args[0] === 'help' || args[0] === '--help';
  if (helpRequested) {
    console.log('Usage:');
    console.log(' erdos pull problem <id> [--dest <path>] [--include-site] [--refresh-upstream]');
    return 0;
  }

  const parsed = parsePullArgs(args);
  if (parsed.error) {
    console.error(parsed.error);
    return 1;
  }
  if (!parsed.problemId) {
    console.error('Missing problem id.');
    return 1;
  }

  const { problemId, includeSite, refreshUpstream } = parsed;
  const problemKey = String(problemId);
  const yesNo = (flag) => (flag ? 'yes' : 'no');

  if (refreshUpstream) {
    await syncUpstream();
  }

  const localProblem = getProblem(problemId);
  const snapshot = loadActiveUpstreamSnapshot();
  const upstreamRecord = snapshot?.index?.by_number?.[problemKey] ?? null;

  if (!localProblem && !upstreamRecord) {
    console.error(`Problem ${problemId} is not present in the local dossier set or upstream snapshot.`);
    return 1;
  }

  let destination;
  if (parsed.destination) {
    destination = path.resolve(parsed.destination);
  } else {
    destination = getWorkspaceProblemPullDir(problemId);
  }

  // Seeded problems reuse the scaffold writer; everything else gets the
  // upstream-only bundle.
  let scaffoldResult = null;
  if (localProblem) {
    scaffoldResult = scaffoldProblem(localProblem, destination);
  } else {
    writeUpstreamOnlyBundle(problemKey, destination, upstreamRecord, snapshot);
  }

  const siteStatus = await maybeWriteSiteBundle(problemKey, destination, includeSite);

  const pullStatus = {
    generatedAt: new Date().toISOString(),
    problemId: problemKey,
    usedLocalDossier: Boolean(localProblem),
    includedUpstreamRecord: Boolean(upstreamRecord),
    upstreamSnapshotKind: snapshot?.kind ?? null,
    siteSnapshotAttempted: siteStatus.attempted,
    siteSnapshotIncluded: siteStatus.included,
    siteSnapshotError: siteStatus.error,
    scaffoldArtifactsCopied: scaffoldResult?.copiedArtifacts.length ?? 0,
  };
  writeJson(path.join(destination, 'PULL_STATUS.json'), pullStatus);

  console.log(`Pull bundle created: ${destination}`);
  console.log(`Local canonical dossier included: ${yesNo(localProblem)}`);
  console.log(`Upstream record included: ${yesNo(upstreamRecord)}`);
  console.log(`Live site snapshot included: ${yesNo(siteStatus.included)}`);
  if (siteStatus.error) {
    console.log(`Live site snapshot note: ${siteStatus.error}`);
  }
  return 0;
}
|
|
@@ -21,6 +21,7 @@ export function runWorkspaceCommand(args) {
|
|
|
21
21
|
console.log(`Active problem: ${summary.activeProblem ?? '(none)'}`);
|
|
22
22
|
console.log(`Workspace upstream dir: ${summary.upstreamDir}`);
|
|
23
23
|
console.log(`Workspace scaffold dir: ${summary.scaffoldDir}`);
|
|
24
|
+
console.log(`Workspace pull dir: ${summary.pullDir}`);
|
|
24
25
|
console.log(`Updated at: ${summary.updatedAt ?? '(none)'}`);
|
|
25
26
|
return 0;
|
|
26
27
|
}
|
package/src/runtime/paths.js
CHANGED
|
@@ -52,6 +52,14 @@ export function getWorkspaceProblemScaffoldDir(problemId) {
|
|
|
52
52
|
return path.join(getWorkspaceScaffoldsDir(), String(problemId));
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
+
/**
 * Directory that holds every pull bundle of the workspace.
 *
 * @returns {string} The `pulls` directory inside the workspace directory.
 */
export function getWorkspacePullsDir() {
  const workspaceDir = getWorkspaceDir();
  return path.join(workspaceDir, 'pulls');
}
|
|
58
|
+
|
|
59
|
+
/**
 * Directory of the pull bundle for one problem.
 *
 * @param {string|number} problemId - Problem id; stringified to form the
 *   final path segment.
 * @returns {string} `<pulls dir>/<problemId>`.
 */
export function getWorkspaceProblemPullDir(problemId) {
  const pullsRoot = getWorkspacePullsDir();
  return path.join(pullsRoot, String(problemId));
}
|
|
62
|
+
|
|
55
63
|
/**
 * Directory of a problem's dossier under `repoRoot`.
 *
 * @param {string|number} problemId - Problem id; stringified to form the
 *   final path segment.
 * @returns {string} `<repoRoot>/problems/<problemId>`.
 */
export function getProblemDir(problemId) {
  const dossierName = String(problemId);
  return path.join(repoRoot, 'problems', dossierName);
}
|
package/src/runtime/workspace.js
CHANGED
|
@@ -2,6 +2,7 @@ import fs from 'node:fs';
|
|
|
2
2
|
import {
|
|
3
3
|
getCurrentProblemPath,
|
|
4
4
|
getWorkspaceDir,
|
|
5
|
+
getWorkspaceProblemPullDir,
|
|
5
6
|
getWorkspaceProblemScaffoldDir,
|
|
6
7
|
getWorkspaceRoot,
|
|
7
8
|
getWorkspaceStatePath,
|
|
@@ -66,6 +67,7 @@ export function getWorkspaceSummary() {
|
|
|
66
67
|
activeProblem,
|
|
67
68
|
upstreamDir: getWorkspaceUpstreamDir(),
|
|
68
69
|
scaffoldDir: activeProblem ? getWorkspaceProblemScaffoldDir(activeProblem) : getWorkspaceProblemScaffoldDir('<problem-id>'),
|
|
70
|
+
pullDir: activeProblem ? getWorkspaceProblemPullDir(activeProblem) : getWorkspaceProblemPullDir('<problem-id>'),
|
|
69
71
|
updatedAt: state?.updatedAt ?? null,
|
|
70
72
|
};
|
|
71
73
|
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
// Origin of the public problem site; a problem page URL is this value
// followed by `/<problemId>`.
const SITE_BASE_URL = 'https://www.erdosproblems.com';
|
|
2
|
+
|
|
3
|
+
/**
 * Decode the HTML character references used when extracting page text.
 *
 * Handles hexadecimal (`&#x151;`) and decimal (`&#337;`) numeric references
 * plus the named references `&nbsp;`, `&amp;`, `&quot;`, `&apos;`, `&lt;`
 * and `&gt;`. Decoding happens in a single pass, so already-escaped text is
 * unescaped exactly once (`&amp;lt;` becomes `&lt;`, not `<`). References
 * that are unknown, or whose number is not a valid Unicode code point, are
 * left untouched instead of throwing.
 *
 * @param {string} text - Text that may contain character references.
 * @returns {string} The text with recognised references replaced.
 */
function decodeEntities(text) {
  const namedReferences = new Map([
    ['nbsp', ' '],
    ['amp', '&'],
    ['quot', '"'],
    ['apos', "'"],
    ['lt', '<'],
    ['gt', '>'],
  ]);
  // String.fromCodePoint throws a RangeError above this value.
  const maxCodePoint = 0x10ffff;

  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (reference, body) => {
    if (body[0] !== '#') {
      return namedReferences.get(body) ?? reference;
    }
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = isHex
      ? Number.parseInt(body.slice(2), 16)
      : Number.parseInt(body.slice(1), 10);
    return codePoint <= maxCodePoint ? String.fromCodePoint(codePoint) : reference;
  });
}
|
|
14
|
+
|
|
15
|
+
/**
 * Normalise whitespace: runs of spaces/tabs become one space, whitespace
 * around each newline (including further newlines) collapses to a single
 * newline, and the result is trimmed at both ends.
 *
 * @param {string} text - Text to normalise.
 * @returns {string} The normalised text.
 */
function collapseWhitespace(text) {
  const singleSpaced = text.replace(/[ \t]+/g, ' ');
  const tidyBreaks = singleSpaced.replace(/\s*\n\s*/g, '\n');
  return tidyBreaks.trim();
}
|
|
18
|
+
|
|
19
|
+
/**
 * Reduce an HTML document to newline-separated readable text.
 *
 * Steps: drop `<script>`/`<style>` elements, turn block boundaries into line
 * breaks (list items get a `- ` prefix), strip the remaining tags, decode
 * character references, then normalise whitespace per line and discard
 * empty lines.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Non-empty text lines joined with `\n`.
 */
function htmlToReadableText(html) {
  const withoutScripts = html
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ');
  const blockSeparated = withoutScripts
    // `<br>` is commonly written `<br/>`, `<br />` or with attributes.
    .replace(/<br\b[^>]*>/gi, '\n')
    // Closing tags may carry trailing whitespace; every heading level counts.
    .replace(/<\/(p|div|li|h[1-6]|section|article|tr)\s*>/gi, '\n')
    // `\b` keeps `<link ...>` from being mistaken for a list item.
    .replace(/<li\b[^>]*>/gi, '- ')
    .replace(/<p[^>]*>/gi, '\n')
    .replace(/<div[^>]*>/gi, '\n')
    .replace(/<h[1-6][^>]*>/gi, '\n');
  const stripped = blockSeparated.replace(/<[^>]+>/g, ' ');
  // Decode only after tags are gone so decoded `<`/`>` are kept as text.
  const decoded = decodeEntities(stripped);
  const normalizedLines = decoded
    .split('\n')
    .map((line) => collapseWhitespace(line))
    .filter(Boolean);
  return normalizedLines.join('\n');
}
|
|
37
|
+
|
|
38
|
+
/**
 * Read the page title from the first `<title>` element.
 *
 * @param {string} html - Raw HTML markup.
 * @param {string|number} problemId - Used to build the fallback title.
 * @returns {string} The decoded, whitespace-normalised title, or
 *   `Erdos Problem #<problemId>` when the markup has no `<title>` element.
 */
function extractTitle(html, problemId) {
  const titleMatch = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  return titleMatch
    ? collapseWhitespace(decodeEntities(titleMatch[1]))
    : `Erdos Problem #${problemId}`;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Pick a short preview window from the extracted page lines.
 *
 * The window starts at the first line that begins with one of the status
 * words OPEN, SOLVED, PROVED or PARTIAL (case-insensitive, whole word), or
 * at the first line when no such line exists.
 *
 * @param {string[]} lines - Extracted text lines, in page order.
 * @param {number} [maxLines=24] - Maximum number of lines to return.
 * @returns {string[]} At most `maxLines` consecutive lines.
 */
function selectPreviewLines(lines, maxLines = 24) {
  const anchorIndex = lines.findIndex((line) => /^(OPEN|SOLVED|PROVED|PARTIAL)\b/i.test(line));
  const start = anchorIndex >= 0 ? anchorIndex : 0;
  return lines.slice(start, start + maxLines);
}
|
|
53
|
+
|
|
54
|
+
/**
 * Fetch a problem page from the public site and derive a text extract.
 *
 * @param {string|number} problemId - Problem id; URL-encoded into the path.
 * @param {{ timeoutMs?: number }} [options] - `timeoutMs` bounds the whole
 *   request (default 30000 ms) so a stalled connection cannot hang the CLI.
 * @returns {Promise<{
 *   url: string,
 *   fetchedAt: string,
 *   html: string,
 *   title: string,
 *   text: string,
 *   previewLines: string[],
 * }>} The raw HTML plus the derived title, readable text and preview lines.
 * @throws {Error} When the response status is not OK; a timeout or network
 *   failure rejects with the error raised by `fetch`.
 */
export async function fetchProblemSiteSnapshot(problemId, { timeoutMs = 30000 } = {}) {
  const url = `${SITE_BASE_URL}/${encodeURIComponent(String(problemId))}`;
  const response = await fetch(url, {
    headers: {
      'User-Agent': 'erdos-problems-cli',
      Accept: 'text/html',
    },
    signal: AbortSignal.timeout(timeoutMs),
  });

  if (!response.ok) {
    throw new Error(`Unable to fetch problem page ${problemId}: ${response.status}`);
  }

  const html = await response.text();
  const text = htmlToReadableText(html);
  const title = extractTitle(html, problemId);
  const lines = text.split('\n').filter(Boolean);

  return {
    url,
    fetchedAt: new Date().toISOString(),
    html,
    title,
    text,
    previewLines: selectPreviewLines(lines),
  };
}
|