erdos-problems 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -18,9 +18,10 @@ Official binary:
18
18
 
19
19
  - atlas layer with canonical local `problems/<id>/problem.yaml` records
20
20
  - bundled upstream snapshot from `teorth/erdosproblems`
21
- - workspace `.erdos/` state for active-problem selection, upstream refreshes, reports, and scaffolds
21
+ - workspace `.erdos/` state for active-problem selection, upstream refreshes, reports, scaffolds, and pull bundles
22
22
  - sunflower cluster as the first deep harness pack
23
23
  - seeded atlas now includes open and solved problems beyond sunflower
24
+ - unseeded problems can still be pulled into a workspace from the bundled upstream snapshot
24
25
 
25
26
  Seeded problems:
26
27
  - `18`, `20`, `89`, `536`, `542`, `856`, `857`, `1008`
@@ -30,7 +31,7 @@ Seeded problems:
30
31
  ```bash
31
32
  erdos problem list --cluster sunflower
32
33
  erdos bootstrap problem 857
33
- erdos problem artifacts 857
34
+ erdos problem artifacts 857 --json
34
35
  erdos dossier show 857
35
36
  ```
36
37
 
@@ -40,6 +41,22 @@ What `bootstrap` does:
40
41
  - includes the upstream record when a bundled or workspace snapshot is available
41
42
  - gives an agent a ready-to-read local artifact bundle immediately after install
42
43
 
44
+ ## Pull bundles
45
+
46
+ For any problem number in the upstream snapshot, you can create a workspace bundle even if the problem is not yet seeded locally:
47
+
48
+ ```bash
49
+ erdos pull problem 857
50
+ erdos pull problem 999 --include-site
51
+ erdos pull problem 999 --refresh-upstream
52
+ ```
53
+
54
+ What `pull` does:
55
+ - creates `.erdos/pulls/<id>/`
56
+ - includes the upstream record when available
57
+ - includes the local canonical dossier too when the problem is seeded locally
58
+ - can optionally add a live site snapshot and plain-text extract
59
+
43
60
  ## CLI
44
61
 
45
62
  ```bash
@@ -62,6 +79,8 @@ erdos upstream diff
62
79
  erdos scaffold problem 857
63
80
  erdos bootstrap problem 857
64
81
  erdos bootstrap problem 857 --sync-upstream
82
+ erdos pull problem 857
83
+ erdos pull problem 857 --include-site
65
84
  ```
66
85
 
67
86
  ## Canonical Sources
@@ -84,8 +103,9 @@ For each seeded problem, the canonical local dossier lives in `problems/<id>/`:
84
103
  The CLI can surface these directly:
85
104
  - `erdos problem artifacts <id>` shows the canonical inventory
86
105
  - `erdos problem artifacts <id> --json` emits machine-readable inventory
87
- - `erdos scaffold problem <id>` copies the bundle into the active workspace
106
+ - `erdos scaffold problem <id>` copies the seeded dossier into the active workspace
88
107
  - `erdos bootstrap problem <id>` selects the problem and creates the scaffold in one step
108
+ - `erdos pull problem <id>` creates a workspace bundle for any problem in the upstream snapshot
89
109
 
90
110
  ## Notes
91
111
 
@@ -12,10 +12,11 @@ The goal is:
12
12
  - open and solved problems use the same shape
13
13
  - local dossier truth and upstream public truth stay explicitly separated
14
14
  - packaged CLI installs can scaffold problem workspaces from canonical artifacts immediately
15
+ - unseeded problems can still be pulled into a workspace bundle from upstream truth
15
16
 
16
17
  ## Canonical Files
17
18
 
18
- Each problem should have:
19
+ Each seeded problem should have:
19
20
 
20
21
  - `problems/<id>/problem.yaml`
21
22
  - `problems/<id>/STATEMENT.md`
@@ -29,6 +30,12 @@ Bundled upstream snapshot artifacts live in:
29
30
  - `data/upstream/erdosproblems/PROBLEMS_INDEX.json`
30
31
  - `data/upstream/erdosproblems/SYNC_MANIFEST.json`
31
32
 
33
+ Workspace-generated artifacts may live in:
34
+
35
+ - `.erdos/scaffolds/<id>/`
36
+ - `.erdos/pulls/<id>/`
37
+ - `.erdos/upstream/erdosproblems/`
38
+
32
39
  ## Canonical Truth Split
33
40
 
34
41
  ### External public truth
@@ -178,4 +185,15 @@ The sync commands should produce:
178
185
  - upstream record snapshot for that problem when available
179
186
  - generated artifact index for agent consumption
180
187
 
181
- This makes a fresh npm-installed CLI immediately useful to an agentic workflow.
188
+ This is the seeded-problem path.
189
+
190
+ ## Pull Contract
191
+
192
+ `erdos pull problem <id>` should create a broader workspace-ready bundle containing:
193
+
194
+ - upstream record snapshot for that problem when available
195
+ - generated artifact index for agent consumption
196
+ - seeded local dossier files too when the problem already exists in `problems/<id>/`
197
+ - optional live site snapshot and extracted text when `--include-site` is used
198
+
199
+ This makes a fresh npm-installed CLI immediately useful to an agentic workflow even for problems that are not yet fully seeded as local dossiers.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "erdos-problems",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "description": "CLI atlas and staged research harness for Paul Erdos problems.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/index.js CHANGED
@@ -2,6 +2,7 @@ import { runBootstrapCommand } from '../commands/bootstrap.js';
2
2
  import { runClusterCommand } from '../commands/cluster.js';
3
3
  import { runDossierCommand } from '../commands/dossier.js';
4
4
  import { runProblemCommand } from '../commands/problem.js';
5
+ import { runPullCommand } from '../commands/pull.js';
5
6
  import { runScaffoldCommand } from '../commands/scaffold.js';
6
7
  import { runUpstreamCommand } from '../commands/upstream.js';
7
8
  import { runWorkspaceCommand } from '../commands/workspace.js';
@@ -24,6 +25,7 @@ function printUsage() {
24
25
  console.log(' erdos upstream diff [--write-package-report]');
25
26
  console.log(' erdos scaffold problem <id> [--dest <path>]');
26
27
  console.log(' erdos bootstrap problem <id> [--dest <path>] [--sync-upstream]');
28
+ console.log(' erdos pull problem <id> [--dest <path>] [--include-site] [--refresh-upstream]');
27
29
  }
28
30
 
29
31
  const args = process.argv.slice(2);
@@ -47,6 +49,8 @@ if (!command || command === 'help' || command === '--help') {
47
49
  exitCode = runScaffoldCommand(rest);
48
50
  } else if (command === 'bootstrap') {
49
51
  exitCode = await runBootstrapCommand(rest);
52
+ } else if (command === 'pull') {
53
+ exitCode = await runPullCommand(rest);
50
54
  } else {
51
55
  console.error(`Unknown command: ${command}`);
52
56
  printUsage();
@@ -0,0 +1,203 @@
1
+ import path from 'node:path';
2
+ import { getProblem } from '../atlas/catalog.js';
3
+ import { ensureDir, writeJson, writeText } from '../runtime/files.js';
4
+ import { getWorkspaceProblemPullDir } from '../runtime/paths.js';
5
+ import { scaffoldProblem } from '../runtime/problem-artifacts.js';
6
+ import { loadActiveUpstreamSnapshot, syncUpstream } from '../upstream/sync.js';
7
+ import { fetchProblemSiteSnapshot } from '../upstream/site.js';
8
+
9
/**
 * Parse the tokens that follow `erdos pull` on the command line.
 *
 * Accepted shape:
 *   problem <id> [--dest <path>] [--include-site] [--refresh-upstream]
 *
 * @param {string[]} args - CLI tokens after the `pull` command word.
 * @returns {{ error: string } | {
 *   problemId: string | undefined,
 *   destination: string | null,
 *   includeSite: boolean,
 *   refreshUpstream: boolean,
 * }} Either an `error` message, or the parsed options. `problemId` is
 *   `undefined` when no id token was supplied at all.
 */
function parsePullArgs(args) {
  const [kind, value, ...rest] = args;
  if (kind !== 'problem') {
    return { error: 'Only `erdos pull problem <id>` is supported right now.' };
  }

  // A long option sitting in the id slot means the id itself was omitted
  // (e.g. `erdos pull problem --include-site`); do not treat it as an id.
  const looksLikeOption = (token) => typeof token === 'string' && token.startsWith('--');
  if (looksLikeOption(value)) {
    return { error: 'Missing problem id.' };
  }

  let destination = null;
  let includeSite = false;
  let refreshUpstream = false;

  for (let index = 0; index < rest.length; index += 1) {
    const token = rest[index];
    if (token === '--dest') {
      const candidate = rest[index + 1];
      // Reject both a missing value and another option in the value slot,
      // so `--dest --include-site` does not create a directory named
      // "--include-site" while silently dropping the flag.
      if (!candidate || looksLikeOption(candidate)) {
        return { error: 'Missing destination path after --dest.' };
      }
      destination = candidate;
      index += 1; // skip the consumed path token
      continue;
    }
    if (token === '--include-site') {
      includeSite = true;
      continue;
    }
    if (token === '--refresh-upstream') {
      refreshUpstream = true;
      continue;
    }
    return { error: `Unknown pull option: ${token}` };
  }

  return {
    problemId: value,
    destination,
    includeSite,
    refreshUpstream,
  };
}
47
+
48
/**
 * Write a pull bundle for a problem that has no local dossier.
 *
 * Files written into `destination` (through the project's `ensureDir`,
 * `writeJson` and `writeText` helpers):
 *   - UPSTREAM_RECORD.json  (only when `upstreamRecord` is truthy)
 *   - PROBLEM.json
 *   - ARTIFACT_INDEX.json
 *   - README.md
 *
 * @param {string} problemId - Problem identifier, interpolated into titles and URLs.
 * @param {string} destination - Directory that receives the bundle.
 * @param {object | null} upstreamRecord - Upstream index entry for the problem, if any.
 * @param {object | null} snapshot - Active upstream snapshot; when truthy its
 *   `kind`, path fields and `manifest` are summarised in ARTIFACT_INDEX.json.
 * @returns {void}
 */
function writeUpstreamOnlyBundle(problemId, destination, upstreamRecord, snapshot) {
  const bundleFile = (fileName) => path.join(destination, fileName);

  ensureDir(destination);

  if (upstreamRecord) {
    writeJson(bundleFile('UPSTREAM_RECORD.json'), upstreamRecord);
  }

  // One timestamp is shared by PROBLEM.json and ARTIFACT_INDEX.json.
  const generatedAt = new Date().toISOString();

  const problemSummary = {
    generatedAt,
    problemId,
    title: `Erdos Problem #${problemId}`,
    cluster: null,
    siteStatus: upstreamRecord?.status?.state ?? 'unknown',
    repoStatus: 'upstream-only',
    harnessDepth: 'unseeded',
    sourceUrl: `https://www.erdosproblems.com/${problemId}`,
    activeRoute: null,
  };
  writeJson(bundleFile('PROBLEM.json'), problemSummary);

  let upstreamSnapshot = null;
  if (snapshot) {
    upstreamSnapshot = {
      kind: snapshot.kind,
      manifestPath: snapshot.manifestPath,
      indexPath: snapshot.indexPath,
      yamlPath: snapshot.yamlPath,
      upstreamCommit: snapshot.manifest.upstream_commit ?? null,
      fetchedAt: snapshot.manifest.fetched_at,
    };
  }
  writeJson(bundleFile('ARTIFACT_INDEX.json'), {
    generatedAt,
    problemId,
    copiedArtifacts: [],
    canonicalArtifacts: [],
    upstreamSnapshot,
    includedUpstreamRecord: Boolean(upstreamRecord),
  });

  const readmeLines = [];
  readmeLines.push(`# Erdos Problem ${problemId} Pull Bundle`);
  readmeLines.push('');
  readmeLines.push('This bundle was generated from upstream public metadata.');
  readmeLines.push('');
  readmeLines.push(`- Source: https://www.erdosproblems.com/${problemId}`);
  readmeLines.push(`- Upstream record included: ${upstreamRecord ? 'yes' : 'no'}`);
  readmeLines.push('');
  readmeLines.push('This problem is not yet seeded locally as a canonical dossier in this package.');
  readmeLines.push(''); // final empty entry yields a trailing newline
  writeText(bundleFile('README.md'), readmeLines.join('\n'));
}
101
+
102
/**
 * Optionally fetch the live site page for a problem and store it in the bundle.
 *
 * When `includeSite` is falsy nothing is fetched or written. Otherwise the
 * snapshot is fetched and written as SITE_SNAPSHOT.html, SITE_EXTRACT.txt,
 * SITE_EXTRACT.json and SITE_SUMMARY.md. Any error raised along the way is
 * recorded in SITE_FETCH_ERROR.txt and reported in the result rather than
 * rethrown.
 *
 * @param {string} problemId - Problem identifier passed to the site fetcher.
 * @param {string} destination - Bundle directory that receives the files.
 * @param {boolean} includeSite - Whether a site snapshot was requested.
 * @returns {Promise<{ attempted: boolean, included: boolean, error: string | null }>}
 */
async function maybeWriteSiteBundle(problemId, destination, includeSite) {
  const inBundle = (fileName) => path.join(destination, fileName);

  if (includeSite) {
    try {
      const page = await fetchProblemSiteSnapshot(problemId);

      writeText(inBundle('SITE_SNAPSHOT.html'), page.html);
      writeText(inBundle('SITE_EXTRACT.txt'), page.text);
      writeJson(inBundle('SITE_EXTRACT.json'), {
        url: page.url,
        fetchedAt: page.fetchedAt,
        title: page.title,
        previewLines: page.previewLines,
      });

      const summary = [
        `# Erdős Problem #${problemId} Site Summary`,
        '',
        `Source: ${page.url}`,
        `Fetched at: ${page.fetchedAt}`,
        `Title: ${page.title}`,
        '',
        '## Preview',
        '',
        ...page.previewLines.map((line) => `- ${line}`),
        '',
      ].join('\n');
      writeText(inBundle('SITE_SUMMARY.md'), summary);

      return { attempted: true, included: true, error: null };
    } catch (error) {
      // Best effort: persist the failure reason next to the bundle and
      // surface it to the caller instead of aborting the pull.
      const reason = String(error.message ?? error);
      writeText(inBundle('SITE_FETCH_ERROR.txt'), reason);
      return { attempted: true, included: false, error: reason };
    }
  }

  return { attempted: false, included: false, error: null };
}
138
+
139
/**
 * Entry point for `erdos pull ...`.
 *
 * Flow: print usage for help/no args; parse options; optionally refresh the
 * upstream snapshot; look the problem up in the local catalog and the active
 * upstream snapshot; write either a scaffold (local dossier present) or an
 * upstream-only bundle; optionally add a live site snapshot; finally write
 * PULL_STATUS.json and print a short report.
 *
 * @param {string[]} args - CLI tokens after the `pull` command word.
 * @returns {Promise<number>} Exit code: 0 on success or help output, 1 on a
 *   usage error or when the problem is known neither locally nor upstream.
 */
export async function runPullCommand(args) {
  if (args.length === 0 || args[0] === 'help' || args[0] === '--help') {
    console.log('Usage:');
    console.log(' erdos pull problem <id> [--dest <path>] [--include-site] [--refresh-upstream]');
    return 0;
  }

  const parsed = parsePullArgs(args);
  if (parsed.error) {
    console.error(parsed.error);
    return 1;
  }
  if (!parsed.problemId) {
    console.error('Missing problem id.');
    return 1;
  }

  if (parsed.refreshUpstream) {
    await syncUpstream();
  }

  const problemId = String(parsed.problemId);
  const localProblem = getProblem(parsed.problemId);
  const snapshot = loadActiveUpstreamSnapshot();

  // Only own keys count as upstream records. A plain bracket lookup would
  // also "find" inherited members such as `constructor` or `toString`, which
  // lets a bogus id slip past the not-found guard below.
  const byNumber = snapshot?.index?.by_number;
  const upstreamRecord =
    byNumber != null && Object.hasOwn(byNumber, problemId) ? (byNumber[problemId] ?? null) : null;

  if (!localProblem && !upstreamRecord) {
    console.error(`Problem ${parsed.problemId} is not present in the local dossier set or upstream snapshot.`);
    return 1;
  }

  const destination = parsed.destination
    ? path.resolve(parsed.destination)
    : getWorkspaceProblemPullDir(parsed.problemId);

  let scaffoldResult = null;
  if (localProblem) {
    scaffoldResult = scaffoldProblem(localProblem, destination);
  } else {
    writeUpstreamOnlyBundle(problemId, destination, upstreamRecord, snapshot);
  }

  const siteStatus = await maybeWriteSiteBundle(problemId, destination, parsed.includeSite);

  writeJson(path.join(destination, 'PULL_STATUS.json'), {
    generatedAt: new Date().toISOString(),
    problemId,
    usedLocalDossier: Boolean(localProblem),
    includedUpstreamRecord: Boolean(upstreamRecord),
    upstreamSnapshotKind: snapshot?.kind ?? null,
    siteSnapshotAttempted: siteStatus.attempted,
    siteSnapshotIncluded: siteStatus.included,
    siteSnapshotError: siteStatus.error,
    scaffoldArtifactsCopied: scaffoldResult?.copiedArtifacts.length ?? 0,
  });

  console.log(`Pull bundle created: ${destination}`);
  console.log(`Local canonical dossier included: ${localProblem ? 'yes' : 'no'}`);
  console.log(`Upstream record included: ${upstreamRecord ? 'yes' : 'no'}`);
  console.log(`Live site snapshot included: ${siteStatus.included ? 'yes' : 'no'}`);
  if (siteStatus.error) {
    console.log(`Live site snapshot note: ${siteStatus.error}`);
  }
  return 0;
}
@@ -21,6 +21,7 @@ export function runWorkspaceCommand(args) {
21
21
  console.log(`Active problem: ${summary.activeProblem ?? '(none)'}`);
22
22
  console.log(`Workspace upstream dir: ${summary.upstreamDir}`);
23
23
  console.log(`Workspace scaffold dir: ${summary.scaffoldDir}`);
24
+ console.log(`Workspace pull dir: ${summary.pullDir}`);
24
25
  console.log(`Updated at: ${summary.updatedAt ?? '(none)'}`);
25
26
  return 0;
26
27
  }
@@ -52,6 +52,14 @@ export function getWorkspaceProblemScaffoldDir(problemId) {
52
52
  return path.join(getWorkspaceScaffoldsDir(), String(problemId));
53
53
  }
54
54
 
55
/**
 * Path of the `pulls` directory inside the workspace directory.
 *
 * @returns {string} `<workspace dir>/pulls`, where the workspace dir comes from `getWorkspaceDir()`.
 */
export function getWorkspacePullsDir() {
  const workspaceDir = getWorkspaceDir();
  return path.join(workspaceDir, 'pulls');
}
58
+
59
/**
 * Path of the pull-bundle directory for a single problem.
 *
 * @param {string | number} problemId - Problem identifier; stringified before joining.
 * @returns {string} `<pulls dir>/<problemId>`.
 */
export function getWorkspaceProblemPullDir(problemId) {
  const pullsDir = getWorkspacePullsDir();
  const problemSegment = String(problemId);
  return path.join(pullsDir, problemSegment);
}
62
+
55
63
/**
 * Path of a problem's directory under `problems/` in the package root.
 *
 * @param {string | number} problemId - Problem identifier; stringified before joining.
 * @returns {string} `<repoRoot>/problems/<problemId>`.
 */
export function getProblemDir(problemId) {
  const problemSegment = String(problemId);
  return path.join(repoRoot, 'problems', problemSegment);
}
@@ -2,6 +2,7 @@ import fs from 'node:fs';
2
2
  import {
3
3
  getCurrentProblemPath,
4
4
  getWorkspaceDir,
5
+ getWorkspaceProblemPullDir,
5
6
  getWorkspaceProblemScaffoldDir,
6
7
  getWorkspaceRoot,
7
8
  getWorkspaceStatePath,
@@ -66,6 +67,7 @@ export function getWorkspaceSummary() {
66
67
  activeProblem,
67
68
  upstreamDir: getWorkspaceUpstreamDir(),
68
69
  scaffoldDir: activeProblem ? getWorkspaceProblemScaffoldDir(activeProblem) : getWorkspaceProblemScaffoldDir('<problem-id>'),
70
+ pullDir: activeProblem ? getWorkspaceProblemPullDir(activeProblem) : getWorkspaceProblemPullDir('<problem-id>'),
69
71
  updatedAt: state?.updatedAt ?? null,
70
72
  };
71
73
  }
@@ -0,0 +1,80 @@
1
+ const SITE_BASE_URL = 'https://www.erdosproblems.com';
2
+
3
/**
 * Decode the HTML character references this module cares about.
 *
 * Handles hexadecimal (`&#x41;`) and decimal (`&#65;`) numeric references
 * plus the named references `&nbsp;` (decoded to a plain space), `&amp;`,
 * `&quot;`, `&lt;` and `&gt;`.
 *
 * Decoding is done in a single pass so the output of one reference is never
 * re-interpreted as the start of another: `&amp;lt;` yields the literal text
 * `&lt;`, not `<`. Numeric references outside the Unicode range are left
 * untouched instead of raising a RangeError.
 *
 * @param {string} text - Text that may contain character references.
 * @returns {string} The text with supported references replaced.
 */
function decodeEntities(text) {
  const namedEntities = { nbsp: ' ', amp: '&', quot: '"', lt: '<', gt: '>' };
  const maxCodePoint = 0x10ffff;

  const decodeNumeric = (digits, radix, original) => {
    const codePoint = Number.parseInt(digits, radix);
    // String.fromCodePoint throws for values above U+10FFFF; keep the raw
    // reference in that case so one bad entity cannot abort the extraction.
    return codePoint <= maxCodePoint ? String.fromCodePoint(codePoint) : original;
  };

  return text.replace(
    /&(?:#[xX]([0-9a-fA-F]+)|#(\d+)|(nbsp|amp|quot|lt|gt));/g,
    (reference, hexDigits, decimalDigits, name) => {
      if (hexDigits !== undefined) {
        return decodeNumeric(hexDigits, 16, reference);
      }
      if (decimalDigits !== undefined) {
        return decodeNumeric(decimalDigits, 10, reference);
      }
      return namedEntities[name];
    },
  );
}
14
+
15
/**
 * Normalise whitespace in a text fragment.
 *
 * Runs of spaces/tabs become a single space, any whitespace surrounding a
 * newline is dropped (consecutive newlines merge into one), and the result
 * is trimmed at both ends.
 *
 * @param {string} text - Text to normalise.
 * @returns {string} The normalised text.
 */
function collapseWhitespace(text) {
  const singleSpaced = text.replace(/[ \t]+/g, ' ');
  const tightNewlines = singleSpaced.replace(/\s*\n\s*/g, '\n');
  return tightNewlines.trim();
}
18
+
19
/**
 * Reduce an HTML document to newline-separated readable text.
 *
 * Steps: drop comments, `<script>` and `<style>` content; turn block-level
 * boundaries into newlines and list items into "- " bullets; strip the
 * remaining tags; decode character references; normalise whitespace per
 * line and drop empty lines.
 *
 * Tag names are matched with a word boundary so that, for example,
 * `<link ...>` is not mistaken for `<li>` (which used to emit stray "- "
 * bullets), and `<br>`, `<br/>`, `<br />` as well as closing `</h1>`–`</h6>`
 * all produce a line break.
 *
 * @param {string} html - Raw HTML source.
 * @returns {string} Non-empty text lines joined with `\n`.
 */
function htmlToReadableText(html) {
  const withoutScripts = html
    // Comments may contain `>` and would otherwise leak through the tag stripper.
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ');
  const blockSeparated = withoutScripts
    .replace(/<br\b[^>]*>/gi, '\n')
    .replace(/<\/(?:p|div|li|h[1-6]|section|article|tr)\s*>/gi, '\n')
    .replace(/<li\b[^>]*>/gi, '- ')
    .replace(/<p\b[^>]*>/gi, '\n')
    .replace(/<div\b[^>]*>/gi, '\n')
    .replace(/<h[1-6]\b[^>]*>/gi, '\n');
  const stripped = blockSeparated.replace(/<[^>]+>/g, ' ');
  const decoded = decodeEntities(stripped);
  const normalizedLines = decoded
    .split('\n')
    .map((line) => collapseWhitespace(line))
    .filter(Boolean);
  return normalizedLines.join('\n');
}
37
+
38
/**
 * Extract the page title from an HTML document.
 *
 * Returns the decoded, whitespace-normalised content of the first `<title>`
 * element. Falls back to `Erdos Problem #<problemId>` when there is no
 * `<title>` element or when its content is empty after normalisation, so
 * callers never receive an empty title.
 *
 * @param {string} html - Raw HTML source.
 * @param {string | number} problemId - Identifier used in the fallback title.
 * @returns {string} The page title or the fallback.
 */
function extractTitle(html, problemId) {
  const fallbackTitle = `Erdos Problem #${problemId}`;
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const rawTitle = match?.[1] ?? '';
  if (rawTitle.trim() === '') {
    return fallbackTitle;
  }
  // Content such as "&nbsp;" only becomes empty after decoding, so the
  // emptiness check has to be repeated on the cleaned value.
  const title = collapseWhitespace(decodeEntities(rawTitle));
  return title === '' ? fallbackTitle : title;
}
45
+
46
/**
 * Pick up to 24 lines to use as a preview of the page text.
 *
 * The window starts at the first line that begins with OPEN, SOLVED, PROVED
 * or PARTIAL as a whole word (case-insensitive); when no such line exists it
 * starts at the first line.
 *
 * @param {string[]} lines - Text lines of the page.
 * @returns {string[]} At most 24 consecutive lines.
 */
function selectPreviewLines(lines) {
  const previewLength = 24;
  const statusPattern = /^(OPEN|SOLVED|PROVED|PARTIAL)\b/i;
  const statusIndex = lines.findIndex((candidate) => statusPattern.test(candidate));
  const start = Math.max(statusIndex, 0); // -1 (no status line) maps to 0
  return lines.slice(start, start + previewLength);
}
53
+
54
/**
 * Download a problem page from the site and derive a text snapshot from it.
 *
 * @param {string | number} problemId - Identifier appended to the site base URL.
 * @returns {Promise<{
 *   url: string,
 *   fetchedAt: string,
 *   html: string,
 *   title: string,
 *   text: string,
 *   previewLines: string[],
 * }>} The raw HTML together with the extracted title, readable text and
 *   preview lines; `fetchedAt` is an ISO-8601 timestamp.
 * @throws {Error} When the HTTP response status is not OK.
 */
export async function fetchProblemSiteSnapshot(problemId) {
  const url = `${SITE_BASE_URL}/${problemId}`;
  const requestOptions = {
    headers: {
      'User-Agent': 'erdos-problems-cli',
      Accept: 'text/html',
    },
  };

  const response = await fetch(url, requestOptions);
  if (!response.ok) {
    throw new Error(`Unable to fetch problem page ${problemId}: ${response.status}`);
  }

  const html = await response.text();
  const text = htmlToReadableText(html);
  const title = extractTitle(html, problemId);
  const nonEmptyLines = text.split('\n').filter((line) => Boolean(line));

  return {
    url,
    fetchedAt: new Date().toISOString(),
    html,
    title,
    text,
    previewLines: selectPreviewLines(nonEmptyLines),
  };
}