open-research-protocol 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT_INTEGRATION.md +94 -0
- package/INSTALL.md +159 -0
- package/LICENSE +22 -0
- package/PROTOCOL.md +140 -0
- package/README.md +312 -0
- package/bin/orp.js +38 -0
- package/cli/orp.py +3595 -0
- package/cone/CONTEXT_LOG.md +33 -0
- package/docs/AGENT_LOOP.md +63 -0
- package/docs/CHOOSING_OR_IGNORING_INSTRUMENTS.md +128 -0
- package/docs/CODA_ORP_CONTRACT.md +222 -0
- package/docs/CORE_ABILITY_REFOCUS_CHECKLIST.md +62 -0
- package/docs/DISCOVER.md +69 -0
- package/docs/EXTERNAL_CONTRIBUTION_GOVERNANCE.md +275 -0
- package/docs/MATHLIB_COLLABORATION_FLOW_PROMPT.md +112 -0
- package/docs/NPM_RELEASE_CHECKLIST.md +55 -0
- package/docs/ORP_V1_ATOMIC_DISCOVERY_EVOLUTION.md +186 -0
- package/docs/OSS_CONTRIBUTION_AGENT_LOOP.md +69 -0
- package/docs/PRESENTATION_BOW.md +100 -0
- package/docs/PROFILE_PACKS.md +227 -0
- package/docs/SUNFLOWER_CODA_PR_GOVERNANCE_MAPPING.md +77 -0
- package/docs/WHY_INSTRUMENTS.md +118 -0
- package/examples/README.md +21 -0
- package/examples/example_claim.md +33 -0
- package/examples/example_failed.md +24 -0
- package/examples/example_verification.md +36 -0
- package/examples/orp.erdos-problems.catalog.yml +88 -0
- package/examples/orp.external-pr-governance.yml +223 -0
- package/examples/orp.sunflower-coda.atomic.yml +144 -0
- package/examples/orp.sunflower-coda.live-compare.yml +181 -0
- package/examples/orp.sunflower-coda.pr-governance.yml +253 -0
- package/examples/packet.problem_scope.example.json +123 -0
- package/examples/reports/README.md +16 -0
- package/examples/reports/sunflower_live_compare_20.RUN_SUMMARY.md +37 -0
- package/examples/reports/sunflower_live_compare_367.RUN_SUMMARY.md +37 -0
- package/examples/reports/sunflower_live_compare_857.RUN_SUMMARY.md +37 -0
- package/llms.txt +58 -0
- package/modules/instruments/ADVERSARIAL/README.md +109 -0
- package/modules/instruments/ADVERSARIAL/TEMPLATE.md +27 -0
- package/modules/instruments/COMPRESSION/README.md +112 -0
- package/modules/instruments/COMPRESSION/TEMPLATE.md +27 -0
- package/modules/instruments/INSTRUMENT_TEMPLATE.md +30 -0
- package/modules/instruments/ORBIT/README.md +124 -0
- package/modules/instruments/ORBIT/TEMPLATE.md +28 -0
- package/modules/instruments/README.md +179 -0
- package/package.json +54 -0
- package/packs/README.md +16 -0
- package/packs/erdos-open-problems/README.md +287 -0
- package/packs/erdos-open-problems/data/README.md +43 -0
- package/packs/erdos-open-problems/data/erdos_open_problems.md +697 -0
- package/packs/erdos-open-problems/data/erdos_problems.active.json +15561 -0
- package/packs/erdos-open-problems/data/erdos_problems.all.json +26289 -0
- package/packs/erdos-open-problems/data/erdos_problems.closed.json +10760 -0
- package/packs/erdos-open-problems/data/erdos_problems.open.json +15561 -0
- package/packs/erdos-open-problems/docs/SUNFLOWER_ADAPTER_DEPENDENCIES.md +63 -0
- package/packs/erdos-open-problems/pack.yml +131 -0
- package/packs/erdos-open-problems/profiles/erdos-problems-catalog-sync.yml.tmpl +99 -0
- package/packs/erdos-open-problems/profiles/sunflower-live-compare.yml.tmpl +188 -0
- package/packs/erdos-open-problems/profiles/sunflower-mathlib-pr-governance.yml.tmpl +253 -0
- package/packs/erdos-open-problems/profiles/sunflower-problem857-discovery-public-repo.yml.tmpl +152 -0
- package/packs/erdos-open-problems/profiles/sunflower-problem857-discovery.yml.tmpl +154 -0
- package/packs/external-pr-governance/README.md +116 -0
- package/packs/external-pr-governance/adapters/formal-conjectures/README.md +35 -0
- package/packs/external-pr-governance/adapters/mathlib/README.md +37 -0
- package/packs/external-pr-governance/pack.yml +146 -0
- package/packs/external-pr-governance/profiles/oss-feedback-hardening.yml.tmpl +92 -0
- package/packs/external-pr-governance/profiles/oss-pr-governance.yml.tmpl +233 -0
- package/packs/issue-smashers/README.md +92 -0
- package/packs/issue-smashers/adapters/formal-conjectures/README.md +17 -0
- package/packs/issue-smashers/adapters/generic-github/README.md +16 -0
- package/packs/issue-smashers/adapters/mathlib/README.md +32 -0
- package/packs/issue-smashers/bootstrap/README.md +19 -0
- package/packs/issue-smashers/bootstrap/setup-issue-smashers.sh +18 -0
- package/packs/issue-smashers/examples/issue-smashers.workspace.yml +24 -0
- package/packs/issue-smashers/pack.yml +178 -0
- package/packs/issue-smashers/profiles/issue-smashers-feedback-hardening.yml.tmpl +102 -0
- package/packs/issue-smashers/profiles/issue-smashers.yml.tmpl +258 -0
- package/scripts/npm-postinstall-check.js +31 -0
- package/scripts/orp +11 -0
- package/scripts/orp-agent-integrate.sh +197 -0
- package/scripts/orp-checkpoint.sh +184 -0
- package/scripts/orp-erdos-problems-sync.py +580 -0
- package/scripts/orp-init.sh +50 -0
- package/scripts/orp-pack-fetch.py +155 -0
- package/scripts/orp-pack-install.py +2273 -0
- package/scripts/orp-pack-render.py +188 -0
- package/spec/v1/LIFECYCLE_MAPPING.md +40 -0
- package/spec/v1/orp.config.schema.json +385 -0
- package/spec/v1/packet.schema.json +552 -0
- package/spec/v1/profile-pack.schema.json +95 -0
- package/templates/CLAIM.md +33 -0
- package/templates/FAILED_TOPIC.md +19 -0
- package/templates/ISSUE_TEMPLATE.md +22 -0
- package/templates/VERIFICATION_RECORD.md +34 -0
|
@@ -0,0 +1,580 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Sync Erdos problems catalog data from erdosproblems.com.
|
|
3
|
+
|
|
4
|
+
Outputs four JSON files:
|
|
5
|
+
- all problems
|
|
6
|
+
- open problems
|
|
7
|
+
- closed problems
|
|
8
|
+
- active problems (default active set is open)
|
|
9
|
+
|
|
10
|
+
This keeps ORP core general while letting pack profiles use a canonical
|
|
11
|
+
problem catalog snapshot for gating and planning.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import datetime as dt
|
|
18
|
+
import hashlib
|
|
19
|
+
import html
|
|
20
|
+
import json
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
import re
|
|
23
|
+
import sys
|
|
24
|
+
from typing import Any
|
|
25
|
+
from urllib import request
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
DEFAULT_SOURCE_URL = "https://erdosproblems.com/range/1-end"
|
|
29
|
+
DEFAULT_USER_AGENT = "ORP-ErdosSync/1.0 (+https://github.com/teorth/erdosproblems)"
|
|
30
|
+
|
|
31
|
+
SOLVE_COUNT_RE = re.compile(
|
|
32
|
+
r"([0-9]+)\s+solved\s+out\s+of\s+([0-9]+)\s+shown", re.IGNORECASE
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _now_utc() -> str:
|
|
37
|
+
return (
|
|
38
|
+
dt.datetime.now(dt.timezone.utc)
|
|
39
|
+
.replace(microsecond=0)
|
|
40
|
+
.isoformat()
|
|
41
|
+
.replace("+00:00", "Z")
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _sha256_text(text: str) -> str:
|
|
46
|
+
h = hashlib.sha256()
|
|
47
|
+
h.update(text.encode("utf-8"))
|
|
48
|
+
return h.hexdigest()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _collapse_ws(text: str) -> str:
|
|
52
|
+
return re.sub(r"\s+", " ", text).strip()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _strip_tags(text: str) -> str:
|
|
56
|
+
without_tags = re.sub(r"<[^>]+>", " ", text)
|
|
57
|
+
return _collapse_ws(html.unescape(without_tags))
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _fetch_html(url: str, timeout_sec: int, user_agent: str) -> str:
    """Fetch ``url`` over HTTP and decode the body.

    Uses the response's declared charset when present, falling back to UTF-8,
    and replaces undecodable bytes rather than raising.
    """
    headers = {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml",
    }
    req = request.Request(url, headers=headers)
    with request.urlopen(req, timeout=timeout_sec) as resp:  # nosec B310
        body = resp.read()
        encoding = resp.headers.get_content_charset() or "utf-8"
    return body.decode(encoding, errors="replace")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _extract_solve_count(page_html: str) -> dict[str, Any]:
|
|
75
|
+
m = SOLVE_COUNT_RE.search(page_html)
|
|
76
|
+
if not m:
|
|
77
|
+
return {
|
|
78
|
+
"raw": "",
|
|
79
|
+
"solved": None,
|
|
80
|
+
"shown": None,
|
|
81
|
+
}
|
|
82
|
+
return {
|
|
83
|
+
"raw": m.group(0),
|
|
84
|
+
"solved": int(m.group(1)),
|
|
85
|
+
"shown": int(m.group(2)),
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _extract_first(pattern: str, text: str, flags: int = 0) -> str:
|
|
90
|
+
m = re.search(pattern, text, flags)
|
|
91
|
+
if not m:
|
|
92
|
+
return ""
|
|
93
|
+
return m.group(1)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _parse_problem_block(block_html: str) -> dict[str, Any] | None:
    """Parse one problem-box HTML fragment into a flat record dict.

    Returns None when the fragment lacks the status marker or a numeric
    problem id, i.e. it is not a recognizable problem box.
    All extraction is regex-based against the site's current markup;
    missing optional sections degrade to empty strings / None.
    """
    # The problem-text div's DOM id doubles as the status flag: "open" or "solved".
    status_dom = _extract_first(
        r'<div\s+class="problem-text"\s+id="(open|solved)"', block_html
    )
    if not status_dom:
        return None

    problem_id_text = _extract_first(r'<div id="problem_id">\s*<a href="/([0-9]+)">', block_html)
    if not problem_id_text:
        return None
    problem_id = int(problem_id_text)

    # Human-readable status ("OPEN", "SOLVED", ...) shown in the tooltip trigger.
    status_label = _strip_tags(
        _extract_first(
            r'<span class="tooltip">\s*([^<]+?)\s*<span class="tooltiptext">',
            block_html,
            flags=re.DOTALL,
        )
    )
    # Longer explanation held in the tooltip body.
    status_detail = _strip_tags(
        _extract_first(
            r'<span class="tooltiptext">\s*(.*?)\s*</span>',
            block_html,
            flags=re.DOTALL,
        )
    )

    # Prize: prefer the "- $N" suffix form, fall back to any bare dollar amount.
    prize_block = _extract_first(r"<div id=\"prize\">(.*?)</div>", block_html, flags=re.DOTALL)
    prize_amount = _strip_tags(_extract_first(r"-\s*([^<]+)$", prize_block, flags=re.MULTILINE))
    if not prize_amount:
        prize_amount = _strip_tags(_extract_first(r"(\$[0-9][0-9,]*)", prize_block))

    statement = _strip_tags(_extract_first(r'<div id="content">(.*?)</div>', block_html, flags=re.DOTALL))

    # Tags are anchor texts inside the tags div; drop any that strip to empty.
    tags_block = _extract_first(r'<div id="tags">(.*?)</div>', block_html, flags=re.DOTALL)
    tags = [_strip_tags(x) for x in re.findall(r"<a [^>]*>(.*?)</a>", tags_block, flags=re.DOTALL)]
    tags = [t for t in tags if t]

    last_edited = _strip_tags(
        _extract_first(r"This page was last edited\s+([^<]+)\.", block_html)
    )
    latex_path = _extract_first(r'<a href="(/latex/[0-9]+)">View the LaTeX source</a>', block_html)

    external_block = _extract_first(r'<div class="external">(.*?)</div>', block_html, flags=re.DOTALL)
    external_text = _strip_tags(external_block)

    # Formalization is tri-state: True (link to a .lean file), False (explicit
    # "No"), or None (section absent / unrecognized).
    formalized_yes_url = _extract_first(
        r'Formalised statement\?\s*<a href="([^"]*ErdosProblems/[^"]+\.lean)"',
        external_block,
        flags=re.DOTALL,
    )
    if formalized_yes_url:
        formalized: bool | None = True
        formalized_url = formalized_yes_url
    elif re.search(r"Formalised statement\?\s*No\b", external_text):
        formalized = False
        formalized_url = ""
    else:
        formalized = None
        formalized_url = ""

    # De-duplicated, deterministic ordering for OEIS references.
    oeis_urls = sorted(
        set(re.findall(r'https?://oeis\.org/[A-Za-z0-9]+', external_block))
    )

    comments_match = re.search(
        r'href="/forum/discuss/([0-9]+)">\s*([0-9]+)\s+comments?',
        block_html,
        flags=re.DOTALL,
    )
    if comments_match:
        comments_problem_id = int(comments_match.group(1))
        comments_count = int(comments_match.group(2))
    else:
        # No forum link: assume the discussion id mirrors the problem id,
        # and leave the count unknown rather than zero.
        comments_problem_id = problem_id
        comments_count = None

    # Map DOM status to the dataset's bucket vocabulary ("solved" -> "closed").
    if status_dom == "open":
        status_bucket = "open"
    elif status_dom == "solved":
        status_bucket = "closed"
    else:
        status_bucket = "unknown"

    return {
        "problem_id": problem_id,
        "problem_url": f"/{problem_id}",
        "status_bucket": status_bucket,
        "status_dom_id": status_dom,
        "status_label": status_label,
        "status_detail": status_detail,
        "prize_amount": prize_amount,
        "statement": statement,
        "tags": tags,
        "last_edited": last_edited,
        "latex_path": latex_path,
        "formalized": formalized,
        "formalized_url": formalized_url,
        "oeis_urls": oeis_urls,
        "comments_problem_id": comments_problem_id,
        "comments_count": comments_count,
    }
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _parse_problems(page_html: str) -> list[dict[str, Any]]:
    """Split the page on problem-box markers and parse each fragment.

    Later duplicates of the same problem id overwrite earlier ones; the
    result is sorted by problem id.
    """
    pieces = page_html.split('<div class="problem-box">')
    if len(pieces) <= 1:
        return []

    by_id: dict[int, dict[str, Any]] = {}
    # pieces[0] is everything before the first box, so skip it.
    for fragment in pieces[1:]:
        parsed = _parse_problem_block(fragment)
        if parsed:
            by_id[int(parsed["problem_id"])] = parsed
    return [by_id[pid] for pid in sorted(by_id)]
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _summary(problems: list[dict[str, Any]]) -> dict[str, Any]:
|
|
215
|
+
total = len(problems)
|
|
216
|
+
open_count = sum(1 for p in problems if p.get("status_bucket") == "open")
|
|
217
|
+
closed_count = sum(1 for p in problems if p.get("status_bucket") == "closed")
|
|
218
|
+
unknown_count = total - open_count - closed_count
|
|
219
|
+
status_label_counts: dict[str, int] = {}
|
|
220
|
+
for p in problems:
|
|
221
|
+
label = str(p.get("status_label", "")).strip()
|
|
222
|
+
if not label:
|
|
223
|
+
label = "UNKNOWN"
|
|
224
|
+
status_label_counts[label] = status_label_counts.get(label, 0) + 1
|
|
225
|
+
return {
|
|
226
|
+
"total": total,
|
|
227
|
+
"open": open_count,
|
|
228
|
+
"closed": closed_count,
|
|
229
|
+
"unknown": unknown_count,
|
|
230
|
+
"status_label_counts": dict(sorted(status_label_counts.items())),
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _build_payload(
    *,
    subset: str,
    active_status: str,
    source_url: str,
    source_hash: str,
    solve_count: dict[str, Any],
    synced_at_utc: str,
    problems: list[dict[str, Any]],
) -> dict[str, Any]:
    """Assemble one dataset payload (schema 1.0.0) for a problem subset.

    Embeds source provenance (URL, page SHA-256, banner counts), a summary,
    the flat id list, and the full problem records.
    """
    provenance = {
        "site": "erdosproblems.com",
        "url": source_url,
        "source_sha256": source_hash,
        "solve_count": solve_count,
    }
    payload: dict[str, Any] = {
        "schema_version": "1.0.0",
        "subset": subset,
        "active_status": active_status,
        "generated_at_utc": synced_at_utc,
        "source": provenance,
        "summary": _summary(problems),
        "problem_ids": [int(p["problem_id"]) for p in problems],
        "problems": problems,
    }
    return payload
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _write_json(path: Path, payload: dict[str, Any]) -> None:
|
|
262
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
263
|
+
path.write_text(json.dumps(payload, indent=2, sort_keys=False) + "\n", encoding="utf-8")
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _statement_preview(text: str, max_chars: int) -> str:
|
|
267
|
+
t = _collapse_ws(text)
|
|
268
|
+
if len(t) <= max_chars:
|
|
269
|
+
return t
|
|
270
|
+
if max_chars <= 3:
|
|
271
|
+
return t[:max_chars]
|
|
272
|
+
return t[: max_chars - 3].rstrip() + "..."
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _problem_public_url(problem_id: int) -> str:
|
|
276
|
+
return f"https://erdosproblems.com/{problem_id}"
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _write_open_list_markdown(
    *,
    path: Path,
    open_problems: list[dict[str, Any]],
    source_url: str,
    synced_at_utc: str,
    max_statement_chars: int,
) -> None:
    """Render a one-problem-per-line Markdown index of open problems.

    Each bullet links to the public problem page and appends, when present:
    status label / prize / tags (pipe-separated), the last-edited note, and
    a truncated statement preview. Parent directories are created as needed.
    """
    lines: list[str] = []
    lines.append("# Erdos Open Problems (Active Snapshot)")
    lines.append("")
    lines.append(f"- generated_at_utc: `{synced_at_utc}`")
    lines.append(f"- source_url: `{source_url}`")
    lines.append(f"- total_open: `{len(open_problems)}`")
    lines.append("")
    for p in open_problems:
        pid = int(p["problem_id"])
        link = _problem_public_url(pid)
        # Missing/blank labels default to "OPEN" since this list is open-only.
        label = str(p.get("status_label", "OPEN")).strip() or "OPEN"
        prize = str(p.get("prize_amount", "")).strip()
        tags = [str(t).strip() for t in p.get("tags", []) if str(t).strip()]
        tags_text = ", ".join(tags)
        statement = _statement_preview(str(p.get("statement", "")), max_statement_chars)
        last_edited = str(p.get("last_edited", "")).strip()
        # Label always leads; prize and tags are appended only when non-empty.
        suffix_parts = [label]
        if prize:
            suffix_parts.append(prize)
        if tags_text:
            suffix_parts.append(tags_text)
        suffix = " | ".join(suffix_parts)
        line = f"- [#{pid}]({link})"
        if suffix:
            line += f" — {suffix}"
        if last_edited:
            line += f" — edited: {last_edited}"
        if statement:
            line += f" — {statement}"
        lines.append(line)

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def _write_selected_problem_payload(
    *,
    out_path: Path,
    synced_at_utc: str,
    source_url: str,
    source_hash: str,
    solve_count: dict[str, Any],
    problem: dict[str, Any],
) -> None:
    """Write a single selected-problem JSON file with source provenance attached."""
    provenance = {
        "site": "erdosproblems.com",
        "url": source_url,
        "source_sha256": source_hash,
        "solve_count": solve_count,
    }
    _write_json(
        out_path,
        {
            "schema_version": "1.0.0",
            "selected_at_utc": synced_at_utc,
            "source": provenance,
            "problem": problem,
        },
    )
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser for the Erdos catalog sync tool.

    Input options select either a live fetch (--source-url, --timeout-sec,
    --user-agent, optional --write-html-snapshot) or a local file
    (--input-html). Output options point the four dataset files plus the
    open-problem Markdown list; --problem-id/--out-problem-dir drive the
    per-problem selection output.
    """
    p = argparse.ArgumentParser(description="Sync Erdos problems catalog from erdosproblems.com")
    p.add_argument("--source-url", default=DEFAULT_SOURCE_URL, help="Source URL (default: range/1-end).")
    p.add_argument("--input-html", default="", help="Optional local HTML file (skip network fetch).")
    p.add_argument("--write-html-snapshot", default="", help="Optional path to write fetched HTML snapshot.")
    p.add_argument("--timeout-sec", type=int, default=90, help="HTTP timeout in seconds (default: 90).")
    p.add_argument("--user-agent", default=DEFAULT_USER_AGENT, help="HTTP user-agent header.")
    p.add_argument(
        "--active-status",
        choices=["open", "closed", "all"],
        default="open",
        help="Default active set to publish (default: open).",
    )
    p.add_argument(
        "--allow-count-mismatch",
        action="store_true",
        help="Allow parsed problem count mismatch vs solve_count banner.",
    )
    p.add_argument(
        "--out-all",
        default="packs/erdos-open-problems/data/erdos_problems.all.json",
        help="Output file for all problems dataset.",
    )
    p.add_argument(
        "--out-open",
        default="packs/erdos-open-problems/data/erdos_problems.open.json",
        help="Output file for open problems dataset.",
    )
    p.add_argument(
        "--out-closed",
        default="packs/erdos-open-problems/data/erdos_problems.closed.json",
        help="Output file for closed problems dataset.",
    )
    p.add_argument(
        "--out-active",
        default="packs/erdos-open-problems/data/erdos_problems.active.json",
        help="Output file for active-status dataset.",
    )
    p.add_argument(
        "--out-open-list",
        default="packs/erdos-open-problems/data/erdos_open_problems.md",
        help=(
            "Markdown output listing every open problem with direct links "
            "(default: packs/erdos-open-problems/data/erdos_open_problems.md)."
        ),
    )
    p.add_argument(
        "--open-list-max-statement-chars",
        type=int,
        default=140,
        help="Statement preview character cap for --out-open-list (default: 140).",
    )
    # Repeatable flag; argparse appends each occurrence into a list.
    p.add_argument(
        "--problem-id",
        action="append",
        type=int,
        default=[],
        help="Problem id to print direct link/status metadata for (repeatable).",
    )
    p.add_argument(
        "--out-problem-dir",
        default="",
        help="Optional output directory for --problem-id JSON payloads (files: erdos_problem.<id>.json).",
    )
    return p
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def main() -> int:
    """Run the sync: load HTML, parse, validate counts, write datasets.

    Exit codes: 0 success; 2 missing --input-html file; 3 parsed/banner count
    mismatch (without --allow-count-mismatch); 4 some --problem-id values not
    found (datasets are still written in that case).
    """
    args = _build_parser().parse_args()
    synced_at_utc = _now_utc()

    # Source selection: a local HTML file takes priority over a network fetch.
    if args.input_html.strip():
        input_path = Path(args.input_html).resolve()
        if not input_path.exists():
            print(f"error: input HTML file not found: {input_path}", file=sys.stderr)
            return 2
        page_html = input_path.read_text(encoding="utf-8")
        source_url = f"file://{input_path}"
    else:
        source_url = args.source_url
        page_html = _fetch_html(source_url, args.timeout_sec, args.user_agent)
        # Optionally persist the fetched page for offline reruns/auditing.
        if args.write_html_snapshot.strip():
            snap_path = Path(args.write_html_snapshot).resolve()
            snap_path.parent.mkdir(parents=True, exist_ok=True)
            snap_path.write_text(page_html, encoding="utf-8")

    source_hash = _sha256_text(page_html)
    solve_count = _extract_solve_count(page_html)
    all_problems = _parse_problems(page_html)
    open_problems = [p for p in all_problems if p.get("status_bucket") == "open"]
    closed_problems = [p for p in all_problems if p.get("status_bucket") == "closed"]

    if args.active_status == "open":
        active_problems = open_problems
    elif args.active_status == "closed":
        active_problems = closed_problems
    else:
        active_problems = all_problems

    # Sanity check: the page banner's "shown" count should equal what we parsed;
    # a mismatch usually means the site markup changed under us.
    reported_total = solve_count.get("shown")
    reported_solved = solve_count.get("solved")
    parsed_total = len(all_problems)
    mismatch = isinstance(reported_total, int) and reported_total != parsed_total

    if mismatch and not args.allow_count_mismatch:
        print("error: parsed problem count does not match solve_count banner.", file=sys.stderr)
        print(f"reported_total={reported_total}", file=sys.stderr)
        print(f"parsed_total={parsed_total}", file=sys.stderr)
        print("hint: rerun with --allow-count-mismatch if site markup changed.", file=sys.stderr)
        return 3

    out_all = Path(args.out_all).resolve()
    out_open = Path(args.out_open).resolve()
    out_closed = Path(args.out_closed).resolve()
    out_active = Path(args.out_active).resolve()

    payload_all = _build_payload(
        subset="all",
        active_status=args.active_status,
        source_url=source_url,
        source_hash=source_hash,
        solve_count=solve_count,
        synced_at_utc=synced_at_utc,
        problems=all_problems,
    )
    payload_open = _build_payload(
        subset="open",
        active_status=args.active_status,
        source_url=source_url,
        source_hash=source_hash,
        solve_count=solve_count,
        synced_at_utc=synced_at_utc,
        problems=open_problems,
    )
    payload_closed = _build_payload(
        subset="closed",
        active_status=args.active_status,
        source_url=source_url,
        source_hash=source_hash,
        solve_count=solve_count,
        synced_at_utc=synced_at_utc,
        problems=closed_problems,
    )
    payload_active = _build_payload(
        subset="active",
        active_status=args.active_status,
        source_url=source_url,
        source_hash=source_hash,
        solve_count=solve_count,
        synced_at_utc=synced_at_utc,
        problems=active_problems,
    )

    _write_json(out_all, payload_all)
    _write_json(out_open, payload_open)
    _write_json(out_closed, payload_closed)
    _write_json(out_active, payload_active)

    # Markdown index of open problems (skipped when the path option is blank).
    out_open_list = None
    if args.out_open_list.strip():
        out_open_list = Path(args.out_open_list).resolve()
        _write_open_list_markdown(
            path=out_open_list,
            open_problems=open_problems,
            source_url=source_url,
            synced_at_utc=synced_at_utc,
            max_statement_chars=max(20, int(args.open_list_max_statement_chars)),
        )

    # Per-problem selection: print key=value metadata lines for each requested
    # id, and optionally write one JSON payload per id.
    selected_problem_ids = sorted(set(int(x) for x in (args.problem_id or [])))
    selected_missing: list[int] = []
    if selected_problem_ids:
        by_id = {int(p["problem_id"]): p for p in all_problems}
        out_problem_dir: Path | None = None
        if args.out_problem_dir.strip():
            out_problem_dir = Path(args.out_problem_dir).resolve()
            out_problem_dir.mkdir(parents=True, exist_ok=True)

        for pid in selected_problem_ids:
            rec = by_id.get(pid)
            if rec is None:
                selected_missing.append(pid)
                continue
            link = _problem_public_url(pid)
            print(f"selected.problem_id={pid}")
            print(f"selected.url={link}")
            print(f"selected.status_bucket={rec.get('status_bucket', '')}")
            print(f"selected.status_label={rec.get('status_label', '')}")
            print(f"selected.prize_amount={rec.get('prize_amount', '')}")
            print(f"selected.last_edited={rec.get('last_edited', '')}")
            print(
                "selected.statement_preview="
                + _statement_preview(str(rec.get("statement", "")), 200)
            )
            if out_problem_dir is not None:
                out_path = out_problem_dir / f"erdos_problem.{pid}.json"
                _write_selected_problem_payload(
                    out_path=out_path,
                    synced_at_utc=synced_at_utc,
                    source_url=source_url,
                    source_hash=source_hash,
                    solve_count=solve_count,
                    problem=rec,
                )
                print(f"selected.out={out_path}")

    summary_all = payload_all["summary"]
    summary_active = payload_active["summary"]

    # Machine-readable run report on stdout.
    print(f"source_url={source_url}")
    print(f"source_sha256={source_hash}")
    print(f"reported_total={reported_total}")
    print(f"reported_solved={reported_solved}")
    print(f"parsed_total={summary_all['total']}")
    print(f"summary.open={summary_all['open']}")
    print(f"summary.closed={summary_all['closed']}")
    print(f"summary.unknown={summary_all['unknown']}")
    print(f"active_status={args.active_status}")
    print(f"summary.active={summary_active['total']}")
    print(f"out_all={out_all}")
    print(f"out_open={out_open}")
    print(f"out_closed={out_closed}")
    print(f"out_active={out_active}")
    if out_open_list is not None:
        print(f"out_open_list={out_open_list}")
    if selected_problem_ids:
        print(f"selected.count={len(selected_problem_ids)}")
    if selected_missing:
        missing_csv = ",".join(str(x) for x in selected_missing)
        print(f"selected.missing={missing_csv}")
        return 4
    return 0
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
if __name__ == "__main__":
    # CLI entry point: propagate main()'s integer status as the process exit code.
    raise SystemExit(main())
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
#!/usr/bin/env sh
set -eu

# orp-init.sh — copy ORP into a target directory (no git, no dependencies).
#
# Usage:
#   ./scripts/orp-init.sh /path/to/your/repo/orp
#
# This script copies:
#   LICENSE, README.md, INSTALL.md, PROTOCOL.md, AGENT_INTEGRATION.md, templates/, examples/, scripts/, modules/, docs/, cone/
#
# It does NOT:
# - initialize git
# - modify your repo README
# - enforce the protocol
#
# Exit codes: 0 ok, 2 usage error, 3 target already initialized.

# Resolve our own path so the copy works from any CWD. When invoked bare
# (found via PATH), look the name up with `command -v`.
SCRIPT_PATH="$0"
case "$SCRIPT_PATH" in
  */*) : ;;
  *) SCRIPT_PATH="$(command -v "$SCRIPT_PATH" 2>/dev/null || printf '%s' "$SCRIPT_PATH")" ;;
esac

# Operate from the package root (parent of scripts/).
ROOT_DIR="$(cd "$(dirname "$SCRIPT_PATH")/.." && pwd)"
cd "$ROOT_DIR"

if [ "${1:-}" = "" ]; then
  # Fix: diagnostics belong on stderr, not stdout.
  echo "usage: $0 /path/to/target/orp" >&2
  exit 2
fi

TARGET="$1"

# Refuse to clobber an existing installation.
if [ -e "$TARGET/PROTOCOL.md" ]; then
  echo "Warning: ORP files already exist at $TARGET. Aborting." >&2
  exit 3
fi

mkdir -p "$TARGET"
mkdir -p "$TARGET/templates" "$TARGET/examples" "$TARGET/scripts"

cp -f "./LICENSE" "./README.md" "./INSTALL.md" "./PROTOCOL.md" "./AGENT_INTEGRATION.md" "$TARGET/"
cp -f "./templates/"*.md "$TARGET/templates/"
cp -f "./examples/"*.md "$TARGET/examples/"
cp -f "./scripts/"*.sh "$TARGET/scripts/"
cp -R "./modules" "$TARGET/"
cp -R "./docs" "$TARGET/"
cp -R "./cone" "$TARGET/"

echo "ORP copied to: $TARGET"
echo "IMPORTANT: Edit $TARGET/PROTOCOL.md and define Canonical Paths."
|