sophhub 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/package.json +1 -1
  2. package/skills/consensus/skill.json +20 -0
  3. package/skills/consensus/src/SKILL.md +93 -0
  4. package/skills/deepwiki/skill.json +20 -0
  5. package/skills/deepwiki/src/SKILL.md +45 -0
  6. package/skills/deepwiki/src/_meta.json +6 -0
  7. package/skills/deepwiki/src/scripts/deepwiki.js +135 -0
  8. package/skills/feishu-bitable/skill.json +20 -0
  9. package/skills/feishu-bitable/src/CHECKLIST.md +150 -0
  10. package/skills/feishu-bitable/src/README.md +178 -0
  11. package/skills/feishu-bitable/src/SKILL.md +113 -0
  12. package/skills/feishu-bitable/src/_meta.json +6 -0
  13. package/skills/feishu-bitable/src/api.js +381 -0
  14. package/skills/feishu-bitable/src/bin/cli.js +284 -0
  15. package/skills/feishu-bitable/src/description.md +143 -0
  16. package/skills/feishu-bitable/src/examples/create-records.json +52 -0
  17. package/skills/feishu-bitable/src/examples/create-table.json +64 -0
  18. package/skills/feishu-bitable/src/package-lock.json +324 -0
  19. package/skills/feishu-bitable/src/package.json +33 -0
  20. package/skills/feishu-bitable/src/publish-config.json +14 -0
  21. package/skills/feishu-bitable/src/test-simple.js +61 -0
  22. package/skills/feishu-bitable/src/utils.js +261 -0
  23. package/skills/google-maps/skill.json +20 -0
  24. package/skills/google-maps/src/SKILL.md +237 -0
  25. package/skills/google-maps/src/_meta.json +6 -0
  26. package/skills/google-maps/src/lib/map_helper.py +912 -0
  27. package/skills/large-task-router/skill.json +20 -0
  28. package/skills/large-task-router/src/SKILL.md +79 -0
  29. package/skills/large-task-router/src/templates/plan.md +74 -0
  30. package/skills/skillhub/skill.json +11 -4
  31. package/skills/skillhub/src/SKILL.md +11 -1
  32. package/skills/sophnet-dailynews/skill.json +20 -0
  33. package/skills/sophnet-dailynews/src/SKILL.md +179 -0
  34. package/skills/sophnet-dailynews/src/cache.json +151 -0
  35. package/skills/sophnet-dailynews/src/sources.json +230 -0
  36. package/skills/sophnet-schedule/skill.json +20 -0
  37. package/skills/sophnet-schedule/src/ARCHITECTURE.md +321 -0
  38. package/skills/sophnet-schedule/src/IMPROVEMENTS.md +145 -0
  39. package/skills/sophnet-schedule/src/SKILL.md +1050 -0
  40. package/skills/sophnet-schedule/src/_meta.json +6 -0
  41. package/skills/sophnet-schedule/src/api/__init__.py +0 -0
  42. package/skills/sophnet-schedule/src/api/models.py +245 -0
  43. package/skills/sophnet-schedule/src/apps/add_event.py +237 -0
  44. package/skills/sophnet-schedule/src/apps/check_reminders.py +112 -0
  45. package/skills/sophnet-schedule/src/apps/check_roc.py +246 -0
  46. package/skills/sophnet-schedule/src/apps/generate_daily_plan.py +342 -0
  47. package/skills/sophnet-schedule/src/apps/import_events.py +216 -0
  48. package/skills/sophnet-schedule/src/apps/monitor_calendar_changes.py +140 -0
  49. package/skills/sophnet-schedule/src/apps/register_tasks.py +169 -0
  50. package/skills/sophnet-schedule/src/apps/sync_roc_to_gcal.py +174 -0
  51. package/skills/sophnet-schedule/src/compat.py +66 -0
  52. package/skills/sophnet-schedule/src/config/__init__.py +0 -0
  53. package/skills/sophnet-schedule/src/config/reminder_rules.yaml +96 -0
  54. package/skills/sophnet-schedule/src/config/roc_events.yaml +44 -0
  55. package/skills/sophnet-schedule/src/config/settings.py +133 -0
  56. package/skills/sophnet-schedule/src/config/task_registry.yaml +92 -0
  57. package/skills/sophnet-schedule/src/docs/FRONTEND_INTEGRATION_GUIDE.md +437 -0
  58. package/skills/sophnet-schedule/src/gcal/__init__.py +0 -0
  59. package/skills/sophnet-schedule/src/gcal/client.py +374 -0
  60. package/skills/sophnet-schedule/src/gcal/models.py +91 -0
  61. package/skills/sophnet-schedule/src/requirements.txt +6 -0
  62. package/skills/sophnet-schedule/src/scripts/setup_gcal_token.py +85 -0
  63. package/skills/sophnet-schedule/src/server.py +669 -0
  64. package/skills/sophnet-schedule/src/services/__init__.py +0 -0
  65. package/skills/sophnet-schedule/src/services/calendar_backend.py +139 -0
  66. package/skills/sophnet-schedule/src/services/conflict_detector.py +96 -0
  67. package/skills/sophnet-schedule/src/services/datetime_utils.py +117 -0
  68. package/skills/sophnet-schedule/src/services/event_classifier.py +100 -0
  69. package/skills/sophnet-schedule/src/services/event_diff.py +160 -0
  70. package/skills/sophnet-schedule/src/services/google_integration.py +500 -0
  71. package/skills/sophnet-schedule/src/services/job_store.py +100 -0
  72. package/skills/sophnet-schedule/src/services/local_event_store.py +266 -0
  73. package/skills/sophnet-schedule/src/services/reminder_planner.py +116 -0
  74. package/skills/sophnet-schedule/src/services/runtime_utils.py +31 -0
  75. package/skills/sophnet-schedule/src/services/table_parser.py +286 -0
  76. package/skills/sophnet-schedule/src/services/task_builder.py +167 -0
  77. package/skills/sophnet-schedule/src/services/time_window.py +72 -0
  78. package/skills/sophnet-stock/skill.json +20 -0
  79. package/skills/sophnet-stock/src/App-Plan.md +442 -0
  80. package/skills/sophnet-stock/src/README.md +214 -0
  81. package/skills/sophnet-stock/src/SKILL.md +236 -0
  82. package/skills/sophnet-stock/src/TODO.md +394 -0
  83. package/skills/sophnet-stock/src/_meta.json +6 -0
  84. package/skills/sophnet-stock/src/docs/ARCHITECTURE.md +408 -0
  85. package/skills/sophnet-stock/src/docs/CONCEPT.md +233 -0
  86. package/skills/sophnet-stock/src/docs/HOT_SCANNER.md +288 -0
  87. package/skills/sophnet-stock/src/docs/README.md +95 -0
  88. package/skills/sophnet-stock/src/docs/USAGE.md +465 -0
  89. package/skills/sophnet-stock/src/scripts/analyze_stock.py +2565 -0
  90. package/skills/sophnet-stock/src/scripts/dividends.py +365 -0
  91. package/skills/sophnet-stock/src/scripts/hot_scanner.py +582 -0
  92. package/skills/sophnet-stock/src/scripts/portfolio.py +548 -0
  93. package/skills/sophnet-stock/src/scripts/rumor_scanner.py +342 -0
  94. package/skills/sophnet-stock/src/scripts/test_stock_analysis.py +409 -0
  95. package/skills/sophnet-stock/src/scripts/watchlist.py +336 -0
  96. package/skills/xiaohongshu/skill.json +20 -0
  97. package/skills/xiaohongshu/src/SKILL.md +91 -0
  98. package/skills/xiaohongshu/src/_meta.json +6 -0
  99. package/skills/xiaohongshu/src/assets/card.html +216 -0
  100. package/skills/xiaohongshu/src/assets/cover.html +82 -0
  101. package/skills/xiaohongshu/src/assets/example.md +84 -0
  102. package/skills/xiaohongshu/src/assets/styles.css +318 -0
  103. package/skills/xiaohongshu/src/scripts/render_xhs_v2.py +737 -0
  104. package/skills/xiaohongshu/src/scripts/sign_server.py +158 -0
  105. package/skills/xiaohongshu/src/scripts/stealth.min.js +7 -0
  106. package/skills/xiaohongshu/src/scripts/xhs_tool.py +186 -0
  107. package/skills/xiaohongshu/src/workflow.py +185 -0
@@ -0,0 +1,20 @@
1
+ {
2
+ "name": "large-task-router",
3
+ "version": "1.0.0",
4
+ "types": [
5
+ "builtin"
6
+ ],
7
+ "displayName": "大任务拆分规划",
8
+ "description": "大任务先拆分后执行:评估任务规模,输出 Subagent Plan,按计划并行执行子任务并聚合汇总。",
9
+ "changelog": [
10
+ {
11
+ "version": "1.0.0",
12
+ "date": "2026-04-16",
13
+ "changes": [
14
+ "初次提交"
15
+ ]
16
+ }
17
+ ],
18
+ "createdAt": "2026-04-16",
19
+ "updatedAt": "2026-04-16"
20
+ }
@@ -0,0 +1,79 @@
1
+ ---
2
+ name: large-task-router
3
+ description: "大任务拆分规划。触发:单文件>20MB、合计>50MB、≥2 文件同类处理、用户提到并行/分批/拆分/subagent。命中时先 read SKILL.md 后按 Subagent Plan 执行。"
4
+ ---
5
+
6
+ # Large Task Router
7
+
8
+ ## 目标
9
+
10
+ 把"大任务先拆分后执行"变成**强制**固定流程,避免直接串行处理导致慢、易中断、难恢复。本技能一旦被加载进上下文,触发条件匹配即自动生效,不再需要用户重申。
11
+
12
+ ## 何时启用(命中任一即必须启用)
13
+
14
+ - 输入文件总大小 > 50MB
15
+ - 任意单文件 > 20MB(即使只有一个文件)
16
+ - 预计步骤 > 8
17
+ - 预计耗时 > 10 分钟
18
+ - 用户明确或隐含提到:并行 / 分批 / 拆分 / subagent / 多 agent / 同时处理 / 一起跑 / 并发
19
+
20
+ > **关键**:以上条件是**客观触发器**,不是建议。不要用"任务简单"、"一个子任务就够"、"我直接跑更快"等理由跳过。触发条件成立即必须输出 Subagent Plan,即使 subagent 数量 = 1。
21
+
22
+ ## 固定流程(必须按顺序)
23
+
24
+ 1. **规模评估**
25
+ - 估算:文件大小、文件数量、步骤数、预估时长
26
+ - 输出一行结论:`任务规模=小|中|大`
27
+ - 若任一触发条件命中,`任务规模` 至少为 `中`,进入步骤 2
28
+ 2. **拆分计划**
29
+ - 先完整输出 "Subagent Plan" 模板(参考 `templates/plan.md`),再开始执行
30
+ - 子任务命名统一为 `subagent-1..n`
31
+ - **即使 n = 1,也要填写完整模板**,显式说明"无法进一步拆分的理由"
32
+ 3. **并行执行**
33
+ - 按计划并发执行
34
+ - 子任务失败可重试 1 次(同参数)
35
+ 4. **聚合汇总**
36
+ - 合并成功结果
37
+ - 标注失败项、重试结果、遗留风险
38
+ 5. **最终报告**
39
+ - 给出结论 + 可执行下一步
40
+
41
+ ## 并发策略
42
+
43
+ - 默认并发数:`N = min(4, 子任务数)`
44
+ - 若环境允许且任务明显可并行:可提高到 `N = 6~8`
45
+ - 避免过度并发:当任务共享同一资源(同一大文件写入、同一数据库表)时改为分阶段并行
46
+
47
+ ## 拆分模板(按优先级选择)
48
+
49
+ 1. **按文件切块**:大文件按块/页/段/行数范围拆分(单文件 > 20MB 时首选此模式)
50
+ 2. **按阶段拆分**:采集 → 清洗 → 分析 → 汇总
51
+ 3. **按目标拆分**:A 子任务做提取,B 子任务做校验,C 子任务做格式化
52
+
53
+ 每个子任务必须包含:
54
+
55
+ - `objective`:目标
56
+ - `input`:输入范围
57
+ - `output`:期望产出
58
+ - `timeout`:超时限制
59
+ - `success_criteria`:成功判定
60
+
61
+ ## 输出格式
62
+
63
+ 先使用 `templates/plan.md` 输出计划,再执行任务。
64
+
65
+ 最终报告必须包含:
66
+
67
+ - `Summary`:总体结果
68
+ - `Completed`:完成项
69
+ - `Failed/Retried`:失败与重试
70
+ - `Risks`:风险与边界
71
+ - `Next Actions`:建议下一步
72
+
73
+ ## 护栏(硬性规则,不得绕过)
74
+
75
+ - **禁止直接开跑**:触发条件命中的任务不得跳过 Subagent Plan 阶段,直接调用工具/读写文件。
76
+ - **禁止自主降级**:不得以"任务简单 / 不需要拆分 / 我评估不用" 为由拒绝本流程。规模评估仅决定拆分粒度(subagent 数量),不决定是否输出计划。
77
+ - **禁止静默失败**:子任务失败必须显式列入最终报告的 `Failed/Retried`,并说明重试结果。
78
+ - **单文件大**:当触发器是"单文件 > 20MB"时,仍需输出 Subagent Plan;若该文件确实无法按块/行/段拆分,在计划中说明理由后可仅用 1 个 subagent 执行,但流程不变。
79
+ - **关键词即触发**:用户提到"并行/分批/拆分"等关键词时,即使规模小,也要输出简化版 Subagent Plan(可 1~2 个 subagent)以确认并行意图,避免误解为串行。
@@ -0,0 +1,74 @@
1
+ # Subagent Plan
2
+
3
+ ## 0) Task Size Assessment
4
+
5
+ - Input size:
6
+ - File count:
7
+ - Estimated steps:
8
+ - Estimated runtime:
9
+ - Decision: `small | medium | large`
10
+ - Split required: `yes | no` (if `no`, explain)
11
+
12
+ ## 1) Execution Strategy
13
+
14
+ - Split mode: `by-file | by-stage | by-objective`
15
+ - Planned subagents:
16
+ - Planned concurrency (`N`):
17
+ - Retry policy: `failed subtask retry once`
18
+
19
+ ## 2) Subtasks
20
+
21
+ ### subagent-1
22
+
23
+ - objective:
24
+ - input:
25
+ - output:
26
+ - timeout:
27
+ - success_criteria:
28
+
29
+ ### subagent-2
30
+
31
+ - objective:
32
+ - input:
33
+ - output:
34
+ - timeout:
35
+ - success_criteria:
36
+
37
+ <!-- Add subagent-3..n when needed -->
38
+
39
+ ## 3) Merge Rules
40
+
41
+ - Dedup strategy:
42
+ - Conflict resolution:
43
+ - Output ordering:
44
+
45
+ ---
46
+
47
+ # Final Report
48
+
49
+ ## Summary
50
+
51
+ - Overall result:
52
+ - Time spent:
53
+ - Coverage:
54
+
55
+ ## Completed
56
+
57
+ - item 1
58
+ - item 2
59
+
60
+ ## Failed/Retried
61
+
62
+ - failed item:
63
+ - reason:
64
+ - retry result:
65
+
66
+ ## Risks
67
+
68
+ - risk 1
69
+ - risk 2
70
+
71
+ ## Next Actions
72
+
73
+ - next step 1
74
+ - next step 2
@@ -1,12 +1,19 @@
1
1
  {
2
2
  "name": "skillhub",
3
- "version": "2.0.0",
3
+ "version": "2.0.1",
4
4
  "types": [
5
5
  "builtin"
6
6
  ],
7
- "displayName": "",
8
- "description": "",
7
+ "displayName": "技能商店与安装",
8
+ "description": "在 Sophnet/Sophclaw 上搜索、列出、安装 skill(含 UI 设计等关键词);SophHub 优先,ClawHub 兜底。",
9
9
  "changelog": [
10
+ {
11
+ "version": "2.0.1",
12
+ "date": "2026-04-16",
13
+ "changes": [
14
+ "扩充 SKILL.md 触发描述:覆盖「Sophnet UI 设计 skill」「再找一下」等口语检索场景"
15
+ ]
16
+ },
10
17
  {
11
18
  "version": "2.0.0",
12
19
  "date": "2026-04-09",
@@ -16,5 +23,5 @@
16
23
  }
17
24
  ],
18
25
  "createdAt": "2026-04-09",
19
- "updatedAt": "2026-04-09"
26
+ "updatedAt": "2026-04-16"
20
27
  }
@@ -1,12 +1,22 @@
1
1
  ---
2
2
  name: skillhub
3
- description: 当用户要求搜索、安装 skill (包括Sophnet/Sophclaw平台和clawhub社区) 时使用此 skill。
3
+ description: 当用户要在 Sophnet / Sophclaw / OpenClaw 上查找、搜索、列出或安装 skill(技能、插件)时使用;包括「商店里有没有某某 skill」「Sophnet 有没有 UI 设计 / 前端 / 图表 / 自动化 相关 skill」「再去 SophHub 找一下」「用 npx sophhub 搜 skill」等说法。也适用于 clawhub、社区技能商店、下载 skill 到本地。与具体业务实现无关、仅涉及「发现与安装技能」时选本 skill。
4
4
  ---
5
5
 
6
6
  # SkillHub
7
7
 
8
8
  统一的技能安装入口,整合 Sophnet 平台 Sophclaw 的技能商店(SophHub,优先)和 **ClawHub**(社区商店,兜底)。
9
9
 
10
+ ## 何时应使用本 Skill(触发提示)
11
+
12
+ 用户可能用口语或中英文混说,只要意图是 **在商店里找 skill / 安装 skill**,即应走本流程,例如:
13
+
14
+ - 「Sophnet / sophnet 有没有 **UI 设计** / UX / 界面 / 前端 相关的 skill?」「商店里有没有做海报 / 图表 / 视频的 skill?」
15
+ - 「你再去 **找一下** skill」「在 SophHub / 技能商店 **搜一下**」「**列出** 商店里能装的技能」
16
+ - 「**安装** 某个 skill」「从 Sophnet 平台 **下载** skill 到本地」「npx sophhub 怎么用」
17
+
18
+ **不要**把「已经知道 skill 名称、只想写业务代码」误判为本 skill;但若用户明确要先在商店里 **检索、确认、安装** 再使用,仍用本 skill。
19
+
10
20
  ## 安装流程
11
21
 
12
22
  当用户需要安装一个技能时,按以下顺序操作:
@@ -0,0 +1,20 @@
1
+ {
2
+ "name": "sophnet-dailynews",
3
+ "version": "1.0.0",
4
+ "types": [
5
+ "store"
6
+ ],
7
+ "displayName": "每日新闻助手",
8
+ "description": "支持实时新闻检索、分类、摘要与分析,以及基于 AI 的个性化新闻推荐和深度报道。",
9
+ "changelog": [
10
+ {
11
+ "version": "1.0.0",
12
+ "date": "2026-04-14",
13
+ "changes": [
14
+ "初次提交"
15
+ ]
16
+ }
17
+ ],
18
+ "createdAt": "2026-04-14",
19
+ "updatedAt": "2026-04-14"
20
+ }
@@ -0,0 +1,179 @@
1
+ ---
2
+ name: sophnet-dailynews
3
+ description: Generate a daily, high-signal Markdown news report by scraping preset sources, filtering and deduplicating items. Use when the user asks for daily news, tech digest, today's links, or requests an automated curated report.
4
+ ---
5
+
6
+ # Daily news report (Markdown)
7
+
8
+ ## Quick start
9
+
10
+ 1. Resolve the target date.
11
+ - Accept `YYYY-MM-DD` from the user.
12
+ - Default to local today.
13
+ 2. Read `sources.json` and `cache.json` **with explicit paths**.
14
+ - Always call the read tool with a `path` field (not `file_path`).
15
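+ - Resolve both files relative to this skill's own directory (the folder that contains this `SKILL.md`) and pass the resulting absolute path. The paths below are examples from one installation and will differ on yours:
16
+ - `read(path="/Data/shutong.shan/clawd/skills/sophnet-dailynews/sources.json")`
17
+ - `read(path="/Data/shutong.shan/clawd/skills/sophnet-dailynews/cache.json")`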
18
+ 3. Collect items in waves (Tier 1 → Tier 2 → Tier 3/browser) until the report has enough **high-quality** items.
19
+ 4. Write `NewsReport/YYYY-MM-DD-news-report.md`.
20
+ 5. Update `cache.json` (dedupe + historical stats).
21
+
22
+ ## Files in this skill
23
+
24
+ - `sources.json`: tiers, batches, URLs, fetch method (`webfetch`/`browser`), extraction hint, enable/disable flags, and quality thresholds.
25
+ - `cache.json`: last run metadata, per-source stats, URL/content dedupe caches, and per-day article history.
26
+
27
+ ## Common failure: `web_fetch` → `fetch failed`
28
+
29
+ This usually means the fetch tool hit one of these:
30
+
31
+ - a redirect it doesn't follow (302)
32
+ - a method mismatch (some sites don't like `HEAD`)
33
+ - bot protection / CDN quirks
34
+ - response too large / slow for the tool limits
35
+
36
+ This skill is tuned to avoid common failures:
37
+
38
+ - Prefer **HN RSS** over HTML (`https://news.ycombinator.com/rss`).
39
+ - Pin **HuggingFace Papers** to a concrete date URL to avoid redirects:
40
+ - `https://huggingface.co/papers/date/{{date}}`
41
+ - Replace `{{date}}` with the target date (`YYYY-MM-DD`) before fetching.
42
+
43
+ ## Output contract
44
+
45
+ Produce exactly one Markdown file:
46
+
47
+ - Path: `NewsReport/YYYY-MM-DD-news-report.md`
48
+ - Target: `quality_thresholds.target_items` items (default 20)
49
+ - Include only items with `quality_score >= quality_thresholds.min_score_to_include` (default 3)
50
+
51
+ Each item must include:
52
+
53
+ - `title`
54
+ - `summary` (2–4 sentences, concrete and non-hypey)
55
+ - `key_points` (max 3)
56
+ - `url` (canonical if possible)
57
+ - `source_id`
58
+ - `keywords` (2–6)
59
+ - `quality_score` (1–5, integer)
60
+
61
+ ## Collection workflow
62
+
63
+ ### 1) Initialize
64
+
65
+ - Load `sources.json` → treat it as the source of truth.
66
+ - Load `cache.json` → use it for dedupe and stats.
67
+ - Create `NewsReport/` if missing.
68
+ - If `NewsReport/YYYY-MM-DD-news-report.md` already exists, either:
69
+ - regenerate from scratch (preferred), or
70
+ - append only if explicitly requested.
71
+
72
+ ### 2) Fetch in waves (early stop)
73
+
74
+ Follow the tier order in `sources.json`:
75
+
76
+ - **Wave A (Tier 1 / batch_a + batch_b)**: high hit-rate sources first.
77
+ - If you still have fewer than ~15 included items after filtering, continue.
78
+ - **Wave B (Tier 2 / batch_a + batch_b)**: supplemental sources.
79
+ - If you still have fewer than `target_items`, continue.
80
+ - **Wave C (Tier 3 / browser sources)**: JS-rendered / blocked sources.
81
+
82
+ Stop fetching as soon as either of these holds:
83
+
84
+ - you have at least `target_items` included items **and** have evaluated at least `quality_thresholds.early_stop_threshold` total candidates (default 25), or
85
+ - you have exhausted all enabled sources.
86
+
87
+ ### 3) Extract and normalize
88
+
89
+ For each enabled source entry:
90
+
91
+ - Fetch the page content using the configured `fetch_method`:
92
+ - `webfetch`: normal HTTP fetch.
93
+ - `browser`: render with a headless browser when available; otherwise skip with a recorded error.
94
+ - Apply the `extract` hint from the source entry (examples: `top_10`, `latest_5`, `latest_issue`, `today_top_5`).
95
+ - For each candidate item, normalize:
96
+ - clean title (no site suffix noise)
97
+ - canonical URL (strip tracking params when safe)
98
+ - short summary + key points
99
+
100
+ **RSS-first rule (important):**
101
+
102
+ - For RSS sources, **do not** `web_fetch` the item URLs.
103
+ - Use the RSS item's title + description/summary as the source of truth.
104
+ - If the RSS description is too thin to summarize, either:
105
+ - drop the item, or
106
+ - keep it with a one-sentence, non-speculative summary based only on the RSS description.
107
+ - Only fetch item URLs if the source entry explicitly says `extract: fetch_items` (none do by default).
108
+
109
+ If a `webfetch` attempt fails:
110
+
111
+ - retry once (respect `fetch_config.webfetch.timeout_ms`)
112
+ - then mark the source as failed and continue (do not abort the whole report)
113
+
114
+ ### 4) Filter, score, and dedupe
115
+
116
+ Apply these rules in order:
117
+
118
+ 1. Reject obvious low-signal items (marketing fluff, generic science, job posts, thin announcements).
119
+ 2. Score each remaining item (`quality_score` 1–5) with a consistent rubric:
120
+ - 5: deeply useful + specific + actionable/insightful
121
+ - 4: strong signal, worth reading
122
+ - 3: decent, include only if you need more items
123
+ - 1–2: exclude
124
+ 3. Deduplicate:
125
+ - exact URL match (including `cache.json:url_cache`)
126
+ - near-duplicate title (treat ~80% similarity as duplicate; keep the higher-scored one)
127
+ - optional content hash match (when you have the full text)
128
+
129
+ ### 5) Select and sort
130
+
131
+ - Keep only items with `quality_score >= min_score_to_include`.
132
+ - Sort by `quality_score` descending.
133
+ - Break ties by source credibility (Tier 1 > Tier 2 > Tier 3) and recency.
134
+ - Take the top `target_items`.
135
+
136
+ ## Markdown template
137
+
138
+ Use this structure:
139
+
140
+ ```markdown
141
+ # Daily News Report (YYYY-MM-DD)
142
+
143
+ > Sources used: N | Candidates evaluated: M | Included: K
144
+ > Generated at: <local timestamp> | Skill: sophnet-dailynews
145
+
146
+ ---
147
+
148
+ ## 1. <Title>
149
+
150
+ - **Summary**: ...
151
+ - **Key Points**:
152
+ 1. ...
153
+ 2. ...
154
+ 3. ...
155
+ - **Source**: <source_id> — <url>
156
+ - **Keywords**: `k1` `k2` `k3`
157
+ - **Score**: 4/5
158
+ ```
159
+
160
+ ## Cache update rules (`cache.json`)
161
+
162
+ Update these fields every run:
163
+
164
+ - `last_run`: date, duration, items_collected, items_published, sources_used
165
+ - `source_stats[source_id]`: total_fetches, success_count, avg_items_per_fetch, avg_quality_score, last_fetch, last_success
166
+ - `url_cache.entries`: add included URLs (store timestamps; respect `_ttl_hours`)
167
+ - `content_hashes.entries`: add hashes when available (respect `_ttl_hours`)
168
+ - `article_history[YYYY-MM-DD]`: record the final included item list (at minimum: title + url + source_id + score)
169
+
170
+ ## Editing sources (`sources.json`)
171
+
172
+ - Disable a flaky/low-quality source by setting `enabled: false` or moving it to `disabled`.
173
+ - Prefer fixing extraction hints before adding new sources.
174
+ - Keep Tier 1 small and high-signal; use Tier 2 for "fill".
175
+
176
+ ## Failure handling
177
+
178
+ - If a source 403s on `webfetch`, try `browser` (if available) or skip and record the error.
179
+ - If all enabled sources fail, still write a report header and an explicit "no items" section; do not silently succeed.
@@ -0,0 +1,151 @@
1
+ {
2
+ "schema_version": "1.0",
3
+ "description": "Daily News Report 缓存文件,用于避免重复抓取和跟踪历史表现",
4
+
5
+ "last_run": {
6
+ "date": "2026-02-09",
7
+ "duration_seconds": 30,
8
+ "items_collected": 3,
9
+ "items_published": 3,
10
+ "sources_used": ["fs_blog", "hackernoon_pm"]
11
+ },
12
+
13
+ "source_stats": {
14
+ "_comment": "记录每个源的历史表现,用于动态调整优先级",
15
+ "hn": {
16
+ "total_fetches": 2,
17
+ "success_count": 0,
18
+ "avg_items_per_fetch": 0,
19
+ "avg_quality_score": 0,
20
+ "last_fetch": "2026-02-09T04:44:19Z",
21
+ "last_success": null,
22
+ "last_error": "fetch failed"
23
+ },
24
+ "hf_papers": {
25
+ "total_fetches": 2,
26
+ "success_count": 1,
27
+ "avg_items_per_fetch": 0,
28
+ "avg_quality_score": 0,
29
+ "last_fetch": "2026-02-09T04:44:19Z",
30
+ "last_success": "2026-02-09T04:44:19Z",
31
+ "last_error": null,
32
+ "note": "No new papers for 2026-02-09 date page"
33
+ },
34
+ "one_useful_thing": {
35
+ "total_fetches": 2,
36
+ "success_count": 1,
37
+ "avg_items_per_fetch": 0,
38
+ "avg_quality_score": 0,
39
+ "last_fetch": "2026-02-09T04:44:24Z",
40
+ "last_success": "2026-02-09T04:44:24Z",
41
+ "last_error": null,
42
+ "note": "Landing page only, newsletter content not accessible"
43
+ },
44
+ "paul_graham": {
45
+ "total_fetches": 2,
46
+ "success_count": 1,
47
+ "avg_items_per_fetch": 0,
48
+ "avg_quality_score": 0,
49
+ "last_fetch": "2026-02-09T04:44:26Z",
50
+ "last_success": "2026-02-09T04:44:26Z",
51
+ "last_error": null,
52
+ "note": "Minimal content returned from articles page"
53
+ },
54
+ "james_clear": {
55
+ "total_fetches": 2,
56
+ "success_count": 1,
57
+ "avg_items_per_fetch": 0,
58
+ "avg_quality_score": 0,
59
+ "last_fetch": "2026-02-09T04:44:32Z",
60
+ "last_success": "2026-02-09T04:44:32Z",
61
+ "last_error": null,
62
+ "note": "Main site only, newsletter requires email signup"
63
+ },
64
+ "fs_blog": {
65
+ "total_fetches": 3,
66
+ "success_count": 3,
67
+ "avg_items_per_fetch": 1.0,
68
+ "avg_quality_score": 5.0,
69
+ "last_fetch": "2026-02-09T04:44:38Z",
70
+ "last_success": "2026-02-09T04:44:38Z",
71
+ "last_error": null
72
+ },
73
+ "hackernoon_pm": {
74
+ "total_fetches": 3,
75
+ "success_count": 3,
76
+ "avg_items_per_fetch": 1.0,
77
+ "avg_quality_score": 4.0,
78
+ "last_fetch": "2026-02-09T04:44:43Z",
79
+ "last_success": "2026-02-09T04:44:43Z",
80
+ "last_error": null,
81
+ "note": "Category page fetched, extracted 2 high-quality articles"
82
+ },
83
+ "scotthyoung": {
84
+ "total_fetches": 2,
85
+ "success_count": 1,
86
+ "avg_items_per_fetch": 0,
87
+ "avg_quality_score": 0,
88
+ "last_fetch": "2026-02-09T04:44:48Z",
89
+ "last_success": "2026-02-09T04:44:48Z",
90
+ "last_error": null,
91
+ "note": "Articles page only, book and course promo without article links"
92
+ }
93
+ },
94
+
95
+ "url_cache": {
96
+ "_comment": "已处理的 URL 缓存,避免重复收录",
97
+ "_ttl_hours": 168,
98
+ "entries": {
99
+ "https://fs.blog/brain-food/january-25-2026/": {
100
+ "timestamp": "2026-02-09T04:44:38Z",
101
+ "title": "The Spectrum of Independence"
102
+ },
103
+ "https://hackernoon.com/10-proven-ways-to-reduce-misalignment-between-stakeholders-in-product-teams": {
104
+ "timestamp": "2026-02-09T04:44:43Z",
105
+ "title": "10 Proven Ways to Reduce Misalignment Between Stakeholders in Product Teams"
106
+ },
107
+ "https://hackernoon.com/why-good-products-feel-broken": {
108
+ "timestamp": "2026-02-09T04:44:43Z",
109
+ "title": "Why Good Products Feel Broken"
110
+ }
111
+ }
112
+ },
113
+
114
+ "content_hashes": {
115
+ "_comment": "内容指纹,用于去重",
116
+ "_ttl_hours": 168,
117
+ "entries": {}
118
+ },
119
+
120
+ "article_history": {
121
+ "_comment": "已收录文章的简要记录",
122
+ "2026-02-07": [
123
+ {
124
+ "title": "The Spectrum of Independence",
125
+ "url": "https://fs.blog/brain-food/january-25-2026/",
126
+ "source_id": "fs_blog",
127
+ "score": 5
128
+ }
129
+ ],
130
+ "2026-02-09": [
131
+ {
132
+ "title": "The Spectrum of Independence",
133
+ "url": "https://fs.blog/brain-food/january-25-2026/",
134
+ "source_id": "fs_blog",
135
+ "score": 5
136
+ },
137
+ {
138
+ "title": "10 Proven Ways to Reduce Misalignment Between Stakeholders in Product Teams",
139
+ "url": "https://hackernoon.com/10-proven-ways-to-reduce-misalignment-between-stakeholders-in-product-teams",
140
+ "source_id": "hackernoon_pm",
141
+ "score": 4
142
+ },
143
+ {
144
+ "title": "Why Good Products Feel Broken",
145
+ "url": "https://hackernoon.com/why-good-products-feel-broken",
146
+ "source_id": "hackernoon_pm",
147
+ "score": 4
148
+ }
149
+ ]
150
+ }
151
+ }