grokfeed 0.2.0__py3-none-any.whl
This diff shows the content of publicly available package versions as released to their public registries. It is provided for informational purposes only.
- grokfeed/__init__.py +1 -0
- grokfeed/app.py +290 -0
- grokfeed/config.py +72 -0
- grokfeed/main.py +19 -0
- grokfeed/sources/__init__.py +5 -0
- grokfeed/sources/comments.py +154 -0
- grokfeed/sources/hn.py +89 -0
- grokfeed/sources/lobsters.py +61 -0
- grokfeed/sources/reddit.py +92 -0
- grokfeed/widgets/__init__.py +6 -0
- grokfeed/widgets/comments_modal.py +124 -0
- grokfeed/widgets/content_modal.py +82 -0
- grokfeed/widgets/feed.py +75 -0
- grokfeed/widgets/post_split_modal.py +179 -0
- grokfeed/widgets/story.py +73 -0
- grokfeed-0.2.0.dist-info/METADATA +168 -0
- grokfeed-0.2.0.dist-info/RECORD +20 -0
- grokfeed-0.2.0.dist-info/WHEEL +4 -0
- grokfeed-0.2.0.dist-info/entry_points.txt +2 -0
- grokfeed-0.2.0.dist-info/licenses/LICENSE +21 -0
grokfeed/__init__.py
ADDED
@@ -0,0 +1 @@
__version__ = "0.1.0"
grokfeed/app.py
ADDED
@@ -0,0 +1,290 @@
from __future__ import annotations

import asyncio

import httpx
from textual.app import App, ComposeResult
from textual.binding import Binding
from textual.containers import Container
from textual.widgets import Footer, Header, Label, LoadingIndicator

from .config import Config, load_cache, save_cache
from .sources.hn import fetch_hn_stories_by_ids, fetch_hn_top_ids
from .sources.lobsters import fetch_lobsters_posts
from .sources.reddit import fetch_reddit_posts
from .widgets.feed import FeedList
from .widgets.post_split_modal import PostSplitModal
from .widgets.story import source_color as get_source_color

# Source filter sentinel
ALL = "all"


def _interleave_by_score(items: list[dict]) -> list[dict]:
    """Sort items by score normalized within each source (0–1 scale)."""
    from collections import defaultdict

    groups: dict[str, list[dict]] = defaultdict(list)
    for item in items:
        groups[item["source"]].append(item)
    for source_items in groups.values():
        scores = [i["score"] for i in source_items]
        lo, hi = min(scores), max(scores)
        span = hi - lo or 1
        for item in source_items:
            item["_norm_score"] = (item["score"] - lo) / span
    return sorted(items, key=lambda i: i.get("_norm_score", 0), reverse=True)


class GrokFeedApp(App):
    """Hacker News + Reddit terminal feed."""

    CSS = """
    Screen {
        background: $surface;
        overflow-y: hidden;
    }
    #status-bar {
        height: 1;
        background: $primary-darken-3;
        color: $text-muted;
        padding: 0 1;
        dock: top;
        margin-top: 3;
    }
    #loading {
        align: center middle;
    }
    LoadingIndicator {
        color: #ff6600;
    }
    """

    BINDINGS = [
        Binding("j", "cursor_down", "Down"),
        Binding("k", "cursor_up", "Up"),
        Binding("down", "cursor_down", "Down", show=False),
        Binding("up", "cursor_up", "Up", show=False),
        Binding("enter", "open_or_body", "Open", priority=True),
        Binding("f", "cycle_source", "Filter"),
        Binding("r", "refresh", "Refresh"),
        Binding("m", "load_more", "More"),
        Binding("q", "quit", "Quit"),
    ]

    TITLE = "grokfeed"
    SUB_TITLE = "HN + Reddit + lobste.rs terminal reader"

    def __init__(self, config: Config) -> None:
        super().__init__()
        self.config = config
        self._all_items: list[dict] = []
        self._source_filter: str = ALL
        self._sources: list[str] = []
        self._loading = False
        self._hn_ids: list[int] = []
        self._hn_offset: int = 0
        self._reddit_after: dict[str, str] = {}

    def compose(self) -> ComposeResult:
        yield Header()
        yield Label("", id="status-bar")
        yield Container(LoadingIndicator(), id="loading")
        yield FeedList(id="feed")
        yield Footer()

    def on_mount(self) -> None:
        self.query_one("#feed").display = False
        self.run_worker(self._load_all(), exclusive=True, name="fetch")

    async def _load_all(self) -> None:
        loading = self.query_one("#loading")
        feed = self.query_one(FeedList)
        loading.display = True
        feed.display = False

        # Serve from cache if fresh enough
        cached = load_cache(self.config.cache_ttl_minutes)
        if cached:
            self._all_items = cached
            self._sources = [ALL] + list(dict.fromkeys(i["source"] for i in cached))
            self._apply_filter(from_cache=True)
            loading.display = False
            feed.display = True
            return

        self._set_status("Fetching stories…")
        try:
            async with httpx.AsyncClient() as client:
                self._hn_ids = await fetch_hn_top_ids(client)
                hn_ids = self._hn_ids[: self.config.hn_story_count]
                self._hn_offset = len(hn_ids)

                hn_task = asyncio.create_task(fetch_hn_stories_by_ids(hn_ids, client))
                reddit_task = asyncio.create_task(
                    fetch_reddit_posts(
                        self.config.subreddits, self.config.reddit_post_count, client
                    )
                )
                lobsters_task = asyncio.create_task(
                    fetch_lobsters_posts(self.config.lobsters_post_count, client)
                )
                (
                    hn_stories,
                    (reddit_posts, self._reddit_after),
                    lobsters_posts,
                ) = await asyncio.gather(hn_task, reddit_task, lobsters_task)
        except Exception as e:
            self._set_status(f"Error: {e}")
            loading.display = False
            return

        items: list[dict] = []
        for s in hn_stories:
            items.append(
                {
                    "title": s.title,
                    "source": "HN",
                    "score": s.score,
                    "comments": s.comments,
                    "url": s.url,
                    "body": s.body,
                    "post_id": str(s.id),
                }
            )
        for p in reddit_posts:
            items.append(
                {
                    "title": p.title,
                    "source": p.source,
                    "score": p.score,
                    "comments": p.comments,
                    "url": p.url,
                    "body": p.body,
                    "post_id": p.id,
                    "subreddit": p.subreddit,
                }
            )
        for lp in lobsters_posts:
            items.append(
                {
                    "title": lp.title,
                    "source": lp.source,
                    "score": lp.score,
                    "comments": lp.comments,
                    "url": lp.url,
                    "body": lp.body,
                    "post_id": lp.id,
                }
            )

        self._all_items = items
        self._sources = [ALL] + list(dict.fromkeys(i["source"] for i in items))
        self._apply_filter()
        save_cache(items)

        loading.display = False
        feed.display = True

    def _apply_filter(self, from_cache: bool = False) -> None:
        feed = self.query_one(FeedList)
        if self._source_filter == ALL:
            visible = _interleave_by_score(self._all_items)
            label = "all sources"
        else:
            visible = [i for i in self._all_items if i["source"] == self._source_filter]
            label = self._source_filter
        feed.load_items(visible)
        suffix = " (cached)" if from_cache else ""
        self._set_status(f"{len(visible)} stories — {label}{suffix}")

    def _set_status(self, msg: str) -> None:
        self.query_one("#status-bar", Label).update(msg)

    def action_cursor_down(self) -> None:
        self.query_one(FeedList).action_cursor_down()

    def action_cursor_up(self) -> None:
        self.query_one(FeedList).action_cursor_up()

    def action_open_or_body(self) -> None:
        feed = self.query_one(FeedList)
        item = feed.current_item()
        if not item:
            return
        color = get_source_color(item["source"], 0)
        self.push_screen(PostSplitModal(item, color))

    def action_load_more(self) -> None:
        self.run_worker(self._fetch_more(), exclusive=True, name="fetch-more")

    async def _fetch_more(self) -> None:
        self._set_status("Loading more…")
        try:
            async with httpx.AsyncClient() as client:
                hn_ids = self._hn_ids[
                    self._hn_offset : self._hn_offset + self.config.hn_story_count
                ]
                hn_task = asyncio.create_task(fetch_hn_stories_by_ids(hn_ids, client))
                reddit_task = asyncio.create_task(
                    fetch_reddit_posts(
                        self.config.subreddits,
                        self.config.reddit_post_count,
                        client,
                        after=self._reddit_after,
                    )
                )
                hn_stories, (reddit_posts, new_after) = await asyncio.gather(hn_task, reddit_task)
        except Exception as e:
            self._set_status(f"Error: {e}")
            return

        self._hn_offset += len(hn_ids)
        self._reddit_after = new_after

        existing_ids = {i["post_id"] for i in self._all_items}
        new_items: list[dict] = []
        for s in hn_stories:
            if str(s.id) not in existing_ids:
                new_items.append(
                    {
                        "title": s.title,
                        "source": "HN",
                        "score": s.score,
                        "comments": s.comments,
                        "url": s.url,
                        "body": s.body,
                        "post_id": str(s.id),
                    }
                )
        for p in reddit_posts:
            if p.id not in existing_ids:
                new_items.append(
                    {
                        "title": p.title,
                        "source": p.source,
                        "score": p.score,
                        "comments": p.comments,
                        "url": p.url,
                        "body": p.body,
                        "post_id": p.id,
                        "subreddit": p.subreddit,
                    }
                )

        self._all_items.extend(new_items)
        self._sources = [ALL] + list(dict.fromkeys(i["source"] for i in self._all_items))
        self._apply_filter()

    def action_refresh(self) -> None:
        self.run_worker(self._load_all(), exclusive=True, name="fetch")

    def action_cycle_source(self) -> None:
        if not self._sources:
            return
        try:
            idx = self._sources.index(self._source_filter)
        except ValueError:
            idx = 0
        self._source_filter = self._sources[(idx + 1) % len(self._sources)]
        self._apply_filter()
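
The ranking step deserves a note: _interleave_by_score min-max normalizes scores within each source before the single merged sort, so the top lobste.rs story ties the top HN story even though its raw score is far smaller. A minimal sketch (not in the wheel) of the effect on toy data, assuming grokfeed is installed:

from grokfeed.app import _interleave_by_score

items = [
    {"title": "hn-top", "source": "HN", "score": 900},
    {"title": "hn-mid", "source": "HN", "score": 300},
    {"title": "lob-top", "source": "lobste.rs", "score": 40},
    {"title": "lob-mid", "source": "lobste.rs", "score": 10},
]
# Both "hn-top" and "lob-top" normalize to 1.0, and sorted() is stable, so this
# prints ["hn-top", "lob-top", "hn-mid", "lob-mid"] rather than HN sweeping the top.
print([i["title"] for i in _interleave_by_score(items)])
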
grokfeed/config.py
ADDED
@@ -0,0 +1,72 @@
from __future__ import annotations

import json
import time

try:
    import tomllib
except ModuleNotFoundError:
    import tomli as tomllib  # type: ignore[no-redef]
from dataclasses import dataclass, field
from pathlib import Path

CONFIG_DIR = Path.home() / ".grokfeed"
CONFIG_PATH = CONFIG_DIR / "config.toml"
CACHE_PATH = CONFIG_DIR / "cache.json"

DEFAULT_CONFIG = """\
subreddits = ["programming", "ClaudeAI", "machinelearning"]
hn_story_count = 30
reddit_post_count = 15
lobsters_post_count = 25
cache_ttl_minutes = 10
"""


@dataclass
class Config:
    subreddits: list[str] = field(
        default_factory=lambda: ["programming", "python", "machinelearning"]
    )
    hn_story_count: int = 30
    reddit_post_count: int = 15
    lobsters_post_count: int = 25
    cache_ttl_minutes: int = 10


def load_config() -> tuple[Config, bool]:
    """Return (Config, created_fresh). Creates default config on first run."""
    created = False
    if not CONFIG_PATH.exists():
        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
        CONFIG_PATH.write_text(DEFAULT_CONFIG)
        created = True

    raw = tomllib.loads(CONFIG_PATH.read_text())
    return Config(
        subreddits=raw.get("subreddits", ["programming", "python", "machinelearning"]),
        hn_story_count=int(raw.get("hn_story_count", 30)),
        reddit_post_count=int(raw.get("reddit_post_count", 15)),
        lobsters_post_count=int(raw.get("lobsters_post_count", 25)),
        cache_ttl_minutes=int(raw.get("cache_ttl_minutes", 10)),
    ), created


def load_cache(ttl_minutes: int) -> list[dict] | None:
    if not CACHE_PATH.exists():
        return None
    try:
        data = json.loads(CACHE_PATH.read_bytes())
        if time.time() - data["ts"] > ttl_minutes * 60:
            return None
        return data["items"]
    except Exception:
        return None


def save_cache(items: list[dict]) -> None:
    try:
        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
        CACHE_PATH.write_text(json.dumps({"ts": time.time(), "items": items}))
    except Exception:
        pass
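
A minimal sketch (not in the wheel) of the config/cache round trip these helpers implement; paths come from the constants above, so the first call materializes ~/.grokfeed/config.toml:

from grokfeed.config import load_cache, load_config, save_cache

config, created = load_config()  # created is True only when the default file was just written
save_cache([{"title": "example", "source": "HN", "score": 1}])
fresh = load_cache(config.cache_ttl_minutes)  # returns the saved list while within the TTL
stale = load_cache(0)                         # a TTL of 0 minutes treats any saved cache as expired
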
grokfeed/main.py
ADDED
@@ -0,0 +1,19 @@
from __future__ import annotations

import click


@click.command()
def app() -> None:
    """Hacker News + Reddit terminal feed viewer."""
    from .app import GrokFeedApp
    from .config import CONFIG_PATH, load_config

    config, created = load_config()
    if created:
        click.echo(f"Config created: {CONFIG_PATH}\nEdit it to add/remove subreddits.")
    GrokFeedApp(config).run()


if __name__ == "__main__":
    app()
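
For completeness, a sketch (not in the wheel) of launching the TUI from Python rather than through the console script; it mirrors what the click command above does:

from grokfeed.app import GrokFeedApp
from grokfeed.config import load_config

config, _created = load_config()  # writes the default config file on first run
GrokFeedApp(config).run()         # blocks until the user quits with "q"
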
grokfeed/sources/comments.py
ADDED
@@ -0,0 +1,154 @@
from __future__ import annotations

import asyncio
import html as _html
import re as _re
from dataclasses import dataclass

import httpx

USER_AGENT = "grokfeed:v0.1.0 (terminal feed reader)"
HN_ITEM = "https://hacker-news.firebaseio.com/v0/item/{}.json"
REDDIT_COMMENTS = "https://www.reddit.com/r/{subreddit}/comments/{post_id}.json?limit=50&depth=3"
LOBSTERS_POST = "https://lobste.rs/s/{short_id}.json"


@dataclass
class Comment:
    author: str
    score: int
    body: str
    depth: int = 0


def _strip_html(raw: str) -> str:
    text = _re.sub(r"<p>", "\n\n", raw)
    text = _re.sub(r"<[^>]+>", "", text)
    return _html.unescape(text).strip()


# ── HN ────────────────────────────────────────────────────────────────────────


async def _fetch_hn_comment(
    client: httpx.AsyncClient, sem: asyncio.Semaphore, cid: int
) -> Comment | None:
    async with sem:
        try:
            r = await client.get(HN_ITEM.format(cid), timeout=10)
            r.raise_for_status()
            d = r.json()
            if not d or d.get("deleted") or d.get("dead") or d.get("type") != "comment":
                return None
            raw = d.get("text", "")
            return Comment(
                author=d.get("by", "[deleted]"),
                score=0,  # HN comments have no score in API
                body=_strip_html(raw) if raw else "",
                depth=0,
            )
        except Exception:
            return None


async def fetch_hn_comments(story_id: int, limit: int = 30) -> list[Comment]:
    sem = asyncio.Semaphore(10)
    async with httpx.AsyncClient() as client:
        r = await client.get(HN_ITEM.format(story_id), timeout=10)
        r.raise_for_status()
        d = r.json()
        kids = (d.get("kids") or [])[:limit]
        tasks = [_fetch_hn_comment(client, sem, kid) for kid in kids]
        results = await asyncio.gather(*tasks)
        return [c for c in results if c is not None]


# ── Reddit ─────────────────────────────────────────────────────────────────────


def _flatten_reddit(children: list, depth: int = 0, max_depth: int = 2) -> list[Comment]:
    out: list[Comment] = []
    for child in children:
        if child.get("kind") != "t1":
            continue
        d = child.get("data", {})
        body = d.get("body", "")
        if body in ("[deleted]", "[removed]", ""):
            continue
        out.append(
            Comment(
                author=d.get("author", "[deleted]"),
                score=d.get("score", 0),
                body=body,
                depth=depth,
            )
        )
        if depth < max_depth:
            replies = d.get("replies", "")
            if isinstance(replies, dict):
                reply_children = replies.get("data", {}).get("children", [])
                out.extend(_flatten_reddit(reply_children, depth + 1, max_depth))
    return out


async def fetch_reddit_comments(subreddit: str, post_id: str) -> list[Comment]:
    headers = {"User-Agent": USER_AGENT}
    url = REDDIT_COMMENTS.format(subreddit=subreddit, post_id=post_id)
    async with httpx.AsyncClient(headers=headers) as client:
        try:
            r = await client.get(url, timeout=15)
            r.raise_for_status()
            data = r.json()
        except Exception:
            return []
    # data is [post_listing, comments_listing]
    if not isinstance(data, list) or len(data) < 2:
        return []
    children = data[1].get("data", {}).get("children", [])
    return _flatten_reddit(children)


# ── lobste.rs ──────────────────────────────────────────────────────────────────


async def fetch_lobsters_comments(short_id: str) -> list[Comment]:
    headers = {"User-Agent": USER_AGENT}
    url = LOBSTERS_POST.format(short_id=short_id)
    async with httpx.AsyncClient(headers=headers) as client:
        try:
            r = await client.get(url, timeout=15)
            r.raise_for_status()
            data = r.json()
        except Exception:
            return []
    out: list[Comment] = []
    for c in data.get("comments", []):
        raw = c.get("comment", "")
        body = _strip_html(raw) if raw else ""
        if not body:
            continue
        raw_user = c.get("commenting_user", "?")
        author = raw_user if isinstance(raw_user, str) else raw_user.get("username", "?")
        out.append(
            Comment(
                author=author,
                score=c.get("score", 0),
                body=body,
                depth=c.get("indent_level", 0),
            )
        )
    return out


# ── Dispatcher ─────────────────────────────────────────────────────────────────


async def fetch_comments(item: dict) -> list[Comment]:
    source = item.get("source", "")
    if source == "HN":
        return await fetch_hn_comments(int(item["post_id"]))
    elif source.startswith("r/"):
        return await fetch_reddit_comments(item["subreddit"], item["post_id"])
    elif source == "lobste.rs":
        return await fetch_lobsters_comments(item["post_id"])
    return []
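
A sketch (not in the wheel) of driving the dispatcher directly; the item dict mimics what app.py builds, and the HN story id is hypothetical:

import asyncio

from grokfeed.sources.comments import fetch_comments

item = {"source": "HN", "post_id": "8863"}  # hypothetical id, for illustration only
comments = asyncio.run(fetch_comments(item))
for c in comments[:5]:
    print(f"{'  ' * c.depth}{c.author}: {c.body[:60]}")
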
grokfeed/sources/hn.py
ADDED
@@ -0,0 +1,89 @@
from __future__ import annotations

import asyncio
import html as _html
import re as _re
from dataclasses import dataclass

import httpx

HN_BASE = "https://hacker-news.firebaseio.com/v0"
HN_ITEM = HN_BASE + "/item/{}.json"
HN_TOP = HN_BASE + "/topstories.json"


def _strip_html(raw: str) -> str:
    text = _re.sub(r"<p>", "\n\n", raw)
    text = _re.sub(r"<[^>]+>", "", text)
    return _html.unescape(text).strip()


@dataclass
class Story:
    id: int
    title: str
    url: str
    score: int
    comments: int
    body: str = ""  # present for Ask HN / Tell HN posts
    source: str = "HN"


async def _fetch_item(client: httpx.AsyncClient, item_id: int) -> Story | None:
    try:
        r = await client.get(HN_ITEM.format(item_id), timeout=10)
        r.raise_for_status()
        d = r.json()
        if not d or d.get("type") != "story":
            return None
        raw_text = d.get("text", "")
        return Story(
            id=d["id"],
            title=d.get("title", "(no title)"),
            url=d.get("url") or f"https://news.ycombinator.com/item?id={d['id']}",
            score=d.get("score", 0),
            comments=d.get("descendants", 0),
            body=_strip_html(raw_text) if raw_text else "",
        )
    except Exception:
        return None


async def fetch_hn_top_ids(client: httpx.AsyncClient) -> list[int]:
    r = await client.get(HN_TOP, timeout=10)
    r.raise_for_status()
    return r.json()


async def fetch_hn_stories_by_ids(
    ids: list[int],
    client: httpx.AsyncClient | None = None,
) -> list[Story]:
    async def _run(c: httpx.AsyncClient) -> list[Story]:
        sem = asyncio.Semaphore(10)

        async def _bounded(item_id: int) -> Story | None:
            async with sem:
                return await _fetch_item(c, item_id)

        results = await asyncio.gather(*[_bounded(i) for i in ids])
        return [s for s in results if s is not None]

    if client is not None:
        return await _run(client)
    async with httpx.AsyncClient() as c:
        return await _run(c)


async def fetch_hn_stories(
    count: int = 30,
    client: httpx.AsyncClient | None = None,
) -> list[Story]:
    async def _run(c: httpx.AsyncClient) -> list[Story]:
        ids = (await fetch_hn_top_ids(c))[:count]
        return await fetch_hn_stories_by_ids(ids, c)

    if client is not None:
        return await _run(client)
    async with httpx.AsyncClient() as c:
        return await _run(c)
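
A sketch (not in the wheel) of the two calling conventions these fetchers support: pass a shared httpx.AsyncClient as app.py does, or pass none and let the helper open and close its own:

import asyncio

from grokfeed.sources.hn import fetch_hn_stories

async def main() -> None:
    stories = await fetch_hn_stories(count=5)  # no client passed: one is created internally
    for s in stories:
        print(f"{s.score:>4}  {s.title} ({s.comments} comments)")

asyncio.run(main())
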
grokfeed/sources/lobsters.py
ADDED
@@ -0,0 +1,61 @@
from __future__ import annotations

import html as _html
import re as _re
from dataclasses import dataclass

import httpx


def _strip_html(raw: str) -> str:
    text = _re.sub(r"<p>", "\n\n", raw)
    text = _re.sub(r"<[^>]+>", "", text)
    return _html.unescape(text).strip()


LOBSTERS_URL = "https://lobste.rs/hottest.json"
USER_AGENT = "grokfeed:v0.1.0 (terminal feed reader)"


@dataclass
class LobstersPost:
    id: str
    title: str
    url: str
    score: int
    comments: int
    body: str = ""  # description for discussion posts
    source: str = "lobste.rs"


async def fetch_lobsters_posts(
    count: int = 25,
    client: httpx.AsyncClient | None = None,
) -> list[LobstersPost]:
    async def _run(c: httpx.AsyncClient) -> list[LobstersPost]:
        try:
            r = await c.get(LOBSTERS_URL, timeout=15, headers={"User-Agent": USER_AGENT})
            r.raise_for_status()
            data = r.json()
        except Exception:
            return []
        posts: list[LobstersPost] = []
        for item in data[:count]:
            raw_desc = item.get("description", "")
            ext_url = item.get("url", "")
            posts.append(
                LobstersPost(
                    id=item.get("short_id", ""),
                    title=item.get("title", "(no title)"),
                    url=ext_url or item.get("comments_url", ""),
                    score=item.get("score", 0),
                    comments=item.get("comment_count", 0),
                    body=_strip_html(raw_desc) if raw_desc else "",
                )
            )
        return posts

    if client is not None:
        return await _run(client)
    async with httpx.AsyncClient() as c:
        return await _run(c)
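
Finally, a sketch (not in the wheel) of the lobste.rs fetcher; note that, unlike the HN helpers, it swallows network errors and returns an empty list rather than raising:

import asyncio

from grokfeed.sources.lobsters import fetch_lobsters_posts

posts = asyncio.run(fetch_lobsters_posts(count=5))
for p in posts:
    print(f"{p.score:>3}  {p.title}  {p.url}")
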