delimit-cli 4.0.3 → 4.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,106 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ import urllib.parse
5
+ import urllib.request
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ logger = logging.getLogger("delimit.ai.reddit_proxy")
10
+
11
+ def _get_proxy_config() -> Dict[str, str]:
12
+ """Load proxy config from private secrets or environment."""
13
+ config = {"proxy_url": ""}
14
+
15
+ # 1. Check environment variable
16
+ env_url = os.environ.get("DELIMIT_REDDIT_PROXY")
17
+ if env_url:
18
+ config["proxy_url"] = env_url
19
+ return config
20
+
21
+ # 2. Check private secrets file
22
+ secrets_path = Path.home() / ".delimit" / "secrets" / "reddit-proxy.json"
23
+ if secrets_path.exists():
24
+ try:
25
+ secrets = json.loads(secrets_path.read_text())
26
+ config["proxy_url"] = secrets.get("proxy_url", "")
27
+ except Exception as e:
28
+ logger.debug(f"Failed to load reddit-proxy secrets: {e}")
29
+
30
+ return config
31
+
32
def _posts_from_listing(body: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the non-empty ``data`` payload of each child in a Reddit listing."""
    children = body.get("data", {}).get("children", [])
    return [c.get("data", {}) for c in children if c.get("data")]


def fetch_subreddit(subreddit: str, sort: str = "new", limit: int = 10) -> List[Dict[str, Any]]:
    """Fetch posts from a single subreddit, trying several sources in turn.

    The sources are tried in order and the first one that responds with
    parseable JSON wins (even if it yields no posts):

    1. The configured proxy (see ``_get_proxy_config``), if any.
    2. The PullPush public archive API.
    3. A direct request to reddit.com.

    Args:
        subreddit: Subreddit name, without the ``r/`` prefix.
        sort: Listing name used in the reddit.com URL (for example ``"new"``).
            The PullPush request does not use it.
        limit: Maximum number of posts to request.

    Returns:
        A list of post dicts as delivered by the responding source, or an
        empty list when every source fails.
    """
    # Percent-encode caller-supplied pieces so reserved characters
    # ("/", "?", "&", "#", spaces) cannot alter the URL structure.
    # "+" and "," stay literal, exactly as they were sent before.
    sub_path = urllib.parse.quote(str(subreddit), safe="+")
    sort_path = urllib.parse.quote(str(sort), safe="")
    reddit_url = f"https://www.reddit.com/r/{sub_path}/{sort_path}.json?limit={limit}&raw_json=1"

    # 1. Try Local Proxy (Residential IP)
    proxy_cfg = _get_proxy_config()
    proxy_url = proxy_cfg.get("proxy_url")
    if proxy_url:
        try:
            # Extend an existing query string rather than adding a second "?".
            separator = "&" if "?" in proxy_url else "?"
            fetch_url = f"{proxy_url}{separator}url={urllib.parse.quote(reddit_url, safe='')}"
            req = urllib.request.Request(fetch_url, headers={"User-Agent": "Delimit/1.0"})
            with urllib.request.urlopen(req, timeout=10) as resp:
                body = json.loads(resp.read().decode())
            return _posts_from_listing(body)
        except Exception as e:
            # Best effort: any failure falls through to the next source.
            logger.debug("Local proxy failed for r/%s: %s", subreddit, e)

    # 2. Fallback: PullPush API (Public Archive)
    try:
        sub_query = urllib.parse.quote(str(subreddit), safe="+,")
        pp_url = (
            "https://api.pullpush.io/reddit/search/submission/"
            f"?subreddit={sub_query}&size={limit}&sort=desc"
        )
        req = urllib.request.Request(pp_url, headers={"User-Agent": "Delimit/1.0"})
        with urllib.request.urlopen(req, timeout=10) as resp:
            body = json.loads(resp.read().decode())
        return body.get("data", [])
    except Exception as e:
        logger.debug("PullPush fallback failed for r/%s: %s", subreddit, e)

    # 3. Fallback: Direct (Often blocked on servers)
    try:
        req = urllib.request.Request(reddit_url, headers={"User-Agent": "Mozilla/5.0 (Delimit)"})
        with urllib.request.urlopen(req, timeout=5) as resp:
            body = json.loads(resp.read().decode())
        return _posts_from_listing(body)
    except Exception as e:
        # Last resort failed: surface it at warning level.
        logger.warning("Direct fetch failed for r/%s: %s", subreddit, e)

    return []
74
+
75
def fetch_thread(thread_id: str) -> Optional[Dict[str, Any]]:
    """Fetch a single Reddit thread by ID, trying several sources in turn.

    Sources, in order:

    1. The configured proxy (see ``_get_proxy_config``), if any.
    2. The PullPush public archive API.

    Args:
        thread_id: Reddit submission ID, as used in ``/comments/<id>``.

    Returns:
        The post's data dict from the first source that yields one, or
        ``None`` when no source returns the thread.
    """
    # Percent-encode the ID so reserved characters cannot alter the URLs.
    id_path = urllib.parse.quote(str(thread_id), safe="")
    reddit_url = f"https://www.reddit.com/comments/{id_path}.json?raw_json=1"

    # 1. Try Local Proxy
    proxy_cfg = _get_proxy_config()
    proxy_url = proxy_cfg.get("proxy_url")
    if proxy_url:
        try:
            # Extend an existing query string rather than adding a second "?".
            separator = "&" if "?" in proxy_url else "?"
            fetch_url = f"{proxy_url}{separator}url={urllib.parse.quote(reddit_url, safe='')}"
            req = urllib.request.Request(fetch_url, headers={"User-Agent": "Delimit/1.0"})
            with urllib.request.urlopen(req, timeout=10) as resp:
                data = json.loads(resp.read().decode())
            if isinstance(data, list) and data:
                # The first element of the response is the listing holding the post.
                children = data[0].get("data", {}).get("children") or []
                post = children[0].get("data") if children else None
                # Return only a non-empty post; an empty result falls
                # through to the PullPush fallback instead of yielding {}.
                if post:
                    return post
        except Exception as e:
            # Best effort: any failure falls through to the next source.
            logger.debug("Local proxy failed for thread %s: %s", thread_id, e)

    # 2. Fallback: PullPush
    try:
        # "," stays literal, exactly as it was sent before.
        id_query = urllib.parse.quote(str(thread_id), safe=",")
        pp_url = f"https://api.pullpush.io/reddit/search/submission/?ids={id_query}"
        req = urllib.request.Request(pp_url, headers={"User-Agent": "Delimit/1.0"})
        with urllib.request.urlopen(req, timeout=10) as resp:
            body = json.loads(resp.read().decode())
        data = body.get("data", [])
        return data[0] if data else None
    except Exception as e:
        logger.debug("PullPush fallback failed for thread %s: %s", thread_id, e)

    return None
@@ -560,3 +560,37 @@ def _save_scan(result: Dict[str, Any], scan_time: datetime) -> Path:
560
560
  path.write_text(json.dumps(result, indent=2, default=str))
561
561
  logger.info("Scan saved to %s", path)
562
562
  return path
563
+
564
+
565
def fetch_thread(thread_id: str, *, proxy_url: str = PROXY_URL) -> Optional[Dict[str, Any]]:
    """Look up one Reddit thread by ID through the residential proxy.

    Args:
        thread_id: Reddit submission ID, as used in ``/comments/<id>``.
        proxy_url: Proxy endpoint; the Reddit URL is passed to it
            percent-encoded in the ``url`` query parameter.

    Returns:
        A dict with the post's ``id``, ``title``, ``author``, ``score``,
        ``num_comments``, ``subreddit``, ``permalink``, ``selftext`` and
        ``created_utc`` (missing fields get an empty string or zero), or
        ``None`` when the request fails or the response holds no post data.
    """
    import urllib.parse
    import urllib.request

    target = f"https://www.reddit.com/comments/{thread_id}.json?raw_json=1"
    proxied = f"{proxy_url}?url={urllib.parse.quote(target, safe='')}"
    request = urllib.request.Request(
        proxied,
        headers={"User-Agent": "delimit-scanner/1.0", "Accept": "application/json"},
    )

    # Output fields paired with the fallback used when Reddit omits them.
    field_defaults = (
        ("id", ""),
        ("title", ""),
        ("author", ""),
        ("score", 0),
        ("num_comments", 0),
        ("subreddit", ""),
        ("permalink", ""),
        ("selftext", ""),
        ("created_utc", 0),
    )

    try:
        with urllib.request.urlopen(request, timeout=15) as resp:
            payload = json.loads(resp.read().decode())
            if not isinstance(payload, list) or len(payload) == 0:
                return None
            # The first element of the response is the listing holding the post.
            listing = payload[0].get("data", {})
            post = listing.get("children", [{}])[0].get("data", {})
            if not post:
                return None
            return {key: post.get(key, fallback) for key, fallback in field_defaults}
    except Exception as exc:
        logger.warning("Failed to fetch thread %s: %s", thread_id, exc)
    return None