p8-platoon 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. p8_platoon-0.2.0/PKG-INFO +11 -0
  2. p8_platoon-0.2.0/README.md +277 -0
  3. p8_platoon-0.2.0/p8_platoon.egg-info/PKG-INFO +11 -0
  4. p8_platoon-0.2.0/p8_platoon.egg-info/SOURCES.txt +34 -0
  5. p8_platoon-0.2.0/p8_platoon.egg-info/dependency_links.txt +1 -0
  6. p8_platoon-0.2.0/p8_platoon.egg-info/entry_points.txt +2 -0
  7. p8_platoon-0.2.0/p8_platoon.egg-info/requires.txt +7 -0
  8. p8_platoon-0.2.0/p8_platoon.egg-info/top_level.txt +1 -0
  9. p8_platoon-0.2.0/platoon/__init__.py +53 -0
  10. p8_platoon-0.2.0/platoon/cli.py +289 -0
  11. p8_platoon-0.2.0/platoon/config.py +544 -0
  12. p8_platoon-0.2.0/platoon/fetcher.py +52 -0
  13. p8_platoon-0.2.0/platoon/images.py +110 -0
  14. p8_platoon-0.2.0/platoon/models.py +210 -0
  15. p8_platoon-0.2.0/platoon/providers.py +104 -0
  16. p8_platoon-0.2.0/platoon/renderer.py +86 -0
  17. p8_platoon-0.2.0/platoon/scorer.py +143 -0
  18. p8_platoon-0.2.0/platoon/sources/__init__.py +31 -0
  19. p8_platoon-0.2.0/platoon/sources/arxiv.py +60 -0
  20. p8_platoon-0.2.0/platoon/sources/flipboard.py +71 -0
  21. p8_platoon-0.2.0/platoon/sources/github_trending.py +95 -0
  22. p8_platoon-0.2.0/platoon/sources/google_news.py +96 -0
  23. p8_platoon-0.2.0/platoon/sources/hacker_news.py +46 -0
  24. p8_platoon-0.2.0/platoon/sources/hn_algolia.py +42 -0
  25. p8_platoon-0.2.0/platoon/sources/lobsters.py +40 -0
  26. p8_platoon-0.2.0/platoon/sources/openalex.py +61 -0
  27. p8_platoon-0.2.0/platoon/sources/papers_with_code.py +37 -0
  28. p8_platoon-0.2.0/platoon/sources/reddit.py +59 -0
  29. p8_platoon-0.2.0/platoon/sources/rss_feeds.py +164 -0
  30. p8_platoon-0.2.0/platoon/sources/semantic_scholar.py +38 -0
  31. p8_platoon-0.2.0/platoon/sources/trivia.py +95 -0
  32. p8_platoon-0.2.0/platoon/sources/web_search.py +159 -0
  33. p8_platoon-0.2.0/platoon/tavily_search.py +60 -0
  34. p8_platoon-0.2.0/platoon/templates/feed.html +362 -0
  35. p8_platoon-0.2.0/pyproject.toml +27 -0
  36. p8_platoon-0.2.0/setup.cfg +4 -0
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: p8-platoon
3
+ Version: 0.2.0
4
+ Summary: Feed aggregator and percolate entity producer — library + CLI
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: httpx>=0.27
7
+ Requires-Dist: pyyaml>=6.0
8
+ Requires-Dist: jinja2>=3.1
9
+ Requires-Dist: pydantic>=2.0
10
+ Provides-Extra: search
11
+ Requires-Dist: tavily-python; extra == "search"
@@ -0,0 +1,277 @@
1
+ # p8platoon
2
+
3
+ Agent workforce toolkit for [Percolate](https://github.com/Percolate-AI). Provides a provider pattern for producing percolate-compatible entities (Resources, Moments) from various data sources.
4
+
5
+ Ships with a **feed aggregator** as the first built-in provider — 13 sources, profile-based scoring, image enrichment, Tavily web search, and HTML output.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install p8-platoon
11
+ ```
12
+
13
+ Or for development:
14
+
15
+ ```bash
16
+ git clone <repo-url> && cd p8-platoon
17
+ pip install -e ".[search]"
18
+ ```
19
+
20
+ ## Quick Start
21
+
22
+ ### CLI
23
+
24
+ ```bash
25
+ # Run all profiles, output HTML
26
+ platoon feed
27
+
28
+ # Single profile, dry-run (print scores to stdout)
29
+ platoon feed --profile default --dry-run
30
+
31
+ # JSON output
32
+ platoon feed --profile default --output json
33
+
34
+ # With web search enrichment (searches per-category, excludes feed domains)
35
+ platoon feed --tavily-key tvly-... --open
36
+
37
+ # Export to percolate-compatible YAML + HTML
38
+ platoon export --profile default --output-dir ./export/
39
+
40
+ # Export with user ownership (deterministic resource IDs)
41
+ platoon export --profile default --user-id "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
42
+
43
+ # Standalone Tavily web search
44
+ platoon search "latest AI breakthroughs" --max-results 10 --time-range week
45
+ ```
46
+
47
+ Exported YAML files can be ingested via:
48
+
49
+ ```bash
50
+ p8 upsert resources export/resources-default.yaml
51
+ p8 upsert moments export/moments-default.yaml
52
+ ```
53
+
54
+ ### Library
55
+
56
+ Use platoon as a Python library — this is the integration path for percolate.
57
+ Pass a p8k8 `UserMetadata` object (or any dict with `interests`/`categories`)
58
+ and get back percolate-compatible Resources and Moments.
59
+
60
+ ```python
61
+ import platoon
62
+ from uuid import UUID
63
+
64
+ # UserMetadata from percolate (or any dict/pydantic model)
65
+ user_metadata = UserMetadata(
66
+ interests=["AI machine learning", "physics space", "food restaurants"],
67
+ categories={
68
+ "AI": {"keywords": ["AI", "LLM", "neural", "agent"], "weight": 1.5, "color": "#3b82f6"},
69
+ "Physics": {"keywords": ["physics", "quantum", "space"], "weight": 1.3, "color": "#8b5cf6"},
70
+ },
71
+ )
72
+
73
+ user_id = UUID("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")
74
+
75
+ # Run — sources come from built-in defaults, user data from metadata
76
+ result = platoon.run(user_metadata, user_id)
77
+
78
+ # Percolate-compatible dicts ready for upsert
79
+ resource_dicts = [r.to_upsert_dict() for r in result.resources]
80
+ moment_dicts = [m.to_upsert_dict() for m in result.moments]
81
+ ```
82
+
83
+ Override global sources via config dict:
84
+
85
+ ```python
86
+ result = platoon.run(
87
+ user_metadata,
88
+ user_id,
89
+ config={
90
+ "sources": {
91
+ "hacker_news": {"enabled": True, "min_score": 200},
92
+ "reddit": {"enabled": True, "subreddits": ["MachineLearning"]},
93
+ },
94
+ },
95
+ )
96
+ ```
97
+
98
+ ### Environment Variables
99
+
100
+ | Variable | Description |
101
+ |----------|-------------|
102
+ | `P8_PLATOON_KEYS` | JSON dict of API keys: `{"tavily": "tvly-..."}` |
103
+ | `P8_TAVILY_KEY` | Single Tavily API key (alternative to above) |
104
+ | `TAVILY_API_KEY` | Legacy Tavily key (backward compat) |
105
+
106
+ Keys are resolved in priority order: `P8_PLATOON_KEYS` > `P8_TAVILY_KEY` > `TAVILY_API_KEY`.
107
+
108
+ ## Configuration
109
+
110
+ Copy and edit the example config:
111
+
112
+ ```bash
113
+ cp config.example.yaml config.yaml
114
+ ```
115
+
116
+ ### Profiles
117
+
118
+ Profiles define interests, category weights, and source configurations. Shared categories (Trivia, General) are injected into every profile.
119
+
120
+ Built-in profiles:
121
+ - **default** — AI, Physics, Business, Food
122
+ - **eunseo** — Cats, Crafts, Korean, Food, Beauty, Chemistry
123
+
124
+ ```yaml
125
+ profiles:
126
+ default:
127
+ name: "Default"
128
+ interests:
129
+ - "AI machine learning breakthroughs"
130
+ - "physics space quantum discoveries"
131
+ - "business startups economy"
132
+ - "food restaurants cooking recipes"
133
+ categories:
134
+ AI:
135
+ keywords: [AI, LLM, machine learning, neural, ChatGPT, agent, transformer]
136
+ weight: 1.5
137
+ color: "#3b82f6"
138
+ Physics:
139
+ keywords: [physics, quantum, particle, astrophysics, cosmology]
140
+ weight: 1.3
141
+ sources:
142
+ google_news:
143
+ enabled: true
144
+ topics: [science, technology]
145
+ queries: ["AI artificial intelligence"]
146
+ reddit:
147
+ enabled: true
148
+ subreddits: [todayilearned, space, food]
149
+ min_score: 1000
150
+ hacker_news:
151
+ enabled: true
152
+ min_score: 200
153
+ ```
154
+
155
+ ### Sources (13)
156
+
157
+ | Source | Type | Config Keys |
158
+ |--------|------|-------------|
159
+ | `google_news` | RSS | `topics`, `queries`, `max_items_per_query` |
160
+ | `reddit` | JSON API | `subreddits`, `min_score`, `max_items` |
161
+ | `hacker_news` | Firebase API | `fetch_top_n`, `min_score`, `max_items` |
162
+ | `rss_feeds` | RSS/Atom | `feeds` (list of `{url, label}`), `max_items_per_feed` |
163
+ | `flipboard` | RSS | `topics`, `max_items_per_topic` |
164
+ | `trivia` | REST API | `max_items`, `on_this_day`, `random_facts` |
165
+ | `arxiv` | RSS | `feeds`, `max_items_per_feed` |
166
+ | `github_trending` | Scrape | `language`, `since` |
167
+ | `lobsters` | JSON API | `tags_filter` |
168
+ | `papers_with_code` | REST API | `max_items` |
169
+ | `semantic_scholar` | Graph API | `queries` |
170
+ | `openalex` | REST API | `queries`, `email` |
171
+ | `hn_algolia` | Search API | `queries`, `sort` |
172
+
173
+ ### Web Search Enrichment
174
+
175
+ When a Tavily API key is provided, the feed pipeline runs per-category web searches that **exclude** all 35+ domains already covered by feed sources. This surfaces net-new content from sites not in your feeds.
176
+
177
+ ```bash
178
+ # Via CLI flag
179
+ platoon feed --tavily-key tvly-...
180
+
181
+ # Via environment variable
182
+ export TAVILY_API_KEY=tvly-...
183
+ platoon feed
184
+ ```
185
+
186
+ ## Feed Pipeline
187
+
188
+ ```
189
+ Config + Profile
190
+ -> Phase 1: Fetch from 13 source types
191
+ -> Phase 2: Tavily web search enrichment (excludes feed domains)
192
+ -> Phase 3: Score + dedup (keyword matching, interest boost, engagement bonus)
193
+ -> Phase 4: Backfill sparse categories via Google News fallback
194
+ -> Image enrichment (source → og:image scrape → Unsplash fallback)
195
+ -> Render HTML / JSON / export to percolate
196
+ ```
197
+
198
+ ## Provider Pattern
199
+
200
+ Build custom providers that emit percolate entities. `FeedProvider` is the built-in provider (see Library usage above); extend `BaseProvider` for other data sources:
201
+
202
+ ```python
203
+ from platoon.providers import BaseProvider, ProviderResult
204
+ from platoon.models import P8Resource, P8Moment
205
+
206
+ class MyProvider(BaseProvider):
207
+ name = "my-provider"
208
+
209
+ def run(self, config: dict) -> ProviderResult:
210
+ resources = [
211
+ P8Resource(
212
+ name="Example Resource",
213
+ uri="https://example.com/article",
214
+ content="Article content...",
215
+ category="news",
216
+ tags=["my-source", "tech"],
217
+ metadata={"score": 0.85},
218
+ )
219
+ ]
220
+ moment = P8Moment(
221
+ name="my-digest-2026-02-22",
222
+ moment_type="digest",
223
+ summary="Processed 1 resource",
224
+ )
225
+ return ProviderResult(resources=resources, moments=[moment])
226
+ ```
227
+
228
+ ## User Profile Schema
229
+
230
+ Platoon accepts p8k8 `UserMetadata` objects directly — no wrapper needed. The library reads `interests` and `categories` from the user's metadata; sources are global config.
231
+
232
+ **User-specific fields** (from `UserMetadata`):
233
+
234
+ | Field | Type | Used by pipeline |
235
+ |-------|------|-----------------|
236
+ | `interests` | `list[str]` | Scoring: interest-boost matching |
237
+ | `categories` | `dict` | Scoring: keyword matching, weights, category assignment |
238
+ | `relations` | `list[dict] \| None` | Reserved for future personalization |
239
+ | `feeds` | `list[dict] \| None` | Reserved for future per-user feed overrides |
240
+ | `preferences` | `dict \| None` | Reserved |
241
+ | `facts` | `dict \| None` | Reserved |
242
+
243
+ **Global config** (not per-user):
244
+
245
+ | Key | Description |
246
+ |-----|-------------|
247
+ | `sources` | Feed source configs — `{source_name: {enabled, ...}}` |
248
+ | `fetcher` | HTTP settings — timeout, retries, user-agent |
249
+ | `output` | Render settings — format, max items, min score |
250
+
251
+ ## Percolate Integration
252
+
253
+ p8platoon produces entities compatible with percolate's data model:
254
+
255
+ - **P8Resource** — Maps to percolate's `resources` table. One per article/item with `category="news"`, source tags, engagement metadata, and deterministic UUID5 IDs.
256
+ - **P8Moment** — Maps to percolate's `moments` table. One per digest run, links resources via `graph_edges`, contains run statistics.
257
+
258
+ All entities use deterministic UUID5 IDs matching percolate's `uuid5(P8_NAMESPACE, "table:key:user_id")` scheme, ensuring upserts are idempotent.
259
+
260
+ ### Resource fields
261
+
262
+ | Feed Item | P8Resource | Notes |
263
+ |-----------|------------|-------|
264
+ | `title` | `name` | Article title |
265
+ | `url` | `uri` | Article URL (also used for deterministic ID) |
266
+ | `summary` | `content` | Truncated summary |
267
+ | `image_url` | `image_uri` | 3-tier fallback ensures coverage |
268
+ | `source` | `tags[]` | e.g. "reddit", "google_news", "web_search" |
269
+ | `tags` | `tags[]` | Merged with source tag |
270
+ | `score` | `metadata.score` | Deterministic keyword+engagement score |
271
+ | `engagement` | `metadata.engagement` | `{upvotes, comments, stars, citations}` |
272
+ | `category` | `metadata.feed_category` | Scored category (AI, Physics, Food, etc.) |
273
+ | — | `category` | Always `"news"` |
274
+
275
+ ## License
276
+
277
+ MIT
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: p8-platoon
3
+ Version: 0.2.0
4
+ Summary: Feed aggregator and percolate entity producer — library + CLI
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: httpx>=0.27
7
+ Requires-Dist: pyyaml>=6.0
8
+ Requires-Dist: jinja2>=3.1
9
+ Requires-Dist: pydantic>=2.0
10
+ Provides-Extra: search
11
+ Requires-Dist: tavily-python; extra == "search"
@@ -0,0 +1,34 @@
1
+ README.md
2
+ pyproject.toml
3
+ p8_platoon.egg-info/PKG-INFO
4
+ p8_platoon.egg-info/SOURCES.txt
5
+ p8_platoon.egg-info/dependency_links.txt
6
+ p8_platoon.egg-info/entry_points.txt
7
+ p8_platoon.egg-info/requires.txt
8
+ p8_platoon.egg-info/top_level.txt
9
+ platoon/__init__.py
10
+ platoon/cli.py
11
+ platoon/config.py
12
+ platoon/fetcher.py
13
+ platoon/images.py
14
+ platoon/models.py
15
+ platoon/providers.py
16
+ platoon/renderer.py
17
+ platoon/scorer.py
18
+ platoon/tavily_search.py
19
+ platoon/sources/__init__.py
20
+ platoon/sources/arxiv.py
21
+ platoon/sources/flipboard.py
22
+ platoon/sources/github_trending.py
23
+ platoon/sources/google_news.py
24
+ platoon/sources/hacker_news.py
25
+ platoon/sources/hn_algolia.py
26
+ platoon/sources/lobsters.py
27
+ platoon/sources/openalex.py
28
+ platoon/sources/papers_with_code.py
29
+ platoon/sources/reddit.py
30
+ platoon/sources/rss_feeds.py
31
+ platoon/sources/semantic_scholar.py
32
+ platoon/sources/trivia.py
33
+ platoon/sources/web_search.py
34
+ platoon/templates/feed.html
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ platoon = platoon.cli:main
@@ -0,0 +1,7 @@
1
+ httpx>=0.27
2
+ pyyaml>=6.0
3
+ jinja2>=3.1
4
+ pydantic>=2.0
5
+
6
+ [search]
7
+ tavily-python
@@ -0,0 +1 @@
1
+ platoon
@@ -0,0 +1,53 @@
1
+ """p8-platoon: News feed digest with percolate integration."""
2
+
3
+ from typing import Optional
4
+ from uuid import UUID
5
+
6
+ from platoon.models import Item, P8Moment, P8Resource, UserProfile
7
+ from platoon.providers import BaseProvider, FeedProvider, ProviderResult
8
+
9
+
10
def run(
    user_metadata,
    user_id: UUID,
    config: Optional[dict] = None,
) -> ProviderResult:
    """Run the feed pipeline for one user and return percolate entities.

    Primary library entry point. ``user_metadata`` may be a p8k8
    ``UserMetadata``, a ``UserProfile``, or any dict/pydantic model exposing
    ``interests`` and ``categories``. Global settings (``sources``,
    ``fetcher``, ``output``) come from ``config``; when ``config`` is None
    the built-in defaults apply. API keys are resolved from the environment
    (``P8_TAVILY_KEY`` / ``P8_PLATOON_KEYS``).

    Args:
        user_metadata: User interest/category data (object or plain dict).
        user_id: UUID used for entity ownership and deterministic IDs.
        config: Optional global config dict overriding built-in defaults.

    Returns:
        ProviderResult containing resources, moments, and the raw items.
    """
    # Imported lazily so importing the package stays cheap.
    from platoon.config import resolve_for_user, resolve_keys

    resolved_config = resolve_for_user(user_metadata, config)
    # Empty string / missing key both collapse to None (search disabled).
    api_key = resolve_keys().get("tavily") or None
    return FeedProvider(tavily_key=api_key).run(resolved_config, user_id=user_id)
42
+
43
+
44
# Public API of the platoon package: ``run`` is the primary library entry
# point; the remaining names are re-exports from platoon.models and
# platoon.providers for caller convenience.
__all__ = [
    "run",
    "Item",
    "P8Resource",
    "P8Moment",
    "UserProfile",
    "BaseProvider",
    "FeedProvider",
    "ProviderResult",
]
@@ -0,0 +1,289 @@
1
+ """CLI entry point: platoon feed, platoon search, platoon export."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import subprocess
8
+ import sys
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+
12
+ from platoon.config import load_config, resolve_profile
13
+ from platoon.fetcher import Fetcher
14
+ from platoon.images import enrich_images
15
+ from platoon.models import Item
16
+ from platoon.renderer import render_html, render_json, save_output
17
+ from platoon.scorer import score_and_sort
18
+ from platoon.sources import SOURCE_FETCHERS
19
+
20
+
21
def _fetch_and_score(config: dict, source_filter: set | None, fetcher: Fetcher,
                     tavily_key: str | None = None) -> list[Item]:
    """Fetch feeds, enrich with web search, score, and backfill sparse categories.

    Pipeline phases (each prints a progress line to stdout):
      1. Fetch from every enabled source in ``config["sources"]``.
      2. If ``tavily_key`` is set, run per-category web searches for URLs
         not already fetched.
      3. Score + sort everything via ``score_and_sort``.
      4. For categories with fewer than 2 items, fetch extra Google News
         results using fallback queries, then re-score.

    Args:
        config: Resolved profile config with ``sources``, ``categories``,
            and ``interests`` keys.
        source_filter: If non-empty, only source names in this set are fetched.
        fetcher: Shared HTTP fetcher passed to every source function.
        tavily_key: Optional Tavily API key enabling Phase 2 enrichment.

    Returns:
        Scored, sorted list of items (per-source failures are logged to
        stderr and skipped, never raised).
    """
    from platoon.config import INTEREST_FALLBACKS
    from platoon.sources.google_news import fetch_google_news
    from platoon.sources.web_search import web_search_enrich
    import random

    all_items: list[Item] = []
    sources_cfg = config.get("sources", {})

    # --- Phase 1: Fetch from configured feed sources ---
    for source_name, source_cfg in sources_cfg.items():
        if source_filter and source_name not in source_filter:
            continue
        # Sources are enabled by default; only an explicit False disables.
        if not source_cfg.get("enabled", True):
            continue
        fetch_fn = SOURCE_FETCHERS.get(source_name)
        if not fetch_fn:
            print(f"No fetcher for source: {source_name}", file=sys.stderr)
            continue
        try:
            items = fetch_fn(source_cfg, fetcher)
            all_items.extend(items)
        except Exception as e:
            # Best-effort: one failing source must not abort the whole run.
            print(f"Error fetching {source_name}: {e}", file=sys.stderr)

    print(f" Feeds: {len(all_items)} raw items")

    # --- Phase 2: Tavily web search (if key available) ---
    # Searches each category for NEW content not already in feeds
    if tavily_key:
        existing_urls = {item.url for item in all_items}
        categories = config.get("categories", {})
        interests = config.get("interests", [])
        web_items = web_search_enrich(
            categories=categories,
            interests=interests,
            existing_urls=existing_urls,
            api_key=tavily_key,
            max_per_category=3,
            time_range="week",
        )
        print(f" Web search: {len(web_items)} new items from unique sources")
        all_items.extend(web_items)

    # --- Phase 3: Score everything together ---
    scored = score_and_sort(all_items, config)

    # --- Phase 4: Backfill sparse categories via Google News ---
    categories = config.get("categories", {})
    min_per_cat = 2
    cat_counts: dict[str, int] = {}
    for item in scored:
        cat_counts[item.category] = cat_counts.get(item.category, 0) + 1

    sparse = [cat for cat in categories if cat_counts.get(cat, 0) < min_per_cat]
    if sparse:
        print(f" Backfilling sparse categories: {sparse}")
        for cat in sparse:
            queries = INTEREST_FALLBACKS.get(cat, [])
            if not queries:
                continue
            # Random query keeps repeated runs from always fetching the
            # same fallback headlines.
            query = random.choice(queries)
            try:
                fallback_cfg = {"queries": [query], "max_items_per_query": 4}
                extra = fetch_google_news(fallback_cfg, fetcher)
                all_items.extend(extra)
            except Exception as e:
                print(f" Fallback error for {cat}: {e}", file=sys.stderr)

        # Re-score so backfilled items are deduped and ranked with the rest.
        scored = score_and_sort(all_items, config)

    print(f" Final: {len(scored)} items")
    return scored
96
+
97
+
98
def run_feed(args):
    """Fetch, score, and render the feed for one or all profiles.

    Args:
        args: Parsed argparse namespace from the ``feed`` subcommand
            (``config``, ``profile``, ``sources``, ``dry_run``, ``output``,
            ``open``, ``tavily_key``).

    Side effects: prints progress to stdout, writes rendered output files
    via ``save_output`` (unless ``--dry-run``), optionally opens them in a
    browser.
    """
    from platoon.config import resolve_keys
    config = load_config(args.config)
    source_filter = set(args.sources) if args.sources else None

    # Tavily key: the CLI flag wins; otherwise resolve_keys() applies the
    # env-var precedence defined in platoon.config.
    keys = resolve_keys()
    tavily_key = getattr(args, "tavily_key", None) or keys.get("tavily", "")
    if tavily_key:
        # Fixed: was an f-string with no placeholders.
        print(" Tavily web search: enabled")

    # Determine which profiles to run (one named profile, or all of them).
    profiles = config.get("profiles", {})
    if args.profile:
        profile_names = [args.profile]
    else:
        profile_names = list(profiles.keys())

    fetcher = Fetcher(config.get("fetcher", {}))
    all_paths = []

    # try/finally ensures the shared HTTP client is closed even when a
    # profile run raises (previously it leaked on any exception).
    try:
        for pname in profile_names:
            print(f"\n{'='*50}")
            print(f" Profile: {pname}")
            print(f"{'='*50}")

            pcfg = resolve_profile(config, pname)
            scored = _fetch_and_score(pcfg, source_filter, fetcher, tavily_key=tavily_key)

            # Enrich images (og:image scrape + category fallbacks)
            enrich_images(scored, fetcher)

            if args.dry_run:
                _print_dry_run(scored)
                continue

            # Output format: CLI flag overrides the profile's configured format.
            fmt = args.output or pcfg.get("output", {}).get("format", "html")
            if fmt == "json":
                content = render_json(scored)
            else:
                content = render_html(scored, pcfg)

            path = save_output(content, fmt, pcfg, suffix=pname)
            all_paths.append(path)
    finally:
        fetcher.close()

    if args.open and all_paths:
        for p in all_paths:
            _open_file(p)
149
+
150
+
151
+ def _print_dry_run(items: list[Item]):
152
+ """Print scored items to stdout."""
153
+ current_cat = None
154
+ for item in items:
155
+ if item.category != current_cat:
156
+ current_cat = item.category
157
+ print(f"\n --- {current_cat} ---")
158
+ eng_parts = []
159
+ for k, v in item.engagement.items():
160
+ if isinstance(v, (int, float)) and v > 0:
161
+ eng_parts.append(f"{k}={v}")
162
+ eng_str = f" [{', '.join(eng_parts)}]" if eng_parts else ""
163
+ print(f" [{item.score:.2f}] {item.title}")
164
+ print(f" {item.source} {eng_str}")
165
+ if item.tags:
166
+ print(f" tags: {', '.join(item.tags[:5])}")
167
+
168
+
169
def run_export(args):
    """Run feed pipeline and export YAML (percolate entities) + HTML viewer.

    For each selected profile, runs ``FeedProvider`` and writes three files
    into ``args.output_dir``:
      - ``resources-<profile>.yaml`` — percolate Resource dicts
      - ``moments-<profile>.yaml`` — percolate Moment dicts
      - ``<date>-<profile>.html`` — human-readable viewer of the raw items

    Args:
        args: Parsed argparse namespace from the ``export`` subcommand
            (``config``, ``profile``, ``output_dir``, ``user_id``,
            ``tavily_key``).

    Raises:
        ValueError: if ``args.user_id`` is set but is not a valid UUID string.
    """
    from uuid import UUID

    import yaml

    from platoon.config import resolve_keys
    from platoon.providers import FeedProvider

    config = load_config(args.config)
    keys = resolve_keys()
    # CLI flag wins over environment-resolved keys.
    tavily_key = getattr(args, "tavily_key", None) or keys.get("tavily", "")

    # Without a user id, entity IDs are still deterministic but unowned.
    user_id = UUID(args.user_id) if args.user_id else None

    # One named profile, or all configured profiles.
    profiles = config.get("profiles", {})
    if args.profile:
        profile_names = [args.profile]
    else:
        profile_names = list(profiles.keys())

    # Single provider instance reused across profiles.
    provider = FeedProvider(tavily_key=tavily_key or None)

    for pname in profile_names:
        print(f"\n{'='*50}")
        print(f" Export: {pname}")
        print(f"{'='*50}")

        pcfg = resolve_profile(config, pname)
        result = provider.run(pcfg, user_id=user_id)

        print(f" Resources: {len(result.resources)}")
        print(f" Moments: {len(result.moments)}")

        out_dir = Path(args.output_dir)
        out_dir.mkdir(parents=True, exist_ok=True)

        # YAML exports (percolate entities)
        export = result.to_export_dicts()
        res_path = out_dir / f"resources-{pname}.yaml"
        mom_path = out_dir / f"moments-{pname}.yaml"
        res_path.write_text(yaml.dump(export["resources"], default_flow_style=False, allow_unicode=True))
        mom_path.write_text(yaml.dump(export["moments"], default_flow_style=False, allow_unicode=True))
        print(f" Written: {res_path}")
        print(f" Written: {mom_path}")

        # HTML viewer (same renderer used by the ``feed`` subcommand)
        date_str = datetime.now().strftime("%Y-%m-%d")
        html_path = out_dir / f"{date_str}-{pname}.html"
        html_content = render_html(result.items, pcfg)
        html_path.write_text(html_content)
        print(f" Written: {html_path}")
221
+
222
+
223
def _open_file(path: Path):
    """Open *path* with the platform's default application (best-effort).

    Failures are reported to stderr and never raised, so a missing opener
    cannot break a feed run.
    """
    import platform
    system = platform.system()
    try:
        if system == "Darwin":
            subprocess.run(["open", str(path)])
        elif system == "Linux":
            subprocess.run(["xdg-open", str(path)])
        elif system == "Windows":
            # Fixed: `subprocess.run(["start", path], shell=True)` is broken
            # on Windows — `start` is a cmd builtin that treats a quoted
            # first argument as the window title, so paths with spaces open
            # an empty console instead of the file. os.startfile is the
            # supported API for "open with default handler".
            import os
            os.startfile(str(path))
    except Exception as e:
        print(f"Could not open browser: {e}", file=sys.stderr)
236
+
237
+
238
def main():
    """CLI entry point: parse arguments and dispatch to a subcommand.

    With no subcommand given, prints the top-level help text.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.command == "feed":
        run_feed(args)
    elif args.command == "search":
        # Lazy import: the search path may need the optional tavily extra.
        from platoon.tavily_search import main_search
        main_search(args)
    elif args.command == "export":
        run_export(args)
    else:
        parser.print_help()


def _build_parser() -> argparse.ArgumentParser:
    """Construct the argparse parser with feed/search/export subcommands."""
    parser = argparse.ArgumentParser(
        prog="platoon",
        description="News feed digest with HTML card viewer",
    )
    subparsers = parser.add_subparsers(dest="command")

    # feed subcommand
    feed = subparsers.add_parser("feed", help="Fetch and render news feed")
    feed.add_argument("--config", default=None, help="Path to config YAML")
    feed.add_argument("--profile", default=None, help="Profile name (default: run all)")
    feed.add_argument("--dry-run", action="store_true", help="Fetch + score, print to stdout")
    feed.add_argument("--sources", nargs="+", help="Subset of sources to fetch")
    feed.add_argument("--output", choices=["html", "json"], help="Output format")
    feed.add_argument("--open", action="store_true", help="Open HTML in browser")
    feed.add_argument("--tavily-key", default=None, dest="tavily_key",
                      help="Tavily API key for web search enrichment (or set TAVILY_API_KEY)")

    # search subcommand
    search = subparsers.add_parser("search", help="Tavily web search")
    search.add_argument("query", help="Search query")
    search.add_argument("--max-results", type=int, default=5, help="Max results")
    search.add_argument("--topic", default="news", help="Topic: news, general")
    search.add_argument("--time-range", default=None, help="Time range: day, week, month")
    search.add_argument("--api-key", default=None, help="Tavily API key (or set TAVILY_API_KEY)")

    # export subcommand
    export = subparsers.add_parser("export", help="Export feed as percolate Resources + Moments")
    export.add_argument("--config", default=None, help="Path to config YAML")
    export.add_argument("--profile", default=None, help="Profile name (default: run all)")
    export.add_argument("--output-dir", default="./export", dest="output_dir",
                        help="Output directory (default: ./export)")
    export.add_argument("--user-id", default=None, dest="user_id",
                        help="User UUID for resource ownership")
    export.add_argument("--tavily-key", default=None, dest="tavily_key",
                        help="Tavily API key for web search enrichment")

    return parser
286
+
287
+
288
# Allow running the CLI module directly: ``python -m platoon.cli``.
# (The installed ``platoon`` console script points at this same main().)
if __name__ == "__main__":
    main()