p8-platoon 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- p8_platoon-0.2.0/PKG-INFO +11 -0
- p8_platoon-0.2.0/README.md +277 -0
- p8_platoon-0.2.0/p8_platoon.egg-info/PKG-INFO +11 -0
- p8_platoon-0.2.0/p8_platoon.egg-info/SOURCES.txt +34 -0
- p8_platoon-0.2.0/p8_platoon.egg-info/dependency_links.txt +1 -0
- p8_platoon-0.2.0/p8_platoon.egg-info/entry_points.txt +2 -0
- p8_platoon-0.2.0/p8_platoon.egg-info/requires.txt +7 -0
- p8_platoon-0.2.0/p8_platoon.egg-info/top_level.txt +1 -0
- p8_platoon-0.2.0/platoon/__init__.py +53 -0
- p8_platoon-0.2.0/platoon/cli.py +289 -0
- p8_platoon-0.2.0/platoon/config.py +544 -0
- p8_platoon-0.2.0/platoon/fetcher.py +52 -0
- p8_platoon-0.2.0/platoon/images.py +110 -0
- p8_platoon-0.2.0/platoon/models.py +210 -0
- p8_platoon-0.2.0/platoon/providers.py +104 -0
- p8_platoon-0.2.0/platoon/renderer.py +86 -0
- p8_platoon-0.2.0/platoon/scorer.py +143 -0
- p8_platoon-0.2.0/platoon/sources/__init__.py +31 -0
- p8_platoon-0.2.0/platoon/sources/arxiv.py +60 -0
- p8_platoon-0.2.0/platoon/sources/flipboard.py +71 -0
- p8_platoon-0.2.0/platoon/sources/github_trending.py +95 -0
- p8_platoon-0.2.0/platoon/sources/google_news.py +96 -0
- p8_platoon-0.2.0/platoon/sources/hacker_news.py +46 -0
- p8_platoon-0.2.0/platoon/sources/hn_algolia.py +42 -0
- p8_platoon-0.2.0/platoon/sources/lobsters.py +40 -0
- p8_platoon-0.2.0/platoon/sources/openalex.py +61 -0
- p8_platoon-0.2.0/platoon/sources/papers_with_code.py +37 -0
- p8_platoon-0.2.0/platoon/sources/reddit.py +59 -0
- p8_platoon-0.2.0/platoon/sources/rss_feeds.py +164 -0
- p8_platoon-0.2.0/platoon/sources/semantic_scholar.py +38 -0
- p8_platoon-0.2.0/platoon/sources/trivia.py +95 -0
- p8_platoon-0.2.0/platoon/sources/web_search.py +159 -0
- p8_platoon-0.2.0/platoon/tavily_search.py +60 -0
- p8_platoon-0.2.0/platoon/templates/feed.html +362 -0
- p8_platoon-0.2.0/pyproject.toml +27 -0
- p8_platoon-0.2.0/setup.cfg +4 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: p8-platoon
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Feed aggregator and percolate entity producer — library + CLI
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: httpx>=0.27
|
|
7
|
+
Requires-Dist: pyyaml>=6.0
|
|
8
|
+
Requires-Dist: jinja2>=3.1
|
|
9
|
+
Requires-Dist: pydantic>=2.0
|
|
10
|
+
Provides-Extra: search
|
|
11
|
+
Requires-Dist: tavily-python; extra == "search"
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
# p8platoon
|
|
2
|
+
|
|
3
|
+
Agent workforce toolkit for [Percolate](https://github.com/Percolate-AI). Provides a provider pattern for producing percolate-compatible entities (Resources, Moments) from various data sources.
|
|
4
|
+
|
|
5
|
+
Ships with a **feed aggregator** as the first built-in provider — 13 sources, profile-based scoring, image enrichment, Tavily web search, and HTML output.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install p8platoon
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Or for development:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
git clone <repo-url> && cd p8-platoon
|
|
17
|
+
pip install -e ".[search]"
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
### CLI
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
# Run all profiles, output HTML
|
|
26
|
+
platoon feed
|
|
27
|
+
|
|
28
|
+
# Single profile, dry-run (print scores to stdout)
|
|
29
|
+
platoon feed --profile default --dry-run
|
|
30
|
+
|
|
31
|
+
# JSON output
|
|
32
|
+
platoon feed --profile default --output json
|
|
33
|
+
|
|
34
|
+
# With web search enrichment (searches per-category, excludes feed domains)
|
|
35
|
+
platoon feed --tavily-key tvly-... --open
|
|
36
|
+
|
|
37
|
+
# Export to percolate-compatible YAML + HTML
|
|
38
|
+
platoon export --profile default --output-dir ./export/
|
|
39
|
+
|
|
40
|
+
# Export with user ownership (deterministic resource IDs)
|
|
41
|
+
platoon export --profile default --user-id "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
|
|
42
|
+
|
|
43
|
+
# Standalone Tavily web search
|
|
44
|
+
platoon search "latest AI breakthroughs" --max-results 10 --time-range week
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Exported YAML files can be ingested via:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
p8 upsert resources export/resources-default.yaml
|
|
51
|
+
p8 upsert moments export/moments-default.yaml
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Library
|
|
55
|
+
|
|
56
|
+
Use platoon as a Python library — this is the integration path for percolate.
|
|
57
|
+
Pass a p8k8 `UserMetadata` object (or any dict with `interests`/`categories`)
|
|
58
|
+
and get back percolate-compatible Resources and Moments.
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import platoon
|
|
62
|
+
from uuid import UUID
|
|
63
|
+
|
|
64
|
+
# UserMetadata from percolate (or any dict/pydantic model)
|
|
65
|
+
user_metadata = UserMetadata(
|
|
66
|
+
interests=["AI machine learning", "physics space", "food restaurants"],
|
|
67
|
+
categories={
|
|
68
|
+
"AI": {"keywords": ["AI", "LLM", "neural", "agent"], "weight": 1.5, "color": "#3b82f6"},
|
|
69
|
+
"Physics": {"keywords": ["physics", "quantum", "space"], "weight": 1.3, "color": "#8b5cf6"},
|
|
70
|
+
},
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
user_id = UUID("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")
|
|
74
|
+
|
|
75
|
+
# Run — sources come from built-in defaults, user data from metadata
|
|
76
|
+
result = platoon.run(user_metadata, user_id)
|
|
77
|
+
|
|
78
|
+
# Percolate-compatible dicts ready for upsert
|
|
79
|
+
resource_dicts = [r.to_upsert_dict() for r in result.resources]
|
|
80
|
+
moment_dicts = [m.to_upsert_dict() for m in result.moments]
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Override global sources via config dict:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
result = platoon.run(
|
|
87
|
+
user_metadata,
|
|
88
|
+
user_id,
|
|
89
|
+
config={
|
|
90
|
+
"sources": {
|
|
91
|
+
"hacker_news": {"enabled": True, "min_score": 200},
|
|
92
|
+
"reddit": {"enabled": True, "subreddits": ["MachineLearning"]},
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Environment Variables
|
|
99
|
+
|
|
100
|
+
| Variable | Description |
|
|
101
|
+
|----------|-------------|
|
|
102
|
+
| `P8_PLATOON_KEYS` | JSON dict of API keys: `{"tavily": "tvly-..."}` |
|
|
103
|
+
| `P8_TAVILY_KEY` | Single Tavily API key (alternative to above) |
|
|
104
|
+
| `TAVILY_API_KEY` | Legacy Tavily key (backward compat) |
|
|
105
|
+
|
|
106
|
+
Keys are resolved in priority order: `P8_PLATOON_KEYS` > `P8_TAVILY_KEY` > `TAVILY_API_KEY`.
|
|
107
|
+
|
|
108
|
+
## Configuration
|
|
109
|
+
|
|
110
|
+
Copy and edit the example config:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
cp config.example.yaml config.yaml
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Profiles
|
|
117
|
+
|
|
118
|
+
Profiles define interests, category weights, and source configurations. Shared categories (Trivia, General) are injected into every profile.
|
|
119
|
+
|
|
120
|
+
Built-in profiles:
|
|
121
|
+
- **default** — AI, Physics, Business, Food
|
|
122
|
+
- **eunseo** — Cats, Crafts, Korean, Food, Beauty, Chemistry
|
|
123
|
+
|
|
124
|
+
```yaml
|
|
125
|
+
profiles:
|
|
126
|
+
default:
|
|
127
|
+
name: "Default"
|
|
128
|
+
interests:
|
|
129
|
+
- "AI machine learning breakthroughs"
|
|
130
|
+
- "physics space quantum discoveries"
|
|
131
|
+
- "business startups economy"
|
|
132
|
+
- "food restaurants cooking recipes"
|
|
133
|
+
categories:
|
|
134
|
+
AI:
|
|
135
|
+
keywords: [AI, LLM, machine learning, neural, ChatGPT, agent, transformer]
|
|
136
|
+
weight: 1.5
|
|
137
|
+
color: "#3b82f6"
|
|
138
|
+
Physics:
|
|
139
|
+
keywords: [physics, quantum, particle, astrophysics, cosmology]
|
|
140
|
+
weight: 1.3
|
|
141
|
+
sources:
|
|
142
|
+
google_news:
|
|
143
|
+
enabled: true
|
|
144
|
+
topics: [science, technology]
|
|
145
|
+
queries: ["AI artificial intelligence"]
|
|
146
|
+
reddit:
|
|
147
|
+
enabled: true
|
|
148
|
+
subreddits: [todayilearned, space, food]
|
|
149
|
+
min_score: 1000
|
|
150
|
+
hacker_news:
|
|
151
|
+
enabled: true
|
|
152
|
+
min_score: 200
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### Sources (13)
|
|
156
|
+
|
|
157
|
+
| Source | Type | Config Keys |
|
|
158
|
+
|--------|------|-------------|
|
|
159
|
+
| `google_news` | RSS | `topics`, `queries`, `max_items_per_query` |
|
|
160
|
+
| `reddit` | JSON API | `subreddits`, `min_score`, `max_items` |
|
|
161
|
+
| `hacker_news` | Firebase API | `fetch_top_n`, `min_score`, `max_items` |
|
|
162
|
+
| `rss_feeds` | RSS/Atom | `feeds` (list of `{url, label}`), `max_items_per_feed` |
|
|
163
|
+
| `flipboard` | RSS | `topics`, `max_items_per_topic` |
|
|
164
|
+
| `trivia` | REST API | `max_items`, `on_this_day`, `random_facts` |
|
|
165
|
+
| `arxiv` | RSS | `feeds`, `max_items_per_feed` |
|
|
166
|
+
| `github_trending` | Scrape | `language`, `since` |
|
|
167
|
+
| `lobsters` | JSON API | `tags_filter` |
|
|
168
|
+
| `papers_with_code` | REST API | `max_items` |
|
|
169
|
+
| `semantic_scholar` | Graph API | `queries` |
|
|
170
|
+
| `openalex` | REST API | `queries`, `email` |
|
|
171
|
+
| `hn_algolia` | Search API | `queries`, `sort` |
|
|
172
|
+
|
|
173
|
+
### Web Search Enrichment
|
|
174
|
+
|
|
175
|
+
When a Tavily API key is provided, the feed pipeline runs per-category web searches that **exclude** all 35+ domains already covered by feed sources. This surfaces net-new content from sites not in your feeds.
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
# Via CLI flag
|
|
179
|
+
platoon feed --tavily-key tvly-...
|
|
180
|
+
|
|
181
|
+
# Via environment variable
|
|
182
|
+
export TAVILY_API_KEY=tvly-...
|
|
183
|
+
platoon feed
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Feed Pipeline
|
|
187
|
+
|
|
188
|
+
```
|
|
189
|
+
Config + Profile
|
|
190
|
+
-> Phase 1: Fetch from 13 source types
|
|
191
|
+
-> Phase 2: Tavily web search enrichment (excludes feed domains)
|
|
192
|
+
-> Phase 3: Score + dedup (keyword matching, interest boost, engagement bonus)
|
|
193
|
+
-> Phase 4: Backfill sparse categories via Google News fallback
|
|
194
|
+
-> Image enrichment (source → og:image scrape → Unsplash fallback)
|
|
195
|
+
-> Render HTML / JSON / export to percolate
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Provider Pattern
|
|
199
|
+
|
|
200
|
+
Build custom providers that emit percolate entities. `FeedProvider` is the built-in provider (see Library usage above); extend `BaseProvider` for other data sources:
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
from platoon.providers import BaseProvider, ProviderResult
|
|
204
|
+
from platoon.models import P8Resource, P8Moment
|
|
205
|
+
|
|
206
|
+
class MyProvider(BaseProvider):
|
|
207
|
+
name = "my-provider"
|
|
208
|
+
|
|
209
|
+
def run(self, config: dict) -> ProviderResult:
|
|
210
|
+
resources = [
|
|
211
|
+
P8Resource(
|
|
212
|
+
name="Example Resource",
|
|
213
|
+
uri="https://example.com/article",
|
|
214
|
+
content="Article content...",
|
|
215
|
+
category="news",
|
|
216
|
+
tags=["my-source", "tech"],
|
|
217
|
+
metadata={"score": 0.85},
|
|
218
|
+
)
|
|
219
|
+
]
|
|
220
|
+
moment = P8Moment(
|
|
221
|
+
name="my-digest-2026-02-22",
|
|
222
|
+
moment_type="digest",
|
|
223
|
+
summary="Processed 1 resource",
|
|
224
|
+
)
|
|
225
|
+
return ProviderResult(resources=resources, moments=[moment])
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
## User Profile Schema
|
|
229
|
+
|
|
230
|
+
Platoon accepts p8k8 `UserMetadata` objects directly — no wrapper needed. The library reads `interests` and `categories` from the user's metadata; sources are global config.
|
|
231
|
+
|
|
232
|
+
**User-specific fields** (from `UserMetadata`):
|
|
233
|
+
|
|
234
|
+
| Field | Type | Used by pipeline |
|
|
235
|
+
|-------|------|-----------------|
|
|
236
|
+
| `interests` | `list[str]` | Scoring: interest-boost matching |
|
|
237
|
+
| `categories` | `dict` | Scoring: keyword matching, weights, category assignment |
|
|
238
|
+
| `relations` | `list[dict] \| None` | Reserved for future personalization |
|
|
239
|
+
| `feeds` | `list[dict] \| None` | Reserved for future per-user feed overrides |
|
|
240
|
+
| `preferences` | `dict \| None` | Reserved |
|
|
241
|
+
| `facts` | `dict \| None` | Reserved |
|
|
242
|
+
|
|
243
|
+
**Global config** (not per-user):
|
|
244
|
+
|
|
245
|
+
| Key | Description |
|
|
246
|
+
|-----|-------------|
|
|
247
|
+
| `sources` | Feed source configs — `{source_name: {enabled, ...}}` |
|
|
248
|
+
| `fetcher` | HTTP settings — timeout, retries, user-agent |
|
|
249
|
+
| `output` | Render settings — format, max items, min score |
|
|
250
|
+
|
|
251
|
+
## Percolate Integration
|
|
252
|
+
|
|
253
|
+
p8platoon produces entities compatible with percolate's data model:
|
|
254
|
+
|
|
255
|
+
- **P8Resource** — Maps to percolate's `resources` table. One per article/item with `category="news"`, source tags, engagement metadata, and deterministic UUID5 IDs.
|
|
256
|
+
- **P8Moment** — Maps to percolate's `moments` table. One per digest run, links resources via `graph_edges`, contains run statistics.
|
|
257
|
+
|
|
258
|
+
All entities use deterministic UUID5 IDs matching percolate's `uuid5(P8_NAMESPACE, "table:key:user_id")` scheme, ensuring upserts are idempotent.
|
|
259
|
+
|
|
260
|
+
### Resource fields
|
|
261
|
+
|
|
262
|
+
| Feed Item | P8Resource | Notes |
|
|
263
|
+
|-----------|------------|-------|
|
|
264
|
+
| `title` | `name` | Article title |
|
|
265
|
+
| `url` | `uri` | Article URL (also used for deterministic ID) |
|
|
266
|
+
| `summary` | `content` | Truncated summary |
|
|
267
|
+
| `image_url` | `image_uri` | 3-tier fallback ensures coverage |
|
|
268
|
+
| `source` | `tags[]` | e.g. "reddit", "google_news", "web_search" |
|
|
269
|
+
| `tags` | `tags[]` | Merged with source tag |
|
|
270
|
+
| `score` | `metadata.score` | Deterministic keyword+engagement score |
|
|
271
|
+
| `engagement` | `metadata.engagement` | `{upvotes, comments, stars, citations}` |
|
|
272
|
+
| `category` | `metadata.feed_category` | Scored category (AI, Physics, Food, etc.) |
|
|
273
|
+
| — | `category` | Always `"news"` |
|
|
274
|
+
|
|
275
|
+
## License
|
|
276
|
+
|
|
277
|
+
MIT
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: p8-platoon
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Feed aggregator and percolate entity producer — library + CLI
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: httpx>=0.27
|
|
7
|
+
Requires-Dist: pyyaml>=6.0
|
|
8
|
+
Requires-Dist: jinja2>=3.1
|
|
9
|
+
Requires-Dist: pydantic>=2.0
|
|
10
|
+
Provides-Extra: search
|
|
11
|
+
Requires-Dist: tavily-python; extra == "search"
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
p8_platoon.egg-info/PKG-INFO
|
|
4
|
+
p8_platoon.egg-info/SOURCES.txt
|
|
5
|
+
p8_platoon.egg-info/dependency_links.txt
|
|
6
|
+
p8_platoon.egg-info/entry_points.txt
|
|
7
|
+
p8_platoon.egg-info/requires.txt
|
|
8
|
+
p8_platoon.egg-info/top_level.txt
|
|
9
|
+
platoon/__init__.py
|
|
10
|
+
platoon/cli.py
|
|
11
|
+
platoon/config.py
|
|
12
|
+
platoon/fetcher.py
|
|
13
|
+
platoon/images.py
|
|
14
|
+
platoon/models.py
|
|
15
|
+
platoon/providers.py
|
|
16
|
+
platoon/renderer.py
|
|
17
|
+
platoon/scorer.py
|
|
18
|
+
platoon/tavily_search.py
|
|
19
|
+
platoon/sources/__init__.py
|
|
20
|
+
platoon/sources/arxiv.py
|
|
21
|
+
platoon/sources/flipboard.py
|
|
22
|
+
platoon/sources/github_trending.py
|
|
23
|
+
platoon/sources/google_news.py
|
|
24
|
+
platoon/sources/hacker_news.py
|
|
25
|
+
platoon/sources/hn_algolia.py
|
|
26
|
+
platoon/sources/lobsters.py
|
|
27
|
+
platoon/sources/openalex.py
|
|
28
|
+
platoon/sources/papers_with_code.py
|
|
29
|
+
platoon/sources/reddit.py
|
|
30
|
+
platoon/sources/rss_feeds.py
|
|
31
|
+
platoon/sources/semantic_scholar.py
|
|
32
|
+
platoon/sources/trivia.py
|
|
33
|
+
platoon/sources/web_search.py
|
|
34
|
+
platoon/templates/feed.html
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
platoon
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""p8-platoon: News feed digest with percolate integration."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
from uuid import UUID
|
|
5
|
+
|
|
6
|
+
from platoon.models import Item, P8Moment, P8Resource, UserProfile
|
|
7
|
+
from platoon.providers import BaseProvider, FeedProvider, ProviderResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def run(
    user_metadata,
    user_id: UUID,
    config: Optional[dict] = None,
) -> ProviderResult:
    """Run the feed pipeline for a user and return percolate entities.

    Primary library entry point. Accepts a p8k8 UserMetadata object (or
    any dict/pydantic model exposing ``interests`` and ``categories``)
    and produces percolate-compatible Resources and Moments.

    User interests and categories are read from ``user_metadata``; source
    and fetcher settings come from the global ``config``. API keys are
    resolved from environment variables (P8_TAVILY_KEY or P8_PLATOON_KEYS).

    Args:
        user_metadata: p8k8 UserMetadata, UserProfile, or plain dict.
        user_id: User UUID for entity ownership and deterministic IDs.
        config: Global config dict with ``sources``, ``fetcher``, ``output``.
            Built-in defaults are used when None.

    Returns:
        ProviderResult with resources, moments, and raw items.
    """
    # Imported lazily so that importing the package itself stays cheap.
    from platoon.config import resolve_for_user, resolve_keys

    resolved_config = resolve_for_user(user_metadata, config)
    # Empty-string keys are normalized to None for the provider.
    api_key = resolve_keys().get("tavily") or None
    return FeedProvider(tavily_key=api_key).run(resolved_config, user_id=user_id)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Public API surface: the run() entry point plus the model and provider
# types callers need in order to consume its results.
__all__ = [
    "run",
    "Item",
    "P8Resource",
    "P8Moment",
    "UserProfile",
    "BaseProvider",
    "FeedProvider",
    "ProviderResult",
]
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
"""CLI entry point: platoon feed, platoon search, platoon export."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import subprocess
|
|
8
|
+
import sys
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from platoon.config import load_config, resolve_profile
|
|
13
|
+
from platoon.fetcher import Fetcher
|
|
14
|
+
from platoon.images import enrich_images
|
|
15
|
+
from platoon.models import Item
|
|
16
|
+
from platoon.renderer import render_html, render_json, save_output
|
|
17
|
+
from platoon.scorer import score_and_sort
|
|
18
|
+
from platoon.sources import SOURCE_FETCHERS
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _fetch_and_score(config: dict, source_filter: set | None, fetcher: Fetcher,
                     tavily_key: str | None = None, *,
                     min_per_category: int = 2) -> list[Item]:
    """Fetch feeds, enrich with web search, score, and backfill sparse categories.

    Args:
        config: Resolved profile config (``sources``, ``categories``, ``interests``).
        source_filter: Optional whitelist of source names to fetch; None = all.
        fetcher: Shared HTTP fetcher passed to every source function.
        tavily_key: Tavily API key; enables Phase 2 web-search enrichment.
        min_per_category: Categories with fewer scored items than this are
            backfilled via a Google News fallback query (was hard-coded to 2).

    Returns:
        Scored and sorted items across all pipeline phases.
    """
    import random
    from collections import Counter

    from platoon.config import INTEREST_FALLBACKS
    from platoon.sources.google_news import fetch_google_news
    from platoon.sources.web_search import web_search_enrich

    all_items: list[Item] = []
    sources_cfg = config.get("sources", {})
    # Hoisted: categories are needed by both Phase 2 and Phase 4.
    categories = config.get("categories", {})

    # --- Phase 1: Fetch from configured feed sources ---
    for source_name, source_cfg in sources_cfg.items():
        if source_filter and source_name not in source_filter:
            continue
        if not source_cfg.get("enabled", True):
            continue
        fetch_fn = SOURCE_FETCHERS.get(source_name)
        if not fetch_fn:
            print(f"No fetcher for source: {source_name}", file=sys.stderr)
            continue
        try:
            all_items.extend(fetch_fn(source_cfg, fetcher))
        except Exception as e:
            # One broken source must not abort the whole run.
            print(f"Error fetching {source_name}: {e}", file=sys.stderr)

    print(f" Feeds: {len(all_items)} raw items")

    # --- Phase 2: Tavily web search (if key available) ---
    # Searches each category for NEW content not already in feeds.
    if tavily_key:
        existing_urls = {item.url for item in all_items}
        web_items = web_search_enrich(
            categories=categories,
            interests=config.get("interests", []),
            existing_urls=existing_urls,
            api_key=tavily_key,
            max_per_category=3,
            time_range="week",
        )
        print(f" Web search: {len(web_items)} new items from unique sources")
        all_items.extend(web_items)

    # --- Phase 3: Score everything together ---
    scored = score_and_sort(all_items, config)

    # --- Phase 4: Backfill sparse categories via Google News ---
    cat_counts = Counter(item.category for item in scored)
    sparse = [cat for cat in categories if cat_counts[cat] < min_per_category]
    if sparse:
        print(f" Backfilling sparse categories: {sparse}")
        for cat in sparse:
            queries = INTEREST_FALLBACKS.get(cat, [])
            if not queries:
                continue
            try:
                fallback_cfg = {"queries": [random.choice(queries)],
                                "max_items_per_query": 4}
                all_items.extend(fetch_google_news(fallback_cfg, fetcher))
            except Exception as e:
                print(f" Fallback error for {cat}: {e}", file=sys.stderr)

        # Re-score only when backfill actually added candidates.
        scored = score_and_sort(all_items, config)

    print(f" Final: {len(scored)} items")
    return scored
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def run_feed(args):
    """Fetch, score, and render the feed for one or all profiles.

    Args:
        args: Parsed argparse namespace from the ``feed`` subcommand
            (config, profile, sources, dry_run, output, open, tavily_key).
    """
    from platoon.config import resolve_keys

    config = load_config(args.config)
    source_filter = set(args.sources) if args.sources else None

    # Tavily key: CLI flag > P8_TAVILY_KEY > P8_PLATOON_KEYS > TAVILY_API_KEY
    keys = resolve_keys()
    tavily_key = getattr(args, "tavily_key", None) or keys.get("tavily", "")
    if tavily_key:
        # Plain string: the original used an f-string with no placeholders.
        print(" Tavily web search: enabled")

    # Determine which profiles to run (one named profile, or all of them).
    profiles = config.get("profiles", {})
    if args.profile:
        profile_names = [args.profile]
    else:
        profile_names = list(profiles.keys())

    fetcher = Fetcher(config.get("fetcher", {}))
    all_paths = []

    try:
        for pname in profile_names:
            print(f"\n{'='*50}")
            print(f" Profile: {pname}")
            print(f"{'='*50}")

            pcfg = resolve_profile(config, pname)
            scored = _fetch_and_score(pcfg, source_filter, fetcher, tavily_key=tavily_key)

            # Enrich images (og:image scrape + category fallbacks)
            enrich_images(scored, fetcher)

            if args.dry_run:
                _print_dry_run(scored)
                continue

            fmt = args.output or pcfg.get("output", {}).get("format", "html")
            if fmt == "json":
                content = render_json(scored)
            else:
                content = render_html(scored, pcfg)

            path = save_output(content, fmt, pcfg, suffix=pname)
            all_paths.append(path)
    finally:
        # Always release the shared HTTP client, even if rendering or
        # profile resolution raises part-way through the loop.
        fetcher.close()

    if args.open and all_paths:
        for p in all_paths:
            _open_file(p)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _print_dry_run(items: list[Item]):
    """Print scored items to stdout, grouped under category headers."""
    last_category = None
    for entry in items:
        # Emit a header each time the category changes (items arrive sorted).
        if entry.category != last_category:
            last_category = entry.category
            print(f"\n --- {last_category} ---")
        metrics = [
            f"{key}={val}"
            for key, val in entry.engagement.items()
            if isinstance(val, (int, float)) and val > 0
        ]
        eng_str = f" [{', '.join(metrics)}]" if metrics else ""
        print(f" [{entry.score:.2f}] {entry.title}")
        print(f" {entry.source} {eng_str}")
        if entry.tags:
            print(f" tags: {', '.join(entry.tags[:5])}")
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def run_export(args):
    """Run feed pipeline and export YAML (percolate entities) + HTML viewer.

    Args:
        args: Parsed argparse namespace from the ``export`` subcommand
            (config, profile, output_dir, user_id, tavily_key).
    """
    from uuid import UUID

    import yaml

    from platoon.config import resolve_keys
    from platoon.providers import FeedProvider

    config = load_config(args.config)
    keys = resolve_keys()
    tavily_key = getattr(args, "tavily_key", None) or keys.get("tavily", "")

    user_id = UUID(args.user_id) if args.user_id else None

    profiles = config.get("profiles", {})
    if args.profile:
        profile_names = [args.profile]
    else:
        profile_names = list(profiles.keys())

    provider = FeedProvider(tavily_key=tavily_key or None)

    # The output directory is shared by all profiles — create it once,
    # instead of on every loop iteration.
    out_dir = Path(args.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    for pname in profile_names:
        print(f"\n{'='*50}")
        print(f" Export: {pname}")
        print(f"{'='*50}")

        pcfg = resolve_profile(config, pname)
        result = provider.run(pcfg, user_id=user_id)

        print(f" Resources: {len(result.resources)}")
        print(f" Moments: {len(result.moments)}")

        # YAML exports (percolate entities)
        export = result.to_export_dicts()
        res_path = out_dir / f"resources-{pname}.yaml"
        mom_path = out_dir / f"moments-{pname}.yaml"
        # Explicit UTF-8 so exports are portable regardless of the
        # platform's default locale encoding.
        res_path.write_text(
            yaml.dump(export["resources"], default_flow_style=False, allow_unicode=True),
            encoding="utf-8",
        )
        mom_path.write_text(
            yaml.dump(export["moments"], default_flow_style=False, allow_unicode=True),
            encoding="utf-8",
        )
        print(f" Written: {res_path}")
        print(f" Written: {mom_path}")

        # HTML viewer
        date_str = datetime.now().strftime("%Y-%m-%d")
        html_path = out_dir / f"{date_str}-{pname}.html"
        html_content = render_html(result.items, pcfg)
        html_path.write_text(html_content, encoding="utf-8")
        print(f" Written: {html_path}")
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _open_file(path: Path):
    """Open *path* in the platform's default browser (best-effort)."""
    import platform

    # Per-OS opener command; Windows' `start` is a shell builtin and
    # therefore needs shell=True.
    openers = {
        "Darwin": (["open", str(path)], False),
        "Linux": (["xdg-open", str(path)], False),
        "Windows": (["start", str(path)], True),
    }
    entry = openers.get(platform.system())
    try:
        if entry is not None:
            cmd, use_shell = entry
            subprocess.run(cmd, shell=use_shell)
    except Exception as e:
        print(f"Could not open browser: {e}", file=sys.stderr)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def main():
    """Parse CLI arguments and dispatch to the chosen subcommand."""
    parser = argparse.ArgumentParser(
        prog="platoon",
        description="News feed digest with HTML card viewer",
    )
    sub = parser.add_subparsers(dest="command")

    # -- feed: fetch, score, render --
    feed = sub.add_parser("feed", help="Fetch and render news feed")
    feed.add_argument("--config", default=None, help="Path to config YAML")
    feed.add_argument("--profile", default=None, help="Profile name (default: run all)")
    feed.add_argument("--dry-run", action="store_true", help="Fetch + score, print to stdout")
    feed.add_argument("--sources", nargs="+", help="Subset of sources to fetch")
    feed.add_argument("--output", choices=["html", "json"], help="Output format")
    feed.add_argument("--open", action="store_true", help="Open HTML in browser")
    feed.add_argument(
        "--tavily-key",
        default=None,
        dest="tavily_key",
        help="Tavily API key for web search enrichment (or set TAVILY_API_KEY)",
    )

    # -- search: standalone Tavily query --
    search = sub.add_parser("search", help="Tavily web search")
    search.add_argument("query", help="Search query")
    search.add_argument("--max-results", type=int, default=5, help="Max results")
    search.add_argument("--topic", default="news", help="Topic: news, general")
    search.add_argument("--time-range", default=None, help="Time range: day, week, month")
    search.add_argument("--api-key", default=None, help="Tavily API key (or set TAVILY_API_KEY)")

    # -- export: percolate entity YAML + HTML viewer --
    export = sub.add_parser("export", help="Export feed as percolate Resources + Moments")
    export.add_argument("--config", default=None, help="Path to config YAML")
    export.add_argument("--profile", default=None, help="Profile name (default: run all)")
    export.add_argument(
        "--output-dir",
        default="./export",
        dest="output_dir",
        help="Output directory (default: ./export)",
    )
    export.add_argument(
        "--user-id",
        default=None,
        dest="user_id",
        help="User UUID for resource ownership",
    )
    export.add_argument(
        "--tavily-key",
        default=None,
        dest="tavily_key",
        help="Tavily API key for web search enrichment",
    )

    args = parser.parse_args()

    if args.command == "feed":
        run_feed(args)
    elif args.command == "search":
        # Imported lazily: tavily is an optional extra.
        from platoon.tavily_search import main_search
        main_search(args)
    elif args.command == "export":
        run_export(args)
    else:
        parser.print_help()
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
# Allow direct execution (`python -m platoon.cli` or running the file).
if __name__ == "__main__":
    main()
|