@opendirectory.dev/skills 0.1.39 → 0.1.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/registry.json +8 -0
- package/skills/product-update-logger/.env.example +4 -0
- package/skills/product-update-logger/README.md +197 -0
- package/skills/product-update-logger/SKILL.md +462 -0
- package/skills/product-update-logger/evals/evals.json +119 -0
- package/skills/product-update-logger/references/changelog-format.md +96 -0
- package/skills/product-update-logger/references/content-rules.md +154 -0
- package/skills/product-update-logger/references/noise-filter.md +86 -0
- package/skills/product-update-logger/scripts/gather.py +364 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
product-update-logger gather script
|
|
4
|
+
Collects shipped items from git commits, GitHub PRs, and free text.
|
|
5
|
+
No required API keys. GITHUB_TOKEN optional (enables GitHub PR fetching).
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python3 scripts/gather.py --since 7 --output /tmp/pul-raw.json
|
|
9
|
+
python3 scripts/gather.py --items "Add dark mode|Fix CSV bug" --output /tmp/pul-raw.json
|
|
10
|
+
GITHUB_TOKEN=your_token python3 scripts/gather.py --repo owner/repo --since 14
|
|
11
|
+
python3 scripts/gather.py --stdout | jq '.items'
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
import re
|
|
18
|
+
import ssl
|
|
19
|
+
import subprocess
|
|
20
|
+
import sys
|
|
21
|
+
from datetime import datetime, timedelta, timezone
|
|
22
|
+
|
|
23
|
+
_ssl_ctx = ssl._create_unverified_context()
|
|
24
|
+
|
|
25
|
+
# One "now" captured at import time so all date math within a run agrees.
TODAY = datetime.now(timezone.utc)
# Optional; when set, enables GitHub PR fetching and raises rate limits.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")

# Set from --quiet in main(); suppresses all progress/error chatter.
quiet = False
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
# HTTP helpers (reused from map-your-market pattern)
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
def fetch_json(url, headers=None, timeout=20):
    """GET *url* and parse the response body as JSON.

    Returns the decoded object, or None on any failure (HTTP error,
    transport error, undecodable JSON). Failures are reported on stderr
    unless the module-level ``quiet`` flag is set, so callers can simply
    treat None as "no data".
    """
    import urllib.request, urllib.error

    request = urllib.request.Request(url, headers=headers or {})
    # Some APIs (GitHub included) reject requests without a User-Agent.
    request.add_header("User-Agent", "product-update-logger/1.0")
    try:
        with urllib.request.urlopen(request, context=_ssl_ctx, timeout=timeout) as response:
            return json.loads(response.read().decode("utf-8"))
    except urllib.error.HTTPError as err:
        if not quiet:
            print(f" HTTP {err.code}: {url[:80]}", file=sys.stderr)
    except Exception as err:
        if not quiet:
            print(f" Error: {err} -- {url[:80]}", file=sys.stderr)
    return None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def gh_get(path):
    """GET *path* from the GitHub REST API; return parsed JSON or None.

    Sends the standard GitHub v3 Accept header and, when GITHUB_TOKEN is
    set, a Bearer Authorization header. All network work is delegated to
    fetch_json (the previous version carried a dead, unused
    "import urllib.request" here).
    """
    headers = {"Accept": "application/vnd.github+json"}
    if GITHUB_TOKEN:
        headers["Authorization"] = f"Bearer {GITHUB_TOKEN}"
    return fetch_json(f"https://api.github.com{path}", headers=headers)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
# Noise filter patterns
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
# Commit subjects matching any of these are dropped as changelog noise:
# merge commits, version bumps, and conventional-commit housekeeping
# prefixes (chore/ci/build/test/docs/style), plus typo/wip/revert/init.
_NOISE_SUBJECT_RES = [
    r"^Merge (pull request|branch)\b",
    r"^(bump|update) version\b",
    r"^chore[\s:(]",
    r"^ci[\s:(]",
    r"^build[\s:(]",
    r"^fix typo\b",
    r"^\s*typo\b",
    r"^wip\b",
    r"^test[\s:(]",
    r"^docs[\s:(]",
    r"^style[\s:(]",
    r"^revert\b",
    r"^Initial commit$",
    r"^init$",
]
NOISE_PATTERNS = [re.compile(expr, re.IGNORECASE) for expr in _NOISE_SUBJECT_RES]

# PRs carrying any of these (lowercased) labels are skipped as non-user-facing.
NOISE_PR_LABELS = {"documentation", "chore", "dependencies", "ci", "test", "tests"}
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def filter_noise(commits: list) -> list:
    """Drop commits whose subjects look like changelog noise.

    Removes entries with empty subjects, short single-word subjects
    (one word, fewer than 8 characters), and anything matching one of the
    NOISE_PATTERNS regexes. Surviving commits keep their original order.
    """
    kept = []
    for entry in commits:
        title = entry.get("subject", "").strip()
        if not title:
            continue
        # Short one-worders ("fix", "update") carry no changelog signal.
        if len(title.split()) == 1 and len(title) < 8:
            continue
        if any(pattern.match(title) for pattern in NOISE_PATTERNS):
            continue
        kept.append(entry)
    return kept
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
# Git commits
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
def get_git_commits(since_days: int, repo_path: str = ".") -> list:
    """Collect commits from the git repository at *repo_path*.

    Returns a list of dicts with subject/body/source/hash/date keys for
    commits from the last *since_days* days; returns [] on any git failure
    (missing binary, not a repo, timeout).

    Fields are delimited with ASCII unit separators (%x1f) and records with
    record separators (%x1e). The previous "%H|%s|%b|%ad" format split on
    "|" and newlines, so multi-line commit bodies -- or a "|" in a subject
    or body -- corrupted parsing; control characters do not appear in
    ordinary commit text, so this is robust in practice.
    """
    FIELD_SEP = "\x1f"   # ASCII unit separator, emitted via %x1f
    RECORD_SEP = "\x1e"  # ASCII record separator between commits
    try:
        result = subprocess.run(
            [
                "git", "-C", repo_path, "log",
                f"--since={since_days} days ago",
                "--pretty=format:%H%x1f%s%x1f%b%x1f%ad%x1e",
                "--date=short",
            ],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode != 0:
            return []
        commits = []
        for record in result.stdout.split(RECORD_SEP):
            record = record.strip()
            if not record:
                continue
            parts = record.split(FIELD_SEP, 3)
            commit_hash = parts[0].strip() if len(parts) > 0 else ""
            subject = parts[1].strip() if len(parts) > 1 else ""
            body = parts[2].strip() if len(parts) > 2 else ""
            date = parts[3].strip() if len(parts) > 3 else ""
            if subject:
                commits.append({
                    "subject": subject,
                    "body": body,
                    "source": "git_commit",
                    "hash": commit_hash[:7],  # short hash for display
                    "date": date,
                })
        return commits
    except FileNotFoundError:
        # git binary not on PATH -- degrade gracefully, other sources still run.
        if not quiet:
            print(" git not installed -- skipping git commits", file=sys.stderr)
        return []
    except Exception as e:
        if not quiet:
            print(f" git error: {e}", file=sys.stderr)
        return []
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# ---------------------------------------------------------------------------
|
|
149
|
+
# GitHub PRs
|
|
150
|
+
# ---------------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
def get_github_prs(repo: str, since_date: str) -> list:
    """Fetch recently merged PRs for *repo* ("owner/repo") via the GitHub API.

    Requires GITHUB_TOKEN; without it (or without a repo) returns [] --
    and, unless quiet, says why. Only PRs merged on or after *since_date*
    (YYYY-MM-DD) are kept, and PRs carrying any NOISE_PR_LABELS label are
    skipped as non-user-facing. At most the 50 most recently updated
    closed PRs are examined.
    """
    if not GITHUB_TOKEN:
        if not quiet:
            print(" GITHUB_TOKEN not set -- skipping GitHub PRs", file=sys.stderr)
        return []
    if not repo:
        return []

    payload = gh_get(f"/repos/{repo}/pulls?state=closed&sort=updated&direction=desc&per_page=50")
    if not payload:
        return []

    merged_prs = []
    for entry in payload:
        merged_at = entry.get("merged_at")
        # Closed-but-unmerged PRs have merged_at == None; skip those and
        # anything merged before the reporting window.
        if not merged_at or merged_at[:10] < since_date:
            continue
        label_names = [tag["name"].lower() for tag in (entry.get("labels") or [])]
        if NOISE_PR_LABELS.intersection(label_names):
            continue
        merged_prs.append({
            "subject": entry.get("title", "").strip(),
            "body": (entry.get("body") or "").strip()[:500],
            "source": "github_pr",
            "pr_number": entry.get("number"),
            "merged_at": merged_at[:10],
            "labels": label_names,
        })
    return merged_prs
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# ---------------------------------------------------------------------------
|
|
189
|
+
# Free text input
|
|
190
|
+
# ---------------------------------------------------------------------------
|
|
191
|
+
|
|
192
|
+
def parse_free_text(items_str: str) -> list:
    """Split a pipe- or newline-separated string into free-text items.

    Each non-blank piece becomes {"subject": ..., "source": "free_text"};
    blank or whitespace-only input yields [].
    """
    if not items_str or not items_str.strip():
        return []
    pieces = (chunk.strip() for chunk in re.split(r"[|\n]", items_str))
    return [{"subject": piece, "source": "free_text"} for piece in pieces if piece]
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# ---------------------------------------------------------------------------
|
|
204
|
+
# Deduplication
|
|
205
|
+
# ---------------------------------------------------------------------------
|
|
206
|
+
|
|
207
|
+
def deduplicate(commits: list, prs: list) -> list:
    """Merge PR and commit items, dropping commits that duplicate a PR.

    A commit is a duplicate when its subject is a substring of some PR
    title, or vice versa (case-insensitive). PRs always survive and come
    first in the returned list, so their richer metadata wins.
    """
    pr_subjects = [pr["subject"].lower() for pr in prs]
    merged = list(prs)
    for commit in commits:
        needle = commit["subject"].lower()
        if any(needle in title or title in needle for title in pr_subjects):
            continue
        merged.append(commit)
    return merged
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# ---------------------------------------------------------------------------
|
|
226
|
+
# Existing changelog detection
|
|
227
|
+
# ---------------------------------------------------------------------------
|
|
228
|
+
|
|
229
|
+
def detect_existing_changelog(path: str = "docs/changelog.md") -> dict:
    """Probe *path* for an existing changelog and its labeling convention.

    Returns {"exists": bool, "last_label": str, "format": str} where
    format is "semver" when the most recent "## " heading starts with a
    version number (optionally v-prefixed), else "date-based". Only the
    first 100 lines are examined; unreadable files report exists=True with
    no label.
    """
    if not os.path.exists(path):
        return {"exists": False, "last_label": "", "format": "date-based"}

    try:
        with open(path) as fh:
            head = [fh.readline() for _ in range(100)]
        head = [line.rstrip() for line in head if line.strip()]
    except Exception:
        return {"exists": True, "last_label": "", "format": "date-based"}

    # The first "## " heading is the newest entry; its label tells us the
    # versioning convention.
    for line in head:
        heading = re.match(r"^## (.+)$", line)
        if heading:
            label = heading.group(1).strip()
            fmt = "semver" if re.match(r"^v?\d+\.\d+\.\d+", label) else "date-based"
            return {"exists": True, "last_label": label, "format": fmt}

    return {"exists": True, "last_label": "", "format": "date-based"}
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
# ---------------------------------------------------------------------------
|
|
257
|
+
# Main
|
|
258
|
+
# ---------------------------------------------------------------------------
|
|
259
|
+
|
|
260
|
+
def main():
    """CLI entry point: gather shipped items and emit a JSON summary.

    Sources, in output order: --items free text, local git commits
    (noise-filtered), and merged GitHub PRs (deduplicated against the
    commits). The payload is written to --output, or printed to stdout
    with --stdout, alongside availability flags for each source.
    """
    global quiet

    parser = argparse.ArgumentParser(description="Gather shipped items for product-update-logger")
    parser.add_argument("--since", default="7",
                        help="Days back to look, or YYYY-MM-DD date (default: 7)")
    parser.add_argument("--repo", default="",
                        help="GitHub repo as owner/repo for PR fetching")
    parser.add_argument("--items", default="",
                        help="Pipe-separated free text items")
    parser.add_argument("--output", default="/tmp/pul-raw.json",
                        help="Output file path (default: /tmp/pul-raw.json)")
    parser.add_argument("--stdout", action="store_true",
                        help="Print JSON to stdout instead of file")
    parser.add_argument("--quiet", action="store_true",
                        help="Suppress progress output")
    args = parser.parse_args()

    quiet = args.quiet
    use_stdout = args.stdout

    # Route progress to stderr when the JSON goes to stdout, so that
    # `gather.py --stdout | jq` stays clean.
    def log(msg):
        if not quiet:
            print(msg, file=sys.stderr if use_stdout else sys.stdout)

    # Resolve since_days and since_date: --since accepts either an absolute
    # YYYY-MM-DD date or a day count.
    # NOTE(review): a value that is neither raises ValueError from int() here.
    since_str = args.since.strip()
    if re.match(r"^\d{4}-\d{2}-\d{2}$", since_str):
        since_date = since_str
        delta = TODAY.date() - datetime.strptime(since_str, "%Y-%m-%d").date()
        # Clamp to at least 1 day so `git log --since` stays sensible.
        since_days = max(delta.days, 1)
    else:
        since_days = int(since_str)
        since_date = (TODAY - timedelta(days=since_days)).strftime("%Y-%m-%d")

    today_str = TODAY.strftime("%Y-%m-%d")

    log(f"Gathering items since {since_date} ({since_days} days)...")

    # 1. Free text items (highest priority; they lead the output list)
    free_text_items = parse_free_text(args.items)
    if free_text_items:
        log(f" Free text: {len(free_text_items)} items")

    # 2. Git commits -- probe for a usable work tree in the current
    # directory before asking for history.
    git_available = False
    raw_commits = []
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
            capture_output=True, text=True, timeout=5
        )
        git_available = result.returncode == 0
    except Exception:
        # git missing or hung -- treat as unavailable, other sources still run.
        git_available = False

    noise_filtered = 0
    if git_available:
        raw_commits = get_git_commits(since_days)
        before = len(raw_commits)
        raw_commits = filter_noise(raw_commits)
        noise_filtered = before - len(raw_commits)
        log(f" Git: {len(raw_commits)} commits ({noise_filtered} noise filtered)")

    # 3. GitHub PRs (requires both a token and an explicit --repo)
    github_available = bool(GITHUB_TOKEN and args.repo)
    prs = []
    if github_available:
        prs = get_github_prs(args.repo, since_date)
        log(f" GitHub PRs: {len(prs)} merged PRs")

    # 4. Deduplicate commits + PRs (PRs win; see deduplicate())
    combined = deduplicate(raw_commits, prs)

    # 5. Merge with free text (free text first, then git/PR items)
    all_items = free_text_items + combined
    free_text_provided = bool(free_text_items)

    # 6. Detect existing changelog so downstream formatting can match it
    existing_changelog = detect_existing_changelog()

    output = {
        "date": today_str,
        "since_date": since_date,
        "since_days": since_days,
        "git_available": git_available,
        "github_available": github_available,
        "free_text_provided": free_text_provided,
        "items": all_items,
        "total_items": len(all_items),
        "noise_filtered": noise_filtered,
        "existing_changelog": existing_changelog,
    }

    if use_stdout:
        print(json.dumps(output, indent=2))
    else:
        with open(args.output, "w") as f:
            json.dump(output, f, indent=2)
        log(f"\nOutput: {args.output}")
    log(f"Total items: {len(all_items)}")
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
# Script entry point -- importing this module performs no gathering.
if __name__ == "__main__":
    main()
|