@opendirectory.dev/skills 0.1.39 → 0.1.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,364 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ product-update-logger gather script
4
+ Collects shipped items from git commits, GitHub PRs, and free text.
5
+ No required API keys. GITHUB_TOKEN optional (enables GitHub PR fetching).
6
+
7
+ Usage:
8
+ python3 scripts/gather.py --since 7 --output /tmp/pul-raw.json
9
+ python3 scripts/gather.py --items "Add dark mode|Fix CSV bug" --output /tmp/pul-raw.json
10
+ GITHUB_TOKEN=your_token python3 scripts/gather.py --repo owner/repo --since 14
11
+ python3 scripts/gather.py --stdout | jq '.items'
12
+ """
13
+
14
+ import argparse
15
+ import json
16
+ import os
17
+ import re
18
+ import ssl
19
+ import subprocess
20
+ import sys
21
+ from datetime import datetime, timedelta, timezone
22
+
23
# NOTE(review): TLS certificate verification is DISABLED for every HTTP
# request this script makes (private ssl._create_unverified_context helper).
# That exposes GitHub API fetches to man-in-the-middle interception --
# confirm this is intentional; ssl.create_default_context() is the safe
# alternative when the environment has working CA certificates.
_ssl_ctx = ssl._create_unverified_context()

# Timezone-aware "now" in UTC; all date arithmetic below derives from it.
TODAY = datetime.now(timezone.utc)
# Optional: when set, enables GitHub PR fetching and raises API rate limits.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")

# Global progress-output switch; flipped on by --quiet in main().
quiet = False
29
+
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # HTTP helpers (reused from map-your-market pattern)
33
+ # ---------------------------------------------------------------------------
34
+
35
def fetch_json(url, headers=None, timeout=20):
    """GET *url* and return the decoded JSON body, or None on any failure.

    Errors are reported to stderr (unless the module-level `quiet` flag is
    set) rather than raised, so callers can treat a None result as "skip".
    """
    import urllib.request, urllib.error

    request = urllib.request.Request(url, headers=headers or {})
    request.add_header("User-Agent", "product-update-logger/1.0")
    try:
        with urllib.request.urlopen(request, context=_ssl_ctx, timeout=timeout) as resp:
            return json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as err:
        if not quiet:
            print(f" HTTP {err.code}: {url[:80]}", file=sys.stderr)
    except Exception as err:
        if not quiet:
            print(f" Error: {err} -- {url[:80]}", file=sys.stderr)
    return None
50
+
51
+
52
def gh_get(path):
    """GET a GitHub REST API *path* (e.g. "/repos/o/r/pulls") as JSON.

    Sends a Bearer token when GITHUB_TOKEN is set.  Returns the parsed
    JSON payload, or None on any HTTP/network error (see fetch_json).
    """
    # Fixed: removed an unused `import urllib.request` left over from an
    # earlier version -- all HTTP work is delegated to fetch_json().
    headers = {"Accept": "application/vnd.github+json"}
    if GITHUB_TOKEN:
        headers["Authorization"] = f"Bearer {GITHUB_TOKEN}"
    return fetch_json(f"https://api.github.com{path}", headers=headers)
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Noise filter patterns
62
+ # ---------------------------------------------------------------------------
63
+
64
# Commit subjects matching any of these are dropped as non-shippable noise
# (merges, version bumps, chores, CI/build/test/docs churn, typos, WIP...).
NOISE_PATTERNS = [
    re.compile(expr, re.IGNORECASE)
    for expr in (
        r"^Merge (pull request|branch)\b",
        r"^(bump|update) version\b",
        r"^chore[\s:(]",
        r"^ci[\s:(]",
        r"^build[\s:(]",
        r"^fix typo\b",
        r"^\s*typo\b",
        r"^wip\b",
        r"^test[\s:(]",
        r"^docs[\s:(]",
        r"^style[\s:(]",
        r"^revert\b",
        r"^Initial commit$",
        r"^init$",
    )
]

# Merged PRs carrying any of these labels are skipped as noise.
NOISE_PR_LABELS = {"documentation", "chore", "dependencies", "ci", "test", "tests"}


def filter_noise(commits: list) -> list:
    """Return *commits* minus entries whose subject looks like noise.

    Drops empty subjects, terse single-word subjects under 8 characters,
    and anything matching a NOISE_PATTERNS regex.
    """
    kept = []
    for commit in commits:
        subject = commit.get("subject", "").strip()
        if not subject:
            continue
        # Terse one-worders ("fix", "stuff") carry no changelog value.
        if len(subject.split()) == 1 and len(subject) < 8:
            continue
        if any(pattern.match(subject) for pattern in NOISE_PATTERNS):
            continue
        kept.append(commit)
    return kept
101
+
102
+
103
+ # ---------------------------------------------------------------------------
104
+ # Git commits
105
+ # ---------------------------------------------------------------------------
106
+
107
def get_git_commits(since_days: int, repo_path: str = ".") -> list:
    """Collect commits from the last *since_days* days via `git log`.

    Returns a list of dicts with keys: subject, body, source ("git_commit"),
    hash (short, 7 chars), date (YYYY-MM-DD).  Returns [] when git is
    missing, the command fails, or any other error occurs.

    Fixed: the previous format "%H|%s|%b|%ad" split on "|" and newlines, so
    subjects containing "|" shifted fields and multi-line commit bodies were
    silently truncated to their first line (continuation lines were dropped).
    ASCII unit (0x1f) / record (0x1e) separators cannot appear in normal
    commit text, so parsing is now unambiguous.
    """
    FIELD_SEP = "\x1f"
    RECORD_SEP = "\x1e"
    try:
        result = subprocess.run(
            [
                "git", "-C", repo_path, "log",
                f"--since={since_days} days ago",
                "--pretty=format:%H%x1f%s%x1f%b%x1f%ad%x1e",
                "--date=short",
            ],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode != 0:
            return []
        commits = []
        # One record per commit, regardless of newlines inside the body.
        for record in result.stdout.split(RECORD_SEP):
            record = record.strip()
            if not record:
                continue
            parts = record.split(FIELD_SEP, 3)
            commit_hash = parts[0].strip() if len(parts) > 0 else ""
            subject = parts[1].strip() if len(parts) > 1 else ""
            body = parts[2].strip() if len(parts) > 2 else ""
            date = parts[3].strip() if len(parts) > 3 else ""
            if subject:
                commits.append({
                    "subject": subject,
                    "body": body,
                    "source": "git_commit",
                    "hash": commit_hash[:7],
                    "date": date,
                })
        return commits
    except FileNotFoundError:
        # git binary not on PATH -- degrade gracefully.
        if not quiet:
            print(" git not installed -- skipping git commits", file=sys.stderr)
        return []
    except Exception as e:
        if not quiet:
            print(f" git error: {e}", file=sys.stderr)
        return []
146
+
147
+
148
+ # ---------------------------------------------------------------------------
149
+ # GitHub PRs
150
+ # ---------------------------------------------------------------------------
151
+
152
def get_github_prs(repo: str, since_date: str) -> list:
    """Fetch merged PRs of owner/repo *repo* merged on or after *since_date*.

    Returns [] when GITHUB_TOKEN or *repo* is missing, when the API call
    fails, or when no PR qualifies.  PRs that were closed without merging
    or that carry a noise label (docs/chore/deps/ci/tests) are skipped.
    """
    if not GITHUB_TOKEN:
        if not quiet:
            print(" GITHUB_TOKEN not set -- skipping GitHub PRs", file=sys.stderr)
        return []
    if not repo:
        return []

    pulls = gh_get(f"/repos/{repo}/pulls?state=closed&sort=updated&direction=desc&per_page=50")
    if not pulls:
        return []

    collected = []
    for pull in pulls:
        merged_at = pull.get("merged_at")
        if not merged_at:
            continue  # closed without being merged
        if merged_at[:10] < since_date:
            continue  # merged before the reporting window
        label_names = [lbl["name"].lower() for lbl in (pull.get("labels") or [])]
        if NOISE_PR_LABELS.intersection(label_names):
            continue
        collected.append({
            "subject": pull.get("title", "").strip(),
            "body": (pull.get("body") or "").strip()[:500],
            "source": "github_pr",
            "pr_number": pull.get("number"),
            "merged_at": merged_at[:10],
            "labels": label_names,
        })
    return collected
186
+
187
+
188
+ # ---------------------------------------------------------------------------
189
+ # Free text input
190
+ # ---------------------------------------------------------------------------
191
+
192
def parse_free_text(items_str: str) -> list:
    """Split pipe- or newline-separated free text into item dicts.

    Blank segments are dropped; each surviving segment becomes
    {"subject": <trimmed text>, "source": "free_text"}.
    """
    if not items_str or not items_str.strip():
        return []
    return [
        {"subject": chunk.strip(), "source": "free_text"}
        for chunk in re.split(r"[|\n]", items_str)
        if chunk.strip()
    ]
201
+
202
+
203
+ # ---------------------------------------------------------------------------
204
+ # Deduplication
205
+ # ---------------------------------------------------------------------------
206
+
207
def deduplicate(commits: list, prs: list) -> list:
    """Merge *prs* and *commits*, dropping commits that duplicate a PR.

    A commit is a duplicate when its subject is a case-insensitive
    substring of some PR title, or vice versa.  All PRs survive and come
    first; surviving commits keep their relative order after them.
    """
    merged = list(prs)
    titles = [pr["subject"].lower() for pr in prs]
    for commit in commits:
        subj = commit["subject"].lower()
        if any(subj in title or title in subj for title in titles):
            continue  # same shipped item, already represented by a PR
        merged.append(commit)
    return merged
223
+
224
+
225
+ # ---------------------------------------------------------------------------
226
+ # Existing changelog detection
227
+ # ---------------------------------------------------------------------------
228
+
229
def detect_existing_changelog(path: str = "docs/changelog.md") -> dict:
    """Inspect an existing changelog and report its entry-label scheme.

    Returns a dict with:
      exists     -- whether *path* exists
      last_label -- text of the first "## " heading found (newest entry), or ""
      format     -- "semver" if that label starts like [v]X.Y.Z, else "date-based"

    Fixed: the file is now opened with an explicit UTF-8 encoding; the old
    bare open() decoded with the platform default and could raise on UTF-8
    changelogs under non-UTF-8 locales (masked as an empty result).
    """
    from itertools import islice

    if not os.path.exists(path):
        return {"exists": False, "last_label": "", "format": "date-based"}

    try:
        # Only the first 100 lines matter: the newest entry heads the file.
        with open(path, encoding="utf-8") as f:
            lines = [line.rstrip() for line in islice(f, 100) if line.strip()]
    except Exception:
        # Unreadable file: report existence but no label information.
        return {"exists": True, "last_label": "", "format": "date-based"}

    last_label = ""
    version_format = "date-based"

    for line in lines:
        # "## <label>" headings mark changelog entries.
        m = re.match(r"^## (.+)$", line)
        if m:
            last_label = m.group(1).strip()
            if re.match(r"^v?\d+\.\d+\.\d+", last_label):
                version_format = "semver"
            break  # first heading is the most recent entry

    return {"exists": True, "last_label": last_label, "format": version_format}
254
+
255
+
256
+ # ---------------------------------------------------------------------------
257
+ # Main
258
+ # ---------------------------------------------------------------------------
259
+
260
def main():
    """CLI entry point: gather shipped items and emit a raw JSON report.

    Pipeline: parse args -> resolve the time window -> collect free-text
    items, git commits (noise-filtered), and merged GitHub PRs -> dedupe
    commits against PRs -> detect any existing changelog -> write JSON to
    --output or stdout.
    """
    global quiet

    parser = argparse.ArgumentParser(description="Gather shipped items for product-update-logger")
    parser.add_argument("--since", default="7",
                        help="Days back to look, or YYYY-MM-DD date (default: 7)")
    parser.add_argument("--repo", default="",
                        help="GitHub repo as owner/repo for PR fetching")
    parser.add_argument("--items", default="",
                        help="Pipe-separated free text items")
    parser.add_argument("--output", default="/tmp/pul-raw.json",
                        help="Output file path (default: /tmp/pul-raw.json)")
    parser.add_argument("--stdout", action="store_true",
                        help="Print JSON to stdout instead of file")
    parser.add_argument("--quiet", action="store_true",
                        help="Suppress progress output")
    args = parser.parse_args()

    quiet = args.quiet
    use_stdout = args.stdout

    # Progress messages go to stderr whenever stdout carries the JSON
    # payload, so `--stdout | jq` stays clean.
    def log(msg):
        if not quiet:
            print(msg, file=sys.stderr if use_stdout else sys.stdout)

    # Resolve since_days and since_date: --since accepts either a day
    # count ("7") or an absolute YYYY-MM-DD date.
    since_str = args.since.strip()
    if re.match(r"^\d{4}-\d{2}-\d{2}$", since_str):
        since_date = since_str
        delta = TODAY.date() - datetime.strptime(since_str, "%Y-%m-%d").date()
        since_days = max(delta.days, 1)  # clamp today/future dates to 1 day
    else:
        # NOTE: raises ValueError on non-numeric input (unhandled by design?).
        since_days = int(since_str)
        since_date = (TODAY - timedelta(days=since_days)).strftime("%Y-%m-%d")

    today_str = TODAY.strftime("%Y-%m-%d")

    log(f"Gathering items since {since_date} ({since_days} days)...")

    # 1. Free text items supplied directly on the command line
    free_text_items = parse_free_text(args.items)
    if free_text_items:
        log(f" Free text: {len(free_text_items)} items")

    # 2. Git commits -- only if the current directory is inside a work tree
    git_available = False
    raw_commits = []
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
            capture_output=True, text=True, timeout=5
        )
        git_available = result.returncode == 0
    except Exception:
        git_available = False

    noise_filtered = 0
    if git_available:
        raw_commits = get_git_commits(since_days)
        before = len(raw_commits)
        raw_commits = filter_noise(raw_commits)
        noise_filtered = before - len(raw_commits)
        log(f" Git: {len(raw_commits)} commits ({noise_filtered} noise filtered)")

    # 3. GitHub PRs -- requires both a token and an explicit --repo
    github_available = bool(GITHUB_TOKEN and args.repo)
    prs = []
    if github_available:
        prs = get_github_prs(args.repo, since_date)
        log(f" GitHub PRs: {len(prs)} merged PRs")

    # 4. Deduplicate commits + PRs (PRs win; matching commits dropped)
    combined = deduplicate(raw_commits, prs)

    # 5. Merge with free text (free text first, then git/PR items)
    all_items = free_text_items + combined
    free_text_provided = bool(free_text_items)

    # 6. Detect existing changelog so downstream tooling can match its format
    existing_changelog = detect_existing_changelog()

    output = {
        "date": today_str,
        "since_date": since_date,
        "since_days": since_days,
        "git_available": git_available,
        "github_available": github_available,
        "free_text_provided": free_text_provided,
        "items": all_items,
        "total_items": len(all_items),
        "noise_filtered": noise_filtered,
        "existing_changelog": existing_changelog,
    }

    if use_stdout:
        print(json.dumps(output, indent=2))
    else:
        with open(args.output, "w") as f:
            json.dump(output, f, indent=2)
        log(f"\nOutput: {args.output}")
        log(f"Total items: {len(all_items)}")


if __name__ == "__main__":
    main()