norn-cli 2.4.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/AGENTS.md +2 -2
  2. package/CHANGELOG.md +26 -1
  3. package/dist/cli.js +330 -85
  4. package/package.json +24 -5
  5. package/schemas/norn.config.schema.json +43 -1
  6. package/scripts/__pycache__/reddit_signal_miner.cpython-312.pyc +0 -0
  7. package/scripts/reddit_signal_miner.py +482 -0
  8. package/.claude/settings.local.json +0 -18
  9. package/.claude/skills/norn-social-campaign/SKILL.md +0 -70
  10. package/out/apiResponseIntellisenseCache.js +0 -394
  11. package/out/assertionRunner.js +0 -567
  12. package/out/cacheDir.js +0 -136
  13. package/out/chatParticipant.js +0 -763
  14. package/out/cli/colors.js +0 -127
  15. package/out/cli/formatters/assertion.js +0 -102
  16. package/out/cli/formatters/index.js +0 -23
  17. package/out/cli/formatters/response.js +0 -106
  18. package/out/cli/formatters/summary.js +0 -246
  19. package/out/cli/redaction.js +0 -237
  20. package/out/cli/reporters/html.js +0 -689
  21. package/out/cli/reporters/index.js +0 -22
  22. package/out/cli/reporters/junit.js +0 -226
  23. package/out/codeLensProvider.js +0 -351
  24. package/out/compareContentProvider.js +0 -85
  25. package/out/completionProvider.js +0 -3739
  26. package/out/contractAssertionSummary.js +0 -225
  27. package/out/contractDecorationProvider.js +0 -243
  28. package/out/coverageCalculator.js +0 -879
  29. package/out/coveragePanel.js +0 -597
  30. package/out/debug/breakpointResolver.js +0 -84
  31. package/out/debug/breakpoints.js +0 -52
  32. package/out/debug/nornDebugAdapter.js +0 -166
  33. package/out/debug/nornDebugSession.js +0 -613
  34. package/out/debug/sequenceLocationIndex.js +0 -77
  35. package/out/debug/types.js +0 -3
  36. package/out/deepClone.js +0 -21
  37. package/out/diagnosticProvider.js +0 -2554
  38. package/out/environmentParser.js +0 -736
  39. package/out/environmentProvider.js +0 -544
  40. package/out/environmentTemplates.js +0 -146
  41. package/out/errors/formatError.js +0 -113
  42. package/out/errors/nornError.js +0 -29
  43. package/out/formUrlEncoded.js +0 -89
  44. package/out/httpClient.js +0 -348
  45. package/out/httpRuntimeOptions.js +0 -16
  46. package/out/importErrors.js +0 -31
  47. package/out/inlayHintResolver.js +0 -70
  48. package/out/jsonFileReader.js +0 -323
  49. package/out/mcpClient.js +0 -193
  50. package/out/mcpConfig.js +0 -184
  51. package/out/mcpToolIntellisenseCache.js +0 -96
  52. package/out/mcpToolSchema.js +0 -50
  53. package/out/nornConfig.js +0 -132
  54. package/out/nornHoverProvider.js +0 -124
  55. package/out/nornInlayHintsProvider.js +0 -191
  56. package/out/nornPrompt.js +0 -755
  57. package/out/nornSqlParser.js +0 -286
  58. package/out/nornapiHoverProvider.js +0 -135
  59. package/out/nornapiInlayHintsProvider.js +0 -94
  60. package/out/nornapiParser.js +0 -324
  61. package/out/nornenvCodeActionProvider.js +0 -101
  62. package/out/nornenvDecorationProvider.js +0 -239
  63. package/out/nornenvFoldingProvider.js +0 -63
  64. package/out/nornenvHoverProvider.js +0 -114
  65. package/out/nornenvInlayHintsProvider.js +0 -99
  66. package/out/nornenvLanguageModel.js +0 -187
  67. package/out/nornenvRegionRefactor.js +0 -267
  68. package/out/nornsqlHoverProvider.js +0 -95
  69. package/out/nornsqlInlayHintsProvider.js +0 -114
  70. package/out/parser.js +0 -839
  71. package/out/pathAccess.js +0 -28
  72. package/out/postmanImportPanel.js +0 -732
  73. package/out/postmanImportPlanner.js +0 -1155
  74. package/out/postmanImportSidebarView.js +0 -532
  75. package/out/quotedString.js +0 -35
  76. package/out/requestPreparation.js +0 -179
  77. package/out/requestValidation.js +0 -146
  78. package/out/responsePanel.js +0 -7754
  79. package/out/schemaGenerator.js +0 -562
  80. package/out/scriptRunner.js +0 -419
  81. package/out/secrets/cliSecrets.js +0 -415
  82. package/out/secrets/crypto.js +0 -105
  83. package/out/secrets/envFileSecrets.js +0 -177
  84. package/out/secrets/keyStore.js +0 -259
  85. package/out/sequenceDeclaration.js +0 -15
  86. package/out/sequenceRunner.js +0 -3590
  87. package/out/sqlAdapterRunner.js +0 -122
  88. package/out/sqlBuiltInAdapters.js +0 -604
  89. package/out/sqlConfig.js +0 -184
  90. package/out/starterCatalog.js +0 -554
  91. package/out/stringUtils.js +0 -25
  92. package/out/swaggerBodyIntellisenseCache.js +0 -114
  93. package/out/swaggerParser.js +0 -464
  94. package/out/testProvider.js +0 -767
  95. package/out/theoryCaseLoader.js +0 -113
  96. package/out/validationCache.js +0 -211
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "norn-cli",
3
3
  "displayName": "Norn — API Tests in Your Repo",
4
4
  "description": "Version-controlled API tests your team can keep. Author and debug HTTP sequences in VS Code, then run the same files in CI.",
5
- "version": "2.4.0",
5
+ "version": "2.6.0",
6
6
  "publisher": "Norn-PeterKrustanov",
7
7
  "author": {
8
8
  "name": "Peter Krastanov"
@@ -291,7 +291,8 @@
291
291
  {
292
292
  "fileMatch": [
293
293
  "norn.config.json",
294
- "/norn.config.json"
294
+ "/norn.config.json",
295
+ "**/norn.config.json"
295
296
  ],
296
297
  "url": "./schemas/norn.config.schema.json"
297
298
  }
@@ -415,6 +416,23 @@
415
416
  "type": "boolean",
416
417
  "default": true,
417
418
  "description": "Verify SSL/TLS certificates for HTTPS requests and Swagger/OpenAPI fetches. Disable only for local development with self-signed certificates."
419
+ },
420
+ "norn.request.timeoutMs": {
421
+ "type": [
422
+ "number",
423
+ "null"
424
+ ],
425
+ "default": null,
426
+ "minimum": 1,
427
+ "description": "Override the HTTP request timeout in milliseconds for this VS Code workspace or user. Set to null to use norn.config.json http.timeoutMs, then the built-in 30000ms default."
428
+ },
429
+ "norn.testExplorer.exclude": {
430
+ "type": "array",
431
+ "default": [],
432
+ "items": {
433
+ "type": "string"
434
+ },
435
+ "description": "Workspace-relative glob patterns for .norn files or folders to hide from the Norn Test Explorer. Useful for negative fixtures, live-only tests, or documentation demos."
418
436
  }
419
437
  }
420
438
  },
@@ -445,7 +463,7 @@
445
463
  "watch:esbuild": "node esbuild.js --watch",
446
464
  "watch:tsc": "tsc --noEmit --watch --project tsconfig.json",
447
465
  "package": "npm run check-types && npm run lint && node esbuild.js --production",
448
- "compile-tests": "tsc -p . --outDir out",
466
+ "compile-tests": "node -e \"require('fs').rmSync('out/test',{recursive:true,force:true})\" && tsc -p . --outDir out",
449
467
  "watch-tests": "tsc -p . -w --outDir out",
450
468
  "pretest": "npm run compile-tests && npm run compile && npm run lint",
451
469
  "check-types": "tsc --noEmit",
@@ -453,7 +471,8 @@
453
471
  "validate:skills": "node ./scripts/validate-skills.mjs",
454
472
  "test": "vscode-test",
455
473
  "test:regression": "node ./dist/cli.js ./tests/Regression/ -e prelive",
456
- "publish:npm": "node -e \"const p=require('./package.json');p.name='norn-cli';require('fs').writeFileSync('package.json',JSON.stringify(p,null,2))\" && npm publish && node -e \"const p=require('./package.json');p.name='norn';require('fs').writeFileSync('package.json',JSON.stringify(p,null,2))\"",
474
+ "test:prerelease": "npm test && npm run test:regression",
475
+ "publish:npm": "node -e \"const fs=require('fs');const p=require('./package.json');p.name='norn-cli';fs.writeFileSync('package.json',JSON.stringify(p,null,2)+'\\n')\" && npm publish; exit_code=$?; node -e \"const fs=require('fs');const p=require('./package.json');p.name='norn';fs.writeFileSync('package.json',JSON.stringify(p,null,2)+'\\n')\"; exit $exit_code",
457
476
  "publish:vsce": "node -e \"const p=require('./package.json');p.name='norn';require('fs').writeFileSync('package.json',JSON.stringify(p,null,2))\" && npx vsce publish",
458
477
  "publish:all": "npm run publish:npm && npm run publish:vsce"
459
478
  },
@@ -485,4 +504,4 @@
485
504
  "bin": {
486
505
  "norn": "./dist/cli.js"
487
506
  }
488
- }
507
+ }
@@ -8,6 +8,16 @@
8
8
  "version"
9
9
  ],
10
10
  "defaultSnippets": [
11
+ {
12
+ "label": "Norn config with HTTP timeout",
13
+ "description": "Create a Norn config with a shared HTTP request timeout.",
14
+ "body": {
15
+ "version": 1,
16
+ "http": {
17
+ "timeoutMs": 180000
18
+ }
19
+ }
20
+ },
11
21
  {
12
22
  "label": "Norn config with SQL",
13
23
  "description": "Create a Norn config with an editable SQL connection and custom adapter example.",
@@ -68,6 +78,22 @@
68
78
  "const": 1,
69
79
  "description": "Config schema version."
70
80
  },
81
+ "http": {
82
+ "allOf": [
83
+ {
84
+ "$ref": "#/definitions/httpSection"
85
+ }
86
+ ],
87
+ "defaultSnippets": [
88
+ {
89
+ "label": "HTTP section definition",
90
+ "description": "Insert shared HTTP request options.",
91
+ "body": {
92
+ "timeoutMs": 180000
93
+ }
94
+ }
95
+ ]
96
+ },
71
97
  "sql": {
72
98
  "allOf": [
73
99
  {
@@ -144,6 +170,22 @@
144
170
  }
145
171
  ]
146
172
  },
173
+ "httpSection": {
174
+ "type": "object",
175
+ "additionalProperties": false,
176
+ "description": "HTTP request defaults used by .norn files.",
177
+ "properties": {
178
+ "_comment": {
179
+ "$ref": "#/definitions/comment",
180
+ "description": "Optional human-readable guidance ignored by Norn at runtime."
181
+ },
182
+ "timeoutMs": {
183
+ "type": "number",
184
+ "exclusiveMinimum": 0,
185
+ "description": "Default HTTP request timeout in milliseconds."
186
+ }
187
+ }
188
+ },
147
189
  "sqlSection": {
148
190
  "type": "object",
149
191
  "additionalProperties": false,
@@ -313,4 +355,4 @@
313
355
  }
314
356
  }
315
357
  }
316
- }
358
+ }
@@ -0,0 +1,482 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Reddit signal miner for the Norn LinkedIn campaign.
4
+
5
+ Anthropic's crawler is blocked by Reddit, so Claude can't fetch it — but this
6
+ script runs from YOUR machine/IP against Reddit's public JSON, so it can.
7
+
8
+ What it does:
9
+ - searches the campaign's target subreddits for the campaign's pain-terms
10
+ - pulls matching posts AND their top comments (the Post-5 gold was a comment)
11
+ - scores every bit of text by "juiciness" (spine-weighted keyword hits)
12
+ - isolates the exact verbatim sentences that carry the pain
13
+ - writes a ranked, paste-ready digest you can triage into Docs/market_signals.md
14
+
15
+ Discipline (from the campaign skill): quote VERBATIM + source URL + date.
16
+ Mining must not displace posting. Log it, pick one, go draft.
17
+
18
+ Usage:
19
+ python3 scripts/reddit_signal_miner.py
20
+ python3 scripts/reddit_signal_miner.py --time year --limit 25 --top-threads 15
21
+ python3 scripts/reddit_signal_miner.py --subs QualityAssurance devops --out harvest.md
22
+
23
+ Set REDDIT_USERNAME below to your handle (Reddit asks for it in the User-Agent).
24
+ Uses only the stdlib by default; certifi is used for TLS when it is installed, and the optional --browser mode additionally needs Playwright plus a local Chrome.
25
+
26
+ Troubleshooting:
27
+ - 403 on every request: Reddit is blocking your egress IP. Datacenter / VPN /
28
+ cloud IPs are refused wholesale — run it from a normal home connection, VPN off.
29
+ (This is why Claude can't run it for you: its sandbox IP is in a blocked range.)
30
+ - 429: you're going too fast — raise REQUEST_PAUSE.
31
+ - still 403 from home: anonymous JSON has gotten flaky; create a Reddit "script"
32
+ app and switch to the OAuth endpoint. Ask Claude to add that path if you need it.
33
+ """
34
+
35
+ import argparse
36
+ import html
37
+ import json
38
+ import re
39
+ import ssl
40
+ import sys
41
+ import time
42
+ import urllib.error
43
+ import urllib.parse
44
+ import urllib.request
45
+ from datetime import datetime, timezone
46
+
47
+ # python.org builds on macOS don't trust the system keychain; use certifi's
48
+ # bundle if present, otherwise fall back to the interpreter default.
49
+ try:
50
+ import certifi
51
+ SSL_CONTEXT = ssl.create_default_context(cafile=certifi.where())
52
+ except ImportError:
53
+ SSL_CONTEXT = ssl.create_default_context()
54
+
55
+ # --- config you can tweak --------------------------------------------------
56
+
57
+ REDDIT_USERNAME = "your_reddit_handle" # <- put your handle here (UA courtesy)
58
+
59
+ SUBREDDITS = [
60
+ "QualityAssurance",
61
+ "devops",
62
+ "ExperiencedDevs",
63
+ "webdev",
64
+ "programming",
65
+ "softwaretesting",
66
+ "QualityAssurance",
67
+ ]
68
+
69
+ QUERIES = [
70
+ "leaving Postman",
71
+ "Postman alternative",
72
+ ".http files",
73
+ "API testing",
74
+ "contract testing",
75
+ "flaky API tests",
76
+ "schema drift",
77
+ "tests passed but broke",
78
+ "staging didn't catch",
79
+ "lost my collection",
80
+ "tests out of date",
81
+ "nobody runs the tests",
82
+ "integration tests green",
83
+ "200 OK error body",
84
+ ]
85
+
86
+ # spine-weighted scoring. higher weight = closer to "tests on loan / rot / drift".
87
+ PAIN_PHRASES = {
88
+ # the ownership / loss wound (Post 5 territory) — heaviest
89
+ "lost years": 6, "lost my": 4, "lost all": 4, "evaporat": 5, "gone forever": 5,
90
+ "disappeared": 4, "wiped": 4, "no backup": 4, "couldn't recover": 4,
91
+ # rot / drift / staleness — the core thesis
92
+ "out of date": 5, "outdated": 4, "rot": 5, "stale": 5, "drift": 6,
93
+ "nobody updates": 6, "nobody maintains": 6, "nobody runs": 6, "never updated": 5,
94
+ "haven't touched": 4, "bit rot": 5, "abandoned": 4,
95
+ # passed-but-broke / staging-vs-prod
96
+ "passed but": 6, "tests passed": 5, "green but": 6, "didn't catch": 6,
97
+ "broke prod": 6, "broke in prod": 6, "worked on staging": 5, "works on staging": 5,
98
+ "only in production": 5, "false sense": 5, "lying": 5, "lied": 4,
99
+ # location / ownership / lock-in / friction
100
+ "source of truth": 5, "two sources": 5, "on someone": 4, "their laptop": 5,
101
+ "their account": 5, "behind a login": 5, "sign in": 3, "login wall": 5,
102
+ "paywall": 4, "enshittif": 5, "vendor lock": 5, "cloud sync": 4,
103
+ "fans": 3, "slow to open": 4, "bloat": 4, "enterprise monster": 5,
104
+ # 200-OK genre
105
+ "200 ok": 4, "success: false": 5, "status code": 2,
106
+ }
107
+
108
+ # leading word-boundary match: "rot" hits "rotten" but not "protocols"; stems
109
+ # like "evaporat"/"enshittif" still catch their variants.
110
+ _PAIN_PATTERNS = [(re.compile(r"\b" + re.escape(k)), k, w)
111
+ for k, w in PAIN_PHRASES.items()]
112
+
113
+ # a thread must actually be about APIs/testing to count — kills off-domain noise
114
+ # (a "schema drift" hit on Terraform, a "lost my collection" hit on WordPress).
115
+ DOMAIN_TERMS = [
116
+ "api", "endpoint", "postman", "graphql", "rest client", "request",
117
+ "response", "contract test", "mock", "openapi", "swagger", "insomnia",
118
+ "bruno", ".http", "integration test", "test suite", "payload", "qa ",
119
+ "automation", "regression",
120
+ ]
121
+ _DOMAIN_PATTERNS = [re.compile(r"\b" + re.escape(t)) for t in DOMAIN_TERMS]
122
+
123
+ REQUEST_PAUSE = 2.0 # seconds between requests (be polite)
124
+ COMMENT_FETCH_PAUSE = 2.0
125
+ MAX_RETRIES = 4
126
+
127
+ # --- http ------------------------------------------------------------------
128
+
129
+
130
def _ua() -> str:
    """Build the User-Agent string sent with urllib requests.

    The configured REDDIT_USERNAME is embedded as the contact handle.
    """
    handle = REDDIT_USERNAME
    return "python:norn-signal-miner:1.0 (by /u/" + f"{handle}" + ")"
132
+
133
+
134
def fetch_json(url: str) -> dict:
    """Fetch ``url`` with urllib and return the decoded JSON payload.

    Up to MAX_RETRIES attempts are made, pausing ``3.0 * attempt`` seconds
    between them. HTTP 429/500/502/503 and transport/decoding failures are
    retried; any other HTTP status gives up immediately.

    Returns whatever ``json.loads`` produced (callers in this file handle both
    an object and a list), or ``{}`` once the attempts are exhausted. Network
    and parse problems are reported on stderr and never raised, so one bad
    response cannot abort a long harvest.
    """
    import http.client  # local import: only needed for the exception type below

    backoff = 3.0
    for attempt in range(1, MAX_RETRIES + 1):
        req = urllib.request.Request(url, headers={
            "User-Agent": _ua(),
            "Accept": "application/json",
            "Accept-Language": "en-GB,en;q=0.9",
        })
        try:
            with urllib.request.urlopen(req, timeout=30, context=SSL_CONTEXT) as resp:
                return json.loads(resp.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            # Must stay ahead of the OSError clause: HTTPError is a URLError,
            # which in turn is an OSError.
            if e.code in (429, 500, 502, 503) and attempt < MAX_RETRIES:
                wait = backoff * attempt
                print(f" [{e.code}] backing off {wait:.0f}s "
                      f"(attempt {attempt}/{MAX_RETRIES})", file=sys.stderr)
                time.sleep(wait)
                continue
            print(f" [http {e.code}] {url}", file=sys.stderr)
            return {}
        except (OSError, ValueError, http.client.HTTPException) as e:
            # OSError covers URLError, timeouts and connection resets while the
            # body is being read; ValueError covers invalid JSON and invalid
            # UTF-8; HTTPException covers truncated responses.
            print(f" [err] {e} :: {url}", file=sys.stderr)
            if attempt < MAX_RETRIES:
                time.sleep(backoff * attempt)
                continue
            return {}
    return {}
161
+
162
+
163
+ # pluggable fetcher: defaults to urllib, swapped to the browser in --browser mode
164
+ _FETCH = fetch_json
165
+
166
+
167
def get(url: str) -> dict:
    """Fetch ``url`` through whichever fetcher is currently bound to ``_FETCH``."""
    active_fetcher = _FETCH
    return active_fetcher(url)
169
+
170
+
171
class BrowserSession:
    """Drives real Chrome so Reddit's anti-bot WAF serves us like a human.

    The key move is the homepage warmup: landing on reddit.com first banks the
    session cookie that the WAF then accepts on the .json endpoints. Without it
    every request is a 'blocked by network security' 403.
    """

    def __init__(self, headless: bool = False):
        """Start Playwright, launch Chrome, open one page and run the warmup.

        If launching or configuring the browser fails, everything started so
        far is shut down before the exception propagates, so a failed start
        does not leave a Playwright driver running.
        """
        from playwright.sync_api import sync_playwright
        self.browser = None
        self._pw = sync_playwright().start()
        try:
            self.browser = self._pw.chromium.launch(
                channel="chrome", headless=headless,
                args=["--disable-blink-features=AutomationControlled"])
            self.ctx = self.browser.new_context(
                locale="en-GB", timezone_id="Europe/London",
                viewport={"width": 1280, "height": 900})
            self.ctx.add_init_script(
                "Object.defineProperty(navigator,'webdriver',{get:()=>undefined})")
            self.page = self.ctx.new_page()
        except Exception:
            self.close()
            raise
        self._warmup()

    def _warmup(self):
        """Visit the Reddit homepage before any .json request (best effort).

        Failures are reported on stderr and otherwise ignored.
        """
        print(" [browser] homepage warmup (clearing WAF challenge)...", file=sys.stderr)
        try:
            self.page.goto("https://www.reddit.com/", wait_until="domcontentloaded",
                           timeout=45000)
            time.sleep(6)
            try:
                self.page.goto("https://www.reddit.com/", wait_until="networkidle",
                               timeout=20000)
            except Exception:
                # Reaching network idle is a bonus; the first load already happened.
                pass
        except Exception as e:
            print(f" [browser] warmup error: {e}", file=sys.stderr)

    def fetch(self, url: str) -> dict:
        """Navigate to ``url`` and parse the page's visible text as JSON.

        Returns the parsed value, or ``{}`` when navigation or parsing fails
        (the error is printed to stderr).
        """
        try:
            self.page.goto(url, wait_until="domcontentloaded", timeout=30000)
            body = self.page.evaluate("document.body ? document.body.innerText : ''")
            return json.loads(body)
        except Exception as e:
            print(f" [browser err] {e} :: {url}", file=sys.stderr)
            return {}

    def close(self):
        """Close the browser if one was launched, then always stop Playwright."""
        try:
            browser = getattr(self, "browser", None)
            if browser is not None:
                browser.close()
        finally:
            self._pw.stop()
221
+
222
+
223
+ # --- scoring ---------------------------------------------------------------
224
+
225
+
226
def clean(text: str) -> str:
    """Decode HTML entities and squeeze every whitespace run to one space.

    A falsy ``text`` (``None`` or ``""``) yields an empty string; the result
    has no leading or trailing whitespace.
    """
    decoded = html.unescape(text if text else "")
    squeezed = re.sub(r"\s+", " ", decoded)
    return squeezed.strip()
229
+
230
+
231
def score_text(text: str):
    """Score ``text`` against the pain-phrase patterns.

    Matching is case-insensitive (the text is lower-cased first) and each
    phrase counts at most once, however often it occurs.

    Returns a ``(total, hits)`` pair: the summed weights of the matching
    phrases and the phrases themselves, in pattern order.
    """
    lowered = text.lower()
    matched = [(phrase, weight)
               for pattern, phrase, weight in _PAIN_PATTERNS
               if pattern.search(lowered)]
    total = sum(weight for _, weight in matched)
    hits = [phrase for phrase, _ in matched]
    return total, hits
240
+
241
+
242
def is_on_domain(post: dict) -> bool:
    """Tell whether a post mentions at least one API/testing domain term.

    The title, the self-text and the bodies of any ``juicy_comments`` are
    joined with spaces, lower-cased and checked against ``_DOMAIN_PATTERNS``.
    """
    comment_text = " ".join(c["body"] for c in post.get("juicy_comments", []))
    pieces = [post.get("title", ""), post.get("selftext", ""), comment_text]
    haystack = " ".join(pieces).lower()
    for pattern in _DOMAIN_PATTERNS:
        if pattern.search(haystack):
            return True
    return False
246
+
247
+
248
def juicy_sentences(text: str, max_n: int = 3):
    """Return the verbatim sentences that actually carry the pain, best first.

    ``text`` is split after sentence punctuation (or on newlines); pieces of
    25 to 320 characters with a positive pain score are kept, ordered by score
    (ties keep their original order), de-duplicated, and cut off once
    ``max_n`` sentences have been collected.
    """
    candidates = []
    for raw in re.split(r"(?<=[.!?])\s+|\n+", text):
        sentence = raw.strip()
        if len(sentence) < 25 or len(sentence) > 320:
            continue
        weight, _ = score_text(sentence)
        if weight > 0:
            candidates.append((weight, sentence))

    ordered = sorted(candidates, key=lambda pair: pair[0], reverse=True)

    picked = []
    for _, sentence in ordered:
        if sentence in picked:
            continue
        picked.append(sentence)
        if len(picked) >= max_n:
            break
    return picked
267
+
268
+
269
def when(ts) -> str:
    """Format a Unix timestamp as a UTC ``YYYY-MM-DD`` date.

    Anything that cannot be converted gives the placeholder ``"????-??-??"``.
    """
    try:
        moment = datetime.fromtimestamp(ts, tz=timezone.utc)
        label = moment.strftime("%Y-%m-%d")
    except Exception:
        label = "????-??-??"
    return label
274
+
275
+
276
+ # --- reddit ----------------------------------------------------------------
277
+
278
+
279
def search(sub: str, query: str, t: str, limit: int):
    """Search one subreddit and return the hits as flat post dicts.

    Args:
        sub: subreddit name, placed in the URL path.
        query: search phrase; URL-encoded here.
        t: Reddit time window, passed through as the ``t`` parameter.
        limit: maximum number of results requested.

    Returns:
        A list of dicts with the keys ``id``, ``sub``, ``title``, ``selftext``,
        ``score``, ``num_comments``, ``permalink``, ``created`` and
        ``matched_query``. The list is empty when the fetch failed or the
        payload does not have the expected ``data.children`` shape.
    """
    q = urllib.parse.quote_plus(query)
    url = (f"https://www.reddit.com/r/{sub}/search.json?"
           f"q={q}&restrict_sr=on&sort=relevance&t={t}&limit={limit}")
    data = get(url)

    # The fetchers hand back whatever JSON they parsed, which is not
    # guaranteed to be an object; treat any other shape as "no results".
    listing = data.get("data") if isinstance(data, dict) else None
    children = listing.get("children") if isinstance(listing, dict) else None

    out = []
    for child in children or []:
        d = child.get("data", {})
        out.append({
            "id": d.get("id"),
            "sub": d.get("subreddit"),
            "title": clean(d.get("title", "")),
            "selftext": clean(d.get("selftext", "")),
            "score": d.get("score", 0),
            "num_comments": d.get("num_comments", 0),
            "permalink": "https://www.reddit.com" + d.get("permalink", ""),
            # No default: a missing timestamp must render as the unknown-date
            # placeholder, not as 1970-01-01.
            "created": when(d.get("created_utc")),
            "matched_query": query,
        })
    return out
299
+
300
+
301
def _walk_comments(children, out, depth, max_depth):
    """Collect comments from a Reddit comment listing into ``out``, depth-first.

    Args:
        children: the ``children`` list of a comment listing.
        out: list that receives one dict per usable comment (``author``,
            ``body``, ``score``, ``created``, ``depth``); mutated in place.
        depth: nesting level of ``children`` (0 for top-level comments).
        max_depth: deepest level whose replies are still followed.

    Only ``t1`` entries are considered. Empty, ``[deleted]`` and ``[removed]``
    bodies are not recorded, but their replies are still walked.
    """
    for child in children:
        if child.get("kind") != "t1":
            continue
        d = child.get("data", {})
        body = clean(d.get("body", ""))
        if body and body not in ("[deleted]", "[removed]"):
            out.append({
                "author": d.get("author", "?"),
                "body": body,
                "score": d.get("score", 0),
                # No default: a missing timestamp must render as the
                # unknown-date placeholder, not as 1970-01-01.
                "created": when(d.get("created_utc")),
                "depth": depth,
            })
        if depth < max_depth:
            replies = d.get("replies")
            # Only a dict is a nested listing; any other value is skipped.
            if isinstance(replies, dict):
                _walk_comments(replies.get("data", {}).get("children", []),
                               out, depth + 1, max_depth)
320
+
321
+
322
def top_comments(permalink: str, limit: int = 100, max_depth: int = 2):
    """Fetch a thread's comments, sorted by top, including nested replies.

    Replies are followed down to ``max_depth`` because the back-and-forth in
    the replies is part of the signal, not just the top-level answers.
    Returns an empty list when the response is not the expected two-element
    listing.
    """
    thread_url = f"{permalink.rstrip('/')}/.json?limit={limit}&sort=top"
    payload = get(thread_url)
    collected = []
    if isinstance(payload, list) and len(payload) >= 2:
        roots = payload[1].get("data", {}).get("children", [])
        _walk_comments(roots, collected, 0, max_depth)
    return collected
332
+
333
+
334
+ # --- main ------------------------------------------------------------------
335
+
336
+
337
def main():
    """Command-line entry point: parse options, harvest, write the digest.

    With ``--browser`` the module-level fetcher is switched to a
    ``BrowserSession`` for the duration of the harvest, and the session is
    closed afterwards even when the harvest fails.
    """
    global _FETCH

    parser = argparse.ArgumentParser(description="Mine Reddit for Norn campaign pain signals.")
    option_specs = [
        ("--time", dict(default="year", choices=["day", "week", "month", "year", "all"])),
        ("--limit", dict(type=int, default=25, help="results per (sub, query)")),
        ("--top-threads", dict(type=int, default=25,
                               help="how many threads to show in detail in the digest")),
        ("--dive", dict(type=int, default=35,
                        help="how many most-discussed threads to fetch comments for")),
        ("--min-comments", dict(type=int, default=15,
                                help="drop threads with fewer comments than this (dead posts)")),
        ("--subs", dict(nargs="*", default=None, help="override subreddit list")),
        ("--queries", dict(nargs="*", default=None, help="override query list")),
        ("--out", dict(default="Docs/reddit_signal_harvest.md")),
        ("--browser", dict(action="store_true",
                           help="drive real Chrome (beats Reddit's anti-bot WAF)")),
        ("--headless", dict(action="store_true",
                            help="with --browser, run Chrome headless (less reliable vs WAF)")),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    if REDDIT_USERNAME == "your_reddit_handle":
        print("note: set REDDIT_USERNAME at the top of the script (Reddit UA courtesy).\n",
              file=sys.stderr)

    # dict.fromkeys drops repeated subreddits while keeping their order.
    subs = list(dict.fromkeys(args.subs or SUBREDDITS))
    queries = args.queries or QUERIES

    mode_tag = " [browser]" if args.browser else ""
    print(f"mining {len(subs)} subs x {len(queries)} queries (t={args.time})"
          f"{mode_tag}...", file=sys.stderr)

    session = BrowserSession(headless=args.headless) if args.browser else None
    if session is not None:
        _FETCH = session.fetch
    try:
        ranked = harvest(subs, queries, args)
    finally:
        if session is not None:
            session.close()

    write_digest(ranked, args)
    print(f"\ndone. ranked {len(ranked)} juicy threads -> {args.out}", file=sys.stderr)
379
+
380
+
381
def harvest(subs, queries, args):
    """Search, dive into comments, and rank the threads worth reading.

    1. Run every (subreddit, query) search, keeping one entry per post id —
       the sighting with the highest title/self-text pain score.
    2. Engagement first: keep threads with at least ``args.min_comments``
       comments, order them by comment count then upvotes, and fetch comments
       for the first ``args.dive`` of them.
    3. Rank the dived threads by engagement plus on-spine discussion, keeping
       only on-domain threads that have some pain signal.

    Returns the ranked list of post dicts, best first.
    """
    best_by_id = {}
    for sub in subs:
        for query in queries:
            print(f" r/{sub} :: {query!r}", file=sys.stderr)
            for post in search(sub, query, args.time, args.limit):
                post_id = post["id"]
                if not post_id:
                    continue
                title_score, title_hits = score_text(post["title"] + " " + post["selftext"])
                post["score_title"] = title_score
                post["hits"] = title_hits
                # a post found through several queries keeps its best sighting
                known = best_by_id.get(post_id)
                if known is None or title_score > known.get("score_title", -1):
                    best_by_id[post_id] = post
            time.sleep(REQUEST_PAUSE)

    # The signal is a live debate, not a keyword match: threads with few
    # comments are dropped even when their text matches perfectly.
    engaging = sorted(
        (p for p in best_by_id.values() if p["num_comments"] >= args.min_comments),
        key=lambda p: (p["num_comments"], p["score"]),
        reverse=True,
    )
    dive = engaging[: args.dive]
    print(f" {len(engaging)} threads >= {args.min_comments} comments; "
          f"diving top {len(dive)}", file=sys.stderr)

    for thread in dive:
        print(f" comments ({thread['num_comments']}c): {thread['title'][:55]!r}", file=sys.stderr)
        on_spine = []
        for comment in top_comments(thread["permalink"]):
            weight, hits = score_text(comment["body"])
            if weight <= 0:
                continue
            comment["score_juicy"] = weight
            comment["hits"] = hits
            comment["quotes"] = juicy_sentences(comment["body"])
            on_spine.append(comment)
        on_spine.sort(key=lambda c: (c["score_juicy"], c["score"]), reverse=True)
        thread["juicy_comments"] = on_spine[:6]
        thread["n_juicy_comments"] = len(on_spine)
        thread["score_comments"] = sum(c["score_juicy"] for c in on_spine[:3])
        time.sleep(COMMENT_FETCH_PAUSE)

    ranked = []
    for thread in dive:
        thread.setdefault("juicy_comments", [])
        thread.setdefault("score_comments", 0)
        thread.setdefault("n_juicy_comments", 0)
        # comments outweigh upvotes; on-spine discussion adds a boost on top
        thread["engagement"] = thread["num_comments"] + 0.25 * thread["score"]
        thread["rank_score"] = (thread["engagement"] + 3 * thread["score_comments"]
                                + thread["score_title"])
        has_spine_signal = thread["score_comments"] > 0 or thread["score_title"] > 0
        if is_on_domain(thread) and has_spine_signal:
            ranked.append(thread)

    ranked.sort(key=lambda p: (p["rank_score"], p["n_juicy_comments"]), reverse=True)
    return ranked
435
+
436
+
437
def write_digest(ranked, args):
    """Write the ranked threads to ``args.out`` as a Markdown digest.

    The first ``args.top_threads`` threads get a detailed section (URL, date,
    counts, pain hits, quoted sentences from the self-text and from each
    on-spine comment); the remaining ones are listed compactly under
    "More live candidates".

    The parent directory of ``args.out`` is created when missing, so the
    default ``Docs/...`` path cannot fail at the very end of a long harvest
    just because the folder does not exist yet.
    """
    import os  # local import: the module's import block does not include os

    lines = []
    lines.append("# Reddit signal harvest")
    lines.append("")
    lines.append(f"> Generated {datetime.now().strftime('%Y-%m-%d %H:%M')} · "
                 f"t={args.time} · ranked by LIVE ENGAGEMENT (comments) + on-spine discussion.")
    lines.append("> Paste-ready for Docs/market_signals.md triage. Verbatim quotes only.")
    lines.append("")

    for i, p in enumerate(ranked[: args.top_threads], 1):
        lines.append(f"## {i}. [{p['num_comments']}c · ↑{p['score']}] "
                     f"r/{p['sub']} — {p['title']}")
        lines.append("")
        lines.append(f"- **URL:** {p['permalink']}")
        lines.append(f"- **Date:** {p['created']} · **Comments:** {p['num_comments']} · "
                     f"**Upvotes:** {p['score']} · **On-spine comments:** "
                     f"{p.get('n_juicy_comments', 0)} · **Found via:** {p['matched_query']!r}")
        lines.append(f"- **Pain hits:** {', '.join(p['hits']) or '—'}")
        if p.get("selftext"):
            for q in juicy_sentences(p["selftext"]):
                lines.append(f" - > {q}")
        for c in p.get("juicy_comments", []):
            lines.append(f"- **comment** (↑{c['score']}, {c['created']}) "
                         f"hits: {', '.join(c['hits'])}")
            for q in c["quotes"]:
                lines.append(f" - > {q}")
        lines.append("")

    # remaining threads, compact
    rest = ranked[args.top_threads:]
    if rest:
        lines.append("---")
        lines.append("")
        lines.append("### More live candidates")
        lines.append("")
        for p in rest:
            lines.append(f"- [{p['num_comments']}c ↑{p['score']}] r/{p['sub']} "
                         f"({p['created']}) — {p['title']} — {p['permalink']}")
        lines.append("")

    # A bare filename has no directory part, and makedirs("") would raise.
    parent = os.path.dirname(args.out)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(args.out, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
479
+
480
+
481
+ if __name__ == "__main__":
482
+ main()
@@ -1,18 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Bash(npm run *)",
5
- "WebSearch",
6
- "Bash(git checkout *)",
7
- "Bash(node ./dist/cli.js tests/Regression/nornenv-templates/extends-resolution.norn -s PrintResolvedValues -e prod_uk)",
8
- "Bash(node ./dist/cli.js tests/Regression/nornenv-templates/extends-resolution.norn -s PrintResolvedValues -e prod_us)",
9
- "Bash(node ./dist/cli.js tests/Regression/nornenv-templates/extends-resolution.norn -s PrintResolvedValues -e diamond)",
10
- "Bash(node ./dist/cli.js tests/Regression/nornenv-templates/extends-resolution.norn -s PrintResolvedValues -e base)",
11
- "Bash(mv .nornenv .nornenv.bak)",
12
- "Bash(cp cycle.nornenv .nornenv)",
13
- "Bash(timeout 10 node ../../../dist/cli.js extends-resolution.norn -s PrintResolvedValues -e c)",
14
- "Bash(time node ../../../dist/cli.js extends-resolution.norn -s PrintResolvedValues -e c)",
15
- "Bash(time node /Users/petercrest/Worktable/Projects/vsApi/dist/cli.js extends-resolution.norn -s PrintResolvedValues -e c)"
16
- ]
17
- }
18
- }