@grainulation/silo 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,180 @@
1
+ {
2
+ "meta": {
3
+ "id": "coverage-ramp-playbook-v2-generalized",
4
+ "name": "coverage-ramp-playbook-v2-generalized",
5
+ "type": "claims",
6
+ "claimCount": 14,
7
+ "hash": "4288c54fcbe0d3ea836bea7dd17f660d9e747b57929726a333c0b46d9f535208",
8
+ "storedAt": "2026-03-24T16:48:23.846Z"
9
+ },
10
+ "claims": [
11
+ {
12
+ "id": "p001",
13
+ "type": "recommendation",
14
+ "topic": "phase-1-exclusions",
15
+ "content": "PHASE 1: Audit jest.config.js exclusions BEFORE writing tests. Categories to exclude: (1) generated/codegen files, (2) canvas/d3/charting that requires real browser, (3) vendored third-party libs, (4) barrel/index re-exports if trivial, (5) platform-specific files (Cordova/Electron-only). Each exclusion must be justified. Run coverage to establish the MEASURED scope — this is your denominator. Removing unjustified exclusions (like bootstrap/) later can ADD to denominator, so only remove when you have tests ready.",
16
+ "evidence": "Removing bootstrap/** exclusion without tests dropped coverage. Always test BEFORE removing exclusions.",
17
+ "tags": [
18
+ "phase-1",
19
+ "exclusions",
20
+ "denominator"
21
+ ]
22
+ },
23
+ {
24
+ "id": "p002",
25
+ "type": "recommendation",
26
+ "topic": "phase-2-zero-cov-grind",
27
+ "content": "PHASE 2: Write tests for all zero-coverage files using parallel worktree agents. Key rules: (1) Verify files exist on disk with fs.existsSync before targeting (coverage reports go stale), (2) Each agent gets 15-20 files, (3) Agents ONLY create new .test.* files — never edit jest.config or package.json, (4) Run 4-5 agents at a time, (5) After each wave: copy test files from worktree, run prettier, audit against testing rules, verify tests pass. Sort files by churn score × testability for priority.",
28
+ "evidence": "Coverage reports list files that may have been deleted. ~30% of files from stale reports don't exist. Verifying existence first prevents wasted agent time.",
29
+ "tags": [
30
+ "phase-2",
31
+ "zero-coverage",
32
+ "agents"
33
+ ]
34
+ },
35
+ {
36
+ "id": "p003",
37
+ "type": "recommendation",
38
+ "topic": "phase-3-deep-test-pattern",
39
+ "content": "PHASE 3: Deepen partial coverage using .deep.test.js files (NEVER rewrite existing tests). Create new test files alongside existing ones named *.deep.test.js that target only uncovered paths. If a file already has .deep.test.js, use .deep2.test.js. This is critical — worktree agents start from git HEAD and will REWRITE existing test files with shallower versions if told to 'deepen'. The .deep.test.js pattern is purely additive.",
40
+ "evidence": "Lost 1.5% coverage when agents 'deepened' files by rewriting them. The .deep.test.js pattern prevents this entirely.",
41
+ "tags": [
42
+ "phase-3",
43
+ "deepening",
44
+ "additive",
45
+ "critical"
46
+ ]
47
+ },
48
+ {
49
+ "id": "p004",
50
+ "type": "constraint",
51
+ "topic": "never-rewrite-tests",
52
+ "content": "NEVER let worktree agents modify existing test files. Worktrees start from git HEAD, not your working tree. An agent that 'deepens' a test file will read the HEAD version (which may be older/shallower) and write a 'new' version that loses coverage from your current working tree. Only create NEW test files from agents. If you need to modify existing tests, do it manually in the main tree.",
53
+ "evidence": "Coverage dropped from 60.01% to 59.22% when deepening agents overwrote test files with shallower versions from git HEAD.",
54
+ "tags": [
55
+ "constraint",
56
+ "critical",
57
+ "worktree"
58
+ ]
59
+ },
60
+ {
61
+ "id": "p005",
62
+ "type": "recommendation",
63
+ "topic": "churn-based-prioritization",
64
+ "content": "Prioritize files by git churn score: `git log --format=format: --name-only --since=12.month | sort | uniq -c | sort -nr`. High churn = actively developed = highest risk from low coverage = test first. Zero-churn files (0 commits in 12+ months) are candidates for exclusion since nobody is changing them. Use 3-year window for exclusion decisions (stricter), 1-year for prioritization (more selective).",
65
+ "evidence": "Churn-based prioritization ensures the most actively developed files get tested first. Zero-churn exclusions are defensible in code review.",
66
+ "tags": [
67
+ "prioritization",
68
+ "churn",
69
+ "strategy"
70
+ ]
71
+ },
72
+ {
73
+ "id": "p006",
74
+ "type": "recommendation",
75
+ "topic": "wave-structure",
76
+ "content": "Structure work in 4 phases with waves of 5 parallel agents each: (1) Zero-coverage grind — exhaust all untested files, sorted by churn × stmts. (2) Mixin/helper extraction — extract pure functions from untestable files into .helpers.js, test those. (3) Deep testing — .deep.test.js for 70-80% files (cheapest gains), then 50-70%. (4) Prescription mop-up — target specific uncovered lines from coverage-final.json. Each wave: launch agents → copy files → prettier → audit → verify → commit → coverage check.",
77
+ "evidence": "This ordering maximizes ROI. Phase 1 gives ~60% of gains. Phase 3 targets cheapest per-statement gains. Phase 4 is surgical.",
78
+ "tags": [
79
+ "process",
80
+ "waves",
81
+ "structure"
82
+ ]
83
+ },
84
+ {
85
+ "id": "p007",
86
+ "type": "recommendation",
87
+ "topic": "module-resolution-fixes",
88
+ "content": "Common jest module resolution blockers and fixes: (1) Missing bare-import aliases — add `'^@foo$': '<rootDir>/path/index.js'` to moduleNameMapper (not just `'^@foo/(.*)$'`). (2) Generated files (.gen.js) — create thin re-export stub: `export { default } from './File.gen'`. (3) Build-time-only modules — create stub files that re-export from canonical locations. (4) Files using `{ virtual: true }` in jest.mock don't work when moduleNameMapper already maps the path — use stubs instead.",
89
+ "evidence": "5 bare-import aliases + 4 module stubs unblocked ~15 previously-untestable files worth 500+ statements.",
90
+ "tags": [
91
+ "infrastructure",
92
+ "jest",
93
+ "module-resolution"
94
+ ]
95
+ },
96
+ {
97
+ "id": "p008",
98
+ "type": "recommendation",
99
+ "topic": "mixin-helper-extraction",
100
+ "content": "For excluded createReactClass mixins: DON'T refactor the mixin itself. Instead, identify pure module-scope functions (no `this` binding, no side effects) and extract them into a .helpers.js file. Test the helpers. The new file is automatically in-scope for coverage. Pattern: read first 100 lines of mixin → find functions defined outside the mixin object → extract to FileName.helpers.js → write thorough tests. Most mixins have 0-6 extractable pure functions. Assess before extracting — many have none.",
101
+ "evidence": "5 mixins assessed → 15 pure functions extracted → 55 tests. 6 other mixins had 0 extractable functions (all this-bound).",
102
+ "tags": [
103
+ "refactoring",
104
+ "mixins",
105
+ "helpers"
106
+ ]
107
+ },
108
+ {
109
+ "id": "p009",
110
+ "type": "recommendation",
111
+ "topic": "diminishing-returns-strategy",
112
+ "content": "Coverage gains follow a curve: 0-60% is fast (zero-cov grind), 60-75% is moderate (mix of new + deep), 75-80% is slow (deep testing of already-tested files). Strategy shifts: (1) Below 70%: batch zero-cov files, 15-20 per agent. (2) 70-80%: target files at 70-79% with smallest gaps (5-15 uncov stmts each — cheapest per-file). (3) Above 78%: test tiny 0% files (barrel re-exports, configs) that don't grow denominator. (4) The denominator grows ~5-10 stmts per new test file discovered, so the target keeps moving.",
113
+ "evidence": "Each wave above 78% yielded ~0.1-0.3% gain vs ~2-5% below 60%. Targeting 75-80% files with 5-8 uncov stmts was the most efficient late-game strategy.",
114
+ "tags": [
115
+ "strategy",
116
+ "efficiency",
117
+ "late-game"
118
+ ]
119
+ },
120
+ {
121
+ "id": "p010",
122
+ "type": "recommendation",
123
+ "topic": "testing-rules-enforcement",
124
+ "content": "Embed testing rules in EVERY agent prompt. Key rules to enforce: (1) import from @test-utils not @testing-library/react, (2) userEvent.setup() not fireEvent, (3) query by role/label first, (4) mock at boundaries not local components, (5) top-level imports only (no inline require), (6) max 2 describe nesting, (7) no 'should' in test names, (8) AAA pattern, (9) beforeEach with jest.clearAllMocks. Run audit after each wave: grep for violations, fix before committing.",
125
+ "evidence": "3 violations found in 1,700+ test files when rules were embedded in every prompt. Without embedding, early waves had 292 require() and 7 fireEvent violations.",
126
+ "tags": [
127
+ "quality",
128
+ "rules",
129
+ "enforcement"
130
+ ]
131
+ },
132
+ {
133
+ "id": "p011",
134
+ "type": "recommendation",
135
+ "topic": "exclusion-analysis",
136
+ "content": "When considering excluding files from coverage: (1) NEVER exclude to game the number — only exclude genuinely untestable code. (2) Cross-reference with churn: 0 commits in 3+ years = safe to exclude. 0 in 1 year = gray area. Any recent churn = keep measured. (3) Removing exclusions ADDS to denominator — only remove when tests are ready. (4) Categories that are permanently untestable in jsdom: canvas rendering, d3/charting, golden layout, CodeMirror/Monaco, PhoneGap/Cordova native APIs. (5) Track two numbers: measured scope coverage and original codebase coverage.",
137
+ "evidence": "Removing bootstrap/** exclusion without ready tests dropped coverage by 0.8%. Churn-based exclusion analysis showed 0 files with 0 churn in 5 years — everything was touched at least once.",
138
+ "tags": [
139
+ "exclusions",
140
+ "transparency",
141
+ "strategy"
142
+ ]
143
+ },
144
+ {
145
+ "id": "p012",
146
+ "type": "recommendation",
147
+ "topic": "merge-checklist",
148
+ "content": "After EVERY agent completes, run this checklist: (1) Copy ONLY .test.* and .helpers.* files from worktree (never jest.config.js or package.json). (2) npx prettier --write on all new files. (3) Audit against testing rules (grep for violations). (4) Verify jest.config.js thresholds not stomped. (5) npx jest --ci --no-coverage on new files to verify pass. (6) git add + commit. (7) Run full coverage after each wave to track progress. This checklist prevents the most common agent mistakes.",
149
+ "evidence": "Agent overwrote jest.config.js once, resetting thresholds. Another agent created module stubs that shouldn't have been copied to main tree. Checklist catches these.",
150
+ "tags": [
151
+ "checklist",
152
+ "quality",
153
+ "process"
154
+ ]
155
+ },
156
+ {
157
+ "id": "p013",
158
+ "type": "factual",
159
+ "topic": "velocity-benchmarks",
160
+ "content": "On a ~2,000-file / 300K-line React codebase: Phase 1 (zero-cov grind) covered ~350 files in ~8 waves, gaining ~25% coverage. Phase 2 (mixin extraction) added ~200 stmts from 5 mixins. Phase 3 (deep testing) pushed from 77% to 80% over ~6 waves. Total: 45% → 80% in one extended session, ~10,000 new tests, ~1,700 test files. Each 5-agent wave takes 5-15 minutes and yields 50-500 new covered stmts depending on phase.",
161
+ "evidence": "NT-38884 session data, March 2026.",
162
+ "tags": [
163
+ "benchmarks",
164
+ "velocity"
165
+ ]
166
+ },
167
+ {
168
+ "id": "p014",
169
+ "type": "recommendation",
170
+ "topic": "branch-coverage-strategy",
171
+ "content": "Branches are the hardest metric to improve. Each if/else, ternary, switch, &&, || has two paths. To specifically target branches: (1) Score files by uncovered-branches count, not stmts. (2) Tell agents explicitly to test every conditional path. (3) Create .branch.test.js files for branch-specific testing. (4) Small zero-cov files with high branch counts (e.g., 10 branches in a 15-stmt utility) give the best branch ROI. (5) Branches in complex components (Recoil state, router conditions) are the hardest — save for manual deepening.",
172
+ "evidence": "Branch-targeted waves moved branches from 58% to 66% — each wave adding ~1-2%. Statements and functions moved faster because they benefit from any test, while branches require path-specific tests.",
173
+ "tags": [
174
+ "branches",
175
+ "strategy",
176
+ "hard-metric"
177
+ ]
178
+ }
179
+ ]
180
+ }
@@ -8,7 +8,11 @@
8
8
  "type": "factual",
9
9
  "topic": "ETL vs ELT tradeoffs",
10
10
  "content": "ETL (Extract-Transform-Load) transforms data before loading into the target, reducing storage costs but making the pipeline brittle to schema changes. ELT (Extract-Load-Transform) loads raw data first and transforms in the warehouse, leveraging cheap columnar storage and enabling reprocessing without re-extraction. ELT is the dominant pattern for cloud data warehouses (Snowflake, BigQuery, Redshift).",
11
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
11
+ "source": {
12
+ "origin": "best-practice",
13
+ "artifact": null,
14
+ "connector": null
15
+ },
12
16
  "evidence": "documented",
13
17
  "status": "active",
14
18
  "phase_added": "define",
@@ -22,7 +26,11 @@
22
26
  "type": "constraint",
23
27
  "topic": "schema evolution compatibility",
24
28
  "content": "Schema changes must be backwards-compatible for consumers: adding optional fields (safe), removing fields (breaking), renaming fields (breaking), changing types (breaking). Use schema registries (Confluent, AWS Glue) to enforce compatibility rules. Avro supports backward, forward, and full compatibility modes. Breaking changes require a new topic or table version.",
25
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
29
+ "source": {
30
+ "origin": "best-practice",
31
+ "artifact": null,
32
+ "connector": null
33
+ },
26
34
  "evidence": "documented",
27
35
  "status": "active",
28
36
  "phase_added": "define",
@@ -36,7 +44,11 @@
36
44
  "type": "recommendation",
37
45
  "topic": "data quality checks in pipelines",
38
46
  "content": "Every data pipeline should include automated quality checks: row count expectations (within 10% of previous run), null rate thresholds per column, uniqueness constraints on key columns, freshness checks (data arrived within expected window), and referential integrity across tables. Use tools like Great Expectations, dbt tests, or Soda to define checks as code.",
39
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
47
+ "source": {
48
+ "origin": "best-practice",
49
+ "artifact": null,
50
+ "connector": null
51
+ },
40
52
  "evidence": "production",
41
53
  "status": "active",
42
54
  "phase_added": "define",
@@ -64,7 +76,11 @@
64
76
  "type": "factual",
65
77
  "topic": "batch vs streaming latency tradeoffs",
66
78
  "content": "Batch processing (hourly/daily) is simpler, cheaper, and sufficient when business requirements tolerate T+1 or T+hour latency. Streaming (Kafka, Kinesis, Flink) delivers sub-second latency but costs 3-10x more in infrastructure and 2-3x more in engineering complexity (exactly-once semantics, out-of-order events, state management). Default to batch unless latency requirements demand streaming.",
67
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
79
+ "source": {
80
+ "origin": "best-practice",
81
+ "artifact": null,
82
+ "connector": null
83
+ },
68
84
  "evidence": "production",
69
85
  "status": "active",
70
86
  "phase_added": "define",
@@ -78,7 +94,11 @@
78
94
  "type": "recommendation",
79
95
  "topic": "change data capture pattern",
80
96
  "content": "Change Data Capture (CDC) reads database transaction logs (binlog, WAL) to stream row-level changes to downstream systems. Use Debezium for open-source CDC from PostgreSQL, MySQL, MongoDB, and SQL Server. CDC avoids polling overhead, captures deletes (which polling misses), and preserves event ordering. Initial snapshot + streaming log is the standard bootstrap pattern.",
81
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
97
+ "source": {
98
+ "origin": "best-practice",
99
+ "artifact": null,
100
+ "connector": null
101
+ },
82
102
  "evidence": "documented",
83
103
  "status": "active",
84
104
  "phase_added": "define",
@@ -92,7 +112,11 @@
92
112
  "type": "risk",
93
113
  "topic": "late-arriving data in event streams",
94
114
  "content": "Event streams contain late-arriving data due to network delays, mobile offline sync, or batch uploads. Windowed aggregations must handle late data with watermarks (maximum allowed lateness). Flink's default allowed lateness is 0 (no late data tolerance). Set watermarks based on observed p99 lateness in your data. Late events beyond the watermark are either dropped or routed to a dead-letter topic for manual reconciliation.",
95
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
115
+ "source": {
116
+ "origin": "best-practice",
117
+ "artifact": null,
118
+ "connector": null
119
+ },
96
120
  "evidence": "production",
97
121
  "status": "active",
98
122
  "phase_added": "define",
@@ -106,7 +130,11 @@
106
130
  "type": "recommendation",
107
131
  "topic": "idempotent pipeline design",
108
132
  "content": "Data pipelines must be idempotent: running the same pipeline twice with the same input produces the same output without duplicates. Implement with: write to a staging table, then MERGE/upsert to the target (not INSERT). Use partition overwrite for append-only tables. Idempotency enables safe retries after partial failures, which occur in approximately 5-10% of batch runs.",
109
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
133
+ "source": {
134
+ "origin": "best-practice",
135
+ "artifact": null,
136
+ "connector": null
137
+ },
110
138
  "evidence": "production",
111
139
  "status": "active",
112
140
  "phase_added": "define",
@@ -120,7 +148,11 @@
120
148
  "type": "estimate",
121
149
  "topic": "Parquet vs CSV storage savings",
122
150
  "content": "Columnar formats (Parquet, ORC) reduce storage by 75-90% compared to CSV/JSON for analytical workloads, and query performance improves 10-100x due to column pruning and predicate pushdown. A 100 GB CSV dataset typically compresses to 5-15 GB in Parquet with Snappy compression. Always use Parquet or ORC for data lake storage, never raw CSV.",
123
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
151
+ "source": {
152
+ "origin": "best-practice",
153
+ "artifact": null,
154
+ "connector": null
155
+ },
124
156
  "evidence": "tested",
125
157
  "status": "active",
126
158
  "phase_added": "define",
@@ -134,7 +166,11 @@
134
166
  "type": "constraint",
135
167
  "topic": "PII handling in data pipelines",
136
168
  "content": "PII must be classified, tagged, and handled according to data governance policy at ingestion time, not after the fact. Apply column-level encryption or tokenization for sensitive fields (SSN, email, phone). Implement row-level access controls in the warehouse. Maintain a data catalog that tracks PII lineage from source to all downstream tables. GDPR right-to-erasure requires the ability to delete a user across all derived datasets.",
137
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
169
+ "source": {
170
+ "origin": "best-practice",
171
+ "artifact": null,
172
+ "connector": null
173
+ },
138
174
  "evidence": "documented",
139
175
  "status": "active",
140
176
  "phase_added": "define",
@@ -148,7 +184,11 @@
148
184
  "type": "factual",
149
185
  "topic": "exactly-once semantics cost",
150
186
  "content": "Exactly-once processing in streaming systems (Kafka transactions, Flink checkpointing) adds 10-30% throughput overhead compared to at-least-once. At-least-once with idempotent consumers (using unique event IDs and upsert writes) achieves the same end result with lower complexity. True exactly-once is only required when side effects cannot be made idempotent (sending emails, charging payments).",
151
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
187
+ "source": {
188
+ "origin": "best-practice",
189
+ "artifact": null,
190
+ "connector": null
191
+ },
152
192
  "evidence": "tested",
153
193
  "status": "active",
154
194
  "phase_added": "define",
@@ -169,7 +209,12 @@
169
209
  "timestamp": "2025-01-01T00:00:00.000Z",
170
210
  "conflicts_with": [],
171
211
  "resolved_by": null,
172
- "tags": ["data-engineering", "dbt", "transformation", "analytics-engineering"]
212
+ "tags": [
213
+ "data-engineering",
214
+ "dbt",
215
+ "transformation",
216
+ "analytics-engineering"
217
+ ]
173
218
  }
174
219
  ]
175
- }
220
+ }
@@ -8,7 +8,11 @@
8
8
  "type": "constraint",
9
9
  "topic": "Core Web Vitals thresholds",
10
10
  "content": "Google Core Web Vitals targets for good UX: LCP (Largest Contentful Paint) under 2.5s, INP (Interaction to Next Paint) under 200ms, CLS (Cumulative Layout Shift) under 0.1. These directly affect search ranking. Measure with field data (CrUX, RUM) not just lab data (Lighthouse), since field p75 is what Google uses.",
11
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
11
+ "source": {
12
+ "origin": "best-practice",
13
+ "artifact": null,
14
+ "connector": null
15
+ },
12
16
  "evidence": "documented",
13
17
  "status": "active",
14
18
  "phase_added": "define",
@@ -22,7 +26,11 @@
22
26
  "type": "recommendation",
23
27
  "topic": "JavaScript bundle size budget",
24
28
  "content": "Initial JavaScript bundle should be under 150 KB compressed (gzip) for mobile-first applications. Total page weight including images should be under 1.5 MB. Every 100 KB of JavaScript adds approximately 350ms parse/compile time on median mobile devices. Use bundle analyzer (webpack-bundle-analyzer, source-map-explorer) in CI to enforce budgets.",
25
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
29
+ "source": {
30
+ "origin": "best-practice",
31
+ "artifact": null,
32
+ "connector": null
33
+ },
26
34
  "evidence": "tested",
27
35
  "status": "active",
28
36
  "phase_added": "define",
@@ -36,7 +44,11 @@
36
44
  "type": "constraint",
37
45
  "topic": "WCAG 2.1 AA compliance",
38
46
  "content": "WCAG 2.1 Level AA is the legal standard in the US (ADA), EU (EAA 2025), and Canada (AODA). Key requirements: color contrast ratio 4.5:1 for normal text and 3:1 for large text, all interactive elements keyboard-accessible, form inputs have visible labels, images have alt text, focus indicators visible, no content reliant solely on color.",
39
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
47
+ "source": {
48
+ "origin": "best-practice",
49
+ "artifact": null,
50
+ "connector": null
51
+ },
40
52
  "evidence": "documented",
41
53
  "status": "active",
42
54
  "phase_added": "define",
@@ -50,7 +62,11 @@
50
62
  "type": "factual",
51
63
  "topic": "SSR vs CSR tradeoffs",
52
64
  "content": "Server-Side Rendering (SSR) provides faster First Contentful Paint and better SEO but increases server load and Time to Interactive (hydration cost). Client-Side Rendering (CSR) has faster subsequent navigations but a blank page until JS loads. Use SSR/SSG for content-heavy, SEO-critical pages; CSR for authenticated app-like interfaces behind login.",
53
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
65
+ "source": {
66
+ "origin": "best-practice",
67
+ "artifact": null,
68
+ "connector": null
69
+ },
54
70
  "evidence": "documented",
55
71
  "status": "active",
56
72
  "phase_added": "define",
@@ -64,7 +80,11 @@
64
80
  "type": "risk",
65
81
  "topic": "third-party script performance",
66
82
  "content": "Third-party scripts (analytics, ads, chat widgets) are the leading cause of performance regression. A single poorly-written third-party script can add 500ms-2s to page load. Load all third-party scripts with async or defer. Set performance budgets that include third-party weight. Use a tag manager with server-side option to control loading.",
67
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
83
+ "source": {
84
+ "origin": "best-practice",
85
+ "artifact": null,
86
+ "connector": null
87
+ },
68
88
  "evidence": "production",
69
89
  "status": "active",
70
90
  "phase_added": "define",
@@ -78,7 +98,11 @@
78
98
  "type": "recommendation",
79
99
  "topic": "image optimization pipeline",
80
100
  "content": "Serve images in WebP or AVIF format (30-50% smaller than JPEG). Use srcset with 2-4 size variants for responsive images. Lazy-load images below the fold with loading='lazy'. Set explicit width and height attributes to prevent CLS. Automate optimization in the build pipeline or use an image CDN (Cloudinary, imgix, Cloudflare Images).",
81
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
101
+ "source": {
102
+ "origin": "best-practice",
103
+ "artifact": null,
104
+ "connector": null
105
+ },
82
106
  "evidence": "tested",
83
107
  "status": "active",
84
108
  "phase_added": "define",
@@ -92,7 +116,11 @@
92
116
  "type": "recommendation",
93
117
  "topic": "state management selection criteria",
94
118
  "content": "Use local component state (useState/signals) for UI-only state. Use URL state (query params, path) for shareable/bookmarkable state. Use server state libraries (TanStack Query, SWR) for API data with caching. Use global state (Zustand, Redux, or context) only for truly cross-cutting client state (auth, theme, feature flags). Most applications over-use global state.",
95
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
119
+ "source": {
120
+ "origin": "best-practice",
121
+ "artifact": null,
122
+ "connector": null
123
+ },
96
124
  "evidence": "documented",
97
125
  "status": "active",
98
126
  "phase_added": "define",
@@ -106,7 +134,11 @@
106
134
  "type": "risk",
107
135
  "topic": "layout shift from web fonts",
108
136
  "content": "Custom web fonts cause either FOIT (Flash of Invisible Text) or FOUT (Flash of Unstyled Text), both contributing to CLS. Mitigate with font-display: swap, preload critical fonts with <link rel='preload'>, subset fonts to used character ranges (Latin subset is ~30 KB vs 200 KB for full Unicode), and use size-adjust to match fallback metrics.",
109
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
137
+ "source": {
138
+ "origin": "best-practice",
139
+ "artifact": null,
140
+ "connector": null
141
+ },
110
142
  "evidence": "tested",
111
143
  "status": "active",
112
144
  "phase_added": "define",
@@ -120,7 +152,11 @@
120
152
  "type": "factual",
121
153
  "topic": "code splitting impact",
122
154
  "content": "Route-based code splitting typically reduces initial bundle size by 40-60%. Dynamic import() splits a module into a separate chunk loaded on demand. For React, use React.lazy() with Suspense boundaries. Split at route boundaries first, then heavy component boundaries (charts, editors, maps). Avoid splitting components under 30 KB as the HTTP overhead negates the benefit.",
123
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
155
+ "source": {
156
+ "origin": "best-practice",
157
+ "artifact": null,
158
+ "connector": null
159
+ },
124
160
  "evidence": "tested",
125
161
  "status": "active",
126
162
  "phase_added": "define",
@@ -134,7 +170,11 @@
134
170
  "type": "constraint",
135
171
  "topic": "keyboard navigation requirements",
136
172
  "content": "Every interactive element must be reachable and operable via keyboard alone. Tab order must follow visual reading order. Focus must be visible (minimum 2px outline, 3:1 contrast ratio against adjacent colors). Custom components (dropdowns, modals, tabs) must implement ARIA roles and keyboard patterns from WAI-ARIA Authoring Practices. Test by unplugging the mouse.",
137
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
173
+ "source": {
174
+ "origin": "best-practice",
175
+ "artifact": null,
176
+ "connector": null
177
+ },
138
178
  "evidence": "documented",
139
179
  "status": "active",
140
180
  "phase_added": "define",
@@ -162,7 +202,11 @@
162
202
  "type": "recommendation",
163
203
  "topic": "error boundaries and fallbacks",
164
204
  "content": "Wrap major UI sections in error boundaries that catch render errors and display a fallback UI instead of a white screen. Log caught errors to your observability stack. Provide a retry mechanism in the fallback. Without error boundaries, a single failing component crashes the entire page. Test error boundaries by deliberately throwing in development.",
165
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
205
+ "source": {
206
+ "origin": "best-practice",
207
+ "artifact": null,
208
+ "connector": null
209
+ },
166
210
  "evidence": "documented",
167
211
  "status": "active",
168
212
  "phase_added": "define",
@@ -172,4 +216,4 @@
172
216
  "tags": ["frontend", "error-handling", "resilience", "react"]
173
217
  }
174
218
  ]
175
- }
219
+ }
@@ -0,0 +1,179 @@
1
+ {
2
+ "name": "Hackathon: Best Use of AI",
3
+ "description": "Scoring rubric and seed claims for the 'Best Use of AI' hackathon category. Features a unique ai_leverage sub-score rewarding quality AI-assisted research over shallow generation.",
4
+ "version": "1.0.0",
5
+ "claims": [
6
+ {
7
+ "id": "hai-001",
8
+ "type": "constraint",
9
+ "topic": "AI leverage scoring",
10
+ "content": "AI leverage sub-score = (claims_from_ai_assisted_research / total_claims) * quality_multiplier. Quality multiplier = average evidence tier of AI-assisted claims. Weight: 30%. Rewards using AI for deep research, not shallow claim generation.",
11
+ "source": {
12
+ "origin": "best-practice",
13
+ "artifact": null,
14
+ "connector": null
15
+ },
16
+ "evidence": "documented",
17
+ "status": "active",
18
+ "phase_added": "define",
19
+ "timestamp": "2026-01-01T00:00:00.000Z",
20
+ "conflicts_with": [],
21
+ "resolved_by": null,
22
+ "tags": ["hackathon", "scoring", "ai-leverage", "best-ai"]
23
+ },
24
+ {
25
+ "id": "hai-002",
26
+ "type": "constraint",
27
+ "topic": "evidence tier scoring",
28
+ "content": "Evidence tier sub-score uses weighted sum normalized to 0-100. Weight: 20%. AI-assisted claims should produce higher evidence tiers — web and documented, not just stated.",
29
+ "source": {
30
+ "origin": "best-practice",
31
+ "artifact": null,
32
+ "connector": null
33
+ },
34
+ "evidence": "documented",
35
+ "status": "active",
36
+ "phase_added": "define",
37
+ "timestamp": "2026-01-01T00:00:00.000Z",
38
+ "conflicts_with": [],
39
+ "resolved_by": null,
40
+ "tags": ["hackathon", "scoring", "evidence-tier", "best-ai"]
41
+ },
42
+ {
43
+ "id": "hai-003",
44
+ "type": "constraint",
45
+ "topic": "type diversity scoring",
46
+ "content": "Type diversity sub-score = (distinct_types / 6) * 100. Weight: 20%. Best AI use means leveraging AI for diverse research outputs — not just generating a list of facts.",
47
+ "source": {
48
+ "origin": "best-practice",
49
+ "artifact": null,
50
+ "connector": null
51
+ },
52
+ "evidence": "documented",
53
+ "status": "active",
54
+ "phase_added": "define",
55
+ "timestamp": "2026-01-01T00:00:00.000Z",
56
+ "conflicts_with": [],
57
+ "resolved_by": null,
58
+ "tags": ["hackathon", "scoring", "type-diversity", "best-ai"]
59
+ },
60
+ {
61
+ "id": "hai-004",
62
+ "type": "constraint",
63
+ "topic": "corroboration scoring",
64
+ "content": "Corroboration sub-score = witnessed_claims / total_claims * 100. Weight: 15%. AI can find cross-references humans miss — high corroboration signals effective AI-human collaboration.",
65
+ "source": {
66
+ "origin": "best-practice",
67
+ "artifact": null,
68
+ "connector": null
69
+ },
70
+ "evidence": "documented",
71
+ "status": "active",
72
+ "phase_added": "define",
73
+ "timestamp": "2026-01-01T00:00:00.000Z",
74
+ "conflicts_with": [],
75
+ "resolved_by": null,
76
+ "tags": ["hackathon", "scoring", "corroboration", "best-ai"]
77
+ },
78
+ {
79
+ "id": "hai-005",
80
+ "type": "constraint",
81
+ "topic": "challenge depth scoring",
82
+ "content": "Challenge depth sub-score = (challenge_claims + resolved_conflicts) / total_claims * 100. Weight: 15%. Best AI use includes AI-assisted devil's advocate — challenging claims and finding counter-evidence.",
83
+ "source": {
84
+ "origin": "best-practice",
85
+ "artifact": null,
86
+ "connector": null
87
+ },
88
+ "evidence": "documented",
89
+ "status": "active",
90
+ "phase_added": "define",
91
+ "timestamp": "2026-01-01T00:00:00.000Z",
92
+ "conflicts_with": [],
93
+ "resolved_by": null,
94
+ "tags": ["hackathon", "scoring", "challenge-depth", "best-ai"]
95
+ },
96
+ {
97
+ "id": "hai-006",
98
+ "type": "recommendation",
99
+ "topic": "AI collaboration patterns",
100
+ "content": "Effective AI research patterns: (1) AI finds sources, human validates and elevates evidence tier. (2) AI generates initial claims, human challenges and refines. (3) AI identifies conflicts between sources, human resolves. (4) AI drafts recommendations, human grounds in business context.",
101
+ "source": {
102
+ "origin": "best-practice",
103
+ "artifact": null,
104
+ "connector": null
105
+ },
106
+ "evidence": "stated",
107
+ "status": "active",
108
+ "phase_added": "define",
109
+ "timestamp": "2026-01-01T00:00:00.000Z",
110
+ "conflicts_with": [],
111
+ "resolved_by": null,
112
+ "tags": ["hackathon", "ai-patterns", "collaboration", "best-ai"]
113
+ },
114
+ {
115
+ "id": "hai-007",
116
+ "type": "risk",
117
+ "topic": "AI slop detection",
118
+ "content": "Risk of AI 'slop': bulk-generated low-quality claims that look comprehensive but lack depth. Signals: uniform evidence tiers, generic topics, no conflicts or challenges, repetitive phrasing across claims.",
119
+ "source": { "origin": "research", "artifact": null, "connector": null },
120
+ "evidence": "stated",
121
+ "status": "active",
122
+ "phase_added": "research",
123
+ "timestamp": "2026-01-01T00:00:00.000Z",
124
+ "conflicts_with": [],
125
+ "resolved_by": null,
126
+ "tags": ["hackathon", "gaming", "ai-slop", "risk"]
127
+ },
128
+ {
129
+ "id": "hai-008",
130
+ "type": "recommendation",
131
+ "topic": "MCP connector usage",
132
+ "content": "Best AI use should leverage MCP connectors: web search for real-time data, GitHub for code context, Confluence/Jira for organizational knowledge. Connector diversity signals sophisticated AI orchestration.",
133
+ "source": {
134
+ "origin": "best-practice",
135
+ "artifact": null,
136
+ "connector": null
137
+ },
138
+ "evidence": "stated",
139
+ "status": "active",
140
+ "phase_added": "define",
141
+ "timestamp": "2026-01-01T00:00:00.000Z",
142
+ "conflicts_with": [],
143
+ "resolved_by": null,
144
+ "tags": ["hackathon", "mcp-connectors", "ai-orchestration", "best-ai"]
145
+ },
146
+ {
147
+ "id": "hai-009",
148
+ "type": "recommendation",
149
+ "topic": "human judge criteria",
150
+ "content": "Human judges for 'Best Use of AI' score on: (1) human-AI synergy — did the team direct AI effectively or just let it run? (2) evidence elevation — did AI help reach higher evidence tiers? (3) creative prompting — did the team use novel approaches to extract insight?",
151
+ "source": { "origin": "research", "artifact": null, "connector": null },
152
+ "evidence": "stated",
153
+ "status": "active",
154
+ "phase_added": "research",
155
+ "timestamp": "2026-01-01T00:00:00.000Z",
156
+ "conflicts_with": [],
157
+ "resolved_by": null,
158
+ "tags": ["hackathon", "human-judging", "criteria", "best-ai"]
159
+ },
160
+ {
161
+ "id": "hai-010",
162
+ "type": "factual",
163
+ "topic": "HackEval precedent",
164
+ "content": "HackEval keeps AI as assistant, not decision-maker: structured rubrics with blind scoring to reduce bias, but final decisions remain human. AI Judge (Devpost) takes the opposite approach with full automation. The wheat hackathon model should be the middle ground.",
165
+ "source": {
166
+ "origin": "research",
167
+ "artifact": "https://www.scribd.com/document/973181548/HackEval-Full-Pitch",
168
+ "connector": null
169
+ },
170
+ "evidence": "web",
171
+ "status": "active",
172
+ "phase_added": "research",
173
+ "timestamp": "2026-01-01T00:00:00.000Z",
174
+ "conflicts_with": [],
175
+ "resolved_by": null,
176
+ "tags": ["hackathon", "precedent", "hackeval", "ai-judging"]
177
+ }
178
+ ]
179
+ }