@grainulation/silo 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,11 @@
8
8
  "type": "factual",
9
9
  "topic": "ETL vs ELT tradeoffs",
10
10
  "content": "ETL (Extract-Transform-Load) transforms data before loading into the target, reducing storage costs but making the pipeline brittle to schema changes. ELT (Extract-Load-Transform) loads raw data first and transforms in the warehouse, leveraging cheap columnar storage and enabling reprocessing without re-extraction. ELT is the dominant pattern for cloud data warehouses (Snowflake, BigQuery, Redshift).",
11
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
11
+ "source": {
12
+ "origin": "best-practice",
13
+ "artifact": null,
14
+ "connector": null
15
+ },
12
16
  "evidence": "documented",
13
17
  "status": "active",
14
18
  "phase_added": "define",
@@ -22,7 +26,11 @@
22
26
  "type": "constraint",
23
27
  "topic": "schema evolution compatibility",
24
28
  "content": "Schema changes must be backwards-compatible for consumers: adding optional fields (safe), removing fields (breaking), renaming fields (breaking), changing types (breaking). Use schema registries (Confluent, AWS Glue) to enforce compatibility rules. Avro supports forward, backward, and full compatibility modes. Breaking changes require a new topic or table version.",
25
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
29
+ "source": {
30
+ "origin": "best-practice",
31
+ "artifact": null,
32
+ "connector": null
33
+ },
26
34
  "evidence": "documented",
27
35
  "status": "active",
28
36
  "phase_added": "define",
@@ -36,7 +44,11 @@
36
44
  "type": "recommendation",
37
45
  "topic": "data quality checks in pipelines",
38
46
  "content": "Every data pipeline should include automated quality checks: row count expectations (within 10% of previous run), null rate thresholds per column, uniqueness constraints on key columns, freshness checks (data arrived within expected window), and referential integrity across tables. Use tools like Great Expectations, dbt tests, or Soda to define checks as code.",
39
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
47
+ "source": {
48
+ "origin": "best-practice",
49
+ "artifact": null,
50
+ "connector": null
51
+ },
40
52
  "evidence": "production",
41
53
  "status": "active",
42
54
  "phase_added": "define",
@@ -64,7 +76,11 @@
64
76
  "type": "factual",
65
77
  "topic": "batch vs streaming latency tradeoffs",
66
78
  "content": "Batch processing (hourly/daily) is simpler, cheaper, and sufficient when business requirements tolerate T+1 or T+hour latency. Streaming (Kafka, Kinesis, Flink) delivers sub-second latency but costs 3-10x more in infrastructure and 2-3x more in engineering complexity (exactly-once semantics, out-of-order events, state management). Default to batch unless latency requirements demand streaming.",
67
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
79
+ "source": {
80
+ "origin": "best-practice",
81
+ "artifact": null,
82
+ "connector": null
83
+ },
68
84
  "evidence": "production",
69
85
  "status": "active",
70
86
  "phase_added": "define",
@@ -78,7 +94,11 @@
78
94
  "type": "recommendation",
79
95
  "topic": "change data capture pattern",
80
96
  "content": "Change Data Capture (CDC) reads database transaction logs (binlog, WAL) to stream row-level changes to downstream systems. Use Debezium for open-source CDC from PostgreSQL, MySQL, MongoDB, and SQL Server. CDC avoids polling overhead, captures deletes (which polling misses), and preserves event ordering. Initial snapshot + streaming log is the standard bootstrap pattern.",
81
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
97
+ "source": {
98
+ "origin": "best-practice",
99
+ "artifact": null,
100
+ "connector": null
101
+ },
82
102
  "evidence": "documented",
83
103
  "status": "active",
84
104
  "phase_added": "define",
@@ -92,7 +112,11 @@
92
112
  "type": "risk",
93
113
  "topic": "late-arriving data in event streams",
94
114
  "content": "Event streams contain late-arriving data due to network delays, mobile offline sync, or batch uploads. Windowed aggregations must handle late data with watermarks (maximum allowed lateness). Flink's default allowed lateness is 0 (no late data tolerance). Set watermarks based on observed p99 lateness in your data. Late events beyond the watermark are either dropped or routed to a dead-letter topic for manual reconciliation.",
95
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
115
+ "source": {
116
+ "origin": "best-practice",
117
+ "artifact": null,
118
+ "connector": null
119
+ },
96
120
  "evidence": "production",
97
121
  "status": "active",
98
122
  "phase_added": "define",
@@ -106,7 +130,11 @@
106
130
  "type": "recommendation",
107
131
  "topic": "idempotent pipeline design",
108
132
  "content": "Data pipelines must be idempotent: running the same pipeline twice with the same input produces the same output without duplicates. Implement with: write to a staging table, then MERGE/upsert to the target (not INSERT). Use partition overwrite for append-only tables. Idempotency enables safe retries after partial failures, which occur in approximately 5-10% of batch runs.",
109
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
133
+ "source": {
134
+ "origin": "best-practice",
135
+ "artifact": null,
136
+ "connector": null
137
+ },
110
138
  "evidence": "production",
111
139
  "status": "active",
112
140
  "phase_added": "define",
@@ -120,7 +148,11 @@
120
148
  "type": "estimate",
121
149
  "topic": "Parquet vs CSV storage savings",
122
150
  "content": "Columnar formats (Parquet, ORC) reduce storage by 75-90% compared to CSV/JSON for analytical workloads, and query performance improves 10-100x due to column pruning and predicate pushdown. A 100 GB CSV dataset typically compresses to 5-15 GB in Parquet with Snappy compression. Always use Parquet or ORC for data lake storage, never raw CSV.",
123
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
151
+ "source": {
152
+ "origin": "best-practice",
153
+ "artifact": null,
154
+ "connector": null
155
+ },
124
156
  "evidence": "tested",
125
157
  "status": "active",
126
158
  "phase_added": "define",
@@ -134,7 +166,11 @@
134
166
  "type": "constraint",
135
167
  "topic": "PII handling in data pipelines",
136
168
  "content": "PII must be classified, tagged, and handled according to data governance policy at ingestion time, not after the fact. Apply column-level encryption or tokenization for sensitive fields (SSN, email, phone). Implement row-level access controls in the warehouse. Maintain a data catalog that tracks PII lineage from source to all downstream tables. GDPR right-to-erasure requires the ability to delete a user across all derived datasets.",
137
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
169
+ "source": {
170
+ "origin": "best-practice",
171
+ "artifact": null,
172
+ "connector": null
173
+ },
138
174
  "evidence": "documented",
139
175
  "status": "active",
140
176
  "phase_added": "define",
@@ -148,7 +184,11 @@
148
184
  "type": "factual",
149
185
  "topic": "exactly-once semantics cost",
150
186
  "content": "Exactly-once processing in streaming systems (Kafka transactions, Flink checkpointing) adds 10-30% throughput overhead compared to at-least-once. At-least-once with idempotent consumers (using unique event IDs and upsert writes) achieves the same end result with lower complexity. True exactly-once is only required when side effects cannot be made idempotent (sending emails, charging payments).",
151
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
187
+ "source": {
188
+ "origin": "best-practice",
189
+ "artifact": null,
190
+ "connector": null
191
+ },
152
192
  "evidence": "tested",
153
193
  "status": "active",
154
194
  "phase_added": "define",
@@ -169,7 +209,12 @@
169
209
  "timestamp": "2025-01-01T00:00:00.000Z",
170
210
  "conflicts_with": [],
171
211
  "resolved_by": null,
172
- "tags": ["data-engineering", "dbt", "transformation", "analytics-engineering"]
212
+ "tags": [
213
+ "data-engineering",
214
+ "dbt",
215
+ "transformation",
216
+ "analytics-engineering"
217
+ ]
173
218
  }
174
219
  ]
175
- }
220
+ }
@@ -8,7 +8,11 @@
8
8
  "type": "constraint",
9
9
  "topic": "Core Web Vitals thresholds",
10
10
  "content": "Google Core Web Vitals targets for good UX: LCP (Largest Contentful Paint) under 2.5s, INP (Interaction to Next Paint) under 200ms, CLS (Cumulative Layout Shift) under 0.1. These directly affect search ranking. Measure with field data (CrUX, RUM) not just lab data (Lighthouse), since field p75 is what Google uses.",
11
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
11
+ "source": {
12
+ "origin": "best-practice",
13
+ "artifact": null,
14
+ "connector": null
15
+ },
12
16
  "evidence": "documented",
13
17
  "status": "active",
14
18
  "phase_added": "define",
@@ -22,7 +26,11 @@
22
26
  "type": "recommendation",
23
27
  "topic": "JavaScript bundle size budget",
24
28
  "content": "Initial JavaScript bundle should be under 150 KB compressed (gzip) for mobile-first applications. Total page weight including images should be under 1.5 MB. Every 100 KB of JavaScript adds approximately 350ms parse/compile time on median mobile devices. Use bundle analyzer (webpack-bundle-analyzer, source-map-explorer) in CI to enforce budgets.",
25
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
29
+ "source": {
30
+ "origin": "best-practice",
31
+ "artifact": null,
32
+ "connector": null
33
+ },
26
34
  "evidence": "tested",
27
35
  "status": "active",
28
36
  "phase_added": "define",
@@ -36,7 +44,11 @@
36
44
  "type": "constraint",
37
45
  "topic": "WCAG 2.1 AA compliance",
38
46
  "content": "WCAG 2.1 Level AA is the legal standard in the US (ADA), EU (EAA 2025), and Ontario, Canada (AODA). Key requirements: color contrast ratio 4.5:1 for normal text and 3:1 for large text, all interactive elements keyboard-accessible, form inputs have visible labels, images have alt text, focus indicators visible, no content reliant solely on color.",
39
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
47
+ "source": {
48
+ "origin": "best-practice",
49
+ "artifact": null,
50
+ "connector": null
51
+ },
40
52
  "evidence": "documented",
41
53
  "status": "active",
42
54
  "phase_added": "define",
@@ -50,7 +62,11 @@
50
62
  "type": "factual",
51
63
  "topic": "SSR vs CSR tradeoffs",
52
64
  "content": "Server-Side Rendering (SSR) provides faster First Contentful Paint and better SEO but increases server load and Time to Interactive (hydration cost). Client-Side Rendering (CSR) has faster subsequent navigations but a blank page until JS loads. Use SSR/SSG for content-heavy, SEO-critical pages; CSR for authenticated app-like interfaces behind login.",
53
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
65
+ "source": {
66
+ "origin": "best-practice",
67
+ "artifact": null,
68
+ "connector": null
69
+ },
54
70
  "evidence": "documented",
55
71
  "status": "active",
56
72
  "phase_added": "define",
@@ -64,7 +80,11 @@
64
80
  "type": "risk",
65
81
  "topic": "third-party script performance",
66
82
  "content": "Third-party scripts (analytics, ads, chat widgets) are the leading cause of performance regression. A single poorly-written third-party script can add 500ms-2s to page load. Load all third-party scripts with async or defer. Set performance budgets that include third-party weight. Use a tag manager with server-side option to control loading.",
67
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
83
+ "source": {
84
+ "origin": "best-practice",
85
+ "artifact": null,
86
+ "connector": null
87
+ },
68
88
  "evidence": "production",
69
89
  "status": "active",
70
90
  "phase_added": "define",
@@ -78,7 +98,11 @@
78
98
  "type": "recommendation",
79
99
  "topic": "image optimization pipeline",
80
100
  "content": "Serve images in WebP or AVIF format (30-50% smaller than JPEG). Use srcset with 2-4 size variants for responsive images. Lazy-load images below the fold with loading='lazy'. Set explicit width and height attributes to prevent CLS. Automate optimization in the build pipeline or use an image CDN (Cloudinary, imgix, Cloudflare Images).",
81
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
101
+ "source": {
102
+ "origin": "best-practice",
103
+ "artifact": null,
104
+ "connector": null
105
+ },
82
106
  "evidence": "tested",
83
107
  "status": "active",
84
108
  "phase_added": "define",
@@ -92,7 +116,11 @@
92
116
  "type": "recommendation",
93
117
  "topic": "state management selection criteria",
94
118
  "content": "Use local component state (useState/signals) for UI-only state. Use URL state (query params, path) for shareable/bookmarkable state. Use server state libraries (TanStack Query, SWR) for API data with caching. Use global state (Zustand, Redux, or context) only for truly cross-cutting client state (auth, theme, feature flags). Most applications over-use global state.",
95
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
119
+ "source": {
120
+ "origin": "best-practice",
121
+ "artifact": null,
122
+ "connector": null
123
+ },
96
124
  "evidence": "documented",
97
125
  "status": "active",
98
126
  "phase_added": "define",
@@ -106,7 +134,11 @@
106
134
  "type": "risk",
107
135
  "topic": "layout shift from web fonts",
108
136
  "content": "Custom web fonts cause either FOIT (Flash of Invisible Text) or FOUT (Flash of Unstyled Text), both contributing to CLS. Mitigate with font-display: swap, preload critical fonts with <link rel='preload'>, subset fonts to used character ranges (Latin subset is ~30 KB vs 200 KB for full Unicode), and use size-adjust to match fallback metrics.",
109
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
137
+ "source": {
138
+ "origin": "best-practice",
139
+ "artifact": null,
140
+ "connector": null
141
+ },
110
142
  "evidence": "tested",
111
143
  "status": "active",
112
144
  "phase_added": "define",
@@ -120,7 +152,11 @@
120
152
  "type": "factual",
121
153
  "topic": "code splitting impact",
122
154
  "content": "Route-based code splitting typically reduces initial bundle size by 40-60%. Dynamic import() splits a module into a separate chunk loaded on demand. For React, use React.lazy() with Suspense boundaries. Split at route boundaries first, then heavy component boundaries (charts, editors, maps). Avoid splitting components under 30 KB as the HTTP overhead negates the benefit.",
123
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
155
+ "source": {
156
+ "origin": "best-practice",
157
+ "artifact": null,
158
+ "connector": null
159
+ },
124
160
  "evidence": "tested",
125
161
  "status": "active",
126
162
  "phase_added": "define",
@@ -134,7 +170,11 @@
134
170
  "type": "constraint",
135
171
  "topic": "keyboard navigation requirements",
136
172
  "content": "Every interactive element must be reachable and operable via keyboard alone. Tab order must follow visual reading order. Focus must be visible (minimum 2px outline, 3:1 contrast ratio against adjacent colors). Custom components (dropdowns, modals, tabs) must implement ARIA roles and keyboard patterns from WAI-ARIA Authoring Practices. Test by unplugging the mouse.",
137
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
173
+ "source": {
174
+ "origin": "best-practice",
175
+ "artifact": null,
176
+ "connector": null
177
+ },
138
178
  "evidence": "documented",
139
179
  "status": "active",
140
180
  "phase_added": "define",
@@ -162,7 +202,11 @@
162
202
  "type": "recommendation",
163
203
  "topic": "error boundaries and fallbacks",
164
204
  "content": "Wrap major UI sections in error boundaries that catch render errors and display a fallback UI instead of a white screen. Log caught errors to your observability stack. Provide a retry mechanism in the fallback. Without error boundaries, a single failing component crashes the entire page. Test error boundaries by deliberately throwing in development.",
165
- "source": { "origin": "best-practice", "artifact": null, "connector": null },
205
+ "source": {
206
+ "origin": "best-practice",
207
+ "artifact": null,
208
+ "connector": null
209
+ },
166
210
  "evidence": "documented",
167
211
  "status": "active",
168
212
  "phase_added": "define",
@@ -172,4 +216,4 @@
172
216
  "tags": ["frontend", "error-handling", "resilience", "react"]
173
217
  }
174
218
  ]
175
- }
219
+ }
@@ -0,0 +1,179 @@
1
+ {
2
+ "name": "Hackathon: Best Use of AI",
3
+ "description": "Scoring rubric and seed claims for the 'Best Use of AI' hackathon category. Features a unique ai_leverage sub-score rewarding quality AI-assisted research over shallow generation.",
4
+ "version": "1.0.0",
5
+ "claims": [
6
+ {
7
+ "id": "hai-001",
8
+ "type": "constraint",
9
+ "topic": "AI leverage scoring",
10
+ "content": "AI leverage sub-score = (claims_from_ai_assisted_research / total_claims) * quality_multiplier. Quality multiplier = average evidence tier of AI-assisted claims. Weight: 30%. Rewards using AI for deep research, not shallow claim generation.",
11
+ "source": {
12
+ "origin": "best-practice",
13
+ "artifact": null,
14
+ "connector": null
15
+ },
16
+ "evidence": "documented",
17
+ "status": "active",
18
+ "phase_added": "define",
19
+ "timestamp": "2026-01-01T00:00:00.000Z",
20
+ "conflicts_with": [],
21
+ "resolved_by": null,
22
+ "tags": ["hackathon", "scoring", "ai-leverage", "best-ai"]
23
+ },
24
+ {
25
+ "id": "hai-002",
26
+ "type": "constraint",
27
+ "topic": "evidence tier scoring",
28
+ "content": "Evidence tier sub-score uses weighted sum normalized to 0-100. Weight: 20%. AI-assisted claims should produce higher evidence tiers — web and documented, not just stated.",
29
+ "source": {
30
+ "origin": "best-practice",
31
+ "artifact": null,
32
+ "connector": null
33
+ },
34
+ "evidence": "documented",
35
+ "status": "active",
36
+ "phase_added": "define",
37
+ "timestamp": "2026-01-01T00:00:00.000Z",
38
+ "conflicts_with": [],
39
+ "resolved_by": null,
40
+ "tags": ["hackathon", "scoring", "evidence-tier", "best-ai"]
41
+ },
42
+ {
43
+ "id": "hai-003",
44
+ "type": "constraint",
45
+ "topic": "type diversity scoring",
46
+ "content": "Type diversity sub-score = (distinct_types / 6) * 100. Weight: 20%. Best AI use means leveraging AI for diverse research outputs — not just generating a list of facts.",
47
+ "source": {
48
+ "origin": "best-practice",
49
+ "artifact": null,
50
+ "connector": null
51
+ },
52
+ "evidence": "documented",
53
+ "status": "active",
54
+ "phase_added": "define",
55
+ "timestamp": "2026-01-01T00:00:00.000Z",
56
+ "conflicts_with": [],
57
+ "resolved_by": null,
58
+ "tags": ["hackathon", "scoring", "type-diversity", "best-ai"]
59
+ },
60
+ {
61
+ "id": "hai-004",
62
+ "type": "constraint",
63
+ "topic": "corroboration scoring",
64
+ "content": "Corroboration sub-score = witnessed_claims / total_claims * 100. Weight: 15%. AI can find cross-references humans miss — high corroboration signals effective AI-human collaboration.",
65
+ "source": {
66
+ "origin": "best-practice",
67
+ "artifact": null,
68
+ "connector": null
69
+ },
70
+ "evidence": "documented",
71
+ "status": "active",
72
+ "phase_added": "define",
73
+ "timestamp": "2026-01-01T00:00:00.000Z",
74
+ "conflicts_with": [],
75
+ "resolved_by": null,
76
+ "tags": ["hackathon", "scoring", "corroboration", "best-ai"]
77
+ },
78
+ {
79
+ "id": "hai-005",
80
+ "type": "constraint",
81
+ "topic": "challenge depth scoring",
82
+ "content": "Challenge depth sub-score = (challenge_claims + resolved_conflicts) / total_claims * 100. Weight: 15%. Best AI use includes AI-assisted devil's advocate — challenging claims and finding counter-evidence.",
83
+ "source": {
84
+ "origin": "best-practice",
85
+ "artifact": null,
86
+ "connector": null
87
+ },
88
+ "evidence": "documented",
89
+ "status": "active",
90
+ "phase_added": "define",
91
+ "timestamp": "2026-01-01T00:00:00.000Z",
92
+ "conflicts_with": [],
93
+ "resolved_by": null,
94
+ "tags": ["hackathon", "scoring", "challenge-depth", "best-ai"]
95
+ },
96
+ {
97
+ "id": "hai-006",
98
+ "type": "recommendation",
99
+ "topic": "AI collaboration patterns",
100
+ "content": "Effective AI research patterns: (1) AI finds sources, human validates and elevates evidence tier. (2) AI generates initial claims, human challenges and refines. (3) AI identifies conflicts between sources, human resolves. (4) AI drafts recommendations, human grounds in business context.",
101
+ "source": {
102
+ "origin": "best-practice",
103
+ "artifact": null,
104
+ "connector": null
105
+ },
106
+ "evidence": "stated",
107
+ "status": "active",
108
+ "phase_added": "define",
109
+ "timestamp": "2026-01-01T00:00:00.000Z",
110
+ "conflicts_with": [],
111
+ "resolved_by": null,
112
+ "tags": ["hackathon", "ai-patterns", "collaboration", "best-ai"]
113
+ },
114
+ {
115
+ "id": "hai-007",
116
+ "type": "risk",
117
+ "topic": "AI slop detection",
118
+ "content": "Risk of AI 'slop': bulk-generated low-quality claims that look comprehensive but lack depth. Signals: uniform evidence tiers, generic topics, no conflicts or challenges, repetitive phrasing across claims.",
119
+ "source": { "origin": "research", "artifact": null, "connector": null },
120
+ "evidence": "stated",
121
+ "status": "active",
122
+ "phase_added": "research",
123
+ "timestamp": "2026-01-01T00:00:00.000Z",
124
+ "conflicts_with": [],
125
+ "resolved_by": null,
126
+ "tags": ["hackathon", "gaming", "ai-slop", "risk"]
127
+ },
128
+ {
129
+ "id": "hai-008",
130
+ "type": "recommendation",
131
+ "topic": "MCP connector usage",
132
+ "content": "Best AI use should leverage MCP connectors: web search for real-time data, GitHub for code context, Confluence/Jira for organizational knowledge. Connector diversity signals sophisticated AI orchestration.",
133
+ "source": {
134
+ "origin": "best-practice",
135
+ "artifact": null,
136
+ "connector": null
137
+ },
138
+ "evidence": "stated",
139
+ "status": "active",
140
+ "phase_added": "define",
141
+ "timestamp": "2026-01-01T00:00:00.000Z",
142
+ "conflicts_with": [],
143
+ "resolved_by": null,
144
+ "tags": ["hackathon", "mcp-connectors", "ai-orchestration", "best-ai"]
145
+ },
146
+ {
147
+ "id": "hai-009",
148
+ "type": "recommendation",
149
+ "topic": "human judge criteria",
150
+ "content": "Human judges for 'Best Use of AI' score on: (1) human-AI synergy — did the team direct AI effectively or just let it run? (2) evidence elevation — did AI help reach higher evidence tiers? (3) creative prompting — did the team use novel approaches to extract insight?",
151
+ "source": { "origin": "research", "artifact": null, "connector": null },
152
+ "evidence": "stated",
153
+ "status": "active",
154
+ "phase_added": "research",
155
+ "timestamp": "2026-01-01T00:00:00.000Z",
156
+ "conflicts_with": [],
157
+ "resolved_by": null,
158
+ "tags": ["hackathon", "human-judging", "criteria", "best-ai"]
159
+ },
160
+ {
161
+ "id": "hai-010",
162
+ "type": "factual",
163
+ "topic": "HackEval precedent",
164
+ "content": "HackEval keeps AI as assistant, not decision-maker: structured rubrics with blind scoring to reduce bias, but final decisions remain human. AI Judge (Devpost) takes the opposite approach with full automation. The wheat hackathon model should be the middle ground.",
165
+ "source": {
166
+ "origin": "research",
167
+ "artifact": "https://www.scribd.com/document/973181548/HackEval-Full-Pitch",
168
+ "connector": null
169
+ },
170
+ "evidence": "web",
171
+ "status": "active",
172
+ "phase_added": "research",
173
+ "timestamp": "2026-01-01T00:00:00.000Z",
174
+ "conflicts_with": [],
175
+ "resolved_by": null,
176
+ "tags": ["hackathon", "precedent", "hackeval", "ai-judging"]
177
+ }
178
+ ]
179
+ }
@@ -0,0 +1,180 @@
1
+ {
2
+ "name": "Hackathon: Best Business Impact",
3
+ "description": "Scoring rubric and seed claims for the 'Best Business Impact' hackathon category. Weights constraint satisfaction and evidence quality — rewards research that drives real decisions.",
4
+ "version": "1.0.0",
5
+ "claims": [
6
+ {
7
+ "id": "hbiz-001",
8
+ "type": "constraint",
9
+ "topic": "constraint satisfaction scoring",
10
+ "content": "Constraint satisfaction sub-score = (constraint_claims_with_evidence / total_constraint_claims) * 100. Weight: 30% — the dominant factor. Business impact requires understanding real constraints, not hypothetical ones.",
11
+ "source": {
12
+ "origin": "best-practice",
13
+ "artifact": null,
14
+ "connector": null
15
+ },
16
+ "evidence": "documented",
17
+ "status": "active",
18
+ "phase_added": "define",
19
+ "timestamp": "2026-01-01T00:00:00.000Z",
20
+ "conflicts_with": [],
21
+ "resolved_by": null,
22
+ "tags": [
23
+ "hackathon",
24
+ "scoring",
25
+ "constraint-satisfaction",
26
+ "business-impact"
27
+ ]
28
+ },
29
+ {
30
+ "id": "hbiz-002",
31
+ "type": "constraint",
32
+ "topic": "evidence tier scoring",
33
+ "content": "Evidence tier sub-score uses weighted sum normalized to 0-100. Weight: 25%. Business decisions need strong evidence — 'we tested it' beats 'someone blogged about it'.",
34
+ "source": {
35
+ "origin": "best-practice",
36
+ "artifact": null,
37
+ "connector": null
38
+ },
39
+ "evidence": "documented",
40
+ "status": "active",
41
+ "phase_added": "define",
42
+ "timestamp": "2026-01-01T00:00:00.000Z",
43
+ "conflicts_with": [],
44
+ "resolved_by": null,
45
+ "tags": ["hackathon", "scoring", "evidence-tier", "business-impact"]
46
+ },
47
+ {
48
+ "id": "hbiz-003",
49
+ "type": "constraint",
50
+ "topic": "corroboration scoring",
51
+ "content": "Corroboration sub-score = witnessed_claims / total_claims * 100. Weight: 20%. Business decisions carry more weight when findings are corroborated by multiple independent sources.",
52
+ "source": {
53
+ "origin": "best-practice",
54
+ "artifact": null,
55
+ "connector": null
56
+ },
57
+ "evidence": "documented",
58
+ "status": "active",
59
+ "phase_added": "define",
60
+ "timestamp": "2026-01-01T00:00:00.000Z",
61
+ "conflicts_with": [],
62
+ "resolved_by": null,
63
+ "tags": ["hackathon", "scoring", "corroboration", "business-impact"]
64
+ },
65
+ {
66
+ "id": "hbiz-004",
67
+ "type": "constraint",
68
+ "topic": "type diversity scoring",
69
+ "content": "Type diversity sub-score = (distinct_types / 6) * 100. Weight: 15%. Business-impactful research uses estimates (cost, timeline), risks, and constraints — not just facts.",
70
+ "source": {
71
+ "origin": "best-practice",
72
+ "artifact": null,
73
+ "connector": null
74
+ },
75
+ "evidence": "documented",
76
+ "status": "active",
77
+ "phase_added": "define",
78
+ "timestamp": "2026-01-01T00:00:00.000Z",
79
+ "conflicts_with": [],
80
+ "resolved_by": null,
81
+ "tags": ["hackathon", "scoring", "type-diversity", "business-impact"]
82
+ },
83
+ {
84
+ "id": "hbiz-005",
85
+ "type": "constraint",
86
+ "topic": "health scoring",
87
+ "content": "Health sub-score = max(0, (1 - warnings/total_claims) * 100). Weight: 10%. A clean compilation signals thoroughness — important when research informs business decisions.",
88
+ "source": {
89
+ "origin": "best-practice",
90
+ "artifact": null,
91
+ "connector": null
92
+ },
93
+ "evidence": "documented",
94
+ "status": "active",
95
+ "phase_added": "define",
96
+ "timestamp": "2026-01-01T00:00:00.000Z",
97
+ "conflicts_with": [],
98
+ "resolved_by": null,
99
+ "tags": ["hackathon", "scoring", "health", "business-impact"]
100
+ },
101
+ {
102
+ "id": "hbiz-006",
103
+ "type": "recommendation",
104
+ "topic": "business impact signals",
105
+ "content": "Signals of business impact: (1) estimate claims with cost/timeline numbers, (2) constraint claims tied to real stakeholder requirements, (3) risk claims with mitigation strategies, (4) recommendations with clear ROI framing.",
106
+ "source": {
107
+ "origin": "best-practice",
108
+ "artifact": null,
109
+ "connector": null
110
+ },
111
+ "evidence": "stated",
112
+ "status": "active",
113
+ "phase_added": "define",
114
+ "timestamp": "2026-01-01T00:00:00.000Z",
115
+ "conflicts_with": [],
116
+ "resolved_by": null,
117
+ "tags": ["hackathon", "business-signals", "business-impact"]
118
+ },
119
+ {
120
+ "id": "hbiz-007",
121
+ "type": "factual",
122
+ "topic": "enterprise use cases",
123
+ "content": "High-business-impact wheat use cases: vendor evaluation (45-day process compressed to minutes), architecture decision records (weeks to minutes), compliance gap analysis (weeks to 20 minutes), security questionnaire prep (days to hours).",
124
+ "source": { "origin": "research", "artifact": null, "connector": null },
125
+ "evidence": "web",
126
+ "status": "active",
127
+ "phase_added": "research",
128
+ "timestamp": "2026-01-01T00:00:00.000Z",
129
+ "conflicts_with": [],
130
+ "resolved_by": null,
131
+ "tags": ["hackathon", "enterprise", "time-savings", "business-impact"]
132
+ },
133
+ {
134
+ "id": "hbiz-008",
135
+ "type": "recommendation",
136
+ "topic": "human judge criteria",
137
+ "content": "Human judges for 'Best Business Impact' score on: (1) decision clarity — could a VP act on this brief? (2) stakeholder awareness — are constraints from real decision-makers? (3) actionability — are next steps concrete with owners and timelines?",
138
+ "source": { "origin": "research", "artifact": null, "connector": null },
139
+ "evidence": "stated",
140
+ "status": "active",
141
+ "phase_added": "research",
142
+ "timestamp": "2026-01-01T00:00:00.000Z",
143
+ "conflicts_with": [],
144
+ "resolved_by": null,
145
+ "tags": ["hackathon", "human-judging", "criteria", "business-impact"]
146
+ },
147
+ {
148
+ "id": "hbiz-009",
149
+ "type": "risk",
150
+ "topic": "false precision",
151
+ "content": "Risk of false precision in business impact scoring: teams may add fake cost estimates or timelines to boost their 'estimate' claim count. Counter: human judges verify estimate plausibility in final round.",
152
+ "source": { "origin": "research", "artifact": null, "connector": null },
153
+ "evidence": "stated",
154
+ "status": "active",
155
+ "phase_added": "research",
156
+ "timestamp": "2026-01-01T00:00:00.000Z",
157
+ "conflicts_with": [],
158
+ "resolved_by": null,
159
+ "tags": ["hackathon", "gaming", "false-precision", "risk"]
160
+ },
161
+ {
162
+ "id": "hbiz-010",
163
+ "type": "factual",
164
+ "topic": "WildHacks normalization",
165
+ "content": "WildHacks (Northwestern) dual-normalization: adjusts for judge leniency and project quality variance. Raw scores scaled to 100, then normalized per-judge. Gold standard for fair multi-judge hackathon scoring.",
166
+ "source": {
167
+ "origin": "research",
168
+ "artifact": "https://guide.wildhacks.net/judging-and-awards/scoring-formula/",
169
+ "connector": null
170
+ },
171
+ "evidence": "web",
172
+ "status": "active",
173
+ "phase_added": "research",
174
+ "timestamp": "2026-01-01T00:00:00.000Z",
175
+ "conflicts_with": [],
176
+ "resolved_by": null,
177
+ "tags": ["hackathon", "normalization", "wildhacks", "fairness"]
178
+ }
179
+ ]
180
+ }